class TermNumerator(Observable):
    """Translate terms to numbers and back using a Lucene taxonomy index.

    Terms are stored as single-component facet labels through a
    DirectoryTaxonomyWriter; the reverse lookup goes through a
    DirectoryTaxonomyReader that is created lazily on the first getTerm() call.
    """

    def __init__(self, path, lruTaxonomyWriterCacheSize=100):
        """Open (or create) the taxonomy stored at `path`.

        path -- filesystem location of the taxonomy directory.
        lruTaxonomyWriterCacheSize -- size handed to LruTaxonomyWriterCache.
        """
        Observable.__init__(self)
        taxoDirectory = MMapDirectory(File(path))
        # NOTE(review): unmapping is explicitly disabled here; the reason is
        # not visible in this block -- confirm before changing.
        taxoDirectory.setUseUnmap(False)
        self._taxoWriter = DirectoryTaxonomyWriter(
            taxoDirectory,
            IndexWriterConfig.OpenMode.CREATE_OR_APPEND,
            LruTaxonomyWriterCache(lruTaxonomyWriterCacheSize))

    def numerateTerm(self, term):
        """Return whatever addCategory() yields for `term`; None for a falsy term."""
        if not term:
            return
        return self._taxoWriter.addCategory(FacetLabel([term]))

    def getTerm(self, nr):
        """Return the first path component stored under number `nr`."""
        if not hasattr(self, "_taxoReader"):
            # The reader is opened on the writer itself and only when needed.
            self._taxoReader = DirectoryTaxonomyReader(self._taxoWriter)
        tr = DirectoryTaxonomyReader.openIfChanged(self._taxoReader)
        if tr:
            # A newer reader is available: release the old one and switch.
            self._taxoReader.close()
            self._taxoReader = tr
        return self._taxoReader.getPath(nr).components[0]

    def handleShutdown(self):
        """Announce the shutdown on stdout and commit the taxonomy writer."""
        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('handle shutdown: saving TermNumerator')
        from sys import stdout; stdout.flush()
        self._taxoWriter.commit()

    def close(self):
        """Close the lazily opened taxonomy reader (if any) and the writer.

        The reader created by getTerm() used to be left open; it is now
        closed as well, and the writer is closed even if that fails.
        """
        reader = getattr(self, "_taxoReader", None)
        try:
            if reader is not None:
                reader.close()
        finally:
            self._taxoWriter.close()
class IndexAndTaxonomy(object):
    """Bundle an index reader, a taxonomy reader and a lazily built searcher.

    The searcher is rebuilt on the next access of the `searcher` property
    whenever the index reader was reopened or the read settings changed.
    """

    def __init__(self, settings, indexDirectory=None, taxoDirectory=None):
        """Open both readers and prepare the read-settings wrapper.

        settings -- object providing `similarity`, `numberOfConcurrentTasks`
                    and `multithreaded`.
        indexDirectory -- passed to DirectoryReader.open().
        taxoDirectory -- passed to DirectoryTaxonomyReader().
        """
        self._settings = settings
        self._similarity = settings.similarity
        self._numberOfConcurrentTasks = settings.numberOfConcurrentTasks
        self._reader = DirectoryReader.open(indexDirectory)
        self.taxoReader = DirectoryTaxonomyReader(taxoDirectory)
        self._readerSettingsWrapper = ReaderSettingsWrapper()
        # `get` reports the settings currently in effect; it goes through the
        # `searcher` property, so it may build the searcher when called.
        self._readerSettingsWrapper.get = lambda: {
            "similarity": self.searcher.getSimilarity().toString(),
            "numberOfConcurrentTasks": self._numberOfConcurrentTasks,
        }
        self._readerSettingsWrapper.set = self._setReadSettings
        self._searcher = None
        self._executor = None
        self._reopenSearcher = True

    def reopen(self):
        """Switch to newer readers when the index has changed.

        The taxonomy reader is only checked after the index reader was
        actually replaced.
        """
        reader = DirectoryReader.openIfChanged(self._reader)
        if reader is None:
            return
        self._reader.close()
        self._reader = reader
        self._reopenSearcher = True
        taxoReader = DirectoryTaxonomyReader.openIfChanged(self.taxoReader)
        if taxoReader is None:
            return
        self.taxoReader.close()
        self.taxoReader = taxoReader

    @property
    def searcher(self):
        """Return the current searcher, rebuilding it when flagged stale."""
        if not self._reopenSearcher:
            return self._searcher
        if self._settings.multithreaded:
            if self._executor:
                # Replace the previous pool; its size may have changed.
                self._executor.shutdown()
            self._executor = Executors.newFixedThreadPool(self._numberOfConcurrentTasks)
            self._searcher = SuperIndexSearcher(self._reader, self._executor, self._numberOfConcurrentTasks)
        else:
            self._searcher = IndexSearcher(self._reader)
        self._searcher.setSimilarity(self._similarity)
        self._reopenSearcher = False
        return self._searcher

    def _setReadSettings(self, similarity=None, numberOfConcurrentTasks=None):
        """Apply new read settings; None falls back to the configured value.

        similarity -- mapping with "k1" and "b" used to build a
                      BM25Similarity, or None.
        numberOfConcurrentTasks -- new task count, or None.
        """
        # This method must be thread-safe
        if similarity is None:
            self._similarity = self._settings.similarity
        else:
            self._similarity = BM25Similarity(similarity["k1"], similarity["b"])
        if numberOfConcurrentTasks is None:
            self._numberOfConcurrentTasks = self._settings.numberOfConcurrentTasks
        else:
            self._numberOfConcurrentTasks = numberOfConcurrentTasks
        self._reopenSearcher = True

    def close(self):
        """Close both readers and shut down the searcher's thread pool.

        Each step runs even if an earlier one raises, so a failing
        taxonomy close no longer leaves the index reader open, and the
        executor created by `searcher` is no longer leaked.
        """
        try:
            self.taxoReader.close()
        finally:
            try:
                self._reader.close()
            finally:
                if self._executor is not None:
                    self._executor.shutdown()
                    self._executor = None
def getTerm(self, nr):
    """Look up number `nr` in the taxonomy and return its first path component.

    The taxonomy reader is created from the writer on first use and is
    swapped for a newer one whenever openIfChanged() hands one back.
    """
    if hasattr(self, "_taxoReader"):
        current = self._taxoReader
    else:
        current = self._taxoReader = DirectoryTaxonomyReader(self._taxoWriter)
    refreshed = DirectoryTaxonomyReader.openIfChanged(current)
    if refreshed:
        # Release the outdated reader before adopting the new one.
        current.close()
        self._taxoReader = current = refreshed
    path = current.getPath(nr)
    return path.components[0]
def runDrillDown(self):
    """Run the drill-down search and return its result.

    Opens a taxonomy reader on self.taxoDir and an index reader on
    self.indexDir, passes both to SimpleSearcher.searchWithDrillDown and
    returns what that call produces. Both readers are closed afterwards,
    also when opening or searching raises.
    """
    taxo = indexReader = None
    try:
        # open readers
        taxo = DirectoryTaxonomyReader(self.taxoDir)
        indexReader = DirectoryReader.open(self.indexDir)
        return SimpleSearcher.searchWithDrillDown(indexReader, taxo)
    finally:
        # close readers (taxonomy first, as before), skipping any that
        # never got opened
        try:
            if taxo is not None:
                taxo.close()
        finally:
            if indexReader is not None:
                indexReader.close()
def runSimple(self):
    """Run the plain facet search and return its result.

    Opens a taxonomy reader on self.taxoDir and an index reader on
    self.indexDir and hands them to SimpleSearcher.searchWithFacets
    (per the original comment this returns List<FacetResult>). Both
    readers are closed afterwards, also when opening or searching raises.
    """
    taxo = indexReader = None
    try:
        # open readers
        taxo = DirectoryTaxonomyReader(self.taxoDir)
        indexReader = DirectoryReader.open(self.indexDir)
        return SimpleSearcher.searchWithFacets(indexReader, taxo)
    finally:
        # close readers (taxonomy first, as before), skipping any that
        # never got opened
        try:
            if taxo is not None:
                taxo.close()
        finally:
            if indexReader is not None:
                indexReader.close()
def runDrillDown(self):
    """Run one drill-down search per entry of drilldownCategories.

    Each entry is announced on stdout and searched with
    SimpleSearcher.searchWithDrillDown using self.facets_config.

    Returns the result of the last search, or None when
    drilldownCategories is empty (this used to fail with an unbound
    local variable). Both readers are closed afterwards, also when
    opening or searching raises.
    """
    taxo = indexReader = None
    # Stays None when there is nothing to drill down on.
    facetRes = None
    try:
        # open readers
        taxo = DirectoryTaxonomyReader(self.taxoDir)
        indexReader = DirectoryReader.open(self.indexDir)
        for drilldown in drilldownCategories:
            print("search with drilldown: %s" % '/'.join(drilldown))
            facetRes = SimpleSearcher.searchWithDrillDown(
                drilldown, indexReader, taxo, self.facets_config)
    finally:
        # close readers (taxonomy first, as before), skipping any that
        # never got opened
        try:
            if taxo is not None:
                taxo.close()
        finally:
            if indexReader is not None:
                indexReader.close()
    # return result
    return facetRes
def __init__(self, settings, indexDirectory=None, taxoDirectory=None):
    """Open the index and taxonomy readers and wire up the settings wrapper.

    settings -- object providing `similarity` and `numberOfConcurrentTasks`.
    indexDirectory -- passed to DirectoryReader.open().
    taxoDirectory -- passed to DirectoryTaxonomyReader().
    """
    self._settings = settings
    self._similarity = settings.similarity
    self._numberOfConcurrentTasks = settings.numberOfConcurrentTasks
    self._reader = DirectoryReader.open(indexDirectory)
    self.taxoReader = DirectoryTaxonomyReader(taxoDirectory)

    def currentReadSettings():
        # Evaluated on every call, so it reflects the settings in effect then.
        return {
            "similarity": self.searcher.getSimilarity().toString(),
            "numberOfConcurrentTasks": self._numberOfConcurrentTasks,
        }

    self._readerSettingsWrapper = ReaderSettingsWrapper()
    self._readerSettingsWrapper.get = currentReadSettings
    self._readerSettingsWrapper.set = self._setReadSettings

    # No searcher yet; the flag marks it as needing to be built.
    self._searcher = None
    self._executor = None
    self._reopenSearcher = True
def runSimple(self):
    """Search once per entry of searchValues, then over all documents.

    Every term search goes through SimpleSearcher.searchWithTerm; its
    result is overwritten by the final SimpleSearcher.searchWithFacets
    call, whose result is what this method returns. Both readers are
    closed afterwards, also when opening or searching raises.
    """
    taxo = indexReader = None
    try:
        # open readers
        taxo = DirectoryTaxonomyReader(self.taxoDir)
        indexReader = DirectoryReader.open(self.indexDir)
        for term in searchValues:
            print("\nsearch by term '%s' ..." % term)
            facetRes = SimpleSearcher.searchWithTerm(
                term, indexReader, taxo, self.facets_config)
        print("\nsearch all documents ...")
        facetRes = SimpleSearcher.searchWithFacets(
            indexReader, taxo, self.facets_config)
    finally:
        # close readers (taxonomy first, as before), skipping any that
        # never got opened
        try:
            if taxo is not None:
                taxo.close()
        finally:
            if indexReader is not None:
                indexReader.close()
    # return result
    return facetRes
def reopen(self):
    """Adopt newer readers if the index changed since the last open.

    The index reader is checked first; only when it was replaced is the
    taxonomy reader checked as well. Replacing the index reader flags the
    searcher for rebuilding.
    """
    freshIndexReader = DirectoryReader.openIfChanged(self._reader)
    if freshIndexReader is not None:
        self._reader.close()
        self._reader = freshIndexReader
        self._reopenSearcher = True

        freshTaxoReader = DirectoryTaxonomyReader.openIfChanged(self.taxoReader)
        if freshTaxoReader is not None:
            self.taxoReader.close()
            self.taxoReader = freshTaxoReader
def __init__(self, settings, indexDirectory=None, taxoDirectory=None):
    """Set up readers, read settings and the (not yet built) searcher state.

    settings -- object providing `similarity` and `numberOfConcurrentTasks`.
    indexDirectory -- passed to DirectoryReader.open().
    taxoDirectory -- passed to DirectoryTaxonomyReader().
    """
    self._settings = settings
    self._similarity = settings.similarity
    self._numberOfConcurrentTasks = settings.numberOfConcurrentTasks

    self._reader = DirectoryReader.open(indexDirectory)
    self.taxoReader = DirectoryTaxonomyReader(taxoDirectory)

    def describeSettings():
        # Computed at call time from the searcher and the task count.
        similarityText = self.searcher.getSimilarity().toString()
        return {"similarity": similarityText,
                "numberOfConcurrentTasks": self._numberOfConcurrentTasks}

    self._readerSettingsWrapper = ReaderSettingsWrapper()
    self._readerSettingsWrapper.get = describeSettings
    self._readerSettingsWrapper.set = self._setReadSettings

    self._searcher = None
    self._executor = None
    # Marks the searcher as needing to be built.
    self._reopenSearcher = True
class IndexAndTaxonomy(object):
    """Keep an index reader, a taxonomy reader and a searcher together.

    Reopening the index reader or changing the read settings marks the
    searcher stale; the `searcher` property rebuilds it on next access.
    """

    def __init__(self, settings, indexDirectory=None, taxoDirectory=None):
        """Open both readers and prepare the read-settings wrapper.

        settings -- object providing `similarity`, `numberOfConcurrentTasks`
                    and `multithreaded`.
        indexDirectory -- passed to DirectoryReader.open().
        taxoDirectory -- passed to DirectoryTaxonomyReader().
        """
        self._settings = settings
        self._similarity = settings.similarity
        self._numberOfConcurrentTasks = settings.numberOfConcurrentTasks
        self._reader = DirectoryReader.open(indexDirectory)
        self.taxoReader = DirectoryTaxonomyReader(taxoDirectory)
        self._readerSettingsWrapper = ReaderSettingsWrapper()
        # `get` is evaluated per call and reads the `searcher` property,
        # so calling it may trigger a searcher rebuild.
        self._readerSettingsWrapper.get = lambda: {
            "similarity": self.searcher.getSimilarity().toString(),
            "numberOfConcurrentTasks": self._numberOfConcurrentTasks
        }
        self._readerSettingsWrapper.set = self._setReadSettings
        self._searcher = None
        self._executor = None
        self._reopenSearcher = True

    def reopen(self):
        """Switch to newer readers when the index has changed.

        Nothing happens when the index reader is unchanged; the taxonomy
        reader is only checked after the index reader was replaced.
        """
        reader = DirectoryReader.openIfChanged(self._reader)
        if reader is None:
            return
        self._reader.close()
        self._reader = reader
        self._reopenSearcher = True
        taxoReader = DirectoryTaxonomyReader.openIfChanged(self.taxoReader)
        if taxoReader is None:
            return
        self.taxoReader.close()
        self.taxoReader = taxoReader

    @property
    def searcher(self):
        """Return the searcher, building a new one if it is marked stale."""
        if not self._reopenSearcher:
            return self._searcher
        if self._settings.multithreaded:
            if self._executor:
                # The old pool is retired before a new one is created.
                self._executor.shutdown()
            self._executor = Executors.newFixedThreadPool(
                self._numberOfConcurrentTasks)
            self._searcher = SuperIndexSearcher(
                self._reader, self._executor, self._numberOfConcurrentTasks)
        else:
            self._searcher = IndexSearcher(self._reader)
        self._searcher.setSimilarity(self._similarity)
        self._reopenSearcher = False
        return self._searcher

    def _setReadSettings(self, similarity=None, numberOfConcurrentTasks=None):
        """Apply new read settings; None restores the configured value.

        similarity -- mapping with "k1" and "b" used to build a
                      BM25Similarity, or None.
        numberOfConcurrentTasks -- new task count, or None.
        """
        # This method must be thread-safe
        if similarity is None:
            self._similarity = self._settings.similarity
        else:
            self._similarity = BM25Similarity(similarity["k1"], similarity["b"])
        if numberOfConcurrentTasks is None:
            self._numberOfConcurrentTasks = self._settings.numberOfConcurrentTasks
        else:
            self._numberOfConcurrentTasks = numberOfConcurrentTasks
        self._reopenSearcher = True

    def close(self):
        """Release the readers and the searcher's thread pool.

        The executor created by `searcher` used to be left running and
        the index reader stayed open when closing the taxonomy reader
        failed; every step now runs regardless of earlier failures.
        """
        try:
            self.taxoReader.close()
        finally:
            try:
                self._reader.close()
            finally:
                if self._executor is not None:
                    self._executor.shutdown()
                    self._executor = None