class PyLuceneTestCase(TestCase):
    """Base test case providing an in-memory index directory and helpers.

    ``setUp`` stores a fresh ``RAMDirectory`` in ``self.directory`` and
    ``tearDown`` closes it.  The ``get*`` helpers fall back to that
    directory whenever no explicit directory is passed.
    """

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)

    def setUp(self):
        """Create the per-test directory."""
        self.directory = RAMDirectory()

    def tearDown(self):
        """Close the directory created by ``setUp``."""
        self.directory.close()

    def getConfig(self, analyzer=None):
        """Return a new ``IndexWriterConfig`` built from *analyzer*."""
        return IndexWriterConfig(analyzer)

    def getWriter(self, directory=None, analyzer=None, open_mode=None,
                  similarity=None, maxBufferedDocs=None, mergePolicy=None):
        """Return a new ``IndexWriter``.

        Every argument is optional:

        - *directory* defaults to ``self.directory``.
        - *analyzer* defaults to a ``WhitespaceAnalyzer`` wrapped in a
          ``LimitTokenCountAnalyzer`` with a limit of 10000.
        - *open_mode* defaults to ``IndexWriterConfig.OpenMode.CREATE``.
        - *similarity*, *maxBufferedDocs* and *mergePolicy* are applied to
          the config only when they are not ``None``.
        """
        if analyzer is None:
            analyzer = LimitTokenCountAnalyzer(WhitespaceAnalyzer(), 10000)
        config = self.getConfig(analyzer)

        if open_mode is None:
            open_mode = IndexWriterConfig.OpenMode.CREATE
        config.setOpenMode(open_mode)

        if similarity is not None:
            config.setSimilarity(similarity)
        if maxBufferedDocs is not None:
            config.setMaxBufferedDocs(maxBufferedDocs)
        if mergePolicy is not None:
            config.setMergePolicy(mergePolicy)

        if directory is None:
            directory = self.directory

        return IndexWriter(directory, config)

    def getSearcher(self, directory=None, reader=None):
        """Return an ``IndexSearcher`` over *reader*.

        When *reader* is ``None`` a reader is opened via ``getReader`` on
        *directory* (or ``self.directory``).
        """
        if reader is not None:
            return IndexSearcher(reader)

        return IndexSearcher(self.getReader(directory=directory))

    def getReader(self, directory=None):
        """Open a ``DirectoryReader`` on *directory* or ``self.directory``."""
        if directory is None:
            directory = self.directory

        return DirectoryReader.open(directory)

    def getOnlyLeafReader(self, reader):
        """Return the reader of the single leaf of *reader*.

        Raises ``ValueError`` when *reader* does not have exactly one leaf.
        """
        subReaders = reader.leaves()
        count = subReaders.size()
        if count != 1:
            # Format explicitly: concatenating the reader object and the
            # integer count onto a str would raise TypeError and mask the
            # intended ValueError.
            raise ValueError(
                "{} has {} segments instead of exactly one".format(
                    reader, count))

        return subReaders.get(0).reader()
class PyLuceneTestCase(TestCase):
    """Shared fixture for tests that need a throw-away Lucene index.

    Each test gets its own ``RAMDirectory`` in ``self.directory`` (created
    in ``setUp``, closed in ``tearDown``).  ``self.TEST_VERSION`` is set to
    ``Version.LUCENE_CURRENT`` and is passed to the config and the default
    analyzer.
    """

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)
        self.TEST_VERSION = Version.LUCENE_CURRENT

    def setUp(self):
        self.directory = RAMDirectory()

    def tearDown(self):
        self.directory.close()

    def getConfig(self, analyzer=None):
        """Build an ``IndexWriterConfig`` for ``TEST_VERSION`` and *analyzer*."""
        return IndexWriterConfig(self.TEST_VERSION, analyzer)

    def getWriter(self, directory=None, analyzer=None, open_mode=None,
                  similarity=None, maxBufferedDocs=None, mergePolicy=None):
        """Create an ``IndexWriter``; unspecified options use the defaults.

        Defaults: a token-limited (10000) whitespace analyzer, open mode
        ``CREATE`` and ``self.directory``.  The remaining options are only
        set on the config when a value is supplied.
        """
        if analyzer is None:
            analyzer = LimitTokenCountAnalyzer(
                WhitespaceAnalyzer(self.TEST_VERSION), 10000)
        config = self.getConfig(analyzer)

        if open_mode is None:
            open_mode = IndexWriterConfig.OpenMode.CREATE
        config.setOpenMode(open_mode)

        # Apply the optional settings in a fixed order, skipping unset ones.
        optional_settings = (
            (config.setSimilarity, similarity),
            (config.setMaxBufferedDocs, maxBufferedDocs),
            (config.setMergePolicy, mergePolicy),
        )
        for apply_setting, value in optional_settings:
            if value is not None:
                apply_setting(value)

        if directory is None:
            directory = self.directory
        return IndexWriter(directory, config)

    def getSearcher(self, directory=None, reader=None):
        """Return a searcher over *reader*, opening one if none is given."""
        if reader is None:
            reader = self.getReader(directory=directory)
        return IndexSearcher(reader)

    def getReader(self, directory=None):
        """Open a ``DirectoryReader`` on *directory* or ``self.directory``."""
        if directory is None:
            directory = self.directory
        return DirectoryReader.open(directory)
class PyLuceneTestCase(TestCase):
    """Test-case base class wrapping a per-test ``RAMDirectory``.

    ``self.directory`` is created in ``setUp`` and closed in ``tearDown``;
    ``self.TEST_VERSION`` holds ``Version.LUCENE_CURRENT``.  The helper
    methods build writers, readers and searchers, defaulting to
    ``self.directory``.
    """

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)
        self.TEST_VERSION = Version.LUCENE_CURRENT

    def setUp(self):
        self.directory = RAMDirectory()

    def tearDown(self):
        self.directory.close()

    def getConfig(self, analyzer=None):
        """Return ``IndexWriterConfig(self.TEST_VERSION, analyzer)``."""
        return IndexWriterConfig(self.TEST_VERSION, analyzer)

    def getWriter(self, directory=None, analyzer=None, open_mode=None,
                  similarity=None, maxBufferedDocs=None, mergePolicy=None):
        """Return an ``IndexWriter`` configured from the given options.

        ``None`` means "use the default" for *directory*
        (``self.directory``), *analyzer* (whitespace analyzer limited to
        10000 tokens) and *open_mode* (``CREATE``), and "leave untouched"
        for *similarity*, *maxBufferedDocs* and *mergePolicy*.
        """
        if analyzer is None:
            analyzer = LimitTokenCountAnalyzer(
                WhitespaceAnalyzer(self.TEST_VERSION), 10000)

        config = self.getConfig(analyzer)
        config.setOpenMode(
            IndexWriterConfig.OpenMode.CREATE if open_mode is None
            else open_mode)

        if similarity is not None:
            config.setSimilarity(similarity)
        if maxBufferedDocs is not None:
            config.setMaxBufferedDocs(maxBufferedDocs)
        if mergePolicy is not None:
            config.setMergePolicy(mergePolicy)

        target = self.directory if directory is None else directory
        return IndexWriter(target, config)

    def getSearcher(self, directory=None, reader=None):
        """Return an ``IndexSearcher``; *reader* wins over *directory*."""
        source = (self.getReader(directory=directory) if reader is None
                  else reader)
        return IndexSearcher(source)

    def getReader(self, directory=None):
        """Open a ``DirectoryReader`` on *directory* or ``self.directory``."""
        target = self.directory if directory is None else directory
        return DirectoryReader.open(target)
def testTieBreaker(self):
    # MultiTermQuery provides (via attribute) information about which values
    # must be competitive to enter the priority queue.
    #
    # FuzzyQuery optimizes itself around this information, if the attribute
    # is not implemented correctly, there will be problems!
    #
    # Two separate indexes are searched together through a MultiReader, and
    # the fuzzy query is expected to report 5 total hits.

    # Everything opened here is recorded so the ``finally`` clause can close
    # it even when indexing, searching or the assertion fails.
    opened = []

    def track(resource):
        opened.append(resource)
        return resource

    try:
        directory = track(RAMDirectory())
        writer = track(self.getWriter(directory=directory))
        for text in ("a123456", "c123456", "d123456", "e123456"):
            self._addDoc(text, writer)

        directory2 = track(RAMDirectory())
        writer2 = track(self.getWriter(directory=directory2))
        for text in ("a123456", "b123456", "b123456", "b123456",
                     "c123456", "f123456"):
            self._addDoc(text, writer2)

        ir1 = track(writer.getReader())
        ir2 = track(writer2.getReader())

        mr = track(MultiReader([ir1, ir2]))
        searcher = self.getSearcher(reader=mr)

        fq = FuzzyQuery(Term("field", "z123456"), 1, 0, 2, False)
        docs = searcher.search(fq, 2)
        self.assertEqual(5, docs.totalHits.value)  # 5 docs, from the a and b's
    finally:
        # Close in reverse creation order: the multi-reader first, then the
        # readers, then each writer before the directory it writes to.
        for resource in reversed(opened):
            resource.close()
def testTieBreaker(self):
    # MultiTermQuery provides (via attribute) information about which values
    # must be competitive to enter the priority queue.
    #
    # FuzzyQuery optimizes itself around this information, if the attribute
    # is not implemented correctly, there will be problems!
    #
    # Two separate indexes are searched together through a MultiReader, and
    # the fuzzy query is expected to report 5 total hits.

    # Everything opened here is recorded so the ``finally`` clause can close
    # it even when indexing, searching or the assertion fails.
    opened = []

    def track(resource):
        opened.append(resource)
        return resource

    try:
        directory = track(RAMDirectory())
        writer = track(self.getWriter(directory=directory))
        for text in ("a123456", "c123456", "d123456", "e123456"):
            self._addDoc(text, writer)

        directory2 = track(RAMDirectory())
        writer2 = track(self.getWriter(directory=directory2))
        for text in ("a123456", "b123456", "b123456", "b123456",
                     "c123456", "f123456"):
            self._addDoc(text, writer2)

        ir1 = track(writer.getReader())
        ir2 = track(writer2.getReader())

        mr = track(MultiReader([ir1, ir2]))
        searcher = self.getSearcher(reader=mr)

        fq = FuzzyQuery(Term("field", "z123456"), 1, 0, 2, False)
        docs = searcher.search(fq, 2)
        self.assertEqual(5, docs.totalHits)  # 5 docs, from the a and b's
    finally:
        # Close in reverse creation order: the multi-reader first, then the
        # readers, then each writer before the directory it writes to.
        for resource in reversed(opened):
            resource.close()