def testTwoElementPipeline(self):
    """Two pipeline elements compose: 'cats' -> 'fish' -> reversed 'hsif'."""
    lexicon = Lexicon(
        Splitter(),
        StupidPipelineElement('cats', 'fish'),
        WackyReversePipelineElement('fish'),
    )
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('hsif')
    self.assertEqual(wids, [1])
def testThreeElementPipeline(self):
    """Three-element pipeline: stopword removal, substitution, reversal."""
    pipeline = (
        StopWordPipelineElement({'and': 1}),
        StupidPipelineElement('dogs', 'fish'),
        WackyReversePipelineElement('fish'),
    )
    lexicon = Lexicon(Splitter(), *pipeline)
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('hsif')
    self.assertEqual(wids, [2])
def make_old_index():
    """Build a classic TextIndex over the 'read' attribute, wired to an
    old-style Lexicon that uses the ZCTextIndex stopword dictionary and
    a custom splitter."""
    from Products.PluginIndexes.TextIndex.TextIndex import TextIndex
    from Products.PluginIndexes.TextIndex.Lexicon import Lexicon
    from Products.ZCTextIndex.StopDict import get_stopdict

    lexicon = Lexicon(get_stopdict())
    lexicon.SplitterFunc = MySplitter()
    return TextIndex("read", lexicon=lexicon)
def testTermToWordIdsWithProcess_post_glob(self):
    """This test is for added process_post_glob"""
    # A splitter subclass that defines process_post_glob must be invoked
    # on term lookup; the assert inside verifies it sees the split terms.
    class AddedSplitter(Splitter):
        def process_post_glob(self, lst):
            assert lst == ['dogs']
            return ['dogs']

    lexicon = Lexicon(AddedSplitter())
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('dogs')
    self.assertEqual(wids, [3])
def setUp(self):
    """Create a QueryParser over a lexicon with a fake stopword remover."""
    from Products.ZCTextIndex.QueryParser import QueryParser
    from Products.ZCTextIndex.Lexicon import Lexicon
    from Products.ZCTextIndex.Lexicon import Splitter

    # Only 'stop' is a stopword (but 'and' is still an operator)
    remover = FakeStopWordRemover()
    self.lexicon = Lexicon(Splitter(), remover)
    self.parser = QueryParser(self.lexicon)
def test_reindex_doc_conflict(self):
    # Re-indexing a document from one ZODB connection while a second
    # connection re-indexes another must not raise a ConflictError:
    # the index's persistent structures have to merge cleanly.
    self.index = OkapiIndex(Lexicon())
    self.index.index_doc(0, 'Sometimes change is good')
    self.index.index_doc(1, 'Then again, who asked')
    self.openDB()
    r1 = self.db.open().root()
    r1['i'] = self.index
    transaction.commit()
    # Second, independent connection sees the same persistent index.
    r2 = self.db.open().root()
    copy = r2['i']
    # Make sure the data is loaded
    list(copy._docweight.items())
    list(copy._docwords.items())
    list(copy._wordinfo.items())
    list(copy._lexicon._wids.items())
    list(copy._lexicon._words.items())
    # Both connections start from the identical object revision.
    self.assertEqual(self.index._p_serial, copy._p_serial)
    self.index.index_doc(0, 'Sometimes change isn\'t bad')
    transaction.commit()
    # This commit happens against a now-stale copy; it must still succeed.
    copy.index_doc(1, 'Then again, who asked you?')
    transaction.commit()
def test_upgrade_totaldoclen(self):
    # Older OkapiIndex instances stored _totaldoclen as a plain integer;
    # newer ones use a callable Length counter.  Downgrading index1's
    # attribute mid-stream checks that index_doc/unindex_doc transparently
    # upgrade it while keeping totals identical to a fresh index (index2).
    self.index1 = OkapiIndex(Lexicon())
    self.index2 = OkapiIndex(Lexicon())
    self.index1.index_doc(0, 'The quiet of night')
    self.index2.index_doc(0, 'The quiet of night')
    # Revert index1 back to a long to simulate an older index instance
    self.index1._totaldoclen = int(self.index1._totaldoclen())
    self.index1.index_doc(1, 'gazes upon my shadow')
    self.index2.index_doc(1, 'gazes upon my shadow')
    self.assertEqual(
        self.index1._totaldoclen(), self.index2._totaldoclen())
    # Downgrade again and verify the unindex path also upgrades.
    self.index1._totaldoclen = int(self.index1._totaldoclen())
    self.index1.unindex_doc(0)
    self.index2.unindex_doc(0)
    self.assertEqual(
        self.index1._totaldoclen(), self.index2._totaldoclen())
def test_upgrade_document_count(self):
    # Older OkapiIndex instances lacked the document_count attribute.
    # Deleting it mid-stream checks that index_doc/unindex_doc recreate
    # it as a BTrees Length counter with the correct value, matching a
    # fresh index (index2) that never lost the attribute.
    self.index1 = OkapiIndex(Lexicon())
    self.index2 = OkapiIndex(Lexicon())
    self.index1.index_doc(0, 'The quiet of night')
    self.index2.index_doc(0, 'The quiet of night')
    # Revert index1 back to simulate an older index instance
    del self.index1.document_count
    self.index1.index_doc(1, 'gazes upon my shadow')
    self.index2.index_doc(1, 'gazes upon my shadow')
    self.assertIs(self.index1.document_count.__class__, Length)
    self.assertEqual(
        self.index1.document_count(), self.index2.document_count())
    # Delete again and verify the unindex path also restores it.
    del self.index1.document_count
    self.index1.unindex_doc(0)
    self.index2.unindex_doc(0)
    self.assertIs(self.index1.document_count.__class__, Length)
    self.assertEqual(
        self.index1.document_count(), self.index2.document_count())
def index(rt, mboxfile, db, profiler):
    # Index every message of a Unix mbox file into a ZCTextIndex stored
    # under the ZODB root `rt`, optionally under a profiler, and print
    # timing statistics when VERBOSE is set.
    # NOTE(review): this is Python 2 code (print statements, sys.maxint,
    # time.clock, mailbox.UnixMailbox) — left byte-identical.
    global NUM
    idx_time = 0
    pack_time = 0
    start_time = time.time()
    lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover())
    # ZCTextIndex pulls its configuration from two ad-hoc option objects.
    extra = Extra()
    extra.lexicon_id = 'lexicon'
    extra.doc_attr = 'text'
    extra.index_type = 'Okapi BM25 Rank'
    caller = Extra()
    caller.lexicon = lexicon
    rt["index"] = idx = ZCTextIndex("index", extra, caller)
    if not EXCLUDE_TEXT:
        rt["documents"] = docs = IOBTree()
    else:
        docs = None
    transaction.commit()
    mbox = mailbox.UnixMailbox(open(mboxfile, 'rb'))
    if VERBOSE:
        print "opened", mboxfile
    if not NUM:
        NUM = sys.maxint
    if profiler:
        itime, ptime, i = profiler.runcall(indexmbox, mbox, idx, docs, db)
    else:
        itime, ptime, i = indexmbox(mbox, idx, docs, db)
    idx_time += itime
    pack_time += ptime
    transaction.commit()
    if PACK_INTERVAL and i % PACK_INTERVAL != 0:
        # Final pack when the last batch didn't land on a pack boundary.
        if VERBOSE >= 2:
            print "packing one last time..."
        p0 = time.clock()
        db.pack(time.time())
        p1 = time.clock()
        if VERBOSE:
            print "pack took %s sec" % (p1 - p0)
        pack_time += p1 - p0
    if VERBOSE:
        finish_time = time.time()
        print
        print "Index time", round(idx_time / 60, 3), "minutes"
        print "Pack time", round(pack_time / 60, 3), "minutes"
        print "Index bytes", Message.total_bytes
        rate = (Message.total_bytes / idx_time) / 1024
        print "Index rate %.2f KB/sec" % rate
        print "Indexing began", time.ctime(start_time)
        print "Indexing ended", time.ctime(finish_time)
        print "Wall clock minutes", round((finish_time - start_time) / 60, 3)
def make_zc_index():
    """Construct a ZCTextIndex over the 'read' attribute.

    ZCTextIndex pulls its configuration from two ad-hoc objects, so we
    fabricate both with a throwaway Struct class.
    """
    class Struct:
        pass

    caller = Struct()
    caller.lexicon = Lexicon(HTMLWordSplitter(), StopWordRemover())
    extra = Struct()
    extra.doc_attr = "read"
    extra.lexicon_id = "lexicon"
    return ZCTextIndex("read", extra, caller)
class TestLexiconConflict(unittest.TestCase):
    # Exercises ZODB write-conflict resolution for the Lexicon: two
    # connections add words concurrently and the merged result must
    # stay internally consistent.

    # db stays None until openDB() runs; tearDown guards on it so tests
    # that never open a database still clean up correctly.
    db = None

    def tearDown(self):
        if self.db is not None:
            self.db.close()
            self.storage.cleanup()

    def openDB(self):
        # One FileStorage per process id so parallel runs don't collide.
        from ZODB.FileStorage import FileStorage
        from ZODB.DB import DB
        n = 'fs_tmp__%s' % os.getpid()
        self.storage = FileStorage(n)
        self.db = DB(self.storage)

    def testAddWordConflict(self):
        self.l = Lexicon(Splitter())
        self.openDB()
        r1 = self.db.open().root()
        r1['l'] = self.l
        transaction.commit()
        # Second, independent connection sees the same persistent lexicon.
        r2 = self.db.open().root()
        copy = r2['l']
        # Make sure the data is loaded
        list(copy._wids.items())
        list(copy._words.items())
        copy.length()
        self.assertEqual(self.l._p_serial, copy._p_serial)
        self.l.sourceToWordIds('mary had a little lamb')
        transaction.commit()
        # These commits run against a stale copy; conflict resolution
        # must merge rather than raise.
        copy.sourceToWordIds('whose fleece was')
        copy.sourceToWordIds('white as snow')
        transaction.commit()
        # 11 distinct words across the three phrases after the merge.
        self.assertEqual(copy.length(), 11)
        self.assertEqual(copy.length(), len(copy._words))
def testAddWordConflict(self):
    # Two ZODB connections add words to the same Lexicon concurrently;
    # conflict resolution must merge both commits and keep the word
    # count consistent with the _words mapping.
    self.l = Lexicon(Splitter())
    self.openDB()
    r1 = self.db.open().root()
    r1['l'] = self.l
    transaction.commit()
    # Second, independent connection sees the same persistent lexicon.
    r2 = self.db.open().root()
    copy = r2['l']
    # Make sure the data is loaded
    list(copy._wids.items())
    list(copy._words.items())
    copy.length()
    self.assertEqual(self.l._p_serial, copy._p_serial)
    self.l.sourceToWordIds('mary had a little lamb')
    transaction.commit()
    # These commits run against a stale copy; they must still succeed.
    copy.sourceToWordIds('whose fleece was')
    copy.sourceToWordIds('white as snow')
    transaction.commit()
    # 11 distinct words across the three phrases after the merge.
    self.assertEqual(copy.length(), 11)
    self.assertEqual(copy.length(), len(copy._words))
def test_index_doc_conflict(self):
    # Two ZODB connections index different documents concurrently; the
    # OkapiIndex's conflict resolution must merge both commits without
    # raising ConflictError.
    self.index = OkapiIndex(Lexicon())
    self.openDB()
    r1 = self.db.open().root()
    r1['i'] = self.index
    transaction.commit()
    # Second, independent connection sees the same persistent index.
    r2 = self.db.open().root()
    copy = r2['i']
    # Make sure the data is loaded
    list(copy._docweight.items())
    list(copy._docwords.items())
    list(copy._wordinfo.items())
    list(copy._lexicon._wids.items())
    list(copy._lexicon._words.items())
    self.assertEqual(self.index._p_serial, copy._p_serial)
    self.index.index_doc(0, 'The time has come')
    transaction.commit()
    # This commit happens against a now-stale copy; it must still succeed.
    copy.index_doc(1, 'That time has gone')
    transaction.commit()
def setUp(self):
    # Build a minimal Zope application holding a ZCatalog plus one index
    # of each ManagableIndex flavour, and two acquisition-wrapped test
    # objects used by the individual tests.
    app = Application()
    catalog = ZCatalog('Catalog')
    app._setObject('Catalog', catalog)
    self.catalog = catalog = app._getOb('Catalog')
    install_products(app, 'ManagableIndex')
    # field
    self.fi = self._createIndex('id', FieldIndex)
    # keyword
    self.ki = self._createIndex('kw', KeywordIndex)
    # range
    self.ri = self._createIndex(
        'ri', RangeIndex,
        dict(CombineType='aggregate',
             ValueProviders=[
                 dict(id='rlow', type='AttributeLookup'),
                 dict(id='rhigh', type='AttributeLookup'),
             ]),
    )
    # word — the word and simple-text indexes share one lexicon, looked
    # up by id from the application.
    lexicon = Lexicon(Splitter())
    app._setObject('lexicon', lexicon)
    self.wi = self._createIndex('wi', WordIndex, dict(Lexicon='lexicon'))
    # simple text
    self.sti = self._createIndex('sti', SimpleTextIndex,
                                 dict(Lexicon='lexicon'))
    # path
    self.pi = self._createIndex('pi', PathIndex)
    # create objects
    self.obj1 = obj1 = _Object()
    obj1.kw = (1, 2)
    obj1.fkw = _Caller(lambda obj: obj.kw)
    obj1.fid = _Caller(lambda obj: obj.id)
    # obj2 is acquisition-wrapped inside obj1 so it acquires attributes.
    self.obj2 = obj2 = _Object().__of__(obj1)
    obj2.id = 'id'
def testSplitterAdaptorNofold(self):
    """Without case folding, 'CATS' and 'cats' are distinct words."""
    lexicon = Lexicon(Splitter())
    lexicon.sourceToWordIds('CATS and dogs')
    wids = lexicon.termToWordIds('cats and dogs')
    # Lowercase 'cats' was never indexed (only 'CATS'), so it maps to 0;
    # 'and' and 'dogs' keep their assigned ids.
    self.assertEqual(wids, [0, 2, 3])
def setUp(self):
    # Fresh lexicon + index pair for every test; subclasses choose the
    # index class via IndexFactory.
    splitter = Splitter()
    self.lexicon = Lexicon(splitter)
    self.index = self.IndexFactory(self.lexicon)
def testOnePipelineElement(self):
    """A single pipeline element rewrites matching words ('dogs' -> 'fish')."""
    element = StupidPipelineElement('dogs', 'fish')
    lexicon = Lexicon(Splitter(), element)
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('fish')
    self.assertEqual(wids, [3])
def testUpgradeLength(self):
    """An old-style Lexicon lacking the `length` attribute must grow a
    BTrees Length counter the first time words are added."""
    from BTrees.Length import Length
    lexicon = Lexicon(Splitter())
    del lexicon.length  # Older instances don't override length
    lexicon.sourceToWordIds('how now brown cow')
    # assertIs replaces the deprecated assert_ alias (removed in
    # Python 3.12) and matches the assertIs style used elsewhere in
    # this suite; identity check on the class is intentional.
    self.assertIs(lexicon.length.__class__, Length)
class IndexTest(object):
    # Shared behavioural tests for text indexes over a Splitter-only
    # lexicon.  Mixed into concrete TestCase subclasses.

    # Subclasses must set a class variable IndexFactory to the appropriate
    # index object constructor.
    IndexFactory = None

    def setUp(self):
        self.lexicon = Lexicon(Splitter())
        self.index = self.IndexFactory(self.lexicon)

    def test_index_document(self, docid=1):
        # Five distinct words -> five _wordinfo entries, each mapping
        # only this docid.
        doc = 'simple document contains five words'
        self.assertFalse(self.index.has_doc(docid))
        self.index.index_doc(docid, doc)
        self.assertTrue(self.index.has_doc(docid))
        self.assertTrue(self.index._docweight[docid])
        self.assertEqual(len(self.index._docweight), 1)
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        self.assertEqual(len(self.index._wordinfo), 5)
        self.assertEqual(len(self.index._docwords), 1)
        self.assertEqual(len(self.index.get_words(docid)), 5)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        for map in self.index._wordinfo.values():
            self.assertEqual(len(map), 1)
            self.assertIn(docid, map)

    def test_unindex_document(self):
        # Unindexing the only document leaves every structure empty.
        docid = 1
        self.test_index_document(docid)
        self.index.unindex_doc(docid)
        self.assertEqual(len(self.index._docweight), 0)
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        self.assertEqual(len(self.index._wordinfo), 0)
        self.assertEqual(len(self.index._docwords), 0)
        self.assertEqual(len(self.index._wordinfo), self.index.length())

    def test_index_two_documents(self):
        self.test_index_document()
        doc = 'another document just four'
        docid = 2
        self.index.index_doc(docid, doc)
        self.assertTrue(self.index._docweight[docid])
        self.assertEqual(len(self.index._docweight), 2)
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        # 5 words from doc 1 + 3 new ones; 'document' is shared -> 8.
        self.assertEqual(len(self.index._wordinfo), 8)
        self.assertEqual(len(self.index._docwords), 2)
        self.assertEqual(len(self.index.get_words(docid)), 4)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        wids = self.lexicon.termToWordIds('document')
        self.assertEqual(len(wids), 1)
        document_wid = wids[0]
        for wid, map in self.index._wordinfo.items():
            if wid == document_wid:
                # 'document' appears in both documents.
                self.assertEqual(len(map), 2)
                self.assertIn(1, map)
                self.assertIn(docid, map)
            else:
                self.assertEqual(len(map), 1)

    def test_index_two_unindex_one(self):
        # index two documents, unindex one, and test the results
        self.test_index_two_documents()
        self.index.unindex_doc(1)
        docid = 2
        self.assertEqual(len(self.index._docweight), 1)
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        self.assertTrue(self.index._docweight[docid])
        self.assertEqual(len(self.index._wordinfo), 4)
        self.assertEqual(len(self.index._docwords), 1)
        self.assertEqual(len(self.index.get_words(docid)), 4)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        for map in self.index._wordinfo.values():
            self.assertEqual(len(map), 1)
            self.assertIn(docid, map)

    def test_index_duplicated_words(self, docid=1):
        # 7 tokens, 5 distinct: repeats collapse in _wordinfo but all
        # 7 positions show up in the document's word list.
        doc = 'very simple repeat repeat repeat document test'
        self.index.index_doc(docid, doc)
        self.assertTrue(self.index._docweight[docid])
        self.assertEqual(len(self.index._wordinfo), 5)
        self.assertEqual(len(self.index._docwords), 1)
        self.assertEqual(len(self.index.get_words(docid)), 7)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        wids = self.lexicon.termToWordIds('repeat')
        self.assertEqual(len(wids), 1)
        for wid, map in self.index._wordinfo.items():
            self.assertEqual(len(map), 1)
            self.assertIn(docid, map)

    def test_simple_query_oneresult(self):
        self.index.index_doc(1, 'not the same document')
        results = self.index.search('document')
        self.assertEqual(list(results.keys()), [1])

    def test_simple_query_noresults(self):
        self.index.index_doc(1, 'not the same document')
        results = self.index.search('frobnicate')
        self.assertEqual(list(results.keys()), [])

    def test_query_oneresult(self):
        self.index.index_doc(1, 'not the same document')
        self.index.index_doc(2, 'something about something else')
        results = self.index.search('document')
        self.assertEqual(list(results.keys()), [1])

    def test_search_phrase(self):
        # Both documents contain all three words; only doc 1 has them
        # adjacent and in order.
        self.index.index_doc(1, 'the quick brown fox jumps over the lazy dog')
        self.index.index_doc(2, 'the quick fox jumps lazy over the brown dog')
        results = self.index.search_phrase('quick brown fox')
        self.assertEqual(list(results.keys()), [1])

    def test_search_glob(self):
        self.index.index_doc(1, 'how now brown cow')
        self.index.index_doc(2, 'hough nough browne cough')
        self.index.index_doc(3, 'bar brawl')
        results = self.index.search_glob('bro*')
        self.assertEqual(list(results.keys()), [1, 2])
        results = self.index.search_glob('b*')
        self.assertEqual(list(results.keys()), [1, 2, 3])
class IndexTest(TestCase):
    # Shared behavioural tests for text indexes; subclasses provide
    # IndexFactory.  NOTE(review): Python 2 era idioms (assert_,
    # dict.has_key) — left byte-identical.

    def setUp(self):
        self.lexicon = Lexicon(Splitter())
        self.index = self.IndexFactory(self.lexicon)

    def test_index_document(self, DOCID=1):
        # Five distinct words -> five _wordinfo entries.
        doc = "simple document contains five words"
        self.assert_(not self.index.has_doc(DOCID))
        self.index.index_doc(DOCID, doc)
        self.assert_(self.index.has_doc(DOCID))
        self.assert_(self.index._docweight[DOCID])
        self.assertEqual(len(self.index._docweight), 1)
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        self.assertEqual(len(self.index._wordinfo), 5)
        self.assertEqual(len(self.index._docwords), 1)
        self.assertEqual(len(self.index.get_words(DOCID)), 5)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        for map in self.index._wordinfo.values():
            self.assertEqual(len(map), 1)
            self.assert_(map.has_key(DOCID))

    def test_unindex_document(self):
        DOCID = 1
        self.test_index_document(DOCID)
        self.index.unindex_doc(DOCID)
        self.assertEqual(len(self.index._docweight), 0)
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        self.assertEqual(len(self.index._wordinfo), 0)
        self.assertEqual(len(self.index._docwords), 0)
        self.assertEqual(len(self.index._wordinfo), self.index.length())

    def test_index_two_documents(self):
        self.test_index_document()
        doc = "another document just four"
        DOCID = 2
        self.index.index_doc(DOCID, doc)
        self.assert_(self.index._docweight[DOCID])
        self.assertEqual(len(self.index._docweight), 2)
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        # 5 words from doc 1 + 3 new ones; 'document' is shared -> 8.
        self.assertEqual(len(self.index._wordinfo), 8)
        self.assertEqual(len(self.index._docwords), 2)
        self.assertEqual(len(self.index.get_words(DOCID)), 4)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        wids = self.lexicon.termToWordIds("document")
        self.assertEqual(len(wids), 1)
        document_wid = wids[0]
        for wid, map in self.index._wordinfo.items():
            if wid == document_wid:
                # 'document' occurs in both documents.
                self.assertEqual(len(map), 2)
                self.assert_(map.has_key(1))
                self.assert_(map.has_key(DOCID))
            else:
                self.assertEqual(len(map), 1)

    def test_index_two_unindex_one(self):
        # index two documents, unindex one, and test the results
        self.test_index_two_documents()
        self.index.unindex_doc(1)
        DOCID = 2
        self.assertEqual(len(self.index._docweight), 1)
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        self.assert_(self.index._docweight[DOCID])
        self.assertEqual(len(self.index._wordinfo), 4)
        self.assertEqual(len(self.index._docwords), 1)
        self.assertEqual(len(self.index.get_words(DOCID)), 4)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        for map in self.index._wordinfo.values():
            self.assertEqual(len(map), 1)
            self.assert_(map.has_key(DOCID))

    def test_index_duplicated_words(self, DOCID=1):
        # 7 tokens, 5 distinct words.
        doc = "very simple repeat repeat repeat document test"
        self.index.index_doc(DOCID, doc)
        self.assert_(self.index._docweight[DOCID])
        self.assertEqual(len(self.index._wordinfo), 5)
        self.assertEqual(len(self.index._docwords), 1)
        self.assertEqual(len(self.index.get_words(DOCID)), 7)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        self.assertEqual(
            len(self.index._docweight), self.index.document_count())
        wids = self.lexicon.termToWordIds("repeat")
        self.assertEqual(len(wids), 1)
        # NOTE(review): repititive_wid is assigned but never used.
        repititive_wid = wids[0]
        for wid, map in self.index._wordinfo.items():
            self.assertEqual(len(map), 1)
            self.assert_(map.has_key(DOCID))

    def test_simple_query_oneresult(self):
        self.index.index_doc(1, 'not the same document')
        results = self.index.search("document")
        self.assertEqual(list(results.keys()), [1])

    def test_simple_query_noresults(self):
        self.index.index_doc(1, 'not the same document')
        results = self.index.search("frobnicate")
        self.assertEqual(list(results.keys()), [])

    def test_query_oneresult(self):
        self.index.index_doc(1, 'not the same document')
        self.index.index_doc(2, 'something about something else')
        results = self.index.search("document")
        self.assertEqual(list(results.keys()), [1])

    def test_search_phrase(self):
        # Only doc 1 has the three words adjacent and in order.
        self.index.index_doc(1, "the quick brown fox jumps over the lazy dog")
        self.index.index_doc(2, "the quick fox jumps lazy over the brown dog")
        results = self.index.search_phrase("quick brown fox")
        self.assertEqual(list(results.keys()), [1])

    def test_search_glob(self):
        self.index.index_doc(1, "how now brown cow")
        self.index.index_doc(2, "hough nough browne cough")
        self.index.index_doc(3, "bar brawl")
        results = self.index.search_glob("bro*")
        self.assertEqual(list(results.keys()), [1, 2])
        results = self.index.search_glob("b*")
        self.assertEqual(list(results.keys()), [1, 2, 3])
def testTermToWordIds(self):
    """A previously indexed term maps to its single word id."""
    lexicon = Lexicon(Splitter())
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('dogs')
    self.assertEqual(wids, [3])
def setUp(self):
    # Only 'stop' is a stopword (but 'and' is still an operator)
    remover = FakeStopWordRemover()
    self.lexicon = Lexicon(Splitter(), remover)
    self.parser = QueryParser(self.lexicon)
class IndexTest(TestCase):
    # Shared behavioural tests for text indexes; subclasses provide
    # IndexFactory.  NOTE(review): Python 2 era idioms (assert_,
    # dict.has_key); unlike the sibling variants, this one does not
    # check document_count.  Left byte-identical.

    def setUp(self):
        self.lexicon = Lexicon(Splitter())
        self.index = self.IndexFactory(self.lexicon)

    def test_index_document(self, DOCID=1):
        # Five distinct words -> five _wordinfo entries.
        doc = "simple document contains five words"
        self.assert_(not self.index.has_doc(DOCID))
        self.index.index_doc(DOCID, doc)
        self.assert_(self.index.has_doc(DOCID))
        self.assert_(self.index._docweight[DOCID])
        self.assertEqual(len(self.index._docweight), 1)
        self.assertEqual(len(self.index._wordinfo), 5)
        self.assertEqual(len(self.index._docwords), 1)
        self.assertEqual(len(self.index.get_words(DOCID)), 5)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        for map in self.index._wordinfo.values():
            self.assertEqual(len(map), 1)
            self.assert_(map.has_key(DOCID))

    def test_unindex_document(self):
        DOCID = 1
        self.test_index_document(DOCID)
        self.index.unindex_doc(DOCID)
        self.assertEqual(len(self.index._docweight), 0)
        self.assertEqual(len(self.index._wordinfo), 0)
        self.assertEqual(len(self.index._docwords), 0)
        self.assertEqual(len(self.index._wordinfo), self.index.length())

    def test_index_two_documents(self):
        self.test_index_document()
        doc = "another document just four"
        DOCID = 2
        self.index.index_doc(DOCID, doc)
        self.assert_(self.index._docweight[DOCID])
        self.assertEqual(len(self.index._docweight), 2)
        # 5 words from doc 1 + 3 new ones; 'document' is shared -> 8.
        self.assertEqual(len(self.index._wordinfo), 8)
        self.assertEqual(len(self.index._docwords), 2)
        self.assertEqual(len(self.index.get_words(DOCID)), 4)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        wids = self.lexicon.termToWordIds("document")
        self.assertEqual(len(wids), 1)
        document_wid = wids[0]
        for wid, map in self.index._wordinfo.items():
            if wid == document_wid:
                # 'document' occurs in both documents.
                self.assertEqual(len(map), 2)
                self.assert_(map.has_key(1))
                self.assert_(map.has_key(DOCID))
            else:
                self.assertEqual(len(map), 1)

    def test_index_two_unindex_one(self):
        # index two documents, unindex one, and test the results
        self.test_index_two_documents()
        self.index.unindex_doc(1)
        DOCID = 2
        self.assertEqual(len(self.index._docweight), 1)
        self.assert_(self.index._docweight[DOCID])
        self.assertEqual(len(self.index._wordinfo), 4)
        self.assertEqual(len(self.index._docwords), 1)
        self.assertEqual(len(self.index.get_words(DOCID)), 4)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        for map in self.index._wordinfo.values():
            self.assertEqual(len(map), 1)
            self.assert_(map.has_key(DOCID))

    def test_index_duplicated_words(self, DOCID=1):
        # 7 tokens, 5 distinct words.
        doc = "very simple repeat repeat repeat document test"
        self.index.index_doc(DOCID, doc)
        self.assert_(self.index._docweight[DOCID])
        self.assertEqual(len(self.index._wordinfo), 5)
        self.assertEqual(len(self.index._docwords), 1)
        self.assertEqual(len(self.index.get_words(DOCID)), 7)
        self.assertEqual(len(self.index._wordinfo), self.index.length())
        wids = self.lexicon.termToWordIds("repeat")
        self.assertEqual(len(wids), 1)
        # NOTE(review): repititive_wid is assigned but never used.
        repititive_wid = wids[0]
        for wid, map in self.index._wordinfo.items():
            self.assertEqual(len(map), 1)
            self.assert_(map.has_key(DOCID))

    def test_simple_query_oneresult(self):
        self.index.index_doc(1, 'not the same document')
        results = self.index.search("document")
        self.assertEqual(list(results.keys()), [1])

    def test_simple_query_noresults(self):
        self.index.index_doc(1, 'not the same document')
        results = self.index.search("frobnicate")
        self.assertEqual(list(results.keys()), [])

    def test_query_oneresult(self):
        self.index.index_doc(1, 'not the same document')
        self.index.index_doc(2, 'something about something else')
        results = self.index.search("document")
        self.assertEqual(list(results.keys()), [1])

    def test_search_phrase(self):
        # Only doc 1 has the three words adjacent and in order.
        self.index.index_doc(1, "the quick brown fox jumps over the lazy dog")
        self.index.index_doc(2, "the quick fox jumps lazy over the brown dog")
        results = self.index.search_phrase("quick brown fox")
        self.assertEqual(list(results.keys()), [1])

    def test_search_glob(self):
        self.index.index_doc(1, "how now brown cow")
        self.index.index_doc(2, "hough nough browne cough")
        self.index.index_doc(3, "bar brawl")
        results = self.index.search_glob("bro*")
        self.assertEqual(list(results.keys()), [1, 2])
        results = self.index.search_glob("b*")
        self.assertEqual(list(results.keys()), [1, 2, 3])
def setUp(self):
    # Parser over a plain, pipeline-free lexicon.
    lex = Lexicon(Splitter())
    self.lexicon = lex
    self.parser = QueryParser(lex)
def testMissingTermToWordIds(self):
    """A term never seen by the lexicon maps to the id 0 sentinel."""
    lexicon = Lexicon(Splitter())
    lexicon.sourceToWordIds('cats and dogs')
    wids = lexicon.termToWordIds('boxes')
    self.assertEqual(wids, [0])
def setUp(self):
    """Create a QueryParser over a plain Splitter-only lexicon."""
    from Products.ZCTextIndex.QueryParser import QueryParser
    from Products.ZCTextIndex.Lexicon import Lexicon
    from Products.ZCTextIndex.Lexicon import Splitter

    lex = Lexicon(Splitter())
    self.lexicon = lex
    self.parser = QueryParser(lex)
def __init__(self):
    # Standard text pipeline (split, case-fold, drop stopwords)
    # feeding an Okapi BM25 index.
    pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
    self.lexicon = Lexicon(*pipeline)
    self.index = OkapiIndex(self.lexicon)
def test_query_before_document_count_upgrade(self):
    """Searching must work even before document_count is upgraded."""
    self.index1 = OkapiIndex(Lexicon(Splitter()))
    self.index1.index_doc(0, 'The quiet of night')
    # Revert index1 back to a long to simulate an older index instance
    del self.index1.document_count
    hits = self.index1.search('night')
    self.assertEqual(len(hits), 1)
def setUp(self):
    # Parser tests execute queries against a stub index.
    lex = Lexicon(Splitter())
    self.lexicon = lex
    self.parser = QueryParser(lex)
    self.index = FauxIndex()