class TextIndex(Persistent):
    """Persistent full-text index: a pipeline lexicon feeding an Okapi index."""

    def __init__(self):
        # Word pipeline: split into words, lowercase, drop stop words.
        self.lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover())
        self.index = OkapiIndex(self.lexicon)

    def index_text(self, docid, text):
        """Index (or replace) the text stored under docid."""
        self.index.index_doc(docid, text)
        self._p_changed = 1  # XXX

    def unindex(self, docid):
        """Remove docid from the index."""
        self.index.unindex_doc(docid)
        self._p_changed = 1  # XXX

    def query(self, query, nbest=10):
        """Run a query; return (best, total).

        ``best`` is a list of at most nbest (docid, score) pairs and
        ``total`` is the overall number of hits.
        """
        tree = QueryParser(self.lexicon).parseQuery(query)
        results = tree.executeQuery(self.index)
        if results is None:
            return [], 0
        chooser = NBest(nbest)
        chooser.addmany(results.items())
        return chooser.getbest(), len(results)

    def query_weight(self, query):
        """Return the maximum possible weight for the given query's terms."""
        tree = QueryParser(self.lexicon).parseQuery(query)
        return self.index.query_weight(tree.terms())
def test_reindex_doc_conflict(self):
    """Re-indexing different docs from two connections must not conflict."""
    self.index = OkapiIndex(Lexicon())
    self.index.index_doc(0, 'Sometimes change is good')
    self.index.index_doc(1, 'Then again, who asked')
    self.openDB()
    root1 = self.db.open().root()
    root1['i'] = self.index
    transaction.commit()
    root2 = self.db.open().root()
    copy = root2['i']
    # Force every persistent sub-structure to load in the second connection.
    for mapping in (copy._docweight, copy._docwords, copy._wordinfo,
                    copy._lexicon._wids, copy._lexicon._words):
        list(mapping.items())
    self.assertEqual(self.index._p_serial, copy._p_serial)
    # Replace doc 0 in one connection, doc 1 in the other; both must commit.
    self.index.index_doc(0, 'Sometimes change isn\'t bad')
    transaction.commit()
    copy.index_doc(1, 'Then again, who asked you?')
    transaction.commit()
def test_upgrade_totaldoclen(self):
    """A plain-int _totaldoclen (old format) is upgraded transparently."""
    self.index1 = OkapiIndex(Lexicon())
    self.index2 = OkapiIndex(Lexicon())
    for idx in (self.index1, self.index2):
        idx.index_doc(0, 'The quiet of night')
    # Downgrade index1 to a bare int to simulate an older index instance.
    self.index1._totaldoclen = int(self.index1._totaldoclen())
    for idx in (self.index1, self.index2):
        idx.index_doc(1, 'gazes upon my shadow')
    self.assertEqual(
        self.index1._totaldoclen(), self.index2._totaldoclen())
    # Downgrade again and check the unindex path upgrades as well.
    self.index1._totaldoclen = int(self.index1._totaldoclen())
    for idx in (self.index1, self.index2):
        idx.unindex_doc(0)
    self.assertEqual(
        self.index1._totaldoclen(), self.index2._totaldoclen())
def test_upgrade_document_count(self):
    """A missing document_count (old format) is recreated as a Length."""
    self.index1 = OkapiIndex(Lexicon())
    self.index2 = OkapiIndex(Lexicon())
    for idx in (self.index1, self.index2):
        idx.index_doc(0, 'The quiet of night')
    # Remove the attribute to simulate an older index instance.
    del self.index1.document_count
    for idx in (self.index1, self.index2):
        idx.index_doc(1, 'gazes upon my shadow')
    self.assertIs(self.index1.document_count.__class__, Length)
    self.assertEqual(
        self.index1.document_count(), self.index2.document_count())
    # Remove again and check the unindex path upgrades as well.
    del self.index1.document_count
    for idx in (self.index1, self.index2):
        idx.unindex_doc(0)
    self.assertIs(self.index1.document_count.__class__, Length)
    self.assertEqual(
        self.index1.document_count(), self.index2.document_count())
def test_index_doc_conflict(self):
    """Indexing different new docs from two connections must not conflict."""
    self.index = OkapiIndex(Lexicon())
    self.openDB()
    root1 = self.db.open().root()
    root1['i'] = self.index
    transaction.commit()
    root2 = self.db.open().root()
    copy = root2['i']
    # Force every persistent sub-structure to load in the second connection.
    for mapping in (copy._docweight, copy._docwords, copy._wordinfo,
                    copy._lexicon._wids, copy._lexicon._words):
        list(mapping.items())
    self.assertEqual(self.index._p_serial, copy._p_serial)
    # Add doc 0 through one connection, doc 1 through the other.
    self.index.index_doc(0, 'The time has come')
    transaction.commit()
    copy.index_doc(1, 'That time has gone')
    transaction.commit()
def __init__(self):
    """Build the standard word pipeline and an Okapi index over it."""
    pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
    self.lexicon = Lexicon(*pipeline)
    self.index = OkapiIndex(self.lexicon)
def test_query_before_document_count_upgrade(self):
    """Searching still works before document_count has been upgraded."""
    self.index1 = OkapiIndex(Lexicon(Splitter()))
    self.index1.index_doc(0, 'The quiet of night')
    # Drop the attribute to mimic an index created before document_count.
    del self.index1.document_count
    hits = self.index1.search('night')
    self.assertEqual(len(hits), 1)
class TestUpgrade(TestCase):
    """Upgrade paths for OkapiIndex instances persisted by older releases."""

    def test_query_before_totaldoclen_upgrade(self):
        """Searching still works while _totaldoclen is a bare int."""
        self.index1 = OkapiIndex(Lexicon(Splitter()))
        self.index1.index_doc(0, 'The quiet of night')
        # Downgrade to a bare int to simulate an older index instance.
        self.index1._totaldoclen = int(self.index1._totaldoclen())
        hits = self.index1.search('night')
        self.assertEqual(len(hits), 1)

    def test_upgrade_totaldoclen(self):
        """A plain-int _totaldoclen is upgraded on index and unindex."""
        self.index1 = OkapiIndex(Lexicon())
        self.index2 = OkapiIndex(Lexicon())
        for idx in (self.index1, self.index2):
            idx.index_doc(0, 'The quiet of night')
        # Downgrade index1 to a bare int to simulate an older instance.
        self.index1._totaldoclen = int(self.index1._totaldoclen())
        for idx in (self.index1, self.index2):
            idx.index_doc(1, 'gazes upon my shadow')
        self.assertEqual(
            self.index1._totaldoclen(), self.index2._totaldoclen())
        self.index1._totaldoclen = int(self.index1._totaldoclen())
        for idx in (self.index1, self.index2):
            idx.unindex_doc(0)
        self.assertEqual(
            self.index1._totaldoclen(), self.index2._totaldoclen())

    def test_query_before_document_count_upgrade(self):
        """Searching still works before document_count exists."""
        self.index1 = OkapiIndex(Lexicon(Splitter()))
        self.index1.index_doc(0, 'The quiet of night')
        # Drop the attribute to mimic an index created before document_count.
        del self.index1.document_count
        hits = self.index1.search('night')
        self.assertEqual(len(hits), 1)

    def test_upgrade_document_count(self):
        """A missing document_count is recreated as a Length counter."""
        self.index1 = OkapiIndex(Lexicon())
        self.index2 = OkapiIndex(Lexicon())
        for idx in (self.index1, self.index2):
            idx.index_doc(0, 'The quiet of night')
        # Remove the attribute to simulate an older index instance.
        del self.index1.document_count
        for idx in (self.index1, self.index2):
            idx.index_doc(1, 'gazes upon my shadow')
        self.assertIs(self.index1.document_count.__class__, Length)
        self.assertEqual(
            self.index1.document_count(), self.index2.document_count())
        del self.index1.document_count
        for idx in (self.index1, self.index2):
            idx.unindex_doc(0)
        self.assertIs(self.index1.document_count.__class__, Length)
        self.assertEqual(
            self.index1.document_count(), self.index2.document_count())
class TestIndexConflict(TestCase):
    """ZODB conflict-resolution tests: concurrent writers to one OkapiIndex."""

    db = None  # set by openDB(); None means no database was opened

    def tearDown(self):
        if self.db is not None:
            self.db.close()
            self.storage.cleanup()

    def openDB(self):
        # Per-process file name so parallel test runs don't collide.
        name = 'fs_tmp__{0}'.format(os.getpid())
        self.storage = FileStorage(name)
        self.db = DB(self.storage)

    def _load_all(self, index):
        """Force every persistent sub-structure of *index* to load."""
        for mapping in (index._docweight, index._docwords, index._wordinfo,
                        index._lexicon._wids, index._lexicon._words):
            list(mapping.items())

    def test_index_doc_conflict(self):
        """Indexing different new docs from two connections must not conflict."""
        self.index = OkapiIndex(Lexicon())
        self.openDB()
        root1 = self.db.open().root()
        root1['i'] = self.index
        transaction.commit()
        root2 = self.db.open().root()
        copy = root2['i']
        self._load_all(copy)
        self.assertEqual(self.index._p_serial, copy._p_serial)
        self.index.index_doc(0, 'The time has come')
        transaction.commit()
        copy.index_doc(1, 'That time has gone')
        transaction.commit()

    def test_reindex_doc_conflict(self):
        """Re-indexing different docs from two connections must not conflict."""
        self.index = OkapiIndex(Lexicon())
        self.index.index_doc(0, 'Sometimes change is good')
        self.index.index_doc(1, 'Then again, who asked')
        self.openDB()
        root1 = self.db.open().root()
        root1['i'] = self.index
        transaction.commit()
        root2 = self.db.open().root()
        copy = root2['i']
        self._load_all(copy)
        self.assertEqual(self.index._p_serial, copy._p_serial)
        self.index.index_doc(0, 'Sometimes change isn\'t bad')
        transaction.commit()
        copy.index_doc(1, 'Then again, who asked you?')
        transaction.commit()