def test_upgrade_totaldoclen(self):
    """Check that a plain-int ``_totaldoclen`` survives indexing and unindexing.

    ``index1`` has its ``_totaldoclen`` replaced by a bare ``int`` before
    each mutation to simulate an older index instance, while ``index2`` is
    left untouched and acts as the reference. After every mutation both
    indexes must report the same total via ``_totaldoclen()``.
    """
    legacy = self.index1 = OkapiIndex(Lexicon())
    reference = self.index2 = OkapiIndex(Lexicon())
    both = (legacy, reference)

    for index in both:
        index.index_doc(0, 'The quiet of night')

    # Swap the counter for a bare integer, as an older instance would hold.
    legacy._totaldoclen = int(legacy._totaldoclen())
    for index in both:
        index.index_doc(1, 'gazes upon my shadow')
    self.assertEqual(legacy._totaldoclen(), reference._totaldoclen())

    # Downgrade once more so the unindex path starts from the old form too.
    legacy._totaldoclen = int(legacy._totaldoclen())
    for index in both:
        index.unindex_doc(0)
    self.assertEqual(legacy._totaldoclen(), reference._totaldoclen())
def test_reindex_doc_conflict(self):
    """Reindex two different documents through two database connections.

    The index is stored under ``'i'`` via one connection and fetched again
    through a second one. Document 0 is reindexed on the original object
    and committed, then document 1 is reindexed on the second copy and
    committed. No exception is expected from either commit.

    NOTE(review): presumably this exercises ZODB conflict resolution for
    concurrent writers -- confirm against the index implementation.
    """
    def load(tree):
        # Walking all items forces the tree's state to be read.
        list(tree.items())

    self.index = OkapiIndex(Lexicon())
    self.index.index_doc(0, 'Sometimes change is good')
    self.index.index_doc(1, 'Then again, who asked')

    self.openDB()
    first_root = self.db.open().root()
    first_root['i'] = self.index
    transaction.commit()

    second_root = self.db.open().root()
    copy = second_root['i']

    # Make sure the data is loaded before either side changes anything.
    load(copy._docweight)
    load(copy._docwords)
    load(copy._wordinfo)
    load(copy._lexicon._wids)
    load(copy._lexicon._words)

    # Both objects must start from the same stored revision.
    self.assertEqual(self.index._p_serial, copy._p_serial)

    self.index.index_doc(0, 'Sometimes change isn\'t bad')
    transaction.commit()

    copy.index_doc(1, 'Then again, who asked you?')
    transaction.commit()
def test_upgrade_document_count(self):
    """Check that a missing ``document_count`` attribute is restored.

    ``index1`` has its ``document_count`` attribute deleted before each
    mutation to simulate an older index instance; ``index2`` is the
    untouched reference. After indexing and after unindexing,
    ``index1.document_count`` must be a ``Length`` again and must report
    the same count as the reference.
    """
    legacy = self.index1 = OkapiIndex(Lexicon())
    reference = self.index2 = OkapiIndex(Lexicon())
    both = (legacy, reference)

    for index in both:
        index.index_doc(0, 'The quiet of night')

    # Drop the attribute to simulate an older index instance.
    del legacy.document_count
    for index in both:
        index.index_doc(1, 'gazes upon my shadow')
    self.assertIs(legacy.document_count.__class__, Length)
    self.assertEqual(legacy.document_count(), reference.document_count())

    # Same again, this time going through the unindex path.
    del legacy.document_count
    for index in both:
        index.unindex_doc(0)
    self.assertIs(legacy.document_count.__class__, Length)
    self.assertEqual(legacy.document_count(), reference.document_count())
def test_index_doc_conflict(self):
    """Index two different documents through two database connections.

    An empty index is stored under ``'i'`` via one connection and fetched
    again through a second one. Document 0 is indexed on the original
    object and committed, then document 1 is indexed on the second copy
    and committed. No exception is expected from either commit.

    NOTE(review): presumably this exercises ZODB conflict resolution for
    concurrent writers -- confirm against the index implementation.
    """
    def load(tree):
        # Walking all items forces the tree's state to be read.
        list(tree.items())

    self.index = OkapiIndex(Lexicon())

    self.openDB()
    first_root = self.db.open().root()
    first_root['i'] = self.index
    transaction.commit()

    second_root = self.db.open().root()
    copy = second_root['i']

    # Make sure the data is loaded before either side changes anything.
    load(copy._docweight)
    load(copy._docwords)
    load(copy._wordinfo)
    load(copy._lexicon._wids)
    load(copy._lexicon._words)

    # Both objects must start from the same stored revision.
    self.assertEqual(self.index._p_serial, copy._p_serial)

    self.index.index_doc(0, 'The time has come')
    transaction.commit()

    copy.index_doc(1, 'That time has gone')
    transaction.commit()
def test_query_before_document_count_upgrade(self):
    """Search an index whose ``document_count`` attribute has been removed.

    One document is indexed, the ``document_count`` attribute is deleted to
    simulate an older index instance, and a search for ``'night'`` must
    still return exactly one result.
    """
    legacy = self.index1 = OkapiIndex(Lexicon(Splitter()))
    legacy.index_doc(0, 'The quiet of night')

    # Drop the attribute to simulate an older index instance.
    del legacy.document_count

    hits = legacy.search('night')
    self.assertEqual(len(hits), 1)
def __init__(self):
    """Create the lexicon and the Okapi index built on top of it.

    The lexicon is constructed from a ``Splitter``, a ``CaseNormalizer``
    and a ``StopWordRemover``, passed in that order. It is stored on
    ``self.lexicon`` and the index that uses it on ``self.index``.
    """
    pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
    self.lexicon = Lexicon(*pipeline)
    self.index = OkapiIndex(self.lexicon)