# NOTE: these snippets are shown without their imports.  The paths below
# assume the zope.index.text package (plus ZODB/transaction for the conflict
# tests); Products.ZCTextIndex provides an equivalent API under different
# module paths.
import os
from unittest import TestCase

import transaction
from BTrees.Length import Length
from persistent import Persistent
from ZODB.DB import DB
from ZODB.FileStorage import FileStorage
from zope.index.text.lexicon import (
    CaseNormalizer,
    Lexicon,
    Splitter,
    StopWordRemover,
)
from zope.index.text.nbest import NBest
from zope.index.text.okapiindex import OkapiIndex
from zope.index.text.queryparser import QueryParser


class TextIndex(Persistent):

    def __init__(self):
        self.lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover())
        self.index = OkapiIndex(self.lexicon)

    def index_text(self, docid, text):
        self.index.index_doc(docid, text)
        self._p_changed = 1  # XXX: explicitly mark this wrapper as changed

    def unindex(self, docid):
        self.index.unindex_doc(docid)
        self._p_changed = 1  # XXX: explicitly mark this wrapper as changed

    def query(self, query, nbest=10):
        # Returns the nbest highest-scoring (docid, score) pairs and the
        # total number of matching documents.
        parser = QueryParser(self.lexicon)
        tree = parser.parseQuery(query)
        results = tree.executeQuery(self.index)
        if results is None:
            return [], 0
        chooser = NBest(nbest)
        chooser.addmany(results.items())
        return chooser.getbest(), len(results)

    def query_weight(self, query):
        parser = QueryParser(self.lexicon)
        tree = parser.parseQuery(query)
        terms = tree.terms()
        return self.index.query_weight(terms)
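

# A minimal usage sketch for the TextIndex wrapper above.  The docids and
# query strings are made up for illustration, and the helper name
# _demo_text_index is hypothetical; it assumes the zope.index.text imports
# at the top of this listing.
def _demo_text_index():
    index = TextIndex()
    index.index_text(1, 'the quick brown fox jumps over the lazy dog')
    index.index_text(2, 'a quick red fox')

    # query() returns the nbest (docid, score) pairs plus the total hit count.
    best, total = index.query('quick AND fox', nbest=5)
    print(total, best)

    # query_weight() is the highest score the query terms could achieve,
    # which can be used to normalize the raw Okapi scores.
    print(index.query_weight('quick fox'))

    # Documents are removed again by docid.
    index.unindex(2)
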
class TestUpgrade(TestCase):

    def test_query_before_totaldoclen_upgrade(self):
        self.index1 = OkapiIndex(Lexicon(Splitter()))
        self.index1.index_doc(0, 'The quiet of night')
        # Revert _totaldoclen to a plain int to simulate an older index instance
        self.index1._totaldoclen = int(self.index1._totaldoclen())
        self.assertEqual(len(self.index1.search('night')), 1)

    def test_upgrade_totaldoclen(self):
        self.index1 = OkapiIndex(Lexicon())
        self.index2 = OkapiIndex(Lexicon())
        self.index1.index_doc(0, 'The quiet of night')
        self.index2.index_doc(0, 'The quiet of night')
        # Revert _totaldoclen to a plain int to simulate an older index instance
        self.index1._totaldoclen = int(self.index1._totaldoclen())
        self.index1.index_doc(1, 'gazes upon my shadow')
        self.index2.index_doc(1, 'gazes upon my shadow')
        self.assertEqual(
            self.index1._totaldoclen(), self.index2._totaldoclen())
        self.index1._totaldoclen = int(self.index1._totaldoclen())
        self.index1.unindex_doc(0)
        self.index2.unindex_doc(0)
        self.assertEqual(
            self.index1._totaldoclen(), self.index2._totaldoclen())

    def test_query_before_document_count_upgrade(self):
        self.index1 = OkapiIndex(Lexicon(Splitter()))
        self.index1.index_doc(0, 'The quiet of night')
        # Delete document_count to simulate an older index instance
        del self.index1.document_count
        self.assertEqual(len(self.index1.search('night')), 1)

    def test_upgrade_document_count(self):
        self.index1 = OkapiIndex(Lexicon())
        self.index2 = OkapiIndex(Lexicon())
        self.index1.index_doc(0, 'The quiet of night')
        self.index2.index_doc(0, 'The quiet of night')
        # Delete document_count to simulate an older index instance
        del self.index1.document_count
        self.index1.index_doc(1, 'gazes upon my shadow')
        self.index2.index_doc(1, 'gazes upon my shadow')
        self.assertIs(self.index1.document_count.__class__, Length)
        self.assertEqual(
            self.index1.document_count(), self.index2.document_count())
        del self.index1.document_count
        self.index1.unindex_doc(0)
        self.index2.unindex_doc(0)
        self.assertIs(self.index1.document_count.__class__, Length)
        self.assertEqual(
            self.index1.document_count(), self.index2.document_count())
class TestIndexConflict(TestCase):

    db = None

    def tearDown(self):
        if self.db is not None:
            self.db.close()
            self.storage.cleanup()

    def openDB(self):
        n = 'fs_tmp__{0}'.format(os.getpid())
        self.storage = FileStorage(n)
        self.db = DB(self.storage)

    def test_index_doc_conflict(self):
        self.index = OkapiIndex(Lexicon())
        self.openDB()
        r1 = self.db.open().root()
        r1['i'] = self.index
        transaction.commit()

        r2 = self.db.open().root()
        copy = r2['i']
        # Make sure the data is loaded
        list(copy._docweight.items())
        list(copy._docwords.items())
        list(copy._wordinfo.items())
        list(copy._lexicon._wids.items())
        list(copy._lexicon._words.items())

        self.assertEqual(self.index._p_serial, copy._p_serial)

        self.index.index_doc(0, 'The time has come')
        transaction.commit()

        copy.index_doc(1, 'That time has gone')
        transaction.commit()

    def test_reindex_doc_conflict(self):
        self.index = OkapiIndex(Lexicon())
        self.index.index_doc(0, 'Sometimes change is good')
        self.index.index_doc(1, 'Then again, who asked')
        self.openDB()
        r1 = self.db.open().root()
        r1['i'] = self.index
        transaction.commit()

        r2 = self.db.open().root()
        copy = r2['i']
        # Make sure the data is loaded
        list(copy._docweight.items())
        list(copy._docwords.items())
        list(copy._wordinfo.items())
        list(copy._lexicon._wids.items())
        list(copy._lexicon._words.items())

        self.assertEqual(self.index._p_serial, copy._p_serial)

        self.index.index_doc(0, 'Sometimes change isn\'t bad')
        transaction.commit()

        copy.index_doc(1, 'Then again, who asked you?')
        transaction.commit()