Beispiel #1
0
    def setUp(self):
        # Fixture: a catalog with a lexicon, two groups of indexes (each
        # followed by same-named metadata columns) and ``self.upper``
        # cataloged dummy objects.
        catalog = self._catalog = self._makeOne()
        catalog.lexicon = PLexicon('lexicon')

        col_indexes = (
            ('col1', FieldIndex('col1')),
            ('col2', ZCTextIndex('col2', caller=catalog,
                                 index_factory=OkapiIndex,
                                 lexicon_id='lexicon')),
            ('col3', KeywordIndex('col3')),
        )
        # All indexes of a group are registered before any of its columns.
        for name, idx in col_indexes:
            catalog.addIndex(name, idx)
        for name, _idx in col_indexes:
            catalog.addColumn(name)

        att_indexes = (
            ('att1', FieldIndex('att1')),
            ('att2', ZCTextIndex('att2', caller=catalog,
                                 index_factory=OkapiIndex,
                                 lexicon_id='lexicon')),
            ('att3', KeywordIndex('att3')),
            ('num', FieldIndex('num')),
        )
        for name, idx in att_indexes:
            catalog.addIndex(name, idx)
        for name, _idx in att_indexes:
            catalog.addColumn(name)

        # Catalog one dummy per position, using the repr of the position
        # as its uid.
        for position in range(self.upper):
            catalog.catalogObject(dummy(self.nums[position]), repr(position))

        # From here on the tests use the catalog as returned by ``__of__``.
        self._catalog = catalog.__of__(dummy('foo'))
Beispiel #2
0
 def testAddTextIndex(self):
     # Register a ZCTextIndex under 'id' and check that the catalog's
     # ``indexes`` mapping returns an instance of that class.
     self._catalog.lexicon = PLexicon('lexicon')
     idx = ZCTextIndex('id', caller=self._catalog,
                       index_factory=OkapiIndex, lexicon_id='lexicon')
     self._catalog.addIndex('id', idx)
     i = self._catalog.indexes['id']
     # ``assert_`` is a deprecated alias that newer unittest versions no
     # longer provide; assertIsInstance also reports the offending object.
     self.assertIsInstance(i, ZCTextIndex, 'add text index failed')
Beispiel #3
0
 def setUp(self):
     # Fixture: a lexicon built from splitter, case normalizer and stop
     # word remover, plus a ZCTextIndex whose caller holds that lexicon.
     pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
     self.lexicon = PLexicon('lexicon', '', *pipeline)
     holder = LexiconHolder(self.lexicon)
     self.zc_index = ZCTextIndex(
         'name', None, holder, self.IndexFactory, 'text', 'lexicon')
     # Keep a direct reference to the ZCTextIndex's ``index`` attribute.
     self.index = self.zc_index.index
Beispiel #4
0
    def test_ZCTextIndex(self):
        # Assigning a parsed node to the adapter must not call the index's
        # ``clear``: that method is replaced by one that always raises.
        from xml.dom.minidom import parseString
        from Products.ZCTextIndex.ZCTextIndex import PLexicon, ZCTextIndex
        from Products.GenericSetup.testing import DummySetupEnviron
        from Products.GenericSetup.ZCTextIndex.exportimport import (
            ZCTextIndexNodeAdapter)

        node_xml = """\
        <index name="foo_zctext" meta_type="ZCTextIndex">
        <indexed_attr value="bar"/>
        <extra name="index_type" value="Okapi BM25 Rank"/>
        <extra name="lexicon_id" value="foo_plexicon"/>
        </index>
        """
        environ = DummySetupEnviron()

        def _no_clear(*a):
            raise AssertionError("Don't clear me!")

        # Catalog stand-in that carries the lexicon named by the settings.
        catalog = DummyCatalog()
        catalog.foo_plexicon = PLexicon('foo_plexicon')

        settings = _extra()
        settings.lexicon_id = 'foo_plexicon'
        settings.index_type = 'Okapi BM25 Rank'

        unwrapped = ZCTextIndex('foo_field',
                                extra=settings,
                                field_name='bar',
                                caller=catalog)
        index = unwrapped.__of__(catalog)
        index.clear = _no_clear

        adapted = ZCTextIndexNodeAdapter(index, environ)
        document = parseString(node_xml)
        adapted.node = document.documentElement  # no raise
    def _make_one(self, extra=None):
        """Build the populated catalog used by the tests.

        ``extra``, when given, is called with the bare catalog after the
        indexes are registered and before any object is cataloged.
        Returns the catalog as wrapped by ``__of__(Dummy('foo'))``.
        """
        from Products.ZCatalog.Catalog import Catalog

        cat = Catalog()
        cat.lexicon = PLexicon('lexicon')

        # Registration order is kept: att2, att1, num, then foo.
        cat.addIndex('att2', ZCTextIndex('att2',
                                         caller=cat,
                                         index_factory=OkapiIndex,
                                         lexicon_id='lexicon'))
        cat.addIndex('att1', FieldIndex('att1'))
        cat.addIndex('num', FieldIndex('num'))
        cat.addColumn('num')
        cat.addIndex('foo', MultiFieldIndex('foo'))

        if extra is not None:
            extra(cat)

        for position in range(self.upper):
            cat.catalogObject(Dummy(self.nums[position]), repr(position))
        return cat.__of__(Dummy('foo'))
 def testReindex(self):
     # Index the same docid three times with different text and check
     # that queries only match the text of the latest indexing.
     holder = LexiconHolder(self.lexicon)
     zc_index = ZCTextIndex(
         'name', None, holder, self.IndexFactory, 'text', 'lexicon')

     def hit_count(query):
         nbest, _total = zc_index.query(query)
         return len(nbest)

     doc = Indexable('Hello Tim')
     zc_index.index_object(1, doc)
     self.assertEqual(hit_count('glorious'), 0)
     self.assertEqual(hit_count('Tim'), 1)

     # reindex with another value
     doc.text = 'Goodbye George'
     zc_index.index_object(1, doc)
     self.assertEqual(hit_count('Tim'), 0)
     self.assertEqual(hit_count('Goodbye'), 1)

     # reindex with an empty value
     doc.text = ''
     zc_index.index_object(1, doc)
     self.assertEqual(hit_count('George'), 0)
 def testLexiconIsNotFoundRaisesLookupError(self):
     # Constructing the index with only a name, ``extra=None`` and a
     # caller (no lexicon id given) is expected to raise LookupError.
     holder = LexiconHolder(self.lexicon)
     self.assertRaises(
         LookupError, ZCTextIndex, 'name', extra=None, caller=holder)
 def test_add_text_index(self):
     # After adding a ZCTextIndex under 'id', the catalog's ``indexes``
     # mapping must return a ZCTextIndex for that key.
     cat = self._make_one()
     cat.lexicon = PLexicon('lexicon')
     text_index = ZCTextIndex(
         'id',
         caller=cat,
         index_factory=OkapiIndex,
         lexicon_id='lexicon',
     )
     cat.addIndex('id', text_index)
     self.assertIsInstance(cat.indexes['id'], ZCTextIndex)
Beispiel #9
0
 def testDelTextIndex(self):
     # Add a ZCTextIndex under 'id', delete it again and check that the
     # key is gone from the catalog's ``indexes`` mapping.
     self._catalog.lexicon = PLexicon('lexicon')
     idx = ZCTextIndex('id', caller=self._catalog,
                       index_factory=OkapiIndex, lexicon_id='lexicon')
     self._catalog.addIndex('id', idx)
     self._catalog.delIndex('id')
     # ``assert_`` is a deprecated alias that newer unittest versions no
     # longer provide; assertNotIn also reports the container on failure.
     self.assertNotIn('id', self._catalog.indexes,
                      'del index failed')
 def test_del_text_index(self):
     # An index that was added and then deleted must no longer appear in
     # the catalog's ``indexes`` mapping.
     cat = self._make_one()
     cat.lexicon = PLexicon('lexicon')
     text_index = ZCTextIndex(
         'id',
         caller=cat,
         index_factory=OkapiIndex,
         lexicon_id='lexicon',
     )
     cat.addIndex('id', text_index)
     cat.delIndex('id')
     self.assertNotIn('id', cat.indexes)
Beispiel #11
0
def index(rt, mboxfile, db, profiler):
    global NUM
    idx_time = 0
    pack_time = 0
    start_time = time.time()

    lexicon = Lexicon(Splitter(), CaseNormalizer(), StopWordRemover())
    extra = Extra()
    extra.lexicon_id = 'lexicon'
    extra.doc_attr = 'text'
    extra.index_type = 'Okapi BM25 Rank'
    caller = Extra()
    caller.lexicon = lexicon
    rt["index"] = idx = ZCTextIndex("index", extra, caller)
    if not EXCLUDE_TEXT:
        rt["documents"] = docs = IOBTree()
    else:
        docs = None
    transaction.commit()

    mbox = mailbox.UnixMailbox(open(mboxfile, 'rb'))
    if VERBOSE:
        print "opened", mboxfile
    if not NUM:
        NUM = sys.maxint

    if profiler:
        itime, ptime, i = profiler.runcall(indexmbox, mbox, idx, docs, db)
    else:
        itime, ptime, i = indexmbox(mbox, idx, docs, db)
    idx_time += itime
    pack_time += ptime

    transaction.commit()

    if PACK_INTERVAL and i % PACK_INTERVAL != 0:
        if VERBOSE >= 2:
            print "packing one last time..."
        p0 = time.clock()
        db.pack(time.time())
        p1 = time.clock()
        if VERBOSE:
            print "pack took %s sec" % (p1 - p0)
        pack_time += p1 - p0

    if VERBOSE:
        finish_time = time.time()
        print
        print "Index time", round(idx_time / 60, 3), "minutes"
        print "Pack time", round(pack_time / 60, 3), "minutes"
        print "Index bytes", Message.total_bytes
        rate = (Message.total_bytes / idx_time) / 1024
        print "Index rate %.2f KB/sec" % rate
        print "Indexing began", time.ctime(start_time)
        print "Indexing ended", time.ctime(finish_time)
        print "Wall clock minutes", round((finish_time - start_time) / 60, 3)
Beispiel #12
0
def make_zc_index():
    """Return a ZCTextIndex named "read" built from two attribute bags.

    ZCTextIndex is given an ``extra`` object (document attribute and
    lexicon id) and a ``caller`` object that carries the lexicon.
    """
    class Struct:
        pass

    # The caller exposes the lexicon under the name used as lexicon_id.
    caller = Struct()
    caller.lexicon = Lexicon(HTMLWordSplitter(), StopWordRemover())

    extra = Struct()
    extra.lexicon_id = "lexicon"
    extra.doc_attr = "read"

    return ZCTextIndex("read", extra, caller)
Beispiel #13
0
 def testMultipleAttributes(self):
     # The index is configured with the attribute list 'text1,text2'; a
     # query only matches when every term is found in the document.
     holder = LexiconHolder(self.lexicon)
     zc_index = ZCTextIndex(
         'name', None, holder, self.IndexFactory, 'text1,text2', 'lexicon')
     zc_index.index_object(1, Indexable2('foo bar', 'alpha omega'))

     def hit_count(query):
         nbest, _total = zc_index.query(query)
         return len(nbest)

     self.assertEqual(hit_count('foo'), 1)
     self.assertEqual(hit_count('foo alpha'), 1)
     self.assertEqual(hit_count('foo alpha gamma'), 0)
Beispiel #14
0
    def setUp(self):
        # Fixture: ``self._obj`` is a 'foo_zctext' ZCTextIndex wrapped in a
        # dummy catalog that carries the 'foo_plexicon' lexicon;
        # ``self._XML`` is the module-level _ZCTEXT_XML.
        from Products.ZCTextIndex.ZCTextIndex import PLexicon, ZCTextIndex

        parent = DummyCatalog()
        parent.foo_plexicon = PLexicon('foo_plexicon')

        settings = _extra()
        settings.lexicon_id = 'foo_plexicon'
        settings.index_type = 'Okapi BM25 Rank'

        unwrapped = ZCTextIndex('foo_zctext', extra=settings, caller=parent)
        self._obj = unwrapped.__of__(parent)
        self._XML = _ZCTEXT_XML
    def testInvalidIndexTypeRaisesValueError(self):
        # With no index factory given, an ``extra`` whose index_type is
        # not a known type is expected to make construction raise
        # ValueError.
        class Extra(object):
            index_type = 'Some invalid index type'

        holder = LexiconHolder(self.lexicon)
        self.assertRaises(
            ValueError,
            ZCTextIndex,
            'name',
            extra=Extra,
            caller=holder,
            index_factory=None,
            lexicon_id='lexicon',
        )
Beispiel #16
0
    def test_fixOkapiIndexes(self):
        # Put a bogus ``_totaldoclen`` on an Okapi-backed index and check
        # that fixOkapiIndexes leaves a callable there that returns 0.
        zcatalog = ZCatalog('catalog')
        zcatalog.lexicon = PLexicon('lexicon')
        text_index = ZCTextIndex('test',
                                 index_factory=OkapiIndex,
                                 caller=zcatalog,
                                 lexicon_id='lexicon')
        zcatalog.addIndex('test', text_index)
        zcatalog.Indexes['test'].index._totaldoclen = -1000

        from plone.app.upgrade.v41.final import fixOkapiIndexes
        fixOkapiIndexes(zcatalog)

        # Look the index up again rather than reusing an earlier reference.
        repaired = zcatalog.Indexes['test'].index
        self.assertEqual(0, repaired._totaldoclen())
Beispiel #17
0
 def setUp(self):
     # Fixture: a catalog with a 'title' text index, a 'true' field index
     # and a 'title' column, holding seven dummies; only number 110 has
     # ``true`` set to False.
     catalog = self._catalog = self._makeOne()
     catalog.lexicon = PLexicon('lexicon')
     title_index = ZCTextIndex(
         'title',
         caller=catalog,
         index_factory=OkapiIndex,
         lexicon_id='lexicon',
     )
     catalog.addIndex('title', title_index)
     catalog.addIndex('true', FieldIndex('true'))
     catalog.addColumn('title')

     # Objects are cataloged through whatever _get_catalog() returns.
     target = self._get_catalog()
     for number in (1, 2, 3, 10, 11, 110, 111):
         obj = zdummy(number)
         obj.true = number != 110
         target.catalogObject(obj, str(number))
Beispiel #18
0
 def testListAttributes(self):
     # The second indexed attribute of the document is a list of strings;
     # terms from both attributes must be found by queries.
     holder = LexiconHolder(self.lexicon)
     zc_index = ZCTextIndex(
         'name', None, holder, self.IndexFactory, 'text1,text2', 'lexicon')

     lines = [
         'Now is the winter of our discontent',
         'Made glorious summer by this sun of York',
     ]
     zc_index.index_object(1, Indexable2('Hello Tim', lines))

     def hit_count(query):
         nbest, _total = zc_index.query(query)
         return len(nbest)

     self.assertEqual(hit_count('glorious'), 1)
     self.assertEqual(hit_count('York Tim'), 1)
     self.assertEqual(hit_count('Tuesday Tim York'), 0)
    def _make_one(self):
        """Build a catalog with three indexes and ``self.upper`` objects.

        Returns the catalog as wrapped by ``__of__(Dummy('foo'))``.
        """
        from Products.ZCatalog.Catalog import Catalog

        cat = Catalog()
        cat.lexicon = PLexicon('lexicon')

        indexes = (
            ('att1', FieldIndex('att1')),
            ('att2', ZCTextIndex('att2', caller=cat,
                                 index_factory=OkapiIndex,
                                 lexicon_id='lexicon')),
            ('att3', KeywordIndex('att3')),
        )
        for name, idx in indexes:
            cat.addIndex(name, idx)

        # Each dummy is cataloged under the repr of its number.
        for number in range(self.upper):
            cat.catalogObject(Dummy(number), repr(number))
        return cat.__of__(Dummy('foo'))
 def _make_one(self):
     """Build a catalog holding seven ZDummy objects.

     The catalog has a 'title' text index, a 'true' field index and a
     'title' column; only object 110 has ``true`` set to False.  Returns
     the catalog as wrapped by ``__of__(ZDummy(1))``.
     """
     from Products.ZCatalog.Catalog import Catalog

     cat = Catalog()
     cat.lexicon = PLexicon('lexicon')
     title_index = ZCTextIndex(
         'title',
         caller=cat,
         index_factory=OkapiIndex,
         lexicon_id='lexicon',
     )
     cat.addIndex('title', title_index)
     cat.addIndex('true', FieldIndex('true'))
     cat.addColumn('title')

     for number in (1, 2, 3, 10, 11, 110, 111):
         obj = ZDummy(number)
         obj.true = number != 110
         cat.catalogObject(obj, str(number))
     return cat.__of__(ZDummy(1))
Beispiel #21
0
 def setUp(self):
     # Fixture: ``self.catalogs`` holds three identically built catalogs,
     # each with four indexes and ten cataloged dummies.
     self.catalogs = []
     for _round in range(3):
         bare = self._makeOne()
         bare.lexicon = PLexicon('lexicon')
         for field in ('num', 'big', 'number'):
             bare.addIndex(field, FieldIndex(field))
         title_index = ZCTextIndex('title', caller=bare,
                                   index_factory=OkapiIndex,
                                   lexicon_id='lexicon')
         bare.addIndex('title', title_index)

         # Objects are cataloged through the wrapped catalog.
         wrapped = bare.__of__(zdummy(16336))
         for number in range(10):
             obj = zdummy(number)
             obj.big = number > 5
             obj.number = True
             wrapped.catalogObject(obj, str(number))
         self.catalogs.append(wrapped)
 def _make_many(self):
     """Return ``(catalogs, mergeResults)``.

     ``catalogs`` is a list of three catalogs from ``_make_one()``, each
     extended with four indexes and ten cataloged ZDummy objects;
     ``mergeResults`` is the function imported from
     Products.ZCatalog.Catalog.
     """
     from Products.ZCatalog.Catalog import mergeResults

     built = []
     for _round in range(3):
         bare = self._make_one()
         bare.lexicon = PLexicon('lexicon')
         for field in ('num', 'big', 'number'):
             bare.addIndex(field, FieldIndex(field))
         title_index = ZCTextIndex('title', caller=bare,
                                   index_factory=OkapiIndex,
                                   lexicon_id='lexicon')
         bare.addIndex('title', title_index)

         # Objects are cataloged through the wrapped catalog.
         wrapped = bare.__of__(ZDummy(16336))
         for number in range(10):
             obj = ZDummy(number)
             obj.big = number > 5
             obj.number = True
             wrapped.catalogObject(obj, str(number))
         built.append(wrapped)
     return built, mergeResults
Beispiel #23
0
    def __init__(self, id='Help', title=''):
        """Store ``id`` and ``title`` and build ``self.catalog``.

        The ZCatalog gets a 'lexicon' PLexicon, a 'SearchableText'
        ZCTextIndex, two KeywordIndexes and five metadata columns.
        """
        self.id = id
        self.title = title
        catalog = self.catalog = ZCatalog('catalog')

        lexicon = PLexicon('lexicon', '', HTMLWordSplitter(),
                           CaseNormalizer(), StopWordRemover())
        catalog._setObject('lexicon', lexicon)

        text_index = ZCTextIndex('SearchableText',
                                 caller=catalog,
                                 index_factory=OkapiIndex,
                                 lexicon_id=lexicon.id)
        # not using catalog.addIndex because it depends on Product
        # initialization
        inner = catalog._catalog
        inner.addIndex('SearchableText', text_index)
        for keyword in ('categories', 'permissions'):
            inner.addIndex(keyword, KeywordIndex(keyword))

        for column in ('categories', 'permissions', 'title_or_id',
                       'url', 'id'):
            catalog.addColumn(column)