Python PLexiconの例、Products.ZCTextIndex.ZCTextIndex.PLexicon Pythonの例

コード例 #1

0

ファイルを表示

 def setUp(self):
     """Create the lexicon and the ZCTextIndex exercised by the tests.

     Stores the lexicon, the ZCTextIndex and its underlying ``index``
     object on the test instance.
     """
     pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
     self.lexicon = PLexicon('lexicon', '', *pipeline)
     holder = LexiconHolder(self.lexicon)
     self.zc_index = ZCTextIndex(
         'name', None, holder, self.IndexFactory, 'text', 'lexicon')
     self.index = self.zc_index.index

コード例 #2

0

ファイルを表示

ファイル: catalog.py プロジェクト: seantis/seantis.people

    def setup_lexicons(self):
        # Set up the lexicons, as some Plone-Catalog internal code depends on
        # them. We wouldn't really have to be so thorough, as we don't
        # actually want to support full text searches on the people catalog,
        # but setting up the people catalog as close as possible to the
        # portal catalog ensures that there are no surprises.
        #
        # Maps each lexicon id to its pipeline, listed as
        # (element name, element group) pairs.
        lexicons = {
            'plone_lexicon': [
                ('Unicode Whitespace splitter', 'Word Splitter'),
                ('Unicode Ignoring Accents Case Normalizer',
                    'Case Normalizer'),
            ],
            'plaintext_lexicon': [
                ('HTML aware splitter', 'Word Splitter'),
                ('Case Normalizer', 'Case Normalizer'),
                ('Remove listed stop words only', 'Stop Words')
            ],
            'htmltext_lexicon': [
                ('HTML aware splitter', 'Word Splitter'),
                ('Case Normalizer', 'Case Normalizer'),
                ('Remove listed stop words only', 'Stop Words')
            ]
        }

        for lexicon_id, elements in lexicons.items():
            # element_factory.instantiate takes the group first, then the name
            pipeline = [
                element_factory.instantiate(group, name)
                for name, group in elements
            ]

            # Pass the pipeline through the constructor (id, title,
            # *pipeline) rather than overwriting the private ``_pipeline``
            # attribute with a mutable list after construction.
            self._setObject(lexicon_id, PLexicon(lexicon_id, '', *pipeline))

コード例 #3

0

ファイルを表示

ファイル: test_catalog.py プロジェクト: gupineee/Products.ZCatalog

    def _make_one(self, extra=None):
        """Return a populated ``Catalog`` wrapped in a ``Dummy`` context.

        ``extra``, when given, is called with the catalog after the indexes
        are registered and before any object is catalogued.
        """
        from Products.ZCatalog.Catalog import Catalog

        cat = Catalog()
        cat.lexicon = PLexicon('lexicon')

        field_att1 = FieldIndex('att1')
        text_att2 = ZCTextIndex(
            'att2',
            caller=cat,
            index_factory=OkapiIndex,
            lexicon_id='lexicon')
        cat.addIndex('att2', text_att2)
        field_num = FieldIndex('num')

        cat.addIndex('att1', field_att1)
        cat.addIndex('num', field_num)
        cat.addColumn('num')

        cat.addIndex('foo', MultiFieldIndex('foo'))

        if extra is not None:
            extra(cat)

        # catalogue one Dummy per entry of self.nums, keyed by repr(position)
        for position in range(self.upper):
            cat.catalogObject(Dummy(self.nums[position]), repr(position))

        return cat.__of__(Dummy('foo'))

コード例 #4

0

ファイルを表示

ファイル: sitesetup.py プロジェクト: mohalfaki/bungeni-portal

def setup_catalog(context):
    """Make sure the marginalia catalog exists and install its lexicon.

    When the lexicon has to be installed, the ``edit_type``, ``note`` and
    ``link_title`` indexes are added as well if they are missing.
    """
    site = context.getSite()

    catalog_id = 'marginalia_catalog'
    try:
        catalog = cmfutils.getToolByName(site, catalog_id)
    except AttributeError:
        # the tool is not there yet: register the catalog
        catalog = ZCatalog(catalog_id, u'Marginalia catalog', None, site)
        site._setObject(catalog_id, catalog)

    # settings handed to the ZCTextIndex on creation
    plaintext_extra = SimpleRecord(lexicon_id='plaintext_lexicon',
                                   index_type='Okapi BM25 Rank')

    existing_indexes = catalog.indexes()
    columns = catalog.schema()  # fetched, but not used below

    # install lexicon
    lexicon_id = 'plaintext_lexicon'
    if hasattr(catalog, lexicon_id):
        return

    pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
    catalog._setObject(lexicon_id, PLexicon(lexicon_id, '', *pipeline))

    # NOTE(review): indexes are only added on the run that installs the
    # lexicon -- confirm this nesting is intended.
    wanted_indexes = (
        ('edit_type', 'FieldIndex', None),
        ('note', 'ZCTextIndex', plaintext_extra),
        ('link_title', 'FieldIndex', None),
    )
    for index_name, index_type, extra in wanted_indexes:
        if index_name in existing_indexes:
            continue
        catalog.addIndex(index_name, index_type, extra=extra)

コード例 #5

0

ファイルを表示

    def test_ZCTextIndex(self):
        # Assigning a node to the adapter must not call the index's clear():
        # clear is replaced with a stub that raises AssertionError.
        from xml.dom.minidom import parseString
        from Products.ZCTextIndex.ZCTextIndex import PLexicon
        from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex
        from Products.GenericSetup.testing import DummySetupEnviron
        from Products.GenericSetup.ZCTextIndex.exportimport \
                import ZCTextIndexNodeAdapter

        def _no_clear(*a):
            raise AssertionError("Don't clear me!")

        _XML = """\
        <index name="foo_zctext" meta_type="ZCTextIndex">
        <indexed_attr value="bar"/>
        <extra name="index_type" value="Okapi BM25 Rank"/>
        <extra name="lexicon_id" value="foo_plexicon"/>
        </index>
        """

        # catalog stand-in that carries the lexicon the index refers to
        dummy_catalog = DummyCatalog()
        dummy_catalog.foo_plexicon = PLexicon('foo_plexicon')

        settings = _extra()
        settings.lexicon_id = 'foo_plexicon'
        settings.index_type = 'Okapi BM25 Rank'

        bare_index = ZCTextIndex('foo_field',
                                 extra=settings,
                                 field_name='bar',
                                 caller=dummy_catalog)
        index = bare_index.__of__(dummy_catalog)
        index.clear = _no_clear

        environ = DummySetupEnviron()
        adapted = ZCTextIndexNodeAdapter(index, environ)
        adapted.node = parseString(_XML).documentElement  # no raise

コード例 #6

0

ファイルを表示

    def _initSite(self, foo=2):
        """Return ``(site, ctool)`` with an emptied, registered catalog tool.

        When ``foo`` is greater than 0 the tool additionally gets the
        ``foo_plexicon`` lexicon plus a ``foo_zctext`` index and column.
        """
        site = Folder(id='site').__of__(self.app)
        ctool = CatalogTool()
        getSiteManager().registerUtility(ctool, ICatalogTool)

        # strip everything the tool was created with
        for sub_id in ctool.objectIds():
            ctool._delObject(sub_id)
        for index_id in ctool.indexes():
            ctool.delIndex(index_id)
        for column in list(ctool.schema()):
            ctool.delColumn(column)

        if foo <= 0:
            return site, ctool

        ctool._setObject('foo_plexicon', PLexicon('foo_plexicon'))
        lexicon = ctool.foo_plexicon
        lexicon._pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())

        settings = _extra()
        settings.lexicon_id = 'foo_plexicon'
        settings.index_type = 'Okapi BM25 Rank'
        ctool.addIndex('foo_zctext', 'ZCTextIndex', settings)

        ctool.addColumn('foo_zctext')

        return site, ctool

コード例 #7

0

ファイルを表示

ファイル: testZCTextIndex.py プロジェクト: Andyvs/TrackMonthlyExpenses

 def testReindex(self):
     # Indexing the same docid again must replace what was indexed for it
     # before. The index is built on self.lexicon; the separate, never-used
     # local lexicon the test used to create has been dropped.
     caller = LexiconHolder(self.lexicon)
     zc_index = ZCTextIndex('name',
                            None,
                            caller,
                            self.IndexFactory,
                            'text',
                            'lexicon')
     doc = Indexable('Hello Tim')
     zc_index.index_object(1, doc)
     nbest, total = zc_index.query('glorious')
     self.assertEqual(len(nbest), 0)
     nbest, total = zc_index.query('Tim')
     self.assertEqual(len(nbest), 1)
     # reindex with another value
     doc.text = 'Goodbye George'
     zc_index.index_object(1, doc)
     nbest, total = zc_index.query('Tim')
     self.assertEqual(len(nbest), 0)
     nbest, total = zc_index.query('Goodbye')
     self.assertEqual(len(nbest), 1)
     # reindex with an empty value
     doc.text = ''
     zc_index.index_object(1, doc)
     nbest, total = zc_index.query('George')
     self.assertEqual(len(nbest), 0)

コード例 #8

0

ファイルを表示

def index():
    """Create a ZCatalog ``cat`` with a ZCTextIndex and run the indexing job.

    The catalog threshold is read from ``sys.argv[2]`` and defaults to 1000
    when that argument is absent.
    """
    os.environ['STUPID_LOG_FILE'] = ''
    os.environ['STUPID_LOG_SEVERITY'] = '-111'
    import Zope2, Products.ZCatalog.ZCatalog
    import AccessControl.SecurityManagement, AccessControl.SpecialUsers
    app = Zope2.app()
    Products.ZCatalog.ZCatalog.manage_addZCatalog(app, 'cat', '')
    try:
        # int() replaces the deprecated string.atoi
        app.cat.threshold = int(sys.argv[2])
    except IndexError:
        # was misspelled "threashold", so the default never took effect
        app.cat.threshold = 1000

    from Products.ZCTextIndex.ZCTextIndex \
         import PLexicon
    from Products.ZCTextIndex.Lexicon \
         import Splitter, CaseNormalizer

    app.cat._setObject('lex', PLexicon('lex', '', Splitter(),
                                       CaseNormalizer()))

    # plain attribute holder passed as the index's "extra" settings
    class extra:
        doc_attr = 'PrincipiaSearchSource'
        lexicon_id = 'lex'
        index_type = 'Okapi BM25 Rank'

    app.cat.addIndex('PrincipiaSearchSource', 'ZCTextIndex', extra)

    transaction.commit()
    system = AccessControl.SpecialUsers.system
    AccessControl.SecurityManagement.newSecurityManager(None, system)
    r = RE()
    r.PARENTS = [app.cat, app]
    # parenthesised form prints the same value under Python 2 and 3
    print(do(Zope2.DB, indexf, (app, )))
    #hist(sys.argv[2])
    Zope2.DB.close()

コード例 #9

0

ファイルを表示

 def updateIndexes(self):
     # On first use, install the audit indexes, metadata columns and the
     # lexicon; then add every portal_catalog index that is missing here,
     # except those of type DateRecurringIndex.
     if not getattr(self, 'audit_lexicon', None):
         # installing, add lexicon, indexes and metadata
         self.addIndex('last_audited_date', 'DateIndex')
         self.addIndex('audited_action', 'KeywordIndex')
         for column in ('Title', 'id', 'UID',
                        'last_audited_date', 'audited_action'):
             self.addColumn(column)
         audit_lexicon = PLexicon('audit_lexicon', '', HTMLWordSplitter(),
                                  CaseNormalizer(), StopWordRemover())
         self._setObject('audit_lexicon', audit_lexicon)

     portal_catalog = portal_api.get_tool('portal_catalog')
     for name, source_index in portal_catalog._catalog.indexes.items():
         already_present = name in self._catalog.indexes.keys()
         if already_present or source_index.meta_type == 'DateRecurringIndex':
             continue

         if source_index.meta_type != 'ZCTextIndex':
             self.addIndex(name, source_index.meta_type)
             continue

         # text indexes are pointed at the audit lexicon
         extras = Empty()
         extras.doc_attr = name
         extras.index_type = 'Okapi BM25 Rank'
         extras.lexicon_id = 'audit_lexicon'
         self.addIndex(name, source_index.meta_type, extras)

コード例 #10

0

ファイルを表示

    def setUp(self):
        """Build a catalog with col*/att*/num indexes and columns, then fill it.

        The catalog ends up wrapped in a ``dummy`` context on
        ``self._catalog``.
        """
        catalog = self._makeOne()
        self._catalog = catalog
        catalog.lexicon = PLexicon('lexicon')

        def text_index(index_id):
            return ZCTextIndex(index_id, caller=catalog,
                               index_factory=OkapiIndex,
                               lexicon_id='lexicon')

        def register(named_indexes):
            # all indexes first, then the matching metadata columns
            for index_id, index_obj in named_indexes:
                catalog.addIndex(index_id, index_obj)
            for index_id, _unused in named_indexes:
                catalog.addColumn(index_id)

        register([
            ('col1', FieldIndex('col1')),
            ('col2', text_index('col2')),
            ('col3', KeywordIndex('col3')),
        ])

        register([
            ('att1', FieldIndex('att1')),
            ('att2', text_index('att2')),
            ('att3', KeywordIndex('att3')),
            ('num', FieldIndex('num')),
        ])

        for position in range(self.upper):
            catalog.catalogObject(dummy(self.nums[position]), repr(position))
        self._catalog = catalog.__of__(dummy('foo'))

コード例 #11

0

ファイルを表示

ファイル: zodbload.py プロジェクト: bennihepp/sandbox

def setup(lib_python):
    """Start from a fresh database and create the ``cat`` ZCatalog.

    Removes ``var/Data.fs`` two directories above ``lib_python`` if
    possible, then adds a ZCatalog with a lexicon and a
    ``PrincipiaSearchSource`` ZCTextIndex, commits, and installs the system
    user as the security manager.
    """
    try:
        os.remove(os.path.join(lib_python, '..', '..', 'var', 'Data.fs'))
    except OSError:
        # best effort: the file may not exist or may not be removable
        pass
    import Zope2
    # import explicitly what is dereferenced below instead of relying on
    # other modules having imported it as a side effect
    import Products.ZCatalog.ZCatalog
    import AccessControl.SecurityManagement
    import AccessControl.SpecialUsers
    app = Zope2.app()

    Products.ZCatalog.ZCatalog.manage_addZCatalog(app, 'cat', '')

    from Products.ZCTextIndex.ZCTextIndex import PLexicon
    from Products.ZCTextIndex.Lexicon import Splitter, CaseNormalizer

    app.cat._setObject('lex',
                       PLexicon('lex', '', Splitter(), CaseNormalizer())
                       )

    # plain attribute holder passed as the index's "extra" settings
    class extra:
        doc_attr = 'PrincipiaSearchSource'
        lexicon_id = 'lex'
        index_type = 'Okapi BM25 Rank'

    app.cat.addIndex('PrincipiaSearchSource', 'ZCTextIndex', extra)

    transaction.commit()

    system = AccessControl.SpecialUsers.system
    AccessControl.SecurityManagement.newSecurityManager(None, system)

    app._p_jar.close()

コード例 #12

0

ファイルを表示

 def testAddTextIndex(self):
     # A ZCTextIndex added under 'id' must be retrievable from the
     # catalog's indexes mapping.
     self._catalog.lexicon = PLexicon('lexicon')
     idx = ZCTextIndex('id', caller=self._catalog,
                       index_factory=OkapiIndex, lexicon_id='lexicon')
     self._catalog.addIndex('id', idx)
     i = self._catalog.indexes['id']
     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue(isinstance(i, ZCTextIndex), 'add text index failed')

コード例 #13

0

ファイルを表示

ファイル: test_catalog.py プロジェクト: icemac/Products.ZCatalog

 def test_add_text_index(self):
     # The object stored under 'id' after addIndex must be a ZCTextIndex.
     cat = self._make_one()
     cat.lexicon = PLexicon('lexicon')
     text_index = ZCTextIndex(
         'id',
         caller=cat,
         index_factory=OkapiIndex,
         lexicon_id='lexicon')
     cat.addIndex('id', text_index)
     self.assertIsInstance(cat.indexes['id'], ZCTextIndex)

コード例 #14

0

ファイルを表示

 def testDelTextIndex(self):
     # After delIndex the 'id' entry must be gone from the catalog's
     # indexes mapping.
     self._catalog.lexicon = PLexicon('lexicon')
     idx = ZCTextIndex('id', caller=self._catalog,
                       index_factory=OkapiIndex, lexicon_id='lexicon')
     self._catalog.addIndex('id', idx)
     self._catalog.delIndex('id')
     # assertTrue replaces the deprecated assert_ alias
     self.assertTrue('id' not in self._catalog.indexes,
                     'del index failed')

コード例 #15

0

ファイルを表示

ファイル: test_catalog.py プロジェクト: icemac/Products.ZCatalog

 def test_del_text_index(self):
     # Adding and then deleting the 'id' index must leave no 'id' entry.
     cat = self._make_one()
     cat.lexicon = PLexicon('lexicon')
     text_index = ZCTextIndex(
         'id',
         caller=cat,
         index_factory=OkapiIndex,
         lexicon_id='lexicon')
     cat.addIndex('id', text_index)
     cat.delIndex('id')
     self.assertNotIn('id', cat.indexes)

コード例 #16

0

ファイルを表示

ファイル: test_exportimport.py プロジェクト: bendavis78/zope

    def setUp(self):
        """Load the ZCML configuration and create the lexicon under test."""
        from Products.ZCTextIndex.ZCTextIndex import PLexicon

        PlacelessSetup.setUp(self)
        zcml_files = (
            ('meta.zcml', Products.Five),
            ('configure.zcml', Products.GenericSetup.ZCTextIndex),
        )
        for file_name, package in zcml_files:
            zcml.load_config(file_name, package)

        # expected XML and the object the tests run against
        self._XML = _PLEXICON_XML
        self._obj = PLexicon('foo_plexicon')

コード例 #17

0

ファイルを表示

ファイル: test_exportimport.py プロジェクト: bendavis78/zope

    def setUp(self):
        """Run the base set-up, load the ZCML and create the lexicon."""
        import Products.GenericSetup.ZCTextIndex
        from Products.ZCTextIndex.ZCTextIndex import PLexicon

        NodeAdapterTestCase.setUp(self)
        package = Products.GenericSetup.ZCTextIndex
        zcml.load_config('configure.zcml', package)

        # expected XML and the object the tests run against
        self._XML = _PLEXICON_XML
        self._obj = PLexicon('foo_plexicon')

コード例 #18

0

ファイルを表示

    def setUp(self):
        """Create a ZCTextIndex wrapped in a dummy catalog for the tests."""
        from Products.ZCTextIndex.ZCTextIndex import PLexicon
        from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex

        # catalog stand-in carrying the lexicon the index refers to
        dummy_catalog = DummyCatalog()
        dummy_catalog.foo_plexicon = PLexicon('foo_plexicon')

        settings = _extra()
        settings.lexicon_id = 'foo_plexicon'
        settings.index_type = 'Okapi BM25 Rank'

        bare_index = ZCTextIndex('foo_zctext', extra=settings,
                                 caller=dummy_catalog)
        self._obj = bare_index.__of__(dummy_catalog)
        self._XML = _ZCTEXT_XML

コード例 #19

0

ファイルを表示

    def _populate_special(self, obj):
        """Populate, then add an extra lexicon, text index and column to obj."""
        from Products.ZCTextIndex.ZCTextIndex import PLexicon

        # NOTE(review): _populate receives self._obj rather than obj --
        # confirm the two are meant to be the same object.
        self._populate(self._obj)

        lexicon_id = 'old_plexicon'
        obj._setObject(lexicon_id, PLexicon(lexicon_id))

        settings = _extra()
        settings.lexicon_id = lexicon_id
        settings.index_type = 'Cosine Measure'
        obj.addIndex('foo_text', 'ZCTextIndex', settings)

        obj.addColumn('bacon')

コード例 #20

0

ファイルを表示

ファイル: testZCTextIndex.py プロジェクト: zopefoundation/Products.ZCatalog

 def setUp(self):
     """Create the lexicon and the ZCTextIndex exercised by the tests.

     Stores the lexicon, the ZCTextIndex and its underlying ``index``
     object on the test instance.
     """
     pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
     self.lexicon = PLexicon('lexicon', '', *pipeline)
     holder = LexiconHolder(self.lexicon)
     self.zc_index = ZCTextIndex(
         'name', None, holder, self.IndexFactory, 'text', 'lexicon')
     self.index = self.zc_index.index

コード例 #21

0

ファイルを表示

 def testMultipleAttributes(self):
     # An index configured with 'text1,text2' must find words supplied
     # through either value of the Indexable2 document. The index is built
     # on self.lexicon; the never-used local lexicon has been dropped.
     caller = LexiconHolder(self.lexicon)
     zc_index = ZCTextIndex('name', None, caller, self.IndexFactory,
                            'text1,text2', 'lexicon')
     doc = Indexable2('foo bar', 'alpha omega')
     zc_index.index_object(1, doc)
     nbest, total = zc_index.query('foo')
     self.assertEqual(len(nbest), 1)
     nbest, total = zc_index.query('foo alpha')
     self.assertEqual(len(nbest), 1)
     # 'gamma' occurs in neither value, so nothing matches
     nbest, total = zc_index.query('foo alpha gamma')
     self.assertEqual(len(nbest), 0)

コード例 #22

0

ファイルを表示

    def test_fixOkapiIndexes(self):
        # fixOkapiIndexes must leave _totaldoclen as a callable returning 0
        # after it was overwritten with a plain negative integer.
        cat = ZCatalog('catalog')
        cat.lexicon = PLexicon('lexicon')
        text_index = ZCTextIndex('test',
                                 index_factory=OkapiIndex,
                                 caller=cat,
                                 lexicon_id='lexicon')
        cat.addIndex('test', text_index)
        cat.Indexes['test'].index._totaldoclen = -1000

        from plone.app.upgrade.v41.final import fixOkapiIndexes
        fixOkapiIndexes(cat)

        okapi = cat.Indexes['test'].index
        self.assertEqual(0, okapi._totaldoclen())

コード例 #23

0

ファイルを表示

ファイル: test_catalog.py プロジェクト: icemac/Products.ZCatalog

    def _make_one(self):
        """Return a ``Catalog`` with three indexes, filled and wrapped."""
        from Products.ZCatalog.Catalog import Catalog

        cat = Catalog()
        cat.lexicon = PLexicon('lexicon')

        # build all three indexes first, then register them in order
        named_indexes = (
            ('att1', FieldIndex('att1')),
            ('att2', ZCTextIndex('att2', caller=cat,
                                 index_factory=OkapiIndex,
                                 lexicon_id='lexicon')),
            ('att3', KeywordIndex('att3')),
        )
        for index_id, index_obj in named_indexes:
            cat.addIndex(index_id, index_obj)

        for number in range(self.upper):
            cat.catalogObject(Dummy(number), repr(number))

        return cat.__of__(Dummy('foo'))

コード例 #24

0

ファイルを表示

ファイル: test_catalog.py プロジェクト: c0ns0le/zenoss-4

 def setUp(self):
     """Create the catalog with 'title' and 'true' indexes and fill it.

     Every catalogued object has ``true`` set to True, except the one
     built from 110.
     """
     self._catalog = self._makeOne()
     self._catalog.lexicon = PLexicon('lexicon')
     title_index = ZCTextIndex(
         'title',
         caller=self._catalog,
         index_factory=OkapiIndex,
         lexicon_id='lexicon')
     self._catalog.addIndex('title', title_index)
     self._catalog.addIndex('true', FieldIndex('true'))
     self._catalog.addColumn('title')

     cat = self._get_catalog()
     for number in (1, 2, 3, 10, 11, 110, 111):
         obj = zdummy(number)
         obj.true = True
         if number == 110:
             obj.true = False
         cat.catalogObject(obj, str(number))

コード例 #25

0

ファイルを表示

 def testListAttributes(self):
     # A document value given as a list of strings must be searchable
     # together with the plain string value. The index is built on
     # self.lexicon; the never-used local lexicon has been dropped.
     caller = LexiconHolder(self.lexicon)
     zc_index = ZCTextIndex('name', None, caller, self.IndexFactory,
                            'text1,text2', 'lexicon')
     doc = Indexable2('Hello Tim',
                      ['Now is the winter of our discontent',
                       'Made glorious summer by this sun of York'])
     zc_index.index_object(1, doc)
     nbest, total = zc_index.query('glorious')
     self.assertEqual(len(nbest), 1)
     nbest, total = zc_index.query('York Tim')
     self.assertEqual(len(nbest), 1)
     # 'Tuesday' occurs nowhere in the document, so nothing matches
     nbest, total = zc_index.query('Tuesday Tim York')
     self.assertEqual(len(nbest), 0)

コード例 #26

0

ファイルを表示

ファイル: test_exportimport.py プロジェクト: bendavis78/zope

    def setUp(self):
        """Load the ZCML configuration and create the wrapped ZCTextIndex."""
        from Products.ZCTextIndex.ZCTextIndex import PLexicon
        from Products.ZCTextIndex.ZCTextIndex import ZCTextIndex

        PlacelessSetup.setUp(self)
        zcml_files = (
            ('meta.zcml', Products.Five),
            ('configure.zcml', Products.GenericSetup.ZCTextIndex),
        )
        for file_name, package in zcml_files:
            zcml.load_config(file_name, package)

        # catalog stand-in carrying the lexicon the index refers to
        dummy_catalog = DummyCatalog()
        dummy_catalog.foo_plexicon = PLexicon('foo_plexicon')

        settings = _extra()
        settings.lexicon_id = 'foo_plexicon'
        settings.index_type = 'Okapi BM25 Rank'

        bare_index = ZCTextIndex('foo_zctext', extra=settings,
                                 caller=dummy_catalog)
        self._obj = bare_index.__of__(dummy_catalog)
        self._XML = _ZCTEXT_XML

コード例 #27

0

ファイルを表示

ファイル: CatalogTool.py プロジェクト: bendavis78/zope

    def _initIndexes(self):
        """Create lexicons, indexes and columns from the enumerate* methods.

        Existing indexes and metadata columns of the underlying catalog are
        cleared before the new ones are added.
        """
        # ZCTextIndex lexicons
        for lexicon_spec in self.enumerateLexicons():
            lexicon_id, splitter, normalizer, sw_remover = lexicon_spec
            new_lexicon = PLexicon(lexicon_id, '', splitter, normalizer,
                                   sw_remover)
            self._setObject(lexicon_id, new_lexicon)

        # Content indexes
        internal_catalog = self._catalog
        internal_catalog.indexes.clear()
        for index_name, index_type, extra in self.enumerateIndexes():
            self.addIndex(index_name, index_type, extra=extra)

        # Cached metadata
        internal_catalog.names = ()
        internal_catalog.schema.clear()
        for column_name in self.enumerateColumns():
            self.addColumn(column_name)

コード例 #28

0

ファイルを表示

ファイル: test_catalog.py プロジェクト: icemac/Products.ZCatalog

 def _make_one(self):
     """Return a ``Catalog`` with 'title' and 'true' indexes, filled.

     Every catalogued object has ``true`` set to True, except the one
     built from 110. The result is wrapped in ``ZDummy(1)``.
     """
     from Products.ZCatalog.Catalog import Catalog

     cat = Catalog()
     cat.lexicon = PLexicon('lexicon')
     title_index = ZCTextIndex(
         'title',
         caller=cat,
         index_factory=OkapiIndex,
         lexicon_id='lexicon')
     cat.addIndex('title', title_index)
     cat.addIndex('true', FieldIndex('true'))
     cat.addColumn('title')

     for number in (1, 2, 3, 10, 11, 110, 111):
         obj = ZDummy(number)
         obj.true = True
         if number == 110:
             obj.true = False
         cat.catalogObject(obj, str(number))

     return cat.__of__(ZDummy(1))

コード例 #29

0

ファイルを表示

    def __init__(self, FULLTEXT=False):
        """Set up the catalog tool with its lexicon and base indexes.

        When ``FULLTEXT`` is true, a ``SearchableText`` field index of type
        ``RICHTEXT`` is created as well. ``no_refresh`` is True while the
        set-up runs and False afterwards.
        """
        self.no_refresh = True
        CatalogTool.__init__(self)
        self._catalog = PlominoCatalog()

        plaintext_lexicon = PLexicon(
            'plaintext_lexicon', '', Splitter(), CaseNormalizer())
        self._setObject('plaintext_lexicon', plaintext_lexicon)

        self.addIndex('Form', "FieldIndex")
        self.addIndex('id', "FieldIndex")
        self.addColumn('id')
        self.addIndex('getPlominoReaders', "KeywordIndex")
        self.addIndex('path', "ExtendedPathIndex")

        if FULLTEXT:
            self.createFieldIndex('SearchableText', 'RICHTEXT')

        self.no_refresh = False

コード例 #30

0

ファイルを表示

 def setUp(self):
     """Fill ``self.catalogs`` with three wrapped, populated catalogs.

     Each catalog gets ten ``zdummy`` objects whose ``big`` flag is set
     for numbers above 5 and whose ``number`` flag is always True.
     """
     self.catalogs = []
     for _round in range(3):
         catalog = self._makeOne()
         catalog.lexicon = PLexicon('lexicon')
         for field_id in ('num', 'big', 'number'):
             catalog.addIndex(field_id, FieldIndex(field_id))
         title_index = ZCTextIndex('title', caller=catalog,
                                   index_factory=OkapiIndex,
                                   lexicon_id='lexicon')
         catalog.addIndex('title', title_index)

         wrapped = catalog.__of__(zdummy(16336))
         for number in range(10):
             obj = zdummy(number)
             obj.big = number > 5
             obj.number = True
             wrapped.catalogObject(obj, str(number))
         self.catalogs.append(wrapped)

コード例 #31

0

ファイルを表示

ファイル: test_catalog.py プロジェクト: icemac/Products.ZCatalog

 def _make_many(self):
     """Return ``(catalogs, mergeResults)`` with three populated catalogs.

     Each catalog gets ten ``ZDummy`` objects whose ``big`` flag is set
     for numbers above 5 and whose ``number`` flag is always True.
     """
     from Products.ZCatalog.Catalog import mergeResults

     catalogs = []
     for _round in range(3):
         catalog = self._make_one()
         catalog.lexicon = PLexicon('lexicon')
         for field_id in ('num', 'big', 'number'):
             catalog.addIndex(field_id, FieldIndex(field_id))
         title_index = ZCTextIndex('title', caller=catalog,
                                   index_factory=OkapiIndex,
                                   lexicon_id='lexicon')
         catalog.addIndex('title', title_index)

         wrapped = catalog.__of__(ZDummy(16336))
         for number in range(10):
             obj = ZDummy(number)
             obj.big = number > 5
             obj.number = True
             wrapped.catalogObject(obj, str(number))
         catalogs.append(wrapped)

     return catalogs, mergeResults

コード例 #32

0

ファイルを表示

ファイル: PlominoIndex.py プロジェクト: gaudenz/Plomino

    def __init__(self, FULLTEXT=False):
        """Set up the catalog with its lexicon and base indexes.

        When ``FULLTEXT`` is true, a ``SearchableText`` field index of type
        ``RICHTEXT`` is created as well. ``no_refresh`` is True while the
        set-up runs and False afterwards.
        """
        self.no_refresh = True
        ZCatalog.__init__(self, self.getId())
        self._catalog = PlominoCatalog()

        # TODO: use TextindexNG3
        # The lexicon pipeline is Splitter + CaseNormalizer only; a variant
        # that also used StopWordRemover was previously commented out here.
        plaintext_lexicon = PLexicon(
            'plaintext_lexicon', '', Splitter(), CaseNormalizer())
        self._setObject('plaintext_lexicon', plaintext_lexicon)

        # 'Form' and 'getPlominoReaders' are added directly via addIndex;
        # the createFieldIndex(..., 'SELECTION') alternative was commented
        # out in the original code.
        self.addIndex('Form', "FieldIndex")
        self.addIndex('id', "FieldIndex")
        self.addColumn('id')
        self.addIndex('getPlominoReaders', "KeywordIndex")

        if FULLTEXT:
            self.createFieldIndex('SearchableText', 'RICHTEXT')

        self.no_refresh = False

コード例 #33

0

ファイルを表示

ファイル: testZCTextIndex.py プロジェクト: nacho22martin/tesis

class ZCIndexTestsBase:
    # Tests shared by the ZCTextIndex test cases. The class relies on the
    # concrete test class to supply ``IndexFactory`` and the unittest
    # assertion methods (assertEqual, assertRaises, assertTrue).

    def setUp(self):
        # Create the lexicon and the ZCTextIndex used by the tests below.
        self.lexicon = PLexicon('lexicon', '',
                                Splitter(),
                                CaseNormalizer(),
                                StopWordRemover())
        caller = LexiconHolder(self.lexicon)
        self.zc_index = ZCTextIndex('name',
                                    None,
                                    caller,
                                    self.IndexFactory,
                                    'text',
                                    'lexicon')
        self.index = self.zc_index.index

    def parserFailure(self, query):
        # The query must be rejected with ParseError.
        self.assertRaises(ParseError, self.zc_index.query, query)

    def parserSuccess(self, query, n):
        # The query must report n matches; when n is non-zero, the first
        # element of the first result must be 1.
        r, num = self.zc_index.query(query)
        self.assertEqual(num, n)
        if n:
            self.assertEqual(r[0][0], 1)

    def testMultipleAttributes(self):
        # The index is built on self.lexicon; the never-used local lexicon
        # this test used to create has been dropped.
        caller = LexiconHolder(self.lexicon)
        zc_index = ZCTextIndex('name',
                               None,
                               caller,
                               self.IndexFactory,
                               'text1,text2',
                               'lexicon')
        doc = Indexable2('foo bar', 'alpha omega')
        zc_index.index_object(1, doc)
        nbest, total = zc_index.query('foo')
        self.assertEqual(len(nbest), 1)
        nbest, total = zc_index.query('foo alpha')
        self.assertEqual(len(nbest), 1)
        nbest, total = zc_index.query('foo alpha gamma')
        self.assertEqual(len(nbest), 0)

    def testListAttributes(self):
        # The index is built on self.lexicon; the never-used local lexicon
        # this test used to create has been dropped.
        caller = LexiconHolder(self.lexicon)
        zc_index = ZCTextIndex('name',
                               None,
                               caller,
                               self.IndexFactory,
                               'text1,text2',
                               'lexicon')
        doc = Indexable2('Hello Tim',
                         ['Now is the winter of our discontent',
                          'Made glorious summer by this sun of York'])
        zc_index.index_object(1, doc)
        nbest, total = zc_index.query('glorious')
        self.assertEqual(len(nbest), 1)
        nbest, total = zc_index.query('York Tim')
        self.assertEqual(len(nbest), 1)
        nbest, total = zc_index.query('Tuesday Tim York')
        self.assertEqual(len(nbest), 0)

    def testStopWords(self):
        # the only non-stopword is question
        text = ("to be or not to be "
                "that is the question")
        doc = Indexable(text)
        self.zc_index.index_object(1, doc)
        for word in text.split():
            if word != "question":
                wids = self.lexicon.termToWordIds(word)
                self.assertEqual(wids, [])
        self.assertEqual(len(self.index.get_words(1)), 1)

        self.parserSuccess('question', 1)
        self.parserSuccess('question AND to AND be', 1)
        self.parserSuccess('to AND question AND be', 1)
        self.parserSuccess('question AND NOT gardenia', 1)
        self.parserSuccess('question AND gardenia', 0)
        self.parserSuccess('gardenia', 0)
        self.parserSuccess('question OR gardenia', 1)
        self.parserSuccess('question AND NOT to AND NOT be', 1)
        self.parserSuccess('question OR to OR be', 1)
        self.parserSuccess('question to be', 1)

        # queries without any non-stopword search term are rejected
        self.parserFailure('to be')
        self.parserFailure('to AND be')
        self.parserFailure('to OR be')
        self.parserFailure('to AND NOT be')
        self.parserFailure('to AND NOT question')
        self.parserFailure('to AND NOT gardenia')

    def testDocUpdate(self):
        # Index successive versions of the module-level ``text`` under one
        # docid and check which words remain findable after each update.
        docid = 1   # doesn't change -- we index the same doc repeatedly
        N = len(text)
        stop = get_stopdict()

        d = {}  # word -> list of version numbers containing that word
        for i, version in enumerate(text):
            # use a simple splitter rather than an official one
            words = [w for w in re.split(r"\W+", version.lower())
                     if len(w) > 1 and w not in stop]
            word_seen = set()
            for w in words:
                # record each word only once per version
                if w not in word_seen:
                    d.setdefault(w, []).append(i)
                    word_seen.add(w)

        unique = {}  # version number -> list of words unique to that version
        common = []  # list of words common to all versions
        for w, versionlist in d.items():
            if len(versionlist) == 1:
                unique.setdefault(versionlist[0], []).append(w)
            elif len(versionlist) == N:
                common.append(w)
        # assertTrue replaces the deprecated assert_ alias
        self.assertTrue(len(common) > 0)
        self.assertTrue(len(unique) > 0)

        for i, version in enumerate(text):
            doc = Indexable(version)
            self.zc_index.index_object(docid, doc)
            for w in common:
                nbest, total = self.zc_index.query(w)
                self.assertEqual(total, 1, "did not find %s" % w)
            for k, v in unique.items():
                if k == i:
                    continue
                # words unique to another version must no longer be found
                for w in v:
                    nbest, total = self.zc_index.query(w)
                    self.assertEqual(total, 0, "did not expect to find %s" % w)

コード例 #34

0

ファイルを表示

ファイル: testZCTextIndex.py プロジェクト: zopefoundation/Products.ZCatalog

class ZCIndexTestsBase(object):

    def setUp(self):
        # Build the fixtures shared by the tests: the lexicon
        # (self.lexicon), the text index over the 'text' attribute
        # (self.zc_index) and that index's ``index`` attribute (self.index).
        pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
        self.lexicon = PLexicon('lexicon', '', *pipeline)
        holder = LexiconHolder(self.lexicon)
        self.zc_index = ZCTextIndex(
            'name', None, holder, self.IndexFactory, 'text', 'lexicon')
        self.index = self.zc_index.index

    def parserFailure(self, query):
        # Assert that running ``query`` against the index raises ParseError.
        with self.assertRaises(ParseError):
            self.zc_index.query(query)

    def parserSuccess(self, query, n):
        r, num = self.zc_index.query(query)
        self.assertEqual(num, n)
        if n:
            self.assertEqual(r[0][0], 1)

    def testMultipleAttributes(self):
        # Index one document through an index configured with the field
        # spec 'text1,text2', then check how many results each query gives.
        holder = LexiconHolder(self.lexicon)
        index = ZCTextIndex(
            'name', None, holder, self.IndexFactory, 'text1,text2', 'lexicon')
        index.index_object(1, Indexable2('foo bar', 'alpha omega'))

        # (query, expected number of results)
        expectations = (
            ('foo', 1),
            ('foo alpha', 1),
            ('foo alpha gamma', 0),
        )
        for query, expected in expectations:
            nbest, _total = index.query(query)
            self.assertEqual(len(nbest), expected)

    def testListAttributes(self):
        # Same setup as the multi-attribute case, but the second value
        # handed to Indexable2 is a list of strings instead of one string.
        holder = LexiconHolder(self.lexicon)
        index = ZCTextIndex(
            'name', None, holder, self.IndexFactory, 'text1,text2', 'lexicon')
        lines = [
            'Now is the winter of our discontent',
            'Made glorious summer by this sun of York',
        ]
        index.index_object(1, Indexable2('Hello Tim', lines))

        # (query, expected number of results)
        expectations = (
            ('glorious', 1),
            ('York Tim', 1),
            ('Tuesday Tim York', 0),
        )
        for query, expected in expectations:
            nbest, _total = index.query(query)
            self.assertEqual(len(nbest), expected)

    def testReindex(self):
        # Index docid 1, then index it again with changed and finally
        # empty text; words from the previous text must stop matching.
        holder = LexiconHolder(self.lexicon)
        index = ZCTextIndex(
            'name', None, holder, self.IndexFactory, 'text', 'lexicon')

        def hits(term):
            # number of results the index returns for ``term``
            nbest, _total = index.query(term)
            return len(nbest)

        doc = Indexable('Hello Tim')
        index.index_object(1, doc)
        self.assertEqual(hits('glorious'), 0)
        self.assertEqual(hits('Tim'), 1)

        # reindex with another value
        doc.text = 'Goodbye George'
        index.index_object(1, doc)
        self.assertEqual(hits('Tim'), 0)
        self.assertEqual(hits('Goodbye'), 1)

        # reindex with an empty value
        doc.text = ''
        index.index_object(1, doc)
        self.assertEqual(hits('George'), 0)

    def testStopWords(self):
        # the only non-stopword is question
        phrase = 'to be or not to be that is the question'
        self.zc_index.index_object(1, Indexable(phrase))

        # every other word must be unknown to the lexicon ...
        for word in phrase.split():
            if word == 'question':
                continue
            self.assertEqual(self.lexicon.termToWordIds(word), [])
        # ... and exactly one word is recorded for docid 1
        self.assertEqual(len(self.index.get_words(1)), 1)

        # (query, expected count) for queries that must be accepted
        accepted = (
            ('question', 1),
            ('question AND to AND be', 1),
            ('to AND question AND be', 1),
            ('question AND NOT gardenia', 1),
            ('question AND gardenia', 0),
            ('gardenia', 0),
            ('question OR gardenia', 1),
            ('question AND NOT to AND NOT be', 1),
            ('question OR to OR be', 1),
            ('question to be', 1),
        )
        for query, expected in accepted:
            self.parserSuccess(query, expected)

        # queries that must be rejected with a ParseError
        rejected = (
            'to be',
            'to AND be',
            'to OR be',
            'to AND NOT be',
            'to AND NOT question',
            'to AND NOT gardenia',
        )
        for query in rejected:
            self.parserFailure(query)

    def testDocUpdate(self):
        # Index each entry of the module-level ``text`` under the same
        # docid in turn.  After every re-index, words present in all
        # versions must be found once and words that belong only to a
        # different version must not be found at all.
        docid = 1   # constant: the same document is indexed repeatedly
        version_count = len(text)
        stopwords = get_stopdict()

        # word -> numbers of the versions that contain it
        containing = {}
        for number, version in enumerate(text):
            seen = set()
            # a plain regex split stands in for an official splitter
            for word in re.split(r'\W+', version.lower()):
                if len(word) <= 1 or word in stopwords or word in seen:
                    continue
                seen.add(word)
                containing.setdefault(word, []).append(number)

        unique = {}  # version number -> words found in that version only
        common = []  # words found in every version
        for word, numbers in containing.items():
            occurrences = len(numbers)
            if occurrences == 1:
                unique.setdefault(numbers[0], []).append(word)
            elif occurrences == version_count:
                common.append(word)
        self.assertGreater(len(common), 0)
        self.assertGreater(len(unique), 0)

        for number, version in enumerate(text):
            self.zc_index.index_object(docid, Indexable(version))
            for word in common:
                nbest, total = self.zc_index.query(word)
                self.assertEqual(total, 1, 'did not find {0}'.format(word))
            for owner, words in unique.items():
                if owner != number:
                    for word in words:
                        nbest, total = self.zc_index.query(word)
                        self.assertEqual(
                            total, 0,
                            'did not expect to find {0}'.format(word)
                        )

    def testLexiconIsNotFoundRaisesLookupError(self):
        # Constructing the index with no ``extra`` and no lexicon id is
        # expected to raise LookupError.
        holder = LexiconHolder(self.lexicon)
        self.assertRaises(
            LookupError, ZCTextIndex, 'name', extra=None, caller=holder)

    def testInvalidIndexTypeRaisesValueError(self):
        # With no index factory given and an ``extra`` naming an unknown
        # index_type, the constructor is expected to raise ValueError.
        class Extra(object):
            index_type = 'Some invalid index type'

        holder = LexiconHolder(self.lexicon)
        options = dict(
            extra=Extra,
            caller=holder,
            index_factory=None,
            lexicon_id='lexicon',
        )
        self.assertRaises(ValueError, ZCTextIndex, 'name', **options)