def setUp(self):
    """Run the placeless setup, load the ZCML, and create the lexicon.

    Loads 'meta.zcml' for Products.Five and 'configure.zcml' for
    Products.GenericSetup.ZCTextIndex, then stores a new PLexicon named
    'foo_plexicon' in self._obj and _PLEXICON_XML in self._XML.
    """
    from Products.ZCTextIndex.ZCTextIndex import PLexicon

    PlacelessSetup.setUp(self)

    # Meta directives are loaded before the package configuration.
    zcml.load_config('meta.zcml', Products.Five)
    zcml.load_config('configure.zcml', Products.GenericSetup.ZCTextIndex)

    lexicon = PLexicon('foo_plexicon')
    self._obj = lexicon
    self._XML = _PLEXICON_XML
def testAddTextIndex(self):
    """Check that an added ZCTextIndex can be read back from the catalog.

    A lexicon is attached to self._catalog, a ZCTextIndex named 'id' is
    added, and the object stored under indexes['id'] must be a
    ZCTextIndex instance.
    """
    self._catalog.lexicon = PLexicon('lexicon')
    idx = ZCTextIndex('id', caller=self._catalog,
                      index_factory=OkapiIndex, lexicon_id='lexicon')
    self._catalog.addIndex('id', idx)
    stored = self._catalog.indexes['id']
    # assertTrue replaces the deprecated assert_ alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(isinstance(stored, ZCTextIndex),
                    'add text index failed')
def testDelTextIndex(self):
    """Check that a ZCTextIndex can be removed from the catalog again.

    A lexicon is attached to self._catalog, a ZCTextIndex named 'id' is
    added and then deleted; afterwards 'id' must not be a key of the
    catalog's indexes.
    """
    self._catalog.lexicon = PLexicon('lexicon')
    idx = ZCTextIndex('id', caller=self._catalog,
                      index_factory=OkapiIndex, lexicon_id='lexicon')
    self._catalog.addIndex('id', idx)
    self._catalog.delIndex('id')
    # assertTrue replaces the deprecated assert_ alias, which newer
    # unittest versions no longer provide.
    self.assertTrue('id' not in self._catalog.indexes,
                    'del index failed')
def setUp(self):
    """Run the node-adapter setup, load the ZCML, and create the lexicon.

    Loads 'configure.zcml' for Products.GenericSetup.ZCTextIndex, then
    stores a new PLexicon named 'foo_plexicon' in self._obj and
    _PLEXICON_XML in self._XML.
    """
    import Products.GenericSetup.ZCTextIndex
    from Products.ZCTextIndex.ZCTextIndex import PLexicon

    NodeAdapterTestCase.setUp(self)
    zcml.load_config('configure.zcml', Products.GenericSetup.ZCTextIndex)

    lexicon = PLexicon('foo_plexicon')
    self._obj = lexicon
    self._XML = _PLEXICON_XML
def test_add_text_index(self):
    """Check that an added ZCTextIndex can be read back from the catalog.

    A catalog from self._make_one() gets a lexicon and a ZCTextIndex
    named 'id'; the object stored under indexes['id'] must be a
    ZCTextIndex instance.
    """
    catalog = self._make_one()
    catalog.lexicon = PLexicon('lexicon')
    idx = ZCTextIndex('id', caller=catalog,
                      index_factory=OkapiIndex, lexicon_id='lexicon')
    catalog.addIndex('id', idx)
    stored = catalog.indexes['id']
    # assertTrue replaces the deprecated assert_ alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(isinstance(stored, ZCTextIndex))
def test_del_text_index(self):
    """Check that a ZCTextIndex can be removed from the catalog again.

    A catalog from self._make_one() gets a lexicon and a ZCTextIndex
    named 'id', which is then deleted; afterwards 'id' must not be a
    key of the catalog's indexes.
    """
    catalog = self._make_one()
    catalog.lexicon = PLexicon('lexicon')
    idx = ZCTextIndex('id', caller=catalog,
                      index_factory=OkapiIndex, lexicon_id='lexicon')
    catalog.addIndex('id', idx)
    catalog.delIndex('id')
    # assertTrue replaces the deprecated assert_ alias, which newer
    # unittest versions no longer provide.
    self.assertTrue('id' not in catalog.indexes)
def setUp(self):
    """Create the lexicon, text index and query parser used by the tests.

    The lexicon pipeline is Splitter, CaseNormalizer, StopWordRemover.
    The ZCTextIndex is built with self.IndexFactory on field 'text' and
    finds the lexicon through a LexiconHolder.  self.add_docs() is
    called last.
    """
    pipeline = (Splitter(), CaseNormalizer(), StopWordRemover())
    self.lexicon = PLexicon('lexicon', '', *pipeline)

    holder = LexiconHolder(self.lexicon)
    self.zc_index = ZCTextIndex(
        'name', None, holder, self.IndexFactory, 'text', 'lexicon')

    self.parser = QueryParser(self.lexicon)
    self.index = self.zc_index.index
    self.add_docs()
def setUp(self):
    """Create an 'Okapi BM25 Rank' ZCTextIndex wrapped in a DummyCatalog.

    The catalog carries a PLexicon as attribute 'foo_plexicon'.  The
    wrapped index is stored in self._obj and _ZCTEXT_XML in self._XML.
    """
    from Products.ZCTextIndex.ZCTextIndex import PLexicon, ZCTextIndex

    parent = DummyCatalog()
    parent.foo_plexicon = PLexicon('foo_plexicon')

    settings = _extra()
    settings.lexicon_id = 'foo_plexicon'
    settings.index_type = 'Okapi BM25 Rank'

    text_index = ZCTextIndex('foo_zctext', extra=settings, caller=parent)
    self._obj = text_index.__of__(parent)
    self._XML = _ZCTEXT_XML
def _populate_special(self, obj):
    """Populate the fixture, then add an extra lexicon, index and column.

    After the regular population, obj receives a PLexicon
    'old_plexicon', a 'Cosine Measure' ZCTextIndex named 'foo_text'
    that uses it, and a 'bacon' column.
    """
    from Products.ZCTextIndex.ZCTextIndex import PLexicon

    # NOTE(review): this populates self._obj, not the obj argument;
    # presumably callers always pass self._obj -- confirm.
    self._populate(self._obj)

    obj._setObject('old_plexicon', PLexicon('old_plexicon'))

    settings = _extra()
    settings.lexicon_id = 'old_plexicon'
    settings.index_type = 'Cosine Measure'
    obj.addIndex('foo_text', 'ZCTextIndex', settings)

    obj.addColumn('bacon')
def testMultipleAttributes(self):
    """Query an index built over the field list 'text1,text2'.

    One Indexable2('foo bar', 'alpha omega') document is indexed.
    'foo' and 'foo alpha' must each return one hit; 'foo alpha gamma'
    must return none.
    """
    lexicon = PLexicon('lexicon', '', Splitter(), CaseNormalizer(),
                       StopWordRemover())
    # Use the lexicon built here, so the index does not share state
    # with the fixture's self.lexicon.
    caller = LexiconHolder(lexicon)
    zc_index = ZCTextIndex('name', None, caller, self.IndexFactory,
                           'text1,text2', 'lexicon')

    doc = Indexable2('foo bar', 'alpha omega')
    zc_index.index_object(1, doc)

    nbest, total = zc_index.query('foo')
    self.assertEqual(len(nbest), 1)

    nbest, total = zc_index.query('foo alpha')
    self.assertEqual(len(nbest), 1)

    nbest, total = zc_index.query('foo alpha gamma')
    self.assertEqual(len(nbest), 0)
def test_fixOkapiIndexes(self):
    """Check that fixOkapiIndexes repairs a corrupted _totaldoclen.

    The Okapi index of a new catalog has its _totaldoclen overwritten
    with -1000.  After fixOkapiIndexes(catalog), calling _totaldoclen()
    must return 0.
    """
    catalog = ZCatalog('catalog')
    catalog.lexicon = PLexicon('lexicon')

    text_index = ZCTextIndex('test', index_factory=OkapiIndex,
                             caller=catalog, lexicon_id='lexicon')
    catalog.addIndex('test', text_index)

    # Corrupt the stored total document length before running the fix.
    catalog.Indexes['test'].index._totaldoclen = -1000

    from plone.app.upgrade.v41.final import fixOkapiIndexes
    fixOkapiIndexes(catalog)

    repaired_length = catalog.Indexes['test'].index._totaldoclen()
    self.assertEqual(0, repaired_length)
def _make_one(self):
    """Return a wrapped Catalog with three indexes and self.upper objects.

    The indexes are 'att1' (FieldIndex), 'att2' (ZCTextIndex) and
    'att3' (KeywordIndex).  Dummy(x) is cataloged under repr(x) for
    each x below self.upper.  The catalog is returned wrapped in
    Dummy('foo').
    """
    from Products.ZCatalog.Catalog import Catalog

    catalog = Catalog()
    catalog.lexicon = PLexicon('lexicon')

    # All index objects are created before any of them is registered.
    indexes = (
        ('att1', FieldIndex('att1')),
        ('att2', ZCTextIndex('att2', caller=catalog,
                             index_factory=OkapiIndex,
                             lexicon_id='lexicon')),
        ('att3', KeywordIndex('att3')),
    )
    for index_name, index in indexes:
        catalog.addIndex(index_name, index)

    for number in range(self.upper):
        catalog.catalogObject(Dummy(number), repr(number))

    return catalog.__of__(Dummy('foo'))
def setUp(self):
    """Create self._catalog and catalog seven zdummy objects.

    The catalog gets a 'title' ZCTextIndex, a 'true' FieldIndex and a
    'title' column.  Objects are cataloged through the catalog returned
    by self._get_catalog(); each has true=True except number 110.
    """
    self._catalog = self._makeOne()
    self._catalog.lexicon = PLexicon('lexicon')

    title_index = ZCTextIndex('title', caller=self._catalog,
                              index_factory=OkapiIndex,
                              lexicon_id='lexicon')
    self._catalog.addIndex('title', title_index)
    self._catalog.addIndex('true', FieldIndex('true'))
    self._catalog.addColumn('title')

    cat = self._get_catalog()
    for number in (1, 2, 3, 10, 11, 110, 111):
        obj = zdummy(number)
        # Only object 110 carries a false 'true' attribute.
        obj.true = number != 110
        cat.catalogObject(obj, str(number))
def _initIndexes(self):
    """Recreate lexicons, indexes and metadata columns from the enumerators.

    Lexicons come from enumerateLexicons().  Existing indexes are
    cleared and re-added from enumerateIndexes().  Names and schema are
    reset and the columns re-added from enumerateColumns().
    """
    # ZCTextIndex lexicons
    for lexicon_id, splitter, normalizer, sw_remover in \
            self.enumerateLexicons():
        self._setObject(
            lexicon_id,
            PLexicon(lexicon_id, '', splitter, normalizer, sw_remover))

    # Content indexes
    self._catalog.indexes.clear()
    for name, kind, extra in self.enumerateIndexes():
        self.addIndex(name, kind, extra=extra)

    # Cached metadata
    self._catalog.names = ()
    self._catalog.schema.clear()
    for column in self.enumerateColumns():
        self.addColumn(column)
def setUp(self):
    """Load the ZCML and create a wrapped 'Okapi BM25 Rank' ZCTextIndex.

    After the placeless setup, 'meta.zcml' is loaded for Products.Five
    and 'configure.zcml' for Products.GenericSetup.ZCTextIndex.  The
    index is wrapped in a DummyCatalog carrying a 'foo_plexicon'
    PLexicon and stored in self._obj; _ZCTEXT_XML goes in self._XML.
    """
    from Products.ZCTextIndex.ZCTextIndex import PLexicon, ZCTextIndex

    PlacelessSetup.setUp(self)
    zcml.load_config('meta.zcml', Products.Five)
    zcml.load_config('configure.zcml', Products.GenericSetup.ZCTextIndex)

    parent = DummyCatalog()
    parent.foo_plexicon = PLexicon('foo_plexicon')

    settings = _extra()
    settings.lexicon_id = 'foo_plexicon'
    settings.index_type = 'Okapi BM25 Rank'

    text_index = ZCTextIndex('foo_zctext', extra=settings, caller=parent)
    self._obj = text_index.__of__(parent)
    self._XML = _ZCTEXT_XML
def testListAttributes(self):
    """Query an index over 'text1,text2' when one value is a list.

    The document is built from the string 'Hello Tim' and a list of
    two sentences.  'glorious' and 'York Tim' must each return one
    hit; 'Tuesday Tim York' must return none.
    """
    lexicon = PLexicon('lexicon', '', Splitter(), CaseNormalizer(),
                       StopWordRemover())
    # Use the lexicon built here, so the index does not share state
    # with the fixture's self.lexicon.
    caller = LexiconHolder(lexicon)
    zc_index = ZCTextIndex('name', None, caller, self.IndexFactory,
                           'text1,text2', 'lexicon')

    doc = Indexable2(
        'Hello Tim',
        ['Now is the winter of our discontent',
         'Made glorious summer by this sun of York',
         ])
    zc_index.index_object(1, doc)

    nbest, total = zc_index.query('glorious')
    self.assertEqual(len(nbest), 1)

    nbest, total = zc_index.query('York Tim')
    self.assertEqual(len(nbest), 1)

    nbest, total = zc_index.query('Tuesday Tim York')
    self.assertEqual(len(nbest), 0)
def _make_one(self):
    """Return a wrapped Catalog holding seven ZDummy objects.

    The catalog has a 'title' ZCTextIndex, a 'true' FieldIndex and a
    'title' column.  Each object has true=True except number 110.  The
    catalog is returned wrapped in ZDummy(1).
    """
    from Products.ZCatalog.Catalog import Catalog

    catalog = Catalog()
    catalog.lexicon = PLexicon('lexicon')

    title_index = ZCTextIndex('title', caller=catalog,
                              index_factory=OkapiIndex,
                              lexicon_id='lexicon')
    catalog.addIndex('title', title_index)
    catalog.addIndex('true', FieldIndex('true'))
    catalog.addColumn('title')

    for number in (1, 2, 3, 10, 11, 110, 111):
        obj = ZDummy(number)
        # Only object 110 carries a false 'true' attribute.
        obj.true = number != 110
        catalog.catalogObject(obj, str(number))

    return catalog.__of__(ZDummy(1))
def __init__(self, FULLTEXT=False):
    """Initialise the catalog with its lexicon, indexes and 'id' column.

    FULLTEXT -- when true, also create the 'SearchableText' index
    through createFieldIndex(..., 'RICHTEXT').

    no_refresh is True while this runs and is set to False at the end.
    """
    self.no_refresh = True
    CatalogTool.__init__(self)
    self._catalog = PlominoCatalog()

    plaintext_lexicon = PLexicon(
        'plaintext_lexicon', '', Splitter(), CaseNormalizer())
    self._setObject('plaintext_lexicon', plaintext_lexicon)

    for field_name in ('Form', 'id'):
        self.addIndex(field_name, "FieldIndex")
    self.addColumn('id')
    self.addIndex('getPlominoReaders', "KeywordIndex")
    self.addIndex('path', "ExtendedPathIndex")

    if FULLTEXT:
        self.createFieldIndex('SearchableText', 'RICHTEXT')

    self.no_refresh = False
def setUp(self):
    """Fill self.catalogs with three wrapped catalogs of ten objects each.

    Each catalog gets FieldIndexes 'num', 'big' and 'number' and a
    'title' ZCTextIndex, and is wrapped in zdummy(16336).  The objects
    zdummy(0..9) have number=True and big=True only above 5.
    """
    self.catalogs = []
    for _ in range(3):
        cat = self._makeOne()
        cat.lexicon = PLexicon('lexicon')

        for field_name in ('num', 'big', 'number'):
            cat.addIndex(field_name, FieldIndex(field_name))

        title_index = ZCTextIndex('title', caller=cat,
                                  index_factory=OkapiIndex,
                                  lexicon_id='lexicon')
        cat.addIndex('title', title_index)

        cat = cat.__of__(zdummy(16336))
        for number in range(10):
            obj = zdummy(number)
            obj.big = number > 5
            obj.number = True
            cat.catalogObject(obj, str(number))

        self.catalogs.append(cat)
def _make_many(self):
    """Return three wrapped catalogs together with mergeResults.

    Each catalog comes from self._make_one(), gets FieldIndexes 'num',
    'big' and 'number' and a 'title' ZCTextIndex, and is wrapped in
    ZDummy(16336).  The objects ZDummy(0..9) have number=True and
    big=True only above 5.

    Returns the tuple (catalogs, mergeResults).
    """
    from Products.ZCatalog.Catalog import mergeResults

    catalogs = []
    for _ in range(3):
        cat = self._make_one()
        cat.lexicon = PLexicon('lexicon')

        for field_name in ('num', 'big', 'number'):
            cat.addIndex(field_name, FieldIndex(field_name))

        title_index = ZCTextIndex('title', caller=cat,
                                  index_factory=OkapiIndex,
                                  lexicon_id='lexicon')
        cat.addIndex('title', title_index)

        cat = cat.__of__(ZDummy(16336))
        for number in range(10):
            obj = ZDummy(number)
            obj.big = number > 5
            obj.number = True
            cat.catalogObject(obj, str(number))

        catalogs.append(cat)

    return catalogs, mergeResults
def _initIndexes(self, internal_cmf_16=False):
    """Recreate lexicons, indexes and metadata columns from the enumerators.

    internal_cmf_16 -- when false (the default), a DeprecationWarning
    is issued before anything else is done.

    Lexicons come from enumerateLexicons().  Existing indexes are
    cleared and re-added from enumerateIndexes().  Names and schema are
    reset and the columns re-added from enumerateColumns().
    """
    if not internal_cmf_16:
        message = ('CatalogTool._initIndexes is deprecated and will be '
                   'removed in CMF 2.0.')
        warn(message, DeprecationWarning)

    # ZCTextIndex lexicons
    for lexicon_id, splitter, normalizer, sw_remover in \
            self.enumerateLexicons():
        self._setObject(
            lexicon_id,
            PLexicon(lexicon_id, '', splitter, normalizer, sw_remover))

    # Content indexes
    self._catalog.indexes.clear()
    for name, kind, extra in self.enumerateIndexes():
        self.addIndex(name, kind, extra=extra)

    # Cached metadata
    self._catalog.names = ()
    self._catalog.schema.clear()
    for column in self.enumerateColumns():
        self.addColumn(column)
def __init__(self, FULLTEXT=False):
    """Initialise the ZCatalog with its lexicon, indexes and 'id' column.

    FULLTEXT -- when true, also create the 'SearchableText' index
    through createFieldIndex(..., 'RICHTEXT').

    no_refresh is True while this runs and is set to False at the end.
    """
    self.no_refresh = True
    ZCatalog.__init__(self, self.getId())
    self._catalog = PlominoCatalog()

    # TODO: use TextindexNG3
    # The lexicon pipeline is Splitter + CaseNormalizer only; an earlier
    # variant that also passed StopWordRemover() is not in use.
    plaintext_lexicon = PLexicon(
        'plaintext_lexicon', '', Splitter(), CaseNormalizer())
    self._setObject('plaintext_lexicon', plaintext_lexicon)

    # 'Form' and 'getPlominoReaders' are registered with addIndex
    # directly, not through createFieldIndex(..., 'SELECTION').
    for field_name in ('Form', 'id'):
        self.addIndex(field_name, "FieldIndex")
    self.addColumn('id')
    self.addIndex('getPlominoReaders', "KeywordIndex")

    if FULLTEXT:
        self.createFieldIndex('SearchableText', 'RICHTEXT')

    self.no_refresh = False
def __init__(self, id='Help', title=''):
    """Store id and title and build the embedded ZCatalog 'catalog'.

    The catalog gets a 'lexicon' PLexicon (HTMLWordSplitter,
    CaseNormalizer, StopWordRemover), a 'SearchableText' ZCTextIndex,
    KeywordIndexes 'categories' and 'permissions', and five columns.
    """
    self.id = id
    self.title = title

    catalog = ZCatalog('catalog')
    self.catalog = catalog

    lexicon = PLexicon('lexicon', '', HTMLWordSplitter(),
                       CaseNormalizer(), StopWordRemover())
    catalog._setObject('lexicon', lexicon)

    text_index = ZCTextIndex('SearchableText', caller=catalog,
                             index_factory=OkapiIndex,
                             lexicon_id=lexicon.id)
    # not using catalog.addIndex because it depends on Product
    # initialization
    catalog._catalog.addIndex('SearchableText', text_index)
    for keyword_name in ('categories', 'permissions'):
        catalog._catalog.addIndex(keyword_name, KeywordIndex(keyword_name))

    for column in ('categories', 'permissions', 'title_or_id', 'url', 'id'):
        catalog.addColumn(column)
def _make_one(self, extra=None):
    """Return a wrapped Catalog with three indexes and self.upper objects.

    extra -- optional callable; when given it is called with the
    catalog after the indexes and the 'num' column are set up and
    before any object is cataloged.

    dummy(self.nums[x]) is cataloged under repr(x) for each x below
    self.upper.  The catalog is returned wrapped in dummy('foo').
    """
    from Products.ZCatalog.Catalog import Catalog

    catalog = Catalog()
    catalog.lexicon = PLexicon('lexicon')

    field_index = FieldIndex('att1')
    text_index = ZCTextIndex('att2', caller=catalog,
                             index_factory=OkapiIndex,
                             lexicon_id='lexicon')
    # Registration order is kept: att2, then att1, then num.
    catalog.addIndex('att2', text_index)
    number_index = FieldIndex('num')
    catalog.addIndex('att1', field_index)
    catalog.addIndex('num', number_index)
    catalog.addColumn('num')

    if extra is not None:
        extra(catalog)

    for position in range(self.upper):
        catalog.catalogObject(dummy(self.nums[position]), repr(position))

    return catalog.__of__(dummy('foo'))
def setUp(self):
    """Create self._catalog with col*/att* indexes and self.upper objects.

    Indexes: col1/att1/num (FieldIndex), col2/att2 (ZCTextIndex),
    col3/att3 (KeywordIndex) and foo (MultiFieldIndex).  Every index
    name except 'foo' is also added as a column.
    dummy(self.nums[x]) is cataloged under repr(x) for each x below
    self.upper.  The catalog is finally wrapped in dummy('foo').
    """
    catalog = self._makeOne()
    self._catalog = catalog
    catalog.lexicon = PLexicon('lexicon')

    def okapi_text_index(name):
        return ZCTextIndex(name, caller=catalog,
                           index_factory=OkapiIndex,
                           lexicon_id='lexicon')

    # First group: the three col* indexes and their columns.
    col_indexes = (
        ('col1', FieldIndex('col1')),
        ('col2', okapi_text_index('col2')),
        ('col3', KeywordIndex('col3')),
    )
    for index_name, index in col_indexes:
        catalog.addIndex(index_name, index)
    for column in ('col1', 'col2', 'col3'):
        catalog.addColumn(column)

    # Second group: the att*, num and foo indexes ('foo' gets no column).
    att_indexes = (
        ('att1', FieldIndex('att1')),
        ('att2', okapi_text_index('att2')),
        ('att3', KeywordIndex('att3')),
        ('num', FieldIndex('num')),
        ('foo', MultiFieldIndex('foo')),
    )
    for index_name, index in att_indexes:
        catalog.addIndex(index_name, index)
    for column in ('att1', 'att2', 'att3', 'num'):
        catalog.addColumn(column)

    for position in range(self.upper):
        catalog.catalogObject(dummy(self.nums[position]), repr(position))

    self._catalog = catalog.__of__(dummy('foo'))
def _populate(self, obj):
    """Add a lexicon, one index of each listed type and two columns to obj.

    Adds PLexicon 'foo_plexicon' with a Splitter / CaseNormalizer /
    StopWordRemover pipeline, then the indexes foo_date, foo_daterange,
    foo_field, foo_keyword, foo_path, foo_topic and foo_zctext, and
    the columns 'spam' and 'eggs'.
    """
    from Products.ZCTextIndex.Lexicon import (
        CaseNormalizer,
        Splitter,
        StopWordRemover,
    )
    from Products.ZCTextIndex.ZCTextIndex import PLexicon

    obj._setObject('foo_plexicon', PLexicon('foo_plexicon'))
    obj.foo_plexicon._pipeline = (
        Splitter(), CaseNormalizer(), StopWordRemover())

    obj.addIndex('foo_date', 'DateIndex')

    obj.addIndex('foo_daterange', 'DateRangeIndex')
    obj._catalog.getIndex('foo_daterange')._edit('bar', 'baz')

    # The field and keyword indexes both index the 'bar' attribute.
    for index_name, index_type in (('foo_field', 'FieldIndex'),
                                   ('foo_keyword', 'KeywordIndex')):
        obj.addIndex(index_name, index_type)
        obj._catalog.getIndex(index_name).indexed_attrs = ('bar', )

    obj.addIndex('foo_path', 'PathIndex')

    obj.addIndex('foo_topic', 'TopicIndex')
    topic_index = obj._catalog.getIndex('foo_topic')
    for set_id, expression in (('bar', 'True'), ('baz', 'False')):
        topic_index.addFilteredSet(set_id, 'PythonFilteredSet', expression)

    settings = _extra()
    settings.lexicon_id = 'foo_plexicon'
    settings.index_type = 'Okapi BM25 Rank'
    obj.addIndex('foo_zctext', 'ZCTextIndex', settings)

    for column in ('spam', 'eggs'):
        obj.addColumn(column)
def testReindex(self):
    """Re-index one document id with changed text and check the queries.

    After 'Hello Tim' is indexed, 'Tim' returns one hit and 'glorious'
    none.  After re-indexing with 'Goodbye George', 'Tim' returns none
    and 'Goodbye' one.  After re-indexing with an empty string,
    'George' returns none.
    """
    lexicon = PLexicon('lexicon', '', Splitter(), CaseNormalizer(),
                       StopWordRemover())
    # Use the lexicon built here, so the index does not share state
    # with the fixture's self.lexicon.
    caller = LexiconHolder(lexicon)
    zc_index = ZCTextIndex('name', None, caller, self.IndexFactory,
                           'text', 'lexicon')

    doc = Indexable('Hello Tim')
    zc_index.index_object(1, doc)
    nbest, total = zc_index.query('glorious')
    self.assertEqual(len(nbest), 0)
    nbest, total = zc_index.query('Tim')
    self.assertEqual(len(nbest), 1)

    # reindex with another value
    doc.text = 'Goodbye George'
    zc_index.index_object(1, doc)
    nbest, total = zc_index.query('Tim')
    self.assertEqual(len(nbest), 0)
    nbest, total = zc_index.query('Goodbye')
    self.assertEqual(len(nbest), 1)

    # reindex with an empty value
    doc.text = ''
    zc_index.index_object(1, doc)
    nbest, total = zc_index.query('George')
    self.assertEqual(len(nbest), 0)
def _initSite(self, foo=2):
    """Create self.root.site with an emptied portal_catalog; return the site.

    All sub-objects, indexes and columns of the new CatalogTool are
    removed.

    foo -- when greater than 0, a 'foo_plexicon' lexicon, a
    'foo_zctext' ZCTextIndex ('Okapi BM25 Rank') and a 'foo_zctext'
    column are added.
    """
    site = Folder(id='site')
    self.root.site = site
    ctool = CatalogTool()
    site.portal_catalog = ctool

    # Strip everything the tool created for itself.
    for object_id in ctool.objectIds():
        ctool._delObject(object_id)
    for index_id in ctool.indexes():
        ctool.delIndex(index_id)
    for column in ctool.schema()[:]:
        ctool.delColumn(column)

    if not foo > 0:
        return site

    ctool._setObject('foo_plexicon', PLexicon('foo_plexicon'))
    ctool.foo_plexicon._pipeline = (
        Splitter(), CaseNormalizer(), StopWordRemover())

    settings = _extra()
    settings.lexicon_id = 'foo_plexicon'
    settings.index_type = 'Okapi BM25 Rank'
    ctool.addIndex('foo_zctext', 'ZCTextIndex', settings)
    ctool.addColumn('foo_zctext')

    return site
def setUp(self):
    """Store a new 'foo_plexicon' PLexicon and _PLEXICON_XML on the test."""
    from Products.ZCTextIndex.ZCTextIndex import PLexicon

    lexicon = PLexicon('foo_plexicon')
    self._obj = lexicon
    self._XML = _PLEXICON_XML
def setUp(self):
    """Run the node-adapter setup, then store the lexicon and its XML.

    A new PLexicon named 'foo_plexicon' goes in self._obj and
    _PLEXICON_XML in self._XML.
    """
    from Products.ZCTextIndex.ZCTextIndex import PLexicon

    NodeAdapterTestCase.setUp(self)

    lexicon = PLexicon('foo_plexicon')
    self._obj = lexicon
    self._XML = _PLEXICON_XML