def testCreateFacet(self):
    # Fields registered as drilldown fields must end up in the committed
    # document as FacetFields; all other fields stay ordinary search fields.
    fields = {
        'field1': ['value1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        'untokenized.field5': ['value5', 'value6'],
        'untokenized.field6': ['value5/value6'],
        'untokenized.field7': ['valuex'],
        'untokenized.field8': [['grandparent', 'parent', 'child'], ['parent2', 'child']],
    }
    fieldRegistry = FieldRegistry(drilldownFields=[
        DrilldownField('untokenized.field4'),
        DrilldownField('untokenized.field5'),
        DrilldownField('untokenized.field6'),
        DrilldownField('untokenized.field8', hierarchical=True),
    ])
    fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=fieldRegistry)
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'

    # NOTE(review): the expected orders asserted below appear to follow the
    # iteration order of `fields` -- confirm before changing the dict.
    for field, values in fields.items():
        for value in values:
            fields2LuceneDoc.addField(field, value)
    consume(fields2LuceneDoc.commit('unused'))

    document = observer.calledMethods[0].kwargs['document']

    searchFields = [f for f in document.getFields() if not FacetField.instance_(f)]
    self.assertEqual(
        ['field1', 'sorted.field3', 'untokenized.field7'],
        [f.name() for f in searchFields])

    facetsFields = [
        FacetField.cast_(f)
        for f in document.getFields()
        if FacetField.instance_(f)
    ]
    self.assertEqual(6, len(facetsFields))
    # Note: a FacetField doesn't have a name
    self.assertEqual(
        [
            ('untokenized.field8', ['grandparent', 'parent', 'child']),
            ('untokenized.field8', ['parent2', 'child']),
            ('untokenized.field6', ['value5/value6']),
            ('untokenized.field4', ['value4']),
            ('untokenized.field5', ['value5']),
            ('untokenized.field5', ['value6']),
        ],
        [(f.dim, list(f.path)) for f in facetsFields])
def testIsUntokenized(self):
    fieldRegistry = FieldRegistry(
        drilldownFields=[DrilldownField('aDrilldownField')])

    # The registered drilldown field and 'untokenized.some.field' are reported
    # as untokenized; 'other.field' is not.
    self.assertTrue(fieldRegistry.isUntokenized('aDrilldownField'))
    self.assertTrue(fieldRegistry.isUntokenized('untokenized.some.field'))
    self.assertFalse(fieldRegistry.isUntokenized('other.field'))

    # Registering the same name again with another field type changes the answer.
    fieldRegistry.register('fieldname', STRINGFIELD)
    self.assertTrue(fieldRegistry.isUntokenized('fieldname'))
    fieldRegistry.register('fieldname', TEXTFIELD)
    self.assertFalse(fieldRegistry.isUntokenized('fieldname'))
def testCreateFacet(self):
    # Fields registered as drilldown fields must be emitted as facet field
    # dicts (carrying a "path"); all other fields are emitted without one.
    fields = {
        'field1': ['value1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        'untokenized.field5': ['value5', 'value6'],
        'untokenized.field6': ['value5/value6'],
        'untokenized.field7': ['valuex'],
        'untokenized.field8': [['grandparent', 'parent', 'child'], ['parent2', 'child']],
    }
    fieldRegistry = FieldRegistry(drilldownFields=[
        DrilldownField('untokenized.field4'),
        DrilldownField('untokenized.field5'),
        DrilldownField('untokenized.field6'),
        DrilldownField('untokenized.field8', hierarchical=True),
    ])
    fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=fieldRegistry)
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'

    # NOTE(review): the expected orders asserted below appear to follow the
    # iteration order of `fields` -- confirm before changing the dict.
    for field, values in fields.items():
        for value in values:
            fields2LuceneDoc.addField(field, value)
    consume(fields2LuceneDoc.commit('unused'))

    # Separate name so the input dict above is not shadowed by the output list.
    committedFields = observer.calledMethods[0].kwargs['fields']

    searchFields = [f for f in committedFields if "path" not in f]
    self.assertEqual(
        ['field1', 'sorted.field3', 'untokenized.field7'],
        [f['name'] for f in searchFields])

    facetsFields = [f for f in committedFields if "path" in f]
    self.assertEqual(6, len(facetsFields))
    self.assertEqual(
        [
            ('untokenized.field8', ['grandparent', 'parent', 'child']),
            ('untokenized.field8', ['parent2', 'child']),
            ('untokenized.field6', ['value5/value6']),
            ('untokenized.field4', ['value4']),
            ('untokenized.field5', ['value5']),
            ('untokenized.field5', ['value6']),
        ],
        [(f['name'], f['path']) for f in facetsFields])
def testDrilldownFields(self):
    # Two drilldown fields are given to the constructor, a third one is
    # registered afterwards as single-valued.
    registry = FieldRegistry(drilldownFields=[
        DrilldownField(name='aap'),
        DrilldownField(name='noot', hierarchical=True),
    ])
    registry.registerDrilldownField(fieldname='mies', multiValued=False)

    for knownName in ('aap', 'noot', 'mies'):
        self.assertTrue(registry.isDrilldownField(knownName))
    self.assertFalse(registry.isDrilldownField('vuur'))

    self.assertFalse(registry.isHierarchicalDrilldown('aap'))
    self.assertTrue(registry.isHierarchicalDrilldown('noot'))

    self.assertTrue(registry.isMultivaluedDrilldown('aap'))
    self.assertTrue(registry.isMultivaluedDrilldown('noot'))
    self.assertFalse(registry.isMultivaluedDrilldown('mies'))

    self.assertTrue(registry.isUntokenized('mies'))

    # createFacetField returns a plain dict description of the facet field.
    facetField = registry.createFacetField("name", ["value"])
    expected = {
        "type": "FacetField",
        "name": "name",
        "path": ["value"],
    }
    self.assertEqual(expected, facetField)
def testDrilldownFields(self):
    # Two drilldown fields are given to the constructor, a third one is
    # registered afterwards as single-valued; all three must be reflected
    # in the registry's facetsConfig.
    drilldownFields = [
        DrilldownField(name='aap'),
        DrilldownField(name='noot', hierarchical=True),
    ]
    registry = FieldRegistry(drilldownFields=drilldownFields)
    registry.registerDrilldownField(fieldname='mies', multiValued=False)

    self.assertTrue(registry.isDrilldownField('aap'))
    self.assertTrue(registry.isDrilldownField('noot'))
    self.assertTrue(registry.isDrilldownField('mies'))
    self.assertFalse(registry.isDrilldownField('vuur'))

    self.assertFalse(registry.isHierarchicalDrilldown('aap'))
    self.assertTrue(registry.isHierarchicalDrilldown('noot'))

    facetsConfig = registry.facetsConfig
    dimConfigs = facetsConfig.getDimConfigs()
    self.assertEqual(set(['aap', 'noot', 'mies']), set(dimConfigs.keySet()))
    self.assertFalse(dimConfigs.get('aap').hierarchical)
    self.assertTrue(dimConfigs.get('noot').hierarchical)
    self.assertTrue(dimConfigs.get('noot').multiValued)
    self.assertFalse(dimConfigs.get('mies').multiValued)
def testDrilldownFieldQuery(self):
    # A query on a hierarchical drilldown field converts to a DrillDown term
    # query; a quoted value containing '>' yields a multi-element path.
    self.fieldRegistry = FieldRegistry(
        [DrilldownField('field', hierarchical=True)])

    self.assertEqual(
        dict(
            type="TermQuery",
            term=dict(field="field", path=["value"], type="DrillDown"),
        ),
        self._convert("field = value"))

    self.assertEqual(
        dict(
            type="TermQuery",
            term=dict(field="field", path=["value", "value1"], type="DrillDown"),
        ),
        self._convert("field = \"value>value1\""))
def testAddFacetField(self):
    # A value added through addFacetField must show up as exactly one facet
    # field (a dict carrying a "path") in the committed fields.
    fieldRegistry = FieldRegistry(drilldownFields=[
        DrilldownField('untokenized.field'),
    ])
    fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=fieldRegistry)
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'

    fields2LuceneDoc.addField('field', 'value')
    fields2LuceneDoc.addFacetField('untokenized.field', 'untokenized value')
    consume(fields2LuceneDoc.commit('unused'))

    fields = observer.calledMethods[0].kwargs['fields']
    facetsFields = [f for f in fields if "path" in f]
    self.assertEqual(1, len(facetsFields))
def testAddFacetField(self):
    # A value added through addFacetField must show up as exactly one
    # FacetField in the committed document.
    fieldRegistry = FieldRegistry(drilldownFields=[
        DrilldownField('untokenized.field'),
    ])
    fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=fieldRegistry)
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'

    fields2LuceneDoc.addField('field', 'value')
    fields2LuceneDoc.addFacetField('untokenized.field', 'untokenized value')
    consume(fields2LuceneDoc.commit('unused'))

    document = observer.calledMethods[0].kwargs['document']
    facetsFields = [
        FacetField.cast_(f)
        for f in document.getFields()
        if FacetField.instance_(f)
    ]
    self.assertEqual(1, len(facetsFields))
def testAdd(self):
    # A fields list handed to FieldsListToLuceneDocument.add must result in a
    # single addDocument call whose 'fields' are text, facet and key field
    # dicts; the empty drilldown value ([]) yields no field at all.
    class Factory():
        def __init__(self, observable, untokenizedFieldnames):
            self.observable = observable
            self.untokenizedFieldnames = untokenizedFieldnames

        def fieldsFor(self, fieldname, value):
            # The unreachable 'yield' turns this into a generator; the result
            # is handed back through the StopIteration argument.
            raise StopIteration([(fieldname, value)])
            yield

    fieldFactory = Factory
    fieldRegistry = FieldRegistry(drilldownFields=[DrilldownField('drilldown.field')])
    index = FieldsListToLuceneDocument(
        fieldRegistry,
        untokenizedFieldnames=[],
        indexFieldFactory=fieldFactory)
    observer = CallTrace(emptyGeneratorMethods=['addDocument'])
    index.addObserver(observer)

    longSpecialCharacterValue = u'\u041c\u0438\u043d\u0438\u0441\u0442\u0435\u0440\u0441\u0442\u0432\u043e \u0420\u044b\u0431\u043d\u043e\u0439 \u041f\u0440\u043e\u043c\u044b\u0448\u043b\u0435\u043d\u043d\u043e\u0441\u0438 \u0421\u043e\u044e\u0437\u0430 \u0421\u0421\u0420, \u0422\u0438\u0445\u043e\u043e\u043a\u0435\u0430\u043d\u0438\u0441\u043a\u0438\u0439 \u041d\u0430\u0443\u0447\u043d\u043e-\u0418\u0441\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u0441\u043a\u0438\u0439 \u0418\u043d\u0441\u0442\u0438\u0442\u0443\u0442 \u0420\u044b\u0431\u043d\u043e\u0433\u043e \u0425\u043e\u0437\u044f\u0439\u0441\u0442\u0432\u0430 \u0438 \u041e\u043a\u0435\u0430\u043d\u043e\u0433\u0440\u0430\u0444\u0438\u0438, \u0412\u043b\u0430\u0434\u0438\u0432\u043e\u0441\u0442\u043e\u043a'
    fields = [
        ("field1", "value1"),
        ("field2", "value2"),
        ("drilldown.field", "a drilldown value"),
        ("drilldown.field", longSpecialCharacterValue),
        ("drilldown.field", ['a', 'b']),
        ("drilldown.field", []),
        ("__key__.field", "a key value"),
        ("__key__.field1", 2),
    ]
    consume(index.add(identifier="", fieldslist=fields))

    self.assertEqual(['addDocument'], observer.calledMethodNames())
    addedFields = observer.calledMethods[0].kwargs['fields']
    self.assertEqual(
        [
            {'name': 'field1', 'type': 'TextField', 'value': 'value1'},
            {'name': 'field2', 'type': 'TextField', 'value': 'value2'},
            {'name': 'drilldown.field', 'type': 'FacetField', 'path': ['a drilldown value']},
            {'name': 'drilldown.field', 'type': 'FacetField', 'path': [longSpecialCharacterValue]},
            {'name': 'drilldown.field', 'type': 'FacetField', 'path': ['a', 'b']},
            {'name': '__key__.field', 'type': 'KeyField', 'value': 'a key value'},
            {'name': '__key__.field1', 'type': 'KeyField', 'value': 2},
        ],
        addedFields)
def testIsIndexField(self):
    drilldownNames = ['field2', 'field3']
    registry = FieldRegistry(
        drilldownFields=[DrilldownField(f) for f in drilldownNames],
        termVectorFields=['field1', 'field2'])

    # Only 'field3' (a drilldown field without a term vector) is reported as
    # not being an index field.
    self.assertTrue(registry.isIndexField('field1'))
    self.assertTrue(registry.isIndexField('field2'))
    self.assertFalse(registry.isIndexField('field3'))
    self.assertTrue(registry.isIndexField('field4'))
def testDrilldownFieldQuery(self):
    # A query on a registered drilldown field must be composed into a
    # TermQuery on the DrillDownQuery term for that field and value.
    settings = LuceneSettings(
        fieldRegistry=FieldRegistry([DrilldownField('field')]))
    self.composer = LuceneQueryComposer(
        unqualifiedTermFields=[("unqualified", 1.0)],
        luceneSettings=settings)

    expectedQuery = TermQuery(DrillDownQuery.term("$facets", "field", "value"))
    self.assertConversion(expectedQuery, "field = value")
'long' : 'http://www.knaw.nl/narcis/1.0/long/', 'short' : 'http://www.knaw.nl/narcis/1.0/short/', 'mods' : 'http://www.loc.gov/mods/v3', 'didl' : 'urn:mpeg:mpeg21:2002:02-DIDL-NS', 'norm' : 'http://dans.knaw.nl/narcis/normalized', }) def untokenizedFieldname(fieldname): return UNTOKENIZED_PREFIX + fieldname UNQUALIFIED_TERM_FIELDS = [('__all__', 1.0)] drilldownFields = [ # def __init__(self, name, hierarchical=False, multiValued=True, indexFieldName=None): DrilldownField(untokenizedFieldname('meta_repositorygroupid')), DrilldownField(untokenizedFieldname('meta_repositoryid')), DrilldownField(untokenizedFieldname('meta_collection')), DrilldownField(untokenizedFieldname('genre')), DrilldownField(untokenizedFieldname('access')), DrilldownField(untokenizedFieldname('dd_year')), DrilldownField(untokenizedFieldname('status')), DrilldownField(untokenizedFieldname('dd_prices')), DrilldownField(untokenizedFieldname('dd_werkzaamheid')), DrilldownField(untokenizedFieldname('dd_position')), DrilldownField(untokenizedFieldname('dd_institute')), DrilldownField(untokenizedFieldname('dd_cat')), DrilldownField(untokenizedFieldname('dd_thesis')), DrilldownField(untokenizedFieldname('dd_penv')), DrilldownField(untokenizedFieldname('dd_os')), DrilldownField(untokenizedFieldname('dd_cre')),
def main(reactor, port, databasePath):
    """Build and return the observable tree (nested tuples) of the server.

    Sets up two Lucene cores ('main' and 'main2') plus an 'empty-core', a
    term numerator and a storage component below ``databasePath``, and wires
    them behind an ObservableHttpServer listening on ``port``.
    """
    drilldownFields = [
        DrilldownField('untokenized.field2'),
        DrilldownField('untokenized.fieldHier', hierarchical=True),
    ]
    fieldRegistry = FieldRegistry(drilldownFields)

    luceneSettings = LuceneSettings(
        fieldRegistry=fieldRegistry,
        commitCount=30,
        commitTimeout=1,
        analyzer=MerescoDutchStemmingAnalyzer())
    lucene = Lucene(
        path=join(databasePath, 'lucene'),
        reactor=reactor,
        name='main',
        settings=luceneSettings)

    lucene2Settings = LuceneSettings(fieldRegistry=fieldRegistry, commitTimeout=0.1)
    lucene2 = Lucene(
        path=join(databasePath, 'lucene2'),
        reactor=reactor,
        name='main2',
        settings=lucene2Settings)

    termNumerator = TermNumerator(path=join(databasePath, 'termNumerator'))

    emptyLuceneSettings = LuceneSettings(commitTimeout=1)
    multiLuceneHelix = (
        MultiLucene(defaultCore='main'),
        (Lucene(
            path=join(databasePath, 'lucene-empty'),
            reactor=reactor,
            name='empty-core',
            settings=emptyLuceneSettings),
        ),
        (lucene,),
        (lucene2,),
    )
    storageComponent = StorageComponent(directory=join(databasePath, 'storage'))

    def multiCqlToLuceneQuery():
        # A fresh converter per call: '/sru' and '/remote' each get their own
        # instance, both observed by the shared multiLuceneHelix.
        return MultiCqlToLuceneQuery(
            defaultCore='main',
            coreToCqlLuceneQueries={
                "main": CqlToLuceneQuery([], luceneSettings=luceneSettings),
                "main2": CqlToLuceneQuery([], luceneSettings=lucene2Settings),
                "empty-core": CqlToLuceneQuery([], luceneSettings=emptyLuceneSettings),
            })

    return (
        Observable(),
        (ObservableHttpServer(reactor=reactor, port=port),
            (BasicHttpHandler(),
                (ApacheLogger(outputStream=stdout),
                    # Informational pages; more specific paths are handled below.
                    (PathFilter("/info", excluding=[
                            '/info/version',
                            '/info/name',
                            '/update',
                            '/sru',
                            '/remote',
                            '/via-remote-sru',
                        ]),
                        (DynamicHtml(
                            [dynamicPath],
                            reactor=reactor,
                            indexPage='/info',
                            additionalGlobals={
                                'VERSION': version,
                            }),
                        )
                    ),
                    (PathFilter("/info/version"),
                        (StringServer(version, ContentTypePlainText),)
                    ),
                    (PathFilter("/info/name"),
                        (StringServer('Meresco Lucene', ContentTypePlainText),)
                    ),
                    (PathFilter("/static"),
                        (PathRename(lambda path: path[len('/static'):]),
                            (FileServer(staticPath),)
                        )
                    ),
                    # Update endpoints, one per core.
                    (PathFilter("/update_main", excluding=['/update_main2']),
                        uploadHelix(
                            lucene,
                            termNumerator,
                            storageComponent,
                            drilldownFields,
                            fieldRegistry=luceneSettings.fieldRegistry),
                    ),
                    (PathFilter("/update_main2"),
                        uploadHelix(
                            lucene2,
                            termNumerator,
                            storageComponent,
                            drilldownFields,
                            fieldRegistry=lucene2Settings.fieldRegistry),
                    ),
                    # SRU directly on the local cores.
                    (PathFilter('/sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (multiCqlToLuceneQuery(),
                                    multiLuceneHelix,
                                ),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    # SRU that queries through the '/remote' endpoint of this server.
                    (PathFilter('/via-remote-sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (LuceneRemote(host='localhost', port=port, path='/remote'),),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    (PathFilter('/remote'),
                        (LuceneRemoteService(reactor=reactor),
                            (multiCqlToLuceneQuery(),
                                multiLuceneHelix,
                            )
                        )
                    ),
                    (PathFilter('/autocomplete'),
                        (Autocomplete('localhost', port, '/autocomplete', '__all__', '?', 5, '?', '?'),
                            (lucene,),
                        )
                    )
                )
            )
        )
    )