def testAddDocument(self):
    # Adding one field and committing must lead to exactly one
    # 'addDocument' call on the observer, and that call must carry the
    # identifier stored in the transaction locals under 'id'.
    fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
    fields2LuceneDoc.addField('field', 'value')
    consume(fields2LuceneDoc.commit('unused'))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(['addDocument'], observer.calledMethodNames())
    self.assertEqual('identifier', observer.calledMethods[0].kwargs['identifier'])
def testCreateDocument(self):
    # Builds a document straight from a field dict via _createDocument and
    # checks, per field-name prefix, the value and the indexed / stored /
    # tokenized flags of the resulting field type.
    # NOTE(review): a second testCreateDocument is defined further down in
    # this file; if both live in the same class only the later one runs -
    # confirm which variant is intended.
    fields = {
        'field1': ['value1'],
        'field2': ['value2', 'value2.1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        '__key__.field5': ["12345"],
        '__numeric__.field6': ["12345"],
    }
    fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
    # The observer answers 'numerateTerm' with 1; the __key__ field below is
    # expected to carry that value instead of the original "12345".
    observer = CallTrace(returnValues={'numerateTerm': 1})
    fields2LuceneDoc.addObserver(observer)
    document = fields2LuceneDoc._createDocument(fields)
    self.assertEqual(
        {
            'field1',
            'field2',
            'sorted.field3',
            'untokenized.field4',
            '__key__.field5',
            '__numeric__.field6',
        },
        {f.name() for f in document.getFields()})

    field1 = document.getField("field1")
    self.assertEqual('value1', field1.stringValue())
    self.assertTrue(field1.fieldType().indexed())
    self.assertFalse(field1.fieldType().stored())
    self.assertTrue(field1.fieldType().tokenized())

    self.assertEqual(['value2', 'value2.1'], document.getValues('field2'))

    field3 = document.getField("sorted.field3")
    self.assertEqual('value3', field3.stringValue())
    self.assertTrue(field3.fieldType().indexed())
    self.assertFalse(field3.fieldType().stored())
    self.assertFalse(field3.fieldType().tokenized())

    field4 = document.getField("untokenized.field4")
    self.assertEqual('value4', field4.stringValue())
    self.assertTrue(field4.fieldType().indexed())
    self.assertFalse(field4.fieldType().stored())
    self.assertFalse(field4.fieldType().tokenized())

    field5 = document.getField("__key__.field5")
    self.assertEqual(1, field5.numericValue().longValue())
    self.assertFalse(field5.fieldType().indexed())
    self.assertFalse(field5.fieldType().stored())
    self.assertTrue(field5.fieldType().tokenized())

    field6 = document.getField("__numeric__.field6")
    self.assertEqual(12345, field6.numericValue().longValue())
    self.assertFalse(field6.fieldType().indexed())
    self.assertFalse(field6.fieldType().stored())
    self.assertTrue(field6.fieldType().tokenized())
def testCreateFacet(self):
    # Feeds fields through addField/commit with four of them registered as
    # drilldown fields, then splits the fields of the resulting document
    # into FacetField instances and all other (search) fields.
    # NOTE(review): a second testCreateFacet is defined further down in this
    # file; if both live in the same class only the later one runs - confirm.
    fields = {
        'field1': ['value1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        'untokenized.field5': ['value5', 'value6'],
        'untokenized.field6': ['value5/value6'],
        'untokenized.field7': ['valuex'],
        'untokenized.field8': [['grandparent', 'parent', 'child'], ['parent2', 'child']]
    }
    fields2LuceneDoc = Fields2LuceneDoc(
        'tsname',
        fieldRegistry=FieldRegistry(drilldownFields=[
            DrilldownField('untokenized.field4'),
            DrilldownField('untokenized.field5'),
            DrilldownField('untokenized.field6'),
            DrilldownField('untokenized.field8', hierarchical=True),
        ]))
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
    for field, values in fields.items():
        for value in values:
            fields2LuceneDoc.addField(field, value)
    consume(fields2LuceneDoc.commit('unused'))

    document = observer.calledMethods[0].kwargs['document']
    searchFields = [
        f for f in document.getFields() if not FacetField.instance_(f)
    ]
    # NOTE(review): both expected lists below are order-sensitive; confirm
    # the order does not depend on dict iteration order.
    self.assertEqual(['field1', 'sorted.field3', 'untokenized.field7'],
                     [f.name() for f in searchFields])

    facetsFields = [
        FacetField.cast_(f) for f in document.getFields()
        if FacetField.instance_(f)
    ]
    self.assertEqual(6, len(facetsFields))
    self.assertEqual([
        ('untokenized.field8', ['grandparent', 'parent', 'child']),
        ('untokenized.field8', ['parent2', 'child']),
        ('untokenized.field6', ['value5/value6']),
        ('untokenized.field4', ['value4']),
        ('untokenized.field5', ['value5']),
        ('untokenized.field5', ['value6']),
    ], [(f.dim, list(f.path)) for f in facetsFields])  # Note: a FacetField doesn't have a name
def testCreateDocument(self):
    # _createFields must turn a dict of field name -> values into a flat
    # list of field description dicts (name / type / value, plus 'sort'
    # for the sorted.* field).
    # NOTE(review): an earlier testCreateDocument exists in this file; if
    # both live in the same class this definition replaces it - confirm.
    inputFields = {
        'field1': ['value1'],
        'field2': ['value2', 'value2.1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        '__key__.field5': [12345],
        '__numeric__.field6': [12345],
    }
    # NOTE(review): the comparison is order-sensitive; confirm the order
    # produced by _createFields does not depend on dict iteration order.
    expectedFields = [
        {"name": "field2", "type": "TextField", "value": "value2"},
        {"name": "field2", "type": "TextField", "value": "value2.1"},
        {"name": "__key__.field5", "type": "KeyField", "value": 12345},
        {"name": "field1", "type": "TextField", "value": "value1"},
        {
            "name": "sorted.field3",
            "type": "StringField",
            "value": "value3",
            "sort": True,
        },
        {"name": "__numeric__.field6", "type": "NumericField", "value": 12345},
        {"name": "untokenized.field4", "type": "StringField", "value": "value4"},
    ]
    fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
    createdFields = fields2LuceneDoc._createFields(inputFields)
    self.assertEqual(expectedFields, createdFields)
def testOnlyOneSortValueAllowed(self):
    # Two values are added for the same 'sorted.' field; the committed
    # field list is expected to hold a single entry with the first value.
    fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
    fields2LuceneDoc.addField('sorted.field', 'value1')
    fields2LuceneDoc.addField('sorted.field', 'value2')
    consume(fields2LuceneDoc.commit('unused'))
    fields = observer.calledMethods[0].kwargs['fields']
    # Use assertEqual throughout; the original mixed it with the deprecated
    # assertEquals alias.
    self.assertEqual(1, len(fields))
    self.assertEqual(
        {'sort': True, 'type': 'StringField', 'name': 'sorted.field', 'value': 'value1'},
        fields[0])
def testRewriteFields(self):
    # A rewriteFields callable passed to the constructor adds an extra
    # 'keys' entry listing the sorted field names; the committed fields
    # must contain the original fields plus one 'keys' field per name.
    def rewriteFields(fields):
        # sorted() already returns a new list, so no extra list() is needed.
        fields['keys'] = sorted(fields.keys())
        return fields

    fields2LuceneDoc = Fields2LuceneDoc(
        'tsname', rewriteFields=rewriteFields, fieldRegistry=FieldRegistry())
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
    fields2LuceneDoc.addField('field1', 'value1')
    fields2LuceneDoc.addField('field2', 'value2')
    consume(fields2LuceneDoc.commit('unused'))
    self.assertEqual(['addDocument'], observer.calledMethodNames())
    fields = observer.calledMethods[0].kwargs['fields']
    self.assertEqual({'field1', 'field2', 'keys'}, {f['name'] for f in fields})
    self.assertEqual(['field1', 'field2'],
                     [f['value'] for f in fields if f['name'] == 'keys'])
def testAddFacetField(self):
    # One regular field and one facet field (registered as drilldown field)
    # are added; exactly one committed field dict must carry a "path" key.
    # NOTE(review): a second testAddFacetField is defined further down in
    # this file; if both live in the same class only the later one runs -
    # confirm which variant is intended.
    fields2LuceneDoc = Fields2LuceneDoc(
        'tsname',
        fieldRegistry=FieldRegistry(drilldownFields=[
            DrilldownField('untokenized.field'),
        ]))
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
    fields2LuceneDoc.addField('field', 'value')
    fields2LuceneDoc.addFacetField('untokenized.field', 'untokenized value')
    consume(fields2LuceneDoc.commit('unused'))
    fields = observer.calledMethods[0].kwargs['fields']
    facetsFields = [f for f in fields if "path" in f]
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(1, len(facetsFields))
def testCreateFacet(self):
    # Feeds fields through addField/commit with four of them registered as
    # drilldown fields, then splits the committed field dicts into facet
    # fields (those with a "path" key) and all other (search) fields.
    # NOTE(review): an earlier testCreateFacet exists in this file; if both
    # live in the same class this definition replaces it - confirm.
    fields = {
        'field1': ['value1'],
        'sorted.field3': ['value3'],
        'untokenized.field4': ['value4'],
        'untokenized.field5': ['value5', 'value6'],
        'untokenized.field6': ['value5/value6'],
        'untokenized.field7': ['valuex'],
        'untokenized.field8': [['grandparent', 'parent', 'child'], ['parent2', 'child']]
    }
    fields2LuceneDoc = Fields2LuceneDoc(
        'tsname',
        fieldRegistry=FieldRegistry(drilldownFields=[
            DrilldownField('untokenized.field4'),
            DrilldownField('untokenized.field5'),
            DrilldownField('untokenized.field6'),
            DrilldownField('untokenized.field8', hierarchical=True),
        ]))
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
    for field, values in fields.items():
        for value in values:
            fields2LuceneDoc.addField(field, value)
    consume(fields2LuceneDoc.commit('unused'))

    fields = observer.calledMethods[0].kwargs['fields']
    searchFields = [f for f in fields if "path" not in f]
    # NOTE(review): both expected lists below are order-sensitive; confirm
    # the order does not depend on dict iteration order.
    self.assertEqual(['field1', 'sorted.field3', 'untokenized.field7'],
                     [f['name'] for f in searchFields])

    facetsFields = [f for f in fields if "path" in f]
    self.assertEqual(6, len(facetsFields))
    self.assertEqual([
        ('untokenized.field8', ['grandparent', 'parent', 'child']),
        ('untokenized.field8', ['parent2', 'child']),
        ('untokenized.field6', ['value5/value6']),
        ('untokenized.field4', ['value4']),
        ('untokenized.field5', ['value5']),
        ('untokenized.field5', ['value6']),
    ], [(f['name'], f['path']) for f in facetsFields])
def testAddFacetField(self):
    # One regular field and one facet field (registered as drilldown field)
    # are added; the document passed to the observer must contain exactly
    # one FacetField instance.
    # NOTE(review): an earlier testAddFacetField exists in this file that
    # reads kwargs['fields'] instead of kwargs['document']; if both live in
    # the same class this definition replaces it - confirm.
    fields2LuceneDoc = Fields2LuceneDoc(
        'tsname',
        fieldRegistry=FieldRegistry(drilldownFields=[
            DrilldownField('untokenized.field'),
        ]))
    observer = CallTrace()
    fields2LuceneDoc.addObserver(observer)
    fields2LuceneDoc.ctx.tx = Transaction('tsname')
    fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
    fields2LuceneDoc.addField('field', 'value')
    fields2LuceneDoc.addFacetField('untokenized.field', 'untokenized value')
    consume(fields2LuceneDoc.commit('unused'))
    document = observer.calledMethods[0].kwargs['document']
    facetsFields = [
        FacetField.cast_(f) for f in document.getFields()
        if FacetField.instance_(f)
    ]
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(1, len(facetsFields))
def uploadHelix(lucene, termNumerator, storageComponent, drilldownFields, fieldRegistry):
    """Return the nested-tuple component tree rooted at SruRecordUpdate.

    Each tuple holds a component followed by the sub-tuples nested below it.
    One and the same indexing branch (Fields2LuceneDoc with termNumerator and
    lucene below it) is referenced from four places in the tree.

    NOTE(review): drilldownFields is accepted but not used in this body.
    """
    # Shared branch; the same tuple object is reused at every reference.
    indexingBranch = (
        Fields2LuceneDoc('record', fieldRegistry=fieldRegistry),
        (termNumerator,),
        (lucene,),
    )
    return (
        SruRecordUpdate(),
        (
            TransactionScope('record'),
            (
                Venturi(
                    should=[{'partname': 'record', 'xpath': '.'}],
                    namespaces={'doc': 'http://meresco.org/namespace/example'}
                ),
                # 'delete' messages are passed to lucene and the storage.
                (
                    FilterMessages(allowed=['delete']),
                    (lucene,),
                    (storageComponent,),
                ),
                # 'add' messages are passed to Xml2Fields and FieldHier.
                (
                    FilterMessages(allowed=['add']),
                    (
                        Xml2Fields(),
                        (
                            # Keep only the part of the name after the first '.'.
                            RenameField(lambda name: name.split('.', 1)[-1]),
                            (
                                FilterField(lambda name: 'fieldHier' not in name),
                                indexingBranch,
                            ),
                            (
                                FilterField(lambda name: name == 'intfield1'),
                                (
                                    RenameField(lambda name: SORTED_PREFIX + name),
                                    indexingBranch,
                                ),
                            ),
                            (
                                FilterField(lambda name: name in ['field2', 'field3']),
                                (
                                    RenameField(lambda name: UNTOKENIZED_PREFIX + name),
                                    indexingBranch,
                                ),
                            ),
                        ),
                    ),
                    (
                        FieldHier(),
                        indexingBranch,
                    ),
                ),
                (
                    XmlPrintLxml(fromKwarg='lxmlNode', toKwarg='data'),
                    (storageComponent,),
                ),
            ),
        ),
    )