Example #1
0
 def testGenericDrilldownFields(self):
     # A predicate passed as isDrilldownFieldFunction decides which field
     # names the registry reports as drilldown fields.
     def hasDrilldownPrefix(name):
         return name.startswith('drilldown')

     # Any warnings raised inside this block are suppressed.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         fieldRegistry = FieldRegistry(isDrilldownFieldFunction=hasDrilldownPrefix)
         for matchingName in ('drilldown.aap', 'drilldown.noot'):
             self.assertTrue(fieldRegistry.isDrilldownField(matchingName))
         self.assertFalse(fieldRegistry.isDrilldownField('noot'))
Example #2
0
 def testAddWithoutIdentifier(self):
     # Adding a document without an identifier results in exactly one post
     # to the update path, carrying the serialized field list.
     idField = FieldRegistry().createField("id", "id1")
     consume(self._lucene.addDocument(fields=[idField]))
     self.assertEqual(1, len(self.post))
     request = self.post[0]
     self.assertEqual('/lucene/update/?', request['path'])
     self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', request['data'])
Example #3
0
 def testDefault(self):
     # Without explicit registration the '__id__' field is untokenized,
     # stored and indexed.
     fieldRegistry = FieldRegistry()
     idField = fieldRegistry.createField('__id__', 'id:1')
     self.assertFalse(idField.fieldType().tokenized())
     for flag in (idField.fieldType().stored(), idField.fieldType().indexed()):
         self.assertTrue(flag)
     self.assertTrue(fieldRegistry.isUntokenized('__id__'))
Example #4
0
 def testGenericDrilldownFields(self):
     # The registry delegates the drilldown decision to the supplied
     # isDrilldownFieldFunction predicate.
     prefix = 'drilldown'
     # Any warnings raised inside this block are suppressed.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         fieldRegistry = FieldRegistry(
             isDrilldownFieldFunction=lambda name: name.startswith(prefix))
         expectations = (
             ('drilldown.aap', self.assertTrue),
             ('drilldown.noot', self.assertTrue),
             ('noot', self.assertFalse),
         )
         for fieldname, check in expectations:
             check(fieldRegistry.isDrilldownField(fieldname))
Example #5
0
 def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
     # With 'noTermFreqField' registered as a no-terms-frequency field type,
     # the expected phrase query only contains the 'unqualified' field.
     registry = FieldRegistry()
     registry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELDTYPE)
     unqualifiedTermFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
     settings = LuceneSettings(fieldRegistry=registry)
     self.composer = LuceneQueryComposer(unqualifiedTermFields=unqualifiedTermFields, luceneSettings=settings)
     phraseQuery = PhraseQuery()
     phraseQuery.add(Term("unqualified", "phrase query"))
     self.assertConversion(phraseQuery, '"phrase query"')
Example #6
0
 def testAddWithoutIdentifier(self):
     # One post is recorded: the update path with an empty query string and
     # the JSON-serialized fields as data.
     fieldRegistry = FieldRegistry()
     documentFields = [fieldRegistry.createField("id", "id1")]
     consume(self._lucene.addDocument(fields=documentFields))
     self.assertEqual(1, len(self.post))
     onlyPost = self.post[0]
     expectedData = '[{"type": "TextField", "name": "id", "value": "id1"}]'
     self.assertEqual('/lucene/update/?', onlyPost['path'])
     self.assertEqual(expectedData, onlyPost['data'])
Example #7
0
 def testDefault(self):
     # A registry without registrations creates '__id__' as a stored
     # StringField.
     idField = FieldRegistry().createField('__id__', 'id:1')
     expected = dict(type="StringField", name="__id__", value="id:1", stored=True)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, idField)
Example #8
0
 def testNumericField(self):
     # A name registered as NUMERICFIELD yields a NumericField description
     # with the value passed through unchanged.
     fieldRegistry = FieldRegistry()
     fieldRegistry.register('fieldname', NUMERICFIELD)
     expected = dict(type="NumericField", name="fieldname", value=2010)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, fieldRegistry.createField('fieldname', 2010))
Example #9
0
 def testIsUntokenized(self):
     # Drilldown fields and 'untokenized.'-prefixed names are untokenized;
     # for registered names the answer follows the latest registration.
     fieldRegistry = FieldRegistry(drilldownFields=[DrilldownField('aDrilldownField')])
     for untokenizedName in ('aDrilldownField', 'untokenized.some.field'):
         self.assertTrue(fieldRegistry.isUntokenized(untokenizedName))
     self.assertFalse(fieldRegistry.isUntokenized('other.field'))

     fieldRegistry.register('fieldname', STRINGFIELD)
     self.assertTrue(fieldRegistry.isUntokenized('fieldname'))
     # Registering the same name again replaces the earlier definition.
     fieldRegistry.register('fieldname', TEXTFIELD)
     self.assertFalse(fieldRegistry.isUntokenized('fieldname'))
Example #10
0
 def testIsNumeric(self):
     # Registered long/int fields and the 'range.double.' and '__key__.'
     # prefixed names are numeric; an unknown plain name is not.
     fieldRegistry = FieldRegistry()
     fieldRegistry.register("longfield", fieldDefinition=LONGFIELD)
     fieldRegistry.register("intfield", fieldDefinition=INTFIELD)
     self.assertFalse(fieldRegistry.isNumeric('field1'))
     numericNames = ('longfield', 'intfield', 'range.double.afield', '__key__.field1')
     for numericName in numericNames:
         self.assertTrue(fieldRegistry.isNumeric(numericName))
Example #11
0
 def testNoTermsFreqField(self):
     # A name registered as NO_TERMS_FREQUENCY_FIELD is created as a
     # NoTermsFrequencyField description.
     fieldRegistry = FieldRegistry()
     fieldRegistry.register('fieldname', NO_TERMS_FREQUENCY_FIELD)
     created = fieldRegistry.createField('fieldname', 'value')
     expected = dict(type="NoTermsFrequencyField", name="fieldname", value="value")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, created)
Example #12
0
 def testNoTermsFreqField(self):
     # Registering NO_TERMS_FREQUENCY_FIELD for a name changes the type of
     # the created field description to "NoTermsFrequencyField".
     fieldRegistry = FieldRegistry()
     fieldRegistry.register('fieldname', NO_TERMS_FREQUENCY_FIELD)
     expected = {
         "type": "NoTermsFrequencyField",
         "name": "fieldname",
         "value": "value",
     }
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, fieldRegistry.createField('fieldname', 'value'))
Example #13
0
 def testDefault(self):
     # '__id__' is created as a stored StringField by a fresh registry.
     fieldRegistry = FieldRegistry()
     expected = {
         "type": "StringField",
         "name": "__id__",
         "value": "id:1",
         "stored": True,
     }
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, fieldRegistry.createField('__id__', 'id:1'))
Example #14
0
 def testNumericField(self):
     # After registering NUMERICFIELD the created description has type
     # "NumericField" and keeps the integer value as-is.
     fieldRegistry = FieldRegistry()
     fieldRegistry.register('fieldname', NUMERICFIELD)
     numericField = fieldRegistry.createField('fieldname', 2010)
     expected = {
         "type": "NumericField",
         "name": "fieldname",
         "value": 2010,
     }
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, numericField)
 def testPostDictWithDrilldownFields(self):
     # asPostDict() must equal DEFAULTS plus one 'drilldownFields' entry per
     # registered drilldown field, in registration order.
     registry = FieldRegistry()
     registry.registerDrilldownField("field0", hierarchical=True, multiValued=False)
     registry.registerDrilldownField("field1", hierarchical=True, multiValued=True, indexFieldName="$facets_2")
     luceneSettings = LuceneSettings(fieldRegistry=registry)
     # Shallow copy: only a top-level key is added, DEFAULTS itself is untouched.
     expected = copy(DEFAULTS)
     expected['drilldownFields'] = [
         dict(dim='field0', hierarchical=True, fieldname=None, multiValued=False),
         dict(dim='field1', hierarchical=True, fieldname='$facets_2', multiValued=True),
     ]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, luceneSettings.asPostDict())
Example #16
0
 def testIsUntokenized(self):
     # 'untokenized.'-prefixed names are untokenized out of the box; for a
     # registered name the answer follows the registered field type.
     fieldRegistry = FieldRegistry()
     self.assertTrue(fieldRegistry.isUntokenized('untokenized.some.field'))

     fieldRegistry.register('fieldname', StringField.TYPE_NOT_STORED)
     self.assertTrue(fieldRegistry.isUntokenized('fieldname'))

     # Registering the same name again replaces the earlier field type.
     fieldRegistry.register('fieldname', TextField.TYPE_NOT_STORED)
     self.assertFalse(fieldRegistry.isUntokenized('fieldname'))
Example #17
0
 def testReuseCreatedField(self):
     # With mayReUse=True the registry hands back a field equal to the one
     # created earlier (carrying the new value); with mayReUse=False it
     # returns a field that is not equal to it.
     fieldRegistry = FieldRegistry()
     firstField = fieldRegistry.createField('fieldname', 'value')
     self.assertEqual("value", firstField.stringValue())

     reusedField = fieldRegistry.createField('fieldname', 'newvalue', mayReUse=True)
     self.assertEqual("newvalue", reusedField.stringValue())
     self.assertEqual(firstField, reusedField)

     freshField = fieldRegistry.createField('fieldname', 'newvalue', mayReUse=False)
     self.assertEqual("newvalue", freshField.stringValue())
     self.assertNotEqual(reusedField, freshField)
 def testDrilldownFieldQuery(self):
     # A query on a hierarchical drilldown field converts to a DrillDown
     # TermQuery; a quoted "a>b" value is expected as the path ["a", "b"].
     self.fieldRegistry = FieldRegistry(
         [DrilldownField('field', hierarchical=True)])

     def drilldownQuery(path):
         return dict(type="TermQuery",
                     term=dict(field="field", path=path, type="DrillDown"))

     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(drilldownQuery(["value"]),
                      self._convert("field = value"))
     self.assertEqual(drilldownQuery(["value", "value1"]),
                      self._convert("field = \"value>value1\""))
 def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(
         self):
     # 'noTermFreqField' is registered as NO_TERMS_FREQUENCY_FIELD; the
     # expected phrase query only has terms for the 'unqualified' field.
     self.fieldRegistry = FieldRegistry()
     self.fieldRegistry.register('noTermFreqField',
                                 NO_TERMS_FREQUENCY_FIELD)
     self.unqualifiedFields = [("unqualified", 1.0),
                               ('noTermFreqField', 2.0)]
     phraseTerms = [
         dict(field="unqualified", value=word)
         for word in ("phrase", "query")
     ]
     expected = dict(type="PhraseQuery", terms=phraseTerms, boost=1.0)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, self._convert('"phrase query"'))
Example #20
0
 def testSpecificField(self):
     # An unregistered name is created as a TextField; after registering
     # STRINGFIELD_STORED the same name is created as a stored StringField.
     fieldRegistry = FieldRegistry()
     beforeRegistration = fieldRegistry.createField('fieldname', 'value')
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         dict(type="TextField", name="fieldname", value="value"),
         beforeRegistration)

     fieldRegistry.register('fieldname', STRINGFIELD_STORED)
     afterRegistration = fieldRegistry.createField('fieldname', 'value')
     self.assertEqual(
         dict(type="StringField", name="fieldname", value="value", stored=True),
         afterRegistration)
Example #21
0
 def __init__(
     self,
     commitTimeout=10,
     commitCount=100000,
     multithreaded=True,
     readonly=False,
     lruTaxonomyWriterCacheSize=4000,
     analyzer=MerescoStandardAnalyzer(),
     similarity=BM25Similarity(),
     fieldRegistry=None,
     maxMergeAtOnce=2,
     segmentsPerTier=8.0,
     numberOfConcurrentTasks=6,
     verbose=True,
 ):
     """Store every argument as an instance attribute of the same name.

     fieldRegistry defaults to None, meaning "create a new FieldRegistry
     for this instance". It used to default to a single FieldRegistry()
     built when the function was defined, so registrations made through one
     settings object were visible in every other object that relied on the
     default.

     NOTE: the analyzer and similarity defaults are still evaluated once,
     at definition time, and are shared between instances.
     """
     if fieldRegistry is None:
         fieldRegistry = FieldRegistry()
     self.commitTimeout = commitTimeout
     self.commitCount = commitCount
     self.multithreaded = multithreaded
     self.readonly = readonly
     self.lruTaxonomyWriterCacheSize = lruTaxonomyWriterCacheSize
     self.analyzer = analyzer
     self.similarity = similarity
     self.fieldRegistry = fieldRegistry
     self.maxMergeAtOnce = maxMergeAtOnce
     self.segmentsPerTier = segmentsPerTier
     self.numberOfConcurrentTasks = numberOfConcurrentTasks
     self.verbose = verbose
Example #22
0
    def testRangeQueryAndType(self):
        # rangeQueryAndType returns a (query name, Python type) pair per
        # field; a name without numeric registration or prefix falls back
        # to ("String", str).
        fieldRegistry = FieldRegistry()
        fieldRegistry.register("longfield", fieldDefinition=LONGFIELD)
        fieldRegistry.register("intfield", fieldDefinition=INTFIELD)
        expectations = (
            ('longfield', "Long", long),
            ('intfield', "Int", int),
            ('range.double.field', "Double", float),
            ('anyfield', "String", str),
        )
        for fieldname, expectedQuery, expectedType in expectations:
            queryName, valueType = fieldRegistry.rangeQueryAndType(fieldname)
            self.assertEqual(expectedQuery, queryName)
            self.assertEqual(expectedType, valueType)
 def testDrilldownFieldQuery(self):
     # Queries on a hierarchical drilldown field become DrillDown term
     # queries; "value>value1" is expected as the path ["value", "value1"].
     self.fieldRegistry = FieldRegistry([DrilldownField('field', hierarchical=True)])
     cases = (
         (["value"], "field = value"),
         (["value", "value1"], "field = \"value>value1\""),
     )
     for expectedPath, cql in cases:
         expected = dict(type="TermQuery", term=dict(field="field", path=expectedPath, type="DrillDown"))
         # assertEqual replaces the deprecated assertEquals alias.
         self.assertEqual(expected, self._convert(cql))
Example #24
0
 def testTermVectorsForField(self):
     # Names listed in termVectorFields are reported as term vector fields
     # and get "termVectors": True in their created description; other
     # names do not.
     fieldRegistry = FieldRegistry(termVectorFields=['field1', 'field2'])
     self.assertTrue(fieldRegistry.isTermVectorField('field1'))
     self.assertTrue(fieldRegistry.isTermVectorField('field2'))
     self.assertFalse(fieldRegistry.isTermVectorField('field3'))

     # assertEqual replaces the deprecated assertEquals alias.
     for fieldname in ('field1', 'field2'):
         expected = dict(type="TextField", name=fieldname, value="id:1", termVectors=True)
         self.assertEqual(expected, fieldRegistry.createField(fieldname, 'id:1'))

     plainField = fieldRegistry.createField('field3', 'id:1')
     self.assertEqual(dict(type="TextField", name="field3", value="id:1"), plainField)
 def testPostDictWithDrilldownFields(self):
     # asPostDict() lists the registered drilldown fields, in registration
     # order, next to the settings' other values.
     registry = FieldRegistry()
     registry.registerDrilldownField("field0", hierarchical=True, multiValued=False)
     registry.registerDrilldownField("field1", hierarchical=True, multiValued=True, indexFieldName="$facets_2")
     luceneSettings = LuceneSettings(fieldRegistry=registry)
     expectedDrilldownFields = [
         dict(dim='field0', hierarchical=True, fieldname=None, multiValued=False),
         dict(dim='field1', hierarchical=True, fieldname='$facets_2', multiValued=True),
     ]
     expected = dict(
         lruTaxonomyWriterCacheSize=4000,
         maxMergeAtOnce=2,
         similarity=dict(type='BM25Similarity'),
         numberOfConcurrentTasks=6,
         segmentsPerTier=8.0,
         analyzer=dict(type='MerescoStandardAnalyzer'),
         drilldownFields=expectedDrilldownFields,
         commitCount=100000,
         commitTimeout=10,
     )
     self.assertEqual(expected, luceneSettings.asPostDict())
 def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
     # Of the two unqualified fields only 'unqualified' appears in the
     # expected phrase query; 'noTermFreqField' is registered as
     # NO_TERMS_FREQUENCY_FIELD.
     self.fieldRegistry = FieldRegistry()
     self.fieldRegistry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELD)
     self.unqualifiedFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
     expectedTerms = [
         dict(field="unqualified", value="phrase"),
         dict(field="unqualified", value="query"),
     ]
     expected = dict(type="PhraseQuery", terms=expectedTerms, boost=1.0)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(expected, self._convert('"phrase query"'))
Example #27
0
def luceneAndReaderConfig(defaultLuceneSettings, httpRequestAdapter,
                          lucenePort):
    """Return the be()-wired Lucene component for DEFAULT_CORE.

    The component targets 127.0.0.1:lucenePort and uses a clone of
    defaultLuceneSettings whose field registry is built from
    `drilldownFields`. That name is not a parameter; it is resolved from
    the enclosing scope. httpRequestAdapter is attached as the component's
    only observer.
    """
    registry = FieldRegistry(drilldownFields=drilldownFields)
    coreSettings = defaultLuceneSettings.clone(fieldRegistry=registry)
    lucene = Lucene(
        host='127.0.0.1',
        port=lucenePort,
        name=DEFAULT_CORE,
        settings=coreSettings)
    return be((lucene, (httpRequestAdapter, )))
    def testAddDocument(self):
        # Committing after a single addField results in exactly one
        # addDocument call on the observer, carrying the identifier taken
        # from the transaction locals.
        observer = CallTrace()
        fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
        fields2LuceneDoc.addObserver(observer)
        transaction = Transaction('tsname')
        fields2LuceneDoc.ctx.tx = transaction
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        fields2LuceneDoc.addField('field', 'value')
        consume(fields2LuceneDoc.commit('unused'))

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(['addDocument'], observer.calledMethodNames())
        self.assertEqual('identifier', observer.calledMethods[0].kwargs['identifier'])
Example #29
0
    def testRangeQueryAndType(self):
        # Each field name maps to a (range query name, Python type) pair.
        fieldRegistry = FieldRegistry()
        for fieldname, definition in (("longfield", LONGFIELD), ("intfield", INTFIELD)):
            fieldRegistry.register(fieldname, fieldDefinition=definition)

        def assertRange(fieldname, expectedQuery, expectedType):
            queryName, valueType = fieldRegistry.rangeQueryAndType(fieldname)
            self.assertEqual(expectedQuery, queryName)
            self.assertEqual(expectedType, valueType)

        assertRange('longfield', "Long", long)
        assertRange('intfield', "Int", int)
        assertRange('range.double.field', "Double", float)

        # An unregistered name without a special prefix is a string range.
        assertRange('anyfield', "String", str)
Example #30
0
 def setUp(self, fieldRegistry=None):
     """Create a Lucene instance under self.tempdir with a CallTrace observer.

     fieldRegistry defaults to None, meaning "create a new FieldRegistry
     for this test". It used to default to a single FieldRegistry() built
     when the method was defined, so registrations made in one test were
     visible in every later test that relied on the default.
     """
     super(LuceneTestCase, self).setUp()
     if fieldRegistry is None:
         fieldRegistry = FieldRegistry()
     self._javaObjects = self._getJavaObjects()
     # Stand-in reactor: addTimer returns a CallTrace('timer').
     self._reactor = CallTrace('reactor', methods={'addTimer': lambda seconds, callback: CallTrace('timer')})
     self._defaultSettings = LuceneSettings(commitCount=1, commitTimeout=1, fieldRegistry=fieldRegistry)
     self.lucene = Lucene(
         join(self.tempdir, 'lucene'),
         reactor=self._reactor,
         settings=self._defaultSettings,
     )
     self.observer = CallTrace()
     self.lucene.addObserver(self.observer)
Example #31
0
 def testTermVectorsForField(self):
     # Only the names passed as termVectorFields are term vector fields,
     # and only their created descriptions carry "termVectors": True.
     fieldRegistry = FieldRegistry(termVectorFields=['field1', 'field2'])
     for fieldname, check in (
             ('field1', self.assertTrue),
             ('field2', self.assertTrue),
             ('field3', self.assertFalse)):
         check(fieldRegistry.isTermVectorField(fieldname))

     def textField(name, **extra):
         return dict(type="TextField", name=name, value="id:1", **extra)

     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(textField("field1", termVectors=True), fieldRegistry.createField('field1', 'id:1'))
     self.assertEqual(textField("field2", termVectors=True), fieldRegistry.createField('field2', 'id:1'))
     self.assertEqual(textField("field3"), fieldRegistry.createField('field3', 'id:1'))
Example #32
0
 def testSpecificField(self):
     # A field is not stored by default; registering
     # StringField.TYPE_STORED for its name makes new fields stored.
     fieldRegistry = FieldRegistry()
     defaultField = fieldRegistry.createField('fieldname', 'value')
     self.assertFalse(defaultField.fieldType().stored())

     fieldRegistry.register('fieldname', StringField.TYPE_STORED)
     storedField = fieldRegistry.createField('fieldname', 'value')
     self.assertTrue(storedField.fieldType().stored())
 def _prepareLuceneSettings(self):
     """Return a LuceneSettings adjusted by attributes the test has set.

     self._analyzer, when present, replaces the analyzer. self.fieldRegistry,
     when present, becomes the field registry; otherwise a new registry
     with "intField" (INTFIELD) and "longField" (LONGFIELD) is used.
     """
     luceneSettings = LuceneSettings()
     if hasattr(self, '_analyzer'):
         luceneSettings.analyzer = self._analyzer
     if not hasattr(self, 'fieldRegistry'):
         luceneSettings.fieldRegistry = FieldRegistry()
         numericDefinitions = (("intField", INTFIELD), ("longField", LONGFIELD))
         for fieldname, definition in numericDefinitions:
             luceneSettings.fieldRegistry.register(fieldname,
                                                   fieldDefinition=definition)
     else:
         luceneSettings.fieldRegistry = self.fieldRegistry
     return luceneSettings
    def testCreateDocument(self):
        # _createDocument turns a {fieldname: [values]} dict into a document
        # whose fields have the indexed/stored/tokenized flags asserted
        # below for each name.
        fields = {
            'field1': ['value1'],
            'field2': ['value2', 'value2.1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            '__key__.field5': ["12345"],
            '__numeric__.field6': ["12345"],
        }
        fields2LuceneDoc = Fields2LuceneDoc('tsname',
                                            fieldRegistry=FieldRegistry())
        # The observer answers every numerateTerm call with 1.
        observer = CallTrace(returnValues={'numerateTerm': 1})
        fields2LuceneDoc.addObserver(observer)
        document = fields2LuceneDoc._createDocument(fields)

        def assertFlags(field, indexed, stored, tokenized):
            # Checks in the fixed order indexed, stored, tokenized.
            fieldType = field.fieldType
            (self.assertTrue if indexed else self.assertFalse)(fieldType().indexed())
            (self.assertTrue if stored else self.assertFalse)(fieldType().stored())
            (self.assertTrue if tokenized else self.assertFalse)(fieldType().tokenized())

        # assertEqual replaces the deprecated assertEquals alias throughout.
        self.assertEqual(
            {
                'field1', 'field2', 'sorted.field3', 'untokenized.field4',
                '__key__.field5', '__numeric__.field6'
            }, {f.name() for f in document.getFields()})

        field1 = document.getField("field1")
        self.assertEqual('value1', field1.stringValue())
        assertFlags(field1, indexed=True, stored=False, tokenized=True)

        self.assertEqual(['value2', 'value2.1'], document.getValues('field2'))

        field3 = document.getField("sorted.field3")
        self.assertEqual('value3', field3.stringValue())
        assertFlags(field3, indexed=True, stored=False, tokenized=False)

        field4 = document.getField("untokenized.field4")
        self.assertEqual('value4', field4.stringValue())
        assertFlags(field4, indexed=True, stored=False, tokenized=False)

        # The key field carries 1, not the 12345 input.
        # Presumably this is the observer's numerateTerm answer -- confirm.
        field5 = document.getField("__key__.field5")
        self.assertEqual(1, field5.numericValue().longValue())
        assertFlags(field5, indexed=False, stored=False, tokenized=True)

        field6 = document.getField("__numeric__.field6")
        self.assertEqual(12345, field6.numericValue().longValue())
        assertFlags(field6, indexed=False, stored=False, tokenized=True)
    def testCreateDocument(self):
        # _createFields flattens a {fieldname: [values]} dict into a list
        # of field descriptions, one per value.
        inputFields = {
            'field1': ['value1'],
            'field2': ['value2', 'value2.1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            '__key__.field5': [12345],
            '__numeric__.field6': [12345],
        }
        fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
        createdFields = fields2LuceneDoc._createFields(inputFields)

        # NOTE: the expected order is fixed here and differs from the
        # order in which the input dict is written above.
        expectedFields = [
            dict(name="field2", type="TextField", value="value2"),
            dict(name="field2", type="TextField", value="value2.1"),
            dict(name="__key__.field5", type="KeyField", value=12345),
            dict(name="field1", type="TextField", value="value1"),
            dict(name="sorted.field3", type="StringField", value="value3", sort=True),
            dict(name="__numeric__.field6", type="NumericField", value=12345),
            dict(name="untokenized.field4", type="StringField", value="value4"),
        ]
        self.assertEqual(expectedFields, createdFields)
    def testCreateFacet(self):
        # Values added for registered drilldown fields end up as FacetField
        # instances in the committed document; all other values end up as
        # ordinary search fields.
        fields = {
            'field1': ['value1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            'untokenized.field5': ['value5', 'value6'],
            'untokenized.field6': ['value5/value6'],
            'untokenized.field7': ['valuex'],
            'untokenized.field8': [['grandparent', 'parent', 'child'],
                                   ['parent2', 'child']]
        }
        drilldownFields = [
            DrilldownField('untokenized.field4'),
            DrilldownField('untokenized.field5'),
            DrilldownField('untokenized.field6'),
            DrilldownField('untokenized.field8', hierarchical=True),
        ]
        fields2LuceneDoc = Fields2LuceneDoc(
            'tsname',
            fieldRegistry=FieldRegistry(drilldownFields=drilldownFields))
        observer = CallTrace()
        fields2LuceneDoc.addObserver(observer)
        fields2LuceneDoc.ctx.tx = Transaction('tsname')
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        for field, values in fields.items():
            for value in values:
                fields2LuceneDoc.addField(field, value)

        consume(fields2LuceneDoc.commit('unused'))

        document = observer.calledMethods[0].kwargs['document']
        searchFields = [
            f for f in document.getFields() if not FacetField.instance_(f)
        ]
        # assertEqual replaces the deprecated assertEquals alias throughout.
        self.assertEqual(['field1', 'sorted.field3', 'untokenized.field7'],
                         [f.name() for f in searchFields])

        facetsFields = [
            FacetField.cast_(f) for f in document.getFields()
            if FacetField.instance_(f)
        ]
        self.assertEqual(6, len(facetsFields))
        # Note: a FacetField doesn't have a name, so dim and path are compared.
        self.assertEqual([
            ('untokenized.field8', ['grandparent', 'parent', 'child']),
            ('untokenized.field8', ['parent2', 'child']),
            ('untokenized.field6', ['value5/value6']),
            ('untokenized.field4', ['value4']),
            ('untokenized.field5', ['value5']),
            ('untokenized.field5', ['value6']),
        ], [(f.dim, list(f.path)) for f in facetsFields])
 def testOnlyOneSortValueAllowed(self):
     # Two values are added for the same 'sorted.' field; the committed
     # document holds a single field, carrying the first value.
     fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=FieldRegistry())
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     for sortValue in ('value1', 'value2'):
         fields2LuceneDoc.addField('sorted.field', sortValue)
     consume(fields2LuceneDoc.commit('unused'))
     committedFields = observer.calledMethods[0].kwargs['fields']
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(1, len(committedFields))
     self.assertEqual({'sort': True, 'type': 'StringField', 'name': 'sorted.field', 'value': 'value1'}, committedFields[0])
Example #38
0
 def setUp(self):
     """Wire a MultiLucene with one 'coreA' Lucene and capture its posts.

     The connection's _post is replaced by a generator that appends each
     post's data and path to self.post and answers with self.response.
     """
     SeecrTestCase.setUp(self)
     self.registry = FieldRegistry()
     self._multiLucene = MultiLucene(defaultCore='coreA', host="localhost", port=12345)
     self._lucene = Lucene(host="localhost", port=12345, settings=LuceneSettings(), name='coreA')
     self._multiLucene.addObserver(self._lucene)
     self.post = []
     self.response = ""

     def recordPost(data, path, **kwargs):
         self.post.append(dict(data=data, path=path))
         # The response is handed back by raising StopIteration from the generator.
         raise StopIteration(self.response)
         # Never reached; its presence makes this function a generator.
         yield

     # Create one connection, patch its _post, and return that same
     # connection from every later _connect() call.
     connection = self._multiLucene._connect()
     connection._post = recordPost
     self._multiLucene._connect = lambda: connection
 def testAddFacetField(self):
     # One addField plus one addFacetField must produce exactly one
     # committed field description containing a "path" key.
     registry = FieldRegistry(drilldownFields=[
         DrilldownField('untokenized.field'),
     ])
     fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=registry)
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     fields2LuceneDoc.addField('field', 'value')
     fields2LuceneDoc.addFacetField('untokenized.field', 'untokenized value')
     consume(fields2LuceneDoc.commit('unused'))
     committedFields = observer.calledMethods[0].kwargs['fields']
     facetFields = [f for f in committedFields if "path" in f]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(1, len(facetFields))
Example #40
0
    def testDefaultDefinition(self):
        # Unregistered names are created as tokenized TextFields unless
        # defaultDefinition is given; with STRINGFIELD as default they are
        # created as untokenized StringFields.
        defaultRegistry = FieldRegistry()
        textField = defaultRegistry.createField('aField', 'id:1')
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            dict(type="TextField", name="aField", value="id:1"), textField)
        self.assertFalse(defaultRegistry.isUntokenized('aField'))

        stringRegistry = FieldRegistry(defaultDefinition=STRINGFIELD)
        stringField = stringRegistry.createField('aField', 'id:1')
        self.assertEqual(
            dict(type="StringField", name="aField", value="id:1"), stringField)
        self.assertTrue(stringRegistry.isUntokenized('aField'))
Example #41
0
    def testDrilldownFields(self):
        # Drilldown fields given to the constructor and registered
        # afterwards are all known to the registry and reflected in the
        # dimension configs of its facetsConfig.
        registry = FieldRegistry(drilldownFields=[
            DrilldownField(name='aap'),
            DrilldownField(name='noot', hierarchical=True),
        ])
        registry.registerDrilldownField(fieldname='mies', multiValued=False)
        for knownName in ('aap', 'noot', 'mies'):
            self.assertTrue(registry.isDrilldownField(knownName))
        self.assertFalse(registry.isDrilldownField('vuur'))
        self.assertFalse(registry.isHierarchicalDrilldown('aap'))
        self.assertTrue(registry.isHierarchicalDrilldown('noot'))

        dimConfigs = registry.facetsConfig.getDimConfigs()
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual({'aap', 'noot', 'mies'},
                         set(dimConfigs.keySet()))
        self.assertFalse(dimConfigs.get('aap').hierarchical)
        self.assertTrue(dimConfigs.get('noot').hierarchical)
        self.assertTrue(dimConfigs.get('noot').multiValued)
        self.assertFalse(dimConfigs.get('mies').multiValued)
 def __init__(
     self,
     commitTimeout=10,
     commitCount=100000,
     lruTaxonomyWriterCacheSize=4000,
     analyzer=dict(type="MerescoStandardAnalyzer"),
     similarity=dict(type="BM25Similarity"),
     mergePolicy=dict(type="TieredMergePolicy",
                      maxMergeAtOnce=2,
                      segmentsPerTier=8.0),
     fieldRegistry=None,
     numberOfConcurrentTasks=6,
     cacheFacetOrdinals=True,
     verbose=True,
 ):
     """Store each argument named in SETTING_NAMES as a '_'-prefixed attribute.

     fieldRegistry is stored as the public attribute `fieldRegistry`. It
     defaults to None, meaning "create a new FieldRegistry for this
     instance". It used to default to a single FieldRegistry() built when
     the function was defined, so registrations made through one settings
     object were visible in every other object that relied on the default.

     NOTE: the dict defaults for analyzer, similarity and mergePolicy are
     still created once, at definition time, and are shared between
     instances; they must not be mutated in place.
     """
     # Resolve the default before locals() is captured, so a lookup of
     # 'fieldRegistry' through SETTING_NAMES sees the real registry.
     if fieldRegistry is None:
         fieldRegistry = FieldRegistry()
     local = locals()
     for name in SETTING_NAMES:
         self.__dict__['_' + name] = local[name]
     self.fieldRegistry = fieldRegistry
    def testCreateFacet(self):
        # Values added for registered drilldown fields are committed as
        # descriptions with a "path" key; all other values are committed
        # as plain search fields.
        inputFields = {
            'field1': ['value1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            'untokenized.field5': ['value5', 'value6'],
            'untokenized.field6': ['value5/value6'],
            'untokenized.field7': ['valuex'],
            'untokenized.field8': [['grandparent', 'parent', 'child'], ['parent2', 'child']]
        }
        registry = FieldRegistry(drilldownFields=[
            DrilldownField('untokenized.field4'),
            DrilldownField('untokenized.field5'),
            DrilldownField('untokenized.field6'),
            DrilldownField('untokenized.field8', hierarchical=True),
        ])
        fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=registry)
        observer = CallTrace()
        fields2LuceneDoc.addObserver(observer)
        fields2LuceneDoc.ctx.tx = Transaction('tsname')
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        for field, values in inputFields.items():
            for value in values:
                fields2LuceneDoc.addField(field, value)

        consume(fields2LuceneDoc.commit('unused'))

        committedFields = observer.calledMethods[0].kwargs['fields']

        # assertEqual replaces the deprecated assertEquals alias throughout.
        searchFields = [f for f in committedFields if "path" not in f]
        self.assertEqual(['field1', 'sorted.field3', 'untokenized.field7'], [f['name'] for f in searchFields])

        facetsFields = [f for f in committedFields if "path" in f]
        self.assertEqual(6, len(facetsFields))
        self.assertEqual([
                ('untokenized.field8', ['grandparent', 'parent', 'child']),
                ('untokenized.field8', ['parent2', 'child']),
                ('untokenized.field6', ['value5/value6']),
                ('untokenized.field4', ['value4']),
                ('untokenized.field5', ['value5']),
                ('untokenized.field5', ['value6']),
            ], [(f['name'], f['path']) for f in facetsFields])
 def testAddFacetField(self):
     # One addField plus one addFacetField must produce a committed
     # document containing exactly one FacetField.
     registry = FieldRegistry(drilldownFields=[
         DrilldownField('untokenized.field'),
     ])
     fields2LuceneDoc = Fields2LuceneDoc('tsname', fieldRegistry=registry)
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     fields2LuceneDoc.addField('field', 'value')
     fields2LuceneDoc.addFacetField('untokenized.field',
                                    'untokenized value')
     consume(fields2LuceneDoc.commit('unused'))
     document = observer.calledMethods[0].kwargs['document']
     facetFields = [
         FacetField.cast_(f) for f in document.getFields()
         if FacetField.instance_(f)
     ]
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(1, len(facetFields))
Example #45
0
    def testDefaultDefinition(self):
        # The definition used for unregistered names is TextField unless
        # defaultDefinition overrides it; isUntokenized follows that choice.
        cases = (
            (FieldRegistry(), "TextField", self.assertFalse),
            (FieldRegistry(defaultDefinition=STRINGFIELD), "StringField", self.assertTrue),
        )
        for registry, expectedType, checkUntokenized in cases:
            created = registry.createField('aField', 'id:1')
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual({
                    "type": expectedType,
                    "name": "aField",
                    "value": "id:1",
                }, created)
            checkUntokenized(registry.isUntokenized('aField'))
Example #46
0
    def testDrilldownFields(self):
        # Drilldown fields passed to the constructor and registered later
        # are both known; hierarchical and multiValued flags are reported
        # per field.
        registry = FieldRegistry(drilldownFields=[
            DrilldownField(name='aap'),
            DrilldownField(name='noot', hierarchical=True),
        ])
        registry.registerDrilldownField(fieldname='mies', multiValued=False)

        for knownName in ('aap', 'noot', 'mies'):
            self.assertTrue(registry.isDrilldownField(knownName))
        self.assertFalse(registry.isDrilldownField('vuur'))

        self.assertFalse(registry.isHierarchicalDrilldown('aap'))
        self.assertTrue(registry.isHierarchicalDrilldown('noot'))

        # 'aap' and 'noot' never specified multiValued and are reported as
        # multivalued; 'mies' was registered with multiValued=False.
        for multivaluedName in ('aap', 'noot'):
            self.assertTrue(registry.isMultivaluedDrilldown(multivaluedName))
        self.assertFalse(registry.isMultivaluedDrilldown('mies'))
        self.assertTrue(registry.isUntokenized('mies'))

        facetField = registry.createFacetField("name", ["value"])
        expected = dict(type="FacetField", name="name", path=["value"])
        self.assertEqual(expected, facetField)
 def testWildcardQuery(self):
     # The CQL 'field=???*' must convert to a WildcardQuery on that value.
     self.fieldRegistry = FieldRegistry()
     wildcardTerm = dict(field="field", value="???*")
     self.assertConversion(dict(type="WildcardQuery", term=wildcardTerm), cql='field=???*')
Example #48
0
def main(reactor, port, serverPort, autocompletePort, databasePath, **kwargs):
    """Build and return the nested-tuple component tree for the HTTP server.

    Parameters:
        reactor: passed on to SocketPool, ObservableHttpServer, DynamicHtml
            and LuceneRemoteService.
        port: port for ObservableHttpServer; also used as the target port of
            LuceneRemote and Autocomplete (both on 'localhost').
        serverPort: port handed to every Lucene / MultiLucene component.
        autocompletePort: port handed to SuggestionIndexComponent.
        databasePath: base directory; storage is placed in its 'storage'
            subdirectory.
        **kwargs: accepted but not used in this function.

    Returns:
        A tuple tree rooted at Observable().

    The names dynamicPath, staticPath, version and uploadHelix are not defined
    in this function; they are expected to be available at module level.
    """
    drilldownFields = [
        DrilldownField('untokenized.field2'),
        DrilldownField('untokenized.field2.copy', indexFieldName='copy'),
        DrilldownField('untokenized.fieldHier', hierarchical=True)
    ]

    # One registry, shared by the settings of both the 'main' and 'main2' cores.
    fieldRegistry = FieldRegistry(drilldownFields)
    fieldRegistry.register('intfield1', INTFIELD)
    fieldRegistry.register('intfield2', INTFIELD)
    fieldRegistry.register('intfield3', INTFIELD)
    fieldRegistry.register('intfield_missing', INTFIELD)
    fieldRegistry.register('sorted.intfield_missing', INTFIELD)
    luceneSettings = LuceneSettings(
                fieldRegistry=fieldRegistry,
                commitCount=30,
                commitTimeout=0.3,
                # Previous form, passing an analyzer object instead of a dict description:
                #analyzer=MerescoDutchStemmingAnalyzer(["field4", "field5"]),
                analyzer=dict(type="MerescoDutchStemmingAnalyzer", stemmingFields=['field4', 'field5'])
            )

    # Single HTTP request component, reused as observer by every Lucene component below.
    http11_request = be((HttpRequest1_1(),
        (SocketPool(reactor=reactor, unusedTimeout=5, limits=dict(totalSize=100, destinationSize=10)),)
    ))
    lucene = be((Lucene(host="localhost", port=serverPort, name='main', settings=luceneSettings),
            (http11_request,)
        ))

    # Second core: same field registry, different commit timeout, no analyzer override.
    lucene2Settings = LuceneSettings(fieldRegistry=fieldRegistry, commitTimeout=0.1)
    lucene2 = be((Lucene(host="localhost", port=serverPort, name='main2', settings=lucene2Settings),
            (http11_request,)
        ))

    # Third core ('empty-core') uses settings without the shared field registry.
    emptyLuceneSettings = LuceneSettings(commitTimeout=1)
    # Not wrapped in be(): this tuple is embedded as-is in the tree returned below.
    multiLuceneHelix = (MultiLucene(host='localhost', port=serverPort, defaultCore='main'),
            (Lucene(host='localhost', port=serverPort, name='empty-core', settings=emptyLuceneSettings),
                (http11_request,)
            ),
            (lucene,),
            (lucene2,),
            (http11_request,)
        )
    storageComponent = be(
        (RetrieveDataToGetData(),
            (StorageComponentAdapter(),
                (MultiSequentialStorage(directory=join(databasePath, 'storage')),)
            )
        )
    )

    # The tree below routes requests by path prefix via PathFilter.
    return \
    (Observable(),
        (ObservableHttpServer(reactor=reactor, port=port),
            (BasicHttpHandler(),
                (ApacheLogger(outputStream=stdout),
                    # Everything under /info, minus the paths that have their own filter below.
                    (PathFilter("/info", excluding=[
                            '/info/version',
                            '/info/name',
                            '/update',
                            '/sru',
                            '/remote',
                            '/via-remote-sru',
                        ]),
                        (DynamicHtml(
                                [dynamicPath],
                                reactor=reactor,
                                indexPage='/info',
                                additionalGlobals={
                                    'VERSION': version,
                                }
                            ),
                        )
                    ),
                    (PathFilter("/info/version"),
                        (StringServer(version, ContentTypePlainText), )
                    ),
                    (PathFilter("/info/name"),
                        (StringServer('Meresco Lucene', ContentTypePlainText),)
                    ),
                    # Strip the '/static' prefix before handing the path to the file server.
                    (PathFilter("/static"),
                        (PathRename(lambda path: path[len('/static'):]),
                            (FileServer(staticPath),)
                        )
                    ),
                    # '/update_main2' is excluded here so it is handled only by its own filter below.
                    (PathFilter("/update_main", excluding=['/update_main2']),
                        uploadHelix(lucene, storageComponent, drilldownFields, fieldRegistry=luceneSettings.fieldRegistry),
                    ),
                    (PathFilter("/update_main2"),
                        uploadHelix(lucene2, storageComponent, drilldownFields, fieldRegistry=lucene2Settings.fieldRegistry),
                    ),
                    # SRU directly against the local cores; one query converter per core.
                    (PathFilter('/sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (AdapterToLuceneQuery(
                                    defaultCore='main',
                                    coreConverters={
                                        "main": QueryExpressionToLuceneQueryDict([], luceneSettings=luceneSettings),
                                        "main2": QueryExpressionToLuceneQueryDict([], luceneSettings=lucene2Settings),
                                        "empty-core": QueryExpressionToLuceneQueryDict([], luceneSettings=emptyLuceneSettings),
                                    }),
                                    multiLuceneHelix,
                                ),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    # SRU that goes through LuceneRemote to this same server's '/remote' path.
                    (PathFilter('/via-remote-sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (LuceneRemote(host='localhost', port=port, path='/remote'),),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    # Remote service endpoint; same converter set as '/sru'.
                    (PathFilter('/remote'),
                        (LuceneRemoteService(reactor=reactor),
                            (AdapterToLuceneQuery(
                                    defaultCore='main',
                                    coreConverters={
                                        "main": QueryExpressionToLuceneQueryDict([], luceneSettings=luceneSettings),
                                        "main2": QueryExpressionToLuceneQueryDict([], luceneSettings=lucene2Settings),
                                        "empty-core": QueryExpressionToLuceneQueryDict([], luceneSettings=emptyLuceneSettings),
                                    }),
                                multiLuceneHelix,
                            )
                        )
                    ),
                    (PathFilter('/autocomplete'),
                        (Autocomplete(host='localhost', port=port, path='/autocomplete', defaultField='__all__', templateQuery='?', defaultLimit=5, shortname='?', description='?'),
                            (lucene,),
                        )
                    ),
                    (PathFilter('/suggestion'),
                        (SuggestionIndexComponent(host='localhost', port=autocompletePort),
                            (http11_request,),
                        )
                    )
                )
            )
        )
    )
class QueryExpressionToLuceneQueryDictTest(SeecrTestCase):
    """Tests for converting query expressions / CQL into Lucene query dicts.

    Each test feeds either a ``QueryExpression`` or a CQL string through
    ``convert`` (via ``assertConversion``) and compares the resulting dict.
    Tests can tune the conversion by setting ``self.unqualifiedFields``,
    ``self.fieldRegistry``, ``self._analyzer`` or
    ``self._ignoredStemmingForWords`` before converting.
    """

    def testTermQuery(self):
        """A qualified 'field = value' term becomes a plain TermQuery."""
        self.assertConversion({
            "type": "TermQuery",
            "term": {
                "field":"field",
                "value": "value",
            }
        }, QueryExpression.searchterm("field", "=", "value"))
        self.assertConversion({"term": {"field": "field", "value": "value"}, "type": "TermQuery"}, QueryExpression.searchterm("field", "=", "value"))

    def testRightHandSideIsLowercase(self):
        """The term value is lowercased in the resulting query."""
        self.assertConversion({'boost': 1.0, 'term': {'field': 'unqualified', 'value': 'cat'}, 'type': 'TermQuery'}, QueryExpression.searchterm(term="CaT"))

    def testOneTermOutputWithANumber(self):
        """A numeric-looking unqualified term is kept as a string value."""
        self.assertConversion({'boost': 1.0, 'term': {'field': 'unqualified', 'value': '2005'}, 'type': 'TermQuery'}, QueryExpression.searchterm(term="2005"))

    def testMatchAllQuery(self):
        """A lone '*' converts to MatchAllDocsQuery."""
        self.assertConversion({"type": "MatchAllDocsQuery"}, QueryExpression.searchterm(term="*"))

    def testUnqualifiedTermFields(self):
        """An unqualified term is searched in the single configured field."""
        self.unqualifiedFields = [('aField', 1.0)]
        self.assertConversion({"type": "TermQuery", "term": {"field": "aField", "value": "value"}, 'boost': 1.0}, QueryExpression.searchterm(term="value"))

    def testMultipleUnqualifiedTermFields(self):
        """With several unqualified fields the term becomes SHOULD clauses, one per field."""
        self.unqualifiedFields = [('aField', 1.0), ('oField', 2.0)]
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "aField", "value": "value"},
                        "boost": 1.0,
                        "occur": "SHOULD"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "oField", "value": "value"},
                        "boost": 2.0,
                        "occur": "SHOULD"
                    }
                ]
            }, QueryExpression.searchterm(term="value"))

    def testBooleanAndQuery(self):
        """AND of two terms yields a BooleanQuery with MUST clauses."""
        expr = QueryExpression.nested(operator='AND')
        expr.operands=[
                QueryExpression.searchterm("field1", "=", "value1"),
                QueryExpression.searchterm("field2", "=", "value2")
            ]
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "field1", "value": "value1"},
                        "occur": "MUST"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "field2", "value": "value2"},
                        "occur": "MUST"
                    }
                ]
            }, expr)

    def testBooleanOrQuery(self):
        """OR of two terms yields a BooleanQuery with SHOULD clauses."""
        expr = QueryExpression.nested(operator='OR')
        expr.operands=[
                QueryExpression.searchterm("field1", "=", "value1"),
                QueryExpression.searchterm("field2", "=", "value2")
            ]
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "field1", "value": "value1"},
                        "occur": "SHOULD"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "field2", "value": "value2"},
                        "occur": "SHOULD"
                    }
                ]
            }, expr)

    def testBooleanNotQuery(self):
        """An operand flagged must_not becomes a MUST_NOT clause."""
        expr = QueryExpression.nested(operator='AND')
        expr.operands=[
                QueryExpression.searchterm("field1", "=", "value1"),
                QueryExpression.searchterm("field2", "=", "value2")
            ]
        expr.operands[1].must_not = True
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "field1", "value": "value1"},
                        "occur": "MUST"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "field2", "value": "value2"},
                        "occur": "MUST_NOT"
                    }
                ]
            }, expr)

    def testBooleanNotQueryNested(self):
        """A nested expression flagged must_not becomes a MUST_NOT BooleanQuery clause."""
        expr = QueryExpression.nested(operator='AND')
        nestedNotExpr = QueryExpression.nested(operator='AND')
        nestedNotExpr.must_not = True
        nestedNotExpr.operands = [
            QueryExpression.searchterm("field2", "=", "value2"),
            QueryExpression.searchterm("field3", "=", "value3")
        ]
        expr.operands = [QueryExpression.searchterm("field1", "=", "value1"), nestedNotExpr]
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "field1", "value": "value1"},
                        "occur": "MUST"
                    }, {
                        "type": "BooleanQuery",
                        "occur": "MUST_NOT",
                        "clauses": [
                            {
                                "type": "TermQuery",
                                "term": {"field": "field2", "value": "value2"},
                                "occur": "MUST"
                            },
                            {
                                "type": "TermQuery",
                                "term": {"field": "field3", "value": "value3"},
                                "occur": "MUST"
                            }
                        ]
                    }
                ]
            }, expr)

    def testNotExpression(self):
        """A top-level must_not term is combined with a MUST MatchAllDocsQuery."""
        expr = QueryExpression.searchterm("field", "=", "value")
        expr.must_not = True
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "MatchAllDocsQuery",
                        "occur": "MUST"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "field", "value": "value"},
                        "occur": "MUST_NOT"
                    }
                ]
            }, expr)

    def testPhraseOutput(self):
        """A quoted multi-word term becomes a PhraseQuery on the unqualified field."""
        self.assertConversion({
                "type": "PhraseQuery",
                "boost": 1.0,
                "terms": [
                    {"field": "unqualified", "value": "cats"},
                    {"field": "unqualified", "value": "dogs"}
                ]
            }, QueryExpression.searchterm(term='"cats dogs"'))

    # NOTE(review): the commented-out tests below build Lucene query objects
    # (PhraseQuery/Term/BooleanQuery) instead of dicts; presumably they still
    # await a port to the dict format -- confirm before deleting.

    # def testWhitespaceAnalyzer(self):
    #     self._analyzer = WhitespaceAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "kat"))
    #     query.add(Term("unqualified", "hond"))
    #     self.assertConversion(query, cql='"kat hond"')

    # def testPhraseOutputDoesNoDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "katten"))
    #     query.add(Term("unqualified", "honden"))
    #     self.assertConversion(query, cql='"katten honden"')

    # def testDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertConversion(query, cql='honden')

    # def testDutchStemmingOnlyForGivenFields(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer(['unqualified'])
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertConversion(query, cql='honden')

    #     query = TermQuery(Term("field", "honden"))
    #     self.assertConversion(query, cql='field=honden')

    # def testIgnoreStemming(self):
    #     self._ignoredStemmingForWords = ['kate', 'wageningen']
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = TermQuery(Term("unqualified", "kate"))
    #     self.assertConversion(query, cql='kate')
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "katten")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "kat")), BooleanClause.Occur.SHOULD)
    #     self.assertConversion(query, cql='katten')

    def testPhraseQueryIsStandardAnalyzed(self):
        """Punctuation and parentheses in a phrase are dropped, tokens lowercased."""
        expected = dict(type="PhraseQuery", terms=[], boost=1.0)
        for term in ["vol.118", "2008", "nr.3", "march", "p.435-444"]:
            expected["terms"].append(dict(field="unqualified", value=term))
        # Named 'phrase' rather than 'input' to avoid shadowing the builtin.
        phrase = '"vol.118 (2008) nr.3 (March) p.435-444"'
        self.assertConversion(expected, cql=phrase)

    def testOneTermPhraseQueryUsesStandardAnalyzed(self):
        """An unquoted term split by the analyzer ('aap:noot') becomes a PhraseQuery."""
        expected = dict(type="PhraseQuery", terms=[], boost=1.0)
        expected["terms"].append(dict(field="unqualified", value='aap'))
        expected["terms"].append(dict(field="unqualified", value='noot'))
        self.assertConversion(expected, cql='aap:noot')

    def testCreatesEmptyPhraseQueryIfNoValidCharsFound(self):
        """A term without any usable characters yields a PhraseQuery with no terms."""
        expected = dict(type="PhraseQuery", terms=[], boost=1.0)
        self.assertConversion(expected, cql=':')

    def testStandardAnalyserWithoutStopWords(self):
        """Words such as 'no', 'is' and 'the' are kept in the phrase."""
        expected = dict(type="PhraseQuery", terms=[], boost=1.0)
        for term in ["no", "is", "the", "only", "option"]:
            expected["terms"].append(dict(field="unqualified", value=term))
        self.assertConversion(expected, cql='"no is the only option"')

    def testDiacritics(self):
        """Accented characters are folded to their unaccented form."""
        expected = termQuery('title', 'moree')
        self.assertConversion(expected, cql='title=Moree')
        self.assertConversion(expected, cql='title=Morée')
        self.assertConversion(expected, cql='title=Morèe')

        # self._analyzer = MerescoDutchStemmingAnalyzer()
        # query = PhraseQuery()
        # query.add(Term("title", "waar"))
        # query.add(Term("title", "is"))
        # query.add(Term("title", "moree"))
        # query.add(Term("title", "vandaag"))
        # self.assertConversion(query, cql='title="Waar is Morée vandaag"')

    def testDiacriticsShouldBeNormalizedNFC(self):
        """A decomposed accent splits the term unless the input is NFC-normalized first."""
        pq = dict(type="PhraseQuery", terms=[])
        pq["terms"].append(dict(field="title", value="more"))
        pq["terms"].append(dict(field="title", value="e"))
        self.assertConversion(pq, cql='title=More\xcc\x81e') # Combined
        from unicodedata import normalize
        # Decode explicitly as UTF-8: a bare unicode() uses the process default
        # encoding and fails on these bytes when that default is ASCII.
        self.assertConversion(termQuery('title', 'moree'), cql=normalize('NFC', unicode('title=More\xcc\x81e', 'utf-8')))

    def testIndexRelationTermOutput(self):
        """'=' on a field gives a TermQuery for one word and a PhraseQuery for several."""
        self.assertConversion(termQuery('animal', 'cats'), cql='animal=cats')
        query = dict(type="PhraseQuery", terms=[])
        query["terms"].append(dict(field="animal", value="cats"))
        query["terms"].append(dict(field="animal", value="dogs"))
        self.assertConversion(query, cql='animal="cats dogs"')
        self.assertConversion(query, cql='animal="catS Dogs"')

    def testIndexRelationExactTermOutput(self):
        """'exact' keeps the value as one term, including spaces and capitals."""
        self.assertConversion(termQuery("animal", "hairy cats"), cql='animal exact "hairy cats"')
        self.assertConversion(termQuery("animal", "Capital Cats"), cql='animal exact "Capital Cats"')

    def testBoost(self):
        """A '/boost=' relation modifier sets the boost on the query."""
        query = termQuery("title", "cats", boost=2.0)
        self.assertConversion(query, cql="title =/boost=2.0 cats")

    def testWildcards(self):
        """Trailing '*' gives a prefix query; other placements fall back to term queries."""
        query = prefixQuery('unqualified', 'prefix', 1.0)
        self.assertConversion(query, cql='prefix*')
        self.assertConversion(query, cql='PREfix*')
        query = prefixQuery('field', 'prefix')
        self.assertConversion(query, cql='field="PREfix*"')
        self.assertConversion(query, cql='field=prefix*')
        query = prefixQuery('field', 'oc-0123')
        self.assertConversion(query, cql='field="oc-0123*"')
        query = termQuery('field', 'p')
        self.assertConversion(query, cql='field="P*"')
        #only prefix queries for now
        query = termQuery('field', 'post')
        self.assertConversion(query, cql='field="*post"')

        query = termQuery('field', 'prefix')
        self.assertConversion(query, cql='field=prefix**')

        self.unqualifiedFields = [("field0", 0.2), ("field1", 2.0)]

        query = dict(type="BooleanQuery", clauses=[])
        query["clauses"].append(prefixQuery("field0", "prefix", 0.2))
        query["clauses"][0]["occur"] = "SHOULD"

        query["clauses"].append(prefixQuery("field1", "prefix", 2.0))
        query["clauses"][1]["occur"] = "SHOULD"
        self.assertConversion(query, cql="prefix*")

    def testMagicExact(self):
        """On a field registered as STRINGFIELD, '=' converts the same as 'exact'."""
        # Computed first, with the default registry that convert() builds itself.
        exactResult = self.convert(cql='animal exact "cats dogs"')
        self.fieldRegistry = FieldRegistry()
        self.fieldRegistry.register('animal', STRINGFIELD)
        self.assertConversion(exactResult, cql='animal = "cats dogs"')

    def testTextRangeQuery(self):
        """Comparison relations on a text field give String range queries."""
        # (field, lowerTerm, upperTerm, includeLower, includeUpper)
        q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm='value', upperTerm=None, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='field > value')
        q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm='value', upperTerm=None, includeLower=True, includeUpper=False)
        self.assertConversion(q, cql='field >= value')
        q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm=None, upperTerm='value', includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='field < value')
        q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm=None, upperTerm='value', includeLower=False, includeUpper=True)
        self.assertConversion(q, cql='field <= value')

    def testIntRangeQuery(self):
        """Comparison relations on 'intField' give Int range queries with int bounds."""
        # (field, lowerTerm, upperTerm, includeLower, includeUpper)
        q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=1, upperTerm=None, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='intField > 1')
        q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=1, upperTerm=None, includeLower=True, includeUpper=False)
        self.assertConversion(q, cql='intField >= 1')
        q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='intField < 3')
        q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=True)
        self.assertConversion(q, cql='intField <= 3')

    def testLongRangeQuery(self):
        """Comparison relations on 'longField' give Long range queries."""
        # (field, lowerTerm, upperTerm, includeLower, includeUpper)
        q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=1, upperTerm=None, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='longField > 1')
        q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=1, upperTerm=None, includeLower=True, includeUpper=False)
        self.assertConversion(q, cql='longField >= 1')
        q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='longField < 3')
        q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=True)
        self.assertConversion(q, cql='longField <= 3')

    def testDrilldownFieldQuery(self):
        """Queries on a hierarchical drilldown field carry a path, split on '>'."""
        self.fieldRegistry = FieldRegistry([DrilldownField('field', hierarchical=True)])
        self.assertConversion(dict(type="TermQuery", term=dict(field="field", path=["value"], type="DrillDown")), cql="field = value")
        self.assertConversion(dict(type="TermQuery", term=dict(field="field", path=["value", "value1"], type="DrillDown")), cql="field = \"value>value1\"")

    def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
        """A NO_TERMS_FREQUENCY_FIELD unqualified field is left out of phrase queries."""
        self.fieldRegistry = FieldRegistry()
        self.fieldRegistry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELD)
        self.unqualifiedFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
        expected = dict(type="PhraseQuery", terms=[
                dict(field="unqualified", value="phrase"),
                dict(field="unqualified", value="query")
            ], boost=1.0)
        self.assertConversion(expected, cql='"phrase query"')

    def testQueryForIntField(self):
        """'=' and 'exact' on an int field both give an inclusive single-value range."""
        expected = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True)
        self.assertConversion(expected, cql="intField=5")

        expected = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True)
        self.assertConversion(expected, cql="intField exact 5")

    def testQueryForLongField(self):
        """'=' on a long field gives an inclusive single-value Long range."""
        expected = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=long(5), upperTerm=long(5), includeLower=True, includeUpper=True)
        self.assertConversion(expected, cql="longField=5")

    def testQueryForDoubleField(self):
        """'=' on 'range.double.field' gives an inclusive single-value Double range."""
        expected = dict(type="RangeQuery", rangeType="Double", field='range.double.field', lowerTerm=float(5), upperTerm=float(5), includeLower=True, includeUpper=True)
        self.assertConversion(expected, cql="range.double.field=5")

    def testWildcardQuery(self):
        """A value with '?' placeholders converts to a WildcardQuery."""
        self.fieldRegistry = FieldRegistry()
        expected = dict(type="WildcardQuery", term=dict(field="field", value="???*"))
        self.assertConversion(expected, cql='field=???*')

    def testUnsupportedCQL(self):
        """Relations the converter does not support raise UnsupportedCQL."""
        for relation in ['<>']:
            self.assertRaises(UnsupportedCQL, self.convert, cql='index %s term' % relation)

    def convert(self, expression=None, cql=None):
        """Convert *expression* (or, when it is None, the parsed *cql*) to a query dict.

        Settings are assembled from optional attributes a test may have set:
        ``unqualifiedFields`` (default ``[("unqualified", 1.0)]``),
        ``_analyzer``, ``fieldRegistry`` and ``_ignoredStemmingForWords``.
        When no ``fieldRegistry`` is set, a fresh one is created with
        "intField" and "longField" registered.
        """
        if expression is None:
            expression = cqlToExpression(parseCql(cql))
        unqualifiedFields = getattr(self, 'unqualifiedFields', [("unqualified", 1.0)])
        settings = LuceneSettings()
        if hasattr(self, '_analyzer'):
            settings.analyzer = self._analyzer
        if hasattr(self, 'fieldRegistry'):
            settings.fieldRegistry = self.fieldRegistry
        else:
            settings.fieldRegistry = FieldRegistry()
            settings.fieldRegistry.register("intField", fieldDefinition=INTFIELD)
            settings.fieldRegistry.register("longField", fieldDefinition=LONGFIELD)
        converter = QueryExpressionToLuceneQueryDict(
            unqualifiedTermFields=unqualifiedFields,
            luceneSettings=settings,
            ignoreStemmingForWords=getattr(self, '_ignoredStemmingForWords', None)
        )
        return converter.convert(expression)

    def assertConversion(self, expected, expression=None, cql=None):
        """Assert that converting *expression* or *cql* yields *expected*."""
        result = self.convert(expression=expression, cql=cql)
        self.assertEqual(expected, result)
 def testMagicExact(self):
     """With 'animal' registered as StringField.TYPE_NOT_STORED, '=' composes the same query as 'exact'."""
     # Composed with the composer that is in place before it gets replaced below.
     expectedQuery = self.composer.compose(parseCql('animal exact "cats dogs"'))
     registry = FieldRegistry()
     registry.register('animal', StringField.TYPE_NOT_STORED)
     settings = LuceneSettings(fieldRegistry=registry)
     self.composer = LuceneQueryComposer(unqualifiedTermFields=[("unqualified", 1.0)], luceneSettings=settings)
     self.assertConversion(expectedQuery, 'animal = "cats dogs"')
 def setUp(self):
     """Create ``self.composer`` with a registry that knows 'intField' and 'longField'."""
     super(LuceneQueryComposerTest, self).setUp()
     registry = FieldRegistry()
     for fieldname, definition in (("intField", INTFIELD), ("longField", LONGFIELD)):
         registry.register(fieldname, fieldDefinition=definition)
     settings = LuceneSettings(fieldRegistry=registry)
     self.composer = LuceneQueryComposer(unqualifiedTermFields=[("unqualified", 1.0)], luceneSettings=settings)
 def testMagicExact(self):
     """With 'animal' registered as STRINGFIELD, '=' converts the same as 'exact'."""
     # The reference result is computed before self.fieldRegistry is set.
     expectedQuery = self.convert(cql='animal exact "cats dogs"')
     stringRegistry = FieldRegistry()
     self.fieldRegistry = stringRegistry
     stringRegistry.register('animal', STRINGFIELD)
     self.assertConversion(expectedQuery, cql='animal = "cats dogs"')
Example #53
0
    def testSortField(self):
        """Check sort field types, default missing values and the 'sort' flag on created fields."""
        registry = FieldRegistry()
        definitions = (
            ("sorted.longfield", LONGFIELD),
            ("sorted.intfield", INTFIELD),
            ("sorted.stringfield", STRINGFIELD),
        )
        for fieldname, definition in definitions:
            registry.register(fieldname, fieldDefinition=definition)

        # The long and int fields report their sort type and have no default missing value.
        for fieldname, sortType in (("sorted.longfield", "Long"), ("sorted.intfield", "Int")):
            self.assertEqual(sortType, registry.sortFieldType(fieldname))
            self.assertEqual(None, registry.defaultMissingValueForSort(fieldname, True))

        # For the string field the missing value depends on the boolean argument.
        self.assertEqual("String", registry.sortFieldType("sorted.stringfield"))
        for flag, missingValue in ((True, "STRING_FIRST"), (False, "STRING_LAST")):
            self.assertEqual(missingValue, registry.defaultMissingValueForSort("sorted.stringfield", flag))
        self.assertEqual(None, registry.defaultMissingValueForSort("score", False))

        longField = registry.createField('sorted.longfield', 'id:1')
        expectedField = dict(name='sorted.longfield', type='LongField', value='id:1', sort=True)
        self.assertEqual(expectedField, longField)
 def testMagicExact(self):
     """With 'animal' registered as STRINGFIELD, '=' converts the same as 'exact'."""
     # The reference result is computed before self.fieldRegistry is set.
     exactResult = self._convert('animal exact "cats dogs"')
     self.fieldRegistry = FieldRegistry()
     self.fieldRegistry.register('animal', STRINGFIELD)
     self.assertEqual(exactResult, self._convert('animal = "cats dogs"'))
class QueryExpressionToLuceneQueryDictTest(SeecrTestCase):
    def testTermQuery(self):
        # A qualified 'field = value' search term converts to a single TermQuery dict.
        expected = {"type": "TermQuery", "term": {"field": "field", "value": "value"}}
        firstResult = self._convert(QueryExpression.searchterm("field", "=", "value"))
        self.assertEquals(expected, firstResult)
        # The same check once more with a freshly built, equal expression.
        secondResult = self._convert(QueryExpression.searchterm("field", "=", "value"))
        self.assertEquals(
            {"term": {"field": "field", "value": "value"}, "type": "TermQuery"}, secondResult)

    def testRightHandSideIsLowercase(self):
        # The mixed-case term "CaT" ends up as the lowercase value 'cat'.
        expected = {'boost': 1.0, 'term': {'field': 'unqualified', 'value': 'cat'}, 'type': 'TermQuery'}
        actual = self._convert(QueryExpression.searchterm(term="CaT"))
        self.assertEquals(expected, actual)

    def testOneTermOutputWithANumber(self):
        # A numeric-looking unqualified term is still a plain TermQuery on the string '2005'.
        expected = {'boost': 1.0, 'term': {'field': 'unqualified', 'value': '2005'}, 'type': 'TermQuery'}
        actual = self._convert(QueryExpression.searchterm(term="2005"))
        self.assertEquals(expected, actual)

    def testMatchAllQuery(self):
        # The bare '*' term converts to a MatchAllDocsQuery.
        matchAll = QueryExpression.searchterm(term="*")
        expected = {"type": "MatchAllDocsQuery"}
        self.assertEquals(expected, self._convert(matchAll))

    def testUnqualifiedTermFields(self):
        # An unqualified term is looked up in the configured unqualified field, with its boost.
        self.unqualifiedFields = [('aField', 1.0)]
        expected = {"type": "TermQuery", "term": {"field": "aField", "value": "value"}, 'boost': 1.0}
        actual = self._convert(QueryExpression.searchterm(term="value"))
        self.assertEquals(expected, actual)

    def testUnqualifiedTermFieldsWithNestedExpression(self):
        # Unqualified terms nested under AND each become a MUST clause on the unqualified field.
        self.unqualifiedFields = [('aField', 1.0)]
        expr = QueryExpression.nested(operator='AND')
        expr.operands = [QueryExpression.searchterm(term=term) for term in ("value1", "value2")]
        expectedClauses = [
            {'type': 'TermQuery', 'occur': 'MUST', 'term': {'field': 'aField', 'value': u'value1'}, 'boost': 1.0},
            {'type': 'TermQuery', 'occur': 'MUST', 'term': {'field': 'aField', 'value': u'value2'}, 'boost': 1.0},
        ]
        expected = {'type': 'BooleanQuery', 'clauses': expectedClauses}
        self.assertEquals(expected, self._convert(expr))

    def testMultipleUnqualifiedTermFields(self):
        # With several unqualified fields, one term fans out into SHOULD clauses, one per field.
        self.unqualifiedFields = [('aField', 1.0), ('oField', 2.0)]
        expectedClauses = [
            {"type": "TermQuery", "term": {"field": "aField", "value": "value"}, "boost": 1.0, "occur": "SHOULD"},
            {"type": "TermQuery", "term": {"field": "oField", "value": "value"}, "boost": 2.0, "occur": "SHOULD"},
        ]
        expected = {"type": "BooleanQuery", "clauses": expectedClauses}
        actual = self._convert(QueryExpression.searchterm(term="value"))
        self.assertEquals(expected, actual)

    def testBooleanAndQuery(self):
        # AND of two qualified terms: both clauses carry occur MUST.
        expr = QueryExpression.nested(operator='AND')
        left = QueryExpression.searchterm("field1", "=", "value1")
        right = QueryExpression.searchterm("field2", "=", "value2")
        expr.operands = [left, right]
        expected = {
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "MUST"},
                {"type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "MUST"},
            ],
        }
        self.assertEquals(expected, self._convert(expr))

    def testBooleanOrQuery(self):
        # OR of two qualified terms: both clauses carry occur SHOULD.
        expr = QueryExpression.nested(operator='OR')
        left = QueryExpression.searchterm("field1", "=", "value1")
        right = QueryExpression.searchterm("field2", "=", "value2")
        expr.operands = [left, right]
        expected = {
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "SHOULD"},
                {"type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "SHOULD"},
            ],
        }
        self.assertEquals(expected, self._convert(expr))

    def testBooleanNotQuery(self):
        # An operand flagged must_not inside an AND becomes a MUST_NOT clause.
        expr = QueryExpression.nested(operator='AND')
        required = QueryExpression.searchterm("field1", "=", "value1")
        excluded = QueryExpression.searchterm("field2", "=", "value2")
        expr.operands = [required, excluded]
        expr.operands[1].must_not = True
        expected = {
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "MUST"},
                {"type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "MUST_NOT"},
            ],
        }
        self.assertEquals(expected, self._convert(expr))

    def testBooleanNotQueryNested(self):
        # A whole nested AND group flagged must_not becomes one MUST_NOT BooleanQuery clause.
        expr = QueryExpression.nested(operator='AND')
        nestedNotExpr = QueryExpression.nested(operator='AND')
        nestedNotExpr.must_not = True
        nestedNotExpr.operands = [
            QueryExpression.searchterm("field2", "=", "value2"),
            QueryExpression.searchterm("field3", "=", "value3")
        ]
        expr.operands = [QueryExpression.searchterm("field1", "=", "value1"), nestedNotExpr]

        expectedNested = {
            "type": "BooleanQuery",
            "occur": "MUST_NOT",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "field2", "value": "value2"}, "occur": "MUST"},
                {"type": "TermQuery", "term": {"field": "field3", "value": "value3"}, "occur": "MUST"},
            ],
        }
        expected = {
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "field1", "value": "value1"}, "occur": "MUST"},
                expectedNested,
            ],
        }
        self.assertEquals(expected, self._convert(expr))

    def testNotExpression(self):
        # A lone must_not term is paired with a MatchAllDocsQuery it is excluded from.
        expr = QueryExpression.searchterm("field", "=", "value")
        expr.must_not = True
        expected = {
            "type": "BooleanQuery",
            "clauses": [
                {"type": "MatchAllDocsQuery", "occur": "MUST"},
                {"type": "TermQuery", "term": {"field": "field", "value": "value"}, "occur": "MUST_NOT"},
            ],
        }
        self.assertEquals(expected, self._convert(expr))

    def testPhraseOutput(self):
        # A quoted multi-word term converts to a PhraseQuery with one entry per word.
        expectedTerms = [
            {"field": "unqualified", "value": "cats"},
            {"field": "unqualified", "value": "dogs"},
        ]
        expected = {"type": "PhraseQuery", "boost": 1.0, "terms": expectedTerms}
        actual = self._convert(QueryExpression.searchterm(term='"cats dogs"'))
        self.assertEquals(expected, actual)

    # def testWhitespaceAnalyzer(self):
    #     self._analyzer = WhitespaceAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "kat"))
    #     query.add(Term("unqualified", "hond"))
    #     self.assertEquals(query, self._convert('"kat hond"'))

    # def testPhraseOutputDoesNoDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "katten"))
    #     query.add(Term("unqualified", "honden"))
    #     self.assertEquals(query, self._convert('"katten honden"'))

    # def testDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertEquals(query, self._convert('honden'))

    # def testDutchStemmingOnlyForGivenFields(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer(['unqualified'])
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertEquals(query, self._convert('honden'))

    #     query = TermQuery(Term("field", "honden"))
    #     self.assertEquals(query, self._convert('field=honden'))

    # def testIgnoreStemming(self):
    #     self._ignoredStemmingForWords = ['kate', 'wageningen']
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = TermQuery(Term("unqualified", "kate"))
    #     self.assertEquals(query, 'kate')
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "katten")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "kat")), BooleanClause.Occur.SHOULD)
    #     self.assertEquals(query, self._convert('katten'))

    def testPhraseQueryIsStandardAnalyzed(self):
        # Parentheses are dropped and "March" is lowercased; dotted/hyphenated tokens stay intact.
        analyzedTerms = ["vol.118", "2008", "nr.3", "march", "p.435-444"]
        expected = dict(
            type="PhraseQuery",
            terms=[dict(field="unqualified", value=term) for term in analyzedTerms],
            boost=1.0)
        self.assertEquals(expected, self._convert('"vol.118 (2008) nr.3 (March) p.435-444"'))

    def testOneTermPhraseQueryUsesStandardAnalyzed(self):
        # The single term 'aap:noot' is expected as a two-term phrase: 'aap', 'noot'.
        expectedTerms = [dict(field="unqualified", value=word) for word in ('aap', 'noot')]
        expected = dict(type="PhraseQuery", terms=expectedTerms, boost=1.0)
        self.assertEquals(expected, self._convert('aap:noot'))

    def testCreatesEmptyPhraseQueryIfNoValidCharsFound(self):
        # A term of only ':' leaves nothing to search for: a PhraseQuery without terms.
        emptyPhrase = dict(type="PhraseQuery", terms=[], boost=1.0)
        actual = self._convert(':')
        self.assertEquals(emptyPhrase, actual)

    def testStandardAnalyserWithoutStopWords(self):
        # Common words such as 'no', 'is' and 'the' must all survive in the phrase.
        words = ["no", "is", "the", "only", "option"]
        expected = dict(
            type="PhraseQuery",
            terms=[dict(field="unqualified", value=word) for word in words],
            boost=1.0)
        self.assertEquals(expected, self._convert('"no is the only option"'))

    def testDiacritics(self):
        # Accented and unaccented spellings all convert to the same lowercase term 'moree'.
        expected = termQuery('title', 'moree')
        for cql in ['title=Moree', 'title=Morée', 'title=Morèe']:
            self.assertEquals(expected, self._convert(cql))

        # self._analyzer = MerescoDutchStemmingAnalyzer()
        # query = PhraseQuery()
        # query.add(Term("title", "waar"))
        # query.add(Term("title", "is"))
        # query.add(Term("title", "moree"))
        # query.add(Term("title", "vandaag"))
        # self.assertEquals(query, self._convert('title="Waar is Morée vandaag"'))

    def testDiacriticsShouldBeNormalizedNFC(self):
        from unicodedata import normalize
        # 'e' followed by the UTF-8 bytes of a combining acute accent (decomposed form).
        decomposedCql = 'title=More\xcc\x81e' # Combined
        # Without normalization the value is expected to split into two phrase terms.
        splitPhrase = dict(type="PhraseQuery", terms=[
            dict(field="title", value="more"),
            dict(field="title", value="e"),
        ])
        self.assertEquals(splitPhrase, self._convert(decomposedCql))
        # After NFC normalization the same input converts to a single term.
        self.assertEquals(
            termQuery('title', 'moree'),
            self._convert(normalize('NFC', unicode(decomposedCql))))

    def testIndexRelationTermOutput(self):
        # Single word with '=': plain term query on the index.
        self.assertEquals(termQuery('animal', 'cats'), self._convert('animal=cats'))
        # Quoted words with '=': phrase query on the index, regardless of input casing.
        phrase = dict(type="PhraseQuery", terms=[
            dict(field="animal", value="cats"),
            dict(field="animal", value="dogs"),
        ])
        for cql in ['animal="cats dogs"', 'animal="catS Dogs"']:
            self.assertEquals(phrase, self._convert(cql))

    def testIndexRelationExactTermOutput(self):
        # 'exact' keeps the quoted value as one term, whitespace and capitals included.
        cases = [
            ("hairy cats", 'animal exact "hairy cats"'),
            ("Capital Cats", 'animal exact "Capital Cats"'),
        ]
        for value, cql in cases:
            self.assertEquals(termQuery("animal", value), self._convert(cql))

    def testBoost(self):
        # The '/boost=2.0' relation modifier ends up as the boost of the term query.
        expected = termQuery("title", "cats", boost=2.0)
        actual = self._convert("title =/boost=2.0 cats")
        self.assertEquals(expected, actual)

    def testWildcards(self):
        # Trailing '*' on an unqualified term: prefix query, input casing ignored.
        unqualifiedPrefix = prefixQuery('unqualified', 'prefix', 1.0)
        for cql in ['prefix*', 'PREfix*']:
            self.assertEquals(unqualifiedPrefix, self._convert(cql))

        # Same for a qualified term, quoted or not.
        fieldPrefix = prefixQuery('field', 'prefix')
        for cql in ['field="PREfix*"', 'field=prefix*']:
            self.assertEquals(fieldPrefix, self._convert(cql))
        self.assertEquals(prefixQuery('field', 'oc-0123'), self._convert('field="oc-0123*"'))

        # These inputs are expected as plain term queries rather than prefix queries.
        self.assertEquals(termQuery('field', 'p'), self._convert('field="P*"'))
        #only prefix queries for now
        self.assertEquals(termQuery('field', 'post'), self._convert('field="*post"'))
        self.assertEquals(termQuery('field', 'prefix'), self._convert('field=prefix**'))

        # With two unqualified fields the prefix query fans out into SHOULD clauses.
        self.unqualifiedFields = [("field0", 0.2), ("field1", 2.0)]
        clauses = []
        for fieldname, boost in [("field0", 0.2), ("field1", 2.0)]:
            clause = prefixQuery(fieldname, "prefix", boost)
            clause["occur"] = "SHOULD"
            clauses.append(clause)
        expected = dict(type="BooleanQuery", clauses=clauses)
        self.assertEquals(expected, self._convert("prefix*"))

    def testMagicExact(self):
        # Convert the 'exact' query before a custom field registry is installed.
        expectedFromExact = self._convert('animal exact "cats dogs"')
        # With 'animal' registered as STRINGFIELD, '=' must yield the same conversion.
        registry = FieldRegistry()
        self.fieldRegistry = registry
        registry.register('animal', STRINGFIELD)
        actual = self._convert('animal = "cats dogs"')
        self.assertEquals(expectedFromExact, actual)

    def testTextRangeQuery(self):
        # Each case: (cql, lowerTerm, upperTerm, includeLower, includeUpper)
        cases = [
            ('field > value', 'value', None, False, True),
            ('field >= value', 'value', None, True, True),
            ('field < value', None, 'value', True, False),
            ('field <= value', None, 'value', True, True),
        ]
        for cql, lower, upper, includeLower, includeUpper in cases:
            expected = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm=lower, upperTerm=upper, includeLower=includeLower, includeUpper=includeUpper)
            self.assertEquals(expected, self._convert(cql))

    def testIntRangeQuery(self):
        def intRange(lower, upper, includeLower, includeUpper):
            # Expected dict for a range on the registered 'intField'.
            return dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=lower, upperTerm=upper, includeLower=includeLower, includeUpper=includeUpper)

        # Each case: (cql, lowerTerm, upperTerm, includeLower, includeUpper)
        cases = [
            ('intField > 1', 1, None, False, True),
            ('intField >= 1', 1, None, True, True),
            ('intField < 3', None, 3, True, False),
            ('intField <= 3', None, 3, True, True),
        ]
        for cql, lower, upper, includeLower, includeUpper in cases:
            self.assertEquals(intRange(lower, upper, includeLower, includeUpper), self._convert(cql))

        # Equality on an int field is a closed range from the value to itself,
        # whether it comes from CQL or from a ready-made expression with an int term.
        exactlyThree = intRange(3, 3, True, True)
        self.assertEquals(exactlyThree, self._convert('intField = 3'))
        for relation in ['exact', '=']:
            self.assertEquals(exactlyThree, self._convert(QueryExpression.searchterm(index='intField', relation=relation, term=3)))

    def testLongRangeQuery(self):
        # Each case: (cql, lowerTerm, upperTerm, includeLower, includeUpper)
        cases = [
            ('longField > 1', 1, None, False, True),
            ('longField >= 1', 1, None, True, True),
            ('longField < 3', None, 3, True, False),
            ('longField <= 3', None, 3, True, True),
        ]
        for cql, lower, upper, includeLower, includeUpper in cases:
            expected = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=lower, upperTerm=upper, includeLower=includeLower, includeUpper=includeUpper)
            self.assertEquals(expected, self._convert(cql))

    def testDrilldownFieldQuery(self):
        # For a hierarchical drilldown field, the value is expected as a path split on '>'.
        self.fieldRegistry = FieldRegistry([DrilldownField('field', hierarchical=True)])
        cases = [
            ("field = value", ["value"]),
            ("field = \"value>value1\"", ["value", "value1"]),
        ]
        for cql, path in cases:
            expected = dict(type="TermQuery", term=dict(field="field", path=path, type="DrillDown"))
            self.assertEquals(expected, self._convert(cql))

    def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
        # 'noTermFreqField' is an unqualified field, but the expected phrase query only
        # targets 'unqualified'.
        registry = FieldRegistry()
        self.fieldRegistry = registry
        registry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELD)
        self.unqualifiedFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
        expectedTerms = [dict(field="unqualified", value=word) for word in ("phrase", "query")]
        expected = dict(type="PhraseQuery", terms=expectedTerms, boost=1.0)
        self.assertEquals(expected, self._convert('"phrase query"'))

    def testQueryForIntField(self):
        # '=' and 'exact' on an int field both give the closed range [5, 5].
        for cql in ["intField=5", "intField exact 5"]:
            expected = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True)
            self.assertEquals(expected, self._convert(cql))

    def testQueryForLongField(self):
        # Equality on a long field is a closed range with long bounds.
        five = long(5)
        expected = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=five, upperTerm=long(5), includeLower=True, includeUpper=True)
        actual = self._convert("longField=5")
        self.assertEquals(expected, actual)

    def testQueryForDoubleField(self):
        # 'range.double.field' is expected to be treated as a Double range with float bounds.
        five = float(5)
        expected = dict(type="RangeQuery", rangeType="Double", field='range.double.field', lowerTerm=five, upperTerm=float(5), includeLower=True, includeUpper=True)
        actual = self._convert("range.double.field=5")
        self.assertEquals(expected, actual)

    def testWildcardQuery(self):
        # A value containing '?' wildcards is passed on verbatim in a WildcardQuery.
        self.fieldRegistry = FieldRegistry()
        wildcardTerm = dict(field="field", value="???*")
        expected = dict(type="WildcardQuery", term=wildcardTerm)
        self.assertEquals(expected, self._convert('field=???*'))

    def testUnsupportedCQL(self):
        # Every relation listed here must be rejected with UnsupportedCQL.
        # assertRaises replaces the manual try / self.fail() / except construct and
        # reports which exception was expected when none is raised.
        for relation in ['<>']:
            self.assertRaises(UnsupportedCQL, self._convert, 'index %s term' % relation)

    def testPerQueryUnqualifiedFields(self):
        # unqualifiedTermFields given to convert() take precedence over the converter's own.
        self.unqualifiedFields = [('aField', 1.0)]
        converter = self._prepareConverter()
        expected = {
            "type": "BooleanQuery",
            "clauses": [
                {"type": "TermQuery", "term": {"field": "aField", "value": "value"}, 'boost': 2.0, 'occur': 'SHOULD'},
                {"type": "TermQuery", "term": {"field": "anotherField", "value": "value"}, 'boost': 3.0, 'occur': 'SHOULD'},
            ]}
        actual = converter.convert(
            QueryExpression.searchterm(term="value"),
            unqualifiedTermFields=[('aField', 2.0), ('anotherField', 3.0)])
        self.assertEquals(expected, actual)

    def testReallyIgnoreAnalyzedAwayTerms(self):
        def termClause(field, value, occur):
            # Expected TermQuery clause for an unqualified term with boost 1.0.
            return {'boost': 1.0, 'term': {'field': field, 'value': value}, 'type': 'TermQuery', 'occur': occur}

        def shouldGroup(value):
            # Expected MUST group holding one SHOULD clause per unqualified field.
            return {
                'clauses': [
                    termClause('unqualified', value, 'SHOULD'),
                    termClause('moreUnqualified', value, 'SHOULD'),
                ],
                'occur': 'MUST',
                'type': 'BooleanQuery'}

        # Terms consisting only of punctuation leave an empty phrase query.
        self.assertEquals({'boost': 1.0, 'terms': [], 'type': 'PhraseQuery'}, self._convert('.'))  # will not yield any results, but that's what's desired
        self.assertDictEquals({'terms': [], 'type': 'PhraseQuery'}, self._convert("abc=:;+"))

        # Inside an AND, the punctuation-only operand disappears altogether.
        singleFieldExpected = {
            'type': 'BooleanQuery',
            'clauses': [
                termClause('unqualified', u'abc', 'MUST'),
                termClause('unqualified', u'def', 'MUST'),
            ]}
        self.assertDictEquals(singleFieldExpected, self._convert("abc AND :;+ AND def"))

        # Same query with two unqualified fields: only the two real terms remain.
        self.unqualifiedFields = [("unqualified", 1.0), ("moreUnqualified", 1.0)]
        multiFieldExpected = {
            'clauses': [shouldGroup(u'abc'), shouldGroup(u'def')],
            'type': 'BooleanQuery'}
        self.assertDictEquals(multiFieldExpected, self._convert("abc AND :;+ AND def"))

    def testOtherCoreTermQuery(self):
        # Composed query over 'thisCore' and 'otherCore', matched on keys A and B.
        query = ComposedQuery('thisCore')
        query.cores.add('otherCore')
        query.addMatch(
            dict(core='thisCore', uniqueKey='A'),
            dict(core='otherCore', uniqueKey='B')
        )
        # A term on 'otherCore.field' is wrapped in a relational query on the other core.
        innerQuery = {"type": "TermQuery", "term": {"field": "field", "value": "value"}}
        expected = {
            "type": "RelationalLuceneQuery",  # should this not be 'joined' to own core somehow? (with MatchAllDocs)
            "core": "otherCore",
            "collectKeyName": "B",
            "filterKeyName": "B",
            "query": innerQuery,
        }
        actual = self._convert(QueryExpression.searchterm("otherCore.field", "=", "value"), composedQuery=query)
        self.assertEquals(expected, actual)

    @skip('not yet implemented')
    def testOtherCoreAndQuery(self):
        def relational(core, field):
            # Expected relational wrapper around a term query on `field` in `core`.
            return {
                "type": "RelationalLuceneQuery",  # should this not be 'joined' to own core somehow?
                "core": core,
                "collectKeyName": "A",  # where does this keyName come from?
                "filterKeyName": "A",
                "query": {"type": "TermQuery", "term": {"field": field, "value": "value"}},
            }

        # AND across two cores is expected as a JoinAndQuery of two relational queries.
        expected = {
            'type': 'JoinAndQuery',
            'first': relational("thisCore", "field0"),
            'second': relational("otherCore", "field"),
        }
        expression = QueryExpression(operator='AND', operands=[
            QueryExpression.searchterm('field0', '=', 'value'),
            QueryExpression.searchterm("otherCore.field", "=", "value")
        ])
        self.assertEquals(expected, self._convert(expression))


    def _convert(self, input, **kwargs):
        # Builds a fresh converter from the current test attributes, turns `input`
        # into an expression and converts it; extra kwargs go straight to convert().
        converter = self._prepareConverter()
        expression = self._makeExpression(input)
        return converter.convert(expression, **kwargs)

    def _prepareConverter(self):
        # Tests may override the defaults by setting self.unqualifiedFields and
        # self._ignoredStemmingForWords before converting.
        termFields = getattr(self, 'unqualifiedFields', [("unqualified", 1.0)])
        settings = self._prepareLuceneSettings()
        ignoredWords = getattr(self, '_ignoredStemmingForWords', None)
        return QueryExpressionToLuceneQueryDict(
            unqualifiedTermFields=termFields,
            luceneSettings=settings,
            ignoreStemmingForWords=ignoredWords
        )

    def _prepareLuceneSettings(self):
        # Default settings, customised by the optional test attributes
        # self._analyzer and self.fieldRegistry.
        settings = LuceneSettings()
        if hasattr(self, '_analyzer'):
            settings.analyzer = self._analyzer
        if not hasattr(self, 'fieldRegistry'):
            # No registry set by the test: use one that knows the numeric test fields.
            settings.fieldRegistry = FieldRegistry()
            for fieldname, definition in (("intField", INTFIELD), ("longField", LONGFIELD)):
                settings.fieldRegistry.register(fieldname, fieldDefinition=definition)
            return settings
        settings.fieldRegistry = self.fieldRegistry
        return settings

    def _makeExpression(self, input):
        # Strings are parsed as CQL and turned into an expression;
        # anything else is returned unchanged.
        if not isinstance(input, basestring):
            return input
        return cqlToExpression(parseCql(input))
Example #56
0
 def testPhraseQueryPossible(self):
     # Only the field registered with NO_TERMS_FREQUENCY_FIELD rules out phrase queries.
     registry = FieldRegistry()
     registry.register('fieldname', NO_TERMS_FREQUENCY_FIELD)
     possibleForRegistered = registry.phraseQueryPossible('fieldname')
     self.assertFalse(possibleForRegistered)
     possibleForOther = registry.phraseQueryPossible('other.fieldname')
     self.assertTrue(possibleForOther)
Example #57
0
 def testIsIndexField(self):
     # field2 is both a drilldown and a term vector field; field3 is drilldown only.
     drilldownFields = [DrilldownField(f) for f in ['field2', 'field3']]
     registry = FieldRegistry(drilldownFields=drilldownFields, termVectorFields=['field1', 'field2'])
     for fieldname in ['field1', 'field2']:
         self.assertTrue(registry.isIndexField(fieldname))
     # Of the four names checked, only the drilldown-only field is not an index field.
     self.assertFalse(registry.isIndexField('field3'))
     self.assertTrue(registry.isIndexField('field4'))
Example #58
0
from digitalecollectie.erfgeo import VERSION_STRING
from digitalecollectie.erfgeo.namespaces import namespaces
from digitalecollectie.erfgeo.maybecombinewithsummary import COMBINED_METADATA_PREFIX

from digitalecollectie.erfgeo.index.constants import ALL_FIELD
from digitalecollectie.erfgeo.index.lxmltofieldslist import LxmlToFieldsList
from digitalecollectie.erfgeo.index.fieldslisttolucenedocument import FieldsListToLuceneDocument
from digitalecollectie.erfgeo.index.indexfields import IndexFields


# Absolute path of the directory containing this module.
workingPath = dirname(abspath(__file__))

# Presumably (field name, boost) pairs used for terms without an explicit index
# -- confirm against the code that consumes this list.
unqualifiedTermFields = [(ALL_FIELD, 1.0)]

# Field registry seeded with the drilldown fields from IndexFields; the two geo
# coordinate fields are registered with the DOUBLEFIELD definition.
fieldRegistry = FieldRegistry(drilldownFields=IndexFields.drilldownFields)
fieldRegistry.register('dcterms:spatial.geo:long', fieldDefinition=DOUBLEFIELD)
fieldRegistry.register('dcterms:spatial.geo:lat', fieldDefinition=DOUBLEFIELD)

# NOTE(review): looks like keyword options for an lxml parser -- confirm where this is used.
parseHugeOptions = dict(huge_tree=True, remove_blank_text=True)


def createErfGeoEnrichmentPeriodicDownloadHelix(reactor, lucene, config, statePath):
    # Sets up a PeriodicDownload against the erfgeoEnrich service on 127.0.0.1.
    # NOTE(review): only the download construction is visible here; `lucene` and
    # `statePath` are not used in this part and nothing is returned yet -- confirm
    # against the complete function.
    # The port comes from the 'erfgeoEnrich.portNumber' config entry, converted with int().
    erfgeoEnrichPortNumber = int(config['erfgeoEnrich.portNumber'])
    # Download name: 'erfgeoEnrich-' followed by the combined metadata prefix.
    downloadName = 'erfgeoEnrich-%s' % COMBINED_METADATA_PREFIX
    erfGeoEnrichPeriodicDownload = PeriodicDownload(
        reactor,
        host='127.0.0.1',
        port=erfgeoEnrichPortNumber,
        name=downloadName,
        autoStart=True)
 def testWildcardQuery(self):
     # A value containing '?' wildcards is passed on verbatim in a WildcardQuery.
     self.fieldRegistry = FieldRegistry()
     wildcardTerm = dict(field="field", value="???*")
     expected = dict(type="WildcardQuery", term=wildcardTerm)
     self.assertEquals(expected, self._convert('field=???*'))