Example #1
0
 def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
     """Expect a phrase query on "unqualified" only.

     'noTermFreqField' is configured as an unqualified term field too, but it
     is registered with NO_TERMS_FREQUENCY_FIELDTYPE and must not show up in
     the expected PhraseQuery.
     """
     registry = FieldRegistry()
     registry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELDTYPE)
     termFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
     settings = LuceneSettings(fieldRegistry=registry)
     self.composer = LuceneQueryComposer(
         unqualifiedTermFields=termFields,
         luceneSettings=settings)

     phraseQuery = PhraseQuery()
     phraseQuery.add(Term("unqualified", "phrase query"))
     self.assertConversion(phraseQuery, '"phrase query"')
Example #2
0
 def testSpecificField(self):
     """A created field is only stored after its name is registered with StringField.TYPE_STORED."""
     fieldRegistry = FieldRegistry()

     # Nothing registered yet: the created field must not be stored.
     unregisteredField = fieldRegistry.createField('fieldname', 'value')
     self.assertFalse(unregisteredField.fieldType().stored())

     # After registering a stored type, the same name yields a stored field.
     fieldRegistry.register('fieldname', StringField.TYPE_STORED)
     storedField = fieldRegistry.createField('fieldname', 'value')
     self.assertTrue(storedField.fieldType().stored())
Example #3
0
 def testIsUntokenized(self):
     """Check isUntokenized for an 'untokenized.'-prefixed name and for re-registered field types."""
     fieldRegistry = FieldRegistry()
     prefixedIsUntokenized = fieldRegistry.isUntokenized('untokenized.some.field')
     self.assertTrue(prefixedIsUntokenized)

     # Registered as a StringField type: expected to be untokenized.
     fieldRegistry.register('fieldname', StringField.TYPE_NOT_STORED)
     asStringField = fieldRegistry.isUntokenized('fieldname')
     self.assertTrue(asStringField)

     # Re-registered as a TextField type: expected to be tokenized.
     fieldRegistry.register('fieldname', TextField.TYPE_NOT_STORED)
     asTextField = fieldRegistry.isUntokenized('fieldname')
     self.assertFalse(asTextField)
Example #4
0
 def testIsUntokenized(self):
     """Check isUntokenized for a drilldown field, a prefixed name, another name and registered types."""
     fieldRegistry = FieldRegistry(drilldownFields=[DrilldownField('aDrilldownField')])

     # No explicit registration: a drilldown field and an 'untokenized.' name versus any other name.
     for untokenizedName in ('aDrilldownField', 'untokenized.some.field'):
         self.assertTrue(fieldRegistry.isUntokenized(untokenizedName))
     self.assertFalse(fieldRegistry.isUntokenized('other.field'))

     # The answer for a registered name follows the most recent registration.
     fieldRegistry.register('fieldname', STRINGFIELD)
     self.assertTrue(fieldRegistry.isUntokenized('fieldname'))
     fieldRegistry.register('fieldname', TEXTFIELD)
     self.assertFalse(fieldRegistry.isUntokenized('fieldname'))
Example #5
0
 def testIsUntokenized(self):
     """Check isUntokenized for a drilldown field, a prefixed name, another name and registered types."""
     fieldRegistry = FieldRegistry(drilldownFields=[DrilldownField('aDrilldownField')])

     # No explicit registration: a drilldown field and an 'untokenized.' name versus any other name.
     for untokenizedName in ('aDrilldownField', 'untokenized.some.field'):
         self.assertTrue(fieldRegistry.isUntokenized(untokenizedName))
     self.assertFalse(fieldRegistry.isUntokenized('other.field'))

     # The answer for a registered name follows the most recent registration.
     fieldRegistry.register('fieldname', STRINGFIELD)
     self.assertTrue(fieldRegistry.isUntokenized('fieldname'))
     fieldRegistry.register('fieldname', TEXTFIELD)
     self.assertFalse(fieldRegistry.isUntokenized('fieldname'))
Example #6
0
 def testIsNumeric(self):
     """Check isNumeric for registered long/int fields and for specially named fields."""
     fieldRegistry = FieldRegistry()
     fieldRegistry.register("longfield", fieldDefinition=LONGFIELD)
     fieldRegistry.register("intfield", fieldDefinition=INTFIELD)

     # An unregistered plain name is not numeric.
     self.assertFalse(fieldRegistry.isNumeric('field1'))

     # Registered numeric fields, plus two names this test expects to be numeric without registration.
     for numericName in ('longfield', 'intfield', 'range.double.afield', '__key__.field1'):
         self.assertTrue(fieldRegistry.isNumeric(numericName))
Example #7
0
 def testNoTermsFreqField(self):
     """A name registered as NO_TERMS_FREQUENCY_FIELD is created as a 'NoTermsFrequencyField' dict."""
     registry = FieldRegistry()
     registry.register('fieldname', NO_TERMS_FREQUENCY_FIELD)
     field = registry.createField('fieldname', 'value')
     self.assertEqual({
             "type": "NoTermsFrequencyField",
             "name": "fieldname",
             "value": "value",
         }, field)
Example #8
0
 def testNumericField(self):
     """A name registered as NUMERICFIELD is created as a 'NumericField' dict holding the given number."""
     registry = FieldRegistry()
     registry.register('fieldname', NUMERICFIELD)
     field = registry.createField('fieldname', 2010)
     self.assertEqual({
             "type": "NumericField",
             "name": "fieldname",
             "value": 2010,
         }, field)
Example #9
0
 def testNoTermsFreqField(self):
     """A name registered as NO_TERMS_FREQUENCY_FIELD is created as a 'NoTermsFrequencyField' dict."""
     registry = FieldRegistry()
     registry.register('fieldname', NO_TERMS_FREQUENCY_FIELD)
     field = registry.createField('fieldname', 'value')
     self.assertEqual({
             "type": "NoTermsFrequencyField",
             "name": "fieldname",
             "value": "value",
         }, field)
Example #10
0
 def testIsNumeric(self):
     """Check isNumeric for registered long/int fields, a 'range.double.' name and a '__key__.' name."""
     fieldRegistry = FieldRegistry()
     fieldRegistry.register("longfield", fieldDefinition=LONGFIELD)
     fieldRegistry.register("intfield", fieldDefinition=INTFIELD)

     # An unregistered plain name is not numeric.
     self.assertFalse(fieldRegistry.isNumeric('field1'))

     # Registered numeric fields and the 'range.double.' name are numeric.
     for numericName in ('longfield', 'intfield', 'range.double.afield'):
         self.assertTrue(fieldRegistry.isNumeric(numericName))

     # In this variant a '__key__.' name is expected NOT to be numeric.
     self.assertFalse(fieldRegistry.isNumeric('__key__.field1'))
Example #11
0
 def testNumericField(self):
     """A name registered as NUMERICFIELD is created as a 'NumericField' dict holding the given number."""
     registry = FieldRegistry()
     registry.register('fieldname', NUMERICFIELD)
     field = registry.createField('fieldname', 2010)
     self.assertEqual({
             "type": "NumericField",
             "name": "fieldname",
             "value": 2010,
         }, field)
Example #12
0
 def testSpecificField(self):
     """createField gives a TextField dict by default and a stored StringField dict once registered."""
     registry = FieldRegistry()

     # Unregistered name: expected to come back as a plain TextField.
     field = registry.createField('fieldname', 'value')
     self.assertEqual({
             "type": "TextField",
             "name": "fieldname",
             "value": "value",
         }, field)

     # Registered as STRINGFIELD_STORED: type changes and "stored" is set.
     registry.register('fieldname', STRINGFIELD_STORED)
     field = registry.createField('fieldname', 'value')
     self.assertEqual({
             "type": "StringField",
             "name": "fieldname",
             "value": "value",
             "stored": True
         }, field)
Example #13
0
 def testSpecificField(self):
     """createField gives a TextField dict by default and a stored StringField dict once registered."""
     registry = FieldRegistry()

     # Unregistered name: expected to come back as a plain TextField.
     field = registry.createField('fieldname', 'value')
     self.assertEqual({
             "type": "TextField",
             "name": "fieldname",
             "value": "value",
         }, field)

     # Registered as STRINGFIELD_STORED: type changes and "stored" is set.
     registry.register('fieldname', STRINGFIELD_STORED)
     field = registry.createField('fieldname', 'value')
     self.assertEqual({
             "type": "StringField",
             "name": "fieldname",
             "value": "value",
             "stored": True
         }, field)
Example #14
0
    def testRangeQueryAndType(self):
        """rangeQueryAndType returns the expected (range name, Python type) pair per field."""
        fieldRegistry = FieldRegistry()
        fieldRegistry.register("longfield", fieldDefinition=LONGFIELD)
        fieldRegistry.register("intfield", fieldDefinition=INTFIELD)

        # (field name, expected range query name, expected Python type)
        expectations = [
            ('longfield', "Long", long),
            ('intfield', "Int", int),
            ('range.double.field', "Double", float),
            ('anyfield', "String", str),
        ]
        for fieldname, expectedQuery, expectedType in expectations:
            rangeQuery, pythonType = fieldRegistry.rangeQueryAndType(fieldname)
            self.assertEqual(expectedQuery, rangeQuery)
            self.assertEqual(expectedType, pythonType)
Example #15
0
    def testRangeQueryAndType(self):
        """rangeQueryAndType returns the expected (range name, Python type) pair per field."""
        fieldRegistry = FieldRegistry()
        fieldRegistry.register("longfield", fieldDefinition=LONGFIELD)
        fieldRegistry.register("intfield", fieldDefinition=INTFIELD)

        # (field name, expected range query name, expected Python type)
        expectations = [
            ('longfield', "Long", long),
            ('intfield', "Int", int),
            ('range.double.field', "Double", float),
            ('anyfield', "String", str),
        ]
        for fieldname, expectedQuery, expectedType in expectations:
            rangeQuery, pythonType = fieldRegistry.rangeQueryAndType(fieldname)
            self.assertEqual(expectedQuery, rangeQuery)
            self.assertEqual(expectedType, pythonType)
Example #16
0
    def testSortField(self):
        """Check sort field types, default missing values and field creation for 'sorted.' fields."""
        fieldRegistry = FieldRegistry()
        fieldRegistry.register("sorted.longfield", fieldDefinition=LONGFIELD)
        fieldRegistry.register("sorted.intfield", fieldDefinition=INTFIELD)
        fieldRegistry.register("sorted.stringfield", fieldDefinition=STRINGFIELD)
        sortType = fieldRegistry.sortFieldType
        missingValue = fieldRegistry.defaultMissingValueForSort

        # Numeric sort fields: no default missing value is expected here.
        self.assertEqual("Long", sortType("sorted.longfield"))
        self.assertEqual(None, missingValue("sorted.longfield", True))

        self.assertEqual("Int", sortType("sorted.intfield"))
        self.assertEqual(None, missingValue("sorted.intfield", True))

        # String sort fields: the missing value depends on the boolean argument.
        self.assertEqual("String", sortType("sorted.stringfield"))
        self.assertEqual("STRING_FIRST", missingValue("sorted.stringfield", True))
        self.assertEqual("STRING_LAST", missingValue("sorted.stringfield", False))
        self.assertEqual(None, missingValue("score", False))

        createdField = fieldRegistry.createField('sorted.longfield', 'id:1')
        expectedField = {'name': 'sorted.longfield', 'type': 'LongField', 'value': 'id:1', 'sort': True}
        self.assertEqual(expectedField, createdField)
Example #17
0
    def testSortField(self):
        """Check sort field types, default missing values and field creation for 'sorted.' fields."""
        fieldRegistry = FieldRegistry()
        fieldRegistry.register("sorted.longfield", fieldDefinition=LONGFIELD)
        fieldRegistry.register("sorted.intfield", fieldDefinition=INTFIELD)
        fieldRegistry.register("sorted.stringfield", fieldDefinition=STRINGFIELD)
        sortType = fieldRegistry.sortFieldType
        missingValue = fieldRegistry.defaultMissingValueForSort

        # Long fields: missing values are the Java long extremes.
        self.assertEqual("Long", sortType("sorted.longfield"))
        self.assertEqual(JAVA_MIN_LONG, missingValue("sorted.longfield", True))
        self.assertEqual(JAVA_MAX_LONG, missingValue("sorted.longfield", False))

        # Int fields: missing values are the Java int extremes.
        self.assertEqual("Int", sortType("sorted.intfield"))
        self.assertEqual(JAVA_MIN_INT, missingValue("sorted.intfield", True))
        self.assertEqual(JAVA_MAX_INT, missingValue("sorted.intfield", False))

        # String fields: symbolic first/last markers.
        self.assertEqual("String", sortType("sorted.stringfield"))
        self.assertEqual("STRING_FIRST", missingValue("sorted.stringfield", True))
        self.assertEqual("STRING_LAST", missingValue("sorted.stringfield", False))

        self.assertEqual(None, missingValue("score", False))

        # The string value '1' is expected back as the integer 1 for a long field.
        createdField = fieldRegistry.createField('sorted.longfield', '1')
        expectedField = {'name': 'sorted.longfield', 'type': 'LongField', 'value': 1, 'sort': True}
        self.assertEqual(expectedField, createdField)
Example #18
0
 def testMagicExact(self):
     """'=' on a field registered as untokenized StringField composes like 'exact'."""
     # Compose the reference result with the composer that is current at this point.
     parsedExact = parseCql('animal exact "cats dogs"')
     exactResult = self.composer.compose(parsedExact)

     # Swap in a composer whose registry knows 'animal' as a StringField type.
     registry = FieldRegistry()
     registry.register('animal', StringField.TYPE_NOT_STORED)
     settings = LuceneSettings(fieldRegistry=registry)
     self.composer = LuceneQueryComposer(
         unqualifiedTermFields=[("unqualified", 1.0)],
         luceneSettings=settings)
     self.assertConversion(exactResult, 'animal = "cats dogs"')
 def setUp(self):
     """Create self.composer with 'intField' and 'longField' registered as numeric fields."""
     super(LuceneQueryComposerTest, self).setUp()
     registry = FieldRegistry()
     registry.register("intField", fieldDefinition=INTFIELD)
     registry.register("longField", fieldDefinition=LONGFIELD)
     settings = LuceneSettings(fieldRegistry=registry)
     self.composer = LuceneQueryComposer(
         unqualifiedTermFields=[("unqualified", 1.0)],
         luceneSettings=settings)
Example #20
0
 def testPhraseQueryPossible(self):
     """phraseQueryPossible is false for a NO_TERMS_FREQUENCY_FIELD name and true for another name."""
     fieldRegistry = FieldRegistry()
     fieldRegistry.register('fieldname', NO_TERMS_FREQUENCY_FIELD)

     possibleForRegistered = fieldRegistry.phraseQueryPossible('fieldname')
     self.assertFalse(possibleForRegistered)

     possibleForOther = fieldRegistry.phraseQueryPossible('other.fieldname')
     self.assertTrue(possibleForOther)
Example #21
0
 def testPhraseQueryPossible(self):
     """phraseQueryPossible is false for a NO_TERMS_FREQUENCY_FIELDTYPE name and true for another name."""
     fieldRegistry = FieldRegistry()
     fieldRegistry.register('fieldname', NO_TERMS_FREQUENCY_FIELDTYPE)

     possibleForRegistered = fieldRegistry.phraseQueryPossible('fieldname')
     self.assertFalse(possibleForRegistered)

     possibleForOther = fieldRegistry.phraseQueryPossible('other.fieldname')
     self.assertTrue(possibleForOther)
Example #22
0
from digitalecollectie.erfgeo import VERSION_STRING
from digitalecollectie.erfgeo.namespaces import namespaces
from digitalecollectie.erfgeo.maybecombinewithsummary import COMBINED_METADATA_PREFIX

from digitalecollectie.erfgeo.index.constants import ALL_FIELD
from digitalecollectie.erfgeo.index.lxmltofieldslist import LxmlToFieldsList
from digitalecollectie.erfgeo.index.fieldslisttolucenedocument import FieldsListToLuceneDocument
from digitalecollectie.erfgeo.index.indexfields import IndexFields


# Absolute path of the directory containing this module.
workingPath = dirname(abspath(__file__))

# (field name, boost) pairs; only ALL_FIELD with boost 1.0 is configured here.
unqualifiedTermFields = [(ALL_FIELD, 1.0)]

# Module-level field registry: drilldown fields come from IndexFields, and the
# two geo coordinate fields are registered explicitly as double fields.
fieldRegistry = FieldRegistry(drilldownFields=IndexFields.drilldownFields)
fieldRegistry.register('dcterms:spatial.geo:long', fieldDefinition=DOUBLEFIELD)
fieldRegistry.register('dcterms:spatial.geo:lat', fieldDefinition=DOUBLEFIELD)

# NOTE(review): presumably keyword options for an lxml parser (huge_tree /
# remove_blank_text) -- the consumer is not visible here; confirm.
parseHugeOptions = dict(huge_tree=True, remove_blank_text=True)


def createErfGeoEnrichmentPeriodicDownloadHelix(reactor, lucene, config, statePath):
    """Set up a PeriodicDownload against the local erfgeoEnrich service.

    The port is read from config['erfgeoEnrich.portNumber'] and the download
    name is derived from COMBINED_METADATA_PREFIX.

    NOTE(review): the visible body ends after constructing the
    PeriodicDownload; ``lucene`` and ``statePath`` are not used and nothing is
    returned. The function looks truncated here -- confirm against the full
    source.
    """
    # Config values may be strings, hence the explicit int() conversion.
    erfgeoEnrichPortNumber = int(config['erfgeoEnrich.portNumber'])
    downloadName = 'erfgeoEnrich-%s' % COMBINED_METADATA_PREFIX
    # The download targets localhost and is created with autoStart enabled.
    erfGeoEnrichPeriodicDownload = PeriodicDownload(
        reactor,
        host='127.0.0.1',
        port=erfgeoEnrichPortNumber,
        name=downloadName,
        autoStart=True)
class QueryExpressionToLuceneQueryDictTest(SeecrTestCase):
    """Tests for converting query expressions (or CQL strings) into Lucene query dicts.

    Every test goes through ``convert`` / ``assertConversion`` at the bottom of
    this class. A test can tune the converter by setting
    ``self.unqualifiedFields`` or ``self.fieldRegistry`` before asserting.
    """

    def testTermQuery(self):
        """A 'field = value' search term becomes a TermQuery dict."""
        self.assertConversion({
            "type": "TermQuery",
            "term": {
                "field":"field",
                "value": "value",
            }
        }, QueryExpression.searchterm("field", "=", "value"))
        self.assertConversion({"term": {"field": "field", "value": "value"}, "type": "TermQuery"}, QueryExpression.searchterm("field", "=", "value"))


    def testRightHandSideIsLowercase(self):
        """An unqualified term is lowercased in the resulting TermQuery."""
        self.assertConversion({'boost': 1.0, 'term': {'field': 'unqualified', 'value': 'cat'}, 'type': 'TermQuery'}, QueryExpression.searchterm(term="CaT"))

    def testOneTermOutputWithANumber(self):
        """A numeric-looking unqualified term is kept as a string value."""
        self.assertConversion({'boost': 1.0, 'term': {'field': 'unqualified', 'value': '2005'}, 'type': 'TermQuery'}, QueryExpression.searchterm(term="2005"))

    def testMatchAllQuery(self):
        """The single term '*' becomes a MatchAllDocsQuery."""
        self.assertConversion({"type": "MatchAllDocsQuery"}, QueryExpression.searchterm(term="*"))

    def testUnqualifiedTermFields(self):
        """With one unqualified field configured, the term query targets that field with its boost."""
        self.unqualifiedFields = [('aField', 1.0)]
        self.assertConversion({"type": "TermQuery", "term": {"field": "aField", "value": "value"}, 'boost': 1.0}, QueryExpression.searchterm(term="value"))

    def testMultipleUnqualifiedTermFields(self):
        """With several unqualified fields, the term becomes a BooleanQuery of SHOULD clauses."""
        self.unqualifiedFields = [('aField', 1.0), ('oField', 2.0)]
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "aField", "value": "value"},
                        "boost": 1.0,
                        "occur": "SHOULD"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "oField", "value": "value"},
                        "boost": 2.0,
                        "occur": "SHOULD"
                    }
                ]
            }, QueryExpression.searchterm(term="value"))

    def testBooleanAndQuery(self):
        """Operands of an AND expression become MUST clauses."""
        expr = QueryExpression.nested(operator='AND')
        expr.operands=[
                QueryExpression.searchterm("field1", "=", "value1"),
                QueryExpression.searchterm("field2", "=", "value2")
            ]
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "field1", "value": "value1"},
                        "occur": "MUST"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "field2", "value": "value2"},
                        "occur": "MUST"
                    }
                ]
            }, expr)

    def testBooleanOrQuery(self):
        """Operands of an OR expression become SHOULD clauses."""
        expr = QueryExpression.nested(operator='OR')
        expr.operands=[
                QueryExpression.searchterm("field1", "=", "value1"),
                QueryExpression.searchterm("field2", "=", "value2")
            ]
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "field1", "value": "value1"},
                        "occur": "SHOULD"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "field2", "value": "value2"},
                        "occur": "SHOULD"
                    }
                ]
            }, expr)

    def testBooleanNotQuery(self):
        """An operand flagged must_not becomes a MUST_NOT clause."""
        expr = QueryExpression.nested(operator='AND')
        expr.operands=[
                QueryExpression.searchterm("field1", "=", "value1"),
                QueryExpression.searchterm("field2", "=", "value2")
            ]
        expr.operands[1].must_not = True
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "field1", "value": "value1"},
                        "occur": "MUST"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "field2", "value": "value2"},
                        "occur": "MUST_NOT"
                    }
                ]
            }, expr)

    def testBooleanNotQueryNested(self):
        """A nested expression flagged must_not becomes a MUST_NOT BooleanQuery clause."""
        expr = QueryExpression.nested(operator='AND')
        nestedNotExpr = QueryExpression.nested(operator='AND')
        nestedNotExpr.must_not = True
        nestedNotExpr.operands = [
            QueryExpression.searchterm("field2", "=", "value2"),
            QueryExpression.searchterm("field3", "=", "value3")
        ]
        expr.operands = [QueryExpression.searchterm("field1", "=", "value1"), nestedNotExpr]
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "TermQuery",
                        "term": {"field": "field1", "value": "value1"},
                        "occur": "MUST"
                    }, {
                        "type": "BooleanQuery",
                        "occur": "MUST_NOT",
                        "clauses": [
                            {
                                "type": "TermQuery",
                                "term": {"field": "field2", "value": "value2"},
                                "occur": "MUST"
                            },
                            {
                                "type": "TermQuery",
                                "term": {"field": "field3", "value": "value3"},
                                "occur": "MUST"
                            }
                        ]
                    }
                ]
            }, expr)

    def testNotExpression(self):
        """A top-level must_not term is wrapped as MatchAllDocs MUST plus the term MUST_NOT."""
        expr = QueryExpression.searchterm("field", "=", "value")
        expr.must_not = True
        self.assertConversion({
                "type": "BooleanQuery",
                "clauses": [
                    {
                        "type": "MatchAllDocsQuery",
                        "occur": "MUST"
                    }, {
                        "type": "TermQuery",
                        "term": {"field": "field", "value": "value"},
                        "occur": "MUST_NOT"
                    }
                ]
            }, expr)

    def testPhraseOutput(self):
        """A quoted multi-word term becomes a PhraseQuery with one term per word."""
        self.assertConversion({
                "type": "PhraseQuery",
                "boost": 1.0,
                "terms": [
                    {"field": "unqualified", "value": "cats"},
                    {"field": "unqualified", "value": "dogs"}
                ]
            }, QueryExpression.searchterm(term='"cats dogs"'))

    # NOTE(review): the tests below are disabled. They still build Lucene query
    # objects (PhraseQuery, BooleanQuery, TermQuery) instead of query dicts.

    # def testWhitespaceAnalyzer(self):
    #     self._analyzer = WhitespaceAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "kat"))
    #     query.add(Term("unqualified", "hond"))
    #     self.assertConversion(query, cql='"kat hond"')

    # def testPhraseOutputDoesNoDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "katten"))
    #     query.add(Term("unqualified", "honden"))
    #     self.assertConversion(query, cql='"katten honden"')

    # def testDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertConversion(query, cql='honden')

    # def testDutchStemmingOnlyForGivenFields(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer(['unqualified'])
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertConversion(query, cql='honden')

    #     query = TermQuery(Term("field", "honden"))
    #     self.assertConversion(query, cql='field=honden')

    # def testIgnoreStemming(self):
    #     self._ignoredStemmingForWords = ['kate', 'wageningen']
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = TermQuery(Term("unqualified", "kate"))
    #     self.assertConversion(query, cql='kate')
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "katten")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "kat")), BooleanClause.Occur.SHOULD)
    #     self.assertConversion(query, cql='katten')

    def testPhraseQueryIsStandardAnalyzed(self):
        """A quoted phrase is split into lowercased terms with parentheses dropped."""
        expected = dict(type="PhraseQuery", terms=[], boost=1.0)
        for term in ["vol.118", "2008", "nr.3", "march", "p.435-444"]:
            expected["terms"].append(dict(field="unqualified", value=term))
        # Named cqlPhrase rather than `input` to avoid shadowing the builtin.
        cqlPhrase = '"vol.118 (2008) nr.3 (March) p.435-444"'
        self.assertConversion(expected, cql=cqlPhrase)

    def testOneTermPhraseQueryUsesStandardAnalyzed(self):
        """The single term 'aap:noot' is expected to yield a two-term PhraseQuery."""
        expected = dict(type="PhraseQuery", terms=[], boost=1.0)
        expected["terms"].append(dict(field="unqualified", value='aap'))
        expected["terms"].append(dict(field="unqualified", value='noot'))
        self.assertConversion(expected, cql='aap:noot')

    def testCreatesEmptyPhraseQueryIfNoValidCharsFound(self):
        """The term ':' is expected to yield a PhraseQuery without terms."""
        expected = dict(type="PhraseQuery", terms=[], boost=1.0)
        self.assertConversion(expected, cql=':')

    def testStandardAnalyserWithoutStopWords(self):
        """Common words such as 'is' and 'the' are kept in the phrase terms."""
        expected = dict(type="PhraseQuery", terms=[], boost=1.0)
        for term in ["no", "is", "the", "only", "option"]:
            expected["terms"].append(dict(field="unqualified", value=term))
        self.assertConversion(expected, cql='"no is the only option"')

    def testDiacritics(self):
        """Accented and unaccented spellings convert to the same term value."""
        expected = termQuery('title', 'moree')
        self.assertConversion(expected, cql='title=Moree')
        self.assertConversion(expected, cql='title=Morée')
        self.assertConversion(expected, cql='title=Morèe')

        # self._analyzer = MerescoDutchStemmingAnalyzer()
        # query = PhraseQuery()
        # query.add(Term("title", "waar"))
        # query.add(Term("title", "is"))
        # query.add(Term("title", "moree"))
        # query.add(Term("title", "vandaag"))
        # self.assertConversion(query, cql='title="Waar is Morée vandaag"')

    def testDiacriticsShouldBeNormalizedNFC(self):
        """A combining accent splits the term unless the CQL is NFC-normalized first."""
        pq = dict(type="PhraseQuery", terms=[])
        pq["terms"].append(dict(field="title", value="more"))
        pq["terms"].append(dict(field="title", value="e"))
        self.assertConversion(pq, cql='title=More\xcc\x81e') # Combined
        from unicodedata import normalize
        self.assertConversion(termQuery('title', 'moree'), cql=normalize('NFC', unicode('title=More\xcc\x81e')))

    def testIndexRelationTermOutput(self):
        """'field=term' gives a TermQuery; a quoted phrase gives a lowercased PhraseQuery."""
        self.assertConversion(termQuery('animal', 'cats'), cql='animal=cats')
        query = dict(type="PhraseQuery", terms=[])
        query["terms"].append(dict(field="animal", value="cats"))
        query["terms"].append(dict(field="animal", value="dogs"))
        self.assertConversion(query, cql='animal="cats dogs"')
        self.assertConversion(query, cql='animal="catS Dogs"')

    def testIndexRelationExactTermOutput(self):
        """The 'exact' relation keeps the whole value, including spaces and case, as one term."""
        self.assertConversion(termQuery("animal", "hairy cats"), cql='animal exact "hairy cats"')
        self.assertConversion(termQuery("animal", "Capital Cats"), cql='animal exact "Capital Cats"')

    def testBoost(self):
        """A '/boost=' relation modifier sets the boost of the term query."""
        query = termQuery("title", "cats", boost=2.0)
        self.assertConversion(query, cql="title =/boost=2.0 cats")

    def testWildcards(self):
        """Covers prefix queries for a trailing '*' and inputs expected to stay plain term queries."""
        query = prefixQuery('unqualified', 'prefix', 1.0)
        self.assertConversion(query, cql='prefix*')
        self.assertConversion(query, cql='PREfix*')
        query = prefixQuery('field', 'prefix')
        self.assertConversion(query, cql='field="PREfix*"')
        self.assertConversion(query, cql='field=prefix*')
        query = prefixQuery('field', 'oc-0123')
        self.assertConversion(query, cql='field="oc-0123*"')
        query = termQuery('field', 'p')
        self.assertConversion(query, cql='field="P*"')
        #only prefix queries for now
        query = termQuery('field', 'post')
        self.assertConversion(query, cql='field="*post"')

        query = termQuery('field', 'prefix')
        self.assertConversion(query, cql='field=prefix**')

        self.unqualifiedFields = [("field0", 0.2), ("field1", 2.0)]

        query = dict(type="BooleanQuery", clauses=[])
        query["clauses"].append(prefixQuery("field0", "prefix", 0.2))
        query["clauses"][0]["occur"] = "SHOULD"

        query["clauses"].append(prefixQuery("field1", "prefix", 2.0))
        query["clauses"][1]["occur"] = "SHOULD"
        self.assertConversion(query, cql="prefix*")

    def testMagicExact(self):
        """'=' on a field registered as STRINGFIELD converts like 'exact'."""
        exactResult = self.convert(cql='animal exact "cats dogs"')
        self.fieldRegistry = FieldRegistry()
        self.fieldRegistry.register('animal', STRINGFIELD)
        self.assertConversion(exactResult, cql='animal = "cats dogs"')

    def testTextRangeQuery(self):
        """Comparison relations on an unregistered field give String RangeQuery dicts."""
        # (field, lowerTerm, upperTerm, includeLower, includeUpper)
        q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm='value', upperTerm=None, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='field > value')
        q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm='value', upperTerm=None, includeLower=True, includeUpper=False)
        self.assertConversion(q, cql='field >= value')
        q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm=None, upperTerm='value', includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='field < value')
        q = dict(type="RangeQuery", rangeType="String", field='field', lowerTerm=None, upperTerm='value', includeLower=False, includeUpper=True)
        self.assertConversion(q, cql='field <= value')

    def testIntRangeQuery(self):
        """Comparison relations on 'intField' give Int RangeQuery dicts with integer bounds."""
        # (field, lowerTerm, upperTerm, includeLower, includeUpper)
        q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=1, upperTerm=None, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='intField > 1')
        q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=1, upperTerm=None, includeLower=True, includeUpper=False)
        self.assertConversion(q, cql='intField >= 1')
        q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='intField < 3')
        q = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=True)
        self.assertConversion(q, cql='intField <= 3')

    def testLongRangeQuery(self):
        """Comparison relations on 'longField' give Long RangeQuery dicts with integer bounds."""
        # (field, lowerTerm, upperTerm, includeLower, includeUpper)
        q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=1, upperTerm=None, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='longField > 1')
        q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=1, upperTerm=None, includeLower=True, includeUpper=False)
        self.assertConversion(q, cql='longField >= 1')
        q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=False)
        self.assertConversion(q, cql='longField < 3')
        q = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=None, upperTerm=3, includeLower=False, includeUpper=True)
        self.assertConversion(q, cql='longField <= 3')

    def testDrilldownFieldQuery(self):
        """A hierarchical drilldown field gives a DrillDown term whose path is split on '>'."""
        self.fieldRegistry = FieldRegistry([DrilldownField('field', hierarchical=True)])
        self.assertConversion(dict(type="TermQuery", term=dict(field="field", path=["value"], type="DrillDown")), cql="field = value")
        self.assertConversion(dict(type="TermQuery", term=dict(field="field", path=["value", "value1"], type="DrillDown")), cql="field = \"value>value1\"")

    def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
        """A phrase query skips the unqualified field registered as NO_TERMS_FREQUENCY_FIELD."""
        self.fieldRegistry = FieldRegistry()
        self.fieldRegistry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELD)
        self.unqualifiedFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
        expected = dict(type="PhraseQuery", terms=[
                dict(field="unqualified", value="phrase"),
                dict(field="unqualified", value="query")
            ], boost=1.0)
        self.assertConversion(expected, cql='"phrase query"')

    def testQueryForIntField(self):
        """'=' and 'exact' on 'intField' both give an inclusive single-value Int range."""
        expected = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True)
        self.assertConversion(expected, cql="intField=5")

        expected = dict(type="RangeQuery", rangeType="Int", field='intField', lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True)
        self.assertConversion(expected, cql="intField exact 5")

    def testQueryForLongField(self):
        """'=' on 'longField' gives an inclusive single-value Long range."""
        expected = dict(type="RangeQuery", rangeType="Long", field='longField', lowerTerm=long(5), upperTerm=long(5), includeLower=True, includeUpper=True)
        self.assertConversion(expected, cql="longField=5")

    def testQueryForDoubleField(self):
        """'=' on a 'range.double.' field gives an inclusive single-value Double range."""
        expected = dict(type="RangeQuery", rangeType="Double", field='range.double.field', lowerTerm=float(5), upperTerm=float(5), includeLower=True, includeUpper=True)
        self.assertConversion(expected, cql="range.double.field=5")

    def testWildcardQuery(self):
        """A value containing '?' wildcards gives a WildcardQuery dict."""
        self.fieldRegistry = FieldRegistry()
        expected = dict(type="WildcardQuery", term=dict(field="field", value="???*"))
        self.assertConversion(expected, cql='field=???*')

    def testUnsupportedCQL(self):
        """Converting a query with an unsupported relation raises UnsupportedCQL."""
        for relation in ['<>']:
            # assertRaises fails the test when no UnsupportedCQL is raised.
            self.assertRaises(UnsupportedCQL, self.convert, cql='index %s term' % relation)

    def convert(self, expression=None, cql=None):
        """Convert ``expression`` to a Lucene query dict; parse ``cql`` when no expression is given.

        Optional attributes on the test instance tune the converter:
        ``unqualifiedFields`` (default ``[("unqualified", 1.0)]``), ``_analyzer``,
        ``fieldRegistry`` (default: a fresh registry with 'intField' and
        'longField' registered) and ``_ignoredStemmingForWords`` (default None).
        """
        if expression is None:
            expression = cqlToExpression(parseCql(cql))
        unqualifiedFields = getattr(self, 'unqualifiedFields', [("unqualified", 1.0)])
        settings = LuceneSettings()
        if hasattr(self, '_analyzer'):
            settings.analyzer = self._analyzer
        if hasattr(self, 'fieldRegistry'):
            settings.fieldRegistry = self.fieldRegistry
        else:
            settings.fieldRegistry = FieldRegistry()
            settings.fieldRegistry.register("intField", fieldDefinition=INTFIELD)
            settings.fieldRegistry.register("longField", fieldDefinition=LONGFIELD)
        converter = QueryExpressionToLuceneQueryDict(
            unqualifiedTermFields=unqualifiedFields,
            luceneSettings=settings,
            ignoreStemmingForWords=getattr(self, '_ignoredStemmingForWords', None)
        )
        return converter.convert(expression)

    def assertConversion(self, expected, expression=None, cql=None):
        """Assert that converting ``expression`` (or ``cql``) yields ``expected``."""
        result = self.convert(expression=expression, cql=cql)
        self.assertEqual(expected, result)
Example #24
0
 def testNoTermsFreqField(self):
     """A field registered as NO_TERMS_FREQUENCY_FIELDTYPE is indexed with DOCS_ONLY index options."""
     registry = FieldRegistry()
     registry.register('fieldname', NO_TERMS_FREQUENCY_FIELDTYPE)
     field = registry.createField('fieldname', 'value')
     self.assertEqual(FieldInfo.IndexOptions.DOCS_ONLY,
                      field.fieldType().indexOptions())
class QueryExpressionToLuceneQueryDictTest(SeecrTestCase):
    def testTermQuery(self):
        """Assert that 'field = value' converts to a TermQuery dict without boost."""
        expected = dict(type="TermQuery", term=dict(field="field", value="value"))
        # The same conversion is asserted twice, each time on a freshly built expression.
        for _ in range(2):
            self.assertEquals(expected, self._convert(QueryExpression.searchterm("field", "=", "value")))

    def testRightHandSideIsLowercase(self):
        """Assert that the unqualified term "CaT" is converted with the value 'cat'."""
        expected = dict(type='TermQuery', term=dict(field='unqualified', value='cat'), boost=1.0)
        self.assertEquals(expected, self._convert(QueryExpression.searchterm(term="CaT")))

    def testOneTermOutputWithANumber(self):
        """Assert that the numeric-looking term "2005" converts to a TermQuery with value '2005'."""
        expected = dict(type='TermQuery', term=dict(field='unqualified', value='2005'), boost=1.0)
        self.assertEquals(expected, self._convert(QueryExpression.searchterm(term="2005")))

    def testMatchAllQuery(self):
        """Assert that the single term "*" converts to a MatchAllDocsQuery dict."""
        matchAll = QueryExpression.searchterm(term="*")
        self.assertEquals(dict(type="MatchAllDocsQuery"), self._convert(matchAll))

    def testUnqualifiedTermFields(self):
        """Assert that an unqualified term is searched in the single configured unqualified field."""
        self.unqualifiedFields = [('aField', 1.0)]
        expected = dict(type="TermQuery", term=dict(field="aField", value="value"), boost=1.0)
        self.assertEquals(expected, self._convert(QueryExpression.searchterm(term="value")))

    def testUnqualifiedTermFieldsWithNestedExpression(self):
        """Assert that unqualified terms inside an AND expression become MUST clauses on 'aField'."""
        self.unqualifiedFields = [('aField', 1.0)]
        expr = QueryExpression.nested(operator='AND')
        expr.operands = [QueryExpression.searchterm(term=term) for term in ("value1", "value2")]
        expectedClauses = [
            dict(type='TermQuery', occur='MUST', term=dict(field='aField', value=value), boost=1.0)
            for value in (u'value1', u'value2')
        ]
        self.assertEquals(dict(type='BooleanQuery', clauses=expectedClauses), self._convert(expr))

    def testMultipleUnqualifiedTermFields(self):
        """Assert that an unqualified term becomes one boosted SHOULD clause per unqualified field."""
        self.unqualifiedFields = [('aField', 1.0), ('oField', 2.0)]
        expectedClauses = [
            dict(type="TermQuery", term=dict(field=fieldname, value="value"), boost=boost, occur="SHOULD")
            for fieldname, boost in [('aField', 1.0), ('oField', 2.0)]
        ]
        expected = dict(type="BooleanQuery", clauses=expectedClauses)
        self.assertEquals(expected, self._convert(QueryExpression.searchterm(term="value")))

    def testBooleanAndQuery(self):
        """Assert that an AND of two search terms converts to a BooleanQuery with two MUST clauses."""
        expr = QueryExpression.nested(operator='AND')
        expr.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            QueryExpression.searchterm("field2", "=", "value2")
        ]
        expected = dict(type="BooleanQuery", clauses=[
            dict(type="TermQuery", term=dict(field="field1", value="value1"), occur="MUST"),
            dict(type="TermQuery", term=dict(field="field2", value="value2"), occur="MUST"),
        ])
        self.assertEquals(expected, self._convert(expr))

    def testBooleanOrQuery(self):
        """Assert that an OR of two search terms converts to a BooleanQuery with two SHOULD clauses."""
        expr = QueryExpression.nested(operator='OR')
        expr.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            QueryExpression.searchterm("field2", "=", "value2")
        ]
        expected = dict(type="BooleanQuery", clauses=[
            dict(type="TermQuery", term=dict(field="field1", value="value1"), occur="SHOULD"),
            dict(type="TermQuery", term=dict(field="field2", value="value2"), occur="SHOULD"),
        ])
        self.assertEquals(expected, self._convert(expr))

    def testBooleanNotQuery(self):
        """Assert that an operand flagged must_not becomes a MUST_NOT clause in the BooleanQuery."""
        expr = QueryExpression.nested(operator='AND')
        expr.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            QueryExpression.searchterm("field2", "=", "value2")
        ]
        expr.operands[1].must_not = True
        expected = dict(type="BooleanQuery", clauses=[
            dict(type="TermQuery", term=dict(field="field1", value="value1"), occur="MUST"),
            dict(type="TermQuery", term=dict(field="field2", value="value2"), occur="MUST_NOT"),
        ])
        self.assertEquals(expected, self._convert(expr))

    def testBooleanNotQueryNested(self):
        """Assert that a nested AND expression flagged must_not becomes a MUST_NOT BooleanQuery clause."""
        def mustTerm(field, value):
            return dict(type="TermQuery", term=dict(field=field, value=value), occur="MUST")

        expr = QueryExpression.nested(operator='AND')
        negated = QueryExpression.nested(operator='AND')
        negated.must_not = True
        negated.operands = [
            QueryExpression.searchterm("field2", "=", "value2"),
            QueryExpression.searchterm("field3", "=", "value3")
        ]
        expr.operands = [QueryExpression.searchterm("field1", "=", "value1"), negated]

        expectedNegated = dict(
            type="BooleanQuery",
            occur="MUST_NOT",
            clauses=[mustTerm("field2", "value2"), mustTerm("field3", "value3")])
        expected = dict(type="BooleanQuery", clauses=[mustTerm("field1", "value1"), expectedNegated])
        self.assertEquals(expected, self._convert(expr))

    def testNotExpression(self):
        """Assert that a lone must_not term converts to MatchAllDocs (MUST) combined with the term (MUST_NOT)."""
        expr = QueryExpression.searchterm("field", "=", "value")
        expr.must_not = True
        expected = dict(type="BooleanQuery", clauses=[
            dict(type="MatchAllDocsQuery", occur="MUST"),
            dict(type="TermQuery", term=dict(field="field", value="value"), occur="MUST_NOT"),
        ])
        self.assertEquals(expected, self._convert(expr))

    def testPhraseOutput(self):
        """Assert that a quoted two-word term converts to a PhraseQuery with one term per word."""
        expectedTerms = [dict(field="unqualified", value=word) for word in ("cats", "dogs")]
        expected = dict(type="PhraseQuery", boost=1.0, terms=expectedTerms)
        self.assertEquals(expected, self._convert(QueryExpression.searchterm(term='"cats dogs"')))

    # def testWhitespaceAnalyzer(self):
    #     self._analyzer = WhitespaceAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "kat"))
    #     query.add(Term("unqualified", "hond"))
    #     self.assertEquals(query, self._convert('"kat hond"'))

    # def testPhraseOutputDoesNoDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "katten"))
    #     query.add(Term("unqualified", "honden"))
    #     self.assertEquals(query, self._convert('"katten honden"'))

    # def testDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertEquals(query, self._convert('honden'))

    # def testDutchStemmingOnlyForGivenFields(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer(['unqualified'])
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertEquals(query, self._convert('honden'))

    #     query = TermQuery(Term("field", "honden"))
    #     self.assertEquals(query, self._convert('field=honden'))

    # def testIgnoreStemming(self):
    #     self._ignoredStemmingForWords = ['kate', 'wageningen']
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = TermQuery(Term("unqualified", "kate"))
    #     self.assertEquals(query, 'kate')
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "katten")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "kat")), BooleanClause.Occur.SHOULD)
    #     self.assertEquals(query, self._convert('katten'))

    def testPhraseQueryIsStandardAnalyzed(self):
        """Assert the terms a phrase with dots, brackets and capitals is split into."""
        words = ["vol.118", "2008", "nr.3", "march", "p.435-444"]
        expected = dict(
            type="PhraseQuery",
            terms=[dict(field="unqualified", value=word) for word in words],
            boost=1.0)
        self.assertEquals(expected, self._convert('"vol.118 (2008) nr.3 (March) p.435-444"'))

    def testOneTermPhraseQueryUsesStandardAnalyzed(self):
        """Assert that the single term 'aap:noot' converts to a PhraseQuery of 'aap' and 'noot'."""
        expectedTerms = [dict(field="unqualified", value=word) for word in ('aap', 'noot')]
        expected = dict(type="PhraseQuery", terms=expectedTerms, boost=1.0)
        self.assertEquals(expected, self._convert('aap:noot'))

    def testCreatesEmptyPhraseQueryIfNoValidCharsFound(self):
        """Assert that the term ':' converts to a PhraseQuery without any terms."""
        emptyPhrase = {"type": "PhraseQuery", "terms": [], "boost": 1.0}
        self.assertEquals(emptyPhrase, self._convert(':'))

    def testStandardAnalyserWithoutStopWords(self):
        """Assert that words such as 'no', 'is' and 'the' are all kept as phrase terms."""
        words = ["no", "is", "the", "only", "option"]
        expected = dict(
            type="PhraseQuery",
            terms=[dict(field="unqualified", value=word) for word in words],
            boost=1.0)
        self.assertEquals(expected, self._convert('"no is the only option"'))

    def testDiacritics(self):
        """Assert that 'Moree', 'Morée' and 'Morèe' all convert to the same term query for 'moree'."""
        expected = termQuery('title', 'moree')
        for cql in ('title=Moree', 'title=Morée', 'title=Morèe'):
            self.assertEquals(expected, self._convert(cql))

        # self._analyzer = MerescoDutchStemmingAnalyzer()
        # query = PhraseQuery()
        # query.add(Term("title", "waar"))
        # query.add(Term("title", "is"))
        # query.add(Term("title", "moree"))
        # query.add(Term("title", "vandaag"))
        # self.assertEquals(query, self._convert('title="Waar is Morée vandaag"'))

    def testDiacriticsShouldBeNormalizedNFC(self):
        """Assert how a decomposed accent converts, with and without NFC normalization.

        Without normalization the query is expected to split into the phrase
        terms 'more' and 'e'; after NFC normalization it is expected to be the
        single term 'moree'.
        """
        from unicodedata import normalize
        # UTF-8 bytes: the first 'e' is followed by U+0301 (combining acute accent).
        decomposed = 'title=More\xcc\x81e'
        pq = dict(type="PhraseQuery", terms=[])
        pq["terms"].append(dict(field="title", value="more"))
        pq["terms"].append(dict(field="title", value="e"))
        self.assertEquals(pq, self._convert(decomposed)) # Combined
        # Decode explicitly as UTF-8: unicode(<bytes>) uses the interpreter's
        # default encoding and raises UnicodeDecodeError when that is ASCII.
        self.assertEquals(
            termQuery('title', 'moree'),
            self._convert(normalize('NFC', decomposed.decode('utf-8'))))

    def testIndexRelationTermOutput(self):
        """Assert term and phrase conversion for 'index=term' queries, including mixed-case phrases."""
        self.assertEquals(termQuery('animal', 'cats'), self._convert('animal=cats'))
        expectedPhrase = dict(
            type="PhraseQuery",
            terms=[dict(field="animal", value=word) for word in ("cats", "dogs")])
        for cql in ('animal="cats dogs"', 'animal="catS Dogs"'):
            self.assertEquals(expectedPhrase, self._convert(cql))

    def testIndexRelationExactTermOutput(self):
        """Assert that the 'exact' relation keeps the quoted value as one term, case included."""
        for phrase in ("hairy cats", "Capital Cats"):
            expected = termQuery("animal", phrase)
            self.assertEquals(expected, self._convert('animal exact "%s"' % phrase))

    def testBoost(self):
        """Assert that the '/boost=2.0' relation modifier ends up as boost 2.0 on the term query."""
        boosted = termQuery("title", "cats", boost=2.0)
        converted = self._convert("title =/boost=2.0 cats")
        self.assertEquals(boosted, converted)

    def testWildcards(self):
        """Assert which '*' terms convert to prefix queries and which fall back to term queries."""
        unqualifiedPrefix = prefixQuery('unqualified', 'prefix', 1.0)
        for cql in ('prefix*', 'PREfix*'):
            self.assertEquals(unqualifiedPrefix, self._convert(cql))

        fieldPrefix = prefixQuery('field', 'prefix')
        for cql in ('field="PREfix*"', 'field=prefix*'):
            self.assertEquals(fieldPrefix, self._convert(cql))

        self.assertEquals(prefixQuery('field', 'oc-0123'), self._convert('field="oc-0123*"'))
        self.assertEquals(termQuery('field', 'p'), self._convert('field="P*"'))
        #only prefix queries for now
        self.assertEquals(termQuery('field', 'post'), self._convert('field="*post"'))

        self.assertEquals(termQuery('field', 'prefix'), self._convert('field=prefix**'))

        self.unqualifiedFields = [("field0", 0.2), ("field1", 2.0)]

        expectedClauses = []
        for fieldname, boost in [("field0", 0.2), ("field1", 2.0)]:
            clause = prefixQuery(fieldname, "prefix", boost)
            expectedClauses.append(clause)
            clause["occur"] = "SHOULD"
        expected = dict(type="BooleanQuery", clauses=expectedClauses)
        self.assertEquals(expected, self._convert("prefix*"))

    def testMagicExact(self):
        """Assert that '=' on a field registered as STRINGFIELD converts like an 'exact' query."""
        # The reference result is produced before the custom registry is installed.
        expected = self._convert('animal exact "cats dogs"')
        registry = FieldRegistry()
        self.fieldRegistry = registry
        registry.register('animal', STRINGFIELD)
        self.assertEquals(expected, self._convert('animal = "cats dogs"'))

    def testTextRangeQuery(self):
        """Assert the String RangeQuery dicts produced for >, >=, < and <= on a plain field."""
        # (cql, lowerTerm, upperTerm, includeLower, includeUpper)
        cases = [
            ('field > value', 'value', None, False, True),
            ('field >= value', 'value', None, True, True),
            ('field < value', None, 'value', True, False),
            ('field <= value', None, 'value', True, True),
        ]
        for cql, lowerTerm, upperTerm, includeLower, includeUpper in cases:
            expected = dict(
                type="RangeQuery", rangeType="String", field='field',
                lowerTerm=lowerTerm, upperTerm=upperTerm,
                includeLower=includeLower, includeUpper=includeUpper)
            self.assertEquals(expected, self._convert(cql))

    def testIntRangeQuery(self):
        """Assert the Int RangeQuery dicts produced for comparison, '=' and 'exact' on 'intField'."""
        def intRange(lowerTerm, upperTerm, includeLower, includeUpper):
            return dict(
                type="RangeQuery", rangeType="Int", field='intField',
                lowerTerm=lowerTerm, upperTerm=upperTerm,
                includeLower=includeLower, includeUpper=includeUpper)

        # (cql, lowerTerm, upperTerm, includeLower, includeUpper)
        cases = [
            ('intField > 1', 1, None, False, True),
            ('intField >= 1', 1, None, True, True),
            ('intField < 3', None, 3, True, False),
            ('intField <= 3', None, 3, True, True),
            ('intField = 3', 3, 3, True, True),
        ]
        for case in cases:
            self.assertEquals(intRange(*case[1:]), self._convert(case[0]))

        # Expressions built directly with an integer term give the same closed range [3, 3].
        exactlyThree = intRange(3, 3, True, True)
        for relation in ('exact', '='):
            expression = QueryExpression.searchterm(index='intField', relation=relation, term=3)
            self.assertEquals(exactlyThree, self._convert(expression))

    def testLongRangeQuery(self):
        """Assert the Long RangeQuery dicts produced for >, >=, < and <= on 'longField'."""
        # (cql, lowerTerm, upperTerm, includeLower, includeUpper)
        cases = [
            ('longField > 1', 1, None, False, True),
            ('longField >= 1', 1, None, True, True),
            ('longField < 3', None, 3, True, False),
            ('longField <= 3', None, 3, True, True),
        ]
        for cql, lowerTerm, upperTerm, includeLower, includeUpper in cases:
            expected = dict(
                type="RangeQuery", rangeType="Long", field='longField',
                lowerTerm=lowerTerm, upperTerm=upperTerm,
                includeLower=includeLower, includeUpper=includeUpper)
            self.assertEquals(expected, self._convert(cql))

    def testDrilldownFieldQuery(self):
        """Assert that queries on a hierarchical drilldown field convert to DrillDown terms with a path."""
        self.fieldRegistry = FieldRegistry([DrilldownField('field', hierarchical=True)])
        # A '>' inside the queried value is expected to separate the path elements.
        cases = [
            ("field = value", ["value"]),
            ("field = \"value>value1\"", ["value", "value1"]),
        ]
        for cql, path in cases:
            expected = dict(type="TermQuery", term=dict(field="field", path=path, type="DrillDown"))
            self.assertEquals(expected, self._convert(cql))

    def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
        """Assert that a phrase query leaves out the unqualified field registered as NO_TERMS_FREQUENCY_FIELD."""
        registry = FieldRegistry()
        registry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELD)
        self.fieldRegistry = registry
        self.unqualifiedFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
        expectedTerms = [dict(field="unqualified", value=word) for word in ("phrase", "query")]
        expected = dict(type="PhraseQuery", terms=expectedTerms, boost=1.0)
        self.assertEquals(expected, self._convert('"phrase query"'))

    def testQueryForIntField(self):
        """Assert that '=' and 'exact' on 'intField' both convert to the closed Int range [5, 5]."""
        for cql in ("intField=5", "intField exact 5"):
            expected = dict(
                type="RangeQuery", rangeType="Int", field='intField',
                lowerTerm=5, upperTerm=5, includeLower=True, includeUpper=True)
            self.assertEquals(expected, self._convert(cql))

    def testQueryForLongField(self):
        """Assert that '=' on 'longField' converts to the closed Long range [5, 5]."""
        five = long(5)
        expected = dict(
            type="RangeQuery", rangeType="Long", field='longField',
            lowerTerm=five, upperTerm=long(5), includeLower=True, includeUpper=True)
        self.assertEquals(expected, self._convert("longField=5"))

    def testQueryForDoubleField(self):
        """Assert that '=' on 'range.double.field' converts to the closed Double range [5.0, 5.0]."""
        expected = dict(
            type="RangeQuery", rangeType="Double", field='range.double.field',
            lowerTerm=float(5), upperTerm=float(5), includeLower=True, includeUpper=True)
        converted = self._convert("range.double.field=5")
        self.assertEquals(expected, converted)

    def testWildcardQuery(self):
        """Assert that the term '???*' on a field converts to a WildcardQuery dict."""
        # Setting fieldRegistry makes _prepareLuceneSettings() use it instead of the default registry.
        self.fieldRegistry = FieldRegistry()
        wildcardTerm = {"field": "field", "value": "???*"}
        self.assertEquals({"type": "WildcardQuery", "term": wildcardTerm}, self._convert('field=???*'))

    def testUnsupportedCQL(self):
        """Assert that converting CQL with an unsupported relation raises UnsupportedCQL."""
        for relation in ['<>']:
            # assertRaises fails the test when UnsupportedCQL is not raised; it
            # replaces the manual try / self.fail() / except construction.
            self.assertRaises(UnsupportedCQL, self._convert, 'index %s term' % relation)

    def testPerQueryUnqualifiedFields(self):
        """Assert that unqualifiedTermFields passed to convert() are used instead of the converter's own."""
        self.unqualifiedFields = [('aField', 1.0)]
        converter = self._prepareConverter()
        perQueryFields = [('aField', 2.0), ('anotherField', 3.0)]
        expected = dict(type="BooleanQuery", clauses=[
            dict(type="TermQuery", term=dict(field="aField", value="value"), boost=2.0, occur='SHOULD'),
            dict(type="TermQuery", term=dict(field="anotherField", value="value"), boost=3.0, occur='SHOULD'),
        ])
        converted = converter.convert(
            QueryExpression.searchterm(term="value"),
            unqualifiedTermFields=perQueryFields)
        self.assertEquals(expected, converted)

    def testReallyIgnoreAnalyzedAwayTerms(self):
        """Assert how terms consisting only of punctuation are converted, alone and inside an AND query."""
        def termClause(field, value, occur):
            return {'type': 'TermQuery', 'boost': 1.0, 'occur': occur, 'term': {'field': field, 'value': value}}

        # will not yield any results, but that's what's desired
        self.assertEquals(dict(type='PhraseQuery', terms=[], boost=1.0), self._convert('.'))
        self.assertDictEquals(dict(type='PhraseQuery', terms=[]), self._convert("abc=:;+"))

        # With one unqualified field only the clauses for 'abc' and 'def' are expected.
        expected = dict(type='BooleanQuery', clauses=[
            termClause('unqualified', u'abc', 'MUST'),
            termClause('unqualified', u'def', 'MUST'),
        ])
        self.assertDictEquals(expected, self._convert("abc AND :;+ AND def"))

        self.unqualifiedFields = [("unqualified", 1.0), ("moreUnqualified", 1.0)]
        expected = dict(type='BooleanQuery', clauses=[
            dict(type='BooleanQuery', occur='MUST', clauses=[
                termClause('unqualified', value, 'SHOULD'),
                termClause('moreUnqualified', value, 'SHOULD'),
            ])
            for value in (u'abc', u'def')
        ])
        self.assertDictEquals(expected, self._convert("abc AND :;+ AND def"))

    def testOtherCoreTermQuery(self):
        """Assert that a term on 'otherCore.field' converts to a RelationalLuceneQuery for 'otherCore'."""
        composedQuery = ComposedQuery('thisCore')
        composedQuery.cores.add('otherCore')
        composedQuery.addMatch(
            dict(core='thisCore', uniqueKey='A'),
            dict(core='otherCore', uniqueKey='B')
        )
        expected = dict(
            type="RelationalLuceneQuery",  # should this not be 'joined' to own core somehow? (with MatchAllDocs)
            core="otherCore",
            collectKeyName="B",
            filterKeyName="B",
            query=dict(type="TermQuery", term=dict(field="field", value="value")),
        )
        expression = QueryExpression.searchterm("otherCore.field", "=", "value")
        self.assertEquals(expected, self._convert(expression, composedQuery=composedQuery))

    @skip('not yet implemented')
    def testOtherCoreAndQuery(self):
        """Skipped: describes the JoinAndQuery expected for an AND of terms from two cores."""
        def relationalQuery(core, field):
            return dict(
                type="RelationalLuceneQuery",  # should this not be 'joined' to own core somehow?
                core=core,
                collectKeyName="A",  # where does this keyName come from?
                filterKeyName="A",
                query=dict(type="TermQuery", term=dict(field=field, value="value")),
            )

        expected = dict(
            type='JoinAndQuery',
            first=relationalQuery("thisCore", "field0"),
            second=relationalQuery("otherCore", "field"),
        )
        expression = QueryExpression(operator='AND', operands=[
            QueryExpression.searchterm('field0', '=', 'value'),
            QueryExpression.searchterm("otherCore.field", "=", "value")
        ])
        self.assertEquals(expected, self._convert(expression))


    def _convert(self, input, **kwargs):
        """Convert `input` (a CQL string or an expression) with a freshly prepared converter.

        Extra keyword arguments are passed on to the converter's convert() call.
        """
        converter = self._prepareConverter()
        expression = self._makeExpression(input)
        return converter.convert(expression, **kwargs)

    def _prepareConverter(self):
        """Build a QueryExpressionToLuceneQueryDict configured from this test case's attributes.

        `unqualifiedFields` defaults to [("unqualified", 1.0)] and
        `_ignoredStemmingForWords` to None when the test did not set them.
        """
        converterArguments = dict(
            unqualifiedTermFields=getattr(self, 'unqualifiedFields', [("unqualified", 1.0)]),
            luceneSettings=self._prepareLuceneSettings(),
            ignoreStemmingForWords=getattr(self, '_ignoredStemmingForWords', None),
        )
        return QueryExpressionToLuceneQueryDict(**converterArguments)

    def _prepareLuceneSettings(self):
        """Return LuceneSettings using the test's `_analyzer` and `fieldRegistry` when they are set.

        Without a `fieldRegistry` attribute a new registry is used in which
        'intField' and 'longField' are registered.
        """
        luceneSettings = LuceneSettings()
        if hasattr(self, '_analyzer'):
            luceneSettings.analyzer = self._analyzer
        if not hasattr(self, 'fieldRegistry'):
            luceneSettings.fieldRegistry = FieldRegistry()
            luceneSettings.fieldRegistry.register("intField", fieldDefinition=INTFIELD)
            luceneSettings.fieldRegistry.register("longField", fieldDefinition=LONGFIELD)
        else:
            luceneSettings.fieldRegistry = self.fieldRegistry
        return luceneSettings

    def _makeExpression(self, input):
        """Parse `input` into an expression when it is a string; return anything else unchanged."""
        if isinstance(input, basestring):
            return cqlToExpression(parseCql(input))
        return input
class QueryExpressionToLuceneQueryDictTest(SeecrTestCase):
    def testTermQuery(self):
        """Assert that 'field = value' converts to a TermQuery dict without boost."""
        expected = dict(type="TermQuery", term=dict(field="field", value="value"))
        # The same conversion is asserted twice, each time on a freshly built expression.
        for _ in range(2):
            expression = QueryExpression.searchterm("field", "=", "value")
            self.assertEquals(expected, self._convert(expression))

    def testRightHandSideIsLowercase(self):
        """Assert that the unqualified term "CaT" is converted with the value 'cat'."""
        expected = dict(type='TermQuery', term=dict(field='unqualified', value='cat'), boost=1.0)
        self.assertEquals(expected, self._convert(QueryExpression.searchterm(term="CaT")))

    def testOneTermOutputWithANumber(self):
        """Assert that the numeric-looking term "2005" converts to a TermQuery with value '2005'."""
        expected = dict(type='TermQuery', term=dict(field='unqualified', value='2005'), boost=1.0)
        self.assertEquals(expected, self._convert(QueryExpression.searchterm(term="2005")))

    def testMatchAllQuery(self):
        """Assert that the single term "*" converts to a MatchAllDocsQuery dict."""
        matchAll = QueryExpression.searchterm(term="*")
        expected = dict(type="MatchAllDocsQuery")
        self.assertEquals(expected, self._convert(matchAll))

    def testUnqualifiedTermFields(self):
        """Assert that an unqualified term is searched in the single configured unqualified field."""
        self.unqualifiedFields = [('aField', 1.0)]
        expected = dict(type="TermQuery", term=dict(field="aField", value="value"), boost=1.0)
        self.assertEquals(expected, self._convert(QueryExpression.searchterm(term="value")))

    def testUnqualifiedTermFieldsWithNestedExpression(self):
        """Assert that unqualified terms inside an AND expression become MUST clauses on 'aField'."""
        self.unqualifiedFields = [('aField', 1.0)]
        expr = QueryExpression.nested(operator='AND')
        expr.operands = [QueryExpression.searchterm(term=term) for term in ("value1", "value2")]
        expectedClauses = [
            dict(type='TermQuery', occur='MUST', term=dict(field='aField', value=value), boost=1.0)
            for value in (u'value1', u'value2')
        ]
        self.assertEquals(dict(type='BooleanQuery', clauses=expectedClauses), self._convert(expr))

    def testMultipleUnqualifiedTermFields(self):
        """Assert that an unqualified term becomes one boosted SHOULD clause per unqualified field."""
        self.unqualifiedFields = [('aField', 1.0), ('oField', 2.0)]
        expectedClauses = [
            dict(type="TermQuery", term=dict(field=fieldname, value="value"), boost=boost, occur="SHOULD")
            for fieldname, boost in [('aField', 1.0), ('oField', 2.0)]
        ]
        expected = dict(type="BooleanQuery", clauses=expectedClauses)
        self.assertEquals(expected, self._convert(QueryExpression.searchterm(term="value")))

    def testBooleanAndQuery(self):
        """Assert that an AND of two search terms converts to a BooleanQuery with two MUST clauses."""
        expr = QueryExpression.nested(operator='AND')
        expr.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            QueryExpression.searchterm("field2", "=", "value2")
        ]
        expected = dict(type="BooleanQuery", clauses=[
            dict(type="TermQuery", term=dict(field="field1", value="value1"), occur="MUST"),
            dict(type="TermQuery", term=dict(field="field2", value="value2"), occur="MUST"),
        ])
        self.assertEquals(expected, self._convert(expr))

    def testBooleanOrQuery(self):
        """Assert that an OR of two search terms converts to a BooleanQuery with two SHOULD clauses."""
        expr = QueryExpression.nested(operator='OR')
        expr.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            QueryExpression.searchterm("field2", "=", "value2")
        ]
        expected = dict(type="BooleanQuery", clauses=[
            dict(type="TermQuery", term=dict(field="field1", value="value1"), occur="SHOULD"),
            dict(type="TermQuery", term=dict(field="field2", value="value2"), occur="SHOULD"),
        ])
        self.assertEquals(expected, self._convert(expr))

    def testBooleanNotQuery(self):
        # Marking the second operand of an AND as must_not is expected to
        # turn only that clause into MUST_NOT.
        andExpression = QueryExpression.nested(operator='AND')
        andExpression.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            QueryExpression.searchterm("field2", "=", "value2"),
        ]
        andExpression.operands[1].must_not = True
        requiredClause = dict(type="TermQuery",
                              term=dict(field="field1", value="value1"),
                              occur="MUST")
        prohibitedClause = dict(type="TermQuery",
                                term=dict(field="field2", value="value2"),
                                occur="MUST_NOT")
        expected = dict(type="BooleanQuery",
                        clauses=[requiredClause, prohibitedClause])
        self.assertEquals(expected, self._convert(andExpression))

    def testBooleanNotQueryNested(self):
        # A nested AND flagged must_not is expected to become a MUST_NOT
        # BooleanQuery clause whose own clauses stay MUST.
        def mustTerm(field, value):
            return {
                "type": "TermQuery",
                "term": {"field": field, "value": value},
                "occur": "MUST",
            }

        outer = QueryExpression.nested(operator='AND')
        negated = QueryExpression.nested(operator='AND')
        negated.must_not = True
        negated.operands = [
            QueryExpression.searchterm("field2", "=", "value2"),
            QueryExpression.searchterm("field3", "=", "value3"),
        ]
        outer.operands = [
            QueryExpression.searchterm("field1", "=", "value1"),
            negated,
        ]
        expected = {
            "type": "BooleanQuery",
            "clauses": [
                mustTerm("field1", "value1"),
                {
                    "type": "BooleanQuery",
                    "occur": "MUST_NOT",
                    "clauses": [
                        mustTerm("field2", "value2"),
                        mustTerm("field3", "value3"),
                    ],
                },
            ],
        }
        self.assertEquals(expected, self._convert(outer))

    def testNotExpression(self):
        # A single must_not searchterm is expected to be wrapped in a
        # BooleanQuery: MatchAllDocsQuery (MUST) plus the term (MUST_NOT).
        negatedTerm = QueryExpression.searchterm("field", "=", "value")
        negatedTerm.must_not = True
        matchAll = dict(type="MatchAllDocsQuery", occur="MUST")
        excluded = dict(type="TermQuery",
                        term=dict(field="field", value="value"),
                        occur="MUST_NOT")
        expected = dict(type="BooleanQuery", clauses=[matchAll, excluded])
        self.assertEquals(expected, self._convert(negatedTerm))

    def testPhraseOutput(self):
        # A quoted unqualified term is expected to produce a PhraseQuery
        # with one term per word and the unqualified field's boost.
        expected = dict(
            type="PhraseQuery",
            boost=1.0,
            terms=[
                dict(field="unqualified", value="cats"),
                dict(field="unqualified", value="dogs"),
            ])
        phraseExpression = QueryExpression.searchterm(term='"cats dogs"')
        self.assertEquals(expected, self._convert(phraseExpression))

    # def testWhitespaceAnalyzer(self):
    #     self._analyzer = WhitespaceAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "kat"))
    #     query.add(Term("unqualified", "hond"))
    #     self.assertEquals(query, self._convert('"kat hond"'))

    # def testPhraseOutputDoesNoDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = PhraseQuery()
    #     query.add(Term("unqualified", "katten"))
    #     query.add(Term("unqualified", "honden"))
    #     self.assertEquals(query, self._convert('"katten honden"'))

    # def testDutchStemming(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertEquals(query, self._convert('honden'))

    # def testDutchStemmingOnlyForGivenFields(self):
    #     self._analyzer = MerescoDutchStemmingAnalyzer(['unqualified'])
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "honden")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "hond")), BooleanClause.Occur.SHOULD)
    #     self.assertEquals(query, self._convert('honden'))

    #     query = TermQuery(Term("field", "honden"))
    #     self.assertEquals(query, self._convert('field=honden'))

    # def testIgnoreStemming(self):
    #     self._ignoredStemmingForWords = ['kate', 'wageningen']
    #     self._analyzer = MerescoDutchStemmingAnalyzer()
    #     query = TermQuery(Term("unqualified", "kate"))
    #     self.assertEquals(query, 'kate')
    #     query = BooleanQuery()
    #     query.add(TermQuery(Term("unqualified", "katten")), BooleanClause.Occur.SHOULD)
    #     query.add(TermQuery(Term("unqualified", "kat")), BooleanClause.Occur.SHOULD)
    #     self.assertEquals(query, self._convert('katten'))

    def testPhraseQueryIsStandardAnalyzed(self):
        # The quoted input is expected to be split into these lowercased
        # tokens, with the parentheses dropped.
        tokens = ["vol.118", "2008", "nr.3", "march", "p.435-444"]
        expected = {
            "type": "PhraseQuery",
            "terms": [{"field": "unqualified", "value": token}
                      for token in tokens],
            "boost": 1.0,
        }
        actual = self._convert('"vol.118 (2008) nr.3 (March) p.435-444"')
        self.assertEquals(expected, actual)

    def testOneTermPhraseQueryUsesStandardAnalyzed(self):
        # The single term 'aap:noot' is expected to end up as a two-term
        # PhraseQuery on the unqualified field.
        expected = {
            "type": "PhraseQuery",
            "terms": [
                {"field": "unqualified", "value": 'aap'},
                {"field": "unqualified", "value": 'noot'},
            ],
            "boost": 1.0,
        }
        self.assertEquals(expected, self._convert('aap:noot'))

    def testCreatesEmptyPhraseQueryIfNoValidCharsFound(self):
        # Input that leaves no terms is expected to yield a PhraseQuery
        # with an empty term list.
        emptyPhrase = {"type": "PhraseQuery", "terms": [], "boost": 1.0}
        self.assertEquals(emptyPhrase, self._convert(':'))

    def testStandardAnalyserWithoutStopWords(self):
        # Words such as 'no', 'is' and 'the' are expected to be kept as
        # phrase terms rather than removed.
        words = ["no", "is", "the", "only", "option"]
        expected = {
            "type": "PhraseQuery",
            "terms": [{"field": "unqualified", "value": word}
                      for word in words],
            "boost": 1.0,
        }
        self.assertEquals(expected, self._convert('"no is the only option"'))

    def testDiacritics(self):
        # Plain, acute and grave spellings are all expected to convert to
        # the same lowercased, accent-free term.
        expected = termQuery('title', 'moree')
        for cql in ('title=Moree', 'title=Morée', 'title=Morèe'):
            self.assertEquals(expected, self._convert(cql))

        # Disabled stemming-analyzer variant of this test:
        # self._analyzer = MerescoDutchStemmingAnalyzer()
        # query = PhraseQuery()
        # query.add(Term("title", "waar"))
        # query.add(Term("title", "is"))
        # query.add(Term("title", "moree"))
        # query.add(Term("title", "vandaag"))
        # self.assertEquals(query, self._convert('title="Waar is Morée vandaag"'))

    def testDiacriticsShouldBeNormalizedNFC(self):
        from unicodedata import normalize

        # With a separate combining accent (not normalized) the input is
        # expected to fall apart into a two-term PhraseQuery.
        splitPhrase = {
            "type": "PhraseQuery",
            "terms": [
                {"field": "title", "value": "more"},
                {"field": "title", "value": "e"},
            ],
        }
        self.assertEquals(splitPhrase,
                          self._convert('title=More\xcc\x81e'))  # Combined

        # After NFC normalization the same input is expected to give a
        # single term.
        # NOTE(review): unicode() is called without an explicit encoding, so
        # this relies on the interpreter's default encoding -- confirm.
        composed = normalize('NFC', unicode('title=More\xcc\x81e'))
        self.assertEquals(termQuery('title', 'moree'),
                          self._convert(composed))

    def testIndexRelationTermOutput(self):
        # A single value on a named index is expected to give a TermQuery.
        self.assertEquals(termQuery('animal', 'cats'),
                          self._convert('animal=cats'))

        # A quoted multi-word value is expected to give a PhraseQuery
        # without boost, regardless of the input's letter case.
        expectedPhrase = {
            "type": "PhraseQuery",
            "terms": [
                {"field": "animal", "value": "cats"},
                {"field": "animal", "value": "dogs"},
            ],
        }
        for cql in ('animal="cats dogs"', 'animal="catS Dogs"'):
            self.assertEquals(expectedPhrase, self._convert(cql))

    def testIndexRelationExactTermOutput(self):
        # With the 'exact' relation the quoted value is expected to be kept
        # as one term, whitespace and capitals included.
        cases = (
            ("hairy cats", 'animal exact "hairy cats"'),
            ("Capital Cats", 'animal exact "Capital Cats"'),
        )
        for exactValue, cql in cases:
            self.assertEquals(termQuery("animal", exactValue),
                              self._convert(cql))

    def testBoost(self):
        # The '/boost=2.0' relation modifier is expected to end up as the
        # boost of the resulting term query.
        boosted = termQuery("title", "cats", boost=2.0)
        actual = self._convert("title =/boost=2.0 cats")
        self.assertEquals(boosted, actual)

    def testWildcards(self):
        def check(expected, *cqlQueries):
            # Every given CQL query must convert to the same expected dict.
            for cql in cqlQueries:
                self.assertEquals(expected, self._convert(cql))

        # A trailing '*' is expected to give a prefix query on the
        # lowercased prefix.
        check(prefixQuery('unqualified', 'prefix', 1.0), 'prefix*', 'PREfix*')
        check(prefixQuery('field', 'prefix'),
              'field="PREfix*"', 'field=prefix*')
        check(prefixQuery('field', 'oc-0123'), 'field="oc-0123*"')
        # A one-character prefix is expected to give a plain term query.
        check(termQuery('field', 'p'), 'field="P*"')
        #only prefix queries for now
        check(termQuery('field', 'post'), 'field="*post"')

        check(termQuery('field', 'prefix'), 'field=prefix**')

        # With several unqualified fields, one SHOULD prefix clause per
        # field is expected, each with that field's boost.
        self.unqualifiedFields = [("field0", 0.2), ("field1", 2.0)]

        clauses = []
        for fieldname, boost in self.unqualifiedFields:
            clause = prefixQuery(fieldname, "prefix", boost)
            clause["occur"] = "SHOULD"
            clauses.append(clause)
        check({"type": "BooleanQuery", "clauses": clauses}, "prefix*")

    def testMagicExact(self):
        # Capture what 'exact' yields with the default registry first ...
        expected = self._convert('animal exact "cats dogs"')
        # ... then register 'animal' as STRINGFIELD; '=' is now expected to
        # give that same result.
        registry = FieldRegistry()
        registry.register('animal', STRINGFIELD)
        self.fieldRegistry = registry
        self.assertEquals(expected, self._convert('animal = "cats dogs"'))

    def testTextRangeQuery(self):
        # Each comparison relation on a text field is expected to give a
        # String RangeQuery that is open on one side.
        # (cql, lowerTerm, upperTerm, includeLower, includeUpper)
        cases = [
            ('field > value', 'value', None, False, True),
            ('field >= value', 'value', None, True, True),
            ('field < value', None, 'value', True, False),
            ('field <= value', None, 'value', True, True),
        ]
        for cql, lowerTerm, upperTerm, includeLower, includeUpper in cases:
            expected = {
                "type": "RangeQuery",
                "rangeType": "String",
                "field": 'field',
                "lowerTerm": lowerTerm,
                "upperTerm": upperTerm,
                "includeLower": includeLower,
                "includeUpper": includeUpper,
            }
            self.assertEquals(expected, self._convert(cql))

    def testIntRangeQuery(self):
        def intRange(lowerTerm, upperTerm, includeLower, includeUpper):
            # Expected Int RangeQuery dict on 'intField'.
            return {
                "type": "RangeQuery",
                "rangeType": "Int",
                "field": 'intField',
                "lowerTerm": lowerTerm,
                "upperTerm": upperTerm,
                "includeLower": includeLower,
                "includeUpper": includeUpper,
            }

        # (cql, lowerTerm, upperTerm, includeLower, includeUpper)
        cases = [
            ('intField > 1', 1, None, False, True),
            ('intField >= 1', 1, None, True, True),
            ('intField < 3', None, 3, True, False),
            ('intField <= 3', None, 3, True, True),
        ]
        for cql, lowerTerm, upperTerm, includeLower, includeUpper in cases:
            self.assertEquals(
                intRange(lowerTerm, upperTerm, includeLower, includeUpper),
                self._convert(cql))

        # Equality is expected to give a closed range [3, 3], both from CQL
        # text and from expressions carrying an int term.
        closedRange = intRange(3, 3, True, True)
        self.assertEquals(closedRange, self._convert('intField = 3'))
        for relation in ('exact', '='):
            expression = QueryExpression.searchterm(index='intField',
                                                    relation=relation,
                                                    term=3)
            self.assertEquals(closedRange, self._convert(expression))

    def testLongRangeQuery(self):
        # Each comparison relation on 'longField' is expected to give a
        # Long RangeQuery that is open on one side.
        # (cql, lowerTerm, upperTerm, includeLower, includeUpper)
        cases = [
            ('longField > 1', 1, None, False, True),
            ('longField >= 1', 1, None, True, True),
            ('longField < 3', None, 3, True, False),
            ('longField <= 3', None, 3, True, True),
        ]
        for cql, lowerTerm, upperTerm, includeLower, includeUpper in cases:
            expected = {
                "type": "RangeQuery",
                "rangeType": "Long",
                "field": 'longField',
                "lowerTerm": lowerTerm,
                "upperTerm": upperTerm,
                "includeLower": includeLower,
                "includeUpper": includeUpper,
            }
            self.assertEquals(expected, self._convert(cql))

    def testDrilldownFieldQuery(self):
        # For a hierarchical drilldown field the value is expected to be
        # turned into a DrillDown term path, split on '>'.
        hierarchicalField = DrilldownField('field', hierarchical=True)
        self.fieldRegistry = FieldRegistry([hierarchicalField])

        singleLevel = {
            "type": "TermQuery",
            "term": {"field": "field", "path": ["value"],
                     "type": "DrillDown"},
        }
        self.assertEquals(singleLevel, self._convert("field = value"))

        twoLevels = {
            "type": "TermQuery",
            "term": {"field": "field", "path": ["value", "value1"],
                     "type": "DrillDown"},
        }
        self.assertEquals(twoLevels,
                          self._convert('field = "value>value1"'))

    def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(
            self):
        # 'noTermFreqField' is registered as NO_TERMS_FREQUENCY_FIELD and is
        # listed as unqualified field; the expected phrase query only
        # addresses the 'unqualified' field.
        self.fieldRegistry = FieldRegistry()
        self.fieldRegistry.register('noTermFreqField',
                                    NO_TERMS_FREQUENCY_FIELD)
        self.unqualifiedFields = [
            ("unqualified", 1.0),
            ('noTermFreqField', 2.0),
        ]
        expected = {
            "type": "PhraseQuery",
            "terms": [
                {"field": "unqualified", "value": "phrase"},
                {"field": "unqualified", "value": "query"},
            ],
            "boost": 1.0,
        }
        self.assertEquals(expected, self._convert('"phrase query"'))

    def testQueryForIntField(self):
        # Both '=' and 'exact' on the int field are expected to give the
        # closed Int range [5, 5].
        for cql in ("intField=5", "intField exact 5"):
            expected = {
                "type": "RangeQuery",
                "rangeType": "Int",
                "field": 'intField',
                "lowerTerm": 5,
                "upperTerm": 5,
                "includeLower": True,
                "includeUpper": True,
            }
            self.assertEquals(expected, self._convert(cql))

    def testQueryForLongField(self):
        # '=' on the long field is expected to give the closed Long range
        # [5, 5].
        five = long(5)
        expected = {
            "type": "RangeQuery",
            "rangeType": "Long",
            "field": 'longField',
            "lowerTerm": five,
            "upperTerm": five,
            "includeLower": True,
            "includeUpper": True,
        }
        self.assertEquals(expected, self._convert("longField=5"))

    def testQueryForDoubleField(self):
        # '=' on 'range.double.field' is expected to give the closed Double
        # range [5.0, 5.0].
        five = float(5)
        expected = {
            "type": "RangeQuery",
            "rangeType": "Double",
            "field": 'range.double.field',
            "lowerTerm": five,
            "upperTerm": five,
            "includeLower": True,
            "includeUpper": True,
        }
        self.assertEquals(expected, self._convert("range.double.field=5"))

    def testWildcardQuery(self):
        # A value containing '?' wildcards is expected to be passed on
        # unchanged in a WildcardQuery.
        self.fieldRegistry = FieldRegistry()
        expected = {
            "type": "WildcardQuery",
            "term": {"field": "field", "value": "???*"},
        }
        self.assertEquals(expected, self._convert('field=???*'))

    def testUnsupportedCQL(self):
        # Every relation listed here must make the conversion raise
        # UnsupportedCQL. assertRaises fails with a descriptive message when
        # nothing is raised (the former bare self.fail() gave none), and the
        # relation is formatted explicitly instead of through locals().
        for relation in ['<>']:
            self.assertRaises(UnsupportedCQL, self._convert,
                              'index %s term' % relation)

    def testPerQueryUnqualifiedFields(self):
        # The unqualifiedTermFields passed to convert() are expected to
        # replace the ones the converter was constructed with.
        self.unqualifiedFields = [('aField', 1.0)]
        converter = self._prepareConverter()

        def shouldTerm(field, boost):
            return {
                "type": "TermQuery",
                "term": {"field": field, "value": "value"},
                'boost': boost,
                'occur': 'SHOULD',
            }

        expected = {
            "type": "BooleanQuery",
            "clauses": [
                shouldTerm("aField", 2.0),
                shouldTerm("anotherField", 3.0),
            ],
        }
        actual = converter.convert(
            QueryExpression.searchterm(term="value"),
            unqualifiedTermFields=[('aField', 2.0), ('anotherField', 3.0)])
        self.assertEquals(expected, actual)

    def testReallyIgnoreAnalyzedAwayTerms(self):
        def termClause(field, value, occur):
            # Expected TermQuery clause with the default boost of 1.0.
            return {
                'type': 'TermQuery',
                'boost': 1.0,
                'term': {'field': field, 'value': value},
                'occur': occur,
            }

        def eitherFieldClause(value):
            # Expected MUST clause matching value in either unqualified field.
            return {
                'type': 'BooleanQuery',
                'occur': 'MUST',
                'clauses': [
                    termClause('unqualified', value, 'SHOULD'),
                    termClause('moreUnqualified', value, 'SHOULD'),
                ],
            }

        # will not yield any results, but that's what's desired
        emptyUnqualifiedPhrase = {
            'boost': 1.0,
            'terms': [],
            'type': 'PhraseQuery',
        }
        self.assertEquals(emptyUnqualifiedPhrase, self._convert('.'))
        self.assertDictEquals({'terms': [], 'type': 'PhraseQuery'},
                              self._convert("abc=:;+"))

        # Within an AND, the operand that leaves no terms is expected to be
        # left out entirely.
        cql = "abc AND :;+ AND def"
        singleFieldExpected = {
            'type': 'BooleanQuery',
            'clauses': [
                termClause('unqualified', u'abc', 'MUST'),
                termClause('unqualified', u'def', 'MUST'),
            ],
        }
        self.assertDictEquals(singleFieldExpected, self._convert(cql))

        # Same query with two unqualified fields.
        self.unqualifiedFields = [("unqualified", 1.0),
                                  ("moreUnqualified", 1.0)]
        twoFieldsExpected = {
            'type': 'BooleanQuery',
            'clauses': [eitherFieldClause(u'abc'), eitherFieldClause(u'def')],
        }
        self.assertDictEquals(twoFieldsExpected, self._convert(cql))

    def testOtherCoreTermQuery(self):
        # A term on 'otherCore.field' is expected to become a
        # RelationalLuceneQuery on otherCore, keyed on that core's match key.
        composedQuery = ComposedQuery('thisCore')
        composedQuery.cores.add('otherCore')
        composedQuery.addMatch(dict(core='thisCore', uniqueKey='A'),
                               dict(core='otherCore', uniqueKey='B'))
        expected = dict(
            # should this not be 'joined' to own core somehow? (with MatchAllDocs)
            type="RelationalLuceneQuery",
            core="otherCore",
            collectKeyName="B",
            filterKeyName="B",
            query=dict(type="TermQuery",
                       term=dict(field="field", value="value")))
        otherCoreTerm = QueryExpression.searchterm("otherCore.field", "=",
                                                   "value")
        self.assertEquals(
            expected, self._convert(otherCoreTerm,
                                    composedQuery=composedQuery))

    @skip('not yet implemented')
    def testOtherCoreAndQuery(self):
        def relationalTermQuery(core, field):
            return dict(
                # should this not be 'joined' to own core somehow?
                type="RelationalLuceneQuery",
                core=core,
                # where does this keyName come from?
                collectKeyName="A",
                filterKeyName="A",
                query=dict(type="TermQuery",
                           term=dict(field=field, value="value")))

        expected = {
            'type': 'JoinAndQuery',
            'first': relationalTermQuery("thisCore", "field0"),
            'second': relationalTermQuery("otherCore", "field"),
        }
        andExpression = QueryExpression(
            operator='AND',
            operands=[
                QueryExpression.searchterm('field0', '=', 'value'),
                QueryExpression.searchterm("otherCore.field", "=", "value"),
            ])
        self.assertEquals(expected, self._convert(andExpression))

    def _convert(self, input, **kwargs):
        """Convert input with a freshly prepared converter.

        input goes through _makeExpression first; kwargs are passed on
        unchanged to the converter's convert().
        """
        converter = self._prepareConverter()
        expression = self._makeExpression(input)
        return converter.convert(expression, **kwargs)

    def _prepareConverter(self):
        """Build a QueryExpressionToLuceneQueryDict from this test's state.

        Uses self.unqualifiedFields when a test has set it (otherwise a
        single 'unqualified' field with boost 1.0) and
        self._ignoredStemmingForWords when set (otherwise None).
        """
        defaultFields = [("unqualified", 1.0)]
        termFields = getattr(self, 'unqualifiedFields', defaultFields)
        luceneSettings = self._prepareLuceneSettings()
        ignoredWords = getattr(self, '_ignoredStemmingForWords', None)
        return QueryExpressionToLuceneQueryDict(
            unqualifiedTermFields=termFields,
            luceneSettings=luceneSettings,
            ignoreStemmingForWords=ignoredWords)

    def _prepareLuceneSettings(self):
        """Create LuceneSettings reflecting this test's overrides.

        self._analyzer and self.fieldRegistry are used when a test has set
        them; without a fieldRegistry a new one is created with 'intField'
        and 'longField' registered.
        """
        luceneSettings = LuceneSettings()
        if hasattr(self, '_analyzer'):
            luceneSettings.analyzer = self._analyzer
        if hasattr(self, 'fieldRegistry'):
            luceneSettings.fieldRegistry = self.fieldRegistry
            return luceneSettings
        luceneSettings.fieldRegistry = FieldRegistry()
        defaults = (("intField", INTFIELD), ("longField", LONGFIELD))
        for fieldname, definition in defaults:
            luceneSettings.fieldRegistry.register(fieldname,
                                                  fieldDefinition=definition)
        return luceneSettings

    def _makeExpression(self, input):
        """Return input as query expression.

        Strings are parsed as CQL and converted with cqlToExpression; any
        other value is returned unchanged.
        """
        if isinstance(input, basestring):
            return cqlToExpression(parseCql(input))
        return input
Example #27
0
def main(reactor, port, serverPort, autocompletePort, databasePath, **kwargs):
    """Build and return the component tree for the HTTP server.

    The result is a nested tuple rooted at ``Observable()`` whose HTTP server
    listens on ``port`` and routes requests through a series of PathFilters.

    reactor          -- passed to SocketPool, ObservableHttpServer,
                        DynamicHtml and LuceneRemoteService.
    port             -- port for ObservableHttpServer; also used as the
                        target port of LuceneRemote and Autocomplete on
                        localhost.
    serverPort       -- port on localhost used by every Lucene and
                        MultiLucene component.
    autocompletePort -- port on localhost used by SuggestionIndexComponent.
    databasePath     -- base directory; storage lives in its 'storage'
                        subdirectory.
    kwargs           -- accepted but not used in this function.

    ``dynamicPath``, ``staticPath``, ``version`` and ``stdout`` are read from
    the enclosing module scope.
    """
    # Drilldown field definitions; passed to the FieldRegistry and to both
    # upload helixes below.
    drilldownFields = [
        DrilldownField('untokenized.field2'),
        DrilldownField('untokenized.field2.copy', indexFieldName='copy'),
        DrilldownField('untokenized.fieldHier', hierarchical=True)
    ]

    # One registry shared by the 'main' and 'main2' settings; these fields
    # are explicitly registered as INTFIELD.
    fieldRegistry = FieldRegistry(drilldownFields)
    fieldRegistry.register('intfield1', INTFIELD)
    fieldRegistry.register('intfield2', INTFIELD)
    fieldRegistry.register('intfield3', INTFIELD)
    fieldRegistry.register('intfield_missing', INTFIELD)
    fieldRegistry.register('sorted.intfield_missing', INTFIELD)
    luceneSettings = LuceneSettings(
                fieldRegistry=fieldRegistry,
                commitCount=30,
                commitTimeout=0.3,
                #analyzer=MerescoDutchStemmingAnalyzer(["field4", "field5"]),
                analyzer=dict(type="MerescoDutchStemmingAnalyzer", stemmingFields=['field4', 'field5'])
            )

    # A single HttpRequest1_1 component backed by one SocketPool; it is
    # attached below every Lucene component and the suggestion component.
    http11_request = be((HttpRequest1_1(),
        (SocketPool(reactor=reactor, unusedTimeout=5, limits=dict(totalSize=100, destinationSize=10)),)
    ))
    # Core 'main'.
    lucene = be((Lucene(host="localhost", port=serverPort, name='main', settings=luceneSettings),
            (http11_request,)
        ))

    # Core 'main2': same field registry, different commit timeout.
    lucene2Settings = LuceneSettings(fieldRegistry=fieldRegistry, commitTimeout=0.1)
    lucene2 = be((Lucene(host="localhost", port=serverPort, name='main2', settings=lucene2Settings),
            (http11_request,)
        ))

    # Core 'empty-core' is created inline with settings that only set
    # commitTimeout. The helix is kept as a plain tuple (not passed through
    # be() here) and is placed under both AdapterToLuceneQuery nodes below.
    emptyLuceneSettings = LuceneSettings(commitTimeout=1)
    multiLuceneHelix = (MultiLucene(host='localhost', port=serverPort, defaultCore='main'),
            (Lucene(host='localhost', port=serverPort, name='empty-core', settings=emptyLuceneSettings),
                (http11_request,)
            ),
            (lucene,),
            (lucene2,),
            (http11_request,)
        )
    # Record storage under <databasePath>/storage, shared by the upload and
    # SRU branches.
    storageComponent = be(
        (RetrieveDataToGetData(),
            (StorageComponentAdapter(),
                (MultiSequentialStorage(directory=join(databasePath, 'storage')),)
            )
        )
    )

    # The tree below is returned as a plain nested tuple; each PathFilter
    # branch handles one URL path.
    return \
    (Observable(),
        (ObservableHttpServer(reactor=reactor, port=port),
            (BasicHttpHandler(),
                (ApacheLogger(outputStream=stdout),
                    # Dynamic info pages; the listed paths are excluded and
                    # handled by the dedicated branches further down.
                    # NOTE(review): several excluded paths do not start with
                    # '/info' -- confirm against PathFilter semantics.
                    (PathFilter("/info", excluding=[
                            '/info/version',
                            '/info/name',
                            '/update',
                            '/sru',
                            '/remote',
                            '/via-remote-sru',
                        ]),
                        (DynamicHtml(
                                [dynamicPath],
                                reactor=reactor,
                                indexPage='/info',
                                additionalGlobals={
                                    'VERSION': version,
                                }
                            ),
                        )
                    ),
                    # Plain-text version and name.
                    (PathFilter("/info/version"),
                        (StringServer(version, ContentTypePlainText), )
                    ),
                    (PathFilter("/info/name"),
                        (StringServer('Meresco Lucene', ContentTypePlainText),)
                    ),
                    # Static files: the '/static' prefix is stripped from the
                    # path before FileServer sees it.
                    (PathFilter("/static"),
                        (PathRename(lambda path: path[len('/static'):]),
                            (FileServer(staticPath),)
                        )
                    ),
                    # Upload branches, one per core. '/update_main2' is
                    # excluded from the first filter -- presumably because
                    # PathFilter matches on prefix; verify.
                    (PathFilter("/update_main", excluding=['/update_main2']),
                        uploadHelix(lucene, storageComponent, drilldownFields, fieldRegistry=luceneSettings.fieldRegistry),
                    ),
                    (PathFilter("/update_main2"),
                        uploadHelix(lucene2, storageComponent, drilldownFields, fieldRegistry=lucene2Settings.fieldRegistry),
                    ),
                    # SRU against the local cores, with one query converter
                    # per core name.
                    (PathFilter('/sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (AdapterToLuceneQuery(
                                    defaultCore='main',
                                    coreConverters={
                                        "main": QueryExpressionToLuceneQueryDict([], luceneSettings=luceneSettings),
                                        "main2": QueryExpressionToLuceneQueryDict([], luceneSettings=lucene2Settings),
                                        "empty-core": QueryExpressionToLuceneQueryDict([], luceneSettings=emptyLuceneSettings),
                                    }),
                                    multiLuceneHelix,
                                ),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    # SRU that goes through LuceneRemote, which targets this
                    # same server's '/remote' path on `port`.
                    (PathFilter('/via-remote-sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (LuceneRemote(host='localhost', port=port, path='/remote'),),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    # Service side of the remote setup; uses its own set of
                    # converter instances and the same multiLuceneHelix tuple.
                    (PathFilter('/remote'),
                        (LuceneRemoteService(reactor=reactor),
                            (AdapterToLuceneQuery(
                                    defaultCore='main',
                                    coreConverters={
                                        "main": QueryExpressionToLuceneQueryDict([], luceneSettings=luceneSettings),
                                        "main2": QueryExpressionToLuceneQueryDict([], luceneSettings=lucene2Settings),
                                        "empty-core": QueryExpressionToLuceneQueryDict([], luceneSettings=emptyLuceneSettings),
                                    }),
                                multiLuceneHelix,
                            )
                        )
                    ),
                    # Autocomplete observed by the 'main' core.
                    (PathFilter('/autocomplete'),
                        (Autocomplete(host='localhost', port=port, path='/autocomplete', defaultField='__all__', templateQuery='?', defaultLimit=5, shortname='?', description='?'),
                            (lucene,),
                        )
                    ),
                    # Suggestions are served via a separate port.
                    (PathFilter('/suggestion'),
                        (SuggestionIndexComponent(host='localhost', port=autocompletePort),
                            (http11_request,),
                        )
                    )
                )
            )
        )
    )