class LuceneQueryComposerTest(TestCase):
    """Checks the structure LuceneQueryComposer produces for CQL input.

    Every test uses a composer configured with one unqualified term field
    ("unqualified", boost 1.0) and a field registry in which "intField" and
    "longField" are registered.
    """

    def setUp(self):
        # Build a fresh composer per test so tests cannot influence each other.
        super(LuceneQueryComposerTest, self).setUp()
        fieldRegistry = FieldRegistry()
        fieldRegistry.register("intField", fieldDefinition=INTFIELD)
        fieldRegistry.register("longField", fieldDefinition=LONGFIELD)
        self.composer = LuceneQueryComposer(
            unqualifiedTermFields=[("unqualified", 1.0)],
            luceneSettings=LuceneSettings(fieldRegistry=fieldRegistry),
        )

    def testOneTermOutput(self):
        # A bare term is searched in the unqualified field with its boost.
        expected = {
            "type": "TermQuery",
            "term": {"field": "unqualified", "value": "cat"},
            "boost": 1.0,
        }
        self.assertConversion(expected, "cat")

    def assertConversion(self, expected, input):
        # Parse `input` as CQL, compose it with self.composer and compare the
        # outcome to `expected`.
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        result = self.composer.compose(parseCql(input))
        self.assertEqual(expected, result)
Exemple #2
0
    def testUnqualifiedTermFields(self):
        # An unqualified term must be searched in every configured unqualified
        # field, each clause carrying that field's boost and combined as SHOULD.
        composer = LuceneQueryComposer(
            unqualifiedTermFields=[("field0", 0.2), ("field1", 2.0)],
            luceneSettings=LuceneSettings(),
        )
        result = composer.compose(parseCql("value"))

        expected = BooleanQuery()
        for fieldname, boost in [("field0", 0.2), ("field1", 2.0)]:
            clause = TermQuery(Term(fieldname, "value"))
            clause.setBoost(boost)
            expected.add(clause, BooleanClause.Occur.SHOULD)

        # Queries are compared by type and repr rather than by value.
        # assertEqual replaces the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(type(expected), type(result))
        self.assertEqual(repr(expected), repr(result))
Exemple #3
0
class LuceneQueryComposerTest(TestCase):
    """Checks the Lucene query objects LuceneQueryComposer builds from CQL.

    Unless a test replaces it, self.composer is configured with a single
    unqualified term field ("unqualified", boost 1.0) and default
    LuceneSettings. Expected and actual queries are compared by type and
    repr (see assertConversion).
    """

    def setUp(self):
        # Fresh composer for every test; several tests overwrite self.composer.
        super(LuceneQueryComposerTest, self).setUp()
        self.composer = LuceneQueryComposer(unqualifiedTermFields=[("unqualified", 1.0)], luceneSettings=LuceneSettings())

    def testOneTermOutput(self):
        self.assertConversion(TermQuery(Term("unqualified", "cat")), "cat")

    def testRightHandSideIsLowercase(self):
        # The search term is expected in lower case in the resulting query.
        self.assertConversion(TermQuery(Term("unqualified", "cat")), "CaT")

    def testOneTermOutputWithANumber(self):
        self.assertConversion(TermQuery(Term("unqualified", "2005")), "2005")

    def testPhraseOutput(self):
        query = PhraseQuery()
        query.add(Term("unqualified", "cats"))
        query.add(Term("unqualified", "dogs"))
        self.assertConversion(query, '"cats dogs"')

    def testPhraseOutputDutchStemming(self):
        # With the Dutch stemming analyzer the phrase terms are expected in
        # their stemmed form.
        self.composer = LuceneQueryComposer(unqualifiedTermFields=[("unqualified", 1.0)], luceneSettings=LuceneSettings(analyzer=MerescoDutchStemmingAnalyzer()))
        query = PhraseQuery()
        query.add(Term("unqualified", "kat"))
        query.add(Term("unqualified", "hond"))
        self.assertConversion(query, '"katten honden"')

    def testPhraseQueryIsStandardAnalyzed(self):
        # Parentheses are dropped and "March" is lowercased, while tokens such
        # as "vol.118" and "p.435-444" stay intact.
        expected = PhraseQuery()
        for term in ["vol.118", "2008", "nr.3", "march", "p.435-444"]:
            expected.add(Term("unqualified", term))
        input = '"vol.118 (2008) nr.3 (March) p.435-444"'
        self.assertConversion(expected, input)

    def testOneTermPhraseQueryUsesStandardAnalyzed(self):
        # A single CQL term that splits into several tokens ("aap:noot")
        # is expected to become a phrase query.
        expected = PhraseQuery()
        expected.add(Term('unqualified', 'aap'))
        expected.add(Term('unqualified', 'noot'))
        self.assertConversion(expected, 'aap:noot')

    def testStandardAnalyserWithoutStopWords(self):
        # Common words ("no", "is", "the") must be kept in the phrase.
        expected = PhraseQuery()
        for term in ["no", "is", "the", "only", "option"]:
            expected.add(Term("unqualified", term))
        self.assertConversion(expected, '"no is the only option"')

    def testDiacritics(self):
        # Accented and unaccented spellings all map to the same term.
        self.assertConversion(TermQuery(Term('title', 'moree')), 'title=Moree')
        self.assertConversion(TermQuery(Term('title', 'moree')), 'title=Morée')
        self.assertConversion(TermQuery(Term('title', 'moree')), 'title=Morèe')

    def testDiacriticsShouldBeNormalizedNFC(self):
        # '\xcc\x81' is the UTF-8 encoding of U+0301, the combining acute
        # accent. Left in decomposed form it splits the word into two terms.
        pq = PhraseQuery()
        pq.add(Term("title", "more"))
        pq.add(Term("title", "e"))
        self.assertConversion(pq, 'title=More\xcc\x81e')
        # After NFC normalization the same input yields a single term.
        # NOTE(review): unicode() on this byte string relies on the
        # interpreter's default encoding being able to decode UTF-8 - confirm.
        from unicodedata import normalize
        self.assertConversion(TermQuery(Term('title', 'moree')), normalize('NFC', unicode('title=More\xcc\x81e')))

    def testIndexRelationTermOutput(self):
        self.assertConversion(TermQuery(Term("animal", "cats")), 'animal=cats')
        query = PhraseQuery()
        query.add(Term("animal", "cats"))
        query.add(Term("animal", "dogs"))
        self.assertConversion(query, 'animal="cats dogs"')
        self.assertConversion(query, 'animal="catS Dogs"')

    def testIndexRelationExactTermOutput(self):
        # With the 'exact' relation the value becomes one term, with spaces
        # and capitals preserved.
        self.assertConversion(TermQuery(Term("animal", "hairy cats")), 'animal exact "hairy cats"')
        self.assertConversion(TermQuery(Term("animal", "Capital Cats")), 'animal exact "Capital Cats"')

    def testBooleanAndTermOutput(self):
        query = BooleanQuery()
        query.add(TermQuery(Term('unqualified', 'cats')), BooleanClause.Occur.MUST)
        query.add(TermQuery(Term('unqualified', 'dogs')), BooleanClause.Occur.MUST)
        self.assertConversion(query, 'cats AND dogs')

    def testBooleanOrTermOutput(self):
        query = BooleanQuery()
        query.add(TermQuery(Term('unqualified', 'cats')), BooleanClause.Occur.SHOULD)
        query.add(TermQuery(Term('unqualified', 'dogs')), BooleanClause.Occur.SHOULD)
        self.assertConversion(query, 'cats OR dogs')

    def testBooleanNotTermOutput(self):
        query = BooleanQuery()
        query.add(TermQuery(Term('unqualified', 'cats')), BooleanClause.Occur.MUST)
        query.add(TermQuery(Term('unqualified', 'dogs')), BooleanClause.Occur.MUST_NOT)
        self.assertConversion(query, 'cats NOT dogs')

    def testBraces(self):
        # Redundant braces around a single term change nothing.
        self.assertConversion(TermQuery(Term('unqualified', 'cats')), '(cats)')
        # Braces around a sub-expression produce a nested boolean query.
        innerQuery = BooleanQuery()
        innerQuery.add(TermQuery(Term('unqualified', 'cats')), BooleanClause.Occur.MUST)
        innerQuery.add(TermQuery(Term('unqualified', 'dogs')), BooleanClause.Occur.MUST)
        outerQuery = BooleanQuery()
        outerQuery.add(innerQuery, BooleanClause.Occur.SHOULD)
        outerQuery.add(TermQuery(Term('unqualified', 'mice')), BooleanClause.Occur.SHOULD)

        self.assertConversion(outerQuery, '(cats AND dogs) OR mice')

    def testBoost(self):
        # The CQL relation modifier 'boost' ends up as the query boost.
        query = TermQuery(Term("title", "cats"))
        query.setBoost(2.0)
        self.assertConversion(query, "title =/boost=2.0 cats")

    def testUnqualifiedTermFields(self):
        # An unqualified term is searched in every unqualified field, each
        # clause carrying that field's boost and combined as SHOULD.
        composer = LuceneQueryComposer(unqualifiedTermFields=[("field0", 0.2), ("field1", 2.0)], luceneSettings=LuceneSettings())
        ast = parseCql("value")
        result = composer.compose(ast)
        query = BooleanQuery()
        left = TermQuery(Term("field0", "value"))
        left.setBoost(0.2)
        query.add(left, BooleanClause.Occur.SHOULD)

        right = TermQuery(Term("field1", "value"))
        right.setBoost(2.0)
        query.add(right, BooleanClause.Occur.SHOULD)

        self.assertEqual(type(query), type(result))
        self.assertEqual(repr(query), repr(result))

    def testWildcards(self):
        # A trailing '*' yields a prefix query on the lowercased prefix.
        query = PrefixQuery(Term('unqualified', 'prefix'))
        self.assertConversion(query, 'prefix*')
        self.assertConversion(query, 'PREfix*')
        query = PrefixQuery(Term('field', 'prefix'))
        self.assertConversion(query, 'field="PREfix*"')
        self.assertConversion(query, 'field=prefix*')
        query = PrefixQuery(Term('field', 'oc-0123'))
        self.assertConversion(query, 'field="oc-0123*"')
        # A one-character prefix is expected to fall back to a plain term query.
        query = TermQuery(Term('field', 'p'))
        self.assertConversion(query, 'field="P*"')
        # Only prefix queries for now: a leading '*' yields a plain term query.
        query = TermQuery(Term('field', 'post'))
        self.assertConversion(query, 'field="*post"')

        # A doubled trailing '*' also yields a plain term query.
        query = TermQuery(Term('field', 'prefix'))
        self.assertConversion(query, 'field=prefix**')

        # With several unqualified fields, each one gets its own boosted
        # prefix clause.
        result = LuceneQueryComposer(unqualifiedTermFields=[("field0", 0.2), ("field1", 2.0)], luceneSettings=LuceneSettings()).compose(parseCql("prefix*"))

        query = BooleanQuery()
        left = PrefixQuery(Term("field0", "prefix"))
        left.setBoost(0.2)
        query.add(left, BooleanClause.Occur.SHOULD)

        right = PrefixQuery(Term("field1", "prefix"))
        right.setBoost(2.0)
        query.add(right, BooleanClause.Occur.SHOULD)

        self.assertEqual(type(query), type(result))
        self.assertEqual(repr(query), repr(result))

    def testMagicExact(self):
        # For a field registered as StringField.TYPE_NOT_STORED, the '='
        # relation must give the same query as 'exact' does.
        exactResult = self.composer.compose(parseCql('animal exact "cats dogs"'))
        fieldRegistry = FieldRegistry()
        fieldRegistry.register('animal', StringField.TYPE_NOT_STORED)
        self.composer = LuceneQueryComposer(unqualifiedTermFields=[("unqualified", 1.0)], luceneSettings=LuceneSettings(fieldRegistry=fieldRegistry))
        self.assertConversion(exactResult, 'animal = "cats dogs"')

    def testMatchAllQuery(self):
        self.assertConversion(MatchAllDocsQuery(), '*')

    def testTextRangeQuery(self):
        # newStringRange arguments:
        # (field, lowerTerm, upperTerm, includeLower, includeUpper)
        self.assertConversion(TermRangeQuery.newStringRange('field', 'value', None, False, False), 'field > value')
        self.assertConversion(TermRangeQuery.newStringRange('field', 'value', None, True, False), 'field >= value')
        self.assertConversion(TermRangeQuery.newStringRange('field', None, 'value', False, False), 'field < value')
        self.assertConversion(TermRangeQuery.newStringRange('field', None, 'value', False, True), 'field <= value')

    def testDrilldownFieldQuery(self):
        # A field registered as drilldown field is queried through the
        # "$facets" term built by DrillDownQuery.term.
        fieldRegistry = FieldRegistry([DrilldownField('field')])
        self.composer = LuceneQueryComposer(unqualifiedTermFields=[("unqualified", 1.0)], luceneSettings=LuceneSettings(fieldRegistry=fieldRegistry))
        self.assertConversion(TermQuery(DrillDownQuery.term("$facets", "field", "value")), "field = value")

    def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
        # 'noTermFreqField' is an unqualified field, yet the expected phrase
        # query only targets "unqualified".
        fieldRegistry = FieldRegistry()
        fieldRegistry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELDTYPE)
        self.composer = LuceneQueryComposer(unqualifiedTermFields=[("unqualified", 1.0), ('noTermFreqField', 2.0)], luceneSettings=LuceneSettings(fieldRegistry=fieldRegistry))
        expected = PhraseQuery()
        # NOTE(review): the expectation holds one term "phrase query" instead
        # of two separate terms; presumably this only matches because
        # assertConversion compares by repr - confirm.
        expected.add(Term("unqualified", "phrase query"))
        self.assertConversion(expected, '"phrase query"')

    def assertConversion(self, expected, input):
        # Parse `input` as CQL, compose it with self.composer and require the
        # result to have the same type and the same repr as `expected`.
        # NOTE(review): presumably repr is used because the query objects do
        # not compare equal by value - confirm.
        result = self.composer.compose(parseCql(input))
        self.assertEqual(type(expected), type(result), "expected %s, but got %s" % (repr(expected), repr(result)))
        self.assertEqual(repr(expected), repr(result))

    def testUnsupportedCQL(self):
        # Relations the composer does not support must raise UnsupportedCQL.
        # assertRaises reports a missing exception with a descriptive message
        # and cannot be satisfied by the test's own failure signal.
        for relation in ['<>']:
            with self.assertRaises(UnsupportedCQL):
                LuceneQueryComposer(unqualifiedTermFields=[("unqualified", 1.0)], luceneSettings=LuceneSettings()).compose(parseCql('index %s term' % relation))