Ejemplo n.º 1
0
    def testFacetAndTopsMultiCollector(self):
        """Feed one search into both a top-docs and a facet collector via MultiSuperCollector."""
        index = Index(path=self.tempdir, settings=LuceneSettings())
        for number in xrange(99):
            fields = [("field1", str(number)), ("field2", str(number) * 1000)]
            facets = [("facet1", "value%s" % (number % 10))]
            doc = createDocument(fields=fields, facets=facets)
            doc = index._facetsConfig.build(index._taxoWriter, doc)
            index._indexWriter.addDocument(doc)
        index.commit()
        index.close()
        # Reopen the index on the same directory before searching.
        index = Index(path=self.tempdir, settings=LuceneSettings())

        facetCollector = FacetSuperCollector(
            index._indexAndTaxonomy.taxoReader,
            index._facetsConfig,
            index._ordinalsReader)
        topCollector = TopScoreDocSuperCollector(10, True)
        collectorList = ArrayList().of_(SuperCollector)
        collectorList.add(topCollector)
        collectorList.add(facetCollector)
        multiCollector = MultiSuperCollector(collectorList)
        matchAll = MatchAllDocsQuery()
        index.search(matchAll, None, multiCollector)

        self.assertEquals(99, topCollector.topDocs(0).totalHits)
        self.assertEquals(10, len(topCollector.topDocs(0).scoreDocs))
        topChildren = facetCollector.getTopChildren(10, "facet1", [])

        # 99 documents spread over 10 facet values: 'value9' only receives
        # 9, 19, ..., 89 and therefore counts 9 instead of 10.
        expected = [
            ('value0', 10), ('value1', 10), ('value2', 10), ('value3', 10),
            ('value4', 10), ('value5', 10), ('value6', 10), ('value7', 10),
            ('value8', 10), ('value9', 9),
        ]
        actual = [(entry.label, entry.value.intValue())
                  for entry in topChildren.labelValues]
        self.assertEquals(expected, actual)
Ejemplo n.º 2
0
 def testSearch(self):
     """Check the total hit count on an empty index and again after adding one document."""
     hitCounter = TotalHitCountSuperCollector()
     index = Index(path=self.tempdir, settings=LuceneSettings())
     matchAll = MatchAllDocsQuery()
     index.search(matchAll, None, hitCounter)
     self.assertEquals(0, hitCounter.getTotalHits())

     newDocument = document(name="one", price="2")
     index._indexWriter.addDocument(newDocument)
     index.close()

     # Same collector and query are reused against a freshly opened index.
     index = Index(path=self.tempdir, settings=LuceneSettings())
     index.search(matchAll, None, hitCounter)
     self.assertEquals(1, hitCounter.getTotalHits())
Ejemplo n.º 3
0
 def setUp(self):
     """Wire an AdapterToLuceneQuery with converters for cores A and B to a tracing observer."""
     converterForA = QueryExpressionToLuceneQueryDict(
         [('fieldA', 1.0)], luceneSettings=LuceneSettings())
     converterForB = QueryExpressionToLuceneQueryDict(
         [('fieldB', 1.0)], luceneSettings=LuceneSettings())
     perCoreConverters = {'A': converterForA, 'B': converterForB}
     self.converter = AdapterToLuceneQuery(
         defaultCore='A', coreConverters=perCoreConverters)
     # executeQuery calls reaching the observer are answered by executeQueryMock.
     self.observer = CallTrace(
         'Query responder', methods={'executeQuery': executeQueryMock})
     observerNode = (self.observer, )
     self.dna = be((Observable(), (self.converter, observerNode)))
Ejemplo n.º 4
0
 def testSearchTopDocs(self):
     """Index three documents; a top-2 score collector must count 3 hits but return 2 docs."""
     index = Index(path=self.tempdir, settings=LuceneSettings())
     records = [
         ("one", "aap noot mies"),
         ("two", "aap vuur boom"),
         ("three", "noot boom mies"),
     ]
     for name, price in records:
         index._indexWriter.addDocument(document(name=name, price=price))
     index.close()

     index = Index(path=self.tempdir, settings=LuceneSettings())
     topCollector = TopScoreDocSuperCollector(2, True)
     matchAll = MatchAllDocsQuery()
     index.search(matchAll, None, topCollector)
     topDocs = topCollector.topDocs(0)
     self.assertEquals(3, topCollector.getTotalHits())
     self.assertEquals(3, topDocs.totalHits)
     self.assertEquals(2, len(topDocs.scoreDocs))
Ejemplo n.º 5
0
 def testLuceneServerHostPortDynamic(self):
     """Lucene without fixed host/port must ask its observer for 'luceneServer' before the HTTP call."""
     lucene = Lucene(name='lucene', settings=LuceneSettings(), readonly=True)

     def httprequest1_1Mock(**kwargs):
         # Generator that immediately "returns" the parsed canned response
         # (Python 2 idiom: raise StopIteration(value) before an unreachable yield).
         raise StopIteration(parseResponse(HTTP_RESPONSE))
         yield

     observer = CallTrace(
         'observer',
         returnValues={'luceneServer': ('example.org', 1234)},
         methods={'httprequest1_1': httprequest1_1Mock})
     lucene.addObserver(observer)

     converter = QueryExpressionToLuceneQueryDict([], LuceneSettings())
     query = converter.convert(cqlToExpression("field=value"))
     response = retval(lucene.executeQuery(luceneQuery=query, start=1, stop=5))

     self.assertEquals(887, response.total)
     expectedCalls = ['luceneServer', 'httprequest1_1']
     self.assertEquals(expectedCalls, observer.calledMethodNames())
Ejemplo n.º 6
0
    def testAddTypeAndMissingValueToSortField(self):
        """The posted composed query must carry 'type' and 'missingValue' on its sort key."""
        cannedResponse = JsonDict({
                "total": 887,
                "queryTime": 6,
                "hits": [{"id": "record:1", "score": 0.1234}]
            })
        self.response = cannedResponse.dumps()

        composedQuery = ComposedQuery('coreA')
        converter = QueryExpressionToLuceneQueryDict([], LuceneSettings())
        coreBQuery = converter.convert(cqlToExpression("field=value"))
        composedQuery.setCoreQuery('coreB', coreBQuery)
        composedQuery.sortKeys = [
            dict(sortBy='sortField', core='coreA', sortDescending=True)]
        composedQuery.addMatch(
            dict(core='coreA', uniqueKey='A'), dict(core='coreB', key='B'))
        consume(self._multiLucene.executeComposedQuery(composedQuery))

        # 'type' and 'missingValue' are not set above; they are expected to
        # appear in the posted sort key.
        expectedPost = {
                "_sortKeys": [{'core': 'coreA', 'sortBy': 'sortField', 'sortDescending': True, 'type': 'String', 'missingValue': 'STRING_FIRST'}],
                "resultsFrom": "coreA",
                '_matches': {'coreA->coreB': [{'core': 'coreA', 'uniqueKey': 'A'}, {'core': 'coreB', 'key': 'B'}]},
                "_facets": {},
                "_otherCoreFacetFilters": {},
                "_rankQueries": {},
                "_drilldownQueries": {},
                "_unites": [],
                '_queries': {'coreB': {'term': {'field': 'field', 'value': 'value'}, 'type': 'TermQuery'}},
                "cores": ["coreB", "coreA"],
                "_filterQueries": {}
            }
        self.assertEqual(expectedPost, loads(self.post[0]['data']))
Ejemplo n.º 7
0
    def testComposedQuery(self):
        """A single-core composed query must result in exactly one POST to '/query/'."""
        cannedResponse = JsonDict({
                "total": 887,
                "queryTime": 6,
                "hits": [{"id": "record:1", "score": 0.1234}]
            })
        self.response = cannedResponse.dumps()

        composedQuery = ComposedQuery('coreA')
        converter = QueryExpressionToLuceneQueryDict([], LuceneSettings())
        coreAQuery = converter.convert(cqlToExpression("field=value"))
        composedQuery.setCoreQuery("coreA", coreAQuery)

        consume(self._multiLucene.executeComposedQuery(composedQuery))
        self.assertEqual(1, len(self.post))
        firstPost = self.post[0]
        self.assertEqual("/query/", firstPost['path'])

        expectedPost = {
                "_sortKeys": [],
                "resultsFrom": "coreA",
                "_matches": {},
                "_facets": {},
                "_otherCoreFacetFilters": {},
                "_rankQueries": {},
                "_drilldownQueries": {},
                "_unites": [],
                "_queries": {"coreA": {"term": {"field": "field", "value": "value"}, "type": "TermQuery"}},
                "cores": ["coreA"],
                "_filterQueries": {}
            }
        self.assertEqual(expectedPost, loads(self.post[0]['data']))
Ejemplo n.º 8
0
 def setUp(self):
     """Create a Lucene instance in the temp dir with commitCount=1 and a traced reactor."""
     super(DeDupFilterCollectorTest, self).setUp()
     self._reactor = CallTrace('reactor')
     quietSettings = LuceneSettings(commitCount=1, verbose=False)
     self.lucene = Lucene(
         self.tempdir, reactor=self._reactor, settings=quietSettings)
Ejemplo n.º 9
0
    def testOne(self):
        """clone(verbose=False) yields changed settings and leaves the original untouched."""
        original = LuceneSettings()
        self.assertTrue(original.verbose)

        cloned = original.clone(verbose=False)
        # The original keeps its value; only the clone is changed.
        self.assertTrue(original.verbose)
        self.assertFalse(cloned.verbose)
Ejemplo n.º 10
0
 def testCreateNonDefaultAnalyzer(self):
     """An analyzer configured as a dict must be instantiated with its stemming fields."""
     analyzerConfig = dict(
         type="MerescoDutchStemmingAnalyzer",
         stemmingFields=["field_a", "field_b"])
     settings = LuceneSettings(analyzer=analyzerConfig)
     createdAnalyzer = settings.createAnalyzer()
     simpleName = createdAnalyzer.class_.getSimpleName()
     self.assertEquals("MerescoDutchStemmingAnalyzer", simpleName)
     self.assertEquals(
         ["field_a", "field_b"], createdAnalyzer.getStemmingFields())
Ejemplo n.º 11
0
 def setUp(self):
     """Wire a CqlToLuceneQuery to a tracing observer and capture every clause it logs."""
     self.convertor = CqlToLuceneQuery(
         [('field', 1.0)], luceneSettings=LuceneSettings())
     self.observer = CallTrace(
         'Query responder', methods={'executeQuery': executeQueryMock})
     observerNode = (self.observer,)
     self.dna = be((Observable(), (self.convertor, observerNode)))
     self.loggedClauses = []

     def recordClause(clause, **kwargs):
         # Replace the convertor's log hook so tests can inspect logged clauses.
         self.loggedClauses.append(clause)

     self.convertor.log = recordClause
Ejemplo n.º 12
0
 def testConfigureMergePolicy(self):
     """A custom mergePolicy must show up in asPostDict(); all other entries stay at DEFAULTS."""
     settings = LuceneSettings(
         mergePolicy=dict(type='LogDocMergePolicy',
                          mergeFactor=2,
                          maxMergeDocs=100))
     # Shallow copy suffices: only the top-level 'mergePolicy' entry is replaced.
     expected = copy(DEFAULTS)
     expected['mergePolicy'] = {
         'type': 'LogDocMergePolicy',
         'mergeFactor': 2,
         'maxMergeDocs': 100,
     }
     actual = settings.asPostDict()
     self.assertEquals(expected, actual)
Ejemplo n.º 13
0
 def setUp(self, fieldRegistry=None):
     """Create a Lucene instance under the temp dir with a traced reactor and observer.

     fieldRegistry -- registry handed to LuceneSettings; when omitted
         (None) a fresh FieldRegistry is created for this call. The former
         default ``FieldRegistry()`` was evaluated once at definition time,
         so a single mutable registry was shared by every test using the
         default.
     """
     super(LuceneTestCase, self).setUp()
     if fieldRegistry is None:
         fieldRegistry = FieldRegistry()
     self._javaObjects = self._getJavaObjects()
     # addTimer hands back a traced stand-in for the timer object.
     self._reactor = CallTrace('reactor', methods={'addTimer': lambda seconds, callback: CallTrace('timer')})
     self._defaultSettings = LuceneSettings(commitCount=1, commitTimeout=1, fieldRegistry=fieldRegistry)
     self.lucene = Lucene(
         join(self.tempdir, 'lucene'),
         reactor=self._reactor,
         settings=self._defaultSettings,
     )
     self.observer = CallTrace()
     self.lucene.addObserver(self.observer)
Ejemplo n.º 14
0
    def testFacetSuperCollector(self):
        """Index 1000 documents over 100 facet values and check the top-10 facet children."""
        index = Index(path=self.tempdir, settings=LuceneSettings())
        for number in xrange(1000):
            fields = [("field1", str(number)), ("field2", str(number) * 1000)]
            facets = [("facet1", "value%s" % (number % 100))]
            doc = createDocument(fields=fields, facets=facets)
            doc = index._facetsConfig.build(index._taxoWriter, doc)
            index._indexWriter.addDocument(doc)
        index.close()
        index = Index(path=self.tempdir, settings=LuceneSettings())

        facetCollector = FacetSuperCollector(
            index._indexAndTaxonomy.taxoReader,
            index._facetsConfig,
            index._ordinalsReader)
        matchAll = MatchAllDocsQuery()
        index.search(matchAll, None, facetCollector)
        topChildren = facetCollector.getTopChildren(10, "facet1", [])

        # Every facet value occurs 10 times (1000 docs / 100 values).
        expected = [
            ('value90', 10), ('value91', 10), ('value92', 10), ('value93', 10),
            ('value94', 10), ('value95', 10), ('value96', 10), ('value97', 10),
            ('value98', 10), ('value99', 10),
        ]
        actual = [(entry.label, entry.value.intValue())
                  for entry in topChildren.labelValues]
        self.assertEquals(expected, actual)
 def _prepareLuceneSettings(self):
     """Build LuceneSettings from optional per-test overrides.

     Uses ``self._analyzer`` and ``self.fieldRegistry`` when the test
     defines them; without a ``fieldRegistry`` a new FieldRegistry with
     'intField' and 'longField' registered is installed instead.
     """
     settings = LuceneSettings()
     if hasattr(self, '_analyzer'):
         settings.analyzer = self._analyzer
     if not hasattr(self, 'fieldRegistry'):
         settings.fieldRegistry = FieldRegistry()
         defaultFields = (("intField", INTFIELD), ("longField", LONGFIELD))
         for fieldname, definition in defaultFields:
             settings.fieldRegistry.register(fieldname,
                                             fieldDefinition=definition)
         return settings
     settings.fieldRegistry = self.fieldRegistry
     return settings
Ejemplo n.º 16
0
 def testExecuteQuery(self):
     """executeQuery must POST the full request to '/lucene/query/' and parse the canned response."""
     cannedResponse = JsonDict({
             "total": 887,
             "queryTime": 6,
             "times": {"searchTime": 3},
             "hits": [{
                     "id": "record:1", "score": 0.1234,
                     "duplicateCount": {"__key__": 2},
                     "duplicates": {"__grouping_key__": [{"id": 'record:1'}, {"id": 'record:2'}]}
                 }],
             "drilldownData": [
                 {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
             ],
             "suggestions": {
                 "valeu": ["value"]
             }
         })
     self.response = cannedResponse.dumps()

     converter = QueryExpressionToLuceneQueryDict([], LuceneSettings())
     query = converter.convert(cqlToExpression("field=value"))
     response = retval(self._lucene.executeQuery(
                 luceneQuery=query, start=1, stop=5,
                 facets=[dict(maxTerms=10, fieldname='facet')],
                 sortKeys=[dict(sortBy='field', sortDescending=False)],
                 suggestionRequest=dict(suggests=['valeu'], count=2, field='field1'),
                 dedupField="__key__",
                 clustering=True,
                 storedFields=["field"]
             ))

     # Request side: one POST whose body mirrors the arguments; the sort key
     # is expected to gain 'type' and 'missingValue'.
     self.assertEqual(1, len(self.post))
     self.assertEqual('/lucene/query/', self.post[0]['path'])
     expectedRequest = {
                 "start": 1, "stop": 5,
                 "storedFields": ["field"],
                 "query": {"term": {"field": "field", "value": "value"}, "type": "TermQuery"},
                 "facets": [{"fieldname": "facet", "maxTerms": 10}],
                 "sortKeys": [{"sortBy": "field", "sortDescending": False, "type": "String", 'missingValue': 'STRING_LAST'}],
                 "suggestionRequest": dict(suggests=['valeu'], count=2, field='field1'),
                 "dedupField": "__key__",
                 "dedupSortField": None,
                 "clustering": True,
             }
     self.assertEqual(expectedRequest, loads(self.post[0]['data']))

     # Response side: values come straight from the canned JSON above.
     self.assertEqual(887, response.total)
     self.assertEqual(6, response.queryTime)
     self.assertEqual({'searchTime': 3}, response.times)
     self.assertEqual(1, len(response.hits))
     self.assertEqual("record:1", response.hits[0].id)
     self.assertEqual(0.1234, response.hits[0].score)
     self.assertEqual(dict(__key__=2), response.hits[0].duplicateCount)
     expectedDrilldown = [
             {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
         ]
     self.assertEqual(expectedDrilldown, response.drilldownData)
     self.assertEqual({'valeu': ['value']}, response.suggestions)
Ejemplo n.º 17
0
 def testLuceneServerHostPortDynamic(self):
     """MultiLucene without fixed host/port must ask its observer for 'luceneServer' before the HTTP call."""
     multiLucene = MultiLucene(defaultCore='core1')

     def httprequest1_1Mock(**kwargs):
         # Generator that immediately "returns" the parsed canned response
         # (Python 2 idiom: raise StopIteration(value) before an unreachable yield).
         raise StopIteration(parseResponse(HTTP_RESPONSE))
         yield

     observer = CallTrace(
         'observer',
         returnValues={'luceneServer': ('example.org', 1234)},
         methods={'httprequest1_1': httprequest1_1Mock})
     multiLucene.addObserver(observer)

     converter = QueryExpressionToLuceneQueryDict([], LuceneSettings())
     query = converter.convert(cqlToExpression("field=value"))
     composedQuery = ComposedQuery('core1', query)
     response = retval(multiLucene.executeComposedQuery(composedQuery))

     self.assertEquals(887, response.total)
     expectedCalls = ['luceneServer', 'httprequest1_1']
     self.assertEquals(expectedCalls, observer.calledMethodNames())
Ejemplo n.º 18
0
    def testScore(self):
        """With TermFrequencySimilarity the score is 0.1 unboosted and 1 with a boost of 10."""
        reactor = CallTrace('reactor')
        settings = LuceneSettings(
            commitCount=1,
            similarity=TermFrequencySimilarity(),
            verbose=False)
        lucene = Lucene(
            join(self.tempdir, 'lucene'), reactor=reactor, settings=settings)
        doc = Document()
        # One field holding the term 'x' a hundred times.
        doc.add(TextField('field', 'x '*100, Field.Store.NO))
        returnValueFromGenerator(
            lucene.addDocument(identifier="identifier", document=doc))

        termQuery = TermQuery(Term("field", 'x'))
        unboosted = returnValueFromGenerator(lucene.executeQuery(termQuery))
        self.assertAlmostEqual(0.1, unboosted.hits[0].score)

        termQuery.setBoost(10.0)
        boosted = returnValueFromGenerator(lucene.executeQuery(termQuery))
        self.assertAlmostEqual(1, boosted.hits[0].score)
Ejemplo n.º 19
0
 def testSearchTopField(self):
     """Sort three separately committed documents on 'name' and check the top-2 ids."""
     index = Index(path=self.tempdir, settings=LuceneSettings())
     records = [
         ('1', "one", "aap noot mies"),
         ('2', "two", "aap vuur boom"),
         ('3', "three", "noot boom mies"),
     ]
     for identifier, name, price in records:
         index._indexWriter.addDocument(
             document(__id__=identifier, name=name, price=price))
         # Commit after every document, as the original test does.
         index.commit()
     index.close()

     index = Index(path=self.tempdir, settings=LuceneSettings())
     # The expected ids below match descending name order: 'two', 'three'.
     nameSort = Sort(SortField("name", SortField.Type.STRING, True))
     fieldCollector = TopFieldSuperCollector(nameSort, 2, True, False, True)
     matchAll = MatchAllDocsQuery()
     index.search(matchAll, None, fieldCollector)
     topDocs = fieldCollector.topDocs(0)
     self.assertEquals(3, fieldCollector.getTotalHits())
     self.assertEquals(3, topDocs.totalHits)
     self.assertEquals(2, len(topDocs.scoreDocs))
     foundIds = [index.getDocument(scoreDoc.doc).get("__id__")
                 for scoreDoc in topDocs.scoreDocs]
     self.assertEquals(['2', '3'], foundIds)
Ejemplo n.º 20
0
 def setUp(self):
     """Set up a MultiLucene with one observing Lucene core and a recording fake for POSTs."""
     SeecrTestCase.setUp(self)
     self.registry = FieldRegistry()
     self._multiLucene = MultiLucene(
         defaultCore='coreA', host="localhost", port=12345)
     self._lucene = Lucene(
         host="localhost", port=12345, settings=LuceneSettings(), name='coreA')
     self._multiLucene.addObserver(self._lucene)
     self.post = []
     self.response = ""

     def mockPost(data, path, **kwargs):
         # Record the request, then "return" self.response
         # (Python 2 idiom: raise StopIteration(value) before an unreachable yield).
         self.post.append({'data': data, 'path': path})
         raise StopIteration(self.response)
         yield

     # Patch a single connection object and make _connect() always return it.
     patchedConnection = self._multiLucene._connect()
     patchedConnection._post = mockPost
     self._multiLucene._connect = lambda: patchedConnection
Ejemplo n.º 21
0
    def setUpLucene(self, **kwargs):
        """Create ``self._lucene`` with recording fakes for its connection's POST and read calls.

        kwargs -- extra keyword arguments passed on to the Lucene constructor.

        After the call ``self.post`` collects dict(data=..., path=...) for
        every POST, ``self.read`` collects every path read, and both fakes
        answer with the current value of ``self.response``.
        """
        self._lucene = Lucene(host="localhost", port=1234, name='lucene', settings=LuceneSettings(), **kwargs)
        self.post = []
        self.read = []
        # Assigned once; the original set this same value twice.
        self.response = ""
        connect = self._lucene._connect()

        # The fakes take **_ignored rather than **kwargs so they do not
        # shadow this method's own kwargs.
        def mockPost(data, path, **_ignored):
            self.post.append(dict(data=data, path=path))
            # Python 2 idiom: a generator "returns" a value by raising
            # StopIteration before an unreachable yield.
            raise StopIteration(self.response)
            yield
        connect._post = mockPost

        def mockRead(path, **_ignored):
            self.read.append(path)
            raise StopIteration(self.response)
            yield
        connect.read = mockRead
        # Always hand out the patched connection.
        self._lucene._connect = lambda: connect
Ejemplo n.º 22
0
 def testPostDictWithDrilldownFields(self):
     """Registered drilldown fields must be listed under 'drilldownFields' in asPostDict()."""
     registry = FieldRegistry()
     registry.registerDrilldownField(
         "field0", hierarchical=True, multiValued=False)
     registry.registerDrilldownField(
         "field1", hierarchical=True, multiValued=True,
         indexFieldName="$facets_2")
     settings = LuceneSettings(fieldRegistry=registry)

     # field0 was registered without indexFieldName and is expected with
     # 'fieldname': None.
     field0Entry = dict(
         dim='field0', hierarchical=True, fieldname=None, multiValued=False)
     field1Entry = dict(
         dim='field1', hierarchical=True, fieldname='$facets_2',
         multiValued=True)
     expected = copy(DEFAULTS)
     expected['drilldownFields'] = [field0Entry, field1Entry]
     self.assertEquals(expected, settings.asPostDict())
Ejemplo n.º 23
0
    def testWildcards(self):
        """Trailing '*' gives a PrefixQuery; the other wildcard forms below give a TermQuery."""
        unqualifiedPrefix = PrefixQuery(Term('unqualified', 'prefix'))
        self.assertConversion(unqualifiedPrefix, 'prefix*')
        self.assertConversion(unqualifiedPrefix, 'PREfix*')

        fieldPrefix = PrefixQuery(Term('field', 'prefix'))
        self.assertConversion(fieldPrefix, 'field="PREfix*"')
        self.assertConversion(fieldPrefix, 'field=prefix*')

        hyphenatedPrefix = PrefixQuery(Term('field', 'oc-0123'))
        self.assertConversion(hyphenatedPrefix, 'field="oc-0123*"')

        singleCharTerm = TermQuery(Term('field', 'p'))
        self.assertConversion(singleCharTerm, 'field="P*"')

        # only prefix queries for now
        leadingWildcardTerm = TermQuery(Term('field', 'post'))
        self.assertConversion(leadingWildcardTerm, 'field="*post"')

        doubleWildcardTerm = TermQuery(Term('field', 'prefix'))
        self.assertConversion(doubleWildcardTerm, 'field=prefix**')

        composer = LuceneQueryComposer(
            unqualifiedTermFields=[("field0", 0.2), ("field1", 2.0)],
            luceneSettings=LuceneSettings())
        result = composer.compose(parseCql("prefix*"))

        # Expected: one boosted SHOULD prefix clause per unqualified field.
        expected = BooleanQuery()
        for fieldname, boost in [("field0", 0.2), ("field1", 2.0)]:
            clause = PrefixQuery(Term(fieldname, "prefix"))
            clause.setBoost(boost)
            expected.add(clause, BooleanClause.Occur.SHOULD)

        self.assertEquals(type(expected), type(result))
        self.assertEquals(repr(expected), repr(result))
Ejemplo n.º 24
0
 def setUp(self):
     """Provide ``self.composer`` with 'unqualified' (boost 1.0) as the only unqualified term field."""
     super(LuceneQueryComposerTest, self).setUp()
     defaultFields = [("unqualified", 1.0)]
     self.composer = LuceneQueryComposer(
         unqualifiedTermFields=defaultFields,
         luceneSettings=LuceneSettings())
Ejemplo n.º 25
0
 def testPhraseOutputDutchStemming(self):
     """With the Dutch stemming analyzer '"katten honden"' must become the phrase 'kat hond'."""
     stemmingSettings = LuceneSettings(analyzer=MerescoDutchStemmingAnalyzer())
     self.composer = LuceneQueryComposer(
         unqualifiedTermFields=[("unqualified", 1.0)],
         luceneSettings=stemmingSettings)
     expected = PhraseQuery()
     for stem in ("kat", "hond"):
         expected.add(Term("unqualified", stem))
     self.assertConversion(expected, '"katten honden"')
Ejemplo n.º 26
0
 def testUnsupportedCQL(self):
     """Composing a query with an unsupported relation must raise UnsupportedCQL."""
     for relation in ['<>']:
         try:
             LuceneQueryComposer(unqualifiedTermFields=[("unqualified", 1.0)], luceneSettings=LuceneSettings()).compose(parseCql('index %(relation)s term' % {'relation': relation}))
         except UnsupportedCQL:
             continue
         else:
             # No exception at all means the relation was wrongly accepted.
             self.fail()
Ejemplo n.º 27
0
 def testExcludeUnqualifiedFieldForWhichNoPhraseQueryIsPossibleInCaseOfPhraseQuery(self):
     """A phrase query must only target 'unqualified', not the field registered with NO_TERMS_FREQUENCY_FIELDTYPE."""
     registry = FieldRegistry()
     registry.register('noTermFreqField', NO_TERMS_FREQUENCY_FIELDTYPE)
     termFields = [("unqualified", 1.0), ('noTermFreqField', 2.0)]
     self.composer = LuceneQueryComposer(
         unqualifiedTermFields=termFields,
         luceneSettings=LuceneSettings(fieldRegistry=registry))
     expectedQuery = PhraseQuery()
     expectedQuery.add(Term("unqualified", "phrase query"))
     self.assertConversion(expectedQuery, '"phrase query"')
Ejemplo n.º 28
0
 def testDrilldownFieldQuery(self):
     """A query on a registered drilldown field must become a TermQuery on the '$facets' drilldown term."""
     registry = FieldRegistry([DrilldownField('field')])
     settings = LuceneSettings(fieldRegistry=registry)
     self.composer = LuceneQueryComposer(
         unqualifiedTermFields=[("unqualified", 1.0)],
         luceneSettings=settings)
     expectedQuery = TermQuery(DrillDownQuery.term("$facets", "field", "value"))
     self.assertConversion(expectedQuery, "field = value")
Ejemplo n.º 29
0
 def testMagicExact(self):
     """'=' on a field registered as StringField.TYPE_NOT_STORED must convert like 'exact'."""
     # Reference result, produced by the composer from setUp before it is replaced.
     referenceQuery = self.composer.compose(parseCql('animal exact "cats dogs"'))
     registry = FieldRegistry()
     registry.register('animal', StringField.TYPE_NOT_STORED)
     self.composer = LuceneQueryComposer(
         unqualifiedTermFields=[("unqualified", 1.0)],
         luceneSettings=LuceneSettings(fieldRegistry=registry))
     self.assertConversion(referenceQuery, 'animal = "cats dogs"')
Ejemplo n.º 30
0
    def testUnqualifiedTermFields(self):
        """An unqualified term must expand to one boosted SHOULD TermQuery per configured field."""
        composer = LuceneQueryComposer(
            unqualifiedTermFields=[("field0", 0.2), ("field1", 2.0)],
            luceneSettings=LuceneSettings())
        parsed = parseCql("value")
        result = composer.compose(parsed)

        expected = BooleanQuery()
        for fieldname, boost in [("field0", 0.2), ("field1", 2.0)]:
            clause = TermQuery(Term(fieldname, "value"))
            clause.setBoost(boost)
            expected.add(clause, BooleanClause.Occur.SHOULD)

        # Compared on type and repr, as the original test does.
        self.assertEquals(type(expected), type(result))
        self.assertEquals(repr(expected), repr(result))