예제 #1
0
 def setUp(self):
     """Create the Lucene index under test in the test's temp directory.

     The index is driven by a CallTrace stand-in for the reactor, which is
     kept on self._reactor.
     """
     super(DeDupFilterCollectorTest, self).setUp()
     self._reactor = CallTrace('reactor')
     # commitCount=1: presumably makes every added document commit at once
     # -- TODO confirm against LuceneSettings.
     luceneSettings = LuceneSettings(verbose=False, commitCount=1)
     self.lucene = Lucene(self.tempdir, settings=luceneSettings, reactor=self._reactor)
예제 #2
0
 def setUp(self, fieldRegistry=FieldRegistry()):
     """Create a Lucene index in <tempdir>/lucene with a CallTrace observer attached.

     fieldRegistry -- registry handed to the LuceneSettings used for the index.
     NOTE(review): the default FieldRegistry() is evaluated once, when the
     method is defined, so all callers relying on the default share one
     instance -- presumably intentional; confirm before changing.
     """
     super(LuceneTestCase, self).setUp()
     # Snapshot the Java objects before anything below is created.
     self._javaObjects = self._getJavaObjects()

     def addTimer(seconds, callback):
         # Every timer request is answered with a fresh traced 'timer'.
         return CallTrace('timer')
     self._reactor = CallTrace('reactor', methods=dict(addTimer=addTimer))

     self._defaultSettings = LuceneSettings(
         commitCount=1,
         commitTimeout=1,
         fieldRegistry=fieldRegistry)
     indexPath = join(self.tempdir, 'lucene')
     self.lucene = Lucene(indexPath, reactor=self._reactor, settings=self._defaultSettings)

     self.observer = CallTrace()
     self.lucene.addObserver(self.observer)
예제 #3
0
    def testScore(self):
        # Index one document whose 'field' holds 100 times the term 'x',
        # using TermFrequencySimilarity, then check the hit score without
        # and with a query boost of 10.
        traceReactor = CallTrace('reactor')
        indexSettings = LuceneSettings(
            commitCount=1,
            similarity=TermFrequencySimilarity(),
            verbose=False)
        index = Lucene(join(self.tempdir, 'lucene'), reactor=traceReactor, settings=indexSettings)

        doc = Document()
        doc.add(TextField('field', 'x '*100, Field.Store.NO))
        returnValueFromGenerator(index.addDocument(identifier="identifier", document=doc))

        termQuery = TermQuery(Term("field", 'x'))
        unboosted = returnValueFromGenerator(index.executeQuery(termQuery))
        self.assertAlmostEqual(0.1, unboosted.hits[0].score)

        # The same query object is boosted in place and executed again.
        termQuery.setBoost(10.0)
        boosted = returnValueFromGenerator(index.executeQuery(termQuery))
        self.assertAlmostEqual(1, boosted.hits[0].score)
예제 #4
0
 def testLuceneServerHostPortDynamic(self):
     # A read-only Lucene created without host/port: the observer is
     # expected to be asked for 'luceneServer' first and then for
     # 'httprequest1_1' (asserted at the end).
     remoteLucene = Lucene(name='lucene', settings=LuceneSettings(), readonly=True)

     def httprequest1_1Mock(**kwargs):
         # Generator that hands back its result by raising StopIteration
         # (Python 2 idiom); the unreachable yield makes this a generator.
         raise StopIteration(parseResponse(HTTP_RESPONSE))
         yield

     serverObserver = CallTrace(
         'observer',
         returnValues={'luceneServer': ('example.org', 1234)},
         methods={'httprequest1_1': httprequest1_1Mock})
     remoteLucene.addObserver(serverObserver)

     converter = QueryExpressionToLuceneQueryDict([], LuceneSettings())
     luceneQuery = converter.convert(cqlToExpression("field=value"))
     result = retval(remoteLucene.executeQuery(luceneQuery=luceneQuery, start=1, stop=5))

     self.assertEquals(887, result.total)
     self.assertEquals(['luceneServer', 'httprequest1_1'], serverObserver.calledMethodNames())
예제 #5
0
    def setUpLucene(self, **kwargs):
        """Create self._lucene with its connection's _post and read replaced by recorders.

        kwargs -- extra keyword arguments passed on to the Lucene constructor.

        Posted (data, path) pairs are collected in self.post, read paths in
        self.read; both mocks answer with the current value of self.response.
        """
        self._lucene = Lucene(host="localhost", port=1234, name='lucene', settings=LuceneSettings(), **kwargs)
        self.post = []
        self.response = ""
        # Obtain one connection object and patch it; _connect is replaced
        # at the end so that this same patched object is always handed out.
        connection = self._lucene._connect()

        def recordPost(data, path, **ignored):
            self.post.append({'data': data, 'path': path})
            # Python 2 idiom: a generator "returns" by raising StopIteration;
            # the unreachable yield makes this a generator function.
            raise StopIteration(self.response)
            yield
        connection._post = recordPost

        self.read = []
        self.response = ""

        def recordRead(path, **ignored):
            self.read.append(path)
            raise StopIteration(self.response)
            yield
        connection.read = recordRead

        def patchedConnect():
            return connection
        self._lucene._connect = patchedConnect
예제 #6
0
def luceneAndReaderConfig(defaultLuceneSettings, httpRequestAdapter,
                          lucenePort):
    """Build the Lucene component for DEFAULT_CORE with httpRequestAdapter as its observer.

    defaultLuceneSettings -- settings that are cloned with a FieldRegistry
                             built from the module-level drilldownFields.
    httpRequestAdapter    -- component placed directly below the Lucene
                             component in the tree.
    lucenePort            -- port of the Lucene server on 127.0.0.1.

    Returns the result of be() applied to the (Lucene, (adapter,)) tree.
    """
    registry = FieldRegistry(drilldownFields=drilldownFields)
    coreSettings = defaultLuceneSettings.clone(fieldRegistry=registry)
    core = Lucene(
        host='127.0.0.1',
        port=lucenePort,
        name=DEFAULT_CORE,
        settings=coreSettings)
    return be((core, (httpRequestAdapter, )))
예제 #7
0
class LuceneTestCase(SeecrTestCase):
    """Test case base that creates a Lucene index and checks for left-over Java objects.

    setUp records the Java object references reported by the VM before the
    index is created; tearDown closes the index and asserts that no other
    references are reported afterwards (apart from the classes ignored in
    _getJavaObjects).
    """

    def setUp(self, fieldRegistry=FieldRegistry()):
        """Create the index in <tempdir>/lucene and attach a CallTrace observer.

        NOTE(review): the default FieldRegistry() is evaluated once, when
        the class body is executed, so all tests relying on the default
        share one instance -- presumably intentional; confirm before changing.
        """
        super(LuceneTestCase, self).setUp()
        # Snapshot first, so everything created below shows up as a difference.
        self._javaObjects = self._getJavaObjects()

        def addTimer(seconds, callback):
            return CallTrace('timer')
        self._reactor = CallTrace('reactor', methods=dict(addTimer=addTimer))

        self._defaultSettings = LuceneSettings(
            commitCount=1,
            commitTimeout=1,
            fieldRegistry=fieldRegistry)
        indexPath = join(self.tempdir, 'lucene')
        self.lucene = Lucene(indexPath, reactor=self._reactor, settings=self._defaultSettings)

        self.observer = CallTrace()
        self.lucene.addObserver(self.observer)

    def tearDown(self):
        """Close the index and fail if more Java objects are alive than before setUp."""
        try:
            # Forget the recorded calls: don't keep any references.
            self._reactor.calledMethods.reset()
            self.lucene.close()
            self.lucene = None
            gc.collect()
            leaked = self._getJavaObjects() - self._javaObjects
            self.assertEquals(0, len(leaked), leaked)
        finally:
            # The base class tearDown runs even when the check above fails.
            SeecrTestCase.tearDown(self)

    def _getJavaObjects(self):
        """Return the set of (key, value) pairs from VM._dumpRefs(classes=True).

        Entries for the classes listed below are left out. The values are
        presumably per-class reference counts -- TODO confirm against JCC.
        """
        ignoredClasses = (
            'class java.lang.Class',
            'class org.apache.lucene.document.Field',  # Fields are kept in FieldRegistry for reusing
            'class org.apache.lucene.document.NumericDocValuesField',
            'class org.apache.lucene.facet.FacetsConfig',
        )
        refs = VM._dumpRefs(classes=True)
        javaObjects = set()
        for className in refs.keys():
            if all(className != ignored for ignored in ignoredClasses):
                javaObjects.add((className, refs[className]))
        return javaObjects
예제 #8
0
 def setUp(self):
     """Create a MultiLucene observing one Lucene core ('coreA'), with posts recorded.

     The _post of the MultiLucene connection is replaced by a recorder that
     stores each (data, path) in self.post and answers with self.response.
     """
     SeecrTestCase.setUp(self)
     self.registry = FieldRegistry()
     self._multiLucene = MultiLucene(defaultCore='coreA', host="localhost", port=12345)
     self._lucene = Lucene(host="localhost", port=12345, settings=LuceneSettings(), name='coreA')
     self._multiLucene.addObserver(self._lucene)
     self.post = []
     self.response = ""

     def recordPost(data, path, **ignored):
         self.post.append({'data': data, 'path': path})
         # Python 2 idiom: a generator "returns" by raising StopIteration;
         # the unreachable yield makes this a generator function.
         raise StopIteration(self.response)
         yield

     # Patch one connection object and make _connect always return it.
     connection = self._multiLucene._connect()
     connection._post = recordPost

     def patchedConnect():
         return connection
     self._multiLucene._connect = patchedConnect
예제 #9
0
    def setUp(self):
        """Create self._lucene and replace _connect._post / _connect.read by recorders.

        Posted (data, path) pairs are collected in self.post, read paths in
        self.read; both mocks answer with the current value of self.response.
        """
        SeecrTestCase.setUp(self)
        self._lucene = Lucene(
            host="localhost",
            port=1234,
            name='lucene',
            settings=LuceneSettings())
        self.post = []
        self.response = ""

        def recordPost(data, path, **ignored):
            self.post.append({'data': data, 'path': path})
            # Python 2 idiom: a generator "returns" by raising StopIteration;
            # the unreachable yield makes this a generator function.
            raise StopIteration(self.response)
            yield
        self._lucene._connect._post = recordPost

        self.read = []
        self.response = ""

        def recordRead(path, **ignored):
            self.read.append(path)
            raise StopIteration(self.response)
            yield
        self._lucene._connect.read = recordRead
예제 #10
0
def main(reactor, port, databasePath):
    """Assemble the component tree of the example server and return it.

    reactor      -- reactor passed to the Lucene cores, the HTTP server,
                    DynamicHtml and LuceneRemoteService.
    port         -- port for ObservableHttpServer; also used as the target
                    port of LuceneRemote and Autocomplete on localhost.
    databasePath -- directory below which the Lucene indexes ('lucene',
                    'lucene2', 'lucene-empty'), the term numerator and the
                    storage directory are placed.

    Returns the nested tuple rooted at Observable(); this function itself
    does not pass it through be().

    Relies on module-level names not visible here (dynamicPath, staticPath,
    version, uploadHelix, stdout).
    """
    drilldownFields = [
        DrilldownField('untokenized.field2'),
        DrilldownField('untokenized.fieldHier', hierarchical=True)
    ]

    # One FieldRegistry is shared by the settings of both 'main' and 'main2'.
    fieldRegistry = FieldRegistry(drilldownFields)
    luceneSettings = LuceneSettings(fieldRegistry=fieldRegistry,
                                    commitCount=30,
                                    commitTimeout=1,
                                    analyzer=MerescoDutchStemmingAnalyzer())
    lucene = Lucene(path=join(databasePath, 'lucene'),
                    reactor=reactor,
                    name='main',
                    settings=luceneSettings)

    lucene2Settings = LuceneSettings(fieldRegistry=fieldRegistry,
                                     commitTimeout=0.1)
    lucene2 = Lucene(path=join(databasePath, 'lucene2'),
                     reactor=reactor,
                     name='main2',
                     settings=lucene2Settings)

    termNumerator = TermNumerator(path=join(databasePath, 'termNumerator'))

    # Third core, 'empty-core', with settings that have no field registry
    # passed in; nothing in this function routes updates to it.
    emptyLuceneSettings = LuceneSettings(commitTimeout=1)
    # MultiLucene over the three cores; this same tuple object is used in
    # both the '/sru' and the '/remote' branch below.
    multiLuceneHelix = (
        MultiLucene(defaultCore='main'),
        (Lucene(path=join(databasePath, 'lucene-empty'),
                reactor=reactor,
                name='empty-core',
                settings=emptyLuceneSettings), ),
        (lucene, ),
        (lucene2, ),
    )
    storageComponent = StorageComponent(
        directory=join(databasePath, 'storage'))

    # Tree layout: HTTP server -> BasicHttpHandler -> ApacheLogger, followed
    # by one PathFilter branch per URL prefix.
    return \
    (Observable(),
        (ObservableHttpServer(reactor=reactor, port=port),
            (BasicHttpHandler(),
                (ApacheLogger(outputStream=stdout),
                    # '/info' pages, except the paths that have their own
                    # branch or are listed as excluded here.
                    (PathFilter("/info", excluding=[
                            '/info/version',
                            '/info/name',
                            '/update',
                            '/sru',
                            '/remote',
                            '/via-remote-sru',
                        ]),
                        (DynamicHtml(
                                [dynamicPath],
                                reactor=reactor,
                                indexPage='/info',
                                additionalGlobals={
                                    'VERSION': version,
                                }
                            ),
                        )
                    ),
                    (PathFilter("/info/version"),
                        (StringServer(version, ContentTypePlainText), )
                    ),
                    (PathFilter("/info/name"),
                        (StringServer('Meresco Lucene', ContentTypePlainText),)
                    ),
                    # Static files: the '/static' prefix is cut off before
                    # the path reaches the FileServer.
                    (PathFilter("/static"),
                        (PathRename(lambda path: path[len('/static'):]),
                            (FileServer(staticPath),)
                        )
                    ),
                    # '/update_main2' is excluded explicitly from the
                    # '/update_main' filter and handled by its own branch.
                    (PathFilter("/update_main", excluding=['/update_main2']),
                        uploadHelix(lucene, termNumerator, storageComponent, drilldownFields, fieldRegistry=luceneSettings.fieldRegistry),
                    ),
                    (PathFilter("/update_main2"),
                        uploadHelix(lucene2, termNumerator, storageComponent, drilldownFields, fieldRegistry=lucene2Settings.fieldRegistry),
                    ),
                    # SRU directly on the local cores.
                    (PathFilter('/sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (MultiCqlToLuceneQuery(
                                    defaultCore='main',
                                    coreToCqlLuceneQueries={
                                        "main": CqlToLuceneQuery([], luceneSettings=luceneSettings),
                                        "main2": CqlToLuceneQuery([], luceneSettings=lucene2Settings),
                                        "empty-core": CqlToLuceneQuery([], luceneSettings=emptyLuceneSettings),
                                    }),
                                    multiLuceneHelix,
                                ),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    # SRU through LuceneRemote, which targets '/remote' on
                    # localhost at this server's own port.
                    (PathFilter('/via-remote-sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (LuceneRemote(host='localhost', port=port, path='/remote'),),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    # Service side of the remote branch above; same query
                    # conversion configuration as under '/sru'.
                    (PathFilter('/remote'),
                        (LuceneRemoteService(reactor=reactor),
                            (MultiCqlToLuceneQuery(
                                    defaultCore='main',
                                    coreToCqlLuceneQueries={
                                        "main": CqlToLuceneQuery([], luceneSettings=luceneSettings),
                                        "main2": CqlToLuceneQuery([], luceneSettings=lucene2Settings),
                                        "empty-core": CqlToLuceneQuery([], luceneSettings=emptyLuceneSettings),
                                    }),
                                multiLuceneHelix,
                            )
                        )
                    ),
                    # NOTE(review): the meaning of the positional '?' and 5
                    # arguments is not visible here -- check the
                    # Autocomplete signature.
                    (PathFilter('/autocomplete'),
                        (Autocomplete('localhost', port, '/autocomplete', '__all__', '?', 5, '?', '?'),
                            (lucene,),
                        )
                    )
                )
            )
        )
    )
 def setUp(self):
     """Create the Lucene index under test in the test's temp directory.

     The index is driven by a CallTrace stand-in for the reactor, which is
     kept on self._reactor.
     """
     super(DeDupFilterCollectorTest, self).setUp()
     self._reactor = CallTrace('reactor')
     luceneSettings = LuceneSettings(verbose=False, commitCount=1)
     self.lucene = Lucene(
         self.tempdir,
         settings=luceneSettings,
         reactor=self._reactor)
예제 #12
0
class MultiLuceneTest(SeecrTestCase):
    """Tests for querying and joining over three Lucene cores through MultiLucene.

    setUp fills the cores 'coreA', 'coreB' and 'coreC' with a fixed set of
    documents; the tests run plain and composed (join) queries against them.
    """

    def __init__(self, *args, **kwargs):
        super(MultiLuceneTest, self).__init__(*args, **kwargs)
        # Passed to both LuceneSettings and MultiLucene in setUp.
        self._multithreaded = True

    def setUp(self):
        """Create the three cores, wire them below a MultiLucene and add the test documents."""
        SeecrTestCase.setUp(self)
        settings = LuceneSettings(multithreaded=self._multithreaded, verbose=False)
        settingsLuceneC = LuceneSettings(multithreaded=self._multithreaded, verbose=False, similarity=TermFrequencySimilarity())

        self.luceneA = Lucene(join(self.tempdir, 'a'), name='coreA', reactor=CallTrace(), settings=settings)
        self.luceneB = Lucene(join(self.tempdir, 'b'), name='coreB', reactor=CallTrace(), settings=settings)
        self.luceneC = Lucene(join(self.tempdir, 'c'), name='coreC', reactor=CallTrace(), settings=settingsLuceneC)
        self.dna = be((Observable(),
            (MultiLucene(defaultCore='coreA', multithreaded=self._multithreaded),
                (self.luceneA,),
                (self.luceneB,),
                (self.luceneC,),
            )
        ))

        # +---------------------------------+   +---------------------------------+  +----------------------+
        # |              ______             |   |                                 |  |                    C |
        # |         ____/      \____     A  |   |    __________                B  |  |      ____            |
        # |        /   /\   Q  /\   \       |   |   /    N     \                  |  |     /    \           |
        # |       /   /  \    /  \   \      |   |  /   ____     \                 |  |    |   R  |          |
        # |      /   |    \  /    |   \     |   | |   /    \     |                |  |     \ ___/           |
        # |     /     \    \/    /     \    |   | |  |  M __|____|_____           |  |                      |
        # |    /       \   /\   /       \   |   | |   \__/_/     |     \          |  |                      |
        # |   |         \_|__|_/         |  |   |  \    |       /      |          |  |                      |
        # |   |    U      |  |     M     |  |   |   \___|______/    ___|_______   |  |                      |
        # |   |           \  /           |  |   |       |          /   |       \  |  |                      |
        # |    \           \/           /   |   |       |   O     /   _|__      \ |  |                      |
        # |     \          /\          /    |   |        \_______|___/_/  \     | |  |                      |
        # |      \        /  \        /     |   |                |  |  M   | P  | |  |                      |
        # |       \______/    \______/      |   |                |   \____/     | |  |                      |
        # |                                 |   |                 \            /  |  |                      |
        # |                                 |   |                  \__________/   |  |                      |
        # +---------------------------------+   +---------------------------------+  +----------------------+

        k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11 = range(1,12)
        self.addDocument(self.luceneA, identifier='A',      keys=[('A', k1 )], fields=[('M', 'false'), ('Q', 'false'), ('U', 'false'), ('S', '1')])
        self.addDocument(self.luceneA, identifier='A-U',    keys=[('A', k2 )], fields=[('M', 'false'), ('Q', 'false'), ('U', 'true' ), ('S', '2')])
        self.addDocument(self.luceneA, identifier='A-Q',    keys=[('A', k3 )], fields=[('M', 'false'), ('Q', 'true' ), ('U', 'false'), ('S', '3')])
        self.addDocument(self.luceneA, identifier='A-QU',   keys=[('A', k4 )], fields=[('M', 'false'), ('Q', 'true' ), ('U', 'true' ), ('S', '4')])
        self.addDocument(self.luceneA, identifier='A-M',    keys=[('A', k5 ), ('C', k5)], fields=[('M', 'true' ), ('Q', 'false'), ('U', 'false'), ('S', '5')])
        self.addDocument(self.luceneA, identifier='A-MU',   keys=[('A', k6 )], fields=[('M', 'true' ), ('Q', 'false'), ('U', 'true' ), ('S', '6')])
        self.addDocument(self.luceneA, identifier='A-MQ',   keys=[('A', k7 )], fields=[('M', 'true' ), ('Q', 'true' ), ('U', 'false'), ('S', '7')])
        self.addDocument(self.luceneA, identifier='A-MQU',  keys=[('A', k8 )], fields=[('M', 'true' ), ('Q', 'true' ), ('U', 'true' ), ('S', '8')])

        self.addDocument(self.luceneB, identifier='B-N>A-M',   keys=[('B', k5 ), ('D', k5)], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MU',  keys=[('B', k6 )], fields=[('N', 'true' ), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MQ',  keys=[('B', k7 )], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MQU', keys=[('B', k8 )], fields=[('N', 'true' ), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N',       keys=[('B', k9 )], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B',         keys=[('B', k10)], fields=[('N', 'false'), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-P>A-M',   keys=[('B', k5 )], fields=[('N', 'false'), ('O', 'true' ), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P>A-MU',  keys=[('B', k6 )], fields=[('N', 'false'), ('O', 'false'), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P>A-MQ',  keys=[('B', k7 )], fields=[('N', 'false'), ('O', 'false' ), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P>A-MQU', keys=[('B', k8 )], fields=[('N', 'false'), ('O', 'false'), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P',       keys=[('B', k11)], fields=[('N', 'false'), ('O', 'true' ), ('P', 'true' )])

        self.addDocument(self.luceneC, identifier='C-R', keys=[('C', k5)], fields=[('R', 'true')])
        self.addDocument(self.luceneC, identifier='C-S', keys=[('C', k8)], fields=[('S', 'true')])
        self.addDocument(self.luceneC, identifier='C-S2', keys=[('C', k7)], fields=[('S', 'false')])

        # Commit the fixture explicitly; only afterwards is commitCount set
        # to 1 -- presumably so that documents added inside a test are
        # committed immediately (TODO confirm against LuceneSettings).
        self.luceneA._realCommit()
        self.luceneB._realCommit()
        self.luceneC._realCommit()
        settings.commitCount = 1
        settingsLuceneC.commitCount = 1

    def tearDown(self):
        """Close all three cores, then run the base class tearDown."""
        try:
            # luceneC is created in setUp as well and must be closed too.
            for lucene in (self.luceneA, self.luceneB, self.luceneC):
                lucene.close()
        finally:
            # Base class cleanup runs even when closing a core fails.
            SeecrTestCase.tearDown(self)

    def hitIds(self, hits):
        """Return the set of the id attributes of the given hits."""
        return set([hit.id for hit in hits])

    def testQueryOneIndex(self):
        # No core given: the query is answered by the default core, coreA.
        result = returnValueFromGenerator(self.dna.any.executeQuery(luceneQuery=luceneQueryFromCql('Q=true')))
        self.assertEquals(set(['A-Q', 'A-QU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits))
        result = returnValueFromGenerator(self.dna.any.executeQuery(luceneQuery=luceneQueryFromCql('Q=true AND M=true')))
        self.assertEquals(set(['A-MQ', 'A-MQU']), self.hitIds(result.hits))

    def testQueryOneIndexWithComposedQuery(self):
        # A ComposedQuery on a single core, first without and then with a filter query.
        cq = ComposedQuery('coreA')
        cq.setCoreQuery(core='coreA', query=luceneQueryFromCql('Q=true'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(cq))
        self.assertEquals(set(['A-Q', 'A-QU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits))
        cq = ComposedQuery('coreA')
        cq.setCoreQuery(core='coreA', query=luceneQueryFromCql('Q=true'), filterQueries=[luceneQueryFromCql('M=true')])
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(cq))
        self.assertEquals(set(['A-MQ', 'A-MQU']), self.hitIds(result.hits))

    def testB_N_is_true(self):
        result = returnValueFromGenerator(self.dna.any.executeQuery(core='coreB', luceneQuery=luceneQueryFromCql('N=true')))
        self.assertEquals(5, result.total)
        self.assertEquals(set(['B-N', 'B-N>A-M', 'B-N>A-MU', 'B-N>A-MQ', 'B-N>A-MQU']), self.hitIds(result.hits))

    def testJoinQuery(self):
        # All of coreA, restricted to documents whose key A matches key B
        # of a coreB document with N=true.
        q = ComposedQuery('coreA', query=MatchAllDocsQuery())
        q.setCoreQuery(core='coreB', query=luceneQueryFromCql('N=true'))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX+'A'), dict(core='coreB', key=KEY_PREFIX+'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(q))
        self.assertEquals(4, result.total)
        self.assertEquals(set(['A-M', 'A-MU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits))

    def testMultipleJoinQueriesKeepsCachesWithinMaxSize(self):
        # Runs 25 different join queries; there is no explicit assertion,
        # the test passes when none of the queries raises.
        for i in xrange(25):
            self.addDocument(self.luceneB, identifier=str(i), keys=[('X', i)], fields=[('Y', str(i))])
        for i in xrange(25):
            q = ComposedQuery('coreA', query=MatchAllDocsQuery())
            q.setCoreQuery(core='coreB', query=luceneQueryFromCql('Y=%s' % i))
            q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX+'A'), dict(core='coreB', key=KEY_PREFIX+'X'))
            # The result itself is not inspected.
            returnValueFromGenerator(self.dna.any.executeComposedQuery(q))

    def testJoinQueryWithFilters(self):
        q = ComposedQuery('coreA')
        q.addFilterQuery('coreB', query=luceneQueryFromCql('N=true'))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX+'A'), dict(core='coreB', key=KEY_PREFIX+'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(q))
        self.assertEquals(4, result.total)
        self.assertEquals(set(['A-M', 'A-MU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits))

    def testJoinFacet(self):
        # Query on coreA, facets requested from coreB through the A/B key match.
        q = ComposedQuery('coreA', query=luceneQueryFromCql('Q=true'))
        q.addFacet('coreB', dict(fieldname='cat_N', maxTerms=10))
        q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(4, result.total)
        self.assertEquals([{
                'terms': [
                        {'count': 2, 'term': u'true'},
                        {'count': 2, 'term': u'false'},
                    ],
                'path': [],
                'fieldname': u'cat_N'
            }, {
                'terms': [
                    {'count': 3, 'term': u'false'},
                    {'count': 1, 'term': u'true'},
                ],
                'path': [],
                'fieldname': u'cat_O'
            }], result.drilldownData)

    def testJoinFacetWithDrilldownQueryFilters(self):
        # Drilldown query on the result core (coreA), facet from coreB.
        q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true'))
        q.addDrilldownQuery('coreA', drilldownQuery=('cat_Q', ['true']))
        q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(2, result.total)
        self.assertEquals([{
                'terms': [
                    {'count': 3, 'term': u'false'},
                    {'count': 1, 'term': u'true'},
                ],
                'path': [],
                'fieldname': u'cat_O'
            }], result.drilldownData)

    def testJoinFacetWithJoinDrilldownQueryFilters(self):
        # Drilldown query on the joined core (coreB), facet from coreB.
        q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true'))
        q.addDrilldownQuery('coreB', drilldownQuery=('cat_O', ['true']))
        q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(2, result.total)
        self.assertEquals([{
                'terms': [
                    {'count': 3, 'term': u'true'},
                ],
                'path': [],
                'fieldname': u'cat_O'
            }], result.drilldownData)

    def testJoinDrilldownQueryFilters(self):
        q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true'))
        q.addDrilldownQuery('coreB', drilldownQuery=('cat_O', ['true']))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(2, result.total)

    def testJoinFacetWithFilter(self):
        q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true'))
        q.addFilterQuery('coreA', query=luceneQueryFromCql('Q=true'))
        q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(2, result.total)
        self.assertEquals([{
                'terms': [
                    {'count': 3, 'term': u'false'},
                    {'count': 1, 'term': u'true'},
                ],
                'path': [],
                'fieldname': u'cat_O'
            }], result.drilldownData)

    def testJoinFacetFromBPointOfView(self):
        # coreB is the result core here, but the match gives it a 'key'
        # instead of a 'uniqueKey': addMatch is expected to refuse that.
        q = ComposedQuery('coreB')
        q.setCoreQuery(core='coreA', query=luceneQueryFromCql('Q=true'))
        q.setCoreQuery(core='coreB', query=None, facets=[
                dict(fieldname='cat_N', maxTerms=10),
                dict(fieldname='cat_O', maxTerms=10),
            ])
        try:
            q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        except ValueError as e:
            self.assertEquals("Match for result core 'coreB' must have a uniqueKey specification.", str(e))
            return

        # for future reference
        # Only reached once addMatch accepts this match; the query is
        # executed so that the assertions below have a result to inspect.
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(4, result.total)
        self.assertEquals(set(['B-N>A-MQ', 'B-N>A-MQU', 'B-P>A-MQ', 'B-P>A-MQU']), self.hitIds(result.hits))
        self.assertEquals([{
                'terms': [
                        {'count': 2, 'term': u'false'},
                        {'count': 2, 'term': u'true'},
                    ],
                'fieldname': u'cat_N'
            }, {
                'terms': [
                    {'count': 2, 'term': u'false'},
                    {'count': 2, 'term': u'true'},
                ],
                'fieldname': u'cat_O'
             }], result.drilldownData)
예제 #13
0
class DeDupFilterCollectorTest(SeecrTestCase):
    """Tests for DeDupFilterSuperCollector wrapped around a TopScoreDocSuperCollector.

    Documents carry optional numeric doc values "__isformatof__" and
    "__sort__"; every test searches all documents through the collector
    pair and inspects what the top-docs collector received.
    """

    def setUp(self):
        super(DeDupFilterCollectorTest, self).setUp()
        self._reactor = CallTrace('reactor')
        self.lucene = Lucene(
            self.tempdir,
            reactor=self._reactor,
            settings=LuceneSettings(commitCount=1, verbose=False))

    def tearDown(self):
        self.lucene.close()
        super(DeDupFilterCollectorTest, self).tearDown()

    def _addDocument(self, identifier, isformatof, sort=None):
        """Add a document, with doc values only for the truthy arguments, and commit."""
        document = Document()
        if isformatof:
            document.add(NumericDocValuesField("__isformatof__", long(isformatof)))
        if sort:
            document.add(NumericDocValuesField("__sort__", long(sort)))
        consume(self.lucene.addDocument(identifier, document))
        self.lucene.commit()  # Explicitly, not required: since commitCount=1.

    def _searchAll(self, keyField="__isformatof__"):
        """Search all documents; return (topCollector, dedupCollector).

        keyField -- name of the field the dedup collector is created with.
        """
        topCollector = TopScoreDocSuperCollector(100, True)
        dedupCollector = DeDupFilterSuperCollector(keyField, "__sort__", topCollector)
        self.lucene.search(query=MatchAllDocsQuery(), collector=dedupCollector)
        return topCollector, dedupCollector

    def testCollectorTransparentlyDelegatesToNextCollector(self):
        self._addDocument("urn:1", 2)
        topCollector, _ = self._searchAll()
        self.assertEquals(1, topCollector.topDocs(0).totalHits)

    def testCollectorFiltersTwoSimilar(self):
        # Two documents with the same __isformatof__ value: one hit is
        # left, and its key reports urn:2 with a count of 2.
        self._addDocument("urn:1", 2, 1)
        self._addDocument("urn:2", 2, 2)
        topCollector, dedupCollector = self._searchAll()
        topDocs = topCollector.topDocs(0)
        self.assertEquals(1, topDocs.totalHits)
        self.assertEquals(1, len(topDocs.scoreDocs))

        dedupKey = dedupCollector.keyForDocId(topDocs.scoreDocs[0].doc)
        luceneDocument = self.lucene._index.getDocument(dedupKey.getDocId())
        self.assertEquals('urn:2', luceneDocument.get(IDFIELD))
        self.assertEquals(2, dedupKey.count)

    def testCollectorFiltersTwoTimesTwoSimilarOneNot(self):
        self._addDocument("urn:1", 1, 2001)
        self._addDocument("urn:2", 3, 2009)  # result 2x
        self._addDocument("urn:3", 50, 2010)  # result 1x
        self._addDocument("urn:4", 3, 2001)
        self._addDocument("urn:5", 1, 2009)  # result 2x
        #expected: "urn:2', "urn:3" and "urn:5" in no particular order
        topCollector, dedupCollector = self._searchAll()
        topDocs = topCollector.topDocs(0)
        self.assertEquals(3, topDocs.totalHits)
        self.assertEquals(3, len(topDocs.scoreDocs))
        rawDocIds = [scoreDoc.doc for scoreDoc in topDocs.scoreDocs]
        netDocIds = [dedupCollector.keyForDocId(rawDocId).docId for rawDocId in rawDocIds]
        foundIdentifiers = set(
            self.lucene._index.getDocument(netDocId).get(IDFIELD)
            for netDocId in netDocIds)
        self.assertEquals(set(["urn:2", "urn:3", "urn:5"]), foundIdentifiers)
        counts = sorted(dedupCollector.keyForDocId(netDocId).count for netDocId in netDocIds)
        self.assertEquals([1, 2, 2], counts)

    def testSilentyYieldsWrongResultWhenFieldNameDoesNotMatch(self):
        self._addDocument("urn:1", 2)
        topCollector, _ = self._searchAll(keyField="__wrong_field__")
        self.assertEquals(1, topCollector.topDocs(0).totalHits)

    def testShouldAddResultsWithoutIsFormatOf(self):
        # urn:1 and urn:3 share an __isformatof__ value; the other nine
        # documents have none. Ten hits are expected.
        self._addDocument("urn:1", 2)
        self._addDocument("urn:2", None)
        self._addDocument("urn:3", 2)
        for suffix in "456789A":
            self._addDocument("urn:" + suffix, None)
        self._addDocument("urn:B", None)  # trigger a merge
        topCollector, _ = self._searchAll()
        self.assertEquals(10, topCollector.topDocs(0).totalHits)
예제 #14
0
class LuceneTest(SeecrTestCase):
    """Tests for the ``Lucene`` client against a stubbed connection.

    ``setUp`` replaces ``_connect._post`` and ``_connect.read`` on the client
    with recording stubs. Each test sets ``self.response`` to the canned reply
    it wants and then inspects ``self.post`` / ``self.read`` to verify what
    the client sent.
    """

    def setUp(self):
        SeecrTestCase.setUp(self)
        self._lucene = Lucene(host="localhost", port=1234, name='lucene', settings=LuceneSettings())
        # Recorded POST calls, as dicts with 'data' and 'path'.
        self.post = []
        # Canned reply shared by both stubs; tests overwrite it as needed.
        self.response = ""

        # The unreachable ``yield`` makes each stub a generator; raising
        # StopIteration(value) is the Python 2 idiom for returning a value
        # from a generator (not valid on Python 3.7+, see PEP 479).
        def mockPost(data, path, **kwargs):
            self.post.append(dict(data=data, path=path))
            raise StopIteration(self.response)
            yield
        self._lucene._connect._post = mockPost

        # Recorded read paths.
        self.read = []
        def mockRead(path, **kwargs):
            self.read.append(path)
            raise StopIteration(self.response)
            yield
        self._lucene._connect.read = mockRead

    def testPostSettingsAddObserverInit(self):
        # observer_init() posts the default settings exactly once.
        self.assertEqual([], self.post)
        self._lucene.observer_init()
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEqual(DEFAULTS, loads(self.post[0]['data']))

    def testInitialize(self):
        # initialize() posts the default settings exactly once.
        self.assertEqual([], self.post)
        consume(self._lucene.initialize())
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEqual(DEFAULTS, loads(self.post[0]['data']))

    def testAdd(self):
        # The identifier travels in the query string, the fields as JSON body.
        registry = FieldRegistry()
        fields = [registry.createField("id", "id1")]
        consume(self._lucene.addDocument(identifier='id1', fields=fields))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/update/?identifier=id1', self.post[0]['path'])
        self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', self.post[0]['data'])

    def testAddWithoutIdentifier(self):
        # Without an identifier the query string is left empty.
        registry = FieldRegistry()
        fields = [registry.createField("id", "id1")]
        consume(self._lucene.addDocument(fields=fields))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/update/?', self.post[0]['path'])
        self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', self.post[0]['data'])

    def testDelete(self):
        # Delete by identifier sends no body.
        consume(self._lucene.delete(identifier='id1'))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/delete/?identifier=id1', self.post[0]['path'])
        self.assertEqual(None, self.post[0]['data'])

    def testExecuteQuery(self):
        # Canned server reply; the second half of the test checks that it is
        # exposed as attributes on the response object.
        self.response = JsonDict({
                "total": 887,
                "queryTime": 6,
                "times": {"searchTime": 3},
                "hits": [{
                        "id": "record:1", "score": 0.1234,
                        "duplicateCount": {"__key__": 2},
                        "duplicates": {"__grouping_key__": [{"id": 'record:1'}, {"id": 'record:2'}]}
                    }],
                "drilldownData": [
                    {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
                ],
                "suggestions": {
                    "valeu": ["value"]
                }
            }).dumps()
        query = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
        response = retval(self._lucene.executeQuery(
                    luceneQuery=query, start=1, stop=5,
                    facets=[dict(maxTerms=10, fieldname='facet')],
                    sortKeys=[dict(sortBy='field', sortDescending=False)],
                    suggestionRequest=dict(suggests=['valeu'], count=2, field='field1'),
                    dedupField="__key__",
                    clustering=True,
                    storedFields=["field"]
                ))
        # Request side: one POST to the query endpoint with the expected JSON.
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/query/', self.post[0]['path'])
        self.assertEqual({
                    "start": 1, "stop": 5,
                    "storedFields": ["field"],
                    "query": {"term": {"field": "field", "value": "value"}, "type": "TermQuery"},
                    "facets": [{"fieldname": "facet", "maxTerms": 10}],
                    "sortKeys": [{"sortBy": "field", "sortDescending": False, "type": "String", 'missingValue': 'STRING_LAST'}],
                    "suggestionRequest": dict(suggests=['valeu'], count=2, field='field1'),
                    "dedupField": "__key__",
                    "dedupSortField": None,
                    "clustering": True,
                }, loads(self.post[0]['data']))
        # Response side.
        self.assertEqual(887, response.total)
        self.assertEqual(6, response.queryTime)
        self.assertEqual({'searchTime': 3}, response.times)
        self.assertEqual(1, len(response.hits))
        self.assertEqual("record:1", response.hits[0].id)
        self.assertEqual(0.1234, response.hits[0].score)
        self.assertEqual(dict(__key__=2), response.hits[0].duplicateCount)
        self.assertEqual([
                {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
            ], response.drilldownData)
        self.assertEqual({'valeu': ['value']}, response.suggestions)

    def testPrefixSearch(self):
        # Hits come back ordered by descending count; counts are only
        # included when showCount=True.
        self.response = JsonList([["value0", 1], ["value1", 2]]).dumps()
        response = retval(self._lucene.prefixSearch(fieldname='field1', prefix='valu'))
        self.assertEqual(['value1', 'value0'], response.hits)

        response = retval(self._lucene.prefixSearch(fieldname='field1', prefix='valu', showCount=True))
        self.assertEqual([('value1', 2), ('value0', 1)], response.hits)

    def testNumDocs(self):
        # The textual reply "150" is returned as the integer 150.
        self.response = "150"
        result = retval(self._lucene.numDocs())
        self.assertEqual(150, result)
        self.assertEqual([{'data': None, 'path': '/lucene/numDocs/'}], self.post)

    def testFieldnames(self):
        self.response = '["field1", "field2"]'
        result = retval(self._lucene.fieldnames())
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual([{"data": None, "path": "/lucene/fieldnames/"}], self.post)

    def testDrilldownFieldnames(self):
        # Default limit is 50; the first path element becomes ``dim`` and the
        # remaining elements become repeated ``path`` parameters.
        self.response = '["field1", "field2"]'
        result = retval(self._lucene.drilldownFieldnames())
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual([{"data": None, "path": "/lucene/drilldownFieldnames/?limit=50"}], self.post)

        result = retval(self._lucene.drilldownFieldnames(limit=1, path=['field']))
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual({"data": None, "path": "/lucene/drilldownFieldnames/?dim=field&limit=1"}, self.post[-1])

        result = retval(self._lucene.drilldownFieldnames(limit=1, path=['xyz', 'abc', 'field']))
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual({"data": None, "path": "/lucene/drilldownFieldnames/?dim=xyz&limit=1&path=abc&path=field"}, self.post[-1])

    def testUpdateSettings(self):
        # getSettings() goes through the read stub and returns its reply.
        self.response = JsonDict(numberOfConcurrentTasks=6, similarity="BM25(k1=1.2,b=0.75)", clustering=JsonDict(clusterMoreRecords=100, clusteringEps=0.4, clusteringMinPoints=1))
        settings = retval(self._lucene.getSettings())
        self.assertEqual(['/settings/'], self.read)
        self.assertEqual({'numberOfConcurrentTasks': 6, 'similarity': u'BM25(k1=1.2,b=0.75)', 'clustering': {'clusterMoreRecords': 100, 'clusteringEps': 0.4, 'clusteringMinPoints': 1}}, settings)

        clusterFields = [
            {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
        ]
        self.response = ""
        # similarity name "bm25" is posted as type "BM25Similarity".
        consume(self._lucene.setSettings(similarity=dict(name="bm25", k1=1.0, b=2.0), numberOfConcurrentTasks=10, clustering=dict(clusterMoreRecords=200, clusteringEps=1.0, clusteringMinPoints=2, fields=clusterFields)))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEqual({
                "numberOfConcurrentTasks": 10,
                "similarity": dict(type="BM25Similarity", k1=1.0, b=2.0),
                "clustering": {
                    "clusterMoreRecords": 200,
                    "clusteringEps": 1.0,
                    "clusteringMinPoints": 2,
                    "fields": [
                        {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
                    ]
                }
            }, loads(self.post[0]['data']))

        # Settings passed as None are left out of the posted JSON.
        consume(self._lucene.setSettings(numberOfConcurrentTasks=5, similarity=None, clustering=None))
        self.assertEqual(2, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[1]['path'])
        self.assertEqual({
                "numberOfConcurrentTasks": 5,
            }, loads(self.post[1]['data']))

    def testSimilarDocs(self):
        self.response = JsonDict({
                "total": 887,
                "queryTime": 6,
                "times": {"searchTime": 3},
                "hits": [
                        {"id": "record:1", "score": 0.1234},
                        {"id": "record:2", "score": 0.1234},
                    ],
            }).dumps()
        response = retval(self._lucene.similarDocuments(identifier='record:3'))
        self.assertEqual(887, response.total)
        self.assertEqual(2, len(response.hits))
class DeDupFilterCollectorTest(SeecrTestCase):
    """Tests for ``DeDupFilterSuperCollector`` on a Lucene index in tempdir.

    Documents are indexed through ``_addDocument`` with an optional
    ``__isformatof__`` value (the field the collector deduplicates on in
    these tests) and an optional ``__sort__`` value.
    """

    def setUp(self):
        super(DeDupFilterCollectorTest, self).setUp()
        self._reactor = CallTrace('reactor')
        settings = LuceneSettings(commitCount=1, verbose=False)
        self.lucene = Lucene(self.tempdir, reactor=self._reactor, settings=settings)

    def tearDown(self):
        # Close the index before the base class tears down its fixtures.
        self.lucene.close()
        super(DeDupFilterCollectorTest, self).tearDown()

    def testCollectorTransparentlyDelegatesToNextCollector(self):
        # With a single document the wrapped collector sees that one hit.
        self._addDocument("urn:1", 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEqual(1, tc.topDocs(0).totalHits)

    def _addDocument(self, identifier, isformatof, sort=None):
        # Index one document and commit it.
        # NOTE: the checks below are truthiness checks, so a value of 0 is
        # skipped just like None; callers in this class only pass None or
        # positive integers.
        doc = Document()
        if isformatof:
            doc.add(NumericDocValuesField("__isformatof__", long(isformatof)))
        if sort:
            doc.add(NumericDocValuesField("__sort__", long(sort)))
        consume(self.lucene.addDocument(identifier, doc))
        self.lucene.commit()  # Explicit commit; not strictly required since commitCount=1.

    def testCollectorFiltersTwoSimilar(self):
        # Both documents share __isformatof__ == 2, so only one hit remains;
        # the test expects it to be "urn:2" (the higher __sort__ value) with
        # a duplicate count of 2.
        self._addDocument("urn:1", 2, 1)
        self._addDocument("urn:2", 2, 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        topDocsResult = tc.topDocs(0)
        self.assertEqual(1, topDocsResult.totalHits)
        self.assertEqual(1, len(topDocsResult.scoreDocs))

        docId = topDocsResult.scoreDocs[0].doc
        key = c.keyForDocId(docId)
        identifier = self.lucene._index.getDocument(key.getDocId()).get(IDFIELD)
        self.assertEqual('urn:2', identifier)
        self.assertEqual(2, key.count)

    def testCollectorFiltersTwoTimesTwoSimilarOneNot(self):
        self._addDocument("urn:1",  1, 2001)
        self._addDocument("urn:2",  3, 2009) # result 2x
        self._addDocument("urn:3", 50, 2010) # result 1x
        self._addDocument("urn:4",  3, 2001)
        self._addDocument("urn:5",  1, 2009) # result 2x
        # expected: "urn:2", "urn:3" and "urn:5" in no particular order
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        topDocsResult = tc.topDocs(0)
        self.assertEqual(3, topDocsResult.totalHits)
        self.assertEqual(3, len(topDocsResult.scoreDocs))
        rawDocIds = [scoreDoc.doc for scoreDoc in topDocsResult.scoreDocs]
        # Map each collected doc id to the doc id stored on its dedup key.
        netDocIds = [c.keyForDocId(rawDocId).docId for rawDocId in rawDocIds]
        identifiers = set(self.lucene._index.getDocument(doc).get(IDFIELD) for doc in netDocIds)
        self.assertEqual(set(["urn:2", "urn:3", "urn:5"]), identifiers)
        # sorted() already returns a list; no extra list() wrapper needed.
        self.assertEqual([1,2,2], sorted(c.keyForDocId(d).count for d in netDocIds))

    def testSilentyYieldsWrongResultWhenFieldNameDoesNotMatch(self):
        # Deduplicating on a field that no document carries does not raise;
        # the single indexed document is still reported.
        self._addDocument("urn:1", 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__wrong_field__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEqual(1, tc.topDocs(0).totalHits)

    def testShouldAddResultsWithoutIsFormatOf(self):
        # Only "urn:1" and "urn:3" get an __isformatof__ value (both 2), so
        # they collapse into one hit; the nine documents without the field
        # must all be kept: 11 - 1 = 10 hits.
        self._addDocument("urn:1", 2)
        self._addDocument("urn:2", None)
        self._addDocument("urn:3", 2)
        self._addDocument("urn:4", None)
        self._addDocument("urn:5", None)
        self._addDocument("urn:6", None)
        self._addDocument("urn:7", None)
        self._addDocument("urn:8", None)
        self._addDocument("urn:9", None)
        self._addDocument("urn:A", None)
        self._addDocument("urn:B", None) # trigger a merge
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEqual(10, tc.topDocs(0).totalHits)
예제 #16
0
class LuceneTest(SeecrTestCase):
    """Tests for the ``Lucene`` client against a stubbed connection.

    ``setUpLucene`` builds the client and replaces its ``_connect`` factory
    with one that always returns a connection object whose ``_post`` and
    ``read`` are recording stubs. Each test sets ``self.response`` to the
    canned reply it wants and inspects ``self.post`` / ``self.read``.
    """

    def setUp(self):
        SeecrTestCase.setUp(self)
        self.setUpLucene()

    def setUpLucene(self, **kwargs):
        # (Re)create the client under test; extra kwargs (e.g. readonly=True)
        # are forwarded to the Lucene constructor.
        self._lucene = Lucene(host="localhost", port=1234, name='lucene', settings=LuceneSettings(), **kwargs)
        # Recorded POST calls, as dicts with 'data' and 'path'.
        self.post = []
        # Canned reply shared by both stubs; tests overwrite it as needed.
        self.response = ""
        connect = self._lucene._connect()

        # The unreachable ``yield`` makes each stub a generator; raising
        # StopIteration(value) is the Python 2 idiom for returning a value
        # from a generator (not valid on Python 3.7+, see PEP 479).
        def mockPost(data, path, **kwargs):
            self.post.append(dict(data=data, path=path))
            raise StopIteration(self.response)
            yield
        connect._post = mockPost

        # Recorded read paths.
        self.read = []
        def mockRead(path, **kwargs):
            self.read.append(path)
            raise StopIteration(self.response)
            yield
        connect.read = mockRead
        # Always hand out the patched connection object.
        self._lucene._connect = lambda: connect

    def testPostSettingsAddObserverInit(self):
        # observer_init() posts the default settings exactly once.
        self.assertEqual([], self.post)
        self._lucene.observer_init()
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEqual(DEFAULTS, loads(self.post[0]['data']))

    def testInitialize(self):
        # initialize() posts the default settings exactly once.
        self.assertEqual([], self.post)
        consume(self._lucene.initialize())
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEqual(DEFAULTS, loads(self.post[0]['data']))

    def testAdd(self):
        # The identifier travels in the query string, the fields as JSON body.
        registry = FieldRegistry()
        fields = [registry.createField("id", "id1")]
        consume(self._lucene.addDocument(identifier='id1', fields=fields))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/update/?identifier=id1', self.post[0]['path'])
        self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', self.post[0]['data'])

    def testAddWithoutIdentifier(self):
        # Without an identifier the query string is left empty.
        registry = FieldRegistry()
        fields = [registry.createField("id", "id1")]
        consume(self._lucene.addDocument(fields=fields))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/update/?', self.post[0]['path'])
        self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', self.post[0]['data'])

    def testDelete(self):
        # Delete by identifier sends no body.
        consume(self._lucene.delete(identifier='id1'))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/delete/?identifier=id1', self.post[0]['path'])
        self.assertEqual(None, self.post[0]['data'])

    def testDeleteByQuery(self):
        # Delete by query posts the query as JSON to the bare delete path.
        query = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
        consume(self._lucene.delete(luceneQuery=query))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/delete/', self.post[0]['path'])
        self.assertEqual('{"query": {"term": {"field": "field", "value": "value"}, "type": "TermQuery"}}', self.post[0]['data'])

    def testExecuteQuery(self):
        # Canned server reply; the second half of the test checks that it is
        # exposed as attributes on the response object.
        self.response = JsonDict({
                "total": 887,
                "queryTime": 6,
                "times": {"searchTime": 3},
                "hits": [{
                        "id": "record:1", "score": 0.1234,
                        "duplicateCount": {"__key__": 2},
                        "duplicates": {"__grouping_key__": [{"id": 'record:1'}, {"id": 'record:2'}]}
                    }],
                "drilldownData": [
                    {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
                ],
                "suggestions": {
                    "valeu": ["value"]
                }
            }).dumps()
        query = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
        response = retval(self._lucene.executeQuery(
                    luceneQuery=query, start=1, stop=5,
                    facets=[dict(maxTerms=10, fieldname='facet')],
                    sortKeys=[dict(sortBy='field', sortDescending=False)],
                    suggestionRequest=dict(suggests=['valeu'], count=2, field='field1'),
                    dedupField="__key__",
                    clustering=True,
                    storedFields=["field"]
                ))
        # Request side: one POST to the query endpoint with the expected JSON.
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/query/', self.post[0]['path'])
        self.assertEqual({
                    "start": 1, "stop": 5,
                    "storedFields": ["field"],
                    "query": {"term": {"field": "field", "value": "value"}, "type": "TermQuery"},
                    "facets": [{"fieldname": "facet", "maxTerms": 10}],
                    "sortKeys": [{"sortBy": "field", "sortDescending": False, "type": "String", 'missingValue': 'STRING_LAST'}],
                    "suggestionRequest": dict(suggests=['valeu'], count=2, field='field1'),
                    "dedupField": "__key__",
                    "dedupSortField": None,
                    "clustering": True,
                }, loads(self.post[0]['data']))
        # Response side.
        self.assertEqual(887, response.total)
        self.assertEqual(6, response.queryTime)
        self.assertEqual({'searchTime': 3}, response.times)
        self.assertEqual(1, len(response.hits))
        self.assertEqual("record:1", response.hits[0].id)
        self.assertEqual(0.1234, response.hits[0].score)
        self.assertEqual(dict(__key__=2), response.hits[0].duplicateCount)
        self.assertEqual([
                {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
            ], response.drilldownData)
        self.assertEqual({'valeu': ['value']}, response.suggestions)

    def testPrefixSearch(self):
        # Hits come back ordered by descending count; counts are only
        # included when showCount=True.
        self.response = JsonList([["value0", 1], ["value1", 2]]).dumps()
        response = retval(self._lucene.prefixSearch(fieldname='field1', prefix='valu'))
        self.assertEqual(['value1', 'value0'], response.hits)

        response = retval(self._lucene.prefixSearch(fieldname='field1', prefix='valu', showCount=True))
        self.assertEqual([('value1', 2), ('value0', 1)], response.hits)

    def testNumDocs(self):
        # The textual reply "150" is returned as the integer 150.
        self.response = "150"
        result = retval(self._lucene.numDocs())
        self.assertEqual(150, result)
        self.assertEqual([{'data': None, 'path': '/lucene/numDocs/'}], self.post)

    def testFieldnames(self):
        self.response = '["field1", "field2"]'
        result = retval(self._lucene.fieldnames())
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual([{"data": None, "path": "/lucene/fieldnames/"}], self.post)

    def testDrilldownFieldnames(self):
        # Default limit is 50; the first path element becomes ``dim`` and the
        # remaining elements become repeated ``path`` parameters.
        self.response = '["field1", "field2"]'
        result = retval(self._lucene.drilldownFieldnames())
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual([{"data": None, "path": "/lucene/drilldownFieldnames/?limit=50"}], self.post)

        result = retval(self._lucene.drilldownFieldnames(limit=1, path=['field']))
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual({"data": None, "path": "/lucene/drilldownFieldnames/?dim=field&limit=1"}, self.post[-1])

        result = retval(self._lucene.drilldownFieldnames(limit=1, path=['xyz', 'abc', 'field']))
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual({"data": None, "path": "/lucene/drilldownFieldnames/?dim=xyz&limit=1&path=abc&path=field"}, self.post[-1])

    def testUpdateSettings(self):
        # getSettings() goes through the read stub and returns its reply.
        self.response = JsonDict(numberOfConcurrentTasks=6, similarity="BM25(k1=1.2,b=0.75)", clustering=JsonDict(clusterMoreRecords=100, clusteringEps=0.4, clusteringMinPoints=1))
        settings = retval(self._lucene.getSettings())
        self.assertEqual(['/settings/'], self.read)
        self.assertEqual({'numberOfConcurrentTasks': 6, 'similarity': u'BM25(k1=1.2,b=0.75)', 'clustering': {'clusterMoreRecords': 100, 'clusteringEps': 0.4, 'clusteringMinPoints': 1}}, settings)

        clusterFields = [
            {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
        ]
        self.response = ""
        # similarity name "bm25" is posted as type "BM25Similarity".
        consume(self._lucene.setSettings(similarity=dict(name="bm25", k1=1.0, b=2.0), numberOfConcurrentTasks=10, clustering=dict(clusterMoreRecords=200, clusteringEps=1.0, clusteringMinPoints=2, fields=clusterFields)))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEqual({
                "numberOfConcurrentTasks": 10,
                "similarity": dict(type="BM25Similarity", k1=1.0, b=2.0),
                "clustering": {
                    "clusterMoreRecords": 200,
                    "clusteringEps": 1.0,
                    "clusteringMinPoints": 2,
                    "fields": [
                        {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
                    ]
                }
            }, loads(self.post[0]['data']))

        # Settings passed as None are left out of the posted JSON.
        consume(self._lucene.setSettings(numberOfConcurrentTasks=5, similarity=None, clustering=None))
        self.assertEqual(2, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[1]['path'])
        self.assertEqual({
                "numberOfConcurrentTasks": 5,
            }, loads(self.post[1]['data']))

    def testSimilarDocs(self):
        self.response = JsonDict({
                "total": 887,
                "queryTime": 6,
                "times": {"searchTime": 3},
                "hits": [
                        {"id": "record:1", "score": 0.1234},
                        {"id": "record:2", "score": 0.1234},
                    ],
            }).dumps()
        response = retval(self._lucene.similarDocuments(identifier='record:3'))
        self.assertEqual(887, response.total)
        self.assertEqual(2, len(response.hits))

    def testLuceneReadonly(self):
        # A readonly client posts nothing on observer_init() and raises
        # RuntimeError for setSettings, addDocument and delete.
        self.setUpLucene(readonly=True)
        self._lucene.observer_init()
        self.assertEqual([], self.post)
        self.assertRaises(RuntimeError, lambda: consume(self._lucene.setSettings()))
        self.assertRaises(RuntimeError, lambda: consume(self._lucene.addDocument(fields=[])))
        self.assertRaises(RuntimeError, lambda: consume(self._lucene.delete('identifier')))

    def testLuceneServerHostPortDynamic(self):
        # Without host/port the client asks its observer for the server
        # address ('luceneServer') and then issues the request through the
        # observer's httprequest1_1.
        lucene = Lucene(name='lucene', settings=LuceneSettings(), readonly=True)
        def httprequest1_1Mock(**kwargs):
            raise StopIteration(parseResponse(HTTP_RESPONSE))
            yield
        observer = CallTrace(
            'observer',
            returnValues=dict(luceneServer=('example.org', 1234)),
            methods=dict(httprequest1_1=httprequest1_1Mock))
        lucene.addObserver(observer)
        query = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
        response = retval(lucene.executeQuery(
            luceneQuery=query, start=1, stop=5,
        ))
        self.assertEqual(887, response.total)
        self.assertEqual(['luceneServer', 'httprequest1_1'], observer.calledMethodNames())
예제 #17
0
    def setUp(self):
        SeecrTestCase.setUp(self)
        settings = LuceneSettings(multithreaded=self._multithreaded, verbose=False)
        settingsLuceneC = LuceneSettings(multithreaded=self._multithreaded, verbose=False, similarity=TermFrequencySimilarity())

        self.luceneA = Lucene(join(self.tempdir, 'a'), name='coreA', reactor=CallTrace(), settings=settings)
        self.luceneB = Lucene(join(self.tempdir, 'b'), name='coreB', reactor=CallTrace(), settings=settings)
        self.luceneC = Lucene(join(self.tempdir, 'c'), name='coreC', reactor=CallTrace(), settings=settingsLuceneC)
        self.dna = be((Observable(),
            (MultiLucene(defaultCore='coreA', multithreaded=self._multithreaded),
                (self.luceneA,),
                (self.luceneB,),
                (self.luceneC,),
            )
        ))

        # +---------------------------------+   +---------------------------------+  +----------------------+
        # |              ______             |   |                                 |  |                    C |
        # |         ____/      \____     A  |   |    __________                B  |  |      ____            |
        # |        /   /\   Q  /\   \       |   |   /    N     \                  |  |     /    \           |
        # |       /   /  \    /  \   \      |   |  /   ____     \                 |  |    |   R  |          |
        # |      /   |    \  /    |   \     |   | |   /    \     |                |  |     \ ___/           |
        # |     /     \    \/    /     \    |   | |  |  M __|____|_____           |  |                      |
        # |    /       \   /\   /       \   |   | |   \__/_/     |     \          |  |                      |
        # |   |         \_|__|_/         |  |   |  \    |       /      |          |  |                      |
        # |   |    U      |  |     M     |  |   |   \___|______/    ___|_______   |  |                      |
        # |   |           \  /           |  |   |       |          /   |       \  |  |                      |
        # |    \           \/           /   |   |       |   O     /   _|__      \ |  |                      |
        # |     \          /\          /    |   |        \_______|___/_/  \     | |  |                      |
        # |      \        /  \        /     |   |                |  |  M   | P  | |  |                      |
        # |       \______/    \______/      |   |                |   \____/     | |  |                      |
        # |                                 |   |                 \            /  |  |                      |
        # |                                 |   |                  \__________/   |  |                      |
        # +---------------------------------+   +---------------------------------+  +----------------------+

        k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11 = range(1,12)
        self.addDocument(self.luceneA, identifier='A',      keys=[('A', k1 )], fields=[('M', 'false'), ('Q', 'false'), ('U', 'false'), ('S', '1')])
        self.addDocument(self.luceneA, identifier='A-U',    keys=[('A', k2 )], fields=[('M', 'false'), ('Q', 'false'), ('U', 'true' ), ('S', '2')])
        self.addDocument(self.luceneA, identifier='A-Q',    keys=[('A', k3 )], fields=[('M', 'false'), ('Q', 'true' ), ('U', 'false'), ('S', '3')])
        self.addDocument(self.luceneA, identifier='A-QU',   keys=[('A', k4 )], fields=[('M', 'false'), ('Q', 'true' ), ('U', 'true' ), ('S', '4')])
        self.addDocument(self.luceneA, identifier='A-M',    keys=[('A', k5 ), ('C', k5)], fields=[('M', 'true' ), ('Q', 'false'), ('U', 'false'), ('S', '5')])
        self.addDocument(self.luceneA, identifier='A-MU',   keys=[('A', k6 )], fields=[('M', 'true' ), ('Q', 'false'), ('U', 'true' ), ('S', '6')])
        self.addDocument(self.luceneA, identifier='A-MQ',   keys=[('A', k7 )], fields=[('M', 'true' ), ('Q', 'true' ), ('U', 'false'), ('S', '7')])
        self.addDocument(self.luceneA, identifier='A-MQU',  keys=[('A', k8 )], fields=[('M', 'true' ), ('Q', 'true' ), ('U', 'true' ), ('S', '8')])

        self.addDocument(self.luceneB, identifier='B-N>A-M',   keys=[('B', k5 ), ('D', k5)], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MU',  keys=[('B', k6 )], fields=[('N', 'true' ), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MQ',  keys=[('B', k7 )], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MQU', keys=[('B', k8 )], fields=[('N', 'true' ), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N',       keys=[('B', k9 )], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B',         keys=[('B', k10)], fields=[('N', 'false'), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-P>A-M',   keys=[('B', k5 )], fields=[('N', 'false'), ('O', 'true' ), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P>A-MU',  keys=[('B', k6 )], fields=[('N', 'false'), ('O', 'false'), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P>A-MQ',  keys=[('B', k7 )], fields=[('N', 'false'), ('O', 'false' ), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P>A-MQU', keys=[('B', k8 )], fields=[('N', 'false'), ('O', 'false'), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P',       keys=[('B', k11)], fields=[('N', 'false'), ('O', 'true' ), ('P', 'true' )])

        self.addDocument(self.luceneC, identifier='C-R', keys=[('C', k5)], fields=[('R', 'true')])
        self.addDocument(self.luceneC, identifier='C-S', keys=[('C', k8)], fields=[('S', 'true')])
        self.addDocument(self.luceneC, identifier='C-S2', keys=[('C', k7)], fields=[('S', 'false')])

        self.luceneA._realCommit()
        self.luceneB._realCommit()
        self.luceneC._realCommit()
        settings.commitCount = 1
        settingsLuceneC.commitCount = 1