def setUp(self):
    """Prepare a per-test Lucene index in the temp dir, committing after every document."""
    super(DeDupFilterCollectorTest, self).setUp()
    self._reactor = CallTrace('reactor')
    quietSettings = LuceneSettings(commitCount=1, verbose=False)
    self.lucene = Lucene(self.tempdir, reactor=self._reactor, settings=quietSettings)
def setUp(self, fieldRegistry=None):
    """Create a Lucene index under the test temp dir with a traced reactor and observer.

    :param fieldRegistry: optional FieldRegistry; a fresh one is created per call
        when omitted.

    Fix: the previous signature used the mutable default ``fieldRegistry=FieldRegistry()``,
    which is evaluated once at definition time — every test relying on the default
    shared a single registry instance, letting state leak between tests.
    """
    if fieldRegistry is None:
        fieldRegistry = FieldRegistry()
    super(LuceneTestCase, self).setUp()
    self._javaObjects = self._getJavaObjects()
    # addTimer must return an object with the timer interface; a CallTrace suffices.
    self._reactor = CallTrace('reactor', methods={'addTimer': lambda seconds, callback: CallTrace('timer')})
    self._defaultSettings = LuceneSettings(commitCount=1, commitTimeout=1, fieldRegistry=fieldRegistry)
    self.lucene = Lucene(
        join(self.tempdir, 'lucene'),
        reactor=self._reactor,
        settings=self._defaultSettings,
    )
    self.observer = CallTrace()
    self.lucene.addObserver(self.observer)
def testScore(self):
    """Boosting a TermQuery scales the reported hit score under TermFrequencySimilarity."""
    traceReactor = CallTrace('reactor')
    scoringSettings = LuceneSettings(commitCount=1, similarity=TermFrequencySimilarity(), verbose=False)
    index = Lucene(join(self.tempdir, 'lucene'), reactor=traceReactor, settings=scoringSettings)
    doc = Document()
    doc.add(TextField('field', 'x ' * 100, Field.Store.NO))
    returnValueFromGenerator(index.addDocument(identifier="identifier", document=doc))
    query = TermQuery(Term("field", 'x'))
    unboosted = returnValueFromGenerator(index.executeQuery(query))
    self.assertAlmostEqual(0.1, unboosted.hits[0].score)
    query.setBoost(10.0)
    boosted = returnValueFromGenerator(index.executeQuery(query))
    self.assertAlmostEqual(1, boosted.hits[0].score)
def testLuceneServerHostPortDynamic(self):
    """A readonly Lucene proxy obtains its server host/port from an observer
    (``luceneServer``) instead of from constructor arguments."""
    lucene = Lucene(name='lucene', settings=LuceneSettings(), readonly=True)
    def httprequest1_1Mock(**kwargs):
        # Python 2 generator idiom: 'raise StopIteration(value)' acts as
        # 'return value' from a generator; the unreachable 'yield' below only
        # serves to make this function a generator.
        # NOTE(review): this idiom breaks under PEP 479 (Python 3.7+).
        raise StopIteration(parseResponse(HTTP_RESPONSE))
        yield
    observer = CallTrace(
        'observer',
        returnValues=dict(luceneServer=('example.org', 1234)),
        methods=dict(httprequest1_1=httprequest1_1Mock))
    lucene.addObserver(observer)
    query = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
    response = retval(lucene.executeQuery(
        luceneQuery=query, start=1, stop=5,
    ))
    # Total comes from the canned HTTP_RESPONSE; the call order proves the
    # host/port lookup happened before the actual HTTP request.
    self.assertEquals(887, response.total)
    self.assertEquals(['luceneServer', 'httprequest1_1'], observer.calledMethodNames())
def setUpLucene(self, **kwargs):
    """Create a remote Lucene client whose HTTP connection is replaced by mocks.

    Posted payloads are recorded in ``self.post``; read paths in ``self.read``.
    ``self.response`` is the canned reply returned by both mocks.
    """
    self._lucene = Lucene(host="localhost", port=1234, name='lucene', settings=LuceneSettings(), **kwargs)
    self.post = []
    self.response = ""
    # Obtain one real connection object, patch it, then freeze it: _connect is
    # replaced by a lambda so every later call yields this same patched object.
    connect = self._lucene._connect()
    def mockPost(data, path, **kwargs):
        self.post.append(dict(data=data, path=path))
        # Py2 generator-return idiom (see PEP 479); the yield is unreachable.
        raise StopIteration(self.response)
        yield
    connect._post = mockPost
    self.read = []
    self.response = ""
    def mockRead(path, **kwargs):
        self.read.append(path)
        raise StopIteration(self.response)
        yield
    connect.read = mockRead
    self._lucene._connect = lambda: connect
def luceneAndReaderConfig(defaultLuceneSettings, httpRequestAdapter, lucenePort):
    """Wire a remote Lucene core (on 127.0.0.1:lucenePort) to an HTTP request adapter.

    Uses the module-level ``drilldownFields`` to build the core's field registry.
    """
    registry = FieldRegistry(drilldownFields=drilldownFields)
    coreSettings = defaultLuceneSettings.clone(fieldRegistry=registry)
    remoteLucene = Lucene(
        host='127.0.0.1',
        port=lucenePort,
        name=DEFAULT_CORE,
        settings=coreSettings)
    return be((remoteLucene, (httpRequestAdapter,)))
class LuceneTestCase(SeecrTestCase):
    """Base class for Lucene tests: builds an index per test and verifies in
    tearDown that no Java objects leaked (via the JVM reference dump)."""

    def setUp(self, fieldRegistry=None):
        """Create the index under the temp dir with a traced reactor and observer.

        :param fieldRegistry: optional FieldRegistry; a fresh one is created per
            call when omitted.

        Fix: previously declared as ``fieldRegistry=FieldRegistry()`` — a mutable
        default evaluated once at definition time, so all tests using the default
        shared one registry instance.
        """
        if fieldRegistry is None:
            fieldRegistry = FieldRegistry()
        super(LuceneTestCase, self).setUp()
        self._javaObjects = self._getJavaObjects()
        self._reactor = CallTrace('reactor', methods={'addTimer': lambda seconds, callback: CallTrace('timer')})
        self._defaultSettings = LuceneSettings(commitCount=1, commitTimeout=1, fieldRegistry=fieldRegistry)
        self.lucene = Lucene(
            join(self.tempdir, 'lucene'),
            reactor=self._reactor,
            settings=self._defaultSettings,
        )
        self.observer = CallTrace()
        self.lucene.addObserver(self.observer)

    def tearDown(self):
        """Close the index and assert no new Java objects survived the test."""
        try:
            self._reactor.calledMethods.reset()  # don't keep any references.
            self.lucene.close()
            self.lucene = None
            gc.collect()
            diff = self._getJavaObjects() - self._javaObjects
            self.assertEquals(0, len(diff), diff)
        finally:
            SeecrTestCase.tearDown(self)

    def _getJavaObjects(self):
        """Snapshot live Java objects as a set of (class, count) pairs,
        excluding classes that are intentionally cached/reused."""
        refs = VM._dumpRefs(classes=True)
        return set(
            [(c, refs[c]) for c in refs.keys()
                if c != 'class java.lang.Class' and
                c != 'class org.apache.lucene.document.Field' and  # Fields are kept in FieldRegistry for reusing
                c != 'class org.apache.lucene.document.NumericDocValuesField' and
                c != 'class org.apache.lucene.facet.FacetsConfig'
            ])
def setUp(self):
    """Create a MultiLucene with one core ('coreA') and mock out its HTTP post.

    Posted payloads are recorded in ``self.post``; ``self.response`` is the
    canned reply.
    """
    SeecrTestCase.setUp(self)
    self.registry = FieldRegistry()
    self._multiLucene = MultiLucene(defaultCore='coreA', host="localhost", port=12345)
    self._lucene = Lucene(host="localhost", port=12345, settings=LuceneSettings(), name='coreA')
    self._multiLucene.addObserver(self._lucene)
    self.post = []
    self.response = ""
    def mockPost(data, path, **kwargs):
        # Py2 generator-return idiom (see PEP 479); the yield is unreachable.
        self.post.append(dict(data=data, path=path))
        raise StopIteration(self.response)
        yield
    # Patch one real connection object, then pin it so every _connect() call
    # returns this same patched instance.
    connect = self._multiLucene._connect()
    connect._post = mockPost
    self._multiLucene._connect = lambda: connect
def setUp(self):
    """Create a remote Lucene client and replace its connection's post/read with mocks.

    Posted payloads end up in ``self.post``; read paths in ``self.read``;
    ``self.response`` is the canned reply for both.
    """
    SeecrTestCase.setUp(self)
    self._lucene = Lucene(host="localhost", port=1234, name='lucene', settings=LuceneSettings())
    self.post = []
    self.response = ""
    def mockPost(data, path, **kwargs):
        # Py2 generator-return idiom (see PEP 479); the yield is unreachable.
        self.post.append(dict(data=data, path=path))
        raise StopIteration(self.response)
        yield
    # NOTE(review): unlike other fixtures that call self._lucene._connect() and
    # patch the returned object, this patches attributes on `_connect` itself —
    # presumably `_connect` is a connection object here, not a method; verify.
    self._lucene._connect._post = mockPost
    self.read = []
    self.response = ""
    def mockRead(path, **kwargs):
        self.read.append(path)
        raise StopIteration(self.response)
        yield
    self._lucene._connect.read = mockRead
def main(reactor, port, databasePath):
    """Build the server's observable DNA.

    Wires three local Lucene cores ('main', 'main2', 'empty-core') behind an
    ObservableHttpServer exposing: /info pages, /static files, per-core update
    endpoints, /sru (direct and via /remote), a /remote service, and
    /autocomplete.

    :param reactor: event reactor driving timers and the HTTP server.
    :param port: TCP port for the HTTP server.
    :param databasePath: directory holding the index, term-numerator and storage data.
    :returns: the DNA tuple (to be passed to ``be(...)`` by the caller).
    """
    drilldownFields = [
        DrilldownField('untokenized.field2'),
        DrilldownField('untokenized.fieldHier', hierarchical=True)]
    fieldRegistry = FieldRegistry(drilldownFields)
    luceneSettings = LuceneSettings(fieldRegistry=fieldRegistry, commitCount=30, commitTimeout=1, analyzer=MerescoDutchStemmingAnalyzer())
    lucene = Lucene(path=join(databasePath, 'lucene'), reactor=reactor, name='main', settings=luceneSettings)
    lucene2Settings = LuceneSettings(fieldRegistry=fieldRegistry, commitTimeout=0.1)
    lucene2 = Lucene(path=join(databasePath, 'lucene2'), reactor=reactor, name='main2', settings=lucene2Settings)
    termNumerator = TermNumerator(path=join(databasePath, 'termNumerator'))
    emptyLuceneSettings = LuceneSettings(commitTimeout=1)
    # Helix combining all three cores; 'main' answers queries by default.
    multiLuceneHelix = (MultiLucene(defaultCore='main'),
        (Lucene(path=join(databasePath, 'lucene-empty'), reactor=reactor, name='empty-core', settings=emptyLuceneSettings),),
        (lucene,),
        (lucene2,),
    )
    storageComponent = StorageComponent(directory=join(databasePath, 'storage'))
    return \
    (Observable(),
        (ObservableHttpServer(reactor=reactor, port=port),
            (BasicHttpHandler(),
                (ApacheLogger(outputStream=stdout),
                    # Dynamic info pages (excluding paths served elsewhere).
                    (PathFilter("/info", excluding=[
                            '/info/version',
                            '/info/name',
                            '/update',
                            '/sru',
                            '/remote',
                            '/via-remote-sru',
                        ]),
                        (DynamicHtml(
                                [dynamicPath],
                                reactor=reactor,
                                indexPage='/info',
                                additionalGlobals={
                                    'VERSION': version,
                                }
                            ),
                        )
                    ),
                    (PathFilter("/info/version"),
                        (StringServer(version, ContentTypePlainText), )
                    ),
                    (PathFilter("/info/name"),
                        (StringServer('Meresco Lucene', ContentTypePlainText),)
                    ),
                    (PathFilter("/static"),
                        (PathRename(lambda path: path[len('/static'):]),
                            (FileServer(staticPath),)
                        )
                    ),
                    # Separate upload pipelines per core, sharing numerator/storage.
                    (PathFilter("/update_main", excluding=['/update_main2']),
                        uploadHelix(lucene, termNumerator, storageComponent, drilldownFields, fieldRegistry=luceneSettings.fieldRegistry),
                    ),
                    (PathFilter("/update_main2"),
                        uploadHelix(lucene2, termNumerator, storageComponent, drilldownFields, fieldRegistry=lucene2Settings.fieldRegistry),
                    ),
                    # Direct SRU endpoint: CQL is translated per core, then executed
                    # against the multi-core helix.
                    (PathFilter('/sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (MultiCqlToLuceneQuery(
                                    defaultCore='main',
                                    coreToCqlLuceneQueries={
                                        "main": CqlToLuceneQuery([], luceneSettings=luceneSettings),
                                        "main2": CqlToLuceneQuery([], luceneSettings=lucene2Settings),
                                        "empty-core": CqlToLuceneQuery([], luceneSettings=emptyLuceneSettings),
                                    }),
                                    multiLuceneHelix,
                                ),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    # Same SRU pipeline but routed through the /remote service below.
                    (PathFilter('/via-remote-sru'),
                        (SruParser(defaultRecordSchema='record'),
                            (SruHandler(),
                                (LuceneRemote(host='localhost', port=port, path='/remote'),),
                                (SRUTermDrilldown(defaultFormat='xml'),),
                                (SruDuplicateCount(),),
                                (storageComponent,),
                            )
                        )
                    ),
                    (PathFilter('/remote'),
                        (LuceneRemoteService(reactor=reactor),
                            (MultiCqlToLuceneQuery(
                                defaultCore='main',
                                coreToCqlLuceneQueries={
                                    "main": CqlToLuceneQuery([], luceneSettings=luceneSettings),
                                    "main2": CqlToLuceneQuery([], luceneSettings=lucene2Settings),
                                    "empty-core": CqlToLuceneQuery([], luceneSettings=emptyLuceneSettings),
                                }),
                                multiLuceneHelix,
                            )
                        )
                    ),
                    (PathFilter('/autocomplete'),
                        (Autocomplete('localhost', port, '/autocomplete', '__all__', '?', 5, '?', '?'),
                            (lucene,),
                        )
                    )
                )
            )
        )
    )
class MultiLuceneTest(SeecrTestCase): def __init__(self, *args, **kwargs): super(MultiLuceneTest, self).__init__(*args, **kwargs) self._multithreaded = True def setUp(self): SeecrTestCase.setUp(self) settings = LuceneSettings(multithreaded=self._multithreaded, verbose=False) settingsLuceneC = LuceneSettings(multithreaded=self._multithreaded, verbose=False, similarity=TermFrequencySimilarity()) self.luceneA = Lucene(join(self.tempdir, 'a'), name='coreA', reactor=CallTrace(), settings=settings) self.luceneB = Lucene(join(self.tempdir, 'b'), name='coreB', reactor=CallTrace(), settings=settings) self.luceneC = Lucene(join(self.tempdir, 'c'), name='coreC', reactor=CallTrace(), settings=settingsLuceneC) self.dna = be((Observable(), (MultiLucene(defaultCore='coreA', multithreaded=self._multithreaded), (self.luceneA,), (self.luceneB,), (self.luceneC,), ) )) # +---------------------------------+ +---------------------------------+ +----------------------+ # | ______ | | | | C | # | ____/ \____ A | | __________ B | | ____ | # | / /\ Q /\ \ | | / N \ | | / \ | # | / / \ / \ \ | | / ____ \ | | | R | | # | / | \ / | \ | | | / \ | | | \ ___/ | # | / \ \/ / \ | | | | M __|____|_____ | | | # | / \ /\ / \ | | | \__/_/ | \ | | | # | | \_|__|_/ | | | \ | / | | | | # | | U | | M | | | \___|______/ ___|_______ | | | # | | \ / | | | | / | \ | | | # | \ \/ / | | | O / _|__ \ | | | # | \ /\ / | | \_______|___/_/ \ | | | | # | \ / \ / | | | | M | P | | | | # | \______/ \______/ | | | \____/ | | | | # | | | \ / | | | # | | | \__________/ | | | # +---------------------------------+ +---------------------------------+ +----------------------+ k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11 = range(1,12) self.addDocument(self.luceneA, identifier='A', keys=[('A', k1 )], fields=[('M', 'false'), ('Q', 'false'), ('U', 'false'), ('S', '1')]) self.addDocument(self.luceneA, identifier='A-U', keys=[('A', k2 )], fields=[('M', 'false'), ('Q', 'false'), ('U', 'true' ), ('S', '2')]) 
self.addDocument(self.luceneA, identifier='A-Q', keys=[('A', k3 )], fields=[('M', 'false'), ('Q', 'true' ), ('U', 'false'), ('S', '3')]) self.addDocument(self.luceneA, identifier='A-QU', keys=[('A', k4 )], fields=[('M', 'false'), ('Q', 'true' ), ('U', 'true' ), ('S', '4')]) self.addDocument(self.luceneA, identifier='A-M', keys=[('A', k5 ), ('C', k5)], fields=[('M', 'true' ), ('Q', 'false'), ('U', 'false'), ('S', '5')]) self.addDocument(self.luceneA, identifier='A-MU', keys=[('A', k6 )], fields=[('M', 'true' ), ('Q', 'false'), ('U', 'true' ), ('S', '6')]) self.addDocument(self.luceneA, identifier='A-MQ', keys=[('A', k7 )], fields=[('M', 'true' ), ('Q', 'true' ), ('U', 'false'), ('S', '7')]) self.addDocument(self.luceneA, identifier='A-MQU', keys=[('A', k8 )], fields=[('M', 'true' ), ('Q', 'true' ), ('U', 'true' ), ('S', '8')]) self.addDocument(self.luceneB, identifier='B-N>A-M', keys=[('B', k5 ), ('D', k5)], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')]) self.addDocument(self.luceneB, identifier='B-N>A-MU', keys=[('B', k6 )], fields=[('N', 'true' ), ('O', 'false'), ('P', 'false')]) self.addDocument(self.luceneB, identifier='B-N>A-MQ', keys=[('B', k7 )], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')]) self.addDocument(self.luceneB, identifier='B-N>A-MQU', keys=[('B', k8 )], fields=[('N', 'true' ), ('O', 'false'), ('P', 'false')]) self.addDocument(self.luceneB, identifier='B-N', keys=[('B', k9 )], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')]) self.addDocument(self.luceneB, identifier='B', keys=[('B', k10)], fields=[('N', 'false'), ('O', 'false'), ('P', 'false')]) self.addDocument(self.luceneB, identifier='B-P>A-M', keys=[('B', k5 )], fields=[('N', 'false'), ('O', 'true' ), ('P', 'true' )]) self.addDocument(self.luceneB, identifier='B-P>A-MU', keys=[('B', k6 )], fields=[('N', 'false'), ('O', 'false'), ('P', 'true' )]) self.addDocument(self.luceneB, identifier='B-P>A-MQ', keys=[('B', k7 )], fields=[('N', 'false'), ('O', 'false' ), ('P', 
'true' )]) self.addDocument(self.luceneB, identifier='B-P>A-MQU', keys=[('B', k8 )], fields=[('N', 'false'), ('O', 'false'), ('P', 'true' )]) self.addDocument(self.luceneB, identifier='B-P', keys=[('B', k11)], fields=[('N', 'false'), ('O', 'true' ), ('P', 'true' )]) self.addDocument(self.luceneC, identifier='C-R', keys=[('C', k5)], fields=[('R', 'true')]) self.addDocument(self.luceneC, identifier='C-S', keys=[('C', k8)], fields=[('S', 'true')]) self.addDocument(self.luceneC, identifier='C-S2', keys=[('C', k7)], fields=[('S', 'false')]) self.luceneA._realCommit() self.luceneB._realCommit() self.luceneC._realCommit() settings.commitCount = 1 settingsLuceneC.commitCount = 1 def tearDown(self): self.luceneA.close() self.luceneB.close() SeecrTestCase.tearDown(self) def hitIds(self, hits): return set([hit.id for hit in hits]) def testQueryOneIndex(self): result = returnValueFromGenerator(self.dna.any.executeQuery(luceneQuery=luceneQueryFromCql('Q=true'))) self.assertEquals(set(['A-Q', 'A-QU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits)) result = returnValueFromGenerator(self.dna.any.executeQuery(luceneQuery=luceneQueryFromCql('Q=true AND M=true'))) self.assertEquals(set(['A-MQ', 'A-MQU']), self.hitIds(result.hits)) def testQueryOneIndexWithComposedQuery(self): cq = ComposedQuery('coreA') cq.setCoreQuery(core='coreA', query=luceneQueryFromCql('Q=true')) result = returnValueFromGenerator(self.dna.any.executeComposedQuery(cq)) self.assertEquals(set(['A-Q', 'A-QU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits)) cq = ComposedQuery('coreA') cq.setCoreQuery(core='coreA', query=luceneQueryFromCql('Q=true'), filterQueries=[luceneQueryFromCql('M=true')]) result = returnValueFromGenerator(self.dna.any.executeComposedQuery(cq)) self.assertEquals(set(['A-MQ', 'A-MQU']), self.hitIds(result.hits)) def testB_N_is_true(self): result = returnValueFromGenerator(self.dna.any.executeQuery(core='coreB', luceneQuery=luceneQueryFromCql('N=true'))) self.assertEquals(5, result.total) 
self.assertEquals(set(['B-N', 'B-N>A-M', 'B-N>A-MU', 'B-N>A-MQ', 'B-N>A-MQU']), self.hitIds(result.hits)) def testJoinQuery(self): q = ComposedQuery('coreA', query=MatchAllDocsQuery()) q.setCoreQuery(core='coreB', query=luceneQueryFromCql('N=true')) q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX+'A'), dict(core='coreB', key=KEY_PREFIX+'B')) result = returnValueFromGenerator(self.dna.any.executeComposedQuery(q)) self.assertEquals(4, result.total) self.assertEquals(set(['A-M', 'A-MU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits)) def testMultipleJoinQueriesKeepsCachesWithinMaxSize(self): for i in xrange(25): self.addDocument(self.luceneB, identifier=str(i), keys=[('X', i)], fields=[('Y', str(i))]) for i in xrange(25): q = ComposedQuery('coreA', query=MatchAllDocsQuery()) q.setCoreQuery(core='coreB', query=luceneQueryFromCql('Y=%s' % i)) q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX+'A'), dict(core='coreB', key=KEY_PREFIX+'X')) ignoredResult = returnValueFromGenerator(self.dna.any.executeComposedQuery(q)) def testJoinQueryWithFilters(self): q = ComposedQuery('coreA') q.addFilterQuery('coreB', query=luceneQueryFromCql('N=true')) q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX+'A'), dict(core='coreB', key=KEY_PREFIX+'B')) result = returnValueFromGenerator(self.dna.any.executeComposedQuery(q)) self.assertEquals(4, result.total) self.assertEquals(set(['A-M', 'A-MU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits)) def testJoinFacet(self): q = ComposedQuery('coreA', query=luceneQueryFromCql('Q=true')) q.addFacet('coreB', dict(fieldname='cat_N', maxTerms=10)) q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10)) q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B')) result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q)) self.assertEquals(4, result.total) self.assertEquals([{ 'terms': [ {'count': 2, 'term': u'true'}, {'count': 2, 'term': u'false'}, ], 'path': [], 'fieldname': u'cat_N' }, { 
'terms': [ {'count': 3, 'term': u'false'}, {'count': 1, 'term': u'true'}, ], 'path': [], 'fieldname': u'cat_O' }], result.drilldownData) def testJoinFacetWithDrilldownQueryFilters(self): q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true')) q.addDrilldownQuery('coreA', drilldownQuery=('cat_Q', ['true'])) q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10)) q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B')) result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q)) self.assertEquals(2, result.total) self.assertEquals([{ 'terms': [ {'count': 3, 'term': u'false'}, {'count': 1, 'term': u'true'}, ], 'path': [], 'fieldname': u'cat_O' }], result.drilldownData) def testJoinFacetWithJoinDrilldownQueryFilters(self): q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true')) q.addDrilldownQuery('coreB', drilldownQuery=('cat_O', ['true'])) q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10)) q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B')) result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q)) self.assertEquals(2, result.total) self.assertEquals([{ 'terms': [ {'count': 3, 'term': u'true'}, ], 'path': [], 'fieldname': u'cat_O' }], result.drilldownData) def testJoinDrilldownQueryFilters(self): q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true')) q.addDrilldownQuery('coreB', drilldownQuery=('cat_O', ['true'])) q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B')) result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q)) self.assertEquals(2, result.total) def testJoinFacetWithFilter(self): q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true')) q.addFilterQuery('coreA', query=luceneQueryFromCql('Q=true')) q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10)) q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), 
dict(core='coreB', key=KEY_PREFIX + 'B')) result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q)) self.assertEquals(2, result.total) self.assertEquals([{ 'terms': [ {'count': 3, 'term': u'false'}, {'count': 1, 'term': u'true'}, ], 'path': [], 'fieldname': u'cat_O' }], result.drilldownData) def testJoinFacetFromBPointOfView(self): q = ComposedQuery('coreB') q.setCoreQuery(core='coreA', query=luceneQueryFromCql('Q=true')) q.setCoreQuery(core='coreB', query=None, facets=[ dict(fieldname='cat_N', maxTerms=10), dict(fieldname='cat_O', maxTerms=10), ]) try: q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B')) except ValueError, e: self.assertEquals("Match for result core 'coreB' must have a uniqueKey specification.", str(e)) return # for future reference self.assertEquals(4, result.total) self.assertEquals(set(['B-N>A-MQ', 'B-N>A-MQU', 'B-P>A-MQ', 'B-P>A-MQU']), self.hitIds(result.hits)) self.assertEquals([{ 'terms': [ {'count': 2, 'term': u'false'}, {'count': 2, 'term': u'true'}, ], 'fieldname': u'cat_N' }, { 'terms': [ {'count': 2, 'term': u'false'}, {'count': 2, 'term': u'true'}, ], 'fieldname': u'cat_O' }], result.drilldownData)
class DeDupFilterCollectorTest(SeecrTestCase):
    """Tests DeDupFilterSuperCollector: collapse hits sharing an __isformatof__
    value, keeping the document with the highest __sort__ value."""

    def setUp(self):
        super(DeDupFilterCollectorTest, self).setUp()
        self._reactor = CallTrace('reactor')
        settings = LuceneSettings(commitCount=1, verbose=False)
        self.lucene = Lucene(self.tempdir, reactor=self._reactor, settings=settings)

    def tearDown(self):
        self.lucene.close()
        super(DeDupFilterCollectorTest, self).tearDown()

    def testCollectorTransparentlyDelegatesToNextCollector(self):
        self._addDocument("urn:1", 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(1, tc.topDocs(0).totalHits)

    def _addDocument(self, identifier, isformatof, sort=None):
        # Helper: index one document with optional numeric dedup-key and sort fields.
        doc = Document()
        if isformatof:
            doc.add(NumericDocValuesField("__isformatof__", long(isformatof)))
        if sort:
            doc.add(NumericDocValuesField("__sort__", long(sort)))
        consume(self.lucene.addDocument(identifier, doc))
        self.lucene.commit()  # Explicitly, not required: since commitCount=1.

    def testCollectorFiltersTwoSimilar(self):
        # Two docs share isformatof=2; the one with the higher sort value (urn:2) wins.
        self._addDocument("urn:1", 2, 1)
        self._addDocument("urn:2", 2, 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        topDocsResult = tc.topDocs(0)
        self.assertEquals(1, topDocsResult.totalHits)
        self.assertEquals(1, len(topDocsResult.scoreDocs))
        docId = topDocsResult.scoreDocs[0].doc
        key = c.keyForDocId(docId)
        identifier = self.lucene._index.getDocument(key.getDocId()).get(IDFIELD)
        self.assertEquals('urn:2', identifier)
        self.assertEquals(2, key.count)

    def testCollectorFiltersTwoTimesTwoSimilarOneNot(self):
        self._addDocument("urn:1", 1, 2001)
        self._addDocument("urn:2", 3, 2009)  # result 2x
        self._addDocument("urn:3", 50, 2010)  # result 1x
        self._addDocument("urn:4", 3, 2001)
        self._addDocument("urn:5", 1, 2009)  # result 2x
        # expected: "urn:2', "urn:3" and "urn:5" in no particular order
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        topDocsResult = tc.topDocs(0)
        self.assertEquals(3, topDocsResult.totalHits)
        self.assertEquals(3, len(topDocsResult.scoreDocs))
        rawDocIds = [scoreDoc.doc for scoreDoc in topDocsResult.scoreDocs]
        netDocIds = [c.keyForDocId(rawDocId).docId for rawDocId in rawDocIds]
        identifiers = set(self.lucene._index.getDocument(doc).get(IDFIELD) for doc in netDocIds)
        self.assertEquals(set(["urn:2", "urn:3", "urn:5"]), identifiers)
        self.assertEquals([1, 2, 2], list(sorted(c.keyForDocId(d).count for d in netDocIds)))

    def testSilentyYieldsWrongResultWhenFieldNameDoesNotMatch(self):
        # With a non-existent dedup field nothing is collapsed; the hit passes through.
        self._addDocument("urn:1", 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__wrong_field__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(1, tc.topDocs(0).totalHits)

    def testShouldAddResultsWithoutIsFormatOf(self):
        # Docs without __isformatof__ must never be collapsed with each other.
        self._addDocument("urn:1", 2)
        self._addDocument("urn:2", None)
        self._addDocument("urn:3", 2)
        self._addDocument("urn:4", None)
        self._addDocument("urn:5", None)
        self._addDocument("urn:6", None)
        self._addDocument("urn:7", None)
        self._addDocument("urn:8", None)
        self._addDocument("urn:9", None)
        self._addDocument("urn:A", None)
        self._addDocument("urn:B", None)  # trigger a merge
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(10, tc.topDocs(0).totalHits)
class LuceneTest(SeecrTestCase):
    """Tests the remote Lucene client: every operation is verified by inspecting
    the HTTP posts/reads recorded by the mocked connection."""

    def setUp(self):
        SeecrTestCase.setUp(self)
        self._lucene = Lucene(host="localhost", port=1234, name='lucene', settings=LuceneSettings())
        self.post = []
        self.response = ""
        def mockPost(data, path, **kwargs):
            # Py2 generator-return idiom (see PEP 479); the yield is unreachable.
            self.post.append(dict(data=data, path=path))
            raise StopIteration(self.response)
            yield
        # NOTE(review): patches attributes on `_connect` itself (not on the result
        # of calling it, as other fixtures do) — presumably `_connect` is a
        # connection object here; verify.
        self._lucene._connect._post = mockPost
        self.read = []
        self.response = ""
        def mockRead(path, **kwargs):
            self.read.append(path)
            raise StopIteration(self.response)
            yield
        self._lucene._connect.read = mockRead

    def testPostSettingsAddObserverInit(self):
        # observer_init posts the default settings to the server.
        self.assertEqual([], self.post)
        self._lucene.observer_init()
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEquals(DEFAULTS, loads(self.post[0]['data']))

    def testInitialize(self):
        self.assertEqual([], self.post)
        consume(self._lucene.initialize())
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEquals(DEFAULTS, loads(self.post[0]['data']))

    def testAdd(self):
        registry = FieldRegistry()
        fields = [registry.createField("id", "id1")]
        consume(self._lucene.addDocument(identifier='id1', fields=fields))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/update/?identifier=id1', self.post[0]['path'])
        self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', self.post[0]['data'])

    def testAddWithoutIdentifier(self):
        registry = FieldRegistry()
        fields = [registry.createField("id", "id1")]
        consume(self._lucene.addDocument(fields=fields))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/update/?', self.post[0]['path'])
        self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', self.post[0]['data'])

    def testDelete(self):
        consume(self._lucene.delete(identifier='id1'))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/delete/?identifier=id1', self.post[0]['path'])
        self.assertEqual(None, self.post[0]['data'])

    def testExecuteQuery(self):
        # Canned JSON server reply covering hits, facets and suggestions.
        self.response = JsonDict({
            "total": 887,
            "queryTime": 6,
            "times": {"searchTime": 3},
            "hits": [{
                    "id": "record:1", "score": 0.1234,
                    "duplicateCount": {"__key__": 2},
                    "duplicates": {"__grouping_key__": [{"id": 'record:1'}, {"id": 'record:2'}]}
                }],
            "drilldownData": [
                {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
            ],
            "suggestions": {
                "valeu": ["value"]
            }
        }).dumps()
        query = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
        response = retval(self._lucene.executeQuery(
                luceneQuery=query, start=1, stop=5,
                facets=[dict(maxTerms=10, fieldname='facet')],
                sortKeys=[dict(sortBy='field', sortDescending=False)],
                suggestionRequest=dict(suggests=['valeu'], count=2, field='field1'),
                dedupField="__key__",
                clustering=True,
                storedFields=["field"]
            ))
        # The full request body sent to the server:
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/query/', self.post[0]['path'])
        self.assertEqual({
                "start": 1, "stop": 5,
                "storedFields": ["field"],
                "query": {"term": {"field": "field", "value": "value"}, "type": "TermQuery"},
                "facets": [{"fieldname": "facet", "maxTerms": 10}],
                "sortKeys": [{"sortBy": "field", "sortDescending": False, "type": "String", 'missingValue': 'STRING_LAST'}],
                "suggestionRequest": dict(suggests=['valeu'], count=2, field='field1'),
                "dedupField": "__key__",
                "dedupSortField": None,
                "clustering": True,
            }, loads(self.post[0]['data']))
        # And the parsed response object:
        self.assertEqual(887, response.total)
        self.assertEqual(6, response.queryTime)
        self.assertEqual({'searchTime': 3}, response.times)
        self.assertEqual(1, len(response.hits))
        self.assertEqual("record:1", response.hits[0].id)
        self.assertEqual(0.1234, response.hits[0].score)
        self.assertEqual(dict(__key__=2), response.hits[0].duplicateCount)
        self.assertEqual([
                {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
            ], response.drilldownData)
        self.assertEqual({'valeu': ['value']}, response.suggestions)

    def testPrefixSearch(self):
        # Server returns (term, count) pairs; hits come back sorted by count desc.
        self.response = JsonList([["value0", 1], ["value1", 2]]).dumps()
        response = retval(self._lucene.prefixSearch(fieldname='field1', prefix='valu'))
        self.assertEquals(['value1', 'value0'], response.hits)
        response = retval(self._lucene.prefixSearch(fieldname='field1', prefix='valu', showCount=True))
        self.assertEquals([('value1', 2), ('value0', 1)], response.hits)

    def testNumDocs(self):
        self.response = "150"
        result = retval(self._lucene.numDocs())
        self.assertEqual(150, result)
        self.assertEqual([{'data': None, 'path': '/lucene/numDocs/'}], self.post)

    def testFieldnames(self):
        self.response = '["field1", "field2"]'
        result = retval(self._lucene.fieldnames())
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual([{"data": None, "path": "/lucene/fieldnames/"}], self.post)

    def testDrilldownFieldnames(self):
        self.response = '["field1", "field2"]'
        result = retval(self._lucene.drilldownFieldnames())
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual([{"data": None, "path": "/lucene/drilldownFieldnames/?limit=50"}], self.post)
        # With a path: first element becomes 'dim', the rest 'path' parameters.
        result = retval(self._lucene.drilldownFieldnames(limit=1, path=['field']))
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual({"data": None, "path": "/lucene/drilldownFieldnames/?dim=field&limit=1"}, self.post[-1])
        result = retval(self._lucene.drilldownFieldnames(limit=1, path=['xyz', 'abc', 'field']))
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual({"data": None, "path": "/lucene/drilldownFieldnames/?dim=xyz&limit=1&path=abc&path=field"}, self.post[-1])

    def testUpdateSettings(self):
        # NOTE(review): self.response is set to a JsonDict here (not .dumps() as
        # elsewhere) — presumably getSettings/read accepts that; verify.
        self.response = JsonDict(numberOfConcurrentTasks=6, similarity="BM25(k1=1.2,b=0.75)", clustering=JsonDict(clusterMoreRecords=100, clusteringEps=0.4, clusteringMinPoints=1))
        settings = retval(self._lucene.getSettings())
        self.assertEqual(['/settings/'], self.read)
        self.assertEquals({'numberOfConcurrentTasks': 6, 'similarity': u'BM25(k1=1.2,b=0.75)', 'clustering': {'clusterMoreRecords': 100, 'clusteringEps': 0.4, 'clusteringMinPoints': 1}}, settings)
        clusterFields = [
            {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
        ]
        self.response = ""
        consume(self._lucene.setSettings(similarity=dict(name="bm25", k1=1.0, b=2.0), numberOfConcurrentTasks=10, clustering=dict(clusterMoreRecords=200, clusteringEps=1.0, clusteringMinPoints=2, fields=clusterFields)))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEqual({
                "numberOfConcurrentTasks": 10,
                "similarity": dict(type="BM25Similarity", k1=1.0, b=2.0),
                "clustering": {
                    "clusterMoreRecords": 200,
                    "clusteringEps": 1.0,
                    "clusteringMinPoints": 2,
                    "fields": [
                        {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
                    ]
                }
            }, loads(self.post[0]['data']))
        # None-valued settings are omitted from the posted payload.
        consume(self._lucene.setSettings(numberOfConcurrentTasks=5, similarity=None, clustering=None))
        self.assertEqual(2, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[1]['path'])
        self.assertEqual({
                "numberOfConcurrentTasks": 5,
            }, loads(self.post[1]['data']))

    def testSimilarDocs(self):
        self.response = JsonDict({
            "total": 887,
            "queryTime": 6,
            "times": {"searchTime": 3},
            "hits": [
                {"id": "record:1", "score": 0.1234},
                {"id": "record:2", "score": 0.1234},
            ],
        }).dumps()
        response = retval(self._lucene.similarDocuments(identifier='record:3'))
        self.assertEqual(887, response.total)
        self.assertEqual(2, len(response.hits))
class DeDupFilterCollectorTest(SeecrTestCase):
    """Tests DeDupFilterSuperCollector: collapse hits sharing an __isformatof__
    value, keeping the document with the highest __sort__ value."""

    def setUp(self):
        super(DeDupFilterCollectorTest, self).setUp()
        self._reactor = CallTrace('reactor')
        settings = LuceneSettings(commitCount=1, verbose=False)
        self.lucene = Lucene(self.tempdir, reactor=self._reactor, settings=settings)

    def tearDown(self):
        self.lucene.close()
        super(DeDupFilterCollectorTest, self).tearDown()

    def testCollectorTransparentlyDelegatesToNextCollector(self):
        self._addDocument("urn:1", 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(1, tc.topDocs(0).totalHits)

    def _addDocument(self, identifier, isformatof, sort=None):
        # Helper: index one document with optional numeric dedup-key and sort fields.
        doc = Document()
        if isformatof:
            doc.add(NumericDocValuesField("__isformatof__", long(isformatof)))
        if sort:
            doc.add(NumericDocValuesField("__sort__", long(sort)))
        consume(self.lucene.addDocument(identifier, doc))
        self.lucene.commit()  # Explicitly, not required: since commitCount=1.
def testCollectorFiltersTwoSimilar(self): self._addDocument("urn:1", 2, 1) self._addDocument("urn:2", 2, 2) tc = TopScoreDocSuperCollector(100, True) c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc) self.lucene.search(query=MatchAllDocsQuery(), collector=c) topDocsResult = tc.topDocs(0) self.assertEquals(1, topDocsResult.totalHits) self.assertEquals(1, len(topDocsResult.scoreDocs)) docId = topDocsResult.scoreDocs[0].doc key = c.keyForDocId(docId) identifier = self.lucene._index.getDocument(key.getDocId()).get(IDFIELD) self.assertEquals('urn:2', identifier) self.assertEquals(2, key.count) def testCollectorFiltersTwoTimesTwoSimilarOneNot(self): self._addDocument("urn:1", 1, 2001) self._addDocument("urn:2", 3, 2009) # result 2x self._addDocument("urn:3", 50, 2010) # result 1x self._addDocument("urn:4", 3, 2001) self._addDocument("urn:5", 1, 2009) # result 2x #expected: "urn:2', "urn:3" and "urn:5" in no particular order tc = TopScoreDocSuperCollector(100, True) c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc) self.lucene.search(query=MatchAllDocsQuery(), collector=c) topDocsResult = tc.topDocs(0) self.assertEquals(3, topDocsResult.totalHits) self.assertEquals(3, len(topDocsResult.scoreDocs)) rawDocIds = [scoreDoc.doc for scoreDoc in topDocsResult.scoreDocs] netDocIds = [c.keyForDocId(rawDocId).docId for rawDocId in rawDocIds] identifiers = set(self.lucene._index.getDocument(doc).get(IDFIELD) for doc in netDocIds) self.assertEquals(set(["urn:2", "urn:3", "urn:5"]), identifiers) self.assertEquals([1,2,2], list(sorted(c.keyForDocId(d).count for d in netDocIds))) def testSilentyYieldsWrongResultWhenFieldNameDoesNotMatch(self): self._addDocument("urn:1", 2) tc = TopScoreDocSuperCollector(100, True) c = DeDupFilterSuperCollector("__wrong_field__", "__sort__", tc) self.lucene.search(query=MatchAllDocsQuery(), collector=c) self.assertEquals(1, tc.topDocs(0).totalHits) def testShouldAddResultsWithoutIsFormatOf(self): self._addDocument("urn:1", 2) 
self._addDocument("urn:2", None) self._addDocument("urn:3", 2) self._addDocument("urn:4", None) self._addDocument("urn:5", None) self._addDocument("urn:6", None) self._addDocument("urn:7", None) self._addDocument("urn:8", None) self._addDocument("urn:9", None) self._addDocument("urn:A", None) self._addDocument("urn:B", None) # trigger a merge tc = TopScoreDocSuperCollector(100, True) c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc) self.lucene.search(query=MatchAllDocsQuery(), collector=c) self.assertEquals(10, tc.topDocs(0).totalHits)
class LuceneTest(SeecrTestCase):
    """Tests the HTTP client side of Lucene: each public method must POST /
    read the right path and payload, and parse the canned JSON response."""

    def setUp(self):
        SeecrTestCase.setUp(self)
        self.setUpLucene()

    def setUpLucene(self, **kwargs):
        # Replace the connection's _post/read coroutines with recording mocks;
        # both return `self.response` via the weightless generator protocol
        # (raise StopIteration(value) before the unreachable `yield`).
        self._lucene = Lucene(host="localhost", port=1234, name='lucene', settings=LuceneSettings(), **kwargs)
        self.post = []
        self.response = ""
        connect = self._lucene._connect()
        def mockPost(data, path, **kwargs):
            self.post.append(dict(data=data, path=path))
            raise StopIteration(self.response)
            yield
        connect._post = mockPost
        self.read = []
        self.response = ""
        def mockRead(path, **kwargs):
            self.read.append(path)
            raise StopIteration(self.response)
            yield
        connect.read = mockRead
        self._lucene._connect = lambda: connect

    def testPostSettingsAddObserverInit(self):
        # observer_init pushes the default settings to the server.
        self.assertEqual([], self.post)
        self._lucene.observer_init()
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEquals(DEFAULTS, loads(self.post[0]['data']))

    def testInitialize(self):
        self.assertEqual([], self.post)
        consume(self._lucene.initialize())
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEquals(DEFAULTS, loads(self.post[0]['data']))

    def testAdd(self):
        registry = FieldRegistry()
        fields = [registry.createField("id", "id1")]
        consume(self._lucene.addDocument(identifier='id1', fields=fields))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/update/?identifier=id1', self.post[0]['path'])
        self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', self.post[0]['data'])

    def testAddWithoutIdentifier(self):
        # Without an identifier the query string stays empty ("?").
        registry = FieldRegistry()
        fields = [registry.createField("id", "id1")]
        consume(self._lucene.addDocument(fields=fields))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/update/?', self.post[0]['path'])
        self.assertEqual('[{"type": "TextField", "name": "id", "value": "id1"}]', self.post[0]['data'])

    def testDelete(self):
        consume(self._lucene.delete(identifier='id1'))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/delete/?identifier=id1', self.post[0]['path'])
        self.assertEqual(None, self.post[0]['data'])

    def testDeleteByQuery(self):
        query = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
        consume(self._lucene.delete(luceneQuery=query))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/delete/', self.post[0]['path'])
        self.assertEqual('{"query": {"term": {"field": "field", "value": "value"}, "type": "TermQuery"}}', self.post[0]['data'])

    def testExecuteQuery(self):
        # Full round trip: every executeQuery kwarg must appear in the POSTed
        # JSON, and every part of the response must be parsed back out.
        self.response = JsonDict({
            "total": 887,
            "queryTime": 6,
            "times": {"searchTime": 3},
            "hits": [{
                "id": "record:1", "score": 0.1234,
                "duplicateCount": {"__key__": 2},
                "duplicates": {"__grouping_key__": [{"id": 'record:1'}, {"id": 'record:2'}]}
            }],
            "drilldownData": [
                {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
            ],
            "suggestions": {
                "valeu": ["value"]
            }
        }).dumps()
        query = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
        response = retval(self._lucene.executeQuery(
            luceneQuery=query, start=1, stop=5,
            facets=[dict(maxTerms=10, fieldname='facet')],
            sortKeys=[dict(sortBy='field', sortDescending=False)],
            suggestionRequest=dict(suggests=['valeu'], count=2, field='field1'),
            dedupField="__key__",
            clustering=True,
            storedFields=["field"]
        ))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/query/', self.post[0]['path'])
        self.assertEqual({
            "start": 1, "stop": 5,
            "storedFields": ["field"],
            "query": {"term": {"field": "field", "value": "value"}, "type": "TermQuery"},
            "facets": [{"fieldname": "facet", "maxTerms": 10}],
            # sortKeys are enriched with type and missingValue by the client.
            "sortKeys": [{"sortBy": "field", "sortDescending": False, "type": "String", 'missingValue': 'STRING_LAST'}],
            "suggestionRequest": dict(suggests=['valeu'], count=2, field='field1'),
            "dedupField": "__key__",
            "dedupSortField": None,
            "clustering": True,
        }, loads(self.post[0]['data']))
        self.assertEqual(887, response.total)
        self.assertEqual(6, response.queryTime)
        self.assertEqual({'searchTime': 3}, response.times)
        self.assertEqual(1, len(response.hits))
        self.assertEqual("record:1", response.hits[0].id)
        self.assertEqual(0.1234, response.hits[0].score)
        self.assertEqual(dict(__key__=2), response.hits[0].duplicateCount)
        self.assertEqual([
            {"fieldname": "facet", "path": [], "terms": [{"term": "term", "count": 1}]}
        ], response.drilldownData)
        self.assertEqual({'valeu': ['value']}, response.suggestions)

    def testPrefixSearch(self):
        # Results come back sorted by descending count.
        self.response = JsonList([["value0", 1], ["value1", 2]]).dumps()
        response = retval(self._lucene.prefixSearch(fieldname='field1', prefix='valu'))
        self.assertEquals(['value1', 'value0'], response.hits)
        response = retval(self._lucene.prefixSearch(fieldname='field1', prefix='valu', showCount=True))
        self.assertEquals([('value1', 2), ('value0', 1)], response.hits)

    def testNumDocs(self):
        self.response = "150"
        result = retval(self._lucene.numDocs())
        self.assertEqual(150, result)
        self.assertEqual([{'data': None, 'path': '/lucene/numDocs/'}], self.post)

    def testFieldnames(self):
        self.response = '["field1", "field2"]'
        result = retval(self._lucene.fieldnames())
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual([{"data": None, "path": "/lucene/fieldnames/"}], self.post)

    def testDrilldownFieldnames(self):
        # `path` is encoded as dim=<first>&path=<rest...> in the query string.
        self.response = '["field1", "field2"]'
        result = retval(self._lucene.drilldownFieldnames())
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual([{"data": None, "path": "/lucene/drilldownFieldnames/?limit=50"}], self.post)
        result = retval(self._lucene.drilldownFieldnames(limit=1, path=['field']))
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual({"data": None, "path": "/lucene/drilldownFieldnames/?dim=field&limit=1"}, self.post[-1])
        result = retval(self._lucene.drilldownFieldnames(limit=1, path=['xyz', 'abc', 'field']))
        self.assertEqual(["field1", "field2"], result.hits)
        self.assertEqual({"data": None, "path": "/lucene/drilldownFieldnames/?dim=xyz&limit=1&path=abc&path=field"}, self.post[-1])

    def testUpdateSettings(self):
        # getSettings goes through the read mock (note: self.response is set
        # to a JsonDict object here, not a dumped string — the read path
        # apparently accepts it; TODO confirm against Lucene.getSettings).
        self.response = JsonDict(numberOfConcurrentTasks=6, similarity="BM25(k1=1.2,b=0.75)", clustering=JsonDict(clusterMoreRecords=100, clusteringEps=0.4, clusteringMinPoints=1))
        settings = retval(self._lucene.getSettings())
        self.assertEqual(['/settings/'], self.read)
        self.assertEquals({'numberOfConcurrentTasks': 6, 'similarity': u'BM25(k1=1.2,b=0.75)', 'clustering': {'clusterMoreRecords': 100, 'clusteringEps': 0.4, 'clusteringMinPoints': 1}}, settings)
        clusterFields = [
            {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
        ]
        self.response = ""
        consume(self._lucene.setSettings(similarity=dict(name="bm25", k1=1.0, b=2.0), numberOfConcurrentTasks=10, clustering=dict(clusterMoreRecords=200, clusteringEps=1.0, clusteringMinPoints=2, fields=clusterFields)))
        self.assertEqual(1, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[0]['path'])
        self.assertEqual({
            "numberOfConcurrentTasks": 10,
            "similarity": dict(type="BM25Similarity", k1=1.0, b=2.0),
            "clustering": {
                "clusterMoreRecords": 200,
                "clusteringEps": 1.0,
                "clusteringMinPoints": 2,
                "fields": [
                    {"filterValue": None, "fieldname": "untokenized.dcterms:isFormatOf.uri", "weight": 0}
                ]
            }
        }, loads(self.post[0]['data']))
        # None-valued settings are left out of the payload entirely.
        consume(self._lucene.setSettings(numberOfConcurrentTasks=5, similarity=None, clustering=None))
        self.assertEqual(2, len(self.post))
        self.assertEqual('/lucene/settings/', self.post[1]['path'])
        self.assertEqual({
            "numberOfConcurrentTasks": 5,
        }, loads(self.post[1]['data']))

    def testSimilarDocs(self):
        self.response = JsonDict({
            "total": 887,
            "queryTime": 6,
            "times": {"searchTime": 3},
            "hits": [
                {"id": "record:1", "score": 0.1234},
                {"id": "record:2", "score": 0.1234},
            ],
        }).dumps()
        response = retval(self._lucene.similarDocuments(identifier='record:3'))
        self.assertEqual(887, response.total)
        self.assertEqual(2, len(response.hits))

    def testLuceneReadonly(self):
        # A readonly client neither pushes settings nor allows writes.
        self.setUpLucene(readonly=True)
        self._lucene.observer_init()
        self.assertEqual([], self.post)
        self.assertRaises(RuntimeError, lambda: consume(self._lucene.setSettings()))
        self.assertRaises(RuntimeError, lambda: consume(self._lucene.addDocument(fields=[])))
        self.assertRaises(RuntimeError, lambda: consume(self._lucene.delete('identifier')))

    def testLuceneServerHostPortDynamic(self):
        # Without a fixed host/port, Lucene asks its observers (luceneServer)
        # for one and then issues the HTTP request itself.
        lucene = Lucene(name='lucene', settings=LuceneSettings(), readonly=True)
        def httprequest1_1Mock(**kwargs):
            raise StopIteration(parseResponse(HTTP_RESPONSE))
            yield
        observer = CallTrace(
            'observer',
            returnValues=dict(luceneServer=('example.org', 1234)),
            methods=dict(httprequest1_1=httprequest1_1Mock))
        lucene.addObserver(observer)
        query = QueryExpressionToLuceneQueryDict([], LuceneSettings()).convert(cqlToExpression("field=value"))
        response = retval(lucene.executeQuery(
            luceneQuery=query, start=1, stop=5,
        ))
        self.assertEquals(887, response.total)
        self.assertEquals(['luceneServer', 'httprequest1_1'], observer.calledMethodNames())
    def setUp(self):
        """Build three Lucene cores (coreA/B/C) behind one MultiLucene and
        index a fixed set of documents whose join-key values overlap.

        Each document gets (label, key) pairs and boolean marker fields:
        core A carries sets M/Q/U plus sort field S, core B carries N/O/P,
        core C carries R and S. Key values k5..k8 occur in both A and B
        (and k5/k7/k8 also in C), which encodes the set-overlap picture the
        join tests rely on. The original source held an ASCII Venn diagram
        of these sets at this point.
        """
        SeecrTestCase.setUp(self)
        settings = LuceneSettings(multithreaded=self._multithreaded, verbose=False)
        # Core C gets a TermFrequencySimilarity instead of the default one.
        settingsLuceneC = LuceneSettings(multithreaded=self._multithreaded, verbose=False, similarity=TermFrequencySimilarity())
        self.luceneA = Lucene(join(self.tempdir, 'a'), name='coreA', reactor=CallTrace(), settings=settings)
        self.luceneB = Lucene(join(self.tempdir, 'b'), name='coreB', reactor=CallTrace(), settings=settings)
        self.luceneC = Lucene(join(self.tempdir, 'c'), name='coreC', reactor=CallTrace(), settings=settingsLuceneC)
        self.dna = be((Observable(),
            (MultiLucene(defaultCore='coreA', multithreaded=self._multithreaded),
                (self.luceneA,),
                (self.luceneB,),
                (self.luceneC,),
            )
        ))
        k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11 = range(1,12)
        # Core A: identifiers encode which of M/Q/U are 'true'; S is a sort value.
        self.addDocument(self.luceneA, identifier='A', keys=[('A', k1)], fields=[('M', 'false'), ('Q', 'false'), ('U', 'false'), ('S', '1')])
        self.addDocument(self.luceneA, identifier='A-U', keys=[('A', k2)], fields=[('M', 'false'), ('Q', 'false'), ('U', 'true'), ('S', '2')])
        self.addDocument(self.luceneA, identifier='A-Q', keys=[('A', k3)], fields=[('M', 'false'), ('Q', 'true'), ('U', 'false'), ('S', '3')])
        self.addDocument(self.luceneA, identifier='A-QU', keys=[('A', k4)], fields=[('M', 'false'), ('Q', 'true'), ('U', 'true'), ('S', '4')])
        # 'A-M' also carries a ('C', k5) key, linking it to core C's 'C-R'.
        self.addDocument(self.luceneA, identifier='A-M', keys=[('A', k5), ('C', k5)], fields=[('M', 'true'), ('Q', 'false'), ('U', 'false'), ('S', '5')])
        self.addDocument(self.luceneA, identifier='A-MU', keys=[('A', k6)], fields=[('M', 'true'), ('Q', 'false'), ('U', 'true'), ('S', '6')])
        self.addDocument(self.luceneA, identifier='A-MQ', keys=[('A', k7)], fields=[('M', 'true'), ('Q', 'true'), ('U', 'false'), ('S', '7')])
        self.addDocument(self.luceneA, identifier='A-MQU', keys=[('A', k8)], fields=[('M', 'true'), ('Q', 'true'), ('U', 'true'), ('S', '8')])
        # Core B: identifiers like 'B-N>A-M' indicate which core-A document
        # they share a key value with.
        self.addDocument(self.luceneB, identifier='B-N>A-M', keys=[('B', k5), ('D', k5)], fields=[('N', 'true'), ('O', 'true'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MU', keys=[('B', k6)], fields=[('N', 'true'), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MQ', keys=[('B', k7)], fields=[('N', 'true'), ('O', 'true'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MQU', keys=[('B', k8)], fields=[('N', 'true'), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N', keys=[('B', k9)], fields=[('N', 'true'), ('O', 'true'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B', keys=[('B', k10)], fields=[('N', 'false'), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-P>A-M', keys=[('B', k5)], fields=[('N', 'false'), ('O', 'true'), ('P', 'true')])
        self.addDocument(self.luceneB, identifier='B-P>A-MU', keys=[('B', k6)], fields=[('N', 'false'), ('O', 'false'), ('P', 'true')])
        self.addDocument(self.luceneB, identifier='B-P>A-MQ', keys=[('B', k7)], fields=[('N', 'false'), ('O', 'false'), ('P', 'true')])
        self.addDocument(self.luceneB, identifier='B-P>A-MQU', keys=[('B', k8)], fields=[('N', 'false'), ('O', 'false'), ('P', 'true')])
        self.addDocument(self.luceneB, identifier='B-P', keys=[('B', k11)], fields=[('N', 'false'), ('O', 'true'), ('P', 'true')])
        # Core C: three documents keyed to A/B documents via k5, k8 and k7.
        self.addDocument(self.luceneC, identifier='C-R', keys=[('C', k5)], fields=[('R', 'true')])
        self.addDocument(self.luceneC, identifier='C-S', keys=[('C', k8)], fields=[('S', 'true')])
        self.addDocument(self.luceneC, identifier='C-S2', keys=[('C', k7)], fields=[('S', 'false')])
        # Flush the fixture data, then switch both settings objects to
        # commitCount=1 so test-time updates commit per document.
        self.luceneA._realCommit()
        self.luceneB._realCommit()
        self.luceneC._realCommit()
        settings.commitCount = 1
        settingsLuceneC.commitCount = 1