def __init__(self, aString, antiUnaryClause=""):
    """Turn a free-form query string into a parsed CQL tree.

    The string is classified with ``_feelsLikePlusMinusQuery`` and
    ``_feelsLikeBooleanQuery``, run through the matching converter and
    parsed with ``parseString``.  When that parse raises
    ``CQLParseException`` in the boolean or default branch, the string is
    converted again with ``_default2CqlWithQuotes`` and ``_needsHelp`` is
    set to True.

    aString -- the raw query text; stored unchanged in ``self.original``.
    antiUnaryClause -- handed through to the converter functions.
    """
    self.original = aString
    looksPlusMinus = _feelsLikePlusMinusQuery(aString)
    looksBoolean = _feelsLikeBooleanQuery(aString)
    # Flagged up front when the string looks like both styles at once.
    self._needsHelp = looksBoolean and looksPlusMinus

    def parseQuoted():
        # Fallback conversion shared by the boolean and default branches.
        return parseString(_default2CqlWithQuotes(aString, antiUnaryClause=antiUnaryClause))

    if looksPlusMinus and not looksBoolean:
        self._kind = PLUSMINUS_KIND
        self.ast = parseString(_plusminus2Cql(aString, antiUnaryClause))
    elif looksBoolean and not looksPlusMinus:
        try:
            self._kind = BOOLEAN_KIND
            self.ast = parseString(_boolean2Cql(aString, antiUnaryClause))
        except CQLParseException:
            self._needsHelp = True
            self._kind = DEFAULT_KIND
            self.ast = parseQuoted()
    else:
        self._kind = DEFAULT_KIND
        try:
            self.ast = parseString(_default2Cql(aString, antiUnaryClause=antiUnaryClause))
        except CQLParseException:
            self._needsHelp = True
            self.ast = parseQuoted()
    self.originalAst = self.ast
    self._filters = []
def parseSruArgs(self, arguments):
    """Validate the SRU request arguments handled in this block.

    arguments -- mapping of argument name to a list of values; the first
        value of each list is used.

    Raises SruException(UNSUPPORTED_PARAMETER_VALUE, ...) when startRecord
    is not an integer >= 1, when maximumRecords is not an integer >= 0, or
    when maximumRecords exceeds ``self._maximumMaximumRecords`` (if that is
    truthy).  Raises SruException(QUERY_FEATURE_UNSUPPORTED, ...) when the
    query cannot be parsed.

    NOTE(review): the visible block ends after query validation and does
    not return ``sruArgs``; confirm whether the remainder of the method
    lives outside this view.
    """
    sruArgs = {
        'version': arguments['version'][0],
        'operation': arguments['operation'][0],
        'recordSchema': arguments.get('recordSchema', [self._defaultRecordSchema])[0],
        'recordPacking': arguments.get('recordPacking', [self._defaultRecordPacking])[0],
    }

    startRecord = arguments.get('startRecord', ['1'])[0]
    if not startRecord.isdigit() or int(startRecord) < 1:
        raise SruException(UNSUPPORTED_PARAMETER_VALUE, 'startRecord')
    sruArgs['startRecord'] = int(startRecord)

    # presumably DEFAULT_MAXIMUMRECORDS is a string, since .isdigit() is
    # called on it when the argument is absent -- TODO confirm
    maximumRecords = arguments.get('maximumRecords', [DEFAULT_MAXIMUMRECORDS])[0]
    if not maximumRecords.isdigit() or int(maximumRecords) < 0:
        raise SruException(UNSUPPORTED_PARAMETER_VALUE, 'maximumRecords')
    sruArgs['maximumRecords'] = int(maximumRecords)
    if self._maximumMaximumRecords and sruArgs['maximumRecords'] > self._maximumMaximumRecords:
        raise SruException(
            UNSUPPORTED_PARAMETER_VALUE,
            'maximumRecords > %s' % self._maximumMaximumRecords)

    query = arguments.get('query', [''])[0]
    try:
        # Parsed only for validation; the resulting tree is discarded.
        parseString(query)
    except CQLParseException as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        raise SruException(QUERY_FEATURE_UNSUPPORTED, str(e))
def testRemoteExecuteQuery(self):
    """Send a ComposedQuery through LuceneRemote using a stubbed httppost.

    Checks the decoded response (total and hits) and the single outgoing
    request: host, port, request path, content type and the message that
    ``jsonLoadMessage`` recovers from the posted body.
    """
    httpTrace = CallTrace('http')

    def httppost(*args, **kwargs):
        canned = LuceneResponse(total=5, hits=[
            Hit("1"), Hit("2"), Hit("3", duplicateCount=2), Hit("4"), Hit("5")
        ])
        raise StopIteration('HTTP/1.0 200 Ok\r\n\r\n%s' % canned.asJson())
        yield  # never reached; makes httppost a generator function
    httpTrace.methods['httppost'] = httppost

    remote = LuceneRemote(host='host', port=1234, path='/path')
    observable = Observable()
    observable.addObserver(remote)
    remote._httppost = httpTrace.httppost

    composed = ComposedQuery('coreA')
    composed.setCoreQuery(
        core='coreA',
        query=parseString('query AND field=value'),
        filterQueries=[parseString('query=fiets')],
        facets=[{'fieldname': 'field', 'maxTerms': 5}],
    )
    composed.setCoreQuery(core='coreB', query=parseString('query=test'))
    composed.addMatch(dict(core='coreA', uniqueKey='keyA'), dict(core='coreB', key='keyB'))

    result = returnValueFromGenerator(observable.any.executeComposedQuery(query=composed))
    self.assertEquals(5, result.total)
    expectedHits = [Hit("1"), Hit("2"), Hit("3", duplicateCount=2), Hit("4"), Hit("5")]
    self.assertEquals(expectedHits, result.hits)

    self.assertEquals(['httppost'], httpTrace.calledMethodNames())
    posted = httpTrace.calledMethods[0].kwargs
    self.assertEquals('host', posted['host'])
    self.assertEquals(1234, posted['port'])
    self.assertEquals('/path/__lucene_remote__', posted['request'])
    self.assertEquals('application/json', posted['headers']['Content-Type'])

    message, messageKwargs = jsonLoadMessage(posted['body'])
    sentQuery = messageKwargs['query']
    self.assertEquals('executeComposedQuery', message)
    self.assertEquals('coreA', sentQuery.resultsFrom)
    self.assertEquals([{'fieldname': 'field', 'maxTerms': 5}], sentQuery.facetsFor('coreA'))
def testServiceExecuteQuery(self):
    """Post an executeQuery message to LuceneRemoteService.

    Checks the JSON reply (content type, total, hits) and the keyword
    arguments with which the observer's executeQuery was called.
    """
    lucene = CallTrace('lucene')

    def executeQuery(**kwargs):
        raise StopIteration(LuceneResponse(total=2, hits=['aap','noot']))
        yield  # never reached; makes executeQuery a generator function
    lucene.methods['executeQuery'] = executeQuery

    service = LuceneRemoteService(CallTrace('reactor'))
    service.addObserver(lucene)

    messageKwargs = {
        'cqlAbstractSyntaxTree': {'__CQL_QUERY__': 'query AND field=value'},
        'start': 0,
        'stop': 10,
        'facets': [{'fieldname': 'field', 'maxTerms': 5}],
        'filterQueries': [{'__CQL_QUERY__': 'query=fiets'}],
        'joinQueries': {'core1': {'__CQL_QUERY__': 'query=test'}},
    }
    requestBody = dumps({'message': 'executeQuery', 'kwargs': messageKwargs})

    chunks = compose(service.handleRequest(path='/__lucene_remote__', Method="POST", Body=requestBody))
    result = ''.join(chunks)
    header, responseBody = result.split('\r\n' * 2)
    self.assertTrue('Content-Type: application/json' in header, header + responseBody)

    response = LuceneResponse.fromJson(responseBody)
    self.assertEquals(2, response.total)
    self.assertEquals(['aap', 'noot'], response.hits)

    self.assertEquals(['executeQuery'], lucene.calledMethodNames())
    received = lucene.calledMethods[0].kwargs
    self.assertEquals(parseString('query AND field=value'), received['cqlAbstractSyntaxTree'])
    self.assertEquals(0, received['start'])
    self.assertEquals(10, received['stop'])
    self.assertEquals([{'fieldname': 'field', 'maxTerms': 5}], received['facets'])
    self.assertEquals([parseString('query=fiets')], received['filterQueries'])
    self.assertEquals({'core1': parseString('query=test')}, received['joinQueries'])
def testRemoteExecuteQuery(self):
    """Send a ComposedQuery through LuceneRemote using a stubbed httppost.

    Checks the decoded response (total and hits) and the single outgoing
    request: host, port, request path, content type and the message that
    ``Conversion().jsonLoadMessage`` recovers from the posted body.
    """
    httpTrace = CallTrace('http')

    def httppost(*args, **kwargs):
        canned = LuceneResponse(total=5, hits=[
            Hit("1"), Hit("2"), Hit("3", duplicateCount=2), Hit("4"), Hit("5")
        ])
        raise StopIteration('HTTP/1.0 200 Ok\r\n\r\n%s' % canned.asJson())
        yield  # never reached; makes httppost a generator function
    httpTrace.methods['httppost'] = httppost

    remote = LuceneRemote(host='host', port=1234, path='/path')
    observable = Observable()
    observable.addObserver(remote)
    remote._httppost = httpTrace.httppost

    composed = ComposedQuery('coreA')
    composed.setCoreQuery(
        core='coreA',
        query=parseString('query AND field=value'),
        filterQueries=[parseString('query=fiets')],
        facets=[{'fieldname': 'field', 'maxTerms': 5}],
    )
    composed.setCoreQuery(core='coreB', query=parseString('query=test'))
    composed.addMatch(dict(core='coreA', uniqueKey='keyA'), dict(core='coreB', key='keyB'))

    result = returnValueFromGenerator(observable.any.executeComposedQuery(query=composed))
    self.assertEquals(5, result.total)
    expectedHits = [Hit("1"), Hit("2"), Hit("3", duplicateCount=2), Hit("4"), Hit("5")]
    self.assertEquals(expectedHits, result.hits)

    self.assertEquals(['httppost'], httpTrace.calledMethodNames())
    posted = httpTrace.calledMethods[0].kwargs
    self.assertEquals('host', posted['host'])
    self.assertEquals(1234, posted['port'])
    self.assertEquals('/path/__lucene_remote__', posted['request'])
    self.assertEquals('application/json', posted['headers']['Content-Type'])

    message, messageKwargs = Conversion().jsonLoadMessage(posted['body'])
    sentQuery = messageKwargs['query']
    self.assertEquals('executeComposedQuery', message)
    self.assertEquals('coreA', sentQuery.resultsFrom)
    self.assertEquals([{'fieldname': 'field', 'maxTerms': 5}], sentQuery.facetsFor('coreA'))
def testConversion(self):
    """Round-trip a message holding parsed CQL through Conversion's JSON dump and load."""
    original = {'q': parseString('CQL'), 'attr': {'qs': [parseString('qs')]}}
    dumped = Conversion().jsonDumpMessage(message='aMessage', **original)
    self.assertEquals(str, type(dumped))

    message, loaded = Conversion().jsonLoadMessage(dumped)
    self.assertEquals('aMessage', message)
    self.assertEquals(parseString('CQL'), loaded['q'])
    self.assertEquals([parseString('qs')], loaded['attr']['qs'])
def testConversion(self):
    """Round-trip a message holding parsed CQL through Conversion's JSON dump and load."""
    original = {'q': parseString('CQL'), 'attr': {'qs': [parseString('qs')]}}
    dumped = Conversion().jsonDumpMessage(message='aMessage', **original)
    self.assertEquals(str, type(dumped))

    message, loaded = Conversion().jsonLoadMessage(dumped)
    self.assertEquals('aMessage', message)
    self.assertEquals(parseString('CQL'), loaded['q'])
    self.assertEquals([parseString('qs')], loaded['attr']['qs'])
def testServiceExecuteQuery(self):
    """Post an executeQuery message to LuceneRemoteService.

    Checks the JSON reply (content type, total, hits) and the keyword
    arguments with which the observer's executeQuery was called.
    """
    lucene = CallTrace('lucene')

    def executeQuery(**kwargs):
        raise StopIteration(LuceneResponse(total=2, hits=['aap', 'noot']))
        yield  # never reached; makes executeQuery a generator function
    lucene.methods['executeQuery'] = executeQuery

    service = LuceneRemoteService(CallTrace('reactor'))
    service.addObserver(lucene)

    messageKwargs = {
        'cqlAbstractSyntaxTree': {'__CQL_QUERY__': 'query AND field=value'},
        'start': 0,
        'stop': 10,
        'facets': [{'fieldname': 'field', 'maxTerms': 5}],
        'filterQueries': [{'__CQL_QUERY__': 'query=fiets'}],
        'joinQueries': {'core1': {'__CQL_QUERY__': 'query=test'}},
    }
    requestBody = dumps({'message': 'executeQuery', 'kwargs': messageKwargs})

    chunks = compose(service.handleRequest(path='/__lucene_remote__', Method="POST", Body=requestBody))
    result = ''.join(chunks)
    header, responseBody = result.split('\r\n' * 2)
    self.assertTrue('Content-Type: application/json' in header, header + responseBody)

    response = LuceneResponse.fromJson(responseBody)
    self.assertEquals(2, response.total)
    self.assertEquals(['aap', 'noot'], response.hits)

    self.assertEquals(['executeQuery'], lucene.calledMethodNames())
    received = lucene.calledMethods[0].kwargs
    self.assertEquals(parseString('query AND field=value'), received['cqlAbstractSyntaxTree'])
    self.assertEquals(0, received['start'])
    self.assertEquals(10, received['stop'])
    self.assertEquals([{'fieldname': 'field', 'maxTerms': 5}], received['facets'])
    self.assertEquals([parseString('query=fiets')], received['filterQueries'])
    self.assertEquals({'core1': parseString('query=test')}, received['joinQueries'])
def testConvertComposedQuery(self):
    """Run a ComposedQuery through ``self.dna`` and check the per-core queries afterwards.

    After ``executeComposedQuery`` the queries for cores 'A' and 'B' are
    expected to have the repr of a TermQuery on 'fieldA' / 'fieldB'.
    """
    composed = ComposedQuery('A')
    composed.setCoreQuery(core='A', query=parseString('valueAQ'))
    composed.setCoreQuery(core='B', query=parseString('valueBQ'))
    composed.addMatch(dict(core='A', uniqueKey='keyA'), dict(core='B', key='keyB'))
    uniteA = dict(core='A', query=parseString('fieldUA exact valueUA'))
    uniteB = dict(core='B', query=parseString('fieldUB exact valueUB'))
    composed.addUnite(uniteA, uniteB)
    composed.validate()

    consume(self.dna.any.executeComposedQuery(query=composed))

    self.assertEquals(['executeComposedQuery'], self.observer.calledMethodNames())
    expectedA = repr(TermQuery(Term('fieldA', 'valueaq')))
    self.assertEquals(expectedA, repr(composed.queryFor('A')))
    expectedB = repr(TermQuery(Term('fieldB', 'valuebq')))
    self.assertEquals(expectedB, repr(composed.queryFor('B')))
def testOneTerm(self):
    """Check the tree produced for a bare term, a quoted term with a space and a term with escaped quotes."""
    def singleTermQuery(value):
        # Expected tree for a query consisting of exactly one search term.
        return CQL_QUERY(SCOPED_CLAUSE(SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))))

    self.assertEqualsCQL(singleTermQuery('term'), parseString('term'))
    self.assertEqualsCQL(singleTermQuery('white space'), parseString('"white space"'))
    self.assertEqualsCQL(singleTermQuery('string "quotes"'), parseString(r'"string \"quotes\""'))
def testParentheses(self):
    """Check the trees for parenthesized queries and that unbalanced parentheses raise CQLParseException."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    def termQuery():
        return CQL_QUERY(SCOPED_CLAUSE(leaf('term')))

    def parenthesized(query):
        # A parenthesized sub-query appears as a CQL_QUERY inside a SEARCH_CLAUSE.
        return CQL_QUERY(SCOPED_CLAUSE(SEARCH_CLAUSE(query)))

    self.assertEqualsCQL(parenthesized(termQuery()), parseString('(term)'))
    self.assertEqualsCQL(parenthesized(parenthesized(termQuery())), parseString('((term))'))
    conjunction = CQL_QUERY(
        SCOPED_CLAUSE(SCOPED_CLAUSE(leaf('term')), BOOLEAN('and'), leaf('term2')))
    self.assertEqualsCQL(parenthesized(conjunction), parseString('(term and term2)'))
    for unbalanced in ('(term', '(term term2'):
        self.assertException(CQLParseException, unbalanced)
def testJoin(self):
    """Execute a two-core ComposedQuery over SynchronousRemote and check total, hit ids and facet data."""
    remote = SynchronousRemote(host='localhost', port=self.httpPort, path='/remote')

    joined = ComposedQuery('main', query=parseString('*'))
    joined.addMatch(
        dict(core='main', uniqueKey=KEY_PREFIX + 'field'),
        dict(core='main2', key=KEY_PREFIX + 'field'))
    joined.start = 0
    joined.stop = 100
    joined.addFilterQuery(core='main', query=parseString('field2=value0 OR field2=value1'))
    joined.addFacet(core='main2', facet=dict(fieldname='untokenized.field2', maxTerms=5))

    response = remote.executeComposedQuery(query=joined)

    self.assertEquals(19, response.total)
    # record:10, record:11, record:20, ... record:91, followed by record:100
    expectedIds = ['record:%d' % (tens * 10 + unit) for tens in range(1, 10) for unit in (0, 1)]
    expectedIds.append('record:100')
    self.assertEquals(expectedIds, [hit.id for hit in response.hits])

    expectedTerms = [
        {'count': count, 'term': term}
        for count, term in [
            (27, 'value3'), (22, 'value0'), (19, 'value5'), (19, 'value7'), (19, 'value9'),
        ]
    ]
    expectedDrilldown = [{'fieldname': 'untokenized.field2', 'path': [], 'terms': expectedTerms}]
    self.assertEquals(expectedDrilldown, response.drilldownData)
def _loads_object_hook(dct): if '__CQL_QUERY__' in dct: return parseString(dct['__CQL_QUERY__']) elif '__COMPOSED_QUERY__' in dct: return ComposedQuery.fromDict( loads(dct['__COMPOSED_QUERY__'], object_hook=_loads_object_hook)) return dct
def _loads_object_hook(self, dct): if '__CQL_QUERY__' in dct: return parseString(dct['__CQL_QUERY__']) for converter in self._converters: if converter['name'] in dct: return converter['type'].fromDict(loads(dct[converter['name']], object_hook=self._loads_object_hook)) return dct
def processLogfiles(self):
    """Walk ``self.logfilepath`` and store term/discipline pairs found in ``*.log`` files.

    For every line that does not contain 'smdBroker', ``self.getQuery`` is
    asked for a query.  A truthy query is parsed with ``parseString``; a
    query that fails to parse is reported on stdout and skipped.  The terms
    and disciplines collected by a ``TermExtractor`` are de-duplicated
    (order preserved) and every term/discipline combination is passed to
    ``self.storeData``.

    Changes from the previous version: the log file is closed even when
    processing raises, the bare ``except:`` no longer swallows
    KeyboardInterrupt/SystemExit, and the file name is printed with the
    function-call form of print so the block also parses on Python 3.
    """
    for root, dirs, files in os.walk(self.logfilepath):
        # presumably ``filter`` is fnmatch.filter (names, pattern) -- confirm against the file's imports
        for filename in filter(files, "*.log"):
            print(filename)
            with open(os.path.join(root, filename), 'r') as logfile:
                for line in logfile:
                    if 'smdBroker' in line:
                        continue
                    query = self.getQuery(line)
                    if not query:
                        continue
                    try:
                        ast = parseString(query)
                    except Exception:
                        # Best effort: report the unparsable query and carry on.
                        print("Error in CQL parsing: %s" % query)
                        continue

                    termExtractor = TermExtractor()
                    termExtractor.extractTerms(ast)

                    uniqueTerms = []
                    for term in termExtractor.returnTerms:
                        # Strip the '~', '*', '"' and '_' characters before comparing terms.
                        term = term.replace('~', '').replace('*', '').replace('"', '').replace('_', '')
                        if term not in uniqueTerms:
                            uniqueTerms.append(term)

                    uniqueDisciplines = []
                    for discipline in termExtractor.discipline:
                        if discipline not in uniqueDisciplines:
                            uniqueDisciplines.append(discipline)

                    for uniqueTerm in uniqueTerms:
                        for uniqueDiscipline in uniqueDisciplines:
                            self.storeData(uniqueTerm, uniqueDiscipline)
def testTwoTerms(self):
    """Check the tree produced for two terms joined by 'and'."""
    left = SCOPED_CLAUSE(SEARCH_CLAUSE(SEARCH_TERM(TERM('term1'))))
    right = SEARCH_CLAUSE(SEARCH_TERM(TERM('term2')))
    expected = CQL_QUERY(SCOPED_CLAUSE(left, BOOLEAN('and'), right))
    self.assertEqualsCQL(expected, parseString('term1 and term2'))
def testPrecedenceAndOr2(self):
    """Check the tree for 'and' with a parenthesized group followed by 'or'."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    group = parseString('term2 and term3 and term4 and term5')
    andPart = CQL_QUERY(
        SCOPED_CLAUSE(SCOPED_CLAUSE(leaf('term')), BOOLEAN('and'), SEARCH_CLAUSE(group)))
    answer = CQL_QUERY(
        SCOPED_CLAUSE(SEARCH_CLAUSE(andPart), BOOLEAN('or'), SCOPED_CLAUSE(leaf('term6'))))

    parsed = parseString('term and (term2 and term3 and term4 and term5) or term6')
    self.assertEqualsCQL(answer, parsed)
def testDedup(self):
    """Query with dedupField on cores 'main' and 'main2' and check totals and duplicate counts."""
    remote = SynchronousRemote(host='localhost', port=self.httpPort, path='/remote')

    byField = remote.executeQuery(
        cqlAbstractSyntaxTree=parseString('*'),
        dedupField="__key__.field",
        core="main",
        stop=3)
    self.assertEqual(100, byField.total)
    self.assertEqual(100, byField.totalWithDuplicates)
    fieldCounts = [hit.duplicateCount['__key__.field'] for hit in byField.hits]
    self.assertEquals([1, 1, 1], fieldCounts)

    byGroup = remote.executeQuery(
        cqlAbstractSyntaxTree=parseString('*'),
        dedupField="__key__.groupfield",
        dedupSortField="__id__",
        core="main2",
        stop=3)
    self.assertEqual(10, byGroup.total)
    self.assertEqual(1000, byGroup.totalWithDuplicates)
    groupCounts = [hit.duplicateCount['__key__.groupfield'] for hit in byGroup.hits]
    self.assertEquals([100] * 3, groupCounts)
def testDedup(self):
    """Query with dedupField and an ascending '__id__' sort; check totals, hit ids and duplicate counts."""
    remote = SynchronousRemote(host='localhost', port=self.httpPort, path='/remote')

    def ascendingById():
        # A fresh sortKeys list for every request.
        return [{'sortBy': '__id__', 'sortDescending': False}]

    byField = remote.executeQuery(
        cqlAbstractSyntaxTree=parseString('*'),
        dedupField="__key__.field",
        core="main",
        stop=3,
        sortKeys=ascendingById())
    self.assertEqual(100, byField.total)
    self.assertEqual(100, byField.totalWithDuplicates)
    idsAndCounts = [(hit.id, hit.duplicateCount['__key__.field']) for hit in byField.hits]
    self.assertEquals([('record:1', 1), ('record:10', 1), ('record:100', 1)], idsAndCounts)

    byGroup = remote.executeQuery(
        cqlAbstractSyntaxTree=parseString('*'),
        dedupField="__key__.groupfield",
        dedupSortField="__id__",
        core="main2",
        stop=3,
        sortKeys=ascendingById())
    self.assertEqual(10, byGroup.total)
    self.assertEqual(1000, byGroup.totalWithDuplicates)
    groupCounts = [hit.duplicateCount['__key__.groupfield'] for hit in byGroup.hits]
    self.assertEquals([100] * 3, groupCounts)
def testGrouping(self):
    """Query core 'main2' with groupingField and check the hit ids and the size of each group."""
    remote = SynchronousRemote(host='localhost', port=self.httpPort, path='/remote')
    response = remote.executeQuery(
        cqlAbstractSyntaxTree=parseString('*'),
        groupingField="__key__.groupfield",
        core="main2",
        stop=3,
        sortKeys=[{'sortBy': '__id__', 'sortDescending': False}])

    self.assertEqual(3, len(response.hits))
    idsAndGroupSizes = [
        (hit.id, len(hit.duplicates['__key__.groupfield'])) for hit in response.hits
    ]
    self.assertEquals(
        [('record:1', 100), ('record:100', 100), ('record:200', 100)],
        idsAndGroupSizes)
def testConversionOfComposedQuery(self):
    """Round-trip a ComposedQuery through one Conversion instance and check the 'coreA' query survives."""
    conversion = Conversion()

    composed = ComposedQuery('coreA')
    composed.setCoreQuery(
        core='coreA',
        query=parseString('Q0'),
        filterQueries=['Q1', 'Q2'],
        facets=['F0', 'F1'])
    composed.setCoreQuery(
        core='coreB',
        query=QueryExpression.searchterm(term='Q3'),
        filterQueries=['Q4'])
    composed.addMatch(dict(core='coreA', uniqueKey='keyA'), dict(core='coreB', key='keyB'))
    composed.addUnite(dict(core='coreA', query='AQuery'), dict(core='coreB', query='anotherQuery'))
    composed.start = 0
    composed.sortKeys = [dict(sortBy='field', sortDescending=True)]

    dumped = conversion.jsonDumpMessage(message='aMessage', **{'q': composed})
    self.assertEquals(str, type(dumped))

    message, loaded = conversion.jsonLoadMessage(dumped)
    self.assertEquals('aMessage', message)
    restored = loaded['q']
    self.assertEquals(parseString('Q0'), restored.queryFor('coreA'))
def testBooleansAreCaseInsensitive(self):
    """Check that the mixed-case operator 'AnD' yields BOOLEAN('and')."""
    firstTerm = SCOPED_CLAUSE(SEARCH_CLAUSE(SEARCH_TERM(TERM('term'))))
    secondTerm = SEARCH_CLAUSE(SEARCH_TERM(TERM('term2')))
    expected = CQL_QUERY(SCOPED_CLAUSE(firstTerm, BOOLEAN('and'), secondTerm))
    self.assertEqualsCQL(expected, parseString('term AnD term2'))
def assertIdentity(self, query):
    """Assert that CqlIdentityVisitor returns an equal tree that shares no node objects with its input.

    query -- CQL text to parse and visit.
    """
    parsed = parseString(query)
    visited = CqlIdentityVisitor(parsed).visit()
    self.assertEquals(parsed, visited)

    parsedIds = set(id(node) for node in allnodes(parsed))
    visitedIds = set(id(node) for node in allnodes(visited))
    shared = parsedIds.intersection(visitedIds)
    self.assertEquals(0, len(shared), 'Expected new ast to be a deepcopy.')
def testIndexRelationSearchTerm(self):
    """Check index/relation/term trees for '=' and '==' and that other comparitors raise UnsupportedCQL when only '=' is supported."""
    def expectedFor(comparitor):
        clause = SEARCH_CLAUSE(
            INDEX(TERM('field1')),
            RELATION(COMPARITOR(comparitor)),
            SEARCH_TERM(TERM('200')))
        return CQL_QUERY(SCOPED_CLAUSE(clause))

    self.assertEqualsCQL(expectedFor('='), parseString('field1 = 200'))
    self.assertEqualsCQL(expectedFor('=='), parseString('field1 == 200'))

    for comparitor in ['>', '<', '>=', '<=', '<>', '==']:
        unsupportedQuery = 'field1 %s 200' % comparitor
        self.assertException(UnsupportedCQL, unsupportedQuery, supportedComparitors=['='])
def testUnsupportedCQL(self):
    """Check that composing a query with the 'any' relation raises UnsupportedCQL with the expected message."""
    printer = SolrLuceneQueryComposer(unqualifiedTermFields=[("all", 1)])
    ast = parseString("field any term")
    try:
        printer.compose(ast)
    except UnsupportedCQL as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        self.assertEquals("Only =, ==, exact, <, <=, > and >= are supported.", str(e))
    else:
        self.fail("must raise UnsupportedCQL")
def parseSruArgs(self, arguments):
    """Validate SRU request arguments and build the SRU and query argument dicts.

    arguments -- mapping of argument name to a list of values; the first
        value of each list is used for the known arguments.

    Returns a tuple ``(sruArgs, queryArgs)``.  ``queryArgs`` is a copy of
    ``sruArgs`` taken right after the query is stored, plus a parsed
    'sortKeys' entry when one was given and could be parsed.  ``sruArgs``
    additionally receives every argument not already present in it.

    Raises SruException(UNSUPPORTED_PARAMETER_VALUE, ...) for an invalid
    startRecord or maximumRecords, and
    SruException(QUERY_FEATURE_UNSUPPORTED, ...) when the query cannot be
    parsed or tokenized.
    """
    def firstValue(name, default):
        return arguments.get(name, [default])[0]

    sruArgs = {
        'version': arguments['version'][0],
        'operation': arguments['operation'][0],
        'recordSchema': firstValue('recordSchema', self._defaultRecordSchema),
        'recordPacking': firstValue('recordPacking', self._defaultRecordPacking),
    }

    startRecord = firstValue('startRecord', '1')
    if not (startRecord.isdigit() and int(startRecord) >= 1):
        raise SruException(UNSUPPORTED_PARAMETER_VALUE, 'startRecord')
    sruArgs['startRecord'] = int(startRecord)

    maximumRecords = firstValue('maximumRecords', DEFAULT_MAXIMUMRECORDS)
    if not (maximumRecords.isdigit() and int(maximumRecords) >= 0):
        raise SruException(UNSUPPORTED_PARAMETER_VALUE, 'maximumRecords')
    sruArgs['maximumRecords'] = int(maximumRecords)
    if self._maximumMaximumRecords and sruArgs['maximumRecords'] > self._maximumMaximumRecords:
        raise SruException(
            UNSUPPORTED_PARAMETER_VALUE,
            'maximumRecords > %s' % self._maximumMaximumRecords)

    query = firstValue('query', '')
    try:
        # Parsed only for validation; the resulting tree is discarded.
        parseString(query)
    except (CQLParseException, CQLTokenizerException) as e:
        raise SruException(QUERY_FEATURE_UNSUPPORTED, str(e))
    sruArgs['query'] = query
    queryArgs = sruArgs.copy()

    if 'sortKeys' in arguments:
        try:
            sortBy, ignored, sortDirection = arguments.get('sortKeys')[0].split(',')
            directionFlag = bool(int(sortDirection))
            # The old-style setting inverts how the direction flag is read.
            if self._oldAndWrongStyleSortKeys:
                sortDescending = directionFlag
            else:
                sortDescending = not directionFlag
            queryArgs['sortKeys'] = [{'sortBy': sortBy.strip(), 'sortDescending': sortDescending}]
            sruArgs['sortKeys'] = arguments['sortKeys']
        except ValueError:
            # A sortKeys value that cannot be unpacked or converted is ignored.
            pass

    for key in arguments:
        sruArgs.setdefault(key, arguments[key])
    return sruArgs, queryArgs
def testBooleansAreCaseInsensitive(self):
    """Check that the mixed-case operator 'AnD' yields BOOLEAN('and')."""
    firstTerm = SCOPED_CLAUSE(SEARCH_CLAUSE(SEARCH_TERM(TERM('term'))))
    secondTerm = SEARCH_CLAUSE(SEARCH_TERM(TERM('term2')))
    expected = CQL_QUERY(SCOPED_CLAUSE(firstTerm, BOOLEAN('and'), secondTerm))
    self.assertEqualsCQL(expected, parseString('term AnD term2'))
def testPrecedenceOrAnd(self):
    """Check the tree for 'term1 or term2 and term3'."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    andPart = SCOPED_CLAUSE(SCOPED_CLAUSE(leaf('term2')), BOOLEAN('and'), leaf('term3'))
    answer = CQL_QUERY(SCOPED_CLAUSE(leaf('term1'), BOOLEAN('or'), andPart))
    self.assertEqualsCQL(answer, parseString('term1 or term2 and term3'))
def assertIdentity(self, query):
    """Assert that CqlIdentityVisitor returns an equal tree that shares no node objects with its input.

    query -- CQL text to parse and visit.
    """
    parsed = parseString(query)
    visited = CqlIdentityVisitor(parsed).visit()
    self.assertEqual(parsed, visited)

    parsedIds = set(id(node) for node in allnodes(parsed))
    visitedIds = set(id(node) for node in allnodes(visited))
    shared = parsedIds.intersection(visitedIds)
    self.assertEqual(0, len(shared), 'Expected new ast to be a deepcopy.')
def testIdentityVisitor(self):
    """Time ten CqlIdentityVisitor runs over the query stored in 'ridiculouslongquery.txt'.

    The query file is now read inside a ``with`` block so the handle is
    closed instead of being left to the garbage collector.
    """
    with open('ridiculouslongquery.txt') as queryFile:
        p = parseString(queryFile.read().strip())

    def doVisit():
        for _ in range(10):
            CqlIdentityVisitor(p).visit()

    t0 = time()
    doVisit()
    t1 = time()
    #profile(doVisit, runKCacheGrind = True)
    # presumably assertTiming(lower, actual, upper) -- confirm against its definition
    self.assertTiming(0.032, t1 - t0, 0.041) # optimized identityvisitor
def testIndexRelationExactSearchTerm(self):
    """Check the tree produced for an index, the 'exact' relation and a term."""
    clause = SEARCH_CLAUSE(
        INDEX(TERM('field1')),
        RELATION(COMPARITOR('exact')),
        SEARCH_TERM(TERM('200')))
    expected = CQL_QUERY(SCOPED_CLAUSE(clause))
    self.assertEqualsCQL(expected, parseString('field1 exact 200'))
def testRemoteExecuteQueryWithNoneValues(self):
    """Send executeQuery with None for facets, filterQueries and joinQueries through LuceneRemote.

    Checks the decoded response and that the posted JSON body carries the
    None values and the CQL query text.
    """
    httpTrace = CallTrace('http')

    def httppost(*args, **kwargs):
        canned = LuceneResponse(
            total=5,
            hits=[Hit("1"), Hit("2"), Hit("3"), Hit("4"), Hit("5")])
        raise StopIteration('HTTP/1.0 200 Ok\r\n\r\n%s' % canned.asJson())
        yield  # never reached; makes httppost a generator function
    httpTrace.methods['httppost'] = httppost

    remote = LuceneRemote(host='host', port=1234, path='/path')
    observable = Observable()
    observable.addObserver(remote)
    remote._httppost = httpTrace.httppost

    pending = observable.any.executeQuery(
        cqlAbstractSyntaxTree=parseString('query AND field=value'),
        start=0,
        stop=10,
        facets=None,
        filterQueries=None,
        joinQueries=None,
    )
    result = returnValueFromGenerator(pending)
    self.assertEquals(5, result.total)
    self.assertEquals([Hit("1"), Hit("2"), Hit("3"), Hit("4"), Hit("5")], result.hits)

    self.assertEquals(['httppost'], httpTrace.calledMethodNames())
    posted = httpTrace.calledMethods[0].kwargs
    self.assertEquals('host', posted['host'])
    self.assertEquals(1234, posted['port'])
    self.assertEquals('/path/__lucene_remote__', posted['request'])
    self.assertEquals('application/json', posted['headers']['Content-Type'])

    expectedKwargs = {
        'cqlAbstractSyntaxTree': {'__CQL_QUERY__': 'query AND field=value'},
        'start': 0,
        'stop': 10,
        'facets': None,
        'filterQueries': None,
        'joinQueries': None,
    }
    self.assertDictEquals(
        {'message': 'executeQuery', 'kwargs': expectedKwargs},
        loads(posted['body']))
def testPrettyPrintSimple(self):
    """Check the prettyPrint() output for the single-term query 'aap'."""
    q = parseString('aap')
    # NOTE(review): whitespace inside the expected literal is part of the
    # assertion; confirm it matches prettyPrint()'s real line breaks and
    # indentation, which cannot be verified from this view.
    self.assertEquals("""CQL_QUERY( SCOPED_CLAUSE( SEARCH_CLAUSE( SEARCH_TERM( TERM('aap') ) ) ) )""", q.prettyPrint())
def testPrecedenceAndAndAnd(self):
    """Check that a chain of 'and' operators nests to the left."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    # Start from the innermost clause and wrap it once per further term.
    clause = SCOPED_CLAUSE(leaf('a'))
    for name in ('b', 'c', 'd'):
        clause = SCOPED_CLAUSE(clause, BOOLEAN('and'), leaf(name))
    expected = CQL_QUERY(clause)

    self.assertEqualsCQL(expected, parseString("a and b and c and d"))
def testDedup(self):
    """Query with dedupField using several dedupSortField settings and check counts, totals and hit ids."""
    remote = SynchronousRemote(host='localhost', port=self.httpPort, path='/remote')

    def groupCounts(response):
        return [hit.duplicateCount['__key__.groupfield'] for hit in response.hits]

    response = remote.executeQuery(
        cqlAbstractSyntaxTree=parseString('*'),
        dedupField="__key__.field",
        core="main",
        stop=3)
    self.assertEqual(100, response.total)
    self.assertEqual(100, response.totalWithDuplicates)
    self.assertEquals(
        [1, 1, 1],
        [hit.duplicateCount['__key__.field'] for hit in response.hits])

    response = remote.executeQuery(
        cqlAbstractSyntaxTree=parseString('*'),
        dedupField="__key__.groupfield",
        dedupSortField="__id__",
        core="main2",
        stop=3)
    self.assertEqual(3, len(response.hits))
    self.assertEqual(10, response.total)
    self.assertEqual(1000, response.totalWithDuplicates)
    self.assertEquals([100] * 3, groupCounts(response))

    response = remote.executeQuery(
        cqlAbstractSyntaxTree=parseString('*'),
        dedupField="__key__.groupfield",
        dedupSortField="__numeric__.sort1",
        core="main2",
        stop=100000)
    self.assertEqual(10, len(response.hits))
    self.assertEqual(10, response.total)
    self.assertEqual(1000, response.totalWithDuplicates)
    self.assertEquals([100] * 10, groupCounts(response))

    # Both orders of the two numeric sort fields are expected to give the same hit.
    sortFieldOrders = (
        ["__numeric__.sort1", "__numeric__.sort2"],
        ["__numeric__.sort2", "__numeric__.sort1"],
    )
    for sortFields in sortFieldOrders:
        response = remote.executeQuery(
            cqlAbstractSyntaxTree=parseString('groupfield=1'),
            dedupField="__key__.groupfield",
            dedupSortField=sortFields,
            core="main2",
            stop=10000)
        self.assertEqual(1, len(response.hits))
        self.assertEqual(1, response.total)
        self.assertEqual(100, response.totalWithDuplicates)
        self.assertEquals(['main2:record:199'], [hit.id for hit in response.hits])
def testTwoTerms(self):
    """Check the tree produced for two terms joined by 'and'."""
    left = SCOPED_CLAUSE(SEARCH_CLAUSE(SEARCH_TERM(TERM('term1'))))
    right = SEARCH_CLAUSE(SEARCH_TERM(TERM('term2')))
    expected = CQL_QUERY(SCOPED_CLAUSE(left, BOOLEAN('and'), right))
    self.assertEqualsCQL(expected, parseString('term1 and term2'))
def testIdentityVisitor(self):
    """Time ten CqlIdentityVisitor runs over the query from ``self.ridiculouslongquery()``."""
    ast = parseString(self.ridiculouslongquery())

    def visitTenTimes():
        for _ in range(10):
            CqlIdentityVisitor(ast).visit()

    started = time()
    visitTenTimes()
    finished = time()
    #profile(doVisit, runKCacheGrind = True)
    self.assertTiming(0.0032, finished - started, 0.041) # optimized identityvisitor
def testPrettyPrintSimple(self):
    """Check the prettyPrint() output for the single-term query 'aap'."""
    q = parseString('aap')
    # NOTE(review): whitespace inside the expected literal is part of the
    # assertion; confirm it matches prettyPrint()'s real line breaks and
    # indentation, which cannot be verified from this view.
    self.assertEqual( """CQL_QUERY( SCOPED_CLAUSE( SEARCH_CLAUSE( SEARCH_TERM( TERM('aap') ) ) ) )""", q.prettyPrint())
def testConversionOfComposedQuery(self):
    """Round-trip a ComposedQuery through jsonDumpMessage/jsonLoadMessage and check the 'coreA' query survives."""
    composed = ComposedQuery('coreA')
    composed.setCoreQuery(
        core='coreA',
        query=parseString('Q0'),
        filterQueries=['Q1', 'Q2'],
        facets=['F0', 'F1'])
    composed.setCoreQuery(core='coreB', query='Q3', filterQueries=['Q4'])
    composed.addMatch(dict(core='coreA', uniqueKey='keyA'), dict(core='coreB', key='keyB'))
    composed.addUnite(dict(core='coreA', query='AQuery'), dict(core='coreB', query='anotherQuery'))
    composed.start = 0
    composed.sortKeys = [dict(sortBy='field', sortDescending=True)]

    dumped = jsonDumpMessage(message='aMessage', **{'q': composed})
    self.assertEquals(str, type(dumped))

    message, loaded = jsonLoadMessage(dumped)
    self.assertEquals('aMessage', message)
    restored = loaded['q']
    self.assertEquals(parseString('Q0'), restored.queryFor('coreA'))
def __init__(self, aString, antiUnaryClause=""):
    """Turn a free-form query string into a parsed CQL tree.

    The string is classified with ``_feelsLikePlusMinusQuery`` and
    ``_feelsLikeBooleanQuery``, run through the matching converter and
    parsed with ``parseString``.  When that parse raises
    ``CQLParseException`` in the boolean or default branch, the string is
    converted again with ``_default2CqlWithQuotes`` and ``_needsHelp`` is
    set to True.  Any CQLParseException or CQLTokenizerException that
    still escapes leads to parsing ``quotTerm(self.original)`` instead.

    aString -- the raw query text; stored unchanged in ``self.original``.
    antiUnaryClause -- handed through to the converter functions.
    """
    self.original = aString

    def parseQuoted():
        # Fallback conversion shared by the boolean and default branches.
        return parseString(_default2CqlWithQuotes(aString, antiUnaryClause=antiUnaryClause))

    try:
        looksPlusMinus = _feelsLikePlusMinusQuery(aString)
        looksBoolean = _feelsLikeBooleanQuery(aString)
        # Flagged up front when the string looks like both styles at once.
        self._needsHelp = looksBoolean and looksPlusMinus
        if looksPlusMinus and not looksBoolean:
            self._kind = PLUSMINUS_KIND
            self.ast = parseString(_plusminus2Cql(aString, antiUnaryClause))
        elif looksBoolean and not looksPlusMinus:
            try:
                self._kind = BOOLEAN_KIND
                self.ast = parseString(_boolean2Cql(aString, antiUnaryClause))
            except CQLParseException:
                self._needsHelp = True
                self._kind = DEFAULT_KIND
                self.ast = parseQuoted()
        else:
            self._kind = DEFAULT_KIND
            try:
                self.ast = parseString(_default2Cql(aString, antiUnaryClause=antiUnaryClause))
            except CQLParseException:
                self._needsHelp = True
                self.ast = parseQuoted()
    except (CQLParseException, CQLTokenizerException):
        # Last resort: parse the original string wrapped by quotTerm.
        self.ast = parseString(quotTerm(self.original))
    self.originalAst = self.ast
    self._filters = []
def testPartialVisitor(self):
    """Time ten runs of a CqlVisitor subclass that overrides only visitINDEX.

    The query is read from 'ridiculouslongquery.txt' inside a ``with``
    block so the handle is closed instead of being left to the garbage
    collector.
    """
    class PartialVisitor(CqlVisitor):
        def visitINDEX(self, node):
            return node.visitChildren(self)

    with open('ridiculouslongquery.txt') as queryFile:
        p = parseString(queryFile.read().strip())

    def doVisit():
        for _ in range(10):
            PartialVisitor(p).visit()

    t0 = time()
    doVisit()
    t1 = time()
    #profile(doVisit, runKCacheGrind = True)
    # presumably assertTiming(lower, actual, upper) -- confirm against its definition
    self.assertTiming(0.018, t1 - t0, 0.024)
def testPrecedenceAndOr2(self):
    """Check the tree for 'and' with a parenthesized group followed by 'or'."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    group = parseString('term2 and term3 and term4 and term5')
    andPart = CQL_QUERY(
        SCOPED_CLAUSE(SCOPED_CLAUSE(leaf('term')), BOOLEAN('and'), SEARCH_CLAUSE(group)))
    answer = CQL_QUERY(
        SCOPED_CLAUSE(SEARCH_CLAUSE(andPart), BOOLEAN('or'), SCOPED_CLAUSE(leaf('term6'))))

    parsed = parseString('term and (term2 and term3 and term4 and term5) or term6')
    self.assertEqualsCQL(answer, parsed)
def testParentheses(self):
    """Check the trees for parenthesized queries and that unbalanced parentheses raise CQLParseException."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    def termQuery():
        return CQL_QUERY(SCOPED_CLAUSE(leaf('term')))

    def parenthesized(query):
        # A parenthesized sub-query appears as a CQL_QUERY inside a SEARCH_CLAUSE.
        return CQL_QUERY(SCOPED_CLAUSE(SEARCH_CLAUSE(query)))

    self.assertEqualsCQL(parenthesized(termQuery()), parseString('(term)'))
    self.assertEqualsCQL(parenthesized(parenthesized(termQuery())), parseString('((term))'))
    conjunction = CQL_QUERY(
        SCOPED_CLAUSE(SCOPED_CLAUSE(leaf('term')), BOOLEAN('and'), leaf('term2')))
    self.assertEqualsCQL(parenthesized(conjunction), parseString('(term and term2)'))
    for unbalanced in ('(term', '(term term2'):
        self.assertException(CQLParseException, unbalanced)
def testPrecedenceAndOr(self):
    """Check the tree for 'term and term2 or term3'."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    andPart = CQL_QUERY(
        SCOPED_CLAUSE(SCOPED_CLAUSE(leaf('term')), BOOLEAN('and'), leaf('term2')))
    answer = CQL_QUERY(
        SCOPED_CLAUSE(SEARCH_CLAUSE(andPart), BOOLEAN('or'), SCOPED_CLAUSE(leaf('term3'))))

    self.assertEqualsCQL(answer, parseString('term and term2 or term3'))
def testPrecedenceOrAnd(self):
    """Check the tree for 'term1 or term2 and term3'."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    andPart = SCOPED_CLAUSE(SCOPED_CLAUSE(leaf('term2')), BOOLEAN('and'), leaf('term3'))
    answer = CQL_QUERY(SCOPED_CLAUSE(leaf('term1'), BOOLEAN('or'), andPart))
    self.assertEqualsCQL(answer, parseString('term1 or term2 and term3'))
def parseSruArgs(self, arguments):
    """Validate the SRU request arguments handled in this block.

    arguments -- mapping of argument name to a list of values; the first
        value of each list is used.

    Raises SruException(UNSUPPORTED_PARAMETER_VALUE, ...) when startRecord
    is not an integer >= 1, when maximumRecords is not an integer >= 0, or
    when maximumRecords exceeds ``self._maximumMaximumRecords`` (if that is
    truthy).  Raises SruException(QUERY_FEATURE_UNSUPPORTED, ...) when the
    query cannot be parsed.

    NOTE(review): the visible block ends after query validation and does
    not return ``sruArgs``; confirm whether the remainder of the method
    lives outside this view.
    """
    sruArgs = {
        'version': arguments['version'][0],
        'operation': arguments['operation'][0],
        'recordSchema': arguments.get('recordSchema', [self._defaultRecordSchema])[0],
        'recordPacking': arguments.get('recordPacking', [self._defaultRecordPacking])[0],
    }

    startRecord = arguments.get('startRecord', ['1'])[0]
    if not startRecord.isdigit() or int(startRecord) < 1:
        raise SruException(UNSUPPORTED_PARAMETER_VALUE, 'startRecord')
    sruArgs['startRecord'] = int(startRecord)

    # presumably DEFAULT_MAXIMUMRECORDS is a string, since .isdigit() is
    # called on it when the argument is absent -- TODO confirm
    maximumRecords = arguments.get('maximumRecords', [DEFAULT_MAXIMUMRECORDS])[0]
    if not maximumRecords.isdigit() or int(maximumRecords) < 0:
        raise SruException(UNSUPPORTED_PARAMETER_VALUE, 'maximumRecords')
    sruArgs['maximumRecords'] = int(maximumRecords)
    if self._maximumMaximumRecords and sruArgs['maximumRecords'] > self._maximumMaximumRecords:
        raise SruException(
            UNSUPPORTED_PARAMETER_VALUE,
            'maximumRecords > %s' % self._maximumMaximumRecords)

    query = arguments.get('query', [''])[0]
    try:
        # Parsed only for validation; the resulting tree is discarded.
        parseString(query)
    except CQLParseException as e:
        # "as" form is valid on Python 2.6+ and Python 3 alike.
        raise SruException(QUERY_FEATURE_UNSUPPORTED, str(e))
def testPartialVisitor(self):
    """Time ten runs of a CqlVisitor subclass that overrides only visitINDEX."""
    class PartialVisitor(CqlVisitor):
        def visitINDEX(self, node):
            return node.visitChildren(self)

    ast = parseString(self.ridiculouslongquery())

    def visitTenTimes():
        for _ in range(10):
            PartialVisitor(ast).visit()

    started = time()
    visitTenTimes()
    finished = time()
    #profile(doVisit, runKCacheGrind = True)
    self.assertTiming(0.0018, finished - started, 0.024)
def testModifiers(self):
    """Check the tree produced for a relation that carries a 'boost=1.5' modifier."""
    boost = MODIFIER(TERM("boost"), COMPARITOR("="), TERM("1.5"))
    relation = RELATION(COMPARITOR('='), MODIFIERLIST(boost))
    clause = SEARCH_CLAUSE(INDEX(TERM('field0')), relation, SEARCH_TERM(TERM('value')))
    expected = CQL_QUERY(SCOPED_CLAUSE(clause))
    self.assertEqualsCQL(expected, parseString("field0 =/boost=1.5 value"))
def testDedup(self):
    """Query core 'main' with dedupField and an ascending '__id__' sort; check hit ids and duplicate counts."""
    remote = SynchronousRemote(host='localhost', port=self.httpPort, path='/remote')
    response = remote.executeQuery(
        cqlAbstractSyntaxTree=parseString('*'),
        dedupField="__key__.field",
        core="main",
        stop=3,
        sortKeys=[{'sortBy': '__id__', 'sortDescending': False}])

    idsAndCounts = [(hit.id, hit.duplicateCount['__key__.field']) for hit in response.hits]
    self.assertEquals(
        [('record:1', 0), ('record:10', 1), ('record:100', 1)],
        idsAndCounts)
def testPrecedenceAndAndAnd(self):
    """Check that a chain of 'and' operators nests to the left."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    # Start from the innermost clause and wrap it once per further term.
    clause = SCOPED_CLAUSE(leaf('a'))
    for name in ('b', 'c', 'd'):
        clause = SCOPED_CLAUSE(clause, BOOLEAN('and'), leaf(name))
    expected = CQL_QUERY(clause)

    self.assertEqualsCQL(expected, parseString("a and b and c and d"))
def testRemoteExecuteQueryWithNoneValues(self):
    """Send executeQuery with None for facets, filterQueries and joinQueries through LuceneRemote.

    Checks the decoded response and that the posted JSON body carries the
    None values and the CQL query text.
    """
    httpTrace = CallTrace('http')

    def httppost(*args, **kwargs):
        canned = LuceneResponse(
            total=5,
            hits=[Hit("1"), Hit("2"), Hit("3"), Hit("4"), Hit("5")])
        raise StopIteration('HTTP/1.0 200 Ok\r\n\r\n%s' % canned.asJson())
        yield  # never reached; makes httppost a generator function
    httpTrace.methods['httppost'] = httppost

    remote = LuceneRemote(host='host', port=1234, path='/path')
    observable = Observable()
    observable.addObserver(remote)
    remote._httppost = httpTrace.httppost

    pending = observable.any.executeQuery(
        cqlAbstractSyntaxTree=parseString('query AND field=value'),
        start=0,
        stop=10,
        facets=None,
        filterQueries=None,
        joinQueries=None,
    )
    result = returnValueFromGenerator(pending)
    self.assertEquals(5, result.total)
    self.assertEquals([Hit("1"), Hit("2"), Hit("3"), Hit("4"), Hit("5")], result.hits)

    self.assertEquals(['httppost'], httpTrace.calledMethodNames())
    posted = httpTrace.calledMethods[0].kwargs
    self.assertEquals('host', posted['host'])
    self.assertEquals(1234, posted['port'])
    self.assertEquals('/path/__lucene_remote__', posted['request'])
    self.assertEquals('application/json', posted['headers']['Content-Type'])

    expectedKwargs = {
        'cqlAbstractSyntaxTree': {'__CQL_QUERY__': 'query AND field=value'},
        'start': 0,
        'stop': 10,
        'facets': None,
        'filterQueries': None,
        'joinQueries': None,
    }
    self.assertDictEquals(
        {'message': 'executeQuery', 'kwargs': expectedKwargs},
        loads(posted['body']))
def testPrettyPrintComplex(self):
    """Check the prettyPrint() output for a query that combines AND, a parenthesized OR and an index relation."""
    q = parseString('aap AND (noot = mies OR vuur)')
    # NOTE(review): whitespace inside the expected literal is part of the
    # assertion; confirm it matches prettyPrint()'s real line breaks and
    # indentation, which cannot be verified from this view.
    # The third argument is the failure message: the actual pretty-printed tree.
    self.assertEquals("""CQL_QUERY( SCOPED_CLAUSE( SCOPED_CLAUSE( SEARCH_CLAUSE( SEARCH_TERM( TERM('aap') ) ) ), BOOLEAN('and'), SEARCH_CLAUSE( CQL_QUERY( SCOPED_CLAUSE( SEARCH_CLAUSE( INDEX( TERM('noot') ), RELATION( COMPARITOR('=') ), SEARCH_TERM( TERM('mies') ) ), BOOLEAN('or'), SCOPED_CLAUSE( SEARCH_CLAUSE( SEARCH_TERM( TERM('vuur') ) ) ) ) ) ) ) )""", q.prettyPrint(), q.prettyPrint())
def testPrecedenceAndOr(self):
    """Check the tree for 'term and term2 or term3'."""
    def leaf(value):
        return SEARCH_CLAUSE(SEARCH_TERM(TERM(value)))

    andPart = CQL_QUERY(
        SCOPED_CLAUSE(SCOPED_CLAUSE(leaf('term')), BOOLEAN('and'), leaf('term2')))
    answer = CQL_QUERY(
        SCOPED_CLAUSE(SEARCH_CLAUSE(andPart), BOOLEAN('or'), SCOPED_CLAUSE(leaf('term3'))))

    self.assertEqualsCQL(answer, parseString('term and term2 or term3'))
def testFilterQueries(self):
    """Check that a filter query's clause is logged before the main query's clause."""
    filters = [parseString('term2')]
    self.assertConversion(['term2', 'term1'], query='term1', filterQueries=filters)
def assertConversion(self, expectedClauses, query, **kwargs):
    """Execute ``query`` through ``self.dna`` and compare ``self.loggedClauses`` with the expectation.

    expectedClauses -- the list ``self.loggedClauses`` must equal afterwards.
    query -- CQL text; parsed and passed as cqlAbstractSyntaxTree.
    kwargs -- extra keyword arguments forwarded to executeQuery.
    """
    self.loggedClauses = []
    ast = parseString(query)
    # Drain the composed generator so the whole call chain actually runs.
    for _ in compose(self.dna.any.executeQuery(cqlAbstractSyntaxTree=ast, **kwargs)):
        pass
    self.assertEquals(expectedClauses, self.loggedClauses)
def testTermWithOrWithoutQuotes(self):
    """Check that a quoted and an unquoted single term parse to equal trees."""
    quoted = parseString('"cats"')
    bare = parseString('cats')
    self.assertEqualsCQL(quoted, bare)
def cql2lucene(self, cql):
    """Parse ``cql`` and return what ``self.printer.compose`` produces for the resulting tree."""
    ast = parseString(cql)
    return self.printer.compose(ast)