class MatchUnit(object):
    """Visitor that evaluates a query tree through the ``match_unit`` API.

    Value nodes become keyword-argument dictionaries, keyword operators
    pass them to ``match_unit`` and boolean operators combine the results.
    """

    visitor = make_visitor()

    def __init__(self, data, getitem=dottable_getitem):
        """Keep the data to match and the getter used for keyword values."""
        self.getitem = getitem
        self.data = data

    # pylint: disable=W0613,E0102

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Combine both operand results with ``&``."""
        combined = left & right
        return combined

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Combine both operand results with ``|``."""
        combined = left | right
        return combined

    @visitor(NotOp)
    def visit(self, node, op):
        """Negate the operand result."""
        return not op

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Match the value stored under keyword ``left`` against ``right``."""
        keyword_value = self.getitem(self.data, left)
        return match_unit(keyword_value, **right)

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Match the whole data against the value arguments ``op``."""
        return match_unit(self.data, **op)

    @visitor(Keyword)
    def visit(self, node):
        """Return the keyword name."""
        return node.value

    @visitor(Value)
    def visit(self, node):
        """Return the pattern argument for a plain value."""
        return {'p': node.value}

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Return the pattern with matching type ``'p'``."""
        return {'p': node.value, 'm': 'p'}

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Return the pattern with matching type ``'e'``."""
        return {'p': node.value, 'm': 'e'}

    @visitor(RegexValue)
    def visit(self, node):
        """Return the pattern with matching type ``'r'``."""
        return {'p': node.value, 'm': 'r'}

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Return a pattern made of the (lower, upper) pair of patterns."""
        lower = left['p']
        upper = right['p']
        return {'p': (lower, upper)}

    @visitor(EmptyQuery)
    def visit(self, node):
        """An empty query always matches."""
        return True
class Terms(object):
    """Implement visitor to get all given terms.

    Every visitor returns a list of term strings, except the ``Keyword``
    visitor which returns a boolean telling whether the keyword is one of
    the accepted ``keywords``.
    """

    visitor = make_visitor()

    def __init__(self, keywords=None):
        """Initialize list of keywords operators.

        :param keywords: container of accepted keyword names; ``None``
            accepts every keyword.
        """
        self.keywords = keywords

    # pylint: disable=W0613,E0102

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Concatenate the terms of both operands."""
        return left + right

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Concatenate the terms of both operands."""
        return left + right

    @visitor(NotOp)
    def visit(self, node, op):
        """Negated sub-queries contribute no terms."""
        return []

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Keep the terms only when the keyword was accepted."""
        return right if left else []

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Return the terms of ``op`` without surrounding ``%`` characters."""
        # Build a real list: ``map`` is lazy on Python 3 and its result
        # cannot be concatenated with ``+`` by the AndOp/OrOp visitors.
        return [p.strip('%') for p in op]

    @visitor(Keyword)
    def visit(self, node):
        """Tell whether the keyword is accepted."""
        return self.keywords is None or node.value in self.keywords

    @visitor(Value)
    def visit(self, node):
        """Return the value stripped of surrounding ``%`` characters."""
        return [node.value.strip('%')]

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Return the quoted value unchanged."""
        return [node.value]

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Return the quoted value unchanged."""
        return [node.value]

    @visitor(RegexValue)
    def visit(self, node):
        """Return the regular expression unchanged."""
        return [node.value]

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Return a single ``left->right`` term."""
        # NOTE(review): ``left`` and ``right`` look like the lists produced
        # by the value visitors, so this formats the lists themselves;
        # confirm whether ``left[0]``/``right[0]`` was intended.
        return ["%s->%s" % (left, right)]

    @visitor(EmptyQuery)
    def visit(self, node):
        """An empty query has no terms."""
        return []
class TestVisitorInheritance(TestVisitor):
    """Check that a derived visitor can override a single node type."""

    # Extend the parent's visitor so handlers not overridden here are
    # still looked up on ``TestVisitor``.
    visitor = make_visitor(TestVisitor.visitor)

    @visitor(B)
    def visit(self, el):
        """Override the handler for ``B`` nodes."""
        return 'BB'

    def test_visit_a(self):
        """``A`` nodes are handled by the inherited handler."""
        outcome = self.visit(A())
        assert outcome == 'A'

    def test_visit_b(self):
        """``B`` nodes are handled by the overriding handler."""
        outcome = self.visit(B())
        assert outcome == 'BB'
class TestVisitor(object):
    """Check that ``visit`` dispatches on the type of the visited node."""

    visitor = make_visitor()

    @visitor(A)
    def visit(self, el):
        """Handle ``A`` nodes."""
        # pylint: disable=W0613
        return 'A'

    @visitor(B)
    def visit(self, el):
        """Handle ``B`` nodes."""
        # pylint: disable=W0613
        return 'B'

    def test_visit_a(self):
        """Visiting an ``A`` instance yields ``'A'``."""
        outcome = self.visit(A())
        assert outcome == 'A'

    def test_visit_b(self):
        """Visiting a ``B`` instance yields ``'B'``."""
        outcome = self.visit(B())
        assert outcome == 'B'
class ElasticSearchDSL(object):
    """Implement visitor to create Elastic Search DSL.

    Value visitors return a function taking the list of fields to search
    and returning the query dictionary; ``KeywordOp`` and ``ValueQuery``
    call that function with the fields to use.
    """

    visitor = make_visitor()

    # pylint: disable=W0613,E0102

    def __init__(self):
        """Load the keyword mapping from the configuration.

        The mapping goes from Invenio keywords to lists of Elasticsearch
        fields, e.g.
        ``{"author": ["author.last_name", "author.first_name"]}``.
        """
        self.keyword_dict = cfg['SEARCH_ELASTIC_KEYWORD_MAPPING']

    def map_keyword_to_fields(self, keyword, mode='a'):
        """Convert a keyword to the list of fields to search.

        :param keyword: keyword used in the query.
        :param mode: search mode, used to pick the fields when the mapping
            of the keyword is itself a dictionary keyed by mode.
        :returns: the mapped fields, or ``[str(keyword)]`` when there is no
            (non-empty) mapping for the keyword.
        :raises RuntimeError: if the keyword has a per-mode mapping that
            does not define ``mode``.
        """
        if self.keyword_dict:
            res = self.keyword_dict.get(keyword)
            if isinstance(res, dict):
                if mode in res:
                    res = res[mode]
                else:
                    raise RuntimeError(
                        'Not defined mapping for keyword "{keyword}" and '
                        'mode "{mode}"'.format(keyword=keyword, mode=mode)
                    )
            return res if res else [str(keyword)]
        return [str(keyword)]

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Require both sub-queries."""
        return {'bool': {'must': [left, right]}}

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Accept either sub-query."""
        return {'bool': {'should': [left, right]}}

    @visitor(NotOp)
    def visit(self, node, op):
        """Exclude the sub-query."""
        return {'bool': {'must_not': [op]}}

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Build the query of ``right`` for the fields of keyword ``left``.

        :raises RuntimeError: if ``right`` is not a query builder function.
        """
        if callable(right):
            # Value builders advertise their search mode; builders without
            # the attribute fall back to mode 'a'.
            keyword = self.map_keyword_to_fields(
                left, getattr(right, '__search_mode__', 'a')
            )
            return right(keyword)
        raise RuntimeError("Not supported second level operation.")

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Build the query of a keyword-less value."""
        return op(['global_fulltext'])

    @visitor(Keyword)
    def visit(self, node):
        """Return the keyword name."""
        return node.value

    @visitor(Value)
    def visit(self, node):
        """Return a builder of ``multi_match`` queries (mode 'a')."""
        def _f(keyword):
            return {
                'multi_match': {
                    'query': node.value,
                    'fields': keyword
                }
            }
        _f.__search_mode__ = 'a'
        return _f

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Return a builder of phrase ``multi_match`` queries (mode 'p')."""
        def _f(keyword):
            return {
                'multi_match': {
                    'query': node.value,
                    'type': 'phrase',
                    'fields': keyword
                }
            }
        _f.__search_mode__ = 'p'
        return _f

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Return a builder of ``term`` queries (mode 'e')."""
        def _f(keyword):
            if len(keyword) > 1:
                return {
                    "bool": {
                        "should": [
                            {"term": {k: str(node.value)}} for k in keyword
                        ]
                    }
                }
            return {'term': {keyword[0]: node.value}}
        _f.__search_mode__ = 'e'
        return _f

    @visitor(RegexValue)
    def visit(self, node):
        """Return a builder of ``regexp`` queries.

        The builder raises ``RuntimeError`` when the only field is
        ``_all``.
        """
        def _f(keyword):
            if len(keyword) > 1:
                res = {
                    "bool": {
                        "should": [
                            {'regexp': {k: node.value}} for k in keyword
                        ]
                    }
                }
            elif keyword[0] != "_all":
                res = {'regexp': {keyword[0]: node.value}}
            else:
                raise RuntimeError(
                    "Not supported regex search for all fields")
            return res
        return _f

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Return a builder of inclusive ``range`` queries."""
        condition = {}
        # The bounds are read back from the ``multi_match`` query that the
        # value builders produce.
        if left:
            condition['gte'] = left(None)["multi_match"]["query"]
        if right:
            condition['lte'] = right(None)["multi_match"]["query"]
        # Same builder as the comparison operators below.
        return self._operators(node, condition)

    @visitor(EmptyQuery)
    def visit(self, node):
        """Match every document."""
        return {
            "match_all": {}
        }

    @staticmethod
    def _operators(node, condition):
        """Return a builder of ``range`` queries applying ``condition``."""
        def _f(keyword):
            if len(keyword) > 1:
                res = {
                    "bool": {
                        "should": [
                            {'range': {k: condition}} for k in keyword
                        ]
                    }
                }
            else:
                res = {'range': {keyword[0]: condition}}
            return res
        return _f

    @visitor(GreaterOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gt`` range queries."""
        condition = {"gt": value_fn(None)["multi_match"]["query"]}
        return self._operators(node, condition)

    @visitor(LowerOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lt`` range queries."""
        condition = {"lt": value_fn(None)["multi_match"]["query"]}
        return self._operators(node, condition)

    @visitor(GreaterEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gte`` range queries."""
        condition = {"gte": value_fn(None)["multi_match"]["query"]}
        return self._operators(node, condition)

    @visitor(LowerEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lte`` range queries."""
        condition = {"lte": value_fn(None)["multi_match"]["query"]}
        return self._operators(node, condition)
class SpiresToInvenio(object):
    """Visitor rebuilding a query tree with SPIRES operators converted.

    Every node is re-created with the same type from its visited children;
    ``SpiresOp`` nodes are replaced by ``ast.KeywordOp`` nodes.
    """

    visitor = make_visitor()

    # pylint: disable=W0613,E0102

    @visitor(ast.AndOp)
    def visit(self, node, left, right):
        """Rebuild the binary node from its converted operands."""
        return type(node)(left, right)

    @visitor(ast.OrOp)
    def visit(self, node, left, right):
        """Rebuild the binary node from its converted operands."""
        return type(node)(left, right)

    @visitor(ast.KeywordOp)
    def visit(self, node, left, right):
        """Rebuild the binary node from its converted operands."""
        return type(node)(left, right)

    @visitor(ast.RangeOp)
    def visit(self, node, left, right):
        """Rebuild the binary node from its converted operands."""
        return type(node)(left, right)

    @visitor(ast.NotOp)
    def visit(self, node, op):
        """Rebuild the unary node from its converted operand."""
        return type(node)(op)

    @visitor(ast.GreaterOp)
    def visit(self, node, op):
        """Rebuild the unary node from its converted operand."""
        return type(node)(op)

    @visitor(ast.LowerOp)
    def visit(self, node, op):
        """Rebuild the unary node from its converted operand."""
        return type(node)(op)

    @visitor(ast.GreaterEqualOp)
    def visit(self, node, op):
        """Rebuild the unary node from its converted operand."""
        return type(node)(op)

    @visitor(ast.LowerEqualOp)
    def visit(self, node, op):
        """Rebuild the unary node from its converted operand."""
        return type(node)(op)

    @visitor(ast.Keyword)
    def visit(self, node):
        """Copy the leaf node."""
        return type(node)(node.value)

    @visitor(ast.Value)
    def visit(self, node):
        """Copy the leaf node."""
        return type(node)(node.value)

    @visitor(ast.ValueQuery)
    def visit(self, node, op):
        """Rebuild the unary node from its converted operand."""
        return type(node)(op)

    @visitor(ast.SingleQuotedValue)
    def visit(self, node):
        """Copy the leaf node."""
        return type(node)(node.value)

    @visitor(ast.DoubleQuotedValue)
    def visit(self, node):
        """Copy the leaf node."""
        return type(node)(node.value)

    @visitor(ast.RegexValue)
    def visit(self, node):
        """Copy the leaf node."""
        return type(node)(node.value)

    @visitor(ast.EmptyQuery)
    def visit(self, node):
        """Copy the leaf node."""
        return type(node)(node.value)

    @visitor(SpiresOp)
    def visit(self, node, left, right):
        """Convert a SPIRES keyword query to an ``ast.KeywordOp``.

        The keyword is translated in place through ``SPIRES_KEYWORDS``
        (a ``KeyError`` propagates for keywords missing from that table).
        For the ``author`` keyword the value is wrapped in an
        ``ast.DoubleQuotedValue``.
        """
        left.value = SPIRES_KEYWORDS[left.value]
        # Compare by value: identity (``is``) of two equal strings is an
        # interpreter implementation detail and must not be relied upon.
        if left.value == 'author':
            return ast.KeywordOp(left, ast.DoubleQuotedValue(right.value))
        return ast.KeywordOp(left, right)
class MatchUnit(object):
    """Implement visitor using ``match_unit`` API.

    Keyword and value visitors return dictionaries of ``match_unit``
    keyword arguments (``f`` for the keyword, ``p`` for the pattern and
    ``m`` for the matching type); keyword operators merge them and call
    ``match_unit`` on the record.
    """

    visitor = make_visitor()

    def __init__(self, record):
        """Store the record that the query is matched against."""
        self.record = record

    # pylint: disable=W0613,E0102

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Combine both operand results with ``&``."""
        return left & right

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Combine both operand results with ``|``."""
        return left | right

    @visitor(NotOp)
    def visit(self, node, op):
        """Negate the operand result."""
        return not op

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Match the record with the keyword and value arguments merged."""
        if isinstance(right, bool):  # second level operator
            left.update(dict(p=right))
        else:
            left.update(right)
        return match_unit(self.record, **left)

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Match the record against the value arguments ``op``."""
        return match_unit(self.record, **op)

    @visitor(Keyword)
    def visit(self, node):
        """Return the keyword as the ``f`` argument."""
        return dict(f=node.value)

    @visitor(Value)
    def visit(self, node):
        """Return the value as the ``p`` argument."""
        return dict(p=node.value)

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Return the pattern with matching type ``'p'``."""
        return dict(p=node.value, m='p')

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Return the pattern with matching type ``'e'``."""
        return dict(p=node.value, m='e')

    @visitor(RegexValue)
    def visit(self, node):
        """Return the pattern with matching type ``'r'``."""
        return dict(p=node.value, m='r')

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Return a ``lower->upper`` pattern built from both bounds."""
        # The operands are the dictionaries produced by the value visitors
        # above, so the bounds are their 'p' entries; formatting the
        # dictionaries themselves would embed their repr in the pattern.
        return dict(p="%s->%s" % (left['p'], right['p']))

    @visitor(EmptyQuery)
    def visit(self, node):
        """An empty query always matches."""
        return True
class ElasticSearchDSL(object):
    """Visitor translating a query tree into Elastic Search DSL dictionaries.

    Value visitors return a ``query(keyword)`` function; it is called with
    the keyword by ``KeywordOp`` and with ``None`` by ``ValueQuery``.
    """

    visitor = make_visitor()

    def __init__(self, keyword_to_fields=None):
        """Keep the mapping from keywords to Elastic field(s).

        Without a (non-empty) mapping the ``None`` keyword maps to
        ``_all``.
        """
        if not keyword_to_fields:
            keyword_to_fields = {None: ['_all']}
        self.keyword_to_fields = keyword_to_fields

    def get_fields_for_keyword(self, keyword, mode='a'):
        """Return the fields to search for ``keyword``.

        A dictionary mapping is indexed by ``mode``, a list or tuple is
        returned as is and anything else (including an unmapped keyword)
        is wrapped in a one-element list.
        """
        mapped = self.keyword_to_fields.get(keyword, keyword)
        if isinstance(mapped, dict):
            return mapped[mode]
        if isinstance(mapped, (list, tuple)):
            return mapped
        return [mapped]

    # pylint: disable=W0613,E0102

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Require both sub-queries."""
        operands = [left, right]
        return {'bool': {'must': operands}}

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Accept either sub-query."""
        operands = [left, right]
        return {'bool': {'should': operands}}

    @visitor(NotOp)
    def visit(self, node, op):
        """Exclude the sub-query."""
        return {'bool': {'must_not': [op]}}

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Call the query builder ``right`` with the keyword ``left``."""
        if not callable(right):
            raise RuntimeError("Not supported second level operation.")
        return right(left)

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Call the query builder without a keyword."""
        return op(None)

    @visitor(Keyword)
    def visit(self, node):
        """Return the keyword name."""
        return node.value

    @visitor(Value)
    def visit(self, node):
        """Return a builder of ``multi_match`` queries (mode 'a')."""
        def query(keyword):
            body = {
                'query': node.value,
                'fields': self.get_fields_for_keyword(keyword, mode='a'),
            }
            return {'multi_match': body}
        return query

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Return a builder of phrase ``multi_match`` queries (mode 'p')."""
        def query(keyword):
            body = {
                'query': node.value,
                'type': 'phrase',
                'fields': self.get_fields_for_keyword(keyword, mode='p'),
            }
            return {'multi_match': body}
        return query

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Return a builder of ``term`` queries (mode 'e')."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='e')
            if len(fields) > 1:
                alternatives = [
                    {"term": {field: str(node.value)}} for field in fields
                ]
                return {"bool": {"should": alternatives}}
            return {'term': dict((field, node.value) for field in fields)}
        return query

    @visitor(RegexValue)
    def visit(self, node):
        """Return a builder of ``regexp`` queries (mode 'r').

        The builder raises ``RuntimeError`` when called without a keyword
        or when no fields are resolved.
        """
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='r')
            if keyword is None or fields is None:
                raise RuntimeError(
                    "Not supported regex search for all fields")
            if len(fields) > 1:
                alternatives = [{'regexp': {k: node.value}} for k in fields]
                return {"bool": {"should": alternatives}}
            return {'regexp': {fields[0]: node.value}}
        return query

    @visitor(EmptyQuery)
    def visit(self, node):
        """Match every document."""
        return {"match_all": {}}

    def _range_operators(self, node, condition):
        """Return a builder of ``range`` queries applying ``condition``."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='r')
            if len(fields) > 1:
                alternatives = [{'range': {k: condition}} for k in fields]
                return {"bool": {"should": alternatives}}
            return {'range': {fields[0]: condition}}
        return query

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Return a builder of inclusive ``range`` queries."""
        # Bounds are read back from the ``multi_match`` query produced by
        # the value builders.
        condition = {}
        if left:
            condition['gte'] = left(None)["multi_match"]["query"]
        if right:
            condition['lte'] = right(None)["multi_match"]["query"]
        return self._range_operators(node, condition)

    @visitor(GreaterOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gt`` range queries."""
        bound = value_fn(None)["multi_match"]["query"]
        return self._range_operators(node, {"gt": bound})

    @visitor(LowerOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lt`` range queries."""
        bound = value_fn(None)["multi_match"]["query"]
        return self._range_operators(node, {"lt": bound})

    @visitor(GreaterEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gte`` range queries."""
        bound = value_fn(None)["multi_match"]["query"]
        return self._range_operators(node, {"gte": bound})

    @visitor(LowerEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lte`` range queries."""
        bound = value_fn(None)["multi_match"]["query"]
        return self._range_operators(node, {"lte": bound})
class FacetsVisitor(object):
    """Visitor collecting the facet filters expressed by a query.

    The result maps a facet name to a dictionary with the sets of included
    (``inc``) and excluded (``exc``) values.
    """

    visitor = make_visitor()

    @staticmethod
    def jsonable(parsedFacets):
        """Return a copy of a visited query result that can be jsonified.

        :param parsedFacets: a visited query result.
        """
        # Sets cannot be converted to JSON, hence the conversion to lists.
        return {
            name: {
                'inc': list(parsedFacets[name]['inc']),
                'exc': list(parsedFacets[name]['exc']),
            }
            for name in parsedFacets
        }

    # pylint: disable=W0613,E0102,F999,D102

    def _merge_facets(self, left, right):
        """Merge the faceting of both children of an AND or OR operator.

        ``left`` is updated in place and returned.

        :param left: left child node faceting
        :param right: right child node faceting
        """
        for name in right:
            if name not in left:
                left[name] = right[name]
                continue
            included = left[name]['inc'].union(right[name]['inc'])
            excluded = left[name]['exc'].union(right[name]['exc'])
            # A value that is only partially included/excluded is marked
            # as neither.
            left[name] = {
                'inc': included.difference(excluded),
                'exc': excluded.difference(included),
            }
        return left

    def _invert_facets(self, facets):
        """Swap included and excluded values of every facet, in place.

        :param facets: facet filters
        """
        for name in facets:
            current = facets[name]
            facets[name] = {'inc': current['exc'], 'exc': current['inc']}
        return facets

    @visitor(AndOp)
    def visit(self, node, left, right):
        return self._merge_facets(left, right)

    @visitor(OrOp)
    def visit(self, node, left, right):
        return self._merge_facets(left, right)

    @visitor(NotOp)
    def visit(self, node, op):
        return self._invert_facets(op)

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        # The filter is read from the node itself, not from the visited
        # children (which are empty dictionaries).
        facet_name = node.left.value
        return {facet_name: {'inc': {node.right.value}, 'exc': set()}}

    # All the remaining nodes carry no facet filter.

    @visitor(ValueQuery)
    def visit(self, node, op):
        return {}

    @visitor(Keyword)
    def visit(self, node):
        return {}

    @visitor(Value)
    def visit(self, node):
        return {}

    @visitor(SingleQuotedValue)
    def visit(self, node):
        return {}

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        return {}

    @visitor(RegexValue)
    def visit(self, node):
        return {}

    @visitor(RangeOp)
    def visit(self, node, left, right):
        return {}

    @visitor(EmptyQuery)
    def visit(self, node):
        return {}
class ElasticSearchNoKeywordsDSL(object):
    """Visitor used for Elastic Search DSL queries without keywords.

    Visiting a keyword operator raises ``QueryHasKeywords``; every other
    node type is accepted and yields ``None``.
    """

    visitor = make_visitor()

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Reject the query: it uses a keyword."""
        raise QueryHasKeywords()

    @visitor(MalformedQuery)
    def visit(self, op):
        """Accept a malformed query."""
        # FIXME: Should send signal to display a message to the user.
        return None

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Accept the node."""
        return None

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Accept the node."""
        return None

    @visitor(NotOp)
    def visit(self, node, op):
        """Accept the node."""
        return None

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Accept the node."""
        return None

    @visitor(Keyword)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(Value)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(RegexValue)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Accept the node."""
        return None

    @visitor(EmptyQuery)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(GreaterOp)
    def visit(self, node, value_fn):
        """Accept the node."""
        return None

    @visitor(WildcardQuery)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(LowerOp)
    def visit(self, node, value_fn):
        """Accept the node."""
        return None

    @visitor(GreaterEqualOp)
    def visit(self, node, value_fn):
        """Accept the node."""
        return None

    @visitor(LowerEqualOp)
    def visit(self, node, value_fn):
        """Accept the node."""
        return None
class ElasticSearchDSL(object):
    """Visitor translating a query tree into ``Q`` query objects.

    Value visitors return a ``query(keyword)`` function; it is called with
    the keyword by ``KeywordOp`` and with ``None`` by ``ValueQuery``.
    """

    visitor = make_visitor()

    def __init__(self, keyword_to_fields=None):
        """Keep the mapping from keywords to Elastic field(s).

        Without a (non-empty) mapping the ``None`` keyword maps to
        ``_all``.
        """
        if not keyword_to_fields:
            keyword_to_fields = {None: ['_all']}
        self.keyword_to_fields = keyword_to_fields

    def get_fields_for_keyword(self, keyword, mode='a'):
        """Return the fields to search for ``keyword``.

        A dictionary mapping is indexed by ``mode``, a list or tuple is
        returned as is and anything else (including an unmapped keyword)
        is wrapped in a one-element list.
        """
        mapped = self.keyword_to_fields.get(keyword, keyword)
        if isinstance(mapped, dict):
            return mapped[mode]
        if isinstance(mapped, (list, tuple)):
            return mapped
        return [mapped]

    @staticmethod
    def _author_query(value):
        """Return the query used when the resolved fields are the author ones."""
        text = str(value)
        return Q(
            'bool',
            must=Q('bool', should=[
                Q("match", authors__name_variations=text),
                Q("term", authors__ids__value=text)
            ]),
            should=[Q("match", authors__full_name=text)])

    # pylint: disable=W0613,E0102

    @visitor(FilterOp)
    def visit(self, node, left, right):
        """Wrap the query ``left`` and the filter ``right``."""
        return Q({'filtered': {'query': [left], "filter": [right]}})

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Combine both queries with ``&``."""
        return left & right

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Combine both queries with ``|``."""
        return left | right

    @visitor(NotOp)
    def visit(self, node, op):
        """Invert the query with ``~``."""
        return ~op

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Call the query builder ``right`` with the keyword ``left``."""
        if not callable(right):
            raise RuntimeError('Not supported second level operation.')
        return right(left)

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Call the query builder without a keyword."""
        return op(None)

    @visitor(Keyword)
    def visit(self, node):
        """Return the keyword name."""
        return node.value

    @visitor(WildcardQuery)
    def visit(self, node):
        """Return a builder of ``query_string`` queries (mode 'p')."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='p')
            # '#' in the user query is rewritten to '*'.
            pattern = str(node.value).replace('#', '*')
            if len(fields) > 1:
                return Q('bool', should=[
                    Q('query_string', query=pattern, default_field=k,
                      analyze_wildcard=True)
                    for k in fields
                ])
            return Q('query_string', query=pattern,
                     default_field=fields[0], analyze_wildcard=True)
        return query

    @visitor(Value)
    def visit(self, node):
        """Return a builder of ``multi_match`` queries (mode 'a')."""
        def query(keyword):
            # FIXME: This is a temporary hack that should be removed when
            # nested keywords search is implemented.
            prefix = 'recid:'
            if str(node.value).startswith(prefix):
                node.value = node.value[len(prefix):]
            fields = self.get_fields_for_keyword(keyword, mode='a')
            author_fields = current_app.config[
                'SEARCH_ELASTIC_KEYWORD_MAPPING']['author']
            if fields == author_fields:
                return self._author_query(node.value)
            return Q({'multi_match': {'query': node.value,
                                      'fields': fields}})
        return query

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Return a builder of phrase ``multi_match`` queries (mode 'p')."""
        def query(keyword):
            return Q('multi_match', query=node.value,
                     fields=self.get_fields_for_keyword(keyword, mode='p'),
                     type='phrase')
        return query

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Return a builder of ``term`` queries (mode 'e')."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='e')
            author_fields = current_app.config[
                'SEARCH_ELASTIC_KEYWORD_MAPPING']['author']
            if fields == author_fields:
                return self._author_query(node.value)
            if len(fields) > 1:
                alternatives = [
                    {"term": {k: str(node.value)}} for k in fields
                ]
                return Q({"bool": {"should": alternatives}})
            return Q({'term': {fields[0]: node.value}})
        return query

    @visitor(RegexValue)
    def visit(self, node):
        """Return a builder of ``regexp`` queries (mode 'r').

        The builder raises ``RuntimeError`` when called without a keyword
        or when no fields are resolved.
        """
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='r')
            if keyword is None or fields is None:
                raise RuntimeError(
                    'Not supported regex search for all fields')
            per_field = [Q('regexp', **{k: node.value}) for k in fields]
            return reduce(or_, per_field)
        return query

    @visitor(EmptyQuery)
    def visit(self, node):
        """Match every document."""
        return Q('match_all')

    def _range_operators(self, node, condition):
        """Return a builder of ``range`` queries applying ``condition``."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='r')
            per_field = [Q('range', **{k: condition}) for k in fields]
            return reduce(or_, per_field)
        return query

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Return a builder of inclusive ``range`` queries."""
        # Bounds are read back from the ``multi_match`` query produced by
        # the value builders.
        condition = {}
        if left:
            condition['gte'] = left(None).to_dict()['multi_match']['query']
        if right:
            condition['lte'] = right(None).to_dict()['multi_match']['query']
        return self._range_operators(node, condition)

    @visitor(GreaterOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gt`` range queries."""
        bound = value_fn(None).to_dict()['multi_match']['query']
        return self._range_operators(node, {'gt': bound})

    @visitor(LowerOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lt`` range queries."""
        bound = value_fn(None).to_dict()['multi_match']['query']
        return self._range_operators(node, {'lt': bound})

    @visitor(GreaterEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gte`` range queries."""
        bound = value_fn(None).to_dict()['multi_match']['query']
        return self._range_operators(node, {'gte': bound})

    @visitor(LowerEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lte`` range queries."""
        bound = value_fn(None).to_dict()['multi_match']['query']
        return self._range_operators(node, {'lte': bound})
class ElasticSearchNoKeywordsDSL(object):
    """Visitor used for Elastic Search DSL queries without keywords.

    Visiting a keyword operator raises ``QueryHasKeywords``; every other
    node type is accepted and yields ``None``.
    """

    visitor = make_visitor()

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Reject the query: it uses a keyword."""
        raise QueryHasKeywords()

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Accept the node."""
        return None

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Accept the node."""
        return None

    @visitor(NotOp)
    def visit(self, node, op):
        """Accept the node."""
        return None

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Accept the node."""
        return None

    @visitor(Keyword)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(Value)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(RegexValue)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Accept the node."""
        return None

    @visitor(EmptyQuery)
    def visit(self, node):
        """Accept the node."""
        return None

    @visitor(GreaterOp)
    def visit(self, node, value_fn):
        """Accept the node."""
        return None

    @visitor(LowerOp)
    def visit(self, node, value_fn):
        """Accept the node."""
        return None

    @visitor(GreaterEqualOp)
    def visit(self, node, value_fn):
        """Accept the node."""
        return None

    @visitor(LowerEqualOp)
    def visit(self, node, value_fn):
        """Accept the node."""
        return None
class ElasticSearchDSL(object):
    """Visitor translating a query tree into ``Q`` query objects.

    Value visitors return a ``query(keyword)`` function; it is called with
    the keyword by ``KeywordOp`` and with ``None`` by ``ValueQuery``.
    """

    visitor = make_visitor()

    def __init__(self, keyword_to_fields=None):
        """Keep the mapping from keywords to Elastic field(s).

        Without a (non-empty) mapping the ``None`` keyword maps to
        ``_all``.
        """
        if not keyword_to_fields:
            keyword_to_fields = {None: ['_all']}
        self.keyword_to_fields = keyword_to_fields

    def get_fields_for_keyword(self, keyword, mode='a'):
        """Return the fields to search for ``keyword``.

        A dictionary mapping is indexed by ``mode``, a list or tuple is
        returned as is and anything else (including an unmapped keyword)
        is wrapped in a one-element list.
        """
        mapped = self.keyword_to_fields.get(keyword, keyword)
        if isinstance(mapped, dict):
            return mapped[mode]
        if isinstance(mapped, (list, tuple)):
            return mapped
        return [mapped]

    # pylint: disable=W0613,E0102

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Combine both queries with ``&``."""
        return left & right

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Combine both queries with ``|``."""
        return left | right

    @visitor(NotOp)
    def visit(self, node, op):
        """Invert the query with ``~``."""
        return ~op

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Call the query builder ``right`` with the keyword ``left``."""
        if not callable(right):
            raise RuntimeError('Not supported second level operation.')
        return right(left)

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Call the query builder without a keyword."""
        return op(None)

    @visitor(Keyword)
    def visit(self, node):
        """Return the keyword name."""
        return node.value

    @visitor(Value)
    def visit(self, node):
        """Return a builder of ``multi_match`` queries (mode 'a')."""
        def query(keyword):
            return Q('multi_match', query=node.value,
                     fields=self.get_fields_for_keyword(keyword, mode='a'))
        return query

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Return a builder of phrase ``multi_match`` queries (mode 'p')."""
        def query(keyword):
            return Q('multi_match', query=node.value,
                     fields=self.get_fields_for_keyword(keyword, mode='p'),
                     type='phrase')
        return query

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Return a builder of phrase ``multi_match`` queries (mode 'p')."""
        def query(keyword):
            return Q('multi_match', query=node.value,
                     fields=self.get_fields_for_keyword(keyword, mode='p'),
                     type='phrase')
        return query

    @visitor(RegexValue)
    def visit(self, node):
        """Return a builder of ``regexp`` queries (mode 'r').

        The builder raises ``RuntimeError`` when called without a keyword
        or when no fields are resolved.
        """
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='r')
            if keyword is None or fields is None:
                raise RuntimeError(
                    'Not supported regex search for all fields')
            per_field = [Q('regexp', **{k: node.value}) for k in fields]
            return reduce(or_, per_field)
        return query

    @visitor(EmptyQuery)
    def visit(self, node):
        """Match every document."""
        return Q('match_all')

    def _range_operators(self, node, condition):
        """Return a builder of ``range`` queries applying ``condition``."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='r')
            per_field = [Q('range', **{k: condition}) for k in fields]
            return reduce(or_, per_field)
        return query

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Return a builder of inclusive ``range`` queries."""
        # Bounds are read back from the ``multi_match`` query produced by
        # the value builders.
        condition = {}
        if left:
            condition['gte'] = left(None).to_dict()['multi_match']['query']
        if right:
            condition['lte'] = right(None).to_dict()['multi_match']['query']
        return self._range_operators(node, condition)

    @visitor(GreaterOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gt`` range queries."""
        bound = value_fn(None).to_dict()['multi_match']['query']
        return self._range_operators(node, {'gt': bound})

    @visitor(LowerOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lt`` range queries."""
        bound = value_fn(None).to_dict()['multi_match']['query']
        return self._range_operators(node, {'lt': bound})

    @visitor(GreaterEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gte`` range queries."""
        bound = value_fn(None).to_dict()['multi_match']['query']
        return self._range_operators(node, {'gte': bound})

    @visitor(LowerEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lte`` range queries."""
        bound = value_fn(None).to_dict()['multi_match']['query']
        return self._range_operators(node, {'lte': bound})
class ElasticSearchDSL(object):
    """Implement visitor to create Elastic Search DSL.

    Value visitors return a ``query(keyword)`` function; it is called with
    the keyword by ``KeywordOp`` and with ``None`` by ``ValueQuery``.
    """

    visitor = make_visitor()

    def __init__(self, keyword_to_fields=None):
        """Provide a dictionary mapping from keywords to Elastic field(s).

        :param keyword_to_fields: mapping to use; when ``None`` the
            ``SEARCH_ELASTIC_KEYWORD_MAPPING`` configuration value is used
            (an empty mapping if it is not configured).
        """
        # Honour an explicitly given mapping instead of silently ignoring
        # the argument; the configuration remains the default.
        if keyword_to_fields is None:
            keyword_to_fields = current_app.config.get(
                "SEARCH_ELASTIC_KEYWORD_MAPPING", {})
        self.keyword_to_fields = keyword_to_fields

    def get_fields_for_keyword(self, keyword, mode='a'):
        """Convert keyword to fields.

        A dictionary mapping is indexed by ``mode``, a list or tuple is
        returned as is and anything else (including an unmapped keyword)
        is wrapped in a one-element list.
        """
        field = self.keyword_to_fields.get(keyword, keyword)
        if isinstance(field, dict):
            return field[mode]
        elif isinstance(field, (list, tuple)):
            return field
        return [field]

    # pylint: disable=W0613,E0102

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Require both sub-queries."""
        return {'bool': {'must': [left, right]}}

    @visitor(FilterOp)
    def visit(self, node, left, right):
        """Wrap the query ``left`` and the filter ``right``."""
        return {'filtered': {'query': [left], "filter": [right]}}

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Accept either sub-query."""
        return {'bool': {'should': [left, right]}}

    @visitor(NotOp)
    def visit(self, node, op):
        """Exclude the sub-query."""
        return {'bool': {'must_not': [op]}}

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Call the query builder ``right`` with the keyword ``left``.

        :raises RuntimeError: if ``right`` is not a query builder function.
        """
        if callable(right):
            return right(left)
        raise RuntimeError("Not supported second level operation.")

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Call the query builder without a keyword."""
        return op(None)

    @visitor(Keyword)
    def visit(self, node):
        """Return the keyword name."""
        return node.value

    @visitor(WildcardQuery)
    def visit(self, node):
        """Return a builder of ``query_string`` queries (mode 'p')."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='p')
            # '#' in the user query is rewritten to '*'.
            value = str(node.value).replace('#', '*')
            if len(fields) > 1:
                res = {
                    "bool": {
                        "should": [{
                            "query_string": {
                                "analyze_wildcard": "true",
                                "default_field": k,
                                "query": value
                            }
                        } for k in fields]
                    }
                }
            else:
                res = {
                    "query_string": {
                        "analyze_wildcard": "true",
                        "default_field": fields[0],
                        "query": value
                    }
                }
            return res
        return query

    @visitor(Value)
    def visit(self, node):
        """Return a builder of ``multi_match`` queries (mode 'a')."""
        def query(keyword):
            # FIXME: This is a temporary hack that should be removed when
            # nested keywords search is implemented.
            if str(node.value).startswith('recid:'):
                node.value = node.value[len('recid:'):]
            fields = self.get_fields_for_keyword(keyword, mode='a')
            # Author fields get a dedicated query on the name variations.
            if fields == ['authors.full_name', 'authors.alternative_name']:
                return {
                    "bool": {
                        "should": [{
                            "match": {
                                "authors.name_variations": str(node.value)
                            }
                        }, {
                            "match": {
                                "authors.full_name": str(node.value)
                            }
                        }]
                    }
                }
            return {'multi_match': {'query': node.value, 'fields': fields}}
        return query

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Return a builder of phrase ``multi_match`` queries (mode 'p')."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='p')
            return {
                'multi_match': {
                    'query': node.value,
                    'type': 'phrase',
                    'fields': fields,
                }
            }
        return query

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Return a builder of ``term`` queries (mode 'e')."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='e')
            # Author fields get a dedicated query on the name variations.
            if fields == ['authors.full_name', 'authors.alternative_name']:
                return {
                    "bool": {
                        "must": [{
                            "match": {
                                "authors.name_variations": str(node.value)
                            }
                        }],
                        "should": [{
                            "match": {
                                "authors.full_name": str(node.value)
                            }
                        }]
                    }
                }
            if len(fields) > 1:
                return {
                    "bool": {
                        "should": [{
                            "term": {
                                k: str(node.value)
                            }
                        } for k in fields]
                    }
                }
            return {'term': {fields[0]: node.value}}
        return query

    @visitor(RegexValue)
    def visit(self, node):
        """Return a builder of ``regexp`` queries (mode 'r').

        The builder raises ``RuntimeError`` when called without a keyword
        or when no fields are resolved.
        """
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='r')
            if keyword is None or fields is None:
                raise RuntimeError(
                    "Not supported regex search for all fields")
            if len(fields) > 1:
                res = {
                    "bool": {
                        "should": [{
                            'regexp': {
                                k: node.value
                            }
                        } for k in fields]
                    }
                }
            else:
                res = {'regexp': {fields[0]: node.value}}
            return res
        return query

    @visitor(EmptyQuery)
    def visit(self, node):
        """Match every document."""
        return {"match_all": {}}

    def _range_operators(self, node, condition):
        """Return a builder of ``range`` queries applying ``condition``."""
        def query(keyword):
            fields = self.get_fields_for_keyword(keyword, mode='r')
            if len(fields) > 1:
                res = {
                    "bool": {
                        "should": [{
                            'range': {
                                k: condition
                            }
                        } for k in fields]
                    }
                }
            else:
                res = {'range': {fields[0]: condition}}
            return res
        return query

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Return a builder of inclusive ``range`` queries."""
        # Bounds are read back from the ``multi_match`` query produced by
        # the value builders.
        condition = {}
        if left:
            condition['gte'] = left(None)["multi_match"]["query"]
        if right:
            condition['lte'] = right(None)["multi_match"]["query"]
        return self._range_operators(node, condition)

    @visitor(GreaterOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gt`` range queries."""
        condition = {"gt": value_fn(None)["multi_match"]["query"]}
        return self._range_operators(node, condition)

    @visitor(LowerOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lt`` range queries."""
        condition = {"lt": value_fn(None)["multi_match"]["query"]}
        return self._range_operators(node, condition)

    @visitor(GreaterEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``gte`` range queries."""
        condition = {"gte": value_fn(None)["multi_match"]["query"]}
        return self._range_operators(node, condition)

    @visitor(LowerEqualOp)
    def visit(self, node, value_fn):
        """Return a builder of ``lte`` range queries."""
        condition = {"lte": value_fn(None)["multi_match"]["query"]}
        return self._range_operators(node, condition)
class TreeRepr(repr_printer.TreeRepr):
    """Tree printer extended with the representation of ``SpiresOp``."""

    # Extend the parent's visitor so its handlers stay available.
    visitor = make_visitor(repr_printer.TreeRepr.visitor)

    @visitor(SpiresOp)
    def visit(self, node, left, right):
        """Render the node as ``find <left> <right>``."""
        template = "find %s %s"
        return template % (left, right)
class PypegConverter(pypeg_to_ast.PypegConverter):
    """Converter from the SPIRES pypeg parse tree to AST nodes."""

    # Extend the parent's visitor so its handlers stay available.
    visitor = make_visitor(pypeg_to_ast.PypegConverter.visitor)

    # pylint: disable=W0613,E0102

    @visitor(parser.SpiresKeywordRule)
    def visit(self, node):
        """Convert to an ``ast.Keyword``."""
        return ast.Keyword(node.value)

    @visitor(parser.SpiresKeywordQuery)
    def visit(self, node, keyword, value):
        """Convert to a ``SpiresOp`` of the keyword and the value."""
        return SpiresOp(keyword, value)

    @visitor(parser.GreaterQuery)
    def visit(self, node, child):
        """Wrap the child in an ``ast.GreaterOp``."""
        return ast.GreaterOp(child)

    @visitor(parser.GreaterEqualQuery)
    def visit(self, node, child):
        """Wrap the child in an ``ast.GreaterEqualOp``."""
        return ast.GreaterEqualOp(child)

    @visitor(parser.LowerQuery)
    def visit(self, node, child):
        """Wrap the child in an ``ast.LowerOp``."""
        return ast.LowerOp(child)

    @visitor(parser.LowerEqualQuery)
    def visit(self, node, child):
        """Wrap the child in an ``ast.LowerEqualOp``."""
        return ast.LowerEqualOp(child)

    @visitor(parser.SpiresSimpleValue)
    def visit(self, node):
        """Convert to an ``ast.Value``."""
        return ast.Value(node.value)

    @visitor(parser.SpiresValue)
    def visit(self, node, children):
        """Convert to an ``ast.Value`` joining the values of the children."""
        joined = "".join(c.value for c in children)
        return ast.Value(joined)

    @visitor(parser.SpiresValueQuery)
    def visit(self, node, child):
        """Wrap the child in an ``ast.ValueQuery``."""
        return ast.ValueQuery(child)

    @visitor(parser.SpiresSimpleQuery)
    def visit(self, node, child):
        """Return the converted child unchanged."""
        return child

    @visitor(parser.SpiresParenthesizedQuery)
    def visit(self, node, child):
        """Return the converted child unchanged."""
        return child

    # The boolean nodes below are created without a left operand; it is
    # filled in by the ``SpiresQuery`` visitor.

    @visitor(parser.SpiresNotQuery)
    def visit(self, node, child):
        """Convert to an ``ast.AndOp`` whose right operand is negated."""
        negated = ast.NotOp(child)
        return ast.AndOp(None, negated)

    @visitor(parser.SpiresAndQuery)
    def visit(self, node, child):
        """Convert to an ``ast.AndOp`` with the child on the right."""
        return ast.AndOp(None, child)

    @visitor(parser.SpiresOrQuery)
    def visit(self, node, child):
        """Convert to an ``ast.OrOp`` with the child on the right."""
        return ast.OrOp(None, child)

    @visitor(parser.SpiresQuery)
    def visit(self, node, children):
        """Assemble the converted children into one boolean expression."""
        # Assign implicit keyword
        # find author x and y --> find author x and author y
        def assign_implicit_keyword(implicit_keyword, target):
            """Apply the keyword to a keyword-less value under ``target``.

            Note: this function has side effects on the content of
            ``target``.
            """
            kind = type(target)
            if kind in [ast.AndOp, ast.OrOp]:
                right_kind = type(target.right)
                if right_kind == ast.ValueQuery:
                    target.right = SpiresOp(implicit_keyword,
                                            target.right.op)
                elif right_kind == ast.NotOp:
                    assign_implicit_keyword(implicit_keyword, target.right)
            elif kind in [ast.NotOp] and type(target.op) == ast.ValueQuery:
                target.op = SpiresOp(implicit_keyword, target.op.op)

        # The most recent keyword seen is carried over to the following
        # children.
        current_keyword = None
        for child in children:
            declared = getattr(child, 'keyword', None)
            if declared is not None:
                current_keyword = declared
            if current_keyword is not None:
                assign_implicit_keyword(current_keyword, child)

        # Build the boolean expression, left to right
        # x and y or z and ... --> ((x and y) or z) and ...
        expression = children[0]
        for boolean_node in children[1:]:
            boolean_node.left = expression
            expression = boolean_node
        return expression

    @visitor(parser.FindQuery)
    def visit(self, node, child):
        """Return the converted child unchanged."""
        return child

    @visitor(parser.Main)
    def visit(self, node, child):
        """Return the converted child unchanged."""
        return child
class SearchUnit(object):
    """Implement visitor using ``search_unit`` API.

    Keyword and value visitors return dictionaries of ``search_unit``
    keyword arguments (``f`` for the keyword, ``p`` for the pattern and
    ``m`` for the matching type); keyword operators merge them and call
    ``search_unit``.
    """

    visitor = make_visitor()

    # pylint: disable=W0613,E0102

    @visitor(AndOp)
    def visit(self, node, left, right):
        """Combine both operand results with ``&``."""
        return left & right

    @visitor(OrOp)
    def visit(self, node, left, right):
        """Combine both operand results with ``|``."""
        return left | right

    @visitor(NotOp)
    def visit(self, node, op):
        """Return ``intbitset(trailing_bits=1)`` minus the operand."""
        return intbitset(trailing_bits=1) - op

    @visitor(KeywordOp)
    def visit(self, node, left, right):
        """Search with the keyword and value arguments merged."""
        if isinstance(right, intbitset):  # second level operator
            left.update(dict(p=right))
        else:
            left.update(right)
        return search_unit(**left)

    @visitor(ValueQuery)
    def visit(self, node, op):
        """Search with the value arguments ``op``."""
        return search_unit(**op)

    @visitor(GreaterOp)
    def visit(self, node, op):
        """Turn the pattern into an open-ended ``value->`` range, in place."""
        op["p"] = "{0}->".format(op["p"])
        return op

    @visitor(Keyword)
    def visit(self, node):
        """Return the keyword as the ``f`` argument."""
        return dict(f=node.value)

    @visitor(Value)
    def visit(self, node):
        """Return the value as the ``p`` argument."""
        return dict(p=node.value)

    @visitor(SingleQuotedValue)
    def visit(self, node):
        """Return the pattern with matching type ``'p'``."""
        return dict(p=node.value, m='p')

    @visitor(DoubleQuotedValue)
    def visit(self, node):
        """Return the pattern with matching type ``'e'``."""
        return dict(p=node.value, m='e')

    @visitor(RegexValue)
    def visit(self, node):
        """Return the pattern with matching type ``'r'``."""
        return dict(p=node.value, m='r')

    @visitor(RangeOp)
    def visit(self, node, left, right):
        """Return a ``lower->upper`` pattern built from both bounds."""
        # The operands are the dictionaries produced by the value visitors
        # above (as in the GreaterOp visitor), so the bounds are their 'p'
        # entries; formatting the dictionaries themselves would embed
        # their repr in the pattern.
        return dict(p="%s->%s" % (left['p'], right['p']))

    @visitor(EmptyQuery)
    def visit(self, node):
        """Return ``intbitset(trailing_bits=1)`` for an empty query."""
        return intbitset(trailing_bits=1)