def test_match_phrase_class_via_connection(self):
    """Search ``cf_etf`` with a ``MatchPhrase`` query object and expect one hit."""
    ESLowLevelClientByConnection.get_instance()

    # Build the query from the query-type class rather than from a raw dict.
    etf_search = Search(using='high_level_client', index='cf_etf')
    etf_search.query = MatchPhrase(fund_name='iShares MSCI ACWI ETF')

    result = etf_search.execute()
    total_hits = result['hits']['total']['value']
    self.assertEqual(total_hits, 1)
def search_close(self, origin_timestamp, channel, qterm, number_results):
    """
    Fetch the log entries nearest to a timestamp within one channel.

    Hits are scored with an exponential decay on ``@timestamp`` centred on
    ``origin_timestamp``. ``qterm`` is part of the query only with a tiny
    boost, so it can be highlighted without steering the ranking.

    :param origin_timestamp: origin timestamp to find logs around
    :param channel: channel to keep (term filter on ``channel.keyword``)
    :param qterm: term to be highlighted
    :param number_results: how many results to request
    :return: hits of the Elasticsearch response, sorted by ``@timestamp``
    :rtype: ``list``
    """
    # Only used for highlighting, not for scoring -> very low significance.
    highlight_boost = 1e-15

    def term_clause():
        # A fresh dict per clause, as each query object gets its own body.
        return {'query': qterm, 'boost': highlight_boost}

    highlight_query = (
        MatchPhrase(msg=term_clause())
        | Match(username=term_clause())
        | Match(channel=term_clause())
        | Match(msg=term_clause())
    )
    # match_all keeps entries that do not mention the term at all.
    base_query = highlight_query | Q('match_all')

    recency_decay = SF(
        'exp',
        **{
            '@timestamp': {
                "origin": origin_timestamp,
                "scale": "1m",
                "decay": 0.999
            }
        })
    scored_query = Q('function_score', query=base_query,
                     functions=[recency_decay])

    search = DslSearch(using=self._es, index=self._index_prefix.format('*'))
    search = search.query(scored_query)
    search = search.filter('term', **{'channel.keyword': channel})
    search = search[0:number_results]
    search = (
        search.highlight_options(order='score')
        .highlight('msg', number_of_fragments=0)
        .highlight('username')
        .highlight('channel')
    )

    # Results come back ordered by score; hand them out chronologically.
    hits = list(search.execute())
    hits.sort(key=lambda hit: hit['@timestamp'])
    return hits
def apply_filter_match_phrases(cls, queryset, options, value):
    """
    Filter ``queryset`` so that at least one of several phrases matches.

    :param queryset: search object handed on to ``cls.apply_filter``
    :param options: filter options; ``options['field']`` names the field
    :param value: list/tuple of phrases, or a raw value that is split with
        ``cls.split_lookup_complex_value``
    :return: result of ``cls.apply_filter``
    """
    if isinstance(value, (list, tuple)):
        phrases = value
    else:
        phrases = cls.split_lookup_complex_value(value)

    should_clauses = []
    for phrase in phrases:
        should_clauses.append(MatchPhrase(**{options['field']: phrase}))

    any_phrase = Bool(should=should_clauses, minimum_should_match=1)
    return cls.apply_filter(queryset=queryset, args=[any_phrase])
def get_query_condition(self, url_parameter, field, value):
    """
    Build the ElasticSearch DSL query object configured for a URL parameter.

    The query class is taken from ``self.query_parameters[url_parameter]``
    (key ``'query_class'``) and instantiated for ``field`` and ``value``.

    :param url_parameter: String of the URL parameter for this query.
    :param field: String of the ElasticSearch document field to search.
    :param value: Raw value to look for in this query.
    :returns: Some kind of Query child class.
    :raises KeyError: if the parameter has no settings, or its query class
        is not one of the supported ones.
    """
    settings = self.query_parameters.get(url_parameter)
    if not settings:
        raise KeyError("Parameter %s has no query parameters defined." %
                       url_parameter)

    query_class = settings.get('query_class')

    if query_class is Wildcard:
        # Substring search: wrap the value in wildcards on both sides.
        return Wildcard(**{field: {'value': "*%s*" % value}})

    if query_class is MatchPhrase:
        return MatchPhrase(**{field: value})

    if query_class is Prefix:
        # If you're having problems with Prefix queries, see if the
        # whitespace analyzer is set! See the RecipeIndex class for more.
        return Prefix(**{field: {'value': value}})

    if query_class is Match:
        return Match(**{field: {'query': value}})

    if query_class is Exists:
        return Exists(**{'field': field})

    if query_class is Range:
        # Range() has no 'eq' operator. To keep URL queries simple, an 'eq'
        # key is quietly turned into a Match query instead.
        if 'eq' in value.keys():
            return Match(**{field: {'query': value.get('eq')}})
        return Range(**{field: value})

    raise KeyError("Unsupported query class")
def translate_clause(self, clause, field):
    """
    Translate one query clause into an Elasticsearch DSL query for ``field``.

    * No ``"constraint"`` in the clause: an ``Exists`` query on the field.
    * ``query_type == "match_phrase"``: a ``Bool`` whose ``must`` is a plain
      ``Match`` and whose ``should`` is a ``MatchPhrase`` (slop 10) boosted
      by the number of space-separated terms in the constraint.
    * Clause ``type`` containing ``"date"``: a ``Range`` query.
    * Otherwise: a ``Match`` query, with the boost doubled when the clause
      type is ``"owl:Thing"`` (also the default type).

    :param clause: dict describing the clause; reads ``constraint``,
        ``query_type``, ``type`` and ``_id``
    :param field: dict describing the target field; reads ``name`` and
        the optional ``weight`` (default 1.0)
    :return: ``Exists``, ``Bool``, ``Range`` or ``Match`` query object
    """
    if "constraint" not in clause:
        return Exists(field=field["name"])

    constraint = clause["constraint"]
    query_type = clause.get("query_type", "match")

    match_field_params = {
        "query": constraint,
        "boost": field.get("weight", 1.0),
        # _name lets the matching clause/field be identified in the results.
        "_name": "{}:{}:{}".format(
            clause.get("_id"), field.get("name"), constraint),
    }
    match_params = {field["name"]: match_field_params}

    if query_type == "match_phrase":
        # Shallow copy is enough: all values are scalars.
        match_field_params_mp = copy.copy(match_field_params)
        match_field_params_mp["boost"] = (
            match_field_params_mp["boost"] * len(constraint.split(" ")))
        match_field_params_mp["_name"] = (
            match_field_params_mp["_name"] + ":match_phrase")
        match_field_params_mp["slop"] = 10
        mp = MatchPhrase(**{field["name"]: match_field_params_mp})
        m = Match(**match_params)
        return Bool(must=[m], should=[mp])

    clause_type = clause.get("type", "owl:Thing")

    if "date" in clause_type.lower():
        # Range queries take bounds instead of a "query" value.
        match_field_params.pop("query", None)
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        if re.match(r"\d\d\d\d-\d\d-\d\d", constraint):
            match_field_params["gte"] = constraint
        else:
            # Date math: round to the day and cover exactly that day.
            match_field_params["gte"] = "{}||/d".format(constraint)
            match_field_params["lt"] = "{}||+1d/d".format(constraint)
        return Range(**match_params)

    if clause_type == "owl:Thing":
        match_field_params["boost"] = field.get("weight", 1.0) * 2
    return Match(**match_params)
def _get_query(qterm):
    """
    Build the scoring query for a search term (used in search and search_day).

    Terms recognised by ``helpers.is_simple_query_string_query`` are run as
    one boosted simple-query-string over all fields; other terms combine a
    simple-query-string on username/channel with a phrase match on ``msg``.
    Either variant is then combined with a common-terms query on ``msg``.

    :param qterm: ``str`` string to build query for
    :return: ``Query`` Search-Query
    """
    if helpers.is_simple_query_string_query(qterm):
        msg_query = SimpleQueryString(
            query=qterm,
            fields=['msg', 'username', 'channel'],
            default_operator='AND',
            boost=5)
    else:
        meta_query = SimpleQueryString(
            query=qterm,
            fields=['username', 'channel'],
            default_operator='AND',
            boost=1)
        phrase_query = MatchPhrase(msg={'query': qterm, 'boost': 1})
        msg_query = DisMax(tie_breaker=0.7, boost=1,
                           queries=[meta_query, phrase_query])

    common_terms_query = Common(
        msg={'query': qterm, 'cutoff_frequency': 0.001})
    return DisMax(tie_breaker=0.7, boost=1,
                  queries=[msg_query, common_terms_query])
def add_query_fields(self, s, qterm, **kwargs):
    r"""Add the mail query, filters, index boosts and highlighting to ``s``.

    :param s: DSL-Query to modify
    :type s: ``DslSearch`` Elasticsearch DSL query
    :param qterm: Query-string
    :type qterm: ``str``
    :param \**kwargs:
        See below

    :Keyword Arguments (read by this method):
        * *include_spam* (``bool``) --
          True: Include spam in search (Both)
          False: Spam will be filtered and not respected in search
        * *only_attachment* (``bool``) --
          True: Only find emails with attachments
          False: emails with and without attachments (Both)
        * *mailq* (``str``) --
          If non-empty, keep only mails whose from/to/reply-to address
          (``.keyword`` sub-field) matches this value

    :Keyword Arguments (accepted, but not consulted in this method):
        * *date_gte* (``datetime``) -- Filter, From: only emails greater than
        * *date_lte* (``datetime``) -- Filter, To: only emails less than
        * *date_sliding* (``str``) -- Filter sliding window, only emails of
          the past XX-hours/days/years... e.g. '-1d/d','-5y/y' -- See:
          https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math
        * *date_sliding_type* (``str``) -- Valid date-type: e.g. y M d
        * *use_sliding_value* (``bool``) --
          True: Only respect date_sliding and date_sliding_type.
          False: only respect fix date: date_gte and date_lte
        * *number_results* (``int``) -- Number of total results to return
        * *sort_field* (``str``) -- By which field should results be sorted
          e.g. date, _score, fromEmail.keyword
        * *sort_dir* (``str``) -- In which direction should results be sorted
          ('+': ascending, '-': descending)

    :return: ``DslSearch`` Elasticsearch DSL query
    """
    searched_fields = [
        'body', 'fromEmail', 'toEmail', 'replyToEmail', 'fromName', 'toName',
        'replyToName', 'subject', 'attachmentNames'
    ]

    # Query
    if helpers.is_simple_query_string_query(qterm):
        body_query = SimpleQueryString(query=qterm,
                                       fields=searched_fields,
                                       default_operator='AND',
                                       boost=5)
    else:
        all_fields_query = SimpleQueryString(query=qterm,
                                             fields=searched_fields,
                                             default_operator='AND',
                                             boost=1)
        phrase_query = MatchPhrase(body={'query': qterm, 'boost': 1})
        body_query = DisMax(tie_breaker=0.7, boost=1,
                            queries=[all_fields_query, phrase_query])

    common_terms_query = Common(
        body={'query': qterm, 'cutoff_frequency': 0.001})
    positive = DisMax(tie_breaker=0.7, boost=1,
                      queries=[body_query, common_terms_query])

    # Penalize (rather than drop) mails whose subject mentions spam.
    spam_penalty = Match(subject={'query': 'spam'})
    s = s.query(
        Boosting(positive=positive, negative=spam_penalty,
                 negative_boost=0.2))

    # Options specific to this query
    include_spam = kwargs.get('include_spam', False)
    only_attachment = kwargs.get('only_attachment', False)
    mailq = kwargs.get('mailq', '')

    # Filter mail address
    if mailq != '':
        address_filter = (Match(**{'fromEmail.keyword': mailq})
                          | Match(**{'toEmail.keyword': mailq})
                          | Match(**{'replyToEmail.keyword': mailq}))
        s = s.filter(address_filter)

    # Filter spam
    if not include_spam:
        s = s.filter(~Match(subject={'query': 'spam'}))
        # TODO: Spam-flag currently not in use, but for use with different spam filter
        s = s.filter(~Term(spam=1))

    # Filter attachment
    if only_attachment:
        s = s.filter('term', hasAttachment=True)

    # Per-index boosts. _source is left untouched: the body is needed
    # because no link is available.
    index_boosts = {
        self._index_prefix.format('ja'): 1.5,
        self._index_prefix.format('en'): 1,
        self._index_prefix.format('un'): 0.5
    }
    s = s.extra(indices_boost=index_boosts)

    # Highlight
    s = s.highlight_options(order='score')
    s = s.highlight('body', fragment_size=50)
    for highlighted_field in ('subject', 'fromEmail', 'toEmail',
                              'replyToEmail', 'fromEmail.keyword',
                              'toEmail.keyword', 'replyToEmail.keyword',
                              'fromName', 'toName', 'replyToName',
                              'attachmentNames'):
        s = s.highlight(highlighted_field)
    return s
def translate_filter(self, f, field):
    """
    Translate one filter description into an Elasticsearch DSL query.

    * ``f["operator"]`` is one of ``<``, ``<=``, ``>``, ``>=``: a ``Range``
      query with that single bound.
    * ``f["operator"]`` is a list: a ``Range`` query with one bound per
      (operator, constraint) pair; ``f["_id"]`` and ``f["constraint"]`` are
      then expected to be parallel sequences.
    * Anything else: a ``Match`` query with ``minimum_should_match`` derived
      from the number of space-separated terms. When ``query_type`` is
      ``"match_phrase"`` it is wrapped in a ``Bool`` with an additional
      ``MatchPhrase`` (slop 10, boost x10) as ``should`` clause.

    :param f: dict describing the filter; reads ``operator``, ``constraint``,
        ``_id``, ``query_type`` and ``type``
    :param field: dict describing the target field; reads ``name`` and the
        optional ``weight`` (default 1.0)
    :return: ``Range``, ``Match`` or ``Bool`` query object
    """
    range_operators = {"<": "lt", "<=": "lte", ">": "gt", ">=": "gte"}
    op = f["operator"]

    def minimum_should_match(constraint):
        # Floor division keeps the value an integer under true division too
        # (Python 3, or ``from __future__ import division``).
        terms = len(constraint.split(" "))
        if terms > 5:
            return terms // 2 + 1
        return max(1, terms // 2)

    # Single comparison operator -> one-sided range.
    if isinstance(op, basestring) and op in range_operators:
        range_field_params = {
            range_operators[op]: f["constraint"],
            "_name": "{}:{}:{}".format(
                f.get("_id"), field.get("name"), f.get("constraint")),
        }
        return Range(**{field["name"]: range_field_params})

    # Several operators -> one range carrying all bounds.
    if isinstance(op, list):
        range_field_params = {}
        for o, c in zip(op, f["constraint"]):
            range_field_params[range_operators[o]] = c
        range_field_params["_name"] = ":".join(
            "{}:{}:{}".format(i, field.get("name"), c)
            for i, c in zip(f.get("_id"), f.get("constraint")))
        return Range(**{field["name"]: range_field_params})

    # Everything else is a text match on the constraint.
    match_field_params = {
        "boost": field.get("weight", 1.0) * 5,
        "query": f["constraint"],
        "_name": "{}:{}:{}".format(
            f.get("_id"), field.get("name"), f.get("constraint")),
    }
    match_params = {field["name"]: match_field_params}
    is_thing = f.get("type", "owl:Thing") == "owl:Thing"

    if f.get("query_type", "match") == "match_phrase":
        # Copy before minimum_should_match is added: it belongs to the
        # Match query only, not to the phrase query.
        match_field_params_mp = copy.copy(match_field_params)
        match_field_params_mp["boost"] = match_field_params_mp["boost"] * 10
        match_field_params_mp["_name"] = (
            match_field_params_mp["_name"] + ":match_phrase")
        match_field_params_mp["slop"] = 10

        match_field_params["minimum_should_match"] = minimum_should_match(
            f.get("constraint"))
        mp = MatchPhrase(**{field["name"]: match_field_params_mp})
        if is_thing:
            match_field_params["boost"] = field.get("weight", 1.0) * 2
        m = Match(**match_params)
        return Bool(must=[m], should=[mp])

    match_field_params["minimum_should_match"] = minimum_should_match(
        f.get("constraint"))
    if is_thing:
        match_field_params["boost"] = field.get("weight", 1.0) * 2
    return Match(**match_params)
result = search.query(kwargs_list[i]).execute() print('\n' + keywords[i] + ':') for hit in result.hits.hits: print(hit) # result = search.query(Q('constant_score', filter=kwargs1)).execute() # kwargs_exclude = SimpleQueryString(query=regex_list[1], fields=['context'], default_operator='not') result3 = search.query().exclude(kwargs_list[1]).execute() result4 = search.query().filter('match_phrase_prefix', context=regex_list[0]).execute() result5 = search.query().filter('match_phrase', context=regex_list[0]).execute() result6 = search.query().filter('match_phrase', context=regex_list[2]).execute() # result7 = search.query().filter('match_phrase', context=regex_list[2]).filter('match_phrase', context=regex_list[3]).execute() #works # result7 = search.query().filter(Q("match_phrase", context=regex_list[2]) & Q("match_phrase", context=regex_list[3])).execute() result7 = search.query().filter(Q('bool', **{'must': [MatchPhrase(context=regex_list[2]), MatchPhrase(context=regex_list[3])]})).execute() print('\n exclude' + keywords[2] + ':') for hit in result3.hits.hits: print(hit) print('\n match phrase prefix' + keywords[0] + ':') for hit in result4.hits.hits: print(hit) print('\n match phrase' + keywords[0] + ':') for hit in result5.hits.hits: print(hit) print('\n match phrase' + keywords[2] + ':')
def name_unique(cls, txt):
    """
    Run a ``match_phrase`` search for ``txt`` on the ``name`` field.

    Only the search response is returned; no uniqueness decision is made
    here.

    :param txt: phrase to look up in the ``name`` field
    :return: response of executing the search obtained from ``cls.search()``
    """
    name_search = cls.search()
    name_search.query = MatchPhrase(name=txt)
    return name_search.execute()
def get_query_conditions(self, url_parameter, fields, value):
    """
    Build the ElasticSearch DSL query objects configured for a URL parameter.

    The query class is taken from ``self.query_parameters[url_parameter]``
    (key ``'query_class'``). ``MultiMatch`` yields a single query spanning
    all fields; every other supported class yields one query per field.

    :param url_parameter: String of the URL parameter for this query.
    :param fields: List[String] of the ElasticSearch document fields to search.
    :param value: Raw value to look for in this query.
    :returns: List of Query child class instances.
    :raises KeyError: if the parameter has no settings, or its query class
        is unsupported (raised only when there is at least one field).
    """
    settings = self.query_parameters.get(url_parameter)
    if not settings:
        raise KeyError("Parameter %s has no query parameters defined." %
                       url_parameter)

    query_class = settings.get('query_class')

    # MultiMatch takes all fields at once, so it is built outside the
    # per-field construction below.
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html
    if query_class is MultiMatch:
        return [MultiMatch(**{'query': value, 'fields': fields})]

    def build_for(field):
        """Return the configured query for a single field."""
        if query_class is Wildcard:
            return Wildcard(**{field: {'value': "*%s*" % value}})
        if query_class is MatchPhrase:
            return MatchPhrase(**{field: value})
        if query_class is Prefix:
            # If you're having problems with Prefix queries, see if the
            # whitespace analyzer is set! See the RecipeIndex class for more.
            return Prefix(**{field: {'value': value}})
        if query_class is Match:
            return Match(**{field: {'query': value}})
        if query_class is Exists:
            return Exists(**{'field': field})
        if query_class is Range:
            # Range() has no 'eq' operator. To keep URL queries simple, an
            # 'eq' key is quietly turned into a Match query instead.
            if 'eq' in value.keys():
                return Match(**{field: {'query': value.get('eq')}})
            return Range(**{field: value})
        raise KeyError("Unsupported query class")

    return [build_for(field) for field in fields]
def aggregate(self, request, queryset, view):
    """
    Attach one filtered aggregation per facet to ``queryset``.

    For every facet (regular and nested) a filter is assembled from all
    active filter parameters except those targeting the facet's own field.
    The facet aggregation is registered under ``_filter_<facet name>``
    behind that filter, optionally inside a ``global`` bucket and/or
    followed by a ``nested`` bucket.

    :param request: current request, passed on to the helper methods
    :param queryset: search object whose ``aggs`` are extended
    :param view: current view, passed on to the helper methods
    :return: the same ``queryset`` object
    """
    active_filters = self.get_filter_query_params(request, view).values()
    facets = self.construct_facets(request, view)
    facets.update(self.construct_nested_facets(request, view))

    for facet_name, facet in facets.items():
        agg = facet["facet"].get_aggregation()
        agg_filter = Q("match_all")
        is_global = facet.get("global", False)
        is_nested = "path" in facet

        for options in active_filters:
            # Don't filter an aggregation on its own field; nested facets
            # are identified by their "filter_field".
            own_field = facet["filter_field"] if is_nested else facet_name
            if (own_field == options["field"]
                    or own_field == options.get("filter_field")):
                continue

            field = options["field"]
            values = options["values"]

            # Plain list of values without a lookup: terms-style filter.
            if isinstance(values, (list, tuple)) and options["lookup"] is None:
                if "path" not in options:
                    agg_filter &= Q("terms", **{field: values})
                elif options["path"] == "keywords":
                    # Filter term is nested; keywords are matched as phrases.
                    for keyword in values:
                        agg_filter &= Nested(
                            path=options["path"],
                            query=MatchPhrase(**{field: keyword}),
                        )
                else:
                    # Filter term is nested.
                    agg_filter &= Nested(
                        path=options["path"],
                        query=Terms(**{field: values}),
                    )
                continue

            # Explicit lookup: AND together one clause per value.
            lookup_filter = Q("match_all")
            for value in values:
                lookup = options["lookup"]
                if lookup == LOOKUP_FILTER_TERMS:
                    lookup_filter &= Q(
                        "terms",
                        **{field: self.split_lookup_complex_value(value)})
                elif lookup == LOOKUP_FILTER_RANGE:
                    lookup_filter &= Q(
                        "range", **{field: self.get_range_params(value)})
                elif lookup == LOOKUP_QUERY_GT:
                    lookup_filter &= Q(
                        "range",
                        **{field: self.get_gte_lte_params(value, "gt")})
                elif lookup == LOOKUP_QUERY_GTE:
                    lookup_filter &= Q(
                        "range",
                        **{field: self.get_gte_lte_params(value, "gte")})
                elif lookup == LOOKUP_QUERY_LT:
                    lookup_filter &= Q(
                        "range",
                        **{field: self.get_gte_lte_params(value, "lt")})
                elif lookup == LOOKUP_QUERY_LTE:
                    lookup_filter &= Q(
                        "range",
                        **{field: self.get_gte_lte_params(value, "lte")})
                elif lookup == "match_phrase":
                    lookup_filter &= MatchPhrase(**{field: value})

            if "path" in options:
                # Filter term is nested.
                agg_filter &= Nested(path=options["path"],
                                     query=lookup_filter)
            else:
                agg_filter &= lookup_filter

        # Build the chain: [global] -> filter -> [nested] -> facet agg.
        bucket_name = "_filter_" + facet_name
        node = queryset.aggs
        if is_global:
            node = node.bucket(bucket_name, "global")
        # Filter must appear BEFORE nested aggregation to have effect.
        node = node.bucket(bucket_name, "filter", filter=agg_filter)
        if is_nested:
            node = node.bucket(bucket_name, "nested", path=facet["path"])
        node.bucket(facet_name, agg)

    return queryset