def test_match_phrase_class_via_connection(self):
    """Run a ``MatchPhrase`` query on index ``cf_etf`` and expect exactly one hit.

    The query is built from the query class itself (``MatchPhrase``) and
    executed through the connection alias ``'high_level_client'``.
    """
    # The return value is not used here; presumably this call registers the
    # 'high_level_client' connection alias -- TODO confirm against the helper.
    ESLowLevelClientByConnection.get_instance()

    phrase_query = MatchPhrase(fund_name='iShares MSCI ACWI ETF')
    etf_search = Search(index='cf_etf', using='high_level_client')
    etf_search.query = phrase_query

    result = etf_search.execute()
    total_hits = result['hits']['total']['value']
    self.assertEqual(total_hits, 1)
コード例 #2
0
    def search_close(self, origin_timestamp, channel, qterm, number_results):
        """
        Fetch the log entries of one channel that lie closest to a timestamp.

        Scoring is driven by an exponential decay on ``@timestamp`` centred on
        ``origin_timestamp``. The ``qterm`` clauses carry a negligible boost;
        they are part of the query so that matches can be highlighted.

        :param origin_timestamp: origin of the ``@timestamp`` decay function
        :param channel: value the ``channel.keyword`` field is filtered on
        :param qterm: term to highlight in ``msg``, ``username`` and ``channel``
        :param number_results: end of the result slice ``[0:number_results]``
        :return: hits of the executed search, sorted ascending by ``@timestamp``
        :rtype: ``list``
        """
        search = DslSearch(using=self._es, index=self._index_prefix.format('*'))

        # Very low boost: these clauses exist for highlighting, not scoring.
        highlight_boost = 1e-15

        def highlight_clause():
            # A fresh dict per query so no two query objects share one dict.
            return {'query': qterm, 'boost': highlight_boost}

        term_query = MatchPhrase(msg=highlight_clause())
        term_query = term_query | Match(username=highlight_clause())
        term_query = term_query | Match(channel=highlight_clause())
        term_query = term_query | Match(msg=highlight_clause())
        base_query = term_query | Q('match_all')

        # Exponential decay around the origin timestamp.
        time_decay = SF('exp', **{
            '@timestamp': {
                "origin": origin_timestamp,
                "scale": "1m",
                "decay": 0.999
            }
        })
        scored_query = Q('function_score',
                         query=base_query,
                         functions=[time_decay])

        search = search.query(scored_query)
        # Restrict to the requested channel.
        search = search.filter('term', **{'channel.keyword': channel})
        # Limit the number of hits.
        search = search[0:number_results]

        # Highlighting: whole message (no fragmenting), plus user and channel.
        search = search.highlight_options(order='score')
        search = search.highlight('msg', number_of_fragments=0)
        for highlighted_field in ('username', 'channel'):
            search = search.highlight(highlighted_field)

        hits = search.execute()

        # Hand the hits back in chronological order.
        return sorted(hits, key=lambda entry: entry['@timestamp'])
コード例 #3
0
 def apply_filter_match_phrases(cls, queryset, options, value):
     """Filter ``queryset`` so that at least one phrase matches ``options['field']``.

     :param queryset: search object handed on to ``cls.apply_filter``.
     :param options: mapping whose ``'field'`` entry names the field to match.
     :param value: list/tuple of phrases, or a raw value that is split with
         ``cls.split_lookup_complex_value``.
     :returns: whatever ``cls.apply_filter`` returns.
     """
     if isinstance(value, (list, tuple)):
         phrases = value
     else:
         phrases = cls.split_lookup_complex_value(value)

     # One match_phrase clause per phrase; the field name is looked up per
     # phrase, so it is never read when there are no phrases.
     phrase_clauses = []
     for phrase in phrases:
         phrase_clauses.append(MatchPhrase(**{options['field']: phrase}))

     any_phrase = Bool(should=phrase_clauses, minimum_should_match=1)
     return cls.apply_filter(queryset=queryset, args=[any_phrase])
コード例 #4
0
    def get_query_condition(self, url_parameter, field, value):
        """
        Build the ElasticSearch DSL query object configured for a URL parameter.

        The query class is taken from ``self.query_parameters[url_parameter]``
        (key ``'query_class'``) and instantiated for ``field`` and ``value``.
        :param url_parameter: String of the URL parameter for this query.
        :param field: String of the ElasticSearch document field to search.
        :param value: Raw value to look for in this query. For Range queries
            it is used as a mapping (``value.keys()`` / ``value.get('eq')``).
        :returns: A Wildcard, MatchPhrase, Prefix, Match, Exists or Range query.
        :raises KeyError: if the parameter has no settings or its query class
            is not one of the supported classes.
        """
        settings = self.query_parameters.get(url_parameter)
        if not settings:
            raise KeyError("Parameter %s has no query parameters defined." %
                           url_parameter)

        query_class = settings.get('query_class')

        if query_class is Wildcard:
            # Surround the value with wildcards so it matches anywhere.
            return Wildcard(**{field: {'value': "*%s*" % value}})

        if query_class is MatchPhrase:
            return MatchPhrase(**{field: value})

        if query_class is Prefix:
            # If you're having problems with Prefix queries, see if the
            # whitespace analyzer is set! See the RecipeIndex class for more.
            return Prefix(**{field: {'value': value}})

        if query_class is Match:
            return Match(**{field: {'query': value}})

        if query_class is Exists:
            return Exists(**{'field': field})

        if query_class is Range:
            # Range() has no 'eq' operator. To keep URL queries simple, an
            # 'eq' key is silently turned into a Match query instead.
            if 'eq' in value.keys():
                return Match(**{field: {'query': value.get('eq')}})
            return Range(**{field: value})

        raise KeyError("Unsupported query class")
コード例 #5
0
    def translate_clause(self, clause, field):
        """Translate one clause for one field into an Elasticsearch DSL query.

        :param clause: mapping describing the clause. Keys read here:
            ``constraint``, ``query_type`` (default ``"match"``), ``type``
            (default ``"owl:Thing"``) and ``_id``.
        :param field: mapping describing the field. Keys read here: ``name``
            and ``weight`` (default ``1.0``).
        :returns:
            * ``Exists`` on the field when the clause has no ``constraint``;
            * ``Bool(must=[Match], should=[MatchPhrase])`` when ``query_type``
              is ``"match_phrase"``;
            * ``Range`` covering the constraint's day when the clause type
              contains ``"date"``;
            * ``Match`` otherwise.
        """
        if "constraint" not in clause:
            return Exists(field=field["name"])

        constraint = clause["constraint"]
        query_type = clause.get("query_type", "match")
        match_field_params = {
            "query": constraint,
            "boost": field.get("weight", 1.0),
            "_name": "{}:{}:{}".format(
                clause.get("_id"), field.get("name"), clause.get("constraint")),
        }
        match_params = {field["name"]: match_field_params}

        if query_type == "match_phrase":
            # The phrase variant is an optional booster on top of the
            # mandatory match: its boost scales with the number of
            # space-separated terms and it allows a slop of 10.
            phrase_field_params = copy.copy(match_field_params)
            phrase_field_params["boost"] = (
                phrase_field_params["boost"] * len(constraint.split(" ")))
            phrase_field_params["_name"] = (
                phrase_field_params["_name"] + ":match_phrase")
            phrase_field_params["slop"] = 10
            phrase_query = MatchPhrase(**{field["name"]: phrase_field_params})
            match_query = Match(**match_params)
            return Bool(must=[match_query], should=[phrase_query])

        clause_type = clause.get("type", "owl:Thing")
        if "date" in clause_type.lower():
            # Reuse the parameter dict for a range query: drop the text
            # query, keep boost and _name, and add the bounds.
            match_field_params.pop("query", None)
            # Raw string: "\d" in a normal literal is an invalid escape.
            if re.match(r"\d\d\d\d-\d\d-\d\d", constraint):
                match_field_params["gte"] = constraint
            else:
                match_field_params["gte"] = "{}||/d".format(constraint)
            match_field_params["lt"] = "{}||+1d/d".format(constraint)
            return Range(**match_params)

        if clause_type == "owl:Thing":
            # Untyped clauses get twice the field weight.
            match_field_params["boost"] = field.get("weight", 1.0) * 2
        return Match(**match_params)
コード例 #6
0
    def _get_query(qterm):
        """
        Build the scoring query for a search term (used in search and search_day).

        When ``helpers.is_simple_query_string_query`` accepts the term, a
        single boosted ``SimpleQueryString`` over msg/username/channel is
        used; otherwise a ``DisMax`` of a username/channel query and a phrase
        match on ``msg``. Either way the result is combined in an outer
        ``DisMax`` with a ``Common`` terms query on ``msg``.

        :param qterm: ``str`` string to build query for
        :return: ``Query`` Search-Query
        """
        if not helpers.is_simple_query_string_query(qterm):
            name_channel_query = SimpleQueryString(
                query=qterm,
                fields=['username', 'channel'],
                default_operator='AND',
                boost=1)
            phrase_query = MatchPhrase(msg={'query': qterm, 'boost': 1})
            msg_query = DisMax(tie_breaker=0.7,
                               boost=1,
                               queries=[name_channel_query, phrase_query])
        else:
            msg_query = SimpleQueryString(
                query=qterm,
                fields=['msg', 'username', 'channel'],
                default_operator='AND',
                boost=5)

        common_terms_query = Common(msg={
            'query': qterm,
            'cutoff_frequency': 0.001
        })
        return DisMax(tie_breaker=0.7,
                      boost=1,
                      queries=[msg_query, common_terms_query])
コード例 #7
0
    def add_query_fields(self, s, qterm, **kwargs):
        r"""Add the mail query, filters, index boosts and highlighting to ``s``.

            :param s:
                DSL-Query to modify
            :type s: ``DslSearch`` Elasticsearch DSL query
            :param qterm:
                Query-string
            :type qterm: ``str``
            :param \**kwargs:
                See below; keyword arguments other than the ones listed are
                accepted but not read by this method.

            :Keyword Arguments:
                * *include_spam* (``bool``) --
                  True: no spam filter is added.
                  False (default): mails whose subject matches 'spam' or
                  whose ``spam`` term is 1 are filtered out.
                * *only_attachment* (``bool``) --
                  True: only mails with ``hasAttachment`` set to True.
                  False (default): no attachment filter.
                * *mailq* (``str``) --
                  When not the empty string (default ''), keep only mails
                  whose ``fromEmail.keyword``, ``toEmail.keyword`` or
                  ``replyToEmail.keyword`` matches this value.
            :return: ``DslSearch`` Elasticsearch DSL query

            """
        # Scoring query
        searched_fields = [
            'body', 'fromEmail', 'toEmail', 'replyToEmail', 'fromName',
            'toName', 'replyToName', 'subject', 'attachmentNames'
        ]
        if helpers.is_simple_query_string_query(qterm):
            body_query = SimpleQueryString(query=qterm,
                                           fields=searched_fields,
                                           default_operator='AND',
                                           boost=5)
        else:
            all_fields_query = SimpleQueryString(query=qterm,
                                                 fields=searched_fields,
                                                 default_operator='AND',
                                                 boost=1)
            phrase_query = MatchPhrase(body={'query': qterm, 'boost': 1})
            body_query = DisMax(tie_breaker=0.7,
                                boost=1,
                                queries=[all_fields_query, phrase_query])

        common_terms_query = Common(body={
            'query': qterm,
            'cutoff_frequency': 0.001
        })
        positive = DisMax(tie_breaker=0.7,
                          boost=1,
                          queries=[body_query, common_terms_query])

        # Penalize (not exclude) mails whose subject matches 'spam'.
        spam_subject = Match(subject={'query': 'spam'})
        s = s.query(
            Boosting(positive=positive,
                     negative=spam_subject,
                     negative_boost=0.2))

        # Options read from the keyword arguments
        include_spam = kwargs.get('include_spam', False)
        only_attachment = kwargs.get('only_attachment', False)
        mailq = kwargs.get('mailq', '')

        # Filter on mail address: any of the three address fields may match.
        if mailq != '':
            address_filter = Match(**{'fromEmail.keyword': mailq})
            for address_field in ('toEmail.keyword', 'replyToEmail.keyword'):
                address_filter = address_filter | Match(
                    **{address_field: mailq})
            s = s.filter(address_filter)

        # Filter spam
        if not include_spam:
            s = s.filter(~Match(subject={'query': 'spam'}))
            # TODO: Spam-flag currently not in use, but for use with
            # different spam filter
            s = s.filter(~Term(spam=1))

        # Filter attachment
        if only_attachment:
            s = s.filter('term', hasAttachment=True)

        # Per-index boosts
        index_boosts = {
            self._index_prefix.format(suffix): boost
            for suffix, boost in (('ja', 1.5), ('en', 1), ('un', 0.5))
        }
        s = s.extra(indices_boost=index_boosts)
        # The body is not excluded from _source: it is needed, no link available.

        # Highlighting: short body fragments, defaults for everything else.
        s = s.highlight_options(order='score')
        s = s.highlight('body', fragment_size=50)
        for highlighted_field in ('subject', 'fromEmail', 'toEmail',
                                  'replyToEmail', 'fromEmail.keyword',
                                  'toEmail.keyword', 'replyToEmail.keyword',
                                  'fromName', 'toName', 'replyToName',
                                  'attachmentNames'):
            s = s.highlight(highlighted_field)

        return s
コード例 #8
0
 def translate_filter(self, f, field):
     """Translate one filter for one field into an Elasticsearch DSL query.

     :param f: mapping describing the filter. Keys read here: ``operator``,
         ``constraint``, ``_id``, ``query_type`` (default ``"match"``) and
         ``type`` (default ``"owl:Thing"``).
     :param field: mapping describing the field. Keys read here: ``name``
         and ``weight`` (default ``1.0``).
     :returns:
         * ``Range`` when ``operator`` is one of ``<``, ``<=``, ``>``, ``>=``;
         * ``Range`` with one bound per operator when ``operator`` is a list
           (paired positionally with the ``constraint`` and ``_id`` lists);
         * ``Bool(must=[Match], should=[MatchPhrase])`` when ``query_type``
           is ``"match_phrase"``;
         * ``Match`` otherwise.
     """
     range_operators = {"<": "lt", "<=": "lte", ">": "gt", ">=": "gte"}
     # (str, unicode) on Python 2 and (str, str) on Python 3; replaces the
     # Python 2-only ``basestring`` so the check works on both.
     string_types = (str, type(u""))
     op = f["operator"]

     if isinstance(op, string_types) and op in range_operators:
         range_field_params = {range_operators[op]: f["constraint"]}
         range_params = {field["name"]: range_field_params}
         range_field_params["_name"] = "{}:{}:{}".format(
             f.get("_id"), field.get("name"), f.get("constraint"))
         return Range(**range_params)

     if isinstance(op, list):
         range_field_params = {}
         for operator_symbol, bound in zip(op, f["constraint"]):
             range_field_params[range_operators[operator_symbol]] = bound
         range_params = {field["name"]: range_field_params}
         # One "<id>:<field>:<constraint>" group per bound, colon-joined.
         range_field_params["_name"] = ":".join(
             "{}:{}:{}".format(clause_id, field.get("name"), bound)
             for clause_id, bound in zip(f.get("_id"), f.get("constraint")))
         return Range(**range_params)

     match_field_params = {
         "boost": field.get("weight", 1.0) * 5,
         "query": f["constraint"],
         "_name": "{}:{}:{}".format(
             f.get("_id"), field.get("name"), f.get("constraint")),
     }
     match_params = {field["name"]: match_field_params}

     phrase_query = None
     if f.get("query_type", "match") == "match_phrase":
         # Copy before minimum_should_match is added below: the phrase
         # query carries boost, _name and slop only.
         phrase_field_params = copy.copy(match_field_params)
         phrase_field_params["boost"] = phrase_field_params["boost"] * 10
         phrase_field_params["_name"] = (
             phrase_field_params["_name"] + ":match_phrase")
         phrase_field_params["slop"] = 10
         phrase_query = MatchPhrase(**{field["name"]: phrase_field_params})

     # Require roughly half of the space-separated terms (at least one);
     # floor division keeps the value an integer on Python 3 as well.
     terms = len(f.get("constraint").split(" "))
     if terms > 5:
         msm = terms // 2 + 1
     else:
         msm = max(1, terms // 2)
     match_field_params["minimum_should_match"] = msm

     if f.get("type", "owl:Thing") == "owl:Thing":
         # Untyped filters fall back to twice the field weight.
         match_field_params["boost"] = field.get("weight", 1.0) * 2
     match_query = Match(**match_params)

     if phrase_query is None:
         return match_query
     return Bool(must=[match_query], should=[phrase_query])
コード例 #9
0
    result = search.query(kwargs_list[i]).execute()
    print('\n' + keywords[i] + ':')
    for hit in result.hits.hits:
        print(hit)

# result = search.query(Q('constant_score', filter=kwargs1)).execute()

# kwargs_exclude = SimpleQueryString(query=regex_list[1], fields=['context'], default_operator='not')

# Each statement below runs one search immediately (.execute()) and keeps the
# response so its hits can be printed further down.
# result3: everything except documents matching kwargs_list[1].
result3 = search.query().exclude(kwargs_list[1]).execute()
# result4-result6: filter-context phrase (prefix) matches on the 'context' field.
result4 = search.query().filter('match_phrase_prefix', context=regex_list[0]).execute()
result5 = search.query().filter('match_phrase', context=regex_list[0]).execute()
result6 = search.query().filter('match_phrase', context=regex_list[2]).execute()
# result7: both phrases must match. Alternative spellings kept for reference:
# result7 = search.query().filter('match_phrase', context=regex_list[2]).filter('match_phrase', context=regex_list[3]).execute()  #works
# result7 = search.query().filter(Q("match_phrase",  context=regex_list[2]) & Q("match_phrase", context=regex_list[3])).execute()
result7 = search.query().filter(Q('bool', **{'must': [MatchPhrase(context=regex_list[2]), MatchPhrase(context=regex_list[3])]})).execute()


# NOTE(review): result3 excludes kwargs_list[1] but is labelled with
# keywords[2] -- looks like an index mismatch; confirm which one is intended.
print('\n exclude' + keywords[2] + ':')
for hit in result3.hits.hits:
    print(hit)

print('\n match phrase prefix' + keywords[0] + ':')
for hit in result4.hits.hits:
    print(hit)

print('\n match phrase' + keywords[0] + ':')
for hit in result5.hits.hits:
    print(hit)

print('\n match phrase' + keywords[2] + ':')
コード例 #10
0
 def name_unique(cls, txt):
     """Run a ``match_phrase`` search on ``name`` for ``txt``.

     :param txt: phrase to look for in the ``name`` field.
     :returns: the response of the executed search. No uniqueness decision
         is made here; the caller inspects the hits.
     """
     name_search = cls.search()
     name_search.query = MatchPhrase(name=txt)
     response = name_search.execute()
     return response
コード例 #11
0
ファイル: __init__.py プロジェクト: cohoe/barbados
    def get_query_conditions(self, url_parameter, fields, value):
        """
        Build the ElasticSearch DSL query objects configured for a URL parameter.

        The query class comes from ``self.query_parameters[url_parameter]``
        (key ``'query_class'``). ``MultiMatch`` yields a single query over all
        fields; every other supported class yields one query per field.
        :param url_parameter: String of the URL parameter for this query.
        :param fields: List[String] of the ElasticSearch document fields to search.
        :param value: Raw value to look for in this query. For Range queries
            it is used as a mapping (``value.keys()`` / ``value.get('eq')``).
        :returns: List of query objects.
        :raises KeyError: if the parameter has no settings, or (once a field
            is processed) its query class is not supported.
        """
        settings = self.query_parameters.get(url_parameter)
        if not settings:
            raise KeyError("Parameter %s has no query parameters defined." %
                           url_parameter)

        query_class = settings.get('query_class')

        # MultiMatch takes all fields at once, so it is built up front.
        # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html
        if query_class is MultiMatch:
            return [MultiMatch(**{'query': value, 'fields': fields})]

        def condition_for(field):
            # Build the query object for one field.
            if query_class is Wildcard:
                return Wildcard(**{field: {'value': "*%s*" % value}})
            if query_class is MatchPhrase:
                return MatchPhrase(**{field: value})
            if query_class is Prefix:
                # If you're having problems with Prefix queries, see if the
                # whitespace analyzer is set! See the RecipeIndex class for more.
                return Prefix(**{field: {'value': value}})
            if query_class is Match:
                return Match(**{field: {'query': value}})
            if query_class is Exists:
                return Exists(**{'field': field})
            if query_class is Range:
                # Range() has no 'eq' operator. To keep URL queries simple,
                # an 'eq' key is silently turned into a Match query instead.
                if 'eq' in value.keys():
                    return Match(**{field: {'query': value.get('eq')}})
                return Range(**{field: value})
            raise KeyError("Unsupported query class")

        # One condition per field, in the order the fields were given.
        return [condition_for(field) for field in fields]
コード例 #12
0
    def aggregate(self, request, queryset, view):
        """Attach one filtered aggregation per facet to ``queryset``.

        For every facet (plain and nested) a filter is assembled from all
        active filter query params except those targeting the facet's own
        field, and the facet's aggregation is registered below that filter
        under ``"_filter_" + <facet name>``.

        :param request: current request, passed to the param/facet helpers.
        :param queryset: search object whose ``aggs`` receive the buckets.
        :param view: current view, passed to the param/facet helpers.
        :returns: the same ``queryset``.
        """
        filter_query_params = self.get_filter_query_params(request,
                                                           view).values()
        facets = self.construct_facets(request, view)
        facets.update(self.construct_nested_facets(request, view))

        # Single-bound lookups and the range key each one is translated to.
        bound_lookups = (
            (LOOKUP_QUERY_GT, "gt"),
            (LOOKUP_QUERY_GTE, "gte"),
            (LOOKUP_QUERY_LT, "lt"),
            (LOOKUP_QUERY_LTE, "lte"),
        )

        for facet_name, facet in facets.items():
            agg = facet["facet"].get_aggregation()
            agg_filter = Q("match_all")
            global_facet = facet.get("global", False)
            nested_facet = "path" in facet

            for options in filter_query_params:
                # Don't filter an aggregation on its own field. A nested
                # facet is identified by its "filter_field", a plain one by
                # its name.
                own_field = facet["filter_field"] if nested_facet else facet_name
                if (own_field == options["field"]
                        or own_field == options.get("filter_field")):
                    continue

                values = options["values"]

                # Plain list of values without a lookup: terms-style filter.
                if isinstance(values, (list, tuple)) and options["lookup"] is None:
                    if "path" not in options:
                        agg_filter &= Q("terms", **{options["field"]: values})
                    elif options["path"] == "keywords":
                        # Nested keywords: one phrase match per value, all
                        # of which are AND-ed into the filter.
                        for val in values:
                            agg_filter &= Nested(
                                path=options["path"],
                                query=MatchPhrase(**{options["field"]: val}),
                            )
                    else:
                        # Any other nested path: a single terms query.
                        agg_filter &= Nested(
                            path=options["path"],
                            query=Terms(**{options["field"]: values}),
                        )
                    continue

                # Otherwise translate each value according to the lookup.
                lookup_filter = Q("match_all")
                for value in values:
                    lookup = options["lookup"]
                    clause = None
                    if lookup == LOOKUP_FILTER_TERMS:
                        clause = Q(
                            "terms",
                            **{
                                options["field"]:
                                self.split_lookup_complex_value(value)
                            })
                    elif lookup == LOOKUP_FILTER_RANGE:
                        clause = Q(
                            "range",
                            **{options["field"]: self.get_range_params(value)})
                    else:
                        for bound_lookup, bound in bound_lookups:
                            if lookup == bound_lookup:
                                clause = Q(
                                    "range",
                                    **{
                                        options["field"]:
                                        self.get_gte_lte_params(value, bound)
                                    })
                                break
                        else:
                            if lookup == "match_phrase":
                                clause = MatchPhrase(
                                    **{options["field"]: value})
                    # Unknown lookups contribute nothing.
                    if clause is not None:
                        lookup_filter &= clause

                if "path" in options:  # Filter term is nested
                    agg_filter &= Nested(path=options["path"],
                                         query=lookup_filter)
                else:
                    agg_filter &= lookup_filter

            # Bucket chain: [global ->] filter [-> nested] -> facet aggregation.
            # The filter must appear BEFORE the nested aggregation to have
            # an effect.
            filter_name = "_filter_" + facet_name
            bucket = queryset.aggs
            if global_facet:
                bucket = bucket.bucket(filter_name, "global")
            bucket = bucket.bucket(filter_name, "filter", filter=agg_filter)
            if nested_facet:
                bucket = bucket.bucket(filter_name,
                                       "nested",
                                       path=facet["path"])
            bucket.bucket(facet_name, agg)

        return queryset