Example #1
 def get_permission(self, user_id, file_ids):
     query_conditions = query.Bool(must=[
         query.Terms(file_id=file_ids),
         query.Bool(should=[
             query.Term(owner={
                 'value': user_id,
                 'boost': 100
             }),
             query.Bool(must=[
                 query.Term(share_mode={
                     'value': 1,
                     'boost': 5
                 }),
                 query.Term(users_shared={
                     'value': user_id,
                     'boost': 5
                 })
             ]),
             query.Term(share_mode=2)
         ])
     ])
     file_es = Search() \
         .query(query_conditions) \
         .source(['owner', 'share_mode', 'editable'])
     file_es = file_es[0:1]
     print(json.dumps(file_es.to_dict()))
     responses = file_es.using(self.es).index(self._index).execute()
     return responses
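For reference, a minimal sketch (with made-up ids) of how a Bool of Terms/Term clauses like the one above serializes via to_dict() before being sent to Elasticsearch:

from elasticsearch_dsl import query

# Hypothetical ids, purely to illustrate the serialized shape.
conditions = query.Bool(must=[
    query.Terms(file_id=[1, 2]),
    query.Term(owner={'value': 7, 'boost': 100})
])
print(conditions.to_dict())
# {'bool': {'must': [{'terms': {'file_id': [1, 2]}},
#                    {'term': {'owner': {'value': 7, 'boost': 100}}}]}}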
Example #2
 def query_to_check_duplicate_when_upload_or_create(self, folder_id, name):
     conditions = query.Bool(filter=[
         query.Term(parent_id={'value': folder_id}),
         query.Term(file_title__raw={'value': name})
     ])
     file_es = Search() \
         .query(conditions)
     file_es = file_es[0:1]
     responses = file_es.using(self.es).index(self._index).execute()
     return responses
Example #3
    def primary_should_rules(self, search_query, analyzer):
        """Return "primary" should rules for the query.

        These are the ones using the strongest boosts, so they are only applied
        to a specific set of fields like the name, the slug and authors.

        Applied rules:

        * Prefer phrase matches that allow swapped terms (boost=4)
        * Then text matches, using the standard text analyzer (boost=3)
        * Then text matches, using a language specific analyzer (boost=2.5)
        * Then try fuzzy matches ("fire bug" => firebug) (boost=2)
        * Then look for the query as a prefix of a name (boost=1.5)
        """
        should = []
        rules = [
            (query.MatchPhrase, {
                'query': search_query, 'boost': 4, 'slop': 1}),
            (query.Match, {
                'query': search_query, 'boost': 3,
                'analyzer': 'standard'}),
            (query.Match, {
                'query': search_query, 'boost': 2,
                'prefix_length': 4, 'fuzziness': 'AUTO'}),
            (query.Prefix, {
                'value': search_query, 'boost': 1.5}),
        ]

        # Apply rules to search on a few base fields. Some might not be
        # present in every document type / index.
        for query_cls, opts in rules:
            for field in ('name', 'slug', 'listed_authors.name'):
                should.append(query_cls(**{field: opts}))

        # Exact matches need to be queried against a non-analyzed field. Let's
        # do a term query on `name.raw` for an exact match against the add-on
        # name and boost it since this is likely what the user wants.
        # Use a super-high boost to avoid `description` or `summary`
        # getting in our way.
        should.append(query.Term(**{
            'name.raw': {
                'value': search_query, 'boost': 100
            }
        }))

        # For name, also search in translated field with the right language
        # and analyzer.
        if analyzer:
            should.append(
                query.Match(**{
                    'name_l10n_%s' % analyzer: {
                        'query': search_query,
                        'boost': 2.5,
                        'analyzer': analyzer
                    }
                })
            )

        return should
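One plausible way to consume these rules (the searcher instance and query string below are hypothetical) is to wrap them in a Bool query, as the other examples in this collection do:

# Sketch only: `searcher` stands in for an instance of the class above.
should = searcher.primary_should_rules('firebug', analyzer='english')
search = Search().query(query.Bool(should=should))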
Example #4
    def generate_exact_name_match_query(self, search_query, lang):
        """
        Return the query used for exact name matching.

        If the name of the add-on is an exact match for the search query, it's
        likely to be what the user wanted to find. To support that, we need to
        do a term query against a non-analyzed field and boost it super high.
        Since we need to support translations, this function has 2 modes:
        - In the first one, used when we are dealing with a language for which
          we know we didn't store a translation in ES (because we don't have an
          analyzer for it), it only executes a term query against `name.raw`.
        - In the second one, we did store a translation in that language...
          potentially. We don't know in advance if there is a translation for
          each add-on! We need to do a query against both `name.raw` and
          `name_l10n_<analyzer>.raw`, applying the boost only once if both
          match. This is where DisMax comes in: it's what MultiMatch
          would do, except that it works with Term queries.
        """
        analyzer = self.get_locale_analyzer(lang)
        if analyzer is None:
            clause = query.Term(
                **{
                    'name.raw': {
                        '_name': 'Term(name.raw)',
                        'value': search_query,
                        'boost': 100.0,
                    }
                })
        else:
            queries = [
                {
                    'term': {
                        'name.raw': search_query
                    }
                },
            ]
            # Search in all languages supported by this analyzer. This allows
            # us to return results in a different language that is close to the
            # one requested ('en-ca' when searching in 'en-us' for instance).
            fields = [
                'name_l10n_%s.raw' % lang
                for lang in amo.SEARCH_ANALYZER_MAP[analyzer]
            ]
            queries.extend([{
                'term': {
                    field: search_query
                }
            } for field in fields])
            clause = query.DisMax(
                # Note: We only care if one of these matches, so we leave
                # tie_breaker to the default value of 0.0.
                _name='DisMax(Term(name.raw), %s)' %
                ', '.join(['Term(%s)' % field for field in fields]),
                boost=100.0,
                queries=queries,
            )
        return clause
Example #5
def test_dismax_to_dict():
    assert {
        "dis_max": {
            "queries": [{
                "term": {
                    "_type": "article"
                }
            }]
        }
    } == query.DisMax(queries=[query.Term(_type='article')]).to_dict()
Example #6
 def build_sku_match_conditions(text_source: str) -> List[query.Query]:
     return [
         query.Term(sku__raw={
             "value": text_source,
             "boost": pow(10, 7)
         }),
         query.Prefix(sku__raw={
             "value": text_source,
             "boost": pow(10, 6)
         })
     ]
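Note that elasticsearch_dsl translates the double underscore in sku__raw into a dotted field name; a small sketch (the SKU value is made up) of the serialized form:

from elasticsearch_dsl import query

print(query.Term(sku__raw={'value': 'ABC-123', 'boost': pow(10, 7)}).to_dict())
# {'term': {'sku.raw': {'value': 'ABC-123', 'boost': 10000000}}}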
Example #7
    def generate_exact_name_match_query(self, search_query, analyzer):
        """
        Return the query used for exact name matching.

        If the name of the add-on is an exact match for the search query, it's
        likely to be what the user wanted to find. To support that, we need to
        do a term query against a non-analyzed field and boost it super high.
        Since we need to support translations, this function has 2 modes:
        - In the first one, used when we are dealing with a language for which
          we know we didn't store a translation in ES (because we don't have an
          analyzer for it), it only executes a term query against `name.raw`.
        - In the second one, we did store a translation in that language...
          potentially. We don't know in advance if there is a translation for
          each add-on! We need to do a query against both `name.raw` and
          `name_l10n_<analyzer>.raw`, applying the boost only once if both
          match. This is where DisMax comes in: it's what MultiMatch
          would do, except that it works with Term queries.
        """
        if analyzer is None:
            clause = query.Term(
                **{
                    'name.raw': {
                        '_name': 'Term(name.raw)',
                        'value': search_query,
                        'boost': 100.0
                    }
                })
        else:
            query_name = 'DisMax(Term(name.raw), Term(name_l10n_%s.raw))' % (
                analyzer)
            clause = query.DisMax(
                # We only care if one of these matches, so we leave tie_breaker
                # to the default value of 0.0.
                _name=query_name,
                boost=100.0,
                queries=[
                    {
                        'term': {
                            'name.raw': search_query
                        }
                    },
                    {
                        'term': {
                            'name_l10n_%s.raw' % analyzer: search_query
                        }
                    },
                ])
        return clause
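To see what that clause compiles to, here is a minimal sketch (query string and analyzer are hypothetical) of the DisMax serialization:

from elasticsearch_dsl import query

clause = query.DisMax(
    _name='DisMax(Term(name.raw), Term(name_l10n_english.raw))',
    boost=100.0,
    queries=[
        {'term': {'name.raw': 'firebug'}},
        {'term': {'name_l10n_english.raw': 'firebug'}},
    ])
print(clause.to_dict())
# {'dis_max': {'_name': 'DisMax(Term(name.raw), Term(name_l10n_english.raw))',
#              'boost': 100.0,
#              'queries': [{'term': {'name.raw': 'firebug'}},
#                          {'term': {'name_l10n_english.raw': 'firebug'}}]}}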
Example #8
    def get_filter_conditions(self, args):
        conditions = []
        conditions.append(query.MatchAll())

        seller_id = args.get('seller_id')
        if seller_id:
            conditions.append(query.Term(seller__id=seller_id))

        category_codes = args.get('category_codes')
        if category_codes:
            conditions.append(query.Nested(
                path='categories',
                query=query.Terms(categories__code=category_codes)
            ))

        brand_codes = args.get('brand_codes')
        if brand_codes:
            conditions.append(query.Terms(brand__code=brand_codes))

        return conditions
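A sketch of one plausible way to apply these conditions (the Elasticsearch client and index attributes are hypothetical, mirroring Examples #1 and #2):

# Sketch only: assumed to run inside the same class as get_filter_conditions.
conditions = self.get_filter_conditions(args)
search = Search().query(query.Bool(filter=conditions))
responses = search.using(self.es).index(self._index).execute()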
Example #9
 def build_filter_conditions(self, args):
     must_conditions = []
     must_conditions.append(
         query.Bool(should=[
             query.Term(trashed=False),
             query.Bool(must_not=query.Exists(field="trashed"))
         ] if not args.get('trash') else [query.Term(trashed=True)]))
     if not args.get('is_folder_api'):
         if args.get('file_id'):
             should_conditions = []
             should_conditions.append(query.Term(share_mode={'value': 2}))
             if args.get('user_id'):
                 should_conditions.append(
                     query.Term(owner=args.get('user_id')))
                 should_conditions.append(
                     self.shared_by_email_permission_condition(args))
             must_conditions.append(
                 query.Bool(should=should_conditions,
                            minimum_should_match=1))
         elif not args.get('user_id'):
             raise PermissionException("You must log in to use this API")
         elif args.get('share'):
             must_conditions.append(
                 self.shared_by_email_permission_condition(args))
         elif args.get('q'):
             must_conditions.append(
                 query.Bool(should=[
                     query.Term(owner=args.get('user_id')),
                     self.shared_by_email_permission_condition(args),
                 ],
                            minimum_should_match=1))
         else:
             must_conditions.append(query.Term(owner=args.get('user_id')))
     if args.get('star'):
         must_conditions.append(query.Term(star=True))
     if args.get('only_photo'):
         must_conditions.append(query.Prefix(file_type={'value': 'image'}))
     return query.Bool(must=must_conditions)
Example #10
    def primary_should_rules(self, search_query, analyzer):
        """Return "primary" should rules for the query.

        These are the ones using the strongest boosts, so they are only applied
        to a specific set of fields like the name, the slug and authors.

        Applied rules:

        * Prefer phrase matches that allow swapped terms (boost=4)
        * Then text matches, using the standard text analyzer (boost=3)
        * Then text matches, using a language specific analyzer (boost=2.5)
        * Then look for the query as a prefix of a name (boost=1.5)
        """
        should = [
            # Exact matches need to be queried against a non-analyzed field.
            # Let's do a term query on `name.raw` for an exact match against
            # the add-on name and boost it since this is likely what the user
            # wants.
            # Use a super-high boost to avoid `description` or `summary`
            # getting in our way.
            # Put the raw query first to give it a higher priority during
            # scoring; `boost` alone doesn't necessarily put it first.
            query.Term(**{'name.raw': {
                'value': search_query,
                'boost': 100
            }})
        ]

        rules = [
            (query.MatchPhrase, {
                'query': search_query,
                'boost': 4,
                'slop': 1
            }),
            (query.Match, {
                'query': search_query,
                'boost': 3,
                'analyzer': 'standard',
                'operator': 'and'
            }),
            (query.Prefix, {
                'value': search_query,
                'boost': 1.5
            }),
        ]

        # Add a rule for fuzzy matches ("fire bug" => firebug) (boost=2) for
        # short query strings only (long strings, depending on what characters
        # they contain and how many words are present, can be too costly).
        if len(search_query) < self.MAX_QUERY_LENGTH_FOR_FUZZY_SEARCH:
            rules.append((query.Match, {
                'query': search_query,
                'boost': 2,
                'prefix_length': 4,
                'fuzziness': 'AUTO'
            }))

        # Apply rules to search on a few base fields. Some might not be
        # present in every document type / index.
        for query_cls, opts in rules:
            for field in ('name', 'listed_authors.name'):
                should.append(query_cls(**{field: opts}))

        # For name, also search in translated field with the right language
        # and analyzer.
        if analyzer:
            should.append(
                query.Match(
                    **{
                        'name_l10n_%s' % analyzer: {
                            'query': search_query,
                            'boost': 2.5,
                            'analyzer': analyzer,
                            'operator': 'and'
                        }
                    }))

        return should
Example #11
 def get_must_conditions(self, args):
     conditions = []
     file_id = args.get('file_id')
     if file_id:
         if isinstance(file_id, list):
             conditions.append(query.Terms(file_id=file_id))
         else:
             conditions.append(query.Term(file_id=file_id))
     search_text = args.get('q')
     if file_id and search_text:
         raise BadRequestException("Cannot use both q and file_id params")
     if search_text:
         conditions.append(
             query.DisMax(queries=[
                 query.MatchPhrasePrefix(file_title={
                     'query': search_text,
                     'boost': 10
                 }),
                 query.MatchPhrasePrefix(file_title__no_tone={
                     'query': search_text,
                     'boost': 10
                 }),
                 query.Match(
                     file_title={
                         'query': search_text,
                         'boost': 4,
                         'operator': 'or',
                         'minimum_should_match': "1<75%"
                     }),
                 query.Match(
                     file_title__no_tone={
                         'query': search_text,
                         'boost': 4,
                         'operator': 'or',
                         'minimum_should_match': "1<75%"
                     }),
                 query.Match(
                     file_tag__text={
                         'query': search_text,
                         'boost': 2,
                         'operator': 'or',
                         'minimum_should_match': "1<75%"
                     }),
                 query.MatchPhrasePrefix(file_tag__text={
                     'query': search_text,
                     'boost': 2
                 }),
                 query.Match(
                     description={
                         'query': search_text,
                         'boost': 1,
                         'operator': 'or',
                         'minimum_should_match': "1<75%"
                     }),
                 query.Match(
                     description__no_tone={
                         'query': search_text,
                         'boost': 1,
                         'operator': 'or',
                         'minimum_should_match': "1<75%"
                     })
             ]))
     if not conditions:
         conditions.append(query.MatchAll())
     return conditions
Example #12
def test_term_to_dict():
    assert {
        "term": {
            "_type": "article"
        }
    } == query.Term(_type='article').to_dict()
Example #13
 def get_channel_filter_condition(self, channel):
     return query.Term(channel={
         "value": channel
     })
Example #14
def test_term_to_dict():
    assert {"term": {"f": "value"}} == query.Term(f='value').to_dict()
Example #15
def name_query(q):
    """
    Returns a boolean should query `elasticsearch_dsl.query.Bool` given a
    query string.
    """
    should = []

    # Use a list of (query class, options) tuples: a dict literal keyed on
    # query.Match twice would silently keep only the second entry.
    rules = [
        (query.Match, {
            'query': q,
            'boost': 3,
            'analyzer': 'standard'
        }),
        (query.Match, {
            'query': q,
            'boost': 4,
            'type': 'phrase',
            'slop': 1
        }),
        (query.Prefix, {
            'value': q,
            'boost': 1.5
        }),
    ]
    # Only add fuzzy queries if q is a single word. It doesn't make sense to do
    # a fuzzy query for multi-word queries.
    if ' ' not in q:
        rules.append((query.Fuzzy, {'value': q, 'boost': 2, 'prefix_length': 1}))

    for k, v in rules:
        for field in ('name', 'app_slug', 'author'):
            should.append(k(**{field: v}))

    # Exact matches need to be queried against a non-analyzed field. Let's do a
    # term query on `name_sort` for an exact match against the app name and
    # give it a good boost since this is likely what the user wants.
    should.append(query.Term(name_sort={'value': q, 'boost': 10}))

    analyzer = _get_locale_analyzer()
    if analyzer:
        should.append(
            query.Match(**{'name_%s' % analyzer: {
                'query': q,
                'boost': 2.5
            }}))

    # Add searches on the description field.
    should.append(
        query.Match(description={
            'query': q,
            'boost': 0.8,
            'type': 'phrase'
        }))

    analyzer = _get_locale_analyzer()
    if analyzer:
        should.append(
            query.Match(
                **{
                    'description_%s' % analyzer: {
                        'query': q,
                        'boost': 0.6,
                        'type': 'phrase',
                        'analyzer': get_custom_analyzer(analyzer)
                    }
                }))

    # Add searches on tag field.
    should.append(query.Match(tags={'query': q}))
    if ' ' not in q:
        should.append(query.Fuzzy(tags={'value': q, 'prefix_length': 1}))

    return query.Bool(should=should)
Example #16
 def build_filter_conditions(self):
     return query.Term(is_last_level=True)
Example #17
 def shared_by_email_permission_condition(self, args):
     return query.Bool(must=[
         query.Term(share_mode={'value': 1}),
         query.Term(users_shared={'value': args.get('user_id')})
     ])
Example #18
 def _query_term(cls, value: object, field: Sequence[str]) -> Query:
     q = query.Term(**{".".join(field): value})
     for i in range(len(field) - 2, -1, -1):
         q = query.Nested(path=".".join(field[:i + 1]), query=q)
     return q
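Assuming _query_term is a classmethod on a document class, a two-level field path yields a Term wrapped in one Nested query; a sketch (class and field names are made up) of the expected output:

q = ProductDocument._query_term('books', ('categories', 'code'))
print(q.to_dict())
# {'nested': {'path': 'categories',
#             'query': {'term': {'categories.code': 'books'}}}}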
Example #19
    def filter_queryset(self, request, queryset, view):

        q = request.GET.get('q', '').lower()
        lang = translation.get_language()
        analyzer = self._get_locale_analyzer(lang)

        if not q:
            return queryset

        should = []
        rules = [
            (query.Match, {
                'query': q,
                'boost': 3,
                'analyzer': 'standard'
            }),
            (query.Match, {
                'query': q,
                'boost': 4,
                'type': 'phrase',
                'slop': 1
            }),
            (query.Prefix, {
                'value': q,
                'boost': 1.5
            }),
        ]

        # Only add fuzzy queries if q is a single word. It doesn't make sense
        # to do a fuzzy query for multi-word queries.
        if ' ' not in q:
            rules.append((query.Fuzzy, {
                'value': q,
                'boost': 2,
                'prefix_length': 1
            }))

        # Apply rules to search on a few base fields. Some might not be
        # present in every document type / index.
        for k, v in rules:
            for field in ('app_slug', 'author', 'name', 'short_name', 'slug',
                          'title', 'url_tokenized'):
                should.append(k(**{field: v}))

        # Exact matches need to be queried against a non-analyzed field. Let's
        # do a term query on `name.raw` for an exact match against the item
        # name and give it a good boost since this is likely what the user
        # wants.
        # FIXME: we should also do that on translations and slug/app_slug, but
        # we don't store a raw version for them at the moment.
        should.append(query.Term(**{'name.raw': {'value': q, 'boost': 10}}))
        # Do the same for GUID searches.
        should.append(query.Term(**{'guid': {'value': q, 'boost': 10}}))
        # If query is numeric, check if it is an ID.
        if q.isnumeric():
            should.append(query.Term(**{'id': {'value': q, 'boost': 10}}))

        if analyzer:
            should.append(
                query.Match(
                    **{'name_l10n_%s' % analyzer: {
                        'query': q,
                        'boost': 2.5
                    }}))
            should.append(
                query.Match(**{
                    'short_name_l10n_%s' % analyzer: {
                        'query': q,
                        'boost': 2.5
                    }
                }))

        # Add searches on the description field.
        should.append(
            query.Match(description={
                'query': q,
                'boost': 0.8,
                'type': 'phrase'
            }))

        if analyzer:
            desc_field = 'description_l10n_%s' % analyzer
            desc_analyzer = ('%s_analyzer' % analyzer
                             if analyzer in mkt.STEMMER_MAP else analyzer)
            should.append(
                query.Match(
                    **{
                        desc_field: {
                            'query': q,
                            'boost': 0.6,
                            'type': 'phrase',
                            'analyzer': desc_analyzer
                        }
                    }))

        # Add searches on tag field.
        should.append(query.Term(tags={'value': q}))
        if ' ' not in q:
            should.append(query.Fuzzy(tags={'value': q, 'prefix_length': 1}))

        # The list of functions applied to our `function_score` query.
        functions = [
            query.SF('field_value_factor', field='boost'),
        ]

        # Add a boost for the preferred region, if it exists.
        region = get_region_from_request(request)
        if region:
            functions.append({
                'filter': {
                    'term': {
                        'preferred_regions': region.id
                    }
                },
                # TODO: When we upgrade to Elasticsearch 1.4, change this
                # to 'weight'.
                'boost_factor': 4,
            })

        return queryset.query('function_score',
                              query=query.Bool(should=should),
                              functions=functions)