def test_suggest_can_be_run_separately(data_client):
    s = Search()
    # 'elasticserach' is the deliberately misspelled input that the term
    # suggester is expected to correct.
    s = s.suggest('simple_suggestion', 'elasticserach',
                  term={'field': 'organization'})

    response = s.execute_suggest()

    assert response.success()
    assert response.simple_suggestion[0].options[0].text == 'elasticsearch'
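# Note: execute_suggest() was deprecated and later removed from
# elasticsearch-dsl; on current releases a plain execute() carries the
# suggestions under response.suggest. A minimal sketch of the equivalent
# test (name assumed, not from the snippet above):
def test_suggest_via_execute(data_client):
    s = Search()
    s = s.suggest('simple_suggestion', 'elasticserach',
                  term={'field': 'organization'})
    response = s.execute()
    assert response.suggest.simple_suggestion[0].options[0].text == 'elasticsearch'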
def get(self, request):
    key_words = request.GET.get('s', '')
    key_type = request.GET.get('doc', DEFAULT_DOCUMENT)
    try:
        doc = Search(index=key_type)
    except KeyError:
        doc = Search(index=DEFAULT_DOCUMENT)
    re_datas = []
    try:
        if key_words:
            # Fuzzy completion suggester: tolerate up to two typos and
            # return at most five suggestions.
            s = doc.suggest('my_suggest', key_words, completion={
                'field': 'suggest',
                'fuzzy': {'fuzziness': 2},
                'size': 5,
            })
            suggestions = s.execute()
            for match in suggestions.suggest.my_suggest[0].options:
                source = match._source
                re_datas.append(str(source['title']))
    except ConnectionTimeout as e:
        print(e)
    return HttpResponse(json.dumps(re_datas), content_type="application/json")
def get_suggest(input):
    if not input:
        return None
    s = Search(using=es)
    s = s.index('imdb')
    s = s.suggest('suggestion', input, completion={'field': 'suggest'})
    s = s.source(False)
    ret = s.execute()
    results = [x['text'] for x in ret.suggest.suggestion[0]['options']]
    return jsonify(result=results)
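# The completion suggesters in these snippets assume a mapping whose
# 'suggest' field has type 'completion'. A minimal sketch of such a
# document class (names assumed, not taken from the snippets above):
from elasticsearch_dsl import Completion, Document, Text

class Title(Document):
    title = Text()
    # Backs Search.suggest(..., completion={'field': 'suggest'}).
    suggest = Completion()

    class Index:
        name = 'imdb'  # hypothetical; mirrors the index used above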
def get_queryset(self):
    s = Search(index=ELASTIC_INDEX)
    title_param = self.request.query_params.get('q', None)
    s = s.suggest('auto_complete', title_param,
                  completion={'field': 'suggest'})
    response = s.execute()
    return [{'title': i._source.title, 'score': i._score}
            for i in response.suggest.auto_complete[0].options]
def suggest(q, field, size=10):
    s = Search(using=es.client, index=es.index_name)
    s = s.suggest('suggestions', q, completion={
        'field': field,
        'size': size,
    })
    result = s.execute_suggest().to_dict()
    try:
        suggestions = result.get('suggestions', [])[0]['options']
        return suggestions
    except (IndexError, AttributeError):
        # No suggestions came back for this prefix.
        return []
def autocomplete():
    text = request.args.get('text')
    type = request.args.get('type')
    s = Search(using=client, index="autocomplete")
    s = s.suggest('autocomplete', text, completion={
        'field': type,
        'fuzzy': True,
        'skip_duplicates': True,
    })
    s = s[0:0]  # suggestions only; no search hits needed
    response = s.execute()
    return response.to_dict()
def autocomplete():
    # Get the search term entered by the user.
    text = request.args.getlist('search[term]')
    search = Search(index='covid_index')
    # Run a completion suggester on the query term.
    s = search.suggest('autocomplete', text=text,
                       completion={'field': 'suggestion'})
    response = s.execute()
    options = response.suggest.autocomplete[0].options
    results = list()
    for option in options:
        if option['_source']['title'] not in results:
            results.append(option['_source']['title'])
    return jsonify(results)
def get_queryset(self, queryset, data):
    phrase = data.get('q')
    if 'models' not in data:
        models = self._supported_models
    else:
        models = data['models'].split(',')
    advanced = data.get('advanced')
    op, suffix = get_advanced_options(advanced)
    lang = get_language()
    per_model = data.get('per_model', 1)
    ms = MultiSearch(index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME)
    for model in models:
        if is_enabled('S39_filter_by_geodata.be') and model in self._completion_models:
            # Resolve matching ids via a completion suggester first,
            # then fetch those documents from the common alias.
            sug_query = Search(index=f'{model}s')
            sug_query = sug_query.suggest('title', phrase, completion={
                'field': f'title.{lang}.suggest',
                'size': per_model,
            })
            res = sug_query.execute()
            suggestions = res.suggest['title'][0]
            ids = [sug['_id'] for sug in suggestions['options']]
            query = Search(index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME)
            query = query.filter('term', model=model).query('ids', values=ids)
        else:
            query = Search(index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME)
            query = query.filter('term', model=model)
            query = query.query('bool', should=[
                nested_query_with_advanced_opts(phrase, field, lang, op, suffix)
                for field in ('title', 'notes')
            ])
        query = query.extra(size=per_model)
        ms = ms.add(query)
    return ms
def suggestions(text):
    """
    Concatenate a string for 'did you mean "XY"?'.
    Check whether there is an option for each word
    (if not, keep the original word).
    """
    s = Search(using=Elasticsearch(settings.ELASTIC_URL))
    res = s.suggest('suggestion', text, term={'field': 'all_tags_str'}).execute()
    suggested_words = []
    suggestions = res.suggest['suggestion']
    for ou in suggestions:
        options = ou['options']
        if options:
            suggested_words.append(options[0].text)
        else:
            suggested_words.append(ou['text'])
    suggested = ' '.join(suggested_words)
    if suggested.lower() != text.lower():
        return suggested
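# Hypothetical usage sketch: the term suggester returns the closest
# indexed word per token, so with suitable documents indexed,
#
#     suggestions('pyton develper')  ->  'python developer'
#
# Tokens with no options fall through unchanged, and the function
# returns None when nothing differs from the input.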
def autocomplete_view(request: HttpRequest) -> HttpResponse:
    if not settings.ES_ENABLED or not es_client:
        return HttpResponse('{}')
    if not es_client.indices.exists(es_index_name):
        return HttpResponse('{}')
    query = request.GET.get('q', '')
    s = Search(using=es_client, index=es_index_name)
    response = s.suggest('title_complete', query, completion={
        'field': 'title_complete',
    }).execute()
    # Suggestion results live under response.suggest, not on the
    # response root.
    options = response.suggest['title_complete'][0]['options']
    data = json.dumps([{'id': i['_id'], 'title': i['text']} for i in options])
    mime_type = 'application/json; charset=utf-8'
    http_response = HttpResponse(data, mime_type)
    # http_response['Access-Control-Allow-Origin'] = 'http://localhost:3000'
    # http_response['Access-Control-Allow-Credentials'] = 'true'
    return http_response
def search(search_params, index, page_size, ip, request,
           filter_dead, page=1) -> Tuple[List[Hit], int, int, str]:
    """
    Given a set of keywords and an optional set of filters, perform a ranked,
    paginated search.

    :param search_params: Search parameters. See
     :class:`ImageSearchQueryStringSerializer`.
    :param index: The Elasticsearch index to search (e.g. 'image').
    :param page_size: The number of results to return per page.
    :param ip: The user's hashed IP. Hashed IPs are used to anonymously but
    uniquely identify users exclusively for ensuring query consistency across
    Elasticsearch shards.
    :param request: Django's request object.
    :param filter_dead: Whether dead links should be removed.
    :param page: The results page number.
    :return: Tuple with a List of Hits from Elasticsearch, the total count of
    pages, the total count of results, and a spelling suggestion.
    """
    s = Search(index=index)
    # Apply term filters. Each tuple pairs a filter's parameter name in the
    # API with its corresponding field in Elasticsearch. "None" means that
    # the names are identical.
    filters = [
        ('extension', None),
        ('categories', None),
        ('aspect_ratio', None),
        ('size', None),
        ('source', 'provider'),
        ('license', 'license__keyword'),
        ('license_type', 'license__keyword'),
    ]
    for tup in filters:
        api_field, elasticsearch_field = tup
        s = _apply_filter(s, search_params, api_field, elasticsearch_field)

    # Get suggestions for any route.
    s = s.suggest('get_suggestion', '', term={'field': 'creator'})

    # Exclude mature content unless explicitly enabled by the requester.
    if not search_params.data['mature']:
        s = s.exclude('term', mature=True)
    # Hide data sources from the catalog dynamically.
    filter_cache_key = 'filtered_providers'
    filtered_providers = cache.get(key=filter_cache_key)
    if not filtered_providers:
        filtered_providers = models.ContentProvider.objects \
            .filter(filter_content=True) \
            .values('provider_identifier')
        cache.set(key=filter_cache_key, timeout=CACHE_TIMEOUT,
                  value=filtered_providers)
    to_exclude = [f['provider_identifier'] for f in filtered_providers]
    s = s.exclude('terms', provider=to_exclude)

    # Search either by generic multimatch or by "advanced search" with
    # individual field-level queries specified.
    search_fields = ['tags.name', 'title', 'description']
    if 'q' in search_params.data:
        query = _quote_escape(search_params.data['q'])
        s = s.query('simple_query_string', query=query, fields=search_fields)
        # Get suggestions for the term query.
        s = s.suggest('get_suggestion', query, term={'field': 'creator'})
    else:
        if 'creator' in search_params.data:
            creator = _quote_escape(search_params.data['creator'])
            s = s.query('simple_query_string', query=creator,
                        fields=['creator'])
            # Get suggestions for creator.
            s = s.suggest('get_suggestion', creator,
                          term={'field': 'creator'})
        if 'title' in search_params.data:
            title = _quote_escape(search_params.data['title'])
            s = s.query('simple_query_string', query=title, fields=['title'])
            # Get suggestions for title.
            s = s.suggest('get_suggestion', title, term={'field': 'title'})
        if 'tags' in search_params.data:
            tags = _quote_escape(search_params.data['tags'])
            s = s.query('simple_query_string', fields=['tags.name'],
                        query=tags)
            # Get suggestions for tags.
            s = s.suggest('get_suggestion', tags, term={'field': 'tags.name'})

    # Boost by popularity metrics.
    if POPULARITY_BOOST:
        queries = []
        factors = ['comments', 'views', 'likes']
        boost_factor = 100 / len(factors)
        for factor in factors:
            rank_feature_query = Q('rank_feature', field=factor,
                                   boost=boost_factor)
            queries.append(rank_feature_query)
        s = Search().query(
            Q('bool', must=s.query, should=queries, minimum_should_match=1))

    # Use highlighting to determine which fields contribute to the selection
    # of top results.
    s = s.highlight(*search_fields)
    s = s.highlight_options(order='score')
    # extra() returns a copy, so the result must be reassigned.
    s = s.extra(track_scores=True)
    # Route users to the same Elasticsearch worker node to reduce
    # pagination inconsistencies and increase cache hits.
    s = s.params(preference=str(ip), request_timeout=7)
    # Paginate
    start, end = _get_query_slice(s, page_size, page, filter_dead)
    s = s[start:end]
    try:
        search_response = s.execute()
        log.info(f'query={s.to_dict()}, es_took_ms={search_response.took}')
    except RequestError as e:
        raise ValueError(e)
    results = _post_process_results(
        s, start, end, page_size, search_response, request, filter_dead)
    suggestion = _query_suggestions(search_response)
    result_count, page_count = _get_result_and_page_count(
        search_response, results, page_size)
    return results, page_count, result_count, suggestion
def search_keyword(self, keyword, doc_filter=None, size=10):
    '''
    Create the search object and get the number of hits.
    '''
    s = Search(index='lucid').using(self.client)
    if doc_filter is None:
        doc_filter = {}
    print(doc_filter)
    if 'divtype' in doc_filter:
        # OR together one match query per requested division type.
        for i, types in enumerate(doc_filter['divtype']):
            if i == 0:
                filt = Q("match", divtype=types)
            else:
                filt = filt | Q("match", divtype=types)
        s = s.filter(filt)
    if 'docsource' in doc_filter:
        for i, types in enumerate(doc_filter['docsource']):
            if i == 0:
                filt = Q("match", docsource=types)
            else:
                filt = filt | Q("match", docsource=types)
        s = s.filter(filt)
    flag = 0
    if 'end' in doc_filter:
        flag = 1
        end_year = datetime.datetime(int(doc_filter['end']), 12, 31)
    else:
        end_year = datetime.datetime.now()
    if 'start' in doc_filter:
        flag = 0
        start_year = datetime.datetime(int(doc_filter['start']), 1, 1)
        s = s.filter('range', publishdate={'gte': start_year,
                                           'lte': end_year})
    if flag:
        # An end date without a start date: filter on the upper bound only.
        s = s.filter('range', publishdate={'lte': end_year})

    # -------------------------------- query --------------------------------
    q1 = Q("multi_match", query=keyword,
           fields=["title", "keywords", "doc"], type="best_fields",
           cutoff_frequency=0.0007, operator="and", fuzziness="AUTO")
    q2 = Q("multi_match", query=keyword,
           fields=["title", "keywords", "doc"], type="phrase")
    q3 = Q("bool", must=[q1], should=[q2])
    s = s.query(q3)
    # Phrase suggester for a "did you mean ...?" hint.
    s = s.suggest("didYouMean", keyword, phrase={'field': 'did_you_mean'})
    s = s.highlight_options(order="score", pre_tags=["<mark>"],
                            post_tags=["</mark>"], fragment_size=80,
                            no_match_size=0)
    s = s.highlight('title', number_of_fragments=0)
    s = s.highlight('keywords', number_of_fragments=10)
    s = s.highlight('doc', number_of_fragments=10)
    # ------------------------------------------------------------------------
    n_hits = s.count()
    print("hits = ", n_hits)
    hits_start = 0
    return s, n_hits
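# The phrase suggester above assumes 'did_you_mean' is indexed with a
# shingle analyzer so multi-word corrections can be scored. A minimal
# sketch of such an analyzer (names assumed, not from the snippet above):
from elasticsearch_dsl import analyzer, token_filter

did_you_mean_shingles = token_filter(
    'did_you_mean_shingles', type='shingle',
    min_shingle_size=2, max_shingle_size=3)

did_you_mean_analyzer = analyzer(
    'did_you_mean_analyzer',
    tokenizer='standard',
    filter=['lowercase', did_you_mean_shingles])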
def search_results(self, request, query_term):
    """ Display results based on search term. """
    is_gene_suggest = False
    if request.method == "GET":
        client = Elasticsearch([ES_HOST], timeout=60)
        search_gene = Search().using(client).doc_type('genes').source(
            exclude=['isoforms.cds', 'isoforms.exons', 'GO'])
        if query_term is None:
            studies = Study.objects.all()
            phenotypes = Phenotype.objects.all()
            # The Elasticsearch query cannot be made before knowing the
            # ordering, the page number, etc., as this is taken into account
            # by elasticsearch.py.
        else:
            studies = Study.objects.filter(
                Q(name__icontains=query_term) |
                Q(phenotype__trait_ontology_name__icontains=query_term) |
                Q(phenotype__name__icontains=query_term) |
                Q(phenotype__description__icontains=query_term) |
                Q(publication_pmid__icontains=query_term) |
                Q(publication_pmcid__icontains=query_term)).order_by(
                    'n_hits_perm').reverse()
            phenotypes = Phenotype.objects.filter(
                Q(name__icontains=query_term) |
                Q(description__icontains=query_term)).order_by('name')
            # Add chromosome position search for genomic regions.
            try:
                int(query_term)
                isnum = True
            except ValueError:
                isnum = False
            import re
            pattern = re.compile(
                r"(Chr|CHR|chr)+\s?([0-9]{1,2})+(-|:)?(\d*)\s*(-|:|)?\s*(\d+|)")
            if isnum:
                # Only a number: look for neighboring genes on all
                # chromosomes.
                q = QES('range', positions={'gte': int(query_term),
                                            'lte': int(query_term)})
                search_gene = search_gene.query(q)
            elif pattern.match(query_term):
                # Specific genomic range.
                splitted = re.split(
                    r"(Chr|CHR|chr)+\s?([0-9]{1,2})+(-|:)?(\d*)\s*(-|:|)?\s*(\d+|)",
                    query_term)
                chr = int(splitted[2])
                s_p = None
                e_p = None
                if splitted[4]:
                    s_p = int(splitted[4])
                if splitted[6]:
                    e_p = int(splitted[6])
                # Need to retrieve all genes that somehow overlap with that
                # region (fully contained, right part in, left part in, etc.).
                q = QES('match', chr='chr' + str(chr))
                search_gene = search_gene.query(q)
                if s_p:
                    if e_p:
                        # Look for genes overlapping the region of interest.
                        q = (QES('range', positions={'gte': s_p, 'lte': e_p}) |
                             QES('range', positions={'gte': s_p, 'lte': s_p}) |
                             QES('range', positions={'gte': e_p, 'lte': e_p}))
                    else:
                        q = (QES('range', positions={'gte': s_p, 'lte': s_p}) |
                             QES('range', positions={'gte': s_p}))
                    search_gene = search_gene.query(q)
            else:
                # Any other type of request: fall back to gene name
                # completion.
                is_gene_suggest = True
                search_gene = search_gene.suggest(
                    'gene_suggest', query_term,
                    completion={'field': 'suggest', 'size': 200})
        # Custom ordering.
        ordering = request.query_params.get('ordering', None)
        ordering_fields = {
            'studies': ['name', 'genotype', 'phenotype', 'method',
                        'transformation'],
            'phenotypes': ['name', 'description'],
            'genes': ['name', 'chr', 'start', 'end', 'SNPs_count',
                      'association_count', 'description'],
        }
        if ordering is not None:
            from django.db.models.functions import Lower
            inverted = False
            if ordering.startswith('-'):
                inverted = True
                ordering = ordering[1:]
            if ordering in ordering_fields['studies'] and studies:
                if ordering == 'phenotype' or ordering == 'genotype':
                    # Reference the names, not the internal IDs, for
                    # ordering.
                    ordering += '__name'
                studies = studies.order_by(Lower(ordering)).reverse()
                if inverted:
                    studies = studies.reverse()
            if ordering in ordering_fields['phenotypes'] and phenotypes:
                phenotypes = phenotypes.order_by(Lower(ordering))
                if inverted:
                    phenotypes = phenotypes.reverse()
            if ordering in ordering_fields['genes']:
                # if ordering == 'snp' or ordering == 'study':
                #     ordering += '__name'
                # genes = genes.order_by(Lower(ordering))
                if ordering == 'start' or ordering == 'end':
                    ordering += '_position'
                if inverted:
                    ordering = "-" + ordering
                # sort() returns a clone, so reassign it.
                search_gene = search_gene.sort(ordering)
        n_genes = search_gene.count()
        if studies:
            pagest = self.paginate_queryset(studies)
            study_serializer = StudySerializer(pagest, many=True)
        else:
            study_serializer = StudySerializer(studies, many=True)
        if n_genes:
            size = min(200, search_gene.count())
            if is_gene_suggest:
                size = 0
            results = search_gene[0:size].execute()
            if is_gene_suggest:
                genes = results.to_dict()['suggest']['gene_suggest'][0]['options']
            else:
                genes = results.to_dict()['hits']['hits']
            genes_out = []
            for gene in genes:
                genes_out.append(gene["_source"])
            pagege = self.paginate_queryset(genes_out)
        else:
            genes = []
            pagege = []
        if phenotypes:
            pagephe = self.paginate_queryset(phenotypes)
            phenotype_serializer = PhenotypeListSerializer(pagephe, many=True)
        else:
            phenotype_serializer = PhenotypeListSerializer(phenotypes,
                                                           many=True)
        counts = [len(genes), len(phenotypes), len(studies)]
        PAGE_SIZE = 25.0
        import math
        page_counts = [
            int(math.ceil(float(len(genes)) / PAGE_SIZE)),
            int(math.ceil(float(len(phenotypes)) / PAGE_SIZE)),
            int(math.ceil(float(len(studies)) / PAGE_SIZE)),
        ]
        data = {
            'study_search_results': study_serializer.data,
            'phenotype_search_results': phenotype_serializer.data,
            'gene_search_results': pagege,
            'counts': counts,
            'page_counts': page_counts,
        }
        if any([studies, genes, phenotypes]):
            return self.get_paginated_response(data)
        else:
            return Response({
                'results': {i: data[i] for i in data if i != 'counts'},
                'count': counts,
                'page_count': [0, 0, 0],
            })
def _find(params, total_only=False, make_suggestions=False,
          min_suggestion_score=0.8):
    search_query = Search(index=settings.SEARCH_INDEX_NAME)
    if make_suggestions:
        # XXX research if it's better to use phrase suggesters and if
        # that works
        # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters.html#phrase-suggester
        search_query = search_query.suggest(
            "title_suggestions", params["query"], term={"field": "title"})
        search_query = search_query.suggest(
            "body_suggestions", params["query"], term={"field": "body"})

    sub_queries = []
    sub_queries.append(
        Q("match", title={"query": params["query"], "boost": 2.0}))
    sub_queries.append(
        Q("match", body={"query": params["query"], "boost": 1.0}))
    if " " in params["query"]:
        sub_queries.append(
            Q("match_phrase", title={"query": params["query"], "boost": 10.0}))
        sub_queries.append(
            Q("match_phrase", body={"query": params["query"], "boost": 5.0}))
    sub_query = query.Bool(should=sub_queries)

    if params["locales"]:
        search_query = search_query.filter("terms", locale=params["locales"])
    if params["archive"] == "exclude":
        search_query = search_query.filter("term", archived=False)
    elif params["archive"] == "only":
        search_query = search_query.filter("term", archived=True)

    if params["slug_prefixes"]:
        sub_queries = [Q("prefix", slug=x) for x in params["slug_prefixes"]]
        search_query = search_query.query(query.Bool(should=sub_queries))

    search_query = search_query.highlight_options(
        pre_tags=["<mark>"],
        post_tags=["</mark>"],
        number_of_fragments=3,
        fragment_size=120,
        encoder="html",
    )
    search_query = search_query.highlight("title", "body")

    if params["sort"] == "relevance":
        search_query = search_query.sort("_score", "-popularity")
        search_query = search_query.query(sub_query)
    elif params["sort"] == "popularity":
        search_query = search_query.sort("-popularity", "_score")
        search_query = search_query.query(sub_query)
    else:
        popularity_factor = 10.0
        boost_mode = "sum"
        score_mode = "max"
        search_query = search_query.query(
            "function_score",
            query=sub_query,
            functions=[
                query.SF(
                    "field_value_factor",
                    field="popularity",
                    factor=popularity_factor,
                    missing=0.0,
                )
            ],
            boost_mode=boost_mode,
            score_mode=score_mode,
        )

    search_query = search_query.source(excludes=["body"])
    search_query = search_query[
        params["size"] * (params["page"] - 1):params["size"] * params["page"]]

    retry_options = {
        "retry_exceptions": (
            # This is the standard operational exception.
            exceptions.ConnectionError,
            # This can happen if the search happened right as the index had
            # just been deleted due to a fresh re-indexing happening in Yari.
            exceptions.NotFoundError,
            # This can happen when the index simply isn't ready yet.
            exceptions.TransportError,
        ),
        # The default in redo is 60 seconds. Let's tone that down.
        "sleeptime": settings.ES_RETRY_SLEEPTIME,
        "attempts": settings.ES_RETRY_ATTEMPTS,
        "jitter": settings.ES_RETRY_JITTER,
    }
    with retrying(search_query.execute, **retry_options) as retrying_function:
        response = retrying_function()

    if total_only:
        return response.hits.total

    metadata = {
        "took_ms": response.took,
        "total": {
            # The `response.hits.total` is an
            # `elasticsearch_dsl.utils.AttrDict` instance.
            # Pluck only the exact data needed.
            "value": response.hits.total.value,
            "relation": response.hits.total.relation,
        },
        "size": params["size"],
        "page": params["page"],
    }
    documents = []
    for hit in response:
        try:
            body_highlight = list(hit.meta.highlight.body)
        except AttributeError:
            body_highlight = []
        try:
            title_highlight = list(hit.meta.highlight.title)
        except AttributeError:
            title_highlight = []

        d = {
            "mdn_url": hit.meta.id,
            "score": hit.meta.score,
            "title": hit.title,
            "locale": hit.locale,
            "slug": hit.slug,
            "popularity": hit.popularity,
            "archived": hit.archived,
            "summary": hit.summary,
            "highlight": {
                "body": body_highlight,
                "title": title_highlight,
            },
        }
        documents.append(d)

    try:
        suggest = getattr(response, "suggest")
    except AttributeError:
        suggest = None

    suggestions = []
    if suggest:
        suggestion_strings = _unpack_suggestions(
            params["query"],
            response.suggest,
            ("body_suggestions", "title_suggestions"),
        )

        for score, string in suggestion_strings:
            if score > min_suggestion_score:
                # Sure, this is a different way to spell it, but what will
                # it yield if you actually search for it?
                total = _find(dict(params, query=string), total_only=True)
                if total["value"] > 0:
                    suggestions.append({
                        "text": string,
                        "total": {
                            # This 'total' is an `AttrDict` instance.
                            "value": total.value,
                            "relation": total.relation,
                        },
                    })
                    # Since they're sorted by score, it's usually never
                    # useful to suggest more than exactly 1 good suggestion.
                    break

    return {
        "documents": documents,
        "metadata": metadata,
        "suggestions": suggestions,
    }
class GeneralQueryService:
    CONJUNCTIVE_OPTION = "and"
    DISJUNCTIVE_OPTION = "or"

    def __init__(self, index_path):
        client = Elasticsearch()
        self.search = Search(using=client, index=index_path)

    def query(self, query_text="", author_query="", min_time_query=date.min,
              max_time_query=date.max, query_option=DISJUNCTIVE_OPTION,
              page=1) -> dict:
        # Search on publish time using a range query.
        s = self.search.query('range', publish_time={
            'gte': min_time_query,
            'lte': max_time_query,
        })
        s = _do_free_text_query(s, query_text, query_option)
        s = _do_author_query(s, author_query)
        s = _do_highlight(s)
        s = _do_pagination(s, page)
        response = s.execute()
        result_dict = _extract_response(response)
        return {
            "result_dict": result_dict,
            "total_hits": response.hits.total['value'],
            "stop_words_included": extract_stop_words(query_text),
            "synonyms": get_synonyms(query_text),
        }

    def autocomplete(self, text):
        # Run a completion suggester on the query term.
        s = self.search.suggest('autocomplete', text=text,
                                completion={'field': 'suggestion'})
        response = s.execute()
        options = response.suggest.autocomplete[0].options
        results = list()
        for option in options:
            if option['_source']['title'] not in results:
                results.append(option['_source']['title'])
        return results

    def doc_result(self, query_id):
        # Get the article detail and the 'more like this' result.
        response = self.search.query('ids', values=query_id).execute()
        article_dic = dict()
        article_dic['Title'] = response.hits[0].title
        article_dic['Abstract'] = response.hits[0].abstract
        article_dic['Body'] = response.hits[0].body.replace("\n", "</br></br>")
        article_dic['Author'] = response.hits[0].author
        article_dic['Publish Time'] = response.hits[0].publish_time
        text = article_dic['Title'] + article_dic['Abstract']
        more_like_this_dic = get_more_like_this(self.search, text)
        return article_dic, more_like_this_dic
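# Hypothetical usage sketch (index name and prefix assumed):
#
#     service = GeneralQueryService('covid_index')
#     service.autocomplete('coron')  # e.g. a de-duplicated list of titles
#
# Because autocomplete() filters out repeated titles before returning,
# callers get at most one entry per distinct document title.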