def test_suggest_can_be_run_separately(data_client):
    s = Search()
    # 'elasticserach' is the deliberately misspelled input that the term
    # suggester is expected to correct.
    s = s.suggest('simple_suggestion', 'elasticserach',
                  term={'field': 'organization'})

    response = s.execute_suggest()

    assert response.success()
    assert response.simple_suggestion[0].options[0].text == 'elasticsearch'
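# Note: execute_suggest() was deprecated and later removed from
# elasticsearch-dsl; on current releases a plain execute() carries the
# suggestions under response.suggest. A minimal sketch of the equivalent
# test (name assumed, not from the snippet above):
def test_suggest_via_execute(data_client):
    s = Search()
    s = s.suggest('simple_suggestion', 'elasticserach',
                  term={'field': 'organization'})
    response = s.execute()
    assert response.suggest.simple_suggestion[0].options[0].text == 'elasticsearch'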
def get(self, request):
    key_words = request.GET.get('s', '')
    key_type = request.GET.get('doc', DEFAULT_DOCUMENT)
    try:
        doc = Search(index=key_type)
    except KeyError:
        doc = Search(index=DEFAULT_DOCUMENT)
    re_datas = []
    try:
        if key_words:
            # Fuzzy completion suggester: tolerate up to two typos and
            # return at most five suggestions.
            s = doc.suggest('my_suggest', key_words, completion={
                'field': 'suggest',
                'fuzzy': {'fuzziness': 2},
                'size': 5,
            })
            suggestions = s.execute()
            for match in suggestions.suggest.my_suggest[0].options:
                source = match._source
                re_datas.append(str(source['title']))
    except ConnectionTimeout as e:
        print(e)
    return HttpResponse(json.dumps(re_datas), content_type="application/json")
def get_suggest(input):
    if not input:
        return None
    s = Search(using=es)
    s = s.index('imdb')
    s = s.suggest('suggestion', input, completion={'field': 'suggest'})
    s = s.source(False)
    ret = s.execute()
    results = [x['text'] for x in ret.suggest.suggestion[0]['options']]
    return jsonify(result=results)
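# The completion suggesters in these snippets assume a mapping whose
# 'suggest' field has type 'completion'. A minimal sketch of such a
# document class (names assumed, not taken from the snippets above):
from elasticsearch_dsl import Completion, Document, Text

class Title(Document):
    title = Text()
    # Backs Search.suggest(..., completion={'field': 'suggest'}).
    suggest = Completion()

    class Index:
        name = 'imdb'  # hypothetical; mirrors the index used above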
def get_queryset(self):
    s = Search(index=ELASTIC_INDEX)
    title_param = self.request.query_params.get('q', None)
    s = s.suggest('auto_complete', title_param,
                  completion={'field': 'suggest'})
    response = s.execute()
    return [{'title': i._source.title, 'score': i._score}
            for i in response.suggest.auto_complete[0].options]
def suggest(q, field, size=10):
    s = Search(using=es.client, index=es.index_name)
    s = s.suggest('suggestions', q, completion={
        'field': field,
        'size': size,
    })
    result = s.execute_suggest().to_dict()
    try:
        suggestions = result.get('suggestions', [])[0]['options']
        return suggestions
    except (IndexError, AttributeError):
        # No suggestions came back for this prefix.
        return []
def autocomplete():
    text = request.args.get('text')
    type = request.args.get('type')
    s = Search(using=client, index="autocomplete")
    s = s.suggest('autocomplete', text, completion={
        'field': type,
        'fuzzy': True,
        'skip_duplicates': True,
    })
    s = s[0:0]  # suggestions only; no search hits needed
    response = s.execute()
    return response.to_dict()
def autocomplete():
    # Get the search term entered by the user.
    text = request.args.getlist('search[term]')
    search = Search(index='covid_index')
    # Run a completion suggester on the query term.
    s = search.suggest('autocomplete', text=text,
                       completion={'field': 'suggestion'})
    response = s.execute()
    options = response.suggest.autocomplete[0].options
    results = list()
    for option in options:
        if option['_source']['title'] not in results:
            results.append(option['_source']['title'])
    return jsonify(results)
def get_queryset(self, queryset, data):
    phrase = data.get('q')
    if 'models' not in data:
        models = self._supported_models
    else:
        models = data['models'].split(',')
    advanced = data.get('advanced')
    op, suffix = get_advanced_options(advanced)
    lang = get_language()
    per_model = data.get('per_model', 1)
    ms = MultiSearch(index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME)
    for model in models:
        if is_enabled('S39_filter_by_geodata.be') and model in self._completion_models:
            # Resolve matching ids via a completion suggester first,
            # then fetch those documents from the common alias.
            sug_query = Search(index=f'{model}s')
            sug_query = sug_query.suggest('title', phrase, completion={
                'field': f'title.{lang}.suggest',
                'size': per_model,
            })
            res = sug_query.execute()
            suggestions = res.suggest['title'][0]
            ids = [sug['_id'] for sug in suggestions['options']]
            query = Search(index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME)
            query = query.filter('term', model=model).query('ids', values=ids)
        else:
            query = Search(index=settings.ELASTICSEARCH_COMMON_ALIAS_NAME)
            query = query.filter('term', model=model)
            query = query.query('bool', should=[
                nested_query_with_advanced_opts(phrase, field, lang, op, suffix)
                for field in ('title', 'notes')
            ])
        query = query.extra(size=per_model)
        ms = ms.add(query)
    return ms
def suggestions(text):
    """
    Concatenate a string for 'did you mean "XY"?'.
    Check whether there is an option for each word
    (if not, keep the original word).
    """
    s = Search(using=Elasticsearch(settings.ELASTIC_URL))
    res = s.suggest('suggestion', text, term={'field': 'all_tags_str'}).execute()
    suggested_words = []
    suggestions = res.suggest['suggestion']
    for ou in suggestions:
        options = ou['options']
        if options:
            suggested_words.append(options[0].text)
        else:
            suggested_words.append(ou['text'])
    suggested = ' '.join(suggested_words)
    if suggested.lower() != text.lower():
        return suggested
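# Hypothetical usage sketch: the term suggester returns the closest
# indexed word per token, so with suitable documents indexed,
#
#     suggestions('pyton develper')  ->  'python developer'
#
# Tokens with no options fall through unchanged, and the function
# returns None when nothing differs from the input.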
def autocomplete_view(request: HttpRequest) -> HttpResponse:
    if not settings.ES_ENABLED or not es_client:
        return HttpResponse('{}')
    if not es_client.indices.exists(es_index_name):
        return HttpResponse('{}')
    query = request.GET.get('q', '')
    s = Search(using=es_client, index=es_index_name)
    response = s.suggest('title_complete', query, completion={
        'field': 'title_complete',
    }).execute()
    # Suggestion results live under response.suggest, not on the
    # response root.
    options = response.suggest['title_complete'][0]['options']
    data = json.dumps([{'id': i['_id'], 'title': i['text']} for i in options])
    mime_type = 'application/json; charset=utf-8'
    http_response = HttpResponse(data, mime_type)
    # http_response['Access-Control-Allow-Origin'] = 'http://localhost:3000'
    # http_response['Access-Control-Allow-Credentials'] = 'true'
    return http_response
def search(search_params, index, page_size, ip, request,
           filter_dead, page=1) -> Tuple[List[Hit], int, int, str]:
    """
    Given a set of keywords and an optional set of filters, perform a ranked,
    paginated search.

    :param search_params: Search parameters. See
     :class:`ImageSearchQueryStringSerializer`.
    :param index: The Elasticsearch index to search (e.g. 'image').
    :param page_size: The number of results to return per page.
    :param ip: The user's hashed IP. Hashed IPs are used to anonymously but
    uniquely identify users exclusively for ensuring query consistency across
    Elasticsearch shards.
    :param request: Django's request object.
    :param filter_dead: Whether dead links should be removed.
    :param page: The results page number.
    :return: Tuple with a List of Hits from Elasticsearch, the total count of
    pages, the total count of results, and a spelling suggestion.
    """
    s = Search(index=index)
    # Apply term filters. Each tuple pairs a filter's parameter name in the
    # API with its corresponding field in Elasticsearch. "None" means that
    # the names are identical.
    filters = [
        ('extension', None),
        ('categories', None),
        ('aspect_ratio', None),
        ('size', None),
        ('source', 'provider'),
        ('license', 'license__keyword'),
        ('license_type', 'license__keyword'),
    ]
    for tup in filters:
        api_field, elasticsearch_field = tup
        s = _apply_filter(s, search_params, api_field, elasticsearch_field)

    # Get suggestions for any route.
    s = s.suggest('get_suggestion', '', term={'field': 'creator'})

    # Exclude mature content unless explicitly enabled by the requester.
    if not search_params.data['mature']:
        s = s.exclude('term', mature=True)
    # Hide data sources from the catalog dynamically.
    filter_cache_key = 'filtered_providers'
    filtered_providers = cache.get(key=filter_cache_key)
    if not filtered_providers:
        filtered_providers = models.ContentProvider.objects \
            .filter(filter_content=True) \
            .values('provider_identifier')
        cache.set(key=filter_cache_key, timeout=CACHE_TIMEOUT,
                  value=filtered_providers)
    to_exclude = [f['provider_identifier'] for f in filtered_providers]
    s = s.exclude('terms', provider=to_exclude)

    # Search either by generic multimatch or by "advanced search" with
    # individual field-level queries specified.
    search_fields = ['tags.name', 'title', 'description']
    if 'q' in search_params.data:
        query = _quote_escape(search_params.data['q'])
        s = s.query('simple_query_string', query=query, fields=search_fields)
        # Get suggestions for the term query.
        s = s.suggest('get_suggestion', query, term={'field': 'creator'})
    else:
        if 'creator' in search_params.data:
            creator = _quote_escape(search_params.data['creator'])
            s = s.query('simple_query_string', query=creator,
                        fields=['creator'])
            # Get suggestions for creator.
            s = s.suggest('get_suggestion', creator,
                          term={'field': 'creator'})
        if 'title' in search_params.data:
            title = _quote_escape(search_params.data['title'])
            s = s.query('simple_query_string', query=title, fields=['title'])
            # Get suggestions for title.
            s = s.suggest('get_suggestion', title, term={'field': 'title'})
        if 'tags' in search_params.data:
            tags = _quote_escape(search_params.data['tags'])
            s = s.query('simple_query_string', fields=['tags.name'],
                        query=tags)
            # Get suggestions for tags.
            s = s.suggest('get_suggestion', tags, term={'field': 'tags.name'})

    # Boost by popularity metrics.
    if POPULARITY_BOOST:
        queries = []
        factors = ['comments', 'views', 'likes']
        boost_factor = 100 / len(factors)
        for factor in factors:
            rank_feature_query = Q('rank_feature', field=factor,
                                   boost=boost_factor)
            queries.append(rank_feature_query)
        s = Search().query(
            Q('bool', must=s.query, should=queries, minimum_should_match=1))

    # Use highlighting to determine which fields contribute to the selection
    # of top results.
    s = s.highlight(*search_fields)
    s = s.highlight_options(order='score')
    # extra() returns a copy, so the result must be reassigned.
    s = s.extra(track_scores=True)
    # Route users to the same Elasticsearch worker node to reduce
    # pagination inconsistencies and increase cache hits.
    s = s.params(preference=str(ip), request_timeout=7)
    # Paginate
    start, end = _get_query_slice(s, page_size, page, filter_dead)
    s = s[start:end]
    try:
        search_response = s.execute()
        log.info(f'query={s.to_dict()}, es_took_ms={search_response.took}')
    except RequestError as e:
        raise ValueError(e)
    results = _post_process_results(
        s, start, end, page_size, search_response, request, filter_dead)
    suggestion = _query_suggestions(search_response)
    result_count, page_count = _get_result_and_page_count(
        search_response, results, page_size)
    return results, page_count, result_count, suggestion
def search_keyword(self, keyword, doc_filter=None, size=10):
    '''
    Create the search object and get the number of hits.
    '''
    s = Search(index='lucid').using(self.client)
    if doc_filter is None:
        doc_filter = {}
    print(doc_filter)
    if 'divtype' in doc_filter:
        # OR together one match query per requested division type.
        for i, types in enumerate(doc_filter['divtype']):
            if i == 0:
                filt = Q("match", divtype=types)
            else:
                filt = filt | Q("match", divtype=types)
        s = s.filter(filt)
    if 'docsource' in doc_filter:
        for i, types in enumerate(doc_filter['docsource']):
            if i == 0:
                filt = Q("match", docsource=types)
            else:
                filt = filt | Q("match", docsource=types)
        s = s.filter(filt)
    flag = 0
    if 'end' in doc_filter:
        flag = 1
        end_year = datetime.datetime(int(doc_filter['end']), 12, 31)
    else:
        end_year = datetime.datetime.now()
    if 'start' in doc_filter:
        flag = 0
        start_year = datetime.datetime(int(doc_filter['start']), 1, 1)
        s = s.filter('range', publishdate={'gte': start_year,
                                           'lte': end_year})
    if flag:
        # An end date without a start date: filter on the upper bound only.
        s = s.filter('range', publishdate={'lte': end_year})

    # -------------------------------- query --------------------------------
    q1 = Q("multi_match", query=keyword,
           fields=["title", "keywords", "doc"], type="best_fields",
           cutoff_frequency=0.0007, operator="and", fuzziness="AUTO")
    q2 = Q("multi_match", query=keyword,
           fields=["title", "keywords", "doc"], type="phrase")
    q3 = Q("bool", must=[q1], should=[q2])
    s = s.query(q3)
    # Phrase suggester for a "did you mean ...?" hint.
    s = s.suggest("didYouMean", keyword, phrase={'field': 'did_you_mean'})
    s = s.highlight_options(order="score", pre_tags=["<mark>"],
                            post_tags=["</mark>"], fragment_size=80,
                            no_match_size=0)
    s = s.highlight('title', number_of_fragments=0)
    s = s.highlight('keywords', number_of_fragments=10)
    s = s.highlight('doc', number_of_fragments=10)
    # ------------------------------------------------------------------------
    n_hits = s.count()
    print("hits = ", n_hits)
    hits_start = 0
    return s, n_hits
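# The phrase suggester above assumes 'did_you_mean' is indexed with a
# shingle analyzer so multi-word corrections can be scored. A minimal
# sketch of such an analyzer (names assumed, not from the snippet above):
from elasticsearch_dsl import analyzer, token_filter

did_you_mean_shingles = token_filter(
    'did_you_mean_shingles', type='shingle',
    min_shingle_size=2, max_shingle_size=3)

did_you_mean_analyzer = analyzer(
    'did_you_mean_analyzer',
    tokenizer='standard',
    filter=['lowercase', did_you_mean_shingles])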
def search_results(self, request, query_term):
    """ Display results based on search term. """
    is_gene_suggest = False
    if request.method == "GET":
        client = Elasticsearch([ES_HOST], timeout=60)
        search_gene = Search().using(client).doc_type('genes').source(
            exclude=['isoforms.cds', 'isoforms.exons', 'GO'])
        if query_term is None:
            studies = Study.objects.all()
            phenotypes = Phenotype.objects.all()
            # The Elasticsearch query cannot be made before knowing the
            # ordering, the page number, etc., as this is taken into account
            # by elasticsearch.py.
        else:
            studies = Study.objects.filter(
                Q(name__icontains=query_term) |
                Q(phenotype__trait_ontology_name__icontains=query_term) |
                Q(phenotype__name__icontains=query_term) |
                Q(phenotype__description__icontains=query_term) |
                Q(publication_pmid__icontains=query_term) |
                Q(publication_pmcid__icontains=query_term)).order_by(
                    'n_hits_perm').reverse()
            phenotypes = Phenotype.objects.filter(
                Q(name__icontains=query_term) |
                Q(description__icontains=query_term)).order_by('name')
            # Add chromosome position search for genomic regions.
            try:
                int(query_term)
                isnum = True
            except ValueError:
                isnum = False
            import re
            pattern = re.compile(
                r"(Chr|CHR|chr)+\s?([0-9]{1,2})+(-|:)?(\d*)\s*(-|:|)?\s*(\d+|)")
            if isnum:
                # Only a number: look for neighboring genes on all
                # chromosomes.
                q = QES('range', positions={'gte': int(query_term),
                                            'lte': int(query_term)})
                search_gene = search_gene.query(q)
            elif pattern.match(query_term):
                # Specific genomic range.
                splitted = re.split(
                    r"(Chr|CHR|chr)+\s?([0-9]{1,2})+(-|:)?(\d*)\s*(-|:|)?\s*(\d+|)",
                    query_term)
                chr = int(splitted[2])
                s_p = None
                e_p = None
                if splitted[4]:
                    s_p = int(splitted[4])
                if splitted[6]:
                    e_p = int(splitted[6])
                # Need to retrieve all genes that somehow overlap with that
                # region (fully contained, right part in, left part in, etc.).
                q = QES('match', chr='chr' + str(chr))
                search_gene = search_gene.query(q)
                if s_p:
                    if e_p:
                        # Look for genes overlapping the region of interest.
                        q = (QES('range', positions={'gte': s_p, 'lte': e_p}) |
                             QES('range', positions={'gte': s_p, 'lte': s_p}) |
                             QES('range', positions={'gte': e_p, 'lte': e_p}))
                    else:
                        q = (QES('range', positions={'gte': s_p, 'lte': s_p}) |
                             QES('range', positions={'gte': s_p}))
                    search_gene = search_gene.query(q)
            else:
                # Any other type of request: fall back to gene name
                # completion.
                is_gene_suggest = True
                search_gene = search_gene.suggest(
                    'gene_suggest', query_term,
                    completion={'field': 'suggest', 'size': 200})
        # Custom ordering.
        ordering = request.query_params.get('ordering', None)
        ordering_fields = {
            'studies': ['name', 'genotype', 'phenotype', 'method',
                        'transformation'],
            'phenotypes': ['name', 'description'],
            'genes': ['name', 'chr', 'start', 'end', 'SNPs_count',
                      'association_count', 'description'],
        }
        if ordering is not None:
            from django.db.models.functions import Lower
            inverted = False
            if ordering.startswith('-'):
                inverted = True
                ordering = ordering[1:]
            if ordering in ordering_fields['studies'] and studies:
                if ordering == 'phenotype' or ordering == 'genotype':
                    # Reference the names, not the internal IDs, for
                    # ordering.
                    ordering += '__name'
                studies = studies.order_by(Lower(ordering)).reverse()
                if inverted:
                    studies = studies.reverse()
            if ordering in ordering_fields['phenotypes'] and phenotypes:
                phenotypes = phenotypes.order_by(Lower(ordering))
                if inverted:
                    phenotypes = phenotypes.reverse()
            if ordering in ordering_fields['genes']:
                # if ordering == 'snp' or ordering == 'study':
                #     ordering += '__name'
                # genes = genes.order_by(Lower(ordering))
                if ordering == 'start' or ordering == 'end':
                    ordering += '_position'
                if inverted:
                    ordering = "-" + ordering
                # sort() returns a clone, so reassign it.
                search_gene = search_gene.sort(ordering)
        n_genes = search_gene.count()
        if studies:
            pagest = self.paginate_queryset(studies)
            study_serializer = StudySerializer(pagest, many=True)
        else:
            study_serializer = StudySerializer(studies, many=True)
        if n_genes:
            size = min(200, search_gene.count())
            if is_gene_suggest:
                size = 0
            results = search_gene[0:size].execute()
            if is_gene_suggest:
                genes = results.to_dict()['suggest']['gene_suggest'][0]['options']
            else:
                genes = results.to_dict()['hits']['hits']
            genes_out = []
            for gene in genes:
                genes_out.append(gene["_source"])
            pagege = self.paginate_queryset(genes_out)
        else:
            genes = []
            pagege = []
        if phenotypes:
            pagephe = self.paginate_queryset(phenotypes)
            phenotype_serializer = PhenotypeListSerializer(pagephe, many=True)
        else:
            phenotype_serializer = PhenotypeListSerializer(phenotypes,
                                                           many=True)
        counts = [len(genes), len(phenotypes), len(studies)]
        PAGE_SIZE = 25.0
        import math
        page_counts = [
            int(math.ceil(float(len(genes)) / PAGE_SIZE)),
            int(math.ceil(float(len(phenotypes)) / PAGE_SIZE)),
            int(math.ceil(float(len(studies)) / PAGE_SIZE)),
        ]
        data = {
            'study_search_results': study_serializer.data,
            'phenotype_search_results': phenotype_serializer.data,
            'gene_search_results': pagege,
            'counts': counts,
            'page_counts': page_counts,
        }
        if any([studies, genes, phenotypes]):
            return self.get_paginated_response(data)
        else:
            return Response({
                'results': {i: data[i] for i in data if i != 'counts'},
                'count': counts,
                'page_count': [0, 0, 0],
            })
def _find(params, total_only=False, make_suggestions=False,
          min_suggestion_score=0.8):
    search_query = Search(index=settings.SEARCH_INDEX_NAME)
    if make_suggestions:
        # XXX research if it's better to use phrase suggesters and if
        # that works
        # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters.html#phrase-suggester
        search_query = search_query.suggest(
            "title_suggestions", params["query"], term={"field": "title"})
        search_query = search_query.suggest(
            "body_suggestions", params["query"], term={"field": "body"})

    sub_queries = []
    sub_queries.append(
        Q("match", title={"query": params["query"], "boost": 2.0}))
    sub_queries.append(
        Q("match", body={"query": params["query"], "boost": 1.0}))
    if " " in params["query"]:
        sub_queries.append(
            Q("match_phrase", title={"query": params["query"], "boost": 10.0}))
        sub_queries.append(
            Q("match_phrase", body={"query": params["query"], "boost": 5.0}))
    sub_query = query.Bool(should=sub_queries)

    if params["locales"]:
        search_query = search_query.filter("terms", locale=params["locales"])
    if params["archive"] == "exclude":
        search_query = search_query.filter("term", archived=False)
    elif params["archive"] == "only":
        search_query = search_query.filter("term", archived=True)

    if params["slug_prefixes"]:
        sub_queries = [Q("prefix", slug=x) for x in params["slug_prefixes"]]
        search_query = search_query.query(query.Bool(should=sub_queries))

    search_query = search_query.highlight_options(
        pre_tags=["<mark>"],
        post_tags=["</mark>"],
        number_of_fragments=3,
        fragment_size=120,
        encoder="html",
    )
    search_query = search_query.highlight("title", "body")

    if params["sort"] == "relevance":
        search_query = search_query.sort("_score", "-popularity")
        search_query = search_query.query(sub_query)
    elif params["sort"] == "popularity":
        search_query = search_query.sort("-popularity", "_score")
        search_query = search_query.query(sub_query)
    else:
        popularity_factor = 10.0
        boost_mode = "sum"
        score_mode = "max"
        search_query = search_query.query(
            "function_score",
            query=sub_query,
            functions=[
                query.SF(
                    "field_value_factor",
                    field="popularity",
                    factor=popularity_factor,
                    missing=0.0,
                )
            ],
            boost_mode=boost_mode,
            score_mode=score_mode,
        )

    search_query = search_query.source(excludes=["body"])
    search_query = search_query[
        params["size"] * (params["page"] - 1):params["size"] * params["page"]]

    retry_options = {
        "retry_exceptions": (
            # This is the standard operational exception.
            exceptions.ConnectionError,
            # This can happen if the search happened right as the index had
            # just been deleted due to a fresh re-indexing happening in Yari.
            exceptions.NotFoundError,
            # This can happen when the index simply isn't ready yet.
            exceptions.TransportError,
        ),
        # The default in redo is 60 seconds. Let's tone that down.
        "sleeptime": settings.ES_RETRY_SLEEPTIME,
        "attempts": settings.ES_RETRY_ATTEMPTS,
        "jitter": settings.ES_RETRY_JITTER,
    }
    with retrying(search_query.execute, **retry_options) as retrying_function:
        response = retrying_function()

    if total_only:
        return response.hits.total

    metadata = {
        "took_ms": response.took,
        "total": {
            # The `response.hits.total` is an
            # `elasticsearch_dsl.utils.AttrDict` instance.
            # Pluck only the exact data needed.
            "value": response.hits.total.value,
            "relation": response.hits.total.relation,
        },
        "size": params["size"],
        "page": params["page"],
    }
    documents = []
    for hit in response:
        try:
            body_highlight = list(hit.meta.highlight.body)
        except AttributeError:
            body_highlight = []
        try:
            title_highlight = list(hit.meta.highlight.title)
        except AttributeError:
            title_highlight = []

        d = {
            "mdn_url": hit.meta.id,
            "score": hit.meta.score,
            "title": hit.title,
            "locale": hit.locale,
            "slug": hit.slug,
            "popularity": hit.popularity,
            "archived": hit.archived,
            "summary": hit.summary,
            "highlight": {
                "body": body_highlight,
                "title": title_highlight,
            },
        }
        documents.append(d)

    try:
        suggest = getattr(response, "suggest")
    except AttributeError:
        suggest = None

    suggestions = []
    if suggest:
        suggestion_strings = _unpack_suggestions(
            params["query"],
            response.suggest,
            ("body_suggestions", "title_suggestions"),
        )

        for score, string in suggestion_strings:
            if score > min_suggestion_score:
                # Sure, this is a different way to spell it, but what will
                # it yield if you actually search for it?
                total = _find(dict(params, query=string), total_only=True)
                if total["value"] > 0:
                    suggestions.append({
                        "text": string,
                        "total": {
                            # This 'total' is an `AttrDict` instance.
                            "value": total.value,
                            "relation": total.relation,
                        },
                    })
                    # Since they're sorted by score, it's usually never
                    # useful to suggest more than exactly 1 good suggestion.
                    break

    return {
        "documents": documents,
        "metadata": metadata,
        "suggestions": suggestions,
    }
class GeneralQueryService:
    CONJUNCTIVE_OPTION = "and"
    DISJUNCTIVE_OPTION = "or"

    def __init__(self, index_path):
        client = Elasticsearch()
        self.search = Search(using=client, index=index_path)

    def query(self, query_text="", author_query="", min_time_query=date.min,
              max_time_query=date.max, query_option=DISJUNCTIVE_OPTION,
              page=1) -> dict:
        # Search on publish time using a range query.
        s = self.search.query('range', publish_time={
            'gte': min_time_query,
            'lte': max_time_query,
        })
        s = _do_free_text_query(s, query_text, query_option)
        s = _do_author_query(s, author_query)
        s = _do_highlight(s)
        s = _do_pagination(s, page)
        response = s.execute()
        result_dict = _extract_response(response)
        return {
            "result_dict": result_dict,
            "total_hits": response.hits.total['value'],
            "stop_words_included": extract_stop_words(query_text),
            "synonyms": get_synonyms(query_text),
        }

    def autocomplete(self, text):
        # Run a completion suggester on the query term.
        s = self.search.suggest('autocomplete', text=text,
                                completion={'field': 'suggestion'})
        response = s.execute()
        options = response.suggest.autocomplete[0].options
        results = list()
        for option in options:
            if option['_source']['title'] not in results:
                results.append(option['_source']['title'])
        return results

    def doc_result(self, query_id):
        # Get the article detail and the 'more like this' result.
        response = self.search.query('ids', values=query_id).execute()
        article_dic = dict()
        article_dic['Title'] = response.hits[0].title
        article_dic['Abstract'] = response.hits[0].abstract
        article_dic['Body'] = response.hits[0].body.replace("\n", "</br></br>")
        article_dic['Author'] = response.hits[0].author
        article_dic['Publish Time'] = response.hits[0].publish_time
        text = article_dic['Title'] + article_dic['Abstract']
        more_like_this_dic = get_more_like_this(self.search, text)
        return article_dic, more_like_this_dic
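# Hypothetical usage sketch (index name and prefix assumed):
#
#     service = GeneralQueryService('covid_index')
#     service.autocomplete('coron')  # e.g. a de-duplicated list of titles
#
# Because autocomplete() filters out repeated titles before returning,
# callers get at most one entry per distinct document title.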