def get_uniprot2ensembl(self, uniprot_id):
    """Map a UniProt accession to its Ensembl gene id.

    Results (including misses, stored as None) are memoized in
    ``self.cache_u2e``; the cache's query/hit counters are updated.

    Raises:
        ValueError: if more than one gene matches the accession.
    """
    assert uniprot_id is not None
    cache = self.cache_u2e
    cache.queries += 1
    if uniprot_id in cache:
        cache.hits += 1
        return cache[uniprot_id]

    # Match either the primary accession or any of the secondary accessions.
    accession_query = Bool(should=[
        Match(uniprot_id=uniprot_id),
        Match(uniprot_accessions=uniprot_id),
    ])
    # track_total_hits gives an exact total on ES 7.x, see
    # https://www.elastic.co/guide/en/elasticsearch/reference/7.x/search-request-track-total-hits.html
    response = (
        Search()
        .using(self._es)
        .index(self._es_index)
        .extra(track_total_hits=True)
        .query(accession_query)[0:1]
        .source(includes=["ensembl_gene_id"])
        .execute()
    )

    total = response.hits.total.value
    if total > 1:
        # More than one matching gene is ambiguous -> error out (not cached).
        raise ValueError("Multiple genes with uniprot %s" % (uniprot_id))
    gene_id = response.hits[0].ensembl_gene_id if total == 1 else None
    cache[uniprot_id] = gene_id
    return gene_id
def get_queryset_posts(self):
    """Build the scored query over forum posts.

    Results are restricted to forums the user may read and to visible
    messages. The score is boosted when the post opens its topic, is
    marked "useful", or has a like/dislike ratio above (boost) or below
    (penalty) 1.0.
    """
    boosts = settings.ZDS_APP['search']['boosts']['post']
    query = (
        Match(_type='post')
        & Terms(forum_pk=self.authorized_forums)
        & Term(is_visible=True)
        & MultiMatch(query=self.search_query, fields=['text_html'])
    )
    score_functions = [
        {'filter': Match(position=1), 'weight': boosts['if_first']},
        {'filter': Match(is_useful=True), 'weight': boosts['if_useful']},
        {'filter': Range(like_dislike_ratio={'gt': 1}), 'weight': boosts['ld_ratio_above_1']},
        {'filter': Range(like_dislike_ratio={'lt': 1}), 'weight': boosts['ld_ratio_below_1']},
    ]
    return FunctionScore(query=query, boost_mode='multiply', functions=score_functions)
def author_messages_search(self, author, time):
    """All of author’s activity on a server."""
    # Filter by the requested time window, then by server, then by author.
    s = self.search \
        .query(self.time_range(time)) \
        .query(Match(**{'server.id': author.server.id})) \
        .query(Match(**{'author.id': author.id}))
    return s
def get_queryset_posts(self):
    """Return the scored post query.

    Hidden messages and posts from forums the user cannot read are
    excluded. Boosts apply when the post is first in its topic, marked
    as useful, or has more likes than dislikes (ratio above 1.0) /
    more dislikes than likes (ratio below 1.0).
    """
    post_boosts = settings.ZDS_APP["search"]["boosts"]["post"]
    query = (
        Match(_type="post")
        & Terms(forum_pk=self.authorized_forums)
        & Term(is_visible=True)
        & MultiMatch(query=self.search_query, fields=["text_html"])
    )
    weighted_filters = (
        (Match(position=1), "if_first"),
        (Match(is_useful=True), "if_useful"),
        (Range(like_dislike_ratio={"gt": 1}), "ld_ratio_above_1"),
        (Range(like_dislike_ratio={"lt": 1}), "ld_ratio_below_1"),
    )
    functions_score = [{"filter": flt, "weight": post_boosts[key]} for flt, key in weighted_filters]
    return FunctionScore(query=query, boost_mode="multiply", functions=functions_score)
def get_queryset_topics(self):
    """Build the scored topic query.

    Results are restricted to forums the user may read; the score is
    boosted when the topic is solved, sticky or locked.
    """
    topic_boosts = settings.ZDS_APP["search"]["boosts"]["topic"]
    query = (
        Match(_type="topic")
        & Terms(forum_pk=self.authorized_forums)
        & MultiMatch(query=self.search_query, fields=["title", "subtitle", "tags"])
    )
    flags = (
        (Match(is_solved=True), "if_solved"),
        (Match(is_sticky=True), "if_sticky"),
        (Match(is_locked=True), "if_locked"),
    )
    functions_score = [{"filter": flt, "weight": topic_boosts[name]} for flt, name in flags]
    return FunctionScore(query=query, boost_mode="multiply", functions=functions_score)
def search_database(drive_order: List[str], fuzzy_search: bool, query: str, index: Type[Document]) -> List[Dict]:
    """Search the given index for `query`, grouping hits by drive priority.

    Shared between Card and Token searches via the `index` document class.

    Raises:
        SearchExceptions.IndexNotFoundException: when the backing
            Elasticsearch index does not exist.
    """
    if not Index(index.Index.name).exists():
        raise SearchExceptions.IndexNotFoundException(index.__name__)

    query_parsed = to_searchable(query)
    # Fuzzy searches hit the analysed field; exact searches hit the keyword field.
    # Both use the AND operator so every query word must match.
    field = "searchq" if fuzzy_search else "searchq_keyword"
    match = Match(**{field: {"query": query_parsed, "operator": "AND"}})

    scanned = (
        index.search()
        .query(match)
        .sort({"priority": {"order": "desc"}})
        .params(preserve_order=True)
        .scan()
    )
    hits_dict = [hit.to_dict() for hit in scanned]
    if hits_dict and fuzzy_search:
        # Closest names (smallest edit distance) first.
        hits_dict.sort(key=lambda h: distance(h["searchq"], query_parsed))

    results: List[Dict] = []
    for drive in drive_order:
        results.extend(h for h in hits_dict if h["source"] == drive)
    return results
def get(self, request, *args, **kwargs):
    """Return up to 10 matching topics as JSON (autocomplete-style search).

    Reads the text query from ``?q=`` and restricts results to forums the
    requesting user is allowed to read.
    """
    if 'q' in request.GET:
        # request.GET['q'] is already a string; ''.join() on it was a no-op.
        self.search_query = request.GET['q']

    results = []

    # NOTE(review): assumes self.search_query has a falsy default when 'q'
    # is absent (e.g. a class attribute) — confirm on the view class.
    if self.index_manager.connected_to_es and self.search_query:
        self.authorized_forums = get_authorized_forums(self.request.user)

        search_queryset = Search()
        query = (
            Match(_type='topic')
            & Terms(forum_pk=self.authorized_forums)
            & MultiMatch(query=self.search_query, fields=['title', 'subtitle', 'tags'])
        )

        topic_boosts = settings.ZDS_APP['search']['boosts']['topic']
        functions_score = [
            {'filter': Match(is_solved=True), 'weight': topic_boosts['if_solved']},
            {'filter': Match(is_sticky=True), 'weight': topic_boosts['if_sticky']},
            {'filter': Match(is_locked=True), 'weight': topic_boosts['if_locked']},
        ]

        scored_query = FunctionScore(query=query, boost_mode='multiply', functions=functions_score)
        search_queryset = search_queryset.query(scored_query)[:10]

        # Build the result
        for hit in search_queryset.execute():
            result = {'id': hit.pk, 'url': str(hit.get_absolute_url), 'title': str(hit.title)}
            results.append(result)

    data = {'results': results}
    return HttpResponse(json.dumps(data), content_type='application/json')
def get(self, request, *args, **kwargs):
    """Full-text wine search with optional exact country/points filters.

    Highlights matched terms in variety/winery/description and returns
    up to 100 hits.
    """
    query = self.request.query_params.get('query')
    country = self.request.query_params.get('country')
    points = self.request.query_params.get('points')

    search = Search(index=constants.ES_INDEX)
    bool_params = {'should': [], 'filter': []}

    if query:
        # Boost variety over winery over description.
        for field_name, boost in (('variety', 3.0), ('winery', 2.0), ('description', 1.0)):
            bool_params['should'].append(Match(**{field_name: {'query': query, 'boost': boost}}))
        bool_params['minimum_should_match'] = 1
        search = search.highlight_options(number_of_fragments=0,
                                          pre_tags=['<mark>'],
                                          post_tags=['</mark>'])
        search = search.highlight('variety', 'winery', 'description')

    if country:
        bool_params['filter'].append(Term(country=country))
    if points:
        bool_params['filter'].append(Term(points=points))

    response = search.query('bool', **bool_params).params(size=100).execute()
    if response.hits.total.value == 0:
        return Response(data=[])

    def highlighted(hit, field):
        # Prefer the highlighted fragment when one exists, else the raw value.
        if 'highlight' in hit.meta and field in hit.meta.highlight:
            return hit.meta.highlight[field][0]
        return getattr(hit, field)

    return Response(data=[{
        'id': hit.meta.id,
        'country': hit.country,
        'description': highlighted(hit, 'description'),
        'points': hit.points,
        'price': hit.price,
        'variety': highlighted(hit, 'variety'),
        'winery': highlighted(hit, 'winery'),
    } for hit in response])
def get(self, request, *args, **kwargs):
    """Return up to 10 published contents matching ``?q=`` as JSON.

    Contents whose ``content_pk`` appears in ``?excluded=`` (comma-separated)
    are filtered out. Tutorial/article/opinion boosts are applied.
    """
    if "q" in request.GET:
        # request.GET["q"] is already a string; "".join() on it was a no-op.
        self.search_query = request.GET["q"]
    excluded_content_ids = request.GET.get("excluded", "").split(",")
    results = []

    if self.index_manager.connected_to_es and self.search_query:
        self.authorized_forums = get_authorized_forums(self.request.user)

        search_queryset = Search()
        # str.split always returns at least one element, so only the
        # "no excluded ids" sentinel [""] needs checking.
        if excluded_content_ids != [""]:
            search_queryset = search_queryset.exclude("terms", content_pk=excluded_content_ids)

        query = Match(_type="publishedcontent") & MultiMatch(
            query=self.search_query, fields=["title", "description"])

        content_boosts = settings.ZDS_APP["search"]["boosts"]["publishedcontent"]
        functions_score = [
            {"filter": Match(content_type="TUTORIAL"), "weight": content_boosts["if_tutorial"]},
            {"filter": Match(content_type="ARTICLE"), "weight": content_boosts["if_article"]},
            {"filter": Match(content_type="OPINION"), "weight": content_boosts["if_opinion"]},
        ]

        scored_query = FunctionScore(query=query, boost_mode="multiply", functions=functions_score)
        search_queryset = search_queryset.query(scored_query)[:10]

        # Build the result
        for hit in search_queryset.execute():
            results.append({
                "id": hit.content_pk,
                "pubdate": hit.publication_date,
                "title": str(hit.title),
                "description": str(hit.description),
            })

    data = {"results": results}
    return HttpResponse(json_handler.dumps(data), content_type="application/json")
def server_author_messages(self, server, author, parser_args):
    """Messages written by `author` on `server`, oldest first."""
    time = parser_args.time
    query = (
        self.search
        .query(Match(**{'server.id': server.id}))
        .query(Match(**{'author.id': author.id}))
        .query(self.time_range(time))
        .sort({'timestamp': {'order': 'asc'}})
    )
    return query
def get(self, request, *args, **kwargs):
    """Return up to 10 matching topics as JSON for the search endpoint.

    Reads the text query from ``?q=`` and restricts results to forums the
    requesting user is allowed to read.
    """
    if "q" in request.GET:
        # request.GET["q"] is already a string; "".join() on it was a no-op.
        self.search_query = request.GET["q"]

    results = []

    # NOTE(review): assumes self.search_query has a falsy default when "q"
    # is absent (e.g. a class attribute) — confirm on the view class.
    if self.index_manager.connected_to_es and self.search_query:
        self.authorized_forums = get_authorized_forums(self.request.user)

        search_queryset = Search()
        query = (Match(_type="topic")
                 & Terms(forum_pk=self.authorized_forums)
                 & MultiMatch(query=self.search_query, fields=["title", "subtitle", "tags"]))

        topic_boosts = settings.ZDS_APP["search"]["boosts"]["topic"]
        functions_score = [
            {"filter": Match(is_solved=True), "weight": topic_boosts["if_solved"]},
            {"filter": Match(is_sticky=True), "weight": topic_boosts["if_sticky"]},
            {"filter": Match(is_locked=True), "weight": topic_boosts["if_locked"]},
        ]

        scored_query = FunctionScore(query=query, boost_mode="multiply", functions=functions_score)
        search_queryset = search_queryset.query(scored_query)[:10]

        # Build the result
        for hit in search_queryset.execute():
            results.append({
                "id": hit.pk,
                "url": str(hit.get_absolute_url),
                "title": str(hit.title),
                "subtitle": str(hit.subtitle),
                "forumTitle": str(hit.forum_title),
                "forumUrl": str(hit.forum_get_absolute_url),
                "pubdate": str(hit.pubdate),
            })

    data = {"results": results}
    return HttpResponse(json_handler.dumps(data), content_type="application/json")
def search_close(self, origin_timestamp, channel, qterm, number_results): """ Find log entries close to origin timestamp, filter by channel, highlight qterm and return them sorted by date. :param origin_timestamp: origin timestamp to find logs around :param channel: Channel to be filtered :param qterm: Term to be highlighted :param number_results: how many results :return: List of sorted log entries (Elastic-search response) :rtype: ``list`` """ # Prepare query s = DslSearch(using=self._es, index=self._index_prefix.format('*')) # Function score main_query_boosting = 1e-15 # only used for highlighting, not for scoring -> give very low signifance pos = MatchPhrase(msg={'query': qterm, 'boost': main_query_boosting}) | \ Match(**{'username': {'query': qterm, 'boost': main_query_boosting}}) | \ Match(channel={'query': qterm, 'boost': main_query_boosting}) | \ Match(msg={'query': qterm, 'boost': main_query_boosting}) main_query = (pos | Q('match_all')) function_score_query = Q('function_score', query=main_query, functions=[ SF( 'exp', **{ '@timestamp': { "origin": origin_timestamp, "scale": "1m", "decay": 0.999 } }) ]) s = s.query(function_score_query) # filter channel s = s.filter('term', **{'channel.keyword': channel}) # Number of results s = s[0:number_results] # Highlight s = s.highlight_options(order='score') s = s.highlight('msg', number_of_fragments=0) s = s.highlight('username') s = s.highlight('channel') # Execute response = s.execute() # Sort results response_sorted = sorted(response, key=lambda hit: hit['@timestamp']) return response_sorted
def author_lastseen(self, author):
    """Return the timestamp of the author's most recent message.

    Returns None implicitly when the author has no messages.
    """
    latest = (
        self.search
        .query(Match(**{'server.id': author.server.id}))
        .query(Match(**{'author.id': author.id}))
        .sort({'timestamp': {'order': 'desc'}})
    )[0]
    for hit in latest.execute():
        return hit.timestamp
def get(self, request, *args, **kwargs):
    """Return up to 10 published contents matching ``?q=`` as JSON.

    Contents whose ``content_pk`` appears in ``?excluded=`` (comma-separated)
    are filtered out. Tutorial/article/opinion boosts are applied.
    """
    if 'q' in request.GET:
        # request.GET['q'] is already a string; ''.join() on it was a no-op.
        self.search_query = request.GET['q']
    excluded_content_ids = request.GET.get('excluded', '').split(',')
    results = []

    if self.index_manager.connected_to_es and self.search_query:
        self.authorized_forums = get_authorized_forums(self.request.user)

        search_queryset = Search()
        # str.split always returns at least one element, so only the
        # "no excluded ids" sentinel [''] needs checking.
        if excluded_content_ids != ['']:
            search_queryset = search_queryset.exclude(
                'terms', content_pk=excluded_content_ids)

        query = Match(_type='publishedcontent') & MultiMatch(
            query=self.search_query, fields=['title', 'description'])

        content_boosts = settings.ZDS_APP['search']['boosts']['publishedcontent']
        functions_score = [
            {'filter': Match(content_type='TUTORIAL'), 'weight': content_boosts['if_tutorial']},
            {'filter': Match(content_type='ARTICLE'), 'weight': content_boosts['if_article']},
            {'filter': Match(content_type='OPINION'), 'weight': content_boosts['if_opinion']},
        ]

        scored_query = FunctionScore(query=query, boost_mode='multiply', functions=functions_score)
        search_queryset = search_queryset.query(scored_query)[:10]

        # Build the result
        for hit in search_queryset.execute():
            results.append({
                'id': hit.content_pk,
                'pubdate': hit.publication_date,
                'title': str(hit.title),
                'description': str(hit.description),
            })

    data = {'results': results}
    return HttpResponse(json_handler.dumps(data), content_type='application/json')
def get_queryset_chapters(self):
    """Build the query matching content chapters on title and text.

    The query is optionally narrowed to the selected category and
    subcategory.
    """
    parts = [
        Match(_type="chapter"),
        MultiMatch(query=self.search_query, fields=["title", "text"]),
    ]
    if self.content_category:
        parts.append(Match(categories=self.content_category))
    if self.content_subcategory:
        parts.append(Match(subcategories=self.content_subcategory))

    combined = parts[0]
    for extra in parts[1:]:
        combined &= extra
    return combined
def search():
    """Course search endpoint: free-text query plus exact-phrase partner and
    platform filters, paginated 10 results per page."""
    query = request.args.get('query', '')
    partner = request.args.get('partner', '')
    platform = request.args.get('platform', '')
    # NOTE(review): int() raises ValueError on a non-numeric ?page= —
    # confirm upstream validation or add a try/except.
    page_num = int(request.args.get('page', '1'))
    s = Search(using=es, index='platforms')
    if partner:
        # Partner must match as a full phrase.
        m = Match(partners={
            "query": partner,
            "type": "phrase",
            "minimum_should_match": "100%"
        })
        s = s.query(m)
    if platform:
        m = Match(platform={
            "query": platform,
            "type": "phrase",
            "minimum_should_match": "100%"
        })
        s = s.query(m)
    if query:
        # Free-text search with title weighted highest, then tags.
        m = MultiMatch(query=query,
                       fields=[
                           'title^3', 'description', 'tags^2', 'partners',
                           'platform'
                       ],
                       type='most_fields',
                       minimum_should_match="75%")
        s = s.query(m)
    # Page window: 10 results per page.
    _from = 10 * (page_num - 1)
    to = _from + 10
    s = s[_from:to]
    res = s.execute()
    # NOTE(review): treating res.hits.total as an int assumes an ES 6.x-style
    # response; on ES 7.x clients this is an object with a .value attribute —
    # confirm the client version in use.
    total_hits = res.hits.total
    if total_hits == 0:
        return render_template('error.html', error='No courses')
    pages = paginate(request.args.to_dict(), total_hits, page_num)
    courses = res.hits
    return render_template('courses.html',
                           courses=courses,
                           total=total_hits,
                           pages=pages,
                           query=query)
def query_for_similar_items(self):
    """Check which of self.similar_items occur in this session's logs.

    For each similar item found in the session, marks it True in
    self.similar_items and records a positive judgement at the item's
    position in self.judgements.
    """
    count = 0
    for sim_item in self.similar_items:
        # Log lines look like "[200] GET <path>".
        fulltext = "[200] GET " + sim_item
        # NOTE(review): a new ES connection is created per item — consider
        # hoisting connection creation out of the loop.
        es_instance = connections.create_connection(hosts=[MLTCandidate.ES_URL], timeout=60)
        initsearch = Search(using=es_instance, index="logstash-*")
        # Restrict to the current session, then phrase-match the request line.
        session_search = initsearch.query(Match(session_id={
            "query": self.session_id}))
        sim_search = session_search.query(Match(message={
            "query": fulltext,
            "type": "phrase"}))
        response = sim_search.execute()
        if(len(response) > 0):
            self.similar_items[sim_item] = True
            self.judgements[count] = 1
        count += 1
def get_queryset_chapters(self):
    """Match content chapters on their title and text.

    When a category or subcategory is selected on the search form, the
    query is narrowed accordingly.
    """
    chapter_filter = Match(_type='chapter')
    text_query = MultiMatch(query=self.search_query, fields=['title', 'text'])
    query = chapter_filter & text_query
    if self.content_category:
        query = query & Match(categories=self.content_category)
    if self.content_subcategory:
        query = query & Match(subcategories=self.content_subcategory)
    return query
def create_mlt_search(self):
    """Assemble the base more-like-this search and store it on self.mlt_search.

    Keeps successful GET "show" actions within the temporal filter window,
    excluding index/search/ancestor listing requests.
    """
    ok_get_phrase = Match(message={"query": "[200] GET", "type": "phrase"})
    not_listing = ~Q('terms', message=['index', 'search.json', 'ancestor-self-siblings.json'])
    show_action = Match(action={"query": "show"})

    search = Search(using=ES_INSTANCE, index="logstash-*")
    search = search.query(ok_get_phrase)
    search = search.query('bool', filter=[self.temporal_filter])
    search = search.query('bool', filter=[not_listing])
    self.mlt_search = search.query(show_action)
def get_queryset_publishedcontents(self):
    """Build the scored query over published contents.

    Weights depend on the content type (tutorial/article/opinion), with an
    extra boost for tutorials that have chapters and a penalty for opinions
    that were not picked.
    """
    boosts = settings.ZDS_APP['search']['boosts']['publishedcontent']
    query = (
        Match(_type='publishedcontent')
        & MultiMatch(query=self.search_query,
                     fields=['title', 'description', 'categories', 'tags', 'text'])
    )
    functions_score = [
        {'filter': Match(content_type='TUTORIAL'),
         'weight': boosts['if_tutorial']},
        {'filter': Match(content_type='TUTORIAL') & Match(has_chapters=True),
         'weight': boosts['if_medium_or_big_tutorial']},
        {'filter': Match(content_type='ARTICLE'),
         'weight': boosts['if_article']},
        {'filter': Match(content_type='OPINION'),
         'weight': boosts['if_opinion']},
        {'filter': Match(content_type='OPINION') & Match(picked=False),
         'weight': boosts['if_opinion_not_picked']},
    ]
    return FunctionScore(query=query, boost_mode='multiply', functions=functions_score)
def get_elk_response(self, request, task_id):
    """Fetch paged, sorted, optionally category-filtered log entries for a task.

    Query params: page (default 0), limit (default 20),
    order ("field,asc|desc"), category (match query on 'categories').

    Returns:
        (elk_response, limit, page) on success, or a DRF Response with an
        error message when Elasticsearch rejects the query.
    """
    # FIXME try to use django-rest-elasticsearch instead
    page = int(request.GET.get('page')) if request.GET.get('page') else 0
    limit = int(
        request.GET.get('limit')) if request.GET.get('limit') else 20
    if request.GET.get('order'):
        field, order = request.GET.get('order').split(',')
        sort_option = {field: {"order": order}}
    else:
        sort_option = {}
    # SECURITY NOTE(review): Elasticsearch credentials are hard-coded in
    # source — move them to settings/environment configuration.
    client = Elasticsearch(hosts=[ELK_HOST + ':9200'],
                           http_auth=('elastic', 'L5M3LPXk6QhxTyZenwo5'))
    s = Search(using=client, index="logstash*").query("match", task_id=task_id)
    if request.GET.get('category'):
        s = s.query(
            Match(categories={"query": request.GET.get('category')}))
    # Paginate after sorting: slice [page*limit, page*limit + limit).
    s = s.sort(sort_option)[(page * limit):(page * limit + limit)]
    try:
        elk_response = s.execute()
    except RequestError as exc:
        logging.warning(exc)
        return Response({"Message": "Wrong query!"})
    return elk_response, limit, page
def query(self, q, kind='term', field='email.raw', offset=0, limit=20, indices=None):
    """Run a search and return a result summary.

    Args:
        q: either a raw query dict (used verbatim) or a query string.
        kind: query type key in QUERY_KINDS; 'default' triggers the
            combined email/username match below.
        field: target field for non-default kinds.
        offset: first result index of the window.
        limit: window size.
        indices: indices to search; defaults to all indices.

    Returns:
        dict with results, total, hits_relation, took and timed_out.
    """
    # Avoid a mutable default argument: build the default index list per call.
    if indices is None:
        indices = [INDICES_ALL]
    search = Search(using=self.es, index=indices)
    if isinstance(q, dict):
        # Raw query dict: update_from_dict mutates the Search in place.
        search.update_from_dict(q)
    elif kind == 'default':
        # Weighted: exact email first, exact username next, fuzzy username last.
        search.query = (Term(**{'email.raw': {
            'value': q,
            'boost': 3
        }}) | Term(**{'username.raw': {
            'value': q,
            'boost': 2
        }}) | Match(username={'query': q}))
    else:
        cls_ = QUERY_KINDS[kind]
        search = search.query(cls_(**{field: q}))
    results = search[offset:offset + limit].execute()
    return dict(results=results,
                total=results.hits.total.value,
                hits_relation=results.hits.total.relation,
                took=results.took,
                timed_out=results.timed_out)
def search_database(drive_order, query, s):
    """Search the index behind `s` for `query`, grouping hits by drive order.

    Shared between Card and Token searches via the Search object `s`.
    """
    # TODO: elasticsearch_dsl.serializer.serializer ?
    # AND semantics: every word of the query must match the searchable name.
    # The name is matched once instead of per drive to save on search time.
    match = Match(searchq={"query": to_searchable(query), "operator": "AND"})
    scanned = (
        s.query(match)
        .sort({'priority': {'order': 'desc'}})
        .params(preserve_order=True)
        .scan()
    )
    all_hits = [hit.to_dict() for hit in scanned]

    # Group the hits in the caller's preferred drive order.
    results = []
    for drive in drive_order:
        results.extend(hit for hit in all_hits if hit['source'] == drive)
    return results
def search_new(s, source, page=0):
    """Return one page of cards for a given source.

    Args:
        s: elasticsearch-dsl Search to filter.
        source: drive/source identifier to match.
        page: zero-based page number.

    Returns:
        dict with "qty" (total matches) and, when non-empty, "hits"
        (list of hit dicts) and "more" ("true"/"false" — kept as strings
        to preserve the existing frontend JSON contract).
    """
    # define page size and the range to paginate with
    page_size = 6
    start_idx = page_size * page
    end_idx = page_size * (page + 1)

    # match the given source
    s_query = s.query(Match(source={"query": source}))

    qty = s_query.count()
    results = {"qty": qty}
    if qty > 0:
        # retrieve a page's worth of hits, converted to dicts for ez parsing
        # in the frontend
        results["hits"] = [hit.to_dict() for hit in s_query[start_idx:end_idx]]
        # let the frontend know whether to keep showing the load-more button
        results["more"] = "true" if qty > end_idx else "false"
    return results
def _build_match_query(field, value, fuzzy=False, operator='or', analyzer=None):
    """Build an Elasticsearch 'Match' condition.

    Args:
        field (str): Field to match on.
        value (str): Text to compare against.
        fuzzy (bool): Enable typo tolerance (fuzziness).
        operator (str): Operator joining the term clauses.
        analyzer (str): Optional analyzer applied to the query text.

    Returns:
        Query: the Match condition.
    """
    match_options = dict(query=value, operator=operator)
    if fuzzy:
        match_options['fuzziness'] = constants.DEFAULT_FUZZINESS
    if analyzer:
        match_options['analyzer'] = analyzer
    return Match(**{field: match_options})
def _search_korpus(request):
    """Search the corpus index for a term and return matching entries.

    Expects ``request.data['term']``; responds 400 when the payload or the
    term is missing, 500 when Elasticsearch fails.
    """
    # .get avoids a KeyError when 'term' is absent from the payload
    # (indexing request.data['term'] directly could raise before the check).
    if not request.data or request.data.get('term') is None:
        return bad_request('no search term')
    term = request.data['term']

    s = Search(index=KORPUS_INDEX)
    # Only return the fields the serializer needs.
    s = s.source(includes=['pk', 'osnovniOblik'])
    s.query = Bool(
        must=[Match(oblici=term)]
    )
    try:
        response = s.execute()
        hits = [hit['_source'] for hit in response.hits.hits]
        serializer = KorpusResponseSerializer(hits, many=True)
        data = serializer.data
        return Response(
            data,
            status=HTTP_200_OK,
            content_type=JSON
        )
    except ElasticsearchException as error:
        return server_error(error.args)
def __contains__(self, efo_id):
    """Return True when the EFO id exists in the index (memoized)."""
    self.cache_contains.queries += 1
    if efo_id in self.cache_contains:
        self.cache_contains.hits += 1
        return self.cache_contains[efo_id]

    # Reuse the main id->term cache when it already knows this id.
    if efo_id in self.cache_efo:
        return self.cache_efo[efo_id] is not None

    # track_total_hits gives an exact total on ES 7.x, see
    # https://www.elastic.co/guide/en/elasticsearch/reference/7.x/search-request-track-total-hits.html
    response = (
        Search()
        .using(self._es)
        .index(self._es_index)
        .extra(track_total_hits=True)
        .query(Match(_id=efo_id))[0:1]
        .source(False)
        .execute()
    )
    found = response.hits.total.value != 0
    self.cache_contains[efo_id] = found
    return found
def get_reaction(self, reaction_id):
    """Fetch a single reaction document by its id.

    Returns the hit as a dict, or None when the id is unknown.
    """
    response = Search().using(self.es).index(self.index).query(
        Match(_id=reaction_id))[0:1].execute()
    # NOTE(review): comparing hits.total to an int assumes an ES 6.x-style
    # integer total; on ES 7.x clients this is an object with a .value
    # attribute — confirm the client version in use.
    if response.hits.total > 0:
        return response.hits[0].to_dict()
    else:
        return None
def _build_queries(self, key, value, search_param, parents=None):
    """Recursively build ngram Match queries for a mapping entry.

    Args:
        key: current field name.
        value: the field's mapping (may contain "fields" and/or "properties").
        search_param: the user-supplied search text.
        parents: path of ancestor field names, used to build the
            "parent__...__key__ngram" parameter.

    Returns:
        list of Match queries.
    """
    # Use None as the default and build a fresh list per call — a mutable
    # default ([]) would leak state between unrelated calls.
    if parents is None:
        parents = []
    queries = []
    if "fields" in value:
        param = f"{key}__ngram"
        if parents:
            param = ""
            for parent in parents:
                param += f"{parent}__"
            param += f"{key}__ngram"
        query = Match(**{param: {"query": search_param}})
        queries.append(query)
        # NOTE(review): removing while iterating skips every other element;
        # if the intent is to empty the list, parents.clear() would do so —
        # confirm before changing, original behavior kept here.
        for parent in parents:
            parents.remove(parent)
    if "properties" in value:
        parents.append(key)
        for k, v in value["properties"].items():
            # deepcopy so recursion cannot mutate this level's path.
            queries.extend(
                self._build_queries(k, v, search_param, parents=deepcopy(parents)))
        for parent in parents:
            parents.remove(parent)
    return queries
def get_uniprot2ensembl(self, uniprot_id):
    """Resolve a UniProt accession to an Ensembl gene id (uncached variant).

    Returns None when nothing matches.

    Raises:
        ValueError: if more than one gene matches the accession.
    """
    assert uniprot_id is not None
    # track_total_hits=True and hits.total.value keep this consistent with
    # the cached variant of this method and the ES 7.x response shape, see
    # https://www.elastic.co/guide/en/elasticsearch/reference/7.x/search-request-track-total-hits.html
    response = Search().using(self._es).index(self._es_index).extra(
        track_total_hits=True).query(
        Bool(should=[
            Match(uniprot_id=uniprot_id),
            Match(uniprot_accessions=uniprot_id)
        ]))[0:1].source(includes=["ensembl_gene_id"]).execute()
    if response.hits.total.value == 0:
        # no hit, return None
        return None
    elif response.hits.total.value == 1:
        # exactly one hit, return it
        return response.hits[0].ensembl_gene_id
    else:
        # more than one hit, throw error
        raise ValueError("Multiple genes with uniprot %s" % (uniprot_id))