def _get_text_queries(self, *, query, fields):
    """
    Build one text query object per entry in ``self.operators``.

    When the query is treated as advanced (``self.use_advanced_query`` is
    set, or ``self._is_advanced_query`` says so) each operator yields a
    ``SimpleQueryString``, whose syntax lets advanced users manipulate the
    results explicitly. Otherwise each operator yields whatever
    ``self._get_fuzzy_query`` returns.

    The intent is to search with both "and" and "or" operators; the "and"
    match should score higher because it satisfies both.
    (Presumably ``self.operators`` holds those two values -- verify.)

    For valid options, see:

    - https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html  # noqa

    :param query: the raw query text.
    :param fields: the fields to search in.
    :returns: a list of query objects, in ``self.operators`` order.
    """
    # Decide once: the kind of query does not depend on the operator.
    advanced = self.use_advanced_query or self._is_advanced_query(query)

    def build(operator):
        if advanced:
            return SimpleQueryString(
                query=query,
                fields=fields,
                default_operator=operator,
            )
        return self._get_fuzzy_query(
            query=query,
            fields=fields,
            operator=operator,
        )

    return [build(operator) for operator in self.operators]
def query(self, search, query):
    """
    Attach the text query to ``search``.

    Also:

    * Uses ``SimpleQueryString`` clauses instead of the default query.
    * Asks for HTML-encoded highlights to avoid XSS issues.

    :param search: the search object to extend.
    :param query: the raw query text.
    :returns: the search object with highlighting, source filtering and
        the bool query applied.
    """
    search = (
        search
        .highlight_options(encoder='html', number_of_fragments=3)
        .source(exclude=['content', 'headers'])
    )

    # One clause per operator: we want both 'and' and 'or' matches, and the
    # 'and' match should score higher since it satisfies both.
    should_clauses = [
        SimpleQueryString(
            query=query,
            fields=self.fields,
            default_operator=operator,
        )
        for operator in self.operators
    ]

    # ``should`` returns a result when either clause matches.
    return search.query(Bool(should=should_clauses))
def _get_text_query(self, *, query, fields, operator):
    """
    Build the text query object for a single operator.

    - ``SimpleQueryString`` is used for advanced queries; its syntax lets
      advanced users manipulate the results explicitly.
    - ``MultiMatch`` is used otherwise; it gives more control over the
      results (like fuzziness) for a better experience on simple queries.

    For valid options, see:

    - https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-query.html
    - https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html  # noqa

    :param query: the raw query text.
    :param fields: the fields to search in.
    :param operator: the operator passed on to the query object.
    :returns: a ``SimpleQueryString`` or a ``MultiMatch`` instance.
    """
    if self.use_advanced_query or self._is_advanced_query(query):
        return SimpleQueryString(
            query=query,
            fields=fields,
            default_operator=operator,
        )

    # Simple query: fuzzy multi-field match. The fuzziness and prefix
    # settings are passed to Elasticsearch as-is (see the match query docs).
    return MultiMatch(
        query=query,
        fields=fields,
        operator=operator,
        fuzziness="AUTO:4,6",
        prefix_length=1,
    )
def __call__(self, search, params):
    """
    Apply the "any" parameter(s) as a simple query string search.

    If ``params`` has no "any" key, ``search`` is returned untouched.
    Otherwise all "any" values are taken out of ``params`` via ``popall``,
    joined with single spaces, and searched for across the quote, tags,
    text and uri.parts fields with "and" as the default operator.

    :param search: the search object to extend.
    :param params: the request parameters; "any" entries are popped from it.
    :returns: the (possibly extended) search object.
    """
    if "any" not in params:
        return search

    terms = popall(params, "any")
    any_query = SimpleQueryString(
        query=" ".join(terms),
        fields=["quote", "tags", "text", "uri.parts"],
        default_operator="and",
    )
    return search.query(any_query)
def _get_query(qterm):
    """
    Build the query for a search term (used in search and search_day).

    When ``helpers.is_simple_query_string_query`` accepts the term, the
    message part is a single boosted ``SimpleQueryString`` over msg,
    username and channel. Otherwise it is a ``DisMax`` of a
    ``SimpleQueryString`` over username/channel and a phrase match on msg.
    Either way the message part is combined, in an outer ``DisMax``, with a
    ``Common`` terms query on msg.

    :param qterm: ``str`` string to build query for
    :return: ``Query`` Search-Query
    """
    if helpers.is_simple_query_string_query(qterm):
        msg_query = SimpleQueryString(
            query=qterm,
            fields=['msg', 'username', 'channel'],
            default_operator='AND',
            boost=5,
        )
    else:
        meta_clause = SimpleQueryString(
            query=qterm,
            fields=['username', 'channel'],
            default_operator='AND',
            boost=1,
        )
        phrase_clause = MatchPhrase(msg={'query': qterm, 'boost': 1})
        msg_query = DisMax(
            tie_breaker=0.7,
            boost=1,
            queries=[meta_clause, phrase_clause],
        )

    common_clause = Common(msg={'query': qterm, 'cutoff_frequency': 0.001})
    return DisMax(
        tie_breaker=0.7,
        boost=1,
        queries=[msg_query, common_clause],
    )
def generate_nested_query(self, query, path, fields, inner_hits):
    """
    Generate a nested query with the passed parameters.

    One ``SimpleQueryString`` is created per entry in ``self.operators``;
    they are combined in a ``Bool(should=...)`` and wrapped in a ``Nested``
    query for ``path``.

    :param query: the raw query text.
    :param path: the nested path to search in.
    :param fields: the fields to search in.
    :param inner_hits: the ``inner_hits`` options handed to ``Nested``.
    :returns: the ``Nested`` query object.
    """
    should_clauses = [
        SimpleQueryString(
            query=query,
            fields=fields,
            default_operator=operator,
        )
        for operator in self.operators
    ]
    return Nested(
        path=path,
        inner_hits=inner_hits,
        query=Bool(should=should_clauses),
    )
def get_es_query(cls, query):
    """
    Return the Elasticsearch query generated from the query string.

    The query text is searched over ``cls.search_fields`` twice, once with
    'AND' and once with 'OR' as default operator. The AND match should
    score higher since it satisfies both.

    :param query: the raw query text.
    :returns: a ``Bool`` query with both clauses under ``should``, so a
        result is returned when either of them matches.
    """
    should_clauses = [
        SimpleQueryString(
            query=query,
            fields=cls.search_fields,
            default_operator=operator,
        )
        for operator in ('AND', 'OR')
    ]
    return Bool(should=should_clauses)
def query(self, search, query):
    """
    Manipulate the query to support nested queries.

    The final query is a ``Bool(should=...)`` of:

    * one ``SimpleQueryString`` per operator over ``self.fields``
      (the title of the page),
    * a nested query over the ``sections`` path,
    * a nested query over the ``domains`` path.

    :param search: the search object to extend.
    :param query: the raw query text.
    :returns: the search object with highlight options and query applied.
    """
    search = search.highlight_options(**self._common_highlight_options)

    # Match queries for the title (of the page) field.
    title_queries = [
        SimpleQueryString(
            query=query,
            fields=self.fields,
            default_operator=operator,
        )
        for operator in self.operators
    ]

    # Nested query for searching in sections.
    sections_highlight = dict(
        self._common_highlight_options,
        fields={
            'sections.title': {},
            'sections.content': {},
        },
    )
    sections_nested_query = self.generate_nested_query(
        query=query,
        path='sections',
        fields=self._section_fields,
        inner_hits={'highlight': sections_highlight},
    )

    # Nested query for searching in domains.
    domains_highlight = dict(
        self._common_highlight_options,
        fields={
            'domains.name': {},
            'domains.docstrings': {},
        },
    )
    domains_nested_query = self.generate_nested_query(
        query=query,
        path='domains',
        fields=self._domain_fields,
        inner_hits={'highlight': domains_highlight},
    )

    final_query = Bool(
        should=[*title_queries, sections_nested_query, domains_nested_query],
    )
    return search.query(final_query)
def query(self, search, query):
    """
    Use a custom ``SimpleQueryString`` query on top of the parent's query.

    The parent implementation runs first; then the query text is searched
    over ``self.fields`` with both 'AND' and 'OR' as default operator. The
    AND match should score higher since it satisfies both.

    :param search: the search object to extend.
    :param query: the raw query text.
    :returns: the search object with the bool query applied.
    """
    search = super().query(search, query)

    should_clauses = [
        SimpleQueryString(
            query=query,
            fields=self.fields,
            default_operator=operator,
        )
        for operator in ('AND', 'OR')
    ]

    # ``should`` returns a result when either clause matches.
    return search.query(Bool(should=should_clauses))
def _get_text_query(self, *, query, fields, operator):
    """
    Build the text query object for a single operator.

    - ``MultiMatch`` (with ``fuzziness="AUTO"``) is used for simple
      queries; it gives more control over the results (like fuzziness).
    - ``SimpleQueryString`` is used for advanced queries; its syntax lets
      advanced users manipulate the results explicitly.

    :param query: the raw query text.
    :param fields: the fields to search in.
    :param operator: the operator passed on to the query object.
    :returns: a ``SimpleQueryString`` or a ``MultiMatch`` instance.
    """
    is_advanced = self.use_advanced_query or self._is_advanced_query(query)
    if not is_advanced:
        return MultiMatch(
            query=query,
            fields=fields,
            operator=operator,
            fuzziness="AUTO",
        )
    return SimpleQueryString(
        query=query,
        fields=fields,
        default_operator=operator,
    )
def site(slug):
    """
    Render the search results for ``q``, or the plain page listing.

    GET args:
        q (Optional[str]): Query string

    With a non-empty ``q`` the pages matching a ``SimpleQueryString`` query
    are rendered with ``search.html``; otherwise all pages returned by an
    unfiltered search are rendered with ``site.html``.

    NOTE(review): ``slug`` and the ``sort`` / ``order`` / custom attribute
    GET args are not read by this function -- confirm whether they are
    still planned.
    """
    q = request.args.get('q')
    if q:
        # Imported locally, as in the original code.
        from elasticsearch_dsl.query import SimpleQueryString

        pages = Page.search().query(SimpleQueryString(query=q)).execute()
        return render_template('search.html', pages=pages)

    pages = Page.search().execute()
    return render_template('site.html', pages=pages)
def add_query_fields(self, s, qterm, **kwargs):
    r"""Add the mail query, filters, index boosts and highlighting to ``s``.

    :param s: DSL-Query to modify
    :type s: ``DslSearch`` Elasticsearch DSL query
    :param qterm: Query-string
    :type qterm: ``str``
    :param \**kwargs: See below

    :Keyword Arguments read by this method:
        * *include_spam* (``bool``) --
          True: Include spam in search (Both)
          False (default): Spam will be filtered and not respected in search
        * *only_attachment* (``bool``) --
          True: Only find emails with attachments
          False (default): emails with and without attachments (Both)
        * *mailq* (``str``) --
          When not empty, only mails whose ``fromEmail.keyword``,
          ``toEmail.keyword`` or ``replyToEmail.keyword`` matches it are kept

    :Keyword Arguments accepted but not read here:
        ``date_gte``, ``date_lte``, ``date_sliding``, ``date_sliding_type``,
        ``use_sliding_value``, ``number_results``, ``sort_field`` and
        ``sort_dir`` are ignored by this method (presumably applied
        elsewhere -- verify against the caller).

    :return: ``DslSearch`` Elasticsearch DSL query
    """
    # Query
    searched_fields = [
        'body',
        'fromEmail',
        'toEmail',
        'replyToEmail',
        'fromName',
        'toName',
        'replyToName',
        'subject',
        'attachmentNames',
    ]

    if helpers.is_simple_query_string_query(qterm):
        body_query = SimpleQueryString(
            query=qterm,
            fields=searched_fields,
            default_operator='AND',
            boost=5,
        )
    else:
        fields_clause = SimpleQueryString(
            query=qterm,
            fields=searched_fields,
            default_operator='AND',
            boost=1,
        )
        phrase_clause = MatchPhrase(body={'query': qterm, 'boost': 1})
        body_query = DisMax(
            tie_breaker=0.7,
            boost=1,
            queries=[fields_clause, phrase_clause],
        )

    common_clause = Common(body={'query': qterm, 'cutoff_frequency': 0.001})
    positive = DisMax(
        tie_breaker=0.7,
        boost=1,
        queries=[body_query, common_clause],
    )

    # Penalize (rather than drop) mails whose subject mentions spam.
    negative = Match(subject={'query': 'spam'})
    s = s.query(
        Boosting(positive=positive, negative=negative, negative_boost=0.2)
    )

    # Get specific query arguments
    include_spam = kwargs.get('include_spam', False)
    only_attachment = kwargs.get('only_attachment', False)
    mailq = kwargs.get('mailq', '')

    # Filter mail
    if mailq != '':
        address_match = (
            Match(**{'fromEmail.keyword': mailq})
            | Match(**{'toEmail.keyword': mailq})
            | Match(**{'replyToEmail.keyword': mailq})
        )
        s = s.filter(address_match)

    # Filter spam
    if not include_spam:
        s = s.filter(~Match(subject={'query': 'spam'}))
        # TODO: Spam-flag currently not in use, but for use with different
        # spam filter
        s = s.filter(~Term(spam=1))

    # Filter attachment
    if only_attachment:
        s = s.filter('term', hasAttachment=True)

    # Extra: per-index boosts. The body is deliberately not excluded from
    # _source: it is needed because no link is available.
    language_boosts = (('ja', 1.5), ('en', 1), ('un', 0.5))
    s = s.extra(
        indices_boost={
            self._index_prefix.format(language): boost
            for language, boost in language_boosts
        }
    )

    # Highlight
    s = s.highlight_options(order='score')
    s = s.highlight('body', fragment_size=50)
    highlighted_fields = (
        'subject',
        'fromEmail',
        'toEmail',
        'replyToEmail',
        'fromEmail.keyword',
        'toEmail.keyword',
        'replyToEmail.keyword',
        'fromName',
        'toName',
        'replyToName',
        'attachmentNames',
    )
    for field_name in highlighted_fields:
        s = s.highlight(field_name)

    return s
# Ad-hoc exploration script: runs several kinds of queries against the
# ``Content`` index for a fixed list of keywords and prints the raw hits.
search = Content.search()
print('\n *************************** \nsearch results:')

keywords = [
    ' صدا و سیما',
    ' قابل مقــــایسه',
    'جبران',
    'عوارض',
]

regex_list = []   # get_keyword_regex() result per keyword, same order
kwargs_list = []  # SimpleQueryString per keyword, same order

for i, keyword in enumerate(keywords):
    keyword_regex = get_keyword_regex(keyword)
    regex_list.append(keyword_regex)

    keyword_query = SimpleQueryString(
        query=keyword_regex,
        fields=['context'],
        default_operator='and',
    )
    kwargs_list.append(keyword_query)

    result = search.query(keyword_query).execute()
    print('\n' + keyword + ':')
    for hit in result.hits.hits:
        print(hit)

# Everything except the documents matching the second keyword.
result3 = search.query().exclude(kwargs_list[1]).execute()

# Phrase-prefix and phrase filters on the first keyword.
result4 = search.query().filter(
    'match_phrase_prefix', context=regex_list[0]
).execute()
result5 = search.query().filter(
    'match_phrase', context=regex_list[0]
).execute()

# Phrase filter on the third keyword.
result6 = search.query().filter(
    'match_phrase', context=regex_list[2]
).execute()

# Other formulations that were tried here: a constant_score query wrapping
# the keyword query, a SimpleQueryString with default_operator='not',
# chaining two match_phrase filters (noted as working), and combining two
# Q("match_phrase", ...) objects with ``&`` inside a single filter.
def simple_search(query, fields, page=1):
    """
    Run a simple-query-string search and return one page of results.

    A ``SimpleQueryString`` is built from ``query`` and ``fields``, turned
    into a dict and passed to ``search_by_query`` as the ``'query'`` entry
    of the request body, together with ``page``.

    :param query: the raw query text.
    :param fields: the fields to search in.
    :param page: the page passed on to ``search_by_query`` (default 1).
    :returns: whatever ``search_by_query`` returns.
    """
    request_body = {
        'query': SimpleQueryString(query=query, fields=fields).to_dict(),
    }
    return search_by_query(request_body, page)
def simple_scan(query, fields):
    """
    Scan with a simple-query-string query.

    Unlike a dict-based request body, the ``SimpleQueryString`` object
    itself is handed to ``scan_by_query``.

    :param query: the raw query text.
    :param fields: the fields to search in.
    :returns: whatever ``scan_by_query`` returns.
    """
    return scan_by_query(SimpleQueryString(query=query, fields=fields))