def search(self, args, es_client=client):
    search = Search(using=es_client, index=SearchableEvent.meta.index)
    if args.get('name'):
        search = search.query('fuzzy', name=args['name'])
        search = search.highlight('name')
    if args.get('description'):
        search = search.query('match', description=args['description'])
        search = search.highlight('description')
    # note: the lookup key must match the subscript key below; the original
    # checked 'location-name' but indexed args['location_name']
    if args.get('location_name'):
        search = search.query('fuzzy', location_name=args['location_name'])
        search = search.highlight('location_name')
    if args.get('organizer_name'):
        search = search.query('fuzzy', organizer_name=args['organizer_name'])
        search = search.highlight('organizer_name')
    if args.get('organizer_description'):
        search = search.query(
            'fuzzy', organizer_description=args['organizer_description'])
        search = search.highlight('organizer_description')
    return [to_dict(r) for r in search.execute()]
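The snippet above relies on a `to_dict()` helper that is not shown. A minimal sketch of what such a helper might look like (hypothetical, not the original implementation): it flattens each hit into a plain dict and carries any highlight fragments along.

# Hypothetical to_dict() helper for the search() method above (a sketch,
# not the original code): flatten the hit and attach highlight fragments.
def to_dict(hit):
    result = hit.to_dict()
    if hasattr(hit.meta, 'highlight'):
        result['highlight'] = hit.meta.highlight.to_dict()
    return result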
def get_highlights():
    wiki_field = 'wiki_content'
    qb_field = 'qb_content'
    text = request.form['text']
    s = Search(index='qb')[0:20].query(
        'multi_match', query=text, fields=[wiki_field, qb_field])
    s = s.highlight(wiki_field).highlight(qb_field)
    results = list(s.execute())
    if len(results) == 0:
        highlights = {'wiki': [''], 'qb': [''], 'guess': ''}
    else:
        guessForEvidence = request.form['guessForEvidence']
        guessForEvidence = guessForEvidence.split(
            "style=\"color:blue\">")[1].split("</a>")[0].lower()
        guess = None
        for index, item in enumerate(results):
            if item.page.lower().replace("_", " ")[0:25] == guessForEvidence:
                guess = results[index]
                break
        if guess is None:
            print("expanding search")
            s = Search(index='qb')[0:80].query(
                'multi_match', query=text, fields=[wiki_field, qb_field])
            s = s.highlight(wiki_field).highlight(qb_field)
            results = list(s.execute())
            for index, item in enumerate(results):
                if item.page.lower().replace("_", " ")[0:25] == guessForEvidence:
                    guess = results[index]
                    break
            if guess is None:
                highlights = {'wiki': [''], 'qb': [''], 'guess': ''}
                return jsonify(highlights)
        _highlights = guess.meta.highlight
        try:
            wiki_content = list(_highlights.wiki_content)
        except AttributeError:
            wiki_content = ['']
        try:
            qb_content = list(_highlights.qb_content)
        except AttributeError:
            qb_content = ['']
        highlights = {'wiki': wiki_content, 'qb': qb_content, 'guess': guess.page}
    return jsonify(highlights)
def interface_get_highlights():
    wiki_field = "wiki_content"
    qb_field = "qb_content"
    text = request.form["text"]
    s = Search(index="qb")[0:20].query(
        "multi_match", query=text, fields=[wiki_field, qb_field])
    s = s.highlight(wiki_field).highlight(qb_field)
    results = list(s.execute())
    if len(results) == 0:
        highlights = {"wiki": [""], "qb": [""], "guess": ""}
    else:
        guessForEvidence = request.form["guessForEvidence"]
        guessForEvidence = guessForEvidence.split(
            'style="color:blue">')[1].split("</a>")[0].lower()
        guess = None
        for index, item in enumerate(results):
            if item.page.lower().replace("_", " ")[0:25] == guessForEvidence:
                guess = results[index]
                break
        if guess is None:
            print("expanding search")
            s = Search(index="qb")[0:80].query(
                "multi_match", query=text, fields=[wiki_field, qb_field])
            s = s.highlight(wiki_field).highlight(qb_field)
            results = list(s.execute())
            for index, item in enumerate(results):
                if item.page.lower().replace("_", " ")[0:25] == guessForEvidence:
                    guess = results[index]
                    break
            if guess is None:
                highlights = {"wiki": [""], "qb": [""], "guess": ""}
                return jsonify(highlights)
        _highlights = guess.meta.highlight
        try:
            wiki_content = list(_highlights.wiki_content)
        except AttributeError:
            wiki_content = [""]
        try:
            qb_content = list(_highlights.qb_content)
        except AttributeError:
            qb_content = [""]
        highlights = {
            "wiki": wiki_content,
            "qb": qb_content,
            "guess": guess.page,
        }
    return jsonify(highlights)
def search_close(self, origin_timestamp, channel, qterm, number_results):
    """Find log entries close to origin timestamp, filter by channel,
    highlight qterm and return them sorted by date.

    :param origin_timestamp: origin timestamp to find logs around
    :param channel: channel to be filtered
    :param qterm: term to be highlighted
    :param number_results: how many results to return
    :return: list of sorted log entries (Elasticsearch response)
    :rtype: ``list``
    """
    # Prepare query
    s = DslSearch(using=self._es, index=self._index_prefix.format('*'))

    # Function score: the positive clauses are only used for highlighting,
    # not for scoring, so give them a very low significance
    main_query_boosting = 1e-15
    pos = MatchPhrase(msg={'query': qterm, 'boost': main_query_boosting}) | \
        Match(**{'username': {'query': qterm, 'boost': main_query_boosting}}) | \
        Match(channel={'query': qterm, 'boost': main_query_boosting}) | \
        Match(msg={'query': qterm, 'boost': main_query_boosting})
    main_query = (pos | Q('match_all'))

    function_score_query = Q(
        'function_score',
        query=main_query,
        functions=[
            SF('exp', **{
                '@timestamp': {
                    "origin": origin_timestamp,
                    "scale": "1m",
                    "decay": 0.999,
                }
            })
        ])
    s = s.query(function_score_query)

    # Filter channel
    s = s.filter('term', **{'channel.keyword': channel})

    # Number of results
    s = s[0:number_results]

    # Highlight
    s = s.highlight_options(order='score')
    s = s.highlight('msg', number_of_fragments=0)
    s = s.highlight('username')
    s = s.highlight('channel')

    # Execute and sort results by timestamp
    response = s.execute()
    response_sorted = sorted(response, key=lambda hit: hit['@timestamp'])

    return response_sorted
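A minimal usage sketch for `search_close()`, assuming an instance wired to a live cluster; the instance name `log_store` and all argument values here are illustrative only.

# Hypothetical call: the five log entries nearest to the given timestamp in
# '#general', with 'deploy' highlighted in msg/username/channel.
hits = log_store.search_close(
    origin_timestamp='2021-06-01T12:00:00',
    channel='#general',
    qterm='deploy',
    number_results=5,
)
for hit in hits:
    print(hit['@timestamp'], hit.msg)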
def run(self, key: Union[int, slice] = slice(0, settings.SEARCH_RESULTS_PER_PAGE)):
    """Perform search, placing the results in `self.results`, and the total
    number of results (across all pages) in `self.total`. Chainable."""
    search = DSLSearch(
        using=es7_client(), index=self.get_index()).params(**settings.ES7_SEARCH_PARAMS)

    # add the search class' filter
    search = search.query(self.get_filter())
    # add highlights for the search class' highlight_fields
    for highlight_field, options in self.get_highlight_fields_options():
        search = search.highlight(highlight_field, **options)
    # slice search
    search = search[key]

    # perform search
    self.hits = search.execute().hits
    self.last_key = key

    self.total = self.hits.total.value
    self.results = [self.make_result(hit) for hit in self.hits]

    return self
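The loop above expects `get_highlight_fields_options()` to yield `(field, options)` pairs. A sketch of a plausible implementation, with the field names and option values invented for illustration:

# Hypothetical hook: each entry is passed to search.highlight(field, **options).
def get_highlight_fields_options(self):
    return [
        ('title', {'number_of_fragments': 0}),                  # whole field
        ('content', {'fragment_size': 120, 'order': 'score'}),  # scored snippets
    ]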
def get_highlights(self, text):
    # query top 10 guesses
    s = Search(index="qb_ir_instance_of")[0:10].query(
        "multi_match",
        query=text,
        fields=["wiki_content", "qb_content", "source_content"],
    )
    s = s.highlight("qb_content").highlight("wiki_content")
    results = list(s.execute())
    guess = results[0]  # take the best answer
    _highlights = guess.meta.highlight
    try:
        wiki_content = list(_highlights.wiki_content)
    except AttributeError:
        wiki_content = None
    try:
        qb_content = list(_highlights.qb_content)
    except AttributeError:
        qb_content = None
    highlights = {
        "wiki": wiki_content,
        "qb": qb_content,
        "guess": guess.page,
    }
    return highlights
def GetAuditDataMain(self, data):
    s = Search()
    s = s[0:1000]
    s = s.highlight('*')
    s = s.highlight_options(require_field_match=False)
    t = Q('query_string', query=data) & \
        ~Q('query_string', default_field="AuditType.Generator", query="stateagentinspector") & \
        ~Q('query_string', default_field="AuditType.Generator", query="w32processes-tree")
    query = s.query(t)
    try:
        r = requests.post(
            self.es_host + ":" + self.es_port + self.index + self.type_audit_type + '/_search',
            data=json.dumps(query.to_dict()),
            auth=(self.elastic_user, self.elastic_pass),
            verify=False)
    except ConnectionError as e:
        ret = {"connection_error": e.args[0]}
        return ret
    data = []
    try:
        for x in r.json()['hits']['hits']:
            for y, v in x['highlight'].items():
                data.append({
                    "doc_id": x['_id'],
                    "endpoint": x['_parent'],
                    "audittype": x['_source']['AuditType']['Generator'],
                    "field": y,
                    "response": v
                })
    except KeyError:
        pass
    return data
def get_highlights():
    wiki_field = "wiki_content"
    qb_field = "qb_content"
    text = request.form["text"]
    s = Search(index="qb")[0:10].query(
        "multi_match", query=text, fields=[wiki_field, qb_field])
    s = s.highlight(wiki_field).highlight(qb_field)
    results = list(s.execute())
    if len(results) == 0:
        highlights = {"wiki": [""], "qb": [""], "guess": ""}
    else:
        guess = results[0]  # take the best answer
        _highlights = guess.meta.highlight
        try:
            wiki_content = list(_highlights.wiki_content)
        except AttributeError:
            wiki_content = [""]
        try:
            qb_content = list(_highlights.qb_content)
        except AttributeError:
            qb_content = [""]
        highlights = {
            "wiki": wiki_content,
            "qb": qb_content,
            "guess": guess.page,
        }
    return jsonify(highlights)
def get_highlights(text):
    # query top 10 guesses
    s = Search(index='qb_ir_instance_of')[0:10].query(
        'multi_match', query=text,
        fields=['wiki_content', 'qb_content', 'source_content'])
    s = s.highlight('qb_content').highlight('wiki_content')
    results = list(s.execute())
    if len(results) == 0:
        highlights = {'wiki': [''], 'qb': [''], 'guess': ''}
        return highlights
    guess = results[0]  # take the best answer
    _highlights = guess.meta.highlight
    try:
        wiki_content = list(_highlights.wiki_content)
    except AttributeError:
        wiki_content = ['']
    try:
        qb_content = list(_highlights.qb_content)
    except AttributeError:
        qb_content = ['']
    highlights = {'wiki': wiki_content, 'qb': qb_content, 'guess': guess.page}
    return highlights
def get_highlights(text):
    # query top 10 guesses
    s = Search(index="qb_0")[0:10].query(
        "multi_match",
        query=text,
        fields=["wiki_content", "qb_content", "source_content"],
    )
    s = s.highlight("qb_content").highlight("wiki_content")
    results = list(s.execute())
    if len(results) == 0:
        highlights = {"wiki": [""], "qb": [""], "guess": ""}
        return highlights
    guess = results[0]  # take the best answer
    _highlights = guess.meta.highlight
    try:
        wiki_content = list(_highlights.wiki_content)
    except AttributeError:
        wiki_content = [""]
    try:
        qb_content = list(_highlights.qb_content)
    except AttributeError:
        qb_content = [""]
    highlights = {"wiki": wiki_content, "qb": qb_content, "guess": guess.page}
    return highlights
def run(self, query, page=1, default_operator="AND", **kwargs):
    """Perform search, placing the results in `self.results`, and the total
    number of results (across all pages) in `self.total`. Chainable."""
    search = DSLSearch(using=es7_client(), index=self.get_index()).params(
        **settings.ES7_SEARCH_PARAMS
    )

    # add the search class' filter
    search = search.query(
        self.get_filter(query=query, default_operator=default_operator, **kwargs)
    )
    # add highlights for the search class' highlight_fields
    search = search.highlight(*self.get_highlight_fields(), **self.get_highlight_options())
    # do pagination
    start = (page - 1) * self.results_per_page
    search = search[start:start + self.results_per_page]

    # perform search
    self.hits = search.execute().hits

    self.total = self.hits.total.value if self.hits else 0
    self.results = [self.make_result(hit) for hit in self.hits]

    return self
def get(self, request, *args, **kwargs):
    query = self.request.query_params.get('query')
    country = self.request.query_params.get('country')
    points = self.request.query_params.get('points')
    search = Search(index=constants.ES_INDEX)
    q = {'should': [], 'filter': []}
    if query:
        q['should'] = [
            Match(variety={'query': query, 'boost': 3.0}),
            Match(winery={'query': query, 'boost': 2.0}),
            Match(description={'query': query, 'boost': 1.0}),
        ]
        q['minimum_should_match'] = 1
        search = search.highlight_options(
            number_of_fragments=0, pre_tags=['<mark>'], post_tags=['</mark>'])
        search = search.highlight('variety', 'winery', 'description')
    if country:
        q['filter'].append(Term(country=country))
    if points:
        q['filter'].append(Term(points=points))
    response = search.query('bool', **q).params(size=100).execute()
    if response.hits.total.value > 0:
        return Response(data=[{
            'id': hit.meta.id,
            'country': hit.country,
            'description': (hit.meta.highlight.description[0]
                            if 'highlight' in hit.meta and 'description' in hit.meta.highlight
                            else hit.description),
            'points': hit.points,
            'price': hit.price,
            'variety': (hit.meta.highlight.variety[0]
                        if 'highlight' in hit.meta and 'variety' in hit.meta.highlight
                        else hit.variety),
            'winery': (hit.meta.highlight.winery[0]
                       if 'highlight' in hit.meta and 'winery' in hit.meta.highlight
                       else hit.winery),
        } for hit in response])
    else:
        return Response(data=[])
async def fetchArticlBody(*, projectName: str = Path(...), urlItem: str, word: str):
    # Query the articles collection of the project database:
    # convert projectName to projectId first
    projectId = await findProjectIdFromProjectName(
        dbPrefix, 'Project',
        queryDict={'projectName': projectName},
        showDict={'_id': 1})
    if not projectId:
        raise HTTPException(status_code=503, detail='projectNotExist')

    # Page bounds
    start = 0
    end = 0

    # The ES index to search (the equivalent of a database in Mongo)
    _index = f'kwm-{projectId}.articles'.lower()
    #print('_index', _index)

    s = Search()
    q1 = Q("match_phrase", url=f"\"{urlItem}\"")  # match on url
    q2 = Q('match_phrase', body=f"\"{word}\"")    # match on word
    s = s.query(q1)
    s = s.query(q2)
    s = s.source(includes=[''])  # do not return the _source fields
    s = s.highlight_options(order='score')
    s = s.highlight_options(
        pre_tags="<strong style=\"background: yellow;color: red\">")
    s = s.highlight_options(post_tags="</strong>")
    s = s.highlight_options(fragment_size=300)
    # s = s.highlight('body')
    s = s[0:10000]  # common setting
    #print(s.to_dict())

    # Execute
    response = await esRun(s.to_dict(), _index)  # s.execute(ignore_cache=True)
    #totalCount = response.hits.total.value
    temp = response.to_dict()['hits']['hits']
    result = []
    for item in temp:
        tt = {'_id': {'$oid': item['_id']}}
        tt.update(item['_source'])
        if item.get('highlight'):
            tt.update({'highlight': item['highlight']})
        if start >= 0 and end > 0:
            tt.update({'id': start + 1})
        result.append(tt)
        start = start + 1
    return result
def portalSearch(expression, start=0, end=25):
    client = Elasticsearch()
    ret = {'nodes': [], 'Counts': {}}
    q = Q("bool", must=[Q('match', _all=expression)])
    s = Search(using=client, index="neo4j-inquisite-node",
               doc_type="Repository,Data").query(q)
    q_total = s.count()
    s = s[0:q_total]
    s = s.highlight_options(require_field_match=False)
    s = s.highlight('*', fragment_size=45)
    res = s.execute()
    data = {}
    uuids = []
    pub_uuids = {}
    if res:
        for r in res:
            d = r.to_dict()
            if r.meta.doc_type == 'Repository':
                if int(d['published']) == 0:
                    continue
                repo_id = r.meta.id
                ret['nodes'].append({
                    "id": r.meta.id,
                    "type": "Repository",
                    "name": d['name'],
                    "description": d['readme']
                })
                repo_uuids = SearchManager._getDataUUIDsForRepo(repo_id)
                pub_uuids[repo_id] = repo_uuids
            else:
                hits = []
                highs = r.meta.highlight.to_dict()
                for high_field, high_value in highs.items():
                    hits.append({high_field: high_value})
                data[r.meta.id] = {'id': r.meta.id, "hits": hits}
                uuids.append(r.meta.id)
        qString = ("MATCH (r:Repository)--(t:SchemaType)--(d:Data) "
                   "WHERE d.uuid IN {uuids} AND r.published = '1' "
                   "RETURN d.uuid as uuid, r.name as repo_name, r.uuid as repo_id")
        pub_data = db.run(qString, {"uuids": uuids})
        data_max = 0
        for checked in pub_data:
            if data_max >= 32:
                break
            ret['nodes'].append({
                "id": checked['uuid'],
                "type": "Data",
                "repo_id": checked['repo_id'],
                "repo_name": checked['repo_name'],
                "hits": data[checked['uuid']]['hits']
            })
            data_max += 1
        return ret
    else:
        return ret
def es_search(query_string, branch, ops_type, limit, fuzzy):
    s = Search(using=elasticsearch(), index=branch, doc_type=ops_type)
    s = s[0:int(limit)]
    fields = ['label^4', 'title^3', 'prefLabel^4', 'identifier', 'description',
              'altLabel^2', 'Synonym', 'Definition', 'shortName', 'mnemonic',
              'disease_class']
    if fuzzy:
        q = Q('multi_match', query=query_string, fields=fields,
              fuzziness="AUTO", prefix_length=5,
              type='best_fields', tie_breaker=0.3)
    else:
        q = Q('multi_match', query=query_string, fields=fields,
              fuzziness=0, type='best_fields', tie_breaker=0.3)
    s = s.highlight('label', 'title', 'identifier', 'description', 'prefLabel',
                    'altLabel', 'Synonym', 'Definition', 'shortName',
                    'mnemonic', 'disease_class')
    s = s.query(q)
    es_response = s.execute()
    return es_response.to_dict()
def search_content(keyword, limit=50):
    client = Elasticsearch()
    q = Q("multi_match", query=keyword, fields=['title', 'content'])
    # s = Search(using=client, index="pet-index").query("match", content="金毛")
    s = Search(using=client, index="pet-index").query(q)
    s = s[0:limit]
    s = s.highlight_options(order='score')
    s = s.highlight('content')
    response = s.execute()
    return response
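A short sketch of how a caller might consume the response returned above; the fragments live under `hit.meta.highlight.<field>`, and the search keyword here is illustrative.

# Assumed caller: print the highlighted 'content' fragments of each hit.
for hit in search_content('retriever'):
    if hasattr(hit.meta, 'highlight'):
        for fragment in hit.meta.highlight.content:
            print(fragment)  # matched terms are wrapped in <em> tags by default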
def match_phrase_in_text(phrase):
    s = Search(using=client, index="sample_film_index")
    q = Q('match_phrase', text=phrase)
    s = s.query(q)
    s = s.highlight_options(pre_tags='<mark>', post_tags='</mark>')  # for html
    s = s.highlight('text', fragment_size=999999999, number_of_fragments=1)
    response = s.execute()
    print("Num hits for", phrase, len(response.to_dict()['hits']['hits']))
    for hit in response:
        print(hit.meta.score)      # doc score
        print(hit.meta.highlight)  # highlighted snippet
def test6_highlight():
    """Highlighting demo.

    :return:
    """
    s = Search(using=client, index='test-index')
    s = s.query('match', sport='足球')
    s = s.highlight('sport')  # field to highlight
    response = s.execute()
    for hit in response:
        for h_result in hit.meta.highlight.sport:  # fetch the highlighted fragments
            print(h_result)
def run(self, query, page=1, default_operator="AND"):
    """Perform search, placing the results in `self.results`, and the total
    number of results (across all pages) in `self.total`. Chainable."""
    # Default to a dfs query
    search = DSLSearch(
        using=es7_client(), index=self.get_index()).params(search_type="dfs_query_then_fetch")

    # add the search class' filter
    search = search.query("bool", filter=self.get_filter())
    # add query, search over the search class' fields
    search = search.query(
        "simple_query_string",
        query=query,
        default_operator=default_operator,
        fields=self.get_fields(),
        # everything apart from WHITESPACE, as that interferes with char mappings
        # and synonyms with whitespace in them by breaking up the phrase into tokens,
        # before they have a chance to go through the filter:
        flags="AND|ESCAPE|FUZZY|NEAR|NOT|OR|PHRASE|PRECEDENCE|PREFIX|SLOP",
    )
    # add highlights for the search class' highlight_fields
    search = search.highlight(
        *self.get_highlight_fields(),
        type="fvh",
        # order highlighted fragments by their relevance:
        order="score",
        # only get one fragment per field:
        number_of_fragments=1,
        # split fragments at the end of sentences:
        boundary_scanner="sentence",
        # return fragments roughly this size:
        fragment_size=SNIPPET_LENGTH,
        # add these tags before/after the highlighted sections:
        pre_tags=[f"<{HIGHLIGHT_TAG}>"],
        post_tags=[f"</{HIGHLIGHT_TAG}>"],
    )
    # do pagination
    start = (page - 1) * self.results_per_page
    search = search[start:start + self.results_per_page]

    # perform search
    self.hits = search.execute().hits

    self.total = self.hits.total.value if self.hits else 0
    self.results = [self.make_result(hit) for hit in self.hits]

    return self
def doSearch(self, body):
    try:
        client = connections.create_connection(hosts=[settings.ES_URL])
        s = Search(using=client, index=settings.ES_INDEX_NAME,
                   doc_type=settings.ES_INDEX_TYPE)
        # update_from_dict() keeps the client, index and doc_type set above,
        # whereas the classmethod Search.from_dict() would replace this Search
        # with a fresh, unconnected one
        s = s.update_from_dict(body)
        # highlight the following fields in the search result
        s = s.highlight('title')
        s = s.highlight('description')
        s = s.highlight('data_time')
        s = s.highlight('source')
        body = s.to_dict()
        response = s.execute()
    except Exception:
        return None
    return response
def click3(self):
    ### - ELASTIC SEARCH
    try:
        # search with a fully functional Elasticsearch client
        # (the original referenced an undefined `es`; use `client` throughout)
        client = Elasticsearch()
        res = client.search(index="movies", body={})
        sample = res['hits']['hits']
        s = Search(using=client, index="movies")
        #print(s)
        getText = self.textSearch.toPlainText()
        q = Q('match', title=getText)
        s = s.query(q)
        s = s.highlight('text', fragment_size=20)
        response = s.execute()
        allTogether = ''
        for hit in response.hits.hits:
            allTogether = allTogether + "\n" + hit._source.title + " ----> By" + hit._source.cast
        print('FROM FILE 1 >>>>')
        self.textOut.setText(allTogether)

        res = client.search(index="imdb", body={})
        sample = res['hits']['hits']
        s = Search(using=client, index="imdb")
        print('From FILE 2 ->>>>')
        getText = self.textSearch.toPlainText()
        q = Q('match', title=getText)
        s = s.query(q)
        s = s.highlight('text', fragment_size=20)
        response = s.execute()
        allTogether = ''
        for hit in response.hits.hits:
            allTogether = allTogether + "\n" + hit._source.title + "->>> By " + hit._source.country
        print('FROM FILE 2 >>>>')
        self.textOut.setText(allTogether)
    except NotFoundError:
        print('error not found')
def free_search_in_title(word):
    s = Search(using=client, index="sample_film_index")
    # Q is a shortcut for constructing a query object
    q = Q('match', title=word)
    # At some point, q has to be added to the search object.
    s = s.query(q)
    s = s.highlight_options(pre_tags='<mark>', post_tags='</mark>')  # for html
    # highlight() takes field names only; passing the search word as a second
    # positional argument (as the original did) would treat it as another field
    s = s.highlight('title', fragment_size=999999999, number_of_fragments=1)
    response = s.execute()
    print("Num hits for", word, len(response.to_dict()['hits']['hits']))
    for hit in response:
        print(hit.meta.score)      # doc score
        print(hit.meta.highlight)  # highlighted snippet
def get_queryset(self):
    if not self.index_manager.connected_to_es:
        messages.warning(self.request, _(u'Impossible de se connecter à Elasticsearch'))
        return []

    if self.search_query:
        # find forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        search_queryset = Search()

        # setting the different querysets (according to the selected models, if any)
        part_querysets = []
        chosen_groups = self.search_form.cleaned_data['models']

        if chosen_groups:
            models = []
            for group in chosen_groups:
                if group in settings.ZDS_APP['search']['search_groups']:
                    models.append(settings.ZDS_APP['search']['search_groups'][group][1])
        else:
            models = [v[1] for k, v in settings.ZDS_APP['search']['search_groups'].items()]

        models = reduce(operator.concat, models)

        for model in models:
            part_querysets.append(getattr(self, 'get_queryset_{}s'.format(model))())

        queryset = part_querysets[0]
        for query in part_querysets[1:]:
            queryset |= query

        # weighting:
        weight_functions = []
        for _type, weights in settings.ZDS_APP['search']['boosts'].items():
            if _type in models:
                weight_functions.append({'filter': Match(_type=_type), 'weight': weights['global']})

        scored_queryset = FunctionScore(query=queryset, boost_mode='multiply', functions=weight_functions)
        search_queryset = search_queryset.query(scored_queryset)

        # highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150, number_of_fragments=5, pre_tags=['[hl]'], post_tags=['[/hl]'])
        search_queryset = search_queryset.highlight('text').highlight('text_html')

        # executing:
        return self.index_manager.setup_search(search_queryset)

    return []
def run(self, query, page=1, default_operator="AND"):
    """Perform search, placing the results in `self.results`, and the total
    number of results (across all pages) in `self.total`. Chainable."""
    # Default to a dfs query
    search = DSLSearch(
        using=es7_client(), index=self.get_index()).params(search_type="dfs_query_then_fetch")

    # add the search class' filter
    search = search.query("bool", filter=self.get_filter())
    # add query, search over the search class' fields
    search = search.query(
        "simple_query_string",
        query=query,
        default_operator=default_operator,
        fields=self.get_fields(),
    )
    # add highlights for the search class' highlight_fields
    search = search.highlight(
        *self.get_highlight_fields(),
        type="fvh",
        # order highlighted fragments by their relevance:
        order="score",
        # only get one fragment per field:
        number_of_fragments=1,
        # split fragments at the end of sentences:
        boundary_scanner="sentence",
        # return fragments roughly this size:
        fragment_size=SNIPPET_LENGTH,
        # add these tags before/after the highlighted sections:
        pre_tags=[f"<{HIGHLIGHT_TAG}>"],
        post_tags=[f"</{HIGHLIGHT_TAG}>"],
    )
    # do pagination
    start = (page - 1) * self.results_per_page
    search = search[start:start + self.results_per_page]

    # perform search
    self.hits = search.execute().hits

    self.total = self.hits.total.value if self.hits else 0
    self.results = [self.make_result(hit) for hit in self.hits]

    return self
def find(query, company_id, proposal_id):
    client = get_client()
    index = current_app.config["ES_IMPORT_INDEX"]
    s = Search(using=client, index=index)
    s = s.filter("term", company_id=company_id)
    # s = s.filter(~Q("term", proposal_id=proposal_id))

    # Weight title more than the content, since a user writing an exact title
    # should get that section rather than the same query in a content
    s = s.query(Q("multi_match", query=query, fields=["title^4", "content"]))
    s = s.highlight_options(
        order="score",
        pre_tags=["<span class='search-highlight'>"],
        post_tags=["</span>"])
    s = s.highlight("title", "content")

    # Only get the first 20 results
    response = s[:20].execute()
    return response.hits
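An assumed caller for `find()`, showing the defensive access pattern for highlighted fields; the query string and ids are illustrative.

# Hypothetical usage: prefer the highlighted title fragment when present,
# falling back to the stored field, mirroring the highlight() call above.
for hit in find('pricing section', company_id=42, proposal_id=7):
    title = (hit.meta.highlight.title[0]
             if hasattr(hit.meta, 'highlight') and 'title' in hit.meta.highlight
             else hit.title)
    print(title)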
async def get(self):
    """Get the results from Elasticsearch."""
    q = self.request.query.get("q")
    if not q:
        return web.json_response([])

    es = Elasticsearch(
        hosts=[self.request.app["settings"].ELASTICSEARCH_URL],
        timeout=ELASTICSEARCH_TIMEOUT,
        verify_certs=ELASTICSEARCH_VERIFY_CERTS,
    )
    mapping = es.indices.get_mapping(ELASTICSEARCH_INDEX, include_type_name=True)

    search = Search(index=ELASTICSEARCH_INDEX, using=es)
    search = search.highlight_options(
        pre_tags=[PRE_HIGHLIGHT_TAG],
        post_tags=[POST_HIGHLIGHT_TAG],
    )

    query = self.queries(mapping, q)
    search = search.query(query)

    highlights = self.build_highlight(
        mapping[ELASTICSEARCH_INDEX]["mappings"]["_doc"]["properties"])
    for highlight in highlights:
        search = search.highlight(highlight, type="plain")

    search = search.extra(
        from_=0,
        size=MAX_RESULTS,
    )

    values = []
    for hit in search.execute():
        hit._d_.pop(META, None)
        if HIGHLIGHT and hasattr(hit.meta, "highlight"):
            highlight = hit.meta.highlight
            query = DictQuery(hit._d_)
            for key in highlight:
                path = key.split(".")[:-1]
                value = highlight[key][0]
                query.set("/".join(path), value)
            values.append(query)
        else:
            values.append(hit._d_)

    return web.json_response(values)
def search_by_keywords(self, keywords, subject):
    search = Search(using=self.es, index='arxiv-index')
    query_content = Q()
    keywords = re.sub('[^A-Za-z0-9 ]+', '', keywords).lower()
    for keyword in keywords.split(' '):
        query_content = query_content + (
            Q('wildcard', pdf='*' + keyword + '*') |
            Q('wildcard', abstract='*' + keyword + '*') |
            Q('wildcard', authors='*' + keyword + '*'))
    query_subject = Q()
    query_other = Q()
    if subject and subject != 'all':
        query_subject = Q('wildcard', subject='*' + subject + '.*')
        query_other = Q('wildcard', other_subjects='*' + subject + '.*')
    final_query = Q('bool', must=[query_content],
                    should=[query_subject, query_other],
                    minimum_should_match=1)
    search = search.query(final_query)
    search = search.source([
        'title', 'authors', 'subject', 'other_subjects', 'abstract',
        'abstract_url', 'pdf_url', 'submit_date'
    ])
    search = search.highlight_options(order='score')
    search = search.highlight('abstract', fragment_size=400)
    total = search.count()
    search = search[0:total]
    search = self._extend_query(search, keywords)
    request = search.execute()
    for hit in request:
        response = hit.to_dict()
        if 'highlight' in hit.meta:
            response.update({'fragment': hit.meta.highlight.abstract})
        else:
            response.update({'fragment': []})
        yield response
def get_second_best_wiki_words(question):
    text = question.flatten_text()
    # query top 10 guesses
    s = Search(index="qb_ir_instance_of")[0:10].query(
        "multi_match",
        query=text,
        fields=["wiki_content", "qb_content", "source_content"],
    )
    s = s.highlight("qb_content").highlight("wiki_content")
    results = list(s.execute())
    guess = results[1]  # take the second best answer
    _highlights = guess.meta.highlight
    try:
        wiki_content = list(_highlights.wiki_content)
    except AttributeError:
        wiki_content = None
    try:
        qb_content = list(_highlights.qb_content)
    except AttributeError:
        qb_content = None

    words = {}
    if wiki_content is None:
        words["wiki"] = None
    else:
        words["wiki"] = itertools.chain(
            *[re.findall("<em>(.*?)</em>", x) for x in list(wiki_content)])
    if qb_content is None:
        words["qb"] = None
    else:
        words["qb"] = itertools.chain(
            *[re.findall("<em>(.*?)</em>", x) for x in list(qb_content)])
    return words
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    If the request does not contain the needed parameters, then None is
    returned.

    If the request contains a parameter with the name of the column and
    this parameter is either a Record or a class instance then it is
    assumed that the parameters of this index are passed as attribute
    (Note: this is the recommended way to pass parameters since Zope 2.4)

    Otherwise two objects are returned. The first object is a ResultSet
    containing the record numbers of the matching records. The second
    object is a tuple containing the names of all data fields used.
    """
    config = get_configuration()
    timeout = getattr(config, 'request_timeout', 20)
    search_fields = getattr(config, 'search_fields', None)
    if not search_fields:
        search_fields = SEARCH_FIELDS
    search_fields = search_fields.split()
    logger.info(search_fields)
    if query_blocker.blocked:
        return
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None
    es = get_query_client()
    search = Search(using=es, index=index_name())
    search = search.params(request_timeout=timeout)
    search = search.sort('rid', '_id')
    search = search.source(include='rid')
    query_string = record.keys[0].decode('utf8')
    logger.info(query_string)
    if '*' in query_string:
        query_string = query_string.replace('*', ' ')
    query_string = query_string.strip()
    search = search.query('simple_query_string',
                          query=query_string,
                          fields=search_fields)
    results_count = search.count()
    search = search.params(request_timeout=timeout,
                           size=BATCH_SIZE,
                           track_scores=True)
    # setup highlighting
    for field in search_fields:
        name = field.split('^')[0]
        if name == 'title':
            # title shows up in results anyway
            continue
        search = search.highlight(name, fragment_size=FRAGMENT_SIZE)

    # initial return value, other batches to be applied
    retval = IIBTree()
    highlights = OOBTree()
    last_seen = None
    count = 0
    batch_count = results_count / BATCH_SIZE
    if results_count % BATCH_SIZE != 0:
        batch_count = batch_count + 1
    for i in xrange(batch_count):
        if last_seen is not None:
            search = search.update_from_dict({'search_after': last_seen})
        try:
            results = search.execute(ignore_cache=True)
        except TransportError:
            # No es client, return empty results
            logger.exception('ElasticSearch client not available.')
            return IIBTree(), (self.id, )
        for r in results:
            rid = getattr(r, 'rid', None)
            if rid is not None:
                retval[rid] = int(10000 * float(r.meta.score))
                # Index query returns only rids, so we need
                # to save highlights for later use
                highlight_list = []
                if getattr(r.meta, 'highlight', None) is not None:
                    for key in dir(r.meta.highlight):
                        highlight_list.extend(r.meta.highlight[key])
                highlights[r.meta.id] = highlight_list
            last_seen = [rid, r.meta.id]
            count = count + 1
    # store highlights
    try:
        annotations = IAnnotations(self.REQUEST)
        annotations[HIGHLIGHT_KEY] = highlights
    except TypeError:
        # maybe we are in a test
        pass
    return retval, (self.id, )
def get_queryset(self):
    if not self.index_manager.connected_to_es:
        messages.warning(self.request, _('Impossible de se connecter à Elasticsearch'))
        return []

    if self.search_query:
        # Searches forums the user is allowed to visit
        self.authorized_forums = get_authorized_forums(self.request.user)

        search_queryset = Search()

        # Restrict (sub)category if any
        if self.search_form.cleaned_data['category']:
            self.content_category = self.search_form.cleaned_data['category']
        if self.search_form.cleaned_data['subcategory']:
            self.content_subcategory = self.search_form.cleaned_data['subcategory']

        # Mark that contents must come from library if required
        self.from_library = False
        if self.search_form.cleaned_data['from_library'] == 'on':
            self.from_library = True

        # Setting the different querysets (according to the selected models, if any)
        part_querysets = []
        chosen_groups = self.search_form.cleaned_data['models']

        if chosen_groups:
            models = []
            for group in chosen_groups:
                if group in settings.ZDS_APP['search']['search_groups']:
                    models.append(settings.ZDS_APP['search']['search_groups'][group][1])
        else:
            models = [v[1] for k, v in settings.ZDS_APP['search']['search_groups'].items()]

        models = reduce(operator.concat, models)

        for model in models:
            part_querysets.append(getattr(self, 'get_queryset_{}s'.format(model))())

        queryset = part_querysets[0]
        for query in part_querysets[1:]:
            queryset |= query

        # Weighting:
        weight_functions = []
        for _type, weights in list(settings.ZDS_APP['search']['boosts'].items()):
            if _type in models:
                weight_functions.append({'filter': Match(_type=_type), 'weight': weights['global']})

        scored_queryset = FunctionScore(query=queryset, boost_mode='multiply', functions=weight_functions)
        search_queryset = search_queryset.query(scored_queryset)

        # Highlighting:
        search_queryset = search_queryset.highlight_options(
            fragment_size=150, number_of_fragments=5, pre_tags=['[hl]'], post_tags=['[/hl]'])
        search_queryset = search_queryset.highlight('text').highlight('text_html')

        # Executing:
        return self.index_manager.setup_search(search_queryset)

    return []