def generate_response_best_show(self, query, annotated_query):
    # find document id with max polarity
    payload = {
        '_source': ['documentSentiment.polarity'],
        'query': {
            'bool': {
                'must': [{'match': {'Full text:': p}}
                         for p in annotated_query.shows]
            }
        }
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))['hits']['hits']
    polarities = [(i['_id'], i['_source']['documentSentiment']['polarity'])
                  for i in r]
    id_max_polarity = max(polarities, key=itemgetter(1))[0]

    # return sentence from document id that contains show in a sentence
    payload = {
        '_source': ['sentences.content', 'Full text:', 'ProQ:'],
        'query': {
            'bool': {
                'must': [{
                    'ids': {'values': [id_max_polarity]}
                }, {
                    'nested': {
                        'path': 'sentences',
                        'query': {
                            'bool': {
                                'must': [{'match': {'sentences.content': p}}
                                         for p in annotated_query.shows]
                            }
                        },
                        'inner_hits': {}
                    }
                }]
            }
        }
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))['hits']['hits']
    r = [(i['inner_hits']['sentences']['hits'], i['_source']['ProQ:'],
          i['_source']['Full text:']) for i in r]
    return self.format_response(r[0])

def search():
    """ Search view, querying the elastic search backend based on the
    'query' GET parameter. """
    query = request.args.get('query')
    response = jsonify(results=elastic.search(query))
    return response

def api_search():
    fields = request.json["fields"]
    term = request.json["term"]
    filters = request.json["filters"]
    res = search(aggregated_serch(term, fields, filters))
    if len(fields) == 0:
        return jsonify(res['aggregations'])
    mapped = unwrap_agg_res(res)
    return jsonify(mapped)

def generate_response(self, query, annotated_query):
    ids = self.get_relevant_document_ids(query)
    # Make fall-back ES query
    payload = {
        "_source": ["sentences.content", "Full text:", "ProQ:"],
        "query": {
            "bool": {
                "must": [{
                    "ids": {"values": ids}
                }, {
                    "nested": {
                        "path": "sentences",
                        "query": {
                            "bool": {
                                "should": [{"match": {"sentences.content": i}}
                                           for i in [
                                               'strong', 'dynamic', 'elegant',
                                               'up-and-coming', 'powerful',
                                               'good', 'bad', 'excellent',
                                               'flat', 'disappointing',
                                               'shocking', 'emerging',
                                               'growing', 'riveting',
                                               'depressing', 'awful',
                                               'focused', 'intelligent',
                                               'smart', 'subtle',
                                               'outstanding', 'accomplished',
                                               'terrific', 'great', 'love',
                                               'hate', 'like'
                                           ]],
                                "must": {
                                    "match": {
                                        "sentences.content":
                                        annotated_query.people[0]
                                    }
                                }
                            }
                        },
                        "inner_hits": {}
                    }
                }]
            }
        }
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))['hits']['hits']
    r = [(i['inner_hits']['sentences']['hits'], i['_source']['ProQ:'],
          i['_source']['Full text:']) for i in r]
    return self.format_response(r[0])

def get_relevant_documents(self, search_phrase):
    """
    Fetches relevant documents from elastic search based on query.
    Keeps only the documents whose score is at or above the median score.
    input: search_phrase (string): string to search for in ES
    output: (dict): dictionary of fetched documents
    """
    # get the scores for the top 500 documents
    index = 'flattened-articles/_search'
    score_payload = {
        'from': 0,
        'size': 500,
        'fields': '_score',
        'query': {
            'query_string': {
                'query': search_phrase.encode('utf-8'),
                'fields': ['Full text:']
            }
        }
    }
    score_response = json.loads(
        elastic.search(elastic.ES_URL, index, score_payload))

    # create list of scores with 0 excluded
    scores = []
    for i in score_response['hits']['hits']:
        float_score = float(i['_score'])
        if float_score > 0:
            scores.append(float_score)
    quantile = np.percentile(scores, 50)

    # get responses where min_score >= quantile
    payload = {
        '_source': ['ProQ:', 'sentences', 'documentSentiment', 'Full text:'],
        'min_score': quantile,
        'from': 0,
        'size': 500,
        'query': {
            'query_string': {
                'query': search_phrase.encode('utf-8'),
                'fields': ['Full text:']
            }
        }
    }
    response = json.loads(elastic.search(elastic.ES_URL, index, payload))
    return response

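# A minimal, self-contained sketch of the score-thresholding step used in
# get_relevant_documents above: keep only hits whose _score is at or above the
# median of the non-zero scores. The helper name and the plain list-of-hits
# input are illustrative assumptions; the method above performs this inline
# against the raw Elasticsearch response.
import numpy as np

def filter_hits_by_median_score(hits):
    """Return the hits whose _score is >= the median of the non-zero scores."""
    scores = [float(h['_score']) for h in hits if float(h['_score']) > 0]
    if not scores:
        return []
    threshold = np.percentile(scores, 50)
    return [h for h in hits if float(h['_score']) >= threshold]
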
def search_cities(query=None, **options):
    if 'fields' not in options:
        options['fields'] = ['name', 'cid', 'regions', 'location', 'images']
    return elastic.search(query, index=elastic.SEARCH_INDEX, **options)

def get_entities_for_type(self, entity_type, overwrite=False):
    """
    Gets entities for a specific entity_type from elastic search.
    Inputs:
        entity_type (string): entity type to look for
        overwrite (bool): overwrite file on disk
    """
    index = 'flattened-articles/_search'
    if entity_type == 'show':
        elastic_response = json.loads(
            elastic.search(elastic.ES_URL, index, SHOW_QUERY))
    elif entity_type == 'people':
        elastic_response = json.loads(
            elastic.search(elastic.ES_URL, index, PEOPLE_QUERY))
    elif entity_type == 'theater':
        elastic_response = json.loads(
            elastic.search(elastic.ES_URL, index, THEATER_QUERY))

    for i in elastic_response['hits']['hits']:
        for j in i['inner_hits']['entities']['hits']['hits']:
            curr_name = j['_source']['name']
            if entity_type == 'show':
                self.show_entities.add(curr_name)
            elif entity_type == 'people':
                self.people_entities.add(curr_name)
            elif entity_type == 'theater':
                if 'theater' in curr_name.lower():
                    self.theater_entities.add(curr_name)

    if overwrite:
        if entity_type == 'show':
            self.save_entities(self.show_entities, self.show_file)
        elif entity_type == 'people':
            self.save_entities(self.people_entities, self.people_file)
        elif entity_type == 'theater':
            self.save_entities(self.theater_entities, self.theater_file)

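# SHOW_QUERY, PEOPLE_QUERY and THEATER_QUERY are not shown in this listing.
# Judging from the response handling in get_entities_for_type (inner_hits on
# the "entities" path, each hit carrying a "name" field), they are presumably
# nested queries roughly like the sketch below; the "entities.type" field and
# its value are assumptions made for illustration only.
SHOW_QUERY_SKETCH = {
    '_source': False,
    'query': {
        'nested': {
            'path': 'entities',
            'query': {'match': {'entities.type': 'show'}},  # assumed field
            'inner_hits': {'_source': ['entities.name']},
        }
    },
}
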
def generate_response_favorite_person(self, query, annotated_query):
    ids = self.get_relevant_document_ids(query)
    # Restrict to the relevant documents and surface sentences that pair the
    # queried noun keywords with a superlative such as "favorite" or "best".
    payload = {
        "_source": ["sentences.content", "Full text:", "ProQ:"],
        "query": {
            "bool": {
                "must": [{
                    "ids": {"values": ids}
                }, {
                    "nested": {
                        "path": "sentences",
                        "query": {
                            "bool": {
                                "should": [{"match": {"sentences.content": i}}
                                           for i in [
                                               'favorite', 'outstanding',
                                               'terrific', 'killer', 'best',
                                               'precious', 'dearest',
                                               'greatest'
                                           ]],
                                "must": [
                                    {"match": {"sentences.content": p}}
                                    for p in
                                    annotated_query.keywords['keywords']['NOUN']
                                ]
                            }
                        },
                        "inner_hits": {}
                    }
                }]
            }
        }
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))['hits']['hits']
    r = [(i['inner_hits']['sentences']['hits'], i['_source']['ProQ:'],
          i['_source']['Full text:']) for i in r]
    print('Favorite {}'.format(
        annotated_query.keywords['keywords']['NOUN'][0]))
    return self.format_response(r[0])

def api_suggest():
    term = request.json["term"]
    body = {
        "suggest": {
            "search_suggest": {
                "prefix": term,
                "completion": {"field": "suggest"},
            }
        }
    }
    res = search(body)
    mapped = unwrap_suggest(res)
    return jsonify(mapped)

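# unwrap_suggest is not defined in this listing. A plausible sketch, assuming
# the standard Elasticsearch completion-suggester response shape produced by
# the "search_suggest" request built in api_suggest above (the fields available
# on each option depend on the index mapping):
def unwrap_suggest(res):
    """Flatten a completion-suggester response into a list of suggestion texts."""
    suggestions = []
    for entry in res.get('suggest', {}).get('search_suggest', []):
        for option in entry.get('options', []):
            suggestions.append(option['text'])
    return suggestions
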
def search():
    text = get_text()
    field = request.args.get('field', 'lemmatized_text')
    skip = request.args.get('skip', 0)
    limit = request.args.get('limit', 20)
    timeout = request.args.get('timeout', '5s')
    lemmatize = request.args.get('lemmatize', True)
    from_date = request.args.get('from_date', '2000-01-01')
    to_date = request.args.get('to_date', '2030-01-01')
    index = request.args.get('index', 'articles')

    if lemmatize != "false":
        o = text_processor.process_text(text, clear=True)
        text = o.get('lemmatized_text', '')
    else:
        logging.warning('NOT lemmatized !!!!')

    search_result = elastic.search(text, skip=skip, limit=limit, field=field,
                                   timeout=timeout, from_date=from_date,
                                   to_date=to_date, index=index)
    return make_response(search_result)

def generate_response(self, query, annotated_query):
    theater = annotated_query.theaters[0]
    ids = self.get_relevant_document_ids(theater)
    at_theater = "at " + theater
    payload = {
        "_source": ["sentences.content", "Full text:", "ProQ:"],
        "query": {
            "bool": {
                "must": [{
                    "ids": {"values": ids}
                }, {
                    "nested": {
                        "path": "sentences",
                        "query": {
                            "bool": {
                                "must": [{
                                    "match": {"sentences.content": theater}
                                }, {
                                    "match": {"sentences.content": at_theater}
                                }]
                            }
                        },
                        "inner_hits": {}
                    }
                }]
            }
        }
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))['hits']['hits']
    r = [(i['inner_hits']['sentences']['hits'], i['_source']['ProQ:'],
          i['_source']['Full text:']) for i in r]
    return self.format_response(r[0])

def generate_response_person_in_show(self, query, annotated_query):
    match_queries = [{'match': {'Full text:': show}}
                     for show in annotated_query.shows]
    match_queries.append({
        'nested': {
            'path': 'sentences',
            'query': {
                'bool': {
                    'must': [{'match': {'sentences.content': p}}
                             for p in annotated_query.people]
                }
            },
            'inner_hits': {}
        }
    })
    payload = {
        '_source': ['sentences.content', 'Full text:', 'ProQ:'],
        'query': {'bool': {'must': match_queries}}
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))
    print(r)
    r = r['hits']['hits']
    r = [(i['inner_hits']['sentences']['hits'], i['_source']['ProQ:'],
          i['_source']['Full text:']) for i in r]
    return self.format_response(r[0])

def generate_response(self, query, annotated_query):
    ids = self.get_relevant_document_ids(query)
    noun = annotated_query.keywords['keywords']['NOUN'][0]
    # Make fall-back ES query
    payload = {
        "_source": ["sentences.content", "Full text:", "ProQ:"],
        "query": {
            "bool": {
                "must": [{
                    "ids": {"values": ids}
                }, {
                    "nested": {
                        "path": "sentences",
                        "query": {
                            "bool": {
                                "must": [{
                                    "match": {"sentences.content": noun}
                                }]
                            }
                        },
                        "inner_hits": {}
                    }
                }]
            }
        }
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))['hits']['hits']
    r = [(i['inner_hits']['sentences']['hits'], i['_source']['ProQ:'],
          i['_source']['Full text:']) for i in r]
    return self.format_response(r[0])

def get_relevant_document_ids(self, query):
    """
    Get the relevant document ids from Elastic Search for a full-text query.

    args:
        query (string): A text string to be used on a full-text query
    return:
        ids (list): A list of document IDs
    """
    payload = {
        "query": {
            "query_string": {
                "query": query.encode('utf-8'),
                "fields": ["Full text:"]
            }
        }
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))['hits']['hits']
    ids = [i['_id'] for i in r]
    return ids

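# Several generate_response methods in this listing follow the same two-step
# pattern: fetch ids with get_relevant_document_ids, then run an "ids" filter
# combined with a nested "sentences" query and inner_hits. A compact sketch of
# that second payload with a single illustrative keyword; the helper name is
# an assumption, not part of the original code.
def build_sentence_payload(ids, keyword):
    """Restrict to `ids` and surface sentences matching `keyword` via inner_hits."""
    return {
        "_source": ["sentences.content", "Full text:", "ProQ:"],
        "query": {
            "bool": {
                "must": [
                    {"ids": {"values": ids}},
                    {
                        "nested": {
                            "path": "sentences",
                            "query": {
                                "bool": {
                                    "must": [{"match": {"sentences.content": keyword}}]
                                }
                            },
                            "inner_hits": {},
                        }
                    },
                ]
            }
        },
    }
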
def generate_response_good_noun(self, query, annotated_query):
    # First pass: find documents whose full text mentions the queried people.
    payload = {
        "_source": ["sentences.content", "Full text:", "ProQ:"],
        "query": {
            "bool": {
                "must": [{"match": {"Full text:": p}}
                         for p in annotated_query.people]
            }
        }
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))['hits']['hits']
    ids = [i['_id'] for i in r]

    # Second pass: within those documents, surface sentences mentioning the
    # people and noun keywords alongside a positive descriptor.
    payload = {
        "_source": ["sentences.content", "Full text:", "ProQ:"],
        "query": {
            "bool": {
                "must": [{
                    "ids": {"values": ids}
                }, {
                    "nested": {
                        "path": "sentences",
                        "query": {
                            "bool": {
                                "should": [{"match": {"sentences.content": i}}
                                           for i in [
                                               'strong', 'dynamic', 'elegant',
                                               'powerful', 'good', 'excellent',
                                               'shocking', 'emerging',
                                               'riveting', 'focused',
                                               'intelligent', 'smart',
                                               'subtle', 'outstanding',
                                               'accomplished', 'terrific',
                                               'great', 'love', 'performance',
                                               'favorite', 'best',
                                               'portrayal', 'cast'
                                           ]],
                                "must": [
                                    {"match": {"sentences.content": p}}
                                    for p in annotated_query.people +
                                    annotated_query.keywords['keywords']['NOUN']
                                ]
                            }
                        },
                        "inner_hits": {}
                    }
                }]
            }
        }
    }
    r = json.loads(
        elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                       payload))['hits']['hits']
    r = [(i['inner_hits']['sentences']['hits'], i['_source']['ProQ:'],
          i['_source']['Full text:']) for i in r]
    return self.format_response(r[0])

def results():
    query = request.form['query']
    hits = search(query)
    hits = hits['hits']['hits']
    return render_template('form_action.html', hits=hits)

def get(self, types, search_term, from_date, to_date, size, offset):
    types_formatted = str(types).split(",")
    result = elastic.search(types_formatted, search_term, from_date, to_date,
                            size, offset)
    self.write(result)

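# A minimal sketch of wiring the handler above into a Tornado application.
# The class name SearchHandler and the six-group URL pattern are assumptions
# chosen to match the six positional arguments of get(); neither appears in
# this listing.
import tornado.web

class SearchHandler(tornado.web.RequestHandler):
    """Hypothetical enclosing class for the get() handler shown above."""
    pass

def make_app():
    # Each capture group maps onto one positional argument of get().
    return tornado.web.Application([
        (r"/search/([^/]+)/([^/]+)/([^/]+)/([^/]+)/([^/]+)/([^/]+)",
         SearchHandler),
    ])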