Ejemplo n.º 1
0
    def generate_response_best_show(self, query, annotated_query):
        """
        Build a response from the highest-polarity article about the shows.

        Runs two searches against '/flattened-articles/_search':
          1. articles whose 'Full text:' matches every entry of
             ``annotated_query.shows``; the hit with the largest
             ``documentSentiment.polarity`` is selected.
          2. that single document again, this time asking for the nested
             sentences that match every show (returned as inner hits).

        Inputs:
            query: not used by this method
            annotated_query: object exposing a ``shows`` sequence

        Output:
            the value of ``self.format_response`` for the first hit of the
            second search, passed as (sentence inner hits, 'ProQ:',
            'Full text:')

        Raises ValueError when the first search has no hits (``max`` of an
        empty list) and IndexError when the second search has no hits.
        """
        search_path = '/flattened-articles/_search'

        # find document id with max polarity
        show_in_text = [{'match': {'Full text:': show}}
                        for show in annotated_query.shows]
        polarity_payload = {
            '_source': ['documentSentiment.polarity'],
            'query': {'bool': {'must': show_in_text}},
        }
        raw = elastic.search(elastic.ES_URL, search_path, polarity_payload)
        polarities = []
        for hit in json.loads(raw)['hits']['hits']:
            polarity = hit['_source']['documentSentiment']['polarity']
            polarities.append((hit['_id'], polarity))
        id_max_polarity = max(polarities, key=itemgetter(1))[0]

        # return sentence from document id that contains show in a sentence
        show_in_sentence = [{'match': {'sentences.content': show}}
                            for show in annotated_query.shows]
        sentence_clause = {
            'nested': {
                'path': 'sentences',
                'query': {'bool': {'must': show_in_sentence}},
                'inner_hits': {},
            }
        }
        sentence_payload = {
            '_source': ['sentences.content', 'Full text:', 'ProQ:'],
            'query': {
                'bool': {
                    'must': [{'ids': {'values': [id_max_polarity]}},
                             sentence_clause],
                }
            },
        }
        raw = elastic.search(elastic.ES_URL, search_path, sentence_payload)
        results = []
        for hit in json.loads(raw)['hits']['hits']:
            sentence_hits = hit['inner_hits']['sentences']['hits']
            source = hit['_source']
            results.append((sentence_hits, source['ProQ:'],
                            source['Full text:']))

        return self.format_response(results[0])
Ejemplo n.º 2
0
def search():
    """
    Search view: pass the 'query' GET parameter to the elastic search
    backend and return what it finds as JSON under the 'results' key.
    """
    return jsonify(results=elastic.search(request.args.get('query')))
Ejemplo n.º 3
0
def api_search():
    """
    Aggregated search endpoint driven by the JSON request body.

    Reads 'fields', 'term' and 'filters' from the body, builds the query
    with ``aggregated_serch`` and runs it through ``search``.  With an empty
    'fields' list the raw 'aggregations' part of the result is returned;
    otherwise the result is passed through ``unwrap_agg_res`` first.
    """
    body = request.json
    fields = body["fields"]
    term = body["term"]
    filters = body["filters"]

    res = search(aggregated_serch(term, fields, filters))

    if len(fields) != 0:
        return jsonify(unwrap_agg_res(res))
    return jsonify(res['aggregations'])
    def generate_response(self, query, annotated_query):
        """
        Fall-back response: an opinion-bearing sentence about a person.

        The search is limited to the ids returned by
        ``self.get_relevant_document_ids(query)``.  Within those documents
        the nested sentences must match ``annotated_query.people[0]`` and
        should match one of a fixed list of opinion words.

        Inputs:
            query: text handed to ``get_relevant_document_ids``
            annotated_query: object exposing a ``people`` sequence

        Output:
            the value of ``self.format_response`` for the first hit, passed
            as (sentence inner hits, 'ProQ:', 'Full text:')

        Raises IndexError when ``people`` is empty or nothing is found.
        """
        ids = self.get_relevant_document_ids(query)

        opinion_words = [
            'strong', 'dynamic', 'elegant', 'up-and-coming', 'powerful',
            'good', 'bad', 'excellent', 'flat', 'disappointing', 'shocking',
            'emerging', 'growing', 'riveting', 'depressing', 'awful',
            'focused', 'intelligent', 'smart', 'subtle', 'outstanding',
            'accomplished', 'terrific', 'great', 'love', 'hate', 'like',
        ]

        # Make Fall-back ES Query
        sentence_query = {
            "bool": {
                "should": [{"match": {"sentences.content": word}}
                           for word in opinion_words],
                "must": {
                    "match": {
                        "sentences.content": annotated_query.people[0]
                    }
                },
            }
        }
        payload = {
            "_source": ["sentences.content", "Full text:", "ProQ:"],
            "query": {
                "bool": {
                    "must": [
                        {"ids": {"values": ids}},
                        {
                            "nested": {
                                "path": "sentences",
                                "query": sentence_query,
                                "inner_hits": {},
                            }
                        },
                    ]
                }
            },
        }
        raw = elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                             payload)
        results = []
        for hit in json.loads(raw)['hits']['hits']:
            sentence_hits = hit['inner_hits']['sentences']['hits']
            source = hit['_source']
            results.append((sentence_hits, source['ProQ:'],
                            source['Full text:']))

        return self.format_response(results[0])
    def get_relevant_documents(self, search_phrase):
        """
        Fetches relevant documents from elastic search based on query.

        The query is run twice.  The first pass only collects the scores of
        up to 500 hits; the second pass repeats the query with ``min_score``
        set to the median (50th percentile) of the positive scores, so only
        hits scoring at least the median come back.  When the first pass
        yields no positive score, the second pass runs without ``min_score``.

        input:
            search_phrase (string): string to search for in ES

        output:
            (dict): decoded response of the second query
        """
        index = 'flattened-articles/_search'

        def text_query():
            # fresh dict per payload so the two requests share no state
            return {
                'query_string': {
                    'query': search_phrase.encode('utf-8'),
                    'fields': ['Full text:'],
                }
            }

        # get all scores for the first 500 documents
        score_payload = {
            'from': 0,
            'size': 500,
            'fields': '_score',
            'query': text_query(),
        }
        score_response = json.loads(
            elastic.search(elastic.ES_URL, index, score_payload))

        # create list of scores with 0 excluded
        scores = []
        for hit in score_response['hits']['hits']:
            float_score = float(hit['_score'])
            if float_score > 0:
                scores.append(float_score)

        # get responses where score >= median of the positive scores
        payload = {
            '_source': ['ProQ:', 'sentences', 'documentSentiment',
                        'Full text:'],
            'from': 0,
            'size': 500,
            'query': text_query(),
        }
        if scores:
            payload['min_score'] = np.percentile(scores, 50)
        # else: a percentile of an empty list is not meaningful (NumPy
        # errors out or yields NaN), so no threshold is sent at all

        response = json.loads(elastic.search(elastic.ES_URL, index, payload))
        return response
Ejemplo n.º 6
0
def search_cities(query=None, **options):
    """
    Run ``elastic.search`` for ``query`` against ``elastic.SEARCH_INDEX``.

    Any keyword options are forwarded unchanged; when the caller does not
    pass 'fields', a default list of fields is requested.
    """
    default_fields = ['name', 'cid', 'regions', 'location', 'images']
    options.setdefault('fields', default_fields)
    return elastic.search(query, index=elastic.SEARCH_INDEX, **options)
Ejemplo n.º 7
0
    def get_entities_for_type(self, entity_type, overwrite=False):
        """
        Gets entities for specific entity_type from elastic search

        Every entity name found in the inner hits is added to the matching
        set on ``self`` (``show_entities``, ``people_entities`` or
        ``theater_entities``).  For 'theater' only names containing the
        word 'theater' (case-insensitive) are kept.

        Inputs:
            entity_type (string): entity type to look for; one of 'show',
                'people' or 'theater'
            overwrite (bool): overwrite file on disk

        Raises:
            ValueError: if entity_type is not one of the supported values
        """
        # Reject unknown types up front; otherwise no query is selected and
        # the code below would fail on an unbound local variable.
        if entity_type not in ('show', 'people', 'theater'):
            raise ValueError(
                'unknown entity_type: {!r}'.format(entity_type))

        index = 'flattened-articles/_search'
        if entity_type == 'show':
            es_query = SHOW_QUERY
        elif entity_type == 'people':
            es_query = PEOPLE_QUERY
        else:
            es_query = THEATER_QUERY
        elastic_response = json.loads(
            elastic.search(elastic.ES_URL, index, es_query))

        for hit in elastic_response['hits']['hits']:
            for entity in hit['inner_hits']['entities']['hits']['hits']:
                curr_name = entity['_source']['name']

                if entity_type == 'show':
                    self.show_entities.add(curr_name)
                elif entity_type == 'people':
                    self.people_entities.add(curr_name)
                elif 'theater' in curr_name.lower():
                    self.theater_entities.add(curr_name)

        if overwrite:
            if entity_type == 'show':
                self.save_entities(self.show_entities, self.show_file)
            elif entity_type == 'people':
                self.save_entities(self.people_entities, self.people_file)
            else:
                self.save_entities(self.theater_entities, self.theater_file)
    def generate_response_favorite_person(self, query, annotated_query):
        """
        Respond with a sentence that speaks of a "favorite" something.

        The search is limited to the ids returned by
        ``self.get_relevant_document_ids(query)``.  Nested sentences must
        match every entry of ``annotated_query.keywords['keywords']['NOUN']``
        and should match one of a fixed list of "favorite"-like words.

        Inputs:
            query: text handed to ``get_relevant_document_ids``
            annotated_query: object exposing the ``keywords`` mapping

        Output:
            the value of ``self.format_response`` for the first hit, passed
            as (sentence inner hits, 'ProQ:', 'Full text:')

        Also prints 'Favorite <first noun>' before returning.
        """
        ids = self.get_relevant_document_ids(query)

        favorite_words = [
            'favorite', 'outstanding', 'terrific', 'killer', 'best',
            'precious', 'dearest', 'greatest',
        ]
        sentence_query = {
            "bool": {
                "should": [{"match": {"sentences.content": word}}
                           for word in favorite_words],
                "must": [{"match": {"sentences.content": noun}}
                         for noun in
                         annotated_query.keywords['keywords']['NOUN']],
            }
        }
        payload = {
            "_source": ["sentences.content", "Full text:", "ProQ:"],
            "query": {
                "bool": {
                    "must": [
                        {"ids": {"values": ids}},
                        {
                            "nested": {
                                "path": "sentences",
                                "query": sentence_query,
                                "inner_hits": {},
                            }
                        },
                    ]
                }
            },
        }
        raw = elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                             payload)
        results = []
        for hit in json.loads(raw)['hits']['hits']:
            sentence_hits = hit['inner_hits']['sentences']['hits']
            source = hit['_source']
            results.append((sentence_hits, source['ProQ:'],
                            source['Full text:']))

        first_noun = annotated_query.keywords['keywords']['NOUN'][0]
        # parenthesised single argument: same output as the print statement
        print('Favorite {}'.format(first_noun))
        return self.format_response(results[0])
Ejemplo n.º 9
0
def api_suggest():
    """
    Suggestion endpoint: send the 'term' from the JSON request body as the
    prefix of a completion suggester on the 'suggest' field, and return the
    result after passing it through ``unwrap_suggest``.
    """
    search_suggest = {
        "prefix": request.json["term"],
        "completion": {"field": "suggest"},
    }
    res = search({"suggest": {"search_suggest": search_suggest}})
    return jsonify(unwrap_suggest(res))
Ejemplo n.º 10
0
def search():
    """
    Search view: look the request text up through ``elastic.search``.

    The text comes from ``get_text()``.  Unless the 'lemmatize' parameter is
    the literal string "false", the text is first run through
    ``text_processor.process_text`` and its 'lemmatized_text' is searched.

    Query-string parameters and their defaults:
        field ('lemmatized_text'), skip (0), limit (20), timeout ('5s'),
        lemmatize (enabled), from_date ('2000-01-01'),
        to_date ('2030-01-01'), index ('articles')

    Returns ``make_response`` applied to the search result.
    """
    args = request.args
    text = get_text()
    field = args.get('field', 'lemmatized_text')
    # Parse paging values as ints so the backend always receives the same
    # type as the defaults; an unparsable value falls back to the default.
    skip = args.get('skip', 0, type=int)
    limit = args.get('limit', 20, type=int)
    timeout = args.get('timeout', '5s')
    lemmatize = args.get('lemmatize', True)
    from_date = args.get('from_date', '2000-01-01')
    to_date = args.get('to_date', '2030-01-01')
    index = args.get('index', 'articles')

    if lemmatize != "false":
        processed = text_processor.process_text(text, clear=True)
        text = processed.get('lemmatized_text', '')
    else:
        logging.warning('NOT lemmatized !!!!')

    search_result = elastic.search(
        text, skip=skip, limit=limit, field=field, timeout=timeout,
        from_date=from_date, to_date=to_date, index=index)
    return make_response(search_result)
Ejemplo n.º 11
0
    def generate_response(self, query, annotated_query):
        """
        Respond with a sentence about the first theater in the query.

        The search is limited to the ids returned by
        ``self.get_relevant_document_ids`` for that theater.  Nested
        sentences must match both the theater and the phrase "at " followed
        by the theater.

        Inputs:
            query: not used by this method
            annotated_query: object exposing a ``theaters`` sequence

        Output:
            the value of ``self.format_response`` for the first hit, passed
            as (sentence inner hits, 'ProQ:', 'Full text:')

        Raises IndexError when ``theaters`` is empty or nothing is found.
        """
        theater = annotated_query.theaters[0]
        ids = self.get_relevant_document_ids(theater)

        sentence_terms = [theater, "at " + theater]
        sentence_query = {
            "bool": {
                "must": [{"match": {"sentences.content": term}}
                         for term in sentence_terms]
            }
        }
        payload = {
            "_source": ["sentences.content", "Full text:", "ProQ:"],
            "query": {
                "bool": {
                    "must": [
                        {"ids": {"values": ids}},
                        {
                            "nested": {
                                "path": "sentences",
                                "query": sentence_query,
                                "inner_hits": {},
                            }
                        },
                    ]
                }
            },
        }
        raw = elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                             payload)
        results = []
        for hit in json.loads(raw)['hits']['hits']:
            sentence_hits = hit['inner_hits']['sentences']['hits']
            source = hit['_source']
            results.append((sentence_hits, source['ProQ:'],
                            source['Full text:']))

        return self.format_response(results[0])
Ejemplo n.º 12
0
    def generate_response_person_in_show(self, query, annotated_query):
        """
        Respond with a sentence about the people within the given shows.

        Documents must match every entry of ``annotated_query.shows`` in
        'Full text:' and contain a nested sentence matching every entry of
        ``annotated_query.people``.

        Inputs:
            query: not used by this method
            annotated_query: object exposing ``shows`` and ``people``

        Output:
            the value of ``self.format_response`` for the first hit, passed
            as (sentence inner hits, 'ProQ:', 'Full text:')

        Also prints the decoded search response.
        """
        show_clauses = [{'match': {'Full text:': show}}
                        for show in annotated_query.shows]
        people_in_sentence = [{'match': {'sentences.content': person}}
                              for person in annotated_query.people]
        sentence_clause = {
            'nested': {
                'path': 'sentences',
                'query': {'bool': {'must': people_in_sentence}},
                'inner_hits': {},
            }
        }
        payload = {
            '_source': ['sentences.content', 'Full text:', 'ProQ:'],
            'query': {'bool': {'must': show_clauses + [sentence_clause]}},
        }

        raw = elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                             payload)
        response = json.loads(raw)
        # parenthesised single argument: same output as the print statement
        print(response)

        results = []
        for hit in response['hits']['hits']:
            sentence_hits = hit['inner_hits']['sentences']['hits']
            source = hit['_source']
            results.append((sentence_hits, source['ProQ:'],
                            source['Full text:']))

        return self.format_response(results[0])
Ejemplo n.º 13
0
    def generate_response(self, query, annotated_query):
        """
        Fall-back response built around the first noun keyword.

        The search is limited to the ids returned by
        ``self.get_relevant_document_ids(query)``; nested sentences must
        match ``annotated_query.keywords['keywords']['NOUN'][0]``.

        Inputs:
            query: text handed to ``get_relevant_document_ids``
            annotated_query: object exposing the ``keywords`` mapping

        Output:
            the value of ``self.format_response`` for the first hit, passed
            as (sentence inner hits, 'ProQ:', 'Full text:')

        Raises IndexError when there is no noun keyword or nothing is found.
        """
        ids = self.get_relevant_document_ids(query)

        # Make Fall-back ES Query
        first_noun = annotated_query.keywords['keywords']['NOUN'][0]
        sentence_clause = {
            "nested": {
                "path": "sentences",
                "query": {
                    "bool": {
                        "must": [{"match": {"sentences.content": first_noun}}]
                    }
                },
                "inner_hits": {},
            }
        }
        payload = {
            "_source": ["sentences.content", "Full text:", "ProQ:"],
            "query": {
                "bool": {
                    "must": [{"ids": {"values": ids}}, sentence_clause]
                }
            },
        }
        raw = elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                             payload)
        results = []
        for hit in json.loads(raw)['hits']['hits']:
            sentence_hits = hit['inner_hits']['sentences']['hits']
            source = hit['_source']
            results.append((sentence_hits, source['ProQ:'],
                            source['Full text:']))

        return self.format_response(results[0])
Ejemplo n.º 14
0
    def get_relevant_document_ids(self, query):
        """
        Look up the ids of the documents matching a full-text query.

        The query is UTF-8 encoded and sent as a ``query_string`` search on
        the 'Full text:' field of '/flattened-articles/_search'.

        args:
            query (string): A text string to be used on a full-text query

        return:
            ids (list): the '_id' of every hit in the response
        """
        full_text_query = {
            "query_string": {
                "query": query.encode('utf-8'),
                "fields": ["Full text:"],
            }
        }
        raw = elastic.search(elastic.ES_URL, '/flattened-articles/_search',
                             {"query": full_text_query})
        return [hit['_id'] for hit in json.loads(raw)['hits']['hits']]
Ejemplo n.º 15
0
    def generate_response_good_noun(self, query, annotated_query):
        """
        Respond with a favourable sentence about the people and nouns asked.

        Runs two searches against '/flattened-articles/_search':
          1. documents whose 'Full text:' matches every entry of
             ``annotated_query.people``; only their ids are kept.
          2. those documents again, asking for nested sentences that must
             match every person and every noun keyword, and should match one
             of a fixed list of positive words.

        Inputs:
            query: not used by this method
            annotated_query: object exposing ``people`` and the ``keywords``
                mapping

        Output:
            the value of ``self.format_response`` for the first hit of the
            second search, passed as (sentence inner hits, 'ProQ:',
            'Full text:')

        Raises IndexError when the second search has no hits.
        """
        search_path = '/flattened-articles/_search'

        payload = {
            "_source": ["sentences.content", "Full text:", "ProQ:"],
            "query": {
                "bool": {
                    "must": [{"match": {"Full text:": person}}
                             for person in annotated_query.people]
                }
            },
        }
        raw = elastic.search(elastic.ES_URL, search_path, payload)
        ids = [hit['_id'] for hit in json.loads(raw)['hits']['hits']]

        # 'portrayal' was previously misspelled 'portral', which could not
        # match the intended word in a sentence
        positive_words = [
            'strong', 'dynamic', 'elegant', 'powerful', 'good', 'excellent',
            'shocking', 'emerging', 'riveting', 'focused', 'intelligent',
            'smart', 'subtle', 'outstanding', 'accomplished', 'terrific',
            'great', 'love', 'performance', 'favorite', 'best', 'portrayal',
            'cast',
        ]
        required_terms = (annotated_query.people +
                          annotated_query.keywords['keywords']['NOUN'])
        sentence_query = {
            "bool": {
                "should": [{"match": {"sentences.content": word}}
                           for word in positive_words],
                "must": [{"match": {"sentences.content": term}}
                         for term in required_terms],
            }
        }
        payload = {
            "_source": ["sentences.content", "Full text:", "ProQ:"],
            "query": {
                "bool": {
                    "must": [
                        {"ids": {"values": ids}},
                        {
                            "nested": {
                                "path": "sentences",
                                "query": sentence_query,
                                "inner_hits": {},
                            }
                        },
                    ]
                }
            },
        }
        raw = elastic.search(elastic.ES_URL, search_path, payload)
        results = []
        for hit in json.loads(raw)['hits']['hits']:
            sentence_hits = hit['inner_hits']['sentences']['hits']
            source = hit['_source']
            results.append((sentence_hits, source['ProQ:'],
                            source['Full text:']))

        return self.format_response(results[0])
Ejemplo n.º 16
0
def results():
    """
    Run ``search`` on the submitted 'query' form field and render
    'form_action.html' with the hits of the response.
    """
    response = search(request.form['query'])
    return render_template('form_action.html', hits=response['hits']['hits'])
Ejemplo n.º 17
0
 def get(self, types, search_term, from_date, to_date, size, offset):
     """
     Split ``types`` on commas, run ``elastic.search`` with the resulting
     list and the remaining arguments, and write the result to the response.
     """
     type_list = str(types).split(",")
     self.write(elastic.search(type_list, search_term, from_date,
                               to_date, size, offset))