def rank_query_candidates(self, query_candidates, key=lambda x: x, utterance=""):
    """
    Build answer candidates from web search results for the question.

    This method doesn't look into the existing candidates; it creates new
    ones from the entities mentioned in the titles and snippets of the
    search results. Each linked entity whose score is above
    ``self._entity_linking_score_threshold`` accumulates its score over all
    results; entities are ranked by that total (highest first) and entities
    that are also mentioned in the question itself are dropped.

    Results are memoised in ``self._answers_cache`` keyed by the UTF-8
    encoded utterance.

    :param query_candidates: List of EvaluationCandidate objects. This
        answerer doesn't actually use them.
    :param key: Not used by this implementation.
    :param utterance: Question text, either ``unicode`` or a UTF-8 encoded
        byte string.
    :return: List of EvaluationCandidate objects, one per answer entity,
        ordered from highest to lowest accumulated score.
    """
    # Normalise to UTF-8 bytes once; everything below works on bytes.
    if isinstance(utterance, unicode):
        utterance = utterance.encode('utf-8')
    if utterance in self._answers_cache:
        return self._answers_cache[utterance]

    logger.debug("-------------------------------------\nQUESTION: " + utterance)

    # `utterance` is already encoded here. Calling .encode("utf-8") on it
    # again would make Python 2 implicitly ASCII-decode the bytes first and
    # raise UnicodeDecodeError for any non-ASCII question, so pass it as is.
    # The mentions are computed once and reused for the debug message.
    question_mentions = find_entity_mentions(utterance, use_tagme=True)
    question_entities = {mention['name'] for mention in question_mentions}
    logger.debug("Question entities: " + str(question_mentions))

    res = json.loads(self._searcher.search(utterance, topn=self._topn))

    # Entity name -> sum of linking scores over all titles and snippets.
    entities = dict()
    for r in res['webPages']['value']:
        title = r['name'].encode("utf-8")
        title_entities = find_entity_mentions(title, use_tagme=True)
        snippet = r['snippet'].encode("utf-8")
        snippet_entities = find_entity_mentions(snippet, use_tagme=True)
        logger.debug("\nTitle:\t" + title + "\nSnippet:\t" + snippet)
        logger.debug(title_entities)
        logger.debug(snippet_entities)
        for e in title_entities + snippet_entities:
            # Ignore low-confidence entity links.
            if e['score'] > self._entity_linking_score_threshold:
                entities[e['name']] = entities.get(e['name'], 0) + e['score']

    ranked = sorted(entities.items(), key=operator.itemgetter(1), reverse=True)
    logger.debug("Answer:\t" + str(ranked))

    # Entities already present in the question are not reported as answers.
    names = [name for name, _ in ranked if name not in question_entities]
    answers = [EvaluationCandidate(None, "", [name, ]) for name in names]
    self._answers_cache[utterance] = answers
    return answers
def rank_query_candidates(self, query_candidates, key=lambda x: x, utterance=""):
    """
    Build answer candidates from search results for the question.

    This method doesn't look into the existing candidates; it creates new
    ones from the search results. The searcher's response is parsed as a
    JSON list of records, each carrying a ``phrase``, a ``score`` and a
    list of ``entities`` with a ``mid``. For every mid the
    ``(phrase, score)`` pairs of the records mentioning it are collected,
    and mids are ranked by the sum of those scores (highest first).

    Results are memoised in ``self._answers_cache`` keyed by the UTF-8
    encoded utterance.

    :param query_candidates: List of EvaluationCandidate objects. This
        answerer doesn't actually use them.
    :param key: Not used by this implementation.
    :param utterance: Question text, either ``unicode`` or a UTF-8 encoded
        byte string.
    :return: List of EvaluationCandidate objects, one per answer entity,
        each built from the entity name and its list of
        ``(phrase, score)`` pairs, ordered from highest to lowest total
        score.
    """
    # Normalise to UTF-8 bytes once; everything below works on bytes.
    if isinstance(utterance, unicode):
        utterance = utterance.encode('utf-8')
    if utterance in self._answers_cache:
        return self._answers_cache[utterance]

    # `utterance` is already encoded here. Calling .encode("utf-8") on it
    # again would make Python 2 implicitly ASCII-decode the bytes first and
    # raise UnicodeDecodeError for any non-ASCII question, so pass it as is.
    question_entities = {
        mention['name']
        for mention in find_entity_mentions(utterance, use_tagme=True)
    }

    res = json.loads(self._searcher.search(utterance, topn=self._topn))

    # mid -> list of (phrase, score) pairs from the records mentioning it.
    entities = dict()
    for r in res:
        for e in r['entities']:
            entities.setdefault(e['mid'], []).append((r['phrase'], r['score']))

    ranked = sorted(
        entities.items(),
        key=lambda item: sum(score for _, score in item[1]),
        reverse=True)

    # NOTE(review): `question_entities` holds entity *names* while the keys
    # compared against it are *mids*, so this filter may never exclude
    # anything - confirm what find_entity_mentions returns in 'name'.
    named = [
        (KBEntity.get_entity_name(mid.replace("/", ".")), evidence)
        for mid, evidence in ranked
        if mid not in question_entities
    ]
    answers = [
        EvaluationCandidate(None, evidence, [name, ])
        for name, evidence in named
    ]
    self._answers_cache[utterance] = answers
    return answers