class SentAnswerer(Ranker):
    """Answers a question from sentence-search results.

    Entities found in the retrieved sentences are grouped by their mid and
    ordered by the sum of the scores of the sentences mentioning them.
    Results are memoized per utterance in a shelve-backed cache.
    """

    def __init__(self, name, topn=50, **kwargs):
        """Open the answers cache and create the sentence-search client.

        :param name: Ranker name, forwarded to ``Ranker.__init__``.
        :param topn: Number of search results requested per question.
        :param kwargs: Extra keyword arguments forwarded to ``Ranker.__init__``.
        """
        Ranker.__init__(self, name, **kwargs)
        answers_cache_file = globals.config.get('WebSearchAnswers',
                                                'sentsearch-answers-cache')
        self._answers_cache = shelve.open(answers_cache_file)
        self._searcher = SentSearchApi()
        self._topn = topn

    def rank_query_candidates(self, query_candidates, key=lambda x: x,
                              utterance=""):
        """Return candidates generated from search results for the utterance.

        This method doesn't look into the existing candidates, but rather
        creates new ones based on search results.

        :param query_candidates: List of EvaluationCandidate objects. This
            answerer doesn't actually use them.
        :param key: Unused; kept for interface compatibility.
        :param utterance: The question text (unicode or UTF-8 byte string).
        :return: List of EvaluationCandidate objects, ordered by decreasing
            sum of the scores of the search results mentioning the entity.
        """
        # Normalize to a UTF-8 byte string once; it serves as the cache key
        # and as the input to entity linking and search.
        if isinstance(utterance, unicode):
            utterance = utterance.encode('utf-8')

        if utterance in self._answers_cache:
            return self._answers_cache[utterance]

        # The utterance is already UTF-8 encoded at this point. Encoding a
        # byte string again triggers an implicit ASCII decode in Python 2 and
        # raises UnicodeDecodeError on any non-ASCII question.
        question_entities = set(
            e['name'] for e in find_entity_mentions(utterance, use_tagme=True))

        res = json.loads(self._searcher.search(utterance, topn=self._topn))

        # mid -> list of (phrase, score) for every result mentioning the mid.
        entities = dict()
        for r in res:
            for e in r['entities']:
                entities.setdefault(e['mid'], []).append(
                    (r['phrase'], r['score']))

        answers = sorted(entities.items(),
                         key=lambda x: sum(score for _, score in x[1]),
                         reverse=True)
        # NOTE(review): question_entities holds mention names while the keys
        # here are mids, so this filter compares two different kinds of
        # identifiers - confirm this is intended.
        answers = [(KBEntity.get_entity_name(answer[0].replace("/", ".")),
                    answer[1])
                   for answer in answers
                   if answer[0] not in question_entities]
        answers = [EvaluationCandidate(None, answer[1], [answer[0], ])
                   for answer in answers]

        self._answers_cache[utterance] = answers
        return answers

    def close(self):
        """Close the search client and the answers cache."""
        self._searcher.close()
        self._answers_cache.close()
def __init__(self, name, topn=50, **kwargs):
    """Set up the ranker: open the answers cache and the search client.

    :param name: Ranker name, passed through to ``Ranker.__init__``.
    :param topn: How many search results to request per question.
    :param kwargs: Additional keyword arguments for ``Ranker.__init__``.
    """
    Ranker.__init__(self, name, **kwargs)
    # The cache location comes from the 'WebSearchAnswers' config section.
    self._answers_cache = shelve.open(
        globals.config.get('WebSearchAnswers', 'sentsearch-answers-cache')
    )
    self._searcher = SentSearchApi()
    self._topn = topn