Beispiel #1
0
class SentAnswerer(Ranker):
    """Answers a question with entities found in sentence search results.

    Each entity mentioned in the search results is scored by the sum of the scores
    of the results it appears in. Computed answers are cached in a shelve file
    keyed by the UTF-8 encoded utterance.
    """

    def __init__(self, name, topn=50, **kwargs):
        """
        Opens the answers cache and creates the search client.
        :param name: Ranker name, forwarded to Ranker.__init__.
        :param topn: Value passed as `topn` to every search request.
        :param kwargs: Extra keyword arguments forwarded to Ranker.__init__.
        """
        Ranker.__init__(self, name, **kwargs)
        answers_cache_file = globals.config.get('WebSearchAnswers', 'sentsearch-answers-cache')
        self._answers_cache = shelve.open(answers_cache_file)
        self._searcher = SentSearchApi()
        self._topn = topn

    def rank_query_candidates(self, query_candidates, key=lambda x: x, utterance=""):
        """
        Returns the candidates generated from search results. This method doesn't look into the
        existing candidates, but rather creates new ones based on search results.
        :param query_candidates: List of EvaluationCandidate objects. This answerer doesn't actually use them.
        :param key: Not used by this answerer.
        :param utterance: Question text (str or unicode); used as the search query and the cache key.
        :return: List of EvaluationCandidate objects ordered by decreasing total score.
        """
        # Normalize to UTF-8 bytes once; everything below works on the encoded form.
        if isinstance(utterance, unicode):
            utterance = utterance.encode('utf-8')
        if utterance in self._answers_cache:
            return self._answers_cache[utterance]

        # The utterance is already encoded here. Encoding a byte string again makes
        # Python 2 implicitly ASCII-decode it first, which fails on non-ASCII questions.
        question_entities = set(e['name'] for e in find_entity_mentions(utterance, use_tagme=True))
        res = json.loads(self._searcher.search(utterance, topn=self._topn))

        # Group (phrase, score) evidence by the mid of every entity mentioned in a result.
        entities = dict()
        for r in res:
            for e in r['entities']:
                entities.setdefault(e['mid'], []).append((r['phrase'], r['score']))

        # Highest total score first.
        answers = sorted(entities.items(), key=lambda x: sum(score for _, score in x[1]), reverse=True)
        # NOTE(review): this filter compares a mid against a set built from e['name'] values;
        # confirm both use the same identifier space, otherwise nothing is ever filtered out.
        answers = [(KBEntity.get_entity_name(mid.replace("/", ".")), evidence) for mid, evidence in answers
                   if mid not in question_entities]
        answers = [EvaluationCandidate(None, evidence, [entity_name, ]) for entity_name, evidence in answers]
        self._answers_cache[utterance] = answers
        return answers

    def close(self):
        """Closes the search client and the answers cache."""
        try:
            self._searcher.close()
        finally:
            # Close the cache even if closing the searcher raised.
            self._answers_cache.close()
Beispiel #2
0
 def __init__(self, name, topn=50, **kwargs):
     """Initialize the base ranker, the shelve-backed answers cache and the search client.

     :param name: Forwarded to Ranker.__init__.
     :param topn: Stored as self._topn.
     :param kwargs: Extra keyword arguments forwarded to Ranker.__init__.
     """
     Ranker.__init__(self, name, **kwargs)
     self._topn = topn
     # Cache file path is looked up under 'WebSearchAnswers' / 'sentsearch-answers-cache'.
     cache_path = globals.config.get('WebSearchAnswers', 'sentsearch-answers-cache')
     self._answers_cache = shelve.open(cache_path)
     self._searcher = SentSearchApi()