예제 #1
0
파일: web_answerer.py 프로젝트: DenXX/aqqu
class BingWebAnswerer(Ranker):
    """Answers a question with entities mentioned in Bing web search results.

    The question text is sent to the Bing web search API, entity mentions are
    extracted from the title and snippet of every returned page, and the
    entities are ranked by the sum of their linking scores. Entities that are
    already mentioned in the question itself are dropped from the answers.
    Answers are memoized per question, optionally in an on-disk shelve.
    """

    def __init__(self, name, entity_link_min_score=0.3, topn=50, use_search_cache=True, use_answers_cache=True, **kwargs):
        """
        :param name: Ranker name, forwarded to Ranker.__init__ together with kwargs.
        :param entity_link_min_score: A mention from the search results is only
         counted if its score is strictly greater than this threshold.
        :param topn: Number of search results requested from the searcher.
        :param use_search_cache: Passed through to BingWebSearchApi.
        :param use_answers_cache: If true, answers are stored in a shelve file
         whose path is read from the config; otherwise in an in-memory dict.
        """
        Ranker.__init__(self, name, **kwargs)
        answers_cache_file = globals.config.get('WebSearchAnswers', 'websearch-answers-cache')
        self._answers_cache = shelve.open(answers_cache_file) if use_answers_cache else dict()
        self._searcher = BingWebSearchApi(globals.config.get('WebSearchAnswers', 'bing-api-key'), use_search_cache)
        self._topn = topn
        self._entity_linking_score_threshold = entity_link_min_score
        self.parameters.web_search_candidates = True

    def rank_query_candidates(self, query_candidates, key=lambda x: x, utterance=""):
        """
        Returns the candidates generated from search results. This method doesn't look into the
         existing candidates, but rather creates new ones based on search results.
        :param query_candidates: List of EvaluationCandidate objects. This answerer doesn't actually use them.
        :param key: Unused by this answerer.
        :param utterance: The question text (unicode or UTF-8 encoded byte string).
        :return: List of EvaluationCandidate objects, one per answer entity, ordered by
         decreasing sum of linking scores.
        """
        # Work on a UTF-8 byte string from here on; it is also the cache key.
        if isinstance(utterance, unicode):
            utterance = utterance.encode('utf-8')
        if utterance in self._answers_cache:
            return self._answers_cache[utterance]
        logger.debug("-------------------------------------\nQUESTION: %s", utterance)

        # The utterance is already UTF-8 bytes here. Calling .encode() on it again
        # would implicitly decode it as ASCII first and fail on non-ASCII questions.
        # Link the question once and reuse the result for both filtering and logging.
        question_mentions = find_entity_mentions(utterance, use_tagme=True)
        question_entities = {mention['name'] for mention in question_mentions}
        logger.debug("Question entities: %s", question_mentions)

        res = self._searcher.search(utterance, topn=self._topn)
        res = json.loads(res)
        # A response without web results has no 'webPages' section; treat it as
        # an empty result list instead of failing with a KeyError.
        web_pages = res.get('webPages', {}).get('value', [])

        # Entity name -> sum of linking scores over all titles and snippets.
        entities = dict()
        for r in web_pages:
            title = r['name'].encode("utf-8")
            snippet = r['snippet'].encode("utf-8")
            title_entities = find_entity_mentions(title, use_tagme=True)
            snippet_entities = find_entity_mentions(snippet, use_tagme=True)
            logger.debug("\nTitle:\t%s\nSnippet:\t%s", title, snippet)
            logger.debug(title_entities)
            logger.debug(snippet_entities)
            for e in title_entities + snippet_entities:
                if e['score'] > self._entity_linking_score_threshold:
                    entities[e['name']] = entities.get(e['name'], 0) + e['score']

        ranked = sorted(entities.items(), key=operator.itemgetter(1), reverse=True)
        logger.debug("Answer:\t%s", ranked)
        # Entities that occur in the question itself are not considered answers.
        answers = [EvaluationCandidate(None, "", [entity_name, ])
                   for entity_name, _ in ranked
                   if entity_name not in question_entities]
        self._answers_cache[utterance] = answers
        return answers

    def close(self):
        """Closes the searcher and, if it is not a plain dict, the answers cache."""
        self._searcher.close()
        if not isinstance(self._answers_cache, dict):
            self._answers_cache.close()
예제 #2
0
파일: web_answerer.py 프로젝트: DenXX/aqqu
 def __init__(self, name, entity_link_min_score=0.3, topn=50, use_search_cache=True, use_answers_cache=True, **kwargs):
     """
     Sets up the answers cache and the Bing searcher.
     :param name: Forwarded to Ranker.__init__ together with kwargs.
     :param entity_link_min_score: Stored as the entity linking score threshold.
     :param topn: Stored as the number of top search results to use.
     :param use_search_cache: Passed through to BingWebSearchApi.
     :param use_answers_cache: If true, answers are kept in a shelve opened at the
      configured path; otherwise in a plain dict.
     """
     Ranker.__init__(self, name, **kwargs)
     config = globals.config
     cache_path = config.get('WebSearchAnswers', 'websearch-answers-cache')
     if use_answers_cache:
         self._answers_cache = shelve.open(cache_path)
     else:
         self._answers_cache = dict()
     api_key = config.get('WebSearchAnswers', 'bing-api-key')
     self._searcher = BingWebSearchApi(api_key, use_search_cache)
     self._topn = topn
     self._entity_linking_score_threshold = entity_link_min_score
     self.parameters.web_search_candidates = True