예제 #1
0
파일: web_answerer.py 프로젝트: DenXX/aqqu
    def rank_query_candidates(self, query_candidates, key=lambda x: x, utterance=""):
        """
        Returns candidates generated from web search results. This method doesn't look into the
        existing candidates, but rather creates new ones from the entities mentioned in the
        titles and snippets of the search results retrieved for the question.

        Answers are memoised in self._answers_cache, keyed by the UTF-8 encoded utterance.

        :param query_candidates: List of EvaluationCandidate objects. This answerer doesn't actually use them.
        :param key: Not used by this answerer.
        :param utterance: Question text, either a unicode object or a byte string
         (presumably UTF-8 encoded - confirm against callers).
        :return: List of EvaluationCandidate objects, one per entity name, ordered by decreasing
         accumulated linking score. Entity names mentioned in the question itself are excluded.
        """
        # Work with a byte string from here on, so the cache key and the log messages are consistent.
        if isinstance(utterance, unicode):
            utterance = utterance.encode('utf-8')
        if utterance in self._answers_cache:
            return self._answers_cache[utterance]
        logger.debug("-------------------------------------\nQUESTION: " + utterance)

        # The utterance is already a byte string here. Calling .encode("utf-8") on it again makes
        # Python 2 implicitly decode it as ASCII first, which fails for any non-ASCII question,
        # so it is passed as is. The mentions are computed once and reused for logging.
        question_mentions = find_entity_mentions(utterance, use_tagme=True)
        question_entities = set(mention['name'] for mention in question_mentions)
        logger.debug("Question entities: " + str(question_mentions))

        res = json.loads(self._searcher.search(utterance, topn=self._topn))

        # Entity name -> sum of the linking scores over all titles and snippets.
        entities = dict()
        for r in res['webPages']['value']:
            title = r['name'].encode("utf-8")
            title_entities = find_entity_mentions(title, use_tagme=True)
            snippet = r['snippet'].encode("utf-8")
            snippet_entities = find_entity_mentions(snippet, use_tagme=True)
            logger.debug("\nTitle:\t" + title + "\nSnippet:\t" + snippet)
            logger.debug(title_entities)
            logger.debug(snippet_entities)
            for e in title_entities + snippet_entities:
                # Only mentions scoring above the threshold contribute.
                if e['score'] > self._entity_linking_score_threshold:
                    entities[e['name']] = entities.get(e['name'], 0) + e['score']

        ranked = sorted(entities.items(), key=operator.itemgetter(1), reverse=True)
        logger.debug("Answer:\t" + str(ranked))
        # Entities that already appear in the question are not returned as answers.
        answers = [EvaluationCandidate(None, "", [name, ])
                   for name, _ in ranked if name not in question_entities]
        self._answers_cache[utterance] = answers
        return answers
예제 #2
0
파일: web_answerer.py 프로젝트: DenXX/aqqu
    def rank_query_candidates(self, query_candidates, key=lambda x: x, utterance=""):
        """
        Returns candidates generated from search results. This method doesn't look into the
        existing candidates, but rather creates new ones from the entities attached to the
        search results retrieved for the question.

        Answers are memoised in self._answers_cache, keyed by the UTF-8 encoded utterance.

        :param query_candidates: List of EvaluationCandidate objects. This answerer doesn't actually use them.
        :param key: Not used by this answerer.
        :param utterance: Question text, either a unicode object or a byte string
         (presumably UTF-8 encoded - confirm against callers).
        :return: List of EvaluationCandidate objects, one per entity, ordered by decreasing sum of
         the scores of the results that mention the entity. Each candidate is built from the
         list of (phrase, score) pairs of those results and the entity name.
        """
        # Work with a byte string from here on, so the cache key is consistent.
        if isinstance(utterance, unicode):
            utterance = utterance.encode('utf-8')
        if utterance in self._answers_cache:
            return self._answers_cache[utterance]

        # The utterance is already a byte string here. Calling .encode("utf-8") on it again makes
        # Python 2 implicitly decode it as ASCII first, which fails for any non-ASCII question,
        # so it is passed as is.
        question_entities = set(
            mention['name'] for mention in find_entity_mentions(utterance, use_tagme=True))

        res = json.loads(self._searcher.search(utterance, topn=self._topn))

        # Entity mid -> list of (phrase, score) pairs of the results mentioning it.
        entities = dict()
        for r in res:
            for e in r['entities']:
                entities.setdefault(e['mid'], []).append((r['phrase'], r['score']))

        ranked = sorted(entities.items(), key=lambda x: sum(score for _, score in x[1]), reverse=True)
        # NOTE(review): question_entities holds entity *names*, while the keys compared against it
        # here are *mids*, so this filter may never exclude anything - confirm what
        # find_entity_mentions returns and whether mids should be compared instead.
        answers = [EvaluationCandidate(None, evidence,
                                       [KBEntity.get_entity_name(mid.replace("/", ".")), ])
                   for mid, evidence in ranked if mid not in question_entities]
        self._answers_cache[utterance] = answers
        return answers