Ejemplo n.º 1
0
    def percentage_of_concept_ans_in_doc(self, include_extended=False):
        """
        Return the percentage of questions whose answer is a passage concept.

        For every document, the title and the context are joined with a
        newline and passed to ``read_concepts``; each returned concept is
        space-joined and lowercased. A question counts as found when its
        answer text with origin ``"dataset"`` (lowercased) is one of those
        concepts.

        Args:
            include_extended: when true, a question whose ``"dataset"``
                answer is not a concept still counts as found if any of its
                answers with origin ``"UMLS"`` is one.

        Returns:
            ``100 * found / total`` over all questions, or ``0.0`` when the
            dataset contains no questions.

        Raises:
            AssertionError: if a question has no non-empty answer text with
                origin ``"dataset"``.
        """
        from baselines import read_concepts
        n_all = 0
        n_found = 0
        for datum in self.dataset[DATA_KEY]:
            doc = datum[DOC_KEY]
            title_and_passage = doc[TITLE_KEY] + "\n" + doc[CONTEXT_KEY]
            concept_set = {
                " ".join(concept).lower()
                for concept in read_concepts(title_and_passage)
            }
            for qa in doc[QAS_KEY]:
                ans = qa[ANS_KEY]
                # If several answers have origin "dataset", the last one is
                # the one that is checked.
                a = ""
                for _a in ans:
                    if _a[ORIG_KEY] == "dataset":
                        a = _a[TXT_KEY]
                assert a, "question has no answer with origin 'dataset'"
                n_all += 1
                if a.lower() in concept_set:
                    n_found += 1
                elif include_extended and any(
                        _a[ORIG_KEY] == "UMLS"
                        and _a[TXT_KEY].lower() in concept_set
                        for _a in ans):
                    # Count the question once, however many "UMLS" answers
                    # match.
                    n_found += 1

        # Guard against a dataset without questions, which would otherwise
        # raise ZeroDivisionError.
        if not n_all:
            return 0.0
        return 100 * n_found / n_all
Ejemplo n.º 2
0
 def entities_passage(self, lowercase=True):
     """
     Count the concepts found in the title and context of every document.

     Each document's title and context are joined with a newline and passed
     to ``read_concepts``; every returned concept is space-joined, passed
     through ``to_lower`` together with ``lowercase``, and tallied.

     Returns:
         A ``Counter`` mapping each concept string to its number of
         occurrences across all documents.
     """
     from baselines import read_concepts
     counts = Counter()
     for record in self.dataset[DATA_KEY]:
         title = record[DOC_KEY][TITLE_KEY]
         passage = record[DOC_KEY][CONTEXT_KEY]
         text = title + "\n" + passage
         for tokens in read_concepts(text):
             counts[to_lower(" ".join(tokens), lowercase)] += 1
     return counts
Ejemplo n.º 3
0
 def entities(self, lowercase=True, include_extended=False):
     """
     Count concepts in passages and queries, plus the answer strings.

     Concepts come from ``read_concepts`` applied to each document's
     newline-joined title and context, and to each question's query; each
     concept is space-joined and passed through ``to_lower`` together with
     ``lowercase``. Answer texts are tallied as they are (after
     ``to_lower``) when their origin is ``"dataset"``; with
     ``include_extended`` set, every answer text is tallied regardless of
     origin.

     Returns:
         A ``Counter`` mapping each string to its number of occurrences.
     """
     from baselines import read_concepts

     def normalise(tokens):
         # Join a concept's tokens and apply the configured casing.
         return to_lower(" ".join(tokens), lowercase)

     counts = Counter()

     # First pass: concepts from every title + context.
     for record in self.dataset[DATA_KEY]:
         title = record[DOC_KEY][TITLE_KEY]
         passage = record[DOC_KEY][CONTEXT_KEY]
         for tokens in read_concepts(title + "\n" + passage):
             counts[normalise(tokens)] += 1

     # Second pass: concepts from each query, then the answer strings.
     for record in self.dataset[DATA_KEY]:
         for question in record[DOC_KEY][QAS_KEY]:
             for tokens in read_concepts(question[QUERY_KEY]):
                 counts[normalise(tokens)] += 1
             for answer in question[ANS_KEY]:
                 if answer[ORIG_KEY] != "dataset" and not include_extended:
                     continue
                 counts[to_lower(answer[TXT_KEY], lowercase)] += 1
     return counts