コード例 #1
0
    def format_line(self, line):
        """Convert one raw dataset record into the formatted example dict.

        The label text is ``line["label"]``; when that is ``None`` the value
        of ``line["verifiable"]`` is used instead.

        When the record has an ``"evidence"`` key, every evidence entry whose
        element at index 2 is not ``None`` contributes:

        * its ``(ev[2], ev[3])`` pair, de-duplicated, each expanded to
          ``(ev[2], ev[3], self.get_doc_line(ev[2], ev[3]))`` in ``"evidence"``;
        * its ``ev[2]`` value, de-duplicated, in ``"docs"``.

        Returns:
            dict with keys ``"claim"`` (``self.tokenize`` of the claim),
            ``"docs"`` (a set, or an empty list when the record has no
            ``"evidence"`` key), ``"evidence"`` (list of 3-tuples),
            ``"label"`` (``self.label_schema.get_id`` of the label text) and
            ``"label_text"``.
        """
        label_text = line["label"]
        if label_text is None:
            label_text = line["verifiable"]

        doc_ids = []
        resolved = []
        if 'evidence' in line:
            groups = line["evidence"]
            # Unique (ev[2], ev[3]) references across all evidence groups.
            sentence_refs = set(flatten([
                [(ev[2], ev[3]) for ev in group if ev[2] is not None]
                for group in groups
            ]))
            # Unique ev[2] values across all evidence groups.
            doc_ids = set(flatten([
                [ev[2] for ev in group if ev[2] is not None]
                for group in groups
            ]))
            resolved = [
                (doc, sent, self.get_doc_line(doc, sent))
                for doc, sent in sentence_refs
            ]

        return {
            "claim": self.tokenize(line["claim"]),
            "docs": doc_ids,
            "evidence": resolved,
            "label": self.label_schema.get_id(label_text),
            "label_text": label_text
        }
コード例 #2
0
    def bodies(self, data):
        """Return the document text for every distinct body id in ``data``.

        Ids are taken from ``self.body_ids(data)``, flattened and
        de-duplicated; ``None`` entries are dropped. Each remaining id is
        looked up with ``self.doc_db.get_doc_text`` and a progress line is
        printed before each lookup.

        Returns:
            list: one entry per distinct non-``None`` id, in the (arbitrary)
            iteration order of the de-duplicated id set.
        """
        # Drop None before counting so the printed total matches the number
        # of lookups that are actually performed.
        doc_ids = [
            doc_id
            for doc_id in set(flatten(self.body_ids(data)))
            if doc_id is not None
        ]
        total = len(doc_ids)

        texts = []
        for position, doc_id in enumerate(doc_ids, start=1):
            print(f"passing id ({position}/{total}) ", doc_id)
            texts.append(self.doc_db.get_doc_text(doc_id))
        return texts
コード例 #3
0
 def bodies(self, data):
     """Collect the distinct document texts referenced by ``data``.

     Each datum is dispatched on the type of ``datum[self.ename]``:

     * a list: ids come from ``flatten(self.body_ids([datum]))`` and are
       looked up in ``self.fever_db``;
     * anything else: ids come from ``self.body_id([datum])`` and are
       looked up in ``self.fnc_db``.

     Ids are de-duplicated per datum, and the collected texts are
     de-duplicated once more before being returned.

     Returns:
         list: the distinct texts, in arbitrary (set) order.
     """
     collected = []
     for datum in data:
         if not isinstance(datum[self.ename], list):
             collected += [
                 self.fnc_db.get_doc_text(key)
                 for key in set(self.body_id([datum]))
             ]
             continue

         unique_ids = set(flatten(self.body_ids([datum])))
         collected += [
             self.fever_db.get_doc_text(key) for key in unique_ids
         ]
     return list(set(collected))
コード例 #4
0
 def bodies(self, data):
     """Return the document text for each distinct body id in ``data``.

     Ids come from ``self.body_ids(data)``, are flattened and de-duplicated,
     then each one is resolved through ``self.doc_db.get_doc_text``.

     Returns:
         list: one text per distinct id, in arbitrary (set) order.
     """
     unique_ids = set(flatten(self.body_ids(data)))
     return [self.doc_db.get_doc_text(doc_id) for doc_id in unique_ids]