def format_line(self, line):
    """Turn one raw record into the dictionary form returned to callers.

    The label text is ``line["label"]``; when that value is ``None`` the
    value of ``line["verifiable"]`` is used instead.

    When the record has an ``"evidence"`` key, each evidence entry whose
    third field (``ev[2]``) is not ``None`` contributes the pair
    ``(ev[2], ev[3])``.  The distinct pairs are each resolved through
    ``self.get_doc_line`` and returned as ``(ev[2], ev[3], resolved)``
    triples.

    Returns a dict with the keys ``"claim"`` (tokenized claim), ``"docs"``,
    ``"evidence"``, ``"label"`` (id from ``self.label_schema``) and
    ``"label_text"``.  Note that ``"docs"`` is an empty list when the record
    has no ``"evidence"`` key and a set otherwise.
    """
    label_text = line["label"]
    if label_text is None:
        label_text = line["verifiable"]

    docs = []
    evidence = []
    if "evidence" in line:
        groups = line["evidence"]
        # Sets remove duplicates, so the order of the results below is
        # whatever order the set happens to iterate in.
        page_refs = set(flatten([
            [(ev[2], ev[3]) for ev in group if ev[2] is not None]
            for group in groups
        ]))
        docs = set(flatten([
            [ev[2] for ev in group if ev[2] is not None]
            for group in groups
        ]))
        evidence = [
            (ref[0], ref[1], self.get_doc_line(ref[0], ref[1]))
            for ref in page_refs
        ]

    return {
        "claim": self.tokenize(line["claim"]),
        "docs": docs,
        "evidence": evidence,
        "label": self.label_schema.get_id(label_text),
        "label_text": label_text,
    }
def bodies(self, data):
    """Fetch the document text for every distinct id found in ``data``.

    The ids come from ``self.body_ids(data)``, flattened and de-duplicated
    through a set.  ``None`` ids are skipped.  A progress line is printed
    for each id that is looked up.

    Returns a list with one ``self.doc_db.get_doc_text`` result per
    non-``None`` id, in set-iteration order.
    """
    unique_ids = set(flatten(self.body_ids(data)))
    # The denominator is the size of the whole set, so a None id (which is
    # skipped below) still counts towards the printed total.
    total = len(unique_ids)
    present_ids = (doc_id for doc_id in unique_ids if doc_id is not None)

    texts = []
    for position, doc_id in enumerate(present_ids, start=1):
        print(f"passing id ({position}/{total}) ", doc_id)
        texts.append(self.doc_db.get_doc_text(doc_id))
    return texts
def bodies(self, data):
    """Collect the distinct document texts referenced by ``data``.

    Each datum is handled according to the type of ``datum[self.ename]``:

    * a ``list`` -- ids come from ``self.body_ids([datum])`` (flattened)
      and the texts are read from ``self.fever_db``;
    * anything else -- ids come from ``self.body_id([datum])`` and the
      texts are read from ``self.fnc_db``.

    Ids are de-duplicated per datum, and the gathered texts are
    de-duplicated again before returning, so the order of the returned
    list is not fixed.
    """
    texts = []
    for datum in data:
        if isinstance(datum[self.ename], list):
            for doc_id in set(flatten(self.body_ids([datum]))):
                texts.append(self.fever_db.get_doc_text(doc_id))
        else:
            for doc_id in set(self.body_id([datum])):
                texts.append(self.fnc_db.get_doc_text(doc_id))
    # Different ids may yield the same text; keep each text once.
    return list(set(texts))
def bodies(self, data):
    """Return the document text for every distinct id found in ``data``.

    The ids come from ``self.body_ids(data)``, flattened and de-duplicated
    through a set; each one is passed to ``self.doc_db.get_doc_text``.
    The result list follows set-iteration order.  No id is filtered out,
    so a ``None`` id (if present) is passed to the lookup as well.
    """
    distinct_ids = set(flatten(self.body_ids(data)))
    texts = []
    for doc_id in distinct_ids:
        texts.append(self.doc_db.get_doc_text(doc_id))
    return texts