def load_sofia():
    """Load Sofia statements from the ``Nov_*.xlsx`` tables under ``data_path``.

    Every spreadsheet matching ``sofia/Nov_*.xlsx`` is run through
    ``sofia.process_table``. All statements from the first file (in sorted
    file-name order) are kept and the document IDs found in their evidence
    are recorded. From each later file, only statements whose first
    evidence refers to a document that was not seen in the first file are
    kept. Finally, every evidence of every kept statement receives a
    ``provenance`` annotation that points at its document ID.

    The document ID is the part of the evidence's ``pmid`` that precedes
    the first ``'.'``.

    Returns
    -------
    list
        The collected statements with provenance annotations attached.
    """
    logger.info('Loading Sofia statements')
    # glob gives no ordering guarantee; sort so that the choice of the
    # "first" file, and therefore the de-duplication result, is reproducible.
    fnames = sorted(glob.glob(os.path.join(data_path, 'sofia/Nov_*.xlsx')))
    stmts = []
    doc_ids = set()
    for idx, fname in enumerate(fnames):
        sp = sofia.process_table(fname)
        if idx == 0:
            # The first file is taken in full and defines the set of
            # documents that later files must not duplicate.
            doc_ids.update(
                ev.pmid.split('.')[0]
                for stmt in sp.statements
                for ev in stmt.evidence
            )
            stmts += sp.statements
        else:
            # Only the first evidence of each statement is consulted when
            # deciding whether its document was already covered.
            stmts.extend(
                stmt for stmt in sp.statements
                if stmt.evidence[0].pmid.split('.')[0] not in doc_ids
            )
    for stmt in stmts:
        for ev in stmt.evidence:
            doc_id = ev.pmid.split('.')[0]
            ev.annotations['provenance'] = [{
                '@type': 'Provenance',
                'document': {
                    '@id': doc_id
                }
            }]
    logger.info(f'Loaded {len(stmts)} statements from Sofia')
    return stmts
def process_sofia():
    """Process the Sofia spreadsheet and tag each evidence with provenance.

    Reads ``docs/sofia/MITRE_AnnualEval_v1.xlsx`` through
    ``sofia.process_table`` and sets the ``provenance`` annotation of every
    evidence to a single-entry list whose document ``@id`` is the
    evidence's ``pmid``.

    Returns
    -------
    list
        The statements produced by the Sofia processor.
    """
    print('Processing Sofia output')
    table_path = 'docs/sofia/MITRE_AnnualEval_v1.xlsx'
    processor = sofia.process_table(table_path)
    for statement in processor.statements:
        for evidence in statement.evidence:
            # Provenance refers to the document by the evidence's pmid as-is.
            evidence.annotations['provenance'] = [
                {'document': {'@id': evidence.pmid}},
            ]
    return processor.statements
def load_sofia(cached=True):
    """Load Sofia statements, optionally from a pickle cache.

    When ``cached`` is true and ``sofia/stmts_regrounded.pkl`` exists under
    ``data_path``, the statements are unpickled from it and returned
    directly. Otherwise every ``sofia/*.xlsx`` table is run through
    ``sofia.process_table``. All statements from the first file (in sorted
    file-name order) are kept and the document IDs found in their evidence
    are recorded. From each later file, only statements whose first
    evidence refers to a document that was not seen in the first file are
    kept. Every evidence of every kept statement then receives a
    ``provenance`` annotation that points at its document ID, and the
    result is written to the pickle file.

    The document ID is the part of the evidence's ``pmid`` that precedes
    the first ``'.'``.

    Parameters
    ----------
    cached : bool
        If True (default), return the pickled statements when the cache
        file exists. If False, always rebuild from the spreadsheets. The
        cache file is rewritten after every rebuild in either case.

    Returns
    -------
    list
        The loaded statements.
    """
    logger.info('Loading Sofia statements')
    pkl_name = os.path.join(data_path, 'sofia', 'stmts_regrounded.pkl')
    if cached and os.path.exists(pkl_name):
        # NOTE: unpickling executes arbitrary code; this is only safe as
        # long as the cache file is one this function wrote itself.
        with open(pkl_name, 'rb') as fh:
            stmts = pickle.load(fh)
        logger.info(f'Loaded {len(stmts)} statements')
        return stmts
    # glob gives no ordering guarantee; sort so that the choice of the
    # "first" file, and therefore the de-duplication result, is reproducible.
    fnames = sorted(glob.glob(os.path.join(data_path, 'sofia/*.xlsx')))
    stmts = []
    doc_ids = set()
    for idx, fname in enumerate(fnames):
        logger.info(f'Processing {fname}')
        sp = sofia.process_table(fname)
        if idx == 0:
            # The first file is taken in full and defines the set of
            # documents that later files must not duplicate.
            doc_ids.update(
                ev.pmid.split('.')[0]
                for stmt in sp.statements
                for ev in stmt.evidence
            )
            stmts += sp.statements
        else:
            # Only the first evidence of each statement is consulted when
            # deciding whether its document was already covered.
            stmts.extend(
                stmt for stmt in sp.statements
                if stmt.evidence[0].pmid.split('.')[0] not in doc_ids
            )
    for stmt in stmts:
        for ev in stmt.evidence:
            doc_id = ev.pmid.split('.')[0]
            ev.annotations['provenance'] = [{
                '@type': 'Provenance',
                'document': {
                    '@id': doc_id
                }
            }]
    logger.info(f'Loaded {len(stmts)} statements from Sofia')
    with open(pkl_name, 'wb') as fh:
        pickle.dump(stmts, fh)
    return stmts
def read_sofia(fname):
    """Return the statements that ``sofia.process_table`` extracts from ``fname``."""
    return sofia.process_table(fname).statements