예제 #1
0
def load_sofia():
    """Load Sofia statements from the ``Nov_*.xlsx`` tables under ``data_path``.

    Each matching table is passed to ``sofia.process_table``. All statements
    from the first table (in sorted file-name order) are kept. From every
    later table, a statement is kept only if the document ID of its first
    evidence did not occur in the first table. The document ID is the part
    of ``ev.pmid`` before the first ``'.'``.

    Every evidence of the kept statements gets a ``'provenance'`` annotation
    that records its document ID.

    Returns
    -------
    list
        The collected statements, with provenance annotations set on their
        evidences.
    """
    logger.info('Loading Sofia statements')
    # glob returns paths in arbitrary order. The first table is the reference
    # that later tables are filtered against, so sort to make that choice
    # (and therefore the result) reproducible.
    fnames = sorted(glob.glob(os.path.join(data_path, 'sofia/Nov_*.xlsx')))

    stmts = []
    doc_ids = set()
    for idx, fname in enumerate(fnames):
        sp = sofia.process_table(fname)
        if idx == 0:
            # Reference table: record every document ID it covers and keep
            # all of its statements.
            doc_ids.update(
                ev.pmid.split('.')[0]
                for stmt in sp.statements
                for ev in stmt.evidence
            )
            stmts += sp.statements
        else:
            # Later tables only contribute statements whose first evidence
            # comes from a document the reference table did not cover.
            stmts.extend(
                stmt for stmt in sp.statements
                if stmt.evidence[0].pmid.split('.')[0] not in doc_ids
            )

    # Attach a provenance annotation to every evidence that was kept.
    for stmt in stmts:
        for ev in stmt.evidence:
            doc_id = ev.pmid.split('.')[0]
            ev.annotations['provenance'] = [{
                '@type': 'Provenance',
                'document': {
                    '@id': doc_id
                }
            }]
    logger.info(f'Loaded {len(stmts)} statements from Sofia')
    return stmts
예제 #2
0
def process_sofia(fname='docs/sofia/MITRE_AnnualEval_v1.xlsx'):
    """Process a Sofia table and annotate its evidences with provenance.

    Parameters
    ----------
    fname : str
        Path of the table handed to ``sofia.process_table``. Defaults to the
        path that was previously hard-coded, so calling the function without
        arguments behaves as before.

    Returns
    -------
    list
        The statements produced by the processor. Each evidence has a
        ``'provenance'`` annotation whose document ``'@id'`` is the
        evidence's ``pmid``.
    """
    print('Processing Sofia output')
    sp = sofia.process_table(fname)
    for stmt in sp.statements:
        for ev in stmt.evidence:
            # The full pmid value is used as the document identifier.
            ev.annotations['provenance'] = [{'document': {'@id': ev.pmid}}]
    return sp.statements
예제 #3
0
def load_sofia(cached=True):
    """Load Sofia statements, optionally from a pickle cache.

    When ``cached`` is true and ``sofia/stmts_regrounded.pkl`` exists under
    ``data_path``, the statements are unpickled from it and returned.
    Otherwise every ``sofia/*.xlsx`` table under ``data_path`` is passed to
    ``sofia.process_table``. All statements from the first table (in sorted
    file-name order) are kept. From every later table, a statement is kept
    only if the document ID of its first evidence did not occur in the first
    table. The document ID is the part of ``ev.pmid`` before the first
    ``'.'``.

    Every evidence of the kept statements gets a ``'provenance'`` annotation
    that records its document ID, and the result is written to the pickle
    file (regardless of ``cached``) before being returned.

    Parameters
    ----------
    cached : bool
        If True, return the pickled statements when the cache file exists.

    Returns
    -------
    list
        The loaded statements.
    """
    logger.info('Loading Sofia statements')
    pkl_name = os.path.join(data_path, 'sofia', 'stmts_regrounded.pkl')
    if cached and os.path.exists(pkl_name):
        # NOTE: pickle must only be used on trusted files; this one is a
        # cache that this function writes itself further below.
        with open(pkl_name, 'rb') as fh:
            stmts = pickle.load(fh)
            logger.info(f'Loaded {len(stmts)} statements')
            return stmts

    # glob returns paths in arbitrary order. The first table is the reference
    # that later tables are filtered against, so sort to make that choice
    # (and therefore the cached result) reproducible.
    fnames = sorted(glob.glob(os.path.join(data_path, 'sofia/*.xlsx')))

    stmts = []
    doc_ids = set()
    for idx, fname in enumerate(fnames):
        logger.info(f'Processing {fname}')
        sp = sofia.process_table(fname)
        if idx == 0:
            # Reference table: record every document ID it covers and keep
            # all of its statements.
            doc_ids.update(
                ev.pmid.split('.')[0]
                for stmt in sp.statements
                for ev in stmt.evidence
            )
            stmts += sp.statements
        else:
            # Later tables only contribute statements whose first evidence
            # comes from a document the reference table did not cover.
            stmts.extend(
                stmt for stmt in sp.statements
                if stmt.evidence[0].pmid.split('.')[0] not in doc_ids
            )

    # Attach a provenance annotation to every evidence that was kept.
    for stmt in stmts:
        for ev in stmt.evidence:
            doc_id = ev.pmid.split('.')[0]
            ev.annotations['provenance'] = [{
                '@type': 'Provenance',
                'document': {
                    '@id': doc_id
                }
            }]
    logger.info(f'Loaded {len(stmts)} statements from Sofia')
    # Refresh the cache so that later calls with cached=True can reuse it.
    with open(pkl_name, 'wb') as fh:
        pickle.dump(stmts, fh)
    return stmts
예제 #4
0
def read_sofia(fname):
    """Return the statements that ``sofia.process_table`` yields for ``fname``.

    Parameters
    ----------
    fname : str
        Path of the table to process.

    Returns
    -------
    list
        The ``statements`` attribute of the resulting processor.
    """
    return sofia.process_table(fname).statements