Beispiel #1
0
def load_cwms(cached=True):
    """Load INDRA Statements extracted from CWMS EKB files.

    Parameters
    ----------
    cached : bool
        If True and a previously pickled result exists on disk, load and
        return it instead of reprocessing the EKB files. Default: True.

    Returns
    -------
    list
        INDRA Statements with a provenance annotation added to each
        piece of evidence; the result is also pickled for later reuse.
    """
    logger.info('Loading CWMS statements')
    pkl_name = os.path.join(data_path, 'cwms', 'stmts_regrounded.pkl')
    # Short-circuit: return the cached pickle if available and allowed.
    if cached and os.path.exists(pkl_name):
        with open(pkl_name, 'rb') as fh:
            stmts = pickle.load(fh)
            logger.info(f'Loaded {len(stmts)} statements')
            return stmts
    fnames = glob.glob(os.path.join(data_path, 'cwms', 'ekbs', '*.ekb'))
    stmts = []
    for fname in tqdm.tqdm(fnames):
        logger.info(f'Processing {fname}')
        try:
            cp = cwms.process_ekb_file(fname)
        except Exception:
            # Best-effort: skip files that fail to process, but log the
            # traceback instead of silently dropping them.
            logger.exception(f'Failed to process {fname}')
            continue
        stmts += cp.statements
    # Attach a standardized provenance annotation to each evidence,
    # pointing at the source document id (stored in ev.pmid).
    for stmt in stmts:
        for ev in stmt.evidence:
            ev.annotations['provenance'] = [{
                '@type': 'Provenance',
                'document': {
                    '@id': ev.pmid
                }
            }]
    logger.info(f'Loaded {len(stmts)} statements from CWMS')
    # Cache the freshly processed statements for subsequent calls.
    with open(pkl_name, 'wb') as fh:
        pickle.dump(stmts, fh)
    return stmts
Beispiel #2
0
def process_cwms():
    """Read all CWMS EKB outputs from the fixed dated folder and return
    the extracted INDRA Statements, each evidence annotated with the
    provenance of its source document."""
    print('Processing CWMS output')
    pattern = 'docs/cwms/20181114/*.ekb'
    statements = []
    # Process the EKB files in a deterministic (sorted) order.
    for ekb_file in sorted(glob.glob(pattern)):
        statements.extend(cwms.process_ekb_file(ekb_file).statements)
    # Point each evidence's provenance at its document id (ev.pmid).
    for statement in statements:
        for evidence in statement.evidence:
            evidence.annotations['provenance'] = \
                [{'document': {'@id': evidence.pmid}}]
    return statements
Beispiel #3
0
import glob
from indra.sources import cwms

if __name__ == '__main__':
    # Process a single hand-picked EKB file; globbing over all EKBs in
    # the folder is intentionally not done here.
    fnames = ['ekbs/t_time-start_20190611T102511_r.ekb']
    stmts = []
    for ekb_path in fnames:
        print('Reading %s' % ekb_path)
        result = cwms.process_ekb_file(ekb_path)
        print('Got %d statements' % len(result.statements))
        stmts += result.statements
Beispiel #4
0
 # Collect statements from every configured reader's DART output files.
 # NOTE(review): relies on `readers`, `reader_versions`, `grounding` and
 # the reader modules (eidos, hume, cwms, sofia) defined elsewhere in
 # this file — confirm they are in scope before running.
 do_upload = False
 stmts = []
 for reader in readers:
     version = reader_versions[grounding][reader]
     # Sofia output files are suffixed by grounding mode; all other
     # readers are matched with a plain wildcard.
     pattern = '*' if reader != 'sofia' \
         else ('*_new' if grounding == 'compositional' else '*_old')
     fnames = glob.glob('/Users/ben/data/dart/%s/%s/%s' %
                        (reader, version, pattern))
     print('Found %d files for %s' % (len(fnames), reader))
     for fname in tqdm.tqdm(fnames):
         if reader == 'eidos':
             pp = eidos.process_json_file(fname, grounding_mode=grounding)
         elif reader == 'hume':
             pp = hume.process_jsonld_file(fname, grounding_mode=grounding)
         elif reader == 'cwms':
             pp = cwms.process_ekb_file(fname, grounding_mode=grounding)
         elif reader == 'sofia':
             pp = sofia.process_json_file(fname, grounding_mode=grounding)
         else:
             # Guard: an unrecognized reader previously reused the `pp`
             # from the prior iteration (or raised NameError on the
             # first), mis-attributing statements; skip it explicitly.
             print('Unknown reader %s, skipping %s' % (reader, fname))
             continue
         # The DART document id is the first 32 characters of the
         # output file's base name.
         doc_id = os.path.basename(fname)[:32]
         for stmt in pp.statements:
             for ev in stmt.evidence:
                 if 'provenance' not in ev.annotations:
                     ev.annotations['provenance'] = [{
                         'document': {
                             '@id': doc_id
                         }
                     }]
                 else:
                     # Overwrite any existing provenance document id
                     # with the DART document id.
                     prov = ev.annotations['provenance'][0]['document']
                     prov['@id'] = doc_id
         stmts += pp.statements