Example #1
def test_process_json_ld_file():
    ep = eidos.process_json_file(test_jsonld)
    assert len(ep.statements) == 1
    st = ep.statements[0]
    assert 'UN' in st.subj.concept.db_refs
    assert 'UN' in st.obj.concept.db_refs

    ep = eidos.process_json_file(test_jsonld, grounding_ns=['UN'])
    st = ep.statements[0]
    assert set(st.subj.concept.db_refs.keys()) == {'TEXT', 'UN'}
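These test snippets are listed without their module-level setup. Below is a minimal sketch of the imports and fixtures they appear to assume, following the standard INDRA layout; the fixture file names are placeholders, not the actual files used by the original test module.

import os
import datetime
from indra.sources import eidos
from indra.statements import Influence, Association, Event

# Directory holding the test module and its JSON/JSON-LD fixtures
path_this = os.path.dirname(os.path.abspath(__file__))
# Placeholder fixture paths; the real file names in the test module may differ
test_jsonld = os.path.join(path_this, 'eidos_influence.jsonld')
test_json = os.path.join(path_this, 'eidos_influence.json')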
Example #2
def test_process_correlations():
    correl_jsonld = os.path.join(path_this, 'eidos_correlation.json')
    ep = eidos.process_json_file(correl_jsonld)
    assert len(ep.statements) == 1
    st = ep.statements[0]
    assert isinstance(st, Association)
    assert isinstance(st.members[0], Event)
    names = {m.concept.name for m in st.members}
    assert names == {'harvest', 'requirement'}, names

    # This is to check the extraction filter
    ep = eidos.process_json_file(correl_jsonld, extract_filter={'influence'})
    assert len(ep.statements) == 0
Example #3
def test_geoloc_obj():
    se_jsonld = os.path.join(path_this, 'eidos_geoloc_obj.json')
    ep = eidos.process_json_file(se_jsonld)
    st = ep.statements[1]
    ev = st.evidence[0]
    assert not ev.context, ev.context
    assert st.obj.context
Example #4
def assemble_one_corpus():
    """For assembling one of the four corpora."""
    path = '/home/bmg16/data/wm/2-Jsonld'
    corpus_size = '16k'
    prefix = '%s%s' % (path, corpus_size)
    fnames = glob.glob('%s/*.jsonld' % prefix)

    # For large corpus
    all_statements = []
    for idx, fname in enumerate(fnames):
        ep = eidos.process_json_file(fname)
        for stmt in ep.statements:
            for ev in stmt.evidence:
                ev.annotations['provenance'][0]['document']['@id'] = \
                    os.path.basename(fname)

        all_statements += ep.statements
        print('%d: %d' % (idx, len(all_statements)))
    with open('%s/3-Indra%s.pkl' % (prefix, corpus_size), 'wb') as fh:
        pickle.dump(all_statements, fh)

    scorer = get_eidos_scorer()
    assembled_stmts = ac.run_preassembly(all_statements,
                                         belief_scorer=scorer,
                                         return_toplevel=False)

    jd = stmts_to_json(assembled_stmts, use_sbo=False)
    with open('%s/3-Indra%s.json' % (prefix, corpus_size), 'w') as fh:
        json.dump(jd, fh, indent=1)
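Like the test snippets, this corpus-assembly helper is shown without its imports. A sketch of the setup it relies on, assuming the standard INDRA module layout (the import path of get_eidos_scorer in particular is an assumption):

import os
import glob
import json
import pickle
from indra.sources import eidos
from indra.tools import assemble_corpus as ac
from indra.statements import stmts_to_json
# Assumed location of the World Modelers / Eidos belief scorer
from indra.belief.wm_scorer import get_eidos_scorer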
Example #5
def test_process_correlations():
    correl_jsonld = os.path.join(path_this, 'eidos_correlation.json')
    ep = eidos.process_json_file(correl_jsonld)
    assert len(ep.statements) == 1
    st = ep.statements[0]
    assert isinstance(st, Association)
    names = {c.name for c in st.members}
    assert names == {'harvest', 'requirement'}, names
Example #6
def test_process_correlations():
    correl_jsonld = os.path.join(path_this, 'eidos_correlation.json')
    ep = eidos.process_json_file(correl_jsonld)
    assert len(ep.statements) == 1
    st = ep.statements[0]
    assert isinstance(st, Association)
    assert isinstance(st.members[0], Event)
    names = {m.concept.name for m in st.members}
    assert names == {'harvest', 'requirement'}, names
Example #7
def test_process_negation_hedging():
    nh_jsonld = os.path.join(path_this, 'eidos_neg_hedge.json')
    ep = eidos.process_json_file(nh_jsonld)
    assert len(ep.statements) == 1
    st = ep.statements[0]
    epi = st.evidence[0].epistemics
    assert epi.get('hedgings') == ['may'], epi
    assert epi.get('negated') is True, epi
    annot = st.evidence[0].annotations
    assert annot.get('negated_texts') == ['not']
Example #8
def test_process_corefs():
    coref_jsonld = os.path.join(path_this, 'eidos_coref.json')
    ep = eidos.process_json_file(coref_jsonld)
    assert ep.coreferences.get('_:Extraction_6') == '_:Extraction_4'
    assert len(ep.statements) == 2
    # Get summary of subj/objs from statements
    concepts = [(s.subj.name, s.obj.name) for s in ep.statements]
    assert ('rainfall', 'flood') in concepts, concepts
    # This ensures that the coreference was successfully resolved
    assert ('flood', 'displacement') in concepts, concepts
Example #9
def test_process_json():
    ep = eidos.process_json_file(test_json)
    assert ep is not None
    assert len(ep.statements) == 1
    stmt = ep.statements[0]
    assert isinstance(stmt, Influence)
    assert stmt.subj_delta.get('polarity') == 1
    assert stmt.obj_delta.get('polarity') == -1
    assert stmt.subj_delta.get('adjectives') == ['large']
    assert stmt.obj_delta.get('adjectives') == ['seriously']
    print(stmt)
Example #10
def test_process_corefs():
    coref_jsonld = os.path.join(path_this, 'eidos_coref.json')
    ep = eidos.process_json_file(coref_jsonld)
    assert ep.doc.coreferences.get('_:Extraction_6') == '_:Extraction_4'
    assert len(ep.statements) == 2
    # Get summary of subj/objs from statements
    concepts = [(s.subj.concept.name, s.obj.concept.name) for s in
                ep.statements]
    assert ('rainfall', 'flood') in concepts, concepts
    # This ensures that the coreference was successfully resolved
    assert ('flood', 'displacement') in concepts, concepts
Example #11
def test_process_geoids():
    geo_jsonld = os.path.join(path_this, 'eidos_geoid.json')
    ep = eidos.process_json_file(geo_jsonld)
    # Make sure we collect all geoids up front
    ss_loc = {'name': 'South Sudan', 'db_refs': {'GEOID': '7909807'}}
    assert len(ep.geolocs) == 5, len(ep.geolocs)
    assert ep.geolocs['_:GeoLocation_1'].to_json() == ss_loc
    # Make sure this event has the right geoid
    ev = ep.statements[1].evidence[0]
    assert ev.context.geo_location.to_json() == ss_loc
    # And that the subject context is captured in annotations
    assert ev.annotations['subj_context']['geo_location'] == ss_loc
Example #12
def load_eidos():
    logger.info('Loading Eidos statements')
    fnames = glob.glob(os.path.join(data_path, 'eidos/jsonldDir/*.jsonld'))

    stmts = []
    for fname in fnames:
        doc_id = os.path.basename(fname).split('.')[0]
        ep = eidos.process_json_file(fname)
        fix_provenance(ep.statements, doc_id)
        stmts += ep.statements
    logger.info(f'Loaded {len(stmts)} statements from Eidos')
    return stmts
Example #13
def process_eidos_un():
    print('Processing Eidos output for UN corpus')
    fnames = sorted(glob.glob('/Users/ben/data/wm/2-Jsonld16k/*.jsonld'))
    stmts = []
    for fname in tqdm.tqdm(fnames):
        ep = eidos.process_json_file(fname)
        for stmt in ep.statements:
            for ev in stmt.evidence:
                doc_id = os.path.splitext(os.path.basename(fname))[0]
                ev.annotations['provenance'][0]['document']['@id'] = doc_id
            stmts.append(stmt)
    return stmts
Example #14
def test_standalone_event():
    se_jsonld = os.path.join(path_this, 'eidos_standalone_event.json')
    ep = eidos.process_json_file(se_jsonld)
    assert len(ep.statements) == 1
    st = ep.statements[0]
    assert isinstance(st, Event)
    assert hasattr(st, 'evidence')
    ev = st.evidence[0]
    assert ev.text is not None
    js = st.to_json()
    assert js['evidence']
    from indra.statements import stmts_to_json
    js2 = stmts_to_json([st])[0]
    assert 'evidence' in js2
Example #15
def test_process_json():
    ep = eidos.process_json_file(test_json)
    assert ep is not None
    assert len(ep.statements) == 1
    stmt = ep.statements[0]
    assert isinstance(stmt, Influence)
    assert stmt.subj_delta.get('polarity') == 1
    assert stmt.obj_delta.get('polarity') == -1
    assert stmt.subj_delta.get('adjectives') == ['large']
    assert stmt.obj_delta.get('adjectives') == ['seriously']

    assert stmt.evidence[0].annotations['found_by'] == \
        'ported_syntax_1_verb-Causal'
    print(stmt)
Example #16
def test_process_timex():
    timex_jsonld = os.path.join(path_this, 'eidos_timex.json')
    ep = eidos.process_json_file(timex_jsonld)
    assert len(ep.statements) == 1
    ev = ep.statements[0].evidence[0]
    assert ev.context is not None
    assert ev.context.__repr__() == ev.context.__str__()
    assert ev.context.time.duration == 365 * 86400, ev.context.time.duration
    assert ev.context.time.start == \
        datetime.datetime(year=2018, month=1, day=1, hour=0, minute=0), \
        ev.context.time.start
    assert ev.context.time.end == \
        datetime.datetime(year=2019, month=1, day=1, hour=0, minute=0), \
        ev.context.time.end
Example #17
def test_process_polarity():
    test_jsonld = os.path.join(path_this, 'eidos_neg_event.json')
    ep = eidos.process_json_file(test_jsonld)
    assert ep is not None
    assert len(ep.statements) == 1
    stmt = ep.statements[0]
    assert isinstance(stmt, Influence)
    assert stmt.subj.concept.name == 'fuel', stmt.subj.concept.name
    assert stmt.obj.concept.name == 'water trucking', stmt.obj.concept.name
    assert stmt.obj.delta.polarity == -1
    assert stmt.evidence[0].annotations['found_by'] == \
        'ported_syntax_1_verb-Causal'
    assert 'TEXT' in stmt.subj.concept.db_refs
    assert 'TEXT' in stmt.obj.concept.db_refs
Example #18
def test_process_geoids():
    geo_jsonld = os.path.join(path_this, 'eidos_geoid.json')
    ep = eidos.process_json_file(geo_jsonld)
    # Make sure we collect all geoids up front
    ss_loc = {'name': 'South Sudan', 'db_refs': {'GEOID': '7909807'}}
    assert len(ep.doc.geolocs) == 5, len(ep.doc.geolocs)
    assert ep.doc.geolocs['_:GeoLocation_1'].to_json() == ss_loc
    # Make sure this event has the right geoid
    assert isinstance(ep.statements[0], Influence)
    ev = ep.statements[1].evidence[0]
    assert ev.context.geo_location.to_json() == ss_loc
    # And that the subject context is captured in annotations
    assert 'subj_context' in ev.annotations, ev.annotations
    assert ev.annotations['subj_context']['geo_location'] == ss_loc
Example #19
def load_eidos(limit=None, cached=True):
    logger.info('Loading Eidos statements')
    pkl_name = os.path.join(data_path, 'eidos', 'stmts.pkl')
    if cached:
        if os.path.exists(pkl_name):
            with open(pkl_name, 'rb') as fh:
                stmts = pickle.load(fh)
                logger.info(f'Loaded {len(stmts)} statements')
                return stmts
    fnames = glob.glob(os.path.join(data_path, 'eidos/jsonldDir/*.jsonld'))

    stmts = []
    for fname in tqdm.tqdm(fnames[:limit]):
        doc_id = os.path.basename(fname).split('.')[0]
        ep = eidos.process_json_file(fname)
        fix_provenance(ep.statements, doc_id)
        stmts += ep.statements
    logger.info(f'Loaded {len(stmts)} statements from Eidos')
    with open(pkl_name, 'wb') as fh:
        pickle.dump(stmts, fh)
    return stmts
Example #20

#def assemble_all():
if __name__ == '__main__':
    corpora = {
               #'50': '/home/bmg16/Dropbox/postdoc/darpa/src/indra_apps/' + \
               #      'wm_fao/20181101/2-Jsonld50',
               '500': '/home/bmg16/Dropbox/postdoc/darpa/src/indra_apps/' + \
                      'wm_fao/20181101/2-Jsonld500',
               '16k': '/home/bmg16/data/wm/2-Jsonld16k',
               }
    all_statements = []
    for corpus_size, path in corpora.items():
        fnames = glob.glob('%s/*.jsonld' % path)
        for idx, fname in enumerate(fnames):
            ep = eidos.process_json_file(fname)
            for stmt in ep.statements:
                for ev in stmt.evidence:
                    ev.annotations['provenance'][0]['document']['@id'] = \
                        os.path.basename(fname)
                    ev.annotations['provenance'][0]['document']['corpus'] = \
                        corpus_size
            all_statements += ep.statements
            print('%d: %d' % (idx, len(all_statements)))

    scorer = get_eidos_scorer()
    assembled_stmts = ac.run_preassembly(all_statements,
                                         belief_scorer=scorer,
                                         return_toplevel=False)

    jd = stmts_to_json(assembled_stmts, use_sbo=False)
Example #21
def test_process_json_ld_file():
    ep = eidos.process_json_file(test_jsonld)
    assert len(ep.statements) == 1
    assert 'UN' in ep.statements[0].subj.concept.db_refs
    assert 'UN' in ep.statements[0].obj.concept.db_refs
Example #22
if __name__ == '__main__':
    readers = ['sofia', 'eidos', 'hume', 'cwms']
    grounding = 'compositional'
    do_upload = False
    stmts = []
    for reader in readers:
        version = reader_versions[grounding][reader]
        pattern = '*' if reader != 'sofia' \
            else ('*_new' if grounding == 'compositional' else '*_old')
        fnames = glob.glob('/Users/ben/data/dart/%s/%s/%s' %
                           (reader, version, pattern))
        print('Found %d files for %s' % (len(fnames), reader))
        for fname in tqdm.tqdm(fnames):
            if reader == 'eidos':
                pp = eidos.process_json_file(fname, grounding_mode=grounding)
            elif reader == 'hume':
                pp = hume.process_jsonld_file(fname, grounding_mode=grounding)
            elif reader == 'cwms':
                pp = cwms.process_ekb_file(fname, grounding_mode=grounding)
            elif reader == 'sofia':
                pp = sofia.process_json_file(fname, grounding_mode=grounding)
            doc_id = os.path.basename(fname)[:32]
            for stmt in pp.statements:
                for ev in stmt.evidence:
                    if 'provenance' not in ev.annotations:
                        ev.annotations['provenance'] = [{
                            'document': {
                                '@id': doc_id
                            }
                        }]
Example #23
def test_compositional_grounding():
    jsonld = os.path.join(path_this, 'eidos_compositional.jsonld')
    ep = eidos.process_json_file(jsonld, grounding_mode='compositional')
    assert ep.statements
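To tie the patterns above together, here is a minimal end-to-end sketch: process a single Eidos JSON-LD output and run preassembly with the Eidos belief scorer, mirroring the corpus-assembly examples. The file path is a placeholder and the scorer import is assumed to follow the standard INDRA layout.

from indra.sources import eidos
from indra.tools import assemble_corpus as ac
from indra.belief.wm_scorer import get_eidos_scorer  # assumed import path

# Placeholder path to a single Eidos JSON-LD output file
ep = eidos.process_json_file('output.jsonld')

# Score and deduplicate the extracted statements, as in the
# run_preassembly calls of the corpus-assembly examples above
assembled = ac.run_preassembly(ep.statements,
                               belief_scorer=get_eidos_scorer(),
                               return_toplevel=False)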