Example #1
def test_text_and_norm_text():
    gm.gilda_mode = 'local'

    # We should filter out ignores in both TEXT and TEXT_NORM
    ag = Agent('x', db_refs={'TEXT': 'XREF_BIBR', 'TEXT_NORM': 'ERK'})
    stmt = Phosphorylation(None, ag)
    res = gm.map_stmts([stmt])
    assert not res
    ag = Agent('x', db_refs={'TEXT': 'ERK', 'TEXT_NORM': 'XREF_BIBR'})
    stmt = Phosphorylation(None, ag)
    res = gm.map_stmts([stmt])
    assert not res

    # We should disambiguate based on both TEXT and TEXT_NORM
    ag = Agent('x', db_refs={'TEXT': 'AA', 'TEXT_NORM': 'XXX'})
    stmt = Phosphorylation(None, ag,
                           evidence=Evidence(text='Arachidonic acid (AA)'))
    res = gm.map_stmts([stmt])
    assert res[0].sub.name == 'arachidonic acid', res[0]
    ag = Agent('x', db_refs={'TEXT': 'XXX', 'TEXT_NORM': 'AA'})
    stmt = Phosphorylation(None, ag,
                           evidence=Evidence(text='Arachidonic acid (AA)'))
    res = gm.map_stmts([stmt])
    assert res[0].sub.name == 'arachidonic acid', res[0]

    ag = Agent('x', db_refs={'TEXT': 'XXX', 'TEXT_NORM': 'ERK'})
    stmt = Phosphorylation(None, ag)
    res = gm.map_stmts([stmt])
    assert res[0].sub.name == 'ERK', res[0]

    ag = Agent('x', db_refs={'TEXT': 'ERK', 'TEXT_NORM': 'XXX'})
    stmt = Phosphorylation(None, ag)
    res = gm.map_stmts([stmt])
    assert res[0].sub.name == 'ERK', res[0]
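
The tests in this listing call methods on a module-level gm object. Below is a minimal, assumed sketch of such a fixture, modeled on the constructor used in Example #7; the import path may differ between INDRA versions.

# Hypothetical fixture; the import path below is an assumption.
from indra.preassembler.grounding_mapper import (
    GroundingMapper, default_grounding_map, default_agent_map)

gm = GroundingMapper(default_grounding_map, default_agent_map)
gm.gilda_mode = 'local'  # toggled to 'local' or 'web' per test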
Example #2
def test_association_refinement():
    health = 'UN/entities/human/health'
    food = 'UN/entities/human/food'
    food_security = 'UN/entities/human/food/food_security'
    eh = Event(Concept('health', db_refs={'UN': [(health, 1.0)]}))
    ef = Event(Concept('food', db_refs={'UN': [(food, 1.0)]}))
    efs = Event(
        Concept('food security', db_refs={'UN': [(food_security, 1.0)]}))
    st1 = Association([eh, ef], evidence=[Evidence(source_api='eidos1')])
    st2 = Association([ef, eh], evidence=[Evidence(source_api='eidos2')])
    st3 = Association([eh, efs], evidence=[Evidence(source_api='eidos3')])
    st4 = Association([ef, efs], evidence=[Evidence(source_api='eidos4')])
    eidos_ont = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             '../sources/eidos/eidos_ontology.rdf')
    hm = HierarchyManager(eidos_ont, True, True)
    hierarchies = {'entity': hm}
    pa = Preassembler(hierarchies, [st1, st2, st3, st4])
    unique_stmts = pa.combine_duplicates()  # debugging
    assert len(unique_stmts) == 3
    rel_stmts = pa.combine_related()
    assert len(rel_stmts) == 2
    eh_efs_stmt = [
        st for st in rel_stmts
        if (st.members[0].concept.name in {'health', 'food security'}
            and st.members[1].concept.name in {'health', 'food security'})
    ][0]
    assert len(eh_efs_stmt.supported_by) == 1
    assert (eh_efs_stmt.supported_by[0].members[0].concept.name
            in {'food', 'health'})
    assert (eh_efs_stmt.supported_by[0].members[1].concept.name
            in {'food', 'health'})
Example #3
def test_gilda_disambiguation():
    gm.gilda_mode = 'web'
    er1 = Agent('NDR1', db_refs={'TEXT': 'NDR1'})
    pmid1 = '18362890'
    stmt1 = Phosphorylation(None, er1,
                            evidence=[Evidence(pmid=pmid1,
                                               text_refs={'PMID': pmid1})])

    er2 = Agent('NDR1', db_refs={'TEXT': 'NDR1'})
    pmid2 = '16832411'
    stmt2 = Inhibition(None, er2,
                       evidence=[Evidence(pmid=pmid2,
                                          text_refs={'PMID': pmid2})])
    mapped_stmts1 = gm.map_stmts([stmt1])
    assert mapped_stmts1[0].sub.name == 'STK38', mapped_stmts1[0].sub.name
    assert mapped_stmts1[0].sub.db_refs['HGNC'] == '17847', \
        mapped_stmts1[0].sub.db_refs
    assert mapped_stmts1[0].sub.db_refs['UP'] == 'Q15208', \
        mapped_stmts1[0].sub.db_refs

    mapped_stmts2 = gm.map_stmts([stmt2])
    assert mapped_stmts2[0].obj.name == 'NDRG1', \
        mapped_stmts2[0].obj.name
    assert mapped_stmts2[0].obj.db_refs['HGNC'] == '7679', \
        mapped_stmts2[0].obj.db_refs
    assert mapped_stmts2[0].obj.db_refs['UP'] == 'Q92597', \
        mapped_stmts2[0].obj.db_refs

    annotations = mapped_stmts2[0].evidence[0].annotations
    assert len(annotations['agents']['gilda'][1]) == 2, \
        annotations
    assert annotations['agents']['gilda'][0] is None
    assert annotations['agents']['gilda'][1] is not None
Example #4
def test_adeft_mapping_non_pos():
    er = Agent('ER', db_refs={'TEXT': 'ER'})
    # This is an exact definition of a pos_label entry so we
    # expect that it will be applied as a grounding even though the
    # Adeft model has low precision for this label.
    ev = Evidence(text='estradiol (ER)')
    stmt = Phosphorylation(None, er, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'CHEBI' in mapped_stmt.sub.db_refs, mapped_stmt.evidence
    # This one is not an exact definition so we expect the grounding to
    # be stripped out.
    ev = Evidence(text='Estradiol is one of the three estrogen hormones '
                  'naturally produced in the body.')
    stmt = Phosphorylation(None, er, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'CHEBI' not in mapped_stmt.sub.db_refs, mapped_stmt.evidence
    # This is a non-positive label, and we expect it to be stripped out
    # whether it's an exact definition or not.
    pcs = Agent('PCS', db_refs={'TEXT': 'PCS', 'MESH': 'xxx'})
    ev = Evidence(text='physical component summary (PCS)')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' not in mapped_stmt.sub.db_refs, \
        (mapped_stmt.sub.db_refs, mapped_stmt.evidence)
    ev = Evidence(text='physical component summary')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' not in mapped_stmt.sub.db_refs, \
        (mapped_stmt.sub.db_refs, mapped_stmt.evidence)
Example #5
    def add_raw_database_statements(self, stmt_lists):
        """Add raw statementes that came from knowledge bases/databases."""
        assert self.databases is not None
        if self.raw_statements is None:
            self.raw_statements = []
        new_raw_statements = []
        for dbidx, stmt_list in enumerate(stmt_lists):
            db_info = self.databases[dbidx]

            for stmt in stmt_list:
                ev = Evidence(db_info.source_api)
                stmt.evidence.append(ev)
                src_hash = ev.get_source_hash()
                raw_json = stmt.to_json()
                db_rs = self.db.RawStatements(
                    db_info_id=db_info.id,
                    json=json.dumps(raw_json).encode('utf-8'),
                    type=raw_json['type'],
                    uuid=stmt.uuid,
                    batch_id=1,
                    source_hash=src_hash,
                    mk_hash=stmt.get_hash(),
                    indra_version="test")
                self.raw_statements.append(db_rs)
                new_raw_statements.append(db_rs)

        self.db.session.add_all(new_raw_statements)
        self.db.session.commit()

        insert_raw_agents(self.db, 1,
                          [s for slist in stmt_lists for s in slist])
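
For reference, a minimal sketch of the evidence and hash bookkeeping performed inside the loop above, using only calls that already appear in this snippet; the source name is made up.

from indra.statements import Agent, Evidence, Phosphorylation

stmt = Phosphorylation(None, Agent('MAPK1'))
ev = Evidence('signor')           # source_api, as taken from db_info.source_api
stmt.evidence.append(ev)
src_hash = ev.get_source_hash()   # stored as RawStatements.source_hash
mk_hash = stmt.get_hash()         # stored as RawStatements.mk_hash
raw_json = stmt.to_json()         # serialized into the json column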
Example #6
def test_adeft_mapping():
    er1 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid1 = '30775882'
    stmt1 = Phosphorylation(None, er1, evidence=[Evidence(pmid=pmid1,
                                                          text_refs={'PMID':
                                                                     pmid1})])

    er2 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid2 = '28369137'
    stmt2 = Inhibition(None, er2, evidence=[Evidence(pmid=pmid2,
                                                     text_refs={'PMID':
                                                                pmid2})])

    mapped_stmts1 = gm.map_stmts([stmt1])
    assert mapped_stmts1[0].sub.name == 'ESR', \
        mapped_stmts1[0].sub.name
    assert mapped_stmts1[0].sub.db_refs['FPLX'] == 'ESR', \
        mapped_stmts1[0].sub.db_refs

    mapped_stmts2 = gm.map_stmts([stmt2])
    assert mapped_stmts2[0].obj.name == 'endoplasmic reticulum', \
        mapped_stmts2[0].obj.name
    assert mapped_stmts2[0].obj.db_refs['GO'] == 'GO:0005783', \
        mapped_stmts2[0].obj.db_refs

    annotations = mapped_stmts2[0].evidence[0].annotations
    assert 'GO:GO:0005783' in annotations['agents']['adeft'][1]
Example #7
def test_adeft_mapping():
    er1 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid1 = '30775882'
    stmt1 = Phosphorylation(None, er1, evidence=[Evidence(pmid=pmid1,
                                                          text_refs={'PMID':
                                                                     pmid1})])

    er2 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid2 = '28369137'
    stmt2 = Inhibition(None, er2, evidence=[Evidence(pmid=pmid2,
                                                     text_refs={'PMID':
                                                                pmid2})])

    gm = GroundingMapper(default_grounding_map, default_agent_map)
    mapped_stmts1 = gm.map_agents([stmt1])
    assert mapped_stmts1[0].sub.name == 'ESR1'
    assert mapped_stmts1[0].sub.db_refs['HGNC'] == '3467'
    assert mapped_stmts1[0].sub.db_refs['UP'] == 'P03372'

    mapped_stmts2 = gm.map_agents([stmt2])
    assert mapped_stmts2[0].obj.name == 'Endoplasmic Reticulum'
    assert mapped_stmts2[0].obj.db_refs['GO'] == 'GO:0005783'

    annotations = mapped_stmts2[0].evidence[0].annotations
    assert 'GO:GO:0005783' in annotations['agents']['adeft'][1]
Example #8
def fix_invalidities_evidence(ev: Evidence):
    """Fix invalidities of a single INDRA Evidence in place."""
    for k, v in copy.deepcopy(ev.text_refs).items():
        if v is None:
            ev.text_refs.pop(k, None)
        elif not k.isupper():
            ev.text_refs.pop(k)
            ev.text_refs[k.upper()] = v

    if ev.pmid and not re.match(text_ref_patterns['PMID'], ev.pmid):
        ev.pmid = None
    if ev.text_refs.get('PMID') and not re.match(text_ref_patterns['PMID'],
                                                 ev.text_refs['PMID']):
        ev.text_refs.pop('PMID', None)

    if ev.pmid is None and ev.text_refs.get('PMID') is not None:
        ev.pmid = ev.text_refs['PMID']
    elif ev.text_refs.get('PMID') is None and ev.pmid is not None:
        ev.text_refs['PMID'] = ev.pmid

    if 'DOI' in ev.text_refs and not re.match(text_ref_patterns['DOI'],
                                              ev.text_refs['DOI']):
        ev.text_refs.pop('DOI', None)
    if 'PMC' in ev.text_refs and not re.match(text_ref_patterns['PMC'],
                                              ev.text_refs['PMC']):
        ev.text_refs.pop('PMC', None)

    if ev.context is not None:
        fix_invalidities_context(ev.context)
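
A hypothetical usage sketch of the function above, assuming the PMID pattern in text_ref_patterns accepts an all-digit string: lowercase text_refs keys are upper-cased, None values are dropped, and pmid and text_refs['PMID'] are kept in sync.

from indra.statements import Evidence

ev = Evidence(pmid=None, text_refs={'pmid': '12345', 'doi': None})
fix_invalidities_evidence(ev)
assert ev.text_refs == {'PMID': '12345'}
assert ev.pmid == '12345'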
Example #9
def test_flatten_evidence_hierarchy():
    braf = Agent('BRAF')
    mek = Agent('MAP2K1')
    st1 = Phosphorylation(braf, mek, evidence=[Evidence(text='foo')])
    st2 = Phosphorylation(braf,
                          mek,
                          'S',
                          '218',
                          evidence=[Evidence(text='bar')])
    pa = Preassembler(hierarchies, stmts=[st1, st2])
    pa.combine_related()
    assert len(pa.related_stmts) == 1
    flattened = flatten_evidence(pa.related_stmts)
    assert len(flattened) == 1
    top_stmt = flattened[0]
    assert len(top_stmt.evidence) == 2
    assert 'bar' in [e.text for e in top_stmt.evidence]
    assert 'foo' in [e.text for e in top_stmt.evidence]
    assert len(top_stmt.supported_by) == 1
    supporting_stmt = top_stmt.supported_by[0]
    assert len(supporting_stmt.evidence) == 1
    assert supporting_stmt.evidence[0].text == 'foo'
    supporting_stmt.evidence[0].text = 'changed_foo'
    assert supporting_stmt.evidence[0].text == 'changed_foo'
    assert 'changed_foo' not in [e.text for e in top_stmt.evidence]
    assert 'foo' in [e.text for e in top_stmt.evidence]
    assert {ev.annotations.get('support_type') for ev in top_stmt.evidence} \
        == {'direct', 'supported_by'}
Example #10
def test_model_json():
    """Test the json structure and content of EmmaaModel.to_json() output"""
    indra_stmts = \
        [Activation(Agent('BRAF', db_refs={'HGNC': '20974'}),
                    Agent('MAP2K1'),
                    evidence=[Evidence(text='BRAF activates MAP2K1.')]),
         Activation(Agent('MAP2K1',
                          activity=ActivityCondition('activity', True)),
                    Agent('MAPK1'),
                    evidence=[Evidence(text='Active MAP2K1 activates MAPK1.')])
         ]
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = [
        EmmaaStatement(stmt, datetime.datetime.now(), [st])
        for stmt in indra_stmts
    ]
    config_dict = {
        'ndex': {
            'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
        },
        'search_terms': [{
            'db_refs': {
                'HGNC': '20974'
            },
            'name': 'MAPK1',
            'search_term': 'MAPK1',
            'type': 'gene'
        }]
    }
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.add_statements(emmaa_stmts)

    emmaa_model_json = emmaa_model.to_json()

    # Test json structure
    assert emmaa_model_json['name'] == 'test'
    assert isinstance(emmaa_model_json['stmts'], list)
    assert emmaa_model_json['ndex_network'] == \
        'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'

    # Test config
    assert emmaa_model_json['search_terms'][0]['type'] == 'gene'
    assert emmaa_model_json['search_terms'][0]['db_refs'] == {'HGNC': '20974'}

    # Test json statements
    assert 'BRAF activates MAP2K1.' == \
           emmaa_model_json['stmts'][0]['stmt']['evidence'][0]['text']
    assert 'Active MAP2K1 activates MAPK1.' == \
           emmaa_model_json['stmts'][1]['stmt']['evidence'][0]['text']
    assert emmaa_model_json['stmts'][0]['stmt']['subj']['name'] == 'BRAF'
    assert emmaa_model_json['stmts'][1]['stmt']['subj']['name'] == 'MAP2K1'
    assert emmaa_model_json['stmts'][1]['stmt']['obj']['name'] == 'MAPK1'

    # Need hashes to be strings so that javascript can read them
    assert isinstance(
        emmaa_model_json['stmts'][0]['stmt']['evidence'][0]['source_hash'],
        str)
Example #11
def test_combine_duplicates():
    raf = Agent('RAF1')
    mek = Agent('MEK1')
    erk = Agent('ERK2')
    p1 = Phosphorylation(raf, mek, evidence=Evidence(text='foo'))
    p2 = Phosphorylation(raf, mek, evidence=Evidence(text='bar'))
    p3 = Phosphorylation(raf, mek, evidence=Evidence(text='baz'))
    p4 = Phosphorylation(raf, mek, evidence=Evidence(text='beep'))
    p5 = Phosphorylation(mek, erk, evidence=Evidence(text='foo2'))
    p6 = Dephosphorylation(mek, erk, evidence=Evidence(text='bar2'))
    p7 = Dephosphorylation(mek, erk, evidence=Evidence(text='baz2'))
    p8 = Dephosphorylation(mek, erk, evidence=Evidence(text='beep2'))
    p9 = Dephosphorylation(Agent('SRC'),
                           Agent('KRAS'),
                           evidence=Evidence(text='beep'))
    stmts = [p1, p2, p3, p4, p5, p6, p7, p8, p9]
    pa = Preassembler(hierarchies, stmts=stmts)
    pa.combine_duplicates()
    # The statements come out sorted by their matches_key
    assert len(pa.unique_stmts) == 4, len(pa.unique_stmts)
    num_evs = [len(s.evidence) for s in pa.unique_stmts]
    assert pa.unique_stmts[0].matches(p6)  # MEK dephos ERK
    assert num_evs[0] == 3, num_evs[0]
    assert pa.unique_stmts[1].matches(p9)  # SRC dephos KRAS
    assert num_evs[1] == 1, num_evs[1]
    assert pa.unique_stmts[2].matches(p5)  # MEK phos ERK
    assert num_evs[2] == 1, num_evs[2]
    assert pa.unique_stmts[3].matches(p1)  # RAF phos MEK
    assert num_evs[3] == 4, num_evs[3]
Example #12
def test_duplicates_copy():
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    ras = Agent('RAS', db_refs={'FA': '03663'})
    st1 = Phosphorylation(src, ras, evidence=[Evidence(text='Text 1')])
    st2 = Phosphorylation(src, ras, evidence=[Evidence(text='Text 2')])
    stmts = [st1, st2]
    pa = Preassembler(hierarchies, stmts=stmts)
    pa.combine_duplicates()
    assert len(pa.unique_stmts) == 1
    assert len(stmts) == 2
    assert len(stmts[0].evidence) == 1
    assert len(stmts[1].evidence) == 1
Example #13
def test_filter_relevance():
    config_dict = {
        'ndex': {
            'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
        },
        'search_terms': [{
            'db_refs': {
                'HGNC': '20974'
            },
            'name': 'MAPK1',
            'search_term': 'MAPK1',
            'type': 'gene'
        }]
    }
    indra_stmts = \
        [Activation(Agent('BRAF', db_refs={'HGNC': '20974'}),
                    Agent('MAP2K1'),
                    evidence=[Evidence(text='BRAF activates MAP2K1.',
                                       source_api='assertion')]),
         Activation(Agent('MAP2K1',
                          activity=ActivityCondition('activity', True)),
                    Agent('MAPK1'),
                    evidence=[Evidence(text='Active MAP2K1 activates '
                                            'MAPK1.',
                                       source_api='assertion')])
         ]
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = [
        EmmaaStatement(stmt, datetime.datetime.now(), [st])
        for stmt in indra_stmts
    ]

    # Try no filter first
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.extend_unique(emmaa_stmts)
    emmaa_model.run_assembly()
    assert len(emmaa_model.assembled_stmts) == 2, emmaa_model.assembled_stmts

    # Next do a prior_one filter
    config_dict['assembly'] = {'filter_relevance': 'prior_one'}
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.extend_unique(emmaa_stmts)
    emmaa_model.run_assembly()
    assert len(emmaa_model.assembled_stmts) == 1, emmaa_model.assembled_stmts
    assert emmaa_model.assembled_stmts[0].obj.name == 'MAPK1'

    # Next do a prior_all filter
    config_dict['assembly'] = {'filter_relevance': 'prior_all'}
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.extend_unique(emmaa_stmts)
    emmaa_model.run_assembly()
    assert len(emmaa_model.assembled_stmts) == 0
Example #14
def test_ground_gilda_source():
    ev1 = Evidence(source_api='reach')
    ev2 = Evidence(source_api='sparser')
    ev3 = Evidence(source_api='trips')
    stmts = [Phosphorylation(None, Agent('x', db_refs={'TEXT': 'kras'}),
                             evidence=ev)
             for ev in (ev1, ev2, ev3)]
    grounded_stmts = ground_statements(stmts, sources=['trips'])
    assert grounded_stmts[0].sub.name == 'x', stmts[0]
    assert grounded_stmts[1].sub.name == 'x'
    assert grounded_stmts[2].sub.name == 'KRAS'
    grounded_stmts = ground_statements(stmts, sources=['reach', 'sparser'])
    assert all(stmt.sub.name == 'KRAS'
               for stmt in grounded_stmts[:2])
Example #15
def test_combine_evidence_exact_duplicates():
    raf = Agent('RAF1')
    mek = Agent('MEK1')
    p1 = Phosphorylation(raf, mek, evidence=Evidence(text='foo'))
    p2 = Phosphorylation(raf, mek, evidence=Evidence(text='bar'))
    p3 = Phosphorylation(raf, mek, evidence=Evidence(text='bar'))
    stmts = [p1, p2, p3]
    pa = Preassembler(hierarchies, stmts=stmts)
    pa.combine_duplicates()
    # The statements come out sorted by their matches_key
    assert len(pa.unique_stmts) == 1
    assert len(pa.unique_stmts[0].evidence) == 2
    assert set(ev.text for ev in pa.unique_stmts[0].evidence) == \
        set(['foo', 'bar'])
Example #16
def test_adeft_mapping_non_pos():
    pcs = Agent('PCS', db_refs={'TEXT': 'PCS'})
    # This is an exact definition of a non-positive label entry so we
    # expect that it will be applied as a grounding
    ev = Evidence(text='post concussive symptoms (PCS)')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' in mapped_stmt.sub.db_refs, mapped_stmt.evidence

    pcs = Agent('PCS', db_refs={'TEXT': 'PCS', 'MESH': 'xxx'})
    ev = Evidence(text='physical component summary')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' not in mapped_stmt.sub.db_refs, \
        (mapped_stmt.sub.db_refs, mapped_stmt.evidence)
Example #17
def test_adeft_mapping_non_pos():
    pcs = Agent('PCS', db_refs={'TEXT': 'PCS'})
    # This is an exact definition of a non-positive label entry so we
    # expect that it will be applied as a grounding
    ev = Evidence(text='post concussive symptoms (PCS)')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' in mapped_stmt.sub.db_refs, mapped_stmt.evidence

    pcs = Agent('PCS', db_refs={'TEXT': 'PCS', 'MESH': 'xxx'})
    # Here a non-positive entry is implied but not matched exactly, so
    # the prior grounding will be removed.
    ev = Evidence(text='post symptoms concussive concussion')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' not in mapped_stmt.sub.db_refs, mapped_stmt.evidence
Example #18
def phosphosite_to_indra():
    df = pandas.read_csv(psite_fname, index_col=None)
    df = df[df['KIN_ORGANISM'] == 'human']
    df = df[df['SUB_ORGANISM'] == 'human']
    stmts = []
    for _, row in df.iterrows():
        enz_name = row['GENE']
        enz_up = row['KIN_ACC_ID']
        sub_name = row['SUB_GENE']
        sub_up = row['SUB_ACC_ID']
        if not enz_name or not sub_name or \
            isinstance(enz_name, float) or isinstance(sub_name, float):
            continue
        enz = Agent(enz_name, db_refs={'UP': enz_up})
        sub = Agent(sub_name, db_refs={'UP': sub_up})
        site = row['SUB_MOD_RSD']
        if site[0] in ('S', 'T', 'Y'):
            residue = site[0]
            position = site[1:]
        else:
            residue = None
            position = None
        ev = Evidence('phosphosite')
        st = Phosphorylation(enz, sub, residue, position, ev)
        stmts.append(st)
    logger.info('%d human-human phosphorylations in Phosphosite' % len(stmts))
    with open('phosphosite_indra.pkl', 'wb') as fh:
        pickle.dump(stmts, fh, protocol=2)
    return stmts
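
A quick illustration of the SUB_MOD_RSD parsing above, with a made-up site string: sites starting with S, T or Y are split into residue and position, anything else yields neither.

site = 'S473'  # hypothetical PhosphoSitePlus site string
if site[0] in ('S', 'T', 'Y'):
    residue, position = site[0], site[1:]
else:
    residue, position = None, None
assert (residue, position) == ('S', '473')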
Example #19
    def _process_relations(relation_rows, event_dict):
        header = [cell.value for cell in next(relation_rows)]
        stmts = []
        for row in relation_rows:
            row_values = [r.value for r in row]
            row_dict = {h: v for h, v in zip(header, row_values)}
            cause_entries = row_dict.get('Cause Index')
            effect_entries = row_dict.get('Effect Index')

            # FIXME: Handle cases in which there is a missing cause/effect
            if not cause_entries or not effect_entries:
                continue
            causes = [c.strip() for c in cause_entries.split(',')]
            effects = [e.strip() for e in effect_entries.split(',')]

            rel = row_dict.get('Relation')
            if _in_rels(rel, pos_rels):
                pol = 1
            elif _in_rels(rel, neg_rels):
                pol = -1
            elif _in_rels(rel, neu_rels):
                pol = None
            # If we don't recognize this relation, we don't get any statements
            else:
                continue

            text = row_dict.get('Sentence')
            #annot_keys = ['Relation', 'Event_Type', 'Location', 'Time']
            #annots = {k: row_dict.get(k) for k in annot_keys}
            annot_keys = ['Relation']
            annots = {k: row_dict.get(k) for k in annot_keys}
            ref = row_dict.get('Source_File')
            ev = Evidence(source_api='sofia',
                          pmid=ref,
                          annotations=annots,
                          text=text)

            for cause_index, effect_index in itertools.product(
                    causes, effects):
                cause_name = event_dict[cause_index]['Relation']
                cause_grounding = event_dict[cause_index]['Event_Type']
                effect_name = event_dict[effect_index]['Relation']
                effect_grounding = event_dict[effect_index]['Event_Type']
                cause_concept = Concept(cause_name,
                                        db_refs={
                                            'TEXT': cause_name,
                                            'SOFIA': cause_grounding
                                        })
                effect_concept = Concept(effect_name,
                                         db_refs={
                                             'TEXT': effect_name,
                                             'SOFIA': effect_grounding
                                         })
                stmt = Influence(cause_concept, effect_concept, evidence=[ev])
                # Assume unknown polarity on the subject, put the overall
                # polarity in the sign of the object
                stmt.subj_delta['polarity'] = None
                stmt.obj_delta['polarity'] = pol
                stmts.append(stmt)
        return stmts
Example #20
def test_model_extend():
    ev1 = Evidence(pmid='1234', text='abcd', source_api='x')
    ev2 = Evidence(pmid='1234', text='abcde', source_api='x')
    ev3 = Evidence(pmid='1234', text='abcd', source_api='x')
    indra_sts = [Phosphorylation(None, Agent('a'), evidence=ev) for ev in
                 [ev1, ev2, ev3]]
    emmaa_sts = [EmmaaStatement(st, datetime.datetime.now(), []) for st in
                 indra_sts]
    em = EmmaaModel('x', {'search_terms': [], 'ndex': {'network': None}})
    em.add_statements([emmaa_sts[0]])
    em.extend_unique(emmaa_sts[1:])
    assert len(em.stmts) == 2
    stmt = EmmaaStatement(Phosphorylation(None, Agent('b'), evidence=ev1),
                          datetime.datetime.now(), [])
    em.extend_unique([stmt])
    assert len(em.stmts) == 3
Example #21
    def get_event(event_entry):
        name = event_entry['Relation']
        concept = Concept(name, db_refs={'TEXT': name})
        grounding = event_entry['Event_Type']
        if grounding:
            concept.db_refs['SOFIA'] = grounding
        context = WorldContext()
        time = event_entry.get('Time')
        if time:
            context.time = TimeContext(text=time.strip())
        loc = event_entry.get('Location')
        if loc:
            context.geo_location = RefContext(name=loc)

        text = event_entry.get('Text')
        ref = event_entry.get('Source')
        agent = event_entry.get('Agent')
        patient = event_entry.get('Patient')
        anns = {}
        if agent:
            anns['agent'] = agent
        if patient:
            anns['patient'] = patient
        ev = Evidence(source_api='sofia', pmid=ref, text=text,
                      annotations=anns, source_id=event_entry['Event Index'])
        pol = event_entry.get('Polarity')
        event = Event(concept, context=context, evidence=[ev],
                      delta=QualitativeDelta(polarity=pol, adjectives=None))

        return event
Example #22
def get_evidence(assay):
    """Given an activity, return an INDRA Evidence object.

    Parameters
    ----------
    assay : dict
        an activity from the activities list returned by a query to the API

    Returns
    -------
    ev : :py:class:`Evidence`
        an :py:class:`Evidence` object containing the kinetics of the
        assay, or None if no kinetics are available
    """
    kin = get_kinetics(assay)
    source_id = assay.get('assay_chembl_id')
    if not kin:
        return None
    annotations = {'kinetics': kin}
    chembl_doc_id = str(assay.get('document_chembl_id'))
    pmid = get_pmid(chembl_doc_id)
    ev = Evidence(source_api='chembl',
                  pmid=pmid,
                  source_id=source_id,
                  annotations=annotations)
    return ev
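
For orientation, a hedged sketch of the Evidence the function above constructs; the kinetics dictionary, PMID and assay identifier are placeholders rather than real ChEMBL values.

from indra.statements import Evidence

kin = {'IC50': '50 nM'}                      # placeholder for get_kinetics(assay)
ev = Evidence(source_api='chembl',
              pmid='12345678',               # placeholder for get_pmid(doc_id)
              source_id='CHEMBL-ASSAY-ID',   # placeholder assay_chembl_id
              annotations={'kinetics': kin})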
Example #23
    def node_to_evidence(self, entity_node, is_direct):
        """Computes an evidence object for a statement.

        We assume that the entire event happens within a single sentence, and
        get the text of that sentence, i.e. the sentence containing the
        provided node that corresponds to one of the entities participating
        in the event.

        The Evidence's pmid is whatever was provided to the constructor
        (perhaps None), and the annotations are the subgraph containing the
        provided node, its ancestors, and its descendants.
        """

        # We assume that the entire event is within a single sentence, and
        # get this sentence by getting the sentence containing one of the
        # entities
        sentence_text = self.G.node[entity_node]['sentence_text']

        # Make annotations object containing the fully connected subgraph
        # containing these nodes
        subgraph = self.connected_subgraph(entity_node)
        annotations = {
            'node_properties': subgraph.node,
            'edge_properties': subgraph.edge
        }

        # Make evidence object
        evidence = Evidence(source_api='tees',
                            pmid=self.pmid,
                            text=sentence_text,
                            epistemics={'direct': is_direct},
                            annotations=annotations)
        return evidence
Example #24
def test_fix_stmts():
    stmts = [
        Translocation(Agent('x'), to_location=None, from_location=None),
        Phosphorylation(Agent('a', db_refs={
            'TEXT': None,
            'FPLX': 'ERK'
        }),
                        Agent('b'),
                        evidence=[Evidence(text='x')])
    ]
    stmts_out = fix_invalidities(stmts)
    assert len(stmts_out) == 1
    assert stmts_out[0].enz.db_refs == {'FPLX': 'ERK'}

    stmts_out = ac.fix_invalidities(stmts)

    assert len(stmts_out) == 1
    assert stmts_out[0].enz.db_refs == {'FPLX': 'ERK'}

    stmts_out = ac.fix_invalidities(stmts,
                                    in_place=True,
                                    print_report_before=True,
                                    print_report_after=True,
                                    prior_hash_annots=True)
    # Check the in-place effect
    assert stmts[1].enz.db_refs == {'FPLX': 'ERK'}
    assert stmts_out[0].enz.db_refs == {'FPLX': 'ERK'}

    assert stmts_out[0].evidence[0].annotations['prior_hash']
Example #25
def test_make_evidence_html1():
    # Full evidence
    ev1 = Evidence(source_api='trips', pmid='12345', text='Some evidence')
    # Has PMID but no text
    ev2 = Evidence(source_api='biopax', pmid='23456', text=None)
    # No PMID or text but has source id
    ev3 = Evidence(source_api='bel', pmid=None, text=None, source_id='bel_id')
    # No evidence other than the source API
    ev4 = Evidence(source_api='bel', pmid=None, text=None, source_id=None)
    stmt = Phosphorylation(Agent('A'),
                           Agent('B'),
                           evidence=[ev1, ev2, ev3, ev4])
    ev_html = make_evidence_html([stmt], 'proof for a conclusion')
    assert 'Some evidence' in ev_html, ev_html
    assert 'Database entry in \'biopax\'' in ev_html, ev_html
    assert 'Database entry in \'bel\'' in ev_html, ev_html
Example #26
    def get_event_compositional(self, event_entry: Dict[str, str]) -> Event:
        """Get an Event with compositional grounding

        Parameters
        ----------
        event_entry :
            The event to process

        Returns
        -------
        event :
            An Event statement
        """
        # Get the compositional grounding
        comp_name, comp_grnd = self.get_compositional_grounding(event_entry)
        if comp_name is not None and \
                comp_grnd[0] is not None and \
                comp_grnd[0][0] is not None:
            concept = Concept(comp_name,
                              db_refs={
                                  'TEXT': comp_name,
                                  'WM': [comp_grnd]
                              })
        # If not, try to get the old-style Sofia grounding
        else:
            name = event_entry['Relation']
            concept = Concept(name, db_refs={'TEXT': name})
            if event_entry['Event_Type']:
                concept.db_refs['SOFIA'] = event_entry['Event_Type']

        context = WorldContext()
        time = event_entry.get('Time')
        if time:
            context.time = TimeContext(text=time.strip())
        loc = event_entry.get('Location')
        if loc:
            context.geo_location = RefContext(name=loc)

        text = event_entry.get('Text')
        ref = event_entry.get('Source')
        agent = event_entry.get('Agent')
        patient = event_entry.get('Patient')
        anns = {}
        if agent:
            anns['agent'] = agent
        if patient:
            anns['patient'] = patient
        text_refs = {'DART': ref}
        ev = Evidence(source_api='sofia',
                      text_refs=text_refs,
                      text=text,
                      annotations=anns,
                      source_id=event_entry['Event Index'])
        pol = event_entry.get('Polarity')
        event = Event(concept,
                      context=context,
                      evidence=[ev],
                      delta=QualitativeDelta(polarity=pol, adjectives=None))

        return event
Example #27
def __make_test_statement(a, b, source_api, ev_num=None):
    A = Agent(a)
    B = Agent(b)
    ev_text = "Evidence %d for %s phosphorylates %s." % (ev_num, a, b)
    ev_list = [Evidence(text=ev_text, source_api=source_api)]
    stmt = Phosphorylation(A, B, evidence=ev_list)
    return stmt
Example #28
    def _get_evidence(self, event, subj_concept, obj_concept):
        """Return the Evidence object for the INDRA Statement."""
        provenance = event.get('provenance')

        # First try looking up the full sentence through provenance
        doc_info = provenance[0].get('document')
        doc_id = doc_info['@id']
        agent_strs = [ag.db_refs['TEXT'] for ag in [subj_concept, obj_concept]]
        text = None
        for sent in self.document_dict[doc_id]['sentences'].values():
            # We take the first match, which _might_ be wrong sometimes. Perhaps
            # refine further later.
            if all([agent_text in sent for agent_text in agent_strs]):
                text = self._sanitize(sent)
                break
        else:
            logger.warning("Could not find sentence in document %s for event "
                           "with agents: %s" % (doc_id, str(agent_strs)))

        annotations = {
            'found_by': event.get('rule'),
            'provenance': provenance,
        }
        location = self.document_dict[doc_id]['location']
        ev = Evidence(source_api='bbn',
                      text=text,
                      annotations=annotations,
                      pmid=location)
        return [ev]
Example #29
    def _get_evidence(self, event, subj_concept, obj_concept, adjectives):
        """Return the Evidence object for the INDRA Statement."""
        provenance = event.get('provenance')

        # First try looking up the full sentence through provenance
        doc_id = provenance[0]['document']['@id']
        sent_id = provenance[0]['sentence']
        text = self.document_dict[doc_id]['sentences'][sent_id]
        text = self._sanitize(text)
        bounds = [
            provenance[0]['documentCharPositions'][k]
            for k in ['start', 'end']
        ]

        annotations = {
            'found_by': event.get('rule'),
            'provenance': provenance,
            'event_type': basename(event.get('type')),
            'adjectives': adjectives,
            'bounds': bounds
        }
        location = self.document_dict[doc_id]['location']
        ev = Evidence(source_api='hume',
                      text=text,
                      annotations=annotations,
                      pmid=location)
        return [ev]
Example #30
    def get_event(event_entry):
        name = event_entry['Relation']
        concept = Concept(name, db_refs={'TEXT': name})
        grounding = event_entry['Event_Type']
        if grounding:
            concept.db_refs['SOFIA'] = grounding
        context = WorldContext()
        time = event_entry.get('Time')
        if time:
            context.time = TimeContext(text=time.strip())
        loc = event_entry.get('Location')
        if loc:
            context.geo_location = RefContext(name=loc)

        text = event_entry.get('Text')
        ref = event_entry.get('Source')
        ev = Evidence(source_api='sofia', pmid=ref, text=text)
        pol = event_entry.get('Polarity')
        event = Event(concept,
                      context=context,
                      evidence=[ev],
                      delta={
                          'polarity': pol,
                          'adjectives': []
                      })

        return event