Beispiel #1
0
def test_render_stmt_graph():
    """Check node and edge counts of the unreduced statement graph.

    Seven BRAF phosphorylation statements of increasing specificity are
    preassembled and rendered with ``reduce=False``.
    """
    enzyme = Agent('BRAF', db_refs={'HGNC': '1097'})
    gene_sub = Agent('MAP2K1', db_refs={'HGNC': '6840'})
    family_sub = Agent('MEK', db_refs={'FPLX': 'MEK'})
    # The position in this list matches the p0..p6 labels used below
    stmts = [
        Phosphorylation(enzyme, family_sub),
        Phosphorylation(enzyme, gene_sub),
        Phosphorylation(enzyme, gene_sub, position='218'),
        Phosphorylation(enzyme, gene_sub, position='222'),
        Phosphorylation(enzyme, gene_sub, 'serine'),
        Phosphorylation(enzyme, gene_sub, 'serine', '218'),
        Phosphorylation(enzyme, gene_sub, 'serine', '222'),
    ]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_related()
    graph = render_stmt_graph(preassembler.related_stmts, reduce=False)
    # Every statement gets its own node
    assert len(graph.nodes()) == 7
    # Number of statements each one supports; p5 and p6 are top-level and
    # therefore support nothing
    supported_counts = {
        'p0': 6,  # p1-p6
        'p1': 5,  # p2-p6
        'p2': 1,  # p5
        'p3': 1,  # p6
        'p4': 2,  # p5-p6
    }
    assert len(graph.edges()) == sum(supported_counts.values())
Beispiel #2
0
def test_flatten_evidence_hierarchy():
    """Flattening gives the top-level statement the evidence of its supporter.

    The evidence on the flattened statement must stay unaffected when the
    supporting statement's own evidence is modified afterwards, and each
    piece of evidence must be annotated with its support type.
    """
    enz = Agent('BRAF')
    sub = Agent('MAP2K1')
    generic = Phosphorylation(enz, sub, evidence=[Evidence(text='foo')])
    specific = Phosphorylation(enz, sub, 'S', '218',
                               evidence=[Evidence(text='bar')])
    preassembler = Preassembler(hierarchies, stmts=[generic, specific])
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 1
    flat = flatten_evidence(preassembler.related_stmts)
    assert len(flat) == 1
    top = flat[0]
    assert len(top.evidence) == 2
    texts_before = [ev.text for ev in top.evidence]
    assert 'bar' in texts_before
    assert 'foo' in texts_before
    assert len(top.supported_by) == 1
    supporter = top.supported_by[0]
    assert len(supporter.evidence) == 1
    assert supporter.evidence[0].text == 'foo'
    # Edit the supporter's evidence; the flattened evidence must not follow
    supporter.evidence[0].text = 'changed_foo'
    assert supporter.evidence[0].text == 'changed_foo'
    texts_after = [ev.text for ev in top.evidence]
    assert 'changed_foo' not in texts_after
    assert 'foo' in texts_after
    support_types = {ev.annotations.get('support_type')
                     for ev in top.evidence}
    assert support_types == {'direct', 'supported_by'}
Beispiel #3
0
def test_render_stmt_graph():
    """Render the unreduced refinement graph and count nodes and edges.

    Seven phosphorylation statements by BRAF are preassembled against
    ``bio_ontology`` and rendered with ``reduce=False``.
    """
    kinase = Agent('BRAF', db_refs={'HGNC': '1097'})
    map2k1 = Agent('MAP2K1', db_refs={'HGNC': '6840'})
    mek_family = Agent('MEK', db_refs={'FPLX': 'MEK'})
    # Statements p0..p6, in that order
    all_stmts = [Phosphorylation(kinase, mek_family),
                 Phosphorylation(kinase, map2k1),
                 Phosphorylation(kinase, map2k1, position='218'),
                 Phosphorylation(kinase, map2k1, position='222'),
                 Phosphorylation(kinase, map2k1, 'serine'),
                 Phosphorylation(kinase, map2k1, 'serine', '218'),
                 Phosphorylation(kinase, map2k1, 'serine', '222')]
    assembler = Preassembler(bio_ontology, stmts=all_stmts)
    assembler.combine_related()
    stmt_graph = render_stmt_graph(assembler.related_stmts, reduce=False)
    # One node per statement
    node_total = len(stmt_graph.nodes())
    assert node_total == 7
    # Edge budget:
    #   p0 supports p1-p6 -> 6
    #   p1 supports p2-p6 -> 5
    #   p2 supports p5    -> 1
    #   p3 supports p6    -> 1
    #   p4 supports p5-p6 -> 2
    #   p5 and p6 are top-level and support nothing
    expected_edges = 6 + 5 + 1 + 1 + 2
    edge_total = len(stmt_graph.edges())
    assert edge_total == expected_edges
Beispiel #4
0
def test_translocation():
    """Three AKT translocations must yield two top-level statements.

    The statements differ only in their last argument: ``None``,
    ``'plasma membrane'`` and ``'nucleus'``.
    """
    targets = (None, 'plasma membrane', 'nucleus')
    stmts = [Translocation(Agent('AKT'), None, target) for target in targets]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_related()
    top_level = preassembler.related_stmts
    assert len(top_level) == 2
Beispiel #5
0
def test_pathsfromto():
    """Preassemble phosphorylations from ``biopax.process_pc_pathsfromto``.

    The extracted statements must pass ``assert_pmids`` and the unique
    statements after preassembly must pass ``unicode_strs``.
    """
    sources = ['MAP2K1']
    targets = ['MAPK1']
    processor = biopax.process_pc_pathsfromto(sources, targets)
    processor.get_phosphorylation()
    assert_pmids(processor.statements)
    preassembler = Preassembler(hierarchies, processor.statements)
    preassembler.combine_related()
    unique = preassembler.unique_stmts
    assert unicode_strs(unique)
Beispiel #6
0
def test_pathsfromto():
    """Run the MAP2K1 -> MAPK1 paths-from-to query through preassembly.

    Checks the extracted phosphorylations with ``assert_pmids`` and the
    preassembler's unique statements with ``unicode_strs``.
    """
    biopax_proc = biopax.process_pc_pathsfromto(['MAP2K1'], ['MAPK1'])
    biopax_proc.get_phosphorylation()
    assert_pmids(biopax_proc.statements)
    assembler = Preassembler(hierarchies, biopax_proc.statements)
    assembler.combine_related()
    deduplicated = assembler.unique_stmts
    assert unicode_strs(deduplicated)
Beispiel #7
0
def test_modification_refinement_residue_noenz():
    """Enzyme-less phosphorylations with and without a residue combine.

    Only one top-level statement is expected after combining the plain
    Erbb3 phosphorylation with the one that specifies residue ``'Y'``.
    """
    substrate = Agent('Erbb3')
    stmts = [
        Phosphorylation(None, substrate),
        Phosphorylation(None, substrate, 'Y'),
    ]
    preassembler = Preassembler(bio_ontology, stmts=stmts)
    preassembler.combine_related()
    top_level = preassembler.related_stmts
    assert len(top_level) == 1
Beispiel #8
0
def test_flatten_evidence_hierarchy():
    """Flattened evidence is independent of the supporting statement.

    After flattening, the single top-level statement carries both evidence
    texts; changing the supporter's evidence text afterwards must not be
    visible on the top-level statement.
    """
    kinase = Agent('BRAF')
    target = Agent('MAP2K1')
    broad = Phosphorylation(kinase, target, evidence=[Evidence(text='foo')])
    narrow = Phosphorylation(kinase, target, 'S', '218',
                             evidence=[Evidence(text='bar')])
    assembler = Preassembler(bio_ontology, stmts=[broad, narrow])
    assembler.combine_related()
    assert len(assembler.related_stmts) == 1
    flattened_stmts = flatten_evidence(assembler.related_stmts)
    assert len(flattened_stmts) == 1
    head = flattened_stmts[0]
    assert len(head.evidence) == 2
    for expected_text in ('bar', 'foo'):
        assert expected_text in [ev.text for ev in head.evidence]
    assert len(head.supported_by) == 1
    backing = head.supported_by[0]
    assert len(backing.evidence) == 1
    assert backing.evidence[0].text == 'foo'
    # Changing the backing statement's evidence must leave `head` untouched
    backing.evidence[0].text = 'changed_foo'
    assert backing.evidence[0].text == 'changed_foo'
    head_texts = [ev.text for ev in head.evidence]
    assert 'changed_foo' not in head_texts
    assert 'foo' in head_texts
    seen_types = {ev.annotations.get('support_type') for ev in head.evidence}
    assert seen_types == {'direct', 'supported_by'}
Beispiel #9
0
def test_modification_refinement_residue_noenz():
    """A residue-specific phosphorylation absorbs the residue-less one.

    Both statements lack an enzyme; preassembly must leave exactly one
    top-level statement.
    """
    target = Agent('Erbb3')
    without_residue = Phosphorylation(None, target)
    with_residue = Phosphorylation(None, target, 'Y')
    assembler = Preassembler(hierarchies,
                             stmts=[without_residue, with_residue])
    assembler.combine_related()
    remaining = assembler.related_stmts
    assert len(remaining) == 1
Beispiel #10
0
def test_translocation():
    """Three AKT translocations must reduce to two top-level statements.

    The last constructor argument is ``None``, ``'plasma membrane'`` and
    ``'nucleus'`` respectively; on failure the assertion reports the
    top-level statements that were found.
    """
    unspecified = Translocation(Agent('AKT'), None, None)
    to_membrane = Translocation(Agent('AKT'), None, 'plasma membrane')
    to_nucleus = Translocation(Agent('AKT'), None, 'nucleus')
    assembler = Preassembler(bio_ontology,
                             stmts=[unspecified, to_membrane, to_nucleus])
    assembler.combine_related()
    assert len(assembler.related_stmts) == 2, assembler.related_stmts
Beispiel #11
0
def test_complex_refinement_order():
    """Complex refinement must not depend on the order of the members.

    The second complex lists the same two agents in reverse order, with
    ELK1 additionally carrying a phosphorylation condition; one top-level
    statement is expected.
    """
    plain = Complex([Agent('MED23'), Agent('ELK1')])
    phospho_elk1 = Agent('ELK1', mods=[ModCondition('phosphorylation')])
    modified = Complex([phospho_elk1, Agent('MED23')])
    preassembler = Preassembler(hierarchies, stmts=[plain, modified])
    preassembler.combine_duplicates()
    preassembler.combine_related()
    top_level = preassembler.related_stmts
    assert len(top_level) == 1
Beispiel #12
0
def test_complex_agent_refinement():
    """Complexes whose RAF differs in the last ModCondition flag stay apart.

    One RAF agent is built with ``True`` and the other with ``False`` as the
    fourth ``ModCondition`` argument; both complexes must remain unique and
    top-level.
    """
    ras = Agent('RAS')
    raf_variants = [
        Agent('RAF', mods=[ModCondition('ubiquitination', None, None, flag)])
        for flag in (True, False)
    ]
    complexes = [Complex([ras, raf]) for raf in raf_variants]
    preassembler = Preassembler(hierarchies, stmts=complexes)
    preassembler.combine_related()
    assert len(preassembler.unique_stmts) == 2
    assert len(preassembler.related_stmts) == 2
Beispiel #13
0
def test_homodimer_refinement():
    """An ERBB2 homodimer and an ERBB2-EGFR complex are unrelated.

    Both statements must survive duplicate combination as well as
    refinement.
    """
    erbb2 = Agent('ERBB2')
    egfr = Agent('EGFR')
    homodimer = Complex([erbb2, erbb2])
    heterodimer = Complex([erbb2, egfr])
    preassembler = Preassembler(bio_ontology, stmts=[homodimer, heterodimer])
    preassembler.combine_duplicates()
    unique_count = len(preassembler.unique_stmts)
    assert unique_count == 2
    preassembler.combine_related()
    top_level_count = len(preassembler.related_stmts)
    assert top_level_count == 2
Beispiel #14
0
def test_complex_refinement():
    """A two-member complex is not refined by a three-member complex.

    RAS-RAF and MEK-RAS-RAF must both remain unique and top-level.
    """
    ras, raf, mek = Agent('RAS'), Agent('RAF'), Agent('MEK')
    dimer = Complex([ras, raf])
    trimer = Complex([mek, ras, raf])
    preassembler = Preassembler(bio_ontology, stmts=[dimer, trimer])
    preassembler.combine_related()
    for collection in (preassembler.unique_stmts,
                       preassembler.related_stmts):
        assert len(collection) == 2
Beispiel #15
0
def test_complex_refinement_order():
    """Member order must not prevent refinement between two complexes.

    The MED23/ELK1 complex and the ELK1(phosphorylated)/MED23 complex are
    expected to end up as a single top-level statement.
    """
    med23_first = Complex([Agent('MED23'), Agent('ELK1')])
    elk1_phos = Agent('ELK1', mods=[ModCondition('phosphorylation')])
    elk1_first = Complex([elk1_phos, Agent('MED23')])
    assembler = Preassembler(bio_ontology, stmts=[med23_first, elk1_first])
    assembler.combine_duplicates()
    assembler.combine_related()
    assert len(assembler.related_stmts) == 1
Beispiel #16
0
def test_homodimer_refinement():
    """ERBB2-ERBB2 and ERBB2-EGFR complexes must not be merged or related.

    Two unique statements are expected after duplicate combination and two
    top-level statements after refinement.
    """
    egfr_agent = Agent('EGFR')
    erbb2_agent = Agent('ERBB2')
    stmts = [
        Complex([erbb2_agent, erbb2_agent]),
        Complex([erbb2_agent, egfr_agent]),
    ]
    assembler = Preassembler(hierarchies, stmts=stmts)
    assembler.combine_duplicates()
    assert len(assembler.unique_stmts) == 2
    assembler.combine_related()
    assert len(assembler.related_stmts) == 2
Beispiel #17
0
def test_complex_refinement():
    """Complexes with different member counts are kept as separate statements.

    The RAS-RAF complex and the MEK-RAS-RAF complex must both be unique
    and both be top-level after ``combine_related``.
    """
    ras_agent = Agent('RAS')
    raf_agent = Agent('RAF')
    mek_agent = Agent('MEK')
    small = Complex([ras_agent, raf_agent])
    large = Complex([mek_agent, ras_agent, raf_agent])
    assembler = Preassembler(hierarchies, stmts=[small, large])
    assembler.combine_related()
    n_unique = len(assembler.unique_stmts)
    n_top_level = len(assembler.related_stmts)
    assert n_unique == 2
    assert n_top_level == 2
Beispiel #18
0
def test_flatten_stmts():
    """``flatten_stmts`` yields four statements from either statement list.

    Five RAF1 phosphorylations are preassembled (two of them differ only
    in evidence); two top-level statements are expected, and flattening
    both the unique and the top-level list must give four statements.
    """
    raf1_s338 = ('S', '338')
    stmts = [
        Phosphorylation(Agent('MAP3K5'), Agent('RAF1'), *raf1_s338),
        Phosphorylation(None, Agent('RAF1'), *raf1_s338),
        Phosphorylation(None, Agent('RAF1')),
        Phosphorylation(Agent('PAK1'), Agent('RAF1'), *raf1_s338),
        Phosphorylation(None, Agent('RAF1'), evidence=Evidence(text='foo')),
    ]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_duplicates()
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 2
    flat_unique = flatten_stmts(preassembler.unique_stmts)
    assert len(flat_unique) == 4
    flat_related = flatten_stmts(preassembler.related_stmts)
    assert len(flat_related) == 4
Beispiel #19
0
def test_complex_agent_refinement():
    """RAS-RAF complexes with opposite ubiquitination flags are unrelated.

    The two RAF agents differ only in the fourth ``ModCondition`` argument
    (``True`` vs. ``False``); two unique and two top-level statements are
    expected.
    """
    ras_agent = Agent('RAS')
    ubiq_true = ModCondition('ubiquitination', None, None, True)
    raf_true = Agent('RAF', mods=[ubiq_true])
    ubiq_false = ModCondition('ubiquitination', None, None, False)
    raf_false = Agent('RAF', mods=[ubiq_false])
    stmts = [Complex([ras_agent, raf_true]), Complex([ras_agent, raf_false])]
    assembler = Preassembler(bio_ontology, stmts=stmts)
    assembler.combine_related()
    assert len(assembler.unique_stmts) == 2
    assert len(assembler.related_stmts) == 2
Beispiel #20
0
def test_flatten_stmts():
    """Flattening unique and top-level statements gives the same count.

    After preassembling five RAF1 phosphorylation statements there must be
    two top-level statements, and ``flatten_stmts`` must return four
    statements for both ``unique_stmts`` and ``related_stmts``.
    """
    by_map3k5 = Phosphorylation(Agent('MAP3K5'), Agent('RAF1'), 'S', '338')
    site_only = Phosphorylation(None, Agent('RAF1'), 'S', '338')
    bare = Phosphorylation(None, Agent('RAF1'))
    by_pak1 = Phosphorylation(Agent('PAK1'), Agent('RAF1'), 'S', '338')
    bare_with_ev = Phosphorylation(None, Agent('RAF1'),
                                   evidence=Evidence(text='foo'))
    assembler = Preassembler(
        bio_ontology,
        stmts=[by_map3k5, site_only, bare, by_pak1, bare_with_ev])
    assembler.combine_duplicates()
    assembler.combine_related()
    assert len(assembler.related_stmts) == 2
    for stmt_list in (assembler.unique_stmts, assembler.related_stmts):
        assert len(flatten_stmts(stmt_list)) == 4
Beispiel #21
0
def test_activation_refinement():
    """Inhibition and Activation of the same pair must stay separate.

    Neither duplicate combination nor refinement may merge the two
    statements.
    """
    alcohol_refs = {'CHEBI': 'CHEBI:16236',
                    'HMDB': 'HMDB00108',
                    'PUBCHEM': '702',
                    'TEXT': 'alcohol'}
    alcohol = Agent('alcohol', db_refs=alcohol_refs)
    endotoxin = Agent('endotoxin', db_refs={'TEXT': 'endotoxin'})
    stmts = [Inhibition(alcohol, endotoxin), Activation(alcohol, endotoxin)]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_duplicates()
    assert len(preassembler.unique_stmts) == 2
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 2
Beispiel #22
0
def test_activation_refinement():
    """An Inhibition and an Activation between the same agents do not combine.

    Two unique statements and two top-level statements are expected.
    """
    regulator = Agent('alcohol', db_refs={'CHEBI': 'CHEBI:16236',
                                          'HMDB': 'HMDB00108',
                                          'PUBCHEM': '702',
                                          'TEXT': 'alcohol'})
    regulated = Agent('endotoxin', db_refs={'TEXT': 'endotoxin'})
    inhibition = Inhibition(regulator, regulated)
    activation = Activation(regulator, regulated)
    assembler = Preassembler(bio_ontology, stmts=[inhibition, activation])
    assembler.combine_duplicates()
    n_unique = len(assembler.unique_stmts)
    assert n_unique == 2
    assembler.combine_related()
    n_top_level = len(assembler.related_stmts)
    assert n_top_level == 2
Beispiel #23
0
def test_association_refinement():
    """Associations are deduplicated and refined using the Eidos ontology.

    Four associations between health, food and food security events are
    expected to give three unique and two top-level statements. The
    health / food security statement must be supported by exactly one
    statement, whose members are food and health.
    """
    def _first_two_named(stmt, names):
        # True if both of the first two members have a concept name in names
        return (stmt.members[0].concept.name in names
                and stmt.members[1].concept.name in names)

    health_id = 'UN/entities/human/health'
    food_id = 'UN/entities/human/food'
    food_security_id = 'UN/entities/human/food/food_security'
    health_ev = Event(Concept('health', db_refs={'UN': [(health_id, 1.0)]}))
    food_ev = Event(Concept('food', db_refs={'UN': [(food_id, 1.0)]}))
    food_sec_ev = Event(
        Concept('food security', db_refs={'UN': [(food_security_id, 1.0)]}))
    stmts = [
        Association([health_ev, food_ev],
                    evidence=[Evidence(source_api='eidos1')]),
        Association([food_ev, health_ev],
                    evidence=[Evidence(source_api='eidos2')]),
        Association([health_ev, food_sec_ev],
                    evidence=[Evidence(source_api='eidos3')]),
        Association([food_ev, food_sec_ev],
                    evidence=[Evidence(source_api='eidos4')]),
    ]
    # The ontology file is looked up relative to this file's location
    this_dir = os.path.dirname(os.path.abspath(__file__))
    ontology_path = os.path.join(this_dir,
                                 '../sources/eidos/eidos_ontology.rdf')
    manager = HierarchyManager(ontology_path, True, True)
    entity_hierarchies = {'entity': manager}
    preassembler = Preassembler(entity_hierarchies, stmts)
    deduplicated = preassembler.combine_duplicates()
    assert len(deduplicated) == 3
    top_level = preassembler.combine_related()
    assert len(top_level) == 2
    matching = [st for st in top_level
                if _first_two_named(st, {'health', 'food security'})]
    health_food_sec = matching[0]
    assert len(health_food_sec.supported_by) == 1
    supporter = health_food_sec.supported_by[0]
    assert supporter.members[0].concept.name in {'food', 'health'}
    assert supporter.members[1].concept.name in {'food', 'health'}
Beispiel #24
0
def test_modification_refinement_noenz2():
    """Check that an enzyme-less deacetylation supports the one with enzyme.

    Counterpart of test_modification_refinement_noenz in which one argument
    (SIRT1) is associated with a component in the hierarchy while the other
    (BECN1) is not.
    """
    enzyme = Agent('SIRT1', db_refs={'HGNC': '14929',
                                     'UP': 'Q96EB6',
                                     'TEXT': 'SIRT1'})
    substrate = Agent('BECN1', db_refs={'HGNC': '1034',
                                        'UP': 'Q14457',
                                        'TEXT': 'Beclin 1'})
    with_enzyme = Deacetylation(enzyme, substrate)
    without_enzyme = Deacetylation(None, substrate)
    preassembler = Preassembler(bio_ontology,
                                stmts=[with_enzyme, without_enzyme])
    top_level = preassembler.combine_related()
    # Only the statement with the enzyme is top-level; the enzyme-less one
    # is attached to it as support.
    assert len(top_level) == 1
    top = top_level[0]
    assert top.equals(with_enzyme)
    assert len(top.supported_by) == 1
    supporter = top.supported_by[0]
    assert supporter.equals(without_enzyme)
    # The relation is recorded in both directions
    assert supporter.supports[0].equals(with_enzyme)
Beispiel #25
0
def test_association_refinement():
    """Check duplicate and refinement handling of Association statements.

    Uses a ``HierarchyManager`` built from the Eidos ontology file. Four
    associations must reduce to three unique statements and two top-level
    statements, and the health / food security association must have one
    supporting statement made up of food and health.
    """
    def _member_names_within(stmt, allowed):
        # Both of the first two member concept names must be in `allowed`
        return (stmt.members[0].concept.name in allowed
                and stmt.members[1].concept.name in allowed)

    un_health = 'UN/entities/human/health'
    un_food = 'UN/entities/human/food'
    un_food_security = 'UN/entities/human/food/food_security'
    health = Event(Concept('health', db_refs={'UN': [(un_health, 1.0)]}))
    food = Event(Concept('food', db_refs={'UN': [(un_food, 1.0)]}))
    food_security = Event(
        Concept('food security', db_refs={'UN': [(un_food_security, 1.0)]}))
    health_food = Association([health, food],
                              evidence=[Evidence(source_api='eidos1')])
    food_health = Association([food, health],
                              evidence=[Evidence(source_api='eidos2')])
    health_security = Association([health, food_security],
                                  evidence=[Evidence(source_api='eidos3')])
    food_security_stmt = Association([food, food_security],
                                     evidence=[Evidence(source_api='eidos4')])
    base_dir = os.path.dirname(os.path.abspath(__file__))
    rdf_path = os.path.join(base_dir, '../sources/eidos/eidos_ontology.rdf')
    local_hierarchies = {'entity': HierarchyManager(rdf_path, True, True)}
    assembler = Preassembler(
        local_hierarchies,
        [health_food, food_health, health_security, food_security_stmt])
    uniques = assembler.combine_duplicates()
    assert len(uniques) == 3
    related = assembler.combine_related()
    assert len(related) == 2
    target_names = {'health', 'food security'}
    candidates = [st for st in related
                  if _member_names_within(st, target_names)]
    target_stmt = candidates[0]
    assert len(target_stmt.supported_by) == 1
    support_names = {'food', 'health'}
    assert target_stmt.supported_by[0].members[0].concept.name \
        in support_names
    assert target_stmt.supported_by[0].members[1].concept.name \
        in support_names
Beispiel #26
0
def test_association_refinement():
    """Check Association refinement against ``world_ontology``.

    Four associations between a parent, a child and an unrelated concept
    must give three unique and two top-level statements. The
    child / unrelated statement must be supported by exactly one statement,
    whose members are the parent and the unrelated concept.
    """
    parent_id = 'wm/concept/causal_factor/health_and_life'
    child_id = ('wm/concept/causal_factor/health_and_life/'
                'living_condition/food_safety')
    unrelated_id = 'wm/concept/causal_factor/wild_food_sources'
    parent_ev = Event(Concept('parent', db_refs={'WM': [(parent_id, 1.0)]}))
    unrelated_ev = Event(
        Concept('unrelated', db_refs={'WM': [(unrelated_id, 1.0)]}))
    child_ev = Event(Concept('child', db_refs={'WM': [(child_id, 1.0)]}))
    stmts = [
        Association([parent_ev, unrelated_ev],
                    evidence=[Evidence(source_api='eidos1')]),
        Association([unrelated_ev, parent_ev],
                    evidence=[Evidence(source_api='eidos2')]),
        Association([parent_ev, child_ev],
                    evidence=[Evidence(source_api='eidos3')]),
        Association([unrelated_ev, child_ev],
                    evidence=[Evidence(source_api='eidos4')]),
    ]
    preassembler = Preassembler(world_ontology, stmts)
    deduplicated = preassembler.combine_duplicates()
    assert len(deduplicated) == 3
    top_level = preassembler.combine_related()
    assert len(top_level) == 2, top_level

    # Index the top-level statements by their sorted member names
    by_member_names = {}
    for top_stmt in top_level:
        key = tuple(sorted(ev.concept.name for ev in top_stmt.members))
        by_member_names[key] = top_stmt
    child_unrelated = by_member_names[('child', 'unrelated')]
    assert len(child_unrelated.supported_by) == 1
    supporter_names = {ev.concept.name
                       for ev in child_unrelated.supported_by[0].members}
    assert supporter_names == {'parent', 'unrelated'}
Beispiel #27
0
def test_return_toplevel():
    """Check the ``return_toplevel`` switch of ``combine_related``.

    With ``True`` only one statement is returned; with ``False`` both are
    returned, and the supports / supported_by links must be set in both
    directions.
    """
    enzyme = Agent('SRC', db_refs={'HGNC': '11283'})
    substrate = Agent('NRAS', db_refs={'HGNC': '7989'})
    site_specific = Phosphorylation(enzyme, substrate, 'tyrosine', '32')
    site_less = Phosphorylation(enzyme, substrate)
    preassembler = Preassembler(bio_ontology,
                                stmts=[site_specific, site_less])

    top_only = preassembler.combine_related(return_toplevel=True)
    assert len(top_only) == 1
    assert len(top_only[0].supported_by) == 1
    assert len(top_only[0].supported_by[0].supports) == 1

    everything = preassembler.combine_related(return_toplevel=False)
    assert len(everything) == 2
    # The order of the result is not fixed, so find the statement that has
    # a residue
    if everything[0].residue:
        refined, generic = everything[0], everything[1]
    else:
        refined, generic = everything[1], everything[0]
    assert len(refined.supported_by) == 1
    assert len(refined.supported_by[0].supports) == 1
    assert len(generic.supports) == 1
    assert len(generic.supports[0].supported_by) == 1
Beispiel #28
0
def test_return_toplevel():
    """``combine_related`` returns one or all statements as requested.

    ``return_toplevel=True`` must give a single statement with one
    supporter; ``return_toplevel=False`` must give both statements with
    the support links set on each side.
    """
    src_agent = Agent('SRC', db_refs={'HGNC': '11283'})
    nras_agent = Agent('NRAS', db_refs={'HGNC': '7989'})
    stmts_in = [
        Phosphorylation(src_agent, nras_agent, 'tyrosine', '32'),
        Phosphorylation(src_agent, nras_agent),
    ]
    assembler = Preassembler(hierarchies, stmts=stmts_in)
    result = assembler.combine_related(return_toplevel=True)
    assert len(result) == 1
    only_stmt = result[0]
    assert len(only_stmt.supported_by) == 1
    assert len(only_stmt.supported_by[0].supports) == 1
    result = assembler.combine_related(return_toplevel=False)
    assert len(result) == 2
    # Position of the residue-less statement; the other one has a residue
    generic_pos = 1 if result[0].residue else 0
    specific_pos = 1 - generic_pos
    assert len(result[specific_pos].supported_by) == 1
    assert len(result[specific_pos].supported_by[0].supports) == 1
    assert len(result[generic_pos].supports) == 1
    assert len(result[generic_pos].supports[0].supported_by) == 1
Beispiel #29
0
def assemble_model(requester_name):
    """Preassemble the global statement list and add linker inferences.

    The module-level ``stmts`` list is grounding-mapped and then replaced
    by the preassembler's related statements. Every inferred statement
    proposed by the ``MechLinker`` is announced through ``say`` and
    appended to ``stmts``. Finally a completion message naming
    ``requester_name`` is sent and the layout is updated.
    """
    global stmts
    # Map agent groundings before preassembly
    mapper = gm.GroundingMapper(gm.default_grounding_map)
    stmts = mapper.map_agents(stmts)
    preassembler = Preassembler(hierarchies, stmts)
    preassembler.combine_related()
    stmts = preassembler.related_stmts
    linker = MechLinker(stmts)
    # A falsy result (nothing linked) means there is nothing to announce
    for linked in (linker.link_statements() or ()):
        if not linked.inferred_stmt:
            continue
        say(mechlinker_queries.print_linked_stmt(linked))
        stmts.append(linked.inferred_stmt)
    say("%s: Done, updating layout." % requester_name)
    update_layout()
Beispiel #30
0
def test_flatten_evidence_multilevel():
    """Evidence is flattened across a three-statement refinement chain.

    The single top-level statement must end up with three pieces of
    evidence: one annotated ``'direct'`` and two ``'supported_by'``.
    """
    enz = Agent('BRAF')
    sub = Agent('MAP2K1')
    stmts = [
        Phosphorylation(enz, sub, evidence=[Evidence(text='foo')]),
        Phosphorylation(enz, sub, 'S', evidence=[Evidence(text='bar')]),
        Phosphorylation(enz, sub, 'S', '218',
                        evidence=[Evidence(text='baz')]),
    ]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 1
    flat = flatten_evidence(preassembler.related_stmts)
    assert len(flat) == 1
    top = flat[0]
    assert len(top.evidence) == 3, len(top.evidence)
    support_types = [ev.annotations['support_type'] for ev in top.evidence]
    n_direct = sum(1 for kind in support_types if kind == 'direct')
    n_supported = sum(1 for kind in support_types if kind == 'supported_by')
    assert n_direct == 1
    assert n_supported == 2
Beispiel #31
0
def test_flatten_evidence_multilevel():
    """Flatten evidence over two levels of supporting statements.

    Three progressively more detailed BRAF -> MAP2K1 phosphorylations are
    preassembled; the top-level statement must collect one ``'direct'``
    and two ``'supported_by'`` pieces of evidence.
    """
    kinase = Agent('BRAF')
    target = Agent('MAP2K1')
    no_site = Phosphorylation(kinase, target,
                              evidence=[Evidence(text='foo')])
    residue_only = Phosphorylation(kinase, target, 'S',
                                   evidence=[Evidence(text='bar')])
    full_site = Phosphorylation(kinase, target, 'S', '218',
                                evidence=[Evidence(text='baz')])
    assembler = Preassembler(bio_ontology,
                             stmts=[no_site, residue_only, full_site])
    assembler.combine_related()
    assert len(assembler.related_stmts) == 1
    flattened_stmts = flatten_evidence(assembler.related_stmts)
    assert len(flattened_stmts) == 1
    head = flattened_stmts[0]
    assert len(head.evidence) == 3, len(head.evidence)
    annotations = [ev.annotations['support_type'] for ev in head.evidence]
    expected_counts = (('direct', 1), ('supported_by', 2))
    for support_type, expected in expected_counts:
        assert annotations.count(support_type) == expected
Beispiel #32
0
def test_conversion_refinement():
    """Four related Conversion statements leave two top-level statements.

    The statements vary in the subject (RAS vs. HRAS) and in whether the
    second argument is a single agent or a list of two agents in either
    order.
    """
    ras_family = Agent('RAS', db_refs={'FPLX': 'RAS'})
    hras_gene = Agent('HRAS', db_refs={'HGNC': '5173'})
    gtp, gdp = Agent('GTP'), Agent('GDP')
    stmts = [
        Conversion(ras_family, gtp, gdp),
        Conversion(hras_gene, gtp, gdp),
        Conversion(hras_gene, [gtp, gdp], gdp),
        Conversion(hras_gene, [gdp, gtp], gdp),
    ]
    preassembler = Preassembler(bio_ontology, stmts=stmts)
    top_level = preassembler.combine_related()
    assert len(top_level) == 2
Beispiel #33
0
def test_conversion_refinement():
    """``combine_related`` must return two top-level Conversions.

    Inputs are a RAS and an HRAS conversion of GTP to GDP plus two HRAS
    conversions whose second argument is the GTP/GDP pair in both orders.
    """
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    gtp = Agent('GTP')
    gdp = Agent('GDP')
    by_family = Conversion(ras, gtp, gdp)
    by_gene = Conversion(hras, gtp, gdp)
    pair_gtp_first = Conversion(hras, [gtp, gdp], gdp)
    pair_gdp_first = Conversion(hras, [gdp, gtp], gdp)
    assembler = Preassembler(
        hierarchies,
        stmts=[by_family, by_gene, pair_gtp_first, pair_gdp_first])
    remaining = assembler.combine_related()
    assert len(remaining) == 2
Beispiel #34
0
def test_preassemble_related_complex():
    """Complexes are deduplicated irrespective of member order, then refined.

    Four complexes (two member sets, each in both orders) must give two
    unique statements and a single top-level statement.
    """
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    member_lists = ([kras, hras], [kras, ras], [hras, kras], [ras, kras])
    stmts = [Complex(members) for members in member_lists]
    preassembler = Preassembler(hierarchies, stmts)
    unique = preassembler.combine_duplicates()
    assert len(unique) == 2
    top_level = preassembler.combine_related()
    assert len(top_level) == 1
Beispiel #35
0
def test_preassemble_related_complex():
    """Check duplicate and refinement handling of KRAS complexes.

    KRAS-HRAS and KRAS-RAS are each given in both member orders; two
    unique statements and one top-level statement are expected.
    """
    ras_family = Agent('RAS', db_refs={'FPLX': 'RAS'})
    kras_gene = Agent('KRAS', db_refs={'HGNC': '6407'})
    hras_gene = Agent('HRAS', db_refs={'HGNC': '5173'})
    kras_hras = Complex([kras_gene, hras_gene])
    kras_ras = Complex([kras_gene, ras_family])
    hras_kras = Complex([hras_gene, kras_gene])
    ras_kras = Complex([ras_family, kras_gene])
    assembler = Preassembler(bio_ontology,
                             [kras_hras, kras_ras, hras_kras, ras_kras])
    deduplicated = assembler.combine_duplicates()
    assert len(deduplicated) == 2
    remaining = assembler.combine_related()
    assert len(remaining) == 1
Beispiel #36
0
def test_flatten_evidence_hierarchy():
    """Flattening adds the supporter's evidence to the top-level statement.

    The top-level statement must carry both evidence texts while the
    supporting statement keeps only its own.
    """
    enz = Agent('BRAF')
    sub = Agent('MAP2K1')
    generic = Phosphorylation(enz, sub, evidence=[Evidence(text='foo')])
    specific = Phosphorylation(enz, sub, 'S', '218',
                               evidence=[Evidence(text='bar')])
    preassembler = Preassembler(hierarchies, stmts=[generic, specific])
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 1
    flat = flatten_evidence(preassembler.related_stmts)
    assert len(flat) == 1
    top = flat[0]
    assert len(top.evidence) == 2
    top_texts = [ev.text for ev in top.evidence]
    assert 'bar' in top_texts
    assert 'foo' in top_texts
    assert len(top.supported_by) == 1
    supporter = top.supported_by[0]
    assert len(supporter.evidence) == 1
    assert supporter.evidence[0].text == 'foo'
Beispiel #37
0
def test_modification_norefinement_subsfamily():
    """Statements that are each more specific in one respect do not combine.

    One statement targets NRAS without a site, the other targets the RAS
    family agent at Y32; both must remain top-level.
    """
    enzyme = Agent('SRC', db_refs={'HGNC': '11283'})
    gene_sub = Agent('NRAS', db_refs={'HGNC': '7989'})
    family_sub = Agent('RAS', db_refs={'FPLX': 'RAS'})
    site_less = Phosphorylation(enzyme, gene_sub)
    site_specific = Phosphorylation(enzyme, family_sub, 'Y', '32',
                                    evidence=[Evidence(text='foo')])
    preassembler = Preassembler(bio_ontology,
                                stmts=[site_less, site_specific])
    top_level = preassembler.combine_related()
    # The site-less statement has the (presumably) more specific substrate
    # and the site-specific one the more specific modification, so neither
    # refines the other.
    assert len(top_level) == 2
    assert len(top_level[0].evidence) == 1, top_level
Beispiel #38
0
def test_superfamily_refinement_isa_or_partof():
    """The PRKAG1 statement is refined-to from the AMPK statement.

    Both statements describe the same SRC phosphorylation at tyrosine 32;
    only the PRKAG1 one may be top-level, supported by the AMPK one.
    """
    enzyme = Agent('SRC', db_refs={'HGNC': '11283'})
    gene_sub = Agent('PRKAG1', db_refs={'HGNC': '9385'})
    family_sub = Agent('AMPK', db_refs={'FPLX': 'AMPK'})
    family_stmt = Phosphorylation(enzyme, family_sub, 'tyrosine', '32')
    gene_stmt = Phosphorylation(enzyme, gene_sub, 'tyrosine', '32')
    preassembler = Preassembler(bio_ontology, stmts=[family_stmt, gene_stmt])
    top_level = preassembler.combine_related()
    # Only the gene-level statement is top-level; the family-level one
    # supports it.
    assert len(top_level) == 1
    top = top_level[0]
    assert top.equals(gene_stmt)
    assert len(top.supported_by) == 1
    assert top.supported_by[0].equals(family_stmt)
Beispiel #39
0
def extract_phos():
    """Extract, filter and preassemble phosphorylations from a pickled model.

    Loads the pickle at the module-level ``stmts_fname`` and keeps the
    ``Phosphorylation`` statements that have an enzyme. These are passed
    through ``filter_grounded``, ``filter_enzkinase``, site mapping with
    ``SiteMapper(default_site_map)`` and the ``Preassembler`` (duplicate
    combination followed by related-statement combination). The resulting
    top-level statements are written to ``mapped_unique_phos.pkl``. They are
    then passed through ``filter_direct`` and ``filter_non_hypothesis`` and
    that result is written to ``filtered_phos.pkl``. The statement count
    after every step is logged.

    Returns the statements produced by the final ``filter_non_hypothesis``
    step.
    """
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # file; only use this with a trusted stmts_fname.
    with open(stmts_fname, 'rb') as fh:
        model = pickle.load(fh)

    # The model is iterated as a mapping from PMID to statements; only the
    # statements are needed here.
    stmts = [stmt
             for pmid_stmts in model.values()
             for stmt in pmid_stmts
             if isinstance(stmt, Phosphorylation)]
    logger.info('%d phosphorylations in RAS Machine', len(stmts))

    stmts = [s for s in stmts if s.enz is not None]
    logger.info('%d phosphorylations with enzyme in RAS Machine', len(stmts))

    stmts_grounded = filter_grounded(stmts)
    logger.info('%d grounded phosphorylations in RAS Machine',
                len(stmts_grounded))

    stmts_enzkinase = filter_enzkinase(stmts_grounded)
    logger.info('%d phosphorylations with kinase enzyme in RAS Machine',
                len(stmts_enzkinase))

    sm = SiteMapper(default_site_map)
    # The second return value of map_sites is not needed here
    stmts_valid, _ = sm.map_sites(stmts_enzkinase)
    logger.info('%d valid-sequence phosphorylations in RAS Machine',
                len(stmts_valid))

    pa = Preassembler(hierarchies, stmts_valid)
    stmts_unique = pa.combine_duplicates()
    logger.info('%d unique phosphorylations in RAS Machine',
                len(stmts_unique))

    # From here on stmts_unique holds the result of combine_related
    stmts_unique = pa.combine_related()
    logger.info('%d top-level phosphorylations in RAS Machine',
                len(stmts_unique))

    with open('mapped_unique_phos.pkl', 'wb') as fh:
        pickle.dump(stmts_unique, fh, protocol=2)

    # Filter RAS Machine statements for direct and not hypothesis
    stmts = filter_direct(stmts_unique)
    logger.info('%d direct phosphorylations in RAS Machine', len(stmts))
    stmts = filter_non_hypothesis(stmts)
    logger.info('%d non-hypothesis phosphorylations in RAS Machine',
                len(stmts))

    with open('filtered_phos.pkl', 'wb') as fh:
        pickle.dump(stmts, fh, protocol=2)

    return stmts
Beispiel #40
0
def test_superfamily_refinement_isa_or_partof():
    """An AMPK-level statement must support the PRKAG1-level statement.

    After preassembly the PRKAG1 phosphorylation is the only top-level
    statement and has the AMPK phosphorylation as its single supporter.
    """
    src_agent = Agent('SRC', db_refs={'HGNC': '11283'})
    prkag1_agent = Agent('PRKAG1', db_refs={'HGNC': '9385'})
    ampk_agent = Agent('AMPK', db_refs={'FPLX': 'AMPK'})
    site = ('tyrosine', '32')
    on_ampk = Phosphorylation(src_agent, ampk_agent, *site)
    on_prkag1 = Phosphorylation(src_agent, prkag1_agent, *site)
    assembler = Preassembler(hierarchies, stmts=[on_ampk, on_prkag1])
    remaining = assembler.combine_related()
    # Expect just the gene-level statement, backed by the family-level one
    assert len(remaining) == 1
    assert remaining[0].equals(on_prkag1)
    supporters = remaining[0].supported_by
    assert len(supporters) == 1
    assert supporters[0].equals(on_ampk)
Beispiel #41
0
def test_modification_norefinement_enzfamily():
    """A site-specific RAF statement and a site-less BRAF one do not combine.

    Both statements must remain top-level, and the second returned
    statement must have exactly one piece of evidence.
    """
    substrate = Agent('MEK')
    family_enz = Agent('RAF')
    gene_enz = Agent('BRAF')
    site_specific = Phosphorylation(family_enz, substrate, 'Y', '32',
                                    evidence=[Evidence(text='foo')])
    site_less = Phosphorylation(gene_enz, substrate)
    preassembler = Preassembler(bio_ontology,
                                stmts=[site_specific, site_less])
    top_level = preassembler.combine_related()
    # Only one statement carries the site and the other (presumably) has
    # the more specific enzyme, so neither may absorb the other.
    assert len(top_level) == 2
    assert len(top_level[1].evidence) == 1
Beispiel #42
0
def test_modification_norefinement_noenz():
    """A statement with enzyme only and one with site only do not combine.

    Both statements must remain top-level, and the second returned
    statement must have exactly one piece of evidence.
    """
    enzyme = Agent('SRC', db_refs={'HGNC': '11283'})
    substrate = Agent('NRAS', db_refs={'HGNC': '7989'})
    enzyme_only = Phosphorylation(enzyme, substrate)
    site_only = Phosphorylation(None, substrate, 'Y', '32',
                                evidence=[Evidence(text='foo')])
    preassembler = Preassembler(hierarchies, stmts=[enzyme_only, site_only])
    top_level = preassembler.combine_related()
    # One statement names the enzyme but no site, the other names the site
    # but no enzyme, so neither is a refinement of the other.
    assert len(top_level) == 2
    assert len(top_level[1].evidence) == 1
Beispiel #43
0
def test_modification_refinement():
    """The site-specific phosphorylation is supported by the site-less one.

    Both statements share enzyme and substrate and differ only in whether
    residue and position are given.
    """
    enzyme = Agent('SRC', db_refs={'HGNC': '11283'})
    substrate = Agent('NRAS', db_refs={'HGNC': '7989'})
    site_specific = Phosphorylation(enzyme, substrate, 'tyrosine', '32')
    site_less = Phosphorylation(enzyme, substrate)
    preassembler = Preassembler(bio_ontology,
                                stmts=[site_specific, site_less])
    top_level = preassembler.combine_related()
    # Only the site-specific statement is top-level; the site-less one is
    # attached to it as support.
    assert len(top_level) == 1
    top = top_level[0]
    assert top.equals(site_specific)
    assert len(top.supported_by) == 1
    assert top.supported_by[0].equals(site_less)
Beispiel #44
0
def test_modification_norefinement_enzfamily():
    """No refinement between RAF (with site) and BRAF (without site).

    Two top-level statements are expected, the second of which has a
    single piece of evidence.
    """
    mek_agent = Agent('MEK')
    raf_agent = Agent('RAF')
    braf_agent = Agent('BRAF')
    raf_with_site = Phosphorylation(raf_agent, mek_agent, 'Y', '32',
                                    evidence=[Evidence(text='foo')])
    braf_no_site = Phosphorylation(braf_agent, mek_agent)
    assembler = Preassembler(hierarchies,
                             stmts=[raf_with_site, braf_no_site])
    remaining = assembler.combine_related()
    # The RAF statement is more specific in the modification while the BRAF
    # statement (presumably) is more specific in the enzyme, so they must
    # not be combined.
    assert len(remaining) == 2
    second_evidence = remaining[1].evidence
    assert len(second_evidence) == 1
Beispiel #45
0
def test_bound_condition_norefinement():
    """A bound condition alone does not make a statement a refinement.

    One statement gives the site but no bound condition, the other gives a
    GTP bound condition on NRAS but no site; both must stay top-level.
    """
    enzyme = Agent('SRC', db_refs={'HGNC': '11283'})
    gtp = Agent('GTP', db_refs={'CHEBI': '15996'})
    nras_free = Agent('NRAS', db_refs={'HGNC': '7989'})
    nras_gtp_bound = Agent('NRAS', db_refs={'HGNC': '7989'},
                           bound_conditions=[BoundCondition(gtp, True)])
    with_site = Phosphorylation(enzyme, nras_free, 'tyrosine', '32')
    with_bound = Phosphorylation(enzyme, nras_gtp_bound)
    preassembler = Preassembler(hierarchies, stmts=[with_site, with_bound])
    top_level = preassembler.combine_related()
    # Each statement is more specific in a different respect (site vs.
    # bound condition), so they must not be combined.
    assert len(top_level) == 2
Beispiel #46
0
def test_modification_refinement():
    """A site-specific phosphorylation is supported by the site-less one.

    Both statements share the same enzyme and substrate; only one gives a
    residue and position.
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    specific = Phosphorylation(src, nras, 'tyrosine', '32')
    generic = Phosphorylation(src, nras)
    preassembler = Preassembler(hierarchies, stmts=[specific, generic])
    top_level = preassembler.combine_related()
    # Only the site-specific statement is top-level; the generic one is
    # attached to it as support.
    assert len(top_level) == 1
    top = top_level[0]
    assert top.equals(specific)
    assert len(top.supported_by) == 1
    assert top.supported_by[0].equals(generic)
Beispiel #47
0
def test_superfamily_refinement():
    """A statement about NRAS is supported by the same statement about RAS.

    The two statements are identical except for the substrate: one is
    grounded to the FPLX entry RAS, the other to the HGNC gene NRAS.
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    family_stmt = Phosphorylation(src, ras, 'tyrosine', '32')
    gene_stmt = Phosphorylation(src, nras, 'tyrosine', '32')
    preassembler = Preassembler(bio_ontology, stmts=[family_stmt, gene_stmt])
    top_level = preassembler.combine_related()
    # The gene-level statement is the single top-level result and carries the
    # family-level statement as its support.
    assert len(top_level) == 1
    top = top_level[0]
    assert top.equals(gene_stmt)
    assert len(top.supported_by) == 1
    assert top.supported_by[0].equals(family_stmt)
Beispiel #48
0
def test_superfamily_refinement():
    """The NRAS-level phosphorylation is refined from the RAS-level one.

    Substrates differ only in grounding (FPLX RAS versus HGNC NRAS); enzyme,
    residue and position are the same in both statements.
    """
    kinase = Agent('SRC', db_refs={'HGNC': '11283'})
    ras_family = Agent('RAS', db_refs={'FPLX': 'RAS'})
    nras_gene = Agent('NRAS', db_refs={'HGNC': '7989'})
    on_family = Phosphorylation(kinase, ras_family, 'tyrosine', '32')
    on_gene = Phosphorylation(kinase, nras_gene, 'tyrosine', '32')
    pa = Preassembler(hierarchies, stmts=[on_family, on_gene])
    result = pa.combine_related()
    # Exactly one top-level statement: the gene-level one, with the
    # family-level statement recorded as supporting it.
    assert len(result) == 1
    winner = result[0]
    assert winner.equals(on_gene)
    assert len(winner.supported_by) == 1
    assert winner.supported_by[0].equals(on_family)
Beispiel #49
0
def test_bound_condition_norefinement():
    """Statements specific in different respects are not merged.

    One statement gives the phosphorylation site on unbound NRAS; the other
    has GTP-bound NRAS but no site. Both must survive combine_related.
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    gtp = Agent('GTP', db_refs={'CHEBI': '15996'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    gtp_bound = [BoundCondition(gtp, True)]
    nrasgtp = Agent('NRAS', db_refs={'HGNC': '7989'},
                    bound_conditions=gtp_bound)
    with_site = Phosphorylation(src, nras, 'tyrosine', '32')
    with_bound_substrate = Phosphorylation(src, nrasgtp)
    preassembler = Preassembler(
        bio_ontology, stmts=[with_site, with_bound_substrate])
    combined = preassembler.combine_related()
    assert len(combined) == 2
Beispiel #50
0
def test_modification_norefinement_noenz():
    """A known enzyme and a known site on different statements do not merge.

    The first statement names the enzyme but no site; the second gives the
    site but has no enzyme (None). Both must remain after combine_related.
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    enzyme_only = Phosphorylation(src, nras)
    site_only = Phosphorylation(
        None, nras, 'Y', '32', evidence=[Evidence(text='foo')])
    preassembler = Preassembler(hierarchies, stmts=[enzyme_only, site_only])
    combined = preassembler.combine_related()
    assert len(combined) == 2
    second = combined[1]
    assert len(second.evidence) == 1
Beispiel #51
0
def run_preassembly(statements, hierarchies):
    """Filter, de-duplicate and relate statements; return the top-level ones.

    Prints the statement count at three stages: on entry, after duplicate
    combination, and after top-level filtering.
    """
    print('%d total statements' % len(statements))
    # Keep only statements passing the grounding filter at threshold 0.4
    grounded = ac.filter_grounded_only(statements, score_threshold=0.4)
    preassembler = Preassembler(hierarchies, grounded)
    belief_engine = BeliefEngine()

    # De-duplicate, then assign prior beliefs to the unique statements
    deduplicated = preassembler.combine_duplicates()
    print('%d unique statements' % len(deduplicated))
    belief_engine.set_prior_probs(deduplicated)

    # Relate statements (keeping all of them, not just the top level) and
    # assign hierarchy-based beliefs
    all_related = preassembler.combine_related(return_toplevel=False)
    belief_engine.set_hierarchy_probs(all_related)

    top_level = ac.filter_top_level(all_related)
    print('%d top-level statements' % len(top_level))
    return top_level
Beispiel #52
0
def test_bound_condition_refinement():
    """A statement on GTP-bound NRAS is supported by the one on plain NRAS.

    Both statements share enzyme, residue and position; they differ only in
    whether the substrate carries a GTP bound condition.
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    gtp = Agent('GTP', db_refs={'CHEBI': '15996'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    nras_bound_gtp = Agent('NRAS', db_refs={'HGNC': '7989'},
                           bound_conditions=[BoundCondition(gtp, True)])
    unbound_stmt = Phosphorylation(src, nras, 'tyrosine', '32')
    bound_stmt = Phosphorylation(src, nras_bound_gtp, 'tyrosine', '32')
    preassembler = Preassembler(hierarchies, stmts=[unbound_stmt, bound_stmt])
    top_level = preassembler.combine_related()
    # The statement with the bound condition is the only top-level result and
    # is supported by the statement without it.
    assert len(top_level) == 1
    top = top_level[0]
    assert top.equals(bound_stmt)
    assert len(top.supported_by) == 1
    assert top.supported_by[0].equals(unbound_stmt)
Beispiel #53
0
def test_flatten_evidence_hierarchy_supports():
    """Flattening with collect_from='supports' adds evidence to the supporter.

    After flattening, the site-specific statement keeps only its own
    evidence ('bar'), while the statement supporting it holds both 'foo' and
    'bar'.
    """
    braf = Agent('BRAF')
    mek = Agent('MAP2K1')
    generic = Phosphorylation(braf, mek, evidence=[Evidence(text='foo')])
    specific = Phosphorylation(braf, mek, 'S', '218',
                               evidence=[Evidence(text='bar')])
    preassembler = Preassembler(hierarchies, stmts=[generic, specific])
    # Ask for every statement, not only the top-level ones
    related = preassembler.combine_related(return_toplevel=False)
    assert len(related) == 2
    flat = flatten_evidence(related, collect_from='supports')
    assert len(flat) == 2

    refined = flat[1]
    assert len(refined.evidence) == 1
    refined_texts = [ev.text for ev in refined.evidence]
    assert 'bar' in refined_texts
    assert len(refined.supported_by) == 1

    supporter = refined.supported_by[0]
    assert len(supporter.evidence) == 2
    assert {ev.text for ev in supporter.evidence} == {'foo', 'bar'}
Beispiel #54
0
def run_preassembly(statements, hierarchies):
    """Run the demo preassembly pipeline and return the resulting statements.

    Steps, in order: map_onto, grounding filter (threshold 0.7), db_refs
    filter on a fixed list of 'UN' concepts, polarity assumption and filter,
    duplicate combination, related-statement combination with belief
    assignment, top-level filtering, and removal of contradictions.

    Side effects: writes 'pi_mtg_demo_unfiltered.pkl' (after map_onto) and
    'pi_mtg_demo.pkl' (final statements) via ac.dump_statements, and prints
    statement counts along the way.
    """
    print('%d total statements' % len(statements))
    mapped = map_onto(statements)
    ac.dump_statements(mapped, 'pi_mtg_demo_unfiltered.pkl')
    # Filter to grounded only
    grounded = ac.filter_grounded_only(mapped, score_threshold=0.7)

    un_concepts = [
        'conflict', 'food_security', 'flooding', 'food_production',
        'human_migration', 'drought', 'food_availability', 'market',
        'food_insecurity',
    ]
    relevant = ac.filter_by_db_refs(grounded, 'UN', un_concepts,
                                    policy='all', match_suffix=True)
    # assume_polarity is called for its effect on the statements; its return
    # value is not used
    assume_polarity(relevant)
    polarized = filter_has_polarity(relevant)

    preassembler = Preassembler(hierarchies, polarized)
    belief_engine = BeliefEngine()
    deduplicated = preassembler.combine_duplicates()
    print('%d unique statements' % len(deduplicated))
    belief_engine.set_prior_probs(deduplicated)

    all_related = preassembler.combine_related(return_toplevel=False)
    belief_engine.set_hierarchy_probs(all_related)
    top_level = ac.filter_top_level(all_related)

    # Point the preassembler at the top-level statements before looking for
    # contradictions
    preassembler.stmts = top_level
    print('%d top-level statements' % len(top_level))
    contradictions = preassembler.find_contradicts()
    consistent = remove_contradicts(top_level, contradictions)

    ac.dump_statements(consistent, 'pi_mtg_demo.pkl')

    return consistent
Beispiel #55
0
def extract_phos():
    """Extract, filter and preassemble phosphorylations from a pickled model.

    Loads the pickle at ``stmts_fname``, which is iterated as a mapping whose
    values are collections of statements, and keeps the Phosphorylation
    statements that have an enzyme. These are then passed, in order, through
    filter_grounded, filter_enzkinase, site mapping, duplicate combination,
    related-statement combination, filter_direct and filter_non_hypothesis.
    The statement count is logged after every stage.

    Side effects: writes 'mapped_unique_phos.pkl' (statements after
    combine_related) and 'filtered_phos.pkl' (final statements) to the
    current working directory.

    Returns
    -------
    list
        The statements remaining after the final filter.
    """
    # NOTE(security): pickle.load can execute arbitrary code; stmts_fname
    # must only ever point at a trusted file.
    with open(stmts_fname, 'rb') as fh:
        model = pickle.load(fh)

    # The mapping's keys are not needed, only the statement collections
    stmts = [stmt
             for pmid_stmts in model.values()
             for stmt in pmid_stmts
             if isinstance(stmt, Phosphorylation)]
    logger.info('%d phosphorylations in RAS Machine', len(stmts))

    stmts = [s for s in stmts if s.enz is not None]
    logger.info('%d phosphorylations with enzyme in RAS Machine', len(stmts))

    stmts_grounded = filter_grounded(stmts)
    logger.info('%d grounded phosphorylations in RAS Machine',
                len(stmts_grounded))

    stmts_enzkinase = filter_enzkinase(stmts_grounded)
    logger.info('%d phosphorylations with kinase enzyme in RAS Machine',
                len(stmts_enzkinase))

    # Only the first element returned by map_sites is used here
    sm = SiteMapper(default_site_map)
    stmts_valid, _ = sm.map_sites(stmts_enzkinase)
    logger.info('%d valid-sequence phosphorylations in RAS Machine',
                len(stmts_valid))

    pa = Preassembler(hierarchies, stmts_valid)
    stmts_unique = pa.combine_duplicates()
    logger.info('%d unique phosphorylations in RAS Machine',
                len(stmts_unique))

    # From here on stmts_unique holds the result of combine_related
    stmts_unique = pa.combine_related()
    logger.info('%d top-level phosphorylations in RAS Machine',
                len(stmts_unique))

    with open('mapped_unique_phos.pkl', 'wb') as fh:
        pickle.dump(stmts_unique, fh, protocol=2)

    # Filter RAS Machine statements for direct and not hypothesis
    stmts = filter_direct(stmts_unique)
    logger.info('%d direct phosphorylations in RAS Machine', len(stmts))
    stmts = filter_non_hypothesis(stmts)
    logger.info('%d non-hypothesis phosphorylations in RAS Machine',
                len(stmts))

    with open('filtered_phos.pkl', 'wb') as fh:
        pickle.dump(stmts, fh, protocol=2)

    return stmts
Beispiel #56
0
def test_multiprocessing():
    """combine_related with poolsize and size_cutoff set gives 3 top-levels.

    Builds seven phosphorylations of MEK/MAP2K1 by BRAF at varying levels of
    site detail plus one dephosphorylation, then runs combine_related with
    poolsize=1 and size_cutoff=2.
    """
    braf = Agent('BRAF', db_refs={'HGNC': '1097'})
    mek1 = Agent('MAP2K1', db_refs={'HGNC': '6840'})
    mek = Agent('MEK', db_refs={'FPLX':'MEK'})
    stmts = [
        Phosphorylation(braf, mek),
        Phosphorylation(braf, mek1),
        Phosphorylation(braf, mek1, position='218'),
        Phosphorylation(braf, mek1, position='222'),
        Phosphorylation(braf, mek1, 'serine'),
        Phosphorylation(braf, mek1, 'serine', '218'),
        Phosphorylation(braf, mek1, 'serine', '222'),
        Dephosphorylation(braf, mek1),
    ]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    # Size cutoff set to a low number so that one group will run remotely
    # and one locally
    top_level = preassembler.combine_related(
        return_toplevel=True, poolsize=1, size_cutoff=2)
    n_top = len(top_level)
    assert n_top == 3, 'Got %d toplevel statements.' % n_top
Beispiel #57
0
def test_influence_refinement():
    """An Influence whose subject is 'trucking' is supported by the one on
    'transportation'.

    Three Influence statements are built over UN-grounded concepts and
    combined using a HierarchyManager loaded from the Eidos ontology file.
    Two statements are expected at the top level, and the trucking statement
    must list the transportation statement as its single support.
    """
    un_human = 'UN/entities/human/'
    transport_id = un_human + 'infrastructure/transportation'
    trucking_id = transport_id + '/transportation_methods'
    livelihood_id = un_human + 'livelihood'

    def grounded_event(name, un_id):
        # Event over a concept with a single UN grounding at score 1.0
        return Event(Concept(name, db_refs={'UN': [(un_id, 1.0)]}))

    transportation = grounded_event('transportation', transport_id)
    trucking = grounded_event('trucking', trucking_id)
    agriculture = grounded_event('agriculture', livelihood_id)

    stmt1 = Influence(transportation, agriculture,
                      evidence=[Evidence(source_api='eidos1')])
    stmt2 = Influence(trucking, agriculture,
                      evidence=[Evidence(source_api='eidos2')])
    stmt3 = Influence(agriculture, transportation,
                      evidence=[Evidence(source_api='eidos3')])

    here = os.path.dirname(os.path.abspath(__file__))
    eidos_ont = os.path.join(here, '../sources/eidos/eidos_ontology.rdf')
    entity_hierarchy = HierarchyManager(eidos_ont, True, True)
    preassembler = Preassembler({'entity': entity_hierarchy},
                                [stmt1, stmt2, stmt3])
    top_level = preassembler.combine_related()
    assert len(top_level) == 2

    trucking_stmts = [stmt for stmt in top_level
                      if stmt.subj.concept.name == 'trucking']
    trucking_stmt = trucking_stmts[0]
    assert len(trucking_stmt.supported_by) == 1
    supporter = trucking_stmt.supported_by[0]
    assert supporter.subj.concept.name == 'transportation'
Beispiel #58
0
def test_modification_refinement_noenz2():
    """A deacetylation with a known enzyme is supported by the enzyme-less one.

    Similar to test_modification_refinement_noenz for statements where one
    argument is associated with a component in the hierarchy (SIRT1 in this
    case) but the other is not (BECN1).
    """
    sirt1 = Agent(
        'SIRT1',
        db_refs={'HGNC':'14929', 'UP':'Q96EB6', 'TEXT':'SIRT1'})
    becn1 = Agent(
        'BECN1',
        db_refs={'HGNC': '1034', 'UP': 'Q14457', 'TEXT': 'Beclin 1'})
    with_enzyme = Deacetylation(sirt1, becn1)
    without_enzyme = Deacetylation(None, becn1)
    preassembler = Preassembler(hierarchies,
                                stmts=[with_enzyme, without_enzyme])
    top_level = preassembler.combine_related()
    # Only the statement naming the enzyme is top-level; the enzyme-less
    # statement supports it, and the link is recorded in both directions.
    assert len(top_level) == 1
    top = top_level[0]
    assert top.equals(with_enzyme)
    assert len(top.supported_by) == 1
    supporter = top.supported_by[0]
    assert supporter.equals(without_enzyme)
    assert supporter.supports[0].equals(with_enzyme)
Beispiel #59
0
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    """Run assembly on a list of statements, for a given PMCID.

    The statements are grounding-mapped, filtered, preassembled, scored with
    a BeliefEngine, extended with statements inferred by the MechLinker,
    preassembled again, and finally written out as index cards plus several
    diagnostic files. Progress is printed to stdout throughout.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    folder : str
        Base output folder. Files are written under `folder`/index_cards/
        and `folder`/other_outputs/, each prefixed with `pmcid`.
    pmcid : str
        Identifier used to name the output files and passed to the
        IndexCardAssembler as `pmc_override`.
    background_assertions : collection or None
        If given, top-level statements that are members of this collection
        are dropped before output. If None, nothing is dropped.

    Returns
    -------
    None
        All results are written to files or printed.
    """
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    # NOTE(review): update() modifies default_grounding_map in place, so the
    # TRIPS entries stay on that object after this call returns.
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep a statement only if every entry of its
    # agent list passes is_protein_or_chemical
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    # Link statements
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler; from here on `pa` refers to this
    # second instance, not the one created above
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round; with an
    # infinite limit the `break` at the end of the loop is never reached
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Iterate over the statements in order of decreasing belief
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        # NOTE(review): statements below the cutoff are only labelled 'SKIP'
        # in the printout; there is no `continue` here, so they still go
        # through the index card steps below. Confirm this is intended.
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards, so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes.
    # The model text is encoded to UTF-8 before printing, so on Python 3 a
    # bytes literal is what gets printed.
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')

    # Render the statement graph and draw it to a PDF file
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Write statement diagnostics; pa.stmts belongs to the second
    # Preassembler created above
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Beispiel #60
0
    def run_preassembly(self, stmts, print_summary=True):
        """Run complete preassembly procedure on the given statements.

        Results are returned as a dict and stored in the attribute
        :py:attr:`results`. If :py:attr:`basename` is not None they are also
        saved in the pickle file `<basename>_results.pkl`.

        Parameters
        ----------
        stmts : list of :py:class:`indra.statements.Statement`
            Statements to preassemble.
        print_summary : bool
            If True (default), logs a summary of the preassembly process at
            INFO level.

        Returns
        -------
        dict
            A dict containing the following entries:

            - `raw`: the starting set of statements before preassembly.
            - `duplicates1`: statements after initial de-duplication.
            - `valid`: statements found to have valid modification sites.
            - `mapped`: mapped statements (list of
              :py:class:`indra.preassembler.sitemapper.MappedStatement`).
            - `mapped_stmts`: combined list of valid statements and statements
              after mapping.
            - `duplicates2`: statements resulting from de-duplication of the
              statements in `mapped_stmts`.
            - `related2`: top-level statements after combining the statements
              in `duplicates2`.
        """
        # First round of preassembly: remove duplicates before sitemapping
        pa1 = Preassembler(hierarchies, stmts)
        logger.info("Combining duplicates")
        pa1.combine_duplicates()
        # Map sites
        logger.info("Mapping sites")
        (valid, mapped) = sm.map_sites(pa1.unique_stmts)
        # Combine valid and successfully mapped statements into single list.
        # A mapped statement counts as successful only if the second element
        # of every entry in its mapped_mods is not None.
        correctly_mapped_stmts = [
            ms.mapped_stmt for ms in mapped
            if all(mm[1] is not None for mm in ms.mapped_mods)
        ]
        mapped_stmts = valid + correctly_mapped_stmts
        # Second round of preassembly: de-duplicate and combine related
        pa2 = Preassembler(hierarchies, mapped_stmts)
        logger.info("Combining duplicates again")
        pa2.combine_duplicates()
        pa2.combine_related()
        # Fill out the results dict
        self.results = {
            'raw': stmts,
            'duplicates1': pa1.unique_stmts,
            'valid': valid,
            'mapped': mapped,
            'mapped_stmts': mapped_stmts,
            'duplicates2': pa2.unique_stmts,
            'related2': pa2.related_stmts,
        }
        # Log summary
        if print_summary:
            logger.info("\nStarting number of statements: %d", len(stmts))
            logger.info("After duplicate removal: %d", len(pa1.unique_stmts))
            logger.info("Unique statements with valid sites: %d", len(valid))
            logger.info("Unique statements with invalid sites: %d",
                        len(mapped))
            logger.info("After post-mapping duplicate removal: %d",
                        len(pa2.unique_stmts))
            logger.info("After combining related statements: %d",
                        len(pa2.related_stmts))
        # Save the results if we're caching
        if self.basename is not None:
            results_filename = '%s_results.pkl' % self.basename
            with open(results_filename, 'wb') as f:
                pickle.dump(self.results, f, protocol=2)
        return self.results