Example #1
0
def test_combine_duplicates():
    """Check that combine_duplicates groups matching statements together.

    Nine statements are supplied; four unique statements are expected, each
    carrying the evidence of every input statement it matches.
    """
    raf, mek, erk = Agent('RAF1'), Agent('MEK1'), Agent('ERK2')
    # Four RAF1 -> MEK1 phosphorylations differing only in evidence text
    raf_mek_phos = [Phosphorylation(raf, mek, evidence=Evidence(text=txt))
                    for txt in ('foo', 'bar', 'baz', 'beep')]
    mek_erk_phos = Phosphorylation(mek, erk, evidence=Evidence(text='foo'))
    # Three MEK1 -> ERK2 dephosphorylations differing only in evidence text
    mek_erk_dephos = [Dephosphorylation(mek, erk, evidence=Evidence(text=txt))
                      for txt in ('bar', 'baz', 'beep')]
    src_kras_dephos = Dephosphorylation(Agent('SRC'), Agent('KRAS'),
                                        evidence=Evidence(text='beep'))
    stmts = raf_mek_phos + [mek_erk_phos] + mek_erk_dephos + [src_kras_dephos]
    pa = Preassembler(hierarchies, stmts=stmts)
    pa.combine_duplicates()
    # The statements come out sorted by their matches_key
    expected = [
        (mek_erk_dephos[0], 3),  # MEK dephos ERK
        (src_kras_dephos, 1),    # SRC dephos KRAS
        (mek_erk_phos, 1),       # MEK phos ERK
        (raf_mek_phos[0], 4),    # RAF phos MEK
    ]
    assert len(pa.unique_stmts) == 4
    for idx, (reference, n_evidence) in enumerate(expected):
        assert pa.unique_stmts[idx].matches(reference)
        assert len(pa.unique_stmts[idx].evidence) == n_evidence
Example #2
0
def test_pathsfromto():
    """Run combine_related on phosphorylations from a paths-from-to query.

    Queries MAP2K1 -> MAPK1 through the biopax processor, then checks the
    resulting unique statements with unicode_strs.
    """
    processor = biopax.process_pc_pathsfromto(['MAP2K1'], ['MAPK1'])
    processor.get_phosphorylation()
    assert_pmids(processor.statements)
    preassembler = Preassembler(hierarchies, processor.statements)
    preassembler.combine_related()
    assert unicode_strs(preassembler.unique_stmts)
Example #3
0
def test_flatten_evidence_hierarchy():
    """Check flatten_evidence on a two-statement support hierarchy.

    The top-level statement is expected to carry both evidence texts, while
    the supporting statement keeps only its own. Mutating the supporting
    statement's evidence afterwards must not show up on the top-level one.
    """
    braf = Agent('BRAF')
    mek = Agent('MAP2K1')
    generic = Phosphorylation(braf, mek, evidence=[Evidence(text='foo')])
    specific = Phosphorylation(braf, mek, 'S', '218',
                               evidence=[Evidence(text='bar')])
    pa = Preassembler(hierarchies, stmts=[generic, specific])
    pa.combine_related()
    assert len(pa.related_stmts) == 1
    flattened = flatten_evidence(pa.related_stmts)
    assert len(flattened) == 1
    top_stmt = flattened[0]

    def top_texts():
        # Rebuilt on every call so that later mutations are observed
        return [e.text for e in top_stmt.evidence]

    assert len(top_stmt.evidence) == 2
    assert 'bar' in top_texts()
    assert 'foo' in top_texts()
    assert len(top_stmt.supported_by) == 1
    supporting_stmt = top_stmt.supported_by[0]
    assert len(supporting_stmt.evidence) == 1
    assert supporting_stmt.evidence[0].text == 'foo'
    # Change the supporting evidence and confirm the top level is unaffected
    supporting_stmt.evidence[0].text = 'changed_foo'
    assert supporting_stmt.evidence[0].text == 'changed_foo'
    assert 'changed_foo' not in top_texts()
    assert 'foo' in top_texts()
    support_types = {ev.annotations.get('support_type')
                     for ev in top_stmt.evidence}
    assert support_types == {'direct', 'supported_by'}
Example #4
0
def test_modification_refinement_residue_noenz():
    """Expect one top-level statement from two enzyme-less phosphorylations.

    The two statements share the substrate and differ only in that one of
    them specifies the residue 'Y'.
    """
    substrate = Agent('Erbb3')
    without_residue = Phosphorylation(None, substrate)
    with_residue = Phosphorylation(None, substrate, 'Y')
    preassembler = Preassembler(hierarchies,
                                stmts=[without_residue, with_residue])
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 1
Example #5
0
def test_render_stmt_graph():
    """Render the unreduced support graph for seven related phosphorylations.

    Verifies that the graph has one node per statement and the expected
    total number of edges.
    """
    braf = Agent('BRAF', db_refs={'HGNC': '1097'})
    mek1 = Agent('MAP2K1', db_refs={'HGNC': '6840'})
    mek = Agent('MEK', db_refs={'FPLX':'MEK'})
    stmts = [
        Phosphorylation(braf, mek),                    # p0
        Phosphorylation(braf, mek1),                   # p1
        Phosphorylation(braf, mek1, position='218'),   # p2
        Phosphorylation(braf, mek1, position='222'),   # p3
        Phosphorylation(braf, mek1, 'serine'),         # p4
        Phosphorylation(braf, mek1, 'serine', '218'),  # p5
        Phosphorylation(braf, mek1, 'serine', '222'),  # p6
    ]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_related()
    graph = render_stmt_graph(preassembler.related_stmts, reduce=False)
    # One node for each statement
    assert len(graph.nodes()) == 7
    # Expected edges, counted per supporting statement:
    #   p0 supports p1-p6   -> 6
    #   p1 supports p2-p6   -> 5
    #   p2 supports p5      -> 1
    #   p3 supports p6      -> 1
    #   p4 supports p5-p6   -> 2
    #   p5, p6 are top-level and support nothing
    # Total: 6 + 5 + 1 + 1 + 2 = 15
    assert len(graph.edges()) == 15
Example #6
0
def test_translocation():
    """Expect two top-level statements from three AKT translocations.

    One statement has no target location; the other two name different
    target locations.
    """
    no_location = Translocation(Agent('AKT'), None, None)
    to_membrane = Translocation(Agent('AKT'), None, 'plasma membrane')
    to_nucleus = Translocation(Agent('AKT'), None, 'nucleus')
    preassembler = Preassembler(hierarchies,
                                stmts=[no_location, to_membrane, to_nucleus])
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 2
Example #7
0
def test_association_refinement():
    """Check duplicate and refinement handling of Association statements.

    Uses a hierarchy loaded from the Eidos ontology RDF file located
    relative to this module. Four associations are expected to collapse to
    three unique statements and two top-level statements.
    """
    health = 'UN/entities/human/health'
    food = 'UN/entities/human/food'
    food_security = 'UN/entities/human/food/food_security'
    eh = Event(Concept('health', db_refs={'UN': [(health, 1.0)]}))
    ef = Event(Concept('food', db_refs={'UN': [(food, 1.0)]}))
    efs = Event(Concept('food security', db_refs={'UN': [(food_security, 1.0)]}))
    st1 = Association([eh, ef], evidence=[Evidence(source_api='eidos1')])
    st2 = Association([ef, eh], evidence=[Evidence(source_api='eidos2')])
    st3 = Association([eh, efs], evidence=[Evidence(source_api='eidos3')])
    st4 = Association([ef, efs], evidence=[Evidence(source_api='eidos4')])
    here = os.path.dirname(os.path.abspath(__file__))
    eidos_ont = os.path.join(here, '../sources/eidos/eidos_ontology.rdf')
    entity_hierarchy = HierarchyManager(eidos_ont, True, True)
    pa = Preassembler({'entity': entity_hierarchy}, [st1, st2, st3, st4])
    unique_stmts = pa.combine_duplicates() # debugging
    assert len(unique_stmts) == 3
    rel_stmts = pa.combine_related()
    assert len(rel_stmts) == 2

    def both_members_named(stmt, allowed):
        # True when the first two members' concept names are both in `allowed`
        return (stmt.members[0].concept.name in allowed
                and stmt.members[1].concept.name in allowed)

    # Pick out the health / food security association among the top level
    eh_efs_stmt = [st for st in rel_stmts
                   if both_members_named(st, {'health', 'food security'})][0]
    assert len(eh_efs_stmt.supported_by) == 1
    supporter = eh_efs_stmt.supported_by[0]
    assert supporter.members[0].concept.name in {'food', 'health'}
    assert supporter.members[1].concept.name in {'food', 'health'}
Example #8
0
def test_complex_refinement_order():
    """Expect one top-level Complex from two differently ordered complexes.

    The second complex lists its members in the opposite order and carries
    a phosphorylation condition on ELK1.
    """
    plain_complex = Complex([Agent('MED23'), Agent('ELK1')])
    phospho_elk1 = Agent('ELK1', mods=[ModCondition('phosphorylation')])
    modified_complex = Complex([phospho_elk1, Agent('MED23')])
    preassembler = Preassembler(hierarchies,
                                stmts=[plain_complex, modified_complex])
    preassembler.combine_duplicates()
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 1
Example #9
0
def test_duplicates():
    """Two identical phosphorylations should collapse to one unique statement."""
    enzyme = Agent('SRC', db_refs={'HGNC': '11283'})
    substrate = Agent('RAS', db_refs={'FA': '03663'})
    duplicates = [Phosphorylation(enzyme, substrate),
                  Phosphorylation(enzyme, substrate)]
    preassembler = Preassembler(hierarchies, stmts=duplicates)
    preassembler.combine_duplicates()
    assert len(preassembler.unique_stmts) == 1
Example #10
0
def analyze(filename, plot=False):
    """Compare gene/GO pairs found in statements against GO annotations.

    Loads statements from `filename`, maps and renames agent groundings,
    removes duplicates, and groups the unique statements by the GO ID
    returned by `go_gene_pair`. For each GO ID, statements are split by
    whether their gene appears in the result of `get_genes_for_go_id`.
    The resulting map is pickled to 'go_stmt_map.pkl' in the current
    working directory.

    Parameters
    ----------
    filename :
        Passed to `load_file`; the result is used as a mapping whose values
        are iterables of statements.
    plot : bool
        If True, also call `plot_stmt_counts` writing 'go_stmts.pdf'.
    """
    # Load the file and flatten the per-paper statement lists
    results = load_file(filename)
    all_stmts = [stmt for paper_stmts in results.values()
                      for stmt in paper_stmts]

    # Map grounding
    logger.info('Mapping grounding...')
    gmap = gm.GroundingMapper(gm.default_grounding_map)
    mapped = gmap.map_agents(all_stmts)
    mapped = gmap.rename_agents(mapped)

    # Combine duplicates
    logger.info('Removing duplicates...')
    pa = Preassembler(hierarchies, mapped)
    pa.combine_duplicates()

    # Map GO IDs to (gene, statement) pairs and to the names seen for them
    logger.info('Building map from GO IDs to stmts')
    go_gene_map = {}
    go_name_map = {}
    for stmt in pa.unique_stmts:
        bp_name, go, gene = go_gene_pair(stmt)
        # Skip only when all three values are missing
        if bp_name is None and go is None and gene is None:
            continue
        go_gene_map.setdefault(go, []).append((gene, stmt))
        go_name_map.setdefault(go, set()).add(bp_name)

    # For every GO ID, compare the genes annotated in GO with the genes
    # found in the statements
    go_stmt_map = {}
    num_go_ids = len(go_gene_map)
    for ix, (go_id, gene_stmt_list) in enumerate(go_gene_map.items()):
        names = list(go_name_map[go_id])
        logger.info('Getting genes for %s (%s) from GO (%d of %d)' %
                    (go_id, ','.join(names), ix+1, num_go_ids))
        genes_from_go = get_genes_for_go_id(go_id)
        in_go = []
        not_in_go = []
        for gene, stmt in gene_stmt_list:
            bucket = in_go if gene in genes_from_go else not_in_go
            bucket.append(stmt)
        go_stmt_map[go_id] = {'names': names,
                              'in_go': in_go, 'not_in_go': not_in_go}

    with open('go_stmt_map.pkl', 'wb') as f:
        pickle.dump(go_stmt_map, f, protocol=2)

    if plot:
        plot_stmt_counts(go_stmt_map, 'go_stmts.pdf')
Example #11
0
def test_agent_text_storage():
    """Check the agent text and prior-uuid annotations stored on evidence.

    combine_duplicates is run twice: first on a corpus of raw statements,
    then on the deduplicated output plus one additional statement. After
    each pass the 'agents' and 'prior_uuids' evidence annotations are
    inspected.
    """
    a_plain = Agent('A', db_refs={'TEXT': 'A'})
    a_alpha = Agent('A', db_refs={'TEXT': 'alpha'})
    b_bag = Agent('B', db_refs={'TEXT': 'bag'})
    b_bug = Agent('B', db_refs={'TEXT': 'bug'})
    c = Agent('C')
    d = Agent('D')
    corpus = [
        Complex([a_plain, b_bag],
                evidence=Evidence(text='A complex bag.')),
        Complex([b_bug, a_alpha],
                evidence=Evidence(text='bug complex alpha once.')),
        Complex([b_bug, a_alpha],
                evidence=Evidence(text='bug complex alpha again.')),
        Complex([a_plain, c, b_bug],
                evidence=Evidence(text='A complex C bug.')),
        Phosphorylation(a_plain, b_bag,
                        evidence=Evidence(text='A phospo bags.')),
        Phosphorylation(a_alpha, b_bug,
                        evidence=Evidence(text='alpha phospho bugs.')),
        Conversion(d, [a_plain, b_bag], [c, d],
                   evidence=Evidence(text='D: A bag -> C D')),
        Conversion(d, [b_bag, a_alpha], [c, d],
                   evidence=Evidence(text='D: bag a -> C D')),
        Conversion(d, [b_bug, a_alpha], [d, c],
                   evidence=Evidence(text='D: bug a -> D C')),
        Conversion(d, [b_bag, a_plain], [c, d],
                   evidence=Evidence(text='D: bag A -> C D')),
        Conversion(d, [a_plain], [a_plain, c],
                   evidence=Evidence(text='D: A -> A C')),
    ]
    first_pa = Preassembler(hierarchies, corpus)
    first_pass = first_pa.combine_duplicates()
    assert len(first_pass) == 5, len(first_pass)
    # Only statements with more than one piece of evidence are checked here
    assert all([len(evid.annotations['prior_uuids']) == 1
                for stmt in first_pass if len(stmt.evidence) > 1
                for evid in stmt.evidence]),\
        'There can only be one prior evidence per uuid at this stage.'

    # Index the stored agent annotations by the uuid of the source statement
    agents_by_uuid = {}
    for stmt in first_pass:
        for evid in stmt.evidence:
            source_uuid = evid.annotations['prior_uuids'][0]
            agents_by_uuid[source_uuid] = evid.annotations['agents']
    for stmt in corpus:
        raw_text = [ag.db_refs.get('TEXT')
                    for ag in stmt.agent_list(deep_sorted=True)]
        stored_text = agents_by_uuid[stmt.uuid]['raw_text']
        assert raw_text == stored_text,\
            str(raw_text) + '!=' + str(stored_text)

    # Now run pa on the above corpus plus another statement.
    extra_stmt = Complex([a_plain, c, b_bag],
                         evidence=Evidence(text='A complex C bag.'))
    second_corpus = first_pass + [extra_stmt]
    second_pa = Preassembler(hierarchies, second_corpus)
    second_pass = second_pa.combine_duplicates()
    assert len(second_pass) == 5, len(second_pass)
    # Separate the evidence of the newly added statement from the rest
    old_ev_list = []
    new_ev = None
    for stmt in second_pass:
        for evid in stmt.evidence:
            if evid.text == second_corpus[-1].evidence[0].text:
                new_ev = evid
            else:
                old_ev_list.append(evid)
    assert all([len(evid.annotations['prior_uuids']) == 2
                for evid in old_ev_list])
    assert new_ev
    assert len(new_ev.annotations['prior_uuids']) == 1
Example #12
0
def test_complex_agent_refinement():
    """Complexes whose RAF member differs in its ubiquitination flag stay apart.

    The two RAF agents carry a ubiquitination ModCondition whose fourth
    argument is True in one and False in the other; both statements are
    expected to remain unique and top-level.
    """
    ras = Agent('RAS')
    raf_flag_true = Agent(
        'RAF', mods=[ModCondition('ubiquitination', None, None, True)])
    raf_flag_false = Agent(
        'RAF', mods=[ModCondition('ubiquitination', None, None, False)])
    complexes = [Complex([ras, raf_flag_true]),
                 Complex([ras, raf_flag_false])]
    preassembler = Preassembler(hierarchies, stmts=complexes)
    preassembler.combine_related()
    assert len(preassembler.unique_stmts) == 2
    assert len(preassembler.related_stmts) == 2
Example #13
0
def test_grounding_aggregation():
    """Phosphorylations of BRAF agents with different db_refs stay distinct.

    The three substrates share the name 'BRAF' but differ in their db_refs;
    three unique statements are expected.
    """
    substrates = [
        Agent('BRAF', db_refs={'TEXT': 'braf', 'HGNC': '1097'}),
        Agent('BRAF', db_refs={'TEXT': 'BRAF'}),
        Agent('BRAF', db_refs={'TEXT': 'Braf', 'UP': 'P15056'}),
    ]
    phosphorylations = [Phosphorylation(None, sub) for sub in substrates]
    preassembler = Preassembler(hierarchies, stmts=phosphorylations)
    deduplicated = preassembler.combine_duplicates()
    assert len(deduplicated) == 3
Example #14
0
def test_complex_refinement():
    """A two-member and a three-member Complex are not merged.

    Both statements are expected to remain unique and top-level.
    """
    ras, raf, mek = Agent('RAS'), Agent('RAF'), Agent('MEK')
    dimer = Complex([ras, raf])
    trimer = Complex([mek, ras, raf])
    preassembler = Preassembler(hierarchies, stmts=[dimer, trimer])
    preassembler.combine_related()
    assert len(preassembler.unique_stmts) == 2
    assert len(preassembler.related_stmts) == 2
Example #15
0
def test_homodimer_refinement():
    """An ERBB2 homodimer and an ERBB2/EGFR heterodimer are kept separate.

    Two statements are expected both after duplicate removal and after
    combining related statements.
    """
    egfr = Agent('EGFR')
    erbb2 = Agent('ERBB2')
    homodimer = Complex([erbb2, erbb2])
    heterodimer = Complex([erbb2, egfr])
    preassembler = Preassembler(hierarchies, stmts=[homodimer, heterodimer])
    preassembler.combine_duplicates()
    assert len(preassembler.unique_stmts) == 2
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 2
Example #16
0
def test_grounding_aggregation_complex():
    """MEK/BRAF complexes with differently grounded BRAF agents stay distinct.

    The BRAF agents share a name but differ in db_refs, and the member
    order varies; three unique statements are expected.
    """
    mek = Agent('MEK')
    braf_hgnc = Agent('BRAF', db_refs={'TEXT': 'braf', 'HGNC': '1097'})
    braf_dummy = Agent('BRAF', db_refs={'TEXT': 'BRAF', 'dummy': 'dummy'})
    braf_up = Agent('BRAF', db_refs={'TEXT': 'Braf', 'UP': 'P15056'})
    complexes = [
        Complex([mek, braf_hgnc]),
        Complex([braf_dummy, mek]),
        Complex([mek, braf_up]),
    ]
    preassembler = Preassembler(hierarchies, stmts=complexes)
    deduplicated = preassembler.combine_duplicates()
    assert len(deduplicated) == 3
Example #17
0
def test_agent_coordinates():
    """Check agent raw text and coordinates stored in evidence annotations.

    Statements are read from 'reach_coordinates.json' next to this module;
    the first deduplicated statement's evidence is inspected.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(here, 'reach_coordinates.json')
    stmts = reach.process_json_file(path).statements
    preassembler = Preassembler(hierarchies, stmts)
    unique_stmt = preassembler.combine_duplicates()[0]
    evidence_list = unique_stmt.evidence
    agent_annots = [ev.annotations['agents'] for ev in evidence_list]
    assert all(a['raw_text'] == ['MEK1', 'ERK2'] for a in agent_annots)
    observed_coords = {tuple(a['coords']) for a in agent_annots}
    expected_coords = {((21, 25), (0, 4)),
                       ((0, 4), (15, 19))}
    assert observed_coords == expected_coords
Example #18
0
def test_duplicates_copy():
    """combine_duplicates must leave the input statements untouched.

    After merging two matching statements into one, the input list still
    holds two statements with one piece of evidence each.
    """
    enzyme = Agent('SRC', db_refs={'HGNC': '11283'})
    substrate = Agent('RAS', db_refs={'FA': '03663'})
    stmts = [
        Phosphorylation(enzyme, substrate, evidence=[Evidence(text='Text 1')]),
        Phosphorylation(enzyme, substrate, evidence=[Evidence(text='Text 2')]),
    ]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_duplicates()
    assert len(preassembler.unique_stmts) == 1
    # The originals are unchanged
    assert len(stmts) == 2
    assert len(stmts[0].evidence) == 1
    assert len(stmts[1].evidence) == 1
Example #19
0
def test_normalize_opposites_influence():
    """Check normalize_opposites on the events of an Influence statement.

    The subject's top grounding is food_stability and the object's is
    food_instability with polarity -1; after normalization both deltas are
    expected to have polarity 1.
    """
    concept1 = 'wm/concept/causal_factor/food_security/food_stability'
    concept2 = 'wm/concept/causal_factor/food_insecurity/food_instability'
    subj_refs = {'WM': [(concept1, 1.0), (concept2, 0.5)]}
    obj_refs = {'WM': [(concept2, 1.0), (concept1, 0.5)]}
    subj_event = Event(Concept('x', db_refs=subj_refs),
                       delta=QualitativeDelta(polarity=1))
    obj_event = Event(Concept('y', db_refs=obj_refs),
                      delta=QualitativeDelta(polarity=-1))
    influence = Influence(subj_event, obj_event)
    preassembler = Preassembler(world_ontology, stmts=[influence])
    preassembler.normalize_opposites(ns='WM')
    assert preassembler.stmts[0].subj.delta.polarity == 1
    assert preassembler.stmts[0].obj.delta.polarity == 1
Example #20
0
def test_duplicates_copy():
    """Verify that duplicate merging does not modify the input statements."""
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    ras = Agent('RAS', db_refs={'FA': '03663'})
    first = Phosphorylation(src, ras, evidence=[Evidence(text='Text 1')])
    second = Phosphorylation(src, ras, evidence=[Evidence(text='Text 2')])
    originals = [first, second]
    pa = Preassembler(hierarchies, stmts=originals)
    pa.combine_duplicates()
    # One merged statement comes out...
    assert len(pa.unique_stmts) == 1
    # ...while the input list and each statement's evidence are unchanged
    assert len(originals) == 2
    assert len(originals[0].evidence) == 1
    assert len(originals[1].evidence) == 1
Example #21
0
def test_complex_agent_refinement():
    """Expect no merging when RAF's ubiquitination flag differs.

    The ModCondition on RAF is built with a final argument of True in one
    complex and False in the other.
    """
    ras = Agent('RAS')
    flag_true = ModCondition('ubiquitination', None, None, True)
    raf1 = Agent('RAF', mods=[flag_true])
    flag_false = ModCondition('ubiquitination', None, None, False)
    raf2 = Agent('RAF', mods=[flag_false])
    first = Complex([ras, raf1])
    second = Complex([ras, raf2])
    pa = Preassembler(hierarchies, stmts=[first, second])
    pa.combine_related()
    assert len(pa.unique_stmts) == 2
    assert len(pa.related_stmts) == 2
Example #22
0
def test_conversion_refinement():
    """Expect two top-level statements from four related Conversions.

    The conversions vary the subject (RAS family entry vs. HRAS gene) and
    the list of source agents, including two that differ only in the order
    of the source agents.
    """
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    gtp = Agent('GTP')
    gdp = Agent('GDP')
    conversions = [
        Conversion(ras, gtp, gdp),
        Conversion(hras, gtp, gdp),
        Conversion(hras, [gtp, gdp], gdp),
        Conversion(hras, [gdp, gtp], gdp),
    ]
    preassembler = Preassembler(hierarchies, stmts=conversions)
    toplevel_stmts = preassembler.combine_related()
    assert len(toplevel_stmts) == 2
Example #23
0
def test_flatten_stmts():
    """Check flatten_stmts on unique and on top-level statements.

    Five RAF1 phosphorylations are expected to give two top-level
    statements, and four statements after flattening either the unique or
    the top-level list.
    """
    raf1_phos = [
        Phosphorylation(Agent('MAP3K5'), Agent('RAF1'), 'S', '338'),
        Phosphorylation(None, Agent('RAF1'), 'S', '338'),
        Phosphorylation(None, Agent('RAF1')),
        Phosphorylation(Agent('PAK1'), Agent('RAF1'), 'S', '338'),
        Phosphorylation(None, Agent('RAF1'), evidence=Evidence(text='foo')),
    ]
    preassembler = Preassembler(hierarchies, stmts=raf1_phos)
    preassembler.combine_duplicates()
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 2
    assert len(flatten_stmts(preassembler.unique_stmts)) == 4
    assert len(flatten_stmts(preassembler.related_stmts)) == 4
Example #24
0
def test_conversion_refinement():
    """Combine four related Conversions using bio_ontology.

    Two top-level statements are expected.
    """
    family_subj = Agent('RAS', db_refs={'FPLX': 'RAS'})
    gene_subj = Agent('HRAS', db_refs={'HGNC': '5173'})
    gtp = Agent('GTP')
    gdp = Agent('GDP')
    family_conv = Conversion(family_subj, gtp, gdp)
    gene_conv = Conversion(gene_subj, gtp, gdp)
    # Same agents in the source list, in both orders
    gene_conv_two_from = Conversion(gene_subj, [gtp, gdp], gdp)
    gene_conv_two_from_swapped = Conversion(gene_subj, [gdp, gtp], gdp)
    pa = Preassembler(bio_ontology,
                      stmts=[family_conv, gene_conv, gene_conv_two_from,
                             gene_conv_two_from_swapped])
    toplevel_stmts = pa.combine_related()
    assert len(toplevel_stmts) == 2
Example #25
0
def test_preassemble_related_complex():
    """Deduplicate and combine Complex statements using bio_ontology.

    Four complexes (two member pairs, each in both orders) are expected to
    give two unique statements and one top-level statement.
    """
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    complexes = [
        Complex([kras, hras]),
        Complex([kras, ras]),
        Complex([hras, kras]),
        Complex([ras, kras]),
    ]
    preassembler = Preassembler(bio_ontology, complexes)
    deduplicated = preassembler.combine_duplicates()
    assert len(deduplicated) == 2
    top_level = preassembler.combine_related()
    assert len(top_level) == 1
Example #26
0
def test_activation_refinement():
    """An Inhibition and an Activation of the same pair are not merged.

    Two statements are expected both after duplicate removal and after
    combining related statements.
    """
    alcohol_refs = {'CHEBI': 'CHEBI:16236',
                    'HMDB': 'HMDB00108',
                    'PUBCHEM': '702',
                    'TEXT': 'alcohol'}
    subj = Agent('alcohol', db_refs=alcohol_refs)
    obj = Agent('endotoxin', db_refs={'TEXT': 'endotoxin'})
    inhibition = Inhibition(subj, obj)
    activation = Activation(subj, obj)
    preassembler = Preassembler(hierarchies, stmts=[inhibition, activation])
    preassembler.combine_duplicates()
    assert len(preassembler.unique_stmts) == 2
    preassembler.combine_related()
    assert len(preassembler.related_stmts) == 2
Example #27
0
def test_find_contradicts_refinement():
    """Check find_contradicts between a family-level and gene-level statements.

    A phosphorylation of the RAS family entry is expected to be reported as
    contradicting each of the dephosphorylations of KRAS and HRAS.
    """
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    st1 = Phosphorylation(Agent('x'), ras)
    st2 = Dephosphorylation(Agent('x'), kras)
    st3 = Dephosphorylation(Agent('x'), hras)
    preassembler = Preassembler(bio_ontology, [st1, st2, st3])
    contradicts = preassembler.find_contradicts()
    assert len(contradicts) == 2
    # Each reported pair must be one of the two expected uuid pairs
    expected_pairs = ({st1.uuid, st2.uuid}, {st1.uuid, st3.uuid})
    for left, right in contradicts:
        assert {left.uuid, right.uuid} in expected_pairs
Example #28
0
def test_find_contradicts_refinement():
    """Expect two contradicting pairs, each involving the RAS phosphorylation."""
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    ras_phos = Phosphorylation(Agent('x'), ras)
    kras_dephos = Dephosphorylation(Agent('x'), kras)
    hras_dephos = Dephosphorylation(Agent('x'), hras)
    pa = Preassembler(hierarchies, [ras_phos, kras_dephos, hras_dephos])
    contradicts = pa.find_contradicts()
    assert len(contradicts) == 2
    for s1, s2 in contradicts:
        found = {s1.uuid, s2.uuid}
        assert found in ({ras_phos.uuid, kras_dephos.uuid},
                         {ras_phos.uuid, hras_dephos.uuid})
Example #29
0
def test_preassemble_related_complex():
    """Expect two unique and one top-level statement from four complexes."""
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    kras_hras = Complex([kras, hras])
    kras_ras = Complex([kras, ras])
    # The same member pairs, listed in the opposite order
    hras_kras = Complex([hras, kras])
    ras_kras = Complex([ras, kras])
    pa = Preassembler(hierarchies,
                      [kras_hras, kras_ras, hras_kras, ras_kras])
    uniq = pa.combine_duplicates()
    assert len(uniq) == 2
    top = pa.combine_related()
    assert len(top) == 1
Example #30
0
def test_combine_evidence_exact_duplicates():
    """Identical evidence on matching statements should be kept only once.

    Three matching statements carry the texts 'foo', 'bar' and 'bar'; the
    merged statement is expected to hold two pieces of evidence.
    """
    raf = Agent('RAF1')
    mek = Agent('MEK1')
    stmts = [Phosphorylation(raf, mek, evidence=Evidence(text=txt))
             for txt in ('foo', 'bar', 'bar')]
    preassembler = Preassembler(bio_ontology, stmts=stmts)
    preassembler.combine_duplicates()
    assert len(preassembler.unique_stmts) == 1
    merged = preassembler.unique_stmts[0]
    assert len(merged.evidence) == 2
    assert {ev.text for ev in merged.evidence} == {'foo', 'bar'}
Example #31
0
def test_superfamily_refinement_isa_or_partof():
    """A gene-level substrate statement should be supported by the AMPK one.

    Both statements share the enzyme, residue and position and differ only
    in the substrate (PRKAG1 vs. the AMPK FPLX entry).
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    prkag1 = Agent('PRKAG1', db_refs={'HGNC': '9385'})
    ampk = Agent('AMPK', db_refs={'FPLX': 'AMPK'})
    family_stmt = Phosphorylation(src, ampk, 'tyrosine', '32')
    gene_stmt = Phosphorylation(src, prkag1, 'tyrosine', '32')
    preassembler = Preassembler(hierarchies, stmts=[family_stmt, gene_stmt])
    top_level = preassembler.combine_related()
    # Only the gene-level statement is top-level; the family-level one
    # appears as its single supporting statement.
    assert len(top_level) == 1
    top_stmt = top_level[0]
    assert top_stmt.equals(gene_stmt)
    assert len(top_stmt.supported_by) == 1
    assert top_stmt.supported_by[0].equals(family_stmt)
Example #32
0
def test_modification_norefinement_noenz():
    """Statements that are each more specific in one respect are not combined.

    One statement names the enzyme but no site; the other names the site
    but no enzyme. Both are expected to remain top-level.
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    with_enzyme = Phosphorylation(src, nras)
    with_site = Phosphorylation(None, nras, 'Y', '32',
                                evidence=[Evidence(text='foo')])
    preassembler = Preassembler(hierarchies, stmts=[with_enzyme, with_site])
    top_level = preassembler.combine_related()
    # The enzyme is more specific in the first statement and the
    # modification site in the second, so neither refines the other.
    assert len(top_level) == 2
    assert len(top_level[1].evidence) == 1
Example #33
0
def test_split_idx():
    """Exercise _generate_id_maps with and without split_idx.

    Also checks the output of _generate_relations and
    _generate_relation_tuples for the same three statements.
    """
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    ras_stmt = Phosphorylation(Agent('x'), ras)
    kras_stmt = Phosphorylation(Agent('x'), kras)
    hras_stmt = Phosphorylation(Agent('x'), hras)

    # Without split_idx both (1, 0) and (2, 0) are expected
    pa = Preassembler(bio_ontology)
    id_maps = pa._generate_id_maps([ras_stmt, kras_stmt, hras_stmt])
    assert (1, 0) in id_maps, id_maps
    assert (2, 0) in id_maps, id_maps
    assert pa._comparison_counter == 2

    # With split_idx=1 only (2, 0) is expected, after a single comparison
    pa = Preassembler(bio_ontology)
    id_maps = pa._generate_id_maps([ras_stmt, kras_stmt, hras_stmt],
                                   split_idx=1)
    assert (2, 0) in id_maps, id_maps
    assert (1, 0) not in id_maps, id_maps
    assert pa._comparison_counter == 1

    # Test other endpoints
    refinements = pa._generate_relations([ras_stmt, kras_stmt, hras_stmt])
    expected_map = {kras_stmt.get_hash(): {ras_stmt.get_hash()},
                    hras_stmt.get_hash(): {ras_stmt.get_hash()}}
    assert refinements == expected_map, refinements

    refinements = pa._generate_relation_tuples(
        [ras_stmt, kras_stmt, hras_stmt])
    expected_tuples = {(kras_stmt.get_hash(), ras_stmt.get_hash()),
                       (hras_stmt.get_hash(), ras_stmt.get_hash())}
    assert refinements == expected_tuples
Example #34
0
def extract_phos():
    """Extract, preassemble and filter phosphorylations from pickled statements.

    Loads the pickle at `stmts_fname` (used as a mapping whose values are
    iterables of statements), keeps Phosphorylation statements that have an
    enzyme, applies the grounding, kinase and site-mapping filters, runs
    duplicate and relatedness preassembly, and finally filters for direct,
    non-hypothesis statements. The statement count is logged after each
    stage.

    Two pickles are written to the current working directory:
    'mapped_unique_phos.pkl' (the statements returned by combine_related)
    and 'filtered_phos.pkl' (the final list).

    Returns
    -------
    The statements remaining after the last filter.
    """
    # NOTE(review): unpickling can execute arbitrary code; stmts_fname is
    # presumably a trusted local file -- confirm.
    with open(stmts_fname, 'rb') as fh:
        stmts_by_paper = pickle.load(fh)

    phos = [stmt for paper_stmts in stmts_by_paper.values()
            for stmt in paper_stmts
            if isinstance(stmt, Phosphorylation)]
    logger.info('%d phosphorylations in RAS Machine' % len(phos))

    phos_with_enz = [s for s in phos if s.enz is not None]
    logger.info('%d phosphorylations with enzyme in RAS Machine' %
                len(phos_with_enz))

    grounded = filter_grounded(phos_with_enz)
    logger.info('%d grounded phosphorylations in RAS Machine' %
                len(grounded))

    kinase_enz = filter_enzkinase(grounded)
    logger.info('%d phosphorylations with kinase enzyme in RAS Machine' %
                len(kinase_enz))

    site_mapper = SiteMapper(default_site_map)
    # Only the first element of the map_sites result is used
    valid, _ = site_mapper.map_sites(kinase_enz)
    logger.info('%d valid-sequence phosphorylations in RAS Machine' %
                len(valid))

    pa = Preassembler(hierarchies, valid)
    unique = pa.combine_duplicates()
    logger.info('%d unique phosphorylations in RAS Machine' %
                len(unique))

    top_level = pa.combine_related()
    logger.info('%d top-level phosphorylations in RAS Machine' %
                len(top_level))

    with open('mapped_unique_phos.pkl', 'wb') as fh:
        pickle.dump(top_level, fh, protocol=2)

    # Filter RAS Machine statements for direct and not hypothesis
    direct = filter_direct(top_level)
    logger.info('%d direct phosphorylations in RAS Machine' % len(direct))
    final_stmts = filter_non_hypothesis(direct)
    logger.info('%d non-hypothesis phosphorylations in RAS Machine' %
                len(final_stmts))

    with open('filtered_phos.pkl', 'wb') as fh:
        pickle.dump(final_stmts, fh, protocol=2)

    return final_stmts
Example #35
0
def test_modification_norefinement_subsfamily():
    """Statements differing in substrate and site detail are not combined.

    One statement has the NRAS gene as substrate but no site; the other has
    the RAS FPLX entry as substrate with residue and position given. Both
    are expected to remain top-level.
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    gene_no_site = Phosphorylation(src, nras)
    family_with_site = Phosphorylation(src, ras, 'Y', '32',
                                       evidence=[Evidence(text='foo')])
    preassembler = Preassembler(bio_ontology,
                                stmts=[gene_no_site, family_with_site])
    top_level = preassembler.combine_related()
    # The site is given only in the RAS statement while the substrate is a
    # single gene only in the NRAS statement, so neither refines the other.
    assert len(top_level) == 2
    assert len(top_level[0].evidence) == 1, top_level
Example #36
0
def test_modification_norefinement_enzfamily():
    """Statements differing in enzyme and site detail are not combined.

    One statement has RAF as enzyme with residue and position given; the
    other has BRAF as enzyme and no site. Both are expected to remain
    top-level.
    """
    mek = Agent('MEK')
    raf = Agent('RAF')
    braf = Agent('BRAF')
    raf_with_site = Phosphorylation(raf, mek, 'Y', '32',
                                    evidence=[Evidence(text='foo')])
    braf_no_site = Phosphorylation(braf, mek)
    preassembler = Preassembler(bio_ontology,
                                stmts=[raf_with_site, braf_no_site])
    top_level = preassembler.combine_related()
    # The site is given only in the RAF statement and the enzymes differ,
    # so the two statements are expected to stay separate.
    assert len(top_level) == 2
    assert len(top_level[1].evidence) == 1
Example #37
0
def test_normalize_opposites():
    """Check normalize_opposites on a single Event in two scenarios.

    In both scenarios every food_instability grounding is expected to be
    replaced by food_stability with its score kept, and the unrelated
    flooding grounding left alone. The polarity is expected to stay 1 when
    food_stability is the top grounding and to become -1 when
    food_instability is.
    """
    concept1 = 'wm/concept/causal_factor/food_security/food_stability'
    concept2 = 'wm/concept/causal_factor/food_insecurity/food_instability'
    concept3 = ('wm/concept/causal_factor/crisis_and_disaster/'
                'environmental_disasters/natural_disaster/flooding')

    def normalize(groundings):
        # Build a positive-polarity event, normalize it, and return the
        # preassembler holding the result
        event = Event(Concept('x', db_refs={'WM': groundings}),
                      delta=QualitativeDelta(polarity=1))
        preassembler = Preassembler(world_ontology, stmts=[event])
        preassembler.normalize_opposites(ns='WM')
        return preassembler

    expected_groundings = [(concept1, 1.0), (concept1, 0.5), (concept3, 0.1)]

    # First test the inherently positive being the main grounding
    pa = normalize([(concept1, 1.0), (concept2, 0.5), (concept3, 0.1)])
    for idx, expected in enumerate(expected_groundings):
        assert pa.stmts[0].concept.db_refs['WM'][idx] == \
            expected, pa.stmts[0].concept.db_refs['WM']
    assert pa.stmts[0].delta.polarity == 1

    # Next test the inherently negative being the main grounding
    pa = normalize([(concept2, 1.0), (concept1, 0.5), (concept3, 0.1)])
    for idx, expected in enumerate(expected_groundings):
        assert pa.stmts[0].concept.db_refs['WM'][idx] == \
            expected, pa.stmts[0].concept.db_refs['WM']
    assert pa.stmts[0].delta.polarity == -1
Example #38
0
def test_superfamily_refinement_isa_or_partof():
    """Expect the PRKAG1 statement on top, supported by the AMPK statement."""
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    prkag1 = Agent('PRKAG1', db_refs={'HGNC': '9385'})
    ampk = Agent('AMPK', db_refs={'FPLX': 'AMPK'})
    ampk_phos = Phosphorylation(src, ampk, 'tyrosine', '32')
    prkag1_phos = Phosphorylation(src, prkag1, 'tyrosine', '32')
    pa = Preassembler(bio_ontology, stmts=[ampk_phos, prkag1_phos])
    stmts = pa.combine_related()
    # A single top-level statement: the gene-level one
    assert len(stmts) == 1
    assert stmts[0].equals(prkag1_phos)
    # Its only supporter is the family-level statement
    supporters = stmts[0].supported_by
    assert len(supporters) == 1
    assert supporters[0].equals(ampk_phos)
Example #39
0
def test_combine_evidence_exact_duplicates_different_raw_text():
    """Evidence with equal text is kept when the agents' raw text differs.

    Two of the three matching statements share the evidence text 'bar' but
    their RAF1 agents have different TEXT entries; the merged statement is
    expected to hold all three pieces of evidence.
    """
    raf_lower = Agent('RAF1', db_refs={'TEXT': 'Raf'})
    raf_upper = Agent('RAF1', db_refs={'TEXT': 'RAF'})
    mek = Agent('MEK1')
    stmts = [
        Phosphorylation(raf_lower, mek, evidence=Evidence(text='foo')),
        Phosphorylation(raf_lower, mek, evidence=Evidence(text='bar')),
        Phosphorylation(raf_upper, mek, evidence=Evidence(text='bar')),
    ]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_duplicates()
    assert len(preassembler.unique_stmts) == 1
    merged = preassembler.unique_stmts[0]
    assert len(merged.evidence) == 3
    # Only two distinct texts exist across the three pieces of evidence
    assert {ev.text for ev in merged.evidence} == {'foo', 'bar'}
Example #40
0
def test_modification_refinement():
    """The site-specific statement should be supported by the site-less one.

    Both statements share enzyme and substrate; only one gives the residue
    and position.
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    with_site = Phosphorylation(src, nras, 'tyrosine', '32')
    without_site = Phosphorylation(src, nras)
    preassembler = Preassembler(hierarchies, stmts=[with_site, without_site])
    top_level = preassembler.combine_related()
    # Only the site-specific statement is top-level; the site-less one is
    # its single supporting statement.
    assert len(top_level) == 1
    top_stmt = top_level[0]
    assert top_stmt.equals(with_site)
    assert len(top_stmt.supported_by) == 1
    assert top_stmt.supported_by[0].equals(without_site)
Example #41
0
def test_modification_refinement():
    """Check that a statement with residue and position is returned as the
    top-level statement, supported by the one lacking those details."""
    hgnc_refs = {'SRC': '11283', 'NRAS': '7989'}
    src = Agent('SRC', db_refs={'HGNC': hgnc_refs['SRC']})
    nras = Agent('NRAS', db_refs={'HGNC': hgnc_refs['NRAS']})
    detailed = Phosphorylation(src, nras, 'tyrosine', '32')
    bare = Phosphorylation(src, nras)
    pa = Preassembler(bio_ontology, stmts=[detailed, bare])
    result = pa.combine_related()
    # Only the detailed statement is expected at the top level.
    assert len(result) == 1
    top = result[0]
    assert top.equals(detailed)
    # The bare statement must be its single supporting statement.
    assert len(top.supported_by) == 1
    assert top.supported_by[0].equals(bare)
Example #42
0
def test_bound_condition_norefinement():
    """Statements that are each more specific in a different respect should
    not be combined."""
    kinase = Agent('SRC', db_refs={'HGNC': '11283'})
    ligand = Agent('GTP', db_refs={'CHEBI': '15996'})
    free_nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    gtp_bound_nras = Agent('NRAS', db_refs={'HGNC': '7989'},
                           bound_conditions=[BoundCondition(ligand, True)])
    with_site = Phosphorylation(kinase, free_nras, 'tyrosine', '32')
    with_bound_context = Phosphorylation(kinase, gtp_bound_nras)
    pa = Preassembler(hierarchies, stmts=[with_site, with_bound_context])
    combined = pa.combine_related()
    # One statement has the more specific bound condition, the other the
    # more specific modification site, so both must stay at the top level.
    assert len(combined) == 2
Example #43
0
def test_modification_norefinement_enzfamily():
    """A statement with a specific site and one with a different enzyme
    agent should not be combined."""
    mek = Agent('MEK')
    raf = Agent('RAF')
    braf = Agent('BRAF')
    site_stmt = Phosphorylation(raf, mek, 'Y', '32',
                                evidence=[Evidence(text='foo')])
    enzyme_stmt = Phosphorylation(braf, mek)
    pa = Preassembler(hierarchies, stmts=[site_stmt, enzyme_stmt])
    combined = pa.combine_related()
    # The first statement pins down the modification site (Y32) while the
    # second names BRAF rather than RAF as the enzyme (presumably the more
    # specific agent), so neither is expected to refine the other.
    assert len(combined) == 2
    second = combined[1]
    assert len(second.evidence) == 1
Example #44
0
def test_duplicates_sorting():
    """Three statements, two of them identical, should reduce to two unique
    statements after duplicates are combined."""
    unspecified_phos = ModCondition('phosphorylation')
    map2k1_generic = Agent('MAP2K1', mods=[unspecified_phos])
    serine_sites = [
        ModCondition('phosphorylation', 'serine', '218'),
        ModCondition('phosphorylation', 'serine', '222'),
        ModCondition('phosphorylation', 'serine', '298'),
    ]
    map2k1_sites = Agent('MAP2K1', mods=serine_sites)
    target = Agent('MAPK3')
    # The first and third statements are built from identical arguments.
    first = Phosphorylation(map2k1_generic, target, position='218')
    second = Phosphorylation(map2k1_sites, target)
    third = Phosphorylation(map2k1_generic, target, position='218')
    pa = Preassembler(bio_ontology, stmts=[first, second, third])
    pa.combine_duplicates()
    assert len(pa.unique_stmts) == 2
Example #45
0
def test_superfamily_refinement():
    """A gene-level statement should be supported by a family-level
    statement."""
    kinase = Agent('SRC', db_refs={'HGNC': '11283'})
    family = Agent('RAS', db_refs={'FPLX': 'RAS'})
    gene = Agent('NRAS', db_refs={'HGNC': '7989'})
    family_stmt = Phosphorylation(kinase, family, 'tyrosine', '32')
    gene_stmt = Phosphorylation(kinase, gene, 'tyrosine', '32')
    pa = Preassembler(hierarchies, stmts=[family_stmt, gene_stmt])
    top_level = pa.combine_related()
    # Only the gene-level statement should be at the top level, with the
    # family-level statement as its sole supporter.
    assert len(top_level) == 1
    winner = top_level[0]
    assert winner.equals(gene_stmt)
    assert len(winner.supported_by) == 1
    assert winner.supported_by[0].equals(family_stmt)
Example #46
0
def test_superfamily_refinement():
    """Check that a statement about NRAS (HGNC-grounded) is returned on top,
    supported by the same statement about RAS (FPLX-grounded)."""
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    ras_family = Agent('RAS', db_refs={'FPLX': 'RAS'})
    nras_gene = Agent('NRAS', db_refs={'HGNC': '7989'})
    site = ('tyrosine', '32')
    on_family = Phosphorylation(src, ras_family, *site)
    on_gene = Phosphorylation(src, nras_gene, *site)
    pa = Preassembler(bio_ontology, stmts=[on_family, on_gene])
    result = pa.combine_related()
    # A single top-level statement is expected: the gene-level one.
    assert len(result) == 1
    top = result[0]
    assert top.equals(on_gene)
    # The family-level statement should be its only supporter.
    assert len(top.supported_by) == 1
    assert top.supported_by[0].equals(on_family)
Example #47
0
def test_bound_condition_norefinement():
    """A statement with a bound condition but no site and a statement with
    a site but no bound condition should both remain at the top level."""
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    gtp = Agent('GTP', db_refs={'CHEBI': '15996'})
    nras_refs = {'HGNC': '7989'}
    nras_plain = Agent('NRAS', db_refs={'HGNC': '7989'})
    nras_on_gtp = Agent('NRAS', db_refs=nras_refs,
                        bound_conditions=[BoundCondition(gtp, True)])
    site_only = Phosphorylation(src, nras_plain, 'tyrosine', '32')
    context_only = Phosphorylation(src, nras_on_gtp)
    pa = Preassembler(bio_ontology, stmts=[site_only, context_only])
    result = pa.combine_related()
    # Each statement is more specific than the other in one respect
    # (modification site vs. bound condition), so they are not combined.
    assert len(result) == 2
Example #48
0
def test_modification_norefinement_noenz():
    """A statement with an enzyme but no site and a statement with a site
    but no enzyme should not be combined."""
    kinase = Agent('SRC', db_refs={'HGNC': '11283'})
    substrate = Agent('NRAS', db_refs={'HGNC': '7989'})
    with_enzyme = Phosphorylation(kinase, substrate)
    site_evidence = [Evidence(text='foo')]
    with_site = Phosphorylation(None, substrate, 'Y', '32',
                                evidence=site_evidence)
    pa = Preassembler(hierarchies, stmts=[with_enzyme, with_site])
    combined = pa.combine_related()
    # The first statement has the enzyme but a less specific modification;
    # the second has the site but no enzyme. Neither refines the other.
    assert len(combined) == 2
    trailing = combined[1]
    assert len(trailing.evidence) == 1
Example #49
0
def process_statements(stmts, **generate_id_map_kwargs):
    """Map, de-duplicate and index a collection of statements.

    The statements are passed through grounding mapping and then sequence
    mapping before a unique statement set is built with a fresh
    ``Preassembler``. Any extra keyword arguments are forwarded unchanged
    to ``get_match_key_maps``.

    Returns a 2-tuple ``(unique_stmts, match_key_maps)``.
    """
    mapped = ac.map_sequence(ac.map_grounding(stmts))
    preassembler = Preassembler(hierarchies)
    uniques = make_unique_statement_set(preassembler, mapped)
    key_maps = get_match_key_maps(preassembler, uniques,
                                  **generate_id_map_kwargs)
    return uniques, key_maps
Example #50
0
def test_duplicates_sorting():
    """Two identical statements plus one distinct statement should yield
    exactly two unique statements."""
    generic_mod = ModCondition('phosphorylation')
    enzyme_generic = Agent('MAP2K1', mods=[generic_mod])
    s218 = ModCondition('phosphorylation', 'serine', '218')
    s222 = ModCondition('phosphorylation', 'serine', '222')
    s298 = ModCondition('phosphorylation', 'serine', '298')
    enzyme_three_sites = Agent('MAP2K1', mods=[s218, s222, s298])
    substrate = Agent('MAPK3')
    # dup_a and dup_b are constructed with the same arguments.
    dup_a = Phosphorylation(enzyme_generic, substrate, position='218')
    distinct = Phosphorylation(enzyme_three_sites, substrate)
    dup_b = Phosphorylation(enzyme_generic, substrate, position='218')
    pa = Preassembler(hierarchies, stmts=[dup_a, distinct, dup_b])
    pa.combine_duplicates()
    assert len(pa.unique_stmts) == 2
Example #51
0
def test_return_toplevel():
    """Exercise both values of ``return_toplevel`` in ``combine_related``.

    With ``return_toplevel=True`` only the refined statement comes back;
    with ``False`` both statements come back, linked to each other through
    ``supports`` / ``supported_by``.
    """
    kinase = Agent('SRC', db_refs={'HGNC': '11283'})
    substrate = Agent('NRAS', db_refs={'HGNC': '7989'})
    with_site = Phosphorylation(kinase, substrate, 'tyrosine', '32')
    without_site = Phosphorylation(kinase, substrate)
    pa = Preassembler(hierarchies, stmts=[with_site, without_site])

    top_only = pa.combine_related(return_toplevel=True)
    assert len(top_only) == 1
    assert len(top_only[0].supported_by) == 1
    assert len(top_only[0].supported_by[0].supports) == 1

    everything = pa.combine_related(return_toplevel=False)
    assert len(everything) == 2
    # The output order is not assumed: the statement with a residue is the
    # refined one, the other is the supporting one.
    if everything[0].residue:
        refined, supporter = everything[0], everything[1]
    else:
        refined, supporter = everything[1], everything[0]
    assert len(refined.supported_by) == 1
    assert len(refined.supported_by[0].supports) == 1
    assert len(supporter.supports) == 1
    assert len(supporter.supports[0].supported_by) == 1
Example #52
0
def test_influence_duplicate():
    """Influences with the same subject and object should be merged.

    Two of the three statements run government -> agriculture and one runs
    the opposite way, so two unique statements are expected, one of which
    carries the evidence from both 'eidos1' and 'eidos3'.
    """
    gov = 'wm/concept/causal_factor/social_and_political/government'
    agr = 'wm/concept/causal_factor/agriculture/crop_production'
    cgov = Event(Concept('government', db_refs={'WM': [(gov, 1.0)]}))
    cagr = Event(Concept('agriculture', db_refs={'WM': [(agr, 1.0)]}))
    stmt1 = Influence(cgov, cagr, evidence=[Evidence(source_api='eidos1')])
    stmt2 = Influence(cagr, cgov, evidence=[Evidence(source_api='eidos2')])
    stmt3 = Influence(cgov, cagr, evidence=[Evidence(source_api='eidos3')])
    pa = Preassembler(world_ontology, [stmt1, stmt2, stmt3])
    unique_stmts = pa.combine_duplicates()
    # Sort by evidence count so the assertions below do not depend on the
    # order in which combine_duplicates returns the statements.
    unique_stmts = sorted(unique_stmts, key=lambda x: len(x.evidence))
    assert len(unique_stmts) == 2
    assert len(unique_stmts[0].evidence) == 1
    assert len(unique_stmts[1].evidence) == 2, unique_stmts
    sources = [e.source_api for e in unique_stmts[1].evidence]
    assert set(sources) == {'eidos1', 'eidos3'}
Example #53
0
def test_return_toplevel():
    """Check ``combine_related`` output for both ``return_toplevel`` modes:
    one statement when True, both mutually linked statements when False."""
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    specific = Phosphorylation(src, nras, 'tyrosine', '32')
    generic = Phosphorylation(src, nras)
    pa = Preassembler(bio_ontology, stmts=[specific, generic])

    # Top-level only: a single statement with one supporter that points
    # back at it.
    toplevel = pa.combine_related(return_toplevel=True)
    assert len(toplevel) == 1
    only = toplevel[0]
    assert len(only.supported_by) == 1
    assert len(only.supported_by[0].supports) == 1

    # Full output: both statements; find which index holds the generic one
    # (the statement without a residue).
    full = pa.combine_related(return_toplevel=False)
    assert len(full) == 2
    generic_idx = 1 if full[0].residue else 0
    specific_idx = 1 - generic_idx
    assert len(full[specific_idx].supported_by) == 1
    assert len(full[specific_idx].supported_by[0].supports) == 1
    assert len(full[generic_idx].supports) == 1
    assert len(full[generic_idx].supports[0].supported_by) == 1
Example #54
0
def test_grounding_aggregation():
    """Four BRAF phosphorylations with differing db_refs should collapse to
    three unique statements when duplicates are combined."""
    db_refs_variants = [
        {'TEXT': 'braf', 'HGNC': '1097'},
        {'TEXT': 'BRAF'},
        {'TEXT': 'Braf', 'UP': 'P15056'},
        {'TEXT': 'B-raf', 'UP': 'P15056', 'HGNC': '1097'},
    ]
    agents = [Agent('BRAF', db_refs=refs) for refs in db_refs_variants]
    # None as the first argument: no enzyme is specified.
    stmts = [Phosphorylation(None, agent) for agent in agents]
    pa = Preassembler(bio_ontology, stmts=stmts)
    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 3, unique_stmts
Example #55
0
    def __init__(self,
                 batch_size=10000,
                 s3_cache=None,
                 print_logs=False,
                 stmt_type=None,
                 yes_all=False,
                 ontology=None):
        """Set up batch size, optional S3 cache, ontology and Preassembler.

        Parameters
        ----------
        batch_size :
            Stored on the instance as ``batch_size``.
        s3_cache :
            ``None`` for no cache, otherwise an ``S3Path``. The cache
            actually used is the ``st_<stmt_type>/`` element beneath it.
        print_logs :
            Stored privately on the instance.
        stmt_type :
            Stored on the instance and used to build the cache sub-path.
        yes_all :
            Stored on the instance as ``yes_all``.
        ontology :
            Ontology handed to the ``Preassembler``. When ``None``,
            ``bio_ontology`` is used after being initialized and having its
            transitive closure built.

        Raises
        ------
        TypeError
            If ``s3_cache`` is given but is not an ``S3Path``.
        """
        self.batch_size = batch_size
        if s3_cache is None:
            self.s3_cache = None
        else:
            if not isinstance(s3_cache, S3Path):
                raise TypeError(
                    f"Expected s3_cache to be type S3Path, but got "
                    f"type {type(s3_cache)}.")
            # Keep caches for different statement types apart, so results
            # produced with a different stmt_type are never mixed in.
            self.s3_cache = s3_cache.get_element_path(f'st_{stmt_type}/')

            # Log what already exists in the cache; this is informational
            # only and does not change what gets set up.
            import boto3
            s3 = boto3.client('s3')
            if not s3_cache.exists(s3):
                logger.info(f"No prior jobs appear in the cache: {s3_cache}.")
            elif self.s3_cache.exists(s3):
                logger.info(f"A prior run with these parameters exists in "
                            f"the cache: {s3_cache}.")
            else:
                logger.info(f"Prior job or jobs with different Statement "
                            f"type exist for the cache: {s3_cache}.")

        if ontology is None:
            # Fall back to the bio ontology and prepare it before use.
            ontology = bio_ontology
            ontology.initialize()
            ontology._build_transitive_closure()
        self.pa = Preassembler(ontology)

        self.__tag = 'Unpurposed'
        self.__print_logs = print_logs
        self.pickle_stashes = None
        self.stmt_type = stmt_type
        self.yes_all = yes_all
Example #56
0
def run_preassembly(statements, hierarchies):
    """Filter, de-duplicate and relate statements; return the top level.

    Progress counts are printed at three points: the total number of input
    statements, the number of unique statements and the number of
    top-level statements.

    Parameters
    ----------
    statements :
        The statements to preassemble.
    hierarchies :
        Passed to the ``Preassembler`` constructor.

    Returns
    -------
    The result of ``ac.filter_top_level`` applied to the related statements.
    """
    total_count = len(statements)
    print('%d total statements' % total_count)
    # Keep only grounded statements, using a score threshold of 0.4.
    grounded = ac.filter_grounded_only(statements, score_threshold=0.4)
    # Build the Preassembler from the given hierarchies.
    pa = Preassembler(hierarchies, grounded)
    # The belief engine assigns prior probabilities to the unique
    # statements and hierarchy probabilities to the related ones.
    belief_engine = BeliefEngine()
    uniques = pa.combine_duplicates()
    print('%d unique statements' % len(uniques))
    belief_engine.set_prior_probs(uniques)
    # Request all related statements, not only the top-level ones.
    related = pa.combine_related(return_toplevel=False)
    belief_engine.set_hierarchy_probs(related)
    # Reduce to top-level statements as the final step.
    top_level = ac.filter_top_level(related)
    print('%d top-level statements' % len(top_level))
    return top_level
Example #57
0
def assemble_model(requester_name):
    """Re-assemble the module-level ``stmts`` and refresh the layout.

    The global statement list is grounding-mapped, replaced by the
    Preassembler's related statements and then extended with any statements
    inferred by the MechLinker. Each inferred statement is announced via
    ``say`` before being added.

    Parameters
    ----------
    requester_name :
        Name included in the final "Done" message.
    """
    global stmts
    # Apply the default grounding map to the statements' agents.
    mapper = gm.GroundingMapper(gm.default_grounding_map)
    stmts = mapper.map_agents(stmts)
    preassembler = Preassembler(hierarchies, stmts)
    preassembler.combine_related()
    stmts = preassembler.related_stmts
    linker = MechLinker(stmts)
    # An empty or missing linking result simply means nothing to add.
    for linked in linker.link_statements() or []:
        if not linked.inferred_stmt:
            continue
        say(mechlinker_queries.print_linked_stmt(linked))
        stmts.append(linked.inferred_stmt)
    say("%s: Done, updating layout." % requester_name)
    update_layout()
Example #58
0
def test_influence_duplicate():
    """Influences with identical subject and object should be merged, using
    a hierarchy built from the Eidos ontology file."""
    gov_grounding = 'UN/entities/human/government/government_entity'
    agr_grounding = 'UN/entities/natural/crop_technology'
    government = Event(Concept('government',
                               db_refs={'UN': [(gov_grounding, 1.0)]}))
    agriculture = Event(Concept('agriculture',
                                db_refs={'UN': [(agr_grounding, 1.0)]}))
    gov_to_agr_1 = Influence(government, agriculture,
                             evidence=[Evidence(source_api='eidos1')])
    agr_to_gov = Influence(agriculture, government,
                           evidence=[Evidence(source_api='eidos2')])
    gov_to_agr_3 = Influence(government, agriculture,
                             evidence=[Evidence(source_api='eidos3')])
    # The ontology file is located relative to this module's directory.
    here = os.path.dirname(os.path.abspath(__file__))
    ontology_path = os.path.join(here, '../sources/eidos/eidos_ontology.rdf')
    manager = HierarchyManager(ontology_path, True, True)
    hierarchies = {'entity': manager}
    pa = Preassembler(hierarchies, [gov_to_agr_1, agr_to_gov, gov_to_agr_3])
    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 2
    merged, single = unique_stmts[0], unique_stmts[1]
    assert len(merged.evidence) == 2
    assert len(single.evidence) == 1
    # The merged statement carries evidence from both same-direction inputs.
    assert {ev.source_api for ev in merged.evidence} == {'eidos1', 'eidos3'}
Example #59
0
def test_bound_condition_refinement():
    """A statement with more specific bound context should be supported by a
    less specific statement."""
    kinase = Agent('SRC', db_refs={'HGNC': '11283'})
    ligand = Agent('GTP', db_refs={'CHEBI': '15996'})
    nras_free = Agent('NRAS', db_refs={'HGNC': '7989'})
    gtp_bound = BoundCondition(ligand, True)
    nras_bound = Agent('NRAS', db_refs={'HGNC': '7989'},
                       bound_conditions=[gtp_bound])
    # Both statements share the same site; they differ only in the bound
    # condition on the substrate.
    without_context = Phosphorylation(kinase, nras_free, 'tyrosine', '32')
    with_context = Phosphorylation(kinase, nras_bound, 'tyrosine', '32')
    pa = Preassembler(bio_ontology, stmts=[without_context, with_context])
    top_level = pa.combine_related()
    # Only the statement with the bound condition should be at the top
    # level, supported by the one without it.
    assert len(top_level) == 1
    winner = top_level[0]
    assert winner.equals(with_context)
    assert len(winner.supported_by) == 1
    assert winner.supported_by[0].equals(without_context)
Example #60
0
def test_find_contradicts():
    """``find_contradicts`` should report exactly the three opposing pairs:
    inhibition/activation, increase/decrease amount, and the active/inactive
    forms of the same phosphorylated agent."""
    inhibition = Inhibition(Agent('a'), Agent('b'))
    activation = Activation(Agent('a'), Agent('b'))
    increase = IncreaseAmount(Agent('a'), Agent('b'))
    decrease = DecreaseAmount(Agent('a'), Agent('b'))
    active = ActiveForm(
        Agent('a', mods=[ModCondition('phosphorylation', None, None, True)]),
        'kinase', True)
    inactive = ActiveForm(
        Agent('a', mods=[ModCondition('phosphorylation', None, None, True)]),
        'kinase', False)
    all_stmts = [inhibition, activation, increase, decrease, active,
                 inactive]
    pa = Preassembler(bio_ontology, all_stmts)
    contradicts = pa.find_contradicts()
    assert len(contradicts) == 3
    # Compare as unordered uuid pairs, since the order within each reported
    # pair is not assumed.
    expected_pairs = (
        {inhibition.uuid, activation.uuid},
        {increase.uuid, decrease.uuid},
        {active.uuid, inactive.uuid},
    )
    for left, right in contradicts:
        assert {left.uuid, right.uuid} in expected_pairs