Beispiel #1
0
def test_act_phos_to_af():
    """Check that an Activation plus a Phosphorylation link to one result."""
    kinase_a = Agent('A', activity=ActivityCondition('kinase', True))
    activation = Activation(kinase_a, Agent('B'))
    phosphorylation = Phosphorylation(Agent('A'), Agent('B'))
    linker = MechLinker([activation, phosphorylation])
    inferred = linker.link_statements()
    # Exactly one linked statement is expected for this pair.
    assert len(inferred) == 1
Beispiel #2
0
def test_replace_complexes():
    """Check that replace_complexes leaves one statement of the two given."""
    phosphorylation = Phosphorylation(Agent('b'), Agent('a'))
    complex_ab = Complex([Agent('a'), Agent('b')])
    linker = MechLinker([phosphorylation, complex_ab])
    linker.replace_complexes()
    # Only a single statement should remain on the linker afterwards.
    assert len(linker.statements) == 1
    print(linker.statements)
Beispiel #3
0
def test_replace_complexes():
    """replace_complexes should reduce the two inputs to one statement."""
    stmts = [
        Phosphorylation(Agent('b'), Agent('a')),
        Complex([Agent('a'), Agent('b')]),
    ]
    mech_linker = MechLinker(stmts)
    mech_linker.replace_complexes()
    assert len(mech_linker.statements) == 1
    print(mech_linker.statements)
Beispiel #4
0
def test_act_phos_to_af():
    """link_statements should return a single item for these two inputs."""
    active_a = Agent('A', activity=ActivityCondition('kinase', True))
    stmts = [
        Activation(active_a, Agent('B')),
        Phosphorylation(Agent('A'), Agent('B')),
    ]
    mech_linker = MechLinker(stmts)
    results = mech_linker.link_statements()
    assert len(results) == 1
Beispiel #5
0
def test_replace_activations():
    """replace_activations should leave two of the three input statements."""
    phospho_a = Agent('a', mods=[ModCondition('phosphorylation')])
    active_form = ActiveForm(phospho_a, 'activity', True)
    phosphorylation = Phosphorylation(Agent('b'), Agent('a'))
    activation = Activation(Agent('b'), Agent('a'))
    linker = MechLinker([active_form, phosphorylation, activation])
    linker.replace_activations()
    assert len(linker.statements) == 2
    print(linker.statements)
Beispiel #6
0
def test_act_af_to_phos():
    """infer_modifications should produce one result from these inputs."""
    kinase_a = Agent('A', activity=ActivityCondition('kinase', True))
    activation = Activation(kinase_a, Agent('B'))
    phos_cond = ModCondition('phosphorylation', None, None, True)
    active_form = ActiveForm(Agent('B', mods=[phos_cond]), 'activity', True)
    linker = MechLinker([activation, active_form])
    inferred = linker.infer_modifications(linker.statements)
    assert len(inferred) == 1
Beispiel #7
0
def test_act_af_to_phos():
    """Check that one modification is inferred from Activation + ActiveForm."""
    enzyme = Agent('A', activity=ActivityCondition('kinase', True))
    act_stmt = Activation(enzyme, Agent('B'))
    modified_b = Agent(
        'B', mods=[ModCondition('phosphorylation', None, None, True)])
    af_stmt = ActiveForm(modified_b, 'activity', True)
    mech_linker = MechLinker([act_stmt, af_stmt])
    results = mech_linker.infer_modifications(mech_linker.statements)
    # A single inferred result is expected.
    assert len(results) == 1
Beispiel #8
0
def test_replace_activations():
    """Check that two statements remain after replace_activations runs."""
    modified_a = Agent('a', mods=[ModCondition('phosphorylation')])
    stmts = [
        ActiveForm(modified_a, 'activity', True),
        Phosphorylation(Agent('b'), Agent('a')),
        Activation(Agent('b'), Agent('a')),
    ]
    mech_linker = MechLinker(stmts)
    mech_linker.replace_activations()
    assert len(mech_linker.statements) == 2
    print(mech_linker.statements)
Beispiel #9
0
def test_require_active_forms_mod1():
    """require_active_forms should put mods on the Phosphorylation enzyme."""
    phospho_a = Agent('a', mods=[ModCondition('phosphorylation')])
    active_form = ActiveForm(phospho_a, 'activity', True)
    phosphorylation = Phosphorylation(Agent('a'), Agent('b'))
    linker = MechLinker([active_form, phosphorylation])
    linker.gather_explicit_activities()
    linker.require_active_forms()
    assert len(linker.statements) == 2
    # The enzyme of the second statement must now carry modifications.
    assert linker.statements[1].enz.mods
Beispiel #10
0
def test_reduce_mods1():
    """All three Phosphorylations should end up with residue T, position 143."""
    stmts = [
        Phosphorylation(Agent('b'), Agent('a')),
        Phosphorylation(Agent('c'), Agent('a'), 'T'),
        Phosphorylation(Agent('d'), Agent('a'), 'T', '143'),
    ]
    linker = MechLinker(stmts)
    linker.gather_modifications()
    linker.reduce_modifications()
    assert len(linker.statements) == 3
    for reduced in linker.statements:
        assert reduced.residue == 'T'
        assert reduced.position == '143'
Beispiel #11
0
def test_require_active_forms_mod4():
    """The enzyme with one of two active-form mods should end up with both."""
    thr185 = ModCondition('phosphorylation', 'T', '185')
    tyr187 = ModCondition('phosphorylation', 'Y', '187')
    active_form = ActiveForm(Agent('a', mods=[thr185, tyr187]), 'kinase', True)
    phosphorylation = Phosphorylation(Agent('a', mods=[thr185]), Agent('b'))
    linker = MechLinker([active_form, phosphorylation])
    linker.gather_explicit_activities()
    linker.require_active_forms()
    assert len(linker.statements) == 2
    enzyme_mods = linker.statements[1].enz.mods
    assert len(enzyme_mods) == 2
Beispiel #12
0
def test_reduce_activity_types():
    """All three ActiveForms should be reduced to the 'kinase' activity."""
    cytoplasmic = Agent('a', location='cytoplasm')
    nuclear = Agent('a', location='nucleus')
    active_forms = (
        ActiveForm(cytoplasmic, 'activity', True),
        ActiveForm(nuclear, 'kinase', True),
        ActiveForm(cytoplasmic, 'catalytic', True),
    )
    linker = MechLinker(list(active_forms))
    linker.gather_explicit_activities()
    linker.reduce_activities()
    # The assertions are on the original objects passed to the linker.
    for active_form in active_forms:
        assert active_form.activity == 'kinase'
Beispiel #13
0
def test_infer_activations():
    """infer_activations should return one result for these two statements."""
    phospho_a = Agent('a', mods=[ModCondition('phosphorylation')])
    active_form = ActiveForm(phospho_a, 'activity', True)
    phosphorylation = Phosphorylation(Agent('b'), Agent('a'))
    inferred = MechLinker.infer_activations([active_form, phosphorylation])
    assert len(inferred) == 1
    print(inferred)
Beispiel #14
0
def test_infer_activations():
    """Check that a single activation is inferred from the input pair."""
    stmts = [
        ActiveForm(Agent('a', mods=[ModCondition('phosphorylation')]),
                   'activity', True),
        Phosphorylation(Agent('b'), Agent('a')),
    ]
    results = MechLinker.infer_activations(stmts)
    # Called on the class directly; no linker instance is created here.
    assert len(results) == 1
    print(results)
Beispiel #15
0
def assemble_model(requester_name):
    """Re-assemble the module-level ``stmts`` and announce linked inferences.

    The global ``stmts`` list is grounding-mapped, replaced by the
    preassembler's related statements and then extended with every
    inferred statement returned by the mechanism linker. Each inference
    is also announced through ``say``.

    Parameters
    ----------
    requester_name : str
        Name included in the final "Done" message passed to ``say``.
    """
    global stmts
    # Performing grounding mapping on the statements
    mapper = gm.GroundingMapper(gm.default_grounding_map)
    stmts = mapper.map_agents(stmts)
    preassembler = Preassembler(hierarchies, stmts)
    preassembler.combine_related()
    stmts = preassembler.related_stmts
    inferences = MechLinker(stmts).link_statements()
    # A falsy result (empty or None) means there is nothing to report.
    for inference in inferences or []:
        if not inference.inferred_stmt:
            continue
        say(mechlinker_queries.print_linked_stmt(inference))
        stmts.append(inference.inferred_stmt)
    say("%s: Done, updating layout." % requester_name)
    update_layout()
Beispiel #16
0
def test_reduce_mods2():
    """Check which ModConditions reduce_modifications refines and which not."""
    mod_args = [
        ('phosphorylation', 'S', '123', False),
        ('phosphorylation', 'S', None, True),
        ('phosphorylation', 'T'),
        ('phosphorylation', 'T', '111'),
        ('phosphorylation', 'T', '999'),
        ('phosphorylation',),
        ('phosphorylation', None, '999'),
    ]
    originals = [ModCondition(*args) for args in mod_args]
    stmts = [Activation(Agent('KRAS'), Agent('BRAF', mods=[mc]))
             for mc in originals]
    linker = MechLinker(stmts)
    linker.gather_modifications()
    linker.reduce_modifications()
    assert len(linker.statements) == 7
    reduced = [st.obj.mods[0] for st in linker.statements]
    # These ones stay the same because they shouldn't be reduced
    for idx in (0, 2, 3, 4, 5):
        assert reduced[idx].__dict__ == originals[idx].__dict__
    # The second condition has to be reduced to have position '123'
    second = reduced[1]
    assert second.mod_type == 'phosphorylation'
    assert second.residue == 'S'
    assert second.position == '123'
    assert second.is_modified == True
    # The seventh condition has to be reduced to have residue 'T'
    seventh = reduced[6]
    assert seventh.mod_type == 'phosphorylation'
    assert seventh.residue == 'T'
    assert seventh.position == '999'
    assert seventh.is_modified == True
Beispiel #17
0
def reduce_activities(stmts_in, **kwargs):
    """Reduce the activity types in a list of statements

    The input statements are deep-copied before processing, so the
    objects in stmts_in are not modified by this function.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to reduce activity types in.
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        A list of reduced activity statements.
    """
    # Pass the count as a logging argument so the message is only
    # formatted when the record is actually emitted.
    logger.info('Reducing activities on %d statements...', len(stmts_in))
    stmts_out = [deepcopy(st) for st in stmts_in]
    ml = MechLinker(stmts_out)
    ml.get_activities()
    ml.reduce_activities()
    # Read the statements back from the linker rather than reusing the
    # local list.
    stmts_out = ml.statements
    dump_pkl = kwargs.get('save')
    if dump_pkl:
        dump_statements(stmts_out, dump_pkl)
    return stmts_out
Beispiel #18
0
 def get_missing_activities(self):
     """Suggest statements whose subject should be marked as active.

     For every Modification, RegulateActivity or RegulateAmount in
     self.statements whose subject has known activity types on its base
     agent but no activity condition on the statement, a deep copy of
     the statement is produced with the subject set active (using the
     first known activity type).

     Returns
     -------
     list
         The suggested statement copies; the originals are untouched.
     """
     ml = MechLinker(self.statements)
     ml.gather_explicit_activities()
     active_position_types = (Modification, RegulateActivity, RegulateAmount)
     suggestions = []
     for stmt in self.statements:
         if not isinstance(stmt, active_position_types):
             continue
         # The subj here is in an "active" position
         subj, obj = stmt.agent_list()
         if subj is None:
             continue
         known_types = ml._get_base(subj).activity_types
         # Nothing to suggest if no activities are known for the agent
         # or if it is already in an active state here
         if not known_types or subj.activity:
             continue
         # We suggest making the subj active in this case
         suggestion = deepcopy(stmt)
         new_subj = deepcopy(subj)
         new_subj.activity = ActivityCondition(known_types[0], True)
         suggestion.set_agent_list([new_subj, obj])
         suggestions.append(suggestion)
     return suggestions
Beispiel #19
0
 def get_missing_activities(self):
     """Return copies of statements with an inactive subject made active.

     Only Modification, RegulateActivity and RegulateAmount statements
     are considered. A suggestion is made when the subject's base agent
     lists activity types but the subject carries no activity here.
     """
     linker = MechLinker(self.statements)
     linker.gather_explicit_activities()
     proposed = []
     for stmt in self.statements:
         is_candidate = isinstance(
             stmt, (Modification, RegulateActivity, RegulateAmount))
         if not is_candidate:
             continue
         # The first agent is the one in the "active" position
         subj, obj = stmt.agent_list()
         if subj is None:
             continue
         base_agent = linker._get_base(subj)
         if base_agent.activity_types and not subj.activity:
             # Copy both the statement and the subject so the originals
             # are left unchanged
             activated_stmt = deepcopy(stmt)
             first_type = base_agent.activity_types[0]
             activated_subj = deepcopy(subj)
             activated_subj.activity = ActivityCondition(first_type, True)
             activated_stmt.set_agent_list([activated_subj, obj])
             proposed.append(activated_stmt)
     return proposed
Beispiel #20
0
def test_require_active_forms_act1():
    """require_active_forms should put mods on the Activation subject."""
    phospho_a = Agent('a', mods=[ModCondition('phosphorylation')])
    active_form = ActiveForm(phospho_a, 'activity', True)
    activation = Activation(Agent('a'), Agent('b'))
    linker = MechLinker([active_form, activation])
    linker.gather_explicit_activities()
    linker.require_active_forms()
    assert len(linker.statements) == 2
    # The subject of the second statement must now carry modifications.
    assert linker.statements[1].subj.mods
Beispiel #21
0
def test_require_active_forms_mod1():
    """Check that the Phosphorylation enzyme gains mods from the ActiveForm."""
    stmts = [
        ActiveForm(Agent('a', mods=[ModCondition('phosphorylation')]),
                   'activity', True),
        Phosphorylation(Agent('a'), Agent('b')),
    ]
    mech_linker = MechLinker(stmts)
    mech_linker.gather_explicit_activities()
    mech_linker.require_active_forms()
    assert len(mech_linker.statements) == 2
    assert mech_linker.statements[1].enz.mods
Beispiel #22
0
def test_require_active_forms_mod2():
    """Two active forms of 'a' should yield four statements in total."""
    phospho_form = ActiveForm(
        Agent('a', mods=[ModCondition('phosphorylation')]), 'activity', True)
    nuclear_form = ActiveForm(Agent('a', location='nucleus'), 'activity', True)
    phosphorylation = Phosphorylation(Agent('a'), Agent('b'))
    linker = MechLinker([phospho_form, nuclear_form, phosphorylation])
    linker.gather_explicit_activities()
    linker.require_active_forms()
    assert len(linker.statements) == 4
    # The last statement's enzyme is expected to have a location set.
    assert linker.statements[3].enz.location
Beispiel #23
0
def test_require_active_forms_mod4():
    """Check that the enzyme ends up with both active-form modifications."""
    phos_t185 = ModCondition('phosphorylation', 'T', '185')
    phos_y187 = ModCondition('phosphorylation', 'Y', '187')
    fully_modified = Agent('a', mods=[phos_t185, phos_y187])
    stmts = [
        ActiveForm(fully_modified, 'kinase', True),
        Phosphorylation(Agent('a', mods=[phos_t185]), Agent('b')),
    ]
    mech_linker = MechLinker(stmts)
    mech_linker.gather_explicit_activities()
    mech_linker.require_active_forms()
    assert len(mech_linker.statements) == 2
    assert len(mech_linker.statements[1].enz.mods) == 2
Beispiel #24
0
def test_reduce_mods1():
    """Check that each statement is reduced to residue 'T' at position '143'."""
    unspecified = Phosphorylation(Agent('b'), Agent('a'))
    residue_only = Phosphorylation(Agent('c'), Agent('a'), 'T')
    fully_specified = Phosphorylation(Agent('d'), Agent('a'), 'T', '143')
    mech_linker = MechLinker([unspecified, residue_only, fully_specified])
    mech_linker.gather_modifications()
    mech_linker.reduce_modifications()
    assert len(mech_linker.statements) == 3
    for stmt in mech_linker.statements:
        assert stmt.residue == 'T'
        assert stmt.position == '143'
Beispiel #25
0
def test_reduce_activity_types():
    """Check that every ActiveForm activity is reduced to 'kinase'."""
    in_cytoplasm = Agent('a', location='cytoplasm')
    in_nucleus = Agent('a', location='nucleus')
    generic_form = ActiveForm(in_cytoplasm, 'activity', True)
    kinase_form = ActiveForm(in_nucleus, 'kinase', True)
    catalytic_form = ActiveForm(in_cytoplasm, 'catalytic', True)
    mech_linker = MechLinker([generic_form, kinase_form, catalytic_form])
    mech_linker.gather_explicit_activities()
    mech_linker.reduce_activities()
    # The original statement objects are inspected, not the linker's list.
    for form in (generic_form, kinase_form, catalytic_form):
        assert form.activity == 'kinase'
Beispiel #26
0
def test_reduce_mods2():
    """Check reduce_modifications on seven BRAF modification conditions."""
    mc1 = ModCondition('phosphorylation', 'S', '123', False)
    mc2 = ModCondition('phosphorylation', 'S', None, True)
    mc3 = ModCondition('phosphorylation', 'T')
    mc4 = ModCondition('phosphorylation', 'T', '111')
    mc5 = ModCondition('phosphorylation', 'T', '999')
    mc6 = ModCondition('phosphorylation')
    mc7 = ModCondition('phosphorylation', None, '999')
    conditions = (mc1, mc2, mc3, mc4, mc5, mc6, mc7)
    # One KRAS -> BRAF Activation per modification condition, in order.
    activations = [Activation(Agent('KRAS'), Agent('BRAF', mods=[cond]))
                   for cond in conditions]
    mech_linker = MechLinker(activations)
    mech_linker.gather_modifications()
    mech_linker.reduce_modifications()
    assert len(mech_linker.statements) == 7
    (mc_red1, mc_red2, mc_red3, mc_red4,
     mc_red5, mc_red6, mc_red7) = [stmt.obj.mods[0]
                                   for stmt in mech_linker.statements]
    # These ones stay the same because they shouldn't be reduced
    unchanged_pairs = (
        (mc_red1, mc1),
        (mc_red3, mc3),
        (mc_red4, mc4),
        (mc_red5, mc5),
        (mc_red6, mc6),
    )
    for reduced, original in unchanged_pairs:
        assert reduced.__dict__ == original.__dict__
    # mc2 has to be reduced to have position '123'
    assert mc_red2.mod_type == 'phosphorylation'
    assert mc_red2.residue == 'S'
    assert mc_red2.position == '123'
    assert mc_red2.is_modified == True
    # mc7 has to be reduced to have residue 'T'
    assert mc_red7.mod_type == 'phosphorylation'
    assert mc_red7.residue == 'T'
    assert mc_red7.position == '999'
    assert mc_red7.is_modified == True
Beispiel #27
0
def test_infer_complexes():
    """infer_complexes should return one result for one Phosphorylation."""
    phosphorylation = Phosphorylation(Agent('b'), Agent('a'))
    inferred = MechLinker.infer_complexes([phosphorylation])
    assert len(inferred) == 1
    print(inferred)
Beispiel #28
0
def test_base_agent():
    """Smoke test: gather_explicit_activities runs on a single ActiveForm."""
    phospho_a = Agent('a', mods=[ModCondition('phosphorylation')])
    active_form = ActiveForm(phospho_a, 'activity', True)
    linker = MechLinker([active_form])
    # No assertions are made; the call only has to complete without error.
    linker.gather_explicit_activities()
Beispiel #29
0
def test_infer_complexes():
    """Check that a single complex is inferred from one Phosphorylation."""
    stmts = [Phosphorylation(Agent('b'), Agent('a'))]
    # Called on the class directly; no linker instance is created here.
    results = MechLinker.infer_complexes(stmts)
    assert len(results) == 1
    print(results)
Beispiel #30
0
from indra.mechlinker import MechLinker
from indra.assemblers import EnglishAssembler


def print_linked_stmt(stmt):
    """Print and return an English question about a linked statement.

    The text lists the English rendering of every source statement,
    numbered from 1 and joined with ', and ', then asks whether the
    inferred statement is therefore true.

    Parameters
    ----------
    stmt
        Object exposing ``source_stmts`` (an iterable of statements) and
        ``inferred_stmt``.

    Returns
    -------
    str
        The assembled question, which is also printed.
    """
    source_txts = [EnglishAssembler([source_stmt]).make_model()
                   for source_stmt in stmt.source_stmts]
    query_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    last_idx = len(source_txts) - 1
    clauses = []
    for idx, txt in enumerate(source_txts):
        clause = '(%d) %s ' % (idx + 1, txt)
        if idx < last_idx:
            # Drop the last two characters of the clause before joining
            # it to the next one.
            clause = clause[:-2] + ', and '
        clauses.append(clause)
    # The final character of the query text is replaced by '?'.
    final_txt = ('I know that ' + ''.join(clauses) +
                 'Is it therefore true that ' + query_txt[:-1] + '?')
    print(final_txt)
    return final_txt


if __name__ == '__main__':
    # NOTE(review): IncrementalModel is not among the imports visible at
    # the top of this file -- confirm it is in scope before running.
    fname = 'models/rasmachine/rem/model.pkl'
    model = IncrementalModel(fname)
    model.preassemble()
    stmts = model.assembled_stmts
    # Collect all three kinds of inferences into one list, in this order.
    linked_stmts = MechLinker.infer_active_forms(stmts)
    linked_stmts.extend(MechLinker.infer_modifications(stmts))
    linked_stmts.extend(MechLinker.infer_activations(stmts))
    for stmt in linked_stmts:
        print_linked_stmt(stmt)
Beispiel #31
0
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements.

        The individual steps are switched on or off through keys of
        ``self.assembly_config``. The final list of statements is stored
        in ``self.assembled_stmts``; nothing is returned.
        """
        def option(key):
            # Read the setting from the config at the point of use.
            return self.assembly_config.get(key)

        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stmts = self.filter_event_association(stmts)
        stmts = ac.filter_no_hypothesis(stmts)

        # Grounding-related steps
        if not option('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if option('standardize_names'):
            # The return value is not used here.
            ac.standardize_names_groundings(stmts)
        if option('filter_ungrounded'):
            threshold = option('score_threshold')
            stmts = ac.filter_grounded_only(stmts, score_threshold=threshold)
        if option('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if option('merge_deltas'):
            stmts = ac.merge_deltas(stmts)

        relevance_policy = option('filter_relevance')
        if relevance_policy:
            stmts = self.filter_relevance(stmts, relevance_policy)
        if not option('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not option('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)

        # Use WM hierarchies and belief scorer for WM preassembly
        preassembly_kwargs = {'return_toplevel': False}
        if option('preassembly_mode') == 'wm':
            hierarchies = get_wm_hierarchies()
            belief_scorer = get_eidos_scorer()
            preassembly_kwargs.update(belief_scorer=belief_scorer,
                                      hierarchies=hierarchies)
        stmts = ac.run_preassembly(stmts, **preassembly_kwargs)

        belief_cutoff = option('belief_cutoff')
        if belief_cutoff is not None:
            stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)

        if option('filter_direct'):
            direct_filters = (
                ac.filter_direct,
                ac.filter_enzyme_kinase,
                ac.filter_mod_nokinase,
                ac.filter_transcription_factor,
            )
            for direct_filter in direct_filters:
                stmts = direct_filter(stmts)

        if option('mechanism_linking'):
            linker = MechLinker(stmts)
            linker.gather_explicit_activities()
            linker.reduce_activities()
            linker.gather_modifications()
            linker.reduce_modifications()
            # Activities are gathered a second time, after the reductions.
            linker.gather_explicit_activities()
            linker.replace_activations()
            linker.require_active_forms()
            stmts = linker.statements

        self.assembled_stmts = stmts
Beispiel #32
0
from indra.mechlinker import MechLinker
from indra.assemblers.english import EnglishAssembler


def print_linked_stmt(stmt):
    """Build, print and return a confirmation question for an inference.

    Each source statement of ``stmt`` is rendered to English and numbered
    starting at 1; the inferred statement is rendered as the closing
    question.
    """
    def to_english(statement):
        # Render one statement on its own with the English assembler.
        return EnglishAssembler([statement]).make_model()

    source_txts = [to_english(source) for source in stmt.source_stmts]
    query_txt = to_english(stmt.inferred_stmt)
    num_sources = len(source_txts)
    final_txt = 'I know that '
    for number, txt in enumerate(source_txts, start=1):
        final_txt += '(%d) %s ' % (number, txt)
        if number < num_sources:
            # Swap the trailing two characters for a connective.
            final_txt = final_txt[:-2] + ', and '
    final_txt += 'Is it therefore true that %s?' % query_txt[:-1]
    print(final_txt)
    return final_txt


if __name__ == '__main__':
    # NOTE(review): IncrementalModel is not among the imports visible at
    # the top of this file -- confirm it is in scope before running.
    fname = 'models/rasmachine/rem/model.pkl'
    model = IncrementalModel(fname)
    model.preassemble()
    stmts = model.assembled_stmts
    # Start from the active-form inferences and append the other kinds.
    linked_stmts = MechLinker.infer_active_forms(stmts)
    for more_linked in (MechLinker.infer_modifications(stmts),
                        MechLinker.infer_activations(stmts)):
        linked_stmts += more_linked
    for stmt in linked_stmts:
        print_linked_stmt(stmt)
Beispiel #33
0
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    The statements are grounding-mapped, filtered, preassembled, scored
    with a BeliefEngine and extended with mechanism-linker inferences.
    Results are written under ``folder``: index card JSON files with the
    prefix ``index_cards/<pmcid>`` and a pickle, a graph PDF and two TSV
    files with the prefix ``other_outputs/<pmcid>``. Progress is printed
    to standard output. Nothing is returned.

    Parameters
    ----------
    stmts : list
        The statements to assemble.
    folder : str
        Base output folder. Presumably the ``index_cards`` and
        ``other_outputs`` subfolders must already exist -- TODO confirm.
    pmcid : str
        Identifier used in the output file names and passed to the
        IndexCardAssembler as ``pmc_override``.
    background_assertions : optional
        If given, top-level statements contained in it are left out of
        the outputs.
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    # Note that default_grounding_map is updated in place on every call.
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep a statement only if every one of its
    # agents passes is_protein_or_chemical
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [
            stmt for stmt in related_stmts if stmt not in background_assertions
        ]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    # NOTE(review): statements below the cutoff are only printed with a
    # 'SKIP' prefix in the loop below; there is no `continue`, so they
    # are still assembled into index cards -- confirm this is intended.
    belief_cutoff = 0.3
    ###############################################
    # Sort by belief, highest first
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief,
                     reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards, so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Write the statement graph of the non-background statements to a PDF
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Write statement diagnostics to TSV files
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
Beispiel #34
0
def assemble_pysb(stmts, data_genes, contextualize=False):
    """Filter and link statements, then build and pickle PySB models.

    A generic model is always assembled and pickled. When
    ``contextualize`` is true, one further model per entry of
    ``cell_lines`` is assembled and pickled as well. Nothing is returned.

    Parameters
    ----------
    stmts : list
        The INDRA Statements to assemble.
    data_genes
        Gene list passed to the gene-list filter and to the
        contextualization helpers.
    contextualize : bool
        Whether to also build the cell-line-specific models.
    """
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    for mechanism_filter in (ac.filter_enzyme_kinase,
                             ac.filter_mod_nokinase,
                             ac.filter_transcription_factor):
        stmts = mechanism_filter(stmts)

    # Simplify activity types
    reducer = MechLinker(stmts)
    reducer.gather_explicit_activities()
    reducer.reduce_activities()
    reducer.gather_modifications()
    reducer.reduce_modifications()
    stmts = normalize_active_forms(reducer.statements)

    # Replace activations when possible, then require active forms
    linker = MechLinker(stmts)
    linker.gather_explicit_activities()
    linker.replace_activations()
    linker.require_active_forms()

    # Remove inconsequential PTMs and activities until a pass no longer
    # shrinks the statement list
    prev_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        curr_count = len(linker.statements)
        if curr_count >= prev_count:
            break
        prev_count = curr_count
    stmts = linker.statements
    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))

    # Add drug target Statements (after the dump above, so they are not
    # part of the saved statements)
    drug_target_stmts = get_drug_target_statements()
    stmts += drug_target_stmts

    # Just generate the generic model
    generic_assembler = PysbAssembler()
    generic_assembler.add_statements(stmts)
    model = generic_assembler.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as fh:
        pickle.dump(model, fh)

    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return

    # NOTE(review): cell_lines is not defined in this function --
    # presumably a module-level name; confirm.
    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        has_data = cell_line not in cell_lines_no_data
        if has_data:
            stmtsc = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            stmtsc = stmts
        line_assembler = PysbAssembler()
        line_assembler.add_statements(stmtsc)
        model = line_assembler.make_model()
        if has_data:
            contextualize_model(model, cell_line, data_genes)
        ac.dump_statements(stmtsc, prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as fh:
            pickle.dump(model, fh)
0
def run_assembly(stmts, folder, pmcid):
    """Assemble the statements of one paper and write the outputs.

    The statements are filtered for grounding, deduplicated, linked and
    combined into a hierarchy. Index card JSON files are written with
    the prefix ``<folder>/index_cards/<pmcid>``; a pickle, a graph PDF
    and two TSV files are written with the prefix
    ``<folder>/other_outputs/<pmcid>``. Finally a PySB model is built
    and its size is printed. Nothing is returned.

    All output uses single-argument ``print(...)`` calls, which behave
    the same under Python 2 and Python 3.

    Parameters
    ----------
    stmts : list
        The statements to assemble.
    folder : str
        Base output folder.
    pmcid : str
        Identifier used in the output file names.
    """
    indexcard_prefix = folder + '/index_cards/' + pmcid
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Filter for grounding: keep a statement only if every one of its
    # agents passes is_protein_or_chemical
    grounded_stmts = []
    for st in stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(eh, mh)

    pa.add_statements(grounded_stmts)
    print('%d statements collected in total.' % len(pa.stmts))
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))
    ml = MechLinker(unique_stmts)
    ml.link_statements()
    # Preassemble again on the linker's statements
    pa = Preassembler(eh, mh, ml.statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()
    print('%d statements after combining related.' % len(related_stmts))

    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(related_stmts, fh)

    flattened_evidence_stmts = flatten_evidence(related_stmts)

    card_counter = 1
    # No limit on the number of cards
    card_lim = float('inf')
    top_stmts = []
    # Go through the statements by amount of evidence, largest first
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: len(x.evidence), reverse=True):
        # Formatted into one string so the output matches the former
        # two-item print statement on both Python versions.
        print('%s %s' % (len(st.evidence), st))

        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st])
        ia.make_model()
        # Only statements that actually produced a card are saved
        if ia.cards:
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Write the statement graph to a PDF
    graph = render_stmt_graph(related_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Write statement diagnostics to TSV files
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')

    pya = PysbAssembler()
    pya.add_statements(related_stmts)
    model = pya.make_model()

    print('PySB model has %d monomers and %d rules' %
          (len(model.monomers), len(model.rules)))
Beispiel #36
0
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Assemble the statements of one paper and write all output files.

    The statements are grounding-mapped, filtered to those whose agents are
    all proteins or chemicals, preassembled, scored with a BeliefEngine,
    extended with statements inferred by the MechLinker and preassembled
    again. Index cards are then written for the resulting statements in
    order of decreasing belief, together with a pickle, a statement graph
    and two TSV diagnostics files.

    Parameters
    ----------
    stmts : list
        The statements to assemble.
    folder : str
        Output root. Index cards are written under
        ``<folder>/index_cards/`` and all other outputs under
        ``<folder>/other_outputs/``; every file name starts with `pmcid`.
    pmcid : str
        Identifier of the paper; used in file names, in log output and as
        the `pmc_override` of the IndexCardAssembler.
    background_assertions : Optional[collection]
        If not None, top-level statements contained in it are dropped
        before any output is produced.
    '''
    # Index cards are the scored submission; the other outputs are only
    # meant for analysis and debugging
    card_prefix = folder + '/index_cards/' + pmcid
    other_prefix = folder + '/other_outputs/' + pmcid

    # Extend the default (REACH-oriented) grounding map with the
    # TRIPS-specific one. Note that this updates the shared
    # default_grounding_map object in place.
    trips_map = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_map)
    mapper = GroundingMapper(default_grounding_map)
    renamed_stmts = mapper.rename_agents(mapper.map_agents(stmts))

    # Keep only statements in which every agent is a protein or chemical
    grounded = [
        stmt for stmt in renamed_stmts
        if all([is_protein_or_chemical(ag) for ag in stmt.agent_list()])
    ]

    # First preassembly pass over the grounded statements
    first_pa = Preassembler(hierarchies)
    first_pa.add_statements(grounded)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(first_pa.stmts))

    deduplicated = first_pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(deduplicated))

    # Priors are set on the unique statements, hierarchy probabilities on
    # the statements returned by combine_related
    belief_engine = BeliefEngine()
    belief_engine.set_prior_probs(first_pa.unique_stmts)
    first_related = first_pa.combine_related()
    belief_engine.set_hierarchy_probs(first_related)
    print('%d statements after combining related.' % len(first_related))

    # Let the mechanism linker infer additional statements, in the order
    # active forms, modifications, activations
    linked_stmts = MechLinker.infer_active_forms(first_related)
    for infer in (MechLinker.infer_modifications,
                  MechLinker.infer_activations):
        linked_stmts += infer(first_related)
    belief_engine.set_linked_probs(linked_stmts)

    # Show what was inferred, for debugging purposes
    print('Linked\n=====')
    for linked in linked_stmts:
        print(linked.inferred_stmt.belief, linked.inferred_stmt)
    print('=============')

    # Second preassembly pass over the original plus the inferred statements
    inferred = [linked.inferred_stmt for linked in linked_stmts]
    all_statements = first_related + inferred
    pa = Preassembler(hierarchies, all_statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()

    # Drop top-level statements that are background assertions
    if background_assertions is None:
        nonbg_stmts = related_stmts
    else:
        nonbg_stmts = [candidate for candidate in related_stmts
                       if candidate not in background_assertions]

    # Keep a pickle of the remaining top-level statements
    with open(other_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    flattened = flatten_evidence(nonbg_stmts)

    # Index cards are numbered from 1; the number of cards is not limited
    # in this round, so the limit check below never triggers
    card_counter = 1
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Go through the statements from highest to lowest belief
    ranked = sorted(flattened, key=lambda s: s.belief, reverse=True)
    for st in ranked:
        # NOTE(review): 'SKIP' is only a marker in the log; a statement
        # below the cutoff is still processed like any other. Confirm
        # whether it was meant to be excluded.
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        elif st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # Background knowledge never gets an index card
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Try to turn the single statement into index cards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # Nothing to save if the assembler produced no card
        if not ia.cards:
            continue

        ia.save_model(card_prefix + '-%d.json' % card_counter)
        card_counter += 1
        top_stmts.append(st)
        if card_counter > card_lim:
            break

    # Print the English-assembled model for debugging purposes
    separator = '======================='
    ea = EnglishAssembler(top_stmts)
    print(separator)
    print(ea.make_model().encode('utf-8'))
    print(separator)

    # Draw the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(other_prefix + '_graph.pdf', prog='dot')
    # Write statement diagnostics for the second preassembly pass
    print_stmts(pa.stmts, other_prefix + '_statements.tsv')
    print_stmts(related_stmts, other_prefix + '_related_statements.tsv')
Beispiel #37
0
def preprocess_stmts(stmts, data_genes):
    """Filter and simplify a list of INDRA Statements for model building.

    The statements go through a fixed chain of assemble-corpus filters,
    their activities and modifications are reduced with a MechLinker,
    activations are replaced where possible, active forms are required,
    and finally inconsequential modifications and activities are removed
    repeatedly until the number of statements stops decreasing.

    Parameters
    ----------
    stmts : list
        The statements to preprocess.
    data_genes : list
        Gene list handed to ``ac.filter_gene_list`` with the 'all' policy.

    Returns
    -------
    list
        The statements held by the MechLinker after the last pruning round.
    """
    # Filter the INDRA Statements to be put into the model
    braf_v600e = {'BRAF': [('V', '600', 'E')]}
    stmts = ac.filter_mutation_status(stmts, braf_v600e, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    for plain_filter in (ac.filter_enzyme_kinase,
                         ac.filter_mod_nokinase,
                         ac.filter_transcription_factor):
        stmts = plain_filter(stmts)

    # Simplify activity types and modifications
    simplifier = MechLinker(stmts)
    simplifier.gather_explicit_activities()
    simplifier.reduce_activities()
    simplifier.gather_modifications()
    simplifier.reduce_modifications()

    # Only the ActiveForm statements are run through preassembly again;
    # all other statements are carried over unchanged
    active_forms = ac.filter_by_type(simplifier.statements, ActiveForm)
    other_stmts = ac.filter_by_type(simplifier.statements, ActiveForm,
                                    invert=True)
    active_forms = ac.run_preassembly(active_forms)

    # Replace activations when possible, then require active forms
    ml = MechLinker(active_forms + other_stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    ml.require_active_forms()

    # Prune inconsequential PTMs and activities until a round no longer
    # reduces the number of statements
    previous_count = len(ml.statements)
    while True:
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        current_count = len(ml.statements)
        if current_count >= previous_count:
            break
        previous_count = current_count
    return ml.statements
Beispiel #38
0
from indra.tools.incremental_model import IncrementalModel
from indra.mechlinker import MechLinker
from indra.assemblers import EnglishAssembler


def print_linked_stmt(stmt):
    """Print and return an English question about a linked statement.

    Each source statement and the inferred statement are rendered with the
    EnglishAssembler. The result has the form
    'I know that (1) ..., and (2) ... Is it therefore true that ...?'.

    Parameters
    ----------
    stmt : linked statement
        Object providing `source_stmts` (an iterable of statements) and
        `inferred_stmt`.

    Returns
    -------
    str
        The text that was printed.
    """
    source_txts = [EnglishAssembler([source]).make_model()
                   for source in stmt.source_stmts]
    query_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()

    pieces = ['I know that ']
    last_idx = len(source_txts) - 1
    for idx, txt in enumerate(source_txts):
        clause = '(%d) %s ' % (idx + 1, txt)
        if idx < last_idx:
            # Replace the last character of the sentence and the trailing
            # space with a connective to the next premise
            clause = clause[:-2] + ', and '
        pieces.append(clause)
    # The last character of the query text is replaced by a question mark
    pieces.append('Is it therefore true that ' + query_txt[:-1] + '?')

    final_txt = ''.join(pieces)
    print(final_txt)
    return final_txt


if __name__ == '__main__':
    # Load the incremental model, preassemble it and ask one question per
    # statement the MechLinker can link from the top-level statements
    model = IncrementalModel('models/rasmachine/rem/model.pkl')
    model.preassemble()
    linker = MechLinker(model.toplevel_stmts)
    for linked in linker.link_statements():
        print_linked_stmt(linked)
Beispiel #39
0
import pickle
from indra.tools.incremental_model import IncrementalModel
from indra.mechlinker import MechLinker
from indra.assemblers import EnglishAssembler


def print_linked_stmt(stmt):
    """Turn a linked statement into an English question, print and return it.

    The premises are the English renderings of `stmt.source_stmts`,
    numbered from 1 and joined with ', and '; the question is built from
    the English rendering of `stmt.inferred_stmt`.

    Parameters
    ----------
    stmt : linked statement
        Object providing `source_stmts` and `inferred_stmt`.

    Returns
    -------
    str
        The printed text, e.g.
        'I know that (1) ..., and (2) ... Is it therefore true that ...?'.
    """
    premises = [EnglishAssembler([src]).make_model()
                for src in stmt.source_stmts]
    question = EnglishAssembler([stmt.inferred_stmt]).make_model()

    chunks = ['I know that ']
    num_premises = len(premises)
    for number, premise in enumerate(premises, 1):
        chunk = '(%d) %s ' % (number, premise)
        if number < num_premises:
            # Every premise but the last loses its final character and the
            # trailing space, and is chained to the next one
            chunk = chunk[:-2] + ', and '
        chunks.append(chunk)
    # Swap the final character of the rendered question for a '?'
    chunks.append('Is it therefore true that ' + question[:-1] + '?')

    final_txt = ''.join(chunks)
    print(final_txt)
    return final_txt

if __name__ == '__main__':
    # Preassemble the stored incremental model and print a question for
    # every linked statement found among its top-level statements
    model = IncrementalModel('models/rasmachine/rem/model.pkl')
    model.preassemble()
    mech_linker = MechLinker(model.toplevel_stmts)
    for linked_stmt in mech_linker.link_statements():
        print_linked_stmt(linked_stmt)
Beispiel #40
0
def test_base_agent():
    af = ActiveForm(Agent('a', mods=[ModCondition('phosphorylation')]),
                    'activity', True)
    ml = MechLinker([af])
    ml.gather_explicit_activities()