def reduce_activities(stmts_in, **kwargs):
    """Return copies of the given statements with reduced activity types.

    The input statements are deep-copied before they are handed to the
    MechLinker, so the objects in `stmts_in` are left untouched.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to reduce activity types in.
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        A list of reduced activity statements.
    """
    logger.info('Reducing activities on %d statements...' % len(stmts_in))
    # Work on private copies so the caller's statements are not modified
    linker = MechLinker(list(map(deepcopy, stmts_in)))
    linker.gather_explicit_activities()
    linker.reduce_activities()
    stmts_out = linker.statements
    pkl_path = kwargs.get('save')
    if pkl_path:
        dump_statements(stmts_out, pkl_path)
    return stmts_out
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements.

    Optional steps are switched on or off through entries of
    ``self.assembly_config``. The resulting list of Statements is stored
    in ``self.assembled_stmts``; nothing is returned.
    """
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stmts = self.filter_event_association(stmts)
    stmts = ac.filter_no_hypothesis(stmts)
    # Grounding mapping runs unless explicitly skipped in the config
    if not self.assembly_config.get('skip_map_grounding'):
        stmts = ac.map_grounding(stmts)
    if self.assembly_config.get('standardize_names'):
        # Note: the return value of this call is not assigned back to stmts
        ac.standardize_names_groundings(stmts)
    if self.assembly_config.get('filter_ungrounded'):
        # 'score_threshold' is passed through as-is (None if not configured)
        score_threshold = self.assembly_config.get('score_threshold')
        stmts = ac.filter_grounded_only(stmts,
                                        score_threshold=score_threshold)
    if self.assembly_config.get('merge_groundings'):
        stmts = ac.merge_groundings(stmts)
    if self.assembly_config.get('merge_deltas'):
        stmts = ac.merge_deltas(stmts)
    # A truthy 'filter_relevance' value is used as the relevance policy
    relevance_policy = self.assembly_config.get('filter_relevance')
    if relevance_policy:
        stmts = self.filter_relevance(stmts, relevance_policy)
    # Human-only filtering and sequence mapping run unless skipped
    if not self.assembly_config.get('skip_filter_human'):
        stmts = ac.filter_human_only(stmts)
    if not self.assembly_config.get('skip_map_sequence'):
        stmts = ac.map_sequence(stmts)
    # Use WM hierarchies and belief scorer for WM preassembly
    preassembly_mode = self.assembly_config.get('preassembly_mode')
    if preassembly_mode == 'wm':
        hierarchies = get_wm_hierarchies()
        belief_scorer = get_eidos_scorer()
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   belief_scorer=belief_scorer,
                                   hierarchies=hierarchies)
    else:
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
    # Preassembly is run with return_toplevel=False in both branches; the
    # belief filter (if a cutoff is configured) is applied before the
    # top-level filter below
    belief_cutoff = self.assembly_config.get('belief_cutoff')
    if belief_cutoff is not None:
        stmts = ac.filter_belief(stmts, belief_cutoff)
    stmts = ac.filter_top_level(stmts)
    # Optional filtering controlled by 'filter_direct'
    if self.assembly_config.get('filter_direct'):
        stmts = ac.filter_direct(stmts)
        stmts = ac.filter_enzyme_kinase(stmts)
        stmts = ac.filter_mod_nokinase(stmts)
        stmts = ac.filter_transcription_factor(stmts)
    # Optional mechanism linking controlled by 'mechanism_linking'
    if self.assembly_config.get('mechanism_linking'):
        ml = MechLinker(stmts)
        ml.gather_explicit_activities()
        ml.reduce_activities()
        ml.gather_modifications()
        ml.reduce_modifications()
        # Explicit activities are gathered a second time, after the
        # reduction steps, before activations are replaced
        ml.gather_explicit_activities()
        ml.replace_activations()
        ml.require_active_forms()
        stmts = ml.statements
    self.assembled_stmts = stmts
def test_require_active_forms_mod1():
    """Check that the enzyme of a Phosphorylation has mods after linking.

    The input is an ActiveForm for phosphorylated 'a' plus a
    Phosphorylation whose enzyme 'a' carries no modification.
    """
    phos_a = Agent('a', mods=[ModCondition('phosphorylation')])
    active_form = ActiveForm(phos_a, 'activity', True)
    phosphorylation = Phosphorylation(Agent('a'), Agent('b'))
    linker = MechLinker([active_form, phosphorylation])
    linker.gather_explicit_activities()
    linker.require_active_forms()
    # The statement count is unchanged and the enzyme now has mods
    assert len(linker.statements) == 2
    assert linker.statements[1].enz.mods
def test_require_active_forms_act1():
    """Check that the subject of an Activation has mods after linking.

    The input is an ActiveForm for phosphorylated 'a' plus an Activation
    whose subject 'a' carries no modification.
    """
    phos_a = Agent('a', mods=[ModCondition('phosphorylation')])
    active_form = ActiveForm(phos_a, 'activity', True)
    activation = Activation(Agent('a'), Agent('b'))
    linker = MechLinker([active_form, activation])
    linker.gather_explicit_activities()
    linker.require_active_forms()
    # The statement count is unchanged and the subject now has mods
    assert len(linker.statements) == 2
    assert linker.statements[1].subj.mods
def test_require_active_forms_mod4():
    """Check that a partially modified enzyme ends up with both mods.

    The ActiveForm lists two phosphorylation conditions for 'a', while
    the Phosphorylation's enzyme initially has only the first of them.
    """
    phos_t185 = ModCondition('phosphorylation', 'T', '185')
    phos_y187 = ModCondition('phosphorylation', 'Y', '187')
    active_form = ActiveForm(Agent('a', mods=[phos_t185, phos_y187]),
                             'kinase', True)
    phosphorylation = Phosphorylation(Agent('a', mods=[phos_t185]),
                                      Agent('b'))
    linker = MechLinker([active_form, phosphorylation])
    linker.gather_explicit_activities()
    linker.require_active_forms()
    assert len(linker.statements) == 2
    # The enzyme is expected to carry two modification conditions
    assert len(linker.statements[1].enz.mods) == 2
def test_require_active_forms_mod2():
    """Check linking when an agent has two alternative active forms.

    'a' has one ActiveForm with a phosphorylation and another with a
    nuclear location; the test expects four statements afterwards, the
    last of which has an enzyme with a location set.
    """
    phos_form = ActiveForm(
        Agent('a', mods=[ModCondition('phosphorylation')]),
        'activity', True)
    nuclear_form = ActiveForm(Agent('a', location='nucleus'),
                              'activity', True)
    phosphorylation = Phosphorylation(Agent('a'), Agent('b'))
    linker = MechLinker([phos_form, nuclear_form, phosphorylation])
    linker.gather_explicit_activities()
    linker.require_active_forms()
    assert len(linker.statements) == 4
    assert linker.statements[3].enz.location
def test_reduce_activity_types():
    """Check that all ActiveForms of 'a' are reduced to 'kinase' activity.

    Three ActiveForms with activities 'activity', 'kinase' and
    'catalytic' are linked; each is expected to end up as 'kinase'.
    """
    cytoplasmic_a = Agent('a', location='cytoplasm')
    nuclear_a = Agent('a', location='nucleus')
    generic_form = ActiveForm(cytoplasmic_a, 'activity', True)
    kinase_form = ActiveForm(nuclear_a, 'kinase', True)
    catalytic_form = ActiveForm(cytoplasmic_a, 'catalytic', True)
    linker = MechLinker([generic_form, kinase_form, catalytic_form])
    linker.gather_explicit_activities()
    linker.reduce_activities()
    # The statement objects themselves are inspected after reduction
    for active_form in (generic_form, kinase_form, catalytic_form):
        assert active_form.activity == 'kinase'
def test_reduce_activity_types():
    """Verify that reduce_activities sets every ActiveForm to 'kinase'.

    The inputs are ActiveForms of agent 'a' with the activity types
    'activity', 'kinase' and 'catalytic'.
    """
    # NOTE(review): a function with this same name is defined earlier; if
    # both live in one module, this definition shadows the earlier one.
    in_cytoplasm = Agent('a', location='cytoplasm')
    in_nucleus = Agent('a', location='nucleus')
    forms = (
        ActiveForm(in_cytoplasm, 'activity', True),
        ActiveForm(in_nucleus, 'kinase', True),
        ActiveForm(in_cytoplasm, 'catalytic', True),
    )
    linker = MechLinker(list(forms))
    linker.gather_explicit_activities()
    linker.reduce_activities()
    for form in forms:
        assert form.activity == 'kinase'
def preprocess_stmts(stmts, data_genes):
    """Filter and mechanistically link Statements for model assembly.

    Parameters
    ----------
    stmts : list
        The INDRA Statements to preprocess.
    data_genes : list
        Passed to ``ac.filter_gene_list`` with the 'all' policy.

    Returns
    -------
    list
        The Statements held by the MechLinker once the filtering loop at
        the end no longer reduces their number.
    """
    # Filter the INDRA Statements to be put into the model
    # NOTE(review): presumably BRAF V600E is the mutation context and PTEN
    # is treated as deleted -- confirm against ac.filter_mutation_status
    stmts = ac.filter_mutation_status(
        stmts, {'BRAF': [('V', '600', 'E')]}, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    for stmt_filter in (ac.filter_enzyme_kinase,
                        ac.filter_mod_nokinase,
                        ac.filter_transcription_factor):
        stmts = stmt_filter(stmts)

    # Simplify activity types
    linker = MechLinker(stmts)
    linker.gather_explicit_activities()
    linker.reduce_activities()
    linker.gather_modifications()
    linker.reduce_modifications()
    # Only the ActiveForm statements go through preassembly; the rest are
    # appended unchanged afterwards
    active_forms = ac.filter_by_type(linker.statements, ActiveForm)
    other_stmts = ac.filter_by_type(linker.statements, ActiveForm,
                                    invert=True)
    stmts = ac.run_preassembly(active_forms) + other_stmts

    # Replace activations when possible
    linker = MechLinker(stmts)
    linker.gather_explicit_activities()
    linker.replace_activations()
    # Require active forms
    linker.require_active_forms()

    # Remove inconsequential PTMs, repeating for as long as a pass still
    # shrinks the list of statements
    previous_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        current_count = len(linker.statements)
        if current_count >= previous_count:
            break
        previous_count = current_count
    return linker.statements
def get_missing_activities(self):
    """Suggest statements whose subject should be in an active state.

    A MechLinker is built over ``self.statements`` to gather explicit
    activities. For each Modification, RegulateActivity or RegulateAmount
    statement whose subject has known activity types but no activity
    condition, a copy is made in which the subject is set active.

    Returns
    -------
    list
        Deep copies of the affected statements, each with its subject
        given an ActivityCondition of the first known activity type.
    """
    linker = MechLinker(self.statements)
    linker.gather_explicit_activities()
    active_position_types = (Modification, RegulateActivity, RegulateAmount)
    suggestions = []
    for stmt in self.statements:
        if not isinstance(stmt, active_position_types):
            continue
        # The subj here is in an "active" position
        subj, obj = stmt.agent_list()
        if subj is None:
            continue
        subj_base = linker._get_base(subj)
        # Only agents that have some known activity, but are not in an
        # active state in this statement, yield a suggestion
        if not subj_base.activity_types or subj.activity:
            continue
        # We suggest making the subj active in this case
        suggestion = deepcopy(stmt)
        act_type = subj_base.activity_types[0]
        active_subj = deepcopy(subj)
        active_subj.activity = ActivityCondition(act_type, True)
        suggestion.set_agent_list([active_subj, obj])
        suggestions.append(suggestion)
    return suggestions
def test_base_agent():
    """Smoke test: gather explicit activities from a single ActiveForm.

    There are no assertions; the test passes if nothing raises.
    """
    phos_a = Agent('a', mods=[ModCondition('phosphorylation')])
    active_form = ActiveForm(phos_a, 'activity', True)
    linker = MechLinker([active_form])
    linker.gather_explicit_activities()
def assemble_pysb(stmts, data_genes, contextualize=False):
    """Assemble PySB model(s) from INDRA Statements and pickle them.

    The Statements are filtered and mechanistically linked, dumped under
    the 'pysb_stmts' name, extended with drug target Statements and then
    assembled into a generic model pickled under 'pysb_model'. When
    `contextualize` is set, one additional statement dump and model
    pickle is written for each entry of the module-level `cell_lines`.

    Parameters
    ----------
    stmts : list
        The INDRA Statements to assemble.
    data_genes : list
        Passed to ``ac.filter_gene_list`` and to the contextualization
        helpers.
    contextualize : Optional[bool]
        If True, cell line specific models are also generated.
        Default: False

    Returns
    -------
    None
    """
    def pickle_model(pysb_model, name):
        # Write a model to the pickle file derived from the given name
        with open(prefixed_pkl(name), 'wb') as fh:
            pickle.dump(pysb_model, fh)

    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    for stmt_filter in (ac.filter_enzyme_kinase,
                        ac.filter_mod_nokinase,
                        ac.filter_transcription_factor):
        stmts = stmt_filter(stmts)

    # Simplify activity types
    linker = MechLinker(stmts)
    linker.gather_explicit_activities()
    linker.reduce_activities()
    linker.gather_modifications()
    linker.reduce_modifications()
    stmts = normalize_active_forms(linker.statements)

    # Replace activations when possible
    linker = MechLinker(stmts)
    linker.gather_explicit_activities()
    linker.replace_activations()
    # Require active forms
    linker.require_active_forms()

    # Remove inconsequential PTMs, repeating for as long as a pass still
    # shrinks the list of statements
    previous_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        current_count = len(linker.statements)
        if current_count >= previous_count:
            break
        previous_count = current_count
    stmts = linker.statements

    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))

    # Add drug target Statements (after the dump above, so the saved
    # Statements do not include them)
    stmts += get_drug_target_statements()

    # Just generate the generic model
    assembler = PysbAssembler()
    assembler.add_statements(stmts)
    pickle_model(assembler.make_model(), 'pysb_model')

    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return

    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        # Cell lines without data reuse the generic Statements as they are
        has_data = cell_line not in cell_lines_no_data
        if has_data:
            stmtsc = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            stmtsc = stmts
        assembler = PysbAssembler()
        assembler.add_statements(stmtsc)
        model = assembler.make_model()
        if has_data:
            contextualize_model(model, cell_line, data_genes)
        ac.dump_statements(stmtsc,
                           prefixed_pkl('pysb_stmts_%s' % cell_line))
        pickle_model(model, 'pysb_model_%s' % cell_line)