def reduce_activities(stmts_in, **kwargs):
    """Return copies of the given statements with reduced activity types.

    Each input statement is deep-copied, the copies are handed to a
    MechLinker, and the linker's resulting statements are returned.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        The statements whose activity types should be reduced.
    save : Optional[str]
        If given (and truthy), the name of a pickle file into which the
        resulting statements are dumped.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        The statements held by the MechLinker after reduction.
    """
    logger.info('Reducing activities on %d statements...' % len(stmts_in))
    # The linker is given deep copies rather than the caller's objects.
    linker = MechLinker([deepcopy(stmt) for stmt in stmts_in])
    # NOTE(review): confirm MechLinker still exposes get_activities();
    # newer INDRA documentation names this step
    # gather_explicit_activities().
    linker.get_activities()
    linker.reduce_activities()
    reduced = linker.statements
    pkl_name = kwargs.get('save')
    if pkl_name:
        dump_statements(reduced, pkl_name)
    return reduced
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements.

    Optional steps are switched on or off by entries of
    ``self.assembly_config`` (``skip_map_grounding``, ``standardize_names``,
    ``filter_ungrounded``/``score_threshold``, ``merge_groundings``,
    ``merge_deltas``, ``filter_relevance``, ``skip_filter_human``,
    ``skip_map_sequence``, ``preassembly_mode``, ``belief_cutoff``,
    ``filter_direct`` and ``mechanism_linking``). The resulting Statements
    are stored in ``self.assembled_stmts``; nothing is returned.
    """
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stmts = self.filter_event_association(stmts)
    stmts = ac.filter_no_hypothesis(stmts)

    config = self.assembly_config

    # Grounding-related steps
    if not config.get('skip_map_grounding'):
        stmts = ac.map_grounding(stmts)
    if config.get('standardize_names'):
        # The return value is deliberately not captured here
        ac.standardize_names_groundings(stmts)
    if config.get('filter_ungrounded'):
        threshold = config.get('score_threshold')
        stmts = ac.filter_grounded_only(stmts, score_threshold=threshold)
    if config.get('merge_groundings'):
        stmts = ac.merge_groundings(stmts)
    if config.get('merge_deltas'):
        stmts = ac.merge_deltas(stmts)

    # Relevance, organism and sequence steps
    policy = config.get('filter_relevance')
    if policy:
        stmts = self.filter_relevance(stmts, policy)
    if not config.get('skip_filter_human'):
        stmts = ac.filter_human_only(stmts)
    if not config.get('skip_map_sequence'):
        stmts = ac.map_sequence(stmts)

    # Use WM hierarchies and belief scorer for WM preassembly
    wm_kwargs = {}
    if config.get('preassembly_mode') == 'wm':
        wm_hierarchies = get_wm_hierarchies()
        wm_scorer = get_eidos_scorer()
        wm_kwargs = {'belief_scorer': wm_scorer,
                     'hierarchies': wm_hierarchies}
    stmts = ac.run_preassembly(stmts, return_toplevel=False, **wm_kwargs)

    # Belief filtering only happens if a cutoff is configured
    cutoff = config.get('belief_cutoff')
    if cutoff is not None:
        stmts = ac.filter_belief(stmts, cutoff)
    stmts = ac.filter_top_level(stmts)

    if config.get('filter_direct'):
        stmts = ac.filter_direct(stmts)
        stmts = ac.filter_enzyme_kinase(stmts)
        stmts = ac.filter_mod_nokinase(stmts)
        stmts = ac.filter_transcription_factor(stmts)

    if config.get('mechanism_linking'):
        linker = MechLinker(stmts)
        linker.gather_explicit_activities()
        linker.reduce_activities()
        linker.gather_modifications()
        linker.reduce_modifications()
        # Activities are gathered a second time, after the reductions
        linker.gather_explicit_activities()
        linker.replace_activations()
        linker.require_active_forms()
        stmts = linker.statements

    self.assembled_stmts = stmts
def test_reduce_activity_types():
    """Check that reducing activities leaves every ActiveForm as 'kinase'.

    Three ActiveForms of an agent named 'a' (in two locations) start out
    with the activity types 'activity', 'kinase' and 'catalytic'.
    """
    cytoplasmic = Agent('a', location='cytoplasm')
    nuclear = Agent('a', location='nucleus')
    active_forms = (
        ActiveForm(cytoplasmic, 'activity', True),
        ActiveForm(nuclear, 'kinase', True),
        ActiveForm(cytoplasmic, 'catalytic', True),
    )
    linker = MechLinker(list(active_forms))
    linker.gather_explicit_activities()
    linker.reduce_activities()
    for active_form in active_forms:
        assert active_form.activity == 'kinase'
def test_reduce_activity_types():
    """Check that all three ActiveForms end up with the 'kinase' activity.

    The ActiveForms are created for an agent named 'a' with the activity
    types 'activity', 'kinase' and 'catalytic' before the MechLinker
    gathers and reduces activities.
    """
    agent_cyto = Agent('a', location='cytoplasm')
    agent_nuc = Agent('a', location='nucleus')
    generic_af, kinase_af, catalytic_af = (
        ActiveForm(agent_cyto, 'activity', True),
        ActiveForm(agent_nuc, 'kinase', True),
        ActiveForm(agent_cyto, 'catalytic', True),
    )
    mech_linker = MechLinker([generic_af, kinase_af, catalytic_af])
    mech_linker.gather_explicit_activities()
    mech_linker.reduce_activities()
    for reduced_af in (generic_af, kinase_af, catalytic_af):
        assert reduced_af.activity == 'kinase'
def preprocess_stmts(stmts, data_genes):
    """Filter and mechanistically link INDRA Statements for a model.

    The Statements are filtered (mutation status, type, directness, belief,
    top level, gene list and three enzyme/modification/transcription-factor
    filters), their activity types and modifications are reduced,
    ActiveForms are run through preassembly, activations are replaced and
    active forms required, and finally inconsequential modifications and
    activities are removed repeatedly until the number of Statements stops
    decreasing.

    Parameters
    ----------
    stmts :
        The INDRA Statements to preprocess.
    data_genes :
        Passed to ``ac.filter_gene_list`` with the ``'all'`` policy.

    Returns
    -------
    The Statements held by the MechLinker after the final filtering pass.
    """
    # Filter the INDRA Statements to be put into the model
    braf_mutations = {'BRAF': [('V', '600', 'E')]}
    selected = ac.filter_mutation_status(stmts, braf_mutations, ['PTEN'])
    selected = ac.filter_by_type(selected, Complex, invert=True)
    selected = ac.filter_direct(selected)
    selected = ac.filter_belief(selected, 0.95)
    selected = ac.filter_top_level(selected)
    selected = ac.filter_gene_list(selected, data_genes, 'all')
    for semantic_filter in (ac.filter_enzyme_kinase,
                            ac.filter_mod_nokinase,
                            ac.filter_transcription_factor):
        selected = semantic_filter(selected)

    # Simplify activity types
    reducer = MechLinker(selected)
    reducer.gather_explicit_activities()
    reducer.reduce_activities()
    reducer.gather_modifications()
    reducer.reduce_modifications()

    # Only the ActiveForm Statements are run through preassembly
    active_forms = ac.filter_by_type(reducer.statements, ActiveForm)
    other_stmts = ac.filter_by_type(reducer.statements, ActiveForm,
                                    invert=True)
    active_forms = ac.run_preassembly(active_forms)

    # Replace activations when possible
    linker = MechLinker(active_forms + other_stmts)
    linker.gather_explicit_activities()
    linker.replace_activations()
    # Require active forms
    linker.require_active_forms()

    # Remove inconsequential PTMs until a pass no longer shrinks the list
    previous_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        current_count = len(linker.statements)
        if current_count >= previous_count:
            break
        previous_count = current_count
    return linker.statements
def assemble_pysb(stmts, data_genes, contextualize=False):
    """Assemble PySB models from INDRA Statements and pickle them.

    The Statements are filtered and mechanistically linked, dumped under the
    'pysb_stmts' prefixed pickle name, extended with drug target Statements
    and assembled into a generic model that is pickled under 'pysb_model'.
    If `contextualize` is set, a Statement dump and a model are additionally
    written for each entry of the module-level `cell_lines`.

    Parameters
    ----------
    stmts :
        The INDRA Statements to assemble.
    data_genes :
        Passed to ``ac.filter_gene_list`` with the ``'all'`` policy and to
        the contextualization helpers.
    contextualize : bool
        If False (default), return after the generic model is pickled.

    Returns
    -------
    None
    """
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)

    # Simplify activity types
    reducer = MechLinker(stmts)
    reducer.gather_explicit_activities()
    reducer.reduce_activities()
    reducer.gather_modifications()
    reducer.reduce_modifications()
    stmts = normalize_active_forms(reducer.statements)

    # Replace activations when possible
    linker = MechLinker(stmts)
    linker.gather_explicit_activities()
    linker.replace_activations()
    # Require active forms
    linker.require_active_forms()

    # Remove inconsequential PTMs until a pass no longer shrinks the list
    previous_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        current_count = len(linker.statements)
        if current_count >= previous_count:
            break
        previous_count = current_count
    stmts = linker.statements

    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))

    # Add drug target Statements (extends the list in place)
    stmts += get_drug_target_statements()

    # Just generate the generic model
    generic_assembler = PysbAssembler()
    generic_assembler.add_statements(stmts)
    generic_model = generic_assembler.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as model_file:
        pickle.dump(generic_model, model_file)

    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return

    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        has_data = cell_line not in cell_lines_no_data
        # Cell lines without data reuse the generic Statements unchanged
        if has_data:
            line_stmts = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            line_stmts = stmts
        line_assembler = PysbAssembler()
        line_assembler.add_statements(line_stmts)
        line_model = line_assembler.make_model()
        if has_data:
            contextualize_model(line_model, cell_line, data_genes)
        ac.dump_statements(line_stmts,
                           prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as model_file:
            pickle.dump(line_model, model_file)