def test_act_phos_to_af():
    """A kinase Activation plus a Phosphorylation yields one linked statement."""
    kinase_a = Agent('A', activity=ActivityCondition('kinase', True))
    act_st = Activation(kinase_a, Agent('B'))
    phos_st = Phosphorylation(Agent('A'), Agent('B'))
    linked_stmts = MechLinker([act_st, phos_st]).link_statements()
    assert len(linked_stmts) == 1
def test_replace_complexes():
    """A Complex implied by a Phosphorylation is removed by replace_complexes."""
    statements = [
        Phosphorylation(Agent('b'), Agent('a')),
        Complex([Agent('a'), Agent('b')]),
    ]
    ml = MechLinker(statements)
    ml.replace_complexes()
    assert len(ml.statements) == 1
    print(ml.statements)
def test_act_phos_to_af():
    """One statement is inferred from a kinase Activation and a Phosphorylation."""
    act_st = Activation(
        Agent('A', activity=ActivityCondition('kinase', True)), Agent('B'))
    phos_st = Phosphorylation(Agent('A'), Agent('B'))
    ml = MechLinker([act_st, phos_st])
    assert len(ml.link_statements()) == 1
def test_replace_activations():
    """An Activation explained by ActiveForm + Phosphorylation is replaced."""
    phos_mod = ModCondition('phosphorylation')
    af = ActiveForm(Agent('a', mods=[phos_mod]), 'activity', True)
    phos = Phosphorylation(Agent('b'), Agent('a'))
    act = Activation(Agent('b'), Agent('a'))
    ml = MechLinker([af, phos, act])
    ml.replace_activations()
    # The redundant Activation is dropped, leaving two statements
    assert len(ml.statements) == 2
    print(ml.statements)
def test_act_af_to_phos():
    """Activation plus a phospho-ActiveForm implies one modification."""
    act_st = Activation(
        Agent('A', activity=ActivityCondition('kinase', True)), Agent('B'))
    phos_mod = ModCondition('phosphorylation', None, None, True)
    af_st = ActiveForm(Agent('B', mods=[phos_mod]), 'activity', True)
    ml = MechLinker([act_st, af_st])
    linked = ml.infer_modifications(ml.statements)
    assert len(linked) == 1
def test_act_af_to_phos():
    """Infer a modification from a kinase Activation and a phospho ActiveForm."""
    kinase_a = Agent('A', activity=ActivityCondition('kinase', True))
    phospho_b = Agent(
        'B', mods=[ModCondition('phosphorylation', None, None, True)])
    ml = MechLinker([Activation(kinase_a, Agent('B')),
                     ActiveForm(phospho_b, 'activity', True)])
    assert len(ml.infer_modifications(ml.statements)) == 1
def test_require_active_forms_mod1():
    """The enzyme of a Phosphorylation gains the mods its ActiveForm requires."""
    active_a = ActiveForm(Agent('a', mods=[ModCondition('phosphorylation')]),
                          'activity', True)
    phos = Phosphorylation(Agent('a'), Agent('b'))
    ml = MechLinker([active_a, phos])
    ml.gather_explicit_activities()
    ml.require_active_forms()
    assert len(ml.statements) == 2
    # The enzyme in the rewritten Phosphorylation now carries mods
    assert ml.statements[1].enz.mods
def test_reduce_mods1():
    """All phosphorylations of 'a' reduce to the most specific site (T143)."""
    stmts = [
        Phosphorylation(Agent('b'), Agent('a')),
        Phosphorylation(Agent('c'), Agent('a'), 'T'),
        Phosphorylation(Agent('d'), Agent('a'), 'T', '143'),
    ]
    ml = MechLinker(stmts)
    ml.gather_modifications()
    ml.reduce_modifications()
    assert len(ml.statements) == 3
    for stmt in ml.statements:
        assert stmt.residue == 'T'
        assert stmt.position == '143'
def test_require_active_forms_mod4():
    """A partially modified enzyme is completed with all required mods."""
    mc1 = ModCondition('phosphorylation', 'T', '185')
    mc2 = ModCondition('phosphorylation', 'Y', '187')
    ml = MechLinker([
        ActiveForm(Agent('a', mods=[mc1, mc2]), 'kinase', True),
        Phosphorylation(Agent('a', mods=[mc1]), Agent('b')),
    ])
    ml.gather_explicit_activities()
    ml.require_active_forms()
    assert len(ml.statements) == 2
    # The enzyme ends up with both modification conditions
    assert len(ml.statements[1].enz.mods) == 2
def test_reduce_activity_types():
    """ActiveForms with generic activities are reduced to the specific 'kinase'."""
    # Note: the cytoplasmic agent object is deliberately shared by two forms
    cyto_a = Agent('a', location='cytoplasm')
    nuc_a = Agent('a', location='nucleus')
    af1 = ActiveForm(cyto_a, 'activity', True)
    af2 = ActiveForm(nuc_a, 'kinase', True)
    af3 = ActiveForm(cyto_a, 'catalytic', True)
    ml = MechLinker([af1, af2, af3])
    ml.gather_explicit_activities()
    ml.reduce_activities()
    for af in (af1, af2, af3):
        assert af.activity == 'kinase'
def test_infer_activations():
    """An Activation is inferred from an ActiveForm plus a Phosphorylation."""
    active_a = ActiveForm(Agent('a', mods=[ModCondition('phosphorylation')]),
                          'activity', True)
    phos = Phosphorylation(Agent('b'), Agent('a'))
    inferred = MechLinker.infer_activations([active_a, phos])
    assert len(inferred) == 1
    print(inferred)
def assemble_model(requester_name):
    """Ground, preassemble and mechanism-link the global statement list,
    announcing each inferred statement and updating the layout when done.
    """
    global stmts
    # Map agent groundings using the default grounding map
    gmapper = gm.GroundingMapper(gm.default_grounding_map)
    stmts = gmapper.map_agents(stmts)
    # Combine related statements and keep only the top-level ones
    pa = Preassembler(hierarchies, stmts)
    pa.combine_related()
    stmts = pa.related_stmts
    # Link mechanisms and announce each inferred statement as a question
    linked_stmts = MechLinker(stmts).link_statements()
    for linked_stmt in linked_stmts or []:
        if linked_stmt.inferred_stmt:
            say(mechlinker_queries.print_linked_stmt(linked_stmt))
            stmts.append(linked_stmt.inferred_stmt)
    say("%s: Done, updating layout." % requester_name)
    update_layout()
def test_reduce_mods2():
    """reduce_modifications fills in missing residue/position details only
    when a strictly more specific, compatible modification is known."""
    # Modification conditions at varying levels of residue/position detail
    mc1 = ModCondition('phosphorylation', 'S', '123', False)
    mc2 = ModCondition('phosphorylation', 'S', None, True)
    mc3 = ModCondition('phosphorylation', 'T')
    mc4 = ModCondition('phosphorylation', 'T', '111')
    mc5 = ModCondition('phosphorylation', 'T', '999')
    mc6 = ModCondition('phosphorylation')
    mc7 = ModCondition('phosphorylation', None, '999')
    # One Activation per modification condition, all on the same agent
    st1 = Activation(Agent('KRAS'), Agent('BRAF', mods=[mc1]))
    st2 = Activation(Agent('KRAS'), Agent('BRAF', mods=[mc2]))
    st3 = Activation(Agent('KRAS'), Agent('BRAF', mods=[mc3]))
    st4 = Activation(Agent('KRAS'), Agent('BRAF', mods=[mc4]))
    st5 = Activation(Agent('KRAS'), Agent('BRAF', mods=[mc5]))
    st6 = Activation(Agent('KRAS'), Agent('BRAF', mods=[mc6]))
    st7 = Activation(Agent('KRAS'), Agent('BRAF', mods=[mc7]))
    ml = MechLinker([st1, st2, st3, st4, st5, st6, st7])
    ml.gather_modifications()
    ml.reduce_modifications()
    # No statements are dropped; only their mods may gain detail
    assert len(ml.statements) == 7
    mc_red1 = ml.statements[0].obj.mods[0]
    mc_red2 = ml.statements[1].obj.mods[0]
    mc_red3 = ml.statements[2].obj.mods[0]
    mc_red4 = ml.statements[3].obj.mods[0]
    mc_red5 = ml.statements[4].obj.mods[0]
    mc_red6 = ml.statements[5].obj.mods[0]
    mc_red7 = ml.statements[6].obj.mods[0]
    # These ones stay the same because they shouldn't be reduced
    assert mc_red1.__dict__ == mc1.__dict__
    assert mc_red3.__dict__ == mc3.__dict__
    assert mc_red4.__dict__ == mc4.__dict__
    assert mc_red5.__dict__ == mc5.__dict__
    assert mc_red6.__dict__ == mc6.__dict__
    # mc2 has to be reduced to have position '123'
    assert mc_red2.mod_type == 'phosphorylation'
    assert mc_red2.residue == 'S'
    assert mc_red2.position == '123'
    assert mc_red2.is_modified == True
    # mc7 has to be reduced to have residue 'T'
    assert mc_red7.mod_type == 'phosphorylation'
    assert mc_red7.residue == 'T'
    assert mc_red7.position == '999'
    assert mc_red7.is_modified == True
def reduce_activities(stmts_in, **kwargs):
    """Reduce the activity types in a list of statements

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to reduce activity types in.
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        A list of reduced activity statements.
    """
    logger.info('Reducing activities on %d statements...' % len(stmts_in))
    # Work on copies so that the input statements are not mutated
    stmts_out = [deepcopy(st) for st in stmts_in]
    ml = MechLinker(stmts_out)
    # Collect explicit activity types before reducing them. This was
    # previously a call to ml.get_activities(), which is inconsistent with
    # the gather_explicit_activities() used everywhere else before
    # reduce_activities().
    ml.gather_explicit_activities()
    ml.reduce_activities()
    stmts_out = ml.statements
    dump_pkl = kwargs.get('save')
    if dump_pkl:
        dump_statements(stmts_out, dump_pkl)
    return stmts_out
def get_missing_activities(self):
    """Suggest copies of statements whose subject should carry an explicit
    activity condition, based on known activity types for that agent."""
    ml = MechLinker(self.statements)
    ml.gather_explicit_activities()
    suggestions = []
    for stmt in self.statements:
        if not isinstance(stmt, (Modification, RegulateActivity,
                                 RegulateAmount)):
            continue
        # The subj here is in an "active" position
        subj, obj = stmt.agent_list()
        if subj is None:
            continue
        subj_base = ml._get_base(subj)
        # Skip unless the agent has known activity types but no activity
        # condition in this statement
        if not subj_base.activity_types or subj.activity:
            continue
        # We suggest making the subj active in this case
        suggestion = deepcopy(stmt)
        active_subj = deepcopy(subj)
        active_subj.activity = \
            ActivityCondition(subj_base.activity_types[0], True)
        suggestion.set_agent_list([active_subj, obj])
        suggestions.append(suggestion)
    return suggestions
def test_require_active_forms_act1():
    """An Activation's subject gains the mods required by its ActiveForm."""
    active_a = ActiveForm(Agent('a', mods=[ModCondition('phosphorylation')]),
                          'activity', True)
    act = Activation(Agent('a'), Agent('b'))
    ml = MechLinker([active_a, act])
    ml.gather_explicit_activities()
    ml.require_active_forms()
    assert len(ml.statements) == 2
    # The subject of the rewritten Activation now carries mods
    assert ml.statements[1].subj.mods
def test_require_active_forms_mod2():
    """With two ActiveForms, the Phosphorylation expands into one per form."""
    af_mod = ActiveForm(Agent('a', mods=[ModCondition('phosphorylation')]),
                        'activity', True)
    af_loc = ActiveForm(Agent('a', location='nucleus'), 'activity', True)
    phos = Phosphorylation(Agent('a'), Agent('b'))
    ml = MechLinker([af_mod, af_loc, phos])
    ml.gather_explicit_activities()
    ml.require_active_forms()
    assert len(ml.statements) == 4
    # The location-based variant carries a location on the enzyme
    assert ml.statements[3].enz.location
def test_reduce_activity_types():
    """All ActiveForms for the same agent reduce to the 'kinase' activity."""
    # The cytoplasmic agent object is intentionally shared by two forms
    agent_cyt = Agent('a', location='cytoplasm')
    agent_nuc = Agent('a', location='nucleus')
    af1 = ActiveForm(agent_cyt, 'activity', True)
    af2 = ActiveForm(agent_nuc, 'kinase', True)
    af3 = ActiveForm(agent_cyt, 'catalytic', True)
    ml = MechLinker([af1, af2, af3])
    ml.gather_explicit_activities()
    ml.reduce_activities()
    assert (af1.activity, af2.activity, af3.activity) == \
        ('kinase', 'kinase', 'kinase')
def test_infer_complexes():
    """A Complex is inferred from a single Phosphorylation statement."""
    phos = Phosphorylation(Agent('b'), Agent('a'))
    inferred = MechLinker.infer_complexes([phos])
    assert len(inferred) == 1
    print(inferred)
def test_base_agent():
    """gather_explicit_activities handles a lone ActiveForm without error."""
    phos_mod = ModCondition('phosphorylation')
    active_a = ActiveForm(Agent('a', mods=[phos_mod]), 'activity', True)
    MechLinker([active_a]).gather_explicit_activities()
from indra.mechlinker import MechLinker
from indra.assemblers import EnglishAssembler


def print_linked_stmt(stmt):
    """Print and return an English question asking whether the inferred
    statement follows from its source statements."""
    source_txts = []
    for source_stmt in stmt.source_stmts:
        source_txts.append(EnglishAssembler([source_stmt]).make_model())
    query_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    final_txt = 'I know that '
    last_idx = len(source_txts) - 1
    for idx, txt in enumerate(source_txts):
        final_txt += '(%d) %s ' % (idx + 1, txt)
        if idx < last_idx:
            # Replace the trailing '. ' with ', and ' between sources
            final_txt = final_txt[:-2] + ', and '
    final_txt += 'Is it therefore true that ' + query_txt[:-1] + '?'
    print(final_txt)
    return final_txt


if __name__ == '__main__':
    # NOTE(review): IncrementalModel is not imported in this script --
    # confirm it is available at runtime.
    fname = 'models/rasmachine/rem/model.pkl'
    model = IncrementalModel(fname)
    model.preassemble()
    stmts = model.assembled_stmts
    linked_stmts = MechLinker.infer_active_forms(stmts)
    linked_stmts += MechLinker.infer_modifications(stmts)
    linked_stmts += MechLinker.infer_activations(stmts)
    for stmt in linked_stmts:
        print_linked_stmt(stmt)
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements."""
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stmts = self.filter_event_association(stmts)
    stmts = ac.filter_no_hypothesis(stmts)
    # Grounding mapping / standardization steps, each gated by config
    if not self.assembly_config.get('skip_map_grounding'):
        stmts = ac.map_grounding(stmts)
    if self.assembly_config.get('standardize_names'):
        ac.standardize_names_groundings(stmts)
    if self.assembly_config.get('filter_ungrounded'):
        score_threshold = self.assembly_config.get('score_threshold')
        stmts = ac.filter_grounded_only(stmts,
                                        score_threshold=score_threshold)
    if self.assembly_config.get('merge_groundings'):
        stmts = ac.merge_groundings(stmts)
    if self.assembly_config.get('merge_deltas'):
        stmts = ac.merge_deltas(stmts)
    relevance_policy = self.assembly_config.get('filter_relevance')
    if relevance_policy:
        stmts = self.filter_relevance(stmts, relevance_policy)
    if not self.assembly_config.get('skip_filter_human'):
        stmts = ac.filter_human_only(stmts)
    if not self.assembly_config.get('skip_map_sequence'):
        stmts = ac.map_sequence(stmts)
    # Use WM hierarchies and belief scorer for WM preassembly
    preassembly_mode = self.assembly_config.get('preassembly_mode')
    if preassembly_mode == 'wm':
        hierarchies = get_wm_hierarchies()
        belief_scorer = get_eidos_scorer()
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   belief_scorer=belief_scorer,
                                   hierarchies=hierarchies)
    else:
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
    # Optionally drop low-belief statements, then keep top-level only
    belief_cutoff = self.assembly_config.get('belief_cutoff')
    if belief_cutoff is not None:
        stmts = ac.filter_belief(stmts, belief_cutoff)
    stmts = ac.filter_top_level(stmts)
    # Mechanism-oriented filters, applied when 'filter_direct' is set
    if self.assembly_config.get('filter_direct'):
        stmts = ac.filter_direct(stmts)
        stmts = ac.filter_enzyme_kinase(stmts)
        stmts = ac.filter_mod_nokinase(stmts)
        stmts = ac.filter_transcription_factor(stmts)
    # Optional mechanism linking: reduce activities/modifications, then
    # replace activations and require active forms
    if self.assembly_config.get('mechanism_linking'):
        ml = MechLinker(stmts)
        ml.gather_explicit_activities()
        ml.reduce_activities()
        ml.gather_modifications()
        ml.reduce_modifications()
        ml.gather_explicit_activities()
        ml.replace_activations()
        ml.require_active_forms()
        stmts = ml.statements
    self.assembled_stmts = stmts
from indra.mechlinker import MechLinker
from indra.assemblers.english import EnglishAssembler


def print_linked_stmt(stmt):
    """Assemble an English question linking the source statements to the
    inferred statement, print it, and return it."""
    sources = [EnglishAssembler([src]).make_model()
               for src in stmt.source_stmts]
    inferred_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    question = 'I know that '
    n_sources = len(sources)
    for pos, txt in enumerate(sources):
        question += '(%d) %s ' % (pos + 1, txt)
        if pos < n_sources - 1:
            # Swap the trailing '. ' for ', and ' between source sentences
            question = question[:-2] + ', and '
    question += 'Is it therefore true that ' + inferred_txt[:-1] + '?'
    print(question)
    return question


if __name__ == '__main__':
    # NOTE(review): IncrementalModel is not imported in this script --
    # confirm it is available at runtime.
    fname = 'models/rasmachine/rem/model.pkl'
    model = IncrementalModel(fname)
    model.preassemble()
    stmts = model.assembled_stmts
    linked_stmts = MechLinker.infer_active_forms(stmts)
    linked_stmts += MechLinker.infer_modifications(stmts)
    linked_stmts += MechLinker.infer_activations(stmts)
    for stmt in linked_stmts:
        print_linked_stmt(stmt)
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.'''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep only statements whose agents are all
    # proteins or chemicals
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)

    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + \
        [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)
        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')
    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def assemble_pysb(stmts, data_genes, contextualize=False):
    """Filter, simplify and assemble statements into a PySB model, and
    optionally build contextualized models per cell line."""
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    stmts = normalize_active_forms(ml.statements)
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    # Iterate until no more statements are removed by the filters
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))
    # Add drug target Statements
    drug_target_stmts = get_drug_target_statements()
    stmts += drug_target_stmts
    # Just generate the generic model
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as f:
        pickle.dump(model, f)
    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return
    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        # Contextualize statements only for cell lines with data
        if cell_line not in cell_lines_no_data:
            stmtsc = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            stmtsc = stmts
        pa = PysbAssembler()
        pa.add_statements(stmtsc)
        model = pa.make_model()
        if cell_line not in cell_lines_no_data:
            contextualize_model(model, cell_line, data_genes)
        ac.dump_statements(stmtsc, prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as f:
            pickle.dump(model, f)
def run_assembly(stmts, folder, pmcid):
    """Assemble statements for one PMCID: filter, preassemble, link
    mechanisms, emit index cards, diagnostics and a PySB model.

    Converted the Python 2 ``print`` statements to ``print()`` calls for
    consistency with the rest of the codebase (the old syntax is a
    SyntaxError under Python 3).
    """
    indexcard_prefix = folder + '/index_cards/' + pmcid
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Filter for grounding: keep statements whose agents are all
    # proteins or chemicals
    grounded_stmts = []
    for st in stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(eh, mh)
    pa.add_statements(grounded_stmts)
    print('%d statements collected in total.' % len(pa.stmts))
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Link mechanisms, then preassemble again on the linked set
    ml = MechLinker(unique_stmts)
    ml.link_statements()
    pa = Preassembler(eh, mh, ml.statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()
    print('%d statements after combining related.' % len(related_stmts))

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(related_stmts, fh)

    flattened_evidence_stmts = flatten_evidence(related_stmts)
    card_counter = 1
    card_lim = float('inf')
    top_stmts = []
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: len(x.evidence), reverse=True):
        print(len(st.evidence), st)
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st])
        ia.make_model()
        if ia.cards:
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')
    # Print the statement graph
    graph = render_stmt_graph(related_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')

    # Assemble a PySB model as a size diagnostic
    pya = PysbAssembler()
    pya.add_statements(related_stmts)
    model = pya.make_model()
    print('PySB model has %d monomers and %d rules' %
          (len(model.monomers), len(model.rules)))
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.'''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)

    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep only statements whose agents are all
    # proteins or chemicals
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)

    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    # Link statements
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + \
        [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)
        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')
    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def preprocess_stmts(stmts, data_genes):
    """Filter and simplify raw statements for model building and return
    the processed statement list."""
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_mutation_status(stmts,
                                      {'BRAF': [('V', '600', 'E')]}, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    # Re-run preassembly on the ActiveForm statements only, then merge back
    af_stmts = ac.filter_by_type(ml.statements, ActiveForm)
    non_af_stmts = ac.filter_by_type(ml.statements, ActiveForm, invert=True)
    af_stmts = ac.run_preassembly(af_stmts)
    stmts = af_stmts + non_af_stmts
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    # Iterate until no more statements are removed by the filters
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    return stmts
from indra.tools.incremental_model import IncrementalModel
from indra.mechlinker import MechLinker
from indra.assemblers import EnglishAssembler


def print_linked_stmt(stmt):
    """Build, print and return an English question asking whether the
    inferred statement follows from its sources."""
    sources = [EnglishAssembler([src]).make_model()
               for src in stmt.source_stmts]
    inferred_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    question = 'I know that '
    last = len(sources) - 1
    for pos, txt in enumerate(sources):
        question += '(%d) %s ' % (pos + 1, txt)
        if pos < last:
            # Replace the trailing '. ' with ', and ' between sources
            question = question[:-2] + ', and '
    question += 'Is it therefore true that ' + inferred_txt[:-1] + '?'
    print(question)
    return question


if __name__ == '__main__':
    fname = 'models/rasmachine/rem/model.pkl'
    model = IncrementalModel(fname)
    model.preassemble()
    stmts = model.toplevel_stmts
    ml = MechLinker(stmts)
    for linked in ml.link_statements():
        print_linked_stmt(linked)
import pickle
from indra.tools.incremental_model import IncrementalModel
from indra.mechlinker import MechLinker
from indra.assemblers import EnglishAssembler


def print_linked_stmt(stmt):
    """Print and return an English question linking the source statements
    to the inferred statement."""
    source_txts = []
    for source_stmt in stmt.source_stmts:
        source_txts.append(EnglishAssembler([source_stmt]).make_model())
    inferred_txt = EnglishAssembler([stmt.inferred_stmt]).make_model()
    question = 'I know that '
    n_sources = len(source_txts)
    for idx, txt in enumerate(source_txts):
        question += '(%d) %s ' % (idx + 1, txt)
        if idx < n_sources - 1:
            # Swap the trailing '. ' for ', and ' between source sentences
            question = question[:-2] + ', and '
    question += 'Is it therefore true that ' + inferred_txt[:-1] + '?'
    print(question)
    return question


if __name__ == '__main__':
    fname = 'models/rasmachine/rem/model.pkl'
    model = IncrementalModel(fname)
    model.preassemble()
    stmts = model.toplevel_stmts
    ml = MechLinker(stmts)
    for linked in ml.link_statements():
        print_linked_stmt(linked)