def test_render_stmt_graph():
    """Check node and edge counts of the unreduced statement graph.

    Seven BRAF phosphorylation statements of increasing specificity are
    run through ``Preassembler.combine_related`` and the resulting related
    statements are rendered with ``reduce=False``.
    """
    kinase = Agent('BRAF', db_refs={'HGNC': '1097'})
    substrate = Agent('MAP2K1', db_refs={'HGNC': '6840'})
    substrate_family = Agent('MEK', db_refs={'FPLX': 'MEK'})

    # Statements p0..p6, listed from least to most specific
    stmts = [
        Phosphorylation(kinase, substrate_family),            # p0
        Phosphorylation(kinase, substrate),                   # p1
        Phosphorylation(kinase, substrate, position='218'),   # p2
        Phosphorylation(kinase, substrate, position='222'),   # p3
        Phosphorylation(kinase, substrate, 'serine'),         # p4
        Phosphorylation(kinase, substrate, 'serine', '218'),  # p5
        Phosphorylation(kinase, substrate, 'serine', '222'),  # p6
    ]

    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_related()
    graph = render_stmt_graph(preassembler.related_stmts, reduce=False)

    # One node for each statement
    node_count = len(graph.nodes())
    assert node_count == 7

    # Expected edges:
    #   p0 supports p1-p6      -> 6 edges
    #   p1 supports p2-p6      -> 5 edges
    #   p2 supports p5         -> 1 edge
    #   p3 supports p6         -> 1 edge
    #   p4 supports p5 and p6  -> 2 edges
    #   (p5 and p6 support none -- they are top-level)
    # Total: 6 + 5 + 1 + 1 + 2 = 15 edges
    edge_count = len(graph.edges())
    assert edge_count == 15
def test_render_stmt_graph():
    """Check node and edge counts of the unreduced statement graph.

    Seven BRAF phosphorylation statements of increasing specificity are
    run through ``Preassembler.combine_related`` (using ``bio_ontology``)
    and the resulting related statements are rendered with ``reduce=False``.
    """
    kinase = Agent('BRAF', db_refs={'HGNC': '1097'})
    substrate = Agent('MAP2K1', db_refs={'HGNC': '6840'})
    substrate_family = Agent('MEK', db_refs={'FPLX': 'MEK'})

    # Statements p0..p6, listed from least to most specific
    stmts = [
        Phosphorylation(kinase, substrate_family),            # p0
        Phosphorylation(kinase, substrate),                   # p1
        Phosphorylation(kinase, substrate, position='218'),   # p2
        Phosphorylation(kinase, substrate, position='222'),   # p3
        Phosphorylation(kinase, substrate, 'serine'),         # p4
        Phosphorylation(kinase, substrate, 'serine', '218'),  # p5
        Phosphorylation(kinase, substrate, 'serine', '222'),  # p6
    ]

    preassembler = Preassembler(bio_ontology, stmts=stmts)
    preassembler.combine_related()
    graph = render_stmt_graph(preassembler.related_stmts, reduce=False)

    # One node for each statement
    node_count = len(graph.nodes())
    assert node_count == 7

    # Expected edges:
    #   p0 supports p1-p6      -> 6 edges
    #   p1 supports p2-p6      -> 5 edges
    #   p2 supports p5         -> 1 edge
    #   p3 supports p6         -> 1 edge
    #   p4 supports p5 and p6  -> 2 edges
    #   (p5 and p6 support none -- they are top-level)
    # Total: 6 + 5 + 1 + 1 + 2 = 15 edges
    edge_count = len(graph.edges())
    assert edge_count == 15
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    The statements are grounding-mapped, filtered for grounding,
    preassembled, scored with the BeliefEngine, extended with statements
    inferred by the MechLinker, and preassembled again. The resulting
    top-level statements are then written out as index cards and as
    diagnostic files.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    folder : str
        Base output folder. Index cards are written under
        ``folder + '/index_cards/'`` and all other outputs under
        ``folder + '/other_outputs/'``.
    pmcid : str
        Identifier used as the file name prefix of every output and passed
        to the IndexCardAssembler as ``pmc_override``.
    background_assertions : collection, optional
        If given, top-level statements contained in it are dropped before
        any output is produced.

    Nothing is returned; results are printed and written to files
    (a pickle, index card JSON files, a graph PDF and two TSV files).
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and layer it on top of a *copy*
    # of the default (REACH-oriented) grounding map, so that the shared
    # module-level default map is not modified as a side effect of this call.
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    combined_grounding_map = dict(default_grounding_map)
    combined_grounding_map.update(trips_gm)
    gm = GroundingMapper(combined_grounding_map)
    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep statements whose agents all pass
    # is_protein_or_chemical
    grounded_stmts = [
        st for st in renamed_agent_stmts
        if all(is_protein_or_chemical(a) for a in st.agent_list())
    ]

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Go through the statements in order of decreasing belief
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        # NOTE(review): statements below the cutoff are only labelled
        # 'SKIP' in the log here; they are still processed below.
        # Confirm whether an actual skip was intended.
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards, so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def plot_assembly(stmts, fname):
    """Render the statement graph for ``stmts`` and draw it to ``fname``.

    The graph is built unreduced with a top-to-bottom rank direction, its
    nodes are printed, and it is drawn with the 'dot' program.
    The graph object is returned.
    """
    graph = render_stmt_graph(stmts, reduce=False, rankdir='TB')
    node_list = graph.nodes()
    print(node_list)
    graph.draw(fname, prog='dot')
    return graph
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    The statements are grounding-mapped, filtered for grounding,
    preassembled, scored with the BeliefEngine, extended with statements
    inferred by the MechLinker, and preassembled again. The resulting
    top-level statements are then written out as index cards and as
    diagnostic files.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    folder : str
        Base output folder. Index cards are written under
        ``folder + '/index_cards/'`` and all other outputs under
        ``folder + '/other_outputs/'``.
    pmcid : str
        Identifier used as the file name prefix of every output and passed
        to the IndexCardAssembler as ``pmc_override``.
    background_assertions : collection, optional
        If given, top-level statements contained in it are dropped before
        any output is produced.

    Nothing is returned; results are printed and written to files
    (a pickle, index card JSON files, a graph PDF and two TSV files).
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and layer it on top of a *copy*
    # of the default (REACH-oriented) grounding map, so that the shared
    # module-level default map is not modified as a side effect of this call.
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    combined_grounding_map = dict(default_grounding_map)
    combined_grounding_map.update(trips_gm)
    gm = GroundingMapper(combined_grounding_map)
    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep statements whose agents all pass
    # is_protein_or_chemical
    grounded_stmts = [
        st for st in renamed_agent_stmts
        if all(is_protein_or_chemical(a) for a in st.agent_list())
    ]

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Link statements using the three MechLinker inference methods
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]

    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Go through the statements in order of decreasing belief
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        # NOTE(review): statements below the cutoff are only labelled
        # 'SKIP' in the log here; they are still processed below.
        # Confirm whether an actual skip was intended.
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            print('SKIP', st.belief, st)

        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards, so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
# Read the TRIPS XML file; the context manager closes the handle promptly
# instead of leaving it to garbage collection.
with open(fn, 'rt') as xml_fh:
    xml_str = xml_fh.read()
tp = trips.process_xml(xml_str)

# Report how many events of each type were extracted.
# Single-argument print(...) calls produce the same output whether print
# is a statement or a function.
print('Extracted events by type')
print('------------------------')
for k, v in tp.extracted_events.items():
    print('%s %d' % (k, len(v)))
print('------------------------')
print('%s statements collected.' % len(tp.statements))
pa.add_statements(tp.statements)

print('----------------------------\n\n')
print('%d statements collected in total.' % len(pa.stmts))
duplicate_stmts = pa.combine_duplicates()
print('%d statements after combining duplicates.' % len(duplicate_stmts))
related_stmts = pa.combine_related()
print('%d statements after combining related.' % len(related_stmts))

# Print the statement graph
graph = render_stmt_graph(related_stmts)
graph.draw('trips_graph.pdf', prog='dot')
# Print statement diagnostics
print_stmts(pa.stmts, 'trips_statements.tsv')
print_stmts(related_stmts, 'trips_related_statements.tsv')

# Assemble a PySB model from the related statements and report its size
pya = PysbAssembler()
pya.add_statements(related_stmts)
model = pya.make_model()
print('PySB model has %d monomers and %d rules' %
      (len(model.monomers), len(model.rules)))
def run_assembly(stmts, folder, pmcid):
    '''Run assembly on a list of statements, for a given PMCID.

    The statements are filtered for grounding, de-duplicated, passed
    through the MechLinker, and preassembled into related statements,
    which are then written out as index cards, a pickle, a statement
    graph, TSV diagnostics, and summarized as a PySB model.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    folder : str
        Base output folder. Index cards are written under
        ``folder + '/index_cards/'`` and all other outputs under
        ``folder + '/other_outputs/'``.
    pmcid : str
        Identifier used as the file name prefix of every output.

    Nothing is returned; results are printed and written to files.
    '''
    indexcard_prefix = folder + '/index_cards/' + pmcid
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Filter for grounding: keep statements whose agents all pass
    # is_protein_or_chemical
    grounded_stmts = [
        st for st in stmts
        if all(is_protein_or_chemical(a) for a in st.agent_list())
    ]

    # Instantiate the Preassembler
    pa = Preassembler(eh, mh)
    pa.add_statements(grounded_stmts)
    # Single-argument print(...) calls produce the same output whether
    # print is a statement or a function.
    print('%d statements collected in total.' % len(pa.stmts))
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Link the unique statements, then preassemble the linker's statements
    ml = MechLinker(unique_stmts)
    ml.link_statements()
    pa = Preassembler(eh, mh, ml.statements)
    pa.combine_duplicates()
    related_stmts = pa.combine_related()
    print('%d statements after combining related.' % len(related_stmts))

    # Dump the related statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(related_stmts, fh)

    flattened_evidence_stmts = flatten_evidence(related_stmts)

    card_counter = 1
    # No limit on the number of cards reported
    card_lim = float('inf')
    top_stmts = []
    # Go through the statements in order of decreasing amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: len(x.evidence), reverse=True):
        print('%d %s' % (len(st.evidence), st))
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue

        # Assemble IndexCards
        ia = IndexCardAssembler([st])
        ia.make_model()
        # Only save and count the card if one was actually produced
        if ia.cards:
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
            if card_counter > card_lim:
                break

    # Print the English-assembled model of the statements that got a card
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')

    # Print the statement graph
    graph = render_stmt_graph(related_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')

    # Assemble a PySB model from the related statements and report its size
    pya = PysbAssembler()
    pya.add_statements(related_stmts)
    model = pya.make_model()
    print('PySB model has %d monomers and %d rules' %
          (len(model.monomers), len(model.rules)))