def test_fakeprotein():
    """The FAKEPROTEIN EKB grounds the agent to UP P04324 with name nef."""
    fname = os.path.join(path_this, 'trips_ekbs', 'FAKEPROTEIN.ekb')
    # Use a context manager so the EKB file handle is closed deterministically
    # instead of leaking until garbage collection.
    with open(fname, 'r') as fh:
        tp = trips.process_xml(fh.read())
    agent = tp._get_agent_by_id('V38735', None)
    assert agent is not None
    assert agent.db_refs['UP'] == 'P04324', agent.db_refs
    assert agent.name == 'nef', agent.name
def test_mapped_chebi_id():
    """The EKB's single agent is grounded to the mapped CHEBI ID."""
    fname = os.path.join(path_this, 'trips_ekbs', 'chebi_id_test.ekb')
    # Close the EKB file promptly via a context manager (was leaked before).
    with open(fname, 'r') as fh:
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    assert agent.db_refs['CHEBI'] == 'CHEBI:63637', agent.db_refs
def test_increase_amount_of():
    """The EKB yields one IncreaseAmount(TGFBR1, SMURF2) Statement."""
    fname = os.path.join(path_this, 'trips_ekbs', 'increase_amount_qty.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    assert len(tp.statements) == 1
    assert isinstance(tp.statements[0], IncreaseAmount)
    assert tp.statements[0].subj.name == 'TGFBR1'
    assert tp.statements[0].obj.name == 'SMURF2'
def get_statements(self, reprocess=False):
    """General method to create statements.

    Statements are cached in self._statements after the first successful
    extraction; pass reprocess=True to force re-extraction from
    self.content. A copy of the cached list is returned so callers cannot
    mutate the cache.
    """
    if self._statements is None or reprocess:
        # Handle the case that there is no content.
        if self.content is None:
            self._statements = []
            return []
        # Map to the different processors.
        if self.reader == ReachReader.name:
            # REACH content is stored as a JSON dict; the processor
            # expects a JSON string.
            json_str = json.dumps(self.content)
            processor = reach.process_json_str(json_str)
        elif self.reader == SparserReader.name:
            processor = sparser.process_json_dict(self.content)
            if processor is not None:
                # Clear any PMID carried over from reading so it can be
                # set by the caller.
                processor.set_statements_pmid(None)
        elif self.reader == TripsReader.name:
            processor = trips.process_xml(self.content)
        else:
            raise ReadingError("Unknown reader: %s." % self.reader)
        # Get the statements from the processor, if it was resolved.
        if processor is None:
            logger.error("Production of statements from %s failed for %s."
                         % (self.reader, self.content_id))
            stmts = []
        else:
            stmts = processor.statements
        # Cache a shallow copy so later mutation of stmts does not
        # affect the cache.
        self._statements = stmts[:]
    else:
        stmts = self._statements[:]
    return stmts
def test_increase_amount_of():
    """The EKB yields one IncreaseAmount(TGFBR1, SMURF2) Statement."""
    fname = os.path.join(path_this, 'trips_ekbs', 'increase_amount_qty.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    assert len(tp.statements) == 1
    assert isinstance(tp.statements[0], IncreaseAmount)
    assert tp.statements[0].subj.name == 'TGFBR1'
    assert tp.statements[0].obj.name == 'SMURF2'
def test_up_go_location():
    """The endoplasmic reticulum EKB's single agent has a GO grounding."""
    fname = os.path.join(path_this, 'trips_ekbs', 'endoplasmic_reticulum.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    assert 'GO' in agent.db_refs, agent.db_refs
def test_fakeprotein():
    """The FAKEPROTEIN EKB grounds the agent to UP P04324 with name nef."""
    fname = os.path.join(path_this, 'trips_ekbs', 'FAKEPROTEIN.ekb')
    # Use a context manager so the EKB file handle is closed deterministically
    # instead of leaking until garbage collection.
    with open(fname, 'r') as fh:
        tp = trips.process_xml(fh.read())
    agent = tp._get_agent_by_id('V38735', None)
    assert agent is not None
    assert agent.db_refs['UP'] == 'P04324', agent.db_refs
    assert agent.name == 'nef', agent.name
def test_assoc_with():
    """The EKB yields one association whose members are EGF and EGFR."""
    fname = os.path.join(path_this, 'trips_ekbs', 'ekb_assoc.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    assert len(tp.statements) == 1
    assert len(tp.statements[0].members) == 2
    names = {m.name for m in tp.statements[0].members}
    assert names == {'EGF', 'EGFR'}
def test_ncit_multiple_mappings():
    """The mek1 EKB's single agent carries the expected NCIT grounding."""
    fname = os.path.join(path_this, 'trips_ekbs', 'mek1.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    # The original ended with a bare comparison (no `assert`), so this
    # check was silently a no-op; make it a real assertion.
    assert agent.db_refs['NCIT'] == 'C52823', agent.db_refs
def process_sentence_xml(sentence):
    """Load and process the cached EKB file corresponding to *sentence*."""
    # The cached file name is the sentence (minus its trailing punctuation
    # character) with every non-alphanumeric character turned into '_'.
    stem = re.sub('[^a-zA-Z0-9]', '_', sentence[:-1])
    ekb_path = os.path.join(path_this, 'trips_ekbs', stem + '.ekb')
    with open(ekb_path, 'rb') as ekb_file:
        ekb_xml = ekb_file.read().decode('utf-8')
    return trips.process_xml(ekb_xml)
def test_up_go_location(): fname = os.path.join(path_this, 'trips_ekbs', 'endoplasmic_reticulum.ekb') tp = trips.process_xml(open(fname, 'r').read()) agents = tp.get_agents() assert len(agents) == 1, agents agent = agents[0] assert 'GO' in agent.db_refs, agent.db_refs
def remove_mechanism(self, mech_ekb, model_id):
    """Return a new model with the given mechanism having been removed.

    The EKB is processed into Statements, and every Statement of the
    given model that is a refinement of any of them is removed. The
    result dict carries the remaining model, its executable assembly,
    diagrams, and (if any) the removed Statements.
    """
    tp = trips.process_xml(mech_ekb)
    rem_stmts = tp.statements
    new_stmts = []
    removed_stmts = []
    model_stmts = self.models[model_id]
    # Partition the model's Statements: those refining any of the
    # to-be-removed Statements are dropped, the rest are kept.
    for model_st in model_stmts:
        found = False
        for rem_st in rem_stmts:
            if model_st.refinement_of(rem_st, hierarchies):
                found = True
                break
        if not found:
            new_stmts.append(model_st)
        else:
            removed_stmts.append(model_st)
    res = {'model_id': model_id, 'model': new_stmts}
    model_exec = self.assemble_pysb(new_stmts)
    res['model_exec'] = model_exec
    if removed_stmts:
        res['removed'] = removed_stmts
    res['diagrams'] = make_diagrams(model_exec, model_id)
    # Register the reduced Statement set as a new model.
    self.new_model(new_stmts)
    return res
def test_gene_assoc_with_gene():
    """The EGFR-protein EKB yields exactly one agent named EGFR."""
    fname = os.path.join(path_this, 'trips_ekbs', 'egfr_protein.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    assert agent.name == 'EGFR', agent.name
def test_gene_assoc_with_gene():
    """The EGFR-protein EKB yields exactly one agent named EGFR."""
    fname = os.path.join(path_this, 'trips_ekbs', 'egfr_protein.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    assert agent.name == 'EGFR', agent.name
def test_ncit_multiple_mappings():
    """The mek1 EKB's single agent carries the expected NCIT grounding."""
    fname = os.path.join(path_this, 'trips_ekbs', 'mek1.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    # The original ended with a bare comparison (no `assert`), so this
    # check was silently a no-op; make it a real assertion.
    assert agent.db_refs['NCIT'] == 'C52823', agent.db_refs
def get_file_stmts(fname):
    """Return the Statements extracted from the EKB XML file at *fname*.

    An empty list is returned when the TRIPS processor fails to produce
    a processor object.
    """
    with open(fname, 'rt') as fh:
        content = fh.read()
    processor = trips.process_xml(content)
    return processor.statements if processor is not None else []
def remove_mechanism(self, mech_ekb, model_id):
    """Return a new model with the given mechanism having been removed.

    The EKB is processed into Statements, and every Statement of the
    given model that is a refinement of any of them is removed. A new
    model is registered from the remaining Statements and the result
    dict carries it along with its executable assembly and diagrams.
    """
    tp = trips.process_xml(mech_ekb)
    rem_stmts = tp.statements
    logger.info('Removing statements: %s' % rem_stmts)
    new_stmts = []
    removed_stmts = []
    model_stmts = self.models[model_id]
    # Partition the model's Statements: those refining any of the
    # to-be-removed Statements are dropped, the rest are kept.
    for model_st in model_stmts:
        found = False
        for rem_st in rem_stmts:
            if model_st.refinement_of(rem_st, hierarchies):
                found = True
                break
        if not found:
            new_stmts.append(model_st)
        else:
            removed_stmts.append(model_st)
    new_model_id = self.new_model(new_stmts)
    model_exec = self.assemble_pysb(new_stmts)
    res = {'model_id': new_model_id, 'model': new_stmts}
    res['model_exec'] = model_exec
    if removed_stmts:
        res['removed'] = removed_stmts
    # No diagrams can be made for an empty model.
    if not new_stmts:
        return res
    res['diagrams'] = make_diagrams(model_exec, new_model_id,
                                    self.models[new_model_id], self.context)
    return res
def test_assoc_with():
    """The EKB yields one association whose members are EGF and EGFR."""
    fname = os.path.join(path_this, 'trips_ekbs', 'ekb_assoc.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    assert len(tp.statements) == 1
    assert len(tp.statements[0].members) == 2
    names = {m.name for m in tp.statements[0].members}
    assert names == {'EGF', 'EGFR'}
def set_user_goal(self, explain):
    """Set the explanation goal (and cell-line context) from an EKB string."""
    # Get the event itself
    tp = trips.process_xml(explain)
    if tp is None:
        return {'error': 'Failed to process EKB.'}
    print(tp.statements)  # NOTE(review): debug output — consider logger
    if not tp.statements:
        return
    self.explain = tp.statements[0]

    # Look for a term representing a cell line
    def get_context(explain_xml):
        import xml.etree.ElementTree as ET
        et = ET.fromstring(explain_xml)
        cl_tag = et.find("TERM/[type='ONT::CELL-LINE']/text")
        if cl_tag is not None:
            cell_line = cl_tag.text
            # str.replace returns a new string; the original discarded the
            # result, so the dashes were never actually removed. Rebind it.
            cell_line = cell_line.replace('-', '')
            return cell_line
        return None

    try:
        self.context = get_context(explain)
    except Exception as e:
        logger.error('MRA could not set context from USER-GOAL')
        logger.error(e)
def get_statements(self, reprocess=False):
    """General method to create statements.

    Statements are cached in self._statements after the first successful
    extraction; pass reprocess=True to force re-extraction from
    self.content. A copy of the cached list is returned so callers cannot
    mutate the cache.
    """
    if self._statements is None or reprocess:
        # Handle the case that there is no content.
        if self.content is None:
            self._statements = []
            return []
        # Map to the different processors.
        if self.reader == ReachReader.name:
            # REACH content is stored as a JSON dict; the processor
            # expects a JSON string.
            json_str = json.dumps(self.content)
            processor = reach.process_json_str(json_str)
        elif self.reader == SparserReader.name:
            processor = sparser.process_json_dict(self.content)
            if processor is not None:
                # Clear any PMID carried over from reading so it can be
                # set by the caller.
                processor.set_statements_pmid(None)
        elif self.reader == TripsReader.name:
            processor = trips.process_xml(self.content)
        else:
            raise ReadingError("Unknown reader: %s." % self.reader)
        # Get the statements from the processor, if it was resolved.
        if processor is None:
            logger.error(
                "Production of statements from %s failed for %s."
                % (self.reader, self.content_id))
            stmts = []
        else:
            stmts = processor.statements
        # Cache a shallow copy so later mutation of stmts does not
        # affect the cache.
        self._statements = stmts[:]
    else:
        stmts = self._statements[:]
    return stmts
def test_mapped_chebi_id():
    """The EKB's single agent is grounded to the mapped CHEBI ID."""
    fname = os.path.join(path_this, 'trips_ekbs', 'chebi_id_test.ekb')
    # Close the EKB file promptly via a context manager (was leaked before).
    with open(fname, 'r') as fh:
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    assert agent.db_refs['CHEBI'] == 'CHEBI:63637', agent.db_refs
def test_ncit_up_hgnc_mapping():
    """The ER-alpha EKB's single agent is mapped through to HGNC."""
    fname = os.path.join(path_this, 'trips_ekbs', 'estrogen_receptor_alpha.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    assert 'HGNC' in agent.db_refs, agent.db_refs
def stmts_from_text(text):
    """Return a list of INDRA Statements from text."""
    ekb_xml = read_or_load(text)
    processor = trips.process_xml(ekb_xml)
    # If the EKB mentions a cell line, attach it as context to every
    # extracted Statement.
    cell_line = get_cell_line(ET.fromstring(ekb_xml))
    if cell_line:
        set_cell_line_context(processor.statements, cell_line)
    return processor.statements
def test_fplx_hgnc_redundancy():
    """The cFos EKB agent is FOS with no redundant FPLX grounding."""
    fname = os.path.join(path_this, 'trips_ekbs', 'cfos_gene.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    assert agent.name == 'FOS', agent.name
    assert 'FPLX' not in agent.db_refs
def test_ncit_up_hgnc_mapping():
    """The ER-alpha EKB's single agent is mapped through to HGNC."""
    fname = os.path.join(path_this, 'trips_ekbs', 'estrogen_receptor_alpha.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    assert 'HGNC' in agent.db_refs, agent.db_refs
def test_fplx_hgnc_redundancy():
    """The cFos EKB agent is FOS with no redundant FPLX grounding."""
    fname = os.path.join(path_this, 'trips_ekbs', 'cfos_gene.ekb')
    with open(fname, 'r') as fh:  # close the file instead of leaking it
        tp = trips.process_xml(fh.read())
    agents = tp.get_agents()
    assert len(agents) == 1, agents
    agent = agents[0]
    assert agent.name == 'FOS', agent.name
    assert 'FPLX' not in agent.db_refs
def agent_from_text(text):
    """Return a single INDRA Agent from text."""
    tp = trips.process_xml(ekb_from_text(text))
    agents = tp.get_agents()
    # Prefer the first agent that carries bound conditions; otherwise
    # fall back to the first agent overall.
    with_bounds = [ag for ag in agents if ag.bound_conditions]
    if with_bounds:
        return with_bounds[0]
    return agents[0]
def trips_process_xml():
    """Process TRIPS EKB XML and return INDRA Statements."""
    # CORS preflight requests get an empty response.
    if request.method == 'OPTIONS':
        return {}
    raw_body = request.body.read().decode('utf-8')
    payload = json.loads(raw_body)
    xml_str = payload.get('xml_str')
    return _stmts_from_proc(trips.process_xml(xml_str))
def trips_process_xml():
    """Process TRIPS EKB XML and return INDRA Statements."""
    # CORS preflight requests get an empty response.
    if request.method == 'OPTIONS':
        return {}
    raw_body = request.body.read().decode('utf-8')
    payload = json.loads(raw_body)
    xml_str = payload.get('xml_str')
    return _stmts_from_proc(trips.process_xml(xml_str))
def trips_process_xml():
    """Process TRIPS EKB XML and return INDRA Statements."""
    # CORS preflight requests get an empty response.
    if request.method == 'OPTIONS':
        return {}
    payload = json.loads(request.body.read().decode('utf-8'))
    tp = trips.process_xml(payload.get('xml_str'))
    # Serialize any extracted Statements; an empty list otherwise.
    stmts = stmts_to_json(tp.statements) if tp and tp.statements else []
    return {'statements': stmts}
def has_mechanism(self, mech_ekb, model_id):
    """Return True if the given model contains the given mechanism."""
    tp = trips.process_xml(mech_ekb)
    result = {}
    if not tp.statements:
        result['has_mechanism'] = False
        return result
    query = tp.statements[0]
    result['query'] = query
    # The model has the mechanism iff some model Statement refines the
    # query Statement.
    result['has_mechanism'] = any(
        stmt.refinement_of(query, hierarchies)
        for stmt in self.models[model_id])
    return result
def has_mechanism(self, mech_ekb, model_id):
    """Return True if the given model contains the given mechanism."""
    tp = trips.process_xml(mech_ekb)
    result = {}
    if not tp.statements:
        result['has_mechanism'] = False
        return result
    query = tp.statements[0]
    result['query'] = query
    # Scan the model; stop at the first Statement refining the query.
    for stmt in self.models[model_id]:
        if stmt.refinement_of(query, hierarchies):
            result['has_mechanism'] = True
            return result
    result['has_mechanism'] = False
    return result
def build_model_from_ekb(self, model_ekb):
    """Build a model using DRUM extraction knowledge base.

    Returns a dict with the new model id and Statements, plus (for a
    non-empty model) grounding ambiguities, the executable PySB model
    and diagrams. On processing failure an error dict is returned.
    """
    tp = trips.process_xml(model_ekb)
    if tp is None:
        return {'error': 'Failed to process EKB.'}
    stmts = tp.statements
    model_id = self.new_model(stmts)
    res = {'model_id': model_id, 'model': stmts}
    # Nothing further can be assembled from an empty Statement set.
    if not stmts:
        return res
    ambiguities = get_ambiguities(tp)
    res['ambiguities'] = ambiguities
    model_exec = self.assemble_pysb(stmts)
    res['model_exec'] = model_exec
    res['diagrams'] = make_diagrams(model_exec, model_id)
    return res
def get_entity(self): ekb_str = self.to_string() # Now process the EKB using the TRIPS processor to extract Statements tp = process_xml(ekb_str) # If there are any statements then we can return the CL-JSON of those if tp.statements: self.set_cell_line_context_for_stmts(tp.statements) res = tp.statements # Otherwise, we try extracting an Agent and return that else: agent = tp._get_agent_by_id(self.root_term, None) if agent is None: return None # Set the TRIPS ID in db_refs agent.db_refs['TRIPS'] = 'ONT::' + self.root_term # Fix some namings if self.type.upper() in {'ONT::SIGNALING-PATHWAY', 'ONT::SIGNALING'}: simple_name = agent.name.lower().replace('-', ' ') if not simple_name.endswith('signaling pathway'): agent.name += ' signaling pathway' elif agent.name.isupper() \ and ' ' not in agent.name \ and '-' in agent.name: agent.name = simple_name agent.db_refs['TEXT'] = agent.name elif self.type.upper() == 'ONT::RNA': agent.name = (agent.db_refs['TEXT'] .upper() .replace('-', '') .replace('PUNCMINUS', '-')) # Set the agent type inferred_type = infer_agent_type(agent) if inferred_type is not None \ and self.type not in {'ONT::SIGNALING-PATHWAY', 'ONT::SIGNALING'}: agent.db_refs['TYPE'] = inferred_type elif self.type: agent.db_refs['TYPE'] = self.type.upper() res = agent return res
def process_trips(txt, reread=True):
    """Read *txt* with TRIPS (or reuse cached XML) and return Statements."""
    print('Using TRIPS')
    ts = time.time()
    if reread:
        stmts = []
        sentences = txt.strip().split('\n')
        for sentence in sentences:
            print(sentence)
            tp = trips.process_text(sentence)
            stmts += tp.statements
    else:
        # Close the cached XML file promptly instead of leaking the handle.
        with open('trips_output.xml', 'r') as fh:
            tp = trips.process_xml(fh.read())
        stmts = tp.statements
    te = time.time()
    print('Time taken: %.2fs' % (te-ts))
    for st in stmts:
        print('%s\t%s' % (st, st.evidence[0].text))
    return stmts
def post(self):
    """Process TRIPS EKB XML and return INDRA Statements.

    Parameters
    ----------
    xml_string : str
        A TRIPS extraction knowledge base (EKB) string to be processed.
        http://trips.ihmc.us/parser/api.html

    Returns
    -------
    statements : list[indra.statements.Statement.to_json()]
        A list of extracted INDRA Statements.
    """
    payload = request.json
    processor = trips.process_xml(payload.get('xml_str'))
    return _stmts_from_proc(processor)
def build_model_from_ekb(self, model_ekb):
    """Build a model using DRUM extraction knowledge base.

    Returns a dict with the new model id and Statements, plus (for a
    non-empty model) grounding ambiguities, the executable PySB model
    and diagrams; the diagnoser is also run on the result. On
    processing failure an error dict is returned.
    """
    tp = trips.process_xml(model_ekb)
    if tp is None:
        return {'error': 'Failed to process EKB.'}
    stmts = tp.statements
    model_id = self.new_model(stmts)
    res = {'model_id': model_id, 'model': stmts}
    # Nothing further can be assembled from an empty Statement set.
    if not stmts:
        return res
    ambiguities = get_ambiguities(tp)
    res['ambiguities'] = ambiguities
    model_exec = self.assemble_pysb(stmts)
    res['model_exec'] = model_exec
    res['diagrams'] = make_diagrams(model_exec, model_id,
                                    self.models[model_id], self.context)
    self.run_diagnoser(res, stmts, model_exec)
    return res
def expand_model_from_ekb(self, model_ekb, model_id):
    """Expand a model using DRUM extraction knowledge base.

    The Statements extracted from the EKB are merged into the model with
    the given id, producing a new model. The result dict carries the new
    model id, the full Statement set, the newly added Statements, plus
    (for a non-empty model) ambiguities, the executable model and
    diagrams.
    """
    tp = trips.process_xml(model_ekb)
    if tp is None:
        return {'error': 'Failed to process EKB.'}
    stmts = tp.statements
    new_model_id, new_stmts = self.extend_model(stmts, model_id)
    logger.info('Old model id: %s, New model id: %s' %
                (model_id, new_model_id))
    model_stmts = self.models[new_model_id]
    res = {'model_id': new_model_id, 'model': model_stmts}
    # Nothing further can be assembled from an empty Statement set.
    if not model_stmts:
        return res
    ambiguities = get_ambiguities(tp)
    res['ambiguities'] = ambiguities
    res['model_new'] = new_stmts
    model_exec = self.assemble_pysb(model_stmts)
    res['model_exec'] = model_exec
    res['diagrams'] = make_diagrams(model_exec, new_model_id)
    return res
def read_model(model_name, reread=False):
    """Return Statements for *model_name*, reading text only when needed."""
    xml_fname = model_name + '.xml'
    tp = None
    if not reread:
        print('Processing %s' % xml_fname)
        # Reuse the cached EKB XML when it exists.
        if os.path.exists(xml_fname):
            with open(xml_fname, 'rb') as fh:
                tp = trips.process_xml(fh.read())
    if tp is None:
        fname = model_name + '.txt'
        print('Reading %s' % fname)
        with open(fname, 'rb') as fh:
            start = time.time()
            tp = trips.process_text(fh.read(), xml_fname)
            elapsed = time.time() - start
        print('Reading took %.2fs' % elapsed)
    print('Assembling statements:')
    for idx, stmt in enumerate(tp.statements):
        print('%d: %s' % (idx, stmt))
    print('----------------------')
    return tp.statements
def read_model(model_name, reread=False):
    """Return Statements for *model_name*, reading text only when needed."""
    xml_fname = model_name + '.xml'
    tp = None
    if not reread:
        print('Processing %s' % xml_fname)
        # Reuse the cached EKB XML when it exists.
        if os.path.exists(xml_fname):
            with open(xml_fname, 'rb') as fh:
                tp = trips.process_xml(fh.read())
    if tp is None:
        fname = model_name + '.txt'
        print('Reading %s' % fname)
        with open(fname, 'rb') as fh:
            start = time.time()
            tp = trips.process_text(fh.read(), xml_fname)
            elapsed = time.time() - start
        print('Reading took %.2fs' % elapsed)
    print('Assembling statements:')
    for idx, stmt in enumerate(tp.statements):
        print('%d: %s' % (idx, stmt))
    print('----------------------')
    return tp.statements
def expand_model_from_ekb(self, model_ekb, model_id):
    """Expand a model using DRUM extraction knowledge base.

    The Statements extracted from the EKB are merged into the model with
    the given id, producing a new model. The result dict carries the new
    model id, the full Statement set, the newly added Statements, plus
    (for a non-empty model) ambiguities, the executable model and
    diagrams; the diagnoser is also run on the result.
    """
    tp = trips.process_xml(model_ekb)
    if tp is None:
        return {'error': 'Failed to process EKB.'}
    stmts = tp.statements
    new_model_id, new_stmts = self.extend_model(stmts, model_id)
    logger.info('Old model id: %s, New model id: %s' %
                (model_id, new_model_id))
    model_stmts = self.models[new_model_id]
    res = {'model_id': new_model_id, 'model': model_stmts}
    # Nothing further can be assembled from an empty Statement set.
    if not model_stmts:
        return res
    ambiguities = get_ambiguities(tp)
    res['ambiguities'] = ambiguities
    res['model_new'] = new_stmts
    model_exec = self.assemble_pysb(model_stmts)
    res['model_exec'] = model_exec
    res['diagrams'] = make_diagrams(model_exec, new_model_id,
                                    self.models[new_model_id], self.context)
    self.run_diagnoser(res, model_stmts, model_exec)
    return res
# Load the REACH reading output. The pickle must be opened in binary mode
# (text mode breaks pickle on Python 3); close handles via `with` instead
# of leaking them. Python 2 print statements modernized to print().
with open('reach/reach_stmts_batch_4_eval.pkl', 'rb') as f:
    reach_stmts = pickle.load(f)

# Load the PMID to PMCID map
pmcid_to_pmid = {}
with open('pmc_batch_4_id_map.txt') as f:
    csvreader = csv.reader(f, delimiter='\t')
    for row in csvreader:
        pmcid_to_pmid[row[0]] = row[1]

for pmcid in pmc_ids:
    print('Processing %s...' % pmcid)
    # Process TRIPS
    trips_fname = 'trips/' + pmcid + '.ekb'
    with open(trips_fname) as fh:
        tp = trips.process_xml(fh.read())
    # Get REACH statements
    reach_stmts_for_pmcid = reach_stmts.get(pmcid_to_pmid[pmcid], [])
    if not reach_stmts_for_pmcid:
        print("No REACH statements for %s" % pmcid)
    # Get NACTEM/ISI statements
    fname = 'nactem/' + pmcid + '.cards'
    if not os.path.exists(fname):
        nactem_stmts = []
    else:
        icp = index_cards.process_json_file(fname, 'nactem')
        nactem_stmts = icp.statements
    # Combine all statements
    all_statements = tp.statements + reach_stmts_for_pmcid + nactem_stmts
    # Run assembly
def assemble_model(model_id, reread=False):
    """Assemble and return the PySB model for the given model version.

    The model text is read (or its cached EKB XML reused), Statements are
    assembled into a PySB model with two-step policies, observables and
    initial conditions are added, rate parameters are set, and the model
    is saved to model<model_id>.py.
    """
    model_name = 'model%d' % model_id
    # If model has already been read, just process the EKB XML
    if os.path.exists(model_name + '.xml') and not reread:
        tp = trips.process_xml(open(model_name + '.xml').read())
    else:
        # Start with the basic model
        model_txt = open('model1.txt').read()
        # Apply patches one by one to get to the current model text
        for j in range(1, model_id):
            patch_txt = open('model%d_from%d.txt' % (j + 1, j)).read()
            model_txt = apply_patch(model_txt, patch_txt)
        print('Reading model %d text:' % model_id)
        print(model_txt)
        # Process model text and save result EKB XML
        tp = trips.process_text(model_txt, model_name + '.xml')
    print('Assembling statements:')
    for i, st in enumerate(tp.statements):
        print('%d: %s' % (i, st))
    # Assemble the PySB model
    pa = PysbAssembler()
    pa.add_statements(tp.statements)
    model = pa.make_model(policies='two_step')
    # Set initial conditions and observables
    erk = model.monomers['ERK']
    obs = Observable('ERK_p', erk(phospho='p'))
    model.add_component(obs)
    vem = model.monomers['VEMURAFENIB']
    obs = Observable('Vem_free', vem(map3k=None))
    model.add_component(obs)
    ras = model.monomers['RAS']
    obs = Observable('RAS_active', ras(gtp=ANY))
    model.add_component(obs)
    braf = model.monomers['BRAF']
    obs = Observable('BRAF_active', braf(vemurafenib=None))
    model.add_component(obs)
    # Wild-type BRAF starts at zero; the V600E mutant is added below.
    model.parameters['BRAF_0'].value = 0
    egf = model.monomers['EGF']
    obs = Observable('EGF_free', egf(erbb=None))
    model.add_component(obs)
    # Add mutated form of BRAF as initial condition
    sites_dict = {}
    for site in braf.sites:
        if site in braf.site_states:
            sites_dict[site] = braf.site_states[site][0]
        else:
            sites_dict[site] = None
    sites_dict['V600'] = 'E'
    model.add_component(Parameter('BRAF_mut_0', 1e5))
    model.initial(braf(**sites_dict), model.parameters['BRAF_mut_0'])
    # Set up model parameters (binding/catalysis rates and initial amounts)
    model.parameters['kf_ee_bind_1'].value = 1
    model.parameters['kr_ee_bind_1'].value = 0.1
    model.parameters['kf_ee_bind_2'].value = 1
    model.parameters['kr_ee_bind_2'].value = 0.1
    model.parameters['kf_eg_bind_1'].value = 1
    model.parameters['kr_eg_bind_1'].value = 0.1
    model.parameters['kf_gs_bind_1'].value = 1
    model.parameters['kr_gs_bind_1'].value = 0.1
    model.parameters['kf_sr_bind_1'].value = 1
    model.parameters['kr_sr_bind_1'].value = 50
    model.parameters['kf_rg_bind_1'].value = 50
    model.parameters['kr_rg_bind_1'].value = 0.5
    model.parameters['kf_rb_bind_1'].value = 1
    model.parameters['kr_rb_bind_1'].value = 0.5
    model.parameters['kf_vb_bind_1'].value = 10
    model.parameters['kr_vb_bind_1'].value = 1
    model.parameters['kf_bm_bind_1'].value = 1
    model.parameters['kr_bm_bind_1'].value = 0.1
    model.parameters['kc_bm_phosphorylation_1'].value = 3
    model.parameters['kf_pm_bind_1'].value = 1
    model.parameters['kr_pm_bind_1'].value = 0.001
    model.parameters['kc_pm_dephosphorylation_1'].value = 10
    model.parameters['kf_me_bind_1'].value = 1
    model.parameters['kr_me_bind_1'].value = 0.1
    model.parameters['kc_me_phosphorylation_1'].value = 10
    model.parameters['kf_de_bind_1'].value = 1
    model.parameters['kr_de_bind_1'].value = 0.001
    model.parameters['kc_de_dephosphorylation_1'].value = 10
    model.parameters['VEMURAFENIB_0'].value = 0
    model.parameters['EGF_0'].value = 1e3
    model.parameters['EGFR_0'].value = 1e5
    model.parameters['SOS_0'].value = 1e3
    model.parameters['GRB2_0'].value = 1e5
    model.parameters['RAS_0'].value = 2e5
    model.parameters['GTP_0'].value = 1e7
    model.parameters['MEK_0'].value = 1e5
    model.parameters['ERK_0'].value = 1e5
    model.parameters['DUSP6_0'].value = 1e3
    model.parameters['PPP2CA_0'].value = 1e5
    # Parameters only present from model version 2 onwards
    if model_id >= 2:
        model.parameters['Phosphatase_0'].value = 1e2
        model.parameters['kf_es_bind_1'].value = 1e-05
        model.parameters['kr_es_bind_1'].value = 1e-04
        model.parameters['kc_es_phosphorylation_1'].value = 1
        model.parameters['kf_ps_bind_1'].value = 1
        model.parameters['kr_ps_bind_1'].value = 0.1
        model.parameters['kc_ps_dephosphorylation_1'].value = 1e-04
    # Parameters only present from model version 3 onwards
    if model_id >= 3:
        model.parameters['kf_bb_bind_1'].value = 10
        model.parameters['kr_bb_bind_1'].value = 1
        model.parameters['kf_vb_bind_2'].value = 1e-04
    pa.model = model
    pa.save_model('model%d.py' % model_id)
    return model
def read_pmid_sentences(pmid_sentences, **drum_args):
    """Read sentences from a PMID-keyed dictionary and return all Statements

    Parameters
    ----------
    pmid_sentences : dict[str, list[str]]
        A dictionary where each key is a PMID pointing to a list of
        sentences to be read.
    **drum_args
        Keyword arguments passed directly to the DrumReader. Typical
        things to specify are `host` and `port`. If `run_drum` is
        specified as True, this process will internally run the DRUM
        reading system as a subprocess. Otherwise, DRUM is expected to
        be running independently.

    Returns
    -------
    all_statements : dict[str, list[indra.statements.Statement]]
        A dict keyed by PMID of the INDRA Statements resulting from
        reading each PMID's sentences.
    """
    def _set_pmid(statements, pmid):
        # Attach the PMID to every evidence of every Statement.
        for stmt in statements:
            for evidence in stmt.evidence:
                evidence.pmid = pmid
    # See if we need to start DRUM as a subprocess
    run_drum = drum_args.get('run_drum', False)
    drum_process = None
    all_statements = {}
    # Iterate over all the keys and sentences to read
    for pmid, sentences in pmid_sentences.items():
        logger.info('================================')
        logger.info('Processing %d sentences for %s' % (len(sentences), pmid))
        ts = time.time()
        # Make a DrumReader instance
        drum_args['name'] = 'DrumReader%s' % pmid
        dr = DrumReader(**drum_args)
        time.sleep(3)
        # If there is no DRUM process set yet, we get the one that was
        # just started by the DrumReader
        if run_drum and drum_process is None:
            drum_args.pop('run_drum', None)
            drum_process = dr.drum_system
            # By setting this, we ensure that the reference to the
            # process is passed in to all future DrumReaders
            drum_args['drum_system'] = drum_process
        # Now read each sentence for this key
        for sentence in sentences:
            dr.read_text(sentence)
        # Start receiving results and exit when done
        try:
            dr.start()
        except SystemExit:
            pass
        statements = []
        # Process all the extractions into INDRA Statements
        for extraction in dr.extractions:
            # Sometimes we get nothing back
            if not extraction:
                continue
            tp = process_xml(extraction)
            statements += tp.statements
        # Set the PMIDs for the evidences of the Statements
        _set_pmid(statements, pmid)
        te = time.time()
        logger.info('Reading took %d seconds and produced %d Statements.' %
                    (te-ts, len(statements)))
        all_statements[pmid] = statements
    # If we were running a DRUM process, we should kill it
    if drum_process and dr.drum_system:
        dr._kill_drum()
    return all_statements
def parse_results(reading_content):
    """Process TRIPS EKB XML reading output via process_xml."""
    processor = process_xml(reading_content)
    return processor
# Load the REACH reading output
with open('reach/reach_stmts_batch_4_eval.pkl', 'rb') as f:
    reach_stmts = pickle.load(f)

# Load the PMID to PMCID map
pmcid_to_pmid = {}
csvreader = read_unicode_csv('pmc_batch_4_id_map.txt', delimiter='\t')
for row in csvreader:
    pmcid_to_pmid[row[0]] = row[1]

for pmcid in pmc_ids:
    print('Processing %s...' % pmcid)
    # Process TRIPS; use a context manager so the EKB file handle is
    # closed instead of leaked each iteration.
    trips_fname = 'trips/' + pmcid + '.ekb'
    with open(trips_fname) as fh:
        tp = trips.process_xml(fh.read())
    # Get REACH statements
    reach_stmts_for_pmcid = reach_stmts.get(pmcid_to_pmid[pmcid], [])
    if not reach_stmts_for_pmcid:
        print("No REACH statements for %s" % pmcid)
    # Get prior statements
    rasmodel_stmts = rasmodel.get_statements()
    # Combine all statements
    all_statements = tp.statements + reach_stmts_for_pmcid
    # Give every Statement a fresh unique identifier before assembly.
    for stmt in all_statements:
        stmt.uuid = str(uuid.uuid4())
    # Run assembly
    run_assembly(all_statements, 'combined', pmcid,
                 background_assertions=rasmodel_stmts)
def process_ekb(ekb):
    """Return (agent tuples, ambiguities) extracted from an EKB string."""
    tp = trips.process_xml(ekb)
    return get_agent_tuples(tp), get_ambiguities(tp)
def stmts_json_from_text(text):
    """Read *text* and return the extracted Statements serialized to JSON."""
    processor = trips.process_xml(read_or_load(text))
    return stmts_to_json(processor.statements)
def stmts_json_from_text(text):
    """Read *text* and return the extracted Statements serialized to JSON."""
    processor = trips.process_xml(read_or_load(text))
    return stmts_to_json(processor.statements)
def assemble_model(model_id, reread=False):
    """Assemble and return the PySB model for the given model version.

    The model text is read (or its cached EKB XML reused), Statements are
    assembled into a PySB model with two-step policies, observables and
    initial conditions are added, rate parameters are set, and the model
    is saved to model<model_id>.py.
    """
    model_name = 'model%d' % model_id
    # If model has already been read, just process the EKB XML
    if os.path.exists(model_name + '.xml') and not reread:
        tp = trips.process_xml(open(model_name + '.xml').read())
    else:
        # Start with the basic model
        model_txt = open('model1.txt').read()
        # Apply patches one by one to get to the current model text
        for j in range(1, model_id):
            patch_txt = open('model%d_from%d.txt' % (j+1, j)).read()
            model_txt = apply_patch(model_txt, patch_txt)
        print('Reading model %d text:' % model_id)
        print(model_txt)
        # Process model text and save result EKB XML
        tp = trips.process_text(model_txt, model_name + '.xml')
    print('Assembling statements:')
    for i, st in enumerate(tp.statements):
        print('%d: %s' % (i, st))
    # Assemble the PySB model
    pa = PysbAssembler()
    pa.add_statements(tp.statements)
    model = pa.make_model(policies='two_step')
    # Set initial conditions and observables
    erk = model.monomers['ERK']
    obs = Observable('ERK_p', erk(phospho='p'))
    model.add_component(obs)
    vem = model.monomers['VEMURAFENIB']
    obs = Observable('Vem_free', vem(map3k=None))
    model.add_component(obs)
    ras = model.monomers['RAS']
    obs = Observable('RAS_active', ras(gtp=ANY))
    model.add_component(obs)
    braf = model.monomers['BRAF']
    obs = Observable('BRAF_active', braf(vemurafenib=None))
    model.add_component(obs)
    # Wild-type BRAF starts at zero; the V600E mutant is added below.
    model.parameters['BRAF_0'].value = 0
    egf = model.monomers['EGF']
    obs = Observable('EGF_free', egf(erbb=None))
    model.add_component(obs)
    # Add mutated form of BRAF as initial condition
    sites_dict = {}
    for site in braf.sites:
        if site in braf.site_states:
            sites_dict[site] = braf.site_states[site][0]
        else:
            sites_dict[site] = None
    sites_dict['V600'] = 'E'
    model.add_component(Parameter('BRAF_mut_0', 1e5))
    model.initial(braf(**sites_dict), model.parameters['BRAF_mut_0'])
    # Set up model parameters (binding/catalysis rates and initial amounts)
    model.parameters['kf_ee_bind_1'].value = 1
    model.parameters['kr_ee_bind_1'].value = 0.1
    model.parameters['kf_ee_bind_2'].value = 1
    model.parameters['kr_ee_bind_2'].value = 0.1
    model.parameters['kf_eg_bind_1'].value = 1
    model.parameters['kr_eg_bind_1'].value = 0.1
    model.parameters['kf_gs_bind_1'].value = 1
    model.parameters['kr_gs_bind_1'].value = 0.1
    model.parameters['kf_sr_bind_1'].value = 1
    model.parameters['kr_sr_bind_1'].value = 50
    model.parameters['kf_rg_bind_1'].value = 50
    model.parameters['kr_rg_bind_1'].value = 0.5
    model.parameters['kf_rb_bind_1'].value = 1
    model.parameters['kr_rb_bind_1'].value = 0.5
    model.parameters['kf_vb_bind_1'].value = 10
    model.parameters['kr_vb_bind_1'].value = 1
    model.parameters['kf_bm_bind_1'].value = 1
    model.parameters['kr_bm_bind_1'].value = 0.1
    model.parameters['kc_bm_phosphorylation_1'].value = 3
    model.parameters['kf_pm_bind_1'].value = 1
    model.parameters['kr_pm_bind_1'].value = 0.001
    model.parameters['kc_pm_dephosphorylation_1'].value = 10
    model.parameters['kf_me_bind_1'].value = 1
    model.parameters['kr_me_bind_1'].value = 0.1
    model.parameters['kc_me_phosphorylation_1'].value = 10
    model.parameters['kf_de_bind_1'].value = 1
    model.parameters['kr_de_bind_1'].value = 0.001
    model.parameters['kc_de_dephosphorylation_1'].value = 10
    model.parameters['VEMURAFENIB_0'].value = 0
    model.parameters['EGF_0'].value = 1e3
    model.parameters['EGFR_0'].value = 1e5
    model.parameters['SOS_0'].value = 1e3
    model.parameters['GRB2_0'].value = 1e5
    model.parameters['RAS_0'].value = 2e5
    model.parameters['GTP_0'].value = 1e7
    model.parameters['MEK_0'].value = 1e5
    model.parameters['ERK_0'].value = 1e5
    model.parameters['DUSP6_0'].value = 1e3
    model.parameters['PPP2CA_0'].value = 1e5
    # Parameters only present from model version 2 onwards
    if model_id >= 2:
        model.parameters['Phosphatase_0'].value = 1e2
        model.parameters['kf_es_bind_1'].value = 1e-05
        model.parameters['kr_es_bind_1'].value = 1e-04
        model.parameters['kc_es_phosphorylation_1'].value = 1
        model.parameters['kf_ps_bind_1'].value = 1
        model.parameters['kr_ps_bind_1'].value = 0.1
        model.parameters['kc_ps_dephosphorylation_1'].value = 1e-04
    # Parameters only present from model version 3 onwards
    if model_id >= 3:
        model.parameters['kf_bb_bind_1'].value = 10
        model.parameters['kr_bb_bind_1'].value = 1
        model.parameters['kf_vb_bind_2'].value = 1e-04
    pa.model = model
    pa.save_model('model%d.py' % model_id)
    return model