def test_complex_agent_refinement():
    """Check complexes whose RAF differs only in ubiquitination polarity.

    Two RAS-RAF complexes are built, one with RAF ubiquitination flagged as
    modified and one flagged as not modified. After combining related
    statements, two unique and two related statements are expected.
    """
    ras = Agent('RAS')
    ubiquitinated = ModCondition('ubiquitination', None, None, True)
    not_ubiquitinated = ModCondition('ubiquitination', None, None, False)
    raf_ub = Agent('RAF', mods=[ubiquitinated])
    raf_not_ub = Agent('RAF', mods=[not_ubiquitinated])
    complexes = [Complex([ras, raf_ub]), Complex([ras, raf_not_ub])]

    pa = Preassembler(hierarchies, stmts=complexes)
    pa.combine_related()

    assert len(pa.unique_stmts) == 2
    assert len(pa.related_stmts) == 2
def test_dynamic_property_to_english():
    """Check the English rendering of a DynamicProperty for two patterns."""
    phospho = ModCondition('phosphorylation')
    egfr = Agent('EGFR', mods=[phospho], db_refs={'HGNC': '3236'})
    query = DynamicProperty(egfr, 'always_value', 'low', 'qualitative')

    always_sentence = query.to_english()
    assert always_sentence == 'Phosphorylated EGFR is always low.'

    # Switch the pattern on the same query object and render again.
    query.pattern_type = 'eventual_value'
    eventual_sentence = query.to_english()
    assert eventual_sentence == 'Phosphorylated EGFR is eventually low.'
def test_duplicates_sorting():
    """Check that duplicate phosphorylations are merged by the Preassembler.

    Three statements are given, of which the first and third are built from
    identical arguments; two unique statements are expected afterwards.
    """
    generic_phos = ModCondition('phosphorylation')
    site_phos = [
        ModCondition('phosphorylation', 'serine', site)
        for site in ('218', '222', '298')
    ]
    map2k1_generic = Agent('MAP2K1', mods=[generic_phos])
    map2k1_sites = Agent('MAP2K1', mods=site_phos)
    mapk3 = Agent('MAPK3')

    stmts = [
        Phosphorylation(map2k1_generic, mapk3, position='218'),
        Phosphorylation(map2k1_sites, mapk3),
        Phosphorylation(map2k1_generic, mapk3, position='218'),
    ]
    pa = Preassembler(hierarchies, stmts=stmts)
    pa.combine_duplicates()

    assert len(pa.unique_stmts) == 2
def test_stringify_dynamic_property():
    """Check the ``str()`` representation of a DynamicProperty query."""
    egfr = Agent('EGFR', mods=[ModCondition('phosphorylation')],
                 db_refs={'HGNC': '3236'})
    query = DynamicProperty(egfr, 'always_value', 'low', 'qualitative')

    expected = ("DynamicPropertyQuery(entity=EGFR(mods: "
                "(phosphorylation)), pattern=always_value, "
                "molecular quantity=('qualitative', 'low'))")
    assert str(query) == expected
def test_find_contradicts():
    """Check that three contradicting statement pairs are detected.

    The pairs are Inhibition/Activation, IncreaseAmount/DecreaseAmount and
    two ActiveForm statements with opposite activity flags.
    """
    inhibition = Inhibition(Agent('a'), Agent('b'))
    activation = Activation(Agent('a'), Agent('b'))
    increase = IncreaseAmount(Agent('a'), Agent('b'))
    decrease = DecreaseAmount(Agent('a'), Agent('b'))
    active = ActiveForm(
        Agent('a', mods=[ModCondition('phosphorylation', None, None, True)]),
        'kinase', True)
    inactive = ActiveForm(
        Agent('a', mods=[ModCondition('phosphorylation', None, None, True)]),
        'kinase', False)

    all_stmts = [inhibition, activation, increase, decrease, active, inactive]
    pa = Preassembler(hierarchies, all_stmts)
    contradicts = pa.find_contradicts()
    assert len(contradicts) == 3

    # Pair membership is compared as sets, so the order within a pair is
    # irrelevant.
    expected_pairs = (
        {inhibition.uuid, activation.uuid},
        {increase.uuid, decrease.uuid},
        {active.uuid, inactive.uuid},
    )
    for first, second in contradicts:
        found_pair = {first.uuid, second.uuid}
        assert found_pair in expected_pairs
def test_complex_refinement_order():
    """Check Complex refinement when the members are listed in another order.

    The second complex lists a phosphorylated ELK1 first and MED23 second,
    while the first lists unmodified MED23 and then ELK1. A single related
    statement is expected after combining.
    """
    plain_complex = Complex([Agent('MED23'), Agent('ELK1')])
    phospho_elk1 = Agent('ELK1', mods=[ModCondition('phosphorylation')])
    reordered_complex = Complex([phospho_elk1, Agent('MED23')])

    pa = Preassembler(hierarchies, stmts=[plain_complex, reordered_complex])
    pa.combine_duplicates()
    pa.combine_related()

    assert len(pa.related_stmts) == 1
def test_intervention_query_to_english():
    """Check English rendering of SimpleInterventionProperty queries."""
    egf = Agent('EGF', db_refs={'HGNC': '3229'})
    erk = Agent('ERK', db_refs={'FPLX': 'ERK'})
    phospho_erk = Agent('ERK', db_refs={'FPLX': 'ERK'},
                        mods=[ModCondition('phosphorylation')])

    up_query = SimpleInterventionProperty(egf, erk, 'up')
    assert up_query.to_english() == 'EGF increases ERK.'

    down_query = SimpleInterventionProperty(egf, erk, 'down')
    assert down_query.to_english() == 'EGF decreases ERK.'

    phospho_query = SimpleInterventionProperty(egf, phospho_erk, 'up')
    assert phospho_query.to_english() == 'EGF increases phosphorylated ERK.'
def test_cljson():
    """Round-trip an Agent through ``Bioagent`` CL-JSON and compare.

    The recovered Agent is expected to carry an extra ``TYPE`` db_refs entry,
    which is removed before checking equality with the original.
    """
    braf_mod = ModCondition('phosphorylation', 'T', '396', False)
    original = Agent('BRAF', mods=[braf_mod],
                     db_refs={'TEXT': 'Braf', 'HGNC': '123'})

    cl_json = Bioagent.make_cljson(original)
    recovered = Bioagent.get_agent(cl_json)

    assert recovered.db_refs['TYPE'] == 'ONT::GENE-PROTEIN'
    recovered.db_refs.pop('TYPE')
    assert original.equals(recovered), (original, recovered)
def test_dynamic_property_to_json():
    """Check the JSON serialization of a DynamicProperty query."""
    egfr = Agent('EGFR', mods=[ModCondition('phosphorylation')],
                 db_refs={'HGNC': '3236'})
    query = DynamicProperty(egfr, 'always_value', 'low', 'qualitative')
    query_json = query.to_json()

    # Top-level fields
    assert query_json.get('type') == 'dynamic_property'
    assert query_json.get('pattern_type') == 'always_value'

    # Entity sub-document
    entity_json = query_json.get('entity')
    assert entity_json.get('name') == 'EGFR'
    assert entity_json.get('db_refs') == {"HGNC": "3236"}

    # Quantity sub-document
    quantity_json = query_json.get('quantity')
    assert quantity_json.get('type') == 'qualitative'
    assert quantity_json.get('value') == 'low'
def get_phospho_antibody_map(fname=antibody_map_file):
    """Build a map from phosphosite labels to phosphorylated substrate Agents.

    Parameters
    ----------
    fname : str
        Path to a comma-separated, UTF-8 encoded table with at least the
        columns ``phosphosite``, ``SUB_ID``, ``SUB_GENE`` and
        ``Actual_site``.

    Returns
    -------
    antibody_map : dict
        Maps each value of the ``phosphosite`` column to a list of Agents,
        each carrying a single phosphorylation ModCondition. Agents with the
        same name, residue and position are listed only once per key.
    """
    df = pandas.read_csv(fname, index_col=None, sep=',', encoding='utf8')
    antibody_map = {}

    for _, row in df.iterrows():
        ps = row['phosphosite']
        sub_upid = row['SUB_ID']
        if not pandas.isnull(sub_upid):
            # Keep only the part before the first dash (presumably this
            # strips a UniProt isoform suffix -- confirm against the data).
            if '-' in sub_upid:
                sub_upid = sub_upid.split('-')[0]
            sub_hgnc_symbol = uniprot_client.get_gene_name(sub_upid)
            sub_hgnc = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
        else:
            # No UniProt ID in the table: fall back to the gene symbol.
            # The HGNC ID must be bound to the same name used for db_refs
            # below, otherwise a stale ID from a previous row (or no ID at
            # all) would be attached to this Agent.
            sub_hgnc_symbol = row['SUB_GENE']
            sub_hgnc = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
            sub_upid = hgnc_client.get_uniprot_id(sub_hgnc)
            if sub_upid is None:
                continue

        sub = Agent(sub_hgnc_symbol,
                    db_refs={'UP': sub_upid, 'HGNC': sub_hgnc})

        # The first character of the site is the residue, the rest (if any)
        # is the position.
        actual_site = row['Actual_site']
        residue = actual_site[0]
        position = actual_site[1:] if len(actual_site) > 1 else None
        sub.mods = [ModCondition('phosphorylation', residue, position)]

        # Only add the Agent if an equivalent one is not already registered
        # for this phosphosite label.
        known_agents = antibody_map.setdefault(ps, [])
        already_known = any(
            p.name == sub.name
            and p.mods[0].residue == residue
            and p.mods[0].position == position
            for p in known_agents
        )
        if not already_known:
            known_agents.append(sub)
    return antibody_map
def test_bound_condition_deep_refinement():
    """Check refinement driven by a modification on a bound agent.

    The statement whose bound GTP carries a phosphorylation is the more
    specific one and should be the single top-level statement, supported by
    the statement whose bound GTP is unmodified.
    """
    chebi_gtp = {'CHEBI': '15996'}
    hgnc_nras = {'HGNC': '7989'}

    src = Agent('SRC', db_refs={'HGNC': '11283'})
    plain_gtp = Agent('GTP', db_refs=dict(chebi_gtp))
    phospho_gtp = Agent('GTP', mods=[ModCondition('phosphorylation')],
                        db_refs=dict(chebi_gtp))
    nras_plain = Agent('NRAS', db_refs=dict(hgnc_nras),
                       bound_conditions=[BoundCondition(plain_gtp, True)])
    nras_phospho = Agent('NRAS', db_refs=dict(hgnc_nras),
                         bound_conditions=[BoundCondition(phospho_gtp, True)])

    general = Phosphorylation(src, nras_plain, 'tyrosine', '32')
    specific = Phosphorylation(src, nras_phospho, 'tyrosine', '32')

    pa = Preassembler(hierarchies, stmts=[general, specific])
    top_level = pa.combine_related()

    # Only the more specific statement remains at the top level and it is
    # backed by the less specific one.
    assert len(top_level) == 1
    top_stmt = top_level[0]
    assert top_stmt.equals(specific)
    assert len(top_stmt.supported_by) == 1
    assert top_stmt.supported_by[0].equals(general)
def read_phosphosite(fname):
    """Read a kinase-substrate table into Phosphorylation statements.

    Parameters
    ----------
    fname : str
        Path to a CSV table with at least the columns ``SUB_ID``,
        ``SUB_GENE``, ``Actual_site``, ``phosphosite``, ``KIN_ID`` and
        ``KINASE_GENE_SYMBOL``.

    Returns
    -------
    statements : list
        One Phosphorylation statement per row whose kinase is not rejected
        as non-human, each with a single Evidence whose ``source_api`` is
        ``'phosphosite'``.
    antibody_map : dict
        Maps each value of the ``phosphosite`` column to a list of substrate
        Agents carrying the corresponding phosphorylation ModCondition.
        Rows with a non-human kinase still contribute to this map.
    """
    df = pandas.read_csv(fname, index_col=None)
    statements = []
    antibody_map = {}

    for _, row in df.iterrows():
        # --- Substrate ---
        sub_upid = row['SUB_ID']
        if not pandas.isnull(sub_upid):
            sub_hgnc_symbol = uniprot_client.get_gene_name(sub_upid)
            sub_hgnc = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
        else:
            # Fall back to the gene symbol. The HGNC ID is bound to the
            # same name used in db_refs below so that a stale value from a
            # previous row is never reused.
            sub_hgnc_symbol = row['SUB_GENE']
            sub_hgnc = hgnc_client.get_hgnc_id(sub_hgnc_symbol)
            sub_upid = hgnc_client.get_uniprot_id(sub_hgnc)
        sub = Agent(sub_hgnc_symbol,
                    db_refs={'UP': sub_upid, 'HGNC': sub_hgnc})

        # The first character of the site is the residue, the rest (if any)
        # is the position.
        actual_site = row['Actual_site']
        residue = actual_site[0]
        position = actual_site[1:] if len(actual_site) > 1 else None

        # The readout Agent is a copy so that the substrate used in the
        # statement itself stays free of the modification condition.
        sub_readout = deepcopy(sub)
        sub_readout.mods = [ModCondition('phosphorylation', residue, position)]

        # Register the readout unless an equivalent Agent is already listed
        # for this phosphosite label.
        ps = row['phosphosite']
        known_agents = antibody_map.setdefault(ps, [])
        already_known = any(
            p.name == sub.name
            and p.mods[0].residue == residue
            and p.mods[0].position == position
            for p in known_agents
        )
        if not already_known:
            known_agents.append(sub_readout)

        # --- Kinase ---
        kin_upid = row['KIN_ID']
        if not pandas.isnull(kin_upid):
            if not uniprot_client.is_human(kin_upid):
                print('%s non human' % kin_upid)
                continue
            kin_hgnc_symbol = uniprot_client.get_gene_name(kin_upid)
            kin_hgnc = hgnc_client.get_hgnc_id(kin_hgnc_symbol)
        else:
            # Same fallback as for the substrate, with the HGNC ID bound to
            # the name that db_refs uses.
            kin_hgnc_symbol = row['KINASE_GENE_SYMBOL']
            kin_hgnc = hgnc_client.get_hgnc_id(kin_hgnc_symbol)
            kin_upid = hgnc_client.get_uniprot_id(kin_hgnc)
        kin = Agent(kin_hgnc_symbol,
                    db_refs={'UP': kin_upid, 'HGNC': kin_hgnc})

        ev = Evidence(source_api='phosphosite')
        st = Phosphorylation(kin, sub, residue, position, evidence=[ev])
        statements.append(st)
    return statements, antibody_map
def test_missing_monomer_site():
    """Request an observable for a KRAS form built with a phosphorylation.

    The model is assembled from a single BRAF-activates-KRAS statement.
    There is no explicit assertion: the test passes as long as creating the
    observable does not raise.
    """
    activation = Activation(Agent('BRAF'), Agent('KRAS'))
    model = tra_module.assemble_model([activation])

    phospho = ModCondition('phosphorylation', None, None, True)
    phospho_kras = Agent('KRAS', mods=[phospho])
    tra.get_create_observable(model, phospho_kras)
def pysb_to_gromet(pysb_model, model_name, statements=None, fname=None):
    """Convert PySB model to GroMEt object and optionally save it as JSON.

    Parameters
    ----------
    pysb_model : pysb.Model
        PySB model object.
    model_name : str
        A name of EMMAA model.
    statements : Optional[list[indra.statements.Statement]]
        A list of INDRA Statements a PySB model was assembled from. If
        provided the statement hashes will be propagated into GroMEt metadata.
        If not provided (or if no statement is found for a rule), the
        statement hash in the reaction metadata is set to None.
    fname : Optional[str]
        If given, the GroMEt will be dumped into JSON file.

    Returns
    -------
    g : automates.script.gromet.gromet.Gromet
        A GroMEt object built from PySB model.
    """
    from gromet import Gromet, gromet_to_json, \
        Junction, Wire, UidJunction, UidType, UidWire, Relation, \
        UidBox, UidGromet, Literal, Val
    from gromet_metadata import IndraAgent, IndraAgentReferenceSet, \
        ReactionReference, UidMetadatum, MetadatumMethod, Provenance, \
        get_current_datetime, ModelInterface
    from pysb import Parameter, WILD
    from pysb.bng import generate_equations

    logger.info('Generating equations ...')
    generate_equations(pysb_model)

    logger.info('Creating GroMEt')
    junctions = []
    wires = []

    # Get all species values
    species_values = {}
    for initial in pysb_model.initials:
        ix = pysb_model.get_species_index(initial.pattern)
        if initial.value:
            species_values[ix] = Literal(uid=None, type=UidType("Integer"),
                                         value=Val(initial.value.value),
                                         name=None, metadata=None)

    # Get groundings for monomers: build up db_refs for each monomer object
    groundings_by_monomer = {}
    for ann in pysb_model.annotations:
        if ann.predicate == 'is':
            db_name, db_id = parse_identifiers_url(ann.object)
            groundings_by_monomer.setdefault(ann.subject, {})[db_name] = db_id

    # Store species names to refer later
    species_nodes = [str(sp) for sp in pysb_model.species]

    # Add all species junctions
    for ix, sp in enumerate(pysb_model.species):
        # Map to a list of agents
        agents = []
        for mp in sp.monomer_patterns:
            mods = []
            if hasattr(mp.monomer, 'site_annotations'):
                for site, state in mp.site_conditions.items():
                    # A (state, WILD) tuple is reduced to the plain state
                    if isinstance(state, tuple) and state[1] == WILD:
                        state = state[0]
                    mod, mod_type, res, pos = None, None, None, None
                    for ann in mp.monomer.site_annotations:
                        if ann.subject == (site, state):
                            mod_type = ann.object
                        elif ann.subject == site and \
                                ann.predicate == 'is_residue':
                            res = ann.object
                        if ann.subject == site and \
                                ann.predicate == 'is_position':
                            pos = ann.object
                    if mod_type:
                        not_mod, mod = states[mod_type]
                        if state == mod:
                            is_mod = True
                        elif state == not_mod:
                            is_mod = False
                        else:
                            logger.warning('Unknown state %s for %s, '
                                           'setting as not modified' %
                                           (state, mod_type))
                            is_mod = False
                        mod = ModCondition(mod_type, res, pos, is_mod)
                    if mod:
                        mods.append(mod)
            if not mods:
                mods = None
            ag = Agent(mp.monomer.name, mods=mods,
                       db_refs=groundings_by_monomer.get(mp.monomer))
            agents.append(ag)
        agent_metadata = IndraAgentReferenceSet(
            uid=UidMetadatum(f'{species_nodes[ix]}_metadata'),
            provenance=Provenance(method=MetadatumMethod('from_emmaa_model'),
                                  timestamp=get_current_datetime()),
            indra_agent_references=[IndraAgent(ag.to_json())
                                    for ag in agents])
        junctions.append(
            Junction(uid=UidJunction(f'J:{species_nodes[ix]}'),
                     type=UidType('State'),
                     name=species_nodes[ix],
                     value=species_values.get(ix),
                     value_type=UidType('Integer'),
                     metadata=[agent_metadata]))

    # Add wires for each reaction
    rate_counts = defaultdict(int)
    for rxn in pysb_model.reactions:
        rate_params = [
            rate_term for rate_term in rxn['rate'].args
            if isinstance(rate_term, Parameter)
        ]
        assert len(rate_params) == 1
        rate = rate_params[0].name
        # The same rate parameter can appear in several reactions, so a
        # running count keeps the rate node names unique.
        rate_counts[rate] += 1
        rate_node = f'{rate}:{rate_counts[rate]}'

        # Get metadata for rate node
        assert len(rxn['rule']) == 1
        assert len(rxn['reverse']) == 1
        rule = rxn['rule'][0]
        reverse = rxn['reverse'][0]

        # The statement hash is only available when statements were passed
        # in and one is found for this rule; otherwise it stays None rather
        # than failing on an unbound (or stale) statement variable.
        # NOTE(review): assumes ReactionReference accepts None for
        # indra_stmt_hash -- confirm against gromet_metadata.
        stmt_hash = None
        if statements:
            stmt = stmt_from_rule(rule, pysb_model, statements)
            if stmt is not None:
                stmt_hash = stmt.get_hash()

        # Add rate junction for a reaction (uid and name are the same for now)
        reaction_metadata = ReactionReference(
            uid=UidMetadatum(f'{rate_node}_metadata'),
            provenance=Provenance(method=MetadatumMethod('from_emmaa_model'),
                                  timestamp=get_current_datetime()),
            indra_stmt_hash=stmt_hash,
            reaction_rule=rule,
            is_reverse=reverse)
        wire_count = defaultdict(int)
        junctions.append(
            Junction(uid=UidJunction(f'J:{rate_node}'),
                     type=UidType('Rate'),
                     name=rate,
                     value=Literal(uid=None, type=UidType("Float"),
                                   value=Val(rate_params[0].value),
                                   name=None, metadata=None),
                     value_type=UidType('Float'),
                     metadata=[reaction_metadata]))

        # Add wires from reactant to rate
        for reactant_ix in rxn['reactants']:
            reactant = species_nodes[reactant_ix]
            wire = f'{reactant}_{rate_node}'
            wire_count[wire] += 1
            wires.append(
                Wire(uid=UidWire(f'W:{wire}:w{wire_count[wire]}'),
                     type=None,
                     value_type=None,
                     name=None, value=None, metadata=None,
                     src=UidJunction(f'J:{reactant}'),
                     tgt=UidJunction(f'J:{rate_node}')))

        # Add wires from rate to product
        for prod_ix in rxn['products']:
            prod = species_nodes[prod_ix]
            wire = f'{rate_node}_{prod}'
            wire_count[wire] += 1
            wires.append(
                Wire(uid=UidWire(f'W:{wire}:w{wire_count[wire]}'),
                     type=None,
                     value_type=None,
                     name=None, value=None, metadata=None,
                     src=UidJunction(f'J:{rate_node}'),
                     tgt=UidJunction(f'J:{prod}')))

    # Create relation
    pnc = Relation(
        uid=UidBox(model_name),
        type=UidType("PetriNetClassic"),
        name=model_name,
        ports=None,
        # contents
        junctions=[j.uid for j in junctions],
        wires=[w.uid for w in wires],
        boxes=None,
        metadata=None)
    boxes = [pnc]

    # Create model interface metadata
    model_interface = \
        ModelInterface(
            uid=UidMetadatum(f'{model_name}_model_interface'),
            provenance=Provenance(method=MetadatumMethod('from_emmaa_model'),
                                  timestamp=get_current_datetime()),
            variables=[j.uid for j in junctions],
            parameters=[j.uid for j in junctions if j.type == 'Rate'],
            initial_conditions=[j.uid for j in junctions
                                if j.type == 'State'])

    # Create Gromet object
    g = Gromet(uid=UidGromet(f'{model_name}_pnc'),
               name=model_name,
               type=UidType("PetriNetClassic"),
               root=pnc.uid,
               types=None,
               literals=None,
               junctions=junctions,
               ports=None,
               wires=wires,
               boxes=boxes,
               variables=None,
               metadata=[model_interface])
    logger.info('Created GroMEt')

    # Optionally save Gromet to JSON file
    if fname:
        gromet_to_json(g, fname)
    return g