def get_observed_result(self, DnaSpecie):
    """ Find the proteins returned for the sequence of a DNA species

    NOTE: Currently there are no Gene objects in common schema models. When added this query
    will be updated to input models.Gene and output data_model.ProteinSpecie

    Each entry returned by ``get_protein_by_DNA_sequence`` is indexed positionally: element 0
    is the protein record, element 1 is used as the binding position and element 2 as the score.

    Args:
        DnaSpecie (:obj:`data_model.DnaSpecie`): DNA species whose ``sequence`` is searched

    Returns:
        :obj:`list` of :obj:`data_model.Observable`: one observable per entry, pairing the
            protein with a 'Transcription Factor DNA Binding Site' interaction
    """
    hits = self.get_protein_by_DNA_sequence(DnaSpecie.sequence)

    observables = []
    for hit in hits:
        subunit = hit[0]
        specie = data_model.ProteinSpecie(
            uniprot_id=subunit.uniprot_id,
            entrez_id=subunit.entrez_id,
            gene_name=subunit.gene_name,
            length=subunit.length,
            mass=subunit.mass,
        )

        # every resource attached to the protein record is exposed as a pubmed reference
        specie.cross_references = [
            data_model.Resource(namespace='pubmed', id=resource._id)
            for resource in subunit._metadata.resource
        ]

        binding_site = data_model.Interaction(
            name='Transcription Factor DNA Binding Site',
            position=hit[1],
            score=hit[2],
        )
        observables.append(data_model.Observable(specie=specie, interaction=binding_site))

    return observables
def get_observed_concentrations(self, metabolite):
    """ Collect the concentrations recorded for a metabolite's structure

    The lookup is delegated to ``get_concentration_by_structure`` using the metabolite's
    InChI value with ``only_formula_and_connectivity=False``.

    Args:
        metabolite (:obj:`models.Metabolite`): metabolite to find data for

    Returns:
        :obj:`list` of :obj:`data_model.ObservedValue`: one observed value per concentration
            record, located in the first cell compartment listed in the record's metadata
    """
    matches = self.get_concentration_by_structure(
        metabolite.structure._value_inchi, only_formula_and_connectivity=False)

    return [
        data_model.ObservedValue(
            metadata=self.metadata_dump(match),
            observable=data_model.Observable(
                specie=self._port(metabolite),
                compartment=data_model.Compartment(
                    name=match._metadata.cell_compartment[0].name),
            ),
            value=match.value,
            error=match.error,
            units=match.units,
        )
        for match in matches
    ]
def test_group_observed_results_by_properties(self):
    # NOTE(review): this test only builds its fixtures; the generator is never
    # exercised and nothing is asserted yet.
    km_observable = data_model.Observable(property='Km')
    observed_results = [
        data_model.ObservedValue(observable=km_observable, value=1., units=''),
    ]

    gen = data_query.ConsensusGenerator()
def test_SpecieSequenceSimilarityFilter(self):
    # Two 50-character sequences; seq2 is seq1 with one base deleted and one inserted
    seq1 = 'CTAACTCTACCTCGTATGTATGGAAGTTCGTCTATCTCTGGTCGGTTGCT'
    seq2 = 'CTAACTCTACCTCGTATTATGGAAGTTCGTCTATCTTCTGGTCGGTTGCT'

    ov = data_model.ObservedValue(
        observable=data_model.Observable(specie=data_model.PolymerSpecie(sequence=seq1)))

    # min_similarity = 0: the similarity itself is returned
    f = data_query.SpecieSequenceSimilarityFilter(min_similarity=0.)
    # compare with a tolerance: the score is a float, so exact equality is brittle
    self.assertAlmostEqual(f.score(data_model.PolymerSpecie(sequence=seq2), ov), 0.96)

    # min_similarity = 0.98: the 0.96 similarity falls below the threshold, so the
    # filter reports -1
    f = data_query.SpecieSequenceSimilarityFilter(min_similarity=0.98)
    self.assertEqual(f.score(data_model.PolymerSpecie(sequence=seq2), ov), -1)
def test_SpecieStructuralSimilarityFilter(self):
    adp = 'NC1=C2N=CN(C3OC(COP([O-])(=O)OP([O-])([O-])=O)C(O)C3O)C2=NC=N1'
    atp = 'NC1=C2N=CN(C3OC(COP([O-])(=O)OP([O-])(=O)OP([O-])([O-])=O)C(O)C3O)C2=NC=N1'
    h2o = 'O'

    def observed(structure):
        # wrap a structure in the ObservedValue -> Observable -> Specie chain the filter scores
        return data_model.ObservedValue(
            observable=data_model.Observable(specie=data_model.Specie(structure=structure)))

    # (min_similarity, ((observed structure, expected score against ATP), ...))
    # Water scores 0 with no threshold and -1 once the threshold is 0.75.
    cases = (
        (0., ((adp, 0.955), (h2o, 0))),
        (0.75, ((adp, 0.955), (h2o, -1.))),
    )

    for min_similarity, expectations in cases:
        f = data_query.SpecieStructuralSimilarityFilter(min_similarity=min_similarity)
        for structure, expected_score in expectations:
            ov = observed(structure)
            numpy.testing.assert_almost_equal(
                f.score(data_model.Specie(structure=atp), ov), expected_score, decimal=3)
def test(self):
    # Construction-only test: builds a fully populated Consensus; nothing is asserted.
    observable = data_model.Observable()
    method = data_model.ConsensusMethod.mean
    evidence = [
        data_model.Evidence(value=data_model.ObservedValue(), relevance=relevance)
        for relevance in (10., 5.)
    ]
    timestamp = datetime.utcnow()

    c = data_model.Consensus(
        observable=observable,
        value=1.1,
        error=0.3,
        units='uM',
        method=method,
        evidence=evidence,
        user='******',
        date=timestamp,
    )
def get_observed_result(self, protein):
    """ Find the observed values for protein abundance

    Abundances are looked up by the protein's UniProt id. Every result is reported in
    units of 'PPM' with an error of 0.

    Args:
        protein (:obj:`models.ProteinSubunit`): Protein Subunit to find data for

    Returns:
        :obj:`list` of :obj:`data_model.ObservedValue`: list of relevant observed values
    """
    results = []

    for record in self.get_abundance_by_uniprot(protein.uniprot_id):
        dataset = record.dataset
        metadata = self.metadata_dump(dataset)

        specie = data_model.ProteinSpecie(
            name=protein.subunit_name,
            uniprot_id=protein.uniprot_id,
            entrez_id=protein.entrez_id,
            gene_name=protein.gene_name,
            length=protein.length,
            mass=protein.mass,
            sequence=protein.canonical_sequence,
        )
        observable = data_model.Observable(specie=specie)

        # reference the dataset file and the first resource recorded for the dataset
        publication = data_model.Resource(namespace='publication', id=dataset.file_name)
        url = data_model.Resource(namespace='url', id=dataset._metadata.resource[0]._id)
        observable.specie.cross_references = [publication, url]

        results.append(data_model.ObservedValue(
            metadata=metadata,
            observable=observable,
            value=record.abundance,
            error=0,
            units='PPM',
        ))

    return results
def get_observed_result(self, metabolite):
    """ Find observed concentrations for the metabolite or similar metabolites

    The lookup uses the metabolite's InChI value with ``only_formula_and_connectivity=False``.
    The same list of cross references, built from the metabolite's metadata resources, is
    passed to the species of every returned observation.

    Args:
        metabolite (:obj:`models.Metabolite`): metabolite to find data for

    Returns:
        :obj:`list` of :obj:`data_model.ObservedValue`: list of relevant observations
    """
    inchi = metabolite.structure._value_inchi
    records = self.get_concentration_by_structure(
        inchi, only_formula_and_connectivity=False).all()

    references = []
    for item in metabolite._metadata.resource:
        references.append(data_model.Resource(namespace=item.namespace, id=item._id))

    results = []
    for record in records:
        metadata = self.metadata_dump(record)

        specie = data_model.Specie(
            name=metabolite.metabolite_name,
            cross_references=references,
            structure=inchi,
        )
        # only the first compartment listed in the record's metadata is used
        compartment = data_model.Compartment(name=record._metadata.cell_compartment[0].name)
        observable = data_model.Observable(specie=specie, compartment=compartment)

        results.append(data_model.ObservedValue(
            metadata=metadata,
            observable=observable,
            value=record.value,
            error=record.error,
            units=record.units,
        ))

    return results
def get_observed_result(self, reaction):
    """ Find observed kinetics for the reaction or similar reactions

    TODO: Add compartment information

    Search strategy as described by the original authors (the lookup itself is
    delegated to ``get_kinetic_laws_by_reaction``):

    1. Find kinetics observed for the reaction

      a. Find the metabolite(s) of each participant
      b. Find the reaction(s) which contain all of these metabolites
      c. Find the kinetic laws associated with these reactions

    2. Find kinetics observed for similar reactions

      a. Find kinetics observed for the assigned EC number(s)
      b. Find kinetics observed for EC number(s) predicted by tools such as E-zyme

    For every kinetic law found, a :obj:`data_model.Reaction` is rebuilt from the
    law's reactant, product and modifier records (coefficients -1, 1 and 0), and one
    observed value is emitted per parameter of the law that has a value.

    Args:
        reaction (:obj:`data_model.Reaction`): reaction to find data for

    Returns:
        :obj:`list` of :obj:`data_model.ObservedValue`: list of relevant observed values

    Raises:
        :obj:`StopIteration`: if a kinetic law has no metadata resource in the
            'sabiork.reaction' namespace
    """
    observed_vals = []

    for law in self.get_kinetic_laws_by_reaction(reaction):
        # next() without a default: a law lacking this resource raises StopIteration
        common_schema_reaction_id = next(
            xr._id for xr in law._metadata.resource if xr.namespace == 'sabiork.reaction')

        # named distinctly so the `reaction` argument is not rebound inside the loop
        law_reaction = data_model.Reaction(
            cross_references=[
                data_model.Resource(namespace='common_schema.kinetic_law_id', id=str(law.kinetic_law_id)),
                data_model.Resource(namespace='sabiork.reaction', id=common_schema_reaction_id),
            ],
        )

        # species and compartments are shared between participants of the same law
        species = {}
        compartments = {}

        cs_rxn = self.data_source.session.query(models.Reaction).filter_by(
            kinetic_law_id=law.kinetic_law_id)
        participant_groups = (
            (cs_rxn.filter_by(_is_reactant=True).all(), -1),
            (cs_rxn.filter_by(_is_product=True).all(), 1),
            (cs_rxn.filter_by(_is_modifier=True).all(), 0),
        )

        for records, coefficient in participant_groups:
            for record in records:
                part = data_model.ReactionParticipant(coefficient=coefficient)

                if record.metabolite_id not in species:
                    species[record.metabolite_id] = data_model.Specie(
                        name=record.metabolite.metabolite_name)
                part.specie = species[record.metabolite_id]

                if record.metabolite.structure_id:
                    part.specie.structure = record.metabolite.structure._value_inchi

                if record.compartment_id:
                    compartment_name = record.compartment.name
                    if compartment_name not in compartments:
                        compartments[compartment_name] = data_model.Compartment(name=compartment_name)
                    part.compartment = compartments[compartment_name]

                law_reaction.participants.append(part)

        metadata = self.metadata_dump(law)

        for parameter in law.parameter:
            # parameters without a value carry no observation
            if parameter.value is None:
                continue

            observable = data_model.Observable(
                interaction=law_reaction,
                property=parameter.observed_name,
            )

            if parameter.metabolite_id:
                observable.specie = species[parameter.metabolite_id]

            # TODO: set observable.compartment from parameter.compartment once
            # compartment information is available

            observed_vals.append(data_model.ObservedValue(
                metadata=metadata,
                observable=observable,
                value=parameter.value,
                error=parameter.error,
                units=parameter.units,
            ))

    return observed_vals
def test_ReactionParticipantFilter(self):
    atp = data_model.Specie(structure=(
        'InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20'
        '/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/p-4/t4-,6-,7-,10-/m1/s1'
    ))
    h2o = data_model.Specie(structure='InChI=1S/H2O/h1H2')
    adp = data_model.Specie(structure=(
        'InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20'
        '/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20)/p-3/t4-,6-,7-,10-/m1/s1'
    ))
    pi = data_model.Specie(structure='InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)/p-2')
    h = data_model.Specie(structure='InChI=1S/p+1/i/hH')
    glc = data_model.Specie(structure='InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1')
    # same glucose InChI with everything after the connectivity layer removed
    glc_2 = data_model.Specie(structure='InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2')
    gtp = data_model.Specie(structure=(
        'InChI=1S/C10H16N5O14P3/c11-10-13-7-4(8(18)14-10)12-2-15(7)9-6(17)5(16)3(27-9)1-26-31(22,23)29-32(24,25)28-30(19,20)21'
        '/h2-3,5-6,9,16-17H,1H2,(H,22,23)(H,24,25)(H2,19,20,21)(H3,11,13,14,18)/p-4/t3-,5-,6-,9-/m1/s1'
    ))
    lactate = data_model.Specie(structure='InChI=1S/C3H6O3/c1-2(4)3(5)6/h2,4H,1H3,(H,5,6)/p-1')

    def build_reaction(ntp=atp, modifier=glc):
        # NTP + H2O -> ADP + Pi + H, with a zero-coefficient (modifier) participant
        stoichiometry = ((-1, ntp), (-1, h2o), (1, adp), (1, pi), (1, h), (0, modifier))
        return data_model.Reaction(
            participants=[
                data_model.ReactionParticipant(coefficient=coefficient, specie=specie)
                for coefficient, specie in stoichiometry
            ])

    def observed(**observable_attrs):
        # only the attributes given are set on the observable
        return data_model.ObservedValue(observable=data_model.Observable(**observable_attrs))

    f = data_query.ReactionParticipantFilter()

    rxn_atp = build_reaction(ntp=atp)
    rxn_gtp = build_reaction(ntp=gtp)
    rxn_lac = build_reaction(ntp=lactate)
    rxn_glc_2 = build_reaction(modifier=glc_2)

    # identical reactant
    ov = observed(property='Km', specie=atp, interaction=rxn_atp)
    self.assertEqual(f.score(rxn_atp, ov), 1)

    # identical product
    ov = observed(property='Ki', specie=adp, interaction=rxn_atp)
    self.assertEqual(f.score(rxn_atp, ov), 1)

    # similar modifier
    ov = observed(property='Ki', specie=glc_2, interaction=rxn_glc_2)
    self.assertEqual(f.score(rxn_atp, ov), 1)

    # similar species
    ov = observed(property='Km', specie=gtp, interaction=rxn_gtp)
    numpy.testing.assert_almost_equal(f.score(rxn_atp, ov), 0.767, decimal=3)

    # different species
    ov = observed(property='Km', specie=lactate, interaction=rxn_lac)
    self.assertEqual(f.score(rxn_atp, ov), -1)

    # property without species
    ov = observed(property='kcat', interaction=rxn_atp)
    self.assertEqual(f.score(rxn_atp, ov), 1)
def test_ReactionSimilarityFilter(self):
    atp_structure = (
        'InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20'
        '/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/p-4/t4-,6-,7-,10-/m1/s1'
    )
    h2o_structure = 'InChI=1S/H2O/h1H2'
    adp_structure = (
        'InChI=1S/C10H15N5O10P2/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(24-10)1-23-27(21,22)25-26(18,19)20'
        '/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H2,11,12,13)(H2,18,19,20)/p-3/t4-,6-,7-,10-/m1/s1'
    )
    pi_structure = 'InChI=1S/H3O4P/c1-5(2,3)4/h(H3,1,2,3,4)/p-2'
    h_structure = 'InChI=1S/p+1/i/hH'

    # phosphate stand-ins used for the observed reactions
    pi_connectivity_only = 'InChI=1S/H3O4P/c1-5(2,3)4'
    pi_different = 'InChI=1S/H4O4P/c1-5(2,3)4'

    def make_reaction(pi=pi_structure, ec='1.1.1.1'):
        # ATP + H2O -> ADP + Pi + H, tagged with an EC code
        stoichiometry = (
            (-1, atp_structure),
            (-1, h2o_structure),
            (1, adp_structure),
            (1, pi),
            (1, h_structure),
        )
        return data_model.Reaction(
            participants=[
                data_model.ReactionParticipant(coefficient=coefficient, specie=data_model.Specie(structure=structure))
                for coefficient, structure in stoichiometry
            ],
            cross_references=[
                data_model.Resource(namespace='ec-code', id=ec)
            ])

    def observe(interaction):
        return data_model.ObservedValue(observable=data_model.Observable(interaction=interaction))

    def new_filter():
        return data_query.ReactionSimilarityFilter(min_ec_level=3, scale=1)

    def check_score(scorer, target, observed_value, expected):
        numpy.testing.assert_almost_equal(scorer.score(target, observed_value), expected, decimal=3)

    rxn = make_reaction()
    f = new_filter()

    # same participants
    check_score(f, rxn, observe(make_reaction()), 1)

    # similar participants
    check_score(f, rxn, observe(make_reaction(pi=pi_connectivity_only)), 1)

    # different participants, same 4-digit EC
    check_score(f, rxn, observe(make_reaction(pi=pi_different)), math.exp(-1))

    # different participants, same 3-digit EC
    for ec in ('1.1.1.2', '1.1.1', '1.1.1.', '1.1.1.-'):
        check_score(f, rxn, observe(make_reaction(pi=pi_different, ec=ec)), math.exp(-2))

    # different participants, same 2-digit EC
    check_score(f, rxn, observe(make_reaction(pi=pi_different, ec='1.1.2.1')), -1)

    # target reaction only has 3 digits
    rxn_1_1_1 = make_reaction(ec='1.1.1')
    rxn_1_1_1_1 = make_reaction(ec='1.1.1.1')
    f1 = new_filter()
    f2 = new_filter()

    # (observed EC, expected score vs. 3-digit target, expected score vs. 4-digit target)
    ec_cases = (
        ('1.1.1', math.exp(-2), math.exp(-2)),
        ('1.1.1.1', math.exp(-2), math.exp(-1)),
        ('1.1.2.1', -1, -1),
    )
    for ec, expected_3_digit, expected_4_digit in ec_cases:
        ov = observe(make_reaction(pi=pi_different, ec=ec))
        check_score(f1, rxn_1_1_1, ov, expected_3_digit)
        check_score(f2, rxn_1_1_1_1, ov, expected_4_digit)

    # reverse direction, different numbers of reactants/products
    f = new_filter()
    for_rxn = make_reaction()
    rev_rxn = make_reaction()
    for part in rev_rxn.participants:
        part.coefficient = -part.coefficient

    self.assertEqual(f.score(for_rxn, observe(for_rxn)), 1.)
    self.assertEqual(f.score(for_rxn, observe(rev_rxn)), -1.)

    # reverse direction, same numbers of reactants/products
    def atp_adp_reaction(atp_coefficient):
        # two-participant reaction; ADP always gets the opposite sign of ATP
        return data_model.Reaction(
            participants=[
                data_model.ReactionParticipant(
                    coefficient=atp_coefficient, specie=data_model.Specie(structure=atp_structure)),
                data_model.ReactionParticipant(
                    coefficient=-atp_coefficient, specie=data_model.Specie(structure=adp_structure)),
            ],
            cross_references=[
                data_model.Resource(namespace='ec-code', id='1.1.1.1')
            ])

    for_rxn = atp_adp_reaction(-1)
    rev_rxn = atp_adp_reaction(1)

    self.assertEqual(f.score(for_rxn, observe(for_rxn)), 1.)
    self.assertEqual(f.score(for_rxn, observe(rev_rxn)), -1.)
def test_ObservedResultMetadata(self):
    # Populate every section of the metadata record
    metadata = data_model.ObservedResultMetadata()
    metadata.genetics = data_model.Genetics(taxon='Mycoplasma pneumoniae', variation='ΔMPN001')
    metadata.environment = data_model.Environment(temperature=37, ph=7., media='Hayflick')
    metadata.reference = data_model.Reference(
        title='title', author='author', year=2017, volume=1, number=1, pages='1-10')
    metadata.method = data_model.ExperimentalMethod(
        name='assay', description='description of assay')

    # Attach a single observed value
    observable = data_model.Observable(
        interaction=data_model.Reaction(id='AtpSynthase'),
        specie=data_model.Specie(id='ATP'),
        compartment=data_model.Compartment(id='c'),
        property='K_m',
    )
    observed = data_model.ObservedValue(
        observable=observable,
        value=1.0,
        error=0.5,
        units='U/mg',
    )
    metadata.observed_result.append(observed)

    metadata.validate()

    # the test expects appending to link the value back to its metadata record
    self.assertEqual(metadata.observed_result, [observed])
    self.assertEqual(observed.metadata, metadata)

    # observable
    self.assertEqual(observed.observable.interaction.id, 'AtpSynthase')
    self.assertEqual(observed.observable.specie.id, 'ATP')
    self.assertEqual(observed.observable.compartment.id, 'c')
    self.assertEqual(observed.observable.property, 'K_m')

    # measurement
    self.assertEqual(observed.value, 1.0)
    self.assertEqual(observed.error, 0.5)
    self.assertEqual(observed.units, 'U/mg')

    # genetics
    self.assertEqual(metadata.genetics.taxon, 'Mycoplasma pneumoniae')
    self.assertEqual(metadata.genetics.variation, 'ΔMPN001')

    # environment
    self.assertEqual(metadata.environment.temperature, 37.)
    self.assertEqual(metadata.environment.ph, 7.)
    self.assertEqual(metadata.environment.media, 'Hayflick')

    # reference
    self.assertEqual(metadata.reference.title, 'title')
    self.assertEqual(metadata.reference.author, 'author')
    self.assertEqual(metadata.reference.year, 2017)
    self.assertEqual(metadata.reference.volume, 1)
    self.assertEqual(metadata.reference.number, 1)
    self.assertEqual(metadata.reference.pages, '1-10')

    # method
    self.assertEqual(metadata.method.name, 'assay')
    self.assertEqual(metadata.method.description, 'description of assay')