class rpEquilibrator: """Class containing collection of functions to intereact between rpSBML files and equilibrator. Includes a function to convert an rpSBML file to a SBtab format for MDF analysis """ def __init__(self, rpsbml=None, ph=7.5, ionic_strength=200, pMg=10.0, temp_k=298.15): """Constructor class for rpEquilibrator :param rpsbml: rpSBML object (Default: None) :param ph: pH of the cell input from the rpSBML model input (Default: 7.5) :param ionic_strength: Ionic strength from the rpSBML model input (Default: 200) :param pMg: pMg value from the rpSBML model input (Default: 10.0) :param temp_k: Temperature from the rpSBML model input in Kelvin (Default: 298.15) :type rpsbml: rpSBML :type ph: float :type ionic_strength: float :type pMg: float :type temp_k: float .. document private functions .. automethod:: _makeSpeciesStr .. automethod:: _makeReactionStr .. automethod:: _speciesCmpQuery .. automethod:: _reactionCmpQuery .. automethod:: _reactionStrQuery """ self.logger = logging.getLogger(__name__) self.logger.debug('Started instance of rpEquilibrator') self.cc = ComponentContribution() self.cc.p_h = Q_(ph) self.cc.ionic_strength = Q_(str(ionic_strength)+' mM') self.cc.p_mg = Q_(pMg) self.cc.temperature = Q_(str(temp_k)+' K') self.ph = ph self.ionic_strength = ionic_strength self.pMg = pMg self.temp_k = temp_k #self.mnx_default_conc = json.load(open('data/mnx_default_conc.json', 'r')) self.mnx_default_conc = json.load(open(os.path.join(os.path.dirname(os.path.abspath( __file__ )), 'data', 'mnx_default_conc.json'), 'r')) self.rpsbml = rpsbml self.calc_cmp = {} ################################################################################## ############################### PRIVATE ########################################## ################################################################################## #TODO: metanetx.chemical:MNXM7 + bigg.metabolite:pi def _makeSpeciesStr(self, libsbml_species, ret_type='xref'): """Private function that makes a Equilibrator friendly string of a species :param libsbml_species: A libsbml species object :param ret_type: Type of output. Valid output include: ['name', 'id', 'xref'] :type libsbml_species: libsbml.Species :type ret_type: str Take a libsbml species object, parse the MIRIAM or the brsynth (if present) to return the equilibrator appropriate string. The order of preference is the following: example input MIRIAM annotation: {'inchikey': ['GPRLSGONYQIRFK-UHFFFAOYSA-N'], 'seed': ['cpd00067'], 'sabiork': ['39'], 'reactome': ['R-ALL-74722', 'R-ALL-70106', 'R-ALL-5668577', 'R-ALL-428548', 'R-ALL-428040', 'R-ALL-427899', 'R-ALL-425999', 'R-ALL-425978', 'R-ALL-425969', 'R-ALL-374900', 'R-ALL-372511', 'R-ALL-351626', 'R-ALL-2872447', 'R-ALL-2000349', 'R-ALL-194688', 'R-ALL-193465', 'R-ALL-163953', 'R-ALL-156540', 'R-ALL-1470067', 'R-ALL-113529', 'R-ALL-1132304'], 'metacyc': ['PROTON'], 'hmdb': ['HMDB59597'], 'chebi': ['5584', '13357', '10744', '15378'], 'bigg': ['M_h', 'h'], 'metanetx': ['MNXM89553', 'MNXM145872', 'MNXM1', 'MNXM01']} -KEGG -CHEBI #-bigg #-MNX -inchikey ret_type -> valid options (xref, id, name) :rtype: str :return: The string id of the species or False if fail """ self.logger.debug('ret_type: '+str(ret_type)) if ret_type=='name': return libsbml_species.getName() elif ret_type=='id': return libsbml_species.getId() elif ret_type=='xref': annot = libsbml_species.getAnnotation() if not annot: self.logger.error('Cannot retreive the annotation') return False miriam_dict = self.rpsbml.readMIRIAMAnnotation(annot) self.logger.debug('miriam_dict: '+str(miriam_dict)) if not miriam_dict: self.logger.warning('The object annotation does not have any MIRIAM entries') return False if 'kegg' in miriam_dict: if miriam_dict['kegg']: try: #take the lowest value int_list = [int(i.replace('C', '')) for i in miriam_dict['kegg']] return 'KEGG:'+str(miriam_dict['kegg'][int_list.index(min(int_list))]) except ValueError: self.logger.warning('There is a non int value in: '+str(miriam_dict['kegg'])) elif 'chebi' in miriam_dict: if miriam_dict['chebi']: try: #take the lowest value int_list = [int(i) for i in miriam_dict['chebi']] return 'CHEBI:'+str(miriam_dict['chebi'][int_list.index(min(int_list))]) except ValueError: self.logger.warning('There is a non int value in: '+str(miriam_dict['chebi'])) elif 'metanetx' in miriam_dict: if miriam_dict['metanetx']: try: #take the lowest value int_list = [int(i.replace('MNXM', '')) for i in miriam_dict['metanetx']] return 'metanetx.chemical:'+str(miriam_dict['metanetx'][int_list.index(min(int_list))]) except ValueError: self.logger.warning('There is a non int value in: '+str(miriam_dict['metanetx'])) elif 'inchikey' in miriam_dict: if miriam_dict['inchikey']: if len(miriam_dict['inchikey'])==1: return miriam_dict['inchikey'][0] else: self.logger.warning('There are multiple values of inchikey: '+str(miriam_dict['inchikey'])) self.logger.warning('Taking the first one') return miriam_dict['inchikey'][0] else: self.logger.warning('Could not extract string input for '+str(miriam_dict)) return False self.logger.warning('The MIRIAM annotation does not have the required information') return False else: self.logger.warning('Cannot determine ret_type: '+str(ret_type)) def _makeReactionStr(self, libsbml_reaction, ret_type='xref', ret_stoichio=True): """Make the reaction formulae string to query equilibrator :param libsbml_reaction: A libsbml reaction object :param ret_type: Type of output. Valid output include: ['name', 'id', 'xref'] (Default: xref) :param ret_stoichio: Return the stoichio or not (Default: True) :type libsbml_reaction: libsbml.Reaction :type ret_type: str :type ret_stoichio: bool :rtype: str :return: The string id of the reaction or False if fail """ reac_str = '' for rea in libsbml_reaction.getListOfReactants(): rea_str = self._makeSpeciesStr(self.rpsbml.model.getSpecies(rea.getSpecies()), ret_type) if rea_str: if ret_stoichio: reac_str += str(rea.getStoichiometry())+' '+str(rea_str)+' + ' else: reac_str += str(rea_str)+' + ' else: return False reac_str = reac_str[:-2] reac_str += '<=> ' #TODO: need to find a way to determine the reversibility of the reaction for pro in libsbml_reaction.getListOfProducts(): pro_str = self._makeSpeciesStr(self.rpsbml.model.getSpecies(pro.getSpecies()), ret_type) if pro_str: if ret_stoichio: reac_str += str(pro.getStoichiometry())+' '+str(pro_str)+' + ' else: reac_str += str(pro_str)+' + ' else: return False reac_str = reac_str[:-2] self.logger.debug('reac_str: '+str(reac_str)) return reac_str ################### Equilibrator component contribution queries instead of using the native functions ########### def _speciesCmpQuery(self, libsbml_species): """Use the native equilibrator-api compound contribution method :param libsbml_species: A libsbml species object :type libsbml_species: libsbml.Reaction :rtype: tuple :return: Tuple of size two with mu and sigma values in that order or (None, None) if fail """ annot = libsbml_species.getAnnotation() if not annot: self.logger.warning('The annotation of '+str(libsbml_species)+' is None....') return None, None brs_annot = self.rpsbml.readBRSYNTHAnnotation(libsbml_species.getAnnotation()) #TODO: handle the condition where there are no inchi values but there are SMILES -- should rarely, if ever happen self.logger.debug('libsbml_species: '+str(libsbml_species)) #self.logger.debug('brs_annot: '+str(brs_annot)) #Try to get the cmp from the ID spe_id = self._makeSpeciesStr(libsbml_species) spe_cmp = None if spe_id: self.logger.debug('Trying to find the CMP using the xref string: '+str(spe_id)) spe_cmp = self.cc.ccache.get_compound(self._makeSpeciesStr(libsbml_species)) if not spe_cmp: self.logger.debug('Trying to find the CMP using the structure') #try to find it in the local data - we do this because there are repeated species in many files if brs_annot['inchi'] in self.calc_cmp: spe_cmp = self.calc_cmp[brs_annot['inchi']] elif brs_annot['smiles'] in self.calc_cmp: spe_cmp = self.calc_cmp[brs_annot['smiles']] else: #if you cannot find it then calculate it try: spe_cmp = get_or_create_compound(self.cc.ccache, brs_annot['inchi'], mol_format='inchi') self.calc_cmp[brs_annot['inchi']] = spe_cmp self.calc_cmp[brs_annot['smiles']] = spe_cmp except (OSError, KeyError, GroupDecompositionError) as e: try: spe_cmp = get_or_create_compound(self.cc.ccache, brs_annot['smiles'], mol_format='smiles') self.calc_cmp[brs_annot['smiles']] = spe_cmp self.calc_cmp[brs_annot['inchi']] = spe_cmp except (OSError, KeyError, GroupDecompositionError) as e: self.logger.warning('The following species does not have brsynth annotation InChI or SMILES: '+str(libsbml_species.getId())) self.logger.warning('Or Equilibrator could not convert the structures') self.logger.warning(e) return None, None if spe_cmp.id==4: #this is H+ and can be ignored return 'h', 'h' self.logger.debug('spe_cmp: '+str(spe_cmp)) #mu, sigma = self.cc.predictor.preprocess.get_compound_prediction(eq_cmp[0]) mu, sigma = self.cc.predictor.preprocess.get_compound_prediction(spe_cmp) return mu, sigma def _reactionCmpQuery(self, libsbml_reaction, write_results=False, physio_param=1e-3): """This method makes a list of structure compounds and uses equilibrator to return the reaction dG :param libsbml_reaction: A libsbml reaction object :param write_results: Write the results to the rpSBML file (Default: False) :param physio_param: The physiological parameter, i.e. the concentration of the compounds to calculate the dG (Default: 1e-3) :type libsbml_reaction: libsbml.Reaction :type write_results: bool :type physio_param: float :rtype: tuple :return: Tuple of size three with dfG_prime_o, dfG_prime_m, uncertainty values in that order or False if fail """ mus = [] sigma_vecs = [] S = [] dfG_prime_o = None dfG_prime_m = None uncertainty = None for rea in libsbml_reaction.getListOfReactants(): self.logger.debug('------------------- '+str(rea.getSpecies())+' --------------') mu, sigma = self._speciesCmpQuery(self.rpsbml.model.getSpecies(rea.getSpecies())) self.logger.debug('mu: '+str(mu)) if not mu: self.logger.warning('Failed to calculate the reaction mu thermodynamics using compound query') if write_results: self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', 0.0, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', 0.0, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', 0.0, 'kj_per_mol') return False elif mu=='h': #skipping the Hydrogen continue mus.append(mu) sigma_vecs.append(sigma) S.append([-rea.getStoichiometry()]) for pro in libsbml_reaction.getListOfProducts(): mu, sigma = self._speciesCmpQuery(self.rpsbml.model.getSpecies(pro.getSpecies())) if not mu: self.logger.warning('Failed to calculate the reaction mu thermodynamics using compound query') if write_results: self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', 0.0, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', 0.0, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', 0.0, 'kj_per_mol') return False elif mu=='h': #skipping the Hydrogen continue mus.append(mu) sigma_vecs.append(sigma) S.append([pro.getStoichiometry()]) mus = Q_(mus, 'kJ/mol') sigma_vecs = Q_(sigma_vecs, 'kJ/mol') np_S = np.array(S) dfG_prime_o = np_S.T@mus dfG_prime_o = float(dfG_prime_o.m[0]) ###### adjust fot physio parameters to calculate the dGm' #TODO: check with Elad dfG_prime_m = float(dfG_prime_o)+float(self.cc.RT.m)*sum([float(sto[0])*float(np.log(co)) for sto, co in zip(S, [physio_param]*len(S))]) uncertainty = np_S.T@sigma_vecs uncertainty = [email protected] uncertainty = uncertainty.m[0][0] if write_results: self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', dfG_prime_o, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', dfG_prime_m, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', uncertainty, 'kj_per_mol') return dfG_prime_o, dfG_prime_m, uncertainty ''' ## Not sure if we should implement such a function -- recommended by Elad I geuss # # def pathwayCmpQuery(self, write_results=False): #1) build the stochio matrix taking into account all the species of the reaction -- must keep track pass #################### native equilibrator-api functions ############### def speciesStrQuery(self, libsbml_species, write_results=False): """ Return the formation energy of a chemical species """ return False ''' #TODO: when an inchikey is passed, (and you don't have any other xref) and equilibrator finds the correct species then update the MIRIAM annotations def _reactionStrQuery(self, libsbml_reaction, write_results=False): """Build the string reaction from a libSBML reaction object to send to equilibrator and return the different thermodynamics analysis available :param libsbml_reaction: A libsbml reaction object :param write_results: Write the results to the rpSBML file (Default: False) :type libsbml_reaction: libsbml.Reaction :type write_results: bool :rtype: bool :return: Success or failue of the function """ reac_str = '' try: reac_str = self._makeReactionStr(libsbml_reaction) self.logger.debug('The reaction string is: '+str(reac_str)) if not reac_str: self.logger.warning('Could not generate the reaction string for: '+str(libsbml_reaction)) if write_results: self.logger.warning('Writing the 0 results to the file') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', 0.0, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', 0.0, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', 0.0, 'kj_per_mol') #Are there default values for these? #self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'reversibility_index', 0.0) #self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'balanced', rxn.is_balanced()) return False rxn = self.cc.parse_reaction_formula(reac_str) standard_dg = self.cc.standard_dg(rxn) standard_dg_prime = self.cc.standard_dg_prime(rxn) physiological_dg_prime = self.cc.physiological_dg_prime(rxn) ln_reversibility_index = self.cc.ln_reversibility_index(rxn) if type(ln_reversibility_index)==float: self.logger.warning('The reversibility index is infinite: '+str(ln_reversibility_index)) ln_reversibility_index = None ln_reversibility_index_error = None else: ln_reversibility_index_error = ln_reversibility_index.error.m ln_reversibility_index = ln_reversibility_index.value.m self.logger.debug(rxn.is_balanced()) self.logger.debug('ln_reversibility_index: '+str(ln_reversibility_index)) self.logger.debug('standard_dg.value.m: '+str(standard_dg.value.m)) self.logger.debug('standard_dg.error.m: '+str(standard_dg.error.m)) self.logger.debug('standard_dg_prime.value.m: '+str(standard_dg_prime.value.m)) self.logger.debug('standard_dg_prime.error.m: '+str(standard_dg_prime.error.m)) self.logger.debug('physiological_dg_prime.value.m: '+str(physiological_dg_prime.value.m)) self.logger.debug('physiological_dg_prime.error.m: '+str(physiological_dg_prime.error.m)) if write_results: self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_o', standard_dg_prime.value.m, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_prime_m', physiological_dg_prime.value.m, 'kj_per_mol') self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'dfG_uncert', standard_dg.error.m, 'kj_per_mol') if ln_reversibility_index: self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'reversibility_index', ln_reversibility_index) self.rpsbml.addUpdateBRSynth(libsbml_reaction, 'balanced', rxn.is_balanced()) return (rxn.is_balanced(), (ln_reversibility_index, ln_reversibility_index_error), (float(standard_dg.value.m), float(standard_dg.error.m)), (float(standard_dg_prime.value.m), float(standard_dg_prime.error.m)), (float(physiological_dg_prime.value.m), float(physiological_dg_prime.error.m))) except equilibrator_cache.exceptions.ParseException: self.logger.warning('One of the reaction species cannot be parsed by equilibrator: '+str(reac_str)) return False except equilibrator_cache.exceptions.MissingDissociationConstantsException: self.logger.warning('Some of the species have not been pre-caclulated using ChemAxon') ################################################################################ ########################### PUBLIC FUNCTIONS ################################### ################################################################################ #WARNING: taking the sum of the reaction thermodynamics is perhaps not the best way to do it def pathway(self, pathway_id='rp_pathway', write_results=True): """Calculate the dG of a heterologous pathway :param pathway_id: The id of the heterologous pathway of interest (Default: rp_pathway) :param write_results: Write the results to the rpSBML file (Default: True) :type pathway_id: str :type write_results: bool :rtype: tuple :return: Tuple with the following information, in order: sum dG_prime, std dG_prime, sum dG_prime_o, std dG_prime_o, sum dG_prime_m, std dG_prime_m. Also False if function error. """ groups = self.rpsbml.model.getPlugin('groups') rp_pathway = groups.getGroup(pathway_id) if not rp_pathway: self.logger.error('Cannot retreive the pathway: '+str(pathway_id)) return False pathway_balanced = [] pathway_reversibility_index = [] pathway_reversibility_index_error = [] pathway_standard_dg = [] pathway_standard_dg_error = [] pathway_standard_dg_prime = [] pathway_standard_dg_prime_error = [] pathway_physiological_dg_prime = [] pathway_physiological_dg_prime_error = [] for react in [self.rpsbml.model.getReaction(i.getIdRef()) for i in rp_pathway.getListOfMembers()]: self.logger.debug('Sending the following reaction to _reactionStrQuery: '+str(react)) res = self._reactionStrQuery(react, write_results) self.logger.debug('The result is :'+str(res)) if res: #WARNING: the uncertainty for the three thermo calculations should be the same pathway_balanced.append(res[0]) if not res[1][0]==None: pathway_reversibility_index.append(res[1][0]) else: pathway_reversibility_index.append(0.0) if not res[1][1]==None: pathway_reversibility_index_error.append(res[1][1]) else: pathway_reversibility_index_error.append(0.0) #ignoring -- need to see if legacy component contribution can return these values #pathway_standard_dg.append(res[2][0]) #pathway_standard_dg_error.append(res[2][1]) pathway_standard_dg_prime.append(res[3][0]) pathway_standard_dg_prime_error.append(res[3][1]) pathway_physiological_dg_prime.append(res[4][0]) pathway_physiological_dg_prime_error.append(res[4][1]) else: self.logger.info('Native equilibrator string query failed') self.logger.info('Trying equilibrator_api component contribution') self.logger.debug('Trying to calculate using CC: '+str(react)) res = self._reactionCmpQuery(react, write_results) if res: pathway_standard_dg_prime.append(res[0]) pathway_standard_dg_prime_error.append(res[2]) pathway_physiological_dg_prime.append(res[1]) pathway_physiological_dg_prime_error.append(res[2]) #TODO: need to implement pathway_balanced.append(None) pathway_reversibility_index.append(None) pathway_reversibility_index_error.append(None) else: self.logger.warning('Cannot calculate the thermodynmics for the reaction: '+str(react)) self.logger.warning('Setting everything to 0') pathway_standard_dg_prime.append(0.0) pathway_standard_dg_prime_error.append(0.0) pathway_physiological_dg_prime.append(0.0) pathway_physiological_dg_prime_error.append(0.0) #TODO: need to implement pathway_balanced.append(None) pathway_reversibility_index.append(None) pathway_reversibility_index_error.append(None) #return False #WARNING return is ignoring balanced and reversibility index -- need to implement in legacy to return it (however still writing these results to the SBML) if write_results: self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_prime_o', np.sum(pathway_standard_dg_prime), 'kj_per_mol') self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_prime_o_std', np.std(pathway_standard_dg_prime), 'kj_per_mol') self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_prime_m', np.sum(pathway_physiological_dg_prime), 'kj_per_mol') self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_prime_m_std', np.std(pathway_physiological_dg_prime), 'kj_per_mol') self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_uncert', np.mean(pathway_standard_dg_prime_error), 'kj_per_mol') self.rpsbml.addUpdateBRSynth(rp_pathway, 'dfG_uncert_std', np.std(pathway_standard_dg_prime_error), 'kj_per_mol') return (np.sum(pathway_standard_dg_prime), np.std(pathway_standard_dg_prime)), (np.sum(pathway_physiological_dg_prime), np.std(pathway_physiological_dg_prime)), (np.sum(pathway_standard_dg_prime), np.std(pathway_standard_dg_prime)) def toNetworkSBtab(self, output, pathway_id='rp_pathway', thermo_id='dfG_prime_o', fba_id='fba_obj_fraction', stdev_factor=1.96): """Convert an SBML pathway to a simple network for input to equilibrator-pathway for MDF :param output: Output path of the TSV file :param pathway_id: The id of the heterologous pathway of interest (Default: rp_pathway) :param thermo_id: The id of the thermodynamics result to be exported to the SBtab file (Default: dfG_prime_o, Valid Options: [dfG_prime_o, dfG_prime_m]) :param fba_id: The id of the FBA value to be exported to SBtab (Default: fba_obj_fraction) :param stdev_factor: The standard deviation factor (Default: 1.96) :type output: str :type pathway_id: str :type thermo_id: str :type fba_id: str :type stdev_factor: float :rtype: bool :return: Success or failure of the function """ groups = self.rpsbml.model.getPlugin('groups') rp_pathway = groups.getGroup(pathway_id) if not rp_pathway: self.logger.error('Cannot retreive the pathway: '+str(pathway_id)) return False with open(output, 'w') as fo: ####################### Make the header of the document ############## fo.write("!!!SBtab DocumentName='E. coli central carbon metabolism - balanced parameters' SBtabVersion='1.0'\t\t\t\n") fo.write("!!SBtab TableID='Configuration' TableType='Config'\t\t\t\n") fo.write("!Option\t!Value\t!Comment\t\n") fo.write("algorithm\tMDF\tECM, or MDF\t\n") fo.write("p_h\t"+str(self.ph)+"\t\t\n") fo.write("ionic_strength\t"+str(self.ionic_strength)+" mM\t\t\n") fo.write("p_mg\t"+str(self.pMg)+"\t\t\n") fo.write("stdev_factor "+str(stdev_factor)+"\n") fo.write("\t\t\t\n") ####################### Make the reaction list ###################### fo.write("!!SBtab TableID='Reaction' TableType='Reaction'\t\t\t\n") fo.write("!ID\t!ReactionFormula\t\t\n") #TODO: need to sort it in the reverse order #TODO: use the rpGraph to sort the pathway in the right order ordered_react = [self.rpsbml.model.getReaction(i.getIdRef()) for i in rp_pathway.getListOfMembers()] ordered_react.sort(key=lambda x: int(x.getId().replace('RP', '')), reverse=True) #for react in [self.rpsbml.model.getReaction(i.getIdRef()) for i in rp_pathway.getListOfMembers()]: for react in ordered_react: react_str = self._makeReactionStr(react, 'id', True) if react_str: fo.write(str(react.getId())+"\t"+str(react_str)+"\n") else: self.logger.error('Cannot build the reaction: '+str(rect)) return False fo.write("\t\t\t\n") fo.write("\t\t\t\n") ########################## Make the species list ################### fo.write("!!SBtab TableID='Compound' TableType='Compound'\t\t\t\n") fo.write("!ID\t!Identifiers\t\t\n") rp_species = self.rpsbml.readUniqueRPspecies(pathway_id) for spe_id in rp_species: spe = self.rpsbml.model.getSpecies(spe_id) miriam_dict = self.rpsbml.readMIRIAMAnnotation(spe.getAnnotation()) if not miriam_dict: self.logger.warning('The object annotation does not have any MIRIAM entries') return False iden_str = None if 'kegg' in miriam_dict: if miriam_dict['kegg']: try: #take the lowest value int_list = [int(i.replace('C', '')) for i in miriam_dict['kegg']] iden_str = 'KEGG:'+str(miriam_dict['kegg'][int_list.index(min(int_list))]) except ValueError: self.logger.warning('There is a non int value in: '+str(miriam_dict['kegg'])) if 'chebi' in miriam_dict and not iden_str: if miriam_dict['chebi']: try: #take the lowest value int_list = [int(i) for i in miriam_dict['chebi']] iden_str = 'CHEBI:'+str(miriam_dict['chebi'][int_list.index(min(int_list))]) except ValueError: self.logger.warning('There is a non int value in: '+str(miriam_dict['chebi'])) if 'metanetx' in miriam_dict and not iden_str: if miriam_dict['metanetx']: try: #take the lowest value int_list = [int(i.replace('MNXM', '')) for i in miriam_dict['metanetx']] iden_str = 'metanetx.chemical:'+str(miriam_dict['metanetx'][int_list.index(min(int_list))]) except ValueError: self.logger.warning('There is a non int value in: '+str(miriam_dict['metanetx'])) if 'inchikey' in miriam_dict and not iden_str: if miriam_dict['inchikey']: if len(miriam_dict['inchikey'])==1: iden_str = miriam_dict['inchikey'][0] else: self.logger.warning('There are multiple values of inchikey: '+str(miriam_dict['inchikey'])) self.logger.warning('Taking the first one') iden_str = miriam_dict['inchikey'][0] if not iden_str: self.logger.warning('Could not extract string input for '+str(miriam_dict)) fo.write(str(spe_id)+"\t"+str(iden_str)+"\t\t\n") fo.write("\t\t\t\n") ################## Add FBA values ############################## #TODO: perhaps find a better way than just setting this to 1 fo.write("!!SBtab TableID='Flux' TableType='Quantity' Unit='mM/s'\t\t\t\n") fo.write("!QuantityType\t!Reaction\t!Value\t\n") for react in [self.rpsbml.model.getReaction(i.getIdRef()) for i in rp_pathway.getListOfMembers()]: brs_annot = self.rpsbml.readBRSYNTHAnnotation(react.getAnnotation()) if fba_id: if fba_id in brs_annot: #the saved value is mmol_per_gDW_per_hr while rpEq required mmol_per_s #WARNING: rpEq says that these values are mMol/s while here we have mMol/gDw/h. However not changing since this would mean doing /3600 and #given the values that seems wrong fo.write("rate of reaction\t"+str(react.getId())+"\t"+str(brs_annot[fba_id]['value'])+"\t\n") else: self.logger.warning('Cannot retreive the FBA value '+str(fba_id)+'. Setting a default value of 1.') fo.write("rate of reaction\t"+str(react.getId())+"\t1\t\n") else: fo.write("rate of reaction\t"+str(react.getId())+"\t1\t\n") ################## Add the concentration bounds ############################## fo.write("\t\t\t\n") fo.write("!!SBtab TableID='ConcentrationConstraint' TableType='Quantity' Unit='mM'\t\t\t\n") fo.write("!QuantityType\t!Compound\t!Min\t!Max\n") for spe_id in rp_species: self.logger.debug('========= '+str(spe_id)+' ========') is_found = False spe = self.rpsbml.model.getSpecies(spe_id) miriam_dict = self.rpsbml.readMIRIAMAnnotation(spe.getAnnotation()) self.logger.debug(miriam_dict) if not miriam_dict: self.logger.warning('The object annotation does not have any MIRIAM entries') continue if 'metanetx' in miriam_dict: self.logger.debug(miriam_dict['metanetx']) for mnx in miriam_dict['metanetx']: if mnx in list(self.mnx_default_conc.keys()) and not is_found: self.logger.debug('Found default concentration range for '+str(spe.getId())+' ('+str(mnx)+'): '+str(self.mnx_default_conc[mnx])) fo.write("concentration\t"+spe.getId()+"\t"+str(self.mnx_default_conc[mnx]['c_min'])+"\t"+str(self.mnx_default_conc[mnx]['c_max'])+"\n") is_found = True if not is_found: self.logger.debug('Using default values for '+str(spe.getId())) fo.write("concentration\t"+spe.getId()+"\t0.001\t10\n") fo.write("\t\t\t\n") ############################ Add the thermo value ########################### #TODO: perform on the fly thermodynamic calculations when the values are not included within the SBML file if thermo_id: fo.write("!!SBtab TableID='Thermodynamics' TableType='Quantity' StandardConcentration='M'\t\t\t\n") fo.write("!QuantityType\t!Reaction\t!Compound\t!Value\t!Unit\n") for react in [self.rpsbml.model.getReaction(i.getIdRef()) for i in rp_pathway.getListOfMembers()]: brs_annot = self.rpsbml.readBRSYNTHAnnotation(react.getAnnotation()) try: #TODO: switch to dfG_prime_m when you are sure how to calculate it using the native equilibrator function if thermo_id in brs_annot: if brs_annot[thermo_id]: fo.write("reaction gibbs energy\t"+str(react.getId())+"\t\t"+str(brs_annot['dfG_prime_o']['value'])+"\tkJ/mol\n") else: self.logger.error(str(thermo_id)+' is empty. Was rpThermodynamics run on this SBML? Aborting...') return False else: self.logger.error('There is no '+str(thermo_id)+' in the reaction '+str(react.getId())) return False except KeyError: self.logger.error('The reaction '+str(react.getId())+' does not seem to have the following thermodynamic value: '+str(thermo_id)) return False return True def MDF(self, pathway_id='rp_pathway', thermo_id='dfG_prime_o', fba_id='fba_obj_fraction', stdev_factor=1.96, write_results=True): """Perform MDF analysis on the rpSBML file :param pathway_id: The id of the heterologous pathway of interest (Default: rp_pathway) :param thermo_id: The id of the thermodynamics result to be exported to the SBtab file (Default: dfG_prime_o, Valid Options: [dfG_prime_o, dfG_prime_m]) :param fba_id: The id of the FBA value to be exported to SBtab (Default: fba_obj_fraction) :param stdev_factor: The standard deviation factor (Default: 1.96) :param write_results: Write the results to the rpSBML file (Default: True) :type pathway_id: str :type thermo_id: str :type fba_id: str :type stdev_factor: float :type write_results: bool :rtype: float :return: MDF of the pathway """ to_ret_mdf = None groups = self.rpsbml.model.getPlugin('groups') rp_pathway = groups.getGroup(pathway_id) with tempfile.TemporaryDirectory() as tmpOutputFolder: path_sbtab = os.path.join(tmpOutputFolder, 'tmp_sbtab.tsv') sbtab_status = self.toNetworkSBtab(path_sbtab, pathway_id=pathway_id, thermo_id=thermo_id, fba_id=fba_id, stdev_factor=stdev_factor) if not sbtab_status: self.logger.error('There was a problem generating the SBtab... aborting') return 0.0 try: pp = Pathway.from_sbtab(path_sbtab, comp_contrib=self.cc) pp.update_standard_dgs() try: mdf_sol = pp.calc_mdf() except: self.logger.warning('The calc_mdf function failed') self.logger.warning('Exception: Cannot solve MDF primal optimization problem') self.rpsbml.addUpdateBRSynth(rp_pathway, 'MDF', 0.0, 'kj_per_mol') return 0.0 #mdf_sol = pp.mdf_analysis() #plt_reac_plot = mdf_sol.reaction_plot #plt_cmp_plot = mdf_sol.compound_plot to_ret_mdf = float(mdf_sol.mdf.m) if write_results: self.rpsbml.addUpdateBRSynth(rp_pathway, 'MDF', float(mdf_sol.mdf.m), 'kj_per_mol') except KeyError as e: self.logger.warning('Cannot calculate MDF') self.logger.warning(e) self.rpsbml.addUpdateBRSynth(rp_pathway, 'MDF', 0.0, 'kj_per_mol') return 0.0 except equilibrator_cache.exceptions.MissingDissociationConstantsException as e: self.logger.warning('Some species are invalid: '+str(e)) self.rpsbml.addUpdateBRSynth(rp_pathway, 'MDF', 0.0, 'kj_per_mol') return 0.0 return to_ret_mdf
def create_reaction_id_to_dG0_mapping_json(model: cobra.Model, json_path: str) -> None: """Creates a reaction ID<->dG0 mapping using the Equilibrator API. This function uses the pregenerated BIGG ID to MetaNetX ID mapping, and lets the Equilibrator API calculate the dG0 values for each reaction using the MetaNetX IDs for the metabolites. This function is specifically written for the E. coli models used in CommModelPy's publication as its selection of dG0 values is based on BiGG identifiers and as it contains specific dG0 corrections for special reactions as referenced in the comments of this function. However, the general basis of this function can be easily adapted for other types of models. Arguments: * model: cobra.Model ~ The cobrapy model instance for which the dG0 mapping shall be created. * json_path: str ~ The path to the dG0 JSON that is going to be created. Output: * No variable but a JSON file with the mapping at json_path """ bigg_id_to_metanetx_id = json_load("./publication_runs/ecoli_models/bigg_id_to_metanetx_mapping_json/bigg_id_to_metanetx_id_mapping_from_iML1515.json") reaction_id_dG0_mapping: Dict[str, Dict[str, float]] = {} cc = ComponentContribution() cc.p_h = Q_(7.0) cc.ionic_strength = Q_("0.25M") cc.temperature = Q_("298.15K") cc.p_mg = Q_(3.0) reaction_ids_without_dG0: List[str] = [] for reaction in model.reactions: print("==dG0 GENERATION ATTEMPT FOR REACTION "+reaction.id+"==") # Exclude exchange reactions if reaction.id.startswith("EX_"): print("INFO: Reaction is identified as exchange reaction.") print(" No dG0 for this reaction!") reaction_ids_without_dG0.append(reaction.id) continue # Exclude special transport reactions if ("tpp_" in reaction.id+"_") or ("t1pp_" in reaction.id+"_") or ("tex_" in reaction.id+"_") or reaction.id.endswith("tex"): # tpp: Facilitated transport or (proton) symport # t1pp: Facilitated transport or (proton) symport # tex: "Via diffusion" print("INFO: Reaction is identified as special transport reaction.") print(" No dG0 for this reaction!") reaction_ids_without_dG0.append(reaction.id) continue # Exclude demand reactions if (reaction.id.startswith("DM_")): print("INFO: Reaction is identified as sink (demand) reaction.") print(" No dG0 for this reaction!") reaction_ids_without_dG0.append(reaction.id) continue educt_strings: List[str] = [] product_strings: List[str] = [] is_metanetx_id_missing = False for metabolite in reaction.metabolites.keys(): base_metabolite_id = "".join(metabolite.id.split("_")[:-1]) if base_metabolite_id.endswith("BIO"): base_metabolite_id = base_metabolite_id[:-len("BIO")] if base_metabolite_id not in bigg_id_to_metanetx_id.keys(): is_metanetx_id_missing = True break if reaction.metabolites[metabolite] < 0: educt_strings.append(str(-reaction.metabolites[metabolite]) + " " + bigg_id_to_metanetx_id[base_metabolite_id]) else: product_strings.append(str(reaction.metabolites[metabolite]) + " " + bigg_id_to_metanetx_id[base_metabolite_id]) if is_metanetx_id_missing: print("INFO: MetanetX ID missing for " + base_metabolite_id) print(" No dG0 can be given for this reaction!") reaction_ids_without_dG0.append(reaction.id) continue educt_string = " + ".join(educt_strings) product_string = " + ".join(product_strings) reaction_string = educt_string + " = " + product_string print("Reaction string with MetanetX IDs: " + reaction_string) try: parsed_reaction = cc.parse_reaction_formula(reaction_string) except Exception as e: print("INFO: Equilibrator reaction parsing error") print(e) print(" No dG0 can be given for this reaction!") reaction_ids_without_dG0.append(reaction.id) continue try: dG0 = cc.standard_dg_prime(parsed_reaction) if dG0.m.s > 10000: print("INFO: dG0 standard deviation too high") print(" No dG0 can be given for this reaction!") reaction_ids_without_dG0.append(reaction.id) continue else: reaction_id_dG0_mapping[reaction.id+"_ecoli1"] = {} reaction_id_dG0_mapping[reaction.id+"_ecoli1"]["dG0"] = dG0.m.n reaction_id_dG0_mapping[reaction.id+"_ecoli1"]["uncertainty"] = 0 reaction_id_dG0_mapping[reaction.id+"_ecoli2"] = {} reaction_id_dG0_mapping[reaction.id+"_ecoli2"]["dG0"] = dG0.m.n reaction_id_dG0_mapping[reaction.id+"_ecoli2"]["uncertainty"] = 0 reaction_id_dG0_mapping[reaction.id+"_ecoli3"] = {} reaction_id_dG0_mapping[reaction.id+"_ecoli3"]["dG0"] = dG0.m.n reaction_id_dG0_mapping[reaction.id+"_ecoli3"]["uncertainty"] = 0 print("dG0 calculation successful \\o/") print(f"dG0: {dG0}") except Exception as e: print("INFO:") print(e) print(" No dG0 can be given for this reaction!") reaction_ids_without_dG0.append(reaction.id) continue # Membrane-bound reaction corrections according to formula 9 in # Hamilton, J. J., Dwivedi, V., & Reed, J. L. (2013). # Quantitative assessment of thermodynamic constraints on # the solution space of genome-scale metabolic models. # Biophysical journal, 105(2), 512-522. # Formula is: # c_j * F * dPsi - 2.3 * h_j * R * T * dpH # c_j = net charge transported from outside to inside # h_j = Number of protons transported across membrane F = 0.10026 # kJ/mV/mol, or 0.02306; kcal/mV/mol dPsi = -130 # mV dpH = 0.4 # dimensionless R = 8.314e-3 # kJ⋅K⁻1⋅mol⁻1 T = 298.15 # K # c_j and h_j are taken from supplementary table 3 # of the mentioned publication (Hamilton et al., 2013) c_j = 0.0 h_j = 0.0 # ATP synthase if reaction.id.startswith("ATPS4"): c_j = 4.0 h_j = 4.0 # NADH dehydrogenases elif reaction.id.startswith("NADH16"): c_j = -3.5 h_j = -3.5 elif reaction.id.startswith("NADH17"): c_j = -2.0 h_j = -2.0 elif reaction.id.startswith("NADH18"): c_j = -2.8 h_j = -2.8 # Cytochrome dehydrogenases elif reaction.id.startswith("CYTBD"): c_j = -2.0 h_j = -2.0 elif reaction.id.startswith("CYTBO3"): c_j = -2.5 h_j = -2.5 if (c_j != 0) and (h_j != 0): print("Correcting dG0 for special membrane-bound reaction...") dG0_correction = c_j * F * dPsi - 2.3 * h_j * R * T * dpH print(f"Correction factor is {dG0_correction}") reaction_id_dG0_mapping[reaction.id+"_ecoli1"]["dG0"] += dG0_correction reaction_id_dG0_mapping[reaction.id+"_ecoli2"]["dG0"] += dG0_correction reaction_id_dG0_mapping[reaction.id+"_ecoli3"]["dG0"] += dG0_correction print("New dG0 is " + str(reaction_id_dG0_mapping[reaction.id+"_ecoli1"]["dG0"])) num_reactions_without_dG0 = len(reaction_ids_without_dG0) print("\n==FINAL STATISTICS==") print("No dG0 for the following reactions:") print("\n".join(reaction_ids_without_dG0)) print("Total number: ", str(num_reactions_without_dG0)) print("Fraction from all reactions: ", num_reactions_without_dG0/len(model.reactions)) json_write(json_path, reaction_id_dG0_mapping)