def GetFullOxidationReaction(cid):
    """Build the complete-oxidation reaction of a KEGG compound.

    The compound is expressed as a combination of H2O, O2, Pi, CO2 and NH3
    by solving the linear system that balances C, O, P, N and electrons.

    Args:
        cid: the KEGG compound ID of the compound to oxidize.

    Returns:
        A Reaction named "complete oxidation of <name>" whose sparse
        dictionary maps each basic compound to its coefficient (rounded to
        3 decimals) and 'cid' itself to -1.
    """
    kegg = Kegg.getInstance()
    basic_cids = [1, 7, 9, 11, 14]  # H2O, O2, Pi, CO2, NH3
    basic_elements = ["C", "O", "P", "N", "e-"]

    def element_counts(compound_id):
        # amount of every basic element (electrons included) in one compound
        bag = kegg.cid2atom_bag(compound_id)
        bag["e-"] = kegg.cid2num_electrons(compound_id)
        return [bag.get(element, 0) for element in basic_elements]

    # one column per basic compound, one row per balanced element
    element_mat = np.matrix(np.zeros((len(basic_elements), len(basic_cids))))
    for col, basic_cid in enumerate(basic_cids):
        for row, count in enumerate(element_counts(basic_cid)):
            element_mat[row, col] = count

    cs_element_vec = np.zeros((len(basic_elements), 1))
    for row, count in enumerate(element_counts(cid)):
        cs_element_vec[row, 0] = count

    coefficients = np.linalg.inv(element_mat) * cs_element_vec
    sparse = dict((basic_cid, np.round(coefficients[idx, 0], 3))
                  for idx, basic_cid in enumerate(basic_cids))
    sparse[cid] = -1
    return Reaction("complete oxidation of %s" % kegg.cid2name(cid), sparse)
def __init__(self, html_writer, dissociation, transformed=False, pH=default_pH, I=0, pMg=14, T=default_T):
    """Store the aqueous conditions and collaborators of this object.

    Args:
        html_writer: stored as self.html_writer.
        dissociation: stored as self.dissociation. When it is not None, the
            result of its GetCid2nH_nMg() at the given conditions is cached
            in self.cid2nH_nMg (the attribute is not created otherwise).
        transformed: when true, the transformed Gibbs energy symbol is used.
        pH, I, pMg, T: conditions stored on the instance.
    """
    self.kegg = Kegg.getInstance()
    self.html_writer = html_writer
    self.observations = []

    self.pH, self.I, self.pMg, self.T = pH, I, pMg, T

    self.transformed = transformed
    self.gibbs_symbol = symbol_dr_G0_prime if transformed else symbol_dr_G0

    self.dissociation = dissociation
    if dissociation is not None:
        self.cid2nH_nMg = dissociation.GetCid2nH_nMg(
            pH=self.pH, I=self.I, pMg=self.pMg, T=self.T)

    self.FormationEnergyFileName = "../data/thermodynamics/formation_energies.csv"
def GetJSONDictionary(self):
    """Return the thermodynamic data as a list of JSON-serializable dicts.

    One dictionary is produced per compound ID returned by get_all_cids().
    It holds the KEGG name, InChI, electron count, source string and the
    list of pseudoisomers. Name, InChI and electron count are None when the
    KEGG lookup fails.
    """
    kegg = Kegg.getInstance()
    formations = []
    for cid in self.get_all_cids():
        try:
            name = kegg.cid2name(cid)
        except KeyError:
            name = None

        try:
            inchi = kegg.cid2inchi(cid)
        except KeyError:
            inchi = None

        try:
            num_electrons = kegg.cid2num_electrons(cid)
        except KeggParseException:
            num_electrons = None

        source = self.cid2source_string.get(cid, None)
        species = [{"nH": nH, "z": z, "nMg": nMg, "dG0_f": dG0}
                   for nH, z, nMg, dG0
                   in self.cid2PseudoisomerMap(cid).ToMatrix()]

        formations.append({'cid': cid,
                           'name': name,
                           'inchi': inchi,
                           'num_electrons': num_electrons,
                           'source': source,
                           'species': species})
    return formations
def main():
    """Transform the training formation energies and write them to a CSV file.

    Reads the formation energies of the training species, makes sure each
    compound has exactly one training pseudoisomer, plugs it into the
    compound's dissociation table and writes the transformed energy at
    pH 7, I = 0.25, pMg 14 and T = 298.15 to the result file.

    Raises:
        Exception: if a compound has more than one training species, or has
            no dissociation table.
    """
    pH, I, pMg, T = 7.0, 0.25, 14.0, 298.15

    dissociation = DissociationConstants.FromPublicDB()
    kegg = Kegg.getInstance()
    obs_fname = "../data/thermodynamics/formation_energies.csv"
    res_fname = "../res/formation_energies_transformed.csv"

    train_species = PsuedoisomerTableThermodynamics.FromCsvFile(obs_fname,
                                                                label="testing")
    # the 'with' block guarantees that the result file is flushed and closed,
    # also when one of the compounds below raises an exception
    with open(res_fname, "w") as res_file:
        csv_out = csv.writer(res_file)
        csv_out.writerow(["cid", "name", "dG'0", "pH", "I", "pMg", "T",
                          "anchor", "compound_ref", "remark"])
        for cid in train_species.get_all_cids():
            pmap = train_species.cid2PseudoisomerMap(cid)
            source = train_species.cid2source_string[cid]
            pmatrix = pmap.ToMatrix()  # ToMatrix returns tuples of (nH, z, nMg, dG0)
            if len(pmatrix) != 1:
                raise Exception("multiple training species for C%05d" % cid)
            nH, charge, nMg, dG0 = pmatrix[0]
            name = "%s (%d)" % (kegg.cid2name(cid), nH)
            logging.info("Adding the formation energy of %s", name)
            diss_table = dissociation.GetDissociationTable(cid,
                                                           create_if_missing=True)
            if diss_table is None:
                raise Exception("%s [C%05d, nH=%d, nMg=%d] does not have a "
                                "dissociation table" % (name, cid, nH, nMg))

            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            diss_table.SetCharge(nH, charge, nMg)
            dG0_prime = diss_table.Transform(pH, I, pMg, T)
            csv_out.writerow([cid, kegg.cid2name(cid), "%.1f" % dG0_prime,
                              pH, I, pMg, T, True, source, None])
def FromChemAxon(cid2mol=None, html_writer=None):
    """Create a DissociationConstants object using ChemAxon pKa estimates.

    Args:
        cid2mol: a dictionary from KEGG compound ID to a Molecule (or None).
            When omitted, all the compound IDs in KEGG are used and their
            Molecules are taken from KEGG.
        html_writer: optional writer; when given, a header and the
            dissociation table of every compound are written to it.

    Returns:
        The populated DissociationConstants object. Compounds whose Molecule
        cannot be obtained from KEGG are skipped.
    """
    kegg = Kegg.getInstance()
    diss = DissociationConstants()
    if cid2mol is None:
        cid2mol = dict.fromkeys(kegg.get_all_cids())

    for cid in sorted(cid2mol):
        mol = cid2mol[cid]
        logging.info(
            "Using ChemAxon to find the pKa values for %s - C%05d" %
            (kegg.cid2name(cid), cid))
        if html_writer:
            html_writer.write('<h2>%s - C%05d</h2>\n' %
                              (kegg.cid2name(cid), cid))

        # fall back to the KEGG database for CIDs that were not assigned
        # a Molecule by the caller
        if mol is None:
            try:
                mol = kegg.cid2mol(cid)
            except KeggParseException:
                continue

        diss_table = mol.GetDissociationTable()
        diss.cid2DissociationTable[cid] = diss_table
        if not (diss_table and html_writer):
            continue
        diss_table.WriteToHTML(html_writer)
        html_writer.write('</br>\n')
    return diss
def GetFullOxidationReaction(cid):
    """Build the complete-oxidation reaction of a KEGG compound.

    The compound is expressed as a combination of H2O, O2, Pi, CO2 and NH3
    by solving the linear system that balances C, O, P, N and electrons.

    Args:
        cid: the KEGG compound ID of the compound to oxidize.

    Returns:
        A Reaction named "complete oxidation of <name>" whose sparse
        dictionary maps each basic compound to its coefficient (rounded to
        3 decimals) and 'cid' itself to -1.
    """
    kegg = Kegg.getInstance()
    basic_cids = [1, 7, 9, 11, 14]  # H2O, O2, Pi, CO2, NH3
    basic_elements = ['C', 'O', 'P', 'N', 'e-']

    def element_counts(compound_id):
        # amount of every basic element (electrons included) in one compound
        bag = kegg.cid2atom_bag(compound_id)
        bag['e-'] = kegg.cid2num_electrons(compound_id)
        return [bag.get(element, 0) for element in basic_elements]

    # one column per basic compound, one row per balanced element
    element_mat = np.matrix(np.zeros((len(basic_elements), len(basic_cids))))
    for col, basic_cid in enumerate(basic_cids):
        for row, count in enumerate(element_counts(basic_cid)):
            element_mat[row, col] = count

    cs_element_vec = np.zeros((len(basic_elements), 1))
    for row, count in enumerate(element_counts(cid)):
        cs_element_vec[row, 0] = count

    coefficients = np.linalg.inv(element_mat) * cs_element_vec
    sparse = dict((basic_cid, np.round(coefficients[idx, 0], 3))
                  for idx, basic_cid in enumerate(basic_cids))
    sparse[cid] = -1
    return Reaction("complete oxidation of %s" % kegg.cid2name(cid), sparse)
def ExportJSONFiles():
    """Export the nullspace vectors and the KEGG data as JSON files.

    The thermodynamic source selected on the command line and the CSV
    formation energies are first registered in KEGG (in that priority
    order). Then the 'ugc_conservations' table, the compounds, the reactions
    and the enzymes are written to the file names given in the options.
    """
    estimators = LoadAllEstimators()
    options, _ = MakeOpts(estimators).parse_args(sys.argv)

    thermo_list = [
        estimators[options.thermodynamics_source],
        PsuedoisomerTableThermodynamics.FromCsvFile(options.thermodynamics_csv),
    ]

    # Make sure we have all the data.
    kegg = Kegg.getInstance()
    for priority, thermo in enumerate(thermo_list, 1):
        print("Priority %d - formation energies of: %s" % (priority, thermo.name))
        kegg.AddThermodynamicData(thermo, priority=priority)

    db = SqliteDatabase('../res/gibbs.sqlite')

    print('Exporting Group Contribution Nullspace matrix as JSON.')
    nullspace_vectors = []
    for row in db.DictReader('ugc_conservations'):
        entry = {'msg': row['msg']}
        sparse = json.loads(row['json'])
        entry['reaction'] = [[coeff, "C%05d" % int(cid)]
                             for cid, coeff in sparse.iteritems()]
        nullspace_vectors.append(entry)
    WriteJSONFile(nullspace_vectors, options.nullspace_out_filename)

    print('Exporting KEGG compounds as JSON.')
    WriteJSONFile(kegg.AllCompounds(), options.compounds_out_filename)

    print('Exporting KEGG reactions as JSON.')
    WriteJSONFile(kegg.AllReactions(), options.reactions_out_filename)

    print('Exporting KEGG enzymes as JSON.')
    WriteJSONFile(kegg.AllEnzymes(), options.enzymes_out_filename)
def FromChemAxon(cid2mol=None, html_writer=None):
    """Create a DissociationConstants object using ChemAxon pKa estimates.

    Args:
        cid2mol: a dictionary from KEGG compound ID to a Molecule (or None).
            When omitted, all the compound IDs in KEGG are used and their
            Molecules are taken from KEGG.
        html_writer: optional writer; when given, a header and the
            dissociation table of every compound are written to it.

    Returns:
        The populated DissociationConstants object. Compounds whose Molecule
        cannot be obtained from KEGG are skipped.
    """
    kegg = Kegg.getInstance()
    diss = DissociationConstants()
    if cid2mol is None:
        cid2mol = dict.fromkeys(kegg.get_all_cids())

    for cid in sorted(cid2mol):
        mol = cid2mol[cid]
        logging.info("Using ChemAxon to find the pKa values for %s - C%05d" %
                     (kegg.cid2name(cid), cid))
        if html_writer:
            html_writer.write('<h2>%s - C%05d</h2>\n' %
                              (kegg.cid2name(cid), cid))

        # fall back to the KEGG database for CIDs that were not assigned
        # a Molecule by the caller
        if mol is None:
            try:
                mol = kegg.cid2mol(cid)
            except KeggParseException:
                continue

        diss_table = mol.GetDissociationTable()
        diss.cid2DissociationTable[cid] = diss_table
        if not (diss_table and html_writer):
            continue
        diss_table.WriteToHTML(html_writer)
        html_writer.write('</br>\n')
    return diss
def run(self):
    """Compute the dissociation table of self.smiles and store it in the DB.

    The whole computation runs while holding self.semaphore, and the
    database is only accessed while holding self.db_lock. Both are released
    in 'finally' clauses, so that an exception raised for one compound does
    not leave the other threads blocked forever.

    When no dissociation table is obtained, a single row with 10 empty
    values is inserted for the compound.
    """
    from toolbox.molecule import Molecule

    self.semaphore.acquire()
    try:
        start_time = time.time()
        logging.debug("SMILES: " + self.smiles)
        diss_table = Molecule._GetDissociationTable(self.smiles, fmt='smiles',
            mid_pH=default_pH, min_pKa=0, max_pKa=14, T=default_T)
        # the table may be None (see the fallback insert below), so it must
        # be checked before its attributes are logged
        if diss_table is not None:
            logging.debug("Min charge: %d" % diss_table.min_charge)
            logging.debug("Min nH: %d" % diss_table.min_nH)
        elapsed_time = time.time() - start_time

        self.db_lock.acquire()
        try:
            db = SqliteDatabase(self.options.db_file)
            kegg = Kegg.getInstance()
            name = kegg.cid2name(self.cid)

            if diss_table is not None:
                for row in diss_table.ToDatabaseRow():
                    db.Insert(self.options.table_name, [self.cid, name] + row)
            else:
                db.Insert(self.options.table_name,
                          [self.cid, name] + [None] * 10)
            del db
        finally:
            self.db_lock.release()

        logging.info("Completed C%05d, elapsed time = %.1f sec" %
                     (self.cid, elapsed_time))
    finally:
        self.semaphore.release()
def Populate(self, filename):
    """Populates the database from files.

    Every row of the CSV file is converted to a row of the 'organisms'
    table: columns without a CSV mapping, and empty or whitespace-only
    values, are stored as None. The last column is overwritten with the
    broad oxygen requirement derived from the row. Afterwards, the
    (organism, EC) pairs of all KEGG enzymes are inserted into the
    'organism_enzymes' table.

    Args:
        filename: the path of the CSV file describing the organisms.
    """
    self._InitTables()

    # the 'with' block closes the file even if a row fails to be inserted
    with open(filename) as f:
        for row in csv.DictReader(f):
            insert_row = []
            for table_header in self.ORG_TABLE_HEADERS:
                if table_header not in self.CSV_HEADER_MAPPING:
                    insert_row.append(None)
                    continue

                csv_header = self.CSV_HEADER_MAPPING[table_header]
                val = row.get(csv_header, None)
                if val and val.strip():
                    insert_row.append(val)
                else:
                    insert_row.append(None)

            oxy_req = row.get(self.OXY_REQ, None)
            broad_req = self.GetBroadyOxyReq(oxy_req)
            insert_row[-1] = broad_req

            self.db.Insert('organisms', insert_row)

    k = Kegg.getInstance(loadFromAPI=False)
    enzyme_map = k.ec2enzyme_map
    for ec, enzyme in enzyme_map.iteritems():
        for org in enzyme.genes.keys():
            self.db.Insert('organism_enzymes', [org.lower(), ec])
def main(): opt_parser = flags.MakeOpts() options, _ = opt_parser.parse_args(sys.argv) estimators = LoadAllEstimators() print ('Parameters: T=%f K, pH=%.2g, pMg=%.2g, ' 'I=%.2gmM, Median concentration=%.2gM' % (default_T, options.ph, options.pmg, options.i_s, options.c_mid)) for thermo in estimators.values(): thermo.c_mid = options.c_mid thermo.pH = options.ph thermo.pMg = options.pmg thermo.I = options.i_s thermo.T = default_T kegg = Kegg.getInstance() while True: cid = GetReactionIdInput() compound = kegg.cid2compound(cid) print 'Compound Name: %s' % compound.name print '\tKegg ID: C%05d' % cid print '\tFormula: %s' % compound.formula print '\tInChI: %s' % compound.inchi for key, thermo in estimators.iteritems(): print "\t<< %s >>" % key try: print thermo.cid2PseudoisomerMap(cid), print '--> dG0\'f = %.1f kJ/mol' % compound.PredictFormationEnergy(thermo) except Exception as e: print '\t\tError: %s' % (str(e))
def __init__(self, S, reaction_ids, compound_ids, fluxes=None, name=None):
    """Initialize the stoichiometric model.

    Args:
        S: the stoichiometric matrix. Compounds are on the rows, reactions
           on the columns, i.e. its shape must be
           (len(compound_ids), len(reaction_ids)).
        reaction_ids: the ids/names of the reactions (columns).
        compound_ids: the ids/names of the compounds (rows).
        fluxes: the list of relative fluxes through all reactions.
            if not supplied, assumed to be 1.0 for all reactions
            (stored as a 1 x Nr array).
        name: a string name for this model.

    Raises:
        ValueError: if the shape of S does not match the number of
            reactions or compounds.
    """
    self.kegg = Kegg.getInstance()
    self.S = S
    self.reaction_ids = reaction_ids
    self.compound_ids = compound_ids
    self.Nr = len(self.reaction_ids)
    self.Nc = len(self.compound_ids)
    self.name = name
    self.slug_name = util.slugify(self.name)

    # decide on the default before converting, instead of wrapping None in
    # an array and overwriting it afterwards
    if fluxes is None:
        self.fluxes = np.ones((1, self.Nr))
    else:
        self.fluxes = np.array(fluxes)

    expected_Nc, expected_Nr = self.S.shape
    if self.Nr != expected_Nr:
        raise ValueError('Number of columns does not match number of reactions')
    if self.Nc != expected_Nc:
        raise ValueError('Number of rows does not match number of compounds')
def ParseReactionFormula(name, formula):
    """Parse a two-sided formula such as: 2 C00001 = C00002 + C00003

    Args:
        name: the name given to the parsed reaction.
        formula: the formula string; its two sides are separated by ' = '.

    Returns:
        A Reaction in which the compounds of the left side have negative
        coefficients and those of the right side positive ones. Its 'rid'
        attribute is set to what KEGG returns for the reaction, or else for
        its reverse.

    Raises:
        KeggParseException: if the ' = ' separator is missing, or if a
            compound appears on both sides of the formula.
    """
    left, separator, right = formula.partition(' = ')
    if not separator:
        raise KeggParseException("There should be exactly one '=' sign")

    # substrates get negative coefficients
    sparse_reaction = dict(
        (cid, -amount)
        for cid, amount
        in NistRowData.ParseReactionFormulaSide(left).iteritems())

    products = NistRowData.ParseReactionFormulaSide(right)
    for cid, amount in products.iteritems():
        if cid in sparse_reaction:
            raise KeggParseException(
                "C%05d appears on both sides of this formula" % cid)
        sparse_reaction[cid] = amount

    reaction = Reaction([name], sparse_reaction, None, '=>')

    kegg = Kegg.getInstance()
    reaction.rid = (kegg.reaction2rid(reaction) or
                    kegg.reaction2rid(reaction.reverse()))
    return reaction
def Train(self, FromDatabase=True, prior_thermodynamics=None):
    """Train the formation energies using linear regression.

    Args:
        FromDatabase: when true and the 'prc_S' table exists, the regression
            inputs are loaded from the database. Otherwise they are computed
            with ReverseTransform() and saved to the database.
        prior_thermodynamics: passed on to LinearRegression().

    The results are stored in the 'prc_pseudoisomers' table.
    """
    use_cache = FromDatabase and self.db.DoesTableExist('prc_S')
    if not use_cache:
        cid2nH_nMg = self.GetDissociation().GetCid2nH_nMg(
            self.pH, self.I, self.pMg, self.T)
        S, dG0, cids = self.ReverseTransform(cid2nH_nMg=cid2nH_nMg)

        # cache the regression inputs for the next run
        self.db.SaveSparseNumpyMatrix('prc_S', S)
        self.db.SaveNumpyMatrix('prc_b', dG0.T)
        self.db.CreateTable('prc_compounds',
                            'cid INT, name TEXT, nH INT, nMg INT')
        kegg = Kegg.getInstance()
        for cid in cids:
            nH, nMg = cid2nH_nMg[cid]
            self.db.Insert('prc_compounds',
                           [cid, kegg.cid2name(cid), nH, nMg])
        self.db.Commit()
    else:
        S = self.db.LoadSparseNumpyMatrix('prc_S')
        dG0 = self.db.LoadNumpyMatrix('prc_b').T
        cids = []
        cid2nH_nMg = {}
        for rowdict in self.db.DictReader('prc_compounds'):
            cid = int(rowdict['cid'])
            nH = int(rowdict['nH'])
            nMg = int(rowdict['nMg'])
            cids.append(int(rowdict['cid']))
            cid2nH_nMg[cid] = (nH, nMg)

    self.LinearRegression(S, dG0, cids, cid2nH_nMg, prior_thermodynamics)
    self.ToDatabase(self.db, 'prc_pseudoisomers')
def __init__(self, S, rids, fluxes, cids, formation_energies=None, reaction_energies=None, cid2bounds=None, c_range=None):
    """Initialize a pathway whose reactions and compounds are KEGG IDs.

    Args:
        S, fluxes, formation_energies, reaction_energies: forwarded to
            Pathway.__init__().
        rids: the reaction IDs; there must be self.Nr of them.
        cids: the compound IDs; there must be self.Nc of them.
        cid2bounds: optional dictionary from compound ID to a pair of
            bounds. Compounds missing from it get (None, None).
        c_range: stored as self.c_range.
    """
    Pathway.__init__(self, S,
                     formation_energies=formation_energies,
                     reaction_energies=reaction_energies,
                     fluxes=fluxes)
    assert len(cids) == self.Nc
    assert len(rids) == self.Nr

    self.rids = rids
    self.cids = cids

    self.bounds = None
    if cid2bounds:
        self.bounds = [cid2bounds.get(cid, (None, None))
                       for cid in self.cids]
    self.cid2bounds = cid2bounds
    self.c_range = c_range
    self.kegg = Kegg.getInstance()
def __init__(self, db, html_writer=None, dissociation=None, anchor_all=False):
    """Initialize the Unified Group Contribution object.

    Args:
        db: the database object, stored as self.db.
        html_writer: optional writer; a NullHtmlWriter is used when omitted.
        dissociation: stored as self.dissociation.
        anchor_all: selects the 'anchor all' variant of the formation
            energy CSV file.
    """
    PsuedoisomerTableThermodynamics.__init__(
        self, name="Unified Group Contribution")

    self.db = db
    self.html_writer = html_writer or NullHtmlWriter()
    self.dissociation = dissociation
    self.kegg = Kegg.getInstance()

    self.transformed = False
    self.CollapseReactions = False
    self.epsilon = 1e-10

    # names of the database tables used by this class
    self.STOICHIOMETRIC_TABLE_NAME = 'ugc_S'
    self.GROUP_TABLE_NAME = 'ugc_G'
    self.GIBBS_ENERGY_TABLE_NAME = 'ugc_b'
    self.ANCHORED_TABLE_NAME = 'ugc_anchored'
    self.COMPOUND_TABLE_NAME = 'ugc_compounds'
    self.OBSERVATION_TABLE_NAME = 'ugc_observations'
    self.GROUPVEC_TABLE_NAME = 'ugc_groupvectors'
    self.UNIQUE_OBSERVATION_TABLE_NAME = 'ugc_unique_observations'
    self.THERMODYNAMICS_TABLE_NAME = 'ugc_pseudoisomers'
    self.ERRORS_TABLE_NAME = 'ugc_errors'
    self.CONSERVATIONS_TABLE_NAME = 'ugc_conservations'

    self.FORMATION_ENERGY_FILENAME = (
        '../data/thermodynamics/formation_energies_anchor_all.csv'
        if anchor_all else
        '../data/thermodynamics/formation_energies.csv')
def __init__(self, db, html_writer=None, dissociation=None, anchor_all=False):
    """Initialize the Unified Group Contribution object.

    Args:
        db: the database object, stored as self.db.
        html_writer: optional writer; a NullHtmlWriter is used when omitted.
        dissociation: stored as self.dissociation.
        anchor_all: selects the 'anchor all' variant of the formation
            energy CSV file.
    """
    PsuedoisomerTableThermodynamics.__init__(self, name="Unified Group Contribution")

    self.db = db
    self.html_writer = html_writer or NullHtmlWriter()
    self.dissociation = dissociation
    self.kegg = Kegg.getInstance()

    self.transformed = False
    self.CollapseReactions = False
    self.epsilon = 1e-10

    # names of the database tables used by this class
    self.STOICHIOMETRIC_TABLE_NAME = 'ugc_S'
    self.GROUP_TABLE_NAME = 'ugc_G'
    self.GIBBS_ENERGY_TABLE_NAME = 'ugc_b'
    self.ANCHORED_TABLE_NAME = 'ugc_anchored'
    self.COMPOUND_TABLE_NAME = 'ugc_compounds'
    self.OBSERVATION_TABLE_NAME = 'ugc_observations'
    self.GROUPVEC_TABLE_NAME = 'ugc_groupvectors'
    self.UNIQUE_OBSERVATION_TABLE_NAME = 'ugc_unique_observations'
    self.THERMODYNAMICS_TABLE_NAME = 'ugc_pseudoisomers'
    self.ERRORS_TABLE_NAME = 'ugc_errors'
    self.CONSERVATIONS_TABLE_NAME = 'ugc_conservations'

    self.FORMATION_ENERGY_FILENAME = (
        '../data/thermodynamics/formation_energies_anchor_all.csv'
        if anchor_all else
        '../data/thermodynamics/formation_energies.csv')
def main():
    """Compare the reversibility of all KEGG reactions to their dG0.

    The 'PGC' estimator is set to the default conditions and the comparison
    is written to '../res/reversibility.html'.
    """
    html_fname = '../res/reversibility.html'
    logging.info('Writing HTML output to %s', html_fname)
    html_writer = HtmlWriter(html_fname)

    # plot the profile graph
    plot_settings = [
        ('text.usetex', False),
        ('legend.fontsize', 10),
        ('font.family', 'sans-serif'),
        ('font.size', 14),
        ('lines.linewidth', 2),
        ('lines.markersize', 6),
        ('figure.figsize', [6.0, 6.0]),
        ('figure.dpi', 90),
    ]
    for key, value in plot_settings:
        pylab.rcParams[key] = value

    estimators = LoadAllEstimators()

    #analyse_reversibility(estimators['hatzi_gc'], 'HatziGC')
    #analyse_reversibility(estimators['PGC'], 'MiloGC_zoom')

    reaction_list = Kegg.getInstance().AllReactions()
    #reaction_list = Feist.FromFiles().reactions

    thermo = estimators['PGC']
    thermo.c_mid = DEFAULT_CMID
    thermo.T = DEFAULT_T
    thermo.pH = DEFAULT_PH
    thermo.I = DEFAULT_I
    thermo.pMg = DEFAULT_PMG

    compare_reversibility_to_dG0(reaction_list, thermo=thermo,
                                 html_writer=html_writer)
def GetForamtionEnergies(self, thermo):
    """Fill the Gibbs energy table with the energies of all the equations.

    For every distinct equation in the equation table, the reaction is
    parsed and balanced, and its transformed energy is calculated at
    concentrations of 1 and 1e-3. Equations that cannot be parsed, contain
    compounds unknown to KEGG or cannot be balanced are stored with empty
    energies.

    Args:
        thermo: the object used for calculating the reaction energies.
    """
    table_name = self.GIBBS_ENERGY_TABLE_NAME
    self.db.CreateTable(table_name, "equation TEXT, dG0 REAL, dGc REAL",
                        drop_if_exists=True)
    self.db.CreateIndex('gibbs_equation_idx', table_name, 'equation',
                        unique=True, drop_if_exists=True)

    query = "SELECT distinct(equation) FROM %s" % (self.EQUATION_TABLE_NAME)
    all_equations = set(str(row[0]) for row in self.db.Execute(query))

    from pygibbs.kegg import Kegg
    all_kegg_cids = set(Kegg.getInstance().get_all_cids())

    for equation in all_equations:
        try:
            rxn = Reaction.FromFormula(equation)
            if not rxn.get_cids().issubset(all_kegg_cids):
                raise KeggNonCompoundException
            rxn.Balance(balance_water=True, exception_if_unknown=True)
            dG0 = thermo.GetTransfromedKeggReactionEnergies([rxn], conc=1)[0, 0]
            dGc = thermo.GetTransfromedKeggReactionEnergies([rxn], conc=1e-3)[0, 0]
            self.db.Insert(table_name, [equation, dG0, dGc])
        except (KeggParseException, KeggNonCompoundException,
                KeggReactionNotBalancedException):
            # keep the equation in the table, but without any energies
            self.db.Insert(table_name, [equation, None, None])

    self.db.Commit()
def __init__(self, db, html_writer, thermodynamics, kegg=None):
    """Store the collaborators of this object.

    Args:
        db: stored as self.db.
        html_writer: stored as self.html_writer.
        thermodynamics: stored as self.thermo.
        kegg: optional Kegg object; the shared Kegg instance is used when
            it is omitted (or falsy).
    """
    if not kegg:
        kegg = Kegg.getInstance()

    self.db = db
    self.html_writer = html_writer
    self.thermo = thermodynamics
    self.kegg = kegg
    self.pathways = {}
def GetMolInput(dissociation): mols = [] # a list of pairs of Molecule objects and stoichiometric coefficients while mols == []: print 'KEGG ID or SMILES (or Enter to quit):', s_input = raw_input() if not s_input: return [] elif re.findall('C\d\d\d\d\d', s_input) != []: try: cid = int(s_input[1:]) mols = [(GetMostAbundantMol(cid, dissociation), 1)] print "Compound:", mols[0][0].ToInChI() except ValueError: print 'syntax error: KEGG compound ID is bad (%s), please try again' % s_input elif re.findall('R\d\d\d\d\d', s_input) != []: try: rid = int(s_input[1:]) reaction = Kegg.getInstance().rid2reaction(rid) print "Reaction:", str(reaction) for cid, coeff in reaction.iteritems(): mols += [(GetMostAbundantMol(cid, dissociation), coeff)] except ValueError: print 'syntax error: KEGG reaction ID is bad (%s), please try again' % s_input else: try: mols = [(Molecule.FromSmiles(s_input), 1)] print "Compound:", mols[0][0].ToInChI() except Exception: print 'unable to parse SMILES string, please try again' return mols
def GetMolInput(dissociation): mols = [ ] # a list of pairs of Molecule objects and stoichiometric coefficients while mols == []: print 'KEGG ID or SMILES (or Enter to quit):', s_input = raw_input() if not s_input: return [] elif re.findall('C\d\d\d\d\d', s_input) != []: try: cid = int(s_input[1:]) mols = [(GetMostAbundantMol(cid, dissociation), 1)] print "Compound:", mols[0][0].ToInChI() except ValueError: print 'syntax error: KEGG compound ID is bad (%s), please try again' % s_input elif re.findall('R\d\d\d\d\d', s_input) != []: try: rid = int(s_input[1:]) reaction = Kegg.getInstance().rid2reaction(rid) print "Reaction:", str(reaction) for cid, coeff in reaction.iteritems(): mols += [(GetMostAbundantMol(cid, dissociation), coeff)] except ValueError: print 'syntax error: KEGG reaction ID is bad (%s), please try again' % s_input else: try: mols = [(Molecule.FromSmiles(s_input), 1)] print "Compound:", mols[0][0].ToInChI() except Exception: print 'unable to parse SMILES string, please try again' return mols
def BalanceSparseReaction(sparse, balance_water=False, balance_hydrogens=False, exception_if_unknown=False):
    """Balance a sparse reaction using the shared Kegg instance.

    Thin wrapper around kegg_utils.balance_reaction(); nothing is returned.
    NOTE(review): 'sparse' is presumably updated in place - confirm against
    kegg_utils.balance_reaction.

    Args:
        sparse: the sparse representation of the reaction.
        balance_water, balance_hydrogens, exception_if_unknown: forwarded
            unchanged to kegg_utils.balance_reaction().
    """
    # imported here rather than at the top of the module
    from pygibbs.kegg import Kegg
    kegg_utils.balance_reaction(Kegg.getInstance(), sparse, balance_water,
                                balance_hydrogens, exception_if_unknown)
def GetTransfromedKeggReactionEnergies(self, kegg_reactions, pH=None, I=None, pMg=None, T=None, conc=1):
    """Calculate the transformed energies of a list of KEGG reactions.

    The reactions are converted by KEGG to a stoichiometric matrix and a
    list of compound IDs, which are passed on to
    GetTransfromedReactionEnergies() together with the conditions.

    Args:
        kegg_reactions: the list of reactions.
        pH, I, pMg, T, conc: forwarded unchanged.

    Returns:
        Whatever GetTransfromedReactionEnergies() returns.
    """
    S, cids = Kegg.getInstance().reaction_list_to_S(kegg_reactions)
    conditions = dict(pH=pH, I=I, pMg=pMg, T=T, conc=conc)
    return self.GetTransfromedReactionEnergies(S, cids, **conditions)
def main():
    """Start a thread for every compound that is missing from the pKa table.

    The compounds are those of NIST, the redox carriers and the formation
    energy table (with the 'nist' option), or else all of KEGG. Compounds
    which already have rows in the table are skipped, and the rest are
    processed in increasing order of exact mass.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    db = SqliteDatabase(options.db_file)
    kegg = Kegg.getInstance()

    if options.override_table:
        db.Execute("DROP TABLE IF EXISTS " + options.table_name)

    DissociationConstants._CreateDatabase(db, options.table_name,
                                          drop_if_exists=options.override_table)

    cids_to_calculate = set()
    if not options.nist:
        cids_to_calculate.update(kegg.get_all_cids())
    else:
        cids_to_calculate.update(Nist().GetAllCids())
        cids_to_calculate.update(RedoxCarriers().GetAllCids())
        ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
            "../data/thermodynamics/formation_energies.csv")
        cids_to_calculate.update(ptable.get_all_cids())

    # Do not recalculate pKas for CIDs that are already in the database
    for row in db.Execute("SELECT distinct(cid) FROM %s" % options.table_name):
        cids_to_calculate.discard(row[0])

    cid2smiles_and_mw = {}
    for cid in cids_to_calculate:
        # the compound CO is a special case where the conversion from InChI
        # to SMILES fails, so we add a specific override for it only
        if cid == 237:
            cid2smiles_and_mw[cid] = ("[C-]#[O+]", 28)
            continue

        try:
            comp = kegg.cid2compound(cid)
            mol = comp.GetMolecule()
            cid2smiles_and_mw[cid] = (mol.ToSmiles(), mol.GetExactMass())
        except KeggParseException:
            logging.debug("%s (C%05d) has no SMILES, skipping..." %
                          (kegg.cid2name(cid), cid))
        except OpenBabelError:
            logging.debug("%s (C%05d) cannot be converted to SMILES, skipping..." %
                          (kegg.cid2name(cid), cid))

    # order by molecular weight, using the compound ID as a tie-breaker
    cids_to_calculate = sorted(
        cid2smiles_and_mw,
        key=lambda cid: (cid2smiles_and_mw[cid][1], cid))

    db_lock = threading.Lock()
    semaphore = threading.Semaphore(options.n_threads)
    for cid in cids_to_calculate:
        smiles, _ = cid2smiles_and_mw[cid]
        if not smiles:
            logging.info("The following compound is blacklisted: C%05d" % cid)
            continue

        thread = DissociationThreads(
            group=None, target=None, name=None,
            args=(cid, smiles, semaphore, db_lock, options), kwargs={})
        thread.start()
def main(): options, _ = flags.MakeOpts().parse_args(sys.argv) c_mid = options.c_mid pH = options.ph pMg = options.pmg I = options.i_s T = default_T db = SqliteDatabase("../res/gibbs.sqlite") kegg = Kegg.getInstance() G = GroupContribution(db) G.init() print ("Parameters: T=%f K, pH=%.2g, pMg=%.2g, " "I=%.2gM, Median concentration=%.2gM" % (T, pH, pMg, I, c_mid)) cmap = {} if not options.ignore_cofactors: if options.full_metabolites: print "Fixing concentrations of all known metabolites" cmap = reversibility.GetFullConcentrationMap(G) else: print "Fixing concentrations of co-factors" cmap = reversibility.GetConcentrationMap(kegg) else: print "Not fixing concentrations of co-factors" if options.report_mode: print "Output used metabolites concentrations" while True: mid = GetModuleIdInput() rid_flux_list = kegg.mid2rid_map[mid] for rid, flux in rid_flux_list: try: reaction = kegg.rid2reaction(rid) print "Reaction Name", reaction.name print "\tKegg Id", reaction.rid print "\tEC", reaction.ec_list rev = reversibility.CalculateReversability( reaction.sparse, G, pH=pH, I=I, pMg=pMg, T=T, concentration_map=cmap ) if rev == None: dG = G.estimate_dG_reaction(reaction.sparse, pH=pH, pMg=pMg, I=I, T=T, c0=c_mid, media="glucose") print "\tReversibility: No free compounds, dG = %.2g" % dG else: corrected_reversibility = flux * rev print "\tReversibility %.2g" % corrected_reversibility if options.report_mode: for cid, s in reaction.sparse.iteritems(): if cid in cmap: print "(%d C%05d) %s\t: %.2g" % (s, cid, kegg.cid2name(cid), cmap[cid]) else: print "(%d C%05d) %s\t: Free concentration" % (s, cid, kegg.cid2name(cid)) except Exception: print "\tCouldn't calculate irreversibility"
def CompareOverKegg(self, html_writer, other, fig_name=None):
    """
        Compare the estimation errors of two different evaluation methods
        by calculating all the KEGG reactions which both self and other
        can estimate, and comparing using a XY plot.

        Write results to HTML.

        Args:
            html_writer: the writer in which the figure is embedded.
            other: the second estimator; evaluated at the conditions
                (pH, pMg, I, T) of self.
            fig_name: passed to the writer as the name of the figure.

        Returns:
            The pair (0, 0) if there is no reaction that both methods can
            estimate. Otherwise nothing is returned.
    """
    total_list = []
    kegg = Kegg.getInstance()

    for rid in sorted(kegg.get_all_rids()):
        reaction = kegg.rid2reaction(rid)
        try:
            reaction.Balance()
            dG0_self = reaction.PredictReactionEnergy(
                self, pH=self.pH, pMg=self.pMg, I=self.I, T=self.T)
            dG0_other = reaction.PredictReactionEnergy(
                other, pH=self.pH, pMg=self.pMg, I=self.I, T=self.T)
        except (MissingCompoundFormationEnergy, MissingReactionEnergy,
                KeggReactionNotBalancedException, KeyError):
            # skip reactions that one of the methods cannot estimate
            continue

        total_list.append({'self': dG0_self, 'other': dG0_other,
                           'rid': rid, 'reaction': reaction})

    if not total_list:
        return 0, 0

    # plot the profile graph
    plt.rcParams['text.usetex'] = False
    plt.rcParams['legend.fontsize'] = 12
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.size'] = 12
    plt.rcParams['lines.linewidth'] = 2
    plt.rcParams['lines.markersize'] = 6
    plt.rcParams['figure.figsize'] = [6.0, 6.0]
    plt.rcParams['figure.dpi'] = 100

    vec_dG0_self = np.array([x['self'] for x in total_list])
    vec_dG0_other = np.array([x['other'] for x in total_list])
    vec_rid = [x['rid'] for x in total_list]

    fig = plt.figure()
    fig.hold(True)
    max_dG0 = max(vec_dG0_self.max(), vec_dG0_other.max())
    min_dG0 = min(vec_dG0_self.min(), vec_dG0_other.min())
    # the dashed diagonal marks a perfect agreement between the two methods
    plt.plot([min_dG0, max_dG0], [min_dG0, max_dG0], 'k--', figure=fig)
    plt.plot(vec_dG0_self, vec_dG0_other, '.', figure=fig)
    for i, rid in enumerate(vec_rid):
        plt.text(vec_dG0_self[i], vec_dG0_other[i], '%d' % rid, fontsize=6)
    # np.corrcoef returns the correlation coefficient r, so it has to be
    # squared in order to match the r^2 label in the title
    r2 = np.corrcoef(vec_dG0_self, vec_dG0_other)[1, 0] ** 2
    plt.title(r"$\Delta_r G^{'\circ}$ comparison per reaction, $r^2$ = %.2f" % r2)
    plt.xlabel(self.name + ' (in kJ/mol)', figure=fig)
    plt.ylabel(other.name + ' (in kJ/mol)', figure=fig)
    html_writer.embed_matplotlib_figure(fig, width=200, height=200, name=fig_name)
def GetConcentrationMap():
    """Return the concentration map of the compounds with known bounds.

    Starts from the empty concentration map and sets the concentration of
    every KEGG compound that has both a lower and an upper bound to its
    lower bound.
    """
    kegg = Kegg.getInstance()
    cmap = GetEmptyConcentrationMap()
    for cid in kegg.get_all_cids():
        lower, upper = kegg.get_bounds(cid)
        if not (lower and upper):
            continue
        # In the file we got this data from lower = upper
        cmap[cid] = lower
    return cmap
def __init__(self, T_range=(298, 314)):
    """Load the data from the public database and balance the reactions.

    Args:
        T_range: stored as self.T_range before the data is loaded.
    """
    self.db = SqliteDatabase('../data/public_data.sqlite')
    self.kegg = Kegg.getInstance()

    self.T_range = T_range
    self.pH_range = None

    # no overrides of the conditions by default
    self.override_I = None
    self.override_pMg = None
    self.override_T = None

    self.FromDatabase()
    self.BalanceReactions()
def ReadKeggCompounds():
    """Map the normalized InChI of every KEGG compound to its compound ID.

    Returns:
        A dictionary from normalized InChI to KEGG compound ID. When several
        compounds share an InChI, the lowest compound ID is kept. The key
        None is pre-mapped to 0.
    """
    kegg = Kegg.getInstance()
    inchi2KeggID = {None: 0}
    for cid in sorted(kegg.get_all_cids()):
        inchi = Feist.NormalizeInChI(kegg.cid2inchi(cid))
        # since CIDs are sorted, setdefault() keeps the lowest CID with
        # this InChI
        inchi2KeggID.setdefault(inchi, cid)
    return inchi2KeggID
def write_compound_and_coeff(cid, coeff, show_cids=True):
    """Format one compound of a reaction together with its coefficient.

    Args:
        cid: the KEGG compound ID.
        coeff: the stoichiometric coefficient.
        show_cids: when true, the compound is written as its ID (C#####),
            otherwise as the name that KEGG has for it.

    Returns:
        The compound alone when the coefficient is 1, otherwise the
        coefficient (in '%g' format) followed by the compound.
    """
    if not show_cids:
        from pygibbs.kegg import Kegg
        comp = Kegg.getInstance().cid2name(cid)
    else:
        comp = "C%05d" % cid

    if coeff != 1:
        return "%g %s" % (coeff, comp)
    return comp
def GetDissociationTable(self, cid, create_if_missing=True):
    """Return the dissociation table of a compound.

    Args:
        cid: the KEGG compound ID.
        create_if_missing: when true and the compound has no entry yet, a
            table is created from the compound's KEGG molecule and cached
            (None is cached if KEGG cannot provide the molecule).

    Returns:
        The cached dissociation table, or None if there is none.
    """
    tables = self.cid2DissociationTable
    if create_if_missing and cid not in tables:
        try:
            mol = Kegg.getInstance().cid2mol(cid)
            new_table = DissociationTable.FromMolecule(mol)
        except KeggParseException:
            new_table = None
        tables[cid] = new_table
    return tables.get(cid, None)
def WriteBiochemicalReactionEnergiesToCsv(self, csv_fname):
    """Write the transformed energies of all balanced KEGG reactions to CSV.

    Each row holds the reaction name, its full reaction string, the
    conditions returned by GetConditions() and the reaction energy.

    Args:
        csv_fname: the path of the CSV file to write (overwritten).
    """
    kegg = Kegg.getInstance()
    pH, I, pMg, T = self.GetConditions()
    kegg_reactions = kegg.get_all_balanced_reactions()

    # the 'with' block makes sure the file is flushed and closed
    with open(csv_fname, 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['rid', 'formula', 'pH', 'I', 'pMg', 'T', 'dG0'])
        dG0_r = self.GetTransfromedKeggReactionEnergies(kegg_reactions)
        for i, reaction in enumerate(kegg_reactions):
            writer.writerow([reaction.name, reaction.FullReactionString(),
                             pH, I, pMg, T, '%.1f' % float(dG0_r[0, i])])
def WriteBiochemicalFormationEnergiesToCsv(self, csv_fname):
    """Write the transformed formation energies of all compounds to CSV.

    Each row holds the KEGG name, the compound ID (as C#####), the
    conditions returned by GetConditions() and the transformed energy.
    The compounds are written in increasing order of their IDs.

    Args:
        csv_fname: the path of the CSV file to write (overwritten).
    """
    kegg = Kegg.getInstance()
    pH, I, pMg, T = self.GetConditions()

    # the 'with' block makes sure the file is flushed and closed
    with open(csv_fname, 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['name', 'cid', 'pH', 'I', 'pMg', 'T', 'dG0'])
        cids = sorted(self.get_all_cids())
        dG0_prime = self.GetTransformedFormationEnergies(cids)
        for i, cid in enumerate(cids):
            name = kegg.cid2name(cid)
            writer.writerow([name, "C%05d" % cid, pH, I, pMg, T,
                             '%.1f' % dG0_prime[0, i]])
def WriteChemicalFormationEnergiesToCsv(self, csv_fname):
    """Write the formation energies of all pseudoisomers to a CSV file.

    One row is written per pseudoisomer, holding the KEGG name, the
    compound ID (as C#####), nH, z, nMg and the formation energy. Compounds
    without a formation energy are logged as warnings and skipped.

    Args:
        csv_fname: the path of the CSV file to write (overwritten).
    """
    kegg = Kegg.getInstance()

    # the 'with' block makes sure the file is flushed and closed
    with open(csv_fname, 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['name', 'cid', 'nH', 'z', 'nMg', 'dG0'])
        for cid in sorted(self.get_all_cids()):
            name = kegg.cid2name(cid)
            try:
                pdata = self.cid2PseudoisomerMap(cid)
                for nH, z, nMg, dG0 in pdata.ToMatrix():
                    writer.writerow([name, "C%05d" % cid, nH, z, nMg,
                                     '%.1f' % dG0])
            except MissingCompoundFormationEnergy as e:
                logging.warning(str(e))
def compare_charges():
    """Write an HTML report comparing the charges used by two estimators.

    For every compound known to the 'hatzi' or the 'milo' estimator, the
    most abundant pseudoisomer at pH = default_pH, I = 0, pMg = 14 and
    T = default_T is looked up, and the charges are written to
    '../res/groups_report.html' together with the compound's 'gc_errors'
    value.
    """
    #db_public = SqliteDatabase('../data/public_data.sqlite')
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    html_writer = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    #pH, I, pMg, T = default_pH, default_I, default_pMg, default_T
    pH, I, pMg, T = default_pH, 0, 14, default_T

    cid2error = dict((int(row_dict['cid']), row_dict['error'])
                     for row_dict in db_gibbs.DictReader("gc_errors"))

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        db_gibbs, 'gc_pseudoisomers', name='Milo Group Contribution')

    all_cids = set(lsum([e.get_all_cids() for e in estimators.values()]))

    dict_list = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            name, link = "unknown", ""

        row_dict = {'cid': '<a href="%s">C%05d</a>' % (link, cid),
                    'name': name,
                    'error': cid2error.get(cid, None)}

        for key, est in estimators.iteritems():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                dG0, dG0_tag, nH, z, nMg = pmap.GetMostAbundantPseudoisomer(
                    pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # leave the cells of this estimator empty
                dG0 = dG0_tag = nH = z = nMg = ""
            row_dict['nH_' + key] = nH
            row_dict['charge_' + key] = z
            row_dict['nMg_' + key] = nMg
            row_dict['dG0_' + key] = dG0
            row_dict['dG0_tag_' + key] = dG0_tag
        dict_list.append(row_dict)

    html_writer.write_table(
        dict_list,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    html_writer.close()
def main():
    """Write the transformed formation energies of the CSV table to a file.

    The pseudoisomer map of every compound in the formation energy file is
    transformed to pH 7, I = 0.25, pMg 14 and T = 298.15, and written to
    '../res/formation_energies_transformed.csv'.
    """
    ptable = PsuedoisomerTableThermodynamics.FromCsvFile(FormationEnergyFileName,
                                                         label='testing')
    kegg = Kegg.getInstance()
    pH, I, pMg, T = (7.0, 0.25, 14, 298.15)

    # the 'with' block makes sure the file is flushed and closed
    with open('../res/formation_energies_transformed.csv', 'w') as out_file:
        output_csv = csv.writer(out_file)
        output_csv.writerow(["cid", "name", "dG'0", "pH", "I", "pMg", "T",
                             "anchor", "compound_ref", "remark"])
        for cid in ptable.get_all_cids():
            pmap = ptable.cid2PseudoisomerMap(cid)
            dG0_prime = pmap.Transform(pH=pH, I=I, pMg=pMg, T=T)
            output_csv.writerow([cid, kegg.cid2name(cid), "%.1f" % dG0_prime,
                                 pH, I, pMg, T, 1,
                                 ptable.cid2source_string[cid]])
def CreateElementMatrix(thermo):
    """Collect the atom vectors of all the compounds known to 'thermo'.

    Compounds whose atom vector cannot be obtained (a KEGG parsing or
    OpenBabel error, or a None vector) are left out.

    Args:
        thermo: an object providing get_all_cids().

    Returns:
        A pair (cids, atom_matrix): the list of compound IDs and a numpy
        array whose rows are their atom vectors, in the same order.
    """
    kegg = Kegg.getInstance()
    cids = []
    atom_vectors = []
    for cid in thermo.get_all_cids():
        try:
            atom_vector = kegg.cid2compound(cid).get_atom_vector()
        except (KeggParseException, OpenBabelError):
            continue
        if atom_vector is None:
            continue
        cids.append(cid)
        atom_vectors.append(atom_vector)
    return cids, np.array(atom_vectors)
def FindRedoxPairs(cids, atom_matrix, thermo):
    """
        Finds pairs of rows in the matrix where the only difference
        is exactly one or two electrons (H is ignored)

        Args:
            cids: the compound IDs of the rows of atom_matrix.
            atom_matrix: one atom vector per row. Column 0 is used as the
                number of electrons and column 1 (hydrogen) is ignored.
            thermo: provides the transformed formation energies, and the
                pH and name written to each row.

        Returns:
            A pair (rowdicts, fieldnames): one dictionary per redox pair,
            and the names of its fields.
    """
    def hash_atom_vector(v):
        # a string key of all the non-zero entries, ignoring H atoms
        return ','.join(["%d:%d" % (i, v[i])
                         for i in np.nonzero(v)[0] if i != 1])

    # NOTE(review): the energies are indexed below as a column vector
    # (dG0_f[i, 0]) - confirm against the shape that
    # GetTransformedFormationEnergies() returns.
    dG0_f = thermo.GetTransformedFormationEnergies(cids)
    kegg = Kegg.getInstance()

    n_rows, n_cols = atom_matrix.shape[0], atom_matrix.shape[1]

    # group the row indices by the hash of their atom vector
    lookup = collections.defaultdict(list)
    for row in xrange(n_rows):
        lookup[hash_atom_vector(atom_matrix[row, :])].append(row)

    fieldnames = ['name_ox', 'CID_ox', 'ne_ox',
                  'name_red', 'CID_red', 'ne_red',
                  'E_tag', 'pH', 'ref']
    rowdicts = []
    for delta_e in [1, 2]:
        # check if there are rows which equal other rows, with a difference
        # of 'delta_e' electrons
        reduction_vector = np.zeros((n_cols), dtype='int')
        reduction_vector[0] = delta_e
        for ox in xrange(n_rows):
            reduced_hash = hash_atom_vector(atom_matrix[ox, :] +
                                            reduction_vector)
            for red in lookup[reduced_hash]:
                #delta_H = atom_matrix[red, 1] - atom_matrix[ox, 1]
                ddG0 = dG0_f[ox, 0] - dG0_f[red, 0]
                E_prime = ddG0 / (F * delta_e)
                rowdicts.append({"name_ox": kegg.cid2name(cids[ox]),
                                 "CID_ox": cids[ox],
                                 "ne_ox": atom_matrix[ox, 0],
                                 "name_red": kegg.cid2name(cids[red]),
                                 "CID_red": cids[red],
                                 "ne_red": atom_matrix[red, 0],
                                 "E_tag": "%.2f" % E_prime,
                                 "pH": "%g" % thermo.pH,
                                 "ref": thermo.name})
    return rowdicts, fieldnames
def __init__(self, db, html_writer, thermodynamics):
    """Store the collaborators and register the generic electron carriers.

    Args:
        db: stored as self.db.
        html_writer: stored as self.html_writer.
        thermodynamics: stored as self.thermo; the pseudoisomers of
            compounds 28 and 30 are added to it.
    """
    self.db = db
    self.html_writer = html_writer
    self.thermo = thermodynamics
    self.kegg = Kegg.getInstance()

    # set the standard redox potential to 320mV and concentrations to 1M
    # the formation energy will be used only for the dG in the tables
    # but will later be overridden by the value or 'redox' which is
    # determined by the Y-axis in the contour plot.
    default_E_prime = -0.32  # the E' of NAD(P) at pH 7
    oxidized_dG0 = 0
    reduced_dG0 = -default_E_prime * F
    self.thermo.AddPseudoisomer(28, nH=0, z=0, nMg=0,
                                dG0=oxidized_dG0)  # oxidized electron carrier
    self.thermo.AddPseudoisomer(30, nH=0, z=0, nMg=0,
                                dG0=reduced_dG0)  # reduced electron carrier
def __init__(self, formation_thermo, name='reaction thermodynamic data'):
    """Initialize an empty set of reaction thermodynamic data.

    Args:
        formation_thermo: the formation energy source, stored as
            self.formations.
        name: the name passed on to Thermodynamics.__init__().

    The lists of reactions and of their dG0_r' values start out empty, and
    so do the formation energy dictionary and the list of variable
    compound IDs; no nullspace is set.
    """
    Thermodynamics.__init__(self, name)
    self.kegg = Kegg.getInstance()
    self.formations = formation_thermo

    self.reactions = []
    self.dG0_r_primes = []

    self.cid2dG0_f = {}
    self.var_cids = []
    self.var_nullspace = None
def CalculateCharge(self):
    """Calculate the charge for the most basic species.

    The (nH, charge) pair of a reference species is taken from the
    molecule returned by GetAnyMol(), or from KEGG when there is no such
    molecule. self.min_charge is set to the reference charge shifted by the
    difference between self.min_nH and the reference nH, or to 0 when no
    reference pair is available.
    """
    mol = self.GetAnyMol()
    if mol is not None:
        nH_z_pair = mol.GetHydrogensAndCharge()
    else:
        # get the charge and nH of the default pseudoisomer in KEGG:
        nH_z_pair = Kegg.getInstance().cid2nH_and_charge(self.cid)

    if not nH_z_pair:
        self.min_charge = 0
        return

    nH, z = nH_z_pair
    self.min_charge = z + (self.min_nH - nH)