def __init__(self, S, reaction_ids, compound_ids,
             fluxes=None, name=None):
    """Initialize the stoichiometric model.

    Args:
        S: the stoichiometric matrix. Compounds are on the rows and
            reactions are on the columns (this is the layout enforced by
            the shape check below).
        reaction_ids: the ids/names of the reactions (columns of S).
        compound_ids: the ids/names of the compounds (rows of S).
        fluxes: the list of relative fluxes through all reactions.
            If not supplied, assumed to be 1.0 for all reactions.
        name: a string name for this model.

    Raises:
        ValueError: if the shape of S does not agree with the number of
            reaction ids or compound ids.
    """
    self.kegg = Kegg.getInstance()
    self.S = S
    self.reaction_ids = reaction_ids
    self.compound_ids = compound_ids
    self.Nr = len(self.reaction_ids)
    self.Nc = len(self.compound_ids)
    self.name = name
    self.slug_name = util.slugify(self.name)

    # Default to a unit flux through every reaction (row vector 1 x Nr).
    if fluxes is None:
        self.fluxes = np.ones((1, self.Nr))
    else:
        self.fluxes = np.array(fluxes)

    expected_Nc, expected_Nr = self.S.shape
    if self.Nr != expected_Nr:
        raise ValueError('Number of columns does not match number of reactions')
    if self.Nc != expected_Nc:
        raise ValueError('Number of rows does not match number of compounds')
def main():
    """Compare reversibility to dG0 for all KEGG reactions.

    Configures the pylab plotting defaults, loads the 'PGC' estimator,
    sets it to the default conditions and writes the comparison to
    '../res/reversibility.html'.
    """
    html_fname = '../res/reversibility.html'
    logging.info('Writing HTML output to %s', html_fname)
    html_writer = HtmlWriter(html_fname)

    # plot the profile graph
    plot_settings = (
        ('text.usetex', False),
        ('legend.fontsize', 10),
        ('font.family', 'sans-serif'),
        ('font.size', 14),
        ('lines.linewidth', 2),
        ('lines.markersize', 6),
        ('figure.figsize', [6.0, 6.0]),
        ('figure.dpi', 90),
    )
    for key, value in plot_settings:
        pylab.rcParams[key] = value

    estimators = LoadAllEstimators()

    # Alternatives that were used during development:
    #   analyse_reversibility(estimators['hatzi_gc'], 'HatziGC')
    #   analyse_reversibility(estimators['PGC'], 'MiloGC_zoom')
    #   reaction_list = Feist.FromFiles().reactions
    reaction_list = Kegg.getInstance().AllReactions()

    thermo = estimators['PGC']
    thermo.c_mid = DEFAULT_CMID
    thermo.T = DEFAULT_T
    thermo.pH = DEFAULT_PH
    thermo.I = DEFAULT_I
    thermo.pMg = DEFAULT_PMG

    compare_reversibility_to_dG0(reaction_list,
                                 thermo=thermo,
                                 html_writer=html_writer)
def GetJSONDictionary(self):
    """Return the thermodynamic data in a JSON-serializable form.

    Returns:
        A list with one dictionary per compound ID from get_all_cids().
        Each dictionary has the keys 'cid', 'name', 'inchi',
        'num_electrons', 'source' and 'species'. Values that cannot be
        looked up are None. 'species' is a list of dictionaries with the
        keys 'nH', 'z', 'nMg' and 'dG0_f', one per pseudoisomer.
    """
    kegg = Kegg.getInstance()
    result = []
    for cid in self.get_all_cids():
        entry = {'cid': cid}

        try:
            entry['name'] = kegg.cid2name(cid)
        except KeyError:
            entry['name'] = None

        try:
            entry['inchi'] = kegg.cid2inchi(cid)
        except KeyError:
            entry['inchi'] = None

        try:
            entry['num_electrons'] = kegg.cid2num_electrons(cid)
        except KeggParseException:
            entry['num_electrons'] = None

        entry['source'] = self.cid2source_string.get(cid, None)

        pseudoisomers = self.cid2PseudoisomerMap(cid).ToMatrix()
        entry['species'] = [
            {"nH": nH, "z": z, "nMg": nMg, "dG0_f": dG0}
            for nH, z, nMg, dG0 in pseudoisomers
        ]
        result.append(entry)
    return result
def GetFullOxidationReaction(cid):
    """Build the reaction that converts a compound to the basic compounds.

    Solves a 5x5 linear system so that the C, O, P, N and electron counts
    of the compound are matched by a combination of H2O, O2, Pi, CO2 and
    NH3. The compound itself gets the coefficient -1; the coefficients of
    the basic compounds are rounded to 3 decimals.

    Args:
        cid: the KEGG compound ID of the compound.

    Returns:
        A Reaction named "complete oxidation of <compound name>".
    """
    kegg = Kegg.getInstance()
    basic_cids = [1, 7, 9, 11, 14]  # H2O, O2, Pi, CO2, NH3
    basic_elements = ["C", "O", "P", "N", "e-"]
    n_elements = len(basic_elements)

    # Column j holds the element (and electron) counts of basic_cids[j].
    element_mat = np.matrix(np.zeros((n_elements, len(basic_cids))))
    for col, basic_cid in enumerate(basic_cids):
        bag = kegg.cid2atom_bag(basic_cid)
        bag["e-"] = kegg.cid2num_electrons(basic_cid)
        for row, element in enumerate(basic_elements):
            element_mat[row, col] = bag.get(element, 0)

    # The same counts for the compound that is being oxidized.
    cs_element_vec = np.zeros((n_elements, 1))
    bag = kegg.cid2atom_bag(cid)
    bag["e-"] = kegg.cid2num_electrons(cid)
    for row, element in enumerate(basic_elements):
        cs_element_vec[row, 0] = bag.get(element, 0)

    # element_mat is an np.matrix, so '*' is a matrix product here.
    x = np.linalg.inv(element_mat) * cs_element_vec

    sparse = {}
    for i, basic_cid in enumerate(basic_cids):
        sparse[basic_cid] = np.round(x[i, 0], 3)
    sparse[cid] = -1

    return Reaction("complete oxidation of %s" % kegg.cid2name(cid), sparse)
def GetMolInput(dissociation): mols = [] # a list of pairs of Molecule objects and stoichiometric coefficients while mols == []: print 'KEGG ID or SMILES (or Enter to quit):', s_input = raw_input() if not s_input: return [] elif re.findall('C\d\d\d\d\d', s_input) != []: try: cid = int(s_input[1:]) mols = [(GetMostAbundantMol(cid, dissociation), 1)] print "Compound:", mols[0][0].ToInChI() except ValueError: print 'syntax error: KEGG compound ID is bad (%s), please try again' % s_input elif re.findall('R\d\d\d\d\d', s_input) != []: try: rid = int(s_input[1:]) reaction = Kegg.getInstance().rid2reaction(rid) print "Reaction:", str(reaction) for cid, coeff in reaction.iteritems(): mols += [(GetMostAbundantMol(cid, dissociation), coeff)] except ValueError: print 'syntax error: KEGG reaction ID is bad (%s), please try again' % s_input else: try: mols = [(Molecule.FromSmiles(s_input), 1)] print "Compound:", mols[0][0].ToInChI() except Exception: print 'unable to parse SMILES string, please try again' return mols
def Populate(self, filename):
    """Populates the database from files.

    Fills the 'organisms' table from the CSV file and the
    'organism_enzymes' table from the KEGG EC-to-enzyme map.

    Args:
        filename: path of the organisms CSV file (must have a header row).
    """
    self._InitTables()

    # 'with' guarantees that the file is closed even if a row fails.
    with open(filename) as f:
        for row in csv.DictReader(f):
            insert_row = []
            for table_header in self.ORG_TABLE_HEADERS:
                if table_header not in self.CSV_HEADER_MAPPING:
                    insert_row.append(None)
                    continue

                csv_header = self.CSV_HEADER_MAPPING[table_header]
                val = row.get(csv_header, None)
                # empty and whitespace-only cells are stored as NULL
                if val and val.strip():
                    insert_row.append(val)
                else:
                    insert_row.append(None)

            # the last column is overwritten with the broad oxygen requirement
            oxy_req = row.get(self.OXY_REQ, None)
            insert_row[-1] = self.GetBroadyOxyReq(oxy_req)
            self.db.Insert('organisms', insert_row)

    k = Kegg.getInstance(loadFromAPI=False)
    for ec, enzyme in k.ec2enzyme_map.iteritems():
        for org in enzyme.genes:
            self.db.Insert('organism_enzymes', [org.lower(), ec])
def __init__(self, db, html_writer, thermodynamics, kegg=None):
    """Store the collaborators and start with no pathways.

    Args:
        db: the database handle.
        html_writer: the writer used for HTML output.
        thermodynamics: the thermodynamic data source (kept as self.thermo).
        kegg: a Kegg instance; the shared instance is used when this
            argument is None (or otherwise false).
    """
    if not kegg:
        kegg = Kegg.getInstance()
    self.kegg = kegg
    self.thermo = thermodynamics
    self.html_writer = html_writer
    self.db = db
    self.pathways = {}
def main():
    """Transform the training formation energies and write them to a CSV.

    Reads the chemical formation energies from
    '../data/thermodynamics/formation_energies.csv', transforms each one
    to pH 7, I = 0.25, pMg 14 and T = 298.15 using its dissociation
    table, and writes the results to
    '../res/formation_energies_transformed.csv'.

    Raises:
        Exception: if a compound has more than one training species or
            has no dissociation table.
    """
    pH, I, pMg, T = 7.0, 0.25, 14.0, 298.15

    dissociation = DissociationConstants.FromPublicDB()
    kegg = Kegg.getInstance()
    obs_fname = "../data/thermodynamics/formation_energies.csv"
    res_fname = "../res/formation_energies_transformed.csv"

    train_species = PsuedoisomerTableThermodynamics.FromCsvFile(
        obs_fname, label="testing")

    # 'with' makes sure that the output is flushed and closed, also when
    # one of the compounds raises an exception.
    with open(res_fname, "w") as res_file:
        csv_out = csv.writer(res_file)
        csv_out.writerow(["cid", "name", "dG'0", "pH", "I", "pMg", "T",
                          "anchor", "compound_ref", "remark"])
        for cid in train_species.get_all_cids():
            pmap = train_species.cid2PseudoisomerMap(cid)
            source = train_species.cid2source_string[cid]
            pmatrix = pmap.ToMatrix()  # ToMatrix returns tuples of (nH, z, nMg, dG0)
            if len(pmatrix) != 1:
                raise Exception("multiple training species for C%05d" % cid)
            nH, charge, nMg, dG0 = pmatrix[0]
            name = "%s (%d)" % (kegg.cid2name(cid), nH)
            logging.info("Adding the formation energy of %s", name)
            diss_table = dissociation.GetDissociationTable(
                cid, create_if_missing=True)
            if diss_table is None:
                raise Exception("%s [C%05d, nH=%d, nMg=%d] does not have a "
                                "dissociation table" % (name, cid, nH, nMg))

            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            diss_table.SetCharge(nH, charge, nMg)
            dG0_prime = diss_table.Transform(pH, I, pMg, T)
            csv_out.writerow([cid, kegg.cid2name(cid), "%.1f" % dG0_prime,
                              pH, I, pMg, T, True, source, None])
def main(): opt_parser = flags.MakeOpts() options, _ = opt_parser.parse_args(sys.argv) estimators = LoadAllEstimators() print ('Parameters: T=%f K, pH=%.2g, pMg=%.2g, ' 'I=%.2gmM, Median concentration=%.2gM' % (default_T, options.ph, options.pmg, options.i_s, options.c_mid)) for thermo in estimators.values(): thermo.c_mid = options.c_mid thermo.pH = options.ph thermo.pMg = options.pmg thermo.I = options.i_s thermo.T = default_T kegg = Kegg.getInstance() while True: cid = GetReactionIdInput() compound = kegg.cid2compound(cid) print 'Compound Name: %s' % compound.name print '\tKegg ID: C%05d' % cid print '\tFormula: %s' % compound.formula print '\tInChI: %s' % compound.inchi for key, thermo in estimators.iteritems(): print "\t<< %s >>" % key try: print thermo.cid2PseudoisomerMap(cid), print '--> dG0\'f = %.1f kJ/mol' % compound.PredictFormationEnergy(thermo) except Exception as e: print '\t\tError: %s' % (str(e))
def run(self):
    """Calculate the dissociation table of one compound and store it.

    The semaphore limits how many calculations run at once and the
    database lock serializes the writes. Both are released in 'finally'
    blocks, so a failing compound cannot block the other threads.
    If no dissociation table is found, a row of 10 NULL values is stored
    for the compound.
    """
    from toolbox.molecule import Molecule

    self.semaphore.acquire()
    try:
        start_time = time.time()

        logging.debug("SMILES: " + self.smiles)
        diss_table = Molecule._GetDissociationTable(self.smiles, fmt='smiles',
            mid_pH=default_pH, min_pKa=0, max_pKa=14, T=default_T)
        # diss_table may be None (see the database branch below), so it
        # must not be dereferenced unconditionally.
        if diss_table is not None:
            logging.debug("Min charge: %d" % diss_table.min_charge)
            logging.debug("Min nH: %d" % diss_table.min_nH)

        elapsed_time = time.time() - start_time

        self.db_lock.acquire()
        try:
            db = SqliteDatabase(self.options.db_file)
            kegg = Kegg.getInstance()
            name = kegg.cid2name(self.cid)

            if diss_table is not None:
                for row in diss_table.ToDatabaseRow():
                    db.Insert(self.options.table_name, [self.cid, name] + row)
            else:
                db.Insert(self.options.table_name,
                          [self.cid, name] + [None] * 10)
            del db
        finally:
            self.db_lock.release()

        logging.info("Completed C%05d, elapsed time = %.1f sec" %
                     (self.cid, elapsed_time))
    finally:
        self.semaphore.release()
def __init__(self, db, html_writer=None, dissociation=None,
             anchor_all=False):
    """Construct a "Unified Group Contribution" instance.

    Args:
        db: the database handle.
        html_writer: the HtmlWriter to write to; a NullHtmlWriter is
            used if none is given.
        dissociation: stored as self.dissociation.
        anchor_all: selects which formation energies CSV file is used
            ('formation_energies_anchor_all.csv' when true).
    """
    PsuedoisomerTableThermodynamics.__init__(self,
        name="Unified Group Contribution")
    self.db = db
    if not html_writer:
        html_writer = NullHtmlWriter()
    self.html_writer = html_writer
    self.dissociation = dissociation
    self.transformed = False
    self.CollapseReactions = False
    self.epsilon = 1e-10
    self.kegg = Kegg.getInstance()

    # names of the database tables
    self.STOICHIOMETRIC_TABLE_NAME = 'ugc_S'
    self.GROUP_TABLE_NAME = 'ugc_G'
    self.GIBBS_ENERGY_TABLE_NAME = 'ugc_b'
    self.ANCHORED_TABLE_NAME = 'ugc_anchored'
    self.COMPOUND_TABLE_NAME = 'ugc_compounds'
    self.OBSERVATION_TABLE_NAME = 'ugc_observations'
    self.GROUPVEC_TABLE_NAME = 'ugc_groupvectors'
    self.UNIQUE_OBSERVATION_TABLE_NAME = 'ugc_unique_observations'
    self.THERMODYNAMICS_TABLE_NAME = 'ugc_pseudoisomers'
    self.ERRORS_TABLE_NAME = 'ugc_errors'
    self.CONSERVATIONS_TABLE_NAME = 'ugc_conservations'

    self.FORMATION_ENERGY_FILENAME = (
        '../data/thermodynamics/formation_energies_anchor_all.csv'
        if anchor_all else
        '../data/thermodynamics/formation_energies.csv')
def FromChemAxon(cid2mol=None, html_writer=None):
    """Create DissociationConstants by computing a table for each compound.

    Args:
        cid2mol: a dictionary from KEGG compound IDs to Molecule objects.
            A None value means that the molecule is taken from KEGG.
            If the dictionary itself is None, all KEGG compounds are used.
        html_writer: optional writer; when given, a header and the
            dissociation table of each compound are written to it.

    Returns:
        The populated DissociationConstants object. Compounds whose
        molecule cannot be parsed from KEGG are skipped.
    """
    kegg = Kegg.getInstance()
    diss = DissociationConstants()
    if cid2mol is None:
        cid2mol = dict.fromkeys(kegg.get_all_cids())

    for cid in sorted(cid2mol):
        mol = cid2mol[cid]
        name = kegg.cid2name(cid)
        logging.info("Using ChemAxon to find the pKa values for %s - C%05d" %
                     (name, cid))
        if html_writer:
            html_writer.write('<h2>%s - C%05d</h2>\n' % (name, cid))

        # if this CID is not assigned to a Molecule, use the KEGG database
        # to create a Molecule for it.
        if mol is None:
            try:
                mol = kegg.cid2mol(cid)
            except KeggParseException:
                continue

        diss_table = mol.GetDissociationTable()
        diss.cid2DissociationTable[cid] = diss_table
        if diss_table and html_writer:
            diss_table.WriteToHTML(html_writer)
            html_writer.write('</br>\n')
    return diss
def GetForamtionEnergies(self, thermo):
    """Fill the Gibbs energy table with one row per distinct equation.

    The table is dropped and recreated. Each row holds the equation and
    its reaction energy at conc=1 (dG0) and at conc=1e-3 (dGc). Equations
    that cannot be parsed, contain non-KEGG compounds or cannot be
    balanced get NULL for both energies.

    Args:
        thermo: the object used to calculate the reaction energies.
    """
    table_name = self.GIBBS_ENERGY_TABLE_NAME
    self.db.CreateTable(table_name,
                        "equation TEXT, dG0 REAL, dGc REAL",
                        drop_if_exists=True)
    self.db.CreateIndex('gibbs_equation_idx', table_name, 'equation',
                        unique=True, drop_if_exists=True)

    query = "SELECT distinct(equation) FROM %s" % (self.EQUATION_TABLE_NAME)
    all_equations = set(str(row[0]) for row in self.db.Execute(query))

    from pygibbs.kegg import Kegg
    all_kegg_cids = set(Kegg.getInstance().get_all_cids())

    failures = (KeggParseException,
                KeggNonCompoundException,
                KeggReactionNotBalancedException)
    for equation in all_equations:
        try:
            rxn = Reaction.FromFormula(equation)
            if not rxn.get_cids().issubset(all_kegg_cids):
                raise KeggNonCompoundException
            rxn.Balance(balance_water=True, exception_if_unknown=True)
            dG0 = thermo.GetTransfromedKeggReactionEnergies([rxn], conc=1)[0, 0]
            dGc = thermo.GetTransfromedKeggReactionEnergies([rxn], conc=1e-3)[0, 0]
            self.db.Insert(table_name, [equation, dG0, dGc])
        except failures:
            self.db.Insert(table_name, [equation, None, None])

    self.db.Commit()
def Train(self, FromDatabase=True, prior_thermodynamics=None):
    """Train the formation energies using linear regression.

    The regression input (stoichiometric matrix, energies and compound
    list) is loaded from the 'prc_*' tables when allowed and available;
    otherwise it is recalculated by a reverse transform and saved to
    those tables. The result is stored in 'prc_pseudoisomers'.

    Args:
        FromDatabase: if true, use the cached 'prc_S' data when the table
            exists.
        prior_thermodynamics: passed on to LinearRegression.
    """
    use_cache = FromDatabase and self.db.DoesTableExist('prc_S')
    if not use_cache:
        cid2nH_nMg = self.GetDissociation().GetCid2nH_nMg(
            self.pH, self.I, self.pMg, self.T)
        S, dG0, cids = self.ReverseTransform(cid2nH_nMg=cid2nH_nMg)

        # cache the regression input for the next run
        self.db.SaveSparseNumpyMatrix('prc_S', S)
        self.db.SaveNumpyMatrix('prc_b', dG0.T)
        self.db.CreateTable('prc_compounds',
                            'cid INT, name TEXT, nH INT, nMg INT')
        kegg = Kegg.getInstance()
        for cid in cids:
            nH, nMg = cid2nH_nMg[cid]
            self.db.Insert('prc_compounds',
                           [cid, kegg.cid2name(cid), nH, nMg])
        self.db.Commit()
    else:
        S = self.db.LoadSparseNumpyMatrix('prc_S')
        dG0 = self.db.LoadNumpyMatrix('prc_b').T
        cids = []
        cid2nH_nMg = {}
        for rowdict in self.db.DictReader('prc_compounds'):
            cid = int(rowdict['cid'])
            nH = int(rowdict['nH'])
            nMg = int(rowdict['nMg'])
            cids.append(cid)
            cid2nH_nMg[cid] = (nH, nMg)

    # Train the formation energies using linear regression
    self.LinearRegression(S, dG0, cids, cid2nH_nMg, prior_thermodynamics)
    self.ToDatabase(self.db, 'prc_pseudoisomers')
def ExportJSONFiles(): estimators = LoadAllEstimators() options, _ = MakeOpts(estimators).parse_args(sys.argv) thermo_list = [] thermo_list.append(estimators[options.thermodynamics_source]) thermo_list.append(PsuedoisomerTableThermodynamics.FromCsvFile(options.thermodynamics_csv)) # Make sure we have all the data. kegg = Kegg.getInstance() for i, thermo in enumerate(thermo_list): print "Priority %d - formation energies of: %s" % (i+1, thermo.name) kegg.AddThermodynamicData(thermo, priority=(i+1)) db = SqliteDatabase('../res/gibbs.sqlite') print 'Exporting Group Contribution Nullspace matrix as JSON.' nullspace_vectors = [] for row in db.DictReader('ugc_conservations'): d = {'msg': row['msg']} sparse = json.loads(row['json']) d['reaction'] = [] for cid, coeff in sparse.iteritems(): d['reaction'].append([coeff, "C%05d" % int(cid)]) nullspace_vectors.append(d) WriteJSONFile(nullspace_vectors, options.nullspace_out_filename) print 'Exporting KEGG compounds as JSON.' WriteJSONFile(kegg.AllCompounds(), options.compounds_out_filename) print 'Exporting KEGG reactions as JSON.' WriteJSONFile(kegg.AllReactions(), options.reactions_out_filename) print 'Exporting KEGG enzymes as JSON.' WriteJSONFile(kegg.AllEnzymes(), options.enzymes_out_filename)
def GetTransfromedKeggReactionEnergies(self, kegg_reactions,
                                       pH=None, I=None, pMg=None,
                                       T=None, conc=1):
    """Calculate the transformed energies of a list of KEGG reactions.

    The reactions are converted to a stoichiometric matrix and a list of
    compound IDs, which are handed to GetTransfromedReactionEnergies
    together with the given conditions.

    Args:
        kegg_reactions: a list of KEGG reaction objects.
        pH, I, pMg, T, conc: passed on unchanged.

    Returns:
        Whatever GetTransfromedReactionEnergies returns.
    """
    S, cids = Kegg.getInstance().reaction_list_to_S(kegg_reactions)
    conditions = dict(pH=pH, I=I, pMg=pMg, T=T, conc=conc)
    return self.GetTransfromedReactionEnergies(S, cids, **conditions)
def main(): options, _ = flags.MakeOpts().parse_args(sys.argv) c_mid = options.c_mid pH = options.ph pMg = options.pmg I = options.i_s T = default_T db = SqliteDatabase("../res/gibbs.sqlite") kegg = Kegg.getInstance() G = GroupContribution(db) G.init() print ("Parameters: T=%f K, pH=%.2g, pMg=%.2g, " "I=%.2gM, Median concentration=%.2gM" % (T, pH, pMg, I, c_mid)) cmap = {} if not options.ignore_cofactors: if options.full_metabolites: print "Fixing concentrations of all known metabolites" cmap = reversibility.GetFullConcentrationMap(G) else: print "Fixing concentrations of co-factors" cmap = reversibility.GetConcentrationMap(kegg) else: print "Not fixing concentrations of co-factors" if options.report_mode: print "Output used metabolites concentrations" while True: mid = GetModuleIdInput() rid_flux_list = kegg.mid2rid_map[mid] for rid, flux in rid_flux_list: try: reaction = kegg.rid2reaction(rid) print "Reaction Name", reaction.name print "\tKegg Id", reaction.rid print "\tEC", reaction.ec_list rev = reversibility.CalculateReversability( reaction.sparse, G, pH=pH, I=I, pMg=pMg, T=T, concentration_map=cmap ) if rev == None: dG = G.estimate_dG_reaction(reaction.sparse, pH=pH, pMg=pMg, I=I, T=T, c0=c_mid, media="glucose") print "\tReversibility: No free compounds, dG = %.2g" % dG else: corrected_reversibility = flux * rev print "\tReversibility %.2g" % corrected_reversibility if options.report_mode: for cid, s in reaction.sparse.iteritems(): if cid in cmap: print "(%d C%05d) %s\t: %.2g" % (s, cid, kegg.cid2name(cid), cmap[cid]) else: print "(%d C%05d) %s\t: Free concentration" % (s, cid, kegg.cid2name(cid)) except Exception: print "\tCouldn't calculate irreversibility"
def main():
    """Start one dissociation-table thread per compound that needs one.

    The compounds are either the NIST, redox-carrier and formation-energy
    compounds (option 'nist') or all KEGG compounds. Compounds that are
    already in the database table are skipped. The threads are started
    in order of increasing molecular mass.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    db = SqliteDatabase(options.db_file)
    kegg = Kegg.getInstance()

    # NOTE(review): the table name from the command line is pasted into
    # the SQL text here and in the SELECT below.
    if options.override_table:
        db.Execute("DROP TABLE IF EXISTS " + options.table_name)

    DissociationConstants._CreateDatabase(db, options.table_name,
                                          drop_if_exists=options.override_table)

    cids_to_calculate = set()
    if not options.nist:
        cids_to_calculate.update(kegg.get_all_cids())
    else:
        cids_to_calculate.update(Nist().GetAllCids())
        cids_to_calculate.update(RedoxCarriers().GetAllCids())
        ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
            "../data/thermodynamics/formation_energies.csv")
        cids_to_calculate.update(ptable.get_all_cids())

    # Do not recalculate pKas for CIDs that are already in the database
    for row in db.Execute("SELECT distinct(cid) FROM %s" % options.table_name):
        cids_to_calculate.discard(row[0])

    cid2smiles_and_mw = {}
    for cid in cids_to_calculate:
        # the compound CO is a special case where the conversion from InChI
        # to SMILES fails, so we add a specific override for it only
        if cid == 237:
            cid2smiles_and_mw[cid] = ("[C-]#[O+]", 28)
            continue

        try:
            comp = kegg.cid2compound(cid)
            mol = comp.GetMolecule()
            cid2smiles_and_mw[cid] = (mol.ToSmiles(), mol.GetExactMass())
        except KeggParseException:
            logging.debug("%s (C%05d) has no SMILES, skipping..." %
                          (kegg.cid2name(cid), cid))
        except OpenBabelError:
            logging.debug("%s (C%05d) cannot be converted to SMILES, skipping..." %
                          (kegg.cid2name(cid), cid))

    # lightest compounds first; ties are broken by the compound ID
    cids_to_calculate = sorted(
        cid2smiles_and_mw,
        key=lambda c: (cid2smiles_and_mw[c][1], c))

    db_lock = threading.Lock()
    semaphore = threading.Semaphore(options.n_threads)
    for cid in cids_to_calculate:
        smiles = cid2smiles_and_mw[cid][0]
        if not smiles:
            logging.info("The following compound is blacklisted: C%05d" % cid)
            continue

        thread = DissociationThreads(group=None, target=None, name=None,
                                     args=(cid, smiles, semaphore, db_lock, options),
                                     kwargs={})
        thread.start()
def CompareOverKegg(self, html_writer, other, fig_name=None):
    """
        Compare the estimation errors of two different evaluation methods
        by calculating all the KEGG reactions which both self and other
        can estimate, and comparing using a XY plot.

        Write results to HTML.

        Args:
            html_writer: the writer in which the figure is embedded.
            other: the second estimator. It is evaluated at the
                conditions (pH, pMg, I, T) of self.
            fig_name: optional name of the embedded figure.

        Returns:
            (0, 0) if there is no reaction that both can estimate,
            otherwise nothing.
    """
    total_list = []
    kegg = Kegg.getInstance()

    for rid in sorted(kegg.get_all_rids()):
        reaction = kegg.rid2reaction(rid)
        try:
            reaction.Balance()
            dG0_self = reaction.PredictReactionEnergy(self,
                pH=self.pH, pMg=self.pMg, I=self.I, T=self.T)
            dG0_other = reaction.PredictReactionEnergy(other,
                pH=self.pH, pMg=self.pMg, I=self.I, T=self.T)
        except (MissingCompoundFormationEnergy, MissingReactionEnergy,
                KeggReactionNotBalancedException, KeyError):
            # skip reactions that one of the methods cannot estimate
            continue

        total_list.append({'self': dG0_self, 'other': dG0_other,
                           'rid': rid, 'reaction': reaction})

    if not total_list:
        return 0, 0

    # plot the profile graph
    plt.rcParams['text.usetex'] = False
    plt.rcParams['legend.fontsize'] = 12
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.size'] = 12
    plt.rcParams['lines.linewidth'] = 2
    plt.rcParams['lines.markersize'] = 6
    plt.rcParams['figure.figsize'] = [6.0, 6.0]
    plt.rcParams['figure.dpi'] = 100

    vec_dG0_self = np.array([x['self'] for x in total_list])
    vec_dG0_other = np.array([x['other'] for x in total_list])
    vec_rid = [x['rid'] for x in total_list]

    fig = plt.figure()
    fig.hold(True)
    max_dG0 = max(vec_dG0_self.max(), vec_dG0_other.max())
    min_dG0 = min(vec_dG0_self.min(), vec_dG0_other.min())
    # the dashed diagonal marks perfect agreement
    plt.plot([min_dG0, max_dG0], [min_dG0, max_dG0], 'k--', figure=fig)
    plt.plot(vec_dG0_self, vec_dG0_other, '.', figure=fig)
    for i, rid in enumerate(vec_rid):
        plt.text(vec_dG0_self[i], vec_dG0_other[i], '%d' % rid, fontsize=6)

    # corrcoef returns the Pearson r; the title reports r^2, so square it.
    r2 = np.corrcoef(vec_dG0_self, vec_dG0_other)[1, 0] ** 2
    plt.title(r"$\Delta_r G^{'\circ}$ comparison per reaction, $r^2$ = %.2f" % r2)
    plt.xlabel(self.name + ' (in kJ/mol)', figure=fig)
    plt.ylabel(other.name + ' (in kJ/mol)', figure=fig)
    html_writer.embed_matplotlib_figure(fig, width=200, height=200,
                                        name=fig_name)
def GetConcentrationMap():
    """Map compounds with known bounds to their fixed concentration.

    Returns:
        The map from GetEmptyConcentrationMap(), with an entry added for
        every KEGG compound whose lower and upper bounds are both set
        (and non-zero). The lower bound is used as the concentration.
    """
    kegg = Kegg.getInstance()
    cmap = GetEmptyConcentrationMap()
    for cid in kegg.get_all_cids():
        lower, upper = kegg.get_bounds(cid)
        if not (lower and upper):
            continue
        # In the file we got this data from lower = upper
        cmap[cid] = lower
    return cmap
def ReadKeggCompounds():
    """Map normalized InChI strings to KEGG compound IDs.

    Returns:
        A dictionary from normalized InChI to the lowest compound ID
        that has it. The key None is mapped to 0.
    """
    kegg = Kegg.getInstance()
    inchi2KeggID = {None: 0}
    for cid in sorted(kegg.get_all_cids()):
        inchi = Feist.NormalizeInChI(kegg.cid2inchi(cid))
        # since CIDs are sorted, this will always keep the lowest CID
        # with this InChI
        inchi2KeggID.setdefault(inchi, cid)
    return inchi2KeggID
def __init__(self, T_range=(298, 314)):
    """Load the data from the public database and balance the reactions.

    Args:
        T_range: a (low, high) pair, stored as self.T_range.
    """
    self.db = SqliteDatabase('../data/public_data.sqlite')
    self.kegg = Kegg.getInstance()

    self.T_range = T_range
    self.pH_range = None
    # no condition overrides by default
    self.override_I = self.override_pMg = self.override_T = None

    self.FromDatabase()
    self.BalanceReactions()
def GetDissociationTable(self, cid, create_if_missing=True):
    """Return the dissociation table of a compound.

    Args:
        cid: the KEGG compound ID.
        create_if_missing: if true and the compound has no entry yet,
            a table is created from its KEGG molecule and cached. None
            is cached if the molecule cannot be parsed.

    Returns:
        The DissociationTable, or None if there is none.
    """
    tables = self.cid2DissociationTable
    if create_if_missing and cid not in tables:
        try:
            mol = Kegg.getInstance().cid2mol(cid)
            new_table = DissociationTable.FromMolecule(mol)
        except KeggParseException:
            new_table = None
        tables[cid] = new_table
    return tables.get(cid, None)
def WriteBiochemicalFormationEnergiesToCsv(self, csv_fname):
    """Write the transformed formation energy of every compound to a CSV.

    The columns are: name, cid, pH, I, pMg, T, dG0. The conditions are
    the ones returned by GetConditions().

    Args:
        csv_fname: the path of the output file (overwritten).
    """
    kegg = Kegg.getInstance()
    pH, I, pMg, T = self.GetConditions()

    # 'with' closes (and flushes) the file even if a lookup fails.
    with open(csv_fname, 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['name', 'cid', 'pH', 'I', 'pMg', 'T', 'dG0'])
        cids = sorted(self.get_all_cids())
        dG0_prime = self.GetTransformedFormationEnergies(cids)
        for i, cid in enumerate(cids):
            name = kegg.cid2name(cid)
            writer.writerow([name, "C%05d" % cid, pH, I, pMg, T,
                             '%.1f' % dG0_prime[0, i]])
def WriteBiochemicalReactionEnergiesToCsv(self, csv_fname):
    """Write the transformed energy of every balanced reaction to a CSV.

    The columns are: rid, formula, pH, I, pMg, T, dG0. The 'rid' column
    holds reaction.name. The conditions are the ones returned by
    GetConditions().

    Args:
        csv_fname: the path of the output file (overwritten).
    """
    kegg = Kegg.getInstance()
    pH, I, pMg, T = self.GetConditions()
    kegg_reactions = kegg.get_all_balanced_reactions()

    # 'with' closes (and flushes) the file even if the estimation fails.
    with open(csv_fname, 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['rid', 'formula', 'pH', 'I', 'pMg', 'T', 'dG0'])
        dG0_r = self.GetTransfromedKeggReactionEnergies(kegg_reactions)
        for i, reaction in enumerate(kegg_reactions):
            writer.writerow([reaction.name, reaction.FullReactionString(),
                             pH, I, pMg, T, '%.1f' % float(dG0_r[0, i])])
def write_compound_and_coeff(cid, coeff, show_cids=True):
    """Format one compound of a reaction string.

    Args:
        cid: the KEGG compound ID.
        coeff: the stoichiometric coefficient.
        show_cids: if true the compound is written as "C%05d", otherwise
            its KEGG name is used.

    Returns:
        The compound alone if the coefficient equals 1, otherwise the
        coefficient (in %g format), a space and the compound.
    """
    if show_cids:
        label = "C%05d" % cid
    else:
        from pygibbs.kegg import Kegg
        label = Kegg.getInstance().cid2name(cid)

    if coeff != 1:
        return "%g %s" % (coeff, label)
    return label
def WriteChemicalFormationEnergiesToCsv(self, csv_fname):
    """Write the formation energy of every pseudoisomer to a CSV file.

    The columns are: name, cid, nH, z, nMg, dG0. Compounds without a
    formation energy are logged as warnings and left out.

    Args:
        csv_fname: the path of the output file (overwritten).
    """
    kegg = Kegg.getInstance()

    # 'with' closes (and flushes) the file even if a lookup fails.
    with open(csv_fname, 'w') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['name', 'cid', 'nH', 'z', 'nMg', 'dG0'])
        for cid in sorted(self.get_all_cids()):
            name = kegg.cid2name(cid)
            try:
                pdata = self.cid2PseudoisomerMap(cid)
                for nH, z, nMg, dG0 in pdata.ToMatrix():
                    writer.writerow([name, "C%05d" % cid, nH, z, nMg,
                                     '%.1f' % dG0])
            except MissingCompoundFormationEnergy as e:
                logging.warning(str(e))
def main():
    """Write the transformed formation energies of all compounds to a CSV.

    Every compound in the formation energy table is transformed to
    pH 7, I = 0.25, pMg 14 and T = 298.15 and written to
    '../res/formation_energies_transformed.csv'.
    """
    ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
        FormationEnergyFileName, label='testing')
    kegg = Kegg.getInstance()
    pH, I, pMg, T = (7.0, 0.25, 14, 298.15)

    # 'with' closes (and flushes) the file even if a compound fails.
    with open('../res/formation_energies_transformed.csv', 'w') as out_file:
        output_csv = csv.writer(out_file)
        output_csv.writerow(["cid", "name", "dG'0", "pH", "I", "pMg", "T",
                             "anchor", "compound_ref", "remark"])
        for cid in ptable.get_all_cids():
            pmap = ptable.cid2PseudoisomerMap(cid)
            dG0_prime = pmap.Transform(pH=pH, I=I, pMg=pMg, T=T)
            # The trailing None is the (empty) 'remark' cell, so that each
            # row has as many fields as the header.
            output_csv.writerow([cid, kegg.cid2name(cid), "%.1f" % dG0_prime,
                                 pH, I, pMg, T, 1,
                                 ptable.cid2source_string[cid], None])
def CreateElementMatrix(thermo):
    """Collect the atom vectors of the compounds known to 'thermo'.

    Compounds whose atom vector cannot be obtained (parse or OpenBabel
    error) or is None are left out.

    Args:
        thermo: an object with a get_all_cids() method.

    Returns:
        A pair (cids, atom_matrix): the list of compound IDs that were
        kept, and a numpy array built from their atom vectors, in the
        same order.
    """
    kegg = Kegg.getInstance()
    kept_cids = []
    vectors = []
    for cid in thermo.get_all_cids():
        try:
            vector = kegg.cid2compound(cid).get_atom_vector()
        except (KeggParseException, OpenBabelError):
            continue
        if vector is None:
            continue
        kept_cids.append(cid)
        vectors.append(vector)
    return kept_cids, np.array(vectors)
def __init__(self, db, html_writer, thermodynamics):
    """Store the collaborators and register the generic electron carrier.

    Args:
        db: the database handle.
        html_writer: the writer used for HTML output.
        thermodynamics: the thermodynamic data source; two pseudoisomers
            (compounds 28 and 30) are added to it.
    """
    self.db = db
    self.html_writer = html_writer
    self.thermo = thermodynamics
    self.kegg = Kegg.getInstance()

    # Set the standard redox potential to -0.32 (presumably volts, i.e.
    # -320 mV) and the concentrations to 1M. This formation energy is
    # used only for the dG in the tables; it is later overridden by the
    # value of 'redox', which is determined by the Y-axis of the contour
    # plot.
    default_E_prime = -0.32  # the E' of NAD(P) at pH 7

    # oxidized electron carrier
    thermodynamics.AddPseudoisomer(28, nH=0, z=0, nMg=0, dG0=0)
    # reduced electron carrier
    thermodynamics.AddPseudoisomer(30, nH=0, z=0, nMg=0,
                                   dG0=-default_E_prime * F)
def __init__(self, model, thermodynamic_data,
             metabolite_concentration_bounds,
             optimization_status=OptimizationStatus.Successful(),
             optimal_value=None,
             optimal_ln_metabolite_concentrations=None):
    """Collect the result of a pathway optimization.

    Args:
        model: the stoichiometric model that was optimized.
        thermodynamic_data: source of the standard reaction energies.
        metabolite_concentration_bounds: the concentration bounds.
        optimization_status: the status of the optimization.
        optimal_value: the optimal value of the objective.
        optimal_ln_metabolite_concentrations: the log concentrations at
            the optimum. When None, the concentration-dependent
            attributes are left as None.
    """
    self.model = model
    self.thermo = thermodynamic_data
    self.bounds = metabolite_concentration_bounds
    self.status = optimization_status
    self.opt_val = optimal_value
    self.ln_concentrations = optimal_ln_metabolite_concentrations

    self.S = model.GetStoichiometricMatrix()
    self.Ncompounds, self.Nreactions = self.S.shape

    self.dGr0_tag = np.array(
        thermodynamic_data.GetDGrTagZero_ForModel(self.model))
    self.dGr0_tag_list = list(self.dGr0_tag.flatten())

    self.compound_ids = self.model.GetCompoundIDs()
    self.reaction_ids = self.model.GetReactionIDs()
    self.fluxes = self.model.GetFluxes()

    # names of the output files are derived from the model name
    slug = util.slugify(model.name)
    self.slug_name = slug
    self.pathway_graph_filename = '%s_graph.svg' % slug
    self.thermo_profile_filename = '%s_thermo_profile.png' % slug
    self.conc_profile_filename = '%s_conc_profile.png' % slug
    self.kegg = Kegg.getInstance()

    self.concentrations = None
    self.dGr_tag = None
    self.dGr_tag_list = None
    self.dGr_bio = None
    self.dGr_bio_list = None

    if self.ln_concentrations is None or self.dGr0_tag is None:
        return

    # reaction energies at the optimal concentrations
    self.concentrations = np.exp(self.ln_concentrations)
    conc_correction = RT * self.ln_concentrations * self.S
    self.dGr_tag = np.array(self.dGr0_tag + conc_correction)
    self.dGr_tag_list = list(self.dGr_tag.flatten())

    # reaction energies at the bound concentrations (default 1e-3)
    bio_concs = self.bounds.GetBoundsWithDefault(self.compound_ids,
                                                 default=1e-3)
    bio_correction = RT * np.dot(np.log(bio_concs), self.S)
    self.dGr_bio = np.array(self.dGr0_tag + bio_correction)
    self.dGr_bio_list = list(self.dGr_bio.flatten())
def main(): kegg = Kegg.getInstance() estimators = LoadAllEstimators() thermo = estimators['UGC'] data = LoadGrowthData() pdf = PdfPages('../res/growth_rates.pdf') for d in data: mol = kegg.cid2mol(d['cid']) d['mw'] = mol.GetExactMass() atom_bag, _ = mol.GetAtomBagAndCharge() d['numC'] = atom_bag['C'] r = GetFullOxidationReaction(d['cid']) r.Balance(balance_water=False, balance_hydrogens=True, exception_if_unknown=True) print r.FullReactionString(show_cids=False) d['dG0'] = r.PredictReactionEnergy(thermo) d['total S'] = sum([abs(x) for x in r.sparse.values()]) PlotEnergy(data, 'growth_rate', 'Specific Growth Rate [1/hr]', pdf) fig = plt.figure(figsize=(12, 6)) plt.subplot(1, 2, 1) CorrPlot([d['sumex'] for d in data], [d['growth_rate'] for d in data], [d['carbon_source'] for d in data], 'SUMEX score', 'Specific Growth Rate [1/h]', figure=fig) data_with_dG0 = [d for d in data if np.isfinite(d['dG0'])] plt.subplot(1, 2, 2) CorrPlot([-d['dG0'] for d in data_with_dG0], [d['growth_rate'] for d in data_with_dG0], [d['carbon_source'] for d in data_with_dG0], r'Oxidation $-\Delta_r G^\circ$ [kJ/mol]', 'Specific Growth Rate [1/h]', figure=fig) fig.tight_layout() pdf.savefig(fig) PlotEnergy(data, 'sumex', 'SUMEX score', pdf) pdf.close()
def __init__(self, S, rids, fluxes, cids, formation_energies=None,
             reaction_energies=None, cid2bounds=None, c_range=None,
             T=default_T):
    """Initialize a pathway whose rows/columns carry KEGG IDs.

    Args:
        S: the stoichiometric matrix, passed on to Pathway.
        rids: the reaction IDs; must have self.Nr items.
        fluxes: the fluxes, passed on to Pathway.
        cids: the compound IDs; must have self.Nc items.
        formation_energies: passed on to Pathway.
        reaction_energies: passed on to Pathway.
        cid2bounds: optional dictionary from compound ID to a pair of
            bounds; compounds that are missing get (None, None).
        c_range: stored as self.c_range.
        T: stored as self.T.
    """
    Pathway.__init__(self, S,
                     formation_energies=formation_energies,
                     reaction_energies=reaction_energies,
                     fluxes=fluxes)
    assert len(cids) == self.Nc
    assert len(rids) == self.Nr

    self.rids = rids
    self.cids = cids

    # self.bounds is aligned with self.cids, or None without cid2bounds
    self.bounds = None
    if cid2bounds:
        self.bounds = [cid2bounds.get(cid, (None, None))
                       for cid in self.cids]
    self.cid2bounds = cid2bounds

    self.c_range = c_range
    self.T = T
    self.kegg = Kegg.getInstance()
def LoadGrowthData():
    """Read the growth rate measurements and map them to KEGG compounds.

    Returns:
        A list of dictionaries, one per row of
        '../data/growth/growth_rates_adadi_2012.csv', with the keys
        'carbon_source', 'cid', 'growth_rate' and 'sumex'.

    Raises:
        Exception: if a carbon source name has no KEGG compound ID.
    """
    kegg = Kegg.getInstance()
    path = '../data/growth/growth_rates_adadi_2012.csv'
    data = []
    # 'with' closes the file even if a row cannot be mapped or parsed.
    with open(path, 'r') as csv_file:
        for row in csv.DictReader(csv_file):
            carbon_source = row['carbon source']
            cid, _, _ = kegg.name2cid(carbon_source)
            if cid is None:
                raise Exception("Cannot map compound name to KEGG ID: "
                                + carbon_source)
            data.append({
                'carbon_source': carbon_source,
                'cid': cid,
                'growth_rate': float(row['maximum growth rate measured']),
                'sumex': float(row['SUMEX']),
            })
    return data
def __init__(self, db, html_writer=None, transformed=False):
    """Construct a GroupContribution instance.

    Args:
        db: the database handle to read from.
        html_writer: the HtmlWriter to write to; a NullHtmlWriter is
            used if none is given.
        transformed: selects the prefix of the database table names:
            'bgc' when true, 'pgc' otherwise.
    """
    PsuedoisomerTableThermodynamics.__init__(self, name="Group Contribution")
    self.db = db
    if not html_writer:
        html_writer = NullHtmlWriter()
    self.html_writer = html_writer
    self.dissociation = None
    self.transformed = transformed
    self.epsilon = 1e-10

    self.kegg = Kegg.getInstance()
    # a private copy, so changes do not affect the shared Kegg instance
    self.bounds = deepcopy(self.kegg.cid2bounds)

    self.group_nullspace = None
    self.group_contributions = None
    self.obs_collection = None

    self.cid2error = {}
    self.cid2groupvec = None

    prefix = 'bgc' if transformed else 'pgc'

    self.OBSERVATION_TABLE_NAME = prefix + '_observations'
    self.GROUPVEC_TABLE_NAME = prefix + '_groupvector'
    self.NULLSPACE_TABLE_NAME = prefix + '_nullspace'
    self.CONTRIBUTION_TABLE_NAME = prefix + '_contribution'
    self.REGRESSION_TABLE_NAME = prefix + '_regression'

    self.THERMODYNAMICS_TABLE_NAME = prefix + '_pseudoisomers'
    self.STOICHIOMETRIC_MATRIX_TABLE_NAME = prefix + '_stoichiometry'
    self.ANCHORED_CONTRIBUTIONS_TALBE_NAME = prefix + '_anchored_g'
    self.ANCHORED_CIDS_TABLE_NAME = prefix + '_anchored_cids'
    self.ANCHORED_P_L_TALBE_NAME = prefix + '_anchored_P_L'
def main():
    """Write the transformed formation energies of all compounds to a CSV.

    Every compound in the formation energy table is transformed to
    pH 7, I = 0.25, pMg 14 and T = 298.15 and written to
    '../res/formation_energies_transformed.csv'.
    """
    ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
        FormationEnergyFileName, label='testing')
    kegg = Kegg.getInstance()
    pH, I, pMg, T = (7.0, 0.25, 14, 298.15)

    # 'with' closes (and flushes) the file even if a compound fails.
    with open('../res/formation_energies_transformed.csv', 'w') as out_file:
        output_csv = csv.writer(out_file)
        output_csv.writerow([
            "cid", "name", "dG'0", "pH", "I", "pMg", "T", "anchor",
            "compound_ref", "remark"
        ])
        for cid in ptable.get_all_cids():
            pmap = ptable.cid2PseudoisomerMap(cid)
            dG0_prime = pmap.Transform(pH=pH, I=I, pMg=pMg, T=T)
            # The trailing None is the (empty) 'remark' cell, so that each
            # row has as many fields as the header.
            output_csv.writerow([
                cid,
                kegg.cid2name(cid),
                "%.1f" % dG0_prime,
                pH, I, pMg, T, 1,
                ptable.cid2source_string[cid],
                None,
            ])
def CalculateThermo(): estimators = LoadAllEstimators() parser = MakeOpts(estimators) options, args = parser.parse_args(sys.argv) kegg = Kegg.getInstance() if options.rid is None: reaction = GetSparseReactionInput(args[-1], kegg) else: reaction = kegg.rid2reaction(options.rid) estimator = estimators[options.thermodynamics_source] pH, I, pMg, T = options.pH, options.I, options.pMg, options.T estimator.SetConditions(pH=pH, I=I, pMg=pMg, T=T) print "Thermodynamic source:", options.thermodynamics_source print('Parameters: pH=%.1f, pMg=%.1f, I=%.2fM, T=%.1fK' % (options.pH, options.pMg, options.I, options.T)) print str(reaction) print 'dG\'0 = %.2f [kJ/mol]' % reaction.PredictReactionEnergy(estimator)
def main():
    """Analyze every KEGG module and write a sorted summary table.

    The conditions and bounds are read from the 'CONFIGURATION' entry of
    the configuration file (or from its first entry, with a warning).
    Modules that raise a KeyError during the analysis are skipped. The
    table is sorted by the 'OBD [kJ/mol]' column.

    Raises:
        ValueError: if the configuration file has no entries.
    """
    estimators = LoadAllEstimators()
    args = MakeArgParser(estimators).parse_args()

    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if len(entries) == 0:
        raise ValueError('No entries in configuration file')
    entry = 'CONFIGURATION'
    if entry not in entries:
        logging.warning(
            'Configuration file does not contain the entry "CONFIGURATION". '
            'Using the first entry by default: %s' % entries[0])
        entry = entries[0]

    p_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I,
                         T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    html_writer = HtmlWriter(args.output_prefix + ".html")

    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    rowdicts = []
    kegg = Kegg.getInstance()
    for mid in kegg.get_all_mids():
        html_writer.write('<h2 id=M%05d>M%05d: %s</h2>' %
                          (mid, mid, kegg.get_module_name(mid)))
        try:
            summary = AnalyzeKeggModule(thermo, mid, bounds, html_writer)
        except KeyError:
            continue
        summary['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        summary['Name'] = kegg.get_module_name(mid)
        rowdicts.append(summary)

    rowdicts.sort(key=lambda row: row['OBD [kJ/mol]'])
    html_writer.write_table(rowdicts, headers, decimal=1)
    html_writer.close()
def AnalyzeKeggModule(thermo, mid, bounds, html_writer):
    """Run the OBD analysis for one KEGG module.

    Args:
        thermo: the estimator used for the reaction energies; its
            ``bounds`` attribute is overwritten for the module's compounds.
        mid: the KEGG module ID.
        bounds: object providing ``GetOldStyleBounds(cids)``.
        html_writer: receives the result tables of the pathway.

    Returns:
        A dictionary with the keys 'Length' and 'OBD [kJ/mol]'.  The OBD is
        the string "N/A" when a reaction of the module cannot be balanced
        or when some reaction energy is NaN.
    """
    kegg = Kegg.getInstance()
    S, rids, fluxes, cids = kegg.get_module(mid)
    thermo.bounds = bounds.GetOldStyleBounds(cids)
    result = {'OBD [kJ/mol]': "N/A", 'Length': len(rids)}

    # the S matrix already has the coefficients in the correct direction
    abs_fluxes = [abs(flux) for flux in fluxes]

    for rid in rids:
        reaction = kegg.rid2reaction(rid)
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
        except KeggReactionNotBalancedException:
            logging.warning(
                'R%05d is not a balanced reaction, skipping module' % rid)
            return result

    dG0_r_prime = thermo.GetTransfromedReactionEnergies(S, cids)
    if np.isnan(dG0_r_prime).any():
        logging.warning("Cannot analyze module M%05d because some of the "
                        "Gibbs energies cannot be calculated." % mid)
        return result

    pathway = KeggPathway(S, rids, abs_fluxes, cids,
                          reaction_energies=dG0_r_prime,
                          cid2bounds=thermo.bounds,
                          c_range=thermo.c_range)
    obd, params = pathway.FindOBD()
    pathway.WriteResultsToHtmlTables(html_writer,
                                     params['concentrations'],
                                     params['reaction prices'],
                                     params['compound prices'])
    result['OBD [kJ/mol]'] = obd
    return result
def CalculateThermo(): estimators = LoadAllEstimators() parser = MakeOpts(estimators) options, _ = parser.parse_args(sys.argv) if options.input_filename is None: sys.stderr.write(parser.get_usage()) sys.exit(-1) estimator = estimators[options.thermodynamics_source] pH, I, pMg, T = options.pH, options.I, options.pMg, options.T kegg = Kegg.getInstance() if options.csv_output_filename is not None: out_fp = open(options.csv_output_filename, 'w') print "writing results to %s ... " % options.csv_output_filename else: out_fp = sys.stdout entry2fields_map = ParsedKeggFile.FromKeggFile(options.input_filename) all_reactions = [] for key in sorted(entry2fields_map.keys()): field_map = entry2fields_map[key] p_data = PathwayData.FromFieldMap(field_map) if p_data.skip: continue cid_mapping = p_data.cid_mapping field_map = p_data.field_map _, _, _, reactions = kegg.parse_explicit_module_to_reactions( field_map, cid_mapping) all_reactions += reactions S, cids = kegg.reaction_list_to_S(all_reactions) dG0_r = estimator.GetTransfromedReactionEnergies(S, cids) csv_writer = csv.writer(out_fp) csv_writer.writerow(['reaction', 'dG0\'', 'pH', 'I', 'pMg', 'T']) for r, reaction in enumerate(all_reactions): csv_writer.writerow( [reaction.FullReactionString(), dG0_r[r, 0], pH, I, pMg, T])
def ExportCSVFiles(): estimators = LoadAllEstimators() options, _ = MakeOpts(estimators).parse_args(sys.argv) print "Using the thermodynamic estimations of: " + options.thermo_estimator thermo = estimators[options.thermo_estimator] thermo.pH = float(options.pH) thermo.I = float(options.I) thermo.pMg = float(options.pMg) thermo.T = float(options.T) # Make sure we have all the data. kegg = Kegg.getInstance() print 'Exporting KEGG compounds as JSON.' WriteCompoundCSV(kegg.AllCompounds(), thermo, options.compounds_out_filename) print 'Exporting KEGG reactions as JSON.' WriteReactionCSV(kegg.AllReactions(), thermo, options.reactions_out_filename)
def dissociation_decomposition_test():
    """
        Verifies that the decomposition of the compounds in the dissociation
        table match the nH of each species.

        Compounds without an explicit formula are only reported at debug
        level and are not tested.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    dissociation = DissociationConstants.FromPublicDB()
    decomposer = GroupDecomposer(GroupsData.FromDatabase(db))
    kegg = Kegg.getInstance()

    for cid in dissociation.GetAllCids():
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % label)
            continue
        diss_table = dissociation.GetDissociationTable(
            cid, create_if_missing=False)
        test_dissociation_table(diss_table, decomposer, label,
                                ignore_missing_smiles=True)
def ToDatabase(self, db, table_name, error_table_name=None):
    """Store the pseudoisomer data of all compounds in a database table.

    Args:
        db: the database; must provide CreateTable, Insert and Commit.
        table_name: the table receiving one row per pseudoisomer.
        error_table_name: optional table receiving one row for every
            compound whose formation energy is missing.  When it is None
            these compounds are only logged as warnings.
    """
    kegg = Kegg.getInstance()
    columns = ["cid INT", "nH INT", "z INT", "nMg INT", "dG0 REAL",
               "compound_ref TEXT", "pseudoisomer_ref TEXT", "anchor BOOL"]
    db.CreateTable(table_name, ", ".join(columns))
    keep_errors = error_table_name is not None
    if keep_errors:
        db.CreateTable(error_table_name, 'cid INT, name TEXT, error TEXT')

    for cid in self.get_all_cids():
        compound_ref = self.cid2SourceString(cid)
        try:
            pmap = self.cid2PseudoisomerMap(cid)
            for nH, z, nMg, dG0 in pmap.ToMatrix():
                row = [cid, nH, z, nMg, dG0, compound_ref,
                       pmap.GetRef(nH, z, nMg), cid in self.anchors]
                db.Insert(table_name, row)
        except MissingCompoundFormationEnergy as err:
            if not keep_errors:
                logging.warning(str(err))
            else:
                db.Insert(error_table_name,
                          [cid, kegg.cid2name(cid), str(err)])

    db.Commit()
def main():
    """Transform the training formation energies and write them as CSV.

    Every compound in the formation-energy CSV must have exactly one
    training species.  Its formation energy and charge are set on the
    compound's dissociation table, which is then transformed to pH 7.0,
    I = 0.25, pMg 14.0 and T = 298.15; the result is written as one row of
    the output file.

    Raises:
        Exception: if a compound does not have exactly one training
            species, or has no dissociation table.
    """
    pH, I, pMg, T = 7.0, 0.25, 14.0, 298.15

    dissociation = DissociationConstants.FromPublicDB()
    kegg = Kegg.getInstance()
    obs_fname = "../data/thermodynamics/formation_energies.csv"
    res_fname = "../res/formation_energies_transformed.csv"

    train_species = PsuedoisomerTableThermodynamics.FromCsvFile(
        obs_fname, label='testing')

    # The file used to be opened inline and was never closed; the context
    # manager guarantees that the rows are flushed even on an exception.
    with open(res_fname, 'w') as res_file:
        csv_out = csv.writer(res_file)
        csv_out.writerow([
            'cid', 'name', "dG'0", 'pH', 'I', 'pMg', 'T', 'anchor',
            'compound_ref', 'remark'
        ])
        for cid in train_species.get_all_cids():
            pmap = train_species.cid2PseudoisomerMap(cid)
            source = train_species.cid2source_string[cid]
            # ToMatrix returns tuples of (nH, z, nMg, dG0)
            pmatrix = pmap.ToMatrix()
            if len(pmatrix) != 1:
                raise Exception("multiple training species for C%05d" % cid)
            nH, charge, nMg, dG0 = pmatrix[0]
            name = "%s (%d)" % (kegg.cid2name(cid), nH)
            logging.info('Adding the formation energy of %s', name)

            diss_table = dissociation.GetDissociationTable(
                cid, create_if_missing=True)
            if diss_table is None:
                raise Exception("%s [C%05d, nH=%d, nMg=%d] does not have a "
                                "dissociation table" % (name, cid, nH, nMg))
            diss_table.SetFormationEnergyByNumHydrogens(dG0, nH, nMg)
            diss_table.SetCharge(nH, charge, nMg)
            dG0_prime = diss_table.Transform(pH, I, pMg, T)

            csv_out.writerow([
                cid, kegg.cid2name(cid), "%.1f" % dG0_prime, pH, I, pMg, T,
                True, source, None
            ])
def example_formate(thermo, product_cid=22, co2_conc=1e-5):
    """Search for pathways that convert formate into a product compound.

    The CO2 concentration is fixed to ``co2_conc`` and the carbonate
    concentration to the value in equilibrium with it (via the CO2
    hydration reaction).  A list of reactions is removed from the search
    space and uptake reactions for CO2, carbonate and formate are added.

    Args:
        thermo: the estimator; its ``bounds`` for C00011 and C00288 are
            overwritten.
        product_cid: KEGG compound ID of the target product.
        co2_conc: the fixed CO2 concentration.
    """
    hydration = Reaction.FromFormula("C00011 + C00001 => C00288")
    hydration_dG0_prime = float(
        thermo.GetTransfromedKeggReactionEnergies([hydration]))
    carbonate_conc = co2_conc * np.exp(-hydration_dG0_prime / (R*default_T))
    thermo.bounds[11] = (co2_conc, co2_conc)
    thermo.bounds[288] = (carbonate_conc, carbonate_conc)

    gibbs_db = SqliteDatabase('../res/gibbs.sqlite', 'r')
    public_db = SqliteDatabase('../data/public_data.sqlite')
    html_writer = HtmlWriter('../res/pathologic.html')
    pl = Pathologic(db=gibbs_db,
                    public_db=public_db,
                    html_writer=html_writer,
                    thermo=thermo,
                    max_solutions=None,
                    max_reactions=20,
                    maximal_dG=0.0,
                    thermodynamic_method=OptimizationMethods.GLOBAL,
                    update_file=None)
    add_cofactor_reactions(pl, free_ATP_hydrolysis=True)
    add_redox_reactions(pl, NAD_only=False)

    excluded_rids = (
        134,   # formate:NADP+ oxidoreductase
        519,   # Formate:NAD+ oxidoreductase
        24,    # Rubisco
        581,   # L-serine:NAD+ oxidoreductase (deaminating)
        220,   # L-serine ammonia-lyase
        13,    # glyoxylate carboxy-lyase (dimerizing; tartronate-semialdehyde-forming)
        585,   # L-Serine:pyruvate aminotransferase
        1440,  # D-Xylulose-5-phosphate:formaldehyde glycolaldehydetransferase
        5338,  # 3-hexulose-6-phosphate synthase
    )
    for rid in excluded_rids:
        pl.delete_reaction(rid)

    uptake_reactions = (
        ("C06265 => C00011", "CO2 uptake"),
        ("C06265 => C00288", "carbonate uptake"),
        ("C06265 => C00058", "formate uptake"),
    )
    for formula, label in uptake_reactions:
        pl.add_reaction(Reaction.FromFormula(formula, name=label))

    # at least one formate to product
    target = Reaction.FromFormula("5 C06265 + C00058 => C%05d" % product_cid)
    kegg = Kegg.getInstance()
    pl.find_path("formate to %s" % kegg.cid2name(product_cid), target)
def __init__(self, use_pKa=True):
    """Load the Jankowski et al. estimates from the CSV file.

    Args:
        use_pKa: when True, the public dissociation constants are loaded
            into ``self.dissociation``; otherwise it is set to None.

    For every row with a calculated value, the Gibbs energy (multiplied by
    J_per_cal) and the charge are stored by compound ID.
    """
    if use_pKa:
        Thermodynamics.__init__(self, "Jankowski et al. (+pKa)")
        self.dissociation = DissociationConstants.FromPublicDB()
    else:
        Thermodynamics.__init__(self, "Jankowski et al.")
        self.dissociation = None
    self.db = SqliteDatabase('../res/gibbs.sqlite', 'w')
    self.cid2pmap_dict = {}

    # the conditions in which Hatzimanikatis makes his predictions
    self.Hatzi_pH = 7.0
    self.Hatzi_I = 0.0
    self.Hatzi_pMg = 14.0
    self.Hatzi_T = 298.15

    self.kegg = Kegg.getInstance()

    # for some reason, Hatzimanikatis doesn't indicate that H+ is zero,
    # so we add it here
    H_pmap = PseudoisomerMap()
    H_pmap.Add(0, 0, 0, 0)
    self.SetPseudoisomerMap(80, H_pmap)

    self.cid2dG0_tag_dict = {80: 0}
    self.cid2charge_dict = {80: 0}

    # Read through a context manager so the file handle is released as soon
    # as the table is loaded (it used to be left open).
    with open(HATZI_CSV_FNAME, 'r') as csv_file:
        for row in csv.DictReader(csv_file):
            cid = int(row['ENTRY'][1:])
            self.cid2source_string[cid] = 'Jankowski et al. 2008'
            if row['DELTAG'] == "Not calculated":
                continue
            if cid == 3178:
                # this compound, which is supposed to be
                # "Tetrahydroxypteridine" seems to be mapped to something
                # else by Hatzimanikatis
                continue
            self.cid2dG0_tag_dict[cid] = float(row['DELTAG']) * J_per_cal
            self.cid2charge_dict[cid] = int(row['CHARGE'])
def WriteDataToHtml(self, html_writer):
    """Write one HTML table row for every pseudoisomer of every compound.

    Args:
        html_writer: the writer whose ``write_table`` receives the rows.
    """
    kegg = Kegg.getInstance()
    table = []
    for cid in self.get_all_cids():
        pmap = self.cid2PseudoisomerMap(cid)
        for nH, z, nMg, dG0 in pmap.ToMatrix():
            table.append({
                'KEGG ID': 'C%05d' % cid,
                'name': kegg.cid2name(cid),
                'nH': nH,
                'z': z,
                'nMg': nMg,
                symbol_df_G0: dG0,
                'reference': pmap.GetRef(nH, z, nMg),
                'anchor': 'yes' if cid in self.anchors else 'no',
            })

    column_order = ['KEGG ID', 'name', 'nH', 'z', 'nMg', symbol_df_G0,
                    'reference', 'anchor']
    html_writer.write_table(table, column_order, decimal=1)
def nist_dissociation_test():
    """
        Verifies that all the compounds in NIST are covered by the
        dissociation table, including SMILES strings.

        Compounds without an explicit formula are only reported at debug
        level and are not tested.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    nist_regression = NistRegression(db, html_writer=NullHtmlWriter())
    dissociation = nist_regression.dissociation
    decomposer = GroupDecomposer(GroupsData.FromDatabase(db))
    kegg = Kegg.getInstance()

    for cid in nist_regression.nist.GetAllCids():
        label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % label)
            continue
        diss_table = dissociation.GetDissociationTable(
            cid, create_if_missing=False)
        test_dissociation_table(diss_table, decomposer, label,
                                ignore_missing_smiles=False)
def ExportJSONFiles(): estimators = LoadAllEstimators() options, _ = MakeOpts(estimators).parse_args(sys.argv) thermo_list = [] thermo_list.append(estimators[options.thermodynamics_source]) thermo_list.append( PsuedoisomerTableThermodynamics.FromCsvFile( options.thermodynamics_csv)) # Make sure we have all the data. kegg = Kegg.getInstance() for i, thermo in enumerate(thermo_list): print "Priority %d - formation energies of: %s" % (i + 1, thermo.name) kegg.AddThermodynamicData(thermo, priority=(i + 1)) db = SqliteDatabase('../res/gibbs.sqlite') print 'Exporting Group Contribution Nullspace matrix as JSON.' nullspace_vectors = [] for row in db.DictReader('ugc_conservations'): d = {'msg': row['msg']} sparse = json.loads(row['json']) d['reaction'] = [] for cid, coeff in sparse.iteritems(): d['reaction'].append([coeff, "C%05d" % int(cid)]) nullspace_vectors.append(d) WriteJSONFile(nullspace_vectors, options.nullspace_out_filename) print 'Exporting KEGG compounds as JSON.' WriteJSONFile(kegg.AllCompounds(), options.compounds_out_filename) print 'Exporting KEGG reactions as JSON.' WriteJSONFile(kegg.AllReactions(), options.reactions_out_filename) print 'Exporting KEGG enzymes as JSON.' WriteJSONFile(kegg.AllEnzymes(), options.enzymes_out_filename)
def main(): options, _ = flags.MakeOpts().parse_args(sys.argv) kegg = Kegg.getInstance() estimators = LoadAllEstimators() print ('Parameters: T=%f K, pH=%.2g, pMg=%.2g, ' 'I=%.2gmM, Median concentration=%.2gM' % (default_T, options.ph, options.pmg, options.i_s, options.c_mid)) for thermo in estimators.values(): thermo.c_mid = options.c_mid thermo.pH = options.ph thermo.pMg = options.pmg thermo.I = options.i_s thermo.T = default_T cmap = {} if not options.ignore_cofactors: print 'Fixing concentrations of co-factors' cmap = reversibility.GetConcentrationMap() else: print 'Not fixing concentrations of co-factors' while True: rid = GetReactionIdInput() reaction = kegg.rid2reaction(rid) print 'Reaction Name: %s' % reaction.name print '\tKegg ID: R%05d' % rid print '\tEC: %s' % str(reaction.ec_list) for key, thermo in estimators.iteritems(): print "\t<< %s >>" % key try: print '\t\tdG0\'f = %.1f kJ/mol' % reaction.PredictReactionEnergy(thermo) rev = reversibility.CalculateReversability(reaction, thermo, concentration_map=cmap) print '\t\tgamma = %.3g' % rev except Exception as e: print '\tError: %s' % (str(e))
def __init__(self):
    """Start with an empty table map and a handle to the KEGG singleton."""
    # filled in later; starts out empty
    self.cid2DissociationTable = dict()
    self.kegg = Kegg.getInstance()
def main(): options, _ = flags.MakeOpts().parse_args(sys.argv) c_mid = options.c_mid pH = options.ph pMg = options.pmg I = options.i_s T = default_T db = SqliteDatabase('../res/gibbs.sqlite') kegg = Kegg.getInstance() G = GroupContribution(db) G.init() print( 'Parameters: T=%f K, pH=%.2g, pMg=%.2g, ' 'I=%.2gM, Median concentration=%.2gM' % (T, pH, pMg, I, c_mid)) cmap = {} if not options.ignore_cofactors: if options.full_metabolites: print 'Fixing concentrations of all known metabolites' cmap = reversibility.GetFullConcentrationMap(G) else: print 'Fixing concentrations of co-factors' cmap = reversibility.GetConcentrationMap(kegg) else: print 'Not fixing concentrations of co-factors' if options.report_mode: print 'Output used metabolites concentrations' while True: mid = GetModuleIdInput() rid_flux_list = kegg.mid2rid_map[mid] for rid, flux in rid_flux_list: try: reaction = kegg.rid2reaction(rid) print 'Reaction Name', reaction.name print '\tKegg Id', reaction.rid print '\tEC', reaction.ec_list rev = reversibility.CalculateReversability( reaction.sparse, G, pH=pH, I=I, pMg=pMg, T=T, concentration_map=cmap) if rev == None: dG = G.estimate_dG_reaction(reaction.sparse, pH=pH, pMg=pMg, I=I, T=T, c0=c_mid, media='glucose') print '\tReversibility: No free compounds, dG = %.2g' % dG else: corrected_reversibility = flux * rev print '\tReversibility %.2g' % corrected_reversibility if options.report_mode: for cid, s in reaction.sparse.iteritems(): if cid in cmap: print '(%d C%05d) %s\t: %.2g' % ( s, cid, kegg.cid2name(cid), cmap[cid]) else: print '(%d C%05d) %s\t: Free concentration' % ( s, cid, kegg.cid2name(cid)) except Exception: print '\tCouldn\'t calculate irreversibility'
def main(): kegg = Kegg.getInstance() options, args = MakeOpts().parse_args(sys.argv) print ('Parameters: T=%f K, pMg=%.2g, I=%.2gM' % (options.T, options.pMg, options.I)) print "reaction:", args[-1] estimators = LoadAllEstimators() plt.rcParams['legend.fontsize'] = 8 plt.rcParams['font.family'] = 'sans-serif' plt.rcParams['font.size'] = 12 plt.rcParams['lines.linewidth'] = 2 colormap = {} colormap['markers'] = (64.0/255, 111.0/255, 29.0/255, 3.0) #colormap['hatzi_gc'] = (54.0/255, 182.0/255, 202.0/255, 1.0) colormap['UGC'] = (202.0/255, 101.0/255, 54.0/255, 1.0) #colormap['alberty'] = (202.0/255, 54.0/255, 101.0/255, 1.0) colormap['PGC'] = (101.0/255, 202.0/255, 54.0/255, 1.0) fig = plt.figure(figsize=(6,6), dpi=90) fig.hold(True) if options.rid is None: reaction = GetSparseReactionInput(args[-1], kegg) else: reaction = kegg.rid2reaction(options.rid) reaction.Balance() print 'Reaction: %s' % reaction.FullReactionString() nist = Nist() nist_rows = nist.SelectRowsFromNist(reaction, check_reverse=True) pH_min = 7.0 - options.pH/2.0 pH_max = 7.0 + options.pH/2.0 if nist_rows: dG0_list = [] pH_list = [] for row_data in nist_rows: pH_list.append(row_data.pH) if row_data.reaction == reaction: dG0_list.append(row_data.dG0_r) else: dG0_list.append(-row_data.dG0_r) plt.plot(pH_list, dG0_list, marker='.', linestyle='none', label='measured data', markeredgecolor='none', markerfacecolor=colormap['markers'], markersize=5) pH_max = max(pH_list + [pH_max]) pH_min = min(pH_list + [pH_min]) pH_range = np.arange(pH_min-0.1, pH_max+0.1, 0.02) for key, thermo in estimators.iteritems(): if key not in colormap: continue print key, 'dG0 at pH=7: %.2f' % reaction.PredictReactionEnergy(thermo, pH=7.0, pMg=options.pMg, I=options.I, T=options.T) dG0 = [] for pH in pH_range: dG0.append(reaction.PredictReactionEnergy(thermo, pH=pH, pMg=options.pMg, I=options.I, T=options.T)) plt.plot(pH_range, dG0, marker='None', linestyle='solid', color=colormap[key], figure=fig, label=thermo.name) 
plt.xlabel('pH') plt.ylabel(r'$\Delta_r G^\circ$ [kJ/mol]') plt.title(kegg.reaction2string(reaction), fontsize=8) plt.legend(loc='lower left') if not options.output: plt.tight_layout() plt.show() else: fig.savefig(options.output, format='svg')
def main():
    """Start one pKa-calculation thread per compound not yet in the table.

    The set of compounds is either the union of the NIST, redox-carrier
    and formation-energy compounds (``nist`` option) or all KEGG
    compounds.  Compounds that already have rows in the table are left
    out, as are compounds for which no SMILES string can be produced.  The
    remaining ones are processed in order of increasing mass.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    db = SqliteDatabase(options.db_file)
    kegg = Kegg.getInstance()

    if options.override_table:
        db.Execute("DROP TABLE IF EXISTS " + options.table_name)
    DissociationConstants._CreateDatabase(
        db, options.table_name, drop_if_exists=options.override_table)

    cids_to_calculate = set()
    if not options.nist:
        cids_to_calculate.update(kegg.get_all_cids())
    else:
        cids_to_calculate.update(Nist().GetAllCids())
        cids_to_calculate.update(RedoxCarriers().GetAllCids())
        ptable = PsuedoisomerTableThermodynamics.FromCsvFile(
            "../data/thermodynamics/formation_energies.csv")
        cids_to_calculate.update(ptable.get_all_cids())

    # Do not recalculate pKas for CIDs that are already in the database
    query = "SELECT distinct(cid) FROM %s" % options.table_name
    for row in db.Execute(query):
        cids_to_calculate.discard(row[0])

    cid2smiles_and_mw = {}
    for cid in cids_to_calculate:
        # the compound CO is a special case where the conversion from InChI
        # to SMILES fails, so we add a specific override for it only
        if cid == 237:
            cid2smiles_and_mw[cid] = ("[C-]#[O+]", 28)
            continue
        try:
            mol = kegg.cid2compound(cid).GetMolecule()
            cid2smiles_and_mw[cid] = (mol.ToSmiles(), mol.GetExactMass())
        except KeggParseException:
            logging.debug("%s (C%05d) has no SMILES, skipping..." %
                          (kegg.cid2name(cid), cid))
        except OpenBabelError:
            logging.debug(
                "%s (C%05d) cannot be converted to SMILES, skipping..." %
                (kegg.cid2name(cid), cid))

    # lightest compounds first; the CID breaks ties
    ordered_cids = sorted(
        cid2smiles_and_mw.keys(),
        key=lambda c: (cid2smiles_and_mw[c][1], c))

    db_lock = threading.Lock()
    semaphore = threading.Semaphore(options.n_threads)
    for cid in ordered_cids:
        smiles = cid2smiles_and_mw[cid][0]
        if not smiles:
            logging.info("The following compound is blacklisted: C%05d" % cid)
            continue
        worker = DissociationThreads(
            group=None, target=None, name=None,
            args=(cid, smiles, semaphore, db_lock, options), kwargs={})
        worker.start()
def main():
    """Write the NIST verification report for the estimators as HTML.

    For each estimator the report holds the result of the verification
    against NIST (number of estimations and RMSE) and the estimator's
    coverage of three reaction sets: the balanced KEGG reactions, the
    Feist reactions and the unique NIST reactions.
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)

    T_low, T_high = nist.T_range[0], nist.T_range[1]
    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n" %
                      len(nist.data))
    html_writer.write(
        "Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n" %
        (T_low, T_high, len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    reactions = {}
    reactions['KEGG'] = []
    for reaction in Kegg.getInstance().AllReactions():
        # reactions that cannot be balanced are left out of the KEGG set
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
            reactions['KEGG'].append(reaction)
        except (KeggReactionNotBalancedException, KeggParseException,
                OpenBabelError):
            pass
    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()

    # pairwise comparisons are currently switched off (empty list)
    pairs = []
    for t1, t2 in pairs:
        first, second = estimators[t1], estimators[t2]
        logging.info('Writing the NIST report for %s vs. %s' %
                     (first.name, second.name))
        html_writer.write('<p><b>%s vs. %s</b> ' % (first.name, second.name))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer,
                           thermo1=first,
                           thermo2=second,
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')

    if False:
        estimators['alberty'].CompareOverKegg(
            html_writer,
            other=estimators['PRC'],
            fig_name='kegg_compare_alberty_vs_nist')

    # first row: the total number of reactions in every set
    totals = {'Method': 'Total'}
    for db_name, reaction_list in reactions.iteritems():
        totals[db_name + ' coverage'] = len(reaction_list)
    rowdicts = [totals]

    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s' % thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(html_writer=html_writer,
                                                    thermodynamics=thermo,
                                                    name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n' %
                          (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f' % (num_estimations, rmse))

        summary = {
            'Method': thermo.name,
            'RMSE (kJ/mol)': "%.1f (N=%d)" % (rmse, num_estimations),
        }
        for db_name, reaction_list in reactions.iteritems():
            n_covered = thermo.CalculateCoverage(reaction_list)
            percent = n_covered * 100.0 / len(reaction_list)
            summary[db_name + " coverage"] = "%.1f%% (%d)" % (percent,
                                                               n_covered)
            logging.info(db_name + " coverage = %.1f%%" % percent)
        rowdicts.append(summary)

    headers = ['Method', 'RMSE (kJ/mol)']
    headers = headers + [db_name + ' coverage'
                         for db_name in reactions.keys()]
    html_writer.write_table(rowdicts, headers=headers)
def thermodynamic_pathway_analysis(S, rids, fluxes, cids, thermodynamics,
                                   html_writer):
    """Run the pCr and MTDF optimizations of a pathway and report them.

    Args:
        S: the stoichiometric matrix; ``S.shape`` is read as
            (number of reactions, number of compounds).
        rids: the KEGG reaction IDs, one per row of S.
        fluxes: the relative fluxes; only their absolute values are used.
        cids: the KEGG compound IDs, one per column of S.
        thermodynamics: provides GetTransformedFormationEnergies(), and the
            attributes ``bounds``, ``c_mid`` and ``c_range``.
        html_writer: receives the figures and the result tables.

    Returns:
        A dictionary mapping the optimization name ('pCr', 'MTDF') to its
        result, which is unpacked here as (dG_f, conc, score).  It holds
        only the optimizations that finished before a
        LinProgNoSolutionException, if one was raised.
    """
    Nr, Nc = S.shape

    # adjust the directions of the reactions in S to fit the fluxes
    fluxes = [abs(f) for f in fluxes]
    kegg = Kegg.getInstance()
    dG0_f = thermodynamics.GetTransformedFormationEnergies(cids)
    bounds = [thermodynamics.bounds.get(cid, (None, None)) for cid in cids]
    res = {}
    try:
        c_mid = thermodynamics.c_mid
        c_range = thermodynamics.c_range
        res['pCr'] = find_pCr(S, dG0_f, c_mid=c_mid, ratio=3.0,
                              bounds=bounds)
        res['MTDF'] = find_mtdf(S, dG0_f, c_range=c_range, bounds=bounds)
    except LinProgNoSolutionException:
        html_writer.write(
            '<b>No feasible solution found, cannot calculate the Margin</b>')

    # plot the profile graph
    pylab.rcParams['text.usetex'] = False
    pylab.rcParams['legend.fontsize'] = 10
    pylab.rcParams['font.family'] = 'sans-serif'
    pylab.rcParams['font.size'] = 12
    pylab.rcParams['lines.linewidth'] = 2
    pylab.rcParams['lines.markersize'] = 5
    pylab.rcParams['figure.figsize'] = [8.0, 6.0]
    pylab.rcParams['figure.dpi'] = 100

    # plot the thermodynamic profile in standard conditions
    profile_fig = pylab.figure()
    profile_fig.hold(True)

    pylab.title('Thermodynamic profile', figure=profile_fig)
    pylab.ylabel('cumulative dG [kJ/mol]', figure=profile_fig)
    pylab.xlabel('Reaction KEGG ID', figure=profile_fig)
    pylab.xticks(pylab.arange(1, Nr + 1),
                 ['R%05d' % rids[i] for i in xrange(Nr)],
                 fontproperties=FontProperties(size=8), rotation=30)

    dG0_r = pylab.zeros((Nr, 1))
    for r in range(Nr):
        reactants = pylab.find(S[r, :])
        dG0_r[r, 0] = pylab.dot(S[r, reactants], dG0_f[reactants])

    nan_indices = pylab.find(pylab.isnan(dG0_r))
    finite_indices = pylab.find(pylab.isfinite(dG0_r))
    if (len(nan_indices) > 0):
        # reactions with an unknown energy count as 0 in the profile
        dG0_r_finite = pylab.zeros((Nr, 1))
        dG0_r_finite[finite_indices] = dG0_r[finite_indices]
        cum_dG0_r = pylab.cumsum(
            [0] + [dG0_r_finite[r, 0] * fluxes[r] for r in range(Nr)])
    else:
        cum_dG0_r = pylab.cumsum(
            [0] + [dG0_r[r, 0] * fluxes[r] for r in range(Nr)])
    pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG0_r, figure=profile_fig,
               label='Standard [1M]')

    # plot the thermodynamic profile for the different optimization schemes
    pylab.grid(True, figure=profile_fig)
    for optimization in res.keys():
        dG_f, conc, score = res[optimization]
        if score is None:
            continue

        dG_r = pylab.dot(S, dG_f)
        cum_dG_r = pylab.cumsum(
            [0] + [dG_r[i, 0] * fluxes[i] for i in range(Nr)])
        pylab.plot(pylab.arange(0.5, Nr + 1), cum_dG_r, figure=profile_fig,
                   label='%s = %.1f' % (optimization, score))

    pylab.legend()
    html_writer.embed_matplotlib_figure(profile_fig, width=480, height=360)

    # plot the optimal metabolite concentrations for the different
    # optimization schemes
    ind_nan = pylab.find(pylab.isnan(dG0_f))
    for optimization in res.keys():
        dG_f, conc, score = res[optimization]
        if score is None:
            continue

        # give all compounds with unknown dG0_f the middle concentration
        # value
        conc[ind_nan] = thermodynamics.c_mid

        conc_fig = pylab.figure()
        conc_fig.suptitle('Concentrations (%s = %.1f)' % (optimization, score))
        pylab.xscale('log', figure=conc_fig)
        pylab.ylabel('Compound KEGG ID', figure=conc_fig)
        pylab.xlabel('Concentration [M]', figure=conc_fig)
        pylab.yticks(range(Nc, 0, -1), ["C%05d" % cid for cid in cids],
                     fontproperties=FontProperties(size=8))
        pylab.plot(conc, range(Nc, 0, -1), '*b', figure=conc_fig)

        x_min = conc.min() / 10
        x_max = conc.max() * 10
        y_min = 0
        y_max = Nc + 1

        for c in range(Nc):
            pylab.text(conc[c, 0] * 1.1, Nc - c, kegg.cid2name(cids[c]),
                       figure=conc_fig, fontsize=6, rotation=0)
            # an open bound is drawn up to the edge of the plot
            b_low, b_up = bounds[c]
            if b_low is None:
                b_low = x_min
            if b_up is None:
                b_up = x_max
            pylab.plot([b_low, b_up], [Nc - c, Nc - c], '-k', linewidth=0.4)

        if optimization.startswith('pCr'):
            c_range_opt = pC_to_range(score, c_mid=thermodynamics.c_mid,
                                      ratio=3.0)
            pylab.axvspan(c_range_opt[0], c_range_opt[1], facecolor='g',
                          alpha=0.3, figure=conc_fig)
        else:
            pylab.axvspan(thermodynamics.c_range[0],
                          thermodynamics.c_range[1], facecolor='r',
                          alpha=0.3, figure=conc_fig)
        pylab.axis([x_min, x_max, y_min, y_max], figure=conc_fig)
        try:
            html_writer.embed_matplotlib_figure(conc_fig, width=420,
                                                height=360)
        except AttributeError:
            html_writer.write('<b>Failed to generate concentration figure</b>')

    # write all the results in tables as well
    for optimization in res.keys():
        (dG_f, conc, score) = res[optimization]
        if score is None:
            # Same guard as in the two plotting loops above: without it the
            # '%.1f' formatting of the score raises a TypeError.
            continue

        html_writer.write(
            '<p>Biochemical Compound Formation Energies (%s = %.1f)<br>\n' %
            (optimization, score))
        html_writer.write('<table border="1">\n')
        html_writer.write(' ' + '<td>%s</td>' * 5 %
                          ("KEGG CID", "Compound Name", "Concentration [M]",
                           "dG'0_f [kJ/mol]", "dG'_f [kJ/mol]") + '\n')
        for c in range(Nc):
            cid = cids[c]
            name = kegg.cid2name(cid)

            if (pylab.isnan(dG0_f[c, 0])):
                html_writer.write(
                    '<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n' %
                    (kegg.cid2link(cid), cid, name, "N/A", "N/A", "N/A"))
            else:
                html_writer.write(
                    '<tr><td><a href="%s">C%05d</a></td><td>%s</td><td>%.2g</td><td>%.2f</td><td>%.2f</td></tr>\n' %
                    (kegg.cid2link(cid), cid, name, conc[c, 0],
                     dG0_f[c, 0], dG_f[c, 0]))
        html_writer.write('</table></p>\n')

        html_writer.write(
            '<p>Biochemical Reaction Energies (%s = %.1f)<br>\n' %
            (optimization, score))
        html_writer.write('<table border="1">\n')
        html_writer.write(' ' + '<td>%s</td>' * 3 %
                          ("KEGG RID", "dG'0_r [kJ/mol]",
                           "dG'_r [kJ/mol]") + '\n')

        dG_r = pylab.dot(S, dG_f)
        for r in range(Nr):
            rid = rids[r]
            if (pylab.isnan(dG0_r[r, 0])):
                html_writer.write(
                    '<tr><td><a href="%s" title="%s">R%05d</a></td><td>%s</td><td>%.2f</td></tr>\n' %
                    (kegg.rid2link(rid), kegg.rid2name(rid), rid, "N/A",
                     dG_r[r, 0]))
            else:
                html_writer.write(
                    '<tr><td><a href="%s" title="%s">R%05d</a></td><td>%.2f</td><td>%.2f</td></tr>\n' %
                    (kegg.rid2link(rid), kegg.rid2name(rid), rid,
                     dG0_r[r, 0], dG_r[r, 0]))
        html_writer.write('</table></p>\n')

    return res
def row2hypertext(S_row, cids):
    """Render one row of a stoichiometric matrix as reaction hypertext.

    Args:
        S_row: the coefficients of one reaction, indexed like ``cids``.
        cids: the KEGG compound IDs matching the entries of S_row.

    Returns:
        Whatever Kegg.sparse_to_hypertext() returns for the non-zero
        entries of the row (with show_cids=False).
    """
    kegg = Kegg.getInstance()
    nonzero_columns = list(np.nonzero(S_row)[0].flat)
    sparse = {}
    for col in nonzero_columns:
        sparse[cids[col]] = S_row[col]
    return kegg.sparse_to_hypertext(sparse, show_cids=False)
#print m.ToFormat('mol2') #print m.ToFormat('smi') #print m.ToFormat('inchi') #print m.ToFormat('sdf') diss_table = Molecule._GetDissociationTable('C(=O)(O)CN', fmt='smiles', mid_pH=default_pH, min_pKa=0, max_pKa=14, T=default_T) print "glycine\n", diss_table html_writer = HtmlWriter('../res/molecule.html') from pygibbs.kegg import Kegg kegg = Kegg.getInstance() html_writer.write('<h1>pKa estimation using ChemAxon</h1>\n') for cid in [41]: m = kegg.cid2mol(cid) html_writer.write("<h2>C%05d : %s</h2>\n" % (cid, str(m))) diss_table = m.GetDissociationTable() pmap = diss_table.GetPseudoisomerMap() diss_table.WriteToHTML(html_writer) pmap.WriteToHTML(html_writer) html_writer.write("</p>\n") #print m.GetDissociationConstants() #print m.GetMacrospecies() #obmol = m.ToOBMol() #print 'atom bag = %s, charge = %d' % m.GetAtomBagAndCharge() #print 'no. e- =', m.GetNumElectrons()
def FromFileToDB(file_name, db, table_name):
    """
        Parses a CSV file that contains pKa and pKMg data for many compounds
        and inserts one row per dissociation into the given database table
        (nothing is returned).

        We support two CSV formats (legacy issues, sorry):
        1) cid, name, nH_below, nH_above, nMg_below, nMg_above, mol_below,
           mol_above, ddG, ref
        2) cid, name, type, T, nH_below, nH_above, nMg_below, nMg_above,
           mol_below, mol_above, pK, ref

        Rows without a CID are skipped.

        Args:
            file_name: path of the CSV file.
            db: the database; must provide Insert and Commit.
            table_name: the table that is created and filled.

        Raises:
            Exception: if the file has neither a 'ddG' nor a 'pK' column,
                or if a row cannot be parsed (the message is prefixed with
                the row number).
    """
    kegg = Kegg.getInstance()
    DissociationConstants._CreateDatabase(db, table_name)

    # The file used to be opened inline and was never closed.
    with open(file_name, 'r') as csv_file:
        for i, row in enumerate(csv.DictReader(csv_file)):
            if 'pK' not in row and 'ddG' not in row:
                raise Exception("The CSV file is not in a recognized format: "
                                "there should be a column named ddG or pK")
            try:
                if not row['cid']:
                    # without a CID we cannot match this to the dG0 table
                    continue
                cid = int(row['cid'])
                name = row['name'] or kegg.cid2name(cid)
                logging.debug("Parsing row #%d, compound %s (C%05d)" %
                              (i, name, cid))

                nH_below = int(row['nH_below'])
                nH_above = int(row['nH_above'])
                nMg_below = int(row['nMg_below'])
                nMg_above = int(row['nMg_above'])
                mol_below = row['mol_below'] or None
                mol_above = row['mol_above'] or None
                ref = row['ref']

                if 'ddG' in row:
                    # this is the 1st format type
                    ddG = float(row['ddG'])
                elif 'pK' in row:
                    # this is the 2nd format type
                    pK = float(row['pK'] or 0)
                    T = float(row['T'] or default_T)
                    if row['type'] == 'acid-base':
                        if nMg_below != nMg_above or nH_below != nH_above + 1:
                            raise Exception('wrong nMg and nH values')
                        ddG = -R * T * np.log(10) * pK
                    elif row['type'] == 'Mg':
                        if nMg_below != nMg_above + 1 or nH_below != nH_above:
                            raise Exception('wrong nMg and nH values')
                        ddG = -R * T * np.log(10) * pK + dG0_f_Mg
                    elif row['type'] == '':
                        if nMg_below != nMg_above or nH_below != nH_above:
                            raise Exception('wrong nMg and nH values')
                        ddG = None
                    else:
                        raise Exception('unknown dissociation type: ' +
                                        row['type'])
            except Exception as e:
                # add the row number so the offending line can be found
                raise Exception("Row %i: %s" % (i, str(e)))

            db.Insert(table_name, [
                cid, name, nH_below, nH_above, nMg_below, nMg_above,
                mol_below, mol_above, ddG, ref
            ])

    db.Commit()