def __init__(self, db, html_writer=None, dissociation=None, anchor_all=False):
    """Initialise the "Unified Group Contribution" estimator.

    Args:
        db: database handle; stored unchanged on the instance.
        html_writer: report writer; a fresh NullHtmlWriter is used when a
            falsy value is supplied.
        dissociation: stored unchanged on the instance (may be None).
        anchor_all: when true, the formation-energy file path points at the
            "anchor_all" variant of the CSV.
    """
    PsuedoisomerTableThermodynamics.__init__(
        self, name="Unified Group Contribution")

    self.db = db
    if html_writer:
        self.html_writer = html_writer
    else:
        self.html_writer = NullHtmlWriter()
    self.dissociation = dissociation
    self.transformed = False
    self.CollapseReactions = False
    self.epsilon = 1e-10
    self.kegg = Kegg.getInstance()

    # Names of the database tables associated with this estimator.
    table_names = (
        ('STOICHIOMETRIC_TABLE_NAME', 'ugc_S'),
        ('GROUP_TABLE_NAME', 'ugc_G'),
        ('GIBBS_ENERGY_TABLE_NAME', 'ugc_b'),
        ('ANCHORED_TABLE_NAME', 'ugc_anchored'),
        ('COMPOUND_TABLE_NAME', 'ugc_compounds'),
        ('OBSERVATION_TABLE_NAME', 'ugc_observations'),
        ('GROUPVEC_TABLE_NAME', 'ugc_groupvectors'),
        ('UNIQUE_OBSERVATION_TABLE_NAME', 'ugc_unique_observations'),
        ('THERMODYNAMICS_TABLE_NAME', 'ugc_pseudoisomers'),
        ('ERRORS_TABLE_NAME', 'ugc_errors'),
        ('CONSERVATIONS_TABLE_NAME', 'ugc_conservations'),
    )
    for attribute, table in table_names:
        setattr(self, attribute, table)

    self.FORMATION_ENERGY_FILENAME = (
        '../data/thermodynamics/formation_energies_anchor_all.csv'
        if anchor_all else
        '../data/thermodynamics/formation_energies.csv')
def AnalyzeConcentrationGradient(prefix, thermo, csv_output_fname, cid=13):
    """Scan the concentration of one compound and plot the resulting OBDs.

    For every concentration in a fixed logarithmic grid, the bounds of
    compound `cid` are pinned to that concentration and `pareto` is run on
    the pathways parsed from '../data/thermodynamics/<prefix>.txt'.  The
    second column of each result is collected, optionally written to a CSV
    file, and plotted into a figure that is embedded in
    '../res/<prefix>.html'.

    Args:
        prefix: base name of the input pathway file and the output HTML file.
        thermo: thermodynamics object; its `kegg`, `I` and `T` attributes are
            read here and it is passed on to `pareto`.
        csv_output_fname: path of the CSV file to write, or a falsy value to
            skip CSV output entirely.
        cid: KEGG compound ID whose concentration is scanned (the default,
            13, is PPi).
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile(
        '../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()

    # Keep the file handle so that it can be closed once we are done.
    csv_file = open(csv_output_fname, 'w') if csv_output_fname else None
    try:
        csv_output = None
        if csv_file is not None:
            csv_output = csv.writer(csv_file)
            csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] +
                                kegg_file.entries())

        # this needs to be fixed so that the txt file will set the pH
        pH_vec = np.array([7])
        # logarithmic scale between 10 mM (1e-2 M) and 1 uM (1e-6 M)
        conc_vec = 10**(-np.arange(2, 6.0001, 0.25))
        override_bounds = {}

        fig = plt.figure(figsize=(6, 6), dpi=90)
        legend = []
        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                # Pin the scanned compound to exactly this concentration.
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" %
                             (pH, compound_name, conc))
                data, labels = pareto(kegg_file, null_html_writer, thermo,
                                      pH=pH, section_prefix="",
                                      balance_water=True,
                                      override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # CSV output is optional; the original crashed here when no
                # file name was given.
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] +
                                        list(data[:, 1].flat))
            # One row is appended per concentration, so each column is
            # presumably one pathway (assumes data[:, 1] is 1-D) -- this is
            # what lets plt.plot draw one line per pathway.
            obd_mat = np.matrix(obd_vec)
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (label, pH) for label in labels]

        plt.title("OBD vs. [%s] (I = %gM, T = %gK)" %
                  (compound_name, thermo.I, thermo.T), figure=fig)
        plt.xscale('log')
        plt.xlabel('Concentration of %s [M]' % compound_name, figure=fig)
        plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
        plt.legend(legend)

        html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
        html_writer.embed_matplotlib_figure(fig, name=prefix)
        html_writer.close()
    finally:
        if csv_file is not None:
            csv_file.close()
def FromDatabase(db, table_name, transformed=False):
    """Build a KeggObervationCollection from the rows of a database table.

    The collection is created with a NullHtmlWriter and no dissociation
    data; its `observations` list is then replaced by one KeggObservation
    per row returned by `db.DictReader(table_name)`.

    Args:
        db: database handle providing `DictReader`.
        table_name: name of the table to read.
        transformed: forwarded to the KeggObervationCollection constructor.

    Returns:
        The populated KeggObervationCollection.
    """
    collection = KeggObervationCollection(NullHtmlWriter(), None, transformed)
    collection.observations = [
        KeggObservation.FromDatabaseRow(record)
        for record in db.DictReader(table_name)
    ]
    return collection
def __init__(self, db, dissociation=None, html_writer=None, nist=None):
    """Initialise the estimator with its data sources and default settings.

    Args:
        db: database handle; stored unchanged on the instance.
        dissociation: dissociation data to use, or None (the default).
        html_writer: report writer; a NullHtmlWriter is used when a falsy
            value is supplied.
        nist: NIST data object; a new `Nist()` is created when a falsy
            value is supplied.
    """
    PsuedoisomerTableThermodynamics.__init__(self)
    self.db = db
    self.html_writer = html_writer or NullHtmlWriter()
    self.nist = nist or Nist()
    # Previously this was hard-coded to None, silently discarding the
    # `dissociation` argument.  Callers relying on the default still get None.
    self.dissociation = dissociation
    self.cid2pmap_dict = {}
    self.assume_no_pKa_by_default = False
    self.std_diff_threshold = np.inf
def __init__(self, db, html_writer=None, transformed=False):
    """Construct a GroupContribution instance.

    Args:
        db: the database handle to read from.
        html_writer: the HtmlWriter to write to; a NullHtmlWriter is used
            when a falsy value is passed.
        transformed: stored on the instance and used to pick the prefix of
            all table names ('bgc' when true, 'pgc' otherwise).
    """
    PsuedoisomerTableThermodynamics.__init__(self, name="Group Contribution")

    self.db = db
    self.html_writer = html_writer if html_writer else NullHtmlWriter()
    self.dissociation = None
    self.transformed = transformed
    self.epsilon = 1e-10
    self.kegg = Kegg.getInstance()
    # Private copy, so changes to self.bounds do not touch the Kegg object.
    self.bounds = deepcopy(self.kegg.cid2bounds)

    # Results start out empty; nothing in this constructor fills them in.
    self.group_nullspace = None
    self.group_contributions = None
    self.obs_collection = None
    self.cid2error = {}
    self.cid2groupvec = None

    prefix = 'bgc' if transformed else 'pgc'
    self.OBSERVATION_TABLE_NAME = '%s_observations' % prefix
    self.GROUPVEC_TABLE_NAME = '%s_groupvector' % prefix
    self.NULLSPACE_TABLE_NAME = '%s_nullspace' % prefix
    self.CONTRIBUTION_TABLE_NAME = '%s_contribution' % prefix
    self.REGRESSION_TABLE_NAME = '%s_regression' % prefix
    self.THERMODYNAMICS_TABLE_NAME = '%s_pseudoisomers' % prefix
    self.STOICHIOMETRIC_MATRIX_TABLE_NAME = '%s_stoichiometry' % prefix
    # NOTE: the "TALBE" spelling in the next attribute names is kept as-is;
    # presumably other code refers to them by these exact names.
    self.ANCHORED_CONTRIBUTIONS_TALBE_NAME = '%s_anchored_g' % prefix
    self.ANCHORED_CIDS_TABLE_NAME = '%s_anchored_cids' % prefix
    self.ANCHORED_P_L_TALBE_NAME = '%s_anchored_P_L' % prefix
def main():
    """Export group-contribution estimates for KEGG compounds and reactions.

    Writes two CSV files whose paths come from the parsed command-line
    options:
      * compounds_out_filename: one row per pseudoisomer of every compound
        known to the GroupContribution object, or a single row carrying the
        error message when the formation energy is missing.
      * reactions_out_filename: one row per KEGG reaction with its predicted
        energy at the requested pH, ionic strength, pMg and temperature, or
        the error message when the prediction fails.
    """
    estimators = LoadAllEstimators()
    args, _ = MakeOpts(estimators).parse_args(sys.argv)

    # Make sure we have all the data.
    db = SqliteDatabase('../res/gibbs.sqlite')
    G = GroupContribution(db=db, html_writer=NullHtmlWriter(),
                          transformed=args.transformed)
    G.init()

    print('Exporting KEGG compounds to %s' % args.compounds_out_filename)
    # `with` guarantees the file is flushed and closed, even on error.
    with open(args.compounds_out_filename, 'w') as compounds_file:
        csv_writer = csv.writer(compounds_file)
        csv_writer.writerow(["KEGG ID", "nH", "CHARGE", "nMg", "dG0_f"])
        for cid in sorted(G.get_all_cids()):
            try:
                for nH, z, nMg, dG0 in G.cid2PseudoisomerMap(cid).ToMatrix():
                    csv_writer.writerow(["C%05d" % cid, nH, z, nMg,
                                         "%.1f" % dG0])
            except MissingCompoundFormationEnergy as e:
                # Keep the compound in the output, with the reason it failed.
                csv_writer.writerow(["C%05d" % cid, None, None, None, str(e)])

    print('Exporting KEGG reactions to %s' % args.reactions_out_filename)
    with open(args.reactions_out_filename, 'w') as reactions_file:
        csv_writer = csv.writer(reactions_file)
        csv_writer.writerow(
            ["KEGG ID",
             "dG'0_r (pH=%.1f, I=%.2f, pMg=%.1f, T=%.1f)" %
             (args.ph, args.i_s, args.pmg, args.temp)])
        for rid in sorted(G.kegg.get_all_rids()):
            reaction = G.kegg.rid2reaction(rid)
            try:
                reaction.Balance(balance_water=True)
                dG0_r = reaction.PredictReactionEnergy(
                    G, pH=args.ph, pMg=args.pmg, I=args.i_s, T=args.temp)
                csv_writer.writerow(["R%05d" % rid, "%.1f" % dG0_r])
            except (KeggParseException, MissingCompoundFormationEnergy,
                    KeggReactionNotBalancedException, MissingReactionEnergy,
                    KeyError, OpenBabelError) as e:
                # Record the failure in place of the energy and carry on.
                csv_writer.writerow(["R%05d" % rid, str(e)])
def nist_dissociation_test():
    """
        Check the dissociation table of every compound that appears in NIST.

        Compounds without an explicit formula (no atom bag) are only logged
        at debug level; for all others the existing dissociation table is
        fetched (never created) and passed to test_dissociation_table with
        missing SMILES strings not ignored.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    nist_regression = NistRegression(db, html_writer=NullHtmlWriter())
    dissociation = nist_regression.dissociation
    group_decomposer = GroupDecomposer(GroupsData.FromDatabase(db))
    kegg = Kegg.getInstance()

    for cid in nist_regression.nist.GetAllCids():
        compound_label = "C%05d (%s)" % (cid, kegg.cid2name(cid))
        if kegg.cid2compound(cid).get_atom_bag() is None:
            logging.debug('%s: has no explicit formula' % compound_label)
            continue
        diss_table = dissociation.GetDissociationTable(
            cid, create_if_missing=False)
        test_dissociation_table(diss_table, group_decomposer, compound_label,
                                ignore_missing_smiles=False)
def GetAllOBDs(pathway_list, html_writer, thermo, pH=None, plot_profile=False,
               section_prefix="", balance_water=True, override_bounds=None):
    """
        Compute the OBD of every pathway and write the results as HTML.

        Arguments:
            pathway_list    - iterable of (entry, p_data) pairs
            html_writer     - report writer; a NullHtmlWriter is used if None
            thermo          - thermodynamics object; its conditions, c_range
                              and bounds are overwritten for each pathway
            pH              - if given, used for all pathways; if None, each
                              pathway uses its own p_data.pH
            plot_profile    - accepted but not used by this function
            section_prefix  - prefix for the HTML element IDs
            balance_water   - forwarded to p_data.get_explicit_reactions
            override_bounds - optional dict mapping cid to (lb, ub), applied
                              on top of each pathway's own bounds

        Return value is a list of dictionaries (one per pathway, including
        skipped ones) containing the following fields:
            entry - the name of the pathway
            remark - 'okay', 'skipping' or 'NaN reaction energy'
            OBD - optimized distributed bottleneck (in kJ/mol)
            FFE - flux-force efficiency, 100 * tanh(OBD / 2RT)
                  (in percent, between -100 and 100)
            min total dG - in kJ/mol
            max total dG - in kJ/mol
            sum of fluxes - the sum of all fluxes
        Numeric fields that were never computed read as 0.0, because each
        dictionary is a defaultdict(float).
    """
    if html_writer is None:
        html_writer = NullHtmlWriter()
    if override_bounds is None:
        override_bounds = {}

    html_writer.write('<h2 id="%s_tables">Individual result tables</h2>\n' %
                      section_prefix)
    rowdicts = []
    for entry, p_data in pathway_list:
        rowdict = defaultdict(float)
        rowdicts.append(rowdict)
        rowdict['entry'] = entry
        rowdict['remark'] = 'okay'

        if p_data.skip:
            logging.info("Skipping pathway: %s", rowdict['entry'])
            rowdict['remark'] = 'skipping'
            continue

        # Resolve the pH per pathway.  Assigning to the `pH` argument here
        # (as the code used to) made the first pathway's pH stick for all
        # the pathways that followed.
        pathway_pH = p_data.pH if pH is None else pH
        thermo.SetConditions(pH=pathway_pH, I=p_data.I, T=p_data.T,
                             pMg=p_data.pMg)
        thermo.c_range = p_data.c_range
        rowdict['pH'] = thermo.pH
        rowdict['I'] = thermo.I
        rowdict['T'] = thermo.T
        rowdict['pMg'] = thermo.pMg

        html_writer.write('<h3 id="%s_%s">%s</h3>\n' %
                          (section_prefix, rowdict['entry'], rowdict['entry']))

        S, rids, fluxes, cids = p_data.get_explicit_reactions(
            balance_water=balance_water)
        fluxes = np.matrix(fluxes)
        rowdict['rids'] = rids
        rowdict['cids'] = cids
        rowdict['fluxes'] = fluxes

        # Pathway-specific bounds first, caller-supplied overrides on top.
        thermo.bounds = p_data.GetBounds().GetOldStyleBounds(cids)
        for cid, (lb, ub) in override_bounds.items():
            thermo.bounds[cid] = (lb, ub)

        dG0_r_prime = thermo.GetTransfromedReactionEnergies(S, cids)
        rowdict['dG0_r_prime'] = dG0_r_prime

        keggpath = KeggPathway(S, rids, fluxes, cids,
                               reaction_energies=dG0_r_prime,
                               cid2bounds=thermo.bounds,
                               c_range=thermo.c_range)
        rowdict['formulas'] = [
            keggpath.GetReactionString(r, show_cids=False)
            for r in range(len(rids))
        ]

        if np.any(np.isnan(dG0_r_prime)):
            # No OBD is computed when a reaction energy is missing; only
            # the profile and concentration tables are reported.
            html_writer.write('NaN reaction energy')
            keggpath.WriteProfileToHtmlTable(html_writer)
            keggpath.WriteConcentrationsToHtmlTable(html_writer)
            logging.info('%20s: OBD = NaN, maxTG = NaN' % (entry))
            rowdict['remark'] = 'NaN reaction energy'
            continue

        obd, params = keggpath.FindOBD()
        odfe = 100 * np.tanh(obd / (2 * R * thermo.T))

        rowdict['OBD'] = obd
        rowdict['FFE'] = odfe
        rowdict['sum of fluxes'] = np.sum(fluxes)
        rowdict['concentrations'] = params['concentrations']
        rowdict['reaction prices'] = params['reaction prices']
        rowdict['compound prices'] = params['compound prices']
        rowdict['min total dG'] = params['minimum total dG']
        rowdict['max total dG'] = params['maximum total dG']

        logging.info('%20s: OBD = %.1f [kJ/mol], maxTG = %.1f [kJ/mol]' %
                     (rowdict['entry'], obd, rowdict['max total dG']))
        html_writer.write_ul([
            "pH = %.1f, I = %.2fM, T = %.2f K" %
            (thermo.pH, thermo.I, thermo.T),
            "OBD = %.1f [kJ/mol]" % obd,
            "flux-force efficiency = %.1f%%" % odfe,
            "Min Total %s = %.1f [kJ/mol]" %
            (symbol_dr_G_prime, rowdict['min total dG']),
            "Max Total %s = %.1f [kJ/mol]" %
            (symbol_dr_G_prime, rowdict['max total dG']),
        ])
        rowdict['dG_r_prime'] = \
            keggpath.CalculateReactionEnergiesUsingConcentrations(
                rowdict['concentrations'])
        keggpath.WriteResultsToHtmlTables(html_writer,
                                          rowdict['concentrations'],
                                          rowdict['reaction prices'],
                                          rowdict['compound prices'])

    html_writer.write('<h2 id="%s_summary">Summary table</h2>\n' %
                      section_prefix)
    dict_list = [{
        'Name': '<a href="#%s_%s">%s</a>' %
                (section_prefix, d['entry'], d['entry']),
        'OBD [kJ/mol]': '%.1f' % d['OBD'],
        'flux-force eff.': '%.1f%%' % d['FFE'],
        'Total dG\' [kJ/mol]': '%6.1f - %6.1f' %
                               (d['min total dG'], d['max total dG']),
        'sum(flux)': '%g' % d['sum of fluxes'],
        'remark': d['remark'],
    } for d in rowdicts]
    html_writer.write_table(dict_list,
                            headers=['Name', 'OBD [kJ/mol]', 'flux-force eff.',
                                     'Total dG\' [kJ/mol]', 'sum(flux)',
                                     'remark'])
    return rowdicts
def GetAllOBDs(pathway_list, html_writer, thermo, pH=None, plot_profile=False,
               section_prefix="", balance_water=True, override_bounds=None):
    """
        Compute the OBD of every pathway and write the results as HTML.

        Arguments:
            pathway_list    - iterable of (entry, p_data) pairs
            html_writer     - report writer; a NullHtmlWriter is used if None
            thermo          - thermodynamics object; its conditions, c_range
                              and bounds are overwritten for each pathway
            pH              - if given, used for all pathways; if None, each
                              pathway uses its own p_data.pH
            plot_profile    - accepted but not used by this function
            section_prefix  - prefix for the HTML element IDs
            balance_water   - forwarded to p_data.get_explicit_reactions
            override_bounds - optional dict mapping cid to (lb, ub), applied
                              on top of each pathway's own bounds

        Return value is a list of dictionaries (one per pathway, including
        skipped ones) containing the following fields:
            entry - the name of the pathway
            remark - 'okay', 'skipping' or 'NaN reaction energy'
            OBD - optimized distributed bottleneck (in kJ/mol)
            FFE - flux-force efficiency, 100 * tanh(OBD / 2RT)
                  (in percent, between -100 and 100)
            min total dG - in kJ/mol
            max total dG - in kJ/mol
            sum of fluxes - the sum of all fluxes
        Numeric fields that were never computed read as 0.0, because each
        dictionary is a defaultdict(float).
    """
    if html_writer is None:
        html_writer = NullHtmlWriter()
    if override_bounds is None:
        override_bounds = {}

    html_writer.write('<h2 id="%s_tables">Individual result tables</h2>\n' %
                      section_prefix)
    rowdicts = []
    for entry, p_data in pathway_list:
        rowdict = defaultdict(float)
        rowdicts.append(rowdict)
        rowdict['entry'] = entry
        rowdict['remark'] = 'okay'

        if p_data.skip:
            logging.info("Skipping pathway: %s", rowdict['entry'])
            rowdict['remark'] = 'skipping'
            continue

        # Resolve the pH per pathway.  Assigning to the `pH` argument here
        # (as the code used to) made the first pathway's pH stick for all
        # the pathways that followed.
        pathway_pH = p_data.pH if pH is None else pH
        thermo.SetConditions(pH=pathway_pH, I=p_data.I, T=p_data.T,
                             pMg=p_data.pMg)
        thermo.c_range = p_data.c_range
        rowdict['pH'] = thermo.pH
        rowdict['I'] = thermo.I
        rowdict['T'] = thermo.T
        rowdict['pMg'] = thermo.pMg

        html_writer.write('<h3 id="%s_%s">%s</h3>\n' %
                          (section_prefix, rowdict['entry'], rowdict['entry']))

        S, rids, fluxes, cids = p_data.get_explicit_reactions(
            balance_water=balance_water)
        fluxes = np.matrix(fluxes)
        rowdict['rids'] = rids
        rowdict['cids'] = cids
        rowdict['fluxes'] = fluxes

        # Pathway-specific bounds first, caller-supplied overrides on top.
        thermo.bounds = p_data.GetBounds().GetOldStyleBounds(cids)
        for cid, (lb, ub) in override_bounds.items():
            thermo.bounds[cid] = (lb, ub)

        dG0_r_prime = thermo.GetTransfromedReactionEnergies(S, cids)
        rowdict['dG0_r_prime'] = dG0_r_prime

        keggpath = KeggPathway(S, rids, fluxes, cids,
                               reaction_energies=dG0_r_prime,
                               cid2bounds=thermo.bounds,
                               c_range=thermo.c_range)
        rowdict['formulas'] = [
            keggpath.GetReactionString(r, show_cids=False)
            for r in range(len(rids))
        ]

        if np.any(np.isnan(dG0_r_prime)):
            # No OBD is computed when a reaction energy is missing; only
            # the profile and concentration tables are reported.
            html_writer.write('NaN reaction energy')
            keggpath.WriteProfileToHtmlTable(html_writer)
            keggpath.WriteConcentrationsToHtmlTable(html_writer)
            logging.info('%20s: OBD = NaN, maxTG = NaN' % (entry))
            rowdict['remark'] = 'NaN reaction energy'
            continue

        obd, params = keggpath.FindOBD()
        odfe = 100 * np.tanh(obd / (2 * R * thermo.T))

        rowdict['OBD'] = obd
        rowdict['FFE'] = odfe
        rowdict['sum of fluxes'] = np.sum(fluxes)
        rowdict['concentrations'] = params['concentrations']
        rowdict['reaction prices'] = params['reaction prices']
        rowdict['compound prices'] = params['compound prices']
        rowdict['min total dG'] = params['minimum total dG']
        rowdict['max total dG'] = params['maximum total dG']

        logging.info('%20s: OBD = %.1f [kJ/mol], maxTG = %.1f [kJ/mol]' %
                     (rowdict['entry'], obd, rowdict['max total dG']))
        html_writer.write_ul([
            "pH = %.1f, I = %.2fM, T = %.2f K" %
            (thermo.pH, thermo.I, thermo.T),
            "OBD = %.1f [kJ/mol]" % obd,
            "flux-force efficiency = %.1f%%" % odfe,
            "Min Total %s = %.1f [kJ/mol]" %
            (symbol_dr_G_prime, rowdict['min total dG']),
            "Max Total %s = %.1f [kJ/mol]" %
            (symbol_dr_G_prime, rowdict['max total dG']),
        ])
        rowdict['dG_r_prime'] = \
            keggpath.CalculateReactionEnergiesUsingConcentrations(
                rowdict['concentrations'])
        keggpath.WriteResultsToHtmlTables(html_writer,
                                          rowdict['concentrations'],
                                          rowdict['reaction prices'],
                                          rowdict['compound prices'])

    html_writer.write('<h2 id="%s_summary">Summary table</h2>\n' %
                      section_prefix)
    dict_list = [{
        'Name': '<a href="#%s_%s">%s</a>' %
                (section_prefix, d['entry'], d['entry']),
        'OBD [kJ/mol]': '%.1f' % d['OBD'],
        'flux-force eff.': '%.1f%%' % d['FFE'],
        'Total dG\' [kJ/mol]': '%6.1f - %6.1f' %
                               (d['min total dG'], d['max total dG']),
        'sum(flux)': '%g' % d['sum of fluxes'],
        'remark': d['remark'],
    } for d in rowdicts]
    html_writer.write_table(dict_list,
                            headers=['Name', 'OBD [kJ/mol]', 'flux-force eff.',
                                     'Total dG\' [kJ/mol]', 'sum(flux)',
                                     'remark'])
    return rowdicts
continue elem_ne += count * Molecule.GetAtomicNum(elem) elem_cid, elem_coeff = atom2cid[elem] sparse.setdefault(elem_cid, 0) sparse[elem_cid] += -count * elem_coeff # use the H element to balance the electrons in the formation # reactions (we don't need to balance protons since this is # a biochemical reaction, so H+ are 'free'). H_cid, H_coeff = atom2cid['H'] sparse[H_cid] = (elem_ne - ne) * H_coeff reaction = Reaction( "formation of %s" % self.kegg.cid2name(cid), sparse) output_csv.writerow( (ref, 'C%05d' % cid, 'formation', 'A', '', 'formation of %s' % self.kegg.cid2name(cid), reaction.FullReactionString(), reaction.FullReactionString(show_cids=False), '%.2f' % dG0_prime, self.T, self.I, self.pH, self.pMg)) if __name__ == "__main__": dissociation = DissociationConstants.FromPublicDB() html_writer = NullHtmlWriter() obs_col = KeggObervationCollection(html_writer, dissociation, transformed=True) obs_col.ConvertFormation2Reaction( output_fname='../res/formation_reactions.csv')