def compare_charges():
    """Write an HTML table comparing the charges given by two estimators.

    The 'hatzi' and 'milo' estimators are each asked for the most abundant
    pseudoisomer of every compound that either of them knows. One row per
    compound (KEGG link, name, both charges and the 'error' value read from
    "gc_errors") is written to ../res/groups_report.html.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    # I and pMg are pinned to 0 and 14 here instead of using the module-level
    # defaults; pH and T do come from the defaults.
    pH, I, pMg, T = default_pH, 0, 14, default_T

    # Map from integer CID to the 'error' value stored for it.
    cid2error = dict((int(record['cid']), record['error'])
                     for record in gibbs_db.DictReader("gc_errors"))

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    # Union of the compounds covered by any of the estimators.
    all_cids = set(lsum([est.get_all_cids() for est in estimators.values()]))

    table_rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            # Either lookup failing resets both fields to placeholders.
            name, link = "unknown", ""

        row = {
            'cid': '<a href="%s">C%05d</a>' % (link, cid),
            'name': name,
            'error': cid2error.get(cid),
        }

        for key, est in estimators.items():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                dG0, dG0_tag, nH, z, nMg = pmap.GetMostAbundantPseudoisomer(
                    pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # The estimator has no value for this compound: blank cells.
                dG0 = dG0_tag = nH = z = nMg = ""

            per_estimator = (('nH_', nH), ('charge_', z), ('nMg_', nMg),
                             ('dG0_', dG0), ('dG0_tag_', dG0_tag))
            for field_prefix, value in per_estimator:
                row[field_prefix + key] = value

        table_rows.append(row)

    # Only the charge columns are shown, although every row also carries the
    # nH / nMg / dG0 fields of each estimator.
    report.write_table(
        table_rows,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    report.close()
def compare_charges():
    """Compare the charges predicted by the 'hatzi' and 'milo' estimators.

    For each compound known to at least one of the two estimators, look up
    its most abundant pseudoisomer under both, and write a table with the
    KEGG link, the name, the two charges and the 'error' value read from
    "gc_errors" to ../res/groups_report.html.
    """
    db_gibbs = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    writer = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    # Conditions of the comparison: default pH and T, but I = 0 and pMg = 14
    # rather than the module-level defaults.
    pH, I, pMg, T = default_pH, 0, 14, default_T

    cid2error = {}
    for entry in db_gibbs.DictReader("gc_errors"):
        cid2error[int(entry['cid'])] = entry['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        db_gibbs, 'gc_pseudoisomers', name='Milo Group Contribution')

    def most_abundant(estimator, cid):
        """Return (dG0, dG0_tag, nH, z, nMg), or five blanks if unknown."""
        try:
            pmap = estimator.cid2PseudoisomerMap(cid)
            return pmap.GetMostAbundantPseudoisomer(pH, I, pMg, T)
        except MissingCompoundFormationEnergy:
            return "", "", "", "", ""

    cid_lists = [estimator.get_all_cids() for estimator in estimators.values()]
    all_cids = set(lsum(cid_lists))

    dict_list = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            # Compound missing from KEGG: use placeholders for both fields.
            name = "unknown"
            link = ""

        row_dict = {}
        row_dict['cid'] = '<a href="%s">C%05d</a>' % (link, cid)
        row_dict['name'] = name
        row_dict['error'] = cid2error.get(cid, None)

        for key, estimator in estimators.items():
            dG0, dG0_tag, nH, z, nMg = most_abundant(estimator, cid)
            row_dict['nH_' + key] = nH
            row_dict['charge_' + key] = z
            row_dict['nMg_' + key] = nMg
            row_dict['dG0_' + key] = dG0
            row_dict['dG0_tag_' + key] = dG0_tag

        dict_list.append(row_dict)

    shown_columns = ['cid', 'name', 'charge_hatzi', 'charge_milo', 'error']
    writer.write_table(dict_list, headers=shown_columns)
    writer.close()
def main():
    """Analyze every KEGG module and write a report sorted by OBD.

    Command-line arguments (parsed by ``MakeArgParser``) select the
    thermodynamic estimator, the configuration file and the output prefix.
    The conditions (pH, I, T, pMg), the concentration range and the bounds
    are read from the 'CONFIGURATION' entry of the configuration file, or
    from its first entry when 'CONFIGURATION' is absent.

    Each module gets a heading and whatever ``AnalyzeKeggModule`` writes, and
    a summary table sorted by 'OBD [kJ/mol]' is appended. Output goes to
    ``<output_prefix>.html``.

    Raises:
        ValueError: if the configuration file contains no entries.
    """
    estimators = LoadAllEstimators()
    parser = MakeArgParser(estimators)
    args = parser.parse_args()

    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if not entries:
        raise ValueError('No entries in configuration file')

    entry = 'CONFIGURATION'
    if entry not in entries:
        logging.warning(
            'Configuration file does not contain the entry "CONFIGURATION". '
            'Using the first entry by default: %s', entries[0])
        entry = entries[0]

    p_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    html_writer = HtmlWriter(args.output_prefix + ".html")

    rowdicts = []
    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    kegg = Kegg.getInstance()
    for mid in kegg.get_all_mids():
        module_name = kegg.get_module_name(mid)
        # The heading is written before the analysis, so it is present in the
        # report even when the module is skipped below.
        html_writer.write('<h2 id=M%05d>M%05d: %s</h2>'
                          % (mid, mid, module_name))
        try:
            d = AnalyzeKeggModule(thermo, mid, bounds, html_writer)
        except KeyError as e:
            # Best effort: leave the module out of the summary table, but say
            # so instead of dropping it silently.
            logging.warning('Skipping module M%05d (%s): KeyError %s',
                            mid, module_name, e)
            continue
        d['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        d['Name'] = module_name
        rowdicts.append(d)

    rowdicts.sort(key=lambda row: row['OBD [kJ/mol]'])
    html_writer.write_table(rowdicts, headers, decimal=1)
    html_writer.close()
def main():
    """Run the module analysis over all KEGG modules and write the report.

    The estimator, configuration file and output prefix come from the command
    line (see ``MakeArgParser``). Conditions, concentration range and bounds
    are taken from the 'CONFIGURATION' entry of the configuration file; when
    that entry is missing the first entry is used and a warning is logged.

    The report ``<output_prefix>.html`` contains one section per module and a
    final table of the analyzed modules sorted by 'OBD [kJ/mol]'.

    Raises:
        ValueError: if the configuration file contains no entries.
    """
    estimators = LoadAllEstimators()
    parser = MakeArgParser(estimators)
    args = parser.parse_args()

    thermo = estimators[args.thermodynamics_source]

    kegg_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = kegg_file.entries()
    if not entries:
        raise ValueError('No entries in configuration file')

    entry = 'CONFIGURATION'
    if entry not in entries:
        logging.warning(
            'Configuration file does not contain the entry "CONFIGURATION". '
            'Using the first entry by default: %s', entries[0])
        entry = entries[0]

    p_data = PathwayData.FromFieldMap(kegg_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    html_writer = HtmlWriter(args.output_prefix + ".html")

    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    rowdicts = []
    skipped = 0
    kegg = Kegg.getInstance()
    for mid in kegg.get_all_mids():
        # Look the name up once; it is used in the heading, the log message
        # and the summary row.
        name = kegg.get_module_name(mid)
        html_writer.write('<h2 id=M%05d>M%05d: %s</h2>' % (mid, mid, name))
        try:
            d = AnalyzeKeggModule(thermo, mid, bounds, html_writer)
        except KeyError as e:
            # Modules whose analysis raises KeyError are omitted from the
            # summary table; record which ones and why.
            skipped += 1
            logging.warning('Skipping module M%05d (%s): KeyError %s',
                            mid, name, e)
            continue
        d['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        d['Name'] = name
        rowdicts.append(d)

    if skipped:
        logging.info('%d modules were skipped because of a KeyError', skipped)

    rowdicts.sort(key=lambda row: row['OBD [kJ/mol]'])
    html_writer.write_table(rowdicts, headers, decimal=1)
    html_writer.close()
def main():
    """Write the NIST verification report to ../res/nist/report.html.

    For each estimator in a fixed list, ``Nist.verify_results`` is run and
    the number of estimations and the RMSE are reported, together with the
    estimator's coverage of three reaction sets: the balanced KEGG
    reactions, the Feist reactions and the unique NIST reactions. A summary
    table with one row per estimator (plus a 'Total' row holding the size of
    each reaction set) closes the report.
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)

    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n"
                      % len(nist.data))
    html_writer.write(
        "Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n"
        % (nist.T_range[0], nist.T_range[1], len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    reactions = {}
    reactions['KEGG'] = []
    for reaction in Kegg.getInstance().AllReactions():
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
        except (KeggReactionNotBalancedException, KeggParseException,
                OpenBabelError):
            # Deliberate filter: reactions that cannot be balanced are left
            # out of the KEGG coverage set.
            continue
        reactions['KEGG'].append(reaction)

    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()

    # Pairwise comparisons are currently disabled. To enable them, add
    # (name1, name2) tuples of estimator keys, e.g. ('hatzi_gc', 'UGC'),
    # ('PGC', 'PRC') or ('alberty', 'PRC').
    pairs = []
    for t1, t2 in pairs:
        logging.info('Writing the NIST report for %s vs. %s',
                     estimators[t1].name, estimators[t2].name)
        html_writer.write('<p><b>%s vs. %s</b> '
                          % (estimators[t1].name, estimators[t2].name))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer,
                           thermo1=estimators[t1],
                           thermo2=estimators[t2],
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')

    # Disabled developer toggle, kept as in the original.
    if False:
        estimators['alberty'].CompareOverKegg(
            html_writer,
            other=estimators['PRC'],
            fig_name='kegg_compare_alberty_vs_nist')

    rowdicts = []
    rowdict = {'Method': 'Total'}
    for db_name, reaction_list in reactions.items():
        rowdict[db_name + ' coverage'] = len(reaction_list)
    rowdicts.append(rowdict)

    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s', thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(
            html_writer=html_writer, thermodynamics=thermo, name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n' % (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f', num_estimations, rmse)

        rowdict = {
            'Method': thermo.name,
            'RMSE (kJ/mol)': "%.1f (N=%d)" % (rmse, num_estimations),
        }
        for db_name, reaction_list in reactions.items():
            n_covered = thermo.CalculateCoverage(reaction_list)
            n_total = len(reaction_list)
            # An empty reaction set would otherwise raise ZeroDivisionError;
            # report it as 0% coverage.
            percent = n_covered * 100.0 / n_total if n_total else 0.0
            rowdict[db_name + " coverage"] = "%.1f%% (%d)" % (percent, n_covered)
            logging.info("%s coverage = %.1f%%", db_name, percent)
        rowdicts.append(rowdict)

    headers = ['Method', 'RMSE (kJ/mol)'] + \
        [db_name + ' coverage' for db_name in reactions]
    html_writer.write_table(rowdicts, headers=headers)
    # Close the report explicitly, as the other report functions in this
    # file do.
    html_writer.close()
def main():
    """Fit formation energies to the NIST data with anchored compounds.

    Steps:
      1. Define the anchored ("fixed") compounds and load Alberty's table.
      2. Obtain the stoichiometric matrix S, the dG0 vector and the CID list,
         either from ``NistRegression.ReverseTransform`` (then cached as
         ../res/prc_CID.txt, prc_S.txt and prc_dG0.txt) or from that cache
         when ../res/prc_S.txt already exists.
      3. Solve the least-squares problem with the anchors held fixed.
      4. Classify each compound as anchored, determined by the anchors, or
         dependent on kernel dimensions, and compare it with Alberty.
      5. Express every non-anchored compound in terms of a sparse null-space
         basis of the reduced matrix.

    Outputs: ../res/regression_fast.html, ../res/prc_results.csv and
    ../res/prc_latex.txt.

    Raises:
        ValueError: if an anchored CID does not appear in the CID list of
            the regression (raised by ``list.index``).
    """
    kegg = Kegg.getInstance()
    prefix = "../res/prc_"

    fixed_cids = {}  # a dictionary from CID to pairs of (nH, dG0)

    # Alberty formation energies directly measured, linearly independent:
    fixed_cids[1] = (2, -237.19)  # H2O
    fixed_cids[9] = (1, -1096.1)  # HPO3(-2)
    fixed_cids[14] = (4, -79.31)  # NH4(+1)
    fixed_cids[59] = (0, -744.53)  # SO4(-2)
    fixed_cids[288] = (1, -586.77)  # HCO3(-1)

    # Alberty zeros:
    fixed_cids[3] = (26, 0.0)  # NAD(ox)
    fixed_cids[10] = (32, 0.0)  # CoA
    fixed_cids[127] = (30, 0.0)  # glutathione(ox)
    fixed_cids[376] = (28, 0.0)  # retinal(ox)

    # Directly measured values
    fixed_cids[4] = (27, 22.65)  # NAD(red) -- relative to NAD(ox)
    fixed_cids[212] = (13, -194.5)  # adenosine

    # Not anchored on purpose:
    #   C00294 inosine (12, -409.2) - linearly dependent on other anchors.
    #   Alberty zeros which are not in NIST: C00524 cytochrome c(ox),
    #   C00016 FAD(ox), C00139 ferredoxin(ox), C00061 FMN(ox),
    #   C00343 thioredoxin(ox), C00399 ubiquinone(ox).

    public_db = SqliteDatabase("../data/public_data.sqlite")
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        public_db, "alberty_pseudoisomers", label=None, name="Alberty")
    alberty_cid2dG0 = {}
    alberty_cid2nH = {}
    for cid in alberty.get_all_cids():
        pmap = alberty.cid2PseudoisomerMap(cid)
        alberty_dG0, _dG0_tag, alberty_nH, _z, _nMg = \
            pmap.GetMostAbundantPseudoisomer(
                pH=default_pH, I=default_I, pMg=default_pMg, T=default_T)
        alberty_cid2nH[cid] = alberty_nH
        alberty_cid2dG0[cid] = alberty_dG0

    if not os.path.exists(prefix + "S.txt"):
        db = SqliteDatabase("../res/gibbs.sqlite")
        nist_regression = NistRegression(db)

        # Choose nH per compound: anchors first, then Alberty, then the
        # dissociation table, and finally 0 as a last resort.
        cid2nH = {}
        for cid in nist_regression.nist.GetAllCids():
            if cid in fixed_cids:
                cid2nH[cid] = fixed_cids[cid][0]
            elif cid in alberty_cid2nH:
                cid2nH[cid] = alberty_cid2nH[cid]
            else:
                tmp = nist_regression.dissociation.GetMostAbundantPseudoisomer(
                    cid, pH=default_pH, I=default_I, pMg=default_pMg,
                    T=default_T)
                if tmp is not None:
                    cid2nH[cid] = tmp[0]
                else:
                    logging.warning(
                        "The most abundant pseudoisomer of %s (C%05d) "
                        "cannot be resolved. Using nH = 0.",
                        kegg.cid2name(cid), cid)
                    cid2nH[cid] = 0

        S, dG0, cids = nist_regression.ReverseTransform(cid2nH=cid2nH)

        # export the raw data matrices to text files
        C = np.array([[cid, cid2nH.get(cid, 0)] for cid in cids])
        np.savetxt(prefix + "CID.txt", C, fmt="%d", delimiter=",")
        np.savetxt(prefix + "S.txt", S, fmt="%g", delimiter=",")
        np.savetxt(prefix + "dG0.txt", dG0, fmt="%.2f", delimiter=",")
    else:
        # Reuse the matrices exported by a previous run.
        C = np.loadtxt(prefix + "CID.txt", delimiter=",")
        cids = [int(cid) for cid in C[:, 0]]
        cid2nH = {}
        for i, cid in enumerate(cids):
            cid2nH[cid] = int(C[i, 1])
        S = np.loadtxt(prefix + "S.txt", delimiter=",")
        dG0 = np.loadtxt(prefix + "dG0.txt", delimiter=",")
        dG0 = np.reshape(dG0, (dG0.shape[0], 1))

    html_writer = HtmlWriter("../res/regression_fast.html")
    html_writer.write("<h1>Pseudoisomeric Reactant Contributions</h1>\n")
    html_writer.write("<p>The stoichiometric matrix (S):")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, S, cids)
    html_writer.div_end()
    html_writer.write("</p>")

    # Map from the column index of each anchored compound to its fixed dG0.
    # list.index raises ValueError if an anchor is missing from cids.
    index2value = {}
    for cid, (_nH, dG0_fixed) in fixed_cids.items():
        index2value[cids.index(cid)] = dG0_fixed

    solution, _K = LinearRegression.LeastSquaresWithFixedPoints(
        S, dG0, index2value)
    cid2dG0 = {}
    for i, cid in enumerate(cids):
        cid2dG0[cid] = solution[i]

    # Calculate the Kernel of the reduced stoichiometric matrix (after
    # removing the columns of the fixed compounds).
    cids_red = [cid for cid in cids if cid not in fixed_cids]
    index_red = [i for i in range(len(cids)) if i not in index2value]
    S_red = S[:, index_red]
    K_red = LinearRegression.Kernel(S_red)

    # Find all CIDs that are completely determined and do not depend on any
    # free variable. In other words, all zero columns in K_red.
    determined_indices = np.where(np.sum(abs(K_red), 0) < 1e-10)[0]
    determined_cids = set(cids_red[i] for i in determined_indices)

    dict_list = []
    plot_data = []
    for cid in cids:
        d = {
            "CID": "C%05d" % cid,
            "Compound": kegg.cid2name(cid),
            "nH": "%d" % cid2nH[cid],
            "dG0 (PRC)": "%.1f" % cid2dG0[cid],
        }

        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
            # Anchored compounds are left out of the comparison plot.
            if cid not in fixed_cids:
                plot_data.append(
                    (alberty_cid2dG0[cid], cid2dG0[cid], kegg.cid2name(cid)))
        else:
            d["dG0 (Alberty)"] = ""

        if cid in fixed_cids:
            d["Depends on"] = "anchored"
        elif cid in determined_cids:
            d["Depends on"] = "fixed compounds"
        else:
            d["Depends on"] = "kernel dimensions"

        dict_list.append(d)

    dict_list.sort(key=lambda row: (row["Depends on"], row["CID"]))
    html_writer.write(
        "<p>Formation energies determined by the linear constraints:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(
        dict_list,
        headers=["#", "Compound", "CID", "nH", "dG0 (PRC)", "dG0 (Alberty)",
                 "Depends on"])
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    # Plot a comparison between PRC and Alberty formation energies
    fig = plt.figure(figsize=(8, 8), dpi=80)
    plt.plot([point[0] for point in plot_data],
             [point[1] for point in plot_data], "b.", figure=fig)
    for x_pos, y_pos, name in plot_data:
        plt.text(x_pos, y_pos, name, fontsize=6)
    # Raw strings: the labels contain LaTeX backslashes, not Python escapes.
    plt.xlabel(r"Alberty $\Delta_f G^\circ$")
    plt.ylabel(r"PRC $\Delta_f G^\circ$")
    html_writer.write("<p>Plot comparing PRC and Alberty results:")
    html_writer.insert_toggle(start_here=True)
    html_writer.embed_matplotlib_figure(fig)
    html_writer.div_end()
    html_writer.write("</p>")

    K_sparse = SparseKernel(S_red).Solve()
    html_writer.write(
        "<p>The sparse null-space of the reduced stoichiometric matrix:")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, K_sparse, cids_red)
    html_writer.div_end()
    html_writer.write("</p>")

    dict_list = []
    n_free = K_sparse.shape[0]
    index2string_html = dict((i, "V<sub>%02d</sub>" % i) for i in range(n_free))
    index2string = dict((i, "V%d" % i) for i in range(n_free))
    for i, cid in enumerate(cids_red):
        d = {}
        d["KEGG ID"] = '<a href="%s">C%05d</a>' % (kegg.cid2link(cid), cid)
        d["KEGG ID plain"] = "C%05d" % cid
        d["Compound"] = kegg.cid2name(cid)
        d["nH"] = "%d" % cid2nH[cid]

        if cid in alberty_cid2dG0:
            d["dG0 (Alberty)"] = "%.1f" % alberty_cid2dG0[cid]
        else:
            d["dG0 (Alberty)"] = ""

        d["dG0 (PRC)"] = "%.1f" % cid2dG0[cid]
        d["dG0 (PRC) plain"] = "%.1f" % cid2dG0[cid]

        # Sort key: 0/1 flags of the kernel vectors this compound depends
        # on, reversed.
        indic = np.where(abs(K_sparse[:, i]) > 1e-10, 1, 0).tolist()
        indic.reverse()
        d["order_key"] = indic

        # Append the free-variable term only when the column is non-zero.
        if mlab.rms_flat(K_sparse[:, i]) > 1e-10:
            d["dG0 (PRC)"] += " + (" + \
                vector2string(K_sparse[:, i], index2string_html) + ")"
            d["dG0 (PRC) plain"] += " + (" + \
                vector2string(K_sparse[:, i], index2string) + ")"
        dict_list.append(d)

    dict_list.sort(key=lambda row: (row["order_key"], row["KEGG ID plain"]))

    # Export the results to CSV; the context manager closes and flushes the
    # file, which the original code never did.
    with open("../res/prc_results.csv", "w") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(
            ["KEGG ID", "Compound", "nH", "dG0 (PRC)", "dG0 (Alberty)"])
        for d in dict_list:
            csv_writer.writerow([d["KEGG ID plain"], d["Compound"], d["nH"],
                                 d["dG0 (PRC) plain"], d["dG0 (Alberty)"]])

    html_writer.write(
        "<p>All formation energies as a function of the free variables:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(
        dict_list,
        headers=["#", "KEGG ID", "Compound", "nH", "dG0 (PRC)",
                 "dG0 (Alberty)"])
    html_writer.write("</font>")
    html_writer.div_end()
    html_writer.write("</p>")

    with open("../res/prc_latex.txt", "w") as fp:
        fp.write(latex.table2LaTeX(
            dict_list,
            headers=["#", "KEGG ID plain", "Compound", "nH",
                     "dG0 (PRC) plain", "dG0 (Alberty)"]))

    # Close the report explicitly, as the other report functions in this
    # file do.
    html_writer.close()
def main():
    """Generate ../res/nist/report.html, the NIST verification report.

    Every estimator in a fixed list is checked against the NIST data with
    ``Nist.verify_results``; the number of estimations and the RMSE are
    written per estimator. Coverage of the balanced KEGG reactions, the
    Feist reactions and the unique NIST reactions is computed with
    ``CalculateCoverage`` and summarised in a final table, whose first row
    ('Total') holds the size of each reaction set.
    """
    html_writer = HtmlWriter("../res/nist/report.html")
    estimators = LoadAllEstimators()
    nist = Nist()
    nist.T_range = (273.15 + 24, 273.15 + 40)

    t_min, t_max = nist.T_range
    html_writer.write('<p>\n')
    html_writer.write("Total number of reaction in NIST: %d</br>\n"
                      % len(nist.data))
    html_writer.write(
        "Total number of reaction in range %.1fK < T < %.1fK: %d</br>\n"
        % (t_min, t_max, len(nist.SelectRowsFromNist())))
    html_writer.write('</p>\n')

    # Keep only the KEGG reactions that can be balanced.
    balanced_kegg = []
    for reaction in Kegg.getInstance().AllReactions():
        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
        except (KeggReactionNotBalancedException, KeggParseException,
                OpenBabelError):
            # Deliberately skipped: not usable for the coverage statistics.
            continue
        balanced_kegg.append(reaction)

    reactions = {}
    reactions['KEGG'] = balanced_kegg
    reactions['FEIST'] = Feist.FromFiles().reactions
    reactions['NIST'] = nist.GetUniqueReactionSet()

    # No pairwise comparison is enabled at the moment. Candidates used in
    # the past: ('hatzi_gc', 'UGC'), ('PGC', 'PRC'), ('alberty', 'PRC').
    pairs = []
    for t1, t2 in pairs:
        name1, name2 = estimators[t1].name, estimators[t2].name
        logging.info('Writing the NIST report for %s vs. %s', name1, name2)
        html_writer.write('<p><b>%s vs. %s</b> ' % (name1, name2))
        html_writer.insert_toggle(start_here=True)
        two_way_comparison(html_writer=html_writer,
                           thermo1=estimators[t1],
                           thermo2=estimators[t2],
                           reaction_list=reactions['FEIST'],
                           name='%s_vs_%s' % (t1, t2))
        html_writer.div_end()
        html_writer.write('</p>')

    # Disabled developer toggle, kept as in the original.
    if False:
        estimators['alberty'].CompareOverKegg(
            html_writer,
            other=estimators['PRC'],
            fig_name='kegg_compare_alberty_vs_nist')

    total_row = {'Method': 'Total'}
    for db_name, reaction_list in reactions.items():
        total_row[db_name + ' coverage'] = len(reaction_list)
    rowdicts = [total_row]

    for name in ['UGC', 'PGC', 'PRC', 'alberty', 'merged', 'hatzi_gc']:
        thermo = estimators[name]
        logging.info('Writing the NIST report for %s', thermo.name)
        html_writer.write('<p><b>%s</b> ' % thermo.name)
        html_writer.insert_toggle(start_here=True)
        num_estimations, rmse = nist.verify_results(
            html_writer=html_writer, thermodynamics=thermo, name=name)
        html_writer.div_end()
        html_writer.write('N = %d, RMSE = %.1f</p>\n' % (num_estimations, rmse))
        logging.info('N = %d, RMSE = %.1f', num_estimations, rmse)

        rowdict = {
            'Method': thermo.name,
            'RMSE (kJ/mol)': "%.1f (N=%d)" % (rmse, num_estimations),
        }
        for db_name, reaction_list in reactions.items():
            n_covered = thermo.CalculateCoverage(reaction_list)
            # Guard against an empty reaction set, which would otherwise
            # raise ZeroDivisionError; it is reported as 0% coverage.
            if reaction_list:
                percent = n_covered * 100.0 / len(reaction_list)
            else:
                percent = 0.0
            rowdict[db_name + " coverage"] = "%.1f%% (%d)" % (percent, n_covered)
            logging.info("%s coverage = %.1f%%", db_name, percent)
        rowdicts.append(rowdict)

    headers = ['Method', 'RMSE (kJ/mol)']
    headers += [db_name + ' coverage' for db_name in reactions]
    html_writer.write_table(rowdicts, headers=headers)
    # Close the report explicitly, as the other report functions in this
    # file do.
    html_writer.close()
def main():
    """Run the anchored regression on the NIST data and write the reports.

    The stoichiometric matrix S, the dG0 vector and the list of CIDs come
    from ``NistRegression.ReverseTransform`` and are cached in
    ../res/prc_CID.txt, ../res/prc_S.txt and ../res/prc_dG0.txt. When
    ../res/prc_S.txt exists, the cached files are loaded instead. The
    formation energies are then solved by least squares with the anchored
    compounds held at their given values, compared with Alberty's values,
    and written as a function of a sparse null-space basis of the reduced
    stoichiometric matrix.

    Outputs: '../res/regression_fast.html', '../res/prc_results.csv' and
    '../res/prc_latex.txt'.

    Raises:
        ValueError: if an anchored CID does not appear in the CID list of
            the regression (raised by ``list.index``).
    """
    kegg = Kegg.getInstance()
    prefix = '../res/prc_'

    fixed_cids = {}  # a dictionary from CID to pairs of (nH, dG0)

    # Alberty formation energies directly measured, linearly independent:
    fixed_cids[1] = (2, -237.19)  # H2O
    fixed_cids[9] = (1, -1096.1)  # HPO3(-2)
    fixed_cids[14] = (4, -79.31)  # NH4(+1)
    fixed_cids[59] = (0, -744.53)  # SO4(-2)
    fixed_cids[288] = (1, -586.77)  # HCO3(-1)

    # Alberty zeros:
    fixed_cids[3] = (26, 0.0)  # NAD(ox)
    fixed_cids[10] = (32, 0.0)  # CoA
    fixed_cids[127] = (30, 0.0)  # glutathione(ox)
    fixed_cids[376] = (28, 0.0)  # retinal(ox)

    # Directly measured values
    fixed_cids[4] = (27, 22.65)  # NAD(red) -- relative to NAD(ox)
    fixed_cids[212] = (13, -194.5)  # adenosine

    # Not anchored on purpose:
    #   C00294 inosine (12, -409.2) - linearly dependent on other anchors.
    #   Alberty zeros which are not in NIST: C00524 cytochrome c(ox),
    #   C00016 FAD(ox), C00139 ferredoxin(ox), C00061 FMN(ox),
    #   C00343 thioredoxin(ox), C00399 ubiquinone(ox).

    public_db = SqliteDatabase("../data/public_data.sqlite")
    alberty = PsuedoisomerTableThermodynamics.FromDatabase(
        public_db, 'alberty_pseudoisomers', label=None, name='Alberty')
    alberty_cid2dG0 = {}
    alberty_cid2nH = {}
    for cid in alberty.get_all_cids():
        pmap = alberty.cid2PseudoisomerMap(cid)
        alberty_dG0, _dG0_tag, alberty_nH, _z, _nMg = \
            pmap.GetMostAbundantPseudoisomer(
                pH=default_pH, I=default_I, pMg=default_pMg, T=default_T)
        alberty_cid2nH[cid] = alberty_nH
        alberty_cid2dG0[cid] = alberty_dG0

    cache_exists = os.path.exists(prefix + 'S.txt')
    if not cache_exists:
        db = SqliteDatabase("../res/gibbs.sqlite")
        nist_regression = NistRegression(db)

        # nH per compound, in order of preference: the anchor table,
        # Alberty's table, the dissociation table, and 0 as a fallback.
        cid2nH = {}
        for cid in nist_regression.nist.GetAllCids():
            if cid in fixed_cids:
                cid2nH[cid] = fixed_cids[cid][0]
            elif cid in alberty_cid2nH:
                cid2nH[cid] = alberty_cid2nH[cid]
            else:
                tmp = nist_regression.dissociation.GetMostAbundantPseudoisomer(
                    cid, pH=default_pH, I=default_I, pMg=default_pMg,
                    T=default_T)
                if tmp is not None:
                    cid2nH[cid] = tmp[0]
                else:
                    logging.warning(
                        'The most abundant pseudoisomer of %s (C%05d) '
                        'cannot be resolved. Using nH = 0.',
                        kegg.cid2name(cid), cid)
                    cid2nH[cid] = 0

        S, dG0, cids = nist_regression.ReverseTransform(cid2nH=cid2nH)

        # export the raw data matrices to text files
        C = np.array([[cid, cid2nH.get(cid, 0)] for cid in cids])
        np.savetxt(prefix + 'CID.txt', C, fmt='%d', delimiter=',')
        np.savetxt(prefix + 'S.txt', S, fmt='%g', delimiter=',')
        np.savetxt(prefix + 'dG0.txt', dG0, fmt='%.2f', delimiter=',')
    else:
        # Load the matrices exported by a previous run.
        C = np.loadtxt(prefix + 'CID.txt', delimiter=',')
        cids = [int(cid) for cid in C[:, 0]]
        cid2nH = {}
        for i, cid in enumerate(cids):
            cid2nH[cid] = int(C[i, 1])
        S = np.loadtxt(prefix + 'S.txt', delimiter=',')
        dG0 = np.loadtxt(prefix + 'dG0.txt', delimiter=',')
        dG0 = np.reshape(dG0, (dG0.shape[0], 1))

    html_writer = HtmlWriter('../res/regression_fast.html')
    html_writer.write("<h1>Pseudoisomeric Reactant Contributions</h1>\n")
    html_writer.write("<p>The stoichiometric matrix (S):")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, S, cids)
    html_writer.div_end()
    html_writer.write('</p>')

    # Column index of every anchored compound -> its fixed dG0. A missing
    # anchor makes list.index raise ValueError.
    index2value = {}
    for cid, (_nH, dG0_fixed) in fixed_cids.items():
        index2value[cids.index(cid)] = dG0_fixed

    solution, _K = LinearRegression.LeastSquaresWithFixedPoints(
        S, dG0, index2value)
    cid2dG0 = {}
    for i, cid in enumerate(cids):
        cid2dG0[cid] = solution[i]

    # Calculate the Kernel of the reduced stoichiometric matrix (after
    # removing the columns of the fixed compounds).
    cids_red = [cid for cid in cids if cid not in fixed_cids]
    index_red = [i for i in range(len(cids)) if i not in index2value]
    S_red = S[:, index_red]
    K_red = LinearRegression.Kernel(S_red)

    # Find all CIDs that are completely determined and do not depend on any
    # free variable. In other words, all zero columns in K_red.
    determined_indices = np.where(np.sum(abs(K_red), 0) < 1e-10)[0]
    determined_cids = set(cids_red[i] for i in determined_indices)

    dict_list = []
    plot_data = []
    for cid in cids:
        compound_name = kegg.cid2name(cid)
        d = {
            'CID': 'C%05d' % cid,
            'Compound': compound_name,
            'nH': '%d' % cid2nH[cid],
            'dG0 (PRC)': '%.1f' % cid2dG0[cid],
        }

        if cid in alberty_cid2dG0:
            d['dG0 (Alberty)'] = '%.1f' % alberty_cid2dG0[cid]
            # Anchored compounds are excluded from the comparison plot.
            if cid not in fixed_cids:
                plot_data.append(
                    (alberty_cid2dG0[cid], cid2dG0[cid], kegg.cid2name(cid)))
        else:
            d['dG0 (Alberty)'] = ''

        if cid in fixed_cids:
            d['Depends on'] = 'anchored'
        elif cid in determined_cids:
            d['Depends on'] = 'fixed compounds'
        else:
            d['Depends on'] = 'kernel dimensions'

        dict_list.append(d)

    dict_list.sort(key=lambda row: (row['Depends on'], row['CID']))
    html_writer.write(
        "<p>Formation energies determined by the linear constraints:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(
        dict_list,
        headers=['#', 'Compound', 'CID', 'nH', 'dG0 (PRC)', 'dG0 (Alberty)',
                 'Depends on'])
    html_writer.write('</font>')
    html_writer.div_end()
    html_writer.write('</p>')

    # Plot a comparison between PRC and Alberty formation energies
    fig = plt.figure(figsize=(8, 8), dpi=80)
    plt.plot([point[0] for point in plot_data],
             [point[1] for point in plot_data], 'b.', figure=fig)
    for x_pos, y_pos, name in plot_data:
        plt.text(x_pos, y_pos, name, fontsize=6)
    # Raw strings: the labels contain LaTeX backslashes, not Python escapes.
    plt.xlabel(r'Alberty $\Delta_f G^\circ$')
    plt.ylabel(r'PRC $\Delta_f G^\circ$')
    html_writer.write("<p>Plot comparing PRC and Alberty results:")
    html_writer.insert_toggle(start_here=True)
    html_writer.embed_matplotlib_figure(fig)
    html_writer.div_end()
    html_writer.write("</p>")

    K_sparse = SparseKernel(S_red).Solve()
    html_writer.write(
        "<p>The sparse null-space of the reduced stoichiometric matrix:")
    html_writer.insert_toggle(start_here=True)
    stoichiometric_matrix2html(html_writer, K_sparse, cids_red)
    html_writer.div_end()
    html_writer.write("</p>")

    dict_list = []
    n_free = K_sparse.shape[0]
    index2string_html = dict((i, "V<sub>%02d</sub>" % i) for i in range(n_free))
    index2string = dict((i, "V%d" % i) for i in range(n_free))
    for i, cid in enumerate(cids_red):
        d = {}
        d['KEGG ID'] = '<a href="%s">C%05d</a>' % (kegg.cid2link(cid), cid)
        d['KEGG ID plain'] = 'C%05d' % cid
        d['Compound'] = kegg.cid2name(cid)
        d['nH'] = '%d' % cid2nH[cid]

        if cid in alberty_cid2dG0:
            d['dG0 (Alberty)'] = '%.1f' % alberty_cid2dG0[cid]
        else:
            d['dG0 (Alberty)'] = ''

        d['dG0 (PRC)'] = '%.1f' % cid2dG0[cid]
        d['dG0 (PRC) plain'] = '%.1f' % cid2dG0[cid]

        # Sort key: 0/1 flags of the kernel vectors this compound depends
        # on, reversed.
        indic = np.where(abs(K_sparse[:, i]) > 1e-10, 1, 0).tolist()
        indic.reverse()
        d['order_key'] = indic

        # Append the free-variable term only when the column is non-zero.
        if mlab.rms_flat(K_sparse[:, i]) > 1e-10:
            d['dG0 (PRC)'] += " + (" + \
                vector2string(K_sparse[:, i], index2string_html) + ")"
            d['dG0 (PRC) plain'] += " + (" + \
                vector2string(K_sparse[:, i], index2string) + ")"
        dict_list.append(d)

    dict_list.sort(key=lambda row: (row['order_key'], row['KEGG ID plain']))

    # Export the results to CSV. The file is now closed (and flushed) when
    # the block ends; the original left the handle open.
    with open('../res/prc_results.csv', 'w') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(
            ['KEGG ID', 'Compound', 'nH', 'dG0 (PRC)', 'dG0 (Alberty)'])
        for d in dict_list:
            csv_writer.writerow([
                d['KEGG ID plain'], d['Compound'], d['nH'],
                d['dG0 (PRC) plain'], d['dG0 (Alberty)']
            ])

    html_writer.write(
        "<p>All formation energies as a function of the free variables:")
    html_writer.insert_toggle(start_here=True)
    html_writer.write('<font size="1">')
    html_writer.write_table(
        dict_list,
        headers=['#', 'KEGG ID', 'Compound', 'nH', 'dG0 (PRC)',
                 'dG0 (Alberty)'])
    html_writer.write('</font>')
    html_writer.div_end()
    html_writer.write('</p>')

    with open('../res/prc_latex.txt', 'w') as fp:
        fp.write(latex.table2LaTeX(
            dict_list,
            headers=['#', 'KEGG ID plain', 'Compound', 'nH',
                     'dG0 (PRC) plain', 'dG0 (Alberty)']))

    # Close the report explicitly, as the other report functions in this
    # file do.
    html_writer.close()