def AnalyzeConcentrationGradient(prefix, thermo, csv_output_fname, cid=13): # default compound is PPi
    """Scan the concentration of one compound and plot the resulting values.

    For every pH in a (currently hard-coded) pH vector and every concentration
    on a logarithmic grid, the concentration of compound `cid` is pinned via
    `override_bounds` and `pareto` is re-run on the pathway file. Column 1 of
    the array returned by `pareto` is collected, optionally written to a CSV
    file, plotted against the concentration, and the figure is embedded in
    '../res/<prefix>.html'.

    Args:
        prefix: base name; pathways are read from
            '../data/thermodynamics/<prefix>.txt' and the report is written
            to '../res/<prefix>.html'.
        thermo: thermodynamics object; `kegg.cid2name`, `I` and `T` are read
            here and the object is forwarded to `pareto`.
        csv_output_fname: path of the CSV file for the raw values, or a falsy
            value to skip CSV output entirely.
        cid: KEGG compound ID whose concentration is scanned.
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()

    # CSV output is optional: both handles stay None when no name is given.
    csv_file = None
    csv_output = None
    if csv_output_fname:
        csv_file = open(csv_output_fname, 'w')
        csv_output = csv.writer(csv_file)

    pH_vec = np.array([7]) # this needs to be fixed so that the txt file will set the pH
    # logarithmic scale from 10^-2 M (10 mM) down to 10^-6 M (1 uM)
    conc_vec = 10**(-np.arange(2, 6.0001, 0.25))
    override_bounds = {}

    fig = plt.figure(figsize=(6, 6), dpi=90)
    legend = []
    try:
        if csv_output is not None:
            csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] + kegg_file.entries())

        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                # Fix the compound at exactly this concentration.
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" % (pH, compound_name, conc))
                data, labels = pareto(kegg_file, null_html_writer, thermo,
                                      pH=pH, section_prefix="", balance_water=True,
                                      override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # Previously this row was written unconditionally and crashed
                # with AttributeError when no CSV file had been requested.
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] + list(data[:, 1].flat))
            # one entry was appended per concentration
            obd_mat = np.matrix(obd_vec)
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (l, pH) for l in labels]
    finally:
        # Close the CSV file so that buffered rows reach the disk.
        if csv_file is not None:
            csv_file.close()

    plt.title("ODB vs. [%s] (I = %gM, T = %gK)" % (compound_name, thermo.I, thermo.T), figure=fig)
    plt.xscale('log')
    plt.xlabel('Concentration of %s [M]' % compound_name, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(legend)
    # The heading used to be closed with a mismatched </h1> tag.
    html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
    html_writer.embed_matplotlib_figure(fig, name=prefix)
    html_writer.close()
def main():
    """Run `Nist.verify_formation` for the 'alberty' estimator.

    The estimator is looked up in the mapping returned by `LoadAllEstimators`
    and the output goes to '../res/formation_resolve.html'.
    """
    report = HtmlWriter("../res/formation_resolve.html")
    all_estimators = LoadAllEstimators()
    selected_sources = ['alberty']
    for source in selected_sources:
        estimator = all_estimators[source]
        # A fresh Nist object is built for every estimator.
        Nist().verify_formation(html_writer=report,
                                thermodynamics=estimator,
                                name=source)
    report.close()
def AnalyzePareto(pathway_file, output_prefix, thermo, pH=None):
    """Run the OBD analysis on every pathway and produce a Pareto-style plot.

    Results from `GetAllOBDs` are written to one sheet per pathway in
    '<output_prefix>.xls'. A scatter plot of OBD against minus the average
    reaction dG ('max total dG' divided by 'sum of fluxes') is embedded in
    '<output_prefix>.html'.

    Args:
        pathway_file: forwarded to `KeggFile2PathwayList`.
        output_prefix: path prefix for the '.html' and '.xls' outputs.
        thermo: thermodynamics object forwarded to `GetAllOBDs`.
        pH: forwarded to `GetAllOBDs`; None leaves the choice to that function.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)
    xls_workbook = Workbook()

    logging.info("running OBD analysis for all pathways")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="pareto", balance_water=True,
                      override_bounds={})

    column_titles = ["reaction", "formula", "flux", "delta_r G'", "shadow price"]
    for d in data:
        sheet = xls_workbook.add_sheet(d['entry'])
        for col, title in enumerate(column_titles):
            sheet.write(0, col, title)
        for r, rid in enumerate(d['rids']):
            row = r + 1  # row 0 holds the column titles
            sheet.write(row, 0, rid)
            sheet.write(row, 1, d['formulas'][r])
            sheet.write(row, 2, d['fluxes'][0, r])
            sheet.write(row, 3, d['dG_r_prime'][0, r])
            sheet.write(row, 4, d['reaction prices'][r, 0])
    xls_workbook.save('%s.xls' % output_prefix)

    obds = []
    minus_avg_tg = []
    for d in data:
        obds.append(d['OBD'])
        if d['sum of fluxes']:
            minus_avg_tg.append(-d['max total dG'] / d['sum of fluxes'])
        else:
            # avoid dividing by a zero total flux
            minus_avg_tg.append(0)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    plt.plot(minus_avg_tg, obds, 'o', figure=fig)
    if minus_avg_tg:
        # Diagonal reference line; max() would raise on an empty list.
        upper = max(minus_avg_tg)
        plt.plot([0, upper], [0, upper], '--g')
    for i, name in enumerate(pathway_names):
        plt.text(minus_avg_tg[i], obds[i], name)
    # Raw string: '\D' is not a valid escape sequence in a normal literal.
    plt.title(r'OBD vs. Average $\Delta_r G$')
    plt.ylim(ymin=0)
    plt.xlim(xmin=0)
    plt.xlabel(r'- Average $\Delta_r G$ [kJ/mol]')
    plt.ylabel(r'Optimized Distributed Bottleneck [kJ/mol]')
    # The heading used to be closed with a mismatched </h1> tag.
    html_writer.write('<h2>Pareto figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Compute the OBD of every pathway over a range of pH values.

    Outputs:
      * '<output_prefix>.html' - results of the initial test run and the
        summary figure (OBD vs. pH, one line per pathway);
      * '<output_prefix>.csv' - one row per pH with the OBD of each pathway;
      * '<output_prefix>/<entry>.csv' - one file per pathway with the
        'reaction prices' values at each pH.

    Args:
        pathway_file: forwarded to `KeggFile2PathwayList`.
        output_prefix: path prefix for all outputs (also used as a directory).
        thermo: thermodynamics object forwarded to `GetAllOBDs`.
        conc_range: despite its name, this is parsed with
            `ParseConcentrationRange` into the vector of pH values to scan.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=None, section_prefix="test", balance_water=True,
                      override_bounds={})

    # Every file opened below is tracked here so that all of them are closed
    # (and flushed) even if one of the runs fails.
    open_files = []
    try:
        summary_file = open('%s.csv' % output_prefix, 'w')
        open_files.append(summary_file)
        csv_output = csv.writer(summary_file)
        csv_output.writerow(['pH'] + pathway_names)

        util._mkdir(output_prefix)
        shadow_csvs = {}
        for d in data:
            shadow_file = open('%s/%s.csv' % (output_prefix, d['entry']), 'w')
            open_files.append(shadow_file)
            shadow_csvs[d['entry']] = csv.writer(shadow_file)
            shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

        pH_vec = ParseConcentrationRange(conc_range)
        obd_mat = []
        for pH in pH_vec.flat:
            logging.info("pH = %.1f" % (pH))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds={})
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH']] + obds)

            for d in data:
                # Exact type test kept on purpose: isinstance() would also
                # match float subclasses and could skip rows written before.
                if type(d['reaction prices']) != types.FloatType:
                    prices = list(d['reaction prices'].flat)
                    shadow_csvs[d['entry']].writerow([pH] + prices)
    finally:
        for fp in open_files:
            fp.close()

    obd_mat = np.matrix(obd_mat)  # one row per pH value, one column per pathway
    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # The heading used to be closed with a mismatched </h1> tag.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def compare_charges():
    """Write an HTML table comparing charges from the 'hatzi' and 'milo' estimators.

    For every compound known to either estimator, the result of
    `GetMostAbundantPseudoisomer` is collected per estimator, together with
    the 'error' value stored in the 'gc_errors' table. The table is written
    to '../res/groups_report.html'.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    # Ionic strength 0 and pMg 14 are used here rather than the module
    # defaults (default_I, default_pMg).
    pH = default_pH
    I = 0
    pMg = 14
    T = default_T

    cid2error = {}
    for record in gibbs_db.DictReader("gc_errors"):
        cid2error[int(record['cid'])] = record['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    cid_lists = [est.get_all_cids() for est in estimators.values()]
    all_cids = set(lsum(cid_lists))

    table_rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            # compound missing from KEGG: fall back to placeholders
            name, link = "unknown", ""

        table_row = {
            'cid': '<a href="%s">C%05d</a>' % (link, cid),
            'name': name,
            'error': cid2error.get(cid),
        }
        for key, est in estimators.items():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                dG0, dG0_tag, nH, z, nMg = pmap.GetMostAbundantPseudoisomer(
                    pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # leave the cells empty for this estimator
                dG0 = dG0_tag = nH = z = nMg = ""
            table_row.update({
                'nH_' + key: nH,
                'charge_' + key: z,
                'nMg_' + key: nMg,
                'dG0_' + key: dG0,
                'dG0_tag_' + key: dG0_tag,
            })
        table_rows.append(table_row)

    report.write_table(
        table_rows,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    report.close()
def AnalyzePareto(pathway_file, output_prefix, thermo, pH=None):
    """Run the OBD analysis on every pathway and produce a Pareto-style plot.

    Results from `GetAllOBDs` are written to one sheet per pathway in
    '<output_prefix>.xls'. A scatter plot of OBD against minus the average
    reaction dG ('max total dG' divided by 'sum of fluxes') is embedded in
    '<output_prefix>.html'.

    Args:
        pathway_file: forwarded to `KeggFile2PathwayList`.
        output_prefix: path prefix for the '.html' and '.xls' outputs.
        thermo: thermodynamics object forwarded to `GetAllOBDs`.
        pH: forwarded to `GetAllOBDs`; None leaves the choice to that function.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)
    xls_workbook = Workbook()

    logging.info("running OBD analysis for all pathways")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="pareto", balance_water=True,
                      override_bounds={})

    column_titles = ["reaction", "formula", "flux", "delta_r G'", "shadow price"]
    for d in data:
        sheet = xls_workbook.add_sheet(d['entry'])
        for col, title in enumerate(column_titles):
            sheet.write(0, col, title)
        for r, rid in enumerate(d['rids']):
            row = r + 1  # row 0 holds the column titles
            sheet.write(row, 0, rid)
            sheet.write(row, 1, d['formulas'][r])
            sheet.write(row, 2, d['fluxes'][0, r])
            sheet.write(row, 3, d['dG_r_prime'][0, r])
            sheet.write(row, 4, d['reaction prices'][r, 0])
    xls_workbook.save('%s.xls' % output_prefix)

    obds = []
    minus_avg_tg = []
    for d in data:
        obds.append(d['OBD'])
        if d['sum of fluxes']:
            minus_avg_tg.append(-d['max total dG'] / d['sum of fluxes'])
        else:
            # avoid dividing by a zero total flux
            minus_avg_tg.append(0)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    plt.plot(minus_avg_tg, obds, 'o', figure=fig)
    if minus_avg_tg:
        # Diagonal reference line; max() would raise on an empty list.
        upper = max(minus_avg_tg)
        plt.plot([0, upper], [0, upper], '--g')
    for i, name in enumerate(pathway_names):
        plt.text(minus_avg_tg[i], obds[i], name)
    # Raw string: '\D' is not a valid escape sequence in a normal literal.
    plt.title(r'OBD vs. Average $\Delta_r G$')
    plt.ylim(ymin=0)
    plt.xlim(xmin=0)
    plt.xlabel(r'- Average $\Delta_r G$ [kJ/mol]')
    plt.ylabel(r'Optimized Distributed Bottleneck [kJ/mol]')
    # The heading used to be closed with a mismatched </h1> tag.
    html_writer.write('<h2>Pareto figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def compare_charges():
    """Write an HTML table comparing charges from the 'hatzi' and 'milo' estimators.

    For every compound known to either estimator, the result of
    `GetMostAbundantPseudoisomer` is collected per estimator, together with
    the 'error' value stored in the 'gc_errors' table. The table is written
    to '../res/groups_report.html'.
    """
    gibbs_db = SqliteDatabase('../res/gibbs.sqlite')
    print("Writing Compare Charges report to ../res/groups_report.html")
    report = HtmlWriter("../res/groups_report.html")
    kegg = Kegg.getInstance()

    # Ionic strength 0 and pMg 14 are used here rather than the module
    # defaults (default_I, default_pMg).
    pH = default_pH
    I = 0
    pMg = 14
    T = default_T

    cid2error = {}
    for record in gibbs_db.DictReader("gc_errors"):
        cid2error[int(record['cid'])] = record['error']

    estimators = {}
    estimators['hatzi'] = Hatzi(use_pKa=False)
    estimators['milo'] = PsuedoisomerTableThermodynamics.FromDatabase(
        gibbs_db, 'gc_pseudoisomers', name='Milo Group Contribution')

    cid_lists = [est.get_all_cids() for est in estimators.values()]
    all_cids = set(lsum(cid_lists))

    table_rows = []
    for cid in all_cids:
        try:
            name = kegg.cid2name(cid)
            link = kegg.cid2compound(cid).get_link()
        except KeyError:
            # compound missing from KEGG: fall back to placeholders
            name, link = "unknown", ""

        table_row = {
            'cid': '<a href="%s">C%05d</a>' % (link, cid),
            'name': name,
            'error': cid2error.get(cid),
        }
        for key, est in estimators.items():
            try:
                pmap = est.cid2PseudoisomerMap(cid)
                dG0, dG0_tag, nH, z, nMg = pmap.GetMostAbundantPseudoisomer(
                    pH, I, pMg, T)
            except MissingCompoundFormationEnergy:
                # leave the cells empty for this estimator
                dG0 = dG0_tag = nH = z = nMg = ""
            table_row.update({
                'nH_' + key: nH,
                'charge_' + key: z,
                'nMg_' + key: nMg,
                'dG0_' + key: dG0,
                'dG0_tag_' + key: dG0_tag,
            })
        table_rows.append(table_row)

    report.write_table(
        table_rows,
        headers=['cid', 'name', 'charge_hatzi', 'charge_milo', 'error'])
    report.close()
def AnalyzeConcentrationGradient(pathway_file, output_prefix, thermo, conc_range, cids=[], pH=None):
    """Compute the OBD of every pathway while scanning compound concentrations.

    All compounds in `cids` are pinned to the same concentration, which is
    stepped over the values derived from `conc_range`. The OBDs are written
    to '<output_prefix>.csv' (one row per concentration) and plotted on a
    logarithmic x axis in '<output_prefix>.html'.

    Args:
        pathway_file: forwarded to `KeggFile2PathwayList`.
        output_prefix: path prefix for the '.html' and '.csv' outputs.
        thermo: thermodynamics object; `kegg.cid2name` is used for labels and
            the object is forwarded to `GetAllOBDs`.
        conc_range: parsed with `ParseConcentrationRange`; each parsed value
            x yields a concentration of 10**(-x).
        cids: KEGG compound IDs whose concentration is overridden. The
            default list is only read, never mutated.
        pH: forwarded to `GetAllOBDs`.
    """
    compound_names = ','.join([thermo.kegg.cid2name(cid) for cid in cids])
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working:
    logging.info("testing all pathways with default concentrations")
    data = GetAllOBDs(pathway_list, html_writer, thermo,
                      pH=pH, section_prefix="test", balance_water=True,
                      override_bounds={})

    # the parsed range holds negated base-10 exponents
    conc_vec = 10**(-ParseConcentrationRange(conc_range))
    override_bounds = {}
    obd_mat = []

    # The context manager closes (and flushes) the CSV file; the original
    # never closed the handle it passed to csv.writer.
    with open('%s.csv' % output_prefix, 'w') as csv_file:
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', '[' + compound_names + ']'] + pathway_names)

        for conc in conc_vec.flat:
            for cid in cids:
                # Fix each selected compound at exactly this concentration.
                override_bounds[cid] = (conc, conc)
            logging.info("[%s] = %.1e M" % (compound_names, conc))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds=override_bounds)
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            csv_output.writerow([data[0]['pH'], conc] + obds)

    obd_mat = np.matrix(obd_mat)  # one row per concentration, one column per pathway
    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(conc_vec, obd_mat[:, i], '-', color=colormap[name], figure=fig)
    plt.title("OBD vs. [%s]" % (compound_names), figure=fig)
    plt.xscale('log')
    plt.ylim(ymin=0)
    plt.xlabel('[%s] (in M)' % compound_names, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)
    # The heading used to be closed with a mismatched </h1> tag.
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def main():
    """Train the NIST regression and write its report as HTML.

    Command-line options come from `MakeOpts`. The report goes to
    `options.output_filename` and has three toggled sections: the output of
    `Train`, of `WriteDataToHtml`, and of `VerifyResults`.
    """
    options, _ = MakeOpts().parse_args(sys.argv)
    gibbs_db = SqliteDatabase("../res/gibbs.sqlite")
    public_db = SqliteDatabase("../data/public_data.sqlite")

    report_path = os.path.abspath(options.output_filename)
    logging.info('Will write output to %s' % report_path)
    report = HtmlWriter(report_path)

    regression = NistRegression(gibbs_db, html_writer=report,
                                nist=Nist(T_range=None))
    # the threshold over which to print an analysis of a reaction
    regression.std_diff_threshold = 5
    #regression.nist.T_range = None(273.15 + 24, 273.15 + 40)
    #regression.nist.override_I = 0.25
    #regression.nist.override_pMg = 14.0

    report.write("<h2>NIST regression:</h2>")

    # Optionally use Alberty's table as a prior for the training step.
    prior_thermo = None
    if options.use_prior:
        logging.info('Using the data from Alberty as fixed prior')
        prior_thermo = PsuedoisomerTableThermodynamics.FromDatabase(
            public_db, 'alberty_pseudoisomers', name="Alberty")

    report.write('</br><b>Regression Tables</b>\n')
    report.insert_toggle(start_here=True)
    regression.Train(options.from_database, prior_thermo)
    report.div_end()

    report.write('</br><b>PRC results</b>\n')
    report.insert_toggle(start_here=True)
    regression.WriteDataToHtml(report)
    report.div_end()

    report.write('</br><b>Transformed reaction energies - PRC vs. Observed</b>\n')
    report.insert_toggle(start_here=True)
    num_points, rmse = regression.VerifyResults()
    report.div_end()

    logging.info("Regression results for transformed data:")
    logging.info("N = %d, RMSE = %.1f" % (num_points, rmse))

    report.close()
def main():
    """Analyze every KEGG module and write a table sorted by OBD.

    Conditions and bounds are taken from the 'CONFIGURATION' entry of the
    configuration file (or from its first entry when that name is absent).
    Modules for which `AnalyzeKeggModule` raises KeyError are left out of
    the summary table.

    Raises:
        ValueError: when the configuration file contains no entries.
    """
    estimators = LoadAllEstimators()
    args = MakeArgParser(estimators).parse_args()
    thermo = estimators[args.thermodynamics_source]

    config_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = config_file.entries()
    if len(entries) == 0:
        raise ValueError('No entries in configuration file')

    if 'CONFIGURATION' in entries:
        entry = 'CONFIGURATION'
    else:
        logging.warning('Configuration file does not contain the entry "CONFIGURATION". '
                        'Using the first entry by default: %s' % entries[0])
        entry = entries[0]

    p_data = PathwayData.FromFieldMap(config_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    report = HtmlWriter(args.output_prefix + ".html")
    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    summary_rows = []
    kegg = Kegg.getInstance()

    for mid in kegg.get_all_mids():
        report.write('<h2 id=M%05d>M%05d: %s</h2>'
                     % (mid, mid, kegg.get_module_name(mid)))
        try:
            result = AnalyzeKeggModule(thermo, mid, bounds, report)
        except KeyError:
            # this module is skipped in the summary table
            continue
        result['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        result['Name'] = kegg.get_module_name(mid)
        summary_rows.append(result)

    def obd_of(row):
        return row['OBD [kJ/mol]']

    summary_rows.sort(key=obd_of)
    report.write_table(summary_rows, headers, decimal=1)
    report.close()
def main():
    """Analyze every KEGG module and write a table sorted by OBD.

    Conditions and bounds are taken from the 'CONFIGURATION' entry of the
    configuration file (or from its first entry when that name is absent).
    Modules for which `AnalyzeKeggModule` raises KeyError are left out of
    the summary table.

    Raises:
        ValueError: when the configuration file contains no entries.
    """
    estimators = LoadAllEstimators()
    args = MakeArgParser(estimators).parse_args()
    thermo = estimators[args.thermodynamics_source]

    config_file = ParsedKeggFile.FromKeggFile(args.config_fname)
    entries = config_file.entries()
    if len(entries) == 0:
        raise ValueError('No entries in configuration file')

    if 'CONFIGURATION' in entries:
        entry = 'CONFIGURATION'
    else:
        logging.warning('Configuration file does not contain the entry "CONFIGURATION". '
                        'Using the first entry by default: %s' % entries[0])
        entry = entries[0]

    p_data = PathwayData.FromFieldMap(config_file[entry])
    thermo.SetConditions(pH=p_data.pH, I=p_data.I, T=p_data.T, pMg=p_data.pMg)
    thermo.c_range = p_data.c_range
    bounds = p_data.GetBounds()

    report = HtmlWriter(args.output_prefix + ".html")
    headers = ['Module', 'Name', 'OBD [kJ/mol]', 'Length']
    summary_rows = []
    kegg = Kegg.getInstance()

    for mid in kegg.get_all_mids():
        report.write('<h2 id=M%05d>M%05d: %s</h2>'
                     % (mid, mid, kegg.get_module_name(mid)))
        try:
            result = AnalyzeKeggModule(thermo, mid, bounds, report)
        except KeyError:
            # this module is skipped in the summary table
            continue
        result['Module'] = '<a href="#M%05d">M%05d</a>' % (mid, mid)
        result['Name'] = kegg.get_module_name(mid)
        summary_rows.append(result)

    def obd_of(row):
        return row['OBD [kJ/mol]']

    summary_rows.sort(key=obd_of)
    report.write_table(summary_rows, headers, decimal=1)
    report.close()
def AnalyzeConcentrationGradient(prefix, thermo, csv_output_fname, cid=13): # default compound is PPi
    """Scan the concentration of one compound and plot the resulting values.

    For every pH in a (currently hard-coded) pH vector and every concentration
    on a logarithmic grid, the concentration of compound `cid` is pinned via
    `override_bounds` and `pareto` is re-run on the pathway file. Column 1 of
    the array returned by `pareto` is collected, optionally written to a CSV
    file, plotted against the concentration, and the figure is embedded in
    '../res/<prefix>.html'.

    Args:
        prefix: base name; pathways are read from
            '../data/thermodynamics/<prefix>.txt' and the report is written
            to '../res/<prefix>.html'.
        thermo: thermodynamics object; `kegg.cid2name`, `I` and `T` are read
            here and the object is forwarded to `pareto`.
        csv_output_fname: path of the CSV file for the raw values, or a falsy
            value to skip CSV output entirely.
        cid: KEGG compound ID whose concentration is scanned.
    """
    compound_name = thermo.kegg.cid2name(cid)
    kegg_file = ParsedKeggFile.FromKeggFile('../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    null_html_writer = NullHtmlWriter()

    # CSV output is optional: both handles stay None when no name is given.
    csv_file = None
    csv_output = None
    if csv_output_fname:
        csv_file = open(csv_output_fname, 'w')
        csv_output = csv.writer(csv_file)

    pH_vec = np.array([7]) # this needs to be fixed so that the txt file will set the pH
    # logarithmic scale from 10^-2 M (10 mM) down to 10^-6 M (1 uM)
    conc_vec = 10**(-np.arange(2, 6.0001, 0.25))
    override_bounds = {}

    fig = plt.figure(figsize=(6, 6), dpi=90)
    legend = []
    try:
        if csv_output is not None:
            csv_output.writerow(['pH', 'I', 'T', '[C%05d]' % cid] + kegg_file.entries())

        for pH in pH_vec.flat:
            obd_vec = []
            for conc in conc_vec.flat:
                # Fix the compound at exactly this concentration.
                override_bounds[cid] = (conc, conc)
                logging.info("pH = %g, [%s] = %.1e M" % (pH, compound_name, conc))
                data, labels = pareto(kegg_file, null_html_writer, thermo,
                                      pH=pH, section_prefix="", balance_water=True,
                                      override_bounds=override_bounds)
                obd_vec.append(data[:, 1])
                # Previously this row was written unconditionally and crashed
                # with AttributeError when no CSV file had been requested.
                if csv_output is not None:
                    csv_output.writerow([pH, thermo.I, thermo.T, conc] + list(data[:, 1].flat))
            # one entry was appended per concentration
            obd_mat = np.matrix(obd_vec)
            plt.plot(conc_vec, obd_mat, '.-', figure=fig)
            legend += ['%s, pH = %g' % (l, pH) for l in labels]
    finally:
        # Close the CSV file so that buffered rows reach the disk.
        if csv_file is not None:
            csv_file.close()

    plt.title("ODB vs. [%s] (I = %gM, T = %gK)" % (compound_name, thermo.I, thermo.T), figure=fig)
    plt.xscale('log')
    plt.xlabel('Concentration of %s [M]' % compound_name, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(legend)
    # The heading used to be closed with a mismatched </h1> tag.
    html_writer.write('<h2 id="figure_%s">Summary figure</h2>\n' % prefix)
    html_writer.embed_matplotlib_figure(fig, name=prefix)
    html_writer.close()
def main():
    """Driver script around `GradientAscent`, reporting to '../res/nist/report.html'.

    Only the first branch (`if True`) executes: it loads the NIST data using
    Alberty's table and calls `grad.verify_results` for the Alberty,
    Hatzimanikatis and group-contribution ("Milo") estimators. Every
    `elif False` branch is a disabled experiment kept for reference.

    NOTE(review): the disabled branches call `grad.verify_results` with one
    argument while the live branch passes three - they may be stale; confirm
    before re-enabling any of them.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()

    if True:
        # Live branch: verify three estimators against the loaded NIST data.
        grad.load_nist_data(nist, alberty, skip_missing_reactions=False, T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)

        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data("../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient1")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True, T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")
    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")
    elif False:
        # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0  # number of reactions whose dG0 could be computed
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter
    elif False:
        # Write an HTML table and a CSV file listing the pseudoisomers
        # (nH, z) of every compound that appears in the NIST reactions.
        util._mkdir("../res/nist/fig")
        # NOTE(review): this file handle is never closed explicitly.
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))
        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())
        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write(" <tr><td>C%05d</td><td>%s</td><td>" % (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                # Not in Alberty's table: fall back to the values from KEGG.
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))
            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
def main():
    """Driver script around `GradientAscent`, reporting to '../res/nist/report.html'.

    Only the first branch (`if True`) executes: it loads the NIST data using
    Alberty's table and calls `grad.verify_results` for the Alberty,
    Hatzimanikatis and group-contribution ("Milo") estimators. Every
    `elif False` branch is a disabled experiment kept for reference.

    NOTE(review): the disabled branches call `grad.verify_results` with one
    argument while the live branch passes three - they may be stale; confirm
    before re-enabling any of them.
    """
    db = database.SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter("../res/nist/report.html")
    gc = GroupContribution(db)
    gc.override_gc_with_measurements = True
    gc.init()
    grad = GradientAscent(gc)
    nist = Nist(db, html_writer, gc.kegg())
    nist.FromDatabase()
    alberty = Alberty()
    hatzi = Hatzi()

    if True:
        # Live branch: verify three estimators against the loaded NIST data.
        grad.load_nist_data(nist, alberty, skip_missing_reactions=False, T_range=(298, 314))
        grad.verify_results("Alberty", alberty, html_writer)

        #grad.write_pseudoisomers("../res/nist/nist_dG0_f.csv")

        #html_writer.write("<h2>Using Group Contribution (Hatzimanikatis' implementation)</h2>")
        #html_writer.write("<h3>Correlation with the reduced NIST database (containing only compounds that appear in Alberty's list)</h3>")
        #logging.info("calculate the correlation between Hatzimanikatis' predictions and the reduced NIST database")
        #grad.verify_results("Hatzimanikatis_Reduced", hatzi, html_writer)

        #grad.load_nist_data(nist, hatzi, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Hatzimanikatis", hatzi, html_writer)

        #grad.load_nist_data(nist, gc, skip_missing_reactions=True, T_range=(298, 314))
        grad.verify_results("Milo", gc, html_writer)
    elif False:
        # Run the gradient ascent algorithm, where the starting point is the same file used for training the GC algorithm
        grad.load_dG0_data("../data/thermodynamics/dG0.csv")
        # load the data for the anchors (i.e. compounds whose dG0 should not be changed - usually their value will be 0).
        grad.anchors = grad.load_dG0_data(
            "../data/thermodynamics/nist_anchors.csv")
        grad.load_nist_data(nist, grad, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient1")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient2")
    elif False:
        # Run the gradient ascent algorithm, where the starting point is Alberty's table from (Mathematica 2006)
        # Use DETERMINISTIC gradient ascent
        grad.load_nist_data(nist, alberty, skip_missing_reactions=True, T_range=(24 + 273.15, 40 + 273.15))
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        grad.deterministic_hill_climb(max_i=200)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient_deterministic")
    elif False:
        # Run the gradient ascent algorithm, where the starting point arbitrary (predict all of the NIST compounds)
        grad = GradientAscent(gc)
        grad.load_nist_data(nist, skip_missing_reactions=False)
        print "Training %d compounds using %d reactions: " % (len(
            grad.cid2pmap_dict.keys()), len(grad.data))
        grad.hill_climb(max_i=20000)
        grad.save_energies(grad.gc.comm, "gradient_cid2prm")
        grad.verify_results("gradient3")
    elif False:
        # Use Alberty's table from (Mathematica 2006) to calculate the dG0 of all possible reactions in KEGG
        grad = GradientAscent(gc)
        grad.cid2pmap_dict = alberty.cid2pmap_dict
        (pH, I, T) = (7, 0, 300)
        counter = 0  # number of reactions whose dG0 could be computed
        for rid in grad.kegg.get_all_rids():
            sparse_reaction = grad.kegg.rid2sparse_reaction(rid)
            try:
                dG0 = grad.reaction_to_dG0(sparse_reaction, pH, I, T)
                print "R%05d: dG0_r = %.2f [kJ/mol]" % (rid, dG0)
                counter += 1
            except MissingCompoundFormationEnergy as e:
                #print "R%05d: missing formation energy of C%05d" % (rid, e.cid)
                pass
        print "Managed to calculate the dG0 of %d reactions" % counter
    elif False:
        # Write an HTML table and a CSV file listing the pseudoisomers
        # (nH, z) of every compound that appears in the NIST reactions.
        util._mkdir("../res/nist/fig")
        # NOTE(review): this file handle is never closed explicitly.
        csv_writer = csv.writer(open("../res/nist/pseudoisomers.csv", "w"))
        cid_set = set()
        for row in nist.data:
            sparce_reaction = row['sparse']
            cid_set.update(sparce_reaction.keys())
        html_writer.write("<table border=1>\n")
        for cid in sorted(list(cid_set)):
            html_writer.write(" <tr><td>C%05d</td><td>%s</td><td>" % (cid, grad.kegg.cid2name(cid)))
            try:
                mol = grad.kegg.cid2mol(cid)
                img_fname = '../res/nist/fig/C%05d.png' % cid
                html_writer.embed_img(img_fname, "C%05d" % cid)
                mol.draw(show=False, filename=img_fname)
            except AssertionError as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            except KeggParseException as e:
                html_writer.write("WARNING: cannot draw C%05d - %s" % (cid, str(e)))
            html_writer.write("</td><td>")
            if (cid in alberty.cid2pmap_dict):
                for (nH, z) in alberty.cid2pmap_dict[cid].keys():
                    html_writer.write("(nH=%d, z=%d)<br>" % (nH, z))
                    csv_writer.writerow((cid, nH, z))
            else:
                # Not in Alberty's table: fall back to the values from KEGG.
                nH = grad.kegg.cid2num_hydrogens(cid)
                z = grad.kegg.cid2charge(cid)
                html_writer.write("unknown pseudoisomers<br>")
                html_writer.write("(nH=%d, z=%d)" % (nH, z))
                csv_writer.writerow((cid, nH, z))
            html_writer.write("</td></tr>\n")
        html_writer.write("</table>\n")
    html_writer.close()
continue if self.override_pMg or self.override_I or self.override_T: nist_row_copy = nist_row_data.Clone() if self.override_pMg: nist_row_copy.pMg = self.override_pMg if self.override_I: nist_row_copy.I = self.override_I if self.override_T: nist_row_copy.T = self.override_T rows.append(nist_row_copy) else: rows.append(nist_row_data) return rows def GetUniqueReactionSet(self): return set([row.reaction for row in self.data]) if __name__ == '__main__': #logging.getLogger('').setLevel(logging.DEBUG) _mkdir("../res/nist") html_writer = HtmlWriter("../res/nist/statistics.html") nist = Nist() fp = open('../res/nist_kegg_ids.txt', 'w') for cid in nist.GetAllCids(): fp.write("C%05d\n" % cid) fp.close() nist.AnalyzeStats(html_writer) nist.AnalyzeConnectivity(html_writer) html_writer.close()
def find_path(self, experiment_name, net_reaction):
    """Find a pathway from the source to the target.

    First a minimal-total-flux LP is solved. If it has no solution the
    method returns early. Otherwise a MILP is solved repeatedly, banning
    each solution after it has been written, until no solution is left or
    `self.max_solutions` solutions have been produced. Results go to
    '../res/pathologic/<experiment_name>.html' and to files inside the
    directory '../res/pathologic/<experiment_name>/'.

    Both output files are closed on every exit path, including the early
    return and exceptions.

    Args:
        experiment_name: a name given to this experiment.
        net_reaction: a Reaction describing the net reaction for the desired paths
    """
    dirname = os.path.join('../res/pathologic/', experiment_name)
    logging.info('Writing output to: %s' % dirname)
    util._mkdir(dirname)

    self.html_writer.write('<a href="pathologic/' + experiment_name + '.html">' + experiment_name + '</a><br>\n')
    exp_html = HtmlWriter('../res/pathologic/' + experiment_name + '.html')
    output_kegg_file = None
    try:
        exp_html.write("<p><h1>%s</h1>\n" % experiment_name)

        exp_html.insert_toggle(div_id="__parameters__", start_here=True, label='Show Parameters')

        f, S, compounds, reactions = self.kegg_pathologic.get_unique_cids_and_reactions()

        exp_html.write('<h2>Conditions:</h2>\n')
        exp_html.write_ul(['Optimization method: %s' % self.thermodynamic_method,
                           'Concentration range: %g M < C < %g M' % (self.thermo.c_range[0], self.thermo.c_range[1]),
                           "Max Δ<sub>r</sub>G' = %.1f" % self.maximal_dG,
                           'pH = %g' % self.thermo.pH,
                           'I = %g' % self.thermo.I,
                           'T = %g' % self.thermo.T,
                           'Max no. reactions: %d' % (self.max_reactions or -1),
                           'Max no. solutions: %d' % (self.max_solutions or -1),
                           'Overall Reaction: %s' % net_reaction.to_hypertext(),
                           '%d reactions' % len(reactions),
                           '%d unique compounds' % len(compounds)])
        exp_html.div_end()
        exp_html.write('</br>\n')

        logging.debug("All compounds:")
        for i, compound in enumerate(compounds):
            logging.debug("%05d) C%05d = %s" % (i, compound.cid, compound.name))
        logging.debug("All reactions:")
        for i, reaction in enumerate(reactions):
            logging.debug("%05d) R%05d = %s" % (i, reaction.rid, str(reaction)))

        output_kegg_file = open(dirname + '/kegg_pathway.txt', 'w')
        exp_html.write('<a href="%s/kegg_pathway.txt">All solutions in KEGG format</a></br>\n' % experiment_name)

        # Find a solution with a minimal total flux
        logging.info("Preparing LP solver for the minimal total flux problem")
        exp_html.write('<b>Minimum flux</b>')
        slip = Stoichiometric_LP("Pathologic")
        slip.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
        slip.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, 0))
        exp_html.write(' (<a href="%s/%03d_lp.txt">LP file</a>): ' % (experiment_name, 0))
        logging.info("Solving")
        if not slip.solve():
            exp_html.write("<b>There are no solutions!</b>")
            logging.warning("There are no solutions. Quitting!")
            # The 'finally' clause below closes both files; the original
            # returned here with both of them still open.
            return

        logging.info("writing solution")
        self.write_current_solution(exp_html, slip, experiment_name)

        logging.info("Preparing MILP solver")
        milp = Stoichiometric_LP("Pathologic")
        milp.solution_index = 1
        milp.add_stoichiometric_constraints(f, S, compounds, reactions, net_reaction)
        milp.add_milp_variables()
        if self.max_reactions is not None:
            milp.add_reaction_num_constraint(self.max_reactions)

        if self.thermodynamic_method == OptimizationMethods.LOCALIZED:
            milp.add_localized_dGf_constraints(self.thermo)
        else:
            milp.add_dGr_constraints(self.thermo,
                                     optimization=self.thermodynamic_method,
                                     maximal_dG=self.maximal_dG)

        index = 0
        while (self.max_solutions is None) or (index < self.max_solutions):
            index += 1
            # create the MILP problem to constrain the previous solutions not to reappear again.
            # The same index names the exported file and the link to it, so
            # the link always points at the file that was just written.
            lp_index = milp.solution_index
            logging.info("Round %03d, solving using MILP" % (lp_index))
            milp.export("../res/pathologic/%s/%03d_lp.txt" % (experiment_name, lp_index))
            exp_html.write('<b>Solution #%d</b> (<a href="%s/%03d_lp.txt">LP file</a>): ' % (index, experiment_name, lp_index))
            if not milp.solve():
                exp_html.write("<b>No solution found</b>")
                logging.info("No more solutions. Quitting!")
                break
            logging.info("writing solution")
            self.write_current_solution(exp_html, milp, experiment_name, output_kegg_file)
            milp.ban_current_solution()
    finally:
        if output_kegg_file is not None:
            output_kegg_file.close()
        exp_html.close()
def AnalyzeConcentrationGradient(pathway_file, output_prefix, thermo,
                                 conc_range, cids=None, pH=None):
    """Scan the concentration of selected compounds and record pathway OBDs.

    For every concentration in the scan, the bounds of all compounds in
    `cids` are pinned to that single value, GetAllOBDs() is evaluated for
    all pathways, and the resulting 'OBD' values are appended as one row to
    '<output_prefix>.csv'.  A summary plot (log-scaled x axis, one line per
    pathway) is embedded in '<output_prefix>.html'.

    Args:
        pathway_file: passed to KeggFile2PathwayList() to get the list of
            (entry, pathway) pairs.
        output_prefix: path prefix of the '.html' and '.csv' output files.
        thermo: thermodynamics object; `thermo.kegg.cid2name` is used for
            naming and the object is forwarded to GetAllOBDs().
        conc_range: passed to ParseConcentrationRange(); every returned
            value x is scanned as a concentration of 10**(-x) M.
        cids: iterable of KEGG compound IDs whose concentration is scanned.
            None (the default) or an empty sequence means nothing is
            overridden.
        pH: forwarded unchanged to GetAllOBDs().
    """
    # None replaces the former mutable default ([]); both mean "no compounds".
    if cids is None:
        cids = []

    compound_names = ','.join([thermo.kegg.cid2name(cid) for cid in cids])
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working; this is
    # the only call that writes per-pathway details into the HTML report.
    logging.info("testing all pathways with default concentrations")
    GetAllOBDs(pathway_list, html_writer, thermo, pH=pH,
               section_prefix="test", balance_water=True,
               override_bounds={})

    obd_mat = []
    # The context manager guarantees the CSV is flushed and closed even if
    # one of the optimizations below raises.
    with open('%s.csv' % output_prefix, 'w') as csv_file:
        csv_output = csv.writer(csv_file)
        csv_output.writerow(['pH', '[' + compound_names + ']'] +
                            pathway_names)

        # the parsed range holds exponents: x -> 10**(-x) M
        conc_vec = 10**(-ParseConcentrationRange(conc_range))
        override_bounds = {}
        for conc in conc_vec.flat:
            # lower bound == upper bound pins the compound's concentration
            for cid in cids:
                override_bounds[cid] = (conc, conc)
            logging.info("[%s] = %.1e M" % (compound_names, conc))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds=override_bounds)
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            # the pH column is taken from the first pathway's result
            csv_output.writerow([data[0]['pH'], conc] + obds)

    # rows are concentrations and columns are pathways
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(conc_vec, obd_mat[:, i], '-', color=colormap[name],
                 figure=fig)
    plt.title("OBD vs. [%s]" % (compound_names), figure=fig)
    plt.xscale('log')
    plt.ylim(ymin=0)
    plt.xlabel('[%s] (in M)' % compound_names, figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)

    # closing tag now matches the opening <h2> (it used to be </h1>)
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def analyze(prefix, thermo):
    """Run pareto() over a fixed pH / CO2 grid and write an HTML report.

    Pathways are read from '../data/thermodynamics/<prefix>.txt' and the
    report is written to '../res/<prefix>.html'.  For every (pH, [CO2])
    combination the bounds of compounds 11 and 288 are pinned: compound 11
    to the CO2 concentration, and compound 288 to
    co2_conc * exp(-dG0' / (R * default_T)), where dG0' is the transformed
    energy of "C00011 + C00001 => C00288" at that pH.

    The summary figure plots column 0 of the pareto() data on the x axis and
    column 1 on the y axis.  It is embedded twice: once with the default
    axes, and once with the x axis capped at 0 and the y axis starting at 0.

    Args:
        prefix: base name of the input pathway file and the output report.
        thermo: thermodynamics object; provides
            GetTransfromedKeggReactionEnergies() and is forwarded to pareto().
    """
    kegg_file = ParsedKeggFile.FromKeggFile(
        '../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    co2_hydration = Reaction.FromFormula("C00011 + C00001 => C00288")

    # this needs to be fixed so that the txt file will set the pH
    # (alternative scan: np.arange(5, 9.001, 0.5))
    pH_vec = np.array([6, 7, 8])
    # (alternative scan: np.array([1e-5, 1e-3]))
    co2_conc_vec = np.array([1e-5])

    data_mat = []
    override_bounds = {}
    for pH in pH_vec.flat:
        co2_hydration_dG0_prime = float(
            thermo.GetTransfromedKeggReactionEnergies([co2_hydration],
                                                      pH=pH))
        for co2_conc in co2_conc_vec.flat:
            # NOTE(review): uses the module-level default_T, not thermo.T --
            # confirm the two agree.
            carbonate_conc = co2_conc * np.exp(
                -co2_hydration_dG0_prime / (R * default_T))
            # lower bound == upper bound pins the concentration
            override_bounds[11] = (co2_conc, co2_conc)
            override_bounds[288] = (carbonate_conc, carbonate_conc)

            section_prefix = 'pH_%g_CO2_%g' % (pH, co2_conc * 1000)
            section_title = 'pH = %g, [CO2] = %g mM' % (pH, co2_conc * 1000)
            html_writer.write('<h1 id="%s_title">%s</h1>\n' %
                              (section_prefix, section_title))
            html_writer.write_ul([
                '<a href="#%s_tables">Individual result tables</a>'
                % section_prefix,
                '<a href="#%s_summary">Summary table</a>' % section_prefix,
                '<a href="#%s_figure">Summary figure</a>' % section_prefix,
            ])

            data, labels = pareto(kegg_file, html_writer, thermo, pH=pH,
                                  section_prefix=section_prefix,
                                  balance_water=True,
                                  override_bounds=override_bounds)
            data_mat.append(data)

    # axis 0: (pH, [CO2]) condition, axis 1: pathway, axis 2: data column
    data_mat = np.array(data_mat)
    num_pathways = data_mat.shape[1]

    if data_mat.shape[0] == 1:
        # single condition: scatter plot with one text label per pathway
        pareto_fig = plt.figure(figsize=(6, 6), dpi=90)
        plt.plot(data_mat[0, :, 0], data_mat[0, :, 1], '.',
                 figure=pareto_fig)
        # range() instead of the Python-2-only xrange(); same iteration
        for i in range(num_pathways):
            # Read from data_mat rather than the leaked loop variable
            # `data`; pathways with a negative column-1 value are greyed out.
            if data_mat[0, i, 1] < 0:
                text_color = 'grey'
            else:
                text_color = 'black'
            plt.text(data_mat[0, i, 0], data_mat[0, i, 1], labels[i],
                     ha='left', va='bottom', fontsize=8, color=text_color,
                     figure=pareto_fig)
        plt.title(section_title, figure=pareto_fig)
    else:
        # several conditions: one trajectory per pathway, with the first
        # and last pH values written at its two ends
        pareto_fig = plt.figure(figsize=(10, 10), dpi=90)
        for i in range(num_pathways):
            plt.plot(data_mat[:, i, 0], data_mat[:, i, 1], '-',
                     figure=pareto_fig)
            plt.text(data_mat[0, i, 0], data_mat[0, i, 1],
                     '%g' % pH_vec[0], ha='center', fontsize=6,
                     color='black', figure=pareto_fig)
            plt.text(data_mat[-1, i, 0], data_mat[-1, i, 1],
                     '%g' % pH_vec[-1], ha='center', fontsize=6,
                     color='black', figure=pareto_fig)
        plt.legend(labels, loc='upper right')
        plt.title('Pareto', figure=pareto_fig)

    plt.xlabel('Optimal Energetic Efficiency [kJ/mol]', figure=pareto_fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]',
               figure=pareto_fig)

    # The anchor uses the prefix of the LAST section generated above.
    # The closing tag now matches the opening <h2> (it used to be </h1>).
    html_writer.write('<h2 id="%s_figure">Summary figure</h2>\n'
                      % section_prefix)

    # plot the Pareto figure showing all values (including infeasible)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_0')

    # set axes to hide infeasible pathways and focus on feasible ones
    pareto_fig.axes[0].set_xlim(None, 0)
    pareto_fig.axes[0].set_ylim(0, None)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_1')
    html_writer.close()
def AnalyzePHGradient(pathway_file, output_prefix, thermo, conc_range):
    """Scan the pH and record each pathway's OBD and reaction prices.

    Outputs:
      * '<output_prefix>.html' - details of one test run with pH=None plus
        a summary plot of OBD vs. pH (one line per pathway).
      * '<output_prefix>.csv'  - one row per pH with every pathway's 'OBD'.
      * '<output_prefix>/<entry>.csv' - one file per pathway with one row
        of 'reaction prices' per pH; the header lists the pathway's 'rids'.

    Args:
        pathway_file: passed to KeggFile2PathwayList() to get the list of
            (entry, pathway) pairs.
        output_prefix: path prefix of the output files and the name of the
            directory that receives the per-pathway CSV files.
        thermo: thermodynamics object forwarded to GetAllOBDs().
        conc_range: passed to ParseConcentrationRange(); despite its name,
            the parsed values are used directly as the pH values to scan.
    """
    pathway_list = KeggFile2PathwayList(pathway_file)
    pathway_names = [entry for (entry, _) in pathway_list]
    html_writer = HtmlWriter('%s.html' % output_prefix)

    # run once just to make sure that the pathways are all working; the
    # result also supplies the entry names and reaction IDs for the
    # per-pathway CSV headers.
    logging.info("testing all pathways with default pH")
    data = GetAllOBDs(pathway_list, html_writer, thermo, pH=None,
                      section_prefix="test", balance_water=True,
                      override_bounds={})

    obd_mat = []
    # Every file opened below is tracked here so that all of them are
    # closed (and flushed) even if one of the optimizations raises.
    open_files = []
    try:
        summary_file = open('%s.csv' % output_prefix, 'w')
        open_files.append(summary_file)
        csv_output = csv.writer(summary_file)
        csv_output.writerow(['pH'] + pathway_names)

        util._mkdir(output_prefix)
        shadow_csvs = {}
        for d in data:
            shadow_file = open('%s/%s.csv' % (output_prefix, d['entry']),
                               'w')
            open_files.append(shadow_file)
            shadow_csvs[d['entry']] = csv.writer(shadow_file)
            shadow_csvs[d['entry']].writerow(['pH'] + d['rids'])

        pH_vec = ParseConcentrationRange(conc_range)
        for pH in pH_vec.flat:
            logging.info("pH = %.1f" % (pH))
            data = GetAllOBDs(pathway_list, html_writer=None, thermo=thermo,
                              pH=pH, section_prefix="", balance_water=True,
                              override_bounds={})
            obds = [d['OBD'] for d in data]
            obd_mat.append(obds)
            # the pH column is taken from the first pathway's result
            csv_output.writerow([data[0]['pH']] + obds)

            for d in data:
                # A plain float means there is no per-reaction vector to
                # write (presumably a placeholder for an unsolved pathway
                # -- confirm against GetAllOBDs).  `type(x) is not float`
                # is the exact equivalent of the Python-2-only
                # `type(x) != types.FloatType`.
                if type(d['reaction prices']) is not float:
                    prices = list(d['reaction prices'].flat)
                    shadow_csvs[d['entry']].writerow([pH] + prices)
    finally:
        for handle in open_files:
            handle.close()

    # rows are pH values and columns are pathways
    obd_mat = np.matrix(obd_mat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    colormap = color.ColorMap(pathway_names)
    for i, name in enumerate(pathway_names):
        plt.plot(pH_vec, obd_mat[:, i], '-', color=colormap[name],
                 figure=fig)
    plt.title("OBD vs. pH", figure=fig)
    plt.ylim(0, np.max(obd_mat.flat))
    plt.xlabel('pH', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    plt.legend(pathway_names)

    # closing tag now matches the opening <h2> (it used to be </h1>)
    html_writer.write('<h2>Summary figure</h2>\n')
    html_writer.embed_matplotlib_figure(fig)
    html_writer.close()
def _plot_pareto_summary(data_mat, labels, pH_vec, single_title):
    """Build the summary figure for analyze() and return it.

    `data_mat` is indexed as [condition, pathway, column]; column 0 goes on
    the x axis and column 1 on the y axis.  With a single condition every
    pathway is drawn as a labelled point and the figure is titled
    `single_title`; otherwise every pathway is drawn as a line across the
    conditions, annotated with the first and last value of `pH_vec`.
    """
    num_conditions, num_pathways = data_mat.shape[0], data_mat.shape[1]

    if num_conditions == 1:
        fig = plt.figure(figsize=(6, 6), dpi=90)
        plt.plot(data_mat[0, :, 0], data_mat[0, :, 1], '.', figure=fig)
        for i in range(num_pathways):
            # pathways with a negative column-1 value are greyed out
            text_color = 'grey' if data_mat[0, i, 1] < 0 else 'black'
            plt.text(data_mat[0, i, 0], data_mat[0, i, 1], labels[i],
                     ha='left', va='bottom', fontsize=8, color=text_color,
                     figure=fig)
        plt.title(single_title, figure=fig)
    else:
        fig = plt.figure(figsize=(10, 10), dpi=90)
        for i in range(num_pathways):
            plt.plot(data_mat[:, i, 0], data_mat[:, i, 1], '-', figure=fig)
            # mark both ends of the trajectory with the pH they belong to
            for row, pH in ((0, pH_vec[0]), (-1, pH_vec[-1])):
                plt.text(data_mat[row, i, 0], data_mat[row, i, 1],
                         '%g' % pH, ha='center', fontsize=6, color='black',
                         figure=fig)
        plt.legend(labels, loc='upper right')
        plt.title('Pareto', figure=fig)

    plt.xlabel('Optimal Energetic Efficiency [kJ/mol]', figure=fig)
    plt.ylabel('Optimized Distributed Bottleneck [kJ/mol]', figure=fig)
    return fig


def analyze(prefix, thermo):
    """Run pareto() over a fixed pH / CO2 grid and write an HTML report.

    Reads the pathways from '../data/thermodynamics/<prefix>.txt' and writes
    '../res/<prefix>.html'.  For every (pH, [CO2]) pair, compound 11 is
    pinned to the CO2 concentration and compound 288 to
    co2_conc * exp(-dG0' / (R * default_T)), with dG0' the transformed
    energy of "C00011 + C00001 => C00288" at that pH; pareto() is then
    called with these bounds.  A summary figure is embedded twice: first
    with default axes, then restricted to x <= 0 and y >= 0.

    Args:
        prefix: base name of the input pathway file and the output report.
        thermo: thermodynamics object; provides
            GetTransfromedKeggReactionEnergies() and is forwarded to pareto().
    """
    kegg_file = ParsedKeggFile.FromKeggFile(
        '../data/thermodynamics/%s.txt' % prefix)
    html_writer = HtmlWriter('../res/%s.html' % prefix)
    co2_hydration = Reaction.FromFormula("C00011 + C00001 => C00288")

    # this needs to be fixed so that the txt file will set the pH
    pH_vec = np.array([6, 7, 8])
    co2_conc_vec = np.array([1e-5])

    data_mat = []
    override_bounds = {}
    for pH in pH_vec.flat:
        co2_hydration_dG0_prime = float(
            thermo.GetTransfromedKeggReactionEnergies([co2_hydration],
                                                      pH=pH))
        for co2_conc in co2_conc_vec.flat:
            # NOTE(review): temperature is the module-level default_T, not
            # thermo.T -- confirm this is intended.
            carbonate_conc = co2_conc * np.exp(
                -co2_hydration_dG0_prime / (R * default_T))
            # identical lower and upper bounds fix the concentration
            override_bounds[11] = (co2_conc, co2_conc)
            override_bounds[288] = (carbonate_conc, carbonate_conc)

            section_prefix = 'pH_%g_CO2_%g' % (pH, co2_conc * 1000)
            section_title = 'pH = %g, [CO2] = %g mM' % (pH, co2_conc * 1000)
            html_writer.write('<h1 id="%s_title">%s</h1>\n' %
                              (section_prefix, section_title))
            html_writer.write_ul([
                '<a href="#%s_tables">Individual result tables</a>'
                % section_prefix,
                '<a href="#%s_summary">Summary table</a>' % section_prefix,
                '<a href="#%s_figure">Summary figure</a>' % section_prefix,
            ])

            data, labels = pareto(kegg_file, html_writer, thermo, pH=pH,
                                  section_prefix=section_prefix,
                                  balance_water=True,
                                  override_bounds=override_bounds)
            data_mat.append(data)

    # axis 0: (pH, [CO2]) condition, axis 1: pathway, axis 2: data column
    data_mat = np.array(data_mat)
    pareto_fig = _plot_pareto_summary(data_mat, labels, pH_vec,
                                      section_title)

    # The anchor uses the prefix of the LAST section generated above.
    # The closing tag now matches the opening <h2> (it used to be </h1>).
    html_writer.write('<h2 id="%s_figure">Summary figure</h2>\n'
                      % section_prefix)

    # plot the Pareto figure showing all values (including infeasible)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_0')

    # set axes to hide infeasible pathways and focus on feasible ones
    pareto_fig.axes[0].set_xlim(None, 0)
    pareto_fig.axes[0].set_ylim(0, None)
    html_writer.embed_matplotlib_figure(pareto_fig, name=prefix + '_1')
    html_writer.close()