def Report(self, est, title):
    """Write an HTML report comparing the observations in self.b to `est`.

    The report is made of a bold title, an embedded CDF figure of the
    absolute residuals (computed only where `est` is finite) and a toggled
    table with one row per observation, sorted by decreasing absolute error.

    Arguments:
        est   - estimated values, read as est[0, i] alongside self.b[0, i]
        title - text written in bold above the figure
    """
    writer = self.html_writer
    writer.write('</br><b>%s</b><br>\n' % title)

    # Residuals are only defined where the estimate is a finite number.
    is_finite = np.isfinite(est)
    abs_resid = abs(self.b[is_finite] - est[is_finite])

    cdf_fig = plt.figure(figsize=(5, 5), dpi=60)
    cdf(list(abs_resid.flat), figure=cdf_fig)
    #plt.plot(self.b[finite].T, est[finite].T, '.', figure=fig)
    plt.title("RMSE = %.1f, N = %d"
              % (rms_flat(abs_resid.flat), abs_resid.shape[1]))
    plt.xlabel(r"$|\Delta_r G^{'\circ} obs - \Delta_r G^{'\circ} est|$ [kJ/mol]")
    plt.ylabel(r"CDF")
    writer.embed_matplotlib_figure(cdf_fig)

    table_rows = []
    for col in range(self.b.shape[1]):
        observed = self.b[0, col]
        table_rows.append({
            'row': col,
            'type': self.obs_types[col],
            'reaction': UnifiedGroupContribution.row2hypertext(
                self.S[:, col], self.cids),
            'anchored': self.anchored[0, col],
            'obs': observed,
            'est': est[0, col],
            # Non-finite estimates get an error of 0, which places them at
            # the bottom of the descending sort below.
            '|err|': (abs(observed - est[0, col])
                      if np.isfinite(est[0, col]) else 0),
        })

    table_rows = sorted(table_rows, key=lambda entry: entry['|err|'],
                        reverse=True)

    writer.insert_toggle(start_here=True, label="Show table")
    writer.write_table(
        table_rows,
        headers=['row', 'type', 'reaction', 'anchored', 'obs', 'est', '|err|'],
        decimal=1)
    writer.div_end()
def plot_histogram(histogram, html_writer, title='', max_pathway_length=8,
                   xmin=None, xlim=20, error_bars=True, min_to_show=20,
                   legend_loc='upper left'):
    """Plot one CDF curve per group in `histogram` and return the figure.

    Arguments:
        histogram   - mapping from a group key to a sequence of values; the
                      key is used in the legend label and to look up a color
        html_writer - not used by this function (kept for interface
                      compatibility)
        title       - figure title, also used as a prefix for the printed
                      rank-sum results
        max_pathway_length - not used by this function (kept for interface
                      compatibility)
        xmin        - lower x-axis limit; when None, -xlim is used
        xlim        - upper x-axis limit
        error_bars  - when true, the standard deviation of the median is
                      estimated by bootstrap resampling and passed to
                      plotting.cdf as `std`
        min_to_show - groups with fewer values than this are not plotted
        legend_loc  - location argument passed to pylab.legend

    Returns:
        the pylab figure that was drawn on

    Side effects:
        prints rank-sum test results when both groups of a comparison
        ('Not first' vs. 1, 'Inhibited' vs. 'No known regulation') are
        present in `histogram`.
    """
    fig = pylab.figure()
    pylab.hold(True)

    reps = 1000          # number of bootstrap resamples per group
    y_offset = 0
    offset_step = 0.007  # each plotted curve is shifted up by this much
    colors = {1: 'r', 2: 'orange', 3: 'green', 4: 'cyan', 5: 'blue',
              'Rest': 'violet', 'Not first': 'k--',
              'No known regulation': 'grey', 'Activated': 'green',
              'Inhibited': 'r', 'Mixed regulation': 'blue'}

    for key, value in histogram.items():
        if len(value) < min_to_show:
            continue

        m = stats.cmedian(value)
        sample_std = None
        if error_bars:
            # Bootstrap: resample with replacement to the original size and
            # record the median of each resample.
            n_values = len(value)
            sample_vals = [
                pylab.median([random.choice(value) for _ in range(n_values)])
                for _ in range(reps)
            ]
            sample_std = pylab.std(sample_vals)

        plotting.cdf(value,
                     label='%s (med=%.1f, N=%d)' % (key, m, len(value)),
                     style=colors.get(key, 'grey'),
                     std=sample_std, y_offset=y_offset)
        y_offset += offset_step

    xmin = -1 * xlim if xmin is None else xmin
    pylab.xlim(xmin, xlim)
    pylab.xlabel('Irreversability')
    #pylab.xlabel('deltaG')
    pylab.ylabel('Cumulative distribution')
    legendfont = matplotlib.font_manager.FontProperties(size=11)
    pylab.legend(loc=legend_loc, prop=legendfont)
    pylab.title(title)
    pylab.hold(False)

    # Only run a comparison when BOTH of its groups exist; indexing a missing
    # group would otherwise raise KeyError after the figure was already built.
    if 'Not first' in histogram and 1 in histogram:
        print('%s, first vs. non-first ranksum test: ' % title +
              '(%f, %f)' % stats.ranksums(histogram[1],
                                          histogram['Not first']))

    if 'Inhibited' in histogram and 'No known regulation' in histogram:
        print('%s, inhibited vs. non-regulated ranksum test: ' % title +
              '(%f, %f)' % stats.ranksums(histogram['Inhibited'],
                                          histogram['No known regulation']))

    return fig
def try_kegg_api():
    """Fetch the 'eco' pathway reactions from KEGG and plot their reversibility.

    Steps performed:
      1. Initialize a GroupContribution object on '../res/gibbs.sqlite'.
      2. Query the KEGG SOAP service for every pathway listed for 'eco' and
         collect the reaction IDs of each pathway, writing every new ID to
         '../res/eco_rids.txt' (one per line).
      3. Compute the reversibility of each reaction; reactions that raise
         MissingCompoundFormationEnergy are counted as misses.
      4. Print the hit/miss counts and the median, then show a CDF plot.

    Takes no arguments and returns nothing.
    """
    db = SqliteDatabase('../res/gibbs.sqlite')
    html_writer = HtmlWriter('../res/dG0_test.html')
    G = GroupContribution(db, html_writer=html_writer)
    G.init()

    wsdl = 'http://soap.genome.jp/KEGG.wsdl'
    serv = WSDL.Proxy(wsdl)

    rids = set()
    # The context manager closes the file even if a SOAP call fails midway.
    with open('../res/eco_rids.txt', 'w') as rid_file:
        for x in serv.list_pathways('eco'):
            pathway_id = x['entry_id']
            for reaction_id in serv.get_reactions_by_pathway(pathway_id):
                # Drop the 4-character prefix and keep the numeric part
                # (presumably a prefix such as "rn:R" - TODO confirm).
                rid = int(reaction_id[4:])
                if rid not in rids:
                    rids.add(rid)
                    rid_file.write('%d\n' % rid)

    c_mid = 1e-3
    pH, pMg, I, T = (7.0, 3.0, 0.1, 298.15)

    rid2reversibility = {}
    misses = 0
    for rid in sorted(rids):
        try:
            reaction = G.kegg.rid2reaction(rid)
            r = CalculateReversability(reaction, G, c_mid, pH, pMg, I, T)
            rid2reversibility[rid] = r
        except thermodynamics.MissingCompoundFormationEnergy:
            misses += 1
            continue

    # Both values must be passed as one tuple; the original supplied only
    # the first one to the two-slot format string, which raises TypeError.
    print('hits = %d, misses = %d' % (len(rid2reversibility), misses))

    # Materialize the values once so numpy always receives a real sequence.
    reversibilities = list(rid2reversibility.values())
    median = pylab.median(reversibilities)
    print('median = %.1f' % median)

    pylab.figure()
    pylab.hold(True)
    plotting.cdf(reversibilities, 'all reactions', 'r', show_median=True)
    pylab.show()
def Report(self, est, title):
    """Emit an HTML section summarizing how well `est` matches self.b.

    Writes the title, embeds a CDF plot of |self.b - est| over the entries
    where `est` is finite, and appends a collapsible table listing every
    observation ordered from the largest to the smallest absolute error.

    Arguments:
        est   - estimates, accessed as est[0, i] for each column i of self.b
        title - caption written in bold at the top of the section
    """
    self.html_writer.write('</br><b>%s</b><br>\n' % title)

    mask = np.isfinite(est)
    residuals = abs(self.b[mask] - est[mask])

    figure = plt.figure(figsize=(5, 5), dpi=60)
    cdf(list(residuals.flat), figure=figure)
    #plt.plot(self.b[finite].T, est[finite].T, '.', figure=fig)
    caption = "RMSE = %.1f, N = %d" % (rms_flat(residuals.flat),
                                       residuals.shape[1])
    plt.title(caption)
    plt.xlabel(
        r"$|\Delta_r G^{'\circ} obs - \Delta_r G^{'\circ} est|$ [kJ/mol]")
    plt.ylabel(r"CDF")
    self.html_writer.embed_matplotlib_figure(figure)

    def describe(index):
        # Build the table entry for a single observation column.
        entry = {}
        entry['row'] = index
        entry['type'] = self.obs_types[index]
        entry['reaction'] = UnifiedGroupContribution.row2hypertext(
            self.S[:, index], self.cids)
        entry['anchored'] = self.anchored[0, index]
        entry['obs'] = self.b[0, index]
        entry['est'] = est[0, index]
        if not np.isfinite(est[0, index]):
            # No usable estimate: report a zero error for this row.
            entry['|err|'] = 0
        else:
            entry['|err|'] = abs(self.b[0, index] - est[0, index])
        return entry

    columns = ['row', 'type', 'reaction', 'anchored', 'obs', 'est', '|err|']
    rowdicts = [describe(index) for index in range(self.b.shape[1])]
    rowdicts.sort(key=lambda entry: entry['|err|'], reverse=True)

    self.html_writer.insert_toggle(start_here=True, label="Show table")
    self.html_writer.write_table(rowdicts, headers=columns, decimal=1)
    self.html_writer.div_end()
def PlotCDF(self):
    """Embed a CDF figure of per-gene-pair ddG values in the HTML report.

    Rows are read from the table named by self.GENE_PAIRS_TABLE_NAME. Pairs
    whose score is positive are plotted as "interacting" and pairs whose
    score is zero (or missing) as "non-interacting". A few hard-coded gene
    pairs are additionally marked with a labelled dashed vertical line.
    """
    # Gene pairs that get their own marker on the plot.
    labelled_pairs = {
        # malate dehydrogenase -> oxaloacetate -> citrate synthase
        ('eco:b3236', 'eco:b0720'): "mdh:gltA",
        # trpD -> chorismate -> trpE (two components of anthranilate synthase)
        ('eco:b1263', 'eco:b1264'): "trpD:trpE",
    }

    query = (
        " SELECT gene1, gene2, min(dGc2 - dGc1), max(score)"
        " FROM %s"
        " WHERE dGc1 + dGc2 < 0 AND dGc1 > 10"
        " GROUP BY gene1, gene2 "
    ) % (self.GENE_PAIRS_TABLE_NAME)

    rows = []
    marked = []
    for gene1, gene2, ddG, score in self.db.Execute(query):
        pair = (gene1, gene2)
        if pair in labelled_pairs:
            marked.append((labelled_pairs[pair], ddG))
        # A missing score is treated as 0.
        rows.append([ddG, float(score or 0)])

    table = np.matrix(rows)
    interacting = list(np.where(table[:, 1] > 0)[0].flat)
    non_interacting = list(np.where(table[:, 1] == 0)[0].flat)

    fig = plt.figure(figsize=(6, 6), dpi=90)
    cdf((table[non_interacting, 0]).flat,
        label="non-interacting (N = %d)" % len(non_interacting),
        style='r', figure=fig)
    cdf((table[interacting, 0]).flat,
        label="interacting (N = %d)" % len(interacting),
        style='g', figure=fig)

    for text, position in marked:
        plt.plot([position, position], [0, 1], 'b--', figure=fig)
        plt.text(position, 0.1, text)

    plt.xlim(-500, 500)
    plt.xlabel(r"$\Delta G'^c$ (2nd) - $\Delta G'^c$ (1st) [kJ/mol]")
    plt.ylabel(r"Cumulative Distribution Function")
    plt.legend(loc="upper left")
    self.html_writer.embed_matplotlib_figure(
        fig, width=400, height=400, name='channeling_cdf')
def compare_annotations(reaction_list, thermo, html_writer, cmap, xlim=1e9):
    """Compare annotated reaction directions with the reversibility index.

    For every reaction the reaction energy is predicted with `thermo` and
    the reversibility index is computed with CalculateReversability. The
    results are written to `html_writer` as a toggled table (sorted by the
    index), a short list of counters, and a CDF figure with one curve per
    annotated direction ('<=>', '=>', '<=').

    Arguments:
        reaction_list - iterable of reaction objects; each must provide
                        PredictReactionEnergy(), name, direction and
                        to_hypertext()
        thermo        - passed to PredictReactionEnergy and
                        CalculateReversability
        html_writer   - destination for the HTML output
        cmap          - passed to CalculateReversability as
                        concentration_map
        xlim          - the x-axis spans (1/xlim, xlim) on a log scale
    """
    html_writer.write(
        '<h1>Compare reaction annotations to Reversibility Index</h1>\n')

    histogram = {}
    error_counts = {'hits': 0, 'misses': 0, 'no_gamma': 0}
    debug_dict_list = []

    for reaction in reaction_list:
        try:
            dG0 = reaction.PredictReactionEnergy(thermo)
        except (MissingCompoundFormationEnergy, MissingReactionEnergy) as e:
            logging.warning(str(e))
            error_counts['misses'] += 1
            continue

        gamma = CalculateReversability(reaction, thermo,
                                       concentration_map=cmap)
        if gamma is None:
            error_counts['no_gamma'] += 1
            continue

        error_counts['hits'] += 1
        histogram.setdefault(reaction.direction, []).append(gamma)
        debug_dict_list.append({'sortkey': gamma,
                                'Reaction Name': reaction.name,
                                'annotation': reaction.direction,
                                'KEGG Reaction': reaction.to_hypertext(),
                                'Rev. index': "%.3g" % gamma,
                                'dG0': "%.2f" % dG0})

    debug_dict_list.sort(key=lambda row: row['sortkey'])
    div_id = html_writer.insert_toggle()
    html_writer.div_start(div_id)
    html_writer.write_table(debug_dict_list,
                            headers=['Rev. index', 'dG0', 'Reaction Name',
                                     'KEGG Reaction', 'annotation'])
    html_writer.div_end()
    html_writer.write('</br>\n')

    html_writer.write_ul(
        ["Reactions with known dG0: %d" % error_counts['hits'],
         "Reactions with unknown dG0: %d" % error_counts['misses'],
         "Reactions with unknown gamma: %d" % error_counts['no_gamma']])

    # plot the bar
    fig = pylab.figure(figsize=(6, 6), dpi=90)
    pylab.hold(True)
    curves = [('<=>', 'reversible (%d reactions)', 'green'),
              ('=>', 'forward only (%d reactions)', 'red'),
              ('<=', 'reverse only (%d reactions)', 'orange')]
    for direction, label_format, style in curves:
        # A direction that never occurred has no entry in the histogram;
        # skip it instead of failing with KeyError.
        if direction not in histogram:
            continue
        gammas = histogram[direction]
        plotting.cdf(gammas, label=label_format % len(gammas),
                     style=style, figure=fig)

    pylab.xlabel(r'Reversability index - $\hat{\gamma}$')
    pylab.ylabel('Cumulative Distribution')
    pylab.xscale('log')
    # 1.0 / xlim: an integer xlim must not collapse the lower limit to 0
    # through integer division, which is invalid on a log axis.
    pylab.xlim((1.0 / xlim, xlim))
    pylab.legend(loc='upper left')
    html_writer.embed_matplotlib_figure(fig, width=640, height=480,
                                        name='FEIST_CDF')
def _classify_regime(value, lower, upper):
    """Return -1 if value < lower, 0 if value < upper, and 1 otherwise."""
    if value < lower:
        return -1
    if value < upper:
        return 0
    return 1


def compare_reversibility_to_dG0(reaction_list, thermo, html_writer, cmap=None):
    """Compare the reversibility index of reactions to their K'.

    Each reaction is balanced (unbalanced ones are skipped), its reaction
    energy is predicted with `thermo`, and - when that energy is not NaN -
    its equilibrium constant and log reversibility index are computed. Both
    values are classified into three regimes using a threshold of 1e3.

    Output written to `html_writer`:
      * a toggled debug table sorted by log(gamma),
      * a log-log figure ("reversibility_vs_keq") with shaded regime areas,
        lines for four fixed stoichiometries and the percentage of
        reactions in each regime,
      * a small CDF figure of the reversibility index ("reversibility_cdf").

    Arguments:
        reaction_list - iterable of reaction objects; each must provide
                        Balance(), PredictReactionEnergy(), name, sparse
                        and to_hypertext()
        thermo        - provides T and c_mid and is passed to
                        PredictReactionEnergy / CalculateReversability
        html_writer   - destination for the HTML output
        cmap          - concentration map; when falsy, the result of
                        GetEmptyConcentrationMap() is used
    """
    html_writer.write(
        '<h1>Reversibility index vs. equilibrium constants</h1>\n')
    cmap = cmap or GetEmptyConcentrationMap()

    x_range = (1e-9, 1e9)
    y_range = (1e-9, 1e9)
    x_threshold = 1e3
    y_threshold = 1e3

    regime_counters = {}
    stoich_counters = {}
    # Rows are collected in a list and converted to an array once, instead
    # of re-allocating the whole matrix with vstack on every iteration.
    data_rows = []
    debug_dict_list = []

    for reaction in reaction_list:
        debug_dict = {'name': reaction.name,
                      'KEGG Reaction': reaction.to_hypertext()}

        try:
            reaction.Balance(balance_water=True, exception_if_unknown=True)
        except (KeggReactionNotBalancedException, OpenBabelError):
            continue

        dG0 = reaction.PredictReactionEnergy(thermo)
        if np.isnan(dG0):
            debug_dict['sortkey'] = 0
            debug_dict['error'] = "Cannot calculate Gibbs energy"
        else:
            Keq = pylab.exp(-dG0 / (R * thermo.T))

            # Count substrates / products, ignoring compounds that appear
            # in the concentration map.
            n_s = -sum(x for cid, x in reaction.sparse.items()
                       if x < 0 and cid not in cmap)
            n_p = sum(x for cid, x in reaction.sparse.items()
                      if x > 0 and cid not in cmap)
            if (n_p + n_s) == 0:
                continue
            stoich_counters.setdefault((n_s, n_p), 0)
            stoich_counters[n_s, n_p] += 1

            log_gamma = CalculateReversability(reaction, thermo,
                                               concentration_map=cmap,
                                               logscale=True)

            Krev = _classify_regime(Keq, 1.0 / x_threshold, x_threshold)
            # log_gamma is compared against the log of the threshold.
            Grev = _classify_regime(log_gamma, -np.log(y_threshold),
                                    np.log(y_threshold))

            regime_counters.setdefault((Krev, Grev), 0)
            regime_counters[Krev, Grev] += 1
            data_rows.append([Keq, log_gamma, Krev, Grev])

            debug_dict['sortkey'] = log_gamma
            debug_dict['log(γ)'] = "%.2e" % log_gamma
            debug_dict[thermodynamic_constants.symbol_dr_G0_prime] = dG0

        debug_dict_list.append(debug_dict)

    # reshape keeps the (0, 4) shape when no reaction produced a data row.
    data_mat = np.array(data_rows, dtype=float).reshape(-1, 4)

    debug_dict_list.sort(key=lambda row: row['sortkey'])
    div_id = html_writer.insert_toggle()
    html_writer.div_start(div_id)
    html_writer.write_table(debug_dict_list,
                            headers=['log(γ)',
                                     thermodynamic_constants.symbol_dr_G0_prime,
                                     'name', 'KEGG Reaction', 'error'])
    html_writer.div_end()
    html_writer.write('</br>\n')

    fig = pylab.figure(figsize=(6, 6), dpi=90)
    pylab.xlabel("$K'$", figure=fig)
    pylab.ylabel(r"$\hat{\gamma} = \left( K' / Q'' \right)^{2/N}$",
                 figure=fig)

    # Shade the four off-center middle cells of the 3x3 regime grid.
    shading_color = (1.0, 0.7, 0.7)
    pylab.axvspan(x_range[0], 1.0 / x_threshold,
                  ymin=1.0 / 3.0, ymax=2.0 / 3.0, color=shading_color)
    pylab.axvspan(x_threshold, x_range[1],
                  ymin=1.0 / 3.0, ymax=2.0 / 3.0, color=shading_color)
    pylab.axhspan(y_range[0], 1.0 / y_threshold,
                  xmin=1.0 / 3.0, xmax=2.0 / 3.0, color=shading_color)
    pylab.axhspan(y_threshold, y_range[1],
                  xmin=1.0 / 3.0, xmax=2.0 / 3.0, color=shading_color)

    # draw the lines for the specific reaction stoichiometries
    stoichiometries = [(1, 1, '-', '#e49b1c'),
                       (1, 2, '--', '#1ce463'),
                       (2, 1, '--', '#1d1de3'),
                       (2, 2, '-', '#e41c63')]
    total_stoich = sum(stoich_counters.values())

    fig.hold(True)
    for n_s, n_p, style, color in stoichiometries:
        count = stoich_counters.get((n_s, n_p), 0)
        # Avoid ZeroDivisionError when no reaction was counted at all.
        percent = 100.0 * count / total_stoich if total_stoich else 0.0
        gamma = [(Keq / thermo.c_mid ** (n_p - n_s)) ** (2.0 / (n_p + n_s))
                 for Keq in x_range]
        pylab.plot(x_range, gamma, style, color=color, linewidth=3,
                   figure=fig,
                   label="%d:%d (%d%%)" % (n_s, n_p, np.round(percent)))
    pylab.legend(loc='upper left')

    for (Krev, Grev), count in regime_counters.items():
        # Regime -1 / 0 / 1 maps to the center of its cell on the log axes.
        x_pos = x_threshold ** (Krev * 2)
        y_pos = y_threshold ** (Grev * 2)
        pylab.text(x_pos, y_pos,
                   "%.1f%%" % (100.0 * count / data_mat.shape[0]),
                   horizontalalignment='center',
                   verticalalignment='center')

    pylab.xscale('log', figure=fig)
    pylab.yscale('log', figure=fig)
    pylab.ylim(y_range)
    pylab.xlim(x_range)
    pylab.xticks([1e-9, 1e-6, 1e-3, 1, 1e3, 1e6, 1e9])
    pylab.yticks([1e-9, 1e-6, 1e-3, 1, 1e3, 1e6, 1e9])
    html_writer.embed_matplotlib_figure(fig, width=400, height=400,
                                        name="reversibility_vs_keq")

    fig = pylab.figure(figsize=(2, 2), dpi=90)
    # Column 1 holds log(gamma); exp(|log gamma|) folds values below 1
    # onto their reciprocals.
    abs_gamma = np.exp(abs(data_mat[:, 1]))
    plotting.cdf(abs_gamma, label='gamma', figure=fig)
    pylab.plot([x_threshold, x_threshold], [0, 1], 'k--', figure=fig)
    pylab.xscale('log', figure=fig)
    #pylab.xlabel(r'$\hat{\gamma}$', figure=fig)
    #pylab.ylabel(r'CDF($\hat{\gamma}$)', figure=fig)
    pylab.text(1e6, 0.4, r'CDF($\hat{\gamma}$)',
               horizontalalignment='center', verticalalignment='center')
    pylab.xlim((1, 1e9))
    pylab.xticks([1, 1e3, 1e6, 1e9])
    pylab.yticks([0, 0.5, 1.0])
    pylab.tight_layout()
    html_writer.embed_matplotlib_figure(fig, width=125, height=125,
                                        name="reversibility_cdf")