def plot_correlations(self):
    """Overlay agreement with the measured fluxes on the standard Pareto plot.

    Two panels are drawn side by side (yield vs. growth rate for every
    EFM).  In the left panel each EFM is colored by the correlation of its
    rate vector with the "experimental" one; in the right panel it is
    colored by a Gaussian log-likelihood computed from the per-reaction
    difference to the experimental rates.  Row ``9999`` of the rates table
    returned by ``get_concatenated_raw_data('standard')`` is used as the
    experimental flux distribution.

    The figure is written to ``Fig_flux_correlation.pdf`` inside
    ``D.OUTPUT_DIR``; nothing is returned.

    Note that the 'standard' frame obtained from ``D.get_figure_data()`` is
    modified in place (two columns are added and the growth rate of
    strictly anaerobic EFMs is set to 0).
    """
    CORR_FLUX_L = 'correlation with exp fluxes'
    LOG_LIKELIHOOD_L = 'log likelihood of flow'
    EXP_FLOW_ID = 9999  # label of the experimental flow in the rates table

    exp_flux_df = self.fluxes_df.copy()

    # remove the exchange reactions (xchg_*); take a copy so that the
    # column assignment below does not write into a slice of another frame
    exp_flux_df = exp_flux_df.loc[
        exp_flux_df.reaction_id.str.find('xchg') != 0, :].copy()
    exp_flux_df.reaction_id = exp_flux_df.reaction_id.apply(
        D.FIX_REACTION_ID)

    fig0, axs0 = plt.subplots(1, 2, figsize=(15, 7))

    # only the rates table is needed here
    rates_df, _, _, _ = get_concatenated_raw_data('standard')

    figure_data = D.get_figure_data()
    data = figure_data['standard']

    data[CORR_FLUX_L] = rates_df.transpose().corr().loc[EXP_FLOW_ID]

    # calculate the likelihood of each EFM according to the measured flux
    # distribution
    joined_rates = rates_df.T

    # NOTE(review): the stdev values are aligned to the reactions by index
    # label; this assumes self.fluxes_df is indexed compatibly with the
    # columns of rates_df -- confirm. Reactions without a match get NaN,
    # which is replaced by 0 below.
    stdev = exp_flux_df[D.MEAS_STDEV_L].reindex(joined_rates.index)

    # add a constant baseline of 1.0 to every stdev (this also avoids a
    # division by zero).
    # NOTE(review): the original comment called this "a baseline stdev of
    # 10%", but the code adds an absolute 1.0 -- confirm which is intended.
    stdev = stdev.fillna(0) + 1.0

    # normalized difference of every EFM (column) from the experimental
    # flow, computed for all EFMs at once
    z_scores = joined_rates.sub(
        joined_rates[EXP_FLOW_ID], axis=0).div(stdev, axis=0)
    log_likelihood = -(z_scores ** 2).sum(axis=0) / 2
    # .loc (rather than reindex) keeps the KeyError for EFMs that are
    # missing from the rates table
    data[LOG_LIKELIHOOD_L] = log_likelihood.loc[data.index]

    # remove oxygen-sensitive EFMs
    data.loc[data[D.STRICTLY_ANAEROBIC_L], D.GROWTH_RATE_L] = 0

    cmap = D.pareto_cmap(0.88)
    D.plot_basic_pareto(data, axs0[0], x=D.YIELD_L, y=D.GROWTH_RATE_L,
                        c=CORR_FLUX_L, cmap=cmap, vmin=0, vmax=1,
                        linewidth=0, s=20)
    D.plot_basic_pareto(data, axs0[1], x=D.YIELD_L, y=D.GROWTH_RATE_L,
                        c=LOG_LIKELIHOOD_L, cmap=cmap, linewidth=0, s=20,
                        vmin=-100000, vmax=0)

    for ax in axs0:
        for efm, efm_info in D.efm_dict.items():
            xy = np.array(
                data.loc[efm, [D.YIELD_L, D.GROWTH_RATE_L]].tolist())
            xytext = xy + np.array((-1, 0.025))
            # the label is passed positionally: the keyword was renamed
            # from `s` to `text` in matplotlib 3.3
            ax.annotate(efm_info['label'], xy=xy,
                        xycoords='data', xytext=xytext,
                        arrowprops=dict(facecolor='black', shrink=0.05,
                                        width=2, headwidth=4))
        ax.set_xlim(-1e-3, 1.1 * data[D.YIELD_L].max())
        ax.set_ylim(-1e-3, 1.15 * data[D.GROWTH_RATE_L].max())

    axs0[0].set_title('distance from measured fluxes (correlation)')
    axs0[1].set_title('distance from measured fluxes (likelihood)')

    fig0.tight_layout()
    fig0.savefig(os.path.join(D.OUTPUT_DIR, 'Fig_flux_correlation.pdf'))
def plot_tsne_figure(figure_data, figsize=(15, 13)):
    """Draw a 3x3 grid of t-SNE embeddings of the EFM rate vectors.

    The rate vectors of all EFMs are embedded in two dimensions with
    t-SNE.  The first panel shows the raw embedding with the EFMs listed
    in ``D.efm_dict`` annotated; each of the remaining eight panels shows
    the same embedding colored by one column of the 'standard' data.

    Args:
        figure_data: mapping that contains the 'standard' DataFrame
            (indexed like the rates table, so the two can be joined).
        figsize: size of the figure, passed on to ``plt.subplots``.

    Returns:
        The matplotlib figure.
    """
    data = figure_data['standard']

    # the rates of all the EFMs are taken from the 'standard' data set
    rates_df, _, _, _ = get_concatenated_raw_data('standard')

    # DataFrame.as_matrix() was removed in pandas 1.0; .values works in
    # both old and new versions
    X = rates_df.values
    model = TSNE(n_components=2)
    # NOTE: this changes numpy's print options globally; it is kept only
    # for backward compatibility with the original behavior
    np.set_printoptions(suppress=True)
    X_new = model.fit_transform(X)

    # build the embedding frame directly from the array so that both
    # columns get a numeric dtype
    rates_df_new = pd.DataFrame(X_new[:, :2], index=rates_df.index,
                                columns=['t-SNE dim 1', 't-SNE dim 2'])
    data = rates_df_new.join(data)

    #%%
    fig, axs = plt.subplots(3, 3, figsize=figsize, sharex=True, sharey=True)
    axs = list(axs.flat)
    # label the panels 'a', 'b', 'c', ...
    for i, ax in enumerate(axs):
        ax.annotate(chr(ord('a') + i), xy=(0.04, 0.98),
                    xycoords='axes fraction', ha='left', va='top',
                    size=20)

    xdata = rates_df_new.iloc[:, 0]
    ydata = rates_df_new.iloc[:, 1]
    axs[0].scatter(xdata, ydata, s=15, c=(0.2, 0.2, 0.7), alpha=0.3)
    for efm, efm_info in D.efm_dict.items():
        xy = (xdata[efm], ydata[efm])
        # the label is passed positionally: the keyword was renamed from
        # `s` to `text` in matplotlib 3.3
        axs[0].annotate(efm_info['label'], xy=xy,
                        xycoords='data', xytext=(30, 5),
                        textcoords='offset points',
                        arrowprops=dict(facecolor='black',
                                        shrink=0.05, width=2, headwidth=4),
                        ha='left', va='bottom')

    plot_parameters = [
        {'c': D.YIELD_L, 'title': 'biomass yield'},
        {'c': D.GROWTH_RATE_L, 'title': 'growth rate'},
        {'c': D.OXYGEN_L, 'title': 'oxygen uptake'},
        {'c': D.ACE_L, 'title': 'acetate secretion'},
        {'c': D.NH3_L, 'title': 'ammonia uptake'},
        {'c': D.SUCCINATE_L, 'title': 'succinate secretion'},
        {'c': D.ED_L, 'title': 'ED pathway'},
        {'c': D.PPP_L, 'title': 'pentose phosphate pathway'},
    ]

    # panel 0 holds the raw embedding, so the colored panels start at 1
    x_col, y_col = rates_df_new.columns[0], rates_df_new.columns[1]
    for ax, params in zip(axs[1:], plot_parameters):
        D.plot_basic_pareto(data, x=x_col, y=y_col,
                            c=params['c'], ax=ax, cmap='copper_r',
                            linewidth=0.2, s=10)
        ax.set_title(params['title'])

    fig.tight_layout()
    return fig
def plot_sensitivity_for_reaction(self, reaction):
    """Plot the growth-rate sensitivities to the parameters of one reaction.

    One row of Pareto-style panels (yield vs. growth rate) is drawn:

    * sensitivity to k_cat (column 'dlnmu/dlnk'),
    * sensitivity to K_eq (column 'dlnmu/dlnKeq'), only if any EFM has a
      non-zero 'dmu/dKeq' for this reaction,
    * one panel per substrate and one per product of the reaction, each
      colored by 'dlnmu/dlnKm' of that metabolite.

    Args:
        reaction: value matched against the 'reaction' column of
            ``self.efm_data_df``, ``self.stoich_df`` and
            ``self.km_sensitivity_df``.

    Returns:
        The matplotlib figure.
    """
    reaction_data_df = self.efm_data_df[
        self.efm_data_df['reaction'] == reaction]
    draw_keq_sensitivity = (reaction_data_df['dmu/dKeq'] != 0).any()

    # negative stoichiometric coefficients mark substrates, positive ones
    # mark products
    stoich = self.stoich_df[self.stoich_df['reaction'] == reaction]
    substrates = stoich.loc[stoich['coefficient'] < 0, 'metabolite'].values
    products = stoich.loc[stoich['coefficient'] > 0, 'metabolite'].values
    km_data = self.km_sensitivity_df[
        self.km_sensitivity_df['reaction'] == reaction]

    n_subfigs = 1 + len(substrates) + len(products)
    if draw_keq_sensitivity:
        n_subfigs += 1

    # squeeze=False guarantees an array of axes even when there is only a
    # single panel (otherwise a bare Axes object is returned)
    fig, axs = plt.subplots(1, n_subfigs, figsize=(4.5 * n_subfigs, 3),
                            sharey=True, squeeze=False)
    remaining_axes = iter(axs.flat)

    x_max = 1.05 * reaction_data_df[D.YIELD_L].max()
    y_max = 1.05 * reaction_data_df[D.GROWTH_RATE_L].max()

    def _draw_secondary_panel(ax, df, color_column, hue, title):
        # Draw one of the panels to the right of the k_cat panel: colored
        # Pareto plot, hidden y axis, common axis limits.
        D.plot_basic_pareto(df, ax, x=D.YIELD_L, y=D.GROWTH_RATE_L,
                            c=color_column, cmap=D.pareto_cmap(hue),
                            linewidth=0)
        ax.set_title(title)
        ax.get_yaxis().set_visible(False)
        ax.set_xlim(-1e-3, x_max)
        ax.set_ylim(-1e-3, y_max)

    def _km_table(metabolite):
        # EFM data of this reaction joined with the Km sensitivities of a
        # single metabolite.
        return pd.merge(reaction_data_df,
                        km_data[km_data['metabolite'] == metabolite],
                        on='efm')

    # the leftmost panel keeps its y axis visible
    ax = next(remaining_axes)
    D.plot_basic_pareto(reaction_data_df, ax, x=D.YIELD_L,
                        y=D.GROWTH_RATE_L, c='dlnmu/dlnk',
                        cmap=D.pareto_cmap(0.83), linewidth=0)
    ax.set_title('sensitivity to $k_{cat}$ of %s' % reaction)
    ax.set_ylim(-1e-3, None)
    ax.set_xlim(-1e-3, None)

    if draw_keq_sensitivity:
        _draw_secondary_panel(
            next(remaining_axes), reaction_data_df, 'dlnmu/dlnKeq', 0.11,
            'sensitivity to $K_{eq}$ of %s' % reaction)

    for s in substrates:
        _draw_secondary_panel(
            next(remaining_axes), _km_table(s), 'dlnmu/dlnKm', 0.03,
            'sensitivity to $K_S$ of %s : %s' % (reaction, s))

    for p in products:
        # the Km table must be rebuilt for every product; reusing the
        # table of the last substrate would plot the wrong metabolite
        _draw_secondary_panel(
            next(remaining_axes), _km_table(p), 'dlnmu/dlnKm', 0.58,
            'sensitivity to $K_P$ of %s : %s' % (reaction, p))

    return fig
# Typography shared by all the figures generated below.
rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'legend.fontsize': 'medium',
    'axes.labelsize': 14.0,
    'axes.titlesize': 14.0,
    'xtick.labelsize': 12.0,
    'ytick.labelsize': 12.0,
})

# %% Figure 2c
# Pareto plot (yield vs. growth rate) of all EFMs in the 'standard'
# condition, drawn in a single neutral color.
fig2c, ax2c = plt.subplots(1, 1, figsize=(5, 5))

data = figure_data['standard']
# remove oxygen-sensitive EFMs (their growth rate is set to 0 in place)
data.loc[data[D.STRICTLY_ANAEROBIC_L], D.GROWTH_RATE_L] = 0

D.plot_basic_pareto(
    data,
    ax2c,
    x=D.YIELD_L,
    y=D.GROWTH_RATE_L,
    efm_dict=D.efm_dict,
    facecolors=D.PARETO_NEUTRAL_COLOR,
    edgecolors='none',
)
# leave some headroom beyond the largest yield / growth rate
ax2c.set_xlim(-1e-3, 1.1*data[D.YIELD_L].max())
ax2c.set_ylim(-1e-3, 1.15*data[D.GROWTH_RATE_L].max())
ax2c.set_title('glucose = 100 mM, O$_2$ = 3.7 mM')
fig2c.tight_layout()
fig2c.savefig(os.path.join(D.OUTPUT_DIR, 'Fig_web4.pdf'))

# %% histogram of all different EFM growth rates in a specific condition
# (the chart is drawn by allocation_pie_chart at the standard glucose and
# oxygen concentrations)
fig5 = plt.figure(figsize=(5, 5))
ax5 = fig5.add_subplot(1, 1, 1)
efm = allocation_pie_chart(
    ax5,
    D.STD_CONC['glucoseExt'],
    D.STD_CONC['oxygen'],
)
rates_df, full_df = get_concatenated_raw_data('sweep_glucose')
{'c': D.LACTATE_L, 'short_title': 'lactate secretion'},
{'c': D.SUCCINATE_L, 'short_title': 'succinate secretion'},
]
# NOTE(review): the entries above are the tail of a list whose opening is
# outside this view -- presumably `plot_parameters`, which is iterated below.

# flatten the grid of axes so that it can be indexed by panel number
ax3 = list(ax3.flat)
data = figure_data['standard']
for i, d in enumerate(plot_parameters):
    d['ax'] = ax3[i]
    # label the panels 'a', 'b', 'c', ... in the top-left corner
    d['ax'].annotate(chr(ord('a')+i), xy=(0.02, 0.98),
                     xycoords='axes fraction', ha='left', va='top',
                     size=20)
    d['ax'].set_title(d['short_title'])
    # the same limits in every panel: 5% beyond the largest yield and
    # growth rate
    d['ax'].set_xlim(-1e-3, 1.05*data[D.YIELD_L].max())
    d['ax'].set_ylim(-1e-3, 1.05*data[D.GROWTH_RATE_L].max())
    # yield vs. growth rate, colored by the column named in d['c']
    D.plot_basic_pareto(data, x=D.YIELD_L, y=D.GROWTH_RATE_L,
                        c=d['c'], ax=d['ax'], cmap='copper_r')
fig3.tight_layout(h_pad=0.2)
D.savefig(fig3, '3')

# %% Figure 4 - glucose & oxygen sweeps
# 2x3 grid: panel a has linear axes, panel b has a logarithmic x axis and
# shares its y axis with panel a, panels c-f are 3D axes
fig4 = plt.figure(figsize=(15, 10))
ax4a = fig4.add_subplot(2, 3, 1, xscale='linear', yscale='linear')
ax4b = fig4.add_subplot(2, 3, 2, xscale='log', yscale='linear', sharey=ax4a)
ax4c = fig4.add_subplot(2, 3, 3, projection='3d')
ax4d = fig4.add_subplot(2, 3, 4, projection='3d')
ax4e = fig4.add_subplot(2, 3, 5, projection='3d')
ax4f = fig4.add_subplot(2, 3, 6, projection='3d')
import pareto_sampling import seaborn as sns figure_data = D.get_figure_data() if __name__ == '__main__': # %% Figure S1 - same as 3c, but compared to the biomass rate # instead of growth rate figS1, axS1 = plt.subplots(1, 2, figsize=(9, 4.5)) data = figure_data['standard'] # remove oxygen-sensitive EFMs data.loc[data[D.STRICTLY_ANAEROBIC_L], D.GROWTH_RATE_L] = 0 D.plot_basic_pareto(data, axS1[0], x=D.YIELD_L, y=D.BIOMASS_PROD_PER_ENZ_L, facecolors=D.PARETO_NEUTRAL_COLOR, edgecolors='none') axS1[0].set_ylabel( 'enzyme-specific biomass production\n$r_{BM} = v_{BM}/E_{met}$ [gr dw h$^{-1}$ / gr enz]' ) axS1[0].set_xlim(-1e-3, 1.1 * data[D.YIELD_L].max()) axS1[0].set_ylim(-1e-3, 1.15 * data[D.BIOMASS_PROD_PER_ENZ_L].max()) axS1[0].set_title('glucose = 100 mM, O$_2$ = 3.7 mM') axS1[0].annotate('c', xy=(0.02, 0.98), xycoords='axes fraction', ha='left', va='top', size=20) for y in range(0, 14, 2):