def plot_correlations(self):
        """Colour the yield/growth-rate Pareto plot by agreement with a reference flux.

        Two side-by-side scatter plots are drawn for the 'standard' data set.
        The left panel is coloured by each EFM's correlation with the rates
        stored under the label 9999; the right panel by a Gaussian-style
        log-likelihood, ``-sum(z**2) / 2``, where ``z`` is the difference to
        that same reference divided by a per-row standard deviation.

        Reads ``self.fluxes_df`` (columns ``reaction_id`` and
        ``D.MEAS_STDEV_L``). Returns nothing; the figure is saved as
        'Fig_flux_correlation.pdf' inside ``D.OUTPUT_DIR``.
        """
        # NOTE(review): 9999 is presumably the pseudo-EFM id under which the
        # experimentally measured flux distribution is stored -- confirm.
        exp_efm = 9999

        CORR_FLUX_L = 'correlation with exp fluxes'
        LOG_LIKELIHOOD_L = 'log likelihood of flow'

        # remove the exchange reactions (xchg_*); rows with a missing id are
        # kept, exactly as the previous `str.find(...) != 0` test did
        is_exchange = self.fluxes_df.reaction_id.str.startswith('xchg',
                                                                na=False)
        exp_flux_df = self.fluxes_df.loc[~is_exchange, :].copy()
        exp_flux_df['reaction_id'] = exp_flux_df['reaction_id'].apply(
            D.FIX_REACTION_ID)

        fig0, axs0 = plt.subplots(1, 2, figsize=(15, 7))
        rates_df, _, _, _ = get_concatenated_raw_data('standard')

        figure_data = D.get_figure_data()
        data = figure_data['standard']

        # correlation of every row of rates_df with the reference row
        data[CORR_FLUX_L] = rates_df.transpose().corr().loc[exp_efm]

        # calculate the likelihood of each EFM according to the measured flux
        # distribution; start from a float column so that the per-EFM float
        # assignments below do not have to upcast an integer column
        data[LOG_LIKELIHOOD_L] = 0.0

        joined_rates = rates_df.T
        # NOTE(review): this assignment aligns on the index, so exp_flux_df
        # must carry the same labels as the rows of joined_rates; rows with
        # no match become NaN and end up with the baseline only -- confirm.
        joined_rates['std'] = exp_flux_df[D.MEAS_STDEV_L]
        # missing stdevs count as 0, then a constant baseline of 1.0 is added
        # (the original comment called this "10%"; units not verifiable here)
        joined_rates['std'] = joined_rates['std'].fillna(0) + 1.0
        for efm in data.index:
            z = ((joined_rates[efm] - joined_rates[exp_efm]) /
                 joined_rates['std'])
            data.loc[efm, LOG_LIKELIHOOD_L] = -(z**2).sum() / 2

        # oxygen-sensitive EFMs stay in the table but get a growth rate of 0
        data.loc[data[D.STRICTLY_ANAEROBIC_L], D.GROWTH_RATE_L] = 0
        cmap = D.pareto_cmap(0.88)
        D.plot_basic_pareto(data,
                            axs0[0],
                            x=D.YIELD_L,
                            y=D.GROWTH_RATE_L,
                            c=CORR_FLUX_L,
                            cmap=cmap,
                            vmin=0,
                            vmax=1,
                            linewidth=0,
                            s=20)
        D.plot_basic_pareto(data,
                            axs0[1],
                            x=D.YIELD_L,
                            y=D.GROWTH_RATE_L,
                            c=LOG_LIKELIHOOD_L,
                            cmap=cmap,
                            linewidth=0,
                            s=20,
                            vmin=-100000,
                            vmax=0)

        for ax in axs0:
            for efm in D.efm_dict:
                xy = np.array(data.loc[efm,
                                       [D.YIELD_L, D.GROWTH_RATE_L]].tolist())
                xytext = xy + np.array((-1, 0.025))
                # The label is passed positionally: the keyword was `s` in old
                # matplotlib and `text` from 3.3 on, so only the positional
                # form works across versions.
                ax.annotate(D.efm_dict[efm]['label'],
                            xy=xy,
                            xycoords='data',
                            xytext=xytext,
                            arrowprops=dict(facecolor='black',
                                            shrink=0.05,
                                            width=2,
                                            headwidth=4))
            ax.set_xlim(-1e-3, 1.1 * data[D.YIELD_L].max())
            ax.set_ylim(-1e-3, 1.15 * data[D.GROWTH_RATE_L].max())
        axs0[0].set_title('distance from measured fluxes (correlation)')
        axs0[1].set_title('distance from measured fluxes (likelihood)')
        fig0.tight_layout()

        fig0.savefig(os.path.join(D.OUTPUT_DIR, 'Fig_flux_correlation.pdf'))
Exemple #2
0
def plot_tsne_figure(figure_data, figsize=(15, 13)):
    """Draw a 3x3 grid of t-SNE embeddings of the EFM rate vectors.

    The rows of the 'standard' rate table are embedded in two dimensions with
    t-SNE. The first panel shows the plain embedding with the EFMs listed in
    ``D.efm_dict`` annotated; the remaining eight panels show the same
    embedding coloured by one column of ``figure_data['standard']`` each.

    Args:
        figure_data: mapping with a 'standard' entry holding a DataFrame that
            can be joined to the rate table on its index and that contains
            the colour columns used below.
        figsize: size passed to ``plt.subplots``.

    Returns:
        The matplotlib figure.

    Side effect: calls ``np.set_printoptions(suppress=True)``, which changes
    NumPy's global print settings.
    """
    data = figure_data['standard']

    # only the rates (first returned item) of the 'standard' data set are used
    rates_df, _, _, _ = get_concatenated_raw_data('standard')
    # DataFrame.as_matrix() was removed in pandas 1.0; `.values` returns the
    # same ndarray and exists in every pandas version
    X = rates_df.values

    model = TSNE(n_components=2)
    np.set_printoptions(suppress=True)
    X_new = model.fit_transform(X)

    # Build the embedding table directly from the array so that both columns
    # are numeric, instead of filling an empty object-dtype frame.
    rates_df_new = pd.DataFrame(X_new,
                                index=rates_df.index,
                                columns=['t-SNE dim 1', 't-SNE dim 2'])
    data = rates_df_new.join(data)

    #%%
    fig, axs = plt.subplots(3, 3, figsize=figsize, sharex=True, sharey=True)
    axs = list(axs.flat)
    # panel letters a, b, c, ... in the top-left corner of every subplot
    for i, ax in enumerate(axs):
        ax.annotate(chr(ord('a') + i),
                    xy=(0.04, 0.98),
                    xycoords='axes fraction',
                    ha='left',
                    va='top',
                    size=20)

    xdata = rates_df_new.iloc[:, 0]
    ydata = rates_df_new.iloc[:, 1]
    axs[0].scatter(xdata, ydata, s=15, c=(0.2, 0.2, 0.7), alpha=0.3)
    for efm in D.efm_dict:
        # The label is passed positionally: the keyword was `s` in old
        # matplotlib and `text` from 3.3 on.
        axs[0].annotate(D.efm_dict[efm]['label'],
                        xy=(xdata[efm], ydata[efm]),
                        xycoords='data',
                        xytext=(30, 5),
                        textcoords='offset points',
                        arrowprops=dict(facecolor='black',
                                        shrink=0.05,
                                        width=2,
                                        headwidth=4),
                        ha='left',
                        va='bottom')

    # (colour column, panel title) for panels b..i, in plotting order
    panels = [
        (D.YIELD_L, 'biomass yield'),
        (D.GROWTH_RATE_L, 'growth rate'),
        (D.OXYGEN_L, 'oxygen uptake'),
        (D.ACE_L, 'acetate secretion'),
        (D.NH3_L, 'ammonia uptake'),
        (D.SUCCINATE_L, 'succinate secretion'),
        (D.ED_L, 'ED pathway'),
        (D.PPP_L, 'pentose phosphate pathway'),
    ]

    for ax, (color_column, title) in zip(axs[1:], panels):
        D.plot_basic_pareto(data,
                            x=rates_df_new.columns[0],
                            y=rates_df_new.columns[1],
                            c=color_column,
                            ax=ax,
                            cmap='copper_r',
                            linewidth=0.2,
                            s=10)
        ax.set_title(title)
    fig.tight_layout()
    return fig
Exemple #3
0
    def plot_sensitivity_for_reaction(self, reaction):
        """Plot growth-rate sensitivities to one reaction's kinetic parameters.

        One row of yield vs. growth-rate scatter plots is produced: the first
        panel is coloured by 'dlnmu/dlnk', an optional second panel by
        'dlnmu/dlnKeq' (only when any 'dmu/dKeq' value of the reaction is
        non-zero), followed by one 'dlnmu/dlnKm' panel per substrate and one
        per product of the reaction.

        Reads ``self.efm_data_df``, ``self.stoich_df`` and
        ``self.km_sensitivity_df``, each filtered on its 'reaction' column.

        Args:
            reaction: value matched against the 'reaction' columns.

        Returns:
            The matplotlib figure.
        """
        reaction_data_df = self.efm_data_df[self.efm_data_df['reaction'] ==
                                            reaction]

        draw_keq_sensitivity = (reaction_data_df['dmu/dKeq'] != 0).any()

        # negative coefficients mark substrates, positive ones products
        stoich = self.stoich_df[self.stoich_df['reaction'] == reaction]
        substrates = stoich[stoich['coefficient'] < 0]['metabolite'].values
        products = stoich[stoich['coefficient'] > 0]['metabolite'].values

        km_data = self.km_sensitivity_df[self.km_sensitivity_df['reaction'] ==
                                         reaction]

        n_subfigs = 1 + len(substrates) + len(products)
        if draw_keq_sensitivity:
            n_subfigs += 1

        # squeeze=False always returns a 2-D array of axes, so this also
        # works when only a single panel is needed (plt.subplots would
        # otherwise return a bare Axes object that cannot be iterated)
        fig, axs = plt.subplots(1,
                                n_subfigs,
                                figsize=(4.5 * n_subfigs, 3),
                                sharey=True,
                                squeeze=False)
        axes = iter(axs.flat)

        ax = next(axes)
        D.plot_basic_pareto(reaction_data_df,
                            ax,
                            x=D.YIELD_L,
                            y=D.GROWTH_RATE_L,
                            c='dlnmu/dlnk',
                            cmap=D.pareto_cmap(0.83),
                            linewidth=0)
        ax.set_title('sensitivity to $k_{cat}$ of %s' % reaction)
        ax.set_ylim(-1e-3, None)
        ax.set_xlim(-1e-3, None)

        # axis limits shared by every panel after the first one
        xlim = (-1e-3, 1.05 * reaction_data_df[D.YIELD_L].max())
        ylim = (-1e-3, 1.05 * reaction_data_df[D.GROWTH_RATE_L].max())

        if draw_keq_sensitivity:
            ax = next(axes)
            D.plot_basic_pareto(reaction_data_df,
                                ax,
                                x=D.YIELD_L,
                                y=D.GROWTH_RATE_L,
                                c='dlnmu/dlnKeq',
                                cmap=D.pareto_cmap(0.11),
                                linewidth=0)
            ax.set_title('sensitivity to $K_{eq}$ of %s' % reaction)
            ax.get_yaxis().set_visible(False)
            ax.set_xlim(*xlim)
            ax.set_ylim(*ylim)

        # (metabolites, title template, colour-map parameter) per Km family
        km_panels = (
            (substrates, 'sensitivity to $K_S$ of %s : %s', 0.03),
            (products, 'sensitivity to $K_P$ of %s : %s', 0.58),
        )
        for metabolites, title_template, cmap_value in km_panels:
            for metabolite in metabolites:
                ax = next(axes)
                # Merge the Km sensitivities of THIS metabolite. The product
                # panels used to reuse the frame left over from the last
                # substrate, which showed the wrong data and raised a
                # NameError for reactions without substrates.
                metabolite_df = pd.merge(
                    reaction_data_df,
                    km_data[km_data['metabolite'] == metabolite],
                    on='efm')
                D.plot_basic_pareto(metabolite_df,
                                    ax,
                                    x=D.YIELD_L,
                                    y=D.GROWTH_RATE_L,
                                    c='dlnmu/dlnKm',
                                    cmap=D.pareto_cmap(cmap_value),
                                    linewidth=0)
                ax.set_title(title_template % (reaction, metabolite))
                ax.get_yaxis().set_visible(False)
                ax.set_xlim(*xlim)
                ax.set_ylim(*ylim)

        return fig
Exemple #4
0
# Global text defaults (font family and label/title/tick sizes) assigned into
# rcParams; they are set once here, before any figure below is created.
rcParams['font.family'] = 'sans-serif'
rcParams['font.sans-serif'] = 'Arial'
rcParams['legend.fontsize'] = 'medium'
rcParams['axes.labelsize'] = 14.0
rcParams['axes.titlesize'] = 14.0
rcParams['xtick.labelsize'] = 12.0
rcParams['ytick.labelsize'] = 12.0

# %% Figure 2c
# Single-panel yield vs. growth-rate scatter of the 'standard' data set.
fig2c, ax2c = plt.subplots(1, 1, figsize=(5, 5))

data = figure_data['standard']
# Oxygen-sensitive EFMs are not dropped: rows flagged by
# D.STRICTLY_ANAEROBIC_L keep their place and get a growth rate of 0.
# NOTE(review): this writes into `data` in place; if figure_data['standard']
# hands back the stored DataFrame rather than a copy, the zeroed values are
# visible to every later user of figure_data -- confirm this is intended.
data.loc[data[D.STRICTLY_ANAEROBIC_L], D.GROWTH_RATE_L] = 0
D.plot_basic_pareto(data, ax2c, x=D.YIELD_L, y=D.GROWTH_RATE_L,
                    efm_dict=D.efm_dict,
                    facecolors=D.PARETO_NEUTRAL_COLOR, edgecolors='none')
# Axes start slightly below zero and leave 10% / 15% headroom above the
# largest yield / growth rate.
ax2c.set_xlim(-1e-3, 1.1*data[D.YIELD_L].max())
ax2c.set_ylim(-1e-3, 1.15*data[D.GROWTH_RATE_L].max())
ax2c.set_title('glucose = 100 mM, O$_2$ = 3.7 mM')
fig2c.tight_layout()

# Written to D.OUTPUT_DIR under the file name 'Fig_web4.pdf'.
fig2c.savefig(os.path.join(D.OUTPUT_DIR, 'Fig_web4.pdf'))

# %% histogram of all different EFM growth rates in a specific condition
# NOTE(review): the cell title says "histogram", but the visible code calls
# allocation_pie_chart on a single axes -- confirm which description is
# current.
fig5 = plt.figure(figsize=(5, 5))
ax5 = fig5.add_subplot(1, 1, 1)

# The chart is drawn for the concentrations stored in D.STD_CONC under
# 'glucoseExt' and 'oxygen'; the return value is kept as `efm`.
efm = allocation_pie_chart(ax5, D.STD_CONC['glucoseExt'],
                           D.STD_CONC['oxygen'])
# NOTE(review): exactly two return values are unpacked for the
# 'sweep_glucose' data set -- confirm against get_concatenated_raw_data.
rates_df, full_df = get_concatenated_raw_data('sweep_glucose')
        {'c': D.LACTATE_L,    'short_title': 'lactate secretion'},
        {'c': D.SUCCINATE_L,  'short_title': 'succinate secretion'},
    ]
    # Flatten the axes grid so that panels can be addressed by a single index.
    ax3 = list(ax3.flat)
    data = figure_data['standard']

    # One panel per entry of plot_parameters: lettered a, b, c, ... in the
    # top-left corner, titled with the entry's 'short_title', and coloured by
    # the column named in the entry's 'c'.
    for i, d in enumerate(plot_parameters):
        d['ax'] = ax3[i]
        d['ax'].annotate(chr(ord('a')+i), xy=(0.02, 0.98),
                         xycoords='axes fraction', ha='left', va='top',
                         size=20)
        d['ax'].set_title(d['short_title'])
        # identical limits on every panel: just below zero up to 5% above the
        # largest yield / growth rate
        d['ax'].set_xlim(-1e-3, 1.05*data[D.YIELD_L].max())
        d['ax'].set_ylim(-1e-3, 1.05*data[D.GROWTH_RATE_L].max())

        D.plot_basic_pareto(data, x=D.YIELD_L, y=D.GROWTH_RATE_L,
                            c=d['c'], ax=d['ax'], cmap='copper_r')

    fig3.tight_layout(h_pad=0.2)
    # NOTE(review): '3' is presumably the figure label D.savefig uses to build
    # the output file name -- confirm.
    D.savefig(fig3, '3')

    # %% Figure 4 - glucose & oxygen sweeps

    fig4 = plt.figure(figsize=(15, 10))

    # 2x3 grid of panels. a and b are 2-D axes: a is linear on both axes, b
    # has a logarithmic x-axis and shares its y-axis with a. c, d, e and f
    # are created with the '3d' projection.
    ax4a = fig4.add_subplot(2, 3, 1, xscale='linear', yscale='linear')
    ax4b = fig4.add_subplot(2, 3, 2, xscale='log', yscale='linear', sharey=ax4a)
    ax4c = fig4.add_subplot(2, 3, 3, projection='3d')
    ax4d = fig4.add_subplot(2, 3, 4, projection='3d')
    ax4e = fig4.add_subplot(2, 3, 5, projection='3d')
    ax4f = fig4.add_subplot(2, 3, 6, projection='3d')
import pareto_sampling
import seaborn as sns

# Evaluated at module level, outside the __main__ guard, so the data is
# loaded on import as well as when the file is run as a script.
figure_data = D.get_figure_data()

if __name__ == '__main__':
    # %% Figure S1 - same as 3c, but compared to the biomass rate
    #    instead of growth rate
    figS1, axS1 = plt.subplots(1, 2, figsize=(9, 4.5))

    data = figure_data['standard']
    # remove oxygen-sensitive EFMs
    data.loc[data[D.STRICTLY_ANAEROBIC_L], D.GROWTH_RATE_L] = 0
    D.plot_basic_pareto(data,
                        axS1[0],
                        x=D.YIELD_L,
                        y=D.BIOMASS_PROD_PER_ENZ_L,
                        facecolors=D.PARETO_NEUTRAL_COLOR,
                        edgecolors='none')
    axS1[0].set_ylabel(
        'enzyme-specific biomass production\n$r_{BM} = v_{BM}/E_{met}$ [gr dw h$^{-1}$ / gr enz]'
    )
    axS1[0].set_xlim(-1e-3, 1.1 * data[D.YIELD_L].max())
    axS1[0].set_ylim(-1e-3, 1.15 * data[D.BIOMASS_PROD_PER_ENZ_L].max())
    axS1[0].set_title('glucose = 100 mM, O$_2$ = 3.7 mM')
    axS1[0].annotate('c',
                     xy=(0.02, 0.98),
                     xycoords='axes fraction',
                     ha='left',
                     va='top',
                     size=20)
    for y in range(0, 14, 2):