def plot_correlations(self):
        """Plot how closely each EFM matches the reference flux vector.

        Draws two side-by-side "Pareto" scatter plots (yield vs. growth
        rate) of the 'standard' figure data and colors every EFM by:

        * left panel  -- its correlation with the rates stored under the
          label 9999 of the concatenated 'standard' rates table (used here
          as the "experimental" flow);
        * right panel -- a log-likelihood score, ``-sum(z**2) / 2``, where
          ``z`` is the per-reaction difference from that same reference,
          divided by a per-reaction standard deviation.

        The EFMs listed in ``D.efm_dict`` are annotated on both panels and
        the figure is written to ``Fig_flux_correlation.pdf`` inside
        ``D.OUTPUT_DIR``.

        Reads ``self.fluxes_df``; returns nothing.
        """
        # label of the reference ("experimental") flow in the rates table
        EXP_EFM = 9999
        CORR_FLUX_L = 'correlation with exp fluxes'
        LOG_LIKELIHOOD_L = 'log likelihood of flow'

        # remove the exchange reactions (xchg_*); take an explicit copy of
        # the filtered slice so the column assignment below never writes
        # into a view of self.fluxes_df
        is_internal = self.fluxes_df.reaction_id.str.find('xchg') != 0
        exp_flux_df = self.fluxes_df.loc[is_internal, :].copy()
        exp_flux_df['reaction_id'] = exp_flux_df['reaction_id'].apply(
            D.FIX_REACTION_ID)

        fig0, axs0 = plt.subplots(1, 2, figsize=(15, 7))

        # only the rates table (first returned item) is needed here
        rates_df, _, _, _ = get_concatenated_raw_data('standard')

        figure_data = D.get_figure_data()
        data = figure_data['standard']

        data[CORR_FLUX_L] = rates_df.transpose().corr().loc[EXP_EFM]

        # calculate the likelihood of each EFM according to the measured
        # flux distribution
        joined_rates = rates_df.T

        # NOTE(review): the measured stdevs are aligned to the rates table
        # by index label; entries without a matching label become NaN and
        # are replaced by 0 -- confirm exp_flux_df is indexed by the same
        # labels as the columns of rates_df.
        stdev = exp_flux_df[D.MEAS_STDEV_L].reindex(joined_rates.index)
        # NOTE(review): the original comment called this "a baseline stdev
        # of 10%", but the code adds an absolute 1.0 -- confirm the intent.
        stdev = stdev.fillna(0) + 1.0

        # Vectorised over all EFMs at once; this also makes the column a
        # float column from the start instead of an int column that is
        # overwritten cell by cell.
        z_scores = joined_rates[data.index].sub(
            joined_rates[EXP_EFM], axis=0).div(stdev, axis=0)
        data[LOG_LIKELIHOOD_L] = -(z_scores ** 2).sum() / 2

        data.loc[data[D.STRICTLY_ANAEROBIC_L],
                 D.GROWTH_RATE_L] = 0  # remove oxygen-sensitive EFMs

        cmap = D.pareto_cmap(0.88)
        D.plot_basic_pareto(data,
                            axs0[0],
                            x=D.YIELD_L,
                            y=D.GROWTH_RATE_L,
                            c=CORR_FLUX_L,
                            cmap=cmap,
                            vmin=0,
                            vmax=1,
                            linewidth=0,
                            s=20)
        D.plot_basic_pareto(data,
                            axs0[1],
                            x=D.YIELD_L,
                            y=D.GROWTH_RATE_L,
                            c=LOG_LIKELIHOOD_L,
                            cmap=cmap,
                            linewidth=0,
                            s=20,
                            vmin=-100000,
                            vmax=0)

        # The label positions and axis limits are identical for both
        # panels, so compute them once.
        annotations = []
        for efm, efm_info in D.efm_dict.items():
            xy = np.array(
                data.loc[efm, [D.YIELD_L, D.GROWTH_RATE_L]].tolist())
            xytext = xy + np.array((-1, 0.025))
            annotations.append((efm_info['label'], xy, xytext))
        x_max = 1.1 * data[D.YIELD_L].max()
        y_max = 1.15 * data[D.GROWTH_RATE_L].max()

        for ax in axs0:
            for label, xy, xytext in annotations:
                # The label is passed positionally: the keyword for it was
                # renamed from `s` to `text` in matplotlib 3.3, so the
                # positional form is the only one valid on all versions.
                ax.annotate(label,
                            xy=xy,
                            xycoords='data',
                            xytext=xytext,
                            arrowprops=dict(facecolor='black',
                                            shrink=0.05,
                                            width=2,
                                            headwidth=4))
            ax.set_xlim(-1e-3, x_max)
            ax.set_ylim(-1e-3, y_max)
        axs0[0].set_title('distance from measured fluxes (correlation)')
        axs0[1].set_title('distance from measured fluxes (likelihood)')
        fig0.tight_layout()

        fig0.savefig(os.path.join(D.OUTPUT_DIR, 'Fig_flux_correlation.pdf'))
Example #2
0
# -*- coding: utf-8 -*-
"""
Created on Tue Dec  8 10:38:03 2015

@author: noore
"""

import zipfile, os
import pandas as pd
import definitions as D

# Text labels for the first and second t-SNE dimensions.
# NOTE(review): presumably used as column / axis names further down in this
# script; the usage is not visible in this excerpt -- confirm.
TSNE_DIM_1 = 't-SNE dim1'
TSNE_DIM_2 = 't-SNE dim2'

if __name__ == '__main__':
    figure_data = D.get_figure_data()

    for fig_name in ['monod_glucose_aero', 'monod_glucose_anae']:

        zip_fname = D.DATA_FILES[fig_name][0][0]
        prefix, ext = os.path.splitext(os.path.basename(zip_fname))

        with zipfile.ZipFile(zip_fname, 'r') as z:
            rates_df = pd.read_csv(z.open('%s/rates.csv' % prefix, 'r'),
                                   header=0,
                                   index_col=0)
            stoich_df = pd.read_csv(z.open('%s/stoich.csv' % prefix, 'r'),
                                    header=None,
                                    index_col=None)
            kcat_df = pd.read_csv(z.open('%s/kcats.csv' % prefix, 'r'),
                                  header=None,