def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        with open(validationDataFile, 'rb') as f:
            validation_data = cPickle.load(f)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        for variant in variants:
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)

            for sim_dir in ap.get_cells(variant=[variant]):
                simOutDir = os.path.join(sim_dir, "simOut")

                # Listeners used
                main_reader = TableReader(os.path.join(simOutDir, 'Main'))

                # Load data
                time = main_reader.readColumn('time')

        plt.figure()

        ### Create Plot ###

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        plt.close('all')
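
The snippet above is only a skeleton: it reads the 'time' column from each cell's 'Main' listener and leaves the plotting section empty. As a minimal, self-contained sketch (synthetic data only, not part of the original analysis), the plot section could render a collected time series like this:

import numpy as np
import matplotlib.pyplot as plt

# Hypothetical stand-in for a column read with TableReader.readColumn()
time = np.linspace(0., 3600., 200)                 # seconds
signal = 300. * np.exp(np.log(2) * time / 3600.)   # arbitrary doubling signal

plt.figure()
plt.plot(time / 60., signal)
plt.xlabel('Time (min)')
plt.ylabel('Example listener column')
plt.tight_layout()
plt.close('all')
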
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if metadata["variant"] != "condition":
            print("This plot only runs for the 'condition' variant.")
            return

        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        gens = [2, 3]

        initial_volumes = []
        added_volumes = []

        for variant in variants:
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)

            cell_density = sim_data.constants.cellDensity

            initial_masses = np.zeros(0)
            final_masses = np.zeros(0)

            all_cells = ap.get_cells(variant=[variant], generation=gens)

            if len(all_cells) == 0:
                continue

            for simDir in all_cells:
                try:
                    simOutDir = os.path.join(simDir, "simOut")
                    mass = TableReader(os.path.join(simOutDir, "Mass"))
                    cellMass = mass.readColumn("cellMass")

                    initial_masses = np.hstack((initial_masses, cellMass[0]))
                    final_masses = np.hstack((final_masses, cellMass[-1]))
                except Exception:
                    continue

            added_masses = final_masses - initial_masses

            initial_volume = initial_masses / cell_density.asNumber(
                units.fg / units.um**3)
            added_volume = added_masses / cell_density.asNumber(
                units.fg / units.um**3)

            initial_volumes.append(initial_volume)
            added_volumes.append(added_volume)

        plt.style.use('seaborn-deep')
        color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

        plt.figure(figsize=(4, 4))
        ax = plt.subplot2grid((1, 1), (0, 0))

        options = {
            "edgecolors": color_cycle[0],
            "alpha": 0.2,
            "s": 50,
            "clip_on": False
        }
        labels = ["minimal", "anaerobic", "minimal + AA"]

        ax.scatter(initial_volumes[2],
                   added_volumes[2],
                   marker="x",
                   label=labels[2],
                   **options)
        ax.scatter(initial_volumes[0],
                   added_volumes[0],
                   facecolors="none",
                   marker="o",
                   label=labels[0],
                   **options)
        ax.scatter(initial_volumes[1],
                   added_volumes[1],
                   facecolors="none",
                   marker="^",
                   label=labels[1],
                   **options)

        ax.set_xlim([0, 4])
        ax.set_ylim([0, 4])
        ax.set_xlabel("Birth Volume ($\mu m^3$)")
        ax.set_ylabel("Added Volume ($\mu m^3$)")
        ax.legend()

        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

        whitePadSparklineAxis(ax)

        ax.tick_params(which='both',
                       bottom=True,
                       left=True,
                       top=False,
                       right=False,
                       labelbottom=True,
                       labelleft=True)

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        # Get clean version of plot
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)

        plt.close("all")
Example No. 3
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()
        n_variants = len(variants)

        # Load sim_data
        with open(
                os.path.join(inputDir, 'kb',
                             constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
            sim_data = cPickle.load(f)
        cell_density = sim_data.constants.cellDensity.asNumber(MASS_UNITS /
                                                               VOLUME_UNITS)

        # Load validation_data
        with open(validationDataFile, "rb") as f:
            validation_data = cPickle.load(f)
        toyaReactions = validation_data.reactionFlux.toya2010fluxes[
            "reactionID"]
        toyaFluxes = validation_data.reactionFlux.toya2010fluxes[
            "reactionFlux"]
        toyaStdev = validation_data.reactionFlux.toya2010fluxes[
            "reactionFluxStdev"]
        toyaFluxesDict = dict(zip(toyaReactions, toyaFluxes))
        toyaStdevDict = dict(zip(toyaReactions, toyaStdev))

        glc_uptakes = np.zeros(n_variants)
        log_ratio_succ = np.zeros(n_variants)
        size_pearson = np.zeros(n_variants)
        selected_indicies = np.zeros(n_variants, bool)
        for v, variant in enumerate(variants):
            # initialize kinetic flux comparison
            exchange_fluxes = {entry: [] for entry in EXCHANGES}
            reaction_fluxes = {entry: [] for entry in REACTIONS}

            modelFluxes = {}
            toyaOrder = []
            for rxn in toyaReactions:
                modelFluxes[rxn] = []
                toyaOrder.append(rxn)

            for sim_dir in ap.get_cells(variant=[variant]):
                simOutDir = os.path.join(sim_dir, "simOut")

                try:
                    # Listeners used
                    massListener = TableReader(os.path.join(simOutDir, "Mass"))
                    fbaResults = TableReader(
                        os.path.join(simOutDir, "FBAResults"))
                    enzymeKineticsReader = TableReader(
                        os.path.join(simOutDir, "EnzymeKinetics"))

                    ## Read from mass listener
                    cellMass = massListener.readColumn("cellMass")
                    # skip if no data
                    if cellMass.shape == ():
                        continue
                    dryMass = massListener.readColumn("dryMass")
                except Exception as e:
                    print(e)
                    continue

                coefficient = (dryMass / cellMass * cell_density).reshape(
                    -1, 1)

                ## Read from FBA listener
                reactionIDs = {
                    r: i
                    for i, r in enumerate(
                        fbaResults.readAttribute("reactionIDs"))
                }
                exMolec = {
                    m: i
                    for i, m in enumerate(
                        fbaResults.readAttribute("externalMoleculeIDs"))
                }
                reactionFluxes = FLUX_CONVERSION * (
                    fbaResults.readColumn("reactionFluxes") /
                    coefficient)[1:, :]
                exFlux = fbaResults.readColumn("externalExchangeFluxes")[1:, :]

                ## Read from EnzymeKinetics listener
                constrainedReactions = {
                    r: i
                    for i, r in enumerate(
                        enzymeKineticsReader.readAttribute(
                            "constrainedReactions"))
                }

                ## Append values for relevant reactions.
                # append to exchanges
                for entry in EXCHANGES:
                    exchange_fluxes[entry].extend(
                        list(exFlux[:, exMolec[entry]]))
                # append to reaction fluxes
                for entry in REACTIONS:
                    reaction_fluxes[entry].extend(
                        list(reactionFluxes[:, reactionIDs[entry]]))

                ## get all Toya reactions, and corresponding simulated fluxes.
                toya_idx = {r: [] for r in toyaReactions}
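                # Map each simulation reaction onto its Toya reaction ID: strip
                # any '__' suffix, and negate the column index of ' (reverse)'
                # reactions so their flux is subtracted in the sum below.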
                for rxn, i in reactionIDs.items():
                    rxn = rxn.split(' (reverse)')
                    if len(rxn) > 1:
                        i = -i
                    rxn = rxn[0].split('__')[0]
                    if rxn in toya_idx:
                        toya_idx[rxn] += [i]
                for toyaReaction, reaction_idx in toya_idx.items():
                    flux_time_course = np.sum([
                        np.sign(i) * reactionFluxes[:, np.abs(i)]
                        for i in reaction_idx
                    ],
                                              axis=0)
                    modelFluxes[toyaReaction].append(flux_time_course.mean())

            ## Flux comparison with Toya
            toyaVsReactionAve = []
            rxn_order = []
            for rxn, toyaFlux in toyaFluxesDict.items():
                rxn_order.append(rxn)
                if rxn in modelFluxes:
                    toyaVsReactionAve.append(
                        (np.mean(modelFluxes[rxn]),
                         toyaFlux.asNumber(OUTPUT_FLUX_UNITS),
                         np.std(modelFluxes[rxn]),
                         toyaStdevDict[rxn].asNumber(OUTPUT_FLUX_UNITS)))

            toyaVsReactionAve = np.array(toyaVsReactionAve)
            rWithAll = pearsonr(toyaVsReactionAve[:, 0], toyaVsReactionAve[:,
                                                                           1])
            succ_toya_flux = toyaVsReactionAve[rxn_order.index(SUCC_ID), 1]

            # Save data for plotting
            glc_uptakes[v] = -np.mean(exchange_fluxes[GLC_ID])
            log_ratio_succ[v] = np.log2(
                np.mean(reaction_fluxes[SUCC_ID]) / succ_toya_flux)
            size_pearson[v] = (rWithAll[0] * 8)**2
            selected_indicies[v] = np.all([
                c not in constrainedReactions for c in HIGHLIGHTED_CONSTRAINTS
            ])

        # Plot scatterplot
        fig = plt.figure(figsize=(5, 5))
        gs = gridspec.GridSpec(40, 40)

        ## Plot full data
        plt.scatter(glc_uptakes[~selected_indicies],
                    log_ratio_succ[~selected_indicies],
                    color='blue',
                    alpha=0.6,
                    s=size_pearson[~selected_indicies])
        plt.scatter(glc_uptakes[selected_indicies],
                    log_ratio_succ[selected_indicies],
                    color='red',
                    alpha=0.6,
                    s=size_pearson[selected_indicies])
        x_min, x_max = plt.xlim()
        y_max = max(np.abs(plt.ylim()))
        plt.axvspan(0, GLC_MAX, facecolor='g', alpha=0.1)
        plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)
        plt.axhline(y=0, color='k', linestyle='--')

        ## Format axes
        plt.ylabel('log2(model flux / Toya flux)')
        plt.xlabel('glucose uptake (mmol / g DCW / hr)')
        plt.xlim([np.floor(min(x_min, 10)), np.ceil(x_max)])
        plt.ylim([-y_max, y_max])

        ## Plot highlighted region data
        fig.add_subplot(gs[1:28, -20:-1])
        in_region = (glc_uptakes < GLC_MAX) & (np.abs(log_ratio_succ) <
                                               SUCC_DISTANCE)
        selected_in = in_region & selected_indicies
        not_selected_in = in_region & ~selected_indicies
        constraint_labels = np.array(
            [[c[:2] for c in constraints] if constraints is not None else []
             for _, constraints in map(get_disabled_constraints, variants)])
        plt.scatter(glc_uptakes[not_selected_in],
                    log_ratio_succ[not_selected_in],
                    color='blue',
                    alpha=0.6,
                    s=size_pearson[not_selected_in])
        plt.scatter(glc_uptakes[selected_in],
                    log_ratio_succ[selected_in],
                    color='red',
                    alpha=0.6,
                    s=size_pearson[selected_in])
        for x, y, label in zip(glc_uptakes[in_region],
                               log_ratio_succ[in_region],
                               constraint_labels[in_region]):
            plt.text(x, y, ', '.join(label), ha='center', va='top', fontsize=6)
        x_min, _ = plt.xlim()
        x_min = np.floor(min(x_min, 10))
        plt.axvspan(x_min, GLC_MAX, facecolor='g', alpha=0.1)
        plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)

        ## Format axes
        plt.xlim([x_min, GLC_MAX])
        plt.ylim([-SUCC_DISTANCE, SUCC_DISTANCE])

        ## Save figure
        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close('all')
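
Each variant above is summarized by a Pearson correlation against the Toya et al. (2010) central-carbon fluxes and by a log2 ratio of the simulated flux for the SUCC_ID reaction to its measured value. A self-contained sketch of those two summary statistics on placeholder values:

import numpy as np
from scipy.stats import pearsonr

model_flux = np.array([1.2, 0.8, 2.5, 0.3])      # placeholder, mmol / g DCW / hr
measured_flux = np.array([1.0, 1.1, 2.2, 0.4])   # placeholder measurements

r, p = pearsonr(model_flux, measured_flux)
log_ratio = np.log2(model_flux / measured_flux)
print('Pearson r={:.2f} (p={:.2g})'.format(r, p))
print('log2(model / measured) = {}'.format(log_ratio))
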
Example No. 4
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if metadata["variant"] != "condition":
            print('This analysis only runs for the "condition" variant.')
            return

        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        n_gens = ap.n_generation
        variants = ap.get_variants()

        if n_gens - 1 < FIRST_GENERATION:
            print('Not enough generations to plot.')
            return

        all_growth_rates = []
        all_rna_to_protein_ratios = []

        for variant in variants:
            doubling_times = np.zeros(0)
            variant_rna_to_protein_ratios = np.zeros(0)

            all_cells = ap.get_cells(variant=[variant],
                                     generation=range(FIRST_GENERATION,
                                                      n_gens))

            if len(all_cells) == 0:
                continue

            for simDir in all_cells:
                try:
                    simOutDir = os.path.join(simDir, "simOut")
                    mass = TableReader(os.path.join(simOutDir, "Mass"))
                    rna_mass = mass.readColumn("rnaMass")
                    protein_mass = mass.readColumn("proteinMass")

                    time = TableReader(os.path.join(simOutDir,
                                                    "Main")).readColumn("time")

                    doubling_times = np.hstack(
                        (doubling_times, (time[-1] - time[0]) / 3600.))

                    variant_rna_to_protein_ratios = np.hstack(
                        (variant_rna_to_protein_ratios,
                         rna_mass.mean() / protein_mass.mean()))
                except Exception:
                    continue

            variant_growth_rates = np.log(2) / doubling_times

            all_growth_rates.append(variant_growth_rates)
            all_rna_to_protein_ratios.append(variant_rna_to_protein_ratios)

        # Get errorbar plot
        plt.figure(figsize=FIGSIZE)

        plt.style.use('seaborn-deep')
        color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
        marker_styles = ['o', '^', 'x']
        labels = ['basal', 'anaerobic', '+AA']

        ax = plt.subplot2grid((1, 1), (0, 0))

        for i in range(3):
            ax.errorbar(all_growth_rates[i].mean(),
                        all_rna_to_protein_ratios[i].mean(),
                        yerr=all_rna_to_protein_ratios[i].std(),
                        color=color_cycle[0],
                        mec=color_cycle[0],
                        marker=marker_styles[i],
                        markersize=8,
                        mfc='white',
                        linewidth=1,
                        capsize=2,
                        label=labels[i])

        # Add linear plot proposed in Scott et al. (2010)
        x_linear = np.linspace(0.05, 1.95, 100)
        y_linear = x_linear / 4.5 + 0.087
        ax.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

        ax.set_xlim([0, 2])
        ax.set_ylim([0, 0.7])
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

        whitePadSparklineAxis(ax)

        ax.tick_params(which='both',
                       bottom=True,
                       left=True,
                       top=False,
                       right=False,
                       labelbottom=True,
                       labelleft=True)

        ax.set_xlabel("Growth rate $\lambda$ (hour$^{-1}$)")
        ax.set_ylabel("RNA/protein mass ratio")
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        # Get clean version of errorbar plot
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)

        plt.close("all")

        # Get scatter version of plot
        plt.figure(figsize=FIGSIZE)
        ax = plt.subplot2grid((1, 1), (0, 0))

        options = {"edgecolors": color_cycle[0], "alpha": 0.25, "s": 20}

        ax.scatter(all_growth_rates[0],
                   all_rna_to_protein_ratios[0],
                   facecolors="none",
                   marker="o",
                   label=labels[0],
                   **options)
        ax.scatter(all_growth_rates[1],
                   all_rna_to_protein_ratios[1],
                   facecolors="none",
                   marker="^",
                   label=labels[1],
                   **options)
        ax.scatter(all_growth_rates[2],
                   all_rna_to_protein_ratios[2],
                   marker="x",
                   label=labels[2],
                   **options)

        x_linear = np.linspace(0.05, 2.45, 100)
        y_linear = x_linear / 4.5 + 0.087
        ax.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

        ax.set_xlim([0, 2.5])
        ax.set_ylim([0, 0.8])
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

        whitePadSparklineAxis(ax)

        ax.tick_params(which='both',
                       bottom=True,
                       left=True,
                       top=False,
                       right=False,
                       labelbottom=True,
                       labelleft=True)

        ax.set_xlabel("Growth rate $\lambda$ (hour$^{-1}$)")
        ax.set_ylabel("RNA/protein mass ratio")
        exportFigure(plt, plotOutDir, plotOutFileName + "_scatter", metadata)
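
The growth rates plotted above come from doubling times via λ = ln(2) / τ, and the reference line is the linear RNA/protein vs. growth-rate relation written into the code (ratio = λ / 4.5 + 0.087, after Scott et al. 2010). A short worked version with placeholder doubling times:

import numpy as np

doubling_time_hr = np.array([0.5, 1.0, 2.0])   # placeholder doubling times (hours)
growth_rate = np.log(2) / doubling_time_hr     # 1 / hour

# Linear relation used for the reference line in the plot above
rna_to_protein = growth_rate / 4.5 + 0.087
print(growth_rate, rna_to_protein)
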
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        all_variants = ap.get_variants()
        variants = -np.ones(N_VARIANTS)
        for v, variant in enumerate(all_variants):
            disable_constraints, additional_disabled = get_disabled_constraints(
                variant)
            if additional_disabled is None:
                variants[0] = variant
            elif len(additional_disabled) == 0:
                variants[1] = variant
            elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
                variants[2] = variant

        if np.any(variants < 0):
            print('Not enough variants to analyze')
            return

        with open(
                os.path.join(inputDir, 'kb',
                             constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
            sim_data = cPickle.load(f)

        all_yields = []
        for variant in variants:
            yields = []

            for sim_dir in ap.get_cells(variant=[variant]):
                sim_out_dir = os.path.join(sim_dir, 'simOut')

                # Listeners used
                fba_reader = TableReader(
                    os.path.join(sim_out_dir, 'FBAResults'))
                main_reader = TableReader(os.path.join(sim_out_dir, 'Main'))
                mass_reader = TableReader(os.path.join(sim_out_dir, 'Mass'))

                # Load data
                time_step_sec = main_reader.readColumn('timeStepSec')

                external_fluxes = fba_reader.readColumn(
                    'externalExchangeFluxes')
                external_molecules = fba_reader.readAttribute(
                    'externalMoleculeIDs')

                dry_mass = MASS_UNITS * mass_reader.readColumn('dryMass')
                growth = GROWTH_UNITS * mass_reader.readColumn(
                    'growth') / time_step_sec

                # Calculate growth yield on glucose
                glc_idx = external_molecules.index(GLUCOSE_ID)
                glc_flux = FLUX_UNITS * external_fluxes[:, glc_idx]
                glc_mw = sim_data.getter.getMass([GLUCOSE_ID])[0]
                glc_mass_flux = glc_flux * glc_mw * dry_mass
                glc_mass_yield = growth / -glc_mass_flux

                yields += list(glc_mass_yield[1:].asNumber())

            all_yields += [yields]

        for i, v1 in enumerate(variants):
            for j, v2 in enumerate(variants[i + 1:]):
                t, p = stats.ttest_ind(all_yields[i],
                                       all_yields[i + j + 1],
                                       equal_var=False)
                print('p={:.2e} for variant {} vs variant {}'.format(
                    p, v1, v2))

        plt.figure(figsize=(4, 4))
        xticks = range(N_VARIANTS)

        # Plot data
        plt.violinplot(all_yields, xticks, showmeans=False, showextrema=False)
        plt.axhline(VALIDATION_YIELD, linestyle='--', color='#eb7037')

        # Format axes
        ax = plt.gca()
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.xticks(xticks, VARIANT_LABELS)
        plt.ylabel('Glucose Yield\n(g cell / g glucose)')

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close('all')
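
The pairwise comparison above uses Welch's t-test (ttest_ind with equal_var=False), which does not assume equal variances between the two yield distributions. A self-contained sketch with synthetic yields:

import numpy as np
from scipy import stats

np.random.seed(1)
yields_a = np.random.normal(0.40, 0.05, 200)   # placeholder glucose yields (g cell / g glucose)
yields_b = np.random.normal(0.43, 0.05, 200)

t, p = stats.ttest_ind(yields_a, yields_b, equal_var=False)  # Welch's t-test
print('t={:.2f}, p={:.2e}'.format(t, p))
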
Example No. 6
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if metadata["variant"] != "condition":
            print("This plot only runs for the 'condition' variant.")
            return

        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        gens = [2, 3]

        initial_volumes = []
        added_volumes = []

        for variant in variants:
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)

            cell_density = sim_data.constants.cellDensity

            initial_masses = np.zeros(0)
            final_masses = np.zeros(0)

            all_cells = ap.get_cells(variant=[variant], generation=gens)

            if len(all_cells) == 0:
                continue

            for simDir in all_cells:
                try:
                    simOutDir = os.path.join(simDir, "simOut")
                    mass = TableReader(os.path.join(simOutDir, "Mass"))
                    cellMass = mass.readColumn("cellMass")

                    initial_masses = np.hstack((initial_masses, cellMass[0]))
                    final_masses = np.hstack((final_masses, cellMass[-1]))
                except Exception:
                    continue

            added_masses = final_masses - initial_masses

            initial_volume = initial_masses / cell_density.asNumber(
                units.fg / units.um**3)
            added_volume = added_masses / cell_density.asNumber(
                units.fg / units.um**3)

            initial_volumes.append(initial_volume)
            added_volumes.append(added_volume)

        plt.style.use('seaborn-deep')

        plt.figure(figsize=(5, 5))
        plt.scatter(initial_volumes[0], added_volumes[0], s=3, label="minimal")
        plt.scatter(initial_volumes[1],
                    added_volumes[1],
                    s=3,
                    label="anaerobic")
        plt.scatter(initial_volumes[2], added_volumes[2], s=3, label="+AA")
        plt.xlim([0, 4])
        plt.ylim([0, 4])
        plt.xlabel("Birth Volume ($\mu m^3$)")
        plt.ylabel("Added Volume ($\mu m^3$)")
        plt.legend()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        plt.close("all")
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if not os.path.isdir(inputDir):
            raise Exception, "variantDir does not currently exist as a directory"

        if not os.path.exists(plotOutDir):
            os.mkdir(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)

        fig = plt.figure()
        fig.set_figwidth(5)
        fig.set_figheight(5)

        bremer_tau = [40, 100, 24]

        bremer_origins_per_cell_at_initiation = [2, 1, 4]
        bremer_rrn_init_rate = [20 * 23, 4 * 12.4, 58 * 35.9]

        bremer_rna_mass_per_cell = [77, 20, 211]
        bremer_elng_rate = [18, 12, 21]

        sim_doubling_time = np.zeros(ap.n_variant)
        sim_doubling_time_std = np.zeros(ap.n_variant)

        sim_origins_per_cell_at_initiation = np.zeros(ap.n_variant)
        sim_rna_mass_per_cell = np.zeros(ap.n_variant)
        sim_elng_rate = np.zeros(ap.n_variant)
        sim_rrn_init_rate = np.zeros(ap.n_variant)

        sim_origins_per_cell_at_initiation_std = np.zeros(ap.n_variant)
        sim_elng_rate_std = np.zeros(ap.n_variant)
        sim_rna_mass_per_cell_std = np.zeros(ap.n_variant)
        sim_rrn_init_rate_std = np.zeros(ap.n_variant)

        variants = ap.get_variants()

        for varIdx in range(ap.n_variant):
            variant = variants[varIdx]

            print("variant {}".format(variant))

            all_cells = ap.get_cells(variant=[variant])

            print("Total cells: {}".format(len(all_cells)))

            try:
                sim_data = cPickle.load(open(ap.get_variant_kb(variant), 'rb'))
            except Exception as e:
                print "Couldn't load sim_data object. Exiting.", e
                return

            num_origin_at_init = np.zeros(len(all_cells))
            doubling_time = np.zeros(len(all_cells))
            meanRnaMass = np.zeros(len(all_cells))
            meanElngRate = np.zeros(len(all_cells))
            meanRrnInitRate = np.zeros(len(all_cells))

            for idx, simDir in enumerate(all_cells):
                print "cell {} of {}".format(idx, len(all_cells))

                simOutDir = os.path.join(simDir, "simOut")

                try:
                    time = TableReader(os.path.join(simOutDir,
                                                    "Main")).readColumn("time")
                    doubling_time[idx] = time[-1] - time[0]
                except Exception as e:
                    print('Error with data for %s: %s' % (simDir, e))
                    continue

                timeStepSec = TableReader(os.path.join(
                    simOutDir, "Main")).readColumn("timeStepSec")

                meanRnaMass[idx] = TableReader(os.path.join(
                    simOutDir, "Mass")).readColumn("rnaMass").mean()
                meanElngRate[idx] = TableReader(
                    os.path.join(simOutDir, "RibosomeData")).readColumn(
                        "effectiveElongationRate").mean()

                numOrigin = TableReader(
                    os.path.join(simOutDir,
                                 "ReplicationData")).readColumn("numberOfOric")

                massPerOric = TableReader(
                    os.path.join(
                        simOutDir,
                        "ReplicationData")).readColumn("criticalMassPerOriC")
                idxInit = np.where(massPerOric >= 1)[0]
                numOriginAtInit = numOrigin[idxInit - 1]
                if numOriginAtInit.size:
                    num_origin_at_init[idx] = numOriginAtInit.mean()
                else:
                    num_origin_at_init[idx] = np.nan

                transcriptDataFile = TableReader(
                    os.path.join(simOutDir, "TranscriptElongationListener"))
                rnaSynth = transcriptDataFile.readColumn("countRnaSynthesized")
                isRRna = sim_data.process.transcription.rnaData["isRRna"]
                meanRrnInitRate[idx] = (rnaSynth[:, isRRna].sum(axis=1) /
                                        timeStepSec).mean() * 60. / 3

            sim_rna_mass_per_cell[varIdx] = meanRnaMass.mean()
            sim_elng_rate[varIdx] = meanElngRate.mean()
            sim_origins_per_cell_at_initiation[varIdx] = np.nanmean(
                num_origin_at_init)
            sim_doubling_time[varIdx] = np.nanmean(doubling_time) / 60.
            sim_rrn_init_rate[varIdx] = np.nanmean(meanRrnInitRate)

            sim_rna_mass_per_cell_std[varIdx] = meanRnaMass.std()
            sim_elng_rate_std[varIdx] = meanElngRate.std()
            sim_origins_per_cell_at_initiation_std[varIdx] = np.nanstd(
                num_origin_at_init)
            sim_doubling_time_std[varIdx] = np.nanstd(doubling_time) / 60.
            sim_rrn_init_rate_std[varIdx] = np.nanstd(meanRrnInitRate)

        bremer_tau = np.array(bremer_tau)

        ax0 = plt.subplot2grid((2, 2), (0, 0))
        ax1 = plt.subplot2grid((2, 2), (1, 0), sharex=ax0)
        ax2 = plt.subplot2grid((2, 2), (0, 1), sharex=ax0)
        ax3 = plt.subplot2grid((2, 2), (1, 1), sharex=ax0)

        lines = {'linestyle': 'dashed'}
        plt.rc('lines', **lines)
        plt.style.use('seaborn-deep')
        color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

        ax0.errorbar(
            sim_doubling_time[np.argsort(sim_doubling_time)[::-1]],
            sim_rna_mass_per_cell[np.argsort(sim_doubling_time)[::-1]],
            yerr=sim_rna_mass_per_cell_std[np.argsort(sim_doubling_time)
                                           [::-1]],
            color=color_cycle[0],
            **SIM_PLOT_STYLE)
        ax0.errorbar(
            bremer_tau[np.argsort(bremer_tau)[::-1]],
            np.array(bremer_rna_mass_per_cell)[np.argsort(bremer_tau)[::-1]],
            color=color_cycle[2],
            **EXP_PLOT_STYLE)
        ax0.set_title("RNA mass per cell (fg)", fontsize=FONT_SIZE)
        ax0.set_xlim([0, 135])
        ax0.set_ylim([0, 250])
        ax0.legend(loc=1, fontsize='xx-small', markerscale=0.5, frameon=False)

        ax1.errorbar(
            sim_doubling_time[np.argsort(sim_doubling_time)[::-1]],
            sim_elng_rate[np.argsort(sim_doubling_time)[::-1]],
            yerr=sim_elng_rate_std[np.argsort(sim_doubling_time)[::-1]],
            color=color_cycle[0],
            **SIM_PLOT_STYLE)
        ax1.errorbar(bremer_tau[np.argsort(bremer_tau)[::-1]],
                     np.array(bremer_elng_rate)[np.argsort(bremer_tau)[::-1]],
                     color=color_cycle[2],
                     **EXP_PLOT_STYLE)
        ax1.set_title("Ribosome elongation\nrate (aa/s/ribosome)",
                      fontsize=FONT_SIZE)
        ax1.set_xlabel("Doubling time (min)", fontsize=FONT_SIZE)
        ax1.set_ylim([0, 24])

        ax2.errorbar(sim_doubling_time[np.argsort(sim_doubling_time)[::-1]],
                     sim_origins_per_cell_at_initiation[np.argsort(
                         sim_doubling_time)[::-1]],
                     yerr=sim_origins_per_cell_at_initiation_std[np.argsort(
                         sim_doubling_time)[::-1]],
                     color=color_cycle[0],
                     **SIM_PLOT_STYLE)
        ax2.errorbar(bremer_tau[np.argsort(bremer_tau)[::-1]],
                     np.array(bremer_origins_per_cell_at_initiation)[
                         np.argsort(bremer_tau)[::-1]],
                     color=color_cycle[2],
                     **EXP_PLOT_STYLE)
        ax2.set_title("Average origins at chrom. init.", fontsize=FONT_SIZE)
        ax2.set_ylim([0.5, 4.5])

        ax3.errorbar(
            sim_doubling_time[np.argsort(sim_doubling_time)[::-1]],
            sim_rrn_init_rate[np.argsort(sim_doubling_time)[::-1]],
            yerr=sim_rrn_init_rate_std[np.argsort(sim_doubling_time)[::-1]],
            color=color_cycle[0],
            **SIM_PLOT_STYLE)
        ax3.errorbar(
            bremer_tau[np.argsort(bremer_tau)[::-1]],
            np.array(bremer_rrn_init_rate)[np.argsort(bremer_tau)[::-1]],
            color=color_cycle[2],
            **EXP_PLOT_STYLE)
        ax3.set_title("Rate of rrn initiation (1/min)", fontsize=FONT_SIZE)
        ax3.set_ylim([0, 2500])

        # ax3.legend(loc=1, frameon=True, fontsize=7)
        ax3.set_xlabel("Doubling time (min)", fontsize=FONT_SIZE)

        axes_list = [ax0, ax1, ax2, ax3]

        for a in axes_list:
            for tick in a.yaxis.get_major_ticks():
                tick.label.set_fontsize(FONT_SIZE)
            for tick in a.xaxis.get_major_ticks():
                tick.label.set_fontsize(FONT_SIZE)

        whitePadSparklineAxis(ax0, False)
        whitePadSparklineAxis(ax1)
        whitePadSparklineAxis(ax2, False)
        whitePadSparklineAxis(ax3)

        plt.subplots_adjust(bottom=0.2, wspace=0.3)

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
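
Each panel above repeats np.argsort(sim_doubling_time)[::-1] so the dashed errorbar traces run from the longest to the shortest doubling time. Precomputing that sort order once keeps the calls shorter; a minimal sketch with placeholder values:

import numpy as np

doubling_time = np.array([44., 100., 22.])   # placeholder doubling times (min)
rna_mass = np.array([77., 20., 211.])        # placeholder RNA mass per cell (fg)

sort_desc = np.argsort(doubling_time)[::-1]  # descending doubling time
print(doubling_time[sort_desc], rna_mass[sort_desc])

The tab-indented version of this analysis below does exactly this with sort_sim and sort_bremer.
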
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		if not os.path.isdir(inputDir):
			raise Exception, "variantDir does not currently exist as a directory"

		if not os.path.exists(plotOutDir):
			os.mkdir(plotOutDir)

		ap = AnalysisPaths(inputDir, variant_plot = True)
		variants = ap.get_variants()

		index_doubling_time = 0
		sim_doubling_time = []

		index_rna_mass = 1
		sim_rna_mass_per_cell = []
		sim_rna_mass_per_cell_std = []

		index_elng_rate = 2
		sim_elng_rate = []
		sim_elng_rate_std = []

		index_n_origin_init = 3
		sim_origins_per_cell_at_initiation = []
		sim_origins_per_cell_at_initiation_std = []

		index_rrn_init_rate = 4
		sim_rrn_init_rate = []
		sim_rrn_init_rate_std = []

		for varIdx in range(ap.n_variant):
			variant = variants[varIdx]
			print("variant {}".format(variant))

			sim_dirs = ap.get_cells(variant=[variant])
			n_sims = len(sim_dirs)
			print("Total cells: {}".format(n_sims))

			try:
				sim_data = cPickle.load(open(ap.get_variant_kb(variant), 'rb'))

				global is_rRNA
				is_rRNA = sim_data.process.transcription.rnaData["isRRna"]

			except Exception as e:
				print "Couldn't load sim_data object. Exiting.", e
				return

			p = Pool(parallelization.cpus())
			output = np.array(p.map(mp_worker, sim_dirs))
			p.close()
			p.join()

			# Filter output from broken files using np.nanmean and np.nanstd
			sim_doubling_time.append(np.nanmean(output[:, index_doubling_time]) / 60.)

			sim_rna_mass_per_cell.append(np.nanmean(output[:, index_rna_mass]))
			sim_rna_mass_per_cell_std.append(np.nanstd(output[:, index_rna_mass]))

			sim_elng_rate.append(np.nanmean(output[:, index_elng_rate]))
			sim_elng_rate_std.append(np.nanstd(output[:, index_elng_rate]))

			sim_origins_per_cell_at_initiation.append(np.nanmean(output[:, index_n_origin_init]))
			sim_origins_per_cell_at_initiation_std.append(np.nanstd(output[:, index_n_origin_init]))

			sim_rrn_init_rate.append(np.nanmean(output[:, index_rrn_init_rate]))
			sim_rrn_init_rate_std.append(np.nanstd(output[:, index_rrn_init_rate]))

		sim_doubling_time = np.array(sim_doubling_time)

		# Plot
		fig, axes_list = plt.subplots(1, 4, figsize=(15, 5))
		ax0, ax1, ax2, ax3 = axes_list
		sort_sim = np.argsort(sim_doubling_time)[::-1]
		sort_bremer = np.argsort(bremer_tau)[::-1]

		# RNA mass per cell
		ax0.errorbar(
			sim_doubling_time[sort_sim],
			np.array(sim_rna_mass_per_cell)[sort_sim],
			yerr=np.array(sim_rna_mass_per_cell_std)[sort_sim],
			color='tab:blue', **SIM_PLOT_STYLE)
		ax0.errorbar(
			bremer_tau[sort_bremer],
			bremer_rna_mass_per_cell[sort_bremer],
			color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
		ax0.set_title('RNA mass per cell (fg)', fontsize=FONT_SIZE)
		ax0.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)
		ax0.set_xlim([0, 135])
		ax0.set_ylim([0, 250])
		ax0.legend(loc=1, fontsize='xx-small', markerscale=0.5, frameon=False)

		# Ribosome elongation rate
		ax1.errorbar(
			sim_doubling_time[sort_sim],
			np.array(sim_elng_rate)[sort_sim],
			yerr=np.array(sim_elng_rate_std)[sort_sim],
			color='tab:blue', **SIM_PLOT_STYLE)
		ax1.errorbar(
			bremer_tau[sort_bremer],
			bremer_elng_rate[sort_bremer],
			color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
		ax1.set_title('Ribosome elongation\nrate (aa/s/ribosome)', fontsize=FONT_SIZE)
		ax1.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)
		ax1.set_ylim([5, 24])

		# Number of origins at chromosome initiation
		ax2.errorbar(
			sim_doubling_time[sort_sim],
			np.array(sim_origins_per_cell_at_initiation)[sort_sim],
			yerr=np.array(sim_origins_per_cell_at_initiation_std)[sort_sim],
			color='tab:blue', **SIM_PLOT_STYLE)
		ax2.errorbar(
			bremer_tau[sort_bremer],
			bremer_origins_per_cell_at_initiation[sort_bremer],
			color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
		ax2.set_title('Average origins at chrom. init.', fontsize=FONT_SIZE)
		ax2.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)
		ax2.set_ylim([0.5, 4.5])

		# rRNA initiation rate
		ax3.errorbar(
			sim_doubling_time[sort_sim],
			np.array(sim_rrn_init_rate)[sort_sim],
			yerr=np.array(sim_rrn_init_rate_std)[sort_sim],
			color='tab:blue', **SIM_PLOT_STYLE)
		ax3.errorbar(
			bremer_tau[sort_bremer],
			bremer_rrn_init_rate[sort_bremer],
			color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
		ax3.set_title('Rate of rrn initiation (1/min)', fontsize=FONT_SIZE)
		ax3.set_ylim([0, 2500])
		ax3.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)

		for ax in axes_list:
			ax.set_xlim(X_LIM)
			ax.set_xticks(X_LIM)
			ax.set_ylim(ax.get_ylim())
			ax.set_yticks(ax.get_ylim())

			for tick in ax.yaxis.get_major_ticks():
				tick.label.set_fontsize(FONT_SIZE)
			for tick in ax.xaxis.get_major_ticks():
				tick.label.set_fontsize(FONT_SIZE)

		plt.subplots_adjust(bottom=0.25, top=0.75, left=0.05, right=0.95, wspace=0.4)
		exportFigure(plt, plotOutDir, '{}__test'.format(plotOutFileName), metadata)
		plt.close('all')
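
The version above farms the per-cell analysis out with multiprocessing.Pool; mp_worker and parallelization.cpus() come from the surrounding codebase and are not shown here. A minimal self-contained sketch of the same map-then-aggregate pattern:

import multiprocessing

import numpy as np


def _worker(sim_dir):
    # Placeholder per-cell analysis; the real worker reads listener tables
    # and returns one row of summary statistics per cell.
    return np.array([float(len(sim_dir)), 0.0])


if __name__ == '__main__':
    sim_dirs = ['cell_000000', 'cell_000001', 'cell_000002']
    pool = multiprocessing.Pool(processes=2)
    output = np.array(pool.map(_worker, sim_dirs))
    pool.close()
    pool.join()
    print(np.nanmean(output, axis=0))
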
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		if metadata["variant"] != "condition":
			print('This analysis only runs for the "condition" variant.')
			return

		if not os.path.isdir(inputDir):
			raise Exception('inputDir does not currently exist as a directory')

		filepath.makedirs(plotOutDir)

		ap = AnalysisPaths(inputDir, variant_plot=True)
		n_gens = ap.n_generation
		variants = ap.get_variants()

		if n_gens - 1 < FIRST_GENERATION:
			print('Not enough generations to plot.')
			return

		all_growth_rates = []
		all_rna_to_protein_ratios = []

		for variant in variants:
			doubling_times = np.zeros(0)
			variant_rna_to_protein_ratios = np.zeros(0)
			
			all_cells = ap.get_cells(
				variant=[variant],
				generation=range(FIRST_GENERATION, n_gens))

			if len(all_cells) == 0:
				continue

			for simDir in all_cells:
				try:
					simOutDir = os.path.join(simDir, "simOut")
					mass = TableReader(os.path.join(simOutDir, "Mass"))
					rna_mass = mass.readColumn("rnaMass")
					protein_mass = mass.readColumn("proteinMass")
					
					time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")

					doubling_times = np.hstack(
						(doubling_times, (time[-1] - time[0])/3600.)
						)
					
					variant_rna_to_protein_ratios = np.hstack(
						(variant_rna_to_protein_ratios, rna_mass.mean()/protein_mass.mean())
						)
				except Exception:
					continue

			variant_growth_rates = np.log(2)/doubling_times

			all_growth_rates.append(variant_growth_rates)
			all_rna_to_protein_ratios.append(variant_rna_to_protein_ratios)

		plt.figure(figsize=FIGSIZE)

		plt.style.use('seaborn-deep')
		color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

		for i in range(3):
			plt.errorbar(
				all_growth_rates[i].mean(),
				all_rna_to_protein_ratios[i].mean(),
				yerr=all_rna_to_protein_ratios[i].std(),
				color=color_cycle[0], marker='o', markersize=5, linewidth=1,
				capsize=2)

		# Add linear plot proposed in Scott et al. (2010)
		x_linear = np.linspace(0, 3, 100)
		y_linear = x_linear/4.5 + 0.087
		plt.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

		plt.xlim([0, 3])
		plt.ylim([0, 1.6])
		plt.xlabel("Growth rate $\lambda$ (hour$^{-1}$)")
		plt.ylabel("RNA/protein mass ratio")
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		plt.close("all")
Example No. 10
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if not os.path.isdir(inputDir):
            raise Exception, "variantDir does not currently exist as a directory"

        if not os.path.exists(plotOutDir):
            os.mkdir(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)

        if ap.n_generation == 1:
            print "Need more data to create addedMass"
            return

        allScatter = plt.figure()
        allScatter.set_figwidth(11)
        allScatter.set_figheight(6)

        xHist = plt.figure()
        xHist.set_figwidth(11)
        xHist.set_figheight(6)

        yHist = plt.figure()
        yHist.set_figwidth(11)
        yHist.set_figheight(6)

        plt.style.use('seaborn-deep')
        color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

        title_list = [
            "Glucose minimal\n" + r"$\tau = $" + "44 min",
            "Glucose minimal anaerobic\n" + r"$\tau = $" + "100 min",
            "Glucose minimal + 20 amino acids\n" + r"$\tau = $" + "22 min"
        ]

        plot = False

        for varIdx in ap.get_variants():

            if varIdx == 0:
                plotIdx = 1
                gen = [2, 3]
            elif varIdx == 1:
                plotIdx = 0
                gen = [2, 3]
            elif varIdx == 2:
                plotIdx = 2
                gen = [2, 3]
            else:
                continue

            initial_masses = np.zeros(0)
            final_masses = np.zeros(0)

            all_cells = ap.get_cells(generation=gen, variant=[varIdx])
            if len(all_cells) == 0:
                continue
            plot = True

            fail = 0
            for simDir in all_cells:
                try:
                    simOutDir = os.path.join(simDir, "simOut")
                    mass = TableReader(os.path.join(simOutDir, "Mass"))
                    cellMass = mass.readColumn("dryMass")

                    initial_masses = np.hstack((initial_masses, cellMass[0]))
                    final_masses = np.hstack((final_masses, cellMass[-1]))
                except Exception as e:
                    print(e)
                    fail += 1

            added_masses = final_masses - initial_masses

            all_scaled_initial_masses = initial_masses / initial_masses.mean()
            all_scaled_added_masses = added_masses / added_masses.mean()

            idxs_to_keep = np.where((0.6 < all_scaled_initial_masses)
                                    & (all_scaled_initial_masses < 1.25)
                                    & (0.45 < all_scaled_added_masses)
                                    & (all_scaled_added_masses < 1.5))

            scaled_initial_masses = all_scaled_initial_masses[idxs_to_keep]
            scaled_added_masses = all_scaled_added_masses[idxs_to_keep]

            nbins = 5

            n, xbin = np.histogram(scaled_initial_masses, bins=nbins)
            sy, xbin = np.histogram(scaled_initial_masses,
                                    bins=nbins,
                                    weights=scaled_added_masses)
            sy2, xbin = np.histogram(scaled_initial_masses,
                                     bins=nbins,
                                     weights=scaled_added_masses *
                                     scaled_added_masses)
            mean = sy / n
            std = np.sqrt(sy2 / (n - 1) - n * mean * mean / (n - 1))

            slope, intercept, r_value, p_value, std_err = linregress(
                scaled_initial_masses, scaled_added_masses)

            # plot all scatter plots
            plt.figure(allScatter.number)
            ax = plt.subplot2grid((1, 3), (0, plotIdx))
            ax.plot(scaled_initial_masses,
                    scaled_added_masses,
                    '.',
                    color="black",
                    alpha=0.2,
                    zorder=1,
                    markeredgewidth=0.0)
            ax.errorbar(((xbin[1:] + xbin[:-1]) / 2),
                        mean,
                        yerr=std,
                        color="black",
                        linewidth=1,
                        zorder=2)
            ax.plot(scaled_initial_masses,
                    slope * scaled_initial_masses + intercept,
                    color="blue")

            ax.set_title(
                title_list[varIdx] + ", n=%d, n*=%d" %
                ((len(all_cells) - fail), len(scaled_initial_masses)) + "\n" +
                r"$m_{add}$=%.3f$\times$$m_{init}$ + %.3f" %
                (slope, intercept) + "\n" + "p-value=%0.2g" % p_value,
                fontsize=FONT_SIZE)

            ax.set_xlim([0.6, 1.25])
            ax.set_ylim([0.45, 1.5])
            ax.get_yaxis().get_major_formatter().set_useOffset(False)
            ax.get_xaxis().get_major_formatter().set_useOffset(False)

            if varIdx == 1:
                ax.set_ylabel("Normed added mass", fontsize=FONT_SIZE)
            ax.set_xlabel("Normed initial mass", fontsize=FONT_SIZE)

            plt.subplots_adjust(bottom=0.2)

            whitePadSparklineAxis(ax)

            for tick in ax.yaxis.get_major_ticks():
                tick.label.set_fontsize(FONT_SIZE)
            for tick in ax.xaxis.get_major_ticks():
                tick.label.set_fontsize(FONT_SIZE)

            # plot stripped figure
            fig = plt.figure()
            fig.set_figwidth(1.73)
            fig.set_figheight(1.18)
            ax = plt.subplot2grid((1, 1), (0, 0))
            ax.plot(scaled_initial_masses,
                    scaled_added_masses,
                    '.',
                    color=color_cycle[0],
                    alpha=0.2,
                    zorder=1,
                    markeredgewidth=0.0)
            ax.set_title(title_list[varIdx] + ", n=%d, n*=%d" %
                         (len(all_cells) - fail, len(scaled_initial_masses)),
                         fontsize=FONT_SIZE)
            ax.plot(scaled_initial_masses,
                    slope * scaled_initial_masses + intercept,
                    color='k')

            ax.set_ylim([0.45, 1.5])

            ax.get_yaxis().get_major_formatter().set_useOffset(False)
            ax.get_xaxis().get_major_formatter().set_useOffset(False)

            plt.subplots_adjust(bottom=0.2)

            whitePadSparklineAxis(ax)

            ax.tick_params(axis='x',
                           which='both',
                           bottom=False,
                           top=False,
                           labelbottom=False)
            ax.tick_params(axis='y',
                           which='both',
                           left=False,
                           right=False,
                           labelleft=False)

            ax.set_xlabel("")
            ax.set_ylabel("")

            plt.subplots_adjust(top=0.95,
                                bottom=3 * trim,
                                left=2 * trim,
                                right=0.95,
                                hspace=0,
                                wspace=0)

            exportFigure(plt,
                         plotOutDir,
                         plotOutFileName + str(varIdx) + "_stripped",
                         metadata,
                         transparent=True)

            # plot histogram for x-axis
            plt.figure(xHist.number)
            bins = 25
            ax = plt.subplot2grid((1, 3), (0, plotIdx))
            ax.hist(all_scaled_initial_masses, bins, color=color_cycle[0])

            ax.axvline(x=0.6, color="k", linestyle="--")
            ax.axvline(x=1.25, color="k", linestyle="--")
            ax.set_title(title_list[varIdx] + "\n" + "[0.6, 1.25]",
                         fontsize=FONT_SIZE)
            ax.yaxis.set_major_locator(MaxNLocator(integer=True))

            ax.set_xlabel("Normed initial mass", fontsize=FONT_SIZE)

            plt.subplots_adjust(bottom=0.2)

            whitePadSparklineAxis(ax)

            for tick in ax.yaxis.get_major_ticks():
                tick.label.set_fontsize(FONT_SIZE)
            for tick in ax.xaxis.get_major_ticks():
                tick.label.set_fontsize(FONT_SIZE)

            # plot histogram for y-axis
            plt.figure(yHist.number)
            ax = plt.subplot2grid((1, 3), (0, plotIdx))
            ax.hist(all_scaled_added_masses, bins, color=color_cycle[0])

            ax.axvline(x=0.45, color="k", linestyle="--")
            ax.axvline(x=1.5, color="k", linestyle="--")
            ax.set_title(title_list[varIdx] + "\n" + "[0.45, 1.5]",
                         fontsize=FONT_SIZE)
            ax.yaxis.set_major_locator(MaxNLocator(integer=True))

            ax.set_xlabel("Normed added mass", fontsize=FONT_SIZE)

            plt.subplots_adjust(bottom=0.2)

            whitePadSparklineAxis(ax)

            for tick in ax.yaxis.get_major_ticks():
                tick.label.set_fontsize(FONT_SIZE)
            for tick in ax.xaxis.get_major_ticks():
                tick.label.set_fontsize(FONT_SIZE)

        if plot:
            plt.figure(allScatter.number)
            exportFigure(plt, plotOutDir, plotOutFileName, metadata)
            plt.figure(xHist.number)
            exportFigure(plt,
                         plotOutDir,
                         plotOutFileName + "_histogram_scaled_initial_mass",
                         metadata,
                         transparent=True)
            plt.figure(yHist.number)
            exportFigure(plt,
                         plotOutDir,
                         plotOutFileName + "_histogram_scaled_added_mass",
                         metadata,
                         transparent=True)
        plt.close("all")
Example No. 11
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        # scan all variants to find variant indexes for comparison
        old_variant = None
        new_variant = None
        for v, variant in enumerate(variants):
            disable_constraints, additional_disabled = get_disabled_constraints(
                variant)
            if additional_disabled is None:
                old_variant = variant
            elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
                new_variant = variant

        # if the baseline variant or the new variant are missing, stop plotting
        if (old_variant is None) or (new_variant is None):
            print('Variant simulations missing!')
            return

        compared_variants = [old_variant, new_variant]

        # Load sim_data
        with open(
                os.path.join(inputDir, 'kb',
                             constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
            sim_data = cPickle.load(f)

        # get reactions from sim_data
        reactionCatalysts = sim_data.process.metabolism.reactionCatalysts

        reaction_to_enzyme = {r: reactionCatalysts[r][0] for r in REACTIONS}
        enzyme_names = list(reaction_to_enzyme.values())
        reactions_with_km = sorted(SIMULATION_KMS)
        km_metabolites = [
            SIMULATION_KMS[r]['metabolite'] for r in reactions_with_km
        ]
        kms = np.array([SIMULATION_KMS[r]['KM'] for r in reactions_with_km])
        km_constraint_indices = [
            SIMULATION_KMS[r]['constraint_index'] for r in reactions_with_km
        ]

        # initialize dictionaries for fluxes and concentrations
        all_reaction_fluxes = {}
        all_enzyme_concentrations = {}
        all_km_adjustments = {}
        for variant in compared_variants:
            reaction_fluxes = {r: [] for r in REACTIONS}
            enzyme_concentrations = {e: [] for e in enzyme_names}
            km_adjustments = {r: [] for r in reactions_with_km}
            for sim_dir in ap.get_cells(variant=[variant]):
                simOutDir = os.path.join(sim_dir, "simOut")

                # Listeners used
                try:
                    kinetics_reader = TableReader(
                        os.path.join(simOutDir, 'EnzymeKinetics'))
                    fbaResults = TableReader(
                        os.path.join(simOutDir, "FBAResults"))
                except Exception as e:
                    print(e)
                    continue

                # read from kinetics listener
                counts_to_molar = ((COUNTS_UNITS / VOLUME_UNITS) *
                                   kinetics_reader.readColumn('countsToMolar')
                                   [START_TIME_STEP:].reshape(-1, 1))
                all_constraints_used = kinetics_reader.readColumn(
                    'reactionConstraint')[START_TIME_STEP:]

                # Store fluxes
                reactionIDs = np.array(fbaResults.readAttribute("reactionIDs"))
                reactionFluxes = fbaResults.readColumn("reactionFluxes")[
                    START_TIME_STEP:, :]
                reaction_flux_dict = dict(zip(reactionIDs, reactionFluxes.T))
                for reaction_id in REACTIONS:
                    reaction_fluxes[reaction_id].extend(
                        list(reaction_flux_dict[reaction_id]))

                # Store enzyme concentrations
                enzyme_counts, met_counts = read_bulk_molecule_counts(
                    simOutDir, (enzyme_names, km_metabolites))
                enzyme_conc = counts_to_molar.asNumber(
                    COUNTS_UNITS /
                    VOLUME_UNITS) * enzyme_counts[START_TIME_STEP:, :]
                met_conc = counts_to_molar.asNumber(
                    units.umol / units.L) * met_counts[START_TIME_STEP:, :]
                for enzyme_id, conc_time_series in zip(enzyme_names,
                                                       enzyme_conc.T):
                    enzyme_concentrations[enzyme_id].extend(
                        list(conc_time_series))

                # Calculate enzyme saturation for reactions with KM values
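                # saturation = [S] / ([S] + KM), applied only at time steps
                # where the corresponding KM-based constraint was active.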
                adjust_km = np.zeros(
                    (len(counts_to_molar), len(km_constraint_indices)), bool)
                for i, idx in enumerate(km_constraint_indices):
                    constraint_used, _ = np.where(all_constraints_used == idx)
                    adjust_km[constraint_used, i] = True
                enzyme_saturation = met_conc / (met_conc + kms)
                enzyme_saturation[~adjust_km] = 1
                for rxn, saturation in zip(reactions_with_km,
                                           enzyme_saturation.T):
                    km_adjustments[rxn].extend(list(saturation))

            all_reaction_fluxes[variant] = reaction_fluxes
            all_enzyme_concentrations[variant] = enzyme_concentrations
            all_km_adjustments[variant] = km_adjustments

        ### Make figure ###
        cols = 1
        rows = len(REACTIONS)
        plt.figure(figsize=(cols * 3, rows * 5))

        # go through each reaction to show predicted k_cat distribution for the
        # new and old variant, and experimental measurements
        for reaction_idx, reaction_id in enumerate(REACTIONS):
            enzyme_id = reaction_to_enzyme[reaction_id]

            # old measurements
            reaction_measurements = OLD_MEASUREMENTS[reaction_id]
            measurements = reaction_measurements['measurements']
            temps = reaction_measurements['temps']
            adjusted_measurements = np.array([
                2**((37. - t) / 10.) * m
                for (m, t) in zip(measurements, temps)
            ])
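            # The temperature correction assumes a Q10 of 2 (rates roughly double per
            # 10 C), e.g. a measurement at 25 C is scaled by 2**((37 - 25) / 10) ~= 2.3
            # to approximate the rate at 37 C.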

            # new measurements
            reaction_measurements = NEW_MEASUREMENTS.get(reaction_id, {})
            measurements = reaction_measurements.get('measurements', [])
            temps = reaction_measurements.get('temps', [])
            new_adjusted_measurements = np.array([
                2**((37. - t) / 10.) * m
                for (m, t) in zip(measurements, temps)
            ])

            # get effective kcat for GLUTATHIONE-REDUCT
            if reaction_id == 'GLUTATHIONE-REDUCT-NADPH-RXN':
                # saturated_fraction calculated from Smirnova, et al. (2005). "Effects of cystine and
                # hydrogen peroxide on glutathione status and expression of antioxidant genes in Escherichia coli"
                # Oxidized glutathione (GSSG in table 2) gives ~19 uM concentration (with 0.3 dry fraction and 1.1 g/mL density)
                # With 61 uM Km for this reaction, that gives a saturated fraction of 0.238
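                # i.e. saturated_fraction = [GSSG] / ([GSSG] + KM) = 19 / (19 + 61) ~= 0.238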
                saturated_fraction = 0.238
                new_adjusted_measurements = adjusted_measurements * saturated_fraction

            # Initialize subplots
            ax = plt.subplot(rows, cols, reaction_idx + 1)

            # calculate the reaction's k_cat distribution for each compared variant
            k_cat_distribution = {}
            for variant in compared_variants:
                ## Get data
                rxn_fluxes = np.array(
                    all_reaction_fluxes[variant][reaction_id])  # mmol / L / s
                enzyme_concs = np.array(
                    all_enzyme_concentrations[variant][enzyme_id])  # mmol / L
                saturation = np.array(all_km_adjustments[variant].get(
                    reaction_id, [1] * len(rxn_fluxes)))

                # calculate k_cats (adjusted for saturation in the sim), remove zeros, save to this variant's distribution
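                # Under Michaelis-Menten kinetics v = k_cat * [E] * S / (S + KM), so the
                # apparent k_cat is recovered as flux / ([E] * saturation); near-zero
                # values are dropped to avoid numerical noise in the distribution.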
                k_cats = rxn_fluxes / enzyme_concs / saturation
                k_cats = k_cats[k_cats > 1e-10]
                k_cat_distribution[variant] = k_cats

            data = [
                k_cat_distribution[old_variant],
                k_cat_distribution[new_variant]
            ]

            # plot
            violin_pos = [1, 3]  # position of violin plots [old, new]
            measure_pos = 2  # position of measurements
            ax.violinplot(data,
                          violin_pos,
                          widths=1.0,
                          showmeans=False,
                          showextrema=False,
                          showmedians=False)
            ax.scatter(np.full_like(adjusted_measurements, measure_pos),
                       adjusted_measurements,
                       marker='o',
                       color='#eb7037',
                       s=50,
                       alpha=0.7)
            ax.scatter(np.full_like(new_adjusted_measurements, measure_pos),
                       new_adjusted_measurements,
                       marker='o',
                       color='#eb7037',
                       s=50,
                       alpha=0.7)

            # format
            rxn_id_length = 25
            text_reaction_id = ('reaction: %s' % reaction_id[:rxn_id_length])
            labels = [
                '\nModel Predicted\n(Old Constraints)', 'Measured',
                '\nModel Predicted\n(New Constraints)'
            ]
            ax.set_title(text_reaction_id, fontsize=8)
            ax.set_ylabel('$k_{cat}$ (1/s)', fontsize=8)
            set_ticks(ax, labels)
            ax.set_yscale('log')

        ### Create Plot ###
        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close('all')
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		if not os.path.isdir(inputDir):
			raise Exception, 'inputDir does not currently exist as a directory'

		ap = AnalysisPaths(inputDir, variant_plot=True)
		variants = ap.get_variants()
		n_variants = len(variants)

		if n_variants <= 1:
			print('This plot only runs for multiple variants.')
			return

		filepath.makedirs(plotOutDir)

		# Load validation data
		with open(validationDataFile, 'rb') as f:
			validation_data = cPickle.load(f)
		toya_reactions = validation_data.reactionFlux.toya2010fluxes['reactionID']
		toya_fluxes = np.array([x.asNumber(DCW_FLUX_UNITS) for x in validation_data.reactionFlux.toya2010fluxes['reactionFlux']])
		outlier_filter = [rxn not in OUTLIER_REACTIONS for rxn in toya_reactions]

		# Arrays to populate for plots
		lambdas = np.zeros(n_variants)
		n_sims = np.zeros(n_variants)
		growth_rates = np.zeros(n_variants)
		conc_correlation = np.zeros(n_variants)
		n_conc_off_axis = np.zeros(n_variants)
		flux_correlation = np.zeros(n_variants)
		nonzero_flux_correlation = np.zeros(n_variants)
		n_flux_above_0 = np.zeros(n_variants)
		n_flux_off_axis = np.zeros(n_variants)
		correlation_coefficient = np.zeros(n_variants)
		filtered_correlation_coefficient = np.zeros(n_variants)
		homeostatic_objective_value = np.zeros(n_variants)
		kinetic_objective_value = np.zeros(n_variants)
		homeostatic_objective_std = np.zeros(n_variants)
		kinetic_objective_std = np.zeros(n_variants)

		# Pull information from sim data and listeners in parallel
		pool = Pool(processes=parallelization.plotter_cpus())
		args = zip(
			variants,
			[ap] * n_variants,
			[toya_reactions] * n_variants,
			[toya_fluxes] * n_variants,
			[outlier_filter] * n_variants
			)
		results = pool.map(analyze_variant, args)
		pool.close()
		pool.join()
		for i, result in enumerate(results):
			(lambdas[i],
				n_sims[i],
				growth_rates[i],
				conc_correlation[i],
				n_conc_off_axis[i],
				flux_correlation[i],
				n_flux_off_axis[i],
				nonzero_flux_correlation[i],
				n_flux_above_0[i],
				correlation_coefficient[i],
				filtered_correlation_coefficient[i],
				kinetic_objective_value[i],
				kinetic_objective_std[i],
				homeostatic_objective_value[i],
				homeostatic_objective_std[i],
				n_metabolites,
				n_fluxes) = result

		tick_labels = [r'$10^{%i}$' % (np.log10(x),) if x != 0 else '0' for x in lambdas]
		lambdas = [np.log10(x) if x != 0 else np.nanmin(np.log10(lambdas[lambdas != 0]))-1 for x in lambdas]
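		# A lambda of 0 cannot be placed on the log-scaled x positions, so it is
		# plotted one decade below the smallest non-zero lambda and labeled '0'.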

		plt.figure(figsize = (8.5, 22))
		plt.style.use('seaborn-deep')
		subplots = 8

		# Growth rates
		ax = plt.subplot(subplots, 1, 1)
		plt.bar(lambdas, growth_rates / growth_rates[0], align='center')
		plt.axhline(1, linestyle='--', color='k')
		plt.ylim([0, 2])
		plt.ylabel('Growth rate deviation\nfrom no kinetics')
		whitePadSparklineAxis(ax, xAxis=False)
		plt.yticks([0, 1, 2])

		# Flux target comparisons
		ax = plt.subplot(subplots, 1, 2)
		plt.bar(lambdas, nonzero_flux_correlation, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Kinetic target flux PCC')
		whitePadSparklineAxis(ax, xAxis=False)

		ax = plt.subplot(subplots, 1, 3)
		plt.bar(lambdas, n_flux_above_0 / n_fluxes, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Fraction of fluxes\nabove 0')
		whitePadSparklineAxis(ax, xAxis=False)

		ax = plt.subplot(subplots, 1, 4)
		plt.bar(lambdas, n_flux_off_axis / n_fluxes, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Fraction of fluxes\noff axis (>{:.0f}%)'.format(FRAC_FLUX_OFF_AXIS*100))
		whitePadSparklineAxis(ax, xAxis=False)

		# Metabolite comparisons
		ax = plt.subplot(subplots, 1, 5)
		plt.bar(lambdas, conc_correlation, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Concentration PCC')
		whitePadSparklineAxis(ax, xAxis=False)

		ax = plt.subplot(subplots, 1, 6)
		plt.bar(lambdas, n_conc_off_axis / n_metabolites, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Fraction of concentrations\noff axis (>{:.0f}%)'.format(FRAC_CONC_OFF_AXIS*100))
		whitePadSparklineAxis(ax, xAxis=False)

		# Toya comparison
		ax = plt.subplot(subplots, 1, 7)
		plt.bar(lambdas, filtered_correlation_coefficient, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Central carbon flux PCC')
		whitePadSparklineAxis(ax, xAxis=False)

		# Viable sims
		ax = plt.subplot(subplots, 1, 8)
		plt.bar(lambdas, n_sims, align='center')
		plt.ylabel('Number of sims\nwith data')
		whitePadSparklineAxis(ax)
		plt.xticks(lambdas, tick_labels)

		plt.xlabel('lambda')

		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		# Plot kinetic vs homeostatic objective values
		plt.figure(figsize=(3.5, 3.5))
		ax = plt.gca()
		ax.set_xscale("log", nonposx='clip')
		ax.set_yscale("log", nonposy='clip')
		plt.errorbar(homeostatic_objective_value, kinetic_objective_value, xerr=homeostatic_objective_std, yerr=kinetic_objective_std, fmt='none', ecolor='k', alpha=0.5, linewidth=0.5)
		plt.plot(homeostatic_objective_value, kinetic_objective_value, "ob", markeredgewidth=0.1, alpha=0.9)
		for i in range(len(lambdas)):
			plt.text(homeostatic_objective_value[i], 0.6*kinetic_objective_value[i], i, horizontalalignment='center', verticalalignment='center')
		plt.xlabel('Homeostatic Objective Value')
		plt.ylabel('Kinetics Objective Value')

		whitePadSparklineAxis(ax)

		# Adjust limits to get tick labels to display
		xlim = ax.get_xlim()
		xlim = [10**np.floor(np.log10(xlim[0])), 10**np.ceil(np.log10(xlim[1]))]
		ax.set_xticks(xlim)
		ylim = ax.get_ylim()
		ylim = [10**np.floor(np.log10(ylim[0])), 10**np.ceil(np.log10(ylim[1]))]
		ax.set_yticks(ylim)

		exportFigure(plt, plotOutDir, '{}_obj'.format(plotOutFileName), metadata)

		plt.close('all')
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		if metadata.get('variant', '') != 'param_sensitivity':
			print('This plot only runs for the param_sensitivity variant.')
			return

		if not os.path.isdir(inputDir):
			raise Exception, 'inputDir does not currently exist as a directory'

		filepath.makedirs(plotOutDir)

		global ap
		ap = AnalysisPaths(inputDir, variant_plot=True)
		variants = np.array(ap.get_variants())

		# Check to analyze control (variant 0) separately from other variants
		use_control = False
		if CONTROL_VARIANT in variants:
			use_control = True
			variants = variants[variants != CONTROL_VARIANT]
		n_variants = len(variants)

		# Load one instance of sim_data to get number of parameters and ids
		global sim_data
		global validation_data
		with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
			sim_data = cPickle.load(f)
		with open(validationDataFile, 'rb') as f:
			validation_data = cPickle.load(f)

		# sim_data information
		total_params = np.sum(number_params(sim_data))
		rna_to_gene = {gene['rnaId']: gene['symbol'] for gene in sim_data.process.replication.geneData}
		monomer_to_gene = {gene['monomerId']: gene['symbol'] for gene in sim_data.process.replication.geneData}
		rna_ids = sim_data.process.transcription.rnaData['id']
		monomer_ids = sim_data.process.translation.monomerData['id']

		# IDs must match order from param_indices() from param_sensitivity.py variant
		param_ids = np.array(
			['{} RNA deg Km'.format(rna_to_gene[rna[:-3]]) for rna in rna_ids]
			+ ['{} protein deg rate'.format(monomer_to_gene[monomer[:-3]]) for monomer in monomer_ids]
			+ ['{} translation eff'.format(monomer_to_gene[monomer[:-3]]) for monomer in monomer_ids]
			+ ['{} synth prob'.format(rna_to_gene[rna[:-3]]) for rna in rna_ids])
		if len(param_ids) != total_params:
			raise ValueError('Number of adjusted parameters and list of ids do not match.')

		pool = Pool(processes=parallelization.plotter_cpus())
		args = zip(
			variants,
			[total_params] * n_variants,
			)

		results = pool.imap_unordered(analyze_variant, args)
		(increase_params_counts,
			decrease_params_counts,
			increase_params_growth_rate,
			decrease_params_growth_rate,
			increase_params_flux_correlation,
			decrease_params_flux_correlation) = reduce(operator.add, results)
		pool.close()
		pool.join()

		# Calculate effects and z score
		labels = [
			'growth rate',
			'flux correlation',
			]
		increase_params_data = np.vstack((
			increase_params_growth_rate / increase_params_counts,
			increase_params_flux_correlation / increase_params_counts,
			))
		decrease_params_data = np.vstack((
			decrease_params_growth_rate / decrease_params_counts,
			decrease_params_flux_correlation / decrease_params_counts,
			))
		n_outputs = len(labels)

		# Difference between effect when parameter increased vs decreased
		data_diff = increase_params_data - decrease_params_data
		mean_diff = np.nanmean(data_diff, axis=1).reshape(-1, 1)
		std_diff = np.nanstd(data_diff, axis=1).reshape(-1, 1)
		z_score_diff = (data_diff - mean_diff) / std_diff
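		# Each output metric is standardized across parameters, so a z score measures
		# how far a parameter's effect deviates from the typical parameter effect.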

		# Individual increase or decrease effects to check asymmetric effects
		all_data = np.hstack((increase_params_data, decrease_params_data))
		mean = np.nanmean(all_data, axis=1).reshape(-1, 1)
		std = np.nanstd(all_data, axis=1).reshape(-1, 1)
		z_score_increase = (increase_params_data - mean) / std
		z_score_decrease = (decrease_params_data - mean) / std

		# Get control data
		if use_control:
			control_counts, _, control_growth_rate, _, control_flux_correlation, _ = analyze_variant((CONTROL_VARIANT, total_params))
			control_data = [
				control_growth_rate[0] / control_counts[0],
				control_flux_correlation[0] / control_counts[0],
				]
		else:
			control_data = [None] * n_outputs

		# Multiple hypothesis adjustment for significance of each parameter.
		# Solves Gaussian CDF for how many standard deviations are needed to
		# include 1 - 0.05 / total_params of the data (test each parameter for p<0.05).
		n_stds = special.erfinv(2 * (1 - 0.05 / total_params) - 1) * np.sqrt(2)
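		# Equivalent to the Gaussian quantile scipy.stats.norm.ppf(1 - 0.05 / total_params),
		# i.e. a Bonferroni-corrected one-sided threshold across all tested parameters.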

		# Plot histograms
		plt.figure(figsize=(16, 4*n_outputs))
		n_cols = 4
		top_limit = 20  # limit of the number of highest/lowest parameters to plot
		for i, (z_diff, z_increase, z_decrease) in enumerate(zip(z_score_diff, z_score_increase, z_score_decrease)):
			sorted_idx = np.argsort(z_diff)
			above_idx = np.where(z_diff[sorted_idx] > n_stds)[0][-top_limit:]
			below_idx = np.where(z_diff[sorted_idx] < -n_stds)[0][:top_limit]

			## Plot z difference data
			ax = plt.subplot(n_outputs, n_cols, n_cols*i + 1)
			plt.yscale('symlog', linthreshy=0.01)
			plt.fill_between(range(total_params), z_diff[sorted_idx])
			plt.axhline(n_stds, color='k', linestyle='--')
			plt.axhline(-n_stds, color='k', linestyle='--')

			## Format axes
			sparkline.whitePadSparklineAxis(ax, xAxis=False)
			plt.xticks([])
			plt.yticks([-n_stds, 0, n_stds])
			ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
			lim = np.max(np.abs(plt.ylim()))
			plt.ylim([-lim, lim])
			if i == 0:
				plt.title('Difference of Positive and Negative\nParameter Changes')
			if i == n_outputs - 1:
				plt.xlabel('Sorted Parameters')
			plt.ylabel('Z score\nparameter effect on {}\n(log scale)'.format(labels[i]))

			## Plot single direction z data
			ax = plt.subplot(n_outputs, n_cols, n_cols*i + 2)
			plt.yscale('symlog', linthreshy=0.01)
			plt.step(range(total_params), z_increase[sorted_idx], color='g', linewidth=1, alpha=0.5)
			plt.step(range(total_params), z_decrease[sorted_idx], color='r', linewidth=1, alpha=0.5)
			plt.axhline(n_stds, color='k', linestyle='--')
			plt.axhline(-n_stds, color='k', linestyle='--')

			## Format axes
			sparkline.whitePadSparklineAxis(ax, xAxis=False)
			plt.xticks([])
			plt.yticks([-n_stds, 0, n_stds])
			ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
			plt.ylim([-lim, lim])
			if i == 0:
				plt.title('Positive and Negative\nParameter Changes')
			if i == n_outputs - 1:
				plt.xlabel('Sorted Parameters')

			## Plot highest parameters
			ax = plt.subplot(n_outputs, n_cols, n_cols*i + 3)
			plt.yscale('symlog', linthreshy=0.01)
			plt.bar(above_idx, z_diff[sorted_idx[above_idx]])
			plt.axhline(n_stds, color='k', linestyle='--')

			## Format axes
			sparkline.whitePadSparklineAxis(ax)
			ax.spines["bottom"].set_visible(False)
			ax.tick_params(bottom=False)
			plt.xticks(above_idx, param_ids[sorted_idx[above_idx]], rotation=90, fontsize=6)
			plt.yticks([0, n_stds])
			ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
			if i == 0:
				plt.title('Highest Positive Effect Parameters')
			if i == n_outputs - 1:
				plt.xlabel('Parameter IDs')

			## Plot lowest parameters
			ax = plt.subplot(n_outputs, n_cols, n_cols*i + 4)
			plt.yscale('symlog', linthreshy=0.01)
			plt.bar(below_idx, z_diff[sorted_idx[below_idx]])
			plt.axhline(-n_stds, color='k', linestyle='--')

			## Format axes
			sparkline.whitePadSparklineAxis(ax)
			ax.spines["bottom"].set_visible(False)
			ax.tick_params(bottom=False)
			plt.xticks(below_idx, param_ids[sorted_idx[below_idx]], rotation=90, fontsize=6)
			plt.yticks([-n_stds, 0])
			ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
			if i == 0:
				plt.title('Highest Negative Effect Parameters')
			if i == n_outputs - 1:
				plt.xlabel('Parameter IDs')

		## Save figure
		plt.tight_layout()
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		# Plot individual parameters
		individual_indices = [
			np.nanargmax(z_score_diff[0, :]),
			np.nanargmin(z_score_diff[0, :]),
			np.nanargmax(z_score_diff[1, :]),
			np.nanargmin(z_score_diff[1, :]),
			]
		n_individual = len(individual_indices)
		x_values = [-1, 0, 1]
		plt.figure()

		for i, label in enumerate(labels):
			shared_ax = None
			for j, idx in enumerate(individual_indices):
				## Shared y axis for each row
				ax = plt.subplot(n_outputs, n_individual, i*n_individual + j + 1, sharey=shared_ax)
				if shared_ax is None:
					shared_ax = ax

				## Plot data
				plt.plot(x_values, [decrease_params_data[i, idx], control_data[i], increase_params_data[i, idx]], 'x')

				## Format axes
				plt.xticks(x_values, ['Decrease', 'Control', 'Increase'])
				ax.tick_params(labelsize=6)
				ax.spines['right'].set_visible(False)
				ax.spines['top'].set_visible(False)
				if i < n_outputs - 1:
					ax.tick_params(labelbottom=False)
				if j > 0:
					ax.tick_params(labelleft=False)
				if i == 0:
					plt.title(param_ids[idx], fontsize=8)
				if j == 0:
					plt.ylabel(label, fontsize=7)

		## Save figure
		plt.tight_layout()
		exportFigure(plt, plotOutDir, '{}_individual'.format(plotOutFileName), metadata)
		plt.close('all')

		# Save z scores to tsv
		with open(os.path.join(plotOutDir, '{}.tsv'.format(plotOutFileName)), 'w') as f:
			writer = csv.writer(f, delimiter='\t')

			writer.writerow(
				['Parameter']
				+ headers(labels, 'Z-score, difference')
				+ headers(labels, 'Z-score, increase')
				+ headers(labels, 'Z-score, decrease')
				+ headers(labels, 'Raw average, difference')
				+ headers(labels, 'Raw average, increase')
				+ headers(labels, 'Raw average, decrease')
				)
			writer.writerows(np.hstack((
				param_ids.reshape(-1, 1),
				z_score_diff.T,
				z_score_increase.T,
				z_score_decrease.T,
				data_diff.T,
				increase_params_data.T,
				decrease_params_data.T
				)))
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		if not os.path.isdir(inputDir):
			raise Exception, 'inputDir does not currently exist as a directory'

		filepath.makedirs(plotOutDir)

		with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
			sim_data = cPickle.load(f)
		with open(validationDataFile, 'rb') as f:
			validation_data = cPickle.load(f)

		ap = AnalysisPaths(inputDir, variant_plot=True)
		variants = ap.get_variants()
		expected_n_variants = 2
		n_variants = len(variants)

		if n_variants < expected_n_variants:
			print('This plot only runs for {} variants.'.format(expected_n_variants))
			return

		# IDs for appropriate proteins
		ids_complexation = sim_data.process.complexation.moleculeNames
		ids_complexation_complexes = sim_data.process.complexation.ids_complexes
		ids_equilibrium = sim_data.process.equilibrium.moleculeNames
		ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
		ids_translation = sim_data.process.translation.monomerData['id'].tolist()
		ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))

		# Stoichiometry matrices
		equil_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
		complex_stoich = sim_data.process.complexation.stoichMatrixMonomers()

		# Protein container views
		protein_container = BulkObjectsContainer(ids_protein, dtype=np.float64)
		view_complexation = protein_container.countsView(ids_complexation)
		view_complexation_complexes = protein_container.countsView(ids_complexation_complexes)
		view_equilibrium = protein_container.countsView(ids_equilibrium)
		view_equilibrium_complexes = protein_container.countsView(ids_equilibrium_complexes)

		# Load model data
		model_counts = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
		model_std = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
		for i, variant in enumerate(variants):
			if i >= expected_n_variants:
				print('Skipping variant {} - only runs for {} variants.'.format(variant, expected_n_variants))
				continue

			variant_counts = []
			for sim_dir in ap.get_cells(variant=[variant]):
				simOutDir = os.path.join(sim_dir, 'simOut')

				# Listeners used
				unique_counts_reader = TableReader(os.path.join(simOutDir, 'UniqueMoleculeCounts'))

				# Account for bulk molecules
				(bulk_counts,) = read_bulk_molecule_counts(simOutDir, ids_protein)
				protein_container.countsIs(bulk_counts.mean(axis=0))

				# Account for unique molecules
				ribosome_index = unique_counts_reader.readAttribute('uniqueMoleculeIds').index('activeRibosome')
				rnap_index = unique_counts_reader.readAttribute('uniqueMoleculeIds').index('activeRnaPoly')
				n_ribosomes = unique_counts_reader.readColumn('uniqueMoleculeCounts')[:, ribosome_index]
				n_rnap = unique_counts_reader.readColumn('uniqueMoleculeCounts')[:, rnap_index]
				protein_container.countsInc(n_ribosomes.mean(), [sim_data.moleculeIds.s30_fullComplex, sim_data.moleculeIds.s50_fullComplex])
				protein_container.countsInc(n_rnap.mean(), [sim_data.moleculeIds.rnapFull])

				# Account for small-molecule bound complexes
				view_equilibrium.countsDec(equil_stoich.dot(view_equilibrium_complexes.counts()))

				# Account for monomers in complexed form
				view_complexation.countsDec(complex_stoich.dot(view_complexation_complexes.counts()))

				variant_counts.append(protein_container.countsView(PROTEINS_WITH_HALF_LIFE).counts())
			model_counts[:, i] = np.mean(variant_counts, axis=0)
			model_std[:, i] = np.std(variant_counts, axis=0)

		# Validation data
		schmidt_ids = {m: i for i, m in enumerate(validation_data.protein.schmidt2015Data['monomerId'])}
		schmidt_counts = validation_data.protein.schmidt2015Data['glucoseCounts']
		validation_counts = np.array([schmidt_counts[schmidt_ids[p]] for p in PROTEINS_WITH_HALF_LIFE])

		# Process data
		model_log_counts = np.log10(model_counts)
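		# Convert count standard deviations to asymmetric error bars in log10 space:
		# lower = log10(mean) - log10(mean - std), upper = log10(mean + std) - log10(mean).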
		model_log_lower_std = model_log_counts - np.log10(model_counts - model_std)
		model_log_upper_std = np.log10(model_counts + model_std) - model_log_counts
		validation_log_counts = np.log10(validation_counts)
		r_before = stats.pearsonr(validation_log_counts, model_log_counts[:, 0])
		r_after = stats.pearsonr(validation_log_counts, model_log_counts[:, 1])

		# Scatter plot of model vs validation counts
		max_counts = np.ceil(max(validation_log_counts.max(), model_log_upper_std.max()))
		limits = [0, max_counts]
		plt.figure()
		colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

		## Plot data
		for i in range(expected_n_variants):
			plt.errorbar(validation_log_counts, model_log_counts[:, i],
				yerr=np.vstack((model_log_lower_std[:, i], model_log_upper_std[:, i])),
				fmt='o', color=colors[i], ecolor='k', capsize=3, alpha=0.5)
		plt.plot(limits, limits, 'k--', linewidth=0.5, label='_nolegend_')

		## Format axes
		plt.xlabel('Validation Counts\n(log10(counts))')
		plt.ylabel('Average Simulation Counts\n(log10(counts))')
		ax = plt.gca()
		ax.spines['right'].set_visible(False)
		ax.spines['top'].set_visible(False)
		ax.spines['left'].set_position(('outward', 10))
		ax.spines['bottom'].set_position(('outward', 10))
		ax.xaxis.set_major_locator(MaxNLocator(integer=True))
		ax.yaxis.set_major_locator(MaxNLocator(integer=True))

		## Add legend
		legend_text = [
			'Before: r={:.2f}, p={:.3f}'.format(r_before[0], r_before[1]),
			'After: r={:.2f}, p={:.3f}'.format(r_after[0], r_after[1]),
			]
		plt.legend(legend_text, frameon=False)

		plt.tight_layout()
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		plt.close('all')
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if not os.path.isdir(inputDir):
            raise Exception, "inputDir does not currently exist as a directory"

        ap = AnalysisPaths(inputDir, variant_plot=True)
        all_cells = ap.get_cells()

        if not os.path.exists(plotOutDir):
            os.mkdir(plotOutDir)

        rnaToProteinDict = {}
        dnaToProteinDict = {}
        elngRateDict = {}
        stableRnaFractionDict = {}
        doublingPerHourDict = {}

        variantSimDataFile = ap.get_variant_kb(ap.get_variants()[0])
        with open(variantSimDataFile, "rb") as f:
            sim_data = cPickle.load(f)
        nAvogadro = sim_data.constants.nAvogadro.asNumber()
        chromMass = (sim_data.getter.getMass(['CHROM_FULL[c]'])[0] /
                     sim_data.constants.nAvogadro).asNumber()

        for simDir in all_cells:
            simOutDir = os.path.join(simDir, "simOut")
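            # Parse the 6-digit variant index from the variant directory name, which
            # (assuming the usual <variant>_NNNNNN/<seed>/generation_... layout) starts
            # 14 characters before 'generation_' in the path.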
            variant = int(simDir[simDir.rfind('generation_') -
                                 14:simDir.rfind('generation_') - 8])

            mass = TableReader(os.path.join(simOutDir, "Mass"))

            protein = mass.readColumn("proteinMass") * 10**-15
            rna = mass.readColumn("rnaMass") * 10**-15
            dna = mass.readColumn("dnaMass") * 10**-15

            growthRate = mass.readColumn("instantaniousGrowthRate")
            doublingTime = np.nanmean(np.log(2) / growthRate / 60)
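            # Doubling time in minutes from the instantaneous growth rate (1/s):
            # t_d = ln(2) / mu, with the factor of 60 converting seconds to minutes.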

            rnaNT = rna / NT_MW * nAvogadro
            proteinAA = protein / PROTEIN_MW * nAvogadro
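            # The Mass listener columns appear to be in fg (hence the 1e-15 factor above
            # to convert to g); dividing by an average monomer molecular weight and
            # scaling by Avogadro's number gives approximate nucleotide and amino acid counts.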

            # Count chromosome equivalents
            chromEquivalents = dna / chromMass

            # Load ribosome data
            ribosomeDataFile = TableReader(
                os.path.join(simOutDir, "RibosomeData"))
            actualElongations = ribosomeDataFile.readColumn(
                "actualElongations")
            ribosomeDataFile.close()

            transcriptDataFile = TableReader(
                os.path.join(simOutDir, "TranscriptElongationListener"))
            rnaSynth = transcriptDataFile.readColumn("countRnaSynthesized")
            isTRna = sim_data.process.transcription.rnaData["isTRna"]
            isRRna = sim_data.process.transcription.rnaData["isRRna"]
            stableRnaSynth = np.sum(rnaSynth[:, isTRna], axis=1) + np.sum(
                rnaSynth[:, isRRna], axis=1)
            totalRnaSynth = np.sum(rnaSynth, axis=1).astype(float)
            rnaFraction = stableRnaSynth / totalRnaSynth

            uniqueMoleculeCounts = TableReader(
                os.path.join(simOutDir, "UniqueMoleculeCounts"))

            ribosomeIndex = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds").index("activeRibosome")
            activeRibosome = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")[:, ribosomeIndex]

            uniqueMoleculeCounts.close()

            initialTime = TableReader(os.path.join(
                simOutDir, "Main")).readAttribute("initialTime")
            t = TableReader(os.path.join(
                simOutDir, "Main")).readColumn("time") - initialTime
            timeStepSec = TableReader(os.path.join(
                simOutDir, "Main")).readColumn("timeStepSec")

            if variant in rnaToProteinDict:
                rnaToProteinDict[variant] = np.append(
                    rnaToProteinDict[variant], rnaNT / (proteinAA / 100))
                dnaToProteinDict[variant] = np.append(
                    dnaToProteinDict[variant],
                    chromEquivalents / (proteinAA / 10**9))
                elngRateDict[variant] = np.append(
                    elngRateDict[variant],
                    (actualElongations / activeRibosome / timeStepSec)[3:])
                stableRnaFractionDict[variant] = np.append(
                    stableRnaFractionDict[variant],
                    np.asarray(rnaFraction)[~np.isnan(rnaFraction)])
                doublingPerHourDict[variant] = np.append(
                    doublingPerHourDict[variant], 60 / doublingTime)
            else:
                rnaToProteinDict[variant] = rnaNT / (proteinAA / 100)
                dnaToProteinDict[variant] = chromEquivalents / (proteinAA /
                                                                10**9)
                elngRateDict[variant] = (actualElongations / activeRibosome /
                                         timeStepSec)[3:]
                stableRnaFractionDict[variant] = np.asarray(
                    rnaFraction)[~np.isnan(rnaFraction)]
                doublingPerHourDict[variant] = 60 / doublingTime

        rnaToProtein = []
        dnaToProtein = []
        elngRate = []
        stableRnaFraction = []
        doublingPerHour = []

        for key in rnaToProteinDict.keys():
            rnaToProtein += [rnaToProteinDict[key]]
            dnaToProtein += [dnaToProteinDict[key]]
            elngRate += [elngRateDict[key]]
            stableRnaFraction += [stableRnaFractionDict[key]]
            doublingPerHour += [np.mean(doublingPerHourDict[key])]

        plt.figure(figsize=(8.5, 11))

        sp = plt.subplot(4, 1, 1)
        sp.violinplot(rnaToProtein, positions=doublingPerHour, showmeans=True)
        sp.set_ylabel("RNA to Protein\n(nuc/100 aa)")

        sp = plt.subplot(4, 1, 2)
        sp.violinplot(dnaToProtein, positions=doublingPerHour, showmeans=True)
        sp.set_ylabel("DNA to Protein\n(chrom eq/10^9 aa)")

        sp = plt.subplot(4, 1, 3)
        sp.violinplot(elngRate, positions=doublingPerHour, showmeans=True)
        sp.set_ylabel("Ribosome Elongation\nRate (aa/s)")

        sp = plt.subplot(4, 1, 4)
        sp.violinplot(stableRnaFraction,
                      positions=doublingPerHour,
                      showmeans=True)
        sp.set_ylabel("Rate Stable RNA to\nRate Total RNA")
        sp.set_xlabel("Doublings per Hour")

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if metadata.get('variant', '') != 'flux_sensitivity':
            print('This plot only runs for the flux_sensitivity variant.')
            return

        if not os.path.isdir(inputDir):
            raise Exception, 'inputDir does not currently exist as a directory'

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        succ_fluxes = []
        iso_fluxes = []
        for variant in variants:
            for sim_dir in ap.get_cells(variant=[variant]):
                simOutDir = os.path.join(sim_dir, "simOut")

                # Listeners used
                fba_reader = TableReader(os.path.join(simOutDir, 'FBAResults'))

                # Load data
                reactions = np.array(
                    fba_reader.readAttribute('sensitivity_reactions'))
                succ_fluxes += [
                    fba_reader.readColumn('succinate_flux_sensitivity')[1:, :]
                ]
                iso_fluxes += [
                    fba_reader.readColumn('isocitrate_flux_sensitivity')[1:, :]
                ]

        succ_fluxes = np.vstack(succ_fluxes)
        iso_fluxes = np.vstack(iso_fluxes)

        succ_z = calc_z(succ_fluxes)
        iso_z = calc_z(iso_fluxes)

        threshold = -0.1

        # Plot data
        plt.figure()
        gs = gridspec.GridSpec(2, 2)

        ## Succinate dehydrogenase all fluxes
        ax = plt.subplot(gs[0, 0])
        plot_lows(ax, succ_z, threshold, 'succinate dehydrogenase')

        ## Succinate dehydrogenase fluxes over threshold
        ax = plt.subplot(gs[0, 1])
        plot_threshold(ax, succ_z, threshold, reactions)

        ## Isocitrate dehydrogenase all fluxes
        ax = plt.subplot(gs[1, 0])
        plot_lows(ax, iso_z, threshold, 'isocitrate dehydrogenase')

        ## Isocitrate dehydrogenase fluxes over threshold
        ax = plt.subplot(gs[1, 1])
        plot_threshold(ax, iso_z, threshold, reactions)

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        plt.close('all')
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		if not os.path.isdir(inputDir):
			raise Exception, "variantDir does not currently exist as a directory"

		if not os.path.exists(plotOutDir):
			os.mkdir(plotOutDir)

		ap = AnalysisPaths(inputDir, variant_plot = True)

		if ap.n_generation == 1:
			print "Need more data to create addedMass"
			return

		allScatter = plt.figure()
		allScatter.set_figwidth(11)
		allScatter.set_figheight(6)

		plt.style.use('seaborn-deep')
		color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

		title_list = [r"Glucose minimal, $\tau = $44 min", r"Glucose minimal anaerobic, $\tau = $100 min", r"Glucose minimal + 20 amino acids, $\tau = $25 min"]

		for varIdx in ap.get_variants():

			if varIdx == 0:
				plotIdx = 1
				gen = [2,3]
			elif varIdx == 1:
				plotIdx = 0
				gen = [2,3]
			elif varIdx == 2:
				plotIdx = 2
				gen = [2,3]
			else:
				continue

			initial_masses = np.zeros(0)
			final_masses = np.zeros(0)

			all_cells = ap.get_cells(generation=gen, variant=[varIdx])
			if len(all_cells) == 0:
				continue

			fail = 0
			for simDir in all_cells:
				try:
					simOutDir = os.path.join(simDir, "simOut")
					mass = TableReader(os.path.join(simOutDir, "Mass"))
					cellMass = mass.readColumn("dryMass")

					initial_masses = np.hstack((initial_masses, cellMass[0]))
					final_masses = np.hstack((final_masses, cellMass[-1]))
				except Exception as e:
					print(e)
					fail += 1

			added_masses = final_masses - initial_masses

			scaled_initial_masses = initial_masses / initial_masses.mean()
			scaled_added_masses = added_masses / added_masses.mean()

			nbins = 5

			n, xbin = np.histogram(scaled_initial_masses, bins=nbins)
			sy, xbin = np.histogram(scaled_initial_masses, bins=nbins, weights=scaled_added_masses)
			sy2, xbin = np.histogram(scaled_initial_masses, bins=nbins, weights=scaled_added_masses*scaled_added_masses)
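			# Per-bin mean and sample standard deviation of the added mass, using the
			# identity var = (sum(x^2) - n*mean^2) / (n - 1) on the histogram sums.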
			mean = sy / n
			std = np.sqrt(sy2/(n-1) - n*mean*mean/(n-1))

			slope, intercept, r_value, p_value, std_err = linregress(scaled_initial_masses, scaled_added_masses)

			# plot all scatter plots
			plt.figure(allScatter.number)
			ax = plt.subplot2grid((1,3), (0,plotIdx))
			ax.plot(scaled_initial_masses, scaled_added_masses, '.', color = "black", alpha = 0.2, zorder=1, markeredgewidth = 0.0)
			ax.errorbar(((xbin[1:] + xbin[:-1])/2), mean, yerr=std, color = "black", linewidth=1, zorder=2)
			ax.plot(scaled_initial_masses, slope * scaled_initial_masses + intercept, color = "blue")

			ax.set_title(
				title_list[varIdx] + ", n=%d" % ((len(all_cells) - fail), ) + "\n" +
				r"$m_{add}$=%.3f$\times$$m_{init}$ + %.3f" % (slope,intercept) + "\n" +
				"r-value=%0.2g" % r_value + "\n" +
				"p-value=%0.2g" % p_value,
				fontsize=FONT_SIZE)

			ax.set_xlim([INIT_MASS_LOWER_LIM, INIT_MASS_UPPER_LIM])
			ax.set_ylim([ADDED_MASS_LOWER_LIM, ADDED_MASS_UPPER_LIM])
			ax.get_yaxis().get_major_formatter().set_useOffset(False)
			ax.get_xaxis().get_major_formatter().set_useOffset(False)

			if varIdx == 1:
				ax.set_ylabel("Normed added mass", fontsize=FONT_SIZE)
			ax.set_xlabel("Normed initial mass", fontsize=FONT_SIZE)

			plt.subplots_adjust(bottom = 0.2)

			whitePadSparklineAxis(ax)

			for tick in ax.yaxis.get_major_ticks():
				tick.label.set_fontsize(FONT_SIZE)
			for tick in ax.xaxis.get_major_ticks():
				tick.label.set_fontsize(FONT_SIZE)

			# plot stripped figure
			fig = plt.figure()
			fig.set_figwidth(3)
			fig.set_figheight(2)
			ax = plt.subplot2grid((1,1), (0,0))
			ax.plot(scaled_initial_masses, scaled_added_masses, '.', color = color_cycle[0], alpha = 0.25, ms=6, zorder=1, markeredgewidth = 0.0, clip_on=False)
			ax.plot(scaled_initial_masses, slope * scaled_initial_masses + intercept, color = 'k')

			ax.set_xlim([INIT_MASS_LOWER_LIM, INIT_MASS_UPPER_LIM])
			ax.set_ylim([ADDED_MASS_LOWER_LIM, ADDED_MASS_UPPER_LIM])

			ax.get_yaxis().get_major_formatter().set_useOffset(False)
			ax.get_xaxis().get_major_formatter().set_useOffset(False)

			whitePadSparklineAxis(ax)

			ax.tick_params(which='both', bottom=True, left=True,
				top=False, right=False, labelbottom=True, labelleft=True,
				labelsize=FONT_SIZE)

			ax.set_xlabel("")
			ax.set_ylabel("")

			plt.tight_layout()
			exportFigure(plt, plotOutDir, plotOutFileName + str(varIdx) + "_stripped", metadata, transparent = True)

		plt.figure(allScatter.number)
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		plt.close("all")