def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """
        Template for a variant-level analysis plot.

        Loads the validation data, then walks every variant and every cell
        of that variant, reading the 'time' column of each cell's 'Main'
        table.  Nothing is drawn yet: an empty figure is exported so the
        template can be filled in at the "Create Plot" marker.

        Args:
            inputDir: directory handed to AnalysisPaths; must already exist.
            plotOutDir: directory the figure is exported to (created if
                needed via filepath.makedirs).
            plotOutFileName: file name passed through to exportFigure.
            simDataFile: not used by this method.
            validationDataFile: path of a pickle file holding the
                validation data.
            metadata: passed through to exportFigure.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if not os.path.isdir(inputDir):
            # Call form of raise: valid on both Python 2 and Python 3
            # (the "raise Exception, msg" form is a Python 3 syntax error).
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        # Template placeholder: loaded so a concrete plot can use it.
        with open(validationDataFile, 'rb') as f:
            validation_data = cPickle.load(f)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        for variant in variants:
            # Each variant has its own pickled sim_data object.
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)

            for sim_dir in ap.get_cells(variant=[variant]):
                simOutDir = os.path.join(sim_dir, "simOut")

                # Listeners used
                main_reader = TableReader(os.path.join(simOutDir, 'Main'))

                # Load data
                time = main_reader.readColumn('time')

        plt.figure()

        ### Create Plot ###

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        plt.close('all')
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """
        Scatter added volume against birth volume for the 'condition'
        variant and export a labelled and a "_clean" (label-free) figure.

        For every variant, the first and last entries of the 'cellMass'
        column of each generation 2-3 cell are converted to volumes by
        dividing by the variant's cell density expressed in fg/um^3.

        Args:
            inputDir: directory handed to AnalysisPaths; must already exist.
            plotOutDir: directory the figures are exported to.
            plotOutFileName: base file name; "_clean" is appended for the
                second export.
            simDataFile: not used by this method.
            validationDataFile: not used by this method.
            metadata: must map "variant" to "condition" for the plot to
                run; also passed through to exportFigure.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if metadata["variant"] != "condition":
            print("This plot only runs for the 'condition' variant.")
            return

        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        gens = [2, 3]

        # Labels are positional: entry i describes the i-th variant.
        labels = ["minimal", "anaerobic", "minimal + AA"]

        initial_volumes = []
        added_volumes = []

        for variant in variants:
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)

            # Density as a plain number in fg/um^3, computed once per variant.
            density = sim_data.constants.cellDensity.asNumber(
                units.fg / units.um**3)

            birth_masses = []
            division_masses = []

            # A variant without matching cells simply yields empty arrays.
            # It is NOT skipped, so list positions stay aligned with the
            # positional labels above.
            for simDir in ap.get_cells(variant=[variant], generation=gens):
                try:
                    simOutDir = os.path.join(simDir, "simOut")
                    mass = TableReader(os.path.join(simOutDir, "Mass"))
                    cellMass = mass.readColumn("cellMass")
                    first_mass, last_mass = cellMass[0], cellMass[-1]
                except Exception as e:
                    # Best effort: report the unreadable cell and move on.
                    print('Error with data for %s: %s' % (simDir, e))
                    continue

                birth_masses.append(first_mass)
                division_masses.append(last_mass)

            birth_masses = np.array(birth_masses, dtype=float)
            division_masses = np.array(division_masses, dtype=float)

            initial_volumes.append(birth_masses / density)
            added_volumes.append((division_masses - birth_masses) / density)

        if len(initial_volumes) < len(labels):
            # The plot indexes three variants; bail out with a message
            # instead of failing with an IndexError further down.
            print("This plot expects {} variants but only {} were found."
                  .format(len(labels), len(initial_volumes)))
            return

        plt.style.use('seaborn-deep')
        color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

        plt.figure(figsize=(4, 4))
        ax = plt.subplot2grid((1, 1), (0, 0))

        options = {
            "edgecolors": color_cycle[0],
            "alpha": 0.2,
            "s": 50,
            "clip_on": False
        }

        # (variant position, marker, extra scatter kwargs), in draw order.
        series = [
            (2, "x", {}),
            (0, "o", {"facecolors": "none"}),
            (1, "^", {"facecolors": "none"}),
        ]
        for position, marker, extra in series:
            scatter_kwargs = dict(options)
            scatter_kwargs.update(extra)
            ax.scatter(initial_volumes[position],
                       added_volumes[position],
                       marker=marker,
                       label=labels[position],
                       **scatter_kwargs)

        ax.set_xlim([0, 4])
        ax.set_ylim([0, 4])
        # Raw strings: "\m" is not a valid escape sequence.
        ax.set_xlabel(r"Birth Volume ($\mu m^3$)")
        ax.set_ylabel(r"Added Volume ($\mu m^3$)")
        ax.legend()

        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

        whitePadSparklineAxis(ax)

        ax.tick_params(which='both',
                       bottom=True,
                       left=True,
                       top=False,
                       right=False,
                       labelbottom=True,
                       labelleft=True)

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        # Get clean version of plot
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)

        plt.close("all")
Ejemplo n.º 3
0
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """
        Scatter added volume against birth volume for the 'condition'
        variant and export the figure.

        For every variant, the first and last entries of the 'cellMass'
        column of each generation 2-3 cell are converted to volumes by
        dividing by the variant's cell density expressed in fg/um^3.

        Args:
            inputDir: directory handed to AnalysisPaths; must already exist.
            plotOutDir: directory the figure is exported to.
            plotOutFileName: file name passed through to exportFigure.
            simDataFile: not used by this method.
            validationDataFile: not used by this method.
            metadata: must map "variant" to "condition" for the plot to
                run; also passed through to exportFigure.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if metadata["variant"] != "condition":
            print("This plot only runs for the 'condition' variant.")
            return

        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        gens = [2, 3]

        # Labels are positional: entry i describes the i-th variant.
        labels = ["minimal", "anaerobic", "+AA"]

        initial_volumes = []
        added_volumes = []

        for variant in variants:
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)

            # Density as a plain number in fg/um^3, computed once per variant.
            density = sim_data.constants.cellDensity.asNumber(
                units.fg / units.um**3)

            birth_masses = []
            division_masses = []

            # A variant without matching cells simply yields empty arrays.
            # It is NOT skipped, so list positions stay aligned with the
            # positional labels above.
            for simDir in ap.get_cells(variant=[variant], generation=gens):
                try:
                    simOutDir = os.path.join(simDir, "simOut")
                    mass = TableReader(os.path.join(simOutDir, "Mass"))
                    cellMass = mass.readColumn("cellMass")
                    first_mass, last_mass = cellMass[0], cellMass[-1]
                except Exception as e:
                    # Best effort: report the unreadable cell and move on.
                    print('Error with data for %s: %s' % (simDir, e))
                    continue

                birth_masses.append(first_mass)
                division_masses.append(last_mass)

            birth_masses = np.array(birth_masses, dtype=float)
            division_masses = np.array(division_masses, dtype=float)

            initial_volumes.append(birth_masses / density)
            added_volumes.append((division_masses - birth_masses) / density)

        if len(initial_volumes) < len(labels):
            # The plot indexes three variants; bail out with a message
            # instead of failing with an IndexError further down.
            print("This plot expects {} variants but only {} were found."
                  .format(len(labels), len(initial_volumes)))
            return

        plt.style.use('seaborn-deep')

        plt.figure(figsize=(5, 5))
        for position, label in enumerate(labels):
            plt.scatter(initial_volumes[position],
                        added_volumes[position],
                        s=3,
                        label=label)
        plt.xlim([0, 4])
        plt.ylim([0, 4])
        # Raw strings: "\m" is not a valid escape sequence.
        plt.xlabel(r"Birth Volume ($\mu m^3$)")
        plt.ylabel(r"Added Volume ($\mu m^3$)")
        plt.legend()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        plt.close("all")
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """
        Plot per-variant simulation averages against hard-coded reference
        values in a 2x2 grid, each panel drawn versus doubling time (min).

        Panels: RNA mass per cell, ribosome elongation rate, origins at
        replication initiation, and rrn initiation rate.  Simulation
        points carry error bars of one standard deviation across cells.

        Cells whose 'Main' table cannot be read are reported and excluded
        from every statistic (their slots stay NaN).

        Args:
            inputDir: directory handed to AnalysisPaths; must already exist.
            plotOutDir: directory the figure is exported to (created,
                including parents, if missing).
            plotOutFileName: file name passed through to exportFigure.
            simDataFile: not used by this method.
            validationDataFile: not used by this method.
            metadata: passed through to exportFigure.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if not os.path.isdir(inputDir):
            raise Exception("inputDir does not currently exist as a directory")

        if not os.path.exists(plotOutDir):
            os.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)

        # Reference values, index-aligned with bremer_tau (doubling time).
        # NOTE(review): presumably literature values (names say "bremer");
        # confirm the source and units before citing them.
        bremer_tau = np.array([40, 100, 24])
        bremer_origins_per_cell_at_initiation = np.array([2, 1, 4])
        bremer_rrn_init_rate = np.array([20 * 23, 4 * 12.4, 58 * 35.9])
        bremer_rna_mass_per_cell = np.array([77, 20, 211])
        bremer_elng_rate = np.array([18, 12, 21])

        sim_doubling_time = np.zeros(ap.n_variant)
        sim_doubling_time_std = np.zeros(ap.n_variant)

        sim_origins_per_cell_at_initiation = np.zeros(ap.n_variant)
        sim_rna_mass_per_cell = np.zeros(ap.n_variant)
        sim_elng_rate = np.zeros(ap.n_variant)
        sim_rrn_init_rate = np.zeros(ap.n_variant)

        sim_origins_per_cell_at_initiation_std = np.zeros(ap.n_variant)
        sim_elng_rate_std = np.zeros(ap.n_variant)
        sim_rna_mass_per_cell_std = np.zeros(ap.n_variant)
        sim_rrn_init_rate_std = np.zeros(ap.n_variant)

        variants = ap.get_variants()

        for varIdx in range(ap.n_variant):
            variant = variants[varIdx]

            print("variant {}".format(variant))

            all_cells = ap.get_cells(variant=[variant])
            n_cells = len(all_cells)

            print("Total cells: {}".format(n_cells))

            try:
                # Pickles are binary: open in 'rb' and close deterministically.
                with open(ap.get_variant_kb(variant), 'rb') as f:
                    sim_data = cPickle.load(f)
            except Exception as e:
                print("Couldn't load sim_data object. Exiting. {}".format(e))
                return

            isRRna = sim_data.process.transcription.rnaData["isRRna"]

            # NaN-initialised so that cells skipped below do not count as
            # zeros in the nan-aware statistics.
            num_origin_at_init = np.full(n_cells, np.nan)
            doubling_time = np.full(n_cells, np.nan)
            meanRnaMass = np.full(n_cells, np.nan)
            meanElngRate = np.full(n_cells, np.nan)
            meanRrnInitRate = np.full(n_cells, np.nan)

            for idx, simDir in enumerate(all_cells):
                print("cell {} of {}".format(idx, n_cells))

                simOutDir = os.path.join(simDir, "simOut")

                try:
                    main_reader = TableReader(os.path.join(simOutDir, "Main"))
                    time = main_reader.readColumn("time")
                    doubling_time[idx] = time[-1] - time[0]
                except Exception as e:
                    print('Error with data for %s: %s' % (simDir, e))
                    continue

                timeStepSec = main_reader.readColumn("timeStepSec")

                meanRnaMass[idx] = TableReader(os.path.join(
                    simOutDir, "Mass")).readColumn("rnaMass").mean()
                meanElngRate[idx] = TableReader(
                    os.path.join(simOutDir, "RibosomeData")).readColumn(
                        "effectiveElongationRate").mean()

                replication_reader = TableReader(
                    os.path.join(simOutDir, "ReplicationData"))
                numOrigin = replication_reader.readColumn("numberOfOric")
                massPerOric = replication_reader.readColumn(
                    "criticalMassPerOriC")

                # Origin count one step before each step where the critical
                # mass per oriC is >= 1.
                # NOTE(review): if step 0 qualifies, index -1 wraps to the
                # last time step - confirm whether that can happen.
                idxInit = np.where(massPerOric >= 1)[0]
                numOriginAtInit = numOrigin[idxInit - 1]
                if numOriginAtInit.size:
                    num_origin_at_init[idx] = numOriginAtInit.mean()
                else:
                    num_origin_at_init[idx] = np.nan

                transcriptDataFile = TableReader(
                    os.path.join(simOutDir, "TranscriptElongationListener"))
                rnaSynth = transcriptDataFile.readColumn("countRnaSynthesized")
                # Per-second rRNA synthesis scaled to per minute; the /3 is
                # kept from the original (presumably 3 rRNAs per rrn
                # operon - TODO confirm).
                meanRrnInitRate[idx] = (rnaSynth[:, isRRna].sum(axis=1) /
                                        timeStepSec).mean() * 60. / 3

            sim_rna_mass_per_cell[varIdx] = np.nanmean(meanRnaMass)
            sim_elng_rate[varIdx] = np.nanmean(meanElngRate)
            sim_origins_per_cell_at_initiation[varIdx] = np.nanmean(
                num_origin_at_init)
            sim_doubling_time[varIdx] = np.nanmean(doubling_time) / 60.
            sim_rrn_init_rate[varIdx] = np.nanmean(meanRrnInitRate)

            sim_rna_mass_per_cell_std[varIdx] = np.nanstd(meanRnaMass)
            sim_elng_rate_std[varIdx] = np.nanstd(meanElngRate)
            sim_origins_per_cell_at_initiation_std[varIdx] = np.nanstd(
                num_origin_at_init)
            sim_doubling_time_std[varIdx] = np.nanstd(doubling_time) / 60.
            sim_rrn_init_rate_std[varIdx] = np.nanstd(meanRrnInitRate)

        # Created only once all data is loaded, so the early return above
        # cannot leave an open figure behind.
        fig = plt.figure()
        fig.set_figwidth(5)
        fig.set_figheight(5)

        ax0 = plt.subplot2grid((2, 2), (0, 0))
        ax1 = plt.subplot2grid((2, 2), (1, 0), sharex=ax0)
        ax2 = plt.subplot2grid((2, 2), (0, 1), sharex=ax0)
        ax3 = plt.subplot2grid((2, 2), (1, 1), sharex=ax0)

        lines = {'linestyle': 'dashed'}
        plt.rc('lines', **lines)
        plt.style.use('seaborn-deep')
        color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

        # Both series are drawn from longest to shortest doubling time.
        sim_order = np.argsort(sim_doubling_time)[::-1]
        exp_order = np.argsort(bremer_tau)[::-1]

        def plot_comparison(ax, sim_values, sim_std, exp_values):
            # Simulation (with error bars) and reference series on one axis.
            ax.errorbar(sim_doubling_time[sim_order],
                        sim_values[sim_order],
                        yerr=sim_std[sim_order],
                        color=color_cycle[0],
                        **SIM_PLOT_STYLE)
            ax.errorbar(bremer_tau[exp_order],
                        exp_values[exp_order],
                        color=color_cycle[2],
                        **EXP_PLOT_STYLE)

        plot_comparison(ax0, sim_rna_mass_per_cell,
                        sim_rna_mass_per_cell_std, bremer_rna_mass_per_cell)
        ax0.set_title("RNA mass per cell (fg)", fontsize=FONT_SIZE)
        ax0.set_xlim([0, 135])
        ax0.set_ylim([0, 250])
        ax0.legend(loc=1, fontsize='xx-small', markerscale=0.5, frameon=False)

        plot_comparison(ax1, sim_elng_rate, sim_elng_rate_std,
                        bremer_elng_rate)
        ax1.set_title("Ribosome elongation\nrate (aa/s/ribosome)",
                      fontsize=FONT_SIZE)
        ax1.set_xlabel("Doubling time (min)", fontsize=FONT_SIZE)
        ax1.set_ylim([0, 24])

        plot_comparison(ax2, sim_origins_per_cell_at_initiation,
                        sim_origins_per_cell_at_initiation_std,
                        bremer_origins_per_cell_at_initiation)
        ax2.set_title("Average origins at chrom. init.", fontsize=FONT_SIZE)
        ax2.set_ylim([0.5, 4.5])

        plot_comparison(ax3, sim_rrn_init_rate, sim_rrn_init_rate_std,
                        bremer_rrn_init_rate)
        ax3.set_title("Rate of rrn initiation (1/min)", fontsize=FONT_SIZE)
        ax3.set_ylim([0, 2500])
        ax3.set_xlabel("Doubling time (min)", fontsize=FONT_SIZE)

        for a in [ax0, ax1, ax2, ax3]:
            for axis in (a.yaxis, a.xaxis):
                for tick in axis.get_major_ticks():
                    # label1 is the primary tick label; `tick.label` was
                    # only a deprecated alias for it.
                    tick.label1.set_fontsize(FONT_SIZE)

        whitePadSparklineAxis(ax0, False)
        whitePadSparklineAxis(ax1)
        whitePadSparklineAxis(ax2, False)
        whitePadSparklineAxis(ax3)

        plt.subplots_adjust(bottom=0.2, wspace=0.3)

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        # Release the figure.
        plt.close("all")
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""
		Plot, per variant, the fraction of simulated time spent with zero
		enzyme counts and with the metabolite concentration below
		THRESHOLD times its target concentration.

		One combined two-panel figure is exported, followed by one
		label-free figure per quantity (suffixes "_pabB" and
		"_methylene-thf").  Points are averages over seeds with standard
		deviation error bars.

		Args:
			inputDir: directory handed to AnalysisPaths; must already exist.
			plotOutDir: directory the figures are exported to (created,
				including parents, if missing).
			plotOutFileName: base file name for the exported figures.
			simDataFile: not used by this method.
			validationDataFile: not used by this method.
			metadata: passed through to exportFigure.

		Raises:
			Exception: if inputDir is not an existing directory.
		"""
		if not os.path.isdir(inputDir):
			raise Exception("inputDir does not currently exist as a directory")
		if not os.path.exists(plotOutDir):
			os.makedirs(plotOutDir)

		# Get cells
		ap = AnalysisPaths(inputDir, variant_plot = True)
		if ap.n_variant != len(FACTORS):
			print("This plot expects all variants of subgen_expression")
			return

		# Get constants from wildtype variant
		with open(ap.get_variant_kb(4), "rb") as f:  # 4 is the wildtype variant
			sim_data = cPickle.load(f)
		cellDensity = sim_data.constants.cellDensity
		nAvogadro = sim_data.constants.nAvogadro
		metabolite_target = sim_data.process.metabolism.concDict[METABOLITE_ID]
		metabolite_threshold = (THRESHOLD * metabolite_target).asNumber(CONC_UNITS)

		# Investigate each variant.  NaN marks (seed, variant) pairs without
		# any cells so they are ignored by the nan-aware statistics below.
		enzyme_depletion = np.full([ap.n_seed, ap.n_variant], np.nan)
		metabolite_depletion = np.full([ap.n_seed, ap.n_variant], np.nan)

		for variant in range(ap.n_variant):
			for seed in range(ap.n_seed):
				cells = ap.get_cells(variant=[variant], seed=[seed])
				if len(cells) == 0:
					# Nothing simulated for this pair: there is no total
					# time to normalise by, so leave the entries as NaN.
					continue

				time_enzyme_depleted = []  # seconds
				time_metabolite_depleted = []  # seconds
				total_time = None

				for simDir in cells:
					simOutDir = os.path.join(simDir, "simOut")

					main_reader = TableReader(os.path.join(simOutDir, "Main"))
					mass_reader = TableReader(os.path.join(simOutDir, "Mass"))

					# Get molecule counts
					(enzyme_counts, metabolite_counts) = read_bulk_molecule_counts(simOutDir, (ENZYME_IDS, [METABOLITE_ID]))

					# Compute time with zero counts of enzyme
					time_step_sec = main_reader.readColumn("timeStepSec")
					time_enzyme_depleted.append(time_step_sec[np.sum(enzyme_counts, axis=1) == 0].sum())

					# Compute time with end products under the target concentration
					mass = units.fg * mass_reader.readColumn("cellMass")
					volume = mass / cellDensity
					metabolite_conc = (1 / nAvogadro / volume * metabolite_counts).asNumber(CONC_UNITS)
					time_metabolite_depleted.append(time_step_sec[metabolite_conc < metabolite_threshold].sum())

					# End of this cell; after the loop this holds the value
					# for the last cell returned by get_cells.
					# NOTE(review): assumes "time" accumulates across
					# generations so this is the lineage duration - confirm.
					total_time = main_reader.readColumn("time")[-1] + time_step_sec[-1]

				# Record enzyme depletion
				enzyme_depletion[seed, variant] = np.sum(time_enzyme_depleted) / total_time

				# Record end product depletion
				metabolite_depletion[seed, variant] = np.sum(time_metabolite_depleted) / total_time

		# Compute average and standard deviations
		metabolite_depletion_avg = np.nanmean(metabolite_depletion, axis = 0)
		metabolite_depletion_std = np.nanstd(metabolite_depletion, axis = 0)
		enzyme_depletion_avg = np.nanmean(enzyme_depletion, axis = 0)
		enzyme_depletion_std = np.nanstd(enzyme_depletion, axis = 0)

		# Enzyme first, metabolite second: this is the order used by the
		# panel titles, the y-axis tags and the clean-figure file names, so
		# the data series must follow it too.
		panel_avgs = [enzyme_depletion_avg, metabolite_depletion_avg]
		panel_stds = [enzyme_depletion_std, metabolite_depletion_std]

		# Plot
		fig, axesList = plt.subplots(2, 1, figsize = (8, 8))
		ax1, ax2 = axesList
		xvals = np.arange(ap.n_variant)
		fig.suptitle("Sensitivity Analysis: pabB depletion")

		for ax, avg, std in zip(axesList, panel_avgs, panel_stds):
			ax.scatter(xvals, avg, edgecolor = "none", clip_on = False, s = MARKERSIZE)
			ax.errorbar(xvals, avg, yerr = std, color = "b", linewidth = 1, clip_on = False, fmt = "o", capsize = 4, capthick = 1, markeredgecolor = "none")

		ax1.set_title("Enzyme depletion", fontsize = FONTSIZE)
		ax2.set_title("Metabolite depletion", fontsize = FONTSIZE)
		xlabels = ["1/10 x", "1/8 x", "1/4 x", "1/2 x", "1 x", "2 x", "4 x", "8 x", "10 x"]
		title_tags = ["counts = 0", "<%s%% of wildtype" % (THRESHOLD * 100)]
		for i, ax in enumerate([ax1, ax2]):
			ax.set_ylabel("Fraction of Time\n%s" % title_tags[i], fontsize = FONTSIZE)
			ax.set_xlabel("Factor of change of pabB synthesis probability", fontsize = FONTSIZE)
			ax.set_xlim([-0.25, 8.25])
			whitePadSparklineAxis(ax)
			ax.set_xticks(xvals)
			ax.set_xticklabels(xlabels)
			ax.set_yticks([0, 1])

		plt.subplots_adjust(hspace = 1, wspace = 1, top = 0.9, bottom = 0.1)
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)
		plt.close("all")

		# Plot clean versions for figure
		clean_names = ["pabB", "methylene-thf"]
		for position, (avg, std, filename) in enumerate(zip(panel_avgs, panel_stds, clean_names)):
			fig, ax = plt.subplots(1, 1, figsize = (10, 3))
			ax.scatter(xvals, avg, edgecolor = "none", clip_on = False, s = MARKERSIZE)
			ax.errorbar(xvals, avg, yerr = std, color = "b", linewidth = 1, clip_on = False, fmt = "o", capsize = 4, capthick = 1, markeredgecolor = "none")
			ax.set_xlim([-0.25, 8.25])
			if position == 0:
				# Only the first clean figure passes the extra False flag.
				whitePadSparklineAxis(ax, False)
			else:
				whitePadSparklineAxis(ax)
			ax.set_xticks(xvals)
			ax.set_xticklabels([])
			ax.set_yticks([0, 1])
			ax.set_yticklabels([])
			exportFigure(plt, plotOutDir, plotOutFileName + "_%s" % filename, metadata)
			plt.close("all")
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """
        Draw four stacked violin plots, one violin per variant, positioned
        on the x axis by the variant's mean doublings per hour.

        Panels: RNA-to-protein ratio, DNA-to-protein ratio, ribosome
        elongation rate, and the fraction of RNA synthesis that is stable
        (tRNA + rRNA) RNA.  Per-time-step values from all cells of a
        variant are pooled into that variant's violin.

        Args:
            inputDir: directory handed to AnalysisPaths; must already exist.
            plotOutDir: directory the figure is exported to (created,
                including parents, if missing).
            plotOutFileName: file name passed through to exportFigure.
            simDataFile: not used; sim_data is loaded from the first
                variant reported by AnalysisPaths instead.
            validationDataFile: not used by this method.
            metadata: passed through to exportFigure.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if not os.path.isdir(inputDir):
            raise Exception("inputDir does not currently exist as a directory")

        ap = AnalysisPaths(inputDir, variant_plot=True)
        all_cells = ap.get_cells()

        if not os.path.exists(plotOutDir):
            os.makedirs(plotOutDir)

        # variant index -> list of per-cell arrays (scalars for doublings)
        rnaToProteinDict = {}
        dnaToProteinDict = {}
        elngRateDict = {}
        stableRnaFractionDict = {}
        doublingPerHourDict = {}

        variantSimDataFile = ap.get_variant_kb(ap.get_variants()[0])
        with open(variantSimDataFile, "rb") as f:
            sim_data = cPickle.load(f)
        nAvogadro = sim_data.constants.nAvogadro.asNumber()
        chromMass = (sim_data.getter.getMass(['CHROM_FULL[c]'])[0] /
                     sim_data.constants.nAvogadro).asNumber()
        isTRna = sim_data.process.transcription.rnaData["isTRna"]
        isRRna = sim_data.process.transcription.rnaData["isRRna"]

        for simDir in all_cells:
            simOutDir = os.path.join(simDir, "simOut")

            # The six characters that start 14 characters before
            # "generation_" are parsed as the variant index.
            # NOTE(review): relies on a fixed-width directory layout -
            # confirm against AnalysisPaths.
            generationStart = simDir.rfind('generation_')
            variant = int(simDir[generationStart - 14:generationStart - 8])

            mass = TableReader(os.path.join(simOutDir, "Mass"))

            # NOTE(review): the 10**-15 factor presumably converts fg to g.
            protein = mass.readColumn("proteinMass") * 10**-15
            rna = mass.readColumn("rnaMass") * 10**-15
            dna = mass.readColumn("dnaMass") * 10**-15

            growthRate = mass.readColumn("instantaniousGrowthRate")
            mass.close()
            doublingTime = np.nanmean(np.log(2) / growthRate / 60)

            rnaNT = rna / NT_MW * nAvogadro
            proteinAA = protein / PROTEIN_MW * nAvogadro

            # Count chromosome equivalents
            chromEquivalents = dna / chromMass

            # Load ribosome data
            ribosomeDataFile = TableReader(
                os.path.join(simOutDir, "RibosomeData"))
            actualElongations = ribosomeDataFile.readColumn(
                "actualElongations")
            ribosomeDataFile.close()

            transcriptDataFile = TableReader(
                os.path.join(simOutDir, "TranscriptElongationListener"))
            rnaSynth = transcriptDataFile.readColumn("countRnaSynthesized")
            transcriptDataFile.close()
            stableRnaSynth = np.sum(rnaSynth[:, isTRna], axis=1) + np.sum(
                rnaSynth[:, isRRna], axis=1)
            totalRnaSynth = np.sum(rnaSynth, axis=1).astype(float)
            rnaFraction = stableRnaSynth / totalRnaSynth

            uniqueMoleculeCounts = TableReader(
                os.path.join(simOutDir, "UniqueMoleculeCounts"))

            ribosomeIndex = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds").index("activeRibosome")
            activeRibosome = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")[:, ribosomeIndex]

            uniqueMoleculeCounts.close()

            mainReader = TableReader(os.path.join(simOutDir, "Main"))
            timeStepSec = mainReader.readColumn("timeStepSec")
            mainReader.close()

            # Collect per-cell arrays; they are joined once per variant
            # below instead of re-copying with np.append for every cell.
            rnaToProteinDict.setdefault(variant, []).append(
                rnaNT / (proteinAA / 100))
            dnaToProteinDict.setdefault(variant, []).append(
                chromEquivalents / (proteinAA / 10**9))
            # The first three time steps are dropped from the rate.
            elngRateDict.setdefault(variant, []).append(
                (actualElongations / activeRibosome / timeStepSec)[3:])
            # Steps with NaN fraction are dropped.
            stableRnaFractionDict.setdefault(variant, []).append(
                np.asarray(rnaFraction)[~np.isnan(rnaFraction)])
            doublingPerHourDict.setdefault(variant, []).append(
                60 / doublingTime)

        def pooled(per_cell_arrays):
            # Flatten and join the arrays collected for one variant.
            return np.concatenate([np.ravel(a) for a in per_cell_arrays])

        rnaToProtein = []
        dnaToProtein = []
        elngRate = []
        stableRnaFraction = []
        doublingPerHour = []

        # Sorted for a deterministic order; the x position of each violin
        # comes from doublingPerHour, not from the list order.
        for key in sorted(rnaToProteinDict):
            rnaToProtein.append(pooled(rnaToProteinDict[key]))
            dnaToProtein.append(pooled(dnaToProteinDict[key]))
            elngRate.append(pooled(elngRateDict[key]))
            stableRnaFraction.append(pooled(stableRnaFractionDict[key]))
            doublingPerHour.append(np.mean(doublingPerHourDict[key]))

        plt.figure(figsize=(8.5, 11))

        sp = plt.subplot(4, 1, 1)
        sp.violinplot(rnaToProtein, positions=doublingPerHour, showmeans=True)
        sp.set_ylabel("RNA to Protein\n(nuc/100 aa)")

        sp = plt.subplot(4, 1, 2)
        sp.violinplot(dnaToProtein, positions=doublingPerHour, showmeans=True)
        sp.set_ylabel("DNA to Protein\n(chrom eq/10^9 aa)")

        sp = plt.subplot(4, 1, 3)
        sp.violinplot(elngRate, positions=doublingPerHour, showmeans=True)
        sp.set_ylabel("Ribosome Elongation\nRate (aa/s)")

        sp = plt.subplot(4, 1, 4)
        sp.violinplot(stableRnaFraction,
                      positions=doublingPerHour,
                      showmeans=True)
        sp.set_ylabel("Rate Stable RNA to\nRate Total RNA")
        sp.set_xlabel("Doublings per Hour")

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")
Ejemplo n.º 7
0
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if metadata["variant"] != "tfActivity":
            print "This plot only runs for the 'tfActivity' variant."
            return

        if not os.path.isdir(inputDir):
            raise Exception, "inputDir does not currently exist as a directory"

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = sorted(ap._path_data['variant'].tolist()
                          )  # Sorry for accessing private data

        if 0 in variants:
            variants.remove(0)

        if len(variants) == 0:
            return

        all_cells = sorted(
            ap.get_cells(variant=variants, seed=[0], generation=[0]))

        if not os.path.exists(plotOutDir):
            os.mkdir(plotOutDir)

        expectedProbBound = []
        simulatedProbBound = []
        expectedSynthProb = []
        simulatedSynthProb = []
        targetId = []
        targetCondition = []
        targetToTfType = {}

        for variant, simDir in zip(variants, all_cells):
            sim_data = cPickle.load(open(ap.get_variant_kb(variant), "rb"))

            shape = sim_data.process.transcription_regulation.recruitmentData[
                "shape"]
            hI = sim_data.process.transcription_regulation.recruitmentData[
                "hI"]
            hJ = sim_data.process.transcription_regulation.recruitmentData[
                "hJ"]
            hV = sim_data.process.transcription_regulation.recruitmentData[
                "hV"]
            H = np.zeros(shape, np.float64)
            H[hI, hJ] = hV
            colNames = sim_data.process.transcription_regulation.recruitmentColNames

            tfList = ["basal (no TF)"] + sorted(
                sim_data.tfToActiveInactiveConds)
            simOutDir = os.path.join(simDir, "simOut")
            tf = tfList[(variant + 1) // 2]
            tfStatus = None
            if variant % 2 == 1:
                tfStatus = "active"
            else:
                tfStatus = "inactive"

            bulkMoleculesReader = TableReader(
                os.path.join(simOutDir, "BulkMolecules"))
            bulkMoleculeIds = bulkMoleculesReader.readAttribute("objectNames")

            rnaSynthProbReader = TableReader(
                os.path.join(simOutDir, "RnaSynthProb"))
            rnaIds = rnaSynthProbReader.readAttribute("rnaIds")

            tfTargetBoundIds = []
            tfTargetBoundIndices = []
            tfTargetSynthProbIds = []
            tfTargetSynthProbIndices = []
            for tfTarget in sorted(sim_data.tfToFC[tf]):
                tfTargetBoundIds.append(tfTarget + "__" + tf)
                tfTargetBoundIndices.append(
                    bulkMoleculeIds.index(tfTargetBoundIds[-1]))
                tfTargetSynthProbIds.append(tfTarget + "[c]")
                tfTargetSynthProbIndices.append(
                    rnaIds.index(tfTargetSynthProbIds[-1]))
            tfTargetBoundCountsAll = bulkMoleculesReader.readColumn(
                "counts")[:, tfTargetBoundIndices]
            tfTargetSynthProbAll = rnaSynthProbReader.readColumn(
                "rnaSynthProb")[:, tfTargetSynthProbIndices]

            for targetIdx, tfTarget in enumerate(sorted(sim_data.tfToFC[tf])):
                tfTargetBoundCounts = tfTargetBoundCountsAll[:,
                                                             targetIdx].reshape(
                                                                 -1)

                expectedProbBound.append(sim_data.pPromoterBound[tf + "__" +
                                                                 tfStatus][tf])
                simulatedProbBound.append(tfTargetBoundCounts[5:].mean())

                tfTargetSynthProbId = [tfTarget + "[c]"]
                tfTargetSynthProbIndex = np.array(
                    [rnaIds.index(x) for x in tfTargetSynthProbId])
                tfTargetSynthProb = tfTargetSynthProbAll[:,
                                                         targetIdx].reshape(-1)

                rnaIdx = np.where(
                    sim_data.process.transcription.rnaData["id"] == tfTarget +
                    "[c]")[0][0]
                regulatingTfIdxs = np.where(H[rnaIdx, :])

                for i in regulatingTfIdxs[0]:
                    if colNames[i].split("__")[1] != "alpha":
                        if tfTarget not in targetToTfType:
                            targetToTfType[tfTarget] = []
                        targetToTfType[tfTarget].append(
                            sim_data.process.transcription_regulation.
                            tfToTfType[colNames[i].split("__")[1]])

                expectedSynthProb.append(
                    sim_data.process.transcription.rnaSynthProb[
                        tf + "__" + tfStatus][rnaIdx])
                simulatedSynthProb.append(tfTargetSynthProb[5:].mean())

                targetId.append(tfTarget)
                targetCondition.append(tf + "__" + tfStatus)

            bulkMoleculesReader.close()
            rnaSynthProbReader.close()

        expectedProbBound = np.array(expectedProbBound)
        simulatedProbBound = np.array(simulatedProbBound)
        expectedSynthProb = np.array(expectedSynthProb)
        simulatedSynthProb = np.array(simulatedSynthProb)

        regressionResult = scipy.stats.linregress(
            np.log10(expectedProbBound[expectedProbBound > NUMERICAL_ZERO]),
            np.log10(simulatedProbBound[expectedProbBound > NUMERICAL_ZERO]))
        regressionResultLargeValues = scipy.stats.linregress(
            np.log10(expectedProbBound[expectedProbBound > 1e-2]),
            np.log10(simulatedProbBound[expectedProbBound > 1e-2]))

        ax = plt.subplot(2, 1, 1)
        ax.scatter(np.log10(expectedProbBound), np.log10(simulatedProbBound))
        plt.xlabel("log10(Expected probability bound)", fontsize=6)
        plt.ylabel("log10(Simulated probability bound)", fontsize=6)
        plt.title(
            "Slope: %0.3f   Intercept: %0.3e      (Without Small Values:  Slope: %0.3f Intercept: %0.3e)"
            % (regressionResult.slope, regressionResult.intercept,
               regressionResultLargeValues.slope,
               regressionResultLargeValues.intercept),
            fontsize=6)
        ax.tick_params(which='both', direction='out', labelsize=6)

        regressionResult = scipy.stats.linregress(
            np.log10(expectedSynthProb[expectedSynthProb > NUMERICAL_ZERO]),
            np.log10(simulatedSynthProb[expectedSynthProb > NUMERICAL_ZERO]))

        ax = plt.subplot(2, 1, 2)
        ax.scatter(np.log10(expectedSynthProb), np.log10(simulatedSynthProb))
        plt.xlabel("log10(Expected synthesis probability)", fontsize=6)
        plt.ylabel("log10(Simulated synthesis probability)", fontsize=6)
        plt.title("Slope: %0.3f   Intercept: %0.3e" %
                  (regressionResult.slope, regressionResult.intercept),
                  fontsize=6)
        ax.tick_params(which='both', direction='out', labelsize=6)

        plt.tight_layout()

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")

        # Probability bound - hover for ID
        source1 = ColumnDataSource(data=dict(x=np.log10(expectedProbBound),
                                             y=np.log10(simulatedProbBound),
                                             ID=targetId,
                                             condition=targetCondition))
        hover1 = HoverTool(tooltips=[("ID", "@ID"), ("condition",
                                                     "@condition")])
        tools1 = [
            hover1,
            BoxZoomTool(),
            LassoSelectTool(),
            PanTool(),
            WheelZoomTool(),
            ResizeTool(),
            UndoTool(),
            RedoTool(), "reset"
        ]
        s1 = figure(x_axis_label="log10(Expected probability bound)",
                    y_axis_label="log10(Simulated probability bound)",
                    width=800,
                    height=500,
                    tools=tools1)
        s1.scatter("x", "y", source=source1)

        if not os.path.exists(os.path.join(plotOutDir, "html_plots")):
            os.makedirs(os.path.join(plotOutDir, "html_plots"))
        bokeh.io.output_file(os.path.join(
            plotOutDir, "html_plots",
            plotOutFileName + "__probBound" + ".html"),
                             title=plotOutFileName,
                             autosave=False)
        bokeh.io.save(s1)

        # Synthesis probability - hover for ID
        source2 = ColumnDataSource(data=dict(x=np.log10(expectedSynthProb),
                                             y=np.log10(simulatedSynthProb),
                                             ID=targetId,
                                             condition=targetCondition))
        hover2 = HoverTool(tooltips=[("ID", "@ID"), ("condition",
                                                     "@condition")])
        tools2 = [
            hover2,
            BoxZoomTool(),
            LassoSelectTool(),
            PanTool(),
            WheelZoomTool(),
            ResizeTool(),
            UndoTool(),
            RedoTool(), "reset"
        ]
        s2 = figure(x_axis_label="log10(Expected synthesis probability)",
                    y_axis_label="log10(Simulated synthesis probability)",
                    width=800,
                    height=500,
                    tools=tools2)
        s2.scatter("x", "y", source=source2)

        bokeh.io.output_file(os.path.join(
            plotOutDir, "html_plots",
            plotOutFileName + "__synthProb" + ".html"),
                             title=plotOutFileName,
                             autosave=False)
        bokeh.io.save(s2)

        # Synthesis probability - filter targets by TF type
        bokeh.io.output_file(os.path.join(
            plotOutDir, "html_plots",
            plotOutFileName + "__synthProb__interactive" + ".html"),
                             title=plotOutFileName,
                             autosave=False)

        tfTypes = []
        for i in targetId:
            if i in targetToTfType:
                uniqueSet = np.unique(targetToTfType[i])

                if uniqueSet.shape[0] == 1:
                    tfTypes.append(uniqueSet[0])
                elif uniqueSet.shape[0] == 3:
                    tfTypes.append("all")
                else:
                    tfTypes.append(uniqueSet[0] + "_" + uniqueSet[1])
            else:
                tfTypes.append("none")
        tfTypes = np.array(tfTypes)

        x0 = np.copy(expectedSynthProb)
        x0[np.where(tfTypes != "0CS")] = np.nan
        x1 = np.copy(expectedSynthProb)
        x1[np.where(tfTypes != "1CS")] = np.nan
        x2 = np.copy(expectedSynthProb)
        x2[np.where(tfTypes != "2CS")] = np.nan
        x01 = np.copy(expectedSynthProb)
        x01[np.where(tfTypes != "0CS_1CS")] = np.nan
        x02 = np.copy(expectedSynthProb)
        x02[np.where(tfTypes != "0CS_2CS")] = np.nan
        x12 = np.copy(expectedSynthProb)
        x12[np.where(tfTypes != "1CS_2CS")] = np.nan

        y0 = np.copy(simulatedSynthProb)
        y0[np.where(tfTypes != "0CS")] = np.nan
        y1 = np.copy(simulatedSynthProb)
        y1[np.where(tfTypes != "1CS")] = np.nan
        y2 = np.copy(simulatedSynthProb)
        y2[np.where(tfTypes != "2CS")] = np.nan
        y01 = np.copy(simulatedSynthProb)
        y01[np.where(tfTypes != "0CS_1CS")] = np.nan
        y02 = np.copy(simulatedSynthProb)
        y02[np.where(tfTypes != "0CS_2CS")] = np.nan
        y12 = np.copy(simulatedSynthProb)
        x12[np.where(tfTypes != "1CS_2CS")] = np.nan

        source_all = ColumnDataSource(data=dict(x=np.log10(expectedSynthProb),
                                                y=np.log10(simulatedSynthProb),
                                                ID=targetId,
                                                condition=targetCondition))
        source_tf = ColumnDataSource(
            data=dict(x0=np.log10(x0),
                      y0=np.log10(y0),
                      x1=np.log10(x1),
                      y1=np.log10(y1),
                      x2=np.log10(x2),
                      y2=np.log10(y2),
                      x01=np.log10(x01),
                      y01=np.log10(y01),
                      x02=np.log10(x02),
                      y02=np.log10(y02),
                      x12=np.log10(x12),
                      y12=np.log10(y12),
                      x123=np.log10(expectedSynthProb),
                      y123=np.log10(simulatedSynthProb),
                      ID=targetId,
                      condition=targetCondition))
        hover3 = HoverTool(tooltips=[("ID", "@ID"), ("condition",
                                                     "@condition")])
        tools3 = [
            hover3,
            BoxZoomTool(),
            LassoSelectTool(),
            PanTool(),
            WheelZoomTool(),
            ResizeTool(),
            UndoTool(),
            RedoTool(), "reset"
        ]

        axis_max = np.ceil(np.log10(expectedSynthProb).max())
        for i in np.sort(expectedSynthProb):
            if i > 0:
                break
        axis_min = np.floor(np.log10(i))
        s3 = figure(
            x_axis_label="log10(Expected synthesis probability)",
            y_axis_label="log10(Simulated synthesis probability)",
            plot_width=800,
            plot_height=500,
            x_range=(axis_min, axis_max),
            y_range=(axis_min, axis_max),
            tools=tools3,
        )
        s3.scatter("x", "y", source=source_all)
        callback = CustomJS(args=dict(source_all=source_all,
                                      source_tf=source_tf),
                            code="""
			var data_all = source_all.get('data');
			var data_tf = source_tf.get('data');
			data_all['x'] = data_tf['x' + cb_obj.get("name")];
			data_all['y'] = data_tf['y' + cb_obj.get("name")];
			source_all.trigger('change');
			""")

        toggle0 = Button(label="0CS", callback=callback, name="0")
        toggle1 = Button(label="1CS", callback=callback, name="1")
        toggle2 = Button(label="2CS", callback=callback, name="2")
        toggle3 = Button(label="0CS and 1CS", callback=callback, name="01")
        toggle4 = Button(label="0CS and 2CS", callback=callback, name="02")
        toggle5 = Button(label="1CS and 2CS", callback=callback, name="12")
        toggle6 = Button(label="All", callback=callback, name="123")
        layout = vplot(toggle0, toggle1, toggle2, toggle3, toggle4, toggle5,
                       toggle6, s3)
        bokeh.io.save(layout)
        bokeh.io.curstate().reset()
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Plot time-averaged FBA reaction fluxes of simulations against each other.

        Reads the "reactionFluxes" column of the "FBAResults" table for the
        seed 0 / generation 0 cells, averages it over rows after dropping the
        first BURN_IN_STEPS rows, and draws a log10-log10 scatter for every
        pair of simulations. The static figure is written with exportFigure;
        an interactive Bokeh scatter of the first two simulations (reaction ID
        on hover) is saved under ``<plotOutDir>/html_plots``.

        Args:
            inputDir: directory handed to AnalysisPaths; must already exist.
            plotOutDir: output directory, created (with parents) if missing.
            plotOutFileName: base name of the exported figure and HTML file.
            simDataFile: unused.
            validationDataFile: unused.
            metadata: passed through to exportFigure.

        Returns:
            None. Returns before producing any output when AnalysisPaths
            reports at most one variant entry, and skips the Bokeh plot when
            fewer than two simulations were read.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if not os.path.isdir(inputDir):
            raise Exception(
                "inputDir does not currently exist as a directory")

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = sorted(ap._path_data['variant'].tolist()
                          )  # Sorry for accessing private data

        if len(variants) <= 1:
            return

        all_cells = sorted(
            ap.get_cells(variant=variants, seed=[0], generation=[0]))

        if not os.path.exists(plotOutDir):
            os.makedirs(plotOutDir)

        BURN_IN_STEPS = 20
        mean_fluxes = []
        IDs = []

        # zip() stops at the shorter sequence; only the cell directory is
        # needed to locate the listener output.
        # TODO LEARN HOW TO PULL FLUXES FROM LISTENER FILE (see kineticsflux comparison)
        for _variant, simDir in zip(variants, all_cells):
            simOutDir = os.path.join(simDir, "simOut")

            fbaReader = TableReader(os.path.join(simOutDir, "FBAResults"))
            try:
                actualFluxes = fbaReader.readColumn("reactionFluxes")
                IDs = fbaReader.readAttribute("reactionIDs")
            finally:
                # Release the table even when a read fails.
                fbaReader.close()

            mean_fluxes.append(
                np.mean(actualFluxes[BURN_IN_STEPS:, :], axis=0))

        n_variants = len(mean_fluxes)

        ### Plot the fluxes
        plt.figure(figsize=(8.5, 11))

        # One panel per unordered pair (j > k), laid out as a lower triangle:
        # row (j - 1), column k. This gives every pair its own grid cell for
        # any number of simulations.
        n_grid = n_variants - 1
        for j in range(1, n_variants):
            for k in range(j):
                plt.subplot(n_grid, n_grid, (j - 1) * n_grid + k + 1)
                plt.plot(np.log10(mean_fluxes[j][:]),
                         np.log10(mean_fluxes[k][:]), 'o')
                # Identity line for reference.
                plt.plot([-12, 0], [-12, 0],
                         color='k',
                         linestyle='-',
                         linewidth=2)
                plt.xlabel('Variant ' + str(j) + ' Flux')
                plt.ylabel('Variant ' + str(k) + ' Flux')
                plt.ylim((-11, 0))
                plt.xlim((-11, 0))

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")

        # Bokeh: interactive version of the first pair only.
        if len(mean_fluxes) < 2:
            return

        x = np.log10(mean_fluxes[0][:])
        y = np.log10(mean_fluxes[1][:])

        source = ColumnDataSource(data=dict(x=x, y=y, rxn=IDs))

        hover = HoverTool(tooltips=[
            ("ID", "@rxn"),
        ])

        TOOLS = [
            hover,
            BoxZoomTool(),
            LassoSelectTool(),
            PanTool(),
            WheelZoomTool(),
            ResizeTool(),
            UndoTool(),
            RedoTool(), "reset"
        ]

        p = figure(
            x_axis_label="Variant 0 Flux",
            y_axis_label="Variant 1 Flux",
            width=800,
            height=800,
            tools=TOOLS,
        )

        p.circle('x', 'y', size=5, source=source)
        p.line([-12, 0], [-12, 0], color="firebrick", line_width=2)

        htmlDir = os.path.join(plotOutDir, "html_plots")
        if not os.path.exists(htmlDir):
            os.makedirs(htmlDir)

        bokeh.io.output_file(os.path.join(htmlDir, plotOutFileName + ".html"),
                             title=plotOutFileName,
                             autosave=False)
        bokeh.io.save(p)
        bokeh.io.curstate().reset()
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Plot how often MenE and its end products are depleted per variant.

        For every (variant, seed) pair this sums, over all of that pair's
        cells, the time steps during which the "MENE-CPLX[c]" count is zero
        and the time steps during which the summed end-product concentration
        is below ``(1 - THRESHOLD) * TARGET_CONC``. Both sums are divided by
        the final "time" value (plus the final time step) of the last cell
        read. Results are cached as ``<plotOutFileName>.pickle`` in plotOutDir
        and reused on later calls. Three figures are exported: a combined
        two-panel figure and one unlabeled figure per quantity.

        Args:
            inputDir: directory handed to AnalysisPaths; must already exist.
            plotOutDir: output directory, created (with parents) if missing.
            plotOutFileName: base name of the exported figures and the cache.
            simDataFile: unused.
            validationDataFile: unused.
            metadata: passed through to exportFigure.

        Returns:
            None. Returns early (after printing a message) unless
            AnalysisPaths reports exactly 9 variants.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if not os.path.isdir(inputDir):
            raise Exception(
                "inputDir does not currently exist as a directory")
        if not os.path.exists(plotOutDir):
            os.makedirs(plotOutDir)

        # Get cells
        ap = AnalysisPaths(inputDir, variant_plot=True)
        if ap.n_variant != 9:
            print("This plot expects all variants of meneParams")
            return

        # Get constants from wildtype variant
        with open(ap.get_variant_kb(4), "rb") as f:  # 4 is the wildtype variant
            sim_data = cPickle.load(f)
        cellDensity = sim_data.constants.cellDensity
        nAvogadro = sim_data.constants.nAvogadro

        # Initialize variables
        enzymeId = "MENE-CPLX[c]"
        endProductIds = ["REDUCED-MENAQUINONE[c]", "CPD-12115[c]"]
        TARGET_CONC = len(endProductIds) * TARGET_CONC_SINGLE

        # Check for cache
        cachePath = os.path.join(plotOutDir, "%s.pickle" % plotOutFileName)

        if os.path.exists(cachePath):
            with open(cachePath, "rb") as f:
                D = cPickle.load(f)
            meneDepletion = D["mene"]
            endProductDepletion = D["endProduct"]

        else:
            # Investigate each variant
            meneDepletion = np.zeros([ap.n_seed, ap.n_variant])
            endProductDepletion = np.zeros([ap.n_seed, ap.n_variant])

            for variant in range(ap.n_variant):
                for seed in range(ap.n_seed):
                    cells = ap.get_cells(variant=[variant], seed=[seed])
                    if len(cells) == 0:
                        # Nothing to read: leave the zero-initialized entries
                        # untouched instead of reusing another lineage's data.
                        continue

                    timeMeneDepleted = []  # seconds
                    timeEndProdDepleted = []  # seconds

                    for simDir in cells:
                        simOutDir = os.path.join(simDir, "simOut")

                        # Get molecule counts (read the table once, slice twice)
                        bulkMolecules = TableReader(
                            os.path.join(simOutDir, "BulkMolecules"))
                        try:
                            moleculeIds = bulkMolecules.readAttribute(
                                "objectNames")
                            counts = bulkMolecules.readColumn("counts")
                        finally:
                            bulkMolecules.close()

                        meneCounts = counts[:, moleculeIds.index(enzymeId)]
                        endProductIndices = [
                            moleculeIds.index(x) for x in endProductIds
                        ]
                        endProductCounts = counts[:, endProductIndices]

                        # Time steps and the running end time; the value kept
                        # after the loop is that of the last cell read.
                        mainReader = TableReader(
                            os.path.join(simOutDir, "Main"))
                        try:
                            timeStepSec = mainReader.readColumn("timeStepSec")
                            totalTime = (mainReader.readColumn("time")[-1] +
                                         timeStepSec[-1])
                        finally:
                            mainReader.close()

                        # Compute time with zero counts of tetramer (MENE-CPLX)
                        meneDepletionIndices = np.where(meneCounts == 0)[0]
                        timeMeneDepleted.append(
                            timeStepSec[meneDepletionIndices].sum())

                        # Compute time with end products under the target concentration
                        massReader = TableReader(
                            os.path.join(simOutDir, "Mass"))
                        try:
                            mass = massReader.readColumn("cellMass") * units.fg
                        finally:
                            massReader.close()
                        volume = mass / cellDensity
                        endProductConcentrations = np.sum([
                            endProductCounts[:, col] / nAvogadro / volume
                            for col in range(endProductCounts.shape[1])
                        ], axis=0)
                        endProductDepletionIndices = np.where(
                            endProductConcentrations < (
                                (1 - THRESHOLD) * TARGET_CONC))[0]
                        timeEndProdDepleted.append(
                            timeStepSec[endProductDepletionIndices].sum())

                    # Record MENE-CPLX depletion
                    meneDepletion[seed, variant] = (
                        np.sum(timeMeneDepleted) / totalTime)

                    # Record end product depletion
                    endProductDepletion[seed, variant] = (
                        np.sum(timeEndProdDepleted) / totalTime)

            # Cache
            D = {"mene": meneDepletion, "endProduct": endProductDepletion}
            with open(cachePath, "wb") as f:
                cPickle.dump(D, f)

        # Compute average and standard deviations
        meneDepletion_avg = np.average(meneDepletion, axis=0)
        meneDepletion_std = np.std(meneDepletion, axis=0)
        endProductDepletion_avg = np.average(endProductDepletion, axis=0)
        endProductDepletion_std = np.std(endProductDepletion, axis=0)

        averages = [meneDepletion_avg, endProductDepletion_avg]
        deviations = [meneDepletion_std, endProductDepletion_std]

        # Plot
        fig, axesList = plt.subplots(2, 1, figsize=(8, 8))
        ax1, ax2 = axesList
        xvals = np.arange(ap.n_variant)
        fig.suptitle("Sensitivity Analysis: menE depletion")

        for ax, avg, std in zip(axesList, averages, deviations):
            ax.scatter(xvals,
                       avg,
                       edgecolor="none",
                       clip_on=False,
                       s=MARKERSIZE)
            ax.errorbar(xvals,
                        avg,
                        yerr=std,
                        color="b",
                        linewidth=1,
                        clip_on=False,
                        fmt="o",
                        capsize=4,
                        capthick=1,
                        markeredgecolor="none")

        ax1.set_title("MenE tetramer depletion", fontsize=FONTSIZE)
        ax2.set_title("Menaquinone products depletion", fontsize=FONTSIZE)
        xlabels = [
            "1/10 x", "1/8 x", "1/4 x", "1/2 x", "1 x", "2 x", "4 x", "8 x",
            "10 x"
        ]
        title_tags = [
            "counts = 0",
            "<%s percent of wildtype" % (THRESHOLD * 100)
        ]
        for i, ax in enumerate([ax1, ax2]):
            ax.set_ylabel("Fraction of Time\n%s" % title_tags[i],
                          fontsize=FONTSIZE)
            ax.set_xlabel("Factor of increase of menE synthesis probability",
                          fontsize=FONTSIZE)
            ax.set_xlim([-0.25, 8.25])
            whitePadSparklineAxis(ax)
            ax.set_xticks(xvals)
            ax.set_xticklabels(xlabels)
            ax.set_yticks([0, 1])

        plt.subplots_adjust(hspace=1, wspace=1, top=0.9, bottom=0.1)
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")

        # Plot clean versions for figure
        for i, (avg, std, filename) in enumerate(
                zip(averages, deviations, ["mene", "menaquinone"])):
            _, ax = plt.subplots(1, 1, figsize=(10, 3))
            ax.scatter(xvals,
                       avg,
                       edgecolor="none",
                       clip_on=False,
                       s=MARKERSIZE)
            ax.errorbar(xvals,
                        avg,
                        yerr=std,
                        color="b",
                        linewidth=1,
                        clip_on=False,
                        fmt="o",
                        capsize=4,
                        capthick=1,
                        markeredgecolor="none")
            ax.set_xlim([-0.25, 8.25])
            # Only the first clean figure passes the extra False argument.
            if i == 0:
                whitePadSparklineAxis(ax, False)
            else:
                whitePadSparklineAxis(ax)
            ax.set_xticks(xvals)
            ax.set_xticklabels([])
            ax.set_yticks([0, 1])
            ax.set_yticklabels([])
            exportFigure(plt, plotOutDir, plotOutFileName + "_%s" % filename,
                         metadata)
            plt.close("all")
# Example no. 10
# 0
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Plot log2 fold changes of transcription-factor targets, sorted.

        Collects ``log2(sim_data.tfToFC[tf][target])`` for every TF listed in
        ``sim_data.tfToActiveInactiveConds`` (sim_data is loaded from the
        first variant), orders targets by their largest-magnitude fold change
        (signed), and plots one point per (target, TF) pair. A static figure
        is exported with exportFigure and a Bokeh scatter with target / TF /
        condition tooltips is saved under ``<plotOutDir>/html_plots``.

        Args:
            inputDir: directory handed to AnalysisPaths; must already exist.
            plotOutDir: output directory, created (with parents) if missing.
            plotOutFileName: base name of the exported figure and HTML file.
            simDataFile: unused.
            validationDataFile: unused.
            metadata: passed through to exportFigure.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if not os.path.isdir(inputDir):
            raise Exception(
                "inputDir does not currently exist as a directory")
        if not os.path.exists(plotOutDir):
            os.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = sorted(ap._path_data['variant'].tolist()
                          )  # Sorry for accessing private data
        variant = variants[0]
        with open(ap.get_variant_kb(variant), "rb") as f:
            sim_data = cPickle.load(f)

        # target -> log2 fold changes, and the TFs they came from (parallel lists)
        targetToFC = {}
        targetToFCTF = {}

        for tf in sim_data.tfToActiveInactiveConds:
            for target in sim_data.tfToFC[tf]:
                targetToFC.setdefault(target, []).append(
                    np.log2(sim_data.tfToFC[tf][target]))
                targetToFCTF.setdefault(target, []).append(tf)

        # Flatten to one entry per (target, TF) pair, targets in sorted order.
        y = []
        maxVals = []
        tfs = []
        targetIds = []

        for target in sorted(targetToFC):
            foldChanges = np.array(targetToFC[target])

            # Signed fold change of largest magnitude; positive wins ties.
            if foldChanges.max() >= -1. * foldChanges.min():
                extreme = foldChanges.max()
            else:
                extreme = foldChanges.min()

            for FC, tf in zip(foldChanges, targetToFCTF[target]):
                y.append(FC)
                maxVals.append(extreme)
                tfs.append(tf)
                targetIds.append(target)

        conditions = [
            sim_data.conditions[tf + "__active"]["nutrients"] for tf in tfs
        ]

        # A stable sort keeps all entries of one target adjacent even when
        # two targets share the same extreme value.
        sortedIdxs = np.argsort(np.array(maxVals), kind="mergesort")
        y = np.array(y)[sortedIdxs]
        conditions = [conditions[i] for i in sortedIdxs]
        tfs = [tfs[i] for i in sortedIdxs]
        targetIds = [targetIds[i] for i in sortedIdxs]

        # x is the rank of the target in the sorted order, so every point of
        # a target shares one x position that matches its y value and tooltip.
        x = np.zeros(len(targetIds), dtype=int)
        rank = -1
        previousTarget = None
        for i, target in enumerate(targetIds):
            if target != previousTarget:
                rank += 1
                previousTarget = target
            x[i] = rank

        plt.figure(figsize=(11, 8.5))
        ax = plt.subplot(1, 1, 1)
        ax.plot(x, y, ".")
        xlabel = "Gene targets (sorted)"
        ylabel = "log2 (Target expression fold change)"
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")

        source = ColumnDataSource(data=dict(x=x,
                                            y=y,
                                            targetId=targetIds,
                                            tfId=tfs,
                                            condition=conditions))
        hover = HoverTool(
            tooltips=[("target", "@targetId"), ("TF", "@tfId"),
                      ("condition", "@condition")])
        tools = [
            hover,
            BoxZoomTool(),
            LassoSelectTool(),
            PanTool(),
            WheelZoomTool(),
            ResizeTool(),
            UndoTool(),
            RedoTool(), "reset"
        ]
        plot = figure(x_axis_label=xlabel,
                      y_axis_label=ylabel,
                      width=800,
                      height=500,
                      tools=tools)

        plot.scatter("x", "y", source=source)

        htmlDir = os.path.join(plotOutDir, "html_plots")
        if not os.path.exists(htmlDir):
            os.makedirs(htmlDir)
        # NOTE(review): the "__probBound" suffix looks copied from another
        # plot; kept so existing output paths do not change.
        bokeh.io.output_file(os.path.join(
            htmlDir, plotOutFileName + "__probBound" + ".html"),
                             title=plotOutFileName,
                             autosave=False)
        bokeh.io.save(plot)
        bokeh.io.curstate().reset()
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""Compare per-variant simulation averages against the bremer_* reference arrays.

		For each variant, runs ``mp_worker`` over every cell directory in a
		process pool and aggregates columns 0-4 of its output with
		nanmean / nanstd (doubling time, RNA mass per cell, elongation rate,
		origins per cell at initiation, rrn initiation rate). The four
		quantities are plotted against doubling time next to the module-level
		``bremer_*`` arrays and exported as ``<plotOutFileName>__test``.

		Args:
			inputDir: directory handed to AnalysisPaths; must already exist.
			plotOutDir: output directory, created (with parents) if missing.
			plotOutFileName: base name of the exported figure.
			simDataFile: unused.
			validationDataFile: unused.
			metadata: passed through to exportFigure.

		Returns:
			None. Returns early (after printing the error) if a variant's
			sim_data cannot be loaded.

		Raises:
			Exception: if inputDir is not an existing directory.
		"""
		# Set per variant before the pool is created; presumably read by
		# mp_worker in the worker processes -- confirm against mp_worker.
		global is_rRNA

		if not os.path.isdir(inputDir):
			raise Exception("inputDir does not currently exist as a directory")

		if not os.path.exists(plotOutDir):
			os.makedirs(plotOutDir)

		ap = AnalysisPaths(inputDir, variant_plot = True)
		variants = ap.get_variants()

		# Column positions within each row returned by mp_worker
		index_doubling_time = 0
		sim_doubling_time = []

		index_rna_mass = 1
		sim_rna_mass_per_cell = []
		sim_rna_mass_per_cell_std = []

		index_elng_rate = 2
		sim_elng_rate = []
		sim_elng_rate_std = []

		index_n_origin_init = 3
		sim_origins_per_cell_at_initiation = []
		sim_origins_per_cell_at_initiation_std = []

		index_rrn_init_rate = 4
		sim_rrn_init_rate = []
		sim_rrn_init_rate_std = []

		for varIdx in range(ap.n_variant):
			variant = variants[varIdx]
			print("variant {}".format(variant))

			sim_dirs = ap.get_cells(variant=[variant])
			n_sims = len(sim_dirs)
			print("Total cells: {}".format(n_sims))

			try:
				# Pickles are binary data: open in 'rb' and close promptly.
				with open(ap.get_variant_kb(variant), 'rb') as f:
					sim_data = cPickle.load(f)

				is_rRNA = sim_data.process.transcription.rnaData["isRRna"]

			except Exception as e:
				print("Couldn't load sim_data object. Exiting. {}".format(e))
				return

			p = Pool(parallelization.cpus())
			try:
				output = np.array(p.map(mp_worker, sim_dirs))
			finally:
				# Shut the workers down even if a task raised.
				p.close()
				p.join()

			# Filter output from broken files using np.nanmean and np.nanstd
			# Divided by 60: presumably seconds to minutes, matching the axis label.
			sim_doubling_time.append(np.nanmean(output[:, index_doubling_time]) / 60.)

			sim_rna_mass_per_cell.append(np.nanmean(output[:, index_rna_mass]))
			sim_rna_mass_per_cell_std.append(np.nanstd(output[:, index_rna_mass]))

			sim_elng_rate.append(np.nanmean(output[:, index_elng_rate]))
			sim_elng_rate_std.append(np.nanstd(output[:, index_elng_rate]))

			sim_origins_per_cell_at_initiation.append(np.nanmean(output[:, index_n_origin_init]))
			sim_origins_per_cell_at_initiation_std.append(np.nanstd(output[:, index_n_origin_init]))

			sim_rrn_init_rate.append(np.nanmean(output[:, index_rrn_init_rate]))
			sim_rrn_init_rate_std.append(np.nanstd(output[:, index_rrn_init_rate]))

		sim_doubling_time = np.array(sim_doubling_time)

		# Plot
		fig, axes_list = plt.subplots(1, 4, figsize=(15, 5))
		ax0, ax1, ax2, ax3 = axes_list
		# Descending doubling time for both data sets
		sort_sim = np.argsort(sim_doubling_time)[::-1]
		sort_bremer = np.argsort(bremer_tau)[::-1]

		# RNA mass per cell
		ax0.errorbar(
			sim_doubling_time[sort_sim],
			np.array(sim_rna_mass_per_cell)[sort_sim],
			yerr=np.array(sim_rna_mass_per_cell_std)[sort_sim],
			color='tab:blue', **SIM_PLOT_STYLE)
		ax0.errorbar(
			bremer_tau[sort_bremer],
			bremer_rna_mass_per_cell[sort_bremer],
			color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
		ax0.set_title('RNA mass per cell (fg)', fontsize=FONT_SIZE)
		ax0.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)
		ax0.set_xlim([0, 135])
		ax0.set_ylim([0, 250])
		ax0.legend(loc=1, fontsize='xx-small', markerscale=0.5, frameon=False)

		# Ribosome elongation rate
		ax1.errorbar(
			sim_doubling_time[sort_sim],
			np.array(sim_elng_rate)[sort_sim],
			yerr=np.array(sim_elng_rate_std)[sort_sim],
			color='tab:blue', **SIM_PLOT_STYLE)
		ax1.errorbar(
			bremer_tau[sort_bremer],
			bremer_elng_rate[sort_bremer],
			color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
		ax1.set_title('Ribosome elongation\nrate (aa/s/ribosome)', fontsize=FONT_SIZE)
		ax1.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)
		ax1.set_ylim([5, 24])

		# Number of origins at chromosome initiation
		ax2.errorbar(
			sim_doubling_time[sort_sim],
			np.array(sim_origins_per_cell_at_initiation)[sort_sim],
			yerr=np.array(sim_origins_per_cell_at_initiation_std)[sort_sim],
			color='tab:blue', **SIM_PLOT_STYLE)
		ax2.errorbar(
			bremer_tau[sort_bremer],
			bremer_origins_per_cell_at_initiation[sort_bremer],
			color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
		ax2.set_title('Average origins at chrom. init.', fontsize=FONT_SIZE)
		ax2.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)
		ax2.set_ylim([0.5, 4.5])

		# rRNA initiation rate
		ax3.errorbar(
			sim_doubling_time[sort_sim],
			np.array(sim_rrn_init_rate)[sort_sim],
			yerr=np.array(sim_rrn_init_rate_std)[sort_sim],
			color='tab:blue', **SIM_PLOT_STYLE)
		ax3.errorbar(
			bremer_tau[sort_bremer],
			bremer_rrn_init_rate[sort_bremer],
			color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
		ax3.set_title('Rate of rrn initiation (1/min)', fontsize=FONT_SIZE)
		ax3.set_ylim([0, 2500])
		ax3.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)

		for ax in axes_list:
			# Shared x range; ticks only at the axis limits.
			ax.set_xlim(X_LIM)
			ax.set_xticks(X_LIM)
			ax.set_ylim(ax.get_ylim())
			ax.set_yticks(ax.get_ylim())

			# tick_params avoids the Tick.label attribute, which newer
			# Matplotlib releases no longer provide.
			ax.tick_params(axis='both', which='major', labelsize=FONT_SIZE)

		plt.subplots_adjust(bottom=0.25, top=0.75, left=0.05, right=0.95, wspace=0.4)
		exportFigure(plt, plotOutDir, '{}__test'.format(plotOutFileName), metadata)
		plt.close('all')