Exemple #1
0
    def __init__(self, path):
        """Prepare ``path`` and open the data and offsets files for writing.

        The directory is created through ``filepath.makedirs``. Both files
        are opened in ``"w"`` mode and stored on the instance; this method
        does not close them.
        """
        filepath.makedirs(path)

        data_file = os.path.join(path, FILE_DATA)
        self._data = open(data_file, "w")

        offsets_file = os.path.join(path, FILE_OFFSETS)
        self._offsets = open(offsets_file, "w")

        # No dtype has been recorded at construction time.
        self._dtype = None
    def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Skeleton multi-generation plot: load the inputs, export a figure.

        The data loading is in place but nothing is drawn on the figure yet
        (see the "Create Plot" placeholder below).

        Args:
            seedOutDir: directory that must already exist; passed to
                AnalysisPaths with ``multi_gen_plot=True``.
            plotOutDir: output directory, created via filepath.makedirs.
            plotOutFileName: file name handed to exportFigure.
            simDataFile: path of the pickled sim data to load.
            validationDataFile: path of the pickled validation data to load.
            metadata: passed through to exportFigure.

        Raises:
            Exception: if seedOutDir is not an existing directory.
        """
        if not os.path.isdir(seedOutDir):
            # Call form of raise; the `raise Exception, msg` statement form
            # is Python-2-only syntax.
            raise Exception(
                'seedOutDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        with open(simDataFile, 'rb') as f:
            sim_data = cPickle.load(f)
        with open(validationDataFile, 'rb') as f:
            validation_data = cPickle.load(f)

        ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

        for sim_dir in ap.get_cells():
            simOutDir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            main_reader = TableReader(os.path.join(simOutDir, 'Main'))

            # Load data
            time = main_reader.readColumn('time')

        plt.figure()

        ### Create Plot ###

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close('all')
Exemple #3
0
    def __init__(self, path):
        """Create the metadata, attributes and columns directories under ``path``.

        Writes VERSION into the FILE_VERSION file inside the metadata
        directory, then records the attributes and columns directory paths
        on the instance. The attribute-name list starts empty and the
        columns start as None.
        """
        dirMetadata = filepath.makedirs(path, DIR_METADATA)

        # Use a context manager so the version file is flushed and closed
        # here instead of whenever the file object happens to be collected.
        version_path = os.path.join(dirMetadata, FILE_VERSION)
        with open(version_path, "w") as version_file:
            version_file.write(VERSION)

        self._dirAttributes = filepath.makedirs(path, DIR_ATTRIBUTES)
        self._attributeNames = []

        self._dirColumns = filepath.makedirs(path, DIR_COLUMNS)
        self._columns = None
	def run(self, args):
		"""Build and run the fitter task sequence, writing outputs under "kb".

		The "kb" directory is created inside ``args.sim_path``. The tasks
		are run in list order, each with an empty dict as its argument, and
		the written file paths are printed at the end.

		Args:
			args: parsed arguments; this method reads sim_path, debug,
				cached, cpus, disable_ribosome_fitting,
				disable_rnapoly_fitting and no_expression_adjustment.
		"""
		kb_directory = fp.makedirs(args.sim_path, "kb")
		raw_data_file = os.path.join(kb_directory, constants.SERIALIZED_RAW_DATA)
		sim_data_file = os.path.join(kb_directory, constants.SERIALIZED_FIT1_FILENAME)
		cached_sim_data_file = os.path.join(
			fp.ROOT_PATH, 'cached', constants.SERIALIZED_FIT1_FILENAME)
		most_fit_filename = os.path.join(
			kb_directory, constants.SERIALIZED_SIM_DATA_MOST_FIT_FILENAME)
		raw_validation_data_file = os.path.join(
			kb_directory, constants.SERIALIZED_RAW_VALIDATION_DATA)
		validation_data_file = os.path.join(
			kb_directory, constants.SERIALIZED_VALIDATION_DATA)

		if args.debug or args.cached:
			# Single-argument print(...) produces the same output whether
			# print is a statement or a function.
			print("{}{}Fitter".format(
				'DEBUG ' if args.debug else '',
				'CACHED ' if args.cached else '',
				))

		tasks = [
			InitRawDataTask(
				output=raw_data_file,
				),

			FitSimDataTask(
				fit_level=1,
				input_data=raw_data_file,
				output_data=sim_data_file,
				cached=args.cached,  # bool
				cached_data=cached_sim_data_file,  # cached file to copy
				cpus=args.cpus,
				debug=args.debug,
				disable_ribosome_capacity_fitting=args.disable_ribosome_fitting,
				disable_rnapoly_capacity_fitting=args.disable_rnapoly_fitting,
				# NOTE(review): a "no_..." flag is passed straight through
				# as "adjust_..."; confirm that no negation is intended.
				adjust_rna_and_protein_parameters=args.no_expression_adjustment,
				),

			SymlinkTask(
				to=constants.SERIALIZED_FIT1_FILENAME,
				link=most_fit_filename,
				overwrite_if_exists=True,
				),

			InitRawValidationDataTask(
				output=raw_validation_data_file,
				),

			InitValidationDataTask(
				validation_data_input=raw_validation_data_file,
				knowledge_base_raw=raw_data_file,
				output_data=validation_data_file,
				),
			]
		for task in tasks:
			task.run_task({})

		print('\n\t'.join(['Wrote', raw_data_file, sim_data_file,
			most_fit_filename, raw_validation_data_file, validation_data_file]))
	def parse_args(self):
		"""Parse arguments via the superclass, then derive ``sim_path``.

		When ``timestamp`` is set, ``sim_outdir`` is prefixed with
		``fp.timestamp()`` and ``'__'``, and its spaces become underscores.
		``sim_path`` is the directory returned by ``fp.makedirs`` for
		ROOT_PATH / "out" / sim_outdir.
		"""
		parsed = super(RunFitter, self).parse_args()

		if parsed.timestamp:
			stamp = fp.timestamp()
			parsed.sim_outdir = stamp + '__' + parsed.sim_outdir.replace(
				' ', '_')

		out_path = fp.makedirs(fp.ROOT_PATH, "out", parsed.sim_outdir)
		parsed.sim_path = out_path
		return parsed
Exemple #6
0
    def __init__(self, **kwargs):
        """Validate the subclass, apply keyword options and load the sim data.

        Every key of DEFAULT_SIMULATION_KWARGS becomes a ``_``-prefixed
        instance attribute, taking the caller's value when one is supplied
        and the default otherwise.

        Args:
            **kwargs: overrides for entries of DEFAULT_SIMULATION_KWARGS.

        Raises:
            SimulationException: if an attribute named in
                ``_definedBySubclass`` is missing, if ``_listenerClasses``
                contains one of DEFAULT_LISTENER_CLASSES, or if an unknown
                keyword argument is given.
        """
        # Validate subclassing
        for attrName in self._definedBySubclass:
            if not hasattr(self, attrName):
                raise SimulationException(
                    "Simulation subclasses must define"
                    " the {} attribute.".format(attrName))

        for listenerClass in DEFAULT_LISTENER_CLASSES:
            if listenerClass in self._listenerClasses:
                # Adjacent literals are joined before .format() is applied,
                # so the placeholder in the first half is filled. With "+",
                # .format() bound only to the second literal and the message
                # kept a literal "{}".
                raise SimulationException(
                    "The {} listener is included by"
                    " default in the Simulation class.".format(
                        listenerClass.name()))

        # Set instance attributes
        for attrName, value in DEFAULT_SIMULATION_KWARGS.viewitems():
            if attrName in kwargs:
                value = kwargs[attrName]

            setattr(self, "_" + attrName, value)

        unknownKeywords = (
            kwargs.viewkeys() - DEFAULT_SIMULATION_KWARGS.viewkeys())

        # Test the set for emptiness; any() would test the truthiness of the
        # key strings and miss an empty-string key.
        if unknownKeywords:
            raise SimulationException(
                "Unknown keyword arguments: {}".format(unknownKeywords))

        # Set time variables
        self._simulationStep = 0

        self.randomState = np.random.RandomState(
            seed=np.uint32(self._seed % np.iinfo(np.uint32).max))

        # divide_cell will fail if _outputDir is no good (e.g. defaulted to
        # None) so catch it *before* running the simulation in case _logToDisk
        # doesn't.
        filepath.makedirs(self._outputDir)

        # Load KB; the context manager closes the file once it is unpickled.
        with open(self._simDataLocation, "rb") as f:
            sim_data = cPickle.load(f)

        # Initialize simulation from fit KB
        self._initialize(sim_data)
    def run(self, args):
        """Run AnalysisVariantTask over ``args.sim_path``.

        Plots go to the "plotOut" directory under ``args.sim_path``, which
        is obtained from ``filepath.makedirs``.
        """
        plot_dir = filepath.makedirs(args.sim_path, 'plotOut')

        task_options = dict(
            input_directory=args.sim_path,
            input_validation_data=args.input_validation_data,
            output_plots_directory=plot_dir,
            metadata=args.metadata,
            plots_to_run=args.plot,
            output_filename_prefix=args.output_prefix,
        )
        AnalysisVariantTask(**task_options).run_task({})
    def test_makedirs(self):
        """Check filepath.makedirs on a new path, an existing path and a clash."""
        subdirs = 'this/is/a/test'
        target = os.path.join(self.test_dir, subdirs)
        self.assertFalse(os.path.exists(target))

        # A new nested path is created and its full path returned.
        created = filepath.makedirs(self.test_dir, subdirs)
        self.assertEqual(created, target)
        self.assertTrue(os.path.exists(target))

        # Asking again, with the path split into several pieces, is accepted
        # and yields the same path.
        created_again = filepath.makedirs(
            self.test_dir, 'this', 'is', 'a/test')
        self.assertEqual(created_again, target)
        self.assertTrue(os.path.exists(target))

        # A regular file sitting where a directory is requested must make
        # makedirs raise OSError.
        blocker = 'data'
        with open(os.path.join(created, blocker), 'w') as blocker_file:
            blocker_file.write('hi')
        with nose.tools.assert_raises(OSError):
            filepath.makedirs(self.test_dir, subdirs, blocker)
    def run(self, args):
        """Run AnalysisCohortTask for one variant directory.

        The variant directory is ``args.sim_path`` joined with
        ``args.variant_dir_name``. Plots go to its "plotOut" subdirectory,
        and the sim data path points into its "kb" subdirectory.
        """
        variant_dir = os.path.join(args.sim_path, args.variant_dir_name)
        modified_sim_data = os.path.join(
            variant_dir, 'kb', constants.SERIALIZED_SIM_DATA_MODIFIED)
        # TODO(jerry): Load simData_Modified into metadata?
        plot_dir = filepath.makedirs(variant_dir, 'plotOut')

        task_options = dict(
            input_variant_directory=variant_dir,
            input_sim_data=modified_sim_data,
            input_validation_data=args.input_validation_data,
            output_plots_directory=plot_dir,
            metadata=args.metadata,
            plots_to_run=args.plot,
            output_filename_prefix=args.output_prefix,
        )
        AnalysisCohortTask(**task_options).run_task({})
Exemple #10
0
    def run(self, args):
        """Run AnalysisMultiGenTask for one seed directory.

        The seed directory is sim_path / variant_dir_name / seed_str. Plots
        go to its "plotOut" subdirectory; the sim data path points into the
        variant directory's "kb" subdirectory.
        """
        variant_dir = os.path.join(args.sim_path, args.variant_dir_name)
        seed_dir = os.path.join(variant_dir, args.seed_str)
        modified_sim_data = os.path.join(
            variant_dir, 'kb', constants.SERIALIZED_SIM_DATA_MODIFIED)
        plot_dir = filepath.makedirs(seed_dir, "plotOut")

        task_options = dict(
            input_seed_directory=seed_dir,
            input_sim_data=modified_sim_data,
            input_validation_data=args.input_validation_data,
            output_plots_directory=plot_dir,
            metadata=args.metadata,
            plots_to_run=args.plot,
            output_filename_prefix=args.output_prefix,
        )
        AnalysisMultiGenTask(**task_options).run_task({})
Exemple #11
0
    def run(self, args):
        """Run AnalysisSingleTask for one simulated cell.

        The cell directory is variant / seed_str / gen_str / daughter_str.
        Results are read from its "simOut" subdirectory and plots go to its
        "plotOut" subdirectory.
        """
        cell_subpath = os.path.join(
            args.seed_str, args.gen_str, args.daughter_str)

        variant_dir = os.path.join(args.sim_path, args.variant_dir_name)
        results_dir = os.path.join(variant_dir, cell_subpath, 'simOut')
        modified_sim_data = os.path.join(
            variant_dir, 'kb', constants.SERIALIZED_SIM_DATA_MODIFIED)
        plot_dir = filepath.makedirs(variant_dir, cell_subpath, 'plotOut')

        task_options = dict(
            input_results_directory=results_dir,
            input_sim_data=modified_sim_data,
            input_validation_data=args.input_validation_data,
            output_plots_directory=plot_dir,
            metadata=args.metadata,
            plots_to_run=args.plot,
            output_filename_prefix=args.output_prefix,
        )
        AnalysisSingleTask(**task_options).run_task({})
    def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Violin plot of LIMITED_METABOLITES concentrations over a cohort.

        For every cell returned by AnalysisPaths, the first row of
        "metaboliteCountsFinal" and "countsToMolar" is dropped and the
        counts of the limited metabolites are multiplied by countsToMolar.
        The results of all cells are stacked, summary statistics are
        printed, and one violin per metabolite is drawn.

        Args:
            variantDir: existing variant directory to analyze.
            plotOutDir: output directory, created via filepath.makedirs.
            plotOutFileName: file name handed to exportFigure.
            simDataFile: unused here.
            validationDataFile: unused here.
            metadata: passed through to exportFigure.

        Raises:
            Exception: if variantDir is not an existing directory.
        """
        if not os.path.isdir(variantDir):
            # Call form of raise; the `raise Exception, msg` statement form
            # is Python-2-only syntax.
            raise Exception(
                'variantDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(variantDir, cohort_plot=True)

        limited_metabolites = []
        for sim_dir in ap.get_cells():
            sim_out_dir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            kinetics_reader = TableReader(
                os.path.join(sim_out_dir, "EnzymeKinetics"))

            # Load data
            try:
                metabolite_indices = {
                    m: i
                    for i, m in enumerate(
                        kinetics_reader.readAttribute('metaboliteNames'))
                }
                metabolite_counts = kinetics_reader.readColumn(
                    "metaboliteCountsFinal")[1:, :]
                counts_to_molar = kinetics_reader.readColumn(
                    'countsToMolar')[1:].reshape(-1, 1)
            except Exception:
                # Best effort: skip cells whose tables cannot be read.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                print('Error reading data from {}'.format(sim_out_dir))
                continue

            # Calculate concentrations
            met_idx = np.array(
                [metabolite_indices[m] for m in LIMITED_METABOLITES])
            metabolite_conc = counts_to_molar * metabolite_counts[:, met_idx]
            limited_metabolites += [metabolite_conc]

        limited_metabolites = np.vstack(limited_metabolites)

        # Values to calculate significance between different cohorts
        print('Metabolites: {}'.format(LIMITED_METABOLITES))
        print('Means: {}'.format(limited_metabolites.mean(axis=0)))
        print('Stds: {}'.format(limited_metabolites.std(axis=0)))
        print('N: {}'.format(limited_metabolites.shape[0]))

        plt.figure(figsize=(4, 4))
        # NOTE(review): two positions, so this presumably assumes exactly
        # two entries in LIMITED_METABOLITES - confirm.
        xticks = [0, 1]

        # Plot data
        plt.violinplot(limited_metabolites, xticks, showmeans=True)

        # Format axes
        plt.ylim([0, 50])
        whitePadSparklineAxis(plt.gca())
        plt.xticks(xticks, LIMITED_METABOLITES)
        plt.ylabel('Concentration (uM)')

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close('all')
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Scatter added volume against birth volume for three variants.

        Only runs when ``metadata["variant"]`` is "condition". For each
        variant, the first and last "cellMass" values of the cells in
        generations 2 and 3 are divided by the variant's cell density to
        obtain birth volume and added volume. The figure is exported twice:
        once labelled and once as a "_clean" version without axis labels or
        tick labels.

        Args:
            inputDir: existing directory holding the variants.
            plotOutDir: output directory, created via filepath.makedirs.
            plotOutFileName: base file name handed to exportFigure.
            simDataFile: unused here; each variant's own kb is loaded.
            validationDataFile: unused here.
            metadata: must contain "variant"; passed to exportFigure.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if metadata["variant"] != "condition":
            print("This plot only runs for the 'condition' variant.")
            return

        if not os.path.isdir(inputDir):
            # Call form of raise; the `raise Exception, msg` statement form
            # is Python-2-only syntax.
            raise Exception(
                'inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        gens = [2, 3]

        initial_volumes = []
        added_volumes = []

        for variant in variants:
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)

            cell_density = sim_data.constants.cellDensity

            initial_masses = np.zeros(0)
            final_masses = np.zeros(0)

            all_cells = ap.get_cells(variant=[variant], generation=gens)

            if len(all_cells) == 0:
                continue

            for simDir in all_cells:
                try:
                    simOutDir = os.path.join(simDir, "simOut")
                    mass = TableReader(os.path.join(simOutDir, "Mass"))
                    cellMass = mass.readColumn("cellMass")

                    initial_masses = np.hstack((initial_masses, cellMass[0]))
                    final_masses = np.hstack((final_masses, cellMass[-1]))
                except Exception:
                    # Best effort: skip cells whose mass data cannot be
                    # read. Exception (not a bare except) lets
                    # KeyboardInterrupt and SystemExit propagate.
                    continue

            added_masses = final_masses - initial_masses

            initial_volume = initial_masses / cell_density.asNumber(
                units.fg / units.um**3)
            added_volume = added_masses / cell_density.asNumber(
                units.fg / units.um**3)

            initial_volumes.append(initial_volume)
            added_volumes.append(added_volume)

        plt.style.use('seaborn-deep')
        color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

        plt.figure(figsize=(4, 4))
        ax = plt.subplot2grid((1, 1), (0, 0))

        options = {
            "edgecolors": color_cycle[0],
            "alpha": 0.2,
            "s": 50,
            "clip_on": False
        }
        # NOTE(review): the fixed indices 0-2 below assume three variants
        # that all produced cells (variants without cells are skipped above
        # and shift the list positions) - confirm against the callers.
        labels = ["minimal", "anaerobic", "minimal + AA"]

        ax.scatter(initial_volumes[2],
                   added_volumes[2],
                   marker="x",
                   label=labels[2],
                   **options)
        ax.scatter(initial_volumes[0],
                   added_volumes[0],
                   facecolors="none",
                   marker="o",
                   label=labels[0],
                   **options)
        ax.scatter(initial_volumes[1],
                   added_volumes[1],
                   facecolors="none",
                   marker="^",
                   label=labels[1],
                   **options)

        ax.set_xlim([0, 4])
        ax.set_ylim([0, 4])
        # Raw strings: same text, without relying on the invalid "\m" escape.
        ax.set_xlabel(r"Birth Volume ($\mu m^3$)")
        ax.set_ylabel(r"Added Volume ($\mu m^3$)")
        ax.legend()

        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

        whitePadSparklineAxis(ax)

        ax.tick_params(which='both',
                       bottom=True,
                       left=True,
                       top=False,
                       right=False,
                       labelbottom=True,
                       labelleft=True)

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        # Get clean version of plot
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)

        plt.close("all")
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""Bar charts of per-variant summary metrics against lambda.

		Each variant is analyzed by ``analyze_variant`` in a process pool.
		The first figure stacks eight bar charts sharing a log10(lambda)
		x axis. A second figure, exported with an "_obj" suffix, plots the
		kinetic objective value against the homeostatic objective value on
		log axes.

		Args:
			inputDir: existing directory holding the variants.
			plotOutDir: output directory, created via filepath.makedirs.
			plotOutFileName: base file name handed to exportFigure.
			simDataFile: unused here.
			validationDataFile: path of the pickled validation data.
			metadata: passed through to exportFigure.

		Raises:
			Exception: if inputDir is not an existing directory.
		"""
		if not os.path.isdir(inputDir):
			# Call form of raise; the `raise Exception, msg` statement form
			# is Python-2-only syntax.
			raise Exception('inputDir does not currently exist as a directory')

		ap = AnalysisPaths(inputDir, variant_plot=True)
		variants = ap.get_variants()
		n_variants = len(variants)

		if n_variants <= 1:
			print('This plot only runs for multiple variants')
			return

		filepath.makedirs(plotOutDir)

		# Load validation data; the context manager closes the file.
		with open(validationDataFile, 'rb') as f:
			validation_data = cPickle.load(f)
		toya_reactions = validation_data.reactionFlux.toya2010fluxes['reactionID']
		toya_fluxes = np.array([x.asNumber(DCW_FLUX_UNITS) for x in validation_data.reactionFlux.toya2010fluxes['reactionFlux']])
		outlier_filter = [False if rxn in OUTLIER_REACTIONS else True for rxn in toya_reactions]

		# Arrays to populate for plots
		lambdas = np.zeros(n_variants)
		n_sims = np.zeros(n_variants)
		growth_rates = np.zeros(n_variants)
		conc_correlation = np.zeros(n_variants)
		n_conc_off_axis = np.zeros(n_variants)
		flux_correlation = np.zeros(n_variants)
		nonzero_flux_correlation = np.zeros(n_variants)
		n_flux_above_0 = np.zeros(n_variants)
		n_flux_off_axis = np.zeros(n_variants)
		correlation_coefficient = np.zeros(n_variants)
		filtered_correlation_coefficient = np.zeros(n_variants)
		homeostatic_objective_value = np.zeros(n_variants)
		kinetic_objective_value = np.zeros(n_variants)
		homeostatic_objective_std = np.zeros(n_variants)
		kinetic_objective_std = np.zeros(n_variants)

		# Pull information from sim data and listeners in parallel
		pool = Pool(processes=parallelization.plotter_cpus())
		args = zip(
			variants,
			[ap] * n_variants,
			[toya_reactions] * n_variants,
			[toya_fluxes] * n_variants,
			[outlier_filter] * n_variants
			)
		results = pool.map(analyze_variant, args)
		pool.close()
		pool.join()
		for i, result in enumerate(results):
			# n_metabolites and n_fluxes are plain names, so they keep the
			# values from the last variant's result.
			(lambdas[i],
				n_sims[i],
				growth_rates[i],
				conc_correlation[i],
				n_conc_off_axis[i],
				flux_correlation[i],
				n_flux_off_axis[i],
				nonzero_flux_correlation[i],
				n_flux_above_0[i],
				correlation_coefficient[i],
				filtered_correlation_coefficient[i],
				kinetic_objective_value[i],
				kinetic_objective_std[i],
				homeostatic_objective_value[i],
				homeostatic_objective_std[i],
				n_metabolites,
				n_fluxes) = result

		# Plot on a log10(lambda) axis; lambda == 0 is placed one unit below
		# the smallest nonzero exponent and labelled '0'.
		tick_labels = [r'$10^{%i}$' % (np.log10(x),) if x != 0 else '0' for x in lambdas]
		lambdas = [np.log10(x) if x != 0 else np.nanmin(np.log10(lambdas[lambdas != 0]))-1 for x in lambdas]

		plt.figure(figsize = (8.5, 22))
		plt.style.use('seaborn-deep')
		subplots = 8

		# Growth rates, normalized to the first variant
		ax = plt.subplot(subplots, 1, 1)
		plt.bar(lambdas, growth_rates / growth_rates[0], align='center')
		plt.axhline(1, linestyle='--', color='k')
		plt.ylim([0, 2])
		plt.ylabel('Growth rate deviation\nfrom no kinetics')
		whitePadSparklineAxis(ax, xAxis=False)
		plt.yticks([0, 1, 2])

		# Flux target comparisons
		ax = plt.subplot(subplots, 1, 2)
		plt.bar(lambdas, nonzero_flux_correlation, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Kinetic target flux PCC')
		whitePadSparklineAxis(ax, xAxis=False)

		ax = plt.subplot(subplots, 1, 3)
		plt.bar(lambdas, n_flux_above_0 / n_fluxes, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Fraction of fluxes\nabove 0')
		whitePadSparklineAxis(ax, xAxis=False)

		ax = plt.subplot(subplots, 1, 4)
		plt.bar(lambdas, n_flux_off_axis / n_fluxes, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Fraction of fluxes\noff axis (>{:.0f}%)'.format(FRAC_FLUX_OFF_AXIS*100))
		whitePadSparklineAxis(ax, xAxis=False)

		# Metabolite comparisons
		ax = plt.subplot(subplots, 1, 5)
		plt.bar(lambdas, conc_correlation, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Concentration PCC')
		whitePadSparklineAxis(ax, xAxis=False)

		ax = plt.subplot(subplots, 1, 6)
		plt.bar(lambdas, n_conc_off_axis / n_metabolites, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Fraction of concentrations\noff axis (>{:.0f}%)'.format(FRAC_CONC_OFF_AXIS*100))
		whitePadSparklineAxis(ax, xAxis=False)

		# Toya comparison
		ax = plt.subplot(subplots, 1, 7)
		plt.bar(lambdas, filtered_correlation_coefficient, align='center')
		plt.ylim([0, 1])
		plt.ylabel('Central carbon flux PCC')
		whitePadSparklineAxis(ax, xAxis=False)

		# Viable sims
		ax = plt.subplot(subplots, 1, 8)
		plt.bar(lambdas, n_sims, align='center')
		plt.ylabel('Number of sims\nwith data')
		whitePadSparklineAxis(ax)
		plt.xticks(lambdas, tick_labels)

		plt.xlabel('lambda')

		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		# Plot kinetic vs homeostatic objective values
		plt.figure(figsize=(3.5, 3.5))
		ax = plt.gca()
		ax.set_xscale("log", nonposx='clip')
		ax.set_yscale("log", nonposy='clip')
		plt.errorbar(homeostatic_objective_value, kinetic_objective_value, xerr=homeostatic_objective_std, yerr=kinetic_objective_std, fmt='none', ecolor='k', alpha=0.5, linewidth=0.5)
		plt.plot(homeostatic_objective_value, kinetic_objective_value, "ob", markeredgewidth=0.1, alpha=0.9)
		# Label each point with its variant position, drawn below the marker
		for i in range(len(lambdas)):
			plt.text(homeostatic_objective_value[i], 0.6*kinetic_objective_value[i], i, horizontalalignment='center', verticalalignment='center')
		plt.xlabel('Homeostatic Objective Value')
		plt.ylabel('Kinetics Objective Value')

		whitePadSparklineAxis(ax)

		# Adjust limits to get tick labels to display
		xlim = ax.get_xlim()
		xlim = [10**np.floor(np.log10(xlim[0])), 10**np.ceil(np.log10(xlim[1]))]
		ax.set_xticks(xlim)
		ylim = ax.get_ylim()
		ylim = [10**np.floor(np.log10(ylim[0])), 10**np.ceil(np.log10(ylim[1]))]
		ax.set_yticks(ylim)

		exportFigure(plt, plotOutDir, '{}_obj'.format(plotOutFileName), metadata)

		plt.close('all')
Exemple #15
0
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Scatter glucose uptake against the log2 succinate flux ratio.

        One point is drawn per variant. Its x value is the negated mean
        GLC_ID exchange flux and its y value is log2 of the mean SUCC_ID
        model flux over the Toya flux. Marker area scales with the Pearson
        correlation between model and Toya fluxes. Variants in which none
        of HIGHLIGHTED_CONSTRAINTS is a constrained reaction are drawn in
        red, the others in blue. An inset zooms into the region bounded by
        GLC_MAX and SUCC_DISTANCE and labels its points.

        Args:
            inputDir: existing directory holding the variants and a "kb"
                subdirectory with the fit sim data.
            plotOutDir: output directory, created via filepath.makedirs.
            plotOutFileName: file name handed to exportFigure.
            simDataFile: unused here; sim data is loaded from inputDir.
            validationDataFile: path of the pickled validation data.
            metadata: passed through to exportFigure.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if not os.path.isdir(inputDir):
            # Call form of raise; the `raise Exception, msg` statement form
            # is Python-2-only syntax.
            raise Exception(
                'inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()
        n_variants = len(variants)

        # Load sim_data
        sim_data_path = os.path.join(
            inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
        with open(sim_data_path, 'rb') as f:
            sim_data = cPickle.load(f)
        cell_density = sim_data.constants.cellDensity.asNumber(
            MASS_UNITS / VOLUME_UNITS)

        # Load validation_data
        with open(validationDataFile, "rb") as f:
            validation_data = cPickle.load(f)
        toya = validation_data.reactionFlux.toya2010fluxes
        toyaReactions = toya["reactionID"]
        toyaFluxes = toya["reactionFlux"]
        toyaStdev = toya["reactionFluxStdev"]
        toyaFluxesDict = dict(zip(toyaReactions, toyaFluxes))
        toyaStdevDict = dict(zip(toyaReactions, toyaStdev))

        glc_uptakes = np.zeros(n_variants)
        log_ratio_succ = np.zeros(n_variants)
        size_pearson = np.zeros(n_variants)
        selected_indicies = np.zeros(n_variants, bool)
        for v, variant in enumerate(variants):
            # initialize kinetic flux comparison
            exchange_fluxes = {entry: [] for entry in EXCHANGES}
            reaction_fluxes = {entry: [] for entry in REACTIONS}

            # One list of per-cell mean fluxes for every Toya reaction
            modelFluxes = {rxn: [] for rxn in toyaReactions}

            for sim_dir in ap.get_cells(variant=[variant]):
                simOutDir = os.path.join(sim_dir, "simOut")

                try:
                    # Listeners used
                    massListener = TableReader(os.path.join(simOutDir, "Mass"))
                    fbaResults = TableReader(
                        os.path.join(simOutDir, "FBAResults"))
                    enzymeKineticsReader = TableReader(
                        os.path.join(simOutDir, "EnzymeKinetics"))

                    ## Read from mass listener
                    cellMass = massListener.readColumn("cellMass")
                    # skip if no data; compare by value, since identity
                    # with a tuple literal is implementation-dependent
                    if cellMass.shape == ():
                        continue
                    dryMass = massListener.readColumn("dryMass")
                except Exception as e:
                    print(e)
                    continue

                coefficient = (dryMass / cellMass * cell_density).reshape(
                    -1, 1)

                ## Read from FBA listener
                reactionIDs = {
                    r: i
                    for i, r in enumerate(
                        fbaResults.readAttribute("reactionIDs"))
                }
                exMolec = {
                    m: i
                    for i, m in enumerate(
                        fbaResults.readAttribute("externalMoleculeIDs"))
                }
                reactionFluxes = FLUX_CONVERSION * (
                    fbaResults.readColumn("reactionFluxes") /
                    coefficient)[1:, :]
                exFlux = fbaResults.readColumn("externalExchangeFluxes")[1:, :]

                ## Read from EnzymeKinetics listener
                constrainedReactions = {
                    r: i
                    for i, r in enumerate(
                        enzymeKineticsReader.readAttribute(
                            "constrainedReactions"))
                }

                ## Append values for relevant reactions.
                # append to exchanges
                for entry in EXCHANGES:
                    exchange_fluxes[entry].extend(
                        list(exFlux[:, exMolec[entry]]))
                # append to reaction fluxes
                for entry in REACTIONS:
                    reaction_fluxes[entry].extend(
                        list(reactionFluxes[:, reactionIDs[entry]]))

                ## get all Toya reactions, and corresponding simulated fluxes.
                # Keep the direction as an explicit +1/-1 next to the column
                # index. Encoding it as the sign of the index drops the
                # reaction at index 0, because np.sign(0) is 0.
                toya_idx = {r: [] for r in toyaReactions}
                for rxn, i in reactionIDs.items():
                    parts = rxn.split(' (reverse)')
                    sign = -1 if len(parts) > 1 else 1
                    base_rxn = parts[0].split('__')[0]
                    if base_rxn in toya_idx:
                        toya_idx[base_rxn].append((sign, i))
                for toyaReaction, reaction_idx in toya_idx.items():
                    flux_time_course = np.sum(
                        [sign * reactionFluxes[:, i]
                         for sign, i in reaction_idx],
                        axis=0)
                    modelFluxes[toyaReaction].append(flux_time_course.mean())

            ## Flux comparison with Toya
            toyaVsReactionAve = []
            rxn_order = []
            for rxn, toyaFlux in toyaFluxesDict.items():
                rxn_order.append(rxn)
                if rxn in modelFluxes:
                    toyaVsReactionAve.append(
                        (np.mean(modelFluxes[rxn]),
                         toyaFlux.asNumber(OUTPUT_FLUX_UNITS),
                         np.std(modelFluxes[rxn]),
                         toyaStdevDict[rxn].asNumber(OUTPUT_FLUX_UNITS)))

            toyaVsReactionAve = np.array(toyaVsReactionAve)
            rWithAll = pearsonr(toyaVsReactionAve[:, 0],
                                toyaVsReactionAve[:, 1])
            succ_toya_flux = toyaVsReactionAve[rxn_order.index(SUCC_ID), 1]

            # Save data for plotting
            glc_uptakes[v] = -np.mean(exchange_fluxes[GLC_ID])
            log_ratio_succ[v] = np.log2(
                np.mean(reaction_fluxes[SUCC_ID]) / succ_toya_flux)
            size_pearson[v] = (rWithAll[0] * 8)**2
            # NOTE(review): constrainedReactions comes from the last cell
            # read above; it is stale or undefined if this variant had no
            # readable cells - confirm that this cannot happen.
            selected_indicies[v] = np.all([
                c not in constrainedReactions for c in HIGHLIGHTED_CONSTRAINTS
            ])

        # Plot scatterplot
        fig = plt.figure(figsize=(5, 5))
        gs = gridspec.GridSpec(40, 40)

        ## Plot full data
        plt.scatter(glc_uptakes[~selected_indicies],
                    log_ratio_succ[~selected_indicies],
                    color='blue',
                    alpha=0.6,
                    s=size_pearson[~selected_indicies])
        plt.scatter(glc_uptakes[selected_indicies],
                    log_ratio_succ[selected_indicies],
                    color='red',
                    alpha=0.6,
                    s=size_pearson[selected_indicies])
        x_min, x_max = plt.xlim()
        y_max = max(np.abs(plt.ylim()))
        plt.axvspan(0, GLC_MAX, facecolor='g', alpha=0.1)
        plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)
        plt.axhline(y=0, color='k', linestyle='--')

        ## Format axes
        plt.ylabel('log2(model flux / Toya flux)')
        plt.xlabel('glucose uptake (mmol / g DCW / hr)')
        plt.xlim([np.floor(min(x_min, 10)), np.ceil(x_max)])
        plt.ylim([-y_max, y_max])

        ## Plot highlighted region data
        fig.add_subplot(gs[1:28, -20:-1])
        in_region = ((glc_uptakes < GLC_MAX) &
                     (np.abs(log_ratio_succ) < SUCC_DISTANCE))
        selected_in = in_region & selected_indicies
        not_selected_in = in_region & ~selected_indicies
        # Each label is the list of two-character prefixes of the variant's
        # disabled constraints (empty when there are none).
        constraint_labels = np.array(
            [[c[:2] for c in constraints] if constraints is not None else []
             for _, constraints in map(get_disabled_constraints, variants)])
        plt.scatter(glc_uptakes[not_selected_in],
                    log_ratio_succ[not_selected_in],
                    color='blue',
                    alpha=0.6,
                    s=size_pearson[not_selected_in])
        plt.scatter(glc_uptakes[selected_in],
                    log_ratio_succ[selected_in],
                    color='red',
                    alpha=0.6,
                    s=size_pearson[selected_in])
        for x, y, label in zip(glc_uptakes[in_region],
                               log_ratio_succ[in_region],
                               constraint_labels[in_region]):
            plt.text(x, y, ', '.join(label), ha='center', va='top', fontsize=6)
        x_min, _ = plt.xlim()
        x_min = np.floor(min(x_min, 10))
        plt.axvspan(x_min, GLC_MAX, facecolor='g', alpha=0.1)
        plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)

        ## Format axes
        plt.xlim([x_min, GLC_MAX])
        plt.ylim([-SUCC_DISTANCE, SUCC_DISTANCE])

        ## Save figure
        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close('all')
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Violin plot of the growth yield on glucose for selected variants.

        Three variants are picked through ``get_disabled_constraints``:
        slot 0 when its second return value is None, slot 1 when it is
        empty, and slot 2 when it equals ADDITIONAL_DISABLED_CONSTRAINTS.
        For every cell of those variants the per-time-step yield is
        computed as growth / -(glucose flux * glucose mass * dry mass),
        dropping the first time step. Welch t-test p-values between all
        variant pairs are printed.

        Args:
            inputDir: existing directory holding the variants and a "kb"
                subdirectory with the fit sim data.
            plotOutDir: output directory, created via filepath.makedirs.
            plotOutFileName: file name handed to exportFigure.
            simDataFile: unused here; sim data is loaded from inputDir.
            validationDataFile: unused here.
            metadata: passed through to exportFigure.

        Raises:
            Exception: if inputDir is not an existing directory.
        """
        if not os.path.isdir(inputDir):
            # Call form of raise; the `raise Exception, msg` statement form
            # is Python-2-only syntax.
            raise Exception(
                'inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        all_variants = ap.get_variants()
        # -1 marks a slot for which no matching variant was found.
        # NOTE(review): this is a float array, so the variant values are
        # stored as floats before being handed to ap.get_cells - confirm
        # that this is accepted.
        variants = -np.ones(N_VARIANTS)
        for variant in all_variants:
            _, additional_disabled = get_disabled_constraints(variant)
            if additional_disabled is None:
                variants[0] = variant
            elif len(additional_disabled) == 0:
                variants[1] = variant
            elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
                variants[2] = variant

        if np.any(variants < 0):
            print('Not enough variants to analyze')
            return

        sim_data_path = os.path.join(
            inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
        with open(sim_data_path, 'rb') as f:
            sim_data = cPickle.load(f)

        all_yields = []
        for variant in variants:
            yields = []

            for sim_dir in ap.get_cells(variant=[variant]):
                sim_out_dir = os.path.join(sim_dir, 'simOut')

                # Listeners used
                fba_reader = TableReader(
                    os.path.join(sim_out_dir, 'FBAResults'))
                main_reader = TableReader(os.path.join(sim_out_dir, 'Main'))
                mass_reader = TableReader(os.path.join(sim_out_dir, 'Mass'))

                # Load data
                time_step_sec = main_reader.readColumn('timeStepSec')

                external_fluxes = fba_reader.readColumn(
                    'externalExchangeFluxes')
                external_molecules = fba_reader.readAttribute(
                    'externalMoleculeIDs')

                dry_mass = MASS_UNITS * mass_reader.readColumn('dryMass')
                growth = GROWTH_UNITS * mass_reader.readColumn(
                    'growth') / time_step_sec

                # Calculate growth yield on glucose
                glc_idx = external_molecules.index(GLUCOSE_ID)
                glc_flux = FLUX_UNITS * external_fluxes[:, glc_idx]
                glc_mw = sim_data.getter.getMass([GLUCOSE_ID])[0]
                glc_mass_flux = glc_flux * glc_mw * dry_mass
                glc_mass_yield = growth / -glc_mass_flux

                yields += list(glc_mass_yield[1:].asNumber())

            all_yields += [yields]

        # Pairwise comparison of every variant with each later one
        for i, v1 in enumerate(variants):
            for j, v2 in enumerate(variants[i + 1:]):
                t, p = stats.ttest_ind(all_yields[i],
                                       all_yields[i + j + 1],
                                       equal_var=False)
                print('p={:.2e} for variant {} vs variant {}'.format(
                    p, v1, v2))

        plt.figure(figsize=(4, 4))
        xticks = range(N_VARIANTS)

        # Plot data
        plt.violinplot(all_yields, xticks, showmeans=False, showextrema=False)
        plt.axhline(VALIDATION_YIELD, linestyle='--', color='#eb7037')

        # Format axes
        ax = plt.gca()
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        plt.xticks(xticks, VARIANT_LABELS)
        plt.ylabel('Glucose Yield\n(g cell / g glucose)')

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close('all')
Exemple #17
0
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Plot RNA/protein mass ratio against growth rate per condition.

        Only runs when ``metadata["variant"]`` is "condition". For every
        variant, each cell from generation FIRST_GENERATION onward contributes
        one growth rate (ln 2 divided by the cell's simulated duration, which
        is scaled by 1/3600 - presumably seconds to hours) and one ratio of
        mean RNA mass to mean protein mass. Cells whose output cannot be read
        are reported and skipped.

        Three figures are exported: an error-bar summary under
        ``plotOutFileName``, a copy of it without labels under
        ``plotOutFileName + "_clean"`` and a per-cell scatter plot under
        ``plotOutFileName + "_scatter"``. Each shows the first three variants
        that have data, labelled 'basal', 'anaerobic' and '+AA' in that order.

        Prints a message and returns without plotting when the variant is not
        "condition", when there are too few generations, or when fewer than
        three variants have simulation data.

        Raises:
            Exception: if ``inputDir`` is not an existing directory.
        """
        if metadata["variant"] != "condition":
            print('This analysis only runs for the "condition" variant.')
            return

        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        n_gens = ap.n_generation
        variants = ap.get_variants()

        if n_gens - 1 < FIRST_GENERATION:
            print('Not enough generations to plot.')
            return

        all_growth_rates = []
        all_rna_to_protein_ratios = []

        for variant in variants:
            doubling_times = np.zeros(0)
            variant_rna_to_protein_ratios = np.zeros(0)

            all_cells = ap.get_cells(variant=[variant],
                                     generation=range(FIRST_GENERATION,
                                                      n_gens))

            if len(all_cells) == 0:
                continue

            for simDir in all_cells:
                # Best effort: a cell with missing or unreadable output is
                # skipped, but the reason is reported instead of being hidden.
                try:
                    simOutDir = os.path.join(simDir, "simOut")
                    mass = TableReader(os.path.join(simOutDir, "Mass"))
                    rna_mass = mass.readColumn("rnaMass")
                    protein_mass = mass.readColumn("proteinMass")

                    time = TableReader(os.path.join(simOutDir,
                                                    "Main")).readColumn("time")

                    doubling_time = (time[-1] - time[0]) / 3600.
                    rna_to_protein_ratio = (rna_mass.mean()
                                            / protein_mass.mean())
                except Exception as e:
                    print('Skipping {}: {}'.format(simDir, e))
                    continue

                # Both arrays are extended together so they stay aligned
                doubling_times = np.hstack((doubling_times, doubling_time))
                variant_rna_to_protein_ratios = np.hstack(
                    (variant_rna_to_protein_ratios, rna_to_protein_ratio))

            variant_growth_rates = np.log(2) / doubling_times

            all_growth_rates.append(variant_growth_rates)
            all_rna_to_protein_ratios.append(variant_rna_to_protein_ratios)

        marker_styles = ['o', '^', 'x']
        labels = ['basal', 'anaerobic', '+AA']

        # Variants without cells are not appended above, so indexing the three
        # expected conditions below would fail with an IndexError.
        if len(all_growth_rates) < len(labels):
            print('Not enough variants with simulation data to plot.')
            return

        def add_reference_line_and_format(ax, line_x_max, x_max, y_max):
            # Draws the linear relation and applies the shared axis styling.
            # Add linear plot proposed in Scott et al. (2010)
            x_linear = np.linspace(0.05, line_x_max, 100)
            y_linear = x_linear / 4.5 + 0.087
            ax.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

            ax.set_xlim([0, x_max])
            ax.set_ylim([0, y_max])
            ax.get_yaxis().get_major_formatter().set_useOffset(False)
            ax.get_xaxis().get_major_formatter().set_useOffset(False)

            whitePadSparklineAxis(ax)

            ax.tick_params(which='both',
                           bottom=True,
                           left=True,
                           top=False,
                           right=False,
                           labelbottom=True,
                           labelleft=True)

            # Raw string: "\l" is not a valid escape sequence
            ax.set_xlabel(r"Growth rate $\lambda$ (hour$^{-1}$)")
            ax.set_ylabel("RNA/protein mass ratio")

        # Get errorbar plot
        plt.figure(figsize=FIGSIZE)

        plt.style.use('seaborn-deep')
        color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

        ax = plt.subplot2grid((1, 1), (0, 0))

        for i, (marker, label) in enumerate(zip(marker_styles, labels)):
            ax.errorbar(all_growth_rates[i].mean(),
                        all_rna_to_protein_ratios[i].mean(),
                        yerr=all_rna_to_protein_ratios[i].std(),
                        color=color_cycle[0],
                        mec=color_cycle[0],
                        marker=marker,
                        markersize=8,
                        mfc='white',
                        linewidth=1,
                        capsize=2,
                        label=label)

        add_reference_line_and_format(ax, 1.95, 2, 0.7)
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        # Get clean version of errorbar plot
        ax.set_xlabel("")
        ax.set_ylabel("")
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)

        plt.close("all")

        # Get scatter version of plot
        plt.figure(figsize=FIGSIZE)
        ax = plt.subplot2grid((1, 1), (0, 0))

        options = {"edgecolors": color_cycle[0], "alpha": 0.25, "s": 20}

        # The 'x' marker is drawn without the facecolors="none" override
        scatter_styles = [
            {"marker": "o", "facecolors": "none"},
            {"marker": "^", "facecolors": "none"},
            {"marker": "x"},
        ]
        for i, (style, label) in enumerate(zip(scatter_styles, labels)):
            scatter_kwargs = dict(options, label=label, **style)
            ax.scatter(all_growth_rates[i],
                       all_rna_to_protein_ratios[i],
                       **scatter_kwargs)

        add_reference_line_and_format(ax, 2.45, 2.5, 0.8)
        exportFigure(plt, plotOutDir, plotOutFileName + "_scatter", metadata)

        # Release the scatter figure as well; it was previously left open
        plt.close("all")
Exemple #18
0
    def run(self, args):
        """Write run metadata, then build and simulate every requested variant.

        Steps, in order:
          1. Verify that the fitted sim_data file exists under
             ``<args.sim_path>/kb``.
          2. Pickle a metadata dict (selected CLI arguments, git hash and
             branch, description, timestamp, variant type and variant count)
             into ``<args.sim_path>/metadata``.
          3. For each variant index from ``args.variant[1]`` through
             ``args.variant[2]`` inclusive, run a VariantSimDataTask and then,
             for every seed and generation, a SimulationTask (generation 0) or
             a SimulationDaughterTask (later generations) that inherits its
             state from the previous generation's "Daughter1" output.
        """
        kb_directory = os.path.join(args.sim_path, 'kb')
        fp.verify_file_exists(
            os.path.join(kb_directory, 'simData_Fit_1.cPickle'),
            'Run runFitter?')

        timestamp, description = parse_timestamp_description(args.sim_path)

        variant_type = args.variant[0]
        first_variant = int(args.variant[1])
        last_variant = int(args.variant[2])
        variants_to_run = xrange(first_variant, last_variant + 1)

        # Arguments forwarded to every simulation task
        sim_arg_names = (
            'length_sec', 'timestep_safety_frac', 'timestep_max',
            'timestep_update_freq', 'mass_distribution', 'growth_rate_noise',
            'd_period_division', 'translation_supply')
        cli_sim_args = data.select_keys(vars(args), sim_arg_names)

        # Write the metadata file.
        metadata_arg_names = (
            'total_gens', 'mass_distribution', 'growth_rate_noise',
            'd_period_division', 'translation_supply',
            'variable_elongation_translation',
            'variable_elongation_transcription')
        metadata = dict(data.select_keys(vars(args), metadata_arg_names))
        metadata.update(
            git_hash=fp.run_cmdline("git rev-parse HEAD") or '--',
            git_branch=fp.run_cmdline("git symbolic-ref --short HEAD") or '--',
            description=description,
            time=timestamp,
            analysis_type=None,
            variant=variant_type,
            total_variants=str(len(variants_to_run)))

        metadata_path = os.path.join(
            fp.makedirs(args.sim_path, 'metadata'),
            constants.SERIALIZED_METADATA_FILE)
        with open(metadata_path, "wb") as f:
            cPickle.dump(metadata, f, cPickle.HIGHEST_PROTOCOL)

        # args.sim_path is called INDIV_OUT_DIRECTORY in fw_queue.
        for variant_index in variants_to_run:
            variant_directory = os.path.join(
                args.sim_path, variant_type + "_%06d" % variant_index)
            variant_kb_directory = os.path.join(variant_directory, "kb")

            most_fit_filename = os.path.join(
                kb_directory, constants.SERIALIZED_SIM_DATA_MOST_FIT_FILENAME)
            modified_sim_data_file = os.path.join(
                variant_kb_directory, constants.SERIALIZED_SIM_DATA_MODIFIED)

            fp.makedirs(variant_kb_directory)
            variant_metadata_directory = fp.makedirs(variant_directory,
                                                     "metadata")

            variant_task = VariantSimDataTask(
                variant_function=variant_type,
                variant_index=variant_index,
                input_sim_data=most_fit_filename,
                output_sim_data=modified_sim_data_file,
                variant_metadata_directory=variant_metadata_directory,
            )
            variant_task.run_task({})

            # init sim seeds
            for seed in xrange(args.seed, args.seed + args.init_sims):
                seed_directory = fp.makedirs(variant_directory, "%06d" % seed)

                for generation in xrange(args.generations):
                    gen_directory = fp.makedirs(
                        seed_directory, "generation_%06d" % generation)

                    # The daughter number among all of this generation's
                    # cells, which is 0 for single-daughters but would span
                    # range(2**generation) if each parent had 2 daughters.
                    daughter = 0
                    cell_directory = fp.makedirs(gen_directory,
                                                 "%06d" % daughter)
                    sim_out_directory = fp.makedirs(cell_directory, "simOut")

                    options = dict(cli_sim_args)
                    options.update(
                        input_sim_data=modified_sim_data_file,
                        output_directory=sim_out_directory,
                    )

                    if generation > 0:
                        # Later generations start from the state saved by the
                        # parent cell in the previous generation.
                        daughter_state_path = os.path.join(
                            seed_directory,
                            "generation_%06d" % (generation - 1),
                            "%06d" % (daughter // 2),
                            "simOut",
                            "Daughter%d" % (daughter % 2 + 1))
                        sim_task = SimulationDaughterTask(
                            inherited_state_path=daughter_state_path,
                            seed=(seed + 1) * ((2**generation - 1) + daughter),
                            **options)
                    else:
                        sim_task = SimulationTask(seed=seed, **options)

                    sim_task.run_task({})
	def _populateDerivativeAndJacobian(self):
		'''
		Creates callable functions for computing the derivative and the Jacobian.

		The generated ODE modules (two_component_system_odes.py and
		two_component_system_odes_fitter.py) are rewritten whenever either file
		is missing, or a pickled fixture (stoichiometric matrix, forward rates,
		reverse rates) is missing or differs from the current value. After a
		rewrite the fixtures are refreshed so the next call can reuse the
		generated files.

		Sets self.derivatives, self.derivativesJacobian, self.derivativesFitter
		and self.derivativesFitterJacobian from the generated modules.
		'''
		rootDir = os.path.dirname(os.path.dirname(wholecell.__file__))
		fixturesDir = filepath.makedirs(rootDir, "fixtures", "twoComponentSystem")
		processDir = os.path.join(
			rootDir, "reconstruction", "ecoli", "dataclasses", "process"
			)
		odeFile = os.path.join(processDir, "two_component_system_odes.py")
		odeFitterFile = os.path.join(processDir, "two_component_system_odes_fitter.py")

		# (fixture file name, getter for the current value). Getters keep the
		# current value from being computed unless it is actually needed.
		fixtures = (
			("S.cPickle", self.stoichMatrix),
			("ratesFwd.cPickle", lambda: self.ratesFwd),
			("ratesRev.cPickle", lambda: self.ratesRev),
			)

		def fixtureIsStale(fileName, getCurrent):
			# True when the pickled fixture is missing or no longer matches
			path = os.path.join(fixturesDir, fileName)
			if not os.path.exists(path):
				return True
			# Context manager so the handle is closed right after loading
			with open(path, "rb") as f:
				cached = cPickle.load(f)
			return not np.all(cached == getCurrent())

		needToCreate = not os.path.exists(odeFile) or not os.path.exists(odeFitterFile)

		# Every fixture is checked, even once a rewrite is already required
		for fileName, getCurrent in fixtures:
			if fixtureIsStale(fileName, getCurrent):
				needToCreate = True

		if needToCreate:
			self._makeDerivative()
			self._makeDerivativeFitter()
			writeOdeFile(odeFile, self.derivativesSymbolic, self.derivativesJacobianSymbolic)
			writeOdeFile(odeFitterFile, self.derivativesFitterSymbolic, self.derivativesFitterJacobianSymbolic)

		# Imported here because the modules may only just have been written.
		# NOTE(review): if these modules were already imported earlier in this
		# process, the import statement returns the cached module rather than
		# re-reading the rewritten file - confirm that cannot happen here.
		import reconstruction.ecoli.dataclasses.process.two_component_system_odes
		import reconstruction.ecoli.dataclasses.process.two_component_system_odes_fitter
		self.derivatives = reconstruction.ecoli.dataclasses.process.two_component_system_odes.derivatives
		self.derivativesJacobian = reconstruction.ecoli.dataclasses.process.two_component_system_odes.derivativesJacobian
		self.derivativesFitter = reconstruction.ecoli.dataclasses.process.two_component_system_odes_fitter.derivatives
		self.derivativesFitterJacobian = reconstruction.ecoli.dataclasses.process.two_component_system_odes_fitter.derivativesJacobian

		if needToCreate:
			# Refreshed only after the generated modules imported successfully
			for fileName, getCurrent in fixtures:
				with open(os.path.join(fixturesDir, fileName), "wb") as f:
					cPickle.dump(getCurrent(), f, protocol = cPickle.HIGHEST_PROTOCOL)
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Plot z scores of the succinate and isocitrate flux sensitivities.

        Only runs when ``metadata['variant']`` is 'flux_sensitivity'. The
        'succinate_flux_sensitivity' and 'isocitrate_flux_sensitivity' columns
        of every cell of every variant are stacked (dropping the first time
        step of each cell), converted with ``calc_z`` and drawn in a 2x2 grid:
        one row per enzyme, with ``plot_lows`` on the left and
        ``plot_threshold`` on the right.

        Prints a message and returns without plotting when the variant does
        not match or when no simulation output is found.

        Raises:
            Exception: if ``inputDir`` is not an existing directory.
        """
        if metadata.get('variant', '') != 'flux_sensitivity':
            print('This plot only runs for the flux_sensitivity variant.')
            return

        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        reactions = None
        succ_fluxes = []
        iso_fluxes = []
        for variant in variants:
            for sim_dir in ap.get_cells(variant=[variant]):
                simOutDir = os.path.join(sim_dir, "simOut")

                # Listeners used
                fba_reader = TableReader(os.path.join(simOutDir, 'FBAResults'))

                # Load data
                # The reaction list of the last cell read is the one used for
                # labelling below.
                reactions = np.array(
                    fba_reader.readAttribute('sensitivity_reactions'))
                succ_fluxes.append(
                    fba_reader.readColumn('succinate_flux_sensitivity')[1:, :])
                iso_fluxes.append(
                    fba_reader.readColumn('isocitrate_flux_sensitivity')[1:, :])

        # np.vstack raises on an empty list and reactions would be unset
        if not succ_fluxes:
            print('No simulation data found to plot.')
            return

        succ_fluxes = np.vstack(succ_fluxes)
        iso_fluxes = np.vstack(iso_fluxes)

        succ_z = calc_z(succ_fluxes)
        iso_z = calc_z(iso_fluxes)

        threshold = -0.1

        # Plot data
        plt.figure()
        gs = gridspec.GridSpec(2, 2)

        ## Succinate dehydrogenase all fluxes
        ax = plt.subplot(gs[0, 0])
        plot_lows(ax, succ_z, threshold, 'succinate dehydrogenase')

        ## Succinate dehydrogenase fluxes over threshold
        ax = plt.subplot(gs[0, 1])
        plot_threshold(ax, succ_z, threshold, reactions)

        ## Isocitrate dehydrogenase all fluxes
        ax = plt.subplot(gs[1, 0])
        plot_lows(ax, iso_z, threshold, 'isocitrate dehydrogenase')

        ## Isocitrate dehydrogenase fluxes over threshold
        ax = plt.subplot(gs[1, 1])
        plot_threshold(ax, iso_z, threshold, reactions)

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        plt.close('all')
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		'''
		Summarizes how increasing or decreasing each parameter affects growth
		rate and flux correlation.

		Only runs when metadata['variant'] is 'param_sensitivity'. Every
		variant except CONTROL_VARIANT is analyzed with analyze_variant in a
		process pool and the results are summed. The summed effects are
		averaged by their counts and turned into z scores.

		Outputs written to plotOutDir:
		- <plotOutFileName>: per-output panels of sorted z scores plus the
			parameters beyond the multiple-hypothesis threshold
		- <plotOutFileName>_individual: decrease / control / increase values
			for the parameters with the most extreme z score differences
		- <plotOutFileName>.tsv: z scores and raw averages for every parameter

		ap, sim_data and validation_data are stored as module globals.
		NOTE(review): presumably so analyze_variant can read them from the
		worker processes - confirm against analyze_variant.

		Raises:
			Exception: if inputDir is not an existing directory.
			ValueError: if the number of generated parameter ids differs from
				the number of adjusted parameters.
		'''
		if metadata.get('variant', '') != 'param_sensitivity':
			print('This plot only runs for the param_sensitivity variant.')
			return

		if not os.path.isdir(inputDir):
			raise Exception('inputDir does not currently exist as a directory')

		filepath.makedirs(plotOutDir)

		global ap
		ap = AnalysisPaths(inputDir, variant_plot=True)
		variants = np.array(ap.get_variants())

		# Check to analyze control (variant 0) separately from other variants
		use_control = False
		if CONTROL_VARIANT in variants:
			use_control = True
			variants = variants[variants != CONTROL_VARIANT]
		n_variants = len(variants)

		# Load one instance of sim_data to get number of parameters and ids
		global sim_data
		global validation_data
		with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
			sim_data = cPickle.load(f)
		with open(validationDataFile, 'rb') as f:
			validation_data = cPickle.load(f)

		# sim_data information
		total_params = np.sum(number_params(sim_data))
		rna_to_gene = {gene['rnaId']: gene['symbol'] for gene in sim_data.process.replication.geneData}
		monomer_to_gene = {gene['monomerId']: gene['symbol'] for gene in sim_data.process.replication.geneData}
		rna_ids = sim_data.process.transcription.rnaData['id']
		monomer_ids = sim_data.process.translation.monomerData['id']

		# IDs must match order from param_indices() from param_sensitivity.py variant
		# The last three characters of each id are dropped before the gene lookup
		param_ids = np.array(
			['{} RNA deg Km'.format(rna_to_gene[rna[:-3]]) for rna in rna_ids]
			+ ['{} protein deg rate'.format(monomer_to_gene[monomer[:-3]]) for monomer in monomer_ids]
			+ ['{} translation eff'.format(monomer_to_gene[monomer[:-3]]) for monomer in monomer_ids]
			+ ['{} synth prob'.format(rna_to_gene[rna[:-3]]) for rna in rna_ids])
		if len(param_ids) != total_params:
			raise ValueError('Number of adjusted parameters and list of ids do not match.')

		pool = Pool(processes=parallelization.plotter_cpus())
		args = zip(
			variants,
			[total_params] * n_variants,
			)

		# Results arrive in arbitrary order, which is fine because they are summed
		results = pool.imap_unordered(analyze_variant, args)
		(increase_params_counts,
			decrease_params_counts,
			increase_params_growth_rate,
			decrease_params_growth_rate,
			increase_params_flux_correlation,
			decrease_params_flux_correlation) = reduce(operator.add, results)
		pool.close()
		pool.join()

		# Calculate effects and z score
		labels = [
			'growth rate',
			'flux correlation',
			]
		increase_params_data = np.vstack((
			increase_params_growth_rate / increase_params_counts,
			increase_params_flux_correlation / increase_params_counts,
			))
		decrease_params_data = np.vstack((
			decrease_params_growth_rate / decrease_params_counts,
			decrease_params_flux_correlation / decrease_params_counts,
			))
		n_outputs = len(labels)

		# Difference between effect when parameter increased vs decreased
		data_diff = increase_params_data - decrease_params_data
		mean_diff = np.nanmean(data_diff, axis=1).reshape(-1, 1)
		std_diff = np.nanstd(data_diff, axis=1).reshape(-1, 1)
		z_score_diff = (data_diff - mean_diff) / std_diff

		# Individual increase or decrease effects to check asymmetric effects
		all_data = np.hstack((increase_params_data, decrease_params_data))
		mean = np.nanmean(all_data, axis=1).reshape(-1, 1)
		std = np.nanstd(all_data, axis=1).reshape(-1, 1)
		z_score_increase = (increase_params_data - mean) / std
		z_score_decrease = (decrease_params_data - mean) / std

		# Get control data
		if use_control:
			control_counts, _, control_growth_rate, _, control_flux_correlation, _ = analyze_variant((CONTROL_VARIANT, total_params))
			control_data = [
				control_growth_rate[0] / control_counts[0],
				control_flux_correlation[0] / control_counts[0],
				]
		else:
			control_data = [None] * n_outputs

		# Multiple hypothesis adjustment for significance of each parameter.
		# Solves Gaussian CDF for how many standard deviations are needed to
		# include 1 - 0.05 / total_params of the data (test each parameter for p<0.05).
		n_stds = special.erfinv(2 * (1 - 0.05 / total_params) - 1) * np.sqrt(2)

		# Plot histograms
		# NOTE(review): matplotlib documents the symlog threshold keyword as
		# `linthresh` (`linthreshy` in older releases); confirm that the
		# `linthreshold` keyword used below is honored by the installed version.
		plt.figure(figsize=(16, 4*n_outputs))
		n_cols = 4
		top_limit = 20  # limit of the number of highest/lowest parameters to plot
		for i, (z_diff, z_increase, z_decrease) in enumerate(zip(z_score_diff, z_score_increase, z_score_decrease)):
			# Positions (in sorted order) of the parameters beyond the threshold
			sorted_idx = np.argsort(z_diff)
			above_idx = np.where(z_diff[sorted_idx] > n_stds)[0][-top_limit:]
			below_idx = np.where(z_diff[sorted_idx] < -n_stds)[0][:top_limit]

			## Plot z difference data
			ax = plt.subplot(n_outputs, n_cols, n_cols*i + 1)
			plt.yscale('symlog', linthreshold=0.01)
			plt.fill_between(range(total_params), z_diff[sorted_idx])
			plt.axhline(n_stds , color='k', linestyle='--')
			plt.axhline(-n_stds, color='k', linestyle='--')

			## Format axes
			sparkline.whitePadSparklineAxis(ax, xAxis=False)
			plt.xticks([])
			plt.yticks([-n_stds, 0, n_stds])
			ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
			# Symmetric limits, reused for the single direction panel below
			lim = np.max(np.abs(plt.ylim()))
			plt.ylim([-lim, lim])
			if i == 0:
				plt.title('Difference of Positive and Negative\nParameter Changes')
			if i == n_outputs - 1:
				plt.xlabel('Sorted Parameters')
			plt.ylabel('Z score\nparameter effect on {}\n(log scale)'.format(labels[i]))

			## Plot single direction z data
			ax = plt.subplot(n_outputs, n_cols, n_cols*i + 2)
			plt.yscale('symlog', linthreshold=0.01)
			plt.step(range(total_params), z_increase[sorted_idx], color='g', linewidth=1, alpha=0.5)
			plt.step(range(total_params), z_decrease[sorted_idx], color='r', linewidth=1, alpha=0.5)
			plt.axhline(n_stds , color='k', linestyle='--')
			plt.axhline(-n_stds, color='k', linestyle='--')

			## Format axes
			sparkline.whitePadSparklineAxis(ax, xAxis=False)
			plt.xticks([])
			plt.yticks([-n_stds, 0, n_stds])
			ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
			plt.ylim([-lim, lim])
			if i == 0:
				plt.title('Positive and Negative\nParameter Changes')
			if i == n_outputs - 1:
				plt.xlabel('Sorted Parameters')

			## Plot highest parameters
			ax = plt.subplot(n_outputs, n_cols, n_cols*i + 3)
			plt.yscale('symlog', linthreshold=0.01)
			plt.bar(above_idx, z_diff[sorted_idx[above_idx]])
			plt.axhline(n_stds, color='k', linestyle='--')

			## Format axes
			sparkline.whitePadSparklineAxis(ax)
			ax.spines["bottom"].set_visible(False)
			ax.tick_params(bottom=False)
			plt.xticks(above_idx, param_ids[sorted_idx[above_idx]], rotation=90, fontsize=6)
			plt.yticks([0, n_stds])
			ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
			if i == 0:
				plt.title('Highest Positive Effect Parameters')
			if i == n_outputs - 1:
				plt.xlabel('Parameter IDs')

			## Plot lowest parameters
			ax = plt.subplot(n_outputs, n_cols, n_cols*i + 4)
			plt.yscale('symlog', linthreshold=0.01)
			plt.bar(below_idx, z_diff[sorted_idx[below_idx]])
			plt.axhline(-n_stds, color='k', linestyle='--')

			## Format axes
			sparkline.whitePadSparklineAxis(ax)
			ax.spines["bottom"].set_visible(False)
			ax.tick_params(bottom=False)
			plt.xticks(below_idx, param_ids[sorted_idx[below_idx]], rotation=90, fontsize=6)
			plt.yticks([-n_stds, 0])
			ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
			if i == 0:
				plt.title('Highest Negative Effect Parameters')
			if i == n_outputs - 1:
				plt.xlabel('Parameter IDs')

		## Save figure
		plt.tight_layout()
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		# Plot individual parameters
		# Most extreme z score differences for each of the two outputs
		individual_indices = [
			np.nanargmax(z_score_diff[0, :]),
			np.nanargmin(z_score_diff[0, :]),
			np.nanargmax(z_score_diff[1, :]),
			np.nanargmin(z_score_diff[1, :]),
			]
		n_individual = len(individual_indices)
		x_values = [-1, 0, 1]
		plt.figure()

		for i, label in enumerate(labels):
			shared_ax = None
			for j, idx in enumerate(individual_indices):
				## Shared y axis for each row
				ax = plt.subplot(n_outputs, n_individual, i*n_individual + j + 1, sharey=shared_ax)
				if shared_ax is None:
					shared_ax = ax

				## Plot data
				plt.plot(x_values, [decrease_params_data[i, idx], control_data[i], increase_params_data[i, idx]], 'x')

				## Format axes
				plt.xticks(x_values, ['Decrease', 'Control', 'Increase'])
				ax.tick_params(labelsize=6)
				ax.spines['right'].set_visible(False)
				ax.spines['top'].set_visible(False)
				if i < n_outputs - 1:
					ax.tick_params(labelbottom=False)
				if j > 0:
					ax.tick_params(labelleft=False)
				if i == 0:
					plt.title(param_ids[idx], fontsize=8)
				if j == 0:
					plt.ylabel(label, fontsize=7)

		## Save figure
		plt.tight_layout()
		# metadata is an argument of exportFigure, not of str.format (where it
		# was silently ignored), matching the export call above
		exportFigure(plt, plotOutDir, '{}_individual'.format(plotOutFileName), metadata)
		plt.close('all')

		# Save z scores to tsv
		with open(os.path.join(plotOutDir, '{}.tsv'.format(plotOutFileName)), 'w') as f:
			writer = csv.writer(f, delimiter='\t')

			writer.writerow(
				['Parameter']
				+ headers(labels, 'Z-score, difference')
				+ headers(labels, 'Z-score, increase')
				+ headers(labels, 'Z-score, decrease')
				+ headers(labels, 'Raw average, difference')
				+ headers(labels, 'Raw average, increase')
				+ headers(labels, 'Raw average, decrease')
				)
			# One row per parameter: id followed by the per-output columns
			writer.writerows(np.hstack((
				param_ids.reshape(-1, 1),
				z_score_diff.T,
				z_score_increase.T,
				z_score_decrease.T,
				data_diff.T,
				increase_params_data.T,
				decrease_params_data.T
				)))
Exemple #22
0
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        if not os.path.isdir(inputDir):
            raise Exception, 'inputDir does not currently exist as a directory'

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        # scan all variants to find variant indexes for comparison
        old_variant = None
        new_variant = None
        for v, variant in enumerate(variants):
            disable_constraints, additional_disabled = get_disabled_constraints(
                variant)
            if additional_disabled is None:
                old_variant = variant
            elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
                new_variant = variant

        # if the baseline variant or the new variant are missing, stop plotting
        if (old_variant is None) or (new_variant is None):
            print('Variant simulations missing!')
            return

        compared_variants = [old_variant, new_variant]

        # Load sim_data
        with open(
                os.path.join(inputDir, 'kb',
                             constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
            sim_data = cPickle.load(f)

        # get reactions from sim_data
        reactionCatalysts = sim_data.process.metabolism.reactionCatalysts

        reaction_to_enzyme = {r: reactionCatalysts[r][0] for r in REACTIONS}
        enzyme_names = reaction_to_enzyme.values()
        reactions_with_km = sorted(SIMULATION_KMS)
        km_metabolites = [
            SIMULATION_KMS[r]['metabolite'] for r in reactions_with_km
        ]
        kms = np.array([SIMULATION_KMS[r]['KM'] for r in reactions_with_km])
        km_constraint_indices = [
            SIMULATION_KMS[r]['constraint_index'] for r in reactions_with_km
        ]

        # initialize dictionaries for fluxes and concentrations
        all_reaction_fluxes = {}
        all_enzyme_concentrations = {}
        all_km_adjustments = {}
        for variant in compared_variants:
            reaction_fluxes = {r: [] for r in REACTIONS}
            enzyme_concentrations = {e: [] for e in enzyme_names}
            km_adjustments = {r: [] for r in reactions_with_km}
            for sim_dir in ap.get_cells(variant=[variant]):
                simOutDir = os.path.join(sim_dir, "simOut")

                # Listeners used
                try:
                    kinetics_reader = TableReader(
                        os.path.join(simOutDir, 'EnzymeKinetics'))
                    fbaResults = TableReader(
                        os.path.join(simOutDir, "FBAResults"))
                except Exception as e:
                    print(e)
                    continue

                # read from kinetics listener
                counts_to_molar = ((COUNTS_UNITS / VOLUME_UNITS) *
                                   kinetics_reader.readColumn('countsToMolar')
                                   [START_TIME_STEP:].reshape(-1, 1))
                all_constraints_used = kinetics_reader.readColumn(
                    'reactionConstraint')[START_TIME_STEP:]

                # Store fluxes
                reactionIDs = np.array(fbaResults.readAttribute("reactionIDs"))
                reactionFluxes = fbaResults.readColumn("reactionFluxes")[
                    START_TIME_STEP:, :]
                reaction_flux_dict = dict(zip(reactionIDs, reactionFluxes.T))
                for reaction_id in REACTIONS:
                    reaction_fluxes[reaction_id].extend(
                        list(reaction_flux_dict[reaction_id]))

                # Store enzyme concentrations
                enzyme_counts, met_counts = read_bulk_molecule_counts(
                    simOutDir, (enzyme_names, km_metabolites))
                enzyme_conc = counts_to_molar.asNumber(
                    COUNTS_UNITS /
                    VOLUME_UNITS) * enzyme_counts[START_TIME_STEP:, :]
                met_conc = counts_to_molar.asNumber(
                    units.umol / units.L) * met_counts[START_TIME_STEP:, :]
                for enzyme_id, conc_time_series in zip(enzyme_names,
                                                       enzyme_conc.T):
                    enzyme_concentrations[enzyme_id].extend(
                        list(conc_time_series))

                # Calculate enzyme saturation for reactions with KM values
                adjust_km = np.zeros(
                    (len(counts_to_molar), len(km_constraint_indices)), bool)
                for i, idx in enumerate(km_constraint_indices):
                    constraint_used, _ = np.where(all_constraints_used == idx)
                    adjust_km[constraint_used, i] = True
                enzyme_saturation = met_conc / (met_conc + kms)
                enzyme_saturation[~adjust_km] = 1
                for rxn, saturation in zip(reactions_with_km,
                                           enzyme_saturation.T):
                    km_adjustments[rxn].extend(list(saturation))

            all_reaction_fluxes[variant] = reaction_fluxes
            all_enzyme_concentrations[variant] = enzyme_concentrations
            all_km_adjustments[variant] = km_adjustments

        ### Make figure ###
        cols = 1
        rows = len(REACTIONS)
        plt.figure(figsize=(cols * 3, rows * 5))

        # go through each reaction to show predicted k_cat distribution for the
        # new and old variant, and experimental measurements
        for reaction_idx, reaction_id in enumerate(REACTIONS):
            enzyme_id = reaction_to_enzyme[reaction_id]

            # old measurements
            reaction_measurements = OLD_MEASUREMENTS[reaction_id]
            measurements = reaction_measurements['measurements']
            temps = reaction_measurements['temps']
            adjusted_measurements = np.array([
                2**((37. - t) / 10.) * m
                for (m, t) in zip(measurements, temps)
            ])

            # new measurements
            reaction_measurements = NEW_MEASUREMENTS.get(reaction_id, {})
            measurements = reaction_measurements.get('measurements', [])
            temps = reaction_measurements.get('temps', [])
            new_adjusted_measurements = np.array([
                2**((37. - t) / 10.) * m
                for (m, t) in zip(measurements, temps)
            ])

            # get effective kcat for GLUTATHIONE-REDUCT
            if reaction_id == 'GLUTATHIONE-REDUCT-NADPH-RXN':
                # saturated_fraction calculated from Smirnova, et al. (2005). "Effects of cystine and
                # hydrogen peroxide on glutathione status and expression of antioxidant genes in Escherichia coli"
                # Oxidized glutathione (GSSG in table 2) gives ~19 uM concentration (with 0.3 dry fraction and 1.1 g/mL density)
                # With 61 uM Km for this reaction, that gives a saturated fraction of 0.238
                saturated_fraction = 0.238
                new_adjusted_measurements = adjusted_measurements * saturated_fraction

            # Initialize subplots
            ax = plt.subplot(rows, cols, reaction_idx + 1)

            # calculate the reaction's k_cat distribution for each compared variant
            k_cat_distribution = {}
            for variant in compared_variants:
                ## Get data
                rxn_fluxes = np.array(
                    all_reaction_fluxes[variant][reaction_id])  # mmol / L / s
                enzyme_concs = np.array(
                    all_enzyme_concentrations[variant][enzyme_id])  # mmol / L
                saturation = np.array(all_km_adjustments[variant].get(
                    reaction_id, [1] * len(rxn_fluxes)))

                # calculate k_cats (adjusted for saturation in the sim), remove zeros, save to this variant's distribution
                k_cats = rxn_fluxes / enzyme_concs / saturation
                k_cats = k_cats[k_cats > 1e-10]
                k_cat_distribution[variant] = k_cats

            data = [
                k_cat_distribution[old_variant],
                k_cat_distribution[new_variant]
            ]

            # plot
            violin_pos = [1, 3]  # position of violin plots [old, new]
            measure_pos = 2  # position of measurements
            ax.violinplot(data,
                          violin_pos,
                          widths=1.0,
                          showmeans=False,
                          showextrema=False,
                          showmedians=False)
            ax.scatter(np.full_like(adjusted_measurements, measure_pos),
                       adjusted_measurements,
                       marker='o',
                       color='#eb7037',
                       s=50,
                       alpha=0.7)
            ax.scatter(np.full_like(new_adjusted_measurements, measure_pos),
                       new_adjusted_measurements,
                       marker='o',
                       color='#eb7037',
                       s=50,
                       alpha=0.7)

            # format
            rxn_id_length = 25
            text_reaction_id = ('reaction: %s' % reaction_id[:rxn_id_length])
            labels = [
                '\nModel Predicted\n(Old Constraints)', 'Measured',
                '\nModel Predicted\n(New Constraints)'
            ]
            ax.set_title(text_reaction_id, fontsize=8)
            ax.set_ylabel('$k_{cat}$ (1/s)', fontsize=8)
            set_ticks(ax, labels)
            ax.set_yscale('log')

        ### Create Plot ###
        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close('all')
Exemple #23
0
def divide_cell(sim):
	"""
	Split the state of a dividing cell between two daughter cells.

	The bulk and unique molecule containers of ``sim`` are divided (or
	replaced by empty containers when the cell is considered dead) and saved,
	together with the death flag, the elongation rates and the current time,
	in the "Daughter1" and "Daughter2" directories under ``sim._outputDir``.

	Raises:
		Exception: if the partial chromosome counts are not all equal.
	"""
	daughters = ("Daughter1", "Daughter2")

	def daughter_file(daughter, filename):
		# Path of one output file inside a daughter's directory
		return os.path.join(sim._outputDir, daughter, filename)

	# Simulation state needed for the division
	rng = sim.randomState
	bulk = sim.internal_states['BulkMolecules']
	unique = sim.internal_states['UniqueMolecules']

	# TODO (Eran): division should be based on both nutrient and gene perturbation condition
	nutrients = sim.external_states['Environment'].nutrients

	# Make sure both daughter directories exist before anything is written
	for daughter in daughters:
		filepath.makedirs(sim._outputDir, daughter)

	# The partial chromosomes are expected to be present in equal numbers;
	# any surplus over the smallest count is treated as an error.
	# TODO (Gwanggyu): try to handle this case instead of raising an exception
	partial_counts = bulk.container.counts(
		bulk.divisionIds['partialChromosome'])
	smallest_partial_count = partial_counts.min()
	if (partial_counts - smallest_partial_count).any():
		raise Exception("You won the lottery! There is an uneven number of partial chromosomes...")

	# Count leftover partial chromosomes as full chromosomes. This should have
	# happened in the chromosome_formation process, but it can be missed in
	# the final timestep.
	full_chromosome_id = bulk.divisionIds['fullChromosome'][0]
	bulk.container.countInc(smallest_partial_count, full_chromosome_id)

	# The cell is dead when it has no full chromosome after exceeding its
	# maximal simulation duration, or when the simulation already flagged it.
	isDead = bool(
		(bulk.container.count(full_chromosome_id) == 0
			and (sim.time() - sim.initialTime()) > sim.lengthSec())
		or sim._isDead)

	for daughter in daughters:
		with open(daughter_file(daughter, "IsDead.cPickle"), 'wb') as f:
			cPickle.dump(isDead, f)

	if not isDead:
		# The chromosome split drives the division of both the bulk and the
		# unique molecules
		chromosome_counts = chromosomeDivision(bulk, rng)
		d1_bulk, d2_bulk = divideBulkMolecules(bulk, rng, chromosome_counts)
		d1_unique, d2_unique, elng_rates = divideUniqueMolecules(
			unique, rng, chromosome_counts, nutrients, sim)
	else:
		# Dead cell: daughters receive empty containers and zero rates
		d1_bulk = bulk.container.emptyLike()
		d2_bulk = bulk.container.emptyLike()
		d1_unique = unique.container.emptyLike()
		d2_unique = unique.container.emptyLike()
		elng_rates = {
			"d1_elng_rate": 0.,
			"d2_elng_rate": 0.,
			"d1_elng_rate_factor": 0.,
			"d2_elng_rate_factor": 0.,
			}

	# Save the divided containers (bulk for both daughters, then unique)
	divided_containers = (
		("BulkMolecules", (d1_bulk, d2_bulk)),
		("UniqueMolecules", (d1_unique, d2_unique)),
		)
	for filename, containers in divided_containers:
		for daughter, container in zip(daughters, containers):
			saveContainer(container, daughter_file(daughter, filename))

	# Save the elongation rates and rate factors of each daughter
	rate_files = (
		("Daughter1", "ElngRate.cPickle", "d1_elng_rate"),
		("Daughter2", "ElngRate.cPickle", "d2_elng_rate"),
		("Daughter1", "elng_rate_factor.cPickle", "d1_elng_rate_factor"),
		("Daughter2", "elng_rate_factor.cPickle", "d2_elng_rate_factor"),
		)
	for daughter, filename, key in rate_files:
		with open(daughter_file(daughter, filename), 'wb') as f:
			cPickle.dump(elng_rates[key], f)

	# Save daughter cell initial time steps
	for daughter in daughters:
		saveTime(sim.time(), daughter_file(daughter, "Time"),
			sim.timeStepSec())
Exemple #24
0
    def run(self, args):
        """Run the fitter tasks and report the files they wrote.

        Creates the "kb" directory under ``args.sim_path``, then runs, in
        order, the raw data, level-1 fit, symlink, raw validation data and
        validation data tasks, and finally prints the output file paths.

        Args:
            args: parsed command line options. ``sim_path``, ``debug``,
                ``cached`` and ``cpus`` are read here, along with the fitting
                options that are forwarded to FitSimDataTask.
        """
        kb_directory = fp.makedirs(args.sim_path, "kb")

        def kb_path(filename):
            # Path of a file inside the knowledge-base directory
            return os.path.join(kb_directory, filename)

        raw_data_file = kb_path(constants.SERIALIZED_RAW_DATA)
        sim_data_file = kb_path(constants.SERIALIZED_FIT1_FILENAME)
        cell_specs_file = kb_path(constants.SERIALIZED_CELL_SPECS)
        cached_sim_data_file = os.path.join(
            fp.ROOT_PATH, 'cached', constants.SERIALIZED_FIT1_FILENAME)
        most_fit_filename = kb_path(
            constants.SERIALIZED_SIM_DATA_MOST_FIT_FILENAME)
        raw_validation_data_file = kb_path(
            constants.SERIALIZED_RAW_VALIDATION_DATA)
        validation_data_file = kb_path(constants.SERIALIZED_VALIDATION_DATA)

        # Announce the mode only when at least one of the two flags is set
        debug_tag = 'DEBUG ' if args.debug else ''
        cached_tag = 'CACHED ' if args.cached else ''
        if debug_tag or cached_tag:
            print("{}{}Fitter".format(debug_tag, cached_tag))

        # Keyword arguments of the fitting task, mostly forwarded from args
        fit_options = dict(
            fit_level=1,
            input_data=raw_data_file,
            output_data=sim_data_file,
            cached=args.cached,  # bool
            cached_data=cached_sim_data_file,  # cached file to copy
            cpus=args.cpus,
            debug=args.debug,
            disable_ribosome_capacity_fitting=args.disable_ribosome_fitting,
            disable_rnapoly_capacity_fitting=args.disable_rnapoly_fitting,
            variable_elongation_transcription=(
                args.variable_elongation_transcription),
            variable_elongation_translation=(
                args.variable_elongation_translation),
            rnapoly_activity_fitting=args.rnapoly_activity_fitting,
            mrna_half_life_fitting=args.mrna_half_life_fitting,
            max_rnap_activity=args.max_rnap_activity,
            adjust_rna_and_protein_parameters=args.no_expression_adjustment,
            adjust_rnase_expression=args.adjust_rnase_expression,
            disable_measured_protein_deg=args.disable_measured_protein_deg,
            alternate_mass_fraction_protein=(
                args.alternate_mass_fraction_protein),
            alternate_mass_fraction_rna=args.alternate_mass_fraction_rna,
            alternate_mass_fraction_mrna=args.alternate_mass_fraction_mrna,
            alternate_r_protein_degradation=(
                args.alternate_r_protein_degradation),
            alternate_rna_seq=args.alternate_rna_seq,
            alternate_rna_half_life=args.alternate_rna_half_life,
            alternate_translation_efficiency=(
                args.alternate_translation_efficiency),
            alternate_ribosome_activity=args.alternate_ribosome_activity,
            disable_rnap_fraction_increase=(
                args.disable_rnap_fraction_increase),
            disable_ribosome_activity_fix=args.disable_ribosome_activity_fix,
            save_cell_specs=args.save_cell_specs,
            cell_specs_file=cell_specs_file,
            write_translation_efficiencies=(
                args.write_translation_efficiencies),
        )

        tasks = [
            InitRawDataTask(output=raw_data_file),
            FitSimDataTask(**fit_options),
            # The link target is given as a bare file name, not a full path
            SymlinkTask(
                to=constants.SERIALIZED_FIT1_FILENAME,
                link=most_fit_filename,
                overwrite_if_exists=True),
            InitRawValidationDataTask(output=raw_validation_data_file),
            InitValidationDataTask(
                validation_data_input=raw_validation_data_file,
                knowledge_base_raw=raw_data_file,
                output_data=validation_data_file),
        ]
        for task in tasks:
            task.run_task({})

        written = [
            'Wrote', raw_data_file, sim_data_file, most_fit_filename,
            raw_validation_data_file, validation_data_file
        ]
        print('\n\t'.join(written))
Exemple #25
0
def _metadata_signature(metadata):
    """Return the signature text for *metadata*, chosen by its "analysis_type".

    Raises:
        ValueError: if the analysis type is not 'single', 'multigen',
            'cohort' or 'variant'.
    """
    analysis_type = metadata["analysis_type"]

    # Fields that differ between analysis types; the timestamp prefix and the
    # git hash / description suffix are shared by all of them.
    if analysis_type == 'single':
        middle = [
            metadata["variant_function"],
            metadata["variant_index"],
            "Seed", metadata["seed"],
            "Gen",
            str(metadata["gen"]) + '/' + str(int(metadata["total_gens"]) - 1),
        ]
    elif analysis_type == 'multigen':
        middle = [
            metadata["variant_function"],
            metadata["variant_index"],
            "Seed", metadata["seed"],
            metadata["total_gens"], "gens",
        ]
    elif analysis_type == 'cohort':
        middle = [
            metadata["variant_function"],
            metadata["variant_index"],
            metadata["total_gens"], "gens",
        ]
    elif analysis_type == 'variant':
        middle = [
            metadata["total_variants"], "variants",
            metadata["total_gens"], "gens",
        ]
    else:
        # Previously this case fell through and failed later with an
        # UnboundLocalError on the signature variable.
        raise ValueError(
            'Unknown analysis_type {!r} in metadata'.format(analysis_type))

    fields = [str(metadata["time"])[:13]] + middle + [
        "Githash", str(metadata["git_hash"])[:10],
        "Desc", metadata["description"],
    ]
    return "_".join(str(field) for field in fields)


def exportFigure(plt,
                 plotOutDir,
                 plotOutFileName,
                 metadata=None,
                 transparent=False):
    """Save the current figure in the default image type, SVG and PNG.

    When *metadata* contains an "analysis_type" entry, a signature line built
    from the metadata is drawn at the bottom-left corner of the figure first.

    Args:
        plt: object providing ``figtext`` and ``savefig`` (callers in this
            file pass the matplotlib pyplot module).
        plotOutDir: directory that receives the default image; the SVG and
            PNG versions go into its SVG_DIR and LOW_RES_DIR subdirectories.
        plotOutFileName: file name without extension.
        metadata: optional dict describing the analysis.
        transparent: forwarded to every ``savefig`` call.

    Raises:
        ValueError: if ``metadata["analysis_type"]`` is not recognised.
    """
    if metadata is not None and "analysis_type" in metadata:
        # Build the text before touching the figure so that invalid metadata
        # is reported without side effects.
        signature = _metadata_signature(metadata)

        # Add metadata signature to the bottom of the plot
        plt.figtext(0, 0, signature, size=8)

    # Make folders for holding alternate types of images
    filepath.makedirs(plotOutDir, LOW_RES_DIR)
    filepath.makedirs(plotOutDir, SVG_DIR)

    # Save the image in the default type
    default_path = os.path.join(plotOutDir,
                                plotOutFileName + DEFAULT_IMAGE_TYPE)
    plt.savefig(default_path, transparent=transparent)

    # Save SVG image
    svg_path = os.path.join(plotOutDir, SVG_DIR, plotOutFileName + '.svg')
    plt.savefig(svg_path, transparent=transparent)

    # Save PNG image at the low resolution setting
    png_path = os.path.join(plotOutDir, LOW_RES_DIR, plotOutFileName + '.png')
    plt.savefig(png_path, dpi=LOW_RES_DPI, transparent=transparent)
Exemple #26
0
    def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Scatter added volume against birth volume for the condition variants.

        For each of the first three variants, the first and last "cellMass"
        values of every cell in generations 2 and 3 are converted to volumes
        with that variant's cell density, and one scatter series is drawn
        per variant.

        Args:
            inputDir: directory holding the variant simulation output.
            plotOutDir: directory the figure is exported to.
            plotOutFileName: figure file name without extension.
            simDataFile: unused here.
            validationDataFile: unused here.
            metadata: dict; its "variant" entry must be "condition".

        Raises:
            Exception: if ``inputDir`` is not an existing directory.
        """
        if metadata["variant"] != "condition":
            print("This plot only runs for the 'condition' variant.")
            return

        if not os.path.isdir(inputDir):
            raise Exception('inputDir does not currently exist as a directory')

        filepath.makedirs(plotOutDir)

        ap = AnalysisPaths(inputDir, variant_plot=True)
        variants = ap.get_variants()

        gens = [2, 3]

        # Legend labels by position in the variant list. Pairing each label
        # with its variant keeps the legend right when a variant has no cells
        # (indexing the collected results by 0/1/2 would shift the labels or
        # raise an IndexError in that case).
        labels = ["minimal", "anaerobic", "+AA"]

        series = []
        for label, variant in zip(labels, variants):
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)

            cell_density = sim_data.constants.cellDensity.asNumber(
                units.fg / units.um**3)

            all_cells = ap.get_cells(variant=[variant], generation=gens)
            if len(all_cells) == 0:
                continue

            initial_masses = np.zeros(0)
            final_masses = np.zeros(0)

            for simDir in all_cells:
                try:
                    simOutDir = os.path.join(simDir, "simOut")
                    mass = TableReader(os.path.join(simOutDir, "Mass"))
                    cellMass = mass.readColumn("cellMass")
                    birth_mass = cellMass[0]
                    division_mass = cellMass[-1]
                except Exception as e:
                    # Best effort: report unreadable cells and move on, while
                    # letting KeyboardInterrupt/SystemExit propagate.
                    print(e)
                    continue

                initial_masses = np.hstack((initial_masses, birth_mass))
                final_masses = np.hstack((final_masses, division_mass))

            added_masses = final_masses - initial_masses

            series.append((label,
                           initial_masses / cell_density,
                           added_masses / cell_density))

        plt.style.use('seaborn-deep')

        plt.figure(figsize=(5, 5))
        for label, initial_volume, added_volume in series:
            plt.scatter(initial_volume, added_volume, s=3, label=label)
        plt.xlim([0, 4])
        plt.ylim([0, 4])
        # Raw strings: "\m" is not a valid escape sequence in a normal literal
        plt.xlabel(r"Birth Volume ($\mu m^3$)")
        plt.ylabel(r"Added Volume ($\mu m^3$)")
        plt.legend()
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        plt.close("all")
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""
		Plot simulated against validation counts for PROTEINS_WITH_HALF_LIFE.

		The time-averaged counts of each cell are accumulated in a protein
		container (bulk counts plus active ribosomes and RNA polymerases, minus
		the amounts given by the equilibrium and complexation stoichiometry).
		The mean and standard deviation over the cells of each of the first two
		variants are then plotted in log10 space against the Schmidt 2015
		glucose counts, with the Pearson correlation of each variant in the
		legend ("Before" for the first variant, "After" for the second).

		Raises:
			Exception: if inputDir is not an existing directory.
		"""
		if not os.path.isdir(inputDir):
			raise Exception('inputDir does not currently exist as a directory')

		filepath.makedirs(plotOutDir)

		sim_data_path = os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
		with open(sim_data_path, 'rb') as f:
			sim_data = cPickle.load(f)
		with open(validationDataFile, 'rb') as f:
			validation_data = cPickle.load(f)

		ap = AnalysisPaths(inputDir, variant_plot=True)
		variants = ap.get_variants()
		expected_n_variants = 2

		if len(variants) < expected_n_variants:
			print('This plot only runs for {} variants.'.format(expected_n_variants))
			return

		complexation = sim_data.process.complexation
		equilibrium = sim_data.process.equilibrium

		# IDs for appropriate proteins
		ids_complexation = complexation.moleculeNames
		ids_complexation_complexes = complexation.ids_complexes
		ids_equilibrium = equilibrium.moleculeNames
		ids_equilibrium_complexes = equilibrium.ids_complexes
		ids_translation = sim_data.process.translation.monomerData['id'].tolist()
		ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))

		# Stoichiometry matrices
		equil_stoich = equilibrium.stoichMatrixMonomers()
		complex_stoich = complexation.stoichMatrixMonomers()

		# One shared container, with views onto the subsets adjusted below
		protein_container = BulkObjectsContainer(ids_protein, dtype=np.float64)
		view_complexation = protein_container.countsView(ids_complexation)
		view_complexation_complexes = protein_container.countsView(ids_complexation_complexes)
		view_equilibrium = protein_container.countsView(ids_equilibrium)
		view_equilibrium_complexes = protein_container.countsView(ids_equilibrium_complexes)

		ribosome_subunit_ids = [
			sim_data.moleculeIds.s30_fullComplex,
			sim_data.moleculeIds.s50_fullComplex,
			]
		rnap_ids = [sim_data.moleculeIds.rnapFull]

		# Load model data: one column per variant
		n_proteins = len(PROTEINS_WITH_HALF_LIFE)
		model_counts = np.zeros((n_proteins, expected_n_variants))
		model_std = np.zeros((n_proteins, expected_n_variants))
		for i, variant in enumerate(variants):
			if i >= expected_n_variants:
				print('Skipping variant {} - only runs for {} variants.'.format(variant, expected_n_variants))
				continue

			variant_counts = []
			for sim_dir in ap.get_cells(variant=[variant]):
				simOutDir = os.path.join(sim_dir, 'simOut')

				# Listeners used
				unique_counts_reader = TableReader(os.path.join(simOutDir, 'UniqueMoleculeCounts'))

				# Start from the time-averaged bulk counts of this cell
				(bulk_counts,) = read_bulk_molecule_counts(simOutDir, ids_protein)
				protein_container.countsIs(bulk_counts.mean(axis=0))

				# Add the time-averaged active ribosomes and RNA polymerases
				unique_ids = unique_counts_reader.readAttribute('uniqueMoleculeIds')
				unique_counts = unique_counts_reader.readColumn('uniqueMoleculeCounts')
				n_ribosomes = unique_counts[:, unique_ids.index('activeRibosome')]
				n_rnap = unique_counts[:, unique_ids.index('activeRnaPoly')]
				protein_container.countsInc(n_ribosomes.mean(), ribosome_subunit_ids)
				protein_container.countsInc(n_rnap.mean(), rnap_ids)

				# Account for small-molecule bound complexes
				equilibrium_complex_counts = view_equilibrium_complexes.counts()
				view_equilibrium.countsDec(equil_stoich.dot(equilibrium_complex_counts))

				# Account for monomers in complexed form
				complexation_complex_counts = view_complexation_complexes.counts()
				view_complexation.countsDec(complex_stoich.dot(complexation_complex_counts))

				selected_view = protein_container.countsView(PROTEINS_WITH_HALF_LIFE)
				variant_counts.append(selected_view.counts())

			model_counts[:, i] = np.mean(variant_counts, axis=0)
			model_std[:, i] = np.std(variant_counts, axis=0)

		# Validation data, looked up by monomer ID
		schmidt_data = validation_data.protein.schmidt2015Data
		schmidt_ids = {m: i for i, m in enumerate(schmidt_data['monomerId'])}
		schmidt_counts = schmidt_data['glucoseCounts']
		validation_counts = np.array([schmidt_counts[schmidt_ids[p]] for p in PROTEINS_WITH_HALF_LIFE])

		# Process data: log10 counts and asymmetric error bar lengths
		model_log_counts = np.log10(model_counts)
		model_log_lower_std = model_log_counts - np.log10(model_counts - model_std)
		model_log_upper_std = np.log10(model_counts + model_std) - model_log_counts
		validation_log_counts = np.log10(validation_counts)
		r_before, p_before = stats.pearsonr(validation_log_counts, model_log_counts[:, 0])
		r_after, p_after = stats.pearsonr(validation_log_counts, model_log_counts[:, 1])

		# Scatter plot of model vs validation counts
		max_counts = np.ceil(max(validation_log_counts.max(), model_log_upper_std.max()))
		limits = [0, max_counts]
		plt.figure()
		colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

		## Plot data
		for i in range(expected_n_variants):
			error_bars = np.vstack((model_log_lower_std[:, i], model_log_upper_std[:, i]))
			plt.errorbar(validation_log_counts, model_log_counts[:, i],
				yerr=error_bars,
				fmt='o', color=colors[i], ecolor='k', capsize=3, alpha=0.5)
		plt.plot(limits, limits, 'k--', linewidth=0.5, label='_nolegend_')

		## Format axes
		plt.xlabel('Validation Counts\n(log10(counts))')
		plt.ylabel('Average Simulation Counts\n(log10(counts))')
		ax = plt.gca()
		for side in ('right', 'top'):
			ax.spines[side].set_visible(False)
		for side in ('left', 'bottom'):
			ax.spines[side].set_position(('outward', 10))
		for axis in (ax.xaxis, ax.yaxis):
			axis.set_major_locator(MaxNLocator(integer=True))

		## Add legend
		legend_text = [
			'Before: r={:.2f}, p={:.3f}'.format(r_before, p_before),
			'After: r={:.2f}, p={:.3f}'.format(r_after, p_after),
			]
		plt.legend(legend_text, frameon=False)

		plt.tight_layout()
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		plt.close('all')
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""
		Plot the RNA/protein mass ratio against growth rate per condition.

		For every variant, each cell from generation FIRST_GENERATION onwards
		contributes a doubling time (hours between its first and last time
		point) and the ratio of its mean RNA mass to its mean protein mass.
		The mean ratio of a variant (with its standard deviation as error bar)
		is drawn at the mean of log(2)/doubling time, next to the linear
		relation proposed in Scott et al. (2010). At most the first three
		variants that have cells are drawn.

		Args:
			inputDir: directory holding the variant simulation output.
			plotOutDir: directory the figure is exported to.
			plotOutFileName: figure file name without extension.
			simDataFile: unused here.
			validationDataFile: unused here.
			metadata: dict; its "variant" entry must be "condition".

		Raises:
			Exception: if inputDir is not an existing directory.
		"""
		if metadata["variant"] != "condition":
			print('This analysis only runs for the "condition" variant.')
			return

		if not os.path.isdir(inputDir):
			raise Exception('inputDir does not currently exist as a directory')

		filepath.makedirs(plotOutDir)

		ap = AnalysisPaths(inputDir, variant_plot=True)
		n_gens = ap.n_generation
		variants = ap.get_variants()

		if n_gens - 1 < FIRST_GENERATION:
			print('Not enough generations to plot.')
			return

		# Upper bound on the number of variants drawn
		max_plotted_variants = 3

		all_growth_rates = []
		all_rna_to_protein_ratios = []

		for variant in variants:
			doubling_times = np.zeros(0)
			variant_rna_to_protein_ratios = np.zeros(0)

			all_cells = ap.get_cells(
				variant=[variant],
				generation=range(FIRST_GENERATION, n_gens))

			if len(all_cells) == 0:
				continue

			for simDir in all_cells:
				try:
					simOutDir = os.path.join(simDir, "simOut")
					mass = TableReader(os.path.join(simOutDir, "Mass"))
					rna_mass = mass.readColumn("rnaMass")
					protein_mass = mass.readColumn("proteinMass")

					time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")

					doubling_time = (time[-1] - time[0])/3600.
					rna_to_protein_ratio = rna_mass.mean()/protein_mass.mean()
				except Exception as e:
					# Best effort: report unreadable cells and move on, while
					# letting KeyboardInterrupt/SystemExit propagate.
					print(e)
					continue

				# Append both values together so the two arrays stay aligned
				doubling_times = np.hstack((doubling_times, doubling_time))
				variant_rna_to_protein_ratios = np.hstack(
					(variant_rna_to_protein_ratios, rna_to_protein_ratio))

			variant_growth_rates = np.log(2)/doubling_times

			all_growth_rates.append(variant_growth_rates)
			all_rna_to_protein_ratios.append(variant_rna_to_protein_ratios)

		plt.figure(figsize=FIGSIZE)

		plt.style.use('seaborn-deep')
		color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

		# Iterate over what was actually collected: indexing a fixed range(3)
		# raised an IndexError when fewer than three variants had cells.
		plotted = zip(
			all_growth_rates[:max_plotted_variants],
			all_rna_to_protein_ratios[:max_plotted_variants])
		for growth_rates, ratios in plotted:
			plt.errorbar(
				growth_rates.mean(),
				ratios.mean(),
				yerr=ratios.std(),
				color=color_cycle[0], marker='o', markersize=5, linewidth=1,
				capsize=2)

		# Add linear plot proposed in Scott et al. (2010)
		x_linear = np.linspace(0, 3, 100)
		y_linear = x_linear/4.5 + 0.087
		plt.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

		plt.xlim([0, 3])
		plt.ylim([0, 1.6])
		# Raw string: "\l" is not a valid escape sequence in a normal literal
		plt.xlabel(r"Growth rate $\lambda$ (hour$^{-1}$)")
		plt.ylabel("RNA/protein mass ratio")
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		plt.close("all")
Exemple #29
0
# Boolean options read from the environment. Each value is parsed as an
# integer and then converted to bool, so an unset variable (default "0") is
# False; a value that is not an integer literal raises ValueError.
ALTERNATE_RNA_HALF_LIFE = bool(int(os.environ.get("ALTERNATE_RNA_HALF_LIFE", "0")))
ALTERNATE_TRANSLATION_EFFICIENCY = bool(int(os.environ.get("ALTERNATE_TRANSLATION_EFFICIENCY", "0")))
ALTERNATE_RIBOSOME_ACTIVITY = bool(int(os.environ.get("ALTERNATE_RIBOSOME_ACTIVITY", "0")))
DISABLE_RNAP_FRACTION_INCREASE = bool(int(os.environ.get("DISABLE_RNAP_FRACTION_INCREASE", "0")))
DISABLE_RIBOSOME_ACTIVITY_FIX = bool(int(os.environ.get("DISABLE_RIBOSOME_ACTIVITY_FIX", "0")))
SAVE_CELL_SPECS = bool(int(os.environ.get("SAVE_CELL_SPECS", "0")))
# NOTE(review): the name suggests a file path, but the value is parsed as a
# boolean flag like the options above - confirm this is intended.
CELL_SPECS_FILE = bool(int(os.environ.get("CELL_SPECS_FILE", "0")))
WRITE_TRANSLATION_EFFICIENCIES = bool(int(os.environ.get("WRITE_TRANSLATION_EFFICIENCIES", "0")))

# Output compression is forced off when the aggregate analysis is not run
if not RUN_AGGREGATE_ANALYSIS:
	COMPRESS_OUTPUT = False

### Set path variables and create directories

# The repository root is taken to be two directory levels above this file
WC_ECOLI_DIRECTORY = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# The value returned by filepath.makedirs is used as the directory path
OUT_DIRECTORY = filepath.makedirs(WC_ECOLI_DIRECTORY, "out")
CACHED_SIM_DATA_DIRECTORY = os.path.join(WC_ECOLI_DIRECTORY, "cached")

# Each submission gets its own directory named <timestamp>__<description>
SUBMISSION_TIME = filepath.timestamp()
INDIV_OUT_DIRECTORY = filepath.makedirs(OUT_DIRECTORY, SUBMISSION_TIME + "__" + SIM_DESCRIPTION)
KB_DIRECTORY = filepath.makedirs(INDIV_OUT_DIRECTORY, "kb")
METADATA_DIRECTORY = filepath.makedirs(INDIV_OUT_DIRECTORY, "metadata")


if VERBOSE_QUEUE:
	print "Building filestructure."

# One directory per variant, named <VARIANT>_<zero-padded 6-digit index>, each
# with its own "kb" and "metadata" subdirectories. The three module-level
# names are rebound on every iteration.
for i in VARIANTS_TO_RUN:
	VARIANT_DIRECTORY = filepath.makedirs(INDIV_OUT_DIRECTORY, VARIANT + "_%06d" % i)
	VARIANT_SIM_DATA_DIRECTORY = filepath.makedirs(VARIANT_DIRECTORY, "kb")
	VARIANT_METADATA_DIRECTORY = filepath.makedirs(VARIANT_DIRECTORY, "metadata")
	"""

	default_output_dir = FILE_LOCATION

	parser = argparse.ArgumentParser(description='Script to save lists of'
		' included genes, metabolites and kinetic constraints in the model')

	parser.add_argument('-r', '--raw-data', default='',
		help='Path to raw_data cPickle object to load, recalculates raw_data if not specified')
	parser.add_argument('-s', '--sim-data', default='',
		help='Path to sim_data cPickle object to load, recalculates sim_data if not specified')
	parser.add_argument('-o', '--output', default=default_output_dir,
		help='Directory path to save tsv files (default: {})'.format(default_output_dir))

	return parser.parse_args()


if __name__ == '__main__':
	# Command line options select the inputs and the output directory
	args = parse_args()
	output_dir = args.output

	# raw_data is loaded first because it is passed on to load_sim_data
	raw_data = load_raw_data(args.raw_data)
	sim_data = load_sim_data(args.sim_data, raw_data)

	# Destination of each tsv file inside the output directory
	genes_path = os.path.join(output_dir, GENES_FILE)
	metabolites_path = os.path.join(output_dir, METABOLITES_FILE)
	kinetics_path = os.path.join(output_dir, KINETICS_FILE)

	# Create the output directory, then write the three files
	filepath.makedirs(output_dir)
	save_genes(raw_data, sim_data, genes_path)
	save_metabolites(raw_data, sim_data, metabolites_path)
	save_kinetics(sim_data, kinetics_path)