コード例 #1
0
    def initialize(self, sim, sim_data):
        """
        Load bulk-molecule constants and build the counts container.

        Args:
            sim: simulation object; only ``sim.processes`` is read here.
            sim_data: simulation data object providing the bulk molecule
                table, Avogadro's number, the submass index mapping and the
                molecule groups used at cell division.
        """
        super(BulkMolecules, self).initialize(sim, sim_data)

        self._processIDs = sim.processes.keys()

        # Constants taken from the bulk molecule table
        bulk_data = sim_data.internal_state.bulkMolecules.bulkData
        self._moleculeIDs = bulk_data['id']

        # Mass per mole (fg/mol) divided by Avogadro's number (1/mol) gives
        # the mass of a single molecule in fg.
        mass_per_mole = bulk_data['mass'].asNumber(units.fg / units.mol)
        avogadro = sim_data.constants.nAvogadro.asNumber(1 / units.mol)
        self._moleculeMass = mass_per_mole / avogadro

        self._submassNameToIndex = sim_data.submassNameToIndex

        # Container that holds the molecule counts
        self.container = BulkObjectsContainer(self._moleculeIDs)

        # Every process starts out with the default request priority
        self._processPriorities = np.full(
            self._nProcesses, REQUEST_PRIORITY_DEFAULT, np.int64)

        # Molecule ids grouped by how they are split between daughter cells
        groups = sim_data.moleculeGroups
        self.divisionIds = {
            'binomial': groups.bulkMoleculesBinomialDivision,
            'equally': groups.bulkMoleculesEqualDivision,
            'fullChromosome': [sim_data.moleculeIds.fullChromosome],
            'partialChromosome': groups.partialChromosome,
            'setTo1': groups.bulkMoleculesSetTo1Division,
        }
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""
		Scatter-plot simulated versus validation protein counts for two variants.

		For each of the first two variants found under inputDir, the time-averaged
		monomer counts of the proteins in PROTEINS_WITH_HALF_LIFE are computed per
		cell (bulk counts plus active ribosomes/RNA polymerases, with complexes
		resolved back to monomers), then averaged across cells. The log10 counts
		are plotted against the 'glucoseCounts' validation data, with the Pearson
		correlation of each variant shown in the legend.

		Args:
			inputDir: directory holding the variant simulation output; must exist.
			plotOutDir: directory the figure is exported to (created if needed).
			plotOutFileName: file name passed to exportFigure.
			simDataFile: not read by this method; sim_data is loaded from the
				'kb' directory under inputDir instead.
			validationDataFile: path to the pickled validation data.
			metadata: passed through to exportFigure.

		Raises:
			Exception: if inputDir is not an existing directory.
		"""
		if not os.path.isdir(inputDir):
			# Call syntax works on both Python 2 and 3, unlike `raise Exception, msg`.
			raise Exception('inputDir does not currently exist as a directory')

		filepath.makedirs(plotOutDir)

		with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
			sim_data = cPickle.load(f)
		with open(validationDataFile, 'rb') as f:
			validation_data = cPickle.load(f)

		ap = AnalysisPaths(inputDir, variant_plot=True)
		variants = ap.get_variants()
		expected_n_variants = 2
		n_variants = len(variants)

		if n_variants < expected_n_variants:
			print('This plot only runs for {} variants.'.format(expected_n_variants))
			return

		# IDs for appropriate proteins
		ids_complexation = sim_data.process.complexation.moleculeNames
		ids_complexation_complexes = sim_data.process.complexation.ids_complexes
		ids_equilibrium = sim_data.process.equilibrium.moleculeNames
		ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
		ids_translation = sim_data.process.translation.monomerData['id'].tolist()
		ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))

		# Stoichiometry matrices
		equil_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
		complex_stoich = sim_data.process.complexation.stoichMatrixMonomers()

		# Protein container views
		protein_container = BulkObjectsContainer(ids_protein, dtype=np.float64)
		view_complexation = protein_container.countsView(ids_complexation)
		view_complexation_complexes = protein_container.countsView(ids_complexation_complexes)
		view_equilibrium = protein_container.countsView(ids_equilibrium)
		view_equilibrium_complexes = protein_container.countsView(ids_equilibrium_complexes)

		# Load model data
		model_counts = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
		model_std = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
		for i, variant in enumerate(variants):
			# Only the first expected_n_variants columns exist in the output arrays
			if i >= expected_n_variants:
				print('Skipping variant {} - only runs for {} variants.'.format(variant, expected_n_variants))
				continue

			variant_counts = []
			for sim_dir in ap.get_cells(variant=[variant]):
				simOutDir = os.path.join(sim_dir, 'simOut')

				# Listeners used
				unique_counts_reader = TableReader(os.path.join(simOutDir, 'UniqueMoleculeCounts'))

				# Account for bulk molecules
				(bulk_counts,) = read_bulk_molecule_counts(simOutDir, ids_protein)
				protein_container.countsIs(bulk_counts.mean(axis=0))

				# Account for unique molecules; the attribute and the column are
				# each read once and then indexed for both molecule types
				unique_molecule_ids = unique_counts_reader.readAttribute('uniqueMoleculeIds')
				unique_molecule_counts = unique_counts_reader.readColumn('uniqueMoleculeCounts')
				ribosome_index = unique_molecule_ids.index('activeRibosome')
				rnap_index = unique_molecule_ids.index('activeRnaPoly')
				n_ribosomes = unique_molecule_counts[:, ribosome_index]
				n_rnap = unique_molecule_counts[:, rnap_index]
				protein_container.countsInc(n_ribosomes.mean(), [sim_data.moleculeIds.s30_fullComplex, sim_data.moleculeIds.s50_fullComplex])
				protein_container.countsInc(n_rnap.mean(), [sim_data.moleculeIds.rnapFull])

				# Account for small-molecule bound complexes
				view_equilibrium.countsDec(equil_stoich.dot(view_equilibrium_complexes.counts()))

				# Account for monomers in complexed form
				view_complexation.countsDec(complex_stoich.dot(view_complexation_complexes.counts()))

				variant_counts.append(protein_container.countsView(PROTEINS_WITH_HALF_LIFE).counts())
			model_counts[:, i] = np.mean(variant_counts, axis=0)
			model_std[:, i] = np.std(variant_counts, axis=0)

		# Validation data
		schmidt_ids = {m: i for i, m in enumerate(validation_data.protein.schmidt2015Data['monomerId'])}
		schmidt_counts = validation_data.protein.schmidt2015Data['glucoseCounts']
		validation_counts = np.array([schmidt_counts[schmidt_ids[p]] for p in PROTEINS_WITH_HALF_LIFE])

		# Process data
		# NOTE(review): log10 yields nan/-inf wherever model_counts - model_std <= 0
		# or a count is 0 - confirm this cannot happen for the plotted proteins.
		model_log_counts = np.log10(model_counts)
		model_log_lower_std = model_log_counts - np.log10(model_counts - model_std)
		model_log_upper_std = np.log10(model_counts + model_std) - model_log_counts
		validation_log_counts = np.log10(validation_counts)
		r_before = stats.pearsonr(validation_log_counts, model_log_counts[:, 0])
		r_after = stats.pearsonr(validation_log_counts, model_log_counts[:, 1])

		# Scatter plot of model vs validation counts
		max_counts = np.ceil(max(validation_log_counts.max(), model_log_upper_std.max()))
		limits = [0, max_counts]
		plt.figure()
		colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

		## Plot data
		for i in range(expected_n_variants):
			plt.errorbar(validation_log_counts, model_log_counts[:, i],
				yerr=np.vstack((model_log_lower_std[:, i], model_log_upper_std[:, i])),
				fmt='o', color=colors[i], ecolor='k', capsize=3, alpha=0.5)
		# Identity line; '_nolegend_' keeps it out of the legend
		plt.plot(limits, limits, 'k--', linewidth=0.5, label='_nolegend_')

		## Format axes
		plt.xlabel('Validation Counts\n(log10(counts))')
		plt.ylabel('Average Simulation Counts\n(log10(counts))')
		ax = plt.gca()
		ax.spines['right'].set_visible(False)
		ax.spines['top'].set_visible(False)
		ax.spines['left'].set_position(('outward', 10))
		ax.spines['bottom'].set_position(('outward', 10))
		ax.xaxis.set_major_locator(MaxNLocator(integer=True))
		ax.yaxis.set_major_locator(MaxNLocator(integer=True))

		## Add legend
		legend_text = [
			'Before: r={:.2f}, p={:.3f}'.format(r_before[0], r_before[1]),
			'After: r={:.2f}, p={:.3f}'.format(r_after[0], r_after[1]),
			]
		plt.legend(legend_text, frameon=False)

		plt.tight_layout()
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		plt.close('all')
コード例 #3
0
def fitSimData_2(kb, simOutDir):
	"""
	Fit the ppGpp pool concentration and tRNA synthetase rates, storing them on kb.

	N_SEEDS cells are instantiated by multinomial sampling of RNA and protein
	counts, complexes are formed in each, and the per-molecule mean and standard
	deviation across seeds are used for the fits below.

	Args:
		kb: knowledge base object. It is modified in place: the 'PPGPP[c]' entry
			of kb.process.metabolism.metabolitePoolConcentrations is overwritten,
			and kb.synthetase_counts, kb.synthetase_variance,
			kb.initial_aa_polymerization_rate, kb.minimum_trna_synthetase_rates
			and kb.trna_synthetase_rates are set.
		simOutDir: not used by this function.

	Returns:
		None.
	"""

	subMass = kb.mass.subMass
	proteinMass = subMass["proteinMass"].asUnit(units.g)
	rnaMass = subMass["rnaMass"].asUnit(units.g)

	# Construct bulk container

	# We want to know something about the distribution of the copy numbers of
	# macromolecules in the cell.  While RNA and protein expression can be
	# approximated using well-described statistical distributions, we need
	# absolute copy numbers to form complexes.  To get a distribution, we must
	# instantiate many cells, form complexes, and finally compute the
	# statistics we will use in the fitting operations.

	bulkContainer = BulkObjectsContainer(kb.state.bulkMolecules.bulkData['id'])
	rnaView = bulkContainer.countsView(kb.process.transcription.rnaData["id"])
	proteinView = bulkContainer.countsView(kb.process.translation.monomerData["id"])
	complexationMoleculesView = bulkContainer.countsView(kb.process.complexation.moleculeNames)
	allMoleculesIDs = list(
		set(kb.process.transcription.rnaData["id"]) | set(kb.process.translation.monomerData["id"]) | set(kb.process.complexation.moleculeNames)
		)
	allMoleculesView = bulkContainer.countsView(allMoleculesIDs)

	# One row of molecule counts per seed
	allMoleculeCounts = np.empty((N_SEEDS, allMoleculesView.counts().size), np.int64)

	complexationStoichMatrix = kb.process.complexation.stoichMatrix().astype(np.int64, order = "F")

	complexationPrebuiltMatrices = mccBuildMatrices(
		complexationStoichMatrix
		)

	rnaDistribution = kb.process.transcription.rnaData["expression"]

	rnaTotalCounts = countsFromMassAndExpression(
		rnaMass.asNumber(units.g),
		kb.process.transcription.rnaData["mw"].asNumber(units.g / units.mol),
		rnaDistribution,
		kb.constants.nAvogadro.asNumber(1 / units.mol)
		)

	proteinDistribution = calcProteinDistribution(kb)

	proteinTotalCounts = calcProteinTotalCounts(kb, proteinMass, proteinDistribution)

	for seed in xrange(N_SEEDS):
		# The seed doubles as the row index, so each row is reproducible
		randomState = np.random.RandomState(seed)

		allMoleculesView.countsIs(0)

		rnaView.countsIs(randomState.multinomial(
			rnaTotalCounts,
			rnaDistribution
			))

		proteinView.countsIs(randomState.multinomial(
			proteinTotalCounts,
			proteinDistribution
			))

		complexationMoleculeCounts = complexationMoleculesView.counts()

		updatedCompMoleculeCounts = mccFormComplexesWithPrebuiltMatrices(
			complexationMoleculeCounts,
			seed,
			complexationStoichMatrix,
			*complexationPrebuiltMatrices
			)

		complexationMoleculesView.countsIs(updatedCompMoleculeCounts)

		allMoleculeCounts[seed, :] = allMoleculesView.counts()

	# Per-molecule mean and standard deviation across seeds
	bulkAverageContainer = BulkObjectsContainer(kb.state.bulkMolecules.bulkData['id'], np.float64)
	bulkDeviationContainer = BulkObjectsContainer(kb.state.bulkMolecules.bulkData['id'], np.float64)

	bulkAverageContainer.countsIs(allMoleculeCounts.mean(0), allMoleculesIDs)
	bulkDeviationContainer.countsIs(allMoleculeCounts.std(0), allMoleculesIDs)

	# Free up memory
	# TODO: make this more functional; one function for returning average & distribution
	del allMoleculeCounts
	del bulkContainer

	# ----- Calculate ppGpp concentration ----- #
	aminoAcidsInProtein = (bulkAverageContainer.counts(kb.process.translation.monomerData['id']) * kb.process.translation.monomerData['length'].asNumber()).sum()
	aminoAcidsInComplex = 0.
	for cplx in kb.process.complexation.complexNames:
		cplx_data = kb.process.complexation.getMonomers(cplx)
		cplx_subunit = cplx_data['subunitIds']
		cplx_stoich = cplx_data['subunitStoich']

		# Subunits without an entry in monomerData are dropped from the
		# stoichiometry so it stays aligned with subunit_idxs.
		subunit_idxs = []
		subunit_idxs_to_delete = []
		for idx, subunit in enumerate(cplx_subunit):
			try:
				subunit_idxs.append(np.where(kb.process.translation.monomerData['id'] == subunit)[0][0])
			except IndexError:
				subunit_idxs_to_delete.append(idx)
		cplx_stoich = np.delete(cplx_stoich, subunit_idxs_to_delete)

		subunit_length = kb.process.translation.monomerData['length'][subunit_idxs].asNumber()
		aminoAcidsInComplex += (bulkAverageContainer.count(cplx) * subunit_length * cplx_stoich).sum()

	totalAminoAcidsInMacromolecules = (aminoAcidsInComplex + aminoAcidsInProtein)
	totalAAInSolublePool = totalAminoAcidsInMacromolecules * 0.08 # Approximately correct for a one-time calculation.
	# TODO: Calculate soluble pools here too!
	totalAminoAcidsInCell = totalAminoAcidsInMacromolecules + totalAAInSolublePool

	ppGpp_per_cell = (totalAminoAcidsInCell * kb.constants.ppGpp_base_concentration).asUnit(units.count)
	cellVolume = kb.mass.avgCellDryMassInit / kb.constants.cellDensity
	ppGpp_concentration = (ppGpp_per_cell.asUnit(units.mol) / cellVolume).asUnit(units.mol / units.L)
	# Finally set ppGpp concentration to maintain
	kb.process.metabolism.metabolitePoolConcentrations[kb.process.metabolism.metabolitePoolIDs.index('PPGPP[c]')] = ppGpp_concentration

	# ----- tRNA synthetase turnover rates ------
	# Fit tRNA synthetase kcat values based on expected rates of translation
	# compute values at initial time point

	## Compute rate of AA incorporation
	proteinComposition = kb.process.translation.monomerData["aaCounts"]

	initialProteinMass = kb.mass.subMass['proteinMass']

	initialProteinCounts = calcProteinCounts(kb, initialProteinMass)

	initialProteinTranslationRate = (
		(np.log(2) / kb.doubling_time + kb.process.translation.monomerData["degRate"]) * initialProteinCounts
		).asUnit(1 / units.s)

	initialAAPolymerizationRate = units.dot(
		units.transpose(proteinComposition), initialProteinTranslationRate
		).asUnit(units.aa / units.s)

	## Compute expression of tRNA synthetases
	## Assuming independence in variance
	# NOTE(review): the values summed into the "variance" vector come from
	# bulkDeviationContainer, which holds standard deviations (std), not
	# variances - confirm whether the squares should be summed instead.
	synthetase_counts_by_group = np.zeros(len(kb.process.translation.AA_SYNTHETASE_GROUPS), dtype = np.float64)
	synthetase_variance_by_group = np.zeros(len(kb.process.translation.AA_SYNTHETASE_GROUPS), dtype = np.float64)
	for idx, synthetase_group in enumerate(kb.process.translation.AA_SYNTHETASE_GROUPS.itervalues()):
		group_count = 0.
		group_variance = 0.
		for synthetase in synthetase_group:
			# count() is a single-molecule lookup, so the running sums stay
			# scalars and no size-1 array is assigned into a vector element.
			group_count += bulkAverageContainer.count(synthetase)
			group_variance += bulkDeviationContainer.count(synthetase)
		synthetase_counts_by_group[idx] = group_count
		synthetase_variance_by_group[idx] = group_variance

	## Saved for plotting
	kb.synthetase_counts = synthetase_counts_by_group
	kb.synthetase_variance = synthetase_variance_by_group
	kb.initial_aa_polymerization_rate = initialAAPolymerizationRate
	kb.minimum_trna_synthetase_rates = initialAAPolymerizationRate / synthetase_counts_by_group

	# TODO: Reimplement this with better fit taking into account the variance in aa
	#		utilization.
	## Scaling synthetase counts by -2*variance so that rates will be high enough
	## to accommodate stochastic behavior in the model without translation stalling.
	# scaled_synthetase_counts = synthetase_counts_by_group - (2 * synthetase_variance_by_group)
	scaled_synthetase_counts = synthetase_counts_by_group
	assert all(scaled_synthetase_counts > 0)

	predicted_trna_synthetase_rates = initialAAPolymerizationRate / scaled_synthetase_counts
	# The stored rates are twice the predicted ones
	kb.trna_synthetase_rates = 2 * predicted_trna_synthetase_rates
コード例 #4
0
class BulkMolecules(wholecell.states.internal_state.InternalState):
    """
    Internal state holding bulk molecule counts and their per-process partitions.

    Counts live in ``self.container``. Each time ``partition`` runs, processes'
    requests are collected, counts are allocated to processes by
    ``calculatePartition``, and whatever is left over is kept in
    ``self._countsUnallocated`` so that ``merge`` can recombine everything.
    """
    _name = 'BulkMolecules'

    def __init__(self, *args, **kwargs):
        """Declare the attributes that initialize() and allocate() fill in."""
        self.container = None
        self._moleculeMass = None
        self._moleculeIDs = None
        self._countsRequested = None
        self._countsAllocatedInitial = None
        self._countsAllocatedFinal = None
        self._countsUnallocated = None

        super(BulkMolecules, self).__init__(*args, **kwargs)

    def initialize(self, sim, sim_data):
        """
        Load bulk-molecule constants and build the counts container.

        Args:
            sim: simulation object; only ``sim.processes`` is read here.
            sim_data: simulation data object providing the bulk molecule
                table, Avogadro's number, the submass index mapping and the
                molecule groups used at cell division.
        """
        super(BulkMolecules, self).initialize(sim, sim_data)

        self._processIDs = sim.processes.keys()

        # Load constants
        self._moleculeIDs = sim_data.internal_state.bulkMolecules.bulkData[
            'id']

        # Mass per mole (fg/mol) divided by Avogadro's number (1/mol) gives
        # the mass of a single molecule in fg.
        self._moleculeMass = sim_data.internal_state.bulkMolecules.bulkData[
            'mass'].asNumber(
                units.fg / units.mol) / sim_data.constants.nAvogadro.asNumber(
                    1 / units.mol)

        self._submassNameToIndex = sim_data.submassNameToIndex

        # Create the container for molecule counts
        self.container = BulkObjectsContainer(self._moleculeIDs)

        # Set up vector of process priorities
        self._processPriorities = np.empty(self._nProcesses, np.int64)
        self._processPriorities.fill(REQUEST_PRIORITY_DEFAULT)

        # Set up ids for division into daughter cells
        self.divisionIds = {}
        self.divisionIds[
            'binomial'] = sim_data.moleculeGroups.bulkMoleculesBinomialDivision
        self.divisionIds[
            'equally'] = sim_data.moleculeGroups.bulkMoleculesEqualDivision
        self.divisionIds['fullChromosome'] = [
            sim_data.moleculeIds.fullChromosome
        ]
        self.divisionIds[
            'partialChromosome'] = sim_data.moleculeGroups.partialChromosome
        self.divisionIds[
            'setTo1'] = sim_data.moleculeGroups.bulkMoleculesSetTo1Division

    def processRequestPriorityIs(self, processIndex, priorityLevel):
        """Set the request priority of the process at ``processIndex``."""
        self._processPriorities[processIndex] = priorityLevel

    def allocate(self):
        """Create the zeroed arrays that track requests and allocations."""
        super(BulkMolecules, self).allocate()  # Allocates partitions

        nMolecules = self.container._counts.size
        dtype = self.container._counts.dtype

        # Arrays for tracking values related to partitioning; the 2-D ones
        # are indexed (molecule, process)
        self._countsRequested = np.zeros((nMolecules, self._nProcesses), dtype)
        self._countsAllocatedInitial = np.zeros((nMolecules, self._nProcesses),
                                                dtype)
        self._countsAllocatedFinal = np.zeros((nMolecules, self._nProcesses),
                                              dtype)
        self._countsUnallocated = np.zeros(nMolecules, dtype)

    def _raiseIfNegative(self, counts, attrName):
        """
        Raise NegativeCountsError if ``counts`` holds any negative value.

        Does nothing when ASSERT_POSITIVE_COUNTS is false. The error message
        names every offending molecule, plus the process for 2-D
        (molecule, process) arrays.

        Args:
            counts: 1-D (molecule) or 2-D (molecule, process) array to check.
            attrName: attribute name reported in the error message.

        Raises:
            NegativeCountsError: if any entry of ``counts`` is below zero.
        """
        if not ASSERT_POSITIVE_COUNTS or (counts >= 0).all():
            return

        if counts.ndim == 1:
            details = "\n".join(
                "{} ({})".format(self._moleculeIDs[molIndex],
                                 counts[molIndex])
                for molIndex in np.where(counts < 0)[0])
        else:
            details = "\n".join(
                "{} in {} ({})".format(self._moleculeIDs[molIndex],
                                       self._processIDs[processIndex],
                                       counts[molIndex, processIndex])
                for molIndex, processIndex in izip(*np.where(counts < 0)))

        raise NegativeCountsError(
            "Negative value(s) in self.{}:\n".format(attrName) + details)

    def partition(self):
        """
        Collect the views' requests and split the counts between processes.

        Raises:
            NegativeCountsError: if ASSERT_POSITIVE_COUNTS is set and the
                requested, allocated or unallocated counts go negative.
        """
        # Without processes there is nothing to allocate
        if self._nProcesses == 0:
            self._countsUnallocated = self.container._counts
            return

        # Calculate and store requests
        self._countsRequested[:] = 0

        for view in self._views:
            self._countsRequested[view._containerIndexes,
                                  view._processIndex] += view._request()

        self._raiseIfNegative(self._countsRequested, "_countsRequested")

        # Calculate partition
        calculatePartition(self._processPriorities, self._countsRequested,
                           self.container._counts,
                           self._countsAllocatedInitial)

        self._raiseIfNegative(self._countsAllocatedInitial,
                              "_countsAllocatedInitial")

        # Record unpartitioned counts for later merging
        self._countsUnallocated = self.container._counts - np.sum(
            self._countsAllocatedInitial, axis=-1)

        self._raiseIfNegative(self._countsUnallocated, "_countsUnallocated")

        self._countsAllocatedFinal[:] = self._countsAllocatedInitial

    def calculatePreEvolveStateMass(self):
        """Store the mass of each partition, and of the unallocated counts, before evolving."""
        # Compute masses of partitioned molecules

        if self.simulationStep() == 0:
            self._countsUnallocated = self.container._counts

        # The unallocated counts are appended as one extra column next to
        # the per-process allocations
        self._masses[self._preEvolveStateMassIndex, ...] = np.dot(
            np.hstack([
                self._countsAllocatedInitial,
                self._countsUnallocated[:, np.newaxis]
            ]).T, self._moleculeMass)

    def merge(self):
        """
        Recombine the final allocations and unallocated counts into the container.

        Raises:
            NegativeCountsError: if ASSERT_POSITIVE_COUNTS is set and any
                final allocated count is negative.
        """
        self._raiseIfNegative(self._countsAllocatedFinal,
                              "_countsAllocatedFinal")

        self.container.countsIs(self._countsUnallocated +
                                self._countsAllocatedFinal.sum(axis=-1))

    def calculatePostEvolveStateMass(self):
        """Store the mass of each partition, and of the unallocated counts, after evolving."""
        # Compute masses of partitioned molecules

        if self.simulationStep() == 0:
            self._countsUnallocated = self.container._counts

        # The unallocated counts are appended as one extra column next to
        # the per-process allocations
        self._masses[self._postEvolveStateMassIndex, ...] = np.dot(
            np.hstack([
                self._countsAllocatedFinal,
                self._countsUnallocated[:, np.newaxis]
            ]).T, self._moleculeMass)

    def tableCreate(self, tableWriter):
        """Create the container's table and record the process names."""
        self.container.tableCreate(tableWriter)
        tableWriter.writeAttributes(processNames=self._processIDs, )

    def tableAppend(self, tableWriter):
        """Append the current counts and the per-process ATP partitioning rows."""
        # Rows are written here directly rather than through
        # self.container.tableAppend. The ATP row index is looked up once
        # and reused for all three arrays.
        atpIndex = self.container._objectNames.index("ATP[c]")

        tableWriter.append(
            counts=self.container._counts,
            atpAllocatedInitial=self._countsAllocatedInitial[atpIndex, :],
            atpAllocatedFinal=self._countsAllocatedFinal[atpIndex, :],
            atpRequested=self._countsRequested[atpIndex, :],
        )

    def tableLoad(self, tableReader, tableIndex):
        """Load the container from ``tableReader`` at ``tableIndex``."""
        self.container.tableLoad(tableReader, tableIndex)
コード例 #5
0
    def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """
        Histogram of how often each protein is present across generations.

        For every cell under seedOutDir, the time-averaged monomer counts are
        computed (bulk counts plus active ribosomes/RNA polymerases, with
        two-component, equilibrium and complexation complexes resolved back
        to monomers). A protein is "present" in a cell when that average is
        non-zero; the histogram shows the fraction of cells in which each
        translated monomer is present.

        Args:
            seedOutDir: directory holding the multi-generation output; must
                exist.
            plotOutDir: directory the figure is exported to (created if
                missing).
            plotOutFileName: file name passed to exportFigure.
            simDataFile: path to the pickled sim_data object.
            validationDataFile: not used by this method.
            metadata: passed through to exportFigure.

        Raises:
            Exception: if seedOutDir is not an existing directory.
        """
        if not os.path.isdir(seedOutDir):
            # Call syntax works on both Python 2 and 3, unlike
            # `raise Exception, msg`.
            raise Exception(
                "seedOutDir does not currently exist as a directory")

        if not os.path.exists(plotOutDir):
            os.mkdir(plotOutDir)

        # Get all cells
        ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
        allDir = ap.get_cells()

        # The context manager closes the pickle file after loading
        with open(simDataFile, "rb") as f:
            sim_data = cPickle.load(f)

        complexation = sim_data.process.complexation
        equilibrium = sim_data.process.equilibrium
        two_component_system = sim_data.process.two_component_system

        ids_complexation = complexation.moleculeNames
        ids_complexation_complexes = complexation.ids_complexes
        ids_equilibrium = equilibrium.moleculeNames
        ids_equilibrium_complexes = equilibrium.ids_complexes
        ids_twoComponent = two_component_system.moleculeNames.tolist()
        # list() keeps this a plain list even where keys() returns a view
        ids_twoComponent_complexes = list(
            two_component_system.complexToMonomer.keys())
        ids_translation = sim_data.process.translation.monomerData[
            "id"].tolist()
        ids_protein = sorted(
            set(ids_complexation + ids_equilibrium + ids_twoComponent +
                ids_translation))

        bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
        view_complexation = bulkContainer.countsView(ids_complexation)
        view_complexation_complexes = bulkContainer.countsView(
            ids_complexation_complexes)
        view_equilibrium = bulkContainer.countsView(ids_equilibrium)
        view_equilibrium_complexes = bulkContainer.countsView(
            ids_equilibrium_complexes)
        view_twoComponent = bulkContainer.countsView(ids_twoComponent)
        view_twoComponent_complexes = bulkContainer.countsView(
            ids_twoComponent_complexes)
        view_translation = bulkContainer.countsView(ids_translation)

        # The stoichiometry matrices do not depend on the cell, so they are
        # built once instead of on every loop iteration
        stoich_twoComponent = two_component_system.stoichMatrixMonomers()
        stoich_equilibrium = equilibrium.stoichMatrixMonomers()
        stoich_complexation = complexation.stoichMatrixMonomers()

        proteinPresence = []
        for simDir in allDir:
            simOutDir = os.path.join(simDir, "simOut")
            bulkMolecules = TableReader(
                os.path.join(simOutDir, "BulkMolecules"))
            moleculeIds = bulkMolecules.readAttribute("objectNames")
            proteinIndexes = np.array(
                [moleculeIds.index(moleculeId) for moleculeId in ids_protein],
                int)
            proteinCountsBulk = bulkMolecules.readColumn(
                "counts")[:, proteinIndexes]
            bulkMolecules.close()

            # Account for monomers
            bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

            # Account for unique molecules
            uniqueMoleculeCounts = TableReader(
                os.path.join(simOutDir, "UniqueMoleculeCounts"))
            ribosomeIndex = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds").index("activeRibosome")
            rnaPolyIndex = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds").index("activeRnaPoly")
            nActiveRibosome = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")[:, ribosomeIndex]
            nActiveRnaPoly = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")[:, rnaPolyIndex]
            uniqueMoleculeCounts.close()
            bulkContainer.countsInc(nActiveRibosome.mean(), [
                sim_data.moleculeIds.s30_fullComplex,
                sim_data.moleculeIds.s50_fullComplex
            ])
            bulkContainer.countsInc(nActiveRnaPoly.mean(),
                                    [sim_data.moleculeIds.rnapFull])

            # Account for two-component complexes
            view_twoComponent.countsInc(
                np.dot(stoich_twoComponent,
                       view_twoComponent_complexes.counts() * -1))

            # Account for small-molecule bound complexes
            view_equilibrium.countsInc(
                np.dot(stoich_equilibrium,
                       view_equilibrium_complexes.counts() * -1))

            # Account for monomers in complexed form
            view_complexation.countsInc(
                np.dot(stoich_complexation,
                       view_complexation_complexes.counts() * -1))

            # Get boolean protein presence
            proteinCounts = view_translation.counts()
            proteinPresence.append(proteinCounts != 0)

            # Clear counts
            bulkContainer.countsIs(0)

        proteinPresence = np.array(proteinPresence)

        # Plot
        plt.figure(figsize=(12, 12))
        ax = plt.subplot(1, 1, 1)
        nGens = len(allDir)
        # One histogram bin per generation
        ax.hist(np.mean(proteinPresence, axis=0), nGens)
        ax.set_xlabel(
            "Frequency of observing at least 1 protein copy in 1 generation",
            fontsize=14)
        ax.set_ylabel("Number of proteins", fontsize=14)

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")
コード例 #6
0
	def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""
		Scatter-plot simulated protein counts against the Schmidt 2015 validation counts.

		For every cell under seedOutDir, the time-averaged monomer counts are
		computed (bulk counts plus active ribosomes/RNA polymerases, with
		equilibrium and complexation complexes resolved back to monomers). The
		counts of the validation monomers are averaged over cells and plotted as
		log10(count + 1) against the 'glucoseCounts' validation data. R^2 values
		and mean log deviations are printed for all, low-abundance (below
		LOW_COUNT_THRESHOLD) and high-abundance proteins.

		Args:
			seedOutDir: directory holding the multi-generation output; must exist.
			plotOutDir: directory the figure is exported to (created if missing).
			plotOutFileName: file name passed to exportFigure.
			simDataFile: path to the pickled sim_data object.
			validationDataFile: path to the pickled validation data.
			metadata: passed through to exportFigure.

		Raises:
			Exception: if seedOutDir is not an existing directory.
		"""
		if not os.path.isdir(seedOutDir):
			# Call syntax works on both Python 2 and 3, unlike `raise Exception, msg`.
			raise Exception("seedOutDir does not currently exist as a directory")

		if not os.path.exists(plotOutDir):
			os.mkdir(plotOutDir)

		# The context managers close the pickle files after loading
		with open(simDataFile, "rb") as f:
			sim_data = cPickle.load(f)
		with open(validationDataFile, "rb") as f:
			validation_data = cPickle.load(f)

		ids_complexation = sim_data.process.complexation.moleculeNames
		ids_complexation_complexes = sim_data.process.complexation.ids_complexes
		ids_equilibrium = sim_data.process.equilibrium.moleculeNames
		ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
		ids_translation = sim_data.process.translation.monomerData["id"].tolist()
		ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))
		bulkContainer = BulkObjectsContainer(ids_protein, dtype = np.float64)
		view_complexation = bulkContainer.countsView(ids_complexation)
		view_complexation_complexes = bulkContainer.countsView(ids_complexation_complexes)
		view_equilibrium = bulkContainer.countsView(ids_equilibrium)
		view_equilibrium_complexes = bulkContainer.countsView(ids_equilibrium_complexes)
		# Built once and re-read after each cell's counts have been set
		view_validation_schmidt = bulkContainer.countsView(validation_data.protein.schmidt2015Data["monomerId"].tolist())

		# The stoichiometry matrices do not depend on the cell, so they are
		# built once instead of on every loop iteration
		stoich_equilibrium = sim_data.process.equilibrium.stoichMatrixMonomers()
		stoich_complexation = sim_data.process.complexation.stoichMatrixMonomers()

		# Get all cells
		ap = AnalysisPaths(seedOutDir, multi_gen_plot = True)

		allDir = ap.get_cells()

		View_Validation_Schmidt = []

		plt.figure(figsize = (4, 4))

		for simDir in allDir:
			simOutDir = os.path.join(simDir, "simOut")

			bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
			moleculeIds = bulkMolecules.readAttribute("objectNames")
			proteinIndexes = np.array([moleculeIds.index(moleculeId) for moleculeId in ids_protein], int)
			proteinCountsBulk = bulkMolecules.readColumn("counts")[:, proteinIndexes]
			bulkMolecules.close()

			# Account for monomers
			bulkContainer.countsIs(proteinCountsBulk.mean(axis = 0))

			# Account for unique molecules
			uniqueMoleculeCounts = TableReader(os.path.join(simOutDir, "UniqueMoleculeCounts"))
			ribosomeIndex = uniqueMoleculeCounts.readAttribute("uniqueMoleculeIds").index("activeRibosome")
			rnaPolyIndex = uniqueMoleculeCounts.readAttribute("uniqueMoleculeIds").index("activeRnaPoly")
			nActiveRibosome = uniqueMoleculeCounts.readColumn("uniqueMoleculeCounts")[:, ribosomeIndex]
			nActiveRnaPoly = uniqueMoleculeCounts.readColumn("uniqueMoleculeCounts")[:, rnaPolyIndex]
			uniqueMoleculeCounts.close()
			bulkContainer.countsInc(nActiveRibosome.mean(), [sim_data.moleculeIds.s30_fullComplex, sim_data.moleculeIds.s50_fullComplex])
			bulkContainer.countsInc(nActiveRnaPoly.mean(), [sim_data.moleculeIds.rnapFull])

			# Account for small-molecule bound complexes
			view_equilibrium.countsInc(
				np.dot(stoich_equilibrium, view_equilibrium_complexes.counts() * -1)
				)

			# Account for monomers in complexed form
			view_complexation.countsInc(
				np.dot(stoich_complexation, view_complexation_complexes.counts() * -1)
				)

			View_Validation_Schmidt.append(view_validation_schmidt.counts())

		simulation_counts = (np.array(View_Validation_Schmidt)).mean(axis = 0)

		# Schmidt Counts
		schmidt_counts = validation_data.protein.schmidt2015Data["glucoseCounts"]

		# Set up mask for proteins with low counts
		low_count_mask = schmidt_counts < LOW_COUNT_THRESHOLD
		n_low_count = low_count_mask.sum()
		n_high_count = schmidt_counts.size - n_low_count

		# Take logs; the +1 keeps zero counts finite
		schmidt_counts_log = np.log10(schmidt_counts + 1)
		simulation_counts_log = np.log10(simulation_counts + 1)

		# Compute deviations
		# NOTE(review): an exact match gives log10(0) = -inf, which would
		# propagate into the printed means - confirm whether that can occur.
		deviation_log = np.log10(np.abs(simulation_counts - schmidt_counts))

		axis = plt.subplot(1,1,1)

		axis.plot(schmidt_counts_log, simulation_counts_log, 'o', color = "black", markersize = 6, alpha = 0.1, zorder = 1, markeredgewidth = 0.0)
		print("R^2 (all proteins) = %.3f (n = %d)" % (
			(pearsonr(simulation_counts_log, schmidt_counts_log)[0])**2,
			schmidt_counts.size
			))
		print("R^2 (low-abundance proteins) = %.3f (n = %d)" % (
			(pearsonr(simulation_counts_log[low_count_mask],
				schmidt_counts_log[low_count_mask])[0])**2,
			n_low_count
			))
		print("R^2 (high-abundance proteins) = %.3f (n = %d)" % (
			(pearsonr(simulation_counts_log[~low_count_mask],
				schmidt_counts_log[~low_count_mask])[0])**2,
			n_high_count
			))

		print("Average log deviation (low-abundance proteins) = %.3f" % (
			deviation_log[low_count_mask].mean()))
		print("Average log deviation (high-abundance proteins) = %.3f" % (
			deviation_log[~low_count_mask].mean()))

		# Identity line
		maxLine = np.ceil(
			max(schmidt_counts_log.max(), simulation_counts_log.max())
			)
		plt.plot([0, maxLine], [0, maxLine], '-k')

		axis.spines["right"].set_visible(False)
		axis.spines["top"].set_visible(False)
		axis.spines["left"].set_position(("outward", 10))
		axis.spines["bottom"].set_position(("outward", 10))
		# Booleans rather than the string "off", which is a truthy value
		axis.tick_params(right = False)
		axis.tick_params(top = False)
		axis.tick_params(which = "both", direction = "out")

		# These are the only limits set: earlier plt.xlim/plt.ylim calls were
		# dropped because they were overridden here anyway
		axis.set_xlim([-0.07, maxLine])
		axis.set_ylim([-0.07, maxLine])

		exportFigure(plt, plotOutDir, plotOutFileName, metadata)
		plt.close("all")
コード例 #7
0
    def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        # Scatter plot (log-log) of average RNA counts per cell against
        # average protein "functional unit" counts per cell, averaged over
        # every cell found under seedOutDir.  Monomers that are subunits of
        # several complexes get one point per complex.
        #
        # NOTE: the unconditional `return` directly below disables this plot;
        # everything after it is unreachable and is kept for reference only.
        # validationDataFile is not used anywhere in the body.
        return

        HIGHLIGHT_GENES = False
        USE_CACHE = False  # set to True below when a cache file exists, but forced back to False before it is read

        if not os.path.isdir(seedOutDir):
            raise Exception, "seedOutDir does not currently exist as a directory"

        if not os.path.exists(plotOutDir):
            os.mkdir(plotOutDir)

        # Check if the cache written by figure5B_E_F_G.py exists; it supplies
        # the per-mRNA colors and the mRNA id ordering used for coloring
        if os.path.exists(os.path.join(plotOutDir, "figure5B.pickle")):
            figure5B_data = cPickle.load(
                open(os.path.join(plotOutDir, "figure5B.pickle"), "rb"))
            colors = figure5B_data["colors"]
            mrnaIds = figure5B_data["id"].tolist()
        else:
            print "Requires figure5B.pickle from figure5B_E_F_G.py"
            return

        # Check if this plot's own cache exists
        if os.path.exists(
                os.path.join(plotOutDir, "%s.cPickle" % plotOutFileName)):
            USE_CACHE = True

        # Get all cells
        ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
        allDir = ap.get_cells()

        # Load sim data
        sim_data = cPickle.load(open(simDataFile, "rb"))
        rnaIds = sim_data.process.transcription.rnaData["id"][
            sim_data.relation.
            rnaIndexToMonomerMapping]  # orders rna IDs to match monomer IDs

        # Make views for monomers
        ids_complexation = sim_data.process.complexation.moleculeNames
        ids_complexation_complexes = sim_data.process.complexation.ids_complexes
        ids_equilibrium = sim_data.process.equilibrium.moleculeNames
        ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
        ids_translation = sim_data.process.translation.monomerData[
            "id"].tolist()
        ids_protein = sorted(
            set(ids_complexation + ids_equilibrium + ids_translation))
        bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
        view_complexation = bulkContainer.countsView(ids_complexation)
        view_complexation_complexes = bulkContainer.countsView(
            ids_complexation_complexes)
        view_equilibrium = bulkContainer.countsView(ids_equilibrium)
        view_equilibrium_complexes = bulkContainer.countsView(
            ids_equilibrium_complexes)
        view_translation = bulkContainer.countsView(ids_translation)

        # Identify monomers that are subunits for multiple complexes:
        # a subunit seen a second time is recorded as "involved in many"
        monomersInvolvedInManyComplexes = []
        monomersInvolvedInComplexes = []
        for complexId in ids_complexation_complexes:
            subunitIds = sim_data.process.complexation.getMonomers(
                complexId)["subunitIds"]
            for subunitId in subunitIds:
                if subunitId in monomersInvolvedInComplexes:
                    monomersInvolvedInManyComplexes.append(subunitId)
                monomersInvolvedInComplexes.append(subunitId)
        monomersInvolvedInManyComplexes_id = list(
            set(monomersInvolvedInManyComplexes))
        # Per-monomer dict of {complexId: summed average complex count}
        monomersInvolvedInManyComplexes_dict = {}
        for x in monomersInvolvedInManyComplexes_id:
            monomersInvolvedInManyComplexes_dict[x] = {}
        # NOTE(review): this overrides the cache check above, so the cached
        # branch further down is never taken - confirm this is intentional
        USE_CACHE = False
        if not USE_CACHE:
            # Get average (over timesteps) counts for all generations (ie. all cells)
            avgRnaCounts_forAllCells = np.zeros(rnaIds.shape[0], np.float64)
            avgProteinCounts_forAllCells = np.zeros(rnaIds.shape[0],
                                                    np.float64)
            for i, simDir in enumerate(allDir):
                simOutDir = os.path.join(simDir, "simOut")

                # Account for bulk molecules
                bulkMolecules = TableReader(
                    os.path.join(simOutDir, "BulkMolecules"))
                moleculeIds = bulkMolecules.readAttribute("objectNames")
                proteinIndexes = np.array([
                    moleculeIds.index(moleculeId) for moleculeId in ids_protein
                ], np.int)
                proteinCountsBulk = bulkMolecules.readColumn(
                    "counts")[:, proteinIndexes]
                rnaIndexes = np.array(
                    [moleculeIds.index(moleculeId) for moleculeId in rnaIds],
                    np.int)
                avgRnaCounts = bulkMolecules.readColumn(
                    "counts")[:, rnaIndexes].mean(axis=0)
                bulkMolecules.close()
                if i == 0:
                    # Skip the first 5 time steps for the 1st generation (because complexes have not yet formed during these steps)
                    bulkContainer.countsIs(
                        np.mean(proteinCountsBulk[5:, :], axis=0))
                else:
                    bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

                # Unique molecules
                uniqueMoleculeCounts = TableReader(
                    os.path.join(simOutDir, "UniqueMoleculeCounts"))
                ribosomeIndex = uniqueMoleculeCounts.readAttribute(
                    "uniqueMoleculeIds").index("activeRibosome")
                rnaPolyIndex = uniqueMoleculeCounts.readAttribute(
                    "uniqueMoleculeIds").index("activeRnaPoly")
                nActiveRibosome = uniqueMoleculeCounts.readColumn(
                    "uniqueMoleculeCounts")[:, ribosomeIndex]
                nActiveRnaPoly = uniqueMoleculeCounts.readColumn(
                    "uniqueMoleculeCounts")[:, rnaPolyIndex]
                uniqueMoleculeCounts.close()

                # Account for unique molecules: add the time-averaged active
                # ribosome / RNA polymerase counts to their bulk complex ids
                bulkContainer.countsInc(nActiveRibosome.mean(), [
                    sim_data.moleculeIds.s30_fullComplex,
                    sim_data.moleculeIds.s50_fullComplex
                ])
                bulkContainer.countsInc(nActiveRnaPoly.mean(),
                                        [sim_data.moleculeIds.rnapFull])

                # Account for small-molecule bound complexes
                view_equilibrium.countsInc(
                    np.dot(sim_data.process.equilibrium.stoichMatrixMonomers(),
                           view_equilibrium_complexes.counts() * -1))

                # Average counts of monomers
                avgMonomerCounts = view_translation.counts()

                # Get counts of "functional units" (ie. complexed forms)
                # NOTE(review): if counts() returns a numpy array, `[:]` is a
                # view rather than a copy, so the writes below would also
                # modify avgMonomerCounts - confirm
                avgProteinCounts = avgMonomerCounts[:]
                avgComplexCounts = view_complexation_complexes.counts()

                for j, complexId in enumerate(ids_complexation_complexes):
                    # Map all subunits to the average counts of the complex (ignores counts of monomers)
                    # Some subunits are involved in multiple complexes - these cases are kept track of separately
                    subunitIds = sim_data.process.complexation.getMonomers(
                        complexId)["subunitIds"]

                    for subunitId in subunitIds:
                        if subunitId not in ids_translation:
                            # NOTE(review): monomerToTranslationMonomer and
                            # complexToMonomer are not defined in this
                            # function; presumably module-level mappings
                            if subunitId in monomerToTranslationMonomer:
                                # a couple of monomers have a different ID in ids_translation
                                subunitId = monomerToTranslationMonomer[
                                    subunitId]
                            elif "CPLX" in subunitId:
                                # a few transcription factors are complexed with ions
                                subunitId = complexToMonomer[subunitId]
                            elif "RNA" in subunitId:
                                continue

                        if subunitId not in monomersInvolvedInManyComplexes_id:
                            avgProteinCounts[ids_translation.index(
                                subunitId)] = avgComplexCounts[j]
                        else:
                            if complexId not in monomersInvolvedInManyComplexes_dict[
                                    subunitId]:
                                monomersInvolvedInManyComplexes_dict[
                                    subunitId][complexId] = 0.
                            monomersInvolvedInManyComplexes_dict[subunitId][
                                complexId] += avgComplexCounts[j]

                # Store (running sums; divided by the number of cells below)
                avgRnaCounts_forAllCells += avgRnaCounts
                avgProteinCounts_forAllCells += avgProteinCounts

            # Cache
            D = {
                "rna": avgRnaCounts_forAllCells,
                "protein": avgProteinCounts_forAllCells,
                "monomersInManyComplexes": monomersInvolvedInManyComplexes_dict
            }
            cPickle.dump(
                D,
                open(os.path.join(plotOutDir, "%s.cPickle" % plotOutFileName),
                     "wb"))

        else:
            # Using cached data
            D = cPickle.load(
                open(os.path.join(plotOutDir, "%s.cPickle" % plotOutFileName),
                     "rb"))
            avgRnaCounts_forAllCells = D["rna"]
            avgProteinCounts_forAllCells = D["protein"]
            monomersInvolvedInManyComplexes_dict = D["monomersInManyComplexes"]

        # Per cell
        avgRnaCounts_perCell = avgRnaCounts_forAllCells / float(len(allDir))
        avgProteinCounts_perCell = avgProteinCounts_forAllCells / float(
            len(allDir))

        # Plot
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))

        # One point per (monomer, complex) pair for monomers in many complexes
        for monomer in monomersInvolvedInManyComplexes_id:
            index = ids_translation.index(monomer)
            color_index = mrnaIds.index(rnaIds[index])
            color = colors[color_index]

            for complexId in monomersInvolvedInManyComplexes_dict[monomer]:
                avgComplexCount = monomersInvolvedInManyComplexes_dict[
                    monomer][complexId] / float(len(allDir))

                if avgComplexCount == 0:
                    # Zero complex counts are drawn at a fixed y of 2.5e-6
                    ax.loglog(avgRnaCounts_perCell[index],
                              2.5e-6,
                              alpha=0.5,
                              marker=".",
                              lw=0.,
                              color=color)

                else:
                    if avgRnaCounts_perCell[index] == 0:
                        # Zero RNA counts are drawn at x = PLOT_ZEROS_ON_LINE
                        ax.loglog(PLOT_ZEROS_ON_LINE,
                                  avgComplexCount,
                                  alpha=0.5,
                                  marker=".",
                                  lw=0.,
                                  color=color)
                    else:
                        ax.loglog(avgRnaCounts_perCell[index],
                                  avgComplexCount,
                                  alpha=0.5,
                                  marker=".",
                                  lw=0.,
                                  color=color)

        # plot monomers that are not involved in complexes or involved in only 1 complex
        monomersInvolvedInManyComplexes_index = [
            ids_translation.index(x)
            for x in monomersInvolvedInManyComplexes_id
        ]
        A = [
            x for x in xrange(len(ids_translation))
            if x not in monomersInvolvedInManyComplexes_index
        ]
        for i in A:
            color = colors[mrnaIds.index(rnaIds[i])]
            ax.loglog(avgRnaCounts_perCell[i],
                      avgProteinCounts_perCell[i],
                      alpha=0.5,
                      marker=".",
                      lw=0.,
                      color=color)
        # ax.loglog(avgRnaCounts_perCell[A], avgProteinCounts_perCell[A], alpha = 0.5, marker = ".", lw = 0., color = plot_colors)

        # Plot genes with zero transcripts on an arbitrary line (x = PLOT_ZEROS_ON_LINE)
        noTranscripts_indices = [
            x for x in np.where(avgRnaCounts_perCell == 0)[0]
            if x not in monomersInvolvedInManyComplexes_index
        ]
        for i in noTranscripts_indices:
            color = colors[mrnaIds.index(rnaIds[i])]
            ax.loglog(PLOT_ZEROS_ON_LINE,
                      avgProteinCounts_perCell[i],
                      alpha=0.5,
                      marker=".",
                      lw=0.,
                      color=color)

        # Highlight (only when HIGHLIGHT_GENES is switched on above)
        if HIGHLIGHT_GENES:
            # rnaIds and colors are rebound here; all per-gene plotting that
            # needs their original values has already happened above
            rnaIds = rnaIds.tolist()
            highlights_rnaId = ["EG12437_RNA[c]",
                                "EG12058_RNA[c]"]  # menE, ccmB
            colors = ["g", "r"]
            for i, rna in enumerate(highlights_rnaId):
                if avgRnaCounts_perCell[rnaIds.index(rna)] == 0:
                    ax.loglog(PLOT_ZEROS_ON_LINE,
                              avgProteinCounts_perCell[rnaIds.index(rna)],
                              marker='.',
                              lw=0.,
                              color=colors[i],
                              ms=15)
                else:
                    ax.loglog(avgRnaCounts_perCell[rnaIds.index(rna)],
                              avgProteinCounts_perCell[rnaIds.index(rna)],
                              marker='.',
                              lw=0.,
                              color=colors[i],
                              ms=15)

            # Proxy artists so the legend shows one entry per highlighted gene
            green_dot = mlines.Line2D([], [],
                                      color="green",
                                      linewidth=0.,
                                      marker=".",
                                      markersize=15,
                                      label="menE")
            red_dot = mlines.Line2D([], [],
                                    color="red",
                                    linewidth=0.,
                                    marker=".",
                                    markersize=15,
                                    label="ccmB")
            plt.legend(handles=[green_dot, red_dot], loc="lower right")

        # ax.hlines(1, ax.get_xlim()[0], ax.get_xlim()[1], linestyle = "--")
        # NOTE(review): 9786.77 is an unexplained constant - document its origin
        ax.hlines(9786.77, ax.get_xlim()[0], ax.get_xlim()[1], linestyle="--")

        ax.set_title(
            "Each (translatable) gene's functional unit is represented as a point\n(ie. x points per gene where x == number of complexes the monomer is involved in)\n(avg across %s generations)"
            % len(allDir))
        ax.set_xlabel("<RNA> per cell")
        ax.set_ylabel("<Functional units (protein)> per cell")
        ax.tick_params(which="both", direction="out")

        plt.subplots_adjust(hspace=0.5,
                            wspace=0.5,
                            left=0.1,
                            bottom=0.1,
                            top=0.9,
                            right=0.95)

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")
	def test_dtype_float32(self):
		"""Fractional counts must survive set, increment and decrement on a
		BulkObjectsContainer built with dtype=np.float32, both on the
		container itself and through a view from countsView().
		"""
		increments = [10, 20.5, 30.5]
		decrements = [1.5, 2, 3.5]
		bulk = BulkObjectsContainer(OBJECT_NAMES, dtype=np.float32)

		# Assigning fractional counts stores them unchanged
		starting = [10, 10.5, 20]
		bulk.countsIs(starting)
		npt.assert_equal(bulk.counts(), starting)

		# Increment, then decrement, directly on the container
		bulk.countsInc(increments)
		npt.assert_equal(bulk.counts(), [20, 31, 50.5])

		bulk.countsDec(decrements)
		npt.assert_equal(bulk.counts(), [18.5, 29, 47])

		# Same two operations again, this time through a counts view
		wholeView = bulk.countsView()
		wholeView.countsInc(increments)
		npt.assert_equal(wholeView.counts(), [28.5, 49.5, 77.5])

		wholeView.countsDec(decrements)
		npt.assert_equal(wholeView.counts(), [27, 47.5, 74])
def createContainer():
	"""Return a new BulkObjectsContainer over OBJECT_NAMES whose counts
	have been set to OBJECT_COUNTS.
	"""
	populated = BulkObjectsContainer(OBJECT_NAMES)
	populated.countsIs(OBJECT_COUNTS)
	return populated
コード例 #10
0
    def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Plot expected vs. simulated relative protein abundance.

        Time-averaged monomer counts are reconstructed from the simulation
        output (bulk counts, plus active ribosomes / RNA polymerases, with
        equilibrium and complexation complexes broken back down into their
        monomers), normalized to fractions, and scattered on log10(x + 1)
        axes against the distribution expected from the fitted parameters
        in sim_data.

        Args:
            simOutDir: directory holding the "BulkMolecules" and
                "UniqueMoleculeCounts" tables.
            plotOutDir: output directory; created if it does not exist.
            plotOutFileName: file name passed on to exportFigure.
            simDataFile: path of the pickled sim_data object.
            validationDataFile: unused by this plot.
            metadata: passed on to exportFigure.

        Raises:
            Exception: if simOutDir is not an existing directory.
        """
        if not os.path.isdir(simOutDir):
            # Call form of raise: valid on both Python 2 and Python 3
            raise Exception(
                "simOutDir does not currently exist as a directory")

        if not os.path.exists(plotOutDir):
            os.mkdir(plotOutDir)

        # Get the names of proteins from the KB; the context manager makes
        # sure the pickle file handle is closed instead of leaked
        with open(simDataFile, "rb") as simDataHandle:
            sim_data = cPickle.load(simDataHandle)

        ids_complexation = sim_data.process.complexation.moleculeNames
        ids_complexation_complexes = sim_data.process.complexation.ids_complexes
        ids_equilibrium = sim_data.process.equilibrium.moleculeNames
        ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
        ids_translation = sim_data.process.translation.monomerData[
            "id"].tolist()
        ids_protein = sorted(
            set(ids_complexation + ids_equilibrium + ids_translation))
        bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
        view_complexation = bulkContainer.countsView(ids_complexation)
        view_complexation_complexes = bulkContainer.countsView(
            ids_complexation_complexes)
        view_equilibrium = bulkContainer.countsView(ids_equilibrium)
        view_equilibrium_complexes = bulkContainer.countsView(
            ids_equilibrium_complexes)
        view_translation = bulkContainer.countsView(ids_translation)

        # Bulk counts of every protein species, one column per id
        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        try:
            moleculeIds = bulkMolecules.readAttribute("objectNames")
            # Builtin int is what the removed np.int alias pointed to
            proteinIndexes = np.array(
                [moleculeIds.index(moleculeId) for moleculeId in ids_protein],
                int)
            proteinCountsBulk = bulkMolecules.readColumn(
                "counts")[:, proteinIndexes]
        finally:
            # Close the reader even when a read fails
            bulkMolecules.close()

        # Account for monomers
        bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

        # Account for unique molecules; the attribute and the column are
        # each read once and then indexed for both species
        uniqueMoleculeCounts = TableReader(
            os.path.join(simOutDir, "UniqueMoleculeCounts"))
        try:
            uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds")
            ribosomeIndex = uniqueMoleculeIds.index("activeRibosome")
            rnaPolyIndex = uniqueMoleculeIds.index("activeRnaPoly")
            uniqueCounts = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")
            nActiveRibosome = uniqueCounts[:, ribosomeIndex]
            nActiveRnaPoly = uniqueCounts[:, rnaPolyIndex]
        finally:
            uniqueMoleculeCounts.close()
        bulkContainer.countsInc(nActiveRibosome.mean(), [
            sim_data.moleculeIds.s30_fullComplex,
            sim_data.moleculeIds.s50_fullComplex
        ])
        bulkContainer.countsInc(nActiveRnaPoly.mean(),
                                [sim_data.moleculeIds.rnapFull])

        # Account for small-molecule bound complexes
        view_equilibrium.countsInc(
            np.dot(sim_data.process.equilibrium.stoichMatrixMonomers(),
                   view_equilibrium_complexes.counts() * -1))

        # Account for monomers in complexed form
        view_complexation.countsInc(
            np.dot(sim_data.process.complexation.stoichMatrixMonomers(),
                   view_complexation_complexes.counts() * -1))

        avgCounts = view_translation.counts()

        relativeCounts = avgCounts / avgCounts.sum()

        # Expected abundance: expression * translation efficiency divided by
        # the loss rate (dilution from growth plus degradation)
        expectedCountsArbitrary = normalize(
            sim_data.process.transcription.rnaExpression[sim_data.condition][
                sim_data.relation.rnaIndexToMonomerMapping] *
            sim_data.process.translation.translationEfficienciesByMonomer /
            (np.log(2) / sim_data.doubling_time.asNumber(units.s) +
             sim_data.process.translation.monomerData["degRate"].asNumber(
                 1 / units.s)))

        expectedCountsRelative = (
            expectedCountsArbitrary / expectedCountsArbitrary.sum())

        # Log-transform once; the +1 keeps zero entries finite
        logExpected = np.log10(expectedCountsRelative + 1)
        logActual = np.log10(relativeCounts + 1)

        plt.figure(figsize=(8.5, 11))

        # Identity line extended 10% past the largest plotted value
        maxLine = 1.1 * max(logExpected.max(), logActual.max())
        plt.plot([0, maxLine], [0, maxLine], '--r')
        plt.plot(logExpected,
                 logActual,
                 'o',
                 markeredgecolor='k',
                 markerfacecolor='none')

        plt.xlabel("log10(Expected protein distribution (from fitting))")
        plt.ylabel(
            "log10(Actual protein distribution (average over life cycle))")
        plt.title("PCC (of log values): %0.2f" %
                  pearsonr(logExpected, logActual)[0])

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")
コード例 #11
0
def getPCC(args):
    """Return the Pearson correlation between simulated and Schmidt counts.

    Takes one tuple so that it can be called with a single positional
    argument (presumably from a map-style worker call - confirm against
    the caller). The tuple is unpacked in the body because tuple
    parameters in a signature are a syntax error on Python 3.

    Args:
        args: tuple ``(variant, ap, monomerIds, schmidtCounts)`` where
            ``ap`` provides ``get_cells(variant=[...])`` and
            ``get_variant_kb(variant)``, ``monomerIds`` are the ids to
            read back from the container, and ``schmidtCounts`` are the
            reference counts to compare against.

    Returns:
        ``(pcc, pval)`` from ``pearsonr`` on the log10(x + 1) transformed
        counts of the first cell of the variant. If anything fails, the
        exception is printed and ``(nan, nan)`` is returned.
    """
    variant, ap, monomerIds, schmidtCounts = args
    try:
        simDir = ap.get_cells(variant=[variant])[0]

        # Context manager so the pickle file handle is closed, not leaked
        with open(ap.get_variant_kb(variant), "rb") as simDataHandle:
            sim_data = cPickle.load(simDataHandle)

        ids_complexation = sim_data.process.complexation.moleculeNames
        ids_complexation_complexes = sim_data.process.complexation.ids_complexes
        ids_equilibrium = sim_data.process.equilibrium.moleculeNames
        ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
        ids_translation = sim_data.process.translation.monomerData[
            "id"].tolist()
        ids_protein = sorted(
            set(ids_complexation + ids_equilibrium + ids_translation))

        bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
        view_complexation = bulkContainer.countsView(ids_complexation)
        view_complexation_complexes = bulkContainer.countsView(
            ids_complexation_complexes)
        view_equilibrium = bulkContainer.countsView(ids_equilibrium)
        view_equilibrium_complexes = bulkContainer.countsView(
            ids_equilibrium_complexes)
        view_validation_schmidt = bulkContainer.countsView(monomerIds)

        simOutDir = os.path.join(simDir, "simOut")

        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        try:
            moleculeIds = bulkMolecules.readAttribute("objectNames")
            # Builtin int is what the removed np.int alias pointed to
            proteinIndexes = np.array(
                [moleculeIds.index(moleculeId) for moleculeId in ids_protein],
                int)
            proteinCountsBulk = bulkMolecules.readColumn(
                "counts")[:, proteinIndexes]
        finally:
            # Close the reader even when a read fails
            bulkMolecules.close()

        # Account for monomers
        bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

        # Account for unique molecules; the attribute and the column are
        # each read once and then indexed for both species
        uniqueMoleculeCounts = TableReader(
            os.path.join(simOutDir, "UniqueMoleculeCounts"))
        try:
            uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds")
            ribosomeIndex = uniqueMoleculeIds.index("activeRibosome")
            rnaPolyIndex = uniqueMoleculeIds.index("activeRnaPoly")
            uniqueCounts = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")
            nActiveRibosome = uniqueCounts[:, ribosomeIndex]
            nActiveRnaPoly = uniqueCounts[:, rnaPolyIndex]
        finally:
            uniqueMoleculeCounts.close()
        bulkContainer.countsInc(nActiveRibosome.mean(), [
            sim_data.moleculeIds.s30_fullComplex,
            sim_data.moleculeIds.s50_fullComplex
        ])
        bulkContainer.countsInc(nActiveRnaPoly.mean(),
                                [sim_data.moleculeIds.rnapFull])

        # Account for small-molecule bound complexes
        view_equilibrium.countsInc(
            np.dot(sim_data.process.equilibrium.stoichMatrixMonomers(),
                   view_equilibrium_complexes.counts() * -1))

        # Account for monomers in complexed form
        view_complexation.countsInc(
            np.dot(sim_data.process.complexation.stoichMatrixMonomers(),
                   view_complexation_complexes.counts() * -1))

        pcc, pval = pearsonr(np.log10(view_validation_schmidt.counts() + 1),
                             np.log10(schmidtCounts + 1))

        return pcc, pval

    except Exception as e:
        # Deliberate best effort: report the failure and return NaNs
        # instead of raising
        print(e)
        return np.nan, np.nan