def mp_worker(sim_dir):
    """
    Read the total RNA polymerase count of one simulation at a fixed point
    of its cell cycle.

    The bulk count of the molecule named by the module-level ``rnap_id`` is
    added to the 'activeRnaPoly' unique-molecule count, both taken at the
    time step located CELL_CYCLE_FRACTION of the way through the recorded
    time points.

    Args:
        sim_dir (str): simulation directory that contains a 'simOut' folder

    Returns:
        The sum of the two counts at the sampled time step, or None when
        anything fails while reading (a message is printed in that case).
    """
    sim_out_dir = os.path.join(sim_dir, 'simOut')
    rnap_count_avg_cell = None

    try:
        # Bulk (free) RNA polymerase counts; always release the reader.
        bulk_molecule_reader = TableReader(
            os.path.join(sim_out_dir, 'BulkMolecules'))
        try:
            bulk_index = bulk_molecule_reader.readAttribute(
                'objectNames').index(rnap_id)
            rnap_count = bulk_molecule_reader.readColumn(
                'counts', np.array([bulk_index]))
        finally:
            bulk_molecule_reader.close()

        # Active RNA polymerase counts from the unique molecule table.
        unique_molecule_reader = TableReader(
            os.path.join(sim_out_dir, 'UniqueMoleculeCounts'))
        try:
            unique_molecule_ids = unique_molecule_reader.readAttribute(
                'uniqueMoleculeIds')
            unique_molecule_counts = unique_molecule_reader.readColumn(
                'uniqueMoleculeCounts')
        finally:
            unique_molecule_reader.close()

        active_index = unique_molecule_ids.index('activeRnaPoly')
        rnap_active_count = unique_molecule_counts[:, active_index]

        index_average_cell = int(
            len(rnap_active_count) * CELL_CYCLE_FRACTION)
        rnap_count_avg_cell = (
            rnap_count[index_average_cell]
            + rnap_active_count[index_average_cell])

    except Exception:
        # Deliberately best-effort: a simulation that cannot be read is
        # reported and skipped (None) instead of aborting the analysis.
        print('Excluded from analysis due to broken files: {}'.format(
            sim_out_dir))

    return rnap_count_avg_cell
def setDaughterInitialConditions(sim, sim_data):
    """
    Initialize a daughter simulation from the state stored under
    ``sim._inheritedStatePath``.

    Loads the pickled IsDead, ElngRate and elng_rate_factor values and the
    BulkMolecules, UniqueMolecules and Time tables from that directory and
    copies them onto ``sim``.

    Args:
        sim: simulation object to initialize; ``_inheritedStatePath`` must
            not be None
        sim_data: not used by this function

    Raises:
        AssertionError: if ``sim._inheritedStatePath`` is None
    """
    assert sim._inheritedStatePath is not None

    def _load_pickle(file_name):
        # Close the file deterministically instead of leaking the handle.
        with open(os.path.join(sim._inheritedStatePath, file_name), "rb") as f:
            return cPickle.load(f)

    sim._isDead = _load_pickle("IsDead.cPickle")

    elngRate = _load_pickle("ElngRate.cPickle")
    elng_rate_factor = _load_pickle("elng_rate_factor.cPickle")
    if sim._growthRateNoise:
        elongation = sim.processes["PolypeptideElongation"]
        # NOTE(review): this assigns to an attribute called `setElngRate`
        # rather than calling it -- kept as in the original; confirm intent.
        elongation.setElngRate = elngRate
        elongation.elngRateFactor = elng_rate_factor

    bulk_table_reader = TableReader(
        os.path.join(sim._inheritedStatePath, "BulkMolecules"))
    sim.internal_states["BulkMolecules"].tableLoad(bulk_table_reader, 0)

    unique_table_reader = TableReader(
        os.path.join(sim._inheritedStatePath, "UniqueMolecules"))
    sim.internal_states["UniqueMolecules"].tableLoad(unique_table_reader, 0)

    # One reader is enough; the original opened the Time table twice.
    time_table_reader = TableReader(
        os.path.join(sim._inheritedStatePath, "Time"))
    sim._initialTime = time_table_reader.readAttribute("initialTime")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Template multi-generation plot: loads the inputs, walks every cell of
    the seed and exports an (empty) figure.

    Args:
        seedOutDir (str): seed output directory; must already exist
        plotOutDir (str): directory the figure is written to (created via
            filepath.makedirs)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile (str): path of the pickled sim_data object
        validationDataFile (str): path of the pickled validation data
        metadata: passed through to exportFigure

    Raises:
        Exception: if seedOutDir is not an existing directory
    """
    if not os.path.isdir(seedOutDir):
        raise Exception('seedOutDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    # sim_data / validation_data / time are placeholders for plot authors;
    # the template itself does not use them yet.
    with open(simDataFile, 'rb') as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    for sim_dir in ap.get_cells():
        simOutDir = os.path.join(sim_dir, 'simOut')

        # Listeners used
        main_reader = TableReader(os.path.join(simOutDir, 'Main'))

        # Load data
        time = main_reader.readColumn('time')

    plt.figure()

    ### Create Plot ###

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Scatter-plot the cell cycle length (hours) of every cell of a seed
    against its position in the list returned by AnalysisPaths.get_cells().

    Args:
        seedOutDir (str): seed output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if seedOutDir is not an existing directory
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    cellCycleLengths = []
    generations = []
    for gen_idx, simDir in enumerate(ap.get_cells()):
        simOutDir = os.path.join(simDir, "simOut")
        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")

        # Recorded time is in seconds; convert the span to hours.
        cellCycleLengths.append((time[-1] - time[0]) / 60. / 60.)
        generations.append(gen_idx)

    plt.scatter(generations, cellCycleLengths)
    plt.xlabel('Generation')
    plt.ylabel('Time (hr)')
    plt.title('Cell cycle lengths')
    plt.xticks(generations)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot, for the first cell of every generation, each listed mass
    component as a fraction of the sum of the listed components over time.

    Args:
        seedOutDir (str): seed output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if seedOutDir is not an existing directory
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    # TODO: Declutter Y-axis

    # Get first cell from each generation
    firstCellLineage = [
        ap.get_cells(generation=[gen_idx])[0]
        for gen_idx in range(ap.n_generation)
    ]

    massNames = [
        #"dryMass",
        "proteinMass",
        "tRnaMass",
        "rRnaMass",
        'mRnaMass',
        "dnaMass",
    ]

    cleanNames = [
        #"Dry\nmass",
        "Protein\nmass frac.",
        "tRNA\nmass frac.",
        "rRNA\nmass frac.",
        "mRNA\nmass frac.",
        "DNA\nmass frac.",
    ]

    fig, axesList = plt.subplots(len(massNames), sharex=True)

    for simDir in firstCellLineage:
        simOutDir = os.path.join(simDir, "simOut")
        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
        mass = TableReader(os.path.join(simOutDir, "Mass"))

        massData = np.zeros((len(massNames), time.size))
        for idx, massType in enumerate(massNames):
            massData[idx, :] = mass.readColumn(massType)

        # Normalize each time point by the total of the listed components.
        massData = massData / massData.sum(axis=0)

        for idx, cleanName in enumerate(cleanNames):
            axesList[idx].plot(time / 60, massData[idx, :])
            axesList[idx].set_ylabel(cleanName)

    for axes in axesList:
        # Only label the extremes of each y-axis.
        axes.set_yticks(list(axes.get_ylim()))

    axesList[-1].set_xlabel('Time (min)')

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot the distribution of doubling times (minutes) of all cells in each
    generation of a variant, one subplot per generation.

    Args:
        variantDir (str): variant output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if variantDir is not an existing directory
    """
    if not os.path.isdir(variantDir):
        raise Exception("variantDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get all cells in each seed
    ap = AnalysisPaths(variantDir, cohort_plot=True)

    max_cells_in_gen = 0
    for genIdx in range(ap.n_generation):
        n_cells = len(ap.get_cells(generation=[genIdx]))
        if n_cells > max_cells_in_gen:
            max_cells_in_gen = n_cells

    fig, axesList = plt.subplots(ap.n_generation, sharex=True)

    # NOTE(review): generations with fewer than max_cells_in_gen cells keep
    # zeros in their unused rows, which enter the histogram/mean/variance
    # below -- confirm whether that is intended.
    doubling_time = np.zeros((max_cells_in_gen, ap.n_generation))

    for genIdx in range(ap.n_generation):
        gen_cells = ap.get_cells(generation=[genIdx])
        for simDir in gen_cells:
            simOutDir = os.path.join(simDir, "simOut")
            main_reader = TableReader(os.path.join(simOutDir, "Main"))
            time = main_reader.readColumn("time")
            initialTime = main_reader.readAttribute("initialTime")

            cell_row = np.where(simDir == gen_cells)[0]
            doubling_time[cell_row, genIdx] = (time.max() - initialTime) / 60.

    # plt.subplots returns a bare Axes (not a sequence) for a single subplot
    if ap.n_generation == 1:
        axesList = [axesList]

    # Plot the doubling time distribution of each generation
    for idx, axes in enumerate(axesList):
        gen_times = doubling_time[:, idx]
        if max_cells_in_gen > 1:
            axes.hist(gen_times.flatten(),
                      int(np.ceil(np.sqrt(gen_times.size))))
        else:
            axes.plot(gen_times, 1, 'x')
            axes.set_ylim([0, 2])

        axes.axvline(gen_times.mean(), color='k', linestyle='dashed',
                     linewidth=2)
        axes.text(gen_times.mean(), 1,
                  "Mean: %.3f Var: %.3f" % (gen_times.mean(), gen_times.var()))

    axesList[-1].set_xlabel("Doubling time (min)")
    # Floor division: a float index fails under true division.
    axesList[ap.n_generation // 2].set_ylabel("Frequency")

    plt.subplots_adjust(hspace=0.2, wspace=0.5)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def getMaxTime(allCells):
    """
    Return the largest number of recorded time points among the given cells.

    Despite the name, the result is a count of entries in the Main 'time'
    column, not a time value.

    Args:
        allCells: iterable of simulation directories, each containing a
            'simOut' folder

    Returns:
        0 for an empty input, otherwise the maximum 'time' column length.
    """
    maxTime = 0
    for simDir in allCells:
        simOutDir = os.path.join(simDir, "simOut")
        # Only the column length matters, so the initialTime offset the
        # original read and subtracted here was wasted I/O.
        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
        maxTime = np.max([maxTime, time.size])
    return maxTime
def read_bulk_molecule_counts(sim_out_dir, mol_names):
    '''
    Reads a subset of molecule counts from BulkMolecules using the indexing
    method of readColumn. Should only be called once per simulation being
    analyzed with all molecules of interest.

    Args:
        sim_out_dir (str): path to the directory with simulation output data
        mol_names (list-like or tuple of list-like): lists of strings
            containing names of molecules to read the counts for. A single
            array will be converted to a tuple for processing.

    Returns:
        generator of ndarray: int counts with all time points on the first
        dimension and each molecule of interest on the second dimension.
        The number of generated arrays will be separated based on the input
        dimensions of mol_names (ie if mol_names is a tuple of two arrays,
        two arrays will be generated). Each array is squeezed, so a
        single-molecule group loses its second dimension.

    Raises:
        Exception: on first iteration, if any entry of mol_names is a
        string instead of a list-like of strings.

    Example use cases:
        names1 = ['ATP[c]', 'AMP[c]']
        names2 = ['WATER[c]']

        # Read one set of molecules
        (counts1,) = read_bulk_molecule_counts(sim_out_dir, names1)

        # Read two or more sets of molecules
        (counts1, counts2) = read_bulk_molecule_counts(sim_out_dir, (names1, names2))

    TODO: generalize to any TableReader, not just BulkMolecules, if
    readColumn method is used for those tables.
    '''
    # `basestring` only exists on Python 2; fall back to `str` elsewhere so
    # the string check keeps working on either interpreter.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Convert an array to tuple to ensure correct dimensions
    if not isinstance(mol_names, tuple):
        mol_names = (mol_names,)

    # Check for string instead of array since it will cause mol_indices
    # lookup to fail
    for names in mol_names:
        if isinstance(names, string_types):
            raise Exception(
                'mol_names must be a tuple of arrays not strings like {}'.format(names))

    bulk_reader = TableReader(os.path.join(sim_out_dir, 'BulkMolecules'))
    try:
        bulk_molecule_names = bulk_reader.readAttribute("objectNames")
        mol_indices = {mol: i for i, mol in enumerate(bulk_molecule_names)}

        lengths = [len(names) for names in mol_names]
        indices = np.hstack(
            [[mol_indices[mol] for mol in names] for names in mol_names])
        bulk_counts = bulk_reader.readColumn('counts', indices)
    finally:
        # The counts are already in memory; release the reader.
        bulk_reader.close()

    # Split the single read back into one array per requested group.
    start_slice = 0
    for length in lengths:
        counts = bulk_counts[:, start_slice:start_slice + length].squeeze()
        start_slice += length
        yield counts
def getFinalMass(variant_ap):
    """
    Return the last recorded dry mass of the first cell of a variant.

    Args:
        variant_ap (tuple): ``(variant, ap)`` pair, where ``ap`` provides
            ``get_cells(variant=[...])``. A single tuple argument is kept so
            the function can still be called exactly as before (tuple
            parameters in the signature are not valid on Python 3).

    Returns:
        The final entry of the Mass 'dryMass' column, or NaN if anything
        fails while locating or reading the data (the error is printed).
    """
    variant, ap = variant_ap

    try:
        simDir = ap.get_cells(variant=[variant])[0]
        simOutDir = os.path.join(simDir, "simOut")

        mass = TableReader(os.path.join(simOutDir, "Mass"))
        cellDry = mass.readColumn("dryMass")
        return cellDry[-1]
    except Exception as e:
        # Best-effort: report the problem and mark this variant as missing.
        print(e)
        return np.nan
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot the listed mass components over time (hours) for every cell of a
    seed, one subplot per component.

    Args:
        seedOutDir (str): seed output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if seedOutDir is not an existing directory
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    # Get all cells
    allDir = ap.get_cells()

    massNames = [
        "dryMass",
        "proteinMass",
        #"tRnaMass",
        "rRnaMass",
        'mRnaMass',
        "dnaMass",
    ]

    cleanNames = [
        "Dry\nmass",
        "Protein\nmass",
        #"tRNA\nmass",
        "rRNA\nmass",
        "mRNA\nmass",
        "DNA\nmass",
    ]

    fig, axesList = plt.subplots(len(massNames), sharex=True)

    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")
        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
        mass = TableReader(os.path.join(simOutDir, "Mass"))

        for idx, massType in enumerate(massNames):
            massToPlot = mass.readColumn(massType)
            axesList[idx].plot(time / 60. / 60., massToPlot, linewidth=2)
            axesList[idx].set_ylabel(cleanNames[idx] + " (fg)")

    for axes in axesList:
        # Only label the extremes of each y-axis.
        axes.set_yticks(list(axes.get_ylim()))

    axesList[0].set_title("Cell mass fractions")
    axesList[-1].set_xlabel("Time (hr)")

    plt.subplots_adjust(hspace=0.2, wspace=0.5)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot final against initial cell mass for all cells of each generation
    of a variant, with a first-order polynomial fit per generation.

    Args:
        variantDir (str): variant output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if variantDir is not an existing directory
    """
    if not os.path.isdir(variantDir):
        raise Exception("variantDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get all cells in each seed
    ap = AnalysisPaths(variantDir, cohort_plot=True)

    max_cells_in_gen = 0
    for genIdx in range(ap.n_generation):
        n_cells = len(ap.get_cells(generation=[genIdx]))
        if n_cells > max_cells_in_gen:
            max_cells_in_gen = n_cells

    fig, axesList = plt.subplots(
        ap.n_generation, sharey=True, sharex=True,
        subplot_kw={'aspect': 0.4, 'adjustable': 'box'})

    # NOTE(review): generations with fewer than max_cells_in_gen cells keep
    # zeros in their unused rows, which are plotted and fitted below --
    # confirm whether that is intended.
    initial_masses = np.zeros((max_cells_in_gen, ap.n_generation))
    final_masses = np.zeros((max_cells_in_gen, ap.n_generation))

    for genIdx in range(ap.n_generation):
        gen_cells = ap.get_cells(generation=[genIdx])
        for simDir in gen_cells:
            simOutDir = os.path.join(simDir, "simOut")
            mass = TableReader(os.path.join(simOutDir, "Mass"))
            cellMass = mass.readColumn("cellMass")

            cell_row = np.where(simDir == gen_cells)[0]
            initial_masses[cell_row, genIdx] = cellMass[0] / 1000.
            final_masses[cell_row, genIdx] = cellMass[-1] / 1000.

    # plt.subplots returns a bare Axes (not a sequence) for a single subplot
    if ap.n_generation == 1:
        axesList = [axesList]

    # Plot initial vs final masses
    for idx, axes in enumerate(axesList):
        gen_initial = initial_masses[:, idx]
        gen_final = final_masses[:, idx]

        axes.plot(gen_initial, gen_final, 'o')
        z = np.polyfit(gen_initial, gen_final, 1)
        p = np.poly1d(z)
        axes.plot(gen_initial, p(gen_initial), '--')

        text_x = np.mean(axes.get_xlim())
        text_y = np.mean(axes.get_ylim()) + np.mean(axes.get_ylim()) * 0.1
        axes.text(text_x, text_y,
                  r"$m_f$=%.3f$\times$$m_i$ + %.3f" % (z[0], z[1]))

    axesList[-1].set_xlabel("Initial mass (pg)")
    # Floor division: a float index fails under true division.
    axesList[ap.n_generation // 2].set_ylabel("Final mass (pg)")

    plt.subplots_adjust(hspace=0.2, wspace=0.5)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot bulk counts over time (minutes) for a fixed panel of RNAs in a
    3x3 grid of subplots.

    Args:
        simOutDir (str): simulation output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if simOutDir is not an existing directory
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))

    rnaIds = [
        "G7355_RNA[c]", "EG11783_RNA[c]", "G7742_RNA[c]", "G6253_RNA[c]",
        "EG10632_RNA[c]", "EG11484_RNA[c]", "G7889_RNA[c]", "EG10997_RNA[c]",
        "EG10780_RNA[c]", "EG11060_RNA[c]",
    ]
    names = [
        "ypjD - Predicted inner membrane protein",
        "intA - CP4-57 prophage; integrase",
        "yrfG - Purine nucleotidase",
        "ylaC - Predicted inner membrane protein",
        "nagA - N-acetylglucosamine-6-phosphate deacetylase",
        "yigZ - Predicted elongation factor",
        "lptG - LptG (part of LPS transport system)",
        "mnmE - GTPase, involved in modification of U34 in tRNA",
        "pspE - Thiosulfate sulfurtransferase",
        "ushA - UDP-sugar hydrolase / 5'-ribonucleotidase / 5'-deoxyribonucleotidase",
    ]

    moleculeIds = bulkMolecules.readAttribute("objectNames")
    # Builtin int: the np.int alias no longer exists in current NumPy.
    rnaIndexes = np.array([moleculeIds.index(x) for x in rnaIds], int)
    rnaCounts = bulkMolecules.readColumn("counts")[:, rnaIndexes]
    bulkMolecules.close()

    main_reader = TableReader(os.path.join(simOutDir, "Main"))
    initialTime = main_reader.readAttribute("initialTime")
    time = main_reader.readColumn("time") - initialTime

    plt.figure(figsize=(8.5, 11))

    # NOTE(review): ten RNAs are listed but the grid holds nine; the loop
    # index is used both as subplot number and data index, so entry 0 is
    # never drawn. Kept as-is -- confirm which RNA was meant to be dropped.
    for subplotIdx in range(1, 10):
        plt.subplot(3, 3, subplotIdx)

        plt.plot(time / 60., rnaCounts[:, subplotIdx])
        plt.xlabel("Time (min)")
        plt.ylabel("mRNA counts")
        plt.title(names[subplotIdx].split(" - ")[0])

    plt.subplots_adjust(hspace=0.5, top=0.95, bottom=0.05)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot bulk counts over time (minutes) for a fixed panel of RNAs in a
    3x3 grid of subplots.

    Args:
        simOutDir (str): simulation output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if simOutDir is not an existing directory
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))

    rnaIds = [
        "EG10789_RNA[c]", "EG11556_RNA[c]", "EG12095_RNA[c]", "G1_RNA[c]",
        "G360_RNA[c]", "EG10944_RNA[c]", "EG12419_RNA[c]", "EG10372_RNA[c]",
        "EG10104_RNA[c]", "EG10539_RNA[c]",
    ]
    names = [
        "ptsI - PTS enzyme I",
        "talB - Transaldolase",
        "secG - SecG",
        "thiS - ThiS protein",
        "flgD - Flagellar biosynthesis",
        "serA - (S)-2-hydroxyglutarate reductase",
        "gatY - GatY",
        "gdhA - Glutamate dehydrogenase",
        "atpG - ATP synthase F1 complex - gamma subunit",
        "livJ - Branched chain amino acid ABC transporter - periplasmic binding protein",
    ]

    moleculeIds = bulkMolecules.readAttribute("objectNames")
    # Builtin int: the np.int alias no longer exists in current NumPy.
    rnaIndexes = np.array([moleculeIds.index(x) for x in rnaIds], int)
    rnaCounts = bulkMolecules.readColumn("counts")[:, rnaIndexes]
    bulkMolecules.close()

    main_reader = TableReader(os.path.join(simOutDir, "Main"))
    initialTime = main_reader.readAttribute("initialTime")
    time = main_reader.readColumn("time") - initialTime

    plt.figure(figsize=(8.5, 11))

    # NOTE(review): ten RNAs are listed but the grid holds nine; the loop
    # index is used both as subplot number and data index, so entry 0 is
    # never drawn. Kept as-is -- confirm which RNA was meant to be dropped.
    for subplotIdx in range(1, 10):
        plt.subplot(3, 3, subplotIdx)

        plt.plot(time / 60., rnaCounts[:, subplotIdx])
        plt.xlabel("Time (min)")
        plt.ylabel("mRNA counts")
        plt.title(names[subplotIdx].split(" - ")[0])

    plt.subplots_adjust(hspace=0.5, top=0.95, bottom=0.05)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot bulk counts over time (minutes) for a fixed panel of RNAs in a
    3x3 grid of subplots.

    Args:
        simOutDir (str): simulation output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if simOutDir is not an existing directory
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))

    rnaIds = [
        "EG10367_RNA[c]", "EG11036_RNA[c]", "EG50002_RNA[c]", "EG10671_RNA[c]",
        "EG50003_RNA[c]", "EG10669_RNA[c]", "EG10873_RNA[c]", "EG12179_RNA[c]",
        "EG10321_RNA[c]", "EG10544_RNA[c]",
    ]
    names = [
        "gapA - Glyceraldehyde 3-phosphate dehydrogenase",
        "tufA - Elongation factor Tu",
        "rpmA - 50S Ribosomal subunit protein L27",
        "ompF - Outer membrane protein F",
        "acpP - Apo-[acyl carrier protein]",
        "ompA - Outer membrane protein A",
        "rplL - 50S Ribosomal subunit protein L7/L12 dimer",
        "cspE - Transcription antiterminator and regulator of RNA stability",
        "fliC - Flagellin",
        "lpp - Murein lipoprotein",
    ]

    moleculeIds = bulkMolecules.readAttribute("objectNames")
    # Builtin int: the np.int alias no longer exists in current NumPy.
    rnaIndexes = np.array([moleculeIds.index(x) for x in rnaIds], int)
    rnaCounts = bulkMolecules.readColumn("counts")[:, rnaIndexes]
    bulkMolecules.close()

    main_reader = TableReader(os.path.join(simOutDir, "Main"))
    initialTime = main_reader.readAttribute("initialTime")
    time = main_reader.readColumn("time") - initialTime

    plt.figure(figsize=(8.5, 11))

    # NOTE(review): ten RNAs are listed but the grid holds nine; the loop
    # index is used both as subplot number and data index, so entry 0 is
    # never drawn. Kept as-is -- confirm which RNA was meant to be dropped.
    for subplotIdx in range(1, 10):
        plt.subplot(3, 3, subplotIdx)

        plt.plot(time / 60., rnaCounts[:, subplotIdx])
        plt.xlabel("Time (min)")
        plt.ylabel("mRNA counts")
        plt.title(names[subplotIdx].split(" - ")[0])

    plt.subplots_adjust(hspace=0.5, top=0.95, bottom=0.05)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot the bulk count of 'WATER[c]' over time (minutes).

    Args:
        simOutDir (str): simulation output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if simOutDir is not an existing directory
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
    moleculeIds = bulkMolecules.readAttribute("objectNames")
    # Builtin int: the np.int alias no longer exists in current NumPy.
    waterIndex = np.array(moleculeIds.index('WATER[c]'), int)
    waterCount = bulkMolecules.readColumn("counts")[:, waterIndex]
    bulkMolecules.close()

    main_reader = TableReader(os.path.join(simOutDir, "Main"))
    initialTime = main_reader.readAttribute("initialTime")
    time = main_reader.readColumn("time") - initialTime

    plt.figure(figsize=(8.5, 11))

    plt.plot(time / 60., waterCount, linewidth=2)
    plt.xlabel("Time (min)")
    plt.ylabel("WATER[c] counts")
    plt.title("Counts of water")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def getMassData(simDir, massNames):
    """
    Read the time column and the requested Mass columns of one simulation.

    Args:
        simDir (str): simulation directory containing a 'simOut' folder
        massNames (sequence of str): Mass table columns to read

    Returns:
        tuple: ``(time, data)`` where ``data`` holds one row per requested
        column, flattened to 1-D when exactly one column was requested.
    """
    out_dir = os.path.join(simDir, "simOut")
    time = TableReader(os.path.join(out_dir, "Main")).readColumn("time")
    mass_reader = TableReader(os.path.join(out_dir, "Mass"))

    n_names = len(massNames)
    data = np.zeros((n_names, time.size))
    for row in range(n_names):
        data[row, :] = mass_reader.readColumn(massNames[row])

    # A single requested column is handed back as a flat vector.
    if n_names == 1:
        return time, data.reshape(-1)
    return time, data
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot mean-normalized concentrations over time (minutes) of every bulk
    molecule that is present in more than 100 time steps after the burn-in
    period, for all cells of a seed.

    Does nothing beyond the directory checks while the module-level
    DISABLED flag is set.

    Args:
        seedOutDir (str): seed output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if seedOutDir is not an existing directory
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    if DISABLED:
        print("Currently disabled because it requires too much memory.")
        return

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    # Get all cells
    allDir = ap.get_cells()

    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")

        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")

        bulk_reader = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        counts = bulk_reader.readColumn("counts")
        allNames = bulk_reader.readAttribute('objectNames')

        countsToMolar = TableReader(
            os.path.join(simOutDir, "EnzymeKinetics")).readColumn("countsToMolar")

        compoundNames = []

        # Iterate the original molecule indices of the non-zero rows so the
        # names line up with the counts (indexing allNames with a position
        # in the filtered array picked the wrong molecule).
        counts_by_molecule = counts.T
        present = np.where(np.any(counts_by_molecule, axis=1))[0]
        for mol_idx in present:
            mol_counts = counts_by_molecule[mol_idx]
            if (mol_counts[BURN_IN_SECONDS:] > 0).sum() > 100:
                compoundNames.append(allNames[mol_idx][:20])

                concentrations = mol_counts * countsToMolar
                if time[0] < 1:
                    # Replace the burn-in period with the later mean.
                    concentrations[:BURN_IN_SECONDS] = np.mean(
                        concentrations[BURN_IN_SECONDS:])

                plt.plot(time / 60., concentrations / np.mean(concentrations))

    # plt.legend(compoundNames, fontsize=5)
    plt.title("Protein Concentrations")
    plt.xlabel("Time (min)")
    plt.ylabel("Mean-normalized concentration")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def test_performance(sim_out_dir):
    '''
    Performs tests on multiple index conditions to compare times of various
    methods.

    Inputs:
        sim_out_dir (str): directory of simulation output to read from
    '''

    # Bulk molecule information
    bulk_molecules = TableReader(os.path.join(sim_out_dir, 'BulkMolecules'))
    bulk_ids = bulk_molecules.readAttribute('objectNames')
    n_mols = len(bulk_ids)

    # Sets of functions to test
    three_functions = [test_old, test_new_block, test_new_multiple]
    two_functions = [test_old, test_new_block]

    # Test reads
    ## Single index
    indices = np.array([0])
    test_functions(three_functions, 'One index', bulk_molecules, indices)

    ## First and last index
    indices = np.array([0, n_mols - 1])
    test_functions(three_functions, 'First and last indices', bulk_molecules, indices)

    ## Large block
    indices = np.arange(BLOCK_SIZE)
    test_functions(three_functions, 'Block indices', bulk_molecules, indices)

    ## 2 Large blocks
    # np.arange pieces instead of `range(...) + range(...)`, which only
    # works where range() returns a list.
    indices = np.hstack((np.arange(BLOCK_SIZE), np.arange(n_mols)[-BLOCK_SIZE:]))
    test_functions(three_functions, 'Two blocks of indices', bulk_molecules, indices)

    ## Dispersed reads - multiple reads method is slow so only test two methods
    indices = np.linspace(0, n_mols - 1, BLOCK_SIZE, dtype=np.int64)
    test_functions(two_functions, 'Dispersed indices', bulk_molecules, indices)

    ## Random reads - multiple reads method is slow so only test two methods
    indices = np.arange(n_mols)
    np.random.shuffle(indices)
    indices = indices[:BLOCK_SIZE]
    test_functions(two_functions, 'Random indices', bulk_molecules, indices)

    ## All indices
    indices = np.arange(n_mols)
    test_functions(three_functions, 'All indices', bulk_molecules, indices)

    bulk_molecules.close()
def getDivisionTime(variant_ap):
    """
    Return the division time (minutes) of the first cell of a variant.

    Args:
        variant_ap (tuple): ``(variant, ap)`` pair, where ``ap`` provides
            ``get_cells(variant=[...])``. A single tuple argument is kept so
            the function can still be called exactly as before (tuple
            parameters in the signature are not valid on Python 3).

    Returns:
        ``(max(time) - initialTime) / 60`` from the Main table, or NaN if
        anything fails while locating or reading the data (the error is
        printed).
    """
    variant, ap = variant_ap

    try:
        simDir = ap.get_cells(variant=[variant])[0]
        simOutDir = os.path.join(simDir, "simOut")

        main_reader = TableReader(os.path.join(simOutDir, "Main"))
        time_column = main_reader.readColumn("time")
        initialTime = main_reader.readAttribute("initialTime")

        return (time_column.max() - initialTime) / 60.
    except Exception as e:
        # Best-effort: report the problem and mark this variant as missing.
        print(e)
        return np.nan
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot the bulk counts of all tRNAs over time (seconds).

    Args:
        simOutDir (str): simulation output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile (str): path of the pickled sim_data object, used to
            look up which RNA ids are tRNAs
        validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if simOutDir is not an existing directory
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get time
    time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")

    # Get tRNA IDs and counts
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    rnaData = sim_data.process.transcription.rnaData
    rnaIds = rnaData["id"][rnaData["isTRna"]]

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
    moleculeIds = bulkMolecules.readAttribute("objectNames")
    # Builtin int: the np.int alias no longer exists in current NumPy.
    rnaIndexes = np.array(
        [moleculeIds.index(moleculeId) for moleculeId in rnaIds], int)
    rnaCountsBulk = bulkMolecules.readColumn("counts")[:, rnaIndexes]
    bulkMolecules.close()

    # Plot
    fig = plt.figure(figsize=(8.5, 11))
    ax = plt.subplot(1, 1, 1)
    ax.plot(time, rnaCountsBulk)
    ax.set_xlim([time[0], time[-1]])
    ax.set_xlabel("Time (s)")
    ax.set_ylabel("Counts of tRNAs")
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    # Booleans, not "off": a non-empty string is truthy for current
    # matplotlib and would leave the ticks switched on.
    ax.tick_params(right=False, top=False, which="both", direction="out")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot the RibosomeData 'fractionStalled' column over time (minutes).

    Args:
        simOutDir (str): simulation output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if simOutDir is not an existing directory
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    main_reader = TableReader(os.path.join(simOutDir, "Main"))
    initialTime = main_reader.readAttribute("initialTime")
    time = main_reader.readColumn("time") - initialTime

    ribosomeData = TableReader(os.path.join(simOutDir, "RibosomeData"))
    fractionStalled = ribosomeData.readColumn("fractionStalled")
    ribosomeData.close()

    plt.figure(figsize=(8.5, 11))
    plt.plot(time / 60, fractionStalled)

    plt.xlabel("Time (min)")
    plt.ylabel("Fraction of ribosomes stalled")

    plt.subplots_adjust(hspace=0.5, wspace=0.5)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def getSingleValue(allCells, tableName, colName, maxTime):
    """
    Collect one table column from every cell into a NaN-padded 2-D array.

    Args:
        allCells (ndarray): simulation directories, each with a 'simOut'
            folder
        tableName (str): name of the table to read in each 'simOut'
        colName (str): column to read from that table
        maxTime (int): width of the returned array

    Returns:
        ndarray: float64 array of shape ``(allCells.size, maxTime)``; row i
        starts with the column read for cell i and is NaN afterwards.
    """
    padded = np.full((allCells.size, maxTime), np.nan, dtype=np.float64)

    for row, simDir in enumerate(allCells):
        table_path = os.path.join(simDir, "simOut", tableName)
        column = TableReader(table_path).readColumn(colName)
        padded[row, :column.size] = column

    return padded
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot the summed synthesis probability of all mRNAs, rRNAs and tRNAs
    over time, one subplot per RNA class.

    Args:
        simOutDir (str): simulation output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile (str): path of the pickled sim_data object, used for
            the isMRna / isRRna / isTRna masks
        validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if simOutDir is not an existing directory
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    rnaData = sim_data.process.transcription.rnaData
    isMRna = rnaData["isMRna"]
    isRRna = rnaData["isRRna"]
    isTRna = rnaData["isTRna"]

    rnaSynthProbListener = TableReader(os.path.join(simOutDir, "RnaSynthProb"))
    rnaSynthProb = rnaSynthProbListener.readColumn('rnaSynthProb')
    time = rnaSynthProbListener.readColumn('time')
    rnaSynthProbListener.close()

    mRnaSynthProb = rnaSynthProb[:, isMRna].sum(axis=1)
    rRnaSynthProb = rnaSynthProb[:, isRRna].sum(axis=1)
    tRnaSynthProb = rnaSynthProb[:, isTRna].sum(axis=1)

    # Plot
    rows = 3
    cols = 1
    fig = plt.figure(figsize=(11, 8.5))
    plt.figtext(0.4, 0.96, "RNA synthesis probabilities over time", fontsize=12)
    nMRnas = np.sum(isMRna)
    nRRnas = np.sum(isRRna)
    nTRnas = np.sum(isTRna)

    subplotOrder = [mRnaSynthProb, rRnaSynthProb, tRnaSynthProb]
    subplotTitles = [
        "mRNA\n(sum of %s mRNAs)" % nMRnas,
        "rRNA\n(sum of %s rRNAs)" % nRRnas,
        "tRNA\n(sum of %s tRNAs)" % nTRnas,
    ]

    for index, classSynthProb in enumerate(subplotOrder):
        ax = plt.subplot(rows, cols, index + 1)
        ax.plot(time, classSynthProb)
        ax.set_title(subplotTitles[index], fontsize=10)

        ymin = np.min(classSynthProb)
        ymax = np.max(classSynthProb)
        # Upper limit = data range plus 20% headroom, measured from ymin.
        # Without the ymin offset the limit falls below the data (or below
        # ymin itself) whenever ymin is not close to zero.
        yaxisBuffer = np.around(ymin + 1.2 * (ymax - ymin), 3)
        ax.set_ylim([ymin, yaxisBuffer])
        ax.set_yticks([ymin, ymax, yaxisBuffer])
        ax.set_yticklabels([ymin, np.around(ymax, 3), yaxisBuffer], fontsize=10)
        ax.set_xlim([time[0], time[-1]])
        ax.tick_params(axis="x", labelsize=10)
        ax.spines["left"].set_visible(False)
        ax.spines["right"].set_visible(False)

    plt.subplots_adjust(hspace=0.5)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Scatter-plot the final-time-step protein count against the final-time-
    step count of the corresponding RNA for every monomer.

    Args:
        simOutDir (str): simulation output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile (str): path of the pickled sim_data object, used for
            the RNA and monomer ids
        validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if simOutDir is not an existing directory
        KeyError: if an RNA or monomer id is missing from BulkMolecules
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get the names of rnas from the KB
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)

    rnaIds = sim_data.process.transcription.rnaData["id"][
        sim_data.relation.rnaIndexToMonomerMapping]
    proteinIds = sim_data.process.translation.monomerData["id"]

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
    moleculeIds = bulkMolecules.readAttribute("objectNames")
    # Read the (large) counts table once and reuse it for both id sets.
    counts = bulkMolecules.readColumn("counts")
    bulkMolecules.close()

    # One dict lookup per id instead of a linear list search per id.
    indexOf = {moleculeId: i for i, moleculeId in enumerate(moleculeIds)}
    rnaIndexes = np.array([indexOf[moleculeId] for moleculeId in rnaIds], int)
    proteinIndexes = np.array(
        [indexOf[moleculeId] for moleculeId in proteinIds], int)

    # Counts at the final time step
    relativeMRnaCounts = counts[-1, rnaIndexes]
    relativeProteinCounts = counts[-1, proteinIndexes]

    plt.figure(figsize=(8.5, 11))

    plt.plot(relativeMRnaCounts, relativeProteinCounts, 'o',
             markeredgecolor='k', markerfacecolor='none')

    plt.xlabel("RNA count (at final time step)")
    plt.ylabel("Protein count (at final time step)")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def mp_worker(sim_dir):
    """
    Read the total ribosome count of one simulation at a fixed point of
    its cell cycle.

    The 'activeRibosome' unique-molecule count is added to the smaller of
    the two bulk subunit counts (module-level ``ribosome_30s_id`` and
    ``ribosome_50s_id``), all taken at the time step located
    CELL_CYCLE_FRACTION of the way through the recorded time points.

    Args:
        sim_dir (str): simulation directory that contains a 'simOut' folder

    Returns:
        The combined count at the sampled time step, or None when anything
        fails while reading (a message is printed in that case).
    """
    sim_out_dir = os.path.join(sim_dir, 'simOut')

    try:
        subunit_30s, subunit_50s = read_bulk_molecule_counts(
            sim_out_dir, ([ribosome_30s_id], [ribosome_50s_id]))

        reader = TableReader(os.path.join(sim_out_dir, 'UniqueMoleculeCounts'))
        unique_ids = reader.readAttribute('uniqueMoleculeIds')
        unique_counts = reader.readColumn('uniqueMoleculeCounts')
        reader.close()

        active = unique_counts[:, unique_ids.index('activeRibosome')]
        sample = int(len(active) * CELL_CYCLE_FRACTION)

        # A complete free ribosome needs one subunit of each kind.
        return active[sample] + min(subunit_30s[sample], subunit_50s[sample])

    except Exception:
        print('Excluded from analysis due to broken files: {}'.format(sim_out_dir))
        return None
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot, for every process, the BulkMolecules 'atpAllocatedInitial' and
    'atpRequested' columns over time (minutes) in a 7x2 grid of subplots.

    Args:
        simOutDir (str): simulation output directory; must already exist
        plotOutDir (str): directory the figure is written to (created if
            missing)
        plotOutFileName (str): file name handed to exportFigure
        simDataFile, validationDataFile: not used by this plot
        metadata: passed through to exportFigure

    Raises:
        Exception: if simOutDir is not an existing directory
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
    processNames = bulkMolecules.readAttribute("processNames")
    atpAllocatedInitial = bulkMolecules.readColumn("atpAllocatedInitial")
    atpRequested = bulkMolecules.readColumn("atpRequested")
    bulkMolecules.close()

    main_reader = TableReader(os.path.join(simOutDir, "Main"))
    initialTime = main_reader.readAttribute("initialTime")
    time = main_reader.readColumn("time") - initialTime

    # Plot
    plt.figure(figsize=(8.5, 11))
    rows = 7
    cols = 2

    for processIndex, processName in enumerate(processNames):
        allocated = atpAllocatedInitial[:, processIndex]
        requested = atpRequested[:, processIndex]

        ax = plt.subplot(rows, cols, processIndex + 1)
        ax.plot(time / 60., allocated)
        ax.plot(time / 60., requested)
        ax.set_title(str(processName), fontsize=8, y=0.85)

        ymin = np.amin([allocated, requested])
        ymax = np.amax([allocated, requested])
        ax.set_ylim([ymin, ymax])
        ax.set_yticks([ymin, ymax])
        ax.set_yticklabels(["%0.2e" % ymin, "%0.2e" % ymax])
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        ax.xaxis.set_ticks_position('bottom')
        ax.tick_params(which='both', direction='out', labelsize=6)
        # ax.set_xticks([])

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
    # The original called plt.subplots_adjust(hspace=2.0, wspace=2.0) here,
    # after the figure was exported and closed. It never affected the saved
    # figure and only created a new empty figure, so it is dropped.
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Export one small, axis-free dry-mass trace per cell of a seed, named
    'r01_<massType>_gen<index>'.

    Args:
        seedOutDir (str): seed output directory; must already exist
        plotOutDir (str): directory the figures are written to (created if
            missing)
        plotOutFileName: not used; the file names are built here
        simDataFile, validationDataFile: not used by this plot
        metadata: not used; exportFigure is called without it

    Raises:
        Exception: if seedOutDir is not an existing directory
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    # TODO: Declutter Y-axis

    # Get all cells
    allDir = ap.get_cells().tolist()

    massNames = [
        "dryMass",
    ]

    # enumerate supplies the position directly instead of searching the
    # list again for every figure.
    for genIdx, simDir in enumerate(allDir):
        simOutDir = os.path.join(simDir, "simOut")

        main_reader = TableReader(os.path.join(simOutDir, "Main"))
        initialTime = main_reader.readAttribute("initialTime")
        time = main_reader.readColumn("time") - initialTime

        mass = TableReader(os.path.join(simOutDir, "Mass"))

        for massType in massNames:
            massToPlot = mass.readColumn(massType)

            f = plt.figure(figsize=(1.25, 0.8), frameon=False)

            ax = f.add_axes([0, 0, 1, 1])
            ax.axis("off")

            ax.plot(time, massToPlot, linewidth=2)
            ax.set_ylim([massToPlot.min(), massToPlot.max()])
            ax.set_xlim([time.min(), time.max()])

            exportFigure(
                plt, plotOutDir, "r01_{}_gen{}".format(massType, genIdx))
            # Close each figure once saved so they do not pile up in memory.
            plt.close("all")
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Plot the counts of the four NTPs over time, one subplot each.

    Looks up ``ATP[c]``, ``CTP[c]``, ``GTP[c]`` and ``UTP[c]`` in the
    ``objectNames`` attribute of the ``BulkMolecules`` table, slices their
    columns out of ``counts`` and plots each against ``time / 60.`` on a
    2 x 2 grid. Also prints the sum of the four counts at the first
    recorded time step.

    Args:
        simOutDir: directory holding the simulation output tables.
        plotOutDir: directory the figure is written to; created if missing.
        plotOutFileName: file name handed to ``exportFigure``.
        simDataFile: unused here.
        validationDataFile: unused here.
        metadata: passed through to ``exportFigure``.

    Raises:
        Exception: if ``simOutDir`` is not an existing directory.
        ValueError: if one of the NTP ids is missing from ``objectNames``
            (raised by ``list.index``, assuming ``objectNames`` is a list).
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))

    moleculeIds = bulkMolecules.readAttribute("objectNames")

    NTP_IDS = ['ATP[c]', 'CTP[c]', 'GTP[c]', 'UTP[c]']
    # Builtin int replaces the np.int alias, which newer NumPy no longer
    # provides; the resulting dtype is the same.
    ntpIndexes = np.array(
        [moleculeIds.index(ntpId) for ntpId in NTP_IDS], int)
    ntpCounts = bulkMolecules.readColumn("counts")[:, ntpIndexes]
    bulkMolecules.close()

    # Open the Main table once for both the attribute and the column.
    mainReader = TableReader(os.path.join(simOutDir, "Main"))
    initialTime = mainReader.readAttribute("initialTime")
    time = mainReader.readColumn("time") - initialTime
    mainReader.close()

    plt.figure(figsize=(8.5, 11))

    for idx, ntpId in enumerate(NTP_IDS):
        plt.subplot(2, 2, idx + 1)

        plt.plot(time / 60., ntpCounts[:, idx], linewidth=2)
        plt.xlabel("Time (min)")
        plt.ylabel("Counts")
        plt.title(ntpId)

    # Single parenthesised argument: prints identically whether print is a
    # statement (Python 2) or a function (Python 3).
    print("NTPs required for cell division (nt/cell-cycle) = %d" % sum(
        ntpCounts[0, :]))

    plt.subplots_adjust(hspace=0.5)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Plot the count of every amino acid over time, one subplot each.

    The amino acid ids come from ``sim_data.moleculeGroups.aaIDs`` (loaded
    from the pickle at ``simDataFile``). Their columns are sliced out of
    the ``counts`` column of the ``BulkMolecules`` table and each is
    plotted against ``time / 60.`` on a grid that is 4 columns wide.

    Args:
        simOutDir: directory holding the simulation output tables.
        plotOutDir: directory the figure is written to; created if missing.
        plotOutFileName: file name handed to ``exportFigure``.
        simDataFile: path of the pickled sim_data object.
        validationDataFile: unused here.
        metadata: passed through to ``exportFigure``.

    Raises:
        Exception: if ``simOutDir`` is not an existing directory.
        ValueError: if an amino acid id is missing from ``objectNames``
            (raised by ``list.index``, assuming ``objectNames`` is a list).
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Pickles are binary data: open in "rb" and close the handle promptly.
    with open(simDataFile, "rb") as simDataHandle:
        sim_data = cPickle.load(simDataHandle)

    aaIDs = sim_data.moleculeGroups.aaIDs

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
    moleculeIds = bulkMolecules.readAttribute("objectNames")
    # Builtin int replaces the np.int alias, which newer NumPy no longer
    # provides; the resulting dtype is the same.
    aaIndexes = np.array(
        [moleculeIds.index(aaId) for aaId in aaIDs], int)
    aaCounts = bulkMolecules.readColumn("counts")[:, aaIndexes]
    bulkMolecules.close()

    # Open the Main table once for both the attribute and the column.
    mainReader = TableReader(os.path.join(simOutDir, "Main"))
    initialTime = mainReader.readAttribute("initialTime")
    time = mainReader.readColumn("time") - initialTime
    mainReader.close()

    plt.figure(figsize=(8.5, 11))

    cols = 4
    # Keep the historical 6 x 4 grid, growing it only if there are more
    # amino acids than the grid can hold.
    rows = max(6, (len(aaIDs) + cols - 1) // cols)

    # Iterate the ids themselves rather than a hard-coded count of 21, so
    # a shorter or longer id list neither raises nor gets truncated.
    for idx, aaId in enumerate(aaIDs):
        plt.subplot(rows, cols, idx + 1)

        plt.plot(time / 60., aaCounts[:, idx], linewidth=2)
        plt.xlabel("Time (min)")
        plt.ylabel("Counts")
        plt.title(aaId)

    plt.subplots_adjust(hspace=0.5, wspace=0.5)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Plot the external exchange flux of glucose and oxygen over time.

    Reads ``externalExchangeFluxes`` and ``externalMoleculeIDs`` from the
    ``FBAResults`` table and the time axis from the ``Main`` table. For
    each of ``GLC[p]`` and ``OXYGEN-MOLECULE[p]`` that is present, the
    sign-flipped flux is plotted against ``time / 60. / 60.``. The y range
    is centred on the average flux and sized by the smaller of the two
    deviations (max and min) from that average, so the larger excursion
    may be clipped; the true max and min are printed as figure text.

    Args:
        simOutDir: directory holding the simulation output tables.
        plotOutDir: directory the figure is written to; created if missing.
        plotOutFileName: file name handed to ``exportFigure``.
        simDataFile: unused here.
        validationDataFile: unused here.
        metadata: passed through to ``exportFigure``.

    Raises:
        Exception: if ``simOutDir`` is not an existing directory.
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Exchange flux
    mainReader = TableReader(os.path.join(simOutDir, "Main"))
    initialTime = mainReader.readAttribute("initialTime")
    time = mainReader.readColumn("time") - initialTime
    mainReader.close()

    fba_results = TableReader(os.path.join(simOutDir, "FBAResults"))
    exFlux = fba_results.readColumn("externalExchangeFluxes")
    exMolec = fba_results.readAttribute("externalMoleculeIDs")
    fba_results.close()

    moleculeIDs = ["GLC[p]", "OXYGEN-MOLECULE[p]"]

    # Plot
    plt.figure(figsize=(8, 11.5))
    rows = len(moleculeIDs)
    cols = 1

    # Iterate the same list that sizes the grid so the two cannot drift
    # apart.
    for index, molecule in enumerate(moleculeIDs):
        if molecule not in exMolec:
            continue
        moleculeFlux = -1. * exFlux[:, exMolec.index(molecule)]
        ax = plt.subplot(rows, cols, index + 1)
        ax.plot(time / 60. / 60., moleculeFlux)

        averageFlux = np.average(moleculeFlux)
        abs_max = np.max(moleculeFlux)
        abs_min = np.min(moleculeFlux)

        # Symmetric window around the average, sized by the smaller
        # deviation.
        yRange = np.min([
            np.abs(abs_max - averageFlux),
            np.abs(abs_min - averageFlux)])
        ymin = np.round(averageFlux - yRange)
        ymax = np.round(averageFlux + yRange)
        ax.set_ylim([ymin, ymax])

        # The window above may clip the data, so report the real extremes
        # next to the subplot.
        plt.figtext(
            0.7,
            1. / float(rows) * 0.7 + (rows - 1 - index) / float(rows),
            "Max: %s\nMin: %s" % (abs_max, abs_min), fontsize=8)

        ax.set_ylabel("External %s\n(mmol/gDCW/hr)" % molecule, fontsize=8)
        ax.set_xlabel("Time (hr)", fontsize=8)
        ax.set_title("%s" % molecule, fontsize=10, y=1.1)
        ax.tick_params(labelsize=8, which="both", direction="out")

    plt.subplots_adjust(hspace=0.5, wspace=1)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")