def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Skeleton variant analysis: load per-cell inputs, then export a figure.

    The loaded values (``validation_data``, ``sim_data``, ``time``) are not
    used after being read, and the figure is exported empty.  This looks
    like a template to be filled in at the "Create Plot" marker -- TODO
    confirm.

    Args:
        inputDir: Directory handed to ``AnalysisPaths``; must already exist.
        plotOutDir: Output directory, passed to ``filepath.makedirs``.
        plotOutFileName: File name passed to ``exportFigure``.
        simDataFile: Not used; sim_data is loaded per variant instead.
        validationDataFile: Path of the pickled validation data.
        metadata: Passed through to ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    for variant in variants:
        with open(ap.get_variant_kb(variant), 'rb') as f:
            sim_data = cPickle.load(f)

        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, "simOut")

            # Listeners used
            main_reader = TableReader(os.path.join(simOutDir, 'Main'))

            # Load data
            time = main_reader.readColumn('time')

    plt.figure()

    ### Create Plot ###

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Scatter added volume against birth volume for the first three variants.

    For every cell of generations 2 and 3, the first and last ``cellMass``
    entries are divided by the variant's cell density to get volumes.  Two
    figures are exported: the labelled plot and a "_clean" copy with axis
    labels and tick labels removed.

    Args:
        inputDir: Directory handed to ``AnalysisPaths``; must already exist.
        plotOutDir: Output directory, passed to ``filepath.makedirs``.
        plotOutFileName: Base file name for the exported figures.
        simDataFile: Not used; sim_data is loaded per variant instead.
        validationDataFile: Not used.
        metadata: Must have ``metadata["variant"] == "condition"``, otherwise
            the method prints a notice and returns.  Also passed to
            ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    if metadata["variant"] != "condition":
        print("This plot only runs for the 'condition' variant.")
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    gens = [2, 3]
    labels = ["minimal", "anaerobic", "minimal + AA"]

    # The plot indexes series 0..2 directly; bail out instead of raising
    # IndexError when there are fewer variants than labelled series.
    if len(variants) < len(labels):
        print("Not enough variants to plot.")
        return

    initial_volumes = []
    added_volumes = []

    for variant in variants:
        with open(ap.get_variant_kb(variant), 'rb') as f:
            sim_data = cPickle.load(f)

        # Convert the density once per variant instead of once per use.
        density = sim_data.constants.cellDensity.asNumber(
            units.fg / units.um**3)

        initial_masses = np.zeros(0)
        final_masses = np.zeros(0)

        # A variant without cells still contributes an (empty) series so
        # that list position keeps matching the variant order and labels.
        for simDir in ap.get_cells(variant=[variant], generation=gens):
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                cellMass = mass.readColumn("cellMass")
                birth_mass, division_mass = cellMass[0], cellMass[-1]
            except Exception:
                # Best effort: skip cells whose output cannot be read.
                continue

            initial_masses = np.hstack((initial_masses, birth_mass))
            final_masses = np.hstack((final_masses, division_mass))

        added_masses = final_masses - initial_masses

        initial_volumes.append(initial_masses / density)
        added_volumes.append(added_masses / density)

    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    plt.figure(figsize=(4, 4))
    ax = plt.subplot2grid((1, 1), (0, 0))

    options = {
        "edgecolors": color_cycle[0],
        "alpha": 0.2,
        "s": 50,
        "clip_on": False,
    }

    # Drawing order (2, 0, 1) is kept from the original; it determines
    # both z-order and legend order.
    ax.scatter(initial_volumes[2], added_volumes[2],
               marker="x", label=labels[2], **options)
    ax.scatter(initial_volumes[0], added_volumes[0],
               facecolors="none", marker="o", label=labels[0], **options)
    ax.scatter(initial_volumes[1], added_volumes[1],
               facecolors="none", marker="^", label=labels[1], **options)

    ax.set_xlim([0, 4])
    ax.set_ylim([0, 4])
    # Raw strings: "\m" is not a valid escape sequence.
    ax.set_xlabel(r"Birth Volume ($\mu m^3$)")
    ax.set_ylabel(r"Added Volume ($\mu m^3$)")
    ax.legend()

    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    ax.get_xaxis().get_major_formatter().set_useOffset(False)

    whitePadSparklineAxis(ax)

    ax.tick_params(which='both', bottom=True, left=True,
                   top=False, right=False,
                   labelbottom=True, labelleft=True)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Get clean version of plot
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_yticklabels([])
    ax.set_xticklabels([])

    exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Compare simulated fluxes with the Toya 2010 validation fluxes per variant.

    For each variant this collects exchange and reaction fluxes from all
    cells, averages the simulated flux of every Toya reaction, and stores
    three summary values: glucose uptake, log2 ratio of the ``SUCC_ID``
    flux to its Toya value, and a marker size derived from the Pearson r
    between simulated and Toya fluxes.  The values are drawn as a scatter
    plot with an inset for the region bounded by ``GLC_MAX`` and
    ``SUCC_DISTANCE``.

    Args:
        inputDir: Directory handed to ``AnalysisPaths``; must already exist
            and contain ``kb/<SERIALIZED_FIT1_FILENAME>``.
        plotOutDir: Output directory, passed to ``filepath.makedirs``.
        plotOutFileName: File name passed to ``exportFigure``.
        simDataFile: Not used; sim_data is read from ``inputDir``.
        validationDataFile: Path of the pickled validation data.
        metadata: Passed through to ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()
    n_variants = len(variants)

    # Load sim_data
    with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME),
              'rb') as f:
        sim_data = cPickle.load(f)
    cell_density = sim_data.constants.cellDensity.asNumber(
        MASS_UNITS / VOLUME_UNITS)

    # Load validation_data
    with open(validationDataFile, "rb") as f:
        validation_data = cPickle.load(f)
    toya = validation_data.reactionFlux.toya2010fluxes
    toyaReactions = toya["reactionID"]
    toyaFluxes = toya["reactionFlux"]
    toyaStdev = toya["reactionFluxStdev"]
    toyaFluxesDict = dict(zip(toyaReactions, toyaFluxes))
    toyaStdevDict = dict(zip(toyaReactions, toyaStdev))

    glc_uptakes = np.zeros(n_variants)
    log_ratio_succ = np.zeros(n_variants)
    size_pearson = np.zeros(n_variants)
    selected_indices = np.zeros(n_variants, bool)

    for v, variant in enumerate(variants):
        # initialize kinetic flux comparison
        exchange_fluxes = {entry: [] for entry in EXCHANGES}
        reaction_fluxes = {entry: [] for entry in REACTIONS}
        modelFluxes = {rxn: [] for rxn in toyaReactions}

        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, "simOut")

            try:
                # Listeners used
                massListener = TableReader(os.path.join(simOutDir, "Mass"))
                fbaResults = TableReader(
                    os.path.join(simOutDir, "FBAResults"))
                enzymeKineticsReader = TableReader(
                    os.path.join(simOutDir, "EnzymeKinetics"))

                ## Read from mass listener
                cellMass = massListener.readColumn("cellMass")
                # skip if no data (compare by value; `is ()` relied on
                # tuple identity)
                if cellMass.shape == ():
                    continue
                dryMass = massListener.readColumn("dryMass")
            except Exception as e:
                print(e)
                continue

            coefficient = (dryMass / cellMass * cell_density).reshape(-1, 1)

            ## Read from FBA listener
            reactionIDs = {
                r: i for i, r in enumerate(
                    fbaResults.readAttribute("reactionIDs"))
            }
            exMolec = {
                m: i for i, m in enumerate(
                    fbaResults.readAttribute("externalMoleculeIDs"))
            }
            # The first time step is dropped from both flux tables.
            reactionFluxes = FLUX_CONVERSION * (
                fbaResults.readColumn("reactionFluxes") / coefficient)[1:, :]
            exFlux = fbaResults.readColumn("externalExchangeFluxes")[1:, :]

            ## Read from EnzymeKinetics listener
            # NOTE(review): this is read again after the cell loop, so the
            # value from the last successfully read cell is what decides
            # `selected_indices[v]`; it is undefined (or stale from an
            # earlier variant) if no cell of this variant was read.
            constrainedReactions = {
                r: i for i, r in enumerate(
                    enzymeKineticsReader.readAttribute(
                        "constrainedReactions"))
            }

            ## Append values for relevant reactions.
            # append to exchanges
            for entry in EXCHANGES:
                exchange_fluxes[entry].extend(list(exFlux[:, exMolec[entry]]))
            # append to reaction fluxes
            for entry in REACTIONS:
                reaction_fluxes[entry].extend(
                    list(reactionFluxes[:, reactionIDs[entry]]))

            ## get all Toya reactions, and corresponding simulated fluxes.
            # Direction is kept as an explicit sign next to the column.
            # Encoding it as a negated index loses the sign for column 0
            # and multiplies that column by sign(0) == 0.
            toya_idx = {r: [] for r in toyaReactions}
            for rxn_id, column in reactionIDs.items():
                parts = rxn_id.split(' (reverse)')
                sign = -1 if len(parts) > 1 else 1
                base_id = parts[0].split('__')[0]
                if base_id in toya_idx:
                    toya_idx[base_id].append((sign, column))
            for toyaReaction, signed_columns in toya_idx.items():
                flux_time_course = np.sum(
                    [sign * reactionFluxes[:, column]
                     for sign, column in signed_columns],
                    axis=0)
                modelFluxes[toyaReaction].append(flux_time_course.mean())

        ## Flux comparison with Toya
        toyaVsReactionAve = []
        rxn_order = []
        for rxn, toyaFlux in toyaFluxesDict.items():
            rxn_order.append(rxn)
            if rxn in modelFluxes:
                toyaVsReactionAve.append((
                    np.mean(modelFluxes[rxn]),
                    toyaFlux.asNumber(OUTPUT_FLUX_UNITS),
                    np.std(modelFluxes[rxn]),
                    toyaStdevDict[rxn].asNumber(OUTPUT_FLUX_UNITS),
                ))

        toyaVsReactionAve = np.array(toyaVsReactionAve)
        rWithAll = pearsonr(toyaVsReactionAve[:, 0], toyaVsReactionAve[:, 1])
        succ_toya_flux = toyaVsReactionAve[rxn_order.index(SUCC_ID), 1]

        # Save data for plotting
        glc_uptakes[v] = -np.mean(exchange_fluxes[GLC_ID])
        log_ratio_succ[v] = np.log2(
            np.mean(reaction_fluxes[SUCC_ID]) / succ_toya_flux)
        size_pearson[v] = (rWithAll[0] * 8)**2
        selected_indices[v] = np.all([
            c not in constrainedReactions for c in HIGHLIGHTED_CONSTRAINTS
        ])

    # Plot scatterplot
    fig = plt.figure(figsize=(5, 5))
    gs = gridspec.GridSpec(40, 40)

    ## Plot full data
    plt.scatter(glc_uptakes[~selected_indices],
                log_ratio_succ[~selected_indices],
                color='blue', alpha=0.6, s=size_pearson[~selected_indices])
    plt.scatter(glc_uptakes[selected_indices],
                log_ratio_succ[selected_indices],
                color='red', alpha=0.6, s=size_pearson[selected_indices])
    x_min, x_max = plt.xlim()
    y_max = max(np.abs(plt.ylim()))
    plt.axvspan(0, GLC_MAX, facecolor='g', alpha=0.1)
    plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)
    plt.axhline(y=0, color='k', linestyle='--')

    ## Format axes
    plt.ylabel('log2(model flux / Toya flux)')
    plt.xlabel('glucose uptake (mmol / g DCW / hr)')
    plt.xlim([np.floor(min(x_min, 10)), np.ceil(x_max)])
    plt.ylim([-y_max, y_max])

    ## Plot highlighted region data
    fig.add_subplot(gs[1:28, -20:-1])

    in_region = ((glc_uptakes < GLC_MAX)
                 & (np.abs(log_ratio_succ) < SUCC_DISTANCE))
    selected_in = in_region & selected_indices
    not_selected_in = in_region & ~selected_indices
    constraint_labels = np.array(
        [[c[:2] for c in constraints] if constraints is not None else []
         for _, constraints in map(get_disabled_constraints, variants)])
    plt.scatter(glc_uptakes[not_selected_in],
                log_ratio_succ[not_selected_in],
                color='blue', alpha=0.6, s=size_pearson[not_selected_in])
    plt.scatter(glc_uptakes[selected_in],
                log_ratio_succ[selected_in],
                color='red', alpha=0.6, s=size_pearson[selected_in])
    for x, y, label in zip(glc_uptakes[in_region],
                           log_ratio_succ[in_region],
                           constraint_labels[in_region]):
        plt.text(x, y, ', '.join(label), ha='center', va='top', fontsize=6)
    x_min, _ = plt.xlim()
    x_min = np.floor(min(x_min, 10))
    plt.axvspan(x_min, GLC_MAX, facecolor='g', alpha=0.1)
    plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)

    ## Format axes
    plt.xlim([x_min, GLC_MAX])
    plt.ylim([-SUCC_DISTANCE, SUCC_DISTANCE])

    ## Save figure
    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Plot RNA/protein mass ratio against growth rate for three variants.

    Per cell, the growth rate is ``ln(2)`` divided by the simulated time span
    in hours, and the ratio is mean ``rnaMass`` over mean ``proteinMass``.
    Three figures are exported: an error-bar plot of per-variant means, a
    "_clean" copy without labels, and a "_scatter" plot of individual cells.
    Each also shows the line ``y = x / 4.5 + 0.087``.

    Args:
        inputDir: Directory handed to ``AnalysisPaths``; must already exist.
        plotOutDir: Output directory, passed to ``filepath.makedirs``.
        plotOutFileName: Base file name for the exported figures.
        simDataFile: Not used.
        validationDataFile: Not used.
        metadata: Must have ``metadata["variant"] == "condition"``, otherwise
            the method prints a notice and returns.  Also passed to
            ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    if metadata["variant"] != "condition":
        print('This analysis only runs for the "condition" variant.')
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    n_gens = ap.n_generation
    variants = ap.get_variants()

    if n_gens - 1 < FIRST_GENERATION:
        print('Not enough generations to plot.')
        return

    marker_styles = ['o', '^', 'x']
    labels = ['basal', 'anaerobic', '+AA']

    # Series 0..2 are indexed directly below; return instead of raising
    # IndexError when there are fewer variants than labelled series.
    if len(variants) < len(labels):
        print('Not enough variants to plot.')
        return

    all_growth_rates = []
    all_rna_to_protein_ratios = []

    for variant in variants:
        doubling_times = np.zeros(0)
        variant_rna_to_protein_ratios = np.zeros(0)

        # A variant without cells still contributes an (empty) series so
        # that list position keeps matching the variant order and labels.
        all_cells = ap.get_cells(
            variant=[variant],
            generation=range(FIRST_GENERATION, n_gens))

        for simDir in all_cells:
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                rna_mass = mass.readColumn("rnaMass")
                protein_mass = mass.readColumn("proteinMass")

                time = TableReader(
                    os.path.join(simOutDir, "Main")).readColumn("time")

                doubling_time = (time[-1] - time[0]) / 3600.
                ratio = rna_mass.mean() / protein_mass.mean()
            except Exception:
                # Best effort: skip cells whose output cannot be read.
                continue

            doubling_times = np.hstack((doubling_times, doubling_time))
            variant_rna_to_protein_ratios = np.hstack(
                (variant_rna_to_protein_ratios, ratio))

        all_growth_rates.append(np.log(2) / doubling_times)
        all_rna_to_protein_ratios.append(variant_rna_to_protein_ratios)

    # Get errorbar plot
    plt.figure(figsize=FIGSIZE)

    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    ax = plt.subplot2grid((1, 1), (0, 0))

    for i, (marker, label) in enumerate(zip(marker_styles, labels)):
        ax.errorbar(
            all_growth_rates[i].mean(),
            all_rna_to_protein_ratios[i].mean(),
            yerr=all_rna_to_protein_ratios[i].std(),
            color=color_cycle[0], mec=color_cycle[0],
            marker=marker, markersize=8, mfc='white',
            linewidth=1, capsize=2, label=label)

    # Add linear plot proposed in Scott et al. (2010)
    x_linear = np.linspace(0.05, 1.95, 100)
    y_linear = x_linear / 4.5 + 0.087
    ax.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

    ax.set_xlim([0, 2])
    ax.set_ylim([0, 0.7])
    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    ax.get_xaxis().get_major_formatter().set_useOffset(False)

    whitePadSparklineAxis(ax)

    ax.tick_params(which='both', bottom=True, left=True,
                   top=False, right=False,
                   labelbottom=True, labelleft=True)

    # Raw string: "\l" is not a valid escape sequence.
    ax.set_xlabel(r"Growth rate $\lambda$ (hour$^{-1}$)")
    ax.set_ylabel("RNA/protein mass ratio")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Get clean version of errorbar plot
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_yticklabels([])
    ax.set_xticklabels([])

    exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)

    plt.close("all")

    # Get scatter version of plot
    plt.figure(figsize=FIGSIZE)
    ax = plt.subplot2grid((1, 1), (0, 0))

    options = {"edgecolors": color_cycle[0], "alpha": 0.25, "s": 20}

    ax.scatter(all_growth_rates[0], all_rna_to_protein_ratios[0],
               facecolors="none", marker="o", label=labels[0], **options)
    ax.scatter(all_growth_rates[1], all_rna_to_protein_ratios[1],
               facecolors="none", marker="^", label=labels[1], **options)
    ax.scatter(all_growth_rates[2], all_rna_to_protein_ratios[2],
               marker="x", label=labels[2], **options)

    x_linear = np.linspace(0.05, 2.45, 100)
    y_linear = x_linear / 4.5 + 0.087
    ax.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

    ax.set_xlim([0, 2.5])
    ax.set_ylim([0, 0.8])
    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    ax.get_xaxis().get_major_formatter().set_useOffset(False)

    whitePadSparklineAxis(ax)

    ax.tick_params(which='both', bottom=True, left=True,
                   top=False, right=False,
                   labelbottom=True, labelleft=True)

    ax.set_xlabel(r"Growth rate $\lambda$ (hour$^{-1}$)")
    ax.set_ylabel("RNA/protein mass ratio")

    exportFigure(plt, plotOutDir, plotOutFileName + "_scatter", metadata)

    # Release the scatter figure too; it was previously left open.
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Violin-plot the growth yield on glucose for three selected variants.

    Variants are picked by what ``get_disabled_constraints`` returns as its
    second value: ``None`` fills slot 0, an empty collection slot 1, and a
    set equal to ``ADDITIONAL_DISABLED_CONSTRAINTS`` slot 2.  For each
    selected variant the per-time-step yield ``growth / -glc_mass_flux`` is
    pooled over all cells (first time step dropped).  Welch t-test p-values
    are printed for every pair, and the distributions are plotted with a
    dashed line at ``VALIDATION_YIELD``.

    Args:
        inputDir: Directory handed to ``AnalysisPaths``; must already exist
            and contain ``kb/<SERIALIZED_FIT1_FILENAME>``.
        plotOutDir: Output directory, passed to ``filepath.makedirs``.
        plotOutFileName: File name passed to ``exportFigure``.
        simDataFile: Not used; sim_data is read from ``inputDir``.
        validationDataFile: Not used.
        metadata: Passed through to ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    all_variants = ap.get_variants()

    # -1 marks "slot not filled".  An integer dtype keeps variant IDs as
    # integers; a float array turned them into e.g. 3.0 before they were
    # passed to get_cells().
    variants = -np.ones(N_VARIANTS, dtype=int)
    for variant in all_variants:
        _, additional_disabled = get_disabled_constraints(variant)
        if additional_disabled is None:
            variants[0] = variant
        elif len(additional_disabled) == 0:
            variants[1] = variant
        elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
            variants[2] = variant

    if np.any(variants < 0):
        print('Not enough variants to analyze')
        return

    with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME),
              'rb') as f:
        sim_data = cPickle.load(f)

    all_yields = []
    for variant in variants:
        yields = []

        for sim_dir in ap.get_cells(variant=[variant]):
            sim_out_dir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            fba_reader = TableReader(os.path.join(sim_out_dir, 'FBAResults'))
            main_reader = TableReader(os.path.join(sim_out_dir, 'Main'))
            mass_reader = TableReader(os.path.join(sim_out_dir, 'Mass'))

            # Load data
            time_step_sec = main_reader.readColumn('timeStepSec')

            external_fluxes = fba_reader.readColumn('externalExchangeFluxes')
            external_molecules = fba_reader.readAttribute(
                'externalMoleculeIDs')

            dry_mass = MASS_UNITS * mass_reader.readColumn('dryMass')
            growth = (GROWTH_UNITS * mass_reader.readColumn('growth')
                      / time_step_sec)

            # Calculate growth yield on glucose
            glc_idx = external_molecules.index(GLUCOSE_ID)
            glc_flux = FLUX_UNITS * external_fluxes[:, glc_idx]
            glc_mw = sim_data.getter.getMass([GLUCOSE_ID])[0]
            glc_mass_flux = glc_flux * glc_mw * dry_mass
            glc_mass_yield = growth / -glc_mass_flux

            # The first time step is dropped.
            yields += list(glc_mass_yield[1:].asNumber())

        all_yields += [yields]

    # Pairwise Welch t-tests (unequal variances) between variants.
    for i, v1 in enumerate(variants):
        for j, v2 in enumerate(variants[i + 1:]):
            _, p = stats.ttest_ind(
                all_yields[i], all_yields[i + j + 1], equal_var=False)
            print('p={:.2e} for variant {} vs variant {}'.format(p, v1, v2))

    plt.figure(figsize=(4, 4))
    xticks = list(range(N_VARIANTS))

    # Plot data
    plt.violinplot(all_yields, xticks, showmeans=False, showextrema=False)
    plt.axhline(VALIDATION_YIELD, linestyle='--', color='#eb7037')

    # Format axes
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.xticks(xticks, VARIANT_LABELS)
    plt.ylabel('Glucose Yield\n(g cell / g glucose)')

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Scatter added volume against birth volume for the first three variants.

    For every cell of generations 2 and 3, the first and last ``cellMass``
    entries are divided by the variant's cell density to get volumes, which
    are drawn as one scatter series per variant.

    Args:
        inputDir: Directory handed to ``AnalysisPaths``; must already exist.
        plotOutDir: Output directory, passed to ``filepath.makedirs``.
        plotOutFileName: File name passed to ``exportFigure``.
        simDataFile: Not used; sim_data is loaded per variant instead.
        validationDataFile: Not used.
        metadata: Must have ``metadata["variant"] == "condition"``, otherwise
            the method prints a notice and returns.  Also passed to
            ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    if metadata["variant"] != "condition":
        print("This plot only runs for the 'condition' variant.")
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    gens = [2, 3]
    labels = ["minimal", "anaerobic", "+AA"]

    # Series 0..2 are indexed directly below; return instead of raising
    # IndexError when there are fewer variants than labelled series.
    if len(variants) < len(labels):
        print("Not enough variants to plot.")
        return

    initial_volumes = []
    added_volumes = []

    for variant in variants:
        with open(ap.get_variant_kb(variant), 'rb') as f:
            sim_data = cPickle.load(f)

        # Convert the density once per variant instead of once per use.
        density = sim_data.constants.cellDensity.asNumber(
            units.fg / units.um**3)

        initial_masses = np.zeros(0)
        final_masses = np.zeros(0)

        # A variant without cells still contributes an (empty) series so
        # that list position keeps matching the variant order and labels.
        for simDir in ap.get_cells(variant=[variant], generation=gens):
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                cellMass = mass.readColumn("cellMass")
                birth_mass, division_mass = cellMass[0], cellMass[-1]
            except Exception:
                # Best effort: skip cells whose output cannot be read.
                continue

            initial_masses = np.hstack((initial_masses, birth_mass))
            final_masses = np.hstack((final_masses, division_mass))

        added_masses = final_masses - initial_masses

        initial_volumes.append(initial_masses / density)
        added_volumes.append(added_masses / density)

    plt.style.use('seaborn-deep')

    plt.figure(figsize=(5, 5))
    for series, label in enumerate(labels):
        plt.scatter(initial_volumes[series], added_volumes[series],
                    s=3, label=label)

    plt.xlim([0, 4])
    plt.ylim([0, 4])
    # Raw strings: "\m" is not a valid escape sequence.
    plt.xlabel(r"Birth Volume ($\mu m^3$)")
    plt.ylabel(r"Added Volume ($\mu m^3$)")
    plt.legend()

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Plot four per-variant simulation statistics against doubling time.

    For every variant, per-cell values are averaged over all cells: RNA
    mass, ribosome elongation rate, number of origins at replication
    initiation, and rRNA initiation rate.  Each is drawn with error bars
    against the mean doubling time (minutes), next to the hard-coded
    ``bremer_*`` reference values (presumably from Bremer & Dennis --
    TODO confirm source).

    Args:
        inputDir: Directory handed to ``AnalysisPaths``; must already exist.
        plotOutDir: Output directory; created with ``os.mkdir`` if missing.
        plotOutFileName: File name passed to ``exportFigure``.
        simDataFile: Not used; sim_data is loaded per variant instead.
        validationDataFile: Not used.
        metadata: Passed through to ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception("variantDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)

    fig = plt.figure()
    fig.set_figwidth(5)
    fig.set_figheight(5)

    bremer_tau = np.array([40, 100, 24])
    bremer_origins_per_cell_at_initiation = np.array([2, 1, 4])
    bremer_rrn_init_rate = np.array([20 * 23, 4 * 12.4, 58 * 35.9])
    bremer_rna_mass_per_cell = np.array([77, 20, 211])
    bremer_elng_rate = np.array([18, 12, 21])

    n_variant = ap.n_variant
    sim_doubling_time = np.zeros(n_variant)
    sim_doubling_time_std = np.zeros(n_variant)
    sim_origins_per_cell_at_initiation = np.zeros(n_variant)
    sim_origins_per_cell_at_initiation_std = np.zeros(n_variant)
    sim_rna_mass_per_cell = np.zeros(n_variant)
    sim_rna_mass_per_cell_std = np.zeros(n_variant)
    sim_elng_rate = np.zeros(n_variant)
    sim_elng_rate_std = np.zeros(n_variant)
    sim_rrn_init_rate = np.zeros(n_variant)
    sim_rrn_init_rate_std = np.zeros(n_variant)

    variants = ap.get_variants()

    for varIdx in range(n_variant):
        variant = variants[varIdx]

        print("variant {}".format(variant))

        all_cells = ap.get_cells(variant=[variant])
        n_cells = len(all_cells)

        print("Total cells: {}".format(n_cells))

        try:
            # Binary mode and a context manager: the handle was previously
            # opened in text mode and never closed.
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)
        except Exception as e:
            print("Couldn't load sim_data object. Exiting. {}".format(e))
            return

        # Start from NaN so that a cell whose data cannot be read is left
        # out of the nan-aware statistics below instead of counting as 0.
        num_origin_at_init = np.full(n_cells, np.nan)
        doubling_time = np.full(n_cells, np.nan)
        meanRnaMass = np.full(n_cells, np.nan)
        meanElngRate = np.full(n_cells, np.nan)
        meanRrnInitRate = np.full(n_cells, np.nan)

        for idx, simDir in enumerate(all_cells):
            print("cell {} of {}".format(idx, n_cells))
            simOutDir = os.path.join(simDir, "simOut")

            try:
                time = TableReader(
                    os.path.join(simOutDir, "Main")).readColumn("time")
                doubling_time[idx] = time[-1] - time[0]
            except Exception as e:
                print('Error with data for %s: %s' % (simDir, e))
                continue

            timeStepSec = TableReader(
                os.path.join(simOutDir, "Main")).readColumn("timeStepSec")

            meanRnaMass[idx] = TableReader(
                os.path.join(simOutDir, "Mass")).readColumn("rnaMass").mean()
            meanElngRate[idx] = TableReader(
                os.path.join(simOutDir, "RibosomeData")).readColumn(
                    "effectiveElongationRate").mean()

            replication = os.path.join(simOutDir, "ReplicationData")
            numOrigin = TableReader(replication).readColumn("numberOfOric")
            massPerOric = TableReader(replication).readColumn(
                "criticalMassPerOriC")

            # Origin count one step before each step where the critical
            # mass per oriC reaches 1.
            idxInit = np.where(massPerOric >= 1)[0]
            numOriginAtInit = numOrigin[idxInit - 1]
            if numOriginAtInit.size:
                num_origin_at_init[idx] = numOriginAtInit.mean()
            else:
                num_origin_at_init[idx] = np.nan

            transcriptDataFile = TableReader(
                os.path.join(simOutDir, "TranscriptElongationListener"))
            rnaSynth = transcriptDataFile.readColumn("countRnaSynthesized")
            isRRna = sim_data.process.transcription.rnaData["isRRna"]
            # NOTE(review): "* 60. / 3" presumably converts 1/s to 1/min
            # and divides by three rRNA species -- confirm.
            meanRrnInitRate[idx] = (
                rnaSynth[:, isRRna].sum(axis=1) / timeStepSec
            ).mean() * 60. / 3

        sim_rna_mass_per_cell[varIdx] = np.nanmean(meanRnaMass)
        sim_elng_rate[varIdx] = np.nanmean(meanElngRate)
        sim_origins_per_cell_at_initiation[varIdx] = np.nanmean(
            num_origin_at_init)
        sim_doubling_time[varIdx] = np.nanmean(doubling_time) / 60.
        sim_rrn_init_rate[varIdx] = np.nanmean(meanRrnInitRate)

        sim_rna_mass_per_cell_std[varIdx] = np.nanstd(meanRnaMass)
        sim_elng_rate_std[varIdx] = np.nanstd(meanElngRate)
        sim_origins_per_cell_at_initiation_std[varIdx] = np.nanstd(
            num_origin_at_init)
        sim_doubling_time_std[varIdx] = np.nanstd(doubling_time) / 60.
        sim_rrn_init_rate_std[varIdx] = np.nanstd(meanRrnInitRate)

    ax0 = plt.subplot2grid((2, 2), (0, 0))
    ax1 = plt.subplot2grid((2, 2), (1, 0), sharex=ax0)
    ax2 = plt.subplot2grid((2, 2), (0, 1), sharex=ax0)
    ax3 = plt.subplot2grid((2, 2), (1, 1), sharex=ax0)

    lines = {'linestyle': 'dashed'}
    plt.rc('lines', **lines)
    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # Both data sets are drawn in order of decreasing doubling time; the
    # orderings are computed once instead of once per call.
    sort_sim = np.argsort(sim_doubling_time)[::-1]
    sort_bremer = np.argsort(bremer_tau)[::-1]

    ax0.errorbar(
        sim_doubling_time[sort_sim],
        sim_rna_mass_per_cell[sort_sim],
        yerr=sim_rna_mass_per_cell_std[sort_sim],
        color=color_cycle[0], **SIM_PLOT_STYLE)
    ax0.errorbar(
        bremer_tau[sort_bremer],
        bremer_rna_mass_per_cell[sort_bremer],
        color=color_cycle[2], **EXP_PLOT_STYLE)
    ax0.set_title("RNA mass per cell (fg)", fontsize=FONT_SIZE)
    ax0.set_xlim([0, 135])
    ax0.set_ylim([0, 250])
    ax0.legend(loc=1, fontsize='xx-small', markerscale=0.5, frameon=False)

    ax1.errorbar(
        sim_doubling_time[sort_sim],
        sim_elng_rate[sort_sim],
        yerr=sim_elng_rate_std[sort_sim],
        color=color_cycle[0], **SIM_PLOT_STYLE)
    ax1.errorbar(
        bremer_tau[sort_bremer],
        bremer_elng_rate[sort_bremer],
        color=color_cycle[2], **EXP_PLOT_STYLE)
    ax1.set_title("Ribosome elongation\nrate (aa/s/ribosome)",
                  fontsize=FONT_SIZE)
    ax1.set_xlabel("Doubling time (min)", fontsize=FONT_SIZE)
    ax1.set_ylim([0, 24])

    ax2.errorbar(
        sim_doubling_time[sort_sim],
        sim_origins_per_cell_at_initiation[sort_sim],
        yerr=sim_origins_per_cell_at_initiation_std[sort_sim],
        color=color_cycle[0], **SIM_PLOT_STYLE)
    ax2.errorbar(
        bremer_tau[sort_bremer],
        bremer_origins_per_cell_at_initiation[sort_bremer],
        color=color_cycle[2], **EXP_PLOT_STYLE)
    ax2.set_title("Average origins at chrom. init.", fontsize=FONT_SIZE)
    ax2.set_ylim([0.5, 4.5])

    ax3.errorbar(
        sim_doubling_time[sort_sim],
        sim_rrn_init_rate[sort_sim],
        yerr=sim_rrn_init_rate_std[sort_sim],
        color=color_cycle[0], **SIM_PLOT_STYLE)
    ax3.errorbar(
        bremer_tau[sort_bremer],
        bremer_rrn_init_rate[sort_bremer],
        color=color_cycle[2], **EXP_PLOT_STYLE)
    ax3.set_title("Rate of rrn initiation (1/min)", fontsize=FONT_SIZE)
    ax3.set_ylim([0, 2500])
    ax3.set_xlabel("Doubling time (min)", fontsize=FONT_SIZE)

    axes_list = [ax0, ax1, ax2, ax3]

    for a in axes_list:
        for tick in a.yaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)
        for tick in a.xaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)

    whitePadSparklineAxis(ax0, False)
    whitePadSparklineAxis(ax1)
    whitePadSparklineAxis(ax2, False)
    whitePadSparklineAxis(ax3)

    plt.subplots_adjust(bottom=0.2, wspace=0.3)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Release the figure; it was previously left open after export.
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Plot four per-variant simulation statistics against doubling time.

    Per-cell values come from ``mp_worker``, mapped over the cell
    directories of each variant in a process pool.  Columns 0-4 of its
    output are read as doubling time, RNA mass, elongation rate, origins at
    initiation and rrn initiation rate.  Nan-aware means and standard
    deviations are plotted in four panels next to the module-level
    ``bremer_*`` reference values.  The figure is exported as
    ``<plotOutFileName>__test``.

    Side effect: sets the module-level global ``is_rRNA`` before the pool
    is created (presumably so ``mp_worker`` can read it in the worker
    processes -- TODO confirm).

    Args:
        inputDir: Directory handed to ``AnalysisPaths``; must already exist.
        plotOutDir: Output directory; created with ``os.mkdir`` if missing.
        plotOutFileName: Base of the file name passed to ``exportFigure``.
        simDataFile: Not used; sim_data is loaded per variant instead.
        validationDataFile: Not used.
        metadata: Passed through to ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    global is_rRNA

    if not os.path.isdir(inputDir):
        raise Exception("variantDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    # Column positions in the rows returned by mp_worker.
    index_doubling_time = 0
    index_rna_mass = 1
    index_elng_rate = 2
    index_n_origin_init = 3
    index_rrn_init_rate = 4

    sim_doubling_time = []
    sim_rna_mass_per_cell = []
    sim_rna_mass_per_cell_std = []
    sim_elng_rate = []
    sim_elng_rate_std = []
    sim_origins_per_cell_at_initiation = []
    sim_origins_per_cell_at_initiation_std = []
    sim_rrn_init_rate = []
    sim_rrn_init_rate_std = []

    for varIdx in range(ap.n_variant):
        variant = variants[varIdx]

        print("variant {}".format(variant))

        sim_dirs = ap.get_cells(variant=[variant])
        n_sims = len(sim_dirs)

        print("Total cells: {}".format(n_sims))

        try:
            # Binary mode and a context manager: the handle was previously
            # opened in text mode and never closed.
            with open(ap.get_variant_kb(variant), 'rb') as f:
                sim_data = cPickle.load(f)
            is_rRNA = sim_data.process.transcription.rnaData["isRRna"]
        except Exception as e:
            print("Couldn't load sim_data object. Exiting. {}".format(e))
            return

        pool = Pool(parallelization.cpus())
        try:
            output = np.array(pool.map(mp_worker, sim_dirs))
        finally:
            # Always shut the workers down, also when map() raises.
            pool.close()
            pool.join()

        # Filter output from broken files using np.nanmean and np.nanstd
        sim_doubling_time.append(
            np.nanmean(output[:, index_doubling_time]) / 60.)
        sim_rna_mass_per_cell.append(
            np.nanmean(output[:, index_rna_mass]))
        sim_rna_mass_per_cell_std.append(
            np.nanstd(output[:, index_rna_mass]))
        sim_elng_rate.append(
            np.nanmean(output[:, index_elng_rate]))
        sim_elng_rate_std.append(
            np.nanstd(output[:, index_elng_rate]))
        sim_origins_per_cell_at_initiation.append(
            np.nanmean(output[:, index_n_origin_init]))
        sim_origins_per_cell_at_initiation_std.append(
            np.nanstd(output[:, index_n_origin_init]))
        sim_rrn_init_rate.append(
            np.nanmean(output[:, index_rrn_init_rate]))
        sim_rrn_init_rate_std.append(
            np.nanstd(output[:, index_rrn_init_rate]))

    sim_doubling_time = np.array(sim_doubling_time)

    # Plot
    _, axes_list = plt.subplots(1, 4, figsize=(15, 5))
    ax0, ax1, ax2, ax3 = axes_list
    # Both data sets are drawn in order of decreasing doubling time.
    sort_sim = np.argsort(sim_doubling_time)[::-1]
    sort_bremer = np.argsort(bremer_tau)[::-1]

    # RNA mass per cell
    ax0.errorbar(
        sim_doubling_time[sort_sim],
        np.array(sim_rna_mass_per_cell)[sort_sim],
        yerr=np.array(sim_rna_mass_per_cell_std)[sort_sim],
        color='tab:blue', **SIM_PLOT_STYLE)
    ax0.errorbar(
        bremer_tau[sort_bremer],
        bremer_rna_mass_per_cell[sort_bremer],
        color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
    ax0.set_title('RNA mass per cell (fg)', fontsize=FONT_SIZE)
    ax0.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)
    ax0.set_xlim([0, 135])
    ax0.set_ylim([0, 250])
    ax0.legend(loc=1, fontsize='xx-small', markerscale=0.5, frameon=False)

    # Ribosome elongation rate
    ax1.errorbar(
        sim_doubling_time[sort_sim],
        np.array(sim_elng_rate)[sort_sim],
        yerr=np.array(sim_elng_rate_std)[sort_sim],
        color='tab:blue', **SIM_PLOT_STYLE)
    ax1.errorbar(
        bremer_tau[sort_bremer],
        bremer_elng_rate[sort_bremer],
        color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
    ax1.set_title('Ribosome elongation\nrate (aa/s/ribosome)',
                  fontsize=FONT_SIZE)
    ax1.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)
    ax1.set_ylim([5, 24])

    # Number of origins at chromosome initiation
    ax2.errorbar(
        sim_doubling_time[sort_sim],
        np.array(sim_origins_per_cell_at_initiation)[sort_sim],
        yerr=np.array(sim_origins_per_cell_at_initiation_std)[sort_sim],
        color='tab:blue', **SIM_PLOT_STYLE)
    ax2.errorbar(
        bremer_tau[sort_bremer],
        bremer_origins_per_cell_at_initiation[sort_bremer],
        color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
    ax2.set_title('Average origins at chrom. init.', fontsize=FONT_SIZE)
    ax2.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)
    ax2.set_ylim([0.5, 4.5])

    # rRNA initiation rate
    ax3.errorbar(
        sim_doubling_time[sort_sim],
        np.array(sim_rrn_init_rate)[sort_sim],
        yerr=np.array(sim_rrn_init_rate_std)[sort_sim],
        color='tab:blue', **SIM_PLOT_STYLE)
    ax3.errorbar(
        bremer_tau[sort_bremer],
        bremer_rrn_init_rate[sort_bremer],
        color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
    ax3.set_title('Rate of rrn initiation (1/min)', fontsize=FONT_SIZE)
    ax3.set_ylim([0, 2500])
    ax3.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)

    for ax in axes_list:
        # Ticks only at the axis limits.
        ax.set_xlim(X_LIM)
        ax.set_xticks(X_LIM)
        ax.set_ylim(ax.get_ylim())
        ax.set_yticks(ax.get_ylim())

        for tick in ax.yaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)
        for tick in ax.xaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)

    plt.subplots_adjust(bottom=0.25, top=0.75, left=0.05, right=0.95,
                        wspace=0.4)
    exportFigure(plt, plotOutDir, '{}__test'.format(plotOutFileName),
                 metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Plot mean RNA/protein mass ratio against mean growth rate per variant.

    Per cell, the growth rate is ``ln(2)`` divided by the simulated time span
    in hours, and the ratio is mean ``rnaMass`` over mean ``proteinMass``.
    The first three variants are drawn as error-bar points together with
    the line ``y = x / 4.5 + 0.087``.

    Args:
        inputDir: Directory handed to ``AnalysisPaths``; must already exist.
        plotOutDir: Output directory, passed to ``filepath.makedirs``.
        plotOutFileName: File name passed to ``exportFigure``.
        simDataFile: Not used.
        validationDataFile: Not used.
        metadata: Must have ``metadata["variant"] == "condition"``, otherwise
            the method prints a notice and returns.  Also passed to
            ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    if metadata["variant"] != "condition":
        print('This analysis only runs for the "condition" variant.')
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    n_gens = ap.n_generation
    variants = ap.get_variants()

    if n_gens - 1 < FIRST_GENERATION:
        print('Not enough generations to plot.')
        return

    # Number of series drawn; they are indexed directly below, so return
    # instead of raising IndexError when there are fewer variants.
    n_plotted = 3
    if len(variants) < n_plotted:
        print('Not enough variants to plot.')
        return

    all_growth_rates = []
    all_rna_to_protein_ratios = []

    for variant in variants:
        doubling_times = np.zeros(0)
        variant_rna_to_protein_ratios = np.zeros(0)

        # A variant without cells still contributes an (empty) series so
        # that list position keeps matching the variant order.
        all_cells = ap.get_cells(
            variant=[variant],
            generation=range(FIRST_GENERATION, n_gens))

        for simDir in all_cells:
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                rna_mass = mass.readColumn("rnaMass")
                protein_mass = mass.readColumn("proteinMass")

                time = TableReader(
                    os.path.join(simOutDir, "Main")).readColumn("time")

                doubling_time = (time[-1] - time[0]) / 3600.
                ratio = rna_mass.mean() / protein_mass.mean()
            except Exception:
                # Best effort: skip cells whose output cannot be read.
                continue

            doubling_times = np.hstack((doubling_times, doubling_time))
            variant_rna_to_protein_ratios = np.hstack(
                (variant_rna_to_protein_ratios, ratio))

        all_growth_rates.append(np.log(2) / doubling_times)
        all_rna_to_protein_ratios.append(variant_rna_to_protein_ratios)

    plt.figure(figsize=FIGSIZE)
    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    for i in range(n_plotted):
        plt.errorbar(
            all_growth_rates[i].mean(),
            all_rna_to_protein_ratios[i].mean(),
            yerr=all_rna_to_protein_ratios[i].std(),
            color=color_cycle[0], marker='o', markersize=5,
            linewidth=1, capsize=2)

    # Add linear plot proposed in Scott et al. (2010)
    x_linear = np.linspace(0, 3, 100)
    y_linear = x_linear / 4.5 + 0.087
    plt.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

    plt.xlim([0, 3])
    plt.ylim([0, 1.6])
    # Raw string: "\l" is not a valid escape sequence.
    plt.xlabel(r"Growth rate $\lambda$ (hour$^{-1}$)")
    plt.ylabel("RNA/protein mass ratio")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Plot normalized added mass against normalized initial mass per variant.

    For variants 0, 1 and 2, the first and last "dryMass" value of every
    cell in generations 2 and 3 are read. Initial and added masses are
    each divided by their mean, restricted to the window
    [0.6, 1.25] x [0.45, 1.5], binned, and fitted with linregress.
    Exported figures: one combined scatter figure, one "stripped" scatter
    per variant, and two histogram figures of the unfiltered scaled values.

    Args:
        inputDir: directory handed to AnalysisPaths; must already exist.
        plotOutDir: output directory (created if it does not exist).
        plotOutFileName: base file name for all exported figures.
        simDataFile: unused by this plot.
        validationDataFile: unused by this plot.
        metadata: passed through to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception("inputDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)

    if ap.n_generation == 1:
        print("Need more data to create addedMass")
        return

    allScatter = plt.figure()
    allScatter.set_figwidth(11)
    allScatter.set_figheight(6)

    xHist = plt.figure()
    xHist.set_figwidth(11)
    xHist.set_figheight(6)

    yHist = plt.figure()
    yHist.set_figwidth(11)
    yHist.set_figheight(6)

    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    title_list = [
        "Glucose minimal\n" + r"$\tau = $" + "44 min",
        "Glucose minimal anaerobic\n" + r"$\tau = $" + "100 min",
        "Glucose minimal + 20 amino acids\n" + r"$\tau = $" + "22 min"
        ]

    # Variant index -> subplot column. Variants not listed are skipped.
    plot_column = {0: 1, 1: 0, 2: 2}
    gen = [2, 3]

    plot = False

    for varIdx in ap.get_variants():
        if varIdx not in plot_column:
            continue
        plotIdx = plot_column[varIdx]

        initial_masses = np.zeros(0)
        final_masses = np.zeros(0)

        all_cells = ap.get_cells(generation=gen, variant=[varIdx])
        if len(all_cells) == 0:
            continue
        plot = True

        # Cells whose mass table cannot be read are counted, not fatal.
        fail = 0
        for simDir in all_cells:
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                cellMass = mass.readColumn("dryMass")

                initial_masses = np.hstack((initial_masses, cellMass[0]))
                final_masses = np.hstack((final_masses, cellMass[-1]))
            except Exception as e:
                print(e)
                fail += 1

        added_masses = final_masses - initial_masses

        all_scaled_initial_masses = initial_masses / initial_masses.mean()
        all_scaled_added_masses = added_masses / added_masses.mean()

        # Keep only the points that fall inside the plotted window.
        idxs_to_keep = np.where(
            (0.6 < all_scaled_initial_masses)
            & (all_scaled_initial_masses < 1.25)
            & (0.45 < all_scaled_added_masses)
            & (all_scaled_added_masses < 1.5))

        scaled_initial_masses = all_scaled_initial_masses[idxs_to_keep]
        scaled_added_masses = all_scaled_added_masses[idxs_to_keep]

        nbins = 5

        # Per-bin count, sum and sum of squares of the added mass give the
        # per-bin mean and sample standard deviation.
        n, xbin = np.histogram(scaled_initial_masses, bins=nbins)
        sy, xbin = np.histogram(
            scaled_initial_masses, bins=nbins, weights=scaled_added_masses)
        sy2, xbin = np.histogram(
            scaled_initial_masses, bins=nbins,
            weights=scaled_added_masses * scaled_added_masses)
        mean = sy / n
        std = np.sqrt(sy2 / (n - 1) - n * mean * mean / (n - 1))

        slope, intercept, r_value, p_value, std_err = linregress(
            scaled_initial_masses, scaled_added_masses)

        count_label = ", n=%d, n*=%d" % (
            (len(all_cells) - fail), len(scaled_initial_masses))

        # plot all scatter plots
        plt.figure(allScatter.number)
        ax = plt.subplot2grid((1, 3), (0, plotIdx))
        ax.plot(
            scaled_initial_masses, scaled_added_masses, '.',
            color="black", alpha=0.2, zorder=1, markeredgewidth=0.0)
        ax.errorbar(
            ((xbin[1:] + xbin[:-1]) / 2), mean, yerr=std,
            color="black", linewidth=1, zorder=2)
        ax.plot(
            scaled_initial_masses,
            slope * scaled_initial_masses + intercept,
            color="blue")

        ax.set_title(
            title_list[varIdx] + count_label
            + "\n" + r"$m_{add}$=%.3f$\times$$m_{init}$ + %.3f" % (slope, intercept)
            + "\n" + "p-value=%0.2g" % p_value,
            fontsize=FONT_SIZE)

        ax.set_xlim([0.6, 1.25])
        ax.set_ylim([0.45, 1.5])
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

        # Variant 1 is drawn in the left-most column, so it carries the
        # shared y label.
        if varIdx == 1:
            ax.set_ylabel("Normed added mass", fontsize=FONT_SIZE)
        ax.set_xlabel("Normed initial mass", fontsize=FONT_SIZE)

        plt.subplots_adjust(bottom=0.2)

        whitePadSparklineAxis(ax)

        for tick in ax.yaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)
        for tick in ax.xaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)

        # plot stripped figure
        fig = plt.figure()
        fig.set_figwidth(1.73)
        fig.set_figheight(1.18)
        ax = plt.subplot2grid((1, 1), (0, 0))
        ax.plot(
            scaled_initial_masses, scaled_added_masses, '.',
            color=color_cycle[0], alpha=0.2, zorder=1, markeredgewidth=0.0)
        ax.set_title(title_list[varIdx] + count_label, fontsize=FONT_SIZE)
        ax.plot(
            scaled_initial_masses,
            slope * scaled_initial_masses + intercept,
            color='k')

        ax.set_ylim([0.45, 1.5])
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

        plt.subplots_adjust(bottom=0.2)

        whitePadSparklineAxis(ax)

        ax.tick_params(
            axis='x', which='both', bottom='off', top='off',
            labelbottom='off')
        ax.tick_params(
            axis='y', which='both', left='off', right='off',
            labelleft='off')

        ax.set_xlabel("")
        ax.set_ylabel("")

        plt.subplots_adjust(
            top=0.95, bottom=3 * trim, left=2 * trim, right=0.95,
            hspace=0, wspace=0)

        exportFigure(
            plt, plotOutDir, plotOutFileName + str(varIdx) + "_stripped",
            metadata, transparent=True)

        # plot histogram for x-axis
        plt.figure(xHist.number)
        bins = 25
        ax = plt.subplot2grid((1, 3), (0, plotIdx))
        ax.hist(all_scaled_initial_masses, bins, color=color_cycle[0])
        ax.axvline(x=0.6, color="k", linestyle="--")
        ax.axvline(x=1.25, color="k", linestyle="--")
        ax.set_title(
            title_list[varIdx] + "\n" + "[0.6, 1.25]", fontsize=FONT_SIZE)
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        ax.set_xlabel("Normed initial mass", fontsize=FONT_SIZE)

        plt.subplots_adjust(bottom=0.2)

        whitePadSparklineAxis(ax)

        for tick in ax.yaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)
        for tick in ax.xaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)

        # plot histogram for y-axis
        plt.figure(yHist.number)
        ax = plt.subplot2grid((1, 3), (0, plotIdx))
        ax.hist(all_scaled_added_masses, bins, color=color_cycle[0])
        ax.axvline(x=0.45, color="k", linestyle="--")
        ax.axvline(x=1.5, color="k", linestyle="--")
        ax.set_title(
            title_list[varIdx] + "\n" + "[0.45, 1.5]", fontsize=FONT_SIZE)
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        ax.set_xlabel("Normed added mass", fontsize=FONT_SIZE)

        plt.subplots_adjust(bottom=0.2)

        whitePadSparklineAxis(ax)

        for tick in ax.yaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)
        for tick in ax.xaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)

    # Only export the shared figures if at least one variant had cells.
    if plot:
        plt.figure(allScatter.number)
        exportFigure(plt, plotOutDir, plotOutFileName, metadata)

        plt.figure(xHist.number)
        exportFigure(
            plt, plotOutDir,
            plotOutFileName + "_histogram_scaled_initial_mass",
            metadata, transparent=True)

        plt.figure(yHist.number)
        exportFigure(
            plt, plotOutDir,
            plotOutFileName + "_histogram_scaled_added_mass",
            metadata, transparent=True)

    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Compare simulated k_cat distributions for two variants with measurements.

    The variants are scanned with get_disabled_constraints to find the
    baseline (no additional disabled constraints) and the variant whose
    additional disabled constraints equal ADDITIONAL_DISABLED_CONSTRAINTS.
    For each reaction in REACTIONS, the flux is divided by the first
    catalyst's concentration and by the enzyme saturation (where a KM is
    listed in SIMULATION_KMS) to give an effective k_cat per time step.
    Each reaction gets one subplot with a violin per variant and the
    temperature-adjusted measurements as scatter points.

    Args:
        inputDir: directory handed to AnalysisPaths; must already exist.
        plotOutDir: directory the figure is exported to (created if needed).
        plotOutFileName: base file name passed to exportFigure.
        simDataFile: unused; sim_data is loaded from inputDir/kb instead.
        validationDataFile: unused by this plot.
        metadata: passed through to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    # scan all variants to find variant indexes for comparison
    old_variant = None
    new_variant = None
    for variant in variants:
        _, additional_disabled = get_disabled_constraints(variant)
        if additional_disabled is None:
            old_variant = variant
        elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
            new_variant = variant

    # if the baseline variant or the new variant are missing, stop plotting
    if (old_variant is None) or (new_variant is None):
        print('Variant simulations missing!')
        return

    compared_variants = [old_variant, new_variant]

    # Load sim_data
    sim_data_path = os.path.join(
        inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
    with open(sim_data_path, 'rb') as f:
        sim_data = cPickle.load(f)

    # get reactions from sim_data
    reactionCatalysts = sim_data.process.metabolism.reactionCatalysts

    reaction_to_enzyme = {r: reactionCatalysts[r][0] for r in REACTIONS}
    # Materialised once: the names are iterated several times below and
    # must keep one fixed order to line up with the count columns.
    enzyme_names = list(reaction_to_enzyme.values())
    reactions_with_km = sorted(SIMULATION_KMS)
    km_metabolites = [
        SIMULATION_KMS[r]['metabolite'] for r in reactions_with_km]
    kms = np.array([SIMULATION_KMS[r]['KM'] for r in reactions_with_km])
    km_constraint_indices = [
        SIMULATION_KMS[r]['constraint_index'] for r in reactions_with_km]

    # initialize dictionaries for fluxes and concentrations
    all_reaction_fluxes = {}
    all_enzyme_concentrations = {}
    all_km_adjustments = {}
    for variant in compared_variants:
        reaction_fluxes = {r: [] for r in REACTIONS}
        enzyme_concentrations = {e: [] for e in enzyme_names}
        km_adjustments = {r: [] for r in reactions_with_km}

        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, "simOut")

            # Listeners used
            try:
                kinetics_reader = TableReader(
                    os.path.join(simOutDir, 'EnzymeKinetics'))
                fbaResults = TableReader(
                    os.path.join(simOutDir, "FBAResults"))
            except Exception as e:
                print(e)
                continue

            # read from kinetics listener
            counts_to_molar = (
                (COUNTS_UNITS / VOLUME_UNITS)
                * kinetics_reader.readColumn(
                    'countsToMolar')[START_TIME_STEP:].reshape(-1, 1))
            all_constraints_used = kinetics_reader.readColumn(
                'reactionConstraint')[START_TIME_STEP:]

            # Store fluxes
            reactionIDs = np.array(fbaResults.readAttribute("reactionIDs"))
            reactionFluxes = fbaResults.readColumn(
                "reactionFluxes")[START_TIME_STEP:, :]
            reaction_flux_dict = dict(zip(reactionIDs, reactionFluxes.T))
            for reaction_id in REACTIONS:
                reaction_fluxes[reaction_id].extend(
                    list(reaction_flux_dict[reaction_id]))

            # Store enzyme concentrations
            enzyme_counts, met_counts = read_bulk_molecule_counts(
                simOutDir, (enzyme_names, km_metabolites))
            enzyme_conc = (
                counts_to_molar.asNumber(COUNTS_UNITS / VOLUME_UNITS)
                * enzyme_counts[START_TIME_STEP:, :])
            met_conc = (
                counts_to_molar.asNumber(units.umol / units.L)
                * met_counts[START_TIME_STEP:, :])
            for enzyme_id, conc_time_series in zip(enzyme_names, enzyme_conc.T):
                enzyme_concentrations[enzyme_id].extend(
                    list(conc_time_series))

            # Calculate enzyme saturation for reactions with KM values
            # adjust_km[t, i] is True where time step t used constraint i.
            adjust_km = np.zeros(
                (len(counts_to_molar), len(km_constraint_indices)), bool)
            for i, idx in enumerate(km_constraint_indices):
                constraint_used, _ = np.where(all_constraints_used == idx)
                adjust_km[constraint_used, i] = True
            enzyme_saturation = met_conc / (met_conc + kms)
            # No adjustment (factor 1) where the KM constraint was not used.
            enzyme_saturation[~adjust_km] = 1
            for rxn, saturation in zip(reactions_with_km, enzyme_saturation.T):
                km_adjustments[rxn].extend(list(saturation))

        all_reaction_fluxes[variant] = reaction_fluxes
        all_enzyme_concentrations[variant] = enzyme_concentrations
        all_km_adjustments[variant] = km_adjustments

    ### Make figure ###
    cols = 1
    rows = len(REACTIONS)
    plt.figure(figsize=(cols * 3, rows * 5))

    # go through each reaction to show predicted k_cat distribution for the
    # new and old variant, and experimental measurements
    for reaction_idx, reaction_id in enumerate(REACTIONS):
        enzyme_id = reaction_to_enzyme[reaction_id]

        # old measurements, scaled by 2**((37 - T) / 10) for the
        # temperature T they were recorded at
        reaction_measurements = OLD_MEASUREMENTS[reaction_id]
        measurements = reaction_measurements['measurements']
        temps = reaction_measurements['temps']
        adjusted_measurements = np.array([
            2**((37. - t) / 10.) * m
            for (m, t) in zip(measurements, temps)])

        # new measurements
        reaction_measurements = NEW_MEASUREMENTS.get(reaction_id, {})
        measurements = reaction_measurements.get('measurements', [])
        temps = reaction_measurements.get('temps', [])
        new_adjusted_measurements = np.array([
            2**((37. - t) / 10.) * m
            for (m, t) in zip(measurements, temps)])

        # get effective kcat for GLUTATHIONE-REDUCT
        if reaction_id == 'GLUTATHIONE-REDUCT-NADPH-RXN':
            # saturated_fraction calculated from Smirnova, et al. (2005).
            # "Effects of cystine and hydrogen peroxide on glutathione
            # status and expression of antioxidant genes in Escherichia coli"
            # Oxidized glutathione (GSSG in table 2) gives ~19 uM
            # concentration (with 0.3 dry fraction and 1.1 g/mL density)
            # With 61 uM Km for this reaction, that gives a saturated
            # fraction of 0.238
            saturated_fraction = 0.238
            new_adjusted_measurements = adjusted_measurements * saturated_fraction

        # Initialize subplots
        ax = plt.subplot(rows, cols, reaction_idx + 1)

        # calculate the reaction's k_cat distribution for each compared variant
        k_cat_distribution = {}
        for variant in compared_variants:
            ## Get data
            rxn_fluxes = np.array(
                all_reaction_fluxes[variant][reaction_id])  # mmol / L / s
            enzyme_concs = np.array(
                all_enzyme_concentrations[variant][enzyme_id])  # mmol / L
            saturation = np.array(all_km_adjustments[variant].get(
                reaction_id, [1] * len(rxn_fluxes)))

            # calculate k_cats (adjusted for saturation in the sim), remove
            # zeros, save to this variant's distribution
            k_cats = rxn_fluxes / enzyme_concs / saturation
            k_cats = k_cats[k_cats > 1e-10]
            k_cat_distribution[variant] = k_cats

        data = [
            k_cat_distribution[old_variant],
            k_cat_distribution[new_variant],
            ]

        # plot
        violin_pos = [1, 3]  # position of violin plots [old, new]
        measure_pos = 2  # position of measurements
        ax.violinplot(
            data, violin_pos, widths=1.0,
            showmeans=False, showextrema=False, showmedians=False)
        ax.scatter(
            np.full_like(adjusted_measurements, measure_pos),
            adjusted_measurements,
            marker='o', color='#eb7037', s=50, alpha=0.7)
        ax.scatter(
            np.full_like(new_adjusted_measurements, measure_pos),
            new_adjusted_measurements,
            marker='o', color='#eb7037', s=50, alpha=0.7)

        # format
        rxn_id_length = 25
        text_reaction_id = ('reaction: %s' % reaction_id[:rxn_id_length])
        labels = [
            '\nModel Predicted\n(Old Constraints)',
            'Measured',
            '\nModel Predicted\n(New Constraints)',
            ]
        ax.set_title(text_reaction_id, fontsize=8)
        ax.set_ylabel('$k_{cat}$ (1/s)', fontsize=8)
        set_ticks(ax, labels)
        ax.set_yscale('log')

    ### Create Plot ###
    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Summarize per-variant simulation metrics as a stack of bar charts.

    analyze_variant is mapped over every variant in a process pool. Each
    result supplies the variant's lambda value, number of sims, growth
    rate, correlation and off-axis counts, and objective value statistics.
    The first figure has eight bar charts placed at log10(lambda); the
    second (suffix "_obj") plots kinetic against homeostatic objective
    values on log axes with error bars, labelled by variant position.

    Args:
        inputDir: directory handed to AnalysisPaths; must already exist.
        plotOutDir: directory the figures are exported to (created if needed).
        plotOutFileName: base file name for the exported figures.
        simDataFile: unused by this plot.
        validationDataFile: pickle file providing reactionFlux.toya2010fluxes.
        metadata: passed through to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()
    n_variants = len(variants)

    if n_variants <= 1:
        print('This plot only runs for multiple variants')
        return

    filepath.makedirs(plotOutDir)

    # Load validation data (context manager so the handle is closed)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)
    toya_reactions = validation_data.reactionFlux.toya2010fluxes['reactionID']
    toya_fluxes = np.array([
        x.asNumber(DCW_FLUX_UNITS)
        for x in validation_data.reactionFlux.toya2010fluxes['reactionFlux']])
    outlier_filter = [rxn not in OUTLIER_REACTIONS for rxn in toya_reactions]

    # Arrays to populate for plots
    lambdas = np.zeros(n_variants)
    n_sims = np.zeros(n_variants)
    growth_rates = np.zeros(n_variants)
    conc_correlation = np.zeros(n_variants)
    n_conc_off_axis = np.zeros(n_variants)
    flux_correlation = np.zeros(n_variants)
    nonzero_flux_correlation = np.zeros(n_variants)
    n_flux_above_0 = np.zeros(n_variants)
    n_flux_off_axis = np.zeros(n_variants)
    correlation_coefficient = np.zeros(n_variants)
    filtered_correlation_coefficient = np.zeros(n_variants)
    homeostatic_objective_value = np.zeros(n_variants)
    kinetic_objective_value = np.zeros(n_variants)
    homeostatic_objective_std = np.zeros(n_variants)
    kinetic_objective_std = np.zeros(n_variants)

    # Pull information from sim data and listeners in parallel
    pool = Pool(processes=parallelization.plotter_cpus())
    try:
        args = zip(
            variants,
            [ap] * n_variants,
            [toya_reactions] * n_variants,
            [toya_fluxes] * n_variants,
            [outlier_filter] * n_variants
            )
        results = pool.map(analyze_variant, args)
    finally:
        # Release the workers even if a task raised.
        pool.close()
        pool.join()

    # n_metabolites and n_fluxes are taken from the last result.
    for i, result in enumerate(results):
        (lambdas[i],
            n_sims[i],
            growth_rates[i],
            conc_correlation[i],
            n_conc_off_axis[i],
            flux_correlation[i],
            n_flux_off_axis[i],
            nonzero_flux_correlation[i],
            n_flux_above_0[i],
            correlation_coefficient[i],
            filtered_correlation_coefficient[i],
            kinetic_objective_value[i],
            kinetic_objective_std[i],
            homeostatic_objective_value[i],
            homeostatic_objective_std[i],
            n_metabolites,
            n_fluxes) = result

    tick_labels = [
        r'$10^{%i}$' % (np.log10(x),) if x != 0 else '0'
        for x in lambdas]
    # Bars sit at log10(lambda); a lambda of 0 is placed one decade below
    # the smallest non-zero lambda.
    lambdas = [
        np.log10(x) if x != 0
        else np.nanmin(np.log10(lambdas[lambdas != 0]))-1
        for x in lambdas]

    plt.figure(figsize = (8.5, 22))
    plt.style.use('seaborn-deep')
    subplots = 8

    # Growth rates
    ax = plt.subplot(subplots, 1, 1)
    plt.bar(lambdas, growth_rates / growth_rates[0], align='center')
    plt.axhline(1, linestyle='--', color='k')
    plt.ylim([0, 2])
    plt.ylabel('Growth rate deviation\nfrom no kinetics')
    whitePadSparklineAxis(ax, xAxis=False)
    plt.yticks([0, 1, 2])

    # Flux target comparisons
    ax = plt.subplot(subplots, 1, 2)
    plt.bar(lambdas, nonzero_flux_correlation, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Kinetic target flux PCC')
    whitePadSparklineAxis(ax, xAxis=False)

    ax = plt.subplot(subplots, 1, 3)
    plt.bar(lambdas, n_flux_above_0 / n_fluxes, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Fraction of fluxes\nabove 0')
    whitePadSparklineAxis(ax, xAxis=False)

    ax = plt.subplot(subplots, 1, 4)
    plt.bar(lambdas, n_flux_off_axis / n_fluxes, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Fraction of fluxes\noff axis (>{:.0f}%)'.format(FRAC_FLUX_OFF_AXIS*100))
    whitePadSparklineAxis(ax, xAxis=False)

    # Metabolite comparisons
    ax = plt.subplot(subplots, 1, 5)
    plt.bar(lambdas, conc_correlation, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Concentration PCC')
    whitePadSparklineAxis(ax, xAxis=False)

    ax = plt.subplot(subplots, 1, 6)
    plt.bar(lambdas, n_conc_off_axis / n_metabolites, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Fraction of concentrations\noff axis (>{:.0f}%)'.format(FRAC_CONC_OFF_AXIS*100))
    whitePadSparklineAxis(ax, xAxis=False)

    # Toya comparison
    ax = plt.subplot(subplots, 1, 7)
    plt.bar(lambdas, filtered_correlation_coefficient, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Central carbon flux PCC')
    whitePadSparklineAxis(ax, xAxis=False)

    # Viable sims
    ax = plt.subplot(subplots, 1, 8)
    plt.bar(lambdas, n_sims, align='center')
    plt.ylabel('Number of sims\nwith data')
    whitePadSparklineAxis(ax)
    plt.xticks(lambdas, tick_labels)

    plt.xlabel('lambda')

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Plot kinetic vs homeostatic objective values
    plt.figure(figsize=(3.5, 3.5))
    ax = plt.gca()
    ax.set_xscale("log", nonposx='clip')
    ax.set_yscale("log", nonposy='clip')
    plt.errorbar(
        homeostatic_objective_value, kinetic_objective_value,
        xerr=homeostatic_objective_std, yerr=kinetic_objective_std,
        fmt='none', ecolor='k', alpha=0.5, linewidth=0.5)
    plt.plot(
        homeostatic_objective_value, kinetic_objective_value, "ob",
        markeredgewidth=0.1, alpha=0.9)
    # Label each point with its variant position, slightly below the marker.
    points = zip(homeostatic_objective_value, kinetic_objective_value)
    for i, (homeostatic, kinetic) in enumerate(points):
        plt.text(
            homeostatic, 0.6*kinetic, i,
            horizontalalignment='center', verticalalignment='center')
    plt.xlabel('Homeostatic Objective Value')
    plt.ylabel('Kinetics Objective Value')

    whitePadSparklineAxis(ax)

    # Adjust limits to get tick labels to display
    xlim = ax.get_xlim()
    xlim = [10**np.floor(np.log10(xlim[0])), 10**np.ceil(np.log10(xlim[1]))]
    ax.set_xticks(xlim)
    ylim = ax.get_ylim()
    ylim = [10**np.floor(np.log10(ylim[0])), 10**np.ceil(np.log10(ylim[1]))]
    ax.set_yticks(ylim)

    exportFigure(plt, plotOutDir, '{}_obj'.format(plotOutFileName), metadata)

    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Plot and tabulate how each adjusted parameter affects simulation outputs.

    analyze_variant is mapped over all non-control variants and its results
    are summed. From these sums, the mean growth rate and flux correlation
    are computed per parameter for increases and decreases separately, and
    converted to z scores. Outputs:
      - a figure of sorted z scores plus the parameters beyond a
        Bonferroni-style threshold (p < 0.05 / total_params),
      - an "_individual" figure for the parameters with the most extreme
        z score differences,
      - a TSV file with z scores and raw averages for every parameter.

    Args:
        inputDir: directory handed to AnalysisPaths; must already exist.
        plotOutDir: directory for the figures and TSV (created if needed).
        plotOutFileName: base file name for all outputs.
        simDataFile: unused; sim_data is loaded from inputDir/kb instead.
        validationDataFile: pickle file loaded into the validation_data global.
        metadata: mapping whose 'variant' must be 'param_sensitivity';
            also passed to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
        ValueError: if the number of generated parameter ids differs from
            the total parameter count.
    """
    # Function-scope import keeps reduce available where it is no longer a
    # builtin; functools.reduce is the same function on older interpreters.
    from functools import reduce

    # Module-level state, presumably read by analyze_variant in the worker
    # processes -- confirm before removing.
    global ap
    global sim_data
    global validation_data

    if metadata.get('variant', '') != 'param_sensitivity':
        print('This plot only runs for the param_sensitivity variant.')
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = np.array(ap.get_variants())

    # Check to analyze control (variant 0) separately from other variants
    use_control = False
    if CONTROL_VARIANT in variants:
        use_control = True
        variants = variants[variants != CONTROL_VARIANT]
    n_variants = len(variants)

    # Load one instance of sim_data to get number of parameters and ids
    sim_data_path = os.path.join(
        inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
    with open(sim_data_path, 'rb') as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    # sim_data information
    total_params = np.sum(number_params(sim_data))
    gene_data = sim_data.process.replication.geneData
    rna_to_gene = {gene['rnaId']: gene['symbol'] for gene in gene_data}
    monomer_to_gene = {gene['monomerId']: gene['symbol'] for gene in gene_data}
    rna_ids = sim_data.process.transcription.rnaData['id']
    monomer_ids = sim_data.process.translation.monomerData['id']

    # IDs must match order from param_indices() from param_sensitivity.py variant
    # (the last three characters of each id are stripped before the lookup)
    param_ids = np.array(
        ['{} RNA deg Km'.format(rna_to_gene[rna[:-3]]) for rna in rna_ids]
        + ['{} protein deg rate'.format(monomer_to_gene[monomer[:-3]]) for monomer in monomer_ids]
        + ['{} translation eff'.format(monomer_to_gene[monomer[:-3]]) for monomer in monomer_ids]
        + ['{} synth prob'.format(rna_to_gene[rna[:-3]]) for rna in rna_ids])
    if len(param_ids) != total_params:
        raise ValueError('Number of adjusted parameters and list of ids do not match.')

    pool = Pool(processes=parallelization.plotter_cpus())
    try:
        args = zip(
            variants,
            [total_params] * n_variants,
            )
        results = pool.imap_unordered(analyze_variant, args)
        # Results are summed as they arrive; order does not matter.
        (increase_params_counts,
            decrease_params_counts,
            increase_params_growth_rate,
            decrease_params_growth_rate,
            increase_params_flux_correlation,
            decrease_params_flux_correlation) = reduce(operator.add, results)
    finally:
        # Release the workers even if a task raised.
        pool.close()
        pool.join()

    # Calculate effects and z score
    labels = [
        'growth rate',
        'flux correlation',
        ]
    increase_params_data = np.vstack((
        increase_params_growth_rate / increase_params_counts,
        increase_params_flux_correlation / increase_params_counts,
        ))
    decrease_params_data = np.vstack((
        decrease_params_growth_rate / decrease_params_counts,
        decrease_params_flux_correlation / decrease_params_counts,
        ))
    n_outputs = len(labels)

    # Difference between effect when parameter increased vs decreased
    data_diff = increase_params_data - decrease_params_data
    mean_diff = np.nanmean(data_diff, axis=1).reshape(-1, 1)
    std_diff = np.nanstd(data_diff, axis=1).reshape(-1, 1)
    z_score_diff = (data_diff - mean_diff) / std_diff

    # Individual increase or decrease effects to check asymmetric effects
    all_data = np.hstack((increase_params_data, decrease_params_data))
    mean = np.nanmean(all_data, axis=1).reshape(-1, 1)
    std = np.nanstd(all_data, axis=1).reshape(-1, 1)
    z_score_increase = (increase_params_data - mean) / std
    z_score_decrease = (decrease_params_data - mean) / std

    # Get control data
    if use_control:
        control_counts, _, control_growth_rate, _, control_flux_correlation, _ = analyze_variant((CONTROL_VARIANT, total_params))
        control_data = [
            control_growth_rate[0] / control_counts[0],
            control_flux_correlation[0] / control_counts[0],
            ]
    else:
        control_data = [None] * n_outputs

    # Multiple hypothesis adjustment for significance of each parameter.
    # Solves Gaussian CDF for how many standard deviations are needed to
    # include 1 - 0.05 / total_params of the data (test each parameter for p<0.05).
    n_stds = special.erfinv(2 * (1 - 0.05 / total_params) - 1) * np.sqrt(2)

    # Plot histograms
    plt.figure(figsize=(16, 4*n_outputs))
    n_cols = 4
    top_limit = 20  # limit of the number of highest/lowest parameters to plot
    z_scores = zip(z_score_diff, z_score_increase, z_score_decrease)
    for i, (z_diff, z_increase, z_decrease) in enumerate(z_scores):
        sorted_idx = np.argsort(z_diff)
        # Positions (in sorted order) of parameters beyond the threshold.
        above_idx = np.where(z_diff[sorted_idx] > n_stds)[0][-top_limit:]
        below_idx = np.where(z_diff[sorted_idx] < -n_stds)[0][:top_limit]

        ## Plot z difference data
        ax = plt.subplot(n_outputs, n_cols, n_cols*i + 1)
        plt.yscale('symlog', linthreshold=0.01)
        plt.fill_between(range(total_params), z_diff[sorted_idx])
        plt.axhline(n_stds , color='k', linestyle='--')
        plt.axhline(-n_stds, color='k', linestyle='--')

        ## Format axes
        sparkline.whitePadSparklineAxis(ax, xAxis=False)
        plt.xticks([])
        plt.yticks([-n_stds, 0, n_stds])
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        # Symmetric limits, reused for the next panel in this row.
        lim = np.max(np.abs(plt.ylim()))
        plt.ylim([-lim, lim])
        if i == 0:
            plt.title('Difference of Positive and Negative\nParameter Changes')
        if i == n_outputs - 1:
            plt.xlabel('Sorted Parameters')
        plt.ylabel('Z score\nparameter effect on {}\n(log scale)'.format(labels[i]))

        ## Plot single direction z data
        ax = plt.subplot(n_outputs, n_cols, n_cols*i + 2)
        plt.yscale('symlog', linthreshold=0.01)
        plt.step(range(total_params), z_increase[sorted_idx], color='g', linewidth=1, alpha=0.5)
        plt.step(range(total_params), z_decrease[sorted_idx], color='r', linewidth=1, alpha=0.5)
        plt.axhline(n_stds , color='k', linestyle='--')
        plt.axhline(-n_stds, color='k', linestyle='--')

        ## Format axes
        sparkline.whitePadSparklineAxis(ax, xAxis=False)
        plt.xticks([])
        plt.yticks([-n_stds, 0, n_stds])
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        plt.ylim([-lim, lim])
        if i == 0:
            plt.title('Positive and Negative\nParameter Changes')
        if i == n_outputs - 1:
            plt.xlabel('Sorted Parameters')

        ## Plot highest parameters
        ax = plt.subplot(n_outputs, n_cols, n_cols*i + 3)
        plt.yscale('symlog', linthreshold=0.01)
        plt.bar(above_idx, z_diff[sorted_idx[above_idx]])
        plt.axhline(n_stds, color='k', linestyle='--')

        ## Format axes
        sparkline.whitePadSparklineAxis(ax)
        ax.spines["bottom"].set_visible(False)
        ax.tick_params(bottom=False)
        plt.xticks(above_idx, param_ids[sorted_idx[above_idx]], rotation=90, fontsize=6)
        plt.yticks([0, n_stds])
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        if i == 0:
            plt.title('Highest Positive Effect Parameters')
        if i == n_outputs - 1:
            plt.xlabel('Parameter IDs')

        ## Plot lowest parameters
        ax = plt.subplot(n_outputs, n_cols, n_cols*i + 4)
        plt.yscale('symlog', linthreshold=0.01)
        plt.bar(below_idx, z_diff[sorted_idx[below_idx]])
        plt.axhline(-n_stds, color='k', linestyle='--')

        ## Format axes
        sparkline.whitePadSparklineAxis(ax)
        ax.spines["bottom"].set_visible(False)
        ax.tick_params(bottom=False)
        plt.xticks(below_idx, param_ids[sorted_idx[below_idx]], rotation=90, fontsize=6)
        plt.yticks([-n_stds, 0])
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        if i == 0:
            plt.title('Highest Negative Effect Parameters')
        if i == n_outputs - 1:
            plt.xlabel('Parameter IDs')

    ## Save figure
    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Plot individual parameters
    individual_indices = [
        np.nanargmax(z_score_diff[0, :]),
        np.nanargmin(z_score_diff[0, :]),
        np.nanargmax(z_score_diff[1, :]),
        np.nanargmin(z_score_diff[1, :]),
        ]
    n_individual = len(individual_indices)
    x_values = [-1, 0, 1]
    plt.figure()
    for i, label in enumerate(labels):
        shared_ax = None
        for j, idx in enumerate(individual_indices):
            ## Shared y axis for each row
            ax = plt.subplot(n_outputs, n_individual, i*n_individual + j + 1, sharey=shared_ax)
            if shared_ax is None:
                shared_ax = ax

            ## Plot data
            plt.plot(x_values, [decrease_params_data[i, idx], control_data[i], increase_params_data[i, idx]], 'x')

            ## Format axes
            plt.xticks(x_values, ['Decrease', 'Control', 'Increase'])
            ax.tick_params(labelsize=6)
            ax.spines['right'].set_visible(False)
            ax.spines['top'].set_visible(False)
            if i < n_outputs - 1:
                ax.tick_params(labelbottom=False)
            if j > 0:
                ax.tick_params(labelleft=False)
            if i == 0:
                plt.title(param_ids[idx], fontsize=8)
            if j == 0:
                plt.ylabel(label, fontsize=7)

    ## Save figure
    plt.tight_layout()
    # metadata goes to exportFigure; it was previously swallowed as an
    # unused argument to str.format.
    exportFigure(plt, plotOutDir, '{}_individual'.format(plotOutFileName), metadata)
    plt.close('all')

    # Save z scores to tsv
    with open(os.path.join(plotOutDir, '{}.tsv'.format(plotOutFileName)), 'w') as f:
        writer = csv.writer(f, delimiter='\t')

        writer.writerow(
            ['Parameter']
            + headers(labels, 'Z-score, difference')
            + headers(labels, 'Z-score, increase')
            + headers(labels, 'Z-score, decrease')
            + headers(labels, 'Raw average, difference')
            + headers(labels, 'Raw average, increase')
            + headers(labels, 'Raw average, decrease')
            )
        writer.writerows(np.hstack((
            param_ids.reshape(-1, 1),
            z_score_diff.T,
            z_score_increase.T,
            z_score_decrease.T,
            data_diff.T,
            increase_params_data.T,
            decrease_params_data.T
            )))
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Compare simulated monomer counts with validation counts for two variants.

    For the first two variants, the time-averaged counts of the proteins in
    PROTEINS_WITH_HALF_LIFE are computed per cell. Bulk counts are combined
    with active ribosomes and RNA polymerases, then adjusted using the
    equilibrium and complexation stoichiometry matrices. The mean and
    standard deviation across cells are plotted on log10 axes against the
    'glucoseCounts' column of the Schmidt 2015 validation data, and the
    Pearson correlation for each variant is shown in the legend.

    Args:
        inputDir: directory handed to AnalysisPaths; must already exist.
        plotOutDir: directory the figure is exported to (created if needed).
        plotOutFileName: base file name passed to exportFigure.
        simDataFile: unused; sim_data is loaded from inputDir/kb instead.
        validationDataFile: pickle file providing protein.schmidt2015Data.
        metadata: passed through to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    sim_data_path = os.path.join(
        inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
    with open(sim_data_path, 'rb') as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()
    expected_n_variants = 2
    n_variants = len(variants)

    if n_variants < expected_n_variants:
        print('This plot only runs for {} variants.'.format(expected_n_variants))
        return

    # IDs for appropriate proteins
    ids_complexation = sim_data.process.complexation.moleculeNames
    ids_complexation_complexes = sim_data.process.complexation.ids_complexes
    ids_equilibrium = sim_data.process.equilibrium.moleculeNames
    ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
    ids_translation = sim_data.process.translation.monomerData['id'].tolist()
    ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))

    # Stoichiometry matrices
    equil_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
    complex_stoich = sim_data.process.complexation.stoichMatrixMonomers()

    # Protein container views
    protein_container = BulkObjectsContainer(ids_protein, dtype=np.float64)
    view_complexation = protein_container.countsView(ids_complexation)
    view_complexation_complexes = protein_container.countsView(ids_complexation_complexes)
    view_equilibrium = protein_container.countsView(ids_equilibrium)
    view_equilibrium_complexes = protein_container.countsView(ids_equilibrium_complexes)

    # Load model data
    model_counts = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
    model_std = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
    for i, variant in enumerate(variants):
        if i >= expected_n_variants:
            print('Skipping variant {} - only runs for {} variants.'.format(variant, expected_n_variants))
            continue

        variant_counts = []
        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            unique_counts_reader = TableReader(os.path.join(simOutDir, 'UniqueMoleculeCounts'))

            # Account for bulk molecules
            # NOTE(review): ids_protein is passed as a flat list while the
            # result is unpacked as a single group -- confirm the helper
            # accepts this form.
            (bulk_counts,) = read_bulk_molecule_counts(simOutDir, ids_protein)
            protein_container.countsIs(bulk_counts.mean(axis=0))

            # Account for unique molecules
            # The id list and the count table are each read once and
            # indexed twice.
            unique_molecule_ids = unique_counts_reader.readAttribute('uniqueMoleculeIds')
            unique_molecule_counts = unique_counts_reader.readColumn('uniqueMoleculeCounts')
            ribosome_index = unique_molecule_ids.index('activeRibosome')
            rnap_index = unique_molecule_ids.index('activeRnaPoly')
            n_ribosomes = unique_molecule_counts[:, ribosome_index]
            n_rnap = unique_molecule_counts[:, rnap_index]
            protein_container.countsInc(
                n_ribosomes.mean(),
                [sim_data.moleculeIds.s30_fullComplex, sim_data.moleculeIds.s50_fullComplex])
            protein_container.countsInc(n_rnap.mean(), [sim_data.moleculeIds.rnapFull])

            # Account for small-molecule bound complexes
            view_equilibrium.countsDec(equil_stoich.dot(view_equilibrium_complexes.counts()))

            # Account for monomers in complexed form
            view_complexation.countsDec(complex_stoich.dot(view_complexation_complexes.counts()))

            variant_counts.append(protein_container.countsView(PROTEINS_WITH_HALF_LIFE).counts())

        model_counts[:, i] = np.mean(variant_counts, axis=0)
        model_std[:, i] = np.std(variant_counts, axis=0)

    # Validation data
    schmidt_ids = {
        m: i
        for i, m in enumerate(validation_data.protein.schmidt2015Data['monomerId'])}
    schmidt_counts = validation_data.protein.schmidt2015Data['glucoseCounts']
    validation_counts = np.array(
        [schmidt_counts[schmidt_ids[p]] for p in PROTEINS_WITH_HALF_LIFE])

    # Process data
    # Error bars are asymmetric in log space, so the lower and upper
    # extents are computed separately.
    model_log_counts = np.log10(model_counts)
    model_log_lower_std = model_log_counts - np.log10(model_counts - model_std)
    model_log_upper_std = np.log10(model_counts + model_std) - model_log_counts
    validation_log_counts = np.log10(validation_counts)
    r_before = stats.pearsonr(validation_log_counts, model_log_counts[:, 0])
    r_after = stats.pearsonr(validation_log_counts, model_log_counts[:, 1])

    # Scatter plot of model vs validation counts
    max_counts = np.ceil(max(validation_log_counts.max(), model_log_upper_std.max()))
    limits = [0, max_counts]
    plt.figure()
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    ## Plot data
    for i in range(expected_n_variants):
        plt.errorbar(
            validation_log_counts, model_log_counts[:, i],
            yerr=np.vstack((model_log_lower_std[:, i], model_log_upper_std[:, i])),
            fmt='o', color=colors[i], ecolor='k', capsize=3, alpha=0.5)
    # Identity line, kept out of the legend.
    plt.plot(limits, limits, 'k--', linewidth=0.5, label='_nolegend_')

    ## Format axes
    plt.xlabel('Validation Counts\n(log10(counts))')
    plt.ylabel('Average Simulation Counts\n(log10(counts))')
    ax = plt.gca()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_position(('outward', 10))
    ax.spines['bottom'].set_position(('outward', 10))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    ## Add legend
    legend_text = [
        'Before: r={:.2f}, p={:.3f}'.format(r_before[0], r_before[1]),
        'After: r={:.2f}, p={:.3f}'.format(r_after[0], r_after[1]),
        ]
    plt.legend(legend_text, frameon=False)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Plot per-variant distributions of four quantities against growth rate.

    For every simulation returned by AnalysisPaths.get_cells(), the Mass,
    RibosomeData, TranscriptElongationListener, UniqueMoleculeCounts and
    Main tables are read and the following per-time-step series are built:

    - RNA to protein ratio (axis label: nuc/100 aa)
    - DNA to protein ratio (axis label: chrom eq/10^9 aa)
    - ribosome elongation rate (axis label: aa/s)
    - synthesized tRNA + rRNA counts divided by all synthesized RNA counts

    Series from simulations sharing a variant index are pooled, and each
    variant is drawn as one violin positioned at that variant's mean
    doublings per hour.

    Args:
        inputDir: directory handed to AnalysisPaths; must already exist.
        plotOutDir: output directory; created with os.mkdir if missing.
        plotOutFileName: base file name passed to exportFigure.
        simDataFile: unused here.
        validationDataFile: unused here.
        metadata: passed through to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception("inputDir does not currently exist as a directory")

    ap = AnalysisPaths(inputDir, variant_plot=True)
    all_cells = ap.get_cells()

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # NOTE(review): only the first variant's sim_data is loaded and it is
    # used for every variant below - confirm the constants and RNA masks
    # read from it do not differ between variants.
    # NOTE(review): this unpickles a file from disk; only safe for trusted
    # simulation output.
    variantSimDataFile = ap.get_variant_kb(ap.get_variants()[0])
    with open(variantSimDataFile, "rb") as f:
        sim_data = cPickle.load(f)

    nAvogadro = sim_data.constants.nAvogadro.asNumber()
    chromMass = (sim_data.getter.getMass(['CHROM_FULL[c]'])[0] / sim_data.constants.nAvogadro).asNumber()

    # Loop-invariant masks selecting the tRNA and rRNA columns.
    rnaData = sim_data.process.transcription.rnaData
    isTRna = rnaData["isTRna"]
    isRRna = rnaData["isRRna"]

    # variant index -> lists of per-simulation results, pooled after the loop
    samples = {}

    for simDir in all_cells:
        simOutDir = os.path.join(simDir, "simOut")

        # The variant index is taken from the 6 characters that start 14
        # characters before 'generation_' in the path.
        # presumably the layout is <6-digit variant>/<6-digit seed>/generation_... - TODO confirm
        gen_idx = simDir.rfind('generation_')
        variant = int(simDir[gen_idx - 14:gen_idx - 8])

        mass = TableReader(os.path.join(simOutDir, "Mass"))
        # presumably the 10**-15 factor converts fg to g - verify against the Mass listener
        protein = mass.readColumn("proteinMass") * 10**-15
        rna = mass.readColumn("rnaMass") * 10**-15
        dna = mass.readColumn("dnaMass") * 10**-15
        growthRate = mass.readColumn("instantaniousGrowthRate")
        mass.close()

        doublingTime = np.nanmean(np.log(2) / growthRate / 60)

        rnaNT = rna / NT_MW * nAvogadro
        proteinAA = protein / PROTEIN_MW * nAvogadro

        # Count chromosome equivalents
        chromEquivalents = dna / chromMass

        # Load ribosome data
        ribosomeDataFile = TableReader(os.path.join(simOutDir, "RibosomeData"))
        actualElongations = ribosomeDataFile.readColumn("actualElongations")
        ribosomeDataFile.close()

        transcriptDataFile = TableReader(os.path.join(simOutDir, "TranscriptElongationListener"))
        rnaSynth = transcriptDataFile.readColumn("countRnaSynthesized")
        transcriptDataFile.close()

        stableRnaSynth = np.sum(rnaSynth[:, isTRna], axis=1) + np.sum(rnaSynth[:, isRRna], axis=1)
        totalRnaSynth = np.sum(rnaSynth, axis=1).astype(float)
        rnaFraction = stableRnaSynth / totalRnaSynth

        uniqueMoleculeCounts = TableReader(os.path.join(simOutDir, "UniqueMoleculeCounts"))
        ribosomeIndex = uniqueMoleculeCounts.readAttribute("uniqueMoleculeIds").index("activeRibosome")
        activeRibosome = uniqueMoleculeCounts.readColumn("uniqueMoleculeCounts")[:, ribosomeIndex]
        uniqueMoleculeCounts.close()

        # Open the Main table once and release it when done.
        mainReader = TableReader(os.path.join(simOutDir, "Main"))
        timeStepSec = mainReader.readColumn("timeStepSec")
        mainReader.close()

        per_variant = samples.setdefault(variant, {
            "rnaToProtein": [],
            "dnaToProtein": [],
            "elngRate": [],
            "stableRnaFraction": [],
            "doublingPerHour": [],
        })
        per_variant["rnaToProtein"].append(rnaNT / (proteinAA / 100))
        per_variant["dnaToProtein"].append(chromEquivalents / (proteinAA / 10**9))
        # The first 3 time steps are dropped from the elongation rate.
        # NOTE(review): reason is not visible here - presumably start-up transients.
        per_variant["elngRate"].append((actualElongations / activeRibosome / timeStepSec)[3:])
        # Time steps with no RNA synthesized give 0/0 = NaN; drop them.
        per_variant["stableRnaFraction"].append(np.asarray(rnaFraction)[~np.isnan(rnaFraction)])
        per_variant["doublingPerHour"].append(60 / doublingTime)

    def pooled(chunks):
        # Flatten and join the per-simulation arrays of one variant.
        return np.concatenate([np.ravel(chunk) for chunk in chunks])

    rnaToProtein = []
    dnaToProtein = []
    elngRate = []
    stableRnaFraction = []
    doublingPerHour = []

    for per_variant in samples.values():
        rnaToProtein.append(pooled(per_variant["rnaToProtein"]))
        dnaToProtein.append(pooled(per_variant["dnaToProtein"]))
        elngRate.append(pooled(per_variant["elngRate"]))
        stableRnaFraction.append(pooled(per_variant["stableRnaFraction"]))
        doublingPerHour.append(np.mean(per_variant["doublingPerHour"]))

    plt.figure(figsize=(8.5, 11))

    panels = [
        (rnaToProtein, "RNA to Protein\n(nuc/100 aa)"),
        (dnaToProtein, "DNA to Protein\n(chrom eq/10^9 aa)"),
        (elngRate, "Ribosome Elongation\nRate (aa/s)"),
        (stableRnaFraction, "Rate Stable RNA to\nRate Total RNA"),
    ]
    for row, (data, ylabel) in enumerate(panels, start=1):
        sp = plt.subplot(4, 1, row)
        sp.violinplot(data, positions=doublingPerHour, showmeans=True)
        sp.set_ylabel(ylabel)
    # Only the bottom panel carries the shared x-axis label.
    sp.set_xlabel("Doublings per Hour")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Plot flux sensitivity results for the flux_sensitivity variant.

    Reads the 'succinate_flux_sensitivity' and 'isocitrate_flux_sensitivity'
    columns (dropping the first row of each) from the FBAResults table of
    every simulation in every variant, stacks them row-wise, converts each
    stack with calc_z and draws a 2x2 grid: plot_lows on the left and
    plot_threshold on the right, succinate dehydrogenase on the top row and
    isocitrate dehydrogenase on the bottom row.

    Args:
        inputDir: directory handed to AnalysisPaths; must already exist.
        plotOutDir: output directory, created via filepath.makedirs.
        plotOutFileName: base file name passed to exportFigure.
        simDataFile: unused here.
        validationDataFile: unused here.
        metadata: dict-like; the plot only runs when metadata['variant']
            is 'flux_sensitivity'. Also passed to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if metadata.get('variant', '') != 'flux_sensitivity':
        print('This plot only runs for the flux_sensitivity variant.')
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    reactions = None
    succ_fluxes = []
    iso_fluxes = []
    for variant in variants:
        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, "simOut")

            # Listeners used
            fba_reader = TableReader(os.path.join(simOutDir, 'FBAResults'))

            # Load data
            # The reaction list of the last simulation read is the one used
            # for labelling.
            # NOTE(review): assumes every simulation reports the same
            # reactions in the same order - confirm.
            reactions = np.array(fba_reader.readAttribute('sensitivity_reactions'))
            succ_fluxes.append(fba_reader.readColumn('succinate_flux_sensitivity')[1:, :])
            iso_fluxes.append(fba_reader.readColumn('isocitrate_flux_sensitivity')[1:, :])
            fba_reader.close()

    # Without any simulation output np.vstack would raise on an empty list
    # and there would be no reaction labels, so bail out with a message.
    if not succ_fluxes:
        print('No simulation output found for the flux_sensitivity plot.')
        return

    succ_fluxes = np.vstack(succ_fluxes)
    iso_fluxes = np.vstack(iso_fluxes)

    succ_z = calc_z(succ_fluxes)
    iso_z = calc_z(iso_fluxes)

    threshold = -0.1

    # Plot data
    plt.figure()
    gs = gridspec.GridSpec(2, 2)

    ## Succinate dehydrogenase all fluxes
    ax = plt.subplot(gs[0, 0])
    plot_lows(ax, succ_z, threshold, 'succinate dehydrogenase')

    ## Succinate dehydrogenase fluxes over threshold
    ax = plt.subplot(gs[0, 1])
    plot_threshold(ax, succ_z, threshold, reactions)

    ## Isocitrate dehydrogenase all fluxes
    ax = plt.subplot(gs[1, 0])
    plot_lows(ax, iso_z, threshold, 'isocitrate dehydrogenase')

    ## Isocitrate dehydrogenase fluxes over threshold
    ax = plt.subplot(gs[1, 1])
    plot_threshold(ax, iso_z, threshold, reactions)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Plot normalized added mass against normalized initial mass per variant.

    For variants 0, 1 and 2, the first and last 'dryMass' values of every
    generation 2 and 3 simulation are read. Initial mass and added mass
    (final minus initial) are each divided by their own mean, binned into
    5 bins for mean/std error bars, and fitted with a linear regression.

    Two kinds of figure are exported:
    - one combined 1x3 figure under plotOutFileName, with one scatter panel
      per variant (variant 1 left, variant 0 middle, variant 2 right);
    - one small "stripped" scatter per variant under
      plotOutFileName + str(variant) + "_stripped", with a transparent
      background.

    Simulations whose Mass table cannot be read are reported and skipped;
    variants with no readable simulation are skipped entirely.

    Args:
        inputDir: directory handed to AnalysisPaths; must already exist.
        plotOutDir: output directory; created with os.mkdir if missing.
        plotOutFileName: base file name passed to exportFigure.
        simDataFile: unused here.
        validationDataFile: unused here.
        metadata: passed through to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception("variantDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)

    if ap.n_generation == 1:
        print("Need more data to create addedMass")
        return

    allScatter = plt.figure()
    allScatter.set_figwidth(11)
    allScatter.set_figheight(6)

    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # Titles are indexed by variant index, not by subplot column.
    title_list = [
        r"Glucose minimal, $\tau = $44 min",
        r"Glucose minimal anaerobic, $\tau = $100 min",
        r"Glucose minimal + 20 amino acids, $\tau = $25 min",
    ]

    # variant index -> column of the combined figure; other variants are ignored
    subplot_column = {0: 1, 1: 0, 2: 2}
    gen = [2, 3]
    nbins = 5

    for varIdx in ap.get_variants():
        if varIdx not in subplot_column:
            continue
        plotIdx = subplot_column[varIdx]

        all_cells = ap.get_cells(generation=gen, variant=[varIdx])
        if len(all_cells) == 0:
            continue

        birth_masses = []
        division_masses = []
        for simDir in all_cells:
            # Best effort: report an unreadable simulation and move on.
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                cellMass = mass.readColumn("dryMass")
                birth, division = cellMass[0], cellMass[-1]
            except Exception as e:
                print(e)
                continue
            birth_masses.append(birth)
            division_masses.append(division)

        # Nothing could be loaded: the means below would be NaN and the
        # regression has no input, so skip this variant.
        if not birth_masses:
            print("No readable Mass output for variant %s; skipping." % (varIdx,))
            continue

        initial_masses = np.array(birth_masses, dtype=float)
        final_masses = np.array(division_masses, dtype=float)
        added_masses = final_masses - initial_masses

        scaled_initial_masses = initial_masses / initial_masses.mean()
        scaled_added_masses = added_masses / added_masses.mean()

        # Per-bin count, sum and sum of squares of the scaled added mass,
        # giving a per-bin mean and sample standard deviation.
        # Bins holding 0 or 1 points yield NaN/inf here.
        n, xbin = np.histogram(scaled_initial_masses, bins=nbins)
        sy, xbin = np.histogram(scaled_initial_masses, bins=nbins, weights=scaled_added_masses)
        sy2, xbin = np.histogram(scaled_initial_masses, bins=nbins, weights=scaled_added_masses * scaled_added_masses)
        mean = sy / n
        std = np.sqrt(sy2 / (n - 1) - n * mean * mean / (n - 1))

        slope, intercept, r_value, p_value, std_err = linregress(scaled_initial_masses, scaled_added_masses)

        # plot all scatter plots
        plt.figure(allScatter.number)
        ax = plt.subplot2grid((1, 3), (0, plotIdx))
        ax.plot(scaled_initial_masses, scaled_added_masses, '.', color="black", alpha=0.2, zorder=1, markeredgewidth=0.0)
        ax.errorbar(((xbin[1:] + xbin[:-1]) / 2), mean, yerr=std, color="black", linewidth=1, zorder=2)
        ax.plot(scaled_initial_masses, slope * scaled_initial_masses + intercept, color="blue")

        ax.set_title(
            title_list[varIdx] + ", n=%d" % (len(birth_masses), ) + "\n" +
            r"$m_{add}$=%.3f$\times$$m_{init}$ + %.3f" % (slope, intercept) + "\n" +
            "r-value=%0.2g" % r_value + "\n" +
            "p-value=%0.2g" % p_value,
            fontsize=FONT_SIZE)

        ax.set_xlim([INIT_MASS_LOWER_LIM, INIT_MASS_UPPER_LIM])
        ax.set_ylim([ADDED_MASS_LOWER_LIM, ADDED_MASS_UPPER_LIM])
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

        # Variant 1 occupies the leftmost column, so only it gets a y label.
        # NOTE(review): the x label is treated as unconditional; indentation
        # was lost in the source, confirm it was not under this branch too.
        if varIdx == 1:
            ax.set_ylabel("Normed added mass", fontsize=FONT_SIZE)
        ax.set_xlabel("Normed initial mass", fontsize=FONT_SIZE)

        plt.subplots_adjust(bottom=0.2)

        whitePadSparklineAxis(ax)

        # Set the major tick label size on both axes.
        ax.tick_params(axis='both', which='major', labelsize=FONT_SIZE)

        # plot stripped figure
        fig = plt.figure()
        fig.set_figwidth(3)
        fig.set_figheight(2)
        ax = plt.subplot2grid((1, 1), (0, 0))
        ax.plot(scaled_initial_masses, scaled_added_masses, '.', color=color_cycle[0], alpha=0.25, ms=6, zorder=1, markeredgewidth=0.0, clip_on=False)
        ax.plot(scaled_initial_masses, slope * scaled_initial_masses + intercept, color='k')

        ax.set_xlim([INIT_MASS_LOWER_LIM, INIT_MASS_UPPER_LIM])
        ax.set_ylim([ADDED_MASS_LOWER_LIM, ADDED_MASS_UPPER_LIM])
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)

        whitePadSparklineAxis(ax)

        ax.tick_params(which='both', bottom=True, left=True, top=False, right=False, labelbottom=True, labelleft=True, labelsize=FONT_SIZE)

        ax.set_xlabel("")
        ax.set_ylabel("")

        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName + str(varIdx) + "_stripped", metadata, transparent=True)

    # Re-select the combined figure so it is the one exported.
    plt.figure(allScatter.number)
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")