# Determine number of entries per gene
# NOTE(review): `g`, `d`, `colicogs` and `size` are defined outside this
# fragment; `g`/`d` look like the key and sub-frame of an enclosing per-gene
# groupby loop -- restore that loop's indentation when splicing this back in.

# Annotation rows for this gene (case-insensitive match on the gene name).
gene = colicogs[colicogs['gene_name'].str.lower() == g.lower()]

# Read the annotation fields only once the gene is known to exist. Taking
# `[0]` of an empty selection raises IndexError, which previously happened
# before this guard could be reached.
if len(gene) > 0:
    b_number = gene['b_number'].unique()[0]
    cog_class = gene['cog_class'].values[0]
    cog_cat = gene['cog_category'].values[0]
    cog_letter = gene['cog_letter'].values[0]
    gene_product = gene['gene_product'].values[0]
    mw = gene['mw_fg'].values[0]
    go_term = ';'.join(gene['go_terms'].unique())

    # Group on a scalar key rather than a one-element list so that `_c` is the
    # growth rate itself; pandas >= 2.0 yields 1-tuples for list-valued keys.
    for _c, _d in d.groupby('growth_rate_hr-1'):
        # volume predictions based on MG1655 data, Si, F. et al. (2017, 2019)
        vol = size.lambda2size(_c)

        # Reported value is per fL; scale by the predicted volume to get a
        # per-cell count, and by `mw` to get a per-cell mass.
        copies_per_fl = _d['copy_number_molecule-per-fL'].values[0]

        # extract relevant information.
        gene_dict = {
            'gene_name': g.lower(),
            'b_number': b_number,
            'condition': _d['condition'].unique()[0],
            'corrected_volume': vol,
            'reported_tot_per_cell': copies_per_fl * vol,
            'reported_fg_per_cell': copies_per_fl * vol * mw,
            'go_terms': go_term,
            'cog_class': cog_class,
            'cog_category': cog_cat,
            'cog_letter': cog_letter,
            'gene_product': gene_product,
            'growth_rate_hr': _c,
        }
# Respiration maximum: one curve per cell shape, plus a shaded band between
# the sphere and rod curves.
resp_line_style = dict(color=colors['blue'], alpha=0.9, lw=0.5)
ax1.plot(Ps_resp_rod, SA_V_ratio_rod, label='rod', ls='-.',
         **resp_line_style)
ax1.plot(Ps_resp_sphere, SA_V_ratio_sphere, label='sphere', ls='--',
         **resp_line_style)
ax1.fill_between(Ps_resp_,
                 y1=SA_V_ratio_sphere_,
                 y2=SA_V_ratio_rod_,
                 color=colors['blue'],
                 alpha=0.2,
                 lw=0)

# Overlay the E. coli datasets as surface-to-volume ratio against ATP demand:
# one marker per (dataset, condition, growth rate) group.
data = pd.read_csv('../../data/compiled_absolute_measurements.csv')
dataset_marker_style = dict(alpha=0.75, markeredgecolor='k',
                            markeredgewidth=0.25, ms=4, zorder=10)
for g, d in data.groupby(['dataset', 'condition', 'growth_rate_hr']):
    # g = (dataset, condition, growth rate); volume comes from the growth rate.
    V = size.lambda2size(g[2])

    # ATP equivalents demand w.r.t. volume ; 1E6 ATP/(um3 s)
    Pv = 1E6 * V

    # assume aspect ratio of 4 (length/width), which is
    # approximately correct for E. coli
    SA_rod = 2 * np.pi * V**(2/3)
    SV = SA_rod / V

    ax1.plot(Pv, SV, 'o',
             color=dataset_colors[g[0]],
             label=g[2],
             **dataset_marker_style)

# Format the axes (only ax1 is formatted here).
for a in (ax1,):
    a.xaxis.set_tick_params(labelsize=5)
    a.yaxis.set_tick_params(labelsize=5)
# NOTE(review): everything down to the `#%%` cell marker reads `g`, `d` and
# `d_schmidt_`, which are defined above this fragment and look like the state
# of an enclosing per-growth-rate loop -- restore that indentation when
# splicing this back into the file.

# Keep only the Schmidt rows that share the growth rate and condition of the
# first row of `d_schmidt_`.
gr_schmidt = d_schmidt_.growth_rate_hr.values[0]
cond_schmidt = d_schmidt_.condition.values[0]
d_schmidt_ = d_schmidt_[(d_schmidt_.growth_rate_hr == gr_schmidt)
                        & (d_schmidt_.condition == cond_schmidt)]

# Share of the Schmidt mass carried by the genes that are also present in `d`.
schmidt_genes = d_schmidt_[d_schmidt_.b_number.isin(d.b_number.unique())]
rel_schmidt = (schmidt_genes.fg_per_cell.sum()
               / d_schmidt_.fg_per_cell.sum())

# Reported mass at this growth rate relative to size.lambda2P(g).
at_this_rate = df['growth_rate_hr'] == g
rel_corr_fg = (df[at_this_rate]['reported_fg_per_cell'].sum()
               / size.lambda2P(g))

print(g,
      ': total mass fg: ', np.round(size.lambda2P(g), 2),
      ' volume: ', np.round(size.lambda2size(g), 2),
      ' relative change in total fg: ', np.round(1 / rel_corr_fg, 2),
      ' abundance relative to Schmidt: ', np.round(rel_schmidt, 2))

# Rescale the reported values: divide by rel_corr_fg, then multiply by the
# Schmidt-relative share.
df.loc[at_this_rate, 'tot_per_cell'] = (
    df.loc[at_this_rate, 'reported_tot_per_cell'] / rel_corr_fg) * rel_schmidt
df.loc[at_this_rate, 'fg_per_cell'] = (
    df.loc[at_this_rate, 'reported_fg_per_cell'] / rel_corr_fg) * rel_schmidt

#%%
# Tag every row with the dataset identifiers and write the table to disk.
df['dataset'] = 'valgepea_2013'
df['dataset_name'] = 'Valgepea et al. 2013'
df['strain'] = 'MG1655'
df.to_csv('../../../data/valgepea2013_longform_annotated.csv')
# %%
# Read the annotated complex subunit table.
subunits = pd.read_csv('../../data/compiled_annotated_complexes.csv')

# Read the compiled measurements (not used by the statements visible here).
data = pd.read_csv('../../data/compiled_absolute_measurements.csv')

# Mean `n_units` for every dataset / condition / growth rate / complex group.
complex_group_keys = [
    'dataset',
    'dataset_name',
    'condition',
    'growth_rate_hr',
    'complex_annotation',
    'complex',
]
complex_count = (subunits.groupby(complex_group_keys)['n_units']
                 .mean()
                 .reset_index())

# Keep the rows annotated as ribosome.
is_ribosome = complex_count.complex_annotation == 'ribosome'
complex_ribo = complex_count[is_ribosome]

# One marker series per dataset: n_units against size.lambda2size(growth rate).
point_style = dict(alpha=0.75, markeredgecolor='k', markeredgewidth=0.25,
                   ms=4, zorder=10)
for g, d in complex_ribo.groupby(['dataset', 'dataset_name']):
    # g = (dataset, dataset_name): the first picks the colour, the second
    # is the legend label.
    ax.plot(size.lambda2size(d['growth_rate_hr']), d['n_units'], 'o',
            color=dataset_colors[g[0]],
            label=g[1],
            **point_style)

# Axis labels, tick sizes and legend.
ax.set_xlabel('estimated cell volume [fL]', fontsize=6)
ax.set_ylabel('ribosomes per cell', fontsize=6)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_tick_params(labelsize=5)
ax.legend(fontsize=6, loc='upper left')
# NOTE(review): truncated fragment. It starts inside a per-condition loop body
# whose `if`/`for` headers are above this view, and it ends inside an unclosed
# dict literal. The indentation below is reconstructed from the statement
# structure (the `else:` needs an `if` that is not visible) -- confirm against
# the full file before relying on it.
        # Volume listed in `rates` for condition `c`.
        reported_volume = rates.loc[rates['condition'] == c]['volume_fL'].values[0]
        # One output row for this gene and condition. The fg value is the
        # reported count multiplied by `mw`.
        gene_dict = {
            'gene_name': g,
            'b_number': b_number,
            'condition': c,
            'reported_tot_per_cell': d[f'{c}_tot'].values[0],
            'reported_fg_per_cell': d[f'{c}_tot'].values[0] * mw,
            'go_terms': go_term,
            'cog_class': cog_class,
            'cog_category': cog_cat,
            'cog_letter': cog_letter,
            'growth_rate_hr': growth_rate,
            'gene_product': gene_product,
            'reported_volume': reported_volume,
            'corrected_volume': size.lambda2size(growth_rate)
        }
        # Collect the row as a one-row DataFrame.
        dfs.append(pd.DataFrame(gene_dict, index=[0]))
else:
    # The gene was not matched: warn, then still build a row per condition.
    print(f'Warning!!! {g} not found in the gene list!')
    for c in conditions:
        # Growth rate and volume listed in `rates` for condition `c`.
        growth_rate = rates.loc[rates['condition'] == c]['growth_rate_hr'].values[0]
        reported_volume = rates.loc[rates['condition'] == c]['volume_fL'].values[0]
        # NOTE(review): this branch indexes `g` as g[0] / g[1], while the
        # branch above stores `g` whole as the gene name -- confirm what `g`
        # is here. The fg value is read from the `{c}_fg` column directly
        # instead of being computed from `{c}_tot` and `mw`.
        gene_dict = {
            'gene_name': g[0],
            'b_number': g[1],
            'condition': c,
            'reported_tot_per_cell': d[f'{c}_tot'].values[0],
            'reported_fg_per_cell': d[f'{c}_fg'].values[0],
# NOTE(review): truncated fragment. The first line is the tail of a plotting
# call whose start (and any enclosing loop header) is above this view; the
# indentation below is reconstructed -- confirm against the full file.
            label=g[1], ms=4, zorder=10)

# Labels, tick sizes and legend for the first panel.
ax[0].set_xlabel('estimated # ori', fontsize=6)
ax[0].set_ylabel('ribosomes per cell', fontsize=6)
ax[0].xaxis.set_tick_params(labelsize=5)
ax[0].yaxis.set_tick_params(labelsize=5)
ax[0].legend(fontsize=6, loc='upper left')

# Print the ratio of the largest to the smallest `n_units` in complex_ribo.
print(complex_ribo['n_units'].max() / complex_ribo['n_units'].min())

## Plot of ribosome concentration
# One marker series per (dataset, dataset_name) group: `n_units` divided by
# size.lambda2size(growth rate), plotted against growth rate.
for g, d in complex_ribo.groupby(['dataset', 'dataset_name']):
    ax[1].plot(d['growth_rate_hr'],
               d['n_units'] / size.lambda2size(d['growth_rate_hr']),
               'o',
               color=dataset_colors[g[0]],
               alpha=0.75,
               markeredgecolor='k',
               markeredgewidth=0.25,
               label=g[1],
               ms=4,
               zorder=10)

# Labels, tick sizes and a fixed y-range for the second panel.
ax[1].set_xlabel('growth rate [hr$^{-1}$]', fontsize=6)
ax[1].set_ylabel('ribosome concentration [fL$^{-1}$]', fontsize=6)
ax[1].xaxis.set_tick_params(labelsize=5)
ax[1].yaxis.set_tick_params(labelsize=5)
ax[1].set_ylim(0, 40000)
# ax[1].legend(fontsize=6, loc = 'upper left')