def small_scale(store_export='dictionary'):
    """
    Run one simulation and produce the small-scale diagnostic plots.

    Plots created:
    - population dynamics overview (free naive cells, free memory cells and
      GC populations over time)
    - one GC dynamics plot per GC, built from that GC's panel and its memory
      output (times, families, values and mutation counts)

    If store_export is 'datafile', the simulation data is stored in a hdf5
    file for later use; for 'dictionary' the data is only passed around
    internally and is gone after the run.

    Recommended only for small simulations (up to ~5 GCs and ~5k cells),
    since plots become crowded and large otherwise.

    Returns whatever main() returned (file path or dictionary).
    """
    # the run is identified by the current system time in whole seconds
    runID = int(time.time())

    # simulate; the result is either a file path or a dict
    simdata = main(runID, store_export=store_export, evalperday=12)

    # unpack everything the import helper provides; only part of it is
    # needed for the plots below
    (l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan,
     GCPans, ms_times, ms_vals, ms_fams, ms_muts, mut_list,
     E_list) = import_file(simdata)

    # population behaviour over time
    population_plot(l_times, l_fn, l_fm, l_GCs, runID)

    # GC contents and memory output, one figure per GC
    n_gcs = len(l_GCs)
    for gc in range(n_gcs):
        GC_dynamics_plot(
            GCPans[gc],
            ms_times[gc],
            ms_fams[gc],
            ms_vals[gc],
            ms_muts[gc],
            runID,
            gc,
        )

    return simdata
def AM_effect_nkey(nkeys=[1, 5, 10, 15], repeats=100, d_export=True):
    """
    Compute and plot the affinity improvement within single GCs for several
    values of nkey.

    For every value in nkeys, `repeats` independent single-GC simulations of
    one infection are run and the per-timepoint mean 'affinity' of the GC is
    averaged over the runs (mean and standard deviation, NaN-aware).

    Side effects: overwrites the infection protocol, simulation duration,
    pool sizes and number of GCs (set to 1) on the global config `cf`, as
    well as the binding model parameters nkey, lAg and lAb. Other parameters
    remain untouched.

    Parameters:
    nkeys    -- iterable of nkey values to scan (the default list is never
                mutated here)
    repeats  -- number of simulation runs averaged per nkey value
    d_export -- if True, the averaged results are written to
                'processed_data/AM_effect_data'
    """
    # set single infection and single GC for this analysis
    cf.endtime = 30*12
    cf.tinf = [0*12]
    cf.dose = [1]
    cf.nGCs = 1
    cf.naive_pool = 1000*1  # size of the naive precursor pool
    cf.memory_pool = 100*1  # size of the initial unspecific memory pool

    def GC_affinity(GCPan):
        """ Given a GC panel, gets the mean affinity for each timepoint. """
        # materialise the keys: a Python 3 dict view cannot be indexed and
        # the time list is later converted with np.array()
        tList = list(GCPan.keys())
        energies = [GCPan[tp]['affinity'].dropna().mean() for tp in tList]
        return tList, energies

    topElist = []
    for hs in nkeys:
        # set binding model parameters accordingly
        cf.nkey = hs
        cf.lAg = hs
        cf.lAb = 220 - hs

        eL = []  # energy timecourses of all runs for this nkey
        for r in range(repeats):
            simdata = main(store_export='dictionary', evalperday=12)
            (l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes,
             freePan, GCPans, ms_times, ms_vals, ms_fams, ms_muts, mut_list,
             E_list) = import_file(simdata)
            tList, energies = GC_affinity(GCPans[0])
            eL.append(energies)

        # mean and std over all runs; tList is taken from the last run
        eM = np.nanmean(np.array(eL), axis=0)
        eStd = np.nanstd(np.array(eL), axis=0)
        topElist.append((hs, tList, eM, eStd))

    # write information to file
    if d_export:
        # context manager guarantees the file is closed even if a write fails
        with open('processed_data/AM_effect_data', 'w') as datafile:
            datafile.write(
                'number of simulation runs per n_key = {} \n'.format(repeats))
            datafile.write(
                'n_key, time (days), mean(normalised energies), '
                'std(normalised energies) \n')
            for hs, times, eM, eStd in topElist:
                # times are stored in units of 1/12 day; export in days
                datafile.write('{0}, {1}, {2}, {3}\n \n'.format(
                    hs, np.array(times)/12., eM, eStd))

    # plot
    AM_effect_plot(topElist)
def map_params(dose=[1], LFdecay=[10*12], nGCs=[1], nLFs=[25],
               naive_pool=[1000], nkey=[1, 2, 10], p_err=[0.003],
               tinf=[[0*12]], p_block=[0.5], repeats=1):
    """
    Map out the effects of different parameter (combinations) on the
    standard TUCHMI protocol.

    All arguments except `repeats` are lists, either containing only the
    default value or a set of values; every combination of the list entries
    is simulated `repeats` times. For each run the parameter set is stored
    together with the values returned by main(store_export='minimal'):
    evaluation days, memory count, E_bind (mean/std), SHM (mean/std) and
    entropy. All runs are collected in one DataFrame (one row per run) that
    is written under the key 'data' into a .h5 file in 'map_data/'.

    For running on a cluster, the first command line argument (e.g. a job
    ID) is used in the file name if present; otherwise the file is stamped
    with the system time.

    Side effects: overwrites the corresponding parameters on the global
    config `cf`, prints the file path and 'END'.
    """
    # file is stamped with the system time if no ID was given on the
    # command line
    if len(sys.argv) > 1:
        file_id = sys.argv[1]
    else:
        file_id = int(time.time()*100)
    filepath = 'map_data/data{}.h5'.format(file_id)
    print(filepath)

    # dict for collecting result series, keyed by a per-run identifier
    seriesdict = {}

    # run until challenge timepoint
    cf.endtime = 126*12

    # get parameter combinations
    paramsets = list(product(dose, LFdecay, nGCs, nLFs, naive_pool, nkey,
                             p_err, tinf, p_block))

    # the context manager closes the store even if a simulation raises
    with pd.HDFStore(filepath) as datafile:
        # for every parameter set, run the simulation `repeats` times
        for p in paramsets:
            # one dose entry per infection timepoint
            cf.dose = [p[0] for _ in p[7]]
            cf.LFdecay = p[1]
            cf.nGCs = p[2]
            cf.nLFs = p[3]
            cf.naive_pool = p[4]*cf.nGCs
            cf.memory_pool = 100*cf.nGCs  # fixed! (or change manually)
            cf.nkey = p[5]
            cf.lAg = p[5]
            cf.lAb = 220 - p[5]
            cf.p_err_FWR = p[6]
            cf.p_err_CDR = p[6]
            cf.tinf = p[7]
            cf.p_block_FWR = p[8]

            for r in range(repeats):
                # run simulation; 'minimal' export returns the summary
                # timecourses directly
                evaldays, l_mems, l_KDs, s_KDs, l_SHMs, s_SHMs, l_Entrs = \
                    main(store_export='minimal', evalperday=1)

                # store results together with the parameters used,
                # identified by the system time
                ID = 'ID_{}'.format(time.time())
                series = pd.Series(
                    [cf.dose, cf.LFdecay, cf.nGCs, cf.nLFs, cf.naive_pool,
                     cf.nkey, cf.p_block_FWR, cf.p_err_CDR, cf.tinf,
                     cf.memory_pool, evaldays, np.array(l_mems),
                     np.array(l_KDs), np.array(s_KDs), np.array(l_SHMs),
                     np.array(s_SHMs), np.array(l_Entrs)],
                    index=['dose', 'LFdecay', 'nGCs', 'nLFs', 'naive_pool',
                           'nkey', 'p_block', 'p_err', 'tinf', 'mem_pool',
                           'evaldays', 'memcount', 'E_bind', 'E_bind_std',
                           'SHM', 'SHM_std', 'entropy'])
                seriesdict[ID] = series

        # make dataframe (one row per run) and store it
        df = pd.DataFrame(seriesdict)
        df = df.transpose()
        datafile['data'] = df

    print('END')
def TUCHMI_sampling(store_export='datafile', d_export=True, subsample=12):
    """
    Perform a single simulation using the TUCHMI vaccination protocol,
    sample memory cells from the simulated pool and create several summary
    plots. User settings regarding the protocol (cf.endtime, cf.tinf,
    cf.dose) are overwritten.

    Plots produced:
    - mean affinity of the free cell pool over time
    - mean SHMs, normalised entropy and clonal expansion (fraction of cells
      sampled from clones that appeared more than once within the sample)
      in samples of size `subsample`, averaged over 100 resamplings
    - scatter plot of affinity over mutational status in polyclonal samples
      7 days after each of the three infections
    - scatter plot of affinity over mutational status at a clonal level,
      cells from the three time points merged (sampling time point encoded
      in the colour)

    Parameters:
    store_export -- 'datafile' stores the simulation data in a hdf5 file for
                    future purposes, 'dictionary' passes the data internally
                    only
    d_export     -- if True, the sample statistics are written to
                    'processed_data/TUCHMI_sampling_data'
    subsample    -- number of cells sampled at each timepoint in order to
                    calculate SHM mean, entropy and clustered fraction

    Returns whatever main() returned (file path or dictionary).
    """
    # give protocol
    cf.endtime = 126 * 12
    cf.tinf = [0 * 12, 28 * 12, 56 * 12]
    cf.dose = [1, 1, 1]

    # get runID from current system time
    runID = int(time.time())

    # run simulation and get filepath or dict
    simdata = main(runID, store_export=store_export, evalperday=1)

    # import required information
    (l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan,
     GCPans, ms_times, ms_vals, ms_fams, ms_muts, mut_list,
     E_list) = import_file(simdata)

    # for affinity-mutation scatter plots, downsample for visibility:
    # the fraction is chosen such that 100 cells are drawn at pick_tp
    pick_tp = 35 * 12
    samplefrac = 100. / len(freePan.sel(timepoint=pick_tp).dropna("dim_0"))

    # one list per timepoint to catch the values of every resampling
    tList = list(freePan["timepoint"].values)
    TT = len(tList)
    SHM_means = [[] for _ in range(TT)]
    Entropies = [[] for _ in range(TT)]
    clusterfracs = [[] for _ in range(TT)]

    # ---- Cell pool affinity over time ----
    # mean affinity of all non-NaN cells at every time point
    Elist = [
        freePan.sel(timepoint=t).loc[dict(dim_1="affinity")]
        .dropna("dim_0").values.mean()
        for t in tList
    ]
    pool_affinity_plot(tList, Elist)

    # ---- Mean SHM and clonal expansion within sample of size subsample ----
    # sample 100 times to calculate standard deviations
    for nn in range(100):
        for tp in range(TT):
            # NOTE(review): assumes the stored timepoints are 0, 12, 24, ...
            # (evalperday=1) -- confirm against main()
            ttp = 12 * tp
            # cell number to be sampled is either subsample or, if fewer
            # cells are available, all cells
            freePan_no_na = freePan.sel(timepoint=ttp).dropna("dim_0")
            cellnum = min(subsample, len(freePan_no_na))
            if cellnum > 0:
                cell_id = np.random.choice(len(freePan_no_na), cellnum)
                cells = freePan_no_na[cell_id, :]
                c_muts = list(cells.loc[dict(dim_1="mutations")].values)
                SHM_means[tp].append(np.nanmean(c_muts))

                # clone sizes within the sample
                CC = Counter(cells.loc[dict(dim_1="family")].values)
                sizedist = list(CC.values())
                # Shannon entropy normalised by its maximum log2(cellnum)
                Entropies[tp].append(
                    scipy.stats.entropy(sizedist, base=2)
                    / math.log(cellnum, 2))

                # count again to find how many clones have one member
                # only, calculate clusterfrac from this
                C2 = Counter(sizedist)
                uniquefrac = float(C2[1]) / cellnum
                clusterfracs[tp].append(1 - uniquefrac)
            else:
                # no cells available: record NaN so that the nan-aware
                # statistics below skip this sample. (The original also
                # tried to index the scalar `uniquefrac` here, which raised
                # an exception.)
                SHM_means[tp].append(np.nan)
                Entropies[tp].append(np.nan)
                clusterfracs[tp].append(np.nan)

    # mean and std over the resamplings, per timepoint
    MSHM = np.nanmean(SHM_means, axis=1)
    SSHM = np.nanstd(SHM_means, axis=1)
    MEntropies = np.nanmean(Entropies, axis=1)
    SEntropies = np.nanstd(Entropies, axis=1)
    Mclusterfracs = np.nanmean(clusterfracs, axis=1)
    Sclusterfracs = np.nanstd(clusterfracs, axis=1)

    sample_statistics_plot(subsample, tList, MSHM, SSHM, MEntropies,
                           SEntropies, Mclusterfracs, Sclusterfracs)

    # ---- Plot of affinity/mutations on tps I, II and III ----
    # sample cells 7 days post each infection, record SHM, KD and origin
    # (memory versus naive first activated ancestor)
    timecourse = [7 * 12, 35 * 12, 63 * 12]
    SHM_list = [[] for _ in timecourse]
    KD_list = [[] for _ in timecourse]
    orglist = [[] for _ in timecourse]

    for d, tp in enumerate(timecourse):
        freePan_no_na = freePan.sel(timepoint=tp).dropna("dim_0")
        cellnum = int(np.round(len(freePan_no_na) * samplefrac))
        if cellnum > 0:
            cell_id = np.random.choice(len(freePan_no_na), cellnum)
            cells = freePan_no_na[cell_id, :]
            kdl = list(cells.loc[dict(dim_1="affinity")].values)
            # transform norm E to KD
            kdll = np.exp(cf.y0 + np.array(kdl) * cf.m)
            KD_list[d] = list(kdll)
            # get mutation counts and origin
            SHM_list[d] = list(cells.loc[dict(dim_1="mutations")].values)
            orglist[d] = list(cells.loc[dict(dim_1="origin")].values)

    sample_scatter_plot(KD_list, SHM_list, orglist)

    # ---- Affinity/mutation plots for individual clusters ----
    # sample from the memory pool at the given timepoints, split the
    # information into clusters and plot SHM/KD scatter plots for them.
    # Flat lists collecting SHM, KD, family and timepoint of every cell.
    SHM_list = []
    KD_list = []
    fam_list = []
    tp_list = []

    for tp in timecourse:
        freePan_no_na = freePan.sel(timepoint=tp).dropna("dim_0")
        # note: truncates, whereas the polyclonal sample above rounds
        cellnum = int(len(freePan_no_na) * samplefrac)
        if cellnum > 0:
            cell_id = np.random.choice(len(freePan_no_na), cellnum)
            cells = freePan_no_na[cell_id, :]
            kdl = list(cells.loc[dict(dim_1="affinity")].values)
            # transform norm E to KD
            kdll = np.exp(cf.y0 + np.array(kdl) * cf.m)
            KD_list += list(kdll)
            SHM_list += list(cells.loc[dict(dim_1="mutations")].values)
            fam_list += list(cells.loc[dict(dim_1="family")].values)
            tp_list += [tp] * cellnum

    # families with more than one sampled member form a cluster
    famcounter = Counter(fam_list)
    clusters = [fam for fam, count in famcounter.items() if count > 1]
    cluster_index = {fam: ii for ii, fam in enumerate(clusters)}

    # separate lists for SHM, KD and colour (encoding the timepoint) within
    # each cluster
    iSHMs = [[] for _ in clusters]
    iKDs = [[] for _ in clusters]
    iTPs = [[] for _ in clusters]
    tp_colours = {timecourse[0]: 'lightcoral', timecourse[1]: 'indianred'}

    for fam, shm, kd, tp in zip(fam_list, SHM_list, KD_list, tp_list):
        if fam not in cluster_index:
            continue
        ii = cluster_index[fam]
        iSHMs[ii].append(shm)
        iKDs[ii].append(kd)
        # different colours for different timepoints
        iTPs[ii].append(tp_colours.get(tp, 'firebrick'))

    clonal_scatter_plot(iSHMs, iKDs, iTPs)

    # write information to file
    if d_export:
        # context manager guarantees the file is closed even if a write fails
        with open('processed_data/TUCHMI_sampling_data', 'w') as datafile:
            datafile.write('1) SAMPLE STATISTICS \n \n')
            datafile.write('sampled fraction = {} \n \n'.format(samplefrac))
            datafile.write('timecourse (days) \n {} \n \n'.format(
                np.array(tList) / 12.))
            datafile.write(
                'SHMs of cells in sample, mean and std \n {} \n {} \n \n'
                .format(MSHM, SSHM))
            datafile.write(
                'normalised Shannon entropy of cells in sample, '
                'mean and std \n {} \n {} \n \n'
                .format(MEntropies, SEntropies))
            datafile.write(
                'fraction of non-unique cells in sample, '
                'mean and std \n {} \n {} \n \n'
                .format(Mclusterfracs, Sclusterfracs))

    return simdata
def stacked_mutations(store_export='dictionary', d_export=True, repeats=10):
    """
    Perform a number of simulation runs and compute affinity histograms for
    improved, impaired and unchanged binders at a single timepoint.

    For every run, the cells of the free pool at the analysis timepoint are
    resampled (with replacement, as many cells as are present), their
    current affinity is compared to the affinity of their ancestor
    ('affinity0') and the current affinities of the three groups are
    histogrammed. The histograms are kept per run and accumulated over all
    runs; the accumulated histograms are passed to the stacked energy plot.

    Parameters:
    store_export -- 'datafile' stores the simulation data in a hdf5 file for
                    future purposes, 'dictionary' passes the data internally
                    only
    d_export     -- if True, the histogram data is written to
                    'processed_data/stacked_histogram_data'
    repeats      -- number of simulation runs
    """
    # parameters relevant to this analysis
    analysis_time = 29  # evaluation timepoint in days
    bins = np.linspace(0.6, 1, 17)
    nbins = len(bins) - 1

    # accumulated histograms and per-run histograms
    sum_zero = np.zeros(nbins)
    sum_plus = np.zeros(nbins)
    sum_minus = np.zeros(nbins)
    list_zero = []
    list_plus = []
    list_minus = []

    for i in range(repeats):
        # get runID from current system time
        runID = int(time.time())

        # run simulation and get filepath or dict
        simdata = main(runID, store_export=store_export, evalperday=1)

        # import required information
        (l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan,
         GCPans, ms_times, ms_vals, ms_fams, ms_muts, mut_list,
         E_list) = import_file(simdata)

        # with one evaluation per day, the day doubles as index into the
        # list of timepoints
        tList = list(freePan["timepoint"].values)
        freePan_no_na = freePan.sel(
            timepoint=tList[analysis_time]).dropna("dim_0")

        # possibility of subsampling here; currently as many cells as are
        # present are drawn
        cellnum = len(freePan_no_na)
        cell_id = np.random.choice(len(freePan_no_na), cellnum)
        cells = freePan_no_na[cell_id, :]

        # current and ancestor affinities of the drawn cells
        final = np.array(
            cells.loc[dict(dim_1="affinity")].dropna("dim_0").values)
        ancestor = np.array(
            cells.loc[dict(dim_1="affinity0")].dropna("dim_0").values)

        # split current affinities into unchanged, improved and impaired
        unchanged_list = final[ancestor == final]
        improved_list = final[ancestor < final]
        impaired_list = final[ancestor > final]

        # make histograms, store information both in list and in sum
        U_counts, _ = np.histogram(unchanged_list, bins=bins)
        plus_counts, _ = np.histogram(improved_list, bins=bins)
        minus_counts, _ = np.histogram(impaired_list, bins=bins)

        sum_zero += U_counts
        sum_plus += plus_counts
        sum_minus += minus_counts
        list_zero.append(U_counts)
        list_plus.append(plus_counts)
        list_minus.append(minus_counts)

    # total number of histogrammed cells; computed after the loop so that
    # it is defined for any number of repeats
    cellsum = np.sum(sum_zero) + np.sum(sum_plus) + np.sum(sum_minus)

    # plot
    stacked_energy_plot(bins, sum_plus, sum_minus, sum_zero, analysis_time)

    if d_export:
        # (heading, value) pairs in export order
        sections = [
            ('1) day \n \n', analysis_time),
            ('2) bins \n \n', bins),
            ('3) runs \n \n', repeats),
            ('4) sum of counts with unchanged energies\n \n', sum_zero),
            ('5) sum of counts with improved energies\n \n', sum_plus),
            ('6) sum of counts with impaired energies\n \n', sum_minus),
            ('7) list of counts with unchanged energies\n \n', list_zero),
            ('8) list of counts with improved energies\n \n', list_plus),
            ('9) list of counts with impaired energies\n \n', list_minus),
        ]
        # context manager guarantees the file is closed even if a write fails
        with open('processed_data/stacked_histogram_data', 'w') as datafile:
            for heading, value in sections:
                datafile.write(heading)
                datafile.write('{} \n \n'.format(value))
            datafile.write(
                '10) percentage of umutated, improved, impaired \n \n')
            # values are fractions of all histogrammed cells
            datafile.write('{}, {}, {}'.format(
                np.sum(sum_zero) / cellsum,
                np.sum(sum_plus) / cellsum,
                np.sum(sum_minus) / cellsum))
def selection_vs_mutation(store_export='dictionary', d_export=True):
    """
    Perform a single simulation and compare germline (ancestor) affinities
    with current affinities of memory cells at given timepoints.

    At each analysis timepoint, cells of the free pool are resampled (with
    replacement) and both their current affinity and the affinity of their
    ancestor ('affinity0') are recorded. Together with the energies of the
    naive cells (E_list) these are passed to the energy distribution plot
    (histograms of unselected, selected germline and actual energies) and
    to the energy scatter plot, once per analysis timepoint.

    Parameters:
    store_export -- 'datafile' stores the simulation data in a hdf5 file for
                    future purposes, 'dictionary' passes the data internally
                    only
    d_export     -- if True, the distributions are written to
                    'processed_data/energy_distribution_data'

    Returns whatever main() returned (file path or dictionary).
    """
    # parameters relevant to this analysis
    analysis_times = [29]  # evaluation timepoints in days

    # get runID from current system time
    runID = int(time.time())

    # run simulation and get filepath or dict
    simdata = main(runID, store_export=store_export, evalperday=1)

    # import required information
    (l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan,
     GCPans, ms_times, ms_vals, ms_fams, ms_muts, mut_list,
     E_list) = import_file(simdata)

    # extract the affinities and ancestor affinities at the analysis points
    ancestor_dists = []
    final_dists = []
    tList = list(freePan["timepoint"].values)
    for day in analysis_times:
        # with one evaluation per day, the day doubles as index into tList
        freePan_no_na = freePan.sel(timepoint=tList[day]).dropna("dim_0")
        # NOTE(review): the original first capped the sample at 2000 cells
        # and then immediately overwrote the cap with the full cell count;
        # the effective behaviour (no cap) is kept -- confirm which one is
        # intended
        cellnum = len(freePan_no_na)
        cell_id = np.random.choice(len(freePan_no_na), cellnum)
        cells = freePan_no_na[cell_id, :]
        final_dists.append(list(cells.loc[dict(dim_1="affinity")].values))
        ancestor_dists.append(
            list(cells.loc[dict(dim_1="affinity0")].values))

    # send energy lists to the plots, one pair of figures per timepoint
    for day, ancestors, finals in zip(analysis_times, ancestor_dists,
                                      final_dists):
        energy_distributions_plot(E_list, ancestors, finals, day)
        energy_scatter_plot(ancestors, finals, day)

    if d_export:
        # context manager guarantees the file is closed even if a write fails
        with open('processed_data/energy_distribution_data', 'w') as datafile:
            datafile.write('1) naive distribution \n \n')
            datafile.write('{} \n \n'.format(E_list))
            datafile.write('2) analysis days \n \n')
            datafile.write('{} \n \n'.format(analysis_times))
            datafile.write('3) ancestor distributions per time point \n \n')
            datafile.write('{} \n \n'.format(ancestor_dists))
            datafile.write('4) memory distributions per time point \n \n')
            datafile.write('{} \n \n'.format(final_dists))

    return simdata
def oneGC(repeats=100):
    """
    fig 3C/D, showing clone number, cell number and mutation number per day
    in an average GC.

    Runs `repeats` simulations, evaluates the first GC of each run with
    GC_phases() and averages the resulting timecourses (NaN-aware) over all
    runs. Mutation counts are then summed into bins of 12 time steps (one
    day). The figure is saved to 'figures/oneGC.pdf' and the plotted data is
    written to 'processed_data/datafile_oneGC'.

    Parameters:
    repeats -- number of simulation runs to average over
    """
    cenL = []  # cells per GC, one timecourse per run
    clnL = []  # clones per GC
    mmL = []   # all mutations
    bmL = []   # beneficial mutations

    for r in range(repeats):
        # get runID from current system time
        runID = int(time.time())

        # run simulation and get filepath or dict
        simdata = main(runID, store_export='datafile', evalperday=12)

        # import required information
        (l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan,
         GCPans, ms_times, ms_vals, ms_fams, ms_muts, mut_list,
         E_list) = import_file(simdata)

        tList, cen, cln, endtime, mm, bm = GC_phases(GCPans[0], mut_list)
        cenL.append(cen)
        clnL.append(cln)
        mmL.append(mm)
        bmL.append(bm)

    # average over runs; tList is taken from the last run
    cen = np.nanmean(np.array(cenL), axis=0)
    cln = np.nanmean(np.array(clnL), axis=0)
    mm = np.nanmean(np.array(mmL), axis=0)
    bm = np.nanmean(np.array(bmL), axis=0)

    # bin mutation counts into days (12 time steps each)
    mmbin = []
    bmbin = []
    tend = int(tList[-1]/12)
    for start in range(0, 12*(tend + 1), 12):
        mm_sum = np.nansum(mm[start:start+12])
        if np.isinf(mm_sum):
            # an infinite total mutation count zeroes both bins of that day
            mmbin.append(0)
            bmbin.append(0)
        else:
            mmbin.append(mm_sum)
            bmbin.append(np.nansum(bm[start:start+12]))

    # plot
    days = np.array(tList)/12.
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(10, 10))

    ax1.plot(days, cen, label='cells/GC', color='crimson')
    ax1.plot(days, cln, label='clones/GC', color='cornflowerblue')
    ax1.set_ylabel('count')
    ax1.legend(loc=0)
    seaborn.despine()

    # raw strings keep the LaTeX backslashes without relying on Python
    # passing the invalid escape '\c' through unchanged
    ax2.plot(range(tend+1), mmbin, '-o', label='all', color='crimson')
    ax2.plot(range(tend+1), np.array(bmbin)*10, '-o',
             label=r'beneficial ($\cdot 10$)', color='cornflowerblue')
    ax2.legend(loc=0)
    ax2.set_ylabel(r'mutations/(clone$\cdot$day)')
    ax2.set_xlabel('time after infection (days)')
    seaborn.despine()

    pylab.savefig('figures/oneGC.pdf', bbox_inches='tight')

    # write the plotted data to file; the context manager guarantees the
    # file is closed even if a write fails
    with open('processed_data/datafile_oneGC', 'w') as datasave:
        datasave.write(str(tList)+'\n')
        datasave.write(str(cen)+'\n')
        datasave.write(str(cln)+'\n')
        datasave.write(str(mmbin)+'\n')
        datasave.write(str(bmbin)+'\n')