예제 #1
0
def small_scale(store_export='dictionary'):
    """ Run one simulation of the configured system and draw the
    small-scale plots.

    Plots requested:
            - population dynamics overview via population_plot (free naive
            cells, free memory cells and GC populations over time)
            - one GC_dynamics_plot call per GC; according to the original
            documentation this shows the clonal composition of the GC
            together with its memory output, and the evolution of the
            largest clone's affinities over mutation count (with artificial
            noise added to increase visibility).

    store_export is handed straight to main(): with 'datafile' the
    simulation data is stored in a hdf5 file for later use, with
    'dictionary' it is passed on internally and discarded after the run.

    Recommended only for small simulations (up to ~5 GCs and ~5k cells),
    as otherwise the plots get crowded and large.

    Returns whatever main() returned (a filepath or a dict).
    """
    # the current system time (whole seconds) identifies this run
    run_stamp = int(time.time())
    # simulate; 12 evaluations per day
    sim_result = main(run_stamp, store_export=store_export, evalperday=12)
    # unpack everything import_file provides; entries with a leading
    # underscore are not needed for the plots below
    (times, free_naive, free_memory, gc_counts, _lf_curve, _ag_curve,
     _eval_times, _free_panel, gc_panels, mem_times, mem_vals, mem_fams,
     mem_muts, _mut_list, _e_list) = import_file(sim_result)

    # overall population behaviour
    population_plot(times, free_naive, free_memory, gc_counts, run_stamp)

    # contents and memory output of each individual GC
    n_gcs = len(gc_counts)
    for gc_index in range(n_gcs):
        GC_dynamics_plot(
            gc_panels[gc_index],
            mem_times[gc_index],
            mem_fams[gc_index],
            mem_vals[gc_index],
            mem_muts[gc_index],
            run_stamp,
            gc_index,
        )

    return sim_result
예제 #2
0
def AM_effect_nkey(nkeys=[1, 5, 10, 15], repeats=100, d_export=True):
    """ Compute and plot the affinity improvement within single GCs for a
    list of nkey values.

    For every value in nkeys the simulation is run `repeats` times with one
    infection and one GC; the mean affinity per timepoint of that GC is
    averaged over all runs (NaN-aware mean and std) and the resulting
    curves are passed to AM_effect_plot.

    Side effects: overwrites cf.endtime, cf.tinf, cf.dose, cf.nGCs,
    cf.naive_pool and cf.memory_pool, and, per nkey value, cf.nkey, cf.lAg
    and cf.lAb. Other parameters remain untouched.

    Arguments:
        nkeys: iterable of nkey values to scan. The default list is only
            read, never mutated.
        repeats: number of individual GC reactions averaged per nkey value.
        d_export: if True, a text file with the computed means and stds is
            written to 'processed_data/AM_effect_data'.
    """

    # set single infection and single GC for this analysis
    cf.endtime = 30*12
    cf.tinf = [0*12]
    cf.dose = [1]
    cf.nGCs = 1
    cf.naive_pool = 1000*1  # size of the naive precursor pool
    cf.memory_pool = 100*1  # size of the initial unspecific memory pool

    def GC_affinity(GCPan):
        """ Given a GC panel, return its keys (timepoints) and the mean of
        the non-NaN 'affinity' entries for each of them."""
        # materialise the keys: a dict view cannot be indexed or divided
        # later on, a list works for dict-like and pandas-like panels alike
        tList = list(GCPan.keys())
        energies = [GCPan[tp]['affinity'].dropna().mean() for tp in tList]
        return tList, energies

    topElist = []
    for hs in nkeys:
        # set binding model parameters accordingly
        cf.nkey = hs
        cf.lAg = hs
        cf.lAb = 220 - hs
        eL = []  # energy timecourses of all runs for this nkey
        for _ in range(repeats):
            simdata = main(store_export='dictionary', evalperday=12)
            l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan, \
                GCPans, ms_times, ms_vals, ms_fams, ms_muts, mut_list, E_list\
                = import_file(simdata)
            # only one GC is simulated, so its panel is the first entry
            tList, energies = GC_affinity(GCPans[0])
            eL.append(energies)

        # mean and std over all runs, per timepoint
        runs = np.array(eL)
        eM = np.nanmean(runs, axis=0)
        eStd = np.nanstd(runs, axis=0)
        # tList is taken from the last run of this nkey value
        topElist.append((hs, tList, eM, eStd))

    # write information to file
    if d_export:
        # the context manager closes the file even if a write fails
        with open('processed_data/AM_effect_data', 'w') as datafile:
            datafile.write(
                'number of simulation runs per n_key = {} \n'.format(repeats))
            datafile.write(
                'n_key, time (days), mean(normalised energies), '
                'std(normalised energies) \n')
            for key_count, timepoints, mean_E, std_E in topElist:
                # timepoints are divided by 12 to express them in days
                datafile.write('{0}, {1}, {2}, {3}\n \n'.format(
                    key_count, np.array(timepoints)/12., mean_E, std_E))
    # plot
    AM_effect_plot(topElist)
예제 #3
0
파일: gc_maps.py 프로젝트: obrzts/gc_memo
def map_params(dose=[1], LFdecay=[10*12], nGCs=[1], nLFs=[25],
               naive_pool=[1000], nkey=[1, 2, 10], p_err=[0.003],
               tinf=[[0*12]],
               p_block=[0.5], repeats=1):

    """ Function for mapping out the effects of different parameter
    (combinations) on the standard TUCHMI protocol. All arguments except
    repeats are lists, either containing only the default value or a set of
    values, in which case all combinations of list arguments will be
    executed the given number of times (repeats). The default lists are
    only read, never mutated.

    The parameter set used is stored together with the complete timecourse
    of E_bind, SHM, entropy, memory number (means and std where applicable)
    and exported into a .h5 file in 'map_data/'.

    For running on a cluster, the first command line argument (sys.argv[1],
    e.g. a job ID) is used in the output file name for easier handling of
    errors etc.; without it the file is stamped with the system time.

    Side effects: overwrites cf.endtime, cf.dose, cf.LFdecay, cf.nGCs,
    cf.nLFs, cf.naive_pool, cf.memory_pool, cf.nkey, cf.lAg, cf.lAb,
    cf.p_err_FWR, cf.p_err_CDR, cf.tinf and cf.p_block_FWR.
    """

    # name the file after the ID given on the command line, or stamp it
    # with the system time if no ID was provided
    if len(sys.argv) > 1:
        filepath = 'map_data/data{}.h5'.format(sys.argv[1])
    else:
        filepath = 'map_data/data{}.h5'.format(int(time.time()*100))
    print(filepath)
    # the store is opened before the (long) simulations start, so an
    # unusable output path is noticed immediately
    datafile = pd.HDFStore(filepath)
    try:
        # dict for collecting result series
        seriesdict = {}

        # run until challenge timepoint
        cf.endtime = 126*12

        # get parameter combinations
        paramsets = list(product(dose, LFdecay, nGCs, nLFs, naive_pool, nkey,
                                 p_err, tinf, p_block))
        # for every parameter set, run the simulation repeat times and
        # collect the results
        for (dose_val, lf_decay, n_gcs, n_lfs, naive_per_gc, n_key, err_rate,
             inf_times, block_prob) in paramsets:
            # set parameters; the same dose is used for every infection
            cf.dose = [dose_val] * len(inf_times)
            cf.LFdecay = lf_decay
            cf.nGCs = n_gcs
            cf.nLFs = n_lfs
            cf.naive_pool = naive_per_gc*cf.nGCs
            cf.memory_pool = 100*cf.nGCs  # fixed! (or change manually)
            cf.nkey = n_key
            cf.lAg = n_key
            cf.lAb = 220 - n_key
            cf.p_err_FWR = err_rate
            cf.p_err_CDR = err_rate
            cf.tinf = inf_times
            cf.p_block_FWR = block_prob

            for _ in range(repeats):
                # run simulation; in 'minimal' mode main returns the
                # evaluation days and the result timecourses directly
                evaldays, l_mems, l_KDs, s_KDs, l_SHMs, s_SHMs, l_Entrs = \
                    main(store_export='minimal', evalperday=1)
                # store the timecourses together with the parameters used.
                # Identifier system time.
                ID = 'ID_{}'.format(time.time())
                series = pd.Series([cf.dose, cf.LFdecay, cf.nGCs, cf.nLFs,
                                    cf.naive_pool, cf.nkey, cf.p_block_FWR,
                                    cf.p_err_CDR, cf.tinf, cf.memory_pool,
                                    evaldays, np.array(l_mems),
                                    np.array(l_KDs), np.array(s_KDs),
                                    np.array(l_SHMs), np.array(s_SHMs),
                                    np.array(l_Entrs)],
                                   index=['dose', 'LFdecay', 'nGCs', 'nLFs',
                                          'naive_pool', 'nkey', 'p_block',
                                          'p_err', 'tinf', 'mem_pool',
                                          'evaldays', 'memcount', 'E_bind',
                                          'E_bind_std', 'SHM', 'SHM_std',
                                          'entropy'])
                seriesdict[ID] = series
        # make dataframe (one row per run) and store it
        df = pd.DataFrame(seriesdict)
        df = df.transpose()
        datafile['data'] = df
    finally:
        # close the store even if a simulation or the write failed
        datafile.close()
    print('END')
예제 #4
0
def TUCHMI_sampling(store_export='datafile', d_export=True, subsample=12):
    """ Performs a single simulation of a given size using the TUCHMI
    vaccination protocol, samples memory from the simulated pool and creates
    several plots summarising the information. User settings regarding the
    protocol (cf.endtime, cf.tinf, cf.dose) are overwritten.

    Plots produced include:
        - mean affinity of the cell pool over time
        - mean SHMs, normalised entropy and clonal expansion (fraction of
        cells sampled from clones that appeared more than once within the
        sample) in samples of size subsample
        - scatter plot of affinity over mutational status in polyclonal
        samples at TUCHMI time points I, II and III
        - scatter plot of affinity over mutational status at a clonal level,
        cells sampled from three TUCHMI time points merged into single plots
        (but sampling time point encoded in colouring)

    If store_export is set 'datafile', the simulation data is stored in a
    hdf5 file for future purposes, for 'dictionary' the data is passed
    internally and lost after the run.

    If d_export is set True, a text file containing the sample statistics
    is written to 'processed_data/TUCHMI_sampling_data'.

    Subsample gives the number of cells to be sampled at each timepoint in
    order to calculate entropy and unique fraction. All sampling uses
    np.random.choice with its default settings, i.e. with replacement.

    Can be used for all simulation sizes, but is especially useful for larger
    simulations (e.g. >=50 GCs, 50k cells).

    Returns whatever main() returned (a filepath or a dict).
    """
    # give protocol
    cf.endtime = 126 * 12
    cf.tinf = [0 * 12, 28 * 12, 56 * 12]
    cf.dose = [1, 1, 1]

    # get runID from current system time
    runID = int(time.time())

    # run simulation and get filepath or dict
    simdata = main(runID, store_export=store_export, evalperday=1)

    # import required information
    l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan, GCPans, \
        ms_times, ms_vals, ms_fams, ms_muts, mut_list, E_list = \
        import_file(simdata)

    # for affinity-mutation scatter plot, downsample for visibility: the
    # fraction is chosen such that ~100 cells are drawn at pick_tp
    pick_tp = 35 * 12
    samplefrac = 100. / len(freePan.sel(timepoint=pick_tp).dropna("dim_0"))

    # get list of lists to catch values at every timepoint
    tList = list(freePan["timepoint"].values)
    TT = len(tList)
    SHM_means = [[] for t in range(TT)]
    Entropies = [[] for t in range(TT)]
    clusterfracs = [[] for t in range(TT)]

    # ---- Cell pool affinity over time ----
    # get mean affinity at all time points
    Elist = []
    for timepoint in tList:
        C = freePan.sel(timepoint=timepoint).loc[dict(
            dim_1="affinity")].dropna("dim_0").values.mean()
        Elist.append(C)
    # pass energies to plot function
    pool_affinity_plot(tList, Elist)

    # ---- Mean SHM and clonal expansion within sample of size subsample ----
    # sample 100 times to calculate standard deviations
    for nn in range(100):
        for tp in range(TT):
            # select by the actual timepoint label so that the statistics
            # collected at index tp match tList[tp] used for plotting
            freePan_no_na = freePan.sel(timepoint=tList[tp]).dropna("dim_0")
            # cellnumber to be sampled is either subsample or, if less cells
            # are available (more of a hypothetic case really), all cells
            cellnum = min(subsample, len(freePan_no_na))
            if cellnum > 0:
                cell_id = np.random.choice(len(freePan_no_na), cellnum)
                cells = freePan_no_na[cell_id, :]
                c_muts = list(cells.loc[dict(dim_1="mutations")].values)
                SHM_means[tp].append(np.nanmean(c_muts))
                # evaluate entropies and clusterfractions
                CC = Counter(list(cells.loc[dict(dim_1="family")].values))
                if cellnum > 1:
                    # normalise by the maximum entropy of cellnum cells
                    Entropies[tp].append(
                        scipy.stats.entropy(list(CC.values()), base=2) /
                        math.log(cellnum, 2))
                else:
                    # log2(1) == 0: the normalised entropy of a single cell
                    # is undefined, record NaN instead of dividing by zero
                    Entropies[tp].append(np.nan)
                # count again to find how many clones have one member only,
                # calculate clusterfrac from this
                sizedist = list(CC.values())
                C2 = Counter(sizedist)
                uniquefrac = float(C2[1]) / cellnum
                clusterfracs[tp].append(1 - uniquefrac)
            else:
                # no cells at this timepoint: keep the lists rectangular
                SHM_means[tp].append(np.nan)
                Entropies[tp].append(np.nan)
                clusterfracs[tp].append(np.nan)

    # pass information to plotting function
    MSHM = np.nanmean(SHM_means, axis=1)
    SSHM = np.nanstd(SHM_means, axis=1)
    MEntropies = np.nanmean(Entropies, axis=1)
    SEntropies = np.nanstd(Entropies, axis=1)
    Mclusterfracs = np.nanmean(clusterfracs, axis=1)
    Sclusterfracs = np.nanstd(clusterfracs, axis=1)

    sample_statistics_plot(subsample, tList, MSHM, SSHM, MEntropies,
                           SEntropies, Mclusterfracs, Sclusterfracs)

    # ---- Plot of affinity/mutations on tps I, II and III ----
    # sample cells 7 days post each infection, record SHM, KD and origin
    # (memory versus naive first activated ancestor)
    timecourse = [7 * 12, 35 * 12, 63 * 12]
    SHM_list = [[] for t in timecourse]
    KD_list = [[] for t in timecourse]
    orglist = [[] for t in timecourse]

    for d, tp in enumerate(timecourse):
        freePan_no_na = freePan.sel(timepoint=tp).dropna("dim_0")
        cellnum = int(np.round(len(freePan_no_na) * samplefrac))
        if cellnum > 0:
            cell_id = np.random.choice(len(freePan_no_na), cellnum)
            cells = freePan_no_na[cell_id, :]
            kdl = list(cells.loc[dict(dim_1="affinity")].values)
            # transform norm E to KD
            kdll = np.exp(cf.y0 + np.array(kdl) * cf.m)
            KD_list[d] = list(kdll)
            # get mutation counts and origin
            SHM_list[d] = list(cells.loc[dict(dim_1="mutations")].values)
            orglist[d] = list(cells.loc[dict(dim_1="origin")].values)

    # pass information to plot function
    sample_scatter_plot(KD_list, SHM_list, orglist)

    # ---- Affinity/mutation plots for individual clusters ----
    # samples from the memory pool at the given timepoints, split information
    # into clusters and plot SHM/KD scatter plots for some of these clusters.

    # lists to collect SHM, KD values, families and timepoints for all panels
    SHM_list = []
    KD_list = []
    fam_list = []
    tp_list = []

    for tp in timecourse:
        freePan_no_na = freePan.sel(timepoint=tp).dropna("dim_0")
        cellnum = int(len(freePan_no_na) * samplefrac)
        if cellnum > 0:
            cell_id = np.random.choice(len(freePan_no_na), cellnum)
            cells = freePan_no_na[cell_id, :]
            kdl = list(cells.loc[dict(dim_1="affinity")].values)
            # transform norm E to KD
            kdll = np.exp(cf.y0 + np.array(kdl) * cf.m)
            KD_list += list(kdll)
            SHM_list += list(cells.loc[dict(dim_1="mutations")].values)
            fam_list += list(cells.loc[dict(dim_1="family")].values)
            tp_list += [tp] * cellnum

    # count into families and keep those sampled more than once as clusters
    famcounter = Counter(fam_list)
    clusters = [fam for fam in famcounter if famcounter[fam] > 1]
    # position of every cluster in the per-cluster lists below
    cluster_pos = {fam: pos for pos, fam in enumerate(clusters)}

    # make separate lists for SHM, KD and TP (defining color) within clusters
    # and add information to list
    iSHMs = [[] for i in clusters]
    iKDs = [[] for i in clusters]
    iTPs = [[] for i in clusters]

    for fam, shm, kd, tp in zip(fam_list, SHM_list, KD_list, tp_list):
        if fam in cluster_pos:
            ii = cluster_pos[fam]
            iSHMs[ii].append(shm)
            iKDs[ii].append(kd)
            # give different colors for different timepoints
            if tp == timecourse[0]:
                iTPs[ii].append('lightcoral')
            elif tp == timecourse[1]:
                iTPs[ii].append('indianred')
            else:
                iTPs[ii].append('firebrick')
    # pass information to plot function
    clonal_scatter_plot(iSHMs, iKDs, iTPs)

    # write information to file
    if d_export:
        # the context manager closes the file even if a write fails
        with open('processed_data/TUCHMI_sampling_data', 'w') as datafile:
            datafile.write('1) SAMPLE STATISTICS \n \n')
            datafile.write('sampled fraction = {} \n \n'.format(samplefrac))
            datafile.write('timecourse (days) \n {} \n \n'.format(
                np.array(tList) / 12.))
            datafile.write(
                'SHMs of cells in sample, mean and std \n {} \n {} \n \n'
                .format(MSHM, SSHM))
            datafile.write(
                'normalised Shannon entropy of cells in sample, mean and std \n {} \n {} \n \n'
                .format(MEntropies, SEntropies))
            datafile.write(
                'fraction of non-unique cells in sample, mean and std \n {} \n {} \n \n'
                .format(Mclusterfracs, Sclusterfracs))

    return simdata
예제 #5
0
def stacked_mutations(store_export='dictionary', d_export=True, repeats=10):
    """ Performs a number of simulation runs and computes histograms of the
    affinities of improved, impaired and unchanged binders (relative to
    their 'affinity0' entry) at a single given timepoint. The counts are
    accumulated over all repeats and passed to stacked_energy_plot.

    If store_export is set 'datafile', the simulation data is stored in a
    hdf5 file for future purposes, for 'dictionary' the data is passed
    internally and lost after the run.

    If d_export is set True, the individual as well as the summed histogram
    counts are written to 'processed_data/stacked_histogram_data'.

    Returns None."""
    # parameters relevant to this analysis
    # evaluation timepoint; used as an index into the list of timepoints,
    # which corresponds to days for evalperday=1
    analysis_time = 29
    bins = np.linspace(0.6, 1, 17)
    n_bins = len(bins) - 1
    # collect results
    sum_zero = np.zeros(n_bins)
    sum_plus = np.zeros(n_bins)
    sum_minus = np.zeros(n_bins)
    list_zero = []
    list_plus = []
    list_minus = []

    for _ in range(repeats):
        # get runID from current system time
        runID = int(time.time())
        # run simulation and get filepath or dict
        simdata = main(runID, store_export=store_export, evalperday=1)
        # import required information
        l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan, \
            GCPans, ms_times, ms_vals, ms_fams, ms_muts, mut_list, E_list = \
            import_file(simdata)
        # extract the affinities and ancestor affinities at the analysis point
        tList = list(freePan["timepoint"].values)

        freePan_no_na = freePan.sel(
            timepoint=tList[analysis_time]).dropna("dim_0")
        # as many cells are drawn as are present (possibility of subsampling
        # here); np.random.choice draws with replacement by default
        cellnum = len(freePan_no_na)
        cell_id = np.random.choice(len(freePan_no_na), cellnum)
        cells = freePan_no_na[cell_id, :]

        # convert once, the arrays are compared three times below
        final_dist = np.array(list(
            cells.loc[dict(dim_1="affinity")].dropna("dim_0").values))
        ancestor_dist = np.array(list(
            cells.loc[dict(dim_1="affinity0")].dropna("dim_0").values))

        # split current affinities into unchanged, improved and impaired
        unchanged_list = final_dist[np.where(
            ancestor_dist == final_dist)[0]]
        improved_list = final_dist[np.where(
            ancestor_dist < final_dist)[0]]
        impaired_list = final_dist[np.where(
            ancestor_dist > final_dist)[0]]

        # make histograms, store information both in list and in sum.
        U_counts, _edges = np.histogram(unchanged_list, bins=bins)
        plus_counts, _edges = np.histogram(improved_list, bins=bins)
        minus_counts, _edges = np.histogram(impaired_list, bins=bins)

        # collect results
        sum_zero += U_counts
        sum_plus += plus_counts
        sum_minus += minus_counts
        list_zero.append(U_counts)
        list_plus.append(plus_counts)
        list_minus.append(minus_counts)
    # total number of cells that fell into the histogram range
    cellsum = np.sum(sum_zero) + np.sum(sum_plus) + np.sum(sum_minus)

    # plot
    stacked_energy_plot(bins, sum_plus, sum_minus, sum_zero, analysis_time)

    if d_export:
        # the context manager closes the file even if a write fails
        with open('processed_data/stacked_histogram_data', 'w') as datafile:
            datafile.write('1) day \n \n')
            datafile.write('{} \n \n'.format(analysis_time))

            datafile.write('2) bins \n \n')
            datafile.write('{} \n \n'.format(bins))

            datafile.write('3) runs \n \n')
            datafile.write('{} \n \n'.format(repeats))

            datafile.write('4) sum of counts with unchanged energies\n \n')
            datafile.write('{} \n \n'.format(sum_zero))

            datafile.write('5) sum of counts with improved energies\n \n')
            datafile.write('{} \n \n'.format(sum_plus))

            datafile.write('6) sum of counts with impaired energies\n \n')
            datafile.write('{} \n \n'.format(sum_minus))

            datafile.write('7) list of counts with unchanged energies\n \n')
            datafile.write('{} \n \n'.format(list_zero))

            datafile.write('8) list of counts with improved energies\n \n')
            datafile.write('{} \n \n'.format(list_plus))

            datafile.write('9) list of counts with impaired energies\n \n')
            datafile.write('{} \n \n'.format(list_minus))

            # written as fractions of cellsum
            datafile.write(
                '10) percentage of umutated, improved, impaired \n \n')
            datafile.write('{}, {}, {}'.format(
                np.sum(sum_zero) / cellsum,
                np.sum(sum_plus) / cellsum,
                np.sum(sum_minus) / cellsum))
예제 #6
0
def selection_vs_mutation(store_export='dictionary', d_export=True):
    """ Performs a single simulation using the currently configured protocol.
    At the analysis timepoints, cells are sampled from the free pool and
    their 'affinity0' entries (ancestor affinities) as well as their current
    affinities are written to lists. Together with the binding energies
    returned by import_file as E_list (the naive distribution), these lists
    are passed on to be plotted.

    Plots produced include a collection of three histograms (unselected,
    selected germline energies, actual energies after mutations) for each
    queried timepoint and a more complex scatter plot with marginal histograms
    for each queried timepoint.

    If store_export is set 'datafile', the simulation data is stored in a
    hdf5 file for future purposes, for 'dictionary' the data is passed
    internally and lost after the run.

    If d_export is set True, the sampled distributions are written to
    'processed_data/energy_distribution_data'.

    Returns whatever main() returned (a filepath or a dict).
    """
    # parameters relevant to this analysis
    # evaluation timepoints; used as indices into the list of timepoints,
    # which corresponds to days for evalperday=1
    analysis_times = [29]
    # prepare lists
    ancestor_dists = []
    final_dists = []
    # get runID from current system time
    runID = int(time.time())
    # run simulation and get filepath or dict
    simdata = main(runID, store_export=store_export, evalperday=1)
    # import required information
    l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan, GCPans, \
        ms_times, ms_vals, ms_fams, ms_muts, mut_list, E_list = \
        import_file(simdata)
    # extract the affinities and ancestor affinities at the analysis points
    tList = list(freePan["timepoint"].values)
    for tp in analysis_times:
        freePan_no_na = freePan.sel(timepoint=tList[tp]).dropna("dim_0")
        # as many cells are drawn as are present; an earlier cap of 2000
        # cells was immediately overridden and therefore never had an effect.
        # np.random.choice draws with replacement by default.
        cellnum = len(freePan_no_na)
        cell_id = np.random.choice(len(freePan_no_na), cellnum)
        cells = freePan_no_na[cell_id, :]
        final_dists.append(list(cells.loc[dict(dim_1="affinity")].values))
        ancestor_dists.append(
            list(cells.loc[dict(dim_1="affinity0")].values))
    # send energy lists to histogram and scatter plot
    for tp, ancestors, finals in zip(analysis_times, ancestor_dists,
                                     final_dists):
        energy_distributions_plot(E_list, ancestors, finals, tp)
        energy_scatter_plot(ancestors, finals, tp)

    if d_export:
        # the context manager closes the file even if a write fails
        with open('processed_data/energy_distribution_data', 'w') as datafile:
            datafile.write('1) naive distribution \n \n')
            datafile.write('{} \n \n'.format(E_list))

            datafile.write('2) analysis days \n \n')
            datafile.write('{} \n \n'.format(analysis_times))

            datafile.write('3) ancestor distributions per time point \n \n')
            datafile.write('{} \n \n'.format(ancestor_dists))

            datafile.write('4) memory distributions per time point \n \n')
            datafile.write('{} \n \n'.format(final_dists))

    return simdata
예제 #7
0
def oneGC(repeats=100):
    """ fig 3C/D, showing clone number, cell number and mutation number per day
    in an average GC.

    Runs the simulation `repeats` times (12 evaluations per day, data stored
    via store_export='datafile'), evaluates the first GC of every run with
    GC_phases and averages the resulting curves over all runs (NaN-aware).
    The figure is saved to 'figures/oneGC.pdf' and the plotted data is
    written to 'processed_data/datafile_oneGC'.
    """

    cenL = []
    clnL = []
    mmL = []
    bmL = []
    for _ in range(repeats):
        # get runID from current system time
        runID = int(time.time())
        # run simulation and get filepath or dict
        simdata = main(runID, store_export='datafile', evalperday=12)
        # import required information
        l_times, l_fn, l_fm, l_GCs, LFcurve, Agcurve, evaltimes, freePan, \
            GCPans, ms_times, ms_vals, ms_fams, ms_muts, mut_list, \
            E_list = import_file(simdata)
        tList, cen, cln, endtime, mm, bm = GC_phases(GCPans[0], mut_list)

        cenL.append(cen)
        clnL.append(cln)
        mmL.append(mm)
        bmL.append(bm)

    # average every curve over all runs; tList is taken from the last run
    cen = np.nanmean(np.array(cenL), axis=0)
    cln = np.nanmean(np.array(clnL), axis=0)
    mm = np.nanmean(np.array(mmL), axis=0)
    bm = np.nanmean(np.array(bmL), axis=0)

    # bin mutation counts into days (12 timesteps each)
    mmbin = []
    bmbin = []
    tend = int(tList[-1]/12)
    for start in range(0, 12*(tend+1), 12):
        mm_day = np.nansum(mm[start:start+12])
        if np.isinf(mm_day):
            # an infinite daily sum is recorded as 0 for both curves
            mmbin.append(0)
            bmbin.append(0)
        else:
            mmbin.append(mm_day)
            bmbin.append(np.nansum(bm[start:start+12]))

    # ---- plot ----
    days = np.array(tList)/12.
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(10, 10))
    ax1.plot(days, cen, label='cells/GC', color='crimson')
    ax1.plot(days, cln, label='clones/GC', color='cornflowerblue')
    ax1.set_ylabel('count')
    ax1.legend(loc=0)
    seaborn.despine()

    ax2.plot(range(tend+1), mmbin, '-o', label='all', color='crimson')
    # beneficial mutations are scaled by 10 for visibility; raw strings keep
    # the backslash of the TeX command without an invalid escape sequence
    ax2.plot(range(tend+1), np.array(bmbin)*10, '-o',
             label=r'beneficial ($\cdot 10$)', color='cornflowerblue')
    ax2.legend(loc=0)
    ax2.set_ylabel(r'mutations/(clone$\cdot$day)')
    ax2.set_xlabel('time after infection (days)')

    seaborn.despine()
    pylab.savefig('figures/oneGC.pdf', bbox_inches='tight')

    # write the curves to file; the context manager closes the file even if
    # a write fails
    with open('processed_data/datafile_oneGC', 'w') as datasave:
        datasave.write(str(tList)+'\n')
        datasave.write(str(cen)+'\n')
        datasave.write(str(cln)+'\n')
        datasave.write(str(mmbin)+'\n')
        datasave.write(str(bmbin)+'\n')