Ejemplo n.º 1
0
def fitnesscost_distribution(regions, minor_af, synnonsyn, synnonsyn_uc, mut_rates, fname=None, ref=None):
    '''
    Produce a figure of the distribution of selection coefficients separately for
    synonymous sites, synonymous sites in reading frame overlaps, and
    nonsynonymous sites (one histogram panel each).
    FIGURE 4 of the manuscript

    The cost of a site is estimated as mut_rate/(minor_af + af_cutoff) and is
    clipped to the interval [0.001, 0.1] before plotting.

    Args:
        regions: a region name or an iterable of region names; each name is
            used as key into the dictionaries below.
        minor_af: mapping region -> array of minor allele frequencies.
            Positions holding NaN are excluded.
        synnonsyn: mapping region -> boolean array, used as mask for the
            'synonymous' panel.
        synnonsyn_uc: mapping region -> boolean array. Sites set here but
            not in synnonsyn go to the 'syn-overlaps' panel, sites not set
            here go to the 'nonsyn' panel.
        mut_rates: mapping region -> array of mutation rates.
        fname: if not None, the figure is saved as fname.png/.svg/.pdf
        ref: optional reference object. If given, the clipped costs are
            written into ref.fitness_cost (created as zeros shaped like
            ref.entropy when missing) at the coordinates given by
            ref.annotation[region].location.
    '''
    from util import add_panel_label

    if ref is not None and not hasattr(ref, 'fitness_cost'):
        ref.fitness_cost = np.zeros_like(ref.entropy)
    fig, axs = plt.subplots(1, 3, sharey=True, figsize=(10,6))

    if isinstance(regions, str): # if only one region is passed as string
        regions = [regions]

    for ni, label_str in enumerate(('synonymous', 'syn-overlaps', 'nonsyn')):
        ax = axs[ni]
        costs_by_region = []
        for region in regions:
            if label_str=='synonymous':
                ind = synnonsyn[region]
            elif label_str=='syn-overlaps':
                ind = synnonsyn_uc[region]&(~synnonsyn[region])
            else:
                ind = ~synnonsyn_uc[region]
            ind = ind&(~np.isnan(minor_af[region]))

            s_region = np.array(mut_rates[region][ind]/(minor_af[region][ind]+af_cutoff),
                                dtype=float)
            # values outside the plotted range are collected in the outermost bins
            s_region = np.clip(s_region, 0.001, 0.1)

            # write back per region: the mask and the annotation coordinates
            # are only valid for the region they were computed from, and the
            # number of values has to match the number of selected sites.
            if ref is not None:
                bg = ref.annotation[region].location.start
                ed = ref.annotation[region].location.end
                ref.fitness_cost[bg:ed][ind] = s_region
            costs_by_region.append(s_region)

        # pool all regions for the histogram of this panel
        s = np.concatenate(costs_by_region) if costs_by_region else np.array([])
        if len(s):
            # weights normalize the histogram to the fraction of sites
            ax.hist(s, color=cols[ni],
                 weights=np.ones(len(s), dtype=float)/len(s), bins=np.logspace(-3,-1,11), label=label_str+', n='+str(len(s)))
        ax.set_xscale('log')
        ax.tick_params(labelsize=fs*0.8)
        ax.text(0.1, 0.8, 'position: '+str(ni))
        if ni==0:
            # y axis is shared, label only the leftmost panel
            ax.set_ylabel('fraction of sites', fontsize=fs)
            ax.set_yscale('linear')
        ax.set_xlabel('fitness cost', fontsize=fs)
        ax.set_xticks([0.001, 0.01, 0.1])
        # outermost ticks are labeled as open intervals because of the clipping
        ax.set_xticklabels([r'$<10^{-3}$', r'$10^{-2}$', r'$>10^{-1}$'])
        ax.legend(loc=2, fontsize=fs*0.8)

        add_panel_label(ax, ['A', 'B', 'C'][ni],
                        x_offset=-0.2 - 0.1 * (ni == 0))

    plt.tight_layout()
    if fname is not None:
        for ext in ['png', 'svg', 'pdf']:
            plt.savefig(fname+'.'+ext)
Ejemplo n.º 2
0
def plot_figure_1(data, mu, dmulog10, muA, dmuAlog10,suffix=''):
    '''Assemble, show and save figure 1 of the paper.

    The two top panels are drawn by plot_mutation_increase, the wide bottom
    panel by plot_mutation_rate_graph. The figure is written to
    ../figures/figure_1<suffix>.{svg,png,pdf}.
    '''
    print('Plot Figure 1')
    fig = plt.figure(figsize=(12, 11))
    grid_shape = (2, 2)
    ax_top_left = plt.subplot2grid(grid_shape, (0,0))
    ax_top_right = plt.subplot2grid(grid_shape, (0,1))
    ax_bottom = plt.subplot2grid(grid_shape, (1, 0), colspan=2)

    # linear regression panels
    plot_mutation_increase(data, mu=mu, axs=[ax_top_left, ax_top_right])

    # collect rates and their log10 errors in one table for the arrow matrix
    rate_table = pd.DataFrame({'mu': mu,
                               'muA': muA,
                               'dmulog10': dmulog10,
                               'dmuAlog10': dmuAlog10,
                               })
    plot_mutation_rate_graph(rate_table, ax=ax_bottom)
    plt.tight_layout()

    # panel labels are placed after the layout has been fixed
    from util import add_panel_label
    panel_labels = ((ax_top_left, 'A', -0.2),
                    (ax_top_right, 'B', -0.2),
                    (ax_bottom, 'C', -0.08))
    for panel, letter, shift in panel_labels:
        add_panel_label(panel, letter, x_offset=shift)

    plt.ion()
    plt.show()

    for ext in ('svg', 'png', 'pdf'):
        fig.savefig('../figures/figure_1'+suffix+'.'+ext)
Ejemplo n.º 3
0
def plot_figure_1(data, mu, dmulog10, muA, dmuAlog10, suffix=''):
    '''Draw figure 1 of the paper, display it and save it in three formats.

    Layout: a 2x2 grid whose top row holds the two panels filled by
    plot_mutation_increase and whose bottom row is a single panel filled by
    plot_mutation_rate_graph. Output goes to
    ../figures/figure_1<suffix>.svg/.png/.pdf
    '''
    print('Plot Figure 1')
    fig = plt.figure(figsize=(12, 11))
    panel_a = plt.subplot2grid((2, 2), (0, 0))
    panel_b = plt.subplot2grid((2, 2), (0, 1))
    panel_c = plt.subplot2grid((2, 2), (1, 0), colspan=2)

    # top row: linear regression
    plot_mutation_increase(data, mu=mu, axs=[panel_a, panel_b])

    # bottom row: matrix of arrows, fed with rates and their log10 errors
    columns = {
        'mu': mu,
        'muA': muA,
        'dmulog10': dmulog10,
        'dmuAlog10': dmuAlog10,
    }
    mu_all = pd.DataFrame(columns)
    plot_mutation_rate_graph(mu_all, ax=panel_c)
    plt.tight_layout()

    # label the panels once the layout is final
    from util import add_panel_label
    for letter, panel, x_offset in zip('ABC',
                                       (panel_a, panel_b, panel_c),
                                       (-0.2, -0.2, -0.08)):
        add_panel_label(panel, letter, x_offset=x_offset)

    plt.ion()
    plt.show()

    for ext in ['svg', 'png', 'pdf']:
        target = '../figures/figure_1' + suffix + '.' + ext
        fig.savefig(target)
Ejemplo n.º 4
0
def plot_to_away(data, fig_filename=None, figtypes=['.png', '.svg', '.pdf'],
                 sequence_type='nuc'):
    '''Makes a two panel figure summarizing the results on reversion

    Args:
        data (dict): data to be plotted (see below)
    '''

    import seaborn as sns
    from matplotlib import pyplot as plt

    plt.ion()
    sns.set_style('darkgrid')
    figpath = 'figures/'
    fs=fig_fontsize
    fig_size = (1.0*fig_width, 0.6*fig_width)
    fig, axs = plt.subplots(1, 2, figsize=fig_size)
    nbs=100 # number of bootstrap replicates

    # set the colors for the plots, both panels use the same color scheme
    cols = HIVEVO_colormap()
    colors = [cols(x) for x in [0.0, 0.33, 0.66, 0.99]]

    ####################################################################################
    # make panel divergence vs entropy
    ####################################################################################
    ax=axs[1]
    if sequence_type == 'nuc':
        Sbins = np.array([0, 0.02, 0.08, 0.25, 2])
    else:
        Sbins = np.array([0, 0.1, 0.3, 3])

    Sbinc = 0.5*(Sbins[1:]+Sbins[:-1])
    def get_Sbin_mean(df): # regroup and calculate mean in entropy bins
        return df.groupby(by=['S_bin'], as_index=False).mean()
    color_count = 0
    for lblstr, subtype, ls in [('subtype', 'patient', '--'), ('group M', 'any', '-')]:
        mv = data[subtype]['minor_variants']
        # subset to a specific time interval
        mv = mv.loc[(mv.loc[:,'time'] > 1500)&(mv.loc[:,'time'] < 2500),:]
        print "average time:", mv.loc[:,'time'].mean() / 365.25
        mv.loc[:,['af_away_minor', 'af_away_derived', 'af_to_minor', 'af_to_derived']] = \
            mv.loc[:,['af_away_minor', 'af_away_derived', 'af_to_minor', 'af_to_derived']].astype(float)
        mean_to_away =get_Sbin_mean(mv)
        bs = boot_strap_patients(mv,
                                 eval_func=get_Sbin_mean,
                                 n_bootstrap=nbs, 
                                 columns=['af_away_minor',
                                          'af_away_derived',
                                          'af_to_minor',
                                          'af_to_derived',
                                          'S_bin'])

        print mean_to_away
        col = 'af_away_derived'
        ax.errorbar(Sbinc,
                    mean_to_away.loc[:,col], 
                    replicate_func(bs, col, np.std, bin_index='S_bin'),
                    ls=ls, 
                    lw=3,
                    label='founder = '+lblstr,
                    c=colors[color_count])
        color_count+=1
        col = 'af_to_derived'
        ax.errorbar(Sbinc, mean_to_away.loc[:,col], 
                    replicate_func(bs, col, np.std, bin_index='S_bin'), ls=ls,
                    lw = 3, label = u'founder \u2260 '+lblstr, c=colors[color_count])
        color_count+=1
    ax.set_yscale('log')
    ax.set_xscale('log')
    ax.set_ylabel('Divergence from founder', fontsize = fig_fontsize)
    ax.set_xlabel('Variability [bits]', fontsize = fig_fontsize)
    add_panel_label(ax, 'B', x_offset=-0.32)
    for item in ax.get_yticklabels()+ax.get_xticklabels():
        item.set_fontsize(fs-2)
    ax.set_xlim([0.005, 2])

    ####################################################################################
    # print reversion statistics
    ####################################################################################
    def get_time_bin_means(df): # get mean of divergence, reversion divergence and time for each time bin
        return df.loc[:,['divergence', 'reversion','time_bin']].groupby(by=['time_bin'], as_index=False).mean()
    for subtype in ['patient', 'any']:
        to_away = data[subtype]['to_away']
        time_bins = np.array([0, 500, 1000, 1500, 2500, 3500])
        binc = 0.5*(time_bins[1:]+time_bins[:-1])
        add_binned_column(to_away, time_bins, 'time')
        to_away.loc[:,['reversion', 'divergence']] = \
                to_away.loc[:,['reversion', 'divergence']].astype(float)
        rev_div = get_time_bin_means(to_away)
        bs = boot_strap_patients(to_away, get_time_bin_means,  n_bootstrap = nbs, 
                                 columns = ['reversion','divergence','time_bin'])
        reversion_std = replicate_func(bs, 'reversion', np.std, bin_index='time_bin')
        total_div_std = replicate_func(bs, 'divergence', np.std, bin_index='time_bin')
        fraction = rev_div.loc[:,'reversion']/rev_div.loc[:,'divergence']
        print "Comparison:", subtype
        print "Reversions:\n", rev_div.loc[:,'reversion']
        print "Divergence:\n", rev_div.loc[:,'divergence']
        # print the fraction of divergence that is due to reversion at different times
        # gives errors as standard deviations over patient bootstraps
        print "Fraction:"
        for frac, total, num_std, denom_std in zip(fraction, rev_div.loc[:,'divergence'],reversion_std, total_div_std):
            print frac, '+/-', np.sqrt(num_std**2/total**2 + denom_std**2*frac**2/total**2)

        print "Consensus!=Founder:",np.mean(data[subtype]['consensus_distance'].values())

    ####################################################################################
    # make panel divergence vs time
    ####################################################################################
    to_histogram=data['to_histogram']
    away_histogram=data['away_histogram']
    time_bins=data['time_bins']
    af_bins=data['af_bins']
    af_binc=0.5*(af_bins[1:]+af_bins[:-1])

    def bin_time(freq_arrays, time_bins):  
        '''sum up allele frequency histgrams corresponding to the same time bin'''
        binned_hists = [np.zeros_like(af_binc) for ti in time_bins[1:]]
        for hists in freq_arrays.values():
            for t, y in hists.iteritems():
                ti = np.searchsorted(time_bins, t)
                if ti>0 and ti<len(time_bins):
                    binned_hists[ti-1]+=y

        return binned_hists

    def get_div(afhist, fixed=False):
        '''return the fraction of fixed alleles or the mean divergence'''
        if fixed:
            return afhist[0]/afhist.sum()
        else:
            return np.array(afhist[:-1]*(1-af_binc[:-1])).sum()/afhist.sum()

    from random import choice
    ax = axs[0]
    time_binc = 0.5*(time_bins[1:]+time_bins[:-1])
    sym='o'
    fs = fig_fontsize
    color_count=0
    for subtype, ls in [('patient', '--'), ('any','-')]:
        for toaway, H in [(u'founder = '+('group M' if subtype=='any' else 'subtype'),  away_histogram[subtype]), 
                          (u'founder \u2260 '+('group M' if subtype=='any' else 'subtype'), to_histogram[subtype])]:
            mean_hists = bin_time(H,time_bins)
            div = [get_div(mean_hists[ti]) for ti in range(len(time_bins)-1)]
            # make replicates and calculate bootstrap confidence intervals
            replicates = []
            all_keys = H.keys()
            for ri in xrange(nbs):
                bootstrap_keys = [all_keys[ii] for ii in np.random.randint(len(all_keys), size=len(all_keys))]
                tmp = bin_time({key:H[key] for key in bootstrap_keys}, time_bins)
                replicates.append([get_div(tmp[ti]) for ti in range(len(time_bins)-1)])
            std_dev = np.array(replicates).std(axis=0)
            ax.errorbar(time_binc/365.25, div, std_dev, ls = ls, lw=3, c=colors[color_count])
            ax.plot(time_binc/365.25, div, label = toaway, ls = ls, lw=3, c=colors[color_count]) # plot again with label to avoid error bars in legend
            color_count+=1

    if sequence_type == 'nuc':
        ax.set_ylim([0,0.16])
        ax.set_yticks([0, 0.04, 0.08, 0.12])
    else:
        ax.set_ylim([0,0.32])
        ax.set_yticks([0, 0.08, 0.16, 0.24])

    ax.set_xlabel('ETI [years]', fontsize=fs)
    ax.set_ylabel('Divergence from founder', fontsize=fs)
    ax.legend(loc=2, fontsize=fs-2, labelspacing=0)
    add_panel_label(ax, 'A', x_offset=-0.32)
    ax.tick_params(axis='both', labelsize=fs-2)
    plt.tight_layout(pad=0.3, h_pad=0.5) #rect=(0.0, 0.02, 0.98, 0.98), pad=0.05, h_pad=0.5, w_pad=0.4)
    for ext in figtypes:
        fig.savefig(fig_filename+ext)
Ejemplo n.º 5
0
def plot_divdiv(data, fig_filename=None, figtypes=['.png', '.svg', '.pdf']):
    '''
    plot divergence and diversity of synonymous and nonsynonymous mutations
    includes:
        - panels A/B: divergence and diversity over time per region and
          mutation class
        - panel C: a panel that compares syn diversity/nonsyn divergence
        - panel D: site frequency spectra of syn and nonsyn mutations

    Args:
        data (dict): the following entries are read:
            'divdiv': table with the columns 'region', 'mutclass', 'time',
                'diversity', 'divergence' and 'pcode'. It is passed to
                add_binned_column and hence presumably modified in place
                (a 'time_bin' column is expected afterwards).
            'divdiv_corr': 3-tuple (nonsyn divergence, nonsyn diversity,
                syn diversity); every 500th entry of the first and last is
                plotted.
            'sfs': mapping with the entries 'bins', 'syn' and 'nonsyn'.
        fig_filename (str or None): base name of the output. If given, the
            figure is saved with each extension in figtypes and the plotted
            numbers are written to <fig_filename>_AB.tsv, _C.tsv and _D.tsv.
            If None, the figure is only shown and no file is written.
        figtypes (list): file extensions (including the dot) to save.
    '''
    n_bootstrap = 50
    ####### plotting ###########
    import seaborn as sns
    from matplotlib import pyplot as plt
    plt.ion()
    sns.set_style('darkgrid')
    fs = fig_fontsize
    fig_size = (fig_width, 1.0 * fig_width)
    cols = HIVEVO_colormap()

    def write_tsv(suffix, rows):
        '''write rows as tab separated lines, skipped without a base file name'''
        if fig_filename is None:
            return
        with open(fig_filename + suffix, 'w') as tsv_out:
            for row in rows:
                tsv_out.write('\t'.join(map(str, row)) + '\n')

    fig, axs = plt.subplots(2, 2, figsize=fig_size)
    divdiv = data['divdiv']
    # in rough the order in which they most dominantly appear in the plot
    regions = ['envelope', 'accessory', 'structural', 'enzymes']
    time_bins = np.array([0, 200, 500, 1000, 1500, 2000, 4000])
    time_binc = 0.5 * (time_bins[:-1] + time_bins[1:])
    add_binned_column(divdiv, time_bins, 'time')

    # map the regions to rough genomic order to match the genome color map in panel C
    colors = {
        reg: c
        for reg, c in zip(regions, [cols(x) for x in [0.66, 0.99, 0.01, 0.33]])
    }

    def get_time_bin_mean(df):
        return df.loc[:, ['time_bin', 'diversity', 'divergence']].groupby(
            by=['time_bin'], as_index=False).mean()

    def label_func(mutclass, region, divordiv):
        # assign labels to Panels A and B separately to make a combinatorial
        # legend (regions vs syn/nonsyn)
        if divordiv == 'divergence' and mutclass == 'nonsyn':
            return region
        elif divordiv == 'diversity' and region == 'accessory':
            return mutclass
        else:
            return None

    ########## panel A and B #####################
    rows_ab = []
    for ax, dtype in zip(axs[0, :], ['divergence', 'diversity']):
        add_panel_label(ax,
                        'A' if dtype == 'divergence' else 'B',
                        x_offset=-0.3)
        for mutclass in ['nonsyn', 'syn']:
            for region in regions:
                ind = (divdiv.loc[:, 'region']
                       == region) & (divdiv.loc[:, 'mutclass'] == mutclass)
                tmp = divdiv.loc[
                    ind, ['time_bin', 'diversity', 'divergence', 'pcode']]
                avg_divdiv = get_time_bin_mean(tmp)
                bs = boot_strap_patients(tmp,
                                         eval_func=get_time_bin_mean,
                                         n_bootstrap=n_bootstrap)
                # plot the same line with and without error bars, labels for legend without
                ax.plot(time_binc / 365.25,
                        avg_divdiv.loc[:, dtype],
                        ls='-' if mutclass == 'nonsyn' else '--',
                        c=colors[region],
                        lw=3,
                        label=label_func(mutclass, region, dtype))
                ax.errorbar(time_binc / 365.25,
                            avg_divdiv.loc[:, dtype],
                            replicate_func(bs,
                                           dtype,
                                           np.std,
                                           bin_index='time_bin'),
                            ls='-' if mutclass == 'nonsyn' else '--',
                            c=colors[region],
                            lw=3)
                rows_ab.append([dtype, mutclass, region] +
                               list(avg_divdiv.loc[:, dtype]))

        ax.legend(loc=2, fontsize=fs - 1, numpoints=2, labelspacing=0)
        ax.set_xticks([0, 2, 4, 6, 8])
        if dtype == 'divergence':
            ax.set_yticks([0, .02, .04])
            ax.set_ylim([0, .048])
        else:
            ax.set_yticks([0, .01, .02])
            ax.set_ylim([0, .028])
        ax.set_xlim([0, 8.5])
        ax.set_ylabel(dtype)
        ax.tick_params(labelsize=fs - 2)
        ax.set_xlabel('Years since EDI', fontsize=fs)
    write_tsv('_AB.tsv', rows_ab)

    ########## panel C: anti correlation of syn diversity and nonsyn divergence #############
    (avg_nonsyn_divg, avg_nonsyn_divs, avg_syn_divs) = data['divdiv_corr']
    ax = axs[1, 0]
    add_panel_label(ax, 'C', x_offset=-0.3)
    # thin out the data, only every 500th point is shown
    x_data, y_data = avg_nonsyn_divg[::500], avg_syn_divs[::500]
    ax.scatter(x_data,
               y_data,
               c=[cols(p) for p in np.linspace(0, 1, len(x_data))],
               s=50)
    write_tsv('_C.tsv', [["nonsyn_divergence"] + list(x_data),
                         ["syn_diversity"] + list(y_data)])

    ax.set_xlabel('nonsyn divergence', fontsize=fig_fontsize)
    ax.set_ylabel('syn diversity', fontsize=fig_fontsize)
    ax.set_ylim([0, 0.028])
    ax.set_xlim([0, 0.012])
    ax.set_xticks([0, 0.005, 0.01])
    ax.set_yticks([0, 0.01, 0.02])
    ax.tick_params(labelsize=fig_fontsize - 2)

    ########## sfs in panel D ##############
    sfs = data['sfs']
    ax = axs[1, 1]
    add_panel_label(ax, 'D', x_offset=-0.3)
    colors = sns.color_palette(n_colors=2)
    binc = 0.5 * (sfs['bins'][1:] + sfs['bins'][:-1])
    # spectra are normalized to sum to one
    sfs_syn = sfs['syn'] / np.sum(sfs['syn'])
    sfs_nonsyn = sfs['nonsyn'] / np.sum(sfs['nonsyn'])
    # syn bars are shifted to the left so both classes sit side by side
    ax.bar(binc - 0.045,
           sfs_syn,
           width=0.04,
           label='syn',
           color=colors[0])
    ax.bar(binc,
           sfs_nonsyn,
           width=0.04,
           label='nonsyn',
           color=colors[1])
    write_tsv('_D.tsv', [["bin_centers"] + list(binc),
                         ["sfs_nonsyn"] + list(sfs_nonsyn),
                         ["sfs_syn"] + list(sfs_syn)])
    ax.set_ylim([0.005, 2.0])
    ax.set_yscale('log')
    ax.set_xlabel('Frequency', fontsize=fs)
    ax.set_ylabel('Fractions of SNPs', fontsize=fs)
    ax.legend(loc=1, fontsize=fs - 2)
    ax.tick_params(labelsize=fig_fontsize - 2)

    # finalize and save the figure
    plt.tight_layout(rect=(0.0, 0.02, 0.98, 0.98),
                     pad=0.05,
                     h_pad=0.5,
                     w_pad=0.4)
    if fig_filename is not None:
        for ext in figtypes:
            fig.savefig(fig_filename + ext)
    else:
        plt.ion()
        plt.show()
def plot_allele_frequency_overlap(data, title='', VERBOSE=0, use_logit=False,
                                  fig_filename=None,
                                  separate_axes=False):
    '''Plot allele frequency in the overlap regions

    For every entry of data, the two allele frequency arrays of the entry
    are plotted against each other as a scatter plot, restricted to
    frequencies within [1e-3, 1 - 1e-3].

    Args:
        data: sequence of mappings. From each one, 'af' (indexable; items 0
            and 1 are masked arrays giving x and y) and 'io' (index into
            the color palette of 5 colors) are read.
        title (str): if not empty, set as title of the axes in use at the end.
        VERBOSE (int): a progress message is printed if >= 2.
        use_logit (bool): use logit scale on both axes.
        fig_filename (str or None): base name of the output; the figure is
            saved as .pdf, .svg and .png and closed. If None, it is shown.
        separate_axes (bool): if True, each entry of data gets its own axes
            (five axes are created), otherwise all share a single one.
    '''
    if VERBOSE >= 2:
        # single argument in parentheses: same output from the Python 2
        # print statement and the Python 3 print function
        print('Plot allele frequency in overlaps')
    import matplotlib.pyplot as plt
    import seaborn as sns
    from util import add_panel_label

    sns.set_style('darkgrid')
    colors = sns.color_palette('Set1', 5)
    fs = fig_fontsize
    xmin = 1e-3

    if not separate_axes:
        fig, ax = plt.subplots(figsize=(fig_width, 0.8 * fig_width))
        axs = [ax]
    else:
        # manual layout: three axes in the top row, two centered in the bottom row
        fig = plt.figure(figsize=(1.5 * fig_width, 1.5 * 0.8 * fig_width))
        fun = fig.add_axes
        lpad = 0.07
        hpad = 0.05
        vpad = 0.05
        width = (1.0 - lpad - 3 * hpad) / 3
        height = (1.0 - 4 * vpad) / 2
        axs = [fun([lpad, 0.5 + vpad, width, height]),
               fun([lpad +  hpad + width, 0.5 + vpad, width, height]),
               fun([lpad + 2 * hpad + 2 * width, 0.5 + vpad, width, height]),
               fun([0.5 * (1 - hpad) - width, vpad, width, height]),
               fun([0.5 * (1 + hpad), vpad, width, height]),
              ]

    # NOTE (Fabio): my logit patch has made it to matplotlib master but now there
    # is a weird bug here that crashes the figure. If I manually call
    # plt.yscale('logit')
    # it works, so I don't quite understand. Anyway, this is only aesthetics and
    # does not affect any data.
    if use_logit:
        for iax, ax in enumerate(axs):
            ax.set_xlim(xmin, 1 - xmin)
            ax.set_ylim(xmin, 1 - xmin)
            ax.set_xscale('logit')
            ax.set_yscale('logit')
            ax.xaxis.set_tick_params(labelsize=fs)
            ax.yaxis.set_tick_params(labelsize=fs)

            # only the leftmost axes of each row keep their y tick labels
            if iax not in (0, 3):
                ax.set_yticklabels([])

    for ida, datum in enumerate(data):
        if separate_axes:
            ax = axs[ida]

        afjoint = datum['af']
        color = colors[datum['io']]

        x = afjoint[0].ravel()
        y = afjoint[1].ravel()

        # keep positions that are unmasked in both arrays
        ind = ~(x.mask | y.mask)
        x = x[ind]
        y = y[ind]

        # restrict to the plotted frequency range
        ind = (x >= xmin) & (x <= 1 - xmin) & (y >= xmin) & (y <= 1 - xmin)
        x = x[ind]
        y = y[ind]

        ax.scatter(x, y,
                   s=50,
                   color=color,
                   alpha=0.7,
                   edgecolor='none')

        ## Plot stddev in Poisson sampling
        #n = datum['n']
        #x = np.linspace(np.log10(xmin), 0, 1000)
        #x = 1.0 / (1 + 10**(-x))
        #y = x - np.sqrt(x / n)
        #ax.plot(np.concatenate([x, 1 - y[::-1]]), np.concatenate([y, 1 - x[::-1]]),
        #        lw=3, c=color, alpha=0.5)
        #ax.plot(np.concatenate([y, 1 - x[::-1]]), np.concatenate([x, 1 - y[::-1]]),
        #        lw=3, c=color, alpha=0.5,
        #        label=datum['overlap'])

        # diagonal: once per axes, i.e. after the last entry on a shared axes
        if separate_axes or (ida == len(data) - 1):
            ax.plot([xmin, 1 - xmin], [xmin, 1 - xmin], lw=2, color='k', alpha=0.5)
            #ax.set_xlabel('SNP frequency leading fragment', fontsize=fs)
            #ax.set_ylabel('SNP frequency trailing fragment', fontsize=fs)


    if not separate_axes:
        add_panel_label(ax, 'C', x_offset=-0.22)

    # NOTE(review): with separate_axes the title lands on whichever axes was
    # bound last in the loops above -- confirm this is intended
    if title:
        ax.set_title(title)

    if not separate_axes:
        plt.tight_layout()

    if fig_filename is not None:
        for ext in ['.pdf','.svg', '.png']:
            fig.savefig(fig_filename+ext)
        # close only after all formats have been written
        plt.close(fig)

    else:
        plt.ion()
        plt.show()
Ejemplo n.º 7
0
def plot_allele_frequency_overlap(data,
                                  title='',
                                  VERBOSE=0,
                                  use_logit=False,
                                  fig_filename=None,
                                  separate_axes=False):
    '''Plot allele frequency in the overlap regions

    For every entry of data, the two allele frequency arrays of the entry
    are plotted against each other as a scatter plot, restricted to
    frequencies within [1e-3, 1 - 1e-3].

    Args:
        data: sequence of mappings. From each one, 'af' (indexable; items 0
            and 1 are masked arrays giving x and y) and 'io' (index into
            the color palette of 5 colors) are read.
        title (str): if not empty, set as title of the axes in use at the end.
        VERBOSE (int): a progress message is printed if >= 2.
        use_logit (bool): use logit scale on both axes.
        fig_filename (str or None): base name of the output; the figure is
            saved as .pdf, .svg and .png and closed. If None, it is shown.
        separate_axes (bool): if True, each entry of data gets its own axes
            (five axes are created), otherwise all share a single one.
    '''
    if VERBOSE >= 2:
        # single argument in parentheses: same output from the Python 2
        # print statement and the Python 3 print function
        print('Plot allele frequency in overlaps')
    import matplotlib.pyplot as plt
    import seaborn as sns
    from util import add_panel_label

    sns.set_style('darkgrid')
    colors = sns.color_palette('Set1', 5)
    fs = fig_fontsize
    xmin = 1e-3

    if not separate_axes:
        fig, ax = plt.subplots(figsize=(fig_width, 0.8 * fig_width))
        axs = [ax]
    else:
        # manual layout: three axes in the top row, two centered in the bottom row
        fig = plt.figure(figsize=(1.5 * fig_width, 1.5 * 0.8 * fig_width))
        fun = fig.add_axes
        lpad = 0.07
        hpad = 0.05
        vpad = 0.05
        width = (1.0 - lpad - 3 * hpad) / 3
        height = (1.0 - 4 * vpad) / 2
        axs = [
            fun([lpad, 0.5 + vpad, width, height]),
            fun([lpad + hpad + width, 0.5 + vpad, width, height]),
            fun([lpad + 2 * hpad + 2 * width, 0.5 + vpad, width, height]),
            fun([0.5 * (1 - hpad) - width, vpad, width, height]),
            fun([0.5 * (1 + hpad), vpad, width, height]),
        ]

    # NOTE (Fabio): my logit patch has made it to matplotlib master but now there
    # is a weird bug here that crashes the figure. If I manually call
    # plt.yscale('logit')
    # it works, so I don't quite understand. Anyway, this is only aesthetics and
    # does not affect any data.
    if use_logit:
        for iax, ax in enumerate(axs):
            ax.set_xlim(xmin, 1 - xmin)
            ax.set_ylim(xmin, 1 - xmin)
            ax.set_xscale('logit')
            ax.set_yscale('logit')
            ax.xaxis.set_tick_params(labelsize=fs)
            ax.yaxis.set_tick_params(labelsize=fs)

            # only the leftmost axes of each row keep their y tick labels
            if iax not in (0, 3):
                ax.set_yticklabels([])

    for ida, datum in enumerate(data):
        if separate_axes:
            ax = axs[ida]

        afjoint = datum['af']
        color = colors[datum['io']]

        x = afjoint[0].ravel()
        y = afjoint[1].ravel()

        # keep positions that are unmasked in both arrays
        ind = ~(x.mask | y.mask)
        x = x[ind]
        y = y[ind]

        # restrict to the plotted frequency range
        ind = (x >= xmin) & (x <= 1 - xmin) & (y >= xmin) & (y <= 1 - xmin)
        x = x[ind]
        y = y[ind]

        ax.scatter(x, y, s=50, color=color, alpha=0.7, edgecolor='none')

        ## Plot stddev in Poisson sampling
        #n = datum['n']
        #x = np.linspace(np.log10(xmin), 0, 1000)
        #x = 1.0 / (1 + 10**(-x))
        #y = x - np.sqrt(x / n)
        #ax.plot(np.concatenate([x, 1 - y[::-1]]), np.concatenate([y, 1 - x[::-1]]),
        #        lw=3, c=color, alpha=0.5)
        #ax.plot(np.concatenate([y, 1 - x[::-1]]), np.concatenate([x, 1 - y[::-1]]),
        #        lw=3, c=color, alpha=0.5,
        #        label=datum['overlap'])

        # diagonal: once per axes, i.e. after the last entry on a shared axes
        if separate_axes or (ida == len(data) - 1):
            ax.plot([xmin, 1 - xmin], [xmin, 1 - xmin],
                    lw=2,
                    color='k',
                    alpha=0.5)
            #ax.set_xlabel('SNP frequency leading fragment', fontsize=fs)
            #ax.set_ylabel('SNP frequency trailing fragment', fontsize=fs)

    if not separate_axes:
        add_panel_label(ax, 'C', x_offset=-0.22)

    # NOTE(review): with separate_axes the title lands on whichever axes was
    # bound last in the loops above -- confirm this is intended
    if title:
        ax.set_title(title)

    if not separate_axes:
        plt.tight_layout()

    if fig_filename is not None:
        for ext in ['.pdf', '.svg', '.png']:
            fig.savefig(fig_filename + ext)
        # close only after all formats have been written
        plt.close(fig)

    else:
        plt.ion()
        plt.show()
Ejemplo n.º 8
0
def plot_subtype_correlation(data, fig_filename=None, figtypes=['.png', '.svg', '.pdf']):
    '''Two panel figure: correlation over time and fraction of variable sites.

    Panel A shows, for every patient, mean and standard deviation of the
    'rho' column of data['correlations'] per time point. Panel B shows mean
    and standard deviation of the columns S1..S4 of data['diverse_fraction']
    per time bin. The latter table is modified in place (a 'time_bin'
    column is added and S1..S4 are cast to float).

    The figure is saved as fig_filename + ext for each ext in figtypes, or
    shown if fig_filename is None.
    '''
    import seaborn as sns
    from matplotlib import pyplot as plt
    plt.ion()
    sns.set_style('darkgrid')
    fs = fig_fontsize
    fig, axs = plt.subplots(1, 2, figsize=(fig_width, 0.5*fig_width))

    ########## panel A: correlation per patient over time ##########
    ax = axs[0]
    add_panel_label(ax, 'A', x_offset=-0.15)
    correlations = data['correlations']
    # patient codes are sorted by the number following their first character
    patients = sorted(correlations['pcode'].unique(), key=lambda code: int(code[1:]))
    colors = patient_colors

    # mean and variance across regions for each time point and patient
    per_sample = correlations.groupby(by=['time', 'pcode'], as_index=False)
    mean_rho = per_sample.mean().groupby('pcode')
    var_rho = per_sample.var().groupby('pcode')

    # one set of markers with error bars per patient
    for pat in patients:
        pat_mean = mean_rho.get_group(pat)
        pat_var = var_rho.get_group(pat)
        ax.errorbar(np.array(pat_mean['time']/365.25),
                    np.array(pat_mean['rho']),
                    yerr=np.array(np.sqrt(pat_var['rho'])),
                    color=colors[pat],
                    ls="none",
                    markersize=8,
                    marker='o',
                    label=pat)

    ax.legend(loc=2, fontsize=fs-3, ncol=2, labelspacing=0.1, columnspacing=0.1)
    ax.set_yticks([0, 0.25, 0.5])
    ax.set_xticks([0, 2, 4, 6, 8])
    ax.set_xlim([-2, 8.5])
    ax.set_ylim([-0.1, 0.8])
    ax.set_xlabel('ETI [years]', fontsize=fs)
    ax.set_title(r"Spearman's $\rho$", fontsize=fs)
    for tick_label in ax.get_xticklabels()+ax.get_yticklabels():
        tick_label.set_fontsize(fs)

    ########## panel B: fraction of variable sites by entropy bin ##########
    ax = axs[1]
    add_panel_label(ax, 'B', x_offset=-0.15)
    div = data['diverse_fraction']
    colors = sns.color_palette(n_colors=4)
    # assign every sample to a time bin, out of range samples go to the outermost bins
    time_bins = np.arange(0, 4000, 500)
    binc = 0.5*(time_bins[1:] + time_bins[:-1])
    div.loc[:,'time_bin'] = np.clip(np.searchsorted(time_bins, div["time"])-1,
                                    0, len(time_bins)-2)
    for quantile in range(1, 5):
        ent = 'S'+str(quantile)
        div.loc[:,ent] = div.loc[:,ent].astype(float)
        # mean and variance over regions, patients and samples within a time bin
        per_bin = div.loc[:,[ent, 'time_bin']].groupby(by=['time_bin'], as_index=False)
        mean_div = per_bin.mean()
        var_div = per_bin.var()
        ax.errorbar(np.array(binc/365.25),
                    np.array(mean_div.loc[:,ent]),
                    yerr=np.array(np.sqrt(var_div.loc[:,ent])),
                    label='Q'+str(quantile),
                    c=colors[quantile-1])

    ax.set_ylim([0, 0.35])
    ax.set_yticks([0, 0.1, 0.2, 0.3])
    ax.set_xticks([0, 2, 4, 6, 8])
    ax.set_title('Fraction of SNPs > 0.01')
    ax.set_xlabel('ETI [years]', fontsize=fs)
    for tick_label in ax.get_xticklabels()+ax.get_yticklabels():
        tick_label.set_fontsize(fs)
    ax.legend(loc=2, ncol=2, fontsize=fs-3, title='Conservation',
              labelspacing=0.1, columnspacing=0.5)

    ########## output ##########
    plt.tight_layout(rect=(0.0, 0.02, 0.98, 0.98), pad=0.05, h_pad=0.5, w_pad=0.4)
    if fig_filename is None:
        plt.ion()
        plt.show()
    else:
        for ext in figtypes:
            fig.savefig(fig_filename+ext)
Ejemplo n.º 9
0
    aa_mutation_rates, total_nonsyn_mutation_rates = calc_amino_acid_mutation_rates()
    selcoeff = {}
    for region in regions:
        s = fitness_costs_per_site(region, data, total_nonsyn_mutation_rates)
        s[s>1] = 1
        selcoeff[region] = s

    aa_ref = 'NL4-3'
    global_ref = HIVreference(refname=aa_ref, subtype=args.subtype)

    ### FIGURE 5
    fig,axs = plt.subplots(1,2, figsize=(10,5))
    #fitness_costs_in_optimal_epis(['gag', 'nef'], selcoeff, ax=axs[0])
    #add_panel_label(axs[0], 'A', x_offset=-0.15)
    plot_fraction_associated(regions, selcoeff, associations, axs=axs, slope=2.0)
    add_panel_label(axs[0], 'A', x_offset=-0.15)
    region='nef'
    reference = HIVreferenceAminoacid(region, refname=aa_ref, subtype = args.subtype)
    tmp, rho, pval =  fitness_scatter(region, selcoeff, associations, reference, ax=axs[0])
    add_panel_label(axs[1], 'B', x_offset=-0.15)
    axs[0].legend(loc=3, fontsize=fs)
    axs[0].set_ylim([0.03,3])
    plt.tight_layout()
    for fmt in ['pdf', 'png', 'svg']:
        plt.savefig('../figures/figure_5_'+region+'_st_'+args.subtype+'.'+fmt)


    # calculate corrleations between fitness costs and phenotypes
    phenotype_correlations = {}
    erich = np.zeros((2,2,2))
    for region in regions:
def plot_fit(data_sat, data_pooled):
    '''Plot the saturation fits (panel A) and the resulting fitness costs (panel B).

    Parameters:
        data_sat: mapping with the entries
            'data_to_fit': table iterated with .iterrows(); every row is drawn
                           as one group, with the row's index as x values
            'mu': rate used in the saturation curve mu/s * (1 - exp(-s*x))
            's': table indexed by the same labels as 'data_to_fit', with the
                 columns 's' (fitted value) and 'ds' (its error bar)
        data_pooled: mapping with the 2D arrays 'all' and 'all_std'; column 0
                     of 'all' is plotted as x, column 1 as y and column 1 of
                     'all_std' as the error bar. The last row is left out.

    The figure is shown interactively, nothing is returned.
    '''
    from matplotlib import cm
    from util import add_panel_label

    palette = sns.color_palette('colorblind')

    fig_width = 5
    fs = 16
    fig, axs = plt.subplots(1, 2,
                            figsize=(2 * fig_width, fig_width))

    data_to_fit = data_sat['data_to_fit']
    mu = data_sat['mu']
    s = data_sat['s']

    # saturation curve: linear increase with slope mu at small x, plateau at mu/s
    fun = lambda x, s: mu / s * (1.0 - np.exp(-s * x))

    # PANEL A: data and fits
    ax = axs[0]
    n_groups = data_to_fit.shape[0]
    for iS, (S, datum) in enumerate(data_to_fit.iterrows()):
        x = np.array(datum.index)
        y = np.array(datum)
        color = cm.jet(1.0 * iS / n_groups)

        # Most conserved group is dashed
        ls = '--' if iS == 0 else '-'

        ax.scatter(x, y,
                   s=70,
                   color=color,
                  )

        xfit = np.linspace(0, 3000)
        yfit = fun(xfit, s.loc[S, 's'])
        ax.plot(xfit, yfit,
                lw=2,
                color=color,
                ls=ls,
               )

    ax.set_xlabel('days since EDI', fontsize=fs)
    ax.set_ylabel('average SNP frequency', fontsize=fs)
    ax.set_xlim(-200, 3200)
    ax.set_ylim(-0.0005, 0.025)
    # the x ticks used to be set twice in a row; only this last call had an effect
    ax.set_xticks([0, 1000, 2000, 3000])
    ax.xaxis.set_tick_params(labelsize=fs)
    ax.yaxis.set_tick_params(labelsize=fs)

    # NOTE(review): the value in this label is hard-coded and not derived from
    # data_sat['mu'] -- confirm that the two agree
    ax.text(0, 0.023,
            r'$\mu = 1.2 \cdot 10^{-5}$ per day',
            fontsize=16)
    # straight line with slope mu as a guide to the eye
    ax.plot([200, 1300], [0.007, 0.007 + (1300 - 200) * mu], lw=1.5, c='k')

    # PANEL B: costs
    ax = axs[1]

    # B1: Saturation fit, the first group is not shown in this panel
    x = np.array(s.index)[1:]
    y = np.array(s['s'])[1:]
    dy = np.array(s['ds'])[1:]

    ax.errorbar(x, y,
                yerr=dy,
                ls='-',
                marker='o',
                lw=2,
                color=palette[0],
                label='Sat',
               )

    # B2: pooled
    x = data_pooled['all'][:-1, 0]
    y = data_pooled['all'][:-1, 1]
    dy = data_pooled['all_std'][:-1, 1]
    ax.errorbar(x, y, yerr=dy,
                ls='-',
                marker='o',
                lw=2,
                color=palette[2],
                label='Pooled',
               )

    ax.legend(loc='upper right', fontsize=16)
    ax.set_xlabel('variability in group M [bits]', fontsize=fs)
    ax.set_ylabel('fitness cost', fontsize=fs)
    ax.set_xlim(0.9e-3, 2.5)
    ax.set_ylim(9e-5, 0.11)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.xaxis.set_tick_params(labelsize=fs)
    ax.yaxis.set_tick_params(labelsize=fs)

    # Panel labels
    add_panel_label(axs[0], 'A', x_offset=-0.27)
    add_panel_label(axs[1], 'B', x_offset=-0.21)

    plt.tight_layout()
    plt.ion()
    plt.show()
Ejemplo n.º 11
0
from hivevo.hivevo.patients import Patient
from hivevo.hivevo.samples import all_fragments

from util import store_data, load_data, draw_genome, fig_width, fig_fontsize, add_panel_label

# Script
if __name__ == "__main__":

    patients = ['p' + str(i) for i in range(1, 12) if i not in [4, 7]]

    # make two figures, each showing one method of template quantification
    sns.set_style('darkgrid')
    fs = fig_fontsize
    fig1, ax1 = plt.subplots(figsize=(fig_width, 0.8 * fig_width))
    fig2, ax2 = plt.subplots(figsize=(fig_width, 0.8 * fig_width))
    add_panel_label(ax1, 'A', x_offset=-0.15)
    add_panel_label(ax2, 'B', x_offset=-0.15)

    # define colors for patients and fragments
    pat_colors = sns.color_palette(
        sns.color_palette([
            '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c',
            '#fdbf6f', '#ff7f00', '#cab2d6'
        ],
                          n_colors=len(patients)))
    frag_colors = sns.color_palette(n_colors=6)

    depth_estimates = []
    total_viral_load_dilutions_list = []
    overlap_dilution_list = {i: [] for i in range(6)}
    for pi, pcode in enumerate(patients):
Ejemplo n.º 12
0
def _write_tsv(fig_filename, suffix, rows):
    '''
    write rows (lists of values) as tab separated lines to fig_filename+suffix.
    nothing is written when fig_filename is None, i.e. when the figure is only shown
    '''
    if fig_filename is None:
        return
    with open(fig_filename+suffix, 'w') as tsv_out:
        for row in rows:
            tsv_out.write('\t'.join(map(str, row))+'\n')


def plot_divdiv(data, fig_filename=None, figtypes=['.png', '.svg', '.pdf']):
    '''
    plot divergence and diversity of synonymous and nonsynonymous mutations
    includes:
        - panels A and B: divergence and diversity over time by region and mutation class
        - panel C: a panel that compares syn diversity/nonsyn divergence
        - panel D: site frequency spectra of syn and nonsyn mutations

    parameters:
        data         -- mapping with the entries
                        'divdiv': data frame with the columns 'region', 'mutclass',
                                  'diversity', 'divergence', 'pcode' and 'time_bin'
                                  (the latter presumably added by add_binned_column
                                  from the 'time' column -- verify against that helper)
                        'divdiv_corr': tuple (nonsyn divergence, nonsyn diversity, syn diversity)
                        'sfs': mapping with the arrays 'bins', 'syn' and 'nonsyn'
        fig_filename -- file name without extension. the figure is saved as
                        fig_filename+ext for each ext in figtypes and the plotted
                        numbers are written to fig_filename+'_AB.tsv', '_C.tsv' and
                        '_D.tsv'. if None, the figure is shown and no file is written
        figtypes     -- extensions (with leading dot) of the figure files
    '''
    n_bootstrap=50
    ####### plotting ###########
    import seaborn as sns
    from matplotlib import pyplot as plt
    plt.ion()
    sns.set_style('darkgrid')
    fs=fig_fontsize
    fig_size = (fig_width, 1.0*fig_width)
    cols = HIVEVO_colormap()

    fig, axs = plt.subplots(2, 2,figsize=fig_size)
    divdiv = data['divdiv']
    # in rough the order in which they most dominantly appear in the plot
    regions = ['envelope', 'accessory', 'structural','enzymes']
    time_bins = np.array([0, 200, 500, 1000, 1500, 2000, 4000])
    time_binc = 0.5*(time_bins[:-1]+time_bins[1:])
    add_binned_column(divdiv, time_bins, 'time')

    # map the regions to rough genomic order to match the genome color map in panel C
    colors = {reg:c for reg, c in zip(regions, [cols(x) for x in [0.66, 0.99, 0.01, 0.33]])}
    def get_time_bin_mean(df):
        return df.loc[:,['time_bin', 'diversity', 'divergence']].groupby(by=['time_bin'], as_index=False).mean()
    def label_func(mutclass, region, divordiv):  # assign labels to Panels A and B separately to make a combinatorial legend (regions vs syn/nonsyn)
        if divordiv=='divergence' and mutclass=='nonsyn':
            return region
        elif divordiv=='diversity' and region=='accessory':
            return mutclass
        else:
            return None

    ########## panel A and B #####################
    rows_AB = []
    for ax, dtype in zip(axs[0,:], ['divergence', 'diversity']):
        add_panel_label(ax, 'A' if dtype=='divergence' else 'B', x_offset = -0.3)
        for mutclass in ['nonsyn', 'syn']:
            line_style = '-' if mutclass=='nonsyn' else '--'
            for region in regions:
                ind = (divdiv.loc[:,'region']==region) & (divdiv.loc[:,'mutclass']==mutclass)
                tmp = divdiv.loc[ind,['time_bin', 'diversity', 'divergence', 'pcode']]
                avg_divdiv = get_time_bin_mean(tmp)
                bs = boot_strap_patients(tmp, eval_func = get_time_bin_mean, n_bootstrap=n_bootstrap)
                # plot the same line with and without error bars, labels for legend without
                ax.plot(time_binc/365.25, avg_divdiv.loc[:,dtype], ls=line_style,
                            c=colors[region], lw=3, label=label_func(mutclass, region, dtype))
                ax.errorbar(time_binc/365.25, avg_divdiv.loc[:,dtype], replicate_func(bs, dtype, np.std, bin_index='time_bin'),
                            ls=line_style, c=colors[region], lw=3)
                rows_AB.append([dtype, mutclass, region]+list(avg_divdiv.loc[:,dtype]))

        ax.legend(loc=2, fontsize=fs-1, numpoints=2, labelspacing = 0)
        ax.set_xticks([0,2,4,6,8])
        if dtype=='divergence':
            ax.set_yticks([0,.02,.04])
            ax.set_ylim([0,.048])
        else:
            ax.set_yticks([0,.01,.02])
            ax.set_ylim([0,.028])
        ax.set_xlim([0,8.5])
        ax.set_ylabel(dtype)
        ax.tick_params(labelsize=fs-2)
        ax.set_xlabel('Years since EDI', fontsize=fs)
    _write_tsv(fig_filename, '_AB.tsv', rows_AB)

    ########## panel C: anti correlation of syn diversity and nonsyn divergence #############
    (avg_nonsyn_divg, avg_nonsyn_divs, avg_syn_divs) = data['divdiv_corr']
    ax = axs[1,0]
    add_panel_label(ax, 'C', x_offset = -0.3)
    # only every 500th value is shown
    x_data, y_data = avg_nonsyn_divg[::500], avg_syn_divs[::500]
    ax.scatter(x_data, y_data, c=[cols(p) for p in np.linspace(0,1,len(x_data))], s=50)
    _write_tsv(fig_filename, '_C.tsv', [["nonsyn_divergence"]+list(x_data),
                                        ["syn_diversity"]+list(y_data)])

    ax.set_xlabel('nonsyn divergence', fontsize = fig_fontsize)
    ax.set_ylabel('syn diversity', fontsize = fig_fontsize)
    ax.set_ylim([0,0.028])
    ax.set_xlim([0,0.012])
    ax.set_xticks([0, 0.005,0.01])
    ax.set_yticks([0, 0.01, 0.02])
    ax.tick_params(labelsize=fig_fontsize-2)


    ########## sfs in panel D ##############
    sfs=data['sfs']
    ax = axs[1,1]
    add_panel_label(ax, 'D', x_offset = -0.3)
    sfs_colors = sns.color_palette(n_colors=2)
    binc = 0.5*(sfs['bins'][1:]+sfs['bins'][:-1])
    # spectra are normalized to sum to one
    sfs_syn = sfs['syn']/np.sum(sfs['syn'])
    sfs_nonsyn = sfs['nonsyn']/np.sum(sfs['nonsyn'])
    ax.bar(binc-0.045, sfs_syn,width = 0.04, label='syn', color=sfs_colors[0])
    ax.bar(binc, sfs_nonsyn,width = 0.04, label='nonsyn', color=sfs_colors[1])
    _write_tsv(fig_filename, '_D.tsv', [["bin_centers"]+list(binc),
                                        ["sfs_nonsyn"]+list(sfs_nonsyn),
                                        ["sfs_syn"]+list(sfs_syn)])
    ax.set_ylim([0.005,2.0])
    ax.set_yscale('log')
    ax.set_xlabel('Frequency',fontsize=fs)
    ax.set_ylabel('Fractions of SNPs',fontsize=fs)
    ax.legend(loc=1, fontsize=fs-2)
    ax.tick_params(labelsize=fig_fontsize-2)

    # finalize and save the figure
    plt.tight_layout(rect=(0.0, 0.02, 0.98, 0.98), pad=0.05, h_pad=0.5, w_pad=0.4)
    if fig_filename is not None:
        for ext in figtypes:
            fig.savefig(fig_filename+ext)
    else:
        plt.ion()
        plt.show()
from util import store_data, load_data, draw_genome, fig_width, fig_fontsize, add_panel_label



# Script
if __name__=="__main__":

    patients = ['p'+str(i) for i in range(1,12) if i not in [4,7]]
    
    # make two figures, each showing one method of template quantification
    sns.set_style('darkgrid')
    fs=fig_fontsize
    fig1, ax1 = plt.subplots(figsize=(fig_width, 0.8*fig_width))
    fig2, ax2 = plt.subplots(figsize=(fig_width, 0.8*fig_width))
    add_panel_label(ax1, 'A', x_offset=-0.15)
    add_panel_label(ax2, 'B', x_offset=-0.15)

    # define colors for patients and fragments
    pat_colors = sns.color_palette(sns.color_palette(['#a6cee3', '#1f78b4',
                                                      '#b2df8a', '#33a02c',
                                                      '#fb9a99', '#e31a1c',
                                                      '#fdbf6f', '#ff7f00',
                                                      '#cab2d6'],
                                                     n_colors=len(patients)))
    frag_colors = sns.color_palette(n_colors=6)
    
    depth_estimates = []
    total_viral_load_dilutions_list = []
    overlap_dilution_list = {i:[] for i in range(6)}
    for pi, pcode in enumerate(patients):
Ejemplo n.º 14
0
def plot_subtype_correlation(data,
                             fig_filename=None,
                             figtypes=['.png', '.svg', '.pdf']):
    '''Plot the correlation per patient over time and the fraction of diverse sites.

    Panel A shows, for every patient, mean and standard deviation of 'rho'
    across the rows sharing a time point. Panel B shows mean and standard
    deviation of the columns 'S1' to 'S4' within time bins of 500 days.

    Parameters:
        data         -- mapping with the entries
                        'correlations': data frame with the columns 'pcode',
                                        'time' and 'rho'
                        'diverse_fraction': data frame with the columns 'time'
                                        and 'S1' ... 'S4'. NOTE: a 'time_bin'
                                        column is added to this frame and the
                                        'S' columns are cast to float in place.
        fig_filename -- file name without extension; if None the figure is
                        shown instead of saved
        figtypes     -- extensions (with leading dot) of the saved figures
    '''
    import seaborn as sns
    from matplotlib import pyplot as plt
    plt.ion()
    sns.set_style('darkgrid')
    fs = fig_fontsize
    fig_size = (fig_width, 0.5 * fig_width)
    fig, axs = plt.subplots(1, 2, figsize=fig_size)

    ax = axs[0]
    add_panel_label(ax, 'A', x_offset=-0.15)
    # patient codes are sorted by the number following their first character
    patients = sorted(data['correlations']['pcode'].unique(),
                      key=lambda x: int(x[1:]))
    colors = patient_colors

    # calculate mean and variance across regions for each time point and patient
    mean_rho = data['correlations'].groupby(
        by=['time', 'pcode'], as_index=False).mean().groupby('pcode')
    var_rho = data['correlations'].groupby(
        by=['time', 'pcode'], as_index=False).var().groupby('pcode')

    # loop over patients and plot the mean/std of the previously grouped data
    for pat in patients:
        pat_mean = mean_rho.get_group(pat)
        ax.errorbar(np.array(pat_mean['time'] / 365.25),
                    np.array(pat_mean['rho']),
                    yerr=np.array(np.sqrt(var_rho.get_group(pat)['rho'])),
                    color=colors[pat],
                    ls="none",
                    markersize=8,
                    marker='o',
                    label=pat)

    ax.legend(loc=2,
              fontsize=fs - 3,
              ncol=2,
              labelspacing=0.1,
              columnspacing=0.1)
    ax.set_yticks([0, 0.25, 0.5])
    ax.set_xticks([0, 2, 4, 6, 8])
    ax.set_xlim([-2, 8.5])
    ax.set_ylim([-0.1, 0.8])
    ax.set_xlabel('ETI [years]', fontsize=fs)
    ax.set_title(r"Spearman's $\rho$", fontsize=fs)
    for item in ax.get_xticklabels() + ax.get_yticklabels():
        item.set_fontsize(fs)

    # add a second plot that shows the fraction of variable sites by entropy bin
    ax = axs[1]
    add_panel_label(ax, 'B', x_offset=-0.15)
    div = data['diverse_fraction']
    colors = sns.color_palette(n_colors=4)
    # add a time bin column, times outside the bins go into the first/last bin
    time_bins = np.arange(0, 4000, 500)
    binc = 0.5 * (time_bins[1:] + time_bins[:-1])
    div.loc[:, 'time_bin'] = np.minimum(
        len(time_bins) - 2,
        np.maximum(0,
                   np.searchsorted(time_bins, div["time"]) - 1))
    for i in range(4):
        ent = 'S' + str(i + 1)
        div.loc[:, ent] = div.loc[:, ent].astype(float)
        # calculate mean and variance over regions and patients and samples within a time bin
        by_bin = div.loc[:, [ent, 'time_bin']].groupby(by=['time_bin'],
                                                       as_index=False)
        mean_div = by_bin.mean()
        var_div = by_bin.var()
        # the grouped frames only contain time bins that hold samples, hence
        # pick the matching bin centers instead of assuming that all are present
        present_bins = np.array(mean_div.loc[:, 'time_bin']).astype(int)
        ax.errorbar(np.array(binc[present_bins] / 365.25),
                    np.array(mean_div.loc[:, ent]),
                    yerr=np.array(np.sqrt(var_div.loc[:, ent])),
                    label='Q' + str(i + 1),
                    c=colors[i])

    ax.set_ylim([0, 0.35])
    ax.set_yticks([0, 0.1, 0.2, 0.3])
    ax.set_xticks([0, 2, 4, 6, 8])
    ax.set_title('Fraction of SNPs > 0.01')
    ax.set_xlabel('ETI [years]', fontsize=fs)
    for item in ax.get_xticklabels() + ax.get_yticklabels():
        item.set_fontsize(fs)
    ax.legend(loc=2,
              ncol=2,
              fontsize=fs - 3,
              title='Conservation',
              labelspacing=0.1,
              columnspacing=0.5)

    # plot output
    plt.tight_layout(rect=(0.0, 0.02, 0.98, 0.98),
                     pad=0.05,
                     h_pad=0.5,
                     w_pad=0.4)
    if fig_filename is not None:
        for ext in figtypes:
            fig.savefig(fig_filename + ext)
    else:
        plt.ion()
        plt.show()
def plot_non_coding_figure(data, minor_af, synnonsyn, reference, fname=None):
    '''Draw fitness costs in four genome windows containing noncoding features

    The windows are drawn next to each other on a shared y-axis. At the bottom
    of each window, black bars and labels mark the genes/regions that reach
    into it. If fname is given, the figure is saved as fname+'.png', '.svg'
    and '.pdf'.
    '''
    from util import add_panel_label

    ymax = 0.25
    ymin = 0.0005

    # a second gene in a window is drawn at this multiple of the lower y-limit
    y_second_gene = 1.18

    # arguments shared by all calls that draw the fitness costs of a window
    window_kwargs = {'pheno': None, 'ws': 8, 'ws_syn': 4}

    def gene_bar(panel, x_from, x_to, lw, second=False):
        '''bar along the lower edge of the panel, raised for a second gene'''
        lower = panel.get_ylim()[0]
        if second:
            heights = y_second_gene*lower*np.ones(2)
        else:
            heights = lower*np.ones(2)
        panel.plot([x_from, x_to], heights, lw=lw, c='k', alpha=0.7)

    def gene_name(panel, x, name, align, second=False):
        '''name of a gene just above its bar'''
        lower = panel.get_ylim()[0]
        if second:
            height = lower*(y_second_gene+0.17)
        else:
            height = lower*1.17
        panel.text(x, height, name, fontsize=fs*0.8, horizontalalignment=align)

    fig, axs = plt.subplots(1, 4, sharey=True, figsize =(10,5),
                            gridspec_kw={'width_ratios':[4, 1, 2.5, 1]})

    # window 1: the 5' region
    left, right = 500, 900
    panel = plot_fitness_costs_along_genome(left, right,
                                     ['polyA', 'U5', 'U5 stem', 'PBS', 'PSI SL1-4'],
                                     data, minor_af, reference,
                                     synnonsyn=synnonsyn['genomewide'],
                                     ax=axs[0], **window_kwargs)
    # the left-most axis carries label and dimension, the others are tied to it
    panel.set_ylabel('fitness cost [1/day]', fontsize=fs)
    panel.set_ylim(ymin, ymax)
    add_panel_label(panel, 'B', x_offset=-0.15)

    gene_bar(panel, left, reference.annotation["LTR5'"].location.end, lw=10)
    gene_name(panel, left, "LTR5'", 'left')

    gene_bar(panel, reference.annotation['gag'].location.start, right, lw=10)
    gene_name(panel, right, 'gag', 'right')
    panel.set_ylim(ymin, ymax)

    # window 2: frame shift region -- no syn fitness cost here since this is in an overlap
    left, right = 2050, 2150
    panel = plot_fitness_costs_along_genome(left, right, ['frameshift'],
                                     data, minor_af, reference,
                                     ax=axs[1], **window_kwargs)

    gene_bar(panel, left, reference.annotation['gag'].location.end, lw=10)
    gene_name(panel, left, 'gag', 'left')

    gene_bar(panel, reference.annotation['pol'].location.start, right, lw=5, second=True)
    gene_name(panel, right, 'pol', 'right', second=True)
    panel.set_xticks([2050, 2150])
    panel.set_ylim(ymin, ymax)

    # window 3: the cPPT region
    left, right = 4750, 5000
    panel = plot_fitness_costs_along_genome(left, right, ['A1','D2', 'cPPT'],
                                     data, minor_af, reference,
                                     synnonsyn=synnonsyn['genomewide'],
                                     ax=axs[2], **window_kwargs)
    panel.set_ylim(ymin, ymax)

    gene_bar(panel, left, reference.annotation["IN"].location.end, lw=10)
    gene_name(panel, left, "IN", 'left')

    gene_bar(panel, reference.annotation['vif'].location.start, right, lw=10, second=True)
    gene_name(panel, right, 'vif', 'right', second=True)
    panel.set_xticks([4800, 4900])
    panel.set_ylim(ymin, ymax)

    # window 4: the 3' region
    left, right = 9050, 9150
    panel = plot_fitness_costs_along_genome(left, right, ['PPT'],
                                     data, minor_af, reference,
                                     synnonsyn=synnonsyn['genomewide'],
                                     ax=axs[3], **window_kwargs)

    gene_bar(panel, left, reference.annotation['nef'].location.end, lw=10)
    gene_name(panel, left, 'nef', 'left')

    gene_bar(panel, reference.annotation["LTR3'"].location.start, right, lw=5, second=True)
    gene_name(panel, right, "LTR3'", 'right', second=True)
    panel.set_xticks([9050, 9100,9150])
    panel.set_ylim(ymin, ymax)

    # one x-label for all four windows
    fig.text(0.5, 0.01, 'Position in HIV-1 reference (HXB2) [bp]',
             ha='center',
             fontsize=fs)
    plt.tight_layout(rect=(0, 0.04, 1, 1),w_pad=-1)

    if fname is None:
        return
    for ext in ['.png', '.svg', '.pdf']:
        plt.savefig(fname+ext)
Ejemplo n.º 16
0
def plot_fitness_cost_along_genome(regions, data, minor_af, synnonsyn, reference, ws=30):
    '''Plot smoothed fitness costs along the genome and their distribution per gene

    The first figure shows, for each region, a running geometric mean (window
    of ws sites) of the per-site fitness costs, separately for synonymous and
    nonsynonymous sites, above a drawing of the genome annotation. The second
    figure shows violin plots of the log10 per-site costs for each gene.
    Per-site costs are mutation rate over (af_cutoff + minor allele frequency),
    restricted to the interval [0.001, 0.1]. Both figures are saved in
    ../figures/ as png, svg and pdf.
    '''
    from util import add_panel_label

    site_records = []

    # Figure 1: costs on top, genome annotation below
    fig, axs = plt.subplots(2, 1, sharex=True,
                            gridspec_kw={'height_ratios':[6, 1]})

    for collect_sites, mut_class in ((1,'nonsynonymous'), (0,'synonymous')):
        syn_pass = mut_class=='synonymous'
        for ri, region in enumerate(regions):
            if syn_pass:
                mask = synnonsyn[region]
            else:
                mask = ~synnonsyn[region]
            # sites without frequency estimate are left out
            mask = mask&(~np.isnan(minor_af[region]))
            cost = (data['mut_rate'][region]/(af_cutoff+minor_af[region]))
            cost[cost>0.1] = 0.1
            cost[cost<0.001] = 0.001
            positions = np.array(list(reference.annotation[region]))[mask]
            # averaging the logarithm yields the geometric mean
            smoothed_cost = np.exp(running_average(np.log(cost[mask]), ws))
            axs[0].plot(running_average(positions, ws),
                        smoothed_cost,
                        c=cols[ri%len(cols)],
                        ls='--' if syn_pass else '-',
                        label=mut_class if region=='gag' else None)
            # every site is recorded once (during the first pass), vpr and vpu are left out
            if collect_sites and region not in ['vpr', 'vpu']:
                for pos in range(len(cost)):
                    site_records.append((region, pos, np.log10(cost[pos]), synnonsyn[region][pos]))

    axs[0].legend(loc=1, fontsize=fs*0.8)
    axs[0].set_yscale('log')
    axs[0].set_ylabel('fitness cost [1/day]', fontsize=fs)
    axs[0].set_ylim(0.002, 0.25)
    axs[0].tick_params(labelsize=fs*0.8)

    # The genome annotations
    drawn_features = ['p17', 'p6', 'p7', 'p24',
                      'PR', 'RT', 'IN', 'p15',
                      'nef',
                      'gp120', 'gp41',
                      'vif', 'vpu', 'vpr', 'rev', 'tat',
                      'V1', 'V2', 'V3', 'V5']
    annotations = {name: feature for name, feature in reference.annotation.iteritems()
                   if name in drawn_features}
    annotations = draw_genome(annotations, axs[1])
    axs[1].set_axis_off()
    # vertical lines in the upper panel at the boundaries of these features
    delimited = ['p17', 'p24', 'PR', 'RT', 'p15', 'IN', 'vif', 'gp120', 'gp41', 'nef']
    boundaries = np.unique(annotations.loc[annotations['name'].isin(delimited), ['x1', 'x2']])
    for boundary in boundaries:
        axs[0].axvline(boundary, lw=1, color='0.8')

    plt.tight_layout()
    add_panel_label(axs[0], 'A', x_offset=-0.1)
    for ext in ['png', 'svg', 'pdf']:
        fig.savefig('../figures/figure_3A_st_' + reference.subtype + '.'+ext)


    # Figure 2: violin plots of the fitness cost distributions for syn and nonsyn
    all_sel_coeff = pd.DataFrame(data=site_records,
                                 columns=['gene', 'position', 'selection', 'synonymous'])
    # replace the boolean flag by the name shown in the legend
    all_sel_coeff.loc[all_sel_coeff['synonymous'] == True, 'synonymous'] = 'synonymous'
    all_sel_coeff.loc[all_sel_coeff['synonymous'] == False, 'synonymous'] = 'nonsynonymous'
    fig = plt.figure()
    ax = sns.violinplot(x='gene', y='selection', hue='synonymous', data=all_sel_coeff,
                       inner='quartile', split=True, cut=0, scale='area')
    exponents = [-3,-2,-1]
    ax.set_yticks(exponents)
    ax.set_yticklabels([r'$10^{'+str(i)+'}$' for i in exponents])
    ax.tick_params(labelsize=0.8*fs)
    ax.set_ylabel('fitness cost [1/day]', fontsize=fs)
    ax.set_xlabel('')
    ax.set_ylim(-3, -0.5)
    ax.legend(loc=1, fontsize=fs, title=None)

    plt.tight_layout()
    for ext in ['png', 'svg', 'pdf']:
        fig.savefig('../figures/figure_S6_st_' + reference.subtype +'.'+ext)
Ejemplo n.º 17
0
def plot_minor_allele_example(data, title='', VERBOSE=0, fig_filename=None):
    '''Plot minor allele in a typical sample

    Parameters:
        data         -- sequence of mappings, each with an array 'freq_minor'
                        of minor allele frequencies by position. Styles are
                        defined for two entries: the control first, the
                        patient sample second.
        title        -- optional title of the figure
        VERBOSE      -- if nonzero, a progress message is printed
        fig_filename -- file name without extension. The figure is saved as
                        pdf, svg and png and closed afterwards. If None, the
                        figure is shown instead.
    '''
    import matplotlib.pyplot as plt
    import seaborn as sns
    from util import add_panel_label

    plt.ioff()

    if VERBOSE:
        print('Plot minor alleles of example sample')

    fig_size = (fig_width, 0.8*fig_width)
    fig, axs = plt.subplots(1, 2,
                            figsize=fig_size,
                            sharey=True,
                            gridspec_kw={'width_ratios': [3, 1]})
    sns.set_style('darkgrid')

    # styles by data set: control first, patient second
    alphas = [0.6, 1]
    colors = [sns.color_palette()[i] for i in [2, 0]]
    shapes = ['s', 'o']

    for idat, datum in enumerate(data):
        y = datum['freq_minor']
        x = np.arange(len(y))
        # left panel: frequency by position
        axs[0].scatter(x, y,
                       marker=shapes[idat],
                       lw=1.5, edgecolor='none',
                       facecolor=colors[idat],
                       zorder=idat+1)

        # right panel: histogram of the frequencies in logarithmic bins,
        # drawn sideways to share the frequency axis with the left panel
        h = np.histogram(y, bins=np.logspace(-4, 0, 27))
        axs[1].barh(h[1][:-1], h[0], (h[1][1:] - h[1][:-1]),
                    color=colors[idat],
                    alpha=alphas[idat],
                    zorder=2 - idat)

    axs[0].set_xlabel('Position [bp]', fontsize=fig_fontsize)
    axs[0].set_ylabel('SNP frequency', fontsize=fig_fontsize)
    axs[0].set_yscale('log')
    axs[0].set_ylim(10**(-4), 1)
    # the x-range ends 20 positions after the last nonzero frequency of the
    # last data set
    axs[0].set_xlim(-20, y.nonzero()[0][-1] + 21)
    axs[0].grid(True)
    axs[0].tick_params(axis='both', labelsize=fig_fontsize)

    axs[1].set_xlabel('Number of positions', fontsize=fig_fontsize)
    axs[1].grid(True)
    axs[1].set_yscale('log')
    axs[1].set_xlim(0.8, 2 * h[0].max())
    axs[1].set_xscale('log')
    axs[1].tick_params(axis='x', labelsize=fig_fontsize)

    add_panel_label(axs[0], 'C', x_offset=-0.22)
    plt.tight_layout(pad=0.1, h_pad=0.001, w_pad=0.001)

    if title:
        fig.suptitle(title)

    if fig_filename is not None:
        for ext in ['.pdf','.svg', '.png']:
            fig.savefig(fig_filename+ext)
        # close only once all formats are written
        plt.close(fig)

    else:
        plt.ion()
        plt.show()
def plot_fit(data_sat, data_pooled, bins_sat):
    '''Plot the saturation fits (panel A) and the resulting fitness costs (panel B).

    Parameters:
        data_sat: mapping with the entries
            'data_to_fit': table iterated with .iterrows(); every row is drawn
                           as one group, with the row's index as x values
            'mu': rate used in the saturation curve mu/s * (1 - exp(-s*x))
            's': table indexed by the same labels as 'data_to_fit', with the
                 columns 's' (fitted value) and 'ds' (its error bar)
        data_pooled: mapping with the 2D arrays 'all' and 'all_std'. In 'all',
                     column 0 is plotted as x, columns 1 and 2 as the lower and
                     upper end of the horizontal error bar and the last column
                     as y; the last column of 'all_std' is the vertical error
                     bar. The last row is left out.
        bins_sat: array with the edges of the groups of the saturation fit,
                  used for the horizontal error bars in panel B
                  (presumably one more entry than there are groups -- verify
                  against the caller)

    The figure is shown interactively, nothing is returned.
    '''
    from matplotlib import cm
    from util import add_panel_label

    palette = sns.color_palette('colorblind')

    fig_width = 5
    fs = 16
    fig, axs = plt.subplots(1, 2, figsize=(2 * fig_width, fig_width))

    data_to_fit = data_sat['data_to_fit']
    mu = data_sat['mu']
    s = data_sat['s']
    n_groups = data_to_fit.shape[0]

    # saturation curve: linear increase with slope mu at small x, plateau at mu/s
    fun = lambda x, s: mu / s * (1.0 - np.exp(-s * x))

    # PANEL A: data and fits
    ax = axs[0]
    for iS, (S, datum) in enumerate(data_to_fit.iterrows()):
        x = np.array(datum.index)
        y = np.array(datum)
        color = cm.jet(1.0 * iS / n_groups)

        # Most conserved group is dashed
        ls = '--' if iS == 0 else '-'

        ax.scatter(
            x,
            y,
            s=70,
            color=color,
        )

        xfit = np.linspace(0, 3000)
        yfit = fun(xfit, s.loc[S, 's'])
        ax.plot(
            xfit,
            yfit,
            lw=2,
            color=color,
            ls=ls,
        )

    ax.set_xlabel('days since EDI', fontsize=fs)
    ax.set_ylabel('divergence', fontsize=fs)
    ax.set_xlim(-200, 3200)
    ax.set_ylim(-0.0005, 0.025)
    # the x ticks used to be set twice in a row; only this last call had an effect
    ax.set_xticks([0, 1000, 2000, 3000])
    ax.xaxis.set_tick_params(labelsize=fs)
    ax.yaxis.set_tick_params(labelsize=fs)

    # NOTE(review): the value in this label is hard-coded and not derived from
    # data_sat['mu'] -- confirm that the two agree
    ax.text(0, 0.023, r'$\mu = 1.2 \cdot 10^{-5}$ per day', fontsize=16)
    # straight line with slope mu as a guide to the eye
    ax.plot([200, 1300], [0.007, 0.007 + (1300 - 200) * mu], lw=1.5, c='k')

    # PANEL B: costs
    ax = axs[1]

    # B1: Saturation fit, the first group is not shown in this panel
    x = np.array(s.index)[1:]
    y = np.array(s['s'])[1:]
    dy = np.array(s['ds'])[1:]
    # horizontal error bars reach from the lower to the upper edge of each group
    dx = np.array((x - bins_sat[1:-1], bins_sat[2:] - x))
    ax.errorbar(
        x,
        y,
        yerr=dy,
        xerr=dx,
        ls='-',
        marker='o',
        lw=2,
        color=palette[0],
        label='Sat',
    )

    # Annotate with colors from panel A: an arrow in the color of the group
    # points at each value from below
    for iS in range(1, n_groups):
        if iS != n_groups - 1:
            y_head = y[iS - 1] * 0.7
            y_tail = y[iS - 1] * 1.0 / 3
        else:
            # the arrow of the last group is drawn at fixed heights
            y_head = 1e-4
            y_tail = 2e-4
        ax.annotate(
            '',
            xy=(x[iS - 1], y_head),
            xytext=(x[iS - 1], y_tail),
            arrowprops={
                'facecolor': cm.jet(1.0 * iS / n_groups),
                'edgecolor': 'none',
                'shrink': 0.05
            },
        )

    # B2: pooled
    x = data_pooled['all'][:-1, 0]
    y = data_pooled['all'][:-1, -1]
    dy = data_pooled['all_std'][:-1, -1]
    dx = np.array(
        (x - data_pooled['all'][:-1, 1], data_pooled['all'][:-1, 2] - x))
    ax.errorbar(
        x,
        y,
        yerr=dy,
        xerr=dx,
        ls='-',
        marker='o',
        lw=2,
        color=palette[2],
        label='Pooled',
    )

    ax.legend(loc='upper right', fontsize=16)
    ax.set_xlabel('variability in group M [bits]', fontsize=fs)
    ax.set_ylabel('fitness cost [1/day]', fontsize=fs)
    ax.set_xlim(0.9e-3, 2.5)
    ax.set_ylim(9e-5, 0.11)
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.xaxis.set_tick_params(labelsize=fs)
    ax.yaxis.set_tick_params(labelsize=fs)

    # Panel labels
    add_panel_label(axs[0], 'A', x_offset=-0.27)
    add_panel_label(axs[1], 'B', x_offset=-0.21)

    plt.tight_layout()
    plt.ion()
    plt.show()
def plot_non_coding_figure(data, minor_af, synnonsyn, reference, fname=None):
    '''Draw fitness costs in four genomic windows around noncoding features.

    A single row of four panels with a shared y axis is created. Each panel
    is filled by plot_fitness_costs_along_genome for one coordinate window
    (500-900, 2050-2150, 4750-5000 and 9050-9150) and then decorated with
    black bars plus text naming the annotations flanking the window.

    Parameters:
       data, minor_af: handed unchanged to plot_fitness_costs_along_genome
       synnonsyn: mapping; only the entry 'genomewide' is read here
       reference: object whose annotation[name].location.start/.end give
                  the boundaries used for the annotation bars
       fname: if not None, the figure is written to fname + '.png',
              '.svg' and '.pdf'
    '''
    from util import add_panel_label

    ymin, ymax = 0.0005, 0.25

    # factor lifting the bar of the second annotation of a panel above the
    # first one; its text label sits a further 0.17 higher
    y_second_gene = 1.18
    raised_text = y_second_gene + 0.17

    fig, axs = plt.subplots(1, 4,
                            sharey=True,
                            figsize=(10, 5),
                            gridspec_kw={'width_ratios': [4, 1, 2.5, 1]})

    def draw_window(panel, first, last, features, **extra):
        '''Plot one window with the settings shared by all four panels.'''
        return plot_fitness_costs_along_genome(first, last, features,
                                               data, minor_af, reference,
                                               pheno=None, ws=8, ws_syn=4,
                                               ax=panel, **extra)

    def mark_annotation(panel, x_span, x_label, name, align,
                        bar_scale=1.0, text_scale=1.17, width=10):
        '''Draw a horizontal bar over x_span and write name next to it.

        Both heights are multiples of the current lower y limit, which is
        queried separately for the bar and for the text.
        '''
        panel.plot(x_span,
                   bar_scale * panel.get_ylim()[0] * np.ones(2),
                   lw=width, c='k', alpha=0.7)
        panel.text(x_label,
                   panel.get_ylim()[0] * text_scale,
                   name,
                   fontsize=fs * 0.8,
                   horizontalalignment=align)

    # ---- 5' region -------------------------------------------------------
    lo, hi = 500, 900
    ax = draw_window(axs[0], lo, hi,
                     ['polyA', 'U5', 'U5 stem', 'PBS', 'PSI SL1-4'],
                     synnonsyn=synnonsyn['genomewide'])
    # the left-most axis carries the y label; the y range is shared
    ax.set_ylabel('fitness cost [1/day]', fontsize=fs)
    ax.set_ylim(ymin, ymax)
    add_panel_label(ax, 'B', x_offset=-0.15)

    mark_annotation(ax, [lo, reference.annotation["LTR5'"].location.end],
                    lo, "LTR5'", 'left')
    mark_annotation(ax, [reference.annotation['gag'].location.start, hi],
                    hi, 'gag', 'right')
    ax.set_ylim(ymin, ymax)

    # ---- frame shift region ----------------------------------------------
    # no synnonsyn mask is passed: this window lies in a reading frame
    # overlap, so no synonymous fitness cost is shown
    lo, hi = 2050, 2150
    ax = draw_window(axs[1], lo, hi, ['frameshift'])

    mark_annotation(ax, [lo, reference.annotation['gag'].location.end],
                    lo, 'gag', 'left')
    mark_annotation(ax, [reference.annotation['pol'].location.start, hi],
                    hi, 'pol', 'right',
                    bar_scale=y_second_gene, text_scale=raised_text, width=5)
    ax.set_xticks([2050, 2150])
    ax.set_ylim(ymin, ymax)

    # ---- cPPT region -----------------------------------------------------
    lo, hi = 4750, 5000
    ax = draw_window(axs[2], lo, hi, ['A1', 'D2', 'cPPT'],
                     synnonsyn=synnonsyn['genomewide'])
    ax.set_ylim(ymin, ymax)

    mark_annotation(ax, [lo, reference.annotation["IN"].location.end],
                    lo, "IN", 'left')
    mark_annotation(ax, [reference.annotation['vif'].location.start, hi],
                    hi, 'vif', 'right',
                    bar_scale=y_second_gene, text_scale=raised_text, width=10)
    ax.set_xticks([4800, 4900])
    ax.set_ylim(ymin, ymax)

    # ---- 3' region -------------------------------------------------------
    lo, hi = 9050, 9150
    ax = draw_window(axs[3], lo, hi, ['PPT'],
                     synnonsyn=synnonsyn['genomewide'])

    mark_annotation(ax, [lo, reference.annotation['nef'].location.end],
                    lo, 'nef', 'left')
    mark_annotation(ax, [reference.annotation["LTR3'"].location.start, hi],
                    hi, "LTR3'", 'right',
                    bar_scale=y_second_gene, text_scale=raised_text, width=5)
    ax.set_xticks([9050, 9100, 9150])
    ax.set_ylim(ymin, ymax)

    # one x label for the whole row, placed below the panels
    fig.text(0.5, 0.01,
             'Position in HIV-1 reference (HXB2) [bp]',
             ha='center',
             fontsize=fs)
    plt.tight_layout(rect=(0, 0.04, 1, 1), w_pad=-1)

    if fname is None:
        return
    for ext in ['.png', '.svg', '.pdf']:
        plt.savefig(fname + ext)
Ejemplo n.º 20
0
def plot_minor_allele_example(data, title='', VERBOSE=0, fig_filename=None):
    '''Plot minor allele frequencies of example samples.

    The left panel is a scatter plot of frequency against position index,
    the right panel a horizontal histogram of the same frequencies on 27
    log-spaced bin edges between 1e-4 and 1. Both panels share the y axis.

    Parameters:
       data: non-empty sequence of at most two items (only two colors,
             shapes and alphas are defined), each providing
             datum['freq_minor']. The x range of the left panel and of the
             histogram panel are derived from the LAST item only.
             NOTE(review): 'freq_minor' is presumably a 1D numpy array
             (it must support .nonzero()); the order is presumably
             (control, patient) -- confirm with the caller.
       title: optional figure title; skipped when empty
       VERBOSE: if truthy, print a progress message
       fig_filename: if not None, save the figure as fig_filename + '.pdf',
                     '.svg' and '.png' and close it; otherwise show it
                     interactively
    '''
    import matplotlib.pyplot as plt
    import seaborn as sns
    from util import add_panel_label

    plt.ioff()

    if VERBOSE:
        # call form prints the same text under Python 2 and parses under 3
        print('Plot minor alleles of example sample')

    fig_size = (fig_width, 0.8 * fig_width)
    fig, axs = plt.subplots(1,
                            2,
                            figsize=fig_size,
                            sharey=True,
                            gridspec_kw={'width_ratios': [3, 1]})
    sns.set_style('darkgrid')

    # per-dataset styling, indexed by position in data
    alphas = [0.6, 1]
    colors = [sns.color_palette()[i] for i in [2, 0]]
    shapes = ['s', 'o']

    for idat, datum in enumerate(data):
        y = datum['freq_minor']
        x = np.arange(len(y))
        # later datasets are drawn on top in the scatter panel ...
        axs[0].scatter(x,
                       y,
                       marker=shapes[idat],
                       lw=1.5,
                       edgecolor='none',
                       facecolor=colors[idat],
                       zorder=idat + 1)

        # ... and below earlier ones in the histogram panel
        h = np.histogram(y, bins=np.logspace(-4, 0, 27))
        axs[1].barh(h[1][:-1],
                    h[0], (h[1][1:] - h[1][:-1]),
                    color=colors[idat],
                    alpha=alphas[idat],
                    zorder=2 - idat)

    axs[0].set_xlabel('Position [bp]', fontsize=fig_fontsize)
    axs[0].set_ylabel('SNP frequency', fontsize=fig_fontsize)
    axs[0].set_yscale('log')
    axs[0].set_ylim(10**(-4), 1)
    # x range ends 21 positions past the last nonzero frequency of the
    # last dataset
    axs[0].set_xlim(-20, y.nonzero()[0][-1] + 21)
    axs[0].grid(True)
    axs[0].tick_params(axis='both', labelsize=fig_fontsize)

    axs[1].set_xlabel('Number of positions', fontsize=fig_fontsize)
    axs[1].grid(True)
    axs[1].set_yscale('log')
    # upper count limit is twice the tallest bin of the last dataset
    axs[1].set_xlim(0.8, 2 * h[0].max())
    axs[1].set_xscale('log')
    axs[1].tick_params(axis='x', labelsize=fig_fontsize)

    add_panel_label(axs[0], 'C', x_offset=-0.22)
    plt.tight_layout(pad=0.1, h_pad=0.001, w_pad=0.001)

    if title:
        fig.suptitle(title)

    if fig_filename is not None:
        for ext in ['.pdf', '.svg', '.png']:
            fig.savefig(fig_filename + ext)
        # close only once every format has been written; closing inside
        # the loop released the figure after the first file
        plt.close(fig)

    else:
        plt.ion()
        plt.show()