Python gffの例、hivwholeseq.sequencing.filenames.gff Pythonの例

コード例 #1

0

ファイルを表示

def plot_distance_histogram(data_folder,
                            adaID,
                            fragment,
                            counts,
                            savefig=False):
    '''Plot the histogram of distance from consensus.

    Two figures are produced: the counts against the Hamming distance on
    linear axes, and one minus the normalized cumulative sum of the counts
    on a logarithmic y axis.

    Parameters:
       - data_folder: forwarded to the figure filename helper
       - adaID: adapter ID; concatenated into the titles, so it must be a
         string
       - fragment: fragment name; concatenated into the titles, so it must
         be a string
       - counts: numpy array of counts, plotted against its own indices
         (labelled as the Hamming distance)
       - savefig: if True, both figures are written to the paths returned by
         the filename helper and then closed; if False they are left open

    Raises:
       - IndexError: if counts has no nonzero entry, because the x range is
         taken from the last nonzero index
    '''
    from hivwholeseq.sequencing.filenames import get_distance_from_consensus_figure_filename as gff
    import matplotlib.pyplot as plt

    # While saving, interactive mode is switched off. Remember the previous
    # state so that it is restored even if plotting or saving fails.
    restore_ion = False
    if savefig:
        restore_ion = plt.isinteractive()
        plt.ioff()

    try:
        # Quantities shared by both figures
        title = 'adaID ' + adaID + ', ' + fragment
        distances = np.arange(len(counts))
        xlim = (-0.5, 0.5 + counts.nonzero()[0][-1])
        n_pairs = counts.sum()

        # Linear histogram
        fig, ax = plt.subplots(1, 1)
        ax.set_xlabel('Hamming distance')
        ax.set_ylabel('# read pairs')
        ax.set_title(title)
        ax.set_xlim(*xlim)

        ax.plot(distances, counts, 'b', lw=2)
        if savefig:
            fig.savefig(gff(data_folder, adaID, fragment))
            plt.close(fig)

        # Log cumulative histogram
        fig, ax = plt.subplots(1, 1)
        ax.set_xlabel('Hamming distance')
        ax.set_ylabel('# read pairs < x')
        ax.set_title(title)
        ax.set_xlim(*xlim)
        ax.set_ylim(1.0 / n_pairs * 0.9, 1.1)
        ax.set_yscale('log')

        # Complement of the normalized cumulative sum of the counts
        y = 1.0 - 1.0 * np.cumsum(counts) / n_pairs
        ax.plot(distances, y, 'b', lw=2)
        if savefig:
            outputfile = gff(data_folder,
                             adaID,
                             fragment,
                             cumulative=True,
                             yscale='log')
            fig.savefig(outputfile)
            plt.close(fig)
    finally:
        if restore_ion:
            plt.ion()

コード例 #2

0

ファイルを表示

def plot_coverage(data_folder,
                  adaID,
                  fragment,
                  counts,
                  VERBOSE=0,
                  savefig=False):
    '''Plot figure with the coverage.

    The coverage is obtained by summing counts over axis 1 and then over
    axis 0 of the result, and is drawn on a logarithmic y axis.

    Parameters:
       - data_folder: forwarded to the figure filename helper
       - adaID: adapter ID; concatenated into messages and the title, so it
         must be a string
       - fragment: fragment name; concatenated like adaID
       - counts: numpy array of counts with at least two axes
         (presumably read type x allele x position -- TODO confirm)
       - VERBOSE: a progress message is printed when >= 1
       - savefig: if True, the figure is written to the path returned by the
         filename helper and closed; otherwise interactive mode is switched
         on and the figure is shown
    '''
    from hivwholeseq.sequencing.filenames import get_coverage_figure_filename as gff

    if VERBOSE >= 1:
        # A single parenthesized argument prints the same on Python 2 and 3
        print('Plotting coverage: ' + adaID + ' ' + fragment)

    coverage = counts.sum(axis=1).sum(axis=0)

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))
    # The 0.5 offset presumably keeps zero-coverage positions drawable on
    # the logarithmic axis
    ax.plot(coverage + 0.5)
    ax.set_yscale('log')
    ax.set_xlabel('Position')
    ax.set_ylabel('Coverage')
    ax.set_title('adaID ' + adaID + ', fragment ' + fragment)

    if savefig:
        outputfile = gff(data_folder, adaID, fragment)
        fig.savefig(outputfile)
        plt.close(fig)

    else:
        plt.ion()
        plt.show()

コード例 #3

0

ファイルを表示

ファイル: one_site_statistics.py プロジェクト: iosonofabio/hivwholeseq

def plot_coverage(data_folder, adaID, fragment, counts, VERBOSE=0, savefig=False):
    '''Plot figure with the coverage.

    The coverage is obtained by summing counts over axis 1 and then over
    axis 0 of the result, and is drawn on a logarithmic y axis.

    Parameters:
       - data_folder: forwarded to the figure filename helper
       - adaID: adapter ID; concatenated into messages and the title, so it
         must be a string
       - fragment: fragment name; concatenated like adaID
       - counts: numpy array of counts with at least two axes
         (presumably read type x allele x position -- TODO confirm)
       - VERBOSE: a progress message is printed when >= 1
       - savefig: if True, the figure is written to the path returned by the
         filename helper and closed; otherwise interactive mode is switched
         on and the figure is shown
    '''
    from hivwholeseq.sequencing.filenames import get_coverage_figure_filename as gff

    if VERBOSE >= 1:
        # A single parenthesized argument prints the same on Python 2 and 3
        print('Plotting coverage: '+adaID+' '+fragment)

    coverage = counts.sum(axis=1).sum(axis=0)

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))
    # The 0.5 offset presumably keeps zero-coverage positions drawable on
    # the logarithmic axis
    ax.plot(coverage + 0.5)
    ax.set_yscale('log')
    ax.set_xlabel('Position')
    ax.set_ylabel('Coverage')
    ax.set_title('adaID '+adaID+', fragment '+fragment)

    if savefig:
        outputfile = gff(data_folder, adaID, fragment)
        fig.savefig(outputfile)
        plt.close(fig)

    else:
        plt.ion()
        plt.show()

コード例 #4

0

ファイルを表示

ファイル: minor_allele_frequency_merged.py プロジェクト: iosonofabio/hivwholeseq

def plot_minor_allele_frequency_filtered(data_folder, adaID, fragments, VERBOSE=0,
                                         savefig=False):
    '''Plot minor allele frequency along the genome.

    The merged allele frequencies are loaded from the file named by
    get_merged_allele_frequencies_filename, and for every position (last
    axis) the second-largest frequency is plotted on a logarithmic y axis.
    Positions whose frequencies contain masked entries stay masked.

    Parameters:
       - data_folder: forwarded to the filename helpers
       - adaID: adapter ID; concatenated into the title, so it must be a
         string
       - fragments: sequence of fragment names (strings), joined with '-'
         in the title
       - VERBOSE: accepted for interface compatibility; not used here
       - savefig: if True, the figure is written to the path returned by the
         filename helper and closed; otherwise interactive mode is switched
         on and the figure is shown
    '''
    nus = np.load(get_merged_allele_frequencies_filename(data_folder, adaID, fragments))

    nu_min = np.ma.masked_all(nus.shape[-1])
    for pos, nutmp in enumerate(nus.T):
        try:
            if not np.ma.is_masked(nutmp):
                nu_min[pos] = np.sort(nutmp)[-2]
        except ValueError:
            # Report the offending position and leave it masked. The former
            # debugger breakpoint here blocked non-interactive runs.
            print('%s %s' % (pos, np.ma.is_masked(nutmp)))

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))
    positions = np.arange(len(nu_min))
    ax.plot(nu_min, lw=1.5, c='k')
    ax.scatter(positions, nu_min, s=30, c='k')
    ax.set_yscale('log')
    ax.set_xlabel('Position')
    ax.set_ylabel(r'$\nu$', fontsize=20)
    ax.set_title('adaID '+adaID+', '+'-'.join(fragments))
    # Leave a margin of 100 positions on either side
    ax.set_xlim(-100, len(nu_min) + 100)

    plt.tight_layout()

    if savefig:
        from hivwholeseq.sequencing.filenames import \
                get_minor_allele_frequency_merged_figure_filename as gff
        outputfile = gff(data_folder, adaID, fragments)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()

コード例 #5

0

ファイルを表示

ファイル: filter_mapped_reads.py プロジェクト: iosonofabio/hivwholeseq

def plot_distance_histogram(data_folder, adaID, fragment, counts, savefig=False):
    '''Plot the histogram of distance from consensus.

    Two figures are produced: the counts against the Hamming distance on
    linear axes, and one minus the normalized cumulative sum of the counts
    on a logarithmic y axis.

    Parameters:
       - data_folder: forwarded to the figure filename helper
       - adaID: adapter ID; concatenated into the titles, so it must be a
         string
       - fragment: fragment name; concatenated into the titles, so it must
         be a string
       - counts: numpy array of counts, plotted against its own indices
         (labelled as the Hamming distance)
       - savefig: if True, both figures are written to the paths returned by
         the filename helper and then closed; if False they are left open

    Raises:
       - IndexError: if counts has no nonzero entry, because the x range is
         taken from the last nonzero index
    '''
    from hivwholeseq.sequencing.filenames import get_distance_from_consensus_figure_filename as gff
    import matplotlib.pyplot as plt

    # While saving, interactive mode is switched off. Remember the previous
    # state so that it is restored even if plotting or saving fails.
    restore_ion = False
    if savefig:
        restore_ion = plt.isinteractive()
        plt.ioff()

    try:
        # Quantities shared by both figures
        title = 'adaID '+adaID+', '+fragment
        distances = np.arange(len(counts))
        xlim = (-0.5, 0.5 + counts.nonzero()[0][-1])
        n_pairs = counts.sum()

        # Linear histogram
        fig, ax = plt.subplots(1, 1)
        ax.set_xlabel('Hamming distance')
        ax.set_ylabel('# read pairs')
        ax.set_title(title)
        ax.set_xlim(*xlim)

        ax.plot(distances, counts, 'b', lw=2)
        if savefig:
            fig.savefig(gff(data_folder, adaID, fragment))
            plt.close(fig)

        # Log cumulative histogram
        fig, ax = plt.subplots(1, 1)
        ax.set_xlabel('Hamming distance')
        ax.set_ylabel('# read pairs < x')
        ax.set_title(title)
        ax.set_xlim(*xlim)
        ax.set_ylim(1.0 / n_pairs * 0.9, 1.1)
        ax.set_yscale('log')

        # Complement of the normalized cumulative sum of the counts
        y = 1.0 - 1.0 * np.cumsum(counts) / n_pairs
        ax.plot(distances, y, 'b', lw=2)
        if savefig:
            outputfile = gff(data_folder, adaID, fragment, cumulative=True, yscale='log')
            fig.savefig(outputfile)
            plt.close(fig)
    finally:
        if restore_ion:
            plt.ion()

コード例 #6

0

ファイルを表示

ファイル: minor_allele_frequency_merged.py プロジェクト: 5l1v3r1/hivwholeseq

def plot_minor_allele_frequency_filtered(data_folder,
                                         adaID,
                                         fragments,
                                         VERBOSE=0,
                                         savefig=False):
    '''Plot minor allele frequency along the genome.

    The merged allele frequencies are loaded from the file named by
    get_merged_allele_frequencies_filename, and for every position (last
    axis) the second-largest frequency is plotted on a logarithmic y axis.
    Positions whose frequencies contain masked entries stay masked.

    Parameters:
       - data_folder: forwarded to the filename helpers
       - adaID: adapter ID; concatenated into the title, so it must be a
         string
       - fragments: sequence of fragment names (strings), joined with '-'
         in the title
       - VERBOSE: accepted for interface compatibility; not used here
       - savefig: if True, the figure is written to the path returned by the
         filename helper and closed; otherwise interactive mode is switched
         on and the figure is shown
    '''
    nus = np.load(
        get_merged_allele_frequencies_filename(data_folder, adaID, fragments))

    nu_min = np.ma.masked_all(nus.shape[-1])
    for pos, nutmp in enumerate(nus.T):
        try:
            if not np.ma.is_masked(nutmp):
                nu_min[pos] = np.sort(nutmp)[-2]
        except ValueError:
            # Report the offending position and leave it masked. The former
            # debugger breakpoint here blocked non-interactive runs.
            print('%s %s' % (pos, np.ma.is_masked(nutmp)))

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))
    positions = np.arange(len(nu_min))
    ax.plot(nu_min, lw=1.5, c='k')
    ax.scatter(positions, nu_min, s=30, c='k')
    ax.set_yscale('log')
    ax.set_xlabel('Position')
    ax.set_ylabel(r'$\nu$', fontsize=20)
    ax.set_title('adaID ' + adaID + ', ' + '-'.join(fragments))
    # Leave a margin of 100 positions on either side
    ax.set_xlim(-100, len(nu_min) + 100)

    plt.tight_layout()

    if savefig:
        from hivwholeseq.sequencing.filenames import \
                get_minor_allele_frequency_merged_figure_filename as gff
        outputfile = gff(data_folder, adaID, fragments)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()

コード例 #7

0

ファイルを表示

def plot_SFS_folded(data_folder,
                    adaID,
                    fragment,
                    nu_filtered,
                    VERBOSE=0,
                    savefig=False):
    '''Plot the site frequency spectrum (folded).

    For every position (second axis of nu_filtered) the largest and the
    second-largest frequency are taken. The largest is folded as 1 - nu and
    pooled with the second-largest; values above 1e-5 are then shown as a
    cumulative curve and as a log-binned density histogram.

    Parameters:
       - data_folder: forwarded to the figure filename helper
       - adaID: adapter ID; concatenated into the titles, so it must be a
         string
       - fragment: fragment name; concatenated into the titles, so it must
         be a string
       - nu_filtered: 2D numpy (possibly masked) array of frequencies,
         indexed as (allele, position)
       - VERBOSE: progress message at >= 1, skipped positions at >= 3
       - savefig: if True, both figures are written to the paths returned by
         the filename helper and closed; otherwise the cumulative figure is
         left open and the histogram is shown in interactive mode

    Raises:
       - IndexError: if no pooled frequency exceeds 1e-5, because the x
         range is taken from the smallest retained value
    '''
    if VERBOSE >= 1:
        print('Plotting folded SFS')

    from hivwholeseq.sequencing.filenames import get_SFS_figure_filename as gff
    import matplotlib.pyplot as plt
    import numpy as np

    nu_maj = np.ma.masked_all(nu_filtered.shape[1])
    nu_min = np.ma.masked_all(nu_filtered.shape[1])
    for pos, nus in enumerate(nu_filtered.T):
        # Identity test: comparing with == against the masked constant
        # yields masked again, which is never truthy.
        if nus[0] is np.ma.masked:
            continue
        nus = np.sort(nus)
        if (nus[-1] < 0.5):
            if VERBOSE >= 3:
                print('%s has 3+ alleles: %s skipping.' % (pos, nus))
            continue

        nu_maj[pos] = nus[-1]
        nu_min[pos] = nus[-2]

    nu_maj_fold = 1 - nu_maj

    # np.concatenate does not preserve masks, which would let the
    # uninitialized data behind skipped positions leak into the spectrum.
    nu_mm = np.ma.concatenate([nu_maj_fold, nu_min]).compressed()
    nu_mm = nu_mm[nu_mm > 1e-5]
    nu_mm.sort()

    # Quantities shared by both figures
    title = 'adaID ' + adaID + ', ' + fragment
    n_alleles = len(nu_mm)
    xlim = (10**(np.floor(np.log10(nu_mm[0] * 0.9))), 0.6)

    # Cumulative histogram
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel(r'$\nu$', fontsize=20)
    ax.set_ylabel('# alleles < x folded')
    ax.set_title(title)
    ax.set_xlim(*xlim)
    ax.set_xscale('log')
    ax.set_ylim(1.0 / n_alleles * 0.9, 1.1)
    ax.set_yscale('log')
    ax.plot(nu_mm,
            1.0 - np.linspace(0, 1 - 1.0 / n_alleles, n_alleles),
            lw=2,
            c='b')

    if savefig:
        outputfile = gff(data_folder,
                         adaID,
                         fragment,
                         cumulative=True,
                         yscale='log')
        fig.savefig(outputfile)
        plt.close(fig)

    # Histogram
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel(r'$\nu$', fontsize=20)
    ax.set_ylabel('SFS folded (density)')
    ax.set_title(title)
    ax.set_xlim(*xlim)
    ax.set_xscale('log')
    ax.set_yscale('log')

    bins = np.logspace(-4, np.log10(0.5), 50)
    density, edges = np.histogram(nu_mm, bins=bins, density=True)
    # Geometric mean of the bin edges, i.e. the bin center on a log axis
    x = np.sqrt(edges[1:] * edges[:-1])
    ax.plot(x, density, lw=2, c='b')
    ax.scatter(x, density, s=50, edgecolor='none', facecolor='b')
    ax.grid()

    if savefig:
        outputfile = gff(data_folder,
                         adaID,
                         fragment,
                         cumulative=False,
                         yscale='log')
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()

コード例 #8

0

ファイルを表示

ファイル: one_site_statistics.py プロジェクト: iosonofabio/hivwholeseq

def plot_SFS_folded(data_folder, adaID, fragment, nu_filtered, VERBOSE=0, savefig=False):
    '''Plot the site frequency spectrum (folded).

    For every position (second axis of nu_filtered) the largest and the
    second-largest frequency are taken. The largest is folded as 1 - nu and
    pooled with the second-largest; values above 1e-5 are then shown as a
    cumulative curve and as a log-binned density histogram.

    Parameters:
       - data_folder: forwarded to the figure filename helper
       - adaID: adapter ID; concatenated into the titles, so it must be a
         string
       - fragment: fragment name; concatenated into the titles, so it must
         be a string
       - nu_filtered: 2D numpy (possibly masked) array of frequencies,
         indexed as (allele, position)
       - VERBOSE: progress message at >= 1, skipped positions at >= 3
       - savefig: if True, both figures are written to the paths returned by
         the filename helper and closed; otherwise the cumulative figure is
         left open and the histogram is shown in interactive mode

    Raises:
       - IndexError: if no pooled frequency exceeds 1e-5, because the x
         range is taken from the smallest retained value
    '''
    if VERBOSE >= 1:
        print('Plotting folded SFS')

    from hivwholeseq.sequencing.filenames import get_SFS_figure_filename as gff
    import matplotlib.pyplot as plt
    import numpy as np

    nu_maj = np.ma.masked_all(nu_filtered.shape[1])
    nu_min = np.ma.masked_all(nu_filtered.shape[1])
    for pos, nus in enumerate(nu_filtered.T):
        # Identity test: comparing with == against the masked constant
        # yields masked again, which is never truthy.
        if nus[0] is np.ma.masked:
            continue
        nus = np.sort(nus)
        if (nus[-1] < 0.5):
            if VERBOSE >= 3:
                print('%s has 3+ alleles: %s skipping.' % (pos, nus))
            continue

        nu_maj[pos] = nus[-1]
        nu_min[pos] = nus[-2]

    nu_maj_fold = 1 - nu_maj

    # np.concatenate does not preserve masks, which would let the
    # uninitialized data behind skipped positions leak into the spectrum.
    nu_mm = np.ma.concatenate([nu_maj_fold, nu_min]).compressed()
    nu_mm = nu_mm[nu_mm > 1e-5]
    nu_mm.sort()

    # Quantities shared by both figures
    title = 'adaID '+adaID+', '+fragment
    n_alleles = len(nu_mm)
    xlim = (10**(np.floor(np.log10(nu_mm[0] * 0.9))), 0.6)

    # Cumulative histogram
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel(r'$\nu$', fontsize=20)
    ax.set_ylabel('# alleles < x folded')
    ax.set_title(title)
    ax.set_xlim(*xlim)
    ax.set_xscale('log')
    ax.set_ylim(1.0 / n_alleles * 0.9, 1.1)
    ax.set_yscale('log')
    ax.plot(nu_mm, 1.0 - np.linspace(0, 1 - 1.0 / n_alleles, n_alleles), lw=2, c='b')

    if savefig:
        outputfile = gff(data_folder, adaID, fragment, cumulative=True, yscale='log')
        fig.savefig(outputfile)
        plt.close(fig)

    # Histogram
    fig, ax = plt.subplots(1, 1)
    ax.set_xlabel(r'$\nu$', fontsize=20)
    ax.set_ylabel('SFS folded (density)')
    ax.set_title(title)
    ax.set_xlim(*xlim)
    ax.set_xscale('log')
    ax.set_yscale('log')

    bins = np.logspace(-4, np.log10(0.5), 50)
    density, edges = np.histogram(nu_mm, bins=bins, density=True)
    # Geometric mean of the bin edges, i.e. the bin center on a log axis
    x = np.sqrt(edges[1:] * edges[:-1])
    ax.plot(x, density, lw=2, c='b')
    ax.scatter(x, density, s=50, edgecolor='none', facecolor='b')
    ax.grid()

    if savefig:
        outputfile = gff(data_folder, adaID, fragment, cumulative=False, yscale='log')
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()

コード例 #9

0

ファイルを表示

def plot_distance_histogram_sliding_window(data_folder,
                                           adaID,
                                           fragment,
                                           lref,
                                           counts,
                                           binsize=200,
                                           savefig=False):
    '''Plot the distance histogram along the genome.

    One curve per row of counts is drawn, in two figures: the raw counts on
    linear axes, and one minus the normalized cumulative sum of each row on
    a logarithmic y axis. Row i is labelled with the window from binsize * i
    to min(binsize * (i + 1), lref).

    Parameters:
       - data_folder: forwarded to the figure filename helper
       - adaID: adapter ID; concatenated into the titles, so it must be a
         string
       - fragment: fragment name; concatenated into the titles, so it must
         be a string
       - lref: upper bound for the end coordinate in the window labels
       - counts: 2D numpy array indexed as (window, Hamming distance)
       - binsize: window width used to build the labels
       - savefig: if True, both figures are written to the paths returned by
         the filename helper and then closed; if False they are left open

    Raises:
       - ValueError: if counts has no nonzero entry, because the x range is
         the maximum over an empty index array
    '''
    from hivwholeseq.sequencing.filenames import get_distance_from_consensus_figure_filename as gff
    import matplotlib.pyplot as plt
    from matplotlib import cm

    # While saving, interactive mode is switched off. Remember the previous
    # state so that it is restored even if plotting or saving fails.
    restore_ion = False
    if savefig:
        restore_ion = plt.isinteractive()
        plt.ioff()

    try:
        # Figure max x
        xmax = counts.nonzero()[1].max()

        title = 'adaID ' + adaID + ', ' + fragment
        n_windows = counts.shape[0]
        distances = np.arange(counts.shape[1])

        # Color and legend label of each window, shared by both figures
        styles = []
        for i in range(n_windows):
            start = binsize * i
            end = min(binsize * (i + 1), lref)
            styles.append((cm.jet(int(255.0 * i / n_windows)),
                           str(start) + ' to ' + str(end)))

        # Linear histogram
        fig, ax = plt.subplots(1, 1)
        ax.set_xlabel('Hamming distance')
        ax.set_ylabel('# read pairs')
        ax.set_title(title)
        ax.set_xlim(-0.5, 0.5 + xmax)

        for count, (color, label) in zip(counts, styles):
            ax.plot(distances, count, lw=2, color=color, label=label)
        ax.legend(loc=1)

        if savefig:
            outputfile = gff(data_folder, adaID, fragment, sliding_window=True)
            fig.savefig(outputfile)
            plt.close(fig)

        # Log cumulative histogram
        fig, ax = plt.subplots(1, 1)
        ax.set_xlabel('Hamming distance')
        ax.set_ylabel('# read pairs')
        ax.set_title(title)
        ax.set_xlim(-0.5, 0.5 + xmax)
        ax.set_ylim(1.0 / counts.sum(axis=1).max() * 0.9, 1.1)
        ax.set_yscale('log')

        for count, (color, label) in zip(counts, styles):
            # Complement of the normalized cumulative sum of this window
            y = 1.0 - 1.0 * np.cumsum(count) / count.sum()
            ax.plot(distances, y, lw=2, color=color, label=label)
        ax.legend(loc=1)

        if savefig:
            outputfile = gff(data_folder,
                             adaID,
                             fragment,
                             cumulative=True,
                             sliding_window=True)
            fig.savefig(outputfile)
            plt.close(fig)
    finally:
        if restore_ion:
            plt.ion()

コード例 #10

0

ファイルを表示

ファイル: filter_mapped_reads.py プロジェクト: iosonofabio/hivwholeseq

def plot_distance_histogram_sliding_window(data_folder, adaID, fragment,
                                           lref,
                                           counts, binsize=200,
                                           savefig=False):
    '''Plot the distance histogram along the genome.

    One curve per row of counts is drawn, in two figures: the raw counts on
    linear axes, and one minus the normalized cumulative sum of each row on
    a logarithmic y axis. Row i is labelled with the window from binsize * i
    to min(binsize * (i + 1), lref).

    Parameters:
       - data_folder: forwarded to the figure filename helper
       - adaID: adapter ID; concatenated into the titles, so it must be a
         string
       - fragment: fragment name; concatenated into the titles, so it must
         be a string
       - lref: upper bound for the end coordinate in the window labels
       - counts: 2D numpy array indexed as (window, Hamming distance)
       - binsize: window width used to build the labels
       - savefig: if True, both figures are written to the paths returned by
         the filename helper and then closed; if False they are left open

    Raises:
       - ValueError: if counts has no nonzero entry, because the x range is
         the maximum over an empty index array
    '''
    from hivwholeseq.sequencing.filenames import get_distance_from_consensus_figure_filename as gff
    import matplotlib.pyplot as plt
    from matplotlib import cm

    # While saving, interactive mode is switched off. Remember the previous
    # state so that it is restored even if plotting or saving fails.
    restore_ion = False
    if savefig:
        restore_ion = plt.isinteractive()
        plt.ioff()

    try:
        # Figure max x
        xmax = counts.nonzero()[1].max()

        title = 'adaID '+adaID+', '+fragment
        n_windows = counts.shape[0]
        distances = np.arange(counts.shape[1])

        # Color and legend label of each window, shared by both figures
        styles = []
        for i in range(n_windows):
            start = binsize * i
            end = min(binsize * (i+1), lref)
            styles.append((cm.jet(int(255.0 * i / n_windows)),
                           str(start)+' to '+str(end)))

        # Linear histogram
        fig, ax = plt.subplots(1, 1)
        ax.set_xlabel('Hamming distance')
        ax.set_ylabel('# read pairs')
        ax.set_title(title)
        ax.set_xlim(-0.5, 0.5 + xmax)

        for count, (color, label) in zip(counts, styles):
            ax.plot(distances, count, lw=2, color=color, label=label)
        ax.legend(loc=1)

        if savefig:
            outputfile = gff(data_folder, adaID, fragment, sliding_window=True)
            fig.savefig(outputfile)
            plt.close(fig)

        # Log cumulative histogram
        fig, ax = plt.subplots(1, 1)
        ax.set_xlabel('Hamming distance')
        ax.set_ylabel('# read pairs')
        ax.set_title(title)
        ax.set_xlim(-0.5, 0.5 + xmax)
        ax.set_ylim(1.0 / counts.sum(axis=1).max() * 0.9, 1.1)
        ax.set_yscale('log')

        for count, (color, label) in zip(counts, styles):
            # Complement of the normalized cumulative sum of this window
            y = 1.0 - 1.0 * np.cumsum(count) / count.sum()
            ax.plot(distances, y, lw=2, color=color, label=label)
        ax.legend(loc=1)

        if savefig:
            outputfile = gff(data_folder, adaID, fragment, cumulative=True,
                             sliding_window=True)
            fig.savefig(outputfile)
            plt.close(fig)
    finally:
        if restore_ion:
            plt.ion()

コード例 #11

0

ファイルを表示

def plot_minor_allele_frequency(data_folder, adaID, fragments, VERBOSE=0,
                                savefig=False):
    '''Plot minor allele frequency along the genome.

    One panel per fragment shows the minor allele frequency for each read
    type, the second-largest filtered frequency per position, and the
    reciprocal of the total coverage.

    Parameters:
       - data_folder: forwarded to the filename helpers
       - adaID: adapter ID; concatenated into the figure title, so it must
         be a string
       - fragments: sequence of 1 to 6 fragment names (the panel layout is
         looked up by len(fragments) - 1)
       - VERBOSE: accepted for interface compatibility; not used here
       - savefig: if True, the figure is written to the path returned by the
         filename helper and closed; otherwise interactive mode is switched
         on and the figure is shown

    Note: the global matplotlib rcParams are updated as a side effect.
    '''
    from hivwholeseq.sequencing.filenames import get_minor_allele_frequency_figure_filename as gff
    import matplotlib
    # 'font.size' is the rcParams key for the default text size; the old
    # 'text.fontsize' spelling is rejected by current matplotlib.
    params = {'axes.labelsize': 20,
              'font.size': 20,
              'legend.fontsize': 8,
              'xtick.labelsize': 16,
              'ytick.labelsize': 16,
              'text.usetex': False}
    matplotlib.rcParams.update(params)
    from matplotlib import cm
    import matplotlib.pyplot as plt

    # Panel layout (rows, columns) indexed by number of fragments - 1
    plot_grid = [(1, 1), (1, 2), (1, 3), (2, 2), (1, 5), (2, 3)]

    # Per-fragment data collected for plotting
    covs = {}
    nus_minor = {}
    nus_minor_filtered = {}

    for fragment in fragments:
        coverage = np.load(get_coverage_filename(data_folder, adaID, fragment))
        covs[fragment] = coverage

        counts = np.load(get_allele_counts_filename(data_folder, adaID, fragment))
        (_, counts_minor, _) = get_minor_allele_counts(counts, n_minor=2)

        # Minor allele frequencies; the small offset avoids division by zero
        nus_minor[fragment] = 1.0 * counts_minor[:, :, 1] / (coverage + 1e-6)

        # Filter the minor frequencies by comparing the read types
        # NOTE(review): the signature of filter_nus is not visible here --
        # confirm that it accepts (counts, coverage).
        try:
            nu_filtered = np.load(get_allele_frequencies_filename(data_folder, adaID, fragment))
        except IOError:
            nu_filtered = filter_nus(counts, coverage)

        # Second-largest filtered frequency at every position
        nut = np.zeros(nu_filtered.shape[-1])
        for pos, nupos in enumerate(nu_filtered.T):
            nut[pos] = np.sort(nupos)[-2]
        nus_minor_filtered[fragment] = nut

    # Plot them
    (n_plots_y, n_plots_x) = plot_grid[len(fragments) - 1]
    fig, axs = plt.subplots(n_plots_y, n_plots_x, figsize=(13, 8))
    if len(fragments) > 1:
        axs = axs.ravel()
    else:
        axs = [axs]
    fig.suptitle('adapterID '+adaID, fontsize=20)
    labss = {'read1 f': 'read1 fwd', 'read1 r': 'read1 rev',
             'read2 f': 'read2 fwd', 'read2 r': 'read2 rev'}
    for i, fragment in enumerate(fragments):
        ax = axs[i]
        ax.set_yscale('log')
        ax.set_title(fragment)
        if i in [0, 3]:
            ax.set_ylabel(r'$\nu$')
        if i > 2:
            ax.set_xlabel('Position')

        # Plot divided by readtype
        for js, nu_minorjs in enumerate(nus_minor[fragment]):
            color = cm.jet(int(255.0 * js / len(read_types)))
            ax.plot(nu_minorjs, lw=1.5, c=color, label=labss[read_types[js]])
            ax.scatter(np.arange(len(nu_minorjs)), nu_minorjs, lw=1.5,
                       color=color)

        # Plot filtered
        nu_filt = nus_minor_filtered[fragment]
        ax.plot(nu_filt, lw=1.5, c='k', alpha=0.5, label='Filtered')
        ax.scatter(np.arange(len(nu_filt)), nu_filt, lw=1.5, c='k',
                   alpha=0.5)

        # Plot 1/max(coverage)
        cov_tot = covs[fragment].sum(axis=0)
        ax.plot(1.0 / cov_tot, lw=1.2, c='r', label='Detection limit')

        # NOTE(review): relies on nu_minorjs left over from the read type
        # loop above, i.e. on that loop having run at least once.
        ax.set_xlim(-100, len(nu_minorjs) + 100)

    # These pyplot calls act on the current axes only
    plt.grid()
    plt.legend(loc='upper right')
    plt.tight_layout(rect=(0, 0, 1, 0.95))

    if savefig:
        # NOTE(review): this passes `fragment`, the last value of the loop
        # above, not `fragments` -- confirm what the filename helper expects.
        outputfile = gff(data_folder, adaID, fragment)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()

コード例 #12

0

ファイルを表示

def plot_minor_allele_frequency_filtered(data_folder, adaID, fragments, VERBOSE=0,
                                         savefig=False):
    '''Plot minor allele frequency along the genome.

    One panel per fragment shows the second-largest filtered allele
    frequency at every position on a logarithmic y axis.

    Parameters:
       - data_folder: forwarded to the filename helpers
       - adaID: adapter ID; concatenated into the figure title, so it must
         be a string
       - fragments: sequence of 1 to 6 fragment names (the panel layout is
         looked up by len(fragments) - 1)
       - VERBOSE: accepted for interface compatibility; not used here
       - savefig: if True, the figure is written to the path returned by the
         filename helper and closed; otherwise interactive mode is switched
         on and the figure is shown

    Note: the global matplotlib rcParams are updated as a side effect.
    '''
    from hivwholeseq.sequencing.filenames import get_minor_allele_frequency_figure_filename as gff
    import matplotlib
    # 'font.size' is the rcParams key for the default text size; the old
    # 'text.fontsize' spelling is rejected by current matplotlib.
    params = {'axes.labelsize': 20,
              'font.size': 20,
              'legend.fontsize': 8,
              'xtick.labelsize': 16,
              'ytick.labelsize': 16,
              'text.usetex': False}
    matplotlib.rcParams.update(params)
    import matplotlib.pyplot as plt

    # Per-fragment data collected for plotting
    covs = {}
    nus_minor_filtered = {}
    for fragment in fragments:
        # NOTE(review): the coverage is loaded but not used further down --
        # confirm whether this load is still needed.
        coverage = np.load(get_coverage_filename(data_folder, adaID, fragment))
        covs[fragment] = coverage

        # Fall back to filtering the raw counts if no frequency file exists
        # NOTE(review): the signature of filter_nus is not visible here --
        # confirm that it accepts counts alone.
        try:
            nu_filtered = np.load(get_allele_frequencies_filename(data_folder,
                                                                  adaID, fragment))
        except IOError:
            counts = np.load(get_allele_counts_filename(data_folder, adaID, fragment))
            nu_filtered = filter_nus(counts)

        # Second-largest filtered frequency at every position
        nut = np.zeros(nu_filtered.shape[-1])
        for pos, nupos in enumerate(nu_filtered.T):
            nut[pos] = np.sort(nupos)[-2]

        nus_minor_filtered[fragment] = nut

    # Plot them
    # Panel layout (rows, columns) indexed by number of fragments - 1
    plot_grid = [(1, 1), (1, 2), (1, 3), (2, 2), (1, 5), (2, 3)]
    (n_plots_y, n_plots_x) = plot_grid[len(fragments) - 1]
    fig, axs = plt.subplots(n_plots_y, n_plots_x, figsize=(13, 8))
    if len(fragments) > 1:
        axs = axs.ravel()
    else:
        axs = [axs]
    fig.suptitle('adapterID '+adaID, fontsize=20)
    for i, fragment in enumerate(fragments):
        ax = axs[i]
        ax.set_yscale('log')
        ax.set_title(fragment)
        if i in [0, 3]:
            ax.set_ylabel(r'$\nu$')
        if i > 2:
            ax.set_xlabel('Position')

        # Plot filtered
        nu_filt = nus_minor_filtered[fragment]
        ax.plot(nu_filt, lw=1.5, c='k', alpha=0.5, label='Filtered')
        ax.scatter(np.arange(len(nu_filt)), nu_filt, lw=1.5, c='k',
                   alpha=0.5)

        # Leave a margin of 100 positions on either side
        ax.set_xlim(-100, len(nu_filt) + 100)

    plt.tight_layout(rect=(0, 0, 1, 0.95))

    if savefig:
        # NOTE(review): this passes `fragment`, the last value of the loop
        # above, not `fragments` -- confirm what the filename helper expects.
        outputfile = gff(data_folder, adaID, fragment, only_filtered=True)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()