def plot_minor_allele_frequency_filtered(data_folder, adaID, fragments, VERBOSE=0,
                                savefig=False):
    '''Plot minor allele frequency along the genome''' 
    nus = np.load(get_merged_allele_frequencies_filename(data_folder, adaID, fragments))

    nu_min = np.ma.masked_all(nus.shape[-1])
    for pos, nutmp in enumerate(nus.T):
        try:
            if not np.ma.is_masked(nutmp):
                nu_min[pos] = np.sort(nutmp)[-2]
        except ValueError:
            print pos, np.ma.is_masked(nutmp)
            import ipdb; ipdb.set_trace()

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))
    ax.plot(nu_min, lw=1.5, c='k')
    ax.scatter(np.arange(len(nu_min)), nu_min, s=30, c='k')
    ax.set_yscale('log')
    ax.set_xlabel('Position')
    ax.set_ylabel(r'$\nu$', fontsize=20)
    ax.set_title('adaID '+adaID+', '+'-'.join(fragments))
    ax.set_xlim(-100, len(nu_min) + 100)

    plt.tight_layout()

    if savefig:
        from hivwholeseq.sequencing.filenames import \
                get_minor_allele_frequency_merged_figure_filename as gff
        outputfile = gff(data_folder, adaID, fragments)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()
def plot_minor_allele_frequency_filtered(data_folder,
                                         adaID,
                                         fragments,
                                         VERBOSE=0,
                                         savefig=False):
    '''Plot minor allele frequency along the genome'''
    nus = np.load(
        get_merged_allele_frequencies_filename(data_folder, adaID, fragments))

    nu_min = np.ma.masked_all(nus.shape[-1])
    for pos, nutmp in enumerate(nus.T):
        try:
            if not np.ma.is_masked(nutmp):
                nu_min[pos] = np.sort(nutmp)[-2]
        except ValueError:
            print pos, np.ma.is_masked(nutmp)
            import ipdb
            ipdb.set_trace()

    import matplotlib.pyplot as plt
    fig, ax = plt.subplots(1, 1, figsize=(15, 8))
    ax.plot(nu_min, lw=1.5, c='k')
    ax.scatter(np.arange(len(nu_min)), nu_min, s=30, c='k')
    ax.set_yscale('log')
    ax.set_xlabel('Position')
    ax.set_ylabel(r'$\nu$', fontsize=20)
    ax.set_title('adaID ' + adaID + ', ' + '-'.join(fragments))
    ax.set_xlim(-100, len(nu_min) + 100)

    plt.tight_layout()

    if savefig:
        from hivwholeseq.sequencing.filenames import \
                get_minor_allele_frequency_merged_figure_filename as gff
        outputfile = gff(data_folder, adaID, fragments)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()
    # Select the samples to process: all samples of the dataset, or only
    # those whose adapter is listed in adaIDs when adaIDs is given.
    samples = dataset.samples
    if adaIDs is not None:
        samples = samples.loc[samples.adapter.isin(adaIDs)]
    if VERBOSE >= 3:
        print 'adaIDs', samples.adapter

    # Merge and write the per-fragment results, one sample at a time
    for samplename, sample in samples.iterrows():
        # Wrap the table row to access its adapter and complete regions
        sample = SampleSeq(sample)
        adaID = sample.adapter

        if VERBOSE >= 1:
            print adaID, samplename

        # Keep the first two characters of every complete region name
        # (presumably the fragment label, e.g. 'F1' -- TODO confirm)
        fragments = [fr[:2] for fr in sample.regions_complete]

        # Any count other than 6 is reported (also counts above 6, despite
        # the wording "only"); processing continues regardless
        if (len(fragments) != 6) and (VERBOSE >= 1):
            print 'WARNING: only '+str(len(fragments))+' regions found!'

        # Write one or more merged consensi
        # merge_consensi yields (fragments, consensus) pairs; each consensus
        # goes to its own FASTA file named after the fragments it covers
        consensus = merge_consensi(data_folder, adaID, fragments, VERBOSE=VERBOSE)
        for (frags, cons) in consensus:
            output_filename = get_merged_consensus_filename(data_folder, adaID, frags)
            SeqIO.write(cons, output_filename, 'fasta')

        # Write allele frequencies
        # Only done when do_nus is set; same (fragments, data) pairing as above
        if do_nus:
            nu = merge_allele_frequencies(data_folder, adaID, fragments, VERBOSE=VERBOSE)
            for (frags, nuf) in nu:
                output_filename = get_merged_allele_frequencies_filename(data_folder, adaID, frags)
                # presumably nuf is a NumPy (masked) array, in which case
                # dump() writes it as a pickle -- TODO confirm
                nuf.dump(output_filename)