Ejemplo n.º 1
0
def write_frequency_files(data_folder,
                          adaID,
                          fragment,
                          nu_filtered,
                          VERBOSE=0):
    '''Write the corrected allele frequencies to file

    Parameters:
       data_folder, adaID, fragment: identify the output file; they are passed
           unchanged to get_allele_frequencies_filename
       nu_filtered: object holding the frequencies; it must provide a
           dump(filename) method (e.g. a numpy array)
       VERBOSE: if truthy, print a progress message before writing
    '''
    if VERBOSE:
        # A single pre-formatted argument prints the same text with both the
        # Python 2 print statement and the Python 3 print function
        print('Storing allele frequencies to file: %s %s' % (adaID, fragment))

    nu_filtered.dump(
        get_allele_frequencies_filename(data_folder, adaID, fragment))
def merge_allele_frequencies(data_folder, adaID, fragments, VERBOSE=0):
    '''Merge allele frequencies at overlapping pairs

    The fragments are walked in sorted order. A fragment that forms, together
    with its predecessor, one of the pairs returned by
    get_overlapping_fragments is appended to the current chunk; any other
    fragment starts a new chunk.

    Parameters:
       data_folder, adaID: passed on to the filename and overlap helpers
       fragments: names of the fragments to merge
       VERBOSE: passed on to the overlap helpers

    Returns:
       list of chunks, each a two-item list [fragment names, frequencies],
       where the frequencies of the chained fragments are concatenated along
       axis 1.

    A RuntimeWarning is issued for every pair whose consensi are reported as
    different by check_overlap_consensus.
    '''
    import warnings
    import numpy as np

    # Number of filler columns inserted between two paired fragments for which
    # no overlap was found
    n_pad = 10

    # NOTE(review): consensi is not used further down; the load is kept so
    # that the files are still read exactly as before
    consensi = {frag: SeqIO.read(get_consensus_filename(data_folder, adaID, frag,
                                             trim_primers=True), 'fasta')
                for frag in fragments}
    # NOTE(review): write_frequency_files stores these files with .dump(), i.e.
    # as pickles; recent numpy releases refuse to unpickle unless np.load gets
    # allow_pickle=True -- confirm against the numpy version in use
    nus = {frag: np.load(get_allele_frequencies_filename(data_folder, adaID, frag))
           for frag in fragments}

    pairs = get_overlapping_fragments(fragments)
    overlaps = {}
    for (frag1, frag2) in pairs:
        overlap = get_overlap(data_folder, adaID, frag1, frag2, VERBOSE=VERBOSE)
        is_diff = check_overlap_consensus(data_folder, adaID, frag1, frag2,
                                          overlap, VERBOSE=VERBOSE)
        if is_diff:
            warnings.warn(frag1+' and '+frag2+' have different consensi.', RuntimeWarning)
        overlaps[(frag1, frag2)] = overlap

    nu = []
    fragments = sorted(fragments)
    for i, frag in enumerate(fragments):
        # If the start is not an overlap, start a new chunk and copy all
        if (i == 0) or ((fragments[i-1], frag) not in overlaps):
            nu.append([[frag], nus[frag]])
            continue

        # else, extend the last chunk
        # FIXME: we could average the consensus zone out of indels...
        chunk = nu[-1]
        chunk[0].append(frag)
        overlap = overlaps[(fragments[i-1], frag)]
        if overlap is not None:
            # Copy from the end of the overlap on
            (_, start, _) = overlap
            pieces = [chunk[1], nus[frag][:, start:]]
        else:
            # No overlap found: there is no offset to skip, so insert a filler
            # (last row set to 1, presumably the unknown-allele row -- TODO
            # confirm) and append the whole fragment. Slicing with `start`
            # here would use an undefined or stale value.
            filler = np.zeros((chunk[1].shape[0], n_pad), float)
            filler[-1] = 1
            pieces = [chunk[1], filler, nus[frag]]

        # (repeated concatenation is not the most efficient but -- oh, well)
        chunk[1] = np.concatenate(pieces, axis=1)

    return nu
Ejemplo n.º 3
0
def plot_minor_allele_frequency(data_folder, adaID, fragments, VERBOSE=0,
                                savefig=False):
    '''Plot minor allele frequency along the genome

    One panel per fragment shows, on a logarithmic y axis:
       - the minor allele frequency of every read type
       - the second-highest filtered frequency at each position
       - 1 / (coverage summed over axis 0), labelled as detection limit

    Parameters:
       data_folder, adaID: passed on to the filename helpers; adaID is also
           appended to the figure title, so it must be a string
       fragments: sequence of fragment names, one panel each; the panel grid
           table covers 1 to 6 fragments
       VERBOSE: not used in this function
       savefig: if true, save the figure to file and close it; otherwise show
           it interactively
    '''
    from hivwholeseq.sequencing.filenames import get_minor_allele_frequency_figure_filename as gff
    import matplotlib
    # 'font.size' is used instead of the old 'text.fontsize' key, which newer
    # matplotlib releases reject as an invalid rc parameter
    params = {'axes.labelsize': 20,
              'font.size': 20,
              'legend.fontsize': 8,
              'xtick.labelsize': 16,
              'ytick.labelsize': 16,
              'text.usetex': False}
    matplotlib.rcParams.update(params)
    from matplotlib import cm
    import matplotlib.pyplot as plt

    # (rows, columns) of the panel grid, indexed by number of fragments - 1
    plot_grid = [(1, 1), (1, 2), (1, 3), (2, 2), (1, 5), (2, 3)]

    # Per-fragment data, keyed by fragment name
    covs = {}
    nus_minor = {}
    nus_minor_filtered = {}

    for fragment in fragments:
        coverage = np.load(get_coverage_filename(data_folder, adaID, fragment))
        covs[fragment] = coverage

        counts = np.load(get_allele_counts_filename(data_folder, adaID, fragment))
        (_, counts_minor, _) = get_minor_allele_counts(counts, n_minor=2)

        # Minor allele frequencies: index 1 of the last axis is taken as the
        # count (index 0 was read as the allele identity in the original).
        # The small offset keeps the denominator nonzero where coverage is 0.
        nus_minor[fragment] = 1.0 * counts_minor[:, :, 1] / (coverage + 1e-6)

        # Filter the minor frequencies by comparing the read types
        # NOTE(review): if the stored file is a pickle (ndarray.dump), recent
        # numpy raises ValueError rather than IOError unless allow_pickle=True
        # is given, and the fallback below is then not reached -- confirm
        try:
            nu_filtered = np.load(get_allele_frequencies_filename(data_folder, adaID, fragment))
        except IOError:
            nu_filtered = filter_nus(counts, coverage)

        # Second-highest frequency at every position
        nut = np.zeros(nu_filtered.shape[-1])
        for pos, nupos in enumerate(nu_filtered.T):
            nut[pos] = np.sort(nupos)[-2]
        nus_minor_filtered[fragment] = nut

    # Plot them
    (n_plots_y, n_plots_x) = plot_grid[len(fragments) - 1]
    fig, axs = plt.subplots(n_plots_y, n_plots_x, figsize=(13, 8))
    if len(fragments) > 1:
        axs = axs.ravel()
    else:
        axs = [axs]
    fig.suptitle('adapterID '+adaID, fontsize=20)
    labss = {'read1 f': 'read1 fwd', 'read1 r': 'read1 rev',
             'read2 f': 'read2 fwd', 'read2 r': 'read2 rev'}
    for i, fragment in enumerate(fragments):
        ax = axs[i]
        ax.set_yscale('log')
        ax.set_title(fragment)
        if i in [0, 3]:
            ax.set_ylabel(r'$\nu$')
        if i > 2:
            ax.set_xlabel('Position')

        # Plot divided by readtype
        nu_minor = nus_minor[fragment]
        for js, nu_minorjs in enumerate(nu_minor):
            color = cm.jet(int(255.0 * js / len(read_types)))
            ax.plot(nu_minorjs, lw=1.5, c=color, label=labss[read_types[js]])
            ax.scatter(np.arange(len(nu_minorjs)), nu_minorjs, lw=1.5,
                       color=color)

        # Plot filtered
        nu_filt = nus_minor_filtered[fragment]
        ax.plot(nu_filt, lw=1.5, c='k',
                alpha=0.5, label='Filtered')
        ax.scatter(np.arange(len(nu_filt)), nu_filt, lw=1.5, c='k',
                   alpha=0.5)

        # Plot 1/coverage (summed over axis 0)
        cov_tot = covs[fragment].sum(axis=0)
        ax.plot(1.0 / cov_tot, lw=1.2, c='r', label='Detection limit')

        # Row length of the per-read-type array; taken from the array itself
        # rather than from the variable left over by the loop above
        ax.set_xlim(-100, nu_minor.shape[1] + 100)

    # These pyplot calls act on pyplot's current axes only
    plt.grid()
    plt.legend(loc='upper right')
    plt.tight_layout(rect=(0, 0, 1, 0.95))

    if savefig:
        # NOTE(review): `fragment` is the value left over from the loop above,
        # i.e. only the last fragment enters the file name -- confirm that
        # this is what the filename helper expects
        outputfile = gff(data_folder, adaID, fragment)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()
def write_frequency_files(data_folder, adaID, fragment, nu_filtered, VERBOSE=0):
    '''Write the corrected allele frequencies to file

    Parameters:
       data_folder, adaID, fragment: identify the output file; they are passed
           unchanged to get_allele_frequencies_filename
       nu_filtered: object holding the frequencies; it must provide a
           dump(filename) method (e.g. a numpy array)
       VERBOSE: if truthy, print a progress message before writing
    '''
    if VERBOSE:
        # A single pre-formatted argument prints the same text with both the
        # Python 2 print statement and the Python 3 print function
        print('Storing allele frequencies to file: %s %s' % (adaID, fragment))

    nu_filtered.dump(get_allele_frequencies_filename(data_folder, adaID, fragment))
Ejemplo n.º 5
0
def plot_minor_allele_frequency_filtered(data_folder, adaID, fragments, VERBOSE=0,
                                         savefig=False):
    '''Plot the filtered minor allele frequency along the genome

    One panel per fragment shows, on a logarithmic y axis, the second-highest
    filtered allele frequency at each position.

    Parameters:
       data_folder, adaID: passed on to the filename helpers; adaID is also
           appended to the figure title, so it must be a string
       fragments: sequence of fragment names, one panel each; the panel grid
           table covers 1 to 6 fragments
       VERBOSE: not used in this function
       savefig: if true, save the figure to file and close it; otherwise show
           it interactively
    '''
    from hivwholeseq.sequencing.filenames import get_minor_allele_frequency_figure_filename as gff
    import matplotlib
    # 'font.size' is used instead of the old 'text.fontsize' key, which newer
    # matplotlib releases reject as an invalid rc parameter
    params = {'axes.labelsize': 20,
              'font.size': 20,
              'legend.fontsize': 8,
              'xtick.labelsize': 16,
              'ytick.labelsize': 16,
              'text.usetex': False}
    matplotlib.rcParams.update(params)
    import matplotlib.pyplot as plt

    # Second-highest filtered frequency per position, keyed by fragment name.
    # The coverage is not needed for this plot, so it is not loaded.
    nus_minor_filtered = {}
    for fragment in fragments:
        # Use the stored filtered frequencies if present, else recompute them
        # from the allele counts
        try:
            nu_filtered = np.load(get_allele_frequencies_filename(data_folder,
                                                                  adaID, fragment))
        except IOError:
            counts = np.load(get_allele_counts_filename(data_folder, adaID, fragment))
            nu_filtered = filter_nus(counts)

        nut = np.zeros(nu_filtered.shape[-1])
        for pos, nupos in enumerate(nu_filtered.T):
            nut[pos] = np.sort(nupos)[-2]

        nus_minor_filtered[fragment] = nut

    # Plot them
    # (rows, columns) of the panel grid, indexed by number of fragments - 1
    plot_grid = [(1, 1), (1, 2), (1, 3), (2, 2), (1, 5), (2, 3)]
    (n_plots_y, n_plots_x) = plot_grid[len(fragments) - 1]
    fig, axs = plt.subplots(n_plots_y, n_plots_x, figsize=(13, 8))
    if len(fragments) > 1:
        axs = axs.ravel()
    else:
        axs = [axs]
    fig.suptitle('adapterID '+adaID, fontsize=20)
    for i, fragment in enumerate(fragments):
        ax = axs[i]
        ax.set_yscale('log')
        ax.set_title(fragment)
        if i in [0, 3]:
            ax.set_ylabel(r'$\nu$')
        if i > 2:
            ax.set_xlabel('Position')

        # Plot filtered
        nu_filt = nus_minor_filtered[fragment]
        ax.plot(nu_filt, lw=1.5, c='k',
                alpha=0.5, label='Filtered')
        ax.scatter(np.arange(len(nu_filt)), nu_filt, lw=1.5, c='k',
                   alpha=0.5)

        ax.set_xlim(-100, len(nu_filt) + 100)

    plt.tight_layout(rect=(0, 0, 1, 0.95))

    if savefig:
        # NOTE(review): `fragment` is the value left over from the loop above,
        # i.e. only the last fragment enters the file name -- confirm that
        # this is what the filename helper expects
        outputfile = gff(data_folder, adaID, fragment, only_filtered=True)
        fig.savefig(outputfile)
        plt.close(fig)
    else:
        plt.ion()
        plt.show()