Example #1
0
def plot_cumulative_histogram(data_folder, adaID, fragment, insert_sizes,
                              title=None,
                              ax=None,
                              show=False, savefig=False,
                              **kwargs):
    '''Plot cumulative histogram of insert sizes.

    Parameters:
       data_folder, adaID, fragment: only used when savefig is true, to build
         the output filename and to create the figure folder.
       insert_sizes: x values of the curve. The y values are evenly spaced
         between 0 and 1, one per insert size (presumably the sizes are
         already sorted in ascending order by the caller -- TODO confirm).
       title: optional title for the axes.
       ax: axes to draw on; if None, a new figure with a single axes is made.
       show: if true, switch pyplot to interactive mode and show the figure.
       savefig: if true, create the figure folder and save the figure.
       **kwargs: forwarded to ax.plot.
    '''
    import matplotlib.pyplot as plt
    if ax is None:
        fig, ax = plt.subplots(1, 1)
    else:
        # The caller supplied the axes: recover its figure, otherwise 'fig'
        # would be unbound in the layout and savefig steps below.
        fig = ax.figure

    ax.plot(insert_sizes, np.linspace(0, 1, len(insert_sizes)), **kwargs)
    ax.set_xlabel('Insert size')
    ax.set_ylabel('Cumulative fraction')
    ax.set_xlim(-1, 1000)
    ax.set_ylim(-0.02, 1.02)
    if title is not None:
        ax.set_title(title)

    # Lay out the figure that owns 'ax', which is not necessarily pyplot's
    # current figure when the axes comes from the caller.
    fig.tight_layout()

    if show:
        plt.ion()
        plt.show()

    if savefig:
        from hivwholeseq.utils.generic import mkdirs
        from hivwholeseq.sequencing.filenames import get_figure_folder
        output_filename = get_insert_size_distribution_cumulative_filename(
            data_folder, adaID, fragment)
        # Make sure the destination folder exists before writing the figure
        mkdirs(get_figure_folder(data_folder, adaID))
        fig.savefig(output_filename)
def report_insert_size(data_folder, adaID, seq_run, VERBOSE=0, summary=True):
    '''Produce figures of the insert size distribution of the premapped reads.

    Both plotting helpers are called with savefig=True. If summary is true,
    the filenames of the two figures are appended to the premap summary file.
    '''
    from hivwholeseq.sequencing.check_insert_distribution import (
        get_insert_size_distribution,
        plot_cumulative_histogram,
        plot_histogram)

    fragment = 'premapped'

    # 100 evenly spaced points between 0 and 1000 are passed as bins
    isz, h = get_insert_size_distribution(data_folder, adaID, fragment,
                                          bins=np.linspace(0, 1000, 100),
                                          maxreads=10000,
                                          VERBOSE=VERBOSE)

    # Both figures carry the same title
    label = 'run ' + str(seq_run) + ', adaID ' + str(adaID) + ', premap'

    plot_cumulative_histogram(data_folder, adaID, fragment, isz,
                              savefig=True, title=label, lw=2, c='b')
    plot_histogram(data_folder, adaID, fragment, h,
                   savefig=True, title=label, lw=2, color='b')

    if not summary:
        return

    with open(get_premap_summary_filename(data_folder, adaID), 'a') as f:
        f.write('\nInsert size distribution plotted:\n')
        fn_cumulative = get_insert_size_distribution_cumulative_filename(
            data_folder, adaID, fragment)
        f.write(fn_cumulative + '\n')
        fn_histogram = get_insert_size_distribution_filename(
            data_folder, adaID, fragment)
        f.write(fn_histogram + '\n')
def report_insert_size(data_folder, adaID, seq_run, VERBOSE=0, summary=True):
    '''Plot the premapped insert size distribution and optionally log it.

    The cumulative and the plain histogram are both drawn with savefig=True;
    when summary is true, their filenames are appended to the premap summary.
    '''
    from hivwholeseq.sequencing.check_insert_distribution import \
            get_insert_size_distribution
    from hivwholeseq.sequencing.check_insert_distribution import \
            plot_cumulative_histogram, plot_histogram

    bin_edges = np.linspace(0, 1000, 100)
    sizes, hist = get_insert_size_distribution(
        data_folder, adaID, 'premapped',
        bins=bin_edges, maxreads=10000, VERBOSE=VERBOSE)

    # Options shared by the two plotting calls; only the colour keyword
    # differs between them ('c' versus 'color').
    shared_opts = dict(
        savefig=True,
        title='run ' + str(seq_run) + ', adaID ' + str(adaID) + ', premap',
        lw=2)

    plot_cumulative_histogram(data_folder, adaID, 'premapped', sizes,
                              c='b', **shared_opts)
    plot_histogram(data_folder, adaID, 'premapped', hist,
                   color='b', **shared_opts)

    if summary:
        summary_filename = get_premap_summary_filename(data_folder, adaID)
        with open(summary_filename, 'a') as f:
            f.write('\nInsert size distribution plotted:\n')
            f.write(get_insert_size_distribution_cumulative_filename(
                data_folder, adaID, 'premapped') + '\n')
            f.write(get_insert_size_distribution_filename(
                data_folder, adaID, 'premapped') + '\n')