def plot_histogram(data_folder, adaID, fragment, h, title=None, ax=None,
                   show=False, savefig=False, **kwargs):
    '''Plot histogram of insert sizes.

    The curve is drawn at the midpoints of consecutive entries of h[1], with
    h[0] as the heights.

    Parameters:
      data_folder: passed to the filename helpers when savefig is True.
      adaID: passed to the filename helpers when savefig is True.
      fragment: passed to get_insert_size_distribution_filename when savefig
        is True.
      h: indexable pair; h[0] holds the heights and h[1] the bin edges. h[1]
        must support slicing and elementwise arithmetic (presumably the
        output of numpy.histogram -- confirm against the caller).
      title: optional axes title; not set when None.
      ax: axes to draw on; a new figure with a single axes is created when
        None.
      show: if True, switch pyplot to interactive mode and show the figures.
      savefig: if True, create the figure folder and save the figure to the
        path given by get_insert_size_distribution_filename.
      **kwargs: forwarded unchanged to ax.plot.

    Returns:
      None.
    '''
    import matplotlib.pyplot as plt

    if ax is None:
        _, ax = plt.subplots(1, 1)

    # Always operate on the figure that owns `ax`. The pyplot-level
    # tight_layout/savefig act on the *current* figure, which is not
    # necessarily the one a caller-supplied `ax` belongs to.
    fig = ax.figure

    if title is not None:
        ax.set_title(title)

    edges = h[1]
    x = 0.5 * (edges[1:] + edges[:-1])
    y = h[0]
    ax.plot(x, y, **kwargs)
    ax.set_xlabel('Insert size')
    ax.set_ylabel('Density')

    fig.tight_layout()

    if show:
        plt.ion()
        plt.show()

    if savefig:
        from hivwholeseq.utils.generic import mkdirs
        from hivwholeseq.sequencing.filenames import get_figure_folder

        output_filename = get_insert_size_distribution_filename(
            data_folder, adaID, fragment)
        # Make sure the destination folder exists before writing the figure.
        mkdirs(get_figure_folder(data_folder, adaID))
        fig.savefig(output_filename)
def report_insert_size(data_folder, adaID, seq_run, VERBOSE=0, summary=True):
    '''Plot the insert size distribution of the premapped reads.

    Requests the insert sizes for the 'premapped' fragment (at most 10000
    reads, bin edges from np.linspace(0, 1000, 100)), then saves both the
    cumulative histogram and the plain histogram as figures.

    Parameters:
      data_folder: forwarded to the distribution, plotting and filename
        helpers.
      adaID: adapter ID; forwarded to the helpers and shown in the titles.
      seq_run: only used in the figure titles.
      VERBOSE: forwarded to get_insert_size_distribution.
      summary: if True, append the two figure paths to the premap summary
        file.

    Returns:
      None.
    '''
    from hivwholeseq.sequencing.check_insert_distribution import (
        get_insert_size_distribution,
        plot_cumulative_histogram,
        plot_histogram,
    )

    fragment = 'premapped'

    insert_sizes, histogram = get_insert_size_distribution(
        data_folder, adaID, fragment,
        bins=np.linspace(0, 1000, 100),
        maxreads=10000,
        VERBOSE=VERBOSE,
    )

    # Both figures carry the same title.
    figure_title = 'run ' + str(seq_run) + ', adaID ' + str(adaID) + ', premap'

    plot_cumulative_histogram(data_folder, adaID, fragment, insert_sizes,
                              savefig=True, title=figure_title,
                              lw=2, c='b')
    plot_histogram(data_folder, adaID, fragment, histogram,
                   savefig=True, title=figure_title,
                   lw=2, color='b')

    if not summary:
        return

    # Append, so earlier content of the summary file is preserved.
    with open(get_premap_summary_filename(data_folder, adaID), 'a') as f:
        f.write('\nInsert size distribution plotted:\n')
        for get_filename in (get_insert_size_distribution_cumulative_filename,
                             get_insert_size_distribution_filename):
            f.write(get_filename(data_folder, adaID, fragment) + '\n')
def report_insert_size(data_folder, adaID, seq_run, VERBOSE=0, summary=True):
    '''Produce and save the insert size figures for the premapped reads.

    Computes the insert size distribution of the 'premapped' fragment (up to
    10000 reads, bin edges np.linspace(0, 1000, 100)), saves a cumulative
    histogram and a histogram figure, and optionally records the figure
    paths in the premap summary file.

    Parameters:
      data_folder: forwarded to the distribution, plotting and filename
        helpers.
      adaID: adapter ID; forwarded to the helpers and shown in the titles.
      seq_run: only used in the figure titles.
      VERBOSE: forwarded to get_insert_size_distribution.
      summary: if True, append the figure paths to the premap summary file.

    Returns:
      None.
    '''
    # NOTE(review): a function with the same name and behaviour is defined
    # earlier in this file; if both are at module level, this later
    # definition is the one in effect after import -- confirm and deduplicate.
    from hivwholeseq.sequencing.check_insert_distribution import \
        get_insert_size_distribution, plot_cumulative_histogram, plot_histogram

    frag = 'premapped'
    bin_edges = np.linspace(0, 1000, 100)
    sizes, hist = get_insert_size_distribution(data_folder, adaID, frag,
                                               bins=bin_edges,
                                               maxreads=10000,
                                               VERBOSE=VERBOSE)

    # Title shared by the two figures.
    label = 'run ' + str(seq_run) + ', adaID ' + str(adaID) + ', premap'

    plot_cumulative_histogram(data_folder, adaID, frag, sizes,
                              savefig=True, title=label, lw=2, c='b')
    plot_histogram(data_folder, adaID, frag, hist,
                   savefig=True, title=label, lw=2, color='b')

    if summary:
        summary_filename = get_premap_summary_filename(data_folder, adaID)
        # Opened in append mode: the report is added to the existing summary.
        with open(summary_filename, 'a') as f:
            f.write('\nInsert size distribution plotted:\n')
            cumulative_path = get_insert_size_distribution_cumulative_filename(
                data_folder, adaID, frag)
            f.write(cumulative_path + '\n')
            histogram_path = get_insert_size_distribution_filename(
                data_folder, adaID, frag)
            f.write(histogram_path + '\n')