def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """Render one read-length bar plot per distribution and group them.

    :param PlotConstants: namespace providing the plot ids that are read
        here (``PG_LENGTH``, ``P_LENGTH``, ``P_LENGTH_PREFIX`` and the
        ``*_QUAL`` counterparts)
    :param title: title used for every plot, its caption and the plot group
    :param readLenDists: iterable of read-length distributions
    :param readQualDists: iterable of read-quality distributions; only used
        by the disabled code after the ``return``
    :param output_dir: directory the PNG files are written to; image paths
        in the result are made relative to it
    :param dpi: resolution handed to ``save_figure_with_thumbnail``
    :param lenDistShaper: optional callable applied to each length
        distribution; built with ``continuous_dist_shaper`` when omitted
    :return: list containing the read-length ``PlotGroup``
    """
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)

    length_plots = []
    # Thumbnail of the most recently saved plot. It stays None when there
    # are no distributions, so building the group below cannot raise
    # UnboundLocalError.
    group_thumbnail = None
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Sanity check: reshaping must keep the summed bin counts unchanged.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)
        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0), edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(get_plot_xlabel(
            spec, PlotConstants.PG_LENGTH, PlotConstants.P_LENGTH))
        len_axes.set_ylabel(get_plot_ylabel(
            spec, PlotConstants.PG_LENGTH, PlotConstants.P_LENGTH))
        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            len_fig, png_fn, dpi=dpi)
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 title=title, caption=title,
                 thumbnail=group_thumbnail))
    plot_groups = [
        PlotGroup(PlotConstants.PG_LENGTH,
                  title=title,
                  plots=length_plots,
                  thumbnail=group_thumbnail)
    ]
    return plot_groups

    # FIXME these aren't useful yet
    # Everything below is intentionally unreachable; it is kept so the
    # read-quality plots can be re-enabled by removing the return above.
    qual_plots = []
    # ReadQual distribution to barplot:
    shaper = continuous_dist_shaper(readQualDists, trim_excess=True)
    for i, orig_rqualdist in enumerate(readQualDists):
        rqualdist = shaper(orig_rqualdist)
        qual_fig, qual_axes = get_fig_axes_lpr()
        qual_axes.bar(rqualdist.labels, rqualdist.bins,
                      color=get_green(0), edgecolor=get_green(0),
                      width=(rqualdist.binWidth * 0.75))
        qual_axes.set_xlabel(get_plot_xlabel(
            spec, PlotConstants.PG_QUAL, PlotConstants.P_QUAL))
        qual_axes.set_ylabel(get_plot_ylabel(
            spec, PlotConstants.PG_QUAL, PlotConstants.P_QUAL))
        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_QUAL_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            qual_fig, png_fn, dpi=dpi)
        qual_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_QUAL),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))
    plot_groups.append(
        PlotGroup(PlotConstants.PG_QUAL, plots=qual_plots))
    return plot_groups
def _make_plot(data, png_fn, bounds=None, dpi=60, nolegend=False):
    """Make a scatterplot of read length and concordance.

    Rows of ``data`` are split into ``len(qv_colors)`` equal-width bins on
    their third column, starting at 1.0, and every non-empty bin is drawn
    in its own colour. Rows whose third column is below 1.0 are not drawn.

    :param data: 2-D numpy array; column 0 goes on the x axis, column 1 on
        the y axis and column 2 selects the colour bin
    :param png_fn: path of the PNG handed to ``save_figure_with_thumbnail``
    :param bounds: optional ``"xmin:xmax:ymin:ymax"`` string of integers
    :param dpi: output resolution (converted with ``int``)
    :param nolegend: when true, no legend is drawn
    """
    fig = plt.figure()
    try:
        axes = fig.add_subplot(111)
        # ``patch`` is the supported attribute; the old ``axesPatch`` alias
        # is not available in current matplotlib releases.
        axes.patch.set_facecolor('#ffffff')
        axes.grid(color='#e0e0e0', linewidth=0.5, linestyle='-')
        axes.set_axisbelow(True)

        qv_colors = ['#fc9272', '#fb6a4a', '#ef3b2c', '#cb181d']

        # plot by z-values
        qv_min = 1.0
        handles = []
        labels = []
        qv_column = data[:, 2]
        # Make sure the max actually gets in a bin
        qv_max = max(qv_column) + 1
        qv_delta = (qv_max - qv_min) / len(qv_colors)
        for qv_bin, color in zip(np.arange(qv_min, qv_max, qv_delta),
                                 qv_colors):
            if qv_bin > qv_max:
                break
            qv_bin_max = qv_bin + qv_delta
            in_bin = (qv_column >= qv_bin) & (qv_column < qv_bin_max)
            points = data[in_bin, :]
            if len(points[:, 0]) > 0:
                l, = axes.plot(points[:, 0], points[:, 1], 'o',
                               c=color, mec=color, alpha=0.1, ms=2.0)
                handles.append(l)
                labels.append('QV >= %d' % qv_bin)

        if not nolegend:
            legend = axes.legend(handles, labels, loc='lower right',
                                 numpoints=1, borderpad=0.3,
                                 markerscale=2.0, handletextpad=0.3,
                                 labelspacing=0.3, handlelength=0.5)
            legend.get_frame().set_edgecolor('#a0a0a0')

        if bounds:
            # A real list is required: ``map`` returns a non-indexable
            # iterator on Python 3.
            intbounds = [int(b) for b in bounds.split(":")]
            axes.set_xlim(intbounds[0], intbounds[1])
            axes.set_ylim(intbounds[2], intbounds[3])

        axes.set_xlabel('Subread Length / bp')
        axes.set_ylabel('% Concordance')
        save_figure_with_thumbnail(fig, png_fn, dpi=int(dpi))
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
def addQmodMotifHist(csvFile, kinData, outputFolder, dpi=DEFAULT_DPI,
                     max_motifs=10):
    """Save the motif modification-QV histogram and wrap it in a PlotGroup.

    Apart from passing in the motif_summary.csv file name, nearly identical
    to addQmodHist.

    :param csvFile: motif summary CSV forwarded to ``plotMotifHist``
    :param kinData: kinetics data forwarded to ``plotMotifHist``
    :param outputFolder: directory the image is written to
    :param dpi: resolution used when saving the figure
    :param max_motifs: forwarded to ``plotMotifHist``
    :return: PlotGroup holding the single histogram plot
    """
    destination = os.path.join(outputFolder, Constants.I_MOTIFS_QMOD)

    # Generate modification detection plot
    figure, _axes = plotMotifHist(csvFile, kinData, max_motifs=max_motifs)
    image_path, thumb_path = PH.save_figure_with_thumbnail(
        figure, destination, dpi=dpi)
    log.info((image_path, thumb_path))

    thumb_name = os.path.basename(thumb_path)
    histogram = Plot(Constants.P_MOD_QV,
                     image=os.path.basename(image_path),
                     thumbnail=thumb_name)
    return PlotGroup(Constants.PG_MOD_QV,
                     title=get_plotgroup_title(spec, Constants.PG_MOD_QV),
                     plots=[histogram],
                     thumbnail=thumb_name)
def _coverage_vs_quality_plot(contigs, output_dir):
    """
    Creates a scatter plot coverage vs quality plot for each contig in the
    polished assembly. Each point represents one contig.

    When ``contigs`` is empty the axis limits are left at their defaults and
    an empty plot is written instead of failing on ``max()`` of an empty
    list.

    :param contigs: (dict) contig id -> ContigInfo object
    :param output_dir: (str) path to output directory
    :return: (Plot) object that has already been saved as a PNG to output_dir
    """
    fig, axes = PH.get_fig_axes_lpr()
    # NOTE(review): the axes returned above is replaced here - confirm
    # whether this second request for subplot 111 is still needed.
    axes = fig.add_subplot(111)
    axes.set_axisbelow(True)
    axes.set_ylabel("Mean Confidence (QV)")
    axes.set_xlabel("Mean Coverage Depth")
    PH.set_tick_label_font_size(axes, 12, 12)
    PH.set_axis_label_font_size(axes, 16)

    x_vals = [x.mean_coverage for x in contigs.values()]
    y_vals = [x.mean_qv for x in contigs.values()]
    if x_vals:
        # Leave 20% headroom above the largest value on both axes.
        axes.set_xlim(0, max(x_vals) * 1.2)
        axes.set_ylim(0, max(y_vals) * 1.2)
    axes.scatter(x_vals, y_vals, s=12)

    png_path = os.path.join(output_dir, "polished_coverage_vs_quality.png")
    png, thumbpng = PH.save_figure_with_thumbnail(fig, png_path)
    return Plot('cov_vs_qual', os.path.basename(png),
                thumbnail=os.path.basename(thumbpng))
def to_hq_hist_plot(hqbasefraction_dist, output_dir):
    """Save the HQ base fraction histogram and return its plot group.

    :param hqbasefraction_dist: mapping indexed by ``'NumBins'``,
        ``'BinCounts'`` and ``'BinWidth'`` whose entries expose ``metavalue``
    :param output_dir: directory the PNG is written to; image paths in the
        result are relative to it
    :return: one-element list with the HQ ``PlotGroup``
    """
    plot_name = get_plot_title(spec, Constants.PG_HQ, Constants.P_HQ)
    x_label = get_plot_xlabel(spec, Constants.PG_HQ, Constants.P_HQ)
    y_label = get_plot_ylabel(spec, Constants.PG_HQ, Constants.P_HQ)

    nbins = int(hqbasefraction_dist['NumBins'].metavalue)
    bin_counts = hqbasefraction_dist['BinCounts']
    heights = [int(bc.metavalue) for bc in bin_counts]
    # ``range`` instead of ``xrange`` so this runs on Python 2 and 3 alike.
    # Bar positions are the bin index divided by the number of bins.
    edges = [float(bn) / float(nbins) for bn in range(nbins)]
    bin_width = float(hqbasefraction_dist['BinWidth'].metavalue)

    fig, ax = get_fig_axes_lpr()
    ax.bar(edges, heights, color=get_green(0), edgecolor=get_green(0),
           width=(bin_width * 0.75))
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    png_fn = os.path.join(output_dir, "{p}.png".format(p=Constants.P_HQ))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        fig, png_fn, dpi=DEFAULT_DPI)
    hq_plot = Plot(Constants.P_HQ,
                   os.path.relpath(png_base, output_dir),
                   title=plot_name, caption=plot_name,
                   thumbnail=os.path.relpath(thumbnail_base, output_dir))
    plot_groups = [PlotGroup(Constants.PG_HQ, plots=[hq_plot])]
    return plot_groups
def to_readlen_plotgroup(readlen_dist, output_dir):
    """Save the read-length histogram and return its plot group.

    :param readlen_dist: distribution exposing ``numBins``, ``bins`` and
        ``binWidth``
    :param output_dir: directory the PNG is written to; image paths in the
        result are relative to it
    :return: one-element list with the read-length ``PlotGroup``
    """
    plot_name = get_plot_title(
        spec, Constants.PG_READLENGTH, Constants.P_READLENGTH)
    x_label = get_plot_xlabel(
        spec, Constants.PG_READLENGTH, Constants.P_READLENGTH)
    y_label = get_plot_ylabel(
        spec, Constants.PG_READLENGTH, Constants.P_READLENGTH)

    nbins = readlen_dist.numBins
    heights = readlen_dist.bins
    bin_width = readlen_dist.binWidth
    # ``range`` instead of ``xrange`` so this runs on Python 2 and 3 alike.
    edges = [float(bn) * bin_width for bn in range(nbins)]
    edges, heights, bin_width = reshape(readlen_dist, edges, heights)

    fig, ax = get_fig_axes_lpr()
    # Only draw bars when the distribution holds at least one count.
    if sum(readlen_dist.bins) > 0:
        ax.bar(edges, heights, color=get_green(0), edgecolor=get_green(0),
               width=(bin_width * 0.75))
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    png_fn = os.path.join(
        output_dir, "{p}.png".format(p=Constants.P_READLENGTH))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        fig, png_fn, dpi=DEFAULT_DPI)
    readlen_plot = Plot(Constants.P_READLENGTH,
                        os.path.relpath(png_base, output_dir),
                        title=plot_name, caption=plot_name,
                        thumbnail=os.path.relpath(thumbnail_base, output_dir))
    plot_groups = [PlotGroup(Constants.PG_READLENGTH, plots=[readlen_plot])]
    return plot_groups
def _coverage_vs_quality_plot(contigs, output_dir):
    """
    Creates a scatter plot coverage vs quality plot for each contig in the
    polished assembly. Each point represents one contig.

    When ``contigs`` is empty the axis limits are left at their defaults and
    an empty plot is written instead of failing on ``max()`` of an empty
    list.

    :param contigs: (dict) contig id -> ContigInfo object
    :param output_dir: (str) path to output directory
    :return: (Plot) object that has already been saved as a PNG to output_dir
    """
    import pbreports.plot.helper as PH
    fig, axes = PH.get_fig_axes_lpr()
    # NOTE(review): the axes returned above is replaced here - confirm
    # whether this second request for subplot 111 is still needed.
    axes = fig.add_subplot(111)
    axes.set_axisbelow(True)
    axes.set_ylabel(
        get_plot_ylabel(spec, Constants.PG_COVERAGE, Constants.P_COVERAGE))
    axes.set_xlabel(
        get_plot_xlabel(spec, Constants.PG_COVERAGE, Constants.P_COVERAGE))
    PH.set_tick_label_font_size(axes, 12, 12)
    PH.set_axis_label_font_size(axes, 16)

    x_vals = [x.mean_coverage for x in contigs.values()]
    y_vals = [x.mean_qv for x in contigs.values()]
    if x_vals:
        # Leave 20% headroom above the largest value on both axes.
        axes.set_xlim(0, max(x_vals) * 1.2)
        axes.set_ylim(0, max(y_vals) * 1.2)
    axes.scatter(x_vals, y_vals, s=12)

    png_path = os.path.join(output_dir, "polished_coverage_vs_quality.png")
    png, thumbpng = PH.save_figure_with_thumbnail(fig, png_path)
    return Plot(Constants.P_COVERAGE, os.path.basename(png),
                thumbnail=os.path.basename(thumbpng))
def _create_coverage_histo_plot_grp(self, stats, output_dir):
    """
    Build the plot group that holds the coverage histogram.

    The histogram is written to ``coverage_histogram.png`` in output_dir and
    referenced by base name.

    :param stats: (ReferenceStats) see _get_reference_coverage_stats
    :param output_dir: (string) where to write images
    :return: io.model.PlotGroup object
    """
    figure, _axes = self._create_histogram(stats)
    target = os.path.join(output_dir, 'coverage_histogram.png')
    image_name, thumb_name = [
        os.path.basename(path)
        for path in save_figure_with_thumbnail(figure, target)
    ]

    caption = get_plot_caption(self.spec, Constants.PG_COVERAGE_HIST,
                               Constants.P_COVERAGE_HIST)
    title = get_plot_title(self.spec, Constants.PG_COVERAGE_HIST,
                           Constants.P_COVERAGE_HIST)
    histogram = Plot(Constants.P_COVERAGE_HIST, image_name,
                     caption=caption, title=title)

    group_title = get_plotgroup_title(self.spec, Constants.PG_COVERAGE_HIST)
    return PlotGroup(Constants.PG_COVERAGE_HIST,
                     thumbnail=thumb_name,
                     plots=[histogram],
                     title=group_title)
def _get_plot_group_length(control_data, sample_data, output_dir):
    """
    Create the read length plot group and return it.

    :param control_data: forwarded to ``_create_length_figure``
    :param sample_data: forwarded to ``_create_length_figure``
    :param output_dir: directory the PNG and its thumbnail are written to
    :return: PlotGroup holding the single read-length plot
    """
    image_name = 'control_non-control_readlength.png'
    figure = _create_length_figure(control_data, sample_data)
    saved_paths = save_figure_with_thumbnail(
        figure, os.path.join(output_dir, image_name))
    # The second returned path is the thumbnail.
    thumb_path = saved_paths[1]
    return PlotGroup(Constants.PG_LENGTH,
                     thumbnail=os.path.basename(thumb_path),
                     plots=[Plot(Constants.P_LENGTH, image_name)])
def _get_plot_group_score(control_data, sample_data, output_dir):
    """
    Create the read quality plot group and return it.

    :param control_data: forwarded to ``_create_score_figure``
    :param sample_data: forwarded to ``_create_score_figure``
    :param output_dir: directory the PNG and its thumbnail are written to
    :return: PlotGroup holding the single read-quality plot
    """
    image_name = 'control_non-control_readquality.png'
    figure = _create_score_figure(control_data, sample_data)
    saved_paths = save_figure_with_thumbnail(
        figure, os.path.join(output_dir, image_name))
    # The second returned path is the thumbnail.
    thumb_path = saved_paths[1]
    quality_plot = Plot('control_noncontrol_readquality', image_name)
    return PlotGroup('polymerase_read_quality',
                     title='Polymerase Read Quality',
                     thumbnail=os.path.basename(thumb_path),
                     plots=[quality_plot])
def test_save_histograms(self):
    """Save std histograms."""
    workdir = tempfile.mkdtemp(prefix='pbreport_output')
    figure, axes = get_fig_axes_lpr()

    data_path = os.path.join(workdir, 'dmp.data')
    log.debug('dmp file is {d}'.format(d=data_path))
    apply_histogram_data(axes, [1, 2, 3, 4, 5, 6], 2,
                         axis_labels=('foo', 'bar'),
                         barcolor='#505050',
                         xlim=(0, 20000),
                         data_file=data_path)
    # The raw histogram data must have been dumped next to the images.
    self.assertTrue(os.path.exists(data_path))

    log.info("Writing temp histogram to {t}".format(t=workdir))
    save_figure_with_thumbnail(figure, os.path.join(workdir, 'foo.png'))
    # Both the full-size image and its thumbnail are expected on disk.
    for expected in ('foo.png', 'foo_thumb.png'):
        self.assertTrue(os.path.exists(os.path.join(workdir, expected)))
def _create_coverage_histo_plot_grp(stats, output_dir):
    """
    Build the plot group that holds the coverage histogram.

    The histogram is written to ``coverage_histogram.png`` in output_dir and
    referenced by base name.

    :param stats: (ReferenceStats) see _get_reference_coverage_stats
    :param output_dir: (string) where to write images
    :return: io.model.PlotGroup object
    """
    figure, _axes = _create_histogram(stats)
    target = os.path.join(output_dir, 'coverage_histogram.png')
    image_name, thumb_name = [
        os.path.basename(path)
        for path in save_figure_with_thumbnail(figure, target)
    ]
    histogram = Plot('coverage_histogram', image_name,
                     'Depth of coverage distribution ')
    return PlotGroup('coverage_histogram_plot_group',
                     title='Depth of Coverage',
                     thumbnail=thumb_name,
                     plots=[histogram])
def get_qmod_hist(basemods_h5, output_dir, dpi):
    """
    Draw the kinetics histogram, save it and return a plot object.

    :param basemods_h5: input forwarded to ``plot_kinetics_hist``
    :param output_dir: directory ``kinetic_histogram.png`` is written to
    :param dpi: resolution used when saving the figure
    :return: Plot referencing the image and thumbnail by base name
    """
    figure, axes = _create_fig_template()
    plot_kinetics_hist(basemods_h5, axes)

    destination = os.path.join(output_dir, "kinetic_histogram.png")
    image_path, thumb_path = PH.save_figure_with_thumbnail(
        figure, destination, dpi=dpi)
    image_name = os.path.basename(image_path)
    thumb_name = os.path.basename(thumb_path)
    return Plot(Constants.P_HIST, image_name, thumbnail=thumb_name)
def _create_coverage_histo_plot_grp(stats, output_dir):
    """
    Build the plot group that holds the coverage histogram.

    The histogram is written to ``coverage_histogram.png`` in output_dir and
    referenced by base name; the caption is read from ``meta_rpt``.

    :param stats: (ReferenceStats) see _get_reference_coverage_stats
    :param output_dir: (string) where to write images
    :return: io.model.PlotGroup object
    """
    figure, _axes = _create_histogram(stats)
    target = os.path.join(output_dir, 'coverage_histogram.png')
    image_name, thumb_name = [
        os.path.basename(path)
        for path in save_figure_with_thumbnail(figure, target)
    ]

    meta_group = meta_rpt.get_meta_plotgroup(Constants.PG_COVERAGE_HIST)
    meta_plot = meta_group.get_meta_plot(Constants.P_COVERAGE_HIST)
    histogram = Plot(Constants.P_COVERAGE_HIST, image_name,
                     meta_plot.caption)
    return PlotGroup(Constants.PG_COVERAGE_HIST,
                     thumbnail=thumb_name,
                     plots=[histogram])
def _create_variants_plot_grp(top_contigs, var_map, output_dir):
    """
    Build the plot group containing the variants plots of the top contigs.

    Contigs without an entry in var_map are skipped. The first plotted
    contig supplies the legend file and the group thumbnail.

    :param top_contigs: (list of Contig objects) sorted by contig size
    :param var_map: (dict string:ContigVariants) mapping of contig.header
        to ContigVariants object
    :param output_dir: (string) where to write images
    :return: io.model.PlotGroup object
    """
    variant_plots = []
    group_thumbnail = None
    legend_file = None

    for contig in top_contigs:
        if contig.header not in var_map:
            # no coverage of this contig
            continue

        contig_variants = var_map[contig.header]
        bars = _create_bars(contig_variants)
        if legend_file is None:
            legend_file = _get_legend_file(bars, output_dir)

        figure, _axes = _create_contig_fig_ax(
            bars, _get_x_labels(contig_variants))
        image_path = os.path.join(output_dir, contig_variants.file_name)
        if group_thumbnail is not None:
            figure.savefig(image_path, dpi=DEFAULT_DPI)
        else:
            saved_paths = PH.save_figure_with_thumbnail(figure, image_path)
            group_thumbnail = os.path.basename(saved_paths[1])

        # Plots are numbered in the order they are added.
        plot_id = 'coverage_variants_{i}'.format(i=str(len(variant_plots)))
        caption = "Observed variants across {c}".format(
            c=contig_variants.name)
        variant_plots.append(Plot(plot_id, os.path.basename(image_path),
                                  title=caption, caption=caption))
        plt.close(figure)

    return PlotGroup(Constants.PG_VARIANTS,
                     thumbnail=group_thumbnail,
                     legend=legend_file,
                     plots=variant_plots)
def _create_coverage_plot_grp(self, top_contigs, cov_map, output_dir):
    """
    Build the plot group containing the coverage plots of the top contigs.

    Contigs without an entry in cov_map are skipped. The first plotted
    contig supplies the group thumbnail.

    :param top_contigs: (list of Contig objects) sorted by contig size
    :param cov_map: (dict string:ContigCoverage) mapping of contig.id to
        ContigCoverage object
    :param output_dir: (string) where to write images
    :return: io.model.PlotGroup object
    """
    coverage_plots = []
    group_thumbnail = None
    log.debug('Creating plots for {n} top contig(s)'.format(
        n=str(len(top_contigs))))

    for contig in top_contigs:
        if contig.id not in cov_map:
            # no coverage of this contig
            log.debug('contig {c} has no coverage info '.format(c=contig.id))
            continue

        contig_cov = cov_map[contig.id]
        figure, _axes = self._create_contig_plot(contig_cov)
        image_path = os.path.join(output_dir, contig_cov.file_name)
        if group_thumbnail is not None:
            figure.savefig(image_path)
        else:
            saved_paths = save_figure_with_thumbnail(figure, image_path)
            group_thumbnail = os.path.basename(saved_paths[1])
        plt.close(figure)

        # Plots are numbered in the order they are added.
        plot_id = "coverage_contig_{i}".format(i=str(len(coverage_plots)))
        group_spec = self.spec.get_plotgroup_spec(Constants.PG_COVERAGE)
        plot_spec = group_spec.get_plot_spec(Constants.P_COVERAGE)
        template = plot_spec.caption + " {c}."
        text = template.format(c=contig_cov.name)
        coverage_plots.append(
            Plot(plot_id, os.path.basename(image_path), text, title=text))

    group_title = get_plotgroup_title(self.spec, Constants.PG_COVERAGE)
    return PlotGroup(Constants.PG_COVERAGE,
                     title=group_title,
                     thumbnail=group_thumbnail,
                     plots=coverage_plots)
def _create_variants_plot_grp(top_contigs, var_map, output_dir):
    """
    Build the plot group containing the variants plots of the top contigs.

    Contigs without an entry in var_map are skipped. The first plotted
    contig supplies the legend file and the group thumbnail.

    :param top_contigs: (list of Contig objects) sorted by contig size
    :param var_map: (dict string:ContigVariants) mapping of contig.header
        to ContigVariants object
    :param output_dir: (string) where to write images
    :return: io.model.PlotGroup object
    """
    variant_plots = []
    group_thumbnail = None
    legend_file = None

    for contig in top_contigs:
        if contig.header not in var_map:
            # no coverage of this contig
            continue

        contig_variants = var_map[contig.header]
        bars = _create_bars(contig_variants)
        if legend_file is None:
            legend_file = _get_legend_file(bars, output_dir)

        figure, _axes = _create_contig_fig_ax(
            bars, _get_x_labels(contig_variants))
        image_path = os.path.join(output_dir, contig_variants.file_name)
        if group_thumbnail is not None:
            figure.savefig(image_path)
        else:
            saved_paths = PH.save_figure_with_thumbnail(figure, image_path)
            group_thumbnail = os.path.basename(saved_paths[1])

        # Plots are numbered in the order they are added.
        plot_id = 'coverage_variants_{i}'.format(i=str(len(variant_plots)))
        caption = "Observed variants across {c}".format(
            c=contig_variants.name)
        variant_plots.append(Plot(plot_id, os.path.basename(image_path),
                                  title=caption, caption=caption))
        plt.close(figure)

    return PlotGroup(Constants.PG_VARIANTS,
                     thumbnail=group_thumbnail,
                     legend=legend_file,
                     plots=variant_plots)
def _create_coverage_plot_grp(self, top_contigs, cov_map, output_dir):
    """
    Build the plot group containing the coverage plots of the top contigs.

    Contigs without an entry in cov_map are skipped. The first plotted
    contig supplies the group thumbnail.

    :param top_contigs: (list of Contig objects) sorted by contig size
    :param cov_map: (dict string:ContigCoverage) mapping of contig.id to
        ContigCoverage object
    :param output_dir: (string) where to write images
    :return: io.model.PlotGroup object
    """
    coverage_plots = []
    group_thumbnail = None
    log.debug('Creating plots for {n} top contig(s)'.format(
        n=str(len(top_contigs))))

    for contig in top_contigs:
        if contig.id not in cov_map:
            # no coverage of this contig
            log.debug('contig {c} has no coverage info '.format(c=contig.id))
            continue

        contig_cov = cov_map[contig.id]
        figure, _axes = self._create_contig_plot(contig_cov)
        image_path = os.path.join(output_dir, contig_cov.file_name)
        if group_thumbnail is not None:
            figure.savefig(image_path)
        else:
            saved_paths = save_figure_with_thumbnail(figure, image_path)
            group_thumbnail = os.path.basename(saved_paths[1])
        plt.close(figure)

        # Plots are numbered in the order they are added.
        plot_id = "coverage_contig_{i}".format(i=str(len(coverage_plots)))
        group_spec = self.spec.get_plotgroup_spec(Constants.PG_COVERAGE)
        plot_spec = group_spec.get_plot_spec(Constants.P_COVERAGE)
        template = plot_spec.caption + " {c}."
        text = template.format(c=contig_cov.name)
        coverage_plots.append(
            Plot(plot_id, os.path.basename(image_path), text, title=text))

    group_title = get_plotgroup_title(self.spec, Constants.PG_COVERAGE)
    return PlotGroup(Constants.PG_COVERAGE,
                     title=group_title,
                     thumbnail=group_thumbnail,
                     plots=coverage_plots)
def addQmodMotifHist(csvFile, kinData, outputFolder, dpi=72):
    """Save the motif modification-QV histogram and wrap it in a PlotGroup.

    Apart from passing in the motif_summary.csv file name, nearly identical
    to addQmodHist.

    :param csvFile: motif summary CSV forwarded to ``plotMotifHist``
    :param kinData: kinetics data forwarded to ``plotMotifHist``
    :param outputFolder: directory the image is written to
    :param dpi: resolution used when saving the figure
    :return: PlotGroup holding the single histogram plot
    """
    destination = os.path.join(outputFolder, Constants.I_MOTIFS_QMOD)

    # Generate modification detection plot
    figure, _axes = plotMotifHist(csvFile, kinData)
    image_path, thumb_path = PH.save_figure_with_thumbnail(
        figure, destination, dpi=dpi)
    log.info((image_path, thumb_path))

    thumb_name = os.path.basename(thumb_path)
    histogram = Plot(Constants.P_MOD_HISTOGRAM,
                     image=os.path.basename(image_path),
                     thumbnail=thumb_name)
    return PlotGroup(Constants.PG_MOD_HISTOGRAM,
                     title="Modification QVs",
                     plots=[histogram],
                     thumbnail=thumb_name)
def to_rl_overlay_plot(numunfilteredbasecalls_dist, readlen_dist, output_dir):
    """Overlay the unfiltered and polymerase read-length histograms.

    :param numunfilteredbasecalls_dist: mapping indexed by ``'BinCounts'``
        and ``'BinWidth'`` whose entries expose ``metavalue``
    :param readlen_dist: mapping with the same layout for polymerase reads
    :param output_dir: directory the PNG is written to; image paths in the
        result are relative to it
    :return: one-element list with the overlay ``PlotGroup``
    """
    plot_name = get_plot_title(spec, Constants.PG_RRL, Constants.P_RRL)
    x_label = get_plot_xlabel(spec, Constants.PG_RRL, Constants.P_RRL)
    y_label = get_plot_ylabel(spec, Constants.PG_RRL, Constants.P_RRL)

    def _counts(dist):
        # Integer count of every bin in the distribution.
        return [int(entry.metavalue) for entry in dist['BinCounts']]

    def _upper_bound(counts, dist):
        # Number of bins times the bin width.
        return len(counts) * int(dist['BinWidth'].metavalue)

    unfiltered_bins = _counts(numunfilteredbasecalls_dist)
    poly_bins = _counts(readlen_dist)
    max_unfiltered = _upper_bound(unfiltered_bins,
                                  numunfilteredbasecalls_dist)
    max_poly = _upper_bound(poly_bins, readlen_dist)
    unfiltered_data = expand_data(unfiltered_bins, max_unfiltered)
    poly_data = expand_data(poly_bins, max_poly)

    fig, ax = get_fig_axes_lpr()
    layers = (
        ("Unfiltered", unfiltered_data, unfiltered_bins, max_unfiltered),
        ("Polymerase", poly_data, poly_bins, max_poly),
    )
    for layer_label, values, counts, upper in layers:
        ax.hist(values, label=layer_label, histtype='stepfilled',
                alpha=0.3, bins=len(counts), range=[0, upper])
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.legend()

    png_fn = os.path.join(output_dir, "{p}.png".format(p=Constants.P_RRL))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        fig, png_fn, dpi=DEFAULT_DPI)
    image_rel = os.path.relpath(png_base, output_dir)
    thumb_rel = os.path.relpath(thumbnail_base, output_dir)
    overlay = Plot(Constants.P_RRL, image_rel,
                   title=plot_name, caption=plot_name,
                   thumbnail=thumb_rel)
    return [PlotGroup(Constants.PG_RRL, plots=[overlay])]
def to_plotgroup(plot_json, output_dir):
    """Draw one row of two subplots per sample and return the plot group.

    Samples are plotted in sorted key order. When ``plot_json`` is empty a
    blank figure is saved instead.

    :param plot_json: mapping of sample name to that sample's plot data
    :param output_dir: directory the PNG is written to; image paths in the
        result are relative to it
    :return: ``PlotGroup`` holding the single plot
    """
    n_samples = len(plot_json)
    if n_samples > 0:
        fig, ax = plt.subplots(n_samples, 2,
                               figsize=(15, n_samples * 5), squeeze=False)
        od = collections.OrderedDict(sorted(plot_json.items()))
        # presumably updated in place by add_subplots with the running
        # maximum of each column - TODO confirm against add_subplots
        y_max = [0, 0]
        # ``items()``/``range`` instead of ``iteritems()``/``xrange`` so the
        # function runs on Python 2 and 3 alike.
        for counter, (sample, data) in enumerate(od.items()):
            add_subplots(fig, ax, sample, data, counter, y_max)
        # Pass a real list: ``keys()`` is a non-indexable view on Python 3.
        label_rows(fig, ax, list(od))
        label_columns(fig, ax)
        for row in range(n_samples):
            ax[row, 0].set_ylim(top=y_max[0] * 1.1)
            ax[row, 1].set_ylim(top=y_max[1] * 1.1)
        p1 = mpatches.Patch(color='#FF7E79', linewidth=0)
        p2 = mpatches.Patch(color='#A9D18E', linewidth=0)
        # ``loc`` is given by keyword; newer matplotlib releases no longer
        # accept it as a third positional argument.
        fig.legend((p1, p2), ("Deletions", "Insertions"),
                   loc="upper left", fontsize=15)
    else:
        fig = plt.figure()

    plot_name = get_plot_title(spec, Constants.PG_SV, Constants.P_SV)
    png_fn = os.path.join(output_dir, "{p}.png".format(p=Constants.P_SV))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        fig, png_fn, dpi=DEFAULT_DPI, bbox_inches='tight')
    plot = Plot(Constants.P_SV,
                os.path.relpath(png_base, output_dir),
                title=plot_name, caption=plot_name,
                thumbnail=os.path.relpath(thumbnail_base, output_dir))
    plot_group = PlotGroup(Constants.PG_SV, plots=[plot])
    return plot_group
def _create_coverage_plot_grp(top_contigs, cov_map, output_dir):
    """
    Build the plot group containing the coverage plots of the top contigs.

    Contigs without an entry in cov_map are skipped. The first plotted
    contig supplies the group thumbnail.

    :param top_contigs: (list of Contig objects) sorted by contig size
    :param cov_map: (dict string:ContigCoverage) mapping of contig.id to
        ContigCoverage object
    :param output_dir: (string) where to write images
    :return: io.model.PlotGroup object
    """
    coverage_plots = []
    group_thumbnail = None
    log.debug('Creating plots for {n} top contig(s)'.format(
        n=str(len(top_contigs))))

    for contig in top_contigs:
        if contig.id not in cov_map:
            # no coverage of this contig
            log.debug('contig {c} has no coverage info '.format(c=contig.id))
            continue

        contig_cov = cov_map[contig.id]
        figure, _axes = _create_contig_plot(contig_cov)
        image_path = os.path.join(output_dir, contig_cov.file_name)
        if group_thumbnail is not None:
            figure.savefig(image_path)
        else:
            saved_paths = save_figure_with_thumbnail(figure, image_path)
            group_thumbnail = os.path.basename(saved_paths[1])
        plt.close(figure)

        # Plots are numbered in the order they are added.
        plot_id = 'coverage_contig_{i}'.format(i=str(len(coverage_plots)))
        template = ("Observed depth of coverage across {c} "
                    "(window size = {b}bp).")
        text = template.format(c=contig_cov.name,
                               b=contig_cov.aveRegionSize())
        coverage_plots.append(
            Plot(plot_id, os.path.basename(image_path), text))

    return PlotGroup('coverage_plots',
                     title='Coverage Across Reference',
                     thumbnail=group_thumbnail,
                     plots=coverage_plots)
def to_report_impl(dset, output_dir, dpi=DEFAULT_DPI):
    """Build the adapter report from a dataset's summary statistics.

    One inter-adapter distance bar chart is saved per median-insert
    distribution. The resulting plot group is built but currently not
    attached to the report.

    :param dset: dataset exposing ``metadata.summaryStats``
    :param output_dir: directory the PNG files are written to
    :param dpi: resolution handed to ``save_figure_with_thumbnail``
    :return: the result of ``spec.apply_view`` on the assembled ``Report``
    :raises InvalidStatsError: when there are no median-insert distributions
    """
    stats = dset.metadata.summaryStats
    if not stats.medianInsertDists:
        raise InvalidStatsError("Pipeline Summary Stats (sts.xml) not found "
                                "or missing key distributions")

    # Pull some stats:
    adapter_dimers = np.round(100.0 * stats.adapterDimerFraction,
                              decimals=2)
    short_inserts = np.round(100.0 * stats.shortInsertFraction,
                             decimals=2)
    attributes = [
        Attribute(Constants.A_DIMERS, adapter_dimers),
        Attribute(Constants.A_SHORT_INSERTS, short_inserts),
    ]

    if Constants.BASE_RATE_DIST in stats.tags:
        dist = stats[Constants.BASE_RATE_DIST]
        if len(dist) > 1:
            # ``warning`` replaces the deprecated ``warn`` alias.
            log.warning("Dataset was merged, local base rate not applicable")
        else:
            base_rate = dist[0].sampleMed
            attributes.append(Attribute(Constants.A_BASE_RATE, base_rate))
    else:
        log.warning("No local base rate distribution available")

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(stats.medianInsertDists)
    for i, orig_ins_len_dist in enumerate(stats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        # A real list is required: ``map`` returns an iterator on Python 3.
        bar_positions = [float(label) for label in ins_len_dist.labels]
        ax.bar(bar_positions, ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(
            get_plot_xlabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        ax.set_ylabel(
            get_plot_ylabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            fig, png_fn, dpi=dpi)

        # build the report:
        plots.append(
            Plot("adapter_xml_plot_{i}".format(i=i),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    # Built for parity with the saved images; not passed to Report below.
    plot_groups = [
        PlotGroup(Constants.PG_ADAPTER,
                  plots=plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]
    tables = []

    report = Report(
        Constants.R_ID,
        attributes=attributes,
        tables=tables,
    )  # plotgroups=plot_groups)

    return spec.apply_view(report)
def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """Render one read-length bar plot per distribution and group them.

    Axis labels are read from ``meta_rpt``.

    :param PlotConstants: namespace providing the plot ids that are read
        here (``PG_LENGTH``, ``P_LENGTH``, ``P_LENGTH_PREFIX`` and the
        ``*_QUAL`` counterparts)
    :param title: accepted for interface compatibility; not used here
    :param readLenDists: iterable of read-length distributions
    :param readQualDists: iterable of read-quality distributions; only used
        by the disabled code after the ``return``
    :param output_dir: directory the PNG files are written to; image paths
        in the result are made relative to it
    :param dpi: resolution handed to ``save_figure_with_thumbnail``
    :param lenDistShaper: optional callable applied to each length
        distribution; built with ``continuous_dist_shaper`` when omitted
    :return: list containing the read-length ``PlotGroup``
    """
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)

    length_plots = []
    # Thumbnail of the most recently saved plot. It stays None when there
    # are no distributions, so building the group below cannot raise
    # UnboundLocalError.
    group_thumbnail = None
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Sanity check: reshaping must keep the summed bin counts unchanged.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)
        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0), edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).xlabel)
        len_axes.set_ylabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).ylabel)
        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            len_fig, png_fn, dpi=dpi)
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=group_thumbnail))
    plot_groups = [
        PlotGroup(PlotConstants.PG_LENGTH,
                  plots=length_plots,
                  thumbnail=group_thumbnail)
    ]
    return plot_groups

    # FIXME these aren't useful yet
    # Everything below is intentionally unreachable; it is kept so the
    # read-quality plots can be re-enabled by removing the return above.
    qual_plots = []
    # ReadQual distribution to barplot:
    shaper = continuous_dist_shaper(readQualDists, trim_excess=True)
    for i, orig_rqualdist in enumerate(readQualDists):
        rqualdist = shaper(orig_rqualdist)
        qual_fig, qual_axes = get_fig_axes_lpr()
        qual_axes.bar(rqualdist.labels, rqualdist.bins,
                      color=get_green(0), edgecolor=get_green(0),
                      width=(rqualdist.binWidth * 0.75))
        # Use the quality plot's own x label (previously the length label
        # was looked up here).
        qual_axes.set_xlabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_QUAL).get_meta_plot(
                PlotConstants.P_QUAL).xlabel)
        qual_axes.set_ylabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_QUAL).get_meta_plot(
                PlotConstants.P_QUAL).ylabel)
        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_QUAL_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            qual_fig, png_fn, dpi=dpi)
        qual_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_QUAL),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))
    plot_groups.append(PlotGroup(PlotConstants.PG_QUAL, plots=qual_plots))
    return plot_groups
def to_report(stats_xml, output_dir, dpi=72):
    # TODO: make dpi matter
    """Main point of entry

    Loads the summary statistics, saves one inter-adapter distance bar
    chart per median-insert distribution and returns the adapter report.
    The plot group is built but currently not attached to the report.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises IOError: when there are no median-insert distributions
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    # Fall back to loading the file as stats when none are attached.
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    stats = dset.metadata.summaryStats
    if not stats.medianInsertDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # Pull some stats:
    adapter_dimers = np.round(100.0 * stats.adapterDimerFraction,
                              decimals=2)
    short_inserts = np.round(100.0 * stats.shortInsertFraction,
                             decimals=2)
    plots = []

    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(stats.medianInsertDists)
    for i, orig_ins_len_dist in enumerate(stats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        # A real list is required: ``map`` returns an iterator on Python 3.
        bar_positions = [float(label) for label in ins_len_dist.labels]
        ax.bar(bar_positions, ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(meta_rpt.get_meta_plotgroup(
            Constants.PG_ADAPTER).get_meta_plot(Constants.P_ADAPTER).xlabel)
        ax.set_ylabel(meta_rpt.get_meta_plotgroup(
            Constants.PG_ADAPTER).get_meta_plot(Constants.P_ADAPTER).ylabel)
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            fig, png_fn, dpi=dpi)

        # build the report:
        plots.append(
            Plot("adapter_xml_plot_{i}".format(i=i),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    # Built for parity with the saved images; not passed to Report below.
    plot_groups = [
        PlotGroup(Constants.PG_ADAPTER,
                  plots=plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]
    attributes = [
        Attribute(Constants.A_DIMERS, adapter_dimers),
        Attribute(Constants.A_SHORT_INSERTS, short_inserts),
    ]

    tables = []
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attributes,
                    tables=tables,
                    )  # plotgroups=plot_groups)

    return meta_rpt.apply_view(report)
def to_report(stats_xml, output_dir, dpi=72):
    # TODO: make dpi matter
    """Main point of entry

    Loads the summary statistics, saves one inter-adapter distance bar
    chart per median-insert distribution and returns the adapter report
    with its table and plot group.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises RuntimeError: when there are no median-insert distributions
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = DataSet(stats_xml)
    # Fall back to loading the file as stats when none are attached.
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    stats = dset.metadata.summaryStats
    if not stats.medianInsertDists:
        raise RuntimeError("No Pipeline Summary Stats (sts.xml) found")

    # Pull some stats:
    adapter_dimers = np.round(100.0 * stats.adapterDimerFraction,
                              decimals=2)
    short_inserts = np.round(100.0 * stats.shortInsertFraction,
                             decimals=2)
    plots = []

    # Pull some histograms (may have dupes (unmergeable distributions)):
    for i, ins_len_dist in enumerate(stats.medianInsertDists):
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        # A real list is required: ``map`` returns an iterator on Python 3.
        bar_positions = [float(label) for label in ins_len_dist.labels]
        ax.bar(bar_positions, ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel('Median Distance Between Adapters')
        ax.set_ylabel('Reads')
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            fig, png_fn, dpi=dpi)

        # build the report:
        # Image paths are made relative to output_dir rather than to the
        # current working directory.
        plots.append(
            Plot("adapter_xml_plot_{i}".format(i=i),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    plot_groups = [
        PlotGroup("adapter_xml_plot_group",
                  title="Observed Insert Length Distribution",
                  plots=plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]

    columns = [
        Column("adaper_xml_conditions", None,
               ('Adapter Dimers (0-10bp)', 'Short Inserts (11-100bp)')),
        Column("adaper_xml_results", None,
               (adapter_dimers, short_inserts)),
    ]
    tables = [Table("adapter_xml_table", "Adapter Statistics", columns)]
    report = Report("adapter_xml_report",
                    title="Adapter Report",
                    tables=tables,
                    attributes=None,
                    plotgroups=plot_groups)
    return report
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Opens ``stats_xml`` as a DataSet (loading the stats from the file itself
    when the dataset carries none), derives the filtering statistics table
    from the read-length and read-quality distributions, renders one bar
    chart per distribution into ``output_dir`` and returns the
    "Filtering stats XML report".

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :raises RuntimeError: if the summary stats have no ``readLenDists``

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))

    # stats_xml should be a dataset:
    dset = DataSet(stats_xml)
    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    stats = dset.metadata.summaryStats
    if not stats.readLenDists:
        raise RuntimeError("No Pipeline Summary Stats (sts.xml) found")

    # Build the stats table:
    nbases = 0
    nreads = 0
    n50 = 0
    approx_read_lens = []

    # if a merge failed there may be more than one dist:
    for len_dist in stats.readLenDists:
        nbases += _total_from_bins(len_dist.bins,
                                   len_dist.minBinValue,
                                   len_dist.binWidth)
        nreads += sum(len_dist.bins)

        # N50: approximate every read in a bin by the bin midpoint, except
        # for the last bin, where the bin's start value is used as-is.
        final_bin = len(len_dist.bins) - 1
        for bin_idx, bin_count in enumerate(len_dist.bins):
            value = (bin_idx * len_dist.binWidth) + len_dist.minBinValue
            if bin_idx != final_bin:
                value = value + len_dist.binWidth / 2
            approx_read_lens.extend([value] * bin_count)
            # TODO(mdsmith)(2016-02-09) make sure maxOutlierValue is updated
            # during a merge /todo
            # but pop off that last value and replace it with the
            # maxOutlierValue:
            # approx_read_lens.pop()
            # approx_read_lens.append(rlendist.maxBinValue)
    n50 = np.round(compute_n50(approx_read_lens))

    readscoretotal = 0
    readscorenumber = 0
    for qual_dist in stats.readQualDists:
        readscoretotal += _total_from_bins(qual_dist.bins,
                                           qual_dist.minBinValue,
                                           qual_dist.binWidth)
        readscorenumber += sum(qual_dist.bins)

    # Means default to 0 when there is nothing to average over.
    readlen = np.round(nbases / nreads, decimals=2) if nreads != 0 else 0
    readQuality = (np.round(readscoretotal / readscorenumber, decimals=2)
                   if readscorenumber != 0 else 0)

    row_names = ["Polymerase Read Bases",
                 "Polymerase Reads",
                 "Polymerase Read N50",
                 "Polymerase Read Length",
                 "Polymerase Read Quality"]
    _pre_filter = [np.round(nbases, decimals=2),
                   nreads,
                   n50,
                   readlen,
                   readQuality]

    def _bar_plots(dists, xlabel, png_template, plot_id_template):
        # Render one bar chart per distribution into output_dir; returns the
        # Plot objects and the output_dir-relative thumbnail path of the
        # last chart rendered (None when ``dists`` is empty).
        rendered = []
        last_thumbnail = None
        for idx, dist in enumerate(dists):
            figure, axes = get_fig_axes_lpr()
            axes.bar(dist.labels,
                     dist.bins,
                     color=get_green(0),
                     edgecolor=get_green(0),
                     width=(dist.binWidth * 0.75))
            axes.set_xlabel(xlabel)
            axes.set_ylabel('Reads')
            png_path = os.path.join(output_dir, png_template.format(i=idx))
            png_base, thumbnail_base = save_figure_with_thumbnail(
                figure, png_path, dpi=dpi)
            last_thumbnail = os.path.relpath(thumbnail_base, output_dir)
            rendered.append(Plot(plot_id_template.format(i=idx),
                                 os.path.relpath(png_base, output_dir),
                                 thumbnail=last_thumbnail))
        return rendered, last_thumbnail

    # ReadLen distribution to barplot:
    len_plots, len_thumbnail = _bar_plots(stats.readLenDists,
                                          'Read Length',
                                          "readLenDist{i}.png",
                                          "filter_len_xml_plot_{i}")
    plot_groups = [PlotGroup("filter_len_xml_plot_group",
                             title="Polymerase Read Length",
                             plots=len_plots,
                             thumbnail=len_thumbnail)]

    # ReadQual distribution to barplot (this group carries no thumbnail):
    qual_plots, _ = _bar_plots(stats.readQualDists,
                               'Read Quality',
                               "readQualDist{i}.png",
                               "filter_qual_xml_plot_{i}")
    plot_groups.append(PlotGroup("filter_qual_xml_plot_group",
                                 title="Polymerase Read Quality",
                                 plots=qual_plots))

    # build the report:
    columns = [
        Column("filter_names_column", header="Metrics", values=row_names),
        Column("filter_stats_column", header="Values", values=_pre_filter),
    ]
    tables = [Table("filter_xml_table", "Filtering Statistics", columns)]
    return Report("filtering_stats_xml_report",
                  title="Filtering stats XML report",
                  tables=tables,
                  attributes=None,
                  plotgroups=plot_groups,
                  dataset_uuids=dataset_uuids)
def _to_plot(fig, plot_id, base_dir):
    """Save ``fig`` under ``base_dir`` and describe it as a Plot.

    The image file is named ``plot_id`` plus a ``.png`` suffix and is written
    through ``save_figure_with_thumbnail``. The returned Plot stores the bare
    image file name and the base name of the second value returned by
    ``save_figure_with_thumbnail`` (used here as the thumbnail).

    :param fig: figure object passed through to ``save_figure_with_thumbnail``
    :param plot_id: id of the resulting Plot; also the image file stem
    :param base_dir: directory the image is written to
    :return: Plot referencing the image and its thumbnail by file name
    """
    png_name = plot_id + ".png"
    png_path = op.join(base_dir, png_name)
    # Only the thumbnail path of the returned pair is needed.
    _unused_png, thumbnail_path = save_figure_with_thumbnail(fig, png_path)
    thumbnail_name = op.basename(thumbnail_path)
    return Plot(plot_id, png_name, thumbnail=thumbnail_name)