def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """Render one read-length bar plot per distribution and group them.

    :param PlotConstants: constants holder providing PG_LENGTH, P_LENGTH and
        P_LENGTH_PREFIX
    :param title: used as title and caption of every plot and as the title
        of the plot group
    :param readLenDists: read length distributions to plot
    :param readQualDists: read quality distributions; not used because the
        quality plots are disabled, kept for interface compatibility
    :param output_dir: directory the PNG files are written to; plot paths
        are stored relative to it
    :param dpi: resolution handed to save_figure_with_thumbnail
    :param lenDistShaper: optional callable applied to every distribution
        before plotting; defaults to
        continuous_dist_shaper(readLenDists, trim_excess=True)
    :return: list containing the single read-length PlotGroup
    """
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)

    length_plots = []
    # Thumbnail of the most recently written plot; stays None when there is
    # nothing to plot so the group is built without a thumbnail instead of
    # failing on an unbound variable.
    last_thumbnail = None
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Sanity check: reshaping must preserve the total count.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)
        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0), edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(get_plot_xlabel(spec, PlotConstants.PG_LENGTH,
                                            PlotConstants.P_LENGTH))
        len_axes.set_ylabel(get_plot_ylabel(spec, PlotConstants.PG_LENGTH,
                                            PlotConstants.P_LENGTH))
        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(len_fig, png_fn,
                                                              dpi=dpi)
        last_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 title=title, caption=title,
                 thumbnail=last_thumbnail))

    group_kwargs = {"title": title, "plots": length_plots}
    if last_thumbnail is not None:
        group_kwargs["thumbnail"] = last_thumbnail

    # FIXME: read-quality plots aren't useful yet, so none are generated and
    # readQualDists is ignored.
    return [PlotGroup(PlotConstants.PG_LENGTH, **group_kwargs)]
def to_hq_hist_plot(hqbasefraction_dist, output_dir):
    """Draw the HQ base-fraction bar chart and wrap it in a plot group.

    Bar positions are ``k / NumBins`` for each bin index ``k``; bar heights
    are the integer ``BinCounts`` values.

    :param hqbasefraction_dist: distribution record indexable by 'NumBins',
        'BinCounts' and 'BinWidth', whose entries expose ``metavalue``
    :param output_dir: directory the PNG and its thumbnail are written to
    :return: list holding one PlotGroup with the single HQ plot
    """
    group_id = Constants.PG_HQ
    plot_id = Constants.P_HQ
    caption = get_plot_title(spec, group_id, plot_id)
    xlabel = get_plot_xlabel(spec, group_id, plot_id)
    ylabel = get_plot_ylabel(spec, group_id, plot_id)

    bin_total = int(hqbasefraction_dist['NumBins'].metavalue)
    counts = [int(entry.metavalue)
              for entry in hqbasefraction_dist['BinCounts']]
    denominator = float(bin_total)
    positions = [float(k) / denominator for k in xrange(bin_total)]
    width = float(hqbasefraction_dist['BinWidth'].metavalue)

    figure, axes = get_fig_axes_lpr()
    # Bars are drawn at 75% of the bin width to leave a gap between them.
    axes.bar(positions, counts, color=get_green(0), edgecolor=get_green(0),
             width=(width * 0.75))
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)

    target = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    image, thumb = save_figure_with_thumbnail(figure, target, dpi=DEFAULT_DPI)
    plot = Plot(plot_id,
                os.path.relpath(image, output_dir),
                title=caption, caption=caption,
                thumbnail=os.path.relpath(thumb, output_dir))
    return [PlotGroup(group_id, plots=[plot])]
def _coverage_vs_quality_plot(contigs, output_dir):
    """
    Creates a scatter plot of coverage vs quality for the polished assembly.
    Each point represents one contig.

    :param contigs: (dict) contig id -> ContigInfo object
    :param output_dir: (str) path to output directory
    :return: (Plot) object that has already been saved as a PNG to output_dir
    """
    fig, axes = PH.get_fig_axes_lpr()
    # Kept from the original: the axes handle is rebound to subplot 111.
    axes = fig.add_subplot(111)
    axes.set_axisbelow(True)
    axes.set_ylabel("Mean Confidence (QV)")
    axes.set_xlabel("Mean Coverage Depth")
    PH.set_tick_label_font_size(axes, 12, 12)
    PH.set_axis_label_font_size(axes, 16)

    infos = list(contigs.values())
    x_vals = [info.mean_coverage for info in infos]
    y_vals = [info.mean_qv for info in infos]
    # max() raises ValueError on an empty sequence, so the axis limits are
    # only customised when there is at least one contig; an empty assembly
    # yields an empty plot instead of a crash.
    if infos:
        # 20% head room above the largest value on each axis.
        axes.set_xlim(0, max(x_vals) * 1.2)
        axes.set_ylim(0, max(y_vals) * 1.2)
    axes.scatter(x_vals, y_vals, s=12)

    png_path = os.path.join(output_dir, "polished_coverage_vs_quality.png")
    png, thumbpng = PH.save_figure_with_thumbnail(fig, png_path)
    return Plot('cov_vs_qual', os.path.basename(png),
                thumbnail=os.path.basename(thumbpng))
def _coverage_vs_quality_plot(contigs, output_dir):
    """
    Creates a scatter plot of coverage vs quality for the polished assembly.
    Each point represents one contig. Axis labels are read from the report
    spec.

    :param contigs: (dict) contig id -> ContigInfo object
    :param output_dir: (str) path to output directory
    :return: (Plot) object that has already been saved as a PNG to output_dir
    """
    import pbreports.plot.helper as PH
    fig, axes = PH.get_fig_axes_lpr()
    # Kept from the original: the axes handle is rebound to subplot 111.
    axes = fig.add_subplot(111)
    axes.set_axisbelow(True)
    axes.set_ylabel(
        get_plot_ylabel(spec, Constants.PG_COVERAGE, Constants.P_COVERAGE))
    axes.set_xlabel(
        get_plot_xlabel(spec, Constants.PG_COVERAGE, Constants.P_COVERAGE))
    PH.set_tick_label_font_size(axes, 12, 12)
    PH.set_axis_label_font_size(axes, 16)

    infos = list(contigs.values())
    x_vals = [info.mean_coverage for info in infos]
    y_vals = [info.mean_qv for info in infos]
    # max() raises ValueError on an empty sequence, so the axis limits are
    # only customised when there is at least one contig; an empty assembly
    # yields an empty plot instead of a crash.
    if infos:
        # 20% head room above the largest value on each axis.
        axes.set_xlim(0, max(x_vals) * 1.2)
        axes.set_ylim(0, max(y_vals) * 1.2)
    axes.scatter(x_vals, y_vals, s=12)

    png_path = os.path.join(output_dir, "polished_coverage_vs_quality.png")
    png, thumbpng = PH.save_figure_with_thumbnail(fig, png_path)
    return Plot(Constants.P_COVERAGE, os.path.basename(png),
                thumbnail=os.path.basename(thumbpng))
def to_readlen_plotgroup(readlen_dist, output_dir):
    """Draw the read-length bar chart and wrap it in a plot group.

    Initial bar positions are ``k * binWidth`` per bin index; positions,
    heights and bin width are then passed through ``reshape``. Bars are only
    drawn when the distribution contains at least one count.

    :param readlen_dist: distribution exposing ``numBins``, ``bins`` and
        ``binWidth``
    :param output_dir: directory the PNG and its thumbnail are written to
    :return: list holding one PlotGroup with the single read-length plot
    """
    group_id = Constants.PG_READLENGTH
    plot_id = Constants.P_READLENGTH
    caption = get_plot_title(spec, group_id, plot_id)
    xlabel = get_plot_xlabel(spec, group_id, plot_id)
    ylabel = get_plot_ylabel(spec, group_id, plot_id)

    bin_total = readlen_dist.numBins
    counts = readlen_dist.bins
    width = readlen_dist.binWidth
    positions = [float(k) * width for k in xrange(bin_total)]
    positions, counts, width = reshape(readlen_dist, positions, counts)

    figure, axes = get_fig_axes_lpr()
    has_reads = sum(readlen_dist.bins) > 0
    if has_reads:
        axes.bar(positions, counts, color=get_green(0),
                 edgecolor=get_green(0), width=(width * 0.75))
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    # Read counts are whole numbers, so only integer ticks are used.
    axes.yaxis.set_major_locator(MaxNLocator(integer=True))

    target = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    image, thumb = save_figure_with_thumbnail(figure, target, dpi=DEFAULT_DPI)
    plot = Plot(plot_id,
                os.path.relpath(image, output_dir),
                title=caption, caption=caption,
                thumbnail=os.path.relpath(thumb, output_dir))
    return [PlotGroup(group_id, plots=[plot])]
def _generate_histogram(datum, title, xlabel, ylabel=None):
    """Create a titled histogram of ``datum``.

    :param datum: values handed to ``ax.hist``
    :param title: figure-level title (set with ``fig.suptitle``)
    :param xlabel: x-axis label
    :param ylabel: optional y-axis label; skipped when falsy
    :return: (fig, ax)
    """
    figure, axes = get_fig_axes_lpr()
    figure.suptitle(title)
    axes.hist(datum)
    axes.set_xlabel(xlabel)
    if not ylabel:
        return figure, axes
    axes.set_ylabel(ylabel)
    return figure, axes
def _apply_plot_data(x_data, y1_data, y2_data, labels, legend_loc=None):
    """Overlay two filled histograms on a shared x-axis with twin y-axes.

    Default labels assume y1_data==control, y2_data==sample. The sample is
    drawn on the primary (left) y-axis and the control on the twin axis.

    :param x_data: bin edges used for both histograms
    :param y1_data: control values
    :param y2_data: sample values
    :param labels: indexable; ``labels[0]`` is the x label and ``labels[1]``
        the base of both y labels
    :param legend_loc: ``loc`` passed to ``ax.legend``
    :return: the matplotlib figure
    """
    sample_color = '#5050f0'
    control_color = '#f05050'

    # The histogram names are deliberately crossed relative to the argument
    # order because the sample has to end up on the left axis.
    control_hist, _ = np.histogram(y1_data, bins=x_data)
    sample_hist, _ = np.histogram(y2_data, bins=x_data)

    fig, ax = get_fig_axes_lpr()
    left_edges = x_data[:-1]
    baseline = np.zeros(len(left_edges))

    ax.fill_between(left_edges, baseline, sample_hist.T, alpha=0.6,
                    edgecolor=sample_color, facecolor=sample_color)

    # Rectangles used only as legend handles for the filled areas.
    control_proxy = mp.Rectangle((0.1, 0.1), 0.1, 0.1,
                                 facecolor=control_color,
                                 edgecolor=control_color, alpha=0.6)
    sample_proxy = mp.Rectangle((0.1, 0.1), 0.1, 0.1,
                                facecolor=sample_color,
                                edgecolor=sample_color, alpha=0.6)

    ax.set_xlabel(labels[0])
    ax.set_ylabel('%s (Sample)' % labels[1])
    ax.legend([control_proxy, sample_proxy], ['Control', 'Sample'],
              loc=legend_loc)
    # gray border around legend
    ax.get_legend().get_frame().set_edgecolor('#a0a0a0')
    set_tick_label_font_size(ax, 12, 12)
    set_axis_label_font_size(ax, 16)

    twin = ax.twinx()
    twin.fill_between(left_edges, baseline, control_hist.T, alpha=0.6,
                      edgecolor=control_color, facecolor=control_color)
    twin.set_ylabel('%s (Control)' % labels[1])
    set_tick_label_font_size(twin, 12, 12)
    set_axis_label_font_size(twin, 16)
    return fig
def _create_contig_fig_ax(bars, xlabels):
    """
    Returns a fig,ax bar plot for one contig, labelled from the report
    metadata (PG_VARIANTS / P_VARIANTS).

    :param bars: bar data forwarded to PH.apply_bar_data
    :param xlabels: x tick labels forwarded to PH.apply_bar_data
    :return: (fig, ax)
    """
    figure, axes = PH.get_fig_axes_lpr()
    x_text = meta_rpt.get_meta_plotgroup(
        Constants.PG_VARIANTS).get_meta_plot(Constants.P_VARIANTS).xlabel
    y_text = meta_rpt.get_meta_plotgroup(
        Constants.PG_VARIANTS).get_meta_plot(Constants.P_VARIANTS).ylabel
    PH.apply_bar_data(axes, bars, xlabels, (x_text, y_text))
    return figure, axes
def _create_contig_fig_ax(bars, xlabels):
    """
    Returns a fig,ax bar plot for one contig with fixed axis labels.

    :param bars: bar data forwarded to PH.apply_bar_data
    :param xlabels: x tick labels forwarded to PH.apply_bar_data
    :return: (fig, ax)
    """
    axis_labels = ('Reference Start Position', 'Variants')
    figure, axes = PH.get_fig_axes_lpr()
    PH.apply_bar_data(axes, bars, xlabels, axis_labels)
    return figure, axes
def _create_contig_fig_ax(bars, xlabels):
    """
    Returns a fig,ax bar plot for one contig, labelled from the report spec
    (PG_VARIANTS / P_VARIANTS).

    :param bars: bar data forwarded to PH.apply_bar_data
    :param xlabels: x tick labels forwarded to PH.apply_bar_data
    :return: (fig, ax)
    """
    figure, axes = PH.get_fig_axes_lpr()
    axis_labels = (
        get_plot_xlabel(spec, Constants.PG_VARIANTS, Constants.P_VARIANTS),
        get_plot_ylabel(spec, Constants.PG_VARIANTS, Constants.P_VARIANTS),
    )
    PH.apply_bar_data(axes, bars, xlabels, axis_labels)
    return figure, axes
def _create_fig_template(dims=(8, 6), facecolor='#ffffff',
                         gridcolor='#e0e0e0'):
    """Create a figure/axes pair with the shared background, grid and fonts.

    :param dims: figure dimensions forwarded to PH.get_fig_axes_lpr
    :param facecolor: background color of the axes
    :param gridcolor: color of the grid lines
    :return: (fig, ax)
    """
    fig, ax = PH.get_fig_axes_lpr(dims=dims)
    # Kept from the original: the axes handle is rebound to subplot 111.
    ax = fig.add_subplot(111)
    # ``axesPatch`` was a deprecated alias that newer matplotlib releases no
    # longer provide; ``patch`` is the supported name for the same artist.
    ax.patch.set_facecolor(facecolor)
    ax.grid(color=gridcolor, linewidth=0.5, linestyle='-')
    # Draw the grid underneath the plotted data.
    ax.set_axisbelow(True)
    PH.set_tick_label_font_size(ax, 12, 12)
    PH.set_axis_label_font_size(ax, 16)
    return fig, ax
def _make_histogram(data, axis_labels, nbins, barcolor):
    """Build a fresh figure and draw a histogram of ``data`` on it.

    :param data: np.array
    :param axis_labels: (tuple of str) (x axis label, y axis label), e.g.
        ('Median Distance Between Adapters', 'Pre-Filter Reads')
    :param nbins: bin specification forwarded to apply_histogram_data
    :param barcolor: bar color forwarded to apply_histogram_data
    :return: matplotlib fig, ax
    """
    figure, axes = get_fig_axes_lpr()
    apply_histogram_data(axes, data, nbins,
                         axis_labels=axis_labels,
                         barcolor=barcolor)
    return figure, axes
def make_nreads_line_plot(bc_groups, base_dir):
    """Plot the read count of every barcode group against its rank.

    A horizontal red line marks the mean number of reads.

    :param bc_groups: iterable of objects exposing ``n_reads``; the x
        position of each group is its 1-based position in this iterable
    :param base_dir: directory forwarded to ``_to_plot``
    :return: whatever ``_to_plot`` returns for the saved figure
    """
    # Read the groups once so one-shot iterables work and x/y always have
    # matching lengths.
    y = [g.n_reads for g in bc_groups]
    x = list(range(1, len(y) + 1))
    # float() forces true division; on Python 2 integer read counts would
    # otherwise truncate the mean.
    mean_nreads = float(sum(y)) / len(y) if y else 0
    fig, ax = get_fig_axes_lpr()
    ax.plot(x, y, color='blue')
    line = ax.axhline(mean_nreads, color='red', label="Mean Number of Reads")
    ax.set_xlabel("Barcode Rank Order")
    ax.set_ylabel("Count of Reads")
    # ``loc`` is passed by keyword; newer matplotlib releases deprecate the
    # positional form.
    fig.legend((line, ), ("Mean Number of Reads", ), loc="upper right")
    return _to_plot(fig, Constants.P_NREADS, base_dir)
def _make_histogram(datum, axis_labels, nbins, barcolor):
    """Build a fresh figure and draw a histogram of ``datum`` on it.

    :param datum: np.array
    :param axis_labels: (tuple of str) (x axis label, y axis label), e.g.
        ('Median Distance Between Adapters', 'Pre-Filter Reads')
    :param nbins: bin specification forwarded to apply_histogram_data
    :param barcolor: bar color forwarded to apply_histogram_data
    :return: matplotlib fig, ax
    """
    figure, axes = get_fig_axes_lpr()
    apply_histogram_data(axes, datum, nbins,
                         axis_labels=axis_labels,
                         barcolor=barcolor)
    return figure, axes
def __generate_histogram_comparison(method_name, title, xlabel,
                                    list_fastq_stats):
    """Overlay one semi-transparent histogram per fastq stats object.

    :param method_name: name of the attribute read from every stats object
        (via ``getattr``) to obtain the values to histogram
    :param title: figure-level title
    :param xlabel: x-axis label
    :param list_fastq_stats: iterable of stats objects exposing
        ``file_name`` and the attribute named by ``method_name``
    :return: (fig, ax)
    """
    fig, ax = get_fig_axes_lpr()
    fig.suptitle(title)
    # Low alpha keeps overlapping histograms distinguishable.
    alpha = 0.3
    for fastq_stat in list_fastq_stats:
        # The legend entry is the file's base name.
        label = os.path.basename(fastq_stat.file_name)
        ax.hist(getattr(fastq_stat, method_name),
                alpha=alpha, bins=85, label=label)
    ax.set_xlabel(xlabel)
    ax.legend(loc="best")
    return fig, ax
def _create_histogram(stats):
    """
    Returns a fig,ax coverage histogram plot for this reference.

    Bin edges run from 0 up to ``stats.maxbin`` in steps of
    ``max(1, int(stats.maxbin / 100))``.

    :param stats: (ReferenceStats) provides ``maxbin`` and ``means``
    :return: (fig, ax)
    """
    target_bins = 100
    step = max(1, int(stats.maxbin / target_bins))
    # Zero coverage would give an empty edge array and make the histogram
    # construction fail with an index error, so fall back to an upper
    # bound of 1.
    upper = stats.maxbin
    if upper == 0.0:
        upper = 1
    edges = np.arange(0, upper, step)

    figure, axes = get_fig_axes_lpr()
    axis_labels = ('Coverage', 'Reference Regions')
    apply_histogram_data(axes, stats.means, edges, axis_labels,
                         barcolor=Constants.COLOR_STEEL_BLUE_DARK,
                         showEdges=False)
    return figure, axes
def make_bq_qq_plot(bc_groups, base_dir):
    """
    Create Q-Q plot for barcode quality scores.

    :param bc_groups: iterable of objects providing ``mean_bcqual()``
    :param base_dir: directory forwarded to ``_to_plot``
    :return: result of ``_to_plot``, or None (after a warning) when
        scipy.stats cannot be imported
    """
    try:
        import scipy.stats
    except ImportError:
        warnings.warn("Can't import scipy.stats")
        return None

    quality_means = [group.mean_bcqual() for group in bc_groups]
    figure, axes = get_fig_axes_lpr()
    # probplot draws directly onto the supplied axes.
    scipy.stats.probplot(quality_means, dist="norm", plot=axes)
    axes.set_title("Q-Q Plot of Barcode Quality Scores")
    return _to_plot(figure, Constants.P_BQ_QQ, base_dir)
def scatter_plot_accuracy_vs_concordance(
        data,
        axis_labels=(
            meta_rpt.get_meta_plotgroup(Constants.PG_QV_CALIBRATION).get_meta_plot(Constants.P_QV_CALIBRATION).xlabel,
            meta_rpt.get_meta_plotgroup(Constants.PG_QV_CALIBRATION).get_meta_plot(Constants.P_QV_CALIBRATION).ylabel,
        ),
        nbins=None,
        barcolor=None):
    """Scatter predicted accuracy against concordance with a y = x guide.

    :param data: pair ``(accuracy, concordance)`` of equally long sequences
    :param axis_labels: (x label, y label); the default is read from the
        report metadata when the function is defined
    :param nbins: not used by this function
    :param barcolor: not used by this function
    :return: (fig, ax)
    """
    predicted, observed = data
    figure, axes = get_fig_axes_lpr()
    series = [Line(xData=predicted, yData=observed, style="+")]
    apply_line_data(ax=axes, line_models=series, axis_labels=axis_labels,
                    only_whole_ticks=False)
    # Red diagonal spanning the current x range.
    lower, upper = axes.get_xlim()
    diagonal = np.linspace(lower, upper)
    axes.plot(diagonal, diagonal, "-", color="r")
    return figure, axes
def scatter_plot_accuracy_vs_numpasses(
        data,
        axis_labels=(
            get_plot_xlabel(spec, Constants.PG_SCATTER, Constants.P_SCATTER),
            get_plot_ylabel(spec, Constants.PG_SCATTER, Constants.P_SCATTER),
        ),
        nbins=None,
        barcolor=None):
    """Scatter the number of passes against accuracy expressed as Phred QV.

    :param data: pair ``(npasses, accuracy)`` of equally long sequences
    :param axis_labels: (x label, y label); the default is read from the
        report spec when the function is defined
    :param nbins: not used by this function
    :param barcolor: not used by this function
    :return: (fig, ax)
    """
    pass_counts, accuracies = data
    phred = accuracy_as_phred_qv(accuracies)
    figure, axes = get_fig_axes_lpr()
    series = [Line(xData=pass_counts, yData=phred, style="o")]
    apply_line_data(ax=axes, line_models=series, axis_labels=axis_labels,
                    only_whole_ticks=False)
    return figure, axes
def scatter_plot_accuracy_vs_numpasses(
        data,
        axis_labels=("Number of passes", "Predicted accuracy (Phred QV)"),
        nbins=None,
        barcolor=None):
    """Scatter the number of passes against accuracy expressed as Phred QV.

    :param data: pair ``(npasses, accuracy)`` of equally long sequences
    :param axis_labels: (x label, y label)
    :param nbins: not used by this function
    :param barcolor: not used by this function
    :return: (fig, ax)
    """
    pass_counts, accuracies = data
    phred = accuracy_as_phred_qv(accuracies)
    figure, axes = get_fig_axes_lpr()
    series = [Line(xData=pass_counts, yData=phred, style='o')]
    apply_line_data(ax=axes, line_models=series, axis_labels=axis_labels,
                    only_whole_ticks=False)
    return figure, axes
def _create_contig_plot(contig_coverage):
    """
    Returns a fig,ax plot of mean coverage along one contig, with a filled
    band between the minus/plus standard-deviation curves.

    :param contig_coverage: (ContigCoverage) provides xData, yDataMean,
        yDataStdevMinus and yDataStdevPlus
    :return: (fig, ax)
    """
    positions = np.array(contig_coverage.xData)
    band = LineFill(
        xData=positions,
        yData=np.array(contig_coverage.yDataMean),
        linecolor=Constants.COLOR_STEEL_BLUE_DARK,
        alpha=0.6,
        yDataMin=np.array(contig_coverage.yDataStdevMinus),
        yDataMax=np.array(contig_coverage.yDataStdevPlus),
        edgecolor=Constants.COLOR_STEEL_BLUE_LIGHT,
        facecolor=Constants.COLOR_STEEL_BLUE_LIGHT)
    models = [band]

    figure, axes = get_fig_axes_lpr()
    axis_labels = ('Reference Start Position', 'Coverage')
    # The same models feed both the line and the fill layer.
    apply_line_data(axes, models, axis_labels)
    apply_line_fill_data(axes, models)
    return figure, axes
def scatter_plot_accuracy_vs_numpasses(
        data,
        axis_labels=(get_plot_xlabel(spec, Constants.PG_SCATTER,
                                     Constants.P_SCATTER),
                     get_plot_ylabel(spec, Constants.PG_SCATTER,
                                     Constants.P_SCATTER)),
        nbins=None,
        barcolor=None):
    """Scatter the number of passes against accuracy expressed as Phred QV.

    :param data: pair ``(npasses, accuracy)`` of equally long sequences
    :param axis_labels: (x label, y label); the default is read from the
        report spec when the function is defined
    :param nbins: not used by this function
    :param barcolor: not used by this function
    :return: (fig, ax)
    """
    pass_counts, accuracies = data
    phred = accuracy_as_phred_qv(accuracies)
    figure, axes = get_fig_axes_lpr()
    series = [Line(xData=pass_counts, yData=phred, style='o')]
    apply_line_data(ax=axes, line_models=series, axis_labels=axis_labels,
                    only_whole_ticks=False)
    return figure, axes
def scatter_plot_accuracy_vs_concordance(
        data,
        axis_labels=("Predicted accuracy", "Mapped concordance"),
        nbins=None,
        barcolor=None):
    """Scatter predicted accuracy against concordance with a y = x guide.

    :param data: pair ``(accuracy, concordance)`` of equally long sequences
    :param axis_labels: (x label, y label)
    :param nbins: not used by this function
    :param barcolor: not used by this function
    :return: (fig, ax)
    """
    predicted, observed = data
    figure, axes = get_fig_axes_lpr()
    series = [Line(xData=predicted, yData=observed, style='+')]
    apply_line_data(ax=axes, line_models=series, axis_labels=axis_labels,
                    only_whole_ticks=False)
    # Red diagonal spanning the current x range.
    lower, upper = axes.get_xlim()
    diagonal = np.linspace(lower, upper)
    axes.plot(diagonal, diagonal, '-', color='r')
    return figure, axes
def scatter_plot_accuracy_vs_concordance(
        data,
        axis_labels=(get_plot_xlabel(spec, Constants.PG_QV_CALIBRATION,
                                     Constants.P_QV_CALIBRATION),
                     get_plot_ylabel(spec, Constants.PG_QV_CALIBRATION,
                                     Constants.P_QV_CALIBRATION)),
        nbins=None,
        barcolor=None):
    """Scatter predicted accuracy against concordance with a y = x guide.

    :param data: pair ``(accuracy, concordance)`` of equally long sequences
    :param axis_labels: (x label, y label); the default is read from the
        report spec when the function is defined
    :param nbins: not used by this function
    :param barcolor: not used by this function
    :return: (fig, ax)
    """
    predicted, observed = data
    figure, axes = get_fig_axes_lpr()
    series = [Line(xData=predicted, yData=observed, style='+')]
    apply_line_data(ax=axes, line_models=series, axis_labels=axis_labels,
                    only_whole_ticks=False)
    # Red diagonal spanning the current x range.
    lower, upper = axes.get_xlim()
    diagonal = np.linspace(lower, upper)
    axes.plot(diagonal, diagonal, '-', color='r')
    return figure, axes
def _create_contig_plot(contig_coverage):
    """
    Returns a fig,ax plot of mean coverage along one contig, with a filled
    band between the minus/plus standard-deviation curves. Axis labels come
    from the report metadata (PG_COVERAGE / P_COVERAGE).

    :param contig_coverage: (ContigCoverage) provides xData, yDataMean,
        yDataStdevMinus and yDataStdevPlus
    :return: (fig, ax)
    """
    positions = np.array(contig_coverage.xData)
    band = LineFill(
        xData=positions,
        yData=np.array(contig_coverage.yDataMean),
        linecolor=Constants.COLOR_STEEL_BLUE_DARK,
        alpha=0.6,
        yDataMin=np.array(contig_coverage.yDataStdevMinus),
        yDataMax=np.array(contig_coverage.yDataStdevPlus),
        edgecolor=Constants.COLOR_STEEL_BLUE_LIGHT,
        facecolor=Constants.COLOR_STEEL_BLUE_LIGHT)
    models = [band]

    figure, axes = get_fig_axes_lpr()
    x_text = meta_rpt.get_meta_plotgroup(
        Constants.PG_COVERAGE).get_meta_plot(Constants.P_COVERAGE).xlabel
    y_text = meta_rpt.get_meta_plotgroup(
        Constants.PG_COVERAGE).get_meta_plot(Constants.P_COVERAGE).ylabel
    # The same models feed both the line and the fill layer.
    apply_line_data(axes, models, (x_text, y_text))
    apply_line_fill_data(axes, models)
    return figure, axes
def scatter_plot_accuracy_vs_numpasses(
        data,
        axis_labels=(
            meta_rpt.get_meta_plotgroup(Constants.PG_SCATTER).get_meta_plot(Constants.P_SCATTER).xlabel,
            meta_rpt.get_meta_plotgroup(Constants.PG_SCATTER).get_meta_plot(Constants.P_SCATTER).ylabel),
        nbins=None,
        barcolor=None):
    """Scatter the number of passes against accuracy expressed as Phred QV.

    :param data: pair ``(npasses, accuracy)`` of equally long sequences
    :param axis_labels: (x label, y label); the default is read from the
        report metadata when the function is defined
    :param nbins: not used by this function
    :param barcolor: not used by this function
    :return: (fig, ax)
    """
    pass_counts, accuracies = data
    phred = accuracy_as_phred_qv(accuracies)
    figure, axes = get_fig_axes_lpr()
    series = [Line(xData=pass_counts, yData=phred, style='o')]
    apply_line_data(ax=axes, line_models=series, axis_labels=axis_labels,
                    only_whole_ticks=False)
    return figure, axes
def scatter_plot_accuracy_vs_concordance(data, axis_labels, nbins, barcolor):
    """Scatter predicted accuracy against concordance with a y = x guide.

    :param data: pair ``(accuracy, concordance)`` of equally long sequences
    :param axis_labels: (x label, y label)
    :param nbins: not used by this function
    :param barcolor: not used by this function
    :return: (fig, ax)
    """
    predicted, observed = data
    figure, axes = get_fig_axes_lpr()
    series = [Line(xData=predicted, yData=observed, style='+')]
    apply_line_data(ax=axes, line_models=series, axis_labels=axis_labels,
                    only_whole_ticks=False)
    # Red diagonal spanning the current x range.
    lower, upper = axes.get_xlim()
    diagonal = np.linspace(lower, upper)
    axes.plot(diagonal, diagonal, '-', color='r')
    return figure, axes
def _create_histogram(self, stats):
    """
    Returns a fig,ax coverage histogram plot for this reference, labelled
    from ``self.spec`` (PG_COVERAGE_HIST / P_COVERAGE_HIST).

    Bin edges run from 0 up to ``stats.maxbin`` in steps of
    ``max(1, int(stats.maxbin / 100))``.

    :param stats: (ReferenceStats) provides ``maxbin`` and ``means``
    :return: (fig, ax)
    """
    target_bins = 100
    step = max(1, int(stats.maxbin / target_bins))
    # Zero coverage would give an empty edge array and make the histogram
    # construction fail with an index error, so fall back to an upper
    # bound of 1.
    upper = stats.maxbin
    if upper == 0.0:
        upper = 1
    edges = np.arange(0, upper, step)

    figure, axes = get_fig_axes_lpr()
    axis_labels = (
        get_plot_xlabel(self.spec, Constants.PG_COVERAGE_HIST,
                        Constants.P_COVERAGE_HIST),
        get_plot_ylabel(self.spec, Constants.PG_COVERAGE_HIST,
                        Constants.P_COVERAGE_HIST),
    )
    apply_histogram_data(axes, stats.means, edges, axis_labels,
                         barcolor=Constants.COLOR_STEEL_BLUE_DARK,
                         showEdges=False)
    return figure, axes
def test_save_histograms(self):
    """Save std histograms.

    Draws a small histogram with a data dump file, saves the figure with a
    thumbnail, and checks that all three files exist. The temporary output
    directory is removed afterwards, whether or not the assertions pass.
    """
    import shutil  # local import: only needed for the cleanup below

    tmpdir = tempfile.mkdtemp(prefix='pbreport_output')
    try:
        fig, ax = get_fig_axes_lpr()

        dump = os.path.join(tmpdir, 'dmp.data')
        log.debug('dmp file is {d}'.format(d=dump))

        apply_histogram_data(ax, [1, 2, 3, 4, 5, 6], 2,
                             axis_labels=('foo', 'bar'),
                             barcolor='#505050', xlim=(0, 20000),
                             data_file=dump)
        self.assertTrue(os.path.exists(dump))

        log.info("Writing temp histogram to {t}".format(t=tmpdir))
        save_figure_with_thumbnail(fig, os.path.join(tmpdir, 'foo.png'))
        self.assertTrue(os.path.exists(os.path.join(tmpdir, 'foo.png')))
        self.assertTrue(os.path.exists(os.path.join(tmpdir,
                                                    'foo_thumb.png')))
    finally:
        # Don't leave a pbreport_output* directory behind on every run.
        shutil.rmtree(tmpdir, ignore_errors=True)
def to_rl_overlay_plot(numunfilteredbasecalls_dist, readlen_dist, output_dir):
    """Overlay the unfiltered and polymerase read-length histograms.

    For each distribution the bin counts are expanded with ``expand_data``
    and drawn as a semi-transparent step-filled histogram over
    ``[0, number of bins * BinWidth]``.

    :param numunfilteredbasecalls_dist: distribution record indexable by
        'BinCounts' and 'BinWidth', whose entries expose ``metavalue``
    :param readlen_dist: distribution record of the same shape
    :param output_dir: directory the PNG and its thumbnail are written to
    :return: list holding one PlotGroup with the single overlay plot
    """
    group_id = Constants.PG_RRL
    plot_id = Constants.P_RRL
    caption = get_plot_title(spec, group_id, plot_id)
    xlabel = get_plot_xlabel(spec, group_id, plot_id)
    ylabel = get_plot_ylabel(spec, group_id, plot_id)

    def _counts_and_span(dist):
        # Integer bin counts plus the upper end of the binned range.
        counts = [int(entry.metavalue) for entry in dist['BinCounts']]
        span = len(counts) * int(dist['BinWidth'].metavalue)
        return counts, span

    unfiltered_bins, max_unfiltered = _counts_and_span(
        numunfilteredbasecalls_dist)
    poly_bins, max_poly = _counts_and_span(readlen_dist)
    unfiltered_data = expand_data(unfiltered_bins, max_unfiltered)
    poly_data = expand_data(poly_bins, max_poly)

    figure, axes = get_fig_axes_lpr()
    axes.hist(unfiltered_data, label="Unfiltered", histtype='stepfilled',
              alpha=0.3, bins=len(unfiltered_bins),
              range=[0, max_unfiltered])
    axes.hist(poly_data, label="Polymerase", histtype='stepfilled',
              alpha=0.3, bins=len(poly_bins),
              range=[0, max_poly])
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.legend()

    target = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    image, thumb = save_figure_with_thumbnail(figure, target, dpi=DEFAULT_DPI)
    plot = Plot(plot_id,
                os.path.relpath(image, output_dir),
                title=caption, caption=caption,
                thumbnail=os.path.relpath(thumb, output_dir))
    return [PlotGroup(group_id, plots=[plot])]
def scatter_plot_accuracy_vs_concordance(
        data,
        axis_labels=(
            get_plot_xlabel(spec, Constants.PG_QV_CALIBRATION,
                            Constants.P_QV_CALIBRATION),
            get_plot_ylabel(spec, Constants.PG_QV_CALIBRATION,
                            Constants.P_QV_CALIBRATION)),
        nbins=None,
        barcolor=None):
    """Scatter predicted accuracy against concordance with a y = x guide.

    :param data: pair ``(accuracy, concordance)`` of equally long sequences
    :param axis_labels: (x label, y label); the default is read from the
        report spec when the function is defined
    :param nbins: not used by this function
    :param barcolor: not used by this function
    :return: (fig, ax)
    """
    predicted, observed = data
    figure, axes = get_fig_axes_lpr()
    series = [Line(xData=predicted, yData=observed, style='+')]
    apply_line_data(ax=axes, line_models=series, axis_labels=axis_labels,
                    only_whole_ticks=False)
    # Red diagonal spanning the current x range.
    lower, upper = axes.get_xlim()
    diagonal = np.linspace(lower, upper)
    axes.plot(diagonal, diagonal, '-', color='r')
    return figure, axes
def _create_contig_plot(self, contig_coverage):
    """
    Returns a fig,ax plot of mean coverage along one contig, with a filled
    band between the minus/plus standard-deviation curves. Axis labels come
    from ``self.spec`` (PG_COVERAGE / P_COVERAGE).

    :param contig_coverage: (ContigCoverage) provides xData, yDataMean,
        yDataStdevMinus and yDataStdevPlus
    :return: (fig, ax)
    """
    positions = np.array(contig_coverage.xData)
    band = LineFill(
        xData=positions,
        yData=np.array(contig_coverage.yDataMean),
        linecolor=Constants.COLOR_STEEL_BLUE_DARK,
        alpha=0.6,
        yDataMin=np.array(contig_coverage.yDataStdevMinus),
        yDataMax=np.array(contig_coverage.yDataStdevPlus),
        edgecolor=Constants.COLOR_STEEL_BLUE_LIGHT,
        facecolor=Constants.COLOR_STEEL_BLUE_LIGHT)
    models = [band]

    figure, axes = get_fig_axes_lpr()
    x_text = get_plot_xlabel(self.spec, Constants.PG_COVERAGE,
                             Constants.P_COVERAGE)
    y_text = get_plot_ylabel(self.spec, Constants.PG_COVERAGE,
                             Constants.P_COVERAGE)
    # The same models feed both the line and the fill layer.
    apply_line_data(axes, models, (x_text, y_text))
    apply_line_fill_data(axes, models)
    return figure, axes
def plot_aggregator_histogram(a, plot_view, output_dir):
    """
    Draw the binned counts of a histogram aggregator as a bar chart.

    Bars start at ``a.dx * i`` for bin ``i`` and are ``a.dx`` wide. The
    x-axis starts at the first non-empty bin and ends at
    ``(a.nbins - t) * a.dx``, where ``t`` is the position of the first
    non-empty bin when scanning from the end. If every bin is empty the
    fallbacks are 0 for the start and ``len(a.bins) - 1`` for ``t``.

    This does NOT save the image! ``output_dir`` is not used.

    :param a: Histogram Aggregator (provides ``bins``, ``dx``, ``nbins``)
    :param plot_view: Instance of PlotViewProperties (provides ``color``,
        ``edgecolor``, ``xlabel``, ``ylabel`` and ``title``)
    :param output_dir: unused

    :type plot_view: PlotViewProperties
    :type a: HistogramAggregator
    :type output_dir: str

    :return: (fig, ax)
    """
    counts = a.bins
    bin_edges = [a.dx * k for k in xrange(len(a.bins) + 1)]

    # Offsets of the first non-zero bin, scanning from the front and from
    # the back; the defaults apply when no bin is populated.
    leading = next((k for k, v in enumerate(a.bins) if v != 0), 0)
    trailing = next((k for k, v in enumerate(a.bins[::-1]) if v != 0),
                    len(a.bins) - 1)

    min_x = leading * a.dx
    max_x = (a.nbins - trailing) * a.dx

    log.debug(a)
    log.debug((len(counts), len(bin_edges)))
    log.debug(("Min, Max", min_x, max_x))

    figure, axes = get_fig_axes_lpr()
    # bin_edges has one more entry than counts; the last edge is dropped so
    # each bar is anchored at the lower edge of its bin.
    axes.bar(bin_edges[:-1], counts, width=a.dx,
             color=plot_view.color, edgecolor=plot_view.edgecolor)

    # Custom limits
    axes.set_xlim(min_x, max_x)
    axes.set_ylabel(plot_view.ylabel)
    axes.set_xlabel(plot_view.xlabel)

    if plot_view.title is None:
        return figure, axes
    axes.set_title(plot_view.title)
    return figure, axes
def to_report_impl(dset, output_dir, dpi=DEFAULT_DPI):
    """Build the adapter report from a dataset's pipeline summary stats.

    One inter-adapter distance bar chart is written to ``output_dir`` per
    median-insert distribution. The plot group is assembled but currently
    not attached to the report (plotgroups is disabled in the Report call).

    :param dset: dataset whose ``metadata.summaryStats`` has been loaded
    :param output_dir: directory the PNG files are written to
    :param dpi: resolution handed to save_figure_with_thumbnail
    :return: the report after ``spec.apply_view``
    :raises InvalidStatsError: when no median insert distributions exist
    """
    if not dset.metadata.summaryStats.medianInsertDists:
        raise InvalidStatsError("Pipeline Summary Stats (sts.xml) not found "
                                "or missing key distributions")

    # Pull some stats (fractions expressed as percentages):
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction, decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction, decimals=2)
    attributes = [
        Attribute(Constants.A_DIMERS, adapter_dimers),
        Attribute(Constants.A_SHORT_INSERTS, short_inserts),
    ]

    if Constants.BASE_RATE_DIST in dset.metadata.summaryStats.tags:
        dist = dset.metadata.summaryStats[Constants.BASE_RATE_DIST]
        if len(dist) > 1:
            # Logger.warn is a deprecated alias of Logger.warning.
            log.warning("Dataset was merged, local base rate not applicable")
        else:
            base_rate = dist[0].sampleMed
            attributes.append(Attribute(Constants.A_BASE_RATE, base_rate))
    else:
        log.warning("No local base rate distribution available")

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(
        dset.metadata.summaryStats.medianInsertDists)
    for i, orig_ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart; a real list is built because map() is a lazy
        # iterator on Python 3:
        fig, ax = get_fig_axes_lpr()
        ax.bar([float(label) for label in ins_len_dist.labels],
               ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(
            get_plot_xlabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        ax.set_ylabel(
            get_plot_ylabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)
        # build the report:
        plots.append(
            Plot("adapter_xml_plot_{i}".format(i=i),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    # thumbnail_base is always bound here: the guard at the top guarantees
    # at least one distribution. The group is kept ready for when
    # plotgroups is re-enabled below.
    plot_groups = [
        PlotGroup(Constants.PG_ADAPTER, plots=plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]
    tables = []

    report = Report(
        Constants.R_ID,
        attributes=attributes,
        tables=tables,
    )  # plotgroups=plot_groups)
    return spec.apply_view(report)
def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """Render one read-length bar plot per distribution and group them.

    Axis labels are read from the report metadata (PG_LENGTH / P_LENGTH).

    :param PlotConstants: constants holder providing PG_LENGTH, P_LENGTH and
        P_LENGTH_PREFIX
    :param title: not used by this variant; kept for interface compatibility
    :param readLenDists: read length distributions to plot
    :param readQualDists: read quality distributions; not used because the
        quality plots are disabled, kept for interface compatibility
    :param output_dir: directory the PNG files are written to; plot paths
        are stored relative to it
    :param dpi: resolution handed to save_figure_with_thumbnail
    :param lenDistShaper: optional callable applied to every distribution
        before plotting; defaults to
        continuous_dist_shaper(readLenDists, trim_excess=True)
    :return: list containing the single read-length PlotGroup
    """
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)

    length_plots = []
    # Thumbnail of the most recently written plot; stays None when there is
    # nothing to plot so the group is built without a thumbnail instead of
    # failing on an unbound variable.
    last_thumbnail = None
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Sanity check: reshaping must preserve the total count.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)
        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0), edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).xlabel)
        len_axes.set_ylabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).ylabel)
        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(len_fig, png_fn,
                                                              dpi=dpi)
        last_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=last_thumbnail))

    group_kwargs = {"plots": length_plots}
    if last_thumbnail is not None:
        group_kwargs["thumbnail"] = last_thumbnail

    # FIXME: read-quality plots aren't useful yet, so none are generated and
    # readQualDists is ignored.
    return [PlotGroup(PlotConstants.PG_LENGTH, **group_kwargs)]
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Loads the summary stats, derives the filtering statistics table (bases,
    reads, N50, mean read length, mean read quality) from the binned
    distributions, writes one bar chart per read-length and read-quality
    distribution, and assembles the report.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises RuntimeError: when no read length distributions are available
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # stats_xml should be a dataset:
    dset = DataSet(stats_xml)
    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise RuntimeError("No Pipeline Summary Stats (sts.xml) found")

    # Build the stats table:
    nbases = 0
    nreads = 0
    n50 = 0
    readscoretotal = 0
    readscorenumber = 0
    approx_read_lens = []

    # if a merge failed there may be more than one dist:
    for rlendist in dset.metadata.summaryStats.readLenDists:
        nbases += _total_from_bins(rlendist.bins,
                                   rlendist.minBinValue,
                                   rlendist.binWidth)
        nreads += sum(rlendist.bins)

        # N50: approximate every read by a representative length of its bin
        final_bin = len(rlendist.bins) - 1
        for i, lbin in enumerate(rlendist.bins):
            # lower edge of the bin ...
            value = (i * rlendist.binWidth) + rlendist.minBinValue
            # ... shifted to the bin centre, except for the last bin
            if i != final_bin:
                value += rlendist.binWidth / 2
            approx_read_lens.extend([value] * lbin)
            # TODO(mdsmith)(2016-02-09) make sure maxOutlierValue is updated
            # during a merge /todo
            # but pop off that last value and replace it with the
            # maxOutlierValue:
            # approx_read_lens.pop()
            # approx_read_lens.append(rlendist.maxBinValue)
    n50 = np.round(compute_n50(approx_read_lens))

    for rqualdist in dset.metadata.summaryStats.readQualDists:
        readscoretotal += _total_from_bins(rqualdist.bins,
                                           rqualdist.minBinValue,
                                           rqualdist.binWidth)
        readscorenumber += sum(rqualdist.bins)

    # Means default to 0 when there is nothing to average.
    readlen = 0
    if nreads != 0:
        readlen = np.round(nbases / nreads, decimals=2)
    readQuality = 0
    if readscorenumber != 0:
        readQuality = np.round(readscoretotal / readscorenumber, decimals=2)

    row_names = ["Polymerase Read Bases",
                 "Polymerase Reads",
                 "Polymerase Read N50",
                 "Polymerase Read Length",
                 "Polymerase Read Quality"]
    _pre_filter = [np.round(nbases, decimals=2),
                   nreads,
                   n50,
                   readlen,
                   readQuality]

    # ReadLen distribution to barplot:
    length_plots = []
    for i, rlendist in enumerate(dset.metadata.summaryStats.readLenDists):
        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0), edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel('Read Length')
        len_axes.set_ylabel('Reads')
        png_fn = os.path.join(output_dir, "readLenDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(len_fig, png_fn,
                                                              dpi=dpi)
        length_plots.append(
            Plot("filter_len_xml_plot_{i}".format(i=i),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))
    # The group thumbnail is the thumbnail of the last length plot.
    plot_groups = [
        PlotGroup("filter_len_xml_plot_group",
                  title="Polymerase Read Length",
                  plots=length_plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]

    # ReadQual distribution to barplot:
    quality_plots = []
    for i, rqualdist in enumerate(dset.metadata.summaryStats.readQualDists):
        qual_fig, qual_axes = get_fig_axes_lpr()
        qual_axes.bar(rqualdist.labels, rqualdist.bins,
                      color=get_green(0), edgecolor=get_green(0),
                      width=(rqualdist.binWidth * 0.75))
        qual_axes.set_xlabel('Read Quality')
        qual_axes.set_ylabel('Reads')
        png_fn = os.path.join(output_dir, "readQualDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(qual_fig,
                                                              png_fn,
                                                              dpi=dpi)
        quality_plots.append(
            Plot("filter_qual_xml_plot_{i}".format(i=i),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))
    plot_groups.append(PlotGroup("filter_qual_xml_plot_group",
                                 title="Polymerase Read Quality",
                                 plots=quality_plots))

    # build the report:
    columns = [
        Column("filter_names_column", header="Metrics", values=row_names),
        Column("filter_stats_column", header="Values", values=_pre_filter),
    ]
    tables = [Table("filter_xml_table", "Filtering Statistics", columns)]
    return Report("filtering_stats_xml_report",
                  title="Filtering stats XML report",
                  tables=tables,
                  attributes=None,
                  plotgroups=plot_groups,
                  dataset_uuids=dataset_uuids)
def to_report(stats_xml, output_dir, dpi=72):  # TODO: make dpi matter
    """Main point of entry

    Loads the dataset at ``stats_xml``, renders one bar chart per entry of
    ``summaryStats.medianInsertDists`` into ``output_dir`` (PNG plus
    thumbnail), and builds a Report carrying the adapter-dimer and
    short-insert percentages as attributes.

    :param stats_xml: path handed to ``SubreadSet`` (and to ``loadStats``
                      when the dataset has no summary stats attached)
    :param output_dir: directory the plot images are written to
    :param dpi: resolution forwarded to ``save_figure_with_thumbnail``

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :raises IOError: if no median insert distributions are available after
                     loading the stats

    :return: the result of ``meta_rpt.apply_view`` applied to the report
    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.medianInsertDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # Pull some stats (fractions are reported as percentages, 2 decimals):
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction,
        decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction,
        decimals=2)

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(
        dset.metadata.summaryStats.medianInsertDists)
    # Axis labels come from the report metadata; look them up once instead
    # of twice per plotted distribution.
    meta_plot = meta_rpt.get_meta_plotgroup(
        Constants.PG_ADAPTER).get_meta_plot(Constants.P_ADAPTER)
    for i, orig_ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        # Materialise the labels as a list: on Python 3 ``map`` returns a
        # lazy iterator, which matplotlib cannot use as x positions.
        ax.bar([float(label) for label in ins_len_dist.labels],
               ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(meta_plot.xlabel)
        ax.set_ylabel(meta_plot.ylabel)
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report:
        plots.append(Plot("adapter_xml_plot_{i}".format(i=i),
                          os.path.relpath(png_base, output_dir),
                          thumbnail=os.path.relpath(thumbnail_base,
                                                    output_dir)))

    # The emptiness check above guarantees at least one iteration, so
    # thumbnail_base is bound here (it is the last plot's thumbnail).
    # NOTE: plot_groups is built but not passed to Report below; the
    # plotgroups argument is currently disabled.
    plot_groups = [PlotGroup(Constants.PG_ADAPTER,
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base,
                                                       output_dir))]

    attributes = [Attribute(Constants.A_DIMERS, adapter_dimers),
                  Attribute(Constants.A_SHORT_INSERTS, short_inserts)]

    tables = []

    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attributes,
                    tables=tables)  # plotgroups=plot_groups)

    return meta_rpt.apply_view(report)
def _make_plot(data, png_fn, bounds=None, dpi=DEFAULT_DPI, nolegend=False,
               x_label="Subread Length (bp)"):
    """Make a scatterplot of read length and concordance

    Points are split into up to four equal-width bins on the third column
    of ``data`` and each bin is drawn in its own colour, labelled
    ``QV >= <bin start>``.

    :param data: array indexed as ``data[:, k]``; column 0 is plotted on
                 the x axis, column 1 on the y axis and column 2 is the
                 value that is binned (presumably a numpy 2-D array with
                 at least three columns -- confirm against callers)
    :param png_fn: output file name handed to ``save_figure_with_thumbnail``
    :param bounds: optional ``"xmin:xmax:ymin:ymax"`` string of integers
                   used as axis limits; falsy means autoscale
    :param dpi: resolution; converted with ``int()`` before saving
    :param nolegend: when true, no legend is drawn
    :param x_label: label for the x axis

    :return: None; the figure is written to disk and then closed
    """
    fig, axes = get_fig_axes_lpr()
    try:
        # from color brewer
        qv_colors = ['#fc9272', '#fb6a4a', '#ef3b2c', '#cb181d']

        # plot by z-values
        qv_min = 1.0
        handles = []
        labels = []
        # Make sure the max actually gets in a bin
        qv_max = max(data[:, 2]) + 1
        qv_delta = (qv_max - qv_min) / len(qv_colors)
        # zip() caps the number of bins at the number of colours.
        for qv_bin, color in zip(np.arange(qv_min, qv_max, qv_delta),
                                 qv_colors):
            if qv_bin > qv_max:
                break
            qv_bin_max = qv_bin + qv_delta
            in_bin = (data[:, 2] >= qv_bin) & (data[:, 2] < qv_bin_max)
            points = data[in_bin, :]
            if len(points[:, 0]) > 0:
                l, = axes.plot(points[:, 0], points[:, 1], 'o',
                               c=color, mec=color, alpha=0.1, ms=2.0)
                handles.append(l)
                labels.append('QV >= %d' % qv_bin)

        if not nolegend:
            axes.legend(handles, labels, loc='lower right', numpoints=1,
                        borderpad=0.3, markerscale=2.0, handletextpad=0.3,
                        labelspacing=0.3, handlelength=0.5)
            axes.get_legend().get_frame().set_edgecolor('#a0a0a0')

        if bounds:
            # A list (not a lazy ``map``) so it can be indexed on Python 3.
            intbounds = [int(field) for field in bounds.split(":")]
            # Positional limits work on every matplotlib release; the
            # xmin/xmax/ymin/ymax keywords were removed in newer versions.
            axes.set_xlim(intbounds[0], intbounds[1])
            axes.set_ylim(intbounds[2], intbounds[3])

        axes.set_xlabel(x_label)
        axes.set_ylabel('Mapped Concordance')
        save_figure_with_thumbnail(fig, png_fn, dpi=int(dpi))
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
def to_report(stats_xml, output_dir, dpi=72):  # TODO: make dpi matter
    """Main point of entry

    Loads the dataset at ``stats_xml``, renders one bar chart per entry of
    ``summaryStats.medianInsertDists`` into ``output_dir`` (PNG plus
    thumbnail), and returns an "Adapter Report" with a two-row statistics
    table and one plot group.

    :param stats_xml: path handed to ``DataSet`` (and to ``loadStats`` when
                      the dataset has no summary stats attached)
    :param output_dir: directory the plot images are written to; plot and
                       thumbnail paths in the report are relative to it
    :param dpi: resolution forwarded to ``save_figure_with_thumbnail``

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :raises RuntimeError: if no median insert distributions are available
                          after loading the stats

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = DataSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.medianInsertDists:
        raise RuntimeError("No Pipeline Summary Stats (sts.xml) found")

    # Pull some stats (fractions are reported as percentages, 2 decimals):
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction,
        decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction,
        decimals=2)

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    for i, ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        # Materialise the labels as a list: on Python 3 ``map`` returns a
        # lazy iterator, which matplotlib cannot use as x positions.
        ax.bar([float(label) for label in ins_len_dist.labels],
               ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel('Median Distance Between Adapters')
        ax.set_ylabel('Reads')
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report:
        # Paths are made relative to output_dir (not the current working
        # directory) so the report resolves its images wherever it is run
        # from.
        plots.append(Plot("adapter_xml_plot_{i}".format(i=i),
                          os.path.relpath(png_base, output_dir),
                          thumbnail=os.path.relpath(thumbnail_base,
                                                    output_dir)))

    # The emptiness check above guarantees at least one iteration, so
    # thumbnail_base is bound here (it is the last plot's thumbnail).
    plot_groups = [PlotGroup("adapter_xml_plot_group",
                             title="Observed Insert Length Distribution",
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base,
                                                       output_dir))]

    # NOTE: the "adaper_*" column ids are misspelled but are runtime
    # identifiers; they are kept unchanged so existing consumers still match.
    columns = [Column("adaper_xml_conditions", None,
                      ('Adapter Dimers (0-10bp)',
                       'Short Inserts (11-100bp)')),
               Column("adaper_xml_results", None,
                      (adapter_dimers, short_inserts))]

    tables = [Table("adapter_xml_table", "Adapter Statistics", columns)]

    report = Report("adapter_xml_report",
                    title="Adapter Report",
                    tables=tables,
                    attributes=None,
                    plotgroups=plot_groups)

    return report