def test_reports_with_fixed_bins(self):
    # Rebinning a fixed-bin distribution with continuous_dist_shaper must
    # never change the total count.  The expected bin count is the requested
    # one, except that a request for 0 bins, or a distribution whose bins
    # are all zero, comes back with its original number of bins.

    def load_fixture():
        # Fresh SubreadSet carrying the fixed-bin sts fixture stats.
        sset = SubreadSet()
        sset.loadStats(get_fixed_bin_sts())
        return sset

    def first_dist(sset, name):
        # First distribution stored under `name` in the summary stats.
        return getattr(sset.metadata.summaryStats, name)[0]

    def check_rebinned(dists, requested, expected_len, source_len):
        # Reshape every distribution to `requested` bins, then check that
        # the input still has `source_len` bins, the output has
        # `expected_len` bins, and the total count is conserved.
        shaper = continuous_dist_shaper(dists, nbins=requested)
        reshaped = [shaper(dist) for dist in dists]
        self.assertEqual(len(dists[0].bins), source_len)
        self.assertEqual(len(reshaped[0].bins), expected_len)
        self.assertEqual(sum(dists[0].bins), sum(reshaped[0].bins))

    # TODO readQualDists are currently unpopulated, turn back on when
    # they're repopulated by adding ('readQualDists', 50) here.
    expected_bins = [('medianInsertDists', 200), ('readLenDists', 200)]
    empty_requests = [0, 1, 2, 10, 40, 41, 49, 50, 51, 200, 500]
    resizing_requests = [1, 2, 3, 4, 7, 10, 40, 41, 49, 50, 51, 200, 500]

    for dist_name, nbins in expected_bins:
        ss = load_fixture()

        # ss2: the same histogram shifted right by three bins.
        ss2 = load_fixture()
        shifted = [0, 0, 0] + first_dist(ss2, dist_name).bins[:-3]
        first_dist(ss2, dist_name).bins = shifted

        ss3 = ss + ss2

        # ss4: every bin emptied, so any requested size leaves it as is.
        ss4 = load_fixture()
        emptied = [0] * len(first_dist(ss4, dist_name).bins)
        first_dist(ss4, dist_name).bins = emptied

        dists = getattr(ss4.metadata.summaryStats, dist_name)
        self.assertEqual(len(dists), 1)
        for requested in empty_requests:
            check_rebinned(dists, requested, nbins, nbins)

        for sset in (ss, ss2, ss3):
            dists = getattr(sset.metadata.summaryStats, dist_name)
            self.assertEqual(len(dists), 1)
            # Requesting 0 bins is a no-op ...
            check_rebinned(dists, 0, nbins, nbins)
            # ... every other request yields exactly that many bins.
            for requested in resizing_requests:
                check_rebinned(dists, requested, requested, nbins)
def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """Draw one read-length bar plot per distribution and group the plots.

    Each distribution in ``readLenDists`` is reshaped, drawn as a bar chart,
    saved through ``save_figure_with_thumbnail`` and wrapped in a ``Plot``.
    All plots go into a single ``PlotGroup`` whose thumbnail is that of the
    last plot drawn.

    :param PlotConstants: holder of the ``PG_LENGTH``, ``P_LENGTH`` and
        ``P_LENGTH_PREFIX`` ids used for axis labels, plot ids and file names
    :param title: title of the plot group; also title and caption of every
        plot
    :param readLenDists: read-length distributions to plot
    :param readQualDists: accepted for interface compatibility only;
        read-quality plots are not produced (see FIXME below)
    :param output_dir: directory the figures are written to; paths stored in
        the plots are relative to it
    :param dpi: resolution passed to ``save_figure_with_thumbnail``
    :param lenDistShaper: callable applied to each distribution before
        plotting; built from ``readLenDists`` when omitted
    :return: list holding the length ``PlotGroup``, or an empty list when
        there is nothing to plot
    """
    if not readLenDists:
        # No distribution means no figure and therefore no thumbnail to
        # attach to the group, so there is no group to report.
        return []

    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)

    length_plots = []
    group_thumbnail = None
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Reshaping must conserve the total count.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)

        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0), edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(
            get_plot_xlabel(spec, PlotConstants.PG_LENGTH,
                            PlotConstants.P_LENGTH))
        len_axes.set_ylabel(
            get_plot_ylabel(spec, PlotConstants.PG_LENGTH,
                            PlotConstants.P_LENGTH))

        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            len_fig, png_fn, dpi=dpi)
        # Remembered so the group can reuse the last plot's thumbnail.
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 title=title, caption=title,
                 thumbnail=group_thumbnail))

    # FIXME read-quality plots (a PG_QUAL group of P_QUAL plots built from
    # readQualDists) aren't useful yet, so only the length group is returned.
    return [
        PlotGroup(PlotConstants.PG_LENGTH,
                  title=title,
                  plots=length_plots,
                  thumbnail=group_thumbnail)
    ]
def test_reports_with_fixed_bins(self):
    # continuous_dist_shaper on fixed-bin distributions: the total count is
    # always preserved; the result has the requested number of bins unless
    # 0 bins were requested or the distribution is all zeros, in which case
    # the original bin count is kept.

    def stats_of(sset, name):
        # List of distributions stored under `name` in the summary stats.
        return getattr(sset.metadata.summaryStats, name)

    def assert_rebin(dists, n, expected_len, source_len):
        # One shaper per requested size, applied to every distribution.
        shape = continuous_dist_shaper(dists, nbins=n)
        shaped = [shape(d) for d in dists]
        self.assertEqual(len(dists[0].bins), source_len)
        self.assertEqual(len(shaped[0].bins), expected_len)
        self.assertEqual(sum(dists[0].bins), sum(shaped[0].bins))

    # TODO readQualDists are currently unpopulated, turn back on when
    # they're repopulated (add "readQualDists" with 50 bins).
    names = ["medianInsertDists", "readLenDists"]
    bin_counts = [200, 200]
    ops = [1, 2, 3, 4, 7, 10, 40, 41, 49, 50, 51, 200, 500]

    for dist_name, nbins in zip(names, bin_counts):
        plain = SubreadSet()
        plain.loadStats(get_fixed_bin_sts())

        # Second copy with its histogram shifted right by three bins.
        shifted = SubreadSet()
        shifted.loadStats(get_fixed_bin_sts())
        old_bins = stats_of(shifted, dist_name)[0].bins
        stats_of(shifted, dist_name)[0].bins = [0, 0, 0] + old_bins[:-3]

        merged = plain + shifted

        # Third copy with every bin set to zero.
        zeroed = SubreadSet()
        zeroed.loadStats(get_fixed_bin_sts())
        old_bins = stats_of(zeroed, dist_name)[0].bins
        stats_of(zeroed, dist_name)[0].bins = [0] * len(old_bins)

        # An all-zero distribution keeps its bin count for any request.
        dists = stats_of(zeroed, dist_name)
        self.assertEqual(len(dists), 1)
        for n in [0, 1, 2, 10, 40, 41, 49, 50, 51, 200, 500]:
            assert_rebin(dists, n, nbins, nbins)

        # (requested, expected): 0 is a no-op, anything else is honoured.
        plan = [(0, nbins)] + [(n, n) for n in ops]
        for sset in [plain, shifted, merged]:
            dists = stats_of(sset, dist_name)
            self.assertEqual(len(dists), 1)
            for n, expected_len in plan:
                assert_rebin(dists, n, expected_len, nbins)
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Loads summary stats from ``stats_xml``, collects the read and insert
    attributes, renders the read and insert length plots into
    ``output_dir`` and returns the assembled report.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int
    :param dpi: resolution forwarded to the plotting helpers

    :rtype: Report
    :raises IOError: when no read length distributions are available
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))

    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)
    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []

    # Only read after the optional loadStats() above.
    stats = dset.metadata.summaryStats
    if not stats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # we want all of the length distributions in this report to look the
    # same, so we make the shaper here and pass it around:
    alldists = stats.readLenDists[:] + stats.insertReadLenDists[:]
    len_dist_shaper = continuous_dist_shaper(alldists, trim_excess=True)

    attr = to_read_stats_attributes(
        readLenDists=stats.readLenDists,
        readQualDists=stats.readQualDists)
    attr.extend(
        to_insert_stats_attributes(
            readLenDists=stats.insertReadLenDists,
            readQualDists=stats.insertReadQualDists))

    # dpi is forwarded so the caller's resolution is honoured instead of the
    # helpers' default.
    # NOTE(review): relies on to_read_stats_plots / to_insert_stats_plots
    # accepting a dpi keyword, as _to_read_stats_plots does -- confirm.
    plot_groups = to_read_stats_plots(
        readLenDists=stats.readLenDists,
        readQualDists=stats.readQualDists,
        output_dir=output_dir,
        dpi=dpi,
        lenDistShaper=len_dist_shaper)
    plot_groups.extend(
        to_insert_stats_plots(
            readLenDists=stats.insertReadLenDists,
            readQualDists=stats.insertReadQualDists,
            output_dir=output_dir,
            dpi=dpi,
            lenDistShaper=len_dist_shaper))

    # build the report:
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)
    return meta_rpt.apply_view(report)
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Reads the summary stats behind ``stats_xml``, gathers read and insert
    attributes, draws the read and insert length plots under
    ``output_dir`` and wraps everything in a report.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int
    :param dpi: resolution handed on to the plotting helpers

    :rtype: Report
    :raises IOError: when the read length distributions are missing
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))

    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)
    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []

    # Taken after the optional loadStats() so it reflects the loaded stats.
    summary = dset.metadata.summaryStats
    if not summary.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # we want all of the length distributions in this report to look the
    # same, so we make the shaper here and pass it around:
    alldists = summary.readLenDists[:] + summary.insertReadLenDists[:]
    len_dist_shaper = continuous_dist_shaper(alldists, trim_excess=True)

    attr = to_read_stats_attributes(
        readLenDists=summary.readLenDists,
        readQualDists=summary.readQualDists)
    attr.extend(to_insert_stats_attributes(
        readLenDists=summary.insertReadLenDists,
        readQualDists=summary.insertReadQualDists))

    # Pass dpi through; without it the requested resolution is ignored and
    # the helpers fall back to their own default.
    # NOTE(review): assumes to_read_stats_plots / to_insert_stats_plots take
    # a dpi keyword like _to_read_stats_plots -- confirm.
    plot_groups = to_read_stats_plots(
        readLenDists=summary.readLenDists,
        readQualDists=summary.readQualDists,
        output_dir=output_dir,
        dpi=dpi,
        lenDistShaper=len_dist_shaper)
    plot_groups.extend(to_insert_stats_plots(
        readLenDists=summary.insertReadLenDists,
        readQualDists=summary.insertReadQualDists,
        output_dir=output_dir,
        dpi=dpi,
        lenDistShaper=len_dist_shaper))

    # build the report:
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)
    return meta_rpt.apply_view(report)
def to_report(stats_xml, output_dir, dpi=72):
    # TODO: make dpi matter
    """Main point of entry

    Reads the summary stats behind ``stats_xml``, reports the adapter dimer
    and short insert percentages as attributes, and renders one bar chart
    per median insert distribution into ``output_dir``.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises IOError: when the median insert distributions are missing
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.medianInsertDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # Pull some stats (fractions expressed as percentages, 2 decimals):
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction,
        decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction,
        decimals=2)

    # Axis labels come from the same meta plot for every chart.
    meta_plot = meta_rpt.get_meta_plotgroup(
        Constants.PG_ADAPTER).get_meta_plot(Constants.P_ADAPTER)

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(
        dset.metadata.summaryStats.medianInsertDists)
    for i, orig_ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        # map() is a one-shot iterator on Python 3, not a list; hand the
        # plotting call a concrete list of floats so it works on 2 and 3.
        bar_positions = [float(label) for label in ins_len_dist.labels]
        ax.bar(bar_positions, ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(meta_plot.xlabel)
        ax.set_ylabel(meta_plot.ylabel)
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report:
        plots.append(Plot("adapter_xml_plot_{i}".format(i=i),
                          os.path.relpath(png_base, output_dir),
                          thumbnail=os.path.relpath(thumbnail_base,
                                                    output_dir)))

    # The group is still built, but it is currently not attached to the
    # report (see the disabled plotgroups argument below).
    plot_groups = [PlotGroup(Constants.PG_ADAPTER,
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base,
                                                       output_dir))]
    attributes = [
        Attribute(Constants.A_DIMERS, adapter_dimers),
        Attribute(Constants.A_SHORT_INSERTS, short_inserts),
    ]

    tables = []
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attributes,
                    tables=tables,
                    )  # plotgroups=plot_groups)
    return meta_rpt.apply_view(report)
def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """Draw one read-length bar plot per distribution and group the plots.

    Each distribution in ``readLenDists`` is reshaped, drawn as a bar chart
    labelled from ``meta_rpt``, saved through
    ``save_figure_with_thumbnail`` and wrapped in a ``Plot``.  All plots go
    into a single ``PlotGroup`` whose thumbnail is that of the last plot
    drawn.

    :param PlotConstants: holder of the ``PG_LENGTH``, ``P_LENGTH`` and
        ``P_LENGTH_PREFIX`` ids used for axis labels, plot ids and file names
    :param title: accepted for interface compatibility; not used by this
        implementation
    :param readLenDists: read-length distributions to plot
    :param readQualDists: accepted for interface compatibility only;
        read-quality plots are not produced (see FIXME below)
    :param output_dir: directory the figures are written to; paths stored in
        the plots are relative to it
    :param dpi: resolution passed to ``save_figure_with_thumbnail``
    :param lenDistShaper: callable applied to each distribution before
        plotting; built from ``readLenDists`` when omitted
    :return: list holding the length ``PlotGroup``, or an empty list when
        there is nothing to plot
    """
    if not readLenDists:
        # Without a distribution no figure is drawn, so there is no
        # thumbnail for the group and nothing worth reporting.
        return []

    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)

    length_plots = []
    group_thumbnail = None
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Reshaping must conserve the total count.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)

        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0), edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).xlabel)
        len_axes.set_ylabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).ylabel)

        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            len_fig, png_fn, dpi=dpi)
        # Remembered so the group can reuse the last plot's thumbnail.
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=group_thumbnail))

    # FIXME read-quality plots (a PG_QUAL group of P_QUAL plots built from
    # readQualDists) aren't useful yet, so only the length group is
    # returned.  When they come back, label both axes from the
    # PG_QUAL / P_QUAL meta plot.
    return [
        PlotGroup(PlotConstants.PG_LENGTH,
                  plots=length_plots,
                  thumbnail=group_thumbnail)
    ]
def to_report_impl(dset, output_dir, dpi=DEFAULT_DPI):
    """Build the adapter report for an already-loaded dataset.

    Reports the adapter dimer and short insert percentages, plus the local
    base rate when a single base-rate distribution is present, and renders
    one bar chart per median insert distribution into ``output_dir``.

    :param dset: dataset whose ``metadata.summaryStats`` is populated
    :param output_dir: directory the figures are written to
    :param dpi: resolution passed to ``save_figure_with_thumbnail``
    :return: the result of ``spec.apply_view`` on the assembled report
    :raises InvalidStatsError: when the median insert distributions are
        missing
    """
    stats = dset.metadata.summaryStats
    if not stats.medianInsertDists:
        raise InvalidStatsError("Pipeline Summary Stats (sts.xml) not found "
                                "or missing key distributions")

    # Pull some stats (fractions expressed as percentages, 2 decimals):
    adapter_dimers = np.round(100.0 * stats.adapterDimerFraction, decimals=2)
    short_inserts = np.round(100.0 * stats.shortInsertFraction, decimals=2)
    attributes = [
        Attribute(Constants.A_DIMERS, adapter_dimers),
        Attribute(Constants.A_SHORT_INSERTS, short_inserts),
    ]

    if Constants.BASE_RATE_DIST in stats.tags:
        dist = stats[Constants.BASE_RATE_DIST]
        if len(dist) > 1:
            # log.warn is a deprecated alias; use log.warning.
            log.warning("Dataset was merged, local base rate not applicable")
        else:
            base_rate = dist[0].sampleMed
            attributes.append(Attribute(Constants.A_BASE_RATE, base_rate))
    else:
        log.warning("No local base rate distribution available")

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(stats.medianInsertDists)
    for i, orig_ins_len_dist in enumerate(stats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart:
        fig, ax = get_fig_axes_lpr()
        # map() is a one-shot iterator on Python 3, not a list; hand the
        # plotting call a concrete list of floats so it works on 2 and 3.
        bar_positions = [float(label) for label in ins_len_dist.labels]
        ax.bar(bar_positions, ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(
            get_plot_xlabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        ax.set_ylabel(
            get_plot_ylabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report:
        plots.append(
            Plot("adapter_xml_plot_{i}".format(i=i),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    # The group is still built, but it is currently not attached to the
    # report (see the disabled plotgroups argument below).
    plot_groups = [
        PlotGroup(Constants.PG_ADAPTER,
                  plots=plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]

    tables = []
    report = Report(
        Constants.R_ID,
        attributes=attributes,
        tables=tables,
    )  # plotgroups=plot_groups)
    return spec.apply_view(report)