def test_to_dict(self):
    """
    Check that to_dict prefixes the id of every report sub-element with
    the id of its parent element.
    """
    report = Report("redfang")
    first_attr = Attribute("a", "b")
    second_attr = Attribute("a2", "b2")
    report.add_attribute(first_attr)
    report.add_attribute(second_attr)

    plot_group = PlotGroup("pgid")
    for plot_id, image in (("pid", "anImg"), ("pid2", "anImg2")):
        plot_group.add_plot(Plot(plot_id, image))
    report.add_plotgroup(plot_group)

    table = Table("tabid")
    table.add_column(Column("c1"))
    report.add_table(table)

    as_dict = report.to_dict()
    log.debug("\n" + pformat(as_dict))

    # Top-level id is untouched; everything below is dotted onto it.
    self.assertEqual("redfang", as_dict["id"])
    self.assertEqual("redfang.a", as_dict["attributes"][0]["id"])
    self.assertEqual("redfang.a2", as_dict["attributes"][1]["id"])
    self.assertEqual("redfang.pgid", as_dict["plotGroups"][0]["id"])
    self.assertEqual("redfang.pgid.pid",
                     as_dict["plotGroups"][0]["plots"][0]["id"])
    self.assertEqual("redfang.pgid.pid2",
                     as_dict["plotGroups"][0]["plots"][1]["id"])
    self.assertEqual("redfang.tabid", as_dict["tables"][0]["id"])
    self.assertEqual("redfang.tabid.c1",
                     as_dict["tables"][0]["columns"][0]["id"])
def test_to_dict_multi(self):
    """
    Multiple complex elements.

    Check that to_dict prefixes sub-element ids with the parent id when
    the report holds two plot groups and two tables.
    """
    report = Report('redfang')
    attr_a = Attribute('a', 'b')
    attr_a2 = Attribute('a2', 'b2')
    report.add_attribute(attr_a)
    report.add_attribute(attr_a2)

    first_group = PlotGroup('pgid')
    first_group.add_plot(Plot('pid', 'anImg'))
    first_group.add_plot(Plot('pid2', 'anImg2'))
    report.add_plotgroup(first_group)

    second_group = PlotGroup('pgid2')
    second_group.add_plot(Plot('pid2', 'anImg2'))
    second_group.add_plot(Plot('pid22', 'anImg22'))
    report.add_plotgroup(second_group)

    first_table = Table('tabid')
    first_table.add_column(Column('c1'))
    report.add_table(first_table)

    second_table = Table('tabid2')
    second_table.add_column(Column('c2'))
    report.add_table(second_table)

    d = report.to_dict()
    log.debug(str(d))

    self.assertEqual('redfang', d['id'])
    self.assertEqual('redfang.a', d['attributes'][0]['id'])
    self.assertEqual('redfang.a2', d['attributes'][1]['id'])

    # First plot group and its two plots.
    self.assertEqual('redfang.pgid', d['plotGroups'][0]['id'])
    self.assertEqual('redfang.pgid.pid',
                     d['plotGroups'][0]['plots'][0]['id'])
    self.assertEqual('redfang.pgid.pid2',
                     d['plotGroups'][0]['plots'][1]['id'])

    # Second plot group and its two plots.
    self.assertEqual('redfang.pgid2', d['plotGroups'][1]['id'])
    self.assertEqual('redfang.pgid2.pid2',
                     d['plotGroups'][1]['plots'][0]['id'])
    self.assertEqual('redfang.pgid2.pid22',
                     d['plotGroups'][1]['plots'][1]['id'])

    # Both tables and their single columns.
    self.assertEqual('redfang.tabid', d['tables'][0]['id'])
    self.assertEqual('redfang.tabid.c1',
                     d['tables'][0]['columns'][0]['id'])
    self.assertEqual('redfang.tabid2', d['tables'][1]['id'])
    self.assertEqual('redfang.tabid2.c2',
                     d['tables'][1]['columns'][0]['id'])

    log.info(repr(report))
    self.assertIsNotNone(repr(report))
def test_to_dict(self):
    """
    Verify that serialising a report with to_dict prepends the parent
    element id to the id of each nested element.
    """
    rpt = Report('redfang')
    attr_one = Attribute('a', 'b')
    attr_two = Attribute('a2', 'b2')
    rpt.add_attribute(attr_one)
    rpt.add_attribute(attr_two)

    group = PlotGroup('pgid')
    group.add_plot(Plot('pid', 'anImg'))
    group.add_plot(Plot('pid2', 'anImg2'))
    rpt.add_plotgroup(group)

    tbl = Table('tabid')
    tbl.add_column(Column('c1'))
    rpt.add_table(tbl)

    serialised = rpt.to_dict()
    log.debug("\n" + pformat(serialised))

    self.assertEqual('redfang', serialised['id'])
    self.assertEqual('redfang.a', serialised['attributes'][0]['id'])
    self.assertEqual('redfang.a2', serialised['attributes'][1]['id'])
    self.assertEqual('redfang.pgid', serialised['plotGroups'][0]['id'])
    self.assertEqual('redfang.pgid.pid',
                     serialised['plotGroups'][0]['plots'][0]['id'])
    self.assertEqual('redfang.pgid.pid2',
                     serialised['plotGroups'][0]['plots'][1]['id'])
    self.assertEqual('redfang.tabid', serialised['tables'][0]['id'])
    self.assertEqual('redfang.tabid.c1',
                     serialised['tables'][0]['columns'][0]['id'])
def test_get_plotgroup_by_id(self):
    # A plot group added to the report must be retrievable by its id.
    report = Report('redfang')
    expected_group = PlotGroup('pgid1')
    expected_group.add_plot(Plot('pid1', 'anImg'))
    report.add_plotgroup(expected_group)

    found_group = report.get_plotgroup_by_id('pgid1')

    self.assertEqual(found_group, expected_group)
def test_get_plotgroup_by_id_with_bad_id(self):
    # Looking up an id that was never added is expected to yield None.
    report = Report('redfang')
    group = PlotGroup('pgid1')
    group.add_plot(Plot('pid1', 'anImg'))
    report.add_plotgroup(group)

    missing = report.get_plotgroup_by_id('id_that_does_not_exist')

    self.assertIsNone(missing)
def test_get_plot_by_id(self):
    # A plot must be retrievable through its plot group by id.
    report = Report('redfang')
    group = PlotGroup('pgid1')
    expected_plot = Plot('pid1', 'anImg')
    group.add_plot(expected_plot)
    report.add_plotgroup(group)

    fetched_group = report.get_plotgroup_by_id('pgid1')
    found_plot = fetched_group.get_plot_by_id('pid1')

    assert found_plot == expected_plot
def test_get_plot_by_id_with_bad_id(self):
    # Asking an existing plot group for an unknown plot id should give None.
    report = Report('redfang')
    group = PlotGroup('pgid1')
    only_plot = Plot('pid1', 'anImg')
    group.add_plot(only_plot)
    report.add_plotgroup(group)

    fetched_group = report.get_plotgroup_by_id('pgid1')
    missing_plot = fetched_group.get_plot_by_id('id_that_does_not_exist')

    assert missing_plot is None
def test_to_dict_multi(self):
    """
    Multiple complex elements.

    The id of each report sub-element must be prefixed with the id of its
    parent element in the output of to_dict.
    """
    report = Report("redfang")
    attr_a = Attribute("a", "b")
    attr_a2 = Attribute("a2", "b2")
    report.add_attribute(attr_a)
    report.add_attribute(attr_a2)

    # (plot group id, [(plot id, image), ...]) in insertion order.
    group_layout = (
        ("pgid", (("pid", "anImg"), ("pid2", "anImg2"))),
        ("pgid2", (("pid2", "anImg2"), ("pid22", "anImg22"))),
    )
    for group_id, plot_args in group_layout:
        group = PlotGroup(group_id)
        for plot_id, image in plot_args:
            group.add_plot(Plot(plot_id, image))
        report.add_plotgroup(group)

    for table_id, column_id in (("tabid", "c1"), ("tabid2", "c2")):
        table = Table(table_id)
        table.add_column(Column(column_id))
        report.add_table(table)

    d = report.to_dict()
    log.debug(str(d))

    self.assertEqual("redfang", d["id"])
    self.assertEqual("redfang.a", d["attributes"][0]["id"])
    self.assertEqual("redfang.a2", d["attributes"][1]["id"])
    self.assertEqual("redfang.pgid", d["plotGroups"][0]["id"])
    self.assertEqual("redfang.pgid.pid",
                     d["plotGroups"][0]["plots"][0]["id"])
    self.assertEqual("redfang.pgid.pid2",
                     d["plotGroups"][0]["plots"][1]["id"])
    self.assertEqual("redfang.pgid2", d["plotGroups"][1]["id"])
    self.assertEqual("redfang.pgid2.pid2",
                     d["plotGroups"][1]["plots"][0]["id"])
    self.assertEqual("redfang.pgid2.pid22",
                     d["plotGroups"][1]["plots"][1]["id"])
    self.assertEqual("redfang.tabid", d["tables"][0]["id"])
    self.assertEqual("redfang.tabid.c1",
                     d["tables"][0]["columns"][0]["id"])
    self.assertEqual("redfang.tabid2", d["tables"][1]["id"])
    self.assertEqual("redfang.tabid2.c2",
                     d["tables"][1]["columns"][0]["id"])

    log.info(repr(report))
    self.assertIsNotNone(repr(report))
def test_basic(self):
    # Build a fully populated plot group and validate its dict form.
    first_plot = Plot("p1", "image.png", thumbnail="thumb.png")
    second_plot = Plot("p2", "image2.png", thumbnail="thumb2.png")

    plot_group = PlotGroup(
        "my_id",
        title="My Plots",
        legend="Legend",
        thumbnail=first_plot.thumbnail,
        plots=[first_plot, second_plot],
    )

    as_dict = plot_group.to_dict()
    validate_plot_group(as_dict)
    self.assertIsNotNone(as_dict)
def to_report(ccs_set, output_dir):
    """
    Build the CCS report for a dataset.

    Per-movie statistics are pooled into read length, accuracy and
    number-of-passes arrays, one plot group is created per plot, and the
    movie table (plus a barcode table when ``ccs_set.isBarcoded`` is true)
    is attached.

    :param ccs_set: dataset object; must provide toExternalFiles(),
        isBarcoded and uuid
    :param output_dir: directory handed to the plot-creation helpers
    :return: the result of meta_rpt.apply_view on the assembled Report
    """
    bam_files = list(ccs_set.toExternalFiles())
    log.info("Generating report from files: {f}".format(f=bam_files))

    bam_stats, movie_names = _stats_from_dataset(ccs_set)
    movie_results = _stats_to_movie_results(bam_stats, movie_names)
    log.debug("\n" + pformat(movie_results))

    # Pool the per-movie arrays into one array per metric.
    readlengths = np.concatenate([m.read_lengths for m in movie_results])
    accuracies = np.concatenate([m.accuracies for m in movie_results])
    num_passes = np.concatenate([m.num_passes for m in movie_results])

    readlength_plot = create_readlength_plot(readlengths, output_dir)
    accuracy_plot = create_accuracy_plot(accuracies, output_dir)
    npasses_plot = create_npasses_plot(num_passes, output_dir)
    scatter_plot = create_scatter_plot((num_passes, accuracies), output_dir)

    # Each group holds exactly one plot and reuses that plot's thumbnail.
    plotgroups = [
        PlotGroup(group_id, plots=[plot], thumbnail=plot.thumbnail)
        for group_id, plot in (
            (Constants.PG_READLENGTH, readlength_plot),
            (Constants.PG_ACCURACY, accuracy_plot),
            (Constants.PG_NPASSES, npasses_plot),
            (Constants.PG_SCATTER, scatter_plot),
        )
    ]

    movie_table = _movie_results_to_table(movie_results)
    log.info(str(movie_table))
    tables = [movie_table]
    if ccs_set.isBarcoded:
        tables.append(_make_barcode_table(bam_stats, ccs_set))

    attributes = _movie_results_to_attributes(movie_results)

    report = Report(Constants.R_ID,
                    title="CCS Report",
                    tables=tables,
                    attributes=attributes,
                    plotgroups=plotgroups,
                    dataset_uuids=(ccs_set.uuid, ))
    return meta_rpt.apply_view(report)
def test_basic(self):
    # A plot group with title, legend, thumbnail and two plots must
    # serialise to a dict that passes validate_plot_group.
    plots = [
        Plot('p1', 'image.png', thumbnail='thumb.png'),
        Plot('p2', 'image2.png', thumbnail='thumb2.png'),
    ]
    group_thumbnail = plots[0].thumbnail

    pg = PlotGroup('my_id', title="My Plots", legend="Legend",
                   thumbnail=group_thumbnail, plots=plots)

    serialised = pg.to_dict()
    validate_plot_group(serialised)
    self.assertIsNotNone(serialised)
def test_to_dict(self):
    """Test plotGroup to_dict function."""
    a = PlotGroup('123', title='foo title', legend='foo legend',
                  thumbnail='foo thumbnail')
    a.add_plot(Plot('id', 'i1', caption='a caption'))

    d = a.to_dict()
    log.debug(pformat(d))

    # assertEqual is used instead of the deprecated assertEquals alias,
    # which newer unittest releases no longer provide.
    self.assertEqual('123', d['id'])
    self.assertEqual('foo title', d['title'])
    self.assertEqual('foo legend', d['legend'])
    self.assertEqual('foo thumbnail', d['thumbnail'])
    self.assertEqual(1, len(d['plots']))

    log.info(a)
    self.assertIsNotNone(repr(a))
def _create_coverage_histo_plot_grp(self, stats, output_dir):
    """
    Create the plot group that contains the coverage histogram plot.

    The figure is saved as 'coverage_histogram.png' (plus thumbnail) in
    output_dir; the plot and group reference the files by base name.

    :param stats: (ReferenceStats) see _get_reference_coverage_stats
    :param output_dir: (string) where to write images
    :return: PlotGroup holding the single coverage histogram plot
    """
    group_id = Constants.PG_COVERAGE_HIST
    plot_id = Constants.P_COVERAGE_HIST

    fig, ax = self._create_histogram(stats)
    png_path = os.path.join(output_dir, 'coverage_histogram.png')
    saved_paths = save_figure_with_thumbnail(fig, png_path)
    fname, thumb = [os.path.basename(path) for path in saved_paths]

    caption = get_plot_caption(self.spec, group_id, plot_id)
    title = get_plot_title(self.spec, group_id, plot_id)
    plot = Plot(plot_id, fname, caption=caption, title=title)

    return PlotGroup(group_id,
                     thumbnail=thumb,
                     plots=[plot],
                     title=get_plotgroup_title(self.spec, group_id))
def to_readlen_plotgroup(readlen_dist, output_dir):
    """
    Render the read length distribution as a bar chart and wrap it in a
    plot group.

    :param readlen_dist: distribution object exposing numBins, bins and
        binWidth
    :param output_dir: directory in which the PNG and thumbnail are saved
    :return: a one-element list containing the read length PlotGroup
    """
    group_id = Constants.PG_READLENGTH
    plot_id = Constants.P_READLENGTH

    plot_name = get_plot_title(spec, group_id, plot_id)
    x_label = get_plot_xlabel(spec, group_id, plot_id)
    y_label = get_plot_ylabel(spec, group_id, plot_id)

    bin_count = readlen_dist.numBins
    raw_heights = readlen_dist.bins
    raw_width = readlen_dist.binWidth
    # Left edge of every bin before reshaping.
    raw_edges = [float(index) * raw_width for index in xrange(bin_count)]
    edges, heights, bin_width = reshape(readlen_dist, raw_edges, raw_heights)

    fig, ax = get_fig_axes_lpr()
    # Bars are only drawn when the distribution has at least one count.
    if sum(readlen_dist.bins) > 0:
        bar_color = get_green(0)
        ax.bar(edges, heights, color=bar_color,
               edgecolor=get_green(0), width=(bin_width * 0.75))
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    png_fn = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                          dpi=DEFAULT_DPI)

    # Image paths are stored relative to output_dir.
    readlen_plot = Plot(plot_id,
                        os.path.relpath(png_base, output_dir),
                        title=plot_name,
                        caption=plot_name,
                        thumbnail=os.path.relpath(thumbnail_base,
                                                  output_dir))
    return [PlotGroup(group_id, plots=[readlen_plot])]
def fasta_to_plot_group(fasta_file, output_dir):
    """
    Plot a histogram of the sequence lengths found in a FASTA file.

    The image is written to output_dir as 'sequence_length_hist.png' and
    referenced from the returned plot group by that file name.

    :param fasta_file: path opened with FastaReader
    :param output_dir: directory in which the PNG is saved
    :return: PlotGroup 'reference_hist' containing the single plot
    """
    with FastaReader(fasta_file) as reader:
        lengths = [len(record.sequence) for record in reader]

    from pbreports.plot.helper import get_fig_axes  #pylint: disable=import-error
    from pbcommand.models.report import PlotGroup, Plot

    fig, ax = get_fig_axes()
    if len(lengths) == 1:
        # With one value, give the histogram an explicit range around it.
        only_length = lengths[0]
        ax.hist(lengths, range=(only_length - 1, only_length + 1))
    else:
        ax.hist(lengths)
    ax.set_title("Sequence Length Histogram")
    ax.set_xlabel("Sequence Length")

    name = "sequence_length_hist.png"
    fig.savefig(os.path.join(output_dir, name))

    return PlotGroup("reference_hist", "Sequence Lengths",
                     plots=[Plot("sequence_lengths", name)])
def to_hq_hist_plot(hqbasefraction_dist, output_dir):
    """
    Render the HQ base fraction distribution as a bar chart and wrap it
    in a plot group.

    :param hqbasefraction_dist: mapping with 'NumBins', 'BinCounts' and
        'BinWidth' entries whose values expose ``metavalue``
    :param output_dir: directory in which the PNG and thumbnail are saved
    :return: a one-element list containing the HQ PlotGroup
    """
    group_id = Constants.PG_HQ
    plot_id = Constants.P_HQ

    plot_name = get_plot_title(spec, group_id, plot_id)
    x_label = get_plot_xlabel(spec, group_id, plot_id)
    y_label = get_plot_ylabel(spec, group_id, plot_id)

    nbins = int(hqbasefraction_dist['NumBins'].metavalue)
    heights = [int(count.metavalue)
               for count in hqbasefraction_dist['BinCounts']]
    # Bin left edges are spread evenly over [0, 1).
    edges = [float(index) / float(nbins) for index in xrange(nbins)]
    bin_width = float(hqbasefraction_dist['BinWidth'].metavalue)

    fig, ax = get_fig_axes_lpr()
    ax.bar(edges, heights, color=get_green(0),
           edgecolor=get_green(0), width=(bin_width * 0.75))
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    png_fn = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                          dpi=DEFAULT_DPI)

    # Image paths are stored relative to output_dir.
    hq_plot = Plot(plot_id,
                   os.path.relpath(png_base, output_dir),
                   title=plot_name,
                   caption=plot_name,
                   thumbnail=os.path.relpath(thumbnail_base, output_dir))
    return [PlotGroup(group_id, plots=[hq_plot])]
def addQmodMotifHist(csvFile, kinData, outputFolder, dpi=DEFAULT_DPI,
                     max_motifs=10):
    """
    Create the plot group holding the per-motif modification QV histogram.

    Apart from passing in the motif_summary.csv file name, this is nearly
    identical to addQmodHist.

    :param csvFile: forwarded to plotMotifHist
    :param kinData: forwarded to plotMotifHist
    :param outputFolder: directory in which the image is saved
    :param dpi: resolution passed to save_figure_with_thumbnail
    :param max_motifs: forwarded to plotMotifHist
    :return: PlotGroup containing the single histogram plot
    """
    image_path = os.path.join(outputFolder, Constants.I_MOTIFS_QMOD)

    # Generate modification detection plot
    fig, ax = plotMotifHist(csvFile, kinData, max_motifs=max_motifs)
    png, thumbpng = PH.save_figure_with_thumbnail(fig, image_path, dpi=dpi)
    log.info((png, thumbpng))

    # The report references the image files by base name only.
    plot = Plot(Constants.P_MOD_QV,
                image=os.path.basename(png),
                thumbnail=os.path.basename(thumbpng))
    return PlotGroup(Constants.PG_MOD_QV,
                     title=get_plotgroup_title(spec, Constants.PG_MOD_QV),
                     plots=[plot],
                     thumbnail=os.path.basename(thumbpng))
def make_report(in_fn, out_dir='.', bounds=None, nolegend=False,
                reference=None, dpi=60, name=None):
    """AlignmentToPng Report

    Convert an input bam or DataSet XML file to a figure of Concordance vs.
    Subread Length.

    Args:
        in_fn: the bam, DataSet XML or cmp.h5 file to turn into a length vs
               concordance plot
        out_dir: the output directory to be used with the file name or
                 default
        name: the file name to be used with the outdir or default (no full
              path filenames!); when empty, the input file's base name with
              a '.png' extension is used
        bounds: the figure limits (in xmin:xmax:ymin:ymax)
        nolegend: exclude the figure legend
        reference: the reference to use in the figure. Default of all
                   references
        dpi: the dots per inch (resolution) of the figure

    Returns:
        Report 'alignment_to_png_report' with one plot group whose plot
        references the image by base name.
    """
    data = _read_in_file(in_fn, reference)
    report = Report('alignment_to_png_report')

    if not name:
        input_stem = os.path.splitext(os.path.basename(in_fn))[0]
        name = '%s.png' % input_stem
    png_fn = os.path.join(out_dir, name)
    _make_plot(data, png_fn, bounds, dpi, nolegend)

    plot = Plot('alignment_to_png_plot', os.path.basename(png_fn))
    report.add_plotgroup(PlotGroup(Constants.PLOT_GROUP_ID, plots=[plot]))
    return report
def test_to_dict(self):
    """Test plotGroup to_dict function."""
    group = PlotGroup('123', title='foo title', legend='foo legend',
                      thumbnail='foo thumbnail')
    group.add_plot(Plot('id', 'i1', caption='a caption'))

    as_dict = group.to_dict()
    log.debug(pformat(as_dict))

    assert '123' == as_dict['id']
    assert 'foo title' == as_dict['title']
    assert 'foo legend' == as_dict['legend']
    assert 'foo thumbnail' == as_dict['thumbnail']
    assert 1 == len(as_dict['plots'])

    log.info(group)
    assert repr(group) is not None
def _get_plot_group_length(control_data, sample_data, output_dir):
    """
    Create the read length plot group and return it.

    The figure is saved to output_dir as
    'control_non-control_readlength.png' together with a thumbnail.
    """
    fname = 'control_non-control_readlength.png'
    fig = _create_length_figure(control_data, sample_data)

    saved_paths = save_figure_with_thumbnail(fig,
                                             os.path.join(output_dir, fname))
    # The second returned path is used as the thumbnail.
    thumb = saved_paths[1]

    return PlotGroup(Constants.PG_LENGTH,
                     thumbnail=os.path.basename(thumb),
                     plots=[Plot(Constants.P_LENGTH, fname)])
def makeReport(inReadsFN, inSummaryFN, outDir):
    """
    Generate a report with ID, attributes and a read length plot group.

    inReadsFN --- input file opened as a ContigSet; the length of every
                  record's sequence feeds the read length histogram.
    inSummaryFN --- summary file from which the report attributes are
                  extracted (it is also loaded with load_report_from_json).
    outDir --- directory handed to create_readlength_plot.

    Returns the result of meta_rpt.apply_view on the assembled Report.
    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Collect the length of every sequence in the input.
    contig_set = ContigSet(inReadsFN)
    lengths = [len(record.sequence) for record in contig_set]
    contig_set.close()
    readlengths = np.array(lengths)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))

    # Produce attributes based on summary.
    dataset_uuids = [ContigSet(inReadsFN).uuid]
    attributes = _report_to_attributes(inSummaryFN)
    # The loaded report itself is not used further.
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?
    load_report_from_json(inSummaryFN)

    # The table is only logged; it is not attached to the report.
    summary_table = attributesToTable(attributes)
    log.info(str(summary_table))

    report = Report(Constants.R_ID,
                    title=meta_rpt.title,
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)
    return meta_rpt.apply_view(report)
def make_report(hq_transcripts_file, lq_transcripts_file, output_dir):
    """
    Generate a report with ID, attributes and plot groups for the
    high-quality and low-quality transcript sets.

    :param hq_transcripts_file: path opened as a strict TranscriptSet
    :param lq_transcripts_file: path opened as a strict TranscriptSet
    :param output_dir: directory handed to the plot-creation helpers
    :return: the result of spec.apply_view on the assembled Report
    """
    log.info("Plotting read length histogram from files: {h} {l}".
             format(h=hq_transcripts_file, l=lq_transcripts_file))

    # Collect read lengths and average qvs
    ds_hq = TranscriptSet(hq_transcripts_file, strict=True)
    ds_lq = TranscriptSet(lq_transcripts_file, strict=True)

    lengths, hq_qvs, lq_qvs = [], [], []
    # Lengths are pooled across both sets; scores are kept per set.
    for dataset, scores in ((ds_hq, hq_qvs), (ds_lq, lq_qvs)):
        for rec in dataset:
            lengths.append(float(rec.qLen))
            scores.append(rec.readScore)

    readlengths = np.array(lengths)
    avgqvs = np.array(hq_qvs + lq_qvs)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, output_dir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    # Plot average qv histogram
    avgqv_plot = create_avgqv_plot(avgqvs, output_dir)
    avgqv_group = PlotGroup(Constants.PG_AVGQV,
                            plots=[avgqv_plot],
                            thumbnail=avgqv_plot.thumbnail)

    n_hq = Attribute(Constants.A_N_HQ_ID, value=len(ds_hq))
    n_lq = Attribute(Constants.A_N_LQ_ID, value=len(ds_lq))

    report = Report(Constants.R_ID,
                    attributes=[n_hq, n_lq],
                    plotgroups=[readlength_group, avgqv_group],
                    dataset_uuids=[ds_hq.uuid, ds_lq.uuid])
    return spec.apply_view(report)
def make_plots(bc_groups, base_dir):
    """
    Generate all plots, both 1D and 2D, and return a list of PlotGroups.

    Groups labelled Constants.LABEL_NONE are excluded, and the remaining
    groups are ordered by descending n_reads before plotting.

    :param bc_groups: iterable of objects exposing ``label`` and ``n_reads``
    :param base_dir: directory handed to each plot-creation helper
    :return: list of three PlotGroup objects (stats, barcode quality, 2D
        histograms)
    """
    groups = [g for g in bc_groups if g.label != Constants.LABEL_NONE]
    # key/reverse works on both Python 2 and 3 (a positional cmp function
    # is Python 2 only) and keeps ties in their original order, exactly
    # as the previous cmp-based descending sort did.
    groups.sort(key=lambda g: g.n_reads, reverse=True)

    plot_nreads = make_nreads_line_plot(groups, base_dir)

    log.info("Generating 1D histograms...")
    plot_nreads_hist = make_nreads_histogram(groups, base_dir)
    plot_rl = make_readlength_histogram(groups, base_dir)

    log.info("Generating barcode quality score plots...")
    plot_bq = make_bcqual_histogram(groups, base_dir)

    log.info("Generating 2D histograms...")
    plot_rl2d = make_readlength_hist2d(groups, base_dir)
    # Distinct name so the 1D barcode-quality plot is not shadowed.
    plot_bq2d = make_bcqual_hist2d(groups, base_dir)

    return [
        PlotGroup(Constants.PG_STATS,
                  plots=[plot_nreads, plot_nreads_hist, plot_rl]),
        PlotGroup(Constants.PG_BQ, plots=[plot_bq]),
        PlotGroup(Constants.PG_HIST2D, plots=[plot_rl2d, plot_bq2d])
    ]
def _to_plot_group(d):
    """
    Convert a plot group dict into a PlotGroup.

    'id' is required; 'legend', 'thumbnail' and 'title' default to None
    and a missing 'plots' entry yields an empty plot list.
    """
    group_id = _to_id(d['id'])

    # is the thumbnail optional?
    optional = dict((key, d.get(key))
                    for key in ('legend', 'thumbnail', 'title'))

    plots = [_to_plot(plot_dict) for plot_dict in d['plots']] \
        if 'plots' in d else []

    return PlotGroup(group_id,
                     title=optional['title'],
                     legend=optional['legend'],
                     plots=plots,
                     thumbnail=optional['thumbnail'])
def make_modifications_report(modifications_h5, report, output_dir, dpi=72):
    """
    Entry point to report generation.

    :param modifications_h5: path to the base modifications HDF5 file
    :param report: file name of the JSON report written into output_dir
    :param output_dir: directory for the plots and the JSON report
    :param dpi: resolution forwarded to the plot helpers
    :return: 0 once the report has been written
    """
    # Open read-only and close the handle as soon as both plots have been
    # produced; previously the file was never closed.
    with h5py.File(modifications_h5, "r") as basemods_h5:
        scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
        hist = get_qmod_hist(basemods_h5, output_dir, dpi)

    pg = PlotGroup(Constants.PG_KIN,
                   title=get_plotgroup_title(spec, Constants.PG_KIN),
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(spec.id, plotgroups=[pg])
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
def apply_plotgroup_view(self, plotgroup):
    # Build a new PlotGroup carrying this spec's id. The incoming title
    # and legend win; this spec's values are used only when they are None.
    # Every plot is passed through the view of its matching meta plot.
    title, legend = plotgroup.title, plotgroup.legend
    if title is None:
        title = self.title
    if legend is None:
        legend = self.legend

    viewed_plots = [
        self.get_meta_plot(plot.id).apply_plot_view(plot)
        for plot in plotgroup.plots
    ]
    return PlotGroup(self.id,
                     title=title,
                     legend=legend,
                     thumbnail=plotgroup.thumbnail,
                     plots=viewed_plots)
def _create_variants_plot_grp(top_contigs, var_map, output_dir):
    """
    Create the plot group that contains the variants plots of the top
    contigs.

    Contigs whose header is missing from var_map are skipped. The legend
    and the group thumbnail both come from the first contig plotted.

    :param top_contigs: (list of Contig objects) sorted by contig size
    :param var_map: (dict string:ContigVariants) mapping of contig.header
        to ContigVariants object
    :param output_dir: (string) where to write images
    :return: PlotGroup with one plot per covered contig
    """
    plots = []
    thumbnail = None
    legend = None

    for contig in top_contigs:
        if contig.header not in var_map:
            # no coverage of this contig
            continue

        ctg_var = var_map[contig.header]
        bars = _create_bars(ctg_var)
        if legend is None:
            legend = _get_legend_file(bars, output_dir)

        fig, ax = _create_contig_fig_ax(bars, _get_x_labels(ctg_var))
        fname = os.path.join(output_dir, ctg_var.file_name)
        if thumbnail is None:
            # Only the first plotted contig also produces a thumbnail.
            saved_paths = PH.save_figure_with_thumbnail(fig, fname)
            thumbnail = os.path.basename(saved_paths[1])
        else:
            fig.savefig(fname, dpi=DEFAULT_DPI)

        # Plots are numbered in the order they are appended.
        plot_id = 'coverage_variants_{i}'.format(i=str(len(plots)))
        caption = "Observed variants across {c}".format(c=ctg_var.name)
        plots.append(Plot(plot_id, os.path.basename(fname),
                          title=caption, caption=caption))
        plt.close(fig)

    return PlotGroup(Constants.PG_VARIANTS,
                     thumbnail=thumbnail,
                     legend=legend,
                     plots=plots)
def _create_coverage_plot_grp(self, top_contigs, cov_map, output_dir):
    """
    Create the plot group that contains the coverage plots of the top
    contigs.

    Contigs whose id is missing from cov_map are skipped. The group
    thumbnail comes from the first contig plotted.

    :param top_contigs: (list of Contig objects) sorted by contig size
    :param cov_map: (dict string:ContigCoverage) mapping of contig.id to
        ContigCoverage object
    :param output_dir: (string) where to write images
    :return: PlotGroup with one plot per covered contig
    """
    plots = []
    thumbnail = None
    log.debug('Creating plots for {n} top contig(s)'.format(
        n=str(len(top_contigs))))

    for contig in top_contigs:
        if contig.id not in cov_map:
            # no coverage of this contig
            log.debug('contig {c} has no coverage info '.format(c=contig.id))
            continue

        ctg_cov = cov_map[contig.id]
        fig, ax = self._create_contig_plot(ctg_cov)
        fname = os.path.join(output_dir, ctg_cov.file_name)
        if thumbnail is None:
            # Only the first plotted contig also produces a thumbnail.
            saved_paths = save_figure_with_thumbnail(fig, fname)
            thumbnail = os.path.basename(saved_paths[1])
        else:
            fig.savefig(fname)
        plt.close(fig)

        # Plots are numbered in the order they are appended.
        plot_id = "coverage_contig_{i}".format(i=str(len(plots)))
        plot_spec = self.spec.get_plotgroup_spec(
            Constants.PG_COVERAGE).get_plot_spec(Constants.P_COVERAGE)
        caption_template = plot_spec.caption + " {c}."
        plots.append(Plot(plot_id,
                          os.path.basename(fname),
                          caption_template.format(c=ctg_cov.name),
                          title=caption_template.format(c=ctg_cov.name)))

    return PlotGroup(Constants.PG_COVERAGE,
                     title=get_plotgroup_title(self.spec,
                                               Constants.PG_COVERAGE),
                     thumbnail=thumbnail,
                     plots=plots)
def to_report(fastq_files, qv_hist=None, readlength_hist=None):
    """Generate a histogram of read lengths and quality values.

    :param fastq_files: input handed to fastq_files_to_stats
    :param qv_hist: path the QV histogram image is saved to, or None to
        skip that figure
    :param readlength_hist: path the read length histogram image is saved
        to, or None to skip that figure; the read length plot group is
        only added to the report when this path is given
    :return: the result of spec.apply_view on the assembled Report
    """
    fastq_stats = fastq_files_to_stats(fastq_files)
    table = _generate_table(fastq_stats.values())
    log.debug(str(table))

    # Each figure is closed right after it is saved. Closing once at the
    # end failed when no histogram was requested and leaked the QV figure
    # when both were.
    if qv_hist is not None:
        fig, ax = to_qv_histogram(fastq_stats.values())
        fig.savefig(qv_hist)
        plt.close(fig)

    plotgroups = []
    if readlength_hist is not None:
        fig, ax = to_read_length_histogram(fastq_stats.values())
        fig.savefig(readlength_hist)
        plt.close(fig)
        # Only reference the image when it was actually written.
        readlength_hist_plot = Plot(Constants.P_RL, readlength_hist)
        plotgroups.append(
            PlotGroup(Constants.PG_RL, plots=[readlength_hist_plot]))

    # 'Constats' was a typo that raised NameError on every call.
    report = Report(Constants.R_ID, tables=[table], plotgroups=plotgroups)
    return spec.apply_view(report)
def _to_workflow_report(job_resources, bg, workflow_opts, task_opts, state,
                        was_successful, plot_images):
    """
    Copy images to the job's image directory and return a pbreport Report.

    Only job_resources.images and plot_images are used here; the other
    parameters are accepted but not read.

    :return: Report 'workflow_report' with one 'workflow_state_plots'
        group when plot_images is non-empty, otherwise no plot groups
    """
    plot_groups = []
    if plot_images:
        images_dir = job_resources.images
        plots = []
        for index, source_path in enumerate(plot_images):
            file_name = os.path.basename(source_path)
            shutil.copy(source_path, os.path.join(images_dir, file_name))
            # Make the file path relative to images/my-image.png
            relative_image = os.path.join(os.path.basename(images_dir),
                                          file_name)
            plots.append(Plot("plot_{i}".format(i=index), relative_image))
        plot_groups.append(PlotGroup("workflow_state_plots", plots=plots))

    return Report("workflow_report", plotgroups=plot_groups)
def to_mod_report(motif_summary_csv, output_dir):
    """
    Set up the modifications report.

    :param motif_summary_csv: file read with readModificationCsvGz
    :param output_dir: directory handed to the plot helpers
    :return: the result of spec.apply_view on a Report holding one plot
        group with the QMod plot and the QMod histogram
    """
    kin_data = readModificationCsvGz(motif_summary_csv)

    qmod_plot = addQmodPlot(kin_data, output_dir)
    qmod_hist = addQmodHist(kin_data, output_dir)

    plot_group = PlotGroup(Constants.PG_MOD,
                           title=get_plotgroup_title(spec, Constants.PG_MOD),
                           plots=[qmod_plot, qmod_hist])
    return spec.apply_view(Report(Constants.R_ID, plotgroups=[plot_group]))
def make_polished_assembly_report(report, gff, fastq, output_dir):
    """
    Entry to report: create a polished assembly report.

    :param report: (str) report name, written as JSON into output_dir
    :param gff: (str) path to alignment_summary.gff
    :param fastq: (str) path to polished fastq file
    :param output_dir: (str) directory for the plot, the JSON report and
        the coverage-vs-quality CSV
    :return: 0 once everything has been written
    """
    log.info("Starting version {f} v{x}".format(
        x=__version__, f=os.path.basename(__file__)))

    log.debug("Loading {f}".format(f=fastq))
    contigs = _get_contigs(fastq)

    log.debug("Loading {f}".format(f=gff))
    _get_contig_coverage(gff, contigs)

    log.debug("Computing and creating plots")
    coverage_plot = _coverage_vs_quality_plot(contigs, output_dir)
    plot_group = PlotGroup(Constants.PG_COVERAGE,
                           thumbnail=coverage_plot.thumbnail,
                           plots=[coverage_plot])

    rep = Report(Constants.R_ID)
    rep.add_attribute(Attribute(Constants.A_N_CONTIGS, len(contigs)))

    # The length statistics helpers receive the lengths in ascending order.
    contig_lengths = sorted(c.length for c in contigs.values())
    for make_attribute in (_get_att_max_contig_length,
                           _get_att_n_50_contig_length,
                           _get_att_sum_contig_lengths,
                           _get_att_esize_contig_length):
        rep.add_attribute(make_attribute(contig_lengths))

    rep.add_plotgroup(plot_group)
    rep = spec.apply_view(rep)
    rep.write_json(os.path.join(output_dir, report))
    _write_coverage_vs_quality_csv(contigs, output_dir)
    return 0
def to_rl_overlay_plot(numunfilteredbasecalls_dist, readlen_dist, output_dir):
    """
    Overlay the unfiltered basecall and polymerase read length histograms
    in one figure and wrap it in a plot group.

    :param numunfilteredbasecalls_dist: mapping with 'BinCounts' and
        'BinWidth' entries whose values expose ``metavalue``
    :param readlen_dist: mapping of the same form
    :param output_dir: directory in which the PNG and thumbnail are saved
    :return: a one-element list containing the PlotGroup
    """
    group_id = Constants.PG_RRL
    plot_id = Constants.P_RRL

    plot_name = get_plot_title(spec, group_id, plot_id)
    x_label = get_plot_xlabel(spec, group_id, plot_id)
    y_label = get_plot_ylabel(spec, group_id, plot_id)

    unfiltered_bins = [
        int(count.metavalue)
        for count in numunfilteredbasecalls_dist['BinCounts']
    ]
    poly_bins = [int(count.metavalue) for count in readlen_dist['BinCounts']]

    # Upper x limit of each histogram: number of bins times bin width.
    unfiltered_width = int(numunfilteredbasecalls_dist['BinWidth'].metavalue)
    max_unfiltered = len(unfiltered_bins) * unfiltered_width
    max_poly = len(poly_bins) * int(readlen_dist['BinWidth'].metavalue)

    unfiltered_data = expand_data(unfiltered_bins, max_unfiltered)
    poly_data = expand_data(poly_bins, max_poly)

    fig, ax = get_fig_axes_lpr()
    ax.hist(unfiltered_data, label="Unfiltered", histtype='stepfilled',
            alpha=0.3, bins=len(unfiltered_bins),
            range=[0, max_unfiltered])
    ax.hist(poly_data, label="Polymerase", histtype='stepfilled',
            alpha=0.3, bins=len(poly_bins), range=[0, max_poly])
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.legend()

    png_fn = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                          dpi=DEFAULT_DPI)

    # Image paths are stored relative to output_dir.
    rrl_plot = Plot(plot_id,
                    os.path.relpath(png_base, output_dir),
                    title=plot_name,
                    caption=plot_name,
                    thumbnail=os.path.relpath(thumbnail_base, output_dir))
    return [PlotGroup(group_id, plots=[rrl_plot])]
def to_plotgroup(plot_json, output_dir):
    """
    Draw one row of two subplots per sample and wrap the figure in a plot
    group; an empty figure is produced when there are no samples.

    :param plot_json: mapping of sample name to that sample's plot data
    :param output_dir: directory in which the PNG and thumbnail are saved
    :return: PlotGroup containing the single plot
    """
    n_samples = len(plot_json)
    if n_samples == 0:
        fig = plt.figure()
    else:
        fig, ax = plt.subplots(n_samples, 2,
                               figsize=(15, n_samples * 5), squeeze=False)
        # Samples are drawn in sorted key order, one row each.
        od = collections.OrderedDict(sorted(plot_json.items()))
        y_max = [0, 0]
        for row_index, (sample, data) in enumerate(od.iteritems()):
            add_subplots(fig, ax, sample, data, row_index, y_max)
        label_rows(fig, ax, od.keys())
        label_columns(fig, ax)

        # Same y limit down each column, 10% above that column's y_max.
        for row in xrange(n_samples):
            ax[row, 0].set_ylim(top=y_max[0] * 1.1)
            ax[row, 1].set_ylim(top=y_max[1] * 1.1)

        deletion_patch = mpatches.Patch(color='#FF7E79', linewidth=0)
        insertion_patch = mpatches.Patch(color='#A9D18E', linewidth=0)
        fig.legend((deletion_patch, insertion_patch),
                   ("Deletions", "Insertions"), "upper left", fontsize=15)

    plot_name = get_plot_title(spec, Constants.PG_SV, Constants.P_SV)
    png_fn = os.path.join(output_dir, "{p}.png".format(p=Constants.P_SV))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        fig, png_fn, dpi=DEFAULT_DPI, bbox_inches='tight')

    # Image paths are stored relative to output_dir.
    plot = Plot(Constants.P_SV,
                os.path.relpath(png_base, output_dir),
                title=plot_name,
                caption=plot_name,
                thumbnail=os.path.relpath(thumbnail_base, output_dir))
    return PlotGroup(Constants.PG_SV, plots=[plot])
def make_modifications_report(modifications_h5, report, output_dir,
                              dpi=DEFAULT_DPI):
    """
    Entry point to report generation.

    :param modifications_h5: path to the base modifications HDF5 file
    :param report: file name of the JSON report written into output_dir
    :param output_dir: directory for the plots and the JSON report
    :param dpi: resolution forwarded to the plot helpers
    :return: 0 once the report has been written
    :raises ImportError: when h5py is not installed
    """
    # h5py is imported lazily so the module stays importable without it.
    try:
        import h5py
    except ImportError:
        raise ImportError("This module requires that h5py be installed")

    # Open read-only and close the handle as soon as both plots have been
    # produced; previously the file was never closed.
    with h5py.File(modifications_h5, "r") as basemods_h5:
        scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
        hist = get_qmod_hist(basemods_h5, output_dir, dpi)

    pg = PlotGroup(Constants.PG_KIN,
                   title=get_plotgroup_title(spec, Constants.PG_KIN),
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(spec.id, plotgroups=[pg])
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
def _test():
    # Add two plots that share the id 'id' to the same plot group.
    group = PlotGroup('foo')
    for image in ('i1', 'i2'):
        group.add_plot(Plot('id', image))
def to_report(self, output_dir, report_id=Constants.R_ID):
    """
    Run the mapping statistics analysis and assemble the Report.

    One StatisticsModel is built for all movies and one per movie; they
    are run through analyze_movies, after which their aggregators supply
    the stats table rows, the report attributes and the histogram plots.

    This needs to be cleaned up. Keeping the old interface for testing
    purposes.

    :param output_dir: directory handed to to_plot_groups and
        add_more_plots
    :param report_id: id given to the returned Report
    :return: Report with attributes, plot groups, one table and
        self.dataset_uuids
    """
    started_at = time.time()

    log.info("Found {n} movies.".format(n=len(self.movies)))

    log.info("Working from {n} alignment file{s}: {f}".format(
        n=len(self.alignment_file_list),
        s='s' if len(self.alignment_file_list) > 1 else '',
        f=self.alignment_file_list))

    # make this a dict {attribute_key_name:Aggregator} so it's easy to
    # access the instances after they've been computed.
    # there's duplicated keys in the attributes?
    # number_of_aligned_reads/mapped_reads_n
    _total_aggregators = self._get_total_aggregators()
    # The total model accepts every record.
    null_filter = lambda r: True
    total_model = StatisticsModel(
        _total_aggregators.values(), filter_func=null_filter)

    # need to create specific instances for a given movie. This is used to
    # create the mapping reports stats table
    movie_models = {}

    def _my_filter(movie_name1, movie_name2):
        return movie_name1 == movie_name2

    for movie in self.movies:
        # Fresh aggregator instances for every movie.
        ags = [k() for k in self.COLUMN_AGGREGATOR_CLASSES]
        # Note this WILL NOT work because of how scope works in python
        # filter_by_movie_func = lambda m_name: movie.name == m_name
        # partial binds the current movie now, avoiding the late-binding
        # closure problem described above.
        _my_filter_func = functools.partial(_my_filter, movie)
        model = StatisticsModel(ags, filter_func=_my_filter_func)
        movie_models[movie] = model

    # The statistic models that will be run
    all_models = [total_model] + movie_models.values()
    log.debug(all_models)

    # Run all the analysis. Now the aggregators can be accessed
    analyze_movies(self.movies, self.alignment_file_list, all_models)

    # temp structure used to create the report table. The order is
    # important

    # add total values
    _to_a = lambda k: _total_aggregators[k].attribute
    _row = [_to_a(n) for n in self.COLUMN_ATTR]
    _row.insert(0, 'All Movies')
    movie_datum = [_row]

    # Add each individual movie stats
    for movie_name_, model_ in movie_models.iteritems():
        _row = [movie_name_]
        for a in model_.aggregators:
            _row.append(a.attribute)
        movie_datum.append(_row)

    log.info(movie_datum)

    # create the Report table
    table = self._to_table(movie_datum)

    # NOTE(review): the nesting of the blank log line below was inferred
    # (once per movie) - confirm against the original layout.
    for movie_name, model in movie_models.iteritems():
        log.info("Movie name {n}".format(n=movie_name))
        for a in model.aggregators:
            log.info(movie_name + " " + repr(a))
        log.info("")

    log.info("Total models")
    for a in total_model.aggregators:
        log.info(a)

    attributes = get_attributes(_total_aggregators)

    log.info("Attributes from streaming mapping Report.")
    for a in attributes:
        log.info(a)

    plot_config_views = self._get_plot_view_configs()
    plot_groups = []

    # NOTE(review): ds is never explicitly closed in this method - confirm
    # whether openDataFile results need closing.
    ds = openDataFile(self.alignment_file)
    ds.updateCounts()
    # Plots are only generated when the dataset is non-empty.
    if len(ds) > 0:
        # keeping the ids independent requires a bit of dictionary madness
        # {report_id:HistogramAggregator}
        id_to_aggregators = {k: _total_aggregators[v]
                             for k, v in self.HISTOGRAM_IDS.iteritems()}
        plot_groups = to_plot_groups(plot_config_views, output_dir,
                                     id_to_aggregators)
        rb_pg = PlotGroup(Constants.PG_RAINBOW)
        rb_png = "mapped_concordance_vs_read_length.png"
        # NOTE(review): rb_png is a bare file name that is not joined with
        # output_dir here - confirm make_rainbow_plot writes the image
        # where the report expects to find it.
        make_rainbow_plot(self.alignment_file, rb_png)
        rb_plt = Plot(Constants.P_RAINBOW, rb_png,
                      caption=get_plot_caption(spec, Constants.PG_RAINBOW,
                                               Constants.P_RAINBOW))
        rb_pg.add_plot(rb_plt)
        plot_groups.append(rb_pg)
        # NOTE(review): add_more_plots is assumed to sit inside this
        # non-empty-dataset branch - confirm against the original layout.
        self.add_more_plots(plot_groups, output_dir)

    tables = [table]
    report = Report(report_id,
                    attributes=attributes,
                    plotgroups=plot_groups,
                    tables=tables,
                    dataset_uuids=self.dataset_uuids)

    log.debug(report)

    run_time = time.time() - started_at
    log.info("Completed running in {s:.2f} sec.".format(s=run_time))
    return report