def test_merge(self):
    EXPECTED_VALUES = {
        "n_reads": 300,
        "n_zmws": 60,
    }
    NAMES = {
        "n_reads": "Number of reads",
        "n_zmws": "Number of ZMWs"
    }
    chunks = [
        Report("pbcommand_test",
               attributes=[
                   Attribute(id_="n_reads", value=50, name="Number of reads"),
                   Attribute(id_="n_zmws", value=10, name="Number of ZMWs")],
               dataset_uuids=["12345"]),
        Report("pbcommand_test",
               attributes=[
                   Attribute(id_="n_reads", value=250, name="Number of reads"),
                   Attribute(id_="n_zmws", value=50, name="Number of ZMWs")]),
    ]
    r = Report.merge(chunks)
    self.assertEqual([a.id for a in r.attributes], ["n_reads", "n_zmws"])
    self.assertEqual(r._dataset_uuids, ["12345"])
    for attr in r.attributes:
        self.assertEqual(attr.value, EXPECTED_VALUES[attr.id])
        self.assertEqual(attr.name, NAMES[attr.id])
    for table in r.tables:
        for column in table.columns:
            self.assertEqual(column.header, NAMES[column.id])

def datastore_to_report(ds):
    """
    :type ds: DataStore
    :param ds: DataStore instance to summarize
    :return: Report with datastore attributes and a file summary table
    """
    attrs = [
        Attribute("ds_nfiles", len(ds.files), name="Number of files"),
        Attribute("ds_version", ds.version, name="Datastore version"),
        Attribute("ds_created_at", ds.created_at, name="Created At"),
        Attribute("ds_updated_at", ds.updated_at, name="Updated At")
    ]

    columns_names = [("file_id", "File Id"),
                     ("file_type_obj", "File Type"),
                     ("path", "Path"),
                     ("file_size", "Size"),
                     ("created_at", "Created At"),
                     ("modified_at", "Modified At")]

    to_i = lambda s: "ds_" + s
    columns = [Column(to_i(i), header=h) for i, h in columns_names]
    t = Table("datastore", title="DataStore Summary", columns=columns)

    def _to_relative_path(p):
        return "/".join(p.split("/")[-3:])

    for file_id, ds_file in ds.files.iteritems():
        t.add_data_by_column_id(to_i("file_id"), ds_file.file_id)
        t.add_data_by_column_id(to_i("file_type_obj"), ds_file.file_type_id)
        t.add_data_by_column_id(to_i("path"), _to_relative_path(ds_file.path))
        t.add_data_by_column_id(to_i("file_size"), ds_file.file_size)
        t.add_data_by_column_id(to_i("created_at"), ds_file.created_at)
        t.add_data_by_column_id(to_i("modified_at"), ds_file.modified_at)

    return Report("datastore_report", tables=[t], attributes=attrs)

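# Usage sketch (not part of the original module): build and write a datastore
# summary report from a datastore JSON produced by a pbcommand-based task.
# DataStore.load_from_json and Report.write_json are assumed to behave as in
# pbcommand.models; the helper name and file paths are hypothetical.
def _example_datastore_report(datastore_json, report_json):
    from pbcommand.models import DataStore  # local import keeps the sketch self-contained
    ds = DataStore.load_from_json(datastore_json)  # load the datastore JSON from disk
    report = datastore_to_report(ds)               # summarize it as a Report
    report.write_json(report_json)                 # serialize the report next to it
    return report
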
def make_sat_report(aligned_reads_file, mapping_stats_report, variants_report,
                    report, output_dir):
    """
    Entry to report.

    :param aligned_reads_file: (str) path to aligned_reads.xml
    :param mapping_stats_report: (str) path to mapping stats json report
    :param variants_report: (str) path to variants report
    :param report: (str) report file name
    :param output_dir: (str) output directory
    """
    _validate_inputs([('aligned_reads_file', aligned_reads_file),
                      ('mapping_stats_report', mapping_stats_report),
                      ('variants_report', variants_report)])

    d_map = _get_mapping_stats_data(mapping_stats_report)
    reads, inst = _get_reads_info(aligned_reads_file)
    d_bam = _get_read_hole_data(reads, inst)
    d_var = _get_variants_data(variants_report)
    ds = AlignmentSet(aligned_reads_file)

    rpt = Report(meta_rpt.id, dataset_uuids=(ds.uuid,))
    rpt.add_attribute(
        Attribute(Constants.A_INSTRUMENT, d_bam[Constants.A_INSTRUMENT]))
    rpt.add_attribute(
        Attribute(Constants.A_COVERAGE, d_var[Constants.A_COVERAGE]))
    rpt.add_attribute(
        Attribute(Constants.A_CONCORDANCE, d_var[Constants.A_CONCORDANCE]))
    rpt.add_attribute(
        Attribute(Constants.A_READLENGTH, d_map[Constants.A_READLENGTH]))
    rpt.add_attribute(Attribute(Constants.A_READS, d_bam[Constants.A_READS]))
    rpt = meta_rpt.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))

def run_reference_dataset_report(reference_ds, output_json):
    """
    :type reference_ds: ReferenceSet
    :param reference_ds: ReferenceSet to summarize
    :param output_json: (str) path to the output report JSON
    :return: (int) exit code
    """
    output_dir = os.path.dirname(output_json)
    host = socket.getfqdn()

    attributes = _dataset_to_attribute_reports(reference_ds)
    _add = attributes.append

    _add(Attribute("host", host, name="Host"))
    _add(Attribute("task_dir", output_dir, name="Task Directory"))

    fasta_file = reference_ds.toExternalFiles()[0]
    plot_groups = try_fasta_to_plot_group(fasta_file, output_json)

    report = Report("dev_diagnostic_report",
                    attributes=attributes,
                    plotgroups=plot_groups,
                    dataset_uuids=[reference_ds.uuid])

    report.write_json(output_json)
    return 0

def make_control_report(control_cmph5, filtered_subreads_csv, report,
                        output_dir, dpi, dumpdata):
    """
    Entry to report.

    :param control_cmph5: (str) path to control_reads.cmp.h5
    :param filtered_subreads_csv: (str) path to filtered_subread_summary.csv
    """
    _validate_inputs(control_cmph5, filtered_subreads_csv)
    name, control_reads = _get_control_reads(control_cmph5)
    filtered_reads = _get_filtered_reads(filtered_subreads_csv)
    control_data, sample_data = _process_reads(control_reads, filtered_reads)
    nr = _get_num_control_reads(control_data)
    if nr == 0:
        # Not sure this ever happens, but logic exists in makeControlReport.py
        r = _get_error_report()
        r.write_json(os.path.join(output_dir, report))
        return

    atts = _get_attributes(name, control_data, sample_data)
    pgs = [_get_plot_group_score(control_data, sample_data, output_dir),
           _get_plot_group_length(control_data, sample_data, output_dir)]
    r = Report(meta_rpt.id, attributes=atts, plotgroups=pgs)
    r = meta_rpt.apply_view(r)
    r.write_json(os.path.join(output_dir, report))

def test_to_dict_multi(self):
    """
    Multiple complex elements.
    The id of report sub elements is prepended with the id of the parent
    element when to_dict is called.
    """
    tags = ["alpha", "beta", "gamma"]
    r = Report('redfang', tags=tags)
    a = Attribute('a', 'b')
    a2 = Attribute('a2', 'b2')
    r.add_attribute(a)
    r.add_attribute(a2)

    pg = PlotGroup('pgid')
    pg.add_plot(Plot('pid', 'anImg'))
    pg.add_plot(Plot('pid2', 'anImg2'))
    r.add_plotgroup(pg)

    pg = PlotGroup('pgid2')
    pg.add_plot(Plot('pid2', 'anImg2'))
    pg.add_plot(Plot('pid22', 'anImg22'))
    r.add_plotgroup(pg)

    t = Table('tabid')
    t.add_column(Column('c1'))
    r.add_table(t)

    t = Table('tabid2')
    t.add_column(Column('c2'))
    r.add_table(t)

    d = r.to_dict()
    log.debug(str(d))

    assert 'redfang' == d['id']
    assert 'redfang.a' == d['attributes'][0]['id']
    assert 'redfang.a2' == d['attributes'][1]['id']
    assert 'redfang.pgid' == d['plotGroups'][0]['id']
    assert 'redfang.pgid.pid' == d['plotGroups'][0]['plots'][0]['id']
    assert 'redfang.pgid.pid2' == d['plotGroups'][0]['plots'][1]['id']
    assert 'redfang.pgid2' == d['plotGroups'][1]['id']
    assert 'redfang.pgid2.pid2' == d['plotGroups'][1]['plots'][0]['id']
    assert 'redfang.pgid2.pid22' == d['plotGroups'][1]['plots'][1]['id']
    assert 'redfang.tabid' == d['tables'][0]['id']
    assert 'redfang.tabid.c1' == d['tables'][0]['columns'][0]['id']
    assert 'redfang.tabid2' == d['tables'][1]['id']
    assert 'redfang.tabid2.c2' == d['tables'][1]['columns'][0]['id']
    assert list(sorted(d['tags'])) == list(sorted(tags))

    loaded_report = load_report_from(d)
    assert list(sorted(loaded_report.tags)) == list(sorted(tags))

    log.info(repr(r))
    assert repr(r) is not None

def make_report(in_fn, out_dir='.', bounds=None, nolegend=False,
                reference=None, dpi=60, name=None):
    """AlignmentToPng Report

    Convert an input bam or DataSet XML file to a figure of Concordance vs.
    Subread Length.

    Args:
        in_fn: the bam, DataSet XML or cmp.h5 file to turn into a length vs
               concordance plot
        out_dir: the output directory to be used with the file name or default
        name: the file name to be used with the outdir or default (no full
              path filenames!)
        bounds: the figure limits (in xmin:xmax:ymin:ymax)
        nolegend: exclude the figure legend
        reference: the reference to use in the figure. Default of all
                   references
        dpi: the dots per inch (resolution) of the figure
    """
    data = _read_in_file(in_fn, reference)
    report = Report('alignment_to_png_report')

    if not name:
        name = '%s.png' % os.path.splitext(os.path.basename(in_fn))[0]
    png_fn = os.path.join(out_dir, name)
    _make_plot(data, png_fn, bounds, dpi, nolegend)
    plot_group = PlotGroup(Constants.PLOT_GROUP_ID,
                           plots=[Plot('alignment_to_png_plot',
                                       os.path.basename(png_fn))])
    report.add_plotgroup(plot_group)
    return report

def make_variants_report(aln_summ_gff, variants_gff, reference,
                         max_contigs_to_plot, report, output_dir, dpi=72,
                         dumpdata=True):
    """
    Entry to report.

    :param aln_summ_gff: (str) path to alignment_summary.gff
    :param variants_gff: (str) path to variants_gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    """
    _validate_inputs([('aln_summ_gff', aln_summ_gff),
                      ('variants_gff', variants_gff),
                      ('reference', reference)])

    # reference entry & top contigs
    ref = openReference(reference)
    top_contigs = get_top_contigs_from_ref_entry(ref, max_contigs_to_plot)

    # extract gff data from files
    ref_data, contig_variants = _extract_alignment_summ_data(
        aln_summ_gff, top_contigs)
    _append_variants_gff_data(ref_data, variants_gff)

    # make report objects
    table, atts = _get_consensus_table_and_attributes(ref_data, ref)
    plotgroup = _create_variants_plot_grp(
        top_contigs, contig_variants, output_dir)

    rpt = Report(Constants.R_ID,
                 plotgroups=[plotgroup],
                 attributes=atts,
                 tables=[table],
                 dataset_uuids=(ReferenceSet(reference).uuid,))

    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return rpt

def make_topvariants_report(gff, reference, how_many, batch_sort_size, report,
                            output_dir):
    """
    Entry to report.

    :param gff: (str) path to variants.gff (or rare_variants.gff). Note, could
                also be *.gz
    :param reference: (str) path to reference dir
    :param how_many: (int)
    :param batch_sort_size: (int)
    :param report: (str) report name
    :param output_dir: (str) output dir
    """
    _validate_inputs(gff, reference, how_many, batch_sort_size)

    table_builder = VariantTableBuilder()
    vf = VariantFinder(gff, reference, how_many, batch_sort_size)
    top = vf.find_top()
    for v in top:
        table_builder.add_variant(v)

    r = Report(Constants.R_ID, tables=[table_builder.table],
               dataset_uuids=(ReferenceSet(reference).uuid,))
    r = spec.apply_view(r)
    r.write_json(os.path.join(output_dir, report))
    return 0

def as_report(self, attributes=(), plotgroups=(), tables=(), uuid=None):
    return Report(self.id, self.title, attributes=attributes,
                  plotgroups=plotgroups, tables=tables, uuid=uuid)

def test_to_dict_multi(self):
    """
    Multiple complex elements.
    The id of report sub elements is prepended with the id of the parent
    element when to_dict is called.
    """
    r = Report('redfang')
    a = Attribute('a', 'b')
    a2 = Attribute('a2', 'b2')
    r.add_attribute(a)
    r.add_attribute(a2)

    pg = PlotGroup('pgid')
    pg.add_plot(Plot('pid', 'anImg'))
    pg.add_plot(Plot('pid2', 'anImg2'))
    r.add_plotgroup(pg)

    pg = PlotGroup('pgid2')
    pg.add_plot(Plot('pid2', 'anImg2'))
    pg.add_plot(Plot('pid22', 'anImg22'))
    r.add_plotgroup(pg)

    t = Table('tabid')
    t.add_column(Column('c1'))
    r.add_table(t)

    t = Table('tabid2')
    t.add_column(Column('c2'))
    r.add_table(t)

    d = r.to_dict()
    log.debug(str(d))

    self.assertEqual('redfang', d['id'])
    self.assertEqual('redfang.a', d['attributes'][0]['id'])
    self.assertEqual('redfang.a2', d['attributes'][1]['id'])
    self.assertEqual('redfang.pgid', d['plotGroups'][0]['id'])
    self.assertEqual('redfang.pgid.pid',
                     d['plotGroups'][0]['plots'][0]['id'])
    self.assertEqual('redfang.pgid.pid2',
                     d['plotGroups'][0]['plots'][1]['id'])
    self.assertEqual('redfang.pgid2', d['plotGroups'][1]['id'])
    self.assertEqual('redfang.pgid2.pid2',
                     d['plotGroups'][1]['plots'][0]['id'])
    self.assertEqual('redfang.pgid2.pid22',
                     d['plotGroups'][1]['plots'][1]['id'])
    self.assertEqual('redfang.tabid', d['tables'][0]['id'])
    self.assertEqual('redfang.tabid.c1', d['tables'][0]['columns'][0]['id'])
    self.assertEqual('redfang.tabid2', d['tables'][1]['id'])
    self.assertEqual('redfang.tabid2.c2', d['tables'][1]['columns'][0]['id'])

    log.info(repr(r))
    self.assertIsNotNone(repr(r))

def dict_to_report(dct):
    if '_version' in dct:
        version = dct['_version']
        if version not in SUPPORTED_VERSIONS:
            # should this raise an exception?
            log.warn("{v} is an unsupported version. Supported versions {vs}".format(
                v=version, vs=SUPPORTED_VERSIONS))

    report_id = dct['id']

    plot_groups = []
    if 'plotGroups' in dct:
        pg = dct['plotGroups']
        if pg:
            plot_groups = [_to_plot_group(d) for d in pg]

    attributes = []
    for r_attr in dct.get('attributes', []):
        attr = _to_attribute(r_attr)
        attributes.append(attr)

    tables = []
    for table_d in dct.get('tables', []):
        t = _to_table(table_d)
        tables.append(t)

    report = Report(report_id,
                    plotgroups=plot_groups,
                    tables=tables,
                    attributes=attributes)

    return report

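# Round-trip sketch (illustrative, not from the original module): because
# dict_to_report consumes the same structure that Report.to_dict/write_json
# emit, a report written to JSON can be reloaded from its dict form. The
# helper name and file path are hypothetical.
def _example_report_roundtrip(path="example_report.json"):
    import json
    r = Report("roundtrip_example",
               attributes=[Attribute("n_items", 42, name="Number of Items")])
    r.write_json(path)                       # serialize to JSON on disk
    with open(path) as f:
        loaded = dict_to_report(json.load(f))  # rebuild a Report from the dict
    assert loaded.id == r.id
    return loaded
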
def to_report(self, dataset_uuids=()):
    """Convert a summary object to a pbcommand Report object."""
    attributes = [Attribute(id_=attribute_id,
                            value=attribute_val,
                            name=attribute_name)
                  for attribute_id, attribute_name, attribute_val in
                  zip(self.fieldsIDs, self.fieldsNames, self.fields)]
    return Report(id_=self.REPORT_ID,
                  attributes=attributes,
                  dataset_uuids=dataset_uuids)

def run_to_report(summary_csv):
    log.info("Generating PCR report v{v} from summary '{s}'".format(
        v=__version__, s=summary_csv))

    # Convert the data into a PBreports table
    table = create_table(summary_csv)

    # ids must be lowercase.
    r = Report(Constants.R_ID, tables=[table])

    return spec.apply_view(r)

def test_get_table_by_id(self):
    r = Report('redfang')
    t1 = Table('tabid1')
    t1.add_column(Column('c1'))
    r.add_table(t1)
    t = r.get_table_by_id('tabid1')
    self.assertEqual(t, t1)

def test_version_and_changelist(self):
    r = Report('example')
    d = r.to_dict()
    log.info("\n" + pformat(d))

    fields = ('version', 'uuid', 'plotGroups', 'tables', 'dataset_uuids')
    for field in fields:
        self.assertTrue(field in d)

def dataset_to_report(ds):
    """
    :type ds: DataSet
    :param ds: DataSet instance to summarize
    :return: Report with dataset attributes
    """
    attributes = _dataset_to_attribute_reports(ds)
    return Report("ds_report",
                  attributes=attributes,
                  dataset_uuids=[ds.uuid])

def test_get_plotgroup_by_id(self):
    r = Report('redfang')
    pg1 = PlotGroup('pgid1')
    pg1.add_plot(Plot('pid1', 'anImg'))
    r.add_plotgroup(pg1)
    pg = r.get_plotgroup_by_id('pgid1')
    self.assertEqual(pg, pg1)

def test_get_plotgroup_by_id_with_bad_id(self):
    r = Report('redfang')
    pg1 = PlotGroup('pgid1')
    pg1.add_plot(Plot('pid1', 'anImg'))
    r.add_plotgroup(pg1)
    bad_pg = r.get_plotgroup_by_id('id_that_does_not_exist')
    self.assertIsNone(bad_pg)

def test_get_table_by_id_with_bad_id(self):
    r = Report('redfang')
    t1 = Table('tabid1')
    t1.add_column(Column('c1'))
    r.add_table(t1)
    bad_t = r.get_table_by_id('id_that_does_not_exist')
    self.assertIsNone(bad_t)

def to_report(stats_xml):
    """Main point of entry

    :type stats_xml: str
    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))

    dset = DataSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.prodDist:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    dsets = [dset]
    for subdset in dset.subdatasets:
        if subdset.metadata.summaryStats:
            dsets.append(subdset)

    col_ids = [Constants.C_CONTEXT,
               Constants.C_ZMWS,
               Constants.C_PROD_0,
               Constants.C_PROD_1,
               Constants.C_PROD_2]
    col_values = [[], [], [], [], []]
    for dset in dsets:
        if len(dsets) > 1 and len(col_values[0]) == 0:
            movie_name = "Combined"
        else:
            try:
                collection = list(dset.metadata.collections)[0]
                movie_name = collection.context
            except AttributeError:
                movie_name = "NA"

        productive_zmws = int(dset.metadata.summaryStats.numSequencingZmws)
        empty, productive, other, _ = dset.metadata.summaryStats.prodDist.bins

        prod0 = np.round(100.0 * empty / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        prod1 = np.round(100.0 * productive / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        prod2 = np.round(100.0 * other / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        this_row = [movie_name, productive_zmws, prod0, prod1, prod2]
        # append each value to its column (avoids the Py2-only tuple-unpacking lambda)
        for col, value in zip(col_values, this_row):
            col.append(value)

    columns = [Column(cid, values=vals)
               for cid, vals in zip(col_ids, col_values)]
    tables = [Table(Constants.T_LOADING, columns=columns)]

    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    tables=tables,
                    attributes=None,
                    plotgroups=None)
    return meta_rpt.apply_view(report)

def produce_report(genome_length, raw_reads, raw_mean, raw_n50, raw_p95,
                   raw_esize, raw_bases, raw_coverage, length_cutoff,
                   seed_reads, seed_bases, seed_mean, seed_n50, seed_p95,
                   seed_esize, seed_coverage, preassembled_reads,
                   preassembled_mean, preassembled_n50, preassembled_p95,
                   preassembled_esize, preassembled_bases,
                   preassembled_coverage, preassembled_yield,
                   preassembled_seed_fragmentation,
                   preassembled_seed_truncation, **ignored):
    """Return a preassembly report as a JSON string.

    Parameters are as defined in the spec-file.
    Extra parameters are ignored, so that the caller may be augmented in a
    separate commit prior to updates here. (That facilitates cross-team
    collaboration.)
    """
    log.info("Starting {f!r}".format(f=os.path.basename(__file__)))

    # Report Attributes
    attrs = []
    attrs.append(Attribute('genome_length', genome_length))
    attrs.append(Attribute('raw_reads', raw_reads))
    attrs.append(Attribute('raw_mean', int(round(raw_mean))))
    attrs.append(Attribute('raw_n50', raw_n50))
    attrs.append(Attribute('raw_p95', raw_p95))
    attrs.append(Attribute('raw_esize', raw_esize))
    attrs.append(Attribute('raw_bases', raw_bases))
    attrs.append(Attribute('raw_coverage', raw_coverage))
    attrs.append(Attribute('length_cutoff', length_cutoff))
    attrs.append(Attribute('seed_reads', seed_reads))
    attrs.append(Attribute('seed_mean', int(round(seed_mean))))
    attrs.append(Attribute('seed_n50', seed_n50))
    attrs.append(Attribute('seed_p95', seed_p95))
    attrs.append(Attribute('seed_esize', seed_esize))
    attrs.append(Attribute('seed_bases', seed_bases))
    attrs.append(Attribute('seed_coverage', seed_coverage))
    attrs.append(Attribute('preassembled_reads', preassembled_reads))
    attrs.append(Attribute('preassembled_mean', int(round(preassembled_mean))))
    attrs.append(Attribute('preassembled_n50', preassembled_n50))
    attrs.append(Attribute('preassembled_p95', preassembled_p95))
    attrs.append(Attribute('preassembled_esize', preassembled_esize))
    attrs.append(Attribute('preassembled_bases', preassembled_bases))
    attrs.append(Attribute('preassembled_coverage',
                           int(round(preassembled_coverage))))
    attrs.append(Attribute('preassembled_yield', preassembled_yield))
    attrs.append(Attribute('preassembled_seed_fragmentation',
                           preassembled_seed_fragmentation))
    attrs.append(Attribute('preassembled_seed_truncation',
                           preassembled_seed_truncation))

    report = Report(Constants.R_ID, title='Preassembly', attributes=attrs)

    from pbreports.io.specs import load_spec
    spec = load_spec(Constants.R_ID)
    report = spec.apply_view(report)

    return report.to_json()

def write_random_report(path, nrecords):
    attributes = [Attribute("mock_attr_{i}".format(i=i), i,
                            name="Attr {i}".format(i=i))
                  for i in xrange(nrecords)]
    r = Report("mock_report", attributes=attributes)
    r.write_json(path)
    return r

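# Sketch of exercising the mock-report helper above (an illustrative
# assumption; the helper name and default path are hypothetical, and the
# "attributes" JSON key is taken from the serialized form used in the tests
# elsewhere in these snippets).
def _example_random_report_roundtrip(path="mock_report.json", nrecords=5):
    import json
    r = write_random_report(path, nrecords)  # writes the report JSON to path
    with open(path) as f:
        d = json.load(f)
    assert len(d["attributes"]) == nrecords  # one serialized attribute per record
    return r
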
def test_get_plot_by_id(self):
    r = Report('redfang')
    pg1 = PlotGroup('pgid1')
    p1 = Plot('pid1', 'anImg')
    pg1.add_plot(p1)
    r.add_plotgroup(pg1)
    p = r.get_plotgroup_by_id('pgid1').get_plot_by_id('pid1')
    assert p == p1

def _to_workflow_settings_report(bg, workflow_opts, task_opts, state,
                                 was_successful):
    tables = [_workflow_opts_to_table(workflow_opts),
              _task_opts_to_table(task_opts)]
    report = Report("workflow_settings_report", tables=tables)
    return report

def test_get_column_by_id(self):
    r = Report('redfang')
    t1 = Table('tabid1')
    c1 = Column('c1')
    t1.add_column(c1)
    r.add_table(t1)
    c = r.get_table_by_id('tabid1').get_column_by_id('c1')
    self.assertEqual(c, c1)

def test_get_attribute_by_id(self):
    a = Attribute('a', 'b')
    a2 = Attribute('b', 'b2')
    attributes = [a, a2]
    r = Report('redfang', attributes=attributes)
    a1 = r.get_attribute_by_id('a')
    self.assertEqual(a, a1)

def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # stats_xml should be a dataset:
    dset = SubreadSet(stats_xml)
    dataset_uuids = [dset.uuid]
    # but if it isn't, no problem:
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
        # an sts file was provided which will generate a new random uuid
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # we want all of the length distributions in this report to look the same,
    # so we make the shaper here and pass it around:
    alldists = (dset.metadata.summaryStats.readLenDists[:] +
                dset.metadata.summaryStats.insertReadLenDists[:])
    len_dist_shaper = continuous_dist_shaper(alldists, trim_excess=True)

    attr = to_read_stats_attributes(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists)
    attr.extend(to_insert_stats_attributes(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists))

    plot_groups = to_read_stats_plots(
        readLenDists=dset.metadata.summaryStats.readLenDists,
        readQualDists=dset.metadata.summaryStats.readQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper)
    plot_groups.extend(to_insert_stats_plots(
        readLenDists=dset.metadata.summaryStats.insertReadLenDists,
        readQualDists=dset.metadata.summaryStats.insertReadQualDists,
        output_dir=output_dir,
        lenDistShaper=len_dist_shaper))

    # build the report:
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attr,
                    plotgroups=plot_groups,
                    dataset_uuids=dataset_uuids)

    return meta_rpt.apply_view(report)

def fasta_to_report(fasta_file, output_json):
    nrecords = 0
    with FastaReader(fasta_file) as r:
        for _ in r:
            nrecords += 1
    attr = Attribute("num_records", nrecords, "Number of Records")
    plot_groups = try_fasta_to_plot_group(fasta_file, output_json)
    return Report("fasta_report", attributes=[attr], plotgroups=plot_groups)

def test_bad_01(self):
    r = Report("stuff", uuid=1234)
    d = r.to_dict()

    def fx():
        # when the Report validation is enabled, use to_json
        # r.to_json()
        return validate_report(d)

    self.assertRaises(IOError, fx)