def test_to_dict(self):
    """
    The id of report sub elements is prepended with the id of the parent
    element when to_dict is called.
    """
    r = Report('redfang')
    a = Attribute('a', 'b')
    a2 = Attribute('a2', 'b2')
    r.add_attribute(a)
    r.add_attribute(a2)
    pg = PlotGroup('pgid')
    pg.add_plot(Plot('pid', 'anImg'))
    pg.add_plot(Plot('pid2', 'anImg2'))
    r.add_plotgroup(pg)
    t = Table('tabid')
    t.add_column(Column('c1'))
    r.add_table(t)
    d = r.to_dict()
    log.debug("\n" + pformat(d))
    # Every nested element id should be namespaced by its parent id chain.
    self.assertEqual('redfang', d['id'])
    self.assertEqual('redfang.a', d['attributes'][0]['id'])
    self.assertEqual('redfang.a2', d['attributes'][1]['id'])
    self.assertEqual('redfang.pgid', d['plotGroups'][0]['id'])
    self.assertEqual('redfang.pgid.pid', d['plotGroups'][0]['plots'][0]['id'])
    self.assertEqual('redfang.pgid.pid2', d['plotGroups'][0]['plots'][1]['id'])
    self.assertEqual('redfang.tabid', d['tables'][0]['id'])
    self.assertEqual('redfang.tabid.c1', d['tables'][0]['columns'][0]['id'])
def make_control_report(control_cmph5, filtered_subreads_csv, report,
                        output_dir, dpi, dumpdata):
    """
    Entry to report.

    :param control_cmph5: (str) path to control_reads.cmp.h5
    :param filtered_subreads_csv: (str) path to filtered_subread_summary.csv
    :param report: (str) report file name, written under output_dir
    :param output_dir: (str) output directory for the report and plots
    """
    _validate_inputs(control_cmph5, filtered_subreads_csv)
    name, control_reads = _get_control_reads(control_cmph5)
    filtered_reads = _get_filtered_reads(filtered_subreads_csv)
    control_data, sample_data = _process_reads(control_reads, filtered_reads)
    nr = _get_num_control_reads(control_data)
    if nr == 0:
        # Not sure this ever happens, but logic exists in makeControlReport.py
        # With no control reads there is nothing to plot; emit the canned
        # error report and bail out early.
        r = _get_error_report()
        r.write_json(os.path.join(output_dir, report))
        return
    atts = _get_attributes(name, control_data, sample_data)
    pgs = [_get_plot_group_score(control_data,
                                 sample_data,
                                 output_dir),
           _get_plot_group_length(control_data,
                                  sample_data,
                                  output_dir)]
    r = Report(meta_rpt.id, attributes=atts, plotgroups=pgs)
    r = meta_rpt.apply_view(r)
    r.write_json(os.path.join(output_dir, report))
def run_reference_dataset_report(reference_ds, output_json):
    """
    Generate a diagnostic report for a ReferenceSet.

    :param reference_ds: dataset to report on
    :type reference_ds: ReferenceSet
    :param output_json: path the report JSON is written to
    :return: 0 on success
    """
    output_dir = os.path.dirname(output_json)
    host = socket.getfqdn()
    attributes = _dataset_to_attribute_reports(reference_ds)
    _add = attributes.append
    _add(Attribute("host", host, name="Host"))
    _add(Attribute("task_dir", output_dir, name="Task Directory"))
    fasta_file = reference_ds.toExternalFiles()[0]
    # BUG FIX: plot files must be written into the report's directory, not a
    # path derived from the JSON file name itself (matches the other
    # run_reference_dataset_report variants in this codebase).
    plot_groups = try_fasta_to_plot_group(fasta_file, output_dir)
    report = Report("dev_diagnostic_report",
                    attributes=attributes,
                    plotgroups=plot_groups,
                    dataset_uuids=[reference_ds.uuid])
    report.write_json(output_json)
    return 0
def make_topvariants_report(gff, reference, how_many, batch_sort_size, report,
                            output_dir):
    """
    Entry to report.

    :param gff: (str) path to variants.gff (or rare_variants.gff). Note,
        could also be *.gz
    :param reference: (str) path to reference dir
    :param how_many: (int)
    :param batch_sort_size: (int)
    :param report: (str) report name
    :param output_dir: (str) output dir
    """
    _validate_inputs(gff, reference, how_many, batch_sort_size)
    table_builder = VariantTableBuilder()
    vf = VariantFinder(gff, reference, how_many, batch_sort_size)
    top = vf.find_top()
    for v in top:
        table_builder.add_variant(v)
    r = Report(Constants.R_ID, tables=[table_builder.table],
               dataset_uuids=(ReferenceSet(reference).uuid, ))
    r = spec.apply_view(r)
    r.write_json(os.path.join(output_dir, report))
    return 0
def make_variants_report(aln_summ_gff, variants_gff, reference,
                         max_contigs_to_plot, report, output_dir, dpi=72,
                         dumpdata=True):
    """
    Entry to report.

    :param aln_summ_gff: (str) path to alignment_summary.gff
    :param variants_gff: (str) path to variants_gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    :param report: (str) report file name, written under output_dir
    :param output_dir: (str) output directory
    :return: the written Report object
    """
    _validate_inputs([('aln_summ_gff', aln_summ_gff),
                      ('variants_gff', variants_gff),
                      ('reference', reference)])
    # reference entry & top contings
    ref = openReference(reference)
    top_contigs = get_top_contigs_from_ref_entry(ref, max_contigs_to_plot)
    # extract gff data from files
    ref_data, contig_variants = _extract_alignment_summ_data(
        aln_summ_gff, top_contigs)
    _append_variants_gff_data(ref_data, variants_gff)
    # make report objects
    table, atts = _get_consensus_table_and_attributes(ref_data, ref)
    plotgroup = _create_variants_plot_grp(
        top_contigs, contig_variants, output_dir)
    rpt = Report(Constants.R_ID,
                 plotgroups=[plotgroup],
                 attributes=atts,
                 tables=[table],
                 dataset_uuids=(ReferenceSet(reference).uuid,))
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return rpt
def make_control_report(control_cmph5, filtered_subreads_csv, report,
                        output_dir, dpi, dumpdata):
    """
    Entry to report.

    :param control_cmph5: (str) path to control_reads.cmp.h5
    :param filtered_subreads_csv: (str) path to filtered_subread_summary.csv
    :param report: (str) report file name, written under output_dir
    :param output_dir: (str) output directory for the report and plots
    """
    _validate_inputs(control_cmph5, filtered_subreads_csv)
    name, control_reads = _get_control_reads(control_cmph5)
    filtered_reads = _get_filtered_reads(filtered_subreads_csv)
    control_data, sample_data = _process_reads(control_reads, filtered_reads)
    nr = _get_num_control_reads(control_data)
    if nr == 0:
        # Not sure this ever happens, but logic exists in makeControlReport.py
        # No control reads -> emit the canned error report and return early.
        r = _get_error_report()
        r.write_json(os.path.join(output_dir, report))
        return
    atts = _get_attributes(name, control_data, sample_data)
    pgs = [
        _get_plot_group_score(control_data, sample_data, output_dir),
        _get_plot_group_length(control_data, sample_data, output_dir)
    ]
    r = Report(meta_rpt.id, attributes=atts, plotgroups=pgs)
    r = meta_rpt.apply_view(r)
    r.write_json(os.path.join(output_dir, report))
def make_report(in_fn, out_dir='.', bounds=None, nolegend=False,
                reference=None, dpi=60, name=None):
    """AlignmentToPng Report

    Convert an input bam or DataSet XML file to a figure of Concordance vs.
    Subread Length.

    Args:
        in_fn: the bam, DataSet XML or cmp.h5 file to turn into a length vs
            concordance plot
        out_dir: the output directory to be used with the file name or default
        name: the file name to be used with the outdir or default (no full
            path filenames!)
        bounds: the figure limits (in xmin:xmax:ymin:ymax)
        nolegend: exclude the figure legend
        reference: the reference to use in the figure. Default of all
            references
        dpi: the dots per inch (resolution) of the figure
    """
    data = _read_in_file(in_fn, reference)
    report = Report('alignment_to_png_report')
    if not name:
        # Default PNG name mirrors the input file's basename.
        name = '%s.png' % os.path.splitext(os.path.basename(in_fn))[0]
    png_fn = os.path.join(out_dir, name)
    _make_plot(data, png_fn, bounds, dpi, nolegend)
    plot_group = PlotGroup(Constants.PLOT_GROUP_ID,
                           plots=[Plot('alignment_to_png_plot',
                                       os.path.basename(png_fn))])
    report.add_plotgroup(plot_group)
    return report
def test_merge(self):
    """Merging chunked reports sums attribute values and unions uuids."""
    EXPECTED_VALUES = {
        "n_reads": 300,
        "n_zmws": 60,
    }
    NAMES = {"n_reads": "Number of reads", "n_zmws": "Number of ZMWs"}
    chunks = [
        Report("pbcommand_test",
               attributes=[
                   Attribute(id_="n_reads", value=50, name="Number of reads"),
                   Attribute(id_="n_zmws", value=10, name="Number of ZMWs")
               ],
               dataset_uuids=["12345"]),
        Report("pbcommand_test",
               attributes=[
                   Attribute(id_="n_reads", value=250, name="Number of reads"),
                   Attribute(id_="n_zmws", value=50, name="Number of ZMWs")
               ]),
    ]
    r = Report.merge(chunks)
    self.assertEqual([a.id for a in r.attributes], ["n_reads", "n_zmws"])
    # dataset uuids come from whichever chunks declared them
    self.assertEqual(r._dataset_uuids, ["12345"])
    for attr in r.attributes:
        self.assertEqual(attr.value, EXPECTED_VALUES[attr.id])
        self.assertEqual(attr.name, NAMES[attr.id])
    for table in r.tables:
        for column in table.columns:
            self.assertEqual(column.header, NAMES[column.id])
def test_to_dict(self):
    """
    The id of report sub elements is prepended with the id of the parent
    element when to_dict is called.
    """
    r = Report("redfang")
    a = Attribute("a", "b")
    a2 = Attribute("a2", "b2")
    r.add_attribute(a)
    r.add_attribute(a2)
    pg = PlotGroup("pgid")
    pg.add_plot(Plot("pid", "anImg"))
    pg.add_plot(Plot("pid2", "anImg2"))
    r.add_plotgroup(pg)
    t = Table("tabid")
    t.add_column(Column("c1"))
    r.add_table(t)
    d = r.to_dict()
    log.debug("\n" + pformat(d))
    # Every nested element id should be namespaced by its parent id chain.
    self.assertEqual("redfang", d["id"])
    self.assertEqual("redfang.a", d["attributes"][0]["id"])
    self.assertEqual("redfang.a2", d["attributes"][1]["id"])
    self.assertEqual("redfang.pgid", d["plotGroups"][0]["id"])
    self.assertEqual("redfang.pgid.pid",
                     d["plotGroups"][0]["plots"][0]["id"])
    self.assertEqual("redfang.pgid.pid2",
                     d["plotGroups"][0]["plots"][1]["id"])
    self.assertEqual("redfang.tabid", d["tables"][0]["id"])
    self.assertEqual("redfang.tabid.c1",
                     d["tables"][0]["columns"][0]["id"])
def run_reference_dataset_report(reference_ds, output_json):
    """
    Generate a diagnostic report for a ReferenceSet.

    :param reference_ds: dataset to report on
    :type reference_ds: ReferenceSet
    :param output_json: path the report JSON is written to
    :return: 0 on success
    """
    output_dir = os.path.dirname(output_json)
    host = socket.getfqdn()
    attributes = _dataset_to_attribute_reports(reference_ds)
    _add = attributes.append
    _add(Attribute("host", host, name="Host"))
    _add(Attribute("task_dir", output_dir, name="Task Directory"))
    fasta_file = reference_ds.toExternalFiles()[0]
    # Plots are written alongside the report JSON.
    plot_groups = try_fasta_to_plot_group(fasta_file, output_dir)
    report = Report("dev_diagnostic_report",
                    attributes=attributes,
                    plotgroups=plot_groups,
                    dataset_uuids=[reference_ds.uuid])
    report.write_json(output_json)
    return 0
def make_topvariants_report(gff, reference, how_many, batch_sort_size, report,
                            output_dir, is_minor_variants_rpt=False):
    """
    Entry to report.

    :param gff: (str) path to variants.gff (or rare_variants.gff). Note,
        could also be *.gz
    :param reference: (str) path to reference dir
    :param how_many: (int)
    :param batch_sort_size: (int)
    :param report: (str) report name
    :param output_dir: (str) output dir
    :param is_minor_variants_rpt: (bool) True to create a minor top variant
        report. False to create a variant report.
    """
    _validate_inputs(gff, reference, how_many, batch_sort_size)
    table_builder = None
    # The two builders produce different table schemas from the same variants.
    if is_minor_variants_rpt:
        table_builder = MinorVariantTableBuilder()
    else:
        table_builder = VariantTableBuilder()
    vf = VariantFinder(gff, reference, how_many, batch_sort_size)
    top = vf.find_top()
    for v in top:
        table_builder.add_variant(v)
    r = Report(Constants.R_ID, tables=[table_builder.table],
               dataset_uuids=(ReferenceSet(reference).uuid,))
    r = spec.apply_view(r)
    r.write_json(os.path.join(output_dir, report))
    return 0
def _example_main(input_file, output_file, **kwargs):
    """
    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC
    models.

    :param input_file: sequence of two CSV paths: [dataset CSV, plots CSV]
    :param output_file: path of the report JSON to write
    :return: 0 on success
    """
    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(
        i=input_file, o=output_file, k=kwargs))
    # Open dset CSV. Store absolute path of each alignment set.
    dset_paths = _get_dset_paths(input_file[0])
    # Open plots CSV. Store names of plots to produce.
    plots_to_generate = _get_plots_to_generate(input_file[1])
    dsets_kpis = {}
    for f in dset_paths:
        dset = openDataSet(dset_paths[f]['aset'])
        subsampled_dset = _subsample_alignments(dset)
        dsets_kpis[f] = _getKPIs(dset, subsampled_dset)
    figures = []
    # figure tuple has form (plot_group_id, plot_id, figure)
    if 'accuracy_vs_readlength' in plots_to_generate:
        figures.append(('accuracy', 'accuracy_vs_readlength',
                        accuracy_plots._plot_accuracy_vs_readlength(dsets_kpis)))
    if 'accuracy' in plots_to_generate:
        figures.append(('accuracy', 'accuracy',
                        accuracy_plots._plot_accuracy_distribution(dsets_kpis)))
    if 'accuracy_boxplot' in plots_to_generate:
        figures.append(('accuracy', 'accuracy_boxplot',
                        accuracy_plots._plot_accuracy_boxplots(dsets_kpis)))
    all_plots = {}  # dictionary of plots. keys are groups
    for plot_group, plot_id, fig in figures:
        if plot_group not in all_plots.keys():
            all_plots[plot_group] = []
        # Render the plotly figure to HTML, screenshot it (full size and
        # thumbnail) with the phantomjs driver, then delete the HTML.
        # NOTE(review): phantomjs_driver appears to be a module-level
        # webdriver instance — confirm it is initialized before this runs.
        plot(fig, filename='{i}.html'.format(i=plot_id), show_link=False,
             auto_open=False)
        phantomjs_driver.set_window_size(1920, 1080)
        phantomjs_driver.get('{i}.html'.format(i=plot_id))
        phantomjs_driver.save_screenshot('{i}.png'.format(i=plot_id))
        phantomjs_driver.get('{i}.html'.format(i=plot_id))
        phantomjs_driver.save_screenshot('{i}_thumb.png'.format(i=plot_id))
        os.remove('{i}.html'.format(i=plot_id))
        plot_path = '{i}.png'.format(i=plot_id)
        thumb_path = '{i}_thumb.png'.format(i=plot_id)
        all_plots[plot_group].append(
            Plot(plot_id, plot_path, thumbnail=thumb_path))
    plot_groups = []
    for plot_group_title in all_plots.keys():
        plot_group = PlotGroup(
            plot_group_title, plots=all_plots[plot_group_title])
        plot_groups.append(plot_group)
    report = Report('mh_toy', tables=(), plotgroups=plot_groups,
                    attributes=())
    report.write_json(output_file)
    phantomjs_driver.quit()
    return 0
def test_version_and_changelist(self):
    """The serialized report dict carries the expected top-level fields."""
    report = Report('example')
    report_d = report.to_dict()
    log.info("\n" + pformat(report_d))
    expected_fields = ('version', 'uuid', 'plotGroups', 'tables',
                       'dataset_uuids')
    for expected in expected_fields:
        self.assertTrue(expected in report_d)
def test_get_table_by_id_with_bad_id(self):
    """Looking up an unknown table id returns None rather than raising."""
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    missing = report.get_table_by_id('id_that_does_not_exist')
    self.assertIsNone(missing)
def test_get_plotgroup_by_id(self):
    """A plot group can be retrieved from a report by its id."""
    report = Report('redfang')
    group = PlotGroup('pgid1')
    group.add_plot(Plot('pid1', 'anImg'))
    report.add_plotgroup(group)
    found = report.get_plotgroup_by_id('pgid1')
    self.assertEqual(found, group)
def test_get_plotgroup_by_id_with_bad_id(self):
    """Looking up an unknown plot group id returns None."""
    report = Report('redfang')
    group = PlotGroup('pgid1')
    group.add_plot(Plot('pid1', 'anImg'))
    report.add_plotgroup(group)
    missing = report.get_plotgroup_by_id('id_that_does_not_exist')
    self.assertIsNone(missing)
def test_get_table_by_id(self):
    """A table can be retrieved from a report by its id."""
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    found = report.get_table_by_id('tabid1')
    self.assertEqual(found, table)
def test_get_table_by_id(self):
    """get_table_by_id returns the table registered under that id."""
    rpt = Report('redfang')
    tab = Table('tabid1')
    tab.add_column(Column('c1'))
    rpt.add_table(tab)
    self.assertEqual(rpt.get_table_by_id('tabid1'), tab)
def test_get_plotgroup_by_id_with_bad_id(self):
    """An unknown plot group id yields None."""
    rpt = Report('redfang')
    grp = PlotGroup('pgid1')
    grp.add_plot(Plot('pid1', 'anImg'))
    rpt.add_plotgroup(grp)
    self.assertIsNone(rpt.get_plotgroup_by_id('id_that_does_not_exist'))
def test_version_and_changelist(self):
    """A fresh report serializes with all required top-level keys."""
    rpt = Report('example')
    rpt_d = rpt.to_dict()
    log.info("\n" + pformat(rpt_d))
    for key in ('version', 'uuid', 'plotGroups', 'tables', 'dataset_uuids'):
        self.assertTrue(key in rpt_d)
def test_get_table_by_id_with_bad_id(self):
    """An unknown table id yields None."""
    rpt = Report('redfang')
    tab = Table('tabid1')
    tab.add_column(Column('c1'))
    rpt.add_table(tab)
    self.assertIsNone(rpt.get_table_by_id('id_that_does_not_exist'))
def test_get_plotgroup_by_id(self):
    """get_plotgroup_by_id returns the group registered under that id."""
    rpt = Report('redfang')
    grp = PlotGroup('pgid1')
    grp.add_plot(Plot('pid1', 'anImg'))
    rpt.add_plotgroup(grp)
    self.assertEqual(rpt.get_plotgroup_by_id('pgid1'), grp)
def produce_report(genome_length, raw_reads, raw_mean, raw_n50, raw_p95,
                   raw_esize, raw_bases, raw_coverage, length_cutoff,
                   seed_reads, seed_bases, seed_mean, seed_n50, seed_p95,
                   seed_esize, seed_coverage, preassembled_reads,
                   preassembled_mean, preassembled_n50, preassembled_p95,
                   preassembled_esize, preassembled_bases,
                   preassembled_coverage, preassembled_yield,
                   preassembled_seed_fragmentation,
                   preassembled_seed_truncation, **ignored):
    """Return a preassembly report as JSON string.

    Parameters are as defined in the spec-file.

    Extra parameters are ignored, so that the caller may be
    augmented in a separate commit prior to updates here.
    (That facilitates cross-team collaboration.)
    """
    log.info("Starting {f!r}".format(f=os.path.basename(__file__)))
    # Report Attributes, in display order.  Mean and coverage values are
    # rounded to whole numbers; everything else passes through unchanged.
    attr_values = [
        ('genome_length', genome_length),
        ('raw_reads', raw_reads),
        ('raw_mean', int(round(raw_mean))),
        ('raw_n50', raw_n50),
        ('raw_p95', raw_p95),
        ('raw_esize', raw_esize),
        ('raw_bases', raw_bases),
        ('raw_coverage', raw_coverage),
        ('length_cutoff', length_cutoff),
        ('seed_reads', seed_reads),
        ('seed_mean', int(round(seed_mean))),
        ('seed_n50', seed_n50),
        ('seed_p95', seed_p95),
        ('seed_esize', seed_esize),
        ('seed_bases', seed_bases),
        ('seed_coverage', seed_coverage),
        ('preassembled_reads', preassembled_reads),
        ('preassembled_mean', int(round(preassembled_mean))),
        ('preassembled_n50', preassembled_n50),
        ('preassembled_p95', preassembled_p95),
        ('preassembled_esize', preassembled_esize),
        ('preassembled_bases', preassembled_bases),
        ('preassembled_coverage', int(round(preassembled_coverage))),
        ('preassembled_yield', preassembled_yield),
        ('preassembled_seed_fragmentation', preassembled_seed_fragmentation),
        ('preassembled_seed_truncation', preassembled_seed_truncation),
    ]
    attrs = [Attribute(attr_id, value) for attr_id, value in attr_values]
    report = Report(Constants.R_ID, title='Preassembly', attributes=attrs)
    # Imported here to avoid a hard module-level dependency on pbreports.
    from pbreports.io.specs import load_spec
    spec = load_spec(Constants.R_ID)
    report = spec.apply_view(report)
    return report.to_json()
def test_get_attribute_by_id(self):
    """An attribute can be retrieved from a report by its id."""
    first = Attribute("a", "b")
    second = Attribute("b", "b2")
    report = Report("redfang", attributes=[first, second])
    found = report.get_attribute_by_id("a")
    self.assertEqual(first, found)
def write_random_report(path, nrecords):
    """
    Write a mock report with `nrecords` numbered attributes to `path`.

    :param path: (str) destination JSON file
    :param nrecords: (int) number of mock attributes to generate
    :return: the written Report object
    """
    # NOTE: xrange implies this module targets Python 2.
    attributes = [Attribute("mock_attr_{i}".format(i=i), i,
                            name="Attr {i}".format(i=i))
                  for i in xrange(nrecords)]
    r = Report("mock_report", attributes=attributes)
    r.write_json(path)
    return r
def test_get_column_by_id(self):
    """A column can be retrieved via its parent table's id."""
    report = Report('redfang')
    table = Table('tabid1')
    column = Column('c1')
    table.add_column(column)
    report.add_table(table)
    found = report.get_table_by_id('tabid1').get_column_by_id('c1')
    self.assertEqual(found, column)
def test_get_attribute_by_id(self):
    """get_attribute_by_id returns the matching Attribute object."""
    attr_a = Attribute('a', 'b')
    attr_b = Attribute('b', 'b2')
    rpt = Report('redfang', attributes=[attr_a, attr_b])
    self.assertEqual(attr_a, rpt.get_attribute_by_id('a'))
def test_get_plot_by_id(self):
    """A plot can be retrieved via its parent plot group's id."""
    report = Report('redfang')
    group = PlotGroup('pgid1')
    plot_obj = Plot('pid1', 'anImg')
    group.add_plot(plot_obj)
    report.add_plotgroup(group)
    found = report.get_plotgroup_by_id('pgid1').get_plot_by_id('pid1')
    assert found == plot_obj
def test_get_column_by_id(self):
    """get_column_by_id returns the column registered under that id."""
    rpt = Report('redfang')
    tab = Table('tabid1')
    col = Column('c1')
    tab.add_column(col)
    rpt.add_table(tab)
    self.assertEqual(
        rpt.get_table_by_id('tabid1').get_column_by_id('c1'), col)
def test_get_attribute_by_id(self):
    """Report attribute lookup by id returns the original object."""
    wanted = Attribute('a', 'b')
    other = Attribute('b', 'b2')
    rpt = Report('redfang', attributes=[wanted, other])
    found = rpt.get_attribute_by_id('a')
    self.assertEqual(wanted, found)
def test_bad_01(self):
    """A report with a non-string uuid fails validation with IOError."""
    report = Report("stuff", uuid=1234)
    report_d = report.to_dict()

    def _do_validate():
        # when the Report validation is enabled, use to_json
        # report.to_json()
        return validate_report(report_d)

    self.assertRaises(IOError, _do_validate)
def test_bad_01(self):
    """Validation of a report dict with an invalid uuid raises IOError."""
    bad_report = Report("stuff", uuid=1234)
    bad_d = bad_report.to_dict()

    def _validate():
        # when the Report validation is enabled, use to_json
        # bad_report.to_json()
        return validate_report(bad_d)

    self.assertRaises(IOError, _validate)
def test_get_table_by_id(self):
    """Table lookup works and an empty table serializes to no columns."""
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    found = report.get_table_by_id('tabid1')
    assert found == table
    # A column with no values contributes nothing to the columns dict.
    assert len(found.to_columns_d()) == 0
def test_get_column_by_id_with_bad_id(self):
    """Looking up an unknown column id returns None."""
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    missing = report.get_table_by_id('tabid1').get_column_by_id(
        'id_that_does_not_exist')
    assert missing is None
def test_get_plot_by_id_with_bad_id(self):
    """Looking up an unknown plot id returns None."""
    report = Report('redfang')
    group = PlotGroup('pgid1')
    group.add_plot(Plot('pid1', 'anImg'))
    report.add_plotgroup(group)
    missing = report.get_plotgroup_by_id('pgid1').get_plot_by_id(
        'id_that_does_not_exist')
    assert missing is None
def test_get_attribute_by_id_with_bad_id(self):
    """A known attribute id resolves; an unknown one yields None."""
    attrs = [Attribute("a", "b"), Attribute("b", "b2")]
    report = Report("redfang", attributes=attrs)
    found = report.get_attribute_by_id("a")
    self.assertEqual(found.value, "b")
    missing = report.get_attribute_by_id("id_that_does_not_exist")
    self.assertIsNone(missing)
def test_get_attribute_by_id_with_bad_id(self):
    """Attribute lookup succeeds for known ids and is None otherwise."""
    attrs = [Attribute('a', 'b'), Attribute('b', 'b2')]
    rpt = Report('redfang', attributes=attrs)
    found = rpt.get_attribute_by_id('a')
    assert found.value == 'b'
    assert rpt.get_attribute_by_id('id_that_does_not_exist') is None
def test_get_attribute_by_id_with_bad_id(self):
    """Unknown attribute ids return None instead of raising."""
    attrs = [Attribute('a', 'b'), Attribute('b', 'b2')]
    rpt = Report('redfang', attributes=attrs)
    self.assertEqual(rpt.get_attribute_by_id('a').value, 'b')
    self.assertIsNone(rpt.get_attribute_by_id('id_that_does_not_exist'))
def test_get_attribute_by_id_with_bad_id(self):
    """Valid attribute ids resolve to values; invalid ones yield None."""
    known = Attribute('a', 'b')
    other = Attribute('b', 'b2')
    rpt = Report('redfang', attributes=[known, other])
    resolved = rpt.get_attribute_by_id('a')
    self.assertEqual(resolved.value, 'b')
    unresolved = rpt.get_attribute_by_id('id_that_does_not_exist')
    self.assertIsNone(unresolved)
def test_merge(self):
    """Merged attribute values are summed across chunked reports."""
    chunk_a = Report.from_simple_dict(
        "pbcommand_test", {"n_reads": 50, "n_zmws": 10}, "pbcommand")
    chunk_b = Report.from_simple_dict(
        "pbcommand_test", {"n_reads": 250, "n_zmws": 50}, "pbcommand")
    merged = Report.merge([chunk_a, chunk_b])
    values = {a.id: a.value for a in merged.attributes}
    self.assertEqual(values['pbcommand_n_reads'], 300)
    self.assertEqual(values['pbcommand_n_zmws'], 60)
def test_version_and_changelist(self):
    """_version and _changelist keys exist; _version looks like X.Y."""
    report = Report('example')
    report_d = report.to_dict()
    log.info("\n" + pformat(report_d))
    self.assertTrue('_version' in report_d)
    self.assertTrue('_changelist' in report_d)
    # Not used anymore. The all version information is encoded in _version.
    # that should be sufficient.
    # self.assertTrue(isinstance(report_d['_changelist'], int))
    version_rx = re.compile(r'[0-9]*\.[0-9]*')
    self.assertIsNotNone(version_rx.search(report_d['_version']))
def test_version_and_changelist(self):
    """Serialized reports carry version metadata in dotted form."""
    rpt_d = Report('example').to_dict()
    log.info("\n" + pformat(rpt_d))
    for meta_key in ('_version', '_changelist'):
        self.assertTrue(meta_key in rpt_d)
    # Not used anymore. The all version information is encoded in _version.
    # that should be sufficient.
    # self.assertTrue(isinstance(rpt_d['_changelist'], int))
    pattern = re.compile(r'[0-9]*\.[0-9]*')
    self.assertIsNotNone(pattern.search(rpt_d['_version']))
def write_task_report(run_time, nproc, exit_code):
    """Write a workflow task report to task-report.json in the CWD.

    :param run_time: task wall-clock time
    :param nproc: number of processors used
    :param exit_code: task exit code
    """
    # Query uname once; index 0 is the system name, index 1 the host name.
    uname = os.uname()
    attributes = [
        Attribute("host", value=uname[1]),
        Attribute("system", value=uname[0]),
        Attribute("nproc", value=nproc),
        Attribute("run_time", value=run_time),
        Attribute("exit_code", value=exit_code),
    ]
    report = Report("workflow_task",
                    title="Workflow Task Report",
                    attributes=attributes,
                    tags=("internal", ))
    report.write_json("task-report.json")
def run_dev_ccs_report(rtc):
    """
    Write a simple XML summary report for a ConsensusReadSet.

    :param rtc: resolved tool contract; input_files[0] is the CCS XML,
        output_files[0] the report JSON destination
    :return: 0 on success
    """
    # Local import keeps pbcore optional at module load time.
    from pbcore.io import ConsensusReadSet
    with ConsensusReadSet(rtc.task.input_files[0]) as ds:
        # Ensure numRecords/totalLength reflect the current dataset contents.
        ds.updateCounts()
        attr = [Attribute("number_of_records", value=ds.numRecords),
                Attribute("total_length", value=ds.totalLength)]
        report = Report("ccs_report",
                        title="ConsensusReadSet XML Report",
                        attributes=attr)
        report.write_json(rtc.task.output_files[0])
    return 0
def make_modifications_report(modifications_h5, report, output_dir, dpi=72):
    """
    Entry point to report generation.

    :param modifications_h5: (str) path to the basemods HDF5 file
    :param report: (str) report file name, written under output_dir
    :param output_dir: (str) output directory for report and plot images
    :param dpi: (int) plot resolution
    :return: 0 on success
    """
    # FIX: close the HDF5 handle deterministically; the original leaked it.
    basemods_h5 = h5py.File(modifications_h5)
    try:
        scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
        hist = get_qmod_hist(basemods_h5, output_dir, dpi)
    finally:
        basemods_h5.close()
    pg = PlotGroup(Constants.PG_KIN,
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(meta_rpt.id, plotgroups=[pg])
    rpt = meta_rpt.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
def make_modifications_report(modifications_h5, report, output_dir, dpi=72):
    """
    Entry point to report generation.

    :param modifications_h5: (str) path to the basemods HDF5 file
    :param report: (str) report file name, written under output_dir
    :param output_dir: (str) output directory for report and plot images
    :param dpi: (int) plot resolution
    :return: 0 on success
    """
    # FIX: close the HDF5 handle deterministically; the original leaked it.
    basemods_h5 = h5py.File(modifications_h5)
    try:
        scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
        hist = get_qmod_hist(basemods_h5, output_dir, dpi)
    finally:
        basemods_h5.close()
    pg = PlotGroup(Constants.PG_KIN,
                   title=get_plotgroup_title(spec, Constants.PG_KIN),
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(spec.id, plotgroups=[pg])
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
def test_merge_tables(self):
    """Merging two LAA reports concatenates and reorders table rows."""
    names = ["laa_report1.json", "laa_report2.json"]
    r = Report.merge([_to_report(names[0]), _to_report(names[1])])
    table = r.tables[0]
    self.assertEqual(len(table.columns), 7)
    self.assertEqual(
        [col.header for col in table.columns],
        [
            "BarcodeName",
            "FastaName",
            "CoarseCluster",
            "Phase",
            "TotalCoverage",
            "SequenceLength",
            "PredictedAccuracy",
        ],
    )
    # Each column carries the union of rows from both input reports.
    for col in table.columns:
        self.assertEqual(len(col.values), 4)
        if col.header == "BarcodeName":
            self.assertEqual(col.values,
                             ["Barcode1", "Barcode2", "Barcode4", "Barcode3"])
        elif col.header == "FastaName":
            self.assertEqual(col.values,
                             ["BarcodeFasta1", "BarcodeFasta2",
                              "BarcodeFasta4", "BarcodeFasta3"])
        else:
            self.assertEqual(col.values, [1, 2, 4, 3])
def as_report(self, attributes=(), plotgroups=(), tables=(), uuid=None):
    """Build a Report carrying this spec's id/title and the given content."""
    report = Report(
        self.id,
        self.title,
        attributes=attributes,
        plotgroups=plotgroups,
        tables=tables,
        uuid=uuid,
    )
    return report
def test_merge_tables(self):
    """Merging two LAA reports concatenates and reorders table rows."""
    names = ['laa_report1.json', 'laa_report2.json']
    r = Report.merge([_to_report(names[0]), _to_report(names[1])])
    table = r.tables[0]
    assert len(table.columns) == 7
    assert [col.header for col in table.columns] == [
        'BarcodeName', 'FastaName', 'CoarseCluster', 'Phase',
        'TotalCoverage', 'SequenceLength', 'PredictedAccuracy'
    ]
    # Each column carries the union of rows from both input reports.
    for col in table.columns:
        assert len(col.values) == 4
        if col.header == 'BarcodeName':
            assert col.values == [
                'Barcode1', 'Barcode2', 'Barcode4', 'Barcode3'
            ]
        elif col.header == 'FastaName':
            assert col.values == [
                'BarcodeFasta1', 'BarcodeFasta2', 'BarcodeFasta4',
                'BarcodeFasta3'
            ]
        else:
            assert col.values == [1, 2, 4, 3]
    column_list_d = table.to_columns_d()
    assert len(column_list_d) == 4
def resolved_tool_contract_runner(rtc):
    """
    Run one IceQuiver chunk described by a resolved tool contract.

    Chunking parameters (__chunk_i/__chunk_n) are read from the input
    pickle when present; otherwise a single chunk is assumed.

    :param rtc: resolved tool contract
    :return: 0 on success
    """
    opts = rtc.task.options
    # XXX to handle chunking I am simply re-using the old i/N arguments, but
    # embedded in the input pickle instead of passed on the command line
    final_pickle_fn = rtc.task.input_files[2]
    # FIX: close the pickle file handle (the original leaked the open file).
    with open(final_pickle_fn, 'rb') as pickle_f:
        _tmp = cPickle.load(pickle_f)
    i_chunk = 0
    n_chunks = 1
    if "__chunk_i" in _tmp:
        i_chunk = _tmp['__chunk_i']
        n_chunks = _tmp['__chunk_n']
        final_pickle_fn = _tmp['pickle_file']
    output_dir = os.path.dirname(final_pickle_fn)
    # Patch IceFiles to resolve its paths from this contract's inputs.
    IceFiles.final_consensus_fa = property(
        lambda self: rtc.task.input_files[1])
    IceFiles.final_pickle_fn = property(lambda self: final_pickle_fn)
    IceFiles.nfl_all_pickle_fn = property(
        lambda self: rtc.task.input_files[3])
    iceq = IceQuiverRTC(
        root_dir=output_dir,
        subread_set=rtc.task.input_files[0],
        nproc=rtc.task.nproc)
    iceq.validate_inputs()
    iceq.process_chunk_i(i=i_chunk, num_chunks=n_chunks)
    with open(rtc.task.output_files[0], 'w') as f:
        report = Report.from_simple_dict(
            report_id="isoseq_ice_quiver",
            raw_d={'n_chunks': 1},
            namespace="ice_quiver")
        f.write(report.to_json())
    return 0
def test_merge(self):
    """Merging chunked reports sums attribute values and unions uuids."""
    EXPECTED_VALUES = {
        "n_reads": 300,
        "n_zmws": 60,
    }
    NAMES = {
        "n_reads": "Number of reads",
        "n_zmws": "Number of ZMWs"
    }
    chunks = [
        Report("pbcommand_test",
               attributes=[
                   Attribute(id_="n_reads", value=50, name="Number of reads"),
                   Attribute(id_="n_zmws", value=10, name="Number of ZMWs")],
               dataset_uuids=["12345"]),
        Report("pbcommand_test",
               attributes=[
                   Attribute(id_="n_reads", value=250, name="Number of reads"),
                   Attribute(id_="n_zmws", value=50, name="Number of ZMWs")]),
    ]
    r = Report.merge(chunks)
    self.assertEqual([a.id for a in r.attributes], ["n_reads", "n_zmws"])
    # dataset uuids come from whichever chunks declared them
    self.assertEqual(r._dataset_uuids, ["12345"])
    for attr in r.attributes:
        self.assertEqual(attr.value, EXPECTED_VALUES[attr.id])
        self.assertEqual(attr.name, NAMES[attr.id])
    for table in r.tables:
        for column in table.columns:
            self.assertEqual(column.header, NAMES[column.id])
def test_merge_tables(self):
    """Merging two LAA reports concatenates and reorders table rows."""
    names = ['laa_report1.json', 'laa_report2.json']
    r = Report.merge([_to_report(names[0]), _to_report(names[1])])
    table = r.tables[0]
    self.assertEqual(len(table.columns), 7)
    self.assertEqual([col.header for col in table.columns], [
        'BarcodeName', 'FastaName', 'CoarseCluster', 'Phase',
        'TotalCoverage', 'SequenceLength', 'PredictedAccuracy'
    ])
    # Each column carries the union of rows from both input reports.
    for col in table.columns:
        self.assertEqual(len(col.values), 4)
        if col.header == 'BarcodeName':
            self.assertEqual(
                col.values, ['Barcode1', 'Barcode2', 'Barcode4', 'Barcode3'])
        elif col.header == 'FastaName':
            self.assertEqual(col.values, [
                'BarcodeFasta1', 'BarcodeFasta2', 'BarcodeFasta4',
                'BarcodeFasta3'
            ])
        else:
            self.assertEqual(col.values, [1, 2, 4, 3])
    column_list_d = table.to_columns_d()
    self.assertEqual(len(column_list_d), 4)
def to_motifs_report(gff_file, motif_summary_csv, output_dir):
    """
    Build the motifs report (histogram plot group + motif table).

    :param gff_file: (str) path to the motifs GFF
    :param motif_summary_csv: (str) path to the motif summary CSV
    :param output_dir: (str) directory plot images are written into
    :return: the Report object (not written to disk here)
    """
    _d = dict(g=gff_file, c=motif_summary_csv, o=output_dir)
    log.info(
        "starting Motif report generations with: \nGFF:{g}\nCSV:{c}\ndir:{o}".format(**_d))
    # Generate a histogram with lines corresponding to motifs
    kinData = readMotifFiles(gff_file)
    plot_group = addQmodMotifHist(motif_summary_csv, kinData, output_dir)
    plot_groups = [plot_group]
    motif_records = _motif_csv_to_records(motif_summary_csv)
    table = to_table(motif_records)
    r = Report(Constants.R_ID, plotgroups=plot_groups, tables=[table])
    log.debug(pformat(r.to_dict(), indent=4))
    return r
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.

    Data models: pbcommand.models.report

    :param json_files: iterable of report JSON paths to merge
    :param output_file: destination path for the merged report JSON
    :return: output_file
    """
    merged = Report.merge([load_report_from_json(fn) for fn in json_files])
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
def make_coverage_report(gff, reference, max_contigs_to_plot, report,
                         output_dir):
    """
    Entry to report.

    :param gff: (str) path to alignment_summary.gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    :param report: (str) report file name, written under output_dir
    :param output_dir: (str) output directory
    :return: the written Report object
    """
    _validate_inputs(gff, reference)
    top_contigs = get_top_contigs(reference, max_contigs_to_plot)
    cov_map = _get_contigs_to_plot(gff, top_contigs)
    # stats may be None
    stats = _get_reference_coverage_stats(cov_map.values())
    a1 = _get_att_mean_coverage(stats)
    a2 = _get_att_percent_missing(stats)
    plot_grp_coverage = _create_coverage_plot_grp(
        top_contigs, cov_map, output_dir)
    plot_grp_histogram = None
    if stats is not None:
        plot_grp_histogram = _create_coverage_histo_plot_grp(stats,
                                                             output_dir)
    plotgroups = []
    # Don't add the Plot Group if no plots are added
    if plot_grp_coverage.plots:
        plotgroups.append(plot_grp_coverage)
    if plot_grp_histogram is not None:
        # Don't add the Plot Group if no plots are added
        if plot_grp_histogram.plots:
            plotgroups.append(plot_grp_histogram)
    rpt = Report('coverage',
                 title="Coverage",
                 plotgroups=plotgroups,
                 attributes=[a1, a2],
                 dataset_uuids=(ReferenceSet(reference).uuid,))
    rpt.write_json(os.path.join(output_dir, report))
    return rpt
def test_from_simple_dict(self):
    """from_simple_dict namespaces attribute ids and keeps raw names."""
    report = Report.from_simple_dict(
        "pbcommand_test", {"n_reads": 50}, "pbcommand")
    parsed = json.loads(report.to_json())
    expected = [
        {
            "id": "pbcommand_test.pbcommand_n_reads",
            "name": "n_reads",
            "value": 50
        },
    ]
    self.assertEqual(parsed['attributes'], expected)
def run_reference_dataset_report(reference_ds, output_json):
    """
    Generate a summary report for a ReferenceSet.

    :param reference_ds: dataset to report on
    :type reference_ds: ReferenceSet
    :param output_json: path the report JSON is written to
    :return: 0 on success
    """
    attributes = _dataset_to_attribute_reports(reference_ds)
    fasta_file = reference_ds.toExternalFiles()[0]
    # Plots are written alongside the report JSON.
    output_dir = os.path.dirname(output_json)
    plot_groups = try_fasta_to_plot_group(fasta_file, output_dir)
    report = Report("ds_reference_report",
                    attributes=attributes,
                    plotgroups=plot_groups,
                    dataset_uuids=[reference_ds.uuid])
    report.write_json(output_json)
    return 0