def make_sat_report(aligned_reads_file, mapping_stats_report, variants_report, report, output_dir):
    """
    Entry to report.

    Gathers mapping, read/hole, and variant metrics from the three inputs,
    assembles them into a Report keyed by ``meta_rpt.id``, applies the
    report-spec view, and writes the JSON to ``output_dir``.

    :param aligned_reads_file: (str) path to aligned_reads.xml
    :param mapping_stats_report: (str) path to mapping stats json report
    :param variants_report: (str) path to variants report
    :param report: (str) file name of the output JSON report
    :param output_dir: (str) directory the report JSON is written into
    """
    # Fail fast if any required input path is missing/invalid.
    _validate_inputs([('aligned_reads_file', aligned_reads_file), ('mapping_stats_report', mapping_stats_report), ('variants_report', variants_report)])
    d_map = _get_mapping_stats_data(mapping_stats_report)
    reads, inst = _get_reads_info(aligned_reads_file)
    d_bam = _get_read_hole_data(reads, inst)
    d_var = _get_variants_data(variants_report)
    # The AlignmentSet is opened only to record its UUID on the report.
    ds = AlignmentSet(aligned_reads_file)
    rpt = Report(meta_rpt.id, dataset_uuids=(ds.uuid, ))
    # Attributes are pulled from the three per-source dicts by shared
    # Constants ids.
    rpt.add_attribute(
        Attribute(Constants.A_INSTRUMENT, d_bam[Constants.A_INSTRUMENT]))
    rpt.add_attribute(
        Attribute(Constants.A_COVERAGE, d_var[Constants.A_COVERAGE]))
    rpt.add_attribute(
        Attribute(Constants.A_CONCORDANCE, d_var[Constants.A_CONCORDANCE]))
    rpt.add_attribute(
        Attribute(Constants.A_READLENGTH, d_map[Constants.A_READLENGTH]))
    rpt.add_attribute(Attribute(Constants.A_READS, d_bam[Constants.A_READS]))
    # apply_view filters/orders the report per the spec before writing.
    rpt = meta_rpt.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
def run_reference_dataset_report(reference_ds, output_json):
    """
    Write a dev diagnostic report JSON for a reference dataset.

    :param reference_ds: dataset to summarize
    :type reference_ds: ReferenceSet
    :param output_json: path the report JSON is written to
    :return: exit code (always 0)
    """
    task_dir = os.path.dirname(output_json)
    attributes = _dataset_to_attribute_reports(reference_ds)
    # Tack on host/task-dir provenance attributes.
    attributes.extend([
        Attribute("host", socket.getfqdn(), name="Host"),
        Attribute("task_dir", task_dir, name="Task Directory"),
    ])
    fasta = reference_ds.toExternalFiles()[0]
    groups = try_fasta_to_plot_group(fasta, output_json)
    rpt = Report("dev_diagnostic_report",
                 attributes=attributes,
                 plotgroups=groups,
                 dataset_uuids=[reference_ds.uuid])
    rpt.write_json(output_json)
    return 0
def test_report_validation(self):
    """Validate a report against the spec, then check that added/changed
    attributes produce the expected ValueError message lengths."""
    rpt = _to_report("test_report.json")
    # Unmodified report must validate cleanly.
    r = self.spec.validate_report(rpt)
    assert isinstance(r, Report)
    # Add an attribute not present in the spec -> validation must fail.
    rpt.attributes.append(Attribute("attribute5", value=12345))

    def error_len(e):
        # Number of lines in the exception message (one per error + header).
        return len(str(e).split("\n"))
    try:
        self.spec.validate_report(rpt)
    except ValueError as e:
        assert error_len(e) == 2
    else:
        self.fail("Expected exception")
    assert not self.spec.is_valid_report(rpt)
    # Replace the first attribute with one of the wrong type -> now two
    # violations, so the message grows by one line.
    rpt.attributes[0] = Attribute("attribute1", value=1.2345)
    try:
        self.spec.validate_report(rpt)
    except ValueError as e:
        print(e)
        assert error_len(e) == 3
    else:
        self.fail("Expected exception")
    assert not self.spec.is_valid_report(rpt)
def test_to_dict_multi(self):
    """
    Multiple complex elements.
    The id of report sub elements is prepended with the id of the
    parent element when to_dict is called.
    """
    tags = ["alpha", "beta", "gamma"]
    r = Report('redfang', tags=tags)
    r.add_attribute(Attribute('a', 'b'))
    r.add_attribute(Attribute('a2', 'b2'))

    group1 = PlotGroup('pgid')
    group1.add_plot(Plot('pid', 'anImg'))
    group1.add_plot(Plot('pid2', 'anImg2'))
    r.add_plotgroup(group1)

    group2 = PlotGroup('pgid2')
    group2.add_plot(Plot('pid2', 'anImg2'))
    group2.add_plot(Plot('pid22', 'anImg22'))
    r.add_plotgroup(group2)

    table1 = Table('tabid')
    table1.add_column(Column('c1'))
    r.add_table(table1)

    table2 = Table('tabid2')
    table2.add_column(Column('c2'))
    r.add_table(table2)

    d = r.to_dict()
    log.debug(str(d))

    # Every sub-element id is namespaced by its parent ids.
    assert d['id'] == 'redfang'
    assert [a['id'] for a in d['attributes']] == ['redfang.a', 'redfang.a2']
    assert [pg['id'] for pg in d['plotGroups']] == [
        'redfang.pgid', 'redfang.pgid2']
    assert [p['id'] for p in d['plotGroups'][0]['plots']] == [
        'redfang.pgid.pid', 'redfang.pgid.pid2']
    assert [p['id'] for p in d['plotGroups'][1]['plots']] == [
        'redfang.pgid2.pid2', 'redfang.pgid2.pid22']
    assert [t['id'] for t in d['tables']] == ['redfang.tabid', 'redfang.tabid2']
    assert d['tables'][0]['columns'][0]['id'] == 'redfang.tabid.c1'
    assert d['tables'][1]['columns'][0]['id'] == 'redfang.tabid2.c2'
    assert sorted(d['tags']) == sorted(tags)
    # Tags survive a serialize/deserialize round trip.
    loaded_report = load_report_from(d)
    assert sorted(loaded_report.tags) == sorted(tags)
    log.info(repr(r))
    assert repr(r) is not None
def test_merge(self):
    """Merging chunked reports sums attribute values, keeps names, and
    carries over dataset uuids."""
    expected = {"n_reads": 300, "n_zmws": 60}
    names = {"n_reads": "Number of reads", "n_zmws": "Number of ZMWs"}
    chunk1 = Report(
        "pbcommand_test",
        attributes=[
            Attribute(id_="n_reads", value=50, name="Number of reads"),
            Attribute(id_="n_zmws", value=10, name="Number of ZMWs")],
        dataset_uuids=["12345"])
    chunk2 = Report(
        "pbcommand_test",
        attributes=[
            Attribute(id_="n_reads", value=250, name="Number of reads"),
            Attribute(id_="n_zmws", value=50, name="Number of ZMWs")])
    merged = Report.merge([chunk1, chunk2])
    self.assertEqual([a.id for a in merged.attributes],
                     ["n_reads", "n_zmws"])
    self.assertEqual(merged._dataset_uuids, ["12345"])
    for attr in merged.attributes:
        self.assertEqual(attr.value, expected[attr.id])
        self.assertEqual(attr.name, names[attr.id])
    for table in merged.tables:
        for column in table.columns:
            self.assertEqual(column.header, names[column.id])
def test_to_dict_multi(self):
    """
    Multiple complex elements.
    The id of report sub elements is prepended with the id of the
    parent element when to_dict is called.
    """
    r = Report('redfang')
    for attribute in (Attribute('a', 'b'), Attribute('a2', 'b2')):
        r.add_attribute(attribute)

    group1 = PlotGroup('pgid')
    group1.add_plot(Plot('pid', 'anImg'))
    group1.add_plot(Plot('pid2', 'anImg2'))
    r.add_plotgroup(group1)

    group2 = PlotGroup('pgid2')
    group2.add_plot(Plot('pid2', 'anImg2'))
    group2.add_plot(Plot('pid22', 'anImg22'))
    r.add_plotgroup(group2)

    table1 = Table('tabid')
    table1.add_column(Column('c1'))
    r.add_table(table1)

    table2 = Table('tabid2')
    table2.add_column(Column('c2'))
    r.add_table(table2)

    d = r.to_dict()
    log.debug(str(d))

    # All sub-element ids must be prefixed with the parent ids.
    self.assertEqual('redfang', d['id'])
    self.assertEqual(['redfang.a', 'redfang.a2'],
                     [a['id'] for a in d['attributes']])
    self.assertEqual(['redfang.pgid', 'redfang.pgid2'],
                     [pg['id'] for pg in d['plotGroups']])
    self.assertEqual(['redfang.pgid.pid', 'redfang.pgid.pid2'],
                     [p['id'] for p in d['plotGroups'][0]['plots']])
    self.assertEqual(['redfang.pgid2.pid2', 'redfang.pgid2.pid22'],
                     [p['id'] for p in d['plotGroups'][1]['plots']])
    self.assertEqual(['redfang.tabid', 'redfang.tabid2'],
                     [t['id'] for t in d['tables']])
    self.assertEqual('redfang.tabid.c1',
                     d['tables'][0]['columns'][0]['id'])
    self.assertEqual('redfang.tabid2.c2',
                     d['tables'][1]['columns'][0]['id'])
    log.info(repr(r))
    self.assertIsNotNone(repr(r))
def test_eq(self):
    """Attributes with the same id/value/name compare equal; any
    difference makes them unequal."""
    first = Attribute('a', 1234, "My Attribute")
    other = Attribute('b', 1234, "My B Attribute")
    twin = Attribute('a', 1234, "My Attribute")
    assert first == twin
    assert first != other
    assert other != twin
def to_concordance_mean(readqual_dist):
    """
    Build the mean-concordance attribute from a read-quality distribution.

    An empty distribution (all bins zero) yields a null-valued attribute.
    """
    is_empty = sum(readqual_dist.bins) == 0
    value = None if is_empty else readqual_dist.sampleMean
    return Attribute(Constants.A_CONCORDANCE_MEAN, value)
def to_concordance_mode(readqual_dist):
    """
    Build the mode-concordance attribute from a read-quality distribution.

    An empty distribution (all bins zero) yields a null-valued attribute.
    """
    is_empty = sum(readqual_dist.bins) == 0
    value = None if is_empty else readqual_dist.sampleMode
    return Attribute(Constants.A_CONCORDANCE_MODE, value)
def to_readlength_mean(readlen_dist):
    """
    Build the mean read-length attribute from a read-length distribution.

    The mean is truncated to an int; an empty distribution (all bins
    zero) yields a null-valued attribute.
    """
    is_empty = sum(readlen_dist.bins) == 0
    value = None if is_empty else int(readlen_dist.sampleMean)
    return Attribute(Constants.A_READLENGTH_MEAN, value)
def datastore_to_report(ds):
    """
    Summarize a DataStore as a Report with summary attributes plus a
    per-file manifest table.

    :type ds: DataStore
    :param ds: datastore to summarize
    :return: Report with id "datastore_report"
    """
    attrs = [
        Attribute("ds_nfiles", len(ds.files), name="Number of files"),
        Attribute("ds_version", ds.version, name="Datastore version"),
        Attribute("ds_created_at", ds.created_at, name="Created At"),
        Attribute("ds_updated_at", ds.updated_at, name="Updated At")]

    columns_names = [("file_id", "File Id"),
                     ("file_type_obj", "File Type"),
                     ("path", "Path"),
                     ("file_size", "Size"),
                     ("created_at", "Created At"),
                     ("modified_at", "Modified At")]

    def to_i(s):
        # Namespace column ids with a "ds_" prefix (PEP 8: def instead of
        # a lambda assigned to a name).
        return "ds_" + s

    columns = [Column(to_i(i), header=h) for i, h in columns_names]
    t = Table("datastore", title="DataStore Summary", columns=columns)

    def _to_relative_path(p):
        # Display only the last three path components to keep rows short.
        return "/".join(p.split("/")[-3:])

    # .items() behaves identically to the Python-2-only .iteritems() here
    # and keeps this function Python-3 compatible.
    for file_id, ds_file in ds.files.items():
        t.add_data_by_column_id(to_i("file_id"), ds_file.file_id)
        t.add_data_by_column_id(to_i("file_type_obj"), ds_file.file_type_id)
        t.add_data_by_column_id(to_i("path"), _to_relative_path(ds_file.path))
        t.add_data_by_column_id(to_i("file_size"), ds_file.file_size)
        t.add_data_by_column_id(to_i("created_at"), ds_file.created_at)
        t.add_data_by_column_id(to_i("modified_at"), ds_file.modified_at)

    return Report("datastore_report", tables=[t], attributes=attrs)
def test_eq(self):
    """Attribute equality is by value: same id/value/name are equal."""
    a = Attribute('a', 1234, "My Attribute")
    b = Attribute('b', 1234, "My B Attribute")
    c = Attribute('a', 1234, "My Attribute")
    # assertEqual/assertNotEqual report the differing values on failure,
    # unlike assertTrue(a == c) which only prints "False is not true".
    self.assertEqual(a, c)
    self.assertNotEqual(a, b)
    self.assertNotEqual(b, c)
def test_get_attribute_by_id(self):
    """Lookup by id returns the matching attribute."""
    wanted = Attribute('a', 'b')
    other = Attribute('b', 'b2')
    r = Report('redfang', attributes=[wanted, other])
    found = r.get_attribute_by_id('a')
    self.assertEqual(wanted, found)
def test_to_dict(self):
    """
    Test attribute to_dict function
    """
    d = Attribute('bob', 123, "Bob is the name").to_dict()
    assert d['id'] == 'bob'
    assert d['value'] == 123
    assert d['name'] == 'Bob is the name'
def test_to_dict(self):
    """
    Test attribute to_dict function
    """
    a = Attribute('bob', 123, "Bob is the name")
    d = a.to_dict()
    # assertEquals is a deprecated alias; use assertEqual.
    self.assertEqual('bob', d['id'])
    self.assertEqual(123, d['value'])
    self.assertEqual('Bob is the name', d['name'])
def test_get_attribute_by_id_with_bad_id(self):
    """An unknown attribute id yields None rather than raising."""
    report = Report('redfang',
                    attributes=[Attribute('a', 'b'), Attribute('b', 'b2')])
    assert report.get_attribute_by_id('a').value == 'b'
    assert report.get_attribute_by_id('id_that_does_not_exist') is None
def test_attributes_to_table(self):
    """Each attribute becomes one single-valued column in the table."""
    data = [("id1", 1234), ("id2", 1.234), ("id3", "1234"), ("id4", True)]
    attr = [Attribute(id_, value=value) for id_, value in data]
    t = attributes_to_table(attr, "table1")
    self.assertEqual(len(t.columns), 4)
    first = t.columns[0]
    self.assertEqual(first.id, "id1")
    self.assertEqual(first.values, [1234])
def test_get_attribute_by_id_with_bad_id(self):
    """Looking up a missing id returns None instead of an exception."""
    attrs = [Attribute('a', 'b'), Attribute('b', 'b2')]
    report = Report('redfang', attributes=attrs)
    found = report.get_attribute_by_id('a')
    self.assertEqual(found.value, 'b')
    missing = report.get_attribute_by_id('id_that_does_not_exist')
    self.assertIsNone(missing)
def write_task_report(run_time, nproc, exit_code):
    """
    Write a "workflow_task" report to task-report.json in the current
    working directory.

    :param run_time: task run time
    :param nproc: number of processors used
    :param exit_code: task exit code
    """
    # Query the system once instead of calling os.uname() twice.
    uname = os.uname()
    attributes = [
        Attribute("host", value=uname[1]),
        Attribute("system", value=uname[0]),
        Attribute("nproc", value=nproc),
        Attribute("run_time", value=run_time),
        Attribute("exit_code", value=exit_code)]
    report = Report("workflow_task",
                    title="Workflow Task Report",
                    attributes=attributes,
                    tags=("internal", ))
    report.write_json("task-report.json")
def run_dev_ccs_report(rtc):
    """
    Write a simple summary report (record count, total length) for the
    ConsensusReadSet named in the resolved tool contract.

    :param rtc: resolved tool contract with input/output file paths
    :return: exit code (always 0)
    """
    from pbcore.io import ConsensusReadSet
    with ConsensusReadSet(rtc.task.input_files[0]) as ds:
        # Refresh numRecords/totalLength before reading them.
        ds.updateCounts()
        attributes = [
            Attribute("number_of_records", value=ds.numRecords),
            Attribute("total_length", value=ds.totalLength),
        ]
        rpt = Report("ccs_report",
                     title="ConsensusReadSet XML Report",
                     attributes=attributes)
        rpt.write_json(rtc.task.output_files[0])
        return 0
def _get_att_n_50_contig_length(read_lengths):
    """
    Get the n50 or 0 if n50 cannot be calculated
    :param read_lengths: sorted list
    """
    return Attribute(Constants.A_N50_LEN, int(compute_n50(read_lengths)))
def _dataset_to_attribute_reports(ds):
    """Return basic attributes for a dataset: uuid, record count, and
    whether all of its external files exist on disk."""
    files_exist = all(os.path.exists(path)
                      for path in ds.toExternalFiles())
    rows = [
        ("uuid", ds.uuid, "Unique Id"),
        ("total_records", ds.numRecords, "num Records"),
        ("valid_files", files_exist, "External files exist"),
    ]
    return [Attribute(id_, value, name=label) for id_, value, label in rows]
def _get_att_esize_contig_length(read_lengths):
    """
    Get esize, or 0.0 if empty.
    :param read_lengths: sorted list
    :return: (float) E-size of contigs
    """
    esize = get_esize(read_lengths)
    return Attribute(Constants.A_ESIZE, esize)
def to_report(self, dataset_uuids=()):
    """Convert a summary object to pbcommand.report object."""
    # Pair each field id/name with its value; note zip yields
    # (id, name, value) in that order.
    triples = zip(self.fieldsIDs, self.fieldsNames, self.fields)
    attributes = [Attribute(id_=field_id, value=field_value, name=field_name)
                  for field_id, field_name, field_value in triples]
    return Report(id_=self.REPORT_ID,
                  attributes=attributes,
                  dataset_uuids=dataset_uuids)
def write_random_report(path, nrecords):
    """Write a mock report containing *nrecords* integer attributes to
    *path* and return the Report object."""
    attributes = [
        Attribute("mock_attr_{i}".format(i=idx), idx,
                  name="Attr {i}".format(i=idx))
        for idx in xrange(nrecords)]
    rpt = Report("mock_report", attributes=attributes)
    rpt.write_json(path)
    return rpt
def _get_att_mean_coverage(stats):
    """
    Mean depth-of-coverage attribute; the value is None when *stats*
    is None.

    :param stats (dict)
    """
    value = stats.mean_depth_of_coverage if stats is not None else None
    return Attribute(Constants.A_COVERAGE, value)
def _get_error_report():
    """
    Convenience function to return a report object. If num_control_reads
    is 0, returns a special report with a single "warning" attribute.
    """
    # logging's warn() is a deprecated alias of warning().
    log.warning('Returning a report with a warning that 0 controls reads have '
                'been found.')
    a = Attribute('warning', 'No control reads found', 'Warning')
    return Report('control', title="Control", attributes=[a])
def fasta_to_report(fasta_file, output_json):
    """
    Count the records in a FASTA file and build a report with the count
    plus any plot group produced for the file.
    """
    with FastaReader(fasta_file) as reader:
        nrecords = sum(1 for _ in reader)
    attr = Attribute("num_records", nrecords, "Number of Records")
    plot_groups = try_fasta_to_plot_group(fasta_file, output_json)
    return Report("fasta_report", attributes=[attr], plotgroups=plot_groups)
def run(dataset_file):
    """Reads in the input.fofn and counts movies and cells. Outputs in XML.

    :param dataset_file: path to a dataset XML
    :return: Report with cell/movie counts, filtered through the spec view
    """
    with openDataSet(dataset_file) as ds:
        # (removed a dead "movies = None" that was immediately overwritten)
        movies = set()
        for file_name in ds.toExternalFiles():
            if type(ds).__name__ == "HdfSubreadSet":
                # HDF inputs encode the movie name in the file path.
                movies.add(path_to_movie(file_name))
            else:
                # BAM inputs carry the movie name in each read group's PU tag.
                with BamReader(file_name) as bam:
                    for rg in bam.peer.header["RG"]:
                        movies.add(rg["PU"])
        cells = set(movie_to_cell(movie) for movie in movies)
        ncells_attr = Attribute(Constants.A_NCELLS, len(cells))
        nmovies_attr = Attribute(Constants.A_NMOVIES, len(movies))
        attrs = [ncells_attr, nmovies_attr]
        report = Report(Constants.R_ID, attributes=attrs)
        return spec.apply_view(report)
def _to_attributes(nreads, nbases, mean_readlength, n50):
    """
    Returns a list of attributes
    """
    attributes = [
        Attribute(Constants.A_MEAN, int(mean_readlength)),
        Attribute(Constants.A_N50, n50),
        Attribute(Constants.A_NBASES, nbases),
        Attribute(Constants.A_NREADS, nreads),
    ]
    for attribute in attributes:
        log.debug(attribute)
    return attributes
def test_basic(self):
    """A minimal attribute serializes to a schema-valid dict."""
    attribute = Attribute('my_id', 12, name="My Attribute")
    validate_attribute(attribute.to_dict())