def test_to_dict_multi(self):
    """
    Multiple complex elements. The id of report sub elements is prepended
    with the id of the parent element when to_dict is called.
    """
    tags = ["alpha", "beta", "gamma"]
    r = Report('redfang', tags=tags)
    a = Attribute('a', 'b')
    a2 = Attribute('a2', 'b2')
    r.add_attribute(a)
    r.add_attribute(a2)

    pg = PlotGroup('pgid')
    pg.add_plot(Plot('pid', 'anImg'))
    pg.add_plot(Plot('pid2', 'anImg2'))
    r.add_plotgroup(pg)

    pg = PlotGroup('pgid2')
    pg.add_plot(Plot('pid2', 'anImg2'))
    pg.add_plot(Plot('pid22', 'anImg22'))
    r.add_plotgroup(pg)

    t = Table('tabid')
    t.add_column(Column('c1'))
    r.add_table(t)

    t = Table('tabid2')
    t.add_column(Column('c2'))
    r.add_table(t)

    d = r.to_dict()
    log.debug(str(d))

    assert 'redfang' == d['id']
    assert 'redfang.a' == d['attributes'][0]['id']
    assert 'redfang.a2' == d['attributes'][1]['id']
    assert 'redfang.pgid' == d['plotGroups'][0]['id']
    assert 'redfang.pgid.pid' == d['plotGroups'][0]['plots'][0]['id']
    assert 'redfang.pgid.pid2' == d['plotGroups'][0]['plots'][1]['id']
    assert 'redfang.pgid2' == d['plotGroups'][1]['id']
    assert 'redfang.pgid2.pid2' == d['plotGroups'][1]['plots'][0]['id']
    assert 'redfang.pgid2.pid22' == d['plotGroups'][1]['plots'][1]['id']
    assert 'redfang.tabid' == d['tables'][0]['id']
    assert 'redfang.tabid.c1' == d['tables'][0]['columns'][0]['id']
    assert 'redfang.tabid2' == d['tables'][1]['id']
    assert 'redfang.tabid2.c2' == d['tables'][1]['columns'][0]['id']
    assert list(sorted(d['tags'])) == list(sorted(tags))

    loaded_report = load_report_from(d)
    assert list(sorted(loaded_report.tags)) == list(sorted(tags))

    log.info(repr(r))
    assert repr(r) is not None
def test_to_dict_multi(self):
    """
    Multiple complex elements. The id of report sub elements is prepended
    with the id of the parent element when to_dict is called.
    """
    r = Report('redfang')
    a = Attribute('a', 'b')
    a2 = Attribute('a2', 'b2')
    r.add_attribute(a)
    r.add_attribute(a2)

    pg = PlotGroup('pgid')
    pg.add_plot(Plot('pid', 'anImg'))
    pg.add_plot(Plot('pid2', 'anImg2'))
    r.add_plotgroup(pg)

    pg = PlotGroup('pgid2')
    pg.add_plot(Plot('pid2', 'anImg2'))
    pg.add_plot(Plot('pid22', 'anImg22'))
    r.add_plotgroup(pg)

    t = Table('tabid')
    t.add_column(Column('c1'))
    r.add_table(t)

    t = Table('tabid2')
    t.add_column(Column('c2'))
    r.add_table(t)

    d = r.to_dict()
    log.debug(str(d))

    self.assertEqual('redfang', d['id'])
    self.assertEqual('redfang.a', d['attributes'][0]['id'])
    self.assertEqual('redfang.a2', d['attributes'][1]['id'])
    self.assertEqual('redfang.pgid', d['plotGroups'][0]['id'])
    self.assertEqual('redfang.pgid.pid', d['plotGroups'][0]['plots'][0]['id'])
    self.assertEqual('redfang.pgid.pid2', d['plotGroups'][0]['plots'][1]['id'])
    self.assertEqual('redfang.pgid2', d['plotGroups'][1]['id'])
    self.assertEqual('redfang.pgid2.pid2', d['plotGroups'][1]['plots'][0]['id'])
    self.assertEqual('redfang.pgid2.pid22', d['plotGroups'][1]['plots'][1]['id'])
    self.assertEqual('redfang.tabid', d['tables'][0]['id'])
    self.assertEqual('redfang.tabid.c1', d['tables'][0]['columns'][0]['id'])
    self.assertEqual('redfang.tabid2', d['tables'][1]['id'])
    self.assertEqual('redfang.tabid2.c2', d['tables'][1]['columns'][0]['id'])

    log.info(repr(r))
    self.assertIsNotNone(repr(r))
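# A minimal sketch of the id-prepending behavior the two tests above verify.
# It assumes the pbcommand report model (import path from that library); the
# ids are illustrative.
from pbcommand.models.report import Report, Attribute

r = Report('redfang')
r.add_attribute(Attribute('a', 'b'))
d = r.to_dict()
# Sub-element ids are namespaced by the parent report id on serialization:
# d['attributes'][0]['id'] == 'redfang.a', while r.attributes[0].id == 'a'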
def setUp(self):
    self.columns = [Column('one', header="One"),
                    Column('two', header="Two"),
                    Column('three', header="Three")]
    self.table = Table('my_table', columns=self.columns)
def test_add_column(self):
    """Cannot add column with duplicate id."""
    cs = [Column('1'), Column('2')]
    t = Table('foo', columns=cs)

    def add_dupe():
        t.add_column(Column('2'))

    self.assertSequenceEqual(cs, t.columns)
    self.assertRaises(PbReportError, add_dupe)
def test_add_column(self):
    """Cannot add column with duplicate id."""
    cs = [Column('1'), Column('2')]
    t = Table('foo', columns=cs)

    def add_dupe():
        t.add_column(Column('2'))

    assert cs == t.columns
    with pytest.raises(PbReportError):
        add_dupe()
def setUp(self):
    self.columns = [Column('one', header="One"),
                    Column('two', header="Two"),
                    Column('three', header="Three")]
    self.table = Table('my_table_with_values', columns=self.columns)
    datum = [('one', list(xrange(3))),
             ('two', list('abc')),
             ('three', 'file1 file2 file3'.split())]
    for k, values in datum:
        for value in values:
            self.table.add_data_by_column_id(k, value)
def test_add_data_by_column_id(self):
    """Add data values by column identifier."""
    columns = [Column('one'), Column('two')]
    table = Table('mytable', columns=columns)
    datum = {'one': 12.0, 'two': 1234.0}
    for k, v in datum.iteritems():
        table.add_data_by_column_id(k, v)

    self.assertTrue(12.0 in table.columns[0].values)
    self.assertTrue(1234.0 in table.columns[1].values)
def test_append_data(self):
    """Append data to columns by index."""
    cs = [Column('1'), Column('2')]
    t = Table('foo', columns=cs)
    t.append_data(0, 'whatev')
    t.append_data(0, 'huh')
    t.append_data(1, 'ernie')
    t.append_data(1, 'bert')

    self.assertSequenceEqual(['whatev', 'huh'], t.columns[0].values)
    self.assertSequenceEqual(['ernie', 'bert'], t.columns[1].values)
def datastore_to_report(ds):
    """
    Summarize a DataStore as a Report with summary attributes and a file table.

    :type ds: DataStore
    :param ds: datastore to summarize
    :rtype: Report
    """
    attrs = [Attribute("ds_nfiles", len(ds.files), name="Number of files"),
             Attribute("ds_version", ds.version, name="Datastore version"),
             Attribute("ds_created_at", ds.created_at, name="Created At"),
             Attribute("ds_updated_at", ds.updated_at, name="Updated At")]

    columns_names = [("file_id", "File Id"),
                     ("file_type_obj", "File Type"),
                     ("path", "Path"),
                     ("file_size", "Size"),
                     ("created_at", "Created At"),
                     ("modified_at", "Modified At")]

    to_i = lambda s: "ds_" + s
    columns = [Column(to_i(i), header=h) for i, h in columns_names]

    t = Table("datastore", title="DataStore Summary", columns=columns)

    def _to_relative_path(p):
        return "/".join(p.split("/")[-3:])

    for file_id, ds_file in ds.files.iteritems():
        t.add_data_by_column_id(to_i("file_id"), ds_file.file_id)
        t.add_data_by_column_id(to_i("file_type_obj"), ds_file.file_type_id)
        t.add_data_by_column_id(to_i("path"), _to_relative_path(ds_file.path))
        t.add_data_by_column_id(to_i("file_size"), ds_file.file_size)
        t.add_data_by_column_id(to_i("created_at"), ds_file.created_at)
        t.add_data_by_column_id(to_i("modified_at"), ds_file.modified_at)

    return Report("datastore_report", tables=[t], attributes=attrs)
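# Hedged usage sketch for datastore_to_report, assuming the pbcommand
# DataStore model with its load_from_json classmethod and Report.write_json;
# the datastore path below is hypothetical.
from pbcommand.models import DataStore

ds = DataStore.load_from_json("job_output/workflow/datastore.json")  # hypothetical path
report = datastore_to_report(ds)
report.write_json("datastore_report.json")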
def _to_table(self, movie_datum):
    """
    Create a pbreports Table for each movie.

    :param movie_datum: List of
        [(movie_name,
          reads,
          mean readlength,
          polymerase readlength,
          number of subread bases,
          mean subread readlength,
          mean subread concordance), ...]
    """
    table = Table(Constants.T_STATS,
                  columns=(Column(c_id) for c_id in self.COL_IDS))

    for movie_data in movie_datum:
        if len(movie_data) != len(self.COL_IDS):
            log.error(movie_datum)
            raise ValueError(
                "Incompatible values. {n} values provided, expected {a}".format(
                    n=len(movie_data), a=len(self.COL_IDS)))
        for value, c_id in zip(movie_data, self.COL_IDS):
            table.add_data_by_column_id(c_id, value)

    log.debug(str(table))
    return table
def attributes_to_table(attributes, table_id):
    """Build a report table from Iso-Seq cluster attributes."""
    columns = [Column(x.id, header="") for x in attributes]
    table = Table(table_id, columns=columns)
    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)
    return table
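# Example use of attributes_to_table: one single-valued column per attribute.
# A sketch assuming the same Attribute model as the tests above; the ids and
# values are illustrative, not real Iso-Seq output.
from pbcommand.models.report import Attribute

attributes = [Attribute("num_consensus_isoforms", 1293),
              Attribute("avg_consensus_isoform_length", 2240)]
table = attributes_to_table(attributes, "isoseq_cluster_table")
# table.columns[0].values == [1293]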
def to_sample_table(table_json):
    col_ids = [Constants.C_SAMPLE,
               Constants.C_INS,
               Constants.C_DEL,
               Constants.C_HOM,
               Constants.C_HET,
               Constants.C_TOTAL]
    sample_table = table_json[Constants.SAMPLE_KEY]
    t = []
    if len(sample_table) == 0:
        table = [[], [], [], [], [], []]
    else:
        for row in sample_table:
            r = [row[0]]
            r.append(_my_combine(row[1], row[2]))
            r.append(_my_combine(row[3], row[4]))
            r.append(row[5])
            r.append(row[6])
            r.append(_my_combine(row[7], row[8]))
            t.append(r)
        # transpose the list of rows into per-column value lists
        table = zip(*t)
    columns = []
    for i, col_id in enumerate(col_ids):
        columns.append(Column(col_id, values=table[i]))
    sample_table = Table(Constants.T_SAMPLE, columns=columns)
    return sample_table
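# The `zip(*t)` above transposes a list of rows into per-column value lists,
# which is the shape Column(..., values=...) expects. A standalone
# illustration with made-up rows:
rows = [['sampleA', '10 (0.5)', '2 (0.1)', 7, 3, '12 (0.6)'],
        ['sampleB', '8 (0.4)', '1 (0.05)', 5, 4, '9 (0.45)']]
cols = zip(*rows)  # Python 2; on Python 3 use list(zip(*rows))
# cols[0] == ('sampleA', 'sampleB'), cols[1] == ('10 (0.5)', '8 (0.4)'), ...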
def _to_report(bg, job_output_dir, job_id, state, was_successful, run_time,
               error_message=None):
    """
    High Level Report of the workflow state

    Write the output of workflow datastore to pbreports report object

    Workflow summary .dot/svg (collapsed workflow)
    Workflow details .dot/svg (chunked workflow)

    To add:
    - Resolved WorkflowSettings (e.g., nproc, max_workers)

    :type bg: BindingsGraph
    """
    emsg = "" if error_message is None else error_message

    attributes = [Attribute('was_successful', was_successful, name="Was Successful"),
                  Attribute('total_run_time_sec', int(run_time), name="Walltime (sec)"),
                  Attribute('error_message', emsg, name="Error Message"),
                  Attribute('job_id', job_id, name="Job Id"),
                  Attribute('job_state', state, name="Job State"),
                  Attribute('job_output_dir', job_output_dir, name="Job Output Directory"),
                  Attribute('pbsmrtpipe_version', pbsmrtpipe.get_version(), name="pbsmrtpipe Version")]

    columns = [Column('task_id', header='Task id'),
               Column('was_successful', header='Was Successful'),
               Column('state', header="Task State"),
               Column('run_time_sec', header="Run Time (sec)"),
               Column('nproc', header="# of procs")]

    tasks_table = Table('tasks', columns=columns)
    for tnode in bg.all_task_type_nodes():
        tasks_table.add_data_by_column_id('task_id', str(tnode))
        tasks_table.add_data_by_column_id('nproc', bg.node[tnode]['nproc'])
        tasks_table.add_data_by_column_id('state', bg.node[tnode]['state'])
        tasks_table.add_data_by_column_id('was_successful',
                                          bg.node[tnode]['state'] == TaskStates.SUCCESSFUL)
        # rt_ = bg.node[tnode]['run_time']
        # rtime = None if rt_ is None else int(rt_)
        tasks_table.add_data_by_column_id('run_time_sec', bg.node[tnode]['run_time'])

    ep_table = _to_table("entry_points", bg, bg.entry_binding_nodes())
    fnodes_table = _to_table("file_node", bg, bg.file_nodes())

    report = Report('pbsmrtpipe',
                    tables=[tasks_table, ep_table, fnodes_table],
                    attributes=attributes)
    return report
def test_get_table_by_id(self):
    r = Report('redfang')
    t1 = Table('tabid1')
    t1.add_column(Column('c1'))
    r.add_table(t1)
    t = r.get_table_by_id('tabid1')
    self.assertEqual(t, t1)
def test_get_table_by_id_with_bad_id(self):
    r = Report('redfang')
    t1 = Table('tabid1')
    t1.add_column(Column('c1'))
    r.add_table(t1)
    bad_t = r.get_table_by_id('id_that_does_not_exist')
    self.assertIsNone(bad_t)
def to_report(stats_xml):
    """Main point of entry

    :type stats_xml: str
    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = DataSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.prodDist:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    dsets = [dset]
    for subdset in dset.subdatasets:
        if subdset.metadata.summaryStats:
            dsets.append(subdset)

    col_ids = [Constants.C_CONTEXT,
               Constants.C_ZMWS,
               Constants.C_PROD_0,
               Constants.C_PROD_1,
               Constants.C_PROD_2]
    col_values = [[], [], [], [], []]
    for dset in dsets:
        if len(dsets) > 1 and len(col_values[0]) == 0:
            movie_name = "Combined"
        else:
            try:
                collection = list(dset.metadata.collections)[0]
                movie_name = collection.context
            except AttributeError:
                movie_name = "NA"

        productive_zmws = int(dset.metadata.summaryStats.numSequencingZmws)
        empty, productive, other, _ = dset.metadata.summaryStats.prodDist.bins
        prod0 = np.round(100.0 * empty / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        prod1 = np.round(100.0 * productive / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        prod2 = np.round(100.0 * other / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        this_row = [movie_name, productive_zmws, prod0, prod1, prod2]
        for col, value in zip(col_values, this_row):
            col.append(value)

    columns = [Column(cid, values=vals)
               for cid, vals in zip(col_ids, col_values)]
    tables = [Table(Constants.T_LOADING, columns=columns)]

    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    tables=tables,
                    attributes=None,
                    plotgroups=None)
    return meta_rpt.apply_view(report)
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""
    columns = []
    columns.append(Column("barcode_col", header=''))
    columns.append(Column("good", header=''))
    columns.append(Column("good_pct", header=''))
    columns.append(Column("chimera", header=''))
    columns.append(Column("chimera_pct", header=''))
    columns.append(Column("noise", header=''))
    columns.append(Column("noise_pct", header=''))

    t = Table(Constants.T_R, columns=columns)
    for barcode, data in tabulated_data.iteritems():
        if barcode != 'all':
            t.add_data_by_column_id('barcode_col', barcode)
            for column_id in ['good', 'good_pct', 'chimera', 'chimera_pct',
                              'noise', 'noise_pct']:
                t.add_data_by_column_id(column_id, data[column_id])
    t.add_data_by_column_id('barcode_col', 'All')
    for column_id in ['good', 'good_pct', 'chimera', 'chimera_pct',
                      'noise', 'noise_pct']:
        t.add_data_by_column_id(column_id, tabulated_data['all'][column_id])

    log.info(str(t))
    return t
def test_get_column_by_id(self):
    r = Report('redfang')
    t1 = Table('tabid1')
    c1 = Column('c1')
    t1.add_column(c1)
    r.add_table(t1)
    c = r.get_table_by_id('tabid1').get_column_by_id('c1')
    self.assertEqual(c, c1)
def to_task_summary_report(bg):
    cs = [Column("workflow_task_id", header="Task Id"),
          Column("workflow_task_status", header="Status"),
          Column("workflow_task_run_time", header="Task Runtime"),
          Column('workflow_task_nproc', header="Number of Procs"),
          Column("workflow_task_emsg", header="Error Message")]

    t = Table("workflow_task_summary", title="Task Summary", columns=cs)

    for tnode in bg.all_task_type_nodes():
        if isinstance(tnode, VALID_ALL_TASK_NODE_CLASSES):
            t.add_data_by_column_id("workflow_task_id", tnode.idx)
            t.add_data_by_column_id("workflow_task_status", bg.node[tnode]['state'])
            t.add_data_by_column_id("workflow_task_run_time", bg.node[tnode]['run_time'])
            t.add_data_by_column_id("workflow_task_nproc", bg.node[tnode]['nproc'])
            t.add_data_by_column_id("workflow_task_emsg", bg.node[tnode]['error_message'])

    return Report("workflow_task_summary", tables=[t])
def _to_table(tid, bg, nodes):
    """Create a table from File nodes or Entry nodes"""
    columns = [Column('id', header="Id"),
               Column('is_resolved', header='Is Resolved'),
               Column('path', header="Path")]
    table = Table(tid, columns=columns)

    for node in nodes:
        table.add_data_by_column_id('id', str(node))
        table.add_data_by_column_id('is_resolved', bg.node[node]['is_resolved'])
        try:
            table.add_data_by_column_id('path', bg.node[node]['path'])
        except KeyError as e:
            slog.error("Failed to get path from {n}".format(n=repr(node)))
            slog.error(e)
            table.add_data_by_column_id('path', "NA")

    return table
def attributesToTable(attributes):
    """Build a report table from Iso-Seq cluster attributes."""
    columns = [Column(x.id, header="") for x in attributes]
    table = Table(Constants.T_ATTR, columns=columns)
    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)
    return table
def _dict_to_report_table(table_id, key_attr, value_attr, d):
    """
    General {k->v} to create a pbreport Table

    :param table_id: Table id
    :param key_attr: Column id
    :param value_attr: Column id
    :param d: dict
    :return: populated Table
    """
    columns = [Column(key_attr, header="Attribute"),
               Column(value_attr, header="Value")]
    table = Table(table_id, columns=columns)
    for k, v in d.iteritems():
        table.add_data_by_column_id(key_attr, k)
        table.add_data_by_column_id(value_attr, v)
    return table
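# Usage sketch for _dict_to_report_table: any flat {key: value} dict becomes a
# two-column Attribute/Value table. The table and column ids below are
# illustrative.
settings = {"nproc": 8, "max_workers": 4}
table = _dict_to_report_table("workflow_settings", "ws_key", "ws_value", settings)
# table.columns[0].values holds the keys, table.columns[1].values the values,
# in dict iteration order (so unordered on Python 2)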
def test_get_column_by_id_with_bad_id(self):
    r = Report('redfang')
    t1 = Table('tabid1')
    c1 = Column('c1')
    t1.add_column(c1)
    r.add_table(t1)
    bad_c = r.get_table_by_id('tabid1').get_column_by_id('id_that_does_not_exist')
    assert bad_c is None
def test_get_table_by_id(self):
    r = Report('redfang')
    t1 = Table('tabid1')
    t1.add_column(Column('c1'))
    r.add_table(t1)
    t = r.get_table_by_id('tabid1')
    assert t == t1
    columns_d = t.to_columns_d()
    assert len(columns_d) == 0
def create_table(timings):
    """Long Amplicon Analysis Timing Result table"""
    columns = []
    columns.append(Column("barcode_col", header=""))
    columns.append(Column("hour_col", header=""))
    columns.append(Column("minute_col", header=""))
    columns.append(Column("second_col", header=""))

    t = Table("result_table", title="", columns=columns)

    seconds = []
    for barcode in sorted(timings):
        if barcode != 'All':
            data = timings[barcode]
            t.add_data_by_column_id('barcode_col', barcode)
            t.add_data_by_column_id('hour_col', data.seconds / 3600)
            t.add_data_by_column_id('minute_col', data.seconds / 60)
            t.add_data_by_column_id('second_col', data.seconds)
            seconds.append(data.seconds)

    # Add the average time information, averaging over the per-barcode
    # entries only (len(timings) would also count the 'All' entry)
    seconds_sum = sum(seconds)
    avg_seconds = seconds_sum / len(seconds)
    t.add_data_by_column_id('barcode_col', 'Mean')
    t.add_data_by_column_id('hour_col', avg_seconds / 3600)
    t.add_data_by_column_id('minute_col', avg_seconds / 60)
    t.add_data_by_column_id('second_col', avg_seconds)

    # Add the median time information
    median_seconds = int(median(seconds))
    t.add_data_by_column_id('barcode_col', 'Median')
    t.add_data_by_column_id('hour_col', median_seconds / 3600)
    t.add_data_by_column_id('minute_col', median_seconds / 60)
    t.add_data_by_column_id('second_col', median_seconds)

    # Add the total time information
    t.add_data_by_column_id('barcode_col', 'Total')
    t.add_data_by_column_id('hour_col', timings['All'].seconds / 3600)
    t.add_data_by_column_id('minute_col', timings['All'].seconds / 60)
    t.add_data_by_column_id('second_col', timings['All'].seconds)

    log.info(str(t))
    return t
def to_sample_table(variant_table):
    sample_table = aggregate_variant_table(variant_table)
    columns = []
    for i, col_id in enumerate(Constants.SAMPLES_COL_IDS):
        columns.append(Column(col_id, values=sample_table[i]))
    sample_table_r = Table(Constants.T_SAMPLES, columns=columns)
    return sample_table_r
def create_table(timings):
    """Long Amplicon Analysis Timing Result table"""
    columns = []
    columns.append(Column(Constants.C_BC))
    columns.append(Column(Constants.C_HOUR))
    columns.append(Column(Constants.C_MIN))
    columns.append(Column(Constants.C_SEC))

    t = Table(Constants.T_ID, columns=columns)

    seconds = []
    for barcode in sorted(timings):
        if barcode != 'All':
            data = timings[barcode]
            t.add_data_by_column_id(Constants.C_BC, barcode)
            t.add_data_by_column_id(Constants.C_HOUR, data.seconds / 3600)
            t.add_data_by_column_id(Constants.C_MIN, data.seconds / 60)
            t.add_data_by_column_id(Constants.C_SEC, data.seconds)
            seconds.append(data.seconds)

    # Add the average time information, averaging over the per-barcode
    # entries only (len(timings) would also count the 'All' entry)
    seconds_sum = sum(seconds)
    avg_seconds = seconds_sum / len(seconds)
    t.add_data_by_column_id(Constants.C_BC, 'Mean')
    t.add_data_by_column_id(Constants.C_HOUR, avg_seconds / 3600)
    t.add_data_by_column_id(Constants.C_MIN, avg_seconds / 60)
    t.add_data_by_column_id(Constants.C_SEC, avg_seconds)

    # Add the median time information
    median_seconds = int(median(seconds))
    t.add_data_by_column_id(Constants.C_BC, 'Median')
    t.add_data_by_column_id(Constants.C_HOUR, median_seconds / 3600)
    t.add_data_by_column_id(Constants.C_MIN, median_seconds / 60)
    t.add_data_by_column_id(Constants.C_SEC, median_seconds)

    # Add the total time information
    t.add_data_by_column_id(Constants.C_BC, 'Total')
    t.add_data_by_column_id(Constants.C_HOUR, timings['All'].seconds / 3600)
    t.add_data_by_column_id(Constants.C_MIN, timings['All'].seconds / 60)
    t.add_data_by_column_id(Constants.C_SEC, timings['All'].seconds)

    log.debug(str(t))
    return t
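# The timing tables above store *total* hours and *total* minutes via floor
# division (3725 seconds -> 1 hour, 62 minutes, 3725 seconds). If an h:m:s
# breakdown were wanted instead, divmod is the usual sketch:
def hms(total_seconds):
    """Split a duration in seconds into (hours, minutes, seconds)."""
    hours, rem = divmod(total_seconds, 3600)
    minutes, secs = divmod(rem, 60)
    return hours, minutes, secs

# hms(3725) == (1, 2, 5)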
def _generate_table(list_fastq_stats):
    columns = [Column(Constants.C_FN, header=''),
               Column(Constants.C_NREADS, header=""),
               Column(Constants.C_TOT_BASES, header=""),
               Column(Constants.C_READLENGTH, header=""),
               Column(Constants.C_QV, header="")]

    table = Table(Constants.T_FASTQ, columns=columns)

    for fastq_stat in list_fastq_stats:
        table.add_data_by_column_id(Constants.C_FN,
                                    os.path.basename(fastq_stat.file_name))
        table.add_data_by_column_id(Constants.C_NREADS,
                                    fastq_stat.reads.shape[0])
        table.add_data_by_column_id(Constants.C_TOT_BASES,
                                    int(np.sum(fastq_stat.reads)))
        table.add_data_by_column_id(Constants.C_READLENGTH,
                                    int(fastq_stat.reads.mean()))
        table.add_data_by_column_id(Constants.C_QV,
                                    np.round(fastq_stat.qvs.mean(), decimals=2))

    return table
def _attributes_to_table(attributes):
    """Build a report table from Iso-Seq Classify attributes."""
    columns = [Column(x.id) for x in attributes]
    table = Table(Constants.T_ATTR, columns=columns)
    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)
    return table
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""
    columns = []
    if barcode:
        columns.append(Column("barcodename", header=""))
    columns.append(Column("coarsecluster", header=""))
    columns.append(Column("phase", header=""))
    columns.append(Column("sequencelength", header=""))
    columns.append(Column("predictedaccuracy", header=""))
    columns.append(Column("totalcoverage", header=""))

    t = Table("result_table", columns=columns)

    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            # if column.id == "predictedaccuracy":
            #     accuracy = round(100 * row[column.id][0], 2)
            #     t.add_data_by_column_id(column.id, accuracy)
            # else:
            t.add_data_by_column_id(column.id, row[column.id][0])

    log.info(str(t))
    return t