def create_table(d, barcode):
    """Long Amplicon Analysis results table.

    :param d: record-array-like object with a ``fastaname`` field and one
        field per column id (barcodename, coarsecluster, phase, ...)
    :param barcode: if True, prepend the barcode name column
    :rtype: Table
    """
    columns = []
    if barcode:
        columns.append(Column(Constants.C_BARCODE))
    columns.append(Column(Constants.C_CLUSTER))
    columns.append(Column(Constants.C_PHASE))
    columns.append(Column(Constants.C_LENGTH))
    columns.append(Column(Constants.C_ACCURACY))
    columns.append(Column(Constants.C_COVERAGE))
    t = Table(Constants.T_ID, columns=columns)
    # one table row per consensus sequence, ordered by FASTA name;
    # each column id doubles as the field name in `d`
    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            t.add_data_by_column_id(column.id, row[column.id][0])
    log.info(str(t))
    return t
def _to_table(self, movie_datum):
    """
    Create a pbreports Table for each movie.

    :param movie_datum: List of

    [( movie_name,
      reads,
      mean readlength,
      polymerase readlength
      number of subread bases
      mean subread readlength
      mean subread accuracy), ...]

    :raises ValueError: if a tuple's length does not match the number of
        declared columns
    :rtype: Table
    """
    columns = [Column(k, header=h) for k, h in self.COLUMNS]
    table = Table(Constants.T_STATS,
                  title="Mapping Statistics Summary",
                  columns=columns)
    for movie_data in movie_datum:
        if len(movie_data) != len(columns):
            log.error(movie_datum)
            raise ValueError(
                "Incompatible values. {n} values provided, "
                "expected {a}".format(n=len(movie_data), a=len(columns)))
        for value, c in zip(movie_data, columns):
            table.add_data_by_column_id(c.id, value)
    log.debug(str(table))
    # NOTE: removed stray `print table` debug statement that leaked the
    # whole table to stdout on every call
    return table
def to_table(motif_records):
    """Build the motif report Table, one row per motif record."""
    # (column id, record attribute) pairs, in display order
    col_attr_pairs = [
        ('motif_id', 'motif_str'),
        ('modified_position', 'center_position'),
        ('modification_type', 'modification_type'),
        ('percent_motifs_detected', 'fraction'),
        ('ndetected_motifs', 'ndetected'),
        ('nmotifs_in_genome', 'ngenome'),
        ('mean_readscore', 'mean_score'),
        ('mean_coverage', 'mean_coverage'),
        ('partner_motif', 'partner_motif_str'),
        ('mean_ipd_ratio', 'mean_ipd_ratio'),
        ('group_tag', 'group_tag'),
        ('objective_score', 'objective_score'),
    ]
    columns = [Column(col_id, header="") for col_id, _ in col_attr_pairs]
    table = Table(Constants.T_ID, title="", columns=columns)
    for record in motif_records:
        for (col_id, attr_name), column in zip(col_attr_pairs, columns):
            table.add_data_by_column_id(column.id, getattr(record, attr_name))
    return table
class TestEmptyTable:

    """Basic Smoke tests"""

    def setup_method(self, method):
        # rebuild an empty three-column table before every test
        self.columns = [
            Column('one', header="One"),
            Column('two', header="Two"),
            Column('three', header="Three"),
        ]
        self.table = Table('my_table', columns=self.columns)

    def test_str(self):
        """Smoke test for conversion to str"""
        log.info(str(self.table))
        assert str(self.table) is not None

    def test_columns(self):
        """Test Columns"""
        assert len(self.table.columns) == 3

    def test_column_values(self):
        """Basic check for column values"""
        # no data was added, so every column must be empty
        assert all(len(c.values) == 0 for c in self.table.columns)

    def test_to_dict(self):
        """Conversion to dictionary"""
        assert isinstance(self.table.to_dict(), dict)
        log.info(self.table.to_dict())
def _to_table(self, movie_datum):
    """
    Create a pbreports Table for each movie.

    :param movie_datum: List of

    [( movie_name,
      reads,
      mean readlength,
      polymerase readlength
      number of subread bases
      mean subread readlength
      mean subread concordance), ...]
    """
    n_cols = len(self.COL_IDS)
    table = Table(Constants.T_STATS,
                  columns=(Column(c_id) for c_id in self.COL_IDS))
    for movie_data in movie_datum:
        # each tuple must line up with the declared column ids
        if len(movie_data) != n_cols:
            log.error(movie_datum)
            raise ValueError(
                "Incompatible values. {n} values provided, expected {a}".format(
                    n=len(movie_data), a=n_cols))
        for c_id, value in zip(self.COL_IDS, movie_data):
            table.add_data_by_column_id(c_id, value)
    log.debug(str(table))
    return table
def test_to_dict(self):
    """
    The id of report sub elements is prepended with the id of the
    parent element when to_dict is called.
    """
    r = Report('redfang')
    r.add_attribute(Attribute('a', 'b'))
    r.add_attribute(Attribute('a2', 'b2'))

    pg = PlotGroup('pgid')
    pg.add_plot(Plot('pid', 'anImg'))
    pg.add_plot(Plot('pid2', 'anImg2'))
    r.add_plotgroup(pg)

    t = Table('tabid')
    t.add_column(Column('c1'))
    r.add_table(t)

    d = r.to_dict()
    log.debug("\n" + pformat(d))

    # every child id is namespaced by its parent's id
    expectations = [
        ('redfang', d['id']),
        ('redfang.a', d['attributes'][0]['id']),
        ('redfang.a2', d['attributes'][1]['id']),
        ('redfang.pgid', d['plotGroups'][0]['id']),
        ('redfang.pgid.pid', d['plotGroups'][0]['plots'][0]['id']),
        ('redfang.pgid.pid2', d['plotGroups'][0]['plots'][1]['id']),
        ('redfang.tabid', d['tables'][0]['id']),
        ('redfang.tabid.c1', d['tables'][0]['columns'][0]['id']),
    ]
    for expected, actual in expectations:
        self.assertEqual(expected, actual)
def test_to_dict(self):
    """
    The id of report sub elements is prepended with the id of the parent
    element when to_dict is called.
    """
    r = Report("redfang")
    for attr_id, attr_val in [("a", "b"), ("a2", "b2")]:
        r.add_attribute(Attribute(attr_id, attr_val))

    pg = PlotGroup("pgid")
    for plot_id, img in [("pid", "anImg"), ("pid2", "anImg2")]:
        pg.add_plot(Plot(plot_id, img))
    r.add_plotgroup(pg)

    table = Table("tabid")
    table.add_column(Column("c1"))
    r.add_table(table)

    d = r.to_dict()
    log.debug("\n" + pformat(d))

    # ids of children are prefixed with the parent id
    self.assertEqual("redfang", d["id"])
    self.assertEqual("redfang.a", d["attributes"][0]["id"])
    self.assertEqual("redfang.a2", d["attributes"][1]["id"])
    self.assertEqual("redfang.pgid", d["plotGroups"][0]["id"])
    self.assertEqual("redfang.pgid.pid",
                     d["plotGroups"][0]["plots"][0]["id"])
    self.assertEqual("redfang.pgid.pid2",
                     d["plotGroups"][0]["plots"][1]["id"])
    self.assertEqual("redfang.tabid", d["tables"][0]["id"])
    self.assertEqual("redfang.tabid.c1",
                     d["tables"][0]["columns"][0]["id"])
class TestBasicTable(unittest.TestCase):

    """Basic Smoke tests"""

    def setUp(self):
        # three columns, each populated with three values
        self.columns = [Column('one', header="One"),
                        Column('two', header="Two"),
                        Column('three', header="Three")]
        self.table = Table('my_table_with_values', columns=self.columns)
        datum = {'one': list(xrange(3)),
                 'two': list('abc'),
                 'three': 'file1 file2 file3'.split()}
        for col_id, values in datum.iteritems():
            for v in values:
                self.table.add_data_by_column_id(col_id, v)

    def test_str(self):
        """Smoke test for conversion to str"""
        log.info(str(self.table))
        self.assertIsNotNone(str(self.table))

    def test_columns(self):
        """Test Columns"""
        self.assertEqual(len(self.table.columns), 3)

    def test_column_values(self):
        """Basic check for column values"""
        for column in self.table.columns:
            self.assertEqual(len(column.values), 3)

    def test_to_dict(self):
        """Conversion to dictionary"""
        self.assertIsInstance(self.table.to_dict(), dict)
        log.info(self.table.to_dict())
class TestEmptyTable(unittest.TestCase):

    """Basic Smoke tests"""

    def setUp(self):
        # header-only columns; no values are ever added
        headers = [('one', "One"), ('two', "Two"), ('three', "Three")]
        self.columns = [Column(cid, header=h) for cid, h in headers]
        self.table = Table('my_table', columns=self.columns)

    def test_str(self):
        """Smoke test for conversion to str"""
        log.info(str(self.table))
        self.assertIsNotNone(str(self.table))

    def test_columns(self):
        """Test Columns"""
        self.assertEqual(len(self.table.columns), 3)

    def test_column_values(self):
        """Basic check for column values"""
        for column in self.table.columns:
            self.assertEqual(len(column.values), 0)

    def test_to_dict(self):
        """Conversion to dictionary"""
        self.assertIsInstance(self.table.to_dict(), dict)
        log.info(self.table.to_dict())
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""
    col_ids = ["barcode_col", "good", "good_pct", "chimera",
               "chimera_pct", "noise", "noise_pct"]
    columns = [Column(cid, header='') for cid in col_ids]
    t = Table(Constants.T_R, columns=columns)
    data_ids = col_ids[1:]

    def _fill_row(label, data):
        # one table row: barcode label plus its six count/percent values
        t.add_data_by_column_id('barcode_col', label)
        for column_id in data_ids:
            t.add_data_by_column_id(column_id, data[column_id])

    for barcode, data in tabulated_data.iteritems():
        if barcode != 'all':
            _fill_row(barcode, data)
    # aggregate row goes last, labeled 'All'
    _fill_row('All', tabulated_data['all'])
    log.info(str(t))
    return t
def run_to_report(reads, barcodes, subreads=True, dataset_uuids=()):
    """
    Generate a Report instance from a SubreadSet and BarcodeSet.

    :param subreads: If the ccs fofn is given this needs to be set to False
    :param dataset_uuids: UUIDs forwarded to the Report constructor
    :rtype: Report
    """

    class MyRow(object):
        # simple accumulator for per-label read and base counts

        def __init__(self, label):
            self.label = label
            self.bases = 0
            self.reads = 0

    label2row = {}

    for label, barcode, read in _labels_reads_iterator(reads, barcodes,
                                                       subreads=subreads):
        # idiom fix: `label not in` rather than `not label in`
        if label not in label2row:
            label2row[label] = MyRow(label)
        label2row[label].bases += len(read)
        label2row[label].reads += 1

    columns = [Column(Constants.C_BARCODE),
               Column(Constants.C_NREADS),
               Column(Constants.C_NBASES)]

    table = Table("barcode_table", columns=columns)
    # emit rows sorted by barcode label for deterministic output
    for label in sorted(label2row.keys()):
        row = label2row[label]
        table.add_data_by_column_id(Constants.C_BARCODE, label)
        table.add_data_by_column_id(Constants.C_NREADS, row.reads)
        table.add_data_by_column_id(Constants.C_NBASES, row.bases)

    report = Report(meta_rpt.id, tables=[table], dataset_uuids=dataset_uuids)
    return meta_rpt.apply_view(report)
def attributes_to_table(attributes, table_id):
    """Build a report table from Iso-Seq cluster attributes."""
    # one column per attribute; headers intentionally blank
    columns = []
    for attr in attributes:
        columns.append(Column(attr.id, header=""))
    table = Table(table_id, columns=columns)
    for attr in attributes:
        table.add_data_by_column_id(attr.id, attr.value)
    return table
class TestEmptyTable(unittest.TestCase):

    """Basic Smoke tests"""

    def setUp(self):
        # the table under test holds columns but never any values
        self.columns = [Column(i, header=h)
                        for i, h in zip(['one', 'two', 'three'],
                                        ["One", "Two", "Three"])]
        self.table = Table('my_table', columns=self.columns)

    def test_str(self):
        """Smoke test for conversion to str"""
        log.info(str(self.table))
        self.assertIsNotNone(str(self.table))

    def test_columns(self):
        """Test Columns"""
        self.assertEqual(len(self.table.columns), 3)

    def test_column_values(self):
        """Basic check for column values"""
        for column in self.table.columns:
            self.assertEqual(len(column.values), 0)

    def test_to_dict(self):
        """Conversion to dictionary"""
        self.assertIsInstance(self.table.to_dict(), dict)
        log.info(self.table.to_dict())
def to_table(motif_records):
    """Build the motif summary table, one row per motif record."""
    # column-id constants paired with the record attribute feeding them
    spec = zip(
        [Constants.C_ID, Constants.C_POS, Constants.C_TYPE,
         Constants.C_PCT_MOTIF, Constants.C_NMOTIF, Constants.C_NMOTIF_GEN,
         Constants.C_READSCORE, Constants.C_COV, Constants.C_PARTNER,
         Constants.C_IPD, Constants.C_GRP, Constants.C_OBJ_SCORE],
        ['motif_str', 'center_position', 'modification_type', 'fraction',
         'ndetected', 'ngenome', 'mean_score', 'mean_coverage',
         'partner_motif_str', 'mean_ipd_ratio', 'group_tag',
         'objective_score'])
    columns = [Column(col_id) for col_id, _ in spec]
    table = Table(Constants.T_ID, columns=columns)
    for record in motif_records:
        for column, (_, attr_name) in zip(columns, spec):
            table.add_data_by_column_id(column.id, getattr(record, attr_name))
    return table
def create_table(d, barcode):
    """Long Amplicon Analysis results table.

    :param d: record-array-like object with a ``fastaname`` field and one
        field per column id
    :param barcode: if True, prepend the barcode name column
    :rtype: Table
    """
    columns = []
    if barcode:
        columns.append(Column("barcodename", header=""))
    columns.append(Column("coarsecluster", header=""))
    columns.append(Column("phase", header=""))
    columns.append(Column("sequencelength", header=""))
    columns.append(Column("predictedaccuracy", header=""))
    columns.append(Column("totalcoverage", header=""))
    t = Table("result_table", columns=columns)
    # one row per consensus sequence, ordered by FASTA name; column ids
    # double as field names in `d`
    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            t.add_data_by_column_id(column.id, row[column.id][0])
    log.info(str(t))
    return t
def to_table(motif_records):
    """Create the motifs report table (one row per motif record)."""
    col_ids = [Constants.C_ID, Constants.C_POS, Constants.C_TYPE,
               Constants.C_PCT_MOTIF, Constants.C_NMOTIF,
               Constants.C_NMOTIF_GEN, Constants.C_READSCORE,
               Constants.C_COV, Constants.C_PARTNER, Constants.C_IPD,
               Constants.C_GRP, Constants.C_OBJ_SCORE]
    # record attributes, index-aligned with col_ids
    attr_names = ['motif_str', 'center_position', 'modification_type',
                  'fraction', 'ndetected', 'ngenome', 'mean_score',
                  'mean_coverage', 'partner_motif_str', 'mean_ipd_ratio',
                  'group_tag', 'objective_score']
    table = Table(Constants.T_ID, columns=[Column(c) for c in col_ids])
    for record in motif_records:
        for col_id, attr_name in zip(col_ids, attr_names):
            table.add_data_by_column_id(col_id, getattr(record, attr_name))
    return table
def create_table(d, barcode):
    """Long Amplicon Analysis results table.

    :param d: record-array-like object with a ``fastaname`` field and one
        field per column id
    :param barcode: if True, prepend the barcode column
    :rtype: Table
    """
    columns = []
    if barcode:
        columns.append(Column("barcodename", header="Barcode"))
    columns.append(Column("coarsecluster", header="Sequence Cluster"))
    columns.append(Column("phase", header="Sequence Phase"))
    columns.append(Column("sequencelength", header="Length (bp)"))
    columns.append(Column("predictedaccuracy", header="Estimated Accuracy"))
    columns.append(Column("totalcoverage", header="Subreads coverage"))
    t = Table("result_table", title="Amplicon Consensus Summary",
              columns=columns)
    # one row per consensus sequence, ordered by FASTA name; column ids
    # double as field names in `d`
    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            t.add_data_by_column_id(column.id, row[column.id][0])
    log.info(str(t))
    return t
def to_table(motif_records):
    """Build the Motifs report table, one row per motif record.

    :param motif_records: iterable of motif record objects exposing the
        attributes listed in ``attr_names``
    :rtype: Table
    """
    columns = [Column('motif_id', header="Motif"),
               Column('modified_position', header="Modified Position"),
               # FIX: header typo "Motification Type" -> "Modification Type"
               Column('modification_type', header="Modification Type"),
               Column('percent_motifs_detected', header="% of Motifs Detected"),
               Column('ndetected_motifs', header="# of Motifs Detected"),
               Column('nmotifs_in_genome', header="# of Motifs in Genome"),
               Column('mean_readscore', header='Mean QV'),
               Column('mean_coverage', header="Mean Coverage"),
               Column('partner_motif', header="Partner Motif"),
               # FIX: column id typo 'mean_ipd_ration' -> 'mean_ipd_ratio',
               # consistent with the sibling implementation of this table
               Column('mean_ipd_ratio', header="Mean IPD ratio"),
               Column('group_tag', header="Group Tag"),
               Column('objective_score', header='Objective Score')]
    # Record attr name ordered by index in columns
    attr_names = ['motif_str', 'center_position', 'modification_type',
                  'fraction', 'ndetected', 'ngenome', 'mean_score',
                  'mean_coverage', 'partner_motif_str', 'mean_ipd_ratio',
                  'group_tag', 'objective_score']
    table = Table(Constants.T_ID, title="Motifs", columns=columns)
    for record in motif_records:
        for attr_name, column in zip(attr_names, columns):
            v = getattr(record, attr_name)
            table.add_data_by_column_id(column.id, v)
    return table
def test_get_table_by_id_with_bad_id(self):
    # looking up an unknown table id must return None, not raise
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    self.assertIsNone(report.get_table_by_id('id_that_does_not_exist'))
def setUp(self):
    # rebuild an empty three-column table before each test
    self.columns = [Column('one', header="One"),
                    Column('two', header="Two"),
                    Column('three', header="Three")]
    self.table = Table('my_table', columns=self.columns)
def test_get_table_by_id(self):
    # a table added to a report is retrievable by its id
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    found = report.get_table_by_id('tabid1')
    self.assertEqual(found, table)
def test_get_column_by_id(self):
    # a column is retrievable through its parent table's lookup
    report = Report('redfang')
    column = Column('c1')
    table = Table('tabid1')
    table.add_column(column)
    report.add_table(table)
    found = report.get_table_by_id('tabid1').get_column_by_id('c1')
    self.assertEqual(found, column)
def attributesToTable(attributes):
    """Build a report table from Iso-Seq cluster attributes."""
    # one header-less column per attribute, holding a single value
    table = Table(Constants.T_ATTR,
                  columns=[Column(a.id, header="") for a in attributes])
    for a in attributes:
        table.add_data_by_column_id(a.id, a.value)
    return table
def test_get_column_by_id_with_bad_id(self):
    # unknown column ids must resolve to None, not raise
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    missing = report.get_table_by_id('tabid1').get_column_by_id(
        'id_that_does_not_exist')
    assert missing is None
def test_get_table_by_id(self):
    # tables are retrievable by id; a value-less table yields no columns
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    found = report.get_table_by_id('tabid1')
    assert found == table
    assert len(found.to_columns_d()) == 0
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""
    headers = [("barcode_col", "Sample"), ("good", "Good"),
               ("good_pct", "Good (%)"), ("chimera", "Chimeric"),
               ("chimera_pct", "Chimeric (%)"), ("noise", "Noise"),
               ("noise_pct", "Noise (%)")]
    columns = [Column(cid, header=h) for cid, h in headers]
    t = Table("result_table", title="Amplicon Input Molecule Summary",
              columns=columns)
    data_ids = [cid for cid, _ in headers[1:]]

    def _fill_row(label, data):
        # one table row: sample label plus its six count/percent values
        t.add_data_by_column_id('barcode_col', label)
        for column_id in data_ids:
            t.add_data_by_column_id(column_id, data[column_id])

    for barcode, data in tabulated_data.iteritems():
        if barcode != 'all':
            _fill_row(barcode, data)
    # the aggregate row comes last, labeled 'All'
    _fill_row('All', tabulated_data['all'])
    log.info(str(t))
    return t
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""
    col_ids = ["barcode_col", "good", "good_pct", "chimera",
               "chimera_pct", "noise", "noise_pct"]
    t = Table(Constants.T_R,
              columns=[Column(c, header="") for c in col_ids])
    value_ids = col_ids[1:]
    # per-barcode rows first ...
    for barcode, data in tabulated_data.iteritems():
        if barcode == "all":
            continue
        t.add_data_by_column_id("barcode_col", barcode)
        for column_id in value_ids:
            t.add_data_by_column_id(column_id, data[column_id])
    # ... then a single aggregate row labeled "All"
    totals = tabulated_data["all"]
    t.add_data_by_column_id("barcode_col", "All")
    for column_id in value_ids:
        t.add_data_by_column_id(column_id, totals[column_id])
    log.info(str(t))
    return t
def test_basic(self):
    # a table built from generated columns must serialize and validate
    n = 3
    columns = [_column_generator("my_id1", list(xrange(n))),
               _column_generator("my_id2", string.lowercase[:n])]
    table = Table("my_table", title="My Table", columns=columns)
    d = table.to_dict()
    validate_table(d)
    self.assertIsNotNone(d)
def setUp(self):
    self.columns = [Column('one', header="One"),
                    Column('two', header="Two"),
                    Column('three', header="Three")]
    self.table = Table('my_table_with_values', columns=self.columns)
    # populate each column with three values, in declaration order
    for col_id, values in [('one', list(xrange(3))),
                           ('two', list('abc')),
                           ('three', 'file1 file2 file3'.split())]:
        for value in values:
            self.table.add_data_by_column_id(col_id, value)
def _attributes_to_table(attributes):
    """Build a report table from Iso-Seq Classify attributes. """
    # one column per attribute, each holding that attribute's value
    table = Table(Constants.T_ATTR,
                  columns=[Column(a.id) for a in attributes])
    for a in attributes:
        table.add_data_by_column_id(a.id, a.value)
    return table
def attributesToTable(attributes):
    """Build a report table from IsoSeq cluster attributes."""
    # attribute names become column headers; each column holds one value
    table = Table('isoseq_cluster_table',
                  title="IsoSeq Cluster",
                  columns=[Column(a.id, header=a.name) for a in attributes])
    for a in attributes:
        table.add_data_by_column_id(a.id, a.value)
    return table
def test_add_data_by_column_id(self):
    """Added data values by column identifier."""
    table = Table('mytable', columns=[Column('one'), Column('two')])
    for key, value in {'one': 12.0, 'two': 1234.0}.iteritems():
        table.add_data_by_column_id(key, value)
    # each value must land in the column whose id was given
    self.assertTrue(12.0 in table.columns[0].values)
    self.assertTrue(1234.0 in table.columns[1].values)
def test_append_data(self):
    """Append data to columns by index."""
    table = Table('foo', columns=[Column('1'), Column('2')])
    # values accumulate per column, in append order
    for index, value in [(0, 'whatev'), (0, 'huh'),
                         (1, 'ernie'), (1, 'bert')]:
        table.append_data(index, value)
    self.assertSequenceEqual(['whatev', 'huh'], table.columns[0].values)
    self.assertSequenceEqual(['ernie', 'bert'], table.columns[1].values)
def _attributes_to_table(attributes):
    """Build a report table from IsoSeq Classify attributes. """
    # attribute names become headers; each column holds a single value
    table = Table('isoseq_classify_table',
                  title="IsoSeq Transcript Classification",
                  columns=[Column(a.id, header=a.name) for a in attributes])
    for a in attributes:
        table.add_data_by_column_id(a.id, a.value)
    return table
def test_basic(self):
    # generated columns must round-trip through to_dict and validate
    n = 3
    columns = [_column_generator('my_id1', list(xrange(n))),
               _column_generator('my_id2', string.lowercase[:n])]
    table = Table('my_table', title="My Table", columns=columns)
    d = table.to_dict()
    validate_table(d)
    self.assertIsNotNone(d)
def test_to_dict_multi(self):
    """
    Multiple complex elements.
    The id of report sub elements is prepended with the id of the
    parent element when to_dict is called.
    """
    r = Report('redfang')
    r.add_attribute(Attribute('a', 'b'))
    r.add_attribute(Attribute('a2', 'b2'))

    pg1 = PlotGroup('pgid')
    pg1.add_plot(Plot('pid', 'anImg'))
    pg1.add_plot(Plot('pid2', 'anImg2'))
    r.add_plotgroup(pg1)

    pg2 = PlotGroup('pgid2')
    pg2.add_plot(Plot('pid2', 'anImg2'))
    pg2.add_plot(Plot('pid22', 'anImg22'))
    r.add_plotgroup(pg2)

    t1 = Table('tabid')
    t1.add_column(Column('c1'))
    r.add_table(t1)

    t2 = Table('tabid2')
    t2.add_column(Column('c2'))
    r.add_table(t2)

    d = r.to_dict()
    log.debug(str(d))

    # every child id is namespaced by its parent's id
    expected = [
        ('redfang', d['id']),
        ('redfang.a', d['attributes'][0]['id']),
        ('redfang.a2', d['attributes'][1]['id']),
        ('redfang.pgid', d['plotGroups'][0]['id']),
        ('redfang.pgid.pid', d['plotGroups'][0]['plots'][0]['id']),
        ('redfang.pgid.pid2', d['plotGroups'][0]['plots'][1]['id']),
        ('redfang.pgid2', d['plotGroups'][1]['id']),
        ('redfang.pgid2.pid2', d['plotGroups'][1]['plots'][0]['id']),
        ('redfang.pgid2.pid22', d['plotGroups'][1]['plots'][1]['id']),
        ('redfang.tabid', d['tables'][0]['id']),
        ('redfang.tabid.c1', d['tables'][0]['columns'][0]['id']),
        ('redfang.tabid2', d['tables'][1]['id']),
        ('redfang.tabid2.c2', d['tables'][1]['columns'][0]['id']),
    ]
    for want, got in expected:
        self.assertEqual(want, got)

    log.info(repr(r))
    self.assertIsNotNone(repr(r))
def test_to_dict_multi(self):
    """
    Multiple complex elements.
    The id of report sub elements is prepended with the id of the
    parent element when to_dict is called.
    """
    tags = ["alpha", "beta", "gamma"]
    r = Report('redfang', tags=tags)
    r.add_attribute(Attribute('a', 'b'))
    r.add_attribute(Attribute('a2', 'b2'))

    # two plot groups with two plots each
    for pg_id, plot_specs in [
            ('pgid', [('pid', 'anImg'), ('pid2', 'anImg2')]),
            ('pgid2', [('pid2', 'anImg2'), ('pid22', 'anImg22')])]:
        pg = PlotGroup(pg_id)
        for plot_id, img in plot_specs:
            pg.add_plot(Plot(plot_id, img))
        r.add_plotgroup(pg)

    # two single-column tables
    for table_id, col_id in [('tabid', 'c1'), ('tabid2', 'c2')]:
        t = Table(table_id)
        t.add_column(Column(col_id))
        r.add_table(t)

    d = r.to_dict()
    log.debug(str(d))

    # child ids are namespaced by their parent's id
    assert d['id'] == 'redfang'
    assert d['attributes'][0]['id'] == 'redfang.a'
    assert d['attributes'][1]['id'] == 'redfang.a2'
    assert d['plotGroups'][0]['id'] == 'redfang.pgid'
    assert d['plotGroups'][0]['plots'][0]['id'] == 'redfang.pgid.pid'
    assert d['plotGroups'][0]['plots'][1]['id'] == 'redfang.pgid.pid2'
    assert d['plotGroups'][1]['id'] == 'redfang.pgid2'
    assert d['plotGroups'][1]['plots'][0]['id'] == 'redfang.pgid2.pid2'
    assert d['plotGroups'][1]['plots'][1]['id'] == 'redfang.pgid2.pid22'
    assert d['tables'][0]['id'] == 'redfang.tabid'
    assert d['tables'][0]['columns'][0]['id'] == 'redfang.tabid.c1'
    assert d['tables'][1]['id'] == 'redfang.tabid2'
    assert d['tables'][1]['columns'][0]['id'] == 'redfang.tabid2.c2'
    assert sorted(d['tags']) == sorted(tags)

    # tags must survive a serialization round trip
    loaded_report = load_report_from(d)
    assert sorted(loaded_report.tags) == sorted(tags)

    log.info(repr(r))
    assert repr(r) is not None
def __init__(self):
    # build the variant table; the second positional Column argument is
    # the (empty) header
    col_ids = [Constants.C_SEQ, Constants.C_POS, Constants.C_VAR,
               Constants.C_TYP, Constants.C_COV, Constants.C_CON]
    cols = [Column(cid, '') for cid in col_ids]
    log.debug('# columns {n}'.format(n=len(cols)))
    self._table = Table(self._get_table_id(),
                        title=self._get_table_title(),
                        columns=cols)
def to_sample_table(table_json):
    """Build the per-sample variant table from the parsed JSON payload."""
    col_ids = [Constants.C_SAMPLE, Constants.C_INS, Constants.C_DEL,
               Constants.C_HOM, Constants.C_HET, Constants.C_TOTAL]
    rows_in = table_json[Constants.SAMPLE_KEY]
    if len(rows_in) == 0:
        # no samples: six empty value lists
        col_values = [[], [], [], [], [], []]
    else:
        # combine paired count fields, then transpose rows into columns
        rows = []
        for row in rows_in:
            rows.append([row[0],
                         _my_combine(row[1], row[2]),
                         _my_combine(row[3], row[4]),
                         row[5],
                         row[6],
                         _my_combine(row[7], row[8])])
        col_values = zip(*rows)
    columns = [Column(col_id, values=col_values[i])
               for i, col_id in enumerate(col_ids)]
    return Table(Constants.T_SAMPLE, columns=columns)
def _to_report(bg, job_output_dir, job_id, state, was_successful, run_time,
               error_message=None):
    """
    High Level Report of the workflow state

    Write the output of workflow datastore to pbreports report object

    Workflow summary .dot/svg (collapsed workflow)
    Workflow details .dot/svg (chunked workflow)

    To add:
    - Resolved WorkflowSettings (e.g., nproc, max_workers)

    :type bg: BindingsGraph
    :param job_output_dir: path reported as the job's output directory
    :param job_id: identifier reported in the 'job_id' attribute
    :param state: overall job state string
    :param was_successful: bool, overall success flag
    :param run_time: total wall time in seconds (truncated to int)
    :param error_message: optional message; empty string when None
    :rtype: Report
    """
    # normalize None to "" so the attribute is always a string
    emsg = "" if error_message is None else error_message
    # top-level job metadata attributes
    attributes = [Attribute('was_successful', was_successful, name="Was Successful"),
                  Attribute('total_run_time_sec', int(run_time), name="Walltime (sec)"),
                  Attribute('error_message', emsg, name="Error Message"),
                  Attribute('job_id', job_id, name="Job Id"),
                  Attribute('job_state', state, name="Job State"),
                  Attribute('job_output_dir', job_output_dir, name="Job Output Directory"),
                  Attribute('pbsmrtpipe_version', pbsmrtpipe.get_version(), name="pbsmrtpipe Version")]
    columns = [Column('task_id', header='Task id'),
               Column('was_successful', header='Was Successful'),
               Column('state', header="Task State"),
               Column('run_time_sec', header="Run Time (sec)"),
               Column('nproc', header="# of procs")]
    tasks_table = Table('tasks', columns=columns)
    # one row per task node, values pulled from the graph's node attrs
    for tnode in bg.all_task_type_nodes():
        tasks_table.add_data_by_column_id('task_id', str(tnode))
        tasks_table.add_data_by_column_id('nproc', bg.node[tnode]['nproc'])
        tasks_table.add_data_by_column_id('state', bg.node[tnode]['state'])
        # success is derived from the per-task state, not the job state
        tasks_table.add_data_by_column_id('was_successful', bg.node[tnode]['state'] == TaskStates.SUCCESSFUL)
        # rt_ = bg.node[tnode]['run_time']
        # rtime = None if rt_ is None else int(rt_)
        # NOTE(review): raw run_time is reported as-is; may be None for
        # tasks that never ran — confirm downstream consumers accept that
        tasks_table.add_data_by_column_id('run_time_sec', bg.node[tnode]['run_time'])
    # companion tables for entry-point and file nodes (shared helper)
    ep_table = _to_table("entry_points", bg, bg.entry_binding_nodes())
    fnodes_table = _to_table("file_node", bg, bg.file_nodes())
    report = Report('pbsmrtpipe',
                    tables=[tasks_table, ep_table, fnodes_table],
                    attributes=attributes)
    return report
def _make_barcode_table(bam_stats, ccs_set):
    """
    Generate a table of per-barcode results

    :param bam_stats: iterable of read records exposing bcForward,
        bcReverse, qLen, numPasses and readScore
    :param ccs_set: dataset whose externalResources may reference a
        BarcodeSet used to map barcode indices to labels
    :rtype: Table
    """
    # per-barcode accumulators, keyed by forward index (symmetric case)
    # or by the (forward, reverse) pair (asymmetric case)
    barcode_counts = defaultdict(int)
    barcode_nbases = defaultdict(int)
    barcode_npasses = defaultdict(list)
    barcode_readscores = defaultdict(list)
    # symmetric = every record has identical forward/reverse barcode
    is_symmetric = all([r.bcForward == r.bcReverse for r in bam_stats])
    for r in bam_stats:
        key = r.bcForward
        if not is_symmetric:
            key = (r.bcForward, r.bcReverse)
        barcode_counts[key] += 1
        barcode_nbases[key] += r.qLen
        barcode_npasses[key].append(r.numPasses)
        barcode_readscores[key].append(r.readScore)
    # map barcode index -> label from any BarcodeSet resources; indices
    # appearing in several resources must agree on the label
    barcode_labels = {}
    for er in ccs_set.externalResources:
        bcs = er.barcodes
        if bcs is not None:
            with BarcodeSet(bcs) as bc_set:
                for i_bc, rec in enumerate(bc_set):
                    if i_bc in barcode_labels:
                        assert barcode_labels[
                            i_bc] == rec.id, "Barcode ID mismatch: {l} versus {r}".format(
                            l=barcode_labels[i_bc], r=rec.id)
                    else:
                        barcode_labels[i_bc] = rec.id
    barcode_ids = sorted(barcode_counts.keys())
    counts = [barcode_counts[i_bc] for i_bc in barcode_ids]
    nbases = [barcode_nbases[i_bc] for i_bc in barcode_ids]
    # mean read length per barcode (truncated to int)
    mean_length = [int(float(n) / c) for (c, n) in zip(counts, nbases)]
    labels = []
    for i_bc in barcode_ids:
        if is_symmetric:
            labels.append(barcode_labels.get(i_bc, Constants.NO_BC_LABEL))
        else:
            # asymmetric pairs are rendered as "forward, reverse"
            labels.append("{f}, {r}".format(
                f=barcode_labels.get(i_bc[0], Constants.NO_BC_LABEL),
                r=barcode_labels.get(i_bc[1], Constants.NO_BC_LABEL)))
    # NOTE(review): under Python 2, sum/len of int numPasses values is
    # integer division, so mean npasses is truncated — confirm intended
    npasses = [
        sum(barcode_npasses[i_bc]) / len(barcode_npasses[i_bc])
        for i_bc in barcode_ids
    ]
    readquals = [
        sum(barcode_readscores[i_bc]) / len(barcode_readscores[i_bc])
        for i_bc in barcode_ids
    ]
    assert len(labels) == len(counts) == len(nbases)
    columns = [
        Column(Constants.C_BARCODE_ID, values=labels),
        Column(Constants.C_BARCODE_COUNTS, values=counts),
        Column(Constants.C_BARCODE_NBASES, values=nbases),
        Column(Constants.C_BARCODE_READLENGTH, values=mean_length),
        Column(Constants.C_BARCODE_QUALITY, values=readquals),
        Column(Constants.C_BARCODE_NPASSES, values=npasses)
    ]
    return Table(Constants.T_BARCODES, columns=columns)
def to_report(stats_xml):
    """Main point of entry

    Build the loading report table from pipeline summary statistics.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int
    :rtype: Report
    :raises IOError: when the dataset carries no productivity distribution
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = DataSet(stats_xml)
    # fall back to loading the sts.xml directly if stats are not attached
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.prodDist:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")
    # parent dataset first, then any sub-datasets that carry their own stats
    dsets = [dset]
    for subdset in dset.subdatasets:
        if subdset.metadata.summaryStats:
            dsets.append(subdset)
    col_ids = [
        Constants.C_CONTEXT, Constants.C_ZMWS, Constants.C_PROD_0,
        Constants.C_PROD_1, Constants.C_PROD_2
    ]
    col_values = [[], [], [], [], []]
    for dset in dsets:
        # the first row aggregates all movies when there are sub-datasets
        if len(dsets) > 1 and len(col_values[0]) == 0:
            movie_name = "Combined"
        else:
            try:
                collection = list(dset.metadata.collections)[0]
                movie_name = collection.context
            except AttributeError:
                movie_name = "NA"
        productive_zmws = int(dset.metadata.summaryStats.numSequencingZmws)
        # prodDist bins: (empty, productive, other, <unused>)
        empty, productive, other, _ = dset.metadata.summaryStats.prodDist.bins
        # percentages of sequencing ZMWs per productivity class
        prod0 = np.round(100.0 * empty / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        prod1 = np.round(100.0 * productive / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        prod2 = np.round(100.0 * other / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        this_row = [movie_name, productive_zmws, prod0, prod1, prod2]
        # append each row value to its column (Python 2 tuple-arg lambda)
        map(lambda (x, y): x.append(y), zip(col_values, this_row))
    columns = [
        Column(cid, values=vals) for cid, vals in zip(col_ids, col_values)
    ]
    tables = [Table(Constants.T_LOADING, columns=columns)]
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    tables=tables,
                    attributes=None,
                    plotgroups=None)
    return meta_rpt.apply_view(report)
def create_table(summary_csv):
    """Long Amplicon Analysis results table.

    :param summary_csv: path to the 7-column summary CSV (header line
        followed by one row per barcode, plus an optional "All" row that
        is recomputed here)
    :rtype: Table
    """
    columns = []
    columns.append(Column(Constants.C_BC))
    columns.append(Column(Constants.C_GOOD))
    columns.append(Column(Constants.C_GOOD_PCT))
    columns.append(Column(Constants.C_CHIM))
    columns.append(Column(Constants.C_CHIM_PCT))
    columns.append(Column(Constants.C_NOISE))
    columns.append(Column(Constants.C_NOISE_PCT))
    t = Table(Constants.T_R, columns=columns)
    COL_IDS = [
        Constants.C_GOOD, Constants.C_GOOD_PCT, Constants.C_CHIM,
        Constants.C_CHIM_PCT, Constants.C_NOISE, Constants.C_NOISE_PCT
    ]

    def add_row(barcode_id, n_good, n_chimera, n_noise):
        # derive fractions, guarding against a zero total
        pct_good = pct_chimera = pct_noise = 0
        total = n_good + n_chimera + n_noise
        if total > 0:
            pct_good = n_good / float(total)
            pct_chimera = n_chimera / float(total)
            pct_noise = n_noise / float(total)
        values = [n_good, pct_good, n_chimera, pct_chimera, n_noise, pct_noise]
        # FIX: previously wrote the closed-over loop variable `bc_id`
        # instead of the `barcode_id` argument, so the final "All" row
        # was labeled with whatever barcode the CSV reader saw last.
        t.add_data_by_column_id(Constants.C_BC, barcode_id)
        for column_id, value in zip(COL_IDS, values):
            t.add_data_by_column_id(column_id, value)

    with open(summary_csv) as csv_in:
        reader = csv.reader(csv_in, delimiter=',')
        reader.next()  # skip header line
        for rec in reader:
            assert len(rec) == 7, rec
            bc_id = rec[0]
            if bc_id == "All":
                continue
            add_row(bc_id, int(rec[1]), int(rec[3]), int(rec[5]))
    # recompute the aggregate row from the per-barcode counts
    n_good = sum(t.get_column_by_id(Constants.C_GOOD).values)
    n_chimera = sum(t.get_column_by_id(Constants.C_CHIM).values)
    n_noise = sum(t.get_column_by_id(Constants.C_NOISE).values)
    add_row("All", n_good, n_chimera, n_noise)
    return t
def _to_table(tid, bg, nodes):
    """Build a three-column table (Id / Is Resolved / Path) from graph nodes.

    :param tid: table id
    :param bg: graph whose ``.node`` mapping holds per-node attribute dicts
               (assumed to always carry 'is_resolved'; 'path' may be absent)
    :param nodes: iterable of node keys present in bg
    :return: populated Table
    """
    table = Table(tid, columns=[Column('id', header="Id"),
                                Column('is_resolved', header='Is Resolved'),
                                Column('path', header="Path")])
    for node in nodes:
        attrs = bg.node[node]
        table.add_data_by_column_id('id', str(node))
        table.add_data_by_column_id('is_resolved', attrs['is_resolved'])
        try:
            path_value = attrs['path']
        except KeyError as e:
            # A node without a 'path' attribute is reported but not fatal.
            slog.error("Failed to get path from {n}".format(n=repr(node)))
            slog.error(e)
            path_value = "NA"
        table.add_data_by_column_id('path', path_value)
    return table
def create_table(summary_csv):
    """Long Amplicon Analysis results table.

    Parses the LAA summary CSV into one row per barcode of good/chimera/noise
    counts and fractions, then appends a combined "All" row recomputed from
    the per-barcode totals.

    :param summary_csv: path to the LAA summary CSV (7 columns per record)
    :return: populated Table
    """
    columns = [Column(cid) for cid in (
        Constants.C_BC, Constants.C_GOOD, Constants.C_GOOD_PCT,
        Constants.C_CHIM, Constants.C_CHIM_PCT,
        Constants.C_NOISE, Constants.C_NOISE_PCT)]
    t = Table(Constants.T_R, columns=columns)
    COL_IDS = [Constants.C_GOOD, Constants.C_GOOD_PCT,
               Constants.C_CHIM, Constants.C_CHIM_PCT,
               Constants.C_NOISE, Constants.C_NOISE_PCT]

    def add_row(barcode_id, n_good, n_chimera, n_noise):
        # Fractions default to 0 for a barcode with no reads at all.
        pct_good = pct_chimera = pct_noise = 0
        total = n_good + n_chimera + n_noise
        if total > 0:
            pct_good = n_good / float(total)
            pct_chimera = n_chimera / float(total)
            pct_noise = n_noise / float(total)
        values = [n_good, pct_good, n_chimera, pct_chimera,
                  n_noise, pct_noise]
        # Bug fix: use the barcode_id argument rather than the bc_id loop
        # variable from the enclosing scope -- the closure value was stale
        # for the final "All" row (it held the last barcode parsed).
        t.add_data_by_column_id(Constants.C_BC, barcode_id)
        for column_id, value in zip(COL_IDS, values):
            t.add_data_by_column_id(column_id, value)

    with open(summary_csv) as csv_in:
        reader = csv.reader(csv_in, delimiter=',')
        reader.next()  # skip the header row
        for rec in reader:
            assert len(rec) == 7, rec
            bc_id = rec[0]
            if bc_id == "All":
                # The combined row is recomputed from column sums below.
                continue
            add_row(bc_id, int(rec[1]), int(rec[3]), int(rec[5]))
    n_good = sum(t.get_column_by_id(Constants.C_GOOD).values)
    n_chimera = sum(t.get_column_by_id(Constants.C_CHIM).values)
    n_noise = sum(t.get_column_by_id(Constants.C_NOISE).values)
    add_row("All", n_good, n_chimera, n_noise)
    return t
def _dict_to_report_table(table_id, key_attr, value_attr, d):
    """Build a two-column (Attribute / Value) pbreports Table from a dict.

    :param table_id: Table id
    :param key_attr: column id for the keys
    :param value_attr: column id for the values
    :param d: dict of {attribute -> value}
    :return: Table
    """
    table = Table(table_id,
                  columns=[Column(key_attr, header="Attribute"),
                           Column(value_attr, header="Value")])
    for key, value in d.iteritems():
        table.add_data_by_column_id(key_attr, key)
        table.add_data_by_column_id(value_attr, value)
    return table
def setUp(self):
    """Create a three-column table pre-populated with sample values."""
    self.columns = [Column(cid, header=hdr)
                    for cid, hdr in (('one', 'One'),
                                     ('two', 'Two'),
                                     ('three', 'Three'))]
    self.table = Table('my_table_with_values', columns=self.columns)
    sample_data = {'one': [0, 1, 2],
                   'two': ['a', 'b', 'c'],
                   'three': ['file1', 'file2', 'file3']}
    for column_id, cell_values in sample_data.iteritems():
        for cell in cell_values:
            self.table.add_data_by_column_id(column_id, cell)
def test_to_dict_multi(self):
    """
    Multiple complex elements.
    The id of report sub elements is prepended with the id of the
    parent element when to_dict is called.
    """
    r = Report('redfang')
    for attr in (Attribute('a', 'b'), Attribute('a2', 'b2')):
        r.add_attribute(attr)
    # Two plot groups of two plots each.
    for group_id, plot_specs in (('pgid', (('pid', 'anImg'),
                                           ('pid2', 'anImg2'))),
                                 ('pgid2', (('pid2', 'anImg2'),
                                            ('pid22', 'anImg22')))):
        pg = PlotGroup(group_id)
        for plot_id, image in plot_specs:
            pg.add_plot(Plot(plot_id, image))
        r.add_plotgroup(pg)
    # Two single-column tables.
    for table_id, column_id in (('tabid', 'c1'), ('tabid2', 'c2')):
        t = Table(table_id)
        t.add_column(Column(column_id))
        r.add_table(t)

    d = r.to_dict()
    log.debug(str(d))

    # Every sub-element id must be namespaced by its parent's id.
    expectations = [
        ('redfang', d['id']),
        ('redfang.a', d['attributes'][0]['id']),
        ('redfang.a2', d['attributes'][1]['id']),
        ('redfang.pgid', d['plotGroups'][0]['id']),
        ('redfang.pgid.pid', d['plotGroups'][0]['plots'][0]['id']),
        ('redfang.pgid.pid2', d['plotGroups'][0]['plots'][1]['id']),
        ('redfang.pgid2', d['plotGroups'][1]['id']),
        ('redfang.pgid2.pid2', d['plotGroups'][1]['plots'][0]['id']),
        ('redfang.pgid2.pid22', d['plotGroups'][1]['plots'][1]['id']),
        ('redfang.tabid', d['tables'][0]['id']),
        ('redfang.tabid.c1', d['tables'][0]['columns'][0]['id']),
        ('redfang.tabid2', d['tables'][1]['id']),
        ('redfang.tabid2.c2', d['tables'][1]['columns'][0]['id']),
    ]
    for expected, actual in expectations:
        self.assertEqual(expected, actual)

    log.info(repr(r))
    self.assertIsNotNone(repr(r))
def __init__(self):
    """Initialize the table with its six standard columns."""
    column_ids = (Constants.C_SEQ, Constants.C_POS, Constants.C_VAR,
                  Constants.C_TYP, Constants.C_COV, Constants.C_CON)
    cols = [Column(cid) for cid in column_ids]
    log.debug('# columns {n}'.format(n=len(cols)))
    self._table = Table(self._get_table_id(),
                        title=self._get_table_title(),
                        columns=cols)