def test_to_dict_multi(self):
    """
    Multiple complex elements.
    The id of report sub elements is prepended with the id of the
    parent element when to_dict is called.
    """
    report = Report('redfang')
    report.add_attribute(Attribute('a', 'b'))
    report.add_attribute(Attribute('a2', 'b2'))

    group_one = PlotGroup('pgid')
    group_one.add_plot(Plot('pid', 'anImg'))
    group_one.add_plot(Plot('pid2', 'anImg2'))
    report.add_plotgroup(group_one)

    group_two = PlotGroup('pgid2')
    group_two.add_plot(Plot('pid2', 'anImg2'))
    group_two.add_plot(Plot('pid22', 'anImg22'))
    report.add_plotgroup(group_two)

    table_one = Table('tabid')
    table_one.add_column(Column('c1'))
    report.add_table(table_one)

    table_two = Table('tabid2')
    table_two.add_column(Column('c2'))
    report.add_table(table_two)

    d = report.to_dict()
    log.debug(str(d))

    # Every sub-element id must come back prefixed with its parent's id.
    self.assertEqual('redfang', d['id'])
    self.assertEqual('redfang.a', d['attributes'][0]['id'])
    self.assertEqual('redfang.a2', d['attributes'][1]['id'])
    self.assertEqual('redfang.pgid', d['plotGroups'][0]['id'])
    self.assertEqual('redfang.pgid.pid', d['plotGroups'][0]['plots'][0]['id'])
    self.assertEqual('redfang.pgid.pid2', d['plotGroups'][0]['plots'][1]['id'])
    self.assertEqual('redfang.pgid2', d['plotGroups'][1]['id'])
    self.assertEqual('redfang.pgid2.pid2', d['plotGroups'][1]['plots'][0]['id'])
    self.assertEqual('redfang.pgid2.pid22', d['plotGroups'][1]['plots'][1]['id'])
    self.assertEqual('redfang.tabid', d['tables'][0]['id'])
    self.assertEqual('redfang.tabid.c1', d['tables'][0]['columns'][0]['id'])
    self.assertEqual('redfang.tabid2', d['tables'][1]['id'])
    self.assertEqual('redfang.tabid2.c2', d['tables'][1]['columns'][0]['id'])

    log.info(repr(report))
    self.assertIsNotNone(repr(report))
def test_to_dict_multi(self):
    """
    Multiple complex elements.
    The id of report sub elements is prepended with the id of the
    parent element when to_dict is called.
    """
    tags = ["alpha", "beta", "gamma"]
    report = Report('redfang', tags=tags)
    report.add_attribute(Attribute('a', 'b'))
    report.add_attribute(Attribute('a2', 'b2'))

    group_one = PlotGroup('pgid')
    group_one.add_plot(Plot('pid', 'anImg'))
    group_one.add_plot(Plot('pid2', 'anImg2'))
    report.add_plotgroup(group_one)

    group_two = PlotGroup('pgid2')
    group_two.add_plot(Plot('pid2', 'anImg2'))
    group_two.add_plot(Plot('pid22', 'anImg22'))
    report.add_plotgroup(group_two)

    table_one = Table('tabid')
    table_one.add_column(Column('c1'))
    report.add_table(table_one)

    table_two = Table('tabid2')
    table_two.add_column(Column('c2'))
    report.add_table(table_two)

    d = report.to_dict()
    log.debug(str(d))

    # Every sub-element id must come back prefixed with its parent's id.
    assert d['id'] == 'redfang'
    assert d['attributes'][0]['id'] == 'redfang.a'
    assert d['attributes'][1]['id'] == 'redfang.a2'
    assert d['plotGroups'][0]['id'] == 'redfang.pgid'
    assert d['plotGroups'][0]['plots'][0]['id'] == 'redfang.pgid.pid'
    assert d['plotGroups'][0]['plots'][1]['id'] == 'redfang.pgid.pid2'
    assert d['plotGroups'][1]['id'] == 'redfang.pgid2'
    assert d['plotGroups'][1]['plots'][0]['id'] == 'redfang.pgid2.pid2'
    assert d['plotGroups'][1]['plots'][1]['id'] == 'redfang.pgid2.pid22'
    assert d['tables'][0]['id'] == 'redfang.tabid'
    assert d['tables'][0]['columns'][0]['id'] == 'redfang.tabid.c1'
    assert d['tables'][1]['id'] == 'redfang.tabid2'
    assert d['tables'][1]['columns'][0]['id'] == 'redfang.tabid2.c2'
    assert sorted(d['tags']) == sorted(tags)

    # Tags must survive a serialize/deserialize round trip.
    loaded_report = load_report_from(d)
    assert sorted(loaded_report.tags) == sorted(tags)

    log.info(repr(report))
    assert repr(report) is not None
def to_table(motif_records):
    """Build the motif summary Table from an iterable of motif records."""
    # (column id, record attribute) pairs, in display order; keeping them
    # together makes the id/attribute correspondence explicit.
    column_specs = [
        (Constants.C_ID, 'motif_str'),
        (Constants.C_POS, 'center_position'),
        (Constants.C_TYPE, 'modification_type'),
        (Constants.C_PCT_MOTIF, 'fraction'),
        (Constants.C_NMOTIF, 'ndetected'),
        (Constants.C_NMOTIF_GEN, 'ngenome'),
        (Constants.C_READSCORE, 'mean_score'),
        (Constants.C_COV, 'mean_coverage'),
        (Constants.C_PARTNER, 'partner_motif_str'),
        (Constants.C_IPD, 'mean_ipd_ratio'),
        (Constants.C_GRP, 'group_tag'),
        (Constants.C_OBJ_SCORE, 'objective_score'),
    ]
    columns = [Column(col_id) for col_id, _ in column_specs]
    table = Table(Constants.T_ID, columns=columns)
    # One table row per record, filled column by column.
    for record in motif_records:
        for col_id, attr_name in column_specs:
            table.add_data_by_column_id(col_id, getattr(record, attr_name))
    return table
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""
    # Barcode column is optional and, when present, comes first.
    column_ids = [
        Constants.C_CLUSTER,
        Constants.C_PHASE,
        Constants.C_LENGTH,
        Constants.C_ACCURACY,
        Constants.C_COVERAGE,
    ]
    if barcode:
        column_ids.insert(0, Constants.C_BARCODE)
    columns = [Column(col_id) for col_id in column_ids]
    t = Table(Constants.T_ID, columns=columns)
    # One row per amplicon consensus sequence, in name order.
    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            t.add_data_by_column_id(column.id, row[column.id][0])
    log.info(str(t))
    return t
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""
    # Barcode column is optional and, when present, comes first.
    column_ids = ["coarsecluster", "phase", "sequencelength",
                  "predictedaccuracy", "totalcoverage"]
    if barcode:
        column_ids.insert(0, "barcodename")
    columns = [Column(col_id, header="") for col_id in column_ids]
    t = Table("result_table", columns=columns)
    # One row per amplicon consensus sequence, in name order.
    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            t.add_data_by_column_id(column.id, row[column.id][0])
    log.info(str(t))
    return t
def datastore_to_report(ds):
    """
    Build a one-table Report summarizing a DataStore's files.

    :type ds: DataStore
    :param ds: datastore whose files are summarized
    :return: Report with summary attributes and a per-file table
    """
    attrs = [
        Attribute("ds_nfiles", len(ds.files), name="Number of files"),
        Attribute("ds_version", ds.version, name="Datastore version"),
        Attribute("ds_created_at", ds.created_at, name="Created At"),
        Attribute("ds_updated_at", ds.updated_at, name="Updated At")
    ]
    columns_names = [("file_id", "File Id"),
                     ("file_type_obj", "File Type"),
                     ("path", "Path"),
                     ("file_size", "Size"),
                     ("created_at", "Created At"),
                     ("modified_at", "Modified At")]

    def to_i(s):
        # All datastore column ids share a "ds_" prefix.
        return "ds_" + s

    columns = [Column(to_i(i), header=h) for i, h in columns_names]
    t = Table("datastore", title="DataStore Summary", columns=columns)

    def _to_relative_path(p):
        # Display only the last three path components.
        return "/".join(p.split("/")[-3:])

    # .items() instead of the Python-2-only .iteritems() so this also runs
    # under Python 3; on Python 2 .items() behaves identically here.
    for file_id, ds_file in ds.files.items():
        t.add_data_by_column_id(to_i("file_id"), ds_file.file_id)
        t.add_data_by_column_id(to_i("file_type_obj"), ds_file.file_type_id)
        t.add_data_by_column_id(to_i("path"), _to_relative_path(ds_file.path))
        t.add_data_by_column_id(to_i("file_size"), ds_file.file_size)
        t.add_data_by_column_id(to_i("created_at"), ds_file.created_at)
        t.add_data_by_column_id(to_i("modified_at"), ds_file.modified_at)

    return Report("datastore_report", tables=[t], attributes=attrs)
def _to_table(self, movie_datum):
    """
    Create a pbreports Table with one row per movie.

    :param movie_datum: List of per-movie value tuples ordered to match
        self.COL_IDS: (movie name, reads, mean readlength, polymerase
        readlength, number of subread bases, mean subread readlength,
        mean subread concordance)
    :raises ValueError: if a movie tuple does not provide exactly one
        value per column
    """
    # Materialize the columns as a list: the original generator expression
    # would be exhausted after a single traversal, leaving the Table with
    # no columns if it iterates more than once.
    table = Table(Constants.T_STATS,
                  columns=[Column(c_id) for c_id in self.COL_IDS])
    for movie_data in movie_datum:
        if len(movie_data) != len(self.COL_IDS):
            log.error(movie_datum)
            raise ValueError(
                "Incompatible values. {n} values provided, expected {a}".format(n=len(movie_data), a=len(self.COL_IDS)))
        for value, c_id in zip(movie_data, self.COL_IDS):
            table.add_data_by_column_id(c_id, value)
    log.debug(str(table))
    return table
def attributes_to_table(attributes, table_id):
    """Build a report table from Iso-Seq cluster attributes."""
    # One single-value column per attribute, keyed by the attribute id.
    table = Table(table_id,
                  columns=[Column(attr.id, header="") for attr in attributes])
    for attr in attributes:
        table.add_data_by_column_id(attr.id, attr.value)
    return table
def to_table(motif_records):
    """Build the motif summary Table from an iterable of motif records."""
    # (column id, record attribute) pairs, in display order; keeping them
    # together makes the id/attribute correspondence explicit.
    column_specs = [
        ('motif_id', 'motif_str'),
        ('modified_position', 'center_position'),
        ('modification_type', 'modification_type'),
        ('percent_motifs_detected', 'fraction'),
        ('ndetected_motifs', 'ndetected'),
        ('nmotifs_in_genome', 'ngenome'),
        ('mean_readscore', 'mean_score'),
        ('mean_coverage', 'mean_coverage'),
        ('partner_motif', 'partner_motif_str'),
        ('mean_ipd_ratio', 'mean_ipd_ratio'),
        ('group_tag', 'group_tag'),
        ('objective_score', 'objective_score'),
    ]
    columns = [Column(col_id, header="") for col_id, _ in column_specs]
    table = Table(Constants.T_ID, title="", columns=columns)
    # One table row per record, filled column by column.
    for record in motif_records:
        for col_id, attr_name in column_specs:
            table.add_data_by_column_id(col_id, getattr(record, attr_name))
    return table
def create_table(tabulated_data):
    """
    Long Amplicon Analysis results table.

    :param tabulated_data: mapping of barcode -> per-category counts dict;
        must contain an 'all' entry with the combined totals
    :return: Table with one row per barcode plus a final 'All' row
    """
    # Data column ids, shared by the header construction and row filling.
    data_col_ids = ['good', 'good_pct', 'chimera', 'chimera_pct',
                    'noise', 'noise_pct']
    columns = [Column("barcode_col", header='')]
    columns.extend(Column(col_id, header='') for col_id in data_col_ids)
    t = Table(Constants.T_R, columns=columns)
    # .items() instead of the Python-2-only .iteritems() so this also runs
    # under Python 3; on Python 2 .items() behaves identically here.
    for barcode, data in tabulated_data.items():
        if barcode != 'all':
            t.add_data_by_column_id('barcode_col', barcode)
            for column_id in data_col_ids:
                t.add_data_by_column_id(column_id, data[column_id])
    # The combined totals go last, labelled 'All'.
    t.add_data_by_column_id('barcode_col', 'All')
    for column_id in data_col_ids:
        t.add_data_by_column_id(column_id, tabulated_data['all'][column_id])
    log.info(str(t))
    return t
def to_sample_table(table_json):
    """
    Build the per-sample summary Table from a raw results dict.

    :param table_json: dict containing the raw sample rows under
        Constants.SAMPLE_KEY; each row is indexed positionally
    :return: Table with one column per id in col_ids
    """
    col_ids = [
        Constants.C_SAMPLE, Constants.C_INS, Constants.C_DEL,
        Constants.C_HOM, Constants.C_HET, Constants.C_TOTAL
    ]
    sample_table = table_json[Constants.SAMPLE_KEY]
    rows = []
    for row in sample_table:
        # Pairs of raw fields are combined into single display values.
        rows.append([row[0],
                     _my_combine(row[1], row[2]),
                     _my_combine(row[3], row[4]),
                     row[5],
                     row[6],
                     _my_combine(row[7], row[8])])
    if rows:
        # list() so the transposed result is indexable under Python 3,
        # where zip() returns a lazy iterator.
        table = list(zip(*rows))
    else:
        # One empty value-list per column, derived from col_ids instead of
        # a hard-coded 6-element literal.
        table = [[] for _ in col_ids]
    columns = [Column(col_id, values=table[i])
               for i, col_id in enumerate(col_ids)]
    return Table(Constants.T_SAMPLE, columns=columns)
def _make_barcode_table(bam_stats, ccs_set):
    """
    Generate a table of per-barcode results.

    Aggregates read counts, base counts, pass counts and read scores per
    barcode from bam_stats, resolves human-readable barcode labels from
    the BarcodeSets referenced by ccs_set, and returns one Table row per
    barcode (sorted by barcode key).
    """
    # Per-barcode accumulators, keyed by barcode index (or index pair).
    barcode_counts = defaultdict(int)
    barcode_nbases = defaultdict(int)
    barcode_npasses = defaultdict(list)
    barcode_readscores = defaultdict(list)
    # Symmetric = every record's forward and reverse barcodes match, in
    # which case a single index suffices as the aggregation key; otherwise
    # the (forward, reverse) pair is used.
    is_symmetric = all([r.bcForward == r.bcReverse for r in bam_stats])
    for r in bam_stats:
        key = r.bcForward
        if not is_symmetric:
            key = (r.bcForward, r.bcReverse)
        barcode_counts[key] += 1
        barcode_nbases[key] += r.qLen
        barcode_npasses[key].append(r.numPasses)
        barcode_readscores[key].append(r.readScore)
    # Map barcode index -> label, gathered from every BarcodeSet that the
    # CCS dataset's external resources reference.
    barcode_labels = {}
    for er in ccs_set.externalResources:
        bcs = er.barcodes
        if bcs is not None:
            with BarcodeSet(bcs) as bc_set:
                for i_bc, rec in enumerate(bc_set):
                    if i_bc in barcode_labels:
                        # Multiple resources must agree on the label for a
                        # given index.
                        assert barcode_labels[
                            i_bc] == rec.id, "Barcode ID mismatch: {l} versus {r}".format(
                            l=barcode_labels[i_bc], r=rec.id)
                    else:
                        barcode_labels[i_bc] = rec.id
    barcode_ids = sorted(barcode_counts.keys())
    counts = [barcode_counts[i_bc] for i_bc in barcode_ids]
    nbases = [barcode_nbases[i_bc] for i_bc in barcode_ids]
    # Mean read length, truncated to an integer.
    mean_length = [int(float(n) / c) for (c, n) in zip(counts, nbases)]
    labels = []
    for i_bc in barcode_ids:
        if is_symmetric:
            labels.append(barcode_labels.get(i_bc, Constants.NO_BC_LABEL))
        else:
            # Asymmetric keys are pairs; show both labels.
            labels.append("{f}, {r}".format(
                f=barcode_labels.get(i_bc[0], Constants.NO_BC_LABEL),
                r=barcode_labels.get(i_bc[1], Constants.NO_BC_LABEL)))
    # NOTE(review): sum/len is integer division under Python 2 when the
    # values are ints — confirm whether a float mean is intended here.
    npasses = [
        sum(barcode_npasses[i_bc]) / len(barcode_npasses[i_bc])
        for i_bc in barcode_ids
    ]
    readquals = [
        sum(barcode_readscores[i_bc]) / len(barcode_readscores[i_bc])
        for i_bc in barcode_ids
    ]
    assert len(labels) == len(counts) == len(nbases)
    columns = [
        Column(Constants.C_BARCODE_ID, values=labels),
        Column(Constants.C_BARCODE_COUNTS, values=counts),
        Column(Constants.C_BARCODE_NBASES, values=nbases),
        Column(Constants.C_BARCODE_READLENGTH, values=mean_length),
        Column(Constants.C_BARCODE_QUALITY, values=readquals),
        Column(Constants.C_BARCODE_NPASSES, values=npasses)
    ]
    return Table(Constants.T_BARCODES, columns=columns)
def test_get_table_by_id(self):
    """A table added to a report is retrievable by its id."""
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    self.assertEqual(report.get_table_by_id('tabid1'), table)
def setUp(self):
    """Create an empty three-column table shared by the tests."""
    specs = [('one', "One"), ('two', "Two"), ('three', "Three")]
    self.columns = [Column(col_id, header=header) for col_id, header in specs]
    self.table = Table('my_table', columns=self.columns)
def test_get_table_by_id_with_bad_id(self):
    """Looking up a nonexistent table id returns None."""
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    self.assertIsNone(report.get_table_by_id('id_that_does_not_exist'))
def to_report(stats_xml):
    """
    Main point of entry.

    Builds the loading report (one row per dataset with stats, plus a
    leading "Combined" row when there are several) from a pipeline
    summary stats XML file.

    :type stats_xml: str
    :rtype: Report
    :raises IOError: if the summary stats lack the productivity distribution
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = DataSet(stats_xml)
    if not dset.metadata.summaryStats:
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.prodDist:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")
    # The top-level dataset plus any sub-datasets carrying their own stats.
    dsets = [dset]
    for subdset in dset.subdatasets:
        if subdset.metadata.summaryStats:
            dsets.append(subdset)

    col_ids = [
        Constants.C_CONTEXT, Constants.C_ZMWS, Constants.C_PROD_0,
        Constants.C_PROD_1, Constants.C_PROD_2
    ]
    col_values = [[], [], [], [], []]
    for current in dsets:
        # The first row aggregates everything when multiple datasets exist.
        if len(dsets) > 1 and len(col_values[0]) == 0:
            movie_name = "Combined"
        else:
            try:
                collection = list(current.metadata.collections)[0]
                movie_name = collection.context
            except AttributeError:
                movie_name = "NA"
        productive_zmws = int(current.metadata.summaryStats.numSequencingZmws)
        empty, productive, other, _ = current.metadata.summaryStats.prodDist.bins
        prod0 = np.round(100.0 * empty / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        prod1 = np.round(100.0 * productive / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        prod2 = np.round(100.0 * other / float(productive_zmws),
                         decimals=Constants.DECIMALS)
        this_row = [movie_name, productive_zmws, prod0, prod1, prod2]
        # Plain loop instead of map(lambda (x, y): ...): tuple-unpacking
        # lambda parameters are a syntax error under Python 3 (PEP 3113),
        # and a lazy Python 3 map() would silently never append anyway.
        for column_values, value in zip(col_values, this_row):
            column_values.append(value)

    columns = [
        Column(cid, values=vals) for cid, vals in zip(col_ids, col_values)
    ]
    tables = [Table(Constants.T_LOADING, columns=columns)]
    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    tables=tables,
                    attributes=None,
                    plotgroups=None)
    return meta_rpt.apply_view(report)
def test_get_column_by_id(self):
    """A column added to a table is retrievable via the report."""
    report = Report('redfang')
    table = Table('tabid1')
    column = Column('c1')
    table.add_column(column)
    report.add_table(table)
    found = report.get_table_by_id('tabid1').get_column_by_id('c1')
    self.assertEqual(found, column)
def _make_barcode_table(bam_stats, ccs_set):
    """
    Generate a table of per-barcode results.

    Aggregates read counts, base counts, pass counts and read scores per
    barcode index from bam_stats, resolves human-readable barcode labels
    from the BarcodeSets referenced by ccs_set, and returns one Table row
    per barcode (sorted by barcode index).
    """
    # Per-barcode accumulators, keyed by the record's barcode index.
    barcode_counts = defaultdict(int)
    barcode_nbases = defaultdict(int)
    barcode_npasses = defaultdict(list)
    barcode_readscores = defaultdict(list)
    for r in bam_stats:
        barcode_counts[r.bc] += 1
        barcode_nbases[r.bc] += r.qLen
        barcode_npasses[r.bc].append(r.numPasses)
        barcode_readscores[r.bc].append(r.readScore)
    # Map barcode index -> label, gathered from every BarcodeSet that the
    # CCS dataset's external resources reference.
    barcode_labels = {}
    for er in ccs_set.externalResources:
        bcs = er.barcodes
        if bcs is not None:
            with BarcodeSet(bcs) as bc_set:
                for i_bc, rec in enumerate(bc_set):
                    if i_bc in barcode_labels:
                        # Multiple resources must agree on the label for a
                        # given index.
                        assert barcode_labels[
                            i_bc] == rec.id, "Barcode ID mismatch: {l} versus {r}".format(
                            l=barcode_labels[i_bc], r=rec.id)
                    else:
                        barcode_labels[i_bc] = rec.id
    barcode_ids = sorted(barcode_counts.keys())
    counts = [barcode_counts[i_bc] for i_bc in barcode_ids]
    nbases = [barcode_nbases[i_bc] for i_bc in barcode_ids]
    # Mean read length, truncated to an integer.
    mean_length = [int(float(n) / c) for (c, n) in zip(counts, nbases)]
    # Fall back to the raw index (stringified) when no label is known.
    labels = [str(barcode_labels.get(i_bc, i_bc)) for i_bc in barcode_ids]
    # NOTE(review): sum/len is integer division under Python 2 when the
    # values are ints — confirm whether a float mean is intended here.
    npasses = [
        sum(barcode_npasses[i_bc]) / len(barcode_npasses[i_bc])
        for i_bc in barcode_ids
    ]
    readquals = [
        sum(barcode_readscores[i_bc]) / len(barcode_readscores[i_bc])
        for i_bc in barcode_ids
    ]
    assert len(labels) == len(counts) == len(nbases)
    columns = [
        Column(Constants.C_BARCODE_ID, values=labels, header="Barcode ID"),
        Column(Constants.C_BARCODE_COUNTS, values=counts, header="CCS reads"),
        Column(Constants.C_BARCODE_NBASES,
               values=nbases,
               header="Number of CCS bases"),
        Column(Constants.C_BARCODE_READLENGTH,
               values=mean_length,
               header="CCS Read Length (mean)"),
        Column(Constants.C_BARCODE_QUALITY,
               values=readquals,
               header="CCS Read Score (mean)"),
        Column(Constants.C_BARCODE_NPASSES,
               values=npasses,
               header="Number of Passes (mean)")
    ]
    return Table(Constants.T_BARCODES, columns=columns, title="By Barcode")
def test_get_column_by_id_with_bad_id(self):
    """Looking up a nonexistent column id returns None."""
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    missing = report.get_table_by_id('tabid1').get_column_by_id(
        'id_that_does_not_exist')
    assert missing is None
def test_get_table_by_id(self):
    """A table is retrievable by id, and an empty table yields no columns_d."""
    report = Report('redfang')
    table = Table('tabid1')
    table.add_column(Column('c1'))
    report.add_table(table)
    found = report.get_table_by_id('tabid1')
    assert found == table
    # No values were added, so the columns dict form is empty.
    assert len(found.to_columns_d()) == 0
def attributesToTable(attributes):
    """Build a report table from Iso-Seq cluster attributes."""
    # One single-value column per attribute, keyed by the attribute id.
    table = Table(Constants.T_ATTR,
                  columns=[Column(attr.id, header="") for attr in attributes])
    for attr in attributes:
        table.add_data_by_column_id(attr.id, attr.value)
    return table
def to_sample_table(variant_table):
    """Build the per-sample summary Table from an aggregated variant table."""
    aggregated = aggregate_variant_table(variant_table)
    # Column i of the output takes its values from row i of the aggregate.
    columns = [
        Column(col_id, values=aggregated[i])
        for i, col_id in enumerate(Constants.SAMPLES_COL_IDS)
    ]
    return Table(Constants.T_SAMPLES, columns=columns)
def apply_table_view(self, table):
    """Return a copy of *table* with this view's title and column views applied."""
    # The table's own title wins; the view's title is only a fallback.
    title = self.title if table.title is None else table.title
    viewed_columns = [
        self.get_meta_column(c.id).apply_column_view(c)
        for c in table.columns
    ]
    return Table(self.id, title=title, columns=viewed_columns)
def test_add_column(self):
    """Cannot add column with duplicate id."""
    initial = [Column('1'), Column('2')]
    table = Table('foo', columns=initial)
    self.assertSequenceEqual(initial, table.columns)
    # Adding a column whose id already exists must raise.
    self.assertRaises(PbReportError, table.add_column, Column('2'))
def _attributes_to_table(attributes):
    """Build a report table from Iso-Seq Classify attributes."""
    # One single-value column per attribute, keyed by the attribute id.
    table = Table(Constants.T_ATTR,
                  columns=[Column(attr.id) for attr in attributes])
    for attr in attributes:
        table.add_data_by_column_id(attr.id, attr.value)
    return table
def test_add_column(self):
    """Cannot add column with duplicate id."""
    initial = [Column('1'), Column('2')]
    table = Table('foo', columns=initial)
    assert initial == table.columns
    # Adding a column whose id already exists must raise.
    with pytest.raises(PbReportError):
        table.add_column(Column('2'))
def setUp(self):
    """Create a three-column table pre-populated with sample values."""
    self.columns = [
        Column('one', header="One"),
        Column('two', header="Two"),
        Column('three', header="Three")
    ]
    self.table = Table('my_table_with_values', columns=self.columns)
    # range() instead of the Python-2-only xrange(); list(range(3)) yields
    # the same [0, 1, 2] on both Python versions.
    datum = [('one', list(range(3))),
             ('two', list('abc')),
             ('three', 'file1 file2 file3'.split())]
    for col_id, values in datum:
        for value in values:
            self.table.add_data_by_column_id(col_id, value)
def test_append_data(self):
    """Append data to columns by index."""
    cols = [Column('1'), Column('2')]
    table = Table('foo', columns=cols)
    # Interleave appends across both columns by positional index.
    for index, value in [(0, 'whatev'), (0, 'huh'),
                         (1, 'ernie'), (1, 'bert')]:
        table.append_data(index, value)
    self.assertSequenceEqual(['whatev', 'huh'], table.columns[0].values)
    self.assertSequenceEqual(['ernie', 'bert'], table.columns[1].values)
def test_add_data_by_column_id(self):
    """Added data values by column identifier."""
    columns = [Column('one'), Column('two')]
    table = Table('mytable', columns=columns)
    datum = {'one': 12.0, 'two': 1234.0}
    # .items() instead of the Python-2-only .iteritems() so the test also
    # runs under Python 3; on Python 2 .items() behaves identically here.
    for k, v in datum.items():
        table.add_data_by_column_id(k, v)
    self.assertTrue(12.0 in table.columns[0].values)
    self.assertTrue(1234.0 in table.columns[1].values)
def _to_table(d):
    """
    Deserialize a table dict (the to_dict form) back into a Table.

    :param d: dict with 'id', optional 'title', and optional 'columns'
    :return: Table instance
    """
    id_ = _to_id(d['id'])
    title = d.get('title', None)
    columns = [_to_column(column_d) for column_d in d.get('columns', [])]
    # All columns must carry the same number of values. Using <= 1 also
    # accepts a table with no columns at all (zero distinct lengths),
    # which a strict == 1 check wrongly rejected.
    nvalues = {len(c.values) for c in columns}
    assert len(nvalues) <= 1
    return Table(id_, title=title, columns=columns)