def _to_table(self, movie_datum):
    """
    Create a pbreports Table for each movie.

    :param movie_datum: List of
    [(movie_name,
      reads,
      mean readlength,
      polymerase readlength,
      number of subread bases,
      mean subread readlength,
      mean subread concordance), ...]
    """
    # use a list, not a generator, so the columns can be iterated repeatedly
    table = Table(Constants.T_STATS,
                  columns=[Column(c_id) for c_id in self.COL_IDS])

    for movie_data in movie_datum:
        if len(movie_data) != len(self.COL_IDS):
            log.error(movie_datum)
            raise ValueError(
                "Incompatible values. {n} values provided, expected {a}".format(
                    n=len(movie_data), a=len(self.COL_IDS)))
        for value, c_id in zip(movie_data, self.COL_IDS):
            table.add_data_by_column_id(c_id, value)

    log.debug(str(table))
    return table
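A sketch of the calling contract, assuming a builder class that defines COL_IDS with seven entries; the column ids and the row values below are invented for illustration.

# Hypothetical COL_IDS and one movie row; each row must contain exactly
# len(COL_IDS) values, in the same order as COL_IDS, or ValueError is raised.
COL_IDS = ["movie_name", "n_reads", "mean_readlength", "polymerase_readlength",
           "n_subread_bases", "mean_subread_readlength",
           "mean_subread_concordance"]
movie_datum = [
    ("m140913_222218_42240", 21341, 6523, 18010, 139210211, 6401, 0.8764),
]
# zip(movie_data, COL_IDS) pairs each value with its column id, so the table
# is populated column-by-column rather than row-by-row.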
def to_table(motif_records):
    columns = [Column('motif_id', header="Motif"),
               Column('modified_position', header="Modified Position"),
               Column('modification_type', header="Modification Type"),
               Column('percent_motifs_detected', header="% of Motifs Detected"),
               Column('ndetected_motifs', header="# of Motifs Detected"),
               Column('nmotifs_in_genome', header="# of Motifs in Genome"),
               Column('mean_readscore', header="Mean QV"),
               Column('mean_coverage', header="Mean Coverage"),
               Column('partner_motif', header="Partner Motif"),
               Column('mean_ipd_ratio', header="Mean IPD Ratio"),
               Column('group_tag', header="Group Tag"),
               Column('objective_score', header="Objective Score")]

    # Record attr name ordered by index in columns
    attr_names = ['motif_str', 'center_position', 'modification_type',
                  'fraction', 'ndetected', 'ngenome', 'mean_score',
                  'mean_coverage', 'partner_motif_str', 'mean_ipd_ratio',
                  'group_tag', 'objective_score']

    table = Table(Constants.T_ID, title="Motifs", columns=columns)

    for record in motif_records:
        for attr_name, column in zip(attr_names, columns):
            v = getattr(record, attr_name)
            table.add_data_by_column_id(column.id, v)

    return table
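Because attr_names is coupled to columns purely by position, a record type whose fields mirror attr_names makes the contract explicit. A minimal sketch with a hypothetical namedtuple and made-up values:

from collections import namedtuple

# Hypothetical record type; field names must match attr_names exactly,
# since to_table reads each value with getattr(record, attr_name).
MotifRecord = namedtuple("MotifRecord", [
    "motif_str", "center_position", "modification_type", "fraction",
    "ndetected", "ngenome", "mean_score", "mean_coverage",
    "partner_motif_str", "mean_ipd_ratio", "group_tag", "objective_score"])

records = [MotifRecord("GATC", 1, "m6A", 0.98, 4021, 4100, 41.2, 130.5,
                       "GATC", 5.2, "GATC", 210.0)]
table = to_table(records)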
def to_table(motif_records):
    columns = [Column(Constants.C_ID),
               Column(Constants.C_POS),
               Column(Constants.C_TYPE),
               Column(Constants.C_PCT_MOTIF),
               Column(Constants.C_NMOTIF),
               Column(Constants.C_NMOTIF_GEN),
               Column(Constants.C_READSCORE),
               Column(Constants.C_COV),
               Column(Constants.C_PARTNER),
               Column(Constants.C_IPD),
               Column(Constants.C_GRP),
               Column(Constants.C_OBJ_SCORE)]

    # Record attr name ordered by index in columns
    attr_names = ['motif_str', 'center_position', 'modification_type',
                  'fraction', 'ndetected', 'ngenome', 'mean_score',
                  'mean_coverage', 'partner_motif_str', 'mean_ipd_ratio',
                  'group_tag', 'objective_score']

    table = Table(Constants.T_ID, columns=columns)

    for record in motif_records:
        for attr_name, column in zip(attr_names, columns):
            v = getattr(record, attr_name)
            table.add_data_by_column_id(column.id, v)

    return table
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""
    columns = []
    if barcode:
        columns.append(Column("barcodename", header="Barcode"))
    columns.append(Column("coarsecluster", header="Sequence Cluster"))
    columns.append(Column("phase", header="Sequence Phase"))
    columns.append(Column("sequencelength", header="Length (bp)"))
    columns.append(Column("predictedaccuracy", header="Estimated Accuracy"))
    columns.append(Column("totalcoverage", header="Subreads coverage"))

    t = Table("result_table", title="Amplicon Consensus Summary",
              columns=columns)

    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            # if column.id == "predictedaccuracy":
            #     accuracy = round(100 * row[column.id][0], 2)
            #     t.add_data_by_column_id(column.id, accuracy)
            # else:
            t.add_data_by_column_id(column.id, row[column.id][0])

    log.info(str(t))
    return t
def attributes_to_table(attributes, table_id):
    """Build a report table from Iso-Seq cluster attributes."""
    columns = [Column(x.id, header="") for x in attributes]
    table = Table(table_id, columns=columns)
    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)
    return table
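A short usage sketch, assuming pbreports-style Attribute objects (id, value, name) as used elsewhere in this code; the attribute ids and values are invented:

# Invented attributes; each one becomes a single-valued column keyed by its id.
attributes = [
    Attribute("num_polished_hq_isoforms", 11701, name="Polished HQ Isoforms"),
    Attribute("num_polished_lq_isoforms", 817, name="Polished LQ Isoforms"),
]
table = attributes_to_table(attributes, "isoseq_cluster_table")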
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""
    columns = []
    if barcode:
        columns.append(Column(Constants.C_BARCODE))
    columns.append(Column(Constants.C_CLUSTER))
    columns.append(Column(Constants.C_PHASE))
    columns.append(Column(Constants.C_LENGTH))
    columns.append(Column(Constants.C_ACCURACY))
    columns.append(Column(Constants.C_COVERAGE))

    t = Table(Constants.T_ID, columns=columns)

    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            # if column.id == "predictedaccuracy":
            #     accuracy = round(100 * row[column.id][0], 2)
            #     t.add_data_by_column_id(column.id, accuracy)
            # else:
            t.add_data_by_column_id(column.id, row[column.id][0])

    log.info(str(t))
    return t
def to_table(motif_records):
    columns = [Column('motif_id', header=""),
               Column('modified_position', header=""),
               Column('modification_type', header=""),
               Column('percent_motifs_detected', header=""),
               Column('ndetected_motifs', header=""),
               Column('nmotifs_in_genome', header=""),
               Column('mean_readscore', header=""),
               Column('mean_coverage', header=""),
               Column('partner_motif', header=""),
               Column('mean_ipd_ratio', header=""),
               Column('group_tag', header=""),
               Column('objective_score', header="")]

    # Record attr name ordered by index in columns
    attr_names = ['motif_str', 'center_position', 'modification_type',
                  'fraction', 'ndetected', 'ngenome', 'mean_score',
                  'mean_coverage', 'partner_motif_str', 'mean_ipd_ratio',
                  'group_tag', 'objective_score']

    table = Table(Constants.T_ID, title="", columns=columns)

    for record in motif_records:
        for attr_name, column in zip(attr_names, columns):
            v = getattr(record, attr_name)
            table.add_data_by_column_id(column.id, v)

    return table
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""
    columns = []
    if barcode:
        columns.append(Column("barcodename", header=""))
    columns.append(Column("coarsecluster", header=""))
    columns.append(Column("phase", header=""))
    columns.append(Column("sequencelength", header=""))
    columns.append(Column("predictedaccuracy", header=""))
    columns.append(Column("totalcoverage", header=""))

    t = Table("result_table", columns=columns)

    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            # if column.id == "predictedaccuracy":
            #     accuracy = round(100 * row[column.id][0], 2)
            #     t.add_data_by_column_id(column.id, accuracy)
            # else:
            t.add_data_by_column_id(column.id, row[column.id][0])

    log.info(str(t))
    return t
class TestBasicTable(unittest.TestCase):
    """Basic Smoke tests"""

    def setUp(self):
        self.columns = [Column('one', header="One"),
                        Column('two', header="Two"),
                        Column('three', header="Three")]
        self.table = Table('my_table_with_values', columns=self.columns)
        datum = {'one': list(xrange(3)),
                 'two': list('abc'),
                 'three': 'file1 file2 file3'.split()}
        for k, values in datum.iteritems():
            for value in values:
                self.table.add_data_by_column_id(k, value)

    def test_str(self):
        """Smoke test for conversion to str"""
        log.info(str(self.table))
        self.assertIsNotNone(str(self.table))

    def test_columns(self):
        """Test Columns"""
        self.assertEqual(len(self.table.columns), 3)

    def test_column_values(self):
        """Basic check for column values"""
        for column in self.table.columns:
            self.assertEqual(len(column.values), 3)

    def test_to_dict(self):
        """Conversion to dictionary"""
        self.assertTrue(isinstance(self.table.to_dict(), dict))
        log.info(self.table.to_dict())
def _to_table(self, movie_datum):
    """
    Create a pbreports Table for each movie.

    :param movie_datum: List of
    [(movie_name,
      reads,
      mean readlength,
      polymerase readlength,
      number of subread bases,
      mean subread readlength,
      mean subread accuracy), ...]
    """
    columns = [Column(k, header=h) for k, h in self.COLUMNS]
    table = Table(Constants.T_STATS, title="Mapping Statistics Summary",
                  columns=columns)

    for movie_data in movie_datum:
        if len(movie_data) != len(columns):
            log.error(movie_datum)
            raise ValueError(
                "Incompatible values. {n} values provided, expected {a}".format(
                    n=len(movie_data), a=len(columns)))
        for value, c in zip(movie_data, columns):
            table.add_data_by_column_id(c.id, value)

    log.debug(str(table))
    return table
def run_to_report(reads, barcodes, subreads=True, dataset_uuids=()):
    """
    Generate a Report instance from a SubreadSet and BarcodeSet.

    :param subreads: If the ccs fofn is given this needs to be set to False
    """

    class MyRow(object):

        def __init__(self, label):
            self.label = label
            self.bases = 0
            self.reads = 0

    label2row = {}

    for label, barcode, read in _labels_reads_iterator(reads, barcodes,
                                                       subreads=subreads):
        if label not in label2row:
            label2row[label] = MyRow(label)
        label2row[label].bases += len(read)
        label2row[label].reads += 1

    columns = [Column(Constants.C_BARCODE),
               Column(Constants.C_NREADS),
               Column(Constants.C_NBASES)]

    table = Table("barcode_table", columns=columns)
    labels = sorted(label2row.keys())
    for label in labels:
        row = label2row[label]
        table.add_data_by_column_id(Constants.C_BARCODE, label)
        table.add_data_by_column_id(Constants.C_NREADS, row.reads)
        table.add_data_by_column_id(Constants.C_NBASES, row.bases)

    report = Report(meta_rpt.id, tables=[table], dataset_uuids=dataset_uuids)
    return meta_rpt.apply_view(report)
def attributesToTable(attributes):
    """Build a report table from Iso-Seq cluster attributes."""
    columns = [Column(x.id, header="") for x in attributes]
    table = Table(Constants.T_ATTR, columns=columns)
    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)
    return table
def attributesToTable(attributes):
    """Build a report table from IsoSeq cluster attributes."""
    columns = [Column(x.id, header=x.name) for x in attributes]
    table = Table('isoseq_cluster_table', title="IsoSeq Cluster",
                  columns=columns)
    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)
    return table
def _attributes_to_table(attributes):
    """Build a report table from Iso-Seq Classify attributes."""
    columns = [Column(x.id) for x in attributes]
    table = Table(Constants.T_ATTR, columns=columns)
    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)
    return table
def test_add_data_by_column_id(self):
    """Added data values by column identifier."""
    columns = [Column('one'), Column('two')]
    table = Table('mytable', columns=columns)
    datum = {'one': 12.0, 'two': 1234.0}
    for k, v in datum.iteritems():
        table.add_data_by_column_id(k, v)
    self.assertTrue(12.0 in table.columns[0].values)
    self.assertTrue(1234.0 in table.columns[1].values)
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""
    columns = []
    columns.append(Column("barcode_col", header="Sample"))
    columns.append(Column("good", header="Good"))
    columns.append(Column("good_pct", header="Good (%)"))
    columns.append(Column("chimera", header="Chimeric"))
    columns.append(Column("chimera_pct", header="Chimeric (%)"))
    columns.append(Column("noise", header="Noise"))
    columns.append(Column("noise_pct", header="Noise (%)"))

    t = Table("result_table", title="Amplicon Input Molecule Summary",
              columns=columns)

    for barcode, data in tabulated_data.iteritems():
        if barcode != 'all':
            t.add_data_by_column_id('barcode_col', barcode)
            for column_id in ['good', 'good_pct', 'chimera', 'chimera_pct',
                              'noise', 'noise_pct']:
                t.add_data_by_column_id(column_id, data[column_id])
    t.add_data_by_column_id('barcode_col', 'All')
    for column_id in ['good', 'good_pct', 'chimera', 'chimera_pct',
                      'noise', 'noise_pct']:
        t.add_data_by_column_id(column_id, tabulated_data['all'][column_id])

    log.info(str(t))
    return t
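A sketch of the tabulated_data layout this function expects: one dict per barcode plus a required 'all' key, each with the six count/percent fields. The barcode name and numbers are invented.

# Invented numbers; the 'all' key is mandatory because the closing rows are
# read from tabulated_data['all'] after the per-barcode loop.
tabulated_data = {
    "lbc1": {"good": 102, "good_pct": 85.0, "chimera": 10,
             "chimera_pct": 8.3, "noise": 8, "noise_pct": 6.7},
    "all":  {"good": 102, "good_pct": 85.0, "chimera": 10,
             "chimera_pct": 8.3, "noise": 8, "noise_pct": 6.7},
}
t = create_table(tabulated_data)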
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""
    columns = []
    columns.append(Column("barcode_col", header=""))
    columns.append(Column("good", header=""))
    columns.append(Column("good_pct", header=""))
    columns.append(Column("chimera", header=""))
    columns.append(Column("chimera_pct", header=""))
    columns.append(Column("noise", header=""))
    columns.append(Column("noise_pct", header=""))

    t = Table(Constants.T_R, columns=columns)

    for barcode, data in tabulated_data.iteritems():
        if barcode != "all":
            t.add_data_by_column_id("barcode_col", barcode)
            for column_id in ["good", "good_pct", "chimera", "chimera_pct",
                              "noise", "noise_pct"]:
                t.add_data_by_column_id(column_id, data[column_id])
    t.add_data_by_column_id("barcode_col", "All")
    for column_id in ["good", "good_pct", "chimera", "chimera_pct",
                      "noise", "noise_pct"]:
        t.add_data_by_column_id(column_id, tabulated_data["all"][column_id])

    log.info(str(t))
    return t
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""
    columns = []
    columns.append(Column("barcode_col", header=''))
    columns.append(Column("good", header=''))
    columns.append(Column("good_pct", header=''))
    columns.append(Column("chimera", header=''))
    columns.append(Column("chimera_pct", header=''))
    columns.append(Column("noise", header=''))
    columns.append(Column("noise_pct", header=''))

    t = Table(Constants.T_R, columns=columns)

    for barcode, data in tabulated_data.iteritems():
        if barcode != 'all':
            t.add_data_by_column_id('barcode_col', barcode)
            for column_id in ['good', 'good_pct', 'chimera', 'chimera_pct',
                              'noise', 'noise_pct']:
                t.add_data_by_column_id(column_id, data[column_id])
    t.add_data_by_column_id('barcode_col', 'All')
    for column_id in ['good', 'good_pct', 'chimera', 'chimera_pct',
                      'noise', 'noise_pct']:
        t.add_data_by_column_id(column_id, tabulated_data['all'][column_id])

    log.info(str(t))
    return t
def _attributes_to_table(attributes):
    """Build a report table from IsoSeq Classify attributes."""
    columns = [Column(x.id, header=x.name) for x in attributes]
    table = Table('isoseq_classify_table',
                  title="IsoSeq Transcript Classification", columns=columns)
    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)
    return table
def to_task_summary_report(bg):
    cs = [Column("workflow_task_id", header="Task Id"),
          Column("workflow_task_status", header="Status"),
          Column("workflow_task_run_time", header="Task Runtime"),
          Column("workflow_task_nproc", header="Number of Procs"),
          Column("workflow_task_emsg", header="Error Message")]

    t = Table("workflow_task_summary", title="Task Summary", columns=cs)
    for tnode in bg.all_task_type_nodes():
        if isinstance(tnode, VALID_ALL_TASK_NODE_CLASSES):
            t.add_data_by_column_id("workflow_task_id", tnode.idx)
            t.add_data_by_column_id("workflow_task_status",
                                    bg.node[tnode]['state'])
            t.add_data_by_column_id("workflow_task_run_time",
                                    bg.node[tnode]['run_time'])
            t.add_data_by_column_id("workflow_task_nproc",
                                    bg.node[tnode]['nproc'])
            t.add_data_by_column_id("workflow_task_emsg",
                                    bg.node[tnode]['error_message'])

    return Report("workflow_task_summary", tables=[t])
def _dict_to_report_table(table_id, key_attr, value_attr, d):
    """
    General {k->v} to create a pbreport Table

    :param table_id: Table id
    :param key_attr: Column id
    :param value_attr: Column id
    :param d: dict
    :return:
    """
    columns = [Column(key_attr, header="Attribute"),
               Column(value_attr, header="Value")]

    table = Table(table_id, columns=columns)
    for k, v in d.iteritems():
        table.add_data_by_column_id(key_attr, k)
        table.add_data_by_column_id(value_attr, v)

    return table
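A usage sketch; since iteritems() order on a plain dict is arbitrary, an OrderedDict keeps the table rows deterministic. The table id, column ids, and values are invented.

from collections import OrderedDict

# Invented keys/values; each dict entry becomes one (Attribute, Value) row.
d = OrderedDict([("n_reads", 52340), ("n_bases", 401223178)])
table = _dict_to_report_table("read_summary_table", "attr_key",
                              "attr_value", d)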
class TestBasicTable:
    """Basic Smoke tests"""

    def setup_method(self, method):
        self.columns = [Column('one', header="One"),
                        Column('two', header="Two"),
                        Column('three', header="Three")]
        self.table = Table('my_table_with_values', columns=self.columns)
        datum = [
            ('one', list(range(3))),
            ('two', list('abc')),
            ('three', 'file1 file2 file3'.split())
        ]
        for k, values in datum:
            for value in values:
                self.table.add_data_by_column_id(k, value)

    def test_str(self):
        """Smoke test for conversion to str"""
        log.info(str(self.table))
        assert str(self.table) is not None

    def test_columns(self):
        """Test Columns"""
        assert len(self.table.columns) == 3

    def test_column_values(self):
        """Basic check for column values"""
        for column in self.table.columns:
            assert len(column.values) == 3

    def test_to_dict(self):
        """Conversion to dictionary"""
        assert isinstance(self.table.to_dict(), dict)
        log.info(self.table.to_dict())

    def test_to_csv(self):
        f = tempfile.NamedTemporaryFile(suffix=".csv").name
        self.table.to_csv(f)
        with open(f) as csv_out:
            assert csv_out.read() == \
                "One,Two,Three\n0,a,file1\n1,b,file2\n2,c,file3\n"
def _generate_table(list_fastq_stats):
    columns = [Column('file_name', header='File Name'),
               Column('n_reads', header="Number of Reads"),
               Column('total_bases', header="Total Bases"),
               Column('mean_readlength', header="Mean Readlength"),
               Column('mean_qv', header="Mean Quality Values")]

    table = Table('fastq_table', columns=columns)

    for fastq_stat in list_fastq_stats:
        table.add_data_by_column_id(
            'file_name', os.path.basename(fastq_stat.file_name))
        table.add_data_by_column_id('n_reads', fastq_stat.reads.shape[0])
        table.add_data_by_column_id(
            'total_bases', int(np.sum(fastq_stat.reads)))
        table.add_data_by_column_id(
            'mean_readlength', int(fastq_stat.reads.mean()))
        table.add_data_by_column_id('mean_qv', np.round(
            fastq_stat.qvs.mean(), decimals=2))

    return table
def _generate_table(list_fastq_stats):
    columns = [Column(Constants.C_FN, header=""),
               Column(Constants.C_NREADS, header=""),
               Column(Constants.C_TOT_BASES, header=""),
               Column(Constants.C_READLENGTH, header=""),
               Column(Constants.C_QV, header="")]

    table = Table(Constants.T_FASTQ, columns=columns)

    for fastq_stat in list_fastq_stats:
        table.add_data_by_column_id(
            Constants.C_FN, os.path.basename(fastq_stat.file_name))
        table.add_data_by_column_id(Constants.C_NREADS,
                                    fastq_stat.reads.shape[0])
        table.add_data_by_column_id(
            Constants.C_TOT_BASES, int(np.sum(fastq_stat.reads)))
        table.add_data_by_column_id(
            Constants.C_READLENGTH, int(fastq_stat.reads.mean()))
        table.add_data_by_column_id(Constants.C_QV, np.round(
            fastq_stat.qvs.mean(), decimals=2))

    return table
def _to_report(bg, job_output_dir, job_id, state, was_successful, run_time,
               error_message=None):
    """
    High Level Report of the workflow state

    Write the output of workflow datastore to pbreports report object

    Workflow summary .dot/svg (collapsed workflow)
    Workflow details .dot/svg (chunked workflow)

    To add:
    - Resolved WorkflowSettings (e.g., nproc, max_workers)

    :type bg: BindingsGraph
    """
    emsg = "" if error_message is None else error_message

    attributes = [Attribute('was_successful', was_successful,
                            name="Was Successful"),
                  Attribute('total_run_time_sec', int(run_time),
                            name="Walltime (sec)"),
                  Attribute('error_message', emsg, name="Error Message"),
                  Attribute('job_id', job_id, name="Job Id"),
                  Attribute('job_state', state, name="Job State"),
                  Attribute('job_output_dir', job_output_dir,
                            name="Job Output Directory"),
                  Attribute('pbsmrtpipe_version', pbsmrtpipe.get_version(),
                            name="pbsmrtpipe Version")]

    columns = [Column('task_id', header='Task id'),
               Column('was_successful', header='Was Successful'),
               Column('state', header="Task State"),
               Column('run_time_sec', header="Run Time (sec)"),
               Column('nproc', header="# of procs")]

    tasks_table = Table('tasks', columns=columns)
    for tnode in bg.all_task_type_nodes():
        tasks_table.add_data_by_column_id('task_id', str(tnode))
        tasks_table.add_data_by_column_id('nproc', bg.node[tnode]['nproc'])
        tasks_table.add_data_by_column_id('state', bg.node[tnode]['state'])
        tasks_table.add_data_by_column_id(
            'was_successful',
            bg.node[tnode]['state'] == TaskStates.SUCCESSFUL)
        # rt_ = bg.node[tnode]['run_time']
        # rtime = None if rt_ is None else int(rt_)
        tasks_table.add_data_by_column_id('run_time_sec',
                                          bg.node[tnode]['run_time'])

    ep_table = _to_table("entry_points", bg, bg.entry_binding_nodes())
    fnodes_table = _to_table("file_node", bg, bg.file_nodes())

    report = Report('pbsmrtpipe',
                    tables=[tasks_table, ep_table, fnodes_table],
                    attributes=attributes)

    return report
def _run_to_report(labels_reads_iterator, reads, barcodes, subreads=True,
                   dataset_uuids=()):
    """
    Generate a Report instance from a SubreadSet and BarcodeSet.

    :param subreads: If the ccs fofn is given this needs to be set to False
    """

    class MyRow(object):

        def __init__(self, label):
            self.label = label
            self.bases = 0
            self.reads = 0

    label2row = {}

    for label, read in labels_reads_iterator(reads, barcodes,
                                             subreads=subreads):
        if label not in label2row:
            label2row[label] = MyRow(label)
        label2row[label].bases += len(read)
        label2row[label].reads += 1

    columns = [Column('barcode', header="Barcode Name"),
               Column('number_of_reads', header="Reads"),
               Column('number_of_bases', header="Bases")]

    table = Table('barcode_table', title='Barcodes', columns=columns)
    labels = sorted(label2row.keys())
    for label in labels:
        row = label2row[label]
        table.add_data_by_column_id('barcode', label)
        table.add_data_by_column_id('number_of_reads', row.reads)
        table.add_data_by_column_id('number_of_bases', row.bases)

    report = Report('barcode', tables=[table], dataset_uuids=dataset_uuids)
    return report
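Taking the iterator as a parameter (instead of calling a module-level _labels_reads_iterator directly) makes the report testable without real SubreadSet/BarcodeSet files. A minimal sketch with a stub iterator; the labels, read strings, and file names are invented.

def _stub_iterator(reads, barcodes, subreads=True):
    # Invented (label, read) pairs standing in for real barcoded subreads;
    # only len(read) is used, so plain strings suffice.
    for label, read in [("lbc1--lbc1", "A" * 450), ("lbc1--lbc1", "A" * 510)]:
        yield label, read

report = _run_to_report(_stub_iterator, "fake.subreadset.xml",
                        "fake.barcodeset.xml", dataset_uuids=())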
def run_to_report(reads, barcodes, subreads=True, dataset_uuids=()):
    """
    Generate a Report instance from a SubreadSet and BarcodeSet.

    :param subreads: If the ccs fofn is given this needs to be set to False
    """

    class MyRow(object):

        def __init__(self, label):
            self.label = label
            self.bases = 0
            self.reads = 0

    label2row = {}

    for label, barcode, read in _labels_reads_iterator(reads, barcodes,
                                                       subreads=subreads):
        if label not in label2row:
            label2row[label] = MyRow(label)
        label2row[label].bases += len(read)
        label2row[label].reads += 1

    columns = [Column(Constants.C_BARCODE),
               Column(Constants.C_NREADS),
               Column(Constants.C_NBASES)]

    table = Table('barcode_table', columns=columns)
    labels = sorted(label2row.keys())
    for label in labels:
        row = label2row[label]
        table.add_data_by_column_id(Constants.C_BARCODE, label)
        table.add_data_by_column_id(Constants.C_NREADS, row.reads)
        table.add_data_by_column_id(Constants.C_NBASES, row.bases)

    report = Report(spec.id, tables=[table], dataset_uuids=dataset_uuids)
    return spec.apply_view(report)
def _to_table(tid, bg, nodes):
    """Create a table from File nodes or Entry nodes"""
    columns = [Column('id', header="Id"),
               Column('is_resolved', header='Is Resolved'),
               Column('path', header="Path")]

    table = Table(tid, columns=columns)

    for node in nodes:
        table.add_data_by_column_id('id', str(node))
        table.add_data_by_column_id('is_resolved',
                                    bg.node[node]['is_resolved'])
        try:
            table.add_data_by_column_id('path', bg.node[node]['path'])
        except KeyError as e:
            slog.error("Failed to get path from {n}".format(n=repr(node)))
            slog.error(e)
            table.add_data_by_column_id('path', "NA")

    return table
class BaseVariantTableBuilder(object):

    def __init__(self):
        cols = []
        cols.append(Column('sequence', 'Sequence'))
        cols.append(Column('position', 'Position'))
        cols.append(Column('variant', 'Variant'))
        cols.append(Column('type', 'Type'))
        cols.append(Column('coverage', 'Coverage'))
        cols.append(Column('confidence', 'Confidence'))
        log.debug('# columns {n}'.format(n=len(cols)))
        self._table = Table(self._get_table_id(),
                            title=self._get_table_title(), columns=cols)

    def _get_table_title(self):
        pass

    def _get_table_id(self):
        pass

    @property
    def table(self):
        """
        :returns: Table
        """
        return self._table

    def _add_common_variant_atts(self, variant):
        """
        Add variant attributes common to the "top" and "top minor" variant
        reports.

        :param variant: Variant
        """
        self._table.add_data_by_column_id('sequence', variant.contig)
        self._table.add_data_by_column_id('position', variant.position)
        self._table.add_data_by_column_id('variant', variant.variant)
        self._table.add_data_by_column_id('type', variant.type)
        self._table.add_data_by_column_id('coverage', variant.coverage)
        self._table.add_data_by_column_id('confidence', variant.confidence)
def _movie_results_to_table(movie_results):
    """Group movie results by movie name and build a report table.

    Table has movie name, # of CCS bases, Total CCS bases,
    mean CCS readlength and mean CCS accuracy.
    """
    columns = []
    columns.append(Column(Constants.C_MOVIE_NAME, values=[]))
    columns.append(Column(Constants.C_NREADS, values=[]))
    columns.append(Column(Constants.C_TOTAL_BASES, values=[]))
    columns.append(Column(Constants.C_MEAN_READLENGTH, values=[]))
    columns.append(Column(Constants.C_MEAN_ACCURACY, values=[]))
    columns.append(Column(Constants.C_MEAN_NPASSES, values=[]))

    table = Table(Constants.T_ID, columns=columns)

    movie_names = {m.movie_name for m in movie_results}

    for movie_name in movie_names:
        rs = [m.read_lengths for m in movie_results
              if m.movie_name == movie_name]
        read_lengths = np.concatenate(rs)
        ac = [m.accuracies for m in movie_results
              if m.movie_name == movie_name]
        accuracies = np.concatenate(ac)
        npass = [m.num_passes for m in movie_results
                 if m.movie_name == movie_name]
        num_passes = np.concatenate(npass)

        m_readlength = int(read_lengths.mean()) if read_lengths.size > 0 else 0
        m_accuracy = accuracies.mean() if accuracies.size > 0 else 0.0
        m_npasses = int(np.round(num_passes.mean(), decimals=0)) \
            if num_passes.size > 0 else 0
        # m_qv = int(round(accuracy_as_phred_qv(float(accuracies.mean()))))

        table.add_data_by_column_id(Constants.C_MOVIE_NAME, movie_name)
        table.add_data_by_column_id(Constants.C_NREADS, read_lengths.shape[0])
        table.add_data_by_column_id(Constants.C_TOTAL_BASES,
                                    int(read_lengths.sum()))
        table.add_data_by_column_id(Constants.C_MEAN_READLENGTH, m_readlength)
        table.add_data_by_column_id(Constants.C_MEAN_ACCURACY, m_accuracy)
        # table.add_data_by_column_id(Constants.A_MEAN_QV, m_qv)
        table.add_data_by_column_id(Constants.C_MEAN_NPASSES, m_npasses)

    return table
def create_table(timings):
    """Long Amplicon Analysis Timing Result table"""
    columns = []
    columns.append(Column(Constants.C_BC))
    columns.append(Column(Constants.C_HOUR))
    columns.append(Column(Constants.C_MIN))
    columns.append(Column(Constants.C_SEC))

    t = Table(Constants.T_ID, columns=columns)

    seconds = []
    for barcode in sorted(timings):
        if barcode != 'All':
            data = timings[barcode]
            t.add_data_by_column_id(Constants.C_BC, barcode)
            t.add_data_by_column_id(Constants.C_HOUR, data.seconds / 3600)
            t.add_data_by_column_id(Constants.C_MIN, data.seconds / 60)
            t.add_data_by_column_id(Constants.C_SEC, data.seconds)
            seconds.append(data.seconds)

    # Add the average time information; divide by the number of per-barcode
    # entries, not len(timings), which also counts the 'All' key
    avg_seconds = sum(seconds) / len(seconds)
    t.add_data_by_column_id(Constants.C_BC, 'Mean')
    t.add_data_by_column_id(Constants.C_HOUR, avg_seconds / 3600)
    t.add_data_by_column_id(Constants.C_MIN, avg_seconds / 60)
    t.add_data_by_column_id(Constants.C_SEC, avg_seconds)

    # Add the median time information
    median_seconds = int(median(seconds))
    t.add_data_by_column_id(Constants.C_BC, 'Median')
    t.add_data_by_column_id(Constants.C_HOUR, median_seconds / 3600)
    t.add_data_by_column_id(Constants.C_MIN, median_seconds / 60)
    t.add_data_by_column_id(Constants.C_SEC, median_seconds)

    # Add the total time information
    t.add_data_by_column_id(Constants.C_BC, 'Total')
    t.add_data_by_column_id(Constants.C_HOUR, timings['All'].seconds / 3600)
    t.add_data_by_column_id(Constants.C_MIN, timings['All'].seconds / 60)
    t.add_data_by_column_id(Constants.C_SEC, timings['All'].seconds)

    log.debug(str(t))
    return t
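The timings values only need a .seconds attribute, so datetime.timedelta works as a stand-in; a sketch with invented per-barcode durations and the required 'All' key.

from datetime import timedelta

# Invented durations; 'All' holds the end-to-end total and is excluded from
# the per-barcode Mean/Median rows. Note timedelta.seconds wraps at one day;
# real timing objects may differ.
timings = {
    "lbc1": timedelta(seconds=4000),
    "lbc2": timedelta(seconds=5200),
    "All": timedelta(seconds=9200),
}
t = create_table(timings)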
def _get_consensus_table_and_attributes(ref_data, reference_entry):
    """
    Get a tuple: Table and list of Attributes

    :param ref_data: (dict) dict of data pulled from alignment_summary.gff
    :param reference_entry: reference entry

    :return: tuple (Table, [Attributes])
    """
    ordered_ids = _ref_ids_ordered_by_len(ref_data)

    sum_lengths = 0.0
    mean_bases_called = 0
    mean_concord = 'NA'
    mean_coverage = 0

    columns = []
    columns.append(Column(Constants.C_CONTIG_NAME))
    columns.append(Column(Constants.C_CONTIG_LEN))
    columns.append(Column(Constants.C_BASES_CALLED))
    columns.append(Column(Constants.C_CONCORDANCE))
    columns.append(Column(Constants.C_COVERAGE))

    table = Table(Constants.T_STATS, columns=columns)

    for seqid in ordered_ids:
        contig = reference_entry.get_contig(seqid)

        length = float(ref_data[seqid][LENGTH])
        gaps = float(ref_data[seqid][GAPS])
        errors = float(ref_data[seqid][ERR])
        cov = float(ref_data[seqid][COV])

        sum_lengths += length
        bases_called = 1.0 - gaps / length
        mean_bases_called += bases_called * length

        concord = 'NA'
        if length != gaps:
            log.info('length {f}'.format(f=length))
            log.info('gaps {f}'.format(f=gaps))
            log.info('errors {f}'.format(f=errors))
            concord = 1.0 - errors / (length - gaps)
            # compare the 'NA' sentinel by equality, not identity ('is')
            if mean_concord == 'NA':
                mean_concord = concord * length
            else:
                mean_concord += concord * length

        coverage = cov / length
        mean_coverage += coverage * length

        # table shows values for each contig
        table.add_data_by_column_id(Constants.C_CONTIG_NAME, contig.name)
        table.add_data_by_column_id(Constants.C_CONTIG_LEN, length)
        table.add_data_by_column_id(Constants.C_BASES_CALLED, bases_called)
        table.add_data_by_column_id(Constants.C_CONCORDANCE, concord)
        table.add_data_by_column_id(Constants.C_COVERAGE, coverage)

    mean_contig_length = sum_lengths / len(ordered_ids)
    mean_bases_called = mean_bases_called / sum_lengths
    if mean_concord != 'NA':
        mean_concord = mean_concord / sum_lengths
    mean_coverage = mean_coverage / sum_lengths

    attributes = []
    attributes.append(Attribute(Constants.MEAN_CONCORDANCE, mean_concord))
    attributes.append(
        Attribute(Constants.MEAN_CONTIG_LENGTH, mean_contig_length))
    attributes.append(Attribute(Constants.LONGEST_CONTIG, ordered_ids[0]))
    attributes.append(
        Attribute(Constants.MEAN_BASES_CALLED, mean_bases_called))
    attributes.append(Attribute(Constants.MEAN_COVERAGE, mean_coverage))

    return table, attributes
def datastore_to_report(ds):
    """
    :type ds: DataStore
    :param ds:
    :return:
    """
    attrs = [Attribute("ds_nfiles", len(ds.files), name="Number of files"),
             Attribute("ds_version", ds.version, name="Datastore version"),
             Attribute("ds_created_at", ds.created_at, name="Created At"),
             Attribute("ds_updated_at", ds.updated_at, name="Updated At")]

    columns_names = [("file_id", "File Id"),
                     ("file_type_obj", "File Type"),
                     ("path", "Path"),
                     ("file_size", "Size"),
                     ("created_at", "Created At"),
                     ("modified_at", "Modified At")]

    to_i = lambda s: "ds_" + s
    columns = [Column(to_i(i), header=h) for i, h in columns_names]
    t = Table("datastore", title="DataStore Summary", columns=columns)

    def _to_relative_path(p):
        return "/".join(p.split("/")[-3:])

    for file_id, ds_file in ds.files.iteritems():
        t.add_data_by_column_id(to_i("file_id"), ds_file.file_id)
        t.add_data_by_column_id(to_i("file_type_obj"), ds_file.file_type_id)
        t.add_data_by_column_id(to_i("path"), _to_relative_path(ds_file.path))
        t.add_data_by_column_id(to_i("file_size"), ds_file.file_size)
        t.add_data_by_column_id(to_i("created_at"), ds_file.created_at)
        t.add_data_by_column_id(to_i("modified_at"), ds_file.modified_at)

    return Report("datastore_report", tables=[t], attributes=attrs)
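The nested _to_relative_path keeps only the last three path components so the table stays readable; a quick illustration (the path is invented):

# "/opt/smrt/jobs/016/016999/workflow/datastore.json"
#   -> "016999/workflow/datastore.json"
p = "/opt/smrt/jobs/016/016999/workflow/datastore.json"
assert "/".join(p.split("/")[-3:]) == "016999/workflow/datastore.json"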
class BaseVariantTableBuilder(object):

    def __init__(self):
        cols = []
        cols.append(Column(Constants.C_SEQ, ''))
        cols.append(Column(Constants.C_POS, ''))
        cols.append(Column(Constants.C_VAR, ''))
        cols.append(Column(Constants.C_TYP, ''))
        cols.append(Column(Constants.C_COV, ''))
        cols.append(Column(Constants.C_CON, ''))
        log.debug('# columns {n}'.format(n=len(cols)))
        self._table = Table(self._get_table_id(),
                            title=self._get_table_title(), columns=cols)

    def _get_table_title(self):
        pass

    def _get_table_id(self):
        pass

    @property
    def table(self):
        """
        :returns: Table
        """
        return self._table

    def _add_common_variant_atts(self, variant):
        """
        Add variant attributes common to the "top" and "top minor" variant
        reports.

        :param variant: Variant
        """
        self._table.add_data_by_column_id(Constants.C_SEQ, variant.contig)
        self._table.add_data_by_column_id(Constants.C_POS, variant.position)
        self._table.add_data_by_column_id(Constants.C_VAR, variant.variant)
        self._table.add_data_by_column_id(Constants.C_TYP, variant.type)
        self._table.add_data_by_column_id(Constants.C_COV, variant.coverage)
        self._table.add_data_by_column_id(Constants.C_CON, variant.confidence)
def _movie_results_to_table(movie_results):
    """Group movie results by movie name and build a report table.

    Table has movie name, # of CCS bases, Total CCS bases,
    mean CCS readlength and mean CCS accuracy.
    """
    columns = [Column(Constants.C_MOVIE_NAME, header="Movie"),
               Column(Constants.A_NREADS, header="Consensus reads"),
               Column(Constants.A_TOTAL_BASES,
                      header="Number of consensus bases"),
               Column(Constants.A_MEAN_READLENGTH,
                      header="Mean Consensus Read Length"),
               Column(Constants.A_MEAN_ACCURACY,
                      header="Mean Consensus Predicted Accuracy"),
               Column(Constants.A_MEAN_QV,
                      header="Mean Consensus Predicted QV"),
               Column(Constants.A_MEAN_NPASSES,
                      header="Mean Number of Passes")]

    table = Table(Constants.T_ID, title="Consensus reads", columns=columns)

    movie_names = {m.movie_name for m in movie_results}

    for movie_name in movie_names:
        rs = [m.read_lengths for m in movie_results
              if m.movie_name == movie_name]
        read_lengths = np.concatenate(rs)
        ac = [m.accuracies for m in movie_results
              if m.movie_name == movie_name]
        accuracies = np.concatenate(ac)
        npass = [m.num_passes for m in movie_results
                 if m.movie_name == movie_name]
        num_passes = np.concatenate(npass)

        m_readlength = int(read_lengths.mean()) \
            if read_lengths.size > 0 else 0.0
        m_accuracy = np.round(accuracies.mean(), decimals=4) \
            if accuracies.size > 0 else 0.0
        m_npasses = np.round(num_passes.mean(), decimals=3) \
            if num_passes.size > 0 else 0.0
        m_qv = int(round(accuracy_as_phred_qv(float(accuracies.mean()))))

        table.add_data_by_column_id(Constants.C_MOVIE_NAME, movie_name)
        table.add_data_by_column_id(Constants.A_NREADS, read_lengths.shape[0])
        table.add_data_by_column_id(Constants.A_TOTAL_BASES,
                                    read_lengths.sum())
        table.add_data_by_column_id(Constants.A_MEAN_READLENGTH, m_readlength)
        table.add_data_by_column_id(Constants.A_MEAN_ACCURACY, m_accuracy)
        table.add_data_by_column_id(Constants.A_MEAN_QV, m_qv)
        table.add_data_by_column_id(Constants.A_MEAN_NPASSES, m_npasses)

    return table
def _to_report(bg, job_output_dir, job_id, state, was_successful, run_time,
               error_message=None, report_uuid=None):
    """
    High Level Report of the workflow state

    Write the output of workflow datastore to pbreports report object

    Workflow summary .dot/svg (collapsed workflow)
    Workflow details .dot/svg (chunked workflow)

    To add:
    - Resolved WorkflowSettings (e.g., nproc, max_workers)

    :type bg: BindingsGraph
    """
    emsg = "" if error_message is None else error_message

    columns = [Column('task_id', header='Task id'),
               Column('was_successful', header='Was Successful'),
               Column('state', header="Task State"),
               Column('run_time_sec', header="Run Time (sec)"),
               Column('nproc', header="# of procs"),
               Column("num_core_hours", header="Core Hours")]

    tasks_table = Table('tasks', title="Tasks", columns=columns)
    for tnode in bg.all_task_type_nodes():
        nproc = bg.node[tnode]['nproc']
        # the task might not be completed.
        run_time_sec = bg.node[tnode]['run_time']
        if run_time_sec is None:
            core_hours = 0.0
        else:
            core_hours = (run_time_sec / 60.0 / 60.0) * nproc

        tasks_table.add_data_by_column_id('task_id', str(tnode))
        tasks_table.add_data_by_column_id('nproc', bg.node[tnode]['nproc'])
        tasks_table.add_data_by_column_id('state', bg.node[tnode]['state'])
        tasks_table.add_data_by_column_id(
            'was_successful',
            bg.node[tnode]['state'] == TaskStates.SUCCESSFUL)
        # rt_ = bg.node[tnode]['run_time']
        # rtime = None if rt_ is None else int(rt_)
        tasks_table.add_data_by_column_id('run_time_sec',
                                          bg.node[tnode]['run_time'])
        tasks_table.add_data_by_column_id('num_core_hours',
                                          round(core_hours, 4))

    total_core_hours = sum(
        tasks_table.get_column_by_id('num_core_hours').values)

    attributes = [Attribute('was_successful', was_successful,
                            name="Was Successful"),
                  Attribute('total_run_time_sec', int(run_time),
                            name="Walltime (sec)"),
                  Attribute('error_message', emsg, name="Error Message"),
                  Attribute('job_id', job_id, name="Job Id"),
                  Attribute('job_state', state, name="Job State"),
                  Attribute('job_output_dir', job_output_dir,
                            name="Job Output Directory"),
                  Attribute('pbsmrtpipe_version', pbsmrtpipe.get_version(),
                            name="pbsmrtpipe Version"),
                  Attribute('total_core_hours', round(total_core_hours, 4),
                            "Total core hours")]

    ep_table = _to_table("entry_points", bg, bg.entry_binding_nodes(),
                         "Entry Points")
    fnodes_table = _to_table("file_node", bg, bg.file_nodes(), "File Nodes")

    # it would be nice if the DataSet UUIDs of the entry-points were added
    # to the dataset_uuids of the report.
    report = Report('pbsmrtpipe',
                    tables=[tasks_table, ep_table, fnodes_table],
                    attributes=attributes,
                    uuid=report_uuid)

    return report
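The core-hours arithmetic in one line, with invented numbers: a task that ran 5400 seconds on 8 processors.

run_time_sec, nproc = 5400, 8
core_hours = (run_time_sec / 60.0 / 60.0) * nproc  # 1.5 wall-hours * 8 procs
assert round(core_hours, 4) == 12.0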
def _get_consensus_table_and_attributes(ref_data, reference_entry):
    """
    Get a tuple: Table and list of Attributes

    :param ref_data: (dict) dict of data pulled from alignment_summary.gff
    :param reference_entry: (pbsystem.io.reference_entry) reference entry

    :return: tuple (pbreports.io.model.Table, [pbreports.io.model.Attributes])
    """
    table = Table('consensus_table', 'Consensus Calling Results')
    table.add_column(Column('contig_name', 'Reference'))
    table.add_column(Column('contig_len', 'Reference Length'))
    table.add_column(Column('bases_called', 'Bases Called'))
    table.add_column(Column('concordance', 'Consensus Accuracy'))
    table.add_column(Column('coverage', 'Base Coverage'))

    ordered_ids = _ref_ids_ordered_by_len(ref_data)

    sum_lengths = 0.0
    mean_bases_called = 0
    mean_concord = 'NA'
    mean_coverage = 0

    for seqid in ordered_ids:
        contig = reference_entry.get_contig(seqid)

        length = float(ref_data[seqid][LENGTH])
        gaps = float(ref_data[seqid][GAPS])
        errors = float(ref_data[seqid][ERR])
        cov = float(ref_data[seqid][COV])

        sum_lengths += length
        bases_called = 1.0 - gaps / length
        mean_bases_called += bases_called * length

        concord = 'NA'
        if length != gaps:
            log.info('length {f}'.format(f=length))
            log.info('gaps {f}'.format(f=gaps))
            log.info('errors {f}'.format(f=errors))
            concord = 1.0 - errors / (length - gaps)
            # compare the 'NA' sentinel by equality, not identity ('is')
            if mean_concord == 'NA':
                mean_concord = concord * length
            else:
                mean_concord += concord * length

        coverage = cov / length
        mean_coverage += coverage * length

        # table shows values for each contig
        table.add_data_by_column_id('contig_name', contig.name)
        table.add_data_by_column_id('contig_len', length)
        table.add_data_by_column_id('bases_called', bases_called)
        table.add_data_by_column_id('concordance', concord)
        table.add_data_by_column_id('coverage', coverage)

    mean_contig_length = sum_lengths / len(ordered_ids)
    mean_bases_called = mean_bases_called / sum_lengths
    if mean_concord != 'NA':
        mean_concord = mean_concord / sum_lengths
    mean_coverage = mean_coverage / sum_lengths

    attributes = [Attribute(id_, val, Constants.ATTR_LABELS[id_])
                  for id_, val in
                  [(Constants.MEAN_CONTIG_LENGTH, mean_contig_length),
                   (Constants.MEAN_BASES_CALLED, mean_bases_called),
                   (Constants.MEAN_CONCORDANCE, mean_concord),
                   (Constants.MEAN_COVERAGE, mean_coverage),
                   (Constants.LONGEST_CONTIG, ordered_ids[0])]]

    return table, attributes
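A worked example of the per-contig arithmetic, with invented numbers: bases_called and concordance are fractions of the contig, and the report-level means are length-weighted.

# Invented contig stats: length=10000, gaps=500, errors=95, summed coverage=900000.
length, gaps, errors, cov = 10000.0, 500.0, 95.0, 900000.0
bases_called = 1.0 - gaps / length            # 0.95
concordance = 1.0 - errors / (length - gaps)  # 1 - 95/9500 = 0.99
coverage = cov / length                       # 90.0x mean coverage
# Report-level values divide the length-weighted sums by the total length,
# e.g. mean_bases_called = sum(bases_called_i * length_i) / sum(length_i).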
def create_table(timings):
    """Long Amplicon Analysis Timing Result table"""
    columns = []
    columns.append(Column("barcode_col", header="Sample"))
    columns.append(Column("hour_col", header="Hours"))
    columns.append(Column("minute_col", header="Minutes"))
    columns.append(Column("second_col", header="Total Time (seconds)"))

    t = Table("result_table", title="Amplicon Analysis Timing Summary",
              columns=columns)

    seconds = []
    for barcode in sorted(timings):
        if barcode != 'All':
            data = timings[barcode]
            t.add_data_by_column_id('barcode_col', barcode)
            t.add_data_by_column_id('hour_col', data.seconds / 3600)
            t.add_data_by_column_id('minute_col', data.seconds / 60)
            t.add_data_by_column_id('second_col', data.seconds)
            seconds.append(data.seconds)

    # Add the average time information; divide by the number of per-barcode
    # entries, not len(timings), which also counts the 'All' key
    avg_seconds = sum(seconds) / len(seconds)
    t.add_data_by_column_id('barcode_col', 'Mean')
    t.add_data_by_column_id('hour_col', avg_seconds / 3600)
    t.add_data_by_column_id('minute_col', avg_seconds / 60)
    t.add_data_by_column_id('second_col', avg_seconds)

    # Add the median time information
    median_seconds = int(median(seconds))
    t.add_data_by_column_id('barcode_col', 'Median')
    t.add_data_by_column_id('hour_col', median_seconds / 3600)
    t.add_data_by_column_id('minute_col', median_seconds / 60)
    t.add_data_by_column_id('second_col', median_seconds)

    # Add the total time information
    t.add_data_by_column_id('barcode_col', 'Total')
    t.add_data_by_column_id('hour_col', timings['All'].seconds / 3600)
    t.add_data_by_column_id('minute_col', timings['All'].seconds / 60)
    t.add_data_by_column_id('second_col', timings['All'].seconds)

    log.info(str(t))
    return t
def create_table(timings):
    """Long Amplicon Analysis Timing Result table"""
    columns = []
    columns.append(Column("barcode_col", header=""))
    columns.append(Column("hour_col", header=""))
    columns.append(Column("minute_col", header=""))
    columns.append(Column("second_col", header=""))

    t = Table("result_table", title="", columns=columns)

    seconds = []
    for barcode in sorted(timings):
        if barcode != 'All':
            data = timings[barcode]
            t.add_data_by_column_id('barcode_col', barcode)
            t.add_data_by_column_id('hour_col', data.seconds / 3600)
            t.add_data_by_column_id('minute_col', data.seconds / 60)
            t.add_data_by_column_id('second_col', data.seconds)
            seconds.append(data.seconds)

    # Add the average time information; divide by the number of per-barcode
    # entries, not len(timings), which also counts the 'All' key
    avg_seconds = sum(seconds) / len(seconds)
    t.add_data_by_column_id('barcode_col', 'Mean')
    t.add_data_by_column_id('hour_col', avg_seconds / 3600)
    t.add_data_by_column_id('minute_col', avg_seconds / 60)
    t.add_data_by_column_id('second_col', avg_seconds)

    # Add the median time information
    median_seconds = int(median(seconds))
    t.add_data_by_column_id('barcode_col', 'Median')
    t.add_data_by_column_id('hour_col', median_seconds / 3600)
    t.add_data_by_column_id('minute_col', median_seconds / 60)
    t.add_data_by_column_id('second_col', median_seconds)

    # Add the total time information
    t.add_data_by_column_id('barcode_col', 'Total')
    t.add_data_by_column_id('hour_col', timings['All'].seconds / 3600)
    t.add_data_by_column_id('minute_col', timings['All'].seconds / 60)
    t.add_data_by_column_id('second_col', timings['All'].seconds)

    log.info(str(t))
    return t
class BaseVariantTableBuilder(object):

    def __init__(self):
        cols = []
        cols.append(Column(Constants.C_SEQ))
        cols.append(Column(Constants.C_POS))
        cols.append(Column(Constants.C_VAR))
        cols.append(Column(Constants.C_TYP))
        cols.append(Column(Constants.C_COV))
        cols.append(Column(Constants.C_CON))
        log.debug('# columns {n}'.format(n=len(cols)))
        self._table = Table(self._get_table_id(),
                            title=self._get_table_title(), columns=cols)

    def _get_table_title(self):
        pass

    def _get_table_id(self):
        pass

    @property
    def table(self):
        """
        :returns: Table
        """
        return self._table

    def _add_common_variant_atts(self, variant):
        """
        Add variant attributes common to the "top" and "top minor" variant
        reports.

        :param variant: Variant
        """
        self._table.add_data_by_column_id(Constants.C_SEQ, variant.contig)
        self._table.add_data_by_column_id(Constants.C_POS, variant.position)
        self._table.add_data_by_column_id(Constants.C_VAR, variant.variant)
        self._table.add_data_by_column_id(Constants.C_TYP, variant.type)
        self._table.add_data_by_column_id(Constants.C_COV, variant.coverage)
        self._table.add_data_by_column_id(Constants.C_CON, variant.confidence)