Example #1
    def _to_table(self, movie_datum):
        """
        Create a pbreports Table for each movie.

        :param movie_datum: List of

        [(
        movie_name,
        reads,
        mean readlength,
        polymerase readlength,
        number of subread bases,
        mean subread readlength,
        mean subread concordance), ...]
        """

        table = Table(Constants.T_STATS, columns=[Column(c_id)
                                                  for c_id in self.COL_IDS])

        for movie_data in movie_datum:
            if len(movie_data) != len(self.COL_IDS):
                log.error(movie_datum)
                raise ValueError(
                    "Incompatible values. {n} values provided, expected {a}".format(n=len(movie_data), a=len(self.COL_IDS)))

            for value, c_id in zip(movie_data, self.COL_IDS):

                table.add_data_by_column_id(c_id, value)

        log.debug(str(table))
        return table
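The snippets in this collection all build on the same pbreports report model. As a point of reference, the core pattern can be exercised standalone; this is a minimal sketch, assuming the classes live in pbcommand.models.report (the imports are not shown in the original snippets) and using a hypothetical COL_IDS tuple and table id:

# Minimal sketch of the Table/Column pattern; import path and ids are assumptions.
from pbcommand.models.report import Table, Column

COL_IDS = ("movie_name", "reads", "mean_readlength")  # hypothetical column ids

table = Table("movie_stats", columns=[Column(c_id) for c_id in COL_IDS])
for row in [("m54006_160101_000001", 1200, 8543)]:
    # Values are appended column-wise, one call per cell.
    for value, c_id in zip(row, COL_IDS):
        table.add_data_by_column_id(c_id, value)

print(table.to_dict())  # dict form consumed by pbreports, as the tests below show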
Example #2
def to_table(motif_records):

    columns = [Column('motif_id', header="Motif"),
               Column('modified_position', header="Modified Position"),
               Column('modification_type', header="Modification Type"),
               Column('percent_motifs_detected',
                      header="% of Motifs Detected"),
               Column('ndetected_motifs', header="# of Motifs Detected"),
               Column('nmotifs_in_genome', header="# of Motifs in Genome"),
               Column('mean_readscore', header='Mean QV'),
               Column('mean_coverage', header="Mean Coverage"),
               Column('partner_motif', header="Partner Motif"),
               Column('mean_ipd_ratio', header="Mean IPD Ratio"),
               Column('group_tag', header="Group Tag"),
               Column('objective_score', header='Objective Score')]

    # Record attr name ordered by index in columns
    attr_names = ['motif_str', 'center_position',
                  'modification_type',
                  'fraction', 'ndetected',
                  'ngenome', 'mean_score',
                  'mean_coverage', 'partner_motif_str',
                  'mean_ipd_ratio',
                  'group_tag', 'objective_score']

    table = Table(Constants.T_ID, title="Motifs", columns=columns)

    for record in motif_records:
        for attr_name, column in zip(attr_names, columns):
            v = getattr(record, attr_name)
            table.add_data_by_column_id(column.id, v)

    return table
Example #3
def to_table(motif_records):

    columns = [Column(Constants.C_ID),
               Column(Constants.C_POS),
               Column(Constants.C_TYPE),
               Column(Constants.C_PCT_MOTIF),
               Column(Constants.C_NMOTIF),
               Column(Constants.C_NMOTIF_GEN),
               Column(Constants.C_READSCORE),
               Column(Constants.C_COV),
               Column(Constants.C_PARTNER),
               Column(Constants.C_IPD),
               Column(Constants.C_GRP),
               Column(Constants.C_OBJ_SCORE)]

    # Record attr name ordered by index in columns
    attr_names = ['motif_str', 'center_position',
                  'modification_type',
                  'fraction', 'ndetected',
                  'ngenome', 'mean_score',
                  'mean_coverage', 'partner_motif_str',
                  'mean_ipd_ratio',
                  'group_tag', 'objective_score']

    table = Table(Constants.T_ID, columns=columns)

    for record in motif_records:
        for attr_name, column in zip(attr_names, columns):
            v = getattr(record, attr_name)
            table.add_data_by_column_id(column.id, v)

    return table
Example #5
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""

    columns = []

    if barcode:
        columns.append(Column("barcodename", header="Barcode"))

    columns.append(Column("coarsecluster", header="Sequence Cluster"))
    columns.append(Column("phase", header="Sequence Phase"))
    columns.append(Column("sequencelength", header="Length (bp)"))
    columns.append(Column("predictedaccuracy", header="Estimated Accuracy"))
    columns.append(Column("totalcoverage", header="Subreads coverage"))

    t = Table("result_table", title="Amplicon Consensus Summary",
              columns=columns)

    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            # if column.id == "predictedaccuracy":
            #    accuracy = round(100 * row[column.id][0], 2)
            #    t.add_data_by_column_id(column.id, accuracy)
            #else:
            t.add_data_by_column_id(column.id, row[column.id][0])

    log.info(str(t))
    return t
Example #6
def attributes_to_table(attributes, table_id):
    """Build a report table from Iso-Seq cluster attributes."""
    columns = [Column(x.id, header="") for x in attributes]
    table = Table(table_id, columns=columns)
    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)
    return table
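A usage sketch for attributes_to_table, assuming Attribute is the pbcommand report model used in the workflow examples further down (the attribute ids and values here are hypothetical):

# Sketch only; Attribute(id, value, name=...) as seen elsewhere in these examples.
from pbcommand.models.report import Attribute

attributes = [Attribute("num_polished", 42, name="Polished Isoforms"),
              Attribute("num_clusters", 7, name="Clusters")]
table = attributes_to_table(attributes, "isoseq_cluster_table")
assert [c.id for c in table.columns] == ["num_polished", "num_clusters"]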
Example #7
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""

    columns = []

    if barcode:
        columns.append(Column(Constants.C_BARCODE))

    columns.append(Column(Constants.C_CLUSTER))
    columns.append(Column(Constants.C_PHASE))
    columns.append(Column(Constants.C_LENGTH))
    columns.append(Column(Constants.C_ACCURACY))
    columns.append(Column(Constants.C_COVERAGE))

    t = Table(Constants.T_ID,
              columns=columns)

    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            # if column.id == "predictedaccuracy":
            #    accuracy = round(100 * row[column.id][0], 2)
            #    t.add_data_by_column_id(column.id, accuracy)
            # else:
            t.add_data_by_column_id(column.id, row[column.id][0])

    log.info(str(t))
    return t
Example #8
def to_table(motif_records):

    columns = [
        Column('motif_id', header=""),
        Column('modified_position', header=""),
        Column('modification_type', header=""),
        Column('percent_motifs_detected', header=""),
        Column('ndetected_motifs', header=""),
        Column('nmotifs_in_genome', header=""),
        Column('mean_readscore', header=''),
        Column('mean_coverage', header=""),
        Column('partner_motif', header=""),
        Column('mean_ipd_ratio', header=""),
        Column('group_tag', header=""),
        Column('objective_score', header='')
    ]

    # Record attr name ordered by index in columns
    attr_names = [
        'motif_str', 'center_position', 'modification_type', 'fraction',
        'ndetected', 'ngenome', 'mean_score', 'mean_coverage',
        'partner_motif_str', 'mean_ipd_ratio', 'group_tag', 'objective_score'
    ]

    table = Table(Constants.T_ID, title="", columns=columns)

    for record in motif_records:
        for attr_name, column in zip(attr_names, columns):
            v = getattr(record, attr_name)
            table.add_data_by_column_id(column.id, v)

    return table
Example #9
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""

    columns = []

    if barcode:
        columns.append(Column("barcodename", header=""))

    columns.append(Column("coarsecluster", header=""))
    columns.append(Column("phase", header=""))
    columns.append(Column("sequencelength", header=""))
    columns.append(Column("predictedaccuracy", header=""))
    columns.append(Column("totalcoverage", header=""))

    t = Table("result_table", columns=columns)

    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            # if column.id == "predictedaccuracy":
            #    accuracy = round(100 * row[column.id][0], 2)
            #    t.add_data_by_column_id(column.id, accuracy)
            # else:
            t.add_data_by_column_id(column.id, row[column.id][0])

    log.info(str(t))
    return t
Example #10
class TestBasicTable(unittest.TestCase):

    """Basic Smoke tests"""

    def setUp(self):
        self.columns = [Column('one', header="One"),
                        Column('two', header="Two"),
                        Column('three', header="Three")]
        self.table = Table('my_table_with_values', columns=self.columns)
        datum = {'one': list(xrange(3)), 'two': list('abc'),
                 'three': 'file1 file2 file3'.split()}
        for k, values in datum.iteritems():
            for value in values:
                self.table.add_data_by_column_id(k, value)

    def test_str(self):
        """Smoke test for conversion to str"""
        log.info(str(self.table))
        self.assertIsNotNone(str(self.table))

    def test_columns(self):
        """Test Columns"""
        self.assertEqual(len(self.table.columns), 3)

    def test_column_values(self):
        """Basic check for column values"""
        for column in self.table.columns:
            self.assertEqual(len(column.values), 3)

    def test_to_dict(self):
        """Conversion to dictionary"""
        self.assertTrue(isinstance(self.table.to_dict(), dict))
        log.info(self.table.to_dict())
Example #11
def to_table(motif_records):

    columns = [
        Column(Constants.C_ID),
        Column(Constants.C_POS),
        Column(Constants.C_TYPE),
        Column(Constants.C_PCT_MOTIF),
        Column(Constants.C_NMOTIF),
        Column(Constants.C_NMOTIF_GEN),
        Column(Constants.C_READSCORE),
        Column(Constants.C_COV),
        Column(Constants.C_PARTNER),
        Column(Constants.C_IPD),
        Column(Constants.C_GRP),
        Column(Constants.C_OBJ_SCORE)
    ]

    # Record attr name ordered by index in columns
    attr_names = [
        'motif_str', 'center_position', 'modification_type', 'fraction',
        'ndetected', 'ngenome', 'mean_score', 'mean_coverage',
        'partner_motif_str', 'mean_ipd_ratio', 'group_tag', 'objective_score'
    ]

    table = Table(Constants.T_ID, columns=columns)

    for record in motif_records:
        for attr_name, column in zip(attr_names, columns):
            v = getattr(record, attr_name)
            table.add_data_by_column_id(column.id, v)

    return table
Example #12
    def _to_table(self, movie_datum):
        """
        Create a pbreports Table for each movie.

        :param movie_datum: List of

        [(
        movie_name,
        reads,
        mean readlength,
        polymerase readlength,
        number of subread bases,
        mean subread readlength,
        mean subread accuracy), ...]
        """
        columns = [Column(k, header=h) for k, h in self.COLUMNS]
        table = Table(Constants.T_STATS,
                      title="Mapping Statistics Summary",
                      columns=columns)

        for movie_data in movie_datum:
            if len(movie_data) != len(columns):
                log.error(movie_datum)
                raise ValueError(
                    "Incompatible values. {n} values provided, expected {a}".format(n=len(movie_data), a=len(columns)))

            for value, c in zip(movie_data, columns):
                table.add_data_by_column_id(c.id, value)

        log.debug(str(table))
        print(table)
        return table
Example #13
def create_table(d, barcode):
    """Long Amplicon Analysis results table"""

    columns = []

    if barcode:
        columns.append(Column(Constants.C_BARCODE))

    columns.append(Column(Constants.C_CLUSTER))
    columns.append(Column(Constants.C_PHASE))
    columns.append(Column(Constants.C_LENGTH))
    columns.append(Column(Constants.C_ACCURACY))
    columns.append(Column(Constants.C_COVERAGE))

    t = Table(Constants.T_ID, columns=columns)

    for fastaname in sorted(d.fastaname):
        row = d[d.fastaname == fastaname]
        for column in columns:
            # if column.id == "predictedaccuracy":
            #    accuracy = round(100 * row[column.id][0], 2)
            #    t.add_data_by_column_id(column.id, accuracy)
            # else:
            t.add_data_by_column_id(column.id, row[column.id][0])

    log.info(str(t))
    return t
Example #14
def run_to_report(reads, barcodes, subreads=True, dataset_uuids=()):
    """ Generate a Report instance from a SubreadSet and BarcodeSet.
    :param subreads: If the CCS fofn is given, this needs to be set to False
    """

    class MyRow(object):
        def __init__(self, label):
            self.label = label
            self.bases = 0
            self.reads = 0

    label2row = {}

    for label, barcode, read in _labels_reads_iterator(reads, barcodes, subreads=subreads):
        if label not in label2row:
            label2row[label] = MyRow(label)
        label2row[label].bases += len(read)
        label2row[label].reads += 1

    columns = [Column(Constants.C_BARCODE), Column(Constants.C_NREADS), Column(Constants.C_NBASES)]

    table = Table("barcode_table", columns=columns)
    labels = sorted(label2row.keys())
    for label in labels:
        row = label2row[label]
        table.add_data_by_column_id(Constants.C_BARCODE, label)
        table.add_data_by_column_id(Constants.C_NREADS, row.reads)
        table.add_data_by_column_id(Constants.C_NBASES, row.bases)

    report = Report(meta_rpt.id, tables=[table], dataset_uuids=dataset_uuids)
    return meta_rpt.apply_view(report)
Example #15
def attributesToTable(attributes):
    """Build a report table from Iso-Seq cluster attributes."""
    columns = [Column(x.id, header="") for x in attributes]

    table = Table(Constants.T_ATTR, columns=columns)

    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)

    return table
Example #16
def attributesToTable(attributes):
    """Build a report table from Iso-Seq cluster attributes."""
    columns = [Column(x.id, header="") for x in attributes]

    table = Table(Constants.T_ATTR,
                  columns=columns)

    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)

    return table
Example #17
def attributesToTable(attributes):
    """Build a report table from IsoSeq cluster attributes."""
    columns = [Column(x.id, header=x.name) for x in attributes]

    table = Table('isoseq_cluster_table',
                  title="IsoSeq Cluster",
                  columns=columns)

    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)

    return table
Example #18
def _attributes_to_table(attributes):
    """Build a report table from Iso-Seq Classify attributes.

    """
    columns = [Column(x.id) for x in attributes]

    table = Table(Constants.T_ATTR, columns=columns)

    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)

    return table
Example #19
    def test_add_data_by_column_id(self):
        """Added data values by column identifier."""

        columns = [Column('one'), Column('two')]
        table = Table('mytable', columns=columns)

        datum = {'one': 12.0, 'two': 1234.0}

        for k, v in datum.iteritems():
            table.add_data_by_column_id(k, v)

        self.assertTrue(12.0 in table.columns[0].values)
        self.assertTrue(1234.0 in table.columns[1].values)
Example #20
def _attributes_to_table(attributes):
    """Build a report table from Iso-Seq Classify attributes.

    """
    columns = [Column(x.id) for x in attributes]

    table = Table(Constants.T_ATTR,
                  columns=columns)

    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)

    return table
Example #22
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""

    columns = []
    columns.append(Column("barcode_col", header="Sample"))
    columns.append(Column("good", header="Good"))
    columns.append(Column("good_pct", header="Good (%)"))
    columns.append(Column("chimera", header="Chimeric"))
    columns.append(Column("chimera_pct", header="Chimeric (%)"))
    columns.append(Column("noise", header="Noise"))
    columns.append(Column("noise_pct", header="Noise (%)"))

    t = Table("result_table",
              title="Amplicon Input Molecule Summary", columns=columns)

    for barcode, data in tabulated_data.iteritems():
        if barcode != 'all':
            t.add_data_by_column_id('barcode_col', barcode)
            for column_id in ['good', 'good_pct', 'chimera', 'chimera_pct', 'noise', 'noise_pct']:
                t.add_data_by_column_id(column_id, data[column_id])
    t.add_data_by_column_id('barcode_col', 'All')
    for column_id in ['good', 'good_pct', 'chimera', 'chimera_pct', 'noise', 'noise_pct']:
        t.add_data_by_column_id(column_id, tabulated_data['all'][column_id])

    log.info(str(t))
    return t
Example #23
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""

    columns = []
    columns.append(Column("barcode_col", header=""))
    columns.append(Column("good", header=""))
    columns.append(Column("good_pct", header=""))
    columns.append(Column("chimera", header=""))
    columns.append(Column("chimera_pct", header=""))
    columns.append(Column("noise", header=""))
    columns.append(Column("noise_pct", header=""))

    t = Table(Constants.T_R, columns=columns)

    for barcode, data in tabulated_data.iteritems():
        if barcode != "all":
            t.add_data_by_column_id("barcode_col", barcode)
            for column_id in ["good", "good_pct", "chimera", "chimera_pct", "noise", "noise_pct"]:
                t.add_data_by_column_id(column_id, data[column_id])
    t.add_data_by_column_id("barcode_col", "All")
    for column_id in ["good", "good_pct", "chimera", "chimera_pct", "noise", "noise_pct"]:
        t.add_data_by_column_id(column_id, tabulated_data["all"][column_id])

    log.info(str(t))
    return t
Example #24
def create_table(tabulated_data):
    """Long Amplicon Analysis results table"""

    columns = []
    columns.append(Column("barcode_col", header=''))
    columns.append(Column("good", header=''))
    columns.append(Column("good_pct", header=''))
    columns.append(Column("chimera", header=''))
    columns.append(Column("chimera_pct", header=''))
    columns.append(Column("noise", header=''))
    columns.append(Column("noise_pct", header=''))

    t = Table(Constants.T_R, columns=columns)

    for barcode, data in tabulated_data.iteritems():
        if barcode != 'all':
            t.add_data_by_column_id('barcode_col', barcode)
            for column_id in [
                    'good', 'good_pct', 'chimera', 'chimera_pct', 'noise',
                    'noise_pct'
            ]:
                t.add_data_by_column_id(column_id, data[column_id])
    t.add_data_by_column_id('barcode_col', 'All')
    for column_id in [
            'good', 'good_pct', 'chimera', 'chimera_pct', 'noise', 'noise_pct'
    ]:
        t.add_data_by_column_id(column_id, tabulated_data['all'][column_id])

    log.info(str(t))
    return t
Example #25
def _attributes_to_table(attributes):
    """Build a report table from IsoSeq Classify attributes.

    """
    columns = [Column(x.id, header=x.name) for x in attributes]

    table = Table('isoseq_classify_table',
                  title="IsoSeq Transcript Classification",
                  columns=columns)

    for x in attributes:
        table.add_data_by_column_id(x.id, x.value)

    return table
Example #26
def to_task_summary_report(bg):

    cs = [Column("workflow_task_id", header="Task Id"),
          Column("workflow_task_status", header="Status"),
          Column("workflow_task_run_time", header="Task Runtime"),
          Column('workflow_task_nproc', header="Number of Procs"),
          Column("workflow_task_emsg", header="Error Message")]

    t = Table("workflow_task_summary", title="Task Summary", columns=cs)
    for tnode in bg.all_task_type_nodes():
        if isinstance(tnode, VALID_ALL_TASK_NODE_CLASSES):
            t.add_data_by_column_id("workflow_task_id", tnode.idx)
            t.add_data_by_column_id("workflow_task_status", bg.node[tnode]['state'])
            t.add_data_by_column_id("workflow_task_run_time", bg.node[tnode]['run_time'])
            t.add_data_by_column_id("workflow_task_nproc", bg.node[tnode]['nproc'])
            t.add_data_by_column_id("workflow_task_emsg", bg.node[tnode]['error_message'])

    return Report("workflow_task_summary", tables=[t])
Example #28
def _dict_to_report_table(table_id, key_attr, value_attr, d):
    """
    Create a pbreports Table from a general {k -> v} dict

    :param table_id: Table id
    :param key_attr: Column id
    :param value_attr: Column id
    :param d: dict
    :return: Table
    """
    columns = [Column(key_attr, header="Attribute"),
               Column(value_attr, header="Value")]

    table = Table(table_id, columns=columns)
    for k, v in d.iteritems():
        table.add_data_by_column_id(key_attr, k)
        table.add_data_by_column_id(value_attr, v)

    return table
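A quick usage sketch for _dict_to_report_table; the dict contents and ids here are hypothetical:

# Hypothetical inputs: each dict entry becomes one row of an Attribute/Value table.
d = {"n_reads": 52340, "n_bases": 410222918}
table = _dict_to_report_table("summary_table", "attr_col", "value_col", d)
# Keys land in the "attr_col" column, values in the "value_col" column.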
Example #30
class TestBasicTable:

    """Basic Smoke tests"""

    def setup_method(self, method):
        self.columns = [Column('one', header="One"),
                        Column('two', header="Two"),
                        Column('three', header="Three")]
        self.table = Table('my_table_with_values', columns=self.columns)
        datum = [
            ('one', list(range(3))),
            ('two', list('abc')),
            ('three', 'file1 file2 file3'.split())
        ]
        for k, values in datum:
            for value in values:
                self.table.add_data_by_column_id(k, value)

    def test_str(self):
        """Smoke test for conversion to str"""
        log.info(str(self.table))
        assert str(self.table) is not None

    def test_columns(self):
        """Test Columns"""
        assert len(self.table.columns) == 3

    def test_column_values(self):
        """Basic check for column values"""
        for column in self.table.columns:
            assert len(column.values) == 3

    def test_to_dict(self):
        """Conversion to dictionary"""
        assert isinstance(self.table.to_dict(), dict)
        log.info(self.table.to_dict())

    def test_to_csv(self):
        f = tempfile.NamedTemporaryFile(suffix=".csv").name
        self.table.to_csv(f)
        with open(f) as csv_out:
            assert csv_out.read() == "One,Two,Three\n0,a,file1\n1,b,file2\n2,c,file3\n"
Example #31
def _generate_table(list_fastq_stats):
    columns = [Column('file_name', header='File Name'),
               Column('n_reads', header="Number of Reads"),
               Column('total_bases', header="Total Bases"),
               Column('mean_readlength', header="Mean Readlength"),
               Column('mean_qv', header="Mean Quality Values")]

    table = Table('fastq_table', columns=columns)

    for fastq_stat in list_fastq_stats:
        table.add_data_by_column_id(
            'file_name', os.path.basename(fastq_stat.file_name))
        table.add_data_by_column_id('n_reads', fastq_stat.reads.shape[0])
        table.add_data_by_column_id(
            'total_bases', int(np.sum(fastq_stat.reads)))
        table.add_data_by_column_id(
            'mean_readlength', int(fastq_stat.reads.mean()))
        table.add_data_by_column_id('mean_qv', np.round(
            fastq_stat.qvs.mean(), decimals=2))

    return table
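The list_fastq_stats objects consumed above only need file_name, reads, and qvs attributes, with reads and qvs behaving like numpy arrays; a hypothetical stand-in makes that explicit:

# Hypothetical stand-in for the FastqStats type assumed by _generate_table.
import numpy as np
from collections import namedtuple

FastqStats = namedtuple("FastqStats", ["file_name", "reads", "qvs"])
stats = FastqStats(file_name="sample.fastq",
                   reads=np.array([500, 750, 1000]),
                   qvs=np.array([30.1, 28.7, 31.4]))
table = _generate_table([stats])  # one row per input file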
Example #32
def _generate_table(list_fastq_stats):
    columns = [Column(Constants.C_FN, header=''),
               Column(Constants.C_NREADS, header=""),
               Column(Constants.C_TOT_BASES, header=""),
               Column(Constants.C_READLENGTH, header=""),
               Column(Constants.C_QV, header="")]

    table = Table(Constants.T_FASTQ, columns=columns)

    for fastq_stat in list_fastq_stats:
        table.add_data_by_column_id(
            Constants.C_FN, os.path.basename(fastq_stat.file_name))
        table.add_data_by_column_id(Constants.C_NREADS, fastq_stat.reads.shape[0])
        table.add_data_by_column_id(
            Constants.C_TOT_BASES, int(np.sum(fastq_stat.reads)))
        table.add_data_by_column_id(
            Constants.C_READLENGTH, int(fastq_stat.reads.mean()))
        table.add_data_by_column_id(Constants.C_QV, np.round(
            fastq_stat.qvs.mean(), decimals=2))

    return table
Example #33
def _to_report(bg, job_output_dir, job_id, state, was_successful, run_time, error_message=None):
    """ High Level Report of the workflow state

    Write the output of workflow datastore to pbreports report object

    Workflow summary .dot/svg (collapsed workflow)
    Workflow details .dot/svg (chunked workflow)

    To add:
    - Resolved WorkflowSettings (e.g., nproc, max_workers)
    -

    :type bg: BindingsGraph

    """
    emsg = "" if error_message is None else error_message

    attributes = [Attribute('was_successful', was_successful, name="Was Successful"),
                  Attribute('total_run_time_sec', int(run_time), name="Walltime (sec)"),
                  Attribute('error_message', emsg, name="Error Message"),
                  Attribute('job_id', job_id, name="Job Id"),
                  Attribute('job_state', state, name="Job State"),
                  Attribute('job_output_dir', job_output_dir, name="Job Output Directory"),
                  Attribute('pbsmrtpipe_version', pbsmrtpipe.get_version(), name="pbsmrtpipe Version")]

    columns = [Column('task_id', header='Task id'),
               Column('was_successful', header='Was Successful'),
               Column('state', header="Task State"),
               Column('run_time_sec', header="Run Time (sec)"),
               Column('nproc', header="# of procs")]

    tasks_table = Table('tasks', columns=columns)
    for tnode in bg.all_task_type_nodes():
        tasks_table.add_data_by_column_id('task_id', str(tnode))
        tasks_table.add_data_by_column_id('nproc', bg.node[tnode]['nproc'])
        tasks_table.add_data_by_column_id('state', bg.node[tnode]['state'])
        tasks_table.add_data_by_column_id('was_successful', bg.node[tnode]['state'] == TaskStates.SUCCESSFUL)
        # rt_ = bg.node[tnode]['run_time']
        # rtime = None if rt_ is None else int(rt_)
        tasks_table.add_data_by_column_id('run_time_sec', bg.node[tnode]['run_time'])

    ep_table = _to_table("entry_points", bg, bg.entry_binding_nodes())
    fnodes_table = _to_table("file_node", bg, bg.file_nodes())

    report = Report('pbsmrtpipe', tables=[tasks_table, ep_table, fnodes_table],
                    attributes=attributes)
    return report
Example #34
def _run_to_report(labels_reads_iterator, reads, barcodes,
                   subreads=True, dataset_uuids=()):
    """ Generate a Report instance from a SubreadSet and BarcodeSet.
    :param subreads: If the CCS fofn is given, this needs to be set to False
    """

    class MyRow(object):

        def __init__(self, label):
            self.label = label
            self.bases = 0
            self.reads = 0

    label2row = {}

    for label, read in labels_reads_iterator(reads, barcodes,
                                             subreads=subreads):
        if label not in label2row:
            label2row[label] = MyRow(label)
        label2row[label].bases += len(read)
        label2row[label].reads += 1

    columns = [Column('barcode', header="Barcode Name"),
               Column('number_of_reads', header="Reads"),
               Column('number_of_bases', header="Bases")]

    table = Table('barcode_table', title='Barcodes', columns=columns)
    labels = sorted(label2row.keys())
    for label in labels:
        row = label2row[label]
        table.add_data_by_column_id('barcode', label)
        table.add_data_by_column_id('number_of_reads', row.reads)
        table.add_data_by_column_id('number_of_bases', row.bases)

    report = Report('barcode', tables=[table],
                    dataset_uuids=dataset_uuids)
    return report
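The MyRow class above is just a per-label tally; the same bookkeeping can be sketched with a defaultdict to make the aggregation explicit (labels and reads here are made up):

# Same bases/reads tallying as MyRow, sketched with a plain defaultdict.
from collections import defaultdict

totals = defaultdict(lambda: {"bases": 0, "reads": 0})
for label, read in [("bc1", "ACGT" * 100), ("bc1", "ACGT" * 50)]:
    totals[label]["bases"] += len(read)  # len(read) is the read length in bases
    totals[label]["reads"] += 1
assert totals["bc1"] == {"bases": 600, "reads": 2}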
Example #36
def run_to_report(reads, barcodes, subreads=True, dataset_uuids=()):
    """ Generate a Report instance from a SubreadSet and BarcodeSet.
    :param subreads: If the CCS fofn is given, this needs to be set to False
    """
    class MyRow(object):
        def __init__(self, label):
            self.label = label
            self.bases = 0
            self.reads = 0

    label2row = {}

    for label, barcode, read in _labels_reads_iterator(reads,
                                                       barcodes,
                                                       subreads=subreads):
        if label not in label2row:
            label2row[label] = MyRow(label)
        label2row[label].bases += len(read)
        label2row[label].reads += 1

    columns = [
        Column(Constants.C_BARCODE),
        Column(Constants.C_NREADS),
        Column(Constants.C_NBASES)
    ]

    table = Table('barcode_table', columns=columns)
    labels = sorted(label2row.keys())
    for label in labels:
        row = label2row[label]
        table.add_data_by_column_id(Constants.C_BARCODE, label)
        table.add_data_by_column_id(Constants.C_NREADS, row.reads)
        table.add_data_by_column_id(Constants.C_NBASES, row.bases)

    report = Report(spec.id, tables=[table], dataset_uuids=dataset_uuids)
    return spec.apply_view(report)
Example #37
class TestBasicTable(unittest.TestCase):
    """Basic Smoke tests"""
    def setUp(self):
        self.columns = [
            Column('one', header="One"),
            Column('two', header="Two"),
            Column('three', header="Three")
        ]
        self.table = Table('my_table_with_values', columns=self.columns)
        datum = {
            'one': list(xrange(3)),
            'two': list('abc'),
            'three': 'file1 file2 file3'.split()
        }
        for k, values in datum.iteritems():
            for value in values:
                self.table.add_data_by_column_id(k, value)

    def test_str(self):
        """Smoke test for conversion to str"""
        log.info(str(self.table))
        self.assertIsNotNone(str(self.table))

    def test_columns(self):
        """Test Columns"""
        self.assertEqual(len(self.table.columns), 3)

    def test_column_values(self):
        """Basic check for column values"""
        for column in self.table.columns:
            self.assertEqual(len(column.values), 3)

    def test_to_dict(self):
        """Conversion to dictionary"""
        self.assertTrue(isinstance(self.table.to_dict(), dict))
        log.info(self.table.to_dict())
Example #38
def _to_table(tid, bg, nodes):
    """Create a table from File nodes or Entry nodes"""
    columns = [Column('id', header="Id"),
               Column('is_resolved', header='Is Resolved'),
               Column('path', header="Path")]

    table = Table(tid, columns=columns)
    for node in nodes:
        table.add_data_by_column_id('id', str(node))
        table.add_data_by_column_id('is_resolved', bg.node[node]['is_resolved'])
        try:
            table.add_data_by_column_id('path', bg.node[node]['path'])
        except KeyError as e:
            slog.error("Failed to get path from {n}".format(n=repr(node)))
            slog.error(e)
            table.add_data_by_column_id('path', "NA")

    return table
Example #40
class BaseVariantTableBuilder(object):

    def __init__(self):
        cols = []
        cols.append(Column('sequence', 'Sequence'))
        cols.append(Column('position', 'Position'))
        cols.append(Column('variant', 'Variant'))
        cols.append(Column('type', 'Type'))
        cols.append(Column('coverage', 'Coverage'))
        cols.append(Column('confidence', 'Confidence'))

        log.debug('# columns {n}'.format(n=len(cols)))

        self._table = Table(self._get_table_id(), title=self._get_table_title(),
            columns=cols)

    def _get_table_title(self):
        pass

    def _get_table_id(self):
        pass

    @property
    def table(self):
        """
        :returns: Table
        """
        return self._table

    def _add_common_variant_atts(self, variant):
        """
        Add variant attributes common to the "top" and "top minor" variant reports.
        :param variant: Variant
        """
        self._table.add_data_by_column_id('sequence', variant.contig)
        self._table.add_data_by_column_id('position', variant.position)
        self._table.add_data_by_column_id('variant', variant.variant)
        self._table.add_data_by_column_id('type', variant.type)
        self._table.add_data_by_column_id('coverage', variant.coverage)
        self._table.add_data_by_column_id('confidence', variant.confidence)
Example #41
def _movie_results_to_table(movie_results):
    """Group movie results by movie name and build a report table.

    Table has movie name, # of CCS reads, total CCS bases,
    mean CCS readlength and mean CCS accuracy.
    """

    columns = []
    columns.append(Column(Constants.C_MOVIE_NAME, values=[]))
    columns.append(Column(Constants.C_NREADS, values=[]))
    columns.append(Column(Constants.C_TOTAL_BASES, values=[]))
    columns.append(Column(Constants.C_MEAN_READLENGTH, values=[]))
    columns.append(Column(Constants.C_MEAN_ACCURACY, values=[]))
    columns.append(Column(Constants.C_MEAN_NPASSES, values=[]))
    table = Table(Constants.T_ID, columns=columns)

    movie_names = {m.movie_name for m in movie_results}

    for movie_name in movie_names:
        rs = [
            m.read_lengths for m in movie_results if m.movie_name == movie_name]
        read_lengths = np.concatenate(rs)
        ac = [
            m.accuracies for m in movie_results if m.movie_name == movie_name]
        accuracies = np.concatenate(ac)
        npass = [
            m.num_passes for m in movie_results if m.movie_name == movie_name]
        num_passes = np.concatenate(npass)

        m_readlength = int(
            read_lengths.mean()) if read_lengths.size > 0 else 0
        m_accuracy = accuracies.mean() if accuracies.size > 0 else 0.0
        m_npasses = int(np.round(num_passes.mean(), decimals=0)
                        ) if num_passes.size > 0 else 0
        #m_qv = int(round(accuracy_as_phred_qv(float(accuracies.mean()))))

        table.add_data_by_column_id(Constants.C_MOVIE_NAME, movie_name)
        table.add_data_by_column_id(Constants.C_NREADS, read_lengths.shape[0])
        table.add_data_by_column_id(
            Constants.C_TOTAL_BASES, int(read_lengths.sum()))
        table.add_data_by_column_id(Constants.C_MEAN_READLENGTH, m_readlength)
        table.add_data_by_column_id(Constants.C_MEAN_ACCURACY, m_accuracy)
        #table.add_data_by_column_id(Constants.A_MEAN_QV, m_qv)
        table.add_data_by_column_id(Constants.C_MEAN_NPASSES, m_npasses)

    return table
Example #42
def create_table(timings):
    """Long Amplicon Analysis Timing Result table"""

    columns = []
    columns.append(Column(Constants.C_BC))
    columns.append(Column(Constants.C_HOUR))
    columns.append(Column(Constants.C_MIN))
    columns.append(Column(Constants.C_SEC))

    t = Table(Constants.T_ID, columns=columns)

    seconds = []
    for barcode in sorted(timings):
        if barcode != 'All':
            data = timings[barcode]
            t.add_data_by_column_id(Constants.C_BC, barcode)
            t.add_data_by_column_id(Constants.C_HOUR, data.seconds / 3600)
            t.add_data_by_column_id(Constants.C_MIN, data.seconds / 60)
            t.add_data_by_column_id(Constants.C_SEC, data.seconds)
            seconds.append(data.seconds)
    # Add the average time information
    seconds_sum = sum(seconds)
    avg_seconds = seconds_sum / len(seconds)  # average over barcodes, excluding 'All'
    t.add_data_by_column_id(Constants.C_BC, 'Mean')
    t.add_data_by_column_id(Constants.C_HOUR, avg_seconds / 3600)
    t.add_data_by_column_id(Constants.C_MIN, avg_seconds / 60)
    t.add_data_by_column_id(Constants.C_SEC, avg_seconds)
    # Add the median time information
    median_seconds = int(median(seconds))
    t.add_data_by_column_id(Constants.C_BC, 'Median')
    t.add_data_by_column_id(Constants.C_HOUR, median_seconds / 3600)
    t.add_data_by_column_id(Constants.C_MIN, median_seconds / 60)
    t.add_data_by_column_id(Constants.C_SEC, median_seconds)
    # Add the total time information
    t.add_data_by_column_id(Constants.C_BC, 'Total')
    t.add_data_by_column_id(Constants.C_HOUR, timings['All'].seconds / 3600)
    t.add_data_by_column_id(Constants.C_MIN, timings['All'].seconds / 60)
    t.add_data_by_column_id(Constants.C_SEC, timings['All'].seconds)

    log.debug(str(t))
    return t
Example #43
def _get_consensus_table_and_attributes(ref_data, reference_entry):
    """
    Get a tuple: Table and list of Attributes
    :param ref_data: (dict) dict of data pulled from alignment_summary.gff
    :param reference_entry: reference entry
    :return: tuple (Table, [Attributes])
    """
    ordered_ids = _ref_ids_ordered_by_len(ref_data)

    sum_lengths = 0.0
    mean_bases_called = 0
    mean_concord = 'NA'
    mean_coverage = 0

    columns = []
    columns.append(Column(Constants.C_CONTIG_NAME))
    columns.append(Column(Constants.C_CONTIG_LEN))
    columns.append(Column(Constants.C_BASES_CALLED))
    columns.append(Column(Constants.C_CONCORDANCE))
    columns.append(Column(Constants.C_COVERAGE))
    table = Table(Constants.T_STATS, columns=columns)

    for seqid in ordered_ids:
        contig = reference_entry.get_contig(seqid)

        length = float(ref_data[seqid][LENGTH])
        gaps = float(ref_data[seqid][GAPS])
        errors = float(ref_data[seqid][ERR])
        cov = float(ref_data[seqid][COV])

        sum_lengths += length
        bases_called = 1.0 - gaps / length
        mean_bases_called += bases_called * length

        concord = 'NA'
        if length != gaps:

            log.info('length {f}'.format(f=length))
            log.info('gaps {f}'.format(f=gaps))
            log.info('errors {f}'.format(f=errors))

            concord = 1.0 - errors / (length - gaps)
            if mean_concord == 'NA':
                mean_concord = concord * length
            else:
                mean_concord += concord * length

        coverage = cov / length
        mean_coverage += coverage * length

        # table shows values for each contig
        table.add_data_by_column_id(Constants.C_CONTIG_NAME, contig.name)
        table.add_data_by_column_id(Constants.C_CONTIG_LEN, length)
        table.add_data_by_column_id(Constants.C_BASES_CALLED, bases_called)
        table.add_data_by_column_id(Constants.C_CONCORDANCE, concord)
        table.add_data_by_column_id(Constants.C_COVERAGE, coverage)

    mean_contig_length = sum_lengths / len(ordered_ids)
    mean_bases_called = mean_bases_called / sum_lengths
    if mean_concord != 'NA':
        mean_concord = mean_concord / sum_lengths
    mean_coverage = mean_coverage / sum_lengths

    attributes = []
    attributes.append(Attribute(Constants.MEAN_CONCORDANCE, mean_concord))
    attributes.append(
        Attribute(Constants.MEAN_CONTIG_LENGTH, mean_contig_length))
    attributes.append(Attribute(Constants.LONGEST_CONTIG, ordered_ids[0]))
    attributes.append(
        Attribute(Constants.MEAN_BASES_CALLED, mean_bases_called))
    attributes.append(Attribute(Constants.MEAN_COVERAGE, mean_coverage))

    return table, attributes
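The running means above are length-weighted: each per-contig value is accumulated as value * length and normalized by the summed length at the end, so long contigs dominate. A small check of that arithmetic:

# Length-weighted mean: sum(value_i * length_i) / sum(length_i).
lengths = [1000.0, 3000.0]
coverages = [10.0, 30.0]
weighted = sum(c * l for c, l in zip(coverages, lengths)) / sum(lengths)
assert weighted == 25.0  # pulled toward the longer contig, not the plain mean 20.0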
Example #44
def _movie_results_to_table(movie_results):
    """Group movie results by movie name and build a report table.

    Table has movie name, # of CCS reads, total CCS bases,
    mean CCS readlength and mean CCS accuracy.
    """

    columns = []
    columns.append(Column(Constants.C_MOVIE_NAME, values=[]))
    columns.append(Column(Constants.C_NREADS, values=[]))
    columns.append(Column(Constants.C_TOTAL_BASES, values=[]))
    columns.append(Column(Constants.C_MEAN_READLENGTH, values=[]))
    columns.append(Column(Constants.C_MEAN_ACCURACY, values=[]))
    columns.append(Column(Constants.C_MEAN_NPASSES, values=[]))
    table = Table(Constants.T_ID, columns=columns)

    movie_names = {m.movie_name for m in movie_results}

    for movie_name in movie_names:
        rs = [
            m.read_lengths for m in movie_results if m.movie_name == movie_name
        ]
        read_lengths = np.concatenate(rs)
        ac = [
            m.accuracies for m in movie_results if m.movie_name == movie_name
        ]
        accuracies = np.concatenate(ac)
        npass = [
            m.num_passes for m in movie_results if m.movie_name == movie_name
        ]
        num_passes = np.concatenate(npass)

        m_readlength = int(read_lengths.mean()) if read_lengths.size > 0 else 0
        m_accuracy = accuracies.mean() if accuracies.size > 0 else 0.0
        m_npasses = int(np.round(num_passes.mean(),
                                 decimals=0)) if num_passes.size > 0 else 0
        #m_qv = int(round(accuracy_as_phred_qv(float(accuracies.mean()))))

        table.add_data_by_column_id(Constants.C_MOVIE_NAME, movie_name)
        table.add_data_by_column_id(Constants.C_NREADS, read_lengths.shape[0])
        table.add_data_by_column_id(Constants.C_TOTAL_BASES,
                                    int(read_lengths.sum()))
        table.add_data_by_column_id(Constants.C_MEAN_READLENGTH, m_readlength)
        table.add_data_by_column_id(Constants.C_MEAN_ACCURACY, m_accuracy)
        #table.add_data_by_column_id(Constants.A_MEAN_QV, m_qv)
        table.add_data_by_column_id(Constants.C_MEAN_NPASSES, m_npasses)

    return table
Example #45
def datastore_to_report(ds):
    """

    :type ds: DataStore
    :param ds:
    :return:
    """
    attrs = [
        Attribute("ds_nfiles", len(ds.files), name="Number of files"),
        Attribute("ds_version", ds.version, name="Datastore version"),
        Attribute("ds_created_at", ds.created_at, name="Created At"),
        Attribute("ds_updated_at", ds.updated_at, name="Updated At")
    ]

    columns_names = [("file_id", "File Id"), ("file_type_obj", "File Type"),
                     ("path", "Path"), ("file_size", "Size"),
                     ("created_at", "Created At"),
                     ("modified_at", "Modified At")]

    to_i = lambda s: "ds_" + s
    columns = [Column(to_i(i), header=h) for i, h in columns_names]
    t = Table("datastore", title="DataStore Summary", columns=columns)

    def _to_relative_path(p):
        return "/".join(p.split("/")[-3:])

    for file_id, ds_file in ds.files.iteritems():
        t.add_data_by_column_id(to_i("file_id"), ds_file.file_id)
        t.add_data_by_column_id(to_i("file_type_obj"), ds_file.file_type_id)
        t.add_data_by_column_id(to_i("path"), _to_relative_path(ds_file.path))
        t.add_data_by_column_id(to_i("file_size"), ds_file.file_size)
        t.add_data_by_column_id(to_i("created_at"), ds_file.created_at)
        t.add_data_by_column_id(to_i("modified_at"), ds_file.modified_at)

    return Report("datastore_report", tables=[t], attributes=attrs)
Example #46
class BaseVariantTableBuilder(object):
    def __init__(self):
        cols = []
        cols.append(Column(Constants.C_SEQ, ''))
        cols.append(Column(Constants.C_POS, ''))
        cols.append(Column(Constants.C_VAR, ''))
        cols.append(Column(Constants.C_TYP, ''))
        cols.append(Column(Constants.C_COV, ''))
        cols.append(Column(Constants.C_CON, ''))

        log.debug('# columns {n}'.format(n=len(cols)))

        self._table = Table(self._get_table_id(),
                            title=self._get_table_title(),
                            columns=cols)

    def _get_table_title(self):
        pass

    def _get_table_id(self):
        pass

    @property
    def table(self):
        """
        :returns: Table
        """
        return self._table

    def _add_common_variant_atts(self, variant):
        """
        Add variant attributes common to the "top" and "top minor" variant reports.
        :param variant: Variant
        """
        self._table.add_data_by_column_id(Constants.C_SEQ, variant.contig)
        self._table.add_data_by_column_id(Constants.C_POS, variant.position)
        self._table.add_data_by_column_id(Constants.C_VAR, variant.variant)
        self._table.add_data_by_column_id(Constants.C_TYP, variant.type)
        self._table.add_data_by_column_id(Constants.C_COV, variant.coverage)
        self._table.add_data_by_column_id(Constants.C_CON, variant.confidence)
Example #47
def _movie_results_to_table(movie_results):
    """Group movie results by movie name and build a report table.

    Table has movie name, # of CCS reads, total CCS bases,
    mean CCS readlength and mean CCS accuracy.
    """
    columns = [Column(Constants.C_MOVIE_NAME, header="Movie"),
               Column(Constants.A_NREADS, header="Consensus reads"),
               Column(Constants.A_TOTAL_BASES,
                      header="Number of consensus bases"),
               Column(Constants.A_MEAN_READLENGTH,
                      header="Mean Consensus Read Length"),
               Column(Constants.A_MEAN_ACCURACY,
                      header="Mean Consensus Predicted Accuracy"),
               Column(Constants.A_MEAN_QV,
                      header="Mean Consensus Predicted QV"),
               Column(Constants.A_MEAN_NPASSES, header="Mean Number of Passes")]

    table = Table(Constants.T_ID, title="Consensus reads", columns=columns)

    movie_names = {m.movie_name for m in movie_results}

    for movie_name in movie_names:
        rs = [
            m.read_lengths for m in movie_results if m.movie_name == movie_name]
        read_lengths = np.concatenate(rs)
        ac = [
            m.accuracies for m in movie_results if m.movie_name == movie_name]
        accuracies = np.concatenate(ac)
        npass = [
            m.num_passes for m in movie_results if m.movie_name == movie_name]
        num_passes = np.concatenate(npass)

        m_readlength = int(
            read_lengths.mean()) if read_lengths.size > 0 else 0.0
        m_accuracy = np.round(
            accuracies.mean(), decimals=4) if accuracies.size > 0 else 0.0
        m_npasses = np.round(
            num_passes.mean(), decimals=3) if num_passes.size > 0 else 0.0
        m_qv = int(round(accuracy_as_phred_qv(float(accuracies.mean()))))

        table.add_data_by_column_id(Constants.C_MOVIE_NAME, movie_name)
        table.add_data_by_column_id(Constants.A_NREADS, read_lengths.shape[0])
        table.add_data_by_column_id(
            Constants.A_TOTAL_BASES, read_lengths.sum())
        table.add_data_by_column_id(Constants.A_MEAN_READLENGTH, m_readlength)
        table.add_data_by_column_id(Constants.A_MEAN_ACCURACY, m_accuracy)
        table.add_data_by_column_id(Constants.A_MEAN_QV, m_qv)
        table.add_data_by_column_id(Constants.A_MEAN_NPASSES, m_npasses)

    return table
Example #48
def _to_report(bg,
               job_output_dir,
               job_id,
               state,
               was_successful,
               run_time,
               error_message=None,
               report_uuid=None):
    """ High Level Report of the workflow state

    Write the output of workflow datastore to pbreports report object

    Workflow summary .dot/svg (collapsed workflow)
    Workflow details .dot/svg (chunked workflow)

    To add:
    - Resolved WorkflowSettings (e.g., nproc, max_workers)
    -

    :type bg: BindingsGraph

    """
    emsg = "" if error_message is None else error_message

    columns = [
        Column('task_id', header='Task id'),
        Column('was_successful', header='Was Successful'),
        Column('state', header="Task State"),
        Column('run_time_sec', header="Run Time (sec)"),
        Column('nproc', header="# of procs"),
        Column("num_core_hours", header="Core Hours")
    ]

    tasks_table = Table('tasks', title="Tasks", columns=columns)
    for tnode in bg.all_task_type_nodes():

        nproc = bg.node[tnode]['nproc']
        # the task might not be completed.
        run_time_sec = bg.node[tnode]['run_time']
        if run_time_sec is None:
            core_hours = 0.0
        else:
            core_hours = (run_time_sec / 60.0 / 60.0) * nproc

        tasks_table.add_data_by_column_id('task_id', str(tnode))
        tasks_table.add_data_by_column_id('nproc', bg.node[tnode]['nproc'])
        tasks_table.add_data_by_column_id('state', bg.node[tnode]['state'])
        tasks_table.add_data_by_column_id(
            'was_successful', bg.node[tnode]['state'] == TaskStates.SUCCESSFUL)
        # rt_ = bg.node[tnode]['run_time']
        # rtime = None if rt_ is None else int(rt_)
        tasks_table.add_data_by_column_id('run_time_sec',
                                          bg.node[tnode]['run_time'])
        tasks_table.add_data_by_column_id('num_core_hours',
                                          round(core_hours, 4))

    total_core_hours = sum(
        tasks_table.get_column_by_id('num_core_hours').values)

    attributes = [
        Attribute('was_successful', was_successful, name="Was Successful"),
        Attribute('total_run_time_sec', int(run_time), name="Walltime (sec)"),
        Attribute('error_message', emsg, name="Error Message"),
        Attribute('job_id', job_id, name="Job Id"),
        Attribute('job_state', state, name="Job State"),
        Attribute('job_output_dir',
                  job_output_dir,
                  name="Job Output Directory"),
        Attribute('pbsmrtpipe_version',
                  pbsmrtpipe.get_version(),
                  name="pbsmrtpipe Version"),
        Attribute('total_core_hours', round(total_core_hours, 4),
                  "Total core hours")
    ]

    ep_table = _to_table("entry_points", bg, bg.entry_binding_nodes(),
                         "Entry Points")
    fnodes_table = _to_table("file_node", bg, bg.file_nodes(), "File Nodes")

    # this would be nice if the DataSet UUIDs of the entry-points are added to the
    # dataset_uuids of the report.
    report = Report('pbsmrtpipe',
                    tables=[tasks_table, ep_table, fnodes_table],
                    attributes=attributes,
                    uuid=report_uuid)
    return report
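The core-hours column above is wall-clock runtime in hours multiplied by the processor count, for example:

# 1800 s on 8 procs -> (1800 / 3600) * 8 = 4.0 core hours.
run_time_sec, nproc = 1800, 8
core_hours = (run_time_sec / 60.0 / 60.0) * nproc
assert round(core_hours, 4) == 4.0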
Example #49
def _get_consensus_table_and_attributes(ref_data, reference_entry):
    """
    Get a tuple: Table and list of Attributes
    :param ref_data: (dict) dict of data pulled from alignment_summary.gff
    :param reference_entry: (pbsystem.io.reference_entry) reference entry
    :return: tuple (pbreports.io.model.Table, [pbreports.io.model.Attributes])
    """
    table = Table('consensus_table', 'Consensus Calling Results')
    table.add_column(Column('contig_name', 'Reference'))
    table.add_column(Column('contig_len', 'Reference Length'))
    table.add_column(Column('bases_called', 'Bases Called'))
    table.add_column(Column('concordance', 'Consensus Accuracy'))
    table.add_column(Column('coverage', 'Base Coverage'))

    ordered_ids = _ref_ids_ordered_by_len(ref_data)

    sum_lengths = 0.0
    mean_bases_called = 0
    mean_concord = 'NA'
    mean_coverage = 0

    for seqid in ordered_ids:
        contig = reference_entry.get_contig(seqid)

        length = float(ref_data[seqid][LENGTH])
        gaps = float(ref_data[seqid][GAPS])
        errors = float(ref_data[seqid][ERR])
        cov = float(ref_data[seqid][COV])

        sum_lengths += length
        bases_called = 1.0 - gaps / length
        mean_bases_called += bases_called * length

        concord = 'NA'
        if length != gaps:

            log.info('length {f}'.format(f=length))
            log.info('gaps {f}'.format(f=gaps))
            log.info('errors {f}'.format(f=errors))

            concord = 1.0 - errors / (length - gaps)
            # compare the 'NA' sentinel with ==, not 'is'
            if mean_concord == 'NA':
                mean_concord = concord * length
            else:
                mean_concord += concord * length

        coverage = cov / length
        mean_coverage += coverage * length

        # table shows values for each contig
        table.add_data_by_column_id('contig_name', contig.name)
        table.add_data_by_column_id('contig_len', length)
        table.add_data_by_column_id('bases_called', bases_called)
        table.add_data_by_column_id('concordance', concord)
        table.add_data_by_column_id('coverage', coverage)

    mean_contig_length = sum_lengths / len(ordered_ids)
    mean_bases_called = mean_bases_called / sum_lengths
    if mean_concord != 'NA':
        mean_concord = mean_concord / sum_lengths
    mean_coverage = mean_coverage / sum_lengths

    attributes = [Attribute(id_, val, Constants.ATTR_LABELS[id_])
        for id_, val in [
            (Constants.MEAN_CONTIG_LENGTH, mean_contig_length),
            (Constants.MEAN_BASES_CALLED, mean_bases_called),
            (Constants.MEAN_CONCORDANCE, mean_concord),
            (Constants.MEAN_COVERAGE, mean_coverage),
            (Constants.LONGEST_CONTIG, ordered_ids[0])]]

    return table, attributes
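
The report attributes above are length-weighted means over the contigs. A minimal sketch of that weighting, using hypothetical (length, concordance) pairs:

# Minimal sketch of the length-weighted averaging used above;
# the (length, concordance) pairs are hypothetical.
contigs = [(1000.0, 0.99), (4000.0, 0.95)]

total_length = sum(length for length, _ in contigs)
mean_concord = sum(length * c for length, c in contigs) / total_length
assert abs(mean_concord - 0.958) < 1e-9  # (990 + 3800) / 5000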
def create_table(timings):
    """Long Amplicon Analysis Timing Result table"""

    columns = []
    columns.append(Column("barcode_col", header="Sample"))
    columns.append(Column("hour_col", header="Hours"))
    columns.append(Column("minute_col", header="Minutes"))
    columns.append(Column("second_col", header="Total Time (seconds)"))

    t = Table("result_table",
              title="Amplicon Analysis Timing Summary", columns=columns)

    seconds = []
    for barcode in sorted(timings):
        if barcode != 'All':
            data = timings[barcode]
            t.add_data_by_column_id('barcode_col', barcode)
            t.add_data_by_column_id('hour_col',   data.seconds / 3600)
            t.add_data_by_column_id('minute_col', data.seconds / 60)
            t.add_data_by_column_id('second_col', data.seconds)
            seconds.append(data.seconds)
    # Add the average time information
    seconds_sum = sum(seconds)
    # average over the per-barcode times only; 'All' is excluded from seconds
    avg_seconds = seconds_sum / len(seconds)
    t.add_data_by_column_id('barcode_col', 'Mean')
    t.add_data_by_column_id('hour_col',   avg_seconds / 3600)
    t.add_data_by_column_id('minute_col', avg_seconds / 60)
    t.add_data_by_column_id('second_col', avg_seconds)
    # Add the median time information
    median_seconds = int(median(seconds))
    t.add_data_by_column_id('barcode_col', 'Median')
    t.add_data_by_column_id('hour_col',   median_seconds / 3600)
    t.add_data_by_column_id('minute_col', median_seconds / 60)
    t.add_data_by_column_id('second_col', median_seconds)
    # Add the total time information
    t.add_data_by_column_id('barcode_col', 'Total')
    t.add_data_by_column_id('hour_col',   timings['All'].seconds / 3600)
    t.add_data_by_column_id('minute_col', timings['All'].seconds / 60)
    t.add_data_by_column_id('second_col', timings['All'].seconds)

    log.info(str(t))
    return t
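
A hedged usage sketch for create_table: the function only reads a .seconds attribute from each timing value, so datetime.timedelta works as an input here, and the barcode names below are made up:

# Hypothetical usage; timedelta.seconds equals the full duration in
# seconds only for durations under one day, which is assumed here.
from datetime import timedelta

timings = {
    'Barcode_1': timedelta(seconds=5400),
    'Barcode_2': timedelta(seconds=7200),
    'All': timedelta(seconds=12600),
}
table = create_table(timings)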
Example #52
0
def datastore_to_report(ds):
    """

    :type ds: DataStore
    :param ds:
    :return:
    """
    attrs = [Attribute("ds_nfiles", len(ds.files), name="Number of files"),
             Attribute("ds_version", ds.version, name="Datastore version"),
             Attribute("ds_created_at", ds.created_at, name="Created At"),
             Attribute("ds_updated_at", ds.updated_at, name="Updated At")]

    columns_names = [("file_id", "File Id"),
                     ("file_type_obj", "File Type"),
                     ("path", "Path"),
                     ("file_size", "Size"),
                     ("created_at", "Created At"),
                     ("modified_at", "Modified At")]

    to_i = lambda s: "ds_" + s
    columns = [Column(to_i(i), header=h) for i, h in columns_names]
    t = Table("datastore", title="DataStore Summary", columns=columns)

    def _to_relative_path(p):
        return "/".join(p.split("/")[-3:])

    for file_id, ds_file in ds.files.iteritems():
        t.add_data_by_column_id(to_i("file_id"), ds_file.file_id)
        t.add_data_by_column_id(to_i("file_type_obj"), ds_file.file_type_id)
        t.add_data_by_column_id(to_i("path"), _to_relative_path(ds_file.path))
        t.add_data_by_column_id(to_i("file_size"), ds_file.file_size)
        t.add_data_by_column_id(to_i("created_at"), ds_file.created_at)
        t.add_data_by_column_id(to_i("modified_at"), ds_file.modified_at)

    return Report("datastore_report", tables=[t], attributes=attrs)
Example #53
0
class BaseVariantTableBuilder(object):

    def __init__(self):
        cols = []
        cols.append(Column(Constants.C_SEQ))
        cols.append(Column(Constants.C_POS))
        cols.append(Column(Constants.C_VAR))
        cols.append(Column(Constants.C_TYP))
        cols.append(Column(Constants.C_COV))
        cols.append(Column(Constants.C_CON))

        log.debug('# columns {n}'.format(n=len(cols)))

        self._table = Table(self._get_table_id(), title=self._get_table_title(),
                            columns=cols)

    def _get_table_title(self):
        # subclasses must supply the display title for the table
        raise NotImplementedError

    def _get_table_id(self):
        # subclasses must supply the pbreports table id
        raise NotImplementedError

    @property
    def table(self):
        """
        :returns: Table
        """
        return self._table

    def _add_common_variant_atts(self, variant):
        """
        Add variant attributes common to the "top" and "top minor" variant reports.
        :param variant: Variant
        """
        self._table.add_data_by_column_id(Constants.C_SEQ, variant.contig)
        self._table.add_data_by_column_id(Constants.C_POS, variant.position)
        self._table.add_data_by_column_id(Constants.C_VAR, variant.variant)
        self._table.add_data_by_column_id(Constants.C_TYP, variant.type)
        self._table.add_data_by_column_id(Constants.C_COV, variant.coverage)
        self._table.add_data_by_column_id(Constants.C_CON, variant.confidence)
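
A hedged sketch of how a concrete subclass might plug into this base class; the table id, title, and add_variant helper are illustrative, not taken from the actual reports:

# Hypothetical subclass; id and title values are illustrative only.
class TopVariantTableBuilder(BaseVariantTableBuilder):

    def _get_table_id(self):
        return 'top_variants_table'

    def _get_table_title(self):
        return 'Top Variants'

    def add_variant(self, variant):
        # populate only the columns shared by both variant reports
        self._add_common_variant_atts(variant)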