import csv

# Column, Table, and Constants are assumed to be imported by the surrounding
# module (pbreports-style report models).


def create_table(summary_csv):
    """Build the Long Amplicon Analysis results table from the summary CSV."""

    columns = [
        Column(Constants.C_BC),
        Column(Constants.C_GOOD),
        Column(Constants.C_GOOD_PCT),
        Column(Constants.C_CHIM),
        Column(Constants.C_CHIM_PCT),
        Column(Constants.C_NOISE),
        Column(Constants.C_NOISE_PCT),
    ]

    t = Table(Constants.T_R, columns=columns)

    COL_IDS = [
        Constants.C_GOOD, Constants.C_GOOD_PCT, Constants.C_CHIM,
        Constants.C_CHIM_PCT, Constants.C_NOISE, Constants.C_NOISE_PCT
    ]

    def add_row(barcode_id, n_good, n_chimera, n_noise):
        """Append one row of counts and percentages for a single barcode."""
        pct_good = pct_chimera = pct_noise = 0.0
        total = n_good + n_chimera + n_noise
        if total > 0:
            pct_good = n_good / float(total)
            pct_chimera = n_chimera / float(total)
            pct_noise = n_noise / float(total)
        values = [n_good, pct_good, n_chimera, pct_chimera, n_noise, pct_noise]
        # Use the barcode_id argument, not the loop variable bc_id, so the
        # final "All" row is labeled correctly.
        t.add_data_by_column_id(Constants.C_BC, barcode_id)
        for column_id, value in zip(COL_IDS, values):
            t.add_data_by_column_id(column_id, value)

    with open(summary_csv) as csv_in:
        reader = csv.reader(csv_in, delimiter=',')
        next(reader)  # skip the header row (reader.next() is Python 2 only)
        for rec in reader:
            assert len(rec) == 7, rec
            bc_id = rec[0]
            if bc_id == "All":
                continue
            # good/chimera/noise counts live at indices 1, 3, and 5
            add_row(bc_id, int(rec[1]), int(rec[3]), int(rec[5]))
    # recompute the "All" summary row from the per-barcode totals
    n_good = sum(t.get_column_by_id(Constants.C_GOOD).values)
    n_chimera = sum(t.get_column_by_id(Constants.C_CHIM).values)
    n_noise = sum(t.get_column_by_id(Constants.C_NOISE).values)
    add_row("All", n_good, n_chimera, n_noise)
    return t
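
For reference, a minimal usage sketch. The input layout below is invented to
satisfy only what the parser checks (a header row, seven fields per record,
the barcode id in field 0, integer counts in fields 1, 3, and 5); it is not
the real LAA column schema.

import csv
import tempfile

def _demo_create_table():
    # Invented rows; only the fields the parser reads are meaningful.
    rows = [
        ["barcode", "good", "good_pct", "chimera", "chimera_pct",
         "noise", "noise_pct"],
        ["bc1001", "90", "0.9", "5", "0.05", "5", "0.05"],
        ["bc1002", "40", "0.8", "5", "0.1", "5", "0.1"],
    ]
    with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as f:
        csv.writer(f).writerows(rows)
    # The returned table ends with a recomputed "All" row:
    # 130 good, 10 chimera, 10 noise out of 150 (86.7% / 6.7% / 6.7%)
    return create_table(f.name)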
Example #3
def _to_report(bg,
               job_output_dir,
               job_id,
               state,
               was_successful,
               run_time,
               error_message=None,
               report_uuid=None):
    """ High Level Report of the workflow state

    Write the output of workflow datastore to pbreports report object

    Workflow summary .dot/svg (collapsed workflow)
    Workflow details .dot/svg (chunked workflow)

    To add:
    - Resolved WorkflowSettings (e.g., nproc, max_workers)
    -

    :type bg: BindingsGraph

    """
    emsg = "" if error_message is None else error_message

    columns = [
        Column('task_id', header='Task id'),
        Column('was_successful', header='Was Successful'),
        Column('state', header="Task State"),
        Column('run_time_sec', header="Run Time (sec)"),
        Column('nproc', header="# of procs"),
        Column("num_core_hours", header="Core Hours")
    ]

    tasks_table = Table('tasks', title="Tasks", columns=columns)
    for tnode in bg.all_task_type_nodes():

        nproc = bg.node[tnode]['nproc']
        # run_time is None if the task has not completed
        run_time_sec = bg.node[tnode]['run_time']
        if run_time_sec is None:
            core_hours = 0.0
        else:
            # e.g. 7200 sec on 8 procs -> (7200 / 3600) * 8 = 16.0 core hours
            core_hours = (run_time_sec / 60.0 / 60.0) * nproc

        tasks_table.add_data_by_column_id('task_id', str(tnode))
        tasks_table.add_data_by_column_id('nproc', nproc)
        tasks_table.add_data_by_column_id('state', bg.node[tnode]['state'])
        tasks_table.add_data_by_column_id(
            'was_successful', bg.node[tnode]['state'] == TaskStates.SUCCESSFUL)
        tasks_table.add_data_by_column_id('run_time_sec', run_time_sec)
        tasks_table.add_data_by_column_id('num_core_hours',
                                          round(core_hours, 4))

    total_core_hours = sum(
        tasks_table.get_column_by_id('num_core_hours').values)

    attributes = [
        Attribute('was_successful', was_successful, name="Was Successful"),
        Attribute('total_run_time_sec', int(run_time), name="Walltime (sec)"),
        Attribute('error_message', emsg, name="Error Message"),
        Attribute('job_id', job_id, name="Job Id"),
        Attribute('job_state', state, name="Job State"),
        Attribute('job_output_dir',
                  job_output_dir,
                  name="Job Output Directory"),
        Attribute('pbsmrtpipe_version',
                  pbsmrtpipe.get_version(),
                  name="pbsmrtpipe Version"),
        Attribute('total_core_hours', round(total_core_hours, 4),
                  name="Total Core Hours")
    ]

    ep_table = _to_table("entry_points", bg, bg.entry_binding_nodes(),
                         "Entry Points")
    fnodes_table = _to_table("file_node", bg, bg.file_nodes(), "File Nodes")

    # It would be nice if the DataSet UUIDs of the entry points were added to
    # the dataset_uuids of the report.
    report = Report('pbsmrtpipe',
                    tables=[tasks_table, ep_table, fnodes_table],
                    attributes=attributes,
                    uuid=report_uuid)
    return report
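
A hedged invocation sketch; every value below is a placeholder and bg stands
in for a real BindingsGraph. The write_json call assumes Report here is
pbcommand's report model.

report = _to_report(bg,
                    job_output_dir="/path/to/job_output",  # placeholder
                    job_id=42,                             # placeholder
                    state="SUCCESSFUL",
                    was_successful=True,
                    run_time=4520.0,                       # wallclock seconds
                    error_message=None,
                    report_uuid=None)
report.write_json("pbsmrtpipe_report.json")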