Example 1
def make_sat_report(aligned_reads_file, mapping_stats_report, variants_report,
                    report, output_dir):
    """
    Entry to report.
    :param aligned_reads_file: (str) path to aligned_reads.xml
    :param mapping_stats_report: (str) path to mapping stats json report
    :param variants_report: (str) path to variants report
    """
    _validate_inputs([('aligned_reads_file', aligned_reads_file),
                      ('mapping_stats_report', mapping_stats_report),
                      ('variants_report', variants_report)])

    d_map = _get_mapping_stats_data(mapping_stats_report)
    reads, inst = _get_reads_info(aligned_reads_file)
    d_bam = _get_read_hole_data(reads, inst)
    d_var = _get_variants_data(variants_report)
    ds = AlignmentSet(aligned_reads_file)

    rpt = Report(meta_rpt.id, dataset_uuids=(ds.uuid, ))
    rpt.add_attribute(
        Attribute(Constants.A_INSTRUMENT, d_bam[Constants.A_INSTRUMENT]))
    rpt.add_attribute(
        Attribute(Constants.A_COVERAGE, d_var[Constants.A_COVERAGE]))
    rpt.add_attribute(
        Attribute(Constants.A_CONCORDANCE, d_var[Constants.A_CONCORDANCE]))
    rpt.add_attribute(
        Attribute(Constants.A_READLENGTH, d_map[Constants.A_READLENGTH]))
    rpt.add_attribute(Attribute(Constants.A_READS, d_bam[Constants.A_READS]))
    rpt = meta_rpt.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
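
A minimal driver for the entry point above, as a sketch: it assumes make_sat_report and its module-level helpers (_validate_inputs, the _get_* functions, Constants, meta_rpt) are in scope, and every path is a placeholder.

make_sat_report(
    aligned_reads_file="aligned_reads.alignmentset.xml",  # AlignmentSet XML
    mapping_stats_report="mapping_stats.json",            # mapping stats JSON report
    variants_report="variants_report.json",               # variants JSON report
    report="sat_report.json",                             # name of the report file to write
    output_dir="/tmp/sat")                                # existing output directory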
Example 2
def run_reference_dataset_report(reference_ds, output_json):
    """

    :param reference_ds:
    :type reference_ds: ReferenceSet

    :param output_json:
    :return:
    """
    output_dir = os.path.dirname(output_json)
    host = socket.getfqdn()

    attributes = _dataset_to_attribute_reports(reference_ds)
    _add = attributes.append

    _add(Attribute("host", host, name="Host"))
    _add(Attribute("task_dir", output_dir, name="Task Directory"))

    fasta_file = reference_ds.toExternalFiles()[0]

    plot_groups = try_fasta_to_plot_group(fasta_file, output_dir)
    report = Report("dev_diagnostic_report",
                    attributes=attributes,
                    plotgroups=plot_groups,
                    dataset_uuids=[reference_ds.uuid])

    report.write_json(output_json)
    return 0
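
A round-trip sketch for the function above, assuming it and its helpers are importable in the current scope and that a ReferenceSet XML exists locally (the dataset path is a placeholder); write_json emits plain JSON, so the standard library is enough to inspect the result.

import json

from pbcore.io import ReferenceSet  # the same dataset class used elsewhere in these examples

ds = ReferenceSet("lambda.referenceset.xml")  # placeholder ReferenceSet XML
run_reference_dataset_report(ds, "reference_report.json")

with open("reference_report.json") as f:
    print(sorted(json.load(f)))  # top-level keys of the serialized report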
Example 3
def make_control_report(control_cmph5, filtered_subreads_csv, report,
                        output_dir, dpi, dumpdata):
    """
    Entry to report.
    :param control_cmph5: (str) path to control_reads.cmp.h5
    :param filtered_subreads_csv: (str) path to filtered_subread_summary.csv
    """
    _validate_inputs(control_cmph5, filtered_subreads_csv)
    name, control_reads = _get_control_reads(control_cmph5)
    filtered_reads = _get_filtered_reads(filtered_subreads_csv)
    control_data, sample_data = _process_reads(control_reads, filtered_reads)
    nr = _get_num_control_reads(control_data)
    if nr == 0:
        # Not sure this ever happens, but logic exists in makeControlReport.py
        r = _get_error_report()
        r.write_json(os.path.join(output_dir, report))
        return
    atts = _get_attributes(name, control_data, sample_data)
    pgs = [
        _get_plot_group_score(control_data, sample_data, output_dir),
        _get_plot_group_length(control_data, sample_data, output_dir)
    ]
    r = Report(meta_rpt.id, attributes=atts, plotgroups=pgs)
    r = meta_rpt.apply_view(r)
    r.write_json(os.path.join(output_dir, report))
Example 4
def make_sat_report(aligned_reads_file, mapping_stats_report, variants_report, report, output_dir):
    """
    Entry to report.
    :param aligned_reads_file: (str) path to aligned_reads.xml
    :param mapping_stats_report: (str) path to mapping stats json report
    :param variants_report: (str) path to variants report
    """
    _validate_inputs([('aligned_reads_file', aligned_reads_file),
                      ('mapping_stats_report', mapping_stats_report),
                      ('variants_report', variants_report)])

    d_map = _get_mapping_stats_data(mapping_stats_report)
    reads, inst = _get_reads_info(aligned_reads_file)
    d_bam = _get_read_hole_data(reads, inst)
    d_var = _get_variants_data(variants_report)

    rpt = Report('sat')
    rpt.add_attribute(Attribute('instrument', d_bam['instrument'],
                                Constants.ATTR_LABELS["instrument"]))
    rpt.add_attribute(Attribute('coverage', d_var['coverage'],
                                Constants.ATTR_LABELS["coverage"]))
    rpt.add_attribute(Attribute('accuracy', d_var['accuracy'],
                                Constants.ATTR_LABELS["accuracy"]))
    rpt.add_attribute(Attribute('mapped_readlength_mean',
                                d_map['mapped_readlength_mean'],
                                Constants.ATTR_LABELS["mapped_readlength_mean"]))
    rpt.add_attribute(Attribute('reads_in_cell', d_bam['reads_in_cell'],
                                Constants.ATTR_LABELS["reads_in_cell"]))

    rpt.write_json(os.path.join(output_dir, report))
Example 5
def make_control_report(control_cmph5, filtered_subreads_csv, report,
                        output_dir, dpi, dumpdata):
    """
    Entry to report.
    :param control_cmph5: (str) path to control_reads.cmp.h5
    :param filtered_subreads_csv: (str) path to filtered_subread_summary.csv
    """
    _validate_inputs(control_cmph5, filtered_subreads_csv)
    name, control_reads = _get_control_reads(control_cmph5)
    filtered_reads = _get_filtered_reads(filtered_subreads_csv)
    control_data, sample_data = _process_reads(control_reads, filtered_reads)
    nr = _get_num_control_reads(control_data)
    if nr == 0:
        # Not sure this ever happens, but logic exists in makeControlReport.py
        r = _get_error_report()
        r.write_json(os.path.join(output_dir, report))
        return
    atts = _get_attributes(name, control_data, sample_data)
    pgs = [_get_plot_group_score(control_data,
                                 sample_data, output_dir),
           _get_plot_group_length(control_data,
                                  sample_data, output_dir)]
    r = Report(meta_rpt.id, attributes=atts, plotgroups=pgs)
    r = meta_rpt.apply_view(r)
    r.write_json(os.path.join(output_dir, report))
Example 6
def make_topvariants_report(gff, reference, how_many, batch_sort_size, report,
                            output_dir, is_minor_variants_rpt=False):
    """
    Entry to report.
    :param gff: (str) path to variants.gff (or rare_variants.gff). Note, could also be *.gz
    :param reference: (str) path to reference dir
    :param how_many: (int)
    :param batch_sort_size: (int)
    :param report: (str) report name
    :param output_dir: (str) output dir
    :param is_minor_variants_rpt: (bool) True to create a minor top variant report. False to
    create a variant report.
    """
    _validate_inputs(gff, reference, how_many, batch_sort_size)

    table_builder = None
    if is_minor_variants_rpt:
        table_builder = MinorVariantTableBuilder()
    else:
        table_builder = VariantTableBuilder()
    vf = VariantFinder(gff, reference, how_many, batch_sort_size)
    top = vf.find_top()
    for v in top:
        table_builder.add_variant(v)

    r = Report(Constants.R_ID, tables=[table_builder.table],
               dataset_uuids=(ReferenceSet(reference).uuid,))
    r = spec.apply_view(r)
    r.write_json(os.path.join(output_dir, report))
    return 0
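
Because is_minor_variants_rpt only switches the table builder, the same entry point covers both report flavors; a sketch with placeholder inputs:

# Placeholder inputs for illustration only.
reference_dir = "/path/to/reference_dir"

# Standard top-variants report (VariantTableBuilder).
make_topvariants_report("variants.gff.gz", reference_dir, how_many=100,
                        batch_sort_size=10000, report="top_variants.json",
                        output_dir="/tmp/reports")

# Minor top-variants report (MinorVariantTableBuilder).
make_topvariants_report("rare_variants.gff.gz", reference_dir, how_many=100,
                        batch_sort_size=10000, report="top_minor_variants.json",
                        output_dir="/tmp/reports", is_minor_variants_rpt=True)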
Example 7
def make_variants_report(aln_summ_gff, variants_gff, reference, max_contigs_to_plot, report, output_dir, dpi=72, dumpdata=True):
    """
    Entry to report.
    :param aln_summ_gff: (str) path to alignment_summary.gff
    :param variants_gff: (str) path to variants_gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    """
    _validate_inputs([('aln_summ_gff', aln_summ_gff),
                      ('variants_gff', variants_gff),
                      ('reference', reference)])

    # reference entry & top contigs
    ref = openReference(reference)
    top_contigs = get_top_contigs_from_ref_entry(ref, max_contigs_to_plot)

    # extract gff data from files
    ref_data, contig_variants = _extract_alignment_summ_data(
        aln_summ_gff, top_contigs)
    _append_variants_gff_data(ref_data, variants_gff)

    # make report objects
    table, atts = _get_consensus_table_and_attributes(ref_data, ref)
    plotgroup = _create_variants_plot_grp(
        top_contigs, contig_variants, output_dir)

    rpt = Report(Constants.R_ID,
                 plotgroups=[plotgroup],
                 attributes=atts,
                 tables=[table],
                 dataset_uuids=(ReferenceSet(reference).uuid,))

    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return rpt
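
Unlike most entry points in this listing, make_variants_report returns the Report it wrote rather than an exit code, so a caller can keep working with the in-memory object; a sketch with placeholder paths:

rpt = make_variants_report(
    aln_summ_gff="alignment_summary.gff",   # placeholder paths throughout
    variants_gff="variants.gff",
    reference="/path/to/reference_dir",
    max_contigs_to_plot=25,
    report="variants_report.json",
    output_dir="/tmp/reports")

print(rpt.id)  # the same object that was just serialized to JSON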
Example 8
def make_sat_report(aligned_reads_file, mapping_stats_report, variants_report, report, output_dir):
    """
    Entry to report.
    :param aligned_reads_file: (str) path to aligned_reads.xml
    :param mapping_stats_report: (str) path to mapping stats json report
    :param variants_report: (str) path to variants report
    """
    _validate_inputs([('aligned_reads_file', aligned_reads_file),
                      ('mapping_stats_report', mapping_stats_report),
                      ('variants_report', variants_report)])

    d_map = _get_mapping_stats_data(mapping_stats_report)
    reads, inst = _get_reads_info(aligned_reads_file)
    d_bam = _get_read_hole_data(reads, inst)
    d_var = _get_variants_data(variants_report)
    ds = AlignmentSet(aligned_reads_file)

    rpt = Report(Constants.R_ID, dataset_uuids=(ds.uuid,))
    rpt.add_attribute(Attribute(Constants.A_INSTRUMENT,
                                d_bam[Constants.A_INSTRUMENT]))
    rpt.add_attribute(Attribute(Constants.A_COVERAGE,
                                d_var[Constants.A_COVERAGE]))
    rpt.add_attribute(Attribute(Constants.A_CONCORDANCE,
                                d_var[Constants.A_CONCORDANCE]))
    rpt.add_attribute(Attribute(Constants.A_READLENGTH,
                                d_map[Constants.A_READLENGTH]))
    rpt.add_attribute(Attribute(Constants.A_READS, d_bam[Constants.A_READS]))
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
Example 9
def make_topvariants_report(gff, reference, how_many, batch_sort_size, report,
                            output_dir):
    """
    Entry to report.
    :param gff: (str) path to variants.gff (or rare_variants.gff). Note, could also be *.gz
    :param reference: (str) path to reference dir
    :param how_many: (int)
    :param batch_sort_size: (int)
    :param report: (str) report name
    :param output_dir: (str) output dir
    """
    _validate_inputs(gff, reference, how_many, batch_sort_size)

    table_builder = VariantTableBuilder()
    vf = VariantFinder(gff, reference, how_many, batch_sort_size)
    top = vf.find_top()
    for v in top:
        table_builder.add_variant(v)

    r = Report(Constants.R_ID,
               tables=[table_builder.table],
               dataset_uuids=(ReferenceSet(reference).uuid, ))
    r = spec.apply_view(r)
    r.write_json(os.path.join(output_dir, report))
    return 0
Example 10
def run_reference_dataset_report(reference_ds, output_json):
    """

    :param reference_ds:
    :type reference_ds: ReferenceSet

    :param output_json:
    :return:
    """
    output_dir = os.path.dirname(output_json)
    host = socket.getfqdn()

    attributes = _dataset_to_attribute_reports(reference_ds)
    _add = attributes.append

    _add(Attribute("host", host, name="Host"))
    _add(Attribute("task_dir", output_dir, name="Task Directory"))

    fasta_file = reference_ds.toExternalFiles()[0]

    plot_groups = try_fasta_to_plot_group(fasta_file, output_dir)
    report = Report("dev_diagnostic_report",
                    attributes=attributes,
                    plotgroups=plot_groups,
                    dataset_uuids=[reference_ds.uuid])

    report.write_json(output_json)
    return 0
Example 11
def _example_main(input_file, output_file, **kwargs):
    """
    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC models.
    """

    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # Open dset CSV. Store absolute path of each alignment set.
    dset_paths = _get_dset_paths(input_file[0])

    # Open plots CSV. Store names of plots to produce.
    plots_to_generate = _get_plots_to_generate(input_file[1])

    dsets_kpis = {}
    for f in dset_paths:
        dset = openDataSet(dset_paths[f]['aset'])
        subsampled_dset = _subsample_alignments(dset)
        dsets_kpis[f] = _getKPIs(dset, subsampled_dset)

    figures = []
    # figure tuple has form (plot_group_id, plot_id, figure)
    if 'accuracy_vs_readlength' in plots_to_generate:
        figures.append(('accuracy', 'accuracy_vs_readlength', accuracy_plots._plot_accuracy_vs_readlength(dsets_kpis)))
    if 'accuracy' in plots_to_generate:
        figures.append(('accuracy', 'accuracy', accuracy_plots._plot_accuracy_distribution(dsets_kpis)))
    if 'accuracy_boxplot' in plots_to_generate:
        figures.append(('accuracy', 'accuracy_boxplot', accuracy_plots._plot_accuracy_boxplots(dsets_kpis)))

    all_plots = {} # dictionary of plots. keys are groups
    for plot_group, plot_id, fig in figures:
        if plot_group not in all_plots.keys():
            all_plots[plot_group] = []
        plot(fig, filename='{i}.html'.format(i=plot_id), show_link=False, auto_open=False)
        phantomjs_driver.set_window_size(1920, 1080)
        phantomjs_driver.get('{i}.html'.format(i=plot_id))
        phantomjs_driver.save_screenshot('{i}.png'.format(i=plot_id))
        phantomjs_driver.get('{i}.html'.format(i=plot_id))
        phantomjs_driver.save_screenshot('{i}_thumb.png'.format(i=plot_id))
        os.remove('{i}.html'.format(i=plot_id))
        plot_path = '{i}.png'.format(i=plot_id)
        thumb_path = '{i}_thumb.png'.format(i=plot_id)
        all_plots[plot_group].append(Plot(plot_id, plot_path, thumbnail=thumb_path))

    plot_groups = []
    for plot_group_title in all_plots.keys():
        plot_group = PlotGroup(plot_group_title,
                               plots=all_plots[plot_group_title])
        plot_groups.append(plot_group)

    report = Report('mh_toy', tables=(), plotgroups=plot_groups, attributes=())
    report.write_json(output_file)

    phantomjs_driver.quit()

    return 0
Example 12
def write_random_report(path, nrecords):

    attributes = [
        Attribute("mock_attr_{i}".format(i=i), i, name="Attr {i}".format(i=i))
        for i in xrange(nrecords)
    ]
    r = Report("mock_report", attributes=attributes)
    r.write_json(path)
    return r
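
The xrange call marks this as Python 2 code; under Python 3 the same report is built with range, as in this sketch (the output path is a placeholder):

from pbcommand.models.report import Report, Attribute

# Python 3 rendering of the same idea.
attributes = [
    Attribute("mock_attr_{i}".format(i=i), i, name="Attr {i}".format(i=i))
    for i in range(5)
]
Report("mock_report", attributes=attributes).write_json("mock_report.json")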
Example 13
def write_task_report(run_time, nproc, exit_code):
    attributes = [
        Attribute("host", value=os.uname()[1]),
        Attribute("system", value=os.uname()[0]),
        Attribute("nproc", value=nproc),
        Attribute("run_time", value=run_time),
        Attribute("exit_code", value=exit_code)
    ]
    report = Report("workflow_task",
                    title="Workflow Task Report",
                    attributes=attributes,
                    tags=("internal", ))
    report.write_json("task-report.json")
Example 14
def run_dev_ccs_report(rtc):
    from pbcore.io import ConsensusReadSet
    with ConsensusReadSet(rtc.task.input_files[0]) as ds:
        ds.updateCounts()
        attr = [
            Attribute("number_of_records", value=ds.numRecords),
            Attribute("total_length", value=ds.totalLength)
        ]
        report = Report("ccs_report",
                        title="ConsensusReadSet XML Report",
                        attributes=attr)
        report.write_json(rtc.task.output_files[0])
    return 0
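
The same report can be produced without a resolved tool contract by taking the dataset path directly; a sketch that reuses only the pbcore/pbcommand calls already shown above (paths are placeholders):

from pbcore.io import ConsensusReadSet
from pbcommand.models.report import Report, Attribute

def make_ccs_report(ccs_xml, output_json):
    # Mirrors run_dev_ccs_report above, minus the rtc plumbing.
    with ConsensusReadSet(ccs_xml) as ds:
        ds.updateCounts()
        attr = [
            Attribute("number_of_records", value=ds.numRecords),
            Attribute("total_length", value=ds.totalLength)
        ]
        Report("ccs_report", title="ConsensusReadSet XML Report",
               attributes=attr).write_json(output_json)
    return 0

make_ccs_report("my.consensusreadset.xml", "ccs_report.json")  # placeholder paths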
Example 15
def make_modifications_report(modifications_h5, report, output_dir, dpi=72):
    """
    Entry point to report generation.
    """
    basemods_h5 = h5py.File(modifications_h5)
    scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
    hist = get_qmod_hist(basemods_h5, output_dir, dpi)
    pg = PlotGroup(Constants.PG_KIN,
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(meta_rpt.id, plotgroups=[pg])
    rpt = meta_rpt.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
Example 16
def make_modifications_report(modifications_h5, report, output_dir, dpi=72):
    """
    Entry point to report generation.
    """
    basemods_h5 = h5py.File(modifications_h5)
    scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
    hist = get_qmod_hist(basemods_h5, output_dir, dpi)
    pg = PlotGroup(Constants.PG_KIN,
                   title=get_plotgroup_title(spec, Constants.PG_KIN),
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(spec.id, plotgroups=[pg])
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
Example 17
    def make_report(self, gff, reference, max_contigs_to_plot, report,
                    output_dir):
        """
        Entry to report.
        :param gff: (str) path to alignment_summary.gff
        :param reference: (str) path to reference_dir
        :param max_contigs_to_plot: (int) max number of contigs to plot
        """
        _validate_inputs(gff, reference)
        top_contigs = get_top_contigs(reference, max_contigs_to_plot)
        cov_map = _get_contigs_to_plot(gff, top_contigs)

        # stats may be None
        stats = _get_reference_coverage_stats(cov_map.values())

        a1 = _get_att_mean_coverage(stats)
        a2 = _get_att_percent_missing(stats)

        plot_grp_coverage = self._create_coverage_plot_grp(
            top_contigs, cov_map, output_dir)

        plot_grp_histogram = None
        if stats is not None:
            plot_grp_histogram = self._create_coverage_histo_plot_grp(
                stats, output_dir)

        plotgroups = []
        # Don't add the Plot Group if no plots are added
        if plot_grp_coverage.plots:
            plotgroups.append(plot_grp_coverage)

        if plot_grp_histogram is not None:
            # Don't add the Plot Group if no plots are added
            if plot_grp_histogram.plots:
                plotgroups.append(plot_grp_histogram)

        rpt = Report(self.spec.id,
                     plotgroups=plotgroups,
                     attributes=[a1, a2],
                     dataset_uuids=(ReferenceSet(reference).uuid,))

        rpt = self.spec.apply_view(rpt)
        rpt.write_json(os.path.join(output_dir, report))
        return rpt
Example 18
def make_coverage_report(gff, reference, max_contigs_to_plot, report,
                         output_dir):
    """
    Entry to report.
    :param gff: (str) path to alignment_summary.gff
    :param reference: (str) path to reference_dir
    :param max_contigs_to_plot: (int) max number of contigs to plot
    """
    _validate_inputs(gff, reference)
    top_contigs = get_top_contigs(reference, max_contigs_to_plot)
    cov_map = _get_contigs_to_plot(gff, top_contigs)

    # stats may be None
    stats = _get_reference_coverage_stats(cov_map.values())

    a1 = _get_att_mean_coverage(stats)
    a2 = _get_att_percent_missing(stats)

    plot_grp_coverage = _create_coverage_plot_grp(
        top_contigs, cov_map, output_dir)

    plot_grp_histogram = None
    if stats is not None:
        plot_grp_histogram = _create_coverage_histo_plot_grp(stats, output_dir)

    plotgroups = []
    # Don't add the Plot Group if no plots are added
    if plot_grp_coverage.plots:
        plotgroups.append(plot_grp_coverage)

    if plot_grp_histogram is not None:
        # Don't add the Plot Group if no plots are added
        if plot_grp_histogram.plots:
            plotgroups.append(plot_grp_histogram)

    rpt = Report('coverage',
                 title="Coverage",
                 plotgroups=plotgroups,
                 attributes=[a1, a2],
                 dataset_uuids=(ReferenceSet(reference).uuid,))

    rpt.write_json(os.path.join(output_dir, report))
    return rpt
Example 19
def make_polished_assembly_report(report, gff, fastq, output_dir):
    """
    Entry to report.
    :param gff: (str) path to alignment_summary.gff
    :param fastq: (str) path to polished fastq file
    :param report: (str) report name
    :param output_dir: (str) output directory
    """
    log.info("Starting version {f} v{x}".format(
        x=__version__, f=os.path.basename(__file__)))

    log.debug("Loading {f}".format(f=fastq))
    contigs = _get_contigs(fastq)

    log.debug("Loading {f}".format(f=gff))
    _get_contig_coverage(gff, contigs)

    log.debug("Computing and creating plots")

    cvqp = _coverage_vs_quality_plot(contigs, output_dir)

    pgrp = PlotGroup('coverage_based',
                     title='Contig Coverage vs Confidence',
                     thumbnail=cvqp.thumbnail,
                     plots=[cvqp])

    rep = Report('polished_assembly')
    rep.add_attribute(
        Attribute(Constants.A_N_CONTIGS, len(contigs),
                  Constants.ATTR_LABELS[Constants.A_N_CONTIGS]))
    read_lengths = [c.length for c in contigs.values()]
    read_lengths.sort()
    rep.add_attribute(_get_att_max_contig_length(read_lengths))
    rep.add_attribute(_get_att_n_50_contig_length(read_lengths))
    rep.add_attribute(_get_att_sum_contig_lengths(read_lengths))
    rep.add_plotgroup(pgrp)

    rep.write_json(os.path.join(output_dir, report))
    _write_coverage_vs_quality_csv(contigs, output_dir)

    return 0
Example 20
def make_polished_assembly_report(report, gff, fastq, output_dir):
    """
    Entry to report.
    :param gff: (str) path to alignment_summary.gff
    :param fastq: (str) path to polished fastq file
    :param report: (str) report name
    :param output_dir: (str) output directory
    """
    log.info("Starting version {f} v{x}".format(x=__version__,
                                                f=os.path.basename(__file__)))

    log.debug("Loading {f}".format(f=fastq))
    contigs = _get_contigs(fastq)

    log.debug("Loading {f}".format(f=gff))
    _get_contig_coverage(gff, contigs)

    log.debug("Computing and creating plots")

    cvqp = _coverage_vs_quality_plot(contigs, output_dir)

    pgrp = PlotGroup(Constants.PG_COVERAGE,
                     thumbnail=cvqp.thumbnail,
                     plots=[cvqp])

    rep = Report(Constants.R_ID)
    rep.add_attribute(Attribute(Constants.A_N_CONTIGS, len(contigs)))
    read_lengths = [c.length for c in contigs.values()]
    read_lengths.sort()
    rep.add_attribute(_get_att_max_contig_length(read_lengths))
    rep.add_attribute(_get_att_n_50_contig_length(read_lengths))
    rep.add_attribute(_get_att_sum_contig_lengths(read_lengths))
    rep.add_attribute(_get_att_esize_contig_length(read_lengths))
    rep.add_plotgroup(pgrp)
    rep = spec.apply_view(rep)

    rep.write_json(os.path.join(output_dir, report))
    _write_coverage_vs_quality_csv(contigs, output_dir)

    return 0
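
Note the argument order: the report file name comes before the inputs. A call sketch with keyword arguments keeps that unambiguous (paths are placeholders):

make_polished_assembly_report(
    report="polished_assembly_report.json",  # output report file name
    gff="alignment_summary.gff",             # coverage summary GFF
    fastq="polished_assembly.fastq",         # polished contigs
    output_dir="/tmp/reports")               # existing output directory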
Example 21
def run_reference_dataset_report(reference_ds, output_json):
    """

    :param reference_ds:
    :type reference_ds: ReferenceSet

    :param output_json:
    :return:
    """
    attributes = _dataset_to_attribute_reports(reference_ds)

    fasta_file = reference_ds.toExternalFiles()[0]

    output_dir = os.path.dirname(output_json)
    plot_groups = try_fasta_to_plot_group(fasta_file, output_dir)
    report = Report("ds_reference_report",
                    attributes=attributes,
                    plotgroups=plot_groups,
                    dataset_uuids=[reference_ds.uuid])

    report.write_json(output_json)
    return 0
Example 22
def make_modifications_report(modifications_h5,
                              report,
                              output_dir,
                              dpi=DEFAULT_DPI):
    """
    Entry point to report generation.
    """
    try:
        import h5py
    except ImportError:
        raise ImportError("This module requires that h5py be installed")
    basemods_h5 = h5py.File(modifications_h5)
    scatter = get_qmod_plot(basemods_h5, output_dir, dpi)
    hist = get_qmod_hist(basemods_h5, output_dir, dpi)
    pg = PlotGroup(Constants.PG_KIN,
                   title=get_plotgroup_title(spec, Constants.PG_KIN),
                   thumbnail=scatter.thumbnail,
                   plots=[scatter, hist])
    rpt = Report(spec.id, plotgroups=[pg])
    rpt = spec.apply_view(rpt)
    rpt.write_json(os.path.join(output_dir, report))
    return 0
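
A call sketch for the entry point above; the HDF5 path is a placeholder and dpi falls back to the module's DEFAULT_DPI:

make_modifications_report(
    modifications_h5="basemods.h5",        # placeholder path to the kinetics HDF5
    report="modifications_report.json",
    output_dir="/tmp/reports")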
Example 23
def run_verify_dataset_filters(rtc):
    from pbcore.io import SubreadSet
    expected_num_records = rtc.task.options[
        'pbsmrtpipe.task_options.num_records']
    expected_length = rtc.task.options['pbsmrtpipe.task_options.total_length']
    with SubreadSet(rtc.task.input_files[0]) as ds:
        # FIXME ideally we should not need to do this, but Scala code does not
        # have the ability to update counts directly
        ds.updateCounts()
        if ds.numRecords != expected_num_records:
            raise ValueError("Expected {e} records, got {n}".format(
                e=expected_num_records, n=ds.numRecords))
        if ds.totalLength != expected_length:
            raise ValueError("Expected length {e}, got {n}".format(
                e=expected_length, n=ds.totalLength))
        attr = [
            Attribute("number_of_records", value=ds.numRecords),
            Attribute("total_length", value=ds.totalLength)
        ]
        report = Report("subreads_report",
                        title="SubreadSet XML Report",
                        attributes=attr)
        report.write_json(rtc.task.output_files[0])
    return 0
Example 24
def run_hello_world(conditions, output_report):
    a = Attribute("hello", value='world')
    report = Report('pbinternal_hello_world', attributes=[a])
    report.write_json(output_report)
    return 0
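
The conditions argument is accepted but never used in the body, so a minimal call only needs an output path (placeholder shown):

run_hello_world(conditions=None,  # unused by the function body
                output_report="hello_world_report.json")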
Example 25
def write_random_report(path, nrecords):

    attributes = [Attribute("mock_attr_{i}".format(i=i), i, name="Attr {i}".format(i=i)) for i in xrange(nrecords)]
    r = Report("mock_report", attributes=attributes)
    r.write_json(path)
    return r
Example 26
def _example_main(input_file, output_file, **kwargs):
    """
    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC models.
    """

    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # Open dset CSV. Store absolute path of each alignment set.
    dset_paths = _get_dset_paths(input_file[0])

    # Open plots CSV. Store names of plots to produce.
    # plots_to_generate = _get_plots_to_generate(input_file[1])

    dsets_kpis = {}
    for f in dset_paths:
        dset = openDataSet(dset_paths[f]['aset'])
        subsampled_dset = _subsample_alignments(dset)
        dsets_kpis[f] = _getKPIs(dset, subsampled_dset)

    # figure tuple has form (plot_group_id, plot_id, display_name, figure)
    figures = [
        ('accuracy', 'accuracy_vs_readlength', "Accuracy vs ReadLength", accuracy_plots._plot_accuracy_vs_readlength(dsets_kpis)),
        ('accuracy', 'accuracy', "Accuracy", accuracy_plots._plot_accuracy_distribution(dsets_kpis)),
        ('accuracy', 'accuracy_boxplot', "Accuracy BoxPlot", accuracy_plots._plot_accuracy_boxplots(dsets_kpis))
    ]

    all_plots = {}  # dictionary of plots. keys are groups

    with PhantomDriver() as driver:
        for plot_group, plot_id, display_name, fig in figures:
            if plot_group not in all_plots.keys():
                all_plots[plot_group] = []
            plot(fig, filename='{i}.html'.format(i=plot_id), show_link=False, auto_open=False)
            plot_name = '{i}.png'.format(i=plot_id)

            driver.set_window_size(1920, 1080)
            driver.get('{i}.html'.format(i=plot_id))
            driver.save_screenshot(plot_name)
            driver.get('{i}.html'.format(i=plot_id))
            driver.save_screenshot('{i}_thumb.png'.format(i=plot_id))

            log.info("Saved screen to {}".format(plot_name))

            os.remove('{i}.html'.format(i=plot_id))
            plot_path = '{i}.png'.format(i=plot_id)
            thumb_path = '{i}_thumb.png'.format(i=plot_id)
            # the Plot API is a bit awkward. It should have "title"
            # the caption will be used as the display name
            p = Plot(plot_id, plot_path, thumbnail=thumb_path, caption=display_name)
            all_plots[plot_group].append(p)

    log.info("completed generating {} plots".format(len(all_plots)))

    plot_groups = []
    for plot_group_title in all_plots.keys():
        plot_group = PlotGroup(plot_group_title, title="Accuracy Plots",
                               plots=all_plots[plot_group_title])
        plot_groups.append(plot_group)

    report = Report('mh_toy', tables=(), plotgroups=plot_groups, attributes=())
    report.write_json(output_file)

    return 0
Example 27
def _example_main(input_file, output_file, **kwargs):
    """
    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC models.
    """

    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # Open dset CSV. Store absolute path of each alignment set.
    dset_paths = _get_dset_paths(input_file[0])

    # Open plots CSV. Store names of plots to produce.
    plots_to_generate = _get_plots_to_generate(input_file[1])

    dsets_kpis = {}
    for f in dset_paths:
        dset = openDataSet(dset_paths[f]['aset'])
        subsampled_dset = _subsample_alignments(dset)
        dsets_kpis[f] = _getKPIs(dset, subsampled_dset)

    figures = []
    # figure tuple has form (plot_group_id, plot_id, figure)
    if 'accuracy_vs_readlength' in plots_to_generate:
        figures.append(
            ('accuracy', 'accuracy_vs_readlength',
             accuracy_plots._plot_accuracy_vs_readlength(dsets_kpis)))
    if 'accuracy' in plots_to_generate:
        figures.append(
            ('accuracy', 'accuracy',
             accuracy_plots._plot_accuracy_distribution(dsets_kpis)))
    if 'accuracy_boxplot' in plots_to_generate:
        figures.append(('accuracy', 'accuracy_boxplot',
                        accuracy_plots._plot_accuracy_boxplots(dsets_kpis)))

    all_plots = {}  # dictionary of plots. keys are groups
    for plot_group, plot_id, fig in figures:
        if plot_group not in all_plots.keys():
            all_plots[plot_group] = []
        plot(fig,
             filename='{i}.html'.format(i=plot_id),
             show_link=False,
             auto_open=False)
        phantomjs_driver.set_window_size(1920, 1080)
        phantomjs_driver.get('{i}.html'.format(i=plot_id))
        phantomjs_driver.save_screenshot('{i}.png'.format(i=plot_id))
        phantomjs_driver.get('{i}.html'.format(i=plot_id))
        phantomjs_driver.save_screenshot('{i}_thumb.png'.format(i=plot_id))
        os.remove('{i}.html'.format(i=plot_id))
        plot_path = '{i}.png'.format(i=plot_id)
        thumb_path = '{i}_thumb.png'.format(i=plot_id)
        all_plots[plot_group].append(
            Plot(plot_id, plot_path, thumbnail=thumb_path))

    plot_groups = []
    for plot_group_title in all_plots.keys():
        plot_group = PlotGroup(plot_group_title,
                               plots=all_plots[plot_group_title])
        plot_groups.append(plot_group)

    report = Report('mh_toy', tables=(), plotgroups=plot_groups, attributes=())
    report.write_json(output_file)

    phantomjs_driver.quit()

    return 0
Example 28
def _example_main(input_file, output_file, **kwargs):
    """
    This func should be imported from your python package.

    This should have *no* dependency on the pbcommand IO, such as the RTC/TC models.
    """

    # This is just for test purposes
    log.info("Running example main with {i} {o} kw:{k}".format(i=input_file,
                                                               o=output_file,
                                                               k=kwargs))

    # Open input CSV. Store absolute path of each alignment set.
    dset_paths = _get_dset_paths(input_file)

    dsets_kpis = {}
    for f in dset_paths:
        dset = openDataSet(f)
        subsampled_dset = _subsample_alignments(dset)
        dsets_kpis[f] = _getKPIs(dset, subsampled_dset)

    # save a simple plot
    traces = []
    titles = []
    max_rl = 0
    for key in dsets_kpis.keys():
        rl = dsets_kpis[key]['readlength']
        acc = dsets_kpis[key]['accuracy']
        if max(rl) > max_rl:
            max_rl = max(rl)
        trace = Scatter(x=rl, y=acc, mode='markers')
        traces.append(trace)
        titles.append(str(key))
    rows = len(traces)
    fig = plotly.tools.make_subplots(rows=rows,
                                     cols=1,
                                     subplot_titles=tuple(titles))
    fig['layout']['font']['size'] = 8
    fig['layout'].update(showlegend=False)
    for row, trace in enumerate(traces):
        fig.append_trace(trace, row + 1,
                         1)  # convert from zero-based to one-based indexing
        fig['layout']['xaxis' + str(row + 1)]['tickfont'].update(size=20)
        fig['layout']['yaxis' + str(row + 1)]['tickfont'].update(size=20)
        fig['layout']['xaxis' + str(row + 1)].update(range=[0, max_rl])

    fig['layout']['yaxis' + str(rows // 2 + 1)].update(title='accuracy')
    fig['layout']['yaxis' + str(rows // 2 + 1)]['titlefont'].update(size=20)
    fig['layout']['xaxis' + str(rows)].update(title='readlength (bases)')
    fig['layout']['xaxis' + str(rows)]['titlefont'].update(size=20)

    plot(fig,
         filename='accuracy_vs_readlength.html',
         show_link=False,
         auto_open=False)
    phantomjs_driver.set_window_size(1920, 1080)
    phantomjs_driver.get('accuracy_vs_readlength.html')
    phantomjs_driver.save_screenshot('accuracy_vs_readlength.png')
    #phantomjs_driver.set_window_size(400, 300) ruins the label size relations etc.
    phantomjs_driver.get('accuracy_vs_readlength.html')
    phantomjs_driver.save_screenshot('accuracy_vs_readlength_thumb.png')
    os.remove('accuracy_vs_readlength.html')
    plot_path = 'accuracy_vs_readlength.png'
    thumb_path = 'accuracy_vs_readlength_thumb.png'
    plot_id = 'accuracy_vs_readlength'
    acc_rl_plot = Plot(plot_id, plot_path, thumbnail=thumb_path)
    plot_list = [acc_rl_plot]

    plot_group = PlotGroup('accuracy', plots=plot_list)
    report = Report('accuracy',
                    tables=(),
                    plotgroups=[plot_group],
                    attributes=())
    report.write_json(output_file)

    return 0