Example #1
0
 def run_after(self, rtc, output_dir):
     """Validate the values, names, and ordering of the report attributes."""
     report = load_report_from_json(rtc.task.output_files[0])
     id_to_value = dict((attr.id, attr.value) for attr in report.attributes)
     id_to_name = dict((attr.id, attr.name) for attr in report.attributes)
     expected_values = {"mock_attr_0": 0, "mock_attr_1": 2, "mock_attr_2": 4,
                        "mock_attr_3": 6, "mock_attr_4": 8}
     expected_names = {"mock_attr_0": "Attr 0", "mock_attr_1": "Attr 1",
                       "mock_attr_2": "Attr 2", "mock_attr_3": "Attr 3",
                       "mock_attr_4": "Attr 4"}
     self.assertEqual(id_to_value, expected_values)
     self.assertEqual(id_to_name, expected_names)
     # check attribute order
     ordered_ids = [str(attr.id) for attr in report.attributes]
     self.assertEqual(ordered_ids, ["mock_attr_0", "mock_attr_1",
                                    "mock_attr_2", "mock_attr_3",
                                    "mock_attr_4"])
Example #2
0
 def run_after(self, rtc, output_dir):
     """Check the polished isoform counts recorded in the output report."""
     report = load_report_from_json(rtc.task.output_files[0])
     observed = dict((attr.id, attr.value) for attr in report.attributes)
     expected = {"num_polished_hq_isoforms": 11701,
                 "num_polished_lq_isoforms": 44}
     self.assertEqual(observed, expected)
Example #3
0
def _report_to_attributes(report_file):
    """
    Load the attributes from a report JSON file, appending a derived
    average-consensus-isoform-length attribute when isoforms are present.
    """
    report = load_report_from_json(report_file)
    values = dict((attr.id, attr.value) for attr in report.attributes)
    n_isoforms = values.get("isoseq_numconsensusisoforms", 0)
    if n_isoforms > 0:
        mean_length = values["isoseq_numtotalbases"] / n_isoforms
        report.attributes.append(
            Attribute("isoseq_average_consensus_isoform_length", mean_length,
                      name="Average consensus isoform length"))
    return report.attributes
 def test_exit_code_0(self):
     """Run the SAT report CLI end-to-end and verify key attribute values."""
     bam = self.getAlignmentSet()
     var_rpt = os.path.join(DATA, 'variants_report.json')
     mapping_rpt = os.path.join(DATA, 'mapping_stats_report.json')
     cmd = 'python -m pbreports.report.sat {o} {r} {c} {a} {v}'.format(o=self._output_dir,
                                                                       r='rpt.json',
                                                                       c=bam,
                                                                       a=var_rpt,
                                                                       v=mapping_rpt)
     o, c, m = backticks(cmd)
     log.info(cmd)
     # BUG FIX: `c is not 0` compared object identity, which is not
     # guaranteed for ints; use value inequality instead.
     if c != 0:
         log.error(m)
         log.error(o)
         print(m)
     # assertEquals is a deprecated alias of assertEqual
     self.assertEqual(0, c)
     rpt_file = os.path.join(self._output_dir, 'rpt.json')
     rpt = load_report_from_json(rpt_file)
     self.assertEqual('sidney', rpt.get_attribute_by_id('instrument').value)
     self.assertEqual(1, rpt.get_attribute_by_id('coverage').value)
     self.assertEqual(1, rpt.get_attribute_by_id('concordance').value)
     self.assertEqual(7752, rpt.get_attribute_by_id(
         'mapped_readlength_mean').value)
     self.assertEqual(48, rpt.get_attribute_by_id('reads_in_cell').value)
     out = StringIO()
     self.assertTrue(summarize_report(rpt_file, out=out))
 def test_exit_code_0(self):
     """Run the SAT report CLI end-to-end and verify key attribute values."""
     bam = self.getAlignmentSet()
     var_rpt = os.path.join(DATA, 'variants_report.json')
     mapping_rpt = os.path.join(DATA, 'mapping_stats_report.json')
     cmd = 'python -m pbreports.report.sat {o} {r} {c} {a} {v}'.format(o=self._output_dir,
                                                             r='rpt.json',
                                                             c=bam,
                                                             a=var_rpt,
                                                             v=mapping_rpt)
     o, c, m = backticks(cmd)
     log.info(cmd)
     # BUG FIX: `c is not 0` compared object identity, which is not
     # guaranteed for ints; use value inequality instead.
     if c != 0:
         log.error(m)
         log.error(o)
         print(m)
     # assertEquals is a deprecated alias of assertEqual
     self.assertEqual(0, c)
     rpt_file = os.path.join(self._output_dir, 'rpt.json')
     rpt = load_report_from_json(rpt_file)
     self.assertEqual('sidney', rpt.get_attribute_by_id('instrument').value)
     self.assertEqual(1, rpt.get_attribute_by_id('coverage').value)
     self.assertEqual(1, rpt.get_attribute_by_id('accuracy').value)
     self.assertEqual(1328, rpt.get_attribute_by_id('mapped_readlength_mean').value)
     self.assertEqual(48, rpt.get_attribute_by_id('reads_in_cell').value)
     out = StringIO()
     self.assertTrue(summarize_report(rpt_file, out=out))
 def run_after(self, rtc, output_dir):
     """Verify the avg_flnc_len attribute is present with the expected value."""
     rpt = load_report_from_json(rtc.task.output_files[0])
     matches = [attr for attr in rpt.attributes if attr.id == "avg_flnc_len"]
     if not matches:
         self.fail("avg_flnc_len not found")
     self.assertEqual(matches[0].value, 1142)
Example #7
0
def summarize_report(report_file, out=sys.stdout):
    """
    Write the coverage and concordance attributes of a SAT report to `out`,
    returning True when both equal 1.
    """
    report = load_report_from_json(report_file)
    values = dict((attr.id, attr.value) for attr in report.attributes)
    coverage = values[Constants.A_COVERAGE]
    concordance = values[Constants.A_CONCORDANCE]
    concordance_name = meta_rpt.get_meta_attribute(Constants.A_CONCORDANCE).name
    coverage_name = meta_rpt.get_meta_attribute(Constants.A_COVERAGE).name
    out.write("%s:\n" % report_file)
    out.write("  {n}: {a}\n".format(n=concordance_name, a=concordance))
    out.write("  {n}: {c}\n".format(n=coverage_name, c=coverage))
    return coverage == 1 and concordance == 1
Example #8
0
 def _validate_against_spec(path):
     # NOTE(review): the body reads `self` but the signature takes no `self`
     # parameter -- presumably this is a closure defined inside a method where
     # `self` is in scope; confirm against the enclosing definition.
     """Validate the report JSON at `path` against its registered spec.

     Skips the test when no specs are available; fails when the report's
     id has no matching spec; otherwise returns the validation result.
     """
     if self._specs is None:
         raise unittest.SkipTest("Can't find report specs.")
     rpt = load_report_from_json(path)
     # specs are keyed by report id
     spec = self._specs.get(rpt.id, None)
     if spec is None:
         self.fail("No spec found for report {r}".format(r=rpt.id))
     else:
         return spec.validate_report(rpt)
Example #9
0
def summarize_report(report_file, out=sys.stdout):
    """
    Write the consensus coverage and accuracy from a report to `out`,
    returning True when both equal 1.
    """
    report = load_report_from_json(report_file)
    values = dict((attr.id, attr.value) for attr in report.attributes)
    coverage = values["coverage"]
    accuracy = values["accuracy"]
    out.write("%s:\n" % report_file)
    out.write("  CONSENSUS ACCURACY: {a}\n".format(a=accuracy))
    out.write("  CONSENSUS COVERAGE: {c}\n".format(c=coverage))
    return coverage == 1 and accuracy == 1
Example #10
0
def _get_mapping_stats_data(mapping_stats_rpt_file):
    """
    Extract the mean mapped read length from the mapping stats report.

    :param mapping_stats_rpt_file: (str) path to the mapping stats report
    :return dict: mean mapped read length keyed by Constants.M_READLENGTH
    """
    report = load_report_from_json(mapping_stats_rpt_file)
    mean_readlength = report.get_attribute_by_id(Constants.M_READLENGTH).value
    return {Constants.M_READLENGTH: mean_readlength}
Example #11
0
def summarize_report(report_file, out=sys.stdout):
    """
    Print coverage and concordance attribute name/value pairs to `out`;
    return True when both values equal 1.
    """
    report = load_report_from_json(report_file)
    by_id = dict((attr.id, attr) for attr in report.attributes)
    coverage = by_id[Constants.A_COVERAGE]
    concordance = by_id[Constants.A_CONCORDANCE]
    out.write("{f}:\n".format(f=report_file))
    for attribute in (concordance, coverage):
        out.write("  {n}: {v}\n".format(n=attribute.name, v=attribute.value))
    return coverage.value == 1 and concordance.value == 1
Example #12
0
 def _validate_against_spec(path):
     # NOTE(review): the body reads `self` but the signature takes no `self`
     # parameter -- presumably this is a closure defined inside a method where
     # `self` is in scope; confirm against the enclosing definition.
     """Validate the report JSON at `path` against its registered spec.

     Skips the test when no specs are available; fails when the report's
     id has no matching spec; otherwise returns the validation result.
     """
     if self._specs is None:
         raise unittest.SkipTest("Can't find report specs.")
     rpt = load_report_from_json(path)
     # specs are keyed by report id
     spec = self._specs.get(rpt.id, None)
     if spec is None:
         self.fail("No spec found for report {r}".format(r=rpt.id))
     else:
         return spec.validate_report(rpt)
Example #13
0
def summarize_report(report_file, out=sys.stdout):
    """
    Emit the concordance and coverage attributes of a report, one
    "name: value" line each; return True when both values are 1.
    """
    attributes = {a.id: a for a in load_report_from_json(report_file).attributes}
    coverage = attributes[Constants.A_COVERAGE]
    concordance = attributes[Constants.A_CONCORDANCE]
    out.write("{f}:\n".format(f=report_file))
    out.write("  {n}: {v}\n".format(n=concordance.name, v=concordance.value))
    out.write("  {n}: {v}\n".format(n=coverage.name, v=coverage.value))
    return coverage.value == 1 and concordance.value == 1
Example #14
0
def _get_mapping_stats_data(mapping_stats_rpt_file):
    """
    Extract the mean mapped read length from the mapping stats report.

    :param mapping_stats_rpt_file: (str) path to the mapping stats report
    :return dict: mean mapped read length keyed by Constants.M_READLENGTH
    """
    readlength_attr = load_report_from_json(
        mapping_stats_rpt_file).get_attribute_by_id(Constants.M_READLENGTH)
    return {Constants.M_READLENGTH: readlength_attr.value}
def makeReport(inReadsFN, inSummaryFN, outDir):
    """
    Generate a report with ID, tables, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus
    isoforms produced by pbtranscript.py cluster.
    This file is required to plot a read length histogram as part of
    the report:
         consensus_isoforms_readlength_hist.png

    inSummaryFN --- a summary TXT file with cluster attributes,
    including two attributes:
         number of consensus isoforms
         average length of consensus isoforms
    Attributes of the report are extracted from this file.

    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Collect read lengths of the consensus isoform sequences
    reader = ContigSet(inReadsFN)
    rs = [len(r.sequence) for r in reader]
    reader.close()
    readlengths = np.array(rs)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 title="Read Length of Consensus Isoforms Reads",
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))
    # Produce attributes based on summary.
    # NOTE(review): the dataset UUID comes from re-opening the reads file,
    # which was already read above -- opening it twice looks redundant.
    dataset_uuids = [ContigSet(inReadsFN).uuid]
    if inSummaryFN.endswith(".json"):
        attributes = _report_to_attributes(inSummaryFN)
        # NOTE(review): `r` is assigned but never used below; presumably the
        # load also served as validation of the summary JSON -- confirm.
        r = load_report_from_json(inSummaryFN)
        # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
        # reports; should we be?
    else:
        attributes = summaryToAttributes(inSummaryFN)

    table = attributesToTable(attributes)
    log.info(str(table))

    # A report is consist of ID, tables, attributes, and plotgroups.
    report = Report(Constants.R_ID,
                    title="Transcript Clustering",
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return report
Example #16
0
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report
    """
    merged = Report.merge([load_report_from_json(fn) for fn in json_files])
    with open(output_file, "w") as out:
        out.write(merged.to_json())
    return output_file
Example #17
0
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report
    """
    loaded = []
    for fn in json_files:
        loaded.append(load_report_from_json(fn))
    merged = Report.merge(loaded)
    with open(output_file, "w") as out:
        out.write(merged.to_json())
    return output_file
Example #18
0
def _get_variants_data(variants_rpt_file):
    """
    Extract coverage and concordance attributes from the variants report.

    :param variants_rpt_file: (str) path to the variants report
    :return dict: coverage and concordance
    """
    report = load_report_from_json(variants_rpt_file)
    return {
        Constants.A_COVERAGE:
            report.get_attribute_by_id(Constants.V_COVERAGE).value,
        Constants.A_CONCORDANCE:
            report.get_attribute_by_id(Constants.V_CONCORDANCE).value,
    }
 def test_make_report(self):
     """Report attributes must match those in the summary JSON file."""
     tmpdir = tempfile.mkdtemp()
     r = pbreports.report.isoseq_classify.make_report(
         self.input_fasta, self.output_summary_json, tmpdir)
     # smoke-check that the report serializes (previously bound to an
     # unused local `j`)
     r.to_json()
     attr = {a.id: a.value for a in r.attributes}
     r2 = load_report_from_json(self.output_summary_json)
     attr2 = {a.id: a.value for a in r2.attributes}
     for k, v in attr2.iteritems():
         self.assertEqual(attr[k], v)
Example #20
0
def _get_variants_data(variants_rpt_file):
    """
    Extract coverage and accuracy attributes from the variants report.

    :param variants_rpt_file: (str) path to the variants report
    :return dict: coverage and accuracy
    """
    report = load_report_from_json(variants_rpt_file)
    return {
        'coverage': report.get_attribute_by_id('weighted_mean_bases_called').value,
        'accuracy': report.get_attribute_by_id('weighted_mean_concordance').value,
    }
Example #21
0
 def run_after(self, rtc, output_dir):
     """Report dataset UUIDs must match the UUIDs of the output ContigSets."""
     rpt = None
     dataset_uuids = []
     for path in rtc.task.output_files:
         if path.endswith(".json"):
             rpt = load_report_from_json(path)
         elif path.endswith(".xml"):
             dataset_uuids.append(ContigSet(path, strict=True).uuid)
         else:
             # only JSON, XML, and CSV outputs are expected
             assert path.endswith(".csv")
     self.assertEqual(sorted(rpt._dataset_uuids), sorted(dataset_uuids))
Example #22
0
def makeReport(inReadsFN, inSummaryFN, outDir):
    """
    Generate a report with ID, tables, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus
    isoforms produced by pbtranscript.py cluster.
    This file is required to plot a read length histogram as part of
    the report:
         consensus_isoforms_readlength_hist.png

    inSummaryFN --- a summary TXT file with cluster attributes,
    including two attributes:
         number of consensus isoforms
         average length of consensus isoforms
    Attributes of the report are extracted from this file.

    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Collect read lengths of the consensus isoform sequences
    reader = ContigSet(inReadsFN)
    rs = [len(r.sequence) for r in reader]
    reader.close()
    readlengths = np.array(rs)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))
    # Produce attributes based on summary.
    dataset_uuids = [ContigSet(inReadsFN).uuid]
    attributes = _report_to_attributes(inSummaryFN)
    # NOTE(review): `r` is assigned but never used below; presumably the
    # load also served as validation of the summary JSON -- confirm.
    r = load_report_from_json(inSummaryFN)
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?

    table = attributesToTable(attributes)
    log.info(str(table))

    # A report is consist of ID, tables, attributes, and plotgroups.
    report = Report(Constants.R_ID,
                    title=meta_rpt.title,
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return meta_rpt.apply_view(report)
 def run_after(self, rtc, output_dir):
     """Check CCS filtering counts in the report, keyed by attribute name."""
     report = load_report_from_json(rtc.task.output_files[0])
     by_name = dict((attr.name, attr.value) for attr in report.attributes)
     expected = {
         'num_below_min_accuracy': 0,
         'num_not_converged': 0,
         'num_insert_size_too_small': 0,
         'num_too_many_unusable_subreads': 3,
         'num_no_usable_subreads': 0,
         'num_below_snr_threshold': 27,
         'num_ccs_reads': 52,
         'num_not_enough_full_passes': 58,
     }
     self.assertEqual(by_name, expected)
    def test_smoke(self):
        """gather_report must sum raw counts across the input stats files."""
        first = get_temp_file(suffix="-stats-1.json")
        _write_stats_to_json({"n_reads": 549, "n_zmws": 100}, first)
        second = get_temp_file(suffix="-stats-2.json")
        _write_stats_to_json({"n_reads": 733, "n_zmws": 100}, second)

        gathered = get_temp_file(suffix="stats-gather.json")
        G.gather_report([first, second], gathered)

        merged = load_report_from_json(gathered)
        totals = dict((attr.id, attr.value) for attr in merged.attributes)
        self.assertEqual(totals["pb_n_reads"], 549 + 733)
        self.assertEqual(totals["pb_n_zmws"], 200)
Example #25
0
    def test_smoke(self):
        """gather_report must sum raw counts across the input stats files."""
        inputs = []
        for idx, n_reads in ((1, 549), (2, 733)):
            path = get_temp_file(suffix="-stats-%d.json" % idx)
            _write_stats_to_json({'n_reads': n_reads, 'n_zmws': 100}, path)
            inputs.append(path)

        gathered = get_temp_file(suffix="stats-gather.json")
        G.gather_report(inputs, gathered)

        merged = load_report_from_json(gathered)
        totals = dict((attr.id, attr.value) for attr in merged.attributes)
        self.assertEqual(totals['pb_n_reads'], 549 + 733)
        self.assertEqual(totals['pb_n_zmws'], 200)
Example #26
0
def summarize_report(report_file, out=sys.stdout):
    """
    Write coverage and concordance (with their display names from the
    report metadata) to `out`; return True when both values are 1.
    """
    report = load_report_from_json(report_file)
    values = dict((attr.id, attr.value) for attr in report.attributes)
    coverage = values[Constants.A_COVERAGE]
    concordance = values[Constants.A_CONCORDANCE]
    concordance_name = meta_rpt.get_meta_attribute(Constants.A_CONCORDANCE).name
    coverage_name = meta_rpt.get_meta_attribute(Constants.A_COVERAGE).name
    out.write("%s:\n" % report_file)
    out.write("  {n}: {a}\n".format(n=concordance_name, a=concordance))
    out.write("  {n}: {c}\n".format(n=coverage_name, c=coverage))
    return coverage == 1 and concordance == 1
Example #27
0
def _get_variants_data(variants_rpt_file):
    """
    Extract coverage and concordance attributes from the variants report.

    :param variants_rpt_file: (str) path to the variants report
    :return dict: coverage and concordance
    """
    report = load_report_from_json(variants_rpt_file)
    coverage_value = report.get_attribute_by_id(Constants.V_COVERAGE).value
    concordance_value = report.get_attribute_by_id(Constants.V_CONCORDANCE).value
    return {Constants.A_COVERAGE: coverage_value,
            Constants.A_CONCORDANCE: concordance_value}
Example #28
0
def gather_report(json_files, output_file, dataset_xml=None):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    When a dataset XML is supplied, the merged report's dataset UUIDs are
    replaced with that dataset's UUID.
    Data models: pbcommand.models.report
    """
    merged = Report.merge([load_report_from_json(fn) for fn in json_files])
    if dataset_xml is not None:
        metadata = get_dataset_metadata(dataset_xml)
        merged._dataset_uuids = [metadata.uuid]
    with open(output_file, "w") as out:
        out.write(merged.to_json())
    return output_file
Example #29
0
 def test_pbreports_reports_have_dataset_uuids(self):
     """Check that reports from pbreports list all input dataset UUIDs"""
     for file_id, file_info in self.datastore.get_file_dict().iteritems():
         # idiom fix: `x not in y` instead of `not x in y`
         if "pbreports" not in file_info.file_id:
             continue
         if file_info.file_type_id == FileTypes.REPORT.file_type_id:
             r = load_report_from_json(file_info.path)
             report_uuids = set(r._dataset_uuids)
             ds_uuids = _get_rtc_dataset_uuids(file_info.path)
             for uuid in ds_uuids:
                 self.assertTrue(uuid in report_uuids,
                     "{p}: {l} not in {r}".format(p=file_info.path,
                                                  l=uuid,
                                                  r=report_uuids))
Example #30
0
 def get_report(self, report_id):
     """
     Returns one of the pbreports outputs.  report_id can be a sequence if
     the report lives under multiple names (e.g. for subreads/CCS).
     """
     if isinstance(report_id, basestring):
         report_id = set([report_id])
     for uuid, file_info in self.data.files.iteritems():
         if file_info.file_type_id != FileTypes.REPORT.file_type_id:
             continue
         rpt = load_report_from_json(file_info.path)
         if rpt.id in report_id:
             return rpt
     raise IOError("Can't find report with ID {i}".format(
                   i=" OR ".join(sorted(list(report_id)))))
Example #31
0
 def get_report(self, report_id):
     """
     Returns one of the pbreports outputs.  report_id can be a sequence if
     the report lives under multiple names (e.g. for subreads/CCS).
     """
     wanted = report_id
     if isinstance(report_id, basestring):
         wanted = set([report_id])
     for uuid, file_info in self.data.files.iteritems():
         if file_info.file_type_id == FileTypes.REPORT.file_type_id:
             candidate = load_report_from_json(file_info.path)
             if candidate.id in wanted:
                 return candidate
     raise IOError("Can't find report with ID {i}".format(
         i=" OR ".join(sorted(list(wanted)))))
 def run_after(self, rtc, output_dir):
     """Validate the values, names, and ordering of the report attributes."""
     report = load_report_from_json(rtc.task.output_files[0])
     id_to_value = {}
     id_to_name = {}
     for attr in report.attributes:
         id_to_value[attr.id] = attr.value
         id_to_name[attr.id] = attr.name
     self.assertEqual(id_to_value, {"mock_attr_2": 4, "mock_attr_3": 6,
         "mock_attr_0": 0, "mock_attr_1": 2, "mock_attr_4": 8})
     self.assertEqual(id_to_name, {"mock_attr_2": "Attr 2",
         "mock_attr_3": "Attr 3", "mock_attr_0": "Attr 0",
         "mock_attr_1": "Attr 1", "mock_attr_4": "Attr 4"})
     # check attribute order
     self.assertEqual([str(attr.id) for attr in report.attributes],
                      ["mock_attr_0", "mock_attr_1", "mock_attr_2",
                       "mock_attr_3", "mock_attr_4"])
Example #33
0
 def run_after(self, rtc, output_dir):
     """Check CCS filtering counts in the report, keyed by attribute name."""
     report = load_report_from_json(rtc.task.output_files[0])
     observed = {}
     for attr in report.attributes:
         observed[attr.name] = attr.value
     self.assertEqual(observed, {
         'num_below_min_accuracy': 0,
         'num_not_converged': 0,
         'num_insert_size_too_small': 0,
         'num_too_many_unusable_subreads': 3,
         'num_no_usable_subreads': 0,
         'num_below_snr_threshold': 27,
         'num_ccs_reads': 52,
         'num_not_enough_full_passes': 58,
     })
Example #34
0
def summarize_report(report_file, out=sys.stdout):
    """
    Utility function to harvest statistics from an existing report
    """
    from pbcommand.pb_io.report import load_report_from_json

    # PEP 8 (E731): use a def instead of binding a lambda to a name
    def write_attr(a):
        # one "  name: value" line per attribute
        out.write("  {n}: {v}\n".format(n=a.name, v=a.value))

    report = load_report_from_json(report_file)
    attr = {a.id: a for a in report.attributes}
    out.write("{f}:\n".format(f=report_file))
    write_attr(attr[Constants.A_SUBREAD_CONCORDANCE])
    write_attr(attr[Constants.A_NSUBREADS])
    write_attr(attr[Constants.A_NREADS])
    write_attr(attr[Constants.A_SUBREAD_NBASES])
    write_attr(attr[Constants.A_READLENGTH])
    write_attr(attr[Constants.A_SUBREAD_LENGTH])
Example #35
0
 def getReport(cls, report_id):
     """Fetch a report by source id, from the datastore or from services.

     With no service access layer, the report JSON comes straight from the
     datastore.  Otherwise each analysis-job report is scanned and the one
     whose file id prefix matches `report_id` is fetched as a dict and
     converted.  Raises RuntimeError when no report matches.
     """
     if cls.service_access_layer is None:
         report_json = cls.datastore.get_report(report_id)
         assert report_json is not None, "Can't find %s" % report_id
         return load_report_from_json(report_json)
     else:
         # load report from services, not raw file
         for rpt_info in cls.service_access_layer.get_analysis_job_reports(
             cls.job_id):
             file_info = rpt_info['dataStoreFile']
             # the source id is the prefix of the file id before the first '-'
             source_id = file_info.file_id.split("-")[0]
             if source_id == report_id:
                 report_d = cls.service_access_layer.get_analysis_job_report_details(cls.job_id, file_info.uuid)
                 return dict_to_report(report_d)
         raise RuntimeError("Can't find {i} report".format(i=report_id))
Example #36
0
 def test_pbreports_reports_have_dataset_uuids(self):
     """Check that reports from pbreports list all input dataset UUIDs"""
     for file_id, file_info in self.datastore.get_file_dict().iteritems():
         # idiom fix: `x not in y` instead of `not x in y`
         if "pbreports" not in file_info.file_id:
             continue
         if file_info.file_type_id == FileTypes.REPORT.file_type_id:
             r = load_report_from_json(file_info.path)
             report_uuids = set(r._dataset_uuids)
             ds_uuids = _get_rtc_dataset_uuids(file_info.path)
             for uuid in ds_uuids:
                 self.assertTrue(
                     uuid in report_uuids,
                     "{p}: {l} not in {r}".format(p=file_info.path,
                                                  l=uuid,
                                                  r=report_uuids))
Example #37
0
def summarize_report(report_file, out=sys.stdout):
    """
    Utility function to harvest statistics from an existing report
    """
    from pbcommand.pb_io.report import load_report_from_json

    # PEP 8 (E731): use a def instead of binding a lambda to a name
    def W(s):
        out.write(s + "\n")

    report = load_report_from_json(report_file)
    attr = {a.id: a.value for a in report.attributes}
    W("%s:" % report_file)
    W("  MEAN ACCURACY: {f}".format(f=attr[Constants.A_SUBREAD_ACCURACY]))
    W("  NSUBREADS: {n}".format(n=attr[Constants.A_NSUBREADS]))
    W("  NREADS: {n}".format(n=attr[Constants.A_NREADS]))
    W("  NBASES: {n}".format(n=attr[Constants.A_SUBREAD_NBASES]))
    W("  READLENGTH_MEAN: {n}".format(n=attr[Constants.A_READLENGTH]))
    W("  SUBREADLENGTH_MEAN: {n}".format(n=attr[Constants.A_SUBREAD_LENGTH]))
Example #38
0
def summarize_report(report_file, out=sys.stdout):
    """
    Utility function to harvest statistics from an existing report
    """
    from pbcommand.pb_io.report import load_report_from_json

    # PEP 8 (E731): use a def instead of binding a lambda to a name
    def W(s):
        out.write(s + "\n")

    report = load_report_from_json(report_file)
    attr = {a.id: a.value for a in report.attributes}
    W("%s:" % report_file)
    # display names come from the report metadata, upper-cased
    for attr_id in (Constants.A_SUBREAD_CONCORDANCE, Constants.A_NSUBREADS,
                    Constants.A_NREADS, Constants.A_SUBREAD_NBASES,
                    Constants.A_READLENGTH, Constants.A_SUBREAD_LENGTH):
        W("  {n}: {a}".format(
            n=meta_rpt.get_meta_attribute(attr_id).name.upper(),
            a=attr[attr_id]))
Example #39
0
def summarize_report(report_file, out=sys.stdout):
    """
    Utility function to harvest statistics from an existing report
    """
    from pbcommand.pb_io.report import load_report_from_json

    # PEP 8 (E731): use a def instead of binding a lambda to a name
    def write_attr(a):
        # one "  name: value" line per attribute
        out.write("  {n}: {v}\n".format(n=a.name, v=a.value))

    report = load_report_from_json(report_file)
    attr = {a.id: a for a in report.attributes}
    out.write("{f}:\n".format(f=report_file))
    write_attr(attr[Constants.A_SUBREAD_CONCORDANCE])
    write_attr(attr[Constants.A_NSUBREADS])
    write_attr(attr[Constants.A_NREADS])
    write_attr(attr[Constants.A_SUBREAD_NBASES])
    write_attr(attr[Constants.A_READLENGTH])
    write_attr(attr[Constants.A_SUBREAD_LENGTH])
Example #40
0
 def getReport(cls):
     """Fetch the class's expected report from the datastore or services.

     With no service access layer the datastore copy is returned directly.
     Otherwise each analysis-job report file is loaded and the first whose
     report id matches REPORT_ID is returned; IOError when none match.
     """
     if cls.service_access_layer is None:
         return cls.datastore.get_report(cls.REPORT_ID)
     else:
         report_id = cls.REPORT_ID
         # normalize a single id to a set so membership tests work below
         if isinstance(cls.REPORT_ID, basestring):
             report_id = set([cls.REPORT_ID])
         # load report from services, not raw file
         for rpt_info in cls.service_access_layer.get_analysis_job_reports(
             cls.job_id):
             file_info = rpt_info['dataStoreFile']
             rpt = load_report_from_json(file_info.path)
             if rpt.id in report_id:
                 return rpt
         raise IOError("Can't find report with ID {i}".format(
                       i=" OR ".join(sorted(list(report_id)))))
Example #41
0
 def getReport(cls, report_id):
     """Fetch a report by source id, from the datastore or from services.

     With no service access layer, the report JSON comes straight from the
     datastore.  Otherwise each analysis-job report is scanned and the one
     whose file id prefix matches `report_id` is fetched as a dict and
     converted.  Raises RuntimeError when no report matches.
     """
     if cls.service_access_layer is None:
         report_json = cls.datastore.get_report(report_id)
         assert report_json is not None, "Can't find %s" % report_id
         return load_report_from_json(report_json)
     else:
         # load report from services, not raw file
         for rpt_info in cls.service_access_layer.get_analysis_job_reports(
                 cls.job_id):
             file_info = rpt_info['dataStoreFile']
             # the source id is the prefix of the file id before the first '-'
             source_id = file_info.file_id.split("-")[0]
             if source_id == report_id:
                 report_d = cls.service_access_layer.get_analysis_job_report_details(
                     cls.job_id, file_info.uuid)
                 return dict_to_report(report_d)
         raise RuntimeError("Can't find {i} report".format(i=report_id))
Example #42
0
 def test_datastore_report_file_uuid(self):
     """Each report in the datastore must carry the datastore's unique id."""
     datastore_path = os.path.join(self.job_dir, "workflow", "datastore.json")
     with open(datastore_path, 'r') as handle:
         datastore = json.loads(handle.read())
         n_tested = 0
         for file_info in datastore['files']:
             if file_info['fileTypeId'] != FileTypes.REPORT.file_type_id:
                 continue
             rpt = load_report_from_json(file_info['path'])
             message = "{p}: {u1} != {u2}".format(p=file_info['path'],
                                                  u1=rpt.uuid,
                                                  u2=file_info['uniqueId'])
             self.assertEqual(rpt.uuid, file_info['uniqueId'], message)
             n_tested += 1
         if n_tested == 0:
             raise unittest.SkipTest("No Report JSON files in datastore.")
Example #43
0
 def test_datastore_report_file_uuid(self):
     """Each report in the datastore must carry the datastore's unique id."""
     datastore_path = os.path.join(self.job_dir, "workflow", "datastore.json")
     with open(datastore_path, 'r') as handle:
         entries = json.loads(handle.read())['files']
     n_tested = 0
     for file_info in entries:
         if file_info['fileTypeId'] == FileTypes.REPORT.file_type_id:
             rpt = load_report_from_json(file_info['path'])
             self.assertEqual(rpt.uuid, file_info['uniqueId'],
                              "{p}: {u1} != {u2}".format(
                              p=file_info['path'],
                              u1=rpt.uuid,
                              u2=file_info['uniqueId']))
             n_tested += 1
     if n_tested == 0:
         raise unittest.SkipTest("No Report JSON files in datastore.")
Example #44
0
 def getReport(cls):
     """Fetch the class's expected report from the datastore or services.

     With no service access layer the datastore copy is returned directly.
     Otherwise each analysis-job report file is loaded and the first whose
     report id matches REPORT_ID is returned; IOError when none match.
     """
     if cls.service_access_layer is None:
         return cls.datastore.get_report(cls.REPORT_ID)
     else:
         report_id = cls.REPORT_ID
         # normalize a single id to a set so membership tests work below
         if isinstance(cls.REPORT_ID, basestring):
             report_id = set([cls.REPORT_ID])
         # load report from services, not raw file
         for rpt_info in cls.service_access_layer.get_analysis_job_reports(
                 cls.job_id):
             file_info = rpt_info['dataStoreFile']
             rpt = load_report_from_json(file_info.path)
             if rpt.id in report_id:
                 return rpt
         raise IOError("Can't find report with ID {i}".format(
             i=" OR ".join(sorted(list(report_id)))))
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files, then
    recomputes the mean full-length non-chimeric read length from the totals.
    Data models: pbcommand.models.report
    """
    reports = [load_report_from_json(fn) for fn in json_files]
    merged = Report.merge(reports)
    total_bases = 0
    total_flnc = 0
    for report in reports:
        values = dict((attr.id, attr.value) for attr in report.attributes)
        total_flnc += values["num_flnc"]
        total_bases += values["num_flnc_bases"]
    if total_flnc > 0:
        for attribute in merged.attributes:
            if attribute.id == "avg_flnc_len":
                # mimicking pbtranscript.io.Summary
                attribute._value = int(total_bases / total_flnc)
                log.info("Setting avg_flnc_len = {v}".format(v=attribute.value))
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
def gather_report(json_files, output_file):
    """
    Combines statistics (usually raw counts) stored as JSON files.
    Data models: pbcommand.models.report
    """
    reports = [load_report_from_json(path) for path in json_files]
    merged = Report.merge(reports)
    total_bases = 0
    total_flnc = 0
    for rpt in reports:
        values = {attr.id: attr.value for attr in rpt.attributes}
        total_flnc += values["num_flnc"]
        total_bases += values["num_flnc_bases"]
    if total_flnc > 0:
        # The merged average must be recomputed from raw totals; a plain
        # merge of per-report averages would be wrong.
        for attr in merged.attributes:
            if attr.id == "avg_flnc_len":
                # mimicking pbtranscript.io.Summary
                attr._value = int(total_bases / total_flnc)
                log.info("Setting avg_flnc_len = {v}".format(v=attr.value))
    with open(output_file, "w") as writer:
        writer.write(merged.to_json())
    return output_file
 def test_exit_code_0(self):
     """Run the SAT report generator end-to-end and validate its attributes."""
     bam = self.getAlignmentSet()
     var_rpt = os.path.join(DATA, "variants_report.json")
     mapping_rpt = os.path.join(DATA, "mapping_stats_report.json")
     cmd = "python -m pbreports.report.sat {o} {r} {c} {a} {v}".format(
         o=self._output_dir, r="rpt.json", c=bam, a=var_rpt, v=mapping_rpt
     )
     o, c, m = backticks(cmd)
     log.info(cmd)
     # BUG FIX: was 'c is not 0', an identity test against an int literal
     # that only works via CPython small-int caching (and is a
     # SyntaxWarning on 3.8+); compare by value instead.
     if c != 0:
         log.error(m)
         log.error(o)
         print(m)
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(0, c)
     rpt_file = os.path.join(self._output_dir, "rpt.json")
     rpt = load_report_from_json(rpt_file)
     self.assertEqual("sidney", rpt.get_attribute_by_id("instrument").value)
     self.assertEqual(1, rpt.get_attribute_by_id("coverage").value)
     self.assertEqual(1, rpt.get_attribute_by_id("concordance").value)
     self.assertEqual(7752, rpt.get_attribute_by_id("mapped_readlength_mean").value)
     self.assertEqual(48, rpt.get_attribute_by_id("reads_in_cell").value)
     out = StringIO()
     self.assertTrue(summarize_report(rpt_file, out=out))
     validate_report_complete(self, rpt)
Example #48
0
 def test_reports_metadata_complete(self):
     """Check that metadata is non-null and nonempty"""
     for file_id, file_info in self.datastore.get_file_dict().iteritems():
         # Only report-type datastore files carry the metadata we validate.
         if file_info.file_type_id != FileTypes.REPORT.file_type_id:
             continue
         rpt = load_report_from_json(file_info.path)
         self.is_not_none_or_empty_str(rpt.id, "Report ID")
         self.is_not_none_or_empty_str(rpt.title, "Report title")
         for attr in rpt.attributes:
             self.is_not_none_or_empty_str(
                 attr.name, "Attribute {} name".format(attr.id))
         for tbl in rpt.tables:
             self.is_not_none_or_empty_str(
                 tbl.title, "Table {} title".format(tbl.id))
             for column in tbl.columns:
                 self.is_not_none_or_empty_str(
                     column.header, "Column {} header".format(column.id))
         for pg in rpt.plotGroups:
             self.is_not_none_or_empty_str(
                 pg.title, "Plotgroup {} title".format(pg.id))
             for plot in pg.plots:
                 self.is_not_none_or_empty_str(
                     plot.title, "Plot {} title".format(plot.id))
                 self.is_not_none_or_empty_str(
                     plot.caption, "Plot {} caption".format(plot.id))
 def _to_report(self):
     """Deserialize ``self.report_json`` into a report object."""
     report_path = self.report_json
     return load_report_from_json(report_path)
Example #50
0
def make_report(contig_set, summary_txt, output_dir):
    """
    Generate a report with ID, tables, attributes and plot groups.

    :param contig_set: an input FASTA file which has all full-length,
    non-chimeric reads produced by pbtranscript.py classify.

    This file is required to plot a read length histogram as part of
    the report:
         fulllength_nonchimeric_readlength_hist.png

    :param summary_txt: a summary TXT file with classify attributes,
    including 6 attributes,
        number of consensus reads
        number of five prime reads,
        number of three prime reads,
        number of poly-A reads,
        number of full-length non-chimeric reads,
        average full-length non-chimeric read length

    Attributes of the report are extracted from this file.

    :type contig_set: str
    :type summary_txt: str
    :type output_dir: str

    :rtype: Report
    """
    log.info(
        "Plotting read length histogram from file: {f}".format(f=contig_set))

    # Lazily yield per-record read lengths so np.fromiter can consume them
    # without materializing an intermediate list.
    def _get_reads():
        with ContigSet(contig_set) as f:
            for record in f:
                yield len(record.sequence)

    readlengths = np.fromiter(_get_reads(), dtype=np.int64,
                              count=-1).astype(float)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, output_dir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info(
        "Plotting summary attributes from file: {f}".format(f=summary_txt))
    dataset_uuids = [ContigSet(contig_set).uuid]
    # Produce attributes based on summary.
    attributes = report_to_attributes(summary_txt)
    # BUG FIX: removed the unused 'r = load_report_from_json(summary_txt)';
    # report_to_attributes already parses the same file, so the second load
    # was dead code.
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?

    table = attributes_to_table(attributes, Constants.T_ATTR)
    log.info(str(table))

    # A report consists of ID, tables, attributes, and plotgroups.
    report = Report(Constants.R_ID,
                    tables=[table],
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return spec.apply_view(report)
Example #51
0
def report_to_attributes(summary_json):
    """Parse ``summary_json`` and rebuild each of its attributes as a bare
    Attribute carrying only the original id and value."""
    parsed = load_report_from_json(summary_json)
    rebuilt = []
    for attr in parsed.attributes:
        rebuilt.append(Attribute(attr.id, attr.value))
    return rebuilt
Example #52
0
def _report_to_attributes(report_file):
    """Load a report JSON file and return its attribute list unchanged."""
    return load_report_from_json(report_file).attributes
 def _to_report(self):
     """Parse the JSON report referenced by ``self.report_json``."""
     source = self.report_json
     return load_report_from_json(source)
Example #54
0
def make_report(contig_set, summary_txt, output_dir):
    """
    Generate a report with ID, tables, attributes and plot groups.

    :param contig_set: an input FASTA file which has all full-length,
    non-chimeric reads produced by pbtranscript.py classify.

    This file is required to plot a read length histogram as part of
    the report:
         fulllength_nonchimeric_readlength_hist.png

    :param summary_txt: a summary TXT file with classify attributes,
    including 6 attributes,
        number of consensus reads
        number of five prime reads,
        number of three prime reads,
        number of poly-A reads,
        number of full-length non-chimeric reads,
        average full-length non-chimeric read length

    Attributes of the report are extracted from this file.

    :type contig_set: str
    :type summary_txt: str
    :type output_dir: str

    :rtype: Report
    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=contig_set))

    # Lazily yield per-record read lengths so np.fromiter can consume them
    # without building an intermediate list.
    def _get_reads():
        with ContigSet(contig_set) as f:
            for record in f:
                yield len(record.sequence)

    readlengths = np.fromiter(_get_reads(), dtype=np.int64, count=-1).astype(float)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, output_dir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=summary_txt))
    dataset_uuids = [ContigSet(contig_set).uuid]
    # Produce attributes based on summary.
    attributes = _report_to_attributes(summary_txt)
    # BUG FIX: removed the unused 'r = load_report_from_json(summary_txt)';
    # _report_to_attributes already parses the same file, so the second load
    # was dead code.
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?

    table = _attributes_to_table(attributes)
    log.info(str(table))

    # A report consists of ID, tables, attributes, and plotgroups.
    report = Report(Constants.R_ID,
                    tables=[table],
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return spec.apply_view(report)
Example #55
0
def _report_to_attributes(summary_json):
    """Re-create each attribute from ``summary_json`` as a minimal
    Attribute(id, value) pair."""
    loaded = load_report_from_json(summary_json)
    return [Attribute(entry.id, entry.value) for entry in loaded.attributes]
Example #56
0
def _compare_reports(self, rpt_json1, rpt_json2):
    """Assert that two report JSON files have identical attribute id/value maps."""
    def _attr_map(path):
        # Collapse a report into a comparable {attribute id: value} dict.
        report = load_report_from_json(path)
        return {attr.id: attr.value for attr in report.attributes}
    self.assertEqual(_attr_map(rpt_json1), _attr_map(rpt_json2))