def makeReport(inReadsFN, inSummaryFN, outDir):
    """
    Generate a report with ID, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus
    isoforms produced by pbtranscript.py cluster.
    This file is required to plot a read length histogram as part of
    the report:
         consensus_isoforms_readlength_hist.png

    inSummaryFN --- a summary file with cluster attributes,
    including two attributes:
         number of consensus isoforms
         average length of consensus isoforms
    Attributes of the report are extracted from this file. A name ending
    in ".json" is processed with _report_to_attributes; any other name is
    handed to summaryToAttributes.

    outDir --- output location handed to create_readlength_plot.

    Returns the assembled Report.
    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Open the consensus isoforms once: gather every read length and the
    # dataset UUID from the same handle, and close it even if reading fails
    # (previously a second ContigSet was opened for the UUID and leaked).
    reader = ContigSet(inReadsFN)
    try:
        rs = [len(r.sequence) for r in reader]
        dataset_uuids = [reader.uuid]
    finally:
        reader.close()
    readlengths = np.array(rs)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 title="Read Length of Consensus Isoforms Reads",
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))
    # Produce attributes based on summary.
    if inSummaryFN.endswith(".json"):
        attributes = _report_to_attributes(inSummaryFN)
        # Loaded but not consumed yet; kept so the call still happens.
        r = load_report_from_json(inSummaryFN)
        # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
        # reports; should we be?
    else:
        attributes = summaryToAttributes(inSummaryFN)

    # The table is only logged; it is not attached to the report below.
    table = attributesToTable(attributes)
    log.info(str(table))

    # A report consists of an ID, attributes, and plot groups.
    report = Report(Constants.R_ID,
                    title="Transcript Clustering",
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return report
Beispiel #2
0
def makeReport(inReadsFN, inSummaryFN, outDir):
    """
    Generate a report with ID, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus
    isoforms produced by pbtranscript.py cluster.
    This file is required to plot a read length histogram as part of
    the report:
         consensus_isoforms_readlength_hist.png

    inSummaryFN --- a summary file with cluster attributes,
    including two attributes:
         number of consensus isoforms
         average length of consensus isoforms
    Attributes of the report are extracted from this file with
    _report_to_attributes, and it is also loaded with
    load_report_from_json, so it is presumably a JSON report.

    outDir --- output location handed to create_readlength_plot.

    Returns the result of meta_rpt.apply_view() on the assembled Report.
    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Open the consensus isoforms once: gather every read length and the
    # dataset UUID from the same handle, and close it even if reading fails
    # (previously a second ContigSet was opened for the UUID and leaked).
    reader = ContigSet(inReadsFN)
    try:
        rs = [len(r.sequence) for r in reader]
        dataset_uuids = [reader.uuid]
    finally:
        reader.close()
    readlengths = np.array(rs)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))
    # Produce attributes based on summary.
    attributes = _report_to_attributes(inSummaryFN)
    # Loaded but not consumed yet; kept so the call still happens.
    r = load_report_from_json(inSummaryFN)
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?

    # The table is only logged; it is not attached to the report below.
    table = attributesToTable(attributes)
    log.info(str(table))

    # A report consists of an ID, attributes, and plot groups.
    report = Report(Constants.R_ID,
                    title=meta_rpt.title,
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return meta_rpt.apply_view(report)
Beispiel #3
0
def makeReport(inReadsFN, hq_isoforms_fq, lq_isoforms_fq, inSummaryFN, outDir):
    """
    Generate a report with ID, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus
    isoforms produced by pbtranscript.py cluster.
    This file is required to plot a read length histogram as part of
    the report:
         consensus_isoforms_readlength_hist.png

    hq_isoforms_fq/lq_isoforms_fq --- input FASTQ files which have
    all HQ/LQ isoforms produced by pbtranscript.py cluster.
    These two files are required to plot the average QV histogram:
         hq_lq_isoforms_avgqv_hist.png

    inSummaryFN --- a summary file with cluster attributes,
    including two attributes:
         number of consensus isoforms
         average length of consensus isoforms
    Attributes of the report are extracted from this file with
    _report_to_attributes, and it is also loaded with
    load_report_from_json, so it is presumably a JSON report.

    outDir --- output location handed to create_readlength_plot and
    create_avgqv_plot.

    Returns the result of spec.apply_view() on the assembled Report.
    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Open the consensus isoforms once: gather every read length and the
    # dataset UUID from the same handle, and close it even if reading fails
    # (previously a second ContigSet was opened for the UUID and leaked).
    reader = ContigSet(inReadsFN)
    try:
        rs = [len(r.sequence) for r in reader]
        dataset_uuids = [reader.uuid]
    finally:
        reader.close()
    readlengths = np.array(rs).astype(float)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    # Collect the mean QV of every isoform, HQ records first and then LQ,
    # closing each dataset as soon as it has been consumed.
    mean_qvs = []
    for isoforms_fq in (hq_isoforms_fq, lq_isoforms_fq):
        qv_reader = ContigSet(isoforms_fq)
        try:
            mean_qvs.extend(np.mean(r.quality) for r in qv_reader)
        finally:
            qv_reader.close()
    avgqvs = np.array(mean_qvs)

    # Plot average qv histogram
    avgqv_plot = create_avgqv_plot(avgqvs, outDir)
    avgqv_group = PlotGroup(Constants.PG_AVGQV,
                            plots=[avgqv_plot],
                            thumbnail=avgqv_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))
    # Produce attributes based on summary.
    attributes = _report_to_attributes(inSummaryFN)
    # Loaded but not consumed yet; kept so the call still happens.
    r = load_report_from_json(inSummaryFN)
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?

    # The table is only logged; it is not attached to the report below.
    table = attributesToTable(attributes)
    log.info(str(table))

    # A report consists of an ID, attributes, and plot groups.
    report = Report(Constants.R_ID,
                    attributes=attributes,
                    plotgroups=[readlength_group, avgqv_group],
                    dataset_uuids=dataset_uuids)

    return spec.apply_view(report)
Beispiel #4
0
def make_report(reads_fasta, hq_isoforms_fq, lq_isoforms_fq, summary_txt,
                output_dir):
    """
    Generate a report with ID, attributes and plot groups.

    reads_fasta --- an input FASTA file which has all consensus
    isoforms produced by pbtranscript.py cluster.
    This file is required to plot a read length histogram as part of
    the report:
         consensus_isoforms_readlength_hist.png

    hq_isoforms_fq/lq_isoforms_fq --- input FASTQ files which have
    all HQ/LQ isoforms produced by pbtranscript.py cluster.
    These two files are required to plot the average QV histogram:
         hq_lq_isoforms_avgqv_hist.png

    summary_txt --- a summary file with cluster attributes,
    including two attributes:
         number of consensus isoforms
         average length of consensus isoforms
    Attributes of the report are extracted from this file with
    report_to_attributes, and it is also loaded with
    load_report_from_json, so despite the parameter name it is presumably
    a JSON report.

    output_dir --- output location handed to create_readlength_plot and
    create_avgqv_plot.

    Returns the result of spec.apply_view() on the assembled Report.
    """
    log.info(
        "Plotting read length histogram from file: {f}".format(f=reads_fasta))

    # Open the consensus isoforms once: gather every read length and the
    # dataset UUID from the same handle, and close it even if reading fails
    # (previously a second ContigSet was opened for the UUID and leaked).
    reader = ContigSet(reads_fasta)
    try:
        rs = [len(r.sequence) for r in reader]
        dataset_uuids = [reader.uuid]
    finally:
        reader.close()
    readlengths = np.array(rs).astype(float)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, output_dir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    # Collect the mean QV of every isoform, HQ records first and then LQ.
    # The context manager closes each FASTQ file once it has been consumed.
    mean_qvs = []
    for isoforms_fq in (hq_isoforms_fq, lq_isoforms_fq):
        with FastqReader(isoforms_fq) as fq_reader:
            mean_qvs.extend(np.mean(r.quality) for r in fq_reader)
    avgqvs = np.array(mean_qvs)

    # Plot average qv histogram
    avgqv_plot = create_avgqv_plot(avgqvs, output_dir)
    avgqv_group = PlotGroup(Constants.PG_AVGQV,
                            plots=[avgqv_plot],
                            thumbnail=avgqv_plot.thumbnail)

    log.info(
        "Plotting summary attributes from file: {f}".format(f=summary_txt))
    # Produce attributes based on summary.
    attributes = report_to_attributes(summary_txt)
    # Loaded but not consumed yet; kept so the call still happens.
    r = load_report_from_json(summary_txt)
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?

    # The table is only logged; it is not attached to the report below.
    table = attributes_to_table(attributes, Constants.T_ATTR)
    log.info(str(table))

    # A report consists of an ID, attributes, and plot groups.
    report = Report(Constants.R_ID,
                    attributes=attributes,
                    plotgroups=[readlength_group, avgqv_group],
                    dataset_uuids=dataset_uuids)

    return spec.apply_view(report)