def makeReport(inReadsFN, inSummaryFN, outDir):
    """
    Generate a report with ID, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus isoforms
                  produced by pbtranscript.py cluster.
                  This file is required to plot a read length histogram
                  as part of the report:
                  consensus_isoforms_readlength_hist.png
    inSummaryFN --- a summary file with cluster attributes, including
                  two attributes:
                      number of consensus isoforms
                      average length of consensus isoforms
                  Attributes of the report are extracted from this file.
                  A path ending in ".json" is read as a JSON report;
                  any other path is parsed by summaryToAttributes.
    outDir --- directory passed to create_readlength_plot.

    Returns the assembled Report. The attribute table is only written
    to the log; it is not attached to the report.
    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Open the consensus isoforms once, take both the dataset UUID and
    # the read lengths from that reader, and always close it. (The
    # previous code opened a second ContigSet just for the UUID and
    # never closed it.)
    reader = ContigSet(inReadsFN)
    try:
        dataset_uuids = [reader.uuid]
        lengths = [len(rec.sequence) for rec in reader]
    finally:
        reader.close()
    readlengths = np.array(lengths)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 title="Read Length of Consensus Isoforms Reads",
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))

    # Produce attributes based on summary.
    if inSummaryFN.endswith(".json"):
        attributes = _report_to_attributes(inSummaryFN)
        # The loaded report itself is not used; the call is kept so any
        # error it raises for an unloadable summary is preserved.
        # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
        # reports; should we be?
        load_report_from_json(inSummaryFN)
    else:
        attributes = summaryToAttributes(inSummaryFN)

    table = attributesToTable(attributes)
    log.info(str(table))

    # A report consists of an ID, attributes, and plot groups.
    report = Report(Constants.R_ID,
                    title="Transcript Clustering",
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return report
def makeReport(inReadsFN, inSummaryFN, outDir):
    """
    Generate a report with ID, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus isoforms
                  produced by pbtranscript.py cluster.
                  This file is required to plot a read length histogram
                  as part of the report:
                  consensus_isoforms_readlength_hist.png
    inSummaryFN --- a summary report with cluster attributes, including
                  two attributes:
                      number of consensus isoforms
                      average length of consensus isoforms
                  Attributes of the report are extracted from this file,
                  which is loaded with load_report_from_json.
    outDir --- directory passed to create_readlength_plot.

    Returns the result of meta_rpt.apply_view() on the assembled Report.
    The attribute table is only written to the log; it is not attached
    to the report.
    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Open the consensus isoforms once, take both the dataset UUID and
    # the read lengths from that reader, and always close it. (The
    # previous code opened a second ContigSet just for the UUID and
    # never closed it.)
    reader = ContigSet(inReadsFN)
    try:
        dataset_uuids = [reader.uuid]
        lengths = [len(rec.sequence) for rec in reader]
    finally:
        reader.close()
    readlengths = np.array(lengths)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))

    # Produce attributes based on summary.
    attributes = _report_to_attributes(inSummaryFN)
    # The loaded report itself is not used; the call is kept so any
    # error it raises for an unloadable summary is preserved.
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?
    load_report_from_json(inSummaryFN)

    table = attributesToTable(attributes)
    log.info(str(table))

    # A report consists of an ID, attributes, and plot groups.
    report = Report(Constants.R_ID,
                    title=meta_rpt.title,
                    attributes=attributes,
                    plotgroups=[readlength_group],
                    dataset_uuids=dataset_uuids)

    return meta_rpt.apply_view(report)
def makeReport(inReadsFN, hq_isoforms_fq, lq_isoforms_fq, inSummaryFN, outDir):
    """
    Generate a report with ID, attributes and plot groups.

    inReadsFN --- an input FASTA file which has all consensus isoforms
                  produced by pbtranscript.py cluster.
                  This file is required to plot a read length histogram
                  as part of the report:
                  consensus_isoforms_readlength_hist.png
    hq_isoforms_fq/lq_isoforms_fq --- input FASTQ files which have all
                  HQ/LQ isoforms produced by pbtranscript.py cluster.
                  These two files are required to plot the average QV
                  histogram:
                  hq_lq_isoforms_avgqv_hist.png
    inSummaryFN --- a summary report with cluster attributes, including
                  two attributes:
                      number of consensus isoforms
                      average length of consensus isoforms
                  Attributes of the report are extracted from this file,
                  which is loaded with load_report_from_json.
    outDir --- directory passed to the plot helpers.

    Returns the result of spec.apply_view() on the assembled Report.
    The attribute table is only written to the log; it is not attached
    to the report.
    """
    log.info("Plotting read length histogram from file: {f}".
             format(f=inReadsFN))

    # Open the consensus isoforms once, take both the dataset UUID and
    # the read lengths from that reader, and always close it. (The
    # previous code opened a second ContigSet just for the UUID and
    # never closed it.)
    reader = ContigSet(inReadsFN)
    try:
        dataset_uuids = [reader.uuid]
        lengths = [len(rec.sequence) for rec in reader]
    finally:
        reader.close()
    readlengths = np.array(lengths).astype(float)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, outDir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    # Collect the mean QV of every record, HQ isoforms first and then
    # LQ isoforms, closing each reader once it has been consumed.
    mean_qvs = []
    for isoforms_fq in (hq_isoforms_fq, lq_isoforms_fq):
        qv_reader = ContigSet(isoforms_fq)
        try:
            mean_qvs.extend([np.mean(rec.quality) for rec in qv_reader])
        finally:
            qv_reader.close()
    avgqvs = np.array(mean_qvs)

    # Plot average qv histogram
    avgqv_plot = create_avgqv_plot(avgqvs, outDir)
    avgqv_group = PlotGroup(Constants.PG_AVGQV,
                            plots=[avgqv_plot],
                            thumbnail=avgqv_plot.thumbnail)

    log.info("Plotting summary attributes from file: {f}".
             format(f=inSummaryFN))

    # Produce attributes based on summary.
    attributes = _report_to_attributes(inSummaryFN)
    # The loaded report itself is not used; the call is kept so any
    # error it raises for an unloadable summary is preserved.
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?
    load_report_from_json(inSummaryFN)

    table = attributesToTable(attributes)
    log.info(str(table))

    # A report consists of an ID, attributes, and plot groups.
    report = Report(Constants.R_ID,
                    attributes=attributes,
                    plotgroups=[readlength_group, avgqv_group],
                    dataset_uuids=dataset_uuids)

    return spec.apply_view(report)
def make_report(reads_fasta, hq_isoforms_fq, lq_isoforms_fq, summary_txt,
                output_dir):
    """
    Generate a report with ID, attributes and plot groups.

    reads_fasta --- an input FASTA file which has all consensus isoforms
                  produced by pbtranscript.py cluster.
                  This file is required to plot a read length histogram
                  as part of the report:
                  consensus_isoforms_readlength_hist.png
    hq_isoforms_fq/lq_isoforms_fq --- input FASTQ files which have all
                  HQ/LQ isoforms produced by pbtranscript.py cluster.
                  These two files are required to plot the average QV
                  histogram:
                  hq_lq_isoforms_avgqv_hist.png
    summary_txt --- a summary report with cluster attributes, including
                  two attributes:
                      number of consensus isoforms
                      average length of consensus isoforms
                  Attributes of the report are extracted from this file.
                  Despite the parameter name, it is loaded with
                  load_report_from_json.
    output_dir --- directory passed to the plot helpers.

    Returns the result of spec.apply_view() on the assembled Report.
    The attribute table is only written to the log; it is not attached
    to the report.
    """
    log.info(
        "Plotting read length histogram from file: {f}".format(f=reads_fasta))

    # Open the consensus isoforms once, take both the dataset UUID and
    # the read lengths from that reader, and always close it. (The
    # previous code opened a second ContigSet just for the UUID and
    # never closed it.)
    reader = ContigSet(reads_fasta)
    try:
        dataset_uuids = [reader.uuid]
        lengths = [len(rec.sequence) for rec in reader]
    finally:
        reader.close()
    readlengths = np.array(lengths).astype(float)

    # Plot read length histogram
    readlength_plot = create_readlength_plot(readlengths, output_dir)
    readlength_group = PlotGroup(Constants.PG_READLENGTH,
                                 plots=[readlength_plot],
                                 thumbnail=readlength_plot.thumbnail)

    # Collect the mean QV of every record, HQ isoforms first and then
    # LQ isoforms, closing each FASTQ reader once it has been consumed.
    mean_qvs = []
    for isoforms_fq in (hq_isoforms_fq, lq_isoforms_fq):
        fastq_reader = FastqReader(isoforms_fq)
        try:
            mean_qvs.extend([np.mean(rec.quality) for rec in fastq_reader])
        finally:
            fastq_reader.close()
    avgqvs = np.array(mean_qvs)

    # Plot average qv histogram
    avgqv_plot = create_avgqv_plot(avgqvs, output_dir)
    avgqv_group = PlotGroup(Constants.PG_AVGQV,
                            plots=[avgqv_plot],
                            thumbnail=avgqv_plot.thumbnail)

    log.info(
        "Plotting summary attributes from file: {f}".format(f=summary_txt))

    # Produce attributes based on summary.
    attributes = report_to_attributes(summary_txt)
    # The loaded report itself is not used; the call is kept so any
    # error it raises for an unloadable summary is preserved.
    # FIXME(nechols)(2016-03-22) not using the dataset UUIDs from these
    # reports; should we be?
    load_report_from_json(summary_txt)

    table = attributes_to_table(attributes, Constants.T_ATTR)
    log.info(str(table))

    # A report consists of an ID, attributes, and plot groups.
    report = Report(Constants.R_ID,
                    attributes=attributes,
                    plotgroups=[readlength_group, avgqv_group],
                    dataset_uuids=dataset_uuids)

    return spec.apply_view(report)