def _get_plot_view_configs(self):
    """
    Return the view settings for the mapped-read histogram plots.

    Any change to the 'raw' view of a report plot should be made here.
    Three histogram plots are configured:

    1. Subread concordance
    2. Subread length
    3. Read length

    Axis labels and plot-group titles are looked up from the module-level
    ``spec`` via ``get_plot_xlabel`` / ``get_plot_ylabel`` /
    ``get_plot_title``.

    :returns: dict mapping each entry's ``plot_id`` attribute to its
        PlotViewProperties instance
    """
    # (plot group id, plot id) pairs, in the order the spec helpers take them.
    concordance_ids = (Constants.PG_SUBREAD_CONCORDANCE,
                       Constants.P_SUBREAD_CONCORDANCE)
    subread_length_ids = (Constants.PG_SUBREAD_LENGTH,
                          Constants.P_SUBREAD_LENGTH)
    read_length_ids = (Constants.PG_READLENGTH,
                       Constants.P_READLENGTH)

    concordance_view = PlotViewProperties(
        Constants.P_SUBREAD_CONCORDANCE,
        Constants.PG_SUBREAD_CONCORDANCE,
        generate_plot,
        'mapped_subread_concordance_histogram.png',
        xlabel=get_plot_xlabel(spec, *concordance_ids),
        ylabel=get_plot_ylabel(spec, *concordance_ids),
        color=get_green(3),
        edgecolor=get_green(2),
        use_group_thumb=True,
        plot_group_title=get_plot_title(spec, *concordance_ids))

    subread_length_view = PlotViewProperties(
        Constants.P_SUBREAD_LENGTH,
        Constants.PG_SUBREAD_LENGTH,
        generate_plot,
        'mapped_subreadlength_histogram.png',
        xlabel=get_plot_xlabel(spec, *subread_length_ids),
        ylabel=get_plot_ylabel(spec, *subread_length_ids),
        use_group_thumb=True,
        color=get_blue(3),
        edgecolor=get_blue(2),
        plot_group_title=get_plot_title(spec, *subread_length_ids))

    read_length_view = PlotViewProperties(
        Constants.P_READLENGTH,
        Constants.PG_READLENGTH,
        generate_plot,
        'mapped_readlength_histogram.png',
        xlabel=get_plot_xlabel(spec, *read_length_ids),
        ylabel=get_plot_ylabel(spec, *read_length_ids),
        color=get_blue(3),
        edgecolor=get_blue(2),
        use_group_thumb=True,
        plot_group_title=get_plot_title(spec, *read_length_ids))

    # Index by plot id so callers can look a configuration up directly.
    views_by_id = {}
    for view in (concordance_view, subread_length_view, read_length_view):
        views_by_id[view.plot_id] = view
    return views_by_id
def _plot_view_configs():
    """
    Return the view settings for the pre-/post-filter histogram plots.

    Any change to the 'raw' view of a report plot should be made here.
    Four plots are configured: pre- and post-filter read length, and
    pre- and post-filter read quality.  Only the post-filter entries pass
    ``use_group_thumb=True``.

    :returns: dict mapping each entry's ``plot_id`` attribute to its
        PlotViewProperties instance
    """

    def _read_length_view(plot_id, image_const, title, **extra):
        # Shared settings for the "Polymerase Read Length" plot group.
        return PlotViewProperties(
            plot_id,
            Constants.PG_READ_LENGTH,
            custom_read_length_histogram,
            image_const,
            xlabel="ReadLength",
            ylabel="Reads",
            rlabel="bp > Read Length",
            title=title,
            color=get_green(3),
            edgecolor=get_green(2),
            plot_group_title="Polymerase Read Length",
            **extra)

    def _read_score_view(plot_id, image_const, title, **extra):
        # Shared settings for the "Polymerase Read Quality" plot group.
        return PlotViewProperties(
            plot_id,
            Constants.PG_READ_SCORE,
            custom_read_accuracy_histogram,
            image_const,
            xlabel="Read Quality",
            ylabel="Reads",
            rlabel="bp > Read Quality",
            title=title,
            color=get_blue(3),
            edgecolor=get_blue(2),
            plot_group_title="Polymerase Read Quality",
            **extra)

    views = (
        _read_length_view(Constants.P_PRE_FILTER_READ_LENGTH_HIST,
                          Constants.I_PRE_FILTER_READ_LENGTH,
                          "Pre-Filter"),
        # NOTE: "LENGHT" is the spelling of the constant as defined on
        # Constants; it must be kept as-is here.
        _read_length_view(Constants.P_POST_FILTER_READ_LENGHT_HIST,
                          Constants.I_POST_FILTER_READ_LENGTH,
                          "Post-Filter",
                          use_group_thumb=True),
        _read_score_view(Constants.P_PRE_FILTER_READ_SCORE_HIST,
                         Constants.I_PRE_FILTER_READ_SCORE,
                         "Pre-Filter"),
        _read_score_view(Constants.P_POST_FILTER_READ_SCORE_HIST,
                         Constants.I_POST_FILTER_READ_SCORE,
                         "Post-Filter",
                         use_group_thumb=True),
    )

    # Key by plot id to make the configurations easier to access.
    return dict((view.plot_id, view) for view in views)
def _get_plot_view_configs(self):
    """
    Return the view settings for the mapped-read histogram plots.

    Any change to the 'raw' view of a report plot should be made here.
    Three histogram plots are configured, with labels given as literals:

    1. Subread accuracy
    2. Subread length
    3. Read length

    :returns: dict mapping each entry's ``plot_id`` attribute to its
        PlotViewProperties instance
    """
    accuracy_view = PlotViewProperties(
        Constants.P_SUBREAD_ACCURACY,
        Constants.PG_SUBREAD_ACCURACY,
        generate_plot,
        'mapped_subread_accuracy_histogram.png',
        xlabel="Concordance",
        ylabel="Subreads",
        color=get_green(3),
        edgecolor=get_green(2),
        use_group_thumb=True,
        plot_group_title="Mapped Subread Accuracy")

    subread_length_view = PlotViewProperties(
        Constants.P_SUBREAD_LENGTH,
        Constants.PG_SUBREAD_LENGTH,
        generate_plot,
        'mapped_subreadlength_histogram.png',
        xlabel="Subread Length",
        ylabel="Subreads",
        use_group_thumb=True,
        color=get_blue(3),
        edgecolor=get_blue(2),
        plot_group_title="Mapped Subread Length")

    read_length_view = PlotViewProperties(
        Constants.P_READLENGTH,
        Constants.PG_READLENGTH,
        generate_plot,
        'mapped_readlength_histogram.png',
        xlabel="Read Length",
        ylabel="Reads",
        color=get_blue(3),
        edgecolor=get_blue(2),
        use_group_thumb=True,
        plot_group_title="Mapped Polymerase Read Length")

    # Index by plot id so callers can look a configuration up directly.
    ordered_views = (accuracy_view, subread_length_view, read_length_view)
    return dict((view.plot_id, view) for view in ordered_views)
def make_nreads_histogram(bc_groups, base_dir):
    """
    Create a simple histogram of read count frequency per barcode.

    :param bc_groups: sized collection of barcode groups; each item must
        expose an ``n_reads`` attribute that ``float()`` accepts
    :param base_dir: forwarded unchanged to ``_to_plot`` (presumably the
        output directory for the image -- confirm against ``_to_plot``)
    :returns: the result of ``_to_plot`` for ``Constants.P_HIST_NREADS``
    """
    # FIXME workaround: counts are coerced to float before plotting.
    read_counts = [float(group.n_reads) for group in bc_groups]
    # Use one bin per barcode group up to a cap of 20, but never ask for
    # zero bins: an empty ``bc_groups`` would otherwise yield nbins=0,
    # which histogram binning rejects (assuming ``make_histogram`` hands
    # ``nbins`` to numpy/matplotlib -- it is defined outside this block).
    nbins = max(1, min(len(bc_groups), 20))
    # Only the figure is needed; the axes object is not used here.
    fig, _ = make_histogram(
        datum=read_counts,
        axis_labels=["Number of Reads", "Number of Barcoded Samples"],
        nbins=nbins,
        barcolor=get_blue(3))
    return _to_plot(fig, Constants.P_HIST_NREADS, base_dir)
def make_readlength_histogram(bc_groups, base_dir):
    """
    Create a simple histogram of mean read length frequency per barcode.

    :param bc_groups: sized collection of barcode groups; each item must
        provide a ``mean_read_length()`` method whose result ``float()``
        accepts
    :param base_dir: forwarded unchanged to ``_to_plot`` (presumably the
        output directory for the image -- confirm against ``_to_plot``)
    :returns: the result of ``_to_plot`` for ``Constants.P_HIST_RL``
    """
    # FIXME: values are coerced to float before plotting.
    mean_lengths = [float(group.mean_read_length()) for group in bc_groups]
    # Use one bin per barcode group up to a cap of 20, but never ask for
    # zero bins: an empty ``bc_groups`` would otherwise yield nbins=0,
    # which histogram binning rejects (assuming ``make_histogram`` hands
    # ``nbins`` to numpy/matplotlib -- it is defined outside this block).
    nbins = max(1, min(len(bc_groups), 20))
    # Only the figure is needed; the axes object is not used here.
    fig, _ = make_histogram(
        datum=mean_lengths,
        axis_labels=["Mean Read Length", "Number of Barcoded Samples"],
        nbins=nbins,
        barcolor=get_blue(3))
    return _to_plot(fig, Constants.P_HIST_RL, base_dir)
def _create_bars(contig_variants):
    """
    Build one bar series per variant type from per-row variant records.

    Each row of ``contig_variants.variants`` is indexed positionally:
    index 1 feeds the insertions series, index 2 the deletions series and
    index 3 the substitutions series.

    :param contig_variants: (ContigVariants)
    :returns: tuple of pbreports.plot.helper.Bar objects, in the order
        (insertions, deletions, substitutions)
    """
    # One pass per column, each reading ``.variants`` afresh.
    insertion_counts = np.array(
        [row[1] for row in contig_variants.variants])
    deletion_counts = np.array(
        [row[2] for row in contig_variants.variants])
    substitution_counts = np.array(
        [row[3] for row in contig_variants.variants])

    return (
        PH.Bar(insertion_counts, 'Insertions', color=PH.get_blue(3)),
        PH.Bar(deletion_counts, 'Deletions', color=PH.get_green(3)),
        PH.Bar(substitution_counts, 'Substitutions', color=PH.get_orange()),
    )
def make_bcqual_histogram(bc_groups, base_dir):
    """
    Create a simple histogram of barcode quality score frequency over all
    barcoded subreads.

    The ``bqs`` values of every group are pooled into one flat list and
    plotted with 50 bins; a vertical line is then drawn at x = 26.

    :param bc_groups: iterable of barcode groups, each exposing an
        iterable ``bqs`` attribute
    :param base_dir: forwarded unchanged to ``_to_plot`` (presumably the
        output directory for the image -- confirm against ``_to_plot``)
    :returns: the result of ``_to_plot`` for ``Constants.P_HIST_BQ``
    """
    # Flatten the per-group score lists, preserving group order.
    pooled_scores = [score for group in bc_groups for score in group.bqs]
    fig, ax = make_histogram(
        datum=pooled_scores,
        axis_labels=["Barcode Quality Score", "Number of Barcoded Subreads"],
        nbins=50,
        barcolor=get_blue(3))
    # NOTE(review): the meaning of the marker at 26 is not stated in this
    # block -- presumably a quality cutoff; confirm before changing it.
    ax.axvline(26, color='r')
    return _to_plot(fig, Constants.P_HIST_BQ, base_dir)
_custom_read_accuracy_histogram = functools.partial( _custom_histogram_with_cdf, meta_rpt.get_meta_plotgroup(Constants.PG_ACCURACY).get_meta_plot( Constants.P_ACCURACY).ylabel["Custom"], 1000000) # These functions need to generate a function with signature (data, # output_dir, dpi=) create_readlength_plot = functools.partial( create_plot, _custom_read_length_histogram, Constants.P_READLENGTH, (meta_rpt.get_meta_plotgroup(Constants.PG_READLENGTH).get_meta_plot( Constants.P_READLENGTH).xlabel, meta_rpt.get_meta_plotgroup(Constants.PG_READLENGTH).get_meta_plot( Constants.P_READLENGTH).ylabel["L"], meta_rpt.get_meta_plotgroup(Constants.PG_READLENGTH).get_meta_plot( Constants.P_READLENGTH).ylabel["R"]), 80, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3)) create_accuracy_plot = functools.partial( create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, (meta_rpt.get_meta_plotgroup(Constants.PG_ACCURACY).get_meta_plot( Constants.P_ACCURACY).xlabel, meta_rpt.get_meta_plotgroup(Constants.PG_ACCURACY).get_meta_plot( Constants.P_ACCURACY).ylabel["L"], meta_rpt.get_meta_plotgroup(Constants.PG_ACCURACY).get_meta_plot( Constants.P_ACCURACY).ylabel["R"]), 80, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3)) create_npasses_plot = functools.partial( create_plot, _make_histogram, Constants.P_NPASSES, (meta_rpt.get_meta_plotgroup(Constants.PG_NPASSES).get_meta_plot( Constants.P_NPASSES).xlabel,
fig.savefig(path, dpi=dpi) log.debug("Saved plot with id {i} to {p}".format(p=path, i=plot_id)) thumbnail = plot_name.replace(".png", "_thumb.png") fig.savefig(os.path.join(output_dir, thumbnail), dpi=20) plt.close(fig) log.debug("Saved plot to {p}".format(p=thumbnail)) plot = Plot(plot_id, os.path.basename(plot_name), thumbnail=os.path.basename(thumbnail)) return plot create_readlength_plot = functools.partial( __create_plot, _make_histogram_with_cdf, Constants.P_READLENGTH, ("Read Length", "Reads", "Reads > Read Length"), 80, "consensus_isoforms_readlength_hist.png", get_blue(3)) def makeReport(inReadsFN, inSummaryFN, outDir): """ Generate a report with ID, tables, attributes and plot groups. inReadsFN --- an input FASTA file which has all consensus isoforms produced by pbtranscript.py cluster. This file is required to plot a read length histogram as part of the report: consensus_isoforms_readlength_hist.png inSummaryFN --- a summary TXT file with cluster attributes, including two attributes: number of consensus isoforms
log.debug("Saved plot to {p}".format(p=thumbnail)) plot = Plot(plot_id, to_b(plot_name), thumbnail=to_b(thumbnail)) return plot # These functions create signatures (datum, axis_labels, nbins, barcolor _custom_read_length_histogram = functools.partial( _custom_histogram_with_cdf, "Mb > Read Length", 1000000) _custom_read_accuracy_histogram = functools.partial( _custom_histogram_with_cdf, "Mb > Predicted Accuracy", 1000000) # These functions need to generate a function with signature (datum, # output_dir, dpi=) create_readlength_plot = functools.partial(__create_plot, _custom_read_length_histogram, Constants.P_READLENGTH, ("Read Length", "Reads", "bp > Read Length"), 80, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3)) create_accuracy_plot = functools.partial(__create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, ("Quality", "Reads", "bp > Predicted Accuracy"), 80, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3)) create_npasses_plot = functools.partial(__create_plot, _make_histogram, Constants.P_NPASSES, ("Number of Passes", "Reads"), 80, Constants.I_CCS_NUM_PASSES_HIST, "#F18B17") create_scatter_plot = functools.partial(__create_plot, scatter_plot_accuracy_vs_numpasses, Constants.P_SCATTER, ("Number of passes", "Predicted accuracy (Phred QV)"), None, Constants.I_CCS_SCATTER_PLOT, get_blue(3)) def to_report(ccs_subread_set, output_dir):
fig.savefig(path, dpi=dpi) log.debug("Saved plot with id {i} to {p}".format(p=path, i=plot_id)) thumbnail = plot_name.replace(".png", "_thumb.png") fig.savefig(os.path.join(output_dir, thumbnail), dpi=20) plt.close(fig) log.debug("Saved plot to {p}".format(p=thumbnail)) plot = Plot(plot_id, os.path.basename(plot_name), thumbnail=os.path.basename(thumbnail)) return plot create_readlength_plot = functools.partial( __create_plot, _make_histogram_with_cdf, Constants.P_READLENGTH, ("Read Length", "Reads", "Reads > Read Length"), 80, "consensus_isoforms_readlength_hist.png", get_blue(3)) create_avgqv_plot = functools.partial( __create_plot, _make_histogram_with_cdf, Constants.P_AVGQV, ("HQ LQ Isoform Average QV", "Isoforms", "Isoforms > Average QV"), 80, "hq_lq_isoforms_avgqv_hist.png", get_blue(3)) def makeReport(inReadsFN, hq_isoforms_fq, lq_isoforms_fq, inSummaryFN, outDir): """ Generate a report with ID, tables, attributes and plot groups. inReadsFN --- an input FASTA file which has all consensus isoforms produced by pbtranscript.py cluster. This file is required to plot a read length histogram as part of the report:
fig.savefig(path, dpi=dpi) log.debug("Saved plot with id {i} to {p}".format(p=path, i=plot_id)) thumbnail = plot_name.replace(".png", "_thumb.png") fig.savefig(os.path.join(output_dir, thumbnail), dpi=20) plt.close(fig) log.debug("Saved plot to {p}".format(p=thumbnail)) plot = Plot(plot_id, os.path.basename(plot_name), thumbnail=os.path.basename(thumbnail)) return plot create_readlength_plot = functools.partial( __create_plot, _make_histogram_with_cdf, Constants.P_READLENGTH, ("Read Length", "Reads", "Reads > Read Length"), 80, "fulllength_nonchimeric_readlength_hist.png", get_blue(3)) def make_report(contig_set, summary_txt, output_dir): """ Generate a report with ID, tables, attributes and plot groups. :param contig_set: an input FASTA file which has all full-length, non-chimeric reads produced by pbtranscript.py classify. This file is required to plot a read length histogram as part of the report: fulllength_nonchimeric_readlength_hist.png :param summary_txt: a summary TXT file with classify attributes, including 6 attributes,
thumbnail = plot_name.replace(".png", "_thumb.png") fig.savefig(os.path.join(output_dir, thumbnail), dpi=20) plt.close(fig) log.debug("Saved plot to {p}".format(p=thumbnail)) plot = Plot(plot_id, os.path.basename(plot_name), thumbnail=os.path.basename(thumbnail)) return plot create_readlength_plot = functools.partial( __create_plot, _make_histogram_with_cdf, Constants.P_READLENGTH, ("Read Length", "Reads", "Reads > Read Length"), 80, "consensus_isoforms_readlength_hist.png", get_blue(3)) create_avgqv_plot = functools.partial( __create_plot, _make_histogram_with_cdf, Constants.P_AVGQV, ("HQ LQ Isoform Average QV", "Isoforms", "Isoforms > Average QV"), 80, "hq_lq_isoforms_avgqv_hist.png", get_blue(3)) def makeReport(inReadsFN, hq_isoforms_fq, lq_isoforms_fq, inSummaryFN, outDir): """ Generate a report with ID, tables, attributes and plot groups. inReadsFN --- an input FASTA file which has all consensus isoforms produced by pbtranscript.py cluster. This file is required to plot a read length histogram as part of the report:
# These functions create signatures (data, axis_labels, nbins, barcolor _custom_read_length_histogram = functools.partial(_custom_histogram_with_cdf, "Mb > Read Length", 1000000) _custom_read_accuracy_histogram = functools.partial(_custom_histogram_with_cdf, "Mb > Read Score", 1000000) # These functions need to generate a function with signature (data, # output_dir, dpi=) create_readlength_plot = functools.partial( create_plot, _custom_read_length_histogram, Constants.P_READLENGTH, (get_plot_xlabel(spec, Constants.PG_READLENGTH, Constants.P_READLENGTH), "Reads", "bp > Read Length"), 80, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3), ) create_accuracy_plot = functools.partial( create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, (get_plot_xlabel(spec, Constants.PG_ACCURACY, Constants.P_ACCURACY), "Reads", "bp > Read Score"), 80, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3), ) create_npasses_plot = functools.partial( create_plot, _make_histogram,
# These functions create signatures (data, axis_labels, nbins, barcolor) _custom_read_length_histogram = functools.partial(_custom_histogram_with_cdf, "Mb > Read Length", 1000000) # FIXME the fallback label is incorrect! _custom_read_accuracy_histogram = functools.partial(_custom_histogram_with_cdf, "Mb > Read Score", sys.maxint) # These functions need to generate a function with signature (data, # output_dir, dpi=) create_readlength_plot = functools.partial( create_plot, _custom_read_length_histogram, Constants.P_READLENGTH, (get_plot_xlabel(spec, Constants.PG_READLENGTH, Constants.P_READLENGTH), "Reads", "bp > Read Length"), 50, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3)) create_accuracy_plot = functools.partial( create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, (get_plot_xlabel(spec, Constants.PG_ACCURACY, Constants.P_ACCURACY), "Reads", "reads > Read Score"), 100, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3)) create_npasses_plot = functools.partial( create_plot, make_histogram, Constants.P_NPASSES, (get_plot_xlabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES), get_plot_ylabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES)), 80, Constants.I_CCS_NUM_PASSES_HIST, "#F18B17") create_scatter_plot = functools.partial( create_plot, scatter_plot_accuracy_vs_numpasses, Constants.P_SCATTER,
# PlotGroup PG_READLENGTH = "fulllength_nonchimeric_readlength_group" # Plots P_READLENGTH = "fulllength_nonchimeric_readlength_hist" # Table T_ATTR = "isoseq_classify_table" spec = load_spec(Constants.R_ID) create_readlength_plot = functools.partial( create_plot_impl, make_histogram_with_cdf, Constants.P_READLENGTH, ("Read Length", "Reads", "Reads > Read Length"), 80, "fulllength_nonchimeric_readlength_hist.png", get_blue(3)) def make_report(contig_set, summary_txt, output_dir): """ Generate a report with ID, tables, attributes and plot groups. :param contig_set: an input FASTA file which has all full-length, non-chimeric reads produced by pbtranscript.py classify. This file is required to plot a read length histogram as part of the report: fulllength_nonchimeric_readlength_hist.png :param summary_txt: a summary TXT file with classify attributes, including 6 attributes,
return plot # These functions create signatures (data, axis_labels, nbins, barcolor _custom_read_length_histogram = functools.partial( _custom_histogram_with_cdf, "Mb > Read Length", 1000000) _custom_read_accuracy_histogram = functools.partial( _custom_histogram_with_cdf, "Mb > Read Score", 1000000) # These functions need to generate a function with signature (data, # output_dir, dpi=) create_readlength_plot = functools.partial( create_plot, _custom_read_length_histogram, Constants.P_READLENGTH, (get_plot_xlabel(spec, Constants.PG_READLENGTH, Constants.P_READLENGTH), "Reads", "bp > Read Length"), 80, Constants.I_CCS_READ_LENGTH_HIST, get_blue(3)) create_accuracy_plot = functools.partial( create_plot, _custom_read_accuracy_histogram, Constants.P_ACCURACY, (get_plot_xlabel(spec, Constants.PG_ACCURACY, Constants.P_ACCURACY), "Reads", "bp > Read Score"), 80, Constants.I_CCS_READ_ACCURACY_HIST, get_green(3)) create_npasses_plot = functools.partial( create_plot, _make_histogram, Constants.P_NPASSES, (get_plot_xlabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES), get_plot_ylabel(spec, Constants.PG_NPASSES, Constants.P_NPASSES)), 80, Constants.I_CCS_NUM_PASSES_HIST, "#F18B17") create_scatter_plot = functools.partial( create_plot, scatter_plot_accuracy_vs_numpasses, Constants.P_SCATTER,
fig.savefig(os.path.join(output_dir, thumbnail), dpi=20) plt.close(fig) log.debug("Saved plot to {p}".format(p=thumbnail)) plot = Plot(plot_id, os.path.basename(plot_name), thumbnail=os.path.basename(thumbnail)) return plot create_readlength_plot = functools.partial( __create_plot, _make_histogram_with_cdf, Constants.P_READLENGTH, (meta_rpt.get_meta_plotgroup(Constants.PG_READLENGTH).get_meta_plot(Constants.P_READLENGTH).xlabel, meta_rpt.get_meta_plotgroup(Constants.PG_READLENGTH).get_meta_plot(Constants.P_READLENGTH).ylabel["L"], meta_rpt.get_meta_plotgroup(Constants.PG_READLENGTH).get_meta_plot(Constants.P_READLENGTH).ylabel["R"]), 80, "consensus_isoforms_readlength_hist.png", get_blue(3)) def makeReport(inReadsFN, inSummaryFN, outDir): """ Generate a report with ID, tables, attributes and plot groups. inReadsFN --- an input FASTA file which has all consensus isoforms produced by pbtranscript.py cluster. This file is required to plot a read length histogram as part of the report: consensus_isoforms_readlength_hist.png inSummaryFN --- a summary TXT file with cluster attributes, including two attributes: number of consensus isoforms