def _to_read_stats_plots(PlotConstants, title, readLenDists, readQualDists,
                         output_dir, dpi=72, lenDistShaper=None):
    """Render one read-length bar plot per distribution and group them.

    :param PlotConstants: constants holder providing ``PG_LENGTH``,
        ``P_LENGTH`` and ``P_LENGTH_PREFIX``
    :param title: used as title and caption of every plot and as the title
        of the plot group
    :param readLenDists: read length distributions; each one exposes
        ``bins`` and, once reshaped, ``labels``, ``bins`` and ``binWidth``
    :param readQualDists: currently unused. The read quality plots were
        switched off ("these aren't useful yet"); the parameter is kept so
        that existing callers keep working.
    :param output_dir: directory the PNG files are written to; every path
        stored on the returned objects is relative to it
    :param dpi: resolution handed to ``save_figure_with_thumbnail``
    :param lenDistShaper: optional callable applied to each distribution
        before plotting; defaults to
        ``continuous_dist_shaper(readLenDists, trim_excess=True)``
    :return: list holding the single read-length ``PlotGroup``
    """
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)

    length_plots = []
    # Thumbnail of the most recently saved plot. It stays None when
    # readLenDists is empty, which previously raised a NameError.
    # NOTE(review): assumes PlotGroup accepts thumbnail=None -- confirm.
    group_thumbnail = None
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Sanity check: reshaping must keep the total count unchanged.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)
        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels, rlendist.bins,
                     color=get_green(0), edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(get_plot_xlabel(spec, PlotConstants.PG_LENGTH,
                                            PlotConstants.P_LENGTH))
        len_axes.set_ylabel(get_plot_ylabel(spec, PlotConstants.PG_LENGTH,
                                            PlotConstants.P_LENGTH))
        png_fn = os.path.join(
            output_dir,
            "{p}{i}.png".format(i=i, p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(len_fig, png_fn,
                                                              dpi=dpi)
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 title=title, caption=title,
                 thumbnail=group_thumbnail))

    # The read quality plot group used to follow here as unreachable code
    # after the return; it was removed because it never executed.
    return [
        PlotGroup(PlotConstants.PG_LENGTH,
                  title=title,
                  plots=length_plots,
                  thumbnail=group_thumbnail)
    ]
예제 #2
0
def to_hq_hist_plot(hqbasefraction_dist, output_dir):
    """Draw the HQ base fraction distribution as a bar chart.

    :param hqbasefraction_dist: mapping with ``'NumBins'``, ``'BinCounts'``
        and ``'BinWidth'`` entries whose values expose ``metavalue``
    :param output_dir: directory receiving the PNG and its thumbnail; paths
        stored on the plot are relative to it
    :return: one-element list with the ``PlotGroup`` wrapping the plot
    """
    group_id, plot_id = Constants.PG_HQ, Constants.P_HQ
    plot_name = get_plot_title(spec, group_id, plot_id)
    x_label = get_plot_xlabel(spec, group_id, plot_id)
    y_label = get_plot_ylabel(spec, group_id, plot_id)

    nbins = int(hqbasefraction_dist['NumBins'].metavalue)
    heights = [int(count.metavalue)
               for count in hqbasefraction_dist['BinCounts']]
    # Bin positions are the fractions 0/nbins, 1/nbins, ..., (nbins-1)/nbins.
    edges = [float(index) / float(nbins) for index in xrange(nbins)]
    bin_width = float(hqbasefraction_dist['BinWidth'].metavalue)

    fig, ax = get_fig_axes_lpr()
    bar_style = dict(color=get_green(0),
                     edgecolor=get_green(0),
                     width=(bin_width * 0.75))
    ax.bar(edges, heights, **bar_style)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    png_fn = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        fig, png_fn, dpi=DEFAULT_DPI)
    hq_plot = Plot(plot_id,
                   os.path.relpath(png_base, output_dir),
                   title=plot_name,
                   caption=plot_name,
                   thumbnail=os.path.relpath(thumbnail_base, output_dir))
    return [PlotGroup(group_id, plots=[hq_plot])]
예제 #3
0
def _coverage_vs_quality_plot(contigs, output_dir):
    """
    Creates a scatter plot of mean coverage vs mean QV for the polished
    assembly.  Each point represents one contig.
    :param contigs: (dict) contig id -> ContigInfo object
    :param output_dir: (str) path to output directory
    :return: (Plot) object that has already been saved as a PNG to output_dir
    """
    fig, axes = PH.get_fig_axes_lpr()
    # NOTE(review): this rebinds the axes returned above; left as is because
    # add_subplot's reuse-vs-create behaviour depends on the matplotlib
    # version in use -- confirm before removing.
    axes = fig.add_subplot(111)
    axes.set_axisbelow(True)
    axes.set_ylabel("Mean Confidence (QV)")
    axes.set_xlabel("Mean Coverage Depth")
    PH.set_tick_label_font_size(axes, 12, 12)
    PH.set_axis_label_font_size(axes, 16)

    infos = list(contigs.values())
    x_vals = [info.mean_coverage for info in infos]
    y_vals = [info.mean_qv for info in infos]

    # max() raises ValueError on an empty sequence, so with no contigs the
    # default axis limits are kept and an empty plot is produced.
    if infos:
        # 20% headroom beyond the largest value on each axis.
        axes.set_xlim(0, max(x_vals) * 1.2)
        axes.set_ylim(0, max(y_vals) * 1.2)

    axes.scatter(x_vals, y_vals, s=12)

    png_path = os.path.join(output_dir, "polished_coverage_vs_quality.png")
    png, thumbpng = PH.save_figure_with_thumbnail(fig, png_path)

    # Only file names are stored on the Plot, not full paths.
    return Plot('cov_vs_qual', os.path.basename(png),
                thumbnail=os.path.basename(thumbpng))
예제 #4
0
def _coverage_vs_quality_plot(contigs, output_dir):
    """
    Creates a scatter plot of mean coverage vs mean QV for the polished
    assembly.  Each point represents one contig.
    :param contigs: (dict) contig id -> ContigInfo object
    :param output_dir: (str) path to output directory
    :return: (Plot) object that has already been saved as a PNG to output_dir
    """
    import pbreports.plot.helper as PH
    fig, axes = PH.get_fig_axes_lpr()
    # NOTE(review): this rebinds the axes returned above; left as is because
    # add_subplot's reuse-vs-create behaviour depends on the matplotlib
    # version in use -- confirm before removing.
    axes = fig.add_subplot(111)
    axes.set_axisbelow(True)
    axes.set_ylabel(
        get_plot_ylabel(spec, Constants.PG_COVERAGE, Constants.P_COVERAGE))
    axes.set_xlabel(
        get_plot_xlabel(spec, Constants.PG_COVERAGE, Constants.P_COVERAGE))
    PH.set_tick_label_font_size(axes, 12, 12)
    PH.set_axis_label_font_size(axes, 16)

    infos = list(contigs.values())
    x_vals = [info.mean_coverage for info in infos]
    y_vals = [info.mean_qv for info in infos]

    # max() raises ValueError on an empty sequence, so with no contigs the
    # default axis limits are kept and an empty plot is produced.
    if infos:
        # 20% headroom beyond the largest value on each axis.
        axes.set_xlim(0, max(x_vals) * 1.2)
        axes.set_ylim(0, max(y_vals) * 1.2)

    axes.scatter(x_vals, y_vals, s=12)

    png_path = os.path.join(output_dir, "polished_coverage_vs_quality.png")
    png, thumbpng = PH.save_figure_with_thumbnail(fig, png_path)

    # Only file names are stored on the Plot, not full paths.
    return Plot(Constants.P_COVERAGE,
                os.path.basename(png),
                thumbnail=os.path.basename(thumbpng))
예제 #5
0
def to_readlen_plotgroup(readlen_dist, output_dir):
    """Plot the read length distribution as a bar chart.

    :param readlen_dist: distribution exposing ``numBins``, ``bins`` and
        ``binWidth``
    :param output_dir: directory receiving the PNG and its thumbnail; paths
        stored on the plot are relative to it
    :return: one-element list with the read length ``PlotGroup``
    """
    group_id, plot_id = Constants.PG_READLENGTH, Constants.P_READLENGTH
    plot_name = get_plot_title(spec, group_id, plot_id)
    x_label = get_plot_xlabel(spec, group_id, plot_id)
    y_label = get_plot_ylabel(spec, group_id, plot_id)

    # Bin positions at multiples of the original bin width; reshape() then
    # returns the edges, heights and bin width that are actually drawn.
    raw_edges = [float(index) * readlen_dist.binWidth
                 for index in xrange(readlen_dist.numBins)]
    edges, heights, bin_width = reshape(readlen_dist, raw_edges,
                                        readlen_dist.bins)

    fig, ax = get_fig_axes_lpr()
    # No bars are drawn when the distribution holds no counts at all.
    if sum(readlen_dist.bins) > 0:
        bar_style = dict(color=get_green(0),
                         edgecolor=get_green(0),
                         width=(bin_width * 0.75))
        ax.bar(edges, heights, **bar_style)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    # Restrict y ticks to integer positions.
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    png_fn = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        fig, png_fn, dpi=DEFAULT_DPI)
    readlen_plot = Plot(plot_id,
                        os.path.relpath(png_base, output_dir),
                        title=plot_name, caption=plot_name,
                        thumbnail=os.path.relpath(thumbnail_base, output_dir))
    return [PlotGroup(group_id, plots=[readlen_plot])]
예제 #6
0
def _generate_histogram(datum, title, xlabel, ylabel=None):
    """Create a titled histogram of ``datum``.

    :param datum: values handed to ``ax.hist``
    :param title: figure super-title
    :param xlabel: x axis label
    :param ylabel: y axis label; left unset when falsy
    :return: (fig, ax) tuple
    """
    figure, axes = get_fig_axes_lpr()
    figure.suptitle(title)
    axes.hist(datum)
    axes.set_xlabel(xlabel)
    if not ylabel:
        return figure, axes
    axes.set_ylabel(ylabel)
    return figure, axes
예제 #7
0
def _apply_plot_data(x_data, y1_data, y2_data, labels, legend_loc=None):
    """Overlay two filled histograms on a figure with twin y axes.

    Default labels assume y1_data==control, y2_data==sample

    :param x_data: bin edges used for both histograms
    :param y1_data: control values, drawn against the secondary (twin) axis
    :param y2_data: sample values, drawn against the primary axis
    :param labels: (x label, y label stem); the stem gets a " (Sample)" or
        " (Control)" suffix
    :param legend_loc: ``loc`` value forwarded to ``ax.legend``
    :return: the figure
    """
    sample_color = '#5050f0'
    control_color = '#f05050'
    fill_alpha = 0.6

    # The sample has to sit on the primary (left) axis, which is why the
    # second data set is the one drawn first. A log-scale option does not
    # exist because it did not work well with the filled polygons.
    control_counts = np.histogram(y1_data, bins=x_data)[0]
    sample_counts = np.histogram(y2_data, bins=x_data)[0]

    fig, ax = get_fig_axes_lpr()
    # One x position per bin: drop the final edge.
    positions = x_data[:-1]
    baseline = np.zeros(len(positions))

    def _legend_patch(color):
        # Stand-in rectangle used only as a legend handle.
        return mp.Rectangle((0.1, 0.1),
                            0.1,
                            0.1,
                            facecolor=color,
                            edgecolor=color,
                            alpha=fill_alpha)

    ax.fill_between(positions,
                    baseline,
                    sample_counts.T,
                    alpha=fill_alpha,
                    edgecolor=sample_color,
                    facecolor=sample_color)

    control_patch = _legend_patch(control_color)
    sample_patch = _legend_patch(sample_color)

    ax.set_xlabel(labels[0])
    ax.set_ylabel('%s (Sample)' % labels[1])
    ax.legend([control_patch, sample_patch], ['Control', 'Sample'],
              loc=legend_loc)
    # gray border around legend
    ax.get_legend().get_frame().set_edgecolor('#a0a0a0')
    set_tick_label_font_size(ax, 12, 12)
    set_axis_label_font_size(ax, 16)

    twin = ax.twinx()
    twin.fill_between(positions,
                      baseline,
                      control_counts.T,
                      alpha=fill_alpha,
                      edgecolor=control_color,
                      facecolor=control_color)
    twin.set_ylabel('%s (Control)' % labels[1])
    set_tick_label_font_size(twin, 12, 12)
    set_axis_label_font_size(twin, 16)
    return fig
예제 #8
0
def _create_contig_fig_ax(bars, xlabels):
    """
    Returns a fig,ax bar plot for this contig
    :param bars: bar data handed unchanged to PH.apply_bar_data
    :param xlabels: x labels handed unchanged to PH.apply_bar_data
    """
    fig, ax = PH.get_fig_axes_lpr()
    # Axis labels come from the report metadata of the variants plot.
    x_axis_label = meta_rpt.get_meta_plotgroup(
        Constants.PG_VARIANTS).get_meta_plot(Constants.P_VARIANTS).xlabel
    y_axis_label = meta_rpt.get_meta_plotgroup(
        Constants.PG_VARIANTS).get_meta_plot(Constants.P_VARIANTS).ylabel
    PH.apply_bar_data(ax, bars, xlabels, (x_axis_label, y_axis_label))
    return fig, ax
예제 #9
0
def _create_contig_fig_ax(bars, xlabels):
    """
    Returns a fig,ax bar plot for this contig
    :param bars: bar data handed unchanged to PH.apply_bar_data
    :param xlabels: x labels handed unchanged to PH.apply_bar_data
    """
    figure, axes = PH.get_fig_axes_lpr()
    axis_labels = ('Reference Start Position', 'Variants')
    PH.apply_bar_data(axes, bars, xlabels, axis_labels)
    return figure, axes
예제 #10
0
def _create_contig_fig_ax(bars, xlabels):
    """
    Returns a fig,ax bar plot for this contig
    :param bars: bar data handed unchanged to PH.apply_bar_data
    :param xlabels: x labels handed unchanged to PH.apply_bar_data
    """
    figure, axes = PH.get_fig_axes_lpr()
    # Axis labels are looked up in the report spec for the variants plot.
    axis_labels = (
        get_plot_xlabel(spec, Constants.PG_VARIANTS, Constants.P_VARIANTS),
        get_plot_ylabel(spec, Constants.PG_VARIANTS, Constants.P_VARIANTS),
    )
    PH.apply_bar_data(axes, bars, xlabels, axis_labels)
    return figure, axes
예제 #11
0
def _create_fig_template(dims=(8, 6), facecolor='#ffffff', gridcolor='#e0e0e0'):
    """Create a figure/axes pair with the shared background and grid styling.

    :param dims: figure dimensions forwarded to ``PH.get_fig_axes_lpr``
    :param facecolor: background colour of the axes area
    :param gridcolor: colour of the grid lines
    :return: (fig, ax) tuple
    """
    fig, ax = PH.get_fig_axes_lpr(dims=dims)
    # NOTE(review): this rebinds the axes returned above; left as is because
    # add_subplot's reuse-vs-create behaviour depends on the matplotlib
    # version in use -- confirm before removing.
    ax = fig.add_subplot(111)

    # ``ax.patch`` is the portable spelling: the ``axesPatch`` alias used
    # here before was deprecated and later removed from matplotlib.
    ax.patch.set_facecolor(facecolor)
    ax.grid(color=gridcolor, linewidth=0.5, linestyle='-')
    # Keep the grid behind the plotted data.
    ax.set_axisbelow(True)
    PH.set_tick_label_font_size(ax, 12, 12)
    PH.set_axis_label_font_size(ax, 16)
    return fig, ax
예제 #12
0
def _create_fig_template(dims=(8, 6), facecolor='#ffffff',
                         gridcolor='#e0e0e0'):
    """Create a figure/axes pair with the shared background and grid styling.

    :param dims: figure dimensions forwarded to ``PH.get_fig_axes_lpr``
    :param facecolor: background colour of the axes area
    :param gridcolor: colour of the grid lines
    :return: (fig, ax) tuple
    """
    fig, ax = PH.get_fig_axes_lpr(dims=dims)
    # NOTE(review): this rebinds the axes returned above; left as is because
    # add_subplot's reuse-vs-create behaviour depends on the matplotlib
    # version in use -- confirm before removing.
    ax = fig.add_subplot(111)

    # ``ax.patch`` is the portable spelling: the ``axesPatch`` alias used
    # here before was deprecated and later removed from matplotlib.
    ax.patch.set_facecolor(facecolor)
    ax.grid(color=gridcolor, linewidth=0.5, linestyle='-')
    # Keep the grid behind the plotted data.
    ax.set_axisbelow(True)
    PH.set_tick_label_font_size(ax, 12, 12)
    PH.set_axis_label_font_size(ax, 16)
    return fig, ax
예제 #13
0
def _make_histogram(data, axis_labels, nbins, barcolor):
    """Build a fresh fig, ax pair and draw a histogram of ``data`` on it.

    :param data: np.array
    :param axis_labels: (tuple of str) axis labels forwarded to
        apply_histogram_data, e.g.
        ('Median Distance Between Adapters', 'Pre-Filter Reads')
    :param nbins: bin specification forwarded to apply_histogram_data
    :param barcolor: bar colour forwarded to apply_histogram_data
    :return: matplotlib fig, ax
    """
    figure, axes = get_fig_axes_lpr()
    apply_histogram_data(axes, data, nbins,
                         barcolor=barcolor,
                         axis_labels=axis_labels)
    return figure, axes
예제 #14
0
def make_nreads_line_plot(bc_groups, base_dir):
    """Plot the read count of every barcode group against its position.

    A horizontal red line marks the mean read count.

    :param bc_groups: sequence of objects exposing ``n_reads``; x positions
        are 1-based and follow the order of this sequence
    :param base_dir: directory forwarded to ``_to_plot``
    :return: whatever ``_to_plot`` returns for the finished figure
    """
    x = [rank for (rank, _) in enumerate(bc_groups, start=1)]
    y = [g.n_reads for g in bc_groups]
    # Force true division: under Python 2 an int/int mean was silently
    # truncated, putting the line below the real mean.
    mean_nreads = float(sum(y)) / len(y) if y else 0
    fig, ax = get_fig_axes_lpr()
    ax.plot(x, y, color='blue')
    line = ax.axhline(mean_nreads, color='red', label="Mean Number of Reads")
    ax.set_xlabel("Barcode Rank Order")
    ax.set_ylabel("Count of Reads")
    # Pass loc by keyword so it cannot be mistaken for another argument.
    fig.legend((line, ), ("Mean Number of Reads", ), loc="upper right")
    return _to_plot(fig, Constants.P_NREADS, base_dir)
예제 #15
0
def _make_histogram(datum, axis_labels, nbins, barcolor):
    """Build a fresh fig, ax pair and draw a histogram of ``datum`` on it.

    :param datum: np.array
    :param axis_labels: (tuple of str) axis labels forwarded to
        apply_histogram_data, e.g.
        ('Median Distance Between Adapters', 'Pre-Filter Reads')
    :param nbins: bin specification forwarded to apply_histogram_data
    :param barcolor: bar colour forwarded to apply_histogram_data
    :return: matplotlib fig, ax
    """
    figure, axes = get_fig_axes_lpr()
    apply_histogram_data(axes, datum, nbins,
                         barcolor=barcolor,
                         axis_labels=axis_labels)
    return figure, axes
예제 #16
0
def __generate_histogram_comparison(method_name, title, xlabel, list_fastq_stats):
    """Overlay one translucent histogram per fastq stats object.

    :param method_name: name of the attribute read from each stats object
        to obtain the values to histogram
    :param title: figure super-title
    :param xlabel: x axis label
    :param list_fastq_stats: objects exposing ``file_name`` and the
        attribute named by ``method_name``
    :return: (fig, ax) tuple
    """
    fig, ax = get_fig_axes_lpr()
    fig.suptitle(title)

    alpha = 0.3
    for fastq_stat in list_fastq_stats:
        # The legend entry is the file's base name. The hist() results were
        # previously collected in a dict that nothing ever read.
        label = os.path.basename(fastq_stat.file_name)
        ax.hist(getattr(fastq_stat, method_name), alpha=alpha, bins=85,
                label=label)

    ax.set_xlabel(xlabel)
    ax.legend(loc="best")
    return fig, ax
예제 #17
0
def __generate_histogram_comparison(method_name, title, xlabel, list_fastq_stats):
    """Overlay one translucent histogram per fastq stats object.

    :param method_name: name of the attribute read from each stats object
        to obtain the values to histogram
    :param title: figure super-title
    :param xlabel: x axis label
    :param list_fastq_stats: objects exposing ``file_name`` and the
        attribute named by ``method_name``
    :return: (fig, ax) tuple
    """
    fig, ax = get_fig_axes_lpr()
    fig.suptitle(title)

    alpha = 0.3
    for fastq_stat in list_fastq_stats:
        # The legend entry is the file's base name. The hist() results were
        # previously collected in a dict that nothing ever read.
        label = os.path.basename(fastq_stat.file_name)
        ax.hist(getattr(fastq_stat, method_name),
                alpha=alpha, bins=85, label=label)

    ax.set_xlabel(xlabel)
    ax.legend(loc="best")
    return fig, ax
예제 #18
0
def _create_histogram(stats):
    """
    Returns a fig,ax histogram plot for this reference
    :param stats: (ReferenceStats) object exposing ``maxbin`` and ``means``
    """
    target_bin_count = 100
    # Bin width is at least 1.
    step = max(1, int(stats.maxbin / target_bin_count))
    # handle case where the coverage is zero. This prevents the histogram
    # construction from crashing with an index error.
    if stats.maxbin == 0.0:
        upper = 1
    else:
        upper = stats.maxbin
    edges = np.arange(0, upper, step)
    figure, axes = get_fig_axes_lpr()
    axis_labels = ('Coverage', 'Reference Regions')
    apply_histogram_data(axes, stats.means, edges, axis_labels,
                         barcolor=Constants.COLOR_STEEL_BLUE_DARK,
                         showEdges=False)
    return figure, axes
예제 #19
0
def make_bq_qq_plot(bc_groups, base_dir):
    """
    Create Q-Q plot for barcode quality scores.

    :param bc_groups: iterable of objects exposing ``mean_bcqual()``
    :param base_dir: directory forwarded to ``_to_plot``
    :return: result of ``_to_plot``, or None (after a warning) when
        scipy.stats cannot be imported
    """
    # scipy is optional: without it the plot is skipped.
    try:
        import scipy.stats
    except ImportError:
        warnings.warn("Can't import scipy.stats")
        return None

    quality_means = [group.mean_bcqual() for group in bc_groups]
    fig, ax = get_fig_axes_lpr()
    # Probability plot against a normal distribution, drawn onto ax.
    scipy.stats.probplot(quality_means, dist="norm", plot=ax)
    ax.set_title("Q-Q Plot of Barcode Quality Scores")
    return _to_plot(fig, Constants.P_BQ_QQ, base_dir)
예제 #20
0
def scatter_plot_accuracy_vs_concordance(
    data,
    axis_labels=(
        meta_rpt.get_meta_plotgroup(Constants.PG_QV_CALIBRATION).get_meta_plot(Constants.P_QV_CALIBRATION).xlabel,
        meta_rpt.get_meta_plotgroup(Constants.PG_QV_CALIBRATION).get_meta_plot(Constants.P_QV_CALIBRATION).ylabel,
    ),
    nbins=None,
    barcolor=None,
):
    """Scatter accuracy against concordance with a red y = x reference line.

    :param data: pair ``(accuracy, concordance)``
    :param axis_labels: (x label, y label); the default is read from the
        report metadata once, when the function is defined
    :param nbins: unused here
    :param barcolor: unused here
    :return: (fig, ax) tuple
    """
    x_values, y_values = data
    fig, ax = get_fig_axes_lpr()
    points = Line(xData=x_values, yData=y_values, style="+")
    apply_line_data(
        ax=ax,
        line_models=[points],
        axis_labels=axis_labels,
        only_whole_ticks=False,
    )
    # Diagonal across the current x limits of the axes.
    lower, upper = ax.get_xlim()
    diagonal = np.linspace(lower, upper)
    ax.plot(diagonal, diagonal, "-", color="r")
    return fig, ax
예제 #21
0
def scatter_plot_accuracy_vs_numpasses(
    data,
    axis_labels=(
        get_plot_xlabel(spec, Constants.PG_SCATTER, Constants.P_SCATTER),
        get_plot_ylabel(spec, Constants.PG_SCATTER, Constants.P_SCATTER),
    ),
    nbins=None,
    barcolor=None,
):
    """Scatter the number of passes against accuracy expressed as Phred QV.

    :param data: pair ``(npasses, accuracy)``
    :param axis_labels: (x label, y label); the default is read from the
        report spec once, when the function is defined
    :param nbins: unused here
    :param barcolor: unused here
    :return: (fig, ax) tuple
    """
    pass_counts, accuracies = data
    # The y values are the accuracies converted by accuracy_as_phred_qv.
    phred_qvs = accuracy_as_phred_qv(accuracies)
    fig, ax = get_fig_axes_lpr()
    points = Line(xData=pass_counts, yData=phred_qvs, style="o")
    apply_line_data(
        ax=ax,
        line_models=[points],
        axis_labels=axis_labels,
        only_whole_ticks=False,
    )
    return fig, ax
예제 #22
0
파일: ccs.py 프로젝트: skinner/pbreports
def scatter_plot_accuracy_vs_numpasses(data,
        axis_labels=("Number of passes", "Predicted accuracy (Phred QV)"),
        nbins=None, barcolor=None):
    """Scatter the number of passes against accuracy expressed as Phred QV.

    :param data: pair ``(npasses, accuracy)``
    :param axis_labels: (x label, y label)
    :param nbins: unused here
    :param barcolor: unused here
    :return: (fig, ax) tuple
    """
    pass_counts, accuracies = data
    # The y values are the accuracies converted by accuracy_as_phred_qv.
    phred_qvs = accuracy_as_phred_qv(accuracies)
    fig, ax = get_fig_axes_lpr()
    points = Line(xData=pass_counts, yData=phred_qvs, style='o')
    apply_line_data(ax=ax, line_models=[points], axis_labels=axis_labels,
                    only_whole_ticks=False)
    return fig, ax
예제 #23
0
def _create_contig_plot(contig_coverage):
    """
    Returns a fig,ax plot for this contig
    :param contig_coverage: (ContigCoverage) object exposing xData,
        yDataMean, yDataStdevMinus and yDataStdevPlus
    """
    positions = np.array(contig_coverage.xData)
    mean_values = np.array(contig_coverage.yDataMean)
    lower_band = np.array(contig_coverage.yDataStdevMinus)
    upper_band = np.array(contig_coverage.yDataStdevPlus)
    # Mean line in the dark colour; the min/max band uses the light colour.
    band = LineFill(xData=positions,
                    yData=mean_values,
                    linecolor=Constants.COLOR_STEEL_BLUE_DARK,
                    alpha=0.6,
                    yDataMin=lower_band,
                    yDataMax=upper_band,
                    edgecolor=Constants.COLOR_STEEL_BLUE_LIGHT,
                    facecolor=Constants.COLOR_STEEL_BLUE_LIGHT)
    models = [band]
    figure, axes = get_fig_axes_lpr()
    axis_labels = ('Reference Start Position', 'Coverage')
    apply_line_data(axes, models, axis_labels)
    apply_line_fill_data(axes, models)
    return figure, axes
예제 #24
0
def scatter_plot_accuracy_vs_numpasses(
        data,
        axis_labels=(get_plot_xlabel(spec, Constants.PG_SCATTER,
                                     Constants.P_SCATTER),
                     get_plot_ylabel(spec, Constants.PG_SCATTER,
                                     Constants.P_SCATTER)),
        nbins=None,
        barcolor=None):
    """Scatter the number of passes against accuracy expressed as Phred QV.

    :param data: pair ``(npasses, accuracy)``
    :param axis_labels: (x label, y label); the default is read from the
        report spec once, when the function is defined
    :param nbins: unused here
    :param barcolor: unused here
    :return: (fig, ax) tuple
    """
    pass_counts, accuracies = data
    # The y values are the accuracies converted by accuracy_as_phred_qv.
    phred_qvs = accuracy_as_phred_qv(accuracies)
    fig, ax = get_fig_axes_lpr()
    points = Line(xData=pass_counts, yData=phred_qvs, style='o')
    apply_line_data(ax=ax, line_models=[points], axis_labels=axis_labels,
                    only_whole_ticks=False)
    return fig, ax
예제 #25
0
def scatter_plot_accuracy_vs_concordance(
        data,
        axis_labels=("Predicted accuracy", "Mapped concordance"),
        nbins=None,
        barcolor=None):
    """Scatter accuracy against concordance with a red y = x reference line.

    :param data: pair ``(accuracy, concordance)``
    :param axis_labels: (x label, y label)
    :param nbins: unused here
    :param barcolor: unused here
    :return: (fig, ax) tuple
    """
    x_values, y_values = data
    fig, ax = get_fig_axes_lpr()
    points = Line(xData=x_values, yData=y_values, style='+')
    apply_line_data(ax=ax, line_models=[points], axis_labels=axis_labels,
                    only_whole_ticks=False)
    # Diagonal across the current x limits of the axes.
    lower, upper = ax.get_xlim()
    diagonal = np.linspace(lower, upper)
    ax.plot(diagonal, diagonal, '-', color='r')
    return fig, ax
예제 #26
0
def scatter_plot_accuracy_vs_concordance(
        data,
        axis_labels=(get_plot_xlabel(spec, Constants.PG_QV_CALIBRATION,
                                     Constants.P_QV_CALIBRATION),
                     get_plot_ylabel(spec, Constants.PG_QV_CALIBRATION,
                                     Constants.P_QV_CALIBRATION)),
        nbins=None,
        barcolor=None):
    """Scatter accuracy against concordance with a red y = x reference line.

    :param data: pair ``(accuracy, concordance)``
    :param axis_labels: (x label, y label); the default is read from the
        report spec once, when the function is defined
    :param nbins: unused here
    :param barcolor: unused here
    :return: (fig, ax) tuple
    """
    x_values, y_values = data
    fig, ax = get_fig_axes_lpr()
    points = Line(xData=x_values, yData=y_values, style='+')
    apply_line_data(ax=ax, line_models=[points], axis_labels=axis_labels,
                    only_whole_ticks=False)
    # Diagonal across the current x limits of the axes.
    lower, upper = ax.get_xlim()
    diagonal = np.linspace(lower, upper)
    ax.plot(diagonal, diagonal, '-', color='r')
    return fig, ax
예제 #27
0
def _create_contig_plot(contig_coverage):
    """
    Returns a fig,ax plot for this contig
    :param contig_coverage: (ContigCoverage) object exposing xData,
        yDataMean, yDataStdevMinus and yDataStdevPlus
    """
    positions = np.array(contig_coverage.xData)
    mean_values = np.array(contig_coverage.yDataMean)
    lower_band = np.array(contig_coverage.yDataStdevMinus)
    upper_band = np.array(contig_coverage.yDataStdevPlus)
    # Mean line in the dark colour; the min/max band uses the light colour.
    band = LineFill(xData=positions,
                    yData=mean_values,
                    linecolor=Constants.COLOR_STEEL_BLUE_DARK,
                    alpha=0.6,
                    yDataMin=lower_band,
                    yDataMax=upper_band,
                    edgecolor=Constants.COLOR_STEEL_BLUE_LIGHT,
                    facecolor=Constants.COLOR_STEEL_BLUE_LIGHT)
    models = [band]
    figure, axes = get_fig_axes_lpr()
    # Axis labels come from the report metadata of the coverage plot.
    x_axis_label = meta_rpt.get_meta_plotgroup(
        Constants.PG_COVERAGE).get_meta_plot(Constants.P_COVERAGE).xlabel
    y_axis_label = meta_rpt.get_meta_plotgroup(
        Constants.PG_COVERAGE).get_meta_plot(Constants.P_COVERAGE).ylabel
    apply_line_data(axes, models, (x_axis_label, y_axis_label))
    apply_line_fill_data(axes, models)
    return figure, axes
예제 #28
0
def _apply_plot_data(x_data, y1_data, y2_data, labels, legend_loc=None):
    """Overlay two filled histograms on a figure with twin y axes.

    Default labels assume y1_data==control, y2_data==sample

    :param x_data: bin edges used for both histograms
    :param y1_data: control values, drawn against the secondary (twin) axis
    :param y2_data: sample values, drawn against the primary axis
    :param labels: (x label, y label stem); the stem gets a " (Sample)" or
        " (Control)" suffix
    :param legend_loc: ``loc`` value forwarded to ``ax.legend``
    :return: the figure
    """
    sample_color = '#5050f0'
    control_color = '#f05050'
    fill_alpha = 0.6

    # The sample has to sit on the primary (left) axis, which is why the
    # second data set is the one drawn first. A log-scale option does not
    # exist because it did not work well with the filled polygons.
    control_counts = np.histogram(y1_data, bins=x_data)[0]
    sample_counts = np.histogram(y2_data, bins=x_data)[0]

    fig, ax = get_fig_axes_lpr()
    # One x position per bin: drop the final edge.
    positions = x_data[:-1]
    baseline = np.zeros(len(positions))

    def _legend_patch(color):
        # Stand-in rectangle used only as a legend handle.
        return mp.Rectangle((0.1, 0.1), 0.1, 0.1, facecolor=color,
                            edgecolor=color, alpha=fill_alpha)

    ax.fill_between(positions, baseline, sample_counts.T, alpha=fill_alpha,
                    edgecolor=sample_color, facecolor=sample_color)

    control_patch = _legend_patch(control_color)
    sample_patch = _legend_patch(sample_color)

    ax.set_xlabel(labels[0])
    ax.set_ylabel('%s (Sample)' % labels[1])
    ax.legend([control_patch, sample_patch],
              ['Control', 'Sample'], loc=legend_loc)
    # gray border around legend
    ax.get_legend().get_frame().set_edgecolor('#a0a0a0')
    set_tick_label_font_size(ax, 12, 12)
    set_axis_label_font_size(ax, 16)

    twin = ax.twinx()
    twin.fill_between(positions, baseline, control_counts.T, alpha=fill_alpha,
                      edgecolor=control_color, facecolor=control_color)
    twin.set_ylabel('%s (Control)' % labels[1])
    set_tick_label_font_size(twin, 12, 12)
    set_axis_label_font_size(twin, 16)
    return fig
예제 #29
0
파일: ccs.py 프로젝트: mdsmith/pbreports
def scatter_plot_accuracy_vs_numpasses(data,
                                       axis_labels=(
                                           meta_rpt.get_meta_plotgroup(Constants.PG_SCATTER).get_meta_plot(Constants.P_SCATTER).xlabel,
                                           meta_rpt.get_meta_plotgroup(Constants.PG_SCATTER).get_meta_plot(Constants.P_SCATTER).ylabel),
                                       nbins=None, barcolor=None):
    """Scatter the number of passes against accuracy expressed as Phred QV.

    :param data: pair ``(npasses, accuracy)``
    :param axis_labels: (x label, y label); the default is read from the
        report metadata once, when the function is defined
    :param nbins: unused here
    :param barcolor: unused here
    :return: (fig, ax) tuple
    """
    pass_counts, accuracies = data
    # The y values are the accuracies converted by accuracy_as_phred_qv.
    phred_qvs = accuracy_as_phred_qv(accuracies)
    fig, ax = get_fig_axes_lpr()
    points = Line(xData=pass_counts, yData=phred_qvs, style='o')
    apply_line_data(ax=ax, line_models=[points], axis_labels=axis_labels,
                    only_whole_ticks=False)
    return fig, ax
예제 #30
0
def scatter_plot_accuracy_vs_concordance(
        data,
        axis_labels,
        nbins,
        barcolor):
    """Scatter accuracy against concordance with a red y = x reference line.

    :param data: pair ``(accuracy, concordance)``
    :param axis_labels: (x label, y label)
    :param nbins: unused here
    :param barcolor: unused here
    :return: (fig, ax) tuple
    """
    x_values, y_values = data
    fig, ax = get_fig_axes_lpr()
    points = Line(xData=x_values, yData=y_values, style='+')
    apply_line_data(ax=ax, line_models=[points], axis_labels=axis_labels,
                    only_whole_ticks=False)
    # Diagonal across the current x limits of the axes.
    lower, upper = ax.get_xlim()
    diagonal = np.linspace(lower, upper)
    ax.plot(diagonal, diagonal, '-', color='r')
    return fig, ax
예제 #31
0
 def _create_histogram(self, stats):
     """
     Returns a fig,ax histogram plot for this reference
     :param stats: (ReferenceStats) object exposing ``maxbin`` and ``means``
     """
     target_bin_count = 100
     # Bin width is at least 1.
     step = max(1, int(stats.maxbin / target_bin_count))
     # handle case where the coverage is zero. This prevents the histogram
     # construction from crashing with an index error.
     if stats.maxbin == 0.0:
         upper = 1
     else:
         upper = stats.maxbin
     edges = np.arange(0, upper, step)
     figure, axes = get_fig_axes_lpr()
     # Axis labels are looked up in this report's spec.
     axis_labels = (
         get_plot_xlabel(self.spec, Constants.PG_COVERAGE_HIST,
                         Constants.P_COVERAGE_HIST),
         get_plot_ylabel(self.spec, Constants.PG_COVERAGE_HIST,
                         Constants.P_COVERAGE_HIST),
     )
     apply_histogram_data(axes, stats.means, edges, axis_labels,
                          barcolor=Constants.COLOR_STEEL_BLUE_DARK,
                          showEdges=False)
     return figure, axes
    def test_save_histograms(self):
        """Save std histograms."""
        # The temp directory is left on disk; its path is logged below.
        tmpdir = tempfile.mkdtemp(prefix='pbreport_output')
        figure, axes = get_fig_axes_lpr()

        dump = os.path.join(tmpdir, 'dmp.data')
        log.debug('dmp file is {d}'.format(d=dump))

        histogram_options = dict(axis_labels=('foo', 'bar'),
                                 barcolor='#505050',
                                 xlim=(0, 20000),
                                 data_file=dump)
        apply_histogram_data(axes, [1, 2, 3, 4, 5, 6], 2, **histogram_options)

        # Passing data_file is expected to leave a data dump on disk.
        self.assertTrue(os.path.exists(dump))
        log.info("Writing temp histogram to {t}".format(t=tmpdir))

        save_figure_with_thumbnail(figure, os.path.join(tmpdir, 'foo.png'))
        # Both the image and its thumbnail must exist afterwards.
        for file_name in ('foo.png', 'foo_thumb.png'):
            self.assertTrue(os.path.exists(os.path.join(tmpdir, file_name)))
    def test_save_histograms(self):
        """Save std histograms."""
        # The temp directory is left on disk; its path is logged below.
        tmpdir = tempfile.mkdtemp(prefix='pbreport_output')
        figure, axes = get_fig_axes_lpr()

        dump = os.path.join(tmpdir, 'dmp.data')
        log.debug('dmp file is {d}'.format(d=dump))

        histogram_options = dict(axis_labels=('foo', 'bar'),
                                 barcolor='#505050',
                                 xlim=(0, 20000),
                                 data_file=dump)
        apply_histogram_data(axes, [1, 2, 3, 4, 5, 6], 2, **histogram_options)

        # Passing data_file is expected to leave a data dump on disk.
        self.assertTrue(os.path.exists(dump))
        log.info("Writing temp histogram to {t}".format(t=tmpdir))

        save_figure_with_thumbnail(figure, os.path.join(tmpdir, 'foo.png'))
        # Both the image and its thumbnail must exist afterwards.
        for file_name in ('foo.png', 'foo_thumb.png'):
            self.assertTrue(os.path.exists(os.path.join(tmpdir, file_name)))
예제 #34
0
def to_rl_overlay_plot(numunfilteredbasecalls_dist, readlen_dist, output_dir):
    """Overlay the unfiltered and polymerase histograms in one plot.

    :param numunfilteredbasecalls_dist: mapping with ``'BinCounts'`` and
        ``'BinWidth'`` entries exposing ``metavalue``; drawn as "Unfiltered"
    :param readlen_dist: mapping of the same shape; drawn as "Polymerase"
    :param output_dir: directory receiving the PNG and its thumbnail; paths
        stored on the plot are relative to it
    :return: one-element list with the ``PlotGroup`` wrapping the plot
    """
    group_id, plot_id = Constants.PG_RRL, Constants.P_RRL
    plot_name = get_plot_title(spec, group_id, plot_id)
    x_label = get_plot_xlabel(spec, group_id, plot_id)
    y_label = get_plot_ylabel(spec, group_id, plot_id)

    def _bin_counts(dist):
        # Integer count of every bin in the distribution.
        return [int(bc.metavalue) for bc in dist['BinCounts']]

    unfiltered_bins = _bin_counts(numunfilteredbasecalls_dist)
    poly_bins = _bin_counts(readlen_dist)
    # Upper end of each histogram range: number of bins times bin width.
    max_unfiltered = len(unfiltered_bins) * \
        int(numunfilteredbasecalls_dist['BinWidth'].metavalue)
    max_poly = len(poly_bins) * int(readlen_dist['BinWidth'].metavalue)
    unfiltered_data = expand_data(unfiltered_bins, max_unfiltered)
    poly_data = expand_data(poly_bins, max_poly)

    fig, ax = get_fig_axes_lpr()
    series = (
        ("Unfiltered", unfiltered_data, unfiltered_bins, max_unfiltered),
        ("Polymerase", poly_data, poly_bins, max_poly),
    )
    for label, samples, counts, upper in series:
        ax.hist(samples,
                label=label,
                histtype='stepfilled',
                alpha=0.3,
                bins=len(counts),
                range=[0, upper])
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.legend()

    png_fn = os.path.join(output_dir, "{p}.png".format(p=plot_id))
    png_base, thumbnail_base = save_figure_with_thumbnail(
        fig, png_fn, dpi=DEFAULT_DPI)
    rrl_plot = Plot(plot_id,
                    os.path.relpath(png_base, output_dir),
                    title=plot_name,
                    caption=plot_name,
                    thumbnail=os.path.relpath(thumbnail_base, output_dir))
    return [PlotGroup(group_id, plots=[rrl_plot])]
def scatter_plot_accuracy_vs_concordance(
        data,
        axis_labels=(
            get_plot_xlabel(spec, Constants.PG_QV_CALIBRATION,
                            Constants.P_QV_CALIBRATION),
            get_plot_ylabel(spec, Constants.PG_QV_CALIBRATION,
                            Constants.P_QV_CALIBRATION)),
        nbins=None,
        barcolor=None):
    """Scatter accuracy against concordance with a red y = x reference line.

    :param data: pair ``(accuracy, concordance)``
    :param axis_labels: (x label, y label); the default is read from the
        report spec once, when the function is defined
    :param nbins: unused here
    :param barcolor: unused here
    :return: (fig, ax) tuple
    """
    x_values, y_values = data
    fig, ax = get_fig_axes_lpr()
    points = Line(xData=x_values, yData=y_values, style='+')
    apply_line_data(ax=ax, line_models=[points], axis_labels=axis_labels,
                    only_whole_ticks=False)
    # Diagonal across the current x limits of the axes.
    lower, upper = ax.get_xlim()
    diagonal = np.linspace(lower, upper)
    ax.plot(diagonal, diagonal, '-', color='r')
    return fig, ax
예제 #36
0
 def _create_contig_plot(self, contig_coverage):
     """
     Returns a fig,ax plot for this contig
     :param contig_coverage: (ContigCoverage) object exposing xData,
         yDataMean, yDataStdevMinus and yDataStdevPlus
     """
     positions = np.array(contig_coverage.xData)
     mean_values = np.array(contig_coverage.yDataMean)
     lower_band = np.array(contig_coverage.yDataStdevMinus)
     upper_band = np.array(contig_coverage.yDataStdevPlus)
     # Mean line in the dark colour; the min/max band uses the light colour.
     band = LineFill(xData=positions,
                     yData=mean_values,
                     linecolor=Constants.COLOR_STEEL_BLUE_DARK,
                     alpha=0.6,
                     yDataMin=lower_band,
                     yDataMax=upper_band,
                     edgecolor=Constants.COLOR_STEEL_BLUE_LIGHT,
                     facecolor=Constants.COLOR_STEEL_BLUE_LIGHT)
     models = [band]
     figure, axes = get_fig_axes_lpr()
     # Axis labels are looked up in this report's spec.
     axis_labels = (
         get_plot_xlabel(
             self.spec, Constants.PG_COVERAGE, Constants.P_COVERAGE),
         get_plot_ylabel(
             self.spec, Constants.PG_COVERAGE, Constants.P_COVERAGE),
     )
     apply_line_data(axes, models, axis_labels)
     apply_line_fill_data(axes, models)
     return figure, axes
예제 #37
0
def plot_aggregator_histogram(a, plot_view, output_dir):
    """
    Draw a histogram aggregator as a bar chart.

    One bar is drawn per entry of ``a.bins``; bar ``i`` starts at
    ``a.dx * i`` and is ``a.dx`` wide. The x axis is cropped to the span
    between the first and the last non-empty bin. When every bin is empty
    nothing is cropped and the whole ``a.nbins * a.dx`` range is shown.

    This does NOT save the image!

    :param a: Histogram Aggregator
    :param plot_view: Instance of PlotViewProperties
    :param output_dir: accepted but not used by this function

    :type plot_view: PlotViewProperties
    :type a: HistogramAggregator
    :type output_dir: str

    :return: (fig, ax) tuple from get_fig_axes_lpr()
    """

    h = a.bins
    # Left edge of every bin plus the right edge of the last one.
    bin_edges = [a.dx * i for i in range(len(a.bins) + 1)]

    # Count the empty bins at each end of the histogram. Both counts default
    # to 0 so that an all-empty histogram keeps its full range (the previous
    # default for the trailing side was a forward index, which collapsed the
    # range to a single bin in that case).
    leading_empty = next(
        (i for i, v in enumerate(a.bins) if v != 0), 0)
    trailing_empty = next(
        (i for i, v in enumerate(a.bins[::-1]) if v != 0), 0)

    min_x = leading_empty * a.dx
    max_x = (a.nbins - trailing_empty) * a.dx

    log.debug(a)
    log.debug((len(h), len(bin_edges)))
    log.debug(("Min, Max", min_x, max_x))

    fig, ax = get_fig_axes_lpr()

    # Colours come from the plot view; the bar width is the bin width.
    ax.bar(bin_edges[:-1], h, width=a.dx, color=plot_view.color,
           edgecolor=plot_view.edgecolor)

    # Custom limits
    ax.set_xlim(min_x, max_x)

    ax.set_ylabel(plot_view.ylabel)
    ax.set_xlabel(plot_view.xlabel)

    if plot_view.title is not None:
        ax.set_title(plot_view.title)

    return fig, ax
예제 #38
0
def to_report_impl(dset, output_dir, dpi=DEFAULT_DPI):
    """
    Build the adapter report for a dataset whose summary stats are loaded.

    One bar chart PNG (plus thumbnail) is written to ``output_dir`` for each
    entry of ``medianInsertDists``. The returned report carries the
    attributes only: the plot group is assembled but currently not attached
    to the Report.

    :param dset: dataset exposing ``metadata.summaryStats``
    :param output_dir: directory that receives the PNG files
    :param dpi: resolution forwarded to save_figure_with_thumbnail
    :return: result of ``spec.apply_view`` on the assembled Report
    :raises InvalidStatsError: if ``medianInsertDists`` is missing or empty
    """
    if not dset.metadata.summaryStats.medianInsertDists:
        raise InvalidStatsError("Pipeline Summary Stats (sts.xml) not found "
                                "or missing key distributions")

    # Pull some stats (fractions are reported as percentages):
    adapter_dimers = np.round(100.0 *
                              dset.metadata.summaryStats.adapterDimerFraction,
                              decimals=2)
    short_inserts = np.round(100.0 *
                             dset.metadata.summaryStats.shortInsertFraction,
                             decimals=2)
    attributes = [
        Attribute(i, v)
        for i, v in zip([Constants.A_DIMERS, Constants.A_SHORT_INSERTS],
                        [adapter_dimers, short_inserts])
    ]

    if Constants.BASE_RATE_DIST in dset.metadata.summaryStats.tags:
        dist = dset.metadata.summaryStats[Constants.BASE_RATE_DIST]
        if len(dist) > 1:
            # log.warning is the supported spelling; warn is a deprecated
            # alias.
            log.warning("Dataset was merged, local base rate not applicable")
        else:
            base_rate = dist[0].sampleMed
            attributes.append(Attribute(Constants.A_BASE_RATE, base_rate))
    else:
        log.warning("No local base rate distribution available")

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(
        dset.metadata.summaryStats.medianInsertDists)
    for i, orig_ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart; the labels are materialised as a list because a
        # lazy map object is not a sequence on Python 3:
        fig, ax = get_fig_axes_lpr()
        ax.bar([float(label) for label in ins_len_dist.labels],
               ins_len_dist.bins,
               color=get_green(0),
               edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(
            get_plot_xlabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        ax.set_ylabel(
            get_plot_ylabel(spec, Constants.PG_ADAPTER, Constants.P_ADAPTER))
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig,
                                                              png_fn,
                                                              dpi=dpi)

        # build the report:
        plots.append(
            Plot("adapter_xml_plot_{i}".format(i=i),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    # thumbnail_base is always bound here: the guard at the top guarantees
    # at least one distribution. The group uses the last plot's thumbnail.
    plot_groups = [
        PlotGroup(Constants.PG_ADAPTER,
                  plots=plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]
    tables = []

    report = Report(
        Constants.R_ID,
        attributes=attributes,
        tables=tables,
    )  # plotgroups=plot_groups)

    return spec.apply_view(report)
예제 #39
0
def _to_read_stats_plots(PlotConstants,
                         title,
                         readLenDists,
                         readQualDists,
                         output_dir,
                         dpi=72,
                         lenDistShaper=None):
    """
    Render one bar chart per read-length distribution.

    Each chart is saved as a PNG (with thumbnail) in ``output_dir`` and
    wrapped in a Plot; all plots are returned inside a single PlotGroup.
    The read-quality section at the bottom is currently disabled.

    :param PlotConstants: holder of the PG_*/P_* ids and file name prefixes
    :param title: accepted but not used by this function
    :param readLenDists: read-length distributions to plot
    :param readQualDists: read-quality distributions (only used by the
        disabled section)
    :param output_dir: directory that receives the PNG files
    :param dpi: resolution forwarded to save_figure_with_thumbnail
    :param lenDistShaper: callable applied to each length distribution
        before plotting; built with continuous_dist_shaper when omitted
    :return: list holding the read-length PlotGroup
    """
    length_plots = []
    # Relative thumbnail path of the most recent plot. It stays None when
    # there are no distributions, so the group is then built without a
    # thumbnail instead of failing on an unbound name.
    group_thumbnail = None
    # ReadLen distribution to barplot:
    if lenDistShaper is None:
        lenDistShaper = continuous_dist_shaper(readLenDists, trim_excess=True)
    for i, orig_rlendist in enumerate(readLenDists):
        rlendist = lenDistShaper(orig_rlendist)
        # Reshaping must not gain or lose reads.
        assert sum(orig_rlendist.bins) == sum(rlendist.bins)
        len_fig, len_axes = get_fig_axes_lpr()
        len_axes.bar(rlendist.labels,
                     rlendist.bins,
                     color=get_green(0),
                     edgecolor=get_green(0),
                     width=(rlendist.binWidth * 0.75))
        len_axes.set_xlabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).xlabel)
        len_axes.set_ylabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_LENGTH).get_meta_plot(
                PlotConstants.P_LENGTH).ylabel)
        png_fn = os.path.join(
            output_dir, "{p}{i}.png".format(i=i,
                                            p=PlotConstants.P_LENGTH_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(len_fig,
                                                              png_fn,
                                                              dpi=dpi)
        group_thumbnail = os.path.relpath(thumbnail_base, output_dir)
        length_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_LENGTH),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=group_thumbnail))

    group_kwargs = {"plots": length_plots}
    if group_thumbnail is not None:
        group_kwargs["thumbnail"] = group_thumbnail
    plot_groups = [PlotGroup(PlotConstants.PG_LENGTH, **group_kwargs)]
    return plot_groups

    # FIXME these aren't useful yet
    # Everything below is deliberately unreachable until the quality plots
    # are wanted again.
    qual_plots = []
    # ReadQual distribution to barplot:
    shaper = continuous_dist_shaper(readQualDists, trim_excess=True)
    for i, orig_rqualdist in enumerate(readQualDists):
        rqualdist = shaper(orig_rqualdist)
        qual_fig, qual_axes = get_fig_axes_lpr()
        qual_axes.bar(rqualdist.labels,
                      rqualdist.bins,
                      color=get_green(0),
                      edgecolor=get_green(0),
                      width=(rqualdist.binWidth * 0.75))
        # The x label is taken from the quality plot, matching the y label
        # (it used to be looked up from the length plot).
        qual_axes.set_xlabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_QUAL).get_meta_plot(
                PlotConstants.P_QUAL).xlabel)
        qual_axes.set_ylabel(
            meta_rpt.get_meta_plotgroup(PlotConstants.PG_QUAL).get_meta_plot(
                PlotConstants.P_QUAL).ylabel)
        png_fn = os.path.join(
            output_dir, "{p}{i}.png".format(i=i,
                                            p=PlotConstants.P_QUAL_PREFIX))
        png_base, thumbnail_base = save_figure_with_thumbnail(qual_fig,
                                                              png_fn,
                                                              dpi=dpi)
        qual_plots.append(
            Plot("{p}_{i}".format(i=i, p=PlotConstants.P_QUAL),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))
    plot_groups.append(PlotGroup(PlotConstants.PG_QUAL, plots=qual_plots))
    return plot_groups
예제 #40
0
def to_report(stats_xml, output_dir, dpi=72):
    """Entry point: build the filtering stats report.

    The read-length and read-quality distributions are summarised into one
    table and drawn as one bar chart per distribution; the charts are saved
    under ``output_dir``.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    """
    log.info("Analyzing XML {f}".format(f=stats_xml))
    # The input is expected to be a dataset ...
    dset = DataSet(stats_xml)
    dataset_uuids = [dset.uuid]
    if not dset.metadata.summaryStats:
        # ... otherwise it is treated as an sts file and the stats are loaded
        # from it. That generates a new random uuid, so none is reported.
        dset.loadStats(stats_xml)
        dataset_uuids = []
    if not dset.metadata.summaryStats.readLenDists:
        raise RuntimeError("No Pipeline Summary Stats (sts.xml) found")

    # Running totals for the stats table:
    base_total = 0
    read_total = 0
    score_total = 0
    score_count = 0
    per_read_lens = []

    # A failed merge can leave more than one distribution behind:
    for len_dist in dset.metadata.summaryStats.readLenDists:
        base_total += _total_from_bins(len_dist.bins,
                                       len_dist.minBinValue,
                                       len_dist.binWidth)
        read_total += sum(len_dist.bins)

        # N50: expand the histogram into one approximate length per read.
        final_idx = len(len_dist.bins) - 1
        for idx, count in enumerate(len_dist.bins):
            # Every bin starts from its lower edge; all but the last bin are
            # then moved to the bin centre.
            approx_len = (idx * len_dist.binWidth) + len_dist.minBinValue
            if idx != final_idx:
                approx_len = approx_len + len_dist.binWidth / 2
            per_read_lens.extend([approx_len] * count)
            # TODO(mdsmith)(2016-02-09) make sure maxOutlierValue is updated
            # during a merge, then use it in place of the last bin's value.
    n50 = np.round(compute_n50(per_read_lens))

    for qual_dist in dset.metadata.summaryStats.readQualDists:
        score_total += _total_from_bins(qual_dist.bins,
                                        qual_dist.minBinValue,
                                        qual_dist.binWidth)
        score_count += sum(qual_dist.bins)

    # Means fall back to 0 when there is nothing to average over.
    if read_total != 0:
        mean_len = np.round(base_total / read_total, decimals=2)
    else:
        mean_len = 0
    if score_count != 0:
        mean_quality = np.round(score_total / score_count, decimals=2)
    else:
        mean_quality = 0

    metric_names = [
        "Polymerase Read Bases",
        "Polymerase Reads",
        "Polymerase Read N50",
        "Polymerase Read Length",
        "Polymerase Read Quality",
    ]
    metric_values = [
        np.round(base_total, decimals=2),
        read_total,
        n50,
        mean_len,
        mean_quality,
    ]

    # ReadLen distribution to barplot:
    len_plots = []
    for idx, len_dist in enumerate(dset.metadata.summaryStats.readLenDists):
        fig, axes = get_fig_axes_lpr()
        bar_color = get_green(0)
        bar_edge = get_green(0)
        axes.bar(len_dist.labels, len_dist.bins,
                 color=bar_color, edgecolor=bar_edge,
                 width=(len_dist.binWidth * 0.75))
        axes.set_xlabel('Read Length')
        axes.set_ylabel('Reads')
        png_path = os.path.join(output_dir,
                                "readLenDist{i}.png".format(i=idx))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            fig, png_path, dpi=dpi)
        len_plots.append(
            Plot("filter_len_xml_plot_{i}".format(i=idx),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    # The group thumbnail is the thumbnail of the last length plot.
    plot_groups = [
        PlotGroup("filter_len_xml_plot_group",
                  title="Polymerase Read Length",
                  plots=len_plots,
                  thumbnail=os.path.relpath(thumbnail_base, output_dir))
    ]

    # ReadQual distribution to barplot:
    qual_plots = []
    for idx, qual_dist in enumerate(
            dset.metadata.summaryStats.readQualDists):
        fig, axes = get_fig_axes_lpr()
        bar_color = get_green(0)
        bar_edge = get_green(0)
        axes.bar(qual_dist.labels, qual_dist.bins,
                 color=bar_color, edgecolor=bar_edge,
                 width=(qual_dist.binWidth * 0.75))
        axes.set_xlabel('Read Quality')
        axes.set_ylabel('Reads')
        png_path = os.path.join(output_dir,
                                "readQualDist{i}.png".format(i=idx))
        png_base, thumbnail_base = save_figure_with_thumbnail(
            fig, png_path, dpi=dpi)
        qual_plots.append(
            Plot("filter_qual_xml_plot_{i}".format(i=idx),
                 os.path.relpath(png_base, output_dir),
                 thumbnail=os.path.relpath(thumbnail_base, output_dir)))

    plot_groups.append(
        PlotGroup("filter_qual_xml_plot_group",
                  title="Polymerase Read Quality",
                  plots=qual_plots))

    # build the report:
    columns = [
        Column("filter_names_column", header="Metrics",
               values=metric_names),
        Column("filter_stats_column", header="Values",
               values=metric_values),
    ]
    tables = [Table("filter_xml_table", "Filtering Statistics", columns)]

    return Report("filtering_stats_xml_report",
                  title="Filtering stats XML report",
                  tables=tables,
                  attributes=None,
                  plotgroups=plot_groups,
                  dataset_uuids=dataset_uuids)
예제 #41
0
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Builds the adapter report: one bar chart PNG (plus thumbnail) is written
    to ``output_dir`` per entry of ``medianInsertDists``. The returned
    report carries the adapter-dimer and short-insert percentages as
    attributes; the plot group is assembled but currently not attached.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises IOError: if no ``medianInsertDists`` are available
    """
    # TODO: make dpi matter
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = SubreadSet(stats_xml)
    if not dset.metadata.summaryStats:
        # No stats on the dataset itself: load them from the given file.
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.medianInsertDists:
        raise IOError("Pipeline Summary Stats (sts.xml) not found or missing "
                      "key distributions")

    # Pull some stats (fractions are reported as percentages):
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction,
        decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction,
        decimals=2)

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    shaper = continuous_dist_shaper(
        dset.metadata.summaryStats.medianInsertDists)
    meta_plot = meta_rpt.get_meta_plotgroup(
        Constants.PG_ADAPTER).get_meta_plot(Constants.P_ADAPTER)
    for i, orig_ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        ins_len_dist = shaper(orig_ins_len_dist)
        # make a bar chart; the labels are materialised as a list because a
        # lazy map object is not a sequence on Python 3:
        fig, ax = get_fig_axes_lpr()
        ax.bar([float(label) for label in ins_len_dist.labels],
               ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel(meta_plot.xlabel)
        ax.set_ylabel(meta_plot.ylabel)
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report:
        plots.append(Plot("adapter_xml_plot_{i}".format(i=i),
                          os.path.relpath(png_base, output_dir),
                          thumbnail=os.path.relpath(thumbnail_base,
                                                    output_dir)))

    # thumbnail_base is always bound here: the guard above guarantees at
    # least one distribution. The group uses the last plot's thumbnail.
    plot_groups = [PlotGroup(Constants.PG_ADAPTER,
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base,
                                                       output_dir))]
    attributes = [
        Attribute(i, v)
        for i, v in zip([Constants.A_DIMERS, Constants.A_SHORT_INSERTS],
                        [adapter_dimers, short_inserts])
    ]

    tables = []

    report = Report(meta_rpt.id,
                    title=meta_rpt.title,
                    attributes=attributes,
                    tables=tables,
                    )  # plotgroups=plot_groups)

    return meta_rpt.apply_view(report)
예제 #42
0
def _make_plot(data,
               png_fn,
               bounds=None,
               dpi=DEFAULT_DPI,
               nolegend=False,
               x_label="Subread Length (bp)"):
    """Make a scatterplot of read length and concordance

    Points are drawn in one colour per QV bin. The bins split the range from
    1.0 up to ``max(data[:, 2]) + 1`` into as many equal parts as there are
    colours. The figure is saved (with a thumbnail) and then closed.

    :param data: 2-D array; column 0 is plotted on x, column 1 on y and
        column 2 selects the QV bin
    :param png_fn: path handed to save_figure_with_thumbnail
    :param bounds: optional ``"xmin:xmax:ymin:ymax"`` string of integers
    :param dpi: resolution, converted with int() before saving
    :param nolegend: when true, no legend is drawn
    :param x_label: label of the x axis
    """
    fig, axes = get_fig_axes_lpr()

    qv_colors = ['#fc9272', '#fb6a4a', '#ef3b2c', '#cb181d']
    # plot by z-values
    qv_min = 1.0
    handles = []
    labels = []
    # Make sure the max actually gets in a bin
    qv_max = max(data[:, 2]) + 1
    qv_delta = (qv_max - qv_min) / len(qv_colors)
    for qv_bin, color in zip(np.arange(qv_min, qv_max, qv_delta),
                             qv_colors):
        if qv_bin > qv_max:
            break
        qv_bin_max = qv_bin + qv_delta
        # Rows whose QV falls into the half-open bin [qv_bin, qv_bin_max).
        in_bin = (data[:, 2] >= qv_bin) & (data[:, 2] < qv_bin_max)
        points = data[in_bin, :]
        if len(points[:, 0]) > 0:
            l, = axes.plot(points[:, 0],
                           points[:, 1],
                           'o',
                           c=color,
                           mec=color,
                           alpha=0.1,
                           ms=2.0)
            handles.append(l)
            labels.append('QV >= %d' % qv_bin)
    if not nolegend:
        axes.legend(handles,
                    labels,
                    loc='lower right',
                    numpoints=1,
                    borderpad=0.3,
                    markerscale=2.0,
                    handletextpad=0.3,
                    labelspacing=0.3,
                    handlelength=0.5)
        axes.get_legend().get_frame().set_edgecolor('#a0a0a0')

    if bounds:
        # A real list is needed: the result is indexed below, which a lazy
        # map object does not support on Python 3.
        intbounds = [int(field) for field in bounds.split(":")]
        axes.set_xlim(xmin=intbounds[0], xmax=intbounds[1])
        axes.set_ylim(ymin=intbounds[2], ymax=intbounds[3])
    axes.set_xlabel(x_label)
    axes.set_ylabel('Mapped Concordance')
    save_figure_with_thumbnail(fig, png_fn, dpi=int(dpi))
    plt.close(fig)
예제 #43
0
def to_report(stats_xml, output_dir, dpi=72):
    """Main point of entry

    Builds the adapter report: one bar chart PNG (plus thumbnail) is written
    to ``output_dir`` per entry of ``medianInsertDists``, and the
    adapter-dimer and short-insert percentages are reported in a table.
    Plot and thumbnail paths are stored relative to ``output_dir``.

    :type stats_xml: str
    :type output_dir: str
    :type dpi: int

    :rtype: Report
    :raises RuntimeError: if no ``medianInsertDists`` are available
    """
    # TODO: make dpi matter
    log.info("Analyzing XML {f}".format(f=stats_xml))
    dset = DataSet(stats_xml)
    if not dset.metadata.summaryStats:
        # No stats on the dataset itself: load them from the given file.
        dset.loadStats(stats_xml)
    if not dset.metadata.summaryStats.medianInsertDists:
        raise RuntimeError("No Pipeline Summary Stats (sts.xml) found")

    # Pull some stats (fractions are reported as percentages):
    adapter_dimers = np.round(
        100.0 * dset.metadata.summaryStats.adapterDimerFraction,
        decimals=2)
    short_inserts = np.round(
        100.0 * dset.metadata.summaryStats.shortInsertFraction,
        decimals=2)

    plots = []
    # Pull some histograms (may have dupes (unmergeable distributions)):
    for i, ins_len_dist in enumerate(
            dset.metadata.summaryStats.medianInsertDists):
        # make a bar chart; the labels are materialised as a list because a
        # lazy map object is not a sequence on Python 3:
        fig, ax = get_fig_axes_lpr()
        ax.bar([float(label) for label in ins_len_dist.labels],
               ins_len_dist.bins,
               color=get_green(0), edgecolor=get_green(0),
               width=(ins_len_dist.binWidth * 0.75))
        ax.set_xlabel('Median Distance Between Adapters')
        ax.set_ylabel('Reads')
        png_fn = os.path.join(output_dir,
                              "interAdapterDist{i}.png".format(i=i))
        png_base, thumbnail_base = save_figure_with_thumbnail(fig, png_fn,
                                                              dpi=dpi)

        # build the report; paths are made relative to output_dir so they do
        # not depend on the current working directory:
        plots.append(Plot("adapter_xml_plot_{i}".format(i=i),
                          os.path.relpath(png_base, output_dir),
                          thumbnail=os.path.relpath(thumbnail_base,
                                                    output_dir)))

    # thumbnail_base is always bound here: the guard above guarantees at
    # least one distribution. The group uses the last plot's thumbnail.
    plot_groups = [PlotGroup("adapter_xml_plot_group",
                             title="Observed Insert Length Distribution",
                             plots=plots,
                             thumbnail=os.path.relpath(thumbnail_base,
                                                       output_dir))]

    columns = [Column("adaper_xml_conditions", None,
                      ('Adapter Dimers (0-10bp)',
                       'Short Inserts (11-100bp)')),
               Column("adaper_xml_results", None,
                      (adapter_dimers, short_inserts))]

    tables = [Table("adapter_xml_table", "Adapter Statistics", columns)]

    report = Report("adapter_xml_report", title="Adapter Report",
                    tables=tables, attributes=None, plotgroups=plot_groups)

    return report