Ejemplo n.º 1
0
def spatial_plot_plus_links(bead_xy_a,
                            bead_pairs,
                            umi_dist,
                            title: str,
                            pdf_pages: PdfPages,
                            pct: float = 95):
    """
    Scatter beads colored by ``umi_dist`` and overlay faint green link lines.

    Columns 0 and 1 of ``bead_xy_a`` are used as X and Y. The color scale runs
    from 0 to the ``pct``-th percentile of ``umi_dist`` and is clipped there.
    ``bead_pairs`` is used to index rows of ``bead_xy_a``.
    NOTE(review): presumably an (n_pairs, 2) integer index array, so that one
    segment is drawn per pair -- confirm against the caller.
    """
    with new_ax(pdf_pages, include_fig=True) as (fig, ax):
        # white -> dark blue ramp: like 'Blues', but pure white at the low end
        white_to_blue = matplotlib.colors.LinearSegmentedColormap.from_list(
            "BluesW", [(1.0, 1.0, 1.0), (0.0314, 0.188, 0.450)])
        saturation = np.percentile(umi_dist, pct)
        color_scale = matplotlib.colors.Normalize(0, saturation, clip=True)

        x_coords = bead_xy_a[:, 0]
        y_coords = bead_xy_a[:, 1]
        points = ax.scatter(x_coords,
                            y_coords,
                            c=umi_dist,
                            s=0.5,
                            cmap=white_to_blue,
                            norm=color_scale)
        # rasterize only the point cloud
        points.set_rasterized(True)

        # transpose so each column passed to plot() holds one link's coordinates
        link_x = bead_xy_a[bead_pairs, 0].T
        link_y = bead_xy_a[bead_pairs, 1].T
        ax.plot(link_x, link_y, alpha=0.1, color="g", linewidth=0.1)

        ax.set_title(title)
        ax.set_xlabel("X")
        ax.set_ylabel("Y")
        ax.axis("equal")
        fig.colorbar(points, ax=ax)
Ejemplo n.º 2
0
def plot_base_distribution(pdf_pages: PdfPages, base_dist_file: Path,
                           title: str):
    """
    Plot per-position base fractions read from a tab-separated file.

    The first row and the first and last column of the file are skipped.
    The remaining values are parsed as floats and every row is scaled to
    sum to 1. One marker series is drawn per column, labelled A, C, G, T.
    """
    with base_dist_file.open() as fh:
        table = list(csv.reader(fh, delimiter="\t"))

    # drop the first row, and the first and last field of every data row
    data_rows = [[float(cell) for cell in row[1:-1]] for row in table[1:]]
    fractions = np.array(data_rows)
    fractions /= fractions.sum(axis=1, keepdims=True)

    n_positions = fractions.shape[0]
    positions = np.arange(1, n_positions + 1)

    with new_ax(pdf_pages) as ax:
        ax.set_prop_cycle("color", ["red", "blue", "green", "purple"])
        # markers only (zero line width): one series per column
        ax.plot(positions,
                fractions,
                linewidth=0,
                marker="o",
                markersize=10,
                alpha=0.8,
                label=["A", "C", "G", "T"])
        ax.legend(loc="lower right")

        ax.set_xlim(0, n_positions + 1)
        ax.set_ylim(0, np.max(fractions) + 0.02)

        ax.set_title(title)
        ax.set_xlabel("base position")
        ax.set_ylabel("fraction of reads")
Ejemplo n.º 3
0
def plot_poly_a_trimming(pdf_pages: PdfPages, qm: dict[str, int],
                         poly_a_summary_files: list[Path]):
    """
    Plot the poly-A trim-position histogram summed over all summary files.

    The histogram from each file (second value returned by
    ``read_dropseq_metrics``) is added into one ``Counter``. Entries with a
    key greater than 0 count as trimmed reads; their total, divided by
    ``qm['Total']``, is shown in the title.

    Positions 1..max(key) are plotted in ascending order, and positions
    absent from the histogram are drawn as 0, so the x and y series always
    have the same length and alignment. An empty histogram yields an empty
    plot, and a zero ``qm['Total']`` is reported as 0%.
    NOTE(review): assumes histogram keys are integers -- confirm against
    ``read_dropseq_metrics``.
    """
    total_hist = Counter()

    for summary_file in poly_a_summary_files:
        _, poly_a_hist = read_dropseq_metrics(summary_file)
        total_hist += poly_a_hist

    trimmed_count = sum(v for k, v in total_hist.items() if k > 0)

    # default=0 keeps an empty histogram from raising in max()
    max_pos = int(max(total_hist, default=0))
    positions = range(1, max_pos + 1)
    # Counter returns 0 for missing keys, so gaps in the histogram stay
    # aligned with their x position instead of shifting later points left
    counts = [total_hist[k] for k in positions]

    total_reads = qm['Total']
    trimmed_fraction = trimmed_count / total_reads if total_reads else 0.0

    with new_ax(pdf_pages) as ax:
        ax.plot(
            np.arange(1, max_pos + 1),
            counts,
            marker="o",
            color="k",
        )

        ax.set_xlabel("First base of PolyA tail trimmed")
        ax.set_ylabel("Number of reads")
        ax.set_title(
            f"% Reads trimmed by 3' PolyA trimmer: {trimmed_fraction:.3%}"
        )
        ax.set_xlim(0, max_pos + 1)
Ejemplo n.º 4
0
def plot_log_hist(dist, title: str, pdf_pages: PdfPages):
    """
    Plot a histogram of ``dist`` with log-spaced bins and log-scaled axes.

    Bins start at 1 and run to the next power of ten at or above
    ``max(dist)``, with ten bins per decade. At least one decade is always
    covered, so a distribution whose maximum is 1 or less still gets a valid
    set of bin edges.

    Raises ``ValueError`` (from ``max``) if ``dist`` is empty.
    """
    # Clamp to one decade: without it, max(dist) <= 1 gives max_d <= 0 and
    # np.logspace would return fewer than two edges (or fail outright).
    max_d = max(1.0, np.ceil(np.log10(max(dist))))
    with new_ax(pdf_pages) as ax:
        ax.hist(dist,
                bins=np.logspace(0, max_d, int(max_d * 10 + 1)),
                log=True)
        ax.set_xscale("log")
        ax.set_title(title)
Ejemplo n.º 5
0
def plot_combined_mapping_quality(pdf_pages: PdfPages, quality_metrics: Path,
                                  matched_quality_metrics: Path):
    """
    Draw grouped bars comparing read counts for all reads and matched reads.

    For each of "Total", "Mapped", "HQ" and "HQ No Dupes" there is one bar
    from ``quality_metrics`` ("All") and one from ``matched_quality_metrics``
    ("Matched"). Major ticks name the category; minor ticks under each bar
    show the count and its share of that file's "Total".

    Returns the metrics read from ``quality_metrics``.
    """
    keys = ["Total", "Mapped", "HQ", "HQ No Dupes"]
    qm, _ = read_quality_metrics(quality_metrics)
    mm, _ = read_quality_metrics(matched_quality_metrics)
    ms = [qm, mm]
    series_names = ["All", "Matched"]

    group_left = np.arange(len(keys))

    with new_ax(pdf_pages, ax_bounds=(0.1, 0.2, 0.8, 0.7)) as ax:
        ax.set_prop_cycle("color", ["lightskyblue", "goldenrod"])
        for idx, (name, metrics) in enumerate(zip(series_names, ms)):
            heights = [metrics[k] for k in keys]
            ax.bar(group_left + 0.1 + 0.4 * idx,
                   heights,
                   width=0.4,
                   align="edge",
                   label=name,
                   edgecolor="black")

        # major ticks: one per category, centered under its pair of bars
        ax.set_xticks(group_left + 0.5)
        ax.set_xticklabels(keys)

        # minor ticks: one under the middle of every individual bar
        bar_centers = []
        for v in group_left:
            for i in (0.0, 0.4):
                bar_centers.append(v + 0.3 + i)
        ax.set_xticks(bar_centers, minor=True)

        # same nesting order as bar_centers: category outer, series inner
        bar_labels = []
        for k in keys:
            for m in ms:
                bar_labels.append(f"{int(m[k]):,}\n({m[k] / m['Total']:.1%})")
        ax.set_xticklabels(bar_labels, minor=True, rotation=90)

        ax.set_ylabel("# Reads")
        ax.set_title("Alignment Quality")

        # hide the major tick marks but keep a long tick length
        ax.tick_params(axis="x", which="major", bottom=False, length=80)
        ax.tick_params(axis="x", which="minor")

        ax.legend()

    return qm
Ejemplo n.º 6
0
def plot_reads_per_barcode(pdf_pages: PdfPages, barcode_count):
    """
    Plot the cumulative fraction of reads over the top 10% of cell barcodes.

    ``barcode_count`` is a gzip-compressed, tab-separated file whose first
    column is a read count. Rows whose first field starts with ``#`` are
    skipped, as are blank lines. The fraction is taken relative to the total
    over all rows, not just the plotted 10%.
    """
    with gzip.open(barcode_count, "rt") as fh:
        # this file has form `num_reads    barcodes` in descending order
        # csv.reader yields [] for blank lines; skip them rather than
        # failing with IndexError on row[0]
        read_counts = [
            int(row[0]) for row in csv.reader(fh, delimiter="\t")
            if row and not row[0].startswith("#")
        ]

    total_reads = sum(read_counts)
    # truncate to first 10% of barcodes
    read_counts = read_counts[:len(read_counts) // 10]

    with new_ax(pdf_pages) as ax:
        ax.plot(np.cumsum(read_counts) / total_reads, color="g")

        ax.set_ylim((0, 1))
        ax.set_xlabel("Top 10% of cell barcodes, by number of reads")
        ax.set_ylabel("Cumulative fraction of reads")
        ax.set_title("Cumulative fraction of reads per cell barcode")
Ejemplo n.º 7
0
def plot_spatial_distribution(pdf_pages: PdfPages, bead_xy: np.ndarray,
                              dist: list[float], title: str):
    """
    Scatter beads at their X/Y position, colored by ``dist``.

    Columns 0 and 1 of ``bead_xy`` are X and Y. Colors use the reversed
    viridis map, scaled from 0 to the 95th percentile of ``dist`` and
    clipped at that value.
    """
    with new_ax(pdf_pages, include_fig=True) as (fig, ax):
        saturation = np.percentile(dist, 95)
        color_scale = matplotlib.colors.Normalize(0, saturation, clip=True)

        x_coords, y_coords = bead_xy[:, 0], bead_xy[:, 1]
        points = ax.scatter(x_coords,
                            y_coords,
                            c=dist,
                            s=0.5,
                            cmap="viridis_r",
                            norm=color_scale)
        points.set_rasterized(True)

        ax.set(xlabel="X", ylabel="Y", title=f"{title} per matched bead")
        ax.axis("equal")
        fig.colorbar(points, ax=ax)
Ejemplo n.º 8
0
def plot_mapping_quality(pdf_pages: PdfPages, quality_metrics: Path):
    """
    Draw a bar chart of read counts per alignment-quality category.

    Each bar's label shows the category name, the count, and the count as a
    share of "Total".

    Returns the metrics read from ``quality_metrics``.
    """
    keys = ["Total", "Mapped", "HQ", "HQ No Dupes"]
    qm, _ = read_quality_metrics(quality_metrics)

    bar_labels = []
    bar_heights = []
    for k in keys:
        bar_labels.append(f"{k}\n{int(qm[k]):,}\n({qm[k] / qm['Total']:.1%})")
        bar_heights.append(qm[k])

    with new_ax(pdf_pages) as ax:
        ax.bar(bar_labels,
               bar_heights,
               width=0.8,
               color="lightskyblue",
               edgecolor="black")

        ax.set_title("Alignment Quality for All Reads")
        ax.set_ylabel("# Reads")

    return qm
Ejemplo n.º 9
0
def spatial_plot(bead_xy_a, dist, title, pdf_pages, pct: float = 95.0):
    """
    Scatter beads at their X/Y position, colored white-to-blue by ``dist``.

    Columns 0 and 1 of ``bead_xy_a`` are X and Y. The color scale runs from
    0 to the ``pct``-th percentile of ``dist`` and is clipped at that value.
    """
    with new_ax(pdf_pages, include_fig=True) as (fig, ax):
        # white -> dark blue ramp: like 'Blues', but pure white at the low end
        ramp_colors = [(1.0, 1.0, 1.0), (0.0314, 0.188, 0.450)]
        white_to_blue = matplotlib.colors.LinearSegmentedColormap.from_list(
            "BluesW", ramp_colors
        )
        saturation = np.percentile(dist, pct)
        color_scale = matplotlib.colors.Normalize(0, saturation, clip=True)

        points = ax.scatter(
            bead_xy_a[:, 0],
            bead_xy_a[:, 1],
            c=dist,
            s=0.5,
            cmap=white_to_blue,
            norm=color_scale,
        )
        points.set_rasterized(True)

        ax.set(xlabel="X", ylabel="Y", title=title)
        ax.axis("equal")
        fig.colorbar(points, ax=ax)
Ejemplo n.º 10
0
def plot_frac_intronic_exonic(pdf_pages: PdfPages, frac_intron_exon: Path,
                              title: str):
    """
    Draw a bar chart of the share of reads mapping to each genomic region.

    Two derived entries are added to the mapping returned by
    ``read_frac_intronic_exonic``: "exonic" (coding + utr) and "genic"
    (exonic + intronic). Bars are drawn as percentages on a 0-100 axis.
    """
    frac_ie, _ = read_frac_intronic_exonic(frac_intron_exon)

    # derived measures, stored alongside the values that were read
    frac_ie["exonic"] = frac_ie["coding"] + frac_ie["utr"]
    frac_ie["genic"] = frac_ie["exonic"] + frac_ie["intronic"]

    bar_labels = []
    bar_heights = []
    for k in ["ribosomal", "exonic", "genic", "intronic", "intergenic"]:
        bar_labels.append(f"{k}\n{frac_ie[k]:.1%}")
        bar_heights.append(100 * frac_ie[k])

    with new_ax(pdf_pages) as ax:
        ax.bar(bar_labels,
               bar_heights,
               width=0.7,
               color="lightskyblue",
               edgecolor="black")
        ax.set_ylim(0, 100)

        ax.set_title(title)
        ax.set_ylabel("Percentage")
Ejemplo n.º 11
0
def plot_dge_summary(pdf_pages: PdfPages, summary_file: Path, bead_xy: BeadXY):
    """
    Plot histograms of UMIs and genes per barcode, then a spatial UMI plot.

    Barcodes and their per-barcode counts come from ``read_dge_summary``.
    ``bead_xy`` is indexed by barcode to obtain one coordinate row per
    barcode, in the same order as the counts.
    """

    barcodes, umis_per_bc, genes_per_bc = read_dge_summary(summary_file)

    # one coordinate row per barcode, stacked in barcode order
    xy_rows = np.vstack([bead_xy[bc] for bc in barcodes])

    # can safely assume no zeros in these distributions;
    # bins cover 1 to 10000, ten per decade, which should be plenty
    log_bins = np.logspace(0, 4, 41)

    per_barcode = [("UMIs", umis_per_bc), ("genes", genes_per_bc)]
    for title, dist in per_barcode:
        with new_ax(pdf_pages) as ax:
            ax.hist(dist,
                    bins=log_bins,
                    facecolor="lightskyblue",
                    edgecolor="black")
            ax.set_xscale("log")
            ax.set_title(f"Histogram of {title} per matched barcode")
            ax.set_xlabel(f"Number of {title} (log10)")

    plot_spatial_distribution(pdf_pages, xy_rows, umis_per_bc, "UMIs")
Ejemplo n.º 12
0
def plot_hist(dist, title: str, pdf_pages: PdfPages):
    """
    Plot a 100-bin histogram of ``dist`` with a log-scaled count axis.
    """
    n_bins = 100
    with new_ax(pdf_pages) as ax:
        ax.set_title(title)
        ax.hist(dist, bins=n_bins, log=True)