Exemple #1
0
def plot_breaks_and_labels(fig, root, ax, gx, gy, xsize, ysize,
                           qbreaks, sbreaks, sep=True, chrlw=.1,
                           sepcolor="g", minfont=5, stdpf=True):
    """Decorate a dot plot with sequence separators, labels and a frame.

    Args:
        fig: Figure handed to ``TextHandler`` to choose label font sizes.
        root: Canvas axes on which the sequence labels are written.
        ax: Data axes receiving separators, frame, limits and axis labels.
        gx: Text of the x-axis label.
        gy: Text of the y-axis label.
        xsize: Extent of the x-axis in data units.
        ysize: Extent of the y-axis in data units.
        qbreaks: Iterable of ``(seqid, beg, end)`` laid out along x.
        sbreaks: Iterable of ``(seqid, beg, end)`` laid out along y.
        sep: When true, draw a separator at the start of every sequence.
        chrlw: Line width used for separators and frame.
        sepcolor: Colour used for separators and frame.
        minfont: Labels whose selected font size is below this are omitted.
        stdpf: Forwarded unchanged to ``seqid_parse``.

    Returns:
        The ``(xlim, ylim)`` pair applied to ``ax``; ``ylim`` is reversed so
        the y-axis runs top to bottom.
    """
    xlim = (0, xsize)
    ylim = (ysize, 0)  # reversed limits flip the y-axis
    line_kw = dict(lw=chrlw, color=sepcolor)
    sizer = TextHandler(fig)

    # Vertical separators and label records for the x-axis sequences
    xchr_labels = []
    for name, left, right in qbreaks:
        size = sizer.select_fontsize(abs(right - left) * .8 / xsize)
        name = "".join(seqid_parse(name, stdpf=stdpf)[:2])
        xchr_labels.append((name, (left + right) / 2, size))
        if sep:
            ax.plot([left, left], ylim, "-", **line_kw)

    # Horizontal separators and label records for the y-axis sequences
    ychr_labels = []
    for name, top, bottom in sbreaks:
        size = sizer.select_fontsize(abs(bottom - top) * .8 / ysize)
        name = "".join(seqid_parse(name, stdpf=stdpf)[:2])
        ychr_labels.append((name, (top + bottom) / 2, size))
        if sep:
            ax.plot(xlim, [top, top], "-", **line_kw)

    # Labels above the plot, one per x sequence that is large enough
    for name, middle, size in xchr_labels:
        if size >= minfont:
            root.text(.1 + middle * .8 / xsize, .91, latex(name), size=size,
                      ha="center", va="bottom", rotation=45, color="grey")

    # Labels right of the plot; the y direction is flipped on the canvas
    for name, middle, size in ychr_labels:
        if size >= minfont:
            root.text(.91, .9 - middle * .8 / ysize, latex(name), size=size,
                      va="center", color="grey")

    # Frame: top edge, bottom edge, left edge, right edge
    frame_segments = (
        (xlim, [0, 0]),
        (xlim, [ysize, ysize]),
        ([0, 0], ylim),
        ([xsize, xsize], ylim),
    )
    for xs, ys in frame_segments:
        ax.plot(xs, ys, "-", **line_kw)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # Hide the tick marks; only the tick labels stay visible
    for tickline in ax.get_xticklines() + ax.get_yticklines():
        tickline.set_visible(False)

    set_human_axis(ax)

    ticklabels = ax.get_xticklabels() + ax.get_yticklabels()
    plt.setp(ticklabels, color='gray', size=10)

    return xlim, ylim
Exemple #2
0
def blastplot(
    ax,
    blastfile,
    qsizes,
    ssizes,
    qbed,
    sbed,
    style="dot",
    sampleN=None,
    baseticks=False,
    insetLabels=False,
    stripNames=False,
    highlights=None,
):
    """Draw a dot plot of BLAST hits onto ``ax``.

    Args:
        ax: Data axes that receive the hits, separators and highlights.
        blastfile: Path of the tabular BLAST file; each line is parsed with
            ``BlastLine``.
        qsizes: Query layout; must provide ``get_position(seqid, pos)``,
            ``totalsize``, ``get_breaks()`` and ``len()``.
        ssizes: Subject layout with the same interface as ``qsizes``.
        qbed: Optional object whose ``order`` maps a query name to
            ``(index, feature)``; when given, hits are placed at the
            feature's ``seqid``/``start``/``end`` and unknown names dropped.
        sbed: Same as ``qbed`` for the subject.
        style: One of ``DotStyles``; ``"line"``, ``"circle"`` and ``"dot"``
            are the styles drawn here.
        sampleN: If set, plot at most this many randomly sampled hits.
        baseticks: If true, double the x tick density and draw outward tick
            lines on ``root``.
        insetLabels: If true, x labels are written inside ``ax`` and y labels
            are omitted; otherwise labels go on ``root``.
        stripNames: If true, drop the text after the last ``.`` of each
            query and subject name.
        highlights: Either a list of ``BedLine`` (subject regions) or a pair
            ``(query_regions, subject_regions)``.

    Returns:
        None. When no hit survives filtering, an error is logged and nothing
        is drawn.
    """
    # NOTE(review): ``root`` is not a parameter; this function relies on a
    # module-level ``root`` axes existing at call time - confirm with callers.
    assert style in DotStyles

    qorder = qbed.order if qbed else None
    sorder = sbed.order if sbed else None

    data = []

    # The context manager closes the BLAST file as soon as parsing is done.
    with open(blastfile) as fp:
        for row in fp:
            b = BlastLine(row)
            query, subject = b.query, b.subject

            if stripNames:
                query = query.rsplit(".", 1)[0]
                subject = subject.rsplit(".", 1)[0]

            if qorder:
                if query not in qorder:
                    continue
                _, q = qorder[query]
                query = q.seqid
                qstart, qend = q.start, q.end
            else:
                qstart, qend = b.qstart, b.qstop

            if sorder:
                if subject not in sorder:
                    continue
                _, s = sorder[subject]
                subject = s.seqid
                sstart, send = s.start, s.end
            else:
                sstart, send = b.sstart, b.sstop

            qi = qsizes.get_position(query, qstart)
            qj = qsizes.get_position(query, qend)
            si = ssizes.get_position(subject, sstart)
            sj = ssizes.get_position(subject, send)

            # A start that cannot be placed means the hit is off the layout.
            if None in (qi, si):
                continue
            data.append(((qi, qj), (si, sj)))

    if sampleN:
        if len(data) > sampleN:
            data = sample(data, sampleN)

    if not data:
        return logging.error("no blast data imported")

    xsize, ysize = qsizes.totalsize, ssizes.totalsize
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))

    if style == "line":
        for a, b in data:
            ax.plot(a, b, "ro-", mfc="w", mec="r", ms=3)
    else:
        # Only the start coordinate of each hit is plotted for point styles.
        data = [(x[0], y[0]) for x, y in data]
        x, y = zip(*data)

        if style == "circle":
            ax.plot(x, y, "mo", mfc="w", mec="m", ms=3)
        elif style == "dot":
            ax.scatter(x, y, s=3, lw=0)

    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    # Sequences shorter than these thresholds get neither a separator nor a
    # label; zero means every sequence is kept.
    ignore_size_x = ignore_size_y = 0

    # plot the chromosome breaks
    logging.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes)))
    for (seqid, beg, end) in qsizes.get_breaks():
        if abs(end - beg) < ignore_size_x:
            continue
        xchr_labels.append((rename_seqid(seqid), (beg + end) / 2))
        ax.plot([end, end], ylim, "-", lw=1, color="grey")

    for (seqid, beg, end) in ssizes.get_breaks():
        if abs(end - beg) < ignore_size_y:
            continue
        ychr_labels.append((rename_seqid(seqid), (beg + end) / 2))
        ax.plot(xlim, [end, end], "-", lw=1, color="grey")

    # plot the chromosome labels
    for label, pos in xchr_labels:
        if insetLabels:
            ax.text(pos, 0, label, size=8,
                    ha="center", va="top", color="grey")
        else:
            pos = 0.1 + pos * 0.8 / xsize
            root.text(pos, 0.91, label, size=10,
                      ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted; they are not drawn in inset mode
    if not insetLabels:
        for label, pos in ychr_labels:
            pos = 0.9 - pos * 0.8 / ysize
            root.text(0.91, pos, label, size=10, va="center", color="grey")

    # Highlight regions based on a list of BedLine
    qhighlights = shighlights = None
    if highlights:
        if isinstance(highlights[0], BedLine):
            shighlights = highlights
        elif len(highlights) == 2:
            qhighlights, shighlights = highlights

    if qhighlights:
        for hl in qhighlights:
            hls = qsizes.get_position(hl.seqid, hl.start)
            ax.add_patch(
                Rectangle((hls, 0), hl.span, ysize, fc="r", alpha=0.2, lw=0))
    if shighlights:
        for hl in shighlights:
            hls = ssizes.get_position(hl.seqid, hl.start)
            ax.add_patch(
                Rectangle((0, hls), xsize, hl.span, fc="r", alpha=0.2, lw=0))

    if baseticks:

        def increaseDensity(a, ratio=4):
            """Return ticks from 0 up to a[-1] with the step divided by ratio."""
            assert len(a) > 1
            stepsize = a[1] - a[0]
            newstepsize = int(stepsize / ratio)
            return np.arange(0, a[-1], newstepsize)

        # Increase the density of the ticks
        xticks = ax.get_xticks()
        yticks = ax.get_yticks()
        xticks = increaseDensity(xticks, ratio=2)
        yticks = increaseDensity(yticks, ratio=2)
        # Only the x ticks are applied to the axes; the denser y ticks are
        # used solely for the outward tick lines below.
        ax.set_xticks(xticks)

        # Plot outward ticklines
        for pos in xticks[1:]:
            if pos > xsize:
                continue
            pos = 0.1 + pos * 0.8 / xsize
            root.plot((pos, pos), (0.08, 0.1), "-", color="grey", lw=2)

        for pos in yticks[1:]:
            if pos > ysize:
                continue
            pos = 0.9 - pos * 0.8 / ysize
            root.plot((0.09, 0.1), (pos, pos), "-", color="grey", lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_base_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color="gray",
             size=10)
    plt.setp(ax.get_yticklabels(), rotation=90)
Exemple #3
0
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
        is_self=False, synteny=False, cmap_text=None, genomenames=None,
        sample_number=10000, ignore=.005, palette=None, chrlw=.01, title=None):
    """Draw a gene-order dot plot from an anchor file.

    Args:
        anchorfile: Path of the anchor file. Lines starting with ``#`` open a
            new block; other lines hold query, subject and, in the last
            column, an optional numeric value.
        qbed: Query bed; provides ``order``, ``get_breaks()``, ``len()`` and
            ``filename``.
        sbed: Subject bed with the same interface as ``qbed``.
        fig: Figure that receives the optional title.
        root: Canvas axes for labels, colour bar and palette legend.
        ax: Data axes for the points and separators.
        vmin: Lower clamp applied to the per-pair value.
        vmax: Upper clamp; also used when the value is not numeric.
        is_self: If true, mirror every pair and draw the diagonal.
        synteny: If true, box the clusters found by ``batch_scan``.
        cmap_text: If set, draw a colour bar with this caption.
        genomenames: ``"x_y"`` string for the axis labels; when missing, the
            labels are derived from the bed file names.
        sample_number: Maximum number of points plotted (random subset).
        ignore: Fraction of the axis below which a sequence is not labelled.
        palette: Optional block colouring; provides ``get(block_id, default)``
            and ``colors``.
        chrlw: Line width of the sequence separators.
        title: Optional figure title.

    Returns:
        None.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    block_id = 0
    # Colour of the block being read. It must outlive the "#" header line,
    # otherwise no data row ever receives its palette colour.
    block_color = None
    # The context manager closes the anchor file once it has been parsed.
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional
            # third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                value = vmax

            # Clamp to [vmin, vmax]
            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, q = qorder[query]
            si, s = sorder[subject]

            nv = vmax - value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    # only show a random subset of at most sample_number points
    if len(data) > sample_number:
        logging.debug("Showing a random subset of %s data points (total %s) " \
                      "for clarity." % (sample_number, len(data)))
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    if not palette:
        data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)

    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=default_cm,
                vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    # Sequences spanning less than these sizes are drawn but not labelled
    ignore_size_x = ignore_size_y = 0
    if ignore:
        ignore_size_x = xsize * ignore
        ignore_size_y = ysize * ignore

    def short_label(seqid):
        """Keep the part after the last "_"; numeric parts become "c<N>"."""
        seqid = seqid.split("_")[-1]
        try:
            return "c%d" % int(seqid)
        except ValueError:  # not a number: keep the text as is
            return seqid

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        skip = abs(end - beg) < ignore_size_x
        xchr_labels.append((short_label(seqid), (beg + end) / 2, skip))
        ax.plot([beg, beg], ylim, "g-", lw=chrlw)

    for (seqid, beg, end) in sbed.get_breaks():
        skip = abs(end - beg) < ignore_size_y
        ychr_labels.append((short_label(seqid), (beg + end) / 2, skip))
        ax.plot(xlim, [beg, beg], "g-", lw=chrlw)

    # plot the chromosome labels
    for label, pos, skip in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if not skip:
            root.text(pos, .91, label,
                ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, skip in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if not skip:
            root.text(.91, pos, label,
                va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
            color='gray', size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if title:
        fig.suptitle(title, x=.05, y=.98, color="k")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #4
0
def dotplot(anchorfile,
            qbed,
            sbed,
            fig,
            root,
            ax,
            vmin=0,
            vmax=1,
            is_self=False,
            synteny=False,
            cmap_text=None,
            genomenames=None,
            sample_number=10000,
            minfont=5,
            palette=None,
            chrlw=.01,
            title=None,
            sepcolor="gainsboro"):
    """Draw a gene-order dot plot from an anchor file.

    Args:
        anchorfile: Path of the anchor file. Lines starting with ``#`` open a
            new block; other lines hold query, subject and, in the last
            column, an optional numeric value.
        qbed: Query bed; provides ``order``, ``get_breaks()``, ``len()`` and
            ``filename``.
        sbed: Subject bed with the same interface as ``qbed``.
        fig: Figure handed to ``TextHandler`` to choose label font sizes.
        root: Canvas axes for labels, colour bar, palette legend and title.
        ax: Data axes for the points and separators.
        vmin: Lower clamp applied to the per-pair value.
        vmax: Upper clamp; also used when the value is not numeric.
        is_self: If true, mirror every pair and draw the diagonal.
        synteny: If true, box the clusters found by ``batch_scan``.
        cmap_text: If set, draw a colour bar with this caption.
        genomenames: ``"x_y"`` string for the axis labels; when missing, the
            labels are derived from the bed file names.
        sample_number: Maximum number of points plotted (random subset).
        minfont: Labels whose selected font size is below this are omitted.
        palette: Optional block colouring; provides ``get(block_id, default)``
            and ``colors``.
        chrlw: Line width of the sequence separators.
        title: Title text; when empty, one is built from the genome names
            and the number of pairs read.
        sepcolor: Colour of the sequence separators.

    Returns:
        None.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    block_id = 0
    # Colour of the block being read. It must outlive the "#" header line,
    # otherwise no data row ever receives its palette colour.
    block_color = None
    # The context manager closes the anchor file once it has been parsed.
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional
            # third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                value = vmax

            # Clamp to [vmin, vmax]
            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, q = qorder[query]
            si, s = sorder[subject]

            nv = vmax - value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    # Counted before sampling so the title reports every pair that was read
    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) " \
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    if not palette:
        data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)

    else:
        ax.scatter(x,
                   y,
                   c=c,
                   edgecolors="none",
                   s=2,
                   lw=0,
                   cmap=default_cm,
                   vmin=vmin,
                   vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Each label carries the font size chosen for it; small ones are skipped
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos,
                      .91,
                      latex(label),
                      size=fontsize,
                      ha="center",
                      va="bottom",
                      rotation=45,
                      color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91,
                      pos,
                      latex(label),
                      size=fontsize,
                      va="center",
                      color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray',
             size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Every pair was stored twice (mirror image); floor division
            # keeps the count an integer under Python 3.
            npairs //= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #5
0
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
        is_self=False, synteny=False, cmap_text=None, cmap="copper",
        genomenames=None, sample_number=10000, minfont=5, palette=None,
        chrlw=.01, title=None, sepcolor="gainsboro"):
    """Draw a gene-order dot plot from an anchor file.

    Args:
        anchorfile: Path of the anchor file. Lines starting with ``#`` open a
            new block; other lines hold query, subject and, in the last
            column, an optional numeric value.
        qbed: Query bed; provides ``order``, ``get_breaks()``, ``len()`` and
            ``filename``.
        sbed: Subject bed with the same interface as ``qbed``.
        fig: Figure handed to ``TextHandler`` to choose label font sizes.
        root: Canvas axes for labels, colour bar, palette legend and title.
        ax: Data axes for the points and separators.
        vmin: Pairs with a value below this are dropped (only when
            ``cmap_text`` is set).
        vmax: Pairs with a value above this are dropped (only when
            ``cmap_text`` is set); also used when the value is not numeric.
        is_self: If true, mirror every pair and draw the diagonal.
        synteny: If true, box the clusters found by ``batch_scan``.
        cmap_text: If set, values are read from the last column and a colour
            bar with this caption is drawn; otherwise every value is 0.
        cmap: Colour map passed to ``scatter`` and ``draw_cmap``.
        genomenames: ``"x_y"`` string for the axis labels; when missing, the
            labels are derived from the bed file names.
        sample_number: Maximum number of points plotted (random subset).
        minfont: Labels whose selected font size is below this are omitted.
        palette: Optional block colouring; provides ``get(block_id, default)``
            and ``colors``.
        chrlw: Line width of the sequence separators.
        title: Title text; when empty, one is built from the genome names
            and the number of pairs read.
        sepcolor: Colour of the sequence separators.

    Returns:
        None.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Capping values within [{0:.1f}, {1:.1f}]"\
                        .format(vmin, vmax))

    block_id = 0
    # Colour of the block being read. It must outlive the "#" header line,
    # otherwise no data row ever receives its palette colour.
    block_color = None
    # The context manager closes the anchor file once it has been parsed.
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional
            # third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            if cmap_text:
                try:
                    value = float(value)
                except ValueError:
                    value = vmax

                # Out-of-range pairs are dropped rather than clamped
                if value < vmin:
                    continue
                if value > vmax:
                    continue
            else:
                value = 0

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, q = qorder[query]
            si, s = sorder[subject]

            nv = value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    # Counted before sampling so the title reports every pair that was read
    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) " \
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # Points are drawn in the order they were collected (or sampled)
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap,
                vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Each label carries the font size chosen for it; small ones are skipped
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos, .91, latex(label), size=fontsize,
                ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91, pos, latex(label), size=fontsize,
                va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(markup(gx), size=16)
    ax.set_ylabel(markup(gy), size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
            color='gray', size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Every pair was stored twice (mirror image); floor division
            # keeps the count an integer under Python 3.
            npairs //= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(markup(title), x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Exemple #6
0
def blastplot(ax, blastfile, qsizes, ssizes, qbed, sbed,
        style="dot", proportional=False, sampleN=None,
        baseticks=False, insetLabels=False, stripNames=False,
        highlights=None):
    """Draw a dot plot of BLAST hits onto ``ax``.

    Args:
        ax: Data axes that receive the hits, separators and highlights.
        blastfile: Path of the tabular BLAST file; each line is parsed with
            ``BlastLine``.
        qsizes: Query layout; must provide ``get_position(seqid, pos)``,
            ``totalsize``, ``get_breaks()`` and ``len()``.
        ssizes: Subject layout with the same interface as ``qsizes``.
        qbed: Optional object whose ``order`` maps a query name to
            ``(index, feature)``; when given, hits are placed at the
            feature's ``seqid``/``start``/``end`` and unknown names dropped.
        sbed: Same as ``qbed`` for the subject.
        style: One of ``DotStyles``; ``"line"``, ``"circle"`` and ``"dot"``
            are the styles drawn here.
        proportional: Accepted for interface compatibility; not read by this
            function.
        sampleN: If set, plot at most this many randomly sampled hits.
        baseticks: If true, double the x tick density and draw outward tick
            lines on ``root``.
        insetLabels: If true, x labels are written inside ``ax`` and y labels
            are omitted; otherwise labels go on ``root``.
        stripNames: If true, drop the text after the last ``.`` of each
            query and subject name.
        highlights: Either a list of ``BedLine`` (subject regions) or a pair
            ``(query_regions, subject_regions)``.

    Returns:
        None. When no hit survives filtering, an error is logged and nothing
        is drawn.
    """
    # NOTE(review): ``root`` is not a parameter; this function relies on a
    # module-level ``root`` axes existing at call time - confirm with callers.
    assert style in DotStyles

    qorder = qbed.order if qbed else None
    sorder = sbed.order if sbed else None

    data = []

    # The context manager closes the BLAST file as soon as parsing is done.
    with open(blastfile) as fp:
        for row in fp:
            b = BlastLine(row)
            query, subject = b.query, b.subject

            if stripNames:
                query = query.rsplit(".", 1)[0]
                subject = subject.rsplit(".", 1)[0]

            if qorder:
                if query not in qorder:
                    continue
                _, q = qorder[query]
                query = q.seqid
                qstart, qend = q.start, q.end
            else:
                qstart, qend = b.qstart, b.qstop

            if sorder:
                if subject not in sorder:
                    continue
                _, s = sorder[subject]
                subject = s.seqid
                sstart, send = s.start, s.end
            else:
                sstart, send = b.sstart, b.sstop

            qi = qsizes.get_position(query, qstart)
            qj = qsizes.get_position(query, qend)
            si = ssizes.get_position(subject, sstart)
            sj = ssizes.get_position(subject, send)

            # A start that cannot be placed means the hit is off the layout.
            if None in (qi, si):
                continue
            data.append(((qi, qj), (si, sj)))

    if sampleN:
        if len(data) > sampleN:
            data = sample(data, sampleN)

    if not data:
        return logging.error("no blast data imported")

    xsize, ysize = qsizes.totalsize, ssizes.totalsize
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))

    if style == "line":
        for a, b in data:
            ax.plot(a, b, 'ro-', mfc="w", mec="r", ms=3)
    else:
        # Only the start coordinate of each hit is plotted for point styles.
        data = [(x[0], y[0]) for x, y in data]
        x, y = zip(*data)

        if style == "circle":
            ax.plot(x, y, 'mo', mfc="w", mec="m", ms=3)
        elif style == "dot":
            ax.scatter(x, y, s=3, lw=0)

    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    # Sequences shorter than these thresholds get neither a separator nor a
    # label; zero means every sequence is kept.
    ignore_size_x = ignore_size_y = 0

    # plot the chromosome breaks
    logging.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes)))
    for (seqid, beg, end) in qsizes.get_breaks():
        if abs(end - beg) < ignore_size_x:
            continue
        xchr_labels.append((rename_seqid(seqid), (beg + end) / 2))
        ax.plot([end, end], ylim, "-", lw=1, color="grey")

    for (seqid, beg, end) in ssizes.get_breaks():
        if abs(end - beg) < ignore_size_y:
            continue
        ychr_labels.append((rename_seqid(seqid), (beg + end) / 2))
        ax.plot(xlim, [end, end], "-", lw=1, color="grey")

    # plot the chromosome labels
    for label, pos in xchr_labels:
        if insetLabels:
            ax.text(pos, 0, label, size=8,
                ha="center", va="top", color="grey")
        else:
            pos = .1 + pos * .8 / xsize
            root.text(pos, .91, label, size=10,
                ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted; they are not drawn in inset mode
    if not insetLabels:
        for label, pos in ychr_labels:
            pos = .9 - pos * .8 / ysize
            root.text(.91, pos, label, size=10,
                    va="center", color="grey")

    # Highlight regions based on a list of BedLine
    qhighlights = shighlights = None
    if highlights:
        if isinstance(highlights[0], BedLine):
            shighlights = highlights
        elif len(highlights) == 2:
            qhighlights, shighlights = highlights

    if qhighlights:
        for hl in qhighlights:
            hls = qsizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((hls, 0), hl.span, ysize,
                         fc="r", alpha=.2, lw=0))
    if shighlights:
        for hl in shighlights:
            hls = ssizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((0, hls), xsize, hl.span,
                         fc="r", alpha=.2, lw=0))

    if baseticks:
        def increaseDensity(a, ratio=4):
            """Return ticks from 0 up to a[-1] with the step divided by ratio."""
            assert len(a) > 1
            stepsize = a[1] - a[0]
            newstepsize = int(stepsize / ratio)
            return np.arange(0, a[-1], newstepsize)

        # Increase the density of the ticks
        xticks = ax.get_xticks()
        yticks = ax.get_yticks()
        xticks = increaseDensity(xticks, ratio=2)
        yticks = increaseDensity(yticks, ratio=2)
        # Only the x ticks are applied to the axes; the denser y ticks are
        # used solely for the outward tick lines below.
        ax.set_xticks(xticks)

        # Plot outward ticklines
        for pos in xticks[1:]:
            if pos > xsize:
                continue
            pos = .1 + pos * .8 / xsize
            root.plot((pos, pos), (.08, .1), '-', color="grey", lw=2)

        for pos in yticks[1:]:
            if pos > ysize:
                continue
            pos = .9 - pos * .8 / ysize
            root.plot((.09, .1), (pos, pos), '-', color="grey", lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_base_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
            color='gray', size=10)
    plt.setp(ax.get_yticklabels(), rotation=90)
Exemple #7
0
def draw_depth(
    root,
    ax,
    bed,
    chrinfo=None,
    defaultcolor="k",
    sepcolor="w",
    ylim=100,
    title=None,
    subtitle=None,
):
    """Draw depth plot on the given axes, using data from bed.

    Sequences are laid end to end along the x-axis (in the order of
    `chrinfo` when it is given and non-empty, otherwise in the order of
    `bed.max_bp_in_chr`).  Each bed record becomes one scatter point at the
    record midpoint, and a horizontal line marks the median depth of every
    sequence.  Sequence names and integer medians are written on `root`.

    Args:
        root (matplotlib.Axes): Canvas axes
        ax (matplotlib.Axes): Axes to plot data on
        bed (Bed): Bed data from mosdepth
        chrinfo (ChrInfoFile): seqid => color, new name.  ``None`` or an
            empty mapping means "use every sequence in bed, default color".
        defaultcolor (str): matplotlib-compatible color for data points
        sepcolor (str): matplotlib-compatible color for chromosome breaks
        ylim (int): Upper limit of the y-axis (depth)
        title (str): Title of the figure, to the right of the axis
        subtitle (str): Subtitle of the figure, just below title
    """
    if chrinfo is None:
        chrinfo = {}

    def color_of(seqid):
        # Sequences without a chrinfo entry fall back to `defaultcolor`.
        return chrinfo[seqid].color if seqid in chrinfo else defaultcolor

    sizes = bed.max_bp_in_chr
    seqids = chrinfo.keys() if chrinfo else sizes.keys()

    # Cumulative layout: each sequence starts where the previous one ended.
    starts = {}
    ends = {}
    label_positions = []
    start = 0
    for seqid in seqids:
        starts[seqid] = start
        end = start + sizes[seqid]
        ends[seqid] = end
        label_positions.append((seqid, (start + end) / 2))
        start = end
    # After the loop `start` equals the last `end`; unlike `end` it is also
    # defined (as 0) when there are no sequences at all.
    xsize = start

    # Extract plotting data
    data = []
    data_by_seqid = defaultdict(list)
    for b in bed:
        seqid = b.seqid
        if seqid not in starts:
            continue
        # chr01A  2000000 3000000 113.00
        x = starts[seqid] + (b.start + b.end) / 2
        y = float(b.accn)
        data.append((x, y, color_of(seqid)))
        data_by_seqid[seqid].append(y)

    # zip(*data) cannot be unpacked when there is nothing to plot.
    if data:
        x, y, c = zip(*data)
        ax.scatter(
            x,
            y,
            c=c,
            edgecolors="none",
            s=8,
            lw=0,
        )
    logging.debug("Obtained {} data points with depth data".format(len(data)))

    # Per seqid median
    medians = {}
    for seqid, values in data_by_seqid.items():
        seqid_median = np.median(values)
        medians[seqid] = seqid_median
        ax.plot(
            (starts[seqid], ends[seqid]),
            (seqid_median, seqid_median),
            "-",
            lw=4,
            color=color_of(seqid),
            alpha=0.5,
        )

    # vertical lines for all the breaks
    for pos in starts.values():
        ax.plot((pos, pos), (0, ylim), "-", lw=1, color=sepcolor)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    # Labels are placed on `root`; 0.1 + pos * 0.8 / xsize maps a data
    # x-position onto the canvas span 0.1 .. 0.9.
    median_depth_y = 0.88
    chr_label_y = 0.08
    for seqid, position in label_positions:
        xpos = 0.1 + position * 0.8 / xsize
        c = color_of(seqid)
        newseqid = chrinfo[seqid].new_name if seqid in chrinfo else seqid
        root.text(xpos,
                  chr_label_y,
                  newseqid,
                  color=c,
                  ha="center",
                  va="center",
                  rotation=20)
        seqid_median = medians[seqid]
        root.text(
            xpos,
            median_depth_y,
            str(int(seqid_median)),
            color=c,
            ha="center",
            va="center",
        )

    if title:
        root.text(
            0.95,
            0.5,
            markup(title),
            color="darkslategray",
            ha="center",
            va="center",
            size=15,
        )
    if subtitle:
        root.text(
            0.95,
            0.375,
            markup(subtitle),
            color="darkslategray",
            ha="center",
            va="center",
            size=15,
        )

    ax.set_xticks([])
    ax.set_xlim(0, xsize)
    ax.set_ylim(0, ylim)
    ax.set_ylabel("Depth")

    set_human_axis(ax)
    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color="gray",
             size=10)
    normalize_axes(root)
Exemple #8
0
def _short_seqid_label(seqid):
    """Shorten a seqid to the part after its last underscore.

    A purely numeric suffix is rendered as ``c<number>``; any other suffix
    is returned unchanged.
    """
    suffix = seqid.split("_")[-1]
    try:
        return "c%d" % int(suffix)
    except ValueError:
        return suffix


def dotplot(anchorfile, qbed, sbed, image_name, vmin, vmax, iopts,
        is_self=False, synteny=False, cmap_text=None):
    """Draw a dot plot of anchor pairs and save it to `image_name`.

    Each whitespace-separated row of `anchorfile` names a query and a
    subject in its first two columns; the last column is read as a numeric
    value, falling back to `vmax` when it is not a number.  Values are
    clamped to [vmin, vmax] and plotted as ``vmax - value``.  Rows whose
    query or subject is missing from `qbed.order` / `sbed.order` are
    skipped.

    Args:
        anchorfile: Path of the anchor file to read.
        qbed: Query bed; provides `order`, `get_breaks()`, `filename` and
            `len()`.
        sbed: Subject bed; same interface as `qbed`.
        image_name: Output path handed to `plt.savefig`.
        vmin: Lower clamp for the value column.
        vmax: Upper clamp for the value column.
        iopts: Image options; `w`, `h` and `dpi` are read.
        is_self: Mirror every point and draw a diagonal (self comparison).
        synteny: Scan for clusters and draw boxes around them.
        cmap_text: If set, draw a color map legend with this text.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    # Context manager so the anchor file is closed once it has been read.
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            # first two columns are query and subject, and an optional
            # third column
            if len(atoms) < 2:
                continue
            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                value = vmax

            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            # order entries are pairs; only the leading index is plotted
            qi = qorder[query][0]
            si = sorder[subject][0]

            nv = vmax - value
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    sample_number = 5000  # only show random subset
    if len(data) > sample_number:
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    if data:
        x, y, c = zip(*data)
        ax.scatter(x, y, c=c, s=2, lw=0, cmap=default_cm,
                vmin=vmin, vmax=vmax)
    else:
        # zip(*data) cannot be unpacked when no row survived filtering
        logging.warning("No anchors to plot from `%s`" % anchorfile)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Each label carries a flag telling whether to skip it: names of
    # sequences spanning less than 0.5% of the axis are not drawn.
    xchr_labels, ychr_labels = [], []
    ignore_size_x = xsize * .005
    ignore_size_y = ysize * .005

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        xchr_labels.append((_short_seqid_label(seqid), (beg + end) / 2, ignore))
        ax.plot([beg, beg], ylim, "g-", lw=1)

    for (seqid, beg, end) in sbed.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        ychr_labels.append((_short_seqid_label(seqid), (beg + end) / 2, ignore))
        ax.plot(xlim, [beg, beg], "g-", lw=1)

    # plot the chromosome labels
    for label, pos, ignore in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if not ignore:
            root.text(pos, .91, label,
                ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if not ignore:
            root.text(.91, pos, label,
                va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    to_ax_label = lambda fname: _(op.basename(fname).split(".")[0])
    gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
            color='gray', size=10)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
# Hard-coded (label, axis position, font size) tables used by
# plot_breaks_and_labels.  Raw strings keep the LaTeX "\ " spacing
# without relying on invalid escape sequences.
_HARDCODED_X_CHR_LABELS = (
    (r'chr\ 1', 1997.5, 12),
    (r'chr\ 2', 5944.5, 12),
    (r'chr\ 3', 9014.0, 12),
    (r'chr\ 4', 11351.5, 12),
    (r'chr\ 5', 13639.0, 12),
    (r'chr\ 6', 17657.5, 12),
    (r'chr\ 7', 22329.0, 12),
    (r'chr\ 8', 25466.0, 12),
    (r'chr\ 9', 28092.0, 12),
    (r'chr\ 10', 31361.5, 12),
    (r'chr\ 11', 34457.0, 12),
    (r'chr\ 12', 37234.0, 12),
    (r'chr\ 13', 41112.5, 12),
    (r'chr\ 14', 43851.0, 12),
    (r'chr\ 15', 45258.5, 12),
    (r'scf\ 16', 46740.5, 12),
    (r'scf\ 458', 47724.0, 12),
)
# A font size of 0 is below any positive `minfont`, which hides the label.
_HARDCODED_Y_CHR_LABELS = (
    (r'chr\ 1', 2672.0, 10.2),
    (r'chr\ 2', 7532.0, 10.2),
    (r'chr\ 3', 12035.0, 10.2),
    (r'chr\ 4', 16228.0, 10.2),
    (r'chr\ 5', 19784.5, 10.2),
    (r'chr\ 6', 23211.0, 10.2),
    (r'chr\ 7', 26612.5, 10.2),
    (r'chr\ 8', 29773.0, 10.2),
    (r'chr\ 9', 32518.0, 10.2),
    (r'chr\ 10', 35004.5, 10.2),
    (r'chr\ 11', 37760.0, 10.2),
    (r'chr\ 12', 40635.5, 10.2),
    ('ChrSy', 42048.0, 0),
    ('ChrUn', 42140.5, 0),
)


def plot_breaks_and_labels(
    fig,
    root,
    ax,
    gx,
    gy,
    xsize,
    ysize,
    qbreaks,
    sbreaks,
    sep=True,
    chrlw=0.1,
    sepcolor="g",
    minfont=5,
    stdpf=True,
    chpf=True,
):
    """Draw chromosome separators, chromosome labels and the plot frame.

    This variant is specialised for one figure: the x-axis maximum, both
    chromosome label tables and both axis titles are hard-coded.  `gx` and
    `gy` are accepted for interface compatibility but are not used.

    Args:
        fig: Figure handed to `TextHandler` for font-size selection.
        root: Canvas axes on which the chromosome labels are written.
        ax: Axes on which separators and the frame are drawn.
        gx: Unused (x-axis title is hard-coded).
        gy: Unused (y-axis title is hard-coded).
        xsize: Extent of the x-axis, used to scale label positions and to
            place the right edge of the frame.
        ysize: Extent of the y-axis.
        qbreaks: Iterable of (seqid, beg, end) along the x-axis.
        sbreaks: Iterable of (seqid, beg, end) along the y-axis.
        sep: Draw a separator line at the start of every break.
        chrlw: Line width of separators and frame.
        sepcolor: Color of separators and frame.
        minfont: Labels with a smaller font size are not drawn.
        stdpf: Forwarded to `seqid_parse`.
        chpf: Whether seqids are passed through `seqid_parse`.

    Returns:
        Tuple ``(xlim, ylim)`` as applied to `ax`.
    """
    xlim = (0, 47724.0)  # hard-coding xlim maximum
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    # NOTE(review): the labels collected in these two loops are replaced by
    # the hard-coded tables further down; only the separator lines drawn
    # here remain visible in the output.
    for (seqid, beg, end) in qbreaks:
        xsize_ratio = abs(end - beg) * 0.8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        if chpf:
            seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        if sep:
            ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)
    for (seqid, beg, end) in sbreaks:
        ysize_ratio = abs(end - beg) * 0.8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        if chpf:
            seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])
        ychr_labels.append((seqid, (beg + end) / 2, fontsize * 0.85))
        if sep:
            ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels (hard-coded table overrides the computed one)
    xchr_labels = _HARDCODED_X_CHR_LABELS
    for label, pos, fontsize in xchr_labels:
        pos = 0.1 + pos * 0.8 / xsize
        if fontsize >= minfont:
            root.text(
                pos,
                0.91,
                latex(label),
                size=fontsize * 0.85,
                ha="center",
                va="bottom",
                rotation=45,
                color="black",
            )

    # remember y labels are inverted
    ychr_labels = _HARDCODED_Y_CHR_LABELS
    for label, pos, fontsize in ychr_labels:
        pos = 0.9 - pos * 0.8 / ysize
        if fontsize >= minfont:
            root.text(
                0.91,
                pos,
                latex(label),
                size=fontsize * 0.85,
                va="center",
                color="black",
            )

    # Plot the frame
    ax.plot(xlim, [0, 0], "-", lw=chrlw, color=sepcolor)
    ax.plot(xlim, [ysize, ysize], "-", lw=chrlw, color=sepcolor)
    ax.plot([0, 0], ylim, "-", lw=chrlw, color=sepcolor)
    ax.plot([xsize, xsize], ylim, "-", lw=chrlw, color=sepcolor)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # The axis labels are hard-coded (instead of using gx / gy) so that the
    # species names are spelled out in italics rather than showing the BED
    # file names.
    ax.set_xlabel(r'$\it{Zizania\ palustris}$', size=16)
    ax.set_ylabel(r'$\it{Oryza\ sativa}$', size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="black", size=10)

    return xlim, ylim