Esempio n. 1
0
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        fp = open(bc)
        data = []
        for row in fp:
            prog, pcounts, tcounts, shared = row.split()
            pcounts = int(pcounts)
            tcounts = int(tcounts)
            shared = int(shared)
            data.append((prog, pcounts, tcounts, shared))
        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            print >> sys.stderr, message
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                  (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
Esempio n. 2
0
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        fp = open(bc)
        data = []
        for row in fp:
            prog, pcounts, tcounts, shared = row.split()
            pcounts = int(pcounts)
            tcounts = int(tcounts)
            shared = int(shared)
            data.append((prog, pcounts, tcounts, shared))
        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            print(message, file=sys.stderr)
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                  (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
Esempio n. 3
0
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = 0.15, 0.65
    w, h = 0.7, 0.3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize, ylabel="Genetic distance (cM)")
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= 0.28
    h = 0.25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(
        ax,
        xlim=chrsize,
        ylim=25 * 1e-6,
        xfactor=1e-6,
        xlabel="Physical position (Mb)",
        yfactor=1000000,
        ylabel="Recomb. rate\n(cM / Mb)",
    )
    ax.xaxis.grid(False)

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict(
        (x.component_id, (x.object_beg, x.object_end, x.component_span, x.orientation))
        for x in g.agp
        if not x.is_gap
    )
    a_beg, a_end, asize, ao = sizes[a]
    b_beg, b_end, bsize, bo = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = 0.6 / total_size
    y = 0.16
    pad = 0.03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((0.15 + pb_ratio * a_beg, 0.2), (ystart, ystart - 0.14), ":", color=lsg)
    root.plot((0.15 + pb_ratio * b_end, 0.3), (ystart, ystart - 0.08), ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip(
        (a, b), (asize, bsize), (49213, 81277), (0.2, 0.2 + (asize + gapsize) * ratio)
    ):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag), ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro")

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(
        sum(markers) / 2,
        ypos + pad,
        "Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
        **fontprop
    )

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(
        sum(markers) / 2,
        ypos - 3 * pad,
        r"$\textit{Estimated gap size: 96,433bp}$",
        color="r",
        ha="center",
        va="center",
    )

    labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Esempio n. 4
0
def plot_breaks_and_labels(fig, root, ax, gx, gy, xsize, ysize,
                           qbreaks, sbreaks, sep=True, chrlw=.1,
                           sepcolor="g", minfont=5, stdpf=True):
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbreaks:
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        if sep:
            ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbreaks:
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        if sep:
            ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos, .91, latex(label), size=fontsize,
                ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91, pos, latex(label), size=fontsize,
                va="center", color="grey")

    # Plot the frame
    ax.plot(xlim, [0, 0], "-", lw=chrlw, color=sepcolor)
    ax.plot(xlim, [ysize, ysize], "-", lw=chrlw, color=sepcolor)
    ax.plot([0, 0], ylim, "-", lw=chrlw, color=sepcolor)
    ax.plot([xsize, xsize], ylim, "-", lw=chrlw, color=sepcolor)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
            color='gray', size=10)

    return xlim, ylim
Esempio n. 5
0
def gcdepth(args):
    """
    %prog gcdepth sample_name tag

    Plot GC content vs depth vs genomnic bins. Inputs are mosdepth output:
    - NA12878_S1.mosdepth.global.dist.txt
    - NA12878_S1.mosdepth.region.dist.txt
    - NA12878_S1.regions.bed.gz
    - NA12878_S1.regions.bed.gz.csi
    - NA12878_S1.regions.gc.bed.gz

    A sample mosdepth.sh script might look like:
    ```
    #!/bin/bash
    LD_LIBRARY_PATH=mosdepth/htslib/ mosdepth/mosdepth $1 \\
        bams/$1.bam -t 4 -c chr1 -n --by 1000

    bedtools nuc -fi GRCh38/WholeGenomeFasta/genome.fa \\
        -bed $1.regions.bed.gz \\
        | pigz -c > $1.regions.gc.bed.gz
    ```
    """
    import hashlib
    from jcvi.algorithms.formula import MAD_interval as confidence_interval
    from jcvi.graphics.base import latex, plt, savefig, set2

    p = OptionParser(gcdepth.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    sample_name, tag = args
    # The tag is used to add to title, also provide a random (hashed) color
    coloridx = int(hashlib.sha256(tag).hexdigest(), 16) % len(set2)
    color = set2[coloridx]

    # mosdepth outputs a table that we can use to plot relationship
    gcbedgz = sample_name + ".regions.gc.bed.gz"
    df = pd.read_csv(gcbedgz, delimiter="\t")
    mf = df.loc[:, ("4_usercol", "6_pct_gc")]
    mf.columns = ["depth", "gc"]

    # We discard any bins that are gaps
    mf = mf[(mf["depth"] > 0.001) | (mf["gc"] > 0.001)]

    # Create GC bins
    gcbins = defaultdict(list)
    for i, row in mf.iterrows():
        gcp = int(round(row["gc"] * 100))
        gcbins[gcp].append(row["depth"])
    gcd = sorted(
        (k * 0.01, confidence_interval(v)) for (k, v) in gcbins.items())
    gcd_x, gcd_y = zip(*gcd)
    m, lo, hi = zip(*gcd_y)

    # Plot
    plt.plot(
        mf["gc"],
        mf["depth"],
        ".",
        color="lightslategray",
        ms=2,
        mec="lightslategray",
        alpha=0.1,
    )
    patch = plt.fill_between(
        gcd_x,
        lo,
        hi,
        facecolor=color,
        alpha=0.25,
        zorder=10,
        linewidth=0.0,
        label="Median +/- MAD band",
    )
    plt.plot(gcd_x, m, "-", color=color, lw=2, zorder=20)

    ax = plt.gca()
    ax.legend(handles=[patch], loc="best")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 100)
    ax.set_title("{} ({})".format(latex(sample_name), tag))
    ax.set_xlabel("GC content")
    ax.set_ylabel("Depth")
    savefig(sample_name + ".gcdepth.png")
Esempio n. 6
0
File: cnv.py Progetto: xuanblo/jcvi
def gcdepth(args):
    """
    %prog gcdepth sample_name tag

    Plot GC content vs depth vs genomnic bins. Inputs are mosdepth output:
    - NA12878_S1.mosdepth.global.dist.txt
    - NA12878_S1.mosdepth.region.dist.txt
    - NA12878_S1.regions.bed.gz
    - NA12878_S1.regions.bed.gz.csi
    - NA12878_S1.regions.gc.bed.gz

    A sample mosdepth.sh script might look like:
    ```
    #!/bin/bash
    LD_LIBRARY_PATH=mosdepth/htslib/ mosdepth/mosdepth $1 \\
        bams/$1.bam -t 4 -c chr1 -n --by 1000

    bedtools nuc -fi GRCh38/WholeGenomeFasta/genome.fa \\
        -bed $1.regions.bed.gz \\
        | pigz -c > $1.regions.gc.bed.gz
    ```
    """
    import hashlib
    from jcvi.algorithms.formula import MAD_interval as confidence_interval
    from jcvi.graphics.base import latex, plt, savefig, set2

    p = OptionParser(gcdepth.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    sample_name, tag = args
    # The tag is used to add to title, also provide a random (hashed) color
    coloridx = int(hashlib.sha1(tag).hexdigest(), 16) % len(set2)
    color = set2[coloridx]

    # mosdepth outputs a table that we can use to plot relationship
    gcbedgz = sample_name + ".regions.gc.bed.gz"
    df = pd.read_csv(gcbedgz, delimiter="\t")
    mf = df.loc[:, ("4_usercol", "6_pct_gc")]
    mf.columns = ["depth", "gc"]

    # We discard any bins that are gaps
    mf = mf[(mf["depth"] > .001) | (mf["gc"] > .001)]

    # Create GC bins
    gcbins = defaultdict(list)
    for i, row in mf.iterrows():
        gcp = int(round(row["gc"] * 100))
        gcbins[gcp].append(row["depth"])
    gcd = sorted((k * .01, confidence_interval(v))
                 for (k, v) in gcbins.items())
    gcd_x, gcd_y = zip(*gcd)
    m, lo, hi = zip(*gcd_y)

    # Plot
    plt.plot(mf["gc"], mf["depth"], ".", color="lightslategray", ms=2,
             mec="lightslategray", alpha=.1)
    patch = plt.fill_between(gcd_x, lo, hi,
                             facecolor=color, alpha=.25, zorder=10,
                             linewidth=0.0, label="Median +/- MAD band")
    plt.plot(gcd_x, m, "-", color=color, lw=2, zorder=20)

    ax = plt.gca()
    ax.legend(handles=[patch], loc="best")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 100)
    ax.set_title("{} ({})".format(latex(sample_name), tag))
    ax.set_xlabel("GC content")
    ax.set_ylabel("Depth")
    savefig(sample_name + ".gcdepth.png")
Esempio n. 7
0
def dotplot(anchorfile,
            qbed,
            sbed,
            fig,
            root,
            ax,
            vmin=0,
            vmax=1,
            is_self=False,
            synteny=False,
            cmap_text=None,
            genomenames=None,
            sample_number=10000,
            minfont=5,
            palette=None,
            chrlw=.01,
            title=None,
            sepcolor="gainsboro"):

    fp = open(anchorfile)

    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    block_id = 0
    for row in fp:
        atoms = row.split()
        block_color = None
        if row[0] == "#":
            block_id += 1
            if palette:
                block_color = palette.get(block_id, "k")
            continue

        # first two columns are query and subject, and an optional third column
        if len(atoms) < 2:
            continue

        query, subject = atoms[:2]
        value = atoms[-1]

        try:
            value = float(value)
        except ValueError:
            value = vmax

        if value < vmin:
            value = vmin
        if value > vmax:
            value = vmax

        if query not in qorder:
            continue
        if subject not in sorder:
            continue

        qi, q = qorder[query]
        si, s = sorder[subject]

        nv = vmax - value if block_color is None else block_color
        data.append((qi, si, nv))
        if is_self:  # Mirror image
            data.append((si, qi, nv))

    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) " \
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    if not palette:
        data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)

    else:
        ax.scatter(x,
                   y,
                   c=c,
                   edgecolors="none",
                   s=2,
                   lw=0,
                   cmap=default_cm,
                   vmin=vmin,
                   vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos,
                      .91,
                      latex(label),
                      size=fontsize,
                      ha="center",
                      va="bottom",
                      rotation=45,
                      color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91,
                      pos,
                      latex(label),
                      size=fontsize,
                      va="center",
                      color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray',
             size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            npairs /= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Esempio n. 8
0
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    pngfile, = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != '.':
        mkdir(outdir)
    if calib:
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)

    resizefile, mainfile, labelfile, exif = \
                      convert_image(pngfile, pf, outdir=outdir,
                                    rotate=opts.rotate,
                                    rows=rows, cols=cols,
                                    labelrows=labelrows, labelcols=labelcols)

    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, nrows=1,
                                             figsize=(iopts.w, iopts.h))

    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance, threshold_rel=.05, indices=False)
        coordinates = peak_local_max(distance, threshold_rel=.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug("Find objects with pixels between {0} ({1}%) and {2} ({3}%)"\
                    .format(min_size, opts.minsize, max_size, opts.maxsize))

    # Plotting
    ax1.set_title('Original picture')
    ax1.imshow(oimg)

    params = "{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title('Edge detection\n({0})'.format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], 'g.')
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title('Object detection')
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    for i, props in enumerate(rp):
        i += 1
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        ax2.plot((x0 - major_dx, x0 + major_dx),
                 (y0 + major_dy, y0 - major_dy), 'r-')
        ax2.plot((x0 - minor_dx, x0 + minor_dx),
                 (y0 - minor_dy, y0 + minor_dy), 'r-')

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * .35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug("Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".\
                        format(i, npixels, len(pixels), 100. * npixels / canvas_size))

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle((minc, minr), maxc - minc, maxr - minr,
                                  fill=False, ec='w', lw=1)
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc, mr, "{0}".format(i), color='w',
                    ha="center", va="center", size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(.1, .92, "File: {0}".format(latex(filename)), color='g')
    ax4.text(.1, .86, "Label: {0}".format(latex(accession)), color='m')
    yy = .8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        if i > 7:
            continue
        ax4.text(.01, yy, str(i), va="center", bbox=dict(fc='none', ec='k'))
        ax4.text(.1, yy, o.pixeltag, va="center")
        yy -= .04
        ax4.add_patch(Rectangle((.1, yy - .025), .12, .05, lw=0,
                      fc=rgb_to_hex(o.rgb)))
        ax4.text(.27, yy, o.hashtag, va="center")
        yy -= .06
    ax4.text(.1 , yy, "(A total of {0} objects displayed)".format(nb_labels),
             color="darkslategrey")
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family='Helvetica', size=8)
        ax.set_yticklabels(yticklabels, family='Helvetica', size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
Esempio n. 9
0
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pngfile, ) = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != ".":
        mkdir(outdir)
    if calib:
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)
    nbcolor = opts.changeBackground
    pngfile = convert_background(pngfile, nbcolor)
    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile,
        pf,
        outdir=outdir,
        rotate=opts.rotate,
        rows=rows,
        cols=cols,
        labelrows=labelrows,
        labelcols=labelcols,
    )
    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4,
                                             nrows=1,
                                             figsize=(iopts.w, iopts.h))
    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance,
                                    threshold_rel=0.05,
                                    indices=False)
        coordinates = peak_local_max(distance, threshold_rel=0.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug(
        "Find objects with pixels between {0} ({1}%) and {2} ({3}%)".format(
            min_size, opts.minsize, max_size, opts.maxsize))

    # Plotting
    ax1.set_title("Original picture")
    ax1.imshow(oimg)

    params = "{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title("Edge detection\n({0})".format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.")
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title("Object detection")
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    for i, props in enumerate(rp):
        i += 1
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        ax2.plot((x0 - major_dx, x0 + major_dx),
                 (y0 + major_dy, y0 - major_dy), "r-")
        ax2.plot((x0 - minor_dx, x0 + minor_dx),
                 (y0 - minor_dy, y0 + minor_dy), "r-")

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * 0.35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug("Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".format(
            i, npixels, len(pixels), 100.0 * npixels / canvas_size))

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle((minc, minr),
                         maxc - minc,
                         maxr - minr,
                         fill=False,
                         ec="w",
                         lw=1)
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc,
                 mr,
                 "{0}".format(i),
                 color="w",
                 ha="center",
                 va="center",
                 size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(0.1, 0.92, "File: {0}".format(latex(filename)), color="g")
    ax4.text(0.1, 0.86, "Label: {0}".format(latex(accession)), color="m")
    yy = 0.8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        if i > 7:
            continue
        ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k"))
        ax4.text(0.1, yy, o.pixeltag, va="center")
        yy -= 0.04
        ax4.add_patch(
            Rectangle((0.1, yy - 0.025),
                      0.12,
                      0.05,
                      lw=0,
                      fc=rgb_to_hex(o.rgb)))
        ax4.text(0.27, yy, o.hashtag, va="center")
        yy -= 0.06
    ax4.text(
        0.1,
        yy,
        "(A total of {0} objects displayed)".format(nb_labels),
        color="darkslategray",
    )
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family="Helvetica", size=8)
        ax.set_yticklabels(yticklabels, family="Helvetica", size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
Esempio n. 10
0
def multilineplot(args):
    """
    %prog multilineplot fastafile chr1

    Combine multiple line plots in one vertical stack
    Inputs must be BED-formatted.

    --lines: traditional line plots, useful for plotting feature freq
    """
    p = OptionParser(multilineplot.__doc__)
    p.add_option("--lines",
                 help="Features to plot in lineplot [default: %default]")
    p.add_option("--colors",
                 help="List of colors matching number of input bed files")
    p.add_option("--mode", default="span", choices=("span", "count", "score"),
                 help="Accumulate feature based on [default: %default]")
    p.add_option("--binned", default=False, action="store_true",
                 help="Specify whether the input is already binned; " +
                 "if True, input files are considered to be binfiles")
    p.add_option("--ymax", type="int", help="Set Y-axis max")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    window, shift, subtract, merge = check_window_options(opts)
    linebeds = []
    colors = opts.colors
    if opts.lines:
        lines = opts.lines.split(",")
        assert len(colors) == len(lines), "Number of chosen colors must match" + \
                " number of input bed files"
        linebeds = get_beds(lines, binned=opts.binned)

    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode,
                            binned=opts.binned, merge=merge)

    clen = Sizes(fastafile).mapping[chr]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0
    plt.rcParams["figure.figsize"] = iopts.w, iopts.h

    fig, axarr = plt.subplots(nrows=len(lines))
    if len(linebeds) == 1:
        axarr = (axarr, )
    fig.suptitle(latex(chr), color="darkslategray")

    for i, ax in enumerate(axarr):
        lineplot(ax, [linebins[i]], nbins, chr, window, shift, \
                color="{0}{1}".format(colors[i], 'r'))

    if opts.ymax:
        ax.set_ylim(0, opts.ymax)

    plt.subplots_adjust(hspace=0.5)

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Esempio n. 11
0
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
        is_self=False, synteny=False, cmap_text=None, cmap="copper",
        genomenames=None, sample_number=10000, minfont=5, palette=None,
        chrlw=.01, title=None, sepcolor="gainsboro"):

    fp = open(anchorfile)

    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Capping values within [{0:.1f}, {1:.1f}]"\
                        .format(vmin, vmax))

    block_id = 0
    for row in fp:
        atoms = row.split()
        block_color = None
        if row[0] == "#":
            block_id += 1
            if palette:
                block_color = palette.get(block_id, "k")
            continue

        # first two columns are query and subject, and an optional third column
        if len(atoms) < 2:
            continue

        query, subject = atoms[:2]
        value = atoms[-1]

        if cmap_text:
            try:
                value = float(value)
            except ValueError:
                value = vmax

            if value < vmin:
                continue
            if value > vmax:
                continue
        else:
            value = 0

        if query not in qorder:
            continue
        if subject not in sorder:
            continue

        qi, q = qorder[query]
        si, s = sorder[subject]

        nv = value if block_color is None else block_color
        data.append((qi, si, nv))
        if is_self:  # Mirror image
            data.append((si, qi, nv))

    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) " \
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    #if not palette:
    #    data.sort(key=lambda x: -x[2])

    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap,
                vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos, .91, latex(label), size=fontsize,
                ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91, pos, latex(label), size=fontsize,
                va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(markup(gx), size=16)
    ax.set_ylabel(markup(gy), size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
            color='gray', size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            npairs /= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(markup(title), x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Esempio n. 12
0
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = .15, .65
    w, h = .7, .3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(.05, .95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= .28
    h = .25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(ax, xlim=chrsize, ylim=25 * 1e-6,
                       xfactor=1e-6, xlabel="Physical position (Mb)",
                       yfactor=1000000, ylabel="Recomb. rate\n(cM / Mb)")

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict((x.component_id, (x.object_beg, x.object_end,
                                   x.component_span, x.orientation)) \
                                   for x in g.agp if not x.is_gap)
    a_beg, a_end, asize, ao = sizes[a]
    b_beg, b_end, bsize, bo = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = .6 / total_size
    y = .16
    pad = .03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((.15 + pb_ratio * a_beg, .2),
              (ystart, ystart - .14), ":", color=lsg)
    root.plot((.15 + pb_ratio * b_end, .3),
              (ystart, ystart - .08), ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip((a, b), (asize, bsize), (49213, 81277),
                              (.2, .2 + (asize + gapsize) * ratio)):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=.025, fc='gainsboro')

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(markers) / 2, ypos + pad,
              "Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)", **fontprop)

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(sum(markers) / 2, ypos - 3 * pad, r"$\textit{Estimated gap size: 96,433bp}$",
                                  color="r", ha="center", va="center")

    labels = ((.05, .95, 'A'), (.05, .6, 'B'), (.05, .27, 'C'))
    panel_labels(root, labels)
    normalize_axes(root)

    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Esempio n. 13
0
def draw_chromosomes(
    root,
    bedfile,
    sizes,
    iopts,
    mergedist,
    winsize,
    imagemap,
    mappingfile=None,
    gauge=False,
    legend=True,
    empty=False,
    title=None,
):
    bed = Bed(bedfile)
    prefix = bedfile.rsplit(".", 1)[0]

    if imagemap:
        imgmapfile = prefix + ".map"
        mapfh = open(imgmapfile, "w")
        print('<map id="' + prefix + '">', file=mapfh)

    if mappingfile:
        mappings = DictFile(mappingfile, delimiter="\t")
        classes = sorted(set(mappings.values()))
        preset_colors = (DictFile(
            mappingfile, keypos=1, valuepos=2, delimiter="\t")
                         if DictFile.num_columns(mappingfile) >= 3 else {})
    else:
        classes = sorted(set(x.accn for x in bed))
        mappings = dict((x, x) for x in classes)
        preset_colors = {}

    logging.debug("A total of {} classes found: {}".format(
        len(classes), ",".join(classes)))

    # Assign colors to classes
    ncolors = max(3, min(len(classes), 12))
    palette = set1_n if ncolors <= 8 else set3_n
    colorset = palette(number=ncolors)
    colorset = sample_N(colorset, len(classes))
    class_colors = dict(zip(classes, colorset))
    class_colors.update(preset_colors)
    logging.debug("Assigned colors: {}".format(class_colors))

    chr_lens = {}
    centromeres = {}
    if sizes:
        chr_lens = Sizes(sizes).sizes_mapping
    else:
        for b, blines in groupby(bed, key=(lambda x: x.seqid)):
            blines = list(blines)
            maxlen = max(x.end for x in blines)
            chr_lens[b] = maxlen

    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = "-"

    chr_number = len(chr_lens)
    if centromeres:
        assert chr_number == len(
            centromeres), "chr_number = {}, centromeres = {}".format(
                chr_number, centromeres)

    r = 0.7  # width and height of the whole chromosome set
    xstart, ystart = 0.15, 0.85
    xinterval = r / chr_number
    xwidth = xinterval * 0.5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chr, clen) in enumerate(sorted(chr_lens.items())):
        xx = xstart + a * xinterval + 0.5 * xwidth
        root.text(xx, ystart + 0.01, str(get_number(chr)), ha="center")
        if centromeres:
            yy = ystart - centromeres[chr] * ratio
            ChromosomeWithCentromere(root,
                                     xx,
                                     ystart,
                                     yy,
                                     ystart - clen * ratio,
                                     width=xwidth)
        else:
            Chromosome(root, xx, ystart, ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = 1
    # color the regions
    for chr in sorted(chr_lens.keys()):
        segment_size, excess = 0, 0
        bac_list = []
        prev_end, prev_klass = 0, None
        for b in bed.sub_bed(chr):
            clen = chr_lens[chr]
            idx = chr_idxs[chr]
            klass = b.accn
            if klass == "centromere":
                continue
            start = b.start
            end = b.end
            if start < prev_end + mergedist and klass == prev_klass:
                start = prev_end
            xx = xstart + idx * xinterval
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle(
                    (xx, yystart),
                    xwidth,
                    yyend - yystart,
                    fc=class_colors.get(klass, "lightslategray"),
                    lw=0,
                    alpha=alpha,
                ))
            prev_end, prev_klass = b.end, klass

            if imagemap:
                """
                `segment` : size of current BAC being investigated + `excess`
                `excess`  : left-over bases from the previous BAC, as a result of
                            iterating over `winsize` regions of `segment`
                """
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = (
                        xx,
                        (1 - ystart) + segment_start * ratio,
                        xx + xwidth,
                        (1 - ystart) + segment_end * ratio,
                    )
                    print(
                        "\t" + write_ImageMapLine(
                            tlx,
                            tly,
                            brx,
                            bry,
                            iopts.w,
                            iopts.h,
                            iopts.dpi,
                            chr + ":" + ",".join(bac_list),
                            segment_start,
                            segment_end,
                        ),
                        file=mapfh,
                    )

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = (
                xx,
                (1 - ystart) + segment_start * ratio,
                xx + xwidth,
                (1 - ystart) + segment_end * ratio,
            )
            print(
                "\t" + write_ImageMapLine(
                    tlx,
                    tly,
                    brx,
                    bry,
                    iopts.w,
                    iopts.h,
                    iopts.dpi,
                    chr + ":" + ",".join(bac_list),
                    segment_start,
                    segment_end,
                ),
                file=mapfh,
            )

    if imagemap:
        print("</map>", file=mapfh)
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if gauge:
        xstart, ystart = 0.9, 0.85
        Gauge(root, xstart, ystart - r, ystart, max_chr_len)

    if "centromere" in class_colors:
        del class_colors["centromere"]

    # class legends, four in a row
    if legend:
        xstart = 0.1
        xinterval = 0.8 / len(class_colors)
        xwidth = 0.04
        yy = 0.08
        for klass, cc in sorted(class_colors.items()):
            if klass == "-":
                continue
            root.add_patch(
                Rectangle((xstart, yy),
                          xwidth,
                          xwidth,
                          fc=cc,
                          lw=0,
                          alpha=alpha))
            root.text(xstart + xwidth + 0.01, yy, latex(klass), fontsize=10)
            xstart += xinterval

    if empty:
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1))
        root.text(xstart + xwidth + 0.01, yy, empty, fontsize=10)

    if title:
        root.text(0.5, 0.95, markup(title), ha="center", va="center")
Esempio n. 14
0
def test_latex(s, expected):
    from jcvi.graphics.base import latex

    assert latex(s) == expected, "Expect {}".format(expected)
Esempio n. 15
0
def multilineplot(args):
    """
    %prog multilineplot fastafile chr1

    Combine multiple line plots in one vertical stack
    Inputs must be BED-formatted.

    --lines: traditional line plots, useful for plotting feature freq
    """
    p = OptionParser(multilineplot.__doc__)
    p.add_option("--lines", help="Features to plot in lineplot")
    p.add_option("--colors",
                 help="List of colors matching number of input bed files")
    p.add_option(
        "--mode",
        default="span",
        choices=("span", "count", "score"),
        help="Accumulate feature based on",
    )
    p.add_option(
        "--binned",
        default=False,
        action="store_true",
        help="Specify whether the input is already binned; " +
        "if True, input files are considered to be binfiles",
    )
    p.add_option("--ymax", type="int", help="Set Y-axis max")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    window, shift, subtract, merge = check_window_options(opts)
    linebeds = []
    colors = opts.colors
    if opts.lines:
        lines = opts.lines.split(",")
        assert len(colors) == len(lines), (
            "Number of chosen colors must match" +
            " number of input bed files")
        linebeds = get_beds(lines, binned=opts.binned)

    linebins = get_binfiles(linebeds,
                            fastafile,
                            shift,
                            mode=opts.mode,
                            binned=opts.binned,
                            merge=merge)

    clen = Sizes(fastafile).mapping[chr]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0
    plt.rcParams["figure.figsize"] = iopts.w, iopts.h

    fig, axarr = plt.subplots(nrows=len(lines))
    if len(linebeds) == 1:
        axarr = (axarr, )
    fig.suptitle(latex(chr), color="darkslategray")

    for i, ax in enumerate(axarr):
        lineplot(
            ax,
            [linebins[i]],
            nbins,
            chr,
            window,
            shift,
            color="{0}{1}".format(colors[i], "r"),
        )

    if opts.ymax:
        ax.set_ylim(0, opts.ymax)

    plt.subplots_adjust(hspace=0.5)

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def plot_breaks_and_labels(
    fig,
    root,
    ax,
    gx,
    gy,
    xsize,
    ysize,
    qbreaks,
    sbreaks,
    sep=True,
    chrlw=0.1,
    sepcolor="g",
    minfont=5,
    stdpf=True,
    chpf=True,
):
    xlim = (0, 47724.0) # hard-coding xlim maximum
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbreaks:
        xsize_ratio = abs(end - beg) * 0.8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        if chpf:
            seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        if sep:
            ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)
    for (seqid, beg, end) in sbreaks:
        ysize_ratio = abs(end - beg) * 0.8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        if chpf:
            seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])
        ychr_labels.append((seqid, (beg + end) / 2, fontsize*0.85))
        if sep:
            ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    xchr_labels = [('chr\ 1', 1997.5, 12), ('chr\ 2', 5944.5, 12), ('chr\ 3', 9014.0, 12), ('chr\ 4', 11351.5, 12), ('chr\ 5', 13639.0, 12), ('chr\ 6', 17657.5, 12), ('chr\ 7', 22329.0, 12), ('chr\ 8', 25466.0, 12), ('chr\ 9', 28092.0, 12), ('chr\ 10', 31361.5, 12), ('chr\ 11', 34457.0, 12), ('chr\ 12', 37234.0, 12), ('chr\ 13', 41112.5, 12), ('chr\ 14', 43851.0, 12), ('chr\ 15', 45258.5, 12), ('scf\ 16', 46740.5, 12), ('scf\ 458', 47724.0, 12)]
    for label, pos, fontsize in xchr_labels:
        #print(xchr_labels)
        pos = 0.1 + pos * 0.8 / xsize
        if fontsize >= minfont:
            root.text(
                pos,
                0.91,
                latex(label),
                size=fontsize*0.85,
                ha="center",
                va="bottom",
                rotation=45,
                color="black",
            )

    # remember y labels are inverted
    ychr_labels = [('chr\ 1', 2672.0, 10.2), ('chr\ 2', 7532.0, 10.2), ('chr\ 3', 12035.0, 10.2), ('chr\ 4', 16228.0, 10.2), ('chr\ 5', 19784.5, 10.2), ('chr\ 6', 23211.0, 10.2), ('chr\ 7', 26612.5, 10.2), ('chr\ 8', 29773.0, 10.2), ('chr\ 9', 32518.0, 10.2), ('chr\ 10', 35004.5, 10.2), ('chr\ 11', 37760.0, 10.2), ('chr\ 12', 40635.5, 10.2), ('ChrSy', 42048.0, 0), ('ChrUn', 42140.5, 0)]
    for label, pos, fontsize in ychr_labels:
        #print(ychr_labels)
        pos = 0.9 - pos * 0.8 / ysize
        if fontsize >= minfont:
            root.text(0.91, pos, latex(label), size=fontsize*0.85, va="center", color="black")

    # Plot the frame
    ax.plot(xlim, [0, 0], "-", lw=chrlw, color=sepcolor)
    ax.plot(xlim, [ysize, ysize], "-", lw=chrlw, color=sepcolor)
    ax.plot([0, 0], ylim, "-", lw=chrlw, color=sepcolor)
    ax.plot([xsize, xsize], ylim, "-", lw=chrlw, color=sepcolor)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    
    # The axis labels have been hardcoded (vs. gx gy as in original) so taht we can get the species names spelled out in italics, rather than the BED file name.
    ax.set_xlabel('$\it{Zizania\ palustris}$', size=16)
    ax.set_ylabel('$\it{Oryza\ sativa}$', size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="black", size=10)

    return xlim, ylim