Ejemplo n.º 1
0
def draw_box(clusters, ax, color="b"):

    for cluster in clusters:
        xrect, yrect = zip(*cluster)
        xmin, xmax, ymin, ymax = min(xrect), max(xrect), \
                                min(yrect), max(yrect)
        ax.add_patch(Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,\
                                ec=color, fc='y', alpha=.5))
Ejemplo n.º 2
0
def draw_cytoband(
    ax, chrom, filename=datafile("hg38.band.txt"), ymid=0.5, width=0.99, height=0.11
):
    import pandas as pd

    bands = pd.read_csv(filename, sep="\t")
    chrombands = bands[bands["#chrom"] == chrom]
    data = []
    for i, (chr, start, end, name, gie) in chrombands.iterrows():
        data.append((chr, start, end, name, gie))
    chromsize = max(x[2] for x in data)
    scale = width * 1.0 / chromsize
    xstart, ystart = (1 - width) / 2, ymid - height / 2
    bp_to_pos = lambda x: xstart + x * scale
    in_acen = False
    for chr, start, end, name, gie in data:
        color, alpha = get_color(gie)
        bplen = end - start
        if "acen" in gie:
            if in_acen:
                xys = [
                    (bp_to_pos(start), ymid),
                    (bp_to_pos(end), ystart),
                    (bp_to_pos(end), ystart + height),
                ]
            else:
                xys = [
                    (bp_to_pos(start), ystart),
                    (bp_to_pos(start), ystart + height),
                    (bp_to_pos(end), ymid),
                ]
            p = Polygon(xys, closed=True, ec="k", fc=color, alpha=alpha)
            in_acen = True
        else:
            p = Rectangle(
                (bp_to_pos(start), ystart),
                bplen * scale,
                height,
                ec="k",
                fc=color,
                alpha=alpha,
            )
        # print bp_to_pos(end)
        ax.add_patch(p)
        ax.text(
            bp_to_pos((start + end) / 2),
            ymid + height * 0.8,
            name,
            rotation=40,
            color="lightslategray",
        )

    ax.text(0.5, ystart - height, chrom, size=16, ha="center", va="center")

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_axis_off()
Ejemplo n.º 3
0
    def __init__(self,
                 ax,
                 x1,
                 x2,
                 y,
                 height=0.04,
                 gradient=True,
                 fc="gray",
                 pic_type='Rectangle',
                 **kwargs):

        super(Glyph, self).__init__(ax)
        width = x2 - x1
        # Frame around the gradient rectangle
        p1 = (x1, y - 0.5 * height)
        if pic_type == 'Rectangle':
            self.append(Rectangle(p1, width, height, fc=fc, lw=0, **kwargs))
        elif pic_type == 'Arrow':
            style = "Simple,head_length=1,head_width=5,tail_width=5"
            x_tail = x1
            y_tail = y
            x_head = x2
            y_head = y
            self.append(
                FancyArrowPatch((x_tail, y_tail), (x_head, y_head),
                                arrowstyle=style,
                                fc=fc,
                                lw=0,
                                **kwargs))

        # Several overlaying patches
        if gradient:
            for cascade in np.arange(0.1, 0.55, 0.05):
                p1 = (x1, y - height * cascade)
                self.append(
                    Rectangle(p1,
                              width,
                              2 * cascade * height,
                              fc="w",
                              lw=0,
                              alpha=0.1,
                              **kwargs))

        self.add_patches()
Ejemplo n.º 4
0
    def __init__(self,
                 ax,
                 x1,
                 x2,
                 y,
                 height=.015,
                 ec="k",
                 patch=None,
                 patchcolor='lightgrey',
                 lw=1,
                 fc=None,
                 zorder=2,
                 roundrect=False):
        """
        Horizontal version of the Chromosome glyph above.
        """
        x1, x2 = sorted((x1, x2))
        super(HorizontalChromosome, self).__init__(ax)
        pts, r = self.get_pts(x1, x2, y, height)
        if roundrect:
            RoundRect(ax, (x1, y - height * .5),
                      x2 - x1,
                      height,
                      fill=False,
                      lw=lw,
                      ec=ec,
                      zorder=zorder + 1)
        else:
            self.append(Polygon(pts, fill=False, lw=lw, ec=ec, zorder=zorder))

        if fc:
            pts, r = self.get_pts(x1, x2, y, height / 2)
            if roundrect:
                RoundRect(ax, (x1, y - height / 4),
                          x2 - x1,
                          height / 2,
                          fc=fc,
                          lw=0,
                          zorder=zorder)
            else:
                self.append(Polygon(pts, fc=fc, lw=0, zorder=zorder))
        if patch:
            rr = r * .9  # Shrink a bit for the patches
            for i in xrange(0, len(patch), 2):
                if i + 1 > len(patch) - 1:
                    continue
                p1, p2 = patch[i], patch[i + 1]
                self.append(
                    Rectangle((p1, y - rr),
                              p2 - p1,
                              2 * rr,
                              lw=0,
                              fc=patchcolor))

        self.add_patches()
Ejemplo n.º 5
0
 def draw(self):
     ar = self.ar
     pad = self.pad
     pads = 0
     for (a, b), w, color in zip(pairwise(ar), self.wiggles, self.colors):
         yf = self.ystart + w * 1. / self.wiggle
         if color:
             p = Rectangle((a + pads, yf), b - a, self.height, color=color)
             self.append(p)
         pads += pad
     self.add_patches()
Ejemplo n.º 6
0
def draw_geoscale(ax, minx=0, maxx=175):
    """
    Draw geological epoch on million year ago (mya) scale.
    """
    a, b = .1, .6  # Correspond to 200mya and 0mya

    def cv(x):
        return b - (x - b) / (maxx - minx) * (b - a)

    ax.plot((a, b), (.5, .5), "k-")
    tick = .015
    for mya in xrange(maxx - 25, 0, -25):
        p = cv(mya)
        ax.plot((p, p), (.5, .5 - tick), "k-")
        ax.text(p, .5 - 2.5 * tick, str(mya), ha="center", va="center")
    ax.text((a + b) / 2,
            .5 - 5 * tick,
            "Time before present (million years)",
            ha="center",
            va="center")

    # Source:
    # http://www.weston.org/schools/ms/biologyweb/evolution/handouts/GSAchron09.jpg
    Geo = (("Neogene", 2.6, 23.0, "#fee400"), ("Paleogene", 23.0, 65.5,
                                               "#ff9a65"),
           ("Cretaceous", 65.5, 145.5, "#80ff40"), ("Jurassic", 145.5, 201.6,
                                                    "#33fff3"))
    h = .05
    for era, start, end, color in Geo:
        start, end = cv(start), cv(end)
        end = max(a, end)
        p = Rectangle((end, .5 + tick / 2),
                      abs(start - end),
                      h,
                      lw=1,
                      ec="w",
                      fc=color)
        ax.text((start + end) / 2,
                .5 + (tick + h) / 2,
                era,
                ha="center",
                va="center",
                size=9)
        ax.add_patch(p)
Ejemplo n.º 7
0
    def __init__(self, ax, x, y1, y2, width=.015, ec="k", patch=None,
                 patchcolor='lightgrey', lw=1, fc="k", zorder=2):
        """
        Chromosome with positions given in (x, y1) => (x, y2)

        The chromosome can also be patched, e.g. to show scaffold composition in
        alternating shades. Use a list of starting locations to segment.
        """
        y1, y2 = sorted((y1, y2))
        super(Chromosome, self).__init__(ax)
        pts, r = self.get_pts(x, y1, y2, width)
        self.append(Polygon(pts, fill=False, lw=lw, ec=ec, zorder=zorder))
        if patch:
            rr = r * .9  # Shrink a bit for the patches
            for i in xrange(0, len(patch), 2):
                if i + 1 > len(patch) - 1:
                    continue
                p1, p2 = patch[i], patch[i + 1]
                self.append(Rectangle((x - rr, p1), 2 * rr, p2 - p1, lw=0,
                             fc=patchcolor))

        self.add_patches()
Ejemplo n.º 8
0
def deletion(args):
    """
    %prog deletion [deletion-genes|deletion-bases] C2-deletions boleracea.bed

    Plot histogram for napus deletions. Can plot deletion-genes or
    deletion-bases. The three largest segmental deletions will be highlighted
    along with a drawing of the C2 chromosome.
    """
    import math
    from jcvi.formats.bed import Bed
    from jcvi.graphics.chromosome import HorizontalChromosome
    from jcvi.graphics.base import kb_formatter

    p = OptionParser(deletion.__doc__)
    opts, args, iopts = p.set_image_options(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    deletion_genes, deletions, bed = args
    dg = [int(x) for x in open(deletion_genes)]
    dsg, lsg = "darkslategray", "lightslategray"

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([.1, .1, .8, .8])
    minval = 2 if deletion_genes == "deleted-genes" else 2048
    bins = np.logspace(math.log(minval, 10), math.log(max(dg), 10), 16)
    n, bins, histpatches = ax.hist(dg, bins=bins, \
                                   fc=lsg, alpha=.75)
    ax.set_xscale('log', basex=2)
    if deletion_genes == "deleted-genes":
        ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
        ax.set_xlabel('No. of deleted genes in each segment')
    else:
        ax.xaxis.set_major_formatter(kb_formatter)
        ax.set_xlabel('No. of deleted bases in each segment')
    ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
    ax.set_ylabel('No. of segments')
    ax.patch.set_alpha(0.1)

    # Draw chromosome C2
    na, nb = .45, .85
    root.text((na + nb) / 2, .54, "ChrC02", ha="center")
    HorizontalChromosome(root, na, nb, .5, height=.025, fc=lsg, fill=True)

    order = Bed(bed).order
    fp = open(deletions)
    scale = lambda x: na + x * (nb - na) / 52886895
    for i, row in enumerate(fp):
        i += 1
        num, genes = row.split()
        genes = genes.split("|")
        ia, a = order[genes[0]]
        ib, b = order[genes[-1]]
        mi, mx = a.start, a.end
        mi, mx = scale(mi), scale(mx)
        root.add_patch(Rectangle((mi, .475), mx - mi, .05, fc="red", ec="red"))
        if i == 1:  # offset between two adjacent regions for aesthetics
            mi -= .015
        elif i == 2:
            mi += .015
        TextCircle(root, mi, .44, str(i), fc="red")

    for i, mi in zip(range(1, 4), (.83, .78, .73)):
        TextCircle(root, mi, .2, str(i), fc="red")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = deletion_genes + ".pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 9
0
def dotplot(anchorfile,
            qbed,
            sbed,
            fig,
            root,
            ax,
            vmin=0,
            vmax=1,
            is_self=False,
            synteny=False,
            cmap_text=None,
            genomenames=None,
            sample_number=10000,
            minfont=5,
            palette=None,
            chrlw=.01,
            title=None,
            sepcolor="gainsboro"):

    fp = open(anchorfile)

    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    block_id = 0
    for row in fp:
        atoms = row.split()
        block_color = None
        if row[0] == "#":
            block_id += 1
            if palette:
                block_color = palette.get(block_id, "k")
            continue

        # first two columns are query and subject, and an optional third column
        if len(atoms) < 2:
            continue

        query, subject = atoms[:2]
        value = atoms[-1]

        try:
            value = float(value)
        except ValueError:
            value = vmax

        if value < vmin:
            value = vmin
        if value > vmax:
            value = vmax

        if query not in qorder:
            continue
        if subject not in sorder:
            continue

        qi, q = qorder[query]
        si, s = sorder[subject]

        nv = vmax - value if block_color is None else block_color
        data.append((qi, si, nv))
        if is_self:  # Mirror image
            data.append((si, qi, nv))

    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) " \
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    if not palette:
        data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)

    else:
        ax.scatter(x,
                   y,
                   c=c,
                   edgecolors="none",
                   s=2,
                   lw=0,
                   cmap=default_cm,
                   vmin=vmin,
                   vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos,
                      .91,
                      latex(label),
                      size=fontsize,
                      ha="center",
                      va="bottom",
                      rotation=45,
                      color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91,
                      pos,
                      latex(label),
                      size=fontsize,
                      va="center",
                      color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray',
             size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            npairs /= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
Ejemplo n.º 10
0
def composite(args):
    """
    %prog composite fastafile chr1

    Combine line plots, feature bars and alt-bars, different data types
    specified in options. Inputs must be BED-formatted. Three types of viz are
    currently supported:

    --lines: traditional line plots, useful for plotting feature freq
    --bars: show where the extent of features are
    --altbars: similar to bars, yet in two alternating tracks, e.g. scaffolds
    """
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(composite.__doc__)
    p.add_option("--lines", help="Features to plot in lineplot")
    p.add_option("--bars", help="Features to plot in bars")
    p.add_option("--altbars", help="Features to plot in alt-bars")
    p.add_option(
        "--fatten",
        default=False,
        action="store_true",
        help="Help visualize certain narrow features",
    )
    p.add_option(
        "--mode",
        default="span",
        choices=("span", "count", "score"),
        help="Accumulate feature based on",
    )
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    window, shift, subtract, merge = check_window_options(opts)
    linebeds, barbeds, altbarbeds = [], [], []
    fatten = opts.fatten
    if opts.lines:
        lines = opts.lines.split(",")
        linebeds = get_beds(lines)
    if opts.bars:
        bars = opts.bars.split(",")
        barbeds = get_beds(bars)
    if opts.altbars:
        altbars = opts.altbars.split(",")
        altbarbeds = get_beds(altbars)

    linebins = get_binfiles(linebeds,
                            fastafile,
                            shift,
                            mode=opts.mode,
                            merge=merge)

    margin = 0.12
    clen = Sizes(fastafile).mapping[chr]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    root.text(0.5, 0.95, chr, ha="center", color="darkslategray")

    xstart, xend = margin, 1 - margin
    xlen = xend - xstart
    ratio = xlen / clen
    # Line plots
    ax = fig.add_axes([xstart, 0.6, xlen, 0.3])
    lineplot(ax, linebins, nbins, chr, window, shift)

    # Bar plots
    yy = 0.5
    yinterval = 0.08
    xs = lambda x: xstart + ratio * x
    r = 0.01
    fattend = 0.0025
    for bb in barbeds:
        root.text(xend + 0.01, yy, bb.split(".")[0], va="center")
        HorizontalChromosome(root, xstart, xend, yy, height=0.02)
        bb = Bed(bb)
        for b in bb:
            start, end = xs(b.start), xs(b.end)
            span = end - start
            if fatten and span < fattend:
                span = fattend

            root.add_patch(
                Rectangle((start, yy - r),
                          span,
                          2 * r,
                          lw=0,
                          fc="darkslategray"))
        yy -= yinterval

    # Alternative bar plots
    offset = r / 2
    for bb in altbarbeds:
        root.text(xend + 0.01, yy, bb.split(".")[0], va="center")
        bb = Bed(bb)
        for i, b in enumerate(bb):
            start, end = xs(b.start), xs(b.end)
            span = end - start
            if span < 0.0001:
                continue
            offset = -offset
            root.add_patch(
                Rectangle((start, yy + offset),
                          end - start,
                          0.003,
                          lw=0,
                          fc="darkslategray"))
        yy -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 11
0
def ancestral(args):
    """
    %prog ancestral ancestral.txt assembly.fasta

    Karyotype evolution of pineapple. The figure is inspired by Amphioxus paper
    Figure 3 and Tetradon paper Figure 9.
    """
    p = OptionParser(ancestral.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    regionsfile, sizesfile = args
    regions = RegionsFile(regionsfile)
    sizes = Sizes(sizesfile).mapping
    sizes = dict((k, v) for (k, v) in sizes.iteritems() if k[:2] == "LG")
    maxsize = max(sizes.values())
    ratio = .5 / maxsize

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes((0, 0, 1, 1))

    from jcvi.graphics.base import set2
    a, b, c, d, e, f, g = set2[:7]
    set2 = (c, g, b, e, d, a, f)

    # Upper panel is the evolution of segments
    # All segments belong to one of seven karyotypes 1 to 7
    karyotypes = regions.karyotypes
    xgap = 1. / (1 + len(karyotypes))
    ygap = .05
    mgap = xgap / 4.5
    gwidth = mgap * .75
    tip = .02
    coords = {}
    for i, k in enumerate(regions.karyotypes):
        x = (i + 1) * xgap
        y = .9
        root.text(x, y + tip, "Anc" + k, ha="center")
        root.plot((x, x), (y, y - ygap), "k-", lw=2)
        y -= 2 * ygap
        coords['a'] = (x - 1.5 * mgap , y)
        coords['b'] = (x - .5 * mgap , y)
        coords['c'] = (x + .5 * mgap , y)
        coords['d'] = (x + 1.5 * mgap , y)
        coords['ab'] = join_nodes_vertical(root, coords, 'a', 'b', y + ygap / 2)
        coords['cd'] = join_nodes_vertical(root, coords, 'c', 'd', y + ygap / 2)
        coords['abcd'] = join_nodes_vertical(root, coords, 'ab', 'cd', y + ygap)
        for n in 'abcd':
            nx, ny = coords[n]
            root.text(nx, ny - tip, n, ha="center")
            coords[n] = (nx, ny - ygap / 2)

        kdata = regions.get_karyotype(k)
        for kd in kdata:
            g = kd.group
            gx, gy = coords[g]
            gsize = ratio * kd.span
            gy -= gsize
            p = Rectangle((gx - gwidth / 2, gy),
                           gwidth, gsize, lw=0, color=set2[i])
            root.add_patch(p)
            root.text(gx, gy + gsize / 2, kd.chromosome,
                      ha="center", va="center", color='w')
            coords[g] = (gx, gy - tip)

    # Bottom panel shows the location of segments on chromosomes
    # TODO: redundant code, similar to graphics.chromosome
    ystart = .54
    chr_number = len(sizes)
    xstart, xend = xgap - 2 * mgap, 1 - xgap + 2 * mgap
    xinterval = (xend - xstart - gwidth) / (chr_number - 1)
    chrpos = {}
    for a, (chr, clen) in enumerate(sorted(sizes.items())):
        chr = get_number(chr)
        xx = xstart + a * xinterval + gwidth / 2
        chrpos[chr] = xx
        root.text(xx, ystart + .01, chr, ha="center")
        Chromosome(root, xx, ystart, ystart - clen * ratio, width=gwidth)

    # Start painting
    for r in regions:
        xx = chrpos[r.chromosome]
        yystart = ystart - r.start * ratio
        yyend = ystart - r.end * ratio
        p = Rectangle((xx - gwidth / 2, yystart), gwidth, yyend - yystart,
                      color=set2[int(r.karyotype) - 1], lw=0)
        root.add_patch(p)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 12
0
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
        is_self=False, synteny=False, cmap_text=None, cmap="copper",
        genomenames=None, sample_number=10000, minfont=5, palette=None,
        chrlw=.1, title=None, sep=True, sepcolor="g", stdpf=True):

    fp = open(anchorfile)
    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    gx, gy = markup(gx), markup(gy)

    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Capping values within [{0:.1f}, {1:.1f}]"\
                        .format(vmin, vmax))

    block_id = 0
    for row in fp:
        atoms = row.split()
        block_color = None
        if row[0] == "#":
            block_id += 1
            if palette:
                block_color = palette.get(block_id, "k")
            continue

        # first two columns are query and subject, and an optional third column
        if len(atoms) < 2:
            continue

        query, subject = atoms[:2]
        value = atoms[-1]

        if cmap_text:
            try:
                value = float(value)
            except ValueError:
                value = vmax

            if value < vmin:
                continue
            if value > vmax:
                continue
        else:
            value = 0

        if query not in qorder:
            continue
        if subject not in sorder:
            continue

        qi, q = qorder[query]
        si, s = sorder[subject]

        nv = value if block_color is None else block_color
        data.append((qi, si, nv))
        if is_self:  # Mirror image
            data.append((si, qi, nv))

    npairs = downsample(data, sample_number=sample_number)
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap,
                vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    qbreaks = qbed.get_breaks()
    sbreaks = sbed.get_breaks()
    xlim, ylim = plot_breaks_and_labels(fig, root, ax, gx, gy, xsize, ysize,
                           qbreaks, sbreaks, sep=sep, chrlw=chrlw,
                           sepcolor=sepcolor, minfont=minfont, stdpf=stdpf)

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if title is None:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            npairs /= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    if title:
        logging.debug("Dot plot title: {}".format(title))
    normalize_axes(root)
Ejemplo n.º 13
0
def draw_chromosomes(
    root,
    bedfile,
    sizes,
    iopts,
    mergedist,
    winsize,
    imagemap,
    mappingfile=None,
    gauge=False,
    legend=True,
    empty=False,
    title=None,
):
    bed = Bed(bedfile)
    prefix = bedfile.rsplit(".", 1)[0]

    if imagemap:
        imgmapfile = prefix + ".map"
        mapfh = open(imgmapfile, "w")
        print('<map id="' + prefix + '">', file=mapfh)

    if mappingfile:
        mappings = DictFile(mappingfile, delimiter="\t")
        classes = sorted(set(mappings.values()))
        preset_colors = (DictFile(
            mappingfile, keypos=1, valuepos=2, delimiter="\t")
                         if DictFile.num_columns(mappingfile) >= 3 else {})
    else:
        classes = sorted(set(x.accn for x in bed))
        mappings = dict((x, x) for x in classes)
        preset_colors = {}

    logging.debug("A total of {} classes found: {}".format(
        len(classes), ",".join(classes)))

    # Assign colors to classes
    ncolors = max(3, min(len(classes), 12))
    palette = set1_n if ncolors <= 8 else set3_n
    colorset = palette(number=ncolors)
    colorset = sample_N(colorset, len(classes))
    class_colors = dict(zip(classes, colorset))
    class_colors.update(preset_colors)
    logging.debug("Assigned colors: {}".format(class_colors))

    chr_lens = {}
    centromeres = {}
    if sizes:
        chr_lens = Sizes(sizes).sizes_mapping
    else:
        for b, blines in groupby(bed, key=(lambda x: x.seqid)):
            blines = list(blines)
            maxlen = max(x.end for x in blines)
            chr_lens[b] = maxlen

    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = "-"

    chr_number = len(chr_lens)
    if centromeres:
        assert chr_number == len(
            centromeres), "chr_number = {}, centromeres = {}".format(
                chr_number, centromeres)

    r = 0.7  # width and height of the whole chromosome set
    xstart, ystart = 0.15, 0.85
    xinterval = r / chr_number
    xwidth = xinterval * 0.5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chr, clen) in enumerate(sorted(chr_lens.items())):
        xx = xstart + a * xinterval + 0.5 * xwidth
        root.text(xx, ystart + 0.01, str(get_number(chr)), ha="center")
        if centromeres:
            yy = ystart - centromeres[chr] * ratio
            ChromosomeWithCentromere(root,
                                     xx,
                                     ystart,
                                     yy,
                                     ystart - clen * ratio,
                                     width=xwidth)
        else:
            Chromosome(root, xx, ystart, ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = 1
    # color the regions
    for chr in sorted(chr_lens.keys()):
        segment_size, excess = 0, 0
        bac_list = []
        prev_end, prev_klass = 0, None
        for b in bed.sub_bed(chr):
            clen = chr_lens[chr]
            idx = chr_idxs[chr]
            klass = b.accn
            if klass == "centromere":
                continue
            start = b.start
            end = b.end
            if start < prev_end + mergedist and klass == prev_klass:
                start = prev_end
            xx = xstart + idx * xinterval
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle(
                    (xx, yystart),
                    xwidth,
                    yyend - yystart,
                    fc=class_colors.get(klass, "lightslategray"),
                    lw=0,
                    alpha=alpha,
                ))
            prev_end, prev_klass = b.end, klass

            if imagemap:
                """
                `segment` : size of current BAC being investigated + `excess`
                `excess`  : left-over bases from the previous BAC, as a result of
                            iterating over `winsize` regions of `segment`
                """
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = (
                        xx,
                        (1 - ystart) + segment_start * ratio,
                        xx + xwidth,
                        (1 - ystart) + segment_end * ratio,
                    )
                    print(
                        "\t" + write_ImageMapLine(
                            tlx,
                            tly,
                            brx,
                            bry,
                            iopts.w,
                            iopts.h,
                            iopts.dpi,
                            chr + ":" + ",".join(bac_list),
                            segment_start,
                            segment_end,
                        ),
                        file=mapfh,
                    )

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = (
                xx,
                (1 - ystart) + segment_start * ratio,
                xx + xwidth,
                (1 - ystart) + segment_end * ratio,
            )
            print(
                "\t" + write_ImageMapLine(
                    tlx,
                    tly,
                    brx,
                    bry,
                    iopts.w,
                    iopts.h,
                    iopts.dpi,
                    chr + ":" + ",".join(bac_list),
                    segment_start,
                    segment_end,
                ),
                file=mapfh,
            )

    if imagemap:
        print("</map>", file=mapfh)
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if gauge:
        xstart, ystart = 0.9, 0.85
        Gauge(root, xstart, ystart - r, ystart, max_chr_len)

    if "centromere" in class_colors:
        del class_colors["centromere"]

    # class legends, four in a row
    if legend:
        xstart = 0.1
        xinterval = 0.8 / len(class_colors)
        xwidth = 0.04
        yy = 0.08
        for klass, cc in sorted(class_colors.items()):
            if klass == "-":
                continue
            root.add_patch(
                Rectangle((xstart, yy),
                          xwidth,
                          xwidth,
                          fc=cc,
                          lw=0,
                          alpha=alpha))
            root.text(xstart + xwidth + 0.01, yy, latex(klass), fontsize=10)
            xstart += xinterval

    if empty:
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1))
        root.text(xstart + xwidth + 0.01, yy, empty, fontsize=10)

    if title:
        root.text(0.5, 0.95, markup(title), ha="center", va="center")
Ejemplo n.º 14
0
def draw_geoscale(ax, margin=0.1, rmargin=0.2, yy=0.1, max_dist=3.0):
    """
    Draw geological epoch on million year ago (mya) scale.
    max_dist = 3.0 => max is 300 mya
    """
    import math

    a, b = margin, 1 - rmargin  # Correspond to 300mya and 0mya
    minx, maxx = 0, int(max_dist * 100)

    def cv(x):
        return b - (x - b) / (maxx - minx) * (b - a)

    ax.plot((a, b), (yy, yy), "k-")
    tick = 0.0125
    scale_start = int(math.ceil(maxx / 25) * 25)
    for mya in range(scale_start - 25, 0, -25):
        p = cv(mya)
        ax.plot((p, p), (yy, yy - tick), "k-")
        ax.text(p, yy - 2.5 * tick, str(mya), ha="center", va="center")

    ax.text(
        (a + b) / 2,
        yy - 5 * tick,
        "Time before present (million years)",
        ha="center",
        va="center",
    )

    # Source:
    # https://en.wikipedia.org/wiki/Geological_period
    Geo = (
        ("Neogene", 2.588, 23.03),
        ("Paleogene", 23.03, 66.0),
        ("Cretaceous", 66.0, 145.5),
        ("Jurassic", 145.5, 201.3),
        ("Triassic", 201.3, 252.17),
        ("Permian", 252.17, 298.9),
        # ("Carboniferous", 298.9, 358.9),
    )
    h = 0.05
    for (era, start, end), color in zip(Geo, set3_n(len(Geo))):
        if maxx - start < 10:  # not visible enough
            continue
        start, end = cv(start), cv(end)
        end = max(a, end)
        p = Rectangle((end, yy + tick / 2),
                      abs(start - end),
                      h,
                      lw=1,
                      ec="w",
                      fc=color)
        ax.text(
            (start + end) / 2,
            yy + (tick + h) / 2,
            era,
            ha="center",
            va="center",
            size=8,
        )
        ax.add_patch(p)
Ejemplo n.º 15
0
def draw_table(ax,
               csv_table,
               extent=(0, 1, 0, 1),
               stripe_color="beige",
               yinflation=1):
    """Draw table on canvas.

    Args:
        ax (matplotlib axes): matplotlib axes
        csv_table (CsvTable): Parsed CSV table
        extent (tuple, optional): (left, right, bottom, top). Defaults to (0, 1, 0, 1).
        stripe_color (str, optional): Stripe color of the table. Defaults to
        "beige".
        yinflation (float, optional): Inflate on y since imshow aspect ratio
        sometimes create warped images. Defaults to 1.
    """
    left, right, bottom, top = extent
    width = right - left
    height = top - bottom
    rows = csv_table.rows
    column_widths = csv_table.column_widths(width)
    print(column_widths)

    yinterval = height / rows
    for i, row in enumerate(csv_table):
        should_stripe = i % 2 == 0
        contain_images = isinstance(row[0], list)
        xstart = left
        if contain_images:
            box_width = min(
                min(column_widths[j] / len(x) for j, x in enumerate(row)),
                yinterval)
        for j, cell in enumerate(row):
            xinterval = column_widths[j]
            xmid = xstart + xinterval / 2
            ymid = top - (i + 0.5) * yinterval
            if contain_images:
                # There may be multiple images, center them
                rect = (xstart, top - (i + 1) * yinterval, xinterval,
                        yinterval)
                draw_multiple_images_in_rectangle(ax,
                                                  cell,
                                                  rect,
                                                  box_width,
                                                  yinflation=yinflation)
                should_stripe = False
            else:
                ax.text(
                    xmid,
                    ymid,
                    cell,
                    ha="center",
                    va="center",
                )

            xstart += column_widths[j]

        if not should_stripe:
            continue

        # Draw the stripes, extend a little longer horizontally
        xpad = 0.01
        ax.add_patch(
            Rectangle(
                (left - xpad, top - (i + 1) * yinterval),
                width + 2 * xpad,
                yinterval,
                fc=stripe_color,
                ec=stripe_color,
            ))
Ejemplo n.º 16
0
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    p = OptionParser(stack.__doc__)
    p.add_option("--top",
                 default=10,
                 type="int",
                 help="Draw the first N chromosomes")
    p.add_option(
        "--stacks",
        default="Exons,Introns,DNA_transposons,Retrotransposons",
        help="Features to plot in stackplot",
    )
    p.add_option("--switch", help="Change chr names based on two-column file")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile, ) = args
    top = opts.top
    window, shift, subtract, merge = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    stacks = opts.stacks.split(",")
    bedfiles = get_beds(stacks)
    binfiles = get_binfiles(bedfiles,
                            fastafile,
                            shift,
                            subtract=subtract,
                            merge=merge)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = 0.08
    inner = 0.02  # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for chr, clen in s:
        yy -= yinterval
        xlen = clen / ratio
        cc = chr
        if "_" in chr:
            ca, cb = chr.split("_")
            cc = ca[0].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner,
                                 color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        nbins = clen / shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, chr, window, shift)
        root.text(xx - 0.04,
                  yy + 0.5 * (yinterval - inner),
                  cc,
                  ha="center",
                  va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for b, p in zip(bedfiles, palette):
        b = b.rsplit(".", 1)[0].replace("_", " ")
        b = Registration.get(b, b)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0))
        xx += 2 * inner
        root.text(xx, yy, b, size=13)
        xx += len(b) * 0.012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 17
0
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    p = OptionParser(heatmap.__doc__)
    p.add_option(
        "--stacks",
        default="Exons,Introns,DNA_transposons,Retrotransposons",
        help="Features to plot in stackplot",
    )
    p.add_option(
        "--heatmaps",
        default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
        help="Features to plot in heatmaps",
    )
    p.add_option("--meres",
                 default=None,
                 help="Extra centromere / telomere features")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    window, shift, subtract, merge = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = get_beds(stacks)
    heatmapbeds = get_beds(heatmaps)
    stackbins = get_binfiles(stackbeds,
                             fastafile,
                             shift,
                             subtract=subtract,
                             merge=merge)
    heatmapbins = get_binfiles(heatmapbeds,
                               fastafile,
                               shift,
                               subtract=subtract,
                               merge=merge)

    margin = 0.06
    inner = 0.015
    clen = Sizes(fastafile).mapping[chr]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = 0.3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio
    cc = chr
    if "_" in chr:
        ca, cb = chr.split("_")
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    nbins = get_nbins(clen, shift)

    owindow = clen / 100
    if owindow > window:
        window = owindow / shift * shift

    stackplot(ax, stackbins, nbins, palette, chr, window, shift)
    ax.text(
        0.1,
        0.9,
        cc,
        va="top",
        zorder=100,
        transform=ax.transAxes,
        bbox=dict(boxstyle="round", fc="w", alpha=0.5),
    )

    # Legends
    xx += xlen + 0.01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for s, p in zip(stacks, palette):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=p, lw=0))
        root.text(xx + 1.5 * inner, yy, s, size=10)

    yh = 0.05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for s, p in zip(heatmaps, heatmapbins):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy -= yh
        m = stackarray(p, chr, window, shift)

        Y = np.array([m, m])
        root.imshow(
            Y,
            extent=(xx, xx + xlen, yy, yy + yh - inner),
            interpolation="nearest",
            aspect="auto",
            cmap=iopts.cmap,
        )
        root.text(xx + xlen + 0.01, yy, s, size=10)

    yy -= yh

    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != chr:
                continue
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=0.01, fc="m",
                                         ec="m"))
            root.text(xx + 0.014, yy, accn, va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 18
0
def expr(args):
    """
    %prog expr block exp layout napus.bed

    Plot a composite figure showing synteny and the expression level between
    homeologs in two tissues - total 4 lists of values. block file contains the
    gene pairs between AN and CN.
    """
    from jcvi.graphics.base import red_purple as default_cm

    p = OptionParser(expr.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 4:
        sys.exit(not p.print_help())

    block, exp, layout, napusbed = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    s = Synteny(fig, root, block, napusbed, layout)

    # Import the expression values
    # Columns are: leaf-A, leaf-C, root-A, root-C
    fp = open(exp)
    data = {}
    for row in fp:
        gid, lf, rt = row.split()
        lf, rt = float(lf), float(rt)
        data[gid] = (lf, rt)

    rA, rB = s.rr
    gA = [x.accn for x in rA.genes]
    gC = [x.accn for x in rB.genes]

    A = [data.get(x, (0, 0)) for x in gA]
    C = [data.get(x, (0, 0)) for x in gC]
    A = np.array(A)
    C = np.array(C)
    A = np.transpose(A)
    C = np.transpose(C)

    d, h = .01, .1
    lsg = "lightslategrey"
    coords = s.gg  # Coordinates of the genes
    axes = []
    for j, (y, gg) in enumerate(((.79, gA), (.24, gC))):
        r = s.rr[j]
        x = r.xstart
        w = r.xend - r.xstart
        ax = fig.add_axes([x, y, w, h])
        axes.append(ax)
        root.add_patch(
            Rectangle((x - h, y - d),
                      w + h + d,
                      h + 2 * d,
                      fill=False,
                      ec=lsg,
                      lw=1))
        root.text(x - d, y + 3 * h / 4, "root", ha="right", va="center")
        root.text(x - d, y + h / 4, "leaf", ha="right", va="center")
        ty = y - 2 * d if y > .5 else y + h + 2 * d
        nrows = len(gg)
        for i, g in enumerate(gg):
            start, end = coords[(j, g)]
            sx, sy = start
            ex, ey = end
            assert sy == ey
            sy = sy + 2 * d if sy > .5 else sy - 2 * d
            root.plot(((sx + ex) / 2, x + w * (i + .5) / nrows), (sy, ty),
                      lw=1,
                      ls=":",
                      color="k",
                      alpha=.2)

    axA, axC = axes
    p = axA.pcolormesh(A, cmap=default_cm)
    p = axC.pcolormesh(C, cmap=default_cm)
    axA.set_xlim(0, len(gA))
    axC.set_xlim(0, len(gC))

    x, y, w, h = .35, .1, .3, .05
    ax_colorbar = fig.add_axes([x, y, w, h])
    fig.colorbar(p, cax=ax_colorbar, orientation='horizontal')
    root.text(x - d, y + h / 2, "RPKM", ha="right", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    for x in (axA, axC, root):
        x.set_axis_off()

    image_name = "napusf4b." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 19
0
    def __init__(self,
                 fig,
                 root,
                 canvas,
                 chr,
                 xlim,
                 datadir,
                 order=None,
                 hlsuffix=None,
                 palette=None,
                 cap=50,
                 gauge="bottom",
                 plot_label=True,
                 plot_chr_label=True,
                 gauge_step=5000000,
                 vlines=None,
                 labels_dict={},
                 diverge=('r', 'g')):
        x, y, w, h = canvas
        p = .01
        root.add_patch(
            Rectangle((x - p, y - p),
                      w + 2 * p,
                      h + 2 * p,
                      lw=1,
                      fill=False,
                      ec="darkslategray",
                      zorder=10))
        datafiles = glob(op.join(datadir, chr + "*"))

        if order:
            datafiles = [z for z in datafiles if z.split(".")[1] in order]
            datafiles.sort(key=lambda x: order.index(x.split(".")[1]))

        ntracks = len(datafiles)
        yinterval = h / ntracks
        yy = y + h

        if palette is None:
            # Get the palette
            set2 = get_map('Set2', 'qualitative', ntracks).mpl_colors
        else:
            set2 = [palette] * ntracks

        if gauge == "top":
            gauge_ax = fig.add_axes([x, yy + p, w, .0001])
            adjust_spines(gauge_ax, ["top"])
            tpos = yy + .07
        elif gauge == "bottom":
            gauge_ax = fig.add_axes([x, y - p, w, .0001])
            adjust_spines(gauge_ax, ["bottom"])
            tpos = y - .07

        start, end = xlim
        if gauge:
            fs = gauge_step < 1000000
            setup_gauge_ax(gauge_ax,
                           start,
                           end,
                           gauge_step,
                           float_formatter=fs)

        if plot_chr_label:
            root.text(x + w / 2,
                      tpos,
                      chr,
                      ha="center",
                      va="center",
                      color="darkslategray",
                      size=16)

        yys = []
        for label, datafile, c in zip(order, datafiles, set2):
            yy -= yinterval
            yys.append(yy)
            ax = fig.add_axes([x, yy, w, yinterval * .9])
            xy = XYtrack(ax, datafile, color=c)
            xy.interpolate(end)
            xy.cap(ymax=cap)
            if vlines:
                xy.vlines(vlines)
            if hlsuffix:
                hlfile = op.join(datadir, ".".join((label, hlsuffix)))
                xy.import_hlfile(hlfile, chr, diverge=diverge)
            if plot_label:
                label = labels_dict.get(label, label.capitalize())
                label = r"\textit{{{0}}}".format(label)
                root.text(x - .015,
                          yy + yinterval / 2,
                          label,
                          ha="right",
                          va="center")
            xy.draw()
            ax.set_xlim(*xlim)

        self.yys = yys
Ejemplo n.º 20
0
def qc(args):
    """
    %prog qc prefix

    Expects data files including:
    1. `prefix.bedpe` draws Bezier curve between paired reads
    2. `prefix.sizes` draws length of the contig/scaffold
    3. `prefix.gaps.bed` mark the position of the gaps in sequence
    4. `prefix.bed.coverage` plots the base coverage
    5. `prefix.pairs.bed.coverage` plots the clone coverage

    See assembly.coverage.posmap() for the generation of these files.
    """
    from jcvi.graphics.glyph import Bezier

    p = OptionParser(qc.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(p.print_help())

    prefix, = args
    scf = prefix

    # All these files *must* be present in the current folder
    bedpefile = prefix + ".bedpe"
    fastafile = prefix + ".fasta"
    sizesfile = prefix + ".sizes"
    gapsbedfile = prefix + ".gaps.bed"
    bedfile = prefix + ".bed"
    bedpefile = prefix + ".bedpe"
    pairsbedfile = prefix + ".pairs.bed"

    sizes = Sizes(fastafile).mapping
    size = sizes[scf]

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    # the scaffold
    root.add_patch(Rectangle((.1, .15), .8, .03, fc='k'))

    # basecoverage and matecoverage
    ax = fig.add_axes([.1, .45, .8, .45])

    bins = 200  # Smooth the curve
    basecoverage = Coverage(bedfile, sizesfile)
    matecoverage = Coverage(pairsbedfile, sizesfile)

    x, y = basecoverage.get_plot_data(scf, bins=bins)
    baseline, = ax.plot(x, y, 'g-')
    x, y = matecoverage.get_plot_data(scf, bins=bins)
    mateline, = ax.plot(x, y, 'r-')
    legends = ("Base coverage", "Mate coverage")
    leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)
    ax.set_xlim(0, size)

    # draw the read pairs
    fp = open(bedpefile)
    pairs = []
    for row in fp:
        scf, astart, aend, scf, bstart, bend, clonename = row.split()
        astart, bstart = int(astart), int(bstart)
        aend, bend = int(aend), int(bend)
        start = min(astart, bstart) + 1
        end = max(aend, bend)
        pairs.append((start, end))

    bpratio = .8 / size
    cutoff = 1000  # inserts smaller than this are not plotted
    # this convert from base => x-coordinate
    pos = lambda x: (.1 + x * bpratio)
    ypos = .15 + .03
    for start, end in pairs:
        dist = end - start

        if dist < cutoff:
            continue

        dist = min(dist, 10000)
        # 10Kb == .25 canvas height
        height = .25 * dist / 10000
        xstart = pos(start)
        xend = pos(end)
        p0 = (xstart, ypos)
        p1 = (xstart, ypos + height)
        p2 = (xend, ypos + height)
        p3 = (xend, ypos)
        Bezier(root, p0, p1, p2, p3)

    # gaps on the scaffold
    fp = open(gapsbedfile)
    for row in fp:
        b = BedLine(row)
        start, end = b.start, b.end
        xstart = pos(start)
        xend = pos(end)
        root.add_patch(Rectangle((xstart, .15), xend - xstart, .03, fc='w'))

    root.text(.5, .1, scf, color='b', ha="center")
    warn_msg = "Only the inserts > {0}bp are shown".format(cutoff)
    root.text(.5, .1, scf, color='b', ha="center")
    root.text(.5, .05, warn_msg, color='gray', ha="center")
    # clean up and output
    set_human_base_axis(ax)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = prefix + ".pdf"
    savefig(figname, dpi=300)
Ejemplo n.º 21
0
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pngfile, ) = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != ".":
        mkdir(outdir)
    if calib:
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)
    nbcolor = opts.changeBackground
    pngfile = convert_background(pngfile, nbcolor)
    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile,
        pf,
        outdir=outdir,
        rotate=opts.rotate,
        rows=rows,
        cols=cols,
        labelrows=labelrows,
        labelcols=labelcols,
    )
    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4,
                                             nrows=1,
                                             figsize=(iopts.w, iopts.h))
    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance,
                                    threshold_rel=0.05,
                                    indices=False)
        coordinates = peak_local_max(distance, threshold_rel=0.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug(
        "Find objects with pixels between {0} ({1}%) and {2} ({3}%)".format(
            min_size, opts.minsize, max_size, opts.maxsize))

    # Plotting
    ax1.set_title("Original picture")
    ax1.imshow(oimg)

    params = "{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title("Edge detection\n({0})".format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.")
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title("Object detection")
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    for i, props in enumerate(rp):
        i += 1
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        ax2.plot((x0 - major_dx, x0 + major_dx),
                 (y0 + major_dy, y0 - major_dy), "r-")
        ax2.plot((x0 - minor_dx, x0 + minor_dx),
                 (y0 - minor_dy, y0 + minor_dy), "r-")

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * 0.35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug("Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".format(
            i, npixels, len(pixels), 100.0 * npixels / canvas_size))

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle((minc, minr),
                         maxc - minc,
                         maxr - minr,
                         fill=False,
                         ec="w",
                         lw=1)
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc,
                 mr,
                 "{0}".format(i),
                 color="w",
                 ha="center",
                 va="center",
                 size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(0.1, 0.92, "File: {0}".format(latex(filename)), color="g")
    ax4.text(0.1, 0.86, "Label: {0}".format(latex(accession)), color="m")
    yy = 0.8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        if i > 7:
            continue
        ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k"))
        ax4.text(0.1, yy, o.pixeltag, va="center")
        yy -= 0.04
        ax4.add_patch(
            Rectangle((0.1, yy - 0.025),
                      0.12,
                      0.05,
                      lw=0,
                      fc=rgb_to_hex(o.rgb)))
        ax4.text(0.27, yy, o.hashtag, va="center")
        yy -= 0.06
    ax4.text(
        0.1,
        yy,
        "(A total of {0} objects displayed)".format(nb_labels),
        color="darkslategray",
    )
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family="Helvetica", size=8)
        ax.set_yticklabels(yticklabels, family="Helvetica", size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
Ejemplo n.º 22
0
def blastplot(
    ax,
    blastfile,
    qsizes,
    ssizes,
    qbed,
    sbed,
    style="dot",
    sampleN=None,
    baseticks=False,
    insetLabels=False,
    stripNames=False,
    highlights=None,
):

    assert style in DotStyles
    fp = open(blastfile)

    qorder = qbed.order if qbed else None
    sorder = sbed.order if sbed else None

    data = []

    for row in fp:
        b = BlastLine(row)
        query, subject = b.query, b.subject

        if stripNames:
            query = query.rsplit(".", 1)[0]
            subject = subject.rsplit(".", 1)[0]

        if qorder:
            if query not in qorder:
                continue
            qi, q = qorder[query]
            query = q.seqid
            qstart, qend = q.start, q.end
        else:
            qstart, qend = b.qstart, b.qstop

        if sorder:
            if subject not in sorder:
                continue
            si, s = sorder[subject]
            subject = s.seqid
            sstart, send = s.start, s.end
        else:
            sstart, send = b.sstart, b.sstop

        qi = qsizes.get_position(query, qstart)
        qj = qsizes.get_position(query, qend)
        si = ssizes.get_position(subject, sstart)
        sj = ssizes.get_position(subject, send)

        if None in (qi, si):
            continue
        data.append(((qi, qj), (si, sj)))

    if sampleN:
        if len(data) > sampleN:
            data = sample(data, sampleN)

    if not data:
        return logging.error("no blast data imported")

    xsize, ysize = qsizes.totalsize, ssizes.totalsize
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))

    if style == "line":
        for a, b in data:
            ax.plot(a, b, "ro-", mfc="w", mec="r", ms=3)
    else:
        data = [(x[0], y[0]) for x, y in data]
        x, y = zip(*data)

        if style == "circle":
            ax.plot(x, y, "mo", mfc="w", mec="m", ms=3)
        elif style == "dot":
            ax.scatter(x, y, s=3, lw=0)

    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    ignore = True  # tag to mark whether to plot chr name (skip small ones)
    ignore_size_x = ignore_size_y = 0

    # plot the chromosome breaks
    logging.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes)))
    for (seqid, beg, end) in qsizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        if ignore:
            continue
        seqid = rename_seqid(seqid)

        xchr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot([end, end], ylim, "-", lw=1, color="grey")

    for (seqid, beg, end) in ssizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        if ignore:
            continue
        seqid = rename_seqid(seqid)

        ychr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot(xlim, [end, end], "-", lw=1, color="grey")

    # plot the chromosome labels
    for label, pos, ignore in xchr_labels:
        if not ignore:
            if insetLabels:
                ax.text(pos,
                        0,
                        label,
                        size=8,
                        ha="center",
                        va="top",
                        color="grey")
            else:
                pos = 0.1 + pos * 0.8 / xsize
                root.text(
                    pos,
                    0.91,
                    label,
                    size=10,
                    ha="center",
                    va="bottom",
                    rotation=45,
                    color="grey",
                )

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        if not ignore:
            if insetLabels:
                continue
            pos = 0.9 - pos * 0.8 / ysize
            root.text(0.91, pos, label, size=10, va="center", color="grey")

    # Highlight regions based on a list of BedLine
    qhighlights = shighlights = None
    if highlights:
        if isinstance(highlights[0], BedLine):
            shighlights = highlights
        elif len(highlights) == 2:
            qhighlights, shighlights = highlights

    if qhighlights:
        for hl in qhighlights:
            hls = qsizes.get_position(hl.seqid, hl.start)
            ax.add_patch(
                Rectangle((hls, 0), hl.span, ysize, fc="r", alpha=0.2, lw=0))
    if shighlights:
        for hl in shighlights:
            hls = ssizes.get_position(hl.seqid, hl.start)
            ax.add_patch(
                Rectangle((0, hls), xsize, hl.span, fc="r", alpha=0.2, lw=0))

    if baseticks:

        def increaseDensity(a, ratio=4):
            assert len(a) > 1
            stepsize = a[1] - a[0]
            newstepsize = int(stepsize / ratio)
            return np.arange(0, a[-1], newstepsize)

        # Increase the density of the ticks
        xticks = ax.get_xticks()
        yticks = ax.get_yticks()
        xticks = increaseDensity(xticks, ratio=2)
        yticks = increaseDensity(yticks, ratio=2)
        ax.set_xticks(xticks)

        # Plot outward ticklines
        for pos in xticks[1:]:
            if pos > xsize:
                continue
            pos = 0.1 + pos * 0.8 / xsize
            root.plot((pos, pos), (0.08, 0.1), "-", color="grey", lw=2)

        for pos in yticks[1:]:
            if pos > ysize:
                continue
            pos = 0.9 - pos * 0.8 / ysize
            root.plot((0.09, 0.1), (pos, pos), "-", color="grey", lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_base_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color="gray",
             size=10)
    plt.setp(ax.get_yticklabels(), rotation=90)
Ejemplo n.º 23
0
def ld(args):
    """
    %prog ld map

    Calculate pairwise linkage disequilibrium given MSTmap.
    """
    import numpy as np
    from random import sample

    from jcvi.algorithms.matrix import symmetrize

    p = OptionParser(ld.__doc__)
    p.add_option("--subsample", default=500, type="int",
                 help="Subsample markers to speed up [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    mstmap, = args
    subsample = opts.subsample
    data = MSTMap(mstmap)
    # Take random subsample while keeping marker order
    if subsample < data.nmarkers:
        data = [data[x] for x in \
                sorted(sample(xrange(len(data)), subsample))]

    markerbedfile = mstmap + ".subsample.bed"
    ldmatrix = mstmap + ".subsample.matrix"

    if need_update(mstmap, (markerbedfile, ldmatrix)):
        nmarkers = len(data)
        fw = open(markerbedfile, "w")
        print >> fw, "\n".join(x.bedline for x in data)
        logging.debug("Write marker set of size {0} to file `{1}`."\
                        .format(nmarkers, markerbedfile))

        M = np.zeros((nmarkers, nmarkers), dtype=float)
        for i, j in combinations(range(nmarkers), 2):
            a = data[i]
            b = data[j]
            M[i, j] = calc_ldscore(a.genotype, b.genotype)

        M = symmetrize(M)

        logging.debug("Write LD matrix to file `{0}`.".format(ldmatrix))
        M.tofile(ldmatrix)
    else:
        nmarkers = len(Bed(markerbedfile))
        M = np.fromfile(ldmatrix, dtype="float").reshape(nmarkers, nmarkers)
        logging.debug("LD matrix `{0}` exists ({1}x{1})."\
                        .format(ldmatrix, nmarkers))

    from jcvi.graphics.base import plt, savefig, Rectangle, draw_cmap

    plt.rcParams["axes.linewidth"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([.1, .1, .8, .8])  # the heatmap

    ax.matshow(M, cmap=iopts.cmap)

    # Plot chromosomes breaks
    bed = Bed(markerbedfile)
    xsize = len(bed)
    extent = (0, nmarkers)
    chr_labels = []
    ignore_size = 20

    for (seqid, beg, end) in bed.get_breaks():
        ignore = abs(end - beg) < ignore_size
        pos = (beg + end) / 2
        chr_labels.append((seqid, pos, ignore))
        if ignore:
            continue
        ax.plot((end, end), extent, "w-", lw=1)
        ax.plot(extent, (end, end), "w-", lw=1)

    # Plot chromosome labels
    for label, pos, ignore in chr_labels:
        pos = .1 + pos * .8 / xsize
        if not ignore:
            root.text(pos, .91, label,
                ha="center", va="bottom", rotation=45, color="grey")
            root.text(.09, pos, label,
                ha="right", va="center", color="grey")

    ax.set_xlim(extent)
    ax.set_ylim(extent)
    ax.set_axis_off()

    draw_cmap(root, "Pairwise LD (r2)", 0, 1, cmap=default_cm)

    root.add_patch(Rectangle((.1, .1), .8, .8, fill=False, ec="k", lw=2))
    m = mstmap.split(".")[0]
    root.text(.5, .06, "Linkage Disequilibrium between {0} markers".format(m), ha="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = m + ".subsample" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
Ejemplo n.º 24
0
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    p = OptionParser(main.__doc__)
    p.add_option("--title",
                 default="Medicago truncatula v3.5",
                 help="title of the image [default: `%default`]")
    p.add_option("--gauge",
                 default=False,
                 action="store_true",
                 help="draw a gauge with size label [default: %default]")
    p.add_option(
        "--imagemap",
        default=False,
        action="store_true",
        help=
        "generate an HTML image map associated with the image [default: %default]"
    )
    p.add_option(
        "--winsize",
        default=50000,
        type="int",
        help=
        "if drawing an imagemap, specify the window size (bases) of each map element "
        "[default: %default bp]")
    p.add_option("--empty", help="Write legend for unpainted region")
    opts, args, iopts = p.set_image_options(figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        sys.exit(p.print_help())

    bedfile = args[0]
    mappingfile = None
    if len(args) == 2:
        mappingfile = args[1]

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        mapfh = open(imgmapfile, "w")
        print >> mapfh, '<map id="' + prefix + '">'

    if mappingfile:
        mappings = DictFile(mappingfile, delimiter="\t")
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(
            len(classes), ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    mycolors = "rgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    for b, blines in groupby(bed, key=(lambda x: x.seqid)):
        blines = list(blines)
        maxlen = max(x.end for x in blines)
        chr_lens[b] = maxlen

    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = '-'

    chr_number = len(chr_lens)
    assert chr_number == len(centromeres)

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chr, cent_position) in enumerate(sorted(centromeres.items())):
        clen = chr_lens[chr]
        xx = xstart + a * xinterval + .5 * xwidth
        yy = ystart - cent_position * ratio
        root.text(xx, ystart + .01, chr, ha="center")
        ChromosomeWithCentromere(root,
                                 xx,
                                 ystart,
                                 yy,
                                 ystart - clen * ratio,
                                 width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = .75
    # color the regions
    for chr in sorted(chr_lens.keys()):
        segment_size, excess = 0, 0
        bac_list = []
        for b in bed.sub_bed(chr):
            clen = chr_lens[chr]
            idx = chr_idxs[chr]
            klass = b.accn
            start = b.start
            end = b.end
            xx = xstart + idx * xinterval
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle((xx, yystart),
                          xwidth,
                          yyend - yystart,
                          fc=class_colors.get(klass, "w"),
                          lw=0,
                          alpha=alpha))

            if imagemap:
                """
                `segment` : size of current BAC being investigated + `excess`
                `excess`  : left-over bases from the previous BAC, as a result of
                            iterating over `winsize` regions of `segment`
                """
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                                  xx + xwidth, (1 - ystart) + segment_end * ratio
                    print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                            w, h, dpi, chr+":"+",".join(bac_list), segment_start, segment_end)

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                          xx + xwidth, (1 - ystart) + segment_end * ratio
            print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                    w, h, dpi, chr+":"+",".join(bac_list), segment_start, segment_end)

    if imagemap:
        print >> mapfh, '</map>'
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if opts.gauge:
        xstart, ystart = .9, .85
        Gauge(root, xstart, ystart - r, ystart, max_chr_len)

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha))
        root.text(xstart + xwidth + .01, yy, klass, fontsize=10)
        xstart += xinterval

    empty = opts.empty
    if empty:
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1))
        root.text(xstart + xwidth + .01, yy, empty, fontsize=10)

    root.text(.5,
              .95,
              opts.title,
              fontstyle="italic",
              ha="center",
              va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(figname, dpi=dpi, iopts=iopts)
Ejemplo n.º 25
0
def bites(args):
    """
    %prog bites

    Illustrate the pipeline for automated bite discovery.
    """

    p = OptionParser(__doc__)
    opts, args = p.parse_args()

    fig = plt.figure(1, (6, 6))
    root = fig.add_axes([0, 0, 1, 1])

    # HSP pairs
    hsps = (
        ((50, 150), (60, 180)),
        ((190, 250), (160, 235)),
        ((300, 360), (270, 330)),
        ((430, 470), (450, 490)),
        ((570, 620), (493, 543)),
        ((540, 555), (370, 385)),  # non-collinear hsps
    )

    titlepos = (0.9, 0.65, 0.4)
    titles = ("Compare orthologous region", "Find collinear HSPs",
              "Scan paired gaps")
    ytip = 0.01
    mrange = 650.0
    m = lambda x: x / mrange * 0.7 + 0.1
    for i, (ya, title) in enumerate(zip(titlepos, titles)):
        yb = ya - 0.1
        plt.plot((0.1, 0.8), (ya, ya), "-", color="gray", lw=2, zorder=1)
        plt.plot((0.1, 0.8), (yb, yb), "-", color="gray", lw=2, zorder=1)
        RoundLabel(root, 0.5, ya + 4 * ytip, title)
        root.text(0.9, ya, "A. thaliana", ha="center", va="center")
        root.text(0.9, yb, "B. rapa", ha="center", va="center")
        myhsps = hsps
        if i >= 1:
            myhsps = hsps[:-1]
        for (a, b), (c, d) in myhsps:
            a, b, c, d = [m(x) for x in (a, b, c, d)]
            r1 = Rectangle((a, ya - ytip),
                           b - a,
                           2 * ytip,
                           fc="r",
                           lw=0,
                           zorder=2)
            r2 = Rectangle((c, yb - ytip),
                           d - c,
                           2 * ytip,
                           fc="r",
                           lw=0,
                           zorder=2)
            r3 = Rectangle((a, ya - ytip),
                           b - a,
                           2 * ytip,
                           fill=False,
                           zorder=3)
            r4 = Rectangle((c, yb - ytip),
                           d - c,
                           2 * ytip,
                           fill=False,
                           zorder=3)
            r5 = Polygon(
                ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip),
                 (b, ya - ytip)),
                fc="r",
                alpha=0.2,
            )
            rr = (r1, r2, r3, r4, r5)
            if i == 2:
                rr = rr[:-1]
            for r in rr:
                root.add_patch(r)

    # Gap pairs
    hspa, hspb = zip(*myhsps)
    gapa, gapb = [], []
    for (a, b), (c, d) in pairwise(hspa):
        gapa.append((b + 1, c - 1))
    for (a, b), (c, d) in pairwise(hspb):
        gapb.append((b + 1, c - 1))
    gaps = zip(gapa, gapb)
    tpos = titlepos[-1]

    yy = tpos - 0.05
    for i, ((a, b), (c, d)) in enumerate(gaps):
        i += 1
        a, b, c, d = [m(x) for x in (a, b, c, d)]
        xx = (a + b + c + d) / 4
        TextCircle(root, xx, yy, str(i))

    # Bites
    ystart = 0.24
    ytip = 0.05
    bites = (
        ("Bite(40=>-15)", True),
        ("Bite(50=>35)", False),
        ("Bite(70=>120)", False),
        ("Bite(100=>3)", True),
    )
    for i, (bite, selected) in enumerate(bites):
        xx = 0.15 if (i % 2 == 0) else 0.55
        yy = ystart - i / 2 * ytip
        i += 1
        TextCircle(root, xx, yy, str(i))
        color = "k" if selected else "gray"
        root.text(xx + ytip, yy, bite, size=10, color=color, va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)