Ejemplo n.º 1
0
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # passed to OptionParser below), so its wording is visible at runtime.
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
                set2, panel_labels
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
                HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    # Exactly two positional arguments are required: the input bed file and
    # the seqid to draw; otherwise print the help and exit.
    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    # Companion files are located by replacing the last extension of the
    # input file name with ".lifted.bed" and ".agp".
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    # `function` is applied to individual markers below to obtain their
    # position on the map (selected by opts.distance).
    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    # NOTE(review): validation through `assert` is skipped under `python -O`.
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    # Keep only the linkage groups whose count in s.mlg_counts reaches the
    # opts.links threshold.
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]
    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        # Size of a group = largest map position among all of its markers.
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    # `root` covers the whole figure; ax1 is the left half (parallel axes) and
    # ax2 the right half (scatter plots).
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])
    ax2 = fig.add_axes([.5, 0, .5, 1])

    # Find the layout first
    # ystart/ystop delimit the vertical extent used for the pseudomolecule.
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    # coords maps each linkage group to an (x, y1, y2) triple (see the
    # unpacking in the loop below).
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette
    # One colour per map name first, then re-keyed per linkage group using the
    # part of the group name before the first "-" as the map name.
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        # Only the markers located on the requested seqid enter the correlation.
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        # NOTE(review): raises ValueError if xy is empty — confirm that cannot
        # happen for groups that passed the opts.links filter.
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        # A negative correlation means the map runs opposite to the sequence,
        # so the map is drawn flipped.
        flip = rho < 0

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        # Put the label on the outer side of the map: left of maps drawn left
        # of the centre (x < .5), right of the others.
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2,
                 label,
                 color=colors[mlg],
                 ha=ha,
                 va="center",
                 rotation=90)
        # Collect the positions of the drawn markers (keyed by marker name,
        # see the lookup in the connecting-lines loop).
        marker_pos.update(g.marker_pos)

    # Restrict the AGP records to the requested object; its size is the
    # largest object_end among them.
    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    # f converts a sequence position to a y coordinate, position 0 at ystart.
    # NOTE: f reads `ystart` and `ratio` when called, and both are reassigned
    # further down — it must only be used before those reassignments.
    f = lambda x: (ystart - ratio * x)
    # One patch boundary at the start of every non-gap AGP record.
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        # Markers that were not drawn on any map get no connecting line.
        if marker_name not in marker_pos:
            continue

        # (cx, cy): end point on the pseudomolecule; (mx, my): end point on
        # the map.
        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        # Move both end points towards each other so the line stops short of
        # the pseudomolecule and of the map.
        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        # The same marker is reused as a point of the scatter plot.
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    # f is redefined: it now maps a sequence position to an x coordinate. It
    # is evaluated right below, while `ratio` still holds r / chrsize.
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2,
                         xstart,
                         xstop,
                         ystop,
                         height=2 * tip,
                         patch=patchstart,
                         lw=2)

    gap = .03
    # `ratio` is reassigned: from here on it is the vertical scale of the
    # scatter panels (height left after the gaps and the tip, divided by the
    # summed group sizes).
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    # One scatter panel per linkage group, in sorted group order. `ystart` is
    # lowered as panels are placed, so it tracks the bottom of the last panel.
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height
        # Panel position in figure coordinates; presumably this converts the
        # [xstart, xstop] range of ax2 (right half of the figure) — confirm.
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        # NOTE: `xx` is reused here for the x values of the scatter points.
        # NOTE(review): raises ValueError if sd is empty — confirm every kept
        # group has at least one connected marker.
        xx, yy = zip(*sd)
        # Beige vertical lines mark the starts of the non-gap AGP records.
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5,
                1 - .4 * gap / height,
                r"$\rho$={0:.3f}".format(rho),
                ha="center",
                va="top",
                transform=ax.transAxes,
                color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        # Drop every other tick until there are fewer than two ticks or each
        # tick has at least .03 of vertical space.
        while height / len(ax.get_yticks()) < .03 and len(
                ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        # Negatively correlated groups are shown with the y axis inverted.
        if rho < 0:
            ax.invert_yaxis()

    # Group labels are written on `root` at x=.5, vertically centred on their
    # panel; with more than four labels the alignment alternates.
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90, ha=ha, va="center")

    # Optional "A"/"B" panel labels.
    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    # The image is named after the seqid, with the requested format as
    # extension.
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
Ejemplo n.º 2
0
def ancestral(args):
    """
    %prog ancestral ancestral.txt assembly.fasta

    Karyotype evolution of pineapple. The figure is inspired by Amphioxus paper
    Figure 3 and Tetradon paper Figure 9.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # passed to OptionParser below), so it is left untouched.
    #
    # args: command-line arguments; after option parsing exactly two
    #       positional arguments must remain: the regions file and the file
    #       handed to Sizes().
    # Output: an image named "pineapple-karyotype.<format>" written through
    #         savefig(); nothing is returned.
    parser = OptionParser(ancestral.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x7")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    regionsfile, sizesfile = args
    regions = RegionsFile(regionsfile)
    sizes = Sizes(sizesfile).mapping
    # Only sequences whose name starts with "LG" are drawn.
    # .items() (instead of the Python 2-only .iteritems()) works on both
    # Python 2 and 3, and matches the sizes.items() call further down.
    sizes = dict((k, v) for (k, v) in sizes.items() if k[:2] == "LG")
    maxsize = max(sizes.values())
    # The longest sequence takes half of the canvas height.
    ratio = .5 / maxsize

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes((0, 0, 1, 1))

    from jcvi.graphics.base import set2
    # Reordered copy of the first seven palette colours; kept under its own
    # name so that the imported set2 is not rebound.
    palette = tuple(set2[i] for i in (2, 6, 1, 4, 3, 0, 5))

    # Upper panel is the evolution of segments
    # All segments belong to one of seven karyotypes 1 to 7
    karyotypes = regions.karyotypes
    xgap = 1. / (1 + len(karyotypes))
    ygap = .05
    mgap = xgap / 4.5
    gwidth = mgap * .75
    tip = .02
    coords = {}
    for i, k in enumerate(karyotypes):
        x = (i + 1) * xgap
        y = .9
        root.text(x, y + tip, "Anc" + k, ha="center")
        root.plot((x, x), (y, y - ygap), "k-", lw=2)
        y -= 2 * ygap
        # Four leaves a-d, evenly spaced by mgap around x. coords is reused
        # (and overwritten) for every karyotype.
        coords['a'] = (x - 1.5 * mgap, y)
        coords['b'] = (x - .5 * mgap, y)
        coords['c'] = (x + .5 * mgap, y)
        coords['d'] = (x + 1.5 * mgap, y)
        # Join a+b and c+d, then the two joints; presumably each call draws
        # the connector and returns the joint position — confirm against
        # join_nodes_vertical().
        coords['ab'] = join_nodes_vertical(root, coords, 'a', 'b',
                                           y + ygap / 2)
        coords['cd'] = join_nodes_vertical(root, coords, 'c', 'd',
                                           y + ygap / 2)
        coords['abcd'] = join_nodes_vertical(root, coords, 'ab', 'cd',
                                             y + ygap)
        for n in 'abcd':
            nx, ny = coords[n]
            root.text(nx, ny - tip, n, ha="center")
            # Segments of this leaf start below its label.
            coords[n] = (nx, ny - ygap / 2)

        # Stack the segments of each group downwards from the group's
        # current position.
        for kd in regions.get_karyotype(k):
            group = kd.group
            gx, gy = coords[group]
            gsize = ratio * kd.span
            gy -= gsize
            rect = Rectangle((gx - gwidth / 2, gy),
                             gwidth,
                             gsize,
                             lw=0,
                             color=palette[i])
            root.add_patch(rect)
            root.text(gx,
                      gy + gsize / 2,
                      kd.chromosome,
                      ha="center",
                      va="center",
                      color='w')
            # The next segment of the same group goes below, after a gap.
            coords[group] = (gx, gy - tip)

    # Bottom panel shows the location of segments on chromosomes
    # TODO: redundant code, similar to graphics.chromosome
    ystart = .54
    chr_number = len(sizes)
    xstart, xend = xgap - 2 * mgap, 1 - xgap + 2 * mgap
    # With a single chromosome there is no interval to compute; the guard
    # avoids a ZeroDivisionError and the spacing is then irrelevant.
    if chr_number > 1:
        xinterval = (xend - xstart - gwidth) / (chr_number - 1)
    else:
        xinterval = 0
    chrpos = {}
    for idx, (chrname, clen) in enumerate(sorted(sizes.items())):
        chrnum = get_number(chrname)
        xx = xstart + idx * xinterval + gwidth / 2
        chrpos[chrnum] = xx
        root.text(xx, ystart + .01, chrnum, ha="center")
        Chromosome(root, xx, ystart, ystart - clen * ratio, width=gwidth)

    # Start painting
    for region in regions:
        xx = chrpos[region.chromosome]
        yystart = ystart - region.start * ratio
        yyend = ystart - region.end * ratio
        # Colour by karyotype: karyotype "1" uses palette[0], and so on.
        rect = Rectangle((xx - gwidth / 2, yystart),
                         gwidth,
                         yyend - yystart,
                         color=palette[int(region.karyotype) - 1],
                         lw=0)
        root.add_patch(rect)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)