Ejemplo n.º 1
0
def main():
    """Draw a log-scaled heatmap of a CSV matrix and save it as an image.

    Expects exactly one positional argument, the data file, which is read
    with ``parse_csv``.  Optional decorations:

    * ``--groups``: the first row of the data file carries column-group
      labels, which are drawn as gray bars above the heatmap.
    * ``--rowgroups FILE``: each non-blank line of FILE holds two
      whitespace-separated fields, a group name and a comma-separated list
      of row names; each group is bracketed on the left of the heatmap.
    * ``--horizontalbar``: place the color bar below the heatmap instead of
      to its right.
    * ``--cmap``: name of the matplotlib color map.

    The figure is written to ``<datafile minus extension>.<cmap>.<format>``.
    Prints the help text and exits when the argument count is wrong.
    """
    p = OptionParser(__doc__)
    p.add_option("--groups", default=False, action="store_true",
                 help="The first row contains group info [default: %default]")
    p.add_option("--rowgroups", help="Row groupings [default: %default]")
    p.add_option("--horizontalbar", default=False, action="store_true",
                 help="Horizontal color bar [default: vertical]")
    p.add_option("--cmap", default="jet",
                 help="Use this color map [default: %default]")
    opts, args, iopts = set_image_options(p, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    rowgroups = opts.rowgroups

    groups, rows, cols, data = parse_csv(datafile, vmin=1, groups=opts.groups)
    cols = [x.replace("ay ", "") for x in cols]

    if rowgroups:
        rgroups = []
        # Use a context manager so the handle is closed even if a row name
        # is missing from `rows` and rows.index() raises.
        with open(rowgroups) as fp:
            for row in fp:
                if not row.strip():
                    # Tolerate blank (e.g. trailing) lines.
                    continue
                a, b = row.split()
                irows = [rows.index(x) for x in b.split(",")]
                # Keep only the extent (first and last row index) of a group.
                rgroups.append((a, min(irows), max(irows)))

    plt.rcParams["axes.linewidth"] = 0

    xstart = .18
    fig = plt.figure(1, (iopts.w, iopts.h))
    # `root` spans the whole figure and hosts the annotations; `ax` is the
    # heatmap itself.
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([xstart, .15, .7, .7])

    default_cm = cm.get_cmap(opts.cmap)
    im = ax.matshow(data, cmap=default_cm, norm=LogNorm(vmin=1, vmax=10000))
    nrows, ncols = len(rows), len(cols)

    # Size of one cell in figure coordinates (the heatmap axes is .7 wide).
    xinterval = .7 / ncols
    yinterval = .7 / max(nrows, ncols)

    plt.xticks(range(ncols), cols, rotation=45, size=10, ha="center")
    plt.yticks(range(nrows), rows, size=10)

    for x in ax.get_xticklines() + ax.get_yticklines():
        x.set_visible(False)

    ax.set_xlim(-.5, ncols - .5)

    t = [1, 10, 100, 1000, 10000]
    pad = .06
    if opts.horizontalbar:
        ypos = .5 * (1 - nrows * yinterval) - pad
        axcolor = fig.add_axes([.3, ypos, .4, .02])
        orientation = "horizontal"
    else:
        axcolor = fig.add_axes([.9, .3, .02, .4])
        orientation = "vertical"
    # NOTE(review): `_` comes from the enclosing module (not visible here);
    # presumably a label-formatting helper -- confirm.
    fig.colorbar(im, cax=axcolor, ticks=t, format=_("%d"), orientation=orientation)

    if groups:
        # Collapse consecutive identical labels into (label, run length).
        groups = [(key, len(list(nn))) for key, nn in groupby(groups)]
        yy = .5 + .5 * nrows / ncols * .7 + .06
        e = .005
        sep = -.5

        for k, kl in groups:
            # Separator in the array area
            sep += kl
            ax.plot([sep, sep], [-.5, nrows - .5], "w-", lw=2)
            # Group labels on the top
            kl *= xinterval
            root.plot([xstart + e, xstart + kl - e], [yy, yy], "-", color="gray", lw=2)
            root.text(xstart + .5 * kl, yy + e, k, ha="center", color="gray")
            xstart += kl

    if rowgroups:
        from jcvi.graphics.glyph import TextCircle

        xpos = .04
        tip = .015
        assert rgroups
        ystart = 1 - .5 * (1 - nrows * yinterval)
        for gname, start, end in rgroups:
            # Convert row indices to figure y-coordinates, then shrink the
            # bracket slightly at both ends.
            start = ystart - start * yinterval
            end = ystart - (end + 1) * yinterval
            start -= tip / 3
            end += tip / 3

            # Bracket the groups
            root.plot((xpos, xpos + tip), (start, start), "k-", lw=2)
            root.plot((xpos, xpos), (start, end), "k-", lw=2)
            root.plot((xpos, xpos + tip), (end, end), "k-", lw=2)
            TextCircle(root, xpos, .5 * (start + end), gname)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + opts.cmap + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    # Undo the rcParams change made above.
    plt.rcdefaults()
Ejemplo n.º 2
0
def ld(args):
    """
    %prog ld map

    Calculate pairwise linkage disequilibrium given MSTmap.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is runtime output and is kept verbatim.
    #
    # Outline: load the map, optionally subsample its markers, compute (or
    # reload from disk) the pairwise LD matrix, then draw it as a heatmap
    # with chromosome breaks and labels and save the figure.
    import numpy as np
    from random import sample
    from itertools import combinations

    from jcvi.algorithms.matrix import symmetrize

    p = OptionParser(ld.__doc__)
    p.add_option("--subsample", default=500, type="int",
                 help="Subsample markers to speed up [default: %default]")
    p.add_option("--cmap", default="jet",
                 help="Use this color map [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    mstmap, = args
    subsample = opts.subsample
    data = MSTMap(mstmap)
    # Take random subsample while keeping marker order
    if subsample < data.nmarkers:
        picked = sorted(sample(xrange(len(data)), subsample))
        data = [data[x] for x in picked]

    markerbedfile = mstmap + ".subsample.bed"
    ldmatrix = mstmap + ".subsample.matrix"

    # NOTE(review): need_update() presumably reports whether the two output
    # files are missing or stale relative to the map -- confirm.
    if need_update(mstmap, (markerbedfile, ldmatrix)):
        nmarkers = len(data)
        # Close the file before continuing: it is read back through
        # Bed(markerbedfile) further down, so the contents must be flushed
        # to disk by then.
        with open(markerbedfile, "w") as fw:
            fw.write("\n".join(x.bedline for x in data) + "\n")
        logging.debug("Write marker set of size {0} to file `{1}`."
                      .format(nmarkers, markerbedfile))

        # Fill the upper triangle only; symmetrize() produces the full
        # matrix from it.
        M = np.zeros((nmarkers, nmarkers), dtype=float)
        for i, j in combinations(range(nmarkers), 2):
            a = data[i]
            b = data[j]
            M[i, j] = calc_ldscore(a.genotype, b.genotype)

        M = symmetrize(M)

        logging.debug("Write LD matrix to file `{0}`.".format(ldmatrix))
        M.tofile(ldmatrix)
    else:
        # Reuse the cached matrix; its dimension is recovered from the
        # number of entries in the cached marker BED file.
        nmarkers = len(Bed(markerbedfile))
        M = np.fromfile(ldmatrix, dtype="float").reshape(nmarkers, nmarkers)
        logging.debug("LD matrix `{0}` exists ({1}x{1})."
                      .format(ldmatrix, nmarkers))

    from jcvi.graphics.base import plt, savefig, cm, Rectangle, draw_cmap

    plt.rcParams["axes.linewidth"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([.1, .1, .8, .8])  # the heatmap

    default_cm = cm.get_cmap(opts.cmap)
    ax.matshow(M, cmap=default_cm)

    # Plot chromosomes breaks
    bed = Bed(markerbedfile)
    xsize = len(bed)
    extent = (0, nmarkers)
    chr_labels = []
    # Blocks spanning fewer than this many markers get neither a break line
    # nor a label.
    ignore_size = 20

    for (seqid, beg, end) in bed.get_breaks():
        ignore = abs(end - beg) < ignore_size
        pos = (beg + end) / 2
        chr_labels.append((seqid, pos, ignore))
        if ignore:
            continue
        ax.plot((end, end), extent, "w-", lw=1)
        ax.plot(extent, (end, end), "w-", lw=1)

    # Plot chromosome labels
    for label, pos, ignore in chr_labels:
        # Map the marker index into figure coordinates of the heatmap axes
        # (origin .1, width .8).
        pos = .1 + pos * .8 / xsize
        if not ignore:
            root.text(pos, .91, label,
                ha="center", va="bottom", rotation=45, color="grey")
            root.text(.09, pos, label,
                ha="right", va="center", color="grey")

    ax.set_xlim(extent)
    ax.set_ylim(extent)
    ax.set_axis_off()

    draw_cmap(root, "Pairwise LD (r2)", 0, 1, cmap=default_cm)

    root.add_patch(Rectangle((.1, .1), .8, .8, fill=False, ec="k", lw=2))
    # Text before the first "." of the map path; used in the caption and as
    # the prefix of the output image name.
    m = mstmap.split(".")[0]
    root.text(.5, .06, "Linkage Disequilibrium between {0} markers".format(m), ha="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = m + ".subsample" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)