def main():
    """
    Draw a log-scaled heatmap from a data file and save it as an image.

    Options are parsed through `set_image_options()`. Exactly one positional
    argument (the data file) is required; otherwise the help text is printed
    and the program exits. The image is written to
    `<datafile prefix>.<cmap>.<image format>`.

    Options:
      --groups         the first row of the data file contains group info
      --rowgroups      file with row groupings; each non-blank line holds a
                       group name and a comma-separated list of row names,
                       separated by whitespace
      --horizontalbar  draw the color bar horizontally instead of vertically
      --cmap           name of the color map (default "jet")
    """
    p = OptionParser(__doc__)
    p.add_option("--groups", default=False, action="store_true",
                 help="The first row contains group info [default: %default]")
    p.add_option("--rowgroups", help="Row groupings [default: %default]")
    p.add_option("--horizontalbar", default=False, action="store_true",
                 help="Horizontal color bar [default: vertical]")
    p.add_option("--cmap", default="jet",
                 help="Use this color map [default: %default]")
    opts, args, iopts = set_image_options(p, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    rowgroups = opts.rowgroups

    groups, rows, cols, data = parse_csv(datafile, vmin=1, groups=opts.groups)
    cols = [x.replace("ay ", "") for x in cols]

    # Each row group is stored as (name, first row index, last row index).
    rgroups = []
    if rowgroups:
        # Use a context manager so the file handle is always released.
        with open(rowgroups) as fp:
            for row in fp:
                if not row.strip():
                    # Tolerate blank lines instead of failing on the unpack.
                    continue
                a, b = row.split()
                irows = [rows.index(x) for x in b.split(",")]
                rgroups.append((a, min(irows), max(irows)))

    plt.rcParams["axes.linewidth"] = 0

    xstart = .18
    fig = plt.figure(1, (iopts.w, iopts.h))
    # `root` spans the whole figure and carries the annotations;
    # `ax` holds the heatmap itself.
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([xstart, .15, .7, .7])

    default_cm = cm.get_cmap(opts.cmap)
    im = ax.matshow(data, cmap=default_cm, norm=LogNorm(vmin=1, vmax=10000))
    nrows, ncols = len(rows), len(cols)

    # Size of one cell in root (figure) coordinates.
    xinterval = .7 / ncols
    yinterval = .7 / max(nrows, ncols)

    plt.xticks(range(ncols), cols, rotation=45, size=10, ha="center")
    plt.yticks(range(nrows), rows, size=10)

    # Keep the tick labels but hide the tick marks.
    for x in ax.get_xticklines() + ax.get_yticklines():
        x.set_visible(False)

    ax.set_xlim(-.5, ncols - .5)

    # Color bar ticks at the powers of ten covered by the LogNorm range.
    t = [1, 10, 100, 1000, 10000]
    pad = .06
    if opts.horizontalbar:
        ypos = .5 * (1 - nrows * yinterval) - pad
        axcolor = fig.add_axes([.3, ypos, .4, .02])
        orientation = "horizontal"
    else:
        axcolor = fig.add_axes([.9, .3, .02, .4])
        orientation = "vertical"
    fig.colorbar(im, cax=axcolor, ticks=t, format=_("%d"),
                 orientation=orientation)

    if groups:
        # Collapse consecutive identical group names into (name, run length).
        groups = [(key, len(list(nn))) for key, nn in groupby(groups)]
        yy = .5 + .5 * nrows / ncols * .7 + .06
        e = .005
        sep = -.5

        for k, kl in groups:
            # Separator in the array area
            sep += kl
            ax.plot([sep, sep], [-.5, nrows - .5], "w-", lw=2)
            # Group labels on the top
            kl *= xinterval
            root.plot([xstart + e, xstart + kl - e], [yy, yy], "-",
                      color="gray", lw=2)
            root.text(xstart + .5 * kl, yy + e, k, ha="center", color="gray")
            # Advance to the left edge of the next group.
            xstart += kl

    if rowgroups:
        from jcvi.graphics.glyph import TextCircle

        xpos = .04
        tip = .015
        assert rgroups
        ystart = 1 - .5 * (1 - nrows * yinterval)
        for gname, start, end in rgroups:
            # Convert row indices to root y-coordinates.
            start = ystart - start * yinterval
            end = ystart - (end + 1) * yinterval
            start -= tip / 3
            end += tip / 3

            # Bracket the groups
            root.plot((xpos, xpos + tip), (start, start), "k-", lw=2)
            root.plot((xpos, xpos), (start, end), "k-", lw=2)
            root.plot((xpos, xpos + tip), (end, end), "k-", lw=2)
            TextCircle(root, xpos, .5 * (start + end), gname)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + opts.cmap + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def ld(args):
    """
    %prog ld map

    Calculate pairwise linkage disequilibrium given MSTmap.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text,
    # so it is kept verbatim; further documentation lives in comments.
    #
    # Workflow:
    #   1. Load the MSTmap file and optionally take a random subsample of
    #      markers (original order preserved).
    #   2. If `<map>.subsample.bed` / `<map>.subsample.matrix` need updating,
    #      write the marker BED file and compute the pairwise LD matrix;
    #      otherwise reload both from disk.
    #   3. Plot the matrix as a heatmap with chromosome breaks and labels,
    #      and save it to `<prefix>.subsample.<image format>`.
    import numpy as np
    from random import sample
    from itertools import combinations

    from jcvi.algorithms.matrix import symmetrize

    p = OptionParser(ld.__doc__)
    p.add_option("--subsample", default=500, type="int",
                 help="Subsample markers to speed up [default: %default]")
    p.add_option("--cmap", default="jet",
                 help="Use this color map [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    mstmap, = args
    subsample = opts.subsample
    data = MSTMap(mstmap)
    # Take random subsample while keeping marker order
    if subsample < data.nmarkers:
        picked = sorted(sample(range(len(data)), subsample))
        data = [data[x] for x in picked]

    markerbedfile = mstmap + ".subsample.bed"
    ldmatrix = mstmap + ".subsample.matrix"

    if need_update(mstmap, (markerbedfile, ldmatrix)):
        nmarkers = len(data)
        # Close the file before going on: the BED file is read back further
        # down, so its contents must be flushed to disk by then.
        with open(markerbedfile, "w") as fw:
            fw.write("\n".join(x.bedline for x in data) + "\n")
        logging.debug("Write marker set of size {0} to file `{1}`."
                      .format(nmarkers, markerbedfile))

        # Fill one value per unordered marker pair (i < j), then symmetrize.
        M = np.zeros((nmarkers, nmarkers), dtype=float)
        for i, j in combinations(range(nmarkers), 2):
            a = data[i]
            b = data[j]
            M[i, j] = calc_ldscore(a.genotype, b.genotype)

        M = symmetrize(M)

        logging.debug("Write LD matrix to file `{0}`.".format(ldmatrix))
        M.tofile(ldmatrix)
    else:
        # Outputs are up to date: reuse them. The matrix file is raw floats,
        # so its square shape is recovered from the number of BED records.
        nmarkers = len(Bed(markerbedfile))
        M = np.fromfile(ldmatrix, dtype="float").reshape(nmarkers, nmarkers)
        logging.debug("LD matrix `{0}` exists ({1}x{1})."
                      .format(ldmatrix, nmarkers))

    from jcvi.graphics.base import plt, savefig, cm, Rectangle, draw_cmap

    plt.rcParams["axes.linewidth"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([.1, .1, .8, .8])  # the heatmap

    default_cm = cm.get_cmap(opts.cmap)
    ax.matshow(M, cmap=default_cm)

    # Plot chromosomes breaks
    bed = Bed(markerbedfile)
    xsize = len(bed)
    extent = (0, nmarkers)
    chr_labels = []
    ignore_size = 20

    for (seqid, beg, end) in bed.get_breaks():
        # Spans shorter than `ignore_size` get neither a break line nor a label.
        ignore = abs(end - beg) < ignore_size
        pos = (beg + end) / 2
        chr_labels.append((seqid, pos, ignore))
        if ignore:
            continue
        ax.plot((end, end), extent, "w-", lw=1)
        ax.plot(extent, (end, end), "w-", lw=1)

    # Plot chromosome labels
    for label, pos, ignore in chr_labels:
        # Map the marker position into the heatmap's span in root coordinates.
        pos = .1 + pos * .8 / xsize
        if not ignore:
            root.text(pos, .91, label,
                      ha="center", va="bottom", rotation=45, color="grey")
            root.text(.09, pos, label,
                      ha="right", va="center", color="grey")

    ax.set_xlim(extent)
    ax.set_ylim(extent)
    ax.set_axis_off()

    draw_cmap(root, "Pairwise LD (r2)", 0, 1, cmap=default_cm)

    root.add_patch(Rectangle((.1, .1), .8, .8, fill=False, ec="k", lw=2))
    m = mstmap.split(".")[0]
    root.text(.5, .06, "Linkage Disequilibrium between {0} markers".format(m),
              ha="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = m + ".subsample" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)