def draw_box(clusters, ax, color="b"):
    """Outline every cluster with its axis-aligned bounding box.

    Args:
        clusters: Iterable of clusters, each an iterable of (x, y) pairs.
        ax: Matplotlib axes that receives one rectangle per cluster.
        color: Edge colour of the rectangles.
    """
    for members in clusters:
        xs, ys = zip(*members)
        left, right = min(xs), max(xs)
        bottom, top = min(ys), max(ys)
        box = Rectangle(
            (left, bottom),
            right - left,
            top - bottom,
            ec=color,
            fc='y',
            alpha=.5,
        )
        ax.add_patch(box)
def draw_cytoband(
    ax, chrom, filename=datafile("hg38.band.txt"), ymid=0.5, width=0.99, height=0.11
):
    """Draw a horizontal cytoband ideogram of one chromosome on `ax`.

    Args:
        ax: Matplotlib axes to draw on. Its limits are reset to the unit
            square and its axis decorations are switched off.
        chrom: Chromosome name, matched against the `#chrom` column.
        filename: Tab-separated band table with five columns per row
            (chromosome, start, end, band name, stain label).
        ymid: Vertical centre of the ideogram.
        width: Horizontal extent taken by the largest band end coordinate.
        height: Thickness of the ideogram.

    Raises:
        ValueError: If the table holds no band for `chrom`.
    """
    import pandas as pd

    bands = pd.read_csv(filename, sep="\t")
    chrombands = bands[bands["#chrom"] == chrom]
    data = [
        (seqid, start, end, name, gie)
        for _, (seqid, start, end, name, gie) in chrombands.iterrows()
    ]
    if not data:
        # max() on an empty sequence would raise an opaque ValueError; say why.
        raise ValueError(
            "No cytoband found for chromosome `{}` in `{}`".format(chrom, filename)
        )

    chromsize = max(end for _, _, end, _, _ in data)
    scale = width * 1.0 / chromsize
    xstart, ystart = (1 - width) / 2, ymid - height / 2

    def bp_to_pos(bp):
        """Convert a base-pair coordinate to a canvas x position."""
        return xstart + bp * scale

    # The first "acen" band is drawn as a triangle narrowing to the right,
    # every later one as a triangle narrowing to the left.
    in_acen = False
    for _, start, end, name, gie in data:
        color, alpha = get_color(gie)
        bplen = end - start
        if "acen" in gie:
            if in_acen:
                xys = [
                    (bp_to_pos(start), ymid),
                    (bp_to_pos(end), ystart),
                    (bp_to_pos(end), ystart + height),
                ]
            else:
                xys = [
                    (bp_to_pos(start), ystart),
                    (bp_to_pos(start), ystart + height),
                    (bp_to_pos(end), ymid),
                ]
            p = Polygon(xys, closed=True, ec="k", fc=color, alpha=alpha)
            in_acen = True
        else:
            p = Rectangle(
                (bp_to_pos(start), ystart),
                bplen * scale,
                height,
                ec="k",
                fc=color,
                alpha=alpha,
            )
        ax.add_patch(p)
        ax.text(
            bp_to_pos((start + end) / 2),
            ymid + height * 0.8,
            name,
            rotation=40,
            color="lightslategray",
        )

    ax.text(0.5, ystart - height, chrom, size=16, ha="center", va="center")

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_axis_off()
def __init__(self, ax, x1, x2, y, height=0.04, gradient=True, fc="gray",
             pic_type='Rectangle', **kwargs):
    """Build a horizontal glyph spanning x1..x2, centred vertically on y.

    `pic_type` selects the frame: 'Rectangle' gives a box of the requested
    height, 'Arrow' gives a fancy arrow from x1 to x2; any other value adds
    no frame. With `gradient` set, a stack of translucent white rectangles
    is laid over the frame. Extra keyword arguments are forwarded to every
    patch that is created.
    """
    super(Glyph, self).__init__(ax)
    span = x2 - x1

    # Frame around the gradient rectangle
    if pic_type == 'Rectangle':
        frame = Rectangle((x1, y - 0.5 * height), span, height,
                          fc=fc, lw=0, **kwargs)
        self.append(frame)
    elif pic_type == 'Arrow':
        arrow = FancyArrowPatch(
            (x1, y),
            (x2, y),
            arrowstyle="Simple,head_length=1,head_width=5,tail_width=5",
            fc=fc,
            lw=0,
            **kwargs)
        self.append(arrow)

    # Several overlaying patches
    fractions = np.arange(0.1, 0.55, 0.05) if gradient else ()
    for frac in fractions:
        corner = (x1, y - height * frac)
        overlay = Rectangle(corner, span, 2 * frac * height,
                            fc="w", lw=0, alpha=0.1, **kwargs)
        self.append(overlay)

    self.add_patches()
def __init__(self, ax, x1, x2, y, height=.015, ec="k", patch=None,
             patchcolor='lightgrey', lw=1, fc=None, zorder=2, roundrect=False):
    """Draw a horizontal chromosome from x1 to x2, centred vertically on y.

    Args:
        ax: Matplotlib axes to draw on.
        x1, x2: Horizontal end points; they are sorted before use.
        y: Vertical centre of the chromosome.
        height: Thickness of the outline.
        ec: Edge colour of the outline.
        patch: Optional flat sequence of x positions read as consecutive
            (start, end) pairs; each pair becomes a filled rectangle. A
            trailing unpaired value is ignored.
        patchcolor: Fill colour of those rectangles.
        lw: Line width of the outline.
        fc: Optional fill colour; when given, a half-height filled shape is
            drawn as well.
        zorder: Base z-order of the shapes.
        roundrect: Draw RoundRect shapes instead of polygons.
    """
    x1, x2 = sorted((x1, x2))
    super(HorizontalChromosome, self).__init__(ax)

    pts, r = self.get_pts(x1, x2, y, height)
    if roundrect:
        RoundRect(ax, (x1, y - height * .5), x2 - x1, height,
                  fill=False, lw=lw, ec=ec, zorder=zorder + 1)
    else:
        self.append(Polygon(pts, fill=False, lw=lw, ec=ec, zorder=zorder))

    if fc:
        # This call rebinds `r`, so any patches below are sized from the
        # half-height shape whenever a fill colour is given.
        pts, r = self.get_pts(x1, x2, y, height / 2)
        if roundrect:
            RoundRect(ax, (x1, y - height / 4), x2 - x1, height / 2,
                      fc=fc, lw=0, zorder=zorder)
        else:
            self.append(Polygon(pts, fc=fc, lw=0, zorder=zorder))

    if patch:
        rr = r * .9  # Shrink a bit for the patches
        # Walk complete (start, end) pairs only; `range` works on both
        # Python 2 and 3, unlike `xrange`.
        for i in range(0, len(patch) - 1, 2):
            p1, p2 = patch[i], patch[i + 1]
            self.append(
                Rectangle((p1, y - rr), p2 - p1, 2 * rr, lw=0, fc=patchcolor))

    self.add_patches()
def draw(self):
    """Append one rectangle per coloured interval of `self.ar` and add them.

    Consecutive values of `self.ar` form the intervals. Each interval is
    shifted right by the padding accumulated so far and lifted by its
    wiggle value scaled by `self.wiggle`. Intervals with a falsy colour are
    not drawn but still advance the padding.
    """
    step = self.pad
    shift = 0
    segments = zip(pairwise(self.ar), self.wiggles, self.colors)
    for (left, right), wiggle, shade in segments:
        ybase = self.ystart + wiggle * 1. / self.wiggle
        if shade:
            self.append(
                Rectangle((left + shift, ybase), right - left, self.height,
                          color=shade))
        shift += step
    self.add_patches()
def draw_geoscale(ax, minx=0, maxx=175):
    """
    Draw geological epoch on million year ago (mya) scale.

    Args:
        ax: Matplotlib axes to draw on.
        minx: Youngest age (mya), drawn at the right end of the scale.
        maxx: Oldest age (mya), drawn at the left end of the scale.
    """
    a, b = .1, .6  # Canvas x positions of maxx (oldest) and minx (present)

    def cv(x):
        """Map an age in mya onto the canvas so minx -> b and maxx -> a."""
        # Offset by `minx` (an age), not by the canvas coordinate `b`;
        # multiplying by the float span first avoids integer division.
        return b - (x - minx) * (b - a) / (maxx - minx)

    ax.plot((a, b), (.5, .5), "k-")
    tick = .015
    for mya in range(maxx - 25, 0, -25):
        p = cv(mya)
        ax.plot((p, p), (.5, .5 - tick), "k-")
        ax.text(p, .5 - 2.5 * tick, str(mya), ha="center", va="center")
    ax.text((a + b) / 2, .5 - 5 * tick, "Time before present (million years)",
            ha="center", va="center")

    # Source:
    # http://www.weston.org/schools/ms/biologyweb/evolution/handouts/GSAchron09.jpg
    Geo = (("Neogene", 2.6, 23.0, "#fee400"),
           ("Paleogene", 23.0, 65.5, "#ff9a65"),
           ("Cretaceous", 65.5, 145.5, "#80ff40"),
           ("Jurassic", 145.5, 201.6, "#33fff3"))
    h = .05
    for era, start, end, color in Geo:
        start, end = cv(start), cv(end)
        end = max(a, end)  # Clip periods that extend past the oldest age
        p = Rectangle((end, .5 + tick / 2), abs(start - end), h,
                      lw=1, ec="w", fc=color)
        ax.text((start + end) / 2, .5 + (tick + h) / 2, era,
                ha="center", va="center", size=9)
        ax.add_patch(p)
def __init__(self, ax, x, y1, y2, width=.015, ec="k", patch=None,
             patchcolor='lightgrey', lw=1, fc="k", zorder=2):
    """
    Chromosome with positions given in (x, y1) => (x, y2)

    The chromosome can also be patched, e.g. to show scaffold composition in
    alternating shades. Use a list of starting locations to segment.

    `patch` is read as consecutive (start, end) pairs of y positions; a
    trailing unpaired value is ignored. `fc` is accepted but not used here.
    """
    y1, y2 = sorted((y1, y2))
    super(Chromosome, self).__init__(ax)
    pts, r = self.get_pts(x, y1, y2, width)
    self.append(Polygon(pts, fill=False, lw=lw, ec=ec, zorder=zorder))
    if patch:
        rr = r * .9  # Shrink a bit for the patches
        # Walk complete (start, end) pairs only; `range` works on both
        # Python 2 and 3, unlike `xrange`.
        for i in range(0, len(patch) - 1, 2):
            p1, p2 = patch[i], patch[i + 1]
            self.append(Rectangle((x - rr, p1), 2 * rr, p2 - p1, lw=0,
                                  fc=patchcolor))

    self.add_patches()
def deletion(args):
    """
    %prog deletion [deletion-genes|deletion-bases] C2-deletions boleracea.bed

    Plot histogram for napus deletions. Can plot deletion-genes or
    deletion-bases. The three largest segmental deletions will be highlighted
    along with a drawing of the C2 chromosome.
    """
    # The docstring above doubles as the usage text given to OptionParser.
    import math
    from jcvi.formats.bed import Bed
    from jcvi.graphics.chromosome import HorizontalChromosome
    from jcvi.graphics.base import kb_formatter

    p = OptionParser(deletion.__doc__)
    opts, args, iopts = p.set_image_options(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    deletion_genes, deletions, bed = args
    # One integer per line: the size of each deleted segment.
    with open(deletion_genes) as fh:
        dg = [int(x) for x in fh]
    lsg = "lightslategray"

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([.1, .1, .8, .8])
    # NOTE(review): the usage text names the inputs "deletion-genes" /
    # "deletion-bases" while the code tests for "deleted-genes" -- confirm
    # which file name is actually used.
    count_genes = deletion_genes == "deleted-genes"
    minval = 2 if count_genes else 2048
    bins = np.logspace(math.log(minval, 10), math.log(max(dg), 10), 16)
    ax.hist(dg, bins=bins, fc=lsg, alpha=.75)
    # NOTE(review): Matplotlib >= 3.3 spells this keyword `base`; `basex` is
    # kept for the Matplotlib version this file was written against.
    ax.set_xscale('log', basex=2)
    if count_genes:
        ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
        ax.set_xlabel('No. of deleted genes in each segment')
    else:
        ax.xaxis.set_major_formatter(kb_formatter)
        ax.set_xlabel('No. of deleted bases in each segment')
    ax.yaxis.set_major_formatter(mpl.ticker.FormatStrFormatter('%d'))
    ax.set_ylabel('No. of segments')
    ax.patch.set_alpha(0.1)

    # Draw chromosome C2
    na, nb = .45, .85
    root.text((na + nb) / 2, .54, "ChrC02", ha="center")
    HorizontalChromosome(root, na, nb, .5, height=.025,
                         fc=lsg, fill=True)

    order = Bed(bed).order

    def scale(x):
        """Map a base-pair position on C2 onto the chromosome drawing."""
        return na + x * (nb - na) / 52886895

    with open(deletions) as fp:
        for i, row in enumerate(fp, 1):
            _, genes = row.split()
            genes = genes.split("|")
            _, a = order[genes[0]]
            _, b = order[genes[-1]]
            # The segment runs from the first gene to the last one; the
            # original used `a.end`, which covered only the first gene and
            # left `b` unused.
            mi, mx = a.start, b.end
            mi, mx = scale(mi), scale(mx)
            root.add_patch(Rectangle((mi, .475), mx - mi, .05,
                                     fc="red", ec="red"))
            if i == 1:   # offset between two adjacent regions for aesthetics
                mi -= .015
            elif i == 2:
                mi += .015
            TextCircle(root, mi, .44, str(i), fc="red")

    for i, mi in zip(range(1, 4), (.83, .78, .73)):
        TextCircle(root, mi, .2, str(i), fc="red")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = deletion_genes + ".pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
            is_self=False, synteny=False, cmap_text=None, genomenames=None,
            sample_number=10000, minfont=5, palette=None, chrlw=.01,
            title=None, sepcolor="gainsboro"):
    """Draw a dot plot of anchor gene pairs between two gene orders.

    Args:
        anchorfile: Path of the anchors file. Lines starting with "#" open a
            new block; other lines hold a query gene, a subject gene and an
            optional trailing value.
        qbed, sbed: Bed-like objects for the x and y genomes; `order`,
            `get_breaks()`, `filename` and `len()` are used.
        fig, root, ax: Figure, full-canvas axes and dot-plot axes.
        vmin, vmax: Values are clipped to this range before colouring.
        is_self: Mirror every pair and draw the diagonal.
        synteny: Box the clusters found by `batch_scan`.
        cmap_text: Label for the colour bar; no colour bar when empty.
        genomenames: "x_y" axis names; defaults to the bed file stems.
        sample_number: Maximum number of points drawn.
        minfont: Chromosome labels with a smaller font are skipped.
        palette: Optional block-id -> colour lookup (`get` and `colors` used).
        chrlw: Line width of the chromosome separators.
        title: Plot title; a default is generated when empty.
        sepcolor: Colour of the chromosome separators.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    block_id = 0
    # The colour chosen at a block header has to persist for the rows of
    # that block. It used to be reset on every row, so palette colours never
    # reached the data.
    block_color = palette.get(block_id, "k") if palette else None
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                if palette:
                    block_color = palette.get(block_id, "k")
                continue

            # first two columns are query and subject, and an optional third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                value = vmax

            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, _ = qorder[query]
            si, _ = sorder[subject]

            nv = vmax - value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) " \
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    if not palette:
        data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=default_cm,
                   vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos, .91, latex(label), size=fontsize,
                      ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91, pos, latex(label), size=fontsize,
                      va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Mirrored pairs were counted twice; floor division keeps the
            # count an integer on Python 3 as well.
            npairs //= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def composite(args):
    """
    %prog composite fastafile chr1

    Combine line plots, feature bars and alt-bars, different data types
    specified in options. Inputs must be BED-formatted. Three types of viz are
    currently supported:

    --lines: traditional line plots, useful for plotting feature freq
    --bars: show where the extent of features are
    --altbars: similar to bars, yet in two alternating tracks, e.g. scaffolds
    """
    # The docstring above doubles as the usage text given to OptionParser.
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(composite.__doc__)
    p.add_option("--lines", help="Features to plot in lineplot")
    p.add_option("--bars", help="Features to plot in bars")
    p.add_option("--altbars", help="Features to plot in alt-bars")
    p.add_option(
        "--fatten",
        default=False,
        action="store_true",
        help="Help visualize certain narrow features",
    )
    p.add_option(
        "--mode",
        default="span",
        choices=("span", "count", "score"),
        help="Accumulate feature based on",
    )
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, seqid = args
    window, shift, subtract, merge = check_window_options(opts)
    fatten = opts.fatten

    # Each option is a comma-separated list of feature names
    linebeds = get_beds(opts.lines.split(",")) if opts.lines else []
    barbeds = get_beds(opts.bars.split(",")) if opts.bars else []
    altbarbeds = get_beds(opts.altbars.split(",")) if opts.altbars else []

    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode,
                            merge=merge)

    margin = 0.12
    clen = Sizes(fastafile).mapping[seqid]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    root.text(0.5, 0.95, seqid, ha="center", color="darkslategray")

    xstart, xend = margin, 1 - margin
    xlen = xend - xstart
    ratio = xlen / clen

    def to_canvas(bp):
        """Map a base-pair position to a canvas x coordinate."""
        return xstart + ratio * bp

    # Line plots
    ax = fig.add_axes([xstart, 0.6, xlen, 0.3])
    lineplot(ax, linebins, nbins, seqid, window, shift)

    # Bar plots
    yy = 0.5
    yinterval = 0.08
    r = 0.01
    fattend = 0.0025
    for bedfile in barbeds:
        root.text(xend + 0.01, yy, bedfile.split(".")[0], va="center")
        HorizontalChromosome(root, xstart, xend, yy, height=0.02)
        for feat in Bed(bedfile):
            left, right = to_canvas(feat.start), to_canvas(feat.end)
            span = right - left
            if fatten and span < fattend:
                span = fattend

            root.add_patch(
                Rectangle((left, yy - r), span, 2 * r, lw=0,
                          fc="darkslategray"))
        yy -= yinterval

    # Alternative bar plots; the offset sign flips on every feature drawn
    offset = r / 2
    for bedfile in altbarbeds:
        root.text(xend + 0.01, yy, bedfile.split(".")[0], va="center")
        for feat in Bed(bedfile):
            left, right = to_canvas(feat.start), to_canvas(feat.end)
            if right - left < 0.0001:
                continue
            offset = -offset
            root.add_patch(
                Rectangle((left, yy + offset), right - left, 0.003, lw=0,
                          fc="darkslategray"))
        yy -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def ancestral(args):
    """
    %prog ancestral ancestral.txt assembly.fasta

    Karyotype evolution of pineapple. The figure is inspired by Amphioxus paper
    Figure 3 and Tetradon paper Figure 9.
    """
    # The docstring above doubles as the usage text given to OptionParser.
    p = OptionParser(ancestral.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    regionsfile, sizesfile = args
    regions = RegionsFile(regionsfile)
    # Keep linkage groups only; `.items()` works on both Python 2 and 3,
    # unlike `.iteritems()`.
    sizes = {k: v for k, v in Sizes(sizesfile).mapping.items()
             if k[:2] == "LG"}
    maxsize = max(sizes.values())
    ratio = .5 / maxsize

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes((0, 0, 1, 1))

    from jcvi.graphics.base import set2
    # Reorder the first seven palette colours, one per ancestral karyotype
    c0, c1, c2, c3, c4, c5, c6 = set2[:7]
    colors = (c2, c6, c1, c4, c3, c0, c5)

    # Upper panel is the evolution of segments
    # All segments belong to one of seven karyotypes 1 to 7
    karyotypes = regions.karyotypes
    xgap = 1. / (1 + len(karyotypes))
    ygap = .05
    mgap = xgap / 4.5
    gwidth = mgap * .75
    tip = .02
    coords = {}
    for i, k in enumerate(karyotypes):
        x = (i + 1) * xgap
        y = .9
        root.text(x, y + tip, "Anc" + k, ha="center")
        root.plot((x, x), (y, y - ygap), "k-", lw=2)
        y -= 2 * ygap
        coords['a'] = (x - 1.5 * mgap, y)
        coords['b'] = (x - .5 * mgap, y)
        coords['c'] = (x + .5 * mgap, y)
        coords['d'] = (x + 1.5 * mgap, y)
        coords['ab'] = join_nodes_vertical(root, coords, 'a', 'b', y + ygap / 2)
        coords['cd'] = join_nodes_vertical(root, coords, 'c', 'd', y + ygap / 2)
        coords['abcd'] = join_nodes_vertical(root, coords, 'ab', 'cd', y + ygap)
        for n in 'abcd':
            nx, ny = coords[n]
            root.text(nx, ny - tip, n, ha="center")
            coords[n] = (nx, ny - ygap / 2)

        # Stack this karyotype's segments below the group they belong to
        for kd in regions.get_karyotype(k):
            group = kd.group
            gx, gy = coords[group]
            gsize = ratio * kd.span
            gy -= gsize
            p = Rectangle((gx - gwidth / 2, gy),
                          gwidth, gsize, lw=0, color=colors[i])
            root.add_patch(p)
            root.text(gx, gy + gsize / 2, kd.chromosome,
                      ha="center", va="center", color='w')
            coords[group] = (gx, gy - tip)

    # Bottom panel shows the location of segments on chromosomes
    # TODO: redundant code, similar to graphics.chromosome
    ystart = .54
    chr_number = len(sizes)
    xstart, xend = xgap - 2 * mgap, 1 - xgap + 2 * mgap
    # A single chromosome has no spacing; avoid dividing by zero.
    xinterval = (xend - xstart - gwidth) / max(chr_number - 1, 1)
    chrpos = {}
    for idx, (seqid, clen) in enumerate(sorted(sizes.items())):
        number = get_number(seqid)
        xx = xstart + idx * xinterval + gwidth / 2
        chrpos[number] = xx
        root.text(xx, ystart + .01, number, ha="center")
        Chromosome(root, xx, ystart, ystart - clen * ratio, width=gwidth)

    # Start painting
    for r in regions:
        xx = chrpos[r.chromosome]
        yystart = ystart - r.start * ratio
        yyend = ystart - r.end * ratio
        p = Rectangle((xx - gwidth / 2, yystart), gwidth, yyend - yystart,
                      color=colors[int(r.karyotype) - 1], lw=0)
        root.add_patch(p)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
            is_self=False, synteny=False, cmap_text=None, cmap="copper",
            genomenames=None, sample_number=10000, minfont=5, palette=None,
            chrlw=.1, title=None, sep=True, sepcolor="g", stdpf=True):
    """Draw a dot plot of anchor gene pairs between two gene orders.

    Args:
        anchorfile: Path of the anchors file. Lines starting with "#" open a
            new block; other lines hold a query gene, a subject gene and an
            optional trailing value.
        qbed, sbed: Bed-like objects for the x and y genomes; `order`,
            `get_breaks()`, `filename` and `len()` are used.
        fig, root, ax: Figure, full-canvas axes and dot-plot axes.
        vmin, vmax: With `cmap_text`, pairs outside this range are dropped.
        is_self: Mirror every pair and draw the diagonal.
        synteny: Box the clusters found by `batch_scan`.
        cmap_text: Label for the colour bar; when empty every value is 0.
        cmap: Colour map used when no palette is given.
        genomenames: "x_y" axis names; defaults to the bed file stems.
        sample_number: Passed to `downsample` to limit the points drawn.
        minfont, chrlw, sep, sepcolor, stdpf: Forwarded to
            `plot_breaks_and_labels`.
        palette: Optional block-id -> colour lookup (`get` and `colors` used).
        title: Plot title; a default is generated when None, and an empty
            string is used as given.
    """
    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    gx, gy = markup(gx), markup(gy)

    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Capping values within [{0:.1f}, {1:.1f}]"\
                        .format(vmin, vmax))

    block_id = 0
    # The colour chosen at a block header has to persist for the rows of
    # that block. It used to be reset on every row, so palette colours never
    # reached the data.
    block_color = palette.get(block_id, "k") if palette else None
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                if palette:
                    block_color = palette.get(block_id, "k")
                continue

            # first two columns are query and subject, and an optional third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            if cmap_text:
                try:
                    value = float(value)
                except ValueError:
                    value = vmax

                if value < vmin:
                    continue
                if value > vmax:
                    continue
            else:
                value = 0

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, _ = qorder[query]
            si, _ = sorder[subject]

            nv = value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    npairs = downsample(data, sample_number=sample_number)
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap,
                   vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    qbreaks = qbed.get_breaks()
    sbreaks = sbed.get_breaks()
    xlim, ylim = plot_breaks_and_labels(fig, root, ax, gx, gy, xsize, ysize,
                                        qbreaks, sbreaks, sep=sep,
                                        chrlw=chrlw, sepcolor=sepcolor,
                                        minfont=minfont, stdpf=stdpf)

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if title is None:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Mirrored pairs were counted twice; floor division keeps the
            # count an integer on Python 3 as well.
            npairs //= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    if title:
        logging.debug("Dot plot title: {}".format(title))
    normalize_axes(root)
def draw_chromosomes(
    root,
    bedfile,
    sizes,
    iopts,
    mergedist,
    winsize,
    imagemap,
    mappingfile=None,
    gauge=False,
    legend=True,
    empty=False,
    title=None,
):
    """Paint BED features, coloured by class, onto vertical chromosomes.

    Args:
        root: Matplotlib axes covering the canvas.
        bedfile: BED file of features; the `accn` of each feature is its class.
        sizes: Optional file of chromosome sizes. When falsy, each chromosome
            length is the largest feature end seen on it.
        iopts: Image options; `w`, `h` and `dpi` are used for the image map.
        mergedist: A feature starting within this distance of the previous
            feature of the same class is extended back to that feature's end.
        winsize: Window size (bp) of one image-map area.
        imagemap: When truthy, write an HTML image map to `<prefix>.map`.
        mappingfile: Optional tab-separated file mapping feature names to
            classes; a third column, when present, is read as preset colours.
        gauge: Draw a size gauge to the right of the chromosomes.
        legend: Draw the class legend at the bottom.
        empty: Label for an extra unfilled legend box; nothing when falsy.
        title: Optional title text.

    NOTE(review): the nesting below was rebuilt from a whitespace-collapsed
    source; please confirm the level of the trailing image-map flush and of
    the `if empty:` block against the original file.
    """
    bed = Bed(bedfile)
    prefix = bedfile.rsplit(".", 1)[0]

    if imagemap:
        imgmapfile = prefix + ".map"
        mapfh = open(imgmapfile, "w")
        print('<map id="' + prefix + '">', file=mapfh)

    if mappingfile:
        mappings = DictFile(mappingfile, delimiter="\t")
        classes = sorted(set(mappings.values()))
        preset_colors = (DictFile(
            mappingfile, keypos=1, valuepos=2, delimiter="\t")
                         if DictFile.num_columns(mappingfile) >= 3 else {})
    else:
        # Without a mapping every distinct accn is its own class
        classes = sorted(set(x.accn for x in bed))
        mappings = dict((x, x) for x in classes)
        preset_colors = {}

    logging.debug("A total of {} classes found: {}".format(
        len(classes), ",".join(classes)))

    # Assign colors to classes
    ncolors = max(3, min(len(classes), 12))
    palette = set1_n if ncolors <= 8 else set3_n
    colorset = palette(number=ncolors)
    colorset = sample_N(colorset, len(classes))
    class_colors = dict(zip(classes, colorset))
    # Preset colours from the mapping file override the generated ones
    class_colors.update(preset_colors)
    logging.debug("Assigned colors: {}".format(class_colors))

    chr_lens = {}
    centromeres = {}
    if sizes:
        chr_lens = Sizes(sizes).sizes_mapping
    else:
        for b, blines in groupby(bed, key=(lambda x: x.seqid)):
            blines = list(blines)
            maxlen = max(x.end for x in blines)
            chr_lens[b] = maxlen

    # Record centromere positions and rewrite each accn to its class;
    # features absent from the mapping get the placeholder class "-"
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = "-"

    chr_number = len(chr_lens)
    if centromeres:
        assert chr_number == len(
            centromeres), "chr_number = {}, centromeres = {}".format(
                chr_number, centromeres)

    r = 0.7  # width and height of the whole chromosome set
    xstart, ystart = 0.15, 0.85
    xinterval = r / chr_number
    xwidth = xinterval * 0.5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chr, clen) in enumerate(sorted(chr_lens.items())):
        xx = xstart + a * xinterval + 0.5 * xwidth
        root.text(xx, ystart + 0.01, str(get_number(chr)), ha="center")
        if centromeres:
            yy = ystart - centromeres[chr] * ratio
            ChromosomeWithCentromere(root,
                                     xx,
                                     ystart,
                                     yy,
                                     ystart - clen * ratio,
                                     width=xwidth)
        else:
            Chromosome(root, xx, ystart, ystart - clen * ratio, width=xwidth)

    # Chromosome name -> column index, in the same sorted order as above
    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = 1
    # color the regions
    for chr in sorted(chr_lens.keys()):
        segment_size, excess = 0, 0
        bac_list = []
        prev_end, prev_klass = 0, None
        for b in bed.sub_bed(chr):
            clen = chr_lens[chr]
            idx = chr_idxs[chr]
            klass = b.accn
            if klass == "centromere":
                continue
            start = b.start
            end = b.end
            # Close small gaps between neighbouring features of one class
            if start < prev_end + mergedist and klass == prev_klass:
                start = prev_end
            xx = xstart + idx * xinterval
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle(
                    (xx, yystart),
                    xwidth,
                    yyend - yystart,
                    fc=class_colors.get(klass, "lightslategray"),
                    lw=0,
                    alpha=alpha,
                ))
            prev_end, prev_klass = b.end, klass

            if imagemap:
                """
                `segment` : size of current BAC being investigated + `excess`
                `excess`  : left-over bases from the previous BAC, as a result of
                            iterating over `winsize` regions of `segment`
                """
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                # Emit one image-map area per full `winsize` window; what is
                # left over is carried to the next feature as `excess`
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = (
                        xx,
                        (1 - ystart) + segment_start * ratio,
                        xx + xwidth,
                        (1 - ystart) + segment_end * ratio,
                    )
                    print(
                        "\t" + write_ImageMapLine(
                            tlx,
                            tly,
                            brx,
                            bry,
                            iopts.w,
                            iopts.h,
                            iopts.dpi,
                            chr + ":" + ",".join(bac_list),
                            segment_start,
                            segment_end,
                        ),
                        file=mapfh,
                    )

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the partial window left at the end of this chromosome
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = (
                xx,
                (1 - ystart) + segment_start * ratio,
                xx + xwidth,
                (1 - ystart) + segment_end * ratio,
            )
            print(
                "\t" + write_ImageMapLine(
                    tlx,
                    tly,
                    brx,
                    bry,
                    iopts.w,
                    iopts.h,
                    iopts.dpi,
                    chr + ":" + ",".join(bac_list),
                    segment_start,
                    segment_end,
                ),
                file=mapfh,
            )

    if imagemap:
        print("</map>", file=mapfh)
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if gauge:
        xstart, ystart = 0.9, 0.85
        Gauge(root, xstart, ystart - r, ystart, max_chr_len)

    # Centromeres are not listed in the legend
    if "centromere" in class_colors:
        del class_colors["centromere"]

    # class legends, four in a row
    if legend:
        xstart = 0.1
        xinterval = 0.8 / len(class_colors)
        xwidth = 0.04
        yy = 0.08
        for klass, cc in sorted(class_colors.items()):
            if klass == "-":
                continue
            root.add_patch(
                Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0,
                          alpha=alpha))
            root.text(xstart + xwidth + 0.01, yy, latex(klass), fontsize=10)
            xstart += xinterval

    # Extra unfilled box, placed at whatever xstart / yy are current here
    if empty:
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1))
        root.text(xstart + xwidth + 0.01, yy, empty, fontsize=10)

    if title:
        root.text(0.5, 0.95, markup(title), ha="center", va="center")
def draw_geoscale(ax, margin=0.1, rmargin=0.2, yy=0.1, max_dist=3.0):
    """
    Draw geological epoch on million year ago (mya) scale.
    max_dist = 3.0 => max is 300 mya

    Args:
        ax: Matplotlib axes to draw on.
        margin: Canvas x position of the oldest age (left end of the scale).
        rmargin: Distance of the present day (0 mya) from the right edge.
        yy: Canvas y position of the scale line.
        max_dist: Oldest age shown, in units of 100 million years.
    """
    import math

    a, b = margin, 1 - rmargin  # Canvas x positions of maxx and minx
    minx, maxx = 0, int(max_dist * 100)

    def cv(x):
        """Map an age in mya onto the canvas so minx -> b and maxx -> a."""
        # Offset by `minx` (an age), not by the canvas coordinate `b`;
        # multiplying by the float span first avoids integer division.
        return b - (x - minx) * (b - a) / (maxx - minx)

    ax.plot((a, b), (yy, yy), "k-")
    tick = 0.0125
    # Round maxx up to a multiple of 25; the float divisor makes the ceiling
    # effective under Python 2 integer division as well.
    scale_start = int(math.ceil(maxx / 25.0) * 25)
    for mya in range(scale_start - 25, 0, -25):
        p = cv(mya)
        ax.plot((p, p), (yy, yy - tick), "k-")
        ax.text(p, yy - 2.5 * tick, str(mya), ha="center", va="center")

    ax.text(
        (a + b) / 2,
        yy - 5 * tick,
        "Time before present (million years)",
        ha="center",
        va="center",
    )

    # Source:
    # https://en.wikipedia.org/wiki/Geological_period
    Geo = (
        ("Neogene", 2.588, 23.03),
        ("Paleogene", 23.03, 66.0),
        ("Cretaceous", 66.0, 145.5),
        ("Jurassic", 145.5, 201.3),
        ("Triassic", 201.3, 252.17),
        ("Permian", 252.17, 298.9),
        # ("Carboniferous", 298.9, 358.9),
    )
    h = 0.05
    for (era, start, end), color in zip(Geo, set3_n(len(Geo))):
        if maxx - start < 10:  # not visible enough
            continue
        start, end = cv(start), cv(end)
        end = max(a, end)  # Clip periods that extend past the oldest age
        p = Rectangle((end, yy + tick / 2),
                      abs(start - end),
                      h,
                      lw=1,
                      ec="w",
                      fc=color)
        ax.text(
            (start + end) / 2,
            yy + (tick + h) / 2,
            era,
            ha="center",
            va="center",
            size=8,
        )
        ax.add_patch(p)
def draw_table(ax, csv_table, extent=(0, 1, 0, 1), stripe_color="beige",
               yinflation=1):
    """Render a parsed CSV table onto the canvas.

    Text rows are written cell by cell and every other text row (starting
    with the first) gets a coloured stripe. A row whose first cell is a list
    is treated as a row of images and is never striped.

    Args:
        ax (matplotlib axes): matplotlib axes
        csv_table (CsvTable): Parsed CSV table
        extent (tuple, optional): (left, right, bottom, top). Defaults to
            (0, 1, 0, 1).
        stripe_color (str, optional): Stripe color of the table. Defaults to
            "beige".
        yinflation (float, optional): Inflate on y since imshow aspect ratio
            sometimes create warped images. Defaults to 1.
    """
    left, right, bottom, top = extent
    width = right - left
    height = top - bottom
    column_widths = csv_table.column_widths(width)
    print(column_widths)

    row_height = height / csv_table.rows
    xpad = 0.01  # stripes extend a little longer horizontally
    for i, row in enumerate(csv_table):
        row_bottom = top - (i + 1) * row_height
        contain_images = isinstance(row[0], list)
        if contain_images:
            # Largest square that fits every image of every cell in this row
            box_width = min(
                min(column_widths[j] / len(x) for j, x in enumerate(row)),
                row_height)

        cell_left = left
        for j, cell in enumerate(row):
            cell_width = column_widths[j]
            if contain_images:
                # There may be multiple images, center them
                rect = (cell_left, row_bottom, cell_width, row_height)
                draw_multiple_images_in_rectangle(ax,
                                                  cell,
                                                  rect,
                                                  box_width,
                                                  yinflation=yinflation)
            else:
                ax.text(
                    cell_left + cell_width / 2,
                    top - (i + 0.5) * row_height,
                    cell,
                    ha="center",
                    va="center",
                )
            cell_left += cell_width

        if contain_images or i % 2 != 0:
            continue

        # Draw the stripes
        ax.add_patch(
            Rectangle(
                (left - xpad, row_bottom),
                width + 2 * xpad,
                row_height,
                fc=stripe_color,
                ec=stripe_color,
            ))
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    # The docstring above doubles as the usage text given to OptionParser.
    p = OptionParser(stack.__doc__)
    p.add_option("--top",
                 default=10,
                 type="int",
                 help="Draw the first N chromosomes")
    p.add_option(
        "--stacks",
        default="Exons,Introns,DNA_transposons,Retrotransposons",
        help="Features to plot in stackplot",
    )
    p.add_option("--switch", help="Change chr names based on two-column file")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (fastafile, ) = args
    top = opts.top
    window, shift, subtract, merge = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    stacks = opts.stacks.split(",")
    bedfiles = get_beds(stacks)
    binfiles = get_binfiles(bedfiles,
                            fastafile,
                            shift,
                            subtract=subtract,
                            merge=merge)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = 0.08
    inner = 0.02  # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for seqid, clen in s:
        yy -= yinterval
        xlen = clen / ratio
        cc = seqid
        if "_" in seqid:
            ca, cb = seqid.split("_")
            cc = ca[0].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner,
                                 color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins, rounded up; integer arithmetic keeps `nbins` an
        # int on Python 3, where `/` would yield a float.
        nbins, leftover = divmod(clen, shift)
        if leftover:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, seqid, window, shift)
        root.text(xx - 0.04,
                  yy + 0.5 * (yinterval - inner),
                  cc,
                  ha="center",
                  va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for bedfile, color in zip(bedfiles, palette):
        label = bedfile.rsplit(".", 1)[0].replace("_", " ")
        label = Registration.get(label, label)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        xx += 2 * inner
        root.text(xx, yy, label, size=13)
        xx += len(label) * 0.012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser,
    # so implementation notes are kept in comments.
    #
    # args: command-line arguments. After option parsing two positionals must
    #       remain: the FASTA file and the sequence id to plot.
    # Output: an image named "<sequence id>.<format>" written via savefig().
    p = OptionParser(heatmap.__doc__)
    p.add_option(
        "--stacks",
        default="Exons,Introns,DNA_transposons,Retrotransposons",
        help="Features to plot in stackplot",
    )
    p.add_option(
        "--heatmaps",
        default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
        help="Features to plot in heatmaps",
    )
    p.add_option("--meres", default=None, help="Extra centromere / telomere features")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, seqid = args
    window, shift, subtract, merge = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = get_beds(stacks)
    heatmapbeds = get_beds(heatmaps)
    stackbins = get_binfiles(
        stackbeds, fastafile, shift, subtract=subtract, merge=merge
    )
    heatmapbins = get_binfiles(
        heatmapbeds, fastafile, shift, subtract=subtract, merge=merge
    )

    margin = 0.06
    inner = 0.015
    clen = Sizes(fastafile).mapping[seqid]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = 0.3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio
    cc = seqid
    if "_" in seqid:
        # Abbreviate "<prefix>_<rest>" to "<P><rest>". Split only on the first
        # underscore so names with several underscores do not raise.
        ca, cb = seqid.split("_", 1)
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    nbins = get_nbins(clen, shift)

    # Use at least 1% of the sequence length as the window, rounded down to a
    # whole multiple of `shift`. Floor division is required: with true
    # division `owindow / shift * shift` is just `owindow` again and the
    # rounding is lost.
    owindow = clen // 100
    if owindow > window:
        window = owindow // shift * shift

    stackplot(ax, stackbins, nbins, palette, seqid, window, shift)
    ax.text(
        0.1,
        0.9,
        cc,
        va="top",
        zorder=100,
        transform=ax.transAxes,
        bbox=dict(boxstyle="round", fc="w", alpha=0.5),
    )

    # Legends, stacked upward to the right of the stack plot
    xx += xlen + 0.01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for name, color in zip(stacks, palette):
        name = name.replace("_", " ")
        name = Registration.get(name, name)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, name, size=10)

    yh = 0.05  # Heatmap height
    # Heatmaps, one strip per feature below the stack plot
    xx = margin
    yy = 1 - margin - yinterval - inner
    for name, binfile in zip(heatmaps, heatmapbins):
        name = name.replace("_", " ")
        name = Registration.get(name, name)

        yy -= yh
        m = stackarray(binfile, seqid, window, shift)

        # Duplicate the 1-D track into two rows so imshow can render a strip
        Y = np.array([m, m])
        root.imshow(
            Y,
            extent=(xx, xx + xlen, yy, yy + yh - inner),
            interpolation="nearest",
            aspect="auto",
            cmap=iopts.cmap,
        )
        root.text(xx + xlen + 0.01, yy, name, size=10)

    yy -= yh

    # Optional centromere / telomere markers on the row below the heatmaps
    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != seqid:
                continue
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=0.01, fc="m", ec="m"))
            root.text(xx + 0.014, yy, accn, va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def expr(args):
    """
    %prog expr block exp layout napus.bed

    Plot a composite figure showing synteny and the expression level between
    homeologs in two tissues - total 4 lists of values. block file contains the
    gene pairs between AN and CN.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser,
    # so implementation notes are kept in comments.
    #
    # args: command-line arguments. After option parsing four positionals must
    #       remain: block file, expression file, layout file, BED file.
    # Output: "napusf4b.<format>" written via savefig().
    from jcvi.graphics.base import red_purple as default_cm

    p = OptionParser(expr.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 4:
        sys.exit(not p.print_help())

    block, exp, layout, napusbed = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    s = Synteny(fig, root, block, napusbed, layout)

    # Import the expression values.
    # Each row holds three whitespace-separated fields: gene id, leaf value,
    # root value. The file is closed as soon as it has been read.
    data = {}
    with open(exp) as fp:
        for row in fp:
            gid, lf, rt = row.split()
            data[gid] = (float(lf), float(rt))

    rA, rB = s.rr
    gA = [x.accn for x in rA.genes]
    gC = [x.accn for x in rB.genes]

    # Genes without expression data default to (0, 0). After the transpose
    # each matrix has one row per tissue (leaf first, then root) and one
    # column per gene.
    A = np.transpose(np.array([data.get(x, (0, 0)) for x in gA]))
    C = np.transpose(np.array([data.get(x, (0, 0)) for x in gC]))

    d, h = .01, .1
    lsg = "lightslategrey"
    coords = s.gg  # Coordinates of the genes
    axes = []
    for j, (y, gg) in enumerate(((.79, gA), (.24, gC))):
        r = s.rr[j]
        x = r.xstart
        w = r.xend - r.xstart
        ax = fig.add_axes([x, y, w, h])
        axes.append(ax)
        root.add_patch(
            Rectangle((x - h, y - d), w + h + d, h + 2 * d,
                      fill=False, ec=lsg, lw=1))
        root.text(x - d, y + 3 * h / 4, "root", ha="right", va="center")
        root.text(x - d, y + h / 4, "leaf", ha="right", va="center")
        # Connectors attach to the panel edge that faces the synteny track
        ty = y - 2 * d if y > .5 else y + h + 2 * d
        nrows = len(gg)
        for i, g in enumerate(gg):
            start, end = coords[(j, g)]
            sx, sy = start
            ex, ey = end
            assert sy == ey
            sy = sy + 2 * d if sy > .5 else sy - 2 * d
            # Dotted line from the gene midpoint to its heat-map column
            root.plot(((sx + ex) / 2, x + w * (i + .5) / nrows), (sy, ty),
                      lw=1, ls=":", color="k", alpha=.2)

    axA, axC = axes
    axA.pcolormesh(A, cmap=default_cm)
    # The colour bar below is built from the second (C) mesh, as before.
    mesh = axC.pcolormesh(C, cmap=default_cm)
    axA.set_xlim(0, len(gA))
    axC.set_xlim(0, len(gC))

    x, y, w, h = .35, .1, .3, .05
    ax_colorbar = fig.add_axes([x, y, w, h])
    fig.colorbar(mesh, cax=ax_colorbar, orientation='horizontal')
    root.text(x - d, y + h / 2, "RPKM", ha="right", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)

    for axis in (axA, axC, root):
        axis.set_axis_off()

    image_name = "napusf4b." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def __init__(self, fig, root, canvas, chr, xlim, datadir,
             order=None, hlsuffix=None, palette=None, cap=50,
             gauge="bottom", plot_label=True, plot_chr_label=True,
             gauge_step=5000000, vlines=None, labels_dict=None,
             diverge=('r', 'g')):
    """
    Draw one XY track per data file found for `chr`, stacked inside `canvas`.

    fig, root: figure and the axes used for the frame and text labels.
    canvas: (x, y, w, h) of the framed area, in `root` coordinates.
    chr: sequence name; data files are globbed as `datadir/<chr>*`.
    xlim: (start, end) x-limits applied to every track.
    order: optional list of track names. Only files whose second
        dot-separated field is in `order` are kept, sorted in that order.
    hlsuffix: if set, `datadir/<label>.<hlsuffix>` is imported as a
        highlight file for each track.
    palette: a single color used for all tracks; when None a 'Set2'
        qualitative palette with one color per track is used.
    cap: passed as `ymax` to each track's cap().
    gauge: "top", "bottom", or a false value for no gauge.
    plot_label, plot_chr_label: toggle the track and chromosome labels.
    gauge_step: tick step handed to setup_gauge_ax().
    vlines: optional positions passed to each track's vlines().
    labels_dict: optional mapping from track name to display label.
    diverge: color pair forwarded to import_hlfile().

    Sets `self.yys`, the list of track bottom y-coordinates.
    Raises ValueError when `gauge` is a true value other than
    "top"/"bottom".
    """
    if labels_dict is None:
        labels_dict = {}
    if gauge and gauge not in ("top", "bottom"):
        raise ValueError(
            "gauge must be 'top', 'bottom' or a false value, "
            "got {0!r}".format(gauge))

    x, y, w, h = canvas
    pad = .01
    root.add_patch(
        Rectangle((x - pad, y - pad), w + 2 * pad, h + 2 * pad, lw=1,
                  fill=False, ec="darkslategray", zorder=10))
    datafiles = glob(op.join(datadir, chr + "*"))

    if order:
        datafiles = [z for z in datafiles if z.split(".")[1] in order]
        datafiles.sort(key=lambda z: order.index(z.split(".")[1]))

    # Label every track from its own file name. When each entry of `order`
    # has exactly one file this equals pairing with `order`; it also works
    # when `order` is None and stays aligned when an entry has no file.
    labels = [z.split(".")[1] for z in datafiles]

    ntracks = len(datafiles)
    yinterval = h / ntracks
    yy = y + h

    if palette is None:
        # Get the palette
        colors = get_map('Set2', 'qualitative', ntracks).mpl_colors
    else:
        colors = [palette] * ntracks

    gauge_ax = None
    # Chromosome label position; defaults to below the canvas so that it is
    # defined even when the gauge is disabled.
    tpos = y - .07
    if gauge == "top":
        gauge_ax = fig.add_axes([x, yy + pad, w, .0001])
        adjust_spines(gauge_ax, ["top"])
        tpos = yy + .07
    elif gauge == "bottom":
        gauge_ax = fig.add_axes([x, y - pad, w, .0001])
        adjust_spines(gauge_ax, ["bottom"])

    start, end = xlim
    if gauge_ax is not None:
        # Use the float formatter for steps below one million
        fs = gauge_step < 1000000
        setup_gauge_ax(gauge_ax, start, end, gauge_step, float_formatter=fs)

    if plot_chr_label:
        root.text(x + w / 2, tpos, chr, ha="center", va="center",
                  color="darkslategray", size=16)

    yys = []
    for label, datafile, color in zip(labels, datafiles, colors):
        yy -= yinterval
        yys.append(yy)
        ax = fig.add_axes([x, yy, w, yinterval * .9])
        xy = XYtrack(ax, datafile, color=color)
        xy.interpolate(end)
        xy.cap(ymax=cap)
        if vlines:
            xy.vlines(vlines)
        if hlsuffix:
            hlfile = op.join(datadir, ".".join((label, hlsuffix)))
            xy.import_hlfile(hlfile, chr, diverge=diverge)
        if plot_label:
            text = labels_dict.get(label, label.capitalize())
            text = r"\textit{{{0}}}".format(text)
            root.text(x - .015, yy + yinterval / 2, text,
                      ha="right", va="center")
        xy.draw()
        ax.set_xlim(*xlim)

    self.yys = yys
def qc(args):
    """
    %prog qc prefix

    Expects data files including:
    1. `prefix.bedpe` draws Bezier curve between paired reads
    2. `prefix.sizes` draws length of the contig/scaffold
    3. `prefix.gaps.bed` mark the position of the gaps in sequence
    4. `prefix.bed.coverage` plots the base coverage
    5. `prefix.pairs.bed.coverage` plots the clone coverage

    See assembly.coverage.posmap() for the generation of these files.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser,
    # so implementation notes are kept in comments.
    #
    # args: command-line arguments; exactly one positional (the prefix, which
    #       is also used as the scaffold name) must remain after parsing.
    # Output: "<prefix>.pdf" written via savefig().
    from jcvi.graphics.glyph import Bezier

    p = OptionParser(qc.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # `not None` is True, so a usage error exits with status 1, matching
        # the other commands in this file.
        sys.exit(not p.print_help())

    prefix, = args
    scf = prefix

    # All these files *must* be present in the current folder
    bedpefile = prefix + ".bedpe"
    fastafile = prefix + ".fasta"
    sizesfile = prefix + ".sizes"
    gapsbedfile = prefix + ".gaps.bed"
    bedfile = prefix + ".bed"
    pairsbedfile = prefix + ".pairs.bed"

    sizes = Sizes(fastafile).mapping
    size = sizes[scf]

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    # the scaffold
    root.add_patch(Rectangle((.1, .15), .8, .03, fc='k'))

    # basecoverage and matecoverage
    ax = fig.add_axes([.1, .45, .8, .45])

    bins = 200  # Smooth the curve
    basecoverage = Coverage(bedfile, sizesfile)
    matecoverage = Coverage(pairsbedfile, sizesfile)

    x, y = basecoverage.get_plot_data(scf, bins=bins)
    baseline, = ax.plot(x, y, 'g-')
    x, y = matecoverage.get_plot_data(scf, bins=bins)
    mateline, = ax.plot(x, y, 'r-')
    legends = ("Base coverage", "Mate coverage")
    leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)
    ax.set_xlim(0, size)

    # Read the pairs. The scaffold and clone-name columns are discarded so
    # that `scf` (used for the label below) keeps the requested prefix.
    pairs = []
    with open(bedpefile) as fp:
        for row in fp:
            _, astart, aend, _, bstart, bend, _ = row.split()
            astart, bstart = int(astart), int(bstart)
            aend, bend = int(aend), int(bend)
            start = min(astart, bstart) + 1
            end = max(aend, bend)
            pairs.append((start, end))

    bpratio = .8 / size
    cutoff = 1000  # inserts smaller than this are not plotted

    def pos(bp):
        # this converts from base => x-coordinate
        return .1 + bp * bpratio

    # draw the read pairs as arcs sitting on top of the scaffold bar
    ypos = .15 + .03
    for start, end in pairs:
        dist = end - start

        if dist < cutoff:
            continue

        dist = min(dist, 10000)
        # 10Kb == .25 canvas height
        height = .25 * dist / 10000
        xstart = pos(start)
        xend = pos(end)
        p0 = (xstart, ypos)
        p1 = (xstart, ypos + height)
        p2 = (xend, ypos + height)
        p3 = (xend, ypos)
        Bezier(root, p0, p1, p2, p3)

    # gaps on the scaffold, drawn as white boxes over the black bar
    with open(gapsbedfile) as fp:
        for row in fp:
            b = BedLine(row)
            xstart = pos(b.start)
            xend = pos(b.end)
            root.add_patch(
                Rectangle((xstart, .15), xend - xstart, .03, fc='w'))

    # The scaffold label is drawn once (it used to be drawn twice).
    warn_msg = "Only the inserts > {0}bp are shown".format(cutoff)
    root.text(.5, .1, scf, color='b', ha="center")
    root.text(.5, .05, warn_msg, color='gray', ha="center")

    # clean up and output
    set_human_base_axis(ax)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = prefix + ".pdf"
    savefig(figname, dpi=300)
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser,
    # so implementation notes are kept in comments.
    #
    # args: command-line arguments; exactly one positional (the image file)
    #       must remain after parsing.
    # Returns: the list of Seed objects that were detected and kept.
    # Side effects: writes seed statistics to opts.outfile and saves a
    #       four-panel summary figure to "<outdir>/<prefix>.<format>".
    # Raises: ValueError when the requested edge filter is not recognised.
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pngfile,) = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != ".":
        mkdir(outdir)
    if calib:
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)

    nbcolor = opts.changeBackground
    pngfile = convert_background(pngfile, nbcolor)
    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile,
        pf,
        outdir=outdir,
        rotate=opts.rotate,
        rows=rows,
        cols=cols,
        labelrows=labelrows,
        labelcols=labelcols,
    )

    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(
        ncols=4, nrows=1, figsize=(iopts.w, iopts.h)
    )

    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    else:
        # Previously an unknown filter surfaced as an unbound `edges` below.
        raise ValueError("Unknown edge filter: {0}".format(ff))
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance, threshold_rel=0.05, indices=False)
        coordinates = peak_local_max(distance, threshold_rel=0.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering: bounds are percentages of the canvas area
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug(
        "Find objects with pixels between {0} ({1}%) and {2} ({3}%)".format(
            min_size, opts.minsize, max_size, opts.maxsize
        )
    )

    # Plotting
    ax1.set_title("Original picture")
    ax1.imshow(oimg)

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    # The resulting text is unchanged.
    params = r"{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title("Edge detection\n({0})".format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.")
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title("Object detection")
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    for i, props in enumerate(rp):
        i += 1  # seeds are numbered from 1
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        # Draw the two axes of the fitted ellipse through the centroid
        ax2.plot((x0 - major_dx, x0 + major_dx), (y0 + major_dy, y0 - major_dy), "r-")
        ax2.plot((x0 - minor_dx, x0 + minor_dx), (y0 - minor_dy, y0 + minor_dy), "r-")

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * 0.35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug(
            "Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".format(
                i, npixels, len(pixels), 100.0 * npixels / canvas_size
            )
        )

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle(
            (minc, minr), maxc - minc, maxr - minr, fill=False, ec="w", lw=1
        )
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc, mr, "{0}".format(i), color="w", ha="center", va="center", size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(0.1, 0.92, "File: {0}".format(latex(filename)), color="g")
    ax4.text(0.1, 0.86, "Label: {0}".format(latex(accession)), color="m")
    yy = 0.8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        if i > 7:
            # Only the first seven seeds are listed on the summary panel
            continue
        ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k"))
        ax4.text(0.1, yy, o.pixeltag, va="center")
        yy -= 0.04
        ax4.add_patch(
            Rectangle((0.1, yy - 0.025), 0.12, 0.05, lw=0, fc=rgb_to_hex(o.rgb))
        )
        ax4.text(0.27, yy, o.hashtag, va="center")
        yy -= 0.06
    ax4.text(
        0.1,
        yy,
        "(A total of {0} objects displayed)".format(nb_labels),
        color="darkslategray",
    )
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family="Helvetica", size=8)
        ax.set_yticklabels(yticklabels, family="Helvetica", size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
def blastplot(
    ax,
    blastfile,
    qsizes,
    ssizes,
    qbed,
    sbed,
    style="dot",
    sampleN=None,
    baseticks=False,
    insetLabels=False,
    stripNames=False,
    highlights=None,
):
    """
    Draw a dot plot of the hits in `blastfile` onto `ax`.

    ax: axes receiving the hits, break lines and highlight patches.
    blastfile: path to the BLAST tabular file, one hit per line.
    qsizes, ssizes: size objects for query (x) and subject (y); they provide
        get_position(), totalsize and get_breaks().
    qbed, sbed: optional BED objects; when given, hits whose name is not in
        the BED order are skipped and BED coordinates replace hit coordinates.
    style: one of DotStyles ("line", "circle" and "dot" are handled here).
    sampleN: if set and more hits were read, plot a random sample of this size.
    baseticks: draw denser, outward tick marks.
    insetLabels: draw x chromosome labels inside `ax` and omit the y labels.
    stripNames: drop the last ".suffix" from query and subject names.
    highlights: a list of BedLine (subject only) or a pair
        (query highlights, subject highlights).

    Returns None; when no hit could be placed an error is logged and nothing
    is drawn.

    NOTE(review): `root` is not a parameter and is not assigned here, so it
    is resolved as a global when labels or base ticks are drawn - confirm the
    calling module defines it.
    """
    assert style in DotStyles

    qorder = qbed.order if qbed else None
    sorder = sbed.order if sbed else None

    data = []
    # Read every hit up front so the file handle is released promptly
    with open(blastfile) as fp:
        for row in fp:
            b = BlastLine(row)
            query, subject = b.query, b.subject

            if stripNames:
                query = query.rsplit(".", 1)[0]
                subject = subject.rsplit(".", 1)[0]

            if qorder:
                if query not in qorder:
                    continue
                _, q = qorder[query]
                query = q.seqid
                qstart, qend = q.start, q.end
            else:
                qstart, qend = b.qstart, b.qstop

            if sorder:
                if subject not in sorder:
                    continue
                _, s = sorder[subject]
                subject = s.seqid
                sstart, send = s.start, s.end
            else:
                sstart, send = b.sstart, b.sstop

            qi = qsizes.get_position(query, qstart)
            qj = qsizes.get_position(query, qend)
            si = ssizes.get_position(subject, sstart)
            sj = ssizes.get_position(subject, send)

            if None in (qi, si):
                continue
            data.append(((qi, qj), (si, sj)))

    if sampleN and len(data) > sampleN:
        data = sample(data, sampleN)

    if not data:
        return logging.error("no blast data imported")

    xsize, ysize = qsizes.totalsize, ssizes.totalsize
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))

    if style == "line":
        for xspan, yspan in data:
            ax.plot(xspan, yspan, "ro-", mfc="w", mec="r", ms=3)
    else:
        # Only the start coordinate of each hit is plotted
        data = [(x[0], y[0]) for x, y in data]
        x, y = zip(*data)

        if style == "circle":
            ax.plot(x, y, "mo", mfc="w", mec="m", ms=3)
        elif style == "dot":
            ax.scatter(x, y, s=3, lw=0)

    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    # Size thresholds below which a chromosome is skipped (0 keeps all)
    ignore_size_x = ignore_size_y = 0

    # plot the chromosome breaks
    logging.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes)))
    for (seqid, beg, end) in qsizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        if ignore:
            continue
        seqid = rename_seqid(seqid)

        xchr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot([end, end], ylim, "-", lw=1, color="grey")

    for (seqid, beg, end) in ssizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        if ignore:
            continue
        seqid = rename_seqid(seqid)

        ychr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot(xlim, [end, end], "-", lw=1, color="grey")

    # plot the chromosome labels
    for label, pos, ignore in xchr_labels:
        if not ignore:
            if insetLabels:
                ax.text(pos, 0, label, size=8, ha="center", va="top", color="grey")
            else:
                pos = 0.1 + pos * 0.8 / xsize
                root.text(
                    pos,
                    0.91,
                    label,
                    size=10,
                    ha="center",
                    va="bottom",
                    rotation=45,
                    color="grey",
                )

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        if not ignore:
            if insetLabels:
                continue
            pos = 0.9 - pos * 0.8 / ysize
            root.text(0.91, pos, label, size=10, va="center", color="grey")

    # Highlight regions based on a list of BedLine
    qhighlights = shighlights = None
    if highlights:
        if isinstance(highlights[0], BedLine):
            shighlights = highlights
        elif len(highlights) == 2:
            qhighlights, shighlights = highlights

    if qhighlights:
        for hl in qhighlights:
            hls = qsizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((hls, 0), hl.span, ysize, fc="r", alpha=0.2, lw=0))
    if shighlights:
        for hl in shighlights:
            hls = ssizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((0, hls), xsize, hl.span, fc="r", alpha=0.2, lw=0))

    if baseticks:

        def increaseDensity(a, ratio=4):
            # Rebuild the tick positions with a step `ratio` times smaller
            assert len(a) > 1
            stepsize = a[1] - a[0]
            newstepsize = int(stepsize / ratio)
            return np.arange(0, a[-1], newstepsize)

        # Increase the density of the ticks
        xticks = ax.get_xticks()
        yticks = ax.get_yticks()
        xticks = increaseDensity(xticks, ratio=2)
        yticks = increaseDensity(yticks, ratio=2)
        ax.set_xticks(xticks)

        # Plot outward ticklines
        for pos in xticks[1:]:
            if pos > xsize:
                continue
            pos = 0.1 + pos * 0.8 / xsize
            root.plot((pos, pos), (0.08, 0.1), "-", color="grey", lw=2)

        for pos in yticks[1:]:
            if pos > ysize:
                continue
            pos = 0.9 - pos * 0.8 / ysize
            root.plot((0.09, 0.1), (pos, pos), "-", color="grey", lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_base_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="gray", size=10)
    plt.setp(ax.get_yticklabels(), rotation=90)
def ld(args):
    """
    %prog ld map

    Calculate pairwise linkage disequilibrium given MSTmap.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser,
    # so implementation notes are kept in comments.
    #
    # args: command-line arguments; exactly one positional (the MSTmap file)
    #       must remain after parsing.
    # Output: "<map>.subsample.bed" and "<map>.subsample.matrix" (rebuilt only
    #       when need_update() says so) and the image
    #       "<map prefix>.subsample.<format>".
    import numpy as np
    from random import sample

    from jcvi.algorithms.matrix import symmetrize

    p = OptionParser(ld.__doc__)
    p.add_option("--subsample", default=500, type="int",
                 help="Subsample markers to speed up [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    mstmap, = args
    subsample = opts.subsample
    data = MSTMap(mstmap)
    # Take random subsample while keeping marker order
    if subsample < data.nmarkers:
        # `range` instead of `xrange` so this also runs on Python 3
        picked = sorted(sample(range(len(data)), subsample))
        data = [data[x] for x in picked]

    markerbedfile = mstmap + ".subsample.bed"
    ldmatrix = mstmap + ".subsample.matrix"

    if need_update(mstmap, (markerbedfile, ldmatrix)):
        nmarkers = len(data)
        # Write and close the marker BED right away: it is read back further
        # down, so it must be fully flushed to disk by then.
        with open(markerbedfile, "w") as fw:
            fw.write("\n".join(x.bedline for x in data) + "\n")
        logging.debug("Write marker set of size {0} to file `{1}`."
                      .format(nmarkers, markerbedfile))

        M = np.zeros((nmarkers, nmarkers), dtype=float)
        # Only i < j pairs are computed; symmetrize() completes the matrix
        for i, j in combinations(range(nmarkers), 2):
            a = data[i]
            b = data[j]
            M[i, j] = calc_ldscore(a.genotype, b.genotype)

        M = symmetrize(M)

        logging.debug("Write LD matrix to file `{0}`.".format(ldmatrix))
        M.tofile(ldmatrix)
    else:
        nmarkers = len(Bed(markerbedfile))
        M = np.fromfile(ldmatrix, dtype="float").reshape(nmarkers, nmarkers)
        logging.debug("LD matrix `{0}` exists ({1}x{1})."
                      .format(ldmatrix, nmarkers))

    from jcvi.graphics.base import plt, savefig, Rectangle, draw_cmap

    plt.rcParams["axes.linewidth"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([.1, .1, .8, .8])  # the heatmap

    ax.matshow(M, cmap=iopts.cmap)

    # Plot chromosomes breaks
    bed = Bed(markerbedfile)
    xsize = len(bed)
    extent = (0, nmarkers)
    chr_labels = []
    ignore_size = 20  # chromosomes spanning fewer markers get no break/label

    for (seqid, beg, end) in bed.get_breaks():
        ignore = abs(end - beg) < ignore_size
        pos = (beg + end) / 2
        chr_labels.append((seqid, pos, ignore))
        if ignore:
            continue
        ax.plot((end, end), extent, "w-", lw=1)
        ax.plot(extent, (end, end), "w-", lw=1)

    # Plot chromosome labels along the top and the left of the heatmap
    for label, pos, ignore in chr_labels:
        pos = .1 + pos * .8 / xsize
        if not ignore:
            root.text(pos, .91, label,
                      ha="center", va="bottom", rotation=45, color="grey")
            root.text(.09, pos, label,
                      ha="right", va="center", color="grey")

    ax.set_xlim(extent)
    ax.set_ylim(extent)
    ax.set_axis_off()

    # The colour bar uses the same colormap as the heat map it describes
    # (`default_cm` is not defined inside this function).
    draw_cmap(root, "Pairwise LD (r2)", 0, 1, cmap=iopts.cmap)

    root.add_patch(Rectangle((.1, .1), .8, .8, fill=False, ec="k", lw=2))
    m = mstmap.split(".")[0]
    root.text(.5, .06, "Linkage Disequilibrium between {0} markers".format(m),
              ha="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = m + ".subsample" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser,
    # so implementation notes are kept in comments.
    #
    # Output: the figure "<bed prefix>.<format>" and, with --imagemap, an HTML
    # image map "<bed prefix>.map".
    p = OptionParser(main.__doc__)
    p.add_option("--title", default="Medicago truncatula v3.5",
                 help="title of the image [default: `%default`]")
    p.add_option("--gauge", default=False, action="store_true",
                 help="draw a gauge with size label [default: %default]")
    p.add_option(
        "--imagemap", default=False, action="store_true",
        help="generate an HTML image map associated with the image [default: %default]"
    )
    p.add_option(
        "--winsize", default=50000, type="int",
        help="if drawing an imagemap, specify the window size (bases) of each map element "
             "[default: %default bp]")
    p.add_option("--empty", help="Write legend for unpainted region")
    opts, args, iopts = p.set_image_options(figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        # `not None` is True, so a usage error exits with status 1, matching
        # the other commands in this file.
        sys.exit(not p.print_help())

    bedfile = args[0]
    mappingfile = None
    if len(args) == 2:
        mappingfile = args[1]

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        mapfh = open(imgmapfile, "w")
        # write() instead of the Python 2 only `print >> fh` statement
        mapfh.write('<map id="' + prefix + '">' + "\n")

    if mappingfile:
        mappings = DictFile(mappingfile, delimiter="\t")
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(
            len(classes), ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    # One color letter per class; zip() stops after the sixth class
    mycolors = "rgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    # Chromosome length = largest feature end seen on that chromosome
    for seqid, blines in groupby(bed, key=(lambda x: x.seqid)):
        blines = list(blines)
        maxlen = max(x.end for x in blines)
        chr_lens[seqid] = maxlen

    # Record centromere positions and replace each accession by its class
    # ('-' when it has none).
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = '-'

    chr_number = len(chr_lens)
    assert chr_number == len(centromeres)

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chr, cent_position) in enumerate(sorted(centromeres.items())):
        clen = chr_lens[chr]
        xx = xstart + a * xinterval + .5 * xwidth
        yy = ystart - cent_position * ratio
        root.text(xx, ystart + .01, chr, ha="center")
        ChromosomeWithCentromere(root, xx, ystart, yy,
                                 ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = .75
    # color the regions
    for chr in sorted(chr_lens.keys()):
        excess = 0
        bac_list = []
        for b in bed.sub_bed(chr):
            idx = chr_idxs[chr]
            klass = b.accn
            start = b.start
            end = b.end
            xx = xstart + idx * xinterval
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle((xx, yystart), xwidth, yyend - yystart,
                          fc=class_colors.get(klass, "w"), lw=0, alpha=alpha))

            if imagemap:
                # `segment` : size of current BAC being investigated + `excess`
                # `excess`  : left-over bases from the previous BAC, as a
                #             result of iterating over `winsize` regions of
                #             `segment`
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                        xx + xwidth, (1 - ystart) + segment_end * ratio
                    mapfh.write('\t' + write_ImageMapLine(
                        tlx, tly, brx, bry, w, h, dpi,
                        chr + ":" + ",".join(bac_list),
                        segment_start, segment_end) + "\n")

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the part of the last window that did not fill a whole
        # `winsize` element.
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                xx + xwidth, (1 - ystart) + segment_end * ratio
            mapfh.write('\t' + write_ImageMapLine(
                tlx, tly, brx, bry, w, h, dpi,
                chr + ":" + ",".join(bac_list),
                segment_start, segment_end) + "\n")

    if imagemap:
        mapfh.write('</map>' + "\n")
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if opts.gauge:
        xstart, ystart = .9, .85
        Gauge(root, xstart, ystart - r, ystart, max_chr_len)

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha))
        root.text(xstart + xwidth + .01, yy, klass, fontsize=10)
        xstart += xinterval

    empty = opts.empty
    if empty:
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1))
        root.text(xstart + xwidth + .01, yy, empty, fontsize=10)

    root.text(.5, .95, opts.title, fontstyle="italic", ha="center", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(figname, dpi=dpi, iopts=iopts)
def bites(args):
    """
    %prog bites

    Illustrate the pipeline for automated bite discovery.
    """
    # NOTE: the docstring above doubles as the usage text given to OptionParser,
    # so implementation notes are kept in comments.
    #
    # args: command-line arguments, handed to the option parser.
    # Output: "<fname()>.pdf" written via savefig().

    # Use this command's own docstring (not the module's) as usage text and
    # parse the arguments that were actually passed in.
    p = OptionParser(bites.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (6, 6))
    root = fig.add_axes([0, 0, 1, 1])

    # HSP pairs
    hsps = (
        ((50, 150), (60, 180)),
        ((190, 250), (160, 235)),
        ((300, 360), (270, 330)),
        ((430, 470), (450, 490)),
        ((570, 620), (493, 543)),
        ((540, 555), (370, 385)),  # non-collinear hsps
    )

    titlepos = (0.9, 0.65, 0.4)
    titles = ("Compare orthologous region", "Find collinear HSPs", "Scan paired gaps")
    ytip = 0.01
    mrange = 650.0

    def m(x):
        # Map a base position in [0, mrange] onto the canvas x-range [0.1, 0.8]
        return x / mrange * 0.7 + 0.1

    for i, (ya, title) in enumerate(zip(titlepos, titles)):
        yb = ya - 0.1
        plt.plot((0.1, 0.8), (ya, ya), "-", color="gray", lw=2, zorder=1)
        plt.plot((0.1, 0.8), (yb, yb), "-", color="gray", lw=2, zorder=1)
        RoundLabel(root, 0.5, ya + 4 * ytip, title)
        root.text(0.9, ya, "A. thaliana", ha="center", va="center")
        root.text(0.9, yb, "B. rapa", ha="center", va="center")
        # From the second panel on, the non-collinear HSP is dropped
        myhsps = hsps
        if i >= 1:
            myhsps = hsps[:-1]
        for (a, b), (c, d) in myhsps:
            a, b, c, d = [m(x) for x in (a, b, c, d)]
            r1 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fc="r", lw=0, zorder=2)
            r2 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fc="r", lw=0, zorder=2)
            r3 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fill=False, zorder=3)
            r4 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fill=False, zorder=3)
            r5 = Polygon(
                ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip), (b, ya - ytip)),
                fc="r",
                alpha=0.2,
            )
            rr = (r1, r2, r3, r4, r5)
            if i == 2:
                # The last panel omits the shaded connector polygon
                rr = rr[:-1]
            for r in rr:
                root.add_patch(r)

    # Gap pairs: gaps between consecutive HSPs of the last panel
    hspa, hspb = zip(*myhsps)
    gapa, gapb = [], []
    for (a, b), (c, d) in pairwise(hspa):
        gapa.append((b + 1, c - 1))
    for (a, b), (c, d) in pairwise(hspb):
        gapb.append((b + 1, c - 1))
    gaps = zip(gapa, gapb)
    tpos = titlepos[-1]

    yy = tpos - 0.05
    for i, ((a, b), (c, d)) in enumerate(gaps):
        i += 1
        a, b, c, d = [m(x) for x in (a, b, c, d)]
        xx = (a + b + c + d) / 4
        TextCircle(root, xx, yy, str(i))

    # Bites
    ystart = 0.24
    ytip = 0.05
    bite_labels = (
        ("Bite(40=>-15)", True),
        ("Bite(50=>35)", False),
        ("Bite(70=>120)", False),
        ("Bite(100=>3)", True),
    )
    for i, (bite, selected) in enumerate(bite_labels):
        # Two entries per row: even index on the left, odd on the right.
        # Floor division keeps both entries of a pair on the same row; true
        # division would give every entry its own y position.
        xx = 0.15 if (i % 2 == 0) else 0.55
        yy = ystart - i // 2 * ytip
        i += 1
        TextCircle(root, xx, yy, str(i))
        color = "k" if selected else "gray"
        root.text(xx + ytip, yy, bite, size=10, color=color, va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)