def draw_gauge(ax, margin, maxl, rightmargin=None, optimal=7):
    """
    Draw a gauge (ruler with labelled ticks) on the top of the canvas.

    ax: axes to draw on, addressed in 0-1 canvas coordinates
    margin: left margin; the gauge is drawn at y = 1 - margin
    maxl: length to cover, rounded to a whole number of strides
    rightmargin: right margin, falls back to `margin` when not given
    optimal: not used by this function; kept so existing callers still work

    Returns the stride divided by the canvas width of one stride, i.e. the
    number of length units represented by one canvas unit.
    """
    rightmargin = rightmargin or margin
    ax.plot([margin, 1 - rightmargin], [1 - margin, 1 - margin], "k-", lw=2)

    best_stride = autoscale(maxl)
    nintervals = int(round(maxl * 1. / best_stride))
    newl = nintervals * best_stride

    xx, yy = margin, 1 - margin
    tip = .005
    xinterval = (1 - margin - rightmargin) / nintervals

    # `suffix` and `target` used to be bound only when the stride label ended
    # in 'b'; any other label led to an unbound-name error further down.
    suffix = ""
    label_kwargs = dict(precision=0)
    stride_label = human_size(best_stride)
    if stride_label[-1] == 'b':
        suffix = stride_label[-2:]
        # Force every tick label into the same unit as the stride itself
        label_kwargs["target"] = suffix

    for i in xrange(0, newl + 1, best_stride):
        label = human_size(i, **label_kwargs)
        if label[-1] == 'b':
            # Strip the unit from the tick; it is printed once at the end
            label, suffix = label[:-2], label[-2:]
        ax.plot([xx, xx], [yy, yy + tip], "k-", lw=2)
        ax.text(xx, yy + 2 * tip, _(label), ha="center", size=13)
        xx += xinterval

    # Step back to the last tick, then nudge right to place the unit label
    xx += 4 * tip - xinterval
    ax.text(xx, yy + 2 * tip, _(suffix))

    return best_stride / xinterval
def draw_cmap(ax, cmap_text, vmin, vmax, cmap=None, reverse=False):
    """
    Draw a small horizontal color bar near the bottom of `ax`.

    The bar is captioned with `cmap_text` on its left, and the minimum,
    midpoint and maximum values are printed underneath it. `reverse` flips
    the direction of the gradient.
    """
    if reverse:
        ramp = [1, 0]
    else:
        ramp = [0, 1]
    gradient = np.array([ramp, ramp])

    left, right = .5, .9
    bottom, top = .02, .04
    ax.imshow(gradient, extent=(left, right, bottom, top), cmap=cmap)
    ax.text(left - .01, (bottom + top) * .5, _(cmap_text),
            ha="right", va="center", size=10)

    # (x position, value) for the three labels below the bar
    ticks = (
        (left, vmin),
        ((left + right) * .5, (vmin + vmax) * .5),
        (right, vmax),
    )
    for xpos, value in ticks:
        ax.text(xpos, bottom - .005, _("%.1f" % value),
                ha="center", va="top", size=10)
def __init__(self, ax, x1, x2, t, **kwargs):
    """
    Write the text `t` on `ax`, horizontally centered at (x1, x2), inside a
    rounded box with no fill. Extra keyword arguments are accepted but are
    not used here.
    """
    box = {"boxstyle": "round", "fill": False}
    ax.text(x1, x2, _(t), ha="center", bbox=box)
def gff(args):
    """
    %prog gff *.gff

    Draw exons for genes based on gff files. Each gff file should contain only
    one gene, and only the "mRNA" and "CDS" feature will be drawn on the canvas.
    """
    # NOTE: the docstring above doubles as the usage text shown by the parser.
    align_choices = ("left", "center", "right")
    p = OptionParser(gff.__doc__)
    p.add_option("--align", default="left", choices=align_choices,
                 help="Horizontal alignment {0} [default: %default]".
                 format("|".join(align_choices)))
    p.add_option("--noUTR", default=False, action="store_true",
                 help="Do not plot UTRs [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    gffiles = args
    ngenes = len(gffiles)

    # `canvas` was previously passed to get_setups() as a literal only, while
    # the row spacing below referenced a name that was never bound here.
    canvas = .6
    setups, ratio = get_setups(gffiles, canvas=canvas, noUTR=opts.noUTR)
    align = opts.align
    # Anchor x-position of the glyphs: left-aligned at .2, otherwise at .8
    xs = .2 if align == "left" else .8
    yinterval = canvas / ngenes
    ys = .8
    tip = .01
    for genename, mrnabed, cdsbeds in setups:
        ExonGlyph(root, xs, ys, mrnabed, cdsbeds, ratio=ratio, align=align)
        genename = _(genename)
        # The gene name goes on the outer side of the anchor; "center"
        # alignment draws no name.
        if align == "left":
            root.text(xs - tip, ys, genename, ha="right", va="center")
        elif align == "right":
            root.text(xs + tip, ys, genename, ha="left", va="center")
        ys -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "exons.pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
def plot_ks_dist(ax, data, interval, components, ks_max, color='r'):
    """
    Plot the histogram of `data` on `ax` together with the fitted mixture
    curve, and put a "Ks=..." label on each of the `components` peaks.

    Returns the histogram line and the mixture line, e.g. for a legend.
    """
    from jcvi.graphics.base import _

    (line,) = my_hist(ax, data, interval, ks_max, color=color)
    logging.debug("Total {0} pairs after filtering.".format(len(data)))

    probs, mus, variances = get_mixture(data, components)

    # Evaluate the fitted mixture on a fine grid
    grid = np.arange(0.001, ks_max, .001)
    density = lognormpdf_mix(grid, probs, mus, variances, interval)
    (line_mixture,) = ax.plot(grid, density, ':', color=color, lw=3)

    for idx in xrange(components):
        peak = exp(mus[idx])
        height = lognormpdf_mix(peak, probs, mus, variances, interval)
        box = dict(ec='w', fc=color, alpha=.6, boxstyle='round')
        ax.text(peak, height, _("Ks=%.2f" % peak),
                color="w", size=10, bbox=box)

    return line, line_mixture
def scaffolding(ax, scaffoldID, blastf, qsizes, ssizes, qbed, sbed,
                highlights=None):
    """
    Draw one evidence panel on `ax` via blastplot() and label its x-axis.

    qsizes, qbed are properties for the evidences;
    ssizes, sbed are properties for the current scaffoldID.
    """
    from jcvi.graphics.blastplot import blastplot

    blastplot(ax, blastf, qsizes, ssizes, qbed, sbed,
              style="circle", insetLabels=True, stripNames=True,
              highlights=highlights)

    # FPC_scf.bed => FPC
    stem = qbed.filename.split(".")[0]
    xtitle = stem.split("_")[0]
    if xtitle == "FPC":
        # Blank out every tick label on the x-axis for this panel
        nlabels = len(ax.get_xticklabels())
        ax.set_xticklabels([""] * nlabels)
    ax.set_xlabel(_(xtitle), color="g")

    for tickline in ax.get_xticklines():
        tickline.set_visible(False)
def draw(self, roundrect=False):
    """
    Draw this track: one HorizontalChromosome per seqid laid out left to
    right with `gap` between them, numbered circles on selected chromosomes,
    and the track label on the left or right side of the canvas.

    Does nothing when the track is empty.
    """
    if self.empty:
        return

    y, color, ax = self.y, self.color, self.ax
    cursor = self.xstart
    gap, va = self.gap, self.va
    nseqids = len(self.seqids)
    tr = self.tr
    # The number circle is offset .02 from the track; negative for "bottom"
    pad = -.02 if va == "bottom" else .02

    for i, sid in enumerate(self.seqids):
        span = self.sizes[sid]
        xend = cursor + self.ratio * span
        chrom = HorizontalChromosome(ax, cursor, xend, y,
                                     height=self.height, lw=self.lw,
                                     fc=color, roundrect=roundrect)
        chrom.set_transform(tr)

        # Keep the part after the last underscore, drop letters, and
        # normalize what is left as an integer (e.g. strips leading zeros)
        tail = sid.rsplit("_", 1)[-1]
        digits = "".join(ch for ch in tail if ch not in string.letters)
        number = str(int(digits))

        xmid = (cursor + xend) / 2
        cursor = xend + gap

        # Crowded tracks only number every 10th chromosome; tracks with
        # fewer than 5 chromosomes are not numbered at all
        thinned = nseqids > 2 * MaxSeqids and (i + 1) % 10 != 0
        if thinned or nseqids < 5:
            continue

        TextCircle(ax, xmid, y + pad, _(number), radius=.01,
                   fc="w", color=color, size=10, transform=tr)

    # Track label goes on whichever side the track midpoint falls
    if (self.xstart + self.xend) / 2 <= .5:
        xp = .1
    else:
        xp = .92
    label = markup(self.label)
    textcolor = "k" if color == "gainsboro" else color
    ax.text(xp, y + self.height * .6, label,
            ha="center", color=textcolor, transform=tr)
def plot_one_scaffold(scaffoldID, ssizes, sbed, trios, imagename, iopts,
                      highlights=None):
    """
    Plot one panel per (blastfile, qsizes, qbed) trio side by side for the
    given scaffold, add a title carrying the scaffold size, and save the
    figure to `imagename`.
    """
    ntrios = len(trios)
    fig = plt.figure(1, (14, 8))
    plt.cla()
    plt.clf()

    root = fig.add_axes([0, 0, 1, 1])
    axes = []
    for col in range(1, ntrios + 1):
        axes.append(fig.add_subplot(1, ntrios, col))

    scafsize = ssizes.get_size(scaffoldID)
    for (blastf, qsizes, qbed), ax in zip(trios, axes):
        scaffolding(ax, scaffoldID, blastf, qsizes, ssizes, qbed, sbed,
                    highlights=highlights)

    title = "{0} (size={1})".format(scaffoldID, thousands(scafsize))
    root.text(.5, .95, _(title), size=18, ha="center", color='b')

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    plt.savefig(imagename, dpi=iopts.dpi)
    logging.debug("Print image to `{0}` {1}".format(imagename, iopts))
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings. The
    paired read span will be converted to a new bedfile that contain the happy
    mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    # NOTE: the docstring above doubles as the usage text shown by the parser.
    from jcvi.formats.bed import mates, bedpe

    p = OptionParser(coverage.__doc__)
    p.add_option("--ymax", default=None, type="int",
                 help="Limit ymax [default: %default]")
    p.add_option("--spans", default=False, action="store_true",
                 help="BED files already contain clone spans [default: %default]")
    opts, args, iopts = set_image_options(p, args, figsize="8x5")

    if len(args) < 3:
        sys.exit(not p.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines = []
    legends = []
    # Colors are reused in a cycle; zipping the files against this string
    # used to silently skip every BED file after the sixth.
    palette = "rgbcky"
    yy = .9
    for i, bedfile in enumerate(bedfiles):
        c = palette[i % len(palette)]
        if not opts.spans:
            # Derive (or reuse, when up to date) the clone-span BED file
            pf = bedfile.rsplit(".", 1)[0]
            matesfile = pf + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, matesbedfile = mates([bedfile, "--lib"])

            bedspanfile = pf + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                bedpefile, bedspanfile = bedpe(
                    [bedfile, "--span", "--mates={0}".format(matesfile)])
            bedfile = bedspanfile

        bedsum = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - bedsum

        legend = _(bedfile.split(".")[0])
        msg = "{0}: {1} bp not covered".format(legend,
                                               thousands(notcoveredbases))
        sys.stderr.write(msg + "\n")
        ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes)
        yy -= .08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5)
        lines.append(line)
        legends.append(legend)

    leg = ax.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    # Size of one bin, reported in Kb
    ylabel = "Average depth per {0}Kb".format(size / bins / 1000)
    ax.set_xlim(0, size)
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    plt.savefig(figname, dpi=iopts.dpi)
    logging.debug("Figure saved to `{0}` {1}.".format(figname, iopts))
def qc(args):
    """
    %prog qc prefix

    Expects data files including:
    1. `prefix.bedpe` draws Bezier curve between paired reads
    2. `prefix.sizes` draws length of the contig/scaffold
    3. `prefix.gaps.bed` mark the position of the gaps in sequence
    4. `prefix.bed.coverage` plots the base coverage
    5. `prefix.pairs.bed.coverage` plots the clone coverage

    See assembly.coverage.posmap() for the generation of these files.
    """
    # NOTE: the docstring above doubles as the usage text shown by the parser.
    from jcvi.graphics.glyph import Bezier

    p = OptionParser(qc.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # Non-zero exit status on bad usage, like the sibling commands
        sys.exit(not p.print_help())

    prefix, = args
    scf = prefix

    # All these files *must* be present in the current folder
    bedpefile = prefix + ".bedpe"
    fastafile = prefix + ".fasta"
    sizesfile = prefix + ".sizes"
    gapsbedfile = prefix + ".gaps.bed"
    bedfile = prefix + ".bed"
    pairsbedfile = prefix + ".pairs.bed"

    sizes = Sizes(fastafile).mapping
    size = sizes[scf]

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    # the scaffold
    root.add_patch(Rectangle((.1, .15), .8, .03, fc='k'))

    # basecoverage and matecoverage
    ax = fig.add_axes([.1, .45, .8, .45])

    bins = 200  # Smooth the curve
    # `window` was referenced in the log message without ever being bound
    window = size / bins
    logging.debug("Coverage curve use window size of {0} bases.".format(window))
    basecoverage = Coverage(bedfile, sizesfile)
    matecoverage = Coverage(pairsbedfile, sizesfile)

    x, y = basecoverage.get_plot_data(scf, bins=bins)
    baseline, = ax.plot(x, y, 'g-')
    x, y = matecoverage.get_plot_data(scf, bins=bins)
    mateline, = ax.plot(x, y, 'r-')
    legends = (_("Base coverage"), _("Mate coverage"))
    leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)
    ax.set_xlim(0, size)

    # draw the read pairs
    pairs = []
    with open(bedpefile) as fp:
        for row in fp:
            # Separate names for the two seqid columns so that `scf` (used
            # for the label below) is not overwritten by the last row
            ascf, astart, aend, bscf, bstart, bend, clonename = row.split()
            astart, bstart = int(astart), int(bstart)
            aend, bend = int(aend), int(bend)
            start = min(astart, bstart) + 1
            end = max(aend, bend)
            pairs.append((start, end))

    bpratio = .8 / size
    cutoff = 1000  # inserts smaller than this are not plotted
    # this convert from base => x-coordinate
    pos = lambda x: (.1 + x * bpratio)
    ypos = .15 + .03
    for start, end in pairs:
        dist = end - start

        if dist < cutoff:
            continue

        dist = min(dist, 10000)
        # 10Kb == .25 canvas height
        height = .25 * dist / 10000
        xstart = pos(start)
        xend = pos(end)
        p0 = (xstart, ypos)
        p1 = (xstart, ypos + height)
        p2 = (xend, ypos + height)
        p3 = (xend, ypos)
        Bezier(root, p0, p1, p2, p3)

    # gaps on the scaffold
    with open(gapsbedfile) as fp:
        for row in fp:
            b = BedLine(row)
            start, end = b.start, b.end
            xstart = pos(start)
            xend = pos(end)
            root.add_patch(Rectangle((xstart, .15), xend - xstart, .03, fc='w'))

    # The scaffold label used to be drawn twice at the same position
    root.text(.5, .1, _(scf), color='b', ha="center")
    warn_msg = "Only the inserts > {0}bp are shown".format(cutoff)
    root.text(.5, .05, _(warn_msg), color='gray', ha="center")

    # clean up and output
    set_human_base_axis(ax)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = prefix + ".pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
def demo(args):
    """
    %prog demo

    Draw sample gene features to illustrate the various fates of duplicate
    genes - to be used in a book chapter.
    """
    # NOTE: the docstring above doubles as the usage text shown by the parser.
    p = OptionParser(demo.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    panel_space = .23
    dup_space = .025
    # Draw a gene and two regulatory elements at these arbitrary locations
    locs = [
        (.5, .9),                                  # ancestral gene
        (.5, .9 - panel_space + dup_space),        # identical copies
        (.5, .9 - panel_space - dup_space),
        (.5, .9 - 2 * panel_space + dup_space),    # degenerate copies
        (.5, .9 - 2 * panel_space - dup_space),
        (.2, .9 - 3 * panel_space + dup_space),    # sub-functionalization
        (.2, .9 - 3 * panel_space - dup_space),
        (.5, .9 - 3 * panel_space + dup_space),    # neo-functionalization
        (.5, .9 - 3 * panel_space - dup_space),
        (.8, .9 - 3 * panel_space + dup_space),    # non-functionalization
        (.8, .9 - 3 * panel_space - dup_space),
    ]

    # One two-letter code per location: the fill colors of the two
    # regulatory elements, where 'w' means the element is not drawn
    default_regulator = "gm"
    regulators = [
        default_regulator, default_regulator, default_regulator,
        "wm", default_regulator,
        "wm", "gw",
        "wb", default_regulator,
        "ww", default_regulator,
    ]

    width = .24
    rotation = 30
    tip = .02
    for i, ((xx, yy), regulator) in enumerate(zip(locs, regulators)):
        # gene backbone
        Glyph(root, xx - .5 * width, xx + .5 * width, yy)
        if i == 9:  # upper copy for non-functionalization
            continue

        # coding region
        Glyph(root, xx - .16 * width, xx + .45 * width, yy, fc="k")

        # two regulatory elements
        x1, x2 = xx - .4 * width, xx - .28 * width
        for cx, fc in zip((x1, x2), regulator):
            if fc != 'w':
                DoubleCircle(root, cx, yy, fc=fc)

        ya = yy + tip
        if i == 0:
            root.text(x1, ya, _("Flower"), rotation=rotation, va="bottom")
            root.text(x2, ya, _("Root"), rotation=rotation, va="bottom")
        elif i == 7:
            root.text(x2, ya, _("Leaf"), rotation=rotation, va="bottom")

    # Draw arrows between panels (center)
    arrow_dist = .08
    center_x = .5
    for tip_y in (.3, .53, .76):
        root.annotate(" ", (center_x, tip_y),
                      (center_x, tip_y + arrow_dist),
                      arrowprops=arrowprops)

    # Arrows fanning out from the center to the left and right panels
    tip_y = .3
    for tip_x in (.2, .8):
        root.annotate(" ", (tip_x, tip_y), (.5, tip_y + arrow_dist),
                      arrowprops=arrowprops)

    # Duplication, Degeneration
    for yy, process in ((.76, "Duplication"), (.53, "Degeneration")):
        root.text(.6, yy + .02, process, fontweight="bold")

    # Label of fates
    fate_labels = (
        (.2, "Subfunctionalization"),
        (.5, "Neofunctionalization"),
        (.8, "Nonfunctionalization"),
    )
    for xx, fate in fate_labels:
        RoundLabel(root, xx, .05, fate)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "demo.pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
def draw_tree(ax, tx, rmargin=.3, outgroup=None, gffdir=None, sizes=None):
    """
    Draw the tree `tx` on `ax` as a rectangular cladogram with support
    values and a scale bar.

    rmargin: width reserved on the right for exon glyphs and size labels
    outgroup: leaf names used for rooting; midpoint rooting when not given
    gffdir: folder with gff files, used to draw exon structures per leaf
    sizes: file with sequence sizes, used to print the length in "aa"
    """
    tree = Tree(tx)
    if outgroup:
        root_node = tree.get_common_ancestor(*outgroup)
    else:
        # Calculate the midpoint node
        root_node = tree.get_midpoint_outgroup()
    tree.set_outgroup(root_node)

    farthest, max_dist = tree.get_farthest_leaf()

    margin = .05
    xstart = margin
    ystart = 1 - margin
    canvas = 1 - rmargin - 2 * margin
    tip = .005

    # scale the tree
    scale = canvas / max_dist

    num_leaves = len(tree.get_leaf_names())
    yinterval = canvas / (num_leaves + 1)

    # get exons structures, if any
    structures = {}
    if gffdir:
        gffiles = glob("{0}/*.gff*".format(gffdir))
        setups, ratio = get_setups(gffiles, canvas=rmargin / 2, noUTR=True)
        for gene, mrna, cds in setups:
            structures[gene] = (mrna, cds)

    if sizes:
        sizes = Sizes(sizes).mapping

    coords = {}   # node => (x, y) of where the node was drawn
    i = 0         # number of leaves placed so far
    for node in tree.traverse("postorder"):
        xx = xstart + scale * node.get_distance(tree)

        if not node.is_leaf():
            children = [coords[x] for x in node.get_children()]
            children_y = [cy for cx, cy in children]

            # plot the vertical bar
            ax.plot((xx, xx), (min(children_y), max(children_y)), "k-")
            # plot the horizontal bar
            for cx, cy in children:
                ax.plot((xx, cx), (cy, cy), "k-")

            # Internal node sits at the mean height of its children
            yy = sum(children_y) * 1. / len(children_y)
            support = int(abs(node.support * 100))
            ax.text(xx, yy, _("{0:d}".format(support)),
                    ha="right", size=10)
        else:
            yy = ystart - i * yinterval
            i += 1
            ax.text(xx + tip, yy, node.name, va="center",
                    fontstyle="italic", size=8)

            gname = node.name.split("_")[0]
            if gname in structures:
                mrnabed, cdsbeds = structures[gname]
                ExonGlyph(ax, 1 - rmargin / 2, yy, mrnabed, cdsbeds,
                          align="right", ratio=ratio)
            if sizes and gname in sizes:
                # base pair converted to amino acid
                aa = sizes[gname] / 3 - 1
                ax.text(1 - rmargin / 2 + tip, yy, _("{0}aa".format(aa)))

        coords[node] = (xx, yy)

    # scale bar, one row below the last leaf
    br = .1
    x1 = xstart + .1
    x2 = x1 + br * scale
    yy = ystart - i * yinterval
    for xtick in (x1, x2):
        ax.plot([xtick, xtick], [yy - tip, yy + tip], "k-")
    ax.plot([x1, x2], [yy, yy], "k-")
    ax.text((x1 + x2) / 2, yy - tip, _("{0:g}".format(br)),
            va="top", ha="center", size=10)
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    # NOTE: the docstring above doubles as the usage text shown by the parser.
    p = OptionParser(main.__doc__)
    p.add_option("--title", default="Medicago truncatula v3.5",
                 help="title of the image [default: `%default`]")
    p.add_option("--gauge", default=False, action="store_true",
                 help="draw a gauge with size label [default: %default]")
    p.add_option("--imagemap", default=False, action="store_true",
                 help="generate an HTML image map associated with the image [default: %default]")
    p.add_option("--winsize", default=50000, type="int",
                 help="if drawing an imagemap, specify the window size (bases) of each map element "
                      "[default: %default bp]")
    opts, args, iopts = set_image_options(p, figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        # Non-zero exit status on bad usage, like the sibling commands
        sys.exit(not p.print_help())

    bedfile = args[0]
    mappingfile = None
    if len(args) == 2:
        mappingfile = args[1]

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        # Lines are collected here and written in one go at the end, so the
        # file handle is never left open if plotting fails midway.
        maplines = ['<map id="' + prefix + '">']

    if mappingfile:
        with open(mappingfile) as fp:
            mappings = dict(x.split() for x in fp)
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(len(classes),
                      ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    # One color letter per class, in sorted class order
    mycolors = "wrgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    # Chromosome length is taken as the largest feature end on it
    for seqid, blines in groupby(bed, key=(lambda x: x.seqid)):
        chr_lens[seqid] = max(x.end for x in blines)

    # Record centromere positions, then replace each accn by its class
    # ('-' when the id has no mapping)
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        b.accn = mappings.get(accn, '-')

    chr_number = len(chr_lens)
    assert chr_number == len(centromeres)

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (seqid, cent_position) in enumerate(sorted(centromeres.items())):
        clen = chr_lens[seqid]
        xx = xstart + a * xinterval + .5 * xwidth
        yy = ystart - cent_position * ratio
        root.text(xx, ystart + .01, _(seqid), ha="center")
        ChromosomeWithCentromere(root, xx, ystart, yy,
                                 ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = .75
    # color the regions
    for seqid in sorted(chr_lens.keys()):
        excess = 0
        bac_list = []
        xx = xstart + chr_idxs[seqid] * xinterval
        for b in bed.sub_bed(seqid):
            klass = b.accn
            start = b.start
            end = b.end
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(Rectangle((xx, yystart), xwidth, yyend - yystart,
                           fc=class_colors.get(klass, "w"), lw=0, alpha=alpha))

            if imagemap:
                # `segment` : size of current BAC being investigated + `excess`
                # `excess`  : left-over bases from the previous BAC, as a
                #             result of iterating over `winsize` regions of
                #             `segment`
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly = xx, (1 - ystart) + segment_start * ratio
                    brx, bry = xx + xwidth, (1 - ystart) + segment_end * ratio
                    maplines.append('\t' + write_ImageMapLine(
                        tlx, tly, brx, bry, w, h, dpi,
                        seqid + ":" + ",".join(bac_list),
                        segment_start, segment_end))

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the trailing partial window of this chromosome
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly = xx, (1 - ystart) + segment_start * ratio
            brx, bry = xx + xwidth, (1 - ystart) + segment_end * ratio
            maplines.append('\t' + write_ImageMapLine(
                tlx, tly, brx, bry, w, h, dpi,
                seqid + ":" + ",".join(bac_list),
                segment_start, segment_end))

    if imagemap:
        maplines.append('</map>')
        with open(imgmapfile, "w") as mapfh:
            mapfh.write("\n".join(maplines) + "\n")
        logging.debug("Image map written to `{0}`".format(imgmapfile))

    if opts.gauge:
        tip = .008  # the ticks on the gauge bar
        extra = .006  # the offset for the unit label
        xstart, ystart = .9, .85
        yy = ystart
        gauge = int(ceil(max_chr_len / 1e6))
        mb = ratio * 1e6
        yinterval = 2 * mb
        root.plot([xstart, xstart], [yy, yy - r], 'b-', lw=2)
        # A tick every 2 Mb; every 10 Mb gets a longer tick plus a label
        for x in xrange(0, gauge, 2):
            if x % 10:
                root.plot([xstart, xstart + tip], [yy, yy], "b-")
            else:
                root.plot([xstart - tip, xstart + tip], [yy, yy], 'b-', lw=2)
                root.text(xstart + tip + extra, yy, _(x),
                          color="gray", va="center")
            yy -= yinterval
        root.text(xstart, yy - .03, _("Mb"), color="gray", va="center")

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0,
                                 alpha=alpha))
        root.text(xstart + xwidth + .01, yy, _(klass), fontsize=9)
        xstart += xinterval

    root.text(.5, .95, opts.title, fontstyle="italic", ha="center",
              va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    plt.savefig(figname, dpi=dpi)
    logging.debug("Figure saved to `{0}` {1}".format(figname, iopts))
def report(args):
    '''
    %prog report ksfile

    generate a report given a Ks result file (as produced by synonymous_calc.py).
    describe the median Ks, Ka values, as well as the distribution in stem-leaf plot
    '''
    # NOTE: the docstring above doubles as the usage text shown by the parser.
    from jcvi.graphics.histogram import stem_leaf_plot

    p = OptionParser(report.__doc__)
    p.add_option("--vmax", default=2., type="float",
                 help="Maximum value, inclusive [default: %default]")
    p.add_option("--bins", default=20, type="int",
                 help="Number of bins to plot in the histogram [default: %default]")
    p.add_option("--pdf", default=False, action="store_true",
                 help="Generate graphic output for the histogram [default: %default]")
    p.add_option("--components", default=1, type="int",
                 help="Number of components to decompose peaks [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    ks_file, = args
    header, data = read_ks_file(ks_file)
    ks_max = opts.vmax

    for f in fields.split()[1:]:
        columndata = [getattr(x, f) for x in data]
        median = np.median(columndata)
        title = "{0}: {1:.2f}".format(descriptions[f], median)
        title += " ({0:.2f} +/- {1:.2f})".format(np.mean(columndata),
                                                 np.std(columndata))
        # Only the Ks-type columns get a stem-leaf plot; with the others
        # skipped here, the plot always uses (0, ks_max, bins) and digit=1
        if "ks" not in f:
            continue
        stem_leaf_plot(columndata, 0, ks_max, opts.bins, digit=1, title=title)

    if not opts.pdf:
        return

    from jcvi.graphics.base import mpl, _, tex_formatter, tex_1digit_formatter
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

    fig = mpl.figure.Figure(figsize=(5, 5))
    canvas = FigureCanvas(fig)
    ax = fig.add_axes([.12, .1, .8, .8])

    components = opts.components
    data = [x.ng_ks for x in data]
    interval = ks_max / opts.bins
    line, line_mixture = plot_ks_dist(ax, data, interval, components, ks_max,
                                      color='r')
    handles = (line, line_mixture)
    captions = ("Ks", "Ks (fitted)")
    leg = ax.legend(handles, captions, shadow=True, fancybox=True,
                    prop={"size": 10})
    leg.get_frame().set_alpha(.5)

    ax.set_xlim((0, ks_max))
    ax.set_title(_('Ks distribution'), fontweight="bold")
    ax.set_xlabel(_('Synonymous substitutions per site (Ks)'))
    ax.set_ylabel(_('Percentage of gene pairs'))

    ax.xaxis.set_major_formatter(tex_1digit_formatter)
    ax.yaxis.set_major_formatter(tex_formatter)

    image_name = "Ks_plot.pdf"
    canvas.print_figure(image_name, dpi=300)
    logging.debug("Print image to `{0}`.".format(image_name))
def histogram(args):
    """
    %prog histogram meryl.histogram species K

    Plot the histogram based on meryl K-mer distribution, species and N are
    only used to annotate the graphic. Find out totalKmers when running
    kmer.meryl().
    """
    # NOTE: the docstring above doubles as the usage text shown by the parser.
    p = OptionParser(histogram.__doc__)
    p.add_option("--pdf", default=False, action="store_true",
                 help="Print PDF instead of ASCII plot [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    histfile, species, N = args
    use_ascii = not opts.pdf
    hist = {}
    totalKmers = 0

    with open(histfile) as fp:
        # Guess the format of the Kmer histogram: any row with a single
        # column means one count per line, the K being the line number
        soap = False
        for row in fp:
            if len(row.split()) == 1:
                soap = True
                break
        fp.seek(0)

        for rowno, row in enumerate(fp):
            if soap:
                K = rowno + 1
                counts = int(row.strip())
            else:  # meryl histogram
                K, counts = row.split()[:2]
                K, counts = int(K), int(counts)

            Kcounts = K * counts
            totalKmers += Kcounts
            hist[K] = counts

    # The peak search below needs at least one pair of adjacent K values;
    # without this check `Ka` was left unbound on tiny or empty inputs.
    if len(hist) < 2:
        logging.error("Need at least two K values in `{0}`".format(histfile))
        sys.exit(1)

    # Walk the counts in order of K until the curve has dropped, risen and
    # started to drop again; `Ka` then holds the K where that drop begins.
    history = ["drop"]
    for a, b in pairwise(sorted(hist.items())):
        Ka, ca = a
        Kb, cb = b
        if ca <= cb:
            status = "rise"
        else:
            status = "drop"
        if history[-1] != status:
            history.append(status)
        if history == ["drop", "rise", "drop"]:
            break

    Total_Kmers = int(totalKmers)
    Kmer_coverage = Ka
    Genome_size = Total_Kmers * 1. / Ka / 1e6

    Total_Kmers_msg = "Total {0}-mers: {1}".format(N, Total_Kmers)
    Kmer_coverage_msg = "{0}-mer coverage: {1}".format(N, Kmer_coverage)
    Genome_size_msg = "Estimated genome size: {0:.1f}Mb".format(Genome_size)

    for msg in (Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg):
        sys.stderr.write(msg + "\n")

    # Only K values up to 100 are plotted
    counts = sorted((a, b) for a, b in hist.items() if a <= 100)
    x, y = zip(*counts)
    title = "{0} genome {1}-mer histogram".format(species, N)

    if use_ascii:
        return asciiplot(x, y, title=title)

    plt.figure(1, (6, 6))
    plt.plot(x, y, 'g-', lw=2, alpha=.5)
    ax = plt.gca()
    ax.text(.5, .9, _(Total_Kmers_msg),
            ha="center", color='b', transform=ax.transAxes)
    ax.text(.5, .8, _(Kmer_coverage_msg),
            ha="center", color='b', transform=ax.transAxes)
    ax.text(.5, .7, _(Genome_size_msg),
            ha="center", color='b', transform=ax.transAxes)

    ax.set_title(_(title), color='r')
    xlabel, ylabel = "Coverage (X)", "Counts"
    ax.set_xlabel(_(xlabel), color='r')
    ax.set_ylabel(_(ylabel), color='r')
    set_human_axis(ax)

    imagename = histfile.split(".")[0] + ".pdf"
    plt.savefig(imagename, dpi=100)
    sys.stderr.write("Image saved to `{0}`.".format(imagename) + "\n")
def main():
    # Entry point: read the data matrix via parse_csv() and draw it as a
    # log-scaled heatmap with a color bar, optional column group labels
    # (--groups) and optional bracketed row groups (--rowgroups).
    # The usage text comes from the module docstring.
    p = OptionParser(__doc__)
    p.add_option("--groups", default=False, action="store_true",
                 help="The first row contains group info [default: %default]")
    p.add_option("--rowgroups", help="Row groupings [default: %default]")
    p.add_option("--horizontalbar", default=False, action="store_true",
                 help="Horizontal color bar [default: vertical]")
    p.add_option("--cmap", default="jet",
                 help="Use this color map [default: %default]")
    opts, args, iopts = set_image_options(p, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    rowgroups = opts.rowgroups

    groups, rows, cols, data = parse_csv(datafile, vmin=1, groups=opts.groups)
    cols = [x.replace("ay ", "") for x in cols]

    if rowgroups:
        # Each line: group name, then a comma-separated list of row names.
        # A group is recorded as the index range spanned by its rows.
        rgroups = []
        with open(rowgroups) as fp:
            for row in fp:
                a, b = row.split()
                irows = [rows.index(x) for x in b.split(",")]
                rgroups.append((a, min(irows), max(irows)))

    plt.rcParams["axes.linewidth"] = 0

    xstart = .18
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax = fig.add_axes([xstart, .15, .7, .7])

    default_cm = cm.get_cmap(opts.cmap)
    im = ax.matshow(data, cmap=default_cm, norm=LogNorm(vmin=1, vmax=10000))
    nrows, ncols = len(rows), len(cols)

    xinterval = .7 / ncols
    yinterval = .7 / max(nrows, ncols)

    plt.xticks(range(ncols), cols, rotation=45, size=10, ha="center")
    plt.yticks(range(nrows), rows, size=10)

    for x in ax.get_xticklines() + ax.get_yticklines():
        x.set_visible(False)

    ax.set_xlim(-.5, ncols - .5)

    t = [1, 10, 100, 1000, 10000]
    pad = .06
    if opts.horizontalbar:
        ypos = .5 * (1 - nrows * yinterval) - pad
        axcolor = fig.add_axes([.3, ypos, .4, .02])
        orientation = "horizontal"
    else:
        axcolor = fig.add_axes([.9, .3, .02, .4])
        orientation = "vertical"
    fig.colorbar(im, cax=axcolor, ticks=t, format=_("%d"),
                 orientation=orientation)

    if groups:
        # Collapse consecutive identical group names into (name, run length)
        groups = [(key, len(list(nn))) for key, nn in groupby(groups)]
        yy = .5 + .5 * nrows / ncols * .7 + .06
        e = .005
        sep = -.5
        for k, kl in groups:
            # Separator in the array area
            sep += kl
            ax.plot([sep, sep], [-.5, nrows - .5], "w-", lw=2)
            # Group labels on the top
            kl *= xinterval
            root.plot([xstart + e, xstart + kl - e], [yy, yy], "-",
                      color="gray", lw=2)
            root.text(xstart + .5 * kl, yy + e, k, ha="center", color="gray")
            xstart += kl

    if rowgroups:
        from jcvi.graphics.glyph import TextCircle

        xpos = .04
        tip = .015
        assert rgroups
        ystart = 1 - .5 * (1 - nrows * yinterval)
        for gname, start, end in rgroups:
            start = ystart - start * yinterval
            end = ystart - (end + 1) * yinterval
            # Shrink the bracket slightly at both ends
            start -= tip / 3
            end += tip / 3

            # Bracket the groups
            root.plot((xpos, xpos + tip), (start, start), "k-", lw=2)
            root.plot((xpos, xpos), (start, end), "k-", lw=2)
            root.plot((xpos, xpos + tip), (end, end), "k-", lw=2)
            TextCircle(root, xpos, .5 * (start + end), gname)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + opts.cmap + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def demo(args):
    """
    %prog demo

    Draw sample gene features to illustrate the various fates of duplicate
    genes - to be used in a book chapter.
    """
    # NOTE: the docstring above doubles as the usage text shown by the parser.
    p = OptionParser(demo.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    panel_space = .23
    dup_space = .025
    # Draw a gene and two regulatory elements at these arbitrary locations
    locs = [
        (.5, .9),                                  # ancestral gene
        (.5, .9 - panel_space + dup_space),        # identical copies
        (.5, .9 - panel_space - dup_space),
        (.5, .9 - 2 * panel_space + dup_space),    # degenerate copies
        (.5, .9 - 2 * panel_space - dup_space),
        (.2, .9 - 3 * panel_space + dup_space),    # sub-functionalization
        (.2, .9 - 3 * panel_space - dup_space),
        (.5, .9 - 3 * panel_space + dup_space),    # neo-functionalization
        (.5, .9 - 3 * panel_space - dup_space),
        (.8, .9 - 3 * panel_space + dup_space),    # non-functionalization
        (.8, .9 - 3 * panel_space - dup_space),
    ]

    # Fill colors of the two regulatory elements at each location; a 'w'
    # entry means that element is left out
    default_regulator = "gm"
    regulators = [
        default_regulator, default_regulator, default_regulator,
        "wm", default_regulator,
        "wm", "gw",
        "wb", default_regulator,
        "ww", default_regulator,
    ]

    width = .24
    rotation = 30
    tip = .02
    for i, (xx, yy) in enumerate(locs):
        regulator = regulators[i]

        # full extent of the gene
        left, right = xx - .5 * width, xx + .5 * width
        Glyph(root, left, right, yy)
        if i == 9:  # upper copy for non-functionalization
            continue

        # coding region
        cds_start, cds_end = xx - .16 * width, xx + .45 * width
        Glyph(root, cds_start, cds_end, yy, fc="k")

        # two regulatory elements
        x1, x2 = xx - .4 * width, xx - .28 * width
        for element_x, fc in zip((x1, x2), regulator):
            if fc == 'w':
                continue
            DoubleCircle(root, element_x, yy, fc=fc)

        if i == 0:
            ya = yy + tip
            for label_x, tissue in ((x1, "Flower"), (x2, "Root")):
                root.text(label_x, ya, _(tissue), rotation=rotation,
                          va="bottom")
        elif i == 7:
            ya = yy + tip
            root.text(x2, ya, _("Leaf"), rotation=rotation, va="bottom")

    # Draw arrows between panels (center)
    arrow_dist = .08
    ar_xpos = .5
    for ar_ypos in (.3, .53, .76):
        head = (ar_xpos, ar_ypos)
        tail = (ar_xpos, ar_ypos + arrow_dist)
        root.annotate(" ", head, tail, arrowprops=arrowprops)

    # Diagonal arrows from the center towards the two side panels
    ar_ypos = .3
    for ar_xpos in (.2, .8):
        head = (ar_xpos, ar_ypos)
        tail = (.5, ar_ypos + arrow_dist)
        root.annotate(" ", head, tail, arrowprops=arrowprops)

    # Duplication, Degeneration
    xx = .6
    for yy, process in zip((.76, .53), ("Duplication", "Degeneration")):
        root.text(xx, yy + .02, process, fontweight="bold")

    # Label of fates
    yy = .05
    fates = ("Subfunctionalization", "Neofunctionalization",
             "Nonfunctionalization")
    for xx, fate in zip((.2, .5, .8), fates):
        RoundLabel(root, xx, yy, fate)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = "demo.pdf"
    plt.savefig(figname, dpi=300)
    logging.debug("Figure saved to `{0}`".format(figname))
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    # Layout, top to bottom: a size gauge, one stack plot built from the
    # --stacks tracks (legend on its right), one heatmap strip per --heatmaps
    # track, and optional markers for the features listed in --meres.
    # The image is written to `<chromosome>.<format>`.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--heatmaps",
                 default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
                 help="Features to plot in heatmaps [default: %default]")
    p.add_option(
        "--meres", default=None,
        help="Extra centromere / telomere features [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    # `seqid` rather than `chr`, so the builtin chr() is not shadowed
    fastafile, seqid = args
    window, shift, subtract = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = [x + ".bed" for x in stacks]
    heatmapbeds = [x + ".bed" for x in heatmaps]
    stackbins = get_binfiles(stackbeds, fastafile, shift, subtract)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift, subtract)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[seqid]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio

    # Short label: "prefix_suffix" becomes the upper-cased first letter of the
    # prefix followed by the suffix.  Any other name shape (no underscore,
    # several underscores, empty prefix) is shown unchanged; previously the
    # label was left undefined for names such as "chr1".
    parts = seqid.split("_")
    if len(parts) == 2 and parts[0]:
        cc = parts[0][0].upper() + parts[1]
    else:
        cc = seqid

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    # Number of bins: clen / shift, plus one for a trailing partial bin
    nbins = clen / shift
    if clen % shift:
        nbins += 1

    # Widen the window to a multiple of `shift` when 1/100 of the chromosome
    # is larger than the requested window
    owindow = clen / 100
    if owindow > window:
        window = owindow / shift * shift

    stackplot(ax, stackbins, nbins, palette, seqid, window, shift)
    root.text(xx + inner, yy + yinterval - 2 * inner, cc, va="top")

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for s, color in zip(stacks, palette):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, s, size=10)

    yh = .05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for s, binfile in zip(heatmaps, heatmapbins):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy -= yh
        m = stackarray(binfile, seqid, window, shift)

        # imshow needs a 2-D array, so the 1-D track is stacked on itself
        Y = np.array([m, m])
        root.imshow(Y, extent=(xx, xx + xlen, yy, yy + yh - inner),
                    interpolation="nearest", aspect="auto")
        root.text(xx + xlen + .01, yy, s, size=10)

    yy -= yh

    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != seqid:
                continue
            # Marker sits at the midpoint of the feature, in canvas units
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01,
                                         fc="m", ec="m"))
            root.text(xx + .014, yy, _(accn), va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def report(args):
    '''
    %prog report ksfile

    generate a report given a Ks result file (as produced by synonymous_calc.py).
    describe the median Ks, Ka values, as well as the distribution in stem-leaf plot
    '''
    # Prints a stem-leaf plot for every field whose name contains "ks" and,
    # with --pdf, also writes a fitted Ks histogram to Ks_plot.pdf.
    from jcvi.graphics.histogram import stem_leaf_plot

    p = OptionParser(report.__doc__)
    p.add_option("--vmax", default=2., type="float",
                 help="Maximum value, inclusive [default: %default]")
    p.add_option(
        "--bins", default=20, type="int",
        help="Number of bins to plot in the histogram [default: %default]")
    p.add_option(
        "--pdf", default=False, action="store_true",
        help="Generate graphic output for the histogram [default: %default]")
    p.add_option(
        "--components", default=1, type="int",
        help="Number of components to decompose peaks [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    ks_file = args[0]
    header, data = read_ks_file(ks_file)
    ks_max = opts.vmax

    # The first name in `fields` is skipped; the rest are summarised
    for field in fields.split()[1:]:
        values = [getattr(row, field) for row in data]
        title = "{0}: {1:.2f}".format(descriptions[field], np.median(values))
        title += " ({0:.2f} +/- {1:.2f})".format(np.mean(values),
                                                 np.std(values))
        # Only Ks-type columns are plotted
        if "ks" not in field:
            continue
        stem_leaf_plot(values, 0, ks_max, opts.bins, digit=1, title=title)

    if not opts.pdf:
        return

    from jcvi.graphics.base import mpl, _, tex_formatter, tex_1digit_formatter
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas

    fig = mpl.figure.Figure(figsize=(5, 5))
    canvas = FigureCanvas(fig)
    ax = fig.add_axes([.12, .1, .8, .8])

    ks_values = [row.ng_ks for row in data]
    bin_width = ks_max / opts.bins
    line, line_mixture = plot_ks_dist(ax, ks_values, bin_width,
                                      opts.components, ks_max, color='r')

    legend = ax.legend((line, line_mixture), ("Ks", "Ks (fitted)"),
                       shadow=True, fancybox=True, prop={"size": 10})
    legend.get_frame().set_alpha(.5)

    ax.set_xlim((0, ks_max))
    ax.set_title(_('Ks distribution'), fontweight="bold")
    ax.set_xlabel(_('Synonymous substitutions per site (Ks)'))
    ax.set_ylabel(_('Percentage of gene pairs'))
    ax.xaxis.set_major_formatter(tex_1digit_formatter)
    ax.yaxis.set_major_formatter(tex_formatter)

    image_name = "Ks_plot.pdf"
    canvas.print_figure(image_name, dpi=300)
    logging.debug("Print image to `{0}`.".format(image_name))
image_name = op.splitext(blastfile)[0] + "." + opts.format plt.rcParams["xtick.major.pad"] = 16 plt.rcParams["ytick.major.pad"] = 16 # Fix the width xsize, ysize = qsizes.totalsize, ssizes.totalsize ratio = ysize * 1. / xsize if proportional else 1 width = iopts.w height = iopts.h * ratio fig = plt.figure(1, (width, height)) root = fig.add_axes([0, 0, 1, 1]) # the whole canvas ax = fig.add_axes([.1, .1, .8, .8]) # the dot plot blastplot(ax, blastfile, qsizes, ssizes, qbed, sbed, style=opts.style, proportional=proportional, sampleN=opts.sample, baseticks=True, stripNames=opts.stripNames) # add genome names to_ax_label = lambda fname: _(op.basename(fname).split(".")[0]) gx, gy = [to_ax_label(x.filename) for x in (qsizes, ssizes)] ax.set_xlabel(gx, size=16) ax.set_ylabel(gy, size=16) root.set_xlim(0, 1) root.set_ylim(0, 1) root.set_axis_off() logging.debug("Print image to `{0}` {1}".format(image_name, iopts)) plt.savefig(image_name, dpi=iopts.dpi) plt.rcdefaults()
def dotplot(anchorfile, qbed, sbed, image_name, vmin, vmax, iopts,
            is_self=False, synteny=False, cmap_text=None):
    """
    Draw a dot plot of the anchor pairs in `anchorfile` and save it.

    anchorfile: text file; the first two whitespace-separated columns are the
        query and subject names, and the last column is read as a number
        (falling back to `vmax` when it does not parse as a float).
    qbed, sbed: query / subject bed objects; this function uses their
        `.order` lookup, `len()`, `.get_breaks()` and `.filename`.
    image_name: path of the figure to write.
    vmin, vmax: values are clipped to this range before plotting.
    iopts: image options; `.w`, `.h` and `.dpi` are read.
    is_self: also plot the mirrored pair and draw a diagonal.
    synteny: run `batch_scan` on the points and box the clusters.
    cmap_text: if given, draw a colour bar with this caption.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    # The file is closed as soon as parsing is done (it used to be left open)
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            # first two columns are query and subject, and an optional
            # third column
            if len(atoms) < 2:
                continue
            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                value = vmax

            # Clip into [vmin, vmax]
            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            # Pairs with a member missing from either bed are ignored
            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, q = qorder[query]
            si, s = sorder[subject]

            # The inverted value is what gets stored and plotted
            nv = vmax - value
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    sample_number = 5000  # only show random subset
    if len(data) > sample_number:
        data = sample(data, sample_number)

    # Sort by descending stored value, so that the points with the smallest
    # stored value are drawn last (on top) for aesthetics
    data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)
    ax.scatter(x, y, c=c, s=2, lw=0, cmap=default_cm,
               vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    def chr_label(seqid):
        # Keep the part after the last "_"; a purely numeric part is shown
        # as "c<number>", anything else is shown as-is.
        seqid = seqid.split("_")[-1]
        try:
            return "c%d" % int(seqid)
        except ValueError:
            return seqid

    # Chromosomes spanning less than 0.5% of an axis do not get a label
    ignore_size_x = xsize * .005
    ignore_size_y = ysize * .005

    # plot the chromosome breaks
    xchr_labels, ychr_labels = [], []
    for (seqid, beg, end) in qbed.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        xchr_labels.append((chr_label(seqid), (beg + end) / 2, ignore))
        ax.plot([beg, beg], ylim, "g-", lw=1)

    for (seqid, beg, end) in sbed.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        ychr_labels.append((chr_label(seqid), (beg + end) / 2, ignore))
        ax.plot(xlim, [beg, beg], "g-", lw=1)

    # plot the chromosome labels; positions are converted from data units
    # to the canvas, where the dot plot spans .1 to .9 on both axes
    for label, pos, ignore in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if not ignore:
            root.text(pos, .91, label,
                      ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if not ignore:
            root.text(.91, pos, label,
                      va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    to_ax_label = lambda fname: _(op.basename(fname).split(".")[0])
    gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
def main():
    """
    %prog bedfile id_mappings

    Takes a bedfile that contains the coordinates of features to plot on the
    chromosomes, and `id_mappings` file that map the ids to certain class. Each
    class will get assigned a unique color. `id_mappings` file is optional (if
    omitted, will not paint the chromosome features, except the centromere).
    """
    # Writes `<bedfile prefix>.<format>`; with --imagemap it also writes
    # `<bedfile prefix>.map`, an HTML image map over the painted regions.
    p = OptionParser(main.__doc__)
    p.add_option("--title", default="Medicago truncatula v3.5",
                 help="title of the image [default: `%default`]")
    p.add_option("--gauge", default=False, action="store_true",
                 help="draw a gauge with size label [default: %default]")
    p.add_option("--imagemap", default=False, action="store_true",
                 help="generate an HTML image map associated with the image "
                      "[default: %default]")
    p.add_option("--winsize", default=50000, type="int",
                 help="if drawing an imagemap, specify the window size (bases) "
                      "of each map element [default: %default bp]")
    opts, args, iopts = set_image_options(p, figsize="6x6", dpi=300)

    if len(args) not in (1, 2):
        # print_help() returns None; `not None` makes this a non-zero exit,
        # matching how the other commands report a usage error
        sys.exit(not p.print_help())

    bedfile = args[0]
    mappingfile = None
    if len(args) == 2:
        mappingfile = args[1]

    winsize = opts.winsize
    imagemap = opts.imagemap
    w, h = iopts.w, iopts.h
    dpi = iopts.dpi

    prefix = bedfile.rsplit(".", 1)[0]
    figname = prefix + "." + opts.format
    if imagemap:
        imgmapfile = prefix + '.map'
        mapfh = open(imgmapfile, "w")
        print >> mapfh, '<map id="' + prefix + '">'

    if mappingfile:
        # Each line must split into exactly two fields: id and class
        with open(mappingfile) as fh:
            mappings = dict(x.split() for x in fh)
        classes = sorted(set(mappings.values()))
        logging.debug("A total of {0} classes found: {1}".format(
            len(classes), ','.join(classes)))
    else:
        mappings = {}
        classes = []
        logging.debug("No classes registered (no id_mappings given).")

    # Classes get colours in sorted order; zip() stops at the shorter input,
    # so classes beyond the seventh get no entry and are painted white below
    mycolors = "wrgbymc"
    class_colors = dict(zip(classes, mycolors))

    bed = Bed(bedfile)
    chr_lens = {}
    centromeres = {}
    # Chromosome length is taken as the largest feature end on it
    for b, blines in groupby(bed, key=(lambda x: x.seqid)):
        blines = list(blines)
        maxlen = max(x.end for x in blines)
        chr_lens[b] = maxlen

    # Record centromere positions, then replace every accession with its
    # class name ('-' when the accession is not in the mapping)
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = '-'

    chr_number = len(chr_lens)
    assert chr_number == len(centromeres), \
        "every chromosome in the bed file needs a `centromere` feature"

    fig = plt.figure(1, (w, h))
    root = fig.add_axes([0, 0, 1, 1])

    r = .7  # width and height of the whole chromosome set
    xstart, ystart = .15, .85
    xinterval = r / chr_number
    xwidth = xinterval * .5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chr, cent_position) in enumerate(sorted(centromeres.items())):
        clen = chr_lens[chr]
        xx = xstart + a * xinterval + .5 * xwidth
        yy = ystart - cent_position * ratio
        root.text(xx, ystart + .01, _(chr), ha="center")
        ChromosomeWithCentromere(root, xx, ystart, yy,
                                 ystart - clen * ratio, width=xwidth)

    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = .75
    # color the regions
    for chr in sorted(chr_lens.keys()):
        excess = 0
        bac_list = []
        for b in bed.sub_bed(chr):
            idx = chr_idxs[chr]
            klass = b.accn
            start = b.start
            end = b.end
            xx = xstart + idx * xinterval
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle((xx, yystart), xwidth, yyend - yystart,
                          fc=class_colors.get(klass, "w"), lw=0, alpha=alpha))

            if imagemap:
                # `segment` : size of current BAC being investigated + `excess`
                # `excess`  : left-over bases from the previous BAC, as a
                #             result of iterating over `winsize` regions of
                #             `segment`
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                while True:
                    if segment < winsize:
                        # Not enough left for a full window: carry it over
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                        xx + xwidth, (1 - ystart) + segment_end * ratio
                    print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                        w, h, dpi, chr+":"+",".join(bac_list), segment_start, segment_end)

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the final, partial window of this chromosome
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = xx, (1 - ystart) + segment_start * ratio, \
                xx + xwidth, (1 - ystart) + segment_end * ratio
            print >> mapfh, '\t' + write_ImageMapLine(tlx, tly, brx, bry, \
                w, h, dpi, chr+":"+",".join(bac_list), segment_start, segment_end)

    if imagemap:
        print >> mapfh, '</map>'
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if opts.gauge:
        tip = .008  # the ticks on the gauge bar
        extra = .006  # the offset for the unit label
        xstart, ystart = .9, .85
        yy = ystart
        gauge = int(ceil(max_chr_len / 1e6))
        mb = ratio * 1e6
        yinterval = 2 * mb
        root.plot([xstart, xstart], [yy, yy - r], 'b-', lw=2)
        # One tick every 2 Mb; multiples of 10 get a wider tick and a number
        for x in xrange(0, gauge, 2):
            if x % 10:
                root.plot([xstart, xstart + tip], [yy, yy], "b-")
            else:
                root.plot([xstart - tip, xstart + tip], [yy, yy],
                          'b-', lw=2)
                root.text(xstart + tip + extra, yy, _(x),
                          color="gray", va="center")
            yy -= yinterval
        root.text(xstart, yy - .03, _("Mb"), color="gray", va="center")

    # class legends, four in a row
    xstart = .1
    xinterval = .2
    xwidth = .04
    yy = .08
    for klass, cc in sorted(class_colors.items()):
        if klass == '-':
            continue
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0, alpha=alpha))
        root.text(xstart + xwidth + .01, yy, _(klass), fontsize=9)
        xstart += xinterval

    root.text(.5, .95, opts.title, fontstyle="italic",
              ha="center", va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    plt.savefig(figname, dpi=dpi)
    logging.debug("Figure saved to `{0}` {1}".format(figname, iopts))
width = iopts.w height = iopts.h * ratio fig = plt.figure(1, (width, height)) root = fig.add_axes([0, 0, 1, 1]) # the whole canvas ax = fig.add_axes([.1, .1, .8, .8]) # the dot plot blastplot(ax, blastfile, qsizes, ssizes, qbed, sbed, style=opts.style, proportional=proportional, sampleN=opts.sample, baseticks=True, stripNames=opts.stripNames) # add genome names to_ax_label = lambda fname: _(op.basename(fname).split(".")[0]) gx, gy = [to_ax_label(x.filename) for x in (qsizes, ssizes)] ax.set_xlabel(gx, size=16) ax.set_ylabel(gy, size=16) root.set_xlim(0, 1) root.set_ylim(0, 1) root.set_axis_off() logging.debug("Print image to `{0}` {1}".format(image_name, iopts)) plt.savefig(image_name, dpi=iopts.dpi) plt.rcdefaults()
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and
    repetitive sequences along the chromosomes.
    """
    # One stack plot per sequence for the first --top sequences of the
    # fasta file, under a shared size gauge, with a legend row at the
    # bottom.  The image is written to `<fasta prefix>.<format>`.
    p = OptionParser(stack.__doc__)
    p.add_option("--top", default=10, type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--switch",
                 help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    bedfiles = [x + ".bed" for x in opts.stacks.split(",")]
    binfiles = get_binfiles(bedfiles, fastafile, shift)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = .08
    inner = .02  # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for seqid, clen in s:
        yy -= yinterval
        xlen = clen / ratio

        # Short label: "prefix_suffix" becomes the upper-cased first letter
        # of the prefix followed by the suffix.  Any other name shape is
        # shown unchanged; previously such names either raised NameError or
        # silently reused the label of the previous chromosome.
        parts = seqid.split("_")
        if len(parts) == 2 and parts[0]:
            cc = parts[0][0].upper() + parts[1]
        else:
            cc = seqid

        # Optional alternative name, shown in brackets on a second line
        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner,
                                 color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins: clen / shift, plus one for a trailing partial bin
        nbins = clen / shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, seqid, window, shift)
        root.text(xx - .04, yy + .5 * (yinterval - inner), cc,
                  ha="center", va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for b, color in zip(bedfiles, palette):
        b = b.rsplit(".", 1)[0].replace("_", " ")
        b = Registration.get(b, b)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        xx += 2 * inner
        root.text(xx, yy, _(b), size=13)
        # Advance by a rough estimate of the rendered label width
        xx += len(b) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and
    repetitive sequences along the chromosomes.
    """
    # One stack plot per sequence for the first --top sequences of the
    # fasta file, under a shared size gauge, with a legend row at the
    # bottom.  The image is written to `<fasta prefix>.<format>`.
    p = OptionParser(stack.__doc__)
    p.add_option("--top", default=10, type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option(
        "--switch",
        help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift, subtract = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    bedfiles = [x + ".bed" for x in opts.stacks.split(",")]
    binfiles = get_binfiles(bedfiles, fastafile, shift, subtract)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = .08
    inner = .02  # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for seqid, clen in s:
        yy -= yinterval
        xlen = clen / ratio

        # Short label: "prefix_suffix" becomes the upper-cased first letter
        # of the prefix followed by the suffix.  Any other name shape is
        # shown unchanged; previously such names either raised NameError or
        # silently reused the label of the previous chromosome.
        parts = seqid.split("_")
        if len(parts) == 2 and parts[0]:
            cc = parts[0][0].upper() + parts[1]
        else:
            cc = seqid

        # Optional alternative name, shown in brackets on a second line
        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner,
                                 color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins: clen / shift, plus one for a trailing partial bin
        nbins = clen / shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, seqid, window, shift)
        root.text(xx - .04, yy + .5 * (yinterval - inner), cc,
                  ha="center", va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for b, color in zip(bedfiles, palette):
        b = b.rsplit(".", 1)[0].replace("_", " ")
        b = Registration.get(b, b)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        xx += 2 * inner
        root.text(xx, yy, _(b), size=13)
        # Advance by a rough estimate of the rendered label width
        xx += len(b) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    # Layout, top to bottom: a size gauge, one stack plot built from the
    # --stacks tracks (legend on its right), one heatmap strip per --heatmaps
    # track, and optional markers for the features listed in --meres.
    # The image is written to `<chromosome>.<format>`.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--heatmaps",
                 default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
                 help="Features to plot in heatmaps [default: %default]")
    p.add_option("--meres", default=None,
                 help="Extra centromere / telomere features [default: %default]")
    add_window_options(p)
    opts, args, iopts = set_image_options(p, args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    # `seqid` rather than `chr`, so the builtin chr() is not shadowed
    fastafile, seqid = args
    # Called once; the original repeated this call a few lines further down
    window, shift = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = [x + ".bed" for x in stacks]
    heatmapbeds = [x + ".bed" for x in heatmaps]
    stackbins = get_binfiles(stackbeds, fastafile, shift)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[seqid]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio

    # Short label: "prefix_suffix" becomes the upper-cased first letter of the
    # prefix followed by the suffix.  Any other name shape (no underscore,
    # several underscores, empty prefix) is shown unchanged; previously the
    # label was left undefined for names such as "chr1".
    parts = seqid.split("_")
    if len(parts) == 2 and parts[0]:
        cc = parts[0][0].upper() + parts[1]
    else:
        cc = seqid

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    # Number of bins: clen / shift, plus one for a trailing partial bin
    nbins = clen / shift
    if clen % shift:
        nbins += 1

    # Widen the window to a multiple of `shift` when 1/100 of the chromosome
    # is larger than the requested window
    owindow = clen / 100
    if owindow > window:
        window = owindow / shift * shift

    stackplot(ax, stackbins, nbins, palette, seqid, window, shift)
    root.text(xx + inner, yy + yinterval - 2 * inner, cc, va="top")

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    for s, color in zip(stacks, palette):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, s, size=10)

    yh = .05  # Heatmap height
    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for s, binfile in zip(heatmaps, heatmapbins):
        s = s.replace("_", " ")
        s = Registration.get(s, s)

        yy -= yh
        m = stackarray(binfile, seqid, window, shift)

        # imshow needs a 2-D array, so the 1-D track is stacked on itself
        Y = np.array([m, m])
        root.imshow(Y, extent=(xx, xx + xlen, yy, yy + yh - inner),
                    interpolation="nearest", aspect="auto")
        root.text(xx + xlen + .01, yy, s, size=10)

    yy -= yh

    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != seqid:
                continue
            # Marker sits at the midpoint of the feature, in canvas units
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01,
                                         fc="m", ec="m"))
            root.text(xx + .014, yy, _(accn), va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
    plt.rcdefaults()
def __init__(self, ax, x1, x2, t, **kwargs):
    """
    Write the text `t` on `ax`, horizontally centred at (x1, x2), inside an
    unfilled box with rounded corners.  Extra keyword arguments are accepted
    but not used.
    """
    rounded_box = dict(boxstyle="round", fill=False)
    ax.text(x1, x2, _(t), ha="center", bbox=rounded_box)