def gaps(args):
    """
    %prog gaps A_vs_B.blast

    Find distribution of gap sizes between adjacent HSPs.
    """
    p = OptionParser(gaps.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    blastfile, = args
    blast = BlastSlow(blastfile)
    logging.debug("A total of {} records imported".format(len(blast)))

    # Gaps are collected twice: once with the default setting and once with
    # use_subject=True.
    query_gaps = list(collect_gaps(blast))
    subject_gaps = list(collect_gaps(blast, use_subject=True))
    logging.debug("Query gaps: {} Subject gaps: {}"
                  .format(len(query_gaps), len(subject_gaps)))

    # Plotting dependencies are imported here, right before they are needed
    from jcvi.graphics.base import savefig
    import seaborn as sns

    # Only the query gaps are drawn; the subject gaps are merely counted in
    # the debug message above.
    sns.distplot(query_gaps)
    savefig("query_gaps.pdf")
def coverage(args):
    """
    %prog coverage *.coverage

    Plot coverage along chromosome. The coverage file can be generated with:
    $ samtools depth a.bam > a.coverage

    The plot is a simple line plot using matplotlib.
    """
    from jcvi.graphics.base import savefig

    parser = OptionParser(coverage.__doc__)
    opts, args, iopts = parser.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    covfile, = args
    columns = ["Ref", "Position", "Depth"]
    depth_table = pd.read_csv(covfile, sep='\t', names=columns)

    # x is the "Position" column, y is the "Depth" column, drawn in green
    depth_table.plot("Position", "Depth", color='g')

    # Output image sits next to the input: <covfile>.<format>
    savefig(".".join((covfile, iopts.format)))
def draw(self, title="Ks distribution"):
    """
    Decorate the Ks plot axes (legend, x-limits, title, axis labels and tick
    formatters) and save the figure as `Ks_plot.pdf` at 300 dpi.

    `title` is the text shown above the plot.
    """
    from jcvi.graphics.base import tex_formatter, \
        tex_1digit_formatter, tex_2digit_formatter, savefig

    ax = self.ax
    ks_max = self.ks_max
    lines = self.lines
    labels = self.labels
    legendp = self.legendp

    # The legend location must be given by keyword: Axes.legend() accepts at
    # most two positional arguments (handles, labels) in current matplotlib.
    leg = ax.legend(lines, labels, loc=legendp,
                    shadow=True, fancybox=True, prop={"size": 10})
    leg.get_frame().set_alpha(.5)

    ax.set_xlim((0, ks_max - self.interval))
    ax.set_title(title, fontweight="bold")
    ax.set_xlabel('Synonymous substitutions per site (Ks)')
    ax.set_ylabel('Percentage of gene pairs')

    # Use two-digit tick labels on the x-axis when the interval is below 0.1
    tf = tex_2digit_formatter if self.interval < .1 else \
        tex_1digit_formatter
    ax.xaxis.set_major_formatter(tf)
    ax.yaxis.set_major_formatter(tex_formatter)

    image_name = "Ks_plot.pdf"
    savefig(image_name, dpi=300)
def birch(args):
    """
    %prog birch seqids layout

    Plot birch macro-synteny, with an embedded phylogenetic tree to the right.
    """
    parser = OptionParser(birch.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x6")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqids, layout = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    karyotype = Karyotype(fig, root, seqids, layout)
    tracks = karyotype.layout

    xs = .79
    node_style = dict(rectangle=False, circle=False)

    # Embed a phylogenetic tree to the right: each leaf takes its y position
    # from the layout entry with the same index.
    leaves = ("Amborella", "Vitis", "Prunus",
              "Betula", "Populus", "Arabidopsis")
    coords = {}
    for idx, taxon in enumerate(leaves):
        coords[taxon] = (xs, tracks[idx].y)

    # Internal nodes, joined bottom-up: (new node, first child, second child)
    internal_nodes = (
        ("fabids", "Prunus", "Betula"),
        ("malvids", "Populus", "Arabidopsis"),
        ("rosids", "fabids", "malvids"),
        ("eudicots", "rosids", "Vitis"),
        ("angiosperm", "eudicots", "Amborella"),
    )
    for clade, first, second in internal_nodes:
        coords[clade] = join_nodes(root, coords, first, second, xs,
                                   **node_style)

    # Show branch length: (from node, to node, text, text alignment)
    branches = (
        ("Amborella", "angiosperm", ">160.0", {}),
        ("eudicots", "angiosperm", ">78.2", dict(va="top")),
        ("Vitis", "eudicots", "138.5", {}),
        ("rosids", "eudicots", "19.8", dict(va="top")),
        ("Prunus", "fabids", "104.2", dict(ha="right", va="top")),
        ("Arabidopsis", "malvids", "110.2", dict(va="top")),
        ("fabids", "rosids", "19.8", dict(ha="right", va="top")),
        ("malvids", "rosids", "8.5", dict(va="top")),
    )
    for start, end, text, alignment in branches:
        branch_length(root, coords[start], coords[end], text, **alignment)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = ".".join(("birch", iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    fileA, fileB = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    # Two side-by-side panels on top of the full-figure canvas
    A = fig.add_axes([.1, .18, .32, .64])
    B = fig.add_axes([.6, .18, .32, .64])

    tableA = import_data(fileA)
    tableB = import_data(fileB)

    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    panels = (
        (A, tableA, "Yellow catfish"),
        (B, tableB, "Medicago"),
    )
    for ax, table, panel_title in panels:
        subplot_twinx(ax, table, xlabel, ylabels,
                      title=panel_title, legend=legend)

    panel_labels(root, ((.04, .92, "A"), (.54, .92, "B")))
    normalize_axes(root)

    image_name = "resample." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def pomegranate(args):
    """
    %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pomegranate genome. The script calls both
    graphics.karyotype and graphics.synteny.
    """
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both components draw onto the same full-figure axes
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .42, .52, .48)

    # Panel letters: (x, y, text)
    labels = ((.04, .96, 'A'), (.04, .52, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def epoch(args):
    """
    %prog epoch

    Illustrate the methods used in Maggie's epoch paper, in particular, how to
    classify S/G/F/FB/FN for the genes.
    """
    # Use this command's own usage text and the arguments handed to it, like
    # the other subcommands do, rather than the module docstring and sys.argv.
    p = OptionParser(epoch.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (6, 4))
    root = fig.add_axes([0, 0, 1, 1])

    # Separators: one horizontal line at mid-height, two vertical lines in
    # the top half and three in the bottom half
    linestyle = dict(lw=2, color="b", alpha=.2, zorder=2)
    root.plot((0, 1), (.5, .5), "--", **linestyle)
    for i in (1./3, 2./3):
        root.plot((i, i), (.5, 1), "--", **linestyle)
    for i in (1./6, 3./6, 5./6):
        root.plot((i, i), (0, .5), "--", **linestyle)

    # Diagrams
    plot_diagram(root, 1./6, 3./4, "S", "syntenic")
    plot_diagram(root, 3./6, 3./4, "F", "missing, with both flankers")
    plot_diagram(root, 5./6, 3./4, "G", "missing, with one flanker")
    plot_diagram(root, 2./6, 1./4, "FB", "has non-coding matches")
    plot_diagram(root, 4./6, 1./4, "FN", "syntenic region has gap")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
def main():
    """
    Command-line entry point: parse the options, draw the Synteny figure from
    the data, BED and layout files, and save it as `<datafile stem>.<format>`.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]")
    parser.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]")
    parser.add_option("--extra", help="Extra features in BED format")
    parser.add_option("--scalebar", default=False, action="store_true",
                      help="Add scale bar to the plot")
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    # Output prefix is the data file name without its last extension
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    Synteny(fig, root, datafile, bedfile, layoutfile,
            switch=opts.switch, tree=opts.tree,
            extra_features=opts.extra, scalebar=opts.scalebar)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = ".".join((prefix, iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """
    Command-line entry point: draw the Coverage tracks for one sequence and
    save the figure as `<seqid>.<format>`.

    Expects three positional arguments: the sequence id, a sizes file, and
    the data directory handed to Coverage.
    """
    p = OptionParser(__doc__)
    p.add_option("--order",
                 help="The order to plot the tracks, comma-separated")
    opts, args, iopts = p.set_image_options()

    if len(args) != 3:
        sys.exit(not p.print_help())

    seqid, sizes, datadir = args
    order = opts.order
    # No --hlsuffix option is registered in this function, so a plain
    # attribute read could raise AttributeError; fall back to None when the
    # parser did not provide it.
    # NOTE(review): assumes Coverage treats hlsuffix=None as "no highlight";
    # confirm against the Coverage constructor.
    hlsuffix = getattr(opts, "hlsuffix", None)
    if order:
        order = order.split(",")
    sizes = Sizes(sizes)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    canvas = (.12, .35, .8, .35)
    seqid_size = sizes.get_size(seqid)
    # Constructed for its drawing effect on fig/root; the object is not kept
    Coverage(fig, root, canvas, seqid, (0, seqid_size), datadir,
             order=order, hlsuffix=hlsuffix)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def oropetium(args):
    """
    %prog oropetium mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphics.synteny.
    """
    p = OptionParser(oropetium.__doc__)
    p.add_option("--extra", help="Extra features in BED format")
    opts, args, iopts = p.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not p.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout,
            switch=switch, extra_features=opts.extra)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .57, .74, text=True, repeat=True)

    # On the left panel, make a species tree
    fc = 'lightslategrey'

    coords = {}
    xs, xp = .16, .03
    coords["oropetium"] = (xs, .7)
    coords["setaria"] = (xs, .6)
    coords["sorghum"] = (xs, .5)
    coords["rice"] = (xs, .4)
    coords["brachypodium"] = (xs, .3)
    # Each `xs -= xp` moves the next internal node(s) one step to the left;
    # BEP and PACMAD share the same x position.
    xs -= xp
    coords["Panicoideae"] = join_nodes(root, coords, "setaria", "sorghum", xs)
    xs -= xp
    coords["BEP"] = join_nodes(root, coords, "rice", "brachypodium", xs)
    coords["PACMAD"] = join_nodes(root, coords, "oropetium", "Panicoideae", xs)
    xs -= xp
    coords["Poaceae"] = join_nodes(root, coords, "BEP", "PACMAD", xs)

    # Names of the internal nodes: BEP/Poaceae are written below the node,
    # PACMAD above it
    for tag in ("BEP", "Poaceae"):
        nx, ny = coords[tag]
        nx, ny = nx - .005, ny - .02
        root.text(nx, ny, tag, rotation=90, ha="right", va="top", color=fc)
    for tag in ("PACMAD",):
        nx, ny = coords[tag]
        nx, ny = nx - .005, ny + .02
        root.text(nx, ny, tag, rotation=90, ha="right", va="bottom", color=fc)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "oropetium"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def multilineplot(args):
    """
    %prog multilineplot fastafile chr1

    Combine multiple line plots in one vertical stack
    Inputs must be BED-formatted.

    --lines: traditional line plots, useful for plotting feature freq
    """
    p = OptionParser(multilineplot.__doc__)
    p.add_option("--lines",
                 help="Features to plot in lineplot [default: %default]")
    p.add_option("--colors",
                 help="List of colors matching number of input bed files")
    p.add_option("--mode", default="span", choices=("span", "count", "score"),
                 help="Accumulate feature based on [default: %default]")
    p.add_option("--binned", default=False, action="store_true",
                 help="Specify whether the input is already binned; " +
                 "if True, input files are considered to be binfiles")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    window, shift, subtract = check_window_options(opts)

    # Every subplot comes from --lines; without it the code below has nothing
    # to draw (and previously failed on undefined names), so show the usage.
    if not opts.lines:
        sys.exit(not p.print_help())

    lines = opts.lines.split(",")
    colors = opts.colors
    # Explicit validation instead of `assert`: it also covers a missing
    # --colors and is not stripped when Python runs with -O.
    if not colors or len(colors) != len(lines):
        sys.exit("Number of chosen colors must match" +
                 " number of input bed files")

    linebeds = get_beds(lines, binned=opts.binned)
    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode,
                            binned=opts.binned)

    clen = Sizes(fastafile).mapping[chr]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0
    plt.rcParams["figure.figsize"] = iopts.w, iopts.h

    fig, axarr = plt.subplots(nrows=len(lines))
    # With a single row plt.subplots() returns one Axes, not a sequence
    if len(linebeds) == 1:
        axarr = (axarr, )
    fig.suptitle(chr, color="darkslategray")

    for i, ax in enumerate(axarr):
        lineplot(ax, [linebins[i]], nbins, chr, window, shift,
                 color="{0}{1}".format(colors[i], 'r'))

    plt.subplots_adjust(hspace=0.5)

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def plot_data(x, y, tour, M):
    """
    Plot the points (x, y) as red dots, draw a red segment for every pair
    yielded by pairwise(tour), put the tour's score (from evaluate(tour, M))
    in the title, and save the figure as `demo.pdf`.
    """
    from jcvi.graphics.base import plt, savefig

    plt.plot(x, y, "ro")
    for start, end in pairwise(tour):
        segment_x = (x[start], x[end])
        segment_y = (y[start], y[end])
        plt.plot(segment_x, segment_y, "r-")

    plt.title("Score={0:.2f}".format(evaluate(tour, M)))
    savefig("demo.pdf")
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    # One row of diagrams per benchmark file; zip() stops at the shorter of
    # the file list and the three tags.
    for bc, tag in zip(bcs, tags):
        # Each row holds four whitespace-separated fields:
        # program name and three integer counts.
        data = []
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            # venn2 subsets: only-in-first, only-in-second, in-both
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            # Written through sys.stderr.write so the line works on both
            # Python 2 and Python 3 (`print >> sys.stderr` is Python 2 only).
            sys.stderr.write(message + "\n")
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def scenario(args):
    """
    %prog scenario

    Illustration of the two-step genome merger process for B. rapa companion
    paper.
    """
    # Use this command's own usage text and the arguments handed to it, like
    # the other subcommands do, rather than the module docstring and sys.argv.
    p = OptionParser(scenario.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (5, 5))
    root = fig.add_axes([0, 0, 1, 1])

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    # Layout format: (x, y, label, (chr lengths))
    anc = (.5, .9, "Ancestor", (1,))
    s1 = (.2, .6, "Genome I", (1,))
    s2 = (.5, .6, "Genome II", (1,))
    s3 = (.8, .6, "Genome III", (1,))
    tetra = (.35, .4, "Tetraploid I / II", (.5, .9))
    hexa = (.5, .1, "Hexaploid I / II / III", (.36, .46, .9))
    labels = (anc, s1, s2, s3, tetra, hexa)
    connections = ((anc, s1), (anc, s2), (anc, s3),
                   (s1, tetra), (s2, tetra),
                   (tetra, hexa), (s3, hexa))

    xinterval = .02
    yratio = .05
    for xx, yy, label, chrl in labels:
        root.text(xx, yy, label, ha="center", va="center")
        # Chromosomes start left of the label centre, shifted in proportion
        # to the label length
        offset = len(label) * .012
        for i, c in enumerate(chrl):
            ya = yy + yratio * c
            yb = yy - yratio * c
            Chromosome(root, xx - offset + i * xinterval, ya, yb, width=.01)

    # Comments
    comments = ((.15, .33, "II dominant"),
                (.25, .03, "III dominant"))

    for xx, yy, c in comments:
        root.text(xx, yy, c, size=9, ha="center", va="center")

    # Branches: only the (x, y) part of each layout tuple is needed
    tip = .04
    for a, b in connections:
        xa, ya = a[:2]
        xb, yb = b[:2]
        plt.plot((xa, xb), (ya - tip, yb + 2 * tip), 'k-', lw=2, alpha=.5)

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
def litchi(args):
    """
    %prog litchi mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphics.synteny.
    """
    p = OptionParser(litchi.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not p.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout, switch=switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .7, .82)

    # On the left panel, make a species tree
    fc = 'lightslategrey'

    coords = {}
    xs, xp = .16, .03
    coords["lychee"] = (xs, .37)
    coords["clementine"] = (xs, .5)
    coords["cacao"] = (xs, .6)
    coords["strawberry"] = (xs, .7)
    coords["grape"] = (xs, .8)
    # Each `xs -= xp` moves the next internal node one step to the left
    xs -= xp
    coords["Sapindales"] = join_nodes(root, coords, "clementine", "lychee", xs)
    xs -= xp
    coords["Rosid-II"] = join_nodes(root, coords, "cacao", "Sapindales", xs)
    xs -= xp
    coords["Rosid"] = join_nodes(root, coords, "strawberry", "Rosid-II", xs)
    xs -= xp
    coords["crown"] = join_nodes(root, coords, "grape", "Rosid", xs,
                                 circle=False)

    # Names of the internal nodes
    for tag in ("Rosid", "Rosid-II", "Sapindales"):
        nx, ny = coords[tag]
        nx, ny = nx - .01, ny - .02
        root.text(nx, ny, tag, rotation=90, ha="right", va="top", color=fc)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "litchi"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def covlen(args):
    """
    %prog covlen covfile fastafile

    Plot coverage vs length. `covfile` is two-column listing contig id and
    depth of coverage.
    """
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from jcvi.formats.base import DictFile

    p = OptionParser(covlen.__doc__)
    p.add_option("--maxsize", default=1000000, type="int",
                 help="Max contig size")
    p.add_option("--maxcov", default=100, type="int",
                 help="Max depth of coverage")
    p.add_option("--color", default='m', help="Color of the data points")
    p.add_option("--kind", default="scatter",
                 choices=("scatter", "reg", "resid", "kde", "hex"),
                 help="Kind of plot to draw")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 2:
        sys.exit(not p.print_help())

    covfile, fastafile = args
    cov = DictFile(covfile, cast=float)
    s = Sizes(fastafile)
    data = []
    maxsize, maxcov = opts.maxsize, opts.maxcov
    for ctg, size in s.iter_sizes():
        # Contigs missing from the coverage file count as zero coverage
        c = cov.get(ctg, 0)
        if size > maxsize or c > maxcov:
            continue
        data.append((size, c))

    # zip(*data) cannot be unpacked into x, y when nothing passed the filters
    if not data:
        logging.error("No contigs within --maxsize and --maxcov; "
                      "nothing to plot")
        sys.exit(1)

    x, y = zip(*data)
    x = np.array(x)
    y = np.array(y)
    logging.debug("X size {0}, Y size {1}".format(x.size, y.size))

    df = pd.DataFrame()
    xlab, ylab = "Length", "Coverage of depth (X)"
    df[xlab] = x
    df[ylab] = y
    sns.jointplot(xlab, ylab, kind=opts.kind, data=df,
                  xlim=(0, maxsize), ylim=(0, maxcov),
                  stat_func=None, edgecolor="w", color=opts.color)

    figname = covfile + ".pdf"
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
def snpplot(args):
    """
    %prog snpplot counts.cdt

    Illustrate the histogram per SNP site.
    """
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    # Read in CDT file
    data = []
    with open(datafile) as fp:
        # The first two lines are skipped
        next(fp)
        next(fp)
        for row in fp:
            # Values start at the fifth whitespace-separated field
            values = [float(x) for x in row.split()[4:]]
            # normalize so that each row sums to 1
            total = sum(values)
            data.append([x * 1. / total for x in values])

    # Without rows there is nothing to stack (and the row height below would
    # divide by zero)
    if not data:
        sys.exit("No data rows found in {0}".format(datafile))

    # Number of values per row, taken from the last row read
    nval = len(data[-1])

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = .1, .9
    ymin, ymax = .1, .9
    yinterval = (ymax - ymin) / len(data)
    # Three values per row map to r/b/g; otherwise a light gray segment
    # comes first
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")
    ystart = ymax
    for d in data:
        # Each row is one horizontal bar made of consecutive colored segments
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(.05, .5, "{0} LMD50 SNPs".format(len(data)),
              ha="center", va="center", rotation=90, color="lightslategray")

    for x, t, c in zip((.3, .5, .7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, .95, t, color=c, ha="center", va="center")
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def dotplot_main(anchorfile, qbed, sbed, image_name, iopts, vmin=0, vmax=1,
                 is_self=False, synteny=False, cmap_text=None, cmap="copper",
                 genomenames=None, sample_number=10000, minfont=5,
                 palette=None, chrlw=.01, title=None):
    """
    Create a figure sized from `iopts`, let dotplot() draw into it with all
    the given display settings, and save the result to `image_name`.
    """
    fig = plt.figure(1, (iopts.w, iopts.h))
    canvas = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    plot_ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    # Display settings are forwarded to dotplot() unchanged
    settings = dict(
        vmin=vmin,
        vmax=vmax,
        is_self=is_self,
        synteny=synteny,
        cmap_text=cmap_text,
        cmap=cmap,
        genomenames=genomenames,
        sample_number=sample_number,
        minfont=minfont,
        palette=palette,
        chrlw=chrlw,
        title=title,
    )
    dotplot(anchorfile, qbed, sbed, fig, canvas, plot_ax, **settings)

    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def ploidy(args):
    """
    %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both
    graphics.karyotype and graphics.synteny.
    """
    p = OptionParser(ploidy.__doc__)
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both components draw onto the same full-figure axes
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .27, .37, .52)

    # annotate the WGD events: three labelled circles stacked at the same x,
    # each with a "x2" note beside it
    fc = 'lightslategrey'
    x = .09
    radius = .012
    TextCircle(root, x, .825, r'$\tau$', radius=radius, fc=fc)
    TextCircle(root, x, .8, r'$\sigma$', radius=radius, fc=fc)
    TextCircle(root, x, .72, r'$\rho$', radius=radius, fc=fc)
    for ypos in (.825, .8, .72):
        root.text(.12, ypos, r"$\times2$", color=fc, ha="center", va="center")
    # Dotted vertical lines through the circle positions
    root.plot([x, x], [.85, .775], ":", color=fc, lw=2)
    root.plot([x, x], [.75, .675], ":", color=fc, lw=2)

    # Panel letters: (x, y, text)
    labels = ((.04, .96, 'A'), (.04, .54, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def amborella(args):
    """
    %prog amborella seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphics.synteny.
    """
    p = OptionParser(amborella.__doc__)
    p.add_option("--tree",
                 help="Display trees on the bottom of the figure [default: %default]")
    p.add_option("--switch",
                 help="Rename the seqid with two-column file [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    tree = opts.tree

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch, tree=tree)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .5, .68, .5)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .05
    radius = .012
    # Raw strings: "\g" and "\e" are not valid escape sequences, so the
    # non-raw form triggers a warning on recent Python versions. The string
    # values are unchanged.
    TextCircle(root, x, .86, r'$\gamma$', radius=radius)
    TextCircle(root, x, .95, r'$\epsilon$', radius=radius)
    root.plot([x, x], [.83, .9], ":", color=fc, lw=2)
    # Dotted cap around the upper circle, spanning -70 to 249 degrees
    pts = plot_cap((x, .95), np.radians(range(-70, 250)), .02)
    x, y = zip(*pts)
    root.plot(x, y, ":", color=fc, lw=2)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "amborella"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def ploidy(args):
    """
    %prog ploidy seqids layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of B. napus genome.
    """
    p = OptionParser(ploidy.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    seqidsfile, klayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)

    fc = "darkslategrey"
    radius = .012
    ot = -.05  # use this to adjust vertical position of the left panel
    TextCircle(root, .1, .9 + ot, r'$\gamma$', radius=radius, fc=fc)
    root.text(.1, .88 + ot, r"$\times3$", ha="center", va="top", color=fc)
    TextCircle(root, .08, .79 + ot, r'$\alpha$', radius=radius, fc=fc)
    TextCircle(root, .12, .79 + ot, r'$\beta$', radius=radius, fc=fc)
    root.text(.1, .77 + ot, r"$\times3\times2\times2$",
              ha="center", va="top", color=fc)
    root.text(.1, .67 + ot, r"Brassica triplication",
              ha="center", va="top", color=fc, size=11)
    root.text(.1, .65 + ot, r"$\times3\times2\times2\times3$",
              ha="center", va="top", color=fc)
    root.text(.1, .42 + ot, r"Allo-tetraploidy",
              ha="center", va="top", color=fc, size=11)
    root.text(.1, .4 + ot, r"$\times3\times2\times2\times3\times2$",
              ha="center", va="top", color=fc)

    bb = dict(boxstyle="round,pad=.5", fc="w", ec="0.5", alpha=0.5)
    # Both halves are raw strings: "\m" is not a valid escape sequence, so
    # the non-raw form triggers a warning on recent Python versions. The
    # resulting text is unchanged.
    root.text(.5, .2 + ot, r"\noindent\textit{Brassica napus}\\"
              r"(A$\mathsf{_n}$C$\mathsf{_n}$ genome)", ha="center",
              size=16, color="k", bbox=bb)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "napus"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def _draw_trees(trees, nrow=1, ncol=1, rmargin=.3, iopts=None, outdir=".",
                shfile=None, **kwargs):
    """
    Draw one or multiple trees on one plot.

    `trees` maps a name to a tree; they are laid out on an nrow x ncol grid,
    and a new image is written for every nrow * ncol trees. `shfile`, when
    given, is a tab-delimited file read with DictFile whose values are passed
    to draw_tree as `SH`, looked up by tree name. `iopts` supplies figure
    size, format and dpi (a 5x5 pdf at 300 dpi is used without it). Images
    are written into `outdir`; extra keyword arguments go to draw_tree.
    """
    from jcvi.graphics.tree import draw_tree

    # An empty mapping stands in when no SH file is given, so the lookup
    # below only has to deal with missing keys.
    # NOTE(review): assumes DictFile raises KeyError for unknown keys, as a
    # dict does; confirm against its definition.
    SHs = DictFile(shfile, delimiter="\t") if shfile else {}

    # Materialize the keys once; this also works where keys() returns a view
    names = list(trees.keys())
    ntrees = len(names)
    n = nrow * ncol
    npages = int(ceil(float(ntrees) / n))
    for x in range(npages):
        fig = plt.figure(1, (iopts.w, iopts.h)) if iopts \
            else plt.figure(1, (5, 5))
        root = fig.add_axes([0, 0, 1, 1])

        # Lower-left corners of the grid cells: columns left to right,
        # rows top to bottom
        xiv = 1. / ncol
        yiv = 1. / nrow
        xstart = list(np.arange(0, 1, xiv)) * nrow
        ystart = list(chain(*zip(*[list(np.arange(0, 1, yiv))[::-1]] * ncol)))
        for i in range(n * x, n * (x + 1)):
            if i == ntrees:
                break
            ax = fig.add_axes([xstart[i % n], ystart[i % n], xiv, yiv])
            f = names[i]
            tree = trees[f]
            try:
                SH = SHs[f]
            except KeyError:
                SH = None
            draw_tree(ax, tree, rmargin=rmargin, reroot=False,
                      supportcolor="r", SH=SH, **kwargs)

        root.set_xlim(0, 1)
        root.set_ylim(0, 1)
        root.set_axis_off()

        fmt = iopts.format if iopts else "pdf"
        dpi = iopts.dpi if iopts else 300
        if n == 1:
            # One tree per image: name the image after the tree just drawn
            image_name = f.rsplit(".", 1)[0] + "." + fmt
        else:
            image_name = "trees{0}.{1}".format(x, fmt)
        image_name = op.join(outdir, image_name)
        savefig(image_name, dpi=dpi, iopts=iopts)
        plt.clf()
def correlation(args):
    """
    %prog correlation postgenomic-s.tsv

    Plot correlation of age vs. postgenomic features.
    """
    parser = OptionParser(correlation.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="12x8")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    tsvfile, = args
    table = pd.read_csv(tsvfile, sep="\t")
    composite_correlation(table, size=(iopts.w, iopts.h))

    # Output name: input without its last extension, plus ".correlation.pdf"
    stem = tsvfile.rsplit(".", 1)[0]
    savefig(stem + ".correlation.pdf")
def ccn(args):
    """
    %prog ccn combined.tsv

    Plot several ccn plots including chr1,chrX,chrY,chrM
    """
    parser = OptionParser(ccn.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="12x8")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    tsvfile, = args
    table = pd.read_csv(tsvfile, sep="\t")
    composite_ccn(table, size=(iopts.w, iopts.h))

    # Output name: input without its last extension, plus ".ccn.pdf"
    stem = tsvfile.rsplit(".", 1)[0]
    savefig(stem + ".ccn.pdf")
def mtdotplots(args):
    """
    %prog mtdotplots Mt3.5 Mt4.0 medicago.medicago.lifted.1x1.anchors

    Plot Mt3.5 and Mt4.0 side-by-side. This is essentially combined from two
    graphics.dotplot() function calls as panel A and B.
    """
    from jcvi.graphics.dotplot import check_beds, dotplot

    p = OptionParser(mtdotplots.__doc__)
    p.set_beds()
    opts, args, iopts = p.set_image_options(args, figsize="16x8", dpi=90)

    if len(args) != 3:
        sys.exit(not p.print_help())

    a, b, ac = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    # Left/right half canvases and the dot plot axes inside each of them
    r1 = fig.add_axes([0, 0, .5, 1])
    r2 = fig.add_axes([.5, 0, .5, 1])
    a1 = fig.add_axes([.05, .1, .4, .8])
    a2 = fig.add_axes([.55, .1, .4, .8])

    panels = (
        (a, r1, a1, "Mt3.5_Mt3.5"),
        (b, r2, a2, "Mt4.0_Mt4.0"),
    )
    for idx, (folder, canvas, ax, names) in enumerate(panels):
        if idx:
            # Clear the BED options before the second panel is set up
            opts.qbed = opts.sbed = None
        anchorfile = op.join(folder, ac)
        qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts)
        dotplot(anchorfile, qbed, sbed, fig, canvas, ax,
                is_self=is_self, genomenames=names)

    # Panel letters
    for xpos, letter in ((.03, "A"), (.53, "B")):
        root.text(xpos, .95, letter, ha="center", va="center", size=36)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = ".".join(("mtdotplots", iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def gff(args):
    """
    %prog gff *.gff

    Draw exons for genes based on gff files. Each gff file should contain only
    one gene, and only the "mRNA" and "CDS" feature will be drawn on the canvas.
    """
    align_choices = ("left", "center", "right")
    parser = OptionParser(gff.__doc__)
    parser.add_option("--align", default="left", choices=align_choices,
                      help="Horizontal alignment [default: %default]")
    parser.add_option("--noUTR", default=False, action="store_true",
                      help="Do not plot UTRs [default: %default]")
    opts, args = parser.parse_args(args)

    if len(args) < 1:
        sys.exit(not parser.print_help())

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    gffiles = args
    canvas = .6
    setups, ratio = get_setups(gffiles, canvas=canvas, noUTR=opts.noUTR)
    align = opts.align

    # Anchor x: .2 for left alignment, .8 for anything else
    xs = .8
    if align == "left":
        xs = .2
    # Genes are stacked downwards from y = .8, sharing the canvas height
    yinterval = canvas / len(gffiles)
    ys = .8
    tip = .01

    # Name placement per alignment: (x shift, text ha); "center" gets no name
    name_placement = {
        "left": (-tip, "right"),
        "right": (tip, "left"),
    }.get(align)

    for genename, mrnabed, cdsbeds in setups:
        ExonGlyph(root, xs, ys, mrnabed, cdsbeds, ratio=ratio, align=align)
        if name_placement is not None:
            shift, ha = name_placement
            root.text(xs + shift, ys, genename, ha=ha, va="center")
        ys -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("exons.pdf", dpi=300)
def qc(args):
    """
    %prog qc postgenomic-s.tsv

    Plot basic statistics of a given sample:
    Age, Gender, Ethnicity, Cohort, Chemistry
    """
    # Show this command's own usage text (it previously borrowed the
    # docstring of `heritability`).
    p = OptionParser(qc.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    tsvfile, = args
    df = pd.read_csv(tsvfile, sep="\t")
    composite_qc(df, size=(iopts.w, iopts.h))
    # Output name: input without its last extension, plus ".qc.pdf"
    outfile = tsvfile.rsplit(".", 1)[0] + ".qc.pdf"
    savefig(outfile)
def movieframe(args):
    """
    %prog movieframe tour test.clm contigs.ref.anchors

    Draw heatmap and synteny in the same plot.
    """
    parser = OptionParser(movieframe.__doc__)
    parser.add_option("--label", help="Figure title")
    parser.set_beds()
    parser.set_outfile(outfile=None)
    opts, args, iopts = parser.set_image_options(
        args, figsize="16x8", style="white", cmap="coolwarm",
        format="png", dpi=120)

    if len(args) != 3:
        sys.exit(not parser.print_help())

    tour, clmfile, anchorsfile = args
    tour = tour.split(",")

    # Fall back to a default image name, and to that name's stem as label
    image_name = opts.outfile
    if not image_name:
        image_name = "movieframe." + iopts.format
    label = opts.label
    if not label:
        label = op.basename(image_name).rsplit(".", 1)[0]

    clm = CLMFile(clmfile)
    totalbins, bins, breaks = make_bins(tour, clm.tig_to_size)
    M = read_clm(clm, totalbins, bins)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])        # whole canvas
    heatmap_ax = fig.add_axes([.05, .1, .4, .8])   # heatmap
    dotplot_ax = fig.add_axes([.55, .1, .4, .8])   # dot plot
    dotplot_canvas = fig.add_axes([.5, 0, .5, 1])  # dot plot canvas

    # Left axis: heatmap
    plot_heatmap(heatmap_ax, M, breaks, iopts)

    # Right axis: synteny
    qbed, sbed, qorder, sorder, is_self = check_beds(
        anchorsfile, parser, opts, sorted=False)
    dotplot(anchorsfile, qbed, sbed, fig, dotplot_canvas, dotplot_ax,
            sep=False, title="")

    # Two centered header lines: the CLM name and the label
    headers = ((.98, clm.name, "g"), (.95, label, "darkslategray"))
    for ypos, text, color in headers:
        root.text(.5, ypos, text, color=color, ha="center", va="center")
    normalize_axes(root)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """
    Command-line entry point: draw a Karyotype from a seqids file and a layout
    file and save it as `karyotype.<format>`.
    """
    parser = OptionParser(__doc__)
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqidsfile, layoutfile = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    canvas = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, canvas, seqidsfile, layoutfile)

    canvas.set_xlim(0, 1)
    canvas.set_ylim(0, 1)
    canvas.set_axis_off()

    image_name = ".".join(("karyotype", iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def draw(self, title="*Ks* distribution", filename="Ks_plot.pdf"):
    """
    Decorate the Ks plot axes (legend, x-limits, title, axis labels, tick
    labels) and save the figure to `filename` at 300 dpi.

    `title` is passed through markup() before being used as the plot title.
    """
    ax, ks_max = self.ax, self.ks_max
    lines, labels, legendp = self.lines, self.labels, self.legendp

    # A legend is drawn only when there is more than one line
    if len(lines) > 1:
        legend = ax.legend(lines, labels, loc=legendp,
                           shadow=True, fancybox=True, prop={"size": 10})
        legend.get_frame().set_alpha(.5)

    upper = ks_max - self.interval
    ax.set_xlim((0, upper))
    ax.set_title(markup(title), fontweight="bold")
    ax.set_xlabel(markup('Synonymous substitutions per site (*Ks*)'))
    ax.set_ylabel('Percentage of gene pairs')

    # Re-apply the current tick values as labels, in Helvetica
    ax.set_xticklabels(ax.get_xticks(), family='Helvetica')
    ax.set_yticklabels(ax.get_yticks(), family='Helvetica')

    savefig(filename, dpi=300)
def main():
    # Usage text is taken from the module docstring.
    parser = OptionParser(__doc__)
    parser.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]",
    )
    parser.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]",
    )
    parser.add_option("--extra", help="Extra features in BED format")
    parser.add_option(
        "--gene_style",
        default="Rectangle",
        help="Default <Rectangle> to plot genes as rectangle. Accept <Arrow> to add orientation of genes.",
    )
    parser.add_option(
        "--scalebar",
        default=False,
        action="store_true",
        help="Add scale bar to the plot",
    )
    parser.add_option(
        "--add_gene_legend",
        default=False,
        action="store_true",
        help="Add forward and reverse strand gene legend to the plot",
    )
    parser.add_option(
        "--add_gene_label",
        default=False,
        action="store_true",
        help="Add gene names to the plot",
    )
    parser.add_option(
        "--shadestyle",
        default="curve",
        choices=Shade.Styles,
        help="Style of syntenic wedges",
    )
    options, positional, image_opts = parser.set_image_options(figsize="8x7")

    if len(positional) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = positional
    # Output prefix: the data file name without its last extension
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (image_opts.w, image_opts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Synteny draws onto `root` as a side effect of construction.
    synteny_kwargs = dict(
        switch=options.switch,
        tree=options.tree,
        extra_features=options.extra,
        scalebar=options.scalebar,
        shadestyle=options.shadestyle,
        gene_legend=options.add_gene_legend,
        add_gene_label=options.add_gene_label,
        gene_style=options.gene_style,
    )
    Synteny(fig, root, datafile, bedfile, layoutfile, **synteny_kwargs)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(
        prefix + "." + image_opts.format,
        dpi=image_opts.dpi,
        iopts=image_opts,
    )
def dotplot_main(args):
    """Command-line entry point: parse options, prepare the beds and draw a dot plot.

    `args` is the list of command-line arguments; exactly one positional
    argument (the anchor file) is expected. The figure is written to
    `--outfile`, or to the anchor file name with its extension replaced by the
    image format.
    """
    p = OptionParser(__doc__)
    p.set_beds()
    p.add_option(
        "--synteny",
        default=False,
        action="store_true",
        help="Run a fast synteny scan and display blocks",
    )
    p.add_option("--cmaptext", help="Draw colormap box on the bottom-left corner")
    p.add_option(
        "--vmin",
        dest="vmin",
        type="float",
        default=0,
        help="Minimum value in the colormap",
    )
    p.add_option(
        "--vmax",
        dest="vmax",
        type="float",
        default=2,
        help="Maximum value in the colormap",
    )
    p.add_option(
        "--nmax",
        dest="sample_number",
        type="int",
        default=10000,
        help="Maximum number of data points to plot",
    )
    p.add_option(
        "--minfont",
        type="int",
        default=4,
        help="Do not render labels with size smaller than",
    )
    p.add_option("--colormap", help="Two column file, block id to color mapping")
    p.add_option(
        "--colororientation",
        action="store_true",
        default=False,
        help="Color the blocks based on orientation, similar to mummerplot",
    )
    p.add_option(
        "--nosort",
        default=False,
        action="store_true",
        help="Do not sort the seqids along the axes",
    )
    p.add_option(
        "--nosep", default=False, action="store_true", help="Do not add contig lines"
    )
    p.add_option("--title", help="Title of the dot plot")
    p.set_dotplot_opts()
    p.set_outfile(outfile=None)
    opts, args, iopts = p.set_image_options(
        args, figsize="9x9", style="dark", dpi=90, cmap="copper"
    )

    if len(args) != 1:
        sys.exit(not p.print_help())

    (anchorfile,) = args
    qbed, sbed, qorder, sorder, is_self = check_beds(
        anchorfile, p, opts, sorted=(not opts.nosort)
    )

    # An explicit --colormap file wins over --colororientation.
    palette = opts.colormap
    if palette:
        palette = Palette(palettefile=palette)
    elif opts.colororientation:
        palette = Palette.from_block_orientation(anchorfile, qbed, sbed)

    cmaptext = opts.cmaptext
    if anchorfile.endswith(".ks"):
        from jcvi.apps.ks import KsFile

        logging.debug("Anchors contain Ks values")
        # Ks input gets a default colormap label unless the user supplied one.
        cmaptext = cmaptext or "*Ks* values"
        anchorksfile = anchorfile + ".anchors"
        if need_update(anchorfile, anchorksfile):
            ksfile = KsFile(anchorfile)
            ksfile.print_to_anchors(anchorksfile)
        anchorfile = anchorksfile

    if opts.skipempty:
        # Keep only the seqids that appear in at least one anchor pair.
        ac = AnchorFile(anchorfile)
        if is_self:
            qseqids = sseqids = set()
        else:
            qseqids, sseqids = set(), set()

        for pair in ac.iter_pairs():
            q, s = pair[:2]
            _, q = qorder[q]
            _, s = sorder[s]
            qseqids.add(q.seqid)
            sseqids.add(s.seqid)

        if is_self:
            qbed = sbed = subset_bed(qbed, qseqids)
        else:
            qbed = subset_bed(qbed, qseqids)
            sbed = subset_bed(sbed, sseqids)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])  # the dot plot

    dotplot(
        anchorfile,
        qbed,
        sbed,
        fig,
        root,
        ax,
        vmin=opts.vmin,
        vmax=opts.vmax,
        is_self=is_self,
        synteny=opts.synteny,
        # Pass the resolved label (including the Ks default), not the raw
        # option: previously the default computed above was silently dropped.
        cmap_text=cmaptext,
        cmap=iopts.cmap,
        genomenames=opts.genomenames,
        sample_number=opts.sample_number,
        minfont=opts.minfont,
        palette=palette,
        sep=(not opts.nosep),
        sepcolor=set1[int(opts.theme)],
        title=opts.title,
        stdpf=(not opts.nostdpf),
        chpf=(not opts.nochpf),
    )

    image_name = opts.outfile or (op.splitext(anchorfile)[0] + "." + opts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    fig.clear()
def heatmap(args):
    """
    %prog heatmap fastafile chr1

    Combine stack plot with heatmap to show abundance of various tracks along
    given chromosome. Need to give multiple beds to --stacks and --heatmaps
    """
    p = OptionParser(heatmap.__doc__)
    p.add_option(
        "--stacks",
        default="Exons,Introns,DNA_transposons,Retrotransposons",
        help="Features to plot in stackplot [default: %default]",
    )
    p.add_option(
        "--heatmaps",
        default="Copia,Gypsy,hAT,Helitron,Introns,Exons",
        help="Features to plot in heatmaps [default: %default]",
    )
    p.add_option(
        "--meres",
        default=None,
        help="Extra centromere / telomere features [default: %default]",
    )
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    # `chrom` rather than `chr`, to avoid shadowing the builtin.
    fastafile, chrom = args
    window, shift, subtract = check_window_options(opts)

    stacks = opts.stacks.split(",")
    heatmaps = opts.heatmaps.split(",")
    stackbeds = get_beds(stacks)
    heatmapbeds = get_beds(heatmaps)
    stackbins = get_binfiles(stackbeds, fastafile, shift, subtract=subtract)
    heatmapbins = get_binfiles(heatmapbeds, fastafile, shift, subtract=subtract)

    margin = .06
    inner = .015
    clen = Sizes(fastafile).mapping[chrom]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, clen, rightmargin=4 * margin)
    yinterval = .3
    xx = margin
    yy = 1 - margin
    yy -= yinterval
    xlen = clen / ratio

    # Short display name: "abc_12" becomes "A12"
    cc = chrom
    if "_" in chrom:
        ca, cb = chrom.split("_")
        cc = ca[0].upper() + cb

    root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
    ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

    nbins = get_nbins(clen, shift)

    # Widen the window to about 1% of the chromosome when that is larger,
    # rounded DOWN to a multiple of `shift`. Floor division is required: with
    # true division `owindow / shift * shift` is a no-op.
    owindow = clen // 100
    if owindow > window:
        window = owindow // shift * shift

    stackplot(ax, stackbins, nbins, palette, chrom, window, shift)
    ax.text(
        .1,
        .9,
        cc,
        va="top",
        zorder=100,
        transform=ax.transAxes,
        bbox=dict(boxstyle="round", fc="w", alpha=.5),
    )

    # Legends
    xx += xlen + .01
    yspace = (yinterval - inner) / (len(stackbins) + 1)
    yy = 1 - margin - yinterval
    # Loop variables are named so they do not clobber the option parser `p`.
    for name, color in zip(stacks, palette):
        name = name.replace("_", " ")
        name = Registration.get(name, name)

        yy += yspace
        root.add_patch(Rectangle((xx, yy), inner, inner, color=color, lw=0))
        root.text(xx + 1.5 * inner, yy, name, size=10)

    yh = .05  # Heatmap height

    # Heatmaps
    xx = margin
    yy = 1 - margin - yinterval - inner
    for name, binfile in zip(heatmaps, heatmapbins):
        name = name.replace("_", " ")
        name = Registration.get(name, name)

        yy -= yh
        m = stackarray(binfile, chrom, window, shift)

        # imshow needs a 2-D array, so the 1-D track is duplicated into two rows.
        Y = np.array([m, m])
        root.imshow(
            Y,
            extent=(xx, xx + xlen, yy, yy + yh - inner),
            interpolation="nearest",
            aspect="auto",
        )
        root.text(xx + xlen + .01, yy, name, size=10)

    # One more row down: centromere / telomere markers go below the heatmaps.
    yy -= yh

    meres = opts.meres
    if meres:
        bed = Bed(meres)
        for b in bed:
            if b.seqid != chrom:
                continue
            pos = (b.start + b.end) / 2
            cpos = pos / ratio
            xx = margin + cpos
            accn = b.accn.capitalize()
            root.add_patch(CirclePolygon((xx, yy), radius=.01, fc="m", ec="m"))
            root.text(xx + .014, yy, accn, va="center", color="m")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = chrom + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    # Usage text is taken from the module docstring.
    parser = OptionParser(__doc__)
    parser.add_option("--switch", help="Rename the seqid with two-column file")
    parser.add_option("--tree", help="Display trees on the bottom of the figure")
    parser.add_option("--extra", help="Extra features in BED format")
    genelabel_help = (
        "Show gene labels at this font size, useful for debugging. "
        + "However, plot may appear visually crowded. "
        + "Reasonably good values are 2 to 6 [Default: disabled]"
    )
    parser.add_option("--genelabelsize", default=0, type="int", help=genelabel_help)
    parser.add_option(
        "--scalebar",
        default=False,
        action="store_true",
        help="Add scale bar to the plot",
    )
    parser.add_option(
        "--glyphstyle",
        default="box",
        choices=Glyph.Styles,
        help="Style of feature glyphs",
    )
    parser.add_option(
        "--glyphcolor",
        default="orientation",
        choices=Glyph.Palette,
        help="Glyph coloring based on",
    )
    parser.add_option(
        "--shadestyle",
        default="curve",
        choices=Shade.Styles,
        help="Style of syntenic wedges",
    )
    options, positional, image_opts = parser.set_image_options(figsize="8x7")

    if len(positional) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = positional
    # Output prefix: the data file name without its last extension
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (image_opts.w, image_opts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Synteny draws onto `root` as a side effect of construction.
    synteny_kwargs = dict(
        switch=options.switch,
        tree=options.tree,
        extra_features=options.extra,
        genelabelsize=options.genelabelsize,
        scalebar=options.scalebar,
        shadestyle=options.shadestyle,
        glyphstyle=options.glyphstyle,
        glyphcolor=options.glyphcolor,
    )
    Synteny(fig, root, datafile, bedfile, layoutfile, **synteny_kwargs)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(
        prefix + "." + image_opts.format,
        dpi=image_opts.dpi,
        iopts=image_opts,
    )
def wheel(args):
    """
    %prog wheel datafile.csv groups.csv

    Wheel plot that shows continous data in radial axes.
    """
    # The docstring above doubles as the command-line usage text.
    parser = OptionParser(wheel.__doc__)
    parser.add_option(
        "--column",
        default="score",
        choices=("score", "percentile"),
        help="Which column to extract from `datafile.csv`",
    )
    opts, args, iopts = parser.set_image_options(args, figsize="5x5", format="png")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    datafile, groupsfile = args
    column = opts.column
    use_score = column == "score"
    linecolor = "#d6d6d6"

    value_of = parse_data(datafile, score_column=opts.column)
    group_of = parse_groups(groupsfile)
    # Only entries present in both files are plotted, in groups-file order.
    labels = [name for name in group_of if name in value_of]
    print(labels)
    df = [value_of[name] for name in labels]
    print(df)
    groups = [group_of[name] for name in labels]
    print(groups)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    categories = len(df)
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)

    brewer = [
        "#FF3B30",
        "#DD43A0",
        "#5856D6",
        "#007AFE",
        "#56BDEC",
        "#4CD8BA",
        "#4CD864",
        "#B0F457",
        "#FEF221",
        "#FFCC01",
        "#FF9500",
        "#FF3B30",
    ]

    # Baseline: one angle per category, plus a dense circle for contour lines
    theta = np.linspace(1.5 * np.pi, 3.5 * np.pi, endpoint=False, num=categories)
    _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi)
    R = max(max(df), 10)
    if use_score:
        xlim, plim, ci = (-R, R), (-R / 2, R), (-0.5, 2)
    else:
        xlim, plim, ci = (-100, 100), (0, 100), (10, 90)

    # Grid: radial spokes, only in score mode
    if use_score:
        for angle in theta:
            ax.plot([angle, angle], plim, color=linecolor)
    ax.axis("off")

    # Contours at the inner and outer plot limits
    for radius in plim:
        ax.plot(_theta, [radius] * len(_theta), color=linecolor)

    # Sectors (groupings): indices of consecutive entries sharing a group
    collapsed_groups = []
    gg = []
    for group_name, members in groupby(enumerate(groups), lambda pair: pair[1]):
        collapsed_groups.append(group_name)
        gg.append([idx for idx, _ in members])

    show_sector = False
    if show_sector:
        theta_interval = 2 * np.pi / categories
        theta_pad = theta_interval / 2 * 0.9
        for color, group in zip(brewer, gg):
            tmin, tmax = min(group), max(group)
            sector(
                ax,
                theta[tmin],
                theta[tmax],
                theta_pad,
                R * 0.95,
                ls="-",
                color=color,
                lw=2,
            )

    # Data
    closed_plot(ax, theta, df, color="lightslategray", alpha=0.25)
    lower, upper = ci[0], ci[1]
    for color, group in zip(brewer, gg):
        inside = [(theta[i], df[i]) for i in group if (lower <= df[i] <= upper)]
        outside = [(theta[i], df[i]) for i in group if (df[i] < lower or df[i] > upper)]
        for alpha, points in zip((1, 1), (inside, outside)):
            if points:
                point_theta, point_r = zip(*points)
                ax.plot(point_theta, point_r, "o", color=color, alpha=alpha)

    # Print out data
    diseaseNames, risks = labels, df
    theta_text = ",".join("{:.1f}".format(degrees(x)) for x in theta)
    print("var theta = [{}]".format(theta_text))
    risks_text = ",".join(str(x) for x in risks)
    print("var risks = [{}]".format(risks_text))
    names_text = ",".join(['"{}"'.format(x) for x in diseaseNames])
    print("var diseaseNames = [{}]".format(names_text))

    # Labels, placed on a circle of radius 0.5 around the canvas center
    from math import cos, sin

    label_radius = 0.5
    for i, label in enumerate(labels):
        tl = theta[i]
        x = 0.5 + label_radius * cos(tl)
        y = 0.5 + label_radius * sin(tl)
        d = degrees(tl)
        if 90 < d % 360 < 270:  # On the left quadrants
            d -= 180
        root.text(
            x, y, label, size=4, rotation=d, ha="center", va="center", color=linecolor
        )
        print(x, y, label)

    # Add baseline
    baseline = 0 if use_score else 50
    _r = len(_theta) * [baseline]
    closed_plot(ax, _theta, _r, "k:", lw=1, ms=4)

    # Add confidence interval
    if column == "percentile":
        barcolor = "#eeeeee"
        ax.bar([0], [upper - lower], width=2 * np.pi, bottom=lower, fc=barcolor)

    ax.set_rmin(xlim[0])
    ax.set_rmax(xlim[1])

    normalize_axes(root)

    image_name = pf + "-" + column + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main(args):
    """
    %prog newicktree

    Plot Newick formatted tree. The gene structure can be plotted along if
    --gffdir is given. The gff file needs to be `genename.gff`. If --sizes is
    on, also show the number of amino acids.

    With --barcode a mapping file can be provided to convert seq names to
    eg. species names, useful in unified tree display. This file should have
    distinctive barcodes in column1 and new names in column2, tab delimited.
    """
    p = OptionParser(main.__doc__)
    p.add_option(
        "--outgroup",
        help="Outgroup for rerooting the tree. "
        + "Use comma to separate multiple taxa.",
    )
    p.add_option(
        "--noreroot",
        default=False,
        action="store_true",
        help="Don't reroot the input tree [default: %default]",
    )
    p.add_option(
        "--rmargin",
        default=.3,
        type="float",
        help="Set blank rmargin to the right [default: %default]",
    )
    p.add_option(
        "--gffdir",
        default=None,
        help="The directory that contain GFF files [default: %default]",
    )
    p.add_option(
        "--sizes",
        default=None,
        help="The FASTA file or the sizes file [default: %default]",
    )
    p.add_option(
        "--SH",
        default=None,
        type="string",
        help="SH test p-value [default: %default]",
    )
    p.add_option(
        "--scutoff",
        default=0,
        type="int",
        help="cutoff for displaying node support, 0-100 [default: %default]",
    )
    p.add_option(
        "--barcode",
        default=None,
        help="path to seq names barcode mapping file: "
        "barcode<tab>new_name [default: %default]",
    )
    p.add_option(
        "--leafcolor",
        default="k",
        help="Font color for the OTUs, or path to a file "
        "containing color mappings: leafname<tab>color [default: %default]",
    )
    p.add_option("--leaffont", default=12, help="Font size for the OTUs")
    p.add_option(
        "--geoscale", default=False, action="store_true", help="Plot geological scale"
    )

    opts, args, iopts = p.set_image_options(args, figsize="8x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (datafile,) = args
    outgroup = None
    reroot = not opts.noreroot
    if opts.outgroup:
        outgroup = opts.outgroup.split(",")

    if datafile == "demo":
        # Built-in example tree, used when the argument is the literal "demo".
        tx = (
            "(((Os02g0681100:0.1151,Sb04g031800:0.11220)1.0:0.0537, "
            "(Os04g0578800:0.04318,Sb06g026210:0.04798)-1.0:0.08870)1.0:0.06985, "
            "((Os03g0124100:0.08845,Sb01g048930:0.09055)1.0:0.05332, "
            "(Os10g0534700:0.06592,Sb01g030630:0.04824)-1.0:0.07886):0.09389);"
        )
    else:
        logging.debug("Load tree file `{0}`.".format(datafile))
        # Context manager so the handle is closed deterministically; the
        # previous `open(datafile).read()` leaked it.
        with open(datafile) as fp:
            tx = fp.read()

    pf = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    if opts.geoscale:
        draw_geoscale(root)
    else:
        # --leafcolor is either a color name or a path to a color-mapping
        # file; an existing file takes precedence and the base color is "k".
        if op.isfile(opts.leafcolor):
            leafcolor = "k"
            leafcolorfile = opts.leafcolor
        else:
            leafcolor = opts.leafcolor
            leafcolorfile = None

        draw_tree(
            root,
            tx,
            rmargin=opts.rmargin,
            leafcolor=leafcolor,
            outgroup=outgroup,
            reroot=reroot,
            gffdir=opts.gffdir,
            sizes=opts.sizes,
            SH=opts.SH,
            scutoff=opts.scutoff,
            barcodefile=opts.barcode,
            leafcolorfile=leafcolorfile,
            leaffont=opts.leaffont,
        )

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A: jittered diagonal with the middle four points reversed
    w, h = .7, .35
    ax = fig.add_axes([.15, .6, w, h])

    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]
    # zip() returns an iterator on Python 3; materialize it so it can be
    # sliced and indexed below.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(
        xlis,
        ylis,
        "r-",
        lw=12,
        alpha=.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(
        xlds,
        ylds,
        "g-",
        lw=12,
        alpha=.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, .15, .15 + w, .57, height=.02, lw=2)
    root.text(.15 + w / 2, .55, "Chromosome location (bp)", ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax)

    # Panel B: three scaffolds a, b, c in their original order
    w = .37
    # Scaffold boundaries in data coordinates. Named `breakpoints` so the
    # option parser `p` is not overwritten.
    breakpoints = (0, 45, 75, 110)
    ax = fig.add_axes([.1, .12, w, h])
    xdata = list(range(10, 110, 10))
    ydata = ydata_orig = list(range(10, 110, 10))
    # This builds a new list, so `ydata_orig` keeps the untouched diagonal.
    ydata = ydata[:4] + ydata[7:] + ydata[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(
        xlis,
        ylis,
        "r-",
        lw=12,
        alpha=.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breakpoints, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax)
    patch = [.1 + w * x / 110. for x in breakpoints]
    HorizontalChromosome(root, .1, .1 + w, .09, patch=patch, height=.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.1 + w / 2, .04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C: scaffolds reordered and flipped, giving a perfect diagonal
    ax = fig.add_axes([.6, .12, w, h])
    patch = [.6 + w * x / 110. for x in breakpoints]
    ydata = ydata_orig
    ax.plot(
        xdata,
        ydata,
        "r-",
        lw=12,
        alpha=.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breakpoints, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax)
    HorizontalChromosome(root, .6, .6 + w, .09, patch=patch, height=.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.6 + w / 2, .04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((.05, .95, 'A'), (.05, .48, 'B'), (.55, .48, 'C'))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings.
    The paired read span will be converted to a new bedfile that contain
    the happy mates. ctg is the chr/scf/ctg that you want to plot the histogram
    on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    from jcvi.formats.bed import mates, bedpe

    # The docstring above doubles as the command-line usage text.
    parser = OptionParser(coverage.__doc__)
    parser.add_option(
        "--ymax", default=None, type="int", help="Limit ymax [default: %default]"
    )
    parser.add_option(
        "--spans",
        default=False,
        action="store_true",
        help="BED files already contain clone spans [default: %default]",
    )
    opts, args, iopts = parser.set_image_options(args, figsize="8x5")

    if len(args) < 3:
        sys.exit(not parser.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines, legends, not_covered = [], [], []
    text_y = .9
    # One color per BED file; zip() stops at the shorter of the two sequences.
    for bedfile, color in zip(bedfiles, "rgbcky"):
        if not opts.spans:
            # Derive the mates file and then the span BED, rebuilding each
            # only when need_update() says so.
            prefix = bedfile.rsplit(".", 1)[0]
            matesfile = prefix + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, matesbedfile = mates([bedfile, "--lib"])

            bedspanfile = prefix + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                span_args = [bedfile, "--span", "--mates={0}".format(matesfile)]
                bedpefile, bedspanfile = bedpe(span_args)
            bedfile = bedspanfile

        covered = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - covered

        legend = bedfile.split(".")[0]
        msg = "{0}: {1} bp not covered".format(legend, thousands(notcoveredbases))
        not_covered.append(msg)
        print(msg, file=sys.stderr)
        ax.text(.1, text_y, msg, color=color, size=9, transform=ax.transAxes)
        text_y -= .08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        (line,) = ax.plot(x, y, '-', color=color, lw=2, alpha=.5)
        lines.append(line)
        legends.append(legend)

    legend_box = ax.legend(lines, legends, shadow=True, fancybox=True)
    legend_box.get_frame().set_alpha(.5)

    ylabel = "Average depth per {0}Kb".format(size / bins / 1000)
    ax.set_xlim(0, size)
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
def astat(args):
    """
    %prog astat coverage.log

    Create coverage-rho scatter plot.
    """
    p = OptionParser(astat.__doc__)
    p.add_option("--cutoff", default=1000, type="int", help="Length cutoff")
    p.add_option("--genome", default="", help="Genome name")
    p.add_option(
        "--arrDist",
        default=False,
        action="store_true",
        help="Use arrDist instead",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (covfile,) = args
    cutoff = opts.cutoff
    genome = opts.genome
    plot_arrDist = opts.arrDist

    # A filtered copy "<covfile>.<cutoff>" is written on the first run and
    # read back on later runs.
    suffix = ".{0}".format(cutoff)
    small_covfile = covfile + suffix
    update_covfile = need_update(covfile, small_covfile)
    if update_covfile:
        fw = open(small_covfile, "w")
    else:
        fw = None
        logging.debug("Found `{0}`, will use this one".format(small_covfile))
        covfile = small_covfile

    data = []
    msg = "{0} tigs scanned ..."
    tigID = 0  # keeps the summary line valid when the file has no data rows
    # Both handles are closed even if parsing fails midway; previously
    # neither was ever closed.
    try:
        with open(covfile) as fp:
            header = next(fp)
            if fw is not None:
                fw.write(header)

            for row in fp:
                tigID, rho, covStat, arrDist = row.split()
                tigID = int(tigID)
                if tigID % 1000000 == 0:
                    sys.stderr.write(msg.format(tigID) + "\r")
                rho, covStat, arrDist = [float(x) for x in (rho, covStat, arrDist)]
                if rho < cutoff:
                    continue

                if fw is not None:
                    fw.write(row)
                data.append((tigID, rho, covStat, arrDist))
    finally:
        if fw is not None:
            fw.close()

    print(msg.format(tigID), file=sys.stderr)

    if not data:
        # zip(*data) below cannot be unpacked into four columns when empty.
        logging.error("No tigs pass the cutoff of {0}, nothing to plot".format(cutoff))
        return

    from jcvi.graphics.base import plt, savefig

    logging.debug("Plotting {0} data points.".format(len(data)))
    tigID, rho, covStat, arrDist = zip(*data)

    y = arrDist if plot_arrDist else covStat
    ytag = "arrDist" if plot_arrDist else "covStat"

    fig = plt.figure(1, (7, 7))
    ax = fig.add_axes([0.12, 0.1, 0.8, 0.8])
    ax.plot(rho, y, ".", color="lightslategrey")

    xtag = "rho"
    info = (genome, xtag, ytag)
    title = "{0} {1} vs. {2}".format(*info)
    ax.set_title(title)
    ax.set_xlabel(xtag)
    ax.set_ylabel(ytag)

    if plot_arrDist:
        ax.set_yscale("log")

    imagename = "{0}.png".format(".".join(info))
    savefig(imagename, dpi=150)
def depth(args):
    """
    %prog depth *.regions.bed.gz

    Plot the mosdepth regions BED file. We recommend to generate this BED file
    by (please adjust the --by parameter to your required resolution):

    $ mosdepth --no-per-base --use-median --fast-mode --by 1000000 sample.wgs sample.bam

    Use --chrinfo to specify a colormap between seqid, desired color, and
    optionally a new name. For example:

    chr01A, #c51b7d, 1A
    chr01B, #4d9221, 1B
    ...

    Only seqids that are in the colormap will be plotted, in the order that's
    given in the file. When --chrinfo is not set, every seqid will be drawn in
    black.

    Can take multiple BED files as input and then plot all of them in a
    composite figure.
    """
    # The docstring above doubles as the command-line usage text; it now names
    # the real option (--chrinfo) instead of a non-existent --colormap.
    p = OptionParser(depth.__doc__)
    p.add_option(
        "--chrinfo", help="Comma-separated mappings between seqid, color, new_name"
    )
    p.add_option(
        "--titleinfo",
        help="Comma-separated titles mappings between filename, title",
    )
    opts, args, iopts = p.set_image_options(args, style="dark", figsize="14x4")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bedfiles = args
    # An empty dict stands in when no mapping file is given.
    chrinfo = ChrInfoFile(opts.chrinfo) if opts.chrinfo else {}
    titleinfo = TitleInfoFile(opts.titleinfo) if opts.titleinfo else {}

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panels are stacked top to bottom, one per BED file.
    npanels = len(bedfiles)
    yinterval = 1.0 / npanels
    ypos = 1 - yinterval
    for bedfile in bedfiles:
        pf = op.basename(bedfile).split(".", 1)[0]
        bed = Bed(bedfile)

        # A single panel reuses the whole canvas as its root.
        panel_root = root if npanels == 1 else fig.add_axes([0, ypos, 1, yinterval])
        panel_ax = fig.add_axes([0.1, ypos + 0.2 * yinterval, 0.8, 0.65 * yinterval])
        if ypos > 0.001:
            # Separator line under every panel except the bottom one
            root.plot((0, 1), (ypos, ypos), "-", lw=2, color="lightslategray")

        # Default title: the file prefix up to the first underscore
        title = titleinfo.get(bedfile, pf.split("_", 1)[0])
        subtitle = None
        if isinstance(title, TitleInfoLine):
            subtitle = title.subtitle
            title = title.title

        draw_depth(
            panel_root, panel_ax, bed, chrinfo=chrinfo, title=title, subtitle=subtitle
        )
        ypos -= yinterval

    normalize_axes(root)

    # With several inputs the name is their common prefix; with one input it
    # is that file's prefix; "depth" is the fallback when either is empty.
    if npanels > 1:
        pf = op.commonprefix(bedfiles)
    pf = pf or "depth"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def simulate(args):
    """
    %prog simulate

    Run simulation on female restitution.
    """
    import seaborn as sns

    sns.set_style("darkgrid")

    # The docstring above doubles as the command-line usage text.
    parser = OptionParser(simulate.__doc__)
    parser.add_option(
        "--verbose",
        default=False,
        action="store_true",
        help="Verbose logging during simulation",
    )
    opts, args, iopts = parser.set_image_options(args, figsize="6x6")
    if len(args) != 0:
        sys.exit(not parser.print_help())

    # Construct a composite figure with 6 tracks
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    rows = 6
    ypad = 0.05
    yinterval = (1 - 2 * ypad) / (rows + 1)
    yy = 1 - ypad
    xpad = 0.2
    xwidth = 0.7

    # Axes are vertically stacked, and share x-axis
    axes = []
    yy_positions = []  # Top of each track, reused for the titles below
    for idx in range(rows):
        yy_positions.append(yy)
        yy -= yinterval
        track = fig.add_axes([xpad, yy, xwidth, yinterval * 0.85])
        # Only the bottom track keeps its x tick labels.
        if idx != rows - 1:
            plt.setp(track.get_xticklabels(), visible=False)
        axes.append(track)

    # Prepare the simulated data: two parents, then 1000 draws per population
    # in the order F1, F2, BC1, BC2, BC3, BC4.
    SS = Genome("SS", "SS", 10, 8)
    SO = Genome("SO", "SO", 8, 10)

    verbose = opts.verbose
    all_F1s = [simulate_F1(SO, SS, verbose=verbose) for _ in range(1000)]
    all_F2s = [simulate_F2(SO, SS, verbose=verbose) for _ in range(1000)]
    backcrosses = [
        [simulate_BCn(n, SO, SS, verbose=verbose) for _ in range(1000)]
        for n in (1, 2, 3, 4)
    ]
    populations = [("F1", all_F1s), ("F2", all_F2s)]
    populations += [
        ("BC{}".format(n), genomes) for n, genomes in enumerate(backcrosses, start=1)
    ]

    # Plotting: one population per track, top to bottom
    ax1, ax2, ax3, ax4, ax5, ax6 = axes
    for track, (_, genomes) in zip((ax1, ax2, ax3, ax4, ax5, ax6), populations):
        plot_summary(track, genomes)

    # Show title to the left
    xx = xpad / 2
    for (title, _), yy in zip(populations, yy_positions):
        subtitle = None  # no population currently carries a subtitle
        yy -= 0.06 if subtitle else 0.07
        root.text(
            xx,
            yy,
            title,
            color="darkslategray",
            ha="center",
            va="center",
            fontweight="semibold",
        )
        if subtitle:
            yy -= 0.02
            root.text(
                xx, yy, subtitle, color="lightslategray", ha="center", va="center"
            )

    axes[-1].set_xlabel("Number of unique chromosomes")
    adjust_spines(axes[-1], ["bottom"], outward=True)
    normalize_axes(root)

    savefig("plotter.pdf", dpi=120)

    outdir = "simulations"
    mkdir(outdir)
    # Write chromosomes to disk, one file per population (all_F1s, all_BC1s, ...)
    for label, genomes in populations:
        write_chromosomes(genomes, op.join(outdir, "all_{}s".format(label)))
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
                set2, panel_labels
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
                HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    # Companion files are derived from the input prefix:
    # <pf>.lifted.bed (markers) and <pf>.agp (scaffold layout).
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)

    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    # Keep linkage groups with at least `links` markers; halve the threshold
    # until at least one group qualifies.
    # NOTE(review): if s.mlg_counts is empty this loop never terminates --
    # confirm the assert above rules that case out.
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]
    while not mlgs:
        links /= 2
        logging.error("No markers to plot, --links reset to {0}".format(links))
        mlgs = [k for k, v in s.mlg_counts.items() if v >= links]

    # Size of each linkage group: the largest value of `function` over its markers
    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsize = max(function(x) for x in mm)
        mlgsizes[mlg] = mlgsize

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])  # left half: parallel coordinates
    ax2 = fig.add_axes([.5, 0, .5, 1])  # right half: scatter plots

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette: one color per map name (the part of the group name before the
    # first "-"), then re-keyed by linkage group
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        # A negative correlation flips the genetic map when it is drawn.
        flip = rho < 0

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        # Labels go on the outer side of each map, away from the center at x=.5
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
                 ha=ha, va="center", rotation=90)
        marker_pos.update(g.marker_pos)

    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    # Maps a base position to a y coordinate. The lambda reads `ystart` and
    # `ratio` at call time, and both are reassigned further down, so it must
    # only be called before those reassignments.
    f = lambda x: (ystart - ratio * x)
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        # Markers without a recorded position on a drawn map are skipped.
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    # `f` is rebound to map a base position to an x coordinate; it is applied
    # just below, before `ratio` is reassigned.
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2, xstart, xstop, ystop,
                         height=2 * tip, patch=patchstart, lw=2)

    gap = .03
    # From here on `ratio` is the panel height per unit of linkage-group size.
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        # `ystart` is consumed downwards: each panel takes `height`, then `gap`.
        ystart -= height
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        xx, yy = zip(*sd)
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5, 1 - .4 * gap / height, r"$\rho$={0:.3f}".format(rho),
                    ha="center", va="top", transform=ax.transAxes, color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        while height / len(ax.get_yticks()) < .03 and len(ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        # Negatively correlated groups are shown with an inverted y axis.
        if rho < 0:
            ax.invert_yaxis()

    # Group names along the center line; with more than four groups the
    # horizontal alignment alternates between left and right.
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90,
                      ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
def histogram(args):
    """
    %prog histogram meryl.histogram species K

    Plot the histogram based on meryl K-mer distribution, species and K are
    only used to annotate the graphic.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # args: list of command-line tokens (options plus 3 positionals).
    # Returns the estimated genome size as an int (total k-mers divided by
    # the k-mer coverage); exits via sys.exit() on wrong positional count.
    p = OptionParser(histogram.__doc__)
    p.add_option("--vmin", dest="vmin", default=1, type="int",
                 help="minimum value, inclusive [default: %default]")
    p.add_option("--vmax", dest="vmax", default=100, type="int",
                 help="maximum value, inclusive [default: %default]")
    p.add_option("--pdf", default=False, action="store_true",
                 help="Print PDF instead of ASCII plot [default: %default]")
    p.add_option("--coverage", default=0, type="int",
                 help="Kmer coverage [default: auto]")
    p.add_option("--nopeaks", default=False, action="store_true",
                 help="Do not annotate K-mer peaks")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    histfile, species, N = args
    ascii_mode = not opts.pdf  # renamed from `ascii` to avoid shadowing the builtin
    peaks = not opts.nopeaks
    N = int(N)

    # A meryl index is converted to a histogram file first
    if histfile.rsplit(".", 1)[-1] in ("mcdat", "mcidx"):
        logging.debug("CA kmer index found")
        histfile = merylhistogram(histfile)

    ks = KmerSpectrum(histfile)
    ks.analyze(K=N)

    Total_Kmers = int(ks.totalKmers)
    coverage = opts.coverage
    # --coverage of 0 (the default) means "use the detected main peak"
    Kmer_coverage = ks.max2 if not coverage else coverage
    Genome_size = int(round(Total_Kmers * 1. / Kmer_coverage))

    Total_Kmers_msg = "Total {0}-mers: {1}".format(N, thousands(Total_Kmers))
    Kmer_coverage_msg = "{0}-mer coverage: {1}".format(N, Kmer_coverage)
    Genome_size_msg = "Estimated genome size: {0:.1f}Mb".\
                        format(Genome_size / 1e6)
    Repetitive_msg = ks.repetitive
    SNPrate_msg = ks.snprate

    # sys.stderr.write() works the same whether or not `print` is a function
    # in this module (the Python 2 `print >> stream` form does not).
    for msg in (Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg):
        sys.stderr.write(msg + "\n")

    x, y = ks.get_xy(opts.vmin, opts.vmax)
    title = "{0} {1}-mer histogram".format(species, N)

    if ascii_mode:
        asciiplot(x, y, title=title)
        return Genome_size

    plt.figure(1, (6, 6))
    plt.plot(x, y, 'g-', lw=2, alpha=.5)
    ax = plt.gca()

    if peaks:
        # Mark the detected extrema of the spectrum
        t = (ks.min1, ks.max1, ks.min2, ks.max2, ks.min3)
        tcounts = [(cx, cy) for cx, cy in ks.counts if cx in t]
        if tcounts:
            px, py = zip(*tcounts)
            tcounts = dict(tcounts)
            plt.plot(px, py, 'ko', lw=2, mec='k', mfc='w')
            ax.text(ks.max1, tcounts[ks.max1], "SNP peak", va="top")
            ax.text(ks.max2, tcounts[ks.max2], "Main peak")

    messages = [Total_Kmers_msg, Kmer_coverage_msg, Genome_size_msg,
                Repetitive_msg, SNPrate_msg]
    write_messages(ax, messages)

    # Leave headroom above the curve for the annotation text
    ymin, ymax = ax.get_ylim()
    ymax = ymax * 7 / 6

    ax.set_title(markup(title))
    ax.set_ylim((ymin, ymax))

    xlabel, ylabel = "Coverage (X)", "Counts"
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    set_human_axis(ax)

    imagename = histfile.split(".")[0] + ".pdf"
    savefig(imagename, dpi=100)

    return Genome_size
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # args: command-line tokens; positionals are (prefix, seqid, mlg).
    # Reads `<prefix>.lifted.bed` and `<prefix>.agp`, draws a three-panel
    # figure and saves it as `estimategaps.<format>`. Returns None.
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = .15, .65
    w, h = .7, .3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(.05, .95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    # A negative correlation gets a flipped y-axis
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= .28
    h = .25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(ax, xlim=chrsize, ylim=25 * 1e-6,
                       xfactor=1e-6, xlabel="Physical position (Mb)",
                       yfactor=1000000, ylabel="Recomb. rate\n(cM / Mb)")

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = {x.component_id: (x.object_beg, x.object_end,
                              x.component_span, x.orientation)
             for x in g.agp if not x.is_gap}
    # Only the start of `a`, the end of `b` and both spans are needed
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = .6 / total_size
    y = .16
    pad = .03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((.15 + pb_ratio * a_beg, .2),
              (ystart, ystart - .14), ":", color=lsg)
    root.plot((.15 + pb_ratio * b_end, .3),
              (ystart, ystart - .08), ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip((a, b), (asize, bsize),
                                      (49213, 81277),
                                      (.2, .2 + (asize + gapsize) * ratio)):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker, ), (y, ), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=.025, fc='gainsboro')

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: `\L` is not a valid escape sequence in a normal literal.
    # The resulting text is unchanged.
    root.text(
        sum(markers) / 2,
        ypos + pad,
        r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
        **fontprop)

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(sum(markers) / 2, ypos - 3 * pad,
              r"$\textit{Estimated gap size: 96,433bp}$",
              color="r", ha="center", va="center")

    labels = ((.05, .95, 'A'), (.05, .6, 'B'), (.05, .27, 'C'))
    panel_labels(root, labels)
    normalize_axes(root)

    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def qc(args):
    """
    %prog qc prefix

    Expects data files including:
    1. `prefix.bedpe` draws Bezier curve between paired reads
    2. `prefix.sizes` draws length of the contig/scaffold
    3. `prefix.gaps.bed` mark the position of the gaps in sequence
    4. `prefix.bed.coverage` plots the base coverage
    5. `prefix.pairs.bed.coverage` plots the clone coverage

    See assembly.coverage.posmap() for the generation of these files.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # args: command-line tokens with a single positional (the file prefix,
    # also used as the scaffold name). Saves `<prefix>.pdf`. Returns None.
    from jcvi.graphics.glyph import Bezier

    p = OptionParser(qc.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # print_help() returns None, so `not ...` gives a non-zero exit
        # status on bad usage, matching the other commands in this file.
        sys.exit(not p.print_help())

    prefix, = args
    scf = prefix

    # All these files *must* be present in the current folder
    bedpefile = prefix + ".bedpe"
    fastafile = prefix + ".fasta"
    sizesfile = prefix + ".sizes"
    gapsbedfile = prefix + ".gaps.bed"
    bedfile = prefix + ".bed"
    pairsbedfile = prefix + ".pairs.bed"

    sizes = Sizes(fastafile).mapping
    size = sizes[scf]

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    # the scaffold
    root.add_patch(Rectangle((.1, .15), .8, .03, fc='k'))

    # basecoverage and matecoverage
    ax = fig.add_axes([.1, .45, .8, .45])

    bins = 200  # Smooth the curve
    basecoverage = Coverage(bedfile, sizesfile)
    matecoverage = Coverage(pairsbedfile, sizesfile)

    x, y = basecoverage.get_plot_data(scf, bins=bins)
    baseline, = ax.plot(x, y, 'g-')
    x, y = matecoverage.get_plot_data(scf, bins=bins)
    mateline, = ax.plot(x, y, 'r-')
    legends = ("Base coverage", "Mate coverage")
    leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)
    ax.set_xlim(0, size)

    # draw the read pairs
    pairs = []
    with open(bedpefile) as fp:
        for row in fp:
            # Scaffold and clone columns are not needed. Discarding them also
            # keeps `scf` (used for the label below) from being overwritten.
            _, astart, aend, _, bstart, bend, _ = row.split()
            astart, bstart = int(astart), int(bstart)
            aend, bend = int(aend), int(bend)
            start = min(astart, bstart) + 1
            end = max(aend, bend)
            pairs.append((start, end))

    bpratio = .8 / size
    cutoff = 1000  # inserts smaller than this are not plotted
    # this convert from base => x-coordinate
    pos = lambda x: (.1 + x * bpratio)
    ypos = .15 + .03
    for start, end in pairs:
        dist = end - start

        if dist < cutoff:
            continue

        dist = min(dist, 10000)
        # 10Kb == .25 canvas height
        height = .25 * dist / 10000
        xstart = pos(start)
        xend = pos(end)
        p0 = (xstart, ypos)
        p1 = (xstart, ypos + height)
        p2 = (xend, ypos + height)
        p3 = (xend, ypos)
        Bezier(root, p0, p1, p2, p3)

    # gaps on the scaffold
    with open(gapsbedfile) as fp:
        for row in fp:
            b = BedLine(row)
            start, end = b.start, b.end
            xstart = pos(start)
            xend = pos(end)
            root.add_patch(Rectangle((xstart, .15), xend - xstart, .03, fc='w'))

    # Scaffold label, drawn once, plus the note about the insert cutoff
    warn_msg = "Only the inserts > {0}bp are shown".format(cutoff)
    root.text(.5, .1, scf, color='b', ha="center")
    root.text(.5, .05, warn_msg, color='gray', ha="center")

    # clean up and output
    set_human_base_axis(ax)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = prefix + ".pdf"
    savefig(figname, dpi=300)
def gcdepth(args):
    """
    %prog gcdepth sample_name tag

    Plot GC content vs depth vs genomic bins. Inputs are mosdepth output:
    - NA12878_S1.mosdepth.global.dist.txt
    - NA12878_S1.mosdepth.region.dist.txt
    - NA12878_S1.regions.bed.gz
    - NA12878_S1.regions.bed.gz.csi
    - NA12878_S1.regions.gc.bed.gz

    A sample mosdepth.sh script might look like:
    ```
    #!/bin/bash
    LD_LIBRARY_PATH=mosdepth/htslib/ mosdepth/mosdepth $1 \\
        bams/$1.bam -t 4 -c chr1 -n --by 1000

    bedtools nuc -fi GRCh38/WholeGenomeFasta/genome.fa \\
        -bed $1.regions.bed.gz \\
        | pigz -c > $1.regions.gc.bed.gz
    ```
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # args: command-line tokens; positionals are (sample_name, tag).
    # Reads `<sample_name>.regions.gc.bed.gz` and writes
    # `<sample_name>.gcdepth.png`. Returns None.
    import hashlib
    from jcvi.algorithms.formula import MAD_interval as confidence_interval
    from jcvi.graphics.base import latex, plt, savefig, set2

    p = OptionParser(gcdepth.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    sample_name, tag = args
    # The tag is used to add to title, also provide a random (hashed) color.
    # hashlib needs bytes, so a text tag is encoded first.
    tag_bytes = tag if isinstance(tag, bytes) else tag.encode("utf-8")
    coloridx = int(hashlib.sha1(tag_bytes).hexdigest(), 16) % len(set2)
    color = set2[coloridx]

    # mosdepth outputs a table that we can use to plot relationship
    gcbedgz = sample_name + ".regions.gc.bed.gz"
    df = pd.read_csv(gcbedgz, delimiter="\t")
    mf = df.loc[:, ("4_usercol", "6_pct_gc")]
    mf.columns = ["depth", "gc"]

    # We discard any bins that are gaps
    mf = mf[(mf["depth"] > .001) | (mf["gc"] > .001)]

    # Create GC bins: depths grouped by GC rounded to a whole percent
    gcbins = defaultdict(list)
    for gc, depth in zip(mf["gc"], mf["depth"]):
        gcp = int(round(gc * 100))
        gcbins[gcp].append(depth)
    gcd = sorted((k * .01, confidence_interval(v))
                 for (k, v) in gcbins.items())
    gcd_x, gcd_y = zip(*gcd)
    m, lo, hi = zip(*gcd_y)

    # Plot
    plt.plot(mf["gc"], mf["depth"], ".", color="lightslategray", ms=2,
             mec="lightslategray", alpha=.1)
    patch = plt.fill_between(gcd_x, lo, hi,
                             facecolor=color, alpha=.25, zorder=10,
                             linewidth=0.0, label="Median +/- MAD band")
    plt.plot(gcd_x, m, "-", color=color, lw=2, zorder=20)

    ax = plt.gca()
    ax.legend(handles=[patch], loc="best")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 100)
    ax.set_title("{} ({})".format(latex(sample_name), tag))
    ax.set_xlabel("GC content")
    ax.set_ylabel("Depth")
    savefig(sample_name + ".gcdepth.png")
def demo(args):
    """
    %prog demo

    Draw sample gene features to illustrate the various fates of duplicate
    genes - to be used in a book chapter.
    """
    parser = OptionParser(demo.__doc__)
    opts, args = parser.parse_args(args)

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    panel_space = 0.23
    dup_space = 0.025
    # Draw a gene and two regulatory elements at these arbitrary locations
    locs = [
        (0.5, 0.9),  # ancestral gene
        (0.5, 0.9 - panel_space + dup_space),  # identical copies
        (0.5, 0.9 - panel_space - dup_space),
        (0.5, 0.9 - 2 * panel_space + dup_space),  # degenerate copies
        (0.5, 0.9 - 2 * panel_space - dup_space),
        (0.2, 0.9 - 3 * panel_space + dup_space),  # sub-functionalization
        (0.2, 0.9 - 3 * panel_space - dup_space),
        (0.5, 0.9 - 3 * panel_space + dup_space),  # neo-functionalization
        (0.5, 0.9 - 3 * panel_space - dup_space),
        (0.8, 0.9 - 3 * panel_space + dup_space),  # non-functionalization
        (0.8, 0.9 - 3 * panel_space - dup_space),
    ]

    # One two-letter code per gene: the colors of its two regulatory
    # elements, where "w" means the element is not drawn.
    default_regulator = "gm"
    regulators = [
        default_regulator,
        default_regulator,
        default_regulator,
        "wm",
        default_regulator,
        "wm",
        "gw",
        "wb",
        default_regulator,
        "ww",
        default_regulator,
    ]

    width = 0.24
    rotation = 30
    tip = 0.02
    for idx, ((gene_x, gene_y), regulator) in enumerate(zip(locs, regulators)):
        # Gene backbone
        Glyph(root, gene_x - 0.5 * width, gene_x + 0.5 * width, gene_y)
        if idx == 9:  # upper copy for non-functionalization
            continue

        # coding region
        Glyph(root, gene_x - 0.16 * width, gene_x + 0.45 * width, gene_y,
              fc="k")

        # two regulatory elements
        x1 = gene_x - 0.4 * width
        x2 = gene_x - 0.28 * width
        for elem_x, fc in zip((x1, x2), regulator):
            if fc != "w":
                DoubleCircle(root, elem_x, gene_y, fc=fc)

        # Name the elements where they first appear
        label_y = gene_y + tip
        if idx == 0:
            root.text(x1, label_y, "Flower", rotation=rotation, va="bottom")
            root.text(x2, label_y, "Root", rotation=rotation, va="bottom")
        elif idx == 7:
            root.text(x2, label_y, "Leaf", rotation=rotation, va="bottom")

    # Draw arrows between panels (center)
    arrow_dist = 0.08
    center_x = 0.5
    for tip_y in (0.3, 0.53, 0.76):
        root.annotate(
            " ",
            (center_x, tip_y),
            (center_x, tip_y + arrow_dist),
            arrowprops=arrowprops,
        )

    # Side arrows fanning out from the center to the left/right fates
    tip_y = 0.3
    for tip_x in (0.2, 0.8):
        root.annotate(
            " ",
            (tip_x, tip_y),
            (0.5, tip_y + arrow_dist),
            arrowprops=arrowprops,
        )

    # Duplication, Degeneration
    for proc_y, process in ((0.76, "Duplication"), (0.53, "Degeneration")):
        root.text(0.6, proc_y + 0.02, process, fontweight="bold")

    # Label of fates
    fate_labels = (
        (0.2, "Subfunctionalization"),
        (0.5, "Neofunctionalization"),
        (0.8, "Nonfunctionalization"),
    )
    for fate_x, fate in fate_labels:
        RoundLabel(root, fate_x, 0.05, fate)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig("demo.pdf", dpi=300)
def plot(args):
    """
    %prog plot tagged.new.bed chr1

    Plot gene identifiers along a particular chromosome, often to illustrate the
    gene id assignment procedure.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # args: command-line tokens; positionals are (tagged BED file, seqid).
    # Saves `<seqid>.identifiers.<format>`; with --log also writes the
    # plotted points to `<seqid>.new` and `<seqid>.old`. Returns None.
    from jcvi.graphics.base import plt, savefig
    from jcvi.graphics.chromosome import ChromosomeMap

    p = OptionParser(plot.__doc__)
    p.add_option("--firstn", type="int", help="Only plot the first N genes")
    p.add_option("--ymax", type="int", help="Y-axis max value")
    p.add_option("--log", action="store_true", help="Write plotting data")
    opts, args, iopts = p.set_image_options(args, figsize="6x4")

    if len(args) != 2:
        sys.exit(not p.print_help())

    taggedbed, seqid = args
    bed = Bed(taggedbed)
    beds = list(bed.sub_bed(seqid))

    # (gene order, identifier rank) pairs, split by whether the id is new
    old, new = [], []
    i = 0
    for b in beds:
        accn = b.extra[0]
        if "te" in accn:
            continue

        accn, tag = accn.split("|")
        if tag == "OVERLAP":
            continue

        c, r = atg_name(accn)
        if tag == "NEW":
            new.append((i, r))
        else:
            old.append((i, r))
        i += 1

    ngenes = i
    assert ngenes == len(new) + len(old)

    logging.debug("Imported {0} ranks on {1}.".format(ngenes, seqid))
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    xstart, xend = 0.2, 0.8
    ystart, yend = 0.2, 0.8
    pad = 0.02

    ngenes = opts.firstn or ngenes
    ymax = opts.ymax or 500000

    title = "Assignment of Medtr identifiers"
    # NOTE(review): the "first N genes" wording is keyed on --ymax rather
    # than --firstn; kept as-is, confirm whether that is intended.
    if opts.ymax:
        subtitle = "{0}, first {1} genes".format(seqid, ngenes)
    else:
        subtitle = "{0}, {1} genes ({2} new)".format(seqid, ngenes, len(new))

    chr_map = ChromosomeMap(
        fig, root, xstart, xend, ystart, yend, pad, 0, ymax, 5, title, subtitle
    )

    ax = chr_map.axes

    if opts.log:
        from jcvi.utils.table import write_csv

        header = ["x", "y"]
        write_csv(header, new, filename=seqid + ".new")
        write_csv(header, old, filename=seqid + ".old")

    # zip(*[]) cannot be unpacked into two names, so an empty series
    # (e.g. no NEW ids on this chromosome) is skipped.
    for series, fmt in ((new, "b,"), (old, "r,")):
        if series:
            x, y = zip(*series)
            ax.plot(x, y, fmt)

    # Legends
    ymid = (ystart + yend) / 2
    y = ymid + pad
    root.plot([0.2], [y], "r.", lw=2)
    root.text(0.2 + pad, y, "Existing Medtr ids", va="center", size=10)
    y = ymid - pad
    root.plot([0.2], [y], "b.", lw=2)
    root.text(0.2 + pad, y, "Newly instantiated ids", va="center", size=10)

    ax.set_xlim(0, ngenes)
    ax.set_ylim(0, ymax)
    ax.set_axis_off()

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + ".identifiers." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
ratio = ysize * 1. / xsize if proportional else 1 width = iopts.w height = iopts.h * ratio fig = plt.figure(1, (width, height)) root = fig.add_axes([0, 0, 1, 1]) # the whole canvas ax = fig.add_axes([.1, .1, .8, .8]) # the dot plot blastplot(ax, blastfile, qsizes, ssizes, qbed, sbed, style=opts.dotstyle, proportional=proportional, sampleN=opts.nmax, baseticks=True, stripNames=opts.stripNames, highlights=highlights) # add genome names to_ax_label = lambda fname: op.basename(fname).split(".")[0] gx, gy = [to_ax_label(x.filename) for x in (qsizes, ssizes)] ax.set_xlabel(gx, size=16) ax.set_ylabel(gy, size=16) root.set_xlim(0, 1) root.set_ylim(0, 1) root.set_axis_off() savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """Draw the 3x3 demo grid: one row per variant type, one column per
    alignment type, and save it as `<mode>.<format>`."""
    parser = OptionParser(__doc__)
    opts, args, iopts = parser.set_image_options(figsize="9x7")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    (mode,) = args
    assert mode == "demo"

    a, b = 30, 70
    pad = 0.08
    w = 0.31
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Row separators
    sep_y = 1 - pad
    for _ in range(3):
        root.plot((0, 1), (sep_y, sep_y), "-", lw=2, color="lightgray")
        sep_y -= w

    # Row headers
    header_x = pad * 0.6
    header_y = 1 - pad - 0.5 * w
    for row_title in ("Inversion", "Indel", "Duplication"):
        root.text(header_x, header_y, row_title, ha="center", va="center")
        header_y -= w

    # Column headers
    header_x = pad + 0.5 * w
    header_y = 1 - pad / 2
    for col_title in ("Assembly alignment", "Read alignment",
                      "Optical map alignment"):
        root.text(header_x, header_y, col_title, ha="center", va="center")
        header_x += w

    # One column per aligner class; within a column the panels go top to
    # bottom: inversion, deletion, duplication. Only the pairwise
    # duplication panel passes an explicit gap.
    columns = (
        (PairwiseAlign, pad, {"gap": 5}),
        (ReadAlign, pad + w, {}),
        (OpticalMapAlign, pad + 2 * w, {}),
    )
    for aligner, left, dup_kwargs in columns:
        panel = aligner(fig, [left, 2 * w, w, w])
        panel.invert(a, b)
        panel.draw()

        panel = aligner(fig, [left, w, w, w])
        panel.delete(a, b)
        panel.draw()

        panel = aligner(fig, [left, 0, w, w])
        panel.duplicate(a, b, **dup_kwargs)
        panel.draw()

    normalize_axes(root)

    image_name = mode + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def cotton(args):
    """
    %prog cotton seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphic.synteny.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # args: command-line tokens; five positionals as listed in the usage.
    # Saves `cotton.<format>`. Returns None.
    p = OptionParser(cotton.__doc__)
    p.add_option("--depthfile", help="Use depth info in this file")
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 5:
        # print_help() returns None, so `not ...` gives a non-zero exit
        # status on bad usage, matching the other commands in this file.
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    depthfile = opts.depthfile

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    kt = Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch)

    light = "lightslategrey"
    # Show the dup depth along the cotton chromosomes
    if depthfile:
        ymin, ymax = 0.9, 0.95
        root.text(0.11, 0.96, "Cotton duplication level", color="gray", size=10)
        root.plot([0.1, 0.95], [ymin, ymin], color="gray")
        root.text(0.96, 0.9, "1x", color="gray", va="center")
        root.plot([0.1, 0.95], [ymax, ymax], color="gray")
        root.text(0.96, 0.95, "6x", color="gray", va="center")

        track = kt.tracks[0]  # Cotton
        depths = []
        with open(depthfile) as fp:
            for row in fp:
                a, b, depth = row.split()
                depth = int(depth)
                try:
                    coord = track.get_coords(a)
                except KeyError:
                    # Best effort: entries the track cannot place are skipped
                    continue
                depths.append((coord, depth))

        # zip(*[]) cannot be unpacked, so draw only when something mapped
        if depths:
            depths.sort(key=lambda x: (x[0], -x[1]))
            xx, yy = zip(*depths)
            # Each extra copy raises the line by 0.01 above the 1x baseline
            yy = [ymin + 0.01 * (x - 1) for x in yy]
            root.plot(xx, yy, "-", color=light)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.5, 0.68, 0.5)

    # Zoom
    xpos = 0.835
    ytop = 0.9
    xmin, xmax = 0.18, 0.82
    ymin, ymax = ytop, 0.55
    lc = "k"
    kwargs = dict(lw=3, color=lc, mec=lc, mfc="w", zorder=3)
    root.plot((xpos, xpos), (ymax, 0.63), ":o", **kwargs)
    root.plot((xpos, xmin), (ymax, ymin), ":o", **kwargs)
    root.plot((xpos, xmax), (ymax, ymin), ":o", **kwargs)
    RoundRect(root, (0.06, 0.17), 0.92, 0.35, fill=False, lw=2, ec=light)

    # Panels
    root.text(0.05, 0.95, "a", size=20, fontweight="bold")
    root.text(0.1, 0.45, "b", size=20, fontweight="bold")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "cotton"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def stack(args):
    """
    %prog stack fastafile

    Create landscape plots that show the amounts of genic sequences, and repetitive
    sequences along the chromosomes.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # args: command-line tokens with a single positional (the FASTA file).
    # Saves `<fasta prefix>.<format>`. Returns None.
    p = OptionParser(stack.__doc__)
    p.add_option("--top", default=10, type="int",
                 help="Draw the first N chromosomes [default: %default]")
    p.add_option("--stacks",
                 default="Exons,Introns,DNA_transposons,Retrotransposons",
                 help="Features to plot in stackplot [default: %default]")
    p.add_option("--switch",
                 help="Change chr names based on two-column file [default: %default]")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastafile, = args
    top = opts.top
    window, shift, subtract = check_window_options(opts)
    switch = opts.switch
    if switch:
        switch = DictFile(opts.switch)

    stacks = opts.stacks.split(",")
    bedfiles = get_beds(stacks)
    binfiles = get_binfiles(bedfiles, fastafile, shift, subtract=subtract)

    sizes = Sizes(fastafile)
    s = list(sizes.iter_sizes())[:top]
    maxl = max(x[1] for x in s)
    margin = .08
    inner = .02   # y distance between tracks

    pf = fastafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Gauge
    ratio = draw_gauge(root, margin, maxl)

    # Per chromosome
    yinterval = (1 - 2 * margin) / (top + 1)
    xx = margin
    yy = 1 - margin
    for seqid, clen in s:
        yy -= yinterval
        xlen = clen / ratio
        cc = seqid
        if "_" in seqid:
            # Split on the first underscore only, so names with several
            # underscores do not fail to unpack.
            ca, cb = seqid.split("_", 1)
            cc = ca[0].upper() + cb

        if switch and cc in switch:
            cc = "\n".join((cc, "({0})".format(switch[cc])))

        root.add_patch(Rectangle((xx, yy), xlen, yinterval - inner, color=gray))
        ax = fig.add_axes([xx, yy, xlen, yinterval - inner])

        # Number of bins is ceil(clen / shift). Floor division keeps it an
        # integer on Python 3 as well.
        nbins = clen // shift
        if clen % shift:
            nbins += 1

        stackplot(ax, binfiles, nbins, palette, seqid, window, shift)
        root.text(xx - .04, yy + .5 * (yinterval - inner), cc,
                  ha="center", va="center")

        ax.set_xlim(0, nbins)
        ax.set_ylim(0, 1)
        ax.set_axis_off()

    # Legends
    yy -= yinterval
    xx = margin
    for bedname, swatch in zip(bedfiles, palette):
        label = bedname.rsplit(".", 1)[0].replace("_", " ")
        label = Registration.get(label, label)

        root.add_patch(Rectangle((xx, yy), inner, inner, color=swatch, lw=0))
        xx += 2 * inner
        root.text(xx, yy, label, size=13)
        # Advance by a rough estimate of the label width
        xx += len(label) * .012 + inner

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def waterlilyGOM(args):
    """
    %prog mcmctree.tre table.csv

    Customized figure to plot phylogeny and related infographics.
    """
    from jcvi.graphics.tree import (
        LeafInfoFile,
        WGDInfoFile,
        draw_tree,
        parse_tree,
        draw_wgd_xy,
    )
    from jcvi.graphics.table import CsvTable, draw_table

    parser = OptionParser(waterlilyGOM.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="12x9")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    (datafile, csvfile) = args
    outgroup = ["ginkgo"]

    logging.debug("Load tree file `{0}`".format(datafile))
    t, hpd = parse_tree(datafile)

    pf = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    margin, rmargin = 0.15, 0.19  # Left and right margin
    # Annotation tables are read from fixed file names in the working folder
    leafinfo = LeafInfoFile("leafinfo.csv").cache
    wgdinfo = WGDInfoFile("wgdinfo.csv").cache
    groups = "Monocots,Eudicots,ANA-grade,Gymnosperms"

    draw_tree(
        root,
        t,
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        supportcolor=None,
        internal=False,
        outgroup=outgroup,
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=True,
        groups=groups.split(","),
    )

    # Bottom right show legends for the WGD circles
    pad = 0.02
    ypad = 0.04
    xstart = 1 - rmargin + pad
    ystart = 0.2
    # (key into wgdinfo, legend text, row offset below ystart)
    legend_rows = (
        ("waterlily", "Nymphaealean WGD", 2),
        ("banana", "Other known WGDs", 3),
    )
    for species, caption, row in legend_rows:
        wgdline = wgdinfo[species][0]
        ypos = ystart - row * ypad
        draw_wgd_xy(root, xstart, ypos, wgdline)
        root.text(
            xstart + pad,
            ypos,
            caption,
            color=wgdline.color,
            va="center",
        )

    # Top left draw the comparison table
    draw_table(
        root,
        CsvTable(csvfile),
        extent=(0.02, 0.44, 0.55, 0.985),
        stripe_color="lavender",
        yinflation=iopts.w / iopts.h,
    )

    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def composite(args):
    """
    %prog composite fastafile chr1

    Combine line plots, feature bars and alt-bars, different data types
    specified in options. Inputs must be BED-formatted. Three types of viz are
    currently supported:

    --lines: traditional line plots, useful for plotting feature freq
    --bars: show where the extent of features are
    --altbars: similar to bars, yet in two alternating tracks, e.g. scaffolds
    """
    from jcvi.graphics.chromosome import HorizontalChromosome

    parser = OptionParser(composite.__doc__)
    parser.add_option("--lines",
                      help="Features to plot in lineplot [default: %default]")
    parser.add_option("--bars",
                      help="Features to plot in bars [default: %default]")
    parser.add_option("--altbars",
                      help="Features to plot in alt-bars [default: %default]")
    parser.add_option("--fatten", default=False, action="store_true",
                      help="Help visualize certain narrow features [default: %default]")
    parser.add_option("--mode", default="span",
                      choices=("span", "count", "score"),
                      help="Accumulate feature based on [default: %default]")
    add_window_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    fastafile, seqid = args
    window, shift, subtract = check_window_options(opts)
    fatten = opts.fatten

    # Each option is a comma-separated feature list; absent means no tracks
    linebeds = get_beds(opts.lines.split(",")) if opts.lines else []
    barbeds = get_beds(opts.bars.split(",")) if opts.bars else []
    altbarbeds = get_beds(opts.altbars.split(",")) if opts.altbars else []

    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode)

    margin = .12
    clen = Sizes(fastafile).mapping[seqid]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    root.text(.5, .95, seqid, ha="center", color="darkslategray")

    xstart, xend = margin, 1 - margin
    xlen = xend - xstart
    ratio = xlen / clen

    def to_canvas(bp):
        # Base position => x-coordinate on the root canvas
        return xstart + ratio * bp

    # Line plots
    ax = fig.add_axes([xstart, .6, xlen, .3])
    lineplot(ax, linebins, nbins, seqid, window, shift)

    # Bar plots
    yy = .5
    yinterval = .08
    r = .01
    fattend = .0025
    for bedname in barbeds:
        root.text(xend + .01, yy, bedname.split(".")[0], va="center")
        HorizontalChromosome(root, xstart, xend, yy, height=.02)
        for feat in Bed(bedname):
            start, end = to_canvas(feat.start), to_canvas(feat.end)
            span = end - start
            # Optionally widen features that would be too thin to see
            if fatten and span < fattend:
                span = fattend

            root.add_patch(Rectangle((start, yy - r), span, 2 * r,
                                     lw=0, fc="darkslategray"))
        yy -= yinterval

    # Alternative bar plots
    offset = r / 2
    for bedname in altbarbeds:
        root.text(xend + .01, yy, bedname.split(".")[0], va="center")
        for feat in Bed(bedname):
            start, end = to_canvas(feat.start), to_canvas(feat.end)
            if end - start < .0001:
                continue
            # Flip sides for every drawn feature to get two tracks
            offset = -offset
            root.add_patch(Rectangle((start, yy + offset), end - start, .003,
                                     lw=0, fc="darkslategray"))
        yy -= yinterval

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def oropetium(args):
    """
    %prog oropetium mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    parser = OptionParser(oropetium.__doc__)
    parser.add_option("--extra", help="Extra features in BED format")
    opts, args, iopts = parser.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(
        fig, root, datafile, bedfile, slayout,
        switch=switch, extra_features=opts.extra,
    )

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.4, 0.57, 0.74, text=True, repeat=True)

    # On the left panel, make a species tree
    label_color = "lightslategrey"
    xs, xp = 0.16, 0.03
    # Leaves share one x position, top to bottom
    coords = {
        "oropetium": (xs, 0.7),
        "setaria": (xs, 0.6),
        "sorghum": (xs, 0.5),
        "rice": (xs, 0.4),
        "brachypodium": (xs, 0.3),
    }

    # Each step to the left is one level deeper in the tree
    xs -= xp
    coords["Panicoideae"] = join_nodes(root, coords, "setaria", "sorghum", xs)
    xs -= xp
    coords["BEP"] = join_nodes(root, coords, "rice", "brachypodium", xs)
    coords["PACMAD"] = join_nodes(root, coords, "oropetium", "Panicoideae", xs)
    xs -= xp
    coords["Poaceae"] = join_nodes(root, coords, "BEP", "PACMAD", xs)

    # Names of the internal nodes: (tag, y shift, vertical alignment)
    node_labels = (
        ("BEP", -0.02, "top"),
        ("Poaceae", -0.02, "top"),
        ("PACMAD", 0.02, "bottom"),
    )
    for tag, dy, va in node_labels:
        nx, ny = coords[tag]
        root.text(nx - 0.005, ny + dy, tag,
                  rotation=90, ha="right", va=va, color=label_color)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "oropetium"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def bites(args):
    """
    %prog bites

    Illustrate the pipeline for automated bite discovery.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # args: command-line tokens (no positionals are used).
    # Saves `<fname()>.pdf`. Returns None.

    # Use this command's own usage text and the tokens passed in, as the
    # other commands in this file do.
    p = OptionParser(bites.__doc__)
    opts, args = p.parse_args(args)

    fig = plt.figure(1, (6, 6))
    root = fig.add_axes([0, 0, 1, 1])

    # HSP pairs
    hsps = (
        ((50, 150), (60, 180)),
        ((190, 250), (160, 235)),
        ((300, 360), (270, 330)),
        ((430, 470), (450, 490)),
        ((570, 620), (493, 543)),
        ((540, 555), (370, 385)),  # non-collinear hsps
    )

    titlepos = (0.9, 0.65, 0.4)
    titles = ("Compare orthologous region", "Find collinear HSPs", "Scan paired gaps")
    ytip = 0.01
    mrange = 650.0
    # Sequence coordinate => x-coordinate on the canvas
    m = lambda x: x / mrange * 0.7 + 0.1
    for i, (ya, title) in enumerate(zip(titlepos, titles)):
        yb = ya - 0.1
        plt.plot((0.1, 0.8), (ya, ya), "-", color="gray", lw=2, zorder=1)
        plt.plot((0.1, 0.8), (yb, yb), "-", color="gray", lw=2, zorder=1)
        RoundLabel(root, 0.5, ya + 4 * ytip, title)
        root.text(0.9, ya, "A. thaliana", ha="center", va="center")
        root.text(0.9, yb, "B. rapa", ha="center", va="center")
        myhsps = hsps
        if i >= 1:
            # From the second panel on, the non-collinear HSP is dropped
            myhsps = hsps[:-1]
        for (a, b), (c, d) in myhsps:
            a, b, c, d = [m(x) for x in (a, b, c, d)]
            r1 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fc="r", lw=0, zorder=2)
            r2 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fc="r", lw=0, zorder=2)
            r3 = Rectangle((a, ya - ytip), b - a, 2 * ytip, fill=False, zorder=3)
            r4 = Rectangle((c, yb - ytip), d - c, 2 * ytip, fill=False, zorder=3)
            r5 = Polygon(
                ((a, ya - ytip), (c, yb + ytip), (d, yb + ytip), (b, ya - ytip)),
                fc="r",
                alpha=0.2,
            )
            rr = (r1, r2, r3, r4, r5)
            if i == 2:
                # The last panel omits the connecting polygon
                rr = rr[:-1]
            for r in rr:
                root.add_patch(r)

    # Gap pairs (computed on the HSP set of the last panel)
    hspa, hspb = zip(*myhsps)
    gapa, gapb = [], []
    for (a, b), (c, d) in pairwise(hspa):
        gapa.append((b + 1, c - 1))
    for (a, b), (c, d) in pairwise(hspb):
        gapb.append((b + 1, c - 1))
    gaps = zip(gapa, gapb)
    tpos = titlepos[-1]

    yy = tpos - 0.05
    for i, ((a, b), (c, d)) in enumerate(gaps):
        i += 1
        a, b, c, d = [m(x) for x in (a, b, c, d)]
        xx = (a + b + c + d) / 4
        TextCircle(root, xx, yy, str(i))

    # Bites
    ystart = 0.24
    ytip = 0.05
    bites = (
        ("Bite(40=>-15)", True),
        ("Bite(50=>35)", False),
        ("Bite(70=>120)", False),
        ("Bite(100=>3)", True),
    )
    for i, (bite, selected) in enumerate(bites):
        # Two-column grid: column from parity, row from integer division.
        # `//` keeps the row index whole on Python 3.
        xx = 0.15 if (i % 2 == 0) else 0.55
        yy = ystart - i // 2 * ytip
        i += 1
        TextCircle(root, xx, yy, str(i))
        color = "k" if selected else "gray"
        root.text(xx + ytip, yy, bite, size=10, color=color, va="center")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = fname() + ".pdf"
    savefig(figname, dpi=300)
def birch(args):
    """
    %prog birch seqids layout

    Plot birch macro-synteny, with an embedded phylogenetic tree to the right.
    """
    parser = OptionParser(birch.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x6")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqids, layout = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    karyotype = Karyotype(fig, root, seqids, layout)
    tracks = karyotype.layout

    xs = 0.79
    node_style = dict(rectangle=False, circle=False)

    # Embed a phylogenetic tree to the right; leaf i sits at the height of
    # layout track i
    coords = {}
    leaves = ("Amborella", "Vitis", "Prunus", "Betula", "Populus", "Arabidopsis")
    for rank, taxon in enumerate(leaves):
        coords[taxon] = (xs, tracks[rank].y)

    # (new internal node, child, child), in build order
    joins = (
        ("fabids", "Prunus", "Betula"),
        ("malvids", "Populus", "Arabidopsis"),
        ("rosids", "fabids", "malvids"),
        ("eudicots", "rosids", "Vitis"),
        ("angiosperm", "eudicots", "Amborella"),
    )
    for parent, left, right in joins:
        coords[parent] = join_nodes(root, coords, left, right, xs, **node_style)

    # Show branch length: (child, parent, label, text placement)
    branches = (
        ("Amborella", "angiosperm", ">160.0", {}),
        ("eudicots", "angiosperm", ">78.2", {"va": "top"}),
        ("Vitis", "eudicots", "138.5", {}),
        ("rosids", "eudicots", "19.8", {"va": "top"}),
        ("Prunus", "fabids", "104.2", {"ha": "right", "va": "top"}),
        ("Arabidopsis", "malvids", "110.2", {"va": "top"}),
        ("fabids", "rosids", "19.8", {"ha": "right", "va": "top"}),
        ("malvids", "rosids", "8.5", {"va": "top"}),
    )
    for child, parent, label, placement in branches:
        branch_length(root, coords[child], coords[parent], label, **placement)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "birch"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def depth(args):
    """
    %prog depth anchorfile --qbed qbedfile --sbed sbedfile

    Calculate the depths in the two genomes in comparison, given in --qbed and
    --sbed. The synteny blocks will be layered on the genomes, and the
    multiplicity will be summarized to stderr.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    #
    # args: list of command-line tokens (one positional anchor file plus
    # options). Returns None. Optionally writes --depthfile and, with
    # --histogram, a "<anchorfile minus extension>.depth.pdf" figure.
    from jcvi.utils.range import range_depth

    p = OptionParser(depth.__doc__)
    p.add_option(
        "--depthfile", help="Generate file with gene and depth [default: %default]"
    )
    p.add_option(
        "--histogram",
        default=False,
        action="store_true",
        help="Plot histograms in PDF",
    )
    p.add_option("--xmax", type="int", help="x-axis maximum to display in plot")
    p.add_option("--title", default=None, help="Title to display in plot")
    p.add_option("--quota", help="Force to use this quota, e.g. 1:1, 1:2 ...")
    p.set_beds()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (anchorfile,) = args
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts)
    depthfile = opts.depthfile
    ac = AnchorFile(anchorfile)

    # Reduce every synteny block to its (min, max) extent on each genome,
    # using the first element of the qorder/sorder lookups as the position.
    qranges = []
    sranges = []
    for ib in ac.blocks:
        q, s, _ = zip(*ib)
        q = [qorder[x] for x in q]
        s = [sorder[x] for x in s]
        qrange = (min(q)[0], max(q)[0])
        srange = (min(s)[0], max(s)[0])
        qranges.append(qrange)
        sranges.append(srange)
        if is_self:
            # Self comparison: both sides of a block land on the same genome.
            qranges.append(srange)

    qgenome = op.basename(qbed.filename).split(".")[0]
    sgenome = op.basename(sbed.filename).split(".")[0]
    qtag = "Genome {0} depths".format(qgenome)
    print("{}:".format(qtag), file=sys.stderr)
    dsq, details = range_depth(qranges, len(qbed))

    # The handle is closed in `finally` so it is released on every exit path,
    # including the early return for self comparisons and any exception.
    fw = open(depthfile, "w") if depthfile else None
    try:
        if fw:
            write_details(fw, details, qbed)

        if not is_self:
            stag = "Genome {0} depths".format(sgenome)
            print("{}:".format(stag), file=sys.stderr)
            dss, details = range_depth(sranges, len(sbed))
            if fw:
                write_details(fw, details, sbed)
    finally:
        if fw:
            fw.close()

    if fw:
        logging.debug("Depth written to `{0}`.".format(depthfile))

    if is_self:
        return

    if not opts.histogram:
        return

    from jcvi.graphics.base import plt, quickplot_ax, savefig, normalize_axes

    # Plot two histograms one for query genome, one for subject genome
    plt.figure(1, (6, 3))
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    xmax = opts.xmax or max(4, max(list(dsq.keys()) + list(dss.keys())))
    if opts.quota:
        # --quota is given as "subject:query", e.g. 1:2
        speak, qpeak = opts.quota.split(":")
        qpeak, speak = int(qpeak), int(speak)
    else:
        qpeak = find_peak(dsq)
        speak = find_peak(dss)

    qtag = "# of {} blocks per {} gene".format(sgenome, qgenome)
    stag = "# of {} blocks per {} gene".format(qgenome, sgenome)
    quickplot_ax(
        ax1,
        dss,
        0,
        xmax,
        stag,
        ylabel="Percentage of genome",
        highlight=range(1, speak + 1),
    )
    quickplot_ax(ax2, dsq, 0, xmax, qtag, ylabel=None, highlight=range(1, qpeak + 1))

    title = opts.title or "{} vs {} syntenic depths\n{}:{} pattern".format(
        qgenome, sgenome, speak, qpeak
    )
    root = f.add_axes([0, 0, 1, 1])
    # A user-supplied --title need not contain a newline; partition() yields an
    # empty second line in that case instead of failing to unpack.
    vs, _, pattern = title.partition("\n")
    root.text(0.5, 0.97, vs, ha="center", va="center", color="darkslategray")
    if pattern:
        root.text(
            0.5, 0.925, pattern, ha="center", va="center", color="tomato", size=16
        )
    print(title, file=sys.stderr)

    normalize_axes(root)

    pf = anchorfile.rsplit(".", 1)[0] + ".depth"
    image_name = pf + ".pdf"
    savefig(image_name)
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    # NOTE: the docstring above is passed to OptionParser as the usage text.
    #
    # args: list of command-line tokens (one positional image file plus
    # options). Returns the list of Seed objects that were recorded, prints one
    # line per seed to --outfile, and saves a four-panel summary figure under
    # the output directory.
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pngfile,) = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != ".":
        mkdir(outdir)
    if calib:
        # From here on `calib` holds the parsed JSON rather than the file name.
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)
    nbcolor = opts.changeBackground
    pngfile = convert_background(pngfile, nbcolor)
    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile,
        pf,
        outdir=outdir,
        rotate=opts.rotate,
        rows=rows,
        cols=cols,
        labelrows=labelrows,
        labelcols=labelcols,
    )
    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(
        ncols=4, nrows=1, figsize=(iopts.w, iopts.h)
    )
    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    else:
        # Without this branch `edges` would be unbound and the next line would
        # fail with an uninformative NameError.
        raise ValueError(
            "Unknown edge filter `{0}` (expected canny, roberts or sobel)".format(ff)
        )
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    # A kernel of 0 disables the morphological closing step.
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        # NOTE(review): `indices=False` is only accepted by some scikit-image
        # releases - confirm against the pinned version.
        local_maxi = peak_local_max(distance, threshold_rel=0.05, indices=False)
        coordinates = peak_local_max(distance, threshold_rel=0.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering
    # `w` and `h` follow the unpack order of .shape; below, `h` bounds the
    # x-axis and `w` the y-axis.
    w, h = img_gray.shape
    canvas_size = w * h
    # --minsize / --maxsize are percentages of the canvas area.
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug(
        "Find objects with pixels between {0} ({1}%) and {2} ({3}%)".format(
            min_size, opts.minsize, max_size, opts.maxsize
        )
    )

    # Plotting
    ax1.set_title("Original picture")
    ax1.imshow(oimg)

    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    params = r"{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title("Edge detection\n({0})".format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        # coordinates are (row, col) pairs, hence column 1 is plotted as x.
        ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.")
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title("Object detection")
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    # Seeds are numbered from 1; stop once --count seeds have been recorded.
    for i, props in enumerate(rp, start=1):
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        # Draw the major and minor axes through the centroid.
        ax2.plot((x0 - major_dx, x0 + major_dx), (y0 + major_dy, y0 - major_dy), "r-")
        ax2.plot((x0 - minor_dx, x0 + minor_dx), (y0 - minor_dy, y0 + minor_dy), "r-")

        npixels = int(props.area)
        # Sample the center of the blob for color
        # Half-width of the sampled square: 35% of the minor semi-axis plus
        # one, capped at 50 pixels.
        d = min(int(round(minor / 2 * 0.35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d) : (y0d + d), (x0d - d) : (x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug(
            "Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".format(
                i, npixels, len(pixels), 100.0 * npixels / canvas_size
            )
        )

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle(
            (minc, minr), maxc - minc, maxr - minr, fill=False, ec="w", lw=1
        )
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc, mr, "{0}".format(i), color="w", ha="center", va="center", size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        # Inverted y-limits keep row 0 at the top, as in the image.
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(0.1, 0.92, "File: {0}".format(latex(filename)), color="g")
    ax4.text(0.1, 0.86, "Label: {0}".format(latex(accession)), color="m")
    yy = 0.8
    # NOTE(review): this handle is left open here - must_open may hand back a
    # shared stream such as stdout; confirm before adding a close().
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        # Every seed is written to the output file, but only seeds numbered
        # 1-7 are drawn on the summary panel.
        if i > 7:
            continue
        ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k"))
        ax4.text(0.1, yy, o.pixeltag, va="center")
        yy -= 0.04
        ax4.add_patch(
            Rectangle((0.1, yy - 0.025), 0.12, 0.05, lw=0, fc=rgb_to_hex(o.rgb))
        )
        ax4.text(0.27, yy, o.hashtag, va="center")
        yy -= 0.06
    ax4.text(
        0.1,
        yy,
        "(A total of {0} objects displayed)".format(nb_labels),
        color="darkslategray",
    )
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family="Helvetica", size=8)
        ax.set_yticklabels(yticklabels, family="Helvetica", size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects