def draw(self, title="*Ks* distribution", filename="Ks_plot.pdf"):
    """Apply the final decorations to the Ks axes and optionally save.

    A legend is added only when more than one line has been registered.
    The x-range is clipped to ``ks_max - interval`` and the y-range is
    anchored at zero.

    Args:
        title: Plot title; run through ``markup`` before being displayed.
        filename: Path handed to ``savefig``; a falsy value skips saving.
    """
    axes = self.ax
    upper_ks = self.ks_max
    handles = self.lines
    legend_labels = [markup(text) for text in self.labels]
    legend_loc = self.legendp

    # A single line needs no legend.
    if len(handles) > 1:
        legend = axes.legend(
            handles,
            legend_labels,
            loc=legend_loc,
            shadow=True,
            fancybox=True,
            prop={"size": 10},
        )
        legend.get_frame().set_alpha(0.5)

    axes.set_xlim((0, upper_ks - self.interval))
    # Keep the current upper y-limit but pin the lower one to zero.
    y_top = axes.get_ylim()[-1]
    axes.set_ylim(0, y_top)

    axes.set_title(markup(title), fontweight="bold")
    axes.set_xlabel(markup("Synonymous substitutions per site (*Ks*)"))
    axes.set_ylabel(
        "Percentage of gene pairs (bin={})".format(self.interval)
    )

    # Re-issue the tick labels (x first, then y) with the desired font.
    for set_labels, get_ticks in (
        (axes.set_xticklabels, axes.get_xticks),
        (axes.set_yticklabels, axes.get_yticks),
    ):
        set_labels(get_ticks(), family="Helvetica")

    adjust_spines(axes, ["left", "bottom"], outward=True)

    if not filename:
        return
    savefig(filename, dpi=300)
def __init__(self, fig, root, canvas, chr, xlim, datadir,
             order=None, hlsuffix=None, palette=None, cap=50,
             gauge="bottom", plot_label=True, plot_chr_label=True,
             gauge_step=5000000, vlines=None):
    """Draw a framed stack of XY tracks, one per data file of ``chr``.

    Data files are found by globbing ``<datadir>/<chr>*``; every file gets
    its own horizontal strip inside ``canvas``.

    Args:
        fig: Object whose ``add_axes`` creates the track and gauge axes.
        root: Axes receiving the frame rectangle and the text labels.
        canvas: ``(x, y, w, h)`` of the panel.
        chr: File-name prefix used for the glob; also the panel label.
        xlim: ``(start, end)`` x-range applied to every track.
        datadir: Directory holding the data (and highlight) files.
        order: Optional list of track labels that fixes the stacking
            order. Without it, files are stacked in sorted name order.
        hlsuffix: If set, ``<datadir>/<label>.<hlsuffix>`` is imported as
            the highlight file of each track.
        palette: Single colour used for all tracks; ``None`` picks the
            brewer2mpl "Set2" qualitative map.
        cap: Passed to ``XYtrack.cap`` as ``ymax``.
        gauge: "top" or "bottom" draws a scale axis there; any other
            value (e.g. ``None``) draws no gauge.
        plot_label: Write each track's label left of the panel.
        plot_chr_label: Write ``chr`` above/below the panel.
        gauge_step: Step handed to ``setup_gauge_ax``.
        vlines: Optional positions forwarded to ``XYtrack.vlines``.
    """
    def track_label(path):
        # A track is identified by the second dot-separated field of its
        # file name. Working on the base name keeps a "." inside
        # ``datadir`` (e.g. "./data") from shifting that field.
        return op.basename(path).split(".")[1]

    x, y, w, h = canvas
    p = .01
    root.add_patch(Rectangle((x - p, y - p), w + 2 * p, h + 2 * p, lw=1,
                             fill=False, ec="darkslategray", zorder=10))

    datafiles = glob(op.join(datadir, chr + "*"))
    ntracks = len(datafiles)
    yinterval = h / ntracks
    yy = y + h

    if palette is None:
        # Get the palette
        import brewer2mpl
        set2 = brewer2mpl.get_map('Set2', 'qualitative', ntracks).mpl_colors
    else:
        set2 = [palette] * ntracks

    if order:
        datafiles.sort(key=lambda z: order.index(track_label(z)))
    else:
        # glob() order is arbitrary; sort for a reproducible layout.
        datafiles.sort()

    # Only "top"/"bottom" create a gauge. The chromosome label falls back
    # to the position below the panel when there is no gauge.
    gauge_ax = None
    tpos = y - .07
    if gauge == "top":
        gauge_ax = fig.add_axes([x, yy + p, w, .0001])
        adjust_spines(gauge_ax, ["top"])
        tpos = yy + .07
    elif gauge == "bottom":
        gauge_ax = fig.add_axes([x, y - p, w, .0001])
        adjust_spines(gauge_ax, ["bottom"])

    start, end = xlim
    if gauge_ax is not None:
        fs = gauge_step < 1000000
        setup_gauge_ax(gauge_ax, start, end, gauge_step, float_formatter=fs)

    if plot_chr_label:
        root.text(x + w / 2, tpos, chr, ha="center", va="center",
                  color="darkslategray", size=16)

    # Labels come from the files themselves rather than from ``order`` by
    # position, so a missing file cannot shift the labels of later tracks
    # and ``order=None`` no longer breaks the loop.
    for datafile, c in zip(datafiles, set2):
        label = track_label(datafile)
        yy -= yinterval
        ax = fig.add_axes([x, yy, w, yinterval * .9])
        xy = XYtrack(ax, datafile, color=c)
        xy.interpolate(end)
        xy.cap(ymax=cap)
        if vlines:
            xy.vlines(vlines)
        if hlsuffix:
            hlfile = op.join(datadir, ".".join((label, hlsuffix)))
            xy.import_hlfile(hlfile, chr)
        if plot_label:
            root.text(x - .035, yy + yinterval / 2, label,
                      ha="center", va="center", color=c)
        xy.draw()
        ax.set_xlim(*xlim)
def histogram(args):
    """
    %prog histogram meryl.histogram species K

    Plot the histogram based on meryl K-mer distribution, species and K are
    only used to annotate the graphic.
    """
    # NOTE: the docstring above doubles as the command-line usage text, so
    # implementation notes live in comments instead.
    #
    # Returns the estimated genome size (int), both for the ASCII and the
    # image code path.
    import os.path as op

    p = OptionParser(histogram.__doc__)
    p.add_option(
        "--vmin",
        dest="vmin",
        default=1,
        type="int",
        help="minimum value, inclusive",
    )
    p.add_option(
        "--vmax",
        dest="vmax",
        default=100,
        type="int",
        help="maximum value, inclusive",
    )
    p.add_option(
        "--pdf",
        default=False,
        action="store_true",
        help="Print PDF instead of ASCII plot",
    )
    p.add_option(
        "--method",
        choices=("nbinom", "allpaths"),
        default="nbinom",
        help="'nbinom' - slow but more accurate for het or polyploid genome; "
        "'allpaths' - fast and works for homozygous genomes",
    )
    p.add_option(
        "--maxiter",
        default=100,
        type="int",
        help="Max iterations for optimization. Only used with --method nbinom",
    )
    p.add_option(
        "--coverage", default=0, type="int", help="Kmer coverage [default: auto]"
    )
    p.add_option(
        "--nopeaks",
        default=False,
        action="store_true",
        help="Do not annotate K-mer peaks",
    )
    opts, args, iopts = p.set_image_options(args, figsize="7x7")

    if len(args) != 3:
        sys.exit(not p.print_help())

    histfile, species, N = args
    method = opts.method
    vmin, vmax = opts.vmin, opts.vmax
    ascii_plot = not opts.pdf
    # Peak annotation only works for method 'allpaths'
    peaks = not opts.nopeaks and method == "allpaths"
    N = int(N)

    if histfile.rsplit(".", 1)[-1] in ("mcdat", "mcidx"):
        logging.debug("CA kmer index found")
        histfile = merylhistogram(histfile)

    ks = KmerSpectrum(histfile)
    method_info = ks.analyze(K=N, maxiter=opts.maxiter, method=method)

    total_kmers = int(ks.totalKmers)
    # A user-supplied --coverage overrides the coverage set on ks.
    kmer_coverage = opts.coverage if opts.coverage else ks.lambda_
    genome_size = int(round(total_kmers * 1.0 / kmer_coverage))

    total_kmers_msg = "Total {0}-mers: {1}".format(N, thousands(total_kmers))
    kmer_coverage_msg = "{0}-mer coverage: {1:.1f}x".format(N, kmer_coverage)
    genome_size_msg = "Estimated genome size: {0:.1f} Mb".format(genome_size / 1e6)
    repetitive_msg = ks.repetitive
    snprate_msg = ks.snprate

    for msg in (total_kmers_msg, kmer_coverage_msg, genome_size_msg):
        print(msg, file=sys.stderr)

    x, y = ks.get_xy(vmin, vmax)
    title = "{0} {1}-mer histogram".format(species, N)

    if ascii_plot:
        asciiplot(x, y, title=title)
        return genome_size

    plt.figure(1, (iopts.w, iopts.h))
    plt.bar(x, y, fc="#b2df8a", lw=0)

    # Plot the negative binomial fit
    if method == "nbinom":
        generative_model = method_info["generative_model"]
        stacked = generative_model(
            method_info["Gbins"], method_info["lambda"], method_info["rho"]
        )
        plt.plot(
            method_info["kf_range"],
            stacked,
            ":",
            color="#6a3d9a",
            lw=2,
        )

    ax = plt.gca()

    if peaks:
        turning_points = (ks.min1, ks.max1, ks.min2, ks.max2, ks.min3)
        tcounts = [(kx, ky) for kx, ky in ks.counts if kx in turning_points]
        if tcounts:
            px, py = zip(*tcounts)
            tcounts = dict(tcounts)
            plt.plot(px, py, "ko", lw=3, mec="k", mfc="w")
            # Annotate a peak only when it is actually present in the
            # counts; a missing one used to raise KeyError here.
            for peak, text in ((ks.max1, "SNP peak"), (ks.max2, "Main peak")):
                if peak in tcounts:
                    ax.text(peak, tcounts[peak], text)

    # Leave headroom above the tallest bar for the messages.
    ymax = ax.get_ylim()[1]
    ymax = ymax * 7 / 6

    if method == "nbinom":
        # Plot multiple CN locations, CN1, CN2, ... up to ploidy
        cn_color = "#a6cee3"
        for i in range(1, ks.ploidy + 1):
            cn_x = i * ks.lambda_
            plt.plot((cn_x, cn_x), (0, ymax), "-.", color=cn_color)
            plt.text(
                cn_x,
                ymax * 0.95,
                "CN{}".format(i),
                ha="right",
                va="center",
                color=cn_color,
                rotation=90,
            )

    messages = [
        total_kmers_msg,
        kmer_coverage_msg,
        genome_size_msg,
        repetitive_msg,
        snprate_msg,
    ]
    if method == "nbinom":
        messages += [ks.ploidy_message] + ks.copy_messages
    write_messages(ax, messages)

    ax.set_title(markup(title))
    ax.set_xlim((0, vmax))
    ax.set_ylim((0, ymax))
    adjust_spines(ax, ["left", "bottom"], outward=True)
    ax.set_xlabel("Coverage (X)")
    ax.set_ylabel("Counts")
    set_human_axis(ax)

    # Strip the extensions from the file name only, so a "." in the
    # directory part (e.g. "./x.histogram") does not truncate the path.
    prefix = op.join(op.dirname(histfile), op.basename(histfile).split(".")[0])
    imagename = prefix + "." + iopts.format
    savefig(imagename, dpi=100)

    return genome_size
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    # NOTE: the docstring above doubles as the command-line usage text, so
    # implementation notes live in comments instead.
    from jcvi.formats.bed import Bed

    p = OptionParser(fig3.__doc__)
    p.add_option("--gauge_step", default=10000000, type="int",
                 help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    rr, gg = diverge
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    K = Karyotype(fig, root, seqidsfile, klayout, gap=gap,
                  height=height, lw=2, generank=False, sizes=sizes,
                  heightpad=r, roundrect=True, plot_label=False)

    # Chromosome labels
    for kl in K.layout:
        if kl.empty:
            continue
        lx, ly = kl.xstart, kl.y
        if lx < .11:
            # Too close to the left edge: nudge the label right and up.
            lx += .1
            ly += .06
        root.text(lx - .015, ly, kl.label, fontsize=15,
                  ha="right", va="center")

    # Inset with datafiles
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        xy_axes.append(ax)
        seqid = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid, diverge=diverge)
        xy.draw()
        ax.set_xlim(start, end)
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    gene_order = Bed(bedfile).order
    atoc = "data/Genes.Converted.seuil.0.6.AtoC.txt"
    ctoa = "data/Genes.Converted.seuil.0.6.CtoA.txt"
    for asterisk in (False, True):
        conversion_track(gene_order, atoc, 0, "A02", ax_Ar, rr,
                         asterisk=asterisk)
        conversion_track(gene_order, atoc, 1, "C2", ax_Co, gg,
                         asterisk=asterisk)
        conversion_track(gene_order, ctoa, 0, "A02", ax_Ar, gg,
                         ypos=1, asterisk=asterisk)
        conversion_track(gene_order, ctoa, 1, "C2", ax_Co, rr,
                         ypos=1, asterisk=asterisk)

    # Region labels on the two parental read-depth insets; each entry is
    # (axes, "gene1 gene2", horizontal alignment, label).
    Ar, Co = xy_axes[1:3]
    annotations = ((Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
                   (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
                   (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
                   (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
                   (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
                   (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
                   (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
                   (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
                   (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
                   (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"))

    for ax, genes, ha, label in annotations:
        g1, g2 = genes.split()
        x1, x2 = gene_order[g1][1].start, gene_order[g2][1].start
        if ha == "center":
            x = (x1 + x2) / 2 * .8
        elif ha == "left":
            x = x2
        else:
            x = x1
        label = r"\textit{{{0}}}".format(label)
        color = rr if "+" in label else gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    line_order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]
    # Both coverage panels share every option except canvas/chromosome.
    coverage_kwargs = dict(order=line_order, gauge=None,
                           plot_chr_label=False, gauge_step=gstep,
                           palette="gray", cap=40, hlsuffix=hlsuffix,
                           labels_dict=labels_dict, diverge=diverge)

    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    c = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                 **coverage_kwargs)
    yys = c.yys
    x1, x2 = .37, .72
    tip = .02
    # (x, y, dx, dy, label): arrow tip at (x, y), text offset by (dx, dy).
    annotations = ((x1, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
                   (x2, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"))

    # The former 'frac' key is no longer supported by matplotlib's
    # annotate() (the head length is set in points via 'headlength'), so
    # it is not passed any more.
    arrowprops = dict(facecolor='black', shrink=.05,
                      width=1, headwidth=4)
    for x, y, dx, dy, label in annotations:
        label = r"\textit{{{0}}}".format(label)
        root.annotate(label, xy=(x, y), xytext=(x + dx, y + dy),
                      arrowprops=arrowprops, color=rr, fontsize=9,
                      ha="center", va="center")

    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir,
             **coverage_kwargs)

    pad = .03
    labels = ((.1, .67, "A"), (t1.xstart - 3 * pad, .95 + pad, "B"),
              (t2.xstart - 3 * pad, .25 + pad, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def f3a(args):
    """
    %prog f3a chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3A displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    # NOTE: the docstring above doubles as the command-line usage text, so
    # implementation notes live in comments instead.
    from jcvi.formats.bed import Bed

    p = OptionParser(f3a.__doc__)
    p.add_option("--gauge_step", default=10000000, type="int",
                 help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a)
    height = .11
    r = height / 4
    K = Karyotype(fig, root, seqidsfile, klayout, gap=gap,
                  height=height, lw=2, generank=False, sizes=sizes,
                  heightpad=r, roundrect=True)

    # Inset with datafiles
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        seqid = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid)
        xy.draw()
        ax.set_xlim(start, end)
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    order = Bed(bedfile).order
    atoc = "data/Genes.Converted.seuil.0.6.AtoC.txt"
    ctoa = "data/Genes.Converted.seuil.0.6.CtoA.txt"
    for asterisk in (False, True):
        conversion_track(order, atoc, 0, "A02", ax_Ar, "r",
                         asterisk=asterisk)
        conversion_track(order, atoc, 1, "C2", ax_Co, "g",
                         asterisk=asterisk)
        conversion_track(order, ctoa, 0, "A02", ax_Ar, "g",
                         ypos=1, asterisk=asterisk)
        conversion_track(order, ctoa, 1, "C2", ax_Co, "r",
                         ypos=1, asterisk=asterisk)

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-.5, 1.5)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-.5, 1.5)

    # The conversion legend that used to sit behind an `if False:` guard
    # never ran and has been dropped.

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "napusf3a." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    # NOTE: the docstring above doubles as the command-line usage text, so
    # implementation notes live in comments instead.
    from jcvi.formats.bed import Bed

    p = OptionParser(fig3.__doc__)
    p.add_option("--gauge_step", default=10000000, type="int",
                 help="Step size for the base scale")
    opts, args, iopts = p.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not p.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    rr, gg = diverge
    chrs = [[x] for x in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    chr_sizes, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    K = Karyotype(fig, root, seqidsfile, klayout, gap=gap,
                  height=height, lw=2, generank=False, sizes=sizes,
                  heightpad=r, roundrect=True, plot_label=False)

    # Chromosome labels
    for kl in K.layout:
        if kl.empty:
            continue
        lx, ly = kl.xstart, kl.y
        if lx < .11:
            # Too close to the left edge: nudge the label right and up.
            lx += .1
            ly += .06
        root.text(lx - .015, ly, kl.label, fontsize=15,
                  ha="right", va="center")

    # Inset with datafiles
    datafiles = ("chrA02.bzh.forxmgr", "parent.A02.per10kb.forxmgr",
                 "parent.C2.per10kb.forxmgr", "chrC02.bzh.forxmgr")
    datafiles = [op.join(datadir, x) for x in datafiles]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for t, datafile in zip(tracks, datafiles):
        ax = make_affix_axis(fig, t, -r, height=2 * r)
        xy_axes.append(ax)
        seqid = t.seqids[0]
        xy = XYtrack(ax, datafile, color="lightslategray")
        start, end = 0, t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid, diverge=diverge)
        xy.draw()
        ax.set_xlim(start, end)
        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    gene_order = Bed(bedfile).order
    atoc = "data/Genes.Converted.seuil.0.6.AtoC.txt"
    ctoa = "data/Genes.Converted.seuil.0.6.CtoA.txt"
    for asterisk in (False, True):
        conversion_track(gene_order, atoc, 0, "A02", ax_Ar, rr,
                         asterisk=asterisk)
        conversion_track(gene_order, atoc, 1, "C2", ax_Co, gg,
                         asterisk=asterisk)
        conversion_track(gene_order, ctoa, 0, "A02", ax_Ar, gg,
                         ypos=1, asterisk=asterisk)
        conversion_track(gene_order, ctoa, 1, "C2", ax_Co, rr,
                         ypos=1, asterisk=asterisk)

    # Region labels on the two parental read-depth insets; each entry is
    # (axes, "gene1 gene2", horizontal alignment, label).
    Ar, Co = xy_axes[1:3]
    annotations = ((Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
                   (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
                   (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
                   (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
                   (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
                   (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
                   (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
                   (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
                   (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
                   (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"))

    for ax, genes, ha, label in annotations:
        g1, g2 = genes.split()
        x1, x2 = gene_order[g1][1].start, gene_order[g2][1].start
        if ha == "center":
            x = (x1 + x2) / 2 * .8
        elif ha == "left":
            x = x2
        else:
            x = x1
        label = r"\textit{{{0}}}".format(label)
        color = rr if "+" in label else gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)
    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    line_order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]
    # Both coverage panels share every option except canvas/chromosome.
    coverage_kwargs = dict(order=line_order, gauge=None,
                           plot_chr_label=False, gauge_step=gstep,
                           palette="gray", cap=40, hlsuffix=hlsuffix,
                           labels_dict=labels_dict, diverge=diverge)

    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    c = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                 **coverage_kwargs)
    yys = c.yys
    x1, x2 = .37, .72
    tip = .02
    # (x, y, dx, dy, label): arrow tip at (x, y), text offset by (dx, dy).
    annotations = ((x1, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
                   (x2, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"))

    # The former 'frac' key is no longer supported by matplotlib's
    # annotate() (the head length is set in points via 'headlength'), so
    # it is not passed any more.
    arrowprops = dict(facecolor='black', shrink=.05,
                      width=1, headwidth=4)
    for x, y, dx, dy, label in annotations:
        label = r"\textit{{{0}}}".format(label)
        root.annotate(label, xy=(x, y), xytext=(x + dx, y + dy),
                      arrowprops=arrowprops, color=rr, fontsize=9,
                      ha="center", va="center")

    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir,
             **coverage_kwargs)

    pad = .03
    labels = ((.1, .67, "A"), (t1.xstart - 3 * pad, .95 + pad, "B"),
              (t2.xstart - 3 * pad, .25 + pad, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def simulate(args):
    """
    %prog simulate

    Run simulation on female restitution.
    """
    # NOTE: the docstring above doubles as the command-line usage text, so
    # implementation notes live in comments instead.
    import seaborn as sns

    sns.set_style("darkgrid")
    p = OptionParser(simulate.__doc__)
    p.add_option(
        "--verbose",
        default=False,
        action="store_true",
        help="Verbose logging during simulation",
    )
    opts, args, iopts = p.set_image_options(args, figsize="7x10")
    if len(args) != 0:
        sys.exit(not p.print_help())

    # Composite figure: one track per simulated cross (7 in total)
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    rows = 7
    ypad = 0.05
    yinterval = (1 - 2 * ypad) / (rows + 1)
    xpad = 0.18
    xwidth = 0.6

    # Vertically stacked axes sharing the x-axis; the top edge of every
    # track is remembered so the side annotations can be aligned later.
    axes = []
    yy_positions = []
    cursor = 1 - ypad
    last_row = rows - 1
    for row in range(rows):
        yy_positions.append(cursor)
        cursor -= yinterval
        track_ax = fig.add_axes([xpad, cursor, xwidth, yinterval * 0.85])
        if row != last_row:
            plt.setp(track_ax.get_xticklabels(), visible=False)
        axes.append(track_ax)
    bottom_ax = axes[-1]

    # The two simulated parents
    SS = Genome("SS", "SS", 10, 8)
    SO = Genome("SO", "SO", 8, 10)
    verbose = opts.verbose

    # (title, subtitle, output name, single-offspring simulator), listed
    # top to bottom in plotting order.
    crosses = (
        ("F1", None, "all_F1s",
         lambda: simulate_F1(SO, SS, verbose=verbose)),
        ("F2", "via selfing", "all_F2s",
         lambda: simulate_F2(SO, SS, verbose=verbose)),
        ("F2", "via intercross", "all_F1intercrosses",
         lambda: simulate_F1intercross(SO, SS, verbose)),
        ("BC1", None, "all_BC1s",
         lambda: simulate_BCn(1, SO, SS, verbose=verbose)),
        ("BC2", None, "all_BC2s",
         lambda: simulate_BCn(2, SO, SS, verbose=verbose)),
        ("BC3", None, "all_BC3s",
         lambda: simulate_BCn(3, SO, SS, verbose=verbose)),
        ("BC4", None, "all_BC4s",
         lambda: simulate_BCn(4, SO, SS, verbose=verbose)),
    )

    # Run every simulation first (cross by cross), then do all plotting.
    populations = []
    for _, _, _, make_offspring in crosses:
        populations.append([make_offspring() for _ in range(1000)])

    summaries = []
    for track_ax, population in zip(axes, populations):
        summaries.append(plot_summary(track_ax, population))

    # Show title to the left
    xx = xpad / 2
    for (title, subtitle, _, _), top in zip(crosses, yy_positions):
        title_y = top - (0.06 if subtitle else 0.07)
        root.text(xx, title_y, title, color="darkslategray",
                  ha="center", va="center")
        if subtitle:
            root.text(
                xx, title_y - 0.02, subtitle, color="lightslategray",
                ha="center", va="center"
            )

    # Show summary stats to the right
    xx = 1 - (1 - xpad - xwidth) / 2
    stat_lines = (
        ("SO_summary", SoColor, 0.04),
        ("SS_summary", SsColor, 0.02),
        ("percent_SO_summary", SoColor, 0.02),
    )
    for summary, top in zip(summaries, yy_positions):
        stat_y = top
        for attribute, color, drop in stat_lines:
            stat_y -= drop
            root.text(
                xx,
                stat_y,
                getattr(summary, attribute),
                color=color,
                ha="center",
                va="center",
            )

    bottom_ax.set_xlabel("Number of unique chromosomes")
    adjust_spines(bottom_ax, ["bottom"], outward=True)
    normalize_axes(root)

    savefig("plotter.pdf", dpi=120)

    # Write chromosomes to disk
    outdir = "simulations"
    mkdir(outdir)
    for (_, _, filename, _), genomes in zip(crosses, populations):
        write_chromosomes(genomes, op.join(outdir, filename))
def __init__(self, fig, root, canvas, chr, xlim, datadir,
             order=None, hlsuffix=None, palette=None, cap=50,
             gauge="bottom", plot_label=True, plot_chr_label=True,
             gauge_step=5000000, vlines=None, labels_dict=None,
             diverge=('r', 'g')):
    """Draw a framed stack of XY tracks, one per data file of ``chr``.

    Data files are found by globbing ``<datadir>/<chr>*``; every file gets
    its own horizontal strip inside ``canvas``. The lower edge of each
    strip is stored, top to bottom, in ``self.yys``.

    Args:
        fig: Object whose ``add_axes`` creates the track and gauge axes.
        root: Axes receiving the frame rectangle and the text labels.
        canvas: ``(x, y, w, h)`` of the panel.
        chr: File-name prefix used for the glob; also the panel label.
        xlim: ``(start, end)`` x-range applied to every track.
        datadir: Directory holding the data (and highlight) files.
        order: Optional list of track labels. Files whose label is not
            listed are dropped and the rest are stacked in this order.
            Without it, all files are stacked in sorted name order.
        hlsuffix: If set, ``<datadir>/<label>.<hlsuffix>`` is imported as
            the highlight file of each track.
        palette: Single colour used for all tracks; ``None`` picks the
            "Set2" qualitative colour map.
        cap: Passed to ``XYtrack.cap`` as ``ymax``.
        gauge: "top" or "bottom" draws a scale axis there; any other
            value (e.g. ``None``) draws no gauge.
        plot_label: Write each track's display label left of the panel.
        plot_chr_label: Write ``chr`` above/below the panel.
        gauge_step: Step handed to ``setup_gauge_ax``.
        vlines: Optional positions forwarded to ``XYtrack.vlines``.
        labels_dict: Optional mapping from track label to display text;
            labels not in the mapping are shown capitalized.
        diverge: Colour pair forwarded to ``XYtrack.import_hlfile``.
    """
    def track_label(path):
        # A track is identified by the second dot-separated field of its
        # file name. Working on the base name keeps a "." inside
        # ``datadir`` (e.g. "./data") from shifting that field.
        return op.basename(path).split(".")[1]

    # Avoid a shared mutable default argument.
    if labels_dict is None:
        labels_dict = {}

    x, y, w, h = canvas
    p = .01
    root.add_patch(
        Rectangle((x - p, y - p), w + 2 * p, h + 2 * p, lw=1, fill=False,
                  ec="darkslategray", zorder=10))

    datafiles = glob(op.join(datadir, chr + "*"))
    if order:
        datafiles = [z for z in datafiles if track_label(z) in order]
        datafiles.sort(key=lambda z: order.index(track_label(z)))
    else:
        # glob() order is arbitrary; sort for a reproducible layout.
        datafiles.sort()

    ntracks = len(datafiles)
    yinterval = h / ntracks
    yy = y + h

    if palette is None:
        # Get the palette
        set2 = get_map('Set2', 'qualitative', ntracks).mpl_colors
    else:
        set2 = [palette] * ntracks

    # Only "top"/"bottom" create a gauge. The chromosome label falls back
    # to the position below the panel when there is no gauge, instead of
    # failing on an unset position.
    gauge_ax = None
    tpos = y - .07
    if gauge == "top":
        gauge_ax = fig.add_axes([x, yy + p, w, .0001])
        adjust_spines(gauge_ax, ["top"])
        tpos = yy + .07
    elif gauge == "bottom":
        gauge_ax = fig.add_axes([x, y - p, w, .0001])
        adjust_spines(gauge_ax, ["bottom"])

    start, end = xlim
    if gauge_ax is not None:
        fs = gauge_step < 1000000
        setup_gauge_ax(gauge_ax, start, end, gauge_step, float_formatter=fs)

    if plot_chr_label:
        root.text(x + w / 2, tpos, chr, ha="center", va="center",
                  color="darkslategray", size=16)

    # Labels come from the files themselves rather than from ``order`` by
    # position, so a label without a matching file cannot shift the labels
    # of later tracks and ``order=None`` no longer breaks the loop.
    yys = []
    for datafile, c in zip(datafiles, set2):
        label = track_label(datafile)
        yy -= yinterval
        yys.append(yy)
        ax = fig.add_axes([x, yy, w, yinterval * .9])
        xy = XYtrack(ax, datafile, color=c)
        xy.interpolate(end)
        xy.cap(ymax=cap)
        if vlines:
            xy.vlines(vlines)
        if hlsuffix:
            hlfile = op.join(datadir, ".".join((label, hlsuffix)))
            xy.import_hlfile(hlfile, chr, diverge=diverge)
        if plot_label:
            display = labels_dict.get(label, label.capitalize())
            display = r"\textit{{{0}}}".format(display)
            root.text(x - .015, yy + yinterval / 2, display,
                      ha="right", va="center")
        xy.draw()
        ax.set_xlim(*xlim)

    self.yys = yys
def mosdepth(args):
    """
    %prog mosdepth mosdepth.global.dist.txt groups

    Plot depth vs. coverage per chromosome. Inspired by mosdepth plot. See also:
    https://github.com/brentp/mosdepth
    """
    # NOTE: the docstring above doubles as the command-line usage text, so
    # implementation notes live in comments instead.
    import seaborn as sns

    sns.set_style("darkgrid")

    p = OptionParser(mosdepth.__doc__)
    p.add_option("--maxdepth", default=100, type="int",
                 help="Maximum depth to plot")
    p.add_option("--logscale", default=False, action="store_true",
                 help="Use log-scale on depth")
    opts, args, iopts = p.set_image_options(args, style="dark", figsize="6x8")

    if len(args) != 2:
        # print_help() returns None; negate it so a usage error exits with
        # a non-zero status, like the sibling commands do.
        sys.exit(not p.print_help())

    # Read in datasets
    distfile, groupsfile = args
    dists = parse_distfile(distfile)
    groups = parse_groupsfile(groupsfile)
    logscale = opts.logscale

    # Construct a composite figure with N tracks indicated in the groups
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    rows = len(groups)
    ypad = 0.05
    yinterval = (1 - 2 * ypad) / (rows + 1)
    yy = 1 - ypad

    ax = None
    for group_idx, (chrs, colors) in enumerate(groups):
        yy -= yinterval
        ax = fig.add_axes([0.15, yy, 0.7, yinterval * 0.85])
        for c, color in zip(chrs, colors):
            cdata = dists[c].items()
            logging.debug("Importing {} records for {}".format(len(cdata), c))
            cx, cy = zip(*sorted(cdata))
            ax.plot(cx, cy, "-", color=color)

        if logscale:
            try:
                # Current matplotlib spells the keyword 'base'.
                ax.set_xscale("log", base=2)
            except (TypeError, ValueError):
                # Older matplotlib releases only accept the axis-specific
                # 'basex' spelling.
                ax.set_xscale("log", basex=2)
        ax.set_xlim(1 if logscale else 0, opts.maxdepth)
        ax.get_yaxis().set_visible(False)
        # Only the bottom track keeps its x-axis.
        if group_idx != rows - 1:
            ax.get_xaxis().set_visible(False)

        # Add legend to the right of the canvas
        label_pad = 0.02
        label_yy = yy + yinterval
        for c, color in zip(chrs, colors):
            label_yy -= label_pad
            root.text(0.92, label_yy, c, color=color,
                      ha="center", va="center")

    root.text(
        0.1,
        0.5,
        "Proportion of bases at coverage",
        rotation=90,
        color="darkslategray",
        ha="center",
        va="center",
    )
    root.text(0.5, 0.05, "Coverage", color="darkslategray",
              ha="center", va="center")
    normalize_axes(root)
    # ``ax`` is the bottom track; it does not exist when no group was given.
    if ax is not None:
        adjust_spines(ax, ["bottom"], outward=True)

    pf = "mosdepth"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)