def wgd(args):
    """
    %prog wgd vplanifoliaA_blocks.bed vplanifoliaA.sizes

    Create a wgd figure.
    """
    import sys

    from jcvi.graphics.chromosome import draw_chromosomes

    # Usage text must come from this command's own docstring (it previously
    # reused the one from `synteny`).
    p = OptionParser(wgd.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    # Print the usage and exit on a wrong argument count instead of failing
    # later with an unpacking error.
    if len(args) != 2:
        sys.exit(not p.print_help())

    bedfile, sizesfile = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    ax1 = fig.add_axes([0, 0, 1, 1])

    title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$"
    draw_chromosomes(
        ax1,
        bedfile,
        sizes=sizesfile,
        iopts=iopts,
        mergedist=200000,
        winsize=50000,
        imagemap=False,
        gauge=True,
        legend=False,
        title=title,
    )

    normalize_axes([ax1])

    image_name = "wgd.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main(args):
    """
    %prog table.csv

    Render a table on canvas. Input is a CSV file.
    """
    parser = OptionParser(main.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="7x7")

    # Exactly one positional argument (the CSV file) is expected
    if len(args) != 1:
        sys.exit(not parser.print_help())

    csvfile = args[0]
    # Output prefix is the input name without its last extension
    prefix = csvfile.rsplit(".", 1)[0]

    canvas_size = (iopts.w, iopts.h)
    fig = plt.figure(1, canvas_size)
    root = fig.add_axes([0, 0, 1, 1])

    table = CsvTable(csvfile)
    draw_table(root, table)
    normalize_axes(root)

    savefig(prefix + "." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def draw(self):
    # Draw `om1` and `om2` (in that order), then set up the shared axes
    for om in (self.om1, self.om2):
        om.draw()

    shared_ax = self.sax
    shared_ax.set_xlim(0, self.om1.amax)
    shared_ax.set_ylim(-8, 8)

    normalize_axes(self.ax)
    shared_ax.set_axis_off()
def draw(self, width=0.03):
    # `width` is the thickness passed to the vertical chromosome; the
    # horizontal one is drawn 1.5x as thick.
    xpad, ypad = self.xpad, self.ypad

    HorizontalChromosome(
        self.ax,
        xpad,
        1 - xpad,
        ypad - 0.05,
        height=width * 1.5,
        patch=self.apatch,
        lw=2,
    )
    Chromosome(
        self.ax,
        xpad - 0.05,
        ypad,
        1 - ypad,
        width=width,
        patch=self.bpatch,
        patchcolor=self.bpatchcolor,
        lw=2,
    )

    # One line per paired (a, b) entry
    plot_ax = self.sax
    for xs, ys in zip(self.a, self.b):
        plot_ax.plot(xs, ys, "-", color="darkslategrey", lw=2)

    plot_ax.set_xticklabels([])
    plot_ax.set_yticklabels([])
    plot_ax.set_xlim((1, self.amax))
    plot_ax.set_ylim((1, self.bmax))
    normalize_axes(self.ax)
def allelefreq(args):
    """
    %prog allelefreq HD,DM1,SCA1,SCA17

    Plot the allele frequencies of some STRs.
    """
    parser = OptionParser(allelefreq.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    loci = args[0]

    # 2x2 grid, one panel per locus
    fig, grid = plt.subplots(ncols=2, nrows=2, figsize=(iopts.w, iopts.h))
    (top_left, top_right), (bottom_left, bottom_right) = grid
    plt.tight_layout(pad=4)

    treds, df = read_treds()
    df = df.set_index(["abbreviation"])

    panels = (top_left, top_right, bottom_left, bottom_right)
    for panel, locus in zip(panels, loci.split(",")):
        plot_allelefreq(panel, df, locus)

    # Canvas-wide axes carrying the A-D panel labels
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    left, top, middle = pad / 2, 1 - pad, 1 / 2.
    panel_labels(
        root,
        (
            (left, top, "A"),
            (middle, top, "B"),
            (left, middle, "C"),
            (middle, middle, "D"),
        ),
    )
    normalize_axes(root)

    savefig("allelefreq." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    fig = plt.figure(1, (iopts.w, iopts.h))
    # Whole-canvas axes first, then the two data panels
    root = fig.add_axes([0, 0, 1, 1])
    panels = (
        fig.add_axes([0.1, 0.18, 0.32, 0.64]),
        fig.add_axes([0.6, 0.18, 0.32, 0.64]),
    )

    # Load both inputs before plotting either one
    datasets = [import_data(filename) for filename in args]
    titles = ("Yellow catfish", "Medicago")

    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    for panel, data, title in zip(panels, datasets, titles):
        subplot_twinx(panel, data, xlabel, ylabels, title=title, legend=legend)

    panel_labels(root, ((0.04, 0.92, "A"), (0.54, 0.92, "B")))

    normalize_axes(root)
    savefig("resample." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def likelihood2(args):
    """
    %prog likelihood2 100_20.json

    Plot the likelihood surface and marginal distributions.
    """
    from matplotlib import gridspec

    parser = OptionParser(likelihood2.__doc__)
    opts, args, iopts = parser.set_image_options(
        args, figsize="10x5", style="white", cmap="coolwarm"
    )

    if len(args) != 1:
        sys.exit(not parser.print_help())

    jsonfile = args[0]

    fig = plt.figure(figsize=(iopts.w, iopts.h))
    # Left column is one tall panel; right column is split in two
    layout = gridspec.GridSpec(2, 2)
    surface_ax = fig.add_subplot(layout[:, 0])
    upper_ax = fig.add_subplot(layout[0, 1])
    lower_ax = fig.add_subplot(layout[1, 1])
    plt.tight_layout(pad=3)

    # plot_panel returns the prefix used in the output file name
    pf = plot_panel(jsonfile, surface_ax, upper_ax, lower_ax, opts.cmap)

    root = fig.add_axes([0, 0, 1, 1])
    normalize_axes(root)

    stem = "likelihood2.{}.".format(pf)
    savefig(stem + iopts.format, dpi=iopts.dpi, iopts=iopts)
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    opt_parser = OptionParser(resample.__doc__)
    opts, args, iopts = opt_parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not opt_parser.print_help())

    file_a, file_b = args

    figure_size = (iopts.w, iopts.h)
    fig = plt.figure(1, figure_size)
    root = fig.add_axes([0, 0, 1, 1])  # full canvas, holds the panel labels
    left_ax = fig.add_axes([.1, .18, .32, .64])
    right_ax = fig.add_axes([.6, .18, .32, .64])

    data_a = import_data(file_a)
    data_b = import_data(file_b)

    # Settings shared by both twin-axis panels
    x_label = "Fraction of markers"
    y_labels = ("Anchor rate", "Runtime (m)")
    legend_names = ("anchor rate", "runtime")
    subplot_twinx(left_ax, data_a, x_label, y_labels,
                  title="Yellow catfish", legend=legend_names)
    subplot_twinx(right_ax, data_b, x_label, y_labels,
                  title="Medicago", legend=legend_names)

    panel_labels(root, ((.04, .92, "A"), (.54, .92, "B")))
    normalize_axes(root)

    output_name = "resample." + iopts.format
    savefig(output_name, dpi=iopts.dpi, iopts=iopts)
def compare2(args):
    """
    %prog compare2

    Compare performances of various variant callers on simulated STR datasets.
    """
    parser = OptionParser(compare2.__doc__)
    parser.add_option('--maxinsert', default=300, type="int",
                      help="Maximum number of repeats")
    add_simulate_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="10x5")

    # This command takes options only, no positional arguments
    if len(args) != 0:
        sys.exit(not parser.print_help())

    max_insert = opts.maxinsert

    fig, (haploid_ax, diploid_ax) = plt.subplots(
        ncols=2, nrows=1, figsize=(iopts.w, iopts.h)
    )
    plt.tight_layout(pad=2)

    # Both panel titles share the same simulation-parameter suffix
    settings = r" ($D=%s\times, L=%dbp, V=%dbp$)" % (
        opts.depth, opts.readlen, opts.distance)

    # Left panel: lobSTR vs TREDPARSE with haploid model
    lobstr = parse_results("lobstr_results_homo.txt")
    tredparse = parse_results("tredparse_results_homo.txt")
    plot_compare(haploid_ax, SIMULATED_HAPLOID + settings, tredparse, lobstr,
                 max_insert=max_insert)

    # Right panel: lobSTR vs TREDPARSE with diploid model
    lobstr = parse_results("lobstr_results_het.txt", exclude=20)
    tredparse = parse_results("tredparse_results_het.txt", exclude=20)
    plot_compare(diploid_ax, SIMULATED_DIPLOID + settings, tredparse, lobstr,
                 max_insert=max_insert)

    for panel in (haploid_ax, diploid_ax):
        panel.set_xlim(0, max_insert)
        panel.set_ylim(0, max_insert)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    top = 1 - pad
    panel_labels(root, ((pad / 2, top, "A"), (1 / 2., top, "B")))
    normalize_axes(root)

    savefig("tredparse." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def compare(args):
    """
    %prog compare Evaluation.csv

    Compare performances of various variant callers on simulated STR datasets.
    """
    p = OptionParser(compare.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    bbox = {'facecolor': 'tomato', 'alpha': .2, 'ec': 'w'}
    text_pad = 2  # offset (in data units) of the threshold annotations

    # Read benchmark data from the file given on the command line; the
    # path used to be hard-coded to "Evaluation.csv", ignoring the argument.
    df = pd.read_csv(datafile)
    truth = df["Truth"]
    axes = (ax1, ax2, ax3, ax4)
    progs = ("Manta", "Isaac", "GATK", "lobSTR")
    markers = ("bx-", "yo-", "md-", "c+-")

    # NOTE(review): `infected_thr` and `ref_thr` are not defined in this
    # function; they are presumably module-level constants -- confirm.
    for ax, prog, marker in zip(axes, progs, markers):
        ax.plot(truth, df[prog], marker)
        ax.plot(truth, truth, 'k--')  # to show diagonal
        ax.axhline(infected_thr, color='tomato')
        ax.text(max(truth) - text_pad, infected_thr + text_pad,
                'Risk threshold', bbox=bbox, ha="right")
        ax.axhline(ref_thr, color='tomato')
        ax.text(max(truth) - text_pad, ref_thr - text_pad,
                'Reference repeat count', bbox=bbox, ha="right", va="top")
        ax.set_title(SIMULATED_HAPLOID)
        ax.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
        ax.set_ylabel('Num of CAG repeats called')
        ax.legend([prog, 'Truth'], loc='best')

    # Canvas-wide axes carrying the A-D panel labels
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def snpplot(args):
    """
    %prog counts.cdt

    Illustrate the histogram per SNP site.
    """
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (datafile,) = args

    # Read in CDT file; the context manager closes the handle, which was
    # previously left open.
    data = []
    with open(datafile) as fp:
        # The first two lines are skipped (header lines)
        next(fp)
        next(fp)
        for row in fp:
            # Values start at the fifth whitespace-separated column
            atoms = row.split()[4:]
            nval = len(atoms)
            values = [float(x) for x in atoms]
            # Normalize each row to fractions; the row total is computed once
            total = sum(values)
            values = [x * 1.0 / total for x in values]
            data.append(values)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = 0.1, 0.9
    ymin, ymax = 0.1, 0.9
    yinterval = (ymax - ymin) / len(data)
    # `nval` is the column count of the last row read; rows with more than
    # three values get a leading light-gray segment.
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")
    ystart = ymax
    for d in data:
        # One horizontal stacked bar per row, drawn as consecutive segments
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(
        0.05,
        0.5,
        "{0} LMD50 SNPs".format(len(data)),
        ha="center",
        va="center",
        rotation=90,
        color="lightslategray",
    )

    for x, t, c in zip((0.3, 0.5, 0.7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, 0.95, t, color=c, ha="center", va="center")

    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def draw(self, width=.03):
    # Chromosome bar along the bottom; its vertical position is offset by
    # half of `width`.
    pad_x = self.xpad
    HorizontalChromosome(
        self.ax,
        pad_x,
        1 - pad_x,
        self.ypad - width / 2,
        height=width * 1.5,
        patch=self.apatch,
        lw=2,
    )

    # Each read draws itself onto the shared axes
    read_ax = self.sax
    for read in self.reads:
        read.draw(read_ax)

    read_ax.set_xlim((1, self.amax))
    read_ax.set_ylim((-1, self.ymax))
    normalize_axes(self.ax)
    read_ax.set_axis_off()
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    # Benchmark files are paired with these tags in order; zip() stops at
    # the shorter of the two sequences.
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        # Each row: program name, program count, truth count, shared count.
        # The context manager closes the file, which was previously leaked.
        data = []
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        # One Venn diagram per program, laid out left to right in this row
        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            # Replaces the Python-2-only `print >> sys.stderr` statement;
            # write() behaves the same on Python 2 and 3.
            sys.stderr.write(message + "\n")
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    # Benchmark files are paired with these tags in order; zip() stops at
    # the shorter of the two sequences.
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        # Each row: program name, program count, truth count, shared count.
        # The context manager closes the file, which was previously leaked.
        data = []
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        # One Venn diagram per program, laid out left to right in this row
        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            print(message, file=sys.stderr)
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def snpplot(args):
    """
    %prog counts.cdt

    Illustrate the histogram per SNP site.
    """
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args

    # Read in CDT file; the context manager closes the handle, which was
    # previously left open.
    data = []
    with open(datafile) as fp:
        # The first two lines are skipped (header lines)
        next(fp)
        next(fp)
        for row in fp:
            # Values start at the fifth whitespace-separated column
            atoms = row.split()[4:]
            nval = len(atoms)
            values = [float(x) for x in atoms]
            # Normalize each row to fractions; the row total is computed once
            total = sum(values)
            values = [x * 1. / total for x in values]
            data.append(values)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = .1, .9
    ymin, ymax = .1, .9
    yinterval = (ymax - ymin) / len(data)
    # `nval` is the column count of the last row read; rows with more than
    # three values get a leading light-gray segment.
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")
    ystart = ymax
    for d in data:
        # One horizontal stacked bar per row, drawn as consecutive segments
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(.05, .5, "{0} LMD50 SNPs".format(len(data)),
              ha="center", va="center", rotation=90, color="lightslategray")

    for x, t, c in zip((.3, .5, .7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, .95, t, color=c, ha="center", va="center")

    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def draw(self, width=.03):
    # Horizontal bar (below the plot) and vertical bar (left of the plot)
    canvas = self.ax
    horizontal_opts = dict(height=width * 1.5, patch=self.apatch, lw=2)
    HorizontalChromosome(canvas, self.xpad, 1 - self.xpad, self.ypad - .05,
                         **horizontal_opts)
    vertical_opts = dict(width=width, patch=self.bpatch,
                         patchcolor=self.bpatchcolor, lw=2)
    Chromosome(canvas, self.xpad - .05, self.ypad, 1 - self.ypad,
               **vertical_opts)

    # Plot each paired entry of `self.a` / `self.b` as a line
    for seg_a, seg_b in zip(self.a, self.b):
        self.sax.plot(seg_a, seg_b, "-", color="darkslategrey", lw=2)

    # Hide tick labels and fix the data limits
    self.sax.set_xticklabels([])
    self.sax.set_yticklabels([])
    self.sax.set_xlim((1, self.amax))
    self.sax.set_ylim((1, self.bmax))

    normalize_axes(canvas)
def movieframe(args):
    """
    %prog movieframe tour test.clm contigs.ref.anchors

    Draw heatmap and synteny in the same plot.
    """
    parser = OptionParser(movieframe.__doc__)
    parser.add_option("--label", help="Figure title")
    parser.set_beds()
    parser.set_outfile(outfile=None)
    opts, args, iopts = parser.set_image_options(
        args, figsize="16x8", style="white", cmap="coolwarm",
        format="png", dpi=120)

    if len(args) != 3:
        sys.exit(not parser.print_help())

    tour, clmfile, anchorsfile = args
    tour = tour.split(",")  # comma-separated list on the command line

    # Output name: explicit --outfile wins, otherwise a default name
    if opts.outfile:
        image_name = opts.outfile
    else:
        image_name = "movieframe." + iopts.format

    # Label: explicit --label wins, otherwise the output file's stem
    if opts.label:
        label = opts.label
    else:
        label = op.basename(image_name).rsplit(".", 1)[0]

    clm = CLMFile(clmfile)
    totalbins, bins, breaks = make_bins(tour, clm.tig_to_size)
    matrix = read_clm(clm, totalbins, bins)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])               # whole canvas
    heatmap_ax = fig.add_axes([.05, .1, .4, .8])    # heatmap
    dotplot_ax = fig.add_axes([.55, .1, .4, .8])    # dot plot
    dotplot_root = fig.add_axes([.5, 0, .5, 1])     # dot plot canvas

    # Left half: heatmap
    plot_heatmap(heatmap_ax, matrix, breaks, iopts)

    # Right half: synteny
    qbed, sbed, qorder, sorder, is_self = check_beds(
        anchorsfile, parser, opts, sorted=False)
    dotplot(anchorsfile, qbed, sbed, fig, dotplot_root, dotplot_ax,
            sep=False, title="")

    centered = dict(ha="center", va="center")
    root.text(.5, .98, clm.name, color="g", **centered)
    root.text(.5, .95, label, color="darkslategray", **centered)
    normalize_axes(root)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    # 2x2 layout: A/B on the top row, C/D on the bottom row
    A = fig.add_axes([0.12, 0.62, 0.35, 0.35])
    B = fig.add_axes([0.62, 0.62, 0.35, 0.35])
    C = fig.add_axes([0.12, 0.12, 0.35, 0.35])
    D = fig.add_axes([0.62, 0.12, 0.35, 0.35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=0.5)
    subplot(
        B,
        dataB,
        "Translocation error rate",
        "Accuracy",
        xlim=0.5,
        # Raw string: `\%` is an invalid escape sequence in a normal string
        # literal. The runtime text (backslash + percent) is unchanged.
        legend=(
            "intra-chromosomal",
            "inter-chromosomal",
            r"75\% intra + 25\% inter",
        ),
    )
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)
    labels = (
        (0.03, 0.97, "A"),
        (0.53, 0.97, "B"),
        (0.03, 0.47, "C"),
        (0.53, 0.47, "D"),
    )
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def tree(args):
    """
    %prog tree treefile

    Create a tree figure.
    """
    import sys

    from jcvi.graphics.tree import parse_tree, LeafInfoFile, WGDInfoFile, draw_tree

    p = OptionParser(tree.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x8")

    # Print the usage and exit on a wrong argument count instead of failing
    # with an unpacking error.
    if len(args) != 1:
        sys.exit(not p.print_help())

    (datafile,) = args
    logging.debug("Load tree file `{0}`".format(datafile))
    t, hpd = parse_tree(datafile)

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax1 = fig.add_axes([0, 0, 1, 1])

    margin, rmargin = 0.1, 0.2  # Left and right margin
    # Leaf and WGD annotations are read from fixed file names in the
    # current working directory.
    leafinfo = LeafInfoFile("leafinfo.csv").cache
    wgdinfo = WGDInfoFile("wgdinfo.csv").cache
    outgroup = "ginkgo"

    # Panel A
    draw_tree(
        ax1,
        t,
        hpd=hpd,
        margin=margin,
        rmargin=rmargin,
        supportcolor=None,
        internal=False,
        outgroup=outgroup,
        reroot=False,
        leafinfo=leafinfo,
        wgdinfo=wgdinfo,
        geoscale=True,
    )

    normalize_axes([ax1])

    image_name = "tree.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def synteny(args):
    """
    %prog synteny vplanifoliaA_blocks.bed vplanifoliaA.sizes \
        b1.blocks all.bed b1.layout

    Create a composite figure with (A) wgd and (B) microsynteny.
    """
    import sys

    from jcvi.graphics.chromosome import draw_chromosomes

    p = OptionParser(synteny.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="12x12")

    # Print the usage and exit on a wrong argument count instead of failing
    # with an unpacking error.
    if len(args) != 5:
        sys.exit(not p.print_help())

    bedfile, sizesfile, blocksfile, allbedfile, blockslayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0.5, 1, 0.5])       # top half: panel A
    ax2 = fig.add_axes([0.02, 0, 0.98, 0.5])   # bottom half: panel B

    # Panel A
    title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$"
    draw_chromosomes(
        ax1,
        bedfile,
        sizes=sizesfile,
        iopts=iopts,
        mergedist=200000,
        winsize=50000,
        imagemap=False,
        gauge=True,
        legend=False,
        title=title,
    )

    # Panel B
    draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout)

    normalize_axes([root, ax1, ax2])
    labels = ((0.05, 0.95, "A"), (0.05, 0.5, "B"))
    panel_labels(root, labels)

    image_name = "synteny.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def microsynteny(args):
    """
    %prog microsynteny b1.blocks all.bed b1.layout

    Create a microsynteny figure.
    """
    import sys

    # Usage text must come from this command's own docstring (it previously
    # reused the one from `synteny`).
    p = OptionParser(microsynteny.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="12x6")

    # Print the usage and exit on a wrong argument count instead of failing
    # with an unpacking error.
    if len(args) != 3:
        sys.exit(not p.print_help())

    blocksfile, allbedfile, blockslayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax2 = fig.add_axes([0, 0, 1, 1])

    draw_ploidy(fig, ax2, blocksfile, allbedfile, blockslayout)

    normalize_axes([ax2])

    image_name = "microsynteny.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    # 2x2 layout: A/B on the top row, C/D on the bottom row
    A = fig.add_axes([.12, .62, .35, .35])
    B = fig.add_axes([.62, .62, .35, .35])
    C = fig.add_axes([.12, .12, .35, .35])
    D = fig.add_axes([.62, .12, .35, .35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=.5)
    # Raw string for the last legend entry: `\%` is an invalid escape
    # sequence in a normal string literal. The runtime text is unchanged.
    subplot(B, dataB, "Translocation error rate", "Accuracy", xlim=.5,
            legend=("intra-chromosomal", "inter-chromosomal",
                    r"75\% intra + 25\% inter"))
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)
    labels = ((.03, .97, "A"), (.53, .97, "B"),
              (.03, .47, "C"), (.53, .47, "D"))
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def likelihood3(args):
    """
    %prog likelihood3 200_20.json 200_100.json

    Plot the likelihood surface and marginal distributions for two settings.
    """
    # The usage line above now names this command; it previously said
    # `likelihood2`, and the docstring is what OptionParser prints as help.
    from matplotlib import gridspec

    p = OptionParser(likelihood3.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x10",
                                            style="white", cmap="coolwarm")
    if len(args) != 2:
        sys.exit(not p.print_help())

    jsonfile1, jsonfile2 = args
    fig = plt.figure(figsize=(iopts.w, iopts.h))
    # 9-row grid: rows 0-3 hold the first setting, row 4 is left empty as a
    # gap, rows 5-8 hold the second setting.
    gs = gridspec.GridSpec(9, 2)
    ax1 = fig.add_subplot(gs[:4, 0])
    ax2 = fig.add_subplot(gs[:2, 1])
    ax3 = fig.add_subplot(gs[2:4, 1])
    ax4 = fig.add_subplot(gs[5:, 0])
    ax5 = fig.add_subplot(gs[5:7, 1])
    ax6 = fig.add_subplot(gs[7:, 1])
    plt.tight_layout(pad=2)

    plot_panel(jsonfile1, ax1, ax2, ax3, opts.cmap)
    plot_panel(jsonfile2, ax4, ax5, ax6, opts.cmap)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .02
    panel_labels(root, ((pad, 1 - pad, "A"), (pad, 4. / 9, "B")))
    normalize_axes(root)

    image_name = "likelihood3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Shared line style for the highlighted runs
    highlight = dict(lw=12, alpha=.3, solid_capstyle="round",
                     solid_joinstyle="round")

    # Panel A
    w, h = .7, .35
    ax = fig.add_axes([.15, .6, w, h])

    # Ten markers with random jitter; markers 3-6 are reversed
    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]
    # zip() returns an iterator on Python 3; materialize it so that it can
    # be sliced below.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xlds, ylds, "g-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, .15, .15 + w, .57, height=.02, lw=2)
    root.text(.15 + w / 2, .55, "Chromosome location (bp)",
              ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax)

    # Panel B
    w = .37
    # Scaffold boundaries in data units (previously bound to `p`, which
    # shadowed the option parser)
    breaks = (0, 45, 75, 110)
    ax = fig.add_axes([.1, .12, w, h])
    xdata = list(range(10, 110, 10))
    ydata_orig = list(range(10, 110, 10))
    # Last three markers and the three before them are swapped, and the
    # moved block is reversed
    ydata = ydata_orig[:4] + ydata_orig[7:] + ydata_orig[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(xlis, ylis, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax)
    # Map the boundaries from data units to canvas coordinates
    patch = [.1 + w * x / 110. for x in breaks]
    HorizontalChromosome(root, .1, .1 + w, .09, patch=patch,
                         height=.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.1 + w / 2, .04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([.6, .12, w, h])
    patch = [.6 + w * x / 110. for x in breaks]
    ydata = ydata_orig
    ax.plot(xdata, ydata, "r-", **highlight)
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax)
    HorizontalChromosome(root, .6, .6 + w, .09, patch=patch,
                         height=.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, .09, s, va="center", ha="center")
    root.text(.6 + w / 2, .04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((.05, .95, 'A'), (.05, .48, 'B'), (.55, .48, 'C'))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def wheel(args):
    """
    %prog wheel datafile.csv groups.csv

    Wheel plot that shows continous data in radial axes.
    """
    from math import cos, sin

    p = OptionParser(wheel.__doc__)
    p.add_option("--column", default="score", choices=("score", "percentile"),
                 help="Which column to extract from `datafile.csv`")
    opts, args, iopts = p.set_image_options(args, figsize="5x5", format="png")

    if len(args) != 2:
        sys.exit(not p.print_help())

    datafile, groupsfile = args
    column = opts.column
    linecolor = "#d6d6d6"
    df = parse_data(datafile, score_column=opts.column)
    groups = parse_groups(groupsfile)
    # Keep only entries present in both files, in the order of `groups`.
    # The print statements are written as single-argument calls so that
    # they parse on both Python 2 and Python 3.
    labels = [g for g in groups if g in df]
    print(labels)
    df = [df[g] for g in labels]
    print(df)
    groups = [groups[g] for g in labels]
    print(groups)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    categories = len(df)
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)

    brewer = [
        "#FF3B30", "#DD43A0", "#5856D6",
        "#007AFE", "#56BDEC", "#4CD8BA",
        "#4CD864", "#B0F457", "#FEF221",
        "#FFCC01", "#FF9500", "#FF3B30",
    ]

    # Baseline: one angle per category, plus a dense angle grid for circles
    theta = np.linspace(1.5 * np.pi, 3.5 * np.pi, endpoint=False,
                        num=categories)
    _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi)
    R = max(max(df), 10)
    xlim = (-R, R) if column == "score" else (-100, 100)
    plim = (-R / 2, R) if column == "score" else (0, 100)
    ci = (-.5, 2) if column == "score" else (10, 90)

    # Grid
    if column == "score":
        for t in theta:
            ax.plot([t, t], plim, color=linecolor)
    ax.axis('off')

    # Contours
    for t in plim:
        ax.plot(_theta, [t] * len(_theta), color=linecolor)

    # Sectors (groupings): indices of consecutive entries sharing a group
    gg = []
    for group, members in groupby(enumerate(groups), lambda x: x[1]):
        gg.append([idx for idx, _ in members])

    # Sector outlines are disabled. The flag used to be named `sector`,
    # which shadowed the `sector` callable invoked inside the branch.
    draw_sectors = False
    if draw_sectors:
        theta_interval = 2 * np.pi / categories
        theta_pad = theta_interval / 2 * .9
        for color, group in zip(brewer, gg):
            tmin, tmax = min(group), max(group)
            sector(ax, theta[tmin], theta[tmax], theta_pad, R * .95,
                   "-", color=color, lw=2)

    # Data
    r = df
    closed_plot(ax, theta, r, color="lightslategray", alpha=.25)
    for color, group in zip(brewer, gg):
        # Points inside / outside the interval `ci`; both sets are currently
        # drawn with the same alpha.
        hidden_data = [(theta[x], r[x]) for x in group
                       if ci[0] <= r[x] <= ci[1]]
        shown_data = [(theta[x], r[x]) for x in group
                      if r[x] < ci[0] or r[x] > ci[1]]
        for alpha, data in zip((1, 1), (hidden_data, shown_data)):
            if not data:
                continue
            color_theta, color_r = zip(*data)
            ax.plot(color_theta, color_r, "o", color=color, alpha=alpha)

    # Print out data
    diseaseNames, risks = labels, df
    print("var theta = [{}]".format(
        ",".join("{:.1f}".format(degrees(x)) for x in theta)))
    print("var risks = [{}]".format(",".join(str(x) for x in risks)))
    print("var diseaseNames = [{}]".format(
        ",".join(['"{}"'.format(x) for x in diseaseNames])))

    # Labels, placed on a circle of radius .5 around the canvas center
    r = .5
    for i, label in enumerate(labels):
        tl = theta[i]
        x, y = .5 + r * cos(tl), .5 + r * sin(tl)
        d = degrees(tl)
        if 90 < d % 360 < 270:  # On the left quadrants
            d -= 180
        root.text(x, y, label, size=4, rotation=d,
                  ha="center", va="center", color=linecolor)
        print("{} {} {}".format(x, y, label))

    # Add baseline
    baseline = 0 if column == "score" else 50
    _r = len(_theta) * [baseline]
    closed_plot(ax, _theta, _r, "k:", lw=1, ms=4)

    # Add confidence interval
    if column == "percentile":
        barcolor = "#eeeeee"
        ax.bar([0], [ci[1] - ci[0]], width=2 * np.pi, bottom=ci[0],
               fc=barcolor)

    ax.set_rmin(xlim[0])
    ax.set_rmax(xlim[1])

    normalize_axes(root)

    image_name = pf + "-" + column + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    pngfile, = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != '.':
        mkdir(outdir)
    if calib:
        # Calibration JSON supplies the pixel/cm ratio and an RGB transform
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)

    resizefile, mainfile, labelfile, exif = \
        convert_image(pngfile, pf, outdir=outdir,
                      rotate=opts.rotate,
                      rows=rows, cols=cols,
                      labelrows=labelrows, labelcols=labelcols)

    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, nrows=1,
                                             figsize=(iopts.w, iopts.h))

    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    else:
        # Fail with a clear message rather than an unbound-variable error
        raise ValueError("Unknown edge filter `{0}`".format(ff))
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance, threshold_rel=.05, indices=False)
        coordinates = peak_local_max(distance, threshold_rel=.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering: bounds are percentages of the canvas area
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug("Find objects with pixels between {0} ({1}%) and {2} ({3}%)"\
                  .format(min_size, opts.minsize, max_size, opts.maxsize))

    # Plotting
    ax1.set_title('Original picture')
    ax1.imshow(oimg)

    # Raw string: `\s` is an invalid escape sequence in a normal string
    # literal. The runtime text is unchanged.
    params = r"{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title('Edge detection\n({0})'.format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], 'g.')
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title('Object detection')
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    for i, props in enumerate(rp):
        i += 1  # seed numbers are 1-based
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        # Draw the major and minor axes through the centroid
        ax2.plot((x0 - major_dx, x0 + major_dx),
                 (y0 + major_dy, y0 - major_dy), 'r-')
        ax2.plot((x0 - minor_dx, x0 + minor_dx),
                 (y0 - minor_dy, y0 + minor_dy), 'r-')

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * .35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug("Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".\
                      format(i, npixels, len(pixels), 100. * npixels / canvas_size))

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        # Outline and number the object on the detection panel
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle((minc, minr), maxc - minc, maxr - minr,
                         fill=False, ec='w', lw=1)
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc, mr, "{0}".format(i), color='w',
                 ha="center", va="center", size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(.1, .92, "File: {0}".format(latex(filename)), color='g')
    ax4.text(.1, .86, "Label: {0}".format(latex(accession)), color='m')
    yy = .8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        # Every object is written to the output; only seeds numbered up to
        # 7 are also listed on the summary panel.
        i = o.seedno
        if i > 7:
            continue
        ax4.text(.01, yy, str(i), va="center", bbox=dict(fc='none', ec='k'))
        ax4.text(.1, yy, o.pixeltag, va="center")
        yy -= .04
        ax4.add_patch(Rectangle((.1, yy - .025), .12, .05, lw=0,
                      fc=rgb_to_hex(o.rgb)))
        ax4.text(.27, yy, o.hashtag, va="center")
        yy -= .06
    ax4.text(.1, yy, "(A total of {0} objects displayed)".format(nb_labels),
             color="darkslategrey")
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family='Helvetica', size=8)
        ax.set_yticklabels(yticklabels, family='Helvetica', size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
def compare(args):
    """
    %prog compare Evaluation.csv

    Compare performances of various variant callers on simulated STR datasets.
    """
    # NOTE: the docstring above doubles as the command-line usage text, so it
    # is kept short; implementation notes live in comments instead.
    #
    # Three panels are drawn:
    #   A (ax1): several callers read from the CSV given on the command line
    #   B (ax2): lobSTR vs TREDPARSE, "homo" result files
    #   C (ax3): lobSTR vs TREDPARSE, "het" result files
    # The image is saved next to the input as <prefix>.<iopts.format>.
    p = OptionParser(compare.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="15x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, nrows=1,
                                        figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=2)

    # Huntington risk allele
    infected_thr = 40
    ref_thr = 19

    # ax1: Multiple callers at lower range
    # Read the file named on the command line rather than a fixed filename.
    df = pd.read_csv(datafile)
    truth = df["Truth"]
    ax1.plot(truth, df["Manta"], 'bx-')
    ax1.plot(truth, df["Isaac"], 'yo-')
    ax1.plot(truth, df["GATK"], 'md-')
    ax1.plot(truth, df["lobSTR"], 'c+-')
    ax1.plot(truth, truth, 'k--')  # to show diagonal

    bbox = {'facecolor': 'tomato', 'alpha': .2, 'ec': 'w'}
    pad = 2
    ax1.axhline(infected_thr, color='tomato')
    ax1.text(max(truth) - pad, infected_thr + pad, 'Risk threshold',
             bbox=bbox, ha="right")
    ax1.axhline(ref_thr, color='tomato')
    ax1.text(max(truth) - pad, ref_thr - pad, 'Reference repeat count',
             bbox=bbox, ha="right", va="top")
    ax1.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
    ax1.set_ylabel('Num of CAG repeats called')
    ax1.set_title(r'Simulated haploid $\mathit{h}$')
    ax1.legend(['Manta', 'Isaac', 'GATK', 'lobSTR', 'Truth'], loc='best')

    max_insert = 120

    # ax2: lobSTR vs TREDPARSE with haploid model
    # NOTE(review): parse_results() is assumed to yield (x, y) pairs, since
    # its output is unzipped into two sequences - confirm against its source.
    lobstr_results = parse_results("lobstr_results_homo.txt")
    tredparse_results = parse_results("tredparse_results_homo.txt")
    truth = range(10, max_insert + 1)
    lx, ly = zip(*lobstr_results)
    tx, ty = zip(*tredparse_results)
    ax2.plot(lx, ly, 'c+-')
    ax2.plot(tx, ty, 'gx-')
    ax2.plot(truth, truth, 'k--')
    ax2.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
    ax2.set_ylabel('Num of CAG repeats called')
    ax2.set_title(r'Simulated haploid $\mathit{h}$')
    ax2.legend(['lobSTR', 'TREDPARSE', 'Truth'], loc='best')
    pad *= 2  # wider axis range in panels B/C, so offset the label further
    ax2.axhline(infected_thr, color='tomato')
    ax2.text(max(truth) - pad, infected_thr + pad, 'Risk threshold',
             bbox=bbox, ha="right")
    ax2.set_xlim(10, max_insert)

    # ax3: lobSTR vs TREDPARSE with diploid model
    lobstr_results = parse_results("lobstr_results_het.txt", exclude=20)
    tredparse_results = parse_results("tredparse_results_het.txt", exclude=20)
    truth = range(10, max_insert + 1)
    lx, ly = zip(*lobstr_results)
    tx, ty = zip(*tredparse_results)
    ax3.plot(lx, ly, 'c+-')
    ax3.plot(tx, ty, 'gx-')
    ax3.plot(truth, truth, 'k--')
    ax3.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
    ax3.set_ylabel('Num of CAG repeats called')
    ax3.set_title(r'Simulated diploid $\mathit{20/h}$')
    ax3.legend(['lobSTR', 'TREDPARSE', 'Truth'], loc='best')
    ax3.axhline(infected_thr, color='tomato')
    ax3.text(max(truth) - pad, infected_thr + pad, 'Risk threshold',
             bbox=bbox, ha="right")
    ax3.set_xlim(10, max_insert)

    # Full-canvas axes used only for the panel letters
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 3., 1 - pad, "B"),
                        (2 / 3., 1 - pad, "C")))
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def lms(args):
    """
    %prog lms

    ALLMAPS cartoon to illustrate LMS metric.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser.
    #
    # Panel A uses jittered (random) marker positions, so the figure differs
    # slightly between runs. Panels B and C use fixed data.
    from random import randint
    from jcvi.graphics.chromosome import HorizontalChromosome

    p = OptionParser(lms.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    w, h = 0.7, 0.35
    ax = fig.add_axes([0.15, 0.6, w, h])

    xdata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata = [x + randint(-3, 3) for x in range(10, 110, 10)]
    ydata[3:7] = ydata[3:7][::-1]  # reverse the middle run of points
    # zip() must be materialized: the pairs are sliced and indexed below,
    # which a Python 3 zip iterator does not support.
    xydata = list(zip(xdata, ydata))
    lis = xydata[:3] + [xydata[4]] + xydata[7:]
    lds = xydata[3:7]
    xlis, ylis = zip(*lis)
    xlds, ylds = zip(*lds)
    ax.plot(
        xlis,
        ylis,
        "r-",
        lw=12,
        alpha=0.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(
        xlds,
        ylds,
        "g-",
        lw=12,
        alpha=0.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    HorizontalChromosome(root, 0.15, 0.15 + w, 0.57, height=0.02, lw=2)
    root.text(0.15 + w / 2, 0.55, "Chromosome location (bp)",
              ha="center", va="top")

    ax.text(80, 30, "LIS = 7", color="r", ha="center", va="center")
    ax.text(80, 20, "LDS = 4", color="g", ha="center", va="center")
    ax.text(80, 10, "LMS = $max$(LIS, LDS) = 7", ha="center", va="center")
    normalize_lms_axis(ax, xlim=110, ylim=110)

    # Panel B
    w = 0.37
    # Scaffold boundaries in data coordinates (kept separate from the
    # option parser `p` above).
    breaks = (0, 45, 75, 110)
    ax = fig.add_axes([0.1, 0.12, w, h])
    xdata = list(range(10, 110, 10))
    ydata_orig = list(range(10, 110, 10))
    ydata = ydata_orig[:4] + ydata_orig[7:] + ydata_orig[4:7][::-1]
    xydata = list(zip(xdata, ydata))
    lis = xydata[:7]
    xlis, ylis = zip(*lis)
    ax.plot(
        xlis,
        ylis,
        "r-",
        lw=12,
        alpha=0.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, 0, 110, colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    # Same boundaries, rescaled to canvas coordinates
    patch = [0.1 + w * x / 110.0 for x in breaks]
    HorizontalChromosome(root, 0.1, 0.1 + w, 0.09, patch=patch,
                         height=0.02, lw=2)
    scaffolds = ("a", "b", "c")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.1 + w / 2, 0.04, "LMS($a||b||c$) = 7", ha="center")

    # Panel C
    ax = fig.add_axes([0.6, 0.12, w, h])
    patch = [0.6 + w * x / 110.0 for x in breaks]
    ydata = ydata_orig
    ax.plot(
        xdata,
        ydata,
        "r-",
        lw=12,
        alpha=0.3,
        solid_capstyle="round",
        solid_joinstyle="round",
    )
    ax.plot(xdata, ydata, "k.", mec="k", mfc="w", mew=3, ms=12)
    ax.vlines(breaks, [0], [110], colors="beige", lw=3)
    normalize_lms_axis(ax, xlim=110, ylim=110)
    HorizontalChromosome(root, 0.6, 0.6 + w, 0.09, patch=patch,
                         height=0.02, lw=2)
    scaffolds = ("a", "-c", "b")
    for i, s in enumerate(scaffolds):
        xx = (patch[i] + patch[i + 1]) / 2
        root.text(xx, 0.09, s, va="center", ha="center")
    root.text(0.6 + w / 2, 0.04, "LMS($a||-c||b$) = 10", ha="center")

    labels = ((0.05, 0.95, "A"), (0.05, 0.48, "B"), (0.55, 0.48, "C"))
    panel_labels(root, labels)

    normalize_axes(root)

    pf = "lms"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser.
    #
    # Inputs are derived from the prefix of `input.bed`:
    # <prefix>.lifted.bed and <prefix>.agp. Output is <seqid>.<iopts.format>.
    from jcvi.graphics.base import (
        plt,
        savefig,
        normalize_axes,
        set2,
        panel_labels,
    )
    from jcvi.graphics.chromosome import (
        Chromosome,
        GeneticMap,
        HorizontalChromosome,
    )

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    function = get_function(opts.distance)
    cc = Map(bedfile, function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    mlgs = [k for k, v in s.mlg_counts.items() if v >= links]
    # Relax the --links cutoff until at least one linkage group qualifies.
    # Integer halving keeps the threshold a whole number on Python 3 too, and
    # the explicit floor check stops the loop when nothing can ever qualify
    # (e.g. no linkage-group counts at all) instead of spinning forever.
    while not mlgs:
        if links <= 0:
            logging.error("No markers to plot for {0}".format(seqid))
            return
        links //= 2
        logging.error("No markers to plot, --links reset to {0}".format(links))
        mlgs = [k for k, v in s.mlg_counts.items() if v >= links]

    # Size of each linkage group = largest marker coordinate on that map
    mlgsizes = {}
    for mlg in mlgs:
        mm = cc.extract_mlg(mlg)
        mlgsizes[mlg] = max(function(x) for x in mm)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])
    ax2 = fig.add_axes([.5, 0, .5, 1])

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette: one color per map, then looked up per linkage group through
    # the map-name prefix (text before the first "-")
    colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        flip = rho < 0  # negative correlation: draw the map upside down

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = mlg.replace("_", ".")  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
                 ha=ha, va="center", rotation=90)
        marker_pos.update(g.marker_pos)

    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    # Physical position -> canvas y. Must only be used before `ystart` is
    # modified in the scatter-plot section below (it reads it at call time).
    f = lambda x: (ystart - ratio * x)
    patchstart = [f(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = f(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))
    # Physical position -> canvas x. Evaluated right away, before `ratio`
    # is re-purposed for the panel heights below.
    f = lambda x: (xstart + ratio * x)
    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [f(x) for x in pp]
    HorizontalChromosome(ax2, xstart, xstop, ystop,
                         height=2 * tip, patch=patchstart, lw=2)

    gap = .03
    # From here on `ratio` converts map size to panel height
    ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    tlgs = []
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = ratio * mlgsize
        ystart -= height
        xx = .5 + xstart / 2
        width = r / 2
        color = colors[mlg]
        ax = fig.add_axes([xx, ystart, width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        xx, yy = zip(*sd)
        ax.vlines(pp, 0, mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5, 1 - .4 * gap / height, r"$\rho$={0:.3f}".format(rho),
                ha="center", va="top", transform=ax.transAxes, color="gray")
        tlg = mlg.replace("_", ".")
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        while height / len(ax.get_yticks()) < .03 and \
                len(ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        if rho < 0:
            ax.invert_yaxis()

    # Linkage-group names down the middle; alternate sides when crowded
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90,
                  ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)
def compare3(args):
    """
    %prog compare3

    Compare performances of various variant callers on simulated STR datasets.
    This compares the power of various evidence types.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser.
    #
    # Four panels, one per TREDPARSE sub-model result file. The result file
    # names are fixed and resolved relative to the working directory.
    p = OptionParser(compare3.__doc__)
    p.add_option('--maxinsert', default=300, type="int",
                 help="Maximum number of repeats")
    add_simulate_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 0:
        sys.exit(not p.print_help())

    max_insert = opts.maxinsert
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    color = "lightslategray"
    # ax1: Spanning
    tredparse_results = parse_results("tredparse_results_het-spanning.txt")
    # Title spacing fixed to match the other three panels
    # (was "( Sub-model 1: ...").
    title = SIMULATED_DIPLOID + " (Sub-model 1: Spanning reads)"
    plot_compare(ax1, title, tredparse_results, None, color=color,
                 max_insert=max_insert, risk=False)

    # ax2: Partial
    tredparse_results = parse_results("tredparse_results_het-partial.txt",
                                      exclude=20)
    title = SIMULATED_DIPLOID + " (Sub-model 2: Partial reads)"
    plot_compare(ax2, title, tredparse_results, None, color=color,
                 max_insert=max_insert, risk=False)

    # ax3: Repeat
    tredparse_results = parse_results("tredparse_results_het-repeat.txt",
                                      exclude=20)
    # HACK (repeat reads won't work under 50)
    tredparse_results = [x for x in tredparse_results if x[0] > 50]
    title = SIMULATED_DIPLOID + " (Sub-model 3: Repeat-only reads)"
    plot_compare(ax3, title, tredparse_results, None, color=color,
                 max_insert=max_insert, risk=False)

    # ax4: Pair
    tredparse_results = parse_results("tredparse_results_het-pair.txt",
                                      exclude=20)
    title = SIMULATED_DIPLOID + " (Sub-model 4: Paired-end reads)"
    plot_compare(ax4, title, tredparse_results, None, color=color,
                 max_insert=max_insert, risk=False)

    # Same square range on every panel so they are directly comparable
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    # Full-canvas axes used only for the panel letters
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = "tredparse." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
            is_self=False, synteny=False, cmap_text=None, cmap="copper",
            genomenames=None, sample_number=10000, minfont=5, palette=None,
            chrlw=.1, title=None, sep=True, sepcolor="g", stdpf=True):
    """
    Draw a dot plot of the anchor pairs listed in `anchorfile` onto `ax`.

    anchorfile: path to a whitespace-delimited text file. Lines starting with
        "#" separate blocks; on other lines the first two columns are the
        query and subject names and the last column is an optional value.
    qbed, sbed: query / subject objects; this function uses their `.order`
        mapping, `.filename`, `.get_breaks()` and `len()`.
    fig, root, ax: figure, full-canvas axes (title, legend) and plotting axes.
    vmin, vmax: when `cmap_text` is set, pairs with values outside
        [vmin, vmax] are skipped; non-numeric values count as `vmax`.
    is_self: add the mirror image of each pair and draw a diagonal.
    synteny: box the clusters returned by `batch_scan`.
    cmap_text: label for the color bar; also switches on value parsing.
    genomenames: "x_y" axis names; defaults to the bed file basenames.
    palette: optional object with `.get(block_id, default)` and `.colors`,
        used to color pairs by the block they belong to.
    title: figure title; generated (with the pair count) when None.
    Remaining keyword arguments are passed to `downsample`,
    `plot_breaks_and_labels` or `ax.scatter` unchanged.
    """
    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    gx, gy = markup(gx), markup(gy)

    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Capping values within [{0:.1f}, {1:.1f}]"
                      .format(vmin, vmax))

    block_id = 0
    # The block color has to persist across the rows of a block. It used to
    # be reset on every row, so the palette color was never applied.
    block_color = None
    with open(anchorfile) as fp:  # close the handle once parsing is done
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional
            # third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            if cmap_text:
                try:
                    value = float(value)
                except ValueError:
                    value = vmax

                if value < vmin:
                    continue
                if value > vmax:
                    continue
            else:
                value = 0

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, q = qorder[query]
            si, s = sorder[subject]

            nv = value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    npairs = downsample(data, sample_number=sample_number)
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap,
                   vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    qbreaks = qbed.get_breaks()
    sbreaks = sbed.get_breaks()
    xlim, ylim = plot_breaks_and_labels(fig, root, ax, gx, gy, xsize, ysize,
                                        qbreaks, sbreaks, sep=sep,
                                        chrlw=chrlw, sepcolor=sepcolor,
                                        minfont=minfont, stdpf=stdpf)

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if title is None:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Mirrored pairs were counted twice; floor division keeps the
            # count an integer on Python 3 as well.
            npairs //= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    if title:
        logging.debug("Dot plot title: {}".format(title))
    normalize_axes(root)
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer histograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser.
    #
    # Panel A overlays the K-mer spectra; panel B plots the genome size
    # estimated at each K together with a quadratic fit.
    import sys  # local import: only needed for the usage exit below

    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin", default=15, type="int",
                 help="Minimum K-mer size, inclusive")
    p.add_option("--kmax", default=30, type="int",
                 help="Maximum K-mer size, inclusive")
    p.add_option("--vmin", default=2, type="int",
                 help="Minimum value, inclusive")
    p.add_option("--vmax", default=100, type="int",
                 help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # At least one histogram plus the species name are required. Without this
    # check the function failed later with an IndexError / ValueError.
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.08, 0.12, 0.38, 0.76])
    B = fig.add_axes([0.58, 0.12, 0.38, 0.76])

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        # K is taken from the file name: the part after the last "-" of the
        # basename, before the first "."
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue

        line, = A.plot(x, y, "-", lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K)
        genomesizes.append((K, ks.genomesize / 1e6))

    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)

    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc="w")
    t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100)
    # Quadratic trend through the per-K estimates (named `fit` so the option
    # parser `p` is not shadowed)
    fit = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, fit(t), "r:")

    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B"))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser.
    #
    # Inputs are <pf>.lifted.bed and <pf>.agp. Panel C is a hand-annotated
    # zoom on two specific scaffolds, so its labels are hard-coded.
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = .15, .65
    w, h = .7, .3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(.05, .95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= .28
    h = .25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(ax, xlim=chrsize, ylim=25 * 1e-6,
                       xfactor=1e-6, xlabel="Physical position (Mb)",
                       yfactor=1000000, ylabel="Recomb. rate\n(cM / Mb)")

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict((x.component_id, (x.object_beg, x.object_end,
                                   x.component_span, x.orientation))
                 for x in g.agp if not x.is_gap)
    # Only the start of `a`, the end of `b` and both spans are needed
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = .6 / total_size  # bp -> canvas units for the zoomed view
    y = .16
    pad = .03
    pb_ratio = w / chrsize  # bp -> canvas units at panel B scale

    # Zoom
    lsg = "lightslategray"
    root.plot((.15 + pb_ratio * a_beg, .2),
              (ystart, ystart - .14), ":", color=lsg)
    root.plot((.15 + pb_ratio * b_end, .3),
              (ystart, ystart - .08), ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip((a, b), (asize, bsize),
                                      (49213, 81277),
                                      (.2, .2 + (asize + gapsize) * ratio)):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=.025, fc='gainsboro')

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is not a valid escape sequence in a normal literal.
    # The resulting text is unchanged.
    root.text(sum(markers) / 2, ypos + pad,
              r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
              **fontprop)

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(sum(markers) / 2, ypos - 3 * pad,
              r"$\textit{Estimated gap size: 96,433bp}$",
              color="r", ha="center", va="center")

    labels = ((.05, .95, 'A'), (.05, .6, 'B'), (.05, .27, 'C'))
    panel_labels(root, labels)
    normalize_axes(root)

    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def heatmap(args):
    """
    %prog heatmap input.npy genome.json

    Plot heatmap based on .npy data file. The .npy stores a square matrix with
    bins of genome, and cells inside the matrix represent number of links
    between bin i and bin j. The `genome.json` contains the offsets of each
    contig/chr so that we know where to draw boundary lines, or extract per
    contig/chromosome heatmap.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser.
    p = OptionParser(heatmap.__doc__)
    p.add_option("--resolution", default=500000, type="int",
                 help="Resolution when counting the links")
    p.add_option("--chr", help="Plot this contig/chr only")
    opts, args, iopts = p.set_image_options(args, figsize="10x10",
                                            style="white", cmap="coolwarm",
                                            format="png", dpi=120)

    if len(args) != 2:
        sys.exit(not p.print_help())

    npyfile, jsonfile = args
    contig = opts.chr
    # Load contig/chromosome starts and sizes (handle closed right away)
    with open(jsonfile) as fp:
        header = json.load(fp)
    # Load the matrix
    A = np.load(npyfile)

    # Select specific submatrix
    if contig:
        contig_start = header["starts"][contig]
        contig_size = header["sizes"][contig]
        contig_end = contig_start + contig_size
        A = A[contig_start: contig_end, contig_start: contig_end]

    # Several concerns in practice:
    # The diagonal counts may be too strong, this can either be resolved by
    # masking them. Or perform a log transform on the entire heatmap.
    B = A.astype("float64")
    B += 1.0  # avoid log(0)
    B = np.log(B)
    vmin, vmax = 1, 7
    B[B < vmin] = vmin
    B[B > vmax] = vmax
    # Function-call form prints the same text on Python 2 and is valid
    # syntax on Python 3.
    print(B)
    logging.debug("Matrix log-transformation and thresholding ({}-{}) done"
                  .format(vmin, vmax))

    # Canvas
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])       # whole canvas
    ax = fig.add_axes([.05, .05, .9, .9])   # just the heatmap

    # Boundary lines: every contig start plus the total bin count. list() is
    # needed because dict views cannot be extended on Python 3. The smallest
    # value is dropped by the [1:] slice after sorting.
    breaks = list(header["starts"].values())
    breaks += [header["total_bins"]]
    breaks = sorted(breaks)[1:]
    if contig:
        breaks = []  # a single contig has no internal boundaries
    plot_heatmap(ax, B, breaks, iopts, binsize=opts.resolution)

    # Title
    pf = npyfile.rsplit(".", 1)[0]
    title = pf
    if contig:
        title += "-{}".format(contig)
    root.text(.5, .98, title, color="darkslategray", size=18,
              ha="center", va="center")

    normalize_axes(root)
    image_name = title + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def draw_tree(ax, tx, rmargin=.3, leafcolor="k", supportcolor="k",
              outgroup=None, reroot=True, gffdir=None, sizes=None,
              trunc_name=None, SH=None, scutoff=0, barcodefile=None,
              leafcolorfile=None, leaffont=12):
    """
    main function for drawing phylogenetic tree

    ax: axes to draw on, addressed in 0-1 coordinates.
    tx: tree description handed to `Tree(...)`.
    rmargin: fraction of the canvas width reserved on the right of the tree.
    leafcolor: default leaf label color.
    supportcolor: color of the branch support values.
    outgroup: leaf names whose common ancestor becomes the outgroup; when
        empty and `reroot` is set, the midpoint outgroup is used.
    reroot: whether to re-root the tree at all.
    gffdir: directory globbed for "*.gff*" files; when given, gene structures
        are drawn to the right of the matching leaves.
    sizes: argument to `Sizes(...)`; when given, a "<n>aa" label is drawn
        for leaves found in its mapping.
    trunc_name: rule passed to `truncate_name` for the leaf labels.
    SH: value reported in the "SH test against ref tree" note, if not None.
    scutoff: support values (in percent) at or below this are not drawn.
    barcodefile: tab-delimited file used by `decode_name` on leaf labels.
    leafcolorfile: tab-delimited file mapping leaf name to a color, either
        a color name or an "R,G,B" triple.
    leaffont: font size for all text.
    """
    t = Tree(tx)
    if reroot:
        if outgroup:
            R = t.get_common_ancestor(*outgroup)
        else:
            # Calculate the midpoint node
            R = t.get_midpoint_outgroup()

        if R != t:
            t.set_outgroup(R)

    _, max_dist = t.get_farthest_leaf()

    margin = .05
    xstart = margin
    ystart = 1 - margin
    canvas = 1 - rmargin - 2 * margin
    tip = .005
    # scale the tree
    scale = canvas / max_dist

    num_leaves = len(t.get_leaf_names())
    yinterval = canvas / (num_leaves + 1)

    # get exons structures, if any
    structures = {}
    if gffdir:
        gffiles = glob("{0}/*.gff*".format(gffdir))
        # `ratio` is only read below for leaves present in `structures`,
        # which stays empty unless this branch ran.
        setups, ratio = get_setups(gffiles, canvas=rmargin / 2, noUTR=True)
        structures = dict((a, (b, c)) for a, b, c in setups)

    if sizes:
        sizes = Sizes(sizes).mapping

    if barcodefile:
        barcodemap = DictFile(barcodefile, delimiter="\t")

    # Always defined, so the per-leaf lookup below no longer depends on a
    # NameError being swallowed when no color file is given.
    leafcolors = {}
    if leafcolorfile:
        leafcolors = DictFile(leafcolorfile, delimiter="\t")

    coords = {}
    i = 0
    # Post-order: children are placed before their parent, so each internal
    # node can be centered on the coordinates already stored in `coords`.
    for n in t.traverse("postorder"):
        dist = n.get_distance(t)
        xx = xstart + scale * dist

        if n.is_leaf():
            yy = ystart - i * yinterval
            i += 1

            if trunc_name:
                name = truncate_name(n.name, rule=trunc_name)
            else:
                name = n.name

            if barcodefile:
                name = decode_name(name, barcodemap)

            sname = name.replace("_", "-")

            try:
                lc = leafcolors[n.name]
            except KeyError:
                lc = leafcolor
            else:
                # if color is given as "R,G,B"
                if "," in lc:
                    # A real list: on Python 3 map() is a lazy iterator,
                    # which is not a usable color value.
                    lc = [float(v) for v in lc.split(",")]

            ax.text(xx + tip, yy, sname, va="center",
                    fontstyle="italic", size=leaffont, color=lc)

            gname = n.name.split("_")[0]
            if gname in structures:
                mrnabed, cdsbeds = structures[gname]
                ExonGlyph(ax, 1 - rmargin / 2, yy, mrnabed, cdsbeds,
                          align="right", ratio=ratio)
            if sizes and gname in sizes:
                size = sizes[gname]
                # base pair converted to amino acid; floor division keeps
                # the label an integer on Python 3
                size = size // 3 - 1
                size = "{0}aa".format(size)
                ax.text(1 - rmargin / 2 + tip, yy, size, size=leaffont)

        else:
            children = [coords[x] for x in n.get_children()]
            children_x, children_y = zip(*children)
            min_y, max_y = min(children_y), max(children_y)
            # plot the vertical bar
            ax.plot((xx, xx), (min_y, max_y), "k-")
            # plot the horizontal bar
            for cx, cy in children:
                ax.plot((xx, cx), (cy, cy), "k-")
            yy = sum(children_y) * 1. / len(children_y)
            support = n.support
            # Values above 1 are treated as percentages and rescaled
            if support > 1:
                support = support / 100.
            if not n.is_root():
                if support > scutoff / 100.:
                    ax.text(xx, yy + .005,
                            "{0:d}".format(int(abs(support * 100))),
                            ha="right", size=leaffont, color=supportcolor)

        coords[n] = (xx, yy)

    # scale bar
    br = .1
    x1 = xstart + .1
    x2 = x1 + br * scale
    yy = ystart - i * yinterval
    ax.plot([x1, x1], [yy - tip, yy + tip], "k-")
    ax.plot([x2, x2], [yy - tip, yy + tip], "k-")
    ax.plot([x1, x2], [yy, yy], "k-")
    ax.text((x1 + x2) / 2, yy - tip, "{0:g}".format(br),
            va="top", ha="center", size=leaffont)

    if SH is not None:
        xs = x1
        ys = (margin + yy) / 2.
        ax.text(xs, ys, "SH test against ref tree: {0}"
                .format(SH), ha="left", size=leaffont, color="g")

    normalize_axes(ax)
def wheel(args):
    """
    %prog wheel datafile.csv groups.csv

    Wheel plot that shows continous data in radial axes.
    """
    # NOTE: the docstring above is also the usage text handed to OptionParser.
    #
    # Besides saving the image, this prints the parsed inputs, JavaScript-style
    # arrays of the plotted data, and the label coordinates to stdout.
    p = OptionParser(wheel.__doc__)
    p.add_option("--column", default="score", choices=("score", "percentile"),
                 help="Which column to extract from `datafile.csv`")
    opts, args, iopts = p.set_image_options(args, figsize="5x5", format="png")

    if len(args) != 2:
        sys.exit(not p.print_help())

    datafile, groupsfile = args
    column = opts.column
    linecolor = "#d6d6d6"
    df = parse_data(datafile, score_column=opts.column)
    groups = parse_groups(groupsfile)
    # Keep only entries present in both files, in the order of `groups`
    labels = [g for g in groups if g in df]
    print(labels)
    df = [df[g] for g in labels]
    print(df)
    groups = [groups[g] for g in labels]
    print(groups)

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    categories = len(df)
    ax = fig.add_axes([0.1, 0.1, 0.8, 0.8], polar=True)

    brewer = [
        "#FF3B30",
        "#DD43A0",
        "#5856D6",
        "#007AFE",
        "#56BDEC",
        "#4CD8BA",
        "#4CD864",
        "#B0F457",
        "#FEF221",
        "#FFCC01",
        "#FF9500",
        "#FF3B30",
    ]

    # Baseline
    theta = np.linspace(1.5 * np.pi, 3.5 * np.pi, endpoint=False,
                        num=categories)
    _theta = np.linspace(1.5 * np.pi, 3.5 * np.pi)
    R = max(max(df), 10)
    xlim = (-R, R) if column == "score" else (-100, 100)
    plim = (-R / 2, R) if column == "score" else (0, 100)
    ci = (-.5, 2) if column == "score" else (10, 90)

    # Grid
    if column == "score":
        for t in theta:
            ax.plot([t, t], plim, color=linecolor)
    ax.axis('off')

    # Contours
    for t in plim:
        ax.plot(_theta, [t] * len(_theta), color=linecolor)

    # Sectors (groupings): runs of consecutive entries sharing a group
    collapsed_groups = []
    gg = []
    for group, c in groupby(enumerate(groups), lambda x: x[1]):
        c = [x[0] for x in list(c)]
        collapsed_groups.append(group)
        gg.append(c)

    # Sector outlines are currently switched off. The flag is deliberately
    # not called `sector`, which would shadow the sector() helper used below.
    draw_sectors = False
    if draw_sectors:
        theta_interval = 2 * np.pi / categories
        theta_pad = theta_interval / 2 * .9
        for color, group in zip(brewer, gg):
            tmin, tmax = min(group), max(group)
            sector(ax, theta[tmin], theta[tmax], theta_pad, R * .95,
                   "-", color=color, lw=2)

    # Data
    r = df
    closed_plot(ax, theta, r, color="lightslategray", alpha=.25)
    # The former `all_data` accumulator was removed: it was never read and
    # its append used a comprehension variable that does not exist outside
    # the comprehension on Python 3 (UnboundLocalError).
    for color, group in zip(brewer, gg):
        # Points inside / outside the confidence interval `ci`; both are
        # currently drawn with the same alpha.
        hidden_data = [(theta[x], r[x]) for x in group
                       if (ci[0] <= r[x] <= ci[1])]
        shown_data = [(theta[x], r[x]) for x in group
                      if (r[x] < ci[0] or r[x] > ci[1])]
        for alpha, data in zip((1, 1), (hidden_data, shown_data)):
            if not data:
                continue
            color_theta, color_r = zip(*data)
            ax.plot(color_theta, color_r, "o", color=color, alpha=alpha)

    # Print out data
    diseaseNames, risks = labels, df
    print("var theta = [{}]".format(
        ",".join("{:.1f}".format(degrees(x)) for x in theta)))
    print("var risks = [{}]".format(",".join(str(x) for x in risks)))
    print("var diseaseNames = [{}]".format(",".join(
        ['"{}"'.format(x) for x in diseaseNames])))

    # Labels
    from math import cos, sin
    r = .5
    for i, label in enumerate(labels):
        tl = theta[i]
        x, y = .5 + r * cos(tl), .5 + r * sin(tl)
        d = degrees(tl)
        if 90 < d % 360 < 270:  # On the left quadrants, keep text upright
            d -= 180
        root.text(x, y, label, size=4, rotation=d,
                  ha="center", va="center", color=linecolor)
        print(x, y, label)

    # Add baseline
    baseline = 0 if column == "score" else 50
    _r = len(_theta) * [baseline]
    closed_plot(ax, _theta, _r, "k:", lw=1, ms=4)

    # Add confidence interval
    if column == "percentile":
        barcolor = "#eeeeee"
        ax.bar([0], [ci[1] - ci[0]], width=2 * np.pi, bottom=ci[0],
               fc=barcolor)

    ax.set_rmin(xlim[0])
    ax.set_rmax(xlim[1])

    normalize_axes(root)

    image_name = pf + "-" + column + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    # Draws a 3x3 demo grid: rows are the structural variant types
    # (inversion, indel, duplication), columns are the alignment types
    # (assembly, read, optical map). Saved as <mode>.<iopts.format>.
    p = OptionParser(__doc__)
    opts, args, iopts = p.set_image_options(figsize="9x7")

    if len(args) != 1:
        sys.exit(not p.print_help())

    mode, = args
    assert mode == "demo"

    a, b = 30, 70  # interval affected by each simulated event
    pad = .08
    w = .31        # width and height of one grid cell
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Row separators
    yy = 1 - pad
    for _ in range(3):  # range() works on both Python 2 and 3
        root.plot((0, 1), (yy, yy), "-", lw=2, color="lightgray")
        yy -= w

    # Row headers
    xx = pad * .6
    yy = 1 - pad - .5 * w
    for title in ("Inversion", "Indel", "Duplication"):
        root.text(xx, yy, title, ha="center", va="center")
        yy -= w

    # Column headers
    xx = pad + .5 * w
    yy = 1 - pad / 2
    for title in ("Assembly alignment", "Read alignment",
                  "Optical map alignment"):
        root.text(xx, yy, title, ha="center", va="center")
        xx += w

    # One column per alignment type: (class, left edge, extra keyword
    # arguments for duplicate()). Only the pairwise panel passes a gap.
    columns = (
        (PairwiseAlign, pad, {"gap": 5}),
        (ReadAlign, pad + w, {}),
        (OpticalMapAlign, pad + 2 * w, {}),
    )
    for align_class, xstart, dup_kwargs in columns:
        # Top row: inversion
        panel = align_class(fig, [xstart, 2 * w, w, w])
        panel.invert(a, b)
        panel.draw()

        # Middle row: deletion
        panel = align_class(fig, [xstart, w, w, w])
        panel.delete(a, b)
        panel.draw()

        # Bottom row: duplication
        panel = align_class(fig, [xstart, 0, w, w])
        panel.duplicate(a, b, **dup_kwargs)
        panel.draw()

    normalize_axes(root)

    image_name = mode + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def compare4(args):
    """
    %prog compare4

    Compare performances of various variant callers on simulated STR datasets.
    Adds coverage comparisons as panel C and D.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    p = OptionParser(compare4.__doc__)
    p.add_option('--maxinsert', default=300, type="int",
                 help="Maximum number of repeats")
    add_simulate_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 0:
        sys.exit(not p.print_help())

    depth = opts.depth
    max_insert = opts.maxinsert
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    # ax1: lobSTR vs TREDPARSE with haploid model
    lobstr_results = parse_results("lobstr_results_homo-20x-150bp-500bp.txt")
    tredparse_results = parse_results(
        "tredparse_results_homo-20x-150bp-500bp.txt")
    # The closing `$` was missing here, leaving the math span unbalanced;
    # the title now matches the three diploid panels below.
    title = SIMULATED_HAPLOID + r" ($Depth=%s\times$)" % depth
    plot_compare(ax1, title, tredparse_results, lobstr_results,
                 max_insert=max_insert)

    # ax2-ax4: lobSTR vs TREDPARSE with diploid model.
    # Each entry is (axes, coverage in the result file name, depth shown in
    # the panel title). The first panel reads the 20x files but is titled
    # with the --depth option, exactly as before.
    diploid_panels = (
        (ax2, 20, depth),
        (ax3, 5, 5),
        (ax4, 80, 80),
    )
    for ax, coverage, shown_depth in diploid_panels:
        suffix = "results_het-{0}x-150bp-500bp.txt".format(coverage)
        lobstr_results = parse_results("lobstr_" + suffix, exclude=20)
        tredparse_results = parse_results("tredparse_" + suffix, exclude=20)
        title = SIMULATED_DIPLOID + r" ($Depth=%s\times$)" % shown_depth
        plot_compare(ax, title, tredparse_results, lobstr_results,
                     max_insert=max_insert)

    # All four panels share the same square data range
    for ax in (ax1, ax2, ax3, ax4):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    # Transparent full-figure axes carrying the panel letters
    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    panel_labels(root, ((pad / 2, 1 - pad, "A"), (1 / 2., 1 - pad, "B"),
                        (pad / 2, 1 / 2., "C"), (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = "tredparse." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    from jcvi.formats.bed import Bed

    parser = OptionParser(fig3.__doc__)
    parser.add_option(
        "--gauge_step",
        default=10000000,
        type="int",
        help="Step size for the base scale",
    )
    opts, args, iopts = parser.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    rr, gg = diverge  # the pair of contrasting colors used throughout
    chrs = [[seqid] for seqid in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    _, chr_sum_sizes, ratio = calc_ratio(chrs, sizes)

    # Synteny panel
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    karyotype = Karyotype(
        fig, root, seqidsfile, klayout,
        gap=gap, height=height, lw=2, generank=False, sizes=sizes,
        heightpad=r, roundrect=True, plot_label=False,
    )

    # Chromosome labels
    for kl in karyotype.layout:
        if not kl.empty:
            lx, ly = kl.xstart, kl.y
            if lx < .11:
                # Track starts too close to the left edge: nudge the label
                lx += .1
                ly += .06
            root.text(lx - .015, ly, kl.label, fontsize=15,
                      ha="right", va="center")

    # Inset with datafiles
    datafile_names = (
        "chrA02.bzh.forxmgr",
        "parent.A02.per10kb.forxmgr",
        "parent.C2.per10kb.forxmgr",
        "chrC02.bzh.forxmgr",
    )
    datafiles = [op.join(datadir, name) for name in datafile_names]
    tracks = karyotype.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for track, datafile in zip(tracks, datafiles):
        inset = make_affix_axis(fig, track, -r, height=2 * r)
        xy_axes.append(inset)
        seqid = track.seqids[0]
        start, end = 0, track.total
        xy = XYtrack(inset, datafile, color="lightslategray")
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, seqid, diverge=diverge)
        xy.draw()
        inset.set_xlim(start, end)

        # Base-pair gauge attached to the same track
        gauge_ax = make_affix_axis(fig, track, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # Converted gene tracks
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    order = Bed(bedfile).order
    atoc = "data/Genes.Converted.seuil.0.6.AtoC.txt"
    ctoa = "data/Genes.Converted.seuil.0.6.CtoA.txt"
    for asterisk in (False, True):
        conversion_track(order, atoc, 0, "A02", ax_Ar, rr,
                         asterisk=asterisk)
        conversion_track(order, atoc, 1, "C2", ax_Co, gg,
                         asterisk=asterisk)
        conversion_track(order, ctoa, 0, "A02", ax_Ar, gg,
                         ypos=1, asterisk=asterisk)
        conversion_track(order, ctoa, 1, "C2", ax_Co, rr,
                         ypos=1, asterisk=asterisk)

    # Each entry: (axes, flanking gene pair, text alignment, label)
    Ar, Co = xy_axes[1:3]
    annotations = (
        (Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
        (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
        (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
        (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
        (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
        (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
        (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
        (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
        (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
        (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"),
    )
    for ax, genes, ha, label in annotations:
        g1, g2 = genes.split()
        x1, x2 = order[g1][1].start, order[g2][1].start
        if ha == "center":
            x = (x1 + x2) / 2 * .8
        else:
            x = x2 if ha == "left" else x1
        label = r"\textit{{{0}}}".format(label)
        color = rr if "+" in label else gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    ax_Ar.set_xlim(0, tracks[1].total)
    ax_Ar.set_ylim(-1, 1)

    ax_Co.set_xlim(0, tracks[2].total)
    ax_Co.set_ylim(-1, 1)

    # Plot coverage in resequencing lines
    gstep = 5000000
    line_order = ["swede", "kale", "h165", "yudal", "aviso", "abu", "bristol"]
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1, t2 = tracks[0], tracks[-1]
    s1, s2 = sizes[chr1], sizes[chr2]

    # Keyword arguments shared by the top and bottom coverage panels
    coverage_kwargs = dict(
        order=line_order, gauge=None, plot_chr_label=False,
        gauge_step=gstep, palette="gray", cap=40, hlsuffix=hlsuffix,
        labels_dict=labels_dict, diverge=diverge,
    )

    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    top_coverage = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                            **coverage_kwargs)
    yys = top_coverage.yys
    x1, x2 = .37, .72
    tip = .02
    # Each entry: (arrow tip x, arrow tip y, text dx, text dy, label)
    annotations = (
        (x1, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
        (x1, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
        (x1, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
        (x2, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
        (x2, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
        (x2, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"),
    )

    arrowprops = {
        "facecolor": "black",
        "shrink": .05,
        "frac": .5,
        "width": 1,
        "headwidth": 4,
    }
    for x, y, dx, dy, label in annotations:
        label = r"\textit{{{0}}}".format(label)
        root.annotate(label, xy=(x, y), xytext=(x + dx, y + dy),
                      arrowprops=arrowprops, color=rr, fontsize=9,
                      ha="center", va="center")

    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir, **coverage_kwargs)

    pad = .03
    labels = (
        (.1, .67, "A"),
        (t1.xstart - 3 * pad, .95 + pad, "B"),
        (t2.xstart - 3 * pad, .25 + pad, "C"),
    )
    panel_labels(root, labels)
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # Pipeline: preprocess image -> edge detection -> (optional) watershed ->
    # size filtering of labelled regions -> per-object color sampling ->
    # 4-panel summary figure plus one text record per seed.
    # Returns the list of Seed objects that were detected.
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pngfile,) = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != ".":
        mkdir(outdir)
    if calib:
        # From here on `calib` holds the parsed calibration, not the path
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)

    nbcolor = opts.changeBackground
    pngfile = convert_background(pngfile, nbcolor)
    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile,
        pf,
        outdir=outdir,
        rotate=opts.rotate,
        rows=rows,
        cols=cols,
        labelrows=labelrows,
        labelcols=labelcols,
    )

    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(
        ncols=4, nrows=1, figsize=(iopts.w, iopts.h)
    )

    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    else:
        # Previously an unknown filter left `edges` unbound and failed on the
        # next statement with a confusing error; fail with a clear one.
        raise ValueError("Unknown edge detection filter: {0}".format(ff))
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance, threshold_rel=0.05,
                                    indices=False)
        coordinates = peak_local_max(distance, threshold_rel=0.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering; --minsize/--maxsize are percentages of the canvas
    nrows, ncols = img_gray.shape
    canvas_size = nrows * ncols
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug(
        "Find objects with pixels between {0} ({1}%) and {2} ({3}%)".format(
            min_size, opts.minsize, max_size, opts.maxsize))

    # Plotting
    ax1.set_title("Original picture")
    ax1.imshow(oimg)

    # Raw string: "\s" is an invalid escape sequence in a normal literal
    params = r"{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title("Edge detection\n({0})".format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.")
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title("Object detection")
    ax3.imshow(img)

    filename = op.basename(pngfile)
    accession = extract_label(labelfile) if labelfile else pf

    # Calculate region properties
    rp = [x for x in regionprops(labels) if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    # Seeds are numbered from 1; stop once --count objects were processed
    for i, props in enumerate(rp, start=1):
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        # Draw the major and minor axes of the fitted ellipse
        ax2.plot((x0 - major_dx, x0 + major_dx),
                 (y0 + major_dy, y0 - major_dy), "r-")
        ax2.plot((x0 - minor_dx, x0 + minor_dx),
                 (y0 - minor_dy, y0 + minor_dy), "r-")

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * 0.35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug("Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".format(
            i, npixels, len(pixels), 100.0 * npixels / canvas_size))

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))

        # Bounding box and seed number on the object-detection panel
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle((minc, minr), maxc - minc, maxr - minr,
                         fill=False, ec="w", lw=1)
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc, mr, "{0}".format(i), color="w",
                 ha="center", va="center", size=6)

    # Image coordinates: x spans the columns, y is flipped (row 0 on top)
    for ax in (ax2, ax3):
        ax.set_xlim(0, ncols)
        ax.set_ylim(nrows, 0)

    # Output identified seed stats
    ax4.text(0.1, 0.92, "File: {0}".format(latex(filename)), color="g")
    ax4.text(0.1, 0.86, "Label: {0}".format(latex(accession)), color="m")
    yy = 0.8
    # NOTE(review): the handle is deliberately left open - must_open()
    # presumably may return a standard stream for the default outfile; confirm
    # before adding a close().
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        if i > 7:
            # Only the first seven seeds fit in the summary panel
            continue
        ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k"))
        ax4.text(0.1, yy, o.pixeltag, va="center")
        yy -= 0.04
        ax4.add_patch(
            Rectangle((0.1, yy - 0.025), 0.12, 0.05, lw=0,
                      fc=rgb_to_hex(o.rgb)))
        ax4.text(0.27, yy, o.hashtag, va="center")
        yy -= 0.06
    ax4.text(
        0.1,
        yy,
        "(A total of {0} objects displayed)".format(nb_labels),
        color="darkslategray",
    )
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family="Helvetica", size=8)
        ax.set_yticklabels(yticklabels, family="Helvetica", size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    # Markers are positioned by their `cm` attribute
    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A: marker scatter with the fitted curve `spl`
    xstart, ystart = 0.15, 0.65
    w, h = 0.7, 0.3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    if rho < 0:
        # Negative correlation: flip the y-axis
        ax.invert_yaxis()

    # Panel B: the curve `spld`, with its values at the `pp` positions
    ystart -= 0.28
    h = 0.25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(
        ax,
        xlim=chrsize,
        ylim=25 * 1e-6,
        xfactor=1e-6,
        xlabel="Physical position (Mb)",
        yfactor=1000000,
        ylabel="Recomb. rate\n(cM / Mb)",
    )
    ax.xaxis.grid(False)

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = {
        x.component_id: (x.object_beg, x.object_end, x.component_span,
                         x.orientation)
        for x in g.agp
        if not x.is_gap
    }
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = 0.6 / total_size  # the zoomed region spans 0.6 of figure width
    y = 0.16
    pad = 0.03
    pb_ratio = w / chrsize

    # Zoom: dotted guides from panel B down to the enlarged scaffolds
    lsg = "lightslategray"
    root.plot((0.15 + pb_ratio * a_beg, 0.2), (ystart, ystart - 0.14),
              ":", color=lsg)
    root.plot((0.15 + pb_ratio * b_end, 0.3), (ystart, ystart - 0.08),
              ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip(
        (a, b), (asize, bsize), (49213, 81277),
        (0.2, 0.2 + (asize + gapsize) * ratio)
    ):
        end = beg + size * ratio
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro")

    # Transpose the (beg, end, marker) records into three parallel tuples
    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is an invalid escape sequence in a normal literal
    root.text(
        sum(markers) / 2,
        ypos + pad,
        r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
        **fontprop
    )

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(
        sum(markers) / 2,
        ypos - 3 * pad,
        r"$\textit{Estimated gap size: 96,433bp}$",
        color="r",
        ha="center",
        va="center",
    )

    labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    # The output name is fixed, regardless of the input prefix
    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def fig3(args):
    """
    %prog fig3 chrA02,A02,C2,chrC02 chr.sizes all.bed data

    Napus Figure 3 displays alignments between quartet chromosomes, inset
    with read histograms.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    from jcvi.formats.bed import Bed

    optparser = OptionParser(fig3.__doc__)
    optparser.add_option("--gauge_step", default=10000000, type="int",
                         help="Step size for the base scale")
    opts, args, iopts = optparser.set_image_options(args, figsize="12x9")

    if len(args) != 4:
        sys.exit(not optparser.print_help())

    chrs, sizes, bedfile, datadir = args
    gauge_step = opts.gauge_step
    diverge = iopts.diverge
    rr, gg = diverge
    chrs = [[name] for name in chrs.split(",")]
    sizes = Sizes(sizes).mapping

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    ratio_info = calc_ratio(chrs, sizes)
    chr_sum_sizes, ratio = ratio_info[1], ratio_info[2]

    # ---- Synteny panel ----
    seqidsfile = make_seqids(chrs)
    klayout = make_layout(chrs, chr_sum_sizes, ratio, template_f3a, shift=.05)
    height = .07
    r = height / 4
    K = Karyotype(fig, root, seqidsfile, klayout, gap=gap,
                  height=height, lw=2, generank=False, sizes=sizes,
                  heightpad=r, roundrect=True, plot_label=False)

    # ---- Chromosome labels ----
    for kl in K.layout:
        if kl.empty:
            continue
        label_x, label_y = kl.xstart, kl.y
        if label_x < .11:
            label_x, label_y = label_x + .1, label_y + .06
        root.text(label_x - .015, label_y, kl.label, fontsize=15,
                  ha="right", va="center")

    # ---- Inset with datafiles, one histogram per karyotype track ----
    datafiles = [
        op.join(datadir, "chrA02.bzh.forxmgr"),
        op.join(datadir, "parent.A02.per10kb.forxmgr"),
        op.join(datadir, "parent.C2.per10kb.forxmgr"),
        op.join(datadir, "chrC02.bzh.forxmgr"),
    ]
    tracks = K.tracks
    hlfile = op.join(datadir, "bzh.regions.forhaibao")
    xy_axes = []
    for t, datafile in zip(tracks, datafiles):
        hist_ax = make_affix_axis(fig, t, -r, height=2 * r)
        xy_axes.append(hist_ax)
        track_seqid = t.seqids[0]
        xy = XYtrack(hist_ax, datafile, color="lightslategray")
        start = 0
        end = t.total
        xy.interpolate(end)
        xy.cap(ymax=40)
        xy.import_hlfile(hlfile, track_seqid, diverge=diverge)
        xy.draw()
        hist_ax.set_xlim(start, end)

        gauge_ax = make_affix_axis(fig, t, -r)
        adjust_spines(gauge_ax, ["bottom"])
        setup_gauge_ax(gauge_ax, start, end, gauge_step)

    # ---- Converted gene tracks ----
    ax_Ar = make_affix_axis(fig, tracks[1], r, height=r / 2)
    ax_Co = make_affix_axis(fig, tracks[2], r, height=r / 2)

    order = Bed(bedfile).order
    for asterisk in (False, True):
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         0, "A02", ax_Ar, rr, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.AtoC.txt",
                         1, "C2", ax_Co, gg, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         0, "A02", ax_Ar, gg, ypos=1, asterisk=asterisk)
        conversion_track(order, "data/Genes.Converted.seuil.0.6.CtoA.txt",
                         1, "C2", ax_Co, rr, ypos=1, asterisk=asterisk)

    Ar, Co = xy_axes[1:3]
    # (axes, "gene1 gene2", horizontal alignment, label text)
    annotations = ((Ar, "Bra028920 Bra028897", "center", "1DAn2+"),
                   (Ar, "Bra020081 Bra020171", "right", "2DAn2+"),
                   (Ar, "Bra020218 Bra020286", "left", "3DAn2+"),
                   (Ar, "Bra008143 Bra008167", "left", "4DAn2-"),
                   (Ar, "Bra029317 Bra029251", "right", "5DAn2+ (GSL)"),
                   (Co, "Bo2g001000 Bo2g001300", "left", "1DCn2-"),
                   (Co, "Bo2g018560 Bo2g023700", "right", "2DCn2-"),
                   (Co, "Bo2g024450 Bo2g025390", "left", "3DCn2-"),
                   (Co, "Bo2g081060 Bo2g082340", "left", "4DCn2+"),
                   (Co, "Bo2g161510 Bo2g164260", "right", "5DCn2-"))

    for ax, genes, ha, label in annotations:
        first_gene, second_gene = genes.split()
        x1 = order[first_gene][1].start
        x2 = order[second_gene][1].start
        # Anchor position depends on the requested alignment; anything other
        # than "center" or "left" anchors at the first gene.
        anchors = {"center": (x1 + x2) / 2 * .8, "left": x2}
        x = anchors.get(ha, x1)
        label = r"\textit{{{0}}}".format(label)
        if "+" in label:
            color = rr
        else:
            color = gg
        ax.text(x, 30, label, color=color, fontsize=9, ha=ha, va="center")

    for gene_ax, track in ((ax_Ar, tracks[1]), (ax_Co, tracks[2])):
        gene_ax.set_xlim(0, track.total)
        gene_ax.set_ylim(-1, 1)

    # ---- Plot coverage in resequencing lines ----
    gstep = 5000000
    order = "swede,kale,h165,yudal,aviso,abu,bristol".split(",")
    labels_dict = {"h165": "Resynthesized (H165)", "abu": "Aburamasari"}
    hlsuffix = "regions.forhaibao"
    chr1, chr2 = "chrA02", "chrC02"
    t1 = tracks[0]
    t2 = tracks[-1]
    s1 = sizes[chr1]
    s2 = sizes[chr2]

    canvas1 = (t1.xstart, .75, t1.xend - t1.xstart, .2)
    c = Coverage(fig, root, canvas1, chr1, (0, s1), datadir,
                 order=order, gauge=None, plot_chr_label=False,
                 gauge_step=gstep, palette="gray", cap=40, hlsuffix=hlsuffix,
                 labels_dict=labels_dict, diverge=diverge)
    yys = c.yys
    x1, x2 = .37, .72
    tip = .02
    # (arrow tip x, arrow tip y, text offset dx, text offset dy, label)
    annotations = ((x1, yys[2] + .3 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[3] + .6 * tip, tip, tip / 2, "FLC"),
                   (x1, yys[5] + .6 * tip, tip, tip / 2, "FLC"),
                   (x2, yys[0] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[4] + .9 * tip, -1.2 * tip, 0, "GSL"),
                   (x2, yys[6] + .9 * tip, -1.2 * tip, 0, "GSL"))

    arrowprops = dict(facecolor='black', shrink=.05, frac=.5,
                      width=1, headwidth=4)
    for x, y, dx, dy, label in annotations:
        italic_label = r"\textit{{{0}}}".format(label)
        root.annotate(italic_label, xy=(x, y), xytext=(x + dx, y + dy),
                      arrowprops=arrowprops, color=rr, fontsize=9,
                      ha="center", va="center")

    canvas2 = (t2.xstart, .05, t2.xend - t2.xstart, .2)
    Coverage(fig, root, canvas2, chr2, (0, s2), datadir,
             order=order, gauge=None, plot_chr_label=False,
             gauge_step=gstep, palette="gray", cap=40, hlsuffix=hlsuffix,
             labels_dict=labels_dict, diverge=diverge)

    # ---- Panel letters ----
    pad = .03
    panel_labels(root, ((.1, .67, "A"),
                        (t1.xstart - 3 * pad, .95 + pad, "B"),
                        (t2.xstart - 3 * pad, .25 + pad, "C")))
    normalize_axes(root)

    image_name = "napus-fig3." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def diagram(args):
    """
    %prog diagram

    Plot the predictive power of various evidences.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    parser = OptionParser(diagram.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4")

    if len(args) != 0:
        sys.exit(not parser.print_help())

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Geometry of the gauge drawn on top
    lsg = "lightslategray"
    yy = .7
    yinterval = .1
    height = .05
    yp = yy - yinterval - height
    canvas = .95
    xstart = .025

    def convert(x):
        # Map a repeat length onto the canvas (1200 spans the whole canvas)
        return xstart + x * canvas / 1200

    # Symbols
    root.text(.5, .9,
              r"$L$: Read length, $F$: Flank size, $V$: Pair distance",
              ha="center")
    root.text(.5, .85, r"ex. $L=150bp, F=9bp, V=500bp$", ha="center")
    root.text(xstart + canvas, yy - height, "STR repeat length",
              ha="center", color=lsg, size=10)

    # Mark the key events
    pad = .02
    arrowlen = canvas * 1.05
    arrowprops = {
        "length_includes_head": True,
        "width": .01,
        "fc": lsg,
        "lw": 0,
        "head_length": arrowlen * .12,
        "head_width": .04,
    }
    gauge_arrow = FancyArrow(xstart, yy, arrowlen, 0, shape="right",
                             **arrowprops)
    root.add_patch(gauge_arrow)

    ppad = 30
    # (tick position, label position, row of the range it bounds, label);
    # a negative row means no dotted drop line is drawn for that tick.
    keyevents = (
        (0, 0, -1, r"$0$"),
        (150 - 18, 150 - 18 - ppad, 0, r"$L - 2F$"),
        (150 - 9, 150 - 9, 1, r"$L - F$"),
        (150, 150 + ppad, 2, r"$L$"),
        (500 - 9, 500 - 9, 3, r"$V - F$"),
        (500 * 2 - 18, 500 * 2 - 18, 2, r"$2(V - F)$"),
    )
    for event, pos, row, label in keyevents:
        tick_x = convert(event)
        label_x = convert(pos)
        root.plot((tick_x, tick_x), (yy - height / 4, yy + height / 4),
                  '-', color='k')
        root.text(label_x, yy + pad, label, rotation=45, va="bottom", size=8)
        if row >= 0:
            ystart = yp - row * yinterval
            root.plot((tick_x, tick_x), (ystart, yy - height / 4),
                      ':', color=lsg)

    # Range on bottom. These are simple 4 rectangles, with the range indicating
    # the predictive range.
    CLOSED, OPEN = 0, 1
    ranges = (
        (0, 150 - 18, CLOSED, "Spanning reads"),
        (9, 150 - 9, OPEN, "Partial reads"),
        (150, 500 * 2 - 18, CLOSED, "Repeat reads"),
        (0, 500 - 9, CLOSED, "Paired-end reads"),
    )
    for start, end, starttag, label in ranges:
        left = convert(start)
        right = convert(end)
        # Horizontal gradient: the direction depends on the tag
        if starttag == OPEN:
            data = [[0., 1.], [0., 1.]]
        else:
            data = [[1., 0.], [1., 0.]]
        root.imshow(data, interpolation='bicubic', cmap=plt.cm.Greens,
                    extent=[left, right, yp, yp + height])
        root.text(right + pad, yp + height / 2, label, va="center")
        yp -= yinterval

    normalize_axes(root)

    image_name = "diagram." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def multihistogram(args):
    """
    %prog multihistogram *.histogram species

    Plot the histogram based on a set of K-mer histograms. The method is based
    on Star et al.'s method (Atlantic Cod genome paper).
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    p = OptionParser(multihistogram.__doc__)
    p.add_option("--kmin", default=15, type="int",
                 help="Minimum K-mer size, inclusive")
    p.add_option("--kmax", default=30, type="int",
                 help="Maximum K-mer size, inclusive")
    p.add_option("--vmin", default=2, type="int",
                 help="Minimum value, inclusive")
    p.add_option("--vmax", default=100, type="int",
                 help="Maximum value, inclusive")
    opts, args, iopts = p.set_image_options(args, figsize="10x5", dpi=300)

    # At least one histogram file AND the species name are required; a lone
    # argument used to slip through and crash later on an empty file list.
    if len(args) < 2:
        sys.exit(not p.print_help())

    histfiles = args[:-1]
    species = args[-1]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    A = fig.add_axes([0.08, 0.12, 0.38, 0.76])
    B = fig.add_axes([0.58, 0.12, 0.38, 0.76])

    lines = []
    legends = []
    genomesizes = []
    for histfile in histfiles:
        ks = KmerSpectrum(histfile)
        x, y = ks.get_xy(opts.vmin, opts.vmax)
        # K is read from the file name: the text after the last "-" in the
        # part of the basename before the first "."
        K = get_number(op.basename(histfile).split(".")[0].split("-")[-1])
        if not opts.kmin <= K <= opts.kmax:
            continue

        (line,) = A.plot(x, y, "-", lw=1)
        lines.append(line)
        legends.append("K = {0}".format(K))
        ks.analyze(K=K, method="allpaths")
        genomesizes.append((K, ks.genomesize / 1e6))

    if not genomesizes:
        # Nothing survived the --kmin/--kmax filter: there is nothing to fit
        sys.exit("No histogram with K within [{0}, {1}]".format(
            opts.kmin, opts.kmax))

    # Panel A: the K-mer spectra
    leg = A.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(0.5)

    title = "{0} genome K-mer histogram".format(species)
    A.set_title(markup(title))
    xlabel, ylabel = "Coverage (X)", "Counts"
    A.set_xlabel(xlabel)
    A.set_ylabel(ylabel)
    set_human_axis(A)

    # Panel B: genome size estimate per K, with a quadratic fit
    title = "{0} genome size estimate".format(species)
    B.set_title(markup(title))
    x, y = zip(*genomesizes)
    B.plot(x, y, "ko", mfc="w")
    t = np.linspace(opts.kmin - 0.5, opts.kmax + 0.5, 100)
    fit = np.poly1d(np.polyfit(x, y, 2))
    B.plot(t, fit(t), "r:")

    xlabel, ylabel = "K-mer size", "Estimated genome size (Mb)"
    B.set_xlabel(xlabel)
    B.set_ylabel(ylabel)
    set_ticklabels_helvetica(B)

    labels = ((0.04, 0.96, "A"), (0.54, 0.96, "B"))
    panel_labels(root, labels)

    normalize_axes(root)
    imagename = species + ".multiK.pdf"
    savefig(imagename, dpi=iopts.dpi, iopts=iopts)
def depth(args):
    """
    %prog depth anchorfile --qbed qbedfile --sbed sbedfile

    Calculate the depths in the two genomes in comparison, given in --qbed and
    --sbed. The synteny blocks will be layered on the genomes, and the
    multiplicity will be summarized to stderr.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    from jcvi.utils.range import range_depth

    p = OptionParser(depth.__doc__)
    p.add_option("--depthfile",
                 help="Generate file with gene and depth [default: %default]")
    p.add_option("--histogram", default=False, action="store_true",
                 help="Plot histograms in PDF")
    p.add_option("--xmax", type="int",
                 help="x-axis maximum to display in plot")
    p.add_option("--title", default=None,
                 help="Title to display in plot")
    p.add_option("--quota", help="Force to use this quota, e.g. 1:1, 1:2 ...")
    p.set_beds()

    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (anchorfile,) = args
    qbed, sbed, qorder, sorder, is_self = check_beds(anchorfile, p, opts)
    depthfile = opts.depthfile
    ac = AnchorFile(anchorfile)

    # Reduce every synteny block to its extent on each genome
    qranges = []
    sranges = []
    for ib in ac.blocks:
        q, s, _ = zip(*ib)
        q = [qorder[x] for x in q]
        s = [sorder[x] for x in s]
        qrange = (min(q)[0], max(q)[0])
        srange = (min(s)[0], max(s)[0])
        qranges.append(qrange)
        sranges.append(srange)
        if is_self:
            # Self comparison: both sides of a block lie on the same genome
            qranges.append(srange)

    qgenome = op.basename(qbed.filename).split(".")[0]
    sgenome = op.basename(sbed.filename).split(".")[0]
    qtag = "Genome {0} depths".format(qgenome)
    print("{}:".format(qtag), file=sys.stderr)
    dsq, details = range_depth(qranges, len(qbed))
    fw = None
    if depthfile:
        fw = open(depthfile, "w")
        write_details(fw, details, qbed)

    if is_self:
        # The handle used to leak on this early return
        if fw is not None:
            fw.close()
            logging.debug("Depth written to `{0}`.".format(depthfile))
        return

    stag = "Genome {0} depths".format(sgenome)
    print("{}:".format(stag), file=sys.stderr)
    dss, details = range_depth(sranges, len(sbed))
    if fw is not None:
        write_details(fw, details, sbed)
        fw.close()
        logging.debug("Depth written to `{0}`.".format(depthfile))

    if not opts.histogram:
        return

    from jcvi.graphics.base import plt, quickplot_ax, savefig, normalize_axes

    # Plot two histograms one for query genome, one for subject genome
    plt.figure(1, (6, 3))
    f, (ax1, ax2) = plt.subplots(1, 2, sharey=True)

    # Python 3 dict views cannot be concatenated with `+`, so build lists
    xmax = opts.xmax or max(4, max(list(dsq) + list(dss)))
    if opts.quota:
        speak, qpeak = opts.quota.split(":")
        qpeak, speak = int(qpeak), int(speak)
    else:
        qpeak = find_peak(dsq)
        speak = find_peak(dss)

    qtag = "# of {} blocks per {} gene".format(sgenome, qgenome)
    stag = "# of {} blocks per {} gene".format(qgenome, sgenome)
    quickplot_ax(ax1, dss, 0, xmax, stag, ylabel="Percentage of genome",
                 highlight=range(1, speak + 1))
    quickplot_ax(ax2, dsq, 0, xmax, qtag, ylabel=None,
                 highlight=range(1, qpeak + 1))

    title = opts.title or "{} vs {} syntenic depths\n{}:{} pattern"\
        .format(qgenome, sgenome, speak, qpeak)
    root = f.add_axes([0, 0, 1, 1])
    # A user-supplied --title may have no second line; partition() tolerates
    # that, where unpacking split('\n') raised ValueError.
    vs, _, pattern = title.partition("\n")
    root.text(.5, .97, vs, ha="center", va="center", color="darkslategray")
    root.text(.5, .925, pattern, ha="center", va="center",
              color="tomato", size=16)
    print(title, file=sys.stderr)

    normalize_axes(root)

    pf = anchorfile.rsplit(".", 1)[0] + ".depth"
    image_name = pf + ".pdf"
    savefig(image_name)