def main():
    """Command-line entry point: build a ``Coverage`` figure for one sequence.

    Expects three positional arguments (sequence name, sizes file, data
    directory) and saves the figure as ``<sequence name>.<format>``.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "--order", help="The order to plot the tracks, comma-separated"
    )
    opts, args, iopts = parser.set_image_options()

    if len(args) != 3:
        sys.exit(not parser.print_help())

    seqid, sizesfile, datadir = args
    hlsuffix = opts.hlsuffix
    # --order is optional; it is only split when a value was supplied.
    track_order = opts.order.split(",") if opts.order else opts.order

    seq_sizes = Sizes(sizesfile)
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Region of the root axes passed to Coverage as its drawing canvas.
    canvas = (.12, .35, .8, .35)
    full_span = (0, seq_sizes.get_size(seqid))
    Coverage(
        fig, root, canvas, seqid, full_span, datadir,
        order=track_order, hlsuffix=hlsuffix
    )

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(".".join((seqid, iopts.format)), dpi=iopts.dpi, iopts=iopts)
def likelihood2(args):
    """
    %prog likelihood2 100_20.json

    Plot the likelihood surface and marginal distributions.
    """
    from matplotlib import gridspec

    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(likelihood2.__doc__)
    opts, args, iopts = parser.set_image_options(
        args, figsize="10x5", style="white", cmap="coolwarm"
    )
    if len(args) != 1:
        sys.exit(not parser.print_help())

    jsonfile = args[0]
    fig = plt.figure(figsize=(iopts.w, iopts.h))

    # 2x2 grid: one panel spans the whole left column, two are stacked
    # in the right column.
    grid = gridspec.GridSpec(2, 2)
    left_panel = fig.add_subplot(grid[:, 0])
    upper_right = fig.add_subplot(grid[0, 1])
    lower_right = fig.add_subplot(grid[1, 1])
    plt.tight_layout(pad=3)
    prefix = plot_panel(jsonfile, left_panel, upper_right, lower_right, opts.cmap)

    # Full-figure overlay axes, normalized after the panels are drawn.
    root = fig.add_axes([0, 0, 1, 1])
    normalize_axes(root)

    image_name = "likelihood2.{}.{}".format(prefix, iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """Command-line entry point: build a ``Coverage`` figure for one sequence.

    Expects three positional arguments (sequence name, sizes file, data
    directory) and saves the figure as ``<sequence name>.<format>``.
    """
    p = OptionParser(__doc__)
    p.add_option("--order", help="The order to plot the tracks, comma-separated")
    opts, args, iopts = p.set_image_options()

    if len(args) != 3:
        sys.exit(not p.print_help())

    # `seqid` rather than `chr`, so the builtin chr() is not shadowed.
    seqid, sizesfile, datadir = args
    order = opts.order
    hlsuffix = opts.hlsuffix
    if order:
        order = order.split(",")
    sizes = Sizes(sizesfile)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    canvas = (.12, .35, .8, .35)
    seq_size = sizes.get_size(seqid)
    # The Coverage object is not used afterwards, so its result is not bound
    # to a local name.
    Coverage(fig, root, canvas, seqid, (0, seq_size), datadir,
             order=order, hlsuffix=hlsuffix)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    left_ax = fig.add_axes([0.1, 0.18, 0.32, 0.64])
    right_ax = fig.add_axes([0.6, 0.18, 0.32, 0.64])

    # Load both inputs before any plotting happens.
    left_data, right_data = [import_data(path) for path in args]

    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    legend = ("anchor rate", "runtime")
    panels = (
        (left_ax, left_data, "Yellow catfish"),
        (right_ax, right_data, "Medicago"),
    )
    for ax, table, title in panels:
        subplot_twinx(ax, table, xlabel, ylabels, title=title, legend=legend)

    panel_labels(root, ((0.04, 0.92, "A"), (0.54, 0.92, "B")))
    normalize_axes(root)

    savefig("resample." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def main():
    """Command-line entry point: render one text string as an image.

    Takes a single positional argument (the text), draws it centered on
    full-figure axes, applies the chosen font via ``fontprop`` and saves the
    result as ``<text>.<format>``.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "--customfont",
        default="Airswing.ttf",
        choices=available_fonts,
        help="Custom font name",
    )
    parser.add_option("--color", default="limegreen", help="Font color")
    parser.add_option("--size", default=36, type="int", help="Font size")
    opts, args, iopts = parser.set_image_options(
        figsize="2x1", dpi=60, format="png"
    )

    if len(args) != 1:
        sys.exit(not parser.print_help())

    text = args[0]

    # Reset matplotlib rc settings before the figure is created.
    plt.rcdefaults()
    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([0, 0, 1, 1])

    ax.text(0.5, 0.5, text, color=opts.color, ha="center", va="center")
    fontprop(ax, opts.customfont, size=opts.size)

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_axis_off()

    savefig(".".join((text, iopts.format)), dpi=iopts.dpi, iopts=iopts)
def main():
    """Command-line entry point: draw a ``Karyotype`` figure.

    Takes two positional arguments (seqids file, layout file) and saves the
    figure as ``karyotype.<format>``.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "--nocircles",
        default=False,
        action="store_true",
        help="Do not plot chromosome circles",
    )
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqidsfile, layoutfile = args
    # The flag is negative (--nocircles); Karyotype takes the positive form.
    with_circles = not opts.nocircles

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    Karyotype(fig, root, seqidsfile, layoutfile, plot_circles=with_circles)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = ".".join(("karyotype", iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """Command-line entry point: draw a ``Synteny`` figure.

    Takes three positional arguments (data file, BED file, layout file). The
    output name is the data file name with its last extension replaced by the
    image format.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]",
    )
    parser.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]",
    )
    parser.add_option("--extra", help="Extra features in BED format")
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    # Drop only the last extension to form the output prefix.
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    Synteny(
        fig, root, datafile, bedfile, layoutfile,
        switch=opts.switch, tree=opts.tree, extra_features=opts.extra
    )

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(".".join((prefix, iopts.format)), dpi=iopts.dpi, iopts=iopts)
def ploidy(args):
    """
    %prog ploidy b1.blocks all.bed b1.layout

    Build a figure that illustrates the WGD history of the vanilla genome.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(ploidy.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="12x6")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    blocksfile, bedfile, blockslayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # All drawing is delegated to draw_ploidy on the full-figure axes.
    draw_ploidy(fig, root, blocksfile, bedfile, blockslayout)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = ".".join(("vanilla-karyotype", iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def regression(args):
    """
    %prog regression postgenomic-s.tsv

    Plot chronological vs. predicted age.
    """
    # The docstring above is handed to the parser as the usage text.
    p = OptionParser(regression.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (tsvfile,) = args
    df = pd.read_csv(tsvfile, sep="\t")

    # Copy the two input columns into a frame keyed by the axis labels.
    chrono = "Chronological age (yr)"
    pred = "Predicted age (yr)"
    resdf = pd.DataFrame(
        {chrono: df["hli_calc_age_sample_taken"], pred: df["Predicted Age"]}
    )

    # x/y/data are passed by keyword: recent seaborn releases no longer accept
    # them positionally, while the keyword form works on old and new versions.
    g = sns.jointplot(
        x=chrono,
        y=pred,
        data=resdf,
        joint_kws={"s": 6},
        xlim=(0, 100),
        ylim=(0, 80),
    )
    g.fig.set_figwidth(iopts.w)
    g.fig.set_figheight(iopts.h)

    # The output is always a PDF named after the input file.
    outfile = tsvfile.rsplit(".", 1)[0] + ".regression.pdf"
    savefig(outfile)
def pomegranate(args):
    """
    %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pomegranate genome. The script calls both graphics.karyotype
    and graphics.synteny.
    """
    # The docstring above is handed to the parser as the usage text, so it
    # must name this subcommand (it previously read `%prog cotton` and
    # described the pineapple genome).
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both components draw onto the same full-figure axes.
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.42, 0.52, 0.48)

    labels = ((0.04, 0.96, "A"), (0.04, 0.52, "B"))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def resample(args):
    """
    %prog resample yellow-catfish-resample.txt medicago-resample.txt

    Plot ALLMAPS performance across resampled real data.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(resample.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x4", dpi=300)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    catfish_file, medicago_file = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    panel_a = fig.add_axes([.1, .18, .32, .64])
    panel_b = fig.add_axes([.6, .18, .32, .64])

    # Both inputs are loaded before either panel is drawn.
    catfish = import_data(catfish_file)
    medicago = import_data(medicago_file)

    # Settings shared by the two panels.
    shared = dict(legend=("anchor rate", "runtime"))
    xlabel = "Fraction of markers"
    ylabels = ("Anchor rate", "Runtime (m)")
    subplot_twinx(panel_a, catfish, xlabel, ylabels,
                  title="Yellow catfish", **shared)
    subplot_twinx(panel_b, medicago, xlabel, ylabels,
                  title="Medicago", **shared)

    panel_labels(root, ((.04, .92, "A"), (.54, .92, "B")))
    normalize_axes(root)

    image_name = ".".join(("resample", iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def pomegranate(args):
    """
    %prog pomegranate seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pomegranate genome. The script calls both graphics.karyotype
    and graphics.synteny.
    """
    # The docstring above is handed to the parser as the usage text; it used
    # to advertise `%prog cotton` and the pineapple genome, which did not
    # match this subcommand.
    p = OptionParser(pomegranate.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Both components draw onto the same full-figure axes.
    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .42, .52, .48)

    labels = ((.04, .96, 'A'), (.04, .52, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pomegranate-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main():
    """Command-line entry point: draw a ``Synteny`` figure.

    Takes three positional arguments (data file, BED file, layout file) and
    forwards the optional switch/tree/extra/scalebar settings to ``Synteny``.
    The output name is the data file name with its last extension replaced by
    the image format.
    """
    parser = OptionParser(__doc__)
    parser.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]",
    )
    parser.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]",
    )
    parser.add_option("--extra", help="Extra features in BED format")
    parser.add_option(
        "--scalebar",
        default=False,
        action="store_true",
        help="Add scale bar to the plot",
    )
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    # Drop only the last extension to form the output prefix.
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    Synteny(
        fig, root, datafile, bedfile, layoutfile,
        switch=opts.switch,
        tree=opts.tree,
        extra_features=opts.extra,
        scalebar=opts.scalebar
    )

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(".".join((prefix, iopts.format)), dpi=iopts.dpi, iopts=iopts)
def allelefreq(args):
    """
    %prog allelefreq HD,DM1,SCA1,SCA17

    Plot the allele frequencies of some STRs.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(allelefreq.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    loci = args[0]

    fig, ((ax_a, ax_b), (ax_c, ax_d)) = plt.subplots(
        ncols=2, nrows=2, figsize=(iopts.w, iopts.h)
    )
    plt.tight_layout(pad=4)

    # Only the data frame returned by read_treds() is used here.
    _, table = read_treds()
    table = table.set_index(["abbreviation"])

    # One locus per panel, in the order given; zip stops at the shorter of
    # the four panels and the supplied loci.
    panels = (ax_a, ax_b, ax_c, ax_d)
    for panel, locus in zip(panels, loci.split(",")):
        plot_allelefreq(panel, table, locus)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    near_left, near_top, middle = pad / 2, 1 - pad, 1 / 2.
    corner_labels = (
        (near_left, near_top, "A"),
        (middle, near_top, "B"),
        (near_left, middle, "C"),
        (middle, middle, "D"),
    )
    panel_labels(root, corner_labels)
    normalize_axes(root)

    savefig("allelefreq." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def coverage(args):
    """
    %prog coverage *.coverage

    Plot coverage along chromosome. The coverage file can be generated with:
    $ samtools depth a.bam > a.coverage

    The plot is a simple line plot using matplotlib.
    """
    from jcvi.graphics.base import savefig

    # The docstring above is handed to the parser as the usage text.
    p = OptionParser(coverage.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    covfile, = args
    # The input is read as three tab-separated columns.
    df = pd.read_csv(covfile, sep='\t', names=["Ref", "Position", "Depth"])

    xlabel, ylabel = "Position", "Depth"
    df.plot(xlabel, ylabel, color='g')

    image_name = covfile + "." + iopts.format
    # Forward the parsed image options so --dpi is honored instead of
    # falling back to savefig's own default.
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def main(args):
    """
    %prog table.csv

    Render a table on canvas. Input is a CSV file.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(main.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="7x7")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    csvfile = args[0]
    # Output prefix: the input name with its last extension dropped.
    prefix = csvfile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    draw_table(root, CsvTable(csvfile))
    normalize_axes(root)

    savefig(".".join((prefix, iopts.format)), dpi=iopts.dpi, iopts=iopts)
def coverage(args):
    """
    %prog coverage *.coverage

    Plot coverage along chromosome. The coverage file can be generated with:
    $ samtools depth a.bam > a.coverage

    The plot is a simple line plot using matplotlib.
    """
    from jcvi.graphics.base import savefig

    # The docstring above is handed to the parser as the usage text.
    p = OptionParser(coverage.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (covfile,) = args
    # The input is read as three tab-separated columns.
    df = pd.read_csv(covfile, sep="\t", names=["Ref", "Position", "Depth"])

    xlabel, ylabel = "Position", "Depth"
    df.plot(xlabel, ylabel, color="g")

    image_name = covfile + "." + iopts.format
    # Pass the parsed image options through; without them the --dpi setting
    # was silently ignored.
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def wgd(args):
    """
    %prog wgd vplanifoliaA_blocks.bed vplanifoliaA.sizes

    Create a wgd figure.
    """
    from jcvi.graphics.chromosome import draw_chromosomes

    # Use this function's own docstring as usage text; the parser used to be
    # built from another function's docstring.
    p = OptionParser(wgd.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    # Show the usage instead of failing on the unpacking below when the
    # number of positional arguments is wrong.
    if len(args) != 2:
        sys.exit(not p.print_help())

    (bedfile, sizesfile) = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax1 = fig.add_axes([0, 0, 1, 1])
    title = r"Genome duplication $\alpha^{O}$ event in $\textit{Vanilla}$"
    draw_chromosomes(
        ax1,
        bedfile,
        sizes=sizesfile,
        iopts=iopts,
        mergedist=200000,
        winsize=50000,
        imagemap=False,
        gauge=True,
        legend=False,
        title=title,
    )

    normalize_axes([ax1])

    # The output name is fixed and does not follow the --format option.
    image_name = "wgd.pdf"
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def birch(args):
    """
    %prog birch seqids layout

    Plot birch macro-synteny, with an embedded phylogenetic tree to the right.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(birch.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x6")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqids, layout = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    tracks = Karyotype(fig, root, seqids, layout).layout

    xs = .79
    node_style = dict(rectangle=False, circle=False)

    # Embed a phylogenetic tree to the right: each leaf takes the y position
    # of the layout entry with the same index.
    leaves = ("Amborella", "Vitis", "Prunus", "Betula", "Populus", "Arabidopsis")
    coords = {}
    for idx, taxon in enumerate(leaves):
        coords[taxon] = (xs, tracks[idx].y)

    # Internal nodes, joined in order so that children exist before parents.
    internal_nodes = (
        ("fabids", "Prunus", "Betula"),
        ("malvids", "Populus", "Arabidopsis"),
        ("rosids", "fabids", "malvids"),
        ("eudicots", "rosids", "Vitis"),
        ("angiosperm", "eudicots", "Amborella"),
    )
    for parent, first, second in internal_nodes:
        coords[parent] = join_nodes(root, coords, first, second, xs, **node_style)

    # Show branch length
    branches = (
        ("Amborella", "angiosperm", ">160.0", {}),
        ("eudicots", "angiosperm", ">78.2", {"va": "top"}),
        ("Vitis", "eudicots", "138.5", {}),
        ("rosids", "eudicots", "19.8", {"va": "top"}),
        ("Prunus", "fabids", "104.2", {"ha": "right", "va": "top"}),
        ("Arabidopsis", "malvids", "110.2", {"va": "top"}),
        ("fabids", "rosids", "19.8", {"ha": "right", "va": "top"}),
        ("malvids", "rosids", "8.5", {"va": "top"}),
    )
    for child, parent, label, text_kw in branches:
        branch_length(root, coords[child], coords[parent], label, **text_kw)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(".".join(("birch", iopts.format)), dpi=iopts.dpi, iopts=iopts)
def report(args):
    """
    %prog report ksfile

    generate a report given a Ks result file (as produced by synonymous_calc.py).
    describe the median Ks, Ka values, as well as the distribution in stem-leaf plot
    """
    from jcvi.utils.cbook import SummaryStats
    from jcvi.graphics.histogram import stem_leaf_plot

    # The docstring above is handed to the parser as the usage text.
    p = OptionParser(report.__doc__)
    p.add_option("--pdf", default=False, action="store_true",
                 help="Generate graphic output for the histogram [default: %default]")
    p.add_option("--components", default=1, type="int",
                 help="Number of components to decompose peaks [default: %default]")
    add_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="5x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    ks_file, = args
    data = read_ks_file(ks_file)
    ks_min = opts.vmin
    ks_max = opts.vmax
    bins = opts.bins

    # Loop-invariant: stem width precision depends only on the bin size.
    digit = 2 if (ks_max * 1. / bins) < .1 else 1

    # The first listed field is skipped; of the rest only Ks columns are
    # summarized.
    for f in fields.split(",")[1:]:
        if "ks" not in f:
            continue

        columndata = [getattr(x, f) for x in data]
        columndata = [x for x in columndata if ks_min <= x <= ks_max]

        st = SummaryStats(columndata)
        title = "{0} ({1}): ".format(descriptions[f], ks_file)
        title += "Median:{0:.3f} (1Q:{1:.3f}|3Q:{2:.3f}||".\
                format(st.median, st.firstq, st.thirdq)
        title += "Mean:{0:.3f}|Std:{1:.3f}||N:{2})".\
                format(st.mean, st.sd, st.size)

        # Only Ks columns reach this point, so the bins are always
        # (0, ks_max, bins); the former non-Ks fallback was unreachable.
        stem_leaf_plot(columndata, 0, ks_max, bins, digit=digit, title=title)

    if not opts.pdf:
        return

    # Graphic output: plot the in-range `ng_ks` values.
    components = opts.components
    data = [x.ng_ks for x in data]
    data = [x for x in data if ks_min <= x <= ks_max]

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([.12, .1, .8, .8])
    kp = KsPlot(ax, ks_max, opts.bins, legendp=opts.legendp)
    kp.add_data(data, components, fill=opts.fill)
    kp.draw(title=opts.title)
def oropetium(args):
    """
    %prog oropetium mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(oropetium.__doc__)
    parser.add_option("--extra", help="Extra features in BED format")
    opts, args, iopts = parser.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout,
            switch=switch, extra_features=opts.extra)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.4, 0.57, 0.74, text=True, repeat=True)

    # On the left panel, make a species tree
    label_color = "lightslategrey"
    xs, xp = 0.16, 0.03
    leaf_rows = (
        ("oropetium", 0.7),
        ("setaria", 0.6),
        ("sorghum", 0.5),
        ("rice", 0.4),
        ("brachypodium", 0.3),
    )
    coords = {}
    for species, ypos in leaf_rows:
        coords[species] = (xs, ypos)

    # (parent, child, child, move_left): when move_left is set the join
    # column shifts left by `xp` first; BEP and PACMAD share one column.
    joins = (
        ("Panicoideae", "setaria", "sorghum", True),
        ("BEP", "rice", "brachypodium", True),
        ("PACMAD", "oropetium", "Panicoideae", False),
        ("Poaceae", "BEP", "PACMAD", True),
    )
    for parent, first, second, move_left in joins:
        if move_left:
            xs -= xp
        coords[parent] = join_nodes(root, coords, first, second, xs)

    # Names of the internal nodes: (tag, vertical offset, vertical alignment)
    node_labels = (
        ("BEP", -0.02, "top"),
        ("Poaceae", -0.02, "top"),
        ("PACMAD", 0.02, "bottom"),
    )
    for tag, dy, valign in node_labels:
        nx, ny = coords[tag]
        root.text(nx - 0.005, ny + dy, tag,
                  rotation=90, ha="right", va=valign, color=label_color)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(".".join(("oropetium", iopts.format)), dpi=iopts.dpi, iopts=iopts)
def compare2(args):
    """
    %prog compare2

    Compare performances of various variant callers on simulated STR datasets.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(compare2.__doc__)
    parser.add_option('--maxinsert', default=300, type="int",
                      help="Maximum number of repeats")
    add_simulate_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="10x5")

    # This subcommand takes no positional arguments.
    if args:
        sys.exit(not parser.print_help())

    max_insert = opts.maxinsert

    fig, (ax1, ax2) = plt.subplots(ncols=2, nrows=1,
                                   figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=2)

    # Simulation settings appended to both panel titles.
    settings = r" ($D=%s\times, L=%dbp, V=%dbp$)" % \
        (opts.depth, opts.readlen, opts.distance)

    # ax1: lobSTR vs TREDPARSE with haploid model
    # ax2: lobSTR vs TREDPARSE with diploid model
    panels = (
        (ax1, SIMULATED_HAPLOID,
         "lobstr_results_homo.txt", "tredparse_results_homo.txt", {}),
        (ax2, SIMULATED_DIPLOID,
         "lobstr_results_het.txt", "tredparse_results_het.txt", {"exclude": 20}),
    )
    for ax, model_title, lobstr_file, tredparse_file, parse_kw in panels:
        lobstr_results = parse_results(lobstr_file, **parse_kw)
        tredparse_results = parse_results(tredparse_file, **parse_kw)
        plot_compare(ax, model_title + settings, tredparse_results,
                     lobstr_results, max_insert=max_insert)

    for ax in (ax1, ax2):
        ax.set_xlim(0, max_insert)
        ax.set_ylim(0, max_insert)

    root = fig.add_axes([0, 0, 1, 1])
    pad = .03
    top = 1 - pad
    panel_labels(root, ((pad / 2, top, "A"), (1 / 2., top, "B")))
    normalize_axes(root)

    savefig("tredparse." + iopts.format, dpi=iopts.dpi, iopts=iopts)
def utricularia(args):
    """Draw a ``Synteny`` figure with two framed regions and zoom labels.

    Takes three positional arguments (data file, BED file, layout file). The
    usage text is borrowed from ``jcvi.graphics.synteny.main``.
    """
    from jcvi.graphics.synteny import main as synteny_main

    parser = OptionParser(synteny_main.__doc__)
    parser.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = parser.set_image_options(args, figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    # Output prefix: the data file name with its last extension dropped.
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    synteny = Synteny(
        fig, root, datafile, bedfile, layoutfile,
        loc_label=False, switch=opts.switch
    )

    # Two unfilled rounded frames, upper then lower.
    light = "lightslategrey"
    for corner, height in (((0.02, 0.69), 0.24), ((0.02, 0.09), 0.48)):
        RoundRect(root, corner, 0.96, height, fill=False, lw=2, ec=light)

    # zoom level: taken from the second and the last layout entries; a label
    # is drawn only when the ratio differs from 1.
    upper_ratio = synteny.layout[1].ratio
    lower_ratio = synteny.layout[-1].ratio
    for ratio, ypos in ((upper_ratio, 0.89), (lower_ratio, 0.12)):
        if ratio == 1:
            continue
        caption = "{}x zoom".format(ratio).replace(".0x", "x")
        root.text(0.96, ypos, caption,
                  color=light, ha="right", va="center", size=14)

    # legend showing the orientation of the genes
    draw_gene_legend(root, 0.22, 0.3, 0.64, text=True)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(".".join((prefix, iopts.format)), dpi=iopts.dpi, iopts=iopts)
def oropetium(args):
    """
    %prog oropetium mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(oropetium.__doc__)
    parser.add_option("--extra", help="Extra features in BED format")
    opts, args, iopts = parser.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(
        fig, root, datafile, bedfile, slayout,
        switch=switch, extra_features=opts.extra
    )

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .57, .74, text=True, repeat=True)

    # On the left panel, make a species tree
    tag_color = 'lightslategrey'
    xs, xp = .16, .03
    species_y = (
        ("oropetium", .7),
        ("setaria", .6),
        ("sorghum", .5),
        ("rice", .4),
        ("brachypodium", .3),
    )
    coords = {}
    for name, height in species_y:
        coords[name] = (xs, height)

    # Each group of joins is drawn one step (`xp`) further to the left than
    # the previous group.
    join_groups = (
        (("Panicoideae", "setaria", "sorghum"),),
        (("BEP", "rice", "brachypodium"),
         ("PACMAD", "oropetium", "Panicoideae")),
        (("Poaceae", "BEP", "PACMAD"),),
    )
    for group in join_groups:
        xs -= xp
        for parent, first, second in group:
            coords[parent] = join_nodes(root, coords, first, second, xs)

    # Names of the internal nodes: labels below the node hang from the top,
    # the one above the node sits on its bottom edge.
    placements = (
        ("BEP", -.02, "top"),
        ("Poaceae", -.02, "top"),
        ("PACMAD", .02, "bottom"),
    )
    for tag, offset, anchor in placements:
        nx, ny = coords[tag]
        nx, ny = nx - .005, ny + offset
        root.text(nx, ny, tag, rotation=90,
                  ha="right", va=anchor, color=tag_color)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = ".".join(("oropetium", iopts.format))
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def litchi(args):
    """
    %prog litchi mcscan.out all.bed layout switch.ids

    Build a composite figure that calls graphis.synteny.
    """
    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(litchi.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="9x6")

    if len(args) != 4:
        sys.exit(not parser.print_help())

    datafile, bedfile, slayout, switch = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Synteny(fig, root, datafile, bedfile, slayout, switch=switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .4, .7, .82)

    # On the left panel, make a species tree
    label_color = 'lightslategrey'
    xs, xp = .16, .03
    leaf_rows = (
        ("lychee", .37),
        ("clementine", .5),
        ("cacao", .6),
        ("strawberry", .7),
        ("grape", .8),
    )
    coords = {}
    for species, ypos in leaf_rows:
        coords[species] = (xs, ypos)

    # Every join is placed one step (`xp`) further left than the previous
    # one; only the last join passes circle=False.
    joins = (
        ("Sapindales", "clementine", "lychee", {}),
        ("Rosid-II", "cacao", "Sapindales", {}),
        ("Rosid", "strawberry", "Rosid-II", {}),
        ("crown", "grape", "Rosid", {"circle": False}),
    )
    for parent, first, second, extra in joins:
        xs -= xp
        coords[parent] = join_nodes(root, coords, first, second, xs, **extra)

    # Names of the internal nodes
    for tag in ("Rosid", "Rosid-II", "Sapindales"):
        node_x, node_y = coords[tag]
        root.text(node_x - .01, node_y - .02, tag,
                  rotation=90, ha="right", va="top", color=label_color)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(".".join(("litchi", iopts.format)), dpi=iopts.dpi, iopts=iopts)
def compare(args):
    """
    %prog compare Evaluation.csv

    Compare performances of various variant callers on simulated STR datasets.
    """
    # The docstring above is handed to the parser as the usage text.
    p = OptionParser(compare.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x10")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args
    pf = datafile.rsplit(".", 1)[0]
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2,
                                                 figsize=(iopts.w, iopts.h))
    plt.tight_layout(pad=3)

    bbox = {'facecolor': 'tomato', 'alpha': .2, 'ec': 'w'}
    # Offset (in data units) between a threshold line and its text label.
    text_pad = 2

    # Read benchmark data from the file given on the command line; the path
    # used to be hard-coded, so the argument only affected the output name.
    df = pd.read_csv(datafile)
    truth = df["Truth"]
    axes = (ax1, ax2, ax3, ax4)
    progs = ("Manta", "Isaac", "GATK", "lobSTR")
    markers = ("bx-", "yo-", "md-", "c+-")

    # One panel per caller: called vs. true counts plus two threshold lines.
    for ax, prog, marker in zip(axes, progs, markers):
        ax.plot(truth, df[prog], marker)
        ax.plot(truth, truth, 'k--')  # to show diagonal
        ax.axhline(infected_thr, color='tomato')
        ax.text(max(truth) - text_pad, infected_thr + text_pad,
                'Risk threshold', bbox=bbox, ha="right")
        ax.axhline(ref_thr, color='tomato')
        ax.text(max(truth) - text_pad, ref_thr - text_pad,
                'Reference repeat count', bbox=bbox, ha="right", va="top")
        ax.set_title(SIMULATED_HAPLOID)
        ax.set_xlabel(r'Num of CAG repeats inserted ($\mathit{h}$)')
        ax.set_ylabel('Num of CAG repeats called')
        ax.legend([prog, 'Truth'], loc='best')

    root = fig.add_axes([0, 0, 1, 1])
    # Margin (in figure fractions) used to place the panel letters.
    label_pad = .03
    panel_labels(root, ((label_pad / 2, 1 - label_pad, "A"),
                        (1 / 2., 1 - label_pad, "B"),
                        (label_pad / 2, 1 / 2., "C"),
                        (1 / 2., 1 / 2., "D")))
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def report(args):
    """
    %prog report ksfile

    generate a report given a Ks result file (as produced by synonymous_calc.py).
    describe the median Ks, Ka values, as well as the distribution in stem-leaf plot
    """
    from jcvi.utils.cbook import SummaryStats
    from jcvi.graphics.histogram import stem_leaf_plot

    # The docstring above is handed to the parser as the usage text.
    p = OptionParser(report.__doc__)
    p.add_option("--pdf", default=False, action="store_true",
                 help="Generate graphic output for the histogram [default: %default]")
    p.add_option("--components", default=1, type="int",
                 help="Number of components to decompose peaks [default: %default]")
    add_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="5x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    ks_file, = args
    data = KsFile(ks_file)
    ks_min = opts.vmin
    ks_max = opts.vmax
    bins = opts.bins

    # Loop-invariant: stem width precision depends only on the bin size.
    digit = 2 if (ks_max * 1. / bins) < .1 else 1

    # The first listed field is skipped; of the rest only Ks columns are
    # summarized.
    for f in fields.split(",")[1:]:
        if "ks" not in f:
            continue

        columndata = [getattr(x, f) for x in data]
        columndata = [x for x in columndata if ks_min <= x <= ks_max]

        st = SummaryStats(columndata)
        title = "{0} ({1}): ".format(descriptions[f], ks_file)
        title += "Median:{0:.3f} (1Q:{1:.3f}|3Q:{2:.3f}||".\
                format(st.median, st.firstq, st.thirdq)
        title += "Mean:{0:.3f}|Std:{1:.3f}||N:{2})".\
                format(st.mean, st.sd, st.size)

        # Only Ks columns reach this point, so the bins are always
        # (0, ks_max, bins); the former non-Ks fallback was unreachable.
        stem_leaf_plot(columndata, 0, ks_max, bins, digit=digit, title=title)

    if not opts.pdf:
        return

    # Graphic output: plot the in-range `ng_ks` values.
    components = opts.components
    data = [x.ng_ks for x in data]
    data = [x for x in data if ks_min <= x <= ks_max]

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([.12, .1, .8, .8])
    kp = KsPlot(ax, ks_max, opts.bins, legendp=opts.legendp)
    kp.add_data(data, components, fill=opts.fill, fitted=opts.fit)
    kp.draw(title=opts.title)
def snpplot(args):
    """
    %prog counts.cdt

    Illustrate the histogram per SNP site.
    """
    # The docstring above is handed to the parser as the usage text.
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    (datafile,) = args

    # Read in CDT file. The context manager closes the handle even when a
    # row fails to parse.
    data = []
    nval = 0
    with open(datafile) as fp:
        # Skip the two leading lines.
        next(fp, None)
        next(fp, None)
        for row in fp:
            # Values start at the fifth whitespace-separated column.
            values = [float(x) for x in row.split()[4:]]
            nval = len(values)
            # normalize (the row total is computed once per row)
            total = sum(values)
            data.append([x * 1.0 / total for x in values])

    # Without rows the per-row spacing below would divide by zero.
    if not data:
        sys.exit("No SNP rows found in `{0}`".format(datafile))

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = 0.1, 0.9
    ymin, ymax = 0.1, 0.9
    yinterval = (ymax - ymin) / len(data)
    # Rows with more than three values get an extra leading gray segment;
    # the column count of the last row decides.
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")

    # One stacked horizontal bar per row, drawn from top to bottom.
    ystart = ymax
    for d in data:
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(
        0.05,
        0.5,
        "{0} LMD50 SNPs".format(len(data)),
        ha="center",
        va="center",
        rotation=90,
        color="lightslategray",
    )

    for x, t, c in zip((0.3, 0.5, 0.7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, 0.95, t, color=c, ha="center", va="center")

    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def mtdotplots(args):
    """
    %prog mtdotplots Mt3.5 Mt4.0 medicago.medicago.lifted.1x1.anchors

    Plot Mt3.5 and Mt4.0 side-by-side. This is essentially combined from two
    graphics.dotplot() function calls as panel A and B.
    """
    from jcvi.graphics.dotplot import check_beds, dotplot

    # The docstring above is handed to the parser as the usage text.
    parser = OptionParser(mtdotplots.__doc__)
    parser.set_beds()
    opts, args, iopts = parser.set_image_options(args, figsize="16x8", dpi=90)

    if len(args) != 3:
        sys.exit(not parser.print_help())

    left_dir, right_dir, anchors_name = args
    fig = plt.figure(1, (iopts.w, iopts.h))

    # Axes are created in this exact order: full-figure overlay, the two
    # half-figure frames, then the two inner plot areas.
    root = fig.add_axes([0, 0, 1, 1])
    left_frame = fig.add_axes([0, 0, .5, 1])
    right_frame = fig.add_axes([.5, 0, .5, 1])
    left_plot = fig.add_axes([.05, .1, .4, .8])
    right_plot = fig.add_axes([.55, .1, .4, .8])

    # Panel A
    anchorfile = op.join(left_dir, anchors_name)
    beds = check_beds(anchorfile, parser, opts)
    qbed, sbed, is_self = beds[0], beds[1], beds[4]
    dotplot(anchorfile, qbed, sbed, fig, left_frame, left_plot,
            is_self=is_self, genomenames="Mt3.5_Mt3.5")

    # Clear the BED options before check_beds runs for the second panel.
    opts.qbed = opts.sbed = None

    # Panel B
    anchorfile = op.join(right_dir, anchors_name)
    beds = check_beds(anchorfile, parser, opts)
    qbed, sbed, is_self = beds[0], beds[1], beds[4]
    dotplot(anchorfile, qbed, sbed, fig, right_frame, right_plot,
            is_self=is_self, genomenames="Mt4.0_Mt4.0")

    for xpos, letter in ((.03, "A"), (.53, "B")):
        root.text(xpos, .95, letter, ha="center", va="center", size=36)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    savefig(".".join(("mtdotplots", iopts.format)), dpi=iopts.dpi, iopts=iopts)
def multilineplot(args):
    """
    %prog multilineplot fastafile chr1

    Combine multiple line plots in one vertical stack
    Inputs must be BED-formatted.

    --lines: traditional line plots, useful for plotting feature freq
    """
    # The docstring above is handed to the parser as the usage text.
    p = OptionParser(multilineplot.__doc__)
    p.add_option("--lines",
                 help="Features to plot in lineplot [default: %default]")
    p.add_option("--colors",
                 help="List of colors matching number of input bed files")
    p.add_option("--mode", default="span", choices=("span", "count", "score"),
                 help="Accumulate feature based on [default: %default]")
    p.add_option("--binned", default=False, action="store_true",
                 help="Specify whether the input is already binned; " +
                 "if True, input files are considered to be binfiles")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, chr = args
    # The third value returned by check_window_options is not used here.
    window, shift, _ = check_window_options(opts)

    # --lines is effectively mandatory: without it there is nothing to plot
    # (the later code used to fail with a NameError in that case).
    if not opts.lines:
        sys.exit("--lines is required: comma-separated list of input bed files")
    lines = opts.lines.split(",")

    # --colors is indexed per track below (colors[i]), so it needs exactly
    # one entry per input file. A missing option is treated as empty rather
    # than crashing on len(None), and the check no longer relies on `assert`,
    # which is stripped under `python -O`.
    colors = opts.colors or ""
    if len(colors) != len(lines):
        sys.exit("Number of chosen colors must match" +
                 " number of input bed files")

    linebeds = get_beds(lines, binned=opts.binned)
    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode,
                            binned=opts.binned)

    clen = Sizes(fastafile).mapping[chr]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0
    plt.rcParams["figure.figsize"] = iopts.w, iopts.h

    fig, axarr = plt.subplots(nrows=len(lines))
    # With a single row plt.subplots returns one Axes, not a sequence.
    if len(linebeds) == 1:
        axarr = (axarr, )
    fig.suptitle(chr, color="darkslategray")

    for i, ax in enumerate(axarr):
        lineplot(ax, [linebins[i]], nbins, chr, window, shift,
                 color="{0}{1}".format(colors[i], 'r'))

    plt.subplots_adjust(hspace=0.5)

    image_name = chr + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def birch(args):
    """
    %prog birch seqids layout

    Plot birch macro-synteny, with an embedded phylogenetic tree to the right.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(birch.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x6")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqids, layout = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    karyotype = Karyotype(fig, root, seqids, layout)
    tracks = karyotype.layout

    tree_x = 0.79
    node_style = dict(rectangle=False, circle=False)

    # Embed a phylogenetic tree to the right: each tip sits at the y position
    # of the layout track with the same index.
    tips = ("Amborella", "Vitis", "Prunus", "Betula", "Populus", "Arabidopsis")
    coords = {}
    for idx, taxon in enumerate(tips):
        coords[taxon] = (tree_x, tracks[idx].y)

    # Internal nodes, joined bottom-up; each entry is (new node, child, child)
    clades = (
        ("fabids", "Prunus", "Betula"),
        ("malvids", "Populus", "Arabidopsis"),
        ("rosids", "fabids", "malvids"),
        ("eudicots", "rosids", "Vitis"),
        ("angiosperm", "eudicots", "Amborella"),
    )
    for clade, first, second in clades:
        coords[clade] = join_nodes(root, coords, first, second, tree_x,
                                   **node_style)

    # Show branch length: (from node, to node, label, text alignment kwargs)
    branches = (
        ("Amborella", "angiosperm", ">160.0", {}),
        ("eudicots", "angiosperm", ">78.2", dict(va="top")),
        ("Vitis", "eudicots", "138.5", {}),
        ("rosids", "eudicots", "19.8", dict(va="top")),
        ("Prunus", "fabids", "104.2", dict(ha="right", va="top")),
        ("Arabidopsis", "malvids", "110.2", dict(va="top")),
        ("fabids", "rosids", "19.8", dict(ha="right", va="top")),
        ("malvids", "rosids", "8.5", dict(va="top")),
    )
    for start, end, label, align in branches:
        branch_length(root, coords[start], coords[end], label, **align)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "birch" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def scaffold(args):
    """
    %prog scaffold scaffold.fasta synteny.blast synteny.sizes synteny.bed
                         physicalmap.blast physicalmap.sizes physicalmap.bed

    As evaluation of scaffolding, visualize external line of evidences:
    * Plot synteny to an external genome
    * Plot alignments to physical map
    * Plot alignments to genetic map (TODO)

    Each trio defines one panel to be plotted. blastfile defines the matchings
    between the evidences vs scaffolds. Then the evidence sizes, and evidence
    bed to plot dot plots.

    This script will plot a dot in the dot plot in the corresponding location
    the plots are one contig/scaffold per plot.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; positionals are the scaffold FASTA followed
    # by one or more (blast, sizes, bed) trios. One image named
    # "<scaffoldID>.<format>" is plotted per scaffold at or above --cutoff.
    from jcvi.utils.iter import grouper

    p = OptionParser(scaffold.__doc__)
    p.add_option("--cutoff", type="int", default=1000000,
                 help="Plot scaffolds with size larger than [default: %default]")
    p.add_option("--highlights",
                 help="A set of regions in BED format to highlight [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="14x8", dpi=150)

    # One scaffold file plus a whole number of trios
    if len(args) < 4 or len(args) % 3 != 1:
        sys.exit(not p.print_help())

    highlights = opts.highlights
    scafsizes = Sizes(args[0])
    trios = [(a, Sizes(b), Bed(c)) for a, b, c in grouper(args[1:], 3)]

    hlbed = Bed(highlights) if highlights else None

    for scaffoldID, scafsize in scafsizes.iter_sizes():
        if scafsize < opts.cutoff:
            continue
        logging.debug("Loading {0} (size={1})".format(scaffoldID,
                      thousands(scafsize)))

        # Write a one-line sizes file describing just this scaffold
        tmpname = scaffoldID + ".sizes"
        with open(tmpname, "w") as tmp:
            tmp.write("{0}\t{1}".format(scaffoldID, scafsize))

        tmpsizes = Sizes(tmpname)
        # NOTE(review): presumably removes the temporary file once loaded - confirm
        tmpsizes.close(clean=True)

        # Previously left unbound (NameError) when --highlights was not given.
        # NOTE(review): assumes plot_one_scaffold accepts highlights=None - confirm
        subhighlights = None
        if hlbed is not None:
            subhighlights = list(hlbed.sub_bed(scaffoldID))

        imagename = ".".join((scaffoldID, opts.format))
        plot_one_scaffold(scaffoldID, tmpsizes, None, trios, imagename, iopts,
                          highlights=subhighlights)
def main():
    # Command-line entry point: parse options, draw one Karyotype on a
    # full-figure canvas and save it as "<prefix>.<format>".
    parser = OptionParser(__doc__)
    parser.add_option("--basepair", default=False, action="store_true",
                      help="Use base pair position instead of gene rank")
    parser.add_option("--nocircles", default=False, action="store_true",
                      help="Do not plot chromosome circles")
    parser.add_option("--shadestyle", default="curve", choices=Shade.Styles,
                      help="Style of syntenic wedges")
    parser.add_option("-p", "--prefix", default="karyotype", dest="outpfx",
                      type="string", help="File prefix for output image",
                      metavar="FILE_PREFIX")
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqidsfile, layoutfile = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # The two boolean flags are negated to get the Karyotype switches
    draw_circles = not opts.nocircles
    use_generank = not opts.basepair
    Karyotype(fig, root, seqidsfile, layoutfile,
              plot_circles=draw_circles,
              shadestyle=opts.shadestyle,
              generank=use_generank)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = opts.outpfx + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; positionals are benchmark files whose rows
    # hold four whitespace-separated fields: prog, pcounts, tcounts, shared.
    # Files are paired with the fixed tags below, so at most three are drawn.
    # Always writes "venn.pdf".
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)  # one horizontal band per benchmark file
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        data = []
        # Context manager so the handle is closed (it was leaked before)
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        xstart = 0
        xwidth = 1. / len(data)  # one diagram per row, left to right
        for prog, pcounts, tcounts, shared in data:
            # Exclusive counts for each set, then the overlap
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            print(message, file=sys.stderr)
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; positionals are benchmark files whose rows
    # hold four whitespace-separated fields: prog, pcounts, tcounts, shared.
    # Files are paired with the fixed tags below, so at most three are drawn.
    # Always writes "venn.pdf".
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)  # one horizontal band per benchmark file
    tags = ("Bowers", "YGOB", "Schnable")
    for bc, tag in zip(bcs, tags):
        data = []
        # Context manager so the handle is closed (it was leaked before)
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        xstart = 0
        xwidth = 1. / len(data)  # one diagram per row, left to right
        for prog, pcounts, tcounts, shared in data:
            # Exclusive counts for each set, then the overlap
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".\
                format(percentage(shared, tcounts, precision=0, mode=-1),
                       percentage(shared, pcounts, precision=0, mode=-1))
            # Was the Python 2 statement form `print >> sys.stderr, message`,
            # which raises TypeError when executed on Python 3.
            print(message, file=sys.stderr)
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def main():
    # Command-line entry point: parse options, draw one Synteny figure on a
    # full-figure canvas and save it under the data file's stem.
    parser = OptionParser(__doc__)
    parser.add_option("--switch",
                      help="Rename the seqid with two-column file [default: %default]")
    parser.add_option("--tree",
                      help="Display trees on the bottom of the figure [default: %default]")
    parser.add_option("--extra", help="Extra features in BED format")
    parser.add_option("--scalebar", default=False, action="store_true",
                      help="Add scale bar to the plot")
    parser.add_option("--shadestyle", default="curve", choices=Shade.Styles,
                      help="Style of syntenic wedges")
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    # Output prefix: the data file name without its last extension
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    synteny_kwargs = dict(
        switch=opts.switch,
        tree=opts.tree,
        extra_features=opts.extra,
        scalebar=opts.scalebar,
        shadestyle=opts.shadestyle,
    )
    Synteny(fig, root, datafile, bedfile, layoutfile, **synteny_kwargs)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = prefix + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def multireport(args):
    """
    %prog multireport layoutfile

    Generate several Ks value distributions in the same figure. If the layout
    file is missing then a template file listing all ks files will be written.

    The layout file contains the Ks file, number of components, colors, and labels:

    # Ks file, ncomponents, label, color, marker
    LAP.sorghum.ks, 1, LAP-sorghum, r, o
    SES.sorghum.ks, 1, SES-sorghum, g, +
    MOL.sorghum.ks, 1, MOL-sorghum, m, ^

    If color or marker is missing, then a random one will be assigned.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(multireport.__doc__)
    parser.set_outfile(outfile="Ks_plot.pdf")
    add_plot_options(parser)
    opts, args, iopts = parser.set_image_options(args, figsize="8x6")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    layoutfile = args[0]
    lower, upper = opts.vmin, opts.vmax
    nbins = opts.bins
    filled = opts.fill

    layout = Layout(layoutfile)
    print(layout, file=sys.stderr)

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([0.12, 0.13, 0.8, 0.8])

    ksplot = KsPlot(ax, upper, nbins, legendp=opts.legendp)
    for entry in layout:
        # Read every ng_ks value first, then keep those inside [vmin, vmax]
        all_values = [rec.ng_ks for rec in KsFile(entry.ksfile)]
        in_range = [v for v in all_values if lower <= v <= upper]
        ksplot.add_data(
            in_range,
            entry.components,
            label=entry.label,
            color=entry.color,
            marker=entry.marker,
            fill=filled,
            fitted=opts.fit,
            kde=opts.kde,
        )

    ksplot.draw(title=opts.title, filename=opts.outfile)
def covlen(args):
    """
    %prog covlen covfile fastafile

    Plot coverage vs length. `covfile` is two-column listing contig id and
    depth of coverage.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; after option parsing exactly two positionals
    # are expected (covfile, fastafile). Writes "<covfile>.pdf". Exits via
    # sys.exit() on bad usage or when no contig passes the filters.
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from jcvi.formats.base import DictFile

    p = OptionParser(covlen.__doc__)
    p.add_option("--maxsize", default=1000000, type="int",
                 help="Max contig size")
    # Help text used to be a copy of --maxsize's ("Max contig size")
    p.add_option("--maxcov", default=100, type="int",
                 help="Max contig coverage")
    p.add_option("--color", default='m', help="Color of the data points")
    p.add_option("--kind", default="scatter",
                 choices=("scatter", "reg", "resid", "kde", "hex"),
                 help="Kind of plot to draw")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 2:
        sys.exit(not p.print_help())

    covfile, fastafile = args
    cov = DictFile(covfile, cast=float)
    s = Sizes(fastafile)
    maxsize, maxcov = opts.maxsize, opts.maxcov

    # Contigs absent from covfile count as zero coverage; contigs above
    # either cap are dropped.
    data = []
    for ctg, size in s.iter_sizes():
        c = cov.get(ctg, 0)
        if size > maxsize or c > maxcov:
            continue
        data.append((size, c))

    if not data:
        # zip(*[]) below would otherwise fail with an opaque unpack ValueError
        sys.exit("No contigs left after applying --maxsize/--maxcov")

    x, y = zip(*data)
    x = np.array(x)
    y = np.array(y)
    logging.debug("X size {0}, Y size {1}".format(x.size, y.size))

    df = pd.DataFrame()
    xlab, ylab = "Length", "Coverage of depth (X)"
    df[xlab] = x
    df[ylab] = y
    # NOTE(review): positional x/y and stat_func are only accepted by older
    # seaborn releases - confirm against the pinned seaborn version.
    sns.jointplot(xlab, ylab, kind=opts.kind, data=df,
                  xlim=(0, maxsize), ylim=(0, maxcov),
                  stat_func=None, edgecolor="w", color=opts.color)

    figname = covfile + ".pdf"
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
def snpplot(args):
    """
    %prog snpplot counts.cdt

    Illustrate the histogram per SNP site.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text;
    # the usage line previously omitted the subcommand name.
    #
    # args: command-line tokens; after option parsing exactly one positional
    # (the CDT file) is expected. Writes "<datafile stem>.<format>". Exits
    # via sys.exit() on bad usage or when the file has no data rows.
    p = OptionParser(snpplot.__doc__)
    opts, args, iopts = p.set_image_options(args, format="png")

    if len(args) != 1:
        sys.exit(not p.print_help())

    datafile, = args

    # Read in CDT file; the context manager closes the handle (leaked before)
    data = []
    nval = 0
    with open(datafile) as fp:
        # Skip the two leading lines
        next(fp)
        next(fp)
        for row in fp:
            # Only the fields after the first four are numeric values
            atoms = row.split()[4:]
            nval = len(atoms)
            values = [float(x) for x in atoms]
            # normalize so each row sums to 1; the total is computed once
            # instead of once per element
            total = sum(values)
            data.append([x * 1. / total for x in values])

    if not data:
        # Previously surfaced as a ZeroDivisionError / unbound `nval`
        sys.exit("No data rows found in {0}".format(datafile))

    pf = datafile.rsplit(".", 1)[0]
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    xmin, xmax = .1, .9
    ymin, ymax = .1, .9
    yinterval = (ymax - ymin) / len(data)
    # Three-value rows map to r/b/g; otherwise a leading light gray segment
    # precedes them. `nval` reflects the last row read.
    colors = "rbg" if nval == 3 else ["lightgray"] + list("rbg")
    ystart = ymax
    for d in data:
        # One horizontal stacked bar per row, segments laid end to end
        xstart = xmin
        for dd, c in zip(d, colors):
            xend = xstart + (xmax - xmin) * dd
            root.plot((xstart, xend), (ystart, ystart), "-", color=c)
            xstart = xend
        ystart -= yinterval

    root.text(.05, .5, "{0} LMD50 SNPs".format(len(data)),
              ha="center", va="center", rotation=90, color="lightslategray")
    for x, t, c in zip((.3, .5, .7), ("REF", "ALT", "HET"), "rbg"):
        root.text(x, .95, t, color=c, ha="center", va="center")
    normalize_axes(root)

    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def amborella(args):
    """
    %prog amborella seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphics.synteny.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; after option parsing exactly five positionals
    # are expected. Writes "amborella.<format>".
    p = OptionParser(amborella.__doc__)
    p.add_option(
        "--tree",
        help="Display trees on the bottom of the figure [default: %default]")
    p.add_option(
        "--switch",
        help="Rename the seqid with two-column file [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    tree = opts.tree

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch, tree=tree)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .5, .68, .5)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .05
    radius = .012
    # Raw strings: '\g' and '\e' are invalid escape sequences in a normal
    # literal (deprecated, a SyntaxWarning on recent Python). The resulting
    # string values are unchanged.
    TextCircle(root, x, .86, r'$\gamma$', radius=radius)
    TextCircle(root, x, .95, r'$\epsilon$', radius=radius)
    root.plot([x, x], [.83, .9], ":", color=fc, lw=2)
    # Dotted arc around the upper circle, from -70 to 249 degrees
    pts = plot_cap((x, .95), np.radians(range(-70, 250)), .02)
    arc_x, arc_y = zip(*pts)
    root.plot(arc_x, arc_y, ":", color=fc, lw=2)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "amborella"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def movieframe(args):
    """
    %prog movieframe tour test.clm contigs.ref.anchors

    Draw heatmap and synteny in the same plot.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(movieframe.__doc__)
    parser.add_option("--label", help="Figure title")
    parser.set_beds()
    parser.set_outfile(outfile=None)
    opts, args, iopts = parser.set_image_options(
        args, figsize="16x8", style="white", cmap="coolwarm",
        format="png", dpi=120)

    if len(args) != 3:
        sys.exit(not parser.print_help())

    tour, clmfile, anchorsfile = args
    tour = tour.split(",")

    # Output name falls back to "movieframe.<format>"; the label falls back
    # to the output file's base name without its last extension.
    image_name = opts.outfile
    if not image_name:
        image_name = "movieframe." + iopts.format
    label = opts.label
    if not label:
        label = op.basename(image_name).rsplit(".", 1)[0]

    clm = CLMFile(clmfile)
    totalbins, bins, breaks = make_bins(tour, clm.tig_to_size)
    matrix = read_clm(clm, totalbins, bins)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])              # whole canvas
    heat_ax = fig.add_axes([.05, .1, .4, .8])      # heatmap
    dot_ax = fig.add_axes([.55, .1, .4, .8])       # dot plot
    dot_canvas = fig.add_axes([.5, 0, .5, 1])      # dot plot canvas

    # Left axis: heatmap
    plot_heatmap(heat_ax, matrix, breaks, iopts)

    # Right axis: synteny
    beds = check_beds(anchorsfile, parser, opts, sorted=False)
    qbed, sbed = beds[0], beds[1]
    dotplot(anchorsfile, qbed, sbed, fig, dot_canvas, dot_ax,
            sep=False, title="")

    centered = dict(ha="center", va="center")
    root.text(.5, .98, clm.name, color="g", **centered)
    root.text(.5, .95, label, color="darkslategray", **centered)
    normalize_axes(root)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def ploidy(args):
    """
    %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphic.synteny.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(ploidy.__doc__)
    parser.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = parser.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not parser.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .27, .37, .52)

    # annotate the WGD events: one labelled circle per event, a "x2" marker
    # beside each, then two dotted vertical connectors
    shade = 'lightslategrey'
    circle_x = .09
    circle_r = .012
    events = ((.825, r'$\tau$'), (.8, r'$\sigma$'), (.72, r'$\rho$'))
    for ypos, symbol in events:
        TextCircle(root, circle_x, ypos, symbol, radius=circle_r, fc=shade)
    for ypos, _symbol in events:
        root.text(.12, ypos, r"$\times2$", color=shade,
                  ha="center", va="center")
    for segment in ([.85, .775], [.75, .675]):
        root.plot([circle_x, circle_x], segment, ":", color=shade, lw=2)

    panel_labels(root, ((.04, .96, 'A'), (.04, .54, 'B')))

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "pineapple-karyotype" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def ploidy(args):
    """
    %prog ploidy seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of WGD history of pineapple genome. The script calls both graphics.karyotype
    and graphic.synteny.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    # Its first line previously read "%prog cotton ...", which did not match
    # this function's name.
    #
    # args: command-line tokens; after option parsing exactly five positionals
    # are expected. Writes "pineapple-karyotype.<format>".
    p = OptionParser(ploidy.__doc__)
    p.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = p.set_image_options(args, figsize="9x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=opts.switch)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .27, .37, .52)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .09
    radius = .012
    TextCircle(root, x, .825, r'$\tau$', radius=radius, fc=fc)
    TextCircle(root, x, .8, r'$\sigma$', radius=radius, fc=fc)
    TextCircle(root, x, .72, r'$\rho$', radius=radius, fc=fc)
    # "x2" marker beside each event circle
    for ypos in (.825, .8, .72):
        root.text(.12, ypos, r"$\times2$", color=fc, ha="center", va="center")
    # Dotted vertical connectors at the circles' x position
    root.plot([x, x], [.85, .775], ":", color=fc, lw=2)
    root.plot([x, x], [.75, .675], ":", color=fc, lw=2)

    labels = ((.04, .96, 'A'), (.04, .54, 'B'))
    panel_labels(root, labels)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "pineapple-karyotype"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def ploidy(args):
    """
    %prog ploidy seqids layout

    Build a figure that calls graphics.karyotype to illustrate the high ploidy
    of B. napus genome.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; after option parsing exactly two positionals
    # are expected (seqids file, karyotype layout). Writes "napus.<format>".
    p = OptionParser(ploidy.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 2:
        sys.exit(not p.print_help())

    seqidsfile, klayout = args

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)

    fc = "darkslategrey"
    radius = .012
    ot = -.05  # use this to adjust vertical position of the left panel
    TextCircle(root, .1, .9 + ot, r'$\gamma$', radius=radius, fc=fc)
    root.text(.1, .88 + ot, r"$\times3$", ha="center", va="top", color=fc)
    TextCircle(root, .08, .79 + ot, r'$\alpha$', radius=radius, fc=fc)
    TextCircle(root, .12, .79 + ot, r'$\beta$', radius=radius, fc=fc)
    root.text(.1, .77 + ot, r"$\times3\times2\times2$",
              ha="center", va="top", color=fc)
    root.text(.1, .67 + ot, r"Brassica triplication",
              ha="center", va="top", color=fc, size=11)
    root.text(.1, .65 + ot, r"$\times3\times2\times2\times3$",
              ha="center", va="top", color=fc)
    root.text(.1, .42 + ot, r"Allo-tetraploidy",
              ha="center", va="top", color=fc, size=11)
    root.text(.1, .4 + ot, r"$\times3\times2\times2\times3\times2$",
              ha="center", va="top", color=fc)

    bb = dict(boxstyle="round,pad=.5", fc="w", ec="0.5", alpha=0.5)
    # Both halves are raw strings now: the second half contained the invalid
    # escape sequence '\m' in a normal literal. The concatenated string value
    # is unchanged.
    root.text(.5, .2 + ot, r"\noindent\textit{Brassica napus}\\"
              r"(A$\mathsf{_n}$C$\mathsf{_n}$ genome)", ha="center",
              size=16, color="k", bbox=bb)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "napus"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def amborella(args):
    """
    %prog amborella seqids karyotype.layout mcscan.out all.bed synteny.layout

    Build a composite figure that calls graphics.karyotype and graphics.synteny.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; after option parsing exactly five positionals
    # are expected. Writes "amborella.<format>".
    p = OptionParser(amborella.__doc__)
    p.add_option("--tree",
                 help="Display trees on the bottom of the figure [default: %default]")
    p.add_option("--switch",
                 help="Rename the seqid with two-column file [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x7")

    if len(args) != 5:
        sys.exit(not p.print_help())

    seqidsfile, klayout, datafile, bedfile, slayout = args
    switch = opts.switch
    tree = opts.tree

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, klayout)
    Synteny(fig, root, datafile, bedfile, slayout, switch=switch, tree=tree)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .5, .68, .5)

    # annotate the WGD events
    fc = 'lightslategrey'
    x = .05
    radius = .012
    # Raw strings: '\g' and '\e' are invalid escape sequences in a normal
    # literal (deprecated, a SyntaxWarning on recent Python). The resulting
    # string values are unchanged.
    TextCircle(root, x, .86, r'$\gamma$', radius=radius)
    TextCircle(root, x, .95, r'$\epsilon$', radius=radius)
    root.plot([x, x], [.83, .9], ":", color=fc, lw=2)
    # Dotted arc around the upper circle, from -70 to 249 degrees
    pts = plot_cap((x, .95), np.radians(range(-70, 250)), .02)
    arc_x, arc_y = zip(*pts)
    root.plot(arc_x, arc_y, ":", color=fc, lw=2)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    pf = "amborella"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def ccn(args):
    """
    %prog ccn combined.tsv

    Plot several ccn plots including chr1,chrX,chrY,chrM
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(ccn.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="12x8")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    tsvfile = args[0]
    table = pd.read_csv(tsvfile, sep="\t")
    figure_size = (iopts.w, iopts.h)
    composite_ccn(table, size=figure_size)

    # Output sits next to the input: "<input stem>.ccn.pdf"
    stem = tsvfile.rsplit(".", 1)[0]
    savefig(stem + ".ccn.pdf")
def correlation(args):
    """
    %prog correlation postgenomic-s.tsv

    Plot correlation of age vs. postgenomic features.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(correlation.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="12x8")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    tsvfile = args[0]
    table = pd.read_csv(tsvfile, sep="\t")
    figure_size = (iopts.w, iopts.h)
    composite_correlation(table, size=figure_size)

    # Output sits next to the input: "<input stem>.correlation.pdf"
    stem = tsvfile.rsplit(".", 1)[0]
    savefig(stem + ".correlation.pdf")
def qc(args):
    """
    %prog qc postgenomic-s.tsv

    Plot basic statistics of a given sample:
    Age, Gender, Ethnicity, Cohort, Chemistry
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; after option parsing exactly one positional
    # (a tab-separated file) is expected. Writes "<input stem>.qc.pdf".
    #
    # The parser used to be built from heritability.__doc__, so `qc` printed
    # another command's usage; it now uses its own docstring.
    p = OptionParser(qc.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 1:
        sys.exit(not p.print_help())

    tsvfile, = args
    df = pd.read_csv(tsvfile, sep="\t")
    composite_qc(df, size=(iopts.w, iopts.h))
    outfile = tsvfile.rsplit(".", 1)[0] + ".qc.pdf"
    savefig(outfile)
def multireport(args):
    """
    %prog multireport layoutfile

    Generate several Ks value distributions in the same figure. If the layout
    file is missing then a template file listing all ks files will be written.

    The layout file contains the Ks file, number of components, colors, and labels:

    # Ks file, ncomponents, label, color, marker
    LAP.sorghum.ks, 1, LAP-sorghum, r, o
    SES.sorghum.ks, 1, SES-sorghum, g, +
    MOL.sorghum.ks, 1, MOL-sorghum, m, ^

    If color or marker is missing, then a random one will be assigned.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; after option parsing exactly one positional
    # (the layout file) is expected. The figure is written to opts.outfile
    # (default "Ks_plot.pdf").
    p = OptionParser(multireport.__doc__)
    p.set_outfile(outfile="Ks_plot.pdf")
    add_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="5x5")

    if len(args) != 1:
        sys.exit(not p.print_help())

    layoutfile, = args
    ks_min = opts.vmin
    ks_max = opts.vmax
    bins = opts.bins
    fill = opts.fill
    layout = Layout(layoutfile)
    # Was the Python 2 statement form `print >> sys.stderr, layout`, which
    # raises TypeError when executed on Python 3.
    print(layout, file=sys.stderr)

    fig = plt.figure(1, (iopts.w, iopts.h))
    ax = fig.add_axes([.12, .1, .8, .8])

    kp = KsPlot(ax, ks_max, bins, legendp=opts.legendp)
    for lo in layout:
        data = KsFile(lo.ksfile)
        data = [x.ng_ks for x in data]
        # Keep only values inside the [vmin, vmax] window
        data = [x for x in data if ks_min <= x <= ks_max]
        kp.add_data(data, lo.components, label=lo.label,
                    color=lo.color, marker=lo.marker,
                    fill=fill, fitted=opts.fit)

    kp.draw(title=opts.title, filename=opts.outfile)
def main():
    # Command-line entry point: draw one Karyotype on a full-figure canvas
    # and save it as "karyotype.<format>".
    parser = OptionParser(__doc__)
    opts, args, iopts = parser.set_image_options(figsize="8x7")

    if len(args) != 2:
        sys.exit(not parser.print_help())

    seqidsfile, layoutfile = args
    figure_size = (iopts.w, iopts.h)
    fig = plt.figure(1, figure_size)
    root = fig.add_axes([0, 0, 1, 1])

    Karyotype(fig, root, seqidsfile, layoutfile)

    # Unit square canvas with no visible axis
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = "karyotype" + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def plotall(xargs):
    """
    %prog plotall input.bed

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    This command will plot each reconstructed object (non-singleton).
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(plotall.__doc__)
    add_allmaps_plot_options(parser)
    # Parsed only to validate usage; the raw xargs are forwarded to plot()
    opts, args, iopts = parser.set_image_options(xargs, figsize="10x6")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    inputbed = args[0]
    # The AGP file shares the BED file's prefix
    prefix = inputbed.rsplit(".", 1)[0]
    agp = AGP(prefix + ".agp")

    # Objects made of more than one AGP line
    multi_line = []
    for obj, lines in agp.iter_object():
        if len(lines) > 1:
            multi_line.append(obj)

    for seqid in sorted(multi_line):
        plot(xargs + [seqid])
def covlen(args):
    """
    %prog covlen covfile fastafile

    Plot coverage vs length. `covfile` is two-column listing contig id and
    depth of coverage.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text
    # (the "lenght" typo is fixed).
    #
    # args: command-line tokens; after option parsing exactly two positionals
    # are expected (covfile, fastafile). Writes "<covfile>.pdf". Exits via
    # sys.exit() on bad usage or when no contig passes the filters.
    import numpy as np
    import seaborn as sns
    from jcvi.formats.base import DictFile

    p = OptionParser(covlen.__doc__)
    p.add_option("--maxsize", default=100000, type="int",
                 help="Max contig size")
    # Help text used to be a copy of --maxsize's ("Max contig size")
    p.add_option("--maxcov", default=100, type="int",
                 help="Max contig coverage")
    opts, args, iopts = p.set_image_options(args, figsize="8x8")

    if len(args) != 2:
        sys.exit(not p.print_help())

    covfile, fastafile = args
    cov = DictFile(covfile, cast=float)
    s = Sizes(fastafile)
    maxsize, maxcov = opts.maxsize, opts.maxcov

    data = []
    for ctg, size in s.iter_sizes():
        # Contigs absent from covfile count as zero coverage instead of
        # raising KeyError
        c = cov.get(ctg, 0)
        if size > maxsize or c > maxcov:
            continue
        data.append((size, c))

    if not data:
        # zip(*[]) below would otherwise fail with an opaque unpack ValueError
        sys.exit("No contigs left after applying --maxsize/--maxcov")

    x, y = zip(*data)
    x = np.array(x)
    y = np.array(y)
    logging.debug("X size {0}, Y size {1}".format(x.size, y.size))
    sns.jointplot(x, y, kind="kde")

    figname = covfile + ".pdf"
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
def plot(args):
    """
    %prog plot workdir sample chr1,chr2

    Plot some chromosomes for visual proof. Separate multiple chromosomes with
    comma. Must contain folder workdir/sample-cn/.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    from jcvi.graphics.base import savefig

    parser = OptionParser(plot.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x7",
                                                 format="png")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    workdir, sample_key, chrs = args
    chromosomes = chrs.split(",")

    CopyNumberHMM(workdir=workdir).plot(sample_key, chrs=chromosomes)

    # Output file: "<sample>_cn.<format>"
    image_name = sample_key + "_cn." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def utricularia(args):
    # Draw a Synteny figure framed by two rounded rectangles, with optional
    # zoom captions and a gene-orientation legend. Usage text is borrowed
    # from jcvi.graphics.synteny's main().
    from jcvi.graphics.synteny import main as synteny_main

    parser = OptionParser(synteny_main.__doc__)
    parser.add_option("--switch", help="Rename the seqid with two-column file")
    opts, args, iopts = parser.set_image_options(args, figsize="8x7")

    if len(args) != 3:
        sys.exit(not parser.print_help())

    datafile, bedfile, layoutfile = args
    # Output prefix: the data file name without its last extension
    prefix = datafile.rsplit(".", 1)[0]

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    synteny = Synteny(fig, root, datafile, bedfile, layoutfile,
                      loc_label=False, switch=opts.switch)

    light = "lightslategrey"
    # Upper and lower frames: (corner, height)
    for corner, height in (((.02, .69), .24), ((.02, .09), .48)):
        RoundRect(root, corner, .96, height, fill=False, lw=2, ec=light)

    # zoom level of the second and the last layout entries; a caption is
    # drawn only when the ratio differs from 1
    zoom_top = synteny.layout[1].ratio
    zoom_bottom = synteny.layout[-1].ratio
    for ratio, ypos in ((zoom_top, .89), (zoom_bottom, .12)):
        if ratio == 1:
            continue
        caption = "{}x zoom".format(ratio).replace(".0x", "x")
        root.text(.96, ypos, caption, color=light,
                  ha="right", va="center", size=14)

    # legend showing the orientation of the genes
    draw_gene_legend(root, .22, .3, .64, text=True)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    image_name = prefix + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def regression(args):
    """
    %prog regression postgenomic-s.tsv

    Plot chronological vs. predicted age.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    parser = OptionParser(regression.__doc__)
    opts, args, iopts = parser.set_image_options(args, figsize="8x8")

    if len(args) != 1:
        sys.exit(not parser.print_help())

    tsvfile = args[0]
    table = pd.read_csv(tsvfile, sep="\t")

    # Two-column frame keyed by the axis labels used in the plot
    chrono = "Chronological age (yr)"
    pred = "Predicted age (yr)"
    ages = pd.DataFrame({
        chrono: table["hli_calc_age_sample_taken"],
        pred: table["Predicted Age"],
    })

    grid = sns.jointplot(chrono, pred, ages, joint_kws={"s": 6},
                         xlim=(0, 100), ylim=(0, 80))
    # Apply the requested figure size after the plot is created
    grid.fig.set_figwidth(iopts.w)
    grid.fig.set_figheight(iopts.h)

    stem = tsvfile.rsplit(".", 1)[0]
    savefig(stem + ".regression.pdf")
def simulation(args):
    """
    %prog simulation inversion.txt translocation.txt maps.txt multimaps.txt

    Plot ALLMAPS accuracy across a range of simulated datasets.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    #
    # args: command-line tokens; after option parsing exactly four positionals
    # are expected, one data file per panel (A-D). Writes
    # "simulation.<format>".
    p = OptionParser(simulation.__doc__)
    opts, args, iopts = p.set_image_options(args, dpi=300)

    if len(args) != 4:
        sys.exit(not p.print_help())

    dataA, dataB, dataC, dataD = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    # 2x2 grid of panels: A/B on the top row, C/D on the bottom row
    A = fig.add_axes([.12, .62, .35, .35])
    B = fig.add_axes([.62, .62, .35, .35])
    C = fig.add_axes([.12, .12, .35, .35])
    D = fig.add_axes([.62, .12, .35, .35])
    dataA = import_data(dataA)
    dataB = import_data(dataB)
    dataC = import_data(dataC)
    dataD = import_data(dataD)
    subplot(A, dataA, "Inversion error rate", "Accuracy", xlim=.5)
    # Raw string for the last legend entry: '\%' is an invalid escape
    # sequence in a normal literal. The resulting string value is unchanged.
    subplot(B, dataB, "Translocation error rate", "Accuracy", xlim=.5,
            legend=("intra-chromosomal", "inter-chromosomal",
                    r"75\% intra + 25\% inter"))
    subplot(C, dataC, "Number of input maps", "Accuracy", xcast=int)
    subplot(D, dataD, "Number of input maps", "Accuracy", xcast=int)

    labels = ((.03, .97, "A"), (.53, .97, "B"),
              (.03, .47, "C"), (.53, .47, "D"))
    panel_labels(root, labels)

    normalize_axes(root)
    image_name = "simulation." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)