def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    # NOTE: the docstring above doubles as the command-line usage text
    # (it is handed to OptionParser), so keep it terse.
    #
    # Each benchmark file becomes one row of Venn diagrams; each line of a
    # file ("prog pcounts tcounts shared") becomes one diagram in that row.
    # The figure is written to "venn.pdf".
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    # zip() stops at the shorter input, so at most len(tags) files are drawn
    for bc, tag in zip(bcs, tags):
        data = []
        # Context manager so the handle is released even if a row is malformed
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            # venn2 wants (only-in-A, only-in-B, in-both)
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".format(
                percentage(shared, tcounts, precision=0, mode=-1),
                percentage(shared, pcounts, precision=0, mode=-1))
            # Was `print >> sys.stderr, message`, which is a TypeError on
            # Python 3; a plain write behaves the same on Python 2 and 3.
            sys.stderr.write(message + "\n")
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def venn(args):
    """
    %prog venn *.benchmark

    Display benchmark results as Venn diagram.
    """
    # NOTE: the docstring above doubles as the command-line usage text
    # (it is handed to OptionParser), so keep it terse.
    #
    # Each benchmark file becomes one row of Venn diagrams; each line of a
    # file ("prog pcounts tcounts shared") becomes one diagram in that row.
    # The figure is written to "venn.pdf".
    from matplotlib_venn import venn2

    p = OptionParser(venn.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="9x9")

    if len(args) < 1:
        sys.exit(not p.print_help())

    bcs = args
    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    pad = .02
    ystart = 1
    ywidth = 1. / len(bcs)
    tags = ("Bowers", "YGOB", "Schnable")
    # zip() stops at the shorter input, so at most len(tags) files are drawn
    for bc, tag in zip(bcs, tags):
        data = []
        # Context manager so the handle is released even if a row is malformed
        with open(bc) as fp:
            for row in fp:
                prog, pcounts, tcounts, shared = row.split()
                data.append((prog, int(pcounts), int(tcounts), int(shared)))

        xstart = 0
        xwidth = 1. / len(data)
        for prog, pcounts, tcounts, shared in data:
            # venn2 wants (only-in-A, only-in-B, in-both)
            a, b, c = pcounts - shared, tcounts - shared, shared
            ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                               xwidth - 2 * pad, ywidth - 2 * pad])
            venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
            message = "Sn={0} Pu={1}".format(
                percentage(shared, tcounts, precision=0, mode=-1),
                percentage(shared, pcounts, precision=0, mode=-1))
            print(message, file=sys.stderr)
            ax.text(.5, .92, latex(message), ha="center", va="center",
                    transform=ax.transAxes, color='b')
            ax.set_axis_off()
            xstart += xwidth
        ystart -= ywidth

    panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                        (.04, .96 - 2 * ywidth, "C")))
    panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                        (.5, .98 - ywidth, "14 Yeast genomes"),
                        (.5, .98 - 2 * ywidth, "4 Grass genomes")))
    normalize_axes(root)
    savefig("venn.pdf", dpi=opts.dpi)
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # Reads "<pf>.lifted.bed" and "<pf>.agp", then draws three panels:
    #   A - marker scatter with the fitted curve (g.spl)
    #   B - the derivative curve (g.spld), labelled as recombination rate
    #   C - a zoomed cartoon of two hard-coded scaffolds and the gap between
    # The figure is saved as "estimategaps.<format>".
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A
    xstart, ystart = 0.15, 0.65
    w, h = 0.7, 0.3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(0.05, 0.95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    # Negative correlation: flip the y-axis so the trend still reads upward
    if rho < 0:
        ax.invert_yaxis()

    # Panel B
    ystart -= 0.28
    h = 0.25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(
        ax,
        xlim=chrsize,
        ylim=25 * 1e-6,
        xfactor=1e-6,
        xlabel="Physical position (Mb)",
        yfactor=1000000,
        ylabel="Recomb. rate\n(cM / Mb)",
    )
    ax.xaxis.grid(False)

    # Panel C (specific to JMMale-1)
    a, b = "scaffold_1076", "scaffold_861"
    sizes = {
        x.component_id: (x.object_beg, x.object_end,
                         x.component_span, x.orientation)
        for x in g.agp
        if not x.is_gap
    }
    # Only the start of `a`, the end of `b` and both spans are needed below
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = 0.6 / total_size
    y = 0.16
    pad = 0.03
    pb_ratio = w / chrsize

    # Zoom
    lsg = "lightslategray"
    root.plot((0.15 + pb_ratio * a_beg, 0.2),
              (ystart, ystart - 0.14), ":", color=lsg)
    root.plot((0.15 + pb_ratio * b_end, 0.3),
              (ystart, ystart - 0.08), ":", color=lsg)
    ends = []
    for tag, size, marker, beg in zip(
        (a, b), (asize, bsize), (49213, 81277),
        (0.2, 0.2 + (asize + gapsize) * ratio)
    ):
        end = beg + size * ratio
        # Marker offset within the scaffold -> figure coordinate
        marker = beg + marker * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=0.025, fc="gainsboro")

    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is not a valid escape and warns on newer Pythons;
    # the resulting text is unchanged.
    root.text(
        sum(markers) / 2,
        ypos + pad,
        r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
        **fontprop
    )

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(
        sum(markers) / 2,
        ypos - 3 * pad,
        r"$\textit{Estimated gap size: 96,433bp}$",
        color="r",
        ha="center",
        va="center",
    )

    labels = ((0.05, 0.95, "A"), (0.05, 0.6, "B"), (0.05, 0.27, "C"))
    panel_labels(root, labels)
    normalize_axes(root)

    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def plot_breaks_and_labels(fig, root, ax, gx, gy, xsize, ysize,
                           qbreaks, sbreaks, sep=True, chrlw=.1,
                           sepcolor="g", minfont=5, stdpf=True):
    """
    Draw chromosome separators, chromosome labels and the frame of a dot plot.

    `qbreaks` / `sbreaks` are iterables of (seqid, beg, end) for the x and y
    axes; `xsize` / `ysize` are the axis extents. Separators (only when `sep`
    is true) and the frame are drawn on `ax`; labels are written on `root`,
    skipping any whose selected font size is below `minfont`. `gx` / `gy`
    become the axis labels. Returns (xlim, ylim), where ylim is inverted.
    """
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis
    line_style = dict(lw=chrlw, color=sepcolor)
    th = TextHandler(fig)

    def collect(breaks, extent, draw_separator):
        # Gather (label, midpoint, fontsize) per chromosome, drawing the
        # separator at each chromosome start when requested.
        found = []
        for seqid, start, stop in breaks:
            fontsize = th.select_fontsize(abs(stop - start) * .8 / extent)
            name = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])
            found.append((name, (start + stop) / 2, fontsize))
            if sep:
                draw_separator(start)
        return found

    # plot the chromosome breaks: vertical lines for x, horizontal for y
    xchr_labels = collect(
        qbreaks, xsize,
        lambda at: ax.plot([at, at], ylim, "-", **line_style))
    ychr_labels = collect(
        sbreaks, ysize,
        lambda at: ax.plot(xlim, [at, at], "-", **line_style))

    # plot the chromosome labels (small chromosomes are skipped)
    for name, mid, fontsize in xchr_labels:
        if fontsize < minfont:
            continue
        root.text(.1 + mid * .8 / xsize, .91, latex(name), size=fontsize,
                  ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for name, mid, fontsize in ychr_labels:
        if fontsize < minfont:
            continue
        root.text(.91, .9 - mid * .8 / ysize, latex(name), size=fontsize,
                  va="center", color="grey")

    # Plot the frame: top, bottom, left, right
    frame = (
        (xlim, [0, 0]),
        (xlim, [ysize, ysize]),
        ([0, 0], ylim),
        ([xsize, xsize], ylim),
    )
    for xs, ys in frame:
        ax.plot(xs, ys, "-", **line_style)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    ticklines = ax.get_xticklines() + ax.get_yticklines()
    for tickline in ticklines:
        tickline.set_visible(False)

    set_human_axis(ax)

    ticklabels = ax.get_xticklabels() + ax.get_yticklabels()
    plt.setp(ticklabels, color='gray', size=10)

    return xlim, ylim
def gcdepth(args):
    """
    %prog gcdepth sample_name tag

    Plot GC content vs depth vs genomnic bins. Inputs are mosdepth output:
    - NA12878_S1.mosdepth.global.dist.txt
    - NA12878_S1.mosdepth.region.dist.txt
    - NA12878_S1.regions.bed.gz
    - NA12878_S1.regions.bed.gz.csi
    - NA12878_S1.regions.gc.bed.gz

    A sample mosdepth.sh script might look like:
    ```
    #!/bin/bash
    LD_LIBRARY_PATH=mosdepth/htslib/ mosdepth/mosdepth $1 \\
        bams/$1.bam -t 4 -c chr1 -n --by 1000

    bedtools nuc -fi GRCh38/WholeGenomeFasta/genome.fa \\
        -bed $1.regions.bed.gz \\
        | pigz -c > $1.regions.gc.bed.gz
    ```
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # Reads "<sample_name>.regions.gc.bed.gz", plots every bin as a faint
    # dot, overlays a per-GC-percent band computed by MAD_interval, and
    # saves "<sample_name>.gcdepth.png".
    import hashlib
    from jcvi.algorithms.formula import MAD_interval as confidence_interval
    from jcvi.graphics.base import latex, plt, savefig, set2

    p = OptionParser(gcdepth.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    sample_name, tag = args
    # The tag is used to add to title, also provide a random (hashed) color.
    # hashlib only accepts bytes; passing a text string raises TypeError on
    # Python 3, so encode first (byte strings are passed through untouched).
    tag_bytes = tag if isinstance(tag, bytes) else tag.encode("utf-8")
    coloridx = int(hashlib.sha256(tag_bytes).hexdigest(), 16) % len(set2)
    color = set2[coloridx]

    # mosdepth outputs a table that we can use to plot relationship
    gcbedgz = sample_name + ".regions.gc.bed.gz"
    df = pd.read_csv(gcbedgz, delimiter="\t")
    # A list (not a tuple) is the unambiguous way to select several columns
    mf = df.loc[:, ["4_usercol", "6_pct_gc"]]
    mf.columns = ["depth", "gc"]

    # We discard any bins that are gaps
    mf = mf[(mf["depth"] > 0.001) | (mf["gc"] > 0.001)]

    # Create GC bins, keyed by GC content rounded to a whole percent
    gcbins = defaultdict(list)
    for _, row in mf.iterrows():
        gcp = int(round(row["gc"] * 100))
        gcbins[gcp].append(row["depth"])
    gcd = sorted((k * 0.01, confidence_interval(v))
                 for (k, v) in gcbins.items())
    gcd_x, gcd_y = zip(*gcd)
    m, lo, hi = zip(*gcd_y)

    # Plot
    plt.plot(
        mf["gc"],
        mf["depth"],
        ".",
        color="lightslategray",
        ms=2,
        mec="lightslategray",
        alpha=0.1,
    )
    patch = plt.fill_between(
        gcd_x,
        lo,
        hi,
        facecolor=color,
        alpha=0.25,
        zorder=10,
        linewidth=0.0,
        label="Median +/- MAD band",
    )
    plt.plot(gcd_x, m, "-", color=color, lw=2, zorder=20)

    ax = plt.gca()
    ax.legend(handles=[patch], loc="best")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 100)
    ax.set_title("{} ({})".format(latex(sample_name), tag))
    ax.set_xlabel("GC content")
    ax.set_ylabel("Depth")
    savefig(sample_name + ".gcdepth.png")
def gcdepth(args):
    """
    %prog gcdepth sample_name tag

    Plot GC content vs depth vs genomnic bins. Inputs are mosdepth output:
    - NA12878_S1.mosdepth.global.dist.txt
    - NA12878_S1.mosdepth.region.dist.txt
    - NA12878_S1.regions.bed.gz
    - NA12878_S1.regions.bed.gz.csi
    - NA12878_S1.regions.gc.bed.gz

    A sample mosdepth.sh script might look like:
    ```
    #!/bin/bash
    LD_LIBRARY_PATH=mosdepth/htslib/ mosdepth/mosdepth $1 \\
        bams/$1.bam -t 4 -c chr1 -n --by 1000

    bedtools nuc -fi GRCh38/WholeGenomeFasta/genome.fa \\
        -bed $1.regions.bed.gz \\
        | pigz -c > $1.regions.gc.bed.gz
    ```
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # Reads "<sample_name>.regions.gc.bed.gz", plots every bin as a faint
    # dot, overlays a per-GC-percent band computed by MAD_interval, and
    # saves "<sample_name>.gcdepth.png".
    import hashlib
    from jcvi.algorithms.formula import MAD_interval as confidence_interval
    from jcvi.graphics.base import latex, plt, savefig, set2

    p = OptionParser(gcdepth.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    sample_name, tag = args
    # The tag is used to add to title, also provide a random (hashed) color.
    # hashlib only accepts bytes; passing a text string raises TypeError on
    # Python 3, so encode first (byte strings are passed through untouched).
    # sha1 is only used to pick a colour index here, not for security.
    tag_bytes = tag if isinstance(tag, bytes) else tag.encode("utf-8")
    coloridx = int(hashlib.sha1(tag_bytes).hexdigest(), 16) % len(set2)
    color = set2[coloridx]

    # mosdepth outputs a table that we can use to plot relationship
    gcbedgz = sample_name + ".regions.gc.bed.gz"
    df = pd.read_csv(gcbedgz, delimiter="\t")
    # A list (not a tuple) is the unambiguous way to select several columns
    mf = df.loc[:, ["4_usercol", "6_pct_gc"]]
    mf.columns = ["depth", "gc"]

    # We discard any bins that are gaps
    mf = mf[(mf["depth"] > .001) | (mf["gc"] > .001)]

    # Create GC bins, keyed by GC content rounded to a whole percent
    gcbins = defaultdict(list)
    for _, row in mf.iterrows():
        gcp = int(round(row["gc"] * 100))
        gcbins[gcp].append(row["depth"])
    gcd = sorted((k * .01, confidence_interval(v))
                 for (k, v) in gcbins.items())
    gcd_x, gcd_y = zip(*gcd)
    m, lo, hi = zip(*gcd_y)

    # Plot
    plt.plot(mf["gc"], mf["depth"], ".", color="lightslategray", ms=2,
             mec="lightslategray", alpha=.1)
    patch = plt.fill_between(gcd_x, lo, hi,
                             facecolor=color, alpha=.25, zorder=10,
                             linewidth=0.0, label="Median +/- MAD band")
    plt.plot(gcd_x, m, "-", color=color, lw=2, zorder=20)

    ax = plt.gca()
    ax.legend(handles=[patch], loc="best")
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 100)
    ax.set_title("{} ({})".format(latex(sample_name), tag))
    ax.set_xlabel("GC content")
    ax.set_ylabel("Depth")
    savefig(sample_name + ".gcdepth.png")
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
            is_self=False, synteny=False, cmap_text=None, genomenames=None,
            sample_number=10000, minfont=5, palette=None, chrlw=.01,
            title=None, sepcolor="gainsboro"):
    """
    Draw a dot plot of the gene pairs listed in `anchorfile` onto `ax`.

    anchorfile: path to a text file; lines starting with "#" open a new
        block, other lines hold query, subject and (as last column) a value.
    qbed, sbed: objects for the x and y genomes; this function uses their
        `.order` mapping, `get_breaks()`, `len()` and `.filename`.
    fig, root, ax: figure, full-canvas axes (labels, title, legend) and the
        plotting axes.
    vmin, vmax: values are clamped to this range; unparsable values count
        as vmax.
    is_self: mirror every point and draw a diagonal.
    synteny: draw boxes around clusters found by batch_scan().
    cmap_text: if set, a colormap legend with this caption is drawn.
    genomenames: "x_y" axis labels; defaults to the bed file basenames.
    sample_number: plot at most this many points (random subset).
    minfont: chromosome labels below this font size are skipped.
    palette: optional per-block colouring; needs `.get(block_id, default)`
        and `.colors`.
    chrlw, sepcolor: line width and colour of chromosome separators.
    title: overrides the auto-generated title.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    block_id = 0
    # Must persist across rows: it is set on a "#" header line and applies to
    # every pair until the next header. Resetting it per row (as before)
    # meant the palette colour was never used.
    block_color = None
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional third
            # column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                value = vmax

            # Clamp into [vmin, vmax]
            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, _ = qorder[query]
            si, _ = sorder[subject]

            nv = vmax - value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) "
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    if not palette:
        data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=default_cm,
                   vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos, .91, latex(label), size=fontsize,
                      ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91, pos, latex(label), size=fontsize,
                      va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Each pair was stored twice (mirror image), so the count is
            # even; floor division keeps it an int on Python 3 as well.
            npairs //= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # Pipeline: crop/convert the image, run edge detection, close and fill
    # the edges, label objects (optionally via watershed), keep objects
    # within the configured size range, sample the centre colour of each,
    # write one line per seed to the output file and save a 4-panel summary
    # figure. Returns the list of Seed objects.
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    pngfile, = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != '.':
        mkdir(outdir)
    if calib:
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)

    resizefile, mainfile, labelfile, exif = \
        convert_image(pngfile, pf, outdir=outdir,
                      rotate=opts.rotate,
                      rows=rows, cols=cols,
                      labelrows=labelrows, labelcols=labelcols)

    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, nrows=1,
                                             figsize=(iopts.w, iopts.h))

    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    else:
        # Previously fell through to an UnboundLocalError on `edges`
        raise ValueError("Unknown edge filter: {0}".format(ff))
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance, threshold_rel=.05, indices=False)
        coordinates = peak_local_max(distance, threshold_rel=.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering; limits are given as a percentage of the canvas
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug("Find objects with pixels between {0} ({1}%) and {2} ({3}%)"
                  .format(min_size, opts.minsize, max_size, opts.maxsize))

    # Plotting
    ax1.set_title('Original picture')
    ax1.imshow(oimg)

    # Raw string: "\s" is not a valid escape and warns on newer Pythons;
    # the resulting text is unchanged.
    params = r"{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title('Edge detection\n({0})'.format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], 'g.')
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title('Object detection')
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    # Seeds are numbered from 1; stop once opts.count seeds were processed
    for i, props in enumerate(rp, start=1):
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        ax2.plot((x0 - major_dx, x0 + major_dx),
                 (y0 + major_dy, y0 - major_dy), 'r-')
        ax2.plot((x0 - minor_dx, x0 + minor_dx),
                 (y0 - minor_dy, y0 + minor_dy), 'r-')

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * .35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug("Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".
                      format(i, npixels, len(pixels),
                             100. * npixels / canvas_size))

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle((minc, minr), maxc - minc, maxr - minr,
                         fill=False, ec='w', lw=1)
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc, mr, "{0}".format(i), color='w',
                 ha="center", va="center", size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(.1, .92, "File: {0}".format(latex(filename)), color='g')
    ax4.text(.1, .86, "Label: {0}".format(latex(accession)), color='m')
    yy = .8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        # Only the first 7 seeds are listed on the summary panel
        if i > 7:
            continue
        ax4.text(.01, yy, str(i), va="center", bbox=dict(fc='none', ec='k'))
        ax4.text(.1, yy, o.pixeltag, va="center")
        yy -= .04
        ax4.add_patch(Rectangle((.1, yy - .025), .12, .05, lw=0,
                                fc=rgb_to_hex(o.rgb)))
        ax4.text(.27, yy, o.hashtag, va="center")
        yy -= .06
    ax4.text(.1, yy, "(A total of {0} objects displayed)".format(nb_labels),
             color="darkslategrey")
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family='Helvetica', size=8)
        ax.set_yticklabels(yticklabels, family='Helvetica', size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
def seeds(args):
    """
    %prog seeds [pngfile|jpgfile]

    Extract seed metrics from [pngfile|jpgfile]. Use --rows and --cols to crop image.
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # Pipeline: replace the background, crop/convert the image, run edge
    # detection, close and fill the edges, label objects (optionally via
    # watershed), keep objects within the configured size range, sample the
    # centre colour of each, write one line per seed to the output file and
    # save a 4-panel summary figure. Returns the list of Seed objects.
    p = OptionParser(seeds.__doc__)
    p.set_outfile()
    opts, args, iopts = add_seeds_options(p, args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pngfile, ) = args
    pf = opts.prefix or op.basename(pngfile).rsplit(".", 1)[0]
    sigma, kernel = opts.sigma, opts.kernel
    rows, cols = opts.rows, opts.cols
    labelrows, labelcols = opts.labelrows, opts.labelcols
    ff = opts.filter
    calib = opts.calibrate
    outdir = opts.outdir
    if outdir != ".":
        mkdir(outdir)
    if calib:
        calib = json.load(must_open(calib))
        pixel_cm_ratio, tr = calib["PixelCMratio"], calib["RGBtransform"]
        tr = np.array(tr)

    nbcolor = opts.changeBackground
    pngfile = convert_background(pngfile, nbcolor)
    resizefile, mainfile, labelfile, exif = convert_image(
        pngfile,
        pf,
        outdir=outdir,
        rotate=opts.rotate,
        rows=rows,
        cols=cols,
        labelrows=labelrows,
        labelcols=labelcols,
    )
    oimg = load_image(resizefile)
    img = load_image(mainfile)

    fig, (ax1, ax2, ax3, ax4) = plt.subplots(ncols=4, nrows=1,
                                             figsize=(iopts.w, iopts.h))

    # Edge detection
    img_gray = rgb2gray(img)
    logging.debug("Running {0} edge detection ...".format(ff))
    if ff == "canny":
        edges = canny(img_gray, sigma=opts.sigma)
    elif ff == "roberts":
        edges = roberts(img_gray)
    elif ff == "sobel":
        edges = sobel(img_gray)
    else:
        # Previously fell through to an UnboundLocalError on `edges`
        raise ValueError("Unknown edge filter: {0}".format(ff))
    edges = clear_border(edges, buffer_size=opts.border)
    selem = disk(kernel)
    closed = closing(edges, selem) if kernel else edges
    filled = binary_fill_holes(closed)

    # Watershed algorithm
    if opts.watershed:
        distance = distance_transform_edt(filled)
        local_maxi = peak_local_max(distance, threshold_rel=0.05,
                                    indices=False)
        coordinates = peak_local_max(distance, threshold_rel=0.05)
        markers, nmarkers = label(local_maxi, return_num=True)
        logging.debug("Identified {0} watershed markers".format(nmarkers))
        labels = watershed(closed, markers, mask=filled)
    else:
        labels = label(filled)

    # Object size filtering; limits are given as a percentage of the canvas
    w, h = img_gray.shape
    canvas_size = w * h
    min_size = int(round(canvas_size * opts.minsize / 100))
    max_size = int(round(canvas_size * opts.maxsize / 100))
    logging.debug(
        "Find objects with pixels between {0} ({1}%) and {2} ({3}%)".format(
            min_size, opts.minsize, max_size, opts.maxsize))

    # Plotting
    ax1.set_title("Original picture")
    ax1.imshow(oimg)

    # Raw string: "\s" is not a valid escape and warns on newer Pythons;
    # the resulting text is unchanged.
    params = r"{0}, $\sigma$={1}, $k$={2}".format(ff, sigma, kernel)
    if opts.watershed:
        params += ", watershed"
    ax2.set_title("Edge detection\n({0})".format(params))
    closed = gray2rgb(closed)
    ax2_img = labels
    if opts.edges:
        ax2_img = closed
    elif opts.watershed:
        ax2.plot(coordinates[:, 1], coordinates[:, 0], "g.")
    ax2.imshow(ax2_img, cmap=iopts.cmap)

    ax3.set_title("Object detection")
    ax3.imshow(img)

    filename = op.basename(pngfile)
    if labelfile:
        accession = extract_label(labelfile)
    else:
        accession = pf

    # Calculate region properties
    rp = regionprops(labels)
    rp = [x for x in rp if min_size <= x.area <= max_size]
    nb_labels = len(rp)
    logging.debug("A total of {0} objects identified.".format(nb_labels))
    objects = []
    # Seeds are numbered from 1; stop once opts.count seeds were processed
    for i, props in enumerate(rp, start=1):
        if i > opts.count:
            break

        y0, x0 = props.centroid
        orientation = props.orientation
        major, minor = props.major_axis_length, props.minor_axis_length
        major_dx = cos(orientation) * major / 2
        major_dy = sin(orientation) * major / 2
        minor_dx = sin(orientation) * minor / 2
        minor_dy = cos(orientation) * minor / 2
        ax2.plot((x0 - major_dx, x0 + major_dx),
                 (y0 + major_dy, y0 - major_dy), "r-")
        ax2.plot((x0 - minor_dx, x0 + minor_dx),
                 (y0 - minor_dy, y0 + minor_dy), "r-")

        npixels = int(props.area)
        # Sample the center of the blob for color
        d = min(int(round(minor / 2 * 0.35)) + 1, 50)
        x0d, y0d = int(round(x0)), int(round(y0))
        square = img[(y0d - d):(y0d + d), (x0d - d):(x0d + d)]
        pixels = []
        for row in square:
            pixels.extend(row)
        logging.debug("Seed #{0}: {1} pixels ({2} sampled) - {3:.2f}%".format(
            i, npixels, len(pixels), 100.0 * npixels / canvas_size))

        rgb = pixel_stats(pixels)
        objects.append(Seed(filename, accession, i, rgb, props, exif))
        minr, minc, maxr, maxc = props.bbox
        rect = Rectangle((minc, minr), maxc - minc, maxr - minr,
                         fill=False, ec="w", lw=1)
        ax3.add_patch(rect)
        mc, mr = (minc + maxc) / 2, (minr + maxr) / 2
        ax3.text(mc, mr, "{0}".format(i), color="w",
                 ha="center", va="center", size=6)

    for ax in (ax2, ax3):
        ax.set_xlim(0, h)
        ax.set_ylim(w, 0)

    # Output identified seed stats
    ax4.text(0.1, 0.92, "File: {0}".format(latex(filename)), color="g")
    ax4.text(0.1, 0.86, "Label: {0}".format(latex(accession)), color="m")
    yy = 0.8
    fw = must_open(opts.outfile, "w")
    if not opts.noheader:
        print(Seed.header(calibrate=calib), file=fw)
    for o in objects:
        if calib:
            o.calibrate(pixel_cm_ratio, tr)
        print(o, file=fw)
        i = o.seedno
        # Only the first 7 seeds are listed on the summary panel
        if i > 7:
            continue
        ax4.text(0.01, yy, str(i), va="center", bbox=dict(fc="none", ec="k"))
        ax4.text(0.1, yy, o.pixeltag, va="center")
        yy -= 0.04
        ax4.add_patch(
            Rectangle((0.1, yy - 0.025), 0.12, 0.05, lw=0,
                      fc=rgb_to_hex(o.rgb)))
        ax4.text(0.27, yy, o.hashtag, va="center")
        yy -= 0.06
    ax4.text(
        0.1,
        yy,
        "(A total of {0} objects displayed)".format(nb_labels),
        color="darkslategray",
    )
    normalize_axes(ax4)

    for ax in (ax1, ax2, ax3):
        xticklabels = [int(x) for x in ax.get_xticks()]
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_xticklabels(xticklabels, family="Helvetica", size=8)
        ax.set_yticklabels(yticklabels, family="Helvetica", size=8)

    image_name = op.join(outdir, pf + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    return objects
def multilineplot(args):
    """
    %prog multilineplot fastafile chr1

    Combine multiple line plots in one vertical stack
    Inputs must be BED-formatted.

    --lines: traditional line plots, useful for plotting feature freq
    """
    # NOTE: the docstring above doubles as the command-line usage text.
    #
    # Draws one subplot per --lines entry for the requested chromosome and
    # saves the stack as "<chr>.<format>".
    p = OptionParser(multilineplot.__doc__)
    p.add_option("--lines",
                 help="Features to plot in lineplot [default: %default]")
    p.add_option("--colors",
                 help="List of colors matching number of input bed files")
    p.add_option("--mode", default="span",
                 choices=("span", "count", "score"),
                 help="Accumulate feature based on [default: %default]")
    p.add_option("--binned", default=False, action="store_true",
                 help="Specify whether the input is already binned; " +
                      "if True, input files are considered to be binfiles")
    p.add_option("--ymax", type="int", help="Set Y-axis max")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    # Without --lines there is nothing to plot; previously this surfaced
    # later as a NameError on `lines`.
    if not opts.lines:
        sys.exit(not p.print_help())

    # `chrom` rather than `chr`, to avoid shadowing the builtin
    fastafile, chrom = args
    window, shift, _subtract, merge = check_window_options(opts)

    lines = opts.lines.split(",")
    # colors is indexed per line below (colors[i]), so one entry per line is
    # required; a missing --colors is reported the same way as a mismatch
    # instead of failing with len(None).
    colors = opts.colors or ""
    if len(colors) != len(lines):
        raise ValueError("Number of chosen colors must match" +
                         " number of input bed files")
    linebeds = get_beds(lines, binned=opts.binned)

    linebins = get_binfiles(linebeds, fastafile, shift, mode=opts.mode,
                            binned=opts.binned, merge=merge)

    clen = Sizes(fastafile).mapping[chrom]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0
    plt.rcParams["figure.figsize"] = iopts.w, iopts.h

    fig, axarr = plt.subplots(nrows=len(lines))
    # With a single row plt.subplots returns a bare Axes, not an array
    if len(linebeds) == 1:
        axarr = (axarr, )
    fig.suptitle(latex(chrom), color="darkslategray")

    for i, ax in enumerate(axarr):
        lineplot(ax, [linebins[i]], nbins, chrom, window, shift,
                 color="{0}{1}".format(colors[i], 'r'))

    if opts.ymax:
        ax.set_ylim(0, opts.ymax)

    plt.subplots_adjust(hspace=0.5)

    image_name = chrom + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
            is_self=False, synteny=False, cmap_text=None, cmap="copper",
            genomenames=None, sample_number=10000, minfont=5, palette=None,
            chrlw=.01, title=None, sepcolor="gainsboro"):
    """
    Draw a dot plot of the gene pairs listed in `anchorfile` onto `ax`.

    anchorfile: path to a text file; lines starting with "#" open a new
        block, other lines hold query, subject and (as last column) a value.
    qbed, sbed: objects for the x and y genomes; this function uses their
        `.order` mapping, `get_breaks()`, `len()` and `.filename`.
    fig, root, ax: figure, full-canvas axes (labels, title, legend) and the
        plotting axes.
    vmin, vmax: with `cmap_text`, pairs whose value falls outside this range
        are dropped; unparsable values count as vmax.
    is_self: mirror every point and draw a diagonal.
    synteny: draw boxes around clusters found by batch_scan().
    cmap_text: if set, values are read from the file and a colormap legend
        with this caption is drawn; otherwise every value is 0.
    cmap: colormap passed to scatter and the legend.
    genomenames: "x_y" axis labels; defaults to the bed file basenames.
    sample_number: plot at most this many points (random subset).
    minfont: chromosome labels below this font size are skipped.
    palette: optional per-block colouring; needs `.get(block_id, default)`
        and `.colors`.
    chrlw, sepcolor: line width and colour of chromosome separators.
    title: overrides the auto-generated title.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Capping values within [{0:.1f}, {1:.1f}]"
                      .format(vmin, vmax))

    block_id = 0
    # Must persist across rows: it is set on a "#" header line and applies to
    # every pair until the next header. Resetting it per row (as before)
    # meant the palette colour was never used.
    block_color = None
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional third
            # column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            if cmap_text:
                try:
                    value = float(value)
                except ValueError:
                    value = vmax

                # Out-of-range pairs are skipped rather than clamped
                if value < vmin:
                    continue
                if value > vmax:
                    continue
            else:
                value = 0

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, _ = qorder[query]
            si, _ = sorder[subject]

            nv = value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) "
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # NOTE: points are no longer sorted by value before plotting; they are
    # drawn in the order collected above (or in sample order).
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap,
                   vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos, .91, latex(label), size=fontsize,
                      ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91, pos, latex(label), size=fontsize,
                      va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(markup(gx), size=16)
    ax.set_ylabel(markup(gy), size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Each pair was stored twice (mirror image), so the count is
            # even; floor division keeps it an int on Python 3 as well.
            npairs //= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(markup(title), x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def estimategaps(args):
    """
    %prog estimategaps JM-4 chr1 JMMale-1

    Illustrate ALLMAPS gap estimation algorithm.
    """
    # NOTE: the docstring above is handed to OptionParser below, so it is also
    # the command-line usage text; keep it in `%prog ...` form.
    p = OptionParser(estimategaps.__doc__)
    opts, args, iopts = p.set_image_options(args, figsize="6x6", dpi=300)

    if len(args) != 3:
        sys.exit(not p.print_help())

    # Input files are derived from the prefix: <pf>.lifted.bed and <pf>.agp
    pf, seqid, mlg = args
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"

    function = lambda x: x.cm
    cc = Map(bedfile, scaffold_info=True, function=function)
    agp = AGP(agpfile)

    g = GapEstimator(cc, agp, seqid, mlg, function=function)
    pp, chrsize, mlgsize = g.pp, g.chrsize, g.mlgsize
    spl, spld = g.spl, g.spld
    g.compute_all_gaps(verbose=False)

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])

    # Panel A: scatter of g.scatter_data with the curve spl(t) drawn on top
    xstart, ystart = .15, .65
    w, h = .7, .3
    t = np.linspace(0, chrsize, 1000)
    ax = fig.add_axes([xstart, ystart, w, h])
    mx, my = zip(*g.scatter_data)
    rho = spearmanr(mx, my)

    dsg = "g"
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(mx, my, ".", color=set2[3])
    ax.plot(t, spl(t), "-", color=dsg)
    ax.text(.05, .95, mlg, va="top", transform=ax.transAxes)
    normalize_lms_axis(ax, xlim=chrsize, ylim=mlgsize,
                       ylabel="Genetic distance (cM)")
    # Flip the y-axis when the correlation is negative
    if rho < 0:
        ax.invert_yaxis()

    # Panel B: the curve spld(t), with open circles at the positions in pp
    ystart -= .28
    h = .25
    ax = fig.add_axes([xstart, ystart, w, h])
    ax.vlines(pp, 0, mlgsize, colors="beige")
    ax.plot(t, spld(t), "-", lw=2, color=dsg)
    ax.plot(pp, spld(pp), "o", mfc="w", mec=dsg, ms=5)
    normalize_lms_axis(ax, xlim=chrsize, ylim=25 * 1e-6,
                       xfactor=1e-6, xlabel="Physical position (Mb)",
                       yfactor=1000000, ylabel="Recomb. rate\n(cM / Mb)")

    # Panel C (specific to JMMale-1): the scaffold names, marker offsets and
    # the text annotations below are all hard-coded for this one example.
    a, b = "scaffold_1076", "scaffold_861"
    sizes = dict((x.component_id, (x.object_beg, x.object_end,
                                   x.component_span, x.orientation))
                 for x in g.agp if not x.is_gap)
    # Only the start of `a`, the end of `b` and the two spans are needed
    a_beg, _, asize, _ = sizes[a]
    _, b_end, bsize, _ = sizes[b]
    gapsize = g.get_gapsize(a)
    total_size = asize + gapsize + bsize
    ratio = .6 / total_size  # canvas units per base for the zoomed view
    y = .16
    pad = .03
    pb_ratio = w / chrsize  # canvas units per base in panels A/B

    # Zoom: dotted guide lines from panel B down to the enlarged scaffolds
    lsg = "lightslategray"
    root.plot((xstart + pb_ratio * a_beg, .2),
              (ystart, ystart - .14), ":", color=lsg)
    root.plot((xstart + pb_ratio * b_end, .3),
              (ystart, ystart - .08), ":", color=lsg)
    ends = []
    for tag, size, offset, beg in zip((a, b), (asize, bsize),
                                      (49213, 81277),
                                      (.2, .2 + (asize + gapsize) * ratio)):
        end = beg + size * ratio
        marker = beg + offset * ratio
        ends.append((beg, end, marker))
        root.plot((marker,), (y,), "o", color=lsg)
        root.text((beg + end) / 2, y + pad, latex(tag),
                  ha="center", va="center")
        HorizontalChromosome(root, beg, end, y, height=.025, fc='gainsboro')

    # Transpose the (beg, end, marker) triples into three parallel tuples
    begs, ends, markers = zip(*ends)
    fontprop = dict(color=lsg, ha="center", va="center")
    ypos = y + pad * 2
    root.plot(markers, (ypos, ypos), "-", lw=2, color=lsg)
    # Raw string: "\L" is not a valid escape sequence in a normal literal
    root.text(sum(markers) / 2, ypos + pad,
              r"Distance: 1.29cM $\Leftrightarrow$ 211,824bp (6.1 cM/Mb)",
              **fontprop)

    ypos = y - pad
    xx = markers[0], ends[0]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "34,115bp", **fontprop)
    xx = markers[1], begs[1]
    root.plot(xx, (ypos, ypos), "-", lw=2, color=lsg)
    root.text(sum(xx) / 2, ypos - pad, "81,276bp", **fontprop)

    root.plot((ends[0], begs[1]), (y, y), ":", lw=2, color=lsg)
    root.text(sum(markers) / 2, ypos - 3 * pad,
              r"$\textit{Estimated gap size: 96,433bp}$",
              color="r", ha="center", va="center")

    labels = ((.05, .95, 'A'), (.05, .6, 'B'), (.05, .27, 'C'))
    panel_labels(root, labels)
    normalize_axes(root)

    # The output name is fixed and does not depend on the input prefix
    pf = "estimategaps"
    image_name = pf + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def draw_chromosomes(
    root,
    bedfile,
    sizes,
    iopts,
    mergedist,
    winsize,
    imagemap,
    mappingfile=None,
    gauge=False,
    legend=True,
    empty=False,
    title=None,
):
    """
    Paint a set of chromosomes on `root` and color their regions by class.

    One chromosome is drawn per entry of the chromosome-length table, side by
    side, and every feature in `bedfile` is drawn on top as a rectangle
    colored by its (possibly remapped) class. Optionally writes an HTML image
    map next to the BED file and adds a gauge, a class legend and a title.

    root: drawing target; only `.text()` and `.add_patch()` are called on it
        (presumably a matplotlib Axes spanning the figure -- confirm at caller).
    bedfile: path handed to `Bed`; its name minus the last extension is the
        prefix of the image-map file.
    sizes: if truthy, handed to `Sizes` to get the chromosome lengths;
        otherwise each length is the largest feature end seen per seqid.
    iopts: only `w`, `h` and `dpi` are read, and only for the image map.
    mergedist: a feature that starts within this distance of the previous
        feature's end, and has the same class, is drawn from that end.
    winsize: window length used to chop features into image-map areas.
    imagemap: if truthy, write `<prefix>.map`.
    mappingfile: optional tab-delimited file mapping feature names to classes;
        when it has 3 or more columns, a second lookup built with
        keypos=1/valuepos=2 supplies preset colors.
    gauge: if truthy, draw a `Gauge` scaled to the longest chromosome.
    legend: if truthy, draw one legend swatch per class.
    empty: if truthy, the text of an extra unfilled legend swatch.
    title: optional title text, passed through `markup`.
    """
    bed = Bed(bedfile)
    prefix = bedfile.rsplit(".", 1)[0]

    if imagemap:
        imgmapfile = prefix + ".map"
        # Opened here and closed near the end of the function.
        # NOTE(review): not closed if an exception is raised in between.
        mapfh = open(imgmapfile, "w")
        print('<map id="' + prefix + '">', file=mapfh)

    if mappingfile:
        mappings = DictFile(mappingfile, delimiter="\t")
        classes = sorted(set(mappings.values()))
        preset_colors = (DictFile(
            mappingfile, keypos=1, valuepos=2, delimiter="\t")
                         if DictFile.num_columns(mappingfile) >= 3 else {})
    else:
        # Without a mapping file every distinct feature name is its own class
        classes = sorted(set(x.accn for x in bed))
        mappings = dict((x, x) for x in classes)
        preset_colors = {}

    logging.debug("A total of {} classes found: {}".format(
        len(classes), ",".join(classes)))

    # Assign colors to classes; preset colors (if any) override the palette
    ncolors = max(3, min(len(classes), 12))
    palette = set1_n if ncolors <= 8 else set3_n
    colorset = palette(number=ncolors)
    colorset = sample_N(colorset, len(classes))
    class_colors = dict(zip(classes, colorset))
    class_colors.update(preset_colors)
    logging.debug("Assigned colors: {}".format(class_colors))

    chr_lens = {}
    centromeres = {}
    if sizes:
        chr_lens = Sizes(sizes).sizes_mapping
    else:
        # groupby only merges adjacent items, so this relies on `bed`
        # iterating with each seqid's lines together -- TODO confirm.
        for b, blines in groupby(bed, key=(lambda x: x.seqid)):
            blines = list(blines)
            maxlen = max(x.end for x in blines)
            chr_lens[b] = maxlen

    # Record centromere positions, then rewrite each feature's name in place
    # to its class ("-" when the name has no mapping).
    for b in bed:
        accn = b.accn
        if accn == "centromere":
            centromeres[b.seqid] = b.start
        if accn in mappings:
            b.accn = mappings[accn]
        else:
            b.accn = "-"

    chr_number = len(chr_lens)
    if centromeres:
        # Centromeres must be given for every chromosome or for none
        assert chr_number == len(
            centromeres), "chr_number = {}, centromeres = {}".format(
                chr_number, centromeres)

    r = 0.7  # width and height of the whole chromosome set
    xstart, ystart = 0.15, 0.85
    xinterval = r / chr_number
    xwidth = xinterval * 0.5  # chromosome width
    max_chr_len = max(chr_lens.values())
    ratio = r / max_chr_len  # canvas / base

    # first the chromosomes
    for a, (chr, clen) in enumerate(sorted(chr_lens.items())):
        xx = xstart + a * xinterval + 0.5 * xwidth
        root.text(xx, ystart + 0.01, str(get_number(chr)), ha="center")
        if centromeres:
            yy = ystart - centromeres[chr] * ratio
            ChromosomeWithCentromere(root,
                                     xx,
                                     ystart,
                                     yy,
                                     ystart - clen * ratio,
                                     width=xwidth)
        else:
            Chromosome(root, xx, ystart, ystart - clen * ratio, width=xwidth)

    # Column index of each chromosome, in the same sorted order as above
    chr_idxs = dict((a, i) for i, a in enumerate(sorted(chr_lens.keys())))

    alpha = 1
    # color the regions
    for chr in sorted(chr_lens.keys()):
        # NOTE(review): `segment_size` (and `clen` below) are never read.
        segment_size, excess = 0, 0
        bac_list = []
        prev_end, prev_klass = 0, None
        for b in bed.sub_bed(chr):
            clen = chr_lens[chr]
            idx = chr_idxs[chr]
            klass = b.accn
            if klass == "centromere":
                continue
            start = b.start
            end = b.end
            # Close small gaps between consecutive features of the same class
            if start < prev_end + mergedist and klass == prev_klass:
                start = prev_end
            xx = xstart + idx * xinterval
            yystart = ystart - end * ratio
            yyend = ystart - start * ratio
            root.add_patch(
                Rectangle(
                    (xx, yystart),
                    xwidth,
                    yyend - yystart,
                    fc=class_colors.get(klass, "lightslategray"),
                    lw=0,
                    alpha=alpha,
                ))
            prev_end, prev_klass = b.end, klass

            if imagemap:
                """ `segment` : size of current BAC being investigated + `excess` `excess` : left-over bases from the previous BAC, as a result of iterating over `winsize` regions of `segment` """
                # A new run of windows starts at this feature only when
                # nothing was left over from the previous one.
                if excess == 0:
                    segment_start = start
                segment = (end - start + 1) + excess
                # Emit one map area per full `winsize` window; the remainder
                # is carried forward in `excess` with its feature name.
                while True:
                    if segment < winsize:
                        bac_list.append(b.accn)
                        excess = segment
                        break
                    segment_end = segment_start + winsize - 1
                    # y is offset by (1 - ystart), i.e. measured from the top
                    # edge -- presumably image-map pixel rows; confirm against
                    # write_ImageMapLine.
                    tlx, tly, brx, bry = (
                        xx,
                        (1 - ystart) + segment_start * ratio,
                        xx + xwidth,
                        (1 - ystart) + segment_end * ratio,
                    )
                    print(
                        "\t" + write_ImageMapLine(
                            tlx,
                            tly,
                            brx,
                            bry,
                            iopts.w,
                            iopts.h,
                            iopts.dpi,
                            chr + ":" + ",".join(bac_list),
                            segment_start,
                            segment_end,
                        ),
                        file=mapfh,
                    )

                    segment_start += winsize
                    segment -= winsize
                    bac_list = []

        # Flush the final partial window of this chromosome. `b`, `end` and
        # `xx` still hold the values from the last feature iterated above.
        if imagemap and excess > 0:
            bac_list.append(b.accn)
            segment_end = end
            tlx, tly, brx, bry = (
                xx,
                (1 - ystart) + segment_start * ratio,
                xx + xwidth,
                (1 - ystart) + segment_end * ratio,
            )
            print(
                "\t" + write_ImageMapLine(
                    tlx,
                    tly,
                    brx,
                    bry,
                    iopts.w,
                    iopts.h,
                    iopts.dpi,
                    chr + ":" + ",".join(bac_list),
                    segment_start,
                    segment_end,
                ),
                file=mapfh,
            )

    if imagemap:
        print("</map>", file=mapfh)
        mapfh.close()
        logging.debug("Image map written to `{0}`".format(mapfh.name))

    if gauge:
        xstart, ystart = 0.9, 0.85
        Gauge(root, xstart, ystart - r, ystart, max_chr_len)

    # Centromeres never get a legend entry
    if "centromere" in class_colors:
        del class_colors["centromere"]

    # class legends, spaced evenly along one row
    if legend:
        xstart = 0.1
        # NOTE(review): divides by zero when there are no classes left.
        xinterval = 0.8 / len(class_colors)
        xwidth = 0.04
        yy = 0.08
        for klass, cc in sorted(class_colors.items()):
            if klass == "-":
                continue
            root.add_patch(
                Rectangle((xstart, yy), xwidth, xwidth, fc=cc, lw=0,
                          alpha=alpha))
            root.text(xstart + xwidth + 0.01, yy, latex(klass), fontsize=10)
            xstart += xinterval

    # Extra unfilled swatch, placed after the last legend entry.
    # NOTE(review): reuses `xstart`, `xwidth` and `yy` from the legend block;
    # with legend=False these keep earlier values and `yy` may be undefined.
    if empty:
        root.add_patch(
            Rectangle((xstart, yy), xwidth, xwidth, fill=False, lw=1))
        root.text(xstart + xwidth + 0.01, yy, empty, fontsize=10)

    if title:
        root.text(0.5, 0.95, markup(title), ha="center", va="center")
def test_latex(s, expected):
    """Check that `latex` turns the input `s` into `expected`."""
    from jcvi.graphics.base import latex

    rendered = latex(s)
    assert rendered == expected, "Expect {}".format(expected)
def multilineplot(args):
    """
    %prog multilineplot fastafile chr1

    Combine multiple line plots in one vertical stack
    Inputs must be BED-formatted.

    --lines: traditional line plots, useful for plotting feature freq
    """
    # NOTE: the docstring above is handed to OptionParser below, so it is also
    # the command-line usage text; keep it in `%prog ...` form.
    p = OptionParser(multilineplot.__doc__)
    p.add_option("--lines", help="Features to plot in lineplot")
    p.add_option("--colors",
                 help="List of colors matching number of input bed files")
    p.add_option(
        "--mode",
        default="span",
        choices=("span", "count", "score"),
        help="Accumulate feature based on",
    )
    p.add_option(
        "--binned",
        default=False,
        action="store_true",
        help="Specify whether the input is already binned; " +
        "if True, input files are considered to be binfiles",
    )
    p.add_option("--ymax", type="int", help="Set Y-axis max")
    add_window_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    if len(args) != 2:
        sys.exit(not p.print_help())

    # Both options are needed further down: without --lines there is nothing
    # to plot (and no subplot count), and every plotted line indexes --colors.
    # Fail with the usage text instead of a NameError/TypeError.
    if not opts.lines or not opts.colors:
        sys.stderr.write("Both --lines and --colors must be specified\n")
        sys.exit(not p.print_help())

    fastafile, seqid = args
    window, shift, _subtract, merge = check_window_options(opts)

    # `colors` is indexed per line below, so it needs one entry per file
    colors = opts.colors
    lines = opts.lines.split(",")
    assert len(colors) == len(lines), (
        "Number of chosen colors must match" + " number of input bed files")
    linebeds = get_beds(lines, binned=opts.binned)

    linebins = get_binfiles(linebeds,
                            fastafile,
                            shift,
                            mode=opts.mode,
                            binned=opts.binned,
                            merge=merge)

    clen = Sizes(fastafile).mapping[seqid]
    nbins = get_nbins(clen, shift)

    plt.rcParams["xtick.major.size"] = 0
    plt.rcParams["ytick.major.size"] = 0
    plt.rcParams["figure.figsize"] = iopts.w, iopts.h

    # One subplot per input file, stacked vertically
    fig, axarr = plt.subplots(nrows=len(lines))
    # With a single row, plt.subplots returns a bare Axes; wrap it so the
    # loop below can iterate uniformly.
    if len(linebeds) == 1:
        axarr = (axarr, )
    fig.suptitle(latex(seqid), color="darkslategray")

    for i, ax in enumerate(axarr):
        lineplot(
            ax,
            [linebins[i]],
            nbins,
            seqid,
            window,
            shift,
            color="{0}{1}".format(colors[i], "r"),
        )

    # NOTE(review): this only caps the last subplot (`ax` left over from the
    # loop above) -- confirm whether every subplot should get --ymax.
    if opts.ymax:
        ax.set_ylim(0, opts.ymax)

    plt.subplots_adjust(hspace=0.5)

    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
def plot_breaks_and_labels(
    fig,
    root,
    ax,
    gx,
    gy,
    xsize,
    ysize,
    qbreaks,
    sbreaks,
    sep=True,
    chrlw=0.1,
    sepcolor="g",
    minfont=5,
    stdpf=True,
    chpf=True,
):
    """
    Draw chromosome separators, chromosome labels and the frame of a dot plot.

    This variant is hard-coded for one figure: the x-axis limit, both sets of
    chromosome labels and the two axis titles are fixed values rather than
    being derived from the arguments.

    fig: figure handed to `TextHandler` for font-size selection.
    root: target for the chromosome label text (`root.text`).
    ax: axes that receive the separator lines, frame, limits and axis labels.
    gx, gy: unused here; the axis titles are hard-coded.
    xsize, ysize: total extents of the x and y axes; used to scale label
        positions and to place the frame.
    qbreaks, sbreaks: iterables of (seqid, beg, end) for the x and y axes.
    sep: draw a separator line at the start of every chromosome.
    chrlw, sepcolor: line width and color for separators and frame.
    minfont: labels whose font size is below this are not drawn.
    stdpf, chpf: control shortening of seqids via `seqid_parse`. The labels
        computed that way are later replaced by the hard-coded ones.

    Returns the tuple (xlim, ylim) that was applied to `ax`.
    """
    xlim = (0, 47724.0)  # hard-coding xlim maximum
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbreaks:
        xsize_ratio = abs(end - beg) * 0.8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        if chpf:
            seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        if sep:
            ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbreaks:
        ysize_ratio = abs(end - beg) * 0.8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        if chpf:
            seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize * 0.85))
        if sep:
            ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    # The labels collected above are discarded in favor of fixed
    # (label, position, fontsize) entries. Raw strings keep the "\ " sequence
    # intact without relying on an invalid escape.
    xchr_labels = [
        (r'chr\ 1', 1997.5, 12),
        (r'chr\ 2', 5944.5, 12),
        (r'chr\ 3', 9014.0, 12),
        (r'chr\ 4', 11351.5, 12),
        (r'chr\ 5', 13639.0, 12),
        (r'chr\ 6', 17657.5, 12),
        (r'chr\ 7', 22329.0, 12),
        (r'chr\ 8', 25466.0, 12),
        (r'chr\ 9', 28092.0, 12),
        (r'chr\ 10', 31361.5, 12),
        (r'chr\ 11', 34457.0, 12),
        (r'chr\ 12', 37234.0, 12),
        (r'chr\ 13', 41112.5, 12),
        (r'chr\ 14', 43851.0, 12),
        (r'chr\ 15', 45258.5, 12),
        (r'scf\ 16', 46740.5, 12),
        (r'scf\ 458', 47724.0, 12),
    ]
    for label, pos, fontsize in xchr_labels:
        # Map the data position into root coordinates (0.1 .. 0.9)
        pos = 0.1 + pos * 0.8 / xsize
        if fontsize >= minfont:
            root.text(
                pos,
                0.91,
                latex(label),
                size=fontsize * 0.85,
                ha="center",
                va="bottom",
                rotation=45,
                color="black",
            )

    # remember y labels are inverted
    # Entries with font size 0 are skipped whenever minfont is positive.
    ychr_labels = [
        (r'chr\ 1', 2672.0, 10.2),
        (r'chr\ 2', 7532.0, 10.2),
        (r'chr\ 3', 12035.0, 10.2),
        (r'chr\ 4', 16228.0, 10.2),
        (r'chr\ 5', 19784.5, 10.2),
        (r'chr\ 6', 23211.0, 10.2),
        (r'chr\ 7', 26612.5, 10.2),
        (r'chr\ 8', 29773.0, 10.2),
        (r'chr\ 9', 32518.0, 10.2),
        (r'chr\ 10', 35004.5, 10.2),
        (r'chr\ 11', 37760.0, 10.2),
        (r'chr\ 12', 40635.5, 10.2),
        ('ChrSy', 42048.0, 0),
        ('ChrUn', 42140.5, 0),
    ]
    for label, pos, fontsize in ychr_labels:
        pos = 0.9 - pos * 0.8 / ysize
        if fontsize >= minfont:
            root.text(0.91,
                      pos,
                      latex(label),
                      size=fontsize * 0.85,
                      va="center",
                      color="black")

    # Plot the frame
    ax.plot(xlim, [0, 0], "-", lw=chrlw, color=sepcolor)
    ax.plot(xlim, [ysize, ysize], "-", lw=chrlw, color=sepcolor)
    ax.plot([0, 0], ylim, "-", lw=chrlw, color=sepcolor)
    ax.plot([xsize, xsize], ylim, "-", lw=chrlw, color=sepcolor)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # The axis labels have been hardcoded (vs. gx gy as in original) so that
    # we can get the species names spelled out in italics, rather than the
    # BED file name.
    ax.set_xlabel(r'$\it{Zizania\ palustris}$', size=16)
    ax.set_ylabel(r'$\it{Oryza\ sativa}$', size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color="black",
             size=10)

    return xlim, ylim