def test_shorten(s, expected):
    """Check that ``shorten(s)`` yields ``expected``.

    ``shorten`` is imported lazily from ``jcvi.graphics.base`` so the import
    only happens when the test actually runs.
    """
    from jcvi.graphics.base import shorten

    actual = shorten(s)
    assert actual == expected, "Expect {}".format(expected)
def dotplot(args):
    """
    %prog dotplot map.csv ref.fasta

    Make dotplot between chromosomes and linkage maps.
    The input map is csv formatted, for example:

    ScaffoldID,ScaffoldPosition,LinkageGroup,GeneticPosition
    scaffold_2707,11508,1,0
    scaffold_2707,11525,1,1.2
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so its wording is kept as-is; implementation notes live in comments.
    #
    # args    -- command-line tokens; after option parsing exactly two
    #            positionals must remain: the marker CSV and the FASTA file.
    # returns -- None. The figure is written to ``opts.outfile`` when given,
    #            otherwise to the CSV path with its last extension replaced
    #            by ``iopts.format``.
    # raises  -- ValueError when there are no markers to draw.
    from jcvi.assembly.allmaps import CSVMapLine
    from jcvi.formats.sizes import Sizes
    from jcvi.utils.natsort import natsorted
    from jcvi.graphics.base import shorten
    from jcvi.graphics.dotplot import plt, savefig, markup, normalize_axes, \
        downsample, plot_breaks_and_labels, thousands

    p = OptionParser(dotplot.__doc__)
    p.set_outfile(outfile=None)
    opts, args, iopts = p.set_image_options(args, figsize="8x8",
                                            style="dark", dpi=90,
                                            cmap="copper")

    if len(args) != 2:
        sys.exit(not p.print_help())

    csvfile, fastafile = args
    sizes = natsorted(Sizes(fastafile).mapping.items())

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    # Read every marker row, then release the handle (it used to be leaked).
    # NOTE(review): every row, including any header line shown in the usage
    # example, is passed to CSVMapLine -- confirm it tolerates the header.
    fp = must_open(csvfile)
    try:
        raw_data = [CSVMapLine(row) for row in fp]
    finally:
        fp.close()

    if not raw_data:
        raise ValueError("No markers found in {0}".format(csvfile))

    # X-axis is the genome assembly: sequences laid end to end in natural
    # sort order, each starting at the cumulative size of its predecessors.
    ctgs, ctg_sizes = zip(*sizes)
    xsize = sum(ctg_sizes)
    qb = list(np.cumsum(ctg_sizes))
    qbreaks = list(zip(ctgs, [0] + qb, qb))
    qstarts = dict(zip(ctgs, [0] + qb))

    # Y-axis is the map: each linkage group spans its largest cM value.
    def by_lg(marker):
        return marker.lg

    # groupby only merges adjacent items, hence the sort on the same key.
    raw_data.sort(key=by_lg)
    ssizes = {}
    for lg, members in groupby(raw_data, key=by_lg):
        ssizes[lg] = max(m.cm for m in members)
    ssizes = natsorted(ssizes.items())
    lgs, lg_sizes = zip(*ssizes)
    ysize = sum(lg_sizes)
    sb = list(np.cumsum(lg_sizes))
    sbreaks = list(zip([("LG" + x) for x in lgs], [0] + sb, sb))
    sstarts = dict(zip(lgs, [0] + sb))

    # Re-code all the scatter dots into the concatenated coordinates; markers
    # whose sequence is absent from the FASTA are dropped.
    data = [(qstarts[m.seqid] + m.pos, sstarts[m.lg] + m.cm, 'g')
            for m in raw_data if m.seqid in qstarts]
    if not data:
        # Previously surfaced as an opaque unpacking error from zip(*data).
        raise ValueError("No markers in {0} map onto sequences in {1}"
                         .format(csvfile, fastafile))

    npairs = downsample(data)

    x, y, c = zip(*data)
    ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)

    # Flip X-Y label
    gy, gx = op.basename(csvfile).split(".")[:2]
    gx, gy = shorten(gx, maxchar=30), shorten(gy, maxchar=30)
    xlim, ylim = plot_breaks_and_labels(fig, root, ax, gx, gy,
                                        xsize, ysize, qbreaks, sbreaks)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    title = "Alignment: {} vs {}".format(gx, gy)
    title += " ({} markers)".format(thousands(npairs))
    root.set_title(markup(title), x=.5, y=.96, color="k")
    logging.debug(title)
    normalize_axes(root)

    image_name = opts.outfile or \
        (csvfile.rsplit(".", 1)[0] + "." + iopts.format)
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    fig.clear()
def plot(args):
    """
    %prog plot input.bed seqid

    Plot the matchings between the reconstructed pseudomolecules and the maps.
    Two types of visualizations are available in one canvas:

    1. Parallel axes, and matching markers are shown in connecting lines;
    2. Scatter plot.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so its wording is kept as-is; implementation notes live in comments.
    #
    # args    -- command-line tokens; after option parsing exactly two
    #            positionals must remain: the input BED path and a seqid.
    #            Data is read from "<prefix>.lifted.bed" and "<prefix>.agp",
    #            where <prefix> is the BED path minus its last extension.
    # returns -- None. The figure is saved as "<seqid>.<iopts.format>".
    #            When the scaffold has no marker counts at all, an error is
    #            logged and nothing is drawn.
    from jcvi.graphics.base import plt, savefig, normalize_axes, \
        set2, panel_labels, shorten
    from jcvi.graphics.chromosome import Chromosome, GeneticMap, \
        HorizontalChromosome

    p = OptionParser(plot.__doc__)
    add_allmaps_plot_options(p)
    opts, args, iopts = p.set_image_options(args, figsize="10x6")

    if len(args) != 2:
        sys.exit(not p.print_help())

    inputbed, seqid = args
    pf = inputbed.rsplit(".", 1)[0]
    bedfile = pf + ".lifted.bed"
    agpfile = pf + ".agp"
    weightsfile = opts.weightsfile
    links = opts.links

    function = get_function(opts.distance)
    cc = Map(bedfile, function=function)
    allseqids = cc.seqids
    mapnames = cc.mapnames
    weights = Weights(weightsfile, mapnames)
    assert seqid in allseqids, "{0} not in {1}".format(seqid, allseqids)

    s = Scaffold(seqid, cc)
    mlg_counts = list(s.mlg_counts.items())
    if not mlg_counts:
        # With nothing to select from, the relaxation loop below could never
        # succeed and would spin forever; bail out before creating a figure.
        logging.error("No markers found on {0}, nothing to plot".format(seqid))
        return

    # Keep the groups with at least `links` markers, halving the threshold
    # until at least one group qualifies.
    mlgs = [k for k, v in mlg_counts if v >= links]
    while not mlgs:
        links /= 2
        logging.error("No markers to plot, --links reset to {0}".format(links))
        mlgs = [k for k, v in mlg_counts if v >= links]

    # Size of each selected group = largest distance value among its markers.
    mlgsizes = {}
    for mlg in mlgs:
        mlgsizes[mlg] = max(function(x) for x in cc.extract_mlg(mlg))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])
    ax1 = fig.add_axes([0, 0, .5, 1])   # left half: parallel coordinates
    ax2 = fig.add_axes([.5, 0, .5, 1])  # right half: scatter plots

    # Find the layout first
    ystart, ystop = .9, .1
    L = Layout(mlgsizes)
    coords = L.coords

    tip = .02
    marker_pos = {}
    # Palette: one colour per map name; a group inherits the colour of the
    # map named by the part of its identifier before the first "-".
    map_colors = dict((mapname, set2[i]) for i, mapname in enumerate(mapnames))
    colors = dict((mlg, map_colors[mlg.split("-")[0]]) for mlg in mlgs)

    rhos = {}
    # Parallel coordinates
    for mlg, (x, y1, y2) in coords.items():
        mm = cc.extract_mlg(mlg)
        markers = [(m.accn, function(m)) for m in mm]  # exhaustive marker list
        xy = [(m.pos, function(m)) for m in mm if m.seqid == seqid]
        mx, my = zip(*xy)
        rho = spearmanr(mx, my)
        rhos[mlg] = rho
        # A negative correlation means the map runs against the sequence.
        flip = rho < 0

        g = GeneticMap(ax1, x, y1, y2, markers, tip=tip, flip=flip)
        extra = -3 * tip if x < .5 else 3 * tip
        ha = "right" if x < .5 else "left"
        mapname = mlg.split("-")[0]
        tlg = shorten(mlg.replace("_", "."))  # Latex does not like underscore char
        label = "{0} (w={1})".format(tlg, weights[mapname])
        ax1.text(x + extra, (y1 + y2) / 2, label, color=colors[mlg],
                 ha=ha, va="center", rotation=90)
        marker_pos.update(g.marker_pos)

    agp = AGP(agpfile)
    agp = [x for x in agp if x.object == seqid]
    chrsize = max(x.object_end for x in agp)

    # Pseudomolecules in the center
    r = ystart - ystop
    ratio = r / chrsize
    chr_top = ystart  # fixed copy: ystart is used as a moving cursor later

    def to_canvas_y(pos):
        # Position on the sequence -> y on the vertical chromosome
        return chr_top - ratio * pos

    patchstart = [to_canvas_y(x.object_beg) for x in agp if not x.is_gap]
    Chromosome(ax1, .5, ystart, ystop, width=2 * tip, patch=patchstart, lw=2)

    label = "{0} ({1})".format(seqid, human_size(chrsize, precision=0))
    ax1.text(.5, ystart + tip, label, ha="center")

    scatter_data = defaultdict(list)
    # Connecting lines
    for b in s.markers:
        marker_name = b.accn
        if marker_name not in marker_pos:
            continue

        cx = .5
        cy = to_canvas_y(b.pos)
        mx = coords[b.mlg][0]
        my = marker_pos[marker_name]

        extra = -tip if mx < cx else tip
        extra *= 1.25  # leave boundaries for aesthetic reasons
        cx += extra
        mx -= extra
        ax1.plot((cx, mx), (cy, my), "-", color=colors[b.mlg])
        scatter_data[b.mlg].append((b.pos, function(b)))

    # Scatter plot, same data as parallel coordinates
    xstart, xstop = sorted((ystart, ystop))

    def to_canvas_x(pos):
        # Position on the sequence -> x on the horizontal chromosome
        return xstart + ratio * pos

    pp = [x.object_beg for x in agp if not x.is_gap]
    patchstart = [to_canvas_x(x) for x in pp]
    HorizontalChromosome(ax2, xstart, xstop, ystop,
                         height=2 * tip, patch=patchstart, lw=2)

    gap = .03
    # Vertical scale shared by all scatter panels: the available height minus
    # the gaps and the tip, divided among the summed group sizes.
    panel_ratio = (r - gap * len(mlgs) - tip) / sum(mlgsizes.values())

    # Panel geometry that does not change between groups.
    panel_left = .5 + xstart / 2
    panel_width = r / 2

    tlgs = []
    for mlg, mlgsize in sorted(mlgsizes.items()):
        height = panel_ratio * mlgsize
        ystart -= height
        color = colors[mlg]
        ax = fig.add_axes([panel_left, ystart, panel_width, height])
        ypos = ystart + height / 2
        ystart -= gap
        sd = scatter_data[mlg]
        xx, yy = zip(*sd)
        ax.vlines(pp, 0, 2 * mlgsize, colors="beige")
        ax.plot(xx, yy, ".", color=color)
        rho = rhos[mlg]
        ax.text(.5, 1 - .4 * gap / height, r"$\rho$={0:.3f}".format(rho),
                ha="center", va="top", transform=ax.transAxes, color="gray")
        tlg = shorten(mlg.replace("_", "."))
        tlgs.append((tlg, ypos, color))
        ax.set_xlim(0, chrsize)
        ax.set_ylim(0, mlgsize)
        ax.set_xticks([])
        while height / len(ax.get_yticks()) < .03 and len(ax.get_yticks()) >= 2:
            ax.set_yticks(ax.get_yticks()[::2])  # Sparsify the ticks
        yticklabels = [int(x) for x in ax.get_yticks()]
        ax.set_yticklabels(yticklabels, family='Helvetica')
        if rho < 0:
            ax.invert_yaxis()

    # Group labels between the two halves; with more than four panels the
    # alignment alternates left/right.
    for i, (tlg, ypos, color) in enumerate(tlgs):
        ha = "center"
        if len(tlgs) > 4:
            ha = "right" if i % 2 else "left"
        root.text(.5, ypos, tlg, color=color, rotation=90,
                  ha=ha, va="center")

    if opts.panels:
        labels = ((.04, .96, 'A'), (.48, .96, 'B'))
        panel_labels(root, labels)

    normalize_axes((ax1, ax2, root))
    image_name = seqid + "." + iopts.format
    savefig(image_name, dpi=iopts.dpi, iopts=iopts)
    plt.close(fig)