def plot_breaks_and_labels(fig, root, ax, gx, gy, xsize, ysize,
                           qbreaks, sbreaks, sep=True, chrlw=.1,
                           sepcolor="g", minfont=5, stdpf=True):
    """
    Draw chromosome separators, chromosome labels, the frame and the axis
    labels of a dot plot.

    Args:
        fig: Figure handed to `TextHandler` to pick label font sizes.
        root: Canvas axes (0-1 coordinates) that receive the chromosome labels.
        ax: Axes holding the dot plot; occupies [.1, .9] of `root` in both
            directions, which is what the .1/.8/.9 constants below encode.
        gx, gy: Labels for the x and y axis.
        xsize, ysize: Extent of the x and y axis in data units.
        qbreaks, sbreaks: Iterables of (seqid, beg, end) for the x and y axis.
        sep: Draw a separator line at the start of every chromosome.
        chrlw: Line width of separators and frame.
        sepcolor: Color of separators and frame.
        minfont: Labels whose selected font size is below this are skipped.
        stdpf: Forwarded to `seqid_parse`.

    Returns:
        (xlim, ylim) as applied to `ax`; ylim is inverted.
    """
    xlim = (0, xsize)
    ylim = (ysize, 0)  # y grows downwards
    text_handler = TextHandler(fig)

    def collect(breaks, total, draw_separator):
        # One entry per chromosome: (short name, midpoint, font size)
        labels = []
        for name, start, stop in breaks:
            share = abs(stop - start) * .8 / total
            size = text_handler.select_fontsize(share)
            short = "".join(seqid_parse(name, stdpf=stdpf)[:2])
            labels.append((short, (start + stop) / 2, size))
            if sep:
                draw_separator(start)
        return labels

    def vertical(at):
        ax.plot([at, at], ylim, "-", lw=chrlw, color=sepcolor)

    def horizontal(at):
        ax.plot(xlim, [at, at], "-", lw=chrlw, color=sepcolor)

    x_labels = collect(qbreaks, xsize, vertical)
    y_labels = collect(sbreaks, ysize, horizontal)

    # Chromosome names along the top edge
    for text, center, size in x_labels:
        canvas_x = .1 + center * .8 / xsize
        if size >= minfont:
            root.text(canvas_x, .91, latex(text), size=size,
                      ha="center", va="bottom", rotation=45, color="grey")

    # Chromosome names along the right edge (canvas y is flipped vs. data y)
    for text, center, size in y_labels:
        canvas_y = .9 - center * .8 / ysize
        if size >= minfont:
            root.text(.91, canvas_y, latex(text), size=size,
                      va="center", color="grey")

    # Frame: top, bottom, left, right
    frame = (
        (xlim, [0, 0]),
        (xlim, [ysize, ysize]),
        ([0, 0], ylim),
        ([xsize, xsize], ylim),
    )
    for xs, ys in frame:
        ax.plot(xs, ys, "-", lw=chrlw, color=sepcolor)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # Hide tick marks, keep the numeric tick labels
    ticklines = ax.get_xticklines() + ax.get_yticklines()
    for line in ticklines:
        line.set_visible(False)

    set_human_axis(ax)
    ticklabels = ax.get_xticklabels() + ax.get_yticklabels()
    plt.setp(ticklabels, color='gray', size=10)

    return xlim, ylim
def blastplot(
    ax,
    blastfile,
    qsizes,
    ssizes,
    qbed,
    sbed,
    style="dot",
    sampleN=None,
    baseticks=False,
    insetLabels=False,
    stripNames=False,
    highlights=None,
):
    """
    Draw a dot plot of BLAST hits on `ax`.

    NOTE(review): `root` is used below for canvas-level labels and ticklines
    but is not a parameter; it must exist as a module-level name when this
    function runs -- confirm against the caller.

    Args:
        ax: Axes that receive the hits, chromosome separators and highlights.
        blastfile: Path to the BLAST file; every row is parsed by `BlastLine`.
        qsizes, ssizes: Size objects for the query (x) and subject (y) axis;
            must offer `get_position`, `get_breaks`, `totalsize` and `len()`.
        qbed, sbed: Optional BED objects. When given, hits whose query/subject
            is absent from `.order` are dropped and the feature coordinates
            replace the BLAST coordinates.
        style: One of `DotStyles`; "line", "circle" and "dot" are drawn.
        sampleN: If set, plot at most this many randomly sampled hits.
        baseticks: Double the x tick density and draw outward ticklines.
        insetLabels: Put x chromosome labels inside `ax` (y labels are then
            omitted) instead of on the canvas.
        stripNames: Strip the last "."-separated suffix from hit names.
        highlights: Either a list of `BedLine` (subject regions) or a pair
            (query regions, subject regions) to shade in red.

    Returns:
        None. Logs an error and returns early when no hit could be placed.
    """
    assert style in DotStyles
    qorder = qbed.order if qbed else None
    sorder = sbed.order if sbed else None

    data = []
    # `with` guarantees the handle is closed, also when a row fails to parse
    with open(blastfile) as fp:
        for row in fp:
            b = BlastLine(row)
            query, subject = b.query, b.subject

            if stripNames:
                query = query.rsplit(".", 1)[0]
                subject = subject.rsplit(".", 1)[0]

            if qorder:
                if query not in qorder:
                    continue
                _, q = qorder[query]
                query = q.seqid
                qstart, qend = q.start, q.end
            else:
                qstart, qend = b.qstart, b.qstop

            if sorder:
                if subject not in sorder:
                    continue
                _, s = sorder[subject]
                subject = s.seqid
                sstart, send = s.start, s.end
            else:
                sstart, send = b.sstart, b.sstop

            qi = qsizes.get_position(query, qstart)
            qj = qsizes.get_position(query, qend)
            si = ssizes.get_position(subject, sstart)
            sj = ssizes.get_position(subject, send)

            # Only the start positions decide whether the hit can be placed
            if None in (qi, si):
                continue
            data.append(((qi, qj), (si, sj)))

    if sampleN:
        if len(data) > sampleN:
            data = sample(data, sampleN)

    if not data:
        return logging.error("no blast data imported")

    xsize, ysize = qsizes.totalsize, ssizes.totalsize
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))

    if style == "line":
        for a, b in data:
            ax.plot(a, b, "ro-", mfc="w", mec="r", ms=3)
    else:
        # Non-line styles only use the start coordinate of each hit
        data = [(x[0], y[0]) for x, y in data]
        x, y = zip(*data)

        if style == "circle":
            ax.plot(x, y, "mo", mfc="w", mec="m", ms=3)
        elif style == "dot":
            ax.scatter(x, y, s=3, lw=0)

    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    # Chromosomes shorter than these thresholds get neither label nor break;
    # with a threshold of 0 nothing is skipped.
    ignore_size_x = ignore_size_y = 0

    # plot the chromosome breaks
    logging.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes)))
    for (seqid, beg, end) in qsizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        if ignore:
            continue
        seqid = rename_seqid(seqid)
        xchr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot([end, end], ylim, "-", lw=1, color="grey")

    for (seqid, beg, end) in ssizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        if ignore:
            continue
        seqid = rename_seqid(seqid)
        ychr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot(xlim, [end, end], "-", lw=1, color="grey")

    # plot the chromosome labels
    for label, pos, ignore in xchr_labels:
        if not ignore:
            if insetLabels:
                ax.text(pos, 0, label, size=8, ha="center", va="top", color="grey")
            else:
                # data coordinate -> canvas coordinate (ax spans .1 to .9)
                pos = 0.1 + pos * 0.8 / xsize
                root.text(
                    pos,
                    0.91,
                    label,
                    size=10,
                    ha="center",
                    va="bottom",
                    rotation=45,
                    color="grey",
                )

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        if not ignore:
            if insetLabels:
                continue
            pos = 0.9 - pos * 0.8 / ysize
            root.text(0.91, pos, label, size=10, va="center", color="grey")

    # Highlight regions based on a list of BedLine
    qhighlights = shighlights = None
    if highlights:
        if isinstance(highlights[0], BedLine):
            shighlights = highlights
        elif len(highlights) == 2:
            qhighlights, shighlights = highlights

    if qhighlights:
        for hl in qhighlights:
            hls = qsizes.get_position(hl.seqid, hl.start)
            ax.add_patch(
                Rectangle((hls, 0), hl.span, ysize, fc="r", alpha=0.2, lw=0))
    if shighlights:
        for hl in shighlights:
            hls = ssizes.get_position(hl.seqid, hl.start)
            ax.add_patch(
                Rectangle((0, hls), xsize, hl.span, fc="r", alpha=0.2, lw=0))

    if baseticks:

        def increaseDensity(a, ratio=4):
            # Rebuild the tick array from 0 with a step `ratio` times smaller
            assert len(a) > 1
            stepsize = a[1] - a[0]
            newstepsize = int(stepsize / ratio)
            return np.arange(0, a[-1], newstepsize)

        # Increase the density of the ticks
        xticks = ax.get_xticks()
        yticks = ax.get_yticks()
        xticks = increaseDensity(xticks, ratio=2)
        yticks = increaseDensity(yticks, ratio=2)
        # Only the x ticks are applied to the axes; the denser y ticks are
        # used for the outward ticklines below.
        ax.set_xticks(xticks)

        # Plot outward ticklines
        for pos in xticks[1:]:
            if pos > xsize:
                continue
            pos = 0.1 + pos * 0.8 / xsize
            root.plot((pos, pos), (0.08, 0.1), "-", color="grey", lw=2)

        for pos in yticks[1:]:
            if pos > ysize:
                continue
            pos = 0.9 - pos * 0.8 / ysize
            root.plot((0.09, 0.1), (pos, pos), "-", color="grey", lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_base_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="gray", size=10)
    plt.setp(ax.get_yticklabels(), rotation=90)
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
            is_self=False, synteny=False, cmap_text=None, genomenames=None,
            sample_number=10000, ignore=.005, palette=None, chrlw=.01,
            title=None):
    """
    Draw a gene-order dot plot of the anchor pairs in `anchorfile`.

    Args:
        anchorfile: Path to the anchors file. Rows starting with "#" begin a
            new block; other rows hold query, subject and, as last column, an
            optional numeric value.
        qbed, sbed: BED objects for the x and y axis; must offer `.order`,
            `get_breaks()`, `len()` and `.filename`.
        fig: Figure, used for the optional suptitle.
        root: Canvas axes (0-1 coordinates) for labels and legend.
        ax: Axes that receive the dots; assumed to span .1-.9 of `root`.
        vmin, vmax: Values are clamped into this range before coloring.
        is_self: Also plot the mirror image of every pair plus a diagonal.
        synteny: Box the clusters returned by `batch_scan`.
        cmap_text: If set, draw a color bar with this caption.
        genomenames: "X_Y" string giving both axis labels; by default they
            are derived from the BED file names.
        sample_number: Plot at most this many randomly sampled points.
        ignore: Chromosomes shorter than this fraction of the axis are left
            unlabelled; falsy disables the filter.
        palette: Optional block_id -> color mapping with a `.colors` legend
            dict; when given, dots are colored per block.
        chrlw: Line width of the chromosome separators.
        title: Optional figure title.
    """
    def short_label(seqid):
        # Keep the part after the last "_"; a purely numeric name becomes c<N>
        seqid = seqid.split("_")[-1]
        try:
            return "c%d" % int(seqid)
        except ValueError:
            return seqid

    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    block_id = 0
    # Color of the current block. It has to outlive the "#" header row;
    # resetting it on every row would discard the palette color before any
    # data row of the block is read.
    block_color = None
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                # No numeric column present
                value = vmax

            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, _ = qorder[query]
            si, _ = sorder[subject]

            nv = vmax - value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    # only show a random subset of at most `sample_number` points
    if len(data) > sample_number:
        logging.debug("Showing a random subset of %s data points (total %s) " \
                      "for clarity." % (sample_number, len(data)))
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    if not palette:
        data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=default_cm,
                   vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    # Minimum chromosome length (in genes) that still gets a label
    ignore_size_x = ignore_size_y = 0
    if ignore:
        ignore_size_x = xsize * ignore
        ignore_size_y = ysize * ignore

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        skip = abs(end - beg) < ignore_size_x
        xchr_labels.append((short_label(seqid), (beg + end) / 2, skip))
        ax.plot([beg, beg], ylim, "g-", lw=chrlw)

    for (seqid, beg, end) in sbed.get_breaks():
        skip = abs(end - beg) < ignore_size_y
        ychr_labels.append((short_label(seqid), (beg + end) / 2, skip))
        ax.plot(xlim, [beg, beg], "g-", lw=chrlw)

    # plot the chromosome labels
    for label, pos, skip in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if not skip:
            root.text(pos, .91, label,
                      ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, skip in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if not skip:
            root.text(.91, pos, label,
                      va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if title:
        fig.suptitle(title, x=.05, y=.98, color="k")

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
            is_self=False, synteny=False, cmap_text=None, genomenames=None,
            sample_number=10000, minfont=5, palette=None, chrlw=.01,
            title=None, sepcolor="gainsboro"):
    """
    Draw a gene-order dot plot of the anchor pairs in `anchorfile`.

    Args:
        anchorfile: Path to the anchors file. Rows starting with "#" begin a
            new block; other rows hold query, subject and, as last column, an
            optional numeric value.
        qbed, sbed: BED objects for the x and y axis; must offer `.order`,
            `get_breaks()`, `len()` and `.filename`.
        fig: Figure handed to `TextHandler` to pick label font sizes.
        root: Canvas axes (0-1 coordinates) for labels, legend and title.
        ax: Axes that receive the dots; assumed to span .1-.9 of `root`.
        vmin, vmax: Values are clamped into this range before coloring.
        is_self: Also plot the mirror image of every pair plus a diagonal.
        synteny: Box the clusters returned by `batch_scan`.
        cmap_text: If set, draw a color bar with this caption.
        genomenames: "X_Y" string giving both axis labels; by default they
            are derived from the BED file names.
        sample_number: Plot at most this many randomly sampled points.
        minfont: Chromosome labels with a smaller selected font are skipped.
        palette: Optional block_id -> color mapping with a `.colors` legend
            dict; when given, dots are colored per block.
        chrlw: Line width of the chromosome separators.
        title: Figure title; a default with the pair count is built if unset.
        sepcolor: Color of the chromosome separators.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    block_id = 0
    # Color of the current block. It has to outlive the "#" header row;
    # resetting it on every row would discard the palette color before any
    # data row of the block is read.
    block_color = None
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                # No numeric column present
                value = vmax

            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, _ = qorder[query]
            si, _ = sorder[subject]

            nv = vmax - value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    # Counted before sampling so the title reports the full number of pairs
    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) " \
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    if not palette:
        data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=default_cm,
                   vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Each label carries the font size chosen for it (small ones are skipped)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos, .91, latex(label), size=fontsize,
                      ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91, pos, latex(label), size=fontsize,
                      va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Every pair was stored twice (mirror image). Floor division
            # keeps the count an integer on Python 3 as well.
            npairs //= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(title, x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def dotplot(anchorfile, qbed, sbed, fig, root, ax, vmin=0, vmax=1,
            is_self=False, synteny=False, cmap_text=None, cmap="copper",
            genomenames=None, sample_number=10000, minfont=5, palette=None,
            chrlw=.01, title=None, sepcolor="gainsboro"):
    """
    Draw a gene-order dot plot of the anchor pairs in `anchorfile`.

    Args:
        anchorfile: Path to the anchors file. Rows starting with "#" begin a
            new block; other rows hold query, subject and, as last column, an
            optional numeric value.
        qbed, sbed: BED objects for the x and y axis; must offer `.order`,
            `get_breaks()`, `len()` and `.filename`.
        fig: Figure handed to `TextHandler` to pick label font sizes.
        root: Canvas axes (0-1 coordinates) for labels, legend and title.
        ax: Axes that receive the dots; assumed to span .1-.9 of `root`.
        vmin, vmax: With `cmap_text` set, pairs whose value falls outside
            this range are dropped; also used as color scale limits.
        is_self: Also plot the mirror image of every pair plus a diagonal.
        synteny: Box the clusters returned by `batch_scan`.
        cmap_text: If set, the last column is read as the value to color by
            and a color bar with this caption is drawn. Otherwise every pair
            gets the value 0.
        cmap: Colormap passed to `scatter` and `draw_cmap`.
        genomenames: "X_Y" string giving both axis labels; by default they
            are derived from the BED file names.
        sample_number: Plot at most this many randomly sampled points.
        minfont: Chromosome labels with a smaller selected font are skipped.
        palette: Optional block_id -> color mapping with a `.colors` legend
            dict; when given, dots are colored per block.
        chrlw: Line width of the chromosome separators.
        title: Figure title; a default with the pair count is built if unset.
        sepcolor: Color of the chromosome separators.
    """
    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Capping values within [{0:.1f}, {1:.1f}]"\
                      .format(vmin, vmax))

    block_id = 0
    # Color of the current block. It has to outlive the "#" header row;
    # resetting it on every row would discard the palette color before any
    # data row of the block is read.
    block_color = None
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            if row[0] == "#":
                block_id += 1
                block_color = palette.get(block_id, "k") if palette else None
                continue

            # first two columns are query and subject, and an optional third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            if cmap_text:
                try:
                    value = float(value)
                except ValueError:
                    # No numeric column present
                    value = vmax

                # Out-of-range pairs are dropped, not clamped
                if value < vmin:
                    continue
                if value > vmax:
                    continue
            else:
                value = 0

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, _ = qorder[query]
            si, _ = sorder[subject]

            nv = value if block_color is None else block_color
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    # Counted before sampling so the title reports the full number of pairs
    npairs = len(data)
    # Only show random subset
    if npairs > sample_number:
        logging.debug("Showing a random subset of {0} data points (total {1}) " \
                      "for clarity.".format(sample_number, npairs))
        data = sample(data, sample_number)

    # Points are drawn in file (or sampled) order; no sorting by value.
    x, y, c = zip(*data)

    if palette:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0)
    else:
        ax.scatter(x, y, c=c, edgecolors="none", s=2, lw=0, cmap=cmap,
                   vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=cmap)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    # Each label carries the font size chosen for it (small ones are skipped)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        xsize_ratio = abs(end - beg) * .8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbed.get_breaks():
        ysize_ratio = abs(end - beg) * .8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        seqid = "".join(seqid_parse(seqid)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize))
        ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    for label, pos, fontsize in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if fontsize >= minfont:
            root.text(pos, .91, latex(label), size=fontsize,
                      ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, fontsize in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if fontsize >= minfont:
            root.text(.91, pos, latex(label), size=fontsize,
                      va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    if genomenames:
        gx, gy = genomenames.split("_")
    else:
        to_ax_label = lambda fname: op.basename(fname).split(".")[0]
        gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(markup(gx), size=16)
    ax.set_ylabel(markup(gy), size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)

    if palette:  # bottom-left has the palette, if available
        colors = palette.colors
        xstart, ystart = .1, .05
        for category, c in sorted(colors.items()):
            root.add_patch(Rectangle((xstart, ystart), .03, .02, lw=0, fc=c))
            root.text(xstart + .04, ystart, category, color=c)
            xstart += .1

    if not title:
        title = "Inter-genomic comparison: {0} vs {1}".format(gx, gy)
        if is_self:
            title = "Intra-genomic comparison within {0}".format(gx)
            # Every pair was stored twice (mirror image). Floor division
            # keeps the count an integer on Python 3 as well.
            npairs //= 2
        title += " ({0} gene pairs)".format(thousands(npairs))
    root.set_title(markup(title), x=.5, y=.96, color="k")
    logging.debug(title)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
def blastplot(ax, blastfile, qsizes, ssizes, qbed, sbed,
              style="dot", proportional=False, sampleN=None,
              baseticks=False, insetLabels=False, stripNames=False,
              highlights=None):
    """
    Draw a dot plot of BLAST hits on `ax`.

    NOTE(review): `root` is used below for canvas-level labels and ticklines
    but is not a parameter; it must exist as a module-level name when this
    function runs -- confirm against the caller.

    Args:
        ax: Axes that receive the hits, chromosome separators and highlights.
        blastfile: Path to the BLAST file; every row is parsed by `BlastLine`.
        qsizes, ssizes: Size objects for the query (x) and subject (y) axis;
            must offer `get_position`, `get_breaks`, `totalsize` and `len()`.
        qbed, sbed: Optional BED objects. When given, hits whose query/subject
            is absent from `.order` are dropped and the feature coordinates
            replace the BLAST coordinates.
        style: One of `DotStyles`; "line", "circle" and "dot" are drawn.
        proportional: Accepted but not used inside this function.
        sampleN: If set, plot at most this many randomly sampled hits.
        baseticks: Double the x tick density and draw outward ticklines.
        insetLabels: Put x chromosome labels inside `ax` (y labels are then
            omitted) instead of on the canvas.
        stripNames: Strip the last "."-separated suffix from hit names.
        highlights: Either a list of `BedLine` (subject regions) or a pair
            (query regions, subject regions) to shade in red.

    Returns:
        None. Logs an error and returns early when no hit could be placed.
    """
    assert style in DotStyles
    qorder = qbed.order if qbed else None
    sorder = sbed.order if sbed else None

    data = []
    # `with` guarantees the handle is closed, also when a row fails to parse
    with open(blastfile) as fp:
        for row in fp:
            b = BlastLine(row)
            query, subject = b.query, b.subject

            if stripNames:
                query = query.rsplit(".", 1)[0]
                subject = subject.rsplit(".", 1)[0]

            if qorder:
                if query not in qorder:
                    continue
                _, q = qorder[query]
                query = q.seqid
                qstart, qend = q.start, q.end
            else:
                qstart, qend = b.qstart, b.qstop

            if sorder:
                if subject not in sorder:
                    continue
                _, s = sorder[subject]
                subject = s.seqid
                sstart, send = s.start, s.end
            else:
                sstart, send = b.sstart, b.sstop

            qi = qsizes.get_position(query, qstart)
            qj = qsizes.get_position(query, qend)
            si = ssizes.get_position(subject, sstart)
            sj = ssizes.get_position(subject, send)

            # Only the start positions decide whether the hit can be placed
            if None in (qi, si):
                continue
            data.append(((qi, qj), (si, sj)))

    if sampleN:
        if len(data) > sampleN:
            data = sample(data, sampleN)

    if not data:
        return logging.error("no blast data imported")

    xsize, ysize = qsizes.totalsize, ssizes.totalsize
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))

    if style == "line":
        for a, b in data:
            ax.plot(a, b, 'ro-', mfc="w", mec="r", ms=3)
    else:
        # Non-line styles only use the start coordinate of each hit
        data = [(x[0], y[0]) for x, y in data]
        x, y = zip(*data)

        if style == "circle":
            ax.plot(x, y, 'mo', mfc="w", mec="m", ms=3)
        elif style == "dot":
            ax.scatter(x, y, s=3, lw=0)

    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    # Chromosomes shorter than these thresholds get neither label nor break;
    # with a threshold of 0 nothing is skipped.
    ignore_size_x = ignore_size_y = 0

    # plot the chromosome breaks
    logging.debug("xbreaks={0} ybreaks={1}".format(len(qsizes), len(ssizes)))
    for (seqid, beg, end) in qsizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        if ignore:
            continue
        seqid = rename_seqid(seqid)
        xchr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot([end, end], ylim, "-", lw=1, color="grey")

    for (seqid, beg, end) in ssizes.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        if ignore:
            continue
        seqid = rename_seqid(seqid)
        ychr_labels.append((seqid, (beg + end) / 2, ignore))
        ax.plot(xlim, [end, end], "-", lw=1, color="grey")

    # plot the chromosome labels
    for label, pos, ignore in xchr_labels:
        if not ignore:
            if insetLabels:
                ax.text(pos, 0, label, size=8,
                        ha="center", va="top", color="grey")
            else:
                # data coordinate -> canvas coordinate (ax spans .1 to .9)
                pos = .1 + pos * .8 / xsize
                root.text(pos, .91, label, size=10,
                          ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        if not ignore:
            if insetLabels:
                continue
            pos = .9 - pos * .8 / ysize
            root.text(.91, pos, label, size=10,
                      va="center", color="grey")

    # Highlight regions based on a list of BedLine
    qhighlights = shighlights = None
    if highlights:
        if isinstance(highlights[0], BedLine):
            shighlights = highlights
        elif len(highlights) == 2:
            qhighlights, shighlights = highlights

    if qhighlights:
        for hl in qhighlights:
            hls = qsizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((hls, 0), hl.span, ysize,
                                   fc="r", alpha=.2, lw=0))
    if shighlights:
        for hl in shighlights:
            hls = ssizes.get_position(hl.seqid, hl.start)
            ax.add_patch(Rectangle((0, hls), xsize, hl.span,
                                   fc="r", alpha=.2, lw=0))

    if baseticks:

        def increaseDensity(a, ratio=4):
            # Rebuild the tick array from 0 with a step `ratio` times smaller
            assert len(a) > 1
            stepsize = a[1] - a[0]
            newstepsize = int(stepsize / ratio)
            return np.arange(0, a[-1], newstepsize)

        # Increase the density of the ticks
        xticks = ax.get_xticks()
        yticks = ax.get_yticks()
        xticks = increaseDensity(xticks, ratio=2)
        yticks = increaseDensity(yticks, ratio=2)
        # Only the x ticks are applied to the axes; the denser y ticks are
        # used for the outward ticklines below.
        ax.set_xticks(xticks)

        # Plot outward ticklines
        for pos in xticks[1:]:
            if pos > xsize:
                continue
            pos = .1 + pos * .8 / xsize
            root.plot((pos, pos), (.08, .1), '-', color="grey", lw=2)

        for pos in yticks[1:]:
            if pos > ysize:
                continue
            pos = .9 - pos * .8 / ysize
            root.plot((.09, .1), (pos, pos), '-', color="grey", lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_base_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)
    plt.setp(ax.get_yticklabels(), rotation=90)
def draw_depth(
    root,
    ax,
    bed,
    chrinfo=None,
    defaultcolor="k",
    sepcolor="w",
    ylim=100,
    title=None,
    subtitle=None,
):
    """
    Draw depth plot on the given axes, using data from bed

    Chromosomes are laid end to end along the x-axis. Every bed entry becomes
    one point at the middle of its interval, and a horizontal line marks the
    median depth of each chromosome.

    Args:
        root (matplotlib.Axes): Canvas axes
        ax (matplotlib.Axes): Axes to plot data on
        bed (Bed): Bed data from mosdepth; depth is read from `accn`
        chrinfo (ChrInfoFile): seqid => color, new name. When non-empty, its
            keys also decide which chromosomes are drawn. Defaults to None,
            meaning all chromosomes in `bed` with `defaultcolor`.
        defaultcolor (str): matplotlib-compatible color for data points,
            median lines and labels of chromosomes not listed in `chrinfo`
        sepcolor (str): matplotlib-compatible color for chromosome breaks
        ylim (int): Upper limit of the y-axis (depth)
        title (str): Title of the figure, to the right of the axis
        subtitle (str): Subtitle of the figure, just below title
    """
    if chrinfo is None:
        chrinfo = {}

    def color_of(seqid):
        # Per-chromosome color from chrinfo, otherwise the default color
        return chrinfo[seqid].color if seqid in chrinfo else defaultcolor

    sizes = bed.max_bp_in_chr
    seqids = chrinfo.keys() if chrinfo else sizes.keys()

    # Cumulative x offsets of every chromosome
    starts = {}
    ends = {}
    label_positions = []
    start = 0
    for seqid in seqids:
        starts[seqid] = start
        end = start + sizes[seqid]
        ends[seqid] = end
        label_positions.append((seqid, (start + end) / 2))
        start = end
    xsize = end

    # Extract plotting data
    data = []
    data_by_seqid = defaultdict(list)
    for b in bed:
        seqid = b.seqid
        if seqid not in starts:
            continue
        # chr01A  2000000 3000000 113.00
        x = starts[seqid] + (b.start + b.end) / 2
        y = float(b.accn)
        data.append((x, y, color_of(seqid)))
        data_by_seqid[seqid].append(y)

    x, y, c = zip(*data)
    ax.scatter(
        x,
        y,
        c=c,
        edgecolors="none",
        s=8,
        lw=0,
    )
    logging.debug("Obtained {} data points with depth data".format(len(data)))

    # Per seqid median
    medians = {}
    for seqid, values in data_by_seqid.items():
        seqid_start = starts[seqid]
        seqid_end = ends[seqid]
        seqid_median = np.median(values)
        medians[seqid] = seqid_median
        ax.plot(
            (seqid_start, seqid_end),
            (seqid_median, seqid_median),
            "-",
            lw=4,
            color=color_of(seqid),
            alpha=0.5,
        )

    # vertical lines for all the breaks
    for pos in starts.values():
        ax.plot((pos, pos), (0, ylim), "-", lw=1, color=sepcolor)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    # Canvas y positions of the median readout (above) and the name (below)
    median_depth_y = 0.88
    chr_label_y = 0.08
    for seqid, position in label_positions:
        # data coordinate -> canvas coordinate (ax assumed to span .1 to .9)
        xpos = 0.1 + position * 0.8 / xsize
        c = color_of(seqid)
        newseqid = chrinfo[seqid].new_name if seqid in chrinfo else seqid
        root.text(
            xpos, chr_label_y, newseqid, color=c, ha="center", va="center", rotation=20
        )
        seqid_median = medians[seqid]
        root.text(
            xpos,
            median_depth_y,
            str(int(seqid_median)),
            color=c,
            ha="center",
            va="center",
        )

    if title:
        root.text(
            0.95,
            0.5,
            markup(title),
            color="darkslategray",
            ha="center",
            va="center",
            size=15,
        )
    if subtitle:
        root.text(
            0.95,
            0.375,
            markup(subtitle),
            color="darkslategray",
            ha="center",
            va="center",
            size=15,
        )

    ax.set_xticks([])
    ax.set_xlim(0, xsize)
    ax.set_ylim(0, ylim)
    ax.set_ylabel("Depth")

    set_human_axis(ax)
    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="gray", size=10)
    normalize_axes(root)
def dotplot(anchorfile, qbed, sbed, image_name, vmin, vmax, iopts,
            is_self=False, synteny=False, cmap_text=None):
    """
    Draw a gene-order dot plot of the anchor pairs in `anchorfile` and save
    it to `image_name`.

    Args:
        anchorfile: Path to the anchors file; each row holds query, subject
            and, as last column, an optional numeric value.
        qbed, sbed: BED objects for the x and y axis; must offer `.order`,
            `get_breaks()`, `len()` and `.filename`.
        image_name: Output path handed to `plt.savefig`.
        vmin, vmax: Values are clamped into this range before coloring.
        iopts: Image options providing `w`, `h` (figure size) and `dpi`.
        is_self: Also plot the mirror image of every pair plus a diagonal.
        synteny: Box the clusters returned by `batch_scan`.
        cmap_text: If set, draw a color bar with this caption.
    """
    def short_label(seqid):
        # Keep the part after the last "_"; a purely numeric name becomes c<N>
        seqid = seqid.split("_")[-1]
        try:
            return "c%d" % int(seqid)
        except ValueError:
            return seqid

    qorder = qbed.order
    sorder = sbed.order

    data = []
    if cmap_text:
        logging.debug("Normalize values to [%.1f, %.1f]" % (vmin, vmax))

    # `with` guarantees the handle is closed once all rows are read
    with open(anchorfile) as fp:
        for row in fp:
            atoms = row.split()
            # first two columns are query and subject, and an optional third column
            if len(atoms) < 2:
                continue

            query, subject = atoms[:2]
            value = atoms[-1]

            try:
                value = float(value)
            except ValueError:
                # No numeric column present
                value = vmax

            if value < vmin:
                value = vmin
            if value > vmax:
                value = vmax

            if query not in qorder:
                continue
            if subject not in sorder:
                continue

            qi, _ = qorder[query]
            si, _ = sorder[subject]

            nv = vmax - value
            data.append((qi, si, nv))
            if is_self:  # Mirror image
                data.append((si, qi, nv))

    fig = plt.figure(1, (iopts.w, iopts.h))
    root = fig.add_axes([0, 0, 1, 1])  # the whole canvas
    ax = fig.add_axes([.1, .1, .8, .8])  # the dot plot

    sample_number = 5000  # only show random subset
    if len(data) > sample_number:
        data = sample(data, sample_number)

    # the data are plotted in this order, the least value are plotted
    # last for aesthetics
    data.sort(key=lambda x: -x[2])

    default_cm = cm.copper
    x, y, c = zip(*data)

    ax.scatter(x, y, c=c, s=2, lw=0, cmap=default_cm,
               vmin=vmin, vmax=vmax)

    if synteny:
        clusters = batch_scan(data, qbed, sbed)
        draw_box(clusters, ax)

    if cmap_text:
        draw_cmap(root, cmap_text, vmin, vmax, cmap=default_cm, reverse=True)

    xsize, ysize = len(qbed), len(sbed)
    logging.debug("xsize=%d ysize=%d" % (xsize, ysize))
    xlim = (0, xsize)
    ylim = (ysize, 0)  # invert the y-axis

    xchr_labels, ychr_labels = [], []
    # Chromosomes shorter than 0.5% of the axis are left unlabelled
    ignore_size_x = xsize * .005
    ignore_size_y = ysize * .005

    # plot the chromosome breaks
    for (seqid, beg, end) in qbed.get_breaks():
        ignore = abs(end - beg) < ignore_size_x
        xchr_labels.append((short_label(seqid), (beg + end) / 2, ignore))
        ax.plot([beg, beg], ylim, "g-", lw=1)

    for (seqid, beg, end) in sbed.get_breaks():
        ignore = abs(end - beg) < ignore_size_y
        ychr_labels.append((short_label(seqid), (beg + end) / 2, ignore))
        ax.plot(xlim, [beg, beg], "g-", lw=1)

    # plot the chromosome labels
    for label, pos, ignore in xchr_labels:
        pos = .1 + pos * .8 / xsize
        if not ignore:
            root.text(pos, .91, label,
                      ha="center", va="bottom", rotation=45, color="grey")

    # remember y labels are inverted
    for label, pos, ignore in ychr_labels:
        pos = .9 - pos * .8 / ysize
        if not ignore:
            root.text(.91, pos, label,
                      va="center", color="grey")

    # create a diagonal to separate mirror image for self comparison
    if is_self:
        ax.plot(xlim, (0, ysize), 'm-', alpha=.5, lw=2)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # add genome names
    to_ax_label = lambda fname: _(op.basename(fname).split(".")[0])
    gx, gy = [to_ax_label(x.filename) for x in (qbed, sbed)]
    ax.set_xlabel(gx, size=16)
    ax.set_ylabel(gy, size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(),
             color='gray', size=10)

    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()
    logging.debug("Print image to `{0}` {1}".format(image_name, iopts))
    plt.savefig(image_name, dpi=iopts.dpi)
def plot_breaks_and_labels(
    fig,
    root,
    ax,
    gx,
    gy,
    xsize,
    ysize,
    qbreaks,
    sbreaks,
    sep=True,
    chrlw=0.1,
    sepcolor="g",
    minfont=5,
    stdpf=True,
    chpf=True,
):
    """
    Draw chromosome separators, chromosome labels, the frame and the axis
    labels of a dot plot, customised for one specific figure.

    Several values are hard-coded for that figure and override what the
    arguments would otherwise produce:
      * the upper x limit (47724.0) instead of `xsize`,
      * both chromosome label tables (names, midpoints, font sizes), which
        replace the labels computed from `qbreaks` / `sbreaks`,
      * the axis labels (italic species names) instead of `gx` / `gy`.

    Args:
        fig: Figure handed to `TextHandler` to pick label font sizes.
        root: Canvas axes (0-1 coordinates) that receive the chromosome labels.
        ax: Axes holding the dot plot; assumed to span .1-.9 of `root`.
        gx, gy: Unused; kept for signature compatibility.
        xsize, ysize: Extent of the x and y axis in data units; still used to
            scale label positions and to place the right frame edge.
        qbreaks, sbreaks: Iterables of (seqid, beg, end); only their `beg`
            values affect the output (separator positions).
        sep: Draw a separator line at the start of every chromosome.
        chrlw: Line width of separators and frame.
        sepcolor: Color of separators and frame.
        minfont: Labels whose font size is below this are skipped.
        stdpf: Forwarded to `seqid_parse`.
        chpf: Shorten seqids through `seqid_parse`.

    Returns:
        (xlim, ylim) as applied to `ax`; ylim is inverted.
    """
    xlim = (0, 47724.0)  # hard-coding xlim maximum
    ylim = (ysize, 0)  # invert the y-axis

    # Tag to mark whether to plot chr name (skip small ones)
    xchr_labels, ychr_labels = [], []
    th = TextHandler(fig)

    # plot the chromosome breaks
    for (seqid, beg, end) in qbreaks:
        xsize_ratio = abs(end - beg) * 0.8 / xsize
        fontsize = th.select_fontsize(xsize_ratio)
        if chpf:
            seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])

        xchr_labels.append((seqid, (beg + end) / 2, fontsize))
        if sep:
            ax.plot([beg, beg], ylim, "-", lw=chrlw, color=sepcolor)

    for (seqid, beg, end) in sbreaks:
        ysize_ratio = abs(end - beg) * 0.8 / ysize
        fontsize = th.select_fontsize(ysize_ratio)
        if chpf:
            seqid = "".join(seqid_parse(seqid, stdpf=stdpf)[:2])

        ychr_labels.append((seqid, (beg + end) / 2, fontsize * 0.85))
        if sep:
            ax.plot(xlim, [beg, beg], "-", lw=chrlw, color=sepcolor)

    # plot the chromosome labels
    # Hard-coded table replacing the labels collected above. Raw strings keep
    # the backslash (a LaTeX space) without relying on invalid escapes.
    xchr_labels = [
        (r'chr\ 1', 1997.5, 12),
        (r'chr\ 2', 5944.5, 12),
        (r'chr\ 3', 9014.0, 12),
        (r'chr\ 4', 11351.5, 12),
        (r'chr\ 5', 13639.0, 12),
        (r'chr\ 6', 17657.5, 12),
        (r'chr\ 7', 22329.0, 12),
        (r'chr\ 8', 25466.0, 12),
        (r'chr\ 9', 28092.0, 12),
        (r'chr\ 10', 31361.5, 12),
        (r'chr\ 11', 34457.0, 12),
        (r'chr\ 12', 37234.0, 12),
        (r'chr\ 13', 41112.5, 12),
        (r'chr\ 14', 43851.0, 12),
        (r'chr\ 15', 45258.5, 12),
        (r'scf\ 16', 46740.5, 12),
        (r'scf\ 458', 47724.0, 12),
    ]
    for label, pos, fontsize in xchr_labels:
        pos = 0.1 + pos * 0.8 / xsize
        if fontsize >= minfont:
            root.text(
                pos,
                0.91,
                latex(label),
                size=fontsize * 0.85,
                ha="center",
                va="bottom",
                rotation=45,
                color="black",
            )

    # remember y labels are inverted
    # Hard-coded table as well; entries with font size 0 fall below `minfont`
    # and are therefore never drawn.
    ychr_labels = [
        (r'chr\ 1', 2672.0, 10.2),
        (r'chr\ 2', 7532.0, 10.2),
        (r'chr\ 3', 12035.0, 10.2),
        (r'chr\ 4', 16228.0, 10.2),
        (r'chr\ 5', 19784.5, 10.2),
        (r'chr\ 6', 23211.0, 10.2),
        (r'chr\ 7', 26612.5, 10.2),
        (r'chr\ 8', 29773.0, 10.2),
        (r'chr\ 9', 32518.0, 10.2),
        (r'chr\ 10', 35004.5, 10.2),
        (r'chr\ 11', 37760.0, 10.2),
        (r'chr\ 12', 40635.5, 10.2),
        ('ChrSy', 42048.0, 0),
        ('ChrUn', 42140.5, 0),
    ]
    for label, pos, fontsize in ychr_labels:
        pos = 0.9 - pos * 0.8 / ysize
        if fontsize >= minfont:
            root.text(0.91, pos, latex(label), size=fontsize * 0.85,
                      va="center", color="black")

    # Plot the frame
    ax.plot(xlim, [0, 0], "-", lw=chrlw, color=sepcolor)
    ax.plot(xlim, [ysize, ysize], "-", lw=chrlw, color=sepcolor)
    ax.plot([0, 0], ylim, "-", lw=chrlw, color=sepcolor)
    ax.plot([xsize, xsize], ylim, "-", lw=chrlw, color=sepcolor)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # The axis labels have been hardcoded (vs. gx gy as in original) so that
    # we can get the species names spelled out in italics, rather than the
    # BED file name.
    ax.set_xlabel(r'$\it{Zizania\ palustris}$', size=16)
    ax.set_ylabel(r'$\it{Oryza\ sativa}$', size=16)

    # beautify the numeric axis
    for tick in ax.get_xticklines() + ax.get_yticklines():
        tick.set_visible(False)

    set_human_axis(ax)

    plt.setp(ax.get_xticklabels() + ax.get_yticklabels(), color="black", size=10)

    return xlim, ylim