def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings.
    The paired read span will be converted to a new bedfile that contain the
    happy mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it has to stay in `%prog ...` form.
    from itertools import cycle

    from jcvi.formats.bed import mates, bedpe

    p = OptionParser(coverage.__doc__)
    p.add_option("--ymax", default=None, type="int",
                 help="Limit ymax [default: %default]")
    p.add_option("--spans", default=False, action="store_true",
                 help="BED files already contain clone spans [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    # Needs the FASTA file, the sequence id and at least one BED file.
    if len(args) < 3:
        sys.exit(not p.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines = []
    legends = []
    yy = .9  # axes-fraction height of the next "not covered" message

    # cycle() keeps every BED file on the plot: a plain zip() against the six
    # colour codes would silently drop the seventh file onwards.
    for bedfile, c in zip(bedfiles, cycle("rgbcky")):
        if not opts.spans:
            # Derive the clone-span BED from the read mappings, rebuilding
            # the intermediate files only when need_update() asks for it.
            pf = bedfile.rsplit(".", 1)[0]
            matesfile = pf + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, _matesbedfile = mates([bedfile, "--lib"])

            bedspanfile = pf + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                _bedpefile, bedspanfile = bedpe(
                    [bedfile, "--span", "--mates={0}".format(matesfile)])
            bedfile = bedspanfile

        bedsum = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - bedsum

        legend = bedfile.split(".")[0]
        msg = "{0}: {1} bp not covered".format(legend,
                                               thousands(notcoveredbases))
        # write() behaves identically on Python 2 and 3, unlike `print >>`.
        sys.stderr.write(msg + "\n")
        ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes)
        yy -= .08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5)
        lines.append(line)
        legends.append(legend)

    leg = ax.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    # Floor division keeps the label an integer number of Kb on Python 3 too.
    ylabel = "Average depth per {0}Kb".format(size // bins // 1000)
    ax.set_xlim(0, size)
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
def coverage(args):
    """
    %prog coverage fastafile ctg bedfile1 bedfile2 ..

    Plot coverage from a set of BED files that contain the read mappings.
    The paired read span will be converted to a new bedfile that contain the
    happy mates. ctg is the chr/scf/ctg that you want to plot the histogram on.

    If the bedfiles already contain the clone spans, turn on --spans.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it has to stay in `%prog ...` form.
    from itertools import cycle

    from jcvi.formats.bed import mates, bedpe

    p = OptionParser(coverage.__doc__)
    p.add_option("--ymax", default=None, type="int",
                 help="Limit ymax [default: %default]")
    p.add_option("--spans", default=False, action="store_true",
                 help="BED files already contain clone spans [default: %default]")
    opts, args, iopts = p.set_image_options(args, figsize="8x5")

    # Needs the FASTA file, the sequence id and at least one BED file.
    if len(args) < 3:
        sys.exit(not p.print_help())

    fastafile, ctg = args[0:2]
    bedfiles = args[2:]

    sizes = Sizes(fastafile)
    size = sizes.mapping[ctg]

    plt.figure(1, (iopts.w, iopts.h))
    ax = plt.gca()

    bins = 100  # smooth the curve
    lines = []
    legends = []
    yy = .9  # axes-fraction height of the next "not covered" message

    # cycle() keeps every BED file on the plot: a plain zip() against the six
    # colour codes would silently drop the seventh file onwards.
    for bedfile, c in zip(bedfiles, cycle("rgbcky")):
        if not opts.spans:
            # Derive the clone-span BED from the read mappings, rebuilding
            # the intermediate files only when need_update() asks for it.
            pf = bedfile.rsplit(".", 1)[0]
            matesfile = pf + ".mates"
            if need_update(bedfile, matesfile):
                matesfile, _matesbedfile = mates([bedfile, "--lib"])

            bedspanfile = pf + ".spans.bed"
            if need_update(matesfile, bedspanfile):
                _bedpefile, bedspanfile = bedpe(
                    [bedfile, "--span", "--mates={0}".format(matesfile)])
            bedfile = bedspanfile

        bedsum = Bed(bedfile).sum(seqid=ctg)
        notcoveredbases = size - bedsum

        legend = bedfile.split(".")[0]
        msg = "{0}: {1} bp not covered".format(legend,
                                               thousands(notcoveredbases))
        # write() behaves identically on Python 2 and 3, unlike `print >>`.
        sys.stderr.write(msg + "\n")
        ax.text(.1, yy, msg, color=c, size=9, transform=ax.transAxes)
        yy -= .08

        cov = Coverage(bedfile, sizes.filename)
        x, y = cov.get_plot_data(ctg, bins=bins)
        line, = ax.plot(x, y, '-', color=c, lw=2, alpha=.5)
        lines.append(line)
        legends.append(legend)

    leg = ax.legend(lines, legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)

    # Floor division keeps the label an integer number of Kb on Python 3 too.
    ylabel = "Average depth per {0}Kb".format(size // bins // 1000)
    ax.set_xlim(0, size)
    ax.set_ylim(0, opts.ymax)
    ax.set_xlabel(ctg)
    ax.set_ylabel(ylabel)
    set_human_base_axis(ax)

    figname = "{0}.{1}.pdf".format(fastafile, ctg)
    savefig(figname, dpi=iopts.dpi, iopts=iopts)
def qc(args):
    """
    %prog qc prefix

    Expects data files including:
    1. `prefix.bedpe` draws Bezier curve between paired reads
    2. `prefix.sizes` draws length of the contig/scaffold
    3. `prefix.gaps.bed` mark the position of the gaps in sequence
    4. `prefix.bed.coverage` plots the base coverage
    5. `prefix.pairs.bed.coverage` plots the clone coverage

    See assembly.coverage.posmap() for the generation of these files.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it has to stay in `%prog ...` form.
    from jcvi.graphics.glyph import Bezier

    p = OptionParser(qc.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # Same usage-error idiom as coverage(): exit through
        # `not p.print_help()` rather than passing its return value along.
        sys.exit(not p.print_help())

    prefix, = args
    scf = prefix  # the prefix is also the sequence id looked up below

    # All these files *must* be present in the current folder
    bedpefile = prefix + ".bedpe"
    fastafile = prefix + ".fasta"
    sizesfile = prefix + ".sizes"
    gapsbedfile = prefix + ".gaps.bed"
    bedfile = prefix + ".bed"
    pairsbedfile = prefix + ".pairs.bed"

    sizes = Sizes(fastafile).mapping
    size = sizes[scf]

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    # the scaffold
    root.add_patch(Rectangle((.1, .15), .8, .03, fc='k'))

    # basecoverage and matecoverage
    ax = fig.add_axes([.1, .45, .8, .45])

    bins = 200  # Smooth the curve
    basecoverage = Coverage(bedfile, sizesfile)
    matecoverage = Coverage(pairsbedfile, sizesfile)

    x, y = basecoverage.get_plot_data(scf, bins=bins)
    baseline, = ax.plot(x, y, 'g-')
    x, y = matecoverage.get_plot_data(scf, bins=bins)
    mateline, = ax.plot(x, y, 'r-')
    legends = ("Base coverage", "Mate coverage")
    leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)
    ax.set_xlim(0, size)

    # draw the read pairs
    pairs = []
    with open(bedpefile) as fp:
        for row in fp:
            fields = row.split()
            if not fields:
                continue  # tolerate blank lines
            # The sequence ids get throwaway names so that `scf`, which
            # labels the figure further down, is not overwritten here.
            _ascf, astart, aend, _bscf, bstart, bend, _clonename = fields
            start = min(int(astart), int(bstart)) + 1
            end = max(int(aend), int(bend))
            pairs.append((start, end))

    bpratio = .8 / size
    cutoff = 1000  # inserts smaller than this are not plotted

    def pos(x):
        # this convert from base => x-coordinate
        return .1 + x * bpratio

    ypos = .15 + .03  # top edge of the scaffold rectangle
    for start, end in pairs:
        dist = end - start

        if dist < cutoff:
            continue

        dist = min(dist, 10000)
        # 10Kb == .25 canvas height
        height = .25 * dist / 10000
        xstart = pos(start)
        xend = pos(end)
        p0 = (xstart, ypos)
        p1 = (xstart, ypos + height)
        p2 = (xend, ypos + height)
        p3 = (xend, ypos)
        Bezier(root, p0, p1, p2, p3)

    # gaps on the scaffold
    with open(gapsbedfile) as fp:
        for row in fp:
            b = BedLine(row)
            start, end = b.start, b.end
            xstart = pos(start)
            xend = pos(end)
            root.add_patch(Rectangle((xstart, .15), xend - xstart, .03,
                                     fc='w'))

    # Scaffold label (drawn once) and the note about the insert cutoff.
    root.text(.5, .1, scf, color='b', ha="center")
    warn_msg = "Only the inserts > {0}bp are shown".format(cutoff)
    root.text(.5, .05, warn_msg, color='gray', ha="center")

    # clean up and output
    set_human_base_axis(ax)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = prefix + ".pdf"
    savefig(figname, dpi=300)
def qc(args):
    """
    %prog qc prefix

    Expects data files including:
    1. `prefix.bedpe` draws Bezier curve between paired reads
    2. `prefix.sizes` draws length of the contig/scaffold
    3. `prefix.gaps.bed` mark the position of the gaps in sequence
    4. `prefix.bed.coverage` plots the base coverage
    5. `prefix.pairs.bed.coverage` plots the clone coverage

    See assembly.coverage.posmap() for the generation of these files.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so it has to stay in `%prog ...` form.
    from jcvi.graphics.glyph import Bezier

    p = OptionParser(qc.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        # Same usage-error idiom as coverage(): exit through
        # `not p.print_help()` rather than passing its return value along.
        sys.exit(not p.print_help())

    prefix, = args
    scf = prefix  # the prefix is also the sequence id looked up below

    # All these files *must* be present in the current folder
    bedpefile = prefix + ".bedpe"
    fastafile = prefix + ".fasta"
    sizesfile = prefix + ".sizes"
    gapsbedfile = prefix + ".gaps.bed"
    bedfile = prefix + ".bed"
    pairsbedfile = prefix + ".pairs.bed"

    sizes = Sizes(fastafile).mapping
    size = sizes[scf]

    fig = plt.figure(1, (8, 5))
    root = fig.add_axes([0, 0, 1, 1])

    # the scaffold
    root.add_patch(Rectangle((.1, .15), .8, .03, fc='k'))

    # basecoverage and matecoverage
    ax = fig.add_axes([.1, .45, .8, .45])

    bins = 200  # Smooth the curve
    basecoverage = Coverage(bedfile, sizesfile)
    matecoverage = Coverage(pairsbedfile, sizesfile)

    x, y = basecoverage.get_plot_data(scf, bins=bins)
    baseline, = ax.plot(x, y, 'g-')
    x, y = matecoverage.get_plot_data(scf, bins=bins)
    mateline, = ax.plot(x, y, 'r-')
    legends = ("Base coverage", "Mate coverage")
    leg = ax.legend((baseline, mateline), legends, shadow=True, fancybox=True)
    leg.get_frame().set_alpha(.5)
    ax.set_xlim(0, size)

    # draw the read pairs
    pairs = []
    with open(bedpefile) as fp:
        for row in fp:
            fields = row.split()
            if not fields:
                continue  # tolerate blank lines
            # The sequence ids get throwaway names so that `scf`, which
            # labels the figure further down, is not overwritten here.
            _ascf, astart, aend, _bscf, bstart, bend, _clonename = fields
            start = min(int(astart), int(bstart)) + 1
            end = max(int(aend), int(bend))
            pairs.append((start, end))

    bpratio = .8 / size
    cutoff = 1000  # inserts smaller than this are not plotted

    def pos(x):
        # this convert from base => x-coordinate
        return .1 + x * bpratio

    ypos = .15 + .03  # top edge of the scaffold rectangle
    for start, end in pairs:
        dist = end - start

        if dist < cutoff:
            continue

        dist = min(dist, 10000)
        # 10Kb == .25 canvas height
        height = .25 * dist / 10000
        xstart = pos(start)
        xend = pos(end)
        p0 = (xstart, ypos)
        p1 = (xstart, ypos + height)
        p2 = (xend, ypos + height)
        p3 = (xend, ypos)
        Bezier(root, p0, p1, p2, p3)

    # gaps on the scaffold
    with open(gapsbedfile) as fp:
        for row in fp:
            b = BedLine(row)
            start, end = b.start, b.end
            xstart = pos(start)
            xend = pos(end)
            root.add_patch(Rectangle((xstart, .15), xend - xstart, .03,
                                     fc='w'))

    # Scaffold label (drawn once) and the note about the insert cutoff.
    root.text(.5, .1, scf, color='b', ha="center")
    warn_msg = "Only the inserts > {0}bp are shown".format(cutoff)
    root.text(.5, .05, warn_msg, color='gray', ha="center")

    # clean up and output
    set_human_base_axis(ax)
    root.set_xlim(0, 1)
    root.set_ylim(0, 1)
    root.set_axis_off()

    figname = prefix + ".pdf"
    savefig(figname, dpi=300)