def patcher(args): """ %prog patcher backbone.bed other.bed Given optical map alignment, prepare the patchers. Use --backbone to suggest which assembly is the major one, and the patchers will be extracted from another assembly. """ from jcvi.formats.bed import uniq p = OptionParser(patcher.__doc__) p.add_option("--backbone", default="OM", help="Prefix of the backbone assembly [default: %default]") p.add_option("--object", default="object", help="New object name [default: %default]") opts, args = p.parse_args(args) if len(args) != 2: sys.exit(not p.print_help()) backbonebed, otherbed = args backbonebed = uniq([backbonebed]) otherbed = uniq([otherbed]) bb = opts.backbone pf = backbonebed.split(".")[0] key = lambda x: (x.seqid, x.start, x.end) is_bb = lambda x: x.startswith(bb) # Make a uniq bed keeping backbone at redundant intervals cmd = "intersectBed -v -wa" cmd += " -a {0} -b {1}".format(otherbed, backbonebed) outfile = otherbed.rsplit(".", 1)[0] + ".not." + backbonebed sh(cmd, outfile=outfile) uniqbed = Bed() uniqbedfile = pf + ".merged.bed" uniqbed.extend(Bed(backbonebed)) uniqbed.extend(Bed(outfile)) uniqbed.print_to_file(uniqbedfile, sorted=True) # Condense adjacent intervals, allow some chaining bed = uniqbed key = lambda x: range_parse(x.accn).seqid bed_fn = pf + ".patchers.bed" bed_fw = open(bed_fn, "w") for k, sb in groupby(bed, key=key): sb = list(sb) chr, start, end, strand = merge_ranges(sb) id = "{0}:{1}-{2}".format(chr, start, end) print >> bed_fw, "\t".join(str(x) for x in \ (chr, start, end, opts.object, 1000, strand)) bed_fw.close()
def patcher(args): """ %prog patcher backbone.bed other.bed Given optical map alignment, prepare the patchers. Use --backbone to suggest which assembly is the major one, and the patchers will be extracted from another assembly. """ from jcvi.formats.bed import uniq p = OptionParser(patcher.__doc__) p.add_option("--backbone", default="OM", help="Prefix of the backbone assembly [default: %default]") p.add_option("--object", default="object", help="New object name [default: %default]") opts, args = p.parse_args(args) if len(args) != 2: sys.exit(not p.print_help()) backbonebed, otherbed = args backbonebed = uniq([backbonebed]) otherbed = uniq([otherbed]) pf = backbonebed.split(".")[0] key = lambda x: (x.seqid, x.start, x.end) # Make a uniq bed keeping backbone at redundant intervals cmd = "intersectBed -v -wa" cmd += " -a {0} -b {1}".format(otherbed, backbonebed) outfile = otherbed.rsplit(".", 1)[0] + ".not." + backbonebed sh(cmd, outfile=outfile) uniqbed = Bed() uniqbedfile = pf + ".merged.bed" uniqbed.extend(Bed(backbonebed)) uniqbed.extend(Bed(outfile)) uniqbed.print_to_file(uniqbedfile, sorted=True) # Condense adjacent intervals, allow some chaining bed = uniqbed key = lambda x: range_parse(x.accn).seqid bed_fn = pf + ".patchers.bed" bed_fw = open(bed_fn, "w") for k, sb in groupby(bed, key=key): sb = list(sb) chr, start, end, strand = merge_ranges(sb) print >> bed_fw, "\t".join(str(x) for x in \ (chr, start, end, opts.object, 1000, strand)) bed_fw.close()
def gaps(args): """ %prog gaps OM.bed fastafile Create patches around OM gaps. """ from jcvi.formats.bed import uniq p = OptionParser(gaps.__doc__) opts, args = p.parse_args(args) if len(args) != 2: sys.exit(not p.print_help()) ombed, fastafile = args ombed = uniq([ombed]) bed = Bed(ombed) for a, b in pairwise(bed): om_a = (a.seqid, a.start, a.end, "+") om_b = (b.seqid, b.start, b.end, "+") ch_a = range_parse(a.accn) ch_b = range_parse(b.accn) ch_a = (ch_a.seqid, ch_a.start, ch_a.end, "+") ch_b = (ch_b.seqid, ch_b.start, ch_b.end, "+") om_dist, x = range_distance(om_a, om_b, distmode="ee") ch_dist, x = range_distance(ch_a, ch_b, distmode="ee") if om_dist <= 0 and ch_dist <= 0: continue print(a) print(b) print(om_dist, ch_dist)
def paste(args): """ %prog paste flanks.bed flanks_vs_assembly.blast backbone.fasta Paste in good sequences in the final assembly. """ from jcvi.formats.bed import uniq p = OptionParser(paste.__doc__) p.add_option( "--maxsize", default=300000, type="int", help="Maximum size of patchers to be replaced", ) p.add_option("--prefix", help="Prefix of the new object") p.set_rclip(rclip=1) opts, args = p.parse_args(args) if len(args) != 3: sys.exit(not p.print_help()) pbed, blastfile, bbfasta = args maxsize = opts.maxsize # Max DNA size to replace gap order = Bed(pbed).order beforebed, afterbed = blast_to_twobeds( blastfile, order, log=True, rclip=opts.rclip, maxsize=maxsize, flipbeds=True ) beforebed = uniq([beforebed]) afbed = Bed(beforebed) bfbed = Bed(afterbed) shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix)
def paste(args): """ %prog paste flanks.bed flanks_vs_assembly.blast backbone.fasta Paste in good sequences in the final assembly. """ from jcvi.formats.bed import uniq p = OptionParser(paste.__doc__) p.add_option("--maxsize", default=300000, type="int", help="Maximum size of patchers to be replaced [default: %default]") p.add_option("--prefix", help="Prefix of the new object [default: %default]") p.set_rclip(rclip=1) opts, args = p.parse_args(args) if len(args) != 3: sys.exit(not p.print_help()) pbed, blastfile, bbfasta = args maxsize = opts.maxsize # Max DNA size to replace gap order = Bed(pbed).order beforebed, afterbed = blast_to_twobeds(blastfile, order, log=True, rclip=opts.rclip, maxsize=maxsize, flipbeds=True) beforebed = uniq([beforebed]) afbed = Bed(beforebed) bfbed = Bed(afterbed) shuffle_twobeds(afbed, bfbed, bbfasta, prefix=opts.prefix)
def gaps(args): """ %prog gaps OM.bed fastafile Create patches around OM gaps. """ from jcvi.formats.bed import uniq from jcvi.utils.iter import pairwise p = OptionParser(gaps.__doc__) opts, args = p.parse_args(args) if len(args) != 2: sys.exit(not p.print_help()) ombed, fastafile = args ombed = uniq([ombed]) bed = Bed(ombed) for a, b in pairwise(bed): om_a = (a.seqid, a.start, a.end, "+") om_b = (b.seqid, b.start, b.end, "+") ch_a = range_parse(a.accn) ch_b = range_parse(b.accn) ch_a = (ch_a.seqid, ch_a.start, ch_a.end, "+") ch_b = (ch_b.seqid, ch_b.start, ch_b.end, "+") om_dist, x = range_distance(om_a, om_b, distmode="ee") ch_dist, x = range_distance(ch_a, ch_b, distmode="ee") if om_dist <= 0 and ch_dist <= 0: continue print a print b print om_dist, ch_dist