def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for three modes of BWA - mem (default), aln, bwasw (long reads).
    """
    # NOTE: the docstring above is handed to OptionParser below, so its text
    # is left untouched; implementation notes are kept in comments instead.
    #
    # args: command-line tokens (database, read file, optional mate file).
    # Returns a (samfile, None) tuple.
    # Raises AssertionError when bwasw mode is requested with paired reads.
    valid_modes = ("bwasw", "aln", "mem")
    p = OptionParser(align.__doc__)
    p.add_option("--mode", default="mem", choices=valid_modes,
                 help="BWA mode [default: %default]")
    p.add_option("--readtype", choices=("pacbio", "pbread"),
                 help="Read type in bwa-mem")
    p.set_cutoff(cutoff=800)
    p.set_sam_options()

    opts, args = p.parse_args(args)
    mode = opts.mode
    nargs = len(args)

    if nargs not in (2, 3):
        sys.exit(not p.print_help())

    tag = "bwa-{0}: ".format(mode)
    # `mem` is the fallback command builder; the branches below swap in the
    # builder matching the requested mode and read layout.
    c = mem
    if nargs == 2:
        tag += "Single-end alignment"
        if mode == "bwasw":
            c = bwasw
        elif mode == "aln":
            c = samse
    else:
        # Explicit raise instead of `assert`: an assert is stripped under
        # `python -O`, which would let a paired-end bwasw request silently
        # fall through to the `mem` builder.
        if mode == "bwasw":
            raise AssertionError("Cannot use --bwasw with paired-end mode")
        tag += "Paired-end alignment"
        if mode == "aln":
            c = sampe

    logging.debug(tag)

    args[0] = get_abs_path(args[0])
    cmd, samfile = c(args, opts)
    # The builder may hand back an empty command; only wrap a real one.
    if cmd:
        cmd = output_bam(cmd, samfile)

    bam = opts.bam
    unmapped = opts.unmapped

    sh(cmd)
    if unmapped:
        # Post-process through `mapped` with --unmapped, then remove the
        # original alignment file.
        mopts = [samfile, "--unmapped"]
        if not bam:
            mopts += ["--sam"]
        mapped(mopts)
        FileShredder([samfile])

    return samfile, None
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for three modes of BWA - mem (default), aln, bwasw (long reads).
    """
    # NOTE: the docstring above is handed to OptionParser below, so its text
    # is left untouched; implementation notes are kept in comments instead.
    #
    # args: command-line tokens (database, read file, optional mate file).
    # Returns a (samfile, None) tuple.
    # Raises AssertionError when bwasw mode is requested with paired reads.
    valid_modes = ("bwasw", "aln", "mem")
    p = OptionParser(align.__doc__)
    p.add_option("--mode", default="mem", choices=valid_modes,
                 help="BWA mode")
    p.add_option("--rg", help="Read group")
    p.add_option("--readtype",
                 choices=("pacbio", "pbread", "ont2d", "intractg"),
                 help="Read type in bwa-mem")
    p.set_cutoff(cutoff=800)
    p.set_sam_options()

    opts, args = p.parse_args(args)
    mode = opts.mode
    nargs = len(args)

    if nargs not in (2, 3):
        sys.exit(not p.print_help())

    tag = "bwa-{0}: ".format(mode)
    # `mem` is the fallback command builder; the branches below swap in the
    # builder matching the requested mode and read layout.
    c = mem
    if nargs == 2:
        tag += "Single-end alignment"
        if mode == "bwasw":
            c = bwasw
        elif mode == "aln":
            c = samse
    else:
        # Explicit raise instead of `assert`: an assert is stripped under
        # `python -O`, which would let a paired-end bwasw request silently
        # fall through to the `mem` builder.
        if mode == "bwasw":
            raise AssertionError("Cannot use --bwasw with paired-end mode")
        tag += "Paired-end alignment"
        if mode == "aln":
            c = sampe

    logging.debug(tag)

    cmd, samfile = c(args, opts)
    # The builder may hand back an empty command; only wrap a real one.
    if cmd:
        cmd = output_bam(cmd, samfile)

    bam = opts.bam
    unmapped = opts.unmapped

    sh(cmd)
    if unmapped:
        # Post-process through `mapped` with --unmapped, then remove the
        # original alignment file.
        mopts = [samfile, "--unmapped"]
        if not bam:
            mopts += ["--sam"]
        mapped(mopts)
        FileShredder([samfile])

    return samfile, None
def batch(args):
    """
    %prog batch database.fasta project_dir output_dir

    Run bwa in batch mode.
    """
    # NOTE: the docstring above is handed to OptionParser below. The
    # placeholder is spelled `%prog`, the same way the other commands in this
    # file spell it.
    #
    # args: command-line tokens (reference fasta, project dir, output dir).
    # One entry is registered with MakeManager per item yielded by
    # iter_project(); nothing is returned.
    p = OptionParser(batch.__doc__)
    set_align_options(p)
    p.set_sam_options()
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    ref_fasta, proj_dir, outdir = args
    outdir = outdir.rstrip("/")
    s3dir = None
    if outdir.startswith("s3://"):
        # Stage results in a local folder named after the last path
        # component, and remember the S3 destination for the upload step.
        s3dir = outdir
        outdir = op.basename(outdir)
        # NOTE(review): the staging directory is created only on the S3
        # path; a plain local outdir is presumably expected to exist or be
        # created elsewhere - confirm.
        mkdir(outdir)

    mm = MakeManager()
    # The loop variable is named `reads` so it does not shadow the option
    # parser `p`; the second element of each item is not used here.
    for reads, _ in iter_project(proj_dir):
        targs = [ref_fasta] + reads
        cmd1, bamfile = mem(targs, opts)
        # `mem` may hand back an empty command; only wrap a real one.
        if cmd1:
            cmd1 = output_bam(cmd1, bamfile)
        nbamfile = op.join(outdir, bamfile)
        cmd2 = "mv {} {}".format(bamfile, nbamfile)
        cmds = [cmd1, cmd2]

        if s3dir:
            cmd = "aws s3 cp {} {} --sse".format(nbamfile,
                                                 op.join(s3dir, bamfile))
            cmds.append(cmd)

        mm.add(reads, nbamfile, cmds)

    mm.write()
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # NOTE: the docstring above is handed to OptionParser below, so its text
    # is left untouched; implementation notes are kept in comments instead.
    #
    # args: command-line tokens (database, read file, optional mate file).
    # Returns a (samfile, logfile) tuple.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full", default=False, action="store_true",
                 help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder", default=False, action="store_true",
                 help="Keep the input read order [default: %default]")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)

    opts, args = p.parse_args(args)
    extra = opts.extra
    mo = opts.mateorientation
    if mo == "+-":
        mo_flag = ""
    elif mo == "-+":
        mo_flag = "--rf"
    else:
        mo_flag = "--ff"
    # Join with a space so a user-supplied `extra` string does not fuse with
    # the orientation flag (e.g. "-k 2--rf").
    if mo_flag:
        extra = "{0} {1}".format(extra, mo_flag) if extra else mo_flag

    PE = True
    if len(args) == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif len(args) == 3:
        logging.debug("Paired-end alignment")
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = get_abs_path(dbfile)
    safile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(
        readfile, dbfile, bowtie=True,
        mapped=mapped, unmapped=unmapped, bam=opts.bam
    )
    logfile = prefix + ".log"
    offset = guessoffset([readfile])

    # Skip the run when need_update() reports the output as current.
    if not need_update(safile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the captured log to stderr. `sys.stderr.write` behaves the same on
    # Python 2 and 3 (the old `print >>` statement does not), and the `with`
    # block closes the log file handle.
    with open(logfile) as fp:
        sys.stderr.write(fp.read() + "\n")

    return samfile, logfile
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # NOTE: the docstring above is handed to OptionParser below, so its text
    # is left untouched; implementation notes are kept in comments instead.
    #
    # args: command-line tokens (database, read file, optional mate file).
    # Returns a (samfile, logfile) tuple; samfile is "/dev/null" when --null
    # is given.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full", default=False, action="store_true",
                 help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder", default=False, action="store_true",
                 help="Keep the input read order [default: %default]")
    p.add_option("--null", default=False, action="store_true",
                 help="Do not write to SAM/BAM output")
    p.add_option("--fasta", default=False, action="store_true",
                 help="Query reads are FASTA")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)

    opts, args = p.parse_args(args)
    extra = opts.extra
    mo = opts.mateorientation
    if mo == "+-":
        mo_flag = ""
    elif mo == "-+":
        mo_flag = "--rf"
    else:
        mo_flag = "--ff"
    # Join with a space so a user-supplied `extra` string does not fuse with
    # the orientation flag (e.g. "-k 2--rf").
    if mo_flag:
        extra = "{0} {1}".format(extra, mo_flag) if extra else mo_flag

    PE = True
    if len(args) == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif len(args) == 3:
        logging.debug("Paired-end alignment")
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    fasta = opts.fasta
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(
        readfile, dbfile, bowtie=True,
        mapped=mapped, unmapped=unmapped, bam=opts.bam
    )
    logfile = prefix + ".log"
    # The quality offset is only guessed (and only used below) when the
    # reads are not FASTA.
    if not fasta:
        offset = guessoffset([readfile])

    # Skip the run when need_update() reports the output as current.
    if not need_update(dbfile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    if fasta:
        cmd += " -f"
    else:
        cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    if opts.null:
        samfile = "/dev/null"

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the captured log to stderr; the `with` block closes the log file
    # handle instead of leaving it to the garbage collector.
    with open(logfile) as fp:
        print(fp.read(), file=sys.stderr)

    return samfile, logfile