def sampe(args, opts):
    """
    %prog sampe database.fasta read1.fq read2.fq

    Wrapper for `bwa sampe`. Output will be read1.sam.

    Only assembles the command line; nothing is executed here.

    `args` must hold exactly three items (database, read1, read2). `opts`
    supplies `cpus`, `bam`, `unmapped`, `extra`, `cutoff` and `uniq`.

    Returns a `(cmd, samfile)` tuple. `cmd` is the empty string when
    `need_update` reports that `samfile` does not need to be rebuilt.
    """
    dbfile, read1file, read2file = args
    # From here on use whatever path check_index hands back
    # (presumably it also makes sure the index exists -- helper not shown).
    dbfile = check_index(dbfile)
    sai1file = check_aln(dbfile, read1file, cpus=opts.cpus)
    sai2file = check_aln(dbfile, read2file, cpus=opts.cpus)

    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )
    if not need_update((dbfile, sai1file, sai2file), samfile):
        # Name the right subcommand: this is the paired-end (`sampe`) wrapper.
        logging.error("`{0}` exists. `bwa sampe` already run.".format(samfile))
        return "", samfile

    cmd = "bwa sampe " + " ".join(
        (dbfile, sai1file, sai2file, read1file, read2file)
    )
    cmd += " " + opts.extra
    if opts.cutoff:
        cmd += " -a {0}".format(opts.cutoff)
    if opts.uniq:
        cmd += " -n 1"
    return cmd, samfile
def sampe(args, opts):
    """
    %prog sampe database.fasta read1.fq read2.fq

    Wrapper for `bwa sampe`. Output will be read1.sam.

    Only assembles the command line; nothing is executed here.

    `args` must hold exactly three items (database, read1, read2). `opts`
    supplies `cpus`, `bam`, `unmapped`, `extra`, `cutoff` and `uniq`.

    Returns a `(cmd, samfile)` tuple. `cmd` is the empty string when
    `need_update` reports that `samfile` does not need to be rebuilt.
    """
    dbfile, read1file, read2file = args
    # The value returned by check_index is only used as a freshness
    # dependency below; the command itself keeps the caller's database path.
    safile = check_index(dbfile)
    sai1file = check_aln(dbfile, read1file, cpus=opts.cpus)
    sai2file = check_aln(dbfile, read2file, cpus=opts.cpus)

    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )
    if not need_update((safile, sai1file, sai2file), samfile):
        # Name the right subcommand: this is the paired-end (`sampe`) wrapper.
        logging.error("`{0}` exists. `bwa sampe` already run.".format(samfile))
        return "", samfile

    cmd = "bwa sampe " + " ".join(
        (dbfile, sai1file, sai2file, read1file, read2file)
    )
    cmd += " " + opts.extra
    if opts.cutoff:
        cmd += " -a {0}".format(opts.cutoff)
    if opts.uniq:
        cmd += " -n 1"
    return cmd, samfile
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Wrapper for `bwa mem`. Output will be read1.sam.

    Only assembles the command line; nothing is executed here.

    `args` is a mutable sequence of two or three paths; its first element is
    overwritten in place with the value returned by `check_index`. `opts`
    supplies `readtype`, `rg`, `bam`, `unmapped`, `bwa`, `cpus` and `extra`.

    Returns a `(cmd, samfile)` tuple. `cmd` is the empty string when
    `need_update` reports that `samfile` does not need to be rebuilt.
    """
    dbfile, read1file = args[:2]
    readtype = opts.readtype
    # The read type doubles as the platform (PL) tag of the default read group.
    pl = readtype or "illumina"

    # Read-group ID defaults to the read file's basename up to the first dot.
    pf = op.basename(read1file).split(".")[0]
    rg = opts.rg or r"@RG\tID:{0}\tSM:sm\tLB:lb\tPL:{1}".format(pf, pl)
    dbfile = check_index(dbfile)
    args[0] = dbfile
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )
    if not need_update(read1file, samfile):
        logging.error("`{0}` exists. `bwa mem` already run.".format(samfile))
        return "", samfile

    cmd = "{} mem".format(opts.bwa)
    # -M Mark shorter split hits as secondary (for Picard compatibility).
    cmd += " -M -t {0}".format(opts.cpus)
    cmd += ' -R "{0}"'.format(rg)
    if readtype:
        cmd += " -x {0}".format(readtype)
    if opts.extra:
        cmd += " " + opts.extra
    # Always separate the positional arguments from what precedes them;
    # otherwise a non-empty `extra` gets glued onto the database path.
    cmd += " " + " ".join(args)
    return cmd, samfile
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Assemble the `bwa mem` command line. Output will be read1.sam.

    The first element of `args` is replaced in place by the value returned
    from `check_index`. Returns `(cmd, samfile)`; `cmd` is "" when
    `need_update` reports that `samfile` does not need to be rebuilt.
    """
    database, reads1 = args[:2]

    # Default read group: ID is the read file's basename up to the first dot.
    sample_id = op.basename(reads1).split(".")[0]
    read_group = opts.rg
    if not read_group:
        read_group = r"@RG\tID:{0}\tSM:sm\tLB:lb\tPL:illumina".format(sample_id)

    database = check_index(database)
    args[0] = database
    samfile, _, _ = get_samfile(
        reads1, database, bam=opts.bam, unmapped=opts.unmapped
    )

    if need_update(reads1, samfile):
        fragments = [
            "bwa mem " + " ".join(args),
            " -M -t {0}".format(opts.cpus),
            ' -R "{0}"'.format(read_group),
        ]
        if opts.readtype:
            fragments.append(" -x {0}".format(opts.readtype))
        fragments.append(" " + opts.extra)
        return "".join(fragments), samfile

    logging.error("`{0}` exists. `bwa mem` already run.".format(samfile))
    return "", samfile
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Wrapper for `bwa mem`. Output will be read1.sam.

    Only assembles the command line; nothing is executed here.

    `args` is a mutable sequence of two or three paths; its first element is
    overwritten in place with the value returned by `check_index`. `opts`
    supplies `readtype`, `rg`, `bam`, `unmapped`, `bwa`, `cpus` and `extra`.

    Returns a `(cmd, samfile)` tuple. `cmd` is the empty string when
    `need_update` reports that `samfile` does not need to be rebuilt.
    """
    dbfile, read1file = args[:2]
    readtype = opts.readtype
    # The read type doubles as the platform (PL) tag of the default read group.
    pl = readtype or "illumina"

    # Read-group ID defaults to the read file's basename up to the first dot.
    pf = op.basename(read1file).split(".")[0]
    rg = opts.rg or r"@RG\tID:{0}\tSM:sm\tLB:lb\tPL:{1}".format(pf, pl)
    dbfile = check_index(dbfile)
    args[0] = dbfile
    samfile, _, _ = get_samfile(
        read1file, dbfile, bam=opts.bam, unmapped=opts.unmapped
    )
    if not need_update(read1file, samfile):
        logging.error("`{0}` exists. `bwa mem` already run.".format(samfile))
        return "", samfile

    # Options first, positional arguments last.
    # -M Mark shorter split hits as secondary (for Picard compatibility).
    pieces = [
        "{} mem".format(opts.bwa),
        "-M -t {0}".format(opts.cpus),
        '-R "{0}"'.format(rg),
    ]
    if readtype:
        pieces.append("-x {0}".format(readtype))
    if opts.extra:
        pieces.append(opts.extra)
    # Joining with a single space guarantees that `extra` and the database
    # path can never be fused into one token.
    pieces.extend(args)
    return " ".join(pieces), samfile
def bwasw(args, opts):
    """
    %prog bwasw database.fasta long_read.fastq

    Assemble the `bwa bwasw` command line. Output will be long_read.sam.

    Returns `(cmd, samfile)`; `cmd` is "" when `need_update` reports that
    `samfile` does not need to be rebuilt.
    """
    database, reads = args
    # Only used as the freshness dependency; the command keeps `args` as given.
    index_file = check_index(database)
    samfile, _, _ = get_samfile(
        reads, database, bam=opts.bam, unmapped=opts.unmapped
    )

    if need_update(index_file, samfile):
        fragments = [
            "bwa bwasw " + " ".join(args),
            " -t {0}".format(opts.cpus),
            " " + opts.extra,
        ]
        return "".join(fragments), samfile

    logging.error("`{0}` exists. `bwa bwasw` already run.".format(samfile))
    return "", samfile
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Assemble the `bwa mem` command line. Output will be read1.sam.

    Returns `(cmd, samfile)`; `cmd` is "" when `need_update` reports that
    `samfile` does not need to be rebuilt.
    """
    database, reads1 = args[:2]
    # Called for its effect only; the return value is not used here.
    check_index(database)
    samfile, _, _ = get_samfile(
        reads1, database, bam=opts.bam, unmapped=opts.unmapped
    )

    if need_update(reads1, samfile):
        fragments = [
            "bwa mem " + " ".join(args),
            " -t {0}".format(opts.cpus),
            " " + opts.extra,
        ]
        return "".join(fragments), samfile

    logging.error("`{0}` exists. `bwa mem` already run.".format(samfile))
    return "", samfile
def bwasw(args, opts):
    """
    %prog bwasw database.fasta long_read.fastq

    Assemble the `bwa bwasw` command line. Output will be long_read.sam.

    Returns `(cmd, samfile)`; `cmd` is "" when `need_update` reports that
    `samfile` does not need to be rebuilt.
    """
    database, reads = args
    # The checked path feeds the output naming and the freshness test;
    # the command itself is built from `args` exactly as passed in.
    database = check_index(database)
    samfile, _, _ = get_samfile(
        reads, database, bam=opts.bam, unmapped=opts.unmapped
    )

    if need_update(database, samfile):
        fragments = [
            "bwa bwasw " + " ".join(args),
            " -t {0}".format(opts.cpus),
            " " + opts.extra,
        ]
        return "".join(fragments), samfile

    logging.error("`{0}` exists. `bwa bwasw` already run.".format(samfile))
    return "", samfile
def samse(args, opts):
    """
    %prog samse database.fasta short_read.fastq

    Assemble the `bwa samse` command line. Output will be short_read.sam.

    Returns `(cmd, samfile)`; `cmd` is "" when `need_update` reports that
    `samfile` does not need to be rebuilt.
    """
    database, reads = args
    index_file = check_index(database)
    sai = check_aln(database, reads, cpus=opts.cpus)
    samfile, _, _ = get_samfile(
        reads, database, bam=opts.bam, unmapped=opts.unmapped
    )

    # Rebuild only when the index or the .sai intermediate calls for it.
    if need_update((index_file, sai), samfile):
        fragments = [
            "bwa samse {0} {1} {2}".format(database, sai, reads),
            " " + opts.extra,
        ]
        if opts.uniq:
            fragments.append(" -n 1")
        return "".join(fragments), samfile

    logging.error("`{0}` exists. `bwa samse` already run.".format(samfile))
    return "", samfile
def samse(args, opts):
    """
    %prog samse database.fasta short_read.fastq

    Assemble the `bwa samse` command line. Output will be short_read.sam.

    Returns `(cmd, samfile)`; `cmd` is "" when `need_update` reports that
    `samfile` does not need to be rebuilt.
    """
    database, reads = args
    # Everything downstream uses the path handed back by check_index.
    database = check_index(database)
    sai = check_aln(database, reads, cpus=opts.cpus)
    samfile, _, _ = get_samfile(
        reads, database, bam=opts.bam, unmapped=opts.unmapped
    )

    # Rebuild only when the database or the .sai intermediate calls for it.
    if need_update((database, sai), samfile):
        fragments = [
            "bwa samse {0} {1} {2}".format(database, sai, reads),
            " " + opts.extra,
        ]
        if opts.uniq:
            fragments.append(" -n 1")
        return "".join(fragments), samfile

    logging.error("`{0}` exists. `bwa samse` already run.".format(samfile))
    return "", samfile
def mem(args, opts):
    """
    %prog mem database.fasta read1.fq [read2.fq]

    Assemble the `bwa mem` command line. Output will be read1.sam.

    Returns `(cmd, samfile)`; `cmd` is "" when `need_update` reports that
    `samfile` does not need to be rebuilt.
    """
    database, reads1 = args[:2]
    # Called for its effect only; the return value is not used here.
    check_index(database)
    samfile, _, _ = get_samfile(
        reads1, database, bam=opts.bam, unmapped=opts.unmapped
    )

    if need_update(reads1, samfile):
        fragments = [
            "bwa mem " + " ".join(args),
            " -t {0}".format(opts.cpus),
        ]
        if opts.readtype:
            fragments.append(" -x {0}".format(opts.readtype))
        fragments.append(" " + opts.extra)
        return "".join(fragments), samfile

    logging.error("`{0}` exists. `bwa mem` already run.".format(samfile))
    return "", samfile
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # The docstring above is handed to OptionParser as the usage text, so it
    # is kept verbatim; further notes live in comments.
    #
    # Two positional args select single-end mode, three select paired-end;
    # any other count prints the help and exits. The command is run through
    # `sh`, the bowtie2 log (captured from stderr) is echoed to stderr, and
    # `(samfile, logfile)` is returned. When `need_update` reports the output
    # does not need rebuilding, the same tuple is returned without running.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full", default=False, action="store_true",
                 help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder", default=False, action="store_true",
                 help="Keep the input read order [default: %default]")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)
    opts, args = p.parse_args(args)

    # "+-" needs no flag; the other orientations map to an explicit one.
    mo = opts.mateorientation
    if mo == "+-":
        moflag = ""
    elif mo == "-+":
        moflag = "--rf"
    else:
        moflag = "--ff"
    # Join with a space so user-supplied extra options are never fused with
    # the orientation flag (e.g. "-k 5--rf").
    extra = " ".join(x for x in (opts.extra, moflag) if x)

    PE = True
    if len(args) == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif len(args) == 3:
        logging.debug("Paired-end alignment")
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = get_abs_path(dbfile)
    safile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(
        readfile, dbfile, bowtie=True,
        mapped=mapped, unmapped=unmapped, bam=opts.bam
    )
    logfile = prefix + ".log"
    offset = guessoffset([readfile])

    if not need_update(safile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the captured log; `with` closes the handle, and a plain write
    # behaves the same under Python 2 and Python 3.
    with open(logfile) as fp:
        sys.stderr.write(fp.read() + "\n")

    return samfile, logfile
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of args.
    """
    # The docstring above is handed to OptionParser as the usage text, so it
    # is kept verbatim; further notes live in comments.
    #
    # Two positional args select single-end mode, three select paired-end;
    # any other count prints the help and exits. The command is run through
    # `sh`, the bowtie2 log (captured from stderr) is echoed to stderr, and
    # `(samfile, logfile)` is returned. When `need_update` reports the output
    # does not need rebuilding, the same tuple is returned without running.
    # With --null the returned samfile is "/dev/null".
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full", default=False, action="store_true",
                 help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder", default=False, action="store_true",
                 help="Keep the input read order [default: %default]")
    p.add_option("--null", default=False, action="store_true",
                 help="Do not write to SAM/BAM output")
    p.add_option("--fasta", default=False, action="store_true",
                 help="Query reads are FASTA")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)
    opts, args = p.parse_args(args)

    # "+-" needs no flag; the other orientations map to an explicit one.
    mo = opts.mateorientation
    if mo == "+-":
        moflag = ""
    elif mo == "-+":
        moflag = "--rf"
    else:
        moflag = "--ff"
    # Join with a space so user-supplied extra options are never fused with
    # the orientation flag (e.g. "-k 5--rf").
    extra = " ".join(x for x in (opts.extra, moflag) if x)

    PE = True
    if len(args) == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif len(args) == 3:
        logging.debug("Paired-end alignment")
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    fasta = opts.fasta
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(
        readfile, dbfile, bowtie=True,
        mapped=mapped, unmapped=unmapped, bam=opts.bam
    )
    logfile = prefix + ".log"
    # The quality offset is only guessed (and only used below) for
    # non-FASTA input.
    if not fasta:
        offset = guessoffset([readfile])

    if not need_update(dbfile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    if fasta:
        cmd += " -f"
    else:
        cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    if opts.null:
        samfile = "/dev/null"

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the captured log; `with` makes sure the handle is closed.
    with open(logfile) as fp:
        print(fp.read(), file=sys.stderr)

    return samfile, logfile