def contamination(args):
    """
    %prog contamination folder Ecoli.fasta

    Remove contaminated reads. The FASTQ files in the folder will automatically pair
    and filtered against Ecoli.fasta to remove contaminants using BOWTIE2.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so its wording is part of the program's output and is left untouched.
    from jcvi.apps.bowtie import align

    parser = OptionParser(contamination.__doc__)
    parser.add_option(
        "--mapped",
        default=False,
        action="store_true",
        help="Retain contaminated reads instead [default: %default]",
    )
    parser.set_cutoff(cutoff=800)
    parser.set_mateorientation(mateorientation="+-")
    opts, args = parser.parse_args(args)

    # Exactly two positionals are expected: the reads folder and the reference
    if len(args) != 2:
        sys.exit(not parser.print_help())

    folder, ecoli = args
    ecoli = get_abs_path(ecoli)

    # Flag forwarded to `align`, chosen by the --mapped switch
    if opts.mapped:
        tag = "--mapped"
    else:
        tag = "--unmapped"

    # One `align` invocation per entry yielded by iter_project(folder, 2)
    for reads, _prefix in iter_project(folder, 2):
        align_opts = [ecoli] + reads + [
            tag,
            "--cutoff={0}".format(opts.cutoff),
            "--null",
        ]
        if opts.mateorientation:
            align_opts.append(
                "--mateorientation={0}".format(opts.mateorientation)
            )
        samfile, logfile = align(align_opts)
def contamination(args):
    """
    %prog contamination folder Ecoli.fasta

    Remove contaminated reads. The FASTQ files in the folder will automatically pair
    and filtered against Ecoli.fasta to remove contaminants using BOWTIE2.
    """
    # NOTE: this docstring is also the usage message given to OptionParser;
    # keep its text stable.
    from jcvi.apps.bowtie import align

    optparser = OptionParser(contamination.__doc__)
    optparser.add_option(
        "--mapped",
        default=False,
        action="store_true",
        help="Retain contaminated reads instead [default: %default]",
    )
    optparser.set_cutoff(cutoff=800)
    optparser.set_mateorientation(mateorientation="+-")
    opts, args = optparser.parse_args(args)

    # Show the help and exit unless both the folder and the reference are given
    if len(args) != 2:
        sys.exit(not optparser.print_help())

    folder, ecoli = args
    ecoli = get_abs_path(ecoli)

    # Default is "--unmapped"; --mapped switches the forwarded flag
    tag = "--unmapped"
    if opts.mapped:
        tag = "--mapped"

    # Run `align` once for every entry produced by iter_project(folder, 2)
    for fastq_files, _pf in iter_project(folder, 2):
        align_opts = [ecoli] + fastq_files + [tag]
        align_opts.append("--cutoff={0}".format(opts.cutoff))
        align_opts.append("--null")
        if opts.mateorientation:
            align_opts.append(
                "--mateorientation={0}".format(opts.mateorientation)
            )
        samfile, logfile = align(align_opts)
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of
    args.
    """
    # NOTE: the docstring above doubles as the OptionParser usage text.
    #
    # Builds a `bowtie2` shell command from the parsed options, runs it via
    # `sh`, echoes the bowtie2 log to stderr and returns (samfile, logfile).
    # When `need_update` reports the output is current, nothing is run and the
    # same tuple is returned early.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full", default=False, action="store_true",
                 help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder", default=False, action="store_true",
                 help="Keep the input read order [default: %default]")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)
    opts, args = p.parse_args(args)

    # Map the mate orientation onto a bowtie2 flag. No flag is added for "+-"
    # (bowtie2's documented default orientation is --fr).
    mo = opts.mateorientation
    if mo == "+-":
        orientation_flag = ""
    elif mo == "-+":
        orientation_flag = "--rf"
    else:
        orientation_flag = "--ff"
    # Join with a space so a user-supplied --extra and the orientation flag do
    # not fuse into one malformed token (e.g. "-k 5--rf").
    extra = " ".join(x for x in (opts.extra, orientation_flag) if x)

    # Two positionals => single-end, three => paired-end, anything else => help
    nargs = len(args)
    if nargs == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif nargs == 3:
        logging.debug("Paired-end alignment")
        PE = True
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = get_abs_path(dbfile)
    safile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(
        readfile, dbfile, bowtie=True, mapped=mapped, unmapped=unmapped, bam=opts.bam
    )
    logfile = prefix + ".log"
    offset = guessoffset([readfile])

    if not need_update(safile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the bowtie2 log; `with` closes the handle, and the print() call
    # form matches the other `align` definition in this file.
    with open(logfile) as fp:
        print(fp.read(), file=sys.stderr)

    return samfile, logfile
def align(args):
    """
    %prog align database.fasta read1.fq [read2.fq]

    Wrapper for `bowtie2` single-end or paired-end, depending on the number of
    args.
    """
    # NOTE: the docstring above doubles as the OptionParser usage text.
    #
    # Builds a `bowtie2` shell command from the parsed options, runs it via
    # `sh`, echoes the bowtie2 log to stderr and returns (samfile, logfile).
    # With --null the alignment output is sent to /dev/null and that path is
    # what gets returned as `samfile`. When `need_update` reports the output is
    # current, nothing is run and the tuple is returned early.
    from jcvi.formats.fastq import guessoffset

    p = OptionParser(align.__doc__)
    p.set_firstN(firstN=0)
    p.add_option("--full", default=False, action="store_true",
                 help="Enforce end-to-end alignment [default: local]")
    p.add_option("--reorder", default=False, action="store_true",
                 help="Keep the input read order [default: %default]")
    p.add_option("--null", default=False, action="store_true",
                 help="Do not write to SAM/BAM output")
    p.add_option("--fasta", default=False, action="store_true",
                 help="Query reads are FASTA")
    p.set_cutoff(cutoff=800)
    p.set_mateorientation(mateorientation="+-")
    p.set_sam_options(bowtie=True)
    opts, args = p.parse_args(args)

    # Map the mate orientation onto a bowtie2 flag. No flag is added for "+-"
    # (bowtie2's documented default orientation is --fr).
    mo = opts.mateorientation
    if mo == "+-":
        orientation_flag = ""
    elif mo == "-+":
        orientation_flag = "--rf"
    else:
        orientation_flag = "--ff"
    # Join with a space so a user-supplied --extra and the orientation flag do
    # not fuse into one malformed token (e.g. "-k 5--rf").
    extra = " ".join(x for x in (opts.extra, orientation_flag) if x)

    # Two positionals => single-end, three => paired-end, anything else => help
    nargs = len(args)
    if nargs == 2:
        logging.debug("Single-end alignment")
        PE = False
    elif nargs == 3:
        logging.debug("Paired-end alignment")
        PE = True
    else:
        sys.exit(not p.print_help())

    firstN = opts.firstN
    mapped = opts.mapped
    unmapped = opts.unmapped
    fasta = opts.fasta
    gl = "--end-to-end" if opts.full else "--local"

    dbfile, readfile = args[0:2]
    dbfile = check_index(dbfile)
    prefix = get_prefix(readfile, dbfile)
    samfile, mapped, unmapped = get_samfile(readfile, dbfile, bowtie=True,
                                            mapped=mapped, unmapped=unmapped,
                                            bam=opts.bam)
    logfile = prefix + ".log"
    # The quality offset is only guessed for non-FASTA input; it is consumed
    # by the --phred flag below, which is skipped for FASTA.
    if not fasta:
        offset = guessoffset([readfile])

    if not need_update(dbfile, samfile):
        logging.error("`{0}` exists. `bowtie2` already run.".format(samfile))
        return samfile, logfile

    cmd = "bowtie2 -x {0}".format(dbfile)
    if PE:
        r1, r2 = args[1:3]
        cmd += " -1 {0} -2 {1}".format(r1, r2)
        cmd += " --maxins {0}".format(opts.cutoff)
        mtag, utag = "--al-conc", "--un-conc"
    else:
        cmd += " -U {0}".format(readfile)
        mtag, utag = "--al", "--un"

    if mapped:
        cmd += " {0} {1}".format(mtag, mapped)
    if unmapped:
        cmd += " {0} {1}".format(utag, unmapped)

    if firstN:
        cmd += " --upto {0}".format(firstN)
    cmd += " -p {0}".format(opts.cpus)
    if fasta:
        cmd += " -f"
    else:
        cmd += " --phred{0}".format(offset)
    cmd += " {0}".format(gl)
    if opts.reorder:
        cmd += " --reorder"

    cmd += " {0}".format(extra)
    # Finally the log
    cmd += " 2> {0}".format(logfile)

    if opts.null:
        samfile = "/dev/null"

    cmd = output_bam(cmd, samfile)
    sh(cmd)
    # Echo the bowtie2 log; `with` guarantees the handle is closed.
    with open(logfile) as fp:
        print(fp.read(), file=sys.stderr)

    return samfile, logfile