def soap_trios(p, pf, tag, extra):
    """
    Take one pair of reads and 'widow' reads after correction and run SOAP.

    `p` is the collection of input read files: it is joined with "," for the
    log message, compared against the output via need_update(), and handed to
    slink(). `pf` is the directory that is chdir'ed into for the run and also
    the prefix of the result file `<pf>.closed.scafSeq`, which is written to
    the directory the function was called from. `tag` and `extra` are passed
    through to slink() untouched.

    Nothing is returned. If need_update() reports the assembly as current,
    the function logs that and returns before doing any work.
    """
    from jcvi.assembly.soap import prepare

    logging.debug("Work on {0} ({1})".format(pf, ",".join(p)))
    asm = "{0}.closed.scafSeq".format(pf)
    if not need_update(p, asm):
        logging.debug("Assembly found: {0}. Skipped.".format(asm))
        return

    # presumably slink() populates the `pf` directory with the inputs --
    # verify against its definition.
    slink(p, pf, tag, extra)

    cwd = os.getcwd()
    os.chdir(pf)
    try:
        # prepare() receives every FASTQ (plain or gzipped) found in the work
        # directory, in sorted order, followed by the fixed SOAP options.
        prepare(
            sorted(glob("*.fastq") + glob("*.fastq.gz"))
            + ["--assemble_1st_rank_only", "-K 31"]
        )
        sh("./run.sh")
        sh("cp asm31.closed.scafSeq ../{0}".format(asm))
    finally:
        # Restore the caller's working directory even when a step above
        # raises; otherwise later relative paths would resolve inside `pf`.
        os.chdir(cwd)

    logging.debug("Assembly finished: {0}".format(asm))
def correct(args):
    """
    %prog correct *.fastq

    Correct the fastqfile and generated corrected fastqfiles. This calls
    assembly.allpaths.prepare() to generate input files for ALLPATHS-LG. The
    naming convention for your fastqfiles are important, and are listed below.

    By default, this will correct all PE reads, and remove duplicates of all MP
    reads, and results will be placed in `frag_reads.corr.{pairs,frags}.fastq`
    and `jump_reads.corr.{pairs,frags}.fastq`.
    """
    from jcvi.assembly.allpaths import prepare
    from jcvi.assembly.base import FastqNamings

    # The docstring above is concatenated with FastqNamings and given to the
    # option parser, so it is user-facing usage text rather than API docs.
    parser = OptionParser(correct.__doc__ + FastqNamings)
    parser.add_option(
        "--nofragsdedup",
        default=False,
        action="store_true",
        help="Don't deduplicate the fragment reads [default: %default]",
    )
    parser.add_option(
        "--cpus",
        default=32,
        type="int",
        help="Number of threads to run [default: %default]",
    )
    opts, args = parser.parse_args(args)

    # No positional FASTQ files: show the help and exit.
    if not args:
        sys.exit(not parser.print_help())

    fastq = args
    frag_tag = "frag_reads"
    jump_tag = "jump_reads"

    prepare(["Unknown"] + fastq + ["--norun"])

    datadir = "data"
    mkdir(datadir)
    fullpath = op.join(os.getcwd(), datadir)
    nthreads = " NUM_THREADS={0}".format(opts.cpus)
    # The quality offset is guessed from the first FASTQ file only.
    phred64 = guessoffset([args[0]]) == 64

    frag_fastb = "{0}/{1}_orig.fastb".format(datadir, frag_tag)
    if need_update(fastq, frag_fastb):
        cmd = "PrepareAllPathsInputs.pl DATA_DIR={0} HOSTS='{1}'".format(
            fullpath, opts.cpus
        )
        if phred64:
            cmd = cmd + " PHRED_64=True"
        sh(cmd)

    # Each read class is corrected only if its *_orig.fastb file exists.
    if op.exists(frag_fastb):
        correct_frag(
            datadir, frag_tag, frag_fastb, nthreads, dedup=not opts.nofragsdedup
        )

    jump_fastb = "{0}/{1}_orig.fastb".format(datadir, jump_tag)
    if op.exists(jump_fastb):
        correct_jump(datadir, jump_tag, jump_fastb, nthreads)
def correct(args):
    """
    %prog correct *.fastq

    Correct the fastqfile and generated corrected fastqfiles. This calls
    assembly.allpaths.prepare() to generate input files for ALLPATHS-LG. The
    naming convention for your fastqfiles are important, and are listed below.

    By default, this will correct all PE reads, and remove duplicates of all MP
    reads, and results will be placed in `frag_reads.corr.{pairs,frags}.fastq`
    and `jump_reads.corr.{pairs,frags}.fastq`.
    """
    from jcvi.assembly.allpaths import prepare
    from jcvi.assembly.base import FastqNamings

    # The docstring above is concatenated with FastqNamings and given to the
    # option parser, so it is user-facing usage text rather than API docs.
    parser = OptionParser(correct.__doc__ + FastqNamings)
    parser.add_option(
        "--nofragsdedup",
        default=False,
        action="store_true",
        help="Don't deduplicate the fragment reads [default: %default]",
    )
    parser.add_option(
        "--cpus",
        default=32,
        type="int",
        help="Number of threads to run [default: %default]",
    )
    parser.add_option(
        "--phred64",
        default=False,
        action="store_true",
        help="Reads are all phred 64 offset [default: %default]",
    )
    opts, args = parser.parse_args(args)

    # No positional FASTQ files: show the help and exit.
    if not args:
        sys.exit(not parser.print_help())

    fastq = args
    frag_tag = "frag_reads"
    jump_tag = "jump_reads"

    prepare(["Unknown"] + fastq + ["--norun"])

    datadir = "data"
    mkdir(datadir)
    fullpath = op.join(os.getcwd(), datadir)
    nthreads = " NUM_THREADS={0}".format(opts.cpus)

    frag_fastb = "{0}/{1}_orig.fastb".format(datadir, frag_tag)
    if need_update(fastq, frag_fastb):
        cmd = "PrepareAllPathsInputs.pl DATA_DIR={0} HOSTS='{1}'".format(
            fullpath, opts.cpus
        )
        # The quality offset comes from the --phred64 flag in this variant.
        if opts.phred64:
            cmd = cmd + " PHRED_64=True"
        sh(cmd)

    # Each read class is corrected only if its *_orig.fastb file exists.
    if op.exists(frag_fastb):
        correct_frag(
            datadir, frag_tag, frag_fastb, nthreads, dedup=not opts.nofragsdedup
        )

    jump_fastb = "{0}/{1}_orig.fastb".format(datadir, jump_tag)
    if op.exists(jump_fastb):
        correct_jump(datadir, jump_tag, jump_fastb, nthreads)
def assemble_dir(pf, target, ploidy="1"):
    """
    Run the ALLPATHS `prepare` + `./run.sh` pipeline inside directory `pf`.

    `pf` is the directory that is chdir'ed into; it is also passed as the
    first argument to prepare(). `target` lists file names expected under
    `allpaths/ASSEMBLIES/run/`; each is copied one level up, with the
    substring "final" replaced by `pf` in the destination name. `ploidy` is
    forwarded to prepare() as `--ploidy=<ploidy>`.

    Nothing is returned. If need_update() reports the outputs as current,
    the function logs that and returns before doing any work.
    """
    from jcvi.assembly.allpaths import prepare

    logging.debug("Work on {0}".format(pf))
    asm = [x.replace("final", pf) for x in target]
    if not need_update(pf, asm):
        logging.debug("Assembly found: {0}. Skipped.".format(asm))
        return

    cwd = os.getcwd()
    os.chdir(pf)
    try:
        # prepare() receives every FASTQ (plain or gzipped) found in the work
        # directory, in sorted order.
        prepare(
            [pf]
            + sorted(glob("*.fastq") + glob("*.fastq.gz"))
            + ["--ploidy={0}".format(ploidy)]
        )
        sh("./run.sh")

        for a, t in zip(asm, target):
            sh("cp allpaths/ASSEMBLIES/run/{0} ../{1}".format(t, a))
    finally:
        # Restore the caller's working directory even when a step above
        # raises; otherwise later relative paths would resolve inside `pf`.
        os.chdir(cwd)

    logging.debug("Assembly finished: {0}".format(asm))
def assemble_dir(pf, target, ploidy="1"):
    """
    Prepare and run an ALLPATHS assembly in directory `pf`, then copy results.

    Arguments:
      pf      directory to chdir into for the run; also given to prepare() as
              its first argument.
      target  file names to fetch from `allpaths/ASSEMBLIES/run/` after the
              run. Each is copied to the parent directory under a name where
              "final" is replaced by `pf`.
      ploidy  value forwarded to prepare() as `--ploidy=<ploidy>`.

    Returns None. The run is skipped (with a debug log line) when
    need_update() says the renamed targets are already current.
    """
    from jcvi.assembly.allpaths import prepare

    logging.debug("Work on {0}".format(pf))
    asm = [x.replace("final", pf) for x in target]
    if not need_update(pf, asm):
        logging.debug("Assembly found: {0}. Skipped.".format(asm))
        return

    cwd = os.getcwd()
    os.chdir(pf)
    try:
        fastqs = sorted(glob("*.fastq") + glob("*.fastq.gz"))
        prepare([pf] + fastqs + ["--ploidy={0}".format(ploidy)])
        sh("./run.sh")

        for a, t in zip(asm, target):
            sh("cp allpaths/ASSEMBLIES/run/{0} ../{1}".format(t, a))
    finally:
        # Go back to the starting directory whether or not the run succeeded,
        # so a failure here cannot change where the caller's relative paths
        # point.
        os.chdir(cwd)

    logging.debug("Assembly finished: {0}".format(asm))
def soap_trios(p, pf, tag, extra):
    """
    Take one pair of reads and 'widow' reads after correction and run SOAP.

    Arguments:
      p      input read files; joined with "," for logging, checked against
             the output by need_update(), and passed to slink().
      pf     directory to chdir into for the run, and prefix of the output
             `<pf>.closed.scafSeq` written to the calling directory.
      tag    forwarded to slink().
      extra  forwarded to slink().

    Returns None. The run is skipped (with a debug log line) when
    need_update() says the output is already current.
    """
    from jcvi.assembly.soap import prepare

    logging.debug("Work on {0} ({1})".format(pf, ",".join(p)))
    asm = "{0}.closed.scafSeq".format(pf)
    if not need_update(p, asm):
        logging.debug("Assembly found: {0}. Skipped.".format(asm))
        return

    # presumably slink() sets up the `pf` directory -- verify against its
    # definition.
    slink(p, pf, tag, extra)

    cwd = os.getcwd()
    os.chdir(pf)
    try:
        fastqs = sorted(glob("*.fastq") + glob("*.fastq.gz"))
        prepare(fastqs + ["--assemble_1st_rank_only", "-K 31"])
        sh("./run.sh")
        sh("cp asm31.closed.scafSeq ../{0}".format(asm))
    finally:
        # Go back to the starting directory whether or not the run succeeded,
        # so a failure here cannot change where the caller's relative paths
        # point.
        os.chdir(cwd)

    logging.debug("Assembly finished: {0}".format(asm))
def correct(args):
    """
    %prog correct *.fastq

    Correct the fastqfile and generated corrected fastqfiles. This calls
    assembly.allpaths.prepare() to generate input files for ALLPATHS-LG. The
    naming convention for your fastqfiles are important, and are listed below.

    By default, this will correct all PE reads, and remove duplicates of all MP
    reads, and results will be placed in `frag_reads.corr.{pairs,frags}.fastq`
    and `jump_reads.corr.{pairs,frags}.fastq`.
    """
    from jcvi.assembly.allpaths import prepare
    from jcvi.assembly.base import FastqNamings

    # The docstring above is concatenated with FastqNamings and given to the
    # option parser, so it is user-facing usage text rather than API docs.
    p = OptionParser(correct.__doc__ + FastqNamings)
    p.add_option("--dir", default="data",
                 help="Working directory [default: %default]")
    # The flag value is passed straight through as `dedup=`, so setting it
    # turns deduplication ON; the help text must say so.
    p.add_option("--fragsdedup", default=False, action="store_true",
                 help="Deduplicate the fragment reads [default: %default]")
    p.add_option("--ploidy", default="2", choices=("1", "2"),
                 help="Ploidy [default: %default]")
    p.add_option("--haploidify", default=False, action="store_true",
                 help="Set HAPLOIDIFY=True [default: %default]")
    p.add_option("--suffix", default=False, action="store_true",
                 help="Add suffix /1, /2 to read names")
    p.set_cpus()
    opts, args = p.parse_args(args)

    # No positional FASTQ files: show the help and exit.
    if len(args) < 1:
        sys.exit(not p.print_help())

    fastq = args
    tag, tagj, taglj = "frag_reads", "jump_reads", "long_jump_reads"

    ploidy = opts.ploidy
    haploidify = opts.haploidify
    suffix = opts.suffix
    # Explicit raise (not `assert`) so the check still runs under `python -O`;
    # AssertionError is kept so the exception type is unchanged.
    if haploidify and ploidy != "2":
        raise AssertionError("--haploidify requires --ploidy=2")

    prepare(["Unknown"] + fastq + ["--norun"])

    datadir = opts.dir
    mkdir(datadir)
    fullpath = op.join(os.getcwd(), datadir)
    nthreads = " NUM_THREADS={0}".format(opts.cpus)
    # The quality offset is guessed from the first FASTQ file only.
    phred64 = (guessoffset([args[0]]) == 64)

    orig = datadir + "/{0}_orig".format(tag)
    origfastb = orig + ".fastb"
    if need_update(fastq, origfastb):
        cmd = "PrepareAllPathsInputs.pl DATA_DIR={0} HOSTS='{1}' PLOIDY={2}".\
                format(fullpath, opts.cpus, ploidy)
        if phred64:
            cmd += " PHRED_64=True"
        sh(cmd)

    # Each read class is corrected only if its *_orig.fastb file exists.
    if op.exists(origfastb):
        correct_frag(datadir, tag, origfastb, nthreads,
                     dedup=opts.fragsdedup, haploidify=haploidify,
                     suffix=suffix)

    # Jump and long-jump libraries get identical treatment, in that order.
    for jtag in (tagj, taglj):
        jfastb = datadir + "/{0}_orig".format(jtag) + ".fastb"
        if op.exists(jfastb):
            correct_jump(datadir, jtag, jfastb, nthreads, suffix=suffix)