Exemple #1
0
def soap_trios(p, pf, tag, extra):
    """
    Take one pair of reads and 'widow' reads after correction and run SOAP.

    Args:
        p: Iterable of strings (presumably read file paths -- confirm with
            caller). It is joined for logging, checked against the output by
            `need_update`, and handed to `slink`.
        pf: Prefix. Names the directory the assembly runs in and the final
            output file `<pf>.closed.scafSeq`.
        tag: Passed through to `slink` unchanged.
        extra: Passed through to `slink` unchanged.

    Returns:
        None. The assembly is copied to `<pf>.closed.scafSeq` one level above
        the `pf` directory.
    """
    from jcvi.assembly.soap import prepare

    logging.debug("Work on {0} ({1})".format(pf, ",".join(p)))
    asm = "{0}.closed.scafSeq".format(pf)
    if not need_update(p, asm):
        logging.debug("Assembly found: {0}. Skipped.".format(asm))
        return

    slink(p, pf, tag, extra)

    cwd = os.getcwd()
    os.chdir(pf)
    try:
        fastqs = sorted(glob("*.fastq") + glob("*.fastq.gz"))
        prepare(fastqs + ["--assemble_1st_rank_only", "-K 31"])
        sh("./run.sh")
        # The "../" target assumes `pf` is a direct child of the starting
        # directory -- TODO confirm with callers.
        sh("cp asm31.closed.scafSeq ../{0}".format(asm))
    finally:
        # Restore the working directory even when a step above fails, so the
        # caller (and any later prefix) is not left inside `pf`.
        os.chdir(cwd)

    logging.debug("Assembly finished: {0}".format(asm))
Exemple #2
0
def correct(args):
    """
    %prog correct *.fastq

    Correct the fastqfile and generated corrected fastqfiles. This calls
    assembly.allpaths.prepare() to generate input files for ALLPATHS-LG. The
    naming convention for your fastqfiles are important, and are listed below.

    By default, this will correct all PE reads, and remove duplicates of all MP
    reads, and results will be placed in `frag_reads.corr.{pairs,frags}.fastq`
    and `jump_reads.corr.{pairs,frags}.fastq`.
    """
    from jcvi.assembly.allpaths import prepare
    from jcvi.assembly.base import FastqNamings

    # The docstring above doubles as the usage text handed to the parser,
    # so it is part of the program's output.
    p = OptionParser(correct.__doc__ + FastqNamings)
    p.add_option("--nofragsdedup", default=False, action="store_true",
                 help="Don't deduplicate the fragment reads [default: %default]")
    p.add_option("--cpus", default=32, type="int",
                 help="Number of threads to run [default: %default]")
    opts, args = p.parse_args(args)

    # No FASTQ files given: show the help and exit.
    if not args:
        sys.exit(not p.print_help())

    fastq = args
    prepare(["Unknown"] + fastq + ["--norun"])

    datadir = "data"
    mkdir(datadir)
    fullpath = op.join(os.getcwd(), datadir)
    nthreads = " NUM_THREADS={0}".format(opts.cpus)
    # Quality offset is guessed from the first FASTQ file only.
    phred64 = guessoffset([args[0]]) == 64

    frag_tag = "frag_reads"
    jump_tag = "jump_reads"

    frag_fastb = "{0}/{1}_orig.fastb".format(datadir, frag_tag)
    if need_update(fastq, frag_fastb):
        cmd_parts = [
            "PrepareAllPathsInputs.pl",
            "DATA_DIR={0}".format(fullpath),
            "HOSTS='{0}'".format(opts.cpus),
        ]
        if phred64:
            cmd_parts.append("PHRED_64=True")
        sh(" ".join(cmd_parts))

    # Fragment library: deduplicated unless --nofragsdedup was given.
    if op.exists(frag_fastb):
        correct_frag(datadir, frag_tag, frag_fastb, nthreads,
                     dedup=not opts.nofragsdedup)

    # Jump library, processed only when its fastb file is present.
    jump_fastb = "{0}/{1}_orig.fastb".format(datadir, jump_tag)
    if op.exists(jump_fastb):
        correct_jump(datadir, jump_tag, jump_fastb, nthreads)
Exemple #3
0
def correct(args):
    """
    %prog correct *.fastq

    Correct the fastqfile and generated corrected fastqfiles. This calls
    assembly.allpaths.prepare() to generate input files for ALLPATHS-LG. The
    naming convention for your fastqfiles are important, and are listed below.

    By default, this will correct all PE reads, and remove duplicates of all MP
    reads, and results will be placed in `frag_reads.corr.{pairs,frags}.fastq`
    and `jump_reads.corr.{pairs,frags}.fastq`.
    """
    from jcvi.assembly.allpaths import prepare
    from jcvi.assembly.base import FastqNamings

    # The docstring above is reused as the parser's usage text, so it is
    # part of the program's output.
    p = OptionParser(correct.__doc__ + FastqNamings)
    p.add_option(
        "--nofragsdedup",
        default=False,
        action="store_true",
        help="Don't deduplicate the fragment reads [default: %default]",
    )
    p.add_option(
        "--cpus",
        default=32,
        type="int",
        help="Number of threads to run [default: %default]",
    )
    p.add_option(
        "--phred64",
        default=False,
        action="store_true",
        help="Reads are all phred 64 offset [default: %default]",
    )
    opts, args = p.parse_args(args)

    # Without any FASTQ argument, print the help and exit.
    if not args:
        sys.exit(not p.print_help())

    fastq = args
    prepare(["Unknown"] + fastq + ["--norun"])

    datadir = "data"
    mkdir(datadir)
    fullpath = op.join(os.getcwd(), datadir)
    nthreads = " NUM_THREADS={0}".format(opts.cpus)

    frag_tag, jump_tag = "frag_reads", "jump_reads"
    frag_fastb = "{0}/{1}_orig.fastb".format(datadir, frag_tag)
    jump_fastb = "{0}/{1}_orig.fastb".format(datadir, jump_tag)

    if need_update(fastq, frag_fastb):
        # PHRED_64=True is appended only when the user passed --phred64.
        phred_flag = " PHRED_64=True" if opts.phred64 else ""
        sh(
            "PrepareAllPathsInputs.pl DATA_DIR={0} HOSTS='{1}'".format(
                fullpath, opts.cpus
            )
            + phred_flag
        )

    # Fragment library: deduplicated unless --nofragsdedup was given.
    if op.exists(frag_fastb):
        correct_frag(
            datadir, frag_tag, frag_fastb, nthreads, dedup=not opts.nofragsdedup
        )

    # Jump library, processed only when its fastb file is present.
    if op.exists(jump_fastb):
        correct_jump(datadir, jump_tag, jump_fastb, nthreads)
Exemple #4
0
def assemble_dir(pf, target, ploidy="1"):
    """
    Run the ALLPATHS pipeline inside directory `pf` and copy out the results.

    Args:
        pf: Prefix. Names the directory to run in, is passed as the first
            argument to `prepare`, and replaces "final" in each target name
            to form the output file names.
        target: Iterable of file names expected under
            `allpaths/ASSEMBLIES/run/` after the run.
        ploidy: Value forwarded to `prepare` as `--ploidy=<ploidy>`.

    Returns:
        None. Each target is copied one level above the `pf` directory under
        its renamed form.
    """
    from jcvi.assembly.allpaths import prepare

    logging.debug("Work on {0}".format(pf))
    asm = [x.replace("final", pf) for x in target]
    if not need_update(pf, asm):
        logging.debug("Assembly found: {0}. Skipped.".format(asm))
        return

    cwd = os.getcwd()
    os.chdir(pf)
    try:
        fastqs = sorted(glob("*.fastq") + glob("*.fastq.gz"))
        prepare([pf] + fastqs + ["--ploidy={0}".format(ploidy)])
        sh("./run.sh")

        # The "../" target assumes `pf` is a direct child of the starting
        # directory -- TODO confirm with callers.
        for a, t in zip(asm, target):
            sh("cp allpaths/ASSEMBLIES/run/{0} ../{1}".format(t, a))
    finally:
        # Restore the working directory even when a step above fails, so the
        # caller is not left inside `pf`.
        os.chdir(cwd)

    logging.debug("Assembly finished: {0}".format(asm))
Exemple #5
0
def assemble_dir(pf, target, ploidy="1"):
    """
    Run the ALLPATHS pipeline inside directory `pf` and copy out the results.

    Args:
        pf: Prefix. Names the directory to run in, is passed as the first
            argument to `prepare`, and replaces "final" in each target name
            to form the output file names.
        target: Iterable of file names expected under
            `allpaths/ASSEMBLIES/run/` after the run.
        ploidy: Value forwarded to `prepare` as `--ploidy=<ploidy>`.

    Returns:
        None. Each target is copied one level above the `pf` directory under
        its renamed form.
    """
    from jcvi.assembly.allpaths import prepare

    logging.debug("Work on {0}".format(pf))
    asm = [x.replace("final", pf) for x in target]
    if not need_update(pf, asm):
        logging.debug("Assembly found: {0}. Skipped.".format(asm))
        return

    cwd = os.getcwd()
    os.chdir(pf)
    try:
        fastqs = sorted(glob("*.fastq") + glob("*.fastq.gz"))
        prepare([pf] + fastqs + ["--ploidy={0}".format(ploidy)])
        sh("./run.sh")

        # The "../" target assumes `pf` is a direct child of the starting
        # directory -- TODO confirm with callers.
        for a, t in zip(asm, target):
            sh("cp allpaths/ASSEMBLIES/run/{0} ../{1}".format(t, a))
    finally:
        # Restore the working directory even when a step above fails, so the
        # caller is not left inside `pf`.
        os.chdir(cwd)

    logging.debug("Assembly finished: {0}".format(asm))
Exemple #6
0
def soap_trios(p, pf, tag, extra):
    """
    Take one pair of reads and 'widow' reads after correction and run SOAP.

    Args:
        p: Iterable of strings (presumably read file paths -- confirm with
            caller). It is joined for logging, checked against the output by
            `need_update`, and handed to `slink`.
        pf: Prefix. Names the directory the assembly runs in and the final
            output file `<pf>.closed.scafSeq`.
        tag: Passed through to `slink` unchanged.
        extra: Passed through to `slink` unchanged.

    Returns:
        None. The assembly is copied to `<pf>.closed.scafSeq` one level above
        the `pf` directory.
    """
    from jcvi.assembly.soap import prepare

    logging.debug("Work on {0} ({1})".format(pf, ",".join(p)))
    asm = "{0}.closed.scafSeq".format(pf)
    if not need_update(p, asm):
        logging.debug("Assembly found: {0}. Skipped.".format(asm))
        return

    slink(p, pf, tag, extra)

    cwd = os.getcwd()
    os.chdir(pf)
    try:
        fastqs = sorted(glob("*.fastq") + glob("*.fastq.gz"))
        prepare(fastqs + ["--assemble_1st_rank_only", "-K 31"])
        sh("./run.sh")
        # The "../" target assumes `pf` is a direct child of the starting
        # directory -- TODO confirm with callers.
        sh("cp asm31.closed.scafSeq ../{0}".format(asm))
    finally:
        # Restore the working directory even when a step above fails, so the
        # caller (and any later prefix) is not left inside `pf`.
        os.chdir(cwd)

    logging.debug("Assembly finished: {0}".format(asm))
Exemple #7
0
def correct(args):
    """
    %prog correct *.fastq

    Correct the fastqfile and generated corrected fastqfiles. This calls
    assembly.allpaths.prepare() to generate input files for ALLPATHS-LG. The
    naming convention for your fastqfiles are important, and are listed below.

    By default, this will correct all PE reads, and remove duplicates of all MP
    reads, and results will be placed in `frag_reads.corr.{pairs,frags}.fastq`
    and `jump_reads.corr.{pairs,frags}.fastq`.
    """
    from jcvi.assembly.allpaths import prepare
    from jcvi.assembly.base import FastqNamings

    # The docstring above doubles as the usage text handed to the parser,
    # so it is kept verbatim.
    p = OptionParser(correct.__doc__ + FastqNamings)
    p.add_option("--dir", default="data",
                 help="Working directory [default: %default]")
    # The flag is forwarded as dedup=opts.fragsdedup, i.e. it turns fragment
    # deduplication ON; the help text states exactly that.
    p.add_option("--fragsdedup", default=False, action="store_true",
                 help="Deduplicate the fragment reads [default: %default]")
    p.add_option("--ploidy", default="2", choices=("1", "2"),
                 help="Ploidy [default: %default]")
    p.add_option("--haploidify", default=False, action="store_true",
                 help="Set HAPLOIDIFY=True [default: %default]")
    p.add_option("--suffix", default=False, action="store_true",
                 help="Add suffix /1, /2 to read names")
    p.set_cpus()
    opts, args = p.parse_args(args)

    # No FASTQ files given: show the help and exit.
    if len(args) < 1:
        sys.exit(not p.print_help())

    fastq = args
    tag, tagj, taglj = "frag_reads", "jump_reads", "long_jump_reads"

    ploidy = opts.ploidy
    haploidify = opts.haploidify
    suffix = opts.suffix
    # --haploidify is only accepted together with ploidy 2.
    assert not haploidify or ploidy == "2", \
        "--haploidify requires --ploidy=2"

    prepare(["Unknown"] + fastq + ["--norun"])

    datadir = opts.dir
    mkdir(datadir)
    fullpath = op.join(os.getcwd(), datadir)
    nthreads = " NUM_THREADS={0}".format(opts.cpus)
    # Quality offset is guessed from the first FASTQ file only.
    phred64 = guessoffset([args[0]]) == 64

    origfastb = "{0}/{1}_orig.fastb".format(datadir, tag)
    if need_update(fastq, origfastb):
        cmd = "PrepareAllPathsInputs.pl DATA_DIR={0} HOSTS='{1}' PLOIDY={2}".\
                format(fullpath, opts.cpus, ploidy)
        if phred64:
            cmd += " PHRED_64=True"
        sh(cmd)

    if op.exists(origfastb):
        correct_frag(datadir, tag, origfastb, nthreads, dedup=opts.fragsdedup,
                     haploidify=haploidify, suffix=suffix)

    # Jump and long-jump libraries get identical treatment, each only when
    # its fastb file is present.
    for jump_tag in (tagj, taglj):
        jump_fastb = "{0}/{1}_orig.fastb".format(datadir, jump_tag)
        if op.exists(jump_fastb):
            correct_jump(datadir, jump_tag, jump_fastb, nthreads,
                         suffix=suffix)
Exemple #8
0
def correct(args):
    """
    %prog correct *.fastq

    Correct the fastqfile and generated corrected fastqfiles. This calls
    assembly.allpaths.prepare() to generate input files for ALLPATHS-LG. The
    naming convention for your fastqfiles are important, and are listed below.

    By default, this will correct all PE reads, and remove duplicates of all MP
    reads, and results will be placed in `frag_reads.corr.{pairs,frags}.fastq`
    and `jump_reads.corr.{pairs,frags}.fastq`.
    """
    from jcvi.assembly.allpaths import prepare
    from jcvi.assembly.base import FastqNamings

    # The docstring above doubles as the usage text handed to the parser,
    # so it is kept verbatim.
    p = OptionParser(correct.__doc__ + FastqNamings)
    p.add_option("--dir", default="data",
                 help="Working directory [default: %default]")
    # The flag is forwarded as dedup=opts.fragsdedup, i.e. it turns fragment
    # deduplication ON; the help text states exactly that.
    p.add_option("--fragsdedup", default=False, action="store_true",
                 help="Deduplicate the fragment reads [default: %default]")
    p.add_option("--ploidy", default="2", choices=("1", "2"),
                 help="Ploidy [default: %default]")
    p.add_option("--haploidify", default=False, action="store_true",
                 help="Set HAPLOIDIFY=True [default: %default]")
    p.add_option("--suffix", default=False, action="store_true",
                 help="Add suffix /1, /2 to read names")
    p.set_cpus()
    opts, args = p.parse_args(args)

    # No FASTQ files given: show the help and exit.
    if len(args) < 1:
        sys.exit(not p.print_help())

    fastq = args
    tag, tagj, taglj = "frag_reads", "jump_reads", "long_jump_reads"

    ploidy = opts.ploidy
    haploidify = opts.haploidify
    suffix = opts.suffix
    # --haploidify is only accepted together with ploidy 2.
    assert not haploidify or ploidy == "2", \
        "--haploidify requires --ploidy=2"

    prepare(["Unknown"] + fastq + ["--norun"])

    datadir = opts.dir
    mkdir(datadir)
    fullpath = op.join(os.getcwd(), datadir)
    nthreads = " NUM_THREADS={0}".format(opts.cpus)
    # Quality offset is guessed from the first FASTQ file only.
    phred64 = guessoffset([args[0]]) == 64

    origfastb = "{0}/{1}_orig.fastb".format(datadir, tag)
    if need_update(fastq, origfastb):
        cmd = "PrepareAllPathsInputs.pl DATA_DIR={0} HOSTS='{1}' PLOIDY={2}".\
                format(fullpath, opts.cpus, ploidy)
        if phred64:
            cmd += " PHRED_64=True"
        sh(cmd)

    if op.exists(origfastb):
        correct_frag(datadir, tag, origfastb, nthreads, dedup=opts.fragsdedup,
                     haploidify=haploidify, suffix=suffix)

    # Jump and long-jump libraries get identical treatment, each only when
    # its fastb file is present.
    for jump_tag in (tagj, taglj):
        jump_fastb = "{0}/{1}_orig.fastb".format(datadir, jump_tag)
        if op.exists(jump_fastb):
            correct_jump(datadir, jump_tag, jump_fastb, nthreads,
                         suffix=suffix)