Example #1
0
def pairs(args):
    """
    %prog pairs folder reference.fasta

    Estimate insert size distribution. Compatible with a variety of aligners,
    including CLC, BOWTIE and BWA.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser below), so it is part of the program's output.
    #
    # For every read set found under `folder`, a sample of the reads is
    # aligned against `reference.fasta`, the median insert size is taken from
    # the pair statistics, and a "source<TAB>link name" line is recorded per
    # read file. All lines are finally written to `f.meta`.
    p = OptionParser(pairs.__doc__)
    p.set_firstN()
    p.set_mates()
    p.set_aligner()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    cwd = os.getcwd()
    aligner = opts.aligner
    work = "-".join(("pairs", aligner))
    mkdir(work)

    # Select the aligner front-end and the matching pair-statistics routine.
    if aligner == "clc":
        from jcvi.apps.clc import align
        from jcvi.formats.cas import pairs as ps
    else:
        from jcvi.formats.sam import pairs as ps

    if aligner == "bowtie":
        from jcvi.apps.bowtie import align
    elif aligner == "bwa":
        from jcvi.apps.bwa import align

    folder, ref = args
    # The reference is made absolute because we chdir into `work` below.
    ref = get_abs_path(ref)
    messages = []
    # `fastqs` is the list of read files of one library; it is deliberately
    # not called `p` so that it does not shadow the option parser.
    for fastqs, prefix in iter_project(folder, 2):
        samplefq = op.join(work, prefix + ".first.fastq")
        # presumably subsamples the first `firstN` reads -- confirm in `first`
        first([str(opts.firstN)] + fastqs + ["-o", samplefq])

        os.chdir(work)
        try:
            align_args = [ref, op.basename(samplefq)]
            outfile, logfile = align(align_args)
            bedfile, stats = ps([outfile, "--rclip={0}".format(opts.rclip)])
        finally:
            # Always return to the starting directory, even when alignment or
            # pair analysis raises, so a failure never leaves the process
            # stranded inside the work directory.
            os.chdir(cwd)

        median = stats.median
        tag = "MP" if median > 1000 else "PE"
        # Round the median up to two leading digits, e.g. 3456 -> 3500.
        median = str(median)
        pf, sf = median[:2], median[2:]
        if sf and int(sf) != 0:
            pf = str(int(pf) + 1)  # Get the first two effective digits
        lib = "{0}-{1}".format(tag, pf + "0" * len(sf))
        for i, xp in enumerate(fastqs):
            suffix = "fastq.gz" if xp.endswith(".gz") else "fastq"
            link = "{0}-{1}.{2}.{3}".format(lib, prefix.replace("-", ""), i + 1, suffix)
            m = "\t".join(str(x) for x in (xp, link))
            messages.append(m)

    messages = "\n".join(messages)
    write_file("f.meta", messages, tee=True)
Example #2
0
def pairs(args):
    """
    %prog pairs folder reference.fasta

    Estimate insert size distribution. Compatible with a variety of aligners,
    including BOWTIE and BWA.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser below), so it is part of the program's output.
    #
    # For every read set found under `folder`, a sample of each of the two
    # mate files is aligned against `reference.fasta`, the median insert size
    # is taken from the pair statistics, and a "source<TAB>link name" line is
    # recorded per read file. All lines are finally written to `f.meta`.
    p = OptionParser(pairs.__doc__)
    p.set_firstN()
    p.set_mates()
    p.set_aligner()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    cwd = os.getcwd()
    aligner = opts.aligner
    work = "-".join(("pairs", aligner))
    mkdir(work)

    from jcvi.formats.sam import pairs as ps

    # Select the aligner front-end.
    if aligner == "bowtie":
        from jcvi.apps.bowtie import align
    elif aligner == "bwa":
        from jcvi.apps.bwa import align

    folder, ref = args
    # The reference is made absolute because we chdir into `work` below.
    ref = get_abs_path(ref)
    messages = []
    # `fastqs` holds the read files of one library; it is deliberately not
    # called `p` so that it does not shadow the option parser.
    for fastqs, prefix in iter_project(folder):
        # One sampled file per mate (indices 0 and 1 of `fastqs`).
        samplefq = []
        for i in range(2):
            samplefq.append(
                op.join(work, prefix + "_{0}.first.fastq".format(i + 1)))
            # presumably subsamples the first `firstN` reads -- confirm in
            # `first`
            first([str(opts.firstN)] + [fastqs[i]] + ["-o", samplefq[i]])

        os.chdir(work)
        try:
            align_args = [ref] + [op.basename(fq) for fq in samplefq]
            outfile, logfile = align(align_args)
            bedfile, stats = ps([outfile, "--rclip={0}".format(opts.rclip)])
        finally:
            # Always return to the starting directory, even when alignment or
            # pair analysis raises, so a failure never leaves the process
            # stranded inside the work directory.
            os.chdir(cwd)

        median = stats.median
        tag = "MP" if median > 1000 else "PE"
        # Round the median up to two leading digits, e.g. 3456 -> 3500.
        median = str(median)
        pf, sf = median[:2], median[2:]
        if sf and int(sf) != 0:
            pf = str(int(pf) + 1)  # Get the first two effective digits
        lib = "{0}-{1}".format(tag, pf + "0" * len(sf))
        for i, xp in enumerate(fastqs):
            suffix = "fastq.gz" if xp.endswith(".gz") else "fastq"
            link = "{0}-{1}.{2}.{3}".format(lib, prefix.replace("-", ""),
                                            i + 1, suffix)
            m = "\t".join(str(x) for x in (xp, link))
            messages.append(m)

    messages = "\n".join(messages)
    write_file("f.meta", messages, tee=True)
Example #3
0
def scaffold(args):
    """
    %prog scaffold contigs.fasta MP*.fastq

    Run SSPACE scaffolding.
    """
    # The docstring above is also used as the usage text of the option parser.
    parser = OptionParser(scaffold.__doc__)
    parser.set_aligner(aligner="bwa")
    parser.set_home("sspace")
    parser.set_cpus()
    opts, args = parser.parse_args(args)

    # At least the contigs file must be given.
    if not args:
        sys.exit(not parser.print_help())

    contigs, read_files = args[0], args[1:]
    libtxt = write_libraries(read_files, aligner=opts.aligner)
    # SSPACE depends on getopts.pl, which may be missing locally
    download("http://mflib.org/xampp/perl/lib/getopts.pl")

    # Assemble the SSPACE command line and store it in run.sh.
    sspace_script = op.join(opts.sspace_home, "SSPACE_Standard_v3.0.pl")
    sspace_opts = " -l {0} -s {1} -T {2}".format(libtxt, contigs, opts.cpus)
    write_file("run.sh", "perl " + sspace_script + sspace_opts)
Example #4
0
def scaffold(args):
    """
    %prog scaffold contigs.fasta MP*.fastq

    Run SSPACE scaffolding.
    """
    # The docstring above is also used as the usage text of the option parser.
    parser = OptionParser(scaffold.__doc__)
    parser.set_aligner(aligner="bwa")
    parser.set_home("sspace")
    parser.set_cpus()
    opts, args = parser.parse_args(args)

    # At least the contigs file must be given.
    if not args:
        sys.exit(not parser.print_help())

    contigs, read_files = args[0], args[1:]
    libtxt = write_libraries(read_files, aligner=opts.aligner)
    # SSPACE depends on getopts.pl, which may be missing locally
    download("http://web.vims.edu/bridge/bridge2/aw/lib/getopts.pl")

    # Assemble the SSPACE command line and store it in run.sh.
    sspace_script = op.join(opts.sspace_home, "SSPACE_Standard_v3.0.pl")
    sspace_opts = " -l {0} -s {1} -T {2}".format(libtxt, contigs, opts.cpus)
    write_file("run.sh", "perl " + sspace_script + sspace_opts)
Example #5
0
def prepare(args):
    """
    %prog prepare barcode_key.csv reference.fasta

    Prepare TASSEL pipeline.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser below), so it is part of the program's output.
    #
    # This function does not run TASSEL itself: it creates the working
    # folders and writes the sequence of pipeline commands to `run.sh`.

    # Accepted values for --enzyme. Every continued fragment must end with
    # "|": adjacent string literals are concatenated, so a missing separator
    # silently fuses two enzyme names into one invalid choice (previously
    # "PstI-MspI" and "PstI-TaqI" were merged that way).
    valid_enzymes = "ApeKI|ApoI|BamHI|EcoT22I|HinP1I|HpaII|MseI|MspI|" \
                    "NdeI|PasI|PstI|Sau3AI|SbfI|AsiSI-MspI|BssHII-MspI|" \
                    "FseI-MspI|PaeR7I-HhaI|PstI-ApeKI|PstI-EcoT22I|PstI-MspI|" \
                    "PstI-TaqI|SalI-MspI|SbfI-MspI".split("|")
    p = OptionParser(prepare.__doc__)
    p.add_option("--enzyme",
                 default="ApeKI",
                 choices=valid_enzymes,
                 help="Restriction enzyme used [default: %default]")
    p.set_home("tassel")
    p.set_aligner(aligner="bwa")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    barcode, reference = args
    thome = opts.tassel_home
    # Absolute path: the generated script changes directory before aligning.
    reference = get_abs_path(reference)
    folders = ("fastq", "tagCounts", "mergedTagCounts", "topm", "tbt",
               "mergedTBT", "hapmap", "hapmap/raw", "hapmap/mergedSNPs",
               "hapmap/filt", "hapmap/bpec")
    for f in folders:
        mkdir(f)

    # Build the pipeline
    runsh = []
    # Step 1: count tags in the FASTQ files
    o = "-i fastq -k {0} -e {1} -o tagCounts".format(barcode, opts.enzyme)
    cmd = run_pipeline(thome, "FastqToTagCountPlugin", o)
    runsh.append(cmd)

    # Step 2: merge the tag counts and export them as FASTQ
    o = "-i tagCounts -o mergedTagCounts/myMasterTags.cnt"
    o += " -c 5 -t mergedTagCounts/myMasterTags.cnt.fq"
    cmd = run_pipeline(thome, "MergeMultipleTagCountPlugin", o)
    runsh.append(cmd)
    runsh.append("cd mergedTagCounts")

    # Step 3: align the master tags with the selected aligner
    cmd = "python -m jcvi.apps.{0} align --cpus {1}".format(
        opts.aligner, opts.cpus)
    cmd += " {0} myMasterTags.cnt.fq".format(reference)
    runsh.append(cmd)
    runsh.append("cd ..")

    # Step 4: convert the SAM alignments to a TOPM file
    o = "-i mergedTagCounts/*.sam -o topm/myMasterTags.topm"
    cmd = run_pipeline(thome, "SAMConverterPlugin", o)
    runsh.append(cmd)

    # Step 5: call SNPs from the aligned tags
    o = "-i mergedTBT/myStudy.tbt.byte -y -m topm/myMasterTags.topm"
    o += " -mUpd topm/myMasterTagsWithVariants.topm"
    o += " -o hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -mnF 0.8 -p myPedigreeFile.ped -mnMAF 0.02 -mnMAC 100000"
    o += " -ref {0} -sC 1 -eC 10".format(reference)
    cmd = run_pipeline(thome, "TagsToSNPByAlignmentPlugin", o)
    runsh.append(cmd)

    # Step 6: merge duplicate SNPs
    o = "-hmp hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -o hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -misMat 0.1 -p myPedigreeFile.ped -callHets -sC 1 -eC 10"
    cmd = run_pipeline(thome, "MergeDuplicateSNPsPlugin", o)
    runsh.append(cmd)

    # Step 7: filter the merged SNPs
    o = "-hmp hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -o hapmap/filt/myGBSGenos_mergedSNPsFilt_chr+.hmp.txt"
    o += " -mnTCov 0.01 -mnSCov 0.2 -mnMAF 0.01 -sC 1 -eC 10"
    #o += "-hLD -mnR2 0.2 -mnBonP 0.005"
    cmd = run_pipeline(thome, "GBSHapMapFiltersPlugin", o)
    runsh.append(cmd)

    runfile = "run.sh"
    write_file(runfile, "\n".join(runsh), meta="run script")
Example #6
0
def prepare(args):
    """
    %prog prepare barcode_key.csv reference.fasta

    Prepare TASSEL pipeline.
    """
    # NOTE: the docstring above doubles as the command-line usage text (it is
    # handed to OptionParser below), so it is part of the program's output.
    #
    # This function does not run TASSEL itself: it creates the working
    # folders and writes the sequence of pipeline commands to `run.sh`.

    # Accepted values for --enzyme. Every continued fragment must end with
    # "|": adjacent string literals are concatenated, so a missing separator
    # silently fuses two enzyme names into one invalid choice (previously
    # "PstI-MspI" and "PstI-TaqI" were merged that way).
    valid_enzymes = "ApeKI|ApoI|BamHI|EcoT22I|HinP1I|HpaII|MseI|MspI|" \
                    "NdeI|PasI|PstI|Sau3AI|SbfI|AsiSI-MspI|BssHII-MspI|" \
                    "FseI-MspI|PaeR7I-HhaI|PstI-ApeKI|PstI-EcoT22I|PstI-MspI|" \
                    "PstI-TaqI|SalI-MspI|SbfI-MspI".split("|")
    p = OptionParser(prepare.__doc__)
    p.add_option("--enzyme", default="ApeKI", choices=valid_enzymes,
                 help="Restriction enzyme used [default: %default]")
    p.set_home("tassel")
    p.set_aligner(aligner="bwa")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    barcode, reference = args
    thome = opts.tassel_home
    # Absolute path: the generated script changes directory before aligning.
    reference = get_abs_path(reference)
    folders = ("fastq", "tagCounts", "mergedTagCounts", "topm",
               "tbt", "mergedTBT", "hapmap", "hapmap/raw",
               "hapmap/mergedSNPs", "hapmap/filt", "hapmap/bpec")
    for f in folders:
        mkdir(f)

    # Build the pipeline
    runsh = []
    # Step 1: count tags in the FASTQ files
    o = "-i fastq -k {0} -e {1} -o tagCounts".format(barcode, opts.enzyme)
    cmd = run_pipeline(thome, "FastqToTagCountPlugin", o)
    runsh.append(cmd)

    # Step 2: merge the tag counts and export them as FASTQ
    o = "-i tagCounts -o mergedTagCounts/myMasterTags.cnt"
    o += " -c 5 -t mergedTagCounts/myMasterTags.cnt.fq"
    cmd = run_pipeline(thome, "MergeMultipleTagCountPlugin", o)
    runsh.append(cmd)
    runsh.append("cd mergedTagCounts")

    # Step 3: align the master tags with the selected aligner
    cmd = "python -m jcvi.apps.{0} align --cpus {1}".format(
        opts.aligner, opts.cpus)
    cmd += " {0} myMasterTags.cnt.fq".format(reference)
    runsh.append(cmd)
    runsh.append("cd ..")

    # Step 4: convert the SAM alignments to a TOPM file
    o = "-i mergedTagCounts/*.sam -o topm/myMasterTags.topm"
    cmd = run_pipeline(thome, "SAMConverterPlugin", o)
    runsh.append(cmd)

    # Step 5: call SNPs from the aligned tags
    o = "-i mergedTBT/myStudy.tbt.byte -y -m topm/myMasterTags.topm"
    o += " -mUpd topm/myMasterTagsWithVariants.topm"
    o += " -o hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -mnF 0.8 -p myPedigreeFile.ped -mnMAF 0.02 -mnMAC 100000"
    o += " -ref {0} -sC 1 -eC 10".format(reference)
    cmd = run_pipeline(thome, "TagsToSNPByAlignmentPlugin", o)
    runsh.append(cmd)

    # Step 6: merge duplicate SNPs
    o = "-hmp hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -o hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -misMat 0.1 -p myPedigreeFile.ped -callHets -sC 1 -eC 10"
    cmd = run_pipeline(thome, "MergeDuplicateSNPsPlugin", o)
    runsh.append(cmd)

    # Step 7: filter the merged SNPs
    o = "-hmp hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -o hapmap/filt/myGBSGenos_mergedSNPsFilt_chr+.hmp.txt"
    o += " -mnTCov 0.01 -mnSCov 0.2 -mnMAF 0.01 -sC 1 -eC 10"
    #o += "-hLD -mnR2 0.2 -mnBonP 0.005"
    cmd = run_pipeline(thome, "GBSHapMapFiltersPlugin", o)
    runsh.append(cmd)

    runfile = "run.sh"
    write_file(runfile, "\n".join(runsh))