def pairs(args):
    """
    %prog pairs folder reference.fasta

    Estimate insert size distribution. Compatible with a variety of aligners,
    including CLC, BOWTIE and BWA.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so its wording is part of the program's output.
    parser = OptionParser(pairs.__doc__)
    parser.set_firstN()
    parser.set_mates()
    parser.set_aligner()
    opts, args = parser.parse_args(args)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    cwd = os.getcwd()
    aligner = opts.aligner
    work = "-".join(("pairs", aligner))
    mkdir(work)

    # Deferred imports: only the modules of the selected aligner are loaded.
    # NOTE(review): for any aligner other than clc/bowtie/bwa, `align` stays
    # unbound and the first loop iteration fails -- confirm set_aligner()
    # restricts the accepted values.
    if aligner == "clc":
        from jcvi.apps.clc import align
        from jcvi.formats.cas import pairs as ps
    else:
        from jcvi.formats.sam import pairs as ps

        if aligner == "bowtie":
            from jcvi.apps.bowtie import align
        elif aligner == "bwa":
            from jcvi.apps.bwa import align

    folder, ref = args
    # Presumably made absolute so it still resolves after the chdir below.
    ref = get_abs_path(ref)

    messages = []
    for reads, prefix in iter_project(folder, 2):
        samplefq = op.join(work, prefix + ".first.fastq")
        first([str(opts.firstN)] + reads + ["-o", samplefq])

        # Run the aligner from inside the work directory, and always return
        # to the starting directory, even when alignment or parsing fails.
        os.chdir(work)
        try:
            align_args = [ref, op.basename(samplefq)]
            outfile, _ = align(align_args)
            _, stats = ps([outfile, "--rclip={0}".format(opts.rclip)])
        finally:
            os.chdir(cwd)

        median = stats.median
        tag = "MP" if median > 1000 else "PE"

        # Get the first two effective digits: keep the two leading digits,
        # bump them by one when any trailing digit is non-zero, then pad with
        # zeros back to the original width (e.g. 3456 -> 3500).
        # Assumes the median prints as a plain integer -- TODO confirm.
        median = str(median)
        pf, sf = median[:2], median[2:]
        if sf and int(sf) != 0:
            pf = str(int(pf) + 1)
        lib = "{0}-{1}".format(tag, pf + "0" * len(sf))

        # One "<source>\t<link name>" row per read file of this library.
        for number, xp in enumerate(reads, start=1):
            suffix = "fastq.gz" if xp.endswith(".gz") else "fastq"
            link = "{0}-{1}.{2}.{3}".format(
                lib, prefix.replace("-", ""), number, suffix
            )
            messages.append("\t".join(str(x) for x in (xp, link)))

    write_file("f.meta", "\n".join(messages), tee=True)
def pairs(args):
    """
    %prog pairs folder reference.fasta

    Estimate insert size distribution. Compatible with a variety of aligners,
    including BOWTIE and BWA.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so its wording is part of the program's output.
    parser = OptionParser(pairs.__doc__)
    parser.set_firstN()
    parser.set_mates()
    parser.set_aligner()
    opts, args = parser.parse_args(args)

    if len(args) != 2:
        sys.exit(not parser.print_help())

    cwd = os.getcwd()
    aligner = opts.aligner
    work = "-".join(("pairs", aligner))
    mkdir(work)

    # Deferred imports: only the module of the selected aligner is loaded.
    # NOTE(review): for any aligner other than bowtie/bwa, `align` stays
    # unbound and the first loop iteration fails -- confirm set_aligner()
    # restricts the accepted values.
    from jcvi.formats.sam import pairs as ps

    if aligner == "bowtie":
        from jcvi.apps.bowtie import align
    elif aligner == "bwa":
        from jcvi.apps.bwa import align

    folder, ref = args
    # Presumably made absolute so it still resolves after the chdir below.
    ref = get_abs_path(ref)

    messages = []
    for reads, prefix in iter_project(folder):
        # Sample the first N reads of each of the two mate files separately.
        samplefq = []
        for mate in range(2):
            sample = op.join(
                work, prefix + "_{0}.first.fastq".format(mate + 1)
            )
            samplefq.append(sample)
            first([str(opts.firstN)] + [reads[mate]] + ["-o", sample])

        # Run the aligner from inside the work directory, and always return
        # to the starting directory, even when alignment or parsing fails.
        os.chdir(work)
        try:
            align_args = [ref] + [op.basename(fq) for fq in samplefq]
            outfile, _ = align(align_args)
            _, stats = ps([outfile, "--rclip={0}".format(opts.rclip)])
        finally:
            os.chdir(cwd)

        median = stats.median
        tag = "MP" if median > 1000 else "PE"

        # Get the first two effective digits: keep the two leading digits,
        # bump them by one when any trailing digit is non-zero, then pad with
        # zeros back to the original width (e.g. 3456 -> 3500).
        # Assumes the median prints as a plain integer -- TODO confirm.
        median = str(median)
        pf, sf = median[:2], median[2:]
        if sf and int(sf) != 0:
            pf = str(int(pf) + 1)
        lib = "{0}-{1}".format(tag, pf + "0" * len(sf))

        # One "<source>\t<link name>" row per read file of this library.
        for number, xp in enumerate(reads, start=1):
            suffix = "fastq.gz" if xp.endswith(".gz") else "fastq"
            link = "{0}-{1}.{2}.{3}".format(
                lib, prefix.replace("-", ""), number, suffix
            )
            messages.append("\t".join(str(x) for x in (xp, link)))

    write_file("f.meta", "\n".join(messages), tee=True)
def scaffold(args):
    """
    %prog scaffold contigs.fasta MP*.fastq

    Run SSPACE scaffolding.
    """
    # The docstring doubles as the usage text given to the option parser.
    parser = OptionParser(scaffold.__doc__)
    parser.set_aligner(aligner="bwa")
    parser.set_home("sspace")
    parser.set_cpus()
    opts, args = parser.parse_args(args)

    if not args:
        sys.exit(not parser.print_help())

    # First positional argument is the contigs file; the rest are libraries.
    contigs, libraries = args[0], args[1:]
    libtxt = write_libraries(libraries, aligner=opts.aligner)

    # Requires getopts.pl which may be missing
    download("http://mflib.org/xampp/perl/lib/getopts.pl")

    # The SSPACE command line is only written to run.sh, not executed here.
    sspace_script = op.join(opts.sspace_home, "SSPACE_Standard_v3.0.pl")
    cmd = "perl {0} -l {1} -s {2} -T {3}".format(
        sspace_script, libtxt, contigs, opts.cpus
    )
    write_file("run.sh", cmd)
def scaffold(args):
    """
    %prog scaffold contigs.fasta MP*.fastq

    Run SSPACE scaffolding.
    """
    # The docstring doubles as the usage text given to the option parser.
    parser = OptionParser(scaffold.__doc__)
    parser.set_aligner(aligner="bwa")
    parser.set_home("sspace")
    parser.set_cpus()
    opts, args = parser.parse_args(args)

    if not args:
        sys.exit(not parser.print_help())

    # First positional argument is the contigs file; the rest are libraries.
    contigs, libraries = args[0], args[1:]
    libtxt = write_libraries(libraries, aligner=opts.aligner)

    # Requires getopts.pl which may be missing
    download("http://web.vims.edu/bridge/bridge2/aw/lib/getopts.pl")

    # The SSPACE command line is only written to run.sh, not executed here.
    sspace_script = op.join(opts.sspace_home, "SSPACE_Standard_v3.0.pl")
    cmd = "perl {0} -l {1} -s {2} -T {3}".format(
        sspace_script, libtxt, contigs, opts.cpus
    )
    write_file("run.sh", cmd)
def prepare(args):
    """
    %prog prepare barcode_key.csv reference.fasta

    Prepare TASSEL pipeline.
    """
    # Spelled out as an explicit list: the previous "|"-joined literal was
    # missing a separator between "PstI-MspI" and "PstI-TaqI", which fused
    # them into one bogus choice and made both enzymes unselectable.
    valid_enzymes = [
        "ApeKI", "ApoI", "BamHI", "EcoT22I", "HinP1I", "HpaII", "MseI",
        "MspI", "NdeI", "PasI", "PstI", "Sau3AI", "SbfI",
        "AsiSI-MspI", "BssHII-MspI", "FseI-MspI", "PaeR7I-HhaI",
        "PstI-ApeKI", "PstI-EcoT22I", "PstI-MspI", "PstI-TaqI",
        "SalI-MspI", "SbfI-MspI",
    ]
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(prepare.__doc__)
    p.add_option("--enzyme", default="ApeKI", choices=valid_enzymes,
                 help="Restriction enzyme used [default: %default]")
    p.set_home("tassel")
    p.set_aligner(aligner="bwa")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    barcode, reference = args
    thome = opts.tassel_home
    reference = get_abs_path(reference)

    # Directory layout referenced by the plugin options below.
    folders = ("fastq", "tagCounts", "mergedTagCounts", "topm",
               "tbt", "mergedTBT", "hapmap", "hapmap/raw",
               "hapmap/mergedSNPs", "hapmap/filt", "hapmap/bpec")
    for f in folders:
        mkdir(f)

    # Build the pipeline: each step is appended as one line of run.sh;
    # nothing is executed from here.
    runsh = []
    o = "-i fastq -k {0} -e {1} -o tagCounts".format(barcode, opts.enzyme)
    cmd = run_pipeline(thome, "FastqToTagCountPlugin", o)
    runsh.append(cmd)

    o = "-i tagCounts -o mergedTagCounts/myMasterTags.cnt"
    o += " -c 5 -t mergedTagCounts/myMasterTags.cnt.fq"
    cmd = run_pipeline(thome, "MergeMultipleTagCountPlugin", o)
    runsh.append(cmd)

    # Align the merged tags from inside mergedTagCounts, then step back out.
    runsh.append("cd mergedTagCounts")
    cmd = "python -m jcvi.apps.{0} align --cpus {1}".\
        format(opts.aligner, opts.cpus)
    cmd += " {0} myMasterTags.cnt.fq".format(reference)
    runsh.append(cmd)
    runsh.append("cd ..")

    o = "-i mergedTagCounts/*.sam -o topm/myMasterTags.topm"
    cmd = run_pipeline(thome, "SAMConverterPlugin", o)
    runsh.append(cmd)

    o = "-i mergedTBT/myStudy.tbt.byte -y -m topm/myMasterTags.topm"
    o += " -mUpd topm/myMasterTagsWithVariants.topm"
    o += " -o hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -mnF 0.8 -p myPedigreeFile.ped -mnMAF 0.02 -mnMAC 100000"
    o += " -ref {0} -sC 1 -eC 10".format(reference)
    cmd = run_pipeline(thome, "TagsToSNPByAlignmentPlugin", o)
    runsh.append(cmd)

    o = "-hmp hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -o hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -misMat 0.1 -p myPedigreeFile.ped -callHets -sC 1 -eC 10"
    cmd = run_pipeline(thome, "MergeDuplicateSNPsPlugin", o)
    runsh.append(cmd)

    o = "-hmp hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -o hapmap/filt/myGBSGenos_mergedSNPsFilt_chr+.hmp.txt"
    o += " -mnTCov 0.01 -mnSCov 0.2 -mnMAF 0.01 -sC 1 -eC 10"
    # Extra filter flags, currently disabled:
    #o += "-hLD -mnR2 0.2 -mnBonP 0.005"
    cmd = run_pipeline(thome, "GBSHapMapFiltersPlugin", o)
    runsh.append(cmd)

    runfile = "run.sh"
    write_file(runfile, "\n".join(runsh), meta="run script")
def prepare(args):
    """
    %prog prepare barcode_key.csv reference.fasta

    Prepare TASSEL pipeline.
    """
    # Spelled out as an explicit list: the previous "|"-joined literal was
    # missing a separator between "PstI-MspI" and "PstI-TaqI", which fused
    # them into one bogus choice and made both enzymes unselectable.
    valid_enzymes = [
        "ApeKI", "ApoI", "BamHI", "EcoT22I", "HinP1I", "HpaII", "MseI",
        "MspI", "NdeI", "PasI", "PstI", "Sau3AI", "SbfI",
        "AsiSI-MspI", "BssHII-MspI", "FseI-MspI", "PaeR7I-HhaI",
        "PstI-ApeKI", "PstI-EcoT22I", "PstI-MspI", "PstI-TaqI",
        "SalI-MspI", "SbfI-MspI",
    ]
    # NOTE: the docstring above is handed to OptionParser as the usage text.
    p = OptionParser(prepare.__doc__)
    p.add_option("--enzyme", default="ApeKI", choices=valid_enzymes,
                 help="Restriction enzyme used [default: %default]")
    p.set_home("tassel")
    p.set_aligner(aligner="bwa")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    barcode, reference = args
    thome = opts.tassel_home
    reference = get_abs_path(reference)

    # Directory layout referenced by the plugin options below.
    folders = ("fastq", "tagCounts", "mergedTagCounts", "topm",
               "tbt", "mergedTBT", "hapmap", "hapmap/raw",
               "hapmap/mergedSNPs", "hapmap/filt", "hapmap/bpec")
    for f in folders:
        mkdir(f)

    # Build the pipeline: each step is appended as one line of run.sh;
    # nothing is executed from here.
    runsh = []
    o = "-i fastq -k {0} -e {1} -o tagCounts".format(barcode, opts.enzyme)
    cmd = run_pipeline(thome, "FastqToTagCountPlugin", o)
    runsh.append(cmd)

    o = "-i tagCounts -o mergedTagCounts/myMasterTags.cnt"
    o += " -c 5 -t mergedTagCounts/myMasterTags.cnt.fq"
    cmd = run_pipeline(thome, "MergeMultipleTagCountPlugin", o)
    runsh.append(cmd)

    # Align the merged tags from inside mergedTagCounts, then step back out.
    runsh.append("cd mergedTagCounts")
    cmd = "python -m jcvi.apps.{0} align --cpus {1}".\
        format(opts.aligner, opts.cpus)
    cmd += " {0} myMasterTags.cnt.fq".format(reference)
    runsh.append(cmd)
    runsh.append("cd ..")

    o = "-i mergedTagCounts/*.sam -o topm/myMasterTags.topm"
    cmd = run_pipeline(thome, "SAMConverterPlugin", o)
    runsh.append(cmd)

    o = "-i mergedTBT/myStudy.tbt.byte -y -m topm/myMasterTags.topm"
    o += " -mUpd topm/myMasterTagsWithVariants.topm"
    o += " -o hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -mnF 0.8 -p myPedigreeFile.ped -mnMAF 0.02 -mnMAC 100000"
    o += " -ref {0} -sC 1 -eC 10".format(reference)
    cmd = run_pipeline(thome, "TagsToSNPByAlignmentPlugin", o)
    runsh.append(cmd)

    o = "-hmp hapmap/raw/myGBSGenos_chr+.hmp.txt"
    o += " -o hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -misMat 0.1 -p myPedigreeFile.ped -callHets -sC 1 -eC 10"
    cmd = run_pipeline(thome, "MergeDuplicateSNPsPlugin", o)
    runsh.append(cmd)

    o = "-hmp hapmap/mergedSNPs/myGBSGenos_mergedSNPs_chr+.hmp.txt"
    o += " -o hapmap/filt/myGBSGenos_mergedSNPsFilt_chr+.hmp.txt"
    o += " -mnTCov 0.01 -mnSCov 0.2 -mnMAF 0.01 -sC 1 -eC 10"
    # Extra filter flags, currently disabled:
    #o += "-hLD -mnR2 0.2 -mnBonP 0.005"
    cmd = run_pipeline(thome, "GBSHapMapFiltersPlugin", o)
    runsh.append(cmd)

    runfile = "run.sh"
    write_file(runfile, "\n".join(runsh))