def customize(cls, reference, infile, outfile, regions=None, dependencies=()):
    """Build a 'samtools mpileup | bcftools view | bgzip' command chain.

    Arguments:
      reference -- FASTA file handed to mpileup through '-f'.
      infile    -- BAM file that is piled up.
      outfile   -- Destination of the bgzip output; the name must end with
                   '.vcf.bgz' (compared case-insensitively).
      regions   -- Optional file handed to mpileup through '-l'; the option
                   is left out when this value is unset / empty.
      dependencies -- Not used inside this function body.

    Returns a dict whose "commands" entry maps "pileup", "genotype" and
    "bgzip" to their (non-finalized) builders.
    """
    assert outfile.lower().endswith(".vcf.bgz")

    mpileup = AtomicCmdBuilder(["samtools", "mpileup"],
                               IN_REFERENCE=reference,
                               IN_BAMFILE=infile,
                               IN_REGIONS=regions,
                               OUT_STDOUT=AtomicCmd.PIPE,
                               CHECK_SAM=SAMTOOLS_VERSION)
    # '-u' asks for uncompressed output
    mpileup.set_option("-u")
    mpileup.set_option("-f", "%(IN_REFERENCE)s")
    mpileup.add_value("%(IN_BAMFILE)s")
    if regions:
        mpileup.set_option("-l", "%(IN_REGIONS)s")

    # Reads the pileup from STDIN ('-')
    caller = AtomicCmdBuilder(["bcftools", "view"],
                              IN_STDIN=mpileup,
                              OUT_STDOUT=AtomicCmd.PIPE)
    caller.add_value("-")

    compressor = AtomicCmdBuilder(["bgzip"],
                                  IN_STDIN=caller,
                                  OUT_STDOUT=outfile)

    commands = {
        "pileup": mpileup,
        "genotype": caller,
        "bgzip": compressor,
    }
    return {"commands": commands}
def customize(cls, reference, infiles, outfile, options, dependencies=()):
    """Build a 'samtools mpileup | bcftools view' variant-calling chain.

    Arguments:
      reference -- FASTA file handed to mpileup through '-f'.
      infiles   -- Iterable of BAM paths; each is appended to the mpileup
                   call as-is.
      outfile   -- Destination of the bcftools output; the name must end
                   with '.vcf' (compared case-insensitively).
      options, dependencies -- Not used inside this function body.

    Returns a dict whose "commands" entry maps "pileup" and "bcftools" to
    their (non-finalized) builders.
    """
    assert outfile.lower().endswith('.vcf')

    # Pileup step
    mpileup = AtomicCmdBuilder(['samtools', 'mpileup'],
                               IN_REFERENCE=reference,
                               OUT_STDOUT=AtomicCmd.PIPE,
                               CHECK_SAM=SAMTOOLS_VERSION)
    # '-u': uncompressed output
    mpileup.set_option('-u')
    # NOTE(review): the region is hard-coded here; confirm that this is
    # intentional before reusing this node for other references.
    mpileup.set_option('-r', 'chrUn2:1-19214051')
    mpileup.set_option('-f', "%(IN_REFERENCE)s")
    for bam_path in infiles:
        mpileup.add_option(bam_path)

    # Variant calling step, fed from the pileup
    caller = AtomicCmdBuilder(['bcftools', 'view'],
                              IN_STDIN=mpileup,
                              OUT_STDOUT=outfile)
    caller.set_option('-v')  # output potential variant sites
    caller.set_option('-c')  # SNP calling
    caller.set_option('-g')  # call genotypes at variant sites
    caller.set_option('-')   # read from STDIN

    commands = {
        "pileup": mpileup,
        "bcftools": caller,
    }
    return {"commands": commands}
def customize(cls, input_alignment, input_partition, output_template,
              threads=1, dependencies=()):
    """Set up a RAxML rapid-bootstrapping run ('-f a').

    Arguments:
      input_alignment -- An alignment file in a format readable by RAxML.
      input_partition -- A set of partitions in a format readable by RAxML.
      output_template -- Template string used to build the final filenames.
                         Should be a full path containing a single '%s',
                         which is replaced with the variable part of each
                         RAxML output file ('info', 'bestTree', ...).
      threads         -- When greater than 1, 'raxmlHPC-PTHREADS' is called
                         with '-T <threads>'; otherwise 'raxmlHPC' is used.
      dependencies    -- Not used inside this function body.

    Example template: '/disk/project/SN013420.RAxML.%s'
    Example output:   '/disk/project/SN013420.RAxML.bestTree'

    Returns a dict holding the (non-finalized) builder under "command".
    """
    use_pthreads = threads > 1
    command = AtomicCmdBuilder("raxmlHPC-PTHREADS" if use_pthreads
                               else "raxmlHPC")
    if use_pthreads:
        command.set_option("-T", threads)

    # Rapid bootstrapping
    command.set_option("-f", "a")
    # Output is written with a '.Pypeline' postfix and renamed afterwards
    command.set_option("-n", "Pypeline")
    # Keep all output inside the temporary directory
    command.set_option("-w", "%(TEMP_DIR)s")
    # The alignment / partitions are accessed by basename inside the temp
    # folder, so that any '*.reduced' files end up there as well
    command.set_option("-s", "%(TEMP_OUT_ALN)s")
    command.set_option("-q", "%(TEMP_OUT_PART)s")

    aln_name = os.path.basename(input_alignment)
    part_name = os.path.basename(input_partition)
    file_kwargs = {
        # Removed automatically: links and '.reduced' files RAxML may write
        "TEMP_OUT_PART": part_name,
        "TEMP_OUT_PART_R": part_name + ".reduced",
        "TEMP_OUT_ALN": aln_name,
        "TEMP_OUT_ALN_R": aln_name + ".reduced",
        # Inputs; not referenced directly on the command line
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        # Final outputs; not referenced directly on the command line
        "OUT_INFO": output_template % "info",
        "OUT_BESTTREE": output_template % "bestTree",
        "OUT_BOOTSTRAP": output_template % "bootstrap",
        "OUT_BIPART": output_template % "bipartitions",
        "OUT_BIPARTLABEL": output_template % "bipartitionsBranchLabels",
    }
    command.set_kwargs(**file_kwargs)

    # Defaults below are set with fixed=False.
    # GTRGAMMAI substitution model
    command.set_option("-m", "GTRGAMMAI", fixed=False)
    # Seed enabling rapid bootstrapping; a constant value gives replicability
    command.set_option("-x", int(random.random() * 2**31 - 1), fixed=False)
    # Seed for parsimony inference; a constant value gives replicability
    command.set_option("-p", int(random.random() * 2**31 - 1), fixed=False)
    # Stop bootstrapping on convergence instead of after a fixed count
    command.set_option("-N", "autoMRE", fixed=False)

    return {"command": command}
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Chain 'samtools view', Picard AddOrReplaceReadGroups and
    'samtools calmd' to produce a cleaned-up BAM.

    Arguments:
      config          -- Passed on to picard.picard_command.
      reference       -- Reference file handed to 'samtools calmd'.
      input_bam       -- BAM file read by 'samtools view'.
      output_bam      -- File receiving the STDOUT of 'samtools calmd'.
      tags            -- Mapping providing the "ID", "SM", "LB", "PU" and
                         "PL" read-group values.
      min_mapq        -- If non-zero, passed to 'samtools view' as '-q'.
      filter_unmapped -- If true, '-F0x4' is passed to 'samtools view'.
      dependencies    -- Forwarded to PicardNode.__init__.
    """
    view = AtomicCmdBuilder(("samtools", "view", "-bu"),
                            IN_BAM=input_bam,
                            OUT_STDOUT=AtomicCmd.PIPE)
    if min_mapq:
        view.set_option("-q", min_mapq, sep="")
    if filter_unmapped:
        view.set_option("-F", "0x4", sep="")
    view.add_value("%(IN_BAM)s")

    read_groups = picard.picard_command(config, "AddOrReplaceReadGroups")
    read_groups.set_option("INPUT", "/dev/stdin", sep="=")
    # The JVM may in some cases print warnings to STDOUT, which would
    # corrupt a BAM written there; a named pipe is used instead.
    read_groups.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    read_groups.set_option("COMPRESSION_LEVEL", "0", sep="=")
    # The pipeline requires a sorted BAM, and sorting has to happen before
    # calmd is run (otherwise runtimes become pathological).
    read_groups.set_option("SORT_ORDER", "coordinate", sep="=")

    # Every tag is overwritten. ID is set explicitly because the default
    # (e.g. '1') trips up pysam's type inference: it is read as a length-1
    # string but written as a character.
    for tag_name in ("ID", "SM", "LB", "PU", "PL"):
        read_groups.set_option(tag_name, tags[tag_name], sep="=")

    read_groups.set_kwargs(IN_STDIN=view,
                           TEMP_OUT_BAM="bam.pipe")

    calmd = AtomicCmdBuilder(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        IN_REF=reference,
        TEMP_IN_BAM="bam.pipe",
        OUT_STDOUT=output_bam)

    builders = (view, read_groups, calmd)
    commands = [builder.finalize() for builder in builders]

    description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)
    PicardNode.__init__(self,
                        command=ParallelCmds(commands),
                        description=description,
                        dependencies=dependencies)
def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
    """Build a 'unicat | vcf_create_pileup' command chain.

    Arguments:
      reference -- Reference file handed to vcf_create_pileup through '-f'.
      in_bam    -- BAM file given to vcf_create_pileup as a value.
      in_vcf    -- VCF file read by unicat.
      outfile   -- Pileup output; 'outfile + ".tbi"' is registered as an
                   additional output.
      dependencies -- Not used inside this function body.

    Returns a dict whose "commands" entry maps "unicat" and "pileup" to
    their (non-finalized) builders.
    """
    reader = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                              IN_VCF=in_vcf,
                              OUT_STDOUT=AtomicCmd.PIPE)

    pileup = AtomicCmdBuilder(["vcf_create_pileup", "%(OUT_PILEUP)s"],
                              IN_REF=reference,
                              IN_BAM=in_bam,
                              IN_STDIN=reader,
                              OUT_PILEUP=outfile,
                              OUT_TBI=outfile + ".tbi")
    pileup.add_value("%(IN_BAM)s")
    pileup.set_option("-f", "%(IN_REF)s")

    commands = {"unicat": reader, "pileup": pileup}
    return {"commands": commands}
def customize(self, reference, directory, dependencies=()):
    """Build the 'mapDamage --stats-only' command.

    Arguments:
      reference    -- Reference file passed to mapDamage through '-r'.
      directory    -- Folder in which the final output files are expected;
                      mapDamage itself is pointed at the temporary folder
                      ('-d %(TEMP_DIR)s').
      dependencies -- Returned unchanged under the "dependencies" key.

    Returns a dict with the (non-finalized) builder under "command" and the
    dependencies under "dependencies".
    """
    call = [
        "mapDamage", "--stats-only",
        "-r", "%(IN_REFERENCE)s",
        "-d", "%(TEMP_DIR)s",
    ]

    command = AtomicCmdBuilder(
        call,
        IN_REFERENCE=reference,

        # Files registered as temporary output
        TEMP_OUT_FREQ_3p="3pGtoA_freq.txt",
        TEMP_OUT_FREQ_5p="5pCtoT_freq.txt",
        # The composition table written by mapDamage is named 'dnacomp.txt';
        # the previous placeholder value ("******") could never match it.
        TEMP_OUT_COMP_USER="dnacomp.txt",
        TEMP_OUT_MISINCORP="misincorporation.txt",
        TEMP_OUT_LOG="Runtime_log.txt",
        TEMP_OUT_STDOUT="pipe_mapDamage.stdout",
        TEMP_OUT_STDERR="pipe_mapDamage.stderr",

        # Files registered as final output, located in 'directory'
        OUT_COMP_GENOME=os.path.join(directory, "dnacomp_genome.csv"),
        OUT_MCMC_PROBS=os.path.join(directory,
                                    "Stats_out_MCMC_correct_prob.csv"),
        OUT_MCMC_HIST=os.path.join(directory, "Stats_out_MCMC_hist.pdf"),
        OUT_MCMC_ITER=os.path.join(directory, "Stats_out_MCMC_iter.csv"),
        OUT_MCMC_ITERSUM=os.path.join(directory,
                                      "Stats_out_MCMC_iter_summ_stat.csv"),
        OUT_MCMC_POSTPRED=os.path.join(directory,
                                       "Stats_out_MCMC_post_pred.pdf"),
        OUT_MCMC_TRACE=os.path.join(directory, "Stats_out_MCMC_trace.pdf"),

        CHECK_RSCRIPT=RSCRIPT_VERSION,
        CHECK_MAPDAMAGE=MAPDAMAGE_VERSION)

    return {"command": command, "dependencies": dependencies}
def customize(cls, input_alignment, input_partition,
              output_alignment, output_partition, dependencies=()):
    """Build a 'raxmlHPC -f c' command that reads (and, for empty columns,
    reduces) an alignment and its partitions.

    Arguments:
      input_alignment  -- Alignment file registered as IN_ALIGNMENT.
      input_partition  -- Partition file registered as IN_PARTITION.
      output_alignment -- Registered as OUT_ALIGNMENT.
      output_partition -- Registered as OUT_PARTITION.
      dependencies     -- Not used inside this function body.

    Returns a dict holding the (non-finalized) builder under "command".
    """
    command = AtomicCmdBuilder("raxmlHPC")

    options = (
        # Read the input, reducing it if it contains empty columns
        ("-f", "c"),
        # Output is written with a '.Pypeline' postfix and renamed later
        ("-n", "Pypeline"),
        # A model has to be specified, but is not used
        ("-m", "GTRGAMMA"),
        # Keep all output inside the temporary directory
        ("-w", "%(TEMP_DIR)s"),
        # Alignment / partitions are read from within the temp folder, so
        # that any '*.reduced' files are not created elsewhere
        ("-s", "%(TEMP_IN_ALIGNMENT)s"),
        ("-q", "%(TEMP_IN_PARTITION)s"),
    )
    for flag, value in options:
        command.set_option(flag, value)

    command.set_kwargs(IN_ALIGNMENT=input_alignment,
                       IN_PARTITION=input_partition,
                       TEMP_IN_ALIGNMENT="RAXML_alignment",
                       TEMP_IN_PARTITION="RAXML_partitions",
                       TEMP_OUT_INFO="RAxML_info.Pypeline",
                       OUT_ALIGNMENT=output_alignment,
                       OUT_PARTITION=output_partition)

    return {"command": command}
def test_builder__add_option__overwrite():
    """Repeated add_option calls with the same key keep every occurrence,
    in the order in which they were added."""
    cmd = AtomicCmdBuilder("find")
    for option in (("-name", "*.txt"), ("-or",), ("-name", "*.bat")):
        cmd.add_option(*option)

    expected_call = ["find", "-name", "*.txt", "-or", "-name", "*.bat"]
    assert_equal(cmd.call, expected_call)
def customize(cls, input_alignment, input_partitions, output_tree,
              dependencies=()):
    """Build a 'raxmlHPC -y' command computing a randomized parsimony
    starting tree.

    Arguments:
      input_alignment  -- Alignment file registered as IN_ALIGNMENT.
      input_partitions -- Partition file registered as IN_PARTITION.
      output_tree      -- Registered as OUT_TREE.
      dependencies     -- Not used inside this function body.

    Returns a dict holding the (non-finalized) builder under "command".
    """
    command = AtomicCmdBuilder("raxmlHPC")

    # Randomized parsimony starting tree
    command.set_option("-y")
    # Output is written with a '.Pypeline' postfix and renamed afterwards
    command.set_option("-n", "Pypeline")
    # A model has to be specified, but is not used
    command.set_option("-m", "GTRGAMMA")
    # Keep all output inside the temporary directory
    command.set_option("-w", "%(TEMP_DIR)s")

    # Random seed passed via '-p'; not fixed, so it may be replaced with a
    # constant value to allow replicability
    seed = int(random.random() * 2**31 - 1)
    command.set_option("-p", seed, fixed=False)

    # Alignment / partitions are read from within the temp folder, so that
    # '*.reduced' files are not created outside of it
    command.set_option("-s", "%(TEMP_OUT_ALIGNMENT)s")
    command.set_option("-q", "%(TEMP_OUT_PARTITION)s")

    command.set_kwargs(
        IN_ALIGNMENT=input_alignment,
        IN_PARTITION=input_partitions,

        # Declared as TEMP_OUT_ so that they are removed automatically
        TEMP_OUT_ALIGNMENT="RAxML_alignment",
        TEMP_OUT_PARTITION="RAxML_partitions",
        TEMP_OUT_INFO="RAxML_info.Pypeline",

        OUT_TREE=output_tree,
        CHECK_VERSION=RAXML_VERSION)

    return {"command": command}
def customize(cls, input_alignment, input_partition, output_file,
              dependencies=()):
    """Build an 'examlParser' command (run with set_cwd=True).

    Arguments:
      input_alignment -- An alignment file in a format readable by RAxML.
      input_partition -- A set of partitions in a format readable by RAxML.
      output_file     -- Filename for the output binary sequence.
      dependencies    -- Not used inside this function body.

    Returns a dict holding the (non-finalized) builder under "command".
    """
    command = AtomicCmdBuilder("examlParser", set_cwd=True)

    command.set_option("-s", "%(TEMP_OUT_ALN)s")
    command.set_option("-q", "%(TEMP_OUT_PART)s")
    # The result is named 'output.binary' and written to the CWD
    command.set_option("-n", "output")
    # Substitution model; not fixed
    command.set_option("-m", "DNA", fixed=False)

    aln_name = os.path.basename(input_alignment)
    part_name = os.path.basename(input_partition)
    command.set_kwargs(
        # Removed automatically: links to the input files
        TEMP_OUT_PART=part_name,
        TEMP_OUT_ALN=aln_name,

        # Inputs; not referenced directly on the command line
        IN_ALIGNMENT=input_alignment,
        IN_PARTITION=input_partition,

        # Final output; not referenced directly on the command line
        OUT_BINARY=output_file)

    return {"command": command}
def _do_test_builder__pop_option(setter):
    """Shared check: after three non-fixed options have been added through
    'setter', pop_option("-size") removes just that option from the call.

    'setter' is called as setter(builder, key[, value], fixed=False).
    """
    cmd = AtomicCmdBuilder("find")
    for option in (("-empty",), ("-size", "1"), ("-name", "*.txt")):
        setter(cmd, *option, fixed=False)

    cmd.pop_option("-size")

    assert_equal(cmd.call, ["find", "-empty", "-name", "*.txt"])
def customize(cls, input_alignment, input_partition, output_alignment,
              dependencies=()):
    """Build a 'raxmlHPC -f j' command (run with set_cwd=True) generating a
    bootstrap alignment.

    Arguments:
      input_alignment  -- Alignment file registered as IN_ALIGNMENT.
      input_partition  -- Partition file registered as IN_PARTITION.
      output_alignment -- Registered as OUT_ALIGNMENT; the same path with
                          its extension swapped for '.info' is registered
                          as OUT_INFO.
      dependencies     -- Not used inside this function body.

    Returns a dict holding the (non-finalized) builder under "command".
    """
    command = AtomicCmdBuilder("raxmlHPC", set_cwd=True)

    command.set_option("-f", "j")
    # Output is written with a '.Pypeline' postfix and renamed afterwards
    command.set_option("-n", "Pypeline")
    # A model has to be specified, but is not used
    command.set_option("-m", "GTRGAMMA")

    # Seed for bootstrap generation; not fixed, so it may be replaced with
    # a constant value to allow replicability
    seed = int(random.random() * 2**31 - 1)
    command.set_option("-b", seed, fixed=False)
    # One bootstrap alignment per run, which makes it easier to grow the
    # number of bootstraps
    command.set_option("-N", 1, fixed=False)

    # The inputs are referred to by fixed names relative to the CWD, so
    # that '*.reduced' files are not created outside the temp folder
    command.set_option("-s", "input.alignment")
    command.set_option("-q", "input.partition")

    info_file = fileutils.swap_ext(output_alignment, ".info")
    command.set_kwargs(IN_ALIGNMENT=input_alignment,
                       IN_PARTITION=input_partition,
                       OUT_ALIGNMENT=output_alignment,
                       OUT_INFO=info_file)

    return {"command": command}
def _get_common_parameters(version):
    """Return an 'AdapterRemoval' builder with the shared default options.

    'version' must equal VERSION_14 or VERSION_15 and selects the version
    check attached to the command; any other value raises CmdError.

    A deprecation warning is printed at most once per process (tracked via
    the module-level _DEPRECATION_WARNING_PRINTED flag) when the detected
    version is older than 2.0. A VersionRequirementError raised while
    determining the version is ignored here.
    """
    global _DEPRECATION_WARNING_PRINTED

    if version == VERSION_14:
        version_check = _VERSION_14_CHECK
    elif version == VERSION_15:
        version_check = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    cmd = AtomicCmdBuilder("AdapterRemoval",
                           CHECK_VERSION=version_check)
    # Both defaults are added with fixed=False
    cmd.set_option("--trimns", fixed=False)         # trim Ns at read ends
    cmd.set_option("--trimqualities", fixed=False)  # trim low quality scores

    try:
        if not _DEPRECATION_WARNING_PRINTED \
                and version_check.version < (2, 0):
            import pypeline.ui as ui

            warning_lines = (
                "\nWARNING: AdapterRemoval v1.5.x is deprecated;",
                "         Upgrading to 2.1.x is strongly adviced!\n",
                "         Download the newest version of AdapterRemoval at ",
                "         https://github.com/MikkelSchubert/adapterremoval\n",
            )
            for line in warning_lines:
                ui.print_warn(line)

            _DEPRECATION_WARNING_PRINTED = True
    except versions.VersionRequirementError:
        pass

    return cmd
def customize(self, config, reference, input_files, output_file, directory,
              dependencies=()):
    """Build the 'mapDamage --rescale-only' command.

    Arguments:
      reference   -- Reference file passed to mapDamage through '-r'.
      output_file -- BAM file passed through '--rescale-out'.
      config, input_files, directory, dependencies -- Not used to build the
                     command; returned unchanged in the result dict.

    Returns a dict with the (non-finalized) builder under "command", plus
    the "config", "input_files", "directory" and "dependencies" values.
    """
    stats_out_fname = "Stats_out_MCMC_correct_prob.csv"

    call = [
        "mapDamage", "--rescale-only",
        "-i", "%(TEMP_IN_BAM)s",
        "-d", "%(TEMP_DIR)s",
        "-r", "%(IN_REFERENCE)s",
        "--rescale-out", "%(OUT_BAM)s",
    ]
    command = AtomicCmdBuilder(call,
                               IN_REFERENCE=reference,
                               TEMP_OUT_LOG="Runtime_log.txt",
                               TEMP_OUT_CSV=stats_out_fname,
                               OUT_BAM=output_file,
                               CHECK_VERSION=MAPDAMAGE_VERSION)

    result = {"command": command}
    result["config"] = config
    result["input_files"] = input_files
    result["directory"] = directory
    result["dependencies"] = dependencies
    return result
def _build_cat_command():
    """Returns a AtomicCmdBuilder for the 'paleomix cat' command.

    The builder declares the 'gzip', 'bzip2' and 'cat' executables and the
    PALEOMIX version check.
    """
    call = [_PALEOMIX_PATH, "cat"]
    requirements = {
        "EXEC_GZIP": "gzip",
        "EXEC_BZIP": "bzip2",
        "EXEC_CAT": "cat",
        "CHECK_PALEOMIX": VERSION_PALEOMIX,
    }

    return AtomicCmdBuilder(call, **requirements)
def test_builder__set__kwargs__overwriting():
    """Setting an already-set kwarg raises AtomicCmdBuilderError and leaves
    the stored kwargs unchanged."""
    cmd = AtomicCmdBuilder("echo")
    cmd.set_kwargs(IN_PATH="/a/b/")

    assert_raises(AtomicCmdBuilderError,
                  cmd.set_kwargs, IN_PATH="/dst/file")
    assert_equal(cmd.kwargs, {"IN_PATH": "/a/b/"})
def _bowtie2_template(call, prefix, iotype="IN", **kwargs):
    """Return a builder for 'call' with the Bowtie2 index files registered.

    For each of the six index postfixes ('1.bt2' ... 'rev.2.bt2') the file
    'prefix + "." + postfix' is registered under the key
    '<iotype>_PREFIX_<POSTFIX>' (postfix upper-cased). Remaining keyword
    arguments are passed on to AtomicCmdBuilder.
    """
    index_postfixes = ("1.bt2", "2.bt2", "3.bt2", "4.bt2",
                       "rev.1.bt2", "rev.2.bt2")

    builder = AtomicCmdBuilder(call, **kwargs)
    for postfix in index_postfixes:
        name = "%s_PREFIX_%s" % (iotype, postfix.upper())
        filename = prefix + "." + postfix
        builder.set_kwargs(**{name: filename})

    return builder
def customize(cls, infile, intervals, outfile, dependencies=()):
    """Build a 'bam_sample_regions' command.

    Arguments:
      infile    -- File passed through '--genotype'.
      intervals -- File passed through '--intervals'.
      outfile   -- File receiving the command's STDOUT.
      dependencies -- Not used inside this function body.

    Returns a dict holding the (non-finalized) builder under "command".
    """
    command = AtomicCmdBuilder(["bam_sample_regions"],
                               IN_PILEUP=infile,
                               IN_INTERVALS=intervals,
                               OUT_STDOUT=outfile)

    for flag, value in (("--genotype", "%(IN_PILEUP)s"),
                        ("--intervals", "%(IN_INTERVALS)s")):
        command.set_option(flag, value)

    return {"command": command}
def test_builder__set_kwargs__after_finalize():
    """Calling set_kwargs on a finalized builder raises
    AtomicCmdBuilderError and leaves the stored kwargs unchanged."""
    cmd = AtomicCmdBuilder("echo")
    cmd.set_kwargs(IN_PATH="/a/b/")
    cmd.finalize()

    assert_raises(AtomicCmdBuilderError,
                  cmd.set_kwargs, OUT_PATH="/dst/file")
    assert_equal(cmd.kwargs, {"IN_PATH": "/a/b/"})
def customize(cls, input_file, output_file, algorithm="auto",
              dependencies=()):
    """Build a MAFFT command for the selected algorithm preset.

    Arguments:
      input_file   -- FASTA file given to the command as a value.
      output_file  -- File receiving the command's STDOUT.
      algorithm    -- Key into _PRESETS (looked up lower-cased); a KeyError
                      is raised for unknown names.
      dependencies -- Returned unchanged under the "dependencies" key.

    Returns a dict with the (non-finalized) builder under "command" and the
    dependencies under "dependencies".
    """
    preset_call = _PRESETS[algorithm.lower()]

    command = AtomicCmdBuilder(preset_call)
    command.add_value("%(IN_FASTA)s")
    command.set_kwargs(IN_FASTA=input_file,
                       OUT_STDOUT=output_file,
                       CHECK_VERSION=MAFFT_VERSION)

    return {
        "command": command,
        "dependencies": dependencies,
    }
def test_builder__add_multiple_values_with_template():
    """add_multiple_values with a custom template names the generated
    kwargs after that template, and appends one value per file."""
    filenames = ("file_a", "file_b")
    cmd = AtomicCmdBuilder("ls")

    returned = cmd.add_multiple_values(filenames, template="OUT_BAM_%i")

    expected_kwargs = {"OUT_BAM_1": "file_a", "OUT_BAM_2": "file_b"}
    assert_equal(returned, expected_kwargs)
    assert_equal(cmd.kwargs, expected_kwargs)
    assert_equal(cmd.call, ["ls", "%(OUT_BAM_1)s", "%(OUT_BAM_2)s"])
def test_builder__add_multiple_values():
    """add_multiple_values without a template generates 'IN_FILE_NN'
    kwargs, and appends one value per file."""
    filenames = ("file_a", "file_b")
    cmd = AtomicCmdBuilder("ls")

    returned = cmd.add_multiple_values(filenames)

    expected_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    assert_equal(returned, expected_kwargs)
    assert_equal(cmd.kwargs, expected_kwargs)
    assert_equal(cmd.call, ["ls", "%(IN_FILE_01)s", "%(IN_FILE_02)s"])
def _process_output(stdin, output_file, reference, run_fixmate=False):
    """Build the chain that turns an aligner's output into a sorted BAM.

    The chain is safeSAM2BAM -> [samtools fixmate] -> samtools sort ->
    samtools calmd; the fixmate step is only included if 'run_fixmate' is
    true.

    Arguments:
      stdin       -- Source connected to the STDIN of safeSAM2BAM.
      output_file -- File receiving the STDOUT of 'samtools calmd'.
      reference   -- Reference file handed to 'samtools calmd'.
      run_fixmate -- Whether to insert 'samtools fixmate' after conversion.

    Returns a tuple (order, commands): 'order' lists the command names in
    pipeline order and 'commands' maps those names to their builders.
    """
    convert = AtomicCmdBuilder("safeSAM2BAM")
    convert.set_option("--flag-as-sorted")
    # Drop misses; not fixed
    convert.set_option("-F", "0x4", sep="", fixed=False)
    convert.set_kwargs(IN_STDIN=stdin,
                       OUT_STDOUT=AtomicCmd.PIPE,
                       CHECK_PYSAM=PYSAM_VERSION,
                       CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    fixmate = None
    if run_fixmate:
        fixmate = AtomicCmdBuilder(("samtools", "fixmate", "-", "-"),
                                   IN_STDIN=convert,
                                   OUT_STDOUT=AtomicCmd.PIPE,
                                   CHECK_SAMTOOLS=SAMTOOLS_VERSION)

    sort = AtomicCmdBuilder(("samtools", "sort"))
    # '-o': write the result to STDOUT once sorting has completed
    sort.set_option("-o")
    sort.add_value("-")
    sort.add_value("%(TEMP_OUT_BAM)s")
    sort.set_kwargs(IN_STDIN=fixmate or convert,
                    OUT_STDOUT=AtomicCmd.PIPE,
                    TEMP_OUT_BAM="sorted",
                    CHECK_SAM=SAMTOOLS_VERSION)

    calmd = AtomicCmdBuilder(("samtools", "calmd"))
    calmd.add_value("-")
    calmd.add_value("%(IN_REF)s")
    # '-b': output BAM
    calmd.set_option("-b")
    calmd.set_kwargs(IN_REF=reference,
                     IN_STDIN=sort,
                     OUT_STDOUT=output_file,
                     CHECK_SAM=SAMTOOLS_VERSION)

    commands = {"convert": convert, "sort": sort, "calmd": calmd}
    if run_fixmate:
        commands["fixmate"] = fixmate
        order = ["convert", "fixmate", "sort", "calmd"]
    else:
        order = ["convert", "sort", "calmd"]

    return order, commands
def test_builder__add_multiple_options_with_sep():
    """add_multiple_options with sep="=" emits each option as a single
    'key=value' argument and generates 'IN_FILE_NN' kwargs."""
    filenames = ("file_a", "file_b")
    cmd = AtomicCmdBuilder("ls")

    returned = cmd.add_multiple_options("-i", filenames, sep="=")

    expected_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    assert_equal(returned, expected_kwargs)
    assert_equal(cmd.kwargs, expected_kwargs)
    assert_equal(cmd.call,
                 ["ls", "-i=%(IN_FILE_01)s", "-i=%(IN_FILE_02)s"])
def test_builder__add_multiple_options_with_template_fixed():
    """A second add_multiple_options call with the same key and values
    raises AtomicCmdBuilderError; the first call succeeds and generates
    'IN_FILE_NN' kwargs."""
    filenames = ("file_a", "file_b")
    cmd = AtomicCmdBuilder("ls")

    returned = cmd.add_multiple_options("-i", filenames)

    expected_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    assert_equal(returned, expected_kwargs)
    assert_equal(cmd.kwargs, expected_kwargs)
    assert_raises(AtomicCmdBuilderError,
                  cmd.add_multiple_options, "-i", filenames)
def customize(cls, groups, prefix, options, dependencies=()):
    """Build the 'vcf_merge' and 'vcf_snp_list' commands.

    Arguments:
      groups  -- Iterable of group names; one '-i' input is added to
                 vcf_merge per group, pointing at
                 'gatk.<group>.<prefix>.raw.recal_final.vcf' inside
                 options.makefile['RecalDir'].
      prefix  -- Used in the per-group VCF filenames (see above).
      options -- Object whose 'makefile' mapping provides 'RecalDir'.
      dependencies -- Not used inside this function body.

    Returns a dict whose "commands" entry maps "merge" and "Snp" to their
    (non-finalized) builders.
    """
    # Merging of the per-group VCF files
    merger = AtomicCmdBuilder(['vcf_merge'], OUT_VCF="merged.vcf")
    for group_name in groups:
        filename = 'gatk.{}.{}.raw.recal_final.vcf'.format(group_name, prefix)
        merger.add_option("-i",
                          os.path.join(options.makefile['RecalDir'],
                                       filename))
    merger.add_option("-o", '%(OUT_VCF)s')

    # Creation of the SNP list
    lister = AtomicCmdBuilder(['vcf_snp_list'])
    lister.add_option('--recal_dir', options.makefile['RecalDir'])
    # NOTE(review): an empty-string option is added here; confirm that
    # this is intended.
    lister.add_option('')

    commands = {
        'merge': merger,
        'Snp': lister,
    }
    return {'commands': commands}
def _get_bwa_template(call, prefix, iotype="IN", **kwargs):
    """Return a builder for 'call' with the BWA index files registered.

    For each index extension the file 'prefix + "." + extension' is
    registered under the key '<iotype>_PREFIX_<EXTENSION>' (extension
    upper-cased). The 'rbwt', 'rpac' and 'rsa' files are only included if
    the detected BWA version is older than 0.6.0. Remaining keyword
    arguments are passed on to AtomicCmdBuilder.
    """
    index_exts = ["amb", "ann", "bwt", "pac", "sa"]
    try:
        if BWA_VERSION.version < (0, 6, 0):
            index_exts += ["rbwt", "rpac", "rsa"]
    except versions.VersionRequirementError:
        # Deliberately ignored at this point; handled elsewhere
        pass

    builder = AtomicCmdBuilder(call, **kwargs)
    for ext in index_exts:
        name = "%s_PREFIX_%s" % (iotype, ext.upper())
        filename = prefix + "." + ext
        builder.set_kwargs(**{name: filename})

    return builder
def customize(cls, pileup, infile, outfile, interval, dependencies=()):
    """Build a 'unicat | vcf_filter | bgzip' command chain.

    Arguments:
      pileup   -- Pileup file passed to vcf_filter through '--pileup'.
      infile   -- VCF file read by unicat.
      outfile  -- File receiving the bgzip output.
      interval -- Mapping; the optional "Homozygous Contigs" entry lists
                  contigs that are each passed to vcf_filter through a
                  separate '--homozygous-chromosome' option.
      dependencies -- Not used inside this function body.

    Returns a dict whose "commands" entry maps "unicat", "filter" and
    "bgzip" to their (non-finalized) builders.
    """
    unicat = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                              IN_VCF=infile,
                              OUT_STDOUT=AtomicCmd.PIPE)

    vcffilter = AtomicCmdBuilder(
        ["vcf_filter", "--pileup", "%(IN_PILEUP)s"],
        IN_PILEUP=pileup,
        IN_STDIN=unicat,
        OUT_STDOUT=AtomicCmd.PIPE)

    # The option is repeated once per contig, so it must be added with
    # add_option; set_option is for options given at most once and cannot
    # carry more than one contig.
    for contig in interval.get("Homozygous Contigs", ()):
        vcffilter.add_option("--homozygous-chromosome", contig)

    bgzip = AtomicCmdBuilder(["bgzip"],
                             IN_STDIN=vcffilter,
                             OUT_STDOUT=outfile)

    return {
        "commands": {
            "unicat": unicat,
            "filter": vcffilter,
            "bgzip": bgzip,
        }
    }
def _get_common_parameters(version):
    """Return an 'AdapterRemoval' builder with the shared default options.

    'version' must equal VERSION_14 or VERSION_15 and selects the version
    check attached to the command; any other value raises CmdError.
    """
    if version == VERSION_14:
        check = _VERSION_14_CHECK
    elif version == VERSION_15:
        check = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=check)
    # Both defaults are added with fixed=False
    builder.set_option("--trimns", fixed=False)         # Ns at read ends
    builder.set_option("--trimqualities", fixed=False)  # low quality scores

    return builder
def customize(cls, reference, infile, outfile, filters, options,
              dependencies=()):
    """Build a 'vcf_qual_percentile' filtering command.

    Arguments:
      infile  -- Input VCF; registered as IN_VCF and appended to the call.
      outfile -- Output VCF, passed through '--out'.
      filters -- Mapping of option name to value; every pair is added to
                 the command.
      options -- Object whose 'makefile' mapping must contain
                 'vcf_percentile_threshold'.
      reference, dependencies -- Not used inside this function body.

    Returns a dict whose "commands" entry maps "Filter" to the
    (non-finalized) builder.
    """
    # NOTE(review): this value is looked up but never passed on to the
    # command; the lookup is kept since it fails if the key is missing.
    percentile = str(options.makefile['vcf_percentile_threshold'])

    vcf_filter = AtomicCmdBuilder(['vcf_qual_percentile'],
                                  IN_VCF=infile,
                                  OUT_VCF=outfile)
    for option, value in filters.items():
        vcf_filter.add_option(option, value)
    vcf_filter.set_option('--out', '%(OUT_VCF)s')
    vcf_filter.add_option(infile)

    commands = {'Filter': vcf_filter}
    return {'commands': commands}