def _do_test_builder__pop_option(setter):
    """Check that `pop_option` removes an option together with its value.

    `setter` is called as `setter(builder, key[, value], fixed=False)` to
    register three options on a "find" builder; "-size" is then popped and
    the remaining call is compared against the expected list.
    """
    find_cmd = AtomicCmdBuilder("find")
    for arguments in (("-empty",), ("-size", "1"), ("-name", "*.txt")):
        setter(find_cmd, *arguments, fixed=False)

    find_cmd.pop_option("-size")

    assert_equal(find_cmd.call, ["find", "-empty", "-name", "*.txt"])
def _get_common_parameters(version):
    """Return a builder holding the options shared by AdapterRemoval calls.

    `version` must equal VERSION_14 or VERSION_15 and selects the version
    requirement attached to the command; any other value raises CmdError.
    A deprecation warning is printed at most once per process (tracked by
    the module-level _DEPRECATION_WARNING_PRINTED flag) when the detected
    version is older than 2.0.
    """
    global _DEPRECATION_WARNING_PRINTED

    if version == VERSION_14:
        requirement = _VERSION_14_CHECK
    elif version == VERSION_15:
        requirement = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)
    # "--trimns": trim Ns at read ends; "--trimqualities": trim low quality
    # scores. Both are non-fixed, so they may be overwritten afterwards.
    for flag in ("--trimns", "--trimqualities"):
        builder.set_option(flag, fixed=False)

    try:
        if not _DEPRECATION_WARNING_PRINTED:
            # Accessing .version may raise VersionRequirementError
            if requirement.version < (2, 0):
                import pypeline.ui as ui

                warning_lines = (
                    "\nWARNING: AdapterRemoval v1.5.x is deprecated;",
                    " Upgrading to 2.1.x is strongly adviced!\n",
                    " Download the newest version of AdapterRemoval at ",
                    " https://github.com/MikkelSchubert/adapterremoval\n",
                )
                for line in warning_lines:
                    ui.print_warn(line)

                _DEPRECATION_WARNING_PRINTED = True
    except versions.VersionRequirementError:
        # The warning is best-effort; failures to determine the version are
        # deliberately ignored here.
        pass

    return builder
def _bowtie2_template(call, prefix, iotype="IN", **kwargs):
    """Create a builder for `call` that registers the Bowtie2 index files.

    For each index postfix ("1.bt2" ... "rev.2.bt2") a keyword named
    "<iotype>_PREFIX_<POSTFIX>" is set to "<prefix>.<postfix>". Additional
    `kwargs` are passed on to the AtomicCmdBuilder constructor.
    """
    builder = AtomicCmdBuilder(call, **kwargs)

    index_postfixes = ("1.bt2", "2.bt2", "3.bt2", "4.bt2",
                       "rev.1.bt2", "rev.2.bt2")
    for extension in index_postfixes:
        index_key = "%s_PREFIX_%s" % (iotype, extension.upper())
        index_path = prefix + "." + extension
        builder.set_kwargs(**{index_key: index_path})

    return builder
def _do_test_builder__pop_option(setter):
    """Verify that popping "-size" drops both the option and its value.

    `setter` is invoked as `setter(builder, key[, value], fixed=False)` for
    each of the three options before `pop_option` is exercised.
    """
    options = [("-empty",), ("-size", "1"), ("-name", "*.txt")]

    cmd = AtomicCmdBuilder("find")
    for option in options:
        setter(cmd, *option, fixed=False)
    cmd.pop_option("-size")

    remaining_call = ["find", "-empty", "-name", "*.txt"]
    assert_equal(cmd.call, remaining_call)
def test_builder__set_kwargs__after_finalize():
    """set_kwargs must raise after finalize() and leave kwargs unchanged."""
    cmd = AtomicCmdBuilder("echo")
    cmd.set_kwargs(IN_PATH="/a/b/")
    cmd.finalize()

    assert_raises(AtomicCmdBuilderError,
                  cmd.set_kwargs, OUT_PATH="/dst/file")
    assert_equal(cmd.kwargs, {"IN_PATH": "/a/b/"})
def _bowtie2_template(call, prefix, iotype="IN", **kwargs):
    """Return an AtomicCmdBuilder for `call` with Bowtie2 index keywords set.

    Each of the six index files "<prefix>.<postfix>" is registered under the
    key "<iotype>_PREFIX_<POSTFIX>" (postfix upper-cased); `kwargs` are
    forwarded unchanged to the builder constructor.
    """
    template = AtomicCmdBuilder(call, **kwargs)

    for suffix in ("1.bt2", "2.bt2", "3.bt2", "4.bt2",
                   "rev.1.bt2", "rev.2.bt2"):
        keyword = "%s_PREFIX_%s" % (iotype, suffix.upper())
        filename = prefix + "." + suffix
        template.set_kwargs(**{keyword: filename})

    return template
def test_builder__set__kwargs__overwriting():
    """Setting an already-set keyword must raise and keep the old value."""
    cmd = AtomicCmdBuilder("echo")
    cmd.set_kwargs(IN_PATH="/a/b/")

    assert_raises(AtomicCmdBuilderError,
                  cmd.set_kwargs, IN_PATH="/dst/file")
    assert_equal(cmd.kwargs, {"IN_PATH": "/a/b/"})
def customize(cls, infile, intervals, outfile, dependencies=()):
    """Build the 'bam_sample_regions' command.

    Arguments:
      infile    -- path registered as IN_PILEUP and passed via "--genotype".
      intervals -- path registered as IN_INTERVALS, passed via "--intervals".
      outfile   -- path to which the command's STDOUT is written.

    Returns a dict with the (not yet finalized) builder under "command".
    """
    io_files = {
        "IN_PILEUP": infile,
        "IN_INTERVALS": intervals,
        "OUT_STDOUT": outfile,
    }
    builder = AtomicCmdBuilder(["bam_sample_regions"], **io_files)

    option_templates = (("--genotype", "%(IN_PILEUP)s"),
                        ("--intervals", "%(IN_INTERVALS)s"))
    for option, template in option_templates:
        builder.set_option(option, template)

    return {"command": builder}
def test_builder__add_multiple_options_with_sep():
    """add_multiple_options with sep="=" must emit "-i=<template>" fields."""
    filenames = ("file_a", "file_b")
    cmd = AtomicCmdBuilder("ls")

    returned = cmd.add_multiple_options("-i", filenames, sep="=")

    expected_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    expected_call = ["ls", "-i=%(IN_FILE_01)s", "-i=%(IN_FILE_02)s"]
    assert_equal(returned, expected_kwargs)
    assert_equal(cmd.kwargs, expected_kwargs)
    assert_equal(cmd.call, expected_call)
def test_builder__add_multiple_values_with_template():
    """A custom key template must be used for both kwargs and call fields."""
    filenames = ("file_a", "file_b")
    cmd = AtomicCmdBuilder("ls")

    returned = cmd.add_multiple_values(filenames, template="OUT_BAM_%i")

    expected_kwargs = {"OUT_BAM_1": "file_a", "OUT_BAM_2": "file_b"}
    expected_call = ["ls", "%(OUT_BAM_1)s", "%(OUT_BAM_2)s"]
    assert_equal(returned, expected_kwargs)
    assert_equal(cmd.kwargs, expected_kwargs)
    assert_equal(cmd.call, expected_call)
def test_builder__add_multiple_values():
    """Values added without a template are keyed IN_FILE_01, IN_FILE_02."""
    filenames = ("file_a", "file_b")
    cmd = AtomicCmdBuilder("ls")

    returned = cmd.add_multiple_values(filenames)

    expected_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    expected_call = ["ls", "%(IN_FILE_01)s", "%(IN_FILE_02)s"]
    assert_equal(returned, expected_kwargs)
    assert_equal(cmd.kwargs, expected_kwargs)
    assert_equal(cmd.call, expected_call)
def test_builder__add_multiple_options_with_template_fixed():
    """Repeating add_multiple_options with identical arguments must raise."""
    filenames = ("file_a", "file_b")
    cmd = AtomicCmdBuilder("ls")

    returned = cmd.add_multiple_options("-i", filenames)

    expected_kwargs = {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"}
    assert_equal(returned, expected_kwargs)
    assert_equal(cmd.kwargs, expected_kwargs)
    # The second, identical call is expected to be rejected by the builder
    assert_raises(AtomicCmdBuilderError,
                  cmd.add_multiple_options, "-i", filenames)
def test_builder__add_multiple_options_multiple_times():
    """Successive calls must continue the key numbering (01, then 02)."""
    cmd = AtomicCmdBuilder("ls")

    first = cmd.add_multiple_options("-i", ("file_a",))
    assert_equal(first, {"IN_FILE_01": "file_a"})

    second = cmd.add_multiple_options("-i", ("file_b",))
    assert_equal(second, {"IN_FILE_02": "file_b"})

    assert_equal(cmd.kwargs,
                 {"IN_FILE_01": "file_a", "IN_FILE_02": "file_b"})
    assert_equal(cmd.call,
                 ["ls", "-i", "%(IN_FILE_01)s", "-i", "%(IN_FILE_02)s"])
def test_builder__add_option__overwrite():
    """add_option appends repeated keys instead of replacing earlier ones."""
    cmd = AtomicCmdBuilder("find")
    for arguments in (("-name", "*.txt"), ("-or",), ("-name", "*.bat")):
        cmd.add_option(*arguments)

    expected_call = ["find", "-name", "*.txt", "-or", "-name", "*.bat"]
    assert_equal(cmd.call, expected_call)
def _get_bwa_template(call, prefix, iotype="IN", **kwargs):
    """Create a builder for `call` that registers the BWA index files.

    The index extensions "amb", "ann", "bwt", "pac" and "sa" are always
    registered; "rbwt", "rpac" and "rsa" are added when BWA_VERSION reports
    a version older than 0.6.0. Each file "<prefix>.<ext>" is stored under
    the key "<iotype>_PREFIX_<EXT>". `kwargs` go to the builder constructor.
    """
    index_extensions = ["amb", "ann", "bwt", "pac", "sa"]

    try:
        needs_reverse_index = BWA_VERSION.version < (0, 6, 0)
    except versions.VersionRequirementError:
        # Ignored here, handled elsewhere
        needs_reverse_index = False

    if needs_reverse_index:
        index_extensions.extend(("rbwt", "rpac", "rsa"))

    builder = AtomicCmdBuilder(call, **kwargs)
    for extension in index_extensions:
        index_key = "%s_PREFIX_%s" % (iotype, extension.upper())
        builder.set_kwargs(**{index_key: prefix + "." + extension})

    return builder
def _get_bwa_template(call, prefix, iotype="IN", **kwargs):
    """Return an AtomicCmdBuilder for `call` with BWA index keywords set.

    Every index file "<prefix>.<ext>" is registered as
    "<iotype>_PREFIX_<EXT>". The reverse-index extensions ("rbwt", "rpac",
    "rsa") are only included if BWA_VERSION reports a version below 0.6.0.
    """
    suffixes = ["amb", "ann", "bwt", "pac", "sa"]

    try:
        is_pre_0_6 = BWA_VERSION.version < (0, 6, 0)
    except versions.VersionRequirementError:
        # Ignored here, handled elsewhere
        is_pre_0_6 = False

    if is_pre_0_6:
        suffixes.extend(("rbwt", "rpac", "rsa"))

    template = AtomicCmdBuilder(call, **kwargs)
    for suffix in suffixes:
        keyword = "%s_PREFIX_%s" % (iotype, suffix.upper())
        filename = prefix + "." + suffix
        template.set_kwargs(**{keyword: filename})

    return template
def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
    """Build the 'unicat | vcf_create_pileup' command pair.

    Arguments:
      reference -- path registered as IN_REF and passed via "-f".
      in_bam    -- path registered as IN_BAM and passed as a value.
      in_vcf    -- path registered as IN_VCF and read by 'unicat'.
      outfile   -- pileup output path; "<outfile>.tbi" is registered too.

    Returns a dict mapping "commands" to the two builders.
    """
    unicat = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                              IN_VCF=in_vcf,
                              OUT_STDOUT=AtomicCmd.PIPE)

    pileup_files = {
        "IN_REF": reference,
        "IN_BAM": in_bam,
        # 'unicat' output is piped into vcf_create_pileup
        "IN_STDIN": unicat,
        "OUT_PILEUP": outfile,
        "OUT_TBI": outfile + ".tbi",
    }
    vcfpileup = AtomicCmdBuilder(["vcf_create_pileup", "%(OUT_PILEUP)s"],
                                 **pileup_files)
    vcfpileup.add_value("%(IN_BAM)s")
    vcfpileup.set_option("-f", "%(IN_REF)s")

    builders = {"unicat": unicat, "pileup": vcfpileup}
    return {"commands": builders}
def customize(self, config, reference, input_files, output_file, directory,
              dependencies=()):
    """Build the 'mapDamage --rescale-only' command.

    `reference` is registered as IN_REFERENCE and `output_file` as OUT_BAM;
    the log and MCMC statistics files are registered as temporary outputs.
    `config`, `input_files`, `directory` and `dependencies` are returned
    unchanged alongside the builder.
    """
    call = [
        "mapDamage", "--rescale-only",
        "-i", "%(TEMP_IN_BAM)s",
        "-d", "%(TEMP_DIR)s",
        "-r", "%(IN_REFERENCE)s",
        "--rescale-out", "%(OUT_BAM)s",
    ]

    command = AtomicCmdBuilder(call,
                               IN_REFERENCE=reference,
                               TEMP_OUT_LOG="Runtime_log.txt",
                               TEMP_OUT_CSV="Stats_out_MCMC_correct_prob.csv",
                               OUT_BAM=output_file,
                               CHECK_VERSION=MAPDAMAGE_VERSION)

    parameters = {"command": command}
    parameters["config"] = config
    parameters["input_files"] = input_files
    parameters["directory"] = directory
    parameters["dependencies"] = dependencies
    return parameters
def customize(self, reference, directory, dependencies=()):
    """Build the 'mapDamage --stats-only' command.

    Arguments:
      reference -- path registered as IN_REFERENCE and passed via "-r".
      directory -- folder in which the final OUT_* files are registered.

    Returns a dict with the builder under "command" and `dependencies`
    passed through unchanged.
    """
    call = [
        "mapDamage", "--stats-only",
        "-r", "%(IN_REFERENCE)s",
        "-d", "%(TEMP_DIR)s",
    ]

    command = AtomicCmdBuilder(
        call,
        IN_REFERENCE=reference,
        TEMP_OUT_FREQ_3p="3pGtoA_freq.txt",
        TEMP_OUT_FREQ_5p="5pCtoT_freq.txt",
        # NOTE(review): the previous value here was the masked placeholder
        # "******", which is not a usable file name. "dnacomp.txt" is the
        # name used for the COMP_USER output elsewhere -- confirm.
        TEMP_OUT_COMP_USER="dnacomp.txt",
        TEMP_OUT_MISINCORP="misincorporation.txt",
        TEMP_OUT_LOG="Runtime_log.txt",
        TEMP_OUT_STDOUT="pipe_mapDamage.stdout",
        TEMP_OUT_STDERR="pipe_mapDamage.stderr",
        OUT_COMP_GENOME=os.path.join(directory, "dnacomp_genome.csv"),
        OUT_MCMC_PROBS=os.path.join(directory,
                                    "Stats_out_MCMC_correct_prob.csv"),
        OUT_MCMC_HIST=os.path.join(directory, "Stats_out_MCMC_hist.pdf"),
        OUT_MCMC_ITER=os.path.join(directory, "Stats_out_MCMC_iter.csv"),
        OUT_MCMC_ITERSUM=os.path.join(directory,
                                      "Stats_out_MCMC_iter_summ_stat.csv"),
        OUT_MCMC_POSTPRED=os.path.join(directory,
                                       "Stats_out_MCMC_post_pred.pdf"),
        OUT_MCMC_TRACE=os.path.join(directory, "Stats_out_MCMC_trace.pdf"),
        CHECK_RSCRIPT=RSCRIPT_VERSION,
        CHECK_MAPDAMAGE=MAPDAMAGE_VERSION)

    return {"command": command, "dependencies": dependencies}
def _build_cat_command():
    """Return an AtomicCmdBuilder for the 'paleomix cat' command.

    The builder declares the gzip, bzip2 and cat executables, plus the
    VERSION_PALEOMIX check, as requirements of the command.
    """
    requirements = {
        "EXEC_GZIP": "gzip",
        "EXEC_BZIP": "bzip2",
        "EXEC_CAT": "cat",
        "CHECK_PALEOMIX": VERSION_PALEOMIX,
    }
    call = [_PALEOMIX_PATH, "cat"]
    return AtomicCmdBuilder(call, **requirements)
def _get_common_parameters(version):
    """Return a builder holding the options shared by AdapterRemoval calls.

    `version` must equal VERSION_14 or VERSION_15 and selects the version
    requirement attached to the command; any other value raises CmdError.
    """
    if version == VERSION_14:
        requirement = _VERSION_14_CHECK
    elif version == VERSION_15:
        requirement = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)
    # "--trimns": trim Ns at read ends; "--trimqualities": trim low quality
    # scores. Both are non-fixed, so they may be overwritten afterwards.
    for flag in ("--trimns", "--trimqualities"):
        builder.set_option(flag, fixed=False)

    return builder
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Set up the 'samtools view | AddOrReplaceReadGroups | calmd' pipeline.

    Arguments:
      config          -- passed to picard.picard_command.
      reference       -- path registered as IN_REF for 'samtools calmd'.
      input_bam       -- BAM read by 'samtools view'.
      output_bam      -- destination of the 'samtools calmd' STDOUT.
      tags            -- mapping providing the ID, SM, LB, PU and PL values.
      min_mapq        -- if truthy, passed to 'samtools view' as "-q".
      filter_unmapped -- if truthy, "-F0x4" is passed to 'samtools view'.
    """
    # Step 1: (optionally filtered) uncompressed BAM written to a pipe
    view = AtomicCmdBuilder(("samtools", "view", "-bu"),
                            IN_BAM=input_bam,
                            OUT_STDOUT=AtomicCmd.PIPE)
    if min_mapq:
        view.set_option("-q", min_mapq, sep="")
    if filter_unmapped:
        view.set_option("-F", "0x4", sep="")
    view.add_value("%(IN_BAM)s")

    # Step 2: read-groups are (re)assigned using Picard
    readgroups = picard.picard_command(config, "AddOrReplaceReadGroups")
    picard_options = (
        ("INPUT", "/dev/stdin"),
        # Output is written to a named pipe, since the JVM may, in some
        # cases, emit warning messages to stdout, resulting in a malformed
        # BAM.
        ("OUTPUT", "%(TEMP_OUT_BAM)s"),
        ("COMPRESSION_LEVEL", "0"),
        # Ensure that the BAM is sorted; this is required by the pipeline,
        # and needs to be done before calling calmd (avoiding pathologic
        # runtimes).
        ("SORT_ORDER", "coordinate"),
    )
    for key, value in picard_options:
        readgroups.set_option(key, value, sep="=")

    # All tags are overwritten; ID is set since the default (e.g. '1')
    # causes problems with pysam due to type inference (is read as a length
    # 1 string, but written as a character).
    for tag in ("ID", "SM", "LB", "PU", "PL"):
        readgroups.set_option(tag, tags[tag], sep="=")

    readgroups.set_kwargs(IN_STDIN=view, TEMP_OUT_BAM="bam.pipe")

    # Step 3: calmd reads the named pipe and writes the final BAM
    calmd_call = ["samtools", "calmd", "-b",
                  "%(TEMP_IN_BAM)s", "%(IN_REF)s"]
    calmd = AtomicCmdBuilder(calmd_call,
                             IN_REF=reference,
                             TEMP_IN_BAM="bam.pipe",
                             OUT_STDOUT=output_bam)

    finalized = []
    for builder in (view, readgroups, calmd):
        finalized.append(builder.finalize())

    description = "<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam)
    PicardNode.__init__(self,
                        command=ParallelCmds(finalized),
                        description=description,
                        dependencies=dependencies)
def customize(cls, pileup, infile, outfile, interval, dependencies=()):
    """Build the 'unicat | vcf_filter | bgzip' command chain.

    Arguments:
      pileup   -- path registered as IN_PILEUP and passed via "--pileup".
      infile   -- VCF path read by 'unicat'.
      outfile  -- destination of the 'bgzip' STDOUT.
      interval -- mapping; the optional "Homozygous Contigs" entry lists
                  contigs passed via "--homozygous-chromosome".

    Returns a dict mapping "commands" to the three builders.
    """
    unicat = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                              IN_VCF=infile,
                              OUT_STDOUT=AtomicCmd.PIPE)

    vcffilter = AtomicCmdBuilder(["vcf_filter", "--pileup", "%(IN_PILEUP)s"],
                                 IN_PILEUP=pileup,
                                 IN_STDIN=unicat,
                                 OUT_STDOUT=AtomicCmd.PIPE)

    # The flag is repeated once per contig, so add_option is required;
    # set_option refuses to set the same (fixed) key a second time and
    # would therefore fail for intervals with more than one contig.
    for contig in interval.get("Homozygous Contigs", ()):
        vcffilter.add_option("--homozygous-chromosome", contig)

    bgzip = AtomicCmdBuilder(["bgzip"],
                             IN_STDIN=vcffilter,
                             OUT_STDOUT=outfile)

    return {"commands": {"unicat": unicat,
                         "filter": vcffilter,
                         "bgzip": bgzip}}
def customize(cls, options, infile, interval, outfile, padding,
              dependencies=()):
    """Build the 'bam_genotype_regions' command.

    Arguments:
      options  -- object providing `intervals_root`, the folder containing
                  the "<Genome>.<Name>.bed" file for `interval`.
      infile   -- VCF path; "<infile>.tbi" is registered as IN_TABIX.
      interval -- mapping with "Genome" and "Name" keys, and optional
                  "Protein coding" and "Indels" flags.
      outfile  -- path to which the command's STDOUT is written.

    Returns a dict with the builder under "command".
    """
    bed_filename = "{Genome}.{Name}".format(**interval) + ".bed"
    bed_path = os.path.join(options.intervals_root, bed_filename)

    io_files = {
        "IN_VCFFILE": infile,
        "IN_TABIX": infile + ".tbi",
        "IN_INTERVALS": bed_path,
        "OUT_STDOUT": outfile,
    }
    builder = AtomicCmdBuilder(["bam_genotype_regions"], **io_files)
    builder.set_option("--genotype", "%(IN_VCFFILE)s")
    builder.set_option("--intervals", "%(IN_INTERVALS)s")

    is_protein_coding = interval.get("Protein coding")
    if is_protein_coding:
        builder.set_option("--whole-codon-indels-only")

    include_indels = interval.get("Indels")
    if not include_indels:
        builder.set_option("--ignore-indels")

    return {"command": builder}
def test_builder__finalize__calls_atomiccmd():
    """finalize() must instantiate AtomicCmd with the built call and kwargs."""
    expected_args = (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"],)
    expected_kwargs = {"IN_FILE": "/in/file",
                       "OUT_FILE": "/out/file",
                       "set_cwd": True}
    invocations = []

    class _AtomicCmdMock:
        def __init__(self, *args, **kwargs):
            assert_equal(args, expected_args)
            assert_equal(kwargs, expected_kwargs)
            # Records that the constructor was actually reached
            invocations.append(True)

    with Monkeypatch("pypeline.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        cmd = AtomicCmdBuilder("echo", set_cwd=True)
        cmd.add_option("-out", "%(OUT_FILE)s")
        cmd.add_value("%(IN_FILE)s")
        cmd.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")
        cmd.finalize()

    assert invocations
def _get_common_parameters(version):
    """Build the AtomicCmdBuilder common to all AdapterRemoval invocations.

    Raises CmdError unless `version` equals VERSION_14 or VERSION_15. If the
    detected AdapterRemoval version is below 2.0, a deprecation warning is
    printed once; the module-level _DEPRECATION_WARNING_PRINTED flag
    suppresses it on subsequent calls.
    """
    global _DEPRECATION_WARNING_PRINTED

    if version == VERSION_14:
        check = _VERSION_14_CHECK
    elif version == VERSION_15:
        check = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    command = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=check)
    # Trim Ns at read ends
    command.set_option("--trimns", fixed=False)
    # Trim low quality scores
    command.set_option("--trimqualities", fixed=False)

    try:
        if not _DEPRECATION_WARNING_PRINTED:
            # Accessing .version may raise VersionRequirementError
            if check.version < (2, 0):
                import pypeline.ui as ui

                messages = [
                    "\nWARNING: AdapterRemoval v1.5.x is deprecated;",
                    " Upgrading to 2.1.x is strongly adviced!\n",
                    " Download the newest version of AdapterRemoval at ",
                    " https://github.com/MikkelSchubert/adapterremoval\n",
                ]
                for message in messages:
                    ui.print_warn(message)

                _DEPRECATION_WARNING_PRINTED = True
    except versions.VersionRequirementError:
        # Best-effort warning only; version problems are not reported here
        pass

    return command
def test_builder__finalize__calls_atomiccmd(): was_called = [] class _AtomicCmdMock: def __init__(self, *args, **kwargs): assert_equal(args, (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"], )) assert_equal(kwargs, { "IN_FILE": "/in/file", "OUT_FILE": "/out/file", "set_cwd": True }) was_called.append(True) with Monkeypatch("pypeline.atomiccmd.builder.AtomicCmd", _AtomicCmdMock): builder = AtomicCmdBuilder("echo", set_cwd=True) builder.add_option("-out", "%(OUT_FILE)s") builder.add_value("%(IN_FILE)s") builder.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file") builder.finalize() assert was_called
def customize(cls, options, infile, interval, outfile, padding,
              dependencies=()):
    """Create the builder for the 'bam_genotype_regions' command.

    The BED file is looked up as "<Genome>.<Name>.bed" (taken from
    `interval`) inside `options.intervals_root`. "--whole-codon-indels-only"
    is set when `interval` has a truthy "Protein coding" entry, and
    "--ignore-indels" when its "Indels" entry is missing or falsy.

    Returns a dict with the builder under "command".
    """
    region_name = "{Genome}.{Name}".format(**interval)
    bed_file = os.path.join(options.intervals_root, region_name + ".bed")

    command = AtomicCmdBuilder(["bam_genotype_regions"],
                               IN_VCFFILE=infile,
                               IN_TABIX=infile + ".tbi",
                               IN_INTERVALS=bed_file,
                               OUT_STDOUT=outfile)

    for option, template in (("--genotype", "%(IN_VCFFILE)s"),
                             ("--intervals", "%(IN_INTERVALS)s")):
        command.set_option(option, template)

    if interval.get("Protein coding"):
        command.set_option("--whole-codon-indels-only")

    if not interval.get("Indels"):
        command.set_option("--ignore-indels")

    return {"command": command}
def customize(cls, reference, infile, outfile, filters, options,
              dependencies=()):
    """Build the 'vcf_qual_percentile' filtering command.

    Arguments:
      infile  -- VCF path registered as IN_VCF and passed as the final value.
      outfile -- path registered as OUT_VCF and passed via "--out".
      filters -- mapping of option -> value; each pair is added as an option.
      options -- object whose `makefile` mapping must contain the key
                 'vcf_percentile_threshold'.

    Returns a dict mapping 'commands' to {'Filter': <builder>}.
    """
    # NOTE(review): this value is looked up but never passed on to the
    # command. The lookup is kept so that a missing key still fails here;
    # confirm whether it was meant to be forwarded as an option.
    percentile = str(options.makefile['vcf_percentile_threshold'])

    flt = AtomicCmdBuilder(['vcf_qual_percentile'],
                           IN_VCF=infile,
                           OUT_VCF=outfile)

    for key, val in filters.items():
        flt.add_option(key, val)
    flt.set_option('--out', '%(OUT_VCF)s')

    # The input is a positional value, not an option; refer to it through
    # the IN_VCF template (as done for OUT_VCF) rather than embedding the
    # raw path in the call.
    flt.add_value('%(IN_VCF)s')

    return {'commands': {'Filter': flt}}
def customize(cls, input_alignment, output_tree, dependencies=()):
    """Build the 'parsimonator' command.

    Arguments:
    input_alignment  -- An alignment file in a format readable by RAxML.
    output_tree      -- Filename for the output newick tree.

    Returns a dict with the builder under "command".
    """
    command = AtomicCmdBuilder("parsimonator", set_cwd=True)

    command.set_option("-s", "%(TEMP_OUT_ALN)s")
    command.set_option("-n", "output")

    # Random seed for the stepwise addition process; non-fixed, so that it
    # may be overwritten afterwards.
    seed = int(random.random() * 2**31 - 1)
    command.set_option("-p", seed, fixed=False)

    files = {
        # Auto-delete: Symlinks
        "TEMP_OUT_ALN": os.path.basename(input_alignment),
        # Input files, are not used directly
        "IN_ALIGNMENT": input_alignment,
        # Final output file, are not created directly
        "OUT_TREE": output_tree,
    }
    command.set_kwargs(**files)

    return {"command": command}
def customize(cls, infile, intervals, outfile, dependencies=()):
    """Create the builder for the 'bam_sample_regions' command.

    `infile` is passed via "--genotype" (as IN_PILEUP), `intervals` via
    "--intervals" (as IN_INTERVALS), and STDOUT is written to `outfile`.

    Returns a dict with the builder under "command".
    """
    command = AtomicCmdBuilder(["bam_sample_regions"],
                               IN_PILEUP=infile,
                               IN_INTERVALS=intervals,
                               OUT_STDOUT=outfile)

    templates = [("--genotype", "%(IN_PILEUP)s"),
                 ("--intervals", "%(IN_INTERVALS)s")]
    for flag, template in templates:
        command.set_option(flag, template)

    result = {"command": command}
    return result
def test_builder__set_kwargs__after_finalize():
    """A finalized builder must reject set_kwargs and keep its old kwargs."""
    echo = AtomicCmdBuilder("echo")
    echo.set_kwargs(IN_PATH="/a/b/")
    echo.finalize()

    assert_raises(AtomicCmdBuilderError,
                  echo.set_kwargs, OUT_PATH="/dst/file")

    unchanged = {"IN_PATH": "/a/b/"}
    assert_equal(echo.kwargs, unchanged)
def customize(cls, input_file, output_file, algorithm="auto",
              dependencies=()):
    """Build the MAFFT command for the selected algorithm preset.

    Arguments:
      input_file  -- FASTA path registered as IN_FASTA and passed as a value.
      output_file -- path to which the command's STDOUT is written.
      algorithm   -- case-insensitive key into _PRESETS; an unknown key
                     results in a KeyError.

    Returns a dict with the builder under "command" and `dependencies`
    passed through unchanged.
    """
    preset_call = _PRESETS[algorithm.lower()]

    command = AtomicCmdBuilder(preset_call)
    command.add_value("%(IN_FASTA)s")

    files = {
        "IN_FASTA": input_file,
        "OUT_STDOUT": output_file,
        "CHECK_VERSION": MAFFT_VERSION,
    }
    command.set_kwargs(**files)

    return {"command": command, "dependencies": dependencies}
def customize(cls, reference, in_bam, in_vcf, outfile, dependencies=()):
    """Create the builders for 'unicat' piped into 'vcf_create_pileup'.

    `in_vcf` is read by 'unicat'; 'vcf_create_pileup' receives that output
    on STDIN, writes `outfile` (plus "<outfile>.tbi"), and is given `in_bam`
    as a value and `reference` via "-f".

    Returns a dict mapping "commands" to both builders.
    """
    commands = {}

    commands["unicat"] = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                                          IN_VCF=in_vcf,
                                          OUT_STDOUT=AtomicCmd.PIPE)

    pileup = AtomicCmdBuilder(["vcf_create_pileup", "%(OUT_PILEUP)s"],
                              IN_REF=reference,
                              IN_BAM=in_bam,
                              IN_STDIN=commands["unicat"],
                              OUT_PILEUP=outfile,
                              OUT_TBI=outfile + ".tbi")
    pileup.add_value("%(IN_BAM)s")
    pileup.set_option("-f", "%(IN_REF)s")
    commands["pileup"] = pileup

    return {"commands": commands}
def customize(cls, input_alignment, output_tree, dependencies=()):
    """Create the builder for the 'parsimonator' command.

    Arguments:
    input_alignment  -- An alignment file in a format readable by RAxML.
    output_tree      -- Filename for the output newick tree.

    Returns a dict with the builder under "command".
    """
    builder = AtomicCmdBuilder("parsimonator", set_cwd=True)

    for option, value in (("-s", "%(TEMP_OUT_ALN)s"), ("-n", "output")):
        builder.set_option(option, value)

    # Random seed for the stepwise addition process
    random_seed = int(random.random() * 2**31 - 1)
    builder.set_option("-p", random_seed, fixed=False)

    # Auto-delete: Symlinks
    symlink_name = os.path.basename(input_alignment)
    builder.set_kwargs(TEMP_OUT_ALN=symlink_name,
                       # Input files, are not used directly
                       IN_ALIGNMENT=input_alignment,
                       # Final output file, are not created directly
                       OUT_TREE=output_tree)

    return {"command": builder}
def customize(self, config, reference, input_files, output_directory,
              title="mapDamage", dependencies=()):
    """Build the 'mapDamage --no-stats' (plotting) command.

    Arguments:
      reference        -- path registered as IN_REFERENCE, passed via "-r".
      output_directory -- folder in which all OUT_* files are registered.
      title            -- value passed via "-t".

    `config`, `input_files` and `dependencies` are returned unchanged
    alongside the builder.
    """
    call = [
        "mapDamage", "--no-stats",
        # Prevent references with many contigs from using excessive
        # amounts of memory, at the cost of per-contig statistics:
        "--merge-reference-sequences",
        "-t", title,
        "-i", "%(TEMP_IN_BAM)s",
        "-d", "%(TEMP_DIR)s",
        "-r", "%(IN_REFERENCE)s",
    ]

    output_names = (
        ("OUT_FREQ_3p", "3pGtoA_freq.txt"),
        ("OUT_FREQ_5p", "5pCtoT_freq.txt"),
        ("OUT_COMP_USER", "dnacomp.txt"),
        ("OUT_PLOT_FRAG", "Fragmisincorporation_plot.pdf"),
        ("OUT_PLOT_LEN", "Length_plot.pdf"),
        ("OUT_LENGTH", "lgdistribution.txt"),
        ("OUT_MISINCORP", "misincorporation.txt"),
        ("OUT_LOG", "Runtime_log.txt"),
    )

    files = {
        "IN_REFERENCE": reference,
        "TEMP_OUT_STDOUT": "pipe_mapDamage.stdout",
        "TEMP_OUT_STDERR": "pipe_mapDamage.stderr",
        "CHECK_RSCRIPT": RSCRIPT_VERSION,
        "CHECK_MAPDAMAGE": MAPDAMAGE_VERSION,
    }
    for key, filename in output_names:
        files[key] = os.path.join(output_directory, filename)

    command = AtomicCmdBuilder(call, **files)

    return {"command": command,
            "config": config,
            "input_files": input_files,
            "dependencies": dependencies}
def __init__(self, config, reference, input_bam, output_bam, tags,
             min_mapq=0, filter_unmapped=False, dependencies=()):
    """Create the node running 'samtools view', Picard and 'samtools calmd'.

    The three commands are run in parallel: 'samtools view' pipes the
    (optionally filtered) `input_bam` into Picard AddOrReplaceReadGroups,
    which writes to a named pipe read by 'samtools calmd'; the latter
    writes `output_bam`. `tags` must provide ID, SM, LB, PU and PL.
    """
    samtools_view = AtomicCmdBuilder(("samtools", "view", "-bu"),
                                     IN_BAM=input_bam,
                                     OUT_STDOUT=AtomicCmd.PIPE)
    # Both filters are optional and only set for truthy arguments
    if min_mapq:
        samtools_view.set_option("-q", min_mapq, sep="")
    if filter_unmapped:
        samtools_view.set_option("-F", "0x4", sep="")
    samtools_view.add_value("%(IN_BAM)s")

    picard_cmd = picard.picard_command(config, "AddOrReplaceReadGroups")
    picard_cmd.set_option("INPUT", "/dev/stdin", sep="=")
    # Output is written to a named pipe, since the JVM may, in some cases,
    # emit warning messages to stdout, resulting in a malformed BAM.
    picard_cmd.set_option("OUTPUT", "%(TEMP_OUT_BAM)s", sep="=")
    picard_cmd.set_option("COMPRESSION_LEVEL", "0", sep="=")
    # Ensure that the BAM is sorted; this is required by the pipeline, and
    # needs to be done before calling calmd (avoiding pathologic runtimes).
    picard_cmd.set_option("SORT_ORDER", "coordinate", sep="=")

    # All tags are overwritten; ID is set since the default (e.g. '1')
    # causes problems with pysam due to type inference (is read as a length
    # 1 string, but written as a character).
    readgroup_tags = ("ID", "SM", "LB", "PU", "PL")
    for name in readgroup_tags:
        picard_cmd.set_option(name, tags[name], sep="=")

    picard_cmd.set_kwargs(IN_STDIN=samtools_view, TEMP_OUT_BAM="bam.pipe")

    samtools_calmd = AtomicCmdBuilder(
        ["samtools", "calmd", "-b", "%(TEMP_IN_BAM)s", "%(IN_REF)s"],
        IN_REF=reference,
        TEMP_IN_BAM="bam.pipe",
        OUT_STDOUT=output_bam)

    builders = (samtools_view, picard_cmd, samtools_calmd)
    parallel = ParallelCmds([builder.finalize() for builder in builders])

    PicardNode.__init__(
        self,
        command=parallel,
        description="<Cleanup BAM: %s -> '%s'>" % (input_bam, output_bam),
        dependencies=dependencies)
def customize(cls, pileup, infile, outfile, regions, dependencies=()):
    """Build the 'cat | vcf_filter | bgzip' command chain.

    Arguments:
      pileup  -- path registered as IN_PILEUP and passed via "--pileup".
      infile  -- VCF path read by the 'cat' command.
      outfile -- destination of the 'bgzip' STDOUT.
      regions -- mapping; its "HomozygousContigs" entry lists the contigs
                 passed (one option each) via "--homozygous-chromosome".

    Returns a dict mapping "commands" to the three builders.
    """
    cat_cmd = factory.new("cat")
    cat_cmd.add_value("%(IN_VCF)s")
    cat_cmd.set_kwargs(**{"IN_VCF": infile,
                          "OUT_STDOUT": AtomicCmd.PIPE})

    filter_cmd = factory.new("vcf_filter")
    filter_cmd.add_option("--pileup", "%(IN_PILEUP)s")
    homozygous_contigs = regions["HomozygousContigs"]
    for name in homozygous_contigs:
        filter_cmd.add_option("--homozygous-chromosome", name)
    filter_cmd.set_kwargs(**{"IN_PILEUP": pileup,
                             "IN_STDIN": cat_cmd,
                             "OUT_STDOUT": AtomicCmd.PIPE})

    bgzip_cmd = AtomicCmdBuilder(["bgzip"],
                                 IN_STDIN=filter_cmd,
                                 OUT_STDOUT=outfile)

    commands = {"cat": cat_cmd, "filter": filter_cmd, "bgzip": bgzip_cmd}
    return {"commands": commands}
def customize(cls, pileup, infile, outfile, interval, dependencies=()):
    """Create the builders for 'unicat | vcf_filter | bgzip'.

    Arguments:
      pileup   -- path registered as IN_PILEUP and passed via "--pileup".
      infile   -- VCF path read by 'unicat'.
      outfile  -- destination of the 'bgzip' STDOUT.
      interval -- mapping; the optional "Homozygous Contigs" entry lists
                  contigs passed via "--homozygous-chromosome".

    Returns a dict mapping "commands" to the three builders.
    """
    unicat = AtomicCmdBuilder(["unicat", "%(IN_VCF)s"],
                              IN_VCF=infile,
                              OUT_STDOUT=AtomicCmd.PIPE)

    vcffilter = AtomicCmdBuilder(
        ["vcf_filter", "--pileup", "%(IN_PILEUP)s"],
        IN_PILEUP=pileup,
        IN_STDIN=unicat,
        OUT_STDOUT=AtomicCmd.PIPE)

    # One "--homozygous-chromosome" option is needed per contig. This must
    # use add_option: set_option rejects a second assignment to the same
    # (fixed) key, and so would raise when two or more contigs are listed.
    for contig in interval.get("Homozygous Contigs", ()):
        vcffilter.add_option("--homozygous-chromosome", contig)

    bgzip = AtomicCmdBuilder(["bgzip"],
                             IN_STDIN=vcffilter,
                             OUT_STDOUT=outfile)

    return {
        "commands": {
            "unicat": unicat,
            "filter": vcffilter,
            "bgzip": bgzip,
        }
    }
def test_builder__set_option__overwrite():
    """A non-fixed option may be replaced by a later set_option call."""
    find_cmd = AtomicCmdBuilder("find")

    find_cmd.set_option("-name", "*.txt", fixed=False)
    find_cmd.set_option("-name", "*.bat")

    expected_call = ["find", "-name", "*.bat"]
    assert_equal(find_cmd.call, expected_call)
def test_builder__set_kwargs__called_twice():
    """Keywords from separate set_kwargs calls must accumulate."""
    cmd = AtomicCmdBuilder("echo")

    cmd.set_kwargs(OUT_PATH="/dst/file")
    cmd.set_kwargs(IN_PATH="/a/b/")

    combined = {"IN_PATH": "/a/b/", "OUT_PATH": "/dst/file"}
    assert_equal(cmd.kwargs, combined)
def test_builder__set_option():
    """set_option must append the key and value to the call."""
    find_cmd = AtomicCmdBuilder("find")
    find_cmd.set_option("-name", "*.txt")

    expected_call = ["find", "-name", "*.txt"]
    assert_equal(find_cmd.call, expected_call)
def test_builder__set_option__overwrite():
    """Setting a non-fixed option again must replace its previous value."""
    cmd = AtomicCmdBuilder("find")

    for value, extra in (("*.txt", {"fixed": False}), ("*.bat", {})):
        cmd.set_option("-name", value, **extra)

    assert_equal(cmd.call, ["find", "-name", "*.bat"])
def test_builder__set_option__overwrite_fixed():
    """Overwriting an option set with the default (fixed) mode must raise."""
    find_cmd = AtomicCmdBuilder("find")
    find_cmd.set_option("-name", "*.txt")

    assert_raises(AtomicCmdBuilderError,
                  find_cmd.set_option, "-name", "*.bat")
def customize(cls, input_alignment, input_partition, template, start=0,
              bootstraps=50, dependencies=()):
    """Build the 'raxmlHPC -f j' command used to generate bootstraps.

    Arguments:
      input_alignment -- path registered as IN_ALIGNMENT.
      input_partition -- path registered as IN_PARTITION.
      template, bootstraps, start -- passed to `cls._bootstraps`, which
          yields pairs whose second item is registered as an output file
          ("OUT_BS_000", "OUT_BS_001", ...).

    Returns a dict with the builder under "command".
    """
    command = AtomicCmdBuilder("raxmlHPC", set_cwd=True)

    fixed_options = (
        # NOTE(review): algorithm 'j' presumably generates bootstrapped
        # alignments -- confirm against the RAxML manual
        ("-f", "j"),
        # Output files are saved with a .Pypeline postfix, and
        # subsequently renamed
        ("-n", "Pypeline"),
        # Model required, but not used
        ("-m", "GTRGAMMA"),
    )
    for option, value in fixed_options:
        command.set_option(option, value)

    # Set random seed for bootstrap generation. May be set to a fixed value
    # to allow replicability.
    seed = int(random.random() * 2**31 - 1)
    command.set_option("-b", seed, fixed=False)
    # Taken from `bootstraps`; non-fixed, so it may be overwritten
    command.set_option("-N", int(bootstraps), fixed=False)

    # Symlink to sequence and partitions, to prevent the creation of
    # *.reduced files outside temp folder. In addition, it may be necessary
    # to remove the .reduced files if created.
    command.set_option("-s", "input.alignment")
    command.set_option("-q", "input.partition")

    files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        "TEMP_OUT_INF": "RAxML_info.Pypeline",
        "TEMP_OUT_ALN": "input.alignment",
        "TEMP_OUT_PAR": "input.partition",
        "CHECK_VERSION": RAXML_VERSION,
    }
    destinations = cls._bootstraps(template, bootstraps, start)
    for index, (_, destination) in enumerate(destinations):
        files["OUT_BS_%03i" % index] = destination

    command.set_kwargs(**files)

    return {"command": command}
def customize(cls, input_alignment, input_partition, output_template,
              threads=1, dependencies=()):
    """Build the RAxML rapid-bootstrapping command ("-f a").

    Arguments:
    input_alignment  -- An alignment file in a format readable by RAxML.
    input_partition  -- A set of partitions in a format readable by RAxML.
    output_template  -- A template string used to construct final filenames.
                        Should consist of a full path, including a single
                        '%s', which is replaced with the variable part of
                        RAxML output files (e.g. 'info', 'bestTree', ...).
                        Example destination: '/disk/project/SN013420.RAxML.%s'
                        Example output: '/disk/project/SN013420.RAxML.bestTree'
    threads          -- If greater than 1, 'raxmlHPC-PTHREADS' is used and
                        the value is passed via "-T".

    Returns a dict with the builder under "command".
    """
    multithreaded = threads > 1
    if multithreaded:
        command = AtomicCmdBuilder("raxmlHPC-PTHREADS")
        command.set_option("-T", threads)
    else:
        command = AtomicCmdBuilder("raxmlHPC")
    version = RAXML_PTHREADS_VERSION if multithreaded else RAXML_VERSION

    fixed_options = (
        # Perform rapid bootstrapping
        ("-f", "a"),
        # Output files are saved with a .Pypeline postfix, and
        # subsequently renamed
        ("-n", "Pypeline"),
        # Ensures that output is saved to the temporary directory
        ("-w", "%(TEMP_DIR)s"),
        # Symlink to sequence and partitions, to prevent the creation of
        # *.reduced files outside temp folder. In addition, it may be
        # necessary to remove the .reduced files if created.
        ("-s", "%(TEMP_OUT_ALN)s"),
        ("-q", "%(TEMP_OUT_PART)s"),
    )
    for option, value in fixed_options:
        command.set_option(option, value)

    partition_name = os.path.basename(input_partition)
    alignment_name = os.path.basename(input_alignment)
    files = {
        # Auto-delete: Symlinks and .reduced files that RAxML may generate
        "TEMP_OUT_PART": partition_name,
        "TEMP_OUT_PART_R": partition_name + ".reduced",
        "TEMP_OUT_ALN": alignment_name,
        "TEMP_OUT_ALN_R": alignment_name + ".reduced",
        # Input files, are not used directly
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        # Final output files, are not created directly
        "OUT_INFO": output_template % "info",
        "OUT_BESTTREE": output_template % "bestTree",
        "OUT_BOOTSTRAP": output_template % "bootstrap",
        "OUT_BIPART": output_template % "bipartitions",
        "OUT_BIPARTLABEL": output_template % "bipartitionsBranchLabels",
        "CHECK_VERSION": version,
    }
    command.set_kwargs(**files)

    # Use the GTRGAMMAI model of NT substitution by default
    command.set_option("-m", "GTRGAMMAI", fixed=False)
    # Enable Rapid Boostrapping and set random seed. May be set to a fixed
    # value to allow replicability.
    bootstrap_seed = int(random.random() * 2**31 - 1)
    command.set_option("-x", bootstrap_seed, fixed=False)
    # Set random seed for parsimony inference. May be set to a fixed value
    # to allow replicability.
    parsimony_seed = int(random.random() * 2**31 - 1)
    command.set_option("-p", parsimony_seed, fixed=False)
    # Terminate bootstrapping upon convergence, rather than after a fixed
    # number of repetitions
    command.set_option("-N", "autoMRE", fixed=False)

    return {"command": command}
def customize(cls, input_alignment, input_partitions, output_tree,
              dependencies=()):
    """Build the 'raxmlHPC -y' command (parsimony starting tree).

    Arguments:
      input_alignment  -- path registered as IN_ALIGNMENT.
      input_partitions -- path registered as IN_PARTITION.
      output_tree      -- path registered as OUT_TREE.

    Returns a dict with the builder under "command".
    """
    builder = AtomicCmdBuilder("raxmlHPC")

    # Compute a randomized parsimony starting tree
    builder.set_option("-y")

    for option, value in (
            # Output files are saved with a .Pypeline postfix, and
            # subsequently renamed
            ("-n", "Pypeline"),
            # Model required, but not used
            ("-m", "GTRGAMMA"),
            # Ensures that output is saved to the temporary directory
            ("-w", "%(TEMP_DIR)s")):
        builder.set_option(option, value)

    # Random seed passed via "-p". May be set to a fixed value to allow
    # replicability.
    seed = int(random.random() * 2**31 - 1)
    builder.set_option("-p", seed, fixed=False)

    # Symlink to sequence and partitions, to prevent the creation of
    # *.reduced files outside temp folder
    builder.set_option("-s", "%(TEMP_OUT_ALIGNMENT)s")
    builder.set_option("-q", "%(TEMP_OUT_PARTITION)s")

    files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partitions,
        # TEMP_OUT_ is used to automatically remove these files
        "TEMP_OUT_ALIGNMENT": "RAxML_alignment",
        "TEMP_OUT_PARTITION": "RAxML_partitions",
        "TEMP_OUT_INFO": "RAxML_info.Pypeline",
        "OUT_TREE": output_tree,
        "CHECK_VERSION": RAXML_VERSION,
    }
    builder.set_kwargs(**files)

    return {"command": builder}
def customize(cls, input_alignment, input_partition, output_file,
              dependencies=()):
    """Build the 'examlParser' command.

    Arguments:
    input_alignment  -- An alignment file in a format readable by RAxML.
    input_partition  -- A set of partitions in a format readable by RAxML.
    output_file      -- Filename for the output binary sequence.

    Returns a dict with the builder under "command".
    """
    builder = AtomicCmdBuilder("examlParser", set_cwd=True)

    fixed_options = (
        ("-s", "%(TEMP_OUT_ALN)s"),
        ("-q", "%(TEMP_OUT_PART)s"),
        # Output file will be named output.binary, and placed in the CWD
        ("-n", "output"),
    )
    for option, value in fixed_options:
        builder.set_option(option, value)

    # Substitution model; non-fixed, so that it may be overwritten
    builder.set_option("-m", "DNA", fixed=False)

    files = {
        # Auto-delete: Symlinks
        "TEMP_OUT_PART": os.path.basename(input_partition),
        "TEMP_OUT_ALN": os.path.basename(input_alignment),
        # Input files, are not used directly
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        # Final output file, are not created directly
        "OUT_BINARY": output_file,
        "CHECK_EXAML": PARSER_VERSION,
    }
    builder.set_kwargs(**files)

    return {"command": builder}
def customize(cls, input_alignment, input_partition, output_alignment,
              output_partition, dependencies=()):
    """Build the 'raxmlHPC -f c' command (read and reduce input).

    Arguments:
      input_alignment  -- path registered as IN_ALIGNMENT.
      input_partition  -- path registered as IN_PARTITION.
      output_alignment -- path registered as OUT_ALIGNMENT.
      output_partition -- path registered as OUT_PARTITION.

    Returns a dict with the builder under "command".
    """
    builder = AtomicCmdBuilder("raxmlHPC")

    fixed_options = (
        # Read and (in the case of empty columns) reduce input
        ("-f", "c"),
        # Output files are saved with a .Pypeline postfix, and
        # subsequently renamed
        ("-n", "Pypeline"),
        # Model required, but not used
        ("-m", "GTRGAMMA"),
        # Ensures that output is saved to the temporary directory
        ("-w", "%(TEMP_DIR)s"),
        # Symlink to sequence and partitions, to prevent the creation of
        # *.reduced files outside temp folder. In addition, it may be
        # necessary to remove the .reduced files if created.
        ("-s", "%(TEMP_IN_ALIGNMENT)s"),
        ("-q", "%(TEMP_IN_PARTITION)s"),
    )
    for option, value in fixed_options:
        builder.set_option(option, value)

    files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        "TEMP_IN_ALIGNMENT": "RAxML_alignment",
        "TEMP_IN_PARTITION": "RAxML_partitions",
        "TEMP_OUT_INFO": "RAxML_info.Pypeline",
        "OUT_ALIGNMENT": output_alignment,
        "OUT_PARTITION": output_partition,
        "CHECK_VERSION": RAXML_VERSION,
    }
    builder.set_kwargs(**files)

    return {"command": builder}
def test_builder__kwargs__set_cwd():
    """The set_cwd constructor argument must be exposed through kwargs."""
    ls_cmd = AtomicCmdBuilder(["ls"], set_cwd=True)

    expected_kwargs = {"set_cwd": True}
    assert_equal(ls_cmd.kwargs, expected_kwargs)
def test_builder__set__kwargs__overwriting():
    """Re-setting an existing keyword must raise and preserve its value."""
    echo = AtomicCmdBuilder("echo")
    echo.set_kwargs(IN_PATH="/a/b/")

    assert_raises(AtomicCmdBuilderError,
                  echo.set_kwargs, IN_PATH="/dst/file")

    unchanged = {"IN_PATH": "/a/b/"}
    assert_equal(echo.kwargs, unchanged)
def test_builder__finalize__returns_singleton():
    """Repeated finalize() calls must return the very same object."""
    cmd = AtomicCmdBuilder("echo")

    first = cmd.finalize()
    second = cmd.finalize()

    assert first is second