def customize(cls, input_alignment, input_partitions, output_tree, dependencies=()):
    """Build the raxmlHPC command computing a randomized parsimony starting tree.

    Arguments:
    input_alignment  -- Alignment file, registered as IN_ALIGNMENT.
    input_partitions -- Partitions file, registered as IN_PARTITION.
    output_tree      -- Destination of the resulting tree, registered as OUT_TREE.
    dependencies     -- Accepted as part of the signature; not used in this body.

    Returns a dict holding the (not yet finalized) builder under "command".
    """
    command = AtomicCmdBuilder("raxmlHPC")

    # Compute a randomized parsimony starting tree
    command.set_option("-y")
    # Output files are saved with a .Pypeline postfix, and subsequently renamed
    command.set_option("-n", "Pypeline")
    # Model required, but not used
    command.set_option("-m", "GTRGAMMA")
    # Ensures that output is saved to the temporary directory
    command.set_option("-w", "%(TEMP_DIR)s")

    # Random seed for the parsimony inference; not fixed, so that it may be
    # overridden with a constant value to allow replicability. The raw
    # expression can evaluate to 0 (or -1 if random() returns exactly 0.0), so
    # it is clamped to guarantee a strictly positive seed.
    # NOTE(review): RAxML is expected to reject seeds <= 0 -- confirm in manual.
    seed = max(1, int(random.random() * 2**31 - 1))
    command.set_option("-p", seed, fixed=False)

    # Symlink to sequence and partitions, to prevent the creation of *.reduced
    # files outside the temp folder
    command.set_option("-s", "%(TEMP_OUT_ALIGNMENT)s")
    command.set_option("-q", "%(TEMP_OUT_PARTITION)s")

    command.set_kwargs(
        IN_ALIGNMENT=input_alignment,
        IN_PARTITION=input_partitions,
        # TEMP_OUT_ is used to automatically remove these files
        TEMP_OUT_ALIGNMENT="RAxML_alignment",
        TEMP_OUT_PARTITION="RAxML_partitions",
        TEMP_OUT_INFO="RAxML_info.Pypeline",
        OUT_TREE=output_tree,
        CHECK_VERSION=RAXML_VERSION,
    )

    return {"command": command}
def customize(cls, input_alignment, input_partition, output_alignment, output_partition, dependencies=()):
    """Build the raxmlHPC command that reads and, if needed, reduces the input.

    Arguments:
    input_alignment  -- Alignment file, registered as IN_ALIGNMENT.
    input_partition  -- Partitions file, registered as IN_PARTITION.
    output_alignment -- Destination registered as OUT_ALIGNMENT.
    output_partition -- Destination registered as OUT_PARTITION.
    dependencies     -- Accepted as part of the signature; not used in this body.

    Returns a dict holding the (not yet finalized) builder under "command".
    """
    builder = AtomicCmdBuilder("raxmlHPC")

    # Option order is kept as-is, since it may determine the final command-line.
    for option in (
        # Read and (in the case of empty columns) reduce input
        ("-f", "c"),
        # Output files get a .Pypeline postfix, and are subsequently renamed
        ("-n", "Pypeline"),
        # A model is required, but is not used
        ("-m", "GTRGAMMA"),
        # Ensures that output is saved to the temporary directory
        ("-w", "%(TEMP_DIR)s"),
        # Sequences and partitions are accessed via symlinks, to prevent the
        # creation of *.reduced files outside the temp folder; it may also be
        # necessary to remove the .reduced files, if any are created
        ("-s", "%(TEMP_IN_ALIGNMENT)s"),
        ("-q", "%(TEMP_IN_PARTITION)s"),
    ):
        builder.set_option(*option)

    paths = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        "TEMP_IN_ALIGNMENT": "RAxML_alignment",
        "TEMP_IN_PARTITION": "RAxML_partitions",
        "TEMP_OUT_INFO": "RAxML_info.Pypeline",
        "OUT_ALIGNMENT": output_alignment,
        "OUT_PARTITION": output_partition,
        "CHECK_VERSION": RAXML_VERSION,
    }
    builder.set_kwargs(**paths)

    return {"command": builder}
def customize(cls, input_alignment, input_partition, template, start=0, bootstraps=50, dependencies=()):
    """Build the raxmlHPC command that writes bootstrap alignments.

    Arguments:
    input_alignment -- Alignment file, registered as IN_ALIGNMENT.
    input_partition -- Partitions file, registered as IN_PARTITION.
    template        -- Passed to cls._bootstraps to derive output filenames.
    start           -- Passed to cls._bootstraps (default 0).
    bootstraps      -- Value of the '-N' option; also passed to cls._bootstraps.
    dependencies    -- Accepted as part of the signature; not used in this body.

    Returns a dict holding the (not yet finalized) builder under "command".
    """
    command = AtomicCmdBuilder("raxmlHPC", set_cwd=True)

    # Algorithm 'j'.
    # NOTE(review): per the RAxML manual this generates bootstrapped alignment
    # files from the input alignment -- confirm against the manual.
    command.set_option("-f", "j")
    # Output files are saved with a .Pypeline postfix, and subsequently renamed
    command.set_option("-n", "Pypeline")
    # Model required, but not used
    command.set_option("-m", "GTRGAMMA")

    # Random seed for bootstrap generation; not fixed, so that it may be
    # overridden with a constant value to allow replicability. The raw
    # expression can evaluate to 0 (or -1 if random() returns exactly 0.0), so
    # it is clamped to guarantee a strictly positive seed.
    # NOTE(review): RAxML is expected to reject seeds <= 0 -- confirm in manual.
    seed = max(1, int(random.random() * 2**31 - 1))
    command.set_option("-b", seed, fixed=False)
    # Number of bootstrap alignments to generate
    command.set_option("-N", int(bootstraps), fixed=False)

    # The command runs with set_cwd=True and reads the inputs via these fixed
    # names, to prevent the creation of *.reduced files outside the temp
    # folder. In addition, it may be necessary to remove the .reduced files
    # if created.
    command.set_option("-s", "input.alignment")
    command.set_option("-q", "input.partition")

    bootstrap_files = {
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        "TEMP_OUT_INF": "RAxML_info.Pypeline",
        "TEMP_OUT_ALN": "input.alignment",
        "TEMP_OUT_PAR": "input.partition",
        "CHECK_VERSION": RAXML_VERSION,
    }

    # One OUT_BS_nnn key per filename yielded by cls._bootstraps
    for index, (_, filename) in enumerate(cls._bootstraps(template, bootstraps, start)):
        bootstrap_files["OUT_BS_%03i" % index] = filename

    command.set_kwargs(**bootstrap_files)

    return {"command": command}
def customize(cls, input_alignment, input_partition, output_file, dependencies=()):
    """Build the 'parse-examl' command for an alignment and its partitions.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    input_partition -- A set of partitions in a format readable by RAxML.
    output_file     -- Filename for the output binary sequence.
    dependencies    -- Accepted as part of the signature; not used in this body.

    Returns a dict holding the (not yet finalized) builder under "command".
    """
    builder = AtomicCmdBuilder("parse-examl", set_cwd=True)

    builder.set_option("-s", "%(TEMP_OUT_ALN)s")
    builder.set_option("-q", "%(TEMP_OUT_PART)s")
    # The output is named output.binary, and is written to the CWD
    builder.set_option("-n", "output")
    # Substitution model
    builder.set_option("-m", "DNA", fixed=False)

    paths = {
        # Symlinks, which are automatically deleted
        "TEMP_OUT_PART": os.path.basename(input_partition),
        "TEMP_OUT_ALN": os.path.basename(input_alignment),
        # The input files are not used directly
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        # The final output file is not created directly
        "OUT_BINARY": output_file,
        "CHECK_EXAML": PARSER_VERSION,
    }
    builder.set_kwargs(**paths)

    return {"command": builder}
def test_builder__set__kwargs__overwriting():
    """Re-setting an existing key must raise and leave the kwargs untouched."""
    cmd = AtomicCmdBuilder("echo")
    cmd.set_kwargs(IN_PATH="/a/b/")

    # A second assignment to the same key is expected to be rejected ...
    assert_raises(AtomicCmdBuilderError, cmd.set_kwargs, IN_PATH="/dst/file")
    # ... without modifying the originally assigned value
    assert_equal(cmd.kwargs, {"IN_PATH": "/a/b/"})
def test_builder__set_kwargs__after_finalize():
    """set_kwargs must raise once the builder has been finalized."""
    cmd = AtomicCmdBuilder("echo")
    cmd.set_kwargs(IN_PATH="/a/b/")
    cmd.finalize()

    # Adding a new key after finalize() is expected to be rejected ...
    assert_raises(AtomicCmdBuilderError, cmd.set_kwargs, OUT_PATH="/dst/file")
    # ... and the previously set kwargs must be unchanged
    assert_equal(cmd.kwargs, {"IN_PATH": "/a/b/"})
def customize(cls, input_alignment, input_partition, output_file, dependencies=()):
    """Set up a builder for running 'parse-examl' on the given input files.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    input_partition -- A set of partitions in a format readable by RAxML.
    output_file     -- Filename for the output binary sequence.
    dependencies    -- Accepted as part of the signature; not used in this body.

    Returns a dict holding the (not yet finalized) builder under "command".
    """
    parser = AtomicCmdBuilder("parse-examl", set_cwd=True)

    # Option order is kept as-is, since it may determine the final command-line.
    parser.set_option("-s", "%(TEMP_OUT_ALN)s")
    parser.set_option("-q", "%(TEMP_OUT_PART)s")
    # Output file will be named output.binary, and placed in the CWD
    parser.set_option("-n", "output")
    # Substitution model
    parser.set_option("-m", "DNA", fixed=False)

    # Symlinks (auto-deleted), named after the input files
    partition_link = os.path.basename(input_partition)
    alignment_link = os.path.basename(input_alignment)

    parser.set_kwargs(
        TEMP_OUT_PART=partition_link,
        TEMP_OUT_ALN=alignment_link,
        # Input files; these are not used directly
        IN_ALIGNMENT=input_alignment,
        IN_PARTITION=input_partition,
        # Final output file; this is not created directly
        OUT_BINARY=output_file,
        CHECK_EXAML=PARSER_VERSION,
    )

    return {"command": parser}
def _bowtie2_template(call, prefix, iotype="IN", **kwargs):
    """Return a builder for 'call' with the '.bt2' files of 'prefix' registered.

    One key named '<iotype>_PREFIX_<POSTFIX>' is set per index file, where the
    postfix is upper-cased; the value is '<prefix>.<postfix>'. Any further
    keyword arguments are passed on to AtomicCmdBuilder.
    """
    builder = AtomicCmdBuilder(call, **kwargs)

    index_postfixes = ("1.bt2", "2.bt2", "3.bt2", "4.bt2", "rev.1.bt2", "rev.2.bt2")
    for extension in index_postfixes:
        name = "%s_PREFIX_%s" % (iotype, extension.upper())
        path = prefix + "." + extension
        # Keys are registered one at a time
        builder.set_kwargs(**{name: path})

    return builder
def customize(cls, input_file, output_file, algorithm="auto", dependencies=()):
    """Build the alignment command for the requested preset.

    Arguments:
    input_file   -- Input file, registered as IN_FASTA.
    output_file  -- File receiving the command's stdout (OUT_STDOUT).
    algorithm    -- Key into _PRESETS; looked up in lower-case (default "auto").
    dependencies -- Returned unchanged under "dependencies".

    Returns a dict with the builder ("command") and the dependencies.
    """
    # An unknown algorithm name results in a KeyError from this lookup
    executable = _PRESETS[algorithm.lower()]

    builder = AtomicCmdBuilder(executable)
    builder.add_value("%(IN_FASTA)s")
    builder.set_kwargs(
        IN_FASTA=input_file,
        OUT_STDOUT=output_file,
        CHECK_VERSION=MAFFT_VERSION,
    )

    return {
        "command": builder,
        "dependencies": dependencies,
    }
def _get_bwa_template(call, prefix, iotype="IN", **kwargs):
    """Return a builder for 'call' with the BWA index files of 'prefix' registered.

    One key named '<iotype>_PREFIX_<EXT>' is set per index file, with the value
    '<prefix>.<ext>'. The "rbwt", "rpac" and "rsa" files are included only when
    the BWA version is older than 0.6.0. Any further keyword arguments are
    passed on to AtomicCmdBuilder.
    """
    try:
        pre_0_6 = BWA_VERSION.version < (0, 6, 0)
    except versions.VersionRequirementError:
        # Ignored here, handled elsewhere
        pre_0_6 = False

    index_files = ["amb", "ann", "bwt", "pac", "sa"]
    if pre_0_6:
        index_files += ["rbwt", "rpac", "rsa"]

    builder = AtomicCmdBuilder(call, **kwargs)
    for extension in index_files:
        name = "%s_PREFIX_%s" % (iotype, extension.upper())
        builder.set_kwargs(**{name: prefix + "." + extension})

    return builder
def __init__(self, input_alignment, input_partition, output_file, dependencies=()):
    """Create a node running 'parse-examl' on an alignment and its partitions.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    input_partition -- A set of partitions in a format readable by RAxML.
    output_file     -- Filename for the output binary sequence.
    dependencies    -- Passed on to CommandNode.__init__.
    """
    builder = AtomicCmdBuilder("parse-examl", set_cwd=True)

    builder.set_option("-s", "%(TEMP_OUT_ALN)s")
    builder.set_option("-q", "%(TEMP_OUT_PART)s")
    # The output is named output.binary, and is written to the CWD
    builder.set_option("-n", "output")
    # Substitution model
    builder.set_option("-m", "DNA", fixed=False)

    paths = {
        # Symlinks, which are automatically deleted
        "TEMP_OUT_PART": os.path.basename(input_partition),
        "TEMP_OUT_ALN": os.path.basename(input_alignment),
        # The input files are not used directly
        "IN_ALIGNMENT": input_alignment,
        "IN_PARTITION": input_partition,
        # The final output file is not created directly
        "OUT_BINARY": output_file,
        "CHECK_EXAML": PARSER_VERSION,
    }
    builder.set_kwargs(**paths)

    description = "<ExaMLParser: '%s' -> '%s'>" % (input_alignment, output_file)
    CommandNode.__init__(
        self,
        command=builder.finalize(),
        description=description,
        dependencies=dependencies,
    )

    # Absolute paths of the input files, and the bare name of the output file
    self._symlinks = [os.path.abspath(path) for path in (input_alignment, input_partition)]
    self._output_file = os.path.basename(output_file)
def test_builder__finalize__calls_atomiccmd():
    """finalize() must instantiate AtomicCmd with the built call and kwargs."""
    expected_args = (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"],)
    expected_kwargs = {"IN_FILE": "/in/file", "OUT_FILE": "/out/file", "set_cwd": True}
    invocations = []

    class _AtomicCmdMock:
        def __init__(self, *args, **kwargs):
            # Verify what the builder passes on, and record the call
            assert_equal(args, expected_args)
            assert_equal(kwargs, expected_kwargs)
            invocations.append(True)

    with Monkeypatch("paleomix.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        cmd = AtomicCmdBuilder("echo", set_cwd=True)
        cmd.add_option("-out", "%(OUT_FILE)s")
        cmd.add_value("%(IN_FILE)s")
        cmd.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")

        cmd.finalize()

        # The mock must have been constructed at least once
        assert invocations
def test_builder__finalize__calls_atomiccmd():
    """Check that finalize() constructs AtomicCmd from the collected values."""
    call = ["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"]
    paths = {"IN_FILE": "/in/file", "OUT_FILE": "/out/file"}
    observed = []

    class _AtomicCmdMock(object):
        def __init__(self, *args, **kwargs):
            # The call is expected as the sole positional argument ...
            assert_equal(args, (call,))
            # ... and the paths, plus 'set_cwd', as keyword arguments
            assert_equal(kwargs, dict(paths, set_cwd=True))
            observed.append(True)

    with Monkeypatch("paleomix.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        builder = AtomicCmdBuilder("echo", set_cwd=True)
        builder.add_option("-out", "%(OUT_FILE)s")
        builder.add_value("%(IN_FILE)s")
        builder.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")

        builder.finalize()

        assert observed
def customize(cls, input_alignment, output_tree, dependencies=()):
    """Build the 'parsimonator' command for the given alignment.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    output_tree     -- Filename for the output newick tree.
    dependencies    -- Accepted as part of the signature; not used in this body.

    Returns a dict holding the (not yet finalized) builder under "command".
    """
    builder = AtomicCmdBuilder("parsimonator", set_cwd=True)

    builder.set_option("-s", "%(TEMP_OUT_ALN)s")
    builder.set_option("-n", "output")

    # Random seed for the stepwise addition process; not fixed
    seed = int(random.random() * 2**31 - 1)
    builder.set_option("-p", seed, fixed=False)

    paths = {
        # Symlink, which is automatically deleted
        "TEMP_OUT_ALN": os.path.basename(input_alignment),
        # The input file is not used directly
        "IN_ALIGNMENT": input_alignment,
        # The final output file is not created directly
        "OUT_TREE": output_tree,
    }
    builder.set_kwargs(**paths)

    return {"command": builder}
def customize(cls, input_alignment, output_tree, dependencies=()):
    """Set up a builder for running 'parsimonator' on an alignment.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    output_tree     -- Filename for the output newick tree.
    dependencies    -- Accepted as part of the signature; not used in this body.

    Returns a dict holding the (not yet finalized) builder under "command".
    """
    parsimonator = AtomicCmdBuilder("parsimonator", set_cwd=True)

    # Option order is kept as-is, since it may determine the final command-line.
    parsimonator.set_option("-s", "%(TEMP_OUT_ALN)s")
    parsimonator.set_option("-n", "output")
    # Random seed for the stepwise addition process
    parsimonator.set_option(
        "-p",
        int(random.random() * 2**31 - 1),
        fixed=False,
    )

    # Symlink (auto-deleted), named after the input alignment
    alignment_link = os.path.basename(input_alignment)

    parsimonator.set_kwargs(
        TEMP_OUT_ALN=alignment_link,
        # Input file; this is not used directly
        IN_ALIGNMENT=input_alignment,
        # Final output file; this is not created directly
        OUT_TREE=output_tree,
    )

    return {"command": parsimonator}
def test_builder__set_kwargs__called_twice():
    """Separate set_kwargs calls with distinct keys must accumulate."""
    cmd = AtomicCmdBuilder("echo")

    # Two calls, each setting a different key
    cmd.set_kwargs(OUT_PATH="/dst/file")
    cmd.set_kwargs(IN_PATH="/a/b/")

    assert_equal(cmd.kwargs, {"IN_PATH": "/a/b/", "OUT_PATH": "/dst/file"})
def customize(cls, input_alignment, output_template, input_partition=None, threads=1, dependencies=()):
    """Build the RAxML rapid-bootstrapping command.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    input_partition -- A set of partitions in a format readable by RAxML,
                       or None to run without a partition file.
    output_template -- A template string used to construct final filenames.
                       Should consist of a full path, including a single
                       '%s', which is replaced with the variable part of
                       RAxML output files (e.g. 'info', 'bestTree', ...).
                       Example destination: '/disk/project/SN013420.RAxML.%s'
                       Example output:      '/disk/project/SN013420.RAxML.bestTree'
    threads         -- If greater than 1, 'raxmlHPC-PTHREADS' is used with
                       '-T threads'; otherwise 'raxmlHPC' is used.
    dependencies    -- Accepted as part of the signature; not used in this body.

    Returns a dict holding the (not yet finalized) builder under "command".
    """
    if threads > 1:
        command = AtomicCmdBuilder("raxmlHPC-PTHREADS")
        command.set_option("-T", threads)
        version = RAXML_PTHREADS_VERSION
    else:
        command = AtomicCmdBuilder("raxmlHPC")
        version = RAXML_VERSION

    # Perform rapid bootstrapping
    command.set_option("-f", "a")
    # Output files are saved with a .PALEOMIX postfix, and subsequently renamed
    command.set_option("-n", "PALEOMIX")
    # Ensures that output is saved to the temporary directory
    command.set_option("-w", "%(TEMP_DIR)s")
    # Symlink to sequence and partitions, to prevent the creation of *.reduced
    # files outside the temp folder. In addition, it may be necessary to remove
    # the .reduced files if created.
    command.set_option("-s", "%(TEMP_OUT_ALN)s")

    if input_partition is not None:
        command.set_option("-q", "%(TEMP_OUT_PART)s")
        command.set_kwargs(
            IN_PARTITION=input_partition,
            TEMP_OUT_PART=os.path.basename(input_partition),
            TEMP_OUT_PART_R=os.path.basename(input_partition) + ".reduced",
        )

    command.set_kwargs(
        # Auto-delete: Symlinks and .reduced files that RAxML may generate
        TEMP_OUT_ALN=os.path.basename(input_alignment),
        TEMP_OUT_ALN_R=os.path.basename(input_alignment) + ".reduced",
        # Input files, are not used directly
        IN_ALIGNMENT=input_alignment,
        # Final output files, are not created directly
        OUT_INFO=output_template % "info",
        OUT_BESTTREE=output_template % "bestTree",
        OUT_BOOTSTRAP=output_template % "bootstrap",
        OUT_BIPART=output_template % "bipartitions",
        OUT_BIPARTLABEL=output_template % "bipartitionsBranchLabels",
        CHECK_VERSION=version,
    )

    # Use the GTRGAMMAI model of NT substitution by default
    command.set_option("-m", "GTRGAMMAI", fixed=False)

    # The raw seed expression can evaluate to 0 (or -1 if random() returns
    # exactly 0.0), so both seeds are clamped to be strictly positive.
    # NOTE(review): RAxML is expected to reject seeds <= 0 -- confirm in manual.

    # Enable Rapid Bootstrapping and set random seed. May be set to a fixed
    # value to allow replicability.
    bootstrap_seed = max(1, int(random.random() * 2**31 - 1))
    command.set_option("-x", bootstrap_seed, fixed=False)
    # Set random seed for parsimony inference. May be set to a fixed value to
    # allow replicability.
    parsimony_seed = max(1, int(random.random() * 2**31 - 1))
    command.set_option("-p", parsimony_seed, fixed=False)

    # Terminate bootstrapping upon convergence, rather than after a fixed
    # number of repetitions
    command.set_option("-N", "autoMRE", fixed=False)

    return {"command": command}
def __init__(
    self,
    input_alignment,
    output_template,
    input_partition=None,
    model="GTRGAMMAI",
    replicates="autoMRE",
    threads=1,
    dependencies=(),
):
    """Create a node running RAxML rapid bootstrapping.

    Arguments:
    input_alignment -- An alignment file in a format readable by RAxML.
    input_partition -- A set of partitions in a format readable by RAxML,
                       or None to run without a partition file.
    output_template -- A template string used to construct final filenames.
                       Should consist of a full path, including a single
                       '%s', which is replaced with the variable part of
                       RAxML output files (e.g. 'info', 'bestTree', ...).
                       Example destination: '/disk/project/SN013420.RAxML.%s'
                       Example output:      '/disk/project/SN013420.RAxML.bestTree'
    model           -- Value of the '-m' option (default "GTRGAMMAI").
    replicates      -- Value of the '-N' option (default "autoMRE").
    threads         -- If greater than 1, 'raxmlHPC-PTHREADS' is used with
                       '-T threads'; otherwise 'raxmlHPC' is used. Also
                       passed on to CommandNode.__init__.
    dependencies    -- Passed on to CommandNode.__init__.
    """
    if threads > 1:
        command = AtomicCmdBuilder("raxmlHPC-PTHREADS")
        command.set_option("-T", threads)
        version = RAXML_PTHREADS_VERSION
    else:
        command = AtomicCmdBuilder("raxmlHPC")
        version = RAXML_VERSION

    # Perform rapid bootstrapping
    command.set_option("-f", "a")
    # Output files are saved with a .PALEOMIX postfix, and subsequently renamed
    command.set_option("-n", "PALEOMIX")
    # Ensures that output is saved to the temporary directory
    command.set_option("-w", "%(TEMP_DIR)s")
    # Symlink to sequence and partitions, to prevent the creation of *.reduced
    # files outside temp folder. In addition, it may be necessary to remove the
    # .reduced files if created
    command.set_option("-s", "%(TEMP_OUT_ALN)s")

    if input_partition is not None:
        command.set_option("-q", "%(TEMP_OUT_PART)s")
        command.set_kwargs(
            IN_PARTITION=input_partition,
            TEMP_OUT_PART=os.path.basename(input_partition),
            TEMP_OUT_PART_R=os.path.basename(input_partition) + ".reduced",
        )

    command.set_kwargs(
        # Auto-delete: Symlinks and .reduced files that RAxML may generate
        TEMP_OUT_ALN=os.path.basename(input_alignment),
        TEMP_OUT_ALN_R=os.path.basename(input_alignment) + ".reduced",
        # Input files, are not used directly
        IN_ALIGNMENT=input_alignment,
        # Final output files, are not created directly
        OUT_INFO=output_template % "info",
        OUT_BESTTREE=output_template % "bestTree",
        OUT_BOOTSTRAP=output_template % "bootstrap",
        OUT_BIPART=output_template % "bipartitions",
        OUT_BIPARTLABEL=output_template % "bipartitionsBranchLabels",
        CHECK_VERSION=version,
    )

    # Model of NT substitution; GTRGAMMAI unless overridden by the caller
    command.set_option("-m", model, fixed=False)

    # The raw seed expression can evaluate to 0 (or -1 if random() returns
    # exactly 0.0), so both seeds are clamped to be strictly positive.
    # NOTE(review): RAxML is expected to reject seeds <= 0 -- confirm in manual.

    # Enable Rapid Bootstrapping and set random seed. May be set to a fixed
    # value to allow replicability.
    bootstrap_seed = max(1, int(random.random() * 2 ** 31 - 1))
    command.set_option("-x", bootstrap_seed, fixed=False)
    # Set random seed for parsimony inference. May be set to allow replicability.
    parsimony_seed = max(1, int(random.random() * 2 ** 31 - 1))
    command.set_option("-p", parsimony_seed, fixed=False)

    # Number of replicates; the default "autoMRE" terminates bootstrapping upon
    # convergence, not after N repetitions
    command.set_option("-N", replicates, fixed=False)

    # NOTE(review): input_partition may be None here; presumably consumers of
    # self._symlinks skip None entries -- verify against the rest of the class.
    self._symlinks = [input_alignment, input_partition]
    self._template = os.path.basename(output_template)

    CommandNode.__init__(
        self,
        command=command.finalize(),
        description="<RAxMLRapidBS: '%s' -> '%s'>"
        % (input_alignment, output_template % ("*",)),
        threads=threads,
        dependencies=dependencies,
    )