Ejemplo n.º 1
0
    def customize(cls, input_alignment, input_partitions, output_tree, dependencies = ()):
        command = AtomicCmdBuilder("raxmlHPC")

        # Compute a randomized parsimony starting tree
        command.set_option("-y")
        # Output files are saved with a .Pypeline postfix, and subsequently renamed
        command.set_option("-n", "Pypeline")
        # Model required, but not used
        command.set_option("-m", "GTRGAMMA")
        # Ensures that output is saved to the temporary directory
        command.set_option("-w", "%(TEMP_DIR)s")
        # Set random seed for bootstrap generation. May be set to a fixed value to allow replicability.
        command.set_option("-p", int(random.random() * 2**31 - 1), fixed = False)

        # Symlink to sequence and partitions, to prevent the creation of *.reduced files outside temp folder
        command.set_option("-s", "%(TEMP_OUT_ALIGNMENT)s")
        command.set_option("-q", "%(TEMP_OUT_PARTITION)s")

        command.set_kwargs(IN_ALIGNMENT       = input_alignment,
                           IN_PARTITION       = input_partitions,

                           # TEMP_OUT_ is used to automatically remove these files
                           TEMP_OUT_ALIGNMENT = "RAxML_alignment",
                           TEMP_OUT_PARTITION = "RAxML_partitions",
                           TEMP_OUT_INFO      = "RAxML_info.Pypeline",

                           OUT_TREE           = output_tree,

                           CHECK_VERSION      = RAXML_VERSION)

        return {"command" : command}
Ejemplo n.º 2
0
    def customize(cls, input_alignment, input_partition, output_alignment, output_partition, dependencies = ()):
        command = AtomicCmdBuilder("raxmlHPC")

        # Read and (in the case of empty columns) reduce input
        command.set_option("-f", "c")
        # Output files are saved with a .Pypeline postfix, and subsequently renamed
        command.set_option("-n", "Pypeline")
        # Model required, but not used
        command.set_option("-m", "GTRGAMMA")
        # Ensures that output is saved to the temporary directory
        command.set_option("-w", "%(TEMP_DIR)s")

        # Symlink to sequence and partitions, to prevent the creation of *.reduced files outside temp folder
        # In addition, it may be nessesary to remove the .reduced files if created
        command.set_option("-s", "%(TEMP_IN_ALIGNMENT)s")
        command.set_option("-q", "%(TEMP_IN_PARTITION)s")

        command.set_kwargs(IN_ALIGNMENT      = input_alignment,
                          IN_PARTITION      = input_partition,

                          TEMP_IN_ALIGNMENT = "RAxML_alignment",
                          TEMP_IN_PARTITION = "RAxML_partitions",
                          TEMP_OUT_INFO     = "RAxML_info.Pypeline",

                          OUT_ALIGNMENT     = output_alignment,
                          OUT_PARTITION     = output_partition,
                          CHECK_VERSION     = RAXML_VERSION)

        return {"command" : command}
Ejemplo n.º 3
0
    def customize(cls, input_alignment, input_partition, template, start = 0, bootstraps = 50, dependencies = ()):
        command = AtomicCmdBuilder("raxmlHPC", set_cwd = True)

        # Read and (in the case of empty columns) reduce input
        command.set_option("-f", "j")
        # Output files are saved with a .Pypeline postfix, and subsequently renamed
        command.set_option("-n", "Pypeline")
        # Model required, but not used
        command.set_option("-m", "GTRGAMMA")
        # Set random seed for bootstrap generation. May be set to a fixed value to allow replicability.
        command.set_option("-b", int(random.random() * 2**31 - 1), fixed = False)
        # Generate a single bootstrap alignment (makes growing the number of bootstraps easier).
        command.set_option("-N", int(bootstraps), fixed = False)

        # Symlink to sequence and partitions, to prevent the creation of *.reduced files outside temp folder
        # In addition, it may be nessesary to remove the .reduced files if created
        command.set_option("-s", "input.alignment")
        command.set_option("-q", "input.partition")

        bootstrap_files = {"IN_ALIGNMENT" : input_alignment,
                           "IN_PARTITION" : input_partition,
                           "TEMP_OUT_INF" : "RAxML_info.Pypeline",
                           "TEMP_OUT_ALN" : "input.alignment",
                           "TEMP_OUT_PAR" : "input.partition",
                           "CHECK_VERSION": RAXML_VERSION}

        for (index, (_, filename)) in enumerate(cls._bootstraps(template, bootstraps, start)):
            bootstrap_files["OUT_BS_%03i" % index] = filename
        command.set_kwargs(**bootstrap_files)

        return {"command" : command}
Ejemplo n.º 4
0
    def customize(cls,
                  input_alignment,
                  input_partition,
                  output_file,
                  dependencies=()):
        """
        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        input_partition  -- A set of partitions in a format readable by RAxML.
        output_filename  -- Filename for the output binary sequence."""

        command = AtomicCmdBuilder("parse-examl", set_cwd=True)

        command.set_option("-s", "%(TEMP_OUT_ALN)s")
        command.set_option("-q", "%(TEMP_OUT_PART)s")
        # Output file will be named output.binary, and placed in the CWD
        command.set_option("-n", "output")

        # Substitution model
        command.set_option("-m", "DNA", fixed=False)

        command.set_kwargs(  # Auto-delete: Symlinks
            TEMP_OUT_PART=os.path.basename(input_partition),
            TEMP_OUT_ALN=os.path.basename(input_alignment),

            # Input files, are not used directly (see below)
            IN_ALIGNMENT=input_alignment,
            IN_PARTITION=input_partition,

            # Final output file, are not created directly
            OUT_BINARY=output_file,
            CHECK_EXAML=PARSER_VERSION)

        return {"command": command}
Ejemplo n.º 5
0
def test_builder__set__kwargs__overwriting():
    expected = {"IN_PATH": "/a/b/"}
    builder = AtomicCmdBuilder("echo")
    builder.set_kwargs(IN_PATH="/a/b/")
    assert_raises(AtomicCmdBuilderError,
                  builder.set_kwargs, IN_PATH="/dst/file")
    assert_equal(builder.kwargs, expected)
Ejemplo n.º 6
0
def test_builder__set_kwargs__after_finalize():
    expected = {"IN_PATH": "/a/b/"}
    builder = AtomicCmdBuilder("echo")
    builder.set_kwargs(IN_PATH="/a/b/")
    builder.finalize()
    assert_raises(AtomicCmdBuilderError, builder.set_kwargs, OUT_PATH="/dst/file")
    assert_equal(builder.kwargs, expected)
Ejemplo n.º 7
0
    def customize(cls, input_alignment, input_partition, output_file, dependencies = ()):
        """
        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        input_partition  -- A set of partitions in a format readable by RAxML.
        output_filename  -- Filename for the output binary sequence."""

        command = AtomicCmdBuilder("parse-examl", set_cwd = True)

        command.set_option("-s", "%(TEMP_OUT_ALN)s")
        command.set_option("-q", "%(TEMP_OUT_PART)s")
        # Output file will be named output.binary, and placed in the CWD
        command.set_option("-n", "output")

        # Substitution model
        command.set_option("-m", "DNA", fixed = False)


        command.set_kwargs(# Auto-delete: Symlinks
                          TEMP_OUT_PART   = os.path.basename(input_partition),
                          TEMP_OUT_ALN    = os.path.basename(input_alignment),

                          # Input files, are not used directly (see below)
                          IN_ALIGNMENT    = input_alignment,
                          IN_PARTITION    = input_partition,

                          # Final output file, are not created directly
                          OUT_BINARY      = output_file,

                          CHECK_EXAML     = PARSER_VERSION)

        return {"command" : command}
Ejemplo n.º 8
0
def _bowtie2_template(call, prefix, iotype = "IN", **kwargs):
    params = AtomicCmdBuilder(call, **kwargs)
    for postfix in ("1.bt2", "2.bt2", "3.bt2", "4.bt2", "rev.1.bt2", "rev.2.bt2"):
        key = "%s_PREFIX_%s" % (iotype, postfix.upper())
        params.set_kwargs(**{key : (prefix + "." + postfix)})

    return params
Ejemplo n.º 9
0
def test_builder__set__kwargs__overwriting():
    expected = {"IN_PATH": "/a/b/"}
    builder = AtomicCmdBuilder("echo")
    builder.set_kwargs(IN_PATH="/a/b/")
    assert_raises(AtomicCmdBuilderError,
                  builder.set_kwargs,
                  IN_PATH="/dst/file")
    assert_equal(builder.kwargs, expected)
Ejemplo n.º 10
0
    def customize(cls, input_file, output_file, algorithm = "auto", dependencies = ()):
        command = AtomicCmdBuilder(_PRESETS[algorithm.lower()])
        command.add_value("%(IN_FASTA)s")
        command.set_kwargs(IN_FASTA   = input_file,
                           OUT_STDOUT = output_file,
                           CHECK_VERSION = MAFFT_VERSION)

        return {"command"      : command,
                "dependencies" : dependencies}
Ejemplo n.º 11
0
def test_builder__set_kwargs__after_finalize():
    expected = {"IN_PATH": "/a/b/"}
    builder = AtomicCmdBuilder("echo")
    builder.set_kwargs(IN_PATH="/a/b/")
    builder.finalize()
    assert_raises(AtomicCmdBuilderError,
                  builder.set_kwargs,
                  OUT_PATH="/dst/file")
    assert_equal(builder.kwargs, expected)
Ejemplo n.º 12
0
    def customize(cls, input_file, output_file, algorithm = "auto", dependencies = ()):
        command = AtomicCmdBuilder(_PRESETS[algorithm.lower()])
        command.add_value("%(IN_FASTA)s")
        command.set_kwargs(IN_FASTA   = input_file,
                           OUT_STDOUT = output_file,
                           CHECK_VERSION = MAFFT_VERSION)

        return {"command"      : command,
                "dependencies" : dependencies}
Ejemplo n.º 13
0
def _get_bwa_template(call, prefix, iotype="IN", **kwargs):
    extensions = ["amb", "ann", "bwt", "pac", "sa"]
    try:
        if BWA_VERSION.version < (0, 6, 0):
            extensions.extend(("rbwt", "rpac", "rsa"))
    except versions.VersionRequirementError:
        pass  # Ignored here, handled elsewhere

    params = AtomicCmdBuilder(call, **kwargs)
    for postfix in extensions:
        key = "%s_PREFIX_%s" % (iotype, postfix.upper())
        params.set_kwargs(**{key: (prefix + "." + postfix)})

    return params
Ejemplo n.º 14
0
def _get_bwa_template(call, prefix, iotype="IN", **kwargs):
    extensions = ["amb", "ann", "bwt", "pac", "sa"]
    try:
        if BWA_VERSION.version < (0, 6, 0):
            extensions.extend(("rbwt", "rpac", "rsa"))
    except versions.VersionRequirementError:
        pass  # Ignored here, handled elsewhere

    params = AtomicCmdBuilder(call, **kwargs)
    for postfix in extensions:
        key = "%s_PREFIX_%s" % (iotype, postfix.upper())
        params.set_kwargs(**{key: (prefix + "." + postfix)})

    return params
Ejemplo n.º 15
0
    def __init__(self,
                 input_alignment,
                 input_partition,
                 output_file,
                 dependencies=()):
        """
        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        input_partition  -- A set of partitions in a format readable by RAxML.
        output_filename  -- Filename for the output binary sequence."""

        command = AtomicCmdBuilder("parse-examl", set_cwd=True)

        command.set_option("-s", "%(TEMP_OUT_ALN)s")
        command.set_option("-q", "%(TEMP_OUT_PART)s")
        # Output file will be named output.binary, and placed in the CWD
        command.set_option("-n", "output")

        # Substitution model
        command.set_option("-m", "DNA", fixed=False)

        command.set_kwargs(  # Auto-delete: Symlinks
            TEMP_OUT_PART=os.path.basename(input_partition),
            TEMP_OUT_ALN=os.path.basename(input_alignment),
            # Input files, are not used directly (see below)
            IN_ALIGNMENT=input_alignment,
            IN_PARTITION=input_partition,
            # Final output file, are not created directly
            OUT_BINARY=output_file,
            CHECK_EXAML=PARSER_VERSION,
        )

        CommandNode.__init__(
            self,
            command=command.finalize(),
            description="<ExaMLParser: '%s' -> '%s'>" %
            (input_alignment, output_file),
            dependencies=dependencies,
        )

        self._symlinks = [
            os.path.abspath(input_alignment),
            os.path.abspath(input_partition),
        ]
        self._output_file = os.path.basename(output_file)
Ejemplo n.º 16
0
def test_builder__finalize__calls_atomiccmd():
    was_called = []

    class _AtomicCmdMock:
        def __init__(self, *args, **kwargs):
            assert_equal(args, (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"],))
            assert_equal(kwargs, {"IN_FILE": "/in/file",
                                  "OUT_FILE": "/out/file",
                                  "set_cwd": True})
            was_called.append(True)

    with Monkeypatch("paleomix.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        builder = AtomicCmdBuilder("echo", set_cwd=True)
        builder.add_option("-out", "%(OUT_FILE)s")
        builder.add_value("%(IN_FILE)s")
        builder.set_kwargs(OUT_FILE="/out/file",
                           IN_FILE="/in/file")

        builder.finalize()
        assert was_called
Ejemplo n.º 17
0
def test_builder__finalize__calls_atomiccmd():
    was_called = []

    class _AtomicCmdMock(object):
        def __init__(self, *args, **kwargs):
            assert_equal(args,
                         (["echo", "-out", "%(OUT_FILE)s", "%(IN_FILE)s"], ))
            assert_equal(kwargs, {
                "IN_FILE": "/in/file",
                "OUT_FILE": "/out/file",
                "set_cwd": True
            })
            was_called.append(True)

    with Monkeypatch("paleomix.atomiccmd.builder.AtomicCmd", _AtomicCmdMock):
        builder = AtomicCmdBuilder("echo", set_cwd=True)
        builder.add_option("-out", "%(OUT_FILE)s")
        builder.add_value("%(IN_FILE)s")
        builder.set_kwargs(OUT_FILE="/out/file", IN_FILE="/in/file")

        builder.finalize()
        assert was_called
Ejemplo n.º 18
0
    def customize(cls, input_alignment, output_tree, dependencies=()):
        """
        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        output_tree      -- Filename for the output newick tree."""

        command = AtomicCmdBuilder("parsimonator", set_cwd=True)

        command.set_option("-s", "%(TEMP_OUT_ALN)s")
        command.set_option("-n", "output")
        # Random seed for the stepwise addition process
        command.set_option("-p", int(random.random() * 2**31 - 1), fixed=False)

        command.set_kwargs(  # Auto-delete: Symlinks
            TEMP_OUT_ALN=os.path.basename(input_alignment),

            # Input files, are not used directly (see below)
            IN_ALIGNMENT=input_alignment,

            # Final output file, are not created directly
            OUT_TREE=output_tree)

        return {"command": command}
Ejemplo n.º 19
0
    def customize(cls, input_alignment, output_tree, dependencies = ()):
        """
        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        output_tree      -- Filename for the output newick tree."""

        command = AtomicCmdBuilder("parsimonator", set_cwd = True)

        command.set_option("-s", "%(TEMP_OUT_ALN)s")
        command.set_option("-n", "output")
        # Random seed for the stepwise addition process
        command.set_option("-p", int(random.random() * 2**31 - 1), fixed = False)

        command.set_kwargs(# Auto-delete: Symlinks
                          TEMP_OUT_ALN   = os.path.basename(input_alignment),

                          # Input files, are not used directly (see below)
                          IN_ALIGNMENT    = input_alignment,

                          # Final output file, are not created directly
                          OUT_TREE       = output_tree)

        return {"command"         : command}
Ejemplo n.º 20
0
def test_builder__set_kwargs__called_twice():
    expected = {"IN_PATH": "/a/b/", "OUT_PATH": "/dst/file"}
    builder = AtomicCmdBuilder("echo")
    builder.set_kwargs(OUT_PATH="/dst/file")
    builder.set_kwargs(IN_PATH="/a/b/")
    assert_equal(builder.kwargs, expected)
Ejemplo n.º 21
0
def test_builder__set_kwargs__called_twice():
    expected = {"IN_PATH": "/a/b/", "OUT_PATH": "/dst/file"}
    builder = AtomicCmdBuilder("echo")
    builder.set_kwargs(OUT_PATH="/dst/file")
    builder.set_kwargs(IN_PATH="/a/b/")
    assert_equal(builder.kwargs, expected)
Ejemplo n.º 22
0
    def customize(cls, input_alignment, output_template, input_partition=None,
                  threads=1, dependencies=()):
        """
        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        input_partition  -- A set of partitions in a format readable by RAxML.
        output_template  -- A template string used to construct final filenames. Should consist
                            of a full path, including a single '%s', which is replaced with the
                            variable part of RAxML output files (e.g. 'info', 'bestTree', ...).
                            Example destination: '/disk/project/SN013420.RAxML.%s'
                            Example output:      '/disk/project/SN013420.RAxML.bestTree'
        """

        if threads > 1:
            command = AtomicCmdBuilder("raxmlHPC-PTHREADS")
            command.set_option("-T", threads)
            version = RAXML_PTHREADS_VERSION
        else:
            command = AtomicCmdBuilder("raxmlHPC")
            version = RAXML_VERSION

        # Perform rapid bootstrapping
        command.set_option("-f", "a")
        # Output files are saved with a .PALEOMIX postfix, and subsequently renamed
        command.set_option("-n", "PALEOMIX")
        # Ensures that output is saved to the temporary directory
        command.set_option("-w", "%(TEMP_DIR)s")
        # Symlink to sequence and partitions, to prevent the creation of *.reduced files outside temp folder
        # In addition, it may be nessesary to remove the .reduced files if created
        command.set_option("-s", "%(TEMP_OUT_ALN)s")

        if input_partition is not None:
            command.set_option("-q", "%(TEMP_OUT_PART)s")
            command.set_kwargs(IN_PARTITION=input_partition,
                               TEMP_OUT_PART=os.path.basename(input_partition),
                               TEMP_OUT_PART_R=os.path.basename(input_partition) + ".reduced")

        command.set_kwargs(  # Auto-delete: Symlinks and .reduced files that RAxML may generate
                           TEMP_OUT_ALN=os.path.basename(input_alignment),
                           TEMP_OUT_ALN_R=os.path.basename(input_alignment) + ".reduced",

                           # Input files, are not used directly (see below)
                           IN_ALIGNMENT=input_alignment,

                           # Final output files, are not created directly
                           OUT_INFO=output_template % "info",
                           OUT_BESTTREE=output_template % "bestTree",
                           OUT_BOOTSTRAP=output_template % "bootstrap",
                           OUT_BIPART=output_template % "bipartitions",
                           OUT_BIPARTLABEL=output_template % "bipartitionsBranchLabels",

                           CHECK_VERSION=version)

        # Use the GTRGAMMA model of NT substitution by default
        command.set_option("-m", "GTRGAMMAI", fixed=False)
        # Enable Rapid Boostrapping and set random seed. May be set to a fixed value to allow replicability.
        command.set_option("-x", int(random.random() * 2**31 - 1), fixed=False)
        # Set random seed for parsimony inference. May be set to a fixed value to allow replicability.
        command.set_option("-p", int(random.random() * 2**31 - 1), fixed=False)
        # Terminate bootstrapping upon convergence, rather than after a fixed number of repetitions
        command.set_option("-N", "autoMRE", fixed=False)

        return {"command": command}
Ejemplo n.º 23
0
    def __init__(
        self,
        input_alignment,
        output_template,
        input_partition=None,
        model="GTRGAMMAI",
        replicates="autoMRE",
        threads=1,
        dependencies=(),
    ):
        """
        Arguments:
        input_alignment  -- An alignment file in a format readable by RAxML.
        input_partition  -- A set of partitions in a format readable by RAxML.
        output_template  -- A template string used to construct final filenames. Should
                            consist of a full path, including a single '%s', which is
                            replaced with the  variable part of RAxML output files (e.g.
                            'info', 'bestTree', ...).

                            Example destination: '/disk/project/SN013420.RAxML.%s'
                            Example output:      '/disk/project/SN013420.RAxML.bestTree'
        """

        if threads > 1:
            command = AtomicCmdBuilder("raxmlHPC-PTHREADS")
            command.set_option("-T", threads)
            version = RAXML_PTHREADS_VERSION
        else:
            command = AtomicCmdBuilder("raxmlHPC")
            version = RAXML_VERSION

        # Perform rapid bootstrapping
        command.set_option("-f", "a")
        # Output files are saved with a .PALEOMIX postfix, and subsequently renamed
        command.set_option("-n", "PALEOMIX")
        # Ensures that output is saved to the temporary directory
        command.set_option("-w", "%(TEMP_DIR)s")
        # Symlink to sequence and partitions, to prevent the creation of *.reduced files
        # outside temp folder. In addition, it may be nessesary to remove the .reduced
        # files if created
        command.set_option("-s", "%(TEMP_OUT_ALN)s")

        if input_partition is not None:
            command.set_option("-q", "%(TEMP_OUT_PART)s")
            command.set_kwargs(
                IN_PARTITION=input_partition,
                TEMP_OUT_PART=os.path.basename(input_partition),
                TEMP_OUT_PART_R=os.path.basename(input_partition) + ".reduced",
            )

        command.set_kwargs(
            # Auto-delete: Symlinks and .reduced files that RAxML may generate
            TEMP_OUT_ALN=os.path.basename(input_alignment),
            TEMP_OUT_ALN_R=os.path.basename(input_alignment) + ".reduced",
            # Input files, are not used directly (see below)
            IN_ALIGNMENT=input_alignment,
            # Final output files, are not created directly
            OUT_INFO=output_template % "info",
            OUT_BESTTREE=output_template % "bestTree",
            OUT_BOOTSTRAP=output_template % "bootstrap",
            OUT_BIPART=output_template % "bipartitions",
            OUT_BIPARTLABEL=output_template % "bipartitionsBranchLabels",
            CHECK_VERSION=version,
        )

        # Use the GTRGAMMA model of NT substitution by default
        command.set_option("-m", model, fixed=False)
        # Enable Rapid Boostrapping and set random seed. May be set to a fixed value to
        # allow replicability.
        command.set_option("-x", int(random.random() * 2 ** 31 - 1), fixed=False)
        # Set random seed for parsimony inference. May be set to allow replicability.
        command.set_option("-p", int(random.random() * 2 ** 31 - 1), fixed=False)
        # Terminate bootstrapping upon convergence, not after N repetitions
        command.set_option("-N", replicates, fixed=False)

        self._symlinks = [input_alignment, input_partition]
        self._template = os.path.basename(output_template)

        CommandNode.__init__(
            self,
            command=command.finalize(),
            description="<RAxMLRapidBS: '%s' -> '%s'>"
            % (input_alignment, output_template % ("*",)),
            threads=threads,
            dependencies=dependencies,
        )