Exemple #1
0
def make_staged_target(stage_dir, target):
    if not isinstance(stage_dir, LocalDirectoryTarget):
        stage_dir = LocalDirectoryTarget(stage_dir)

    return stage_dir.child(target.unique_basename,
                           type=target.type,
                           **target._copy_kwargs())
Exemple #2
0
    def from_directory(cls, directory, **kwargs):
        # dir should be a FileSystemDirectoryTarget or a string, in which case it is interpreted as
        # a local path
        if isinstance(directory, six.string_types):
            d = LocalDirectoryTarget(directory)
        elif isinstance(d, FileSystemDirectoryTarget):
            d = directory
        else:
            raise TypeError("directory must either be a string or a FileSystemDirectoryTarget "
                "object, got '{}'".format(directory))

        # find all files, pass kwargs which may filter the result further
        kwargs["type"] = "f"
        basenames = d.listdir(**kwargs)

        # convert to file targets
        targets = [d.child(basename, type="f") for basename in basenames]

        return cls(targets)
Exemple #3
0
    def env(self):
        # strategy: unlike docker, singularity might not allow binding of paths that do not exist
        # in the container, so create a tmp directory on the host system and bind it as /tmp, let
        # python dump its full env into a file, and read the file again on the host system
        if self.image not in self._envs:
            tmp_dir = LocalDirectoryTarget(is_tmp=True)
            tmp_dir.touch()

            tmp = tmp_dir.child("env", type="f")
            tmp.touch()

            # determine whether volume binding is allowed
            allow_binds_cb = getattr(self.task, "singularity_allow_binds",
                                     None)
            if callable(allow_binds_cb):
                allow_binds = allow_binds_cb()
            else:
                cfg = Config.instance()
                allow_binds = cfg.get_expanded(self.get_config_section(),
                                               "allow_binds")

            # arguments to configure the environment
            args = ["-e"]
            if allow_binds:
                args.extend(["-B", "{}:/tmp".format(tmp_dir.path)])
                env_file = "/tmp/{}".format(tmp.basename)
            else:
                env_file = tmp.path

            # get the singularity exec command
            singularity_exec_cmd = self._singularity_exec_cmd() + args

            # build commands to setup the environment
            setup_cmds = self._build_setup_cmds(self._get_env())

            # build the python command that dumps the environment
            py_cmd = "import os,pickle;" \
                + "pickle.dump(dict(os.environ),open('{}','wb'),protocol=2)".format(env_file)

            # build the full command
            cmd = quote_cmd(singularity_exec_cmd + [
                self.image,
                "bash",
                "-l",
                "-c",
                "; ".join(
                    flatten(setup_cmds, quote_cmd(["python", "-c", py_cmd]))),
            ])

            # run it
            code, out, _ = interruptable_popen(cmd,
                                               shell=True,
                                               executable="/bin/bash",
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.STDOUT)
            if code != 0:
                raise Exception(
                    "singularity sandbox env loading failed:\n{}".format(out))

            # load the environment from the tmp file
            env = tmp.load(formatter="pickle")

            # cache
            self._envs[self.image] = env

        return self._envs[self.image]
Exemple #4
0
def hadd_task(task, inputs, output, cwd=None, local=False, force=True):
    """
    This method is intended to be used by tasks that are supposed to merge root files, e.g. when
    inheriting from :py:class:`law.contrib.tasks.MergeCascade`. *inputs* should be a sequence of
    local targets that represent the files to merge into *output*. *cwd* is the working directory
    in which hadd is invoked. When empty, a temporary directory is used. The *task* itself is
    used to print and publish messages via its :py:meth:`law.Task.publish_message` and
    :py:meth:`law.Task.publish_step` methods.

    When *local* is *True*, the input and output targets are assumed to be local and the merging is
    based on their local paths. Otherwise, the targets are fetched first and the output target is
    localized.

    When *force* is *True*, any existing output file is overwritten (by adding the ``-f`` flag to
    ``hadd``).
    """
    # ensure inputs are targets
    inputs = [
        LocalFileTarget(inp) if isinstance(inp, six.string_types) else inp
        for inp in inputs
    ]

    # ensure output is a target
    if isinstance(output, six.string_types):
        output = LocalFileTarget(output)

    # default cwd
    if not cwd:
        cwd = LocalDirectoryTarget(is_tmp=True)
    elif isinstance(cwd, six.string_types):
        cwd = LocalDirectoryTarget(cwd)
    cwd.touch()

    # helper to create the hadd cmd
    def hadd_cmd(input_paths, output_path):
        cmd = ["hadd", "-n", "0"]
        if force:
            cmd.append("-f")
        cmd.extend(["-d", cwd.path])
        cmd.append(output_path)
        cmd.extend(input_paths)
        return quote_cmd(cmd)

    if local:
        # when local, there is no need to download inputs
        input_paths = [inp.path for inp in inputs]

        with task.publish_step("merging ...", runtime=True):
            if len(inputs) == 1:
                output.copy_from_local(inputs[0])
            else:
                # merge using hadd
                cmd = hadd_cmd(input_paths, output.path)
                code = interruptable_popen(cmd, shell=True, executable="/bin/bash")[0]
                if code != 0:
                    raise Exception("hadd failed")

        task.publish_message("merged file size: {}".format(human_bytes(
            output.stat.st_size, fmt=True)))

    else:
        # when not local, we need to fetch files first into the cwd
        with task.publish_step("fetching inputs ...", runtime=True):
            def fetch(inp):
                inp.copy_to_local(cwd.child(inp.unique_basename, type="f"), cache=False)
                return inp.unique_basename

            def callback(i):
                task.publish_message("fetch file {} / {}".format(i + 1, len(inputs)))

            bases = map_verbose(fetch, inputs, every=5, callback=callback)

        # start merging into the localized output
        with output.localize("w", cache=False) as tmp_out:
            with task.publish_step("merging ...", runtime=True):
                if len(bases) == 1:
                    tmp_out.path = cwd.child(bases[0]).path
                else:
                    # merge using hadd
                    cmd = hadd_cmd(bases, tmp_out.path)
                    code = interruptable_popen(cmd, shell=True, executable="/bin/bash",
                        cwd=cwd.path)[0]
                    if code != 0:
                        raise Exception("hadd failed")

                    task.publish_message("merged file size: {}".format(human_bytes(
                        tmp_out.stat.st_size, fmt=True)))