Example 1
def _datalad_check_container(ds, spec):
    """Adjust spec for `datalad-container`-configured container.

    If a "container" key is found, "command_str" will be replaced, and the
    previous "command_str" value will be placed under
    "command_str_nocontainer".
    """
    container = spec.get("container")
    if container is not None:
        try:
            from datalad_container.find_container import find_container
        except ImportError:
            raise OrchestratorError(
                "Specified container '{}' "
                "but datalad-container extension is not installed".format(
                    container))
        try:
            cinfo = find_container(ds, container)
        except ValueError as exc:
            raise OrchestratorError(exc)

        cmdexec = cinfo["cmdexec"]
        image = op.relpath(cinfo["path"], ds.path)

        command_str = spec["command_str"]
        spec["commmand_str_nocontainer"] = command_str
        spec["command_str"] = cmdexec.format(img=image, cmd=command_str)

        # TODO: When datalad-container starts passing the image as
        # extra_inputs, we should handle that here (and in fetch below).
        inputs = spec.get("inputs", [])
        if image not in inputs:
            spec["inputs"] = inputs + [image]
Example 2
    def __init__(self,
                 resource,
                 submission_type,
                 job_spec=None,
                 resurrection=False):
        if not external_versions["datalad"]:
            raise MissingExternalDependency(
                "DataLad is required for orchestrator '{}'".format(self.name))

        super(DataladOrchestrator, self).__init__(resource,
                                                  submission_type,
                                                  job_spec,
                                                  resurrection=resurrection)

        from datalad.api import Dataset
        self.ds = Dataset(".")
        if not self.ds.id:
            raise OrchestratorError(
                "orchestrator {} requires a local dataset".format(self.name))

        if self._resurrection:
            self.head = self.job_spec.get("_head")
        else:
            if self.ds.repo.dirty:
                raise OrchestratorError(
                    "Local dataset {} is dirty. "
                    "Save or discard uncommitted changes".format(self.ds.path))
            self._configure_repo()
            self.head = self.ds.repo.get_hexsha()
            _datalad_check_container(self.ds, self.job_spec)
            _datalad_format_command(self.ds, self.job_spec)
Example 3
    def __init__(self, resource, submission_type, job_spec=None,
                 resurrection=False):
        external_versions.check("datalad", min_version="0.13")
        super(DataladOrchestrator, self).__init__(
            resource, submission_type, job_spec, resurrection=resurrection)

        from datalad.api import Dataset
        self.ds = Dataset(".")
        if not self.ds.id:
            raise OrchestratorError("orchestrator {} requires a local dataset"
                                    .format(self.name))

        if self._resurrection:
            self.head = self.job_spec.get("_head")
        else:
            if self.ds.repo.dirty:
                raise OrchestratorError("Local dataset {} is dirty. "
                                        "Save or discard uncommitted changes"
                                        .format(self.ds.path))
            self._configure_repo()
            self.head = self.ds.repo.get_hexsha()
            _datalad_check_container(self.ds, self.job_spec)
            _datalad_format_command(self.ds, self.job_spec)

        if isinstance(self.session, SSHSession) and resource.key_filename:
            # Make the identity file available to 'datalad sshrun' even
            # if it is not configured in .ssh/config. This is
            # particularly important for AWS keys.
            os.environ["DATALAD_SSH_IDENTITYFILE"] = resource.key_filename
            from datalad import cfg
            cfg.reload(force=True)
Example 4
    def _find_root(self):
        home = self.session.query_envvars().get("HOME")
        if not home:
            raise OrchestratorError("Could not determine $HOME on remote")
        root_directory = op.join(home, ".reproman", "run-root")
        lgr.info("No root directory supplied for %s; using '%s'",
                 self.resource.name, root_directory)
        if not op.isabs(root_directory):
            raise OrchestratorError(
                "Root directory is not an absolute path: {}".format(
                    root_directory))
        return root_directory
Example 5
def head_at(dataset, commit):
    """Run block with `commit` checked out in `dataset`.

    Check `commit` out if HEAD isn't already at it and restore the previous
    HEAD and branch on exit. Note: If `commit` is a ref, this function is
    concerned only with checking out the dereferenced commit.

    Parameters
    ----------
    dataset : DataLad dataset
    commit : str
        A commit-ish.

    Yields
    ------
    A boolean indicating whether HEAD needed to be moved in order to make
    `commit` current.
    """
    if dataset.repo.dirty:
        raise OrchestratorError(
            "Refusing to work with dirty repository: {}"
            .format(dataset.path))

    try:
        commit = dataset.repo.get_hexsha(commit)
    except ValueError:
        raise OrchestratorError("Could not resolve '{}' in {}"
                                .format(commit, dataset.path))
    current = dataset.repo.get_hexsha()
    if current is None:
        raise OrchestratorError("No commits on current branch in {}"
                                .format(dataset.path))
    to_restore = dataset.repo.get_active_branch() or current

    moved = commit != current
    if moved:
        lgr.info("Checking out %s", commit)
        try:
            dataset.repo.checkout(commit)
            # Note: It's tempting to try to use --recurse-submodules here, but
            # that will absorb the submodules' .git/ directories, and DataLad
            # relies on plain .git/ directories.
            if dataset.repo.dirty:
                raise OrchestratorError(
                    "Refusing to move HEAD due to submodule state change "
                    "within {}".format(dataset.path))
            yield moved
        finally:
            lgr.info("Restoring checkout of %s", to_restore)
            dataset.repo.checkout(to_restore)
    else:
        yield moved
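
A hedged usage sketch: the "Yields" section and the callers in the later examples suggest `head_at` is used as a `with` block (presumably wrapped with `contextlib.contextmanager`). `ds` and `process_results` below are placeholders.

with head_at(ds, "HEAD~2") as moved:
    # Work against the dataset with the older commit checked out.
    process_results(ds)  # placeholder for whatever is done at that commit
    if moved:
        lgr.debug("HEAD was moved and will be restored on exit")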
Example 6
def _datalad_check_container(ds, spec):
    """Adjust spec for `datalad-container`-configured container.

    If a "container" key is found, a new key "_container_command_str" will be
    added with the container-formatted command.
    """
    container = spec.get("container")
    if container is not None:
        # TODO: This is repeating too much logic from containers-run. Consider
        # reworking datalad-container to enable outside use.
        external_versions.check("datalad_container", min_version="0.4.0")
        from datalad_container.containers_run import get_command_pwds
        from datalad_container.find_container import find_container

        try:
            cinfo = find_container(ds, container)
        except ValueError as exc:
            raise OrchestratorError(exc)

        cmdexec = cinfo["cmdexec"]
        image = op.relpath(cinfo["path"], ds.path)

        pwd, _ = get_command_pwds(ds)
        image_dspath = op.relpath(cinfo.get('parentds', ds.path), pwd)

        spec["_container_command_str"] = cmdexec.format(
            img=image,
            cmd=spec["_resolved_command_str"],
            img_dspath=image_dspath)
        spec["_extra_inputs"] = [image]
Example 7
    def _execute_in_wdir(self, command, err_msg=None):
        """Helper to run command in remote working directory.

        Parameters
        ----------
        command : list of str or str
        err_msg : optional
            Message to use if an OrchestratorError is raised.

        Returns
        -------
        standard output

        Raises
        ------
        OrchestratorError if command fails.
        """
        try:
            out, _ = self.session.execute_command(
                command,
                cwd=self.working_directory)
        except CommandError as exc:
            raise OrchestratorError(
                str(exc) if err_msg is None else err_msg)
        return out
Example 8
    def _fix_up_dataset(self):
        """Try to get dataset and subdatasets into the correct state.
        """
        self._checkout_target()
        # fixup 1: Check out target commit in subdatasets. This should later be
        # replaced by the planned Datalad functionality to get an entire
        # dataset hierarchy to a recorded state.
        #
        # fixup 2: Autoenable remotes:
        # 'datalad publish' does not autoenable remotes, and 'datalad
        # create-sibling' calls 'git annex init' too early to trigger
        # autoenabling. Temporarily work around this issue, though this
        # should very likely be addressed in DataLad. And if this is here
        # to stay, we should avoid this call for non-annex datasets.
        lgr.info("Adjusting state of remote dataset")
        self._execute_in_wdir(["git", "annex", "init"])
        for res in self._execute_datalad_json_command(
                ["subdatasets", "--fulfilled=true", "--recursive"]):
            cwd = res["path"]
            self._assert_clean_repo(cwd=cwd)
            lgr.debug("Adjusting state of %s", cwd)
            cmds = [["git", "checkout", res["revision"]],
                    ["git", "annex", "init"]]
            for cmd in cmds:
                try:
                    out, _ = self.session.execute_command(cmd, cwd=cwd)
                except CommandError as exc:
                    raise OrchestratorError(str(exc))
Example 9
    def __init__(self, resource, submission_type, job_spec=None,
                 resurrection=False):
        self.resource = resource
        self.resource.connect()
        self.session = resource.get_session()
        self._resurrection = resurrection

        # TODO: Probe remote and try to infer.
        submitter_class = SUBMITTERS[submission_type or "local"]
        self.submitter = submitter_class(self.session)

        self.job_spec = job_spec or {}

        if resurrection:
            important_keys = ["_jobid", "root_directory", "working_directory",
                              "local_directory"]
            for key in important_keys:
                if key not in self.job_spec:
                    raise OrchestratorError(
                        "Job spec must have key '{}' to resurrect orchestrator"
                        .format(key))

            self.jobid = self.job_spec["_jobid"]
        else:
            self.jobid = "{}-{}".format(time.strftime("%Y%m%d-%H%M%S"),
                                        str(uuid.uuid4())[:4])
            self._prepare_spec()

        self.template = None
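
A sketch of the minimal job spec needed to resurrect an orchestrator, matching the `important_keys` checked above; all values (and the orchestrator class name in the comment) are hypothetical.

resurrection_spec = {
    "_jobid": "20240101-120000-1a2b",          # assigned at submission time
    "root_directory": "/home/me/.reproman/run-root",
    "working_directory": "/home/me/.reproman/run-root/myds",
    "local_directory": "/home/me/projects/myds",
}
# orc = SomeOrchestrator(resource, "local", resurrection_spec, resurrection=True)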
Example 10
    def _execute_in_wdir(self, command, err_msg=None):
        """Helper to run command in remote working directory.

        TODO: Adjust (or perhaps remove entirely) once
        `SSHSession.execute_command` supports the `cwd` argument.

        Parameters
        ----------
        command : str
        err_msg : optional
            Message to use if an OrchestratorError is raised.

        Returns
        -------
        standard output

        Raises
        ------
        OrchestratorError if command fails.
        """
        prefix = "cd '{}' && ".format(self.working_directory)
        try:
            out, _ = self.session.execute_command(prefix + command)
        except CommandError as exc:
            raise OrchestratorError(str(exc) if err_msg is None else err_msg)
        return out
Example 11
    def _assert_clean_repo(self, cwd=None):
        cmd = ["git", "status", "--porcelain",
               "--ignore-submodules=all", "--untracked-files=normal"]
        out, _ = self.session.execute_command(
            cmd, cwd=cwd or self.working_directory)
        if out:
            raise OrchestratorError("Remote repository {} is dirty"
                                    .format(cwd or self.working_directory))
Example 12
    def follow(self):
        """Follow command, exiting when post-command processing completes."""
        self.submitter.follow()
        # We're done according to the submitter. This includes the
        # post-processing. Make sure it looks like it passed.
        if not self.has_completed:
            raise OrchestratorError(
                "Post-processing failed for {} [status: {}] ({})".format(
                    self.jobid, self.status, self.working_directory))
Example 13
    def _find_root(self):
        root_directory = op.join(self.home, ".reproman", "run-root")
        lgr.info("No root directory supplied for %s; using '%s'",
                 self.resource.name, root_directory)
        if not op.isabs(root_directory):
            raise OrchestratorError(
                "Root directory is not an absolute path: {}"
                .format(root_directory))
        return root_directory
Example 14
    def get_failed_subjobs(self):
        """List of failed subjobs (represented by index, starting with 0).
        """
        failed_dir = op.join(self.meta_directory, "failed")
        try:
            stdout, _ = self.session.execute_command(["ls", failed_dir])
        except CommandError as exc:
            if self.session.exists(failed_dir):
                # This shouldn't have failed.
                raise OrchestratorError(str(exc))
            return []
        return list(map(int, stdout.strip().split()))
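
The final line turns the `ls` output (one subjob index per line) into a list of ints; a quick illustration with hypothetical output:

stdout = "0\n2\n5\n"                       # e.g. files named 0, 2, and 5 under "failed"
print(list(map(int, stdout.strip().split())))   # -> [0, 2, 5]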
Example 15
    def prepare_remote(self):
        if not isinstance(self.session, ShellSession):
            raise OrchestratorError(
                "The {} orchestrator must be used with a local session, "
                "but session for resource {} is {}"
                .format(self.name, self.resource.name,
                        type(self.session).__name__))

        inputs = list(self.get_inputs())
        if inputs:
            lgr.info("Making inputs available")
            call_check_dl_results(
                self.ds.get, "'datalad get' failed",
                inputs, on_failure="ignore")
Example 16
def _resurrect_orc(job):
    resource = get_manager().get_resource(job["resource_id"], "id")
    try:
        # Create chpwd separately so that this try-except block doesn't cover
        # the context manager suite below.
        cd = chpwd(job["local_directory"])
    except FileNotFoundError:
        raise OrchestratorError(
            "local directory for job {} no longer exists: {}"
            .format(job["_jobid"], job["local_directory"]))

    with cd:
        orchestrator_class = ORCHESTRATORS[job["orchestrator"]]
        orc = orchestrator_class(resource, job["submitter"], job,
                                 resurrection=True)
        orc.submitter.submission_id = job.get("_submission_id")
    return orc
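
A sketch of the kind of job record `_resurrect_orc` expects: every key accessed above appears, but the values (and the orchestrator/submitter names) are hypothetical.

job = {
    "_jobid": "20240101-120000-1a2b",
    "_submission_id": "12345",
    "resource_id": "abcd1234",
    "local_directory": "/home/me/projects/myds",
    "orchestrator": "datalad-pair-run",   # assumed key into ORCHESTRATORS
    "submitter": "local",
}
# orc = _resurrect_orc(job)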
Example 17
    def _prepare_spec(self):
        """Prepare the spec for the run.

        At the moment, this involves constructing the "_command_array",
        "_inputs_array", and "_outputs_array" keys.
        """
        from reproman.support.globbedpaths import GlobbedPaths

        spec = self.job_spec
        if spec.get("_resolved_batch_parameters"):
            raise OrchestratorError(
                "Batch parameters are currently only supported "
                "in DataLad orchestrators")

        for key in ["inputs", "outputs"]:
            if key in spec:
                gp = GlobbedPaths(spec[key])
                spec["_{}_array".format(key)] = [gp.expand(dot=False)]
        if "_resolved_command_str" in spec:
            spec["_command_array"] = [spec["_resolved_command_str"]]
Example 18
def call_check_dl_results(fn, failure_msg, *args, **kwds):
    """Call function, checking status of DataLad-style results.

    Parameters
    ----------
    fn : callable
        Function that yields results.
    failure_msg : str
        Message to show on failure. The result dict is appended to this text.
    *args, **kwds
        Arguments passed to `fn`.

    Raises
    ------
    An OrchestratorError if a failure is encountered.
    """
    for res in fn(*args, **kwds):
        lgr.debug("datalad push result: %s", res)
        if res["status"] not in ["ok", "notneeded"]:

            raise OrchestratorError("{}: {}".format(failure_msg, res))
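
A minimal sketch of how this helper reacts to result dicts. Real callers pass DataLad commands such as `ds.get` or `ds.push` (see the surrounding examples); the generator here is only a stand-in.

def fake_results(paths):
    # Stand-in for a DataLad command yielding result dicts.
    for p in paths:
        yield {"status": "ok" if p != "missing.dat" else "error", "path": p}

call_check_dl_results(fake_results, "'fake get' failed", ["a.dat", "b.dat"])   # passes
# call_check_dl_results(fake_results, "'fake get' failed", ["missing.dat"])
# would raise OrchestratorError("'fake get' failed: {'status': 'error', ...}")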
Example 19
    def fetch(self):
        """Fetch results tarball and inject run record into the local dataset.
        """
        lgr.info("Fetching results for %s", self.jobid)
        import tarfile
        tfile = "{}.tar.gz".format(self.jobid)
        remote_tfile = op.join(self.root_directory, "outputs", tfile)

        if not self.session.exists(remote_tfile):
            raise OrchestratorError(
                "Expected output file does not exist: {}".format(remote_tfile))

        with head_at(self.ds, self.head) as moved:
            with chpwd(self.ds.path):
                self.session.get(remote_tfile)
                with tarfile.open(tfile, mode="r:gz") as tar:
                    tar.extractall(path=".")
                os.unlink(tfile)
                # TODO: How to handle output cleanup on the remote?

                from datalad.interface.run import run_command
                lgr.info("Creating run commit in %s", self.ds.path)
                for res in run_command(
                        inputs=self.job_spec.get("inputs_unexpanded"),
                        outputs=self.job_spec.get("outputs_unexpanded"),
                        inject=True,
                        extra_info={"reproman_jobid": self.jobid},
                        message=self.job_spec.get("message"),
                        cmd=self.job_spec["command_str_unexpanded"]):
                    # Oh, if only I were a datalad extension.
                    pass
                ref = self.job_refname
                if moved:
                    lgr.info(
                        "Results stored on %s. "
                        "Bring them into this branch with "
                        "'git merge %s'", ref, ref)
                self.ds.repo.update_ref(ref, "HEAD")

        self.log_failed()
Example 20
    def _fix_up_dataset(self):
        """Try to get datataset and subdatasets into the correct state.
        """
        self._checkout_target()
        # fixup 0: 'datalad create-sibling --recursive' leaves the subdataset
        # uninitialized (see DataLad's 78e00dcd2).
        self._execute_in_wdir(["git", "submodule", "update", "--init"])

        # fixup 1: Check out target commit in subdatasets. This should later be
        # replaced by the planned Datalad functionality to get an entire
        # dataset hierarchy to a recorded state.
        #
        # fixup 2: Autoenable remotes:
        # 'datalad push' does not autoenable remotes, and 'datalad
        # create-sibling' calls 'git annex init' too early to trigger
        # autoenabling. Temporarily work around this issue, though this
        # should very likely be addressed in DataLad. And if this is here
        # to stay, we should avoid this call for non-annex datasets.
        lgr.info("Adjusting state of remote dataset")
        self._execute_in_wdir(["git", "annex", "init"])
        for res in self._execute_datalad_json_command(
                ["subdatasets", "--fulfilled=true", "--recursive"]):
            cwd = res["path"]
            self._assert_clean_repo(cwd=cwd)
            lgr.debug("Adjusting state of %s", cwd)
            # "gitshasum" replaced "revision" in v0.12, with the old name kept
            # for compatibility until v0.14. Even though the minimum version
            # for DataLad in setup.py is above 0.12, support both keys until
            # the minimum version for the _remote_ is specified/checked (see
            # gh-477).
            revision = res.get("gitshasum", res.get("revision"))
            assert revision, "bug: incorrectly assumed revision is in results"
            cmds = [["git", "checkout", revision],
                    ["git", "annex", "init"]]
            for cmd in cmds:
                try:
                    out, _ = self.session.execute_command(
                        cmd, cwd=cwd)
                except CommandError as exc:
                    raise OrchestratorError(str(exc))
Example 21
    def die_orc(*args, **kwargs):
        raise OrchestratorError("resurrection failed")
Example 22
    def _assert_clean_repo(self):
        if self._execute_in_wdir("git status --porcelain"):
            raise OrchestratorError("Remote repository {} is dirty".format(
                self.working_directory))
Example 23
    def prepare_remote(self):
        """Prepare dataset sibling on remote.
        """
        repo = self.ds.repo
        if not repo.get_active_branch():
            # push() fails when HEAD is detached (assuming no additional
            # configuration).
            raise OrchestratorError(
                "You must be on a branch to use the {} orchestrator"
                .format(self.name))
        if not self.session.exists(self.root_directory):
            self.session.mkdir(self.root_directory, parents=True)

        resource = self.resource
        session = self.session

        inputs = list(self.get_inputs())
        if isinstance(session, (SSHSession, ShellSession)):
            if isinstance(session, SSHSession):
                target_path = _format_ssh_url(
                    resource.user,
                    # AWS resource does not have host attribute.
                    getattr(resource, "host", None) or session.connection.host,
                    getattr(resource, "port", None),
                    self.working_directory)
            else:
                target_path = self.working_directory

            # TODO: Add one level deeper with reckless clone per job to deal
            # with concurrent jobs?
            target_exists = session.exists(self.working_directory)
            need_push = True
            if not target_exists:
                since = None  # Avoid since="^" for non-existing repo.
            else:
                remote_branch = "{}/{}".format(
                    resource.name,
                    repo.get_active_branch())
                if repo.commit_exists(remote_branch):
                    need_push = not repo.is_ancestor(repo.get_hexsha(),
                                                     remote_branch)
                    since = "^"
                else:
                    # If the remote branch doesn't exist yet, push() will fail
                    # with since="^".
                    since = None

            remotes = repo.get_remotes()
            if resource.name in remotes:
                if repo.get_remote_url(resource.name) != target_path:
                    raise OrchestratorError(
                        "Remote '{}' already exists with another URL. "
                        "Either delete remote or rename resource."
                        .format(resource.name))
                elif not target_exists:
                    lgr.debug(
                        "Remote '%s' matches resource name "
                        "and points to the expected target, "
                        "which doesn't exist.  "
                        "Removing remote and recreating",
                        resource.name)
                    repo.remove_remote(resource.name)

            self.ds.create_sibling(target_path, name=resource.name,
                                   recursive=True, existing="skip")

            if need_push:
                call_check_dl_results(
                    self.ds.push, "'datalad push' failed",
                    to=resource.name, since=since, data="nothing",
                    recursive=True, on_failure="ignore")

            self._fix_up_dataset()

            if inputs:
                lgr.info("Making inputs available")
                try:
                    # TODO: Whether we try this `get` should be configurable.
                    self._execute_in_wdir("datalad get {}".format(
                        # FIXME: This should use something like
                        # execute_command_batch.
                        " ".join(map(shlex_quote, inputs))))
                except OrchestratorError:
                    # Should use --since for existing repo, but it doesn't seem
                    # to sync wrt content.
                    self.ds.push(to=resource.name, path=inputs,
                                 # Pass "anything" so that `path` overrides any
                                 # git-annex-wanted configuration.
                                 data="anything",
                                 recursive=True)
        else:
            # TODO: Handle more types?
            raise OrchestratorError("Unsupported resource type {}"
                                    .format(resource.type))

        if not session.exists(self.meta_directory):
            session.mkdir(self.meta_directory, parents=True)
Example 24
    def prepare_remote(self):
        """Prepare dataset sibling on remote.
        """
        repo = self.ds.repo
        if not repo.get_active_branch():
            # publish() fails when HEAD is detached.
            raise OrchestratorError(
                "You must be on a branch to use the {} orchestrator"
                .format(self.name))
        if not self.session.exists(self.root_directory):
            self.session.mkdir(self.root_directory, parents=True)

        resource = self.resource
        session = self.session

        inputs = list(self.get_inputs())
        if isinstance(session, (SSHSession, ShellSession)):
            if isinstance(session, SSHSession):
                if resource.key_filename:
                    # Make the identity file available to 'datalad sshrun' even
                    # if it is not configured in .ssh/config. This is
                    # particularly important for AWS keys.
                    os.environ["DATALAD_SSH_IDENTITYFILE"] = resource.key_filename
                    from datalad import cfg
                    cfg.reload(force=True)

                target_path = _format_ssh_url(
                    resource.user,
                    # AWS resource does not have host attribute.
                    getattr(resource, "host", None) or session.connection.host,
                    getattr(resource, "port", None),
                    self.working_directory)
            else:
                target_path = self.working_directory

            # TODO: Add one level deeper with reckless clone per job to deal
            # with concurrent jobs?
            target_exists = session.exists(self.working_directory)
            if not target_exists:
                since = None  # Avoid since="" for non-existing repo.
            else:
                remote_branch = "{}/{}".format(
                    resource.name,
                    repo.get_active_branch())
                if repo.commit_exists(remote_branch):
                    since = ""
                else:
                    # If the remote branch doesn't exist yet, publish will fail
                    # with since="".
                    since = None

            remotes = repo.get_remotes()
            if resource.name in remotes:
                if repo.get_remote_url(resource.name) != target_path:
                    raise OrchestratorError(
                        "Remote '{}' already exists with another URL. "
                        "Either delete remote or rename resource."
                        .format(resource.name))
                elif not target_exists:
                    lgr.debug(
                        "Remote '%s' matches resource name "
                        "and points to the expected target, "
                        "which doesn't exist.  "
                        "Removing remote and recreating",
                        resource.name)
                    repo.remove_remote(resource.name)

            self.ds.create_sibling(target_path, name=resource.name,
                                   recursive=True, existing="skip")

            call_check_dl_results(
                self.ds.publish, "'datalad publish' failed",
                to=resource.name, since=since,
                recursive=True, on_failure="ignore")

            self._fix_up_dataset()

            if inputs:
                lgr.info("Making inputs available")
                try:
                    # TODO: Whether we try this `get` should be configurable.
                    self._execute_in_wdir("datalad get {}".format(
                        # FIXME: This should use something like
                        # execute_command_batch.
                        " ".join(map(shlex_quote, inputs))))
                except OrchestratorError:
                    # Should use --since for existing repo, but it doesn't seem
                    # to sync wrt content.
                    self.ds.publish(to=resource.name, path=inputs,
                                    recursive=True)
        else:
            # TODO: Handle more types?
            raise OrchestratorError("Unsupported resource type {}"
                                    .format(resource.type))

        if not session.exists(self.meta_directory):
            session.mkdir(self.meta_directory, parents=True)
Example 25
    def prepare_remote(self):
        """Prepare dataset sibling on remote.
        """
        if not self.ds.repo.get_active_branch():
            # publish() fails when HEAD is detached.
            raise OrchestratorError(
                "You must be on a branch to use the {} orchestrator".format(
                    self.name))
        if not self.session.exists(self.root_directory):
            self.session.mkdir(self.root_directory, parents=True)

        resource = self.resource
        session = self.session

        inputs = list(self.get_inputs())
        if isinstance(session, SSHSession):
            if resource.key_filename:
                dl_version = external_versions["datalad"]
                if dl_version < "0.11.3":
                    # Connecting will probably fail because `key_filename` is
                    # set, but we have no way to tell DataLad about it.
                    lgr.warning(
                        "DataLad version %s detected. "
                        "0.11.3 or greater is required to use an "
                        "identity file not specified in ~/.ssh/config",
                        dl_version)
                # Make the identity file available to 'datalad sshrun' even if
                # it is not configured in .ssh/config. This is particularly
                # important for AWS keys.
                os.environ["DATALAD_SSH_IDENTITYFILE"] = resource.key_filename
                from datalad import cfg
                cfg.reload(force=True)

            sshurl = _format_ssh_url(
                resource.user,
                # AWS resource does not have host attribute.
                getattr(resource, "host", None) or session.connection.host,
                getattr(resource, "port", None),
                self.working_directory)

            # TODO: Add one level deeper with reckless clone per job to deal
            # with concurrent jobs?
            if not session.exists(self.working_directory):
                remotes = self.ds.repo.get_remotes()
                if resource.name in remotes:
                    raise OrchestratorError(
                        "Remote '{}' unexpectedly exists. "
                        "Either delete remote or rename resource.".format(
                            resource.name))

                self.ds.create_sibling(sshurl,
                                       name=resource.name,
                                       recursive=True)
                since = None  # Avoid since="" for non-existing repo.
            else:
                remote_branch = "{}/{}".format(
                    resource.name, self.ds.repo.get_active_branch())
                if self.ds.repo.commit_exists(remote_branch):
                    since = ""
                else:
                    # If the remote branch doesn't exist yet, publish will fail
                    # with since="".
                    since = None

            from datalad.support.exceptions import IncompleteResultsError
            try:
                self.ds.publish(to=resource.name, since=since, recursive=True)
            except IncompleteResultsError:
                raise OrchestratorError(
                    "'datalad publish' failed. Try running "
                    "'datalad update -s {} --merge --recursive' first".format(
                        resource.name))

            self._fix_up_dataset()

            if inputs:
                lgr.info("Making inputs available")
                try:
                    # TODO: Whether we try this `get` should be configurable.
                    self._execute_in_wdir("datalad get {}".format(
                        # FIXME: This should use something like
                        # execute_command_batch.
                        " ".join(map(shlex_quote, inputs))))
                except OrchestratorError:
                    # Should use --since for existing repo, but it doesn't seem
                    # to sync wrt content.
                    self.ds.publish(to=resource.name,
                                    path=inputs,
                                    recursive=True)
        elif resource.type == "shell":
            import datalad.api as dl
            if not session.exists(self.working_directory):
                dl.install(self.working_directory, source=self.ds.path)

            self.session.execute_command("git push '{}' HEAD:{}-base".format(
                self.working_directory, self.job_refname))
            self._checkout_target()

            if inputs:
                installed_ds = dl.Dataset(self.working_directory)
                installed_ds.get(inputs)
        else:
            # TODO: Handle more types?
            raise OrchestratorError("Unsupported resource type {}".format(
                resource.type))

        if not session.exists(self.meta_directory):
            session.mkdir(self.meta_directory, parents=True)
Example 26
    def fetch(self, on_remote_finish=None):
        """Fetch results tarball and inject run record into the local dataset.

        Parameters
        ----------
        on_remote_finish : callable, optional
            Function to be called when work with the resource is finished. It
            will be passed two arguments, the resource and the failed subjobs
            (list of ints).
        """
        lgr.info("Fetching results for %s", self.jobid)
        import tarfile
        tfile = "{}.tar.gz".format(self.jobid)
        remote_tfile = op.join(self.root_directory, "outputs", tfile)

        if not self.session.exists(remote_tfile):
            raise OrchestratorError("Expected output file does not exist: {}"
                                    .format(remote_tfile))

        failed = self.get_failed_subjobs()
        with head_at(self.ds, self.head) as moved:
            with chpwd(self.ds.path):
                resource_name = self.resource.name
                lgr.info("Fetching output tarball from '%s'", resource_name)
                self.session.get(remote_tfile)
                # This log_failed() may mention files that won't be around
                # until the tarball extraction below, but we do call
                # log_failed() now because it might need the remote resource
                # and we want to finish up with remote operations.
                self.log_failed(failed)

                lgr.info("Finished with remote resource '%s'", resource_name)
                if on_remote_finish:
                    on_remote_finish(self.resource, failed)
                lgr.info("Extracting output tarball into local dataset '%s'",
                         self.ds.path)
                with tarfile.open(tfile, mode="r:gz") as tar:
                    tar.extractall(path=".")
                os.unlink(tfile)
                # TODO: How to handle output cleanup on the remote?

                from datalad.core.local.run import run_command
                lgr.info("Creating run commit in %s", self.ds.path)

                cmds = self.job_spec["_command_array"]
                if len(cmds) == 1:
                    cmd = cmds[0]
                else:
                    # FIXME: Can't use unexpanded command because of unknown
                    # placeholders.
                    cmd = self.jobid

                call_check_dl_results(
                    run_command,
                    "Making datalad-run commit failed",
                    # FIXME: How to represent inputs and outputs given that
                    # they are formatted per subjob and then expanded by glob?
                    inputs=self.job_spec.get("inputs"),
                    extra_inputs=self.job_spec.get("_extra_inputs"),
                    outputs=self.job_spec.get("outputs"),
                    inject=True,
                    extra_info={"reproman_jobid": self.jobid},
                    message=self.job_spec.get("message"),
                    cmd=cmd)

                ref = self.job_refname
                if moved:
                    lgr.info("Results stored on %s. "
                             "Bring them into this branch with "
                             "'git merge %s'",
                             ref, ref)
                self.ds.repo.update_ref(ref, "HEAD")
Example 27
    def home(self):
        "$HOME directory on resource."
        home = self.session.query_envvars().get("HOME")
        if not home:
            raise OrchestratorError("Could not determine $HOME on remote")
        return home