Example #1
File: test_run.py Project: loj/datalad
def test_inject(path):
    ds = Dataset(path).rev_create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    msg = ds.repo.format_commit("%B")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)
Example #2
File: test_run.py Project: hanke/datalad
def test_inject(path):
    ds = Dataset(path).create(force=True)
    ok_(ds.repo.is_dirty())
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    msg = ds.repo.format_commit("%B")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)
Example #3
def test_inject(path):
    ds = Dataset(path).create(force=True)
    ok_(ds.repo.is_dirty())
    list(
        run_command("nonsense command",
                    dataset=ds,
                    inject=True,
                    extra_info={"custom_key": "custom_field"}))
    msg = ds.repo.format_commit("%B")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)
Example #4
def _rerun(dset, results):
    for res in results:
        if res["status"] == "error":
            yield res
            return

        rerun_action = res.get("rerun_action")
        if rerun_action == "skip":
            yield res
        elif rerun_action == "checkout":
            if res.get("branch"):
                checkout_options = ["-b", res["branch"]]
            else:
                checkout_options = ["--detach"]
            dset.repo.checkout(res["commit"], options=checkout_options)
        elif rerun_action == "pick":
            dset.repo._git_custom_command(
                None, ["git", "cherry-pick", res["commit"]],
                check_fake_dates=True)
            yield res
        else:
            hexsha = res["commit"]
            run_info = res["run_info"]

            # Keep a "rerun" trail.
            if "chain" in run_info:
                run_info["chain"].append(hexsha)
            else:
                run_info["chain"] = [hexsha]

            # Now we have to find out what was modified during the last run
            # and enable re-modification. Ideally, we would bring back the
            # entire state of the tree with #1424, but for now we limit
            # ourselves to file addition/not-in-place modification.
            auto_outputs = (ap["path"] for ap in new_or_modified(res["diff"]))
            outputs = run_info.get("outputs", [])
            outputs_dir = op.join(dset.path, run_info["pwd"])
            auto_outputs = [
                p for p in auto_outputs
                # run records outputs relative to the "pwd" field.
                if op.relpath(p, outputs_dir) not in outputs
            ]

            message = res["rerun_message"] or res["run_message"]
            for r in run_command(run_info['cmd'],
                                 dataset=dset,
                                 inputs=run_info.get("inputs", []),
                                 outputs=outputs,
                                 rerun_outputs=auto_outputs,
                                 message=message,
                                 rerun_info=run_info):
                yield r
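The keys accessed above imply a particular shape for each item in results. A hypothetical example item, inferred from this function alone rather than from any official datalad contract:

result = {
    "status": "ok",            # "error" aborts the generator early
    "rerun_action": None,      # or "skip" / "checkout" / "pick"
    "branch": None,            # used by the "checkout" action
    "commit": "abc1234",       # hexsha of the commit being rerun
    "run_info": {"cmd": "echo hi", "pwd": ".", "outputs": []},
    "diff": [],                # feeds new_or_modified() to find auto outputs
    "rerun_message": None,     # falls back to "run_message" when empty
    "run_message": "original run commit message",
}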
Example #5
    def __call__(cmd=None,
                 dataset=None,
                 inputs=None,
                 outputs=None,
                 expand=None,
                 explicit=False,
                 message=None,
                 sidecar=None):
        for r in run_command(cmd,
                             dataset=dataset,
                             inputs=inputs,
                             outputs=outputs,
                             expand=expand,
                             explicit=explicit,
                             message=message,
                             sidecar=sidecar,
                             saver=_save_outputs):
            yield r
Example #6
    def fetch(self):
        """Fetch results tarball and inject run record into the local dataset.
        """
        lgr.info("Fetching results for %s", self.jobid)
        import tarfile
        tfile = "{}.tar.gz".format(self.jobid)
        remote_tfile = op.join(self.root_directory, "outputs", tfile)

        if not self.session.exists(remote_tfile):
            raise OrchestratorError(
                "Expected output file does not exist: {}".format(remote_tfile))

        with head_at(self.ds, self.head) as moved:
            with chpwd(self.ds.path):
                self.session.get(remote_tfile)
                with tarfile.open(tfile, mode="r:gz") as tar:
                    tar.extractall(path=".")
                os.unlink(tfile)
                # TODO: How to handle output cleanup on the remote?

                from datalad.interface.run import run_command
                lgr.info("Creating run commit in %s", self.ds.path)
                for res in run_command(
                        inputs=self.job_spec.get("inputs_unexpanded"),
                        outputs=self.job_spec.get("outputs_unexpanded"),
                        inject=True,
                        extra_info={"reproman_jobid": self.jobid},
                        message=self.job_spec.get("message"),
                        cmd=self.job_spec["command_str_unexpanded"]):
                    # Oh, if only I were a datalad extension.
                    pass
                ref = self.job_refname
                if moved:
                    lgr.info(
                        "Results stored on %s. "
                        "Bring them into this branch with "
                        "'git merge %s'", ref, ref)
                self.ds.repo.update_ref(ref, "HEAD")

        self.log_failed()
Example #7
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path,
                 source=src,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(
        run_command("cat {inputs} {inputs} >doubled.dat",
                    dataset=ds,
                    inputs=["input.dat"],
                    extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        with swallow_outputs():
            ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy",
           inputs=["a.*"],
           outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
                        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah",
                        strip=True)
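The expand switch exercised near the end controls whether the run record stores raw globs or their matches. A short sketch of the three modes, with the stored values inferred from the assertions above:

ds.run("touch expand-dummy0", inputs=["a.*"], outputs=["b.*"])
# default: the record stores the literal globs "a.*" and "b.*"
ds.run("touch expand-dummy1", inputs=["a.*"], outputs=["b.*"], expand="inputs")
# record stores "a.dat" for inputs but keeps "b.*" for outputs (inferred)
ds.run("touch expand-dummy2", inputs=["a.*"], outputs=["b.*"], expand="both")
# record stores the matched files for both, as the rerun report checks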
Example #8
    def __call__(cmd,
                 container_name=None,
                 dataset=None,
                 inputs=None,
                 outputs=None,
                 message=None,
                 expand=None,
                 explicit=False,
                 sidecar=None):
        from mock import patch  # delayed, since takes long (~600ms for yoh)
        pwd, _ = get_command_pwds(dataset)
        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='run a containerized command execution')

        container = find_container(ds, container_name)
        image_path = op.relpath(container["path"], pwd)
        # The container record would contain the path to the (sub)dataset
        # containing it.  If not, take the current dataset, as the image must
        # come from it.
        image_dspath = op.relpath(container.get('parentds', ds.path), pwd)

        # Sure, we could check whether the container image is present, but it
        # might live in a subdataset that isn't even installed yet.  Let's
        # leave all this business to `get`, which is called by `run`.

        cmd = normalize_command(cmd)
        # expand the command with container execution
        if 'cmdexec' in container:
            callspec = container['cmdexec']

            # Temporary kludge to give a more helpful message
            if callspec.startswith("["):
                import simplejson
                try:
                    simplejson.loads(callspec)
                except simplejson.errors.JSONDecodeError:
                    pass  # Never mind, false positive.
                else:
                    raise ValueError(
                        'cmdexec {!r} is in an old, unsupported format. '
                        'Convert it to a plain string.'.format(callspec))
            try:
                cmd_kwargs = dict(
                    img=image_path,
                    cmd=cmd,
                    img_dspath=image_dspath,
                )
                cmd = callspec.format(**cmd_kwargs)
            except KeyError as exc:
                yield get_status_dict(
                    'run',
                    ds=ds,
                    status='error',
                    message=(
                        'Unrecognized cmdexec placeholder: %s. '
                        'See containers-add for information on known ones: %s',
                        exc, ", ".join(cmd_kwargs)))
                return
        else:
            # just prepend and pray
            cmd = container['path'] + ' ' + cmd

        with patch.dict('os.environ',
                        {CONTAINER_NAME_ENVVAR: container['name']}):
            # fire!
            for r in run_command(cmd=cmd,
                                 dataset=dataset
                                 or (ds if ds.path == pwd else None),
                                 inputs=inputs,
                                 extra_inputs=[image_path],
                                 outputs=outputs,
                                 message=message,
                                 expand=expand,
                                 explicit=explicit,
                                 sidecar=sidecar):
                yield r
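For context, a hypothetical cmdexec value of the kind this code formats. The placeholder names match the cmd_kwargs built above; the concrete template and image path are invented for illustration:

callspec = "singularity exec {img} {cmd}"   # made-up container record entry
print(callspec.format(img="images/bio.sif",
                      cmd="python analysis.py",
                      img_dspath="."))      # unused placeholders are allowed
# -> singularity exec images/bio.sif python analysis.py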
Example #9
File: rerun.py Project: xlecours/datalad
    def __call__(revision="HEAD",
                 since=None,
                 dataset=None,
                 branch=None,
                 message=None,
                 onto=None):

        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='rerunning a command')

        lgr.debug('rerunning command output underneath %s', ds)

        from datalad.tests.utils import ok_clean_git
        try:
            ok_clean_git(ds.path)
        except AssertionError:
            yield get_status_dict('run',
                                  ds=ds,
                                  status='impossible',
                                  message=('unsaved modifications present, '
                                           'cannot detect changes by command'))
            return

        err_info = get_status_dict('run', ds=ds)
        if not ds.repo.get_hexsha():
            yield dict(err_info,
                       status='impossible',
                       message='cannot rerun command, nothing recorded')
            return

        if branch and branch in ds.repo.get_branches():
            yield get_status_dict(
                "run",
                ds=ds,
                status="error",
                message="branch '{}' already exists".format(branch))
            return

        if not commit_exists(ds, revision + "^"):
            # Only a single commit is reachable from `revision`.  In
            # this case, --since has no effect on the range construction.
            revrange = revision
        elif since is None:
            revrange = "{rev}^..{rev}".format(rev=revision)
        elif since.strip() == "":
            revrange = revision
        else:
            revrange = "{}..{}".format(since, revision)

        if ds.repo.repo.git.rev_list("--merges", revrange, "--"):
            yield get_status_dict(
                "run",
                ds=ds,
                status="error",
                message="cannot rerun history with merge commits")
            return

        Revision = namedtuple("Revision", ["id", "message", "info"])

        def revision_with_info(rev):
            msg, info = get_commit_runinfo(ds.repo, rev)
            return Revision(rev, msg, info)

        ids = ds.repo.repo.git.rev_list("--reverse", revrange, "--").split()

        try:
            revs = list(map(revision_with_info, ids))
        except ValueError as exc:
            yield dict(err_info, status='error', message=exc_str(exc))
            return

        if since is not None and since.strip() == "":
            # For --since='', drop any leading commits that don't have
            # a run command.
            revs = list(dropwhile(lambda r: r.info is None, revs))

        if onto is not None and onto.strip() == "":
            # Special case: --onto='' is the value of --since.
            # Because we're currently aborting if the revision list
            # contains merges, we know that, regardless of if and how
            # --since is specified, the effective value for --since is
            # the parent of the first revision.
            onto = revs[0].id + "^"
            if not commit_exists(ds, onto):
                # This is unlikely to happen in the wild because it
                # means that the first commit is a datalad run commit.
                # Just abort rather than trying to checkout on orphan
                # branch or something like that.
                yield get_status_dict(
                    "run",
                    ds=ds,
                    status="error",
                    message="Commit for --onto does not exist.")
                return

        if branch or onto:
            start_point = onto or "HEAD"
            if branch:
                checkout_options = ["-b", branch]
            else:
                checkout_options = ["--detach"]
            ds.repo.checkout(start_point, options=checkout_options)

        for rev in revs:
            if not rev.info:
                pick = False
                try:
                    ds.repo.repo.git.merge_base("--is-ancestor", rev.id,
                                                "HEAD")
                except GitCommandError:  # Revision is NOT an ancestor of HEAD.
                    pick = True

                shortrev = ds.repo.repo.git.rev_parse("--short", rev.id)
                err_msg = "no command for {} found; {}".format(
                    shortrev, "cherry picking" if pick else "skipping")
                yield dict(err_info, status='ok', message=err_msg)

                if pick:
                    ds.repo.repo.git.cherry_pick(rev.id)
                continue

            # Keep a "rerun" trail.
            if "chain" in rev.info:
                rev.info["chain"].append(rev.id)
            else:
                rev.info["chain"] = [rev.id]

            # Now we have to find out what was modified during the last
            # run and enable re-modification. Ideally, we would bring back
            # the entire state of the tree with #1424, but for now we limit
            # ourselves to file addition/not-in-place modification.
            for r in ds.unlock(new_or_modified(ds, rev.id),
                               return_type='generator',
                               result_xfm=None):
                yield r

            for r in run_command(rev.info['cmd'],
                                 ds,
                                 message or rev.message,
                                 rerun_info=rev.info):
                yield r
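The revision-range construction near the top of __call__ is compact; here it is restated as a standalone helper for reference (a sketch mirroring the branches above, not a datalad API):

def build_revrange(revision, since, single_commit):
    if single_commit:            # only `revision` itself is reachable
        return revision
    if since is None:            # rerun just the one commit
        return "{rev}^..{rev}".format(rev=revision)
    if since.strip() == "":      # --since='': everything from the root
        return revision
    return "{}..{}".format(since, revision)

assert build_revrange("HEAD", None, False) == "HEAD^..HEAD"
assert build_revrange("HEAD", "v1.0", False) == "v1.0..HEAD"
assert build_revrange("HEAD", "", False) == "HEAD"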
Example #10
File: test_run.py Project: hanke/datalad
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"),
                  ("s0", "s1_1", "s2"),
                  ("s0", "s1_0"),
                  ("s0", "s1_1"),
                  ("s0", "ss"),
                  ("s0",)]:
        Dataset(op.join(*((src,) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("cat {inputs} {inputs} >doubled.dat",
                     dataset=ds,
                     inputs=["input.dat"], extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a",
                                     "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"], expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
                        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah",
                        strip=True)
Example #11
    def fetch(self, on_remote_finish=None):
        """Fetch results tarball and inject run record into the local dataset.

        on_remote_finish : callable, optional
            Function to be called when work with the resource is finished. It
            will be passed two arguments, the resource and the failed subjobs
            (list of ints).
        """
        lgr.info("Fetching results for %s", self.jobid)
        import tarfile
        tfile = "{}.tar.gz".format(self.jobid)
        remote_tfile = op.join(self.root_directory, "outputs", tfile)

        if not self.session.exists(remote_tfile):
            raise OrchestratorError(
                "Expected output file does not exist: {}".format(remote_tfile))

        failed = self.get_failed_subjobs()
        with head_at(self.ds, self.head) as moved:
            with chpwd(self.ds.path):
                resource_name = self.resource.name
                lgr.info("Fetching output tarball from '%s'", resource_name)
                self.session.get(remote_tfile)
                # This log_failed() may mention files that won't be around
                # until the tarball extraction below, but we do call
                # log_failed() now because it might need the remote resource
                # and we want to finish up with remote operations.
                self.log_failed(failed)

                lgr.info("Finished with remote resource '%s'", resource_name)
                if on_remote_finish:
                    on_remote_finish(self.resource, failed)
                lgr.info("Extracting output tarball into local dataset '%s'",
                         self.ds.path)
                with tarfile.open(tfile, mode="r:gz") as tar:
                    tar.extractall(path=".")
                os.unlink(tfile)
                # TODO: How to handle output cleanup on the remote?

                from datalad.interface.run import run_command
                lgr.info("Creating run commit in %s", self.ds.path)

                cmds = self.job_spec["_command_array"]
                if len(cmds) == 1:
                    cmd = cmds[0]
                else:
                    # FIXME: Can't use unexpanded command because of unknown
                    # placeholders.
                    cmd = self.jobid

                for res in run_command(
                        # FIXME: How to represent inputs and outputs given that
                        # they are formatted per subjob and then expanded by
                        # glob?
                        inputs=self.job_spec.get("inputs"),
                        extra_inputs=self.job_spec.get("_extra_inputs"),
                        outputs=self.job_spec.get("outputs"),
                        inject=True,
                        extra_info={"reproman_jobid": self.jobid},
                        message=self.job_spec.get("message"),
                        cmd=cmd):
                    # Oh, if only I were a datalad extension.
                    if res["status"] in ["impossible", "error"]:
                        raise OrchestratorError(
                            "Making datalad-run commit failed: {}".format(
                                res["message"]))

                ref = self.job_refname
                if moved:
                    lgr.info(
                        "Results stored on %s. "
                        "Bring them into this branch with "
                        "'git merge %s'", ref, ref)
                self.ds.repo.update_ref(ref, "HEAD")
Example #12
File: rerun.py Project: raamana/datalad
    def __call__(revision="HEAD",
                 since=None,
                 dataset=None,
                 branch=None,
                 message=None,
                 onto=None,
                 script=None):

        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='rerunning a command')

        lgr.debug('rerunning command output underneath %s', ds)

        if script is None and ds.repo.dirty:
            yield get_status_dict('run',
                                  ds=ds,
                                  status='impossible',
                                  message=('unsaved modifications present, '
                                           'cannot detect changes by command'))
            return

        err_info = get_status_dict('run', ds=ds)
        if not ds.repo.get_hexsha():
            yield dict(err_info,
                       status='impossible',
                       message='cannot rerun command, nothing recorded')
            return

        if branch and branch in ds.repo.get_branches():
            yield get_status_dict(
                "run",
                ds=ds,
                status="error",
                message="branch '{}' already exists".format(branch))
            return

        if not commit_exists(ds, revision + "^"):
            # Only a single commit is reachable from `revision`.  In
            # this case, --since has no effect on the range construction.
            revrange = revision
        elif since is None:
            revrange = "{rev}^..{rev}".format(rev=revision)
        elif since.strip() == "":
            revrange = revision
        else:
            revrange = "{}..{}".format(since, revision)

        if ds.repo.repo.git.rev_list("--merges", revrange, "--"):
            yield get_status_dict(
                "run",
                ds=ds,
                status="error",
                message="cannot rerun history with merge commits")
            return

        revs = [{"hexsha": hexsha,
                 "message": ds.repo.repo.git.show(
                     hexsha, "--format=%B", "--no-patch")}
                for hexsha in ds.repo.repo.git.rev_list(
                    "--reverse", revrange, "--").split()]

        for rev in revs:
            try:
                msg, info = get_run_info(rev["message"])
            except ValueError as exc:
                yield dict(err_info,
                           status='error',
                           message="Error on {}'s message: {}".format(
                               rev["hexsha"], exc_str(exc)))
                return
            if info is not None:
                rev["run_info"] = info
                rev["run_message"] = msg

        if since is not None and since.strip() == "":
            # For --since='', drop any leading commits that don't have
            # a run command.
            revs = list(dropwhile(lambda r: "run_info" not in r, revs))

        if script:
            ofh = sys.stdout if script.strip() == "-" else open(script, "w")
            header = """\
#!/bin/sh
#
# This file was generated by running (the equivalent of)
#
#   datalad rerun --script={script}{since} {revision}
#
# in {ds}{path}\n"""
            ofh.write(
                header.format(
                    script=script,
                    since="" if since is None else " --since=" + since,
                    revision=ds.repo.repo.git.rev_parse(revision),
                    ds='dataset {} at '.format(ds.id) if ds.id else '',
                    path=ds.path))

            for rev in revs:
                if "run_info" not in rev:
                    continue

                cmd = rev["run_info"]["cmd"]
                msg = rev["run_message"]
                if msg == _format_cmd_shorty(cmd):
                    msg = ''
                ofh.write("\n" + "".join("# " + ln
                                         for ln in msg.splitlines(True)) +
                          "\n")
                commit_descr = ds.repo.describe(rev['hexsha'])
                ofh.write('# (record: {})\n'.format(
                    commit_descr if commit_descr else rev['hexsha']))

                if isinstance(cmd, list):
                    cmd = " ".join(cmd)
                ofh.write(cmd + "\n")
            if ofh is not sys.stdout:
                ofh.close()
        else:
            if onto is not None and onto.strip() == "":
                # Special case: --onto='' is the value of --since.
                # Because we're currently aborting if the revision list
                # contains merges, we know that, regardless of if and how
                # --since is specified, the effective value for --since is
                # the parent of the first revision.
                onto = revs[0]["hexsha"] + "^"
                if not commit_exists(ds, onto):
                    # This is unlikely to happen in the wild because it
                    # means that the first commit is a datalad run commit.
                    # Just abort rather than trying to checkout on orphan
                    # branch or something like that.
                    yield get_status_dict(
                        "run",
                        ds=ds,
                        status="error",
                        message="Commit for --onto does not exist.")
                    return

            if branch or onto:
                start_point = onto or "HEAD"
                if branch:
                    checkout_options = ["-b", branch]
                else:
                    checkout_options = ["--detach"]
                ds.repo.checkout(start_point, options=checkout_options)

            for rev in revs:
                hexsha = rev["hexsha"]
                if "run_info" not in rev:
                    pick = False
                    try:
                        ds.repo.repo.git.merge_base("--is-ancestor", hexsha,
                                                    "HEAD")
                    except GitCommandError:
                        # Revision is NOT an ancestor of HEAD.
                        pick = True

                    shortrev = ds.repo.repo.git.rev_parse("--short", hexsha)
                    err_msg = "no command for {} found; {}".format(
                        shortrev, "cherry picking" if pick else "skipping")
                    yield dict(err_info, status='ok', message=err_msg)

                    if pick:
                        ds.repo._git_custom_command(
                            None, ["git", "cherry-pick", hexsha],
                            check_fake_dates=True)
                    continue

                run_info = rev["run_info"]
                # Keep a "rerun" trail.
                if "chain" in run_info:
                    run_info["chain"].append(hexsha)
                else:
                    run_info["chain"] = [hexsha]

                # Now we have to find out what was modified during the
                # last run and enable re-modification. Ideally, we would
                # bring back the entire state of the tree with #1424, but
                # for now we limit ourselves to file
                # addition/not-in-place modification.
                for r in ds.unlock(new_or_modified(ds, hexsha),
                                   return_type='generator',
                                   result_xfm=None):
                    yield r

                for r in run_command(run_info['cmd'],
                                     ds,
                                     message or rev["run_message"],
                                     rerun_info=run_info):
                    yield r
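To visualize what the --script branch emits, here is the header template from above rendered with made-up values (the template string itself is the one defined in the code):

header = """\
#!/bin/sh
#
# This file was generated by running (the equivalent of)
#
#   datalad rerun --script={script}{since} {revision}
#
# in {ds}{path}\n"""
print(header.format(script="rerun.sh",
                    since="",                    # hypothetical: no --since
                    revision="abc1234def",       # made-up resolved revision
                    ds="dataset 0000-1111 at ",  # made-up dataset id
                    path="/tmp/run-demo"))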
Example #13
def _apply_output(ds, jdir, sdir):
    common = dict(
        action='htc_result_merge',
        refds=text_type(ds.pathobj),
        path=text_type(jdir),
        logger=lgr,
    )
    args_path = sdir / 'runargs.json'
    try:
        # anything below PY3.6 needs stringification
        runargs = json_py.load(str(args_path))
    except Exception as e:
        yield dict(
            common,
            status='error',
            message=("could not load submission arguments from '%s': %s",
                     args_path, exc_str(e)))
        return
    # TODO check recursive status to have dataset clean
    # TODO have query limited to outputs if explicit was given
    # prep outputs (unlock or remove)
    # COPY: this is a copy of the code from run_command
    outputs = GlobbedPaths(runargs['outputs'],
                           pwd=runargs['pwd'],
                           expand=runargs['expand'] in ["outputs", "both"])
    if outputs:
        for res in _install_and_reglob(ds, outputs):
            yield res
        for res in _unlock_or_remove(ds, outputs.expand(full=True)):
            yield res
    # END COPY

    # TODO need to imitate PWD change, if needed
    # -> extract tarball
    try:
        stdout, stderr = Runner().run(
            ['tar', '-xf', '{}'.format(jdir / 'output')], cwd=ds.path)
    except CommandError as e:
        yield dict(
            common,
            status='error',
            message=("could not un-tar job results from '%s' at '%s': %s",
                     str(jdir / 'output'), ds.path, exc_str(e)))
        return

    # fake a run record, as if we would have executed locally
    for res in run_command(
            runargs['cmd'],
            dataset=ds,
            inputs=runargs['inputs'],
            outputs=runargs['outputs'],
            expand=runargs['expand'],
            explicit=runargs['explicit'],
            message=runargs['message'],
            sidecar=runargs['sidecar'],
            # TODO pwd, exit code
            extra_info=None,
            inject=True):
        yield res

    res = list(_remove_dir(ds, jdir))[0]
    res['action'] = 'htc_results_merge'
    res['status'] = 'ok'
    res.pop('message', None)
    # Not removing the submission files (for now), even though the last job's
    # output may be removed now. Those submissions are tiny and could be
    # resubmitted.
    yield res
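Finally, a minimal direct invocation combining the patterns above. This is a sketch: the import paths follow examples #6 and #7, the dataset location is made up, and the result keys are the ones the examples above consume:

from datalad.api import Dataset
from datalad.interface.run import run_command   # import path as in example #6

ds = Dataset("/tmp/run-demo").create()          # hypothetical location
for res in run_command("echo hello >hello.txt",
                       dataset=ds,
                       outputs=["hello.txt"],
                       message="say hello"):
    print(res["status"], res.get("message"))    # keys used by examples above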