Example #1
0
def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    assert_repo_status(ds.path)
    # ATTN windows is sensitive to spaces before redirect symbol
    ds.run("echo {inputs}>{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(op.join(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    # ATTN windows is sensitive to spaces before redirect symbol
    ds.run("echo {inputs[0]}>getitem", inputs=["*.in"])
    ok_file_has_content(op.join(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(op.join(path, "expanded-pwd"), path, strip=True)

    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(op.join(path, "expanded-dspath"), ds.path, strip=True)

    subdir_path = op.join(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(op.join(path, "subdir", "expanded-pwd"),
                        subdir_path,
                        strip=True)
    eq_(get_run_info(ds, last_commit_msg(ds.repo))[1]["pwd"], "subdir")

    # Double brackets can be used to escape placeholders.
    ds.run("cd .> {{inputs}}", inputs=["*.in"])
    ok_exists(op.join(path, "{inputs}"))

    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
        script_out = cmout.getvalue()
        assert_in("echo a.in b.in>c.out", script_out)
        assert_in("echo {} >expanded-pwd".format(subdir_path), script_out)
        assert_in("echo {} >expanded-dspath".format(ds.path), script_out)

    assert_result_count(ds.run("{unknown_placeholder}", on_failure="ignore"),
                        1,
                        status="impossible",
                        action="run")

    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(op.join(path, "configured-license"),
                        "gpl3",
                        strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
        assert_in("gpl3", cmout.getvalue())

    ds.run("echo {tmpdir} >tout")
    ok_file_has_content(op.join(path, "tout"), ".*datalad-run.*", re_=True)
Example #2
0
def test_rerun_chain(path):
    ds = Dataset(path).create()
    commits = []

    with swallow_outputs():
        ds.run('echo x$(cat grows) > grows')
    ds.repo.tag("first-run", commit=DEFAULT_BRANCH)

    for _ in range(3):
        commits.append(ds.repo.get_hexsha(DEFAULT_BRANCH))
        ds.rerun()
        _, info = get_run_info(ds, last_commit_msg(ds.repo))
        eq_(info["chain"], commits)

    ds.rerun(revision="first-run")
    _, info = get_run_info(ds, last_commit_msg(ds.repo))
    eq_(info["chain"], commits[:1])
Example #3
0
def test_rerun_subdir(path):
    # Note: Using with_tree rather than with_tempfile is matters. The latter
    # calls realpath on the path, which masks a failure in the
    # TMPDIR="/var/tmp/sym link" test case
    ds = Dataset(path).create(force=True)
    subdir = op.join(path, 'subdir')
    with chpwd(subdir):
        run("touch test.dat")
    assert_repo_status(ds.path)

    # FIXME: A plain ok_file_under_git call doesn't properly resolve the file
    # in the TMPDIR="/var/tmp/sym link" test case. Temporarily call realpath.
    def ok_file_under_git_kludge(path, basename):
        ok_file_under_git(op.join(str(Path(path).resolve()), basename),
                          annexed=True)

    ok_file_under_git_kludge(subdir, "test.dat")

    rec_msg, runinfo = get_run_info(ds, last_commit_msg(ds.repo))
    eq_(runinfo['pwd'], 'subdir')
    # now, rerun within root of the dataset
    with chpwd(ds.path):
        ds.rerun()
    assert_repo_status(ds.path)
    ok_file_under_git_kludge(subdir, "test.dat")
    # and not on top
    assert_raises(AssertionError,
                  ok_file_under_git,
                  op.join(ds.path, "test.dat"),
                  annexed=True)

    # but if we run ds.run -- runs within top of the dataset
    with chpwd(subdir):
        ds.run("touch test2.dat")
    assert_repo_status(ds.path)
    ok_file_under_git_kludge(ds.path, "test2.dat")
    rec_msg, runinfo = get_run_info(ds, last_commit_msg(ds.repo))
    eq_(runinfo['pwd'], '.')
    # now, rerun within subdir -- smoke for now
    with chpwd(subdir):
        ds.rerun()
Example #4
0
def test_rerun_invalid_merge_run_commit(path):
    ds = Dataset(path).create()
    ds.run("echo foo >>foo")
    ds.run("echo invalid >>invalid")
    run_msg = last_commit_msg(ds.repo)
    run_hexsha = ds.repo.get_hexsha()
    ds.repo.call_git(["reset", "--hard", DEFAULT_BRANCH + "~"])
    with open(op.join(ds.path, "non-run"), "w") as nrfh:
        nrfh.write("non-run")
    ds.save()
    # Assign two parents to the invalid run commit.
    commit = ds.repo.call_git_oneline(
        ["commit-tree", run_hexsha + "^{tree}", "-m", run_msg,
         "-p", run_hexsha + "^",
         "-p", ds.repo.get_hexsha()])
    ds.repo.call_git(["reset", "--hard", commit])
    hexsha_orig = ds.repo.get_hexsha()
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.rerun(since="")
        assert_in("has run information but is a merge commit", cml.out)
    eq_(len(ds.repo.get_revisions(hexsha_orig + ".." + DEFAULT_BRANCH)), 1)
Example #5
0
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, last_commit_msg(sub.repo))
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(sub.rerun(return_type="list", on_failure="ignore"),
                        1,
                        status="impossible",
                        action="run",
                        rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision=DEFAULT_BRANCH + "~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the messasge override worked.
    eq_(
        last_commit_msg(ds.repo).splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since=DEFAULT_BRANCH + "~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # The "diff" section of the report doesn't include the unchanged files that
    # would come in "-f json diff" output.
    for entry in report:
        if entry["rerun_action"] == "run":
            # None of the run commits touch .datalad/config or any other config
            # file.
            assert_false(
                any(r["path"].endswith("config") for r in entry["diff"]))

    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip-or-pick")
    report[-1]["commit"] == ds.repo.get_hexsha()

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()

    eq_('x\n', open(probe_path).read())
Example #6
0
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True).save()
    src_ds = Dataset(src).create(force=True)
    src_ds.save()

    ds = install(path,
                 source=src,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(
        run_command("{} {{inputs}} {{inputs}} >doubled.dat".format(
            'type' if on_windows else 'cat'),
                    dataset=ds,
                    inputs=["input.dat"],
                    extra_inputs=["extra-input.dat"]))

    assert_repo_status(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(op.join(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("cd .> dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.save()
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("cd .> dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], last_commit_msg(ds.repo))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.save("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("cd .> subdir-dummy", inputs=[op.join(ds.path, "subdir")])
    ok_(all(
        ds.repo.file_has_content(op.join("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(op.join("subdir", "a"), options=["--force"])
    with chpwd(op.join(path, "subdir")):
        run("cd .> subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(op.join("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("cd .> dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.save()
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun(DEFAULT_BRANCH + "^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(op.join(path, "a.dat")) as fh:
        eq_(fh.read(),
            "a.dat' appended' \n" if on_windows else "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n appended\n")

    if not on_windows:
        # see datalad#2606
        with swallow_logs(new_level=logging.DEBUG) as cml:
            with swallow_outputs():
                ds.run("echo blah", outputs=["not-there"])
                assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("cd .> expand-dummy",
           inputs=["a.*"],
           outputs=["b.*"],
           expand="both")
    assert_in("a.dat", last_commit_msg(ds.repo))
    assert_in("b.dat", last_commit_msg(ds.repo))

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "'s0\\s1_0\\s2\\a.dat' 's0\\s1_1\\s2\\c.dat'"
                        if on_windows else "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
                        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah",
                        strip=True)