def test_placeholders(path):
    """Exercise command placeholders ({inputs}, {outputs}, {pwd}, {dspath},
    configured substitutions, and {tmpdir}) in run/rerun."""
    ds = Dataset(path).create(force=True)
    ds.save()
    assert_repo_status(ds.path)
    # ATTN windows is sensitive to spaces before redirect symbol
    ds.run("echo {inputs}>{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(op.join(path, "c.out"), "a.in b.in\n")

    # A rerun of a placeholder command is a no-op when nothing changed.
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    # Individual inputs can be selected by index.
    # ATTN windows is sensitive to spaces before redirect symbol
    ds.run("echo {inputs[0]}>getitem", inputs=["*.in"])
    ok_file_has_content(op.join(path, "getitem"), "a.in\n")

    # {pwd} and {dspath} expand to the working and dataset directories.
    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(op.join(path, "expanded-pwd"), path,
                        strip=True)

    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(op.join(path, "expanded-dspath"), ds.path,
                        strip=True)

    # When run from a subdirectory, {pwd} reflects that subdirectory and the
    # recorded "pwd" is relative to the dataset root.
    subdir_path = op.join(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(op.join(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, last_commit_msg(ds.repo))[1]["pwd"],
        "subdir")

    # Double brackets can be used to escape placeholders.
    ds.run("cd .> {{inputs}}", inputs=["*.in"])
    ok_exists(op.join(path, "{inputs}"))

    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
    script_out = cmout.getvalue()
    assert_in("echo a.in b.in>c.out", script_out)
    assert_in("echo {} >expanded-pwd".format(subdir_path), script_out)
    assert_in("echo {} >expanded-dspath".format(ds.path), script_out)

    # An unknown placeholder is an impossible-to-run command, not a crash.
    assert_result_count(ds.run("{unknown_placeholder}", on_failure="ignore"),
                        1, status="impossible", action="run")

    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(op.join(path, "configured-license"), "gpl3",
                        strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
    assert_in("gpl3", cmout.getvalue())

    # {tmpdir} expands to a datalad-run temporary directory.
    ds.run("echo {tmpdir} >tout")
    ok_file_has_content(op.join(path, "tout"), ".*datalad-run.*", re_=True)
def test_rerun_chain(path):
    """Repeated reruns record the full chain of re-executed commits, while
    rerunning a specific revision chains back only to that commit."""
    ds = Dataset(path).create()
    with swallow_outputs():
        ds.run('echo x$(cat grows) > grows')
    ds.repo.tag("first-run", commit=DEFAULT_BRANCH)

    expected_chain = []
    for _ in range(3):
        # Each rerun's recorded chain should cover all prior rerun commits.
        expected_chain.append(ds.repo.get_hexsha(DEFAULT_BRANCH))
        ds.rerun()
        _, run_info = get_run_info(ds, last_commit_msg(ds.repo))
        eq_(run_info["chain"], expected_chain)

    # Rerunning the tagged original run chains back to just that commit.
    ds.rerun(revision="first-run")
    _, run_info = get_run_info(ds, last_commit_msg(ds.repo))
    eq_(run_info["chain"], expected_chain[:1])
def test_rerun_subdir(path):
    """Check that run records and rerun honor the working (sub)directory."""
    # Note: Using with_tree rather than with_tempfile is matters. The latter
    # calls realpath on the path, which masks a failure in the
    # TMPDIR="/var/tmp/sym link" test case
    ds = Dataset(path).create(force=True)
    subdir = op.join(path, 'subdir')
    with chpwd(subdir):
        run("touch test.dat")
    assert_repo_status(ds.path)

    # FIXME: A plain ok_file_under_git call doesn't properly resolve the file
    # in the TMPDIR="/var/tmp/sym link" test case. Temporarily call realpath.
    def ok_file_under_git_kludge(path, basename):
        # Resolve symlinks in the directory before the check.
        ok_file_under_git(op.join(str(Path(path).resolve()), basename),
                          annexed=True)

    ok_file_under_git_kludge(subdir, "test.dat")

    # The record's "pwd" is relative to the dataset root.
    rec_msg, runinfo = get_run_info(ds, last_commit_msg(ds.repo))
    eq_(runinfo['pwd'], 'subdir')
    # now, rerun within root of the dataset
    with chpwd(ds.path):
        ds.rerun()
    assert_repo_status(ds.path)
    # The rerun output lands in the recorded subdirectory ...
    ok_file_under_git_kludge(subdir, "test.dat")
    # and not on top
    assert_raises(AssertionError, ok_file_under_git,
                  op.join(ds.path, "test.dat"), annexed=True)

    # but if we run ds.run -- runs within top of the dataset
    with chpwd(subdir):
        ds.run("touch test2.dat")
    assert_repo_status(ds.path)
    ok_file_under_git_kludge(ds.path, "test2.dat")
    rec_msg, runinfo = get_run_info(ds, last_commit_msg(ds.repo))
    eq_(runinfo['pwd'], '.')
    # now, rerun within subdir -- smoke for now
    with chpwd(subdir):
        ds.rerun()
def test_rerun_invalid_merge_run_commit(path):
    """A run commit that is also a merge commit triggers a warning and is
    skipped, while other run commits are still re-executed."""
    ds = Dataset(path).create()
    ds.run("echo foo >>foo")
    ds.run("echo invalid >>invalid")
    bad_msg = last_commit_msg(ds.repo)
    bad_hexsha = ds.repo.get_hexsha()
    # Step back before the second run commit and add an unrelated commit.
    ds.repo.call_git(["reset", "--hard", DEFAULT_BRANCH + "~"])
    with open(op.join(ds.path, "non-run"), "w") as fh:
        fh.write("non-run")
    ds.save()
    # Assign two parents to the invalid run commit.
    merge_commit = ds.repo.call_git_oneline(
        ["commit-tree", bad_hexsha + "^{tree}", "-m", bad_msg,
         "-p", bad_hexsha + "^",
         "-p", ds.repo.get_hexsha()])
    ds.repo.call_git(["reset", "--hard", merge_commit])
    pre_rerun_hexsha = ds.repo.get_hexsha()

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.rerun(since="")
        assert_in("has run information but is a merge commit", cml.out)
    # Exactly one commit was produced: the valid run commit's rerun.
    eq_(len(ds.repo.get_revisions(pre_rerun_hexsha + ".." + DEFAULT_BRANCH)),
        1)
def test_rerun(path, nodspath):
    """End-to-end behavior of ``rerun``: repetition, skipping in
    subdatasets, dirty-repo refusal, message override, commit ranges, report
    mode, and dropped-output handling."""
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, last_commit_msg(sub.repo))
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(
        sub.rerun(return_type="list", on_failure="ignore"),
        1, status="impossible", action="run", rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision=DEFAULT_BRANCH + "~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(last_commit_msg(ds.repo).splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since=DEFAULT_BRANCH + "~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # The "diff" section of the report doesn't include the unchanged files
    # that would come in "-f json diff" output.
    for entry in report:
        if entry["rerun_action"] == "run":
            # None of the run commits touch .datalad/config or any other
            # config file.
            assert_false(
                any(r["path"].endswith("config") for r in entry["diff"]))
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip-or-pick")
    # FIX: this was previously a bare ``==`` comparison whose result was
    # silently discarded, so it asserted nothing; make it a real assertion.
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()
    eq_('x\n', open(probe_path).read())
def test_run_inputs_outputs(src, path):
    """Exercise --input/--output handling: retrieval, globs, subdirectories,
    unlocking/removal of outputs, expansion, and subdataset installation."""
    # Set up a source dataset with a hierarchy of subdatasets, then install
    # it so annexed content is initially absent locally.
    for subds in [("s0", "s1_0", "s2"),
                  ("s0", "s1_1", "s2"),
                  ("s0", "s1_0"),
                  ("s0", "s1_1"),
                  ("s0", "ss"),
                  ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True).save()
    src_ds = Dataset(src).create(force=True)
    src_ds.save()

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("{} {{inputs}} {{inputs}} >doubled.dat".format(
                         'type' if on_windows else 'cat'),
                     dataset=ds,
                     inputs=["input.dat"], extra_inputs=["extra-input.dat"]))
    assert_repo_status(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    # extra_inputs are retrieved but not substituted into {inputs}.
    with open(op.join(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    # A missing input is only warned about, not fatal.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("cd .> dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})
    ds.save()
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))
        ds.run("cd .> dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], last_commit_msg(ds.repo))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.save("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("cd .> subdir-dummy", inputs=[op.join(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(op.join("subdir", f))
            for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(op.join("subdir", "a"), options=["--force"])
    with chpwd(op.join(path, "subdir")):
        run("cd .> subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(op.join("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("cd .> dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.save()
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun(DEFAULT_BRANCH + "^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(op.join(path, "a.dat")) as fh:
        eq_(fh.read(),
            "a.dat' appended' \n" if on_windows else "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n appended\n")

    if not on_windows:
        # see datalad#2606
        with swallow_logs(new_level=logging.DEBUG) as cml:
            with swallow_outputs():
                ds.run("echo blah", outputs=["not-there"])
                assert_in("Filtered out non-existing path: ", cml.out)

    # Outputs inside subdatasets are handled, whether present or dropped.
    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("cd .> expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", last_commit_msg(ds.repo))
    assert_in("b.dat", last_commit_msg(ds.repo))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(
        op.join(ds.path, "globbed-subds"),
        "'s0\\s1_0\\s2\\a.dat' 's0\\s1_1\\s2\\c.dat'" if on_windows
        else "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
        strip=True)

    # An output inside an uninstalled subdataset triggers its installation.
    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"), "blah",
                        strip=True)