def test_rerun_subdir(path):
    """Check that run/rerun record and honor the working (sub)directory.

    A command executed from a subdirectory must be recorded with
    ``pwd == 'subdir'`` and re-executed there on rerun, while ``ds.run``
    invoked via the Dataset method runs from the dataset root
    (``pwd == '.'``) regardless of the current directory.
    """
    ds = Dataset(path).create()
    subdir = opj(path, 'subdir')
    mkdir(subdir)
    # run from within the subdirectory -- the file must land there
    with chpwd(subdir):
        run("touch test.dat")
    ok_clean_git(ds.path)
    ok_file_under_git(opj(subdir, "test.dat"), annexed=True)
    # the run record in the commit message must point at the subdirectory
    rec_msg, runinfo = get_run_info(ds.repo.repo.head.commit.message)
    eq_(runinfo['pwd'], 'subdir')
    # now, rerun within root of the dataset
    with chpwd(ds.path):
        ds.rerun()
    ok_clean_git(ds.path)
    ok_file_under_git(opj(subdir, "test.dat"), annexed=True)
    # and not on top
    assert_raises(AssertionError, ok_file_under_git,
                  opj(ds.path, "test.dat"), annexed=True)
    # but if we run ds.run -- runs within top of the dataset
    with chpwd(subdir):
        ds.run("touch test2.dat")
    ok_clean_git(ds.path)
    ok_file_under_git(opj(ds.path, "test2.dat"), annexed=True)
    rec_msg, runinfo = get_run_info(ds.repo.repo.head.commit.message)
    eq_(runinfo['pwd'], '.')
    # now, rerun within subdir -- smoke for now
    with chpwd(subdir):
        ds.rerun()
def test_placeholders(path):
    """Exercise command placeholders supported by `run`.

    Covers ``{inputs}``/``{outputs}`` (including indexed access
    ``{inputs[0]}``), ``{pwd}``, ``{dspath}``, ``{tmpdir}``, double-brace
    escaping, unknown placeholders (an 'impossible' result), placeholders
    configured via ``datalad.run.substitutions.*``, and their expansion by
    ``rerun --script``.
    """
    ds = Dataset(path).create(force=True)
    ds.save()
    assert_repo_status(ds.path)
    # ATTN windows is sensitive to spaces before redirect symbol
    ds.run("echo {inputs}>{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(op.join(path, "c.out"), "a.in b.in\n")
    # re-running an unchanged command must be a no-op (HEAD stays put)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())
    # ATTN windows is sensitive to spaces before redirect symbol
    ds.run("echo {inputs[0]}>getitem", inputs=["*.in"])
    ok_file_has_content(op.join(path, "getitem"), "a.in\n")
    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(op.join(path, "expanded-pwd"), path, strip=True)
    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(op.join(path, "expanded-dspath"), ds.path, strip=True)
    subdir_path = op.join(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(op.join(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, ds.repo.format_commit("%B"))[1]["pwd"], "subdir")
    # Double brackets can be used to escape placeholders.
    ds.run("cd .> {{inputs}}", inputs=["*.in"])
    ok_exists(op.join(path, "{inputs}"))
    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
    script_out = cmout.getvalue()
    assert_in("echo a.in b.in>c.out", script_out)
    assert_in("echo {} >expanded-pwd".format(subdir_path), script_out)
    assert_in("echo {} >expanded-dspath".format(ds.path), script_out)
    # an unknown placeholder cannot be expanded -> 'impossible' result
    assert_result_count(
        ds.run("{unknown_placeholder}", on_failure="ignore"),
        1, status="impossible", action="run")
    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(op.join(path, "configured-license"), "gpl3",
                        strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
    assert_in("gpl3", cmout.getvalue())
    # {tmpdir} expands to a temporary directory with "datalad-run" in its name
    ds.run("echo {tmpdir} >tout")
    ok_file_has_content(op.join(path, "tout"), ".*datalad-run.*", re_=True)
def test_rerun_subdir(path):
    """Older variant of the subdirectory run/rerun test.

    Uses the legacy ``ds.run(rerun=True)`` interface and
    ``get_commit_runinfo(ds.repo)`` to read the run record; command uses
    python instead of ``touch`` for portability.
    """
    ds = Dataset(path).create()
    subdir = opj(path, 'subdir')
    mkdir(subdir)
    with chpwd(subdir):
        run("python -c 'open(\"test.dat\", \"wb\").close()'")
    ok_clean_git(ds.path)
    ok_file_under_git(opj(subdir, "test.dat"), annexed=True)
    rec_msg, runinfo = get_commit_runinfo(ds.repo)
    eq_(runinfo['pwd'], 'subdir')
    # now, rerun within root of the dataset
    with chpwd(ds.path):
        ds.run(rerun=True)
    ok_clean_git(ds.path)
    ok_file_under_git(opj(subdir, "test.dat"), annexed=True)
    # and not on top
    assert_raises(AssertionError, ok_file_under_git,
                  opj(ds.path, "test.dat"), annexed=True)
    # but if we run ds.run -- runs within top of the dataset
    with chpwd(subdir):
        ds.run("python -c 'open(\"test2.dat\", \"wb\").close()'")
    ok_clean_git(ds.path)
    ok_file_under_git(opj(ds.path, "test2.dat"), annexed=True)
    rec_msg, runinfo = get_commit_runinfo(ds.repo)
    eq_(runinfo['pwd'], '.')
    # now, rerun within subdir -- smoke for now
    with chpwd(subdir):
        ds.run(rerun=True)
def test_run_from_subds_gh3551(path):
    """Regression test for gh-3551: run from inside a subdataset.

    Inputs/outputs given as relative paths that reach *outside* the current
    subdataset (via ``..``) must be resolved against the superdataset, and
    outputs landing in a sibling subdataset must be saved there.
    """
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.create("output")
    with chpwd(op.join(ds.path, "sub")):
        # input already present -> the 'get' result is 'notneeded'
        assert_in_results(
            run("echo",
                inputs=[op.join(op.pardir, "sub", "input")],
                outputs=[op.join(op.pardir, "output")],
                return_type="list", result_filter=None, result_xfm=None),
            action="get",
            status="notneeded")
    assert_repo_status(ds.path)
    subds_path = op.join("output", "subds")
    ds.create(subds_path)
    with chpwd(op.join(ds.path, "sub")):
        output_dir = op.join(op.pardir, "output", "subds")
        # The below command is trying to be compatible. It could be made better
        # (e.g., actually using the input file) by someone that knows something
        # about Windows.
        assert_in_results(
            run("cd .> {}".format(op.join(output_dir, "f")),
                inputs=[op.join(op.pardir, "sub", "input")],
                outputs=[output_dir],
                return_type="list", result_filter=None, result_xfm=None),
            action="save",
            status="ok")
    assert_repo_status(ds.path)
    subds = Dataset(op.join(ds.path, subds_path))
    ok_exists(op.join(subds.path, "f"))
    if not ds.repo.is_managed_branch():  # FIXME
        # This check fails on Windows:
        # https://github.com/datalad/datalad/pull/3747/checks?check_run_id=248506560#step:8:254
        ok_(subds.repo.file_has_content("f"))
def test_placeholders(path):
    """Basic placeholder expansion in `run` commands.

    Verifies ``{inputs}``/``{outputs}`` substitution (including indexed
    ``{inputs[0]}``), ``{pwd}`` expansion from the dataset root and from a
    subdirectory, and double-brace escaping of placeholders.
    """
    dset = Dataset(path).create(force=True)
    dset.add(".")
    # {inputs} expands to the matched input files, {outputs} to the outputs
    dset.run("echo {inputs} >{outputs}",
             inputs=[".", "*.in"],
             outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")
    # an unchanged command re-run is a no-op: HEAD does not move
    sha_before_rerun = dset.repo.get_hexsha()
    dset.rerun()
    eq_(sha_before_rerun, dset.repo.get_hexsha())
    # inputs can be indexed inside the placeholder
    dset.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")
    # {pwd} expands to the execution directory ...
    dset.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path, strip=True)
    # ... which is the current subdirectory for plain run()
    sub_path = opj(path, "subdir")
    with chpwd(sub_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), sub_path,
                        strip=True)
    # Double brackets can be used to escape placeholders.
    dset.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))
def test_run_explicit(path):
    """Test `run` with ``explicit=True`` on a dirty dataset.

    Without ``explicit``, a dirty repository makes the run 'impossible';
    with it, the command executes, only declared outputs are saved, and the
    pre-existing dirt is left in place.
    """
    ds = Dataset(path)
    # fixture provides an annexed file whose content is not yet present
    assert_false(ds.repo.file_has_content("test-annex.dat"))
    create_tree(ds.path, {
        "dirt_untracked": "untracked",
        "dirt_modified": "modified"
    })
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")
    # We need explicit=True to run with dirty repo.
    assert_status(
        "impossible",
        ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
               inputs=["test-annex.dat"],
               on_failure="ignore"))
    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())
    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)
    remove(op.join(path, "doubled.dat"))
    hexsha_initial = ds.repo.get_hexsha()
    # with outputs declared, the result is committed this time
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    # the dirt must survive the explicit run untouched
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())
    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
def test_placeholders(path):
    """Placeholder expansion in `run` commands (variant with {dspath}).

    Adds ``{dspath}`` expansion, configured substitutions
    (``datalad.run.substitutions.*``), and ``rerun --script`` placeholder
    expansion on top of the basic ``{inputs}``/``{outputs}``/``{pwd}`` checks.
    """
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")
    # re-running an unchanged command is a no-op (HEAD stays put)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())
    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")
    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path, strip=True)
    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(opj(path, "expanded-dspath"), ds.path, strip=True)
    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, ds.repo.format_commit("%B"))[1]["pwd"], "subdir")
    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))
    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
    script_out = cmout.getvalue()
    assert_in("echo a.in b.in >c.out", script_out)
    assert_in("echo {} >expanded-pwd".format(subdir_path), script_out)
    assert_in("echo {} >expanded-dspath".format(ds.path), script_out)
    # unknown placeholder -> 'impossible' result instead of a crash
    assert_result_count(
        ds.run("{unknown_placeholder}", on_failure="ignore"),
        1, status="impossible", action="run")
    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(opj(path, "configured-license"), "gpl3", strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
    assert_in("gpl3", cmout.getvalue())
def test_run_explicit(path):
    """Variant of the ``explicit=True`` run test (compact formatting).

    Behavior checked is the same as the sibling variant: dirty repo is
    'impossible' without ``explicit``, declared inputs get retrieved,
    only declared outputs get committed, and dirt is preserved.
    """
    ds = Dataset(path)
    assert_false(ds.repo.file_has_content("test-annex.dat"))
    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")
    # We need explicit=True to run with dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))
    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())
    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)
    remove(op.join(path, "doubled.dat"))
    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())
    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
def test_invalid_call(path):
    """`run` must refuse to operate without a dataset or on a dirty one."""
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetArgumentFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        # untracked content makes the run 'impossible' (not an exception,
        # since on_failure='ignore' turns it into a result record)
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))
def test_placeholders(path):
    """Placeholder expansion test (variant reading the run record via
    ``ds.repo.repo.head.commit.message``; ends after the --script checks)."""
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")
    # re-running an unchanged command is a no-op (HEAD stays put)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())
    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")
    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path, strip=True)
    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(opj(path, "expanded-dspath"), ds.path, strip=True)
    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, ds.repo.repo.head.commit.message)[1]["pwd"],
        "subdir")
    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))
    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
    script_out = cmout.getvalue()
    assert_in("echo a.in b.in >c.out", script_out)
    assert_in("echo {} >expanded-pwd".format(subdir_path), script_out)
    assert_in("echo {} >expanded-dspath".format(ds.path), script_out)
def test_rerun_subdir(path):
    """Subdirectory run/rerun test (variant using Path.resolve kludge)."""
    # Note: Using with_tree rather than with_tempfile is matters. The latter
    # calls realpath on the path, which masks a failure in the
    # TMPDIR="/var/tmp/sym link" test case
    ds = Dataset(path).create(force=True)
    subdir = op.join(path, 'subdir')
    with chpwd(subdir):
        run("touch test.dat")
    assert_repo_status(ds.path)

    # FIXME: A plain ok_file_under_git call doesn't properly resolve the file
    # in the TMPDIR="/var/tmp/sym link" test case. Temporarily call realpath.
    def ok_file_under_git_kludge(path, basename):
        ok_file_under_git(op.join(str(Path(path).resolve()), basename),
                          annexed=True)

    ok_file_under_git_kludge(subdir, "test.dat")
    rec_msg, runinfo = get_run_info(ds, ds.repo.format_commit("%B"))
    eq_(runinfo['pwd'], 'subdir')
    # now, rerun within root of the dataset
    with chpwd(ds.path):
        ds.rerun()
    assert_repo_status(ds.path)
    ok_file_under_git_kludge(subdir, "test.dat")
    # and not on top
    assert_raises(AssertionError, ok_file_under_git,
                  op.join(ds.path, "test.dat"), annexed=True)
    # but if we run ds.run -- runs within top of the dataset
    with chpwd(subdir):
        ds.run("touch test2.dat")
    assert_repo_status(ds.path)
    ok_file_under_git_kludge(ds.path, "test2.dat")
    rec_msg, runinfo = get_run_info(ds, ds.repo.format_commit("%B"))
    eq_(runinfo['pwd'], '.')
    # now, rerun within subdir -- smoke for now
    with chpwd(subdir):
        ds.rerun()
def test_rerun_subdir(path):
    """Subdirectory run/rerun test (variant using op.realpath kludge)."""
    # Note: Using with_tree rather than with_tempfile is matters. The latter
    # calls realpath on the path, which masks a failure in the
    # TMPDIR="/var/tmp/sym link" test case
    ds = Dataset(path).create(force=True)
    subdir = opj(path, 'subdir')
    with chpwd(subdir):
        run("touch test.dat")
    ok_clean_git(ds.path)

    # FIXME: A plain ok_file_under_git call doesn't properly resolve the file
    # in the TMPDIR="/var/tmp/sym link" test case. Temporarily call realpath.
    def ok_file_under_git_kludge(path, basename):
        ok_file_under_git(opj(op.realpath(path), basename), annexed=True)

    ok_file_under_git_kludge(subdir, "test.dat")
    rec_msg, runinfo = get_run_info(ds, ds.repo.format_commit("%B"))
    eq_(runinfo['pwd'], 'subdir')
    # now, rerun within root of the dataset
    with chpwd(ds.path):
        ds.rerun()
    ok_clean_git(ds.path)
    ok_file_under_git_kludge(subdir, "test.dat")
    # and not on top
    assert_raises(AssertionError, ok_file_under_git,
                  opj(ds.path, "test.dat"), annexed=True)
    # but if we run ds.run -- runs within top of the dataset
    with chpwd(subdir):
        ds.run("touch test2.dat")
    ok_clean_git(ds.path)
    ok_file_under_git_kludge(ds.path, "test2.dat")
    rec_msg, runinfo = get_run_info(ds, ds.repo.format_commit("%B"))
    eq_(runinfo['pwd'], '.')
    # now, rerun within subdir -- smoke for now
    with chpwd(subdir):
        ds.rerun()
def test_run_inputs_outputs(src, path):
    """End-to-end test of input/output handling in `run`.

    Covers: retrieval of declared (and extra) inputs before execution,
    re-retrieval on rerun, globs vs explicit files, subdirectory inputs,
    ``inputs=['.']``, output unlocking/removal, combined input+output,
    glob expansion storage (``expand='both'``), and on-demand subdataset
    installation to resolve globs and save outputs.
    """
    # build a nested source hierarchy of subdatasets to clone from
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(
        run_command("cat {inputs} {inputs} >doubled.dat",
                    dataset=ds,
                    inputs=["input.dat"],
                    extra_inputs=["extra-input.dat"]))
    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    # extra inputs are retrieved but not substituted into the command
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)
    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    # a missing input only produces a warning, not a failure
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)
    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})
    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])
    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]
    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))
        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])
    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))
    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))
    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")
    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")
    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")
    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")
    # a non-existing output is filtered out with a debug message only
    with swallow_logs(new_level=logging.DEBUG) as cml:
        with swallow_outputs():
            ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)
    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])
    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat", strip=True)
    # outputs inside an uninstalled subdataset trigger its installation
    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"), "blah",
                        strip=True)
def test_run_explicit(origpath=None, path=None):
    """Newest variant of the ``explicit=True`` run test.

    Builds its own origin dataset and clone (instead of a fixture), uses the
    platform-appropriate ``cat_command``, and expects a missing input with
    ``on_failure='stop'`` to raise ``IncompleteResultsError``.
    """
    origds = Dataset(origpath).create()
    (origds.pathobj / "test-annex.dat").write_text('content')
    origds.save()
    ds = clone(origpath, path)
    # clone has the key but not the content yet
    assert_false(ds.repo.file_has_content("test-annex.dat"))
    create_tree(ds.path, {
        "dirt_untracked": "untracked",
        "dirt_modified": "modified"
    })
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")
    # We need explicit=True to run with dirty repo.
    assert_status(
        "impossible",
        ds.run(f"{cat_command} test-annex.dat test-annex.dat >doubled.dat",
               inputs=["test-annex.dat"],
               on_failure="ignore"))
    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run(f"{cat_command} test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True,
           result_renderer='disabled')
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())
    # A non-existing input with on_failure="stop" raises instead of warning.
    with assert_raises(IncompleteResultsError):
        ds.run("ls", inputs=["not-there"], explicit=True, on_failure="stop",
               result_renderer='disabled')
    remove(op.join(path, "doubled.dat"))
    hexsha_initial = ds.repo.get_hexsha()
    # with outputs declared, the result is committed this time
    ds.run(f"{cat_command} test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True, result_renderer='disabled')
    ok_(ds.repo.file_has_content("doubled.dat"))
    # pre-existing dirt must survive an explicit run untouched
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())
    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"],
            result_renderer='disabled')
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
def test_run_path_semantics(path):
    """Path-resolution semantics of `run` per gh-3435."""
    # Test that we follow path resolution from gh-3435: paths are relative to
    # dataset if a dataset instance is given and relative to the current
    # working directory otherwise.
    ds0 = Dataset(op.join(path, "ds0")).create()
    ds0_subdir = op.join(ds0.path, "s0")
    os.mkdir(ds0_subdir)
    # Although not useful, we can specify `dataset` as a string that lines up
    # with the one from the current directory.
    with chpwd(ds0_subdir):
        run("cd .> one", dataset="..")
        run("cd .> one", outputs=["one"], dataset=ds0.path)
    ok_exists(op.join(ds0_subdir, "one"))
    assert_repo_status(ds0.path)
    # Specify string dataset argument, running from another dataset ...
    ds1 = Dataset(op.join(path, "ds1")).create()
    ds1_subdir = op.join(ds1.path, "s1")
    os.mkdir(ds1_subdir)
    # ... producing output file in specified dataset
    with chpwd(ds1_subdir):
        run("cd .> {}".format(op.join(ds0.path, "two")), dataset=ds0.path)
    ok_exists(op.join(ds0.path, "two"))
    assert_repo_status(ds0.path)
    # ... producing output file in specified dataset and passing output file as
    # relative to current directory
    with chpwd(ds1_subdir):
        out = op.join(ds0.path, "three")
        run("cd .> {}".format(out), dataset=ds0.path,
            explicit=True, outputs=[op.relpath(out, ds1_subdir)])
    ok_exists(op.join(ds0.path, "three"))
    assert_repo_status(ds0.path)
    # ... producing output file outside of specified dataset, leaving it
    # untracked in the other dataset
    assert_repo_status(ds1.path)
    with chpwd(ds1_subdir):
        run("cd .> four", dataset=ds0.path)
    assert_repo_status(ds1.path, untracked=[ds1_subdir])
    # If we repeat above with an instance instead of the string, the directory
    # for the run is the specified dataset.
    with chpwd(ds1_subdir):
        run("cd .> five", dataset=ds0)
    ok_exists(op.join(ds0.path, "five"))
    assert_repo_status(ds0.path)
def test_run_inputs_outputs(src, path):
    """End-to-end input/output handling test (compact-formatting variant).

    Same coverage as the sibling variant: input retrieval (incl.
    extra_inputs), rerun retrieval, globs vs explicit files, subdirectory
    inputs, ``inputs=['.']``, output unlock/remove, combined input+output,
    ``expand='both'`` storage, and subdataset installation for globs/outputs.
    """
    # build a nested source hierarchy of subdatasets to clone from
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0",)]:
        Dataset(op.join(*((src,) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("cat {inputs} {inputs} >doubled.dat",
                     dataset=ds,
                     inputs=["input.dat"],
                     extra_inputs=["extra-input.dat"]))
    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    # extra inputs are retrieved but not substituted into the command
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)
    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    # a missing input only produces a warning, not a failure
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)
    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})
    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])
    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]
    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))
        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])
    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))
    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))
    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")
    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")
    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")
    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")
    # a non-existing output is filtered out with a debug message only
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)
    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])
    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat", strip=True)
    # outputs inside an uninstalled subdataset trigger its installation
    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"), "blah",
                        strip=True)
def test_run_inputs_outputs(src, path):
    """End-to-end input/output handling test (Windows-aware variant).

    Uses ``on_windows`` branches, ``DEFAULT_REMOTE``/``DEFAULT_BRANCH``
    constants, ``last_commit_msg`` and portable ``cd .>`` commands instead
    of ``touch``.
    """
    # build a nested source hierarchy of subdatasets to clone from
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True).save()
    src_ds = Dataset(src).create(force=True)
    src_ds.save()

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(
        run_command("{} {{inputs}} {{inputs}} >doubled.dat".format(
                        'type' if on_windows else 'cat'),
                    dataset=ds,
                    inputs=["input.dat"],
                    extra_inputs=["extra-input.dat"]))
    assert_repo_status(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    # extra inputs are retrieved but not substituted into the command
    with open(op.join(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)
    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})
    ds.save()
    ds.repo.copy_to(inputs, remote=DEFAULT_REMOTE)
    ds.repo.drop(inputs, options=["--force"])
    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]
    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))
        ds.run("cd .> dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], last_commit_msg(ds.repo))
        ds.repo.drop(inputs, options=["--force"])
    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.save("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote=DEFAULT_REMOTE)
    ds.repo.drop("subdir", options=["--force"])
    ds.run("cd .> subdir-dummy", inputs=[op.join(ds.path, "subdir")])
    ok_(all(
        ds.repo.file_has_content(op.join("subdir", f)) for f in ["a", "b"]))
    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(op.join("subdir", "a"), options=["--force"])
    with chpwd(op.join(path, "subdir")):
        run("cd .> subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(op.join("subdir", "a")))
    # --input=. runs "datalad get ."
    ds.run("cd .> dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.save()
    ds.repo.copy_to(["after-dot-run"], remote=DEFAULT_REMOTE)
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun(DEFAULT_BRANCH + "^")
    ds.repo.file_has_content("after-dot-run")
    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(op.join(path, "a.dat")) as fh:
        eq_(fh.read(),
            "a.dat' appended' \n" if on_windows else "a.dat appended\n")
    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n")
    # --input can be combined with --output.
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n appended\n")
    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    # --input/--output globs can be stored in expanded form.
    ds.run("cd .> expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", last_commit_msg(ds.repo))
    assert_in("b.dat", last_commit_msg(ds.repo))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])
    # We uninstall subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(
        op.join(ds.path, "globbed-subds"),
        "'s0\\s1_0\\s2\\a.dat' 's0\\s1_1\\s2\\c.dat'"
        if on_windows else "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
        strip=True)
    # outputs inside an uninstalled subdataset trigger its installation
    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"), "blah",
                        strip=True)
def test_run_inputs_outputs(path):
    """Exercise --input/--output handling of `datalad run` and `rerun`.

    NOTE(review): this appears to be an older variant of the same test that
    exists elsewhere in this file (it uses the deprecated ``ds.add``,
    ``ds.repo.repo`` GitPython access, and a hard-coded ``origin`` remote).
    Only comments/docstrings are added here; code tokens are unchanged.

    ``path`` is presumably a temporary dataset path supplied by a test
    fixture with a pre-registered annexed file ``test-annex.dat`` whose
    content lives on the ``origin`` remote — TODO confirm against the
    decorators above this chunk.
    """
    ds = Dataset(path)
    # Precondition: the annexed input's content is not present locally.
    assert_false(ds.repo.file_has_content("test-annex.dat"))

    # If we specify test-annex.dat as an input, it will be retrieved before
    # the run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"])
    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))

    # Rerunning the commit will also get the input file.
    ds.repo.drop("test-annex.dat", options=["--force"])
    assert_false(ds.repo.file_has_content("test-annex.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("test-annex.dat"))

    # A non-matching --input glob is only a warning, not an error.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["*.not-an-extension"])
        assert_in("No matching files found for '*.not-an-extension'",
                  cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})
    ds.add(".")
    # Push content to origin and drop locally so each run must retrieve it.
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])
    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]
    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))
        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        # Only files matched by the --input spec were retrieved.
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.repo.head.commit.message)
        # Reset for the next test case.
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f))
            for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        # Plain `run` (not ds.run) resolves "a" relative to the CWD.
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    # All annexed files now have their content locally.
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    # Rerun the --input=. commit (parent of the current tip).
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop("a.dat", options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        # The pre-run content was removed, so only the appended text remains.
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    # Rewind past the two append commits so a.dat is back to its original
    # one-line content before testing the combination.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # A non-matching --output glob is likewise only a warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("echo blah", outputs=["*.not-an-extension"])
        assert_in("No matching files found for '*.not-an-extension'",
                  cml.out)

    # Outputs inside a subdataset: create, overwrite a present file, then
    # overwrite one whose content was dropped.
    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.repo.head.commit.message)
    assert_in("b.dat", ds.repo.repo.head.commit.message)
    res = ds.rerun(report=True, return_type='item-or-list')
    # The recorded run info contains the expanded, not the glob, forms.
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])