def test_inject(path):
    """`inject=True` records a run commit without executing the command."""
    ds = Dataset(path).create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    # Drain the generator so the injected run record actually gets committed.
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    commit_msg = ds.repo.format_commit("%B")
    # Both the extra info and the (never executed) command must appear in the
    # recorded commit message.
    for fragment in ("custom_key", "nonsense command"):
        assert_in(fragment, commit_msg)
def test_inject(path):
    """`inject=True` should store a run record without running the command."""
    ds = Dataset(path).create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    # Exhaust the generator so the injected record is committed.
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    recorded = last_commit_msg(ds.repo)
    # The commit message carries both the extra info key and the command.
    for fragment in ("custom_key", "nonsense command"):
        assert_in(fragment, recorded)
def test_inject(path):
    """A run record can be injected without executing the command."""
    ds = Dataset(path).create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    # Drain the generator so the injected record is committed.
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    # ATTN: Use master explicitly so that this check works when we're on an
    # adjusted branch too (e.g., when this test is executed under Windows).
    recorded = ds.repo.format_commit("%B", "master")
    for fragment in ("custom_key", "nonsense command"):
        assert_in(fragment, recorded)
def _rerun(dset, results):
    """Replay run records from `results` on `dset`, yielding result records.

    Each item in `results` may carry a "rerun_action" key that dispatches the
    handling: no action or "skip" just passes the result through; "checkout"
    moves HEAD (to a new branch or detached); "pick" cherry-picks the original
    commit; "run" re-executes the recorded command via run_command().
    This is a generator; git operations happen as it is consumed.
    """
    for res in results:
        rerun_action = res.get("rerun_action")
        if not rerun_action:
            # Not a rerun-relevant result; pass through untouched.
            yield res
        elif rerun_action == "skip":
            yield res
        elif rerun_action == "checkout":
            if res.get("branch"):
                # Replay onto a freshly created branch.
                checkout_options = ["-b", res["branch"]]
            else:
                checkout_options = ["--detach"]
            dset.repo.checkout(res["commit"], options=checkout_options)
        elif rerun_action == "pick":
            # Non-run commit: replay it verbatim via cherry-pick.
            dset.repo.cherry_pick(res["commit"])
            yield res
        elif rerun_action == "run":
            hexsha = res["commit"]
            run_info = res["run_info"]
            # Keep a "rerun" trail.
            if "chain" in run_info:
                run_info["chain"].append(hexsha)
            else:
                run_info["chain"] = [hexsha]
            # now we have to find out what was modified during the last run,
            # and enable re-modification ideally, we would bring back the
            # entire state of the tree with #1424, but we limit ourself to file
            # addition/not-in-place-modification for now
            auto_outputs = (ap["path"] for ap in new_or_modified(res["diff"]))
            outputs = run_info.get("outputs", [])
            outputs_dir = op.join(dset.path, run_info["pwd"])
            # Only treat paths as auto-outputs if they are not already listed
            # in the record's explicit outputs.
            auto_outputs = [p for p in auto_outputs
                            # run records outputs relative to the "pwd" field.
                            if op.relpath(p, outputs_dir) not in outputs]
            message = res["rerun_message"] or res["run_message"]
            for r in run_command(run_info['cmd'],
                                 dataset=dset,
                                 inputs=run_info.get("inputs", []),
                                 extra_inputs=run_info.get("extra_inputs", []),
                                 outputs=outputs,
                                 rerun_outputs=auto_outputs,
                                 message=message,
                                 rerun_info=run_info):
                yield r
def test_run_inputs_outputs(src, path):
    """Exercise --input/--output handling of `run` and `rerun` end to end."""
    # Build a nested subdataset hierarchy under `src` so glob resolution
    # across (uninstalled) subdatasets can be tested further below.
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True).save()
    src_ds = Dataset(src).create(force=True)
    src_ds.save()
    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("{} {{inputs}} {{inputs}} >doubled.dat".format(
        'type' if on_windows else 'cat'),
        dataset=ds,
        inputs=["input.dat"], extra_inputs=["extra-input.dat"]))
    assert_repo_status(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(op.join(path, "doubled.dat")) as fh:
        content = fh.read()
    # {inputs} expands only the declared inputs, not the extra inputs.
    assert_in("input", content)
    assert_not_in("extra-input", content)
    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    # A non-existing input is only warned about, not an error.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("cd .> dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)
    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})
    ds.save()
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])
    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]
    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))
        ds.run("cd .> dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])
    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.save("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("cd .> subdir-dummy", inputs=[op.join(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(op.join("subdir", f))
            for f in ["a", "b"]))
    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(op.join("subdir", "a"), options=["--force"])
    with chpwd(op.join(path, "subdir")):
        run("cd .> subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(op.join("subdir", "a")))
    # --input=. runs "datalad get ."
    ds.run("cd .> dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.save()
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")
    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(op.join(path, "a.dat")) as fh:
        eq_(fh.read(),
            "a.dat' appended' \n" if on_windows else "a.dat appended\n")
    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n")
    # --input can be combined with --output.
    ds.repo.call_git(["reset", "--hard", "HEAD~2"])
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), "a.dat appended\n")
    if not on_windows:
        # see datalad#2606
        with swallow_logs(new_level=logging.DEBUG) as cml:
            with swallow_outputs():
                ds.run("echo blah", outputs=["not-there"])
                assert_in("Filtered out non-existing path: ", cml.out)
    # Outputs inside a subdataset work as well.
    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    # --input/--output globs can be stored in expanded form.
    ds.run("cd .> expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])
    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(
        op.join(ds.path, "globbed-subds"),
        "'s0\\s1_0\\s2\\a.dat' 's0\\s1_1\\s2\\c.dat'" if on_windows
        else "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
        strip=True)
    # A glob in --output likewise triggers installation of the subdataset.
    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah", strip=True)
def _rerun(dset, results, assume_ready=None, explicit=False, jobs=None):
    """Replay a sequence of revisions (runs, picks, merges) on `dset`.

    `results` is an iterable of result records, each optionally tagged with a
    "rerun_action" of "checkout", "merge", "skip-or-pick", or "run" that
    controls how the corresponding commit is replayed.  Records are yielded
    back (possibly re-marked as "skip" or "pick").  `assume_ready`,
    `explicit`, and `jobs` are forwarded to run_command() for "run" actions.
    This is a generator; git operations happen as it is consumed.
    """
    ds_repo = dset.repo
    # Keep a map from an original hexsha to a new hexsha created by the rerun
    # (i.e. a reran, cherry-picked, or merged commit).
    new_bases = {}  # original hexsha => reran hexsha
    branch_to_restore = ds_repo.get_active_branch()
    head = onto = ds_repo.get_hexsha()
    for res in results:
        lgr.info(_get_rerun_log_msg(res))
        rerun_action = res.get("rerun_action")
        if not rerun_action:
            # Not a rerun-relevant result; pass through untouched.
            yield res
            continue

        res_hexsha = res["commit"]

        if rerun_action == "checkout":
            if res.get("branch"):
                branch = res["branch"]
                checkout_options = ["-b", branch]
                branch_to_restore = branch
            else:
                checkout_options = ["--detach"]
                branch_to_restore = None
            ds_repo.checkout(res_hexsha,
                             options=checkout_options)
            head = onto = res_hexsha
            continue

        # First handle the two cases that don't require additional steps to
        # identify the base, a root commit or a merge commit.

        if not res["parents"]:
            _mark_nonrun_result(res, "skip")
            yield res
            continue

        if rerun_action == "merge":
            old_parents = res["parents"]
            new_parents = [new_bases.get(p, p) for p in old_parents]
            if old_parents == new_parents:
                # Nothing upstream of this merge was rewritten; just make
                # sure the merge itself is reachable from HEAD.
                if not ds_repo.is_ancestor(res_hexsha, head):
                    ds_repo.checkout(res_hexsha)
            elif res_hexsha != head:
                if ds_repo.is_ancestor(res_hexsha, onto):
                    # Drop parents that are already contained in the
                    # starting point of the replay.
                    new_parents = [p for p in new_parents
                                   if not ds_repo.is_ancestor(p, onto)]
                if new_parents:
                    if new_parents[0] != head:
                        # Keep the direction of the original merge.
                        ds_repo.checkout(new_parents[0])
                    if len(new_parents) > 1:
                        # Re-create the merge with the original message.
                        msg = ds_repo.format_commit("%B", res_hexsha)
                        ds_repo.call_git(
                            ["merge", "-m", msg,
                             "--no-ff", "--allow-unrelated-histories"] +
                            new_parents[1:])
                    head = ds_repo.get_hexsha()
                    new_bases[res_hexsha] = head
            yield res
            continue

        # For all the remaining actions, first make sure we're on the
        # appropriate base.

        parent = res["parents"][0]
        new_base = new_bases.get(parent)
        head_to_restore = None  # ... to find our way back if we skip.

        if new_base:
            if new_base != head:
                ds_repo.checkout(new_base)
                head_to_restore, head = head, new_base
        elif parent != head and ds_repo.is_ancestor(onto, parent):
            if rerun_action == "run":
                ds_repo.checkout(parent)
                head = parent
            else:
                # Not a run commit and already reachable; nothing to replay.
                _mark_nonrun_result(res, "skip")
                yield res
                continue
        else:
            if parent != head:
                # Remember the effective base for this commit's parent.
                new_bases[parent] = head

        # We've adjusted base. Now skip, pick, or run the commit.

        if rerun_action == "skip-or-pick":
            if ds_repo.is_ancestor(res_hexsha, head):
                _mark_nonrun_result(res, "skip")
                if head_to_restore:
                    ds_repo.checkout(head_to_restore)
                    head, head_to_restore = head_to_restore, None
                yield res
                continue
            else:
                ds_repo.cherry_pick(res_hexsha)
                _mark_nonrun_result(res, "pick")
                yield res
        elif rerun_action == "run":
            run_info = res["run_info"]
            # Keep a "rerun" trail.
            if "chain" in run_info:
                run_info["chain"].append(res_hexsha)
            else:
                run_info["chain"] = [res_hexsha]
            # now we have to find out what was modified during the last run,
            # and enable re-modification ideally, we would bring back the
            # entire state of the tree with #1424, but we limit ourself to file
            # addition/not-in-place-modification for now
            auto_outputs = (ap["path"] for ap in new_or_modified(res["diff"]))
            outputs = run_info.get("outputs", [])
            outputs_dir = op.join(dset.path, run_info["pwd"])
            auto_outputs = [p for p in auto_outputs
                            # run records outputs relative to the "pwd" field.
                            if op.relpath(p, outputs_dir) not in outputs]
            message = res["rerun_message"] or res["run_message"]
            for r in run_command(run_info['cmd'],
                                 dataset=dset,
                                 inputs=run_info.get("inputs", []),
                                 extra_inputs=run_info.get("extra_inputs", []),
                                 outputs=outputs,
                                 assume_ready=assume_ready,
                                 explicit=explicit,
                                 rerun_outputs=auto_outputs,
                                 message=message,
                                 jobs=jobs,
                                 rerun_info=run_info):
                yield r
        # Record where the replayed commit ended up so later commits that
        # build on it find the rewritten base.
        new_head = ds_repo.get_hexsha()
        if new_head not in [head, res_hexsha]:
            new_bases[res_hexsha] = new_head
        head = new_head

    if branch_to_restore:
        # The user asked us to replay the sequence onto a branch, but the
        # history had merges, so we're in a detached state.
        ds_repo.update_ref("refs/heads/" + branch_to_restore,
                           "HEAD")
        ds_repo.checkout(branch_to_restore)
def __call__(cmd, container_name=None, dataset=None,
             inputs=None, outputs=None, message=None, expand=None,
             explicit=False, sidecar=None):
    """Run `cmd` inside a registered container via `run_command`.

    Resolves the container record named `container_name` in the dataset,
    expands the container's "cmdexec" template (or prepends the image path
    when there is none), and delegates execution to run_command() with the
    container image registered as an extra input.  Yields result records.
    """
    from unittest.mock import patch  # delayed, since takes long (~600ms for yoh)
    pwd, _ = get_command_pwds(dataset)
    ds = require_dataset(dataset, check_installed=True,
                         purpose='run a containerized command execution')

    container = None
    for res in find_container_(ds, container_name):
        if res.get("action") == "containers":
            container = res
        else:
            yield res
    assert container, "bug: container should always be defined here"

    image_path = op.relpath(container["path"], pwd)
    # container record would contain path to the (sub)dataset containing
    # it. If not - take current dataset, as it must be coming from it
    image_dspath = op.relpath(container.get('parentds', ds.path), pwd)

    # sure we could check whether the container image is present,
    # but it might live in a subdataset that isn't even installed yet
    # let's leave all this business to `get` that is called by `run`

    cmd = normalize_command(cmd)
    # expand the command with container execution
    if 'cmdexec' in container:
        callspec = container['cmdexec']

        # Temporary kludge to give a more helpful message
        if callspec.startswith("["):
            import simplejson
            try:
                simplejson.loads(callspec)
            except simplejson.errors.JSONDecodeError:
                pass  # Never mind, false positive.
            else:
                # BUG FIX: the message previously said "cmdexe"; the record
                # field is named "cmdexec".
                raise ValueError(
                    'cmdexec {!r} is in an old, unsupported format. '
                    'Convert it to a plain string.'.format(callspec))
        try:
            cmd_kwargs = dict(
                img=image_path,
                cmd=cmd,
                img_dspath=image_dspath,
            )
            cmd = callspec.format(**cmd_kwargs)
        except KeyError as exc:
            yield get_status_dict(
                'run',
                ds=ds,
                status='error',
                message=(
                    'Unrecognized cmdexec placeholder: %s. '
                    'See containers-add for information on known ones: %s',
                    exc,
                    ", ".join(cmd_kwargs)))
            return
    else:
        # just prepend and pray
        cmd = container['path'] + ' ' + cmd

    with patch.dict('os.environ',
                    {CONTAINER_NAME_ENVVAR: container['name']}):
        # fire!
        for r in run_command(
                cmd=cmd,
                # Avoid a double dataset resolution when the caller already
                # gave one (or we are running from the dataset root).
                dataset=dataset or (ds if ds.path == pwd else None),
                inputs=inputs,
                extra_inputs=[image_path],
                outputs=outputs,
                message=message,
                expand=expand,
                explicit=explicit,
                sidecar=sidecar):
            yield r