def test_run_resource_specification(context):
    """A job's target resource can come from the job spec or the CLI.

    Precedence checked here: CLI ``resref`` > job-spec ``resource_id`` >
    job-spec ``resource_name``.  Each run targets a nonexistent resource,
    so we assert on which identifier shows up in ResourceNotFoundError.
    """
    path = context["directory"]
    run = context["run_fn"]
    create_tree(
        path,
        tree={"js0.yaml": "resource_name: name-via-js",
              "js1.yaml": ("resource_id: id-via-js\n"
                           "resource_name: name-via-js")})
    # Can specify name via job spec.
    with pytest.raises(ResourceNotFoundError) as exc:
        run(command=["doesnt", "matter"], job_specs=["js0.yaml"])
    assert "name-via-js" in str(exc.value)
    # If the job spec has both a name and an ID, the ID takes precedence.
    with pytest.raises(ResourceNotFoundError) as exc:
        run(command=["doesnt", "matter"], job_specs=["js1.yaml"])
    assert "id-via-js" in str(exc.value)
    # Command-line overrides job spec.
    with pytest.raises(ResourceNotFoundError) as exc:
        run(command=["doesnt", "matter"], resref="fromcli",
            job_specs=["js1.yaml"])
    assert "fromcli" in str(exc.value)
def test_run_and_fetch(context):
    """Submit a job from a spec file, then fetch it and check cleanup.

    After fetching, the job registry should be empty, a status query
    should report no jobs, and the job's output file should exist locally.
    """
    path = context["directory"]
    run = context["run_fn"]
    jobs = context["jobs_fn"]
    registry = context["registry"]
    create_tree(path, tree={
        "js0.yaml": ("resource_name: myshell\n"
                     "command_str: 'touch ok'\n"
                     "outputs: ['ok']")
    })
    run(job_specs=["js0.yaml"])
    with swallow_logs(new_level=logging.INFO) as log:
        with swallow_outputs() as output:
            jobs(queries=[], status=True)
            # Status output mentions the resource the job ran on.
            assert "myshell" in output.out
        assert len(registry.find_job_files()) == 1
        jobs(queries=[], action="fetch", all_=True)
        # Fetching removes the job from the local registry.
        assert len(registry.find_job_files()) == 0
        jobs(queries=[], status=True)
        assert "No jobs" in log.out
    # The declared output was transferred back.
    assert op.exists(op.join(path, "ok"))
def test_recursive_transfer(context):
    """Directories listed as inputs/outputs are transferred recursively."""
    run = context["run_fn"]
    path = context["directory"]
    jobs = context["jobs_fn"]
    # Our script takes an inventory of the execute directory so we can be
    # sure that all of the files have transferred.  It then creates a tree
    # that we verify is returned.
    create_tree(
        path,
        {"script": "find . > out_file ; mkdir out_dir ; touch out_dir/subfile",
         "in_dir": {"subfile": ""}})
    with swallow_outputs() as output:
        run(command=["sh", "-e", "script"],
            inputs=["script", "in_dir"],
            outputs=["out_file", "out_dir"],
            resref="myshell")
        try_fetch(lambda: jobs(queries=[], action="fetch", all_=True))
        assert op.exists(op.join(path, "out_file"))
        with open(op.join(path, "out_file")) as fo:
            lines = [line.strip() for line in fo]
        # The input directory's file made it to the execute directory ...
        assert "./in_dir/subfile" in lines
        # ... and the output directory's file made it back.
        assert op.exists(op.join(path, "out_dir", "subfile"))
def test_git_install(traced_repo_copy, tmpdir):
    """Installing a traced git package checks out the recorded commit.

    Covers: fresh install, idempotent re-install with ``check=True``,
    refusal to touch a dirty repository (warning only), and moving a
    clean repository to the intended commit (detached HEAD).
    """
    git_dist = traced_repo_copy["git_dist"]
    git_pkg = git_dist.packages[0]
    tmpdir = str(tmpdir)
    # Install package to a new location.
    install_dir = op.join(tmpdir, "installed")
    git_pkg.path = install_dir
    install(git_dist, install_dir, check=True)
    # Installing a second time works if the root hexsha's match.
    install(git_dist, install_dir, check=True)
    runner = GitRunner(cwd=install_dir)
    # We don't try to change the state of the repository if it's dirty.
    runner(["git", "reset", "--hard", "HEAD^"])
    hexsha_existing = current_hexsha(runner)
    create_tree(install_dir, {"dirt": "dirt"})
    with swallow_logs(new_level=logging.WARNING) as log:
        install(git_dist, install_dir)
        assert "repository is dirty" in log.out
    # The dirty repo was left exactly where it was.
    assert current_hexsha(runner) == hexsha_existing
    # We end up on the intended commit (detached) if the existing
    # installation repo is clean.
    os.remove(op.join(install_dir, "dirt"))
    install(git_dist, install_dir)
    assert current_hexsha(runner) == git_pkg.hexsha
    assert not current_branch(runner)
def test_head_at_no_move(dataset):
    """head_at targeting the checked-out branch must not detach HEAD."""
    with orcs.head_at(dataset, "master") as head_moved:
        assert not head_moved
        # Work done inside the context lands on master as usual ...
        create_tree(dataset.path, {"on-master": "on-maser"})
        dataset.add("on-master", message="advance master")
        assert dataset.repo.get_active_branch() == "master"
    # ... and master is still the active branch after exiting.
    assert dataset.repo.get_active_branch() == "master"
def test_orc_datalad_pair_run_ontop(job_spec, dataset, ssh):
    # Run one orchestrator and fetch, then run another and fetch:
    #
    #   orc 1, master
    #   |
    #   o  orc 0
    #   |
    #   o
    #
    # The second job's reproman ref should build on top of the first's,
    # and the final ref should be what master points to.
    ds = dataset
    create_tree(ds.path, {"in": "content\n"})
    ds.add(".")
    js0 = job_spec
    js1 = dict(job_spec, _resolved_command_str='bash -c "echo other >other"')
    with chpwd(ds.path):
        def do(js):
            # Full submit/follow/fetch cycle for one job spec.
            orc = orcs.DataladPairRunOrchestrator(
                ssh, submission_type="local", job_spec=js)
            orc.prepare_remote()
            orc.submit()
            orc.follow()
            orc.fetch()
            return orc

        orc0 = do(js0)
        orc1 = do(js1)
    ref0 = "refs/reproman/{}".format(orc0.jobid)
    ref1 = "refs/reproman/{}".format(orc1.jobid)
    # Second run extends the first rather than branching from the root.
    assert ds.repo.is_ancestor(ref0, ref1)
    assert ds.repo.get_hexsha(ref0) != ds.repo.get_hexsha(ref1)
    assert ds.repo.get_hexsha(ref1) == ds.repo.get_hexsha("master")
    assert ds.repo.get_active_branch() == "master"
def test_head_at_no_move(dataset):
    """head_at is a no-op when given the currently active branch."""
    repo = dataset.repo
    start_branch = repo.get_active_branch()
    with orcs.head_at(dataset, start_branch) as did_move:
        assert not did_move
        # Commits made in the context advance the original branch.
        create_tree(dataset.path, {"f0": "on original branch"})
        dataset.save("f0", message="advance branch")
        assert repo.get_active_branch() == start_branch
    # Exiting the context leaves the branch checked out.
    assert repo.get_active_branch() == start_branch
def test_combine_batch_params_glob(tmpdir):
    """Glob patterns in batch parameter values expand relative to the CWD."""
    root = str(tmpdir)
    create_tree(root, {"aaa": "a", "subdir": {"b": "b", "c": "c"}})
    with chpwd(root):
        # Expand inside the directory so the globs can match.
        params = _combine_batch_params(["foo=a*,subdir/*,other"])
        res = sorted(params, key=lambda rec: rec["foo"])
    expected = [{"foo": "aaa"},
                {"foo": "other"},
                {"foo": "subdir/b"},
                {"foo": "subdir/c"}]
    assert list(res) == expected
def base_dataset(tmpdir_factory):
    """Fixture: a fresh DataLad dataset with a few committed files.

    The initial commit is tagged "root" so tests can reset to or compare
    against the starting state.  Skips when DataLad is unavailable.
    """
    skipif.no_datalad()
    # Imported lazily so the module loads without datalad installed.
    import datalad.api as dl
    path = str(tmpdir_factory.mktemp("dataset"))
    ds = dl.Dataset(path).create(force=True)
    create_tree(ds.path,
                {"foo": "foo", "bar": "bar", "d": {"in": "content\n"}})
    ds.add(".")
    ds.repo.tag("root")
    return ds
def test_orc_datalad_abort_if_dirty(job_spec, dataset, ssh):
    """The pair-run orchestrator refuses dirty local or remote datasets.

    Checked in order: dirty local dataset, dirty remote working
    directory, a subdataset at a different (clean) commit, and a truly
    dirty nested subdataset.
    """
    subds = dataset.create(path="sub")
    subds.create(path="subsub")
    dataset.save()
    job_spec["inputs"] = []
    job_spec["outputs"] = []

    def get_orc(jspec=None):
        # Construct an orchestrator for the given (or default) spec.
        return orcs.DataladPairRunOrchestrator(
            ssh, submission_type="local", job_spec=jspec or job_spec)

    def run(**spec_kwds):
        # Full job cycle with the spec overridden by the given keywords.
        jspec = dict(job_spec, **spec_kwds)
        with chpwd(dataset.path):
            orc = get_orc(jspec)
            # Run one job so that we create the remote repository.
            orc.prepare_remote()
            orc.submit()
            orc.follow()
            orc.fetch()
            return orc

    with chpwd(dataset.path):
        # We abort if the local dataset is dirty.
        create_tree(dataset.path, {"local-dirt": ""})
        with pytest.raises(OrchestratorError) as exc:
            get_orc()
        assert "dirty" in str(exc.value)
        os.unlink("local-dirt")
    # Run one job so that we create the remote repository.
    run(_resolved_command_str="echo one >one")
    with chpwd(dataset.path):
        orc1 = get_orc()
        # We also abort if the *remote* working directory is dirty.
        create_tree(orc1.working_directory, {"dirty": ""})
        with pytest.raises(OrchestratorError) as exc:
            orc1.prepare_remote()
        assert "dirty" in str(exc.value)
    os.unlink(op.join(orc1.working_directory, "dirty"))
    # We can run if the submodule simply has a different commit checked out.
    run(_resolved_command_str="echo two >two")
    create_tree(op.join(dataset.path, "sub"), {"for-local-commit": ""})
    dataset.add(".", recursive=True)
    run(_resolved_command_str="echo three >three")
    # But we abort if subdataset is actually dirty.
    with chpwd(dataset.path):
        orc2 = get_orc()
        create_tree(orc2.working_directory,
                    {"sub": {"subsub": {"subdirt": ""}}})
        with pytest.raises(OrchestratorError) as exc:
            orc2.prepare_remote()
        assert "dirty" in str(exc.value)
    os.unlink(op.join(orc2.working_directory, "sub", "subsub", "subdirt"))
def fn(resource, jspec):
    """Drive a full PlainOrchestrator cycle on ``resource``.

    Uses the enclosing scope's ``local_dir`` as the working dataset.
    Verifies that directory inputs are transferred to the remote working
    directory, that the job produces ``out`` remotely, and that fetching
    brings the expected content back.
    """
    create_tree(local_dir, {"d": {"in": "content\n"}})
    with chpwd(local_dir):
        orc = orcs.PlainOrchestrator(resource, submission_type="local",
                                     job_spec=jspec)
        orc.prepare_remote()
        # Input files nested under a directory are transferred.
        assert orc.session.exists(
            op.join(orc.working_directory, "d", "in"))
        orc.submit()
        orc.follow()
        assert orc.session.exists(op.join(orc.working_directory, "out"))
        orc.fetch()
        # FIX: use a context manager instead of open("out").read(),
        # which leaked the file handle.
        with open("out") as fh:
            assert fh.read() == "content\nmore\n"
def test_head_at_move(dataset):
    """head_at detaches HEAD at the target and restores master on exit."""
    def exists_in_ds(name):
        return op.exists(op.join(dataset.path, name))

    create_tree(dataset.path, {"pre": "pre"})
    dataset.add("pre")
    with orcs.head_at(dataset, "master~1") as moved:
        # We are detached at master~1, i.e. before "pre" existed.
        assert moved
        assert dataset.repo.get_active_branch() is None
        assert not exists_in_ds("pre")
        create_tree(dataset.path, {"at-head": "at-head"})
        dataset.add("at-head", message="advance head (not master)")
    # Back on master: "pre" returns, the detached commit's file is gone.
    assert exists_in_ds("pre")
    assert not exists_in_ds("at-head")
    assert dataset.repo.get_active_branch() == "master"
def test_orc_datalad_abort_if_dirty(job_spec, dataset, shell):
    """prepare_remote() aborts when the remote working directory is dirty."""
    with chpwd(dataset.path):
        orc0 = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        # Run one job so that we create the remote repository.
        orc0.prepare_remote()
        orc0.submit()
        orc0.follow()
    with chpwd(dataset.path):
        orc1 = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        create_tree(orc1.working_directory, {"dirty": ""})
        with pytest.raises(OrchestratorError) as exc:
            orc1.prepare_remote()
        # FIX: assert on the exception itself (exc.value), not on the
        # pytest ExceptionInfo wrapper -- str(exc) describes the traceback
        # entry, not the message.  Matches sibling tests' usage.
        assert "dirty" in str(exc.value)
def test_head_at_move(dataset):
    """head_at detaches to the requested ref and restores the branch."""
    repo = dataset.repo
    start_branch = repo.get_active_branch()

    def in_worktree(name):
        return op.exists(op.join(dataset.path, name))

    create_tree(dataset.path, {"pre": "pre"})
    dataset.save("pre")
    with orcs.head_at(dataset, start_branch + "~1") as moved:
        # Detached one commit back, before "pre" was saved.
        assert moved
        assert repo.get_active_branch() is None
        assert not in_worktree("pre")
        create_tree(dataset.path, {"at-head": "at-head"})
        dataset.save("at-head",
                     message="advance head (not {})".format(start_branch))
    # Original branch restored: its file is back, the detached one gone.
    assert in_worktree("pre")
    assert not in_worktree("at-head")
    assert repo.get_active_branch() == start_branch
def test_orc_datalad_run_change_head(job_spec, dataset, shell):
    """Results can be fetched even after HEAD moves past the submission.

    The fetched results live on a ``refs/reproman/<jobid>`` ref that is
    not an ancestor of the advanced HEAD; head_at is used to inspect it.
    """
    with chpwd(dataset.path):
        orc = orcs.DataladLocalRunOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
        # Advance HEAD locally while the job output is still unfetched.
        create_tree(dataset.path,
                    {"sinceyouvebeengone": "imsomovingon,yeahyeah"})
        dataset.add(".")
        orc.fetch()
        ref = "refs/reproman/{}".format(orc.jobid)
        assert not dataset.repo.is_ancestor(ref, "HEAD")
        with orcs.head_at(dataset, ref):
            assert dataset.repo.file_has_content("out")
            # FIX: use a context manager instead of open("out").read(),
            # which leaked the file handle.
            with open("out") as fh:
                assert fh.read() == "content\nmore\n"
def fn(resource, jspec):
    """Drive a full PlainOrchestrator cycle and check fetched metadata.

    Uses the enclosing scope's ``local_dir`` as the working dataset.
    Beyond the output file, verifies that per-job metadata files
    (status/stderr/stdout) were fetched under the local meta directory.
    """
    create_tree(local_dir, {"d": {"in": "content\n"}})
    with chpwd(local_dir):
        orc = orcs.PlainOrchestrator(resource, submission_type="local",
                                     job_spec=jspec)
        orc.prepare_remote()
        # Input files nested under a directory are transferred.
        assert orc.session.exists(op.join(orc.working_directory, "d", "in"))
        orc.submit()
        orc.follow()
        assert orc.session.exists(op.join(orc.working_directory, "out"))
        orc.fetch()
        # FIX: use a context manager instead of open("out").read(),
        # which leaked the file handle.
        with open("out") as fh:
            assert fh.read() == "content\nmore\n"
        # Job metadata comes back under the (relative) meta directory.
        metadir_local = op.relpath(orc.meta_directory, orc.working_directory)
        for fname in "status", "stderr", "stdout":
            assert op.exists(op.join(metadir_local, fname + ".0"))
def test_head_at_dirty(dataset):
    """head_at refuses to move HEAD when the dataset has unsaved changes."""
    create_tree(dataset.path, {"dirt": ""})
    with pytest.raises(OrchestratorError), \
            orcs.head_at(dataset, "doesntmatter"):
        pass