def test_addurls_url_parts(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{_url0}/{_url_basename}")

        for fname in ["a.dat", "b.dat", "c.dat"]:
            ok_exists(op.join("udir", fname))


def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            hexsha_before = ds.repo.get_hexsha()
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)
            hexsha_after = ds.repo.get_hexsha()

            for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                ok_exists(fname.format(label))

            assert_true(save ^ (hexsha_before == hexsha_after))
            assert_true(save ^ ds.repo.dirty)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.add(".")
        eq_(set(subdatasets(ds, recursive=True, result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)


def test_rerun_script(path):
    ds = Dataset(path).create()
    ds.run("echo a >foo")
    ds.run(["touch", "bar"], message='BAR', sidecar=True)
    # a run record sidecar file was added with the last commit
    assert(any(d['path'].startswith(opj(ds.path, '.datalad', 'runinfo'))
               for d in ds.rerun(report=True,
                                 return_type='item-or-list')['diff']))
    bar_hexsha = ds.repo.get_hexsha()

    script_file = opj(path, "commands.sh")

    ds.rerun(script=script_file)
    ok_exists(script_file)
    with open(script_file) as sf:
        lines = sf.readlines()
        assert_in("touch bar\n", lines)
        # The commit message is there too.
        assert_in("# BAR\n", lines)
        assert_in("# (record: {})\n".format(bar_hexsha), lines)
        assert_not_in("echo a >foo\n", lines)

    ds.rerun(since="", script=script_file)
    with open(script_file) as sf:
        lines = sf.readlines()
        assert_in("touch bar\n", lines)
        # Automatic commit messages aren't included.
        assert_not_in("# echo a >foo\n", lines)
        assert_in("echo a >foo\n", lines)

    # --script=- writes to stdout.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
        assert_in("touch bar",
                  cmout.getvalue().splitlines())


def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]  # hooks enabled in all datasets
    not_paths = []  # _path_(".git/datalad/metadata")]  # metadata only on publish
                    # ATM we run post-update hook also upon create since it might
                    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    # correct ls_json command in hook content (path wrapped in quotes)
    ok_file_has_content(_path_(target_path, '.git/hooks/post-update'),
                        '.*datalad ls -a --json file \'%s\'.*' % target_path,
                        re_=True,
                        flags=re.DOTALL)


def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path, strip=True)

    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(opj(path, "expanded-dspath"), ds.path, strip=True)

    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, ds.repo.format_commit("%B"))[1]["pwd"], "subdir")

    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))

    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
        script_out = cmout.getvalue()
        assert_in("echo a.in b.in >c.out", script_out)
        assert_in("echo {} >expanded-pwd".format(subdir_path), script_out)
        assert_in("echo {} >expanded-dspath".format(ds.path), script_out)

    assert_result_count(
        ds.run("{unknown_placeholder}", on_failure="ignore"),
        1, status="impossible", action="run")

    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(opj(path, "configured-license"), "gpl3", strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
        assert_in("gpl3", cmout.getvalue())


def test_addurls(self, path):
    ds = Dataset(path).create(force=True)

    def get_annex_commit_counts():
        return int(
            ds.repo.repo.git.rev_list("--count", "git-annex").strip())

    n_annex_commits = get_annex_commit_counts()

    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{name}")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(fname)

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta,
                              {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        if not os.environ.get('DATALAD_FAKE__DATES'):
            # We should have two new commits on the git-annex branch: one for
            # the added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       ifexists="overwrite")
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(
            ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
            action="addurls",
            status="notneeded")

        # Adding to already existing links works, as long as the content is
        # the same.
        ds.addurls(self.json_file, "{url}", "{name}")

        # But it fails if something has changed.
        ds.unlock("a")
        with open("a", "w") as ofh:
            ofh.write("changed")
        ds.save("a")

        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file, "{url}", "{name}")


def test_addurls_repindex(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        with assert_raises(IncompleteResultsError) as raised:
            ds.addurls(self.json_file, "{url}", "{subdir}")
        assert_in("There are file name collisions", str(raised.exception))

        ds.addurls(self.json_file, "{url}", "{subdir}-{_repindex}")

        for fname in ["foo-0", "bar-0", "foo-1"]:
            ok_exists(fname)


def test_lock_if_check_fails(tempfile):
    # basic test, should never try to lock so filename is not important
    with lock_if_check_fails(True, None) as (check, lock):
        assert check is True
        assert lock is None
    assert check  # still available outside

    # and with a callable
    with lock_if_check_fails(lambda: "valuable", None) as (check, lock):
        eq_(check, "valuable")
        assert lock is None
    eq_(check, "valuable")

    # now the check fails, so the lock should actually be acquired
    with lock_if_check_fails(False, tempfile) as (check, lock):
        ok_(lock.acquired)
        ok_exists(tempfile + '.lck')
    assert not op.exists(tempfile + '.lck')  # and it gets removed after

    # the same with providing operation
    with lock_if_check_fails(False, tempfile, operation='get') as (check, lock):
        ok_(lock.acquired)
        ok_exists(tempfile + '.get-lck')
    assert not op.exists(tempfile + '.get-lck')  # and it gets removed after

    def subproc(q):
        with lock_if_check_fails(False, tempfile, blocking=False) as (_, lock2):
            q.put(lock2.acquired)

    from multiprocessing import Queue, Process
    q = Queue()
    p = Process(target=subproc, args=(q,))

    # now we need somehow to actually check the bloody lock functioning
    with lock_if_check_fails((op.exists, (tempfile,)), tempfile) as (check, lock):
        eq_(check, False)
        ok_(lock.acquired)
        # but now we will try to lock again, but we need to do it in another
        # process
        p.start()
        assert q.get() is False
        p.join()
        with open(tempfile, 'w') as f:
            pass
        ok_exists(tempfile)
    ok_exists(tempfile)

    # and we redo -- it will acquire it
    p = Process(target=subproc, args=(q,))
    p.start()
    ok_(q.get())
    p.join()


def test_py2_unicode_command(path):
    # Avoid OBSCURE_FILENAME to avoid windows-breakage (gh-2929).
    ds = Dataset(path).create()
    touch_cmd = "import sys; open(sys.argv[1], 'w').write('')"
    cmd_str = u"{} -c \"{}\" {}".format(sys.executable,
                                        touch_cmd,
                                        u"bβ0.dat")
    ds.run(cmd_str)
    assert_repo_status(ds.path)
    ok_exists(op.join(path, u"bβ0.dat"))

    if not on_windows:  # FIXME
        ds.run([sys.executable, "-c", touch_cmd, u"bβ1.dat"])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u"bβ1.dat"))

        # Send in a list of byte-strings to mimic a py2 command-line
        # invocation.
        ds.run([s.encode("utf-8")
                for s in [sys.executable, "-c", touch_cmd, u" β1 "]])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u" β1 "))

    with assert_raises(CommandError), swallow_outputs():
        ds.run(u"bβ2.dat")


def test_addurls_version(self, path):
    ds = Dataset(path).create(force=True)

    def version_fn(url):
        if url.endswith("b.dat"):
            raise ValueError("Scheme error")
        return url + ".v1"

    with patch("datalad.plugin.addurls.get_versioned_url", version_fn):
        with swallow_logs(new_level=logging.WARNING) as cml:
            ds.addurls(self.json_file, "{url}", "{name}", version_urls=True)
            assert_in("b.dat", str(cml.out))

    names = ["a", "c"]
    for fname in names:
        ok_exists(os.path.join(path, fname))

    whereis = ds.repo.whereis(names, output="full")
    for fname, info in whereis.items():
        eq_(info[ds.repo.WEB_UUID]['urls'],
            ["{}udir/{}.dat.v1".format(self.url, fname)])


def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with assert_raises(CommandError):
        ds.run("echo x$(cat sub/grows) > sub/grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", recursive=True, message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = opj(subds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))


def test_addurls_subdataset(self, path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)

            subdirs = ["{}-{}".format(d, label) for d in ["foo", "bar"]]
            subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

            for subds, fnames in subdir_files.items():
                for fname in fnames:
                    ok_exists(op.join(subds, fname))

            if save:
                assert_repo_status(path)
            else:
                # The datasets are created and saved ...
                assert_repo_status(path, modified=subdirs)
                # ... but the downloaded files aren't.
                for subds, fnames in subdir_files.items():
                    assert_repo_status(subds, added=fnames)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.save()
        eq_(set(subdatasets(dataset=ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)


def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]  # hooks enabled in all datasets
    not_paths = []  # _path_(".git/datalad/metadata")]  # metadata only on publish
                    # ATM we run post-update hook also upon create since it might
                    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    hook_path = _path_(target_path, '.git/hooks/post-update')

    # No longer the case -- we are no longer using absolute path in the
    # script
    # ok_file_has_content(hook_path,
    #                     '.*\ndsdir="%s"\n.*' % target_path,
    #                     re_=True,
    #                     flags=re.DOTALL)

    # No absolute path (so dataset could be moved) in the hook
    with open(hook_path) as f:
        assert_not_in(target_path, f.read())

    # correct ls_json command in hook content (path wrapped in "quotes")
    ok_file_has_content(hook_path,
                        '.*datalad ls -a --json file \..*',
                        re_=True,
                        flags=re.DOTALL)


def test_addurls_create_newdataset(self, path):
    dspath = os.path.join(path, "ds")
    addurls(dspath, self.json_file, "{url}", "{name}")
    for fname in ["a", "b", "c"]:
        ok_exists(os.path.join(dspath, fname))


def test_addurls(self, path):
    ds = Dataset(path).create(force=True)

    def get_annex_commit_counts():
        return len(ds.repo.get_revisions("git-annex"))

    n_annex_commits = get_annex_commit_counts()

    # Meanwhile also test that we can specify path relative
    # to the top of the dataset, as we generally treat paths in
    # Python API, and it will be the one saved in commit
    # message record
    json_file = op.relpath(self.json_file, ds.path)

    ds.addurls(json_file, "{url}", "{name}",
               exclude_autometa="(md5sum|size)")
    ok_startswith(ds.repo.format_commit('%b', DEFAULT_BRANCH),
                  f"url_file='{json_file}'")

    filenames = ["a", "b", "c"]
    for fname in filenames:
        ok_exists(op.join(ds.path, fname))

    for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                     ["foo", "bar", "foo"]):
        assert_dict_equal(meta,
                          {"subdir": [subdir], "name": [fname]})

    # Ignore this check if we're faking dates because that disables
    # batch mode.
    # Also ignore if on Windows as it seems as if a git-annex bug
    # leads to separate meta data commits:
    # https://github.com/datalad/datalad/pull/5202#discussion_r535429704
    if not (dl_cfg.get('datalad.fake-dates') or on_windows):
        # We should have two new commits on the git-annex branch: one for the
        # added urls and one for the added metadata.
        eq_(n_annex_commits + 2, get_annex_commit_counts())

    # Add to already existing links, overwriting.
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.addurls(self.json_file, "{url}", "{name}",
                   ifexists="overwrite")
        for fname in filenames:
            assert_in("Removing {}".format(os.path.join(path, fname)),
                      cml.out)

    # Add to already existing links, skipping.
    assert_in_results(ds.addurls(self.json_file, "{url}", "{name}",
                                 ifexists="skip"),
                      action="addurls",
                      status="notneeded")

    # Adding to already existing links works, as long as the content is
    # the same.
    ds.addurls(self.json_file, "{url}", "{name}")

    # But it fails if something has changed.
    ds.unlock("a")
    with open(op.join(ds.path, "a"), "w") as ofh:
        ofh.write("changed")
    ds.save("a")

    assert_raises(IncompleteResultsError,
                  ds.addurls,
                  self.json_file, "{url}", "{name}")


def test_addurls_url_filename(self, path):
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{_url0}/{_url_filename}")
        for fname in ["udir/a.dat", "udir/b.dat", "udir/c.dat"]:
            ok_exists(fname)


def test_addurls_from_list(self, path):
    ds = Dataset(path).create()
    ds.addurls(self.data, "{url}", "{name}")
    for fname in ["a", "b", "c"]:
        ok_exists(op.join(path, fname))


def _test_create_store(host, base_path, ds_path, clone_path):
    ds = Dataset(ds_path).create(force=True)

    subds = ds.create('sub', force=True)
    subds2 = ds.create('sub2', force=True, annex=False)
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    # don't specify special remote. By default should be git-remote + "-storage"
    res = ds.create_sibling_ria("ria+ssh://test-store:", "datastore",
                                post_update_hook=True)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_(len(res), 1)

    # remotes exist, but only in super
    siblings = ds.siblings(result_renderer=None)
    eq_({'datastore', 'datastore-storage', 'here'},
        {s['name'] for s in siblings})
    sub_siblings = subds.siblings(result_renderer=None)
    eq_({'here'}, {s['name'] for s in sub_siblings})
    sub2_siblings = subds2.siblings(result_renderer=None)
    eq_({'here'}, {s['name'] for s in sub2_siblings})

    # check bare repo:
    git_dir = Path(base_path) / ds.id[:3] / ds.id[3:]

    # The post-update hook was enabled.
    ok_exists(git_dir / "hooks" / "post-update")
    # And create_sibling_ria took care of an initial call to
    # git-update-server-info.
    ok_exists(git_dir / "info" / "refs")

    git_config = git_dir / 'config'
    ok_exists(git_config)
    content = git_config.read_text()
    assert_in("[datalad \"ora-remote\"]", content)
    super_uuid = ds.config.get(
        "remote.{}.annex-uuid".format('datastore-storage'))
    assert_in("uuid = {}".format(super_uuid), content)

    # implicit test of success by ria-installing from store:
    ds.push(to="datastore")
    with chpwd(clone_path):
        if host:
            # note, we are not using the "test-store"-label here
            clone('ria+ssh://{}{}#{}'.format(host, base_path, ds.id),
                  path='test_install')
        else:
            # TODO: Whenever ria+file supports special remote config (label),
            # change here:
            clone('ria+file://{}#{}'.format(base_path, ds.id),
                  path='test_install')
        installed_ds = Dataset(op.join(clone_path, 'test_install'))
        assert installed_ds.is_installed()
        assert_repo_status(installed_ds.repo)
        eq_(installed_ds.id, ds.id)
        # Note: get_annexed_files() always reports POSIX paths.
        assert_in('ds/file1.txt',
                  installed_ds.repo.get_annexed_files())
        assert_result_count(installed_ds.get(op.join('ds', 'file1.txt')),
                            1,
                            status='ok',
                            action='get',
                            path=op.join(installed_ds.path, 'ds', 'file1.txt'))

    # now, again but recursive.
    res = ds.create_sibling_ria("ria+ssh://test-store:", "datastore",
                                recursive=True, existing='reconfigure')
    eq_(len(res), 3)
    assert_result_count(res, 1,
                        path=str(ds.pathobj),
                        status='ok',
                        action="create-sibling-ria")
    assert_result_count(res, 1,
                        path=str(subds.pathobj),
                        status='ok',
                        action="create-sibling-ria")
    assert_result_count(res, 1,
                        path=str(subds2.pathobj),
                        status='ok',
                        action="create-sibling-ria")

    # remotes now exist in super and sub
    siblings = ds.siblings(result_renderer=None)
    eq_({'datastore', 'datastore-storage', 'here'},
        {s['name'] for s in siblings})
    sub_siblings = subds.siblings(result_renderer=None)
    eq_({'datastore', 'datastore-storage', 'here'},
        {s['name'] for s in sub_siblings})
    # but no special remote in plain git subdataset:
    sub2_siblings = subds2.siblings(result_renderer=None)
    eq_({'datastore', 'here'},
        {s['name'] for s in sub2_siblings})

    # for testing trust_level parameter, redo for each label:
    for trust in ['trust', 'semitrust', 'untrust']:
        ds.create_sibling_ria("ria+ssh://test-store:", "datastore",
                              existing='reconfigure',
                              trust_level=trust)
        res = ds.repo.repo_info()
        assert_in('[datastore-storage]',
                  [r['description']
                   for r in res['{}ed repositories'.format(trust)]])


def check_addurls_stdin_input(self, input_text, input_type, path):
    ds = Dataset(path).create(force=True)
    with patch("sys.stdin", new=StringIO(input_text)):
        ds.addurls("-", "{url}", "{name}", input_type=input_type)

    for fname in ["a", "b", "c"]:
        ok_exists(op.join(ds.path, fname))


def test_run_inputs_no_annex_repo(path):
    ds = Dataset(path).create(annex=False)
    # Running --input in a plain Git repo doesn't fail.
    ds.run("cd .> dummy", inputs=["*"])
    ok_exists(op.join(ds.path, "dummy"))
    ds.rerun()


def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    assert_repo_status(ds.path)
    # ATTN windows is sensitive to spaces before redirect symbol
    ds.run("echo {inputs}>{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(op.join(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    # ATTN windows is sensitive to spaces before redirect symbol
    ds.run("echo {inputs[0]}>getitem", inputs=["*.in"])
    ok_file_has_content(op.join(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(op.join(path, "expanded-pwd"), path, strip=True)

    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(op.join(path, "expanded-dspath"), ds.path, strip=True)

    subdir_path = op.join(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(op.join(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, last_commit_msg(ds.repo))[1]["pwd"], "subdir")

    # Double brackets can be used to escape placeholders.
    ds.run("cd .> {{inputs}}", inputs=["*.in"])
    ok_exists(op.join(path, "{inputs}"))

    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
        script_out = cmout.getvalue()
        assert_in("echo a.in b.in>c.out", script_out)
        assert_in("echo {} >expanded-pwd".format(subdir_path), script_out)
        assert_in("echo {} >expanded-dspath".format(ds.path), script_out)

    assert_result_count(
        ds.run("{unknown_placeholder}", on_failure="ignore"),
        1, status="impossible", action="run")

    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(op.join(path, "configured-license"), "gpl3",
                        strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
        assert_in("gpl3", cmout.getvalue())

    ds.run("echo {tmpdir} >tout")
    ok_file_has_content(op.join(path, "tout"), ".*datalad-run.*", re_=True)


def test_run_inputs_no_annex_repo(path):
    ds = Dataset(path).create(no_annex=True)
    # Running --input in a plain Git repo doesn't fail.
    ds.run("touch dummy", inputs=["*"])
    ok_exists(opj(ds.path, "dummy"))
    ds.rerun()


def test_addurls_url_filename(self, path):
    ds = Dataset(path).create(force=True)
    ds.addurls(self.json_file, "{url}", "{_url0}/{_url_filename}")
    for fname in ["a.dat", "b.dat", "c.dat"]:
        ok_exists(op.join(ds.path, "udir", fname))