def test_run_from_subds_gh3551(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.create("output")
    with chpwd(op.join(ds.path, "sub")):
        assert_in_results(
            run("echo",
                inputs=[op.join(op.pardir, "sub", "input")],
                outputs=[op.join(op.pardir, "output")],
                return_type="list", result_filter=None, result_xfm=None),
            action="get",
            status="notneeded")
    assert_repo_status(ds.path)

    subds_path = op.join("output", "subds")
    ds.create(subds_path)
    with chpwd(op.join(ds.path, "sub")):
        output_dir = op.join(op.pardir, "output", "subds")
        # The below command is trying to be compatible. It could be made
        # better (e.g., actually using the input file) by someone that knows
        # something about Windows.
        assert_in_results(
            run("cd .> {}".format(op.join(output_dir, "f")),
                inputs=[op.join(op.pardir, "sub", "input")],
                outputs=[output_dir],
                return_type="list", result_filter=None, result_xfm=None),
            action="save",
            status="ok")
    assert_repo_status(ds.path)
    subds = Dataset(op.join(ds.path, subds_path))
    ok_exists(op.join(subds.path, "f"))
    if not ds.repo.is_managed_branch():  # FIXME
        # This check fails on Windows:
        # https://github.com/datalad/datalad/pull/3747/checks?check_run_id=248506560#step:8:254
        ok_(subds.repo.file_has_content("f"))
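# For reference: every snippet in this collection hinges on
# assert_in_results(), which checks that at least one result record carries
# a given combination of key/value pairs. A minimal sketch of that helper,
# assuming plain result dicts (DataLad's actual helper lives in
# datalad.tests.utils and may differ in details):
def _assert_in_results_sketch(results, **kwargs):
    # succeed if any result dict matches every given key/value pair
    assert any(
        all(k in r and r[k] == v for k, v in kwargs.items())
        for r in results
    ), "no result matching %r among %r" % (kwargs, results)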
def test_addurls_url_on_collision_error_if_different(self, path):
    ds = Dataset(path).create(force=True)

    data = [self.data[0].copy(), self.data[0].copy()]
    data[0]["some_metadata"] = "1"
    data[1]["some_metadata"] = "2"

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}", on_failure="ignore"),
            action="addurls",
            status="error")
    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}",
                       on_collision="error-if-different",
                       on_failure="ignore"),
            action="addurls",
            status="error")
    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}", exclude_autometa="*",
                   on_collision="error-if-different")
    ok_exists(op.join(ds.path, "a"))
def test_merge_follow_parentds_subdataset_other_branch(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    on_adjusted = ds_src.repo.is_managed_branch()
    ds_src_subds = ds_src.create("subds")
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_subds = Dataset(ds_clone.pathobj / "subds")

    ds_src_subds.repo.call_git(["checkout", "-b", "other"])
    (ds_src_subds.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    res = ds_clone.update(merge=True, follow="parentds", recursive=True,
                          on_failure="ignore")
    if on_adjusted:
        # Our 'git annex sync'-based approach on adjusted branches is
        # incompatible with follow='parentds'.
        assert_in_results(res, action="update", status="impossible")
        return
    else:
        assert_in_results(res, action="update", status="ok")
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
    ok_(ds_clone_subds.repo.is_under_annex("foo"))

    (ds_src_subds.pathobj / "bar").write_text("bar content")
    ds_src.save(recursive=True)
    ds_clone_subds.repo.checkout("master", options=["-bnew"])
    ds_clone.update(merge=True, follow="parentds", recursive=True)
    if not on_adjusted:
        eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
def test_run_remove_keeps_leading_directory(path):
    ds = Dataset(op.join(path, "ds")).create()
    repo = ds.repo

    (ds.pathobj / "d").mkdir()
    output = (ds.pathobj / "d" / "foo")
    output.write_text("foo")
    ds.save()

    output_rel = str(output.relative_to(ds.pathobj))
    repo.drop(output_rel, options=["--force"])

    assert_in_results(
        ds.run("cd .> {}".format(output_rel), outputs=[output_rel],
               result_renderer=None),
        action="run.remove", status="ok")
    assert_repo_status(ds.path)

    # Remove still gets saved() if command doesn't generate the output
    # (just as it would if git-rm were used instead of unlink).
    repo.drop(output_rel, options=["--force"])
    assert_in_results(
        ds.run("cd .> something-else", outputs=[output_rel],
               result_renderer=None),
        action="run.remove", status="ok")
    assert_repo_status(ds.path)
def test_addurls_url_on_collision_choose(self, path):
    ds = Dataset(path).create(force=True)

    data = deepcopy(self.data)
    for row in data:
        row["name"] = "a"

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}", on_failure="ignore"),
            action="addurls",
            status="error")
    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}",
                       on_collision="error-if-different",
                       on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-first", on_collision="take-first")
    ok_file_has_content(op.join(ds.path, "a-first"), "a content",
                        strip=True)

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-last", on_collision="take-last")
    ok_file_has_content(op.join(ds.path, "a-last"), "c content",
                        strip=True)
def test_unlock_raises(path, path2, path3):
    # make sure we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    assert_raises(NoDatasetArgumentFound,
                  unlock, dataset=None, path=path2)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    (ds.pathobj / ".noannex").unlink()
    AnnexRepo(path, create=True)

    # One that doesn't exist.
    res = ds.unlock(path="notexistent.txt", result_xfm=None,
                    on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    # And one that isn't associated with a dataset.
    assert_in_results(ds.unlock(path=path2, on_failure="ignore"),
                      status="error",
                      message="path not underneath this dataset")

    chpwd(_cwd)
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
def test_install_known_subdataset(src, path):
    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False,
                                       result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True,
                                           result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # verify that it is the correct submodule that was installed and not a
    # new repository initialized
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False,
                                           result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True,
                                       result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
def test_install_skip_failed_recursive(src, path):
    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")
    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir, recursive=True,
                        on_failure='ignore', result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install ([1], since it comes after the
        # '2' submodule)
        assert_in_results(result, status='error', path=sub1.path)
        assert_in_results(result, status='ok', path=sub2.path)
        cml.assert_logged(
            msg="target path already exists and not empty",
            regex=False, level='ERROR')
def test_merge_follow_parentds_subdataset_adjusted_warning(path):
    path = Path(path)

    ds_src = Dataset(path / "source").create()
    if ds_src.repo.is_managed_branch():
        raise SkipTest("This test depends on the source repo being "
                       "an un-adjusted branch")

    ds_src_subds = ds_src.create("subds")

    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_subds = Dataset(ds_clone.pathobj / "subds")
    _adjust(ds_clone_subds.repo)
    # Note: Were we to save ds_clone here, we would get a merge conflict in
    # the top repo for the submodule (even if using 'git annex sync' rather
    # than 'git merge').

    ds_src_subds.repo.call_git(["checkout", DEFAULT_BRANCH + "^0"])
    (ds_src_subds.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    assert_in_results(
        ds_clone.update(merge=True, recursive=True, follow="parentds",
                        on_failure="ignore"),
        status="impossible",
        path=ds_clone_subds.path,
        action="update")
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()

    if src.repo.is_managed_branch():
        # on crippled FS post-update hook enabling via create-sibling
        # doesn't work ATM
        raise SkipTest("no create-sibling on crippled FS")
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus
    # not to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter
    # in this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])
    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, if instructed by configuration
    src.config.set('datalad.push.copy-auto-if-wanted', 'true',
                   where='local')
    res = src.push(to='target')
    assert_in_results(res, action='copy', path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check that dataset-config cannot overrule this
    src.config.set('datalad.push.copy-auto-if-wanted', 'false',
                   where='dataset')
    res = src.push(to='target')
    assert_status('notneeded', res)

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError,
                      (dst.pathobj / 'secure.1').read_text)

    # remove local config, must enable push of secure file
    src.config.unset('datalad.push.copy-auto-if-wanted', where='local')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
def test_configs(path):
    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution
    # config for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} {{mysub}} {args}'
        % quote_cmdlinearg(sys.executable),
        where='dataset')
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        where='dataset')
    # TODO: Should we allow for --inputs/--outputs arguments for
    # run_procedure (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'),
                        'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} local {args}' % quote_cmdlinearg(sys.executable),
        where='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        where='dataset')
    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
def test_addurls_no_rows(self, path):
    ds = Dataset(path).create(force=True)
    for fname in ["in.csv", "in.json"]:
        with swallow_logs(new_level=logging.WARNING) as cml:
            assert_in_results(
                ds.addurls(fname, "{url}", "{name}"),
                action="addurls",
                status="notneeded")
            cml.assert_logged("No rows", regex=False)
def test_base(dspath, records):
    # make fake UKB datarecord downloads
    make_datarecord_zips('12345', records)

    # init dataset
    ds = create(dspath)
    ds.ukb_init('12345',
                ['20227_2_0', '25747_2_0', '25748_2_0', '25748_3_0'])
    # dummy key file, only needed to bypass the check
    ds.config.add('datalad.ukbiobank.keyfile', 'dummy', where='local')

    # fake ukbfetch
    bin_dir = make_ukbfetch(ds, records)

    # refuse to operate on dirty datasets
    (ds.pathobj / 'dirt').write_text('dust')
    assert_status('error', ds.ukb_update(on_failure='ignore'))
    (ds.pathobj / 'dirt').unlink()

    # meaningful crash with no ukbfetch
    assert_raises(RuntimeError, ds.ukb_update)

    # put fake ukbfetch in the path and run
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True)

    # get expected file layout
    incoming = ds.repo.get_files('incoming')
    incoming_p = ds.repo.get_files('incoming-native')
    for i in ['12345_25748_2_0.txt',
              '12345_25748_3_0.txt',
              '12345_20227_2_0.zip']:
        assert_in(i, incoming)
    for i in ['25748_2_0.txt',
              '25748_3_0.txt',
              '20227_2_0/fMRI/rfMRI.nii.gz']:
        assert_in(i, incoming_p)
    # no ZIPs left after processing
    assert_not_in('12345_20227_2_0.zip', incoming_p)
    assert_not_in('20227_2_0.zip', incoming_p)

    # rerun works
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True)

    # rightfully refuse to merge when the active branch is an incoming* one
    ds.repo.checkout('incoming')
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        assert_in_results(
            ds.ukb_update(merge=True, force=True, on_failure='ignore'),
            status='impossible',
            message='Refuse to merge into incoming* branch',
        )
def test_addurls(self, path):
    ds = Dataset(path).create(force=True)

    def get_annex_commit_counts():
        return int(
            ds.repo.repo.git.rev_list("--count", "git-annex").strip())

    n_annex_commits = get_annex_commit_counts()

    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{name}")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(fname)

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        if not os.environ.get('DATALAD_FAKE__DATES'):
            # We should have two new commits on the git-annex branch: one
            # for the added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       ifexists="overwrite")
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(
            ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
            action="addurls",
            status="notneeded")

        # Adding to already existing links works, as long as the content is
        # the same.
        ds.addurls(self.json_file, "{url}", "{name}")

        # But it fails if something has changed.
        ds.unlock("a")
        with open("a", "w") as ofh:
            ofh.write("changed")
        ds.save("a")

        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file, "{url}", "{name}")
def test_install_recursive(src, path_nr, path_r):
    # first install non-recursive:
    ds = install(path_nr, source=src, recursive=False)
    ok_(ds.is_installed())
    for sub in ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(not sub.is_installed(),
            "Unintentionally installed: %s" % (sub,))
    # this also means the subdatasets are listed as not fulfilled:
    eq_(set(ds.subdatasets(recursive=True, fulfilled=False,
                           result_xfm='relpaths')),
        {'subm 1', '2'})

    # now recursively:
    # don't filter implicit results so we can inspect them
    res = install(path_r, source=src, recursive=True,
                  result_xfm=None, result_filter=None)
    # installed a dataset and two subdatasets
    assert_result_count(res, 3, action='install', type='dataset')
    # we recurse top down during installation, so the toplevel should
    # appear at the first position in the returned list
    eq_(res[0]['path'], path_r)
    top_ds = Dataset(res[0]['path'])
    ok_(top_ds.is_installed())

    # the subdatasets are contained in the returned list:
    # (Note: Until we provide proper (singleton) instances for Datasets,
    # we need to check for their paths)
    assert_in_results(res, path=opj(top_ds.path, 'subm 1'), type='dataset')
    assert_in_results(res, path=opj(top_ds.path, '2'), type='dataset')
    eq_(len(top_ds.subdatasets(recursive=True)), 2)

    for subds in top_ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subds.is_installed(), "Not installed: %s" % (subds,))
        # no content was installed:
        ainfo = subds.repo.get_content_annexinfo(init=None,
                                                 eval_availability=True)
        assert_false(any(st["has_content"] for st in ainfo.values()))
    # no unfulfilled subdatasets:
    ok_(top_ds.subdatasets(recursive=True, fulfilled=False) == [])

    # check if we can install recursively into a dataset
    # https://github.com/datalad/datalad/issues/2982
    subds = ds.install('recursive-in-ds', source=src, recursive=True)
    ok_(subds.is_installed())
    for subsub in subds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subsub.is_installed())

    # check that we get subdataset instances manufactured from notneeded
    # results when installing existing subdatasets again
    eq_(subds, ds.install('recursive-in-ds'))
def test_invalid_call(path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.

        # needs spec or discover
        assert_raises(InsufficientArgumentsError, run_procedure)
        res = run_procedure('unknown', on_failure='ignore')
        assert_true(len(res) == 1)
        assert_in_results(res, status="impossible")
def test_nested_create(path):
    # to document some more organic usage pattern
    ds = Dataset(path).create()
    ok_clean_git(ds.path)
    lvl2relpath = opj('lvl1', 'lvl2')
    lvl2path = opj(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(opj(ds.path, 'lvl1', 'empty'))
    with open(opj(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.add('.'))
    # later create subdataset in a fresh dir
    subds1 = ds.create(opj('lvl1', 'subds'))
    ok_clean_git(ds.path)
    eq_(ds.subdatasets(result_xfm='relpaths'), [opj('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(opj('lvl1', 'empty'))
    ok_clean_git(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use '
                '`force` option to ignore')
    # even with force, as to do this properly complicated surgery would
    # need to take place
    # MIH disable shaky test till proper dedicated upfront check is
    # in-place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo._run_annex_command('unannex',
                               annex_options=[opj(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself
    assert_status(
        'ok' if ds.repo.config.getint("annex", "version") == 6
        else 'notneeded',
        ds.save())
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use '
                '`force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't
    # update its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
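# Several create() tests in this collection pass **raw. In the DataLad test
# modules this is a module-level kwargs dict that disables result filtering
# and transformation and returns failures as result records instead of
# raising; a sketch under that assumption (the exact definition lives in
# the source test module and is not shown here):
raw = dict(return_type='list', result_filter=None, result_xfm=None,
           on_failure='ignore')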
def test_on_failure_continue(path):
    ds = Dataset(op.join(path, "ds")).create(force=True)
    # save() calls status() in a way that respects on_failure.
    assert_in_results(
        ds.save(path=[op.join(path, "outside"),
                      op.join(path, "ds", "within")],
                on_failure="ignore"),
        action="status",
        status="error")
    # save() continued despite the failure and saved ds/within.
    assert_repo_status(ds.path)
def test_merge_no_merge_target(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    assert_repo_status(ds_src.path)
    ds_clone.repo.checkout(DEFAULT_BRANCH, options=["-bnew"])
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, status="impossible", action="update")
def test_rerun_cherry_pick(path):
    ds = Dataset(path).create()
    ds.repo.tag("prerun")
    ds.run('echo abc > runfile')
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")

    for onto, action in [("HEAD", "skip"), ("prerun", "pick")]:
        results = ds.rerun(since="prerun", onto=onto)
        assert_in_results(results, status='ok', rerun_action=action)
def test_nested_create(path):
    # to document some more organic usage pattern
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    lvl2relpath = op.join('lvl1', 'lvl2')
    lvl2path = op.join(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(op.join(ds.path, 'lvl1', 'empty'))
    with open(op.join(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.save())
    # Empty directories are filtered out.
    assert_repo_status(ds.path, untracked=[])
    # later create subdataset in a fresh dir
    # WINDOWS FAILURE IS NEXT LINE
    subds1 = ds.create(op.join('lvl1', 'subds'))
    assert_repo_status(ds.path, untracked=[])
    eq_(ds.subdatasets(result_xfm='relpaths'), [op.join('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(op.join('lvl1', 'empty'))
    assert_repo_status(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path, [op.join(lvl2path, 'file')]))
    # even with force, as to do this properly complicated surgery would
    # need to take place
    # MIH disable shaky test till proper dedicated upfront check is
    # in-place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo.call_annex(['unannex', op.join(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself, but only on v5
    ds.repo.commit()
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use '
                '`force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't
    # update its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
def test_empty_git_upstairs(topdir):
    # create() doesn't get confused by an empty .git/ upstairs (gh-3473)
    assert_in_results(
        create(op.join(topdir, "empty", "ds"), **raw),
        status="ok", type="dataset", action="create")
    # ... and it will ignore non-meaningful content in .git
    assert_in_results(
        create(op.join(topdir, "nonempty", "ds"), **raw),
        status="ok", type="dataset", action="create")
    # ... but it will raise if it detects a valid repo
    # (by existence of .git/HEAD as defined in GitRepo._valid_git_test_path)
    with assert_raises(CommandError):
        create(op.join(topdir, "git_with_head", "ds"), **raw)
def test_rerun_cherry_pick(path):
    ds = Dataset(path).create()
    ds.repo.tag("prerun")
    ds.run('echo abc > runfile')
    with open(opj(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.add("nonrun-file")

    for onto, text in [("HEAD", "skipping"), ("prerun", "cherry picking")]:
        results = ds.rerun(since="prerun", onto=onto)
        assert_in_results(results, status='ok', path=ds.path)
        assert any(r.get("message", "").endswith(text) for r in results)
def test_nested_create(path):
    # to document some more organic usage pattern
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    lvl2relpath = op.join('lvl1', 'lvl2')
    lvl2path = op.join(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(op.join(ds.path, 'lvl1', 'empty'))
    with open(op.join(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.save())
    # Empty directories are filtered out.
    assert_repo_status(ds.path, untracked=[])
    # later create subdataset in a fresh dir
    # WINDOWS FAILURE IS NEXT LINE
    subds1 = ds.create(op.join('lvl1', 'subds'))
    assert_repo_status(ds.path, untracked=[])
    eq_(ds.subdatasets(result_xfm='relpaths'), [op.join('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(op.join('lvl1', 'empty'))
    assert_repo_status(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path, [op.join(lvl2path, 'file')]))
    # even with force, as to do this properly complicated surgery would
    # need to take place
    # MIH disable shaky test till proper dedicated upfront check is
    # in-place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo._run_annex_command('unannex',
                               annex_options=[op.join(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself, but only on v5
    ds.repo.commit()
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use '
                '`force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't
    # update its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
def test_merge_no_merge_target(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    if ds_src.repo.is_managed_branch():
        # `git annex sync REMOTE` rather than `git merge TARGET` is used on
        # an adjusted branch, so we don't give an error if TARGET can't be
        # determined.
        raise SkipTest("Test depends on non-adjusted branch")
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    assert_repo_status(ds_src.path)
    ds_clone.repo.checkout("master", options=["-bnew"])
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, status="impossible", action="update")
def test_surprise_subds(path):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)
    # If subrepo is an adjusted branch, it would have a commit, making most
    # of this test irrelevant because it is about the unborn branch edge
    # case.
    adjusted = somerepo.is_managed_branch()
    # This edge case goes away with Git v2.22.0.
    fixed_git = external_versions['cmd:git'] >= '2.22.0'

    # save non-recursive
    res = ds.save(recursive=False, on_failure='ignore')
    if not adjusted and fixed_git:
        # We get an appropriate error about no commit being checked out.
        assert_in_results(res, action='add_submodule', status='error')

    # the content of both subds and subrepo are not added to their
    # respective parent as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])

    if adjusted or fixed_git:
        if adjusted:
            # adjusted branch: #datalad/3178 (that would have a commit)
            modified = [subds.repo.pathobj, somerepo.pathobj]
            untracked = []
        else:
            # Newer Git versions refuse to add a sub-repository with no
            # commits checked out.
            modified = [subds.repo.pathobj]
            untracked = ['d1']
        assert_repo_status(ds.path, modified=modified, untracked=untracked)
        assert_not_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                      ds.repo.get_content_info())
    else:
        # however, while the subdataset is added (and reported as modified
        # because its content is still untracked), the subrepo cannot be
        # added (it has no commit)
        # worse: its untracked file has been added to the superdataset
        assert_repo_status(ds.path, modified=['d2/subds'])
        assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                  ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())
def test_update_adjusted_incompatible_with_ff_only(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")

    maybe_adjust_repo(ds_clone.repo)

    assert_in_results(
        ds_clone.update(merge="ff-only", on_failure="ignore"),
        action="update", status="impossible")
    assert_in_results(
        ds_clone.update(on_failure="ignore"),
        action="update", status="ok")
def test_copy_file_prevent_dotgit_placement(srcpath, destpath):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git', recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the destination does not
    assert_in_results(
        dest.copy_file([sub.pathobj / '.git' / 'config',
                        dest.pathobj / 'some' / '.git'],
                       on_failure='ignore'),
        status='impossible',
        action='copy_file')

    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])

    # A source path can have a leading .git/ if the destination is outside
    # of .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())

    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())

    # But we only waste so many cycles trying to prevent foot shooting.
    # This next one sneaks by because only .name, not all upstream parts,
    # is checked for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())
def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus
    # not to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter
    # in this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])
    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    res = src.push(to='target')
    assert_in_results(res, action='copy', path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError,
                      (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1, action="update", status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1, action="update", status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
def test_get_invalid_call(path, file_outside):
    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_status('impossible',
                  get(None, dataset=path, on_failure='ignore'))

    # have a plain git:
    ds = Dataset(path)
    ds.create(no_annex=True)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.save("some.txt", to_git=True, message="Initial commit.")

    # make it an annex (remove indicator file that create has placed
    # in the dataset to make it possible):
    (ds.pathobj / '.noannex').unlink()
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    assert_status('notneeded', result)

    # invalid source:
    # yoh: but now we would need to add it to annex since clever code first
    # checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.save("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as ce:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", ce.exception.remote)

    res = ds.get("NotExistingFile.txt", on_failure='ignore')
    assert_status('impossible', res)
    assert_message("path does not exist", res)

    # path outside repo errors as with most other commands:
    res = ds.get(file_outside, on_failure='ignore')
    assert_in_results(
        res, status='impossible',
        message='path not associated with any dataset')
def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 2)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
def test_create_raises(path, outside_path):
    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, no_annex=True, description='some')

    with open(op.join(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_in_results(
        ds.create(force=False, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use '
                '`force` option to ignore')
    # non-empty with `force`:
    ds.create(force=True)
    # create sub outside of super:
    assert_in_results(
        ds.create(outside_path, **raw),
        status='error',
        message=('dataset containing given paths is not underneath the '
                 'reference dataset %s: %s', ds, outside_path))
    obscure_ds = u"ds-" + OBSCURE_FILENAME
    # create a sub:
    ds.create(obscure_ds)
    # fail when doing it again
    assert_in_results(
        ds.create(obscure_ds, **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path, [text_type(ds.pathobj / obscure_ds)]),
    )
    # now deinstall the sub and fail trying to create a new one at the
    # same location
    ds.uninstall(obscure_ds, check=False)
    assert_in(obscure_ds,
              ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # and now it should fail to also create inplace or under
    assert_in_results(
        ds.create(obscure_ds, **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path, [text_type(ds.pathobj / obscure_ds)]),
    )
    assert_in_results(
        ds.create(op.join(obscure_ds, 'subsub'), **raw),
        status='error',
        message=('collision with %s (dataset) in dataset %s',
                 text_type(ds.pathobj / obscure_ds), ds.path)
    )
    os.makedirs(op.join(ds.path, 'down'))
    with open(op.join(ds.path, 'down', "someotherfile.tst"), 'w') as f:
        f.write("someother")
    ds.save()
    assert_in_results(
        ds.create('down', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path,
                 [text_type(ds.pathobj / 'down' / 'someotherfile.tst')]),
    )
def test_get_recurse_subdatasets(src, path):
    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')

    # ask for the two subdatasets specifically. This will obtain them,
    # but not any content of any files in them
    subds1, subds2 = ds.get(['subm 1', '2'], get_data=False,
                            description="youcouldnotmakethisup",
                            result_xfm='datasets')
    for d in (subds1, subds2):
        eq_(d.repo.get_description(), 'youcouldnotmakethisup')

    # there are 3 files to get: test-annex.dat within each dataset:
    rel_path_sub1 = opj(basename(subds1.path), 'test-annex.dat')
    rel_path_sub2 = opj(basename(subds2.path), 'test-annex.dat')
    annexed_files = {'test-annex.dat', rel_path_sub1, rel_path_sub2}

    # None of them is currently present:
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    ok_clean_git(subds1.path)
    # explicitly given path in subdataset => implicit recursion:
    # MIH: Nope, we fulfill the dataset handle, but that doesn't
    # imply fulfilling all file handles
    result = ds.get(rel_path_sub1, recursive=True)
    # all good actions
    assert_status('ok', result)

    assert_in_results(result, path=opj(ds.path, rel_path_sub1), status='ok')
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)

    # drop it:
    subds1.repo.drop('test-annex.dat')
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)

    # now, with a path not explicitly pointing within a
    # subdataset, but recursive option:
    # get everything:
    result = ds.get(recursive=True,
                    result_filter=lambda x: x.get('type') != 'dataset')
    assert_status('ok', result)

    eq_(set([item.get('path')[len(ds.path) + 1:] for item in result
             if item['type'] == 'file']),
        annexed_files)
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)
    ok_(subds2.repo.file_has_content('test-annex.dat') is True)

    # drop them:
    ds.repo.drop('test-annex.dat')
    subds1.repo.drop('test-annex.dat')
    subds2.repo.drop('test-annex.dat')
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    # now, the very same call, but without recursive:
    result = ds.get('.', recursive=False)
    assert_status('ok', result)
    # one report is on the requested dir
    eq_(len(result) - 1, 1)
    assert_result_count(
        result, 1, path=opj(ds.path, 'test-annex.dat'), status='ok')
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)
def test_configs(path):
    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution
    # config for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" {{mysub}} {args}',
        where='dataset'
    )
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        where='dataset'
    )
    # TODO: Should we allow for --inputs/--outputs arguments for
    # run_procedure (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'),
                        'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" local {args}',
        where='local'
    )
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        where='dataset'
    )
    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
def test_invalid_call():
    # needs spec or discover
    assert_raises(InsufficientArgumentsError, run_procedure)
    res = run_procedure('unknown', on_failure='ignore')
    assert_true(len(res) == 1)
    assert_in_results(res, status="impossible")
def test_procedure_discovery(path, super_path):
    ps = run_procedure(discover=True)
    # there are a few procedures coming with datalad, needs to find them
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    ds.add(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)

    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(ds.path, 'code',
                                       'datalad_test_proc.py'))

    # make it a subdataset and try again:
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)

    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                       'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as
        # a python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code',
                           'broken_link_proc.py'))
        # broken symlink at procedure location, but we can't tell whether
        # it is an actual procedure without any guess on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code',
                           'unknwon_broken_link'))
        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the
        # dataset procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(ps,
                          path=op.join(super.path, 'sub', 'code',
                                       'broken_link_proc.py'),
                          state='absent')
        assert_not_in_results(ps,
                              path=op.join(super.path, 'sub', 'code',
                                           'unknwon_broken_link'))
def test_unlock(path):
    ds = Dataset(path)

    # file is currently locked:
    # TODO: use get_annexed_files instead of hardcoded filename
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # in direct mode there is no unlock:
    if ds.repo.is_direct_mode():
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('notneeded', res)
    # in V6+ we can unlock even if the file's content isn't present:
    elif ds.repo.supports_unlocked_pointers:
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('ok', res)
        # TODO: RF: make 'lock' a command as well
        # re-lock so that from here on we have a consistent situation
        # with V5:
        ds.repo._git_custom_command('test-annex.dat',
                                    ['git', 'annex', 'lock'])
    else:
        # cannot unlock without content (annex get wasn't called)
        assert_raises(CommandError, ds.unlock)  # FIXME

    ds.repo.get('test-annex.dat')
    result = ds.unlock()
    assert_result_count(result, 1)
    if ds.repo.is_direct_mode():
        assert_status('notneeded', result)
    else:
        assert_in_results(result, path=opj(ds.path, 'test-annex.dat'),
                          status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content")

    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock so that from here on we have a consistent situation
        # with V5:
        ds.repo._git_custom_command('test-annex.dat',
                                    ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    if not ds.repo.is_direct_mode():
        # after commit, file is locked again:
        assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content", f.read())

    # unlock again, this time more specific:
    result = ds.unlock(path='test-annex.dat')
    assert_result_count(result, 1)

    if ds.repo.is_direct_mode():
        assert_in_results(result, path=opj(ds.path, 'test-annex.dat'),
                          status='notneeded')
    else:
        assert_in_results(result, path=opj(ds.path, 'test-annex.dat'),
                          status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content again")

    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock so that from here on we have a consistent situation
        # with V5:
        ds.repo._git_custom_command('test-annex.dat',
                                    ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # TODO:
    # BOOOM: test-annex.dat is writeable in V6!
    # Why the hell is this different from the first time we wrote to the
    # file and locked it again?
    # Also: after opening, the file is empty.

    if not ds.repo.is_direct_mode():
        # after commit, file is locked again:
        assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")
    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content again", f.read())