def test_install_known_subdataset(src=None, path=None):
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")

    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(state='absent', result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(state='present', result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # a new repository initiated
    assert_in("test-annex.dat", subds.repo.get_indexed_files())
    assert_not_in('subm 1', ds.subdatasets(state='absent', result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(state='present', result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
def test_py2_unicode_command(path=None):
    # Avoid OBSCURE_FILENAME to avoid windows-breakage (gh-2929).
    ds = Dataset(path).create()
    touch_cmd = "import sys; open(sys.argv[1], 'w').write('')"
    cmd_str = u"{} -c \"{}\" {}".format(sys.executable, touch_cmd, u"bβ0.dat")
    ds.run(cmd_str)
    assert_repo_status(ds.path)
    ok_exists(op.join(path, u"bβ0.dat"))

    # somewhat desperate attempt to detect our own Github CI tests on a
    # crippled filesystem (VFAT) that is so crippled that it doesn't handle
    # what is needed here. It just goes mad with encoded bytestrings:
    # CommandError: ''python -c '"'"'import sys; open(sys.argv[1], '"'"'"'"'"'"'"'"'w'"'"'"'"'"'"'"'"').write('"'"'"'"'"'"'"'"''"'"'"'"'"'"'"'"')'"'"' '"'"' β1 '"'"''' failed with exitcode 1 under /crippledfs/
    if not on_windows and os.environ.get('TMPDIR', None) != '/crippledfs':  # FIXME
        ds.run([sys.executable, "-c", touch_cmd, u"bβ1.dat"])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u"bβ1.dat"))

        # Send in a list of byte-strings to mimic a py2 command-line
        # invocation.
        ds.run([
            s.encode("utf-8")
            for s in [sys.executable, "-c", touch_cmd, u" β1 "]
        ])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u" β1 "))

    assert_in_results(
        ds.run(u"bβ2.dat", result_renderer=None, on_failure="ignore"),
        status="error",
        action="run")
def test_add_archive_use_archive_dir(repo_path=None):
    ds = Dataset(repo_path).create(force=True)
    with chpwd(repo_path):
        # Let's add first archive to the repo with default setting
        archive_path = opj('4u', '1.tar.gz')
        # check it gives informative error if archive is not already added
        res = add_archive_content(archive_path, on_failure='ignore')
        message = \
            "Can not add an untracked archive. Run 'datalad save 4u\\1.tar.gz'" \
            if on_windows else \
            "Can not add an untracked archive. Run 'datalad save 4u/1.tar.gz'"
        assert_in_results(res, action='add-archive-content', message=message,
                          status='impossible')

        with swallow_outputs():
            ds.save(archive_path)

        ok_archives_caches(ds.path, 0)
        add_archive_content(archive_path, strip_leading_dirs=True,
                            use_current_dir=True)
        ok_(not exists(opj('4u', '1 f.txt')))
        ok_file_under_git(ds.path, '1 f.txt', annexed=True)
        ok_archives_caches(ds.path, 0)

        # and now let's extract under archive dir
        add_archive_content(archive_path, strip_leading_dirs=True)
        ok_file_under_git(ds.path, opj('4u', '1 f.txt'), annexed=True)
        ok_archives_caches(ds.path, 0)

        add_archive_content(opj('4u', 'sub.tar.gz'))
        ok_file_under_git(ds.path, opj('4u', 'sub', '2 f.txt'), annexed=True)
        ok_archives_caches(ds.path, 0)
def test_merge_follow_parentds_subdataset_adjusted_warning(path=None):
    path = Path(path)

    ds_src = Dataset(path / "source").create()
    ds_src_subds = ds_src.create("subds")

    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_subds = Dataset(ds_clone.pathobj / "subds")
    maybe_adjust_repo(ds_clone_subds.repo)
    # Note: Were we to save ds_clone here, we would get a merge conflict in
    # the top repo for the submodule (even if using 'git annex sync' rather
    # than 'git merge').

    ds_src_subds.repo.call_git(["checkout", DEFAULT_BRANCH + "^0"])
    (ds_src_subds.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    assert_in_results(
        ds_clone.update(merge=True, recursive=True, follow="parentds",
                        on_failure="ignore"),
        status="impossible",
        path=ds_clone_subds.path,
        action="update")
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
def test_install_skip_failed_recursive(src=None, path=None):
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(
            os.curdir, recursive=True,
            on_failure='ignore', result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install; it is [1] since it comes after
        # the '2' submodule
        assert_in_results(
            result, status='error', path=sub1.path, type='dataset',
            message='target path already exists and not empty, refuse to '
                    'clone into target path')
        assert_in_results(result, status='ok', path=sub2.path)
def test_sibling_enable_sameas(repo=None, clone_path=None):
    ds = Dataset(repo.path)
    create_tree(ds.path, {"f0": "0"})
    ds.save(path="f0")
    ds.push(["f0"], to="r_dir")

    ds.repo.drop(["f0"])
    ds_cloned = clone(ds.path, clone_path)
    assert_false(ds_cloned.repo.file_has_content("f0"))
    # does not work without a name
    res = ds_cloned.siblings(
        action="enable",
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(
        res, status='error',
        message='require `name` of sibling to enable')
    # does not work with the wrong name
    res = ds_cloned.siblings(
        action="enable", name='wrong',
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(
        res, status='impossible',
        message=("cannot enable sibling '%s', not known", 'wrong'))
    # works with the right name
    res = ds_cloned.siblings(action="enable", name="r_rsync")
    assert_status("ok", res)
    ds_cloned.get(path=["f0"])
    ok_(ds_cloned.repo.file_has_content("f0"))
def test_addurls_url_on_collision_error_if_different(self=None, path=None):
    ds = Dataset(path).create(force=True)

    data = [self.data[0].copy(), self.data[0].copy()]
    data[0]["some_metadata"] = "1"
    data[1]["some_metadata"] = "2"

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}", on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}",
                       on_collision="error-if-different",
                       on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}", exclude_autometa="*",
                   on_collision="error-if-different")
    ok_exists(op.join(ds.path, "a"))
def test_unlock_raises(path=None, path2=None, path3=None):
    # make sure we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    assert_raises(NoDatasetFound, unlock, dataset=None, path=path2)

    create(path=path, annex=False)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    (ds.pathobj / ".noannex").unlink()
    AnnexRepo(path, create=True)

    # One that doesn't exist.
    res = ds.unlock(path="notexistent.txt",
                    result_xfm=None, on_failure='ignore',
                    return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    # And one that isn't associated with a dataset.
    assert_in_results(
        ds.unlock(path=path2, on_failure="ignore"),
        status="error",
        message=("path not underneath the reference dataset %s", ds.path))

    chpwd(_cwd)
def test_run_remove_keeps_leading_directory(path=None):
    ds = Dataset(op.join(path, "ds")).create()
    repo = ds.repo

    (ds.pathobj / "d").mkdir()
    output = (ds.pathobj / "d" / "foo")
    output.write_text("foo")
    ds.save()

    output_rel = str(output.relative_to(ds.pathobj))
    repo.drop(output_rel, options=["--force"])

    assert_in_results(
        ds.run("cd .> {}".format(output_rel), outputs=[output_rel],
               result_renderer='disabled'),
        action="run.remove", status="ok")
    assert_repo_status(ds.path)

    # The removal still gets saved if the command doesn't generate the
    # output (just as it would if git-rm were used instead of unlink).
    repo.drop(output_rel, options=["--force"])
    assert_in_results(
        ds.run("cd .> something-else", outputs=[output_rel],
               result_renderer='disabled'),
        action="run.remove", status="ok")
    assert_repo_status(ds.path)
def test_addurls_url_on_collision_choose(self=None, path=None):
    ds = Dataset(path).create(force=True)
    data = deepcopy(self.data)
    for row in data:
        row["name"] = "a"

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}", on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}",
                       on_collision="error-if-different",
                       on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-first",
                   on_collision="take-first")
    ok_file_has_content(op.join(ds.path, "a-first"), "a content",
                        strip=True)

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-last",
                   on_collision="take-last")
    ok_file_has_content(op.join(ds.path, "a-last"), "c content",
                        strip=True)
def test_configs(path=None):
    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        scope='branch')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution
    # config for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} {{mysub}} {args}' % quote_cmdlinearg(sys.executable),
        scope='branch')
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        scope='branch')
    # TODO: Should we allow for --inputs/--outputs arguments for
    # run_procedure (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} local {args}' % quote_cmdlinearg(sys.executable),
        scope='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        scope='branch')

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
def check_push(ds):
    # create a file and push it to GIN to see if the
    # access is set up properly
    (ds.pathobj / 'file').write_text('some')
    ds.save()
    assert_in_results(
        ds.push(to='gin', result_renderer='disabled'),
        action='copy',
        status='ok',
        path=str(ds.pathobj / 'file'))
def test_as_common_datasource(testbed=None, viapath=None, viaurl=None,
                              remotepath=None, url=None):
    ds = Dataset(remotepath).create()
    (ds.pathobj / 'testfile').write_text('likemagic')
    (ds.pathobj / 'testfile2').write_text('likemagic2')
    ds.save()

    # make clonable via HTTP
    ds.repo.call_git(['update-server-info'])

    # this does not work for remotes that have path URLs
    ds_frompath = clone(source=remotepath, path=viapath)
    res = ds_frompath.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike',
        on_failure='ignore',
        result_renderer='disabled',
    )
    assert_in_results(
        res,
        status='impossible',
        message='cannot configure as a common data source, URL protocol '
                'is not http or https',
    )
    # but it works for HTTP
    ds_fromurl = clone(source=url, path=viaurl)
    res = ds_fromurl.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike2',
        result_renderer='disabled',
    )
    assert_status('ok', res)
    # same thing should be possible by adding a fresh remote
    res = ds_fromurl.siblings(
        'add',
        name='fresh',
        url=url,
        as_common_datasrc='fresh-sr',
        result_renderer='disabled',
    )
    assert_status('ok', res)
    # now try if it works. we will clone the clone, and get a repo that
    # does not know its ultimate origin. still, we should be able to pull
    # data from it via the special remote
    testbed = clone(source=ds_fromurl, path=testbed)
    assert_status('ok', testbed.get('testfile'))
    eq_('likemagic', (testbed.pathobj / 'testfile').read_text())
    # and the other one
    assert_status('ok', testbed.get('testfile2'))
def test_download_url_dataset(toppath=None, topurl=None, path=None):
    # Non-dataset directory.
    file1_fullpath = opj(path, "file1.txt")
    with chpwd(path):
        download_url(topurl + "file1.txt")
        ok_exists(file1_fullpath)
    os.remove(file1_fullpath)

    files_tosave = ['file1.txt', 'file2.txt']
    urls_tosave = [topurl + f for f in files_tosave]

    ds = Dataset(opj(path, "ds")).create()

    # By default, files are saved when called in a dataset.
    ds.download_url(urls_tosave)
    for fname in files_tosave:
        ok_(ds.repo.file_has_content(fname))

    eq_(ds.repo.get_urls("file1.txt"), [urls_tosave[0]])
    eq_(ds.repo.get_urls("file2.txt"), [urls_tosave[1]])

    ds.download_url([topurl + "file3.txt"], save=False)
    assert_false(ds.repo.file_has_content("file3.txt"))

    # Leading paths for target are created if needed.
    subdir_target = opj("l1", "l2", "f")
    ds.download_url([opj(topurl, "file1.txt")], path=subdir_target)
    ok_(ds.repo.file_has_content(subdir_target))

    subdir_path = opj(ds.path, "subdir", "")
    os.mkdir(subdir_path)
    with chpwd(subdir_path):
        download_url(topurl + "file4.txt")
        download_url(topurl + "file5.txt", path="five.txt")
        ds.download_url(topurl + "file6.txt")
        download_url(topurl + "file7.txt", dataset=ds.path)
    # download_url calls within a subdirectory save the file there
    ok_(ds.repo.file_has_content(opj("subdir", "file4.txt")))
    ok_(ds.repo.file_has_content(opj("subdir", "five.txt")))
    # ... unless the dataset instance is provided
    ok_(ds.repo.file_has_content("file6.txt"))
    # ... but a string for the dataset (as it would be from the command
    # line) still uses CWD semantics
    ok_(ds.repo.file_has_content(opj("subdir", "file7.txt")))

    with chpwd(path):
        # We're in a non-dataset path and pass in a string as the dataset.
        # The path is taken as relative to the current working directory,
        # so we get an error when trying to save it.
        assert_in_results(
            download_url(topurl + "file8.txt", dataset=ds.path,
                         on_failure="ignore"),
            status="error", action="status")
        assert_false((ds.pathobj / "file8.txt").exists())
def test_merge_no_merge_target(path=None):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    assert_repo_status(ds_src.path)
    ds_clone.repo.checkout(DEFAULT_BRANCH, options=["-bnew"])
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, status="impossible", action="update")
def test_on_failure_continue(path=None):
    ds = Dataset(op.join(path, "ds")).create(force=True)
    # save() calls status() in a way that respects on_failure.
    assert_in_results(
        ds.save(path=[op.join(path, "outside"),
                      op.join(path, "ds", "within")],
                on_failure="ignore"),
        action="status",
        status="error")
    # save() continued despite the failure and saved ds/within.
    assert_repo_status(ds.path)
def test_addurls_no_rows(self=None, path=None):
    ds = Dataset(path).create(force=True)
    for fname in ["in.csv", "in.tsv", "in.json"]:
        with swallow_logs(new_level=logging.WARNING) as cml:
            assert_in_results(
                ds.addurls(fname, "{url}", "{name}",
                           result_renderer='disabled'),
                action="addurls",
                status="notneeded")
            cml.assert_logged("No rows", regex=False)
def test_rerun_cherry_pick(path=None):
    ds = Dataset(path).create()
    ds.repo.tag("prerun")
    ds.run('echo abc > runfile')
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")

    for onto, action in [("HEAD", "skip"), ("prerun", "pick")]:
        results = ds.rerun(since="prerun", onto=onto)
        assert_in_results(results, status='ok', rerun_action=action)
def test_nested_create(path=None):
    # to document some more organic usage pattern
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    lvl2relpath = op.join('lvl1', 'lvl2')
    lvl2path = op.join(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(op.join(ds.path, 'lvl1', 'empty'))
    with open(op.join(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.save())
    # Empty directories are filtered out.
    assert_repo_status(ds.path, untracked=[])
    # later create subdataset in a fresh dir
    # WINDOWS FAILURE IS NEXT LINE
    subds1 = ds.create(op.join('lvl1', 'subds'))
    assert_repo_status(ds.path, untracked=[])
    eq_(ds.subdatasets(result_xfm='relpaths'), [op.join('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(op.join('lvl1', 'empty'))
    assert_repo_status(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path, [op.join(lvl2path, 'file')]))
    # even with force, as to do this properly complicated surgery would
    # need to take place
    # MIH disable shaky test till proper dedicated upfront check is
    # in-place in `create` gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo.call_annex(['unannex', op.join(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself, but only on v5
    ds.repo.commit()
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use '
                '`--force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't
    # update its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
def test_specialremote(dspath=None, remotepath=None):
    ds = Dataset(dspath).create()
    ds.repo.call_annex([
        'initremote',
        'myremote',
        'type=directory',
        f'directory={remotepath}',
        'encryption=none',
    ])
    res = ds.siblings('query', result_renderer='disabled')
    assert_in_results(
        res,
        **{'name': 'myremote',
           'annex-type': 'directory',
           'annex-directory': remotepath})
def test_dirty(path=None):
    ds = Dataset(path).create(force=True)
    # must fail, because README.md is to be modified, but already dirty
    assert_in_results(
        ds.run_procedure('cfg_yoda', on_failure="ignore",
                         result_renderer=None),
        action="run", status="error")
    # make sure that was the issue
    # save to git explicitly to keep the test simple and avoid unlocking...
    ds.save('README.md', to_git=True)
    ds.run_procedure('cfg_yoda')
    assert_repo_status(ds.path)
def test_run_failure(path=None):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    if on_windows:
        # this does not do exactly the same as the cmd on other systems
        # but is close enough to make running the test worthwhile
        cmd_failing = "echo x>{} & false".format(op.join("sub", "grows"))
    else:
        cmd_failing = ("echo x$(cat {0}) > {0} && false"
                       .format(op.join("sub", "grows")))

    with assert_raises(IncompleteResultsError):
        ds.run(cmd_failing, result_renderer=None)
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = op.join(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.save(recursive=True, message_file=msgfile)
    assert_repo_status(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = op.join(subds.path, "grows")
    eq_('x \n' if on_windows else 'x\n', open(outfile).read())

    if on_windows:
        # FIXME: Make the remaining code compatible with Windows.
        return

    # There is no CommandError on rerun if the non-zero exit code matches
    # the original one.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a
    # non-zero exit code that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    assert_repo_status(ds.path)
    assert_in_results(ds.rerun(result_renderer=None, on_failure="ignore"),
                      action="run", status="error")

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(IncompleteResultsError):
        ds.run("false", explicit=True, outputs=None, on_failure="stop")
    assert_false(op.exists(msgfile))
def test_surprise_subds(path=None):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)
    # If subrepo is an adjusted branch, it would have a commit, making most
    # of this test irrelevant because it is about the unborn branch edge
    # case.
    adjusted = somerepo.is_managed_branch()
    # This edge case goes away with Git v2.22.0.
    fixed_git = somerepo.git_version >= '2.22.0'

    # save non-recursive
    res = ds.save(recursive=False, on_failure='ignore')
    if not adjusted and fixed_git:
        # We get an appropriate error about no commit being checked out.
        assert_in_results(res, action='add_submodule', status='error')

    # the content of both subds and subrepo are not added to their
    # respective parent as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])

    if adjusted or fixed_git:
        if adjusted:
            # adjusted branch: #datalad/3178 (that would have a commit)
            modified = [subds.repo.pathobj, somerepo.pathobj]
            untracked = []
        else:
            # Newer Git versions refuse to add a sub-repository with no
            # commits checked out.
            modified = [subds.repo.pathobj]
            untracked = ['d1']
        assert_repo_status(ds.path, modified=modified, untracked=untracked)
        assert_not_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                      ds.repo.get_content_info())
    else:
        # however, while the subdataset is added (and reported as modified
        # because its content is still untracked) the subrepo
        # cannot be added (it has no commit)
        # worse: its untracked file has been added to the superdataset
        assert_repo_status(ds.path, modified=['d2/subds'])
        assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                  ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())
def test_empty_git_upstairs(topdir=None):
    # create() doesn't get confused by an empty .git/ upstairs (gh-3473)
    assert_in_results(
        create(op.join(topdir, "empty", "ds"), **raw),
        status="ok", type="dataset", action="create")
    # ... and it will ignore non-meaningful content in .git
    assert_in_results(
        create(op.join(topdir, "nonempty", "ds"), **raw),
        status="ok", type="dataset", action="create")
    # ... but it will raise if it detects a valid repo
    # (by existence of .git/HEAD as defined in GitRepo._valid_git_test_path)
    with assert_raises(CommandError):
        create(op.join(topdir, "git_with_head", "ds"), **raw)
def test_download_url_archive(toppath=None, topurl=None, path=None):
    ds = Dataset(path).create()
    ds.download_url([topurl + "archive.tar.gz"], archive=True)
    ok_(ds.repo.file_has_content(opj("archive", "file1.txt")))
    assert_not_in(opj(ds.path, "archive.tar.gz"), ds.repo.format_commit("%B"))
    # add-archive-content should yield an 'impossible' result when there is
    # untracked content (gh-6170)
    create_tree(ds.path, {'this': 'dirty'})
    assert_in_results(
        ds.download_url([topurl + "archive.tar.gz"], archive=True,
                        on_failure='ignore'),
        status='impossible',
        action='add-archive-content',
        message='clean dataset required. Use `datalad status` to inspect '
                'unsaved changes')
def test_update_adjusted_incompatible_with_ff_only(path=None):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")

    maybe_adjust_repo(ds_clone.repo)
    assert_in_results(ds_clone.update(merge="ff-only", on_failure="ignore"),
                      action="update", status="impossible")
    assert_in_results(ds_clone.update(on_failure="ignore"),
                      action="update", status="ok")
def test_arg_missing(path=None, path2=None):
    # test fix for gh-3553
    ds = create(path)
    assert_raises(
        InsufficientArgumentsError,
        ds.siblings,
        'add', url=path2,
    )
    assert_status('ok', ds.siblings('add', url=path2, name='somename'))

    # trigger some name guessing functionality that will still not be able
    # to end up using a hostname-spec despite being given a URL
    if not on_windows:
        # the trick with the file:// URL creation only works on POSIX
        # the underlying tested code here is not about paths, though,
        # so it is good enough to run this on POSIX systems to be
        # reasonably sure that things work
        assert_raises(
            InsufficientArgumentsError,
            ds.siblings,
            'add', url=f'file://{path2}',
        )

    # there is no name guessing with 'configure'
    assert_in_results(
        ds.siblings('configure', url='http://somename',
                    on_failure='ignore'),
        status='error',
        message='need sibling `name` for configuration')

    # needs a URL
    assert_raises(InsufficientArgumentsError,
                  ds.siblings, 'add', name='somename')
    # just pushurl is OK
    assert_status('ok', ds.siblings('add', pushurl=path2, name='somename2'))

    # needs group with groupwanted
    assert_raises(InsufficientArgumentsError,
                  ds.siblings, 'add', url=path2, name='somename',
                  annex_groupwanted='whatever')
def test_copy_file_prevent_dotgit_placement(srcpath=None, destpath=None):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git', recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the dest does not
    assert_in_results(
        dest.copy_file(
            [sub.pathobj / '.git' / 'config',
             dest.pathobj / 'some' / '.git'],
            on_failure='ignore'),
        status='impossible',
        action='copy_file')

    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])

    # A source path can have a leading .git/ if the destination is outside
    # of .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())

    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())

    # But we only waste so many cycles trying to prevent foot shooting.
    # This next one sneaks by because only .name, not all upstream parts,
    # is checked for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())
def test_no_annex(path=None):
    # a few smoke tests regarding the 'here' sibling
    ds = create(path, annex=False)
    res = ds.siblings('configure',
                      name='here',
                      description='very special',
                      on_failure='ignore',
                      result_renderer='disabled')
    assert_status('impossible', res)

    res = ds.siblings('enable',
                      name='doesnotmatter',
                      on_failure='ignore',
                      result_renderer='disabled')
    assert_in_results(res,
                      status='impossible',
                      message='cannot enable sibling of non-annex dataset')
def test_push_wanted(srcpath=None, dstpath=None):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus
    # not to be "pushed")
    src.drop('secure.2', reckless='kill')

    # Annotate sensitive content, actual value "verysecure" does not matter
    # in this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    res = src.push(to='target')
    assert_in_results(
        res, action='copy', path=str(src.pathobj / 'data.0'), status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError,
                      (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of the secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')