def test_clone_unborn_head_no_other_ref(path):
    ds_origin = Dataset(op.join(path, "a")).create(annex=False)
    ds_origin.repo.call_git(
        ["update-ref", "-d", "refs/heads/" + DEFAULT_BRANCH])
    with swallow_logs(new_level=logging.WARNING) as cml:
        clone(source=ds_origin.path, path=op.join(path, "b"))
    assert_in("could not find a branch with commits", cml.out)

def test_reckless(src, top_path, sharedpath):
    ds = clone(src, top_path, reckless=True,
               result_xfm='datasets', return_type='item-or-list')
    eq_(ds.config.get('annex.hardlink', None), 'true')
    # actual value is 'auto', because True is a legacy value and we map it
    eq_(ds.config.get('datalad.clone.reckless', None), 'auto')
    eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
    # now, if we clone another repo into this one, it will inherit the setting
    # without having to provide it explicitly
    sub = ds.clone(src, 'sub',
                   result_xfm='datasets', return_type='item-or-list')
    eq_(sub.config.get('datalad.clone.reckless', None), 'auto')
    eq_(sub.config.get('annex.hardlink', None), 'true')

    if ds.repo.is_managed_branch():
        raise SkipTest("Remainder of test needs proper filesystem permissions")

    # the standard setup keeps the annex locks accessible to the user only
    nok_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode
         & stat.S_IWGRP)
    # but we can set it up for group-shared access too
    sharedds = clone(
        src, sharedpath,
        reckless='shared-group',
        result_xfm='datasets',
        return_type='item-or-list')
    ok_((sharedds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode
        & stat.S_IWGRP)

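# A minimal usage sketch (not part of the original suite) of the reckless
# modes the test above exercises. `reckless=True` is a legacy spelling that
# gets mapped to 'auto'; 'ephemeral' links the clone's annex to the origin's
# annex, and 'shared-group' sets up group-shared file permissions. The paths
# below are hypothetical placeholders.
def demo_reckless_modes(origin_path, workdir):
    from datalad.api import clone
    # throwaway clone: annex objects may be hardlinked, repo is untrusted
    ds = clone(origin_path, workdir + '/throwaway', reckless='auto')
    # ephemeral clone: shares the origin's annex instead of copying it
    eph = clone(origin_path, workdir + '/eph', reckless='ephemeral')
    # group-shared clone: annex locks/objects are group-writable
    shared = clone(origin_path, workdir + '/shared', reckless='shared-group')
    return ds, eph, shared
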
def test_expanduser(srcpath, destpath):
    src = Dataset(Path(srcpath) / 'src').create()
    dest = Dataset(Path(destpath) / 'dest').create()

    with chpwd(destpath), patch.dict('os.environ', {'HOME': srcpath}):
        res = clone(op.join('~', 'src'), 'dest', result_xfm=None,
                    return_type='list', on_failure='ignore')
        assert_result_count(res, 1)
        assert_result_count(
            res, 1, action='install', status='error', path=dest.path,
            message='target path already exists and not empty, refuse to '
                    'clone into target path')
        # wipe out destination, and try again
        assert_status('ok', remove(dataset=dest, check=False))
        # now it should do it, and clone the right one
        cloneds = clone(op.join('~', 'src'), 'dest')
        eq_(cloneds.pathobj, Path(destpath) / 'dest')
        eq_(src.id, cloneds.id)
        # and it shouldn't fail when doing it again, because it detects
        # the re-clone
        cloneds = clone(op.join('~', 'src'), 'dest')
        eq_(cloneds.pathobj, Path(destpath) / 'dest')

def test_fetch_git_special_remote(url_path, url, path):
    url_path = Path(url_path)
    path = Path(path)
    ds_special = Dataset(url_path / "special").create(force=True)
    if ds_special.repo.is_managed_branch():
        # TODO: git-annex-init fails in the second clone call below when this
        # is executed under ./tools/eval_under_testloopfs.
        raise SkipTest("Test fails on managed branch")
    ds_special.save()
    ds_special.repo.call_git(["update-server-info"])

    clone_url = url + "special/.git"
    ds_a = clone(clone_url, path / "a")
    ds_a.repo._run_annex_command(
        "initremote",
        annex_options=["special", "type=git", "autoenable=true",
                       "location=" + clone_url])

    # Set up a situation where a file is present only on the special remote,
    # and its existence is known only to the special remote's git-annex
    # branch.
    (ds_special.pathobj / "f1").write_text("1")
    ds_special.save()
    ds_special.repo.call_git(["update-server-info"])

    ds_a.repo.fetch("origin")
    ds_a.repo.merge("origin/" + DEFAULT_BRANCH)

    ds_b = clone(ds_a.path, path / "other")
    ds_b.get("f1")
    ok_(ds_b.repo.file_has_content("f1"))

def test_invalid_args(path, otherpath, alienpath):
    # source == path
    assert_raises(ValueError, clone, 'Zoidberg', path='Zoidberg')
    assert_raises(ValueError, clone, 'ssh://mars/Zoidberg',
                  path='ssh://mars/Zoidberg')

    # "invalid URL" is a valid filepath... and since no clone to remote
    # is possible - we can just assume that it is the (legit) file path
    # which is provided, not a URL. So both below should fail as any
    # other clone from a non-existing source and not for the reason of
    # "invalid something". Behavior is similar to how Git performs - can
    # clone into a URL-like path.

    # install to an "invalid URL" path
    res = clone('Zoidberg', path='ssh://mars:Zoidberg', on_failure='ignore')
    assert_status('error', res)
    # install to a "remote location" path
    res = clone('Zoidberg', path='ssh://mars/Zoidberg', on_failure='ignore')
    assert_status('error', res)

    # make fake dataset
    ds = create(path)
    assert_raises(IncompleteResultsError, ds.clone, '/higherup.', 'Zoidberg')
    # make real dataset, try to install outside
    ds_target = create(Path(otherpath) / 'target')
    assert_raises(ValueError, ds_target.clone, ds.path, path=ds.path)
    assert_status('error',
                  ds_target.clone(ds.path, path=alienpath,
                                  on_failure='ignore'))

def test_ephemeral(origin_path, clone1_path, clone2_path):
    file_test = Path('ds') / 'test.txt'
    file_testsub = Path('ds') / 'subdir' / 'testsub.txt'

    origin = Dataset(origin_path).create(force=True)
    origin.save()

    # 1. clone via path
    clone1 = clone(origin_path, clone1_path, reckless='ephemeral')

    can_symlink = has_symlink_capability()

    if can_symlink:
        clone1_annex = (clone1.repo.dot_git / 'annex')
        ok_(clone1_annex.is_symlink())
        ok_(clone1_annex.resolve().samefile(origin.repo.dot_git / 'annex'))
        if not clone1.repo.is_managed_branch():
            # TODO: We can't properly handle adjusted branch yet
            eq_((clone1.pathobj / file_test).read_text(), 'some')
            eq_((clone1.pathobj / file_testsub).read_text(), 'somemore')

    # 2. clone via file-scheme URL
    clone2 = clone('file://' + Path(origin_path).as_posix(), clone2_path,
                   reckless='ephemeral')

    if can_symlink:
        clone2_annex = (clone2.repo.dot_git / 'annex')
        ok_(clone2_annex.is_symlink())
        ok_(clone2_annex.resolve().samefile(origin.repo.dot_git / 'annex'))
        if not clone2.repo.is_managed_branch():
            # TODO: We can't properly handle adjusted branch yet
            eq_((clone1.pathobj / file_test).read_text(), 'some')
            eq_((clone1.pathobj / file_testsub).read_text(), 'somemore')

    # 3. add something to clone1 and push back to origin; availability from
    # clone1 should not be propagated (we declared 'here' dead to that end)
    (clone1.pathobj / 'addition.txt').write_text("even more")
    clone1.save()
    origin.config.set("receive.denyCurrentBranch", "updateInstead",
                      where="local")
    # Note, the only thing to test is git-annex-dead here,
    # if we couldn't symlink:
    clone1.publish(to='origin',
                   transfer_data='none' if can_symlink else 'auto')
    if not origin.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origin.repo.get_hexsha(), clone1.repo.get_hexsha())
    res = origin.repo.whereis("addition.txt")
    if can_symlink:
        # obv. present in origin, but this is not yet known to origin:
        eq_(res, [])
        res = origin.repo.fsck()
        assert_result_count(res, 3, success=True)
        # TODO: Double check whether annex reports POSIX paths on windows!
        eq_({str(file_test), str(file_testsub), "addition.txt"},
            {r['file'] for r in res})
        # now origin knows:
    res = origin.repo.whereis("addition.txt")
    eq_(res, [origin.config.get("annex.uuid")])

def test_remove_nowhining(path=None):
    # when removing a dataset under a dataset (but not a subdataset)
    # it should not provide a meaningless message that something was not right
    ds = Dataset(path).create()
    # just install/clone inside of it
    subds_path = ds.pathobj / 'subds'
    clone(path=subds_path, source=path)
    remove(dataset=subds_path)  # should remove just fine

def test_update_fetch_all(path=None):
    path = Path(path)
    remote_1 = str(path / "remote_1")
    remote_2 = str(path / "remote_2")

    ds = Dataset(path / "src").create()
    src = ds.repo.path

    ds_rmt1 = clone(source=src, path=remote_1)
    ds_rmt2 = clone(source=src, path=remote_2)

    ds.siblings('add', name="sibling_1", url=remote_1)
    ds.siblings('add', name="sibling_2", url=remote_2)

    # modify the remotes:
    (ds_rmt1.pathobj / "first.txt").write_text("some file load")
    ds_rmt1.save()
    # TODO: Modify an already present file!

    (ds_rmt2.pathobj / "second.txt").write_text("different file load")
    ds_rmt2.save()

    # Let's init some special remote which we couldn't really update/fetch
    if not dl_cfg.get('datalad.tests.dataladremote'):
        ds.repo.init_remote(
            'datalad',
            ['encryption=none', 'type=external', 'externaltype=datalad'])

    # fetch all remotes
    assert_result_count(ds.update(), 1, status='ok', type='dataset')

    # no merge, so changes are not in active branch:
    assert_not_in("first.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    assert_not_in("second.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    # but we know the changes in remote branches:
    assert_in("first.txt", ds.repo.get_files("sibling_1/" + DEFAULT_BRANCH))
    assert_in("second.txt", ds.repo.get_files("sibling_2/" + DEFAULT_BRANCH))

    # no merge strategy for multiple remotes yet:
    # more clever now, there is a tracking branch that provides a remote
    #assert_raises(NotImplementedError, ds.update, merge=True)

    # merge a certain remote:
    assert_result_count(
        ds.update(sibling='sibling_1', merge=True),
        1, action='update', status='ok', type='dataset')

    # changes from sibling_2 still not present:
    assert_not_in("second.txt",
                  ds.repo.get_files(ds.repo.get_active_branch()))
    # changes from sibling_1 merged:
    assert_in("first.txt",
              ds.repo.get_files(ds.repo.get_active_branch()))

    # it's known to annex, but has no content yet:
    annexprops = ds.repo.get_file_annexinfo("first.txt",
                                            eval_availability=True)
    annexprops['key']  # blows if unknown
    eq_(False, annexprops['has_content'])

def test_as_common_datasource(testbed=None, viapath=None, viaurl=None,
                              remotepath=None, url=None):
    ds = Dataset(remotepath).create()
    (ds.pathobj / 'testfile').write_text('likemagic')
    (ds.pathobj / 'testfile2').write_text('likemagic2')
    ds.save()
    # make clonable via HTTP
    ds.repo.call_git(['update-server-info'])

    # this does not work for remotes that have path URLs
    ds_frompath = clone(source=remotepath, path=viapath)
    res = ds_frompath.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike',
        on_failure='ignore',
        result_renderer='disabled',
    )
    assert_in_results(
        res,
        status='impossible',
        message='cannot configure as a common data source, URL protocol '
                'is not http or https',
    )
    # but it works for HTTP
    ds_fromurl = clone(source=url, path=viaurl)
    res = ds_fromurl.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike2',
        result_renderer='disabled',
    )
    assert_status('ok', res)
    # same thing should be possible by adding a fresh remote
    res = ds_fromurl.siblings(
        'add',
        name='fresh',
        url=url,
        as_common_datasrc='fresh-sr',
        result_renderer='disabled',
    )
    assert_status('ok', res)

    # now try if it works. we will clone the clone, and get a repo that does
    # not know its ultimate origin. still, we should be able to pull data
    # from it via the special remote
    testbed = clone(source=ds_fromurl, path=testbed)
    assert_status('ok', testbed.get('testfile'))
    eq_('likemagic', (testbed.pathobj / 'testfile').read_text())
    # and the other one
    assert_status('ok', testbed.get('testfile2'))

def test_clone_isnt_a_smartass(origin_path, path):
    origin = create(origin_path)
    cloned = clone(origin, path,
                   result_xfm='datasets', return_type='item-or-list')
    with chpwd(path):
        # now we are inside a dataset clone, and we make another one.
        # we do not want automatic subdatasetification without being given
        # a dataset explicitly
        clonedsub = clone(origin, 'testsub',
                          result_xfm='datasets', return_type='item-or-list')
    # correct destination
    assert clonedsub.path.startswith(path)
    # no subdataset relation
    eq_(cloned.subdatasets(), [])

def test_clone_report_permission_issue(tdir):
    pdir = Path(tdir) / 'protected'
    pdir.mkdir()
    # make it read-only
    pdir.chmod(0o555)
    with chpwd(pdir):
        # first check the premise of the test. If we can write (strangely
        # mounted/crippled file system), subsequent assumptions are violated
        # and we can stop
        probe = Path('probe')
        try:
            probe.write_text('should not work')
            raise SkipTest
        except PermissionError:
            # we are indeed in a read-only situation
            pass
        res = clone('///', result_xfm=None, return_type='list',
                    on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir '%s/%s': "
                    "Permission denied" % (pdir, get_datasets_topdir()))

def test_ria_http_storedataladorg(path):
    # can we clone from the store w/o any dedicated config
    ds = clone(
        'ria+http://store.datalad.org#{}'.format(datalad_store_testds_id),
        path)
    ok_(ds.is_installed())
    eq_(ds.id, datalad_store_testds_id)

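# For reference, a sketch of the RIA URL forms exercised throughout these
# tests (a summary of what the tests themselves use, not an exhaustive
# spec): 'ria+' prefixes a regular URL pointing at the store, and the URL
# fragment selects a dataset either by its ID or, with a '~' prefix, by a
# store alias (see test_create_alias below). Store URL, ID, and alias here
# are hypothetical placeholders.
def demo_ria_urls(store_url, dsid, path):
    from datalad.api import clone
    # clone a dataset from the store by its ID
    by_id = clone('ria+{}#{}'.format(store_url, dsid), path + '/by-id')
    # clone the same store entry by alias
    by_alias = clone('ria+{}#~my-alias'.format(store_url), path + '/by-alias')
    return by_id, by_alias
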
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1',
              ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1',
                  ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s', res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # a new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1',
                  ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1',
              ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

def test_sibling_enable_sameas(repo=None, clone_path=None):
    ds = Dataset(repo.path)
    create_tree(ds.path, {"f0": "0"})
    ds.save(path="f0")
    ds.push(["f0"], to="r_dir")

    ds.repo.drop(["f0"])
    ds_cloned = clone(ds.path, clone_path)
    assert_false(ds_cloned.repo.file_has_content("f0"))

    # does not work without a name
    res = ds_cloned.siblings(
        action="enable",
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res, status='error',
                      message='require `name` of sibling to enable')

    # does not work with the wrong name
    res = ds_cloned.siblings(
        action="enable",
        name='wrong',
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res, status='impossible',
                      message=("cannot enable sibling '%s', not known",
                               'wrong'))

    # works with the right name
    res = ds_cloned.siblings(action="enable", name="r_rsync")
    assert_status("ok", res)
    ds_cloned.get(path=["f0"])
    ok_(ds_cloned.repo.file_has_content("f0"))

def test_orc_datalad_no_remote_get(tmpdir, shell, should_pass):
    import datalad.api as dl

    topdir = str(tmpdir)
    ds_a = dl.create(op.join(topdir, "a"))
    if should_pass:
        (ds_a.pathobj / "foo").write_text("data")
        ds_a.save()

    ds_b = dl.clone(ds_a.path, op.join(topdir, "b"))
    assert not ds_b.repo.file_has_content("foo")
    with chpwd(ds_b.path):
        orc = orcs.DataladNoRemoteOrchestrator(
            shell, submission_type="local",
            job_spec={
                "root_directory": op.join(topdir, "run-root"),
                "inputs": ["foo"],
                "outputs": ["out"],
                "_resolved_command_str": 'sh -c "cat foo foo >out"'})
        if should_pass:
            orc.prepare_remote()
            orc.submit()
            orc.follow()

            finish_fn = MagicMock()
            orc.fetch(on_remote_finish=finish_fn)
            finish_fn.assert_called_once_with(orc.resource, [])
            assert (ds_b.pathobj / "out").read_text() == "datadata"
        else:
            with pytest.raises(OrchestratorError):
                orc.prepare_remote()

def test_inherit_src_candidates(lcl, storepath, url):
    lcl = Path(lcl)
    storepath = Path(storepath)
    # dataset with a subdataset
    ds1 = Dataset(lcl / 'ds1').create()
    ds1sub = ds1.create('sub')
    # a different dataset into which we install ds1, but do not touch its
    # subdataset
    ds2 = Dataset(lcl / 'ds2').create()
    ds2.clone(source=ds1.path, path='mysub')

    # we give no dataset a source candidate config!
    # move all datasets into the store
    for d in (ds1, ds1sub, ds2):
        _move2store(storepath, d)

    # now we must be able to obtain all three datasets from the store
    riaclone = clone(
        'ria+{}#{}'.format(
            # store URL
            url,
            # ID of the root dataset
            ds2.id),
        lcl / 'clone',
    )
    # what happens is that the initial clone call sets a source candidate
    # config, because it sees the dataset coming from a store.
    # all obtained subdatasets get the config inherited on-clone
    datasets = riaclone.get(
        '.', get_data=False, recursive=True, result_xfm='datasets')
    # we get two subdatasets
    eq_(len(datasets), 2)
    for ds in datasets:
        eq_(ConfigManager(dataset=ds, source='dataset-local').get(
                'datalad.get.subdataset-source-candidate-200origin'),
            'ria+%s#{id}' % url)

def test_autoenabled_remote_msg(path):
    # Verify that no message about a remote not being enabled is displayed
    # whenever the remote we clone is the type=git special remote, so the
    # name of the remote might not match
    with swallow_logs(new_level=logging.INFO) as cml:
        res = clone('///repronim/containers', path,
                    result_xfm=None, return_type='list')
        assert_status('ok', res)
        assert_not_in("not auto-enabled", cml.out)

def test_reckless(path, top_path):
    ds = clone(path, top_path, reckless=True,
               result_xfm='datasets', return_type='item-or-list')
    eq_(ds.config.get('annex.hardlink', None), 'true')
    eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)

def test_clone_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = clone(url, result_xfm='datasets', return_type='item-or-list')

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())

def test_clone_isnot_recursive(src, path_nr, path_r):
    ds = clone(src, path_nr,
               result_xfm='datasets', return_type='item-or-list')
    ok_(ds.is_installed())
    # check nothing is unintentionally installed
    subdss = ds.subdatasets(recursive=True)
    assert_result_count(subdss, len(subdss), state='absent')
    # this also means subdatasets are to be listed as not fulfilled:
    eq_(set(ds.subdatasets(recursive=True, fulfilled=False,
                           result_xfm='relpaths')),
        {'subm 1', '2'})

def check_reckless(annex, src_path, top_path, sharedpath):
    # super with or without annex
    src = Dataset(src_path).create(annex=annex)
    # sub always with annex
    srcsub = src.create('sub')

    # and for the actual test
    ds = clone(src.path, top_path, reckless=True,
               result_xfm='datasets', return_type='item-or-list')
    is_crippled = srcsub.repo.is_managed_branch()
    if annex and not is_crippled:
        eq_(ds.config.get('annex.hardlink', None), 'true')
    # actual value is 'auto', because True is a legacy value and we map it
    eq_(ds.config.get('datalad.clone.reckless', None), 'auto')
    if annex:
        eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
    # now, if we clone another repo into this one, it will inherit the
    # setting without having to provide it explicitly
    newsub = ds.clone(srcsub, 'newsub',
                      result_xfm='datasets', return_type='item-or-list')
    # and `get` the original subdataset
    origsub = ds.get('sub', result_xfm='datasets',
                     return_type='item-or-list')
    for sds in (newsub, origsub):
        eq_(sds.config.get('datalad.clone.reckless', None), 'auto')
        if not is_crippled:
            eq_(sds.config.get('annex.hardlink', None), 'true')

    if is_crippled:
        raise SkipTest("Remainder of test needs proper filesystem permissions")

    if annex:
        # the standard setup keeps the annex locks accessible to the user only
        nok_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode
             & stat.S_IWGRP)
        # but we can set it up for group-shared access too
        sharedds = clone(
            src, sharedpath,
            reckless='shared-group',
            result_xfm='datasets',
            return_type='item-or-list')
        ok_((sharedds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode
            & stat.S_IWGRP)

def _test_ria_postclonecfg(url, dsid, clone_path):
    # Test cloning from a RIA store while ORA special remote autoenabling
    # failed due to an invalid URL from the POV of the cloner.
    # Origin's git-config-file should contain the UUID to enable. This needs
    # to work via HTTP, SSH and local cloning.

    # Autoenabling should fail initially by git-annex-init and we would
    # report on INFO level. Only the postclone routine would deal with it.
    with swallow_logs(new_level=logging.INFO) as cml:
        # First, the super ds:
        riaclone = clone('ria+{}#{}'.format(url, dsid), clone_path)
        cml.assert_logged(msg="access to 1 dataset sibling store-storage not "
                              "auto-enabled",
                          level="INFO",
                          regex=False)

    # However, we now can retrieve content since clone should have enabled
    # the special remote with a new URL (or origin in case of HTTP).
    res = riaclone.get('test.txt')
    assert_result_count(
        res, 1,
        status='ok',
        path=str(riaclone.pathobj / 'test.txt'),
        message="from {}...".format(
            "origin" if url.startswith('http') else "store-storage"))

    # same thing for the sub ds (we don't need a store-url and id - get
    # should figure those out itself):
    with swallow_logs(new_level=logging.INFO) as cml:
        riaclonesub = riaclone.get(
            op.join('subdir', 'subds'), get_data=False,
            result_xfm='datasets', return_type='item-or-list')
        cml.assert_logged(msg="access to 1 dataset sibling store-storage not "
                              "auto-enabled",
                          level="INFO",
                          regex=False)
    res = riaclonesub.get('testsub.txt')
    assert_result_count(
        res, 1,
        status='ok',
        path=str(riaclonesub.pathobj / 'testsub.txt'),
        message="from {}...".format(
            "origin" if url.startswith('http') else "store-storage"))

    # finally get the plain git subdataset.
    # Clone should figure to also clone it from a ria+ URL
    # (subdataset-source-candidate). Notice that there wasn't an autoenabled
    # ORA remote, but get shouldn't stumble over that, since this is plain git.
    res = riaclone.get(op.join('subdir', 'subgit', 'testgit.txt'))
    assert_result_count(res, 1, status='ok', type='dataset', action='install')
    assert_result_count(res, 1, status='notneeded', type='file')
    assert_result_count(res, 2)

def get_tests_data_dir(dl_dset, dset_url=None, commit_ref=None):
    """Get the path to the test data directory.

    If the test data directory does not exist or is not populated,
    install it with datalad.
    """
    logger = logging.getLogger("Test data setup")

    if not dl_dset.is_installed():
        if dl_dset.pathobj.exists():
            raise ValueError(
                f"{dl_dset.path} exists but is not a datalad repository")
        else:
            try:
                global dl_lock
                dl_lock.acquire()
                if not dl_dset.is_installed():
                    logger.warning("Installing test data")
                    if not dset_url:
                        raise ValueError(
                            f"{dl_dset.path} is not installed and a url is "
                            "not provided.")
                    datalad.clone(dset_url, dl_dset.path)
            finally:
                dl_lock.release()

    # In the case where a datalad repository is read-only, the correct git
    # ref/commit may not be checked out; checking out a specific ref is not
    # supported yet, so raise an error in that case
    if commit_ref is not None:
        raise NotImplementedError

    # confirm the repo is user-writable
    some_files = [".git/logs/HEAD"]
    for f in some_files:
        data_file = dl_dset.pathobj / f
        if not data_file.exists():
            raise ValueError(
                f"{f} does not exist "
                f"(parent exists: {data_file.parent.exists()})")
        if not os.access(data_file, os.W_OK):
            raise ValueError(f"{f} is not user-writable ({os.getuid()})")

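# A minimal usage sketch for get_tests_data_dir() (not from the original
# code). It assumes the module-level `dl_lock` that the function references
# already exists; the dataset path and URL below are hypothetical
# placeholders.
def demo_get_tests_data_dir():
    import datalad.api as datalad

    # hypothetical local dataset location
    dset = datalad.Dataset("/tmp/test_data")
    # installs the dataset on first use, no-op if already installed
    get_tests_data_dir(
        dset,
        dset_url="https://example.com/test_data.git",  # hypothetical URL
    )
    return dset.pathobj
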
def test_create_alias(ds_path, ria_path, clone_path):
    ds_path = Path(ds_path)
    clone_path = Path(clone_path)

    ds_path.mkdir()
    dsa = Dataset(ds_path / "a").create()

    res = dsa.create_sibling_ria(url="ria+file://{}".format(ria_path),
                                 name="origin",
                                 alias="ds-a")
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_(len(res), 1)

    ds_clone = clone(source="ria+file://{}#~ds-a".format(ria_path),
                     path=clone_path / "a")
    assert_repo_status(ds_clone.path)

    # multiple datasets in a RIA store with different aliases work
    dsb = Dataset(ds_path / "b").create()

    res = dsb.create_sibling_ria(url="ria+file://{}".format(ria_path),
                                 name="origin",
                                 alias="ds-b")
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_(len(res), 1)

    ds_clone = clone(source="ria+file://{}#~ds-b".format(ria_path),
                     path=clone_path / "b")
    assert_repo_status(ds_clone.path)

    # a second dataset in a RIA store with the same alias emits a warning
    dsc = Dataset(ds_path / "c").create()
    with swallow_logs(logging.WARNING) as cml:
        res = dsc.create_sibling_ria(url="ria+file://{}".format(ria_path),
                                     name="origin",
                                     alias="ds-a")
        assert_in(
            "Alias 'ds-a' already exists in the RIA store, "
            "not adding an alias",
            cml.out)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_(len(res), 1)

def test_clone_datasets_root(tdir):
    with chpwd(tdir):
        ds = clone("///", result_xfm='datasets', return_type='item-or-list')
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, get_datasets_topdir()))

        # do it a second time:
        res = clone("///", on_failure='ignore')
        assert_message("dataset %s was already cloned from '%s'", res)
        assert_status('notneeded', res)

        # and a third time into an existing something that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")
        res = clone('///', path="sub", on_failure='ignore')
        assert_message(
            'target path already exists and not empty, refuse to clone '
            'into target path',
            res)
        assert_status('error', res)

def test_readonly_dataset_access(path):
    # clone from OSF; ds is self-contained at OSF
    ds = clone('osf://q8xnk', path)
    # standard name storage remote
    assert_in('osf-storage', ds.repo.get_remotes())
    for avail in ds.repo.whereis('inannex'):
        assert_in('7784367b-69c6-483d-9564-67f840715890', avail)
    test_file = ds.repo.pathobj / 'inannex' / 'animated.gif'
    eq_(ds.repo.annexstatus([test_file])[test_file]['has_content'], False)
    ds.repo.call_git(['annex', 'copy', str(test_file), '-f', 'osf-storage'])
    eq_(ds.repo.annexstatus([test_file])[test_file]['has_content'], True)

def test_clone_crcns(tdir, ds_path):
    with chpwd(tdir):
        res = clone('///', path="all-nonrecursive", on_failure='ignore')
        assert_status('ok', res)

    # again, but into an existing dataset:
    ds = create(ds_path)
    crcns = ds.clone("///crcns", result_xfm='datasets',
                     return_type='item-or-list')
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.subdatasets(result_xfm='paths'))

def test_cfg_originorigin(path):
    path = Path(path)
    origin = Dataset(path / 'origin').create()
    (origin.pathobj / 'file1.txt').write_text('content')
    origin.save()
    clone_lev1 = clone(origin, path / 'clone_lev1')
    clone_lev2 = clone(clone_lev1, path / 'clone_lev2')
    # the goal is to be able to get file content from origin without
    # the need to configure it manually
    assert_result_count(
        clone_lev2.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev2.pathobj / 'file1.txt'),
    )
    eq_((clone_lev2.pathobj / 'file1.txt').read_text(), 'content')
    eq_(
        Path(clone_lev2.siblings(
            'query',
            name='origin-2',
            return_type='item-or-list')['url']),
        origin.pathobj
    )

    # Clone another level, this time with a relative path. Drop content from
    # lev2 so that origin is the only place that the file is available from.
    clone_lev2.drop("file1.txt")
    with chpwd(path), swallow_logs(new_level=logging.DEBUG) as cml:
        clone_lev3 = clone('clone_lev2', 'clone_lev3')
        # we called git-annex-init; see gh-4367:
        cml.assert_logged(
            msg=r"[^[]*Async run \[('git', 'annex'|'git-annex'), 'init'",
            match=False,
            level='DEBUG')
    assert_result_count(
        clone_lev3.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev3.pathobj / 'file1.txt'))

def test_ensure_datalad_remote_maybe_enable(path=None, *, autoenable):
    path = Path(path)
    ds_a = Dataset(path / "a").create(force=True)
    init_datalad_remote(ds_a.repo, DATALAD_SPECIAL_REMOTE,
                        autoenable=autoenable)

    ds_b = clone(source=ds_a.path, path=path / "b")
    repo = ds_b.repo
    if not autoenable:
        assert_not_in("datalad", repo.get_remotes())
    ensure_datalad_remote(repo)
    assert_in("datalad", repo.get_remotes())

def test_ephemeral(ds_path=None, store_path=None, clone_path=None):
    dspath = Path(ds_path)
    store = Path(store_path)
    file_test = Path('file1.txt')
    file_testsub = Path('sub') / 'other.txt'

    # create the original dataset
    ds = Dataset(dspath)
    ds.create(force=True)
    ds.save()

    # put into store:
    ds.create_sibling_ria("ria+{}".format(store.as_uri()), "riastore",
                          new_store_ok=True)
    ds.push(to="riastore", data="anything")

    # now, get an ephemeral clone from the RIA store:
    eph_clone = clone('ria+{}#{}'.format(store.as_uri(), ds.id), clone_path,
                      reckless="ephemeral")

    # ephemeral clone was properly linked (store has bare repos!):
    clone_annex = (eph_clone.repo.dot_git / 'annex')
    assert_true(clone_annex.is_symlink())
    assert_true(clone_annex.resolve().samefile(
        store / ds.id[:3] / ds.id[3:] / 'annex'))

    if not eph_clone.repo.is_managed_branch():
        # TODO: We can't properly handle adjusted branch yet
        # we don't need to get files in order to access them:
        assert_equal((eph_clone.pathobj / file_test).read_text(), "some")
        assert_equal((eph_clone.pathobj / file_testsub).read_text(), "other")

        # can we unlock those files?
        eph_clone.unlock(file_test)
        # change content
        (eph_clone.pathobj / file_test).write_text("new content")
        eph_clone.save()

        # new content should already be in store
        # (except the store doesn't know yet)
        res = eph_clone.repo.fsck(remote="riastore-storage", fast=True)
        assert_equal(len(res), 2)
        assert_result_count(res, 1, success=True,
                            file=file_test.as_posix())
        assert_result_count(res, 1, success=True,
                            file=file_testsub.as_posix())

        # push back git history
        eph_clone.push(to=DEFAULT_REMOTE, data="nothing")

        # get an update in origin
        ds.update(merge=True, reobtain_data=True)
        assert_equal((ds.pathobj / file_test).read_text(), "new content")

def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path,
                   result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')

def test_clone_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = clone(src, path, description='mydummy',
               result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again, shouldn't matter:
    res = clone(src, path)
    assert_result_values_equal(res, 'source_url', [src])
    assert_status('notneeded', res)
    assert_message("dataset %s was already cloned from '%s'", res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)

def test_install_source_relpath(src, dest):
    create(src)
    src_ = basename(src)
    with chpwd(dirname(src)):
        clone(src_, dest)