def test_push_url(storepath, dspath, blockfile):

    dspath = Path(dspath)
    store = Path(storepath)
    blockfile = Path(blockfile)
    blockfile.touch()

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = LocalIO()
    store_url = "ria+{}".format(store.as_uri())
    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # initremote fails with invalid url (not a ria+ URL):
    invalid_url = (store.parent / "non-existent").as_uri()
    init_opts = common_init_opts + ['url={}'.format(store_url),
                                    'push-url={}'.format(invalid_url)]
    assert_raises(CommandError,
                  ds.repo.init_remote, 'store', options=init_opts)

    # initremote succeeds with valid but inaccessible URL (pointing to a
    # file instead of a store):
    block_url = "ria+" + blockfile.as_uri()
    init_opts = common_init_opts + ['url={}'.format(store_url),
                                    'push-url={}'.format(block_url)]
    ds.repo.init_remote('store', options=init_opts)

    # but a push will fail:
    assert_raises(CommandError, ds.repo.call_annex,
                  ['copy', 'one.txt', '--to', 'store'])

    # reconfigure with correct push-url:
    init_opts = common_init_opts + ['url={}'.format(store_url),
                                    'push-url={}'.format(store_url)]
    ds.repo.enable_remote('store', options=init_opts)

    # push works now:
    ds.repo.call_annex(['copy', 'one.txt', '--to', 'store'])

    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']
    known_sources = ds.repo.whereis('one.txt')
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)

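# Note on shared fixtures: the tests in this module rely on helpers defined
# elsewhere in the test suite, most prominently ``populate_dataset()`` and
# ``common_init_opts``. For orientation, here is a minimal sketch of what
# such helpers could look like. This is an illustration only -- the file
# names match what the assertions in these tests reference ('one.txt',
# 'subdir/two'); everything else is an assumption, not the authoritative
# definition:
def _populate_dataset_sketch(ds):
    # create the annexed files the tests refer to, with arbitrary content
    for relpath, content in (('one.txt', 'content1'),
                             ('subdir/two', 'content2')):
        fpath = ds.pathobj / relpath
        fpath.parent.mkdir(parents=True, exist_ok=True)
        fpath.write_text(content)


# ``common_init_opts`` is assumed to hold the initremote boilerplate for an
# ORA special remote, roughly:
#   ['encryption=none', 'type=external', 'externaltype=ora',
#    'autoenable=true']
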
def test_ephemeral(ds_path=None, store_path=None, clone_path=None):

    dspath = Path(ds_path)
    store = Path(store_path)
    file_test = Path('file1.txt')
    file_testsub = Path('sub') / 'other.txt'

    # create the original dataset
    ds = Dataset(dspath)
    ds.create(force=True)
    # provide the content the assertions below rely on
    (ds.pathobj / file_test).write_text("some")
    (ds.pathobj / file_testsub).parent.mkdir(parents=True, exist_ok=True)
    (ds.pathobj / file_testsub).write_text("other")
    ds.save()

    # put into store:
    ds.create_sibling_ria("ria+{}".format(store.as_uri()), "riastore",
                          new_store_ok=True)
    ds.push(to="riastore", data="anything")

    # now, get an ephemeral clone from the RIA store:
    eph_clone = clone('ria+{}#{}'.format(store.as_uri(), ds.id), clone_path,
                      reckless="ephemeral")

    # ephemeral clone was properly linked (store has bare repos!):
    clone_annex = (eph_clone.repo.dot_git / 'annex')
    assert_true(clone_annex.is_symlink())
    assert_true(clone_annex.resolve().samefile(
        store / ds.id[:3] / ds.id[3:] / 'annex'))

    if not eph_clone.repo.is_managed_branch():
        # TODO: We can't properly handle adjusted branch yet

        # we don't need to get files in order to access them:
        assert_equal((eph_clone.pathobj / file_test).read_text(), "some")
        assert_equal((eph_clone.pathobj / file_testsub).read_text(), "other")

        # can we unlock those files?
        eph_clone.unlock(file_test)
        # change content
        (eph_clone.pathobj / file_test).write_text("new content")
        eph_clone.save()

        # new content should already be in store
        # (except the store doesn't know yet)
        res = eph_clone.repo.fsck(remote="riastore-storage", fast=True)
        assert_equal(len(res), 2)
        assert_result_count(res, 1, success=True,
                            file=file_test.as_posix())
        assert_result_count(res, 1, success=True,
                            file=file_testsub.as_posix())

        # push back git history
        eph_clone.push(to=DEFAULT_REMOTE, data="nothing")

        # get an update in origin
        ds.update(merge=True, reobtain_data=True)
        assert_equal((ds.pathobj / file_test).read_text(), "new content")

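# The symlink assertion above relies on the RIA store layout: a dataset's
# (bare) repository lives at ``<store>/<id[:3]>/<id[3:]>``. A minimal
# sketch of that path computation, mirroring what get_layout_locations()
# presumably returns as the repository location for layout version 1:
def _store_dataset_path_sketch(store, ds_id):
    # the first three characters of the dataset id form the top-level dir
    return Path(store) / ds_id[:3] / ds_id[3:]
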
def _test_initremote_alias(host, ds_path, store):

    ds_path = Path(ds_path)
    store = Path(store)

    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset with alias
    create_ds_in_store(io, store, ds.id, '2', '1', 'ali')

    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in('ria-remote',
              [cfg['name']
               for uuid, cfg in ds.repo.get_special_remotes().items()])
    assert_repo_status(ds.path)
    assert_true(io.exists(store / "alias" / "ali"))

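# Store-side, an alias is an entry underneath '<store>/alias' referring to
# the dataset's location (the test above only checks its existence; it is
# assumed here -- not verified by the test -- to be a symlink). A
# hypothetical local resolution sketch:
def _resolve_alias_sketch(store, alias):
    # resolve '<store>/alias/<alias>' to the actual dataset directory
    return (Path(store) / 'alias' / alias).resolve()
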
def test_initremote(store_path, store_url, ds_path):
    ds = Dataset(ds_path).create()
    store_path = Path(store_path)
    url = "ria+" + store_url
    init_opts = common_init_opts + ['url={}'.format(url)]

    # fails on non-RIA URL
    assert_raises(CommandError, ds.repo.init_remote, 'ora-remote',
                  options=common_init_opts
                  + ['url={}'.format(store_path.as_uri())])
    # Doesn't actually create a remote if it fails
    assert_not_in(
        'ora-remote',
        [cfg['name'] for uuid, cfg in ds.repo.get_special_remotes().items()])

    ds.repo.init_remote('ora-remote', options=init_opts)
    assert_in(
        'ora-remote',
        [cfg['name'] for uuid, cfg in ds.repo.get_special_remotes().items()])
    assert_repo_status(ds.path)
    # git-annex:remote.log should have:
    # - url
    # - common_init_opts
    # - archive_id (which equals ds id)
    remote_log = ds.repo.call_git(['cat-file', 'blob',
                                   'git-annex:remote.log'])
    assert_in("url={}".format(url), remote_log)
    for c in common_init_opts:
        assert_in(c, remote_log)
    assert_in("archive-id={}".format(ds.id), remote_log)

def test_create_push_url(detection_path=None, ds_path=None,
                         store_path=None):

    store_path = Path(store_path)
    ds_path = Path(ds_path)
    detection_path = Path(detection_path)

    ds = Dataset(ds_path).create(force=True)
    ds.save()

    # patch SSHConnection to signal it was used:
    from datalad.support.sshconnector import SSHManager

    def detector(f, d):
        @wraps(f)
        def _wrapper(*args, **kwargs):
            d.touch()
            return f(*args, **kwargs)
        return _wrapper

    url = "ria+{}".format(store_path.as_uri())
    push_url = "ria+ssh://datalad-test{}".format(store_path.as_posix())
    assert not detection_path.exists()

    with patch('datalad.support.sshconnector.SSHManager.get_connection',
               new=detector(SSHManager.get_connection, detection_path)):

        ds.create_sibling_ria(url, "datastore", push_url=push_url,
                              new_store_ok=True)
        # used ssh_manager despite file-url, hence used push-url (ria+ssh):
        assert detection_path.exists()

        # correct config in special remote:
        sr_cfg = ds.repo.get_special_remotes()[
            ds.siblings(name='datastore-storage')[0]['annex-uuid']]
        eq_(sr_cfg['url'], url)
        eq_(sr_cfg['push-url'], push_url)

        # git remote based on url (local path):
        eq_(ds.config.get("remote.datastore.url"),
            (store_path / ds.id[:3] / ds.id[3:]).as_posix())
        eq_(ds.config.get("remote.datastore.pushurl"),
            "ssh://datalad-test{}".format(
                (store_path / ds.id[:3] / ds.id[3:]).as_posix()))

        # git-push uses SSH:
        detection_path.unlink()
        ds.push('.', to="datastore", data='nothing')
        assert detection_path.exists()

        # data push
        # Note, that here the patching has no effect, since the special
        # remote is running in a subprocess of git-annex. Hence we can't
        # really detect SSH usage. However, the ORA remote is tested
        # elsewhere - if it succeeds, all should be good wrt
        # `create-sibling-ria`.
        ds.repo.call_annex(['copy', '.', '--to', 'datastore-storage'])

def setup_archive_remote(repo, archive_path):
    # for integration in a URL, we need a POSIX version of the path
    archive_path = Path(archive_path)
    if 'DATALAD_TESTS_SSH' in os.environ:
        cfg = {'url': 'ria+ssh://datalad-test{}'.format(
            archive_path.as_posix())}
    else:
        cfg = {'url': 'ria+{}'.format(archive_path.as_uri())}
    initexternalremote(repo, 'archive', 'ora', config=cfg)

def _test_binary_data(host, store, dspath):
    # make sure the special remote deals with binary data and doesn't
    # accidentally involve any decode/encode etc.

    dspath = Path(dspath)
    store = Path(store)

    url = "https://github.com/datalad/example-dicom-functional/blob/master/dicoms/MR.1.3.46.670589.11.38317.5.0.4476.2014042516042547586"
    file = "dicomfile"
    ds = Dataset(dspath).create()
    ds.download_url(url, path=file, message="Add DICOM file from github")
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    # actual data transfer (both directions)
    # Note that we intentionally call annex commands instead of
    # datalad-publish/-get here. We are testing an annex special remote.
    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    ds.repo.call_annex(['move', str(file), '--to', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_not_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
    ds.repo.call_annex(['get', str(file), '--from', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)

def _test_gitannex(host, store, dspath):
    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()

    if ds.repo.is_managed_branch():
        # git-annex-testremote is way too slow on crippled FS.
        # Use is_managed_branch() as a proxy and skip only here
        # instead of in a decorator
        raise SkipTest("Test too slow on crippled FS")

    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    from datalad.support.external_versions import external_versions
    if '8.20200330' < external_versions['cmd:annex'] < '8.20200624':
        # https://git-annex.branchable.com/bugs/testremote_breeds_way_too_many_instances_of_the_externals_remote/?updated
        raise SkipTest(
            "git-annex might lead to overwhelming number of external "
            "special remote instances")

    # run git-annex-testremote
    # Note that we don't want to capture output. If something goes wrong,
    # we want to see it in the test build's output log.
    GitWitlessRunner(cwd=dspath).run(['git', 'annex', 'testremote', 'store'])

def _test_initremote_rewrite(host, ds_path, store):
    # rudimentary repetition of test_initremote_basic, but
    # with url.<base>.insteadOf config, which should not only
    # be respected, but lead to the rewritten URL stored in
    # git-annex:remote.log

    ds_path = Path(ds_path)
    store = Path(store)

    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    url = "mystore:"
    init_opts = common_init_opts + ['url={}'.format(url)]

    if host:
        replacement = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        replacement = "ria+{}".format(store.as_uri())

    ds.config.set("url.{}.insteadOf".format(replacement), url,
                  where='local')

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # run initremote and check what's stored:
    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in('ria-remote',
              [cfg['name']
               for uuid, cfg in ds.repo.get_special_remotes().items()])

    # git-annex:remote.log should have:
    # - rewritten url
    # - common_init_opts
    # - archive_id (which equals ds id)
    remote_log = ds.repo.call_git(['cat-file', 'blob',
                                   'git-annex:remote.log'],
                                  read_only=True)
    assert_in("url={}".format(replacement), remote_log)
    for c in common_init_opts:
        assert_in(c, remote_log)
    assert_in("archive-id={}".format(ds.id), remote_log)

def _test_bare_git_version_1(host, dspath, store):
    # This test should take a dataset and create a bare repository at the
    # remote end from it.
    # Given that it is placed correctly within a tree of datasets, that
    # remote thing should then be usable as an ora-remote as well as a
    # git-type remote.
    # Note: Usability of the git remote by annex depends on dataset layout
    # version (dirhashlower vs. -mixed).
    #       For version 1 (lower) upload and consumption should be
    #       interchangeable. It doesn't matter which remote is used for
    #       which direction.

    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone
    # of it should look like
    subprocess.run(['git', 'clone', '--bare',
                    quote_cmdlinearg(str(dspath)),
                    quote_cmdlinearg(str(bare_repo_path))])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 1 (dirhash lower):
    create_ds_in_store(io, store, ds.id, '1', '1')

    # Now, let's have the bare repo as a git remote and use it with annex
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')

    # copy files to the remote
    ds.repo.copy_to('.', 'bare-git')
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # now we can drop all content locally, reobtain it, and survive a
    # subsequent fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])

    # Now, add the ora remote:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='ora-remote', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 3)

    # Now move content from git-remote to local and see it not being
    # available via bare-git anymore.
    ds.repo.call_annex(['move', '--all', '--from=bare-git'])

    # ora-remote doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # But after fsck it does:
    fsck_res = [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='ora-remote', fast=True)]
    assert_result_count(
        fsck_res, 1,
        status='error',
        message='** Based on the location log, one.txt\n'
                '** was expected to be present, '
                'but its content is missing.')
    assert_result_count(
        fsck_res, 1,
        status='error',
        message='** Based on the location log, subdir/two\n'
                '** was expected to be present, '
                'but its content is missing.')
    eq_(len(ds.repo.whereis('one.txt')), 1)

    # and the other way around: upload via ora-remote and have it
    # available via git-remote:
    ds.repo.copy_to('.', 'ora-remote')
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='bare-git', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 3)

def _test_bare_git_version_2(host, dspath, store):
    # Similarly to test_bare_git_version_1, this should ensure a bare git
    # repo at the store location for a dataset doesn't conflict with the
    # ORA remote.
    # Note: Usability of the git remote by annex depends on dataset layout
    # version (dirhashlower vs. -mixed).
    #       For version 2 (mixed) upload via ORA and consumption via git
    #       should work. But not the other way around, since git-annex
    #       uses dirhashlower with bare repos.

    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone
    # of it should look like
    subprocess.run(['git', 'clone', '--bare',
                    quote_cmdlinearg(str(dspath)),
                    quote_cmdlinearg(str(bare_repo_path))])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 2 (dirhash mixed):
    create_ds_in_store(io, store, ds.id, '2', '1')

    # Now, let's have the bare repo as a git remote
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')
    # and the ORA remote in addition:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # upload keys via ORA:
    ds.repo.copy_to('.', 'ora-remote')
    # bare-git doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='bare-git', fast=True)])
    eq_(len(ds.repo.whereis('one.txt')), 3)
    ds.drop('.')
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # actually consumable via git remote:
    ds.repo.call_annex(['move', 'one.txt', '--from', 'bare-git'])
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # now, move back via git - shouldn't be consumable via ORA
    ds.repo.call_annex(['move', 'one.txt', '--to', 'bare-git'])
    # fsck to make availability known, but there's nothing from POV of ORA:
    fsck_res = [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='ora-remote', fast=True)]
    assert_result_count(
        fsck_res, 1,
        status='error',
        message='** Based on the location log, one.txt\n'
                '** was expected to be present, '
                'but its content is missing.')
    assert_result_count(fsck_res, 1, status='ok')
    eq_(len(fsck_res), 2)
    eq_(len(ds.repo.whereis('one.txt')), 1)

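# Both bare-git tests above hinge on git-annex's two hash directory
# layouts: bare repositories store keys under the lowercase two-level hash
# ("hashdirlower"), non-bare ones under the mixed-case variant
# ("hashdirmixed"). A sketch for computing both locations for a given key
# via git-annex-examinekey (assuming the format variables behave as
# documented for git-annex-find; the key argument is illustrative):
def _hashdirs_sketch(repo_path, key):
    out = subprocess.run(
        ['git', 'annex', 'examinekey',
         '--format=${hashdirlower} ${hashdirmixed}', key],
        cwd=str(repo_path), capture_output=True, text=True, check=True)
    lower, mixed = out.stdout.strip().split()
    return lower, mixed
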
def _test_initremote_basic(host, ds_path, store, link):

    ds_path = Path(ds_path)
    store = Path(store)
    link = Path(link)

    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # fails on non-existing storage location
    assert_raises(CommandError, ds.repo.init_remote, 'ria-remote',
                  options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in('ria-remote',
                  [cfg['name']
                   for uuid, cfg in ds.repo.get_special_remotes().items()])
    # fails on non-RIA URL
    assert_raises(CommandError, ds.repo.init_remote, 'ria-remote',
                  options=common_init_opts
                  + ['url={}'.format(store.as_uri())])
    # Doesn't actually create a remote if it fails
    assert_not_in('ria-remote',
                  [cfg['name']
                   for uuid, cfg in ds.repo.get_special_remotes().items()])

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # still fails, since the ds isn't set up in the store
    assert_raises(CommandError, ds.repo.init_remote, 'ria-remote',
                  options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in('ria-remote',
                  [cfg['name']
                   for uuid, cfg in ds.repo.get_special_remotes().items()])
    # set up the dataset as well
    create_ds_in_store(io, store, ds.id, '2', '1')
    # now it should work
    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in('ria-remote',
              [cfg['name']
               for uuid, cfg in ds.repo.get_special_remotes().items()])
    assert_repo_status(ds.path)
    # git-annex:remote.log should have:
    # - url
    # - common_init_opts
    # - archive_id (which equals ds id)
    remote_log = ds.repo.call_git(['cat-file', 'blob',
                                   'git-annex:remote.log'],
                                  read_only=True)
    assert_in("url={}".format(url), remote_log)
    for c in common_init_opts:
        assert_in(c, remote_log)
    assert_in("archive-id={}".format(ds.id), remote_log)

    # re-configuring with an invalid URL should fail:
    assert_raises(
        CommandError,
        ds.repo.call_annex,
        ['enableremote', 'ria-remote'] + common_init_opts
        + ['url=ria+file:///non-existing'])
    # but re-configuring with a valid URL should work
    if has_symlink_capability():
        link.symlink_to(store)
        new_url = 'ria+{}'.format(link.as_uri())
        ds.repo.call_annex(
            ['enableremote', 'ria-remote'] + common_init_opts
            + ['url={}'.format(new_url)])
        # git-annex:remote.log should have:
        # - url
        # - common_init_opts
        # - archive_id (which equals ds id)
        remote_log = ds.repo.call_git(['cat-file', 'blob',
                                       'git-annex:remote.log'],
                                      read_only=True)
        assert_in("url={}".format(new_url), remote_log)
        for c in common_init_opts:
            assert_in(c, remote_log)
        assert_in("archive-id={}".format(ds.id), remote_log)

    # we can deal with --sameas, which leads to a special remote not having
    # a 'name' property, but only a 'sameas-name'. See gh-4259.
    try:
        ds.repo.init_remote('ora2',
                            options=init_opts + ['--sameas', 'ria-remote'])
    except CommandError as e:
        if 'Invalid option `--sameas' in e.stderr:
            # annex too old - doesn't know --sameas
            pass
        else:
            raise

def _test_version_check(host, dspath, store):

    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)
    ds.repo.copy_to('.', 'store')

    # check version files
    remote_ds_tree_version_file = store / 'ria-layout-version'
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    remote_obj_tree_version_file = dsgit_dir / 'ria-layout-version'

    assert_true(remote_ds_tree_version_file.exists())
    assert_true(remote_obj_tree_version_file.exists())

    with open(str(remote_ds_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '1')
    with open(str(remote_obj_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '2')

    # Accessing the remote should not yield any output regarding
    # versioning, since it's the "correct" version. Note that "fsck" is an
    # arbitrary choice. We just need something to talk to the special
    # remote.
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        # TODO: For some reason didn't get cml.assert_logged to assert
        #       "nothing was logged"
        assert not cml.out

    # Now fake-change the version
    with open(str(remote_obj_tree_version_file), 'w') as f:
        f.write('X\n')

    # Now we should see a message about it
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        cml.assert_logged(level="INFO",
                          msg="Remote object tree reports version X",
                          regex=False)

    # reading still works:
    ds.drop('.')
    assert_status('ok', ds.get('.'))

    # but writing doesn't:
    with open(str(Path(ds.path) / 'new_file'), 'w') as f:
        f.write("arbitrary addition")
    ds.save(message="Add a new_file")

    # TODO: use self.annex.error in special remote and see whether we get
    #       an actual error result
    assert_raises(CommandError, ds.repo.copy_to, 'new_file', 'store')

    # However, we can force it by configuration
    ds.config.add("annex.ora-remote.store.force-write", "true",
                  where='local')
    ds.repo.copy_to('new_file', 'store')

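# A minimal sketch of the version gate the test above exercises, under the
# assumption that the special remote simply compares the content of a
# 'ria-layout-version' file against the versions it knows, and treats
# anything else as read-only unless force-write is configured:
def _is_known_layout_version_sketch(version_file, known=('1', '2')):
    # an unknown version (like the 'X' written above) would disable writes
    return version_file.read_text().strip() in known
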
def _test_remote_layout(host, dspath, store, archiv_store):

    dspath = Path(dspath)
    store = Path(store)
    archiv_store = Path(archiv_store)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
        arch_url = "ria+ssh://{host}{path}".format(host=host,
                                                   path=archiv_store)
    else:
        store_url = "ria+{}".format(store.as_uri())
        arch_url = "ria+{}".format(archiv_store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    # copy files into the RIA store
    ds.repo.copy_to('.', 'store')

    # we should see the exact same annex object tree
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    store_objects = get_all_files(dsobj_dir)
    local_objects = get_all_files(ds.pathobj / '.git' / 'annex' / 'objects')
    assert_equal(len(store_objects), 2)

    if not ds.repo.is_managed_branch():
        # with managed branches the local repo uses hashdirlower instead
        # TODO: However, with dataset layout version 1 this should
        #       therefore work on adjusted branch the same way
        # TODO: Wonder whether export-archive-ora should account for that
        #       and rehash according to target layout.
        assert_equal(sorted(store_objects), sorted(local_objects))

    if not io.get_7z():
        raise SkipTest("No 7z available in RIA store")

    # we can simply pack up the content of the remote into a
    # 7z archive and place it in the right location to get a functional
    # archive remote
    create_store(io, archiv_store, '1')
    create_ds_in_store(io, archiv_store, ds.id, '2', '1')

    whereis = ds.repo.whereis('one.txt')
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, archiv_store, ds.id)
    ds.export_archive_ora(archive_dir / 'archive.7z')
    init_opts = common_init_opts + ['url={}'.format(arch_url)]
    ds.repo.init_remote('archive', options=init_opts)
    # now fsck the new remote to get the new special remote indexed
    ds.repo.fsck(remote='archive', fast=True)
    assert_equal(len(ds.repo.whereis('one.txt')), len(whereis) + 1)

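# As the comment in the test says, the archive is nothing special: packing
# the remote's annex object tree into 'archive.7z' at the expected location
# by hand should yield the same functional archive remote. A sketch for a
# local store, assuming a '7z' binary on PATH (export_archive_ora() remains
# the supported way to do this):
def _pack_object_tree_sketch(dsobj_dir, archive_dir):
    archive_dir.mkdir(parents=True, exist_ok=True)
    # run from within the object tree so the archive holds relative paths
    subprocess.run(['7z', 'a', str(archive_dir / 'archive.7z'), '.'],
                   cwd=str(dsobj_dir), check=True)
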
def _postclonetest_prepare(lcl, storepath, link):

    from datalad.customremotes.ria_utils import (
        create_store,
        create_ds_in_store,
        get_layout_locations,
    )
    from datalad.distributed.ora_remote import (
        LocalIO,
    )

    create_tree(lcl,
                tree={
                    'ds': {
                        'test.txt': 'some',
                        'subdir': {
                            'subds': {'testsub.txt': 'somemore'},
                            'subgit': {'testgit.txt': 'even more'}
                        },
                    },
                })

    lcl = Path(lcl)
    storepath = Path(storepath)
    link = Path(link)
    link.symlink_to(storepath)

    # create a local dataset with a subdataset
    subds = Dataset(lcl / 'ds' / 'subdir' / 'subds').create(force=True)
    subds.save()
    # add a plain git dataset as well
    subgit = Dataset(lcl / 'ds' / 'subdir' / 'subgit').create(force=True,
                                                              annex=False)
    subgit.save()
    ds = Dataset(lcl / 'ds').create(force=True)
    ds.save(version_tag='original')
    assert_repo_status(ds.path)

    io = LocalIO()
    create_store(io, storepath, '1')

    # URL to use for upload. The point is that this should be invalid for
    # the clone, so that autoenabling would fail. Therefore let it be based
    # on a to-be-deleted symlink.
    upl_url = "ria+{}".format(link.as_uri())

    for d in (ds, subds, subgit):

        # TODO: create-sibling-ria required for config! => adapt to RF'd
        #       creation (missed on rebase?)
        create_ds_in_store(io, storepath, d.id, '2', '1')
        d.create_sibling_ria(upl_url, "store")

        if d is not subgit:
            # Now, simulate the problem by reconfiguring the special remote
            # to not be autoenabled.
            # Note, however, that the actual intention is a URL that isn't
            # valid from the point of view of the clone (doesn't resolve,
            # no credentials, etc.), so that autoenabling on git-annex-init
            # when datalad-cloning would fail to succeed.
            Runner(cwd=d.path).run(['git', 'annex', 'enableremote',
                                    'store-storage', 'autoenable=false'])
        d.push('.', to='store')
        store_loc, _, _ = get_layout_locations(1, storepath, d.id)
        Runner(cwd=str(store_loc)).run(['git', 'update-server-info'])

    link.unlink()
    # We should now have a store with datasets that have an autoenabled ORA
    # remote relying on an inaccessible URL.
    # datalad-clone is supposed to reconfigure based on the URL we cloned
    # from.
    # Test this feature for cloning via HTTP, SSH and FILE URLs.

    return ds.id

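# A hypothetical downstream use of the returned id: clone from the prepared
# store and expect datalad-clone to reconfigure the ORA remote based on the
# URL actually used, instead of autoenabling it with the stale one (a
# 'ria+file' URL here; the actual tests also exercise HTTP and SSH):
def _clone_from_store_sketch(storepath, ds_id, target):
    return clone('ria+{}#{}'.format(Path(storepath).as_uri(), ds_id),
                 target)
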
def _test_permission(host, storepath, dspath):

    # Test whether ORA correctly revokes and obtains write permissions
    # within the annex object tree. That is: revoke after ORA pushed a key
    # to the store, in order to allow the object tree to safely be used
    # with an ephemeral clone. And on removal obtain write permissions,
    # like annex would do internally on a drop (but be sure to restore them
    # if something went wrong).

    dspath = Path(dspath)
    storepath = Path(storepath)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)
    testfile = 'one.txt'

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host,
                                                    path=storepath)
    else:
        store_url = "ria+{}".format(storepath.as_uri())

    create_store(io, storepath, '1')
    create_ds_in_store(io, storepath, ds.id, '2', '1')
    _, _, obj_tree = get_layout_locations(1, storepath, ds.id)
    assert_true(obj_tree.is_dir())
    file_key_in_store = \
        obj_tree / 'X9' / '6J' / \
        'MD5E-s8--7e55db001d319a94b0b713529a756623.txt' / \
        'MD5E-s8--7e55db001d319a94b0b713529a756623.txt'

    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    known_sources = ds.repo.whereis(testfile)
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    assert_false(file_key_in_store.exists())

    ds.repo.call_annex(['copy', testfile, '--to', 'store'])
    known_sources = ds.repo.whereis(testfile)
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
    assert_true(file_key_in_store.exists())

    # Revoke write permissions from the parent dir in-store to test whether
    # we can still drop (if we can obtain the permissions). Note that this
    # has no effect on VFAT.
    file_key_in_store.parent.chmod(
        file_key_in_store.parent.stat().st_mode & ~stat.S_IWUSR)

    # we can't directly delete; the key in store should be protected
    assert_raises(PermissionError, file_key_in_store.unlink)

    # ORA can still drop, since it obtains permission to:
    ds.repo.call_annex(['drop', testfile, '--from', 'store'])
    known_sources = ds.repo.whereis(testfile)
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    assert_false(file_key_in_store.exists())

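# A sketch of the drop-side permission handling this test verifies,
# assuming plain POSIX mode bits (hence no effect on VFAT): temporarily
# re-grant owner write permission on the key's directory, remove the key,
# and restore the protected state if the removal fails:
def _drop_key_sketch(key_path):
    parent = key_path.parent
    prev_mode = parent.stat().st_mode
    parent.chmod(prev_mode | stat.S_IWUSR)
    try:
        key_path.unlink()
    except OSError:
        # restore the read-only state, as ORA is expected to do on failure
        parent.chmod(prev_mode)
        raise
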