def test_site_archive_location_config(path, objtree, objtree_alt):
    ds = create(path)
    # without a base-path the special remote must refuse initialization
    assert_raises(
        CommandError,
        initexternalremote,
        ds.repo, 'archive', 'ria',
        config=None,
    )
    # provide the archive location through git config instead of the
    # remote's own init config (mimics a site-wide setting, but kept
    # local here for a simple test setup)
    ds.config.set('annex.ria-remote.archive.base-path', objtree, where='local')
    initexternalremote(
        ds.repo, 'archive', 'ria',
    )
    # deposit some content and verify it actually lands in the archive
    populate_dataset(ds)
    ds.save()
    ds.repo.copy_to('.', 'archive')
    stored_files = get_all_files(objtree)
    assert len(stored_files) > 1
    # emulate a site-wide relocation of the archive: nothing committed
    # changes and no 'enableremote' is issued, only the local git
    # config is adjusted
    # first remove all local copies
    assert_status('ok', ds.drop('.'))
    # physically move the archive on the system
    shutil.move(objtree, objtree_alt)
    # point the (local) config at the new location -- committed content
    # is untouched
    ds.config.set(
        'annex.ria-remote.archive.base-path', objtree_alt, where='local')
    # content retrieval keeps working after the reconfiguration
    assert_status('ok', ds.get('.'))
def test_backup_archive(path, objtree, archivremote):
    """Similar to test_archive_layout(), but not focused on
    compatibility with the directory-type special remote. Instead, it
    tests building a second RIA remote from an existing one, e.g. for
    backup purposes.
    """
    ds = create(path)
    setup_archive_remote(ds.repo, objtree)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)
    # copy files into the RIA archive
    ds.repo.copy_to('.', 'archive')
    # manually 7z-pack the first remote's annex object tree into the
    # 'archives' location where a second RIA remote expects it
    targetpath = Path(archivremote) / ds.id[:3] / ds.id[3:] / 'archives'
    targetpath.mkdir(parents=True)
    subprocess.run(
        ['7z', 'u', str(targetpath / 'archive.7z'), '.'],
        cwd=str(Path(objtree) / ds.id[:3] / ds.id[3:] / 'annex' / 'objects'),
    )
    initexternalremote(ds.repo, '7z', 'ria',
                       config={'base-path': archivremote})
    # wipe out the initial RIA remote (just for testing if the upcoming
    # one can fully take over)
    shutil.rmtree(objtree)
    # fsck to make git-annex aware of the loss
    assert_status('error', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='archive', fast=True)
    ])
    # now only available "here"
    eq_(len(ds.repo.whereis('one.txt')), 1)
    # make the backup archive known
    initexternalremote(ds.repo, 'backup', 'ria',
                       config={'base-path': archivremote})
    # now fsck the new remote to get the new special remote indexed
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='backup', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])
def test_archive_layout(path, objtree, archivremote):
    # Verify that the object tree created by the RIA special remote
    # mirrors the dataset's own annex object tree, and that the store
    # can be packed into a 7z archive that serves a second functional
    # special remote.
    ds = create(path)
    setup_archive_remote(ds.repo, objtree)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)
    # copy files into the RIA archive
    ds.repo.copy_to('.', 'archive')
    # we should see the exact same annex object tree
    arxiv_files = get_all_files(objtree)
    # anything went there at all?
    assert len(arxiv_files) > 1
    # minus the two layers for the archive path the content is
    # identically structured, except for the two additional version
    # files at the root of the entire tree and at the dataset level
    assert len(
        [p for p in arxiv_files if p.name == 'ria-layout-version']) == 2
    eq_(
        sorted([
            p.parts[-4:]
            for p in arxiv_files if p.name != 'ria-layout-version'
        ]),
        # Note: datalad-master has ds.repo.dot_git Path object. Not in
        # 0.12.0rc6 though. This would also resolve .git-files, which
        # pathlib obv. can't. If we test more sophisticated structures,
        # we'd need to account for that
        sorted([
            p.parts
            for p in get_all_files(ds.pathobj / '.git' / 'annex' / 'objects')
        ]))
    # we can simply pack up the content of the directory remote into a
    # 7z archive and place it in the right location to get a functional
    # special remote
    whereis = ds.repo.whereis('one.txt')
    targetpath = Path(archivremote) / ds.id[:3] / ds.id[3:] / 'archives'
    ds.ria_export_archive(targetpath / 'archive.7z')
    initexternalremote(ds.repo, '7z', 'ria',
                       config={'base-path': archivremote})
    # now fsck the new remote to get the new special remote indexed
    ds.repo.fsck(remote='7z', fast=True)
    # the archive-backed remote adds one more known location
    eq_(len(ds.repo.whereis('one.txt')), len(whereis) + 1)
def test_site_archive_url_config(path, objtree, objtree_alt):
    # variant of test_site_archive_location_config that configures the
    # archive location via an URL substitution instead of a base-path
    ds = create(path)
    # without a base-path the special remote must refuse initialization
    assert_raises(
        CommandError,
        initexternalremote,
        ds.repo, 'archive', 'ria',
        config=None,
    )
    # map a symbolic label onto the real location with git's
    # url.<base>.insteadOf mechanism, and init the remote against the
    # label only
    ds.config.set('url.ria+file://{}.insteadOf'.format(objtree),
                  'localstore:', where='local')
    initexternalremote(ds.repo, 'archive', 'ria',
                       config={'url': 'localstore:'})
    # deposit some content and verify it actually lands in the archive
    populate_dataset(ds)
    ds.save()
    ds.repo.copy_to('.', 'archive')
    stored_files = get_all_files(objtree)
    assert len(stored_files) > 1
    # emulate a site-wide relocation: drop local copies, move the
    # archive, and rewrite only the local URL mapping -- nothing
    # committed changes and no 'enableremote' is issued
    assert_status('ok', ds.drop('.'))
    shutil.move(objtree, objtree_alt)
    ds.config.unset('url.ria+file://{}.insteadOf'.format(objtree),
                    where='local')
    ds.config.set('url.ria+file://{}.insteadOf'.format(objtree_alt),
                  'localstore:', where='local')
    # content retrieval keeps working after the reconfiguration
    assert_status('ok', ds.get('.'))
def test_version_check(path, objtree):
    # Verify that the special remote deposits 'ria-layout-version'
    # files in the store, detects an unexpected version, and then
    # switches to read-only usage unless force-write is configured.
    ds = create(path)
    setup_archive_remote(ds.repo, objtree)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # version file at the root of the entire store
    remote_ds_tree_version_file = Path(objtree) / 'ria-layout-version'
    # version file at the level of this dataset's object tree
    remote_obj_tree_version_file = Path(
        objtree) / ds.id[:3] / ds.id[3:] / 'ria-layout-version'

    # Those files are not yet there
    assert not remote_ds_tree_version_file.exists()
    assert not remote_obj_tree_version_file.exists()

    # Now copy everything to remote. This should create the structure
    # including those version files
    ds.repo.copy_to('.', 'archive')
    assert remote_ds_tree_version_file.exists()
    assert remote_obj_tree_version_file.exists()

    # Currently the store-root (dataset tree) version should be "1"
    # and the object tree version "2"
    with open(str(remote_ds_tree_version_file), 'r') as f:
        eq_(f.read().strip(), '1')
    with open(str(remote_obj_tree_version_file), 'r') as f:
        eq_(f.read().strip(), '2')

    # Accessing the remote should not yield any output regarding
    # versioning, since it's the "correct" version. Note that "fsck" is
    # an arbitrary choice. We need just something to talk to the
    # special remote.
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='archive', fast=True)
        # TODO: For some reason didn't get cml.assert_logged to assert
        #       "nothing was logged"
        assert not cml.out

    # Now fake-change the version
    with open(str(remote_obj_tree_version_file), 'w') as f:
        f.write('X\n')

    # Now we should see a message about it
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='archive', fast=True)
        cml.assert_logged(level="INFO",
                          msg="Remote object tree reports version X",
                          regex=False)
        cml.assert_logged(level="INFO",
                          msg="Setting remote to read-only usage",
                          regex=False)

    # reading still works:
    ds.drop('.')
    assert_status('ok', ds.get('.'))

    # but writing doesn't:
    with open(str(Path(ds.path) / 'new_file'), 'w') as f:
        f.write("arbitrary addition")
    ds.save(message="Add a new_file")
    # TODO: use self.annex.error and see whether we get an actual error
    #       result
    assert_raises(IncompleteResultsError,
                  ds.repo.copy_to, 'new_file', 'archive')

    # However, we can force it by configuration
    ds.config.add("annex.ria-remote.archive.force-write", "true",
                  where='local')
    ds.repo.copy_to('new_file', 'archive')
def test_bare_git(origin, remote_base_path):
    remote_base_path = Path(remote_base_path)
    # This test should take a dataset and create a bare repository at
    # the remote end from it. Given, that it is placed correctly within
    # a tree of dataset, that remote thing should then be usable as a
    # ria-remote as well as as a git-type remote
    ds = create(origin)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)
    # Use git to make sure the remote end is what git thinks a bare
    # clone of it should look like
    bare_repo_path = remote_base_path / ds.id[:3] / ds.id[3:]
    subprocess.run(['git', 'clone', '--bare', origin, str(bare_repo_path)])
    # Now, let's have the bare repo as a git remote and use it with
    # annex
    eq_(
        subprocess.run(
            ['git', 'remote', 'add', 'bare-git', str(bare_repo_path)],
            cwd=origin).returncode, 0)
    eq_(
        subprocess.run(['git', 'annex', 'enableremote', 'bare-git'],
                       cwd=origin).returncode, 0)
    eq_(
        subprocess.run(['git', 'annex', 'testremote', 'bare-git'],
                       cwd=origin).returncode, 0)
    # copy files to the remote
    ds.repo.copy_to('.', 'bare-git')
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])
    # Since we created the remote this particular way instead of
    # letting ria-remote create it, we need to put ria-layout-version
    # files into it. Then we should be able to also add it as a
    # ria-remote.
    with open(str(remote_base_path / 'ria-layout-version'), 'w') as f:
        f.write('1')
    with open(str(bare_repo_path / 'ria-layout-version'), 'w') as f:
        f.write('1')
    # Now, add the ria remote:
    initexternalremote(ds.repo, 'riaremote', 'ria',
                       config={'base-path': str(remote_base_path)})
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='riaremote', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 3)
    # Now move content from git-remote to local and see it not being
    # available via bare-git anymore
    eq_(
        subprocess.run(['git', 'annex', 'move', '--all', '--from=bare-git'],
                       cwd=origin).returncode, 0)
    # ria-remote doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # But after fsck it does:
    fsck_res = [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='riaremote', fast=True)
    ]
    assert_result_count(
        fsck_res,
        1,
        status='error',
        message=
        '** Based on the location log, one.txt\n** was expected to be present, '
        'but its content is missing.')
    assert_result_count(
        fsck_res,
        1,
        status='error',
        message=
        '** Based on the location log, subdir/two\n** was expected to be present, '
        'but its content is missing.')
    eq_(len(ds.repo.whereis('one.txt')), 1)
def test_create_as_bare(origin, remote_base_path, remote_base_url, public,
                        consumer, tmp_location):
    # Note/TODO: Do we need things like:
    #    git config receive.denyCurrentBranch updateInstead
    #    mv .hooks/post-update.sample hooks/post-update
    #    git update-server-info

    # Test how we build a riaremote from an existing dataset, that is a
    # bare git repo and can be accessed as a git type remote as well.
    # This should basically outline how to publish to that kind of
    # structure as a data store, that is autoenabled, so we can publish
    # to github/gitlab and make that storage known.
    remote_base_path = Path(remote_base_path)
    ds = create(origin)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)
    # add the ria remote:
    # Note: For serve_path_via_http to work (which we need later), the
    # directory needs to already exist. But by default RIARemote will
    # reject to create the remote structure in an already existing
    # directory, that wasn't created by itself (lacks a
    # ria-layout-version file). So, we can either configure force-write
    # here or put a version file in it beforehand. However, this is
    # specific to the test environment!
    with open(str(remote_base_path / 'ria-layout-version'), 'w') as f:
        f.write('1')
    initexternalremote(ds.repo, 'riaremote', 'ria',
                       config={'base-path': str(remote_base_path)})
    # pretty much any annex command that talks to that remote should
    # now trigger the actual creation on the remote end:
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='riaremote', fast=True)
    ])
    remote_dataset_path = remote_base_path / ds.id[:3] / ds.id[3:]
    assert remote_base_path.exists()
    assert remote_dataset_path.exists()
    ds.repo.copy_to('.', 'riaremote')
    # Now, let's make the remote end a valid, bare git repository
    eq_(
        subprocess.run(['git', 'init', '--bare'],
                       cwd=str(remote_dataset_path)).returncode, 0)
    #subprocess.run(['mv', 'hooks/post-update.sample', 'hooks/post-update'], cwd=remote_dataset_path)
    #subprocess.run(['git', 'update-server-info'], cwd=remote_dataset_path)
    # TODO: we might need "mv .hooks/post-update.sample
    #       hooks/post-update", "git update-server-info" as well

    # add as git remote and push everything
    eq_(
        subprocess.run(
            ['git', 'remote', 'add', 'bare-git', str(remote_dataset_path)],
            cwd=origin).returncode, 0)
    # Note: "--mirror" does the job for this test, while it might not
    # be a good default for some kind of datalad-create-sibling.
    # However those things need to be configurable for actual
    # publish/creation routine anyway
    eq_(
        subprocess.run(['git', 'push', '--mirror', 'bare-git'],
                       cwd=origin).returncode, 0)
    # annex doesn't know the bare-git remote yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # But after enableremote and a fsck it does:
    eq_(
        subprocess.run(['git', 'annex', 'enableremote', 'bare-git'],
                       cwd=origin).returncode, 0)
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='bare-git', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 3)
    # we can drop and get again via 'bare-git' remote:
    ds.drop('.')
    eq_(len(ds.repo.whereis('one.txt')), 2)
    eq_(
        subprocess.run(
            ['git', 'annex', 'get', 'one.txt', '--from', 'bare-git'],
            cwd=origin).returncode, 0)
    eq_(len(ds.repo.whereis('one.txt')), 3)
    # let's get the other one from riaremote
    eq_(len(ds.repo.whereis(op.join('subdir', 'two'))), 2)
    eq_(
        subprocess.run([
            'git', 'annex', 'get',
            op.join('subdir', 'two'), '--from', 'riaremote'
        ],
                       cwd=origin).returncode, 0)
    eq_(len(ds.repo.whereis(op.join('subdir', 'two'))), 3)

    raise SkipTest("NOT YET DONE")