def check_basic_scenario(url, d=None):
    ds = Dataset(d).create()
    annex = ds.repo

    # TODO: skip if no boto or no credentials
    get_test_providers(url)  # so as to skip if unknown creds

    # Let's try to add some file which we should have access to
    ds.download_url(url)
    ds.save()

    # git-annex got a fix where it stopped replacing - in the middle of the
    # filename.  Let's cater to the developers who might have some
    # intermediate version and not easy to compare -- we will just check that
    # there is only one file and that it matches what we expect when outside
    # of the development versions range:
    filenames = glob.glob(op.join(d, '3versions[-_]allversioned.txt'))
    eq_(len(filenames), 1)
    filename = op.basename(filenames[0])
    if external_versions['cmd:annex'] < '8.20200501':
        assert_in('_', filename)
    # Date after the fix in 8.20200501-53-gcabbc91b1
    elif external_versions['cmd:annex'] >= '8.20200512':
        assert_in('-', filename)
    else:
        pass  # either of those is ok

    whereis1 = annex.whereis(filename, output='full')
    eq_(len(whereis1), 2)  # here and datalad
    annex.drop(filename)

    whereis2 = annex.whereis(filename, output='full')
    eq_(len(whereis2), 1)  # datalad

    # make sure that there are no "hidden" error messages, despite the
    # whereis command succeeding
    # https://github.com/datalad/datalad/issues/6453#issuecomment-1047533276
    from datalad.runner import StdOutErrCapture
    # we need to swallow logs since if DATALAD_LOG_LEVEL is set low, we
    # would get all the git-annex debug output in stderr
    with swallow_logs(new_level=logging.INFO) as cml:
        out = annex._call_annex(['whereis'], protocol=StdOutErrCapture)
    eq_(out['stderr'].strip(), '')

    # if we provide some bogus address which we can't access, we shouldn't
    # pollute output
    with assert_raises(CommandError) as cme:
        annex.add_url_to_file('bogus', url + '_bogus')
    assert_in('addurl: 1 failed', cme.value.stderr)
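
# The two whereis() length checks above rely on the shape of the
# whereis(..., output='full') result as exercised by this test: a mapping
# whose top level indexes the remotes holding the file, each with a
# per-remote record (fields such as 'description' and 'here').  A minimal
# illustrative helper under that assumption; the helper name is
# hypothetical and not part of the test suite:
def _whereis_descriptions(annex, filename):
    # remote UUID -> human-readable description of that remote
    whereis = annex.whereis(filename, output='full')
    return {uuid: rec.get('description') for uuid, rec in whereis.items()}
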
def test_copy_file(workdir=None, webdir=None, weburl=None):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')),
                        path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')

    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
            not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))

    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')

    # purposefully pollute the employed tmp folder to check that we do not
    # trip over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()

    # copy again, but to a different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')

    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt',
        dest_ds.pathobj,
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')

    # copy from an annex dataset into a plain Git repo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
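
# The `=None` defaults in the signature above suggest the path and URL
# arguments are injected by datalad's test fixtures.  A plausible setup,
# assuming the conventional decorators and the web-file contents implied by
# the '123'/'abc' assertions (the exact tree is an assumption, not taken
# from this file):
#
#   from datalad.tests.utils_pytest import (
#       serve_path_via_http,
#       with_tempfile,
#       with_tree,
#   )
#
#   @with_tempfile(mkdir=True)            # -> workdir
#   @with_tree(tree={'webfile1': '123',
#                    'webfile2': 'abc'})  # -> webdir
#   @serve_path_via_http                  # -> weburl
#   def test_copy_file(workdir=None, webdir=None, weburl=None):
#       ...
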
def test_copy_file_datalad_specialremote(workdir=None,
                                         webdir=None,
                                         weburl=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    # enable datalad special remote
    src_ds.repo.init_remote(DATALAD_SPECIAL_REMOTE, [
        'encryption=none',
        'type=external',
        'externaltype={}'.format(DATALAD_SPECIAL_REMOTE),
        'autoenable=true',
    ])
    # put files into the dataset by URL
    src_ds.download_url('/'.join((weburl, 'webfile1')),
                        path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path='myfile2.txt')
    # approximate test that the file is known to a remote
    # that is not the web remote
    assert_in_results(
        src_ds.repo.whereis('myfile1.txt', output='full').values(),
        here=False,
        description='[{}]'.format(DATALAD_SPECIAL_REMOTE),
    )

    # now a new dataset
    dest_ds = Dataset(workdir / 'dest').create()
    # no special remotes yet
    eq_(dest_ds.repo.get_special_remotes(), {})
    # must call with a dataset to get the change saved, in order for the
    # drop below to work properly without resorting to reckless mode
    dest_ds.copy_file([src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj])
    # we have a special remote in the destination dataset now
    assert_in_results(
        dest_ds.repo.get_special_remotes().values(),
        externaltype=DATALAD_SPECIAL_REMOTE,
    )
    # and it works
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')

    # now replace the file in dest with different content at the same path
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile2.txt', dest_ds.pathobj / 'myfile1.txt'])
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    # now the "same path" yields different content
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', 'abc')
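
# For reference, the init_remote() call above corresponds to the plain
# git-annex command run inside the dataset (a sketch, assuming that
# DATALAD_SPECIAL_REMOTE resolves to the string 'datalad', as in
# datalad.consts):
#
#   git annex initremote datalad \
#       encryption=none type=external externaltype=datalad autoenable=true
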
def test_download_docker_blob(path=None):
    from datalad.consts import (
        DATALAD_SPECIAL_REMOTE,
        DATALAD_SPECIAL_REMOTES_UUIDS,
    )
    from datalad.customremotes.base import init_datalad_remote

    with patch_config({"datalad.repo.backend": "SHA256E"}):
        ds = Dataset(path).create()
    ds_repo = ds.repo
    init_datalad_remote(ds_repo, DATALAD_SPECIAL_REMOTE)

    id_ = "f0b02e9d092d905d0d87a8455a1ae3e9bb47b4aa3dc125125ca5cd10d6441c9f"
    outfile = ds_repo.pathobj / "blob"
    url = "https://registry-1.docker.io/v2/library/busybox/blobs/sha256:" + id_
    ds.download_url(urls=[url], path=str(outfile))

    annex_info = ds_repo.get_content_annexinfo(paths=[outfile], init=None)
    eq_(id_, annex_info[outfile]["keyname"])
    assert_in(DATALAD_SPECIAL_REMOTES_UUIDS[DATALAD_SPECIAL_REMOTE],
              ds_repo.whereis([str(outfile)])[0])
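
# Why the "keyname" assertion above holds: with the SHA256E backend the
# git-annex key embeds the file's SHA256 digest (plus any file extension),
# e.g. (size illustrative)
#
#   SHA256E-s2144--f0b02e9d...441c9f
#
# Since the target file "blob" has no extension, the key name reduces to
# the bare digest, and the Docker registry addresses blobs by that same
# digest -- so the keyname must equal the `sha256:` id in the URL.
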