def test_copy_file_errors(dspath1=None, dspath2=None, nondspath=None):
    """Verify copy_file rejects invalid and impossible call patterns.

    Covers: calling with no arguments, a source without a target directory,
    multiple sources combined with ``specs_from``, targets outside any
    dataset, targets in an unrelated dataset, directory sources without
    recursion, and nonexistent source files.
    """
    ds1 = Dataset(dspath1)
    # nothing given
    assert_raises(ValueError, copy_file)
    # no target directory given
    assert_raises(ValueError, ds1.copy_file, 'somefile')
    # using multiple sources and --specs-from
    assert_raises(ValueError, ds1.copy_file, ['1', '2', '3'], specs_from='-')
    # trying to copy to a dir that is not in a dataset
    ds1.create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=nondspath, on_failure='ignore'))
    # copy into a dataset that is not in the reference dataset
    # (the created dataset itself is not needed beyond its existence)
    Dataset(dspath2).create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=dspath2, on_failure='ignore'))
    # attempt to copy from a directory, but no recursion is enabled.
    # use no reference ds to exercise a different code path
    assert_status(
        'impossible',
        copy_file([nondspath, dspath1], on_failure='ignore'))
    # attempt to copy a file that doesn't exist
    assert_status(
        'impossible',
        copy_file(['funky', dspath1], on_failure='ignore'))
    # attempt to copy a file without a destination given
    assert_raises(ValueError, copy_file, 'somepath')
    assert_status(
        'impossible',
        copy_file(specs_from=['somepath'], on_failure='ignore'))
def test_copy_file(workdir=None, webdir=None, weburl=None):
    """End-to-end check of copy_file within and across datasets.

    Downloads web content into a source dataset, copies files (with their
    availability info) into a fresh destination dataset, and verifies the
    content can be re-obtained there.  Also covers: a pre-polluted internal
    tmp directory, source+destination pair specs, multi-source copies,
    copying from a plain (non-dataset) location, and copying from an annex
    dataset into a plain git dataset.
    """
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')),
                        path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not trip
    # over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt',
        dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')
    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
def test_copy_file_datalad_specialremote(workdir=None, webdir=None,
                                         weburl=None):
    """Check that copy_file carries datalad special-remote config along.

    A file whose content is known to the datalad special remote in the
    source dataset must remain retrievable from that remote after being
    copied into a destination dataset that starts with no special remotes.
    Also verifies overwriting an existing destination path with different
    content keeps availability info correct.
    """
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    # enable datalad special remote
    src_ds.repo.init_remote(DATALAD_SPECIAL_REMOTE, [
        'encryption=none', 'type=external',
        'externaltype={}'.format(DATALAD_SPECIAL_REMOTE),
        'autoenable=true'
    ])
    # put files into the dataset by URL
    src_ds.download_url('/'.join((weburl, 'webfile1')),
                        path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path='myfile2.txt')
    # approx test that the file is known to a remote
    # that is not the web remote
    assert_in_results(
        src_ds.repo.whereis('myfile1.txt', output='full').values(),
        here=False,
        description='[{}]'.format(DATALAD_SPECIAL_REMOTE),
    )
    # now a new dataset
    dest_ds = Dataset(workdir / 'dest').create()
    # no special remotes
    eq_(dest_ds.repo.get_special_remotes(), {})
    # must call with a dataset to get change saved, in order for drop
    # below to work properly without getting in reckless mode
    dest_ds.copy_file([src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj])
    # we have an special remote in the destination dataset now
    assert_in_results(
        dest_ds.repo.get_special_remotes().values(),
        externaltype=DATALAD_SPECIAL_REMOTE,
    )
    # and it works
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # now replace file in dest with a different content at the same path
    # must call with a dataset to get change saved, in order for drop
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile2.txt', dest_ds.pathobj / 'myfile1.txt'])
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    # now gets the "same path" but yields different content
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', 'abc')
def test_copy_file_into_dshierarchy(srcdir=None, destdir=None):
    """Recursive copy into a nested dataset hierarchy preserves structure.

    Copies a directory tree from a flat source dataset into a destination
    superdataset that contains a subdataset ('lvl2'), and verifies that the
    resulting file layout matches the source, merely distributed across the
    nested datasets.
    """
    srcdir = Path(srcdir)
    src_ds = Dataset(srcdir).create(force=True)
    src_ds.save()
    # now build two nested datasets, such that lvl2 ends up in a subdataset
    dest_ds = Dataset(destdir).create()
    dest_ds.create(dest_ds.pathobj / 'lvl2')
    assert_repo_status(dest_ds.path)
    dest_ds.copy_file(
        [src_ds.pathobj / 'lvl1', dest_ds.pathobj], recursive=True)
    assert_repo_status(dest_ds.path)
    # we get the same structure as the input, just distributed across
    # nested datasets: compare the sorted relative paths reported by
    # status() for source and destination
    eq_(*[
        sorted(
            r for r in d.status(result_xfm='relpaths', result_renderer=None)
            # filter out subdataset entry in dest_ds
            if r not in ('lvl2', '.gitmodules'))
        for d in (src_ds, dest_ds)
    ])
# NOTE(review): this definition is shadowed by a later function of the same
# name further down in this file (which repeats these checks and adds more),
# so it is never collected by pytest.  It looks like a stale duplicate --
# confirm and consider removing it.  Its last spec also differs from the
# later version: it passes a bare '.git' string as a third list element
# rather than a `dest.pathobj / 'some' / '.git'` destination path.
def test_copy_file_prevent_dotgit_placement(srcpath, destpath):
    """Guard against copy_file placing content under a .git directory."""
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()
    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())
    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git', recursive=True,
                       on_failure='ignore'))
    # same when the source has an OK name, but the dest now
    assert_in_results(dest.copy_file(
        [sub.pathobj / '.git' / 'config', dest.pathobj / 'some', '.git'],
        on_failure='ignore'),
        status='impossible',
        action='copy_file')
def test_copy_file_nourl(serv_path=None, orig_path=None, tst_path=None):
    """Tests availability transfer to normal git-annex remote"""
    # build the dataset that actually holds the file content
    server_ds = Dataset(serv_path).create()
    for fname, content in (('myfile.dat', 'I am content'),
                           ('noavail.dat', 'null')):
        (server_ds.pathobj / fname).write_text(content)
    server_ds.save()
    # make one file's content unavailable anywhere
    server_ds.drop('noavail.dat', reckless='kill')
    # empty superdataset with the content dataset cloned in as a subdataset
    super_ds = Dataset(orig_path).create()
    super_ds.clone(source=serv_path, path='serv')
    assert_repo_status(super_ds.path)
    # copying the content-less file into the superdataset must yield an
    # 'impossible' result with a dedicated message
    missing = super_ds.pathobj / 'serv' / 'noavail.dat'
    assert_in_results(
        super_ds.copy_file(missing, on_failure='ignore'),
        status='impossible',
        message='no known location of file content',
        path=str(missing),
    )
def _check_copy_file_specs_from(srcdir, destdir, specs, **kwargs):
    """Create a dataset in *destdir* and run copy_file with *specs_from*.

    Returns the (dataset, results) pair for further assertions.
    """
    target = Dataset(destdir).create()
    return target, target.copy_file(specs_from=specs, **kwargs)
def test_copy_file_prevent_dotgit_placement(srcpath=None, destpath=None):
    """Guard against copy_file reading from or writing into .git/.

    Verifies that recursion skips .git/, that explicit .git sources and
    .git destinations fail, and documents the (intentional) limits of this
    protection: only the final path component of each destination is
    checked, so a deeper .git-internal target can slip through.
    """
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()
    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())
    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git', recursive=True,
                       on_failure='ignore'))
    # same when the source has an OK name, but the dest now
    assert_in_results(dest.copy_file(
        [sub.pathobj / '.git' / 'config', dest.pathobj / 'some' / '.git'],
        on_failure='ignore'),
        status='impossible',
        action='copy_file')
    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])
    # A source path can have a leading .git/ if the destination is outside of
    # .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())
    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())
    # But we only waste so many cycles trying to prevent foot shooting. This
    # next one sneaks by because only .name, not all upstream parts, is checked
    # for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())