def test_url_base():
    """Exercise basic URL construction, field access, equality, and params handling."""
    # passing a full URL string together with explicit fields is ambiguous -> error
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    u = URL("http://example.com")
    eq_(u.hostname, 'example.com')
    eq_(u.scheme, 'http')
    # fields absent from the URL come back as empty strings, not None
    eq_(u.port, '')
    eq_(u.username, '')
    eq_(repr(u), "URL(hostname='example.com', scheme='http')")
    # __eq__ coerces a plain string into a URL before comparing
    eq_(u, "http://example.com")
    neq_(URL(), URL(hostname='x'))
    implicit = URL('smth')
    eq_(implicit.hostname, '')
    ok_(bool(implicit))
    nok_(bool(URL()))
    # unknown field names must be rejected
    assert_raises(ValueError, u._set_from_fields, unknown='1')
    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM, so their presence triggers a warning
        with_params = URL("http://example.com/;param")
        # but the original string is maintained verbatim
        eq_(str(with_params), 'http://example.com/;param')
        assert_in('ParseResults contains params', cml.out)
        # while as_str() renders without the params
        eq_(with_params.as_str(), 'http://example.com/')
def test_no_interaction_with_untracked_content(path=None):
    """Verify create(force=True) does not pull in unrelated untracked content.

    Extracted from what was originally a metadata test.
    """
    origin = Dataset(op.join(path, 'origin')).create(force=True)
    create_tree(origin.path, {'sub': {'subsub': {'dat': 'lots of data'}}})
    sub = origin.create('sub', force=True)
    sub.remove(op.join('.datalad', 'config'))
    nok_((sub.pathobj / '.datalad' / 'config').exists())
    # this will only work, if `remove` didn't do anything stupid and
    # caused all content to be saved
    sub.create('subsub', force=True)
def test_kill(path=None):
    """Removing a dataset with load must fail unless reckless='availability'."""
    # nested datasets with load
    ds = Dataset(path).create()
    (ds.pathobj / 'file.dat').write_text('load')
    ds.save("file.dat")
    sub = ds.create('deep1')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['deep1'])
    assert_repo_status(ds.path)
    # plain removal must fail for several reasons:
    # unpushed commits, a present subdataset, and a unique annex key
    outcome = ds.remove(on_failure='ignore')
    assert_result_count(outcome, 1, status='error', path=ds.path)
    # disregarding availability lets it through: subdataset first, then parent
    eq_(ds.remove(reckless='availability', result_xfm='datasets'), [sub, ds])
    nok_(ds.pathobj.exists())
def test_copy_file(workdir=None, webdir=None, weburl=None):
    """End-to-end checks of copy_file.

    Covers: copying across datasets with dropped source content, surviving a
    polluted tmp dir, source+dest pairs, multi-file batches, non-dataset
    sources, and annex->git copies.
    """
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')),
                        path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not trip
    # over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt',
        dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')
    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out few checks on urls

    Parameters
    ----------
    ri : str or RI
      resource identifier to check
    cls : type
      expected RI subclass that parsing `ri` must produce
    exact_str : bool, optional
      whether str() of the parsed instance must reproduce `ri` exactly
    localpath : str, optional
      expected value of .localpath; if not given, accessing .localpath must
      raise ValueError (i.e. the RI is a remote URL)
    **fields
      field values used to construct a reference instance, and verified to be
      accessible as attributes on it
    """
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        if isinstance(ri, str):
            eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;) above should fail first if smth is wrong
        if not exact_str:
            # an inexact reconstruction is logged at DEBUG level
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(str(ri), str(ri_))  # that we can reconstruct it EXACTLY on our examples
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)
    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath
    # This one does not have a path. TODO: either proxy path from its .RI or adjust
    # hierarchy of classes to make it more explicit
    if cls == GitTransportRI:
        return
    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
def test_is_datalad_compat_ri():
    """Check which resource identifiers qualify as datalad-compatible."""
    # recognized forms
    for compat in ('ssh://*****:*****@host/path',
                   'http://example.com',
                   'file://localhost/some',
                   '///localhost/some'):
        ok_(is_datalad_compat_ri(compat))
    # rejected forms, including non-string input
    for incompat in ('relative', './//localhost/some', 123):
        nok_(is_datalad_compat_ri(incompat))
def test_register_config():
    """Smoke-test register_config() and the definitions mapping it feeds."""
    nok_(has_config('datalad.testdummies.invalid'))
    # an incomplete specification (no dialog text) must be refused ...
    assert_raises(
        ValueError,
        register_config,
        'datalad.testdummies.invalid',
        title=None,
        dialog='yesno')
    # ... and must not leave a partial registration behind
    nok_(has_config('datalad.testdummies.invalid'))
    key = 'datalad.testdummies.try1'
    nok_(has_config(key))
    register_config(
        key,
        'This is what happens, when you do not listen to mama!',
        default_fn=lambda: 5,
        description='Try on-access default "computation"',
        type=int,
        dialog='question',
        scope='global',
    )
    from datalad.interface.common_cfg import definitions
    # the new key is visible through every access path of the mapping
    assert_in(key, definitions)
    assert_in(key, definitions.keys())
    assert_in(key, [k for k, v in definitions.items()])
    assert_in(key, [k for k in definitions])
    # more smoke testing, we must have at least this one
    ok_(len(definitions))
    props = definitions[key]
    # on-access default computation via default_fn
    eq_(props['default'], 5)
    # any novel property can be attached
    props['novel'] = 'unexpected'
    eq_(props.get('novel'), 'unexpected')
    eq_(props.get('toonovel'), None)
    # str/repr include the human-readable text
    assert_in('mama', str(props))
    assert_in('mama', repr(props))
    # internal data structure for UI was assembled and is reachable like any key
    assert_in('ui', props)
    assert_in('ui', props.keys())
    assert_in('ui', [k for k in props])
    size_before = len(props)
    props.update(funky='seven')
    eq_(len(props), size_before + 1)
def test_copy_file_prevent_dotgit_placement(srcpath=None, destpath=None):
    """Ensure copy_file refuses to place files into a '.git' destination,
    while still allowing sources that merely live under '.git/'."""
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()
    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())
    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git', recursive=True,
                       on_failure='ignore'))
    # same when the source has an OK name, but the destination does not
    assert_in_results(
        dest.copy_file(
            [sub.pathobj / '.git' / 'config',
             dest.pathobj / 'some' / '.git'],
            on_failure='ignore'),
        status='impossible',
        action='copy_file')
    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])
    # A source path can have a leading .git/ if the destination is outside of
    # .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())
    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())
    # But we only waste so many cycles trying to prevent foot shooting. This
    # next one sneaks by because only .name, not all upstream parts, is checked
    # for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())
def test_clean_subds_removal(path=None):
    """Remove subdatasets cleanly, both installed and uninstalled ones."""
    ds = Dataset(path).create()
    one = ds.create('one')
    two = ds.create('two')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['one', 'two'])
    assert_repo_status(ds.path)
    # now kill one
    res = ds.remove('one', reckless='availability', result_xfm=None)
    # 'one' got uninstalled, and ds got the removal of 'one' saved
    assert_result_count(res, 1, path=one.path, action='uninstall', status='ok')
    assert_result_count(res, 1, path=one.path, action='remove', status='ok')
    assert_result_count(res, 1, path=ds.path, action='save', status='ok')
    ok_(not one.is_installed())
    assert_repo_status(ds.path)
    # 'two' must remain registered
    eq_(ds.subdatasets(result_xfm='relpaths'), ['two'])
    # 'one' is gone from disk
    nok_(one.pathobj.exists())
    # and now again, but this time remove something that is not installed
    ds.create('three')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    ds.drop('two', what='all', reckless='availability')
    assert_repo_status(ds.path)
    # dropping does not unregister the subdataset
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    nok_(two.is_installed())
    # an orderly empty mountpoint is maintained
    ok_(two.pathobj.exists())
    res = ds.remove('two', reckless='availability')
    assert_in_results(res, path=str(ds.pathobj / 'two'), action='remove')
    assert_repo_status(ds.path)
    # 'two' was already uninstalled, now ds got the removal of 'two' saved
    nok_(two.pathobj.exists())
    eq_(ds.subdatasets(result_xfm='relpaths'), ['three'])
def test_remove(path=None):
    """Exercise remove() on git files, annexed files, directories,
    subdatasets (installed and not), and finally the top-level dataset."""
    # see docstring for test data structure
    ds = get_deeply_nested_structure(path)
    gitfile = op.join("subdir", "git_file.txt")
    ok_((ds.pathobj / gitfile).exists())
    res = ds.remove(gitfile, drop='all')
    assert_result_count(res, 3)
    # git file needs no dropping
    assert_in_results(
        res,
        action='drop',
        path=str(ds.pathobj / gitfile),
        status='notneeded',
        type='file',
    )
    # removed from working tree
    assert_in_results(
        res,
        action='remove',
        path=str(ds.pathobj / gitfile),
        status='ok',
        type='file',
    )
    # saved removal in dataset
    assert_in_results(
        res,
        action='save',
        path=ds.path,
        type='dataset',
        status='ok',
    )
    nok_((ds.pathobj / gitfile).exists())

    # now same for an annexed files
    annexedfile = op.join("subdir", "annexed_file.txt")
    # drop failure prevents removal
    res = ds.remove(annexedfile, drop='all', on_failure='ignore')
    assert_result_count(res, 1)
    assert_in_results(
        res, status='error', action='drop',
        path=str(ds.pathobj / annexedfile))
    ok_((ds.pathobj / annexedfile).exists())
    # now remove the file, but actually not drop the underlying
    # key -- hence no availability loss -- default mode of operation
    # remember the key
    key = ds.repo.get_file_annexinfo(annexedfile)['key']
    res = ds.remove(annexedfile, drop='datasets', message="custom msg",
                    on_failure='ignore')
    # removal and dataset save
    assert_result_count(res, 2)
    eq_(ds.repo.format_commit("%B",
                              ds.repo.get_corresponding_branch()).rstrip(),
        "custom msg")
    assert_in_results(
        res, action='remove', status='ok',
        path=str(ds.pathobj / annexedfile))
    assert_not_in_results(res, action='drop')
    nok_((ds.pathobj / annexedfile).exists())
    # the key survived the removal and is still known to the annex
    res = ds.repo.call_annex_records(['whereis', '--key', key, '--json'])
    assert_in_results(res, key=key, success=True)

    # now remove entire directory
    res = ds.remove('subdir', on_failure='ignore')
    # untracked content prevents removal
    assert_in_results(res, status='impossible', state='untracked')
    ok_((ds.pathobj / 'subdir').exists())
    ds.save('subdir')
    res = ds.remove('subdir', on_failure='ignore')
    assert_in_results(res, status='ok', action='remove')
    assert_in_results(res, status='ok', action='save', type='dataset')
    nok_((ds.pathobj / 'subdir').exists())

    # now remove an entire subdataset
    # prep: make clean
    rmdspath = ds.pathobj / 'subds_modified' / 'subds_lvl1_modified'
    ds.save(rmdspath, recursive=True)
    res = ds.remove(rmdspath, on_failure='ignore')
    # unique dataset, with unique keys -- must fail
    assert_in_results(res, status='error', action='uninstall',
                      path=str(rmdspath))
    # go reckless
    assert_in(
        str(rmdspath),
        ds.subdatasets(path='subds_modified', recursive=True,
                       result_xfm='paths', result_renderer='disabled'))
    res = ds.remove(rmdspath, reckless='availability', on_failure='ignore')
    assert_status('ok', res)
    assert_in_results(res, action='uninstall', path=str(rmdspath))
    assert_in_results(res, action='remove', path=str(rmdspath))
    nok_(rmdspath.exists())
    # properly unlinked
    assert_not_in(
        str(rmdspath),
        ds.subdatasets(path='subds_modified', recursive=True,
                       result_xfm='paths', result_renderer='disabled'))

    # lastly, remove an uninstalled subdataset
    # we save all to be able to check whether removal was committed and
    # the ds is clean at the end
    ds.save()
    # uninstall, we don't care about the existing modifications here
    res = ds.drop('subds_modified', what='all',
                  reckless='kill', recursive=True)
    # even remove the empty mount-point, such that it is invisible on the
    # file system
    (ds.pathobj / 'subds_modified').rmdir()
    res = ds.remove('subds_modified', on_failure='ignore')
    assert_in_results(res, action='remove',
                      path=str(ds.pathobj / 'subds_modified'))
    # removal was committed
    assert_repo_status(ds.path)

    # and really finally, removing top-level is just a drop
    res = ds.remove(reckless='kill')
    assert_in_results(res, action='uninstall', path=ds.path, status='ok')
    nok_(ds.is_installed())
def test_is_url():
    """Check URL recognition for plain strings and RI instances."""
    for url in ('file://localhost/some', 'http://localhost',
                'ssh://me@localhost'):
        ok_(is_url(url))
    # in current understanding it is indeed a url but an 'ssh', implicit=True,
    # not just a useless scheme=weird with a hope to point to a netloc
    with swallow_logs():
        ok_(is_url('weird://'))
    for non_url in ('relative', '/absolute'):
        nok_(is_url(non_url))
    # actually we do allow ssh:implicit urls ATM
    ok_(is_url('like@sshlogin'))
    # empty/blank strings and stuff of other types wouldn't be considered a URL
    for non_url in ('', ' ', 123):
        nok_(is_url(non_url))
    # we can pass RI instance directly
    ok_(is_url(RI('file://localhost/some')))
    nok_(is_url(RI('relative')))