def test_overrides():
    cfg = ConfigManager()
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', cfg)
    # set
    cfg.set('user.name', 'myoverride', scope='override')
    assert_equal(cfg['user.name'], 'myoverride')
    # unset just removes the override, not the entire config
    cfg.unset('user.name', scope='override')
    assert_in('user.name', cfg)
    assert_not_equal(cfg['user.name'], 'myoverride')
    # add
    # there is no initial increment
    cfg.add('user.name', 'myoverride', scope='override')
    assert_equal(cfg['user.name'], 'myoverride')
    # same as with add, not a list
    assert_equal(cfg['user.name'], 'myoverride')
    # but then there is
    cfg.add('user.name', 'myother', scope='override')
    assert_equal(cfg['user.name'], ['myoverride', 'myother'])
    # rename
    assert_not_in('ups.name', cfg)
    cfg.rename_section('user', 'ups', scope='override')
    # original variable still there
    assert_in('user.name', cfg)
    # rename of override in effect
    assert_equal(cfg['ups.name'], ['myoverride', 'myother'])
    # remove entirely by section
    cfg.remove_section('ups', scope='override')
    assert_not_in('ups.name', cfg, (
        cfg._stores,
        cfg.overrides,
    ))


def _check_auto_save(ds, orig_state):
    handle_dirty_dataset(ds, 'ignore')
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'fail')
    handle_dirty_dataset(ds, 'save-before')
    state = ds.repo.get_hexsha()
    assert_not_equal(orig_state, state)
    _check_all_clean(ds, state)
    return state


def test_tuple_requests():
    bc = BatchedCommand(cmd=py2cmd("""
import time
import sys
print(f"{time.time()}:{sys.stdin.readline().strip()}")
"""))
    start_time_1, line = bc(("one", "line")).split(":")
    assert_equal(line, "one line")
    start_time_2, line = bc(("end", "now")).split(":")
    assert_not_equal(start_time_1, start_time_2)
    assert_equal(line, "end now")
    bc.close(return_stderr=False)


def test_archive(path=None):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert isabs(res[0]['path'])
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # really sleep, so that if they stop using time.time we would know
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', reckless='kill')
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))


def test_command_fail_2():
    # Expect that the return code of a failing command is caught, that None
    # is returned as result, and that the process is restarted if the
    # batched command is called again.
    bc = BatchedCommand(cmd=py2cmd("""
print(a*b)
"""))
    # Send something to start the process
    result = bc("line one")
    assert_not_equal(bc.return_code, 0)
    assert_is_none(result)
    result = bc("line two")
    assert_not_equal(bc.return_code, 0)
    assert_is_none(result)
    bc.close(return_stderr=False)


def test_property_reevaluation(repo1=None):
    ds = Dataset(repo1)
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    first_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after creation, we have `repo`, and `config` was reevaluated to point
    # to the repo's config:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    second_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is(ds.config, ds.repo.config)
    assert_is_not(first_config, second_config)
    assert_is_not_none(ds.id)
    first_id = ds.id

    ds.drop(what='all', reckless='kill', recursive=True)
    # repo is gone, and config is again reevaluated to only provide user/system
    # level config:
    assert_false(lexists(ds.path))
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    third_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_not(second_config, third_config)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after recreation everything is sane again:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    assert_is(ds.config, ds.repo.config)
    forth_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is_not(third_config, forth_config)
    assert_is_not_none(ds.id)
    assert_not_equal(ds.id, first_id)


def test_cached_dataset(cache_dir=None):
    # patch DATALAD_TESTS_CACHE to not use the actual cache with
    # the test testing that very cache.
    cache_dir = Path(cache_dir)
    ds_url = "https://github.com/datalad/testrepo--minimalds"
    name_in_cache = url2filename(ds_url)
    annexed_file = Path("inannex") / "animated.gif"

    with patch(CACHE_PATCH_STR, new=cache_dir):

        @cached_dataset(url=ds_url)
        def decorated_test1(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(), 1, type="sibling",
                                name=DEFAULT_REMOTE,
                                url=str(cache_dir / name_in_cache))
            here = ds.config.get("annex.uuid")
            origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_not_in(here, where)
            assert_not_in(origin, where)

            return ds.pathobj, ds.repo.pathobj

        @cached_dataset(url=ds_url, paths=str(annexed_file))
        def decorated_test2(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(), 1, type="sibling",
                                name=DEFAULT_REMOTE,
                                url=str(cache_dir / name_in_cache))
            here = ds.config.get("annex.uuid")
            origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_in(here, where)
            assert_in(origin, where)

            return ds.pathobj, ds.repo.pathobj

        @cached_dataset(url=ds_url)
        def decorated_test3(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(), 1, type="sibling",
                                name=DEFAULT_REMOTE,
                                url=str(cache_dir / name_in_cache))
            # origin is the same cached dataset, that got this content in
            # decorated_test2 before. Should still be there. But "here" we
            # didn't request it.
            here = ds.config.get("annex.uuid")
            origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_not_in(here, where)
            assert_in(origin, where)

            return ds.pathobj, ds.repo.pathobj

        @cached_dataset(url=ds_url,
                        version="541cf855d13c2a338ff2803d4488daf0035e568f")
        def decorated_test4(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(), 1, type="sibling",
                                name=DEFAULT_REMOTE,
                                url=str(cache_dir / name_in_cache))
            # origin is the same cached dataset, that got this content in
            # decorated_test2 before. Should still be there. But "here" we
            # didn't request it.
            here = ds.config.get("annex.uuid")
            origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_not_in(here, where)
            assert_in(origin, where)
            assert_equal(ds.repo.get_hexsha(),
                         "541cf855d13c2a338ff2803d4488daf0035e568f")

            return ds.pathobj, ds.repo.pathobj

        first_dspath, first_repopath = decorated_test1()
        second_dspath, second_repopath = decorated_test2()
        decorated_test3()
        decorated_test4()

        # first and second are not the same, only their origin is:
        assert_not_equal(first_dspath, second_dspath)
        assert_not_equal(first_repopath, second_repopath)


def test_FileStatus_basic():
    assert_equal(FileStatus(size=0), FileStatus(size=0))
    assert_not_equal(FileStatus(size=0), FileStatus(size=1))
    # mtimes allow trimming if one is an int
    assert_equal(FileStatus(mtime=0), FileStatus(mtime=0.9999))
    assert_equal(FileStatus(mtime=0), FileStatus(mtime=0.0001))
    assert_not_equal(FileStatus(mtime=0.2), FileStatus(mtime=0.1))
    assert_not_equal(FileStatus(mtime=0.2), FileStatus(mtime=None))
    assert_not_equal(FileStatus(mtime=1), FileStatus(mtime=None))
    # and equality with None should be False
    assert_not_equal(FileStatus(mtime=1), None)
    assert_not_equal(None, FileStatus(mtime=1))
    # adding more information would result in not-equal
    assert_not_equal(FileStatus(size=0), FileStatus(size=0, mtime=123))
    # empty ones can't be compared
    # TODO: actually not clear why that NotImplemented singleton is not returned
    assert_not_equal(FileStatus(), FileStatus())


def test_target_ssh_simple(origin=None, src_path=None, target_rootpath=None):
    ca = dict(result_renderer='disabled')
    test_fname = 'test-annex.dat'
    orig = Dataset(origin).create(**ca)
    (orig.pathobj / test_fname).write_text('some')
    orig.save(**ca)
    port = get_ssh_port("datalad-test")

    # prepare src
    source = install(src_path, source=origin,
                     result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source, name="local_target",
                       sshurl="ssh://*****:*****

    @with_testsui(responses=["yes"])
    def interactive_assert_create_sshwebserver():
        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://datalad-test" + target_path,
            publish_by_default=DEFAULT_BRANCH,
            existing='replace',
            ui=have_webui(),
        )

    interactive_assert_create_sshwebserver()

    eq_("ssh://datalad-test" + urlquote(target_path),
        source.repo.get_remote_url("local_target"))
    ok_(source.repo.get_remote_url("local_target", push=True) is None)

    # ensure target tree actually replaced by source
    assert_false(exists(opj(target_path, 'random')))

    if src_is_annex:
        lclcfg = AnnexRepo(src_path).config
        eq_(lclcfg.get('remote.local_target.annex-ignore'), 'false')
        # valid uuid
        eq_(lclcfg.get('remote.local_target.annex-uuid').count('-'), 4)
        # should be added too, even if URL matches prior state
        eq_(lclcfg.get('remote.local_target.push'), DEFAULT_BRANCH)

    # again, by explicitly passing urls. Since we are on datalad-test, the
    # local path should work:
    cpkwargs = dict(
        dataset=source,
        name="local_target",
        sshurl="ssh://datalad-test",
        target_dir=target_path,
        target_url=target_path,
        target_pushurl="ssh://datalad-test" + target_path,
        ui=have_webui(),
    )

    @with_testsui(responses=['yes'])
    def interactive_assert_create_sshwebserver():
        assert_create_sshwebserver(existing='replace', **cpkwargs)

    interactive_assert_create_sshwebserver()

    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        eq_(target_description, target_path)

    eq_(target_path,
        source.repo.get_remote_url("local_target"))
    eq_("ssh://datalad-test" + target_path,
        source.repo.get_remote_url("local_target", push=True))

    if have_webui():
        from datalad_deprecated.tests.test_create_sibling_webui import (
            assert_publish_with_ui,
        )
        assert_publish_with_ui(target_path)

    # now, push should work:
    push(dataset=source, to="local_target")

    # and we should be able to 'reconfigure'
    def process_digests_mtimes(digests, mtimes):
        # it should have triggered a hook, which would have created log and
        # metadata files
        check_metadata = False
        for part in 'logs', 'metadata':
            metafiles = [
                k for k in digests
                if k.startswith(_path_('.git/datalad/%s/' % part))
            ]
            # This is in effect ONLY if we have "compatible" datalad installed
            # on remote end.
            # ATM we don't have an easy way to guarantee that AFAIK (yoh),
            # so let's not check/enforce (TODO)
            # assert(len(metafiles) >= 1)
            # we might have 2 logs if timestamps do not collide ;)
            # Let's actually do it to some degree
            if part == 'logs':
                # always should have those:
                assert len(metafiles) >= 1
                with open(opj(target_path, metafiles[0])) as f:
                    if 'no datalad found' not in f.read():
                        check_metadata = True
            if part == 'metadata':
                eq_(len(metafiles), bool(check_metadata))
            for f in metafiles:
                digests.pop(f)
                mtimes.pop(f)
        # and just pop some leftovers from annex
        # and ignore .git/logs content (gh-5298)
        for f in list(digests):
            if f.startswith('.git/annex/mergedrefs') \
                    or f.startswith('.git/logs/'):
                digests.pop(f)
                mtimes.pop(f)

    if not have_webui():
        # the rest of the test assumes that we have uploaded a UI
        return

    orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
    process_digests_mtimes(orig_digests, orig_mtimes)

    import time
    time.sleep(0.1)  # just so that mtimes change
    assert_create_sshwebserver(existing='reconfigure', **cpkwargs)

    digests, mtimes = get_mtimes_and_digests(target_path)
    process_digests_mtimes(digests, mtimes)

    # nothing should change in terms of content
    assert_dict_equal(orig_digests, digests)

    # but some files should have been modified
    modified_files = {
        k for k in mtimes
        if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
    }
    # collect which files were expected to be modified without incurring
    # any changes
    ok_modified_files = {
        _path_('.git/hooks/post-update'),
        'index.html',
    }
    ok_modified_files.add(_path_('.git/config'))
    ok_modified_files.update(
        {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
    # it seems that with some recent git versions behavior has changed a bit
    # and the index might get touched
    if _path_('.git/index') in modified_files:
        ok_modified_files.add(_path_('.git/index'))
    ok_(modified_files.issuperset(ok_modified_files))