def test_rerun_run_left_nonrun_right(path=None):
    ds = Dataset(path).create()
    # keep direct repo accessor to speed things up
    ds_repo = ds.repo
    ds.run("echo foo >foo")
    ds_repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "side"])
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("blah")
    ds.save()
    ds_repo.checkout(DEFAULT_BRANCH)
    ds_repo.merge("side", options=["-m", "Merge side"])
    #  o    d_n
    #  |\
    #  | o  c_n
    #  o |  b_r
    #  |/
    #  o    a_n

    ds.rerun(since="", onto="")
    #  o    d_M
    #  |\
    #  | o  c_n
    #  o |  b_R
    #  |/
    #  o    a_n
    neq_(ds_repo.get_hexsha(DEFAULT_BRANCH + "^"),
         ds_repo.get_hexsha("HEAD^"))
    eq_(ds_repo.get_hexsha(DEFAULT_BRANCH + "^2"),
        ds_repo.get_hexsha("HEAD^2"))

    ds_repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    #  o  b_R
    #  o  c_n
    #  o  a_n
    assert_false(ds_repo.commit_exists("HEAD^2"))
    eq_(ds_repo.get_hexsha(DEFAULT_BRANCH + "^2"),
        ds_repo.get_hexsha("HEAD^"))

    ds_repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds_repo.get_hexsha()
    ds.rerun(since="")
    #  o    d_n
    #  |\
    #  | o  c_n
    #  o |  b_r
    #  |/
    #  o    a_n
    eq_(hexsha_before, ds_repo.get_hexsha())

def test_basic_setup():
    # the import alone will verify that all default values match their
    # constraints
    from datalad import api
    # random pick of something that should be there
    assert_true(hasattr(api, 'install'))
    assert_true(hasattr(api, 'create'))
    # make sure all helper utilities do not pollute the namespace
    # and we end up only with __...__ attributes
    assert_false(list(filter(
        lambda s: s.startswith('_') and not re.match('__.*__', s),
        dir(api))))
    assert_in('Parameters', api.Dataset.install.__doc__)
    assert_in('Parameters', api.Dataset.create.__doc__)

def test_gitcredential_interface(path=None):
    # use a dataset as a local configuration vehicle
    ds = Dataset(path).create()
    # preserve credentials between git processes for a brief time
    # credential-cache is not supported on windows (needs UNIX sockets)
    # ds.config.set('credential.helper', 'cache', scope='local')
    # However, first set an empty helper in order to disable already set helpers
    ds.config.set('credential.helper', '', scope='local')
    ds.config.set('credential.helper', 'store', scope='local')
    # git manages credentials by target URL
    credurl = 'https://example.datalad.org/somepath'
    credurl_justhost = 'https://example.datalad.org'
    # define a credential (values match the assertions below)
    cred = GitCredentialInterface(
        url=credurl, username='mike', password='s3cr3t', repo=ds)
    # put it in the manager (a cache in this case, but could invoke any number
    # of helpers)
    cred.approve()
    # new instance, no knowledge of login
    cred = GitCredentialInterface(url=credurl, repo=ds)
    assert_not_in('username', cred)
    # query store
    cred.fill()
    eq_(cred['username'], 'mike')
    eq_(cred['password'], 's3cr3t')
    # git does host-only identification by default (see credential.useHttpPath)
    cred = GitCredentialInterface(url=credurl_justhost, repo=ds)
    cred.fill()
    eq_(cred['username'], 'mike')
    eq_(cred['password'], 's3cr3t')
    # the URL is enough to remove ("reject") a credential
    GitCredentialInterface(url=credurl, repo=ds).reject()
    cred = GitCredentialInterface(url=credurl, repo=ds)
    # this will yield empty passwords, not the most precise test
    # whether it actually removed the credentials, but some test
    # at least
    cred.fill()
    assert_false(cred['username'])
    assert_false(cred['password'])

def test_alter_interface_docs_for_api():
    alt = alter_interface_docs_for_api(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('CMD', alt)
    assert_not_in('PY', alt)
    assert_not_in('REFLOW', alt)
    assert_in('a b', alt)
    assert_in('not\n reflowed', alt)
    assert_in("Some Python-only bits Multiline!", alt)

    altpd = alter_interface_docs_for_api(demo_paramdoc)
    assert_in('python', altpd)
    assert_in('in between', altpd)
    assert_in('appended', altpd)
    assert_not_in('cmdline', altpd)

def test_check_dates(path=None):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo], reference_date=refdate, return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")

def check_dss():
    # we added the remote and set all the
    for subds in subdss:
        eq_(subds.repo.get_preferred_content('wanted', remote),
            'standard' if standardgroup else '')
        eq_(subds.repo.get_preferred_content('group', remote),
            standardgroup or '')
    for target_sub in target_subdss:
        ok_(target_sub.is_installed())  # it is there now
        eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
        # and we have transferred the content
        if standardgroup and standardgroup == 'backup':
            # only then content should be copied
            ok_file_has_content(opj(target_sub.path, 'sub.dat'),
                                'lots of data')
        else:
            # otherwise nothing is copied by default
            assert_false(target_sub.repo.file_has_content('sub.dat'))

def test_new_or_modified(path=None):
    def get_new_or_modified(*args, **kwargs):
        return [op.relpath(ap["path"], path)
                for ap in new_or_modified(diff_revision(*args, **kwargs))]

    ds = Dataset(path).create(force=True, annex=False)

    # Check out an orphan branch so that we can test the "one commit
    # in a repo" case.
    ds.repo.checkout("orph", options=["--orphan"])
    ds.save()
    assert_false(ds.repo.dirty)
    eq_(len(ds.repo.get_revisions("HEAD")), 1)
    # Diffing doesn't fail when the branch contains a single commit.
    assert_in("to_modify", get_new_or_modified(ds, "HEAD"))

    # New files are detected, deletions are not.
    ds.repo.remove(["to_remove"])
    ok_(ds.repo.dirty)

    with open(op.join(path, "to_add"), "w") as f:
        f.write("content5")
    ds.repo.add(["to_add"])
    ds.repo.commit("add one, remove another")

    eq_(get_new_or_modified(ds, "HEAD"), ["to_add"])

    # Modifications are detected.
    with open(op.join(path, "to_modify"), "w") as f:
        f.write("updated 1")
    with open(op.join(path, "d/to_modify"), "w") as f:
        f.write("updated 2")
    ds.save(["to_modify", "d/to_modify"])

    eq_(set(get_new_or_modified(ds, "HEAD")),
        {"to_modify", op.join("d", "to_modify")})

    # Non-HEAD revisions work.
    ds.repo.commit("empty", options=["--allow-empty"])
    assert_false(get_new_or_modified(ds, "HEAD"))
    eq_(set(get_new_or_modified(ds, "HEAD~")),
        {"to_modify", op.join("d", "to_modify")})

def test_rerun_unrelated_run_left_nonrun_right(path=None):
    ds = Dataset(path).create()
    # keep direct repo accessor to speed things up
    ds_repo = ds.repo
    ds.run("echo foo >foo")
    ds_repo.checkout(DEFAULT_BRANCH + "~", options=["--orphan", "side"])
    ds.save(message="squashed")
    ds_repo.checkout(DEFAULT_BRANCH)
    ds_repo.merge("side",
                  options=["-m", "Merge side", "--allow-unrelated-histories"])
    #  o    d_n
    #  |\
    #  | o  c_n
    #  o    b_r
    #  o    a_n

    ds.rerun(since="", onto="")
    #  o    d_M
    #  |\
    #  | o  c_n
    #  o    b_R
    #  o    a_n
    neq_(ds_repo.get_hexsha("HEAD^"),
         ds_repo.get_hexsha(DEFAULT_BRANCH + "^"))
    eq_(ds_repo.get_hexsha("HEAD^2"),
        ds_repo.get_hexsha(DEFAULT_BRANCH + "^2"))
    assert_false(ds_repo.commit_exists("HEAD^2^"))

    ds_repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    #  o  b_R
    #  o  c_n
    assert_false(ds_repo.commit_exists("HEAD^2"))
    eq_(ds_repo.get_hexsha("HEAD^"),
        ds_repo.get_hexsha(DEFAULT_BRANCH + "^2"))

    ds_repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="")
    #  o    d_n
    #  |\
    #  | o  c_n
    #  o    b_r
    #  o    a_n
    eq_(ds_repo.get_hexsha(DEFAULT_BRANCH), ds_repo.get_hexsha())

def check_datasets_order(res, order='bottom-up'):
    """Check that all type=dataset records do not violate the expected order

    This is a somewhat weak test: records could be produced in a way that we
    do not detect a violated order, e.g. a/b c/d would satisfy either order
    although they might be ordered neither depth- nor breadth-wise.  But this
    test allows catching obvious violations like a, a/b, a
    """
    prev = None
    for r in res:
        if r.get('type') != 'dataset':
            continue
        if prev and r['path'] != prev:
            if order == 'bottom-up':
                assert_false(path_startswith(r['path'], prev))
            elif order == 'top-down':
                assert_false(path_startswith(prev, r['path']))
            else:
                raise ValueError(order)
        prev = r['path']

def test_update_unborn_master(path=None):
    ds_a = Dataset(op.join(path, "ds-a")).create()
    ds_a.repo.call_git(["branch", "-m", DEFAULT_BRANCH, "other"])
    ds_a.repo.checkout(DEFAULT_BRANCH, options=["--orphan"])
    ds_b = install(source=ds_a.path, path=op.join(path, "ds-b"))

    ds_a.repo.checkout("other")
    (ds_a.pathobj / "foo").write_text("content")
    ds_a.save()

    # clone() will try to switch away from an unborn branch if there
    # is another ref available.  Reverse these efforts so that we can
    # test that update() fails reasonably here because we should still
    # be able to update from remotes that datalad didn't clone.
    ds_b.repo.update_ref("HEAD", "refs/heads/" + DEFAULT_BRANCH,
                         symbolic=True)
    assert_false(ds_b.repo.commit_exists("HEAD"))
    assert_status("impossible",
                  ds_b.update(merge=True, on_failure="ignore"))
    ds_b.repo.checkout("other")
    assert_status("ok",
                  ds_b.update(merge=True, on_failure="ignore"))
    eq_(ds_a.repo.get_hexsha(), ds_b.repo.get_hexsha())

def test_ExtractedArchive(path=None):
    archive = op.join(path, fn_archive_obscure_ext)
    earchive = ExtractedArchive(archive)
    assert_false(op.exists(earchive.path))
    # no longer the case -- just using hash for now
    # assert_in(os.path.basename(archive), earchive.path)

    fpath = op.join(fn_archive_obscure,  # lead directory
                    fn_in_archive_obscure)
    extracted = earchive.get_extracted_filename(fpath)
    eq_(extracted, op.join(earchive.path, fpath))
    assert_false(op.exists(extracted))  # not yet

    extracted_ = earchive.get_extracted_file(fpath)
    eq_(extracted, extracted_)
    assert_true(op.exists(extracted))  # now it should

    extracted_files = earchive.get_extracted_files()
    ok_generator(extracted_files)
    eq_(sorted(extracted_files),
        sorted([
            # ['bbc/3.txt', 'bbc/abc']
            op.join(fn_archive_obscure, fn_in_archive_obscure),
            op.join(fn_archive_obscure, '3.txt')
        ]))

    earchive.clean()
    if not dl_cfg.get('datalad.tests.temp.keep'):
        assert_false(op.exists(earchive.path))

def test_external_versions_basic():
    ev = ExternalVersions()
    our_module = 'datalad'
    assert_equal(ev.versions, {})
    assert_equal(ev[our_module], __version__)
    # and it could be compared
    assert_greater_equal(ev[our_module], __version__)
    # We got some odd failure in this test not long after switching to
    # versioneer: https://github.com/datalad/datalad/issues/5785.
    # Verify that we do get expected data types
    our_version = ev[our_module].version
    assert isinstance(our_version, (str, list)), \
        f"Got {our_version!r} of type {type(our_version)}"
    assert_greater(ev[our_module], '0.1')
    assert_equal(list(ev.keys()), [our_module])
    assert_true(our_module in ev)
    assert_false('unknown' in ev)

    # all are LooseVersions now
    assert_true(isinstance(ev[our_module], LooseVersion))
    version_str = __version__
    assert_equal(ev.dumps(), "Versions: %s=%s" % (our_module, version_str))

    # For non-existing one we get None
    assert_equal(ev['custom__nonexisting'], None)
    # and nothing gets added to _versions for nonexisting
    assert_equal(set(ev.versions.keys()), {our_module})

    # but if it is a module without version, we get it set to UNKNOWN
    assert_equal(ev['os'], ev.UNKNOWN)
    # And get a record on that inside
    assert_equal(ev.versions.get('os'), ev.UNKNOWN)
    # And that thing is "True", i.e. present
    assert (ev['os'])
    # but not comparable with anything besides itself (was above)
    assert_raises(TypeError, cmp, ev['os'], '0')
    assert_raises(TypeError, assert_greater, ev['os'], '0')

    return

def test_rerun_explicit(path=None):
    ds = Dataset(path).create(force=True)

    ds.run("echo o >> foo", explicit=True, outputs=["foo"])
    with open(op.join(ds.path, "foo")) as ifh:
        orig_content = ifh.read()
    orig_head = ds.repo.get_hexsha(DEFAULT_BRANCH)

    # Explicit rerun is allowed in a dirty tree.
    ok_(ds.repo.dirty)
    ds.rerun(explicit=True)
    eq_(orig_head, ds.repo.get_hexsha(DEFAULT_BRANCH + "~1"))
    with open(op.join(ds.path, "foo")) as ifh:
        eq_(orig_content * 2, ifh.read())

    # --since also works.
    ds.rerun(since="", explicit=True)
    eq_(orig_head,
        # Added two rerun commits.
        ds.repo.get_hexsha(DEFAULT_BRANCH + "~3"))

    # With just untracked changes, we can rerun with --onto.
    ds.rerun(since="", onto="", explicit=True)
    eq_(ds.repo.get_hexsha(orig_head + "^"),
        # Reran the four run commits from above on the initial base.
        ds.repo.get_hexsha("HEAD~4"))

    # But checking out a new HEAD can fail when there are modifications.
    ds.repo.checkout(DEFAULT_BRANCH)
    ok_(ds.repo.dirty)
    ds.repo.add(["to_modify"], git=True)
    ds.save()
    assert_false(ds.repo.dirty)
    with open(op.join(ds.path, "to_modify"), "a") as ofh:
        ofh.write("more")
    ok_(ds.repo.dirty)

    with assert_raises(CommandError):
        ds.rerun(onto="", since="", explicit=True)

def test_GitRepo_instance_from_not_existing(path=None, path2=None):
    # 1. create=False and path doesn't exist:
    repo = GitRepo(path)
    assert_false(op.exists(path))

    # 2. create=False, path exists, but no git repo:
    os.mkdir(path)
    ok_(op.exists(path))
    repo = GitRepo(path)
    assert_false(op.exists(op.join(path, '.git')))

    # 3. create=True, path doesn't exist:
    gr = GitRepo(path2).init()
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    ok_(op.exists(op.join(path2, '.git')))
    # re-enable from core GitRepo has a status() method
    #assert_repo_status(path2, annex=False)

    # 4. create=True, path exists, but no git repo:
    gr = GitRepo(path).init()
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    ok_(op.exists(op.join(path, '.git')))

def test_no_storage(store1=None, store2=None, ds_path=None):
    store1_url = 'ria+' + get_local_file_url(store1)
    store2_url = 'ria+' + get_local_file_url(store2)

    ds = Dataset(ds_path).create(force=True)
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    res = ds.create_sibling_ria(store1_url, "datastore1",
                                storage_sibling=False, new_store_ok=True)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_({'datastore1', 'here'},
        {s['name'] for s in ds.siblings(result_renderer='disabled')})

    # deprecated way of disabling storage still works
    res = ds.create_sibling_ria(store2_url, "datastore2",
                                storage_sibling=False, new_store_ok=True)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_({'datastore2', 'datastore1', 'here'},
        {s['name'] for s in ds.siblings(result_renderer='disabled')})

    # no annex/object dir should be created when there is no special remote
    # to use it.
    for s in [store1, store2]:
        p = Path(s) / ds.id[:3] / ds.id[3:] / 'annex' / 'objects'
        assert_false(p.exists())

    # smoke test that we can push to it
    res = ds.push(to='datastore1')
    assert_status('ok', res)
    # but nothing was copied, because there is no storage sibling
    assert_result_count(res, 0, action='copy')

def test_cred1_enter_new():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.enter_new(), None)
    assert_true(cred.is_known)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(keyring.get('name', 'password'), 'password1')

    keyring.delete('name')
    assert_raises(KeyError, keyring.delete, 'name', 'user')
    assert_raises(KeyError, keyring.delete, 'name')
    assert_equal(keyring.get('name', 'user'), None)

    # Test it blowing up if we provide unknown field
    with assert_raises(ValueError) as cme:
        cred.enter_new(username='******')
    assert_in('field(s): username. Known but not specified: password, user',
              str(cme.value))

    # Test that if user is provided, it is not asked
    cred.enter_new(user='user2')
    assert_equal(keyring.get('name', 'user'), 'user2')
    assert_equal(keyring.get('name', 'password'), 'newpassword')

def test_unlock_directory(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.unlock(path="dir")
    dirpath = Path("dir")
    dirpath_abs = Path(ds.pathobj / "dir")

    # On adjusted branches (for the purposes of this test, crippled
    # filesystems), the files were already unlocked and the committed state is
    # the unlocked pointer file.
    is_managed_branch = ds.repo.is_managed_branch()
    if is_managed_branch:
        assert_repo_status(ds.path)
    else:
        assert_repo_status(ds.path, modified=[dirpath / "a", dirpath / "b"])
    ds.save()
    ds.drop(str(dirpath / "a"), reckless='kill')
    assert_false(ds.repo.file_has_content(str(dirpath / "a")))

    # Unlocking without an explicit non-directory path doesn't fail if one of
    # the directory's files doesn't have content.
    res = ds.unlock(path="dir")
    assert_not_in_results(res, action="unlock",
                          path=str(dirpath_abs / "a"))
    if is_managed_branch:
        assert_not_in_results(res, action="unlock",
                              path=str(dirpath_abs / "b"))
    else:
        assert_in_results(res, action="unlock", status="ok",
                          path=str(dirpath_abs / "b"))
        assert_repo_status(ds.path, modified=[dirpath / "b"])

    # If we explicitly provide a path that lacks content, we get a result
    # for it.
    assert_in_results(ds.unlock(path=dirpath / "a", on_failure="ignore"),
                      action="unlock", status="impossible",
                      path=str(dirpath_abs / "a"))

def test_name_starts_with_hyphen(origpath=None, path=None):
    ds = Dataset.create(origpath)
    # create
    dash_sub = ds.create('-sub')
    assert_true(dash_sub.is_installed())
    assert_result_count(ds.subdatasets(), 1, path=dash_sub.path,
                        state='present')

    # clone
    ds_clone = Dataset.create(path)
    dash_clone = clone(source=dash_sub.path,
                       path=os.path.join(path, '-clone'))
    ds_clone.save(recursive=True)
    assert_true(dash_clone.is_installed())
    assert_result_count(ds_clone.subdatasets(), 1, path=dash_clone.path,
                        state='present')

    # uninstall
    ds_clone.drop('-clone', what='all', reckless='kill', recursive=True)
    assert_false(dash_clone.is_installed())
    assert_result_count(ds_clone.subdatasets(), 1, path=dash_clone.path,
                        state='absent')

    # get
    ds_clone.get('-clone')
    assert_true(dash_clone.is_installed())
    assert_result_count(ds_clone.subdatasets(), 1, path=dash_clone.path,
                        state='present')
    assert_repo_status(ds.path)

def test_get_subdataset_inherit_reckless(path=None, *, override):
    src = Dataset(opj(path, "a")).create()
    src_subds = src.create("sub")
    src_subds.create("subsub")
    src.save(recursive=True)

    clone = install(opj(path, "b"), source=src, reckless="auto",
                    result_xfm="datasets", return_type="item-or-list")
    clone_sub = Dataset(clone.pathobj / "sub")
    assert_false(clone_sub.is_installed())
    clone_subsub = Dataset(clone.pathobj / "sub" / "subsub")
    clone.get(opj("sub", "subsub"), reckless=False if override else None)
    ok_(clone_sub.is_installed())
    ok_(clone_subsub.is_installed())
    for sub in [clone_sub, clone_subsub]:
        eq_(sub.config.get("datalad.clone.reckless", None),
            None if override else "auto")
        eq_(sub.config.get("annex.hardlink", None),
            None if override else "true")

def test_aggregate_with_unavailable_objects_from_subds(path=None, target=None):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)

    # now make that a subdataset of a new one, so aggregation needs to get the
    # metadata objects first:
    super = Dataset(target).create()
    super.install("base", source=base.path)
    assert_repo_status(super.path)
    clone = Dataset(opj(super.path, "base"))
    assert_repo_status(clone.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = clone.repo.get_content_annexinfo(paths=[objpath], init=None,
                                            eval_availability=True)
    eq_(len(objs), 6)
    assert_false(any(st["has_content"] for st in objs.values()))

    # now aggregate should get those metadata objects
    super.aggregate_metadata(recursive=True, update_mode='all',
                             force_extraction=False)
    objs_after = clone.repo.get_content_annexinfo(paths=objs, init=None,
                                                  eval_availability=True)
    assert_true(all(st["has_content"] for st in objs_after.values()))

def test_add_archive_dirs(path_orig=None, url=None, repo_path=None):
    # change to repo_path
    with chpwd(repo_path):
        # create annex repo
        ds = Dataset(repo_path).create(force=True)
        repo = ds.repo
        # add archive to the repo so we could test
        with swallow_outputs():
            repo.add_url_to_file('1.tar.gz', opj(url, '1.tar.gz'))
        repo.commit("added 1.tar.gz")

        # test with excludes and annex options
        add_archive_content(
            '1.tar.gz',
            existing='archive-suffix',
            # Since inconsistent and seems in many cases no
            # leading dirs to strip, keep them as provided
            strip_leading_dirs=True,
            delete=True,
            leading_dirs_consider=['crcns.*', '1'],
            leading_dirs_depth=2,
            use_current_dir=False,
            exclude='.*__MACOSX.*')  # some junk penetrates

        eq_(repo.get_description(
                uuid=DATALAD_SPECIAL_REMOTES_UUIDS[ARCHIVES_SPECIAL_REMOTE]),
            '[%s]' % ARCHIVES_SPECIAL_REMOTE)

        all_files = sorted(find_files('.'))
        # posixify paths to make it work on Windows as well
        all_files = [Path(file).as_posix() for file in all_files]
        target_files = {
            'CR24A/behaving1/1 f.txt',
            'CR24C/behaving3/3 f.txt',
            'CR24D/behaving2/2 f.txt',
            '.datalad/config',
        }
        eq_(set(all_files), target_files)

        # regression test: the subdir in MACOSX wasn't excluded and its name
        # was getting stripped by leading_dir_len
        # if stripping and exclude didn't work this fails
        assert_false(exists('__MACOSX'))
        # if exclude doesn't work then name of subdir gets stripped by
        # leading_dir_len
        assert_false(exists('c-1_data'))
        # if exclude doesn't work but everything else works this fails
        assert_false(exists('CR24B'))

def test_ArchivesCache():
    # we don't actually need to test archives handling itself
    path1 = "/zuba/duba"
    path2 = "/zuba/duba2"
    # should not be able to create a persistent cache without topdir
    assert_raises(ValueError, ArchivesCache, persistent=True)
    cache = ArchivesCache()  # by default -- non persistent

    archive1_path = op.join(path1, fn_archive_obscure_ext)
    archive2_path = op.join(path2, fn_archive_obscure_ext)
    cached_archive1_path = cache[archive1_path].path
    assert_false(cache[archive1_path].path == cache[archive2_path].path)
    assert_true(cache[archive1_path] is cache[archive1_path])
    cache.clean()
    assert_false(op.exists(cached_archive1_path))
    assert_false(op.exists(cache.path))

    # test del
    cache = ArchivesCache()  # by default -- non persistent
    assert_true(op.exists(cache.path))
    cache_path = cache.path
    del cache
    assert_false(op.exists(cache_path))

def test_reobtain_data(originpath=None, destpath=None):
    origin = Dataset(originpath).create()
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1, action="update", status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1, action="update", status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))

def test_property_reevaluation(repo1=None):

    ds = Dataset(repo1)
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    first_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after creation, we have `repo`, and `config` was reevaluated to point
    # to the repo's config:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    second_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is(ds.config, ds.repo.config)
    assert_is_not(first_config, second_config)
    assert_is_not_none(ds.id)
    first_id = ds.id

    ds.drop(what='all', reckless='kill', recursive=True)
    # repo is gone, and config is again reevaluated to only provide user/system
    # level config:
    assert_false(lexists(ds.path))
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    third_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_not(second_config, third_config)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after recreation everything is sane again:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    assert_is(ds.config, ds.repo.config)
    fourth_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is_not(third_config, fourth_config)
    assert_is_not_none(ds.id)
    assert_not_equal(ds.id, first_id)

def test_is_installed(src=None, path=None):
    ca = dict(result_renderer='disabled')
    # a remote dataset with a subdataset underneath
    origds = Dataset(src).create(**ca)
    _ = origds.create('subm 1', **ca)

    ds = Dataset(path)
    assert_false(ds.is_installed())

    # get a clone:
    clone(src, path, **ca)
    ok_(ds.is_installed())
    # submodule still not installed:
    subds = Dataset(ds.pathobj / 'subm 1')
    assert_false(subds.is_installed())
    # We must not be able to create a new repository under a known
    # subdataset path.
    # Note: Unfortunately we would still be able to generate it under
    # subdirectory within submodule, e.g. `subm 1/subdir` but that is
    # not checked here. `create` provides that protection though.
    res = subds.create(on_failure='ignore',
                       return_type='list',
                       result_filter=None,
                       result_xfm=None,
                       **ca)
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, status='error', path=subds.path,
        message=('collision with %s (dataset) in dataset %s',
                 subds.path, ds.path))
    # get the submodule
    with chpwd(ds.path):
        get('subm 1', **ca)
    ok_(subds.is_installed())
    # wipe it out
    rmtree(ds.path)
    assert_false(ds.is_installed())

def test_credentials_from_env():
    keyring = Keyring()
    cred = AWS_S3("test-s3", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.get('key_id'), None)
    assert_equal(cred.get('secret_id'), None)

    def _check1():
        assert_equal(cred.get('key_id'), '1')
        assert_false(cred.is_known)

    def _check2():
        assert_equal(cred.get('key_id'), '1')
        assert_equal(cred.get('secret_id'), '2')
        assert_true(cred.is_known)

    # this is the old way, should still work
    with patch.dict('os.environ', {'DATALAD_test_s3_key_id': '1'}):
        _check1()
        with patch.dict('os.environ', {'DATALAD_test_s3_secret_id': '2'}):
            _check2()
        assert_false(cred.is_known)  # no memory of the past

    # here is the new way
    import datalad
    try:
        with patch.dict('os.environ',
                        {'DATALAD_CREDENTIAL_test__s3_key__id': '1'}):
            datalad.cfg.reload()
            _check1()
            with patch.dict('os.environ',
                            {'DATALAD_CREDENTIAL_test__s3_secret__id': '2'}):
                datalad.cfg.reload()
                _check2()
            datalad.cfg.reload()
            assert_false(cred.is_known)  # no memory of the past
    finally:
        datalad.cfg.reload()

def test_datalad_credential_helper(path=None):

    ds = Dataset(path).create()

    # tell git to use git-credential-datalad
    ds.config.add('credential.helper', 'datalad', scope='local')
    ds.config.add('datalad.credentials.githelper.noninteractive', 'true',
                  scope='global')

    from datalad.downloaders.providers import Providers

    url1 = "https://datalad-test.org/some"
    url2 = "https://datalad-test.org/other"
    provider_name = "datalad-test.org"

    # `Providers` code is old and only considers a dataset root based on PWD
    # for config lookup. contextmanager below can be removed once the
    # provider/credential system is redesigned.
    with chpwd(ds.path):

        gitcred = GitCredentialInterface(url=url1, repo=ds)

        # There's nothing set up yet, helper should return empty
        gitcred.fill()
        eq_(gitcred['username'], '')
        eq_(gitcred['password'], '')

        # store new credentials
        # Note, that `Providers.enter_new()` currently uses user-level config
        # files for storage only. TODO: make that an option!
        # To not mess with existing ones, fail if it already exists:
        cfg_file = Path(Providers._get_providers_dirs()['user']) \
            / f"{provider_name}.cfg"
        assert_false(cfg_file.exists())

        # Make sure we clean up
        from datalad.tests import _TEMP_PATHS_GENERATED
        _TEMP_PATHS_GENERATED.append(str(cfg_file))

        # Give credentials to git and ask it to store them
        # (values match the assertions below):
        gitcred = GitCredentialInterface(url=url1, username="dl-user",
                                         password="dl-pwd", repo=ds)
        gitcred.approve()
        assert_true(cfg_file.exists())
        providers = Providers.from_config_files()
        p1 = providers.get_provider(url=url1, only_nondefault=True)
        assert_is_instance(p1.credential, UserPassword)
        eq_(p1.credential.get('user'), 'dl-user')
        eq_(p1.credential.get('password'), 'dl-pwd')

        # default regex should be host only, so matching url2, too
        p2 = providers.get_provider(url=url2, only_nondefault=True)
        assert_is_instance(p1.credential, UserPassword)
        eq_(p1.credential.get('user'), 'dl-user')
        eq_(p1.credential.get('password'), 'dl-pwd')

        # git, too, should now find it for both URLs
        gitcred = GitCredentialInterface(url=url1, repo=ds)
        gitcred.fill()
        eq_(gitcred['username'], 'dl-user')
        eq_(gitcred['password'], 'dl-pwd')

        gitcred = GitCredentialInterface(url=url2, repo=ds)
        gitcred.fill()
        eq_(gitcred['username'], 'dl-user')
        eq_(gitcred['password'], 'dl-pwd')

        # Rejection must not currently lead to deleting anything, since we
        # would delete too broadly.
        gitcred.reject()
        assert_true(cfg_file.exists())
        gitcred = GitCredentialInterface(url=url1, repo=ds)
        gitcred.fill()
        eq_(gitcred['username'], 'dl-user')
        eq_(gitcred['password'], 'dl-pwd')
        dlcred = UserPassword(name=provider_name)
        eq_(dlcred.get('user'), 'dl-user')
        eq_(dlcred.get('password'), 'dl-pwd')

def test_something(path=None, new_home=None):
    # will refuse to work on dataset without a dataset
    assert_raises(ValueError, ConfigManager, source='branch')
    # now read the example config
    cfg = ConfigManager(GitRepo(opj(path, 'ds'), create=True), source='branch')
    assert_equal(len(cfg), 5)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'],
                 ('name=Jane Doe', 'email=jd@example.com'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()),
                 [u'onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')),
                 ['empty', 'myint', 'novalue', 'user'])
    assert_equal(cfg.options(u'onemore.complicated の beast with.dot'),
                 ['findme'])
    assert_equal(
        sorted(cfg.items()),
        [(u'onemore.complicated の beast with.dot.findme', '5.0'),
         ('something.empty', ''),
         ('something.myint', '3'),
         ('something.novalue', None),
         ('something.user', ('name=Jane Doe', 'email=jd@example.com'))])
    assert_equal(
        sorted(cfg.items('something')),
        [('something.empty', ''),
         ('something.myint', '3'),
         ('something.novalue', None),
         ('something.user', ('name=Jane Doe', 'email=jd@example.com'))])

    # by default get last value only
    assert_equal(cfg.get('something.user'), 'email=jd@example.com')
    # but can get all values
    assert_equal(cfg.get('something.user', get_all=True),
                 ('name=Jane Doe', 'email=jd@example.com'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(
        cfg.getfloat(u'onemore.complicated の beast with.dot', 'findme'), 5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    # git demands a key without value at all to be used as a flag, thus True
    assert_equal(cfg.getbool('something', 'novalue'), True)
    assert_equal(cfg.get('something.novalue'), None)
    # empty value is False
    assert_equal(cfg.getbool('something', 'empty'), False)
    assert_equal(cfg.get('something.empty'), '')
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'),
                 cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'), 'oohaaa')
    # weird, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), u'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparison would fail, because of section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError, cfg.rename_section, 'nothere', 'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch a changes
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set, 'mike.should.have', 'a beer', force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # test deprecated 'where' interface and old 'dataset' (not 'branch') value
    # TODO: remove along with the removal of deprecated 'where'
    cfg.set('mike.should.have', 'wasknown', where='dataset')
    assert_equal(cfg['mike.should.have'], 'wasknown')
    assert_equal(cfg.get_from_source('dataset', 'mike.should.have'),
                 'wasknown')

    # fails unknown location
    assert_raises(ValueError, cfg.add, 'somesuch', 'shit', scope='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig
    # Note: An easier way to test this, would be to just set GIT_CONFIG_GLOBAL
    # to point somewhere else. However, this is not supported by git before
    # 2.32. Hence, stick with changed HOME in this test, but be sure to unset a
    # possible GIT_CONFIG_GLOBAL in addition.
    patched_env = os.environ.copy()
    patched_env.pop('GIT_CONFIG_GLOBAL', None)
    patched_env.update(get_home_envvars(new_home))
    with patch.dict('os.environ',
                    dict(patched_env, DATALAD_SNEAKY_ADDITION='ignore'),
                    clear=True):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert (not exists(global_gitconfig))
        globalcfg = ConfigManager()
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', scope='global')
        assert (exists(global_gitconfig))
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # does not monitor additions!
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(CommandError, globalcfg.unset,
                          'datalad.unittest.youcan', scope='local')
        assert (globalcfg.has_section('datalad.unittest'))
        globalcfg.unset('datalad.unittest.youcan', scope='global')
        # but after we unset the only value -- that section is no longer listed
        assert (not globalcfg.has_section('datalad.unittest'))
        assert_not_in('datalad.unittest.youcan', globalcfg)
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='branch',
                        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)

def test_try_lock_informatively(tempfile=None):
    lock = InterProcessLock(tempfile + '.lck')
    lock_path = ensure_unicode(lock.path)  # can be bytes, complicates string formatting etc
    t0 = time()
    with try_lock_informatively(lock, purpose="happy life") as acquired:
        assert_true(lock.acquired)
        assert_true(acquired)
        assert_greater(2, time() - t0)  # should not take any notable time, we cannot be blocking
        """
        # InterProcessLock is not re-entrant so nesting should not be used, will result
        # in exception on release
        with try_lock_informatively(lock, timeouts=[dt, dt*2], proceed_unlocked=True) as acquired:
            assert_true(lock.acquired)  # due to outer cm
            assert_true(acquired)  # lock is reentrant apparently
        """
    # Let's try in a completely different subprocess
    runner = WitlessRunner(env=dict(os.environ,
                                    DATALAD_LOG_LEVEL='info',
                                    DATALAD_LOG_TARGET='stderr'))

    script1 = Path(tempfile + "-script1.py")
    script1_fmt = f"""
from fasteners import InterProcessLock
from time import time
from datalad.support.locking import try_lock_informatively

lock = InterProcessLock({lock_path!r})

with try_lock_informatively(lock, timeouts=[0.05, 0.15], proceed_unlocked={{proceed_unlocked}}) as acquired:
    print("Lock acquired=%s" % acquired)
"""
    script1.write_text(script1_fmt.format(proceed_unlocked=True))
    t0 = time()
    res = runner.run([sys.executable, str(script1)], protocol=StdOutErrCapture)
    assert_in('Lock acquired=False', res['stdout'])
    assert_in(f'Failed to acquire lock at {lock_path} in 0.05', res['stderr'])
    assert_in(f'Failed to acquire lock at {lock_path} in 0.15', res['stderr'])
    assert_in('proceed without locking', res['stderr'])
    assert_greater(time() - t0, 0.19999)  # should wait for at least 0.2
    try:
        import psutil

        # PID does not correspond
        assert_in('Check following process: PID=', res['stderr'])
        assert_in(f'CWD={os.getcwd()} CMDLINE=', res['stderr'])
    except ImportError:
        pass  # psutil was not installed, cannot get list of files
    except AssertionError:
        # we must have had the other one then
        assert_in('failed to determine one', res['stderr'])
        if not on_osx:
            # so far we had only OSX reporting failing to get PIDs information
            # but if it is something else -- re-raise original exception
            raise

    # in 2nd case, lets try without proceeding unlocked
    script1.write_text(script1_fmt.format(proceed_unlocked=False))
    t0 = time()
    with assert_raises(CommandError) as cme:
        runner.run([sys.executable, str(script1)], protocol=StdOutErrCapture)
    assert_in(f"Failed to acquire lock at {lock_path} in 2 attempts.",
              str(cme.value))
    assert_in("RuntimeError", str(cme.value))
    assert_false(cme.value.stdout)  # nothing there since print should not happen
    assert_in(f'Failed to acquire lock at {lock_path} in 0.05', cme.value.stderr)
    assert_in(f'Failed to acquire lock at {lock_path} in 0.15', cme.value.stderr)
    assert_greater(time() - t0, 0.19999)  # should wait for at least 0.2

    # now that we left context, should work out just fine
    res = runner.run([sys.executable, str(script1)], protocol=StdOutErrCapture)
    assert_in('Lock acquired=True', res['stdout'])
    assert_not_in('Failed to acquire lock', res['stderr'])
    assert_not_in('PID', res['stderr'])

def test_install_recursive(src=None, path_nr=None, path_r=None):
    _make_dataset_hierarchy(src)

    # first install non-recursive:
    ds = install(path_nr, source=src, recursive=False)
    ok_(ds.is_installed())
    for sub in ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(not sub.is_installed(),
            "Unintentionally installed: %s" % (sub,))
    # this also means, subdatasets to be listed as absent:
    eq_(set(ds.subdatasets(recursive=True, state='absent',
                           result_xfm='relpaths')),
        {'sub1'})

    # now recursively:
    # don't filter implicit results so we can inspect them
    res = install(path_r, source=src, recursive=True,
                  result_xfm=None, result_filter=None)
    # installed a dataset and four subdatasets
    assert_result_count(res, 5, action='install', type='dataset')
    # we recurse top down during installation, so toplevel should appear at
    # first position in returned list
    eq_(res[0]['path'], path_r)
    top_ds = Dataset(res[0]['path'])
    ok_(top_ds.is_installed())

    # the subdatasets are contained in returned list:
    # (Note: Until we provide proper (singleton) instances for Datasets,
    # need to check for their paths)
    assert_in_results(res, path=opj(top_ds.path, 'sub1'), type='dataset')
    assert_in_results(res, path=opj(top_ds.path, 'sub1', 'sub2'),
                      type='dataset')
    assert_in_results(res, path=opj(top_ds.path, 'sub1', 'sub2', 'sub3'),
                      type='dataset')
    assert_in_results(res, path=opj(top_ds.path, 'sub1', 'sub2', 'sub3', 'sub4'),
                      type='dataset')
    eq_(len(top_ds.subdatasets(recursive=True)), 4)

    for subds in top_ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subds.is_installed(), "Not installed: %s" % (subds,))
        # no content was installed:
        ainfo = subds.repo.get_content_annexinfo(init=None,
                                                 eval_availability=True)
        assert_false(any(st["has_content"] for st in ainfo.values()))
    # no absent subdatasets:
    ok_(top_ds.subdatasets(recursive=True, state='absent') == [])

    # check if we can install recursively into a dataset
    # https://github.com/datalad/datalad/issues/2982
    subds = ds.install('recursive-in-ds', source=src, recursive=True)
    ok_(subds.is_installed())
    for subsub in subds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subsub.is_installed())

    # check that we get subdataset instances manufactured from notneeded results
    # to install existing subdatasets again
    eq_(subds, ds.install('recursive-in-ds'))