def test_bundle_invariance(path):
    remote_url = 'ssh://localhost'
    manager = SSHManager()
    testfile = Path(path) / 'dummy'
    for flag in (True, False):
        assert_false(testfile.exists())
        ssh = manager.get_connection(remote_url, use_remote_annex_bundle=flag)
        ssh('cd .>{}'.format(text_type(testfile)))
        ok_(testfile.exists())
        testfile.unlink()
def test_ssh_copy(sourcedir, sourcefile1, sourcefile2):
    remote_url = 'ssh://localhost:22'
    manager = SSHManager()
    ssh = manager.get_connection(remote_url)

    # write to obscurely named file in sourcedir
    obscure_file = opj(sourcedir, get_most_obscure_supported_name())
    with open(obscure_file, 'w') as f:
        f.write("three")

    # copy tempfile list to remote_url:sourcedir
    sourcefiles = [sourcefile1, sourcefile2, obscure_file]
    ssh.put(sourcefiles, opj(remote_url, sourcedir))
    # docs promise that connection is auto-opened
    ok_(ssh.is_open())

    # recursive copy tempdir to remote_url:targetdir
    targetdir = sourcedir + '.copy'
    ssh.put(sourcedir, opj(remote_url, targetdir),
            recursive=True, preserve_attrs=True)

    # check if sourcedir copied to remote_url:targetdir
    ok_(isdir(targetdir))
    # check if scp preserved source directory attributes
    # if source_mtime=1.12s, scp -p sets target_mtime=1.0s, test that
    eq_(getmtime(targetdir), int(getmtime(sourcedir)) + 0.0)

    # check if targetfiles (and their content) exist in remote_url:targetdir,
    # this implies that file and recursive directory copying pass
    for targetfile, content in zip(sourcefiles, ["one", "two", "three"]):
        targetpath = opj(targetdir, targetfile)
        ok_(exists(targetpath))
        with open(targetpath, 'r') as fp:
            eq_(content, fp.read())

    # and now a quick smoke test for get
    togetfile = Path(targetdir) / '2|g>e"t.t&x;t'
    togetfile.write_text(text_type('something'))
    ssh.get(opj(remote_url, text_type(togetfile)), sourcedir)
    ok_((Path(sourcedir) / '2|g>e"t.t&x;t').exists())

    ssh.close()
def test_sibling_path_is_posix(basedir, otherpath):
    ds_source = Dataset(opj(basedir, "source")).create()
    # add remote with system-native path
    ds_source.siblings(
        action="add",
        name="donotexist",
        url=otherpath,
        result_renderer=None)
    res = ds_source.siblings(
        action="query",
        name="donotexist",
        result_renderer=None,
        return_type='item-or-list')
    # path URL should come out POSIX as if `git clone` had configured it for origin
    # https://github.com/datalad/datalad/issues/3972
    eq_(res['url'], Path(otherpath).as_posix())
def test_update_adjusted_incompatible_with_ff_only(path=None):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")

    maybe_adjust_repo(ds_clone.repo)
    assert_in_results(
        ds_clone.update(merge="ff-only", on_failure="ignore"),
        action="update", status="impossible")
    assert_in_results(
        ds_clone.update(on_failure="ignore"),
        action="update", status="ok")
def test_push_matching(path):
    path = Path(path)
    ds = Dataset(path / "ds").create(force=True)
    ds.config.set('push.default', 'matching', where='local')
    ds.save()
    remote_ds = mk_push_target(ds, 'local', str(path / 'dssibling'),
                               annex=True, bare=False)
    # the fact that the next one even runs makes sure that we are in a better
    # place than https://github.com/datalad/datalad/issues/4888
    ds.push(to='local')
    # and we pushed the commit in the current branch
    eq_(remote_ds.get_hexsha(DEFAULT_BRANCH),
        ds.repo.get_hexsha(DEFAULT_BRANCH))
def test_push_git_annex_branch_when_no_data(path):
    path = Path(path)
    ds = Dataset(path / "a").create()
    target = mk_push_target(ds, "target", str(path / "target"),
                            annex=False, bare=True)
    (ds.pathobj / "f0").write_text("0")
    ds.save()
    ds.push(to="target", data="nothing")
    assert_in("git-annex",
              {d["refname:strip=2"]
               for d in target.for_each_ref_(fields="refname:strip=2")})
def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = Path(src) / 'ds'
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets',
                   return_type='item-or-list')
    eq_(ds.path, path)
    assert_repo_status(path, annex=False)
    ok_file_has_content(ds.pathobj / 'test.txt', 'some')
def test_merge_no_merge_target(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    if ds_src.repo.is_managed_branch():
        # `git annex sync REMOTE` rather than `git merge TARGET` is used on an
        # adjusted branch, so we don't give an error if TARGET can't be
        # determined.
        raise SkipTest("Test depends on non-adjusted branch")
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    assert_repo_status(ds_src.path)
    ds_clone.repo.checkout("master", options=["-bnew"])
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, status="impossible", action="update")
def test_aggregate_aggregation(path):
    path = Path(path)
    ds = Dataset(path).create()
    sourceds = ds.create(path / 'source')
    origds = ds.create(path / 'source' / 'orig')
    # put a single (empty) file in origds to have some metadata-relevant
    # content
    payload = origds.pathobj / 'CONTENT'
    payload.write_text(u'')
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    # aggregate origds metadata into sourceds, note the trailing slash
    sourceds.meta_aggregate('orig' + op.sep)
    # orig has no aggregates
    assert_status(
        'impossible',
        origds.meta_dump(reporton='aggregates', recursive=True,
                         on_failure='ignore'))
    # but sourceds has exactly one record -- that of origds
    res = sourceds.meta_dump(reporton='aggregates', recursive=True,
                             on_failure='ignore')
    assert_result_count(res, 1)
    assert_result_count(res, 1, path=origds.path)

    # now we change the payload file to have some metadata-change signal
    # and save the entire hierarchy
    payload.unlink()
    payload.write_text(u'BIGONE')
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    # FOR REAL: aggregate the aggregate on 'orig' from 'source'
    # this must not REaggregate 'orig'
    # there is no trailing slash
    ds.meta_aggregate(op.join('source', 'orig'))
    # the freshly aggregated, but outdated metadata still reports
    # a zero bytesize for the payload file
    eq_(
        ds.meta_dump(payload, reporton='files')[0]['metadata']['metalad_core']
        ['contentbytesize'],
        0)
    # and now with actual aggregation from orig
    ds.meta_aggregate(op.join('source', 'orig') + op.sep)
    # picks up the new size
    eq_(
        ds.meta_dump(payload, reporton='files')[0]['metadata']['metalad_core']
        ['contentbytesize'],
        6)
def test_merge_conflict(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    ds_src_s0 = ds_src.create("s0")
    ds_src_s1 = ds_src.create("s1")
    ds_src.save()
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_s0 = Dataset(path / "clone" / "s0")
    ds_clone_s1 = Dataset(path / "clone" / "s1")

    (ds_src.pathobj / "foo").write_text("src content")
    ds_src.save(to_git=True)

    (ds_clone.pathobj / "foo").write_text("clone content")
    ds_clone.save(to_git=True)

    # Top-level merge failure
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, action="merge", status="error")
    assert_in_results(res, action="update", status="error")
    # Deal with the conflicts. Note that save() won't handle this gracefully
    # because it will try to commit with a pathspec, which git doesn't allow
    # during a merge.
    ds_clone.repo.call_git(["checkout", "--theirs", "--", "foo"])
    ds_clone.repo.call_git(["add", "--", "foo"])
    ds_clone.repo.call_git(["commit", "--no-edit"])
    assert_repo_status(ds_clone.path)

    # Top-level and subdataset merge failure
    (ds_src_s0.pathobj / "foo").write_text("src s0 content")
    (ds_src_s1.pathobj / "foo").write_text("no conflict")
    ds_src.save(recursive=True, to_git=True)

    (ds_clone_s0.pathobj / "foo").write_text("clone s0 content")
    ds_clone.save(recursive=True, to_git=True)

    res = ds_clone.update(merge=True, recursive=True, on_failure="ignore")
    assert_result_count(res, 2, action="merge", status="error")
    assert_result_count(res, 2, action="update", status="error")
    assert_in_results(res, action="merge", status="ok", path=ds_clone_s1.path)
    assert_in_results(res, action="update", status="ok", path=ds_clone_s1.path)
    # No saving happens if there's a top-level conflict.
    assert_repo_status(ds_clone.path,
                       modified=[ds_clone_s0.path, ds_clone_s1.path])
def test_asyncio_forked(temp):
    # temp will be used to communicate from the child whether it succeeded or not
    temp = Path(temp)
    runner = Runner()
    import os
    try:
        pid = os.fork()
    except BaseException as exc:
        # .fork availability is "Unix", and there are cases where it is "not supported"
        # so we will just skip if no forking is possible
        raise SkipTest(f"Cannot fork: {exc}")
    # if it does not fail (in the original process or in a fork) -- we are good
    if sys.version_info < (3, 8) and pid != 0:
        # for some reason it is crucial to sleep a little (but 0.001 is not enough)
        # in the master process with older pythons or it takes forever to make the child run
        sleep(0.1)
    try:
        runner.run([sys.executable, '--version'], protocol=StdOutCapture)
        if pid == 0:
            temp.write_text("I rule")
    except:
        if pid == 0:
            temp.write_text("I suck")
    if pid != 0:
        # parent: look after the child
        t0 = time()
        try:
            while not temp.exists() or temp.stat().st_size < 6:
                if time() - t0 > 5:
                    raise AssertionError(
                        "Child process did not create a file we expected!")
        finally:
            # kill the child
            os.kill(pid, signal.SIGTERM)
        # see if it was a good one
        eq_(temp.read_text(), "I rule")
    else:
        # sleep long enough so the parent kills the kid before it continues doing bad deeds
        sleep(10)
def check_create_path_semantics(cwd, create_ds, path_arg, base_path,
                                other_path):
    ds = Dataset(base_path).create()
    os.makedirs(op.join(ds.path, 'some'))
    target_path = ds.pathobj / "some" / "what" / "deeper"
    with chpwd(other_path if cwd == 'elsewhere'
               else base_path if cwd == 'parentds'
               else str(ds.pathobj / 'some') if cwd == 'subdir'
               else str(Path.cwd())):
        subds = create(
            dataset=ds.path if create_ds == 'abspath'
            else str(ds.pathobj.relative_to(cwd)) if create_ds == 'relpath'
            else ds if create_ds == 'instance'
            else create_ds,
            path=str(target_path) if path_arg == 'abspath'
            else str(target_path.relative_to(ds.pathobj)) if path_arg == 'relpath'
            else op.join('what', 'deeper') if path_arg == 'subdir_relpath'
            else path_arg)
        eq_(subds.pathobj, target_path)
def check_create_initopts_form(form, path=None):
    path = Path(path)
    template_dir = path / "templates"
    template_dir.mkdir()
    (template_dir / "foo").write_text("")

    forms = {"list": [f"--template={template_dir}"],
             "dict": {"template": str(template_dir)}}

    ds = Dataset(path / "ds")
    ds.create(initopts=forms[form])
    ok_exists(ds.repo.dot_git / "foo")
def test_merge_conflict_in_subdataset_only(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    if ds_src.repo.is_managed_branch():
        # `git annex sync REMOTE` is used on an adjusted branch, but this error
        # depends on `git merge TARGET` being used.
        raise SkipTest("Test depends on non-adjusted branch")
    ds_src_sub_conflict = ds_src.create("sub_conflict")
    ds_src_sub_noconflict = ds_src.create("sub_noconflict")
    ds_src.save()

    # Set up a scenario where one subdataset has a conflict between the remote
    # and local version, but the parent dataset does not have a conflict
    # because it hasn't recorded the subdataset state.
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")

    ds_clone_sub_conflict = Dataset(path / "clone" / "sub_conflict")
    ds_clone_sub_noconflict = Dataset(path / "clone" / "sub_noconflict")

    (ds_src_sub_conflict.pathobj / "foo").write_text("src content")
    ds_src_sub_conflict.save(to_git=True)

    (ds_clone_sub_conflict.pathobj / "foo").write_text("clone content")
    ds_clone_sub_conflict.save(to_git=True)

    (ds_src_sub_noconflict.pathobj / "foo").write_text("src content")
    ds_src_sub_noconflict.save()

    res = ds_clone.update(merge=True, recursive=True, on_failure="ignore")
    assert_in_results(res, action="merge", status="error",
                      path=ds_clone_sub_conflict.path)
    assert_in_results(res, action="merge", status="ok",
                      path=ds_clone_sub_noconflict.path)
    assert_in_results(res, action="save", status="ok", path=ds_clone.path)
    # We saved the subdataset without a conflict...
    assert_repo_status(ds_clone_sub_noconflict.path)
    # ... but the one with the conflict leaves it for the caller to handle.
    ok_(ds_clone_sub_conflict.repo.call_git(
        ["ls-files", "--unmerged", "--", "foo"]).strip())
def test_get_open_files(p=None):
    pobj = Path(p)
    skip_if_no_module('psutil')
    eq_(get_open_files(p), {})
    f1 = pobj / '1'
    subd = pobj / 'd'
    with f1.open() as f:
        # since lsof does not care about PWD env var etc, paths
        # will not contain symlinks, we better realpath them
        # all before comparison
        eq_(get_open_files(p, log_open=40)[str(f1.resolve())].pid,
            os.getpid())

    assert not get_open_files(str(subd))

    if on_windows:
        # the remainder of the test assumes a certain performance.
        # However, on Windows get_open_files() can be very slow
        # (e.g. the first invocation in this test (above) can easily
        # take 30-50s). It is not worth slowing the tests to
        # accommodate this issue, given we have tested proper functioning
        # in principle already above.
        return

    # if we start a process within that directory, should get informed
    from subprocess import (
        PIPE,
        Popen,
    )
    from time import time
    t0 = time()
    proc = Popen([sys.executable, '-c',
                  r'import sys; sys.stdout.write("OK\n"); sys.stdout.flush();'
                  r'import time; time.sleep(10)'],
                 stdout=PIPE,
                 cwd=str(subd))
    # Assure that it started and we read the OK
    eq_(ensure_unicode(proc.stdout.readline().strip()), u"OK")
    assert time() - t0 < 5  # that we were not stuck waiting for process to finish
    eq_(get_open_files(p)[str(subd.resolve())].pid, proc.pid)
    eq_(get_open_files(subd)[str(subd.resolve())].pid, proc.pid)
    proc.terminate()
    assert_equal(get_open_files(str(subd)), {})
def test_copy_file_datalad_specialremote(workdir=None, webdir=None,
                                         weburl=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    # enable datalad special remote
    src_ds.repo.init_remote(
        DATALAD_SPECIAL_REMOTE,
        ['encryption=none', 'type=external',
         'externaltype={}'.format(DATALAD_SPECIAL_REMOTE),
         'autoenable=true'])
    # put files into the dataset by URL
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')), path='myfile2.txt')
    # approx test that the file is known to a remote
    # that is not the web remote
    assert_in_results(
        src_ds.repo.whereis('myfile1.txt', output='full').values(),
        here=False,
        description='[{}]'.format(DATALAD_SPECIAL_REMOTE),
    )
    # now a new dataset
    dest_ds = Dataset(workdir / 'dest').create()
    # no special remotes
    eq_(dest_ds.repo.get_special_remotes(), {})
    # must call with a dataset to get change saved, in order for drop
    # below to work properly without getting in reckless mode
    dest_ds.copy_file([src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj])
    # we have a special remote in the destination dataset now
    assert_in_results(
        dest_ds.repo.get_special_remotes().values(),
        externaltype=DATALAD_SPECIAL_REMOTE,
    )
    # and it works
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # now replace file in dest with a different content at the same path
    # must call with a dataset to get change saved, in order for drop
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile2.txt', dest_ds.pathobj / 'myfile1.txt'])
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    # now gets the "same path", but yields different content
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', 'abc')
def test_create_osf_simple(path):
    ds = Dataset(path).create(force=True)
    ds.save()

    file1 = Path('ds') / "file1.txt"

    create_results = ds.create_sibling_osf(title="CI dl-create",
                                           name="osf-storage")

    assert_result_count(create_results, 2, status='ok', type='dataset')

    # if we got here, we created something at OSF;
    # make sure we clean up afterwards
    try:
        # special remote is configured:
        remote_log = ds.repo.call_git(
            ['cat-file', 'blob', 'git-annex:remote.log'])
        assert_in("project={}".format(create_results[0]['id']), remote_log)

        # copy files over
        ds.repo.copy_to('.', "osf-storage")
        whereis = ds.repo.whereis(str(file1))
        here = ds.config.get("annex.uuid")
        # files should be 'here' and on remote end:
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)

        # drop content here
        ds.drop('.')
        whereis = ds.repo.whereis(str(file1))
        # now on remote end only
        assert_equal(len(whereis), 1)
        assert_not_in(here, whereis)

        # and get content again from remote:
        ds.get('.')
        whereis = ds.repo.whereis(str(file1))
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)
    finally:
        # clean remote end:
        cred = get_credentials(allow_interactive=False)
        osf = OSF(**cred)
        delete_project(osf.session, create_results[0]['id'])
def test_guess_dot_git(path=None, url=None, tdir=None, *, annex):
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', git=not annex)
    repo.commit()

    # we need to prepare to be served via http, otherwise it must fail
    with swallow_logs() as cml:
        assert_raises(IncompleteResultsError, install, path=tdir, source=url)
    ok_(not exists(tdir))

    Runner(cwd=path).run(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(installed.pathobj.resolve(), Path(tdir).resolve())
    ok_(exists(tdir))
    assert_repo_status(tdir, annex=annex)
def test_gitannex(osf_id, dspath):
    from datalad.cmd import (
        GitRunner,
        WitlessRunner,
    )
    dspath = Path(dspath)

    ds = Dataset(dspath).create()

    # add remote parameters here
    init_remote_opts = ["project={}".format(osf_id)]

    # add special remote
    init_opts = common_init_opts + init_remote_opts
    ds.repo.init_remote('osfproject', options=init_opts)

    # run git-annex-testremote
    # note that we don't want to capture output. If something goes wrong we
    # want to see it in the test build's output log.
    WitlessRunner(cwd=dspath,
                  env=GitRunner.get_git_environ_adjusted()).run(
        ['git', 'annex', 'testremote', 'osfproject', "--fast"])
def test_global_config():
    # from within tests, global config should be read from faked $HOME (see
    # setup_package) or from GIT_CONFIG_GLOBAL
    if 'GIT_CONFIG_GLOBAL' in os.environ.keys():
        glb_cfg_file = Path(os.environ.get('GIT_CONFIG_GLOBAL'))
    else:
        glb_cfg_file = Path(os.path.expanduser('~')) / '.gitconfig'
    assert any(
        glb_cfg_file.samefile(Path(p))
        for p in dl_cfg._stores['git']['files'])
    assert_equal(dl_cfg.get("user.name"), "DataLad Tester")
    assert_equal(dl_cfg.get("user.email"), "*****@*****.**")
def custom_result_renderer(res, **kwargs):
    if (res['status'] != 'ok'
            or res['action'] not in ('get_configuration',
                                     'dump_configuration')):
        if 'message' not in res and 'name' in res:
            suffix = '={}'.format(res['value']) if 'value' in res else ''
            res['message'] = '{}{}'.format(res['name'], suffix)
        default_result_renderer(res)
        return
    # TODO source
    from datalad.ui import ui
    name = res['name']
    if res['action'] == 'dump_configuration':
        for key in ('purpose', 'description'):
            s = res.get(key)
            if s:
                ui.message('\n'.join(wrap(
                    s,
                    initial_indent='# ',
                    subsequent_indent='# ',
                )))

    if kwargs.get('recursive', False):
        have_subds = res['path'] != res['refds']
        # we need to mark up from which dataset results are reported
        prefix = '<ds>{}{}:'.format(
            '/' if have_subds else '',
            Path(res['path']).relative_to(res['refds']).as_posix()
            if have_subds else '',
        )
    else:
        prefix = ''

    if kwargs.get('action', None) == 'dump':
        ui.message('{}{}={}'.format(
            prefix,
            ac.color_word(name, ac.BOLD),
            res['value'] if res['value'] is not None else '',
        ))
    else:
        ui.message('{}{}'.format(
            prefix,
            res['value'] if res['value'] is not None else '',
        ))
def test_clone_report_permission_issue(tdir):
    pdir = Path(tdir) / 'protected'
    pdir.mkdir()
    # make it read-only
    pdir.chmod(0o555)
    with chpwd(pdir):
        res = clone('///', result_xfm=None, return_type='list',
                    on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir '%s/%s': Permission denied"
                    % (pdir, get_datasets_topdir())
        )
def _test_setup_ds_in_store(io_cls, io_args, store):
    io = io_cls(*io_args)
    store = Path(store)
    # ATM create_ds_in_store doesn't care what kind of ID is provided
    dsid = "abc123456"

    ds_path = store / dsid[:3] / dsid[3:]  # store layout version 1
    version_file = ds_path / 'ria-layout-version'
    archives = ds_path / 'archives'
    objects = ds_path / 'annex' / 'objects'
    git_config = ds_path / 'config'

    # invalid store version:
    assert_raises(UnknownLayoutVersion,
                  create_ds_in_store, io, store, dsid, '1', 'abc')

    # invalid obj version:
    assert_raises(UnknownLayoutVersion,
                  create_ds_in_store, io, store, dsid, 'abc', '1')

    # version 1
    create_store(io, store, '1')
    create_ds_in_store(io, store, dsid, '1', '1')
    for p in [ds_path, archives, objects]:
        assert_true(p.is_dir(), msg="Not a directory: %s" % str(p))
    for p in [version_file]:
        assert_true(p.is_file(), msg="Not a file: %s" % str(p))
    assert_equal(version_file.read_text(), "1\n")

    # conflicting version exists at target:
    assert_raises(ValueError, create_ds_in_store, io, store, dsid, '2', '1')

    # version 2
    # Note: The only difference between version 1 and 2 is supposed to be the
    #       key paths (dirhashlower vs mixed), which has nothing to do with
    #       the setup routine.
    rmtree(str(store))
    create_store(io, store, '1')
    create_ds_in_store(io, store, dsid, '2', '1')
    for p in [ds_path, archives, objects]:
        assert_true(p.is_dir(), msg="Not a directory: %s" % str(p))
    for p in [version_file]:
        assert_true(p.is_file(), msg="Not a file: %s" % str(p))
    assert_equal(version_file.read_text(), "2\n")
def test_get_local_file_url_compatibility(path):
    # smoke test for file:// URL compatibility with other datalad/git/annex
    # pieces
    path = Path(path)
    ds1 = Dataset(path / 'ds1').create()
    ds2 = Dataset(path / 'ds2').create()
    testfile = path / 'testfile.txt'
    testfile.write_text('some')

    # compat with annex addurl
    ds1.repo.add_url_to_file(
        'test.txt',
        get_local_file_url(testfile, compatibility='git-annex'))

    # compat with git clone/submodule
    assert_status(
        'ok',
        ds1.clone(get_local_file_url(ds2.path, compatibility='git'),
                  result_xfm=None, return_type='generator'))
def test_push_custom_summary(path=None):
    path = Path(path)
    ds = Dataset(path / "ds").create()

    sib = mk_push_target(ds, "sib", str(path / "sib"), bare=False, annex=False)
    (sib.pathobj / "f1").write_text("f1")
    sib.save()

    (ds.pathobj / "f2").write_text("f2")
    ds.save()

    # These options are true by default and our tests usually run with a
    # temporary home, but set them to be sure.
    ds.config.set("advice.pushUpdateRejected", "true", scope="local")
    ds.config.set("advice.pushFetchFirst", "true", scope="local")
    with swallow_outputs() as cmo:
        ds.push(to="sib", result_renderer="default", on_failure="ignore")
        assert_in("Hints:", cmo.out)
        assert_in("action summary:", cmo.out)
def collect_jsonld_metadata(dspath, res, nodes_by_context, contexts):
    """Sift through a metadata result and gather JSON-LD documents

    Parameters
    ----------
    dspath : str or Path
      Native absolute path of the dataset that shall be used to determine
      the relative path (name) of a file-result. This would typically be
      the path to the dataset that contains the file.
    res : dict
      Result dictionary as produced by `meta_extract()` or `meta_dump()`.
    nodes_by_context : dict
      JSON-LD documents are collected in this dict, using their context
      as keys.
    contexts : dict
      Holds a previously discovered context for any extractor.
    """
    if res['type'] == 'dataset':
        _native_metadata_to_graph_nodes(
            res['metadata'],
            nodes_by_context,
            contexts,
        )
    else:
        fmeta = res['metadata']
        # pull out a datalad ID from -core, if there is any
        fid = fmeta.get('metalad_core', {}).get('@id', None)
        _native_metadata_to_graph_nodes(
            fmeta,
            nodes_by_context,
            contexts,
            defaults={
                '@id': fid,
                # do not have a @type default here, it would
                # duplicate across all extractor records
                # let the core extractor deal with this
                #'@type': "DigitalDocument",
                # maybe we need something more fitting than
                # name
                'name': Path(res['path']).relative_to(dspath).as_posix(),
            },
        )
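
# Illustrative sketch (not part of the module): a minimal, self-contained
# demonstration of the "group JSON-LD nodes by their context" idea that
# collect_jsonld_metadata() delegates to _native_metadata_to_graph_nodes().
# The helper name `_example_group_nodes_by_context` and the record used in
# the usage comment are hypothetical, for demonstration only.
def _example_group_nodes_by_context(metadata, nodes_by_context):
    for extractor, record in metadata.items():
        # a record may declare its own @context; fall back to a generic one
        # (the real implementation tracks per-extractor contexts separately)
        context = record.get('@context', 'http://schema.org/')
        node = {k: v for k, v in record.items() if k != '@context'}
        # contexts can be dicts, so use their string form as the grouping key
        nodes_by_context.setdefault(str(context), []).append(node)

# usage sketch with a made-up record:
#   nodes = {}
#   _example_group_nodes_by_context(
#       {'metalad_core': {'@id': 'datalad:xyz', 'name': 'CONTENT'}}, nodes)
#   # nodes now maps 'http://schema.org/' -> [{'@id': ..., 'name': 'CONTENT'}]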
def test_copy_file_into_nonannex(workdir):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    (src_ds.pathobj / 'present.txt').write_text('123')
    (src_ds.pathobj / 'gone.txt').write_text('abc')
    src_ds.save()
    src_ds.drop('gone.txt', check=False)

    # destination has no annex
    dest_ds = Dataset(workdir / 'dest').create(annex=False)
    # no issue copying a file that has content
    copy_file([src_ds.pathobj / 'present.txt', dest_ds.pathobj])
    ok_file_has_content(dest_ds.pathobj / 'present.txt', '123')
    # but cannot handle a dropped file, no chance to register
    # availability info in an annex
    assert_status(
        'impossible',
        copy_file([src_ds.pathobj / 'gone.txt', dest_ds.pathobj],
                  on_failure='ignore'))
def metadata_locator(fs_metadata=None, path=None, ds_path=None,
                     metadata_path=None):
    """path to metadata file of node associated with the fs_metadata dictionary

    Parameters
    ----------
    fs_metadata: dict
      Metadata json of a node
    path: str
      Path to directory of metadata to be rendered
    ds_path: str
      Path to dataset root
    metadata_path: str
      Path to metadata root. Calculated relative to ds_path

    Returns
    -------
    str
      path to metadata of current node
    """

    # use implicit paths unless paths explicitly specified
    # Note: usage of ds_path as if it was the Repo's path. Therefore use
    # realpath, since we switched to have symlinks resolved in repos but not
    # in datasets
    ds_path = str(Path(ds_path).resolve()) if ds_path else fs_metadata['repo']
    path = path or fs_metadata['path']
    metadata_path = metadata_path or '.git/datalad/metadata'

    # directory metadata directory tree location
    metadata_dir = opj(ds_path, metadata_path)
    # relative path of current directory wrt dataset root
    dir_path = relpath(path, ds_path) if isabs(path) else path
    # normalize to / -- TODO, switch to '.' which is now actually the name
    # since path is relative in web meta?
    if dir_path in ('.', None, ''):
        dir_path = '/'
    # create md5 hash of current directory's relative path
    metadata_hash = hashlib.md5(dir_path.encode('utf-8')).hexdigest()
    # construct final path to metadata file
    metadata_file = opj(metadata_dir, metadata_hash)

    return metadata_file
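
# Illustrative sketch (not part of the module): the layout implied by
# metadata_locator() above. A node's metadata file lives at
#   <ds_path>/<metadata_path>/<md5 hexdigest of the node's relative dir path>
# with the dataset root itself represented as '/'. The function name and the
# default paths below are hypothetical example values.
def _example_metadata_file(ds_path='/tmp/ds', dir_relpath='code/analysis'):
    import hashlib
    from os.path import join as opj
    # dataset root (empty/'.' relative path) is hashed as '/'
    dir_relpath = dir_relpath if dir_relpath not in ('.', None, '') else '/'
    metadata_dir = opj(ds_path, '.git/datalad/metadata')
    return opj(metadata_dir,
               hashlib.md5(dir_relpath.encode('utf-8')).hexdigest())

# e.g. _example_metadata_file() -> '/tmp/ds/.git/datalad/metadata/<32-char hash>',
# which is what metadata_locator(path='code/analysis', ds_path='/tmp/ds')
# would also produce for an already-resolved ds_path.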
def test_aggregate_into_top_no_extraction(path):
    path = Path(path)
    superds = Dataset(path).create()
    subds = superds.create(path / 'sub')
    # put a single (empty) file in subds to have some metadata-relevant
    # content
    payload = subds.pathobj / 'CONTENT'
    payload.write_text(u'some')
    superds.save(recursive=True)
    assert_repo_status(superds.path)

    # have metadata aggregated in the subds
    res = subds.meta_aggregate()
    # FTR: Doing it again, yields extraction not needed:
    assert_result_count(subds.meta_aggregate(), 1,
                        action='meta_extract',
                        status='notneeded',
                        type='dataset')
    # update subds entry in super
    superds.save(recursive=True)
    # super has no metadata on sub's content
    assert_status('impossible',
                  superds.meta_dump('sub/', on_failure='ignore'))
    # but subds has
    res = subds.meta_dump('.', on_failure='ignore')
    assert_result_count(res, 2)
    assert_result_count(res, 2, status='ok')
    assert_result_count(res, 1, type='dataset')
    assert_result_count(res, 1, type='file')

    # Now, aggregate into top
    res = superds.meta_aggregate('sub/', into='top')
    # super should now be able to report:
    assert_status('ok', superds.meta_dump('sub/', on_failure='ignore'))
    # Re-extraction should not be required:
    assert_result_count(res, 1,
                        action='meta_extract',
                        status='notneeded',
                        type='dataset')
def custom_result_renderer(res, **kwargs):  # pragma: more cover
    # Don't render things like 'status' for clean-info messages -
    # seems rather meaningless.

    from os import getcwd

    import datalad.support.ansi_colors as ac
    from datalad.interface.utils import generic_result_renderer
    from datalad.utils import Path

    if res['action'] == 'clean':
        # default renderer is just fine
        return generic_result_renderer(res)
    elif res['action'] != 'clean [dry-run]':
        # Result didn't come from within `clean`.
        # Should be handled elsewhere.
        return

    assert res['action'] == 'clean [dry-run]'

    if res.get('status', None) == 'ok':
        from datalad.ui import ui

        # when to render relative paths:
        #  1) if a dataset arg was given
        #  2) if CWD is the refds
        refds = res.get('refds', None)
        refds = refds if kwargs.get('dataset', None) is not None \
            or refds == getcwd() else None
        path = res['path'] if refds is None \
            else str(Path(res['path']).relative_to(refds))

        ui.message(u"{path}: {message}".format(
            path=ac.color_word(path, ac.BOLD),
            message=(res['message'][0] % res['message'][1:]
                     if isinstance(res['message'], tuple)
                     else res['message'])
            if res.get('message', None) else ''))

    else:
        # Any other status than 'ok' is reported the default way.
        return generic_result_renderer(res)
def test_uninstall_subdataset(src, dst):
    ds = install(dst, source=src, recursive=True)
    ok_(ds.is_installed())
    known_subdss = ds.subdatasets(result_xfm='datasets')
    for subds in ds.subdatasets(result_xfm='datasets'):
        ok_(subds.is_installed())

        repo = subds.repo

        annexed_files = repo.get_content_annexinfo(init=None)
        repo.get([str(f) for f in annexed_files])

        # drop data of subds:
        res = ds.drop(path=subds.path, result_xfm='paths')

        ok_(all(str(f) in res for f in annexed_files))
        ainfo = repo.get_content_annexinfo(paths=annexed_files,
                                           eval_availability=True)
        ok_(all(not st["has_content"] for st in ainfo.values()))
        # subdataset is still known
        assert_in(subds.path, ds.subdatasets(result_xfm='paths'))

    eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)

    for subds in ds.subdatasets(result_xfm='datasets'):
        # uninstall subds itself:
        # simulate a cmdline invocation pointing to the subdataset
        # with a relative path from outside the superdataset to catch
        # https://github.com/datalad/datalad/issues/4001
        pwd = Path(dst).parent
        with chpwd(str(pwd)):
            res = uninstall(
                dataset=ds.path,
                path=str(subds.pathobj.relative_to(pwd)),
                result_xfm='datasets',
            )
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))
def test_merge_conflict_in_subdataset_only(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    ds_src_sub_conflict = ds_src.create("sub_conflict")
    ds_src_sub_noconflict = ds_src.create("sub_noconflict")
    ds_src.save()

    # Set up a scenario where one subdataset has a conflict between the remote
    # and local version, but the parent dataset does not have a conflict
    # because it hasn't recorded the subdataset state.
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")

    ds_clone_sub_conflict = Dataset(path / "clone" / "sub_conflict")
    ds_clone_sub_noconflict = Dataset(path / "clone" / "sub_noconflict")

    (ds_src_sub_conflict.pathobj / "foo").write_text("src content")
    ds_src_sub_conflict.save(to_git=True)

    (ds_clone_sub_conflict.pathobj / "foo").write_text("clone content")
    ds_clone_sub_conflict.save(to_git=True)

    (ds_src_sub_noconflict.pathobj / "foo").write_text("src content")
    ds_src_sub_noconflict.save()

    res = ds_clone.update(merge=True, recursive=True, on_failure="ignore")
    assert_in_results(res, action="merge", status="error",
                      path=ds_clone_sub_conflict.path)
    assert_in_results(res, action="merge", status="ok",
                      path=ds_clone_sub_noconflict.path)
    assert_in_results(res, action="save", status="ok", path=ds_clone.path)
    # We saved the subdataset without a conflict...
    assert_repo_status(ds_clone_sub_noconflict.path)
    # ... but the one with the conflict leaves it for the caller to handle.
    ok_(ds_clone_sub_conflict.repo.call_git(
        ["ls-files", "--unmerged", "--", "foo"],
        read_only=True).strip())