def test_rerun_assume_ready(path):
    ds = Dataset(path).create()
    repo = ds.repo
    (repo.pathobj / "f1").write_text("f1\n")
    ds.save()

    def double_in_both_cmd(src, dest1, dest2):
        return [
            sys.executable, "-c",
            "import sys; import os; import os.path as op; "
            "content = open(sys.argv[-3]).read() * 2; "
            "d1 = sys.argv[-2]; d2 = sys.argv[-1]; "
            "op.lexists(d1) and os.unlink(d1); "
            "op.lexists(d2) and os.unlink(d2); "
            "open(d1, 'w').write(content); open(d2, 'w').write(content)",
            src, dest1, dest2]

    ds.run(double_in_both_cmd("f1", "out1", "out2"), outputs=["out1"])
    # Drop the content so that we remove instead of unlock, making the test
    # more meaningful on an adjusted branch.
    ds.drop(["out1", "out2"], check=False)
    # --assume-ready affects both explicitly specified and automatic outputs.
    res = ds.rerun(assume_ready="outputs")
    assert_not_in_results(res, action="remove")


def test_push_subds_no_recursion(src_path, dst_top, dst_sub, dst_subsub):
    # dataset with one submodule and one subsubmodule
    top = Dataset(src_path).create()
    sub = top.create('sub m')
    test_file = sub.pathobj / 'subdir' / 'test_file'
    test_file.parent.mkdir()
    test_file.write_text('some')
    subsub = sub.create(sub.pathobj / 'subdir' / 'subsub m')
    top.save(recursive=True)
    assert_repo_status(top.path)
    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)
    # now publish, but NO recursion, instead give the parent dir of
    # both a subdataset and a file in the middle subdataset
    res = top.push(
        to='target',
        # give relative to top dataset to elevate the difficulty a little
        path=str(test_file.relative_to(top.pathobj).parent))
    assert_status('ok', res)
    assert_in_results(res, action='publish', type='dataset', path=top.path)
    assert_in_results(res, action='publish', type='dataset', path=sub.path)
    assert_in_results(res, action='copy', type='file', path=str(test_file))
    # the lowest-level subdataset isn't touched
    assert_not_in_results(
        res, action='publish', type='dataset', path=subsub.path)


def test_force_checkdatapresent(srcpath, dstpath):
    src = Dataset(srcpath).create()
    target = mk_push_target(src, 'target', dstpath, annex=True, bare=True)
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=False, message="New annex file")
    assert_repo_status(src.path, annex=True)
    whereis_prior = src.repo.whereis(files=['test_mod_annex_file'])[0]

    res = src.push(to='target', data='nothing')
    # nothing reported to be copied
    assert_not_in_results(res, action='copy')
    # we got the git-push nevertheless
    eq_(src.repo.get_hexsha(DEFAULT_BRANCH),
        target.get_hexsha(DEFAULT_BRANCH))
    # nothing moved
    eq_(whereis_prior, src.repo.whereis(files=['test_mod_annex_file'])[0])

    # now a push without forced no-transfer
    # we do not give since, so the non-transferred file is picked up
    # and transferred
    res = src.push(to='target', force=None)
    # no branch change, done before
    assert_in_results(res, action='publish', status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # but availability update
    assert_in_results(res, action='publish', status='ok',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    assert_in_results(res, status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')
    # whereis info reflects the change
    ok_(len(whereis_prior) < len(
        src.repo.whereis(files=['test_mod_annex_file'])[0]))

    # doing it yet again will do nothing, because all is up-to-date
    assert_status('notneeded', src.push(to='target', force=None))
    # an explicit reference point doesn't change that
    assert_status('notneeded',
                  src.push(to='target', force=None, since='HEAD~1'))

    # now force data transfer
    res = src.push(to='target', force='checkdatapresent')
    # no branch change, done before
    assert_in_results(res, action='publish', status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # no availability update
    assert_in_results(res, action='publish', status='notneeded',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    # but data transfer
    assert_in_results(res, status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')

    # force data transfer, but data isn't available
    src.repo.drop('test_mod_annex_file')
    res = src.push(to='target', path='.', force='checkdatapresent',
                   on_failure='ignore')
    assert_in_results(res, status='impossible',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy',
                      message='Slated for transport, but no content present')


def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()
    if src.repo.is_managed_branch():
        # on crippled FS post-update hook enabling via create-sibling doesn't
        # work ATM
        raise SkipTest("no create-sibling on crippled FS")
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()
    # Drop a file to mimic the case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)
    # Annotate sensitive content; the actual value "verysecure" does not
    # matter in this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])
    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, if instructed by configuration
    src.config.set('datalad.push.copy-auto-if-wanted', 'true', where='local')
    res = src.push(to='target')
    assert_in_results(
        res, action='copy', path=str(src.pathobj / 'data.0'), status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check that dataset-config cannot overrule this
    src.config.set('datalad.push.copy-auto-if-wanted', 'false',
                   where='dataset')
    res = src.push(to='target')
    assert_status('notneeded', res)

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError,
                      (dst.pathobj / 'secure.1').read_text)

    # removing the local config must enable push of the secure file
    src.config.unset('datalad.push.copy-auto-if-wanted', where='local')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')


def test_recurse_existing(src, path):
    origin_ds = _make_dataset_hierarchy(src)

    # make sure recursion_limit works as expected across a range of depths
    for depth in range(len(origin_ds)):
        res = install(path, source=src, recursive=True,
                      recursion_limit=depth, result_xfm=None,
                      return_type='list', result_filter=None)
        # we expect one dataset per level
        assert_result_count(res, depth + 1, type='dataset', status='ok')
        rmtree(path)

    # now install all but the last two levels, no data
    root, sub1, sub2 = install(
        path, source=src, recursive=True, recursion_limit=2,
        result_xfm='datasets', result_filter=None)
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is False)
    sub3 = Dataset(opj(sub2.path, 'sub3'))
    ok_(not sub3.is_installed())

    # now get all content in all existing datasets, no new datasets installed
    # in the process
    files = root.get(curdir, recursive=True, recursion_limit='existing')
    assert_not_in_results(files, type='dataset', status='ok')
    assert_result_count(files, 1, type='file', status='ok')
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not sub3.is_installed())

    # now pull down all remaining datasets, no data
    sub3, sub4 = root.get(
        curdir, recursive=True, get_data=False,
        result_xfm='datasets', result_filter=lambda x: x['status'] == 'ok')
    ok_(sub4.is_installed())
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is False)

    # aaannd all data
    files = root.get(
        curdir, recursive=True,
        result_filter=lambda x: x['status'] == 'ok' and x['type'] == 'file')
    eq_(len(files), 1)
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is True)


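# `_make_dataset_hierarchy` is referenced above but not shown in this excerpt.
# The following is only a plausible sketch inferred from how the test uses it
# (five nested dataset levels, with an annexed 'file_in_annex.txt' in the
# third and fourth level); the actual helper may differ.
def _make_dataset_hierarchy(path):
    origin = Dataset(path).create()
    origin_sub1 = origin.create('sub1')
    origin_sub2 = origin_sub1.create('sub2')
    origin_sub3 = origin_sub2.create('sub3')
    origin_sub4 = origin_sub3.create('sub4')
    # annexed payload files at two levels, as the assertions above expect
    (origin_sub2.pathobj / 'file_in_annex.txt').write_text('content2')
    (origin_sub3.pathobj / 'file_in_annex.txt').write_text('content3')
    origin.save(recursive=True)
    return origin, origin_sub1, origin_sub2, origin_sub3, origin_sub4

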
def test_auto_data_transfer(path):
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    (ds_a.pathobj / "foo.dat").write_text("foo")
    ds_a.save()

    # Should be the default, but just in case.
    ds_a.repo.config.set("annex.numcopies", "1", where="local")
    ds_a.create_sibling(str(path / "b"), name="b")

    # With numcopies=1, no data is copied with data="auto".
    res = ds_a.push(to="b", data="auto", since=None)
    assert_not_in_results(res, action="copy")

    # Even when a file is explicitly given.
    res = ds_a.push(to="b", path="foo.dat", data="auto", since=None)
    assert_not_in_results(res, action="copy")

    # numcopies=2 changes that.
    ds_a.repo.config.set("annex.numcopies", "2", where="local")
    res = ds_a.push(to="b", data="auto", since=None)
    assert_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "foo.dat"))

    # --since= limits the files considered by --auto.
    (ds_a.pathobj / "bar.dat").write_text("bar")
    ds_a.save()
    (ds_a.pathobj / "baz.dat").write_text("baz")
    ds_a.save()
    res = ds_a.push(to="b", data="auto", since="HEAD~1")
    assert_not_in_results(
        res, action="copy", path=str(ds_a.pathobj / "bar.dat"))
    assert_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "baz.dat"))

    # --auto also considers preferred content.
    ds_a.repo.config.unset("annex.numcopies", where="local")
    ds_a.repo.set_preferred_content("wanted", "nothing", remote="b")
    res = ds_a.push(to="b", data="auto", since=None)
    assert_not_in_results(
        res, action="copy", path=str(ds_a.pathobj / "bar.dat"))

    ds_a.repo.set_preferred_content("wanted", "anything", remote="b")
    res = ds_a.push(to="b", data="auto", since=None)
    assert_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "bar.dat"))


def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()
    # Drop a file to mimic the case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)
    # Annotate sensitive content; the actual value "verysecure" does not
    # matter in this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])
    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    res = src.push(to='target')
    assert_in_results(
        res, action='copy', path=str(src.pathobj / 'data.0'), status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError,
                      (dst.pathobj / 'secure.1').read_text)

    # resetting the wanted config must enable push of the secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')


def test_unlock_directory(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.unlock(path="dir")
    dirpath = Path("dir")
    dirpath_abs = Path(ds.pathobj / "dir")

    # On adjusted branches (for the purposes of this test, crippled
    # filesystems), the files were already unlocked and the committed state is
    # the unlocked pointer file.
    is_managed_branch = ds.repo.is_managed_branch()
    if is_managed_branch:
        assert_repo_status(ds.path)
    else:
        assert_repo_status(ds.path, modified=[dirpath / "a", dirpath / "b"])
    ds.save()
    ds.drop(text_type(dirpath / "a"), check=False)
    assert_false(ds.repo.file_has_content(text_type(dirpath / "a")))

    # Unlocking without an explicit non-directory path doesn't fail if one of
    # the directory's files doesn't have content.
    res = ds.unlock(path="dir")
    assert_not_in_results(res, action="unlock",
                          path=text_type(dirpath_abs / "a"))
    if is_managed_branch:
        assert_not_in_results(res, action="unlock",
                              path=text_type(dirpath_abs / "b"))
    else:
        assert_in_results(res, action="unlock", status="ok",
                          path=text_type(dirpath_abs / "b"))
        assert_repo_status(ds.path, modified=[dirpath / "b"])

    # If we explicitly provide a path that lacks content, we get a result
    # for it.
    assert_in_results(ds.unlock(path=dirpath / "a", on_failure="ignore"),
                      action="unlock",
                      status="impossible",
                      path=text_type(dirpath_abs / "a"))


def test_procedure_discovery(path, super_path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.
        ps = run_procedure(discover=True)
        # there are a few procedures coming with datalad, needs to find them
        assert_true(len(ps) > 2)
        # we get three essential properties
        eq_(
            sum(['procedure_type' in p and
                 'procedure_callfmt' in p and
                 'path' in p
                 for p in ps]),
            len(ps))

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    ds.add(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps,
                      path=op.join(ds.path, 'code', 'datalad_test_proc.py'))

    # make it a subdataset and try again:
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)
    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                       'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at procedure location, but we can't tell whether it
        # is an actual procedure without any guess on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))
        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(
            ps,
            path=op.join(super.path, 'sub', 'code', 'broken_link_proc.py'),
            state='absent')
        assert_not_in_results(
            ps,
            path=op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))


def test_procedure_discovery(path, super_path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.
        ps = run_procedure(discover=True)
        # there are a few procedures coming with datalad, needs to find them
        assert_true(len(ps) > 2)
        # we get essential properties
        _check_procedure_properties(ps)

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    # extra check: must not pick up cfg_yoda.sh in top directory
    ds.run_procedure('cfg_yoda')

    # path to a procedure which is not under any "standard" location but
    # present in the dataset
    code_dir_procedure_path = op.join(ds.path, 'code', 'datalad_test_proc.py')
    top_dir_procedure_path = op.join(ds.path, 'cfg_yoda.sh')

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)
    # it should not be found magically by default
    assert_not_in_results(ps, path=code_dir_procedure_path)
    assert_not_in_results(ps, path=top_dir_procedure_path)

    with patch_config({'datalad.locations.extra-procedures':
                       op.join(ds.path, 'code')}):
        # run discovery on the dataset:
        ps = ds.run_procedure(discover=True)
        # still needs to find procedures coming with datalad
        assert_true(len(ps) > 3)
        # and the procedure under the path we specified
        assert_result_count(ps, 1, path=code_dir_procedure_path)
        assert_not_in_results(ps, path=top_dir_procedure_path)

    # multiple extra locations
    with patch_config({'datalad.locations.extra-procedures':
                       [op.join(ds.path, 'code'), ds.path]}):
        # run discovery on the dataset:
        ps = ds.run_procedure(discover=True)
        # still needs to find procedures coming with datalad
        assert_true(len(ps) > 4)
        # and the procedures under the paths we specified
        assert_result_count(ps, 1, path=code_dir_procedure_path)
        assert_result_count(ps, 1, path=top_dir_procedure_path)

    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    ds.save(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    _check_procedure_properties(ps)
    # dataset's procedure needs to be in the results,
    # and only a single one
    assert_result_count(ps, 1, path=code_dir_procedure_path)
    # a subdir shouldn't be considered a procedure just because it's
    # "executable"
    assert_not_in_results(ps, path=op.join(ds.path, 'code', 'testdir'))

    # make it a subdataset and try again:
    # first we need to save the beast to make install work
    ds.save()
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)
    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    _check_procedure_properties(ps)
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                       'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at procedure location, but we can't tell whether it
        # is an actual procedure without any guess on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))
        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(
            ps,
            path=op.join(super.path, 'sub', 'code', 'broken_link_proc.py'),
            state='absent')
        assert_in_results(
            ps,
            path=op.join(super.path, 'sub', 'code', 'unknwon_broken_link'),
            state='absent')


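# `_check_procedure_properties` is used above but not shown in this excerpt.
# Judging from the inline check in the other variants of this test, it is
# assumed to verify that every discovery result carries the three essential
# properties; a minimal sketch:
def _check_procedure_properties(ps):
    """Check that all procedure results carry the three essential properties."""
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))

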
def test_run_subdataset_install(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    # Repository setup
    #
    # .
    # |-- a/
    # |   |-- a2/
    # |   |   `-- img
    # |   `-- img
    # |-- b/
    # |   `-- b2/
    # |       `-- img
    # |-- c/
    # |   `-- c2/
    # |       `-- img
    # `-- d/
    #     `-- d2/
    #         `-- img
    ds_src_a = ds_src.create("a")
    ds_src_a2 = ds_src_a.create("a2")
    ds_src_b = Dataset(ds_src.pathobj / "b").create()
    ds_src_b2 = ds_src_b.create("b2")
    ds_src_c = ds_src.create("c")
    ds_src_c2 = ds_src_c.create("c2")
    ds_src_d = Dataset(ds_src.pathobj / "d").create()
    ds_src_d2 = ds_src_d.create("d2")
    ds_src.save()
    add_pyscript_image(ds_src_a, "in-a", "img")
    add_pyscript_image(ds_src_a2, "in-a2", "img")
    add_pyscript_image(ds_src_b2, "in-b2", "img")
    add_pyscript_image(ds_src_c2, "in-c2", "img")
    add_pyscript_image(ds_src_d2, "in-d2", "img")
    ds_src.save(recursive=True)

    ds_dest = clone(ds_src.path, str(path / "dest"))
    ds_dest_a2 = Dataset(ds_dest.pathobj / "a" / "a2")
    ds_dest_b2 = Dataset(ds_dest.pathobj / "b" / "b2")
    ds_dest_c2 = Dataset(ds_dest.pathobj / "c" / "c2")
    ds_dest_d2 = Dataset(ds_dest.pathobj / "d" / "d2")
    assert_false(ds_dest_a2.is_installed())
    assert_false(ds_dest_b2.is_installed())
    assert_false(ds_dest_c2.is_installed())
    assert_false(ds_dest_d2.is_installed())

    # Needed subdatasets are installed if container name is given...
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_result_count(res, 1, action="install", status="ok",
                        path=ds_dest_a2.path)
    assert_result_count(res, 1, action="get", status="ok",
                        path=str(ds_dest_a2.pathobj / "img"))
    ok_(ds_dest_a2.is_installed())
    # ... even if the name and path do not match.
    res = ds_dest.containers_run(["arg"], container_name="b/b2/in-b2")
    assert_result_count(res, 1, action="install", status="ok",
                        path=ds_dest_b2.path)
    assert_result_count(res, 1, action="get", status="ok",
                        path=str(ds_dest_b2.pathobj / "img"))
    ok_(ds_dest_b2.is_installed())
    # Subdatasets will also be installed if given an image path...
    res = ds_dest.containers_run(["arg"],
                                 container_name=str(Path("c/c2/img")))
    assert_result_count(res, 1, action="install", status="ok",
                        path=ds_dest_c2.path)
    assert_result_count(res, 1, action="get", status="ok",
                        path=str(ds_dest_c2.pathobj / "img"))
    ok_(ds_dest_c2.is_installed())
    ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))

    # There's no install record if subdataset is already present.
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_not_in_results(res, action="install")


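# `add_pyscript_image` is used above but not defined in this excerpt. A rough,
# hypothetical sketch of what it is assumed to do: store a tiny Python script
# as the container "image" under `file_name` and register it with
# `containers_add` under `container_name` (the exact call format is an
# assumption, not taken from the real helper).
def add_pyscript_image(ds, container_name, file_name):
    img_path = ds.pathobj / file_name
    img_path.write_text("import sys\nprint(sys.argv)\n")
    ds.save(img_path, message="Add dummy container image")
    ds.containers_add(container_name, image=file_name,
                      call_fmt="python {img} {cmd}")

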
def test_push_recursive(
        origin_path, src_path, dst_top, dst_sub, dst_subnoannex, dst_subsub):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checked out
    # recursively
    # running on a clone should make the test scenario more different than
    # test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    sub, subsub, subnoannex = top.get(
        '.', recursive=True, get_data=False, result_xfm='datasets')

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    assert_in_results(
        res, path=top.path, type='dataset',
        refspec='refs/heads/master:refs/heads/master',
        operations=['new-branch'], action='publish', status='ok',
        target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='error', type='dataset', path=d.path,
            message=("Unknown target sibling '%s'.", 'target'))

    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(
        subnoannex, 'target', dst_subnoannex, annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    # topds skipped
    assert_in_results(
        res, path=top.path, type='dataset', action='publish',
        status='notneeded', target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
    # all corresponding branches match across all datasets
    for s, d in zip((top, sub, subnoannex, subsub),
                    (target_top, target_sub, target_subnoannex,
                     target_subsub)):
        eq_(list(s.repo.get_branch_commits_("master")),
            list(d.get_branch_commits_("master")))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of master
    res = top.push(to="target", recursive=True)
    assert_not_in_results(
        res, status='ok', refspec="refs/heads/master:refs/heads/master")
    assert_in_results(
        res, status='notneeded',
        refspec="refs/heads/master:refs/heads/master")

    if top.repo.is_managed_branch():
        raise SkipTest(
            'Save/status of subdataset with managed branches is a still '
            'unresolved issue')

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(
        ('ok', 'notneeded'),
        top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
    # file content copied too
    assert_in_results(res, action='copy', status='ok',
                      path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modifications
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)
    # now only publish the latter one
    res = top.push(to="target", since='HEAD~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(res, action='copy', status='ok',
                      path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(
        res, status='ok', type='dataset', path=sub.path,
        refspec='refs/heads/master:refs/heads/master')
    for d in (top, subnoannex, subsub):
        assert_in_results(
            res, status='notneeded', type='dataset', path=d.path,
            refspec='refs/heads/master:refs/heads/master')


def test_nested_pushclone_cycle_allplatforms(origpath, storepath, clonepath):
    if 'DATALAD_SEED' in os.environ:
        # we are using create-sibling-ria via the cmdline in here
        # this will create random UUIDs for datasets
        # however, given a fixed seed each call to this command will start
        # with the same RNG seed, hence yield the same UUID on the same
        # machine -- leading to a collision
        raise SkipTest(
            'Test incompatible with fixed random number generator seed')

    # the aim here is to high-level test a standard create-push-clone cycle
    # for a dataset with a subdataset, with the goal to ensure that correct
    # branches and commits are tracked, regardless of platform behavior and
    # condition of individual clones. Nothing fancy, just that the defaults
    # behave in sensible ways
    from datalad.cmd import WitlessRunner as Runner
    run = Runner().run

    # create original nested dataset
    with chpwd(origpath):
        run(['datalad', 'create', 'super'])
        run(['datalad', 'create', '-d', 'super', str(Path('super', 'sub'))])

    # verify essential linkage properties
    orig_super = Dataset(Path(origpath, 'super'))
    orig_sub = Dataset(orig_super.pathobj / 'sub')

    (orig_super.pathobj / 'file1.txt').write_text('some1')
    (orig_sub.pathobj / 'file2.txt').write_text('some1')
    with chpwd(orig_super.path):
        run(['datalad', 'save', '--recursive'])
    # TODO not yet reported clean with adjusted branches
    #assert_repo_status(orig_super.path)

    # the "true" branch that sub is on, and the gitsha of its HEAD commit
    orig_sub_corr_branch = \
        orig_sub.repo.get_corresponding_branch() or \
        orig_sub.repo.get_active_branch()
    orig_sub_corr_commit = orig_sub.repo.get_hexsha(orig_sub_corr_branch)

    # make sure the super tracks this commit
    assert_in_results(
        orig_super.subdatasets(),
        path=orig_sub.path,
        gitshasum=orig_sub_corr_commit,
        # TODO it should also track the branch name
        # Attempted: https://github.com/datalad/datalad/pull/3817
        # But reverted: https://github.com/datalad/datalad/pull/4375
    )

    # publish to a store, to get into a platform-agnostic state
    # (i.e. no impact of an annex-init of any kind)
    store_url = 'ria+' + get_local_file_url(storepath)
    with chpwd(orig_super.path):
        run(['datalad', 'create-sibling-ria', '--recursive',
             '-s', 'store', store_url])
        run(['datalad', 'push', '--recursive', '--to', 'store'])

    # we are using the 'store' sibling's URL, which should be a plain path
    store_super = AnnexRepo(orig_super.siblings(name='store')[0]['url'],
                            init=False)
    store_sub = AnnexRepo(orig_sub.siblings(name='store')[0]['url'],
                          init=False)

    # both datasets in the store only carry the real branches, and nothing
    # adjusted
    for r in (store_super, store_sub):
        eq_(set(r.get_branches()), set([orig_sub_corr_branch, 'git-annex']))

    # and reobtain from a store
    cloneurl = 'ria+' + get_local_file_url(str(storepath),
                                           compatibility='git')
    with chpwd(clonepath):
        run(['datalad', 'clone', cloneurl + '#' + orig_super.id, 'super'])
        run(['datalad', '-C', 'super', 'get', '--recursive', '.'])

    # verify that nothing has changed as a result of a push/clone cycle
    clone_super = Dataset(Path(clonepath, 'super'))
    clone_sub = Dataset(clone_super.pathobj / 'sub')
    assert_in_results(
        clone_super.subdatasets(),
        path=clone_sub.path,
        gitshasum=orig_sub_corr_commit,
    )

    for ds1, ds2, f in ((orig_super, clone_super, 'file1.txt'),
                        (orig_sub, clone_sub, 'file2.txt')):
        eq_((ds1.pathobj / f).read_text(), (ds2.pathobj / f).read_text())

    # get status info that does not recurse into subdatasets, i.e. not
    # looking for uncommitted changes
    # we should see no modification reported
    assert_not_in_results(
        clone_super.status(eval_subdataset_state='commit'),
        state='modified')
    # and now the same for a more expensive full status
    assert_not_in_results(
        clone_super.status(recursive=True),
        state='modified')


def test_auto_if_wanted_data_transfer_path_restriction(path):
    path = Path(path)
    ds_a = Dataset(path / "a").create()
    ds_a_sub0 = ds_a.create("sub0")
    ds_a_sub1 = ds_a.create("sub1")

    for ds in [ds_a, ds_a_sub0, ds_a_sub1]:
        (ds.pathobj / "sec.dat").write_text("sec")
        (ds.pathobj / "reg.dat").write_text("reg")
    ds_a.save(recursive=True)

    ds_a.create_sibling(
        str(path / "b"), name="b",
        annex_wanted="not metadata=distribution-restrictions=*",
        recursive=True)

    for ds in [ds_a, ds_a_sub0, ds_a_sub1]:
        ds.repo.set_metadata(
            add={"distribution-restrictions": "doesntmatter"},
            files=["sec.dat"])

    # wanted-triggered --auto can be restricted to a subdataset...
    res = ds_a.push(to="b", path="sub0", data="auto-if-wanted",
                    recursive=True)
    assert_not_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "reg.dat"))
    assert_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a_sub0.pathobj / "reg.dat"))
    assert_not_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a_sub0.pathobj / "sec.dat"))
    assert_not_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a_sub1.pathobj / "reg.dat"))

    # ... and to a wanted file.
    res = ds_a.push(to="b", path="reg.dat", data="auto-if-wanted",
                    recursive=True)
    assert_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "reg.dat"))
    assert_not_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a_sub1.pathobj / "reg.dat"))

    # But asking to transfer a file does not do it if the remote has a
    # wanted setting and doesn't want it.
    res = ds_a.push(to="b", path="sec.dat", data="auto-if-wanted",
                    recursive=True)
    assert_not_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "sec.dat"))

    res = ds_a.push(to="b", path="sec.dat", data="anything", recursive=True)
    assert_in_results(
        res, action="copy", target="b", status="ok",
        path=str(ds_a.pathobj / "sec.dat"))


def test_procedure_discovery(path, super_path):
    ps = run_procedure(discover=True)
    # there are a few procedures coming with datalad, needs to find them
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    ds.add(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps,
                      path=op.join(ds.path, 'code', 'datalad_test_proc.py'))

    # make it a subdataset and try again:
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)
    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum(['procedure_type' in p and
             'procedure_callfmt' in p and
             'path' in p
             for p in ps]),
        len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps, path=op.join(super.path, 'sub', 'code',
                                       'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at procedure location, but we can't tell whether it
        # is an actual procedure without any guess on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))
        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(
            ps,
            path=op.join(super.path, 'sub', 'code', 'broken_link_proc.py'),
            state='absent')
        assert_not_in_results(
            ps,
            path=op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))


def test_run_subdataset_install(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    # Repository setup
    #
    # .
    # |-- a/
    # |   |-- a2/
    # |   |   `-- img
    # |   `-- img
    # |-- b/        (module name: b-name)
    # |   `-- b2/
    # |       `-- img
    # |-- c/
    # |   `-- c2/
    # |       `-- img
    # `-- d/        (module name: d-name)
    #     `-- d2/
    #         `-- img
    ds_src_a = ds_src.create("a")
    ds_src_a2 = ds_src_a.create("a2")
    ds_src_b = Dataset(ds_src.pathobj / "b").create()
    ds_src_b2 = ds_src_b.create("b2")
    ds_src_c = ds_src.create("c")
    ds_src_c2 = ds_src_c.create("c2")
    ds_src_d = Dataset(ds_src.pathobj / "d").create()
    ds_src_d2 = ds_src_d.create("d2")
    ds_src.repo.add_submodule("b", name="b-name")
    ds_src.repo.add_submodule("d", name="d-name")
    ds_src.save()
    add_pyscript_image(ds_src_a, "in-a", "img")
    add_pyscript_image(ds_src_a2, "in-a2", "img")
    add_pyscript_image(ds_src_b2, "in-b2", "img")
    add_pyscript_image(ds_src_c2, "in-c2", "img")
    add_pyscript_image(ds_src_d2, "in-d2", "img")
    ds_src.save(recursive=True)

    ds_dest = clone(ds_src.path, str(path / "dest"))
    ds_dest_a2 = Dataset(ds_dest.pathobj / "a" / "a2")
    ds_dest_b2 = Dataset(ds_dest.pathobj / "b" / "b2")
    ds_dest_c2 = Dataset(ds_dest.pathobj / "c" / "c2")
    ds_dest_d2 = Dataset(ds_dest.pathobj / "d" / "d2")
    assert_false(ds_dest_a2.is_installed())
    assert_false(ds_dest_b2.is_installed())
    assert_false(ds_dest_c2.is_installed())
    assert_false(ds_dest_d2.is_installed())

    # Needed subdatasets are installed if container name is given...
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_result_count(res, 1, action="install", status="ok",
                        path=ds_dest_a2.path)
    assert_result_count(res, 1, action="get", status="ok",
                        path=str(ds_dest_a2.pathobj / "img"))
    ok_(ds_dest_a2.is_installed())
    # ... even if the name and path do not match.
    res = ds_dest.containers_run(["arg"], container_name="b-name/b2/in-b2")
    assert_result_count(res, 1, action="install", status="ok",
                        path=ds_dest_b2.path)
    assert_result_count(res, 1, action="get", status="ok",
                        path=str(ds_dest_b2.pathobj / "img"))
    ok_(ds_dest_b2.is_installed())
    # Subdatasets will also be installed if given an image path...
    res = ds_dest.containers_run(["arg"],
                                 container_name=str(Path("c/c2/img")))
    assert_result_count(res, 1, action="install", status="ok",
                        path=ds_dest_c2.path)
    assert_result_count(res, 1, action="get", status="ok",
                        path=str(ds_dest_c2.pathobj / "img"))
    ok_(ds_dest_c2.is_installed())
    # ... unless the module name chain doesn't match the subdataset path. In
    # that case, the caller needs to install the subdatasets beforehand.
    with assert_raises(ValueError):
        ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))
    ds_dest.get(ds_dest_d2.path, recursive=True, get_data=False)
    ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))

    # There's no install record if subdataset is already present.
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_not_in_results(res, action="install")


def test_run_assume_ready(path):
    ds = Dataset(path).create()
    repo = ds.repo
    adjusted = repo.is_managed_branch()

    # --assume-ready=inputs

    (repo.pathobj / "f1").write_text("f1")
    ds.save()

    def cat_cmd(fname):
        return [
            sys.executable, "-c",
            "import sys; print(open(sys.argv[-1]).read())",
            fname]

    assert_in_results(ds.run(cat_cmd("f1"), inputs=["f1"]),
                      action="get", type="file")
    # Same thing, but without the get() call.
    assert_not_in_results(
        ds.run(cat_cmd("f1"), inputs=["f1"], assume_ready="inputs"),
        action="get", type="file")

    ds.drop("f1", check=False)
    if not adjusted:
        # If the input is not actually ready, the command will fail.
        with assert_raises(CommandError):
            ds.run(cat_cmd("f1"), inputs=["f1"], assume_ready="inputs")

    # --assume-ready=outputs

    def unlink_and_write_cmd(fname):
        # This command doesn't care whether the output file is unlocked
        # because it removes it ahead of time anyway.
        return [
            sys.executable, "-c",
            "import sys; import os; import os.path as op; "
            "f = sys.argv[-1]; op.lexists(f) and os.unlink(f); "
            "open(f, mode='w').write(str(sys.argv))",
            fname]

    (repo.pathobj / "f2").write_text("f2")
    ds.save()

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"])
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")
    # Same thing, but without the unlock() call.
    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"],
                 assume_ready="outputs")
    assert_not_in_results(res, action="unlock", type="file")

    # --assume-ready=both

    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"], inputs=["f2"])
    assert_in_results(res, action="get", type="file")
    if not adjusted:
        assert_in_results(res, action="unlock", type="file")
    res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"], inputs=["f2"],
                 assume_ready="both")
    assert_not_in_results(res, action="get", type="file")
    assert_not_in_results(res, action="unlock", type="file")