def test_recurseinto(dspath, dest):
    # make fresh dataset hierarchy
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.save(recursive=True)
    # label intermediate dataset as 'norecurseinto'
    res = Dataset(opj(ds.path, 'b')).subdatasets(
        contains='bb',
        set_property=[('datalad-recursiveinstall', 'skip')])
    assert_result_count(res, 1, path=opj(ds.path, 'b', 'bb'))
    ds.save('b', recursive=True)
    assert_repo_status(ds.path)

    # recursive install, should skip the entire bb branch
    res = install(
        source=ds.path, path=dest, recursive=True,
        result_xfm=None, result_filter=None)
    assert_result_count(res, 5)
    assert_result_count(res, 5, type='dataset')
    # we got the neighbor subdataset
    assert_result_count(res, 1, type='dataset', path=opj(dest, 'b', 'ba'))
    # we did not get the one we wanted to skip
    assert_result_count(res, 0, type='dataset', path=opj(dest, 'b', 'bb'))
    assert_not_in(
        opj(dest, 'b', 'bb'),
        Dataset(dest).subdatasets(fulfilled=True, result_xfm='paths'))
    assert not Dataset(opj(dest, 'b', 'bb')).is_installed()

    # cleanup
    Dataset(dest).remove(recursive=True)
    assert not lexists(dest)

    # again, but just clone the base, and then get content and grab 'bb'
    # explicitly -- must get it installed
    dest = install(source=ds.path, path=dest)
    res = dest.get(['.', opj('b', 'bb')], get_data=False, recursive=True)
    assert_result_count(res, 7)
    assert_result_count(res, 7, type='dataset')
    assert_result_count(res, 1, type='dataset', path=opj(dest.path, 'b', 'bb'))
    assert Dataset(opj(dest.path, 'b', 'bb')).is_installed()
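# Note on the marker used above (illustrative aside, not part of the test):
# the 'datalad-recursiveinstall' entry is recorded as a regular subdataset
# property via subdatasets(set_property=...), and a recursive install()
# consults it to prune recursion into the marked subdataset. A hedged sketch
# of undoing the marker through the same API (counterpart parameter, not
# exercised by this test):
#
#   Dataset(opj(ds.path, 'b')).subdatasets(
#       contains='bb', delete_property='datalad-recursiveinstall')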
def test_annotate_paths(dspath, nodspath):
    # this test doesn't use API `remove` to avoid circularities
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    with chpwd(dspath):
        # with and without an explicitly given path the result is almost the
        # same inside a dataset
        without_path = annotate_paths(on_failure='ignore')
        pwd_res = annotate_paths(path='.', on_failure='ignore')
        assert_result_count(
            without_path, 1, type='dataset', path=dspath)
        assert_result_count(
            pwd_res, 1, type='dataset', path=dspath, orig_request='.',
            raw_input=True)
        # make sure going into a subdataset vs giving it as a path has no
        # structural impact
        eq_(
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request',
                           'refds')}
             for ap in annotate_paths(path='b', recursive=True)],
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request',
                           'refds')}
             for ap in annotate_paths(dataset='b', recursive=True)])
        # when we point to a list of directories, there should be no
        # multiple rediscoveries of the subdatasets
        with swallow_logs(new_level=logging.DEBUG) as cml:
            annotate_paths(path=['a', 'b'])
            eq_(cml.out.count(
                'Resolved dataset for subdataset reporting/modification'), 1)

    # now do it again, pointing to the ds directly
    res = ds.annotate_paths(on_failure='ignore')
    # no request, no refds, but otherwise the same
    eq_(len(res), len(pwd_res))
    eq_({k: pwd_res[0][k] for k in pwd_res[0]
         if k in ('path', 'type', 'action', 'status')},
        {k: res[0][k] for k in res[0] if k not in ('refds',)})

    # will refuse a path that is not a dataset as refds
    res = annotate_paths(dataset=nodspath, on_failure='ignore')
    assert_result_count(
        res, 1, status='error', path=nodspath,
        message='given reference dataset is not a dataset')

    # recursion with proper base dataset
    parentds = Dataset(opj(dspath, 'a'))
    base_res = parentds.annotate_paths(recursive=True)
    # needs to find 'aa' and the base
    assert_result_count(base_res, 2)
    assert_result_count(base_res, 2, type='dataset')
    assert_result_count(
        base_res, 1, type='dataset', parentds=parentds.path,
        path=opj(parentds.path, 'aa'), status='')
    # same recursion but without a base dataset
    res = annotate_paths(path=opj(dspath, 'a'), recursive=True)
    # needs to find 'aa' and 'a' again
    assert_result_count(res, 2)
    eq_(res[-1],
        {k: base_res[-1][k] for k in base_res[-1] if k not in ('refds',)})
    assert_result_count(
        res, 1, type='dataset', status='',
        # it does not auto-discover parent datasets without force or a refds
        #parentds=parentds.path,
        path=parentds.path)
    # but we can force parent discovery
    res = parentds.annotate_paths(
        path=opj(dspath, 'a'), recursive=True,
        force_parentds_discovery=True)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1, type='dataset', status='', parentds=dspath,
        path=parentds.path)

    # recursion with multiple disjoint seeds, no common base
    eq_([basename(p) for p in annotate_paths(
            path=[opj(dspath, 'a'), opj(dspath, 'b', 'bb', 'bba')],
            recursive=True, result_xfm='paths')],
        ['a', 'aa', 'bba', 'bbaa'])

    # recursion with partially overlapping seeds, no duplicate results
    eq_([basename(p) for p in annotate_paths(
            path=[opj(dspath, 'b'), opj(dspath, 'b', 'bb', 'bba')],
            recursive=True, result_xfm='paths')],
        ['b', 'ba', 'bb', 'bba', 'bbaa'])

    # get straight from a file
    fpath = opj('a', 'aa', 'file_aa')
    res = ds.annotate_paths(fpath)
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, orig_request=fpath, raw_input=True, type='file',
        path=opj(ds.path, fpath), parentds=opj(ds.path, 'a', 'aa'), status='')
    # now drop it
    dropres = ds.drop(fpath, check=False)
    assert_result_count(dropres, 1, path=res[0]['path'], status='ok')
    # ask for the same file again, use 'notneeded' for unavailable to try to
    # trigger any difference
    droppedres = ds.annotate_paths(fpath, unavailable_path_status='notneeded')
    # but we get the same result
    eq_(res, droppedres)

    # now try the same on an uninstalled dataset
    subdspath = opj('b', 'bb')
    # before
    before_res = ds.annotate_paths(
        subdspath, recursive=True, unavailable_path_status='error')
    assert_result_count(before_res, 3, status='', type='dataset')
    uninstall_res = ds.uninstall(subdspath, recursive=True, check=False)
    assert_result_count(uninstall_res, 3, status='ok', type='dataset')
    # after
    after_res = ds.annotate_paths(
        subdspath, unavailable_path_status='error', on_failure='ignore')
    # uninstall hides all low-level datasets
    assert_result_count(after_res, 1)
    # but for the top-most uninstalled one it merely reports absent state now
    assert_result_count(
        after_res, 1, state='absent',
        **{k: before_res[0][k]
           for k in before_res[0] if k not in ('state', 'status')})
    # however, this beauty doesn't come for free, so it can be disabled,
    # which will make the uninstalled subdataset look like a directory in the
    # parent (or even just a non-existing path, if the mountpoint dir isn't
    # present)
    after_res = ds.annotate_paths(subdspath, force_subds_discovery=False)
    assert_result_count(
        after_res, 1, type='directory',
        path=before_res[0]['path'],
        parentds=before_res[0]['parentds'])

    # feed annotated paths into annotate_paths; it shouldn't change things
    # upon a second run
    # datasets and file
    res = ds.annotate_paths(['.', fpath], recursive=True)
    # make a copy, just to be sure
    orig_res = deepcopy(res)
    assert_result_count(res, 7)
    # and in again, no recursion this time
    res_again = ds.annotate_paths(res)
    # doesn't change a thing
    eq_(orig_res, res_again)
    # and in again, with recursion this time
    res_recursion_again = ds.annotate_paths(res, recursive=True)
    assert_result_count(res_recursion_again, 7)
    # doesn't change a thing
    eq_(orig_res, res_recursion_again)
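# For orientation when reading the assertions above (a summary drawn only
# from this module, not an exhaustive spec): annotated-path records are
# plain dicts; the keys exercised here are 'path', 'type', 'action',
# 'status', 'state', 'parentds', 'refds', 'orig_request', 'raw_input', and
# 'registered_subds'. A minimal, hypothetical filtering sketch built on
# those keys only:
#
#   present_ds = [r['path'] for r in ds.annotate_paths('.', recursive=True)
#                 if r.get('type') == 'dataset' and r.get('state') != 'absent']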