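# Imports this excerpt relies on; the module paths follow DataLad's
# test-suite layout and are assumed here, not taken from the excerpt itself.
from copy import deepcopy
from os.path import basename
from os.path import join as opj

from datalad.api import annotate_paths
from datalad.distribution.dataset import Dataset
from datalad.tests.utils import (
    assert_result_count,
    eq_,
    ok_clean_git,
    with_tempfile,
    with_tree,
)
from datalad.utils import chpwd


# The test below references a module-level `demo_hierarchy` and a
# `make_demo_hierarchy_datasets` helper that are not part of this excerpt.
# What follows is a minimal sketch consistent with the assertions in the
# test (number and nesting of datasets, the file paths used); the upstream
# definitions may differ in detail.
demo_hierarchy = {
    'a': {                          # dataset
        'aa': {                     # dataset
            'file_aa': 'file_aa'}},
    'b': {                          # dataset
        'ba': {                     # dataset
            'file_ba': 'file_ba'},
        'bb': {                     # dataset
            'bba': {                # dataset
                'bbaa': {           # dataset
                    'file_bbaa': 'file_bbaa'}},
            'file_bb': 'file_bb'}},
}


def make_demo_hierarchy_datasets(path, tree, parent=None):
    # sketch: turn every dict node of the tree into a (sub)dataset; the
    # plain-string leaves are files that @with_tree already put on disk,
    # hence force=True
    if parent is None:
        parent = Dataset(path).create(force=True)
    for node, items in tree.items():
        if isinstance(items, dict):
            node_path = opj(path, node)
            nodeds = parent.create(node_path, force=True)
            make_demo_hierarchy_datasets(node_path, items, parent=nodeds)
    return parent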
# Fixture decorators assumed from DataLad's test utilities: `dspath` is the
# materialized demo tree, `nodspath` a plain temporary directory that is not
# a dataset.
@with_tree(demo_hierarchy)
@with_tempfile(mkdir=True)
def test_annotate_paths(dspath, nodspath):
    # this test doesn't use API `remove` to avoid circularities
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.save(recursive=True)
    ok_clean_git(ds.path)

    with chpwd(dspath):
        # with and without an explicitly given path the result is almost the
        # same inside a dataset
        without_path = annotate_paths(on_failure='ignore')
        pwd_res = annotate_paths(path='.', on_failure='ignore')
        assert_result_count(
            without_path, 1, type='dataset', path=dspath)
        assert_result_count(
            pwd_res, 1, type='dataset', path=dspath, orig_request='.',
            raw_input=True)
        # make sure going into a subdataset vs giving it as a path has no
        # structural impact
        eq_(
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request',
                           'refds')}
             for ap in annotate_paths(path='b', recursive=True)],
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request',
                           'refds')}
             for ap in annotate_paths(dataset='b', recursive=True)])

    # now do it again, pointing to the ds directly
    res = ds.annotate_paths(on_failure='ignore')
    # no request, no refds, but otherwise the same
    eq_(len(res), len(pwd_res))
    eq_({k: pwd_res[0][k] for k in pwd_res[0]
         if k in ('path', 'type', 'action', 'status')},
        {k: res[0][k] for k in res[0] if k not in ('refds',)})

    # will refuse a path that is not a dataset as refds
    res = annotate_paths(dataset=nodspath, on_failure='ignore')
    assert_result_count(
        res, 1, status='error', path=nodspath,
        message='given reference dataset is not a dataset')

    # recursion with proper base dataset
    parentds = Dataset(opj(dspath, 'a'))
    base_res = parentds.annotate_paths(recursive=True)
    # needs to find 'aa' and the base
    assert_result_count(base_res, 2)
    assert_result_count(base_res, 2, type='dataset')
    assert_result_count(
        base_res, 1, type='dataset', parentds=parentds.path,
        path=opj(parentds.path, 'aa'), status='')
    # same recursion but without a base dataset
    res = annotate_paths(path=opj(dspath, 'a'), recursive=True)
    # needs to find 'aa' and 'a' again
    assert_result_count(res, 2)
    eq_(res[-1],
        {k: base_res[-1][k] for k in base_res[-1] if k not in ('refds',)})
    assert_result_count(
        res, 1, type='dataset', status='',
        # it does not auto-discover parent datasets without force or a refds
        #parentds=parentds.path,
        path=parentds.path)
    # but we can force parent discovery
    res = parentds.annotate_paths(
        path=opj(dspath, 'a'), recursive=True,
        force_parentds_discovery=True)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1, type='dataset', status='', parentds=dspath,
        path=parentds.path)

    # recursion with multiple disjoint seeds, no common base
    eq_([basename(p) for p in annotate_paths(
            path=[opj(dspath, 'a'), opj(dspath, 'b', 'bb', 'bba')],
            recursive=True, result_xfm='paths')],
        ['a', 'aa', 'bba', 'bbaa'])

    # recursion with partially overlapping seeds, no duplicate results
    eq_([basename(p) for p in annotate_paths(
            path=[opj(dspath, 'b'), opj(dspath, 'b', 'bb', 'bba')],
            recursive=True, result_xfm='paths')],
        ['b', 'ba', 'bb', 'bba', 'bbaa'])

    # get straight from a file
    fpath = opj('a', 'aa', 'file_aa')
    res = ds.annotate_paths(fpath)
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, orig_request=fpath, raw_input=True, type='file',
        path=opj(ds.path, fpath), parentds=opj(ds.path, 'a', 'aa'),
        status='')
    # now drop it
    dropres = ds.drop(fpath, check=False)
    assert_result_count(dropres, 1, path=res[0]['path'], status='ok')
    # ask for the same file again, use 'notneeded' for unavailable to try to
    # trigger any difference
    droppedres = ds.annotate_paths(fpath, unavailable_path_status='notneeded')
    # but we get the same result
    eq_(res, droppedres)

    # now try the same on an uninstalled dataset
    subdspath = opj('b', 'bb')
    # before
    before_res = ds.annotate_paths(subdspath, recursive=True,
                                   unavailable_path_status='error')
    assert_result_count(before_res, 3, status='', type='dataset')
    uninstall_res = ds.uninstall(subdspath, recursive=True, check=False)
    assert_result_count(uninstall_res, 3, status='ok', type='dataset')
    # after
    after_res = ds.annotate_paths(subdspath,
                                  unavailable_path_status='error',
                                  on_failure='ignore')
    # uninstall hides all low-level datasets
    assert_result_count(after_res, 1)
    # but for the top-most uninstalled one it merely reports absent state now
    assert_result_count(
        after_res, 1, state='absent',
        **{k: before_res[0][k]
           for k in before_res[0] if k not in ('state', 'status')})
    # however, this beauty doesn't come for free, so it can be disabled,
    # which will make the uninstalled subdataset look like a directory in the
    # parent (or even just a non-existing path, if the mountpoint dir isn't
    # present)
    after_res = ds.annotate_paths(subdspath, force_subds_discovery=False)
    assert_result_count(
        after_res, 1, type='directory',
        path=before_res[0]['path'],
        parentds=before_res[0]['parentds'])

    # feed annotated paths into annotate_paths, it shouldn't change things
    # upon a second run
    # datasets and file
    res = ds.annotate_paths(['.', fpath], recursive=True)
    # make a copy, just to be sure
    orig_res = deepcopy(res)
    assert_result_count(res, 7)
    # and in again, no recursion this time
    res_again = ds.annotate_paths(res)
    # doesn't change a thing
    eq_(orig_res, res_again)
    # and in again, with recursion this time
    res_recursion_again = ds.annotate_paths(res, recursive=True)
    assert_result_count(res_recursion_again, 7)
    # doesn't change a thing
    eq_(orig_res, res_recursion_again)
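# For reference, each annotated path handled above is a plain result dict.
# Judging only from the keys this test inspects, a record for an installed
# subdataset plausibly looks like this (illustrative values only, not
# captured from a real run):
#
#   {'action': 'annotate_path',     # exact value not asserted in this test
#    'path': '/tmp/ds/a/aa',        # always absolute
#    'type': 'dataset',             # or 'file'/'directory'
#    'parentds': '/tmp/ds/a',
#    'refds': '/tmp/ds',            # only when a reference dataset applied
#    'orig_request': 'a/aa',        # only for caller-supplied paths ...
#    'raw_input': True,             # ... ditto
#    'registered_subds': True,      # bookkeeping key excluded in comparisons
#    'state': 'absent',             # reported for uninstalled subdatasets
#    'status': ''}                  # left empty for a consumer to fill in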