def get_subdatasets(self, pattern=None, fulfilled=None, absolute=False,
                    recursive=False, recursion_limit=None, edges=False):
    """DEPRECATED: use `subdatasets()`"""
    # TODO wipe this function out completely once we are comfortable
    # with it. Internally we don't need or use it anymore.
    # NOTE(review): `pattern` is accepted but never referenced below —
    # presumably kept only for call-signature compatibility; confirm.
    import inspect
    lgr.warning(
        '%s still uses Dataset.get_subdatasets(). RF to use `subdatasets` command',
        inspect.stack()[1][3])
    from datalad.api import subdatasets

    # arguments shared by both query flavors
    query_kwargs = dict(
        dataset=self,
        fulfilled=fulfilled,
        recursive=recursive,
        recursion_limit=recursion_limit,
        bottomup=True,
    )

    if not edges:
        # plain path listing; let the command perform the (rel)path transform
        return subdatasets(
            result_xfm='paths' if absolute else 'relpaths',
            **query_kwargs)

    def _topath(p):
        # keep absolute paths as-is, otherwise express relative to this dataset
        return p if absolute else relpath(p, start=self.path)

    # (parent, child) edge tuples for the subdataset hierarchy
    return [(_topath(res['parentpath']), _topath(res['path']))
            for res in subdatasets(**query_kwargs)]
def test_addurls_subdataset(self, path):
    """Check that `addurls` with a `//` path separator creates subdatasets.

    Runs once with ``save=True`` and once with ``save=False`` and uses the
    repo HEAD hexsha plus the dirty flag to verify that a commit happened
    exactly when saving was requested.
    """
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            # record HEAD before/after so we can detect whether a commit
            # was made by the addurls call
            hexsha_before = ds.repo.get_hexsha()
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)
            hexsha_after = ds.repo.get_hexsha()

            # the downloaded files must exist regardless of save mode
            for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                ok_exists(fname.format(label))

            # XOR: HEAD moved iff save=True, repo dirty iff save=False
            assert_true(save ^ (hexsha_before == hexsha_after))
            assert_true(save ^ ds.repo.dirty)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.add(".")
        eq_(set(subdatasets(ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)
def test_addurls_subdataset(self=None, path=None):
    """Check `addurls`-driven subdataset creation, result rendering and
    cfg_proc application.

    Runs once with ``save=True`` and once with ``save=False``; verifies the
    custom result renderer output, that the ``yoda`` procedure was applied
    to each created subdataset, and the saved/unsaved repo state.
    """
    ds = Dataset(path).create(force=True)
    for save in True, False:
        label = "save" if save else "nosave"
        with swallow_outputs() as cmo:
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save, cfg_proc=["yoda"])
            # The custom result renderer transforms the subdataset
            # action=create results into something more informative than
            # "create(ok): . (dataset)"...
            assert_in("create(ok): foo-{} (dataset)".format(label), cmo.out)
            # ... and that doesn't lose the standard summary.
            assert_in("create (ok: 2)", cmo.out)

        # two subdatasets per round: foo-<label> holds a+c, bar-<label> holds b
        subdirs = [
            op.join(ds.path, "{}-{}".format(d, label))
            for d in ["foo", "bar"]
        ]
        subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

        for subds, fnames in subdir_files.items():
            for fname in fnames:
                ok_exists(op.join(subds, fname))
            # cfg_proc was applied to generated subdatasets
            # (yoda creates a code/ directory).
            ok_exists(op.join(subds, "code"))
        if save:
            assert_repo_status(path)
        else:
            # The datasets are created but not saved (since asked not to)
            assert_repo_status(path, untracked=subdirs)
            # but the downloaded files aren't.
            for subds, fnames in subdir_files.items():
                assert_repo_status(subds, added=fnames)

    # Now save the "--nosave" changes and check that we have
    # all the subdatasets.
    ds.save()
    eq_(
        set(subdatasets(dataset=ds, recursive=True,
                        result_xfm="relpaths")),
        {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

    # We don't try to recreate existing subdatasets.
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}",
                   result_renderer='disabled')
        assert_in("Not creating subdataset at existing path", cml.out)
def test_addurls_subdataset(self, path):
    """Check `addurls`-driven subdataset creation with cfg_proc.

    Runs once with ``save=True`` and once with ``save=False``; verifies the
    downloaded files, the applied ``yoda`` procedure, and that unsaved
    rounds only leave the superdataset modified until a final ``save``.
    """
    ds = Dataset(path).create(force=True)
    for save in True, False:
        label = "save" if save else "nosave"
        ds.addurls(self.json_file, "{url}",
                   "{subdir}-" + label + "//{name}",
                   save=save, cfg_proc=["yoda"])

        # two subdatasets per round: foo-<label> holds a+c, bar-<label> holds b
        subdirs = [
            op.join(ds.path, "{}-{}".format(d, label))
            for d in ["foo", "bar"]
        ]
        subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

        for subds, fnames in subdir_files.items():
            for fname in fnames:
                ok_exists(op.join(subds, fname))
            # cfg_proc was applied to generated subdatasets
            # (yoda creates a code/ directory).
            ok_exists(op.join(subds, "code"))
        if save:
            assert_repo_status(path)
        else:
            # The subdatasets are created and saved internally, but the
            # superdataset still sees them as modified ...
            assert_repo_status(path, modified=subdirs)
            # ... and the downloaded files aren't saved within them.
            for subds, fnames in subdir_files.items():
                assert_repo_status(subds, added=fnames)

    # Now save the "--nosave" changes and check that we have
    # all the subdatasets.
    ds.save()
    eq_(
        set(subdatasets(dataset=ds, recursive=True,
                        result_xfm="relpaths")),
        {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

    # We don't try to recreate existing subdatasets.
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
        assert_in("Not creating subdataset at existing path", cml.out)
def test_addurls_subdataset(self, path):
    """Check `addurls`-driven subdataset creation with relative paths.

    Works from within the dataset directory (``chpwd``); runs once with
    ``save=True`` and once with ``save=False`` and verifies downloaded
    files and repo state for each round.
    """
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        for save in True, False:
            label = "save" if save else "nosave"
            ds.addurls(self.json_file, "{url}",
                       "{subdir}-" + label + "//{name}",
                       save=save)

            # two subdatasets per round: foo-<label> holds a+c,
            # bar-<label> holds b
            subdirs = ["{}-{}".format(d, label) for d in ["foo", "bar"]]
            subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

            for subds, fnames in subdir_files.items():
                for fname in fnames:
                    ok_exists(op.join(subds, fname))
            if save:
                assert_repo_status(path)
            else:
                # The subdatasets are created and saved internally, but the
                # superdataset still sees them as modified ...
                assert_repo_status(path, modified=subdirs)
                # ... and the downloaded files aren't saved within them.
                for subds, fnames in subdir_files.items():
                    assert_repo_status(subds, added=fnames)

        # Now save the "--nosave" changes and check that we have
        # all the subdatasets.
        ds.save()
        eq_(set(subdatasets(dataset=ds, recursive=True,
                            result_xfm="relpaths")),
            {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

        # We don't try to recreate existing subdatasets.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
            assert_in("Not creating subdataset at existing path", cml.out)
def test_get_subdatasets(path):
    """Exercise the `subdatasets` query on a nested dataset hierarchy.

    Covers recursive/non-recursive listing, fulfilled filtering, bottom-up
    ordering, recursion limits, parent/child relations, set/delete of
    .gitmodules properties, and the ``contains`` filter.
    """
    ds = Dataset(path)
    # only the top-level subdataset is known before anything is fetched
    eq_(subdatasets(ds, recursive=True, fulfilled=False,
                    result_xfm='relpaths'),
        ['sub dataset1'])
    ds.get('sub dataset1')
    # after fetching it, its direct subdatasets become visible (unfulfilled)
    eq_(subdatasets(ds, recursive=True, fulfilled=False,
                    result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
        ])
    # obtain key subdataset, so all leave subdatasets are discoverable
    ds.get(opj('sub dataset1', 'sub sub dataset1'))
    eq_(ds.subdatasets(result_xfm='relpaths'), ['sub dataset1'])
    eq_([(r['parentds'], r['path']) for r in ds.subdatasets()],
        [(path, opj(path, 'sub dataset1'))])
    eq_(subdatasets(ds, recursive=True, result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/subm 1',
    ])
    # uses slow, flexible query
    eq_(subdatasets(ds, recursive=True, bottomup=True,
                    result_xfm='relpaths'), [
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/subm 1',
        'sub dataset1',
    ])
    eq_(subdatasets(ds, recursive=True, fulfilled=True,
                    result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/sub sub dataset1',
    ])
    # check parent/child relations reported by the recursive query
    eq_([(relpath(r['parentds'], start=ds.path),
          relpath(r['path'], start=ds.path))
         for r in ds.subdatasets(recursive=True)], [
        (os.curdir, 'sub dataset1'),
        ('sub dataset1', 'sub dataset1/2'),
        ('sub dataset1', 'sub dataset1/sub sub dataset1'),
        ('sub dataset1/sub sub dataset1', 'sub dataset1/sub sub dataset1/2'),
        ('sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'),
        ('sub dataset1', 'sub dataset1/subm 1'),
    ])
    # uses slow, flexible query
    eq_(subdatasets(ds, recursive=True, recursion_limit=0), [])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=1,
                       result_xfm='relpaths'),
        ['sub dataset1'])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=2,
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
        ])
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        #for prop in ('gitmodule_url', 'state', 'revision', 'gitmodule_name'):
        for prop in ('gitmodule_url', 'revision', 'gitmodule_name'):
            assert_in(prop, r)
        # random property is unknown
        assert_not_in('mike', r)

    # now add info to all datasets
    res = ds.subdatasets(
        recursive=True,
        set_property=[('mike', 'slow'),
                      ('expansion', '<{refds_relname}>')])
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        # placeholder was expanded to the dash-joined relative path
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))
    # plain query again to see if it got into the files
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))

    # and remove again
    res = ds.subdatasets(recursive=True,
                         delete_property=('mike', 'something'))
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike', 'gitmodule_something'):
            assert_not_in(prop, r)
    # and again, because above yields on the fly edit
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike', 'gitmodule_something'):
            assert_not_in(prop, r)

    #
    # test --contains
    #
    target_sub = 'sub dataset1/sub sub dataset1/subm 1'
    # give the closest direct subdataset
    eq_(ds.subdatasets(contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1'])
    # should find the actual subdataset trail
    eq_(ds.subdatasets(recursive=True,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'])
    # doesn't affect recursion limit
    eq_(ds.subdatasets(recursive=True, recursion_limit=2,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1'])
    # for a direct dataset path match, return the matching dataset
    eq_(ds.subdatasets(recursive=True,
                       contains=target_sub,
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'])
    # but it has to be a subdataset, otherwise no match
    # which is what get_containing_subdataset() used to do
    eq_(ds.subdatasets(contains=ds.path), [])
    # no error if contains is bullshit
    eq_(ds.subdatasets(recursive=True, contains='errrr_nope',
                       result_xfm='paths'),
        [])
    # TODO maybe at a courtesy bullshit detector some day
    eq_(ds.subdatasets(recursive=True, contains=opj(pardir, 'errrr_nope'),
                       result_xfm='paths'),
        [])
def test_get_subdatasets(path):
    """Exercise the `subdatasets` query on a nested dataset hierarchy.

    Covers recursive/non-recursive listing, path-limited queries, cwd-based
    invocation, fulfilled filtering, bottom-up ordering, recursion limits,
    set/delete of .gitmodules properties, and the ``contains`` filter.

    Fixes a latent bug in the delete-property checks: ``('gitmodule_mike')``
    is a plain string (parentheses alone do not make a tuple), so the loop
    iterated over characters 'g', 'i', 't', ... instead of the property
    name. A trailing comma makes it a proper single-element tuple.
    """
    ds = Dataset(path)
    # one more subdataset with a name that could ruin config option parsing
    dots = text_type(Path('subdir') / '.lots.of.dots.')
    ds.create(dots)
    eq_(ds.subdatasets(recursive=True, fulfilled=False,
                       result_xfm='relpaths'),
        ['sub dataset1'])
    ds.get('sub dataset1')
    eq_(ds.subdatasets(recursive=True, fulfilled=False,
                       result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
        ])
    # obtain key subdataset, so all leaf subdatasets are discoverable
    ds.get(opj('sub dataset1', 'sub sub dataset1'))
    eq_(ds.subdatasets(result_xfm='relpaths'), ['sub dataset1', dots])
    eq_([(r['parentds'], r['path']) for r in ds.subdatasets()],
        [(path, opj(path, 'sub dataset1')),
         (path, opj(path, dots))])
    eq_(ds.subdatasets(recursive=True, result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/subm 1',
        dots,
    ])
    # redo, but limit to specific paths
    eq_(
        ds.subdatasets(
            path=['sub dataset1/2', 'sub dataset1/sub sub dataset1'],
            recursive=True, result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/sub sub dataset1/2',
            'sub dataset1/sub sub dataset1/subm 1',
        ]
    )
    with chpwd(text_type(ds.pathobj / 'subdir')):
        # imitate cmdline invocation w/ no dataset argument
        # -> curdir limits the query, when no info is given
        eq_(subdatasets(dataset=None, path=[], recursive=True,
                        result_xfm='paths'),
            [text_type(ds.pathobj / dots)])
        # but with a dataset explicitly given, even if just as a path,
        # curdir does not limit the query
        eq_(subdatasets(dataset=os.pardir, path=None, recursive=True,
                        result_xfm='relpaths'),
            ['sub dataset1',
             'sub dataset1/2',
             'sub dataset1/sub sub dataset1',
             'sub dataset1/sub sub dataset1/2',
             'sub dataset1/sub sub dataset1/subm 1',
             'sub dataset1/subm 1',
             dots])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, bottomup=True,
                       result_xfm='relpaths'), [
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/subm 1',
        'sub dataset1',
        dots,
    ])
    eq_(ds.subdatasets(recursive=True, fulfilled=True,
                       result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/sub sub dataset1',
        dots,
    ])
    # check parent/child relations reported by the recursive query
    eq_([(relpath(r['parentds'], start=ds.path),
          relpath(r['path'], start=ds.path))
         for r in ds.subdatasets(recursive=True)], [
        (os.curdir, 'sub dataset1'),
        ('sub dataset1', 'sub dataset1/2'),
        ('sub dataset1', 'sub dataset1/sub sub dataset1'),
        ('sub dataset1/sub sub dataset1', 'sub dataset1/sub sub dataset1/2'),
        ('sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'),
        ('sub dataset1', 'sub dataset1/subm 1'),
        (os.curdir, dots),
    ])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=0), [])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=1,
                       result_xfm='relpaths'),
        ['sub dataset1', dots])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=2,
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
            dots,
        ])
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        #for prop in ('gitmodule_url', 'state', 'revision', 'gitmodule_name'):
        for prop in ('gitmodule_url', 'revision', 'gitmodule_name'):
            assert_in(prop, r)
        # random property is unknown
        assert_not_in('mike', r)

    # now add info to all datasets
    res = ds.subdatasets(
        recursive=True,
        set_property=[('mike', 'slow'),
                      ('expansion', '<{refds_relname}>')])
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        # placeholder was expanded to the dash-joined relative path
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))
    # plain query again to see if it got into the files
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))

    # and remove again
    res = ds.subdatasets(recursive=True, delete_property='mike')
    assert_status('ok', res)
    for r in res:
        # trailing comma makes this a single-element tuple; without it the
        # loop would iterate the characters of the string
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)
    # and again, because above yields on the fly edit
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)

    #
    # test --contains
    #
    target_sub = 'sub dataset1/sub sub dataset1/subm 1'
    # give the closest direct subdataset
    eq_(ds.subdatasets(contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1'])
    # should find the actual subdataset trail
    eq_(ds.subdatasets(recursive=True,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'])
    # doesn't affect recursion limit
    eq_(ds.subdatasets(recursive=True, recursion_limit=2,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1'])
    # for a direct dataset path match, return the matching dataset
    eq_(ds.subdatasets(recursive=True,
                       contains=target_sub,
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'])
    # but it has to be a subdataset, otherwise no match
    # which is what get_containing_subdataset() used to do
    eq_(ds.subdatasets(contains=ds.path), [])
    # no error if contains is bullshit
    eq_(ds.subdatasets(recursive=True, contains='errrr_nope',
                       result_xfm='paths'),
        [])
    # TODO maybe at a courtesy bullshit detector some day
    eq_(ds.subdatasets(recursive=True, contains=opj(pardir, 'errrr_nope'),
                       result_xfm='paths'),
        [])
    eq_(ds.subdatasets(
        recursive=True,
        contains=[target_sub, 'sub dataset1/2'],
        result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/sub sub dataset1/subm 1',
    ])
def test_get_subdatasets(origpath, path):
    """Exercise the `subdatasets` query on a cloned nested hierarchy.

    Builds the hierarchy in ``origpath``, clones it to ``path``, then covers
    recursive/non-recursive listing, path-limited queries, cwd-based
    invocation, fulfilled filtering, bottom-up ordering, recursion limits,
    set/delete of .gitmodules properties, and the ``contains`` filter.

    Fixes a latent bug in the delete-property checks: ``('gitmodule_mike')``
    is a plain string (parentheses alone do not make a tuple), so the loop
    iterated over characters 'g', 'i', 't', ... instead of the property
    name. A trailing comma makes it a proper single-element tuple.
    """
    # setup
    orig = Dataset(origpath).create()
    orig_sub = orig.create('sub dataset1')
    # 2nd-level
    for s in ('2', 'sub sub dataset1', 'subm 1'):
        orig_sub.create(s)
    # 3rd-level
    for s in ('2', 'subm 1'):
        orig_sub.create(Path('sub sub dataset1', s))
    orig.save(recursive=True)
    assert_repo_status(orig.path)

    # tests
    ds = clone(source=origpath, path=path)
    # one more subdataset with a name that could ruin config option parsing
    # no trailing dots on windows and its crippled FS mounted on linux!
    dots = str(Path('subdir') / ('.lots.of.dots'))
    ds.create(dots)
    # mitigate https://github.com/datalad/datalad/issues/4267
    ds.save()
    eq_(ds.subdatasets(recursive=True, fulfilled=False,
                       result_xfm='relpaths'),
        ['sub dataset1'])
    ds.get('sub dataset1')
    eq_(ds.subdatasets(recursive=True, fulfilled=False,
                       result_xfm='relpaths'),
        [
            _p('sub dataset1/2'),
            _p('sub dataset1/sub sub dataset1'),
            _p('sub dataset1/subm 1'),
        ])
    # obtain key subdataset, so all leaf subdatasets are discoverable
    ds.get(opj('sub dataset1', 'sub sub dataset1'))
    eq_(ds.subdatasets(result_xfm='relpaths'), ['sub dataset1', dots])
    eq_([(r['parentds'], r['path']) for r in ds.subdatasets()],
        [(path, opj(path, 'sub dataset1')),
         (path, opj(path, dots))])
    all_subs = [
        _p('sub dataset1'),
        _p('sub dataset1/2'),
        _p('sub dataset1/sub sub dataset1'),
        _p('sub dataset1/sub sub dataset1/2'),
        _p('sub dataset1/sub sub dataset1/subm 1'),
        _p('sub dataset1/subm 1'),
        dots,
    ]
    eq_(ds.subdatasets(recursive=True, result_xfm='relpaths'), all_subs)
    with chpwd(str(ds.pathobj)):
        # imitate cmdline invocation w/ no dataset argument
        eq_(
            subdatasets(dataset=None, path=[], recursive=True,
                        result_xfm='relpaths'),
            all_subs)
    # redo, but limit to specific paths
    eq_(
        ds.subdatasets(
            path=[_p('sub dataset1/2'), _p('sub dataset1/sub sub dataset1')],
            recursive=True, result_xfm='relpaths'),
        [
            _p('sub dataset1/2'),
            _p('sub dataset1/sub sub dataset1'),
            _p('sub dataset1/sub sub dataset1/2'),
            _p('sub dataset1/sub sub dataset1/subm 1'),
        ])
    eq_(
        ds.subdatasets(path=['sub dataset1'], recursive=True,
                       result_xfm='relpaths'),
        [
            _p('sub dataset1'),
            _p('sub dataset1/2'),
            _p('sub dataset1/sub sub dataset1'),
            _p('sub dataset1/sub sub dataset1/2'),
            _p('sub dataset1/sub sub dataset1/subm 1'),
            _p('sub dataset1/subm 1'),
        ])
    with chpwd(str(ds.pathobj / 'subdir')):
        # imitate cmdline invocation w/ no dataset argument
        # -> curdir limits the query, when no info is given
        eq_(
            subdatasets(dataset=None, path=[], recursive=True,
                        result_xfm='paths'),
            [str(ds.pathobj / dots)])
        # but with a dataset explicitly given, even if just as a path,
        # curdir does not limit the query
        eq_(
            subdatasets(dataset=os.pardir, path=None, recursive=True,
                        result_xfm='relpaths'),
            [
                _p('sub dataset1'),
                _p('sub dataset1/2'),
                _p('sub dataset1/sub sub dataset1'),
                _p('sub dataset1/sub sub dataset1/2'),
                _p('sub dataset1/sub sub dataset1/subm 1'),
                _p('sub dataset1/subm 1'),
                dots
            ])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, bottomup=True,
                       result_xfm='relpaths'),
        [
            _p('sub dataset1/2'),
            _p('sub dataset1/sub sub dataset1/2'),
            _p('sub dataset1/sub sub dataset1/subm 1'),
            _p('sub dataset1/sub sub dataset1'),
            _p('sub dataset1/subm 1'),
            _p('sub dataset1'),
            dots,
        ])
    eq_(ds.subdatasets(recursive=True, fulfilled=True,
                       result_xfm='relpaths'),
        [
            _p('sub dataset1'),
            _p('sub dataset1/sub sub dataset1'),
            dots,
        ])
    # check parent/child relations reported by the recursive query
    eq_([(relpath(r['parentds'], start=ds.path),
          relpath(r['path'], start=ds.path))
         for r in ds.subdatasets(recursive=True)],
        [
            (os.curdir, 'sub dataset1'),
            ('sub dataset1', _p('sub dataset1/2')),
            ('sub dataset1', _p('sub dataset1/sub sub dataset1')),
            (_p('sub dataset1/sub sub dataset1'),
             _p('sub dataset1/sub sub dataset1/2')),
            (_p('sub dataset1/sub sub dataset1'),
             _p('sub dataset1/sub sub dataset1/subm 1')),
            ('sub dataset1', _p('sub dataset1/subm 1')),
            (os.curdir, dots),
        ])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=0), [])
    # uses slow, flexible query
    eq_(
        ds.subdatasets(recursive=True, recursion_limit=1,
                       result_xfm='relpaths'),
        ['sub dataset1', _p(dots)])
    # uses slow, flexible query
    eq_(
        ds.subdatasets(recursive=True, recursion_limit=2,
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            _p('sub dataset1/2'),
            _p('sub dataset1/sub sub dataset1'),
            _p('sub dataset1/subm 1'),
            dots,
        ])
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        #for prop in ('gitmodule_url', 'state', 'gitshasum', 'gitmodule_name'):
        for prop in ('gitmodule_url', 'gitshasum', 'gitmodule_name'):
            assert_in(prop, r)
        # random property is unknown
        assert_not_in('mike', r)

    # now add info to all datasets
    res = ds.subdatasets(recursive=True,
                         set_property=[('mike', 'slow'),
                                       ('expansion', '<{refds_relname}>')])
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        # placeholder was expanded to the dash-joined relative path
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))
    # plain query again to see if it got into the files
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))

    # and remove again
    res = ds.subdatasets(recursive=True, delete_property='mike')
    assert_status('ok', res)
    for r in res:
        # trailing comma makes this a single-element tuple; without it the
        # loop would iterate the characters of the string
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)
    # and again, because above yields on the fly edit
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)

    #
    # test --contains
    #
    target_sub = _p('sub dataset1/sub sub dataset1/subm 1')
    # give the closest direct subdataset
    eq_(
        ds.subdatasets(contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1'])
    # should find the actual subdataset trail
    eq_(
        ds.subdatasets(recursive=True,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            _p('sub dataset1/sub sub dataset1'),
            _p('sub dataset1/sub sub dataset1/subm 1')
        ])
    # doesn't affect recursion limit
    eq_(
        ds.subdatasets(recursive=True, recursion_limit=2,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1', _p('sub dataset1/sub sub dataset1')])
    # for a direct dataset path match, return the matching dataset
    eq_(
        ds.subdatasets(recursive=True,
                       contains=target_sub,
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            _p('sub dataset1/sub sub dataset1'),
            _p('sub dataset1/sub sub dataset1/subm 1')
        ])
    # but it has to be a subdataset, otherwise no match
    # which is what get_containing_subdataset() used to do
    assert_status('impossible',
                  ds.subdatasets(contains=ds.path, on_failure='ignore'))
    # 'impossible' if contains is bullshit
    assert_status(
        'impossible',
        ds.subdatasets(recursive=True, contains='impossible_yes',
                       on_failure='ignore'))
    assert_status(
        'impossible',
        ds.subdatasets(recursive=True, contains=opj(pardir, 'impossible_yes'),
                       on_failure='ignore'))
    eq_(
        ds.subdatasets(recursive=True,
                       contains=[target_sub, _p('sub dataset1/2')],
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            _p('sub dataset1/2'),
            _p('sub dataset1/sub sub dataset1'),
            _p('sub dataset1/sub sub dataset1/subm 1'),
        ])
def test_get_subdatasets(path):
    """Exercise the `subdatasets` query on a nested dataset hierarchy.

    Covers recursive/non-recursive listing, path-limited queries, cwd-based
    invocation, fulfilled filtering, bottom-up ordering, recursion limits,
    set/delete of .gitmodules properties, and the ``contains`` filter.

    Fixes a latent bug in the delete-property checks: ``('gitmodule_mike')``
    is a plain string (parentheses alone do not make a tuple), so the loop
    iterated over characters 'g', 'i', 't', ... instead of the property
    name. A trailing comma makes it a proper single-element tuple.
    """
    ds = Dataset(path)
    # one more subdataset with a name that could ruin config option parsing
    dots = text_type(Path('subdir') / '.lots.of.dots.')
    ds.create(dots)
    eq_(ds.subdatasets(recursive=True, fulfilled=False,
                       result_xfm='relpaths'),
        ['sub dataset1'])
    ds.get('sub dataset1')
    eq_(ds.subdatasets(recursive=True, fulfilled=False,
                       result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
        ])
    # obtain key subdataset, so all leaf subdatasets are discoverable
    ds.get(opj('sub dataset1', 'sub sub dataset1'))
    eq_(ds.subdatasets(result_xfm='relpaths'), ['sub dataset1', dots])
    eq_([(r['parentds'], r['path']) for r in ds.subdatasets()],
        [(path, opj(path, 'sub dataset1')),
         (path, opj(path, dots))])
    eq_(ds.subdatasets(recursive=True, result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/subm 1',
        dots,
    ])
    # redo, but limit to specific paths
    eq_(
        ds.subdatasets(
            path=['sub dataset1/2', 'sub dataset1/sub sub dataset1'],
            recursive=True, result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/sub sub dataset1/2',
            'sub dataset1/sub sub dataset1/subm 1',
        ])
    with chpwd(text_type(ds.pathobj / 'subdir')):
        # imitate cmdline invocation w/ no dataset argument
        # -> curdir limits the query, when no info is given
        eq_(
            subdatasets(dataset=None, path=[], recursive=True,
                        result_xfm='paths'),
            [text_type(ds.pathobj / dots)])
        # but with a dataset explicitly given, even if just as a path,
        # curdir does not limit the query
        eq_(
            subdatasets(dataset=os.pardir, path=None, recursive=True,
                        result_xfm='relpaths'),
            [
                'sub dataset1',
                'sub dataset1/2',
                'sub dataset1/sub sub dataset1',
                'sub dataset1/sub sub dataset1/2',
                'sub dataset1/sub sub dataset1/subm 1',
                'sub dataset1/subm 1',
                dots
            ])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, bottomup=True,
                       result_xfm='relpaths'), [
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/subm 1',
        'sub dataset1',
        dots,
    ])
    eq_(ds.subdatasets(recursive=True, fulfilled=True,
                       result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/sub sub dataset1',
        dots,
    ])
    # check parent/child relations reported by the recursive query
    eq_([
        (relpath(r['parentds'], start=ds.path),
         relpath(r['path'], start=ds.path))
        for r in ds.subdatasets(recursive=True)
    ], [
        (os.curdir, 'sub dataset1'),
        ('sub dataset1', 'sub dataset1/2'),
        ('sub dataset1', 'sub dataset1/sub sub dataset1'),
        ('sub dataset1/sub sub dataset1', 'sub dataset1/sub sub dataset1/2'),
        ('sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'),
        ('sub dataset1', 'sub dataset1/subm 1'),
        (os.curdir, dots),
    ])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=0), [])
    # uses slow, flexible query
    eq_(
        ds.subdatasets(recursive=True, recursion_limit=1,
                       result_xfm='relpaths'),
        ['sub dataset1', dots])
    # uses slow, flexible query
    eq_(
        ds.subdatasets(recursive=True, recursion_limit=2,
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
            dots,
        ])
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        #for prop in ('gitmodule_url', 'state', 'revision', 'gitmodule_name'):
        for prop in ('gitmodule_url', 'revision', 'gitmodule_name'):
            assert_in(prop, r)
        # random property is unknown
        assert_not_in('mike', r)

    # now add info to all datasets
    res = ds.subdatasets(recursive=True,
                         set_property=[('mike', 'slow'),
                                       ('expansion', '<{refds_relname}>')])
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        # placeholder was expanded to the dash-joined relative path
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))
    # plain query again to see if it got into the files
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))

    # and remove again
    res = ds.subdatasets(recursive=True, delete_property='mike')
    assert_status('ok', res)
    for r in res:
        # trailing comma makes this a single-element tuple; without it the
        # loop would iterate the characters of the string
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)
    # and again, because above yields on the fly edit
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)

    #
    # test --contains
    #
    target_sub = 'sub dataset1/sub sub dataset1/subm 1'
    # give the closest direct subdataset
    eq_(
        ds.subdatasets(contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1'])
    # should find the actual subdataset trail
    eq_(
        ds.subdatasets(recursive=True,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/sub sub dataset1/subm 1'
        ])
    # doesn't affect recursion limit
    eq_(
        ds.subdatasets(recursive=True, recursion_limit=2,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1', 'sub dataset1/sub sub dataset1'])
    # for a direct dataset path match, return the matching dataset
    eq_(
        ds.subdatasets(recursive=True,
                       contains=target_sub,
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/sub sub dataset1/subm 1'
        ])
    # but it has to be a subdataset, otherwise no match
    # which is what get_containing_subdataset() used to do
    eq_(ds.subdatasets(contains=ds.path), [])
    # no error if contains is bullshit
    eq_(
        ds.subdatasets(recursive=True, contains='errrr_nope',
                       result_xfm='paths'),
        [])
    # TODO maybe at a courtesy bullshit detector some day
    eq_(
        ds.subdatasets(recursive=True, contains=opj(pardir, 'errrr_nope'),
                       result_xfm='paths'),
        [])
    eq_(
        ds.subdatasets(recursive=True,
                       contains=[target_sub, 'sub dataset1/2'],
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/sub sub dataset1/subm 1',
        ])