Example #1
def test_report_absent_keys(path=None):
    ds = Dataset(path).create()
    # create an annexed file
    testfile = ds.pathobj / 'dummy'
    testfile.write_text(u'nothing')
    ds.save()
    # present in a full report and in a partial report
    # based on the worktree or on the HEAD ref
    for ai in (ds.repo.get_content_annexinfo(eval_availability=True),
               ds.repo.get_content_annexinfo(paths=['dummy'],
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             paths=['dummy'],
                                             eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], True)
    # drop the key, not available anywhere else
    ds.drop('dummy', reckless='kill')
    # does not change a thing, except the key is gone
    for ai in (ds.repo.get_content_annexinfo(eval_availability=True),
               ds.repo.get_content_annexinfo(paths=['dummy'],
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             eval_availability=True),
               ds.repo.get_content_annexinfo(ref='HEAD',
                                             paths=['dummy'],
                                             eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], False)
    # make sure files with URL keys are correctly reported:
    from datalad.conftest import test_http_server
    remote_file_name = 'imaremotefile.dat'
    local_file_name = 'mehasurlkey'
    (Path(test_http_server.path) / remote_file_name).write_text("weee")
    remote_file_url = f'{test_http_server.url}/{remote_file_name}'
    # we need to get a file with a URL key and check its local availability
    ds.repo.call_annex(
        ['addurl', '--relaxed', remote_file_url, '--file', local_file_name])
    ds.save("URL keys!")
    # should not be there
    res = ds.repo.get_file_annexinfo(local_file_name, eval_availability=True)
    assert_equal(res['has_content'], False)
    ds.get(local_file_name)
    # should be there
    res = ds.repo.get_file_annexinfo(local_file_name, eval_availability=True)
    assert_equal(res['has_content'], True)
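The pattern exercised above can be reduced to a minimal sketch: create a dataset, save an annexed file, and query its availability before and after dropping the content. The dataset path and file name below are made up for illustration.

from datalad.api import Dataset

demo_ds = Dataset('/tmp/availability-demo').create()
(demo_ds.pathobj / 'payload.dat').write_text('some content')
demo_ds.save(message='add payload')
# with eval_availability=True every record carries a 'has_content' flag
info = demo_ds.repo.get_content_annexinfo(paths=['payload.dat'],
                                          eval_availability=True)
assert info[demo_ds.pathobj / 'payload.dat']['has_content']
# after a reckless drop the file is still reported, but its content is gone
demo_ds.drop('payload.dat', reckless='kill')
info = demo_ds.repo.get_content_annexinfo(paths=['payload.dat'],
                                          eval_availability=True)
assert not info[demo_ds.pathobj / 'payload.dat']['has_content']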
Example #2
def test_copy_file(workdir=None, webdir=None, weburl=None):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not trip
    # over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt', dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')

    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
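A condensed, hypothetical variant of the workflow above: copy a file between two datasets and then obtain its content in the destination. All paths and names are illustrative only.

from datalad.api import Dataset

src = Dataset('/tmp/copy-src').create()
(src.pathobj / 'data.txt').write_text('123')
src.save(message='add data')
dest = Dataset('/tmp/copy-dest').create()
# copy a single file; annex availability info travels along with it
dest.copy_file(src.pathobj / 'data.txt')
dest.get('data.txt')
# a source/destination pair copies to an explicit target name
dest.copy_file([src.pathobj / 'data.txt', dest.pathobj / 'renamed.txt'])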
Example #3
def test_get_subdatasets(path):
    ds = Dataset(path)
    # one more subdataset with a name that could ruin config option parsing
    dots = text_type(Path('subdir') / '.lots.of.dots.')
    ds.create(dots)
    eq_(ds.subdatasets(recursive=True, fulfilled=False, result_xfm='relpaths'), [
        'sub dataset1'
    ])
    ds.get('sub dataset1')
    eq_(ds.subdatasets(recursive=True, fulfilled=False, result_xfm='relpaths'), [
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/subm 1',
    ])
    # obtain key subdataset, so all leaf subdatasets are discoverable
    ds.get(opj('sub dataset1', 'sub sub dataset1'))
    eq_(ds.subdatasets(result_xfm='relpaths'), ['sub dataset1', dots])
    eq_([(r['parentds'], r['path']) for r in ds.subdatasets()],
        [(path, opj(path, 'sub dataset1')),
         (path, opj(path, dots))])
    eq_(ds.subdatasets(recursive=True, result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/subm 1',
        dots,
    ])
    # redo, but limit to specific paths
    eq_(
        ds.subdatasets(
            path=['sub dataset1/2', 'sub dataset1/sub sub dataset1'],
            recursive=True, result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/sub sub dataset1/2',
            'sub dataset1/sub sub dataset1/subm 1',
        ]
    )
    with chpwd(text_type(ds.pathobj / 'subdir')):
        # imitate cmdline invocation w/ no dataset argument
        # -> curdir limits the query, when no info is given
        eq_(subdatasets(dataset=None,
                        path=[],
                        recursive=True,
                        result_xfm='paths'),
            [text_type(ds.pathobj / dots)]
        )
        # but with a dataset explicitly given, even if just as a path,
        # curdir does not limit the query
        eq_(subdatasets(dataset=os.pardir,
                        path=None,
                        recursive=True,
                        result_xfm='relpaths'),
            ['sub dataset1',
             'sub dataset1/2',
             'sub dataset1/sub sub dataset1',
             'sub dataset1/sub sub dataset1/2',
             'sub dataset1/sub sub dataset1/subm 1',
             'sub dataset1/subm 1',
             dots]
        )
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, bottomup=True, result_xfm='relpaths'), [
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/subm 1',
        'sub dataset1',
        dots,
    ])
    eq_(ds.subdatasets(recursive=True, fulfilled=True, result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/sub sub dataset1',
        dots,
    ])
    eq_([(relpath(r['parentds'], start=ds.path), relpath(r['path'], start=ds.path))
         for r in ds.subdatasets(recursive=True)], [
        (os.curdir, 'sub dataset1'),
        ('sub dataset1', 'sub dataset1/2'),
        ('sub dataset1', 'sub dataset1/sub sub dataset1'),
        ('sub dataset1/sub sub dataset1', 'sub dataset1/sub sub dataset1/2'),
        ('sub dataset1/sub sub dataset1', 'sub dataset1/sub sub dataset1/subm 1'),
        ('sub dataset1', 'sub dataset1/subm 1'),
        (os.curdir, dots),
    ])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=0),
        [])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=1, result_xfm='relpaths'),
        ['sub dataset1', dots])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=2, result_xfm='relpaths'),
        [
        'sub dataset1',
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/subm 1',
        dots,
    ])
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        #for prop in ('gitmodule_url', 'state', 'revision', 'gitmodule_name'):
        for prop in ('gitmodule_url', 'revision', 'gitmodule_name'):
            assert_in(prop, r)
        # random property is unknown
        assert_not_in('mike', r)

    # now add info to all datasets
    res = ds.subdatasets(
        recursive=True,
        set_property=[('mike', 'slow'),
                      ('expansion', '<{refds_relname}>')])
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'], relpath(r['path'], r['refds']).replace(os.sep, '-'))
    # plain query again to see if it got into the files
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'], relpath(r['path'], r['refds']).replace(os.sep, '-'))

    # and remove again
    res = ds.subdatasets(recursive=True, delete_property='mike')
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)
    # and query again, because the above yields records edited on the fly
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)

    #
    # test --contains
    #
    target_sub = 'sub dataset1/sub sub dataset1/subm 1'
    # give the closest direct subdataset
    eq_(ds.subdatasets(contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1'])
    # should find the actual subdataset trail
    eq_(ds.subdatasets(recursive=True,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'])
    # doesn't affect recursion limit
    eq_(ds.subdatasets(recursive=True, recursion_limit=2,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1'])
    # for a direct dataset path match, return the matching dataset
    eq_(ds.subdatasets(recursive=True,
                       contains=target_sub,
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'])
    # but it has to be a subdataset, otherwise no match
    # which is what get_containing_subdataset() used to do
    eq_(ds.subdatasets(contains=ds.path), [])
    # no error if contains is bullshit
    eq_(ds.subdatasets(recursive=True,
                       contains='errrr_nope',
                       result_xfm='paths'),
        [])
    # TODO maybe add a courtesy bullshit detector some day
    eq_(ds.subdatasets(recursive=True,
                       contains=opj(pardir, 'errrr_nope'),
                       result_xfm='paths'),
        [])
    eq_(ds.subdatasets(
        recursive=True,
        contains=[target_sub, 'sub dataset1/2'],
        result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/sub sub dataset1/subm 1',
    ])
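The query styles used above boil down to a few recurring calls; the sketch below runs them against a hypothetical superdataset path, so the paths passed to contains= are assumptions.

from datalad.api import Dataset

superds = Dataset('/tmp/superds')
# top-level subdatasets only, as paths relative to the superdataset
top_level = superds.subdatasets(result_xfm='relpaths')
# full recursion, capped at two levels of nesting
capped = superds.subdatasets(recursive=True, recursion_limit=2,
                             result_xfm='relpaths')
# only the trail of subdatasets leading to a path of interest
trail = superds.subdatasets(recursive=True,
                            contains='sub/deeper/some_file.dat',
                            result_xfm='relpaths')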
Example #4
def _update_ds_agginfo(refds_path, ds_path, subds_paths, agginfo_db, to_save):
    """Perform metadata aggregation for ds and a given list of subdataset paths

    Parameters
    ----------
    refds_path : str
      Absolute path to the reference dataset that aggregate_metadata() was
      called on.
    ds_path : str
      Absolute path to the dataset to have its aggregate info updated
    subds_paths : list(str)
      Sequence of absolute paths of subdatasets of the to-be-updated dataset,
      whose agginfo shall be updated within the to-be-updated dataset.
      Any subdataset that is not listed here is assumed to be gone (i.e. no longer
      a subdataset at all, not just not locally installed)
    agginfo_db : dict
      Dictionary with all information on aggregate metadata on all datasets.
      Keys are absolute paths of datasets.
    to_save : list
      List of paths to save eventually. This function will add new paths as
      necessary.
    """
    ds = Dataset(ds_path)
    # location info of aggregate metadata
    # aggregate.json
    agginfo_fpath = opj(ds.path, agginfo_relpath)
    # base path in which aggregate.json and objects are located
    agg_base_path = dirname(agginfo_fpath)
    # load existing aggregate info dict
    # TODO take from cache, once used in _get_dsinfo_from_aggmetadata()
    ds_agginfos = _load_json_object(agginfo_fpath)
    # object locations referenced initially
    objlocs_was = set(ai[k]
                      for ai in ds_agginfos.values()
                      for k in location_keys
                      if k in ai)
    # track which objects need to be copied (each item is a from/to tuple)
    objs2copy = []
    # for each subdataset (any depth level)
    procds_paths = [ds_path] + subds_paths
    for dpath in procds_paths:
        ds_dbinfo = agginfo_db.get(dpath, {}).copy()
        # relative path of the current dataset within the dataset we are updating
        drelpath = relpath(dpath, start=ds.path)
        for loclabel in location_keys:
            # TODO filepath_info is obsolete
            if loclabel == 'filepath_info' and drelpath == curdir:
                # do not write a file list into the dataset it is from
                if 'filepath_info' in ds_dbinfo:
                    del ds_dbinfo['filepath_info']
                continue
            # abspath to object
            objloc = ds_dbinfo.get(loclabel, None)
            if objloc is None:
                continue
            # XXX needs to change when layout of object store is changed
            # current is ./datalad/metadata/objects/{hash}/{hash}
            target_objrelpath = opj(*objloc.split(os.sep)[-3:])
            # make sure we copy the file from its current location to where it is
            # needed in this dataset
            target_objpath = opj(agg_base_path, target_objrelpath)
            objs2copy.append((objloc, target_objpath))
            # now build needed local relpath
            ds_dbinfo[loclabel] = target_objrelpath
        # (re)assign in case record is new
        ds_agginfos[drelpath] = ds_dbinfo
    # remove all entries for which we do not (or no longer) have a
    # corresponding subdataset to take care of
    ds_agginfos = {k: v
                   for k, v in ds_agginfos.items()
                   if normpath(opj(ds_path, k)) in procds_paths}
    # set of metadata objects now referenced
    objlocs_is = set(
        ai[k]
        for sdsrpath, ai in ds_agginfos.items()
        for k in location_keys
        if k in ai)
    objs2add = objlocs_is

    # yoh: we apparently do need to filter the ones to remove - I did
    #      "git reset --hard HEAD^" and
    #      aggregate-metadata failed upon the next run trying to remove
    #      a file unknown to git. I have yet to figure out why that
    #      mattered (hopefully not that the reflog is used somehow)
    objs2remove = []
    for obj in objlocs_was.difference(objlocs_is):
        obj_path = opj(agg_base_path, obj)
        if lexists(obj_path):
            objs2remove.append(obj_path)
        else:
            # not really a warning, we don't need it anymore, it is already gone
            lgr.debug(
                "To-be-deleted metadata object not found, skip deletion (%s)",
                obj_path
            )

    # secretly remove obsolete object files, not really a result from a
    # user's perspective
    if objs2remove:
        ds.remove(
            objs2remove,
            # Don't use the misleading default commit message of `remove`:
            message='[DATALAD] Remove obsolete metadata object files',
            # we do not want to drop these files by default, because we would
            # lose them for other branches and earlier tags
            # TODO evaluate whether this should be exposed as a switch
            # to run an explicit force-drop prior to calling remove()
            check=False,
            result_renderer=None, return_type=list)
        if not objs2add and not refds_path == ds_path:
            # this is not the base dataset, make sure to save removal in the
            # parentds -- not needed when objects get added, as removal itself
            # is already committed
            to_save.append(dict(path=ds_path, type='dataset', staged=True))

    # must copy object files to local target destination
    # make sure those objects are present
    ds.get([f for f, t in objs2copy], result_renderer='disabled')
    for copy_from, copy_to in objs2copy:
        if copy_to == copy_from:
            continue
        target_dir = dirname(copy_to)
        if not exists(target_dir):
            makedirs(target_dir)
        # TODO we could be more clever (later) and maybe `addurl` (or similar)
        # the file from another dataset
        if lexists(copy_to):
            # no need to unlock, just wipe out and replace
            os.remove(copy_to)
        shutil.copy(copy_from, copy_to)
    to_save.append(
        dict(path=agginfo_fpath, type='file', staged=True))

    if objs2add:
        # they are added the standard way, depending on the repo type
        ds.add(
            [opj(agg_base_path, p) for p in objs2add],
            save=False, result_renderer=None, return_type=list)
        # queue for save, and mark as staged
        to_save.extend(
            [dict(path=opj(agg_base_path, p), type='file', staged=True)
             for p in objs2add])
    # write aggregate info file
    if not ds_agginfos:
        return

    json_py.dump(ds_agginfos, agginfo_fpath)
    ds.add(agginfo_fpath, save=False, to_git=True,
           result_renderer=None, return_type=list)
    # queue for save, and mark as staged
    to_save.append(
        dict(path=agginfo_fpath, type='file', staged=True))
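A rough sketch of how a caller inside the aggregation machinery might invoke this helper. All paths, the location key name, and the database content are invented here purely to illustrate the expected argument shapes.

# hypothetical absolute paths; the reference dataset is also the one updated
refds = '/tmp/superds'
subds = '/tmp/superds/subds'
# agginfo_db maps absolute dataset paths to their aggregate metadata records;
# 'dataset_info' merely stands in for one of the keys in location_keys
agginfo_db = {
    refds: {'dataset_info': '/tmp/superds/.datalad/metadata/objects/ab/cd'},
    subds: {'dataset_info': '/tmp/superds/.datalad/metadata/objects/ef/gh'},
}
to_save = []
_update_ds_agginfo(refds, refds, [subds], agginfo_db, to_save)
# to_save now lists aggregate.json (and any newly added object files) for the
# caller to save in one final commit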
Example #5
def test_get_subdatasets(path):
    ds = Dataset(path)
    # one more subdataset with a name that could ruin config option parsing
    dots = text_type(Path('subdir') / '.lots.of.dots.')
    ds.create(dots)
    eq_(ds.subdatasets(recursive=True, fulfilled=False, result_xfm='relpaths'),
        ['sub dataset1'])
    ds.get('sub dataset1')
    eq_(ds.subdatasets(recursive=True, fulfilled=False, result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
        ])
    # obtain key subdataset, so all leaf subdatasets are discoverable
    ds.get(opj('sub dataset1', 'sub sub dataset1'))
    eq_(ds.subdatasets(result_xfm='relpaths'), ['sub dataset1', dots])
    eq_([(r['parentds'], r['path']) for r in ds.subdatasets()],
        [(path, opj(path, 'sub dataset1')), (path, opj(path, dots))])
    eq_(ds.subdatasets(recursive=True, result_xfm='relpaths'), [
        'sub dataset1',
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/subm 1',
        dots,
    ])
    # redo, but limit to specific paths
    eq_(
        ds.subdatasets(
            path=['sub dataset1/2', 'sub dataset1/sub sub dataset1'],
            recursive=True,
            result_xfm='relpaths'), [
                'sub dataset1/2',
                'sub dataset1/sub sub dataset1',
                'sub dataset1/sub sub dataset1/2',
                'sub dataset1/sub sub dataset1/subm 1',
            ])
    with chpwd(text_type(ds.pathobj / 'subdir')):
        # imitate cmdline invocation w/ no dataset argument
        # -> curdir limits the query, when no info is given
        eq_(
            subdatasets(dataset=None,
                        path=[],
                        recursive=True,
                        result_xfm='paths'), [text_type(ds.pathobj / dots)])
        # but with a dataset explicitly given, even if just as a path,
        # curdir does not limit the query
        eq_(
            subdatasets(dataset=os.pardir,
                        path=None,
                        recursive=True,
                        result_xfm='relpaths'), [
                            'sub dataset1', 'sub dataset1/2',
                            'sub dataset1/sub sub dataset1',
                            'sub dataset1/sub sub dataset1/2',
                            'sub dataset1/sub sub dataset1/subm 1',
                            'sub dataset1/subm 1', dots
                        ])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, bottomup=True, result_xfm='relpaths'), [
        'sub dataset1/2',
        'sub dataset1/sub sub dataset1/2',
        'sub dataset1/sub sub dataset1/subm 1',
        'sub dataset1/sub sub dataset1',
        'sub dataset1/subm 1',
        'sub dataset1',
        dots,
    ])
    eq_(ds.subdatasets(recursive=True, fulfilled=True, result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/sub sub dataset1',
            dots,
        ])
    eq_([
        (relpath(r['parentds'],
                 start=ds.path), relpath(r['path'], start=ds.path))
        for r in ds.subdatasets(recursive=True)
    ], [
        (os.curdir, 'sub dataset1'),
        ('sub dataset1', 'sub dataset1/2'),
        ('sub dataset1', 'sub dataset1/sub sub dataset1'),
        ('sub dataset1/sub sub dataset1', 'sub dataset1/sub sub dataset1/2'),
        ('sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'),
        ('sub dataset1', 'sub dataset1/subm 1'),
        (os.curdir, dots),
    ])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=0), [])
    # uses slow, flexible query
    eq_(
        ds.subdatasets(recursive=True,
                       recursion_limit=1,
                       result_xfm='relpaths'), ['sub dataset1', dots])
    # uses slow, flexible query
    eq_(
        ds.subdatasets(recursive=True,
                       recursion_limit=2,
                       result_xfm='relpaths'), [
                           'sub dataset1',
                           'sub dataset1/2',
                           'sub dataset1/sub sub dataset1',
                           'sub dataset1/subm 1',
                           dots,
                       ])
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        #for prop in ('gitmodule_url', 'state', 'revision', 'gitmodule_name'):
        for prop in ('gitmodule_url', 'revision', 'gitmodule_name'):
            assert_in(prop, r)
        # random property is unknown
        assert_not_in('mike', r)

    # now add info to all datasets
    res = ds.subdatasets(recursive=True,
                         set_property=[('mike', 'slow'),
                                       ('expansion', '<{refds_relname}>')])
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))
    # plain query again to see if it got into the files
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))

    # and remove again
    res = ds.subdatasets(recursive=True, delete_property='mike')
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)
    # and query again, because the above yields records edited on the fly
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)

    #
    # test --contains
    #
    target_sub = 'sub dataset1/sub sub dataset1/subm 1'
    # give the closest direct subdataset
    eq_(
        ds.subdatasets(contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'), ['sub dataset1'])
    # should find the actual subdataset trail
    eq_(
        ds.subdatasets(recursive=True,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'), [
                           'sub dataset1', 'sub dataset1/sub sub dataset1',
                           'sub dataset1/sub sub dataset1/subm 1'
                       ])
    # doesn't affect recursion limit
    eq_(
        ds.subdatasets(recursive=True,
                       recursion_limit=2,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1', 'sub dataset1/sub sub dataset1'])
    # for a direct dataset path match, return the matching dataset
    eq_(
        ds.subdatasets(recursive=True,
                       contains=target_sub,
                       result_xfm='relpaths'), [
                           'sub dataset1', 'sub dataset1/sub sub dataset1',
                           'sub dataset1/sub sub dataset1/subm 1'
                       ])
    # but it has to be a subdataset, otherwise no match
    # which is what get_containing_subdataset() used to do
    eq_(ds.subdatasets(contains=ds.path), [])
    # no error if contains is bullshit
    eq_(
        ds.subdatasets(recursive=True,
                       contains='errrr_nope',
                       result_xfm='paths'), [])
    # TODO maybe add a courtesy bullshit detector some day
    eq_(
        ds.subdatasets(recursive=True,
                       contains=opj(pardir, 'errrr_nope'),
                       result_xfm='paths'), [])
    eq_(
        ds.subdatasets(recursive=True,
                       contains=[target_sub, 'sub dataset1/2'],
                       result_xfm='relpaths'), [
                           'sub dataset1',
                           'sub dataset1/2',
                           'sub dataset1/sub sub dataset1',
                           'sub dataset1/sub sub dataset1/subm 1',
                       ])
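The set_property/delete_property round trip tested above, condensed into a hypothetical sketch; the property name and dataset path are made up.

from datalad.api import Dataset

superds = Dataset('/tmp/superds')
# record a custom .gitmodules property on every registered subdataset
superds.subdatasets(recursive=True, set_property=[('priority', 'slow')])
# the property comes back in query results with a 'gitmodule_' prefix
for rec in superds.subdatasets(recursive=True):
    assert rec['gitmodule_priority'] == 'slow'
# and it can be removed again
superds.subdatasets(recursive=True, delete_property='priority')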