def test_report_absent_keys(path=None):
    ds = Dataset(path).create()
    # create an annexed file
    testfile = ds.pathobj / 'dummy'
    testfile.write_text(u'nothing')
    ds.save()
    # present in a full report and in a partial report
    # based on worktree of HEAD ref
    for ai in (
            ds.repo.get_content_annexinfo(eval_availability=True),
            ds.repo.get_content_annexinfo(paths=['dummy'],
                                          eval_availability=True),
            ds.repo.get_content_annexinfo(ref='HEAD',
                                          eval_availability=True),
            ds.repo.get_content_annexinfo(ref='HEAD', paths=['dummy'],
                                          eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], True)
    # drop the key, not available anywhere else
    ds.drop('dummy', reckless='kill')
    # does not change a thing, except the key is gone
    for ai in (
            ds.repo.get_content_annexinfo(eval_availability=True),
            ds.repo.get_content_annexinfo(paths=['dummy'],
                                          eval_availability=True),
            ds.repo.get_content_annexinfo(ref='HEAD',
                                          eval_availability=True),
            ds.repo.get_content_annexinfo(ref='HEAD', paths=['dummy'],
                                          eval_availability=True)):
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], False)
    # make sure files with URL keys are correctly reported:
    from datalad.conftest import test_http_server

    remote_file_name = 'imaremotefile.dat'
    local_file_name = 'mehasurlkey'
    (Path(test_http_server.path) / remote_file_name).write_text("weee")
    remote_file_url = f'{test_http_server.url}/{remote_file_name}'
    # we need to get a file with a URL key and check its local availability
    ds.repo.call_annex(
        ['addurl', '--relaxed', remote_file_url, '--file', local_file_name])
    ds.save("URL keys!")
    # should not be there
    res = ds.repo.get_file_annexinfo(local_file_name, eval_availability=True)
    assert_equal(res['has_content'], False)
    ds.get(local_file_name)
    # should be there
    res = ds.repo.get_file_annexinfo(local_file_name, eval_availability=True)
    assert_equal(res['has_content'], True)
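# A minimal sketch (not part of the original test) that factors the repeated
# four-way annexinfo availability check above into a single helper. It only
# assumes the AnnexRepo.get_content_annexinfo() keyword arguments already
# exercised in the test (`paths`, `ref`, `eval_availability`) and the same
# assert helpers used there.
def _assert_availability(repo, testfile, expected):
    # query the worktree and the HEAD ref, each with and without a path
    # constraint, and check the reported availability in every variant
    for kwargs in ({},
                   {'paths': ['dummy']},
                   {'ref': 'HEAD'},
                   {'ref': 'HEAD', 'paths': ['dummy']}):
        ai = repo.get_content_annexinfo(eval_availability=True, **kwargs)
        assert_in(testfile, ai)
        assert_equal(ai[testfile]['has_content'], expected)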
def test_copy_file(workdir=None, webdir=None, weburl=None):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')),
                        path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not
    # trip over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to a different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt',
        dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')
    # copy from an annex dataset into a plain Git repo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
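# For quick reference, a condensed recap (comments only, nothing beyond what
# the test above already demonstrates) of the copy_file() calling conventions
# it exercises; `src`, `src1`, `src2` are shorthand placeholders for source
# paths inside src_ds:
#
#   dest_ds.copy_file(src)                                  # single source path
#   dest_ds.copy_file([src, dest_ds.pathobj / 'renamed.txt'])  # source/dest pair
#   dest_ds.copy_file([src1, src2, dest_ds.pathobj])        # many sources + target dir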
def test_get_subdatasets(path):
    ds = Dataset(path)
    # one more subdataset with a name that could ruin config option parsing
    dots = text_type(Path('subdir') / '.lots.of.dots.')
    ds.create(dots)
    eq_(ds.subdatasets(recursive=True, fulfilled=False, result_xfm='relpaths'),
        ['sub dataset1'])
    ds.get('sub dataset1')
    eq_(ds.subdatasets(recursive=True, fulfilled=False, result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
        ])
    # obtain key subdataset, so all leaf subdatasets are discoverable
    ds.get(opj('sub dataset1', 'sub sub dataset1'))
    eq_(ds.subdatasets(result_xfm='relpaths'), ['sub dataset1', dots])
    eq_([(r['parentds'], r['path']) for r in ds.subdatasets()],
        [(path, opj(path, 'sub dataset1')), (path, opj(path, dots))])
    eq_(ds.subdatasets(recursive=True, result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/sub sub dataset1/2',
            'sub dataset1/sub sub dataset1/subm 1',
            'sub dataset1/subm 1',
            dots,
        ])
    # redo, but limit to specific paths
    eq_(
        ds.subdatasets(
            path=['sub dataset1/2', 'sub dataset1/sub sub dataset1'],
            recursive=True, result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/sub sub dataset1/2',
            'sub dataset1/sub sub dataset1/subm 1',
        ])
    with chpwd(text_type(ds.pathobj / 'subdir')):
        # imitate cmdline invocation w/ no dataset argument
        # -> curdir limits the query, when no info is given
        eq_(subdatasets(dataset=None, path=[], recursive=True,
                        result_xfm='paths'),
            [text_type(ds.pathobj / dots)])
        # but with a dataset explicitly given, even if just as a path,
        # curdir does not limit the query
        eq_(subdatasets(dataset=os.pardir, path=None, recursive=True,
                        result_xfm='relpaths'),
            ['sub dataset1',
             'sub dataset1/2',
             'sub dataset1/sub sub dataset1',
             'sub dataset1/sub sub dataset1/2',
             'sub dataset1/sub sub dataset1/subm 1',
             'sub dataset1/subm 1',
             dots])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, bottomup=True, result_xfm='relpaths'),
        [
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1/2',
            'sub dataset1/sub sub dataset1/subm 1',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
            'sub dataset1',
            dots,
        ])
    eq_(ds.subdatasets(recursive=True, fulfilled=True, result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/sub sub dataset1',
            dots,
        ])
    eq_([(relpath(r['parentds'], start=ds.path),
          relpath(r['path'], start=ds.path))
         for r in ds.subdatasets(recursive=True)],
        [
            (os.curdir, 'sub dataset1'),
            ('sub dataset1', 'sub dataset1/2'),
            ('sub dataset1', 'sub dataset1/sub sub dataset1'),
            ('sub dataset1/sub sub dataset1',
             'sub dataset1/sub sub dataset1/2'),
            ('sub dataset1/sub sub dataset1',
             'sub dataset1/sub sub dataset1/subm 1'),
            ('sub dataset1', 'sub dataset1/subm 1'),
            (os.curdir, dots),
        ])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=0), [])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=1,
                       result_xfm='relpaths'),
        ['sub dataset1', dots])
    # uses slow, flexible query
    eq_(ds.subdatasets(recursive=True, recursion_limit=2,
                       result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/subm 1',
            dots,
        ])
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        #for prop in ('gitmodule_url', 'state', 'revision', 'gitmodule_name'):
        for prop in ('gitmodule_url', 'revision', 'gitmodule_name'):
            assert_in(prop, r)
        # random property is unknown
        assert_not_in('mike', r)

    # now add info to all datasets
    res = ds.subdatasets(
        recursive=True,
        set_property=[('mike', 'slow'),
                      ('expansion', '<{refds_relname}>')])
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))
    # plain query again to see if it got into the files
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        eq_(r['gitmodule_mike'], 'slow')
        eq_(r['gitmodule_expansion'],
            relpath(r['path'], r['refds']).replace(os.sep, '-'))
    # and remove again
    res = ds.subdatasets(recursive=True, delete_property='mike')
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)
    # and again, because the above yields an on-the-fly edit
    res = ds.subdatasets(recursive=True)
    assert_status('ok', res)
    for r in res:
        for prop in ('gitmodule_mike',):
            assert_not_in(prop, r)

    #
    # test --contains
    #
    target_sub = 'sub dataset1/sub sub dataset1/subm 1'
    # give the closest direct subdataset
    eq_(ds.subdatasets(contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1'])
    # should find the actual subdataset trail
    eq_(ds.subdatasets(recursive=True,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'])
    # doesn't affect recursion limit
    eq_(ds.subdatasets(recursive=True, recursion_limit=2,
                       contains=opj(target_sub, 'something_inside'),
                       result_xfm='relpaths'),
        ['sub dataset1', 'sub dataset1/sub sub dataset1'])
    # for a direct dataset path match, return the matching dataset
    eq_(ds.subdatasets(recursive=True,
                       contains=target_sub,
                       result_xfm='relpaths'),
        ['sub dataset1',
         'sub dataset1/sub sub dataset1',
         'sub dataset1/sub sub dataset1/subm 1'])
    # but it has to be a subdataset, otherwise no match
    # which is what get_containing_subdataset() used to do
    eq_(ds.subdatasets(contains=ds.path), [])
    # no error if contains is bullshit
    eq_(ds.subdatasets(recursive=True,
                       contains='errrr_nope',
                       result_xfm='paths'),
        [])
    # TODO maybe add a courtesy bullshit detector some day
    eq_(ds.subdatasets(recursive=True,
                       contains=opj(pardir, 'errrr_nope'),
                       result_xfm='paths'),
        [])
    eq_(ds.subdatasets(
            recursive=True,
            contains=[target_sub, 'sub dataset1/2'],
            result_xfm='relpaths'),
        [
            'sub dataset1',
            'sub dataset1/2',
            'sub dataset1/sub sub dataset1',
            'sub dataset1/sub sub dataset1/subm 1',
        ])
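# A hedged verification sketch (not part of the original test): the persisted
# 'mike' property set via set_property above could also be read straight from
# .gitmodules with git-config. This assumes GitRepo.call_git() accepts a plain
# argument list, as it does elsewhere in DataLad; the helper name is made up
# for illustration.
def _dump_gitmodule_prop(ds, prop='mike'):
    # list all submodule sections in .gitmodules carrying the given property;
    # note that `git config --get-regexp` exits non-zero when nothing matches
    return ds.repo.call_git(
        ['config', '-f', '.gitmodules', '--get-regexp',
         r'submodule\..*\.{}'.format(prop)])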
def _update_ds_agginfo(refds_path, ds_path, subds_paths, agginfo_db, to_save):
    """Perform metadata aggregation for ds and a given list of subdataset paths

    Parameters
    ----------
    refds_path : str
      Absolute path to the reference dataset that aggregate_metadata() was
      called on.
    ds_path : str
      Absolute path to the dataset to have its aggregate info updated
    subds_paths : list(str)
      Sequence of absolute paths of subdatasets of the to-be-updated dataset,
      whose agginfo shall be updated within the to-be-updated dataset.
      Any subdataset that is not listed here is assumed to be gone (i.e. no
      longer a subdataset at all, not just not locally installed)
    agginfo_db : dict
      Dictionary with all information on aggregate metadata on all datasets.
      Keys are absolute paths of datasets.
    to_save : list
      List of paths to save eventually. This function will add new paths as
      necessary.
    """
    ds = Dataset(ds_path)
    # location info of aggregate metadata
    # aggregate.json
    agginfo_fpath = opj(ds.path, agginfo_relpath)
    # base path in which aggregate.json and objects are located
    agg_base_path = dirname(agginfo_fpath)
    # load existing aggregate info dict
    # TODO take from cache, once used in _get_dsinfo_from_aggmetadata()
    ds_agginfos = _load_json_object(agginfo_fpath)
    # object locations referenced initially
    objlocs_was = set(ai[k] for ai in ds_agginfos.values()
                      for k in location_keys
                      if k in ai)
    # track which objects need to be copied (each item is a from/to tuple)
    objs2copy = []
    # for each subdataset (any depth level)
    procds_paths = [ds_path] + subds_paths
    for dpath in procds_paths:
        ds_dbinfo = agginfo_db.get(dpath, {}).copy()
        # relative path of the current dataset within the dataset we are
        # updating
        drelpath = relpath(dpath, start=ds.path)
        for loclabel in location_keys:
            # TODO filepath_info is obsolete
            if loclabel == 'filepath_info' and drelpath == curdir:
                # do not write a file list into the dataset it is from
                if 'filepath_info' in ds_dbinfo:
                    del ds_dbinfo['filepath_info']
                continue
            # abspath to object
            objloc = ds_dbinfo.get(loclabel, None)
            if objloc is None:
                continue
            # XXX needs to change when layout of object store is changed
            # current is ./datalad/metadata/objects/{hash}/{hash}
            target_objrelpath = opj(*objloc.split(os.sep)[-3:])
            # make sure we copy the file from its current location to where
            # it is needed in this dataset
            target_objpath = opj(agg_base_path, target_objrelpath)
            objs2copy.append((objloc, target_objpath))
            # now build needed local relpath
            ds_dbinfo[loclabel] = target_objrelpath
        # (re)assign in case record is new
        ds_agginfos[drelpath] = ds_dbinfo
    # remove all entries for which we do not (or no longer) have a
    # corresponding subdataset to take care of
    ds_agginfos = {k: v for k, v in ds_agginfos.items()
                   if normpath(opj(ds_path, k)) in procds_paths}
    # set of metadata objects now referenced
    objlocs_is = set(
        ai[k]
        for sdsrpath, ai in ds_agginfos.items()
        for k in location_keys
        if k in ai)
    objs2add = objlocs_is

    # yoh: we apparently do need to filter the ones to remove - I did
    # "git reset --hard HEAD^" and
    # aggregate-metadata failed upon next run trying to remove
    # a file unknown to git.  I am yet to figure out why that
    # mattered (hopefully not that reflog is used somehow)
    objs2remove = []
    for obj in objlocs_was.difference(objlocs_is):
        obj_path = opj(agg_base_path, obj)
        if lexists(obj_path):
            objs2remove.append(obj_path)
        else:
            # not really a warning, we don't need it anymore, it is already
            # gone
            lgr.debug(
                "To-be-deleted metadata object not found, skip deletion (%s)",
                obj_path
            )

    # secretly remove obsolete object files, not really a result from a
    # user's perspective
    if objs2remove:
        ds.remove(
            objs2remove,
            # Don't use the misleading default commit message of `remove`:
            message='[DATALAD] Remove obsolete metadata object files',
            # we do not want to drop these files by default, because we would
            # lose them for other branches, and earlier tags
            # TODO evaluate whether this should be exposed as a switch
            # to run an explicit force-drop prior to calling remove()
            check=False,
            result_renderer=None,
            return_type=list)
        if not objs2add and not refds_path == ds_path:
            # this is not the base dataset, make sure to save removal in the
            # parentds -- not needed when objects get added, as removal itself
            # is already committed
            to_save.append(dict(path=ds_path, type='dataset', staged=True))

    # must copy object files to local target destination
    # make sure those objects are present
    ds.get([f for f, t in objs2copy], result_renderer='disabled')
    for copy_from, copy_to in objs2copy:
        if copy_to == copy_from:
            continue
        target_dir = dirname(copy_to)
        if not exists(target_dir):
            makedirs(target_dir)
        # TODO we could be more clever (later) and maybe `addurl` (or similar)
        # the file from another dataset
        if lexists(copy_to):
            # no need to unlock, just wipe out and replace
            os.remove(copy_to)
        shutil.copy(copy_from, copy_to)
    to_save.append(
        dict(path=agginfo_fpath, type='file', staged=True))

    if objs2add:
        # they are added the standard way, depending on the repo type
        ds.add(
            [opj(agg_base_path, p) for p in objs2add],
            save=False,
            result_renderer=None,
            return_type=list)
        # queue for save, and mark as staged
        to_save.extend(
            [dict(path=opj(agg_base_path, p), type='file', staged=True)
             for p in objs2add])
    # write aggregate info file
    if not ds_agginfos:
        return

    json_py.dump(ds_agginfos, agginfo_fpath)
    ds.add(agginfo_fpath, save=False, to_git=True,
           result_renderer=None, return_type=list)
    # queue for save, and mark as staged
    to_save.append(
        dict(path=agginfo_fpath, type='file', staged=True))
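# Worked example (comments only; the concrete path is hypothetical) of the
# object-store path mapping used in _update_ds_agginfo() above, following the
# layout noted there (.../metadata/objects/{hash}/{hash}):
#
#   objloc = '/tmp/ds/.datalad/metadata/objects/10/ab23cd'
#   opj(*objloc.split(os.sep)[-3:])   ->   'objects/10/ab23cd'
#
# i.e. only the last three path components are kept, yielding the
# dataset-local relative path that is stored in the aggregate info record
# and later resolved against agg_base_path when the object file is copied.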