Example #1
def test_rerun_run_left_nonrun_right(path=None):
    ds = Dataset(path).create()
    # keep direct repo accessor to speed things up
    ds_repo = ds.repo
    ds.run("echo foo >foo")
    ds_repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "side"])
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("blah")
    ds.save()
    ds_repo.checkout(DEFAULT_BRANCH)
    ds_repo.merge("side", options=["-m", "Merge side"])
    # o                 d_n
    # |\
    # | o               c_n
    # o |               b_r
    # |/
    # o                 a_n
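    # (suffix legend, as used in these commit graphs: n = non-run commit,
    #  r = run commit, R = re-executed run commit, M = merge commit created by rerun)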

    ds.rerun(since="", onto="")
    # o                 d_M
    # |\
    # | o               c_n
    # o |               b_R
    # |/
    # o                 a_n
    neq_(ds_repo.get_hexsha(DEFAULT_BRANCH + "^"), ds_repo.get_hexsha("HEAD^"))
    eq_(ds_repo.get_hexsha(DEFAULT_BRANCH + "^2"),
        ds_repo.get_hexsha("HEAD^2"))

    ds_repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    # o                 b_R
    # o                 c_n
    # o                 a_n
    assert_false(ds_repo.commit_exists("HEAD^2"))
    eq_(ds_repo.get_hexsha(DEFAULT_BRANCH + "^2"), ds_repo.get_hexsha("HEAD^"))

    ds_repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds_repo.get_hexsha()
    ds.rerun(since="")
    # o                 d_n
    # |\
    # | o               c_n
    # o |               b_r
    # |/
    # o                 a_n
    eq_(hexsha_before, ds_repo.get_hexsha())
Example #2
def test_basic_setup():
    # the import alone will verify that all default values match their
    # constraints
    from datalad import api

    # random pick of something that should be there
    assert_true(hasattr(api, 'install'))
    assert_true(hasattr(api, 'create'))
    # make sure all helper utilities do not pollute the namespace
    # and we end up only with __...__ attributes
    assert_false(
        list(
            filter(lambda s: s.startswith('_') and not re.match('__.*__', s),
                   dir(api))))

    assert_in('Parameters', api.Dataset.install.__doc__)
    assert_in('Parameters', api.Dataset.create.__doc__)
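The namespace-pollution check above also works as a stand-alone snippet; a minimal sketch using only the standard library and the same public `datalad.api` import:

import re

from datalad import api

# any name starting with a single underscore (i.e. not a dunder) would be a
# helper leaking into the public API namespace
leaked = [s for s in dir(api) if s.startswith('_') and not re.match('__.*__', s)]
assert not leaked, leaked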
Example #3
def test_gitcredential_interface(path=None):
    # use a dataset as a local configuration vehicle
    ds = Dataset(path).create()

    # preserve credentials between git processes for a brief time
    # credential-cache is not supported on windows (needs UNIX sockets)
    # ds.config.set('credential.helper', 'cache', scope='local')
    # However, first set an empty helper in order to disable already set helpers
    ds.config.set('credential.helper', '', scope='local')
    ds.config.set('credential.helper', 'store', scope='local')

    # git manages credentials by target URL
    credurl = 'https://example.datalad.org/somepath'
    credurl_justhost = 'https://example.datalad.org'
    # define a credential
    cred = GitCredentialInterface(url=credurl,
                                  username='mike',
                                  password='s3cr3t',
                                  repo=ds)
    # put it in the manager (the 'store' helper in this case, but this could
    # invoke any number of configured helpers)
    cred.approve()
    # new instance, no knowledge of login
    cred = GitCredentialInterface(url=credurl, repo=ds)
    assert_not_in('username', cred)
    # query store
    cred.fill()
    eq_(cred['username'], 'mike')
    eq_(cred['password'], 's3cr3t')
    # git does host-only identification by default (see credential.useHttpPath)
    cred = GitCredentialInterface(url=credurl_justhost, repo=ds)
    cred.fill()
    eq_(cred['username'], 'mike')
    eq_(cred['password'], 's3cr3t')

    # the URL is enough to remove ("reject") a credential
    GitCredentialInterface(url=credurl, repo=ds).reject()

    cred = GitCredentialInterface(url=credurl, repo=ds)
    # this will yield empty username/password values; not the most precise
    # test of whether the credentials were actually removed, but it is
    # some test at least
    cred.fill()
    assert_false(cred['username'])
    assert_false(cred['password'])
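For comparison, roughly the same fill step can be driven through git's own credential protocol, which `GitCredentialInterface` appears to wrap. A minimal sketch, assuming `git` is on `PATH` and a credential helper is configured (without one, git may prompt interactively):

import subprocess

out = subprocess.run(
    ["git", "credential", "fill"],
    input="url=https://example.datalad.org/somepath\n\n",
    capture_output=True, text=True, check=True,
).stdout
# git answers with key=value lines such as username=... and password=...
creds = dict(line.split("=", 1) for line in out.splitlines() if "=" in line)
print(creds.get("username"), creds.get("password"))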
Example #4
def test_alter_interface_docs_for_api():
    alt = alter_interface_docs_for_api(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('CMD', alt)
    assert_not_in('PY', alt)
    assert_not_in('REFLOW', alt)
    assert_in('a b', alt)
    assert_in('not\n   reflowed', alt)
    assert_in("Some Python-only bits Multiline!", alt)

    altpd = alter_interface_docs_for_api(demo_paramdoc)
    assert_in('python', altpd)
    assert_in('in between', altpd)
    assert_in('appended', altpd)
    assert_not_in('cmdline', altpd)
Example #5
def test_check_dates(path=None):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo], reference_date=refdate, return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
Example #6
    def check_dss():
        # we added the remote and set all the preferred-content settings
        for subds in subdss:
            eq_(subds.repo.get_preferred_content('wanted', remote),
                'standard' if standardgroup else '')
            eq_(subds.repo.get_preferred_content('group', remote),
                standardgroup or '')

        for target_sub in target_subdss:
            ok_(target_sub.is_installed())  # it is there now
            eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
            # and we have transferred the content
            if standardgroup and standardgroup == 'backup':
                # only then content should be copied
                ok_file_has_content(opj(target_sub.path, 'sub.dat'),
                                    'lots of data')
            else:
                # otherwise nothing is copied by default
                assert_false(target_sub.repo.file_has_content('sub.dat'))
Example #7
def test_new_or_modified(path=None):
    def get_new_or_modified(*args, **kwargs):
        return [op.relpath(ap["path"], path)
                for ap in new_or_modified(diff_revision(*args, **kwargs))]

    ds = Dataset(path).create(force=True, annex=False)

    # Check out an orphan branch so that we can test the "one commit
    # in a repo" case.
    ds.repo.checkout("orph", options=["--orphan"])
    ds.save()
    assert_false(ds.repo.dirty)
    eq_(len(ds.repo.get_revisions("HEAD")), 1)
    # Diffing doesn't fail when the branch contains a single commit.
    assert_in("to_modify", get_new_or_modified(ds, "HEAD"))

    # New files are detected, deletions are not.
    ds.repo.remove(["to_remove"])
    ok_(ds.repo.dirty)

    with open(op.join(path, "to_add"), "w") as f:
        f.write("content5")
    ds.repo.add(["to_add"])
    ds.repo.commit("add one, remove another")

    eq_(get_new_or_modified(ds, "HEAD"),
        ["to_add"])

    # Modifications are detected.
    with open(op.join(path, "to_modify"), "w") as f:
        f.write("updated 1")
    with open(op.join(path, "d/to_modify"), "w") as f:
        f.write("updated 2")
    ds.save(["to_modify", "d/to_modify"])

    eq_(set(get_new_or_modified(ds, "HEAD")),
        {"to_modify", op.join("d", "to_modify")})

    # Non-HEAD revisions work.
    ds.repo.commit("empty", options=["--allow-empty"])
    assert_false(get_new_or_modified(ds, "HEAD"))
    eq_(set(get_new_or_modified(ds, "HEAD~")),
        {"to_modify", op.join("d", "to_modify")})
Example #8
def test_rerun_unrelated_run_left_nonrun_right(path=None):
    ds = Dataset(path).create()
    # keep direct repo accessor to speed things up
    ds_repo = ds.repo
    ds.run("echo foo >foo")
    ds_repo.checkout(DEFAULT_BRANCH + "~", options=["--orphan", "side"])
    ds.save(message="squashed")
    ds_repo.checkout(DEFAULT_BRANCH)
    ds_repo.merge("side",
                  options=["-m", "Merge side", "--allow-unrelated-histories"])
    # o                 d_n
    # |\
    # | o               c_n
    # o                 b_r
    # o                 a_n

    ds.rerun(since="", onto="")
    # o                 d_M
    # |\
    # | o               c_n
    # o                 b_R
    # o                 a_n
    neq_(ds_repo.get_hexsha("HEAD^"), ds_repo.get_hexsha(DEFAULT_BRANCH + "^"))
    eq_(ds_repo.get_hexsha("HEAD^2"),
        ds_repo.get_hexsha(DEFAULT_BRANCH + "^2"))
    assert_false(ds_repo.commit_exists("HEAD^2^"))

    ds_repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    # o                 b_R
    # o                 c_n
    assert_false(ds_repo.commit_exists("HEAD^2"))
    eq_(ds_repo.get_hexsha("HEAD^"), ds_repo.get_hexsha(DEFAULT_BRANCH + "^2"))

    ds_repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="")
    # o                 d_n
    # |\
    # | o               c_n
    # o                 b_r
    # o                 a_n
    eq_(ds_repo.get_hexsha(DEFAULT_BRANCH), ds_repo.get_hexsha())
Example #9
def check_datasets_order(res, order='bottom-up'):
    """Check that all type=dataset records not violating the expected order

    it is somewhat weak test, i.e. records could be produced so we
    do not detect that order is violated, e.g. a/b c/d would satisfy
    either although they might be neither depth nor breadth wise.  But
    this test would allow to catch obvious violations like a, a/b, a
    """
    prev = None
    for r in res:
        if r.get('type') != 'dataset':
            continue
        if prev and r['path'] != prev:
            if order == 'bottom-up':
                assert_false(path_startswith(r['path'], prev))
            elif order == 'top-down':
                assert_false(path_startswith(prev, r['path']))
            else:
                raise ValueError(order)
        prev = r['path']
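A hypothetical usage sketch for this helper: feed it the result records of a recursive subdataset query and assert the traversal order (the dataset path is illustrative, and 'top-down' assumes the records arrive parents-first):

from datalad.api import Dataset

ds = Dataset("/tmp/some-superdataset")  # illustrative path
res = ds.subdatasets(recursive=True, result_renderer='disabled')
check_datasets_order(res, order='top-down')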
Example #10
def test_update_unborn_master(path=None):
    ds_a = Dataset(op.join(path, "ds-a")).create()
    ds_a.repo.call_git(["branch", "-m", DEFAULT_BRANCH, "other"])
    ds_a.repo.checkout(DEFAULT_BRANCH, options=["--orphan"])
    ds_b = install(source=ds_a.path, path=op.join(path, "ds-b"))

    ds_a.repo.checkout("other")
    (ds_a.pathobj / "foo").write_text("content")
    ds_a.save()

    # clone() will try to switch away from an unborn branch if there
    # is another ref available.  Reverse these efforts so that we can
    # test that update() fails reasonably here because we should still
    # be able to update from remotes that datalad didn't clone.
    ds_b.repo.update_ref("HEAD", "refs/heads/" + DEFAULT_BRANCH, symbolic=True)
    assert_false(ds_b.repo.commit_exists("HEAD"))
    assert_status("impossible", ds_b.update(merge=True, on_failure="ignore"))

    ds_b.repo.checkout("other")
    assert_status("ok", ds_b.update(merge=True, on_failure="ignore"))
    eq_(ds_a.repo.get_hexsha(), ds_b.repo.get_hexsha())
Example #11
def test_ExtractedArchive(path=None):
    archive = op.join(path, fn_archive_obscure_ext)
    earchive = ExtractedArchive(archive)
    assert_false(op.exists(earchive.path))
    # no longer the case -- just using hash for now
    # assert_in(os.path.basename(archive), earchive.path)

    fpath = op.join(
        fn_archive_obscure,  # lead directory
        fn_in_archive_obscure)
    extracted = earchive.get_extracted_filename(fpath)
    eq_(extracted, op.join(earchive.path, fpath))
    assert_false(op.exists(extracted))  # not yet

    extracted_ = earchive.get_extracted_file(fpath)
    eq_(extracted, extracted_)
    assert_true(op.exists(extracted))  # now it should

    extracted_files = earchive.get_extracted_files()
    ok_generator(extracted_files)
    eq_(
        sorted(extracted_files),
        sorted([
            # ['bbc/3.txt', 'bbc/abc']
            op.join(fn_archive_obscure, fn_in_archive_obscure),
            op.join(fn_archive_obscure, '3.txt')
        ]))

    earchive.clean()
    if not dl_cfg.get('datalad.tests.temp.keep'):
        assert_false(op.exists(earchive.path))
Example #12
def test_external_versions_basic():
    ev = ExternalVersions()
    our_module = 'datalad'
    assert_equal(ev.versions, {})
    assert_equal(ev[our_module], __version__)
    # and it could be compared
    assert_greater_equal(ev[our_module], __version__)
    # We got some odd failure in this test not long after switching to versioneer
    # (https://github.com/datalad/datalad/issues/5785).  Verify that we do get the
    # expected data types.
    our_version = ev[our_module].version
    assert isinstance(
        our_version,
        (str, list)), f"Got {our_version!r} of type {type(our_version)}"
    assert_greater(ev[our_module], '0.1')
    assert_equal(list(ev.keys()), [our_module])
    assert_true(our_module in ev)
    assert_false('unknown' in ev)

    # all are LooseVersions now
    assert_true(isinstance(ev[our_module], LooseVersion))
    version_str = __version__
    assert_equal(ev.dumps(), "Versions: %s=%s" % (our_module, version_str))

    # For non-existing one we get None
    assert_equal(ev['custom__nonexisting'], None)
    # and nothing gets added to _versions for nonexisting
    assert_equal(set(ev.versions.keys()), {our_module})

    # but if it is a module without version, we get it set to UNKNOWN
    assert_equal(ev['os'], ev.UNKNOWN)
    # And get a record on that inside
    assert_equal(ev.versions.get('os'), ev.UNKNOWN)
    # And that thing is "True", i.e. present
    assert (ev['os'])
    # but not comparable with anything besides itself (was above)
    assert_raises(TypeError, cmp, ev['os'], '0')
    assert_raises(TypeError, assert_greater, ev['os'], '0')

    return
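Outside the test, the same interface is usually reached through a module-level singleton; a minimal sketch (the `external_versions` import path is an assumption based on datalad's `support.external_versions` module):

from datalad.support.external_versions import external_versions as ev

print(ev['datalad'])              # LooseVersion of the installed datalad
print(ev['os'])                   # modules without a version report ev.UNKNOWN
print(ev['custom__nonexisting'])  # None for modules that cannot be imported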
Example #13
def test_rerun_explicit(path=None):
    ds = Dataset(path).create(force=True)

    ds.run("echo o >> foo", explicit=True, outputs=["foo"])
    with open(op.join(ds.path, "foo")) as ifh:
        orig_content = ifh.read()
        orig_head = ds.repo.get_hexsha(DEFAULT_BRANCH)

    # Explicit rerun is allowed in a dirty tree.
    ok_(ds.repo.dirty)
    ds.rerun(explicit=True)
    eq_(orig_head, ds.repo.get_hexsha(DEFAULT_BRANCH + "~1"))
    with open(op.join(ds.path, "foo")) as ifh:
        eq_(orig_content * 2, ifh.read())

    # --since also works.
    ds.rerun(since="", explicit=True)
    eq_(orig_head,
        # Added two rerun commits.
        ds.repo.get_hexsha(DEFAULT_BRANCH + "~3"))

    # With just untracked changes, we can rerun with --onto.
    ds.rerun(since="", onto="", explicit=True)
    eq_(ds.repo.get_hexsha(orig_head + "^"),
        # Reran the four run commits from above on the initial base.
        ds.repo.get_hexsha("HEAD~4"))

    # But checking out a new HEAD can fail when there are modifications.
    ds.repo.checkout(DEFAULT_BRANCH)
    ok_(ds.repo.dirty)
    ds.repo.add(["to_modify"], git=True)
    ds.save()
    assert_false(ds.repo.dirty)
    with open(op.join(ds.path, "to_modify"), "a") as ofh:
        ofh.write("more")
    ok_(ds.repo.dirty)

    with assert_raises(CommandError):
        ds.rerun(onto="", since="", explicit=True)
Example #14
def test_GitRepo_instance_from_not_existing(path=None, path2=None):
    # 1. path doesn't exist, and we do not init():
    repo = GitRepo(path)
    assert_false(op.exists(path))

    # 2. path exists, but there is no git repo, and we do not init():
    os.mkdir(path)
    ok_(op.exists(path))
    repo = GitRepo(path)
    assert_false(op.exists(op.join(path, '.git')))

    # 3. path doesn't exist, but we init():
    gr = GitRepo(path2).init()
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    ok_(op.exists(op.join(path2, '.git')))
    # re-enable once core GitRepo has a status() method
    #assert_repo_status(path2, annex=False)

    # 4. path exists without a git repo, and we init():
    gr = GitRepo(path).init()
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    ok_(op.exists(op.join(path, '.git')))
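The create-on-`init()` behaviour this test relies on, as a stand-alone sketch (the path is illustrative):

import os.path as op

from datalad.support.gitrepo import GitRepo

repo_path = '/tmp/gitrepo-init-example'   # illustrative location
gr = GitRepo(repo_path)                   # constructing alone creates nothing
assert not op.exists(op.join(repo_path, '.git'))
gr.init()                                 # now an actual git repository exists
assert op.exists(op.join(repo_path, '.git'))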
Example #15
def test_no_storage(store1=None, store2=None, ds_path=None):
    store1_url = 'ria+' + get_local_file_url(store1)
    store2_url = 'ria+' + get_local_file_url(store2)

    ds = Dataset(ds_path).create(force=True)
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    res = ds.create_sibling_ria(store1_url,
                                "datastore1",
                                storage_sibling=False,
                                new_store_ok=True)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_({'datastore1', 'here'},
        {s['name']
         for s in ds.siblings(result_renderer='disabled')})

    # deprecated way of disabling storage still works
    res = ds.create_sibling_ria(store2_url,
                                "datastore2",
                                storage_sibling=False,
                                new_store_ok=True)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_({'datastore2', 'datastore1', 'here'},
        {s['name']
         for s in ds.siblings(result_renderer='disabled')})

    # no annex/object dir should be created when there is no special remote
    # to use it.
    for s in [store1, store2]:
        p = Path(s) / ds.id[:3] / ds.id[3:] / 'annex' / 'objects'
        assert_false(p.exists())

    # smoke test that we can push to it
    res = ds.push(to='datastore1')
    assert_status('ok', res)
    # but nothing was copied, because there is no storage sibling
    assert_result_count(res, 0, action='copy')
Example #16
def test_cred1_enter_new():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.enter_new(), None)
    assert_true(cred.is_known)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(keyring.get('name', 'password'), 'password1')
    keyring.delete('name')
    assert_raises(KeyError, keyring.delete, 'name', 'user')
    assert_raises(KeyError, keyring.delete, 'name')
    assert_equal(keyring.get('name', 'user'), None)

    # Test it blowing up if we provide unknown field
    with assert_raises(ValueError) as cme:
        cred.enter_new(username='******')
    assert_in('field(s): username.  Known but not specified: password, user',
              str(cme.value))

    # Test that if user is provided, it is not asked
    cred.enter_new(user='user2')
    assert_equal(keyring.get('name', 'user'), 'user2')
    assert_equal(keyring.get('name', 'password'), 'newpassword')
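A stand-alone sketch of the keyring API exercised above. `MemoryKeyring` keeps everything in process memory, and the `set` signature is assumed to mirror the `get`/`delete` calls shown in the test:

from datalad.support.keyring_ import MemoryKeyring

kr = MemoryKeyring()
kr.set('name', 'user', 'someuser')        # (name, field, value) -- assumed signature
assert kr.get('name', 'user') == 'someuser'
kr.delete('name', 'user')
assert kr.get('name', 'user') is None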
Example #17
def test_unlock_directory(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.unlock(path="dir")
    dirpath = Path("dir")
    dirpath_abs = Path(ds.pathobj / "dir")

    # On adjusted branches (for the purposes of this test, crippled
    # filesystems), the files were already unlocked and the committed state is
    # the unlocked pointer file.
    is_managed_branch = ds.repo.is_managed_branch()
    if is_managed_branch:
        assert_repo_status(ds.path)
    else:
        assert_repo_status(ds.path, modified=[dirpath / "a", dirpath / "b"])
    ds.save()
    ds.drop(str(dirpath / "a"), reckless='kill')
    assert_false(ds.repo.file_has_content(str(dirpath / "a")))

    # Unlocking without an explicit non-directory path doesn't fail if one of
    # the directory's files doesn't have content.
    res = ds.unlock(path="dir")
    assert_not_in_results(res, action="unlock",
                          path=str(dirpath_abs / "a"))
    if is_managed_branch:
        assert_not_in_results(res, action="unlock",
                              path=str(dirpath_abs / "b"))
    else:
        assert_in_results(res, action="unlock", status="ok",
                          path=str(dirpath_abs / "b"))
        assert_repo_status(ds.path, modified=[dirpath / "b"])

    # If we explicitly provide a path that lacks content, we get a result
    # for it.
    assert_in_results(ds.unlock(path=dirpath / "a", on_failure="ignore"),
                      action="unlock", status="impossible",
                      path=str(dirpath_abs / "a"))
Example #18
def test_name_starts_with_hyphen(origpath=None, path=None):
    ds = Dataset.create(origpath)
    # create
    dash_sub = ds.create('-sub')
    assert_true(dash_sub.is_installed())
    assert_result_count(ds.subdatasets(),
                        1,
                        path=dash_sub.path,
                        state='present')

    # clone
    ds_clone = Dataset.create(path)
    dash_clone = clone(source=dash_sub.path, path=os.path.join(path, '-clone'))
    ds_clone.save(recursive=True)
    assert_true(dash_clone.is_installed())
    assert_result_count(ds_clone.subdatasets(),
                        1,
                        path=dash_clone.path,
                        state='present')

    # uninstall
    ds_clone.drop('-clone', what='all', reckless='kill', recursive=True)
    assert_false(dash_clone.is_installed())
    assert_result_count(ds_clone.subdatasets(),
                        1,
                        path=dash_clone.path,
                        state='absent')

    # get
    ds_clone.get('-clone')
    assert_true(dash_clone.is_installed())
    assert_result_count(ds_clone.subdatasets(),
                        1,
                        path=dash_clone.path,
                        state='present')

    assert_repo_status(ds.path)
Example #19
def test_get_subdataset_inherit_reckless(path=None, *, override):
    src = Dataset(opj(path, "a")).create()
    src_subds = src.create("sub")
    src_subds.create("subsub")
    src.save(recursive=True)

    clone = install(opj(path, "b"),
                    source=src,
                    reckless="auto",
                    result_xfm="datasets",
                    return_type="item-or-list")
    clone_sub = Dataset(clone.pathobj / "sub")
    assert_false(clone_sub.is_installed())
    clone_subsub = Dataset(clone.pathobj / "sub" / "subsub")

    clone.get(opj("sub", "subsub"), reckless=False if override else None)
    ok_(clone_sub.is_installed())
    ok_(clone_subsub.is_installed())

    for sub in [clone_sub, clone_subsub]:
        eq_(sub.config.get("datalad.clone.reckless", None),
            None if override else "auto")
        eq_(sub.config.get("annex.hardlink", None),
            None if override else "true")
Example #20
def test_aggregate_with_unavailable_objects_from_subds(path=None, target=None):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)

    # now make that a subdataset of a new one, so aggregation needs to get the
    # metadata objects first:
    super = Dataset(target).create()
    super.install("base", source=base.path)
    assert_repo_status(super.path)
    clone = Dataset(opj(super.path, "base"))
    assert_repo_status(clone.path)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = clone.repo.get_content_annexinfo(paths=[objpath],
                                            init=None,
                                            eval_availability=True)
    eq_(len(objs), 6)
    assert_false(any(st["has_content"] for st in objs.values()))

    # now aggregate should get those metadata objects
    super.aggregate_metadata(recursive=True,
                             update_mode='all',
                             force_extraction=False)
    objs_after = clone.repo.get_content_annexinfo(paths=objs,
                                                  init=None,
                                                  eval_availability=True)
    assert_true(all(st["has_content"] for st in objs_after.values()))
Example #21
def test_add_archive_dirs(path_orig=None, url=None, repo_path=None):
    # change to repo_path
    with chpwd(repo_path):
        # create annex repo
        ds = Dataset(repo_path).create(force=True)
        repo = ds.repo
        # add archive to the repo so we could test
        with swallow_outputs():
            repo.add_url_to_file('1.tar.gz', opj(url, '1.tar.gz'))
        repo.commit("added 1.tar.gz")

        # test with excludes and annex options
        add_archive_content(
            '1.tar.gz',
            existing='archive-suffix',
            # Leading dirs are inconsistent and in many cases there are
            # none to strip; strip only what matches the settings below
            strip_leading_dirs=True,
            delete=True,
            leading_dirs_consider=['crcns.*', '1'],
            leading_dirs_depth=2,
            use_current_dir=False,
            exclude='.*__MACOSX.*')  # some junk penetrates

        eq_(
            repo.get_description(
                uuid=DATALAD_SPECIAL_REMOTES_UUIDS[ARCHIVES_SPECIAL_REMOTE]),
            '[%s]' % ARCHIVES_SPECIAL_REMOTE)

        all_files = sorted(find_files('.'))
        # posixify paths to make it work on Windows as well
        all_files = [Path(file).as_posix() for file in all_files]
        target_files = {
            'CR24A/behaving1/1 f.txt',
            'CR24C/behaving3/3 f.txt',
            'CR24D/behaving2/2 f.txt',
            '.datalad/config',
        }
        eq_(set(all_files), target_files)

        # regression test: the subdir in MACOSX wasn't excluded and its name was
        # getting stripped by leading_dir_len
        # if stripping and exclude didn't work this fails
        assert_false(exists('__MACOSX'))
        # if exclude doesn't work then name of subdir gets stripped by
        # leading_dir_len
        assert_false(exists('c-1_data'))
        # if exclude doesn't work but everything else works this fails
        assert_false(exists('CR24B'))
Example #22
def test_ArchivesCache():
    # we don't actually need to test archives handling itself
    path1 = "/zuba/duba"
    path2 = "/zuba/duba2"
    # should not be able to create a persistent cache without topdir
    assert_raises(ValueError, ArchivesCache, persistent=True)
    cache = ArchivesCache()  # by default -- non persistent

    archive1_path = op.join(path1, fn_archive_obscure_ext)
    archive2_path = op.join(path2, fn_archive_obscure_ext)
    cached_archive1_path = cache[archive1_path].path
    assert_false(cache[archive1_path].path == cache[archive2_path].path)
    assert_true(cache[archive1_path] is cache[archive1_path])
    cache.clean()
    assert_false(op.exists(cached_archive1_path))
    assert_false(op.exists(cache.path))

    # test del
    cache = ArchivesCache()  # by default -- non persistent
    assert_true(op.exists(cache.path))
    cache_path = cache.path
    del cache
    assert_false(op.exists(cache_path))
Example #23
def test_reobtain_data(originpath=None, destpath=None):
    origin = Dataset(originpath).create()
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')

    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
Example #24
def test_property_reevaluation(repo1=None):
    ds = Dataset(repo1)
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    first_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after creation, we have `repo`, and `config` was reevaluated to point
    # to the repo's config:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    second_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is(ds.config, ds.repo.config)
    assert_is_not(first_config, second_config)
    assert_is_not_none(ds.id)
    first_id = ds.id

    ds.drop(what='all', reckless='kill', recursive=True)
    # repo is gone, and config is again reevaluated to only provide user/system
    # level config:
    assert_false(lexists(ds.path))
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    third_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_not(second_config, third_config)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after recreation everything is sane again:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    assert_is(ds.config, ds.repo.config)
    forth_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is_not(third_config, forth_config)
    assert_is_not_none(ds.id)
    assert_not_equal(ds.id, first_id)
Example #25
def test_is_installed(src=None, path=None):
    ca = dict(result_renderer='disabled')
    # a remote dataset with a subdataset underneath
    origds = Dataset(src).create(**ca)
    _ = origds.create('subm 1', **ca)

    ds = Dataset(path)
    assert_false(ds.is_installed())

    # get a clone:
    clone(src, path, **ca)
    ok_(ds.is_installed())
    # submodule still not installed:
    subds = Dataset(ds.pathobj / 'subm 1')
    assert_false(subds.is_installed())
    # We must not be able to create a new repository under a known
    # subdataset path.
    # Note: Unfortunately we would still be able to generate it under
    # subdirectory within submodule, e.g. `subm 1/subdir` but that is
    # not checked here. `create` provides that protection though.
    res = subds.create(on_failure='ignore',
                       return_type='list',
                       result_filter=None,
                       result_xfm=None,
                       **ca)
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        status='error',
                        path=subds.path,
                        message=('collision with %s (dataset) in dataset %s',
                                 subds.path, ds.path))
    # get the submodule
    with chpwd(ds.path):
        get('subm 1', **ca)
    ok_(subds.is_installed())
    # wipe it out
    rmtree(ds.path)
    assert_false(ds.is_installed())
Example #26
def test_credentials_from_env():
    keyring = Keyring()
    cred = AWS_S3("test-s3", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.get('key_id'), None)
    assert_equal(cred.get('secret_id'), None)

    def _check1():
        assert_equal(cred.get('key_id'), '1')
        assert_false(cred.is_known)

    def _check2():
        assert_equal(cred.get('key_id'), '1')
        assert_equal(cred.get('secret_id'), '2')
        assert_true(cred.is_known)

    # this is the old way, should still work
    with patch.dict('os.environ', {'DATALAD_test_s3_key_id': '1'}):
        _check1()
        with patch.dict('os.environ', {'DATALAD_test_s3_secret_id': '2'}):
            _check2()
        assert_false(cred.is_known)  # no memory of the past

    # here is the new way
    import datalad
    try:
        with patch.dict('os.environ',
                        {'DATALAD_CREDENTIAL_test__s3_key__id': '1'}):
            datalad.cfg.reload()
            _check1()
            with patch.dict('os.environ',
                            {'DATALAD_CREDENTIAL_test__s3_secret__id': '2'}):
                datalad.cfg.reload()
                _check2()
            datalad.cfg.reload()
            assert_false(cred.is_known)  # no memory of the past
    finally:
        datalad.cfg.reload()
Example #27
def test_datalad_credential_helper(path=None):

    ds = Dataset(path).create()

    # tell git to use git-credential-datalad
    ds.config.add('credential.helper', 'datalad', scope='local')
    ds.config.add('datalad.credentials.githelper.noninteractive',
                  'true',
                  scope='global')

    from datalad.downloaders.providers import Providers

    url1 = "https://datalad-test.org/some"
    url2 = "https://datalad-test.org/other"
    provider_name = "datalad-test.org"

    # `Providers` code is old and only considers a dataset root based on PWD
    # for config lookup. contextmanager below can be removed once the
    # provider/credential system is redesigned.
    with chpwd(ds.path):

        gitcred = GitCredentialInterface(url=url1, repo=ds)

        # There's nothing set up yet, helper should return empty
        gitcred.fill()
        eq_(gitcred['username'], '')
        eq_(gitcred['password'], '')

        # store new credentials
        # Note, that `Providers.enter_new()` currently uses user-level config
        # files for storage only. TODO: make that an option!
        # To not mess with existing ones, fail if it already exists:

        cfg_file = Path(Providers._get_providers_dirs()['user']) \
                   / f"{provider_name}.cfg"
        assert_false(cfg_file.exists())

        # Make sure we clean up
        from datalad.tests import _TEMP_PATHS_GENERATED
        _TEMP_PATHS_GENERATED.append(str(cfg_file))

        # Give credentials to git and ask it to store them:
        gitcred = GitCredentialInterface(url=url1,
                                         username="******",
                                         password="******",
                                         repo=ds)
        gitcred.approve()

        assert_true(cfg_file.exists())
        providers = Providers.from_config_files()
        p1 = providers.get_provider(url=url1, only_nondefault=True)
        assert_is_instance(p1.credential, UserPassword)
        eq_(p1.credential.get('user'), 'dl-user')
        eq_(p1.credential.get('password'), 'dl-pwd')

        # default regex should be host only, so matching url2, too
        p2 = providers.get_provider(url=url2, only_nondefault=True)
        assert_is_instance(p1.credential, UserPassword)
        eq_(p1.credential.get('user'), 'dl-user')
        eq_(p1.credential.get('password'), 'dl-pwd')

        # git, too, should now find it for both URLs
        gitcred = GitCredentialInterface(url=url1, repo=ds)
        gitcred.fill()
        eq_(gitcred['username'], 'dl-user')
        eq_(gitcred['password'], 'dl-pwd')

        gitcred = GitCredentialInterface(url=url2, repo=ds)
        gitcred.fill()
        eq_(gitcred['username'], 'dl-user')
        eq_(gitcred['password'], 'dl-pwd')

        # Rejection must not currently lead to deleting anything, since we would
        # delete too broadly.
        gitcred.reject()
        assert_true(cfg_file.exists())
        gitcred = GitCredentialInterface(url=url1, repo=ds)
        gitcred.fill()
        eq_(gitcred['username'], 'dl-user')
        eq_(gitcred['password'], 'dl-pwd')
        dlcred = UserPassword(name=provider_name)
        eq_(dlcred.get('user'), 'dl-user')
        eq_(dlcred.get('password'), 'dl-pwd')
Example #28
def test_something(path=None, new_home=None):
    # will refuse to work on a branch-based source without a dataset
    assert_raises(ValueError, ConfigManager, source='branch')
    # now read the example config
    cfg = ConfigManager(GitRepo(opj(path, 'ds'), create=True), source='branch')
    assert_equal(len(cfg), 5)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'],
                 ('name=Jane Doe', '[email protected]'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()),
                 [u'onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')),
                 ['empty', 'myint', 'novalue', 'user'])
    assert_equal(cfg.options(u'onemore.complicated の beast with.dot'),
                 ['findme'])

    assert_equal(sorted(cfg.items()),
                 [(u'onemore.complicated の beast with.dot.findme', '5.0'),
                  ('something.empty', ''), ('something.myint', '3'),
                  ('something.novalue', None),
                  ('something.user',
                   ('name=Jane Doe', '[email protected]'))])
    assert_equal(sorted(cfg.items('something')),
                 [('something.empty', ''), ('something.myint', '3'),
                  ('something.novalue', None),
                  ('something.user',
                   ('name=Jane Doe', '[email protected]'))])

    # by default get last value only
    assert_equal(cfg.get('something.user'), '[email protected]')
    # but can get all values
    assert_equal(cfg.get('something.user', get_all=True),
                 ('name=Jane Doe', '[email protected]'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(
        cfg.getfloat(u'onemore.complicated の beast with.dot', 'findme'), 5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    # git treats a key without any value as a boolean flag, thus True
    assert_equal(cfg.getbool('something', 'novalue'), True)
    assert_equal(cfg.get('something.novalue'), None)
    # empty value is False
    assert_equal(cfg.getbool('something', 'empty'), False)
    assert_equal(cfg.get('something.empty'), '')
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'),
                 cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'), 'oohaaa')
    # weird, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), u'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparison would fail, because of section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError, cfg.rename_section, 'nothere',
                      'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch the changes
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set,
                  'mike.should.have',
                  'a beer',
                  force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # test deprecated 'where' interface and old 'dataset' (not 'branch') value
    # TODO: remove along with the removal of deprecated 'where'
    cfg.set('mike.should.have', 'wasknown', where='dataset')
    assert_equal(cfg['mike.should.have'], 'wasknown')
    assert_equal(cfg.get_from_source('dataset', 'mike.should.have'),
                 'wasknown')

    # fails unknown location
    assert_raises(ValueError, cfg.add, 'somesuch', 'shit', scope='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig

    # Note: An easier way to test this, would be to just set GIT_CONFIG_GLOBAL
    # to point somewhere else. However, this is not supported by git before
    # 2.32. Hence, stick with changed HOME in this test, but be sure to unset a
    # possible GIT_CONFIG_GLOBAL in addition.

    patched_env = os.environ.copy()
    patched_env.pop('GIT_CONFIG_GLOBAL', None)
    patched_env.update(get_home_envvars(new_home))
    with patch.dict('os.environ',
                    dict(patched_env, DATALAD_SNEAKY_ADDITION='ignore'),
                    clear=True):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert (not exists(global_gitconfig))
        globalcfg = ConfigManager()
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', scope='global')
        assert (exists(global_gitconfig))
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # does not monitor additions!
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(CommandError,
                          globalcfg.unset,
                          'datalad.unittest.youcan',
                          scope='local')
        assert (globalcfg.has_section('datalad.unittest'))
        globalcfg.unset('datalad.unittest.youcan', scope='global')
        # but after we unset the only value -- that section is no longer listed
        assert (not globalcfg.has_section('datalad.unittest'))
        assert_not_in('datalad.unittest.youcan', globalcfg)
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(Dataset(opj(path, 'ds')),
                        source='branch',
                        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)
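Without the branch-scoped fixture, the same `ConfigManager` surface covers plain user/system/environment configuration; a minimal sketch:

from datalad.config import ConfigManager

cfg = ConfigManager()                      # no dataset: user/system/env config only
print(cfg.get('user.name'))                # last value, or None if unset
print(cfg.get('user.name', get_all=True))  # every matching value (tuple if multi-valued)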
Example #29
def test_try_lock_informatively(tempfile=None):
    lock = InterProcessLock(tempfile + '.lck')
    lock_path = ensure_unicode(
        lock.path)  # can be bytes, which complicates string formatting, etc.
    t0 = time()
    with try_lock_informatively(lock, purpose="happy life") as acquired:
        assert_true(lock.acquired)
        assert_true(acquired)
        assert_greater(
            2,
            time() -
            t0)  # should not take any notable time, we cannot be blocking
        """
        # InterProcessLock is not re-entrant so nesting should not be used, will result
        # in exception on release
        with try_lock_informatively(lock, timeouts=[dt, dt*2], proceed_unlocked=True) as acquired:
            assert_true(lock.acquired)  # due to outer cm
            assert_true(acquired)       # lock is reentrant apparently
        """
        # Let's try in a completely different subprocess
        runner = WitlessRunner(env=dict(
            os.environ, DATALAD_LOG_LEVEL='info', DATALAD_LOG_TARGET='stderr'))

        script1 = Path(tempfile + "-script1.py")
        script1_fmt = f"""
from fasteners import InterProcessLock
from time import time

from datalad.support.locking import try_lock_informatively

lock = InterProcessLock({lock_path!r})

with try_lock_informatively(lock, timeouts=[0.05, 0.15], proceed_unlocked={{proceed_unlocked}}) as acquired:
    print("Lock acquired=%s" % acquired)
"""
        script1.write_text(script1_fmt.format(proceed_unlocked=True))
        t0 = time()
        res = runner.run([sys.executable, str(script1)],
                         protocol=StdOutErrCapture)
        assert_in('Lock acquired=False', res['stdout'])
        assert_in(f'Failed to acquire lock at {lock_path} in 0.05',
                  res['stderr'])
        assert_in(f'Failed to acquire lock at {lock_path} in 0.15',
                  res['stderr'])
        assert_in('proceed without locking', res['stderr'])
        assert_greater(time() - t0, 0.19999)  # should wait for at least 0.2
        try:
            import psutil

            # PID does not correspond
            assert_in('Check following process: PID=', res['stderr'])
            assert_in(f'CWD={os.getcwd()} CMDLINE=', res['stderr'])
        except ImportError:
            pass  # psutil was not installed, cannot get list of files
        except AssertionError:
            # we must have had the other one then
            assert_in('failed to determine one', res['stderr'])
            if not on_osx:
                # so far we had only OSX reporting failing to get PIDs information
                # but if it is something else -- re-raise original exception
                raise

        # in the 2nd case, let's try without proceeding unlocked
        script1.write_text(script1_fmt.format(proceed_unlocked=False))
        t0 = time()
        with assert_raises(CommandError) as cme:
            runner.run([sys.executable, str(script1)],
                       protocol=StdOutErrCapture)
        assert_in(f"Failed to acquire lock at {lock_path} in 2 attempts.",
                  str(cme.value))
        assert_in(f"RuntimeError", str(cme.value))
        assert_false(
            cme.value.stdout)  # nothing there since print should not happen
        assert_in(f'Failed to acquire lock at {lock_path} in 0.05',
                  cme.value.stderr)
        assert_in(f'Failed to acquire lock at {lock_path} in 0.15',
                  cme.value.stderr)
        assert_greater(time() - t0, 0.19999)  # should wait for at least 0.2

    # now that we left context, should work out just fine
    res = runner.run([sys.executable, str(script1)], protocol=StdOutErrCapture)
    assert_in('Lock acquired=True', res['stdout'])
    assert_not_in('Failed to acquire lock', res['stderr'])
    assert_not_in('PID', res['stderr'])
Example #30
def test_install_recursive(src=None, path_nr=None, path_r=None):

    _make_dataset_hierarchy(src)

    # first install non-recursive:
    ds = install(path_nr, source=src, recursive=False)
    ok_(ds.is_installed())
    for sub in ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(not sub.is_installed(), "Unintentionally installed: %s" % (sub, ))
    # this also means the subdatasets are listed as absent:
    eq_(
        set(
            ds.subdatasets(recursive=True,
                           state='absent',
                           result_xfm='relpaths')), {'sub1'})

    # now recursively:
    # don't filter implicit results so we can inspect them
    res = install(path_r,
                  source=src,
                  recursive=True,
                  result_xfm=None,
                  result_filter=None)
    # installed a dataset and four subdatasets
    assert_result_count(res, 5, action='install', type='dataset')
    # we recurse top down during installation, so toplevel should appear at
    # first position in returned list
    eq_(res[0]['path'], path_r)
    top_ds = Dataset(res[0]['path'])
    ok_(top_ds.is_installed())

    # the subdatasets are contained in returned list:
    # (Note: Until we provide proper (singleton) instances for Datasets,
    # need to check for their paths)
    assert_in_results(res, path=opj(top_ds.path, 'sub1'), type='dataset')
    assert_in_results(res,
                      path=opj(top_ds.path, 'sub1', 'sub2'),
                      type='dataset')
    assert_in_results(res,
                      path=opj(top_ds.path, 'sub1', 'sub2', 'sub3'),
                      type='dataset')
    assert_in_results(res,
                      path=opj(top_ds.path, 'sub1', 'sub2', 'sub3', 'sub4'),
                      type='dataset')

    eq_(len(top_ds.subdatasets(recursive=True)), 4)

    for subds in top_ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subds.is_installed(), "Not installed: %s" % (subds, ))
        # no content was installed:
        ainfo = subds.repo.get_content_annexinfo(init=None,
                                                 eval_availability=True)
        assert_false(any(st["has_content"] for st in ainfo.values()))
    # no absent subdatasets:
    ok_(top_ds.subdatasets(recursive=True, state='absent') == [])

    # check if we can install recursively into a dataset
    # https://github.com/datalad/datalad/issues/2982
    subds = ds.install('recursive-in-ds', source=src, recursive=True)
    ok_(subds.is_installed())
    for subsub in subds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subsub.is_installed())

    # check that we get subdataset instances manufactured from notneeded results
    # to install existing subdatasets again
    eq_(subds, ds.install('recursive-in-ds'))