Exemple #1
0
def test_status_basics(path, linkpath, otherdir):
    """Verify the basic shape of ``status()`` results on a fresh dataset.

    When symlinks are available the dataset is addressed via ``linkpath``
    (a symlink pointing to ``path``) instead of ``path`` itself.
    ``otherdir`` is a location that ``status`` is expected to reject when
    queried from within the dataset.
    """
    if has_symlink_capability():
        # address the dataset through a symlink to make things harder
        symlink = ut.Path(linkpath)
        symlink.symlink_to(path, target_is_directory=True)
        path = linkpath

    # without a dataset in place there is nothing to discover
    with chpwd(path):
        assert_raises(NoDatasetFound, status)

    ds = Dataset(path).create()

    # auto-discovery from the CWD must match the explicit dataset method
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        discovered = status(result_renderer=None)
    explicit = ds.status(result_renderer=None)
    eq_(discovered, explicit)
    assert_status('ok', discovered)

    # there are several reports; stay vague to be robust to future changes
    assert len(discovered) > 2

    # every report must have exactly these property values ...
    expected_values = (
        ('status', 'ok'),
        ('action', 'status'),
        ('state', 'clean'),
        ('type', 'file'),
    )
    # ... and must at least carry these keys
    required_keys = ('gitshasum', 'bytesize')
    for report in discovered:
        for key, value in expected_values:
            eq_(report[key], value)
        for key in required_keys:
            assert_in(key, report)
        eq_(report['refds'], ds.path)
Exemple #2
0
def test_ria_postclonecfg():
    """Yield post-clone configuration checks for different RIA URL schemes.

    This is a generator test: each ``yield`` hands out a callable plus its
    arguments (store URL and the value returned by
    ``_postclonetest_prepare``). Skipped entirely when symlinks cannot be
    created.
    """
    if not has_symlink_capability():
        # This is needed to create an ORA remote using an URL for upload,
        # that is then invalidated later on (delete the symlink it's based on).
        raise SkipTest("Can't create symlinks")

    from datalad.utils import make_tempfile
    # only referenced by the currently disabled HTTP variant below
    from datalad.tests.utils import HTTPPath

    with make_tempfile(mkdir=True) as lcl, make_tempfile(mkdir=True) as store:
        # do not call this `id`: that would shadow the builtin
        ds_id = _postclonetest_prepare(lcl, store)
        store_path = Path(store)

        # test cloning via ria+file://
        yield _test_ria_postclonecfg, store_path.as_uri(), ds_id

        # Note: HTTP disabled for now. Requires proper implementation in ORA
        #       remote. See
        # https://github.com/datalad/datalad/pull/4203#discussion_r410284649

        # # test cloning via ria+http://
        # with HTTPPath(store) as url:
        #     yield _test_ria_postclonecfg, url, ds_id

        # test cloning via ria+ssh://
        yield skip_ssh(_test_ria_postclonecfg), \
            "ssh://datalad-test:{}".format(store_path.as_posix()), ds_id
Exemple #3
0
def test_status_symlinked_dir_within_repo(path):
    """Query status for a file addressed through a symlinked directory.

    The expected outcome depends on the git-annex version: older versions
    yield no results, newer ones make the call raise ``CommandError``.
    """
    if not has_symlink_capability():
        raise SkipTest("Can't create symlinks")

    # Layout created below:
    #
    # <path>
    # |-- bar -> <path>/foo
    # `-- foo
    #     `-- f
    ds = Dataset(path).create()
    root = ds.pathobj
    real_dir = root / "foo"
    real_dir.mkdir()
    (real_dir / "f").write_text("content")
    (root / "bar").symlink_to(real_dir, target_is_directory=True)
    ds.save()

    # the file as seen through the symlinked directory
    via_link = root / "bar" / "f"
    query = dict(path=[via_link],
                 annex="availability",
                 on_failure="ignore",
                 result_renderer=None)

    if ds.repo.git_annex_version < "8.20200522":
        assert_result_count(ds.status(**query), 0)
        return

    # As of 2a8fdfc7d (Display a warning message when asked to operate on a
    # file inside a symlinked directory, 2020-05-11), git-annex will error.
    #
    # TODO: Consider providing better error handling in this case.
    with assert_raises(CommandError):
        ds.status(**query)
Exemple #4
0
def test_ephemeral(origin_path, clone1_path, clone2_path):
    """Check clones made with ``reckless='ephemeral'``.

    Two clones of the dataset at ``origin_path`` are created (one from a
    plain path, one from a ``file://`` URL). If symlinks are available, each
    clone's ``.git/annex`` must be a symlink to the origin's annex directory.
    Afterwards a change is published from the first clone back to origin and
    the availability information known to origin is inspected.
    """
    file_test = Path('ds') / 'test.txt'
    file_testsub = Path('ds') / 'subdir' / 'testsub.txt'

    origin = Dataset(origin_path).create(force=True)
    origin.save()

    can_symlink = has_symlink_capability()

    def _check_ephemeral_clone(cloned):
        # Verify that `cloned` shares the annex of origin via a symlink and
        # that file content can be read through it.
        if not can_symlink:
            return
        annex_dir = cloned.repo.dot_git / 'annex'
        ok_(annex_dir.is_symlink())
        ok_(annex_dir.resolve().samefile(origin.repo.dot_git / 'annex'))
        if not cloned.repo.is_managed_branch():
            # TODO: We can't properly handle adjusted branch yet
            eq_((cloned.pathobj / file_test).read_text(), 'some')
            eq_((cloned.pathobj / file_testsub).read_text(), 'somemore')

    # 1. clone via path
    clone1 = clone(origin_path, clone1_path, reckless='ephemeral')
    _check_ephemeral_clone(clone1)

    # 2. clone via file-scheme URL
    clone2 = clone('file://' + Path(origin_path).as_posix(), clone2_path,
                   reckless='ephemeral')
    # the content checks must look into clone2 itself (not clone1 again)
    _check_ephemeral_clone(clone2)

    # 3. add something to clone1 and push back to origin availability from
    # clone1 should not be propagated (we declared 'here' dead to that end)

    (clone1.pathobj / 'addition.txt').write_text("even more")
    clone1.save()
    origin.config.set("receive.denyCurrentBranch", "updateInstead",
                      where="local")
    # Note, that the only thing to test is git-annex-dead here,
    # if we couldn't symlink:
    clone1.publish(to='origin', transfer_data='none' if can_symlink else 'auto')
    if not origin.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origin.repo.get_hexsha(), clone1.repo.get_hexsha())
    res = origin.repo.whereis("addition.txt")
    if can_symlink:
        # obv. present in origin, but this is not yet known to origin:
        eq_(res, [])
        res = origin.repo.fsck()
        assert_result_count(res, 3, success=True)
        # TODO: Double check whether annex reports POSIX paths on Windows!
        eq_({str(file_test), str(file_testsub), "addition.txt"},
            {r['file'] for r in res})
        # now origin knows:
    res = origin.repo.whereis("addition.txt")
    eq_(res, [origin.config.get("annex.uuid")])
Exemple #5
0
def test_gh2927(path, linkpath):
    """Create a subdataset nested inside another subdataset.

    When symlinks are available, the superdataset is addressed through
    ``linkpath`` (a symlink to ``path``) rather than ``path`` itself.
    """
    if has_symlink_capability():
        # go through a symlink to make things harder
        link = Path(linkpath)
        link.symlink_to(path, target_is_directory=True)
        path = linkpath

    ds = Dataset(path).create()
    ds.create('subds_clean')

    # creating one level further down must be reported as 'ok'
    nested = op.join('subds_clean', 'subds_lvl1_clean')
    results = ds.create(nested, result_xfm=None, return_type='list')
    assert_status('ok', results)
Exemple #6
0
def _test_initremote_basic(host, ds_path, store, link):
    """Exercise initialization and reconfiguration of a 'ria-remote'.

    Parameters
    ----------
    host
      If truthy, the store is addressed via ``ria+ssh://`` on that host and
      accessed through ``SSHRemoteIO``; otherwise a ``ria+file://`` style URL
      and ``LocalIO`` are used.
    ds_path
      Location at which the test dataset is created.
    store
      Location of the store.
    link
      Location at which a symlink to the store is placed (only used when
      symlinks are available) to test reconfiguration with another URL.
    """
    ds_path = Path(ds_path)
    store = Path(store)
    link = Path(link)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if host:
        url = "ria+ssh://{host}{path}".format(host=host,
                                              path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    def _remote_names():
        # names of all special remotes currently known to the repository
        return [cfg['name']
                for cfg in ds.repo.get_special_remotes().values()]

    def _assert_remote_log(expected_url):
        # git-annex:remote.log should have:
        #   - url
        #   - common_init_opts
        #   - archive-id (which equals ds id)
        remote_log = ds.repo.call_git(
            ['cat-file', 'blob', 'git-annex:remote.log'],
            read_only=True)
        assert_in("url={}".format(expected_url), remote_log)
        for opt in common_init_opts:
            assert_in(opt, remote_log)
        assert_in("archive-id={}".format(ds.id), remote_log)

    # fails on non-existing storage location
    assert_raises(CommandError,
                  ds.repo.init_remote, 'ria-remote', options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in('ria-remote', _remote_names())

    # fails on non-RIA URL
    assert_raises(CommandError, ds.repo.init_remote, 'ria-remote',
                  options=common_init_opts + ['url={}'.format(store.as_uri())]
                  )
    # Doesn't actually create a remote if it fails
    assert_not_in('ria-remote', _remote_names())

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # still fails, since ds isn't setup in the store
    assert_raises(CommandError,
                  ds.repo.init_remote, 'ria-remote', options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in('ria-remote', _remote_names())
    # set up the dataset as well
    create_ds_in_store(io, store, ds.id, '2', '1')
    # now should work
    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in('ria-remote', _remote_names())
    assert_repo_status(ds.path)
    _assert_remote_log(url)

    # re-configure with invalid URL should fail:
    assert_raises(
        CommandError,
        ds.repo.call_annex,
        ['enableremote', 'ria-remote'] + common_init_opts + [
            'url=ria+file:///non-existing'])
    # but re-configure with valid URL should work
    if has_symlink_capability():
        link.symlink_to(store)
        new_url = 'ria+{}'.format(link.as_uri())
        ds.repo.call_annex(
            ['enableremote', 'ria-remote'] + common_init_opts + [
                'url={}'.format(new_url)])
        _assert_remote_log(new_url)

    # we can deal with --sameas, which leads to a special remote not having a
    # 'name' property, but only a 'sameas-name'. See gh-4259
    try:
        ds.repo.init_remote('ora2',
                            options=init_opts + ['--sameas', 'ria-remote'])
    except CommandError as e:
        # an annex that is too old doesn't know --sameas; anything else is
        # a genuine failure
        if 'Invalid option `--sameas' not in e.stderr:
            raise
Exemple #7
0
def test_path_diff(_path, linkpath):
    """Exercise path handling of ``diff()`` on a nested dataset hierarchy.

    The hierarchy is created at ``_path``. When symlinks are available, all
    queries go through ``linkpath``, a symlink pointing to ``_path``.
    """
    # do the setup on the real path, not the symlink, so that symlink
    # handling bugs cannot affect the fixture itself
    ds = get_deeply_nested_structure(str(_path))
    path = _path
    if has_symlink_capability():
        # go through a symlink to make things harder
        ut.Path(linkpath).symlink_to(_path, target_is_directory=True)
        path = linkpath

    ds = Dataset(path)
    if has_symlink_capability():
        assert ds.pathobj != ds.repo.pathobj

    # the two argument sets used by the queries below
    with_annex = dict(recursive=True, annex='all', result_renderer=None)
    without_annex = dict(recursive=True, result_renderer=None)

    plain_recursive = ds.diff(**with_annex)
    # check integrity of individual reports with a focus on how symlinks
    # are reported
    for report in plain_recursive:
        # anything that is an "intended" symlink should be reported
        # as such. In contrast, anything that is a symlink for mere
        # technical reasons (annex using it for something in some mode)
        # should be reported as the thing it is representing (i.e.
        # a file)
        expect_symlink = 'link2' in str(report['path'])
        assert (report['type'] == 'symlink') == expect_symlink, report
        # every item must report its parent dataset
        assert_in('parentds', report)

    # bunch of smoke tests
    # query of '.' is same as no path
    eq_(plain_recursive, ds.diff(path='.', **with_annex))
    # duplicate paths do not change things
    eq_(plain_recursive, ds.diff(path=['.', '.'], **with_annex))
    # neither do nested paths
    # Release 2.24.0 contained a regression that was fixed with 072a231016
    # (2019-12-10).
    git_has_regression = "2.24.0" <= ds.repo.git_version < "2.25.0"
    if not git_has_regression:
        eq_(plain_recursive,
            ds.diff(path=['.', 'subds_modified'], **with_annex))
    # when invoked in a subdir of a dataset it still reports on the full thing
    # just like `git status`, as long as there are no paths specified
    with chpwd(op.join(path, 'directory_untracked')):
        plain_recursive = diff(**with_annex)
    # should be able to take absolute paths and yield the same
    # output
    eq_(plain_recursive, ds.diff(path=ds.path, **with_annex))

    # query for a deeply nested path from the top, should just work with a
    # variety of approaches
    rpath = op.join('subds_modified', 'subds_lvl1_modified',
                    u'{}_directory_untracked'.format(OBSCURE_FILENAME))
    apath = str(ds.pathobj / rpath)

    def _expect_one_untracked_dir(results):
        assert_result_count(
            results,
            1,
            state='untracked',
            type='directory',
            refds=ds.path,
            # path always comes out a full path inside the queried dataset
            path=apath,
        )

    # relative and absolute flavor of the same location
    for query in (rpath, apath):
        _expect_one_untracked_dir(ds.diff(path=query, **with_annex))
    # lastly, change into the dataset and query with an explicit
    # relative path
    with chpwd(ds.path):
        res = ds.diff(path=op.join('.', rpath), **with_annex)
    _expect_one_untracked_dir(res)

    assert_result_count(ds.diff(**without_annex), 1, path=apath)
    # limiting recursion will exclude this particular path
    assert_result_count(ds.diff(recursion_limit=1, **without_annex),
                        0,
                        path=apath)
    # negative limit is unlimited limit
    eq_(ds.diff(recursion_limit=-1, **without_annex),
        ds.diff(**without_annex))
Exemple #8
0
def test_ria_http(lcl, storepath, url):
    """Clone datasets from a store addressed via a ``ria+<url>`` source.

    A dataset with one subdataset is created underneath ``lcl``, both are
    moved into the store at ``storepath`` and then cloned back in a number
    of ways: plain, at a specific version, via a rewritten URL and (if
    symlinks are available) via an alias.

    NOTE(review): ``url`` is presumably an HTTP URL under which ``storepath``
    is served by a fixture not visible here -- confirm against the
    decorators of this test.
    """
    # create a local dataset with a subdataset
    lcl = Path(lcl)
    storepath = Path(storepath)
    subds = Dataset(lcl / 'ds' / 'subdir' / 'subds').create(force=True)
    subds.save()
    ds = Dataset(lcl / 'ds').create(force=True)
    ds.save(version_tag='original')
    assert_repo_status(ds.path)
    for d in (ds, subds):
        _move2store(storepath, d)
    # location of superds in store
    storeds_loc = str(storepath / ds.id[:3] / ds.id[3:])
    # now we should be able to clone from a ria+http url
    # the super
    riaclone = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone',
    )

    # due to default configuration, clone() should automatically look for the
    # subdataset in the store, too -- if not the following would fail, because
    # we never configured a proper submodule URL
    riaclonesub = riaclone.get(
        op.join('subdir', 'subds'), get_data=False,
        result_xfm='datasets', return_type='item-or-list')

    # both datasets came from the store and must be set up in an identical
    # fashion
    for origds, cloneds in ((ds, riaclone), (subds, riaclonesub)):
        eq_(origds.id, cloneds.id)
        if not ds.repo.is_managed_branch():
            # test logic cannot handle adjusted branches
            eq_(origds.repo.get_hexsha(), cloneds.repo.get_hexsha())
        ok_(cloneds.config.get('remote.origin.url').startswith(url))
        eq_(cloneds.config.get('remote.origin.annex-ignore'), 'true')
        eq_(cloneds.config.get('datalad.get.subdataset-source-candidate-200origin'),
            'ria+%s#{id}' % url)

    # now advance the source dataset
    (ds.pathobj / 'newfile.txt').write_text('new')
    ds.save()
    ds.publish(to='store')
    Runner(cwd=storeds_loc).run(['git', 'update-server-info'])
    # re-clone as before
    riaclone2 = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone2',
    )
    # and now clone a specific version, here given by the tag name
    riaclone_orig = clone(
        'ria+{}#{}@{}'.format(url, ds.id, 'original'),
        lcl / 'clone_orig',
    )
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        # we got the precise version we wanted: the clone of the tag matches
        # the clone made before the source dataset was advanced
        eq_(riaclone.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())
        # whereas the fresh re-clone matches the advanced source dataset,
        # and hence differs from the tagged version
        eq_(riaclone2.repo.get_hexsha(), ds.repo.get_hexsha())
        neq_(riaclone2.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())

    # attempt to clone a version that doesn't exist
    with swallow_logs():
        with assert_raises(IncompleteResultsError) as cme:
            clone('ria+{}#{}@impossible'.format(url, ds.id),
                  lcl / 'clone_failed')
        assert_in("not found in upstream", str(cme.exception))

    # lastly test if URL rewriting is in effect
    # on the surface we clone from an SSH source identified by some custom
    # label, no full URL, but URL rewriting setup maps it back to the
    # HTTP URL used above
    # NOTE(review): `origds` is the variable leaked from the for-loop above,
    # i.e. it refers to `subds` at this point -- confirm that cloning the
    # subdataset (rather than `ds`) is intended here
    with patch_config({
            'url.ria+{}#.insteadof'.format(url): 'ria+ssh://somelabel#'}):
        cloned_by_label = clone(
            'ria+ssh://somelabel#{}'.format(origds.id),
            lcl / 'cloned_by_label',
        )
    # so we get the same setup as above, but....
    eq_(origds.id, cloned_by_label.id)
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origds.repo.get_hexsha(), cloned_by_label.repo.get_hexsha())
    ok_(cloned_by_label.config.get('remote.origin.url').startswith(url))
    eq_(cloned_by_label.config.get('remote.origin.annex-ignore'), 'true')
    # ... the clone candidates go with the label-based URL such that
    # future get() requests acknowledge a (system-wide) configuration
    # update
    eq_(cloned_by_label.config.get('datalad.get.subdataset-source-candidate-200origin'),
        'ria+ssh://somelabel#{id}')

    # the remainder of the test needs symlinks
    if not has_symlink_capability():
        return
    # place a symlink in the store to serve as a dataset alias
    (storepath / 'alias').mkdir()
    (storepath / 'alias' / 'myname').symlink_to(storeds_loc)
    with chpwd(lcl):
        cloned_by_alias = clone('ria+{}#~{}'.format(url, 'myname'))
    # still get the same data
    eq_(cloned_by_alias.id, ds.id)
    # more sensible default install path
    eq_(cloned_by_alias.pathobj.name, 'myname')
Exemple #9
0
def test_ephemeral(origin_path, bare_path,
                   clone1_path, clone2_path, clone3_path):
    """Check clones made with ``reckless='ephemeral'``.

    Clones of the dataset at ``origin_path`` are created from a plain path,
    from a ``file://`` URL and from a bare repository placed at
    ``bare_path``. If symlinks are available, each clone's ``.git/annex``
    must be a symlink to the annex directory of its source. In between, a
    change is published from the first clone back to origin and the
    availability information known to origin is inspected.

    The test is skipped when the origin dataset is on a managed branch.
    """
    file_test = Path('ds') / 'test.txt'
    file_testsub = Path('ds') / 'subdir' / 'testsub.txt'

    origin = Dataset(origin_path).create(force=True)
    if origin.repo.is_managed_branch():
        raise SkipTest('Ephemeral clones cannot use adjusted mode repos')

    origin.save()

    # probe once; the answer is used for all checks below
    can_symlink = has_symlink_capability()

    def _assert_annex_linked(cloned, annex_target):
        # .git/annex of `cloned` must be a symlink resolving to `annex_target`
        annex_dir = cloned.repo.dot_git / 'annex'
        ok_(annex_dir.is_symlink())
        ok_(annex_dir.resolve().samefile(annex_target))

    def _assert_content_readable(cloned):
        # file content must be accessible through the linked annex
        if not cloned.repo.is_managed_branch():
            # TODO: We can't properly handle adjusted branch yet
            eq_((cloned.pathobj / file_test).read_text(), 'some')
            eq_((cloned.pathobj / file_testsub).read_text(), 'somemore')

    # 1. clone via path
    clone1 = clone(origin_path, clone1_path, reckless='ephemeral')
    if can_symlink:
        _assert_annex_linked(clone1, origin.repo.dot_git / 'annex')
        _assert_content_readable(clone1)

    # 2. clone via file-scheme URL
    clone2 = clone('file://' + Path(origin_path).as_posix(), clone2_path,
                   reckless='ephemeral')
    if can_symlink:
        _assert_annex_linked(clone2, origin.repo.dot_git / 'annex')
        _assert_content_readable(clone2)

    # 3. add something to clone1 and push back to origin availability from
    # clone1 should not be propagated (we declared 'here' dead to that end)

    (clone1.pathobj / 'addition.txt').write_text("even more")
    clone1.save()
    origin.config.set("receive.denyCurrentBranch", "updateInstead",
                      where="local")
    # Note, that the only thing to test is git-annex-dead here,
    # if we couldn't symlink:
    clone1.publish(to='origin', transfer_data='none' if can_symlink else 'auto')
    if not origin.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origin.repo.get_hexsha(), clone1.repo.get_hexsha())
    res = origin.repo.whereis("addition.txt")
    if can_symlink:
        # obv. present in origin, but this is not yet known to origin:
        eq_(res, [])
        res = origin.repo.fsck()
        assert_result_count(res, 3, success=True)
        # TODO: Double check whether annex reports POSIX paths on Windows!
        eq_({str(file_test), str(file_testsub), "addition.txt"},
            {r['file'] for r in res})
        # now origin knows:
    res = origin.repo.whereis("addition.txt")
    eq_(res, [origin.config.get("annex.uuid")])

    # 4. ephemeral clone from a bare repo
    runner = GitWitlessRunner()
    runner.run(['git', 'clone', '--bare', origin_path, bare_path])
    runner.run(['git', 'annex', 'init'], cwd=bare_path)

    eph_from_bare = clone(bare_path, clone3_path, reckless='ephemeral')

    if can_symlink:
        # Bare repo uses dirhashlower by default, while a standard repo uses
        # dirhashmixed. Symlinking different object trees doesn't really work.
        # Don't test that here, since this is not a matter of the "ephemeral"
        # option alone. We should have such a setup in the RIA tests and test
        # for data access there.
        # Here we only test for the correct linking.
        _assert_annex_linked(eph_from_bare, Path(bare_path) / 'annex')
Exemple #10
0
def test_status(_path, linkpath):
    """Exercise ``status()`` on a deeply nested dataset hierarchy.

    The hierarchy is created at ``_path``. When symlinks are available, all
    queries go through ``linkpath``, a symlink pointing to ``_path``.
    """
    # do the setup on the real path, not the symlink, so that symlink
    # handling bugs cannot affect the fixture itself
    ds = get_deeply_nested_structure(text_type(_path))
    path = _path
    if has_symlink_capability():
        # go through a symlink to make things harder
        ut.Path(linkpath).symlink_to(_path, target_is_directory=True)
        path = linkpath

    ds = Dataset(path)
    if not on_windows:
        # TODO test should also be has_symlink_capability(), but
        # something in the repo base class is not behaving yet
        # check the premise of this test
        assert ds.pathobj != ds.repo.pathobj

    # spotcheck that annex status reporting and availability evaluation
    # works
    annex_report = ds.status(annex='all')
    annex_key = 'MD5E-s5--275876e34cf609db118f3d84b799a790.txt'
    # hashdir is different on windows
    hashdir_outer = 'f33' if on_windows else '7p'
    hashdir_inner = '94b' if on_windows else 'gp'
    objects_dir = ds.repo.pathobj / '.git' / 'annex' / 'objects'
    objloc = objects_dir / hashdir_outer / hashdir_inner / annex_key / annex_key
    assert_result_count(
        annex_report,
        1,
        path=text_type(ds.pathobj / 'subdir' / 'annexed_file.txt'),
        key=annex_key,
        has_content=True,
        objloc=text_type(objloc))

    plain_recursive = ds.status(recursive=True)
    # check integrity of individual reports with a focus on how symlinks
    # are reported
    for report in plain_recursive:
        # anything that is an "intended" symlink should be reported
        # as such. In contrast, anything that is a symlink for mere
        # technical reasons (annex using it for something in some mode)
        # should be reported as the thing it is representing (i.e.
        # a file)
        expect_symlink = 'link2' in text_type(report['path'])
        assert (report['type'] == 'symlink') == expect_symlink, report
        # every item must report its parent dataset
        assert_in('parentds', report)

    # bunch of smoke tests: none of the following path specifications
    # changes the outcome
    #  - query of '.' is same as no path
    #  - duplicate paths do not change things
    #  - neither do nested paths
    for equivalent in ('.', ['.', '.'], ['.', 'subds_modified']):
        eq_(plain_recursive, ds.status(path=equivalent, recursive=True))
    # when invoked in a subdir of a dataset it still reports on the full thing
    # just like `git status`, as long as there are no paths specified
    with chpwd(op.join(path, 'directory_untracked')):
        plain_recursive = status(recursive=True)
    # should be able to take absolute paths and yield the same
    # output
    eq_(plain_recursive, ds.status(path=ds.path, recursive=True))

    # query for a deeply nested path from the top, should just work with a
    # variety of approaches
    rpath = op.join('subds_modified', 'subds_lvl1_modified',
                    OBSCURE_FILENAME + u'_directory_untracked')
    apath = text_type(ds.pathobj / rpath)
    # ds.repo.pathobj will have the symlink resolved
    arealpath = ds.repo.pathobj / rpath

    def _expect_one_untracked_dir(results):
        assert_result_count(
            results,
            1,
            state='untracked',
            type='directory',
            refds=ds.path,
            # path always comes out a full path inside the queried dataset
            path=apath,
        )

    # TODO include explicit relative path in test
    for query in (rpath, apath, arealpath):
        _expect_one_untracked_dir(ds.status(path=query))
    # lastly, change into the realpath of the dataset and
    # query with an explicit path
    with chpwd(ds.repo.path):
        res = ds.status(path=op.join('.', rpath))
    _expect_one_untracked_dir(res)

    assert_result_count(ds.status(recursive=True), 1, path=apath)
    # limiting recursion will exclude this particular path
    assert_result_count(ds.status(recursive=True, recursion_limit=1),
                        0,
                        path=apath)
    # negative limit is unlimited limit
    eq_(ds.status(recursive=True, recursion_limit=-1),
        ds.status(recursive=True))
Exemple #11
0
def test_status(_path, linkpath):
    """Exercise ``status()`` on a deeply nested dataset hierarchy.

    The hierarchy is created at ``_path``. When symlinks are available, all
    queries go through ``linkpath``, a symlink pointing to ``_path``. Result
    rendering is disabled for every query.
    """
    # do the setup on the real path, not the symlink, so that symlink
    # handling bugs cannot affect the fixture itself
    ds = get_deeply_nested_structure(str(_path))
    path = _path
    if has_symlink_capability():
        # go through a symlink to make things harder
        ut.Path(linkpath).symlink_to(_path, target_is_directory=True)
        path = linkpath

    ds = Dataset(path)
    if has_symlink_capability():
        assert ds.pathobj != ds.repo.pathobj

    # argument sets shared by the queries below
    quiet = dict(result_renderer=None)
    quiet_recursive = dict(recursive=True, result_renderer=None)

    # spotcheck that annex status reporting and availability evaluation
    # works
    annex_report = ds.status(annex='all', **quiet)
    annex_key = 'MD5E-s5--275876e34cf609db118f3d84b799a790.txt'
    # hashdir differs when on a managed branch
    managed = ds.repo.is_managed_branch()
    hashdir_outer = 'f33' if managed else '7p'
    hashdir_inner = '94b' if managed else 'gp'
    objects_dir = ds.repo.pathobj / '.git' / 'annex' / 'objects'
    objloc = objects_dir / hashdir_outer / hashdir_inner / annex_key / annex_key
    assert_result_count(
        annex_report,
        1,
        path=str(ds.pathobj / 'subdir' / 'annexed_file.txt'),
        key=annex_key,
        has_content=True,
        objloc=str(objloc))

    plain_recursive = ds.status(**quiet_recursive)
    # check integrity of individual reports with a focus on how symlinks
    # are reported
    for report in plain_recursive:
        # anything that is an "intended" symlink should be reported
        # as such. In contrast, anything that is a symlink for mere
        # technical reasons (annex using it for something in some mode)
        # should be reported as the thing it is representing (i.e.
        # a file)
        expect_symlink = 'link2' in str(report['path'])
        assert (report['type'] == 'symlink') == expect_symlink, report
        # every item must report its parent dataset
        assert_in('parentds', report)

    # bunch of smoke tests: none of the following path specifications
    # changes the outcome
    #  - query of '.' is same as no path
    #  - duplicate paths do not change things
    #  - neither do nested paths
    for equivalent in ('.', ['.', '.'], ['.', 'subds_modified']):
        eq_(plain_recursive, ds.status(path=equivalent, **quiet_recursive))
    # when invoked in a subdir of a dataset it still reports on the full thing
    # just like `git status`, as long as there are no paths specified
    with chpwd(op.join(path, 'directory_untracked')):
        plain_recursive = status(**quiet_recursive)
    # should be able to take absolute paths and yield the same
    # output
    eq_(plain_recursive, ds.status(path=ds.path, **quiet_recursive))

    # query for a deeply nested path from the top, should just work with a
    # variety of approaches
    rpath = op.join('subds_modified', 'subds_lvl1_modified',
                    OBSCURE_FILENAME + u'_directory_untracked')
    apath = str(ds.pathobj / rpath)
    # ds.repo.pathobj will have the symlink resolved
    arealpath = ds.repo.pathobj / rpath

    def _expect_one_untracked_dir(results):
        assert_result_count(
            results,
            1,
            state='untracked',
            type='directory',
            refds=ds.path,
            # path always comes out a full path inside the queried dataset
            path=apath,
        )

    # TODO include explicit relative path in test
    for query in (rpath, apath, arealpath):
        _expect_one_untracked_dir(ds.status(path=query, **quiet))
    # lastly, change into the realpath of the dataset and
    # query with an explicit path
    with chpwd(ds.repo.path):
        res = ds.status(path=op.join('.', rpath), **quiet)
    _expect_one_untracked_dir(res)

    assert_result_count(ds.status(**quiet_recursive), 1, path=apath)
    # limiting recursion will exclude this particular path
    assert_result_count(ds.status(recursion_limit=1, **quiet_recursive),
                        0,
                        path=apath)
    # negative limit is unlimited limit
    eq_(ds.status(recursion_limit=-1, **quiet_recursive),
        ds.status(**quiet_recursive))
Exemple #12
0
from datalad.utils import Path
from datalad.tests.utils import (
    assert_equal,
    assert_result_count,
    assert_true,
    DEFAULT_REMOTE,
    has_symlink_capability,
    skip_if,
    skip_if_on_windows,
    with_tempfile,
    with_tree,
)


@skip_if_on_windows  # currently all tests re RIA/ORA don't run on windows
@skip_if(cond=not has_symlink_capability(),
         msg="skip testing ephemeral clone w/o symlink capabilities")
@with_tree({'file1.txt': 'some', 'sub': {'other.txt': 'other'}})
@with_tempfile
@with_tempfile
def test_ephemeral(ds_path, store_path, clone_path):

    dspath = Path(ds_path)
    store = Path(store_path)
    file_test = Path('file1.txt')
    file_testsub = Path('sub') / 'other.txt'

    # create the original dataset
    ds = Dataset(dspath)
    ds.create(force=True)
    ds.save()