Example #1
def test_overrides():
    cfg = ConfigManager()
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', cfg)
    # set
    cfg.set('user.name', 'myoverride', scope='override')
    assert_equal(cfg['user.name'], 'myoverride')
    # unset just removes override, not entire config
    cfg.unset('user.name', scope='override')
    assert_in('user.name', cfg)
    assert_not_equal(cfg['user.name'], 'myoverride')
    # add
    # the first add behaves like set: no list yet
    cfg.add('user.name', 'myoverride', scope='override')
    assert_equal(cfg['user.name'], 'myoverride')
    # reading again still yields a single value, not a list
    assert_equal(cfg['user.name'], 'myoverride')
    # but a second add turns the value into a list
    cfg.add('user.name', 'myother', scope='override')
    assert_equal(cfg['user.name'], ['myoverride', 'myother'])
    # rename
    assert_not_in('ups.name', cfg)
    cfg.rename_section('user', 'ups', scope='override')
    # original variable still there
    assert_in('user.name', cfg)
    # rename of override in effect
    assert_equal(cfg['ups.name'], ['myoverride', 'myother'])
    # remove entirely by section
    cfg.remove_section('ups', scope='override')
    assert_not_in('ups.name', cfg, (
        cfg._stores,
        cfg.overrides,
    ))
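A minimal sketch of the override lifecycle exercised above, assuming only the ConfigManager calls already shown (set/add/unset with scope='override'); the values are illustrative:

from datalad.config import ConfigManager

cfg = ConfigManager()
cfg.set('user.name', 'tmp-name', scope='override')  # shadow the effective value
assert cfg['user.name'] == 'tmp-name'
cfg.add('user.name', 'second', scope='override')    # a second add builds a list
assert cfg['user.name'] == ['tmp-name', 'second']
cfg.unset('user.name', scope='override')            # drops only the override layer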
Example #2
def _check_auto_save(ds, orig_state):
    handle_dirty_dataset(ds, 'ignore')
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'fail')
    handle_dirty_dataset(ds, 'save-before')
    state = ds.repo.get_hexsha()
    assert_not_equal(orig_state, state)
    _check_all_clean(ds, state)
    return state
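For reference, a hedged sketch of the three handle_dirty_dataset modes the helper cycles through; the mode names come straight from the code above, while the import path is an assumption:

from datalad.interface.utils import handle_dirty_dataset  # assumed import path

handle_dirty_dataset(ds, 'ignore')       # leave pending modifications in place
# handle_dirty_dataset(ds, 'fail')       # would raise RuntimeError on a dirty tree
handle_dirty_dataset(ds, 'save-before')  # commit pending changes, advancing HEAD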
Example #3
def test_tuple_requests():
    bc = BatchedCommand(cmd=py2cmd("""
import time
import sys
print(f"{time.time()}:{sys.stdin.readline().strip()}")
            """))

    start_time_1, line = bc(("one", "line")).split(":")
    assert_equal(line, "one line")
    start_time_2, line = bc(("end", "now")).split(":")
    assert_not_equal(start_time_1, start_time_2)
    assert_equal(line, "end now")
    bc.close(return_stderr=False)
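The test relies on a tuple argument being joined into a single request line before it is written to the subprocess; a minimal sketch under that assumption, reusing BatchedCommand and py2cmd from above:

bc = BatchedCommand(cmd=py2cmd("import sys; print(sys.stdin.readline().strip())"))
assert bc(("hello", "world")) == "hello world"  # tuple items joined with a space
bc.close(return_stderr=False)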
Example #4
def test_archive(path=None):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert (isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # the same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # sleep long enough that, should the implementation start encoding the
    # current time again, the checksum comparison below would catch it
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore files under .datalad so this test is not
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', reckless='kill')
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))
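Condensed, the export behaviors this test depends on look roughly like this (paths are illustrative):

ds.export_archive(filename='/tmp/myexport')  # extension optional; '.tar.gz' is appended
ds.export_archive(filename='/tmp/partial',
                  missing_content='ignore')  # skip files whose content is not present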
Example #5
def test_command_fail_2():
    # Expect that the return code of a failing command is caught,
    # that None is returned as the result, and that the process is
    # restarted when the batched command is called again.
    bc = BatchedCommand(cmd=py2cmd("""
print(a*b)
            """))

    # Send something to start the process
    result = bc("line one")
    assert_not_equal(bc.return_code, 0)
    assert_is_none(result)
    result = bc("line two")
    assert_not_equal(bc.return_code, 0)
    assert_is_none(result)
    bc.close(return_stderr=False)
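Restated as a sketch, a failing batched process is expected to surface like this (no API beyond what the test already uses):

bc = BatchedCommand(cmd=py2cmd("print(undefined_name)"))  # NameError on any request
assert bc("anything") is None  # the failed request yields None ...
assert bc.return_code != 0     # ... and the non-zero exit code is recorded
bc.close(return_stderr=False)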
Example #6
def test_property_reevaluation(repo1=None):
    ds = Dataset(repo1)
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    first_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after creation, we have `repo`, and `config` was reevaluated to point
    # to the repo's config:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    second_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is(ds.config, ds.repo.config)
    assert_is_not(first_config, second_config)
    assert_is_not_none(ds.id)
    first_id = ds.id

    ds.drop(what='all', reckless='kill', recursive=True)
    # repo is gone, and config is again reevaluated to only provide user/system
    # level config:
    assert_false(lexists(ds.path))
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    third_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_not(second_config, third_config)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after recreation everything is sane again:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    assert_is(ds.config, ds.repo.config)
    fourth_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is_not(third_config, fourth_config)
    assert_is_not_none(ds.id)
    assert_not_equal(ds.id, first_id)
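The rebinding contract the test verifies can be compressed into a short hedged sketch; the path is made up, and _cfg_bound is a private attribute, touched above only because the test itself does:

ds = Dataset('/tmp/not-yet-created')  # no repository on disk yet
assert ds.repo is None                # nothing to bind the config to
ds.create()                           # now a repository exists ...
assert ds.config is ds.repo.config    # ... and config is rebound to the repo's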
Example #7
def test_cached_dataset(cache_dir=None):

    # patch DATALAD_TESTS_CACHE so that this test does not use the actual
    # cache while testing that very cache.
    cache_dir = Path(cache_dir)
    ds_url = "https://github.com/datalad/testrepo--minimalds"
    name_in_cache = url2filename(ds_url)
    annexed_file = Path("inannex") / "animated.gif"

    with patch(CACHE_PATCH_STR, new=cache_dir):

        @cached_dataset(url=ds_url)
        def decorated_test1(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(),
                                1,
                                type="sibling",
                                name=DEFAULT_REMOTE,
                                url=str(cache_dir / name_in_cache))
            here = ds.config.get("annex.uuid")
            origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_not_in(here, where)
            assert_not_in(origin, where)

            return ds.pathobj, ds.repo.pathobj

        @cached_dataset(url=ds_url, paths=str(annexed_file))
        def decorated_test2(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(),
                                1,
                                type="sibling",
                                name=DEFAULT_REMOTE,
                                url=str(cache_dir / name_in_cache))
            here = ds.config.get("annex.uuid")
            origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_in(here, where)
            assert_in(origin, where)

            return ds.pathobj, ds.repo.pathobj

        @cached_dataset(url=ds_url)
        def decorated_test3(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(),
                                1,
                                type="sibling",
                                name=DEFAULT_REMOTE,
                                url=str(cache_dir / name_in_cache))
            # origin is the same cached dataset that got this content in
            # decorated_test2 before; it should still be there, but "here"
            # we didn't request it
            here = ds.config.get("annex.uuid")
            origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_not_in(here, where)
            assert_in(origin, where)

            return ds.pathobj, ds.repo.pathobj

        @cached_dataset(url=ds_url,
                        version="541cf855d13c2a338ff2803d4488daf0035e568f")
        def decorated_test4(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(),
                                1,
                                type="sibling",
                                name=DEFAULT_REMOTE,
                                url=str(cache_dir / name_in_cache))
            # origin is the same cached dataset that got this content in
            # decorated_test2 before; it should still be there, but "here"
            # we didn't request it
            here = ds.config.get("annex.uuid")
            origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_not_in(here, where)
            assert_in(origin, where)

            assert_equal(ds.repo.get_hexsha(),
                         "541cf855d13c2a338ff2803d4488daf0035e568f")

            return ds.pathobj, ds.repo.pathobj

        first_dspath, first_repopath = decorated_test1()
        second_dspath, second_repopath = decorated_test2()
        decorated_test3()
        decorated_test4()

        # first and second are not the same, only their origin is:
        assert_not_equal(first_dspath, second_dspath)
        assert_not_equal(first_repopath, second_repopath)
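Reduced to its core, the decorator pattern exercised four times above looks like this; url and version are the parameters the test actually uses, and the function name is made up:

@cached_dataset(url="https://github.com/datalad/testrepo--minimalds",
                version="541cf855d13c2a338ff2803d4488daf0035e568f")
def my_test(ds):
    # `ds` is a fresh temporary clone of the cached dataset,
    # checked out at the requested version
    assert ds.repo.get_hexsha() == "541cf855d13c2a338ff2803d4488daf0035e568f"

my_test()  # the decorator injects `ds`; nothing is passed at the call site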
Example #8
def test_FileStatus_basic():
    assert_equal(FileStatus(size=0), FileStatus(size=0))
    assert_not_equal(FileStatus(size=0), FileStatus(size=1))
    # mtime comparison trims the fractional part when one side is an int
    assert_equal(FileStatus(mtime=0), FileStatus(mtime=0.9999))
    assert_equal(FileStatus(mtime=0), FileStatus(mtime=0.0001))
    assert_not_equal(FileStatus(mtime=0.2), FileStatus(mtime=0.1))
    assert_not_equal(FileStatus(mtime=0.2), FileStatus(mtime=None))
    assert_not_equal(FileStatus(mtime=1), FileStatus(mtime=None))
    # comparison with None should never be equal
    assert_not_equal(FileStatus(mtime=1), None)
    assert_not_equal(None, FileStatus(mtime=1))
    # adding more information would result in not-equal
    assert_not_equal(FileStatus(size=0), FileStatus(size=0, mtime=123))
    # empty ones can't be compared
    # TODO: actually not clear why the NotImplemented singleton is not returned
    assert_not_equal(FileStatus(), FileStatus())
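The comparison semantics asserted above, condensed into three lines (same FileStatus fields as in the test):

assert FileStatus(mtime=0) == FileStatus(mtime=0.9999)      # int side trims fractions
assert FileStatus(size=0) != FileStatus(size=0, mtime=123)  # extra fields break equality
assert FileStatus() != FileStatus()                         # empty statuses never compare equal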
Example #9
def test_target_ssh_simple(origin=None, src_path=None, target_rootpath=None):
    ca = dict(result_renderer='disabled')
    test_fname = 'test-annex.dat'
    orig = Dataset(origin).create(**ca)
    (orig.pathobj / test_fname).write_text('some')
    orig.save(**ca)

    port = get_ssh_port("datalad-test")
    # prepare src
    source = install(src_path,
                     source=origin,
                     result_xfm='datasets',
                     return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        # the original sshurl value (and some intervening checks) were
        # redacted in this listing as "ssh://*****:*****@..."; the redaction
        # is kept as-is rather than guessed at
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://*****:*****@...")
        # `src_is_annex` is used further below; this restored line assumes
        # the conventional datalad check for an annex-backed repository
        src_is_annex = AnnexRepo.is_valid_repo(src_path)

        @with_testsui(responses=["yes"])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(
                dataset=source,
                name="local_target",
                sshurl="ssh://datalad-test" + target_path,
                publish_by_default=DEFAULT_BRANCH,
                existing='replace',
                ui=have_webui(),
            )

        interactive_assert_create_sshwebserver()

        eq_("ssh://datalad-test" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            lclcfg = AnnexRepo(src_path).config
            eq_(lclcfg.get('remote.local_target.annex-ignore'), 'false')
            # valid uuid
            eq_(lclcfg.get('remote.local_target.annex-uuid').count('-'), 4)
            # should be added too, even if URL matches prior state
            eq_(lclcfg.get('remote.local_target.push'), DEFAULT_BRANCH)

        # again, by explicitly passing urls. Since we are on datalad-test, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://datalad-test",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://datalad-test" + target_path,
            ui=have_webui(),
        )

        @with_testsui(responses=['yes'])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(existing='replace', **cpkwargs)

        interactive_assert_create_sshwebserver()

        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://datalad-test" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        if have_webui():
            from datalad_deprecated.tests.test_create_sibling_webui import (
                assert_publish_with_ui, )
            assert_publish_with_ui(target_path)

        # now, push should work:
        push(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have a "compatible" datalad
                # installed on the remote end. ATM we don't have an easy way
                # to guarantee that AFAIK (yoh), so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            # and ignore .git/logs content (gh-5298)
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs') \
                        or f.startswith('.git/logs/'):
                    digests.pop(f)
                    mtimes.pop(f)

        if not have_webui():
            # the rest of the test assumes that we have uploaded a UI
            return
        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect the files that are expected to be modified without any
        # change in content
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
        }
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git versions the behavior has
        # changed a bit and the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        ok_(modified_files.issuperset(ok_modified_files))
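Finally, the 'reconfigure' round-trip at the heart of this last block, sketched with the same keywords the test collects in cpkwargs (values illustrative):

create_sibling(dataset=source,
               name="local_target",
               sshurl="ssh://datalad-test",
               target_dir=target_path,
               existing='reconfigure')   # rewrite the sibling configuration in place;
                                         # content on the target is left untouched
push(dataset=source, to="local_target")  # publishing still works afterwards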