Ejemplo n.º 1
0
def test_install_skip_failed_recursive(src=None, path=None):
    """A polluted subdataset target must error on recursive get, while
    its sibling subdataset still installs fine."""
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")

    # install top level:
    top = install(path, source=src)
    blocked = Dataset(opj(path, 'subm 1'))
    fine = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        res = top.get(os.curdir,
                      recursive=True,
                      on_failure='ignore',
                      result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(res, 0, path=top.path, type='dataset')
        # subm 1 should fail to install. [1] since comes after '2' submodule
        assert_in_results(
            res,
            status='error',
            path=blocked.path,
            type='dataset',
            message='target path already exists and not empty, refuse to '
            'clone into target path')
        assert_in_results(res, status='ok', path=fine.path)
Ejemplo n.º 2
0
def test_install_known_subdataset(src=None, path=None):
    """Install a known (registered but absent) subdataset, then get its data.

    Verifies the absent/present state transitions reported by
    ``subdatasets`` and that the installed repo is the actual annex
    submodule rather than a freshly initialized repository.
    """
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")

    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(state='absent', result_xfm='relpaths'))
    assert_not_in('subm 1',
                  ds.subdatasets(state='present', result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # new repository initiated
    # (fixed: the original statement carried a stray trailing comma that
    # wrapped it into a one-element tuple)
    assert_in("test-annex.dat", subds.repo.get_indexed_files())
    assert_not_in('subm 1',
                  ds.subdatasets(state='absent', result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(state='present', result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
Ejemplo n.º 3
0
def test_install_list(path=None, top_path=None):
    """Install multiple known subdatasets in one call; ``source`` is rejected."""
    _mk_submodule_annex(path, fname="test-annex.dat", fcontent="whatever")

    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    top = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', top.config)
    ok_(top.is_installed())
    first_sub = Dataset(opj(top_path, 'subm 1'))
    second_sub = Dataset(opj(top_path, '2'))
    for sub in (first_sub, second_sub):
        ok_(not sub.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError,
                  top.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    installed = top.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(first_sub.is_installed())
    ok_(second_sub.is_installed())
    eq_(set(installed), {first_sub.path, second_sub.path})
    # and if we request it again via get, result should be empty
    repeat = top.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', repeat)
Ejemplo n.º 4
0
def test_get_install_missing_subdataset(src=None, path=None):
    """``get`` installs a subdataset only when a given path points into it,
    or when recursion is requested explicitly."""
    _mk_submodule_annex(src, 'test-annex.dat', 'irrelevant')

    ds = install(path=path,
                 source=src,
                 result_xfm='datasets',
                 return_type='item-or-list')
    ds.create(force=True)  # force, to cause dataset initialization
    subs = ds.subdatasets(result_xfm='datasets')
    # generator expressions instead of throwaway list literals inside all()
    ok_(all(not sub.is_installed() for sub in subs))

    # we don't install anything, if no explicitly given path points into a
    # not yet installed subdataset:
    ds.get(curdir)
    ok_(all(not sub.is_installed() for sub in subs))

    # but we do, whenever a given path is contained in such a subdataset:
    file_ = opj(subs[0].path, 'test-annex.dat')
    ds.get(file_)
    ok_(subs[0].is_installed())
    ok_(subs[0].repo.file_has_content('test-annex.dat') is True)

    # but we fulfill any handles, and dataset handles too
    ds.get(curdir, recursive=True)
    ok_(all(sub.is_installed() for sub in subs))
Ejemplo n.º 5
0
def test_get_greedy_recurse_subdatasets(src=None, path=None):
    """Passing subdataset paths to get fetches all of their content,
    but none of the superdataset's own content."""
    _mk_submodule_annex(src, 'test-annex.dat', 'irrelevant')

    super_ds = install(path,
                       source=src,
                       result_xfm='datasets',
                       return_type='item-or-list')

    # GIMME EVERYTHING
    super_ds.get(['subm 1', '2'])

    # We got all content in the subdatasets
    first, second = super_ds.subdatasets(result_xfm='datasets')
    ok_(super_ds.repo.file_has_content('test-annex.dat') is False)
    ok_(first.repo.file_has_content('test-annex.dat') is True)
    ok_(second.repo.file_has_content('test-annex.dat') is True)
Ejemplo n.º 6
0
def test_install_skip_list_arguments(src=None, path=None, path_outside=None):
    """Install with a mixed path list: valid entries succeed while invalid
    ones yield 'impossible'/'error' results (and raise without ignore)."""
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")

    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    result = ds.install(path=['subm 1', 'not_existing', path_outside, '2'],
                        get_data=False,
                        on_failure='ignore',
                        result_xfm=None,
                        return_type='list')
    # good and bad results together
    ok_(isinstance(result, list))
    eq_(len(result), 4)
    # check that we have an 'impossible/error' status for both invalid args
    # but all the other tasks have been accomplished
    assert_result_count(result,
                        1,
                        status='impossible',
                        message="path does not exist",
                        path=opj(ds.path, 'not_existing'))
    assert_result_count(result,
                        1,
                        status='error',
                        message=("path not associated with dataset %s", ds),
                        path=path_outside)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, '2'))]:
        assert_result_count(result,
                            1,
                            status='ok',
                            message=('Installed subdataset in order to get %s',
                                     sub.path))
        ok_(sub.is_installed())

    # return of get is always a list, by default, even if just one thing was gotten
    # in this case 'subm1' was already obtained above, so this will get this
    # content of the subdataset
    # (fixed: the exception objects were captured 'as cme' but never used)
    with assert_raises(IncompleteResultsError):
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError):
        ds.get(path=['subm 1', 'not_existing'])
Ejemplo n.º 7
0
def test_reckless(path=None, top_path=None):
    """Reckless install enables annex hardlinking and distrusts 'here'."""
    _mk_submodule_annex(path, fname="test-annex.dat", fcontent="whatever")

    installed = install(top_path, source=path, reckless=True)
    eq_(installed.config.get('annex.hardlink', None), 'true')
    info = installed.repo.repo_info()
    eq_(info['untrusted repositories'][0]['here'], True)
Ejemplo n.º 8
0
def check_target_ssh_since(use_ssh, origin, src_path, target_path):
    """Check create-sibling's ``since`` handling for newly added subdatasets.

    Only subdatasets changed since the given revision should get a sibling
    created; nested subdatasets are picked up once they are saved in the
    superdataset. Covers https://github.com/datalad/datalad/issues/6596.
    """
    _mk_submodule_annex(origin, 'test-annex.dat', 'whatever')

    # target is reached either via ssh or as a plain local path
    if use_ssh:
        sshurl = "ssh://datalad-test" + target_path
    else:
        sshurl = target_path
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # get a new subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    assert_repo_status(source.path)

    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(name='dominique_carrera',
                               dataset=source,
                               sshurl=sshurl,
                               recursive=True,
                               since='HEAD~1')
    # there is one thing in the target directory only, and that is the
    # remote repo of the newly added subdataset

    target = Dataset(target_path)
    ok_(not target.is_installed())  # since we didn't create it due to since
    eq_(['brandnew'], os.listdir(target_path))

    # now test functionality if we add a subdataset with a subdataset
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(name='dominique_carrera',
                               dataset=source,
                               sshurl=sshurl,
                               recursive=True,
                               existing='skip')
    # verify that it created the immediate subdataset
    ok_(Dataset(_path_(target_path, 'brandnew2')).is_installed())
    # but not the subs since they were not saved, thus even push would not operate
    # on them yet, so no reason for us to create them until subdatasets are saved
    ok_(not Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())

    # record the nested subdatasets in the superdataset
    source.save(recursive=True)

    # and if repeated now -- will create those sub/sub
    assert_create_sshwebserver(name='dominique_carrera',
                               dataset=source,
                               sshurl=sshurl,
                               recursive=True,
                               existing='skip')
    # verify that it created the immediate subdataset
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())

    # now we will try with --since while creating even deeper nested one, and ensuring
    # it is created -- see https://github.com/datalad/datalad/issues/6596
    brandnewsubsub.create('sub')
    source.save(recursive=True)
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(name='dominique_carrera',
                               dataset=source,
                               sshurl=sshurl,
                               recursive=True,
                               existing='skip',
                               since=f'{DEFAULT_REMOTE}/{DEFAULT_BRANCH}')
    # verify that it created the sub and sub/sub
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub/sub')).is_installed())

    # we installed without web ui - no hooks should be created/enabled
    assert_postupdate_hooks(_path_(target_path, 'brandnew'), installed=False)
Ejemplo n.º 9
0
def check_target_ssh_recursive(use_ssh, origin, src_path, target_path):
    """Check recursive create-sibling in both nested and 'flat' layouts.

    For each layout it creates siblings for the super- and subdatasets,
    verifies git repos (and optionally web UI files) exist at the target,
    pushes, and exercises ``since='^'`` / deprecated ``since=''`` behavior
    for a subdataset added after the first publication.
    """
    _mk_submodule_annex(origin, 'test-annex.dat', 'whatever')

    # prepare src
    source = install(src_path, source=origin, recursive=True)

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))

    # run the whole scenario once with nested target dirs, once flattened
    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            # flat layout: subdatasets land next to the super, joined by '-'
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep

        if use_ssh:
            sshurl = "ssh://datalad-test" + target_path_
        else:
            sshurl = target_path_

        remote_name = 'remote-' + str(flat)
        with chpwd(source.path):
            assert_create_sshwebserver(name=remote_name,
                                       sshurl=sshurl,
                                       target_dir=target_dir_tpl,
                                       recursive=True,
                                       ui=have_webui())

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_, 'prefix' if flat else "").rstrip(
                os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            if have_webui():
                from datalad_deprecated.tests.test_create_sibling_webui import (
                    assert_publish_with_ui, )
                assert_publish_with_ui(target_dir,
                                       rootds=not suffix,
                                       flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        push(dataset=source, to=remote_name)

        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since is an empty value to force it to consider all changes since we published
        # already
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since='^'
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #push(to=remote_name)  # no recursion
            out1 = assert_create_sshwebserver(name=remote_name,
                                              sshurl=sshurl,
                                              target_dir=target_dir_tpl,
                                              recursive=True,
                                              existing='skip',
                                              ui=have_webui(),
                                              since='^')
            assert_postupdate_hooks(target_path_,
                                    installed=have_webui(),
                                    flat=flat)
            assert_result_count(out1, 1, status='ok', sibling_name=remote_name)

            # ensure that nothing is created since since is used.
            # Also cover deprecation for since='' support.  Takes just 60ms or so.
            # TODO: change or remove when removing since='' deprecation support
            out2 = assert_create_sshwebserver(name=remote_name,
                                              sshurl=sshurl,
                                              target_dir=target_dir_tpl,
                                              recursive=True,
                                              existing='skip',
                                              ui=have_webui(),
                                              since='')
            assert_result_count(out2,
                                1,
                                status='notneeded',
                                sibling_name=remote_name)

        # so it was created on remote correctly and wasn't just skipped
        assert (Dataset(
            _path_(target_path_,
                   ('prefix-' if flat else '') + sub3_name)).is_installed())
        push(dataset=source, to=remote_name, recursive=True,
             since='^')  # just a smoke test
Ejemplo n.º 10
0
def test_get_recurse_subdatasets(src=None, path=None):
    """Exercise get's recursion semantics for annexed content in subdatasets:
    explicit path into a subdataset, full recursion, and no recursion."""
    _mk_submodule_annex(src, 'test-annex.dat', 'irrelevant')

    ds = clone(src, path, result_xfm='datasets', return_type='item-or-list')

    # ask for the two subdatasets specifically. This will obtain them,
    # but not any content of any files in them
    subds1, subds2 = ds.get(['subm 1', '2'],
                            get_data=False,
                            description="youcouldnotmakethisup",
                            result_xfm='datasets')
    for d in (subds1, subds2):
        eq_(d.repo.get_description(), 'youcouldnotmakethisup')

    # there are 3 files to get: test-annex.dat within each dataset:
    rel_path_sub1 = opj(basename(subds1.path), 'test-annex.dat')
    rel_path_sub2 = opj(basename(subds2.path), 'test-annex.dat')
    annexed_files = {'test-annex.dat', rel_path_sub1, rel_path_sub2}

    # None of them is currently present:
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    assert_repo_status(subds1.path)
    # explicitly given path in subdataset => implicit recursion:
    # MIH: Nope, we fulfill the dataset handle, but that doesn't
    #      imply fulfilling all file handles
    result = ds.get(rel_path_sub1, recursive=True)
    # the subdataset was already present
    assert_in_results(result,
                      type='dataset',
                      path=subds1.path,
                      status='notneeded')
    # we got the file
    # (fixed: the original repeated this exact assertion twice)
    assert_in_results(result, path=opj(ds.path, rel_path_sub1), status='ok')
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)

    # drop it:
    subds1.repo.drop('test-annex.dat')
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)

    # now, with a path not explicitly pointing within a
    # subdataset, but recursive option:
    # get everything:
    result = ds.get(recursive=True,
                    result_filter=lambda x: x.get('type') != 'dataset')
    assert_status('ok', result)

    eq_(
        set([
            item.get('path')[len(ds.path) + 1:] for item in result
            if item['type'] == 'file'
        ]), annexed_files)
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)
    ok_(subds2.repo.file_has_content('test-annex.dat') is True)

    # drop them:
    ds.repo.drop('test-annex.dat')
    subds1.repo.drop('test-annex.dat')
    subds2.repo.drop('test-annex.dat')
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    # now, the very same call, but without recursive:
    result = ds.get('.', recursive=False)
    assert_status('ok', result)
    # no duplicate reporting on subdataset install and annex-get of its
    # directory
    eq_(len(result), 1)
    assert_result_count(result,
                        1,
                        path=opj(ds.path, 'test-annex.dat'),
                        status='ok')
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)