Example #1
def test_clone_unborn_head_no_other_ref(path):
    ds_origin = Dataset(op.join(path, "a")).create(annex=False)
    ds_origin.repo.call_git(["update-ref", "-d",
                             "refs/heads/" + DEFAULT_BRANCH])
    with swallow_logs(new_level=logging.WARNING) as cml:
        clone(source=ds_origin.path, path=op.join(path, "b"))
        assert_in("could not find a branch with commits", cml.out)
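For orientation, the call pattern that all of the following tests exercise is just this (a minimal sketch; the source URL and target path are hypothetical):

import datalad.api as dl

# clone() returns the resulting Dataset object by default,
# as the tests here rely on
ds = dl.clone(source='https://example.com/some/repo.git',
              path='/tmp/myclone')
print(ds.path)            # local location of the clone
print(ds.is_installed())  # True after a successful clone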
Example #2
def test_reckless(src, top_path, sharedpath):
    ds = clone(src, top_path, reckless=True,
               result_xfm='datasets', return_type='item-or-list')
    eq_(ds.config.get('annex.hardlink', None), 'true')
    # actual value is 'auto', because True is a legacy value and we map it
    eq_(ds.config.get('datalad.clone.reckless', None), 'auto')
    eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
    # now, if we clone another repo into this one, it will inherit the setting
    # without having to provide it explicitly
    sub = ds.clone(src, 'sub', result_xfm='datasets', return_type='item-or-list')
    eq_(sub.config.get('datalad.clone.reckless', None), 'auto')
    eq_(sub.config.get('annex.hardlink', None), 'true')

    if ds.repo.is_managed_branch():
        raise SkipTest("Remainder of test needs proper filesystem permissions")

    # the standard setup keeps the annex locks accessible to the user only
    nok_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
         & stat.S_IWGRP)
    # but we can set it up for group-shared access too
    sharedds = clone(
        src, sharedpath,
        reckless='shared-group',
        result_xfm='datasets',
        return_type='item-or-list')
    ok_((sharedds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
        & stat.S_IWGRP)
Example #3
def test_expanduser(srcpath, destpath):
    src = Dataset(Path(srcpath) / 'src').create()
    dest = Dataset(Path(destpath) / 'dest').create()

    with chpwd(destpath), patch.dict('os.environ', {'HOME': srcpath}):
        res = clone(op.join('~', 'src'),
                    'dest',
                    result_xfm=None,
                    return_type='list',
                    on_failure='ignore')
        assert_result_count(res, 1)
        assert_result_count(
            res,
            1,
            action='install',
            status='error',
            path=dest.path,
            message='target path already exists and not empty, refuse to '
            'clone into target path')
        # wipe out destination, and try again
        assert_status('ok', remove(dataset=dest, check=False))
        # now it should do it, and clone the right one
        cloneds = clone(op.join('~', 'src'), 'dest')
        eq_(cloneds.pathobj, Path(destpath) / 'dest')
        eq_(src.id, cloneds.id)
        # and it shouldn't fail when doing it again, because it detects
        # the re-clone
        cloneds = clone(op.join('~', 'src'), 'dest')
        eq_(cloneds.pathobj, Path(destpath) / 'dest')
Example #4
def test_fetch_git_special_remote(url_path, url, path):
    url_path = Path(url_path)
    path = Path(path)
    ds_special = Dataset(url_path / "special").create(force=True)
    if ds_special.repo.is_managed_branch():
        # TODO: git-annex-init fails in the second clone call below when this is
        # executed under ./tools/eval_under_testloopfs.
        raise SkipTest("Test fails on managed branch")
    ds_special.save()
    ds_special.repo.call_git(["update-server-info"])

    clone_url = url + "special/.git"
    ds_a = clone(clone_url, path / "a")
    ds_a.repo._run_annex_command(
        "initremote",
        annex_options=["special", "type=git", "autoenable=true",
                       "location=" + clone_url])

    # Set up a situation where a file is present only on the special remote,
    # and its existence is known only to the special remote's git-annex branch.
    (ds_special.pathobj / "f1").write_text("1")
    ds_special.save()
    ds_special.repo.call_git(["update-server-info"])

    ds_a.repo.fetch("origin")
    ds_a.repo.merge("origin/" + DEFAULT_BRANCH)

    ds_b = clone(ds_a.path, path / "other")
    ds_b.get("f1")
    ok_(ds_b.repo.file_has_content("f1"))
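The initremote call above corresponds to this plain git-annex invocation, registering the served repository itself as a type=git special remote (a sketch; clone_url and the working directory are hypothetical):

import subprocess

clone_url = 'http://example.com/special/.git'  # hypothetical
subprocess.run(
    ['git', 'annex', 'initremote', 'special',
     'type=git', 'autoenable=true', 'location=' + clone_url],
    cwd='/path/to/clone', check=True)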
Example #5
def test_invalid_args(path, otherpath, alienpath):
    # source == path
    assert_raises(ValueError, clone, 'Zoidberg', path='Zoidberg')
    assert_raises(ValueError, clone, 'ssh://mars/Zoidberg', path='ssh://mars/Zoidberg')

    # "invalid URL" is a valid filepath... and since no clone to remote
    # is possible - we can just assume that it is the (legit) file path
    # which is provided, not a URL.  So both below should fail as any
    # other clone from a non-existing source and not for the reason of
    # "invalid something".  Behavior is similar to how Git performs - can
    # clone into a URL-like path.

    # install to an "invalid URL" path
    res = clone('Zoidberg', path='ssh://mars:Zoidberg', on_failure='ignore')
    assert_status('error', res)

    # install to a "remote location" path
    res = clone('Zoidberg', path='ssh://mars/Zoidberg', on_failure='ignore')
    assert_status('error', res)

    # make fake dataset
    ds = create(path)
    assert_raises(IncompleteResultsError, ds.clone, '/higherup.', 'Zoidberg')
    # make real dataset, try to install outside
    ds_target = create(Path(otherpath) / 'target')
    assert_raises(ValueError, ds_target.clone, ds.path, path=ds.path)
    assert_status('error', ds_target.clone(ds.path, path=alienpath, on_failure='ignore'))
Example #6
def test_ephemeral(origin_path, clone1_path, clone2_path):

    file_test = Path('ds') / 'test.txt'
    file_testsub = Path('ds') / 'subdir' / 'testsub.txt'

    origin = Dataset(origin_path).create(force=True)
    origin.save()
    # 1. clone via path
    clone1 = clone(origin_path, clone1_path, reckless='ephemeral')

    can_symlink = has_symlink_capability()

    if can_symlink:
        clone1_annex = (clone1.repo.dot_git / 'annex')
        ok_(clone1_annex.is_symlink())
        ok_(clone1_annex.resolve().samefile(origin.repo.dot_git / 'annex'))
        if not clone1.repo.is_managed_branch():
            # TODO: We can't properly handle adjusted branch yet
            eq_((clone1.pathobj / file_test).read_text(), 'some')
            eq_((clone1.pathobj / file_testsub).read_text(), 'somemore')

    # 2. clone via file-scheme URL
    clone2 = clone('file://' + Path(origin_path).as_posix(), clone2_path,
                   reckless='ephemeral')

    if can_symlink:
        clone2_annex = (clone2.repo.dot_git / 'annex')
        ok_(clone2_annex.is_symlink())
        ok_(clone2_annex.resolve().samefile(origin.repo.dot_git / 'annex'))
        if not clone2.repo.is_managed_branch():
            # TODO: We can't properly handle adjusted branch yet
            eq_((clone2.pathobj / file_test).read_text(), 'some')
            eq_((clone2.pathobj / file_testsub).read_text(), 'somemore')

    # 3. add something to clone1 and push back to origin; availability from
    # clone1 should not be propagated (we declared 'here' dead to that end)

    (clone1.pathobj / 'addition.txt').write_text("even more")
    clone1.save()
    origin.config.set("receive.denyCurrentBranch", "updateInstead",
                      where="local")
    # Note: if we couldn't symlink, the only thing to test here is
    # git-annex-dead:
    clone1.publish(to='origin', transfer_data='none' if can_symlink else 'auto')
    if not origin.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origin.repo.get_hexsha(), clone1.repo.get_hexsha())
    res = origin.repo.whereis("addition.txt")
    if can_symlink:
        # obv. present in origin, but this is not yet known to origin:
        eq_(res, [])
        res = origin.repo.fsck()
        assert_result_count(res, 3, success=True)
        # TODO: Double check whether annex reports POSIX paths on Windows!
        eq_({str(file_test), str(file_testsub), "addition.txt"},
            {r['file'] for r in res})
        # now origin knows:
    res = origin.repo.whereis("addition.txt")
    eq_(res, [origin.config.get("annex.uuid")])
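The defining property of an ephemeral clone, distilled from the test (a sketch; origin_path is a hypothetical local annex dataset, and working symlinks are assumed):

import datalad.api as dl

origin_path = '/path/to/origin'  # hypothetical
eph = dl.clone(source=origin_path, path='/tmp/eph', reckless='ephemeral')
# no private annex: .git/annex is a symlink into the origin's annex,
# and the clone declares itself dead to git-annex location tracking
print((eph.repo.dot_git / 'annex').is_symlink())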
Example #7
def test_remove_nowhining(path=None):
    # removing a dataset that lives under another dataset (but is not a
    # registered subdataset) should not produce a meaningless message that
    # something was not right
    ds = Dataset(path).create()
    # just install/clone inside of it
    subds_path = ds.pathobj / 'subds'
    clone(path=subds_path, source=path)
    remove(dataset=subds_path)  # should remove just fine
Example #8
def test_update_fetch_all(path=None):
    path = Path(path)
    remote_1 = str(path / "remote_1")
    remote_2 = str(path / "remote_2")

    ds = Dataset(path / "src").create()
    src = ds.repo.path

    ds_rmt1 = clone(source=src, path=remote_1)
    ds_rmt2 = clone(source=src, path=remote_2)

    ds.siblings('add', name="sibling_1", url=remote_1)
    ds.siblings('add', name="sibling_2", url=remote_2)

    # modify the remotes:
    (ds_rmt1.pathobj / "first.txt").write_text("some file load")
    ds_rmt1.save()

    # TODO: Modify an already present file!

    (ds_rmt2.pathobj / "second.txt").write_text("different file load")
    ds_rmt2.save()

    # Let's init some special remote which we couldn't really update/fetch
    if not dl_cfg.get('datalad.tests.dataladremote'):
        ds.repo.init_remote(
            'datalad',
            ['encryption=none', 'type=external', 'externaltype=datalad'])
    # fetch all remotes
    assert_result_count(ds.update(), 1, status='ok', type='dataset')

    # no merge, so changes are not in active branch:
    assert_not_in("first.txt", ds.repo.get_files(ds.repo.get_active_branch()))
    assert_not_in("second.txt", ds.repo.get_files(ds.repo.get_active_branch()))
    # but we know the changes in remote branches:
    assert_in("first.txt", ds.repo.get_files("sibling_1/" + DEFAULT_BRANCH))
    assert_in("second.txt", ds.repo.get_files("sibling_2/" + DEFAULT_BRANCH))

    # no merge strategy for multiple remotes yet:
    # more clever now, there is a tracking branch that provides a remote
    #assert_raises(NotImplementedError, ds.update, merge=True)

    # merge a certain remote:
    assert_result_count(ds.update(sibling='sibling_1', merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')

    # changes from sibling_2 still not present:
    assert_not_in("second.txt", ds.repo.get_files(ds.repo.get_active_branch()))
    # changes from sibling_1 merged:
    assert_in("first.txt", ds.repo.get_files(ds.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    annexprops = ds.repo.get_file_annexinfo("first.txt",
                                            eval_availability=True)
    annexprops['key']  # blows if unknown
    eq_(False, annexprops['has_content'])
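The update pattern exercised above, in brief (a sketch, assuming a dataset with siblings named as in the test):

import datalad.api as dl

ds = dl.Dataset('/path/to/dataset')  # hypothetical, with configured siblings
ds.update()                                 # fetch all siblings, no merge
ds.update(sibling='sibling_1', merge=True)  # fetch and merge one sibling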
Example #9
def test_as_common_datasource(testbed=None,
                              viapath=None,
                              viaurl=None,
                              remotepath=None,
                              url=None):
    ds = Dataset(remotepath).create()
    (ds.pathobj / 'testfile').write_text('likemagic')
    (ds.pathobj / 'testfile2').write_text('likemagic2')
    ds.save()

    # make clonable via HTTP
    ds.repo.call_git(['update-server-info'])

    # this does not work for remotes that have path URLs
    ds_frompath = clone(source=remotepath, path=viapath)
    res = ds_frompath.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike',
        on_failure='ignore',
        result_renderer='disabled',
    )
    assert_in_results(
        res,
        status='impossible',
        message='cannot configure as a common data source, URL protocol '
        'is not http or https',
    )

    # but it works for HTTP
    ds_fromurl = clone(source=url, path=viaurl)
    res = ds_fromurl.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike2',
        result_renderer='disabled',
    )
    assert_status('ok', res)
    # same thing should be possible by adding a fresh remote
    res = ds_fromurl.siblings(
        'add',
        name='fresh',
        url=url,
        as_common_datasrc='fresh-sr',
        result_renderer='disabled',
    )
    assert_status('ok', res)

    # now check whether it works: we clone the clone, and get a repo that
    # does not know its ultimate origin. Still, we should be able to pull
    # data from it via the special remote
    testbed = clone(source=ds_fromurl, path=testbed)
    assert_status('ok', testbed.get('testfile'))
    eq_('likemagic', (testbed.pathobj / 'testfile').read_text())
    # and the other one
    assert_status('ok', testbed.get('testfile2'))
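The mechanism under test: configuring an http(s) sibling as a common data source turns it into an auto-enabled special remote, so even clones that never heard of the original origin can fetch annexed data from it. A sketch (dataset path, sibling name, and source name are hypothetical):

import datalad.api as dl

ds = dl.Dataset('/path/to/clone')  # hypothetical
# only works for siblings reachable via http or https
ds.siblings('configure', name='origin', as_common_datasrc='mike')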
Example #10
def test_clone_isnt_a_smartass(origin_path, path):
    origin = create(origin_path)
    cloned = clone(origin, path,
                   result_xfm='datasets', return_type='item-or-list')
    with chpwd(path):
        # now we are inside a dataset clone, and we make another one;
        # we do not want automatic subdatasetification without being given
        # a dataset explicitly
        clonedsub = clone(origin, 'testsub',
                          result_xfm='datasets', return_type='item-or-list')
    # correct destination
    assert clonedsub.path.startswith(path)
    # no subdataset relation
    eq_(cloned.subdatasets(), [])
Example #11
def test_clone_report_permission_issue(tdir):
    pdir = Path(tdir) / 'protected'
    pdir.mkdir()
    # make it read-only
    pdir.chmod(0o555)
    with chpwd(pdir):
        # first check the premise of the test. If we can write (strangely
        # mounted/crippled file system), subsequent assumptions are violated
        # and we can stop
        probe = Path('probe')
        try:
            probe.write_text('should not work')
            raise SkipTest
        except PermissionError:
            # we are indeed in a read-only situation
            pass
        res = clone('///',
                    result_xfm=None,
                    return_type='list',
                    on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res,
            1,
            status='error',
            message="could not create work tree dir '%s/%s': Permission denied"
            % (pdir, get_datasets_topdir()))
Example #12
def test_ria_http_storedataladorg(path):
    # can we clone from the store w/o any dedicated config
    ds = clone(
        'ria+http://store.datalad.org#{}'.format(datalad_store_testds_id),
        path)
    ok_(ds.is_installed())
    eq_(ds.id, datalad_store_testds_id)
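The RIA URL decomposes into 'ria+' plus the store's base URL, and a fragment carrying the dataset ID, optionally pinned to a version, or an alias (see the create_alias example below). A sketch with placeholder values:

import datalad.api as dl

# ria+<store-url>#<dataset-id>[@<version>], or ria+<store-url>#~<alias>
source = 'ria+http://store.datalad.org#00000000-0000-0000-0000-000000000000'
ds = dl.clone(source, '/tmp/from-ria')  # placeholder id, for illustration only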
Example #13
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule that got installed and not
    # a newly initialized repository
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
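The takeaway pattern: a subdataset that is already registered in the superdataset is installed via get(), not via clone() (a sketch; the path is hypothetical):

import datalad.api as dl

ds = dl.Dataset('/path/to/superdataset')  # hypothetical
# install the registered subdataset without fetching file content
ds.get(path='subm 1', get_data=False)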
Example #14
def test_sibling_enable_sameas(repo=None, clone_path=None):
    ds = Dataset(repo.path)
    create_tree(ds.path, {"f0": "0"})
    ds.save(path="f0")
    ds.push(["f0"], to="r_dir")
    ds.repo.drop(["f0"])

    ds_cloned = clone(ds.path, clone_path)

    assert_false(ds_cloned.repo.file_has_content("f0"))
    # does not work without a name
    res = ds_cloned.siblings(
        action="enable",
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res,
                      status='error',
                      message='require `name` of sibling to enable')
    # does not work with the wrong name
    res = ds_cloned.siblings(
        action="enable",
        name='wrong',
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res,
                      status='impossible',
                      message=("cannot enable sibling '%s', not known",
                               'wrong'))
    # works with the right name
    res = ds_cloned.siblings(action="enable", name="r_rsync")
    assert_status("ok", res)
    ds_cloned.get(path=["f0"])
    ok_(ds_cloned.repo.file_has_content("f0"))
Example #15
def test_orc_datalad_no_remote_get(tmpdir, shell, should_pass):
    import datalad.api as dl

    topdir = str(tmpdir)
    ds_a = dl.create(op.join(topdir, "a"))
    if should_pass:
        (ds_a.pathobj / "foo").write_text("data")
        ds_a.save()

    ds_b = dl.clone(ds_a.path, op.join(topdir, "b"))
    assert not ds_b.repo.file_has_content("foo")
    with chpwd(ds_b.path):
        orc = orcs.DataladNoRemoteOrchestrator(shell,
                                               submission_type="local",
                                               job_spec={
                                                   "root_directory":
                                                   op.join(topdir, "run-root"),
                                                   "inputs": ["foo"],
                                                   "outputs": ["out"],
                                                   "_resolved_command_str":
                                                   'sh -c "cat foo foo >out"'
                                               })
        if should_pass:
            orc.prepare_remote()
            orc.submit()
            orc.follow()

            finish_fn = MagicMock()
            orc.fetch(on_remote_finish=finish_fn)
            finish_fn.assert_called_once_with(orc.resource, [])
            assert (ds_b.pathobj / "out").read_text() == "datadata"
        else:
            with pytest.raises(OrchestratorError):
                orc.prepare_remote()
Example #16
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path, result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1',
                  ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone data from any candidate source URL: %s',
                   res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule that got installed and not
    # a newly initialized repository
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1',
                  ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
Example #17
def test_inherit_src_candidates(lcl, storepath, url):
    lcl = Path(lcl)
    storepath = Path(storepath)
    # dataset with a subdataset
    ds1 = Dataset(lcl / 'ds1').create()
    ds1sub = ds1.create('sub')
    # a different dataset into which we install ds1, but do not touch its subds
    ds2 = Dataset(lcl / 'ds2').create()
    ds2.clone(source=ds1.path, path='mysub')

    # we give no dataset a source candidate config!
    # move all datasets into the store
    for d in (ds1, ds1sub, ds2):
        _move2store(storepath, d)

    # now we must be able to obtain all three datasets from the store
    riaclone = clone(
        'ria+{}#{}'.format(
            # store URL
            url,
            # ID of the root dataset
            ds2.id),
        lcl / 'clone',
    )
    # what happens is that the initial clone call sets a source candidate
    # config, because it sees the dataset coming from a store;
    # all obtained subdatasets inherit that config on clone
    datasets = riaclone.get('.', get_data=False, recursive=True, result_xfm='datasets')
    # we get two subdatasets
    eq_(len(datasets), 2)
    for ds in datasets:
        eq_(ConfigManager(dataset=ds, source='dataset-local').get(
            'datalad.get.subdataset-source-candidate-200origin'),
            'ria+%s#{id}' % url)
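The inherited entry is a source-candidate template; the same mechanism can be configured manually to teach get() where subdatasets live. A sketch (store URL and candidate name are hypothetical; '200' is the candidate's cost/priority, and '{id}' expands to the subdataset's ID):

import datalad.api as dl

ds = dl.Dataset('/path/to/dataset')  # hypothetical
ds.config.set(
    'datalad.get.subdataset-source-candidate-200mystore',
    'ria+file:///data/store#{id}',
    where='local')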
Example #18
def test_autoenabled_remote_msg(path):
    # Verify that no message about a remote not being enabled is displayed
    # when the remote we clone from is the type=git special remote, whose
    # name might not match
    with swallow_logs(new_level=logging.INFO) as cml:
        res = clone('///repronim/containers', path, result_xfm=None, return_type='list')
        assert_status('ok', res)
        assert_not_in("not auto-enabled", cml.out)
Example #19
def test_reckless(path, top_path):
    ds = clone(path,
               top_path,
               reckless=True,
               result_xfm='datasets',
               return_type='item-or-list')
    eq_(ds.config.get('annex.hardlink', None), 'true')
    eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
Example #20
def test_clone_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = clone(url, result_xfm='datasets', return_type='item-or-list')

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
Example #21
def test_clone_isnot_recursive(src, path_nr, path_r):
    ds = clone(src, path_nr, result_xfm='datasets', return_type='item-or-list')
    ok_(ds.is_installed())
    # check nothing is unintentionally installed
    subdss = ds.subdatasets(recursive=True)
    assert_result_count(subdss, len(subdss), state='absent')
    # this also means subdatasets are to be listed as not fulfilled:
    eq_(set(ds.subdatasets(recursive=True, fulfilled=False, result_xfm='relpaths')),
        {'subm 1', '2'})
Example #22
def check_reckless(annex, src_path, top_path, sharedpath):
    # super with or without annex
    src = Dataset(src_path).create(annex=annex)
    # sub always with annex
    srcsub = src.create('sub')

    # and for the actual test
    ds = clone(src.path, top_path, reckless=True,
               result_xfm='datasets', return_type='item-or-list')

    is_crippled = srcsub.repo.is_managed_branch()

    if annex and not is_crippled:
        eq_(ds.config.get('annex.hardlink', None), 'true')

    # actual value is 'auto', because True is a legacy value and we map it
    eq_(ds.config.get('datalad.clone.reckless', None), 'auto')
    if annex:
        eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
    # now, if we clone another repo into this one, it will inherit the setting
    # without having to provide it explicitly
    newsub = ds.clone(srcsub, 'newsub', result_xfm='datasets', return_type='item-or-list')
    # and `get` the original subdataset
    origsub = ds.get('sub', result_xfm='datasets', return_type='item-or-list')
    for sds in (newsub, origsub):
        eq_(sds.config.get('datalad.clone.reckless', None), 'auto')
        if not is_crippled:
            eq_(sds.config.get('annex.hardlink', None), 'true')

    if is_crippled:
        raise SkipTest("Remainder of test needs proper filesystem permissions")

    if annex:
        # the standard setup keeps the annex locks accessible to the user only
        nok_((ds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
             & stat.S_IWGRP)
        # but we can set it up for group-shared access too
        sharedds = clone(
            src, sharedpath,
            reckless='shared-group',
            result_xfm='datasets',
            return_type='item-or-list')
        ok_((sharedds.pathobj / '.git' / 'annex' / 'index.lck').stat().st_mode \
            & stat.S_IWGRP)
Example #23
def _test_ria_postclonecfg(url, dsid, clone_path):
    # Test cloning from a RIA store while ORA special remote autoenabling
    # fails due to an invalid URL from the cloner's point of view.
    # Origin's git config file should contain the UUID to enable. This needs
    # to work via HTTP, SSH and local cloning.

    # Autoenabling should initially fail during git-annex-init, and we would
    # report that on INFO level. Only the postclone routine deals with it.
    with swallow_logs(new_level=logging.INFO) as cml:
        # First, the super ds:
        riaclone = clone('ria+{}#{}'.format(url, dsid), clone_path)
        cml.assert_logged(msg="access to 1 dataset sibling store-storage not "
                          "auto-enabled",
                          level="INFO",
                          regex=False)

    # However, we now can retrieve content since clone should have enabled the
    # special remote with new URL (or origin in case of HTTP).
    res = riaclone.get('test.txt')
    assert_result_count(
        res,
        1,
        status='ok',
        path=str(riaclone.pathobj / 'test.txt'),
        message="from {}...".format(
            "origin" if url.startswith('http') else "store-storage"))

    # same thing for the sub ds (we don't need a store-url and id - get
    # should figure those out itself):
    with swallow_logs(new_level=logging.INFO) as cml:
        riaclonesub = riaclone.get(op.join('subdir', 'subds'),
                                   get_data=False,
                                   result_xfm='datasets',
                                   return_type='item-or-list')
        cml.assert_logged(msg="access to 1 dataset sibling store-storage not "
                          "auto-enabled",
                          level="INFO",
                          regex=False)
    res = riaclonesub.get('testsub.txt')
    assert_result_count(
        res,
        1,
        status='ok',
        path=str(riaclonesub.pathobj / 'testsub.txt'),
        message="from {}...".format(
            "origin" if url.startswith('http') else "store-storage"))

    # finally get the plain git subdataset.
    # Clone should figure out to also clone it from a ria+ URL
    # (subdataset-source-candidate), and notice that there wasn't an
    # autoenabled ORA remote - but it shouldn't stumble over that, since
    # it is plain git.
    res = riaclone.get(op.join('subdir', 'subgit', 'testgit.txt'))
    assert_result_count(res, 1, status='ok', type='dataset', action='install')
    assert_result_count(res, 1, status='notneeded', type='file')
    assert_result_count(res, 2)
Example #24
def get_tests_data_dir(dl_dset, dset_url=None, commit_ref=None):
    """Get the path to the test data directory. If the test data directory
    does not exist or is not populated, install with datalad.
    """
    logger = logging.getLogger("Test data setup")
    if not dl_dset.is_installed():
        if dl_dset.pathobj.exists():
            raise ValueError(
                f"{dl_dset.path} exists but is not a datalad repository")
        else:
            try:
                global dl_lock
                dl_lock.acquire()
                if not dl_dset.is_installed():
                    logger.warning("Installing test data")
                    if not dset_url:
                        raise ValueError(
                            f"{dl_dset.path} is not installed and a url is not provided."
                        )
                    datalad.clone(
                        dset_url,
                        dl_dset.path,
                    )
            finally:
                dl_lock.release()

    # Checking that a specific git ref/commit is checked out (e.g. in a
    # read-only datalad repository) is not implemented yet:
    if commit_ref is None:
        return
    else:
        raise NotImplementedError
    # confirm repo is user writable (currently unreachable due to the
    # return/raise above).
    some_files = [".git/logs/HEAD"]
    for f in some_files:
        data_file = dl_dset.pathobj / f
        if not data_file.exists():
            raise ValueError(
                f"{f} does not exist (parent existences: {f.parent.exists()}")
        if not os.access(data_file, os.W_OK):
            raise ValueError(f"{f} is not user writeable ({os.getuid()})")
Example #25
def test_create_alias(ds_path, ria_path, clone_path):
    ds_path = Path(ds_path)
    clone_path = Path(clone_path)

    ds_path.mkdir()
    dsa = Dataset(ds_path / "a").create()

    res = dsa.create_sibling_ria(url="ria+file://{}".format(ria_path),
                                 name="origin",
                                 alias="ds-a")
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_(len(res), 1)

    ds_clone = clone(source="ria+file://{}#~ds-a".format(ria_path),
                     path=clone_path / "a")
    assert_repo_status(ds_clone.path)

    # multiple datasets in a RIA store with different aliases work
    dsb = Dataset(ds_path / "b").create()

    res = dsb.create_sibling_ria(url="ria+file://{}".format(ria_path),
                                 name="origin",
                                 alias="ds-b")
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_(len(res), 1)

    ds_clone = clone(source="ria+file://{}#~ds-b".format(ria_path),
                     path=clone_path / "b")
    assert_repo_status(ds_clone.path)

    # second dataset in a RIA store with the same alias emits a warning
    dsc = Dataset(ds_path / "c").create()

    with swallow_logs(logging.WARNING) as cml:
        res = dsc.create_sibling_ria(url="ria+file://{}".format(ria_path),
                                     name="origin",
                                     alias="ds-a")
        assert_in(
            "Alias 'ds-a' already exists in the RIA store, not adding an alias",
            cml.out)
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_(len(res), 1)
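Cloning by alias then uses the '#~' fragment form instead of the dataset ID (a sketch; store path and target are hypothetical):

import datalad.api as dl

ds = dl.clone('ria+file:///data/store#~ds-a', '/tmp/by-alias')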
Example #26
def test_clone_datasets_root(tdir):
    with chpwd(tdir):
        ds = clone("///", result_xfm='datasets', return_type='item-or-list')
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, get_datasets_topdir()))

        # do it a second time:
        res = clone("///", on_failure='ignore')
        assert_message("dataset %s was already cloned from '%s'", res)
        assert_status('notneeded', res)

        # and a third time into an existing something, that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        res = clone('///', path="sub", on_failure='ignore')
        assert_message(
            'target path already exists and not empty, refuse to clone into target path',
            res)
        assert_status('error', res)
Example #27
def test_readonly_dataset_access(path):
    # clone from OSF; ds is self-contained at OSF
    ds = clone('osf://q8xnk', path)
    # standard name storage remote
    assert_in('osf-storage', ds.repo.get_remotes())
    for avail in ds.repo.whereis('inannex'):
        assert_in('7784367b-69c6-483d-9564-67f840715890', avail)
    test_file = ds.repo.pathobj / 'inannex' / 'animated.gif'
    eq_(ds.repo.annexstatus([test_file])[test_file]['has_content'], False)
    ds.repo.call_git(['annex', 'copy', str(test_file), '-f', 'osf-storage'])
    eq_(ds.repo.annexstatus([test_file])[test_file]['has_content'], True)
Example #28
def test_clone_crcns(tdir, ds_path):
    with chpwd(tdir):
        res = clone('///', path="all-nonrecursive", on_failure='ignore')
        assert_status('ok', res)

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.clone("///crcns", result_xfm='datasets', return_type='item-or-list')
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.subdatasets(result_xfm='paths'))
Example #29
def test_cfg_originorigin(path):
    path = Path(path)
    origin = Dataset(path / 'origin').create()
    (origin.pathobj / 'file1.txt').write_text('content')
    origin.save()
    clone_lev1 = clone(origin, path / 'clone_lev1')
    clone_lev2 = clone(clone_lev1, path / 'clone_lev2')
    # the goal is to be able to get file content from origin without
    # the need to configure it manually
    assert_result_count(
        clone_lev2.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev2.pathobj / 'file1.txt'),
    )
    eq_((clone_lev2.pathobj / 'file1.txt').read_text(), 'content')
    eq_(
        Path(clone_lev2.siblings(
            'query',
            name='origin-2',
            return_type='item-or-list')['url']),
        origin.pathobj
    )

    # Clone another level, this time with a relative path. Drop content from
    # lev2 so that origin is the only place that the file is available from.
    clone_lev2.drop("file1.txt")
    with chpwd(path), swallow_logs(new_level=logging.DEBUG) as cml:
        clone_lev3 = clone('clone_lev2', 'clone_lev3')
        # we called git-annex-init; see gh-4367:
        cml.assert_logged(msg=r"[^[]*Async run \[('git', 'annex'|'git-annex'), "
                              r"'init'",
                          match=False,
                          level='DEBUG')
    assert_result_count(
        clone_lev3.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev3.pathobj / 'file1.txt'))
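What the test verifies: when cloning a clone, the intermediate clone's own origin is configured as an additional sibling (named 'origin-2' here), so content stays retrievable down a chain of clones. The query it uses, in isolation (a sketch; the path is hypothetical):

import datalad.api as dl

lev2 = dl.Dataset('/path/to/clone_lev2')  # hypothetical second-level clone
url = lev2.siblings('query', name='origin-2',
                    return_type='item-or-list')['url']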
Example #30
def test_ensure_datalad_remote_maybe_enable(path=None, *, autoenable):
    path = Path(path)
    ds_a = Dataset(path / "a").create(force=True)
    init_datalad_remote(ds_a.repo, DATALAD_SPECIAL_REMOTE,
                        autoenable=autoenable)

    ds_b = clone(source=ds_a.path, path=path / "b")
    repo = ds_b.repo
    if not autoenable:
        assert_not_in("datalad", repo.get_remotes())
    ensure_datalad_remote(repo)
    assert_in("datalad", repo.get_remotes())
Example #31
def test_ephemeral(ds_path=None, store_path=None, clone_path=None):

    dspath = Path(ds_path)
    store = Path(store_path)
    file_test = Path('file1.txt')
    file_testsub = Path('sub') / 'other.txt'

    # create the original dataset
    ds = Dataset(dspath)
    ds.create(force=True)
    ds.save()

    # put into store:
    ds.create_sibling_ria("ria+{}".format(store.as_uri()),
                          "riastore",
                          new_store_ok=True)
    ds.push(to="riastore", data="anything")

    # now, get an ephemeral clone from the RIA store:
    eph_clone = clone('ria+{}#{}'.format(store.as_uri(), ds.id),
                      clone_path,
                      reckless="ephemeral")

    # ephemeral clone was properly linked (store has bare repos!):
    clone_annex = (eph_clone.repo.dot_git / 'annex')
    assert_true(clone_annex.is_symlink())
    assert_true(clone_annex.resolve().samefile(store / ds.id[:3] / ds.id[3:] /
                                               'annex'))
    if not eph_clone.repo.is_managed_branch():
        # TODO: We can't properly handle adjusted branch yet
        # we don't need to get files in order to access them:
        assert_equal((eph_clone.pathobj / file_test).read_text(), "some")
        assert_equal((eph_clone.pathobj / file_testsub).read_text(), "other")

        # can we unlock those files?
        eph_clone.unlock(file_test)
        # change content
        (eph_clone.pathobj / file_test).write_text("new content")
        eph_clone.save()

        # new content should already be in store
        # (except the store doesn't know yet)
        res = eph_clone.repo.fsck(remote="riastore-storage", fast=True)
        assert_equal(len(res), 2)
        assert_result_count(res, 1, success=True, file=file_test.as_posix())
        assert_result_count(res, 1, success=True, file=file_testsub.as_posix())

        # push back git history
        eph_clone.push(to=DEFAULT_REMOTE, data="nothing")

        # get an update in origin
        ds.update(merge=True, reobtain_data=True)
        assert_equal((ds.pathobj / file_test).read_text(), "new content")
Example #32
def test_clone_report_permission_issue(tdir):
    pdir = _path_(tdir, 'protected')
    mkdir(pdir)
    # make it read-only
    chmod(pdir, 0o555)
    with chpwd(pdir):
        res = clone('///', result_xfm=None, return_type='list', on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir '%s/%s': Permission denied"
                    % (pdir, get_datasets_topdir())
        )
Example #33
def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #34
def test_clone_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = clone(src, path, description='mydummy',
               result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again, shouldn't matter:
    res = clone(src, path)
    assert_result_values_equal(res, 'source_url', [src])
    assert_status('notneeded', res)
    assert_message("dataset %s was already cloned from '%s'", res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)
Example #35
def test_install_source_relpath(src, dest):
    create(src)
    src_ = basename(src)
    with chpwd(dirname(src)):
        clone(src_, dest)