Example no. 1
def mk_push_target(ds, name, path, annex=True, bare=True):
    # life could be simple, but nothing is simple on Windows
    #src.create_sibling(dst_path, name='target')
    if annex:
        if bare:
            target = GitRepo(path=path, bare=True, create=True)
            target.call_git(['annex', 'init'])
        else:
            target = AnnexRepo(path, init=True, create=True)
            if not target.is_managed_branch():
                # for managed branches we need more fireworks -> see below
                target.config.set('receive.denyCurrentBranch',
                                  'updateInstead',
                                  where='local')
    else:
        target = GitRepo(path=path, bare=bare, create=True)
    ds.siblings('add', name=name, url=path, result_renderer=None)
    if annex and not bare and target.is_managed_branch():
        # maximum complication
        # the target repo already has a commit that is unrelated
        # to the source repo, because it has built a reference
        # commit for the managed branch.
        # the only sane approach is to let git-annex establish a shared
        # history
        ds.repo.call_git(['annex', 'sync'])
        ds.repo.call_git(['annex', 'sync', '--cleanup'])
    return target
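A minimal usage sketch for this helper, under the assumption of a standard
DataLad installation; the temporary paths and the final push() call are
illustrative additions, not part of the original helper:

import tempfile

from datalad.api import Dataset

# create a throwaway source dataset and a push target next to it
ds = Dataset(tempfile.mkdtemp()).create(result_renderer='disabled')
target = mk_push_target(ds, name='target', path=tempfile.mkdtemp(),
                        annex=True, bare=True)
# the sibling registered by mk_push_target can now receive content
ds.push(to='target')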
Example no. 2
def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge; with only one sibling, update() without parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793); later it might be
    # enhanced to perform a graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/' + DEFAULT_BRANCH],
        ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
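In the DataLad test suite the two path arguments are normally injected by the
@with_tempfile decorator; a hedged standalone driver could look like this
(temporary directories only, everything else as defined above):

import tempfile

# stand-in for the @with_tempfile decorators used in the real test suite
test_newthings_coming_down(tempfile.mkdtemp(), tempfile.mkdtemp())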
Example no. 3
def _create_sibling_ria(ds, url, push_url, name, storage_sibling, storage_name,
                        alias, existing, shared, group, post_update_hook,
                        trust_level, res_kwargs):
    # be safe across datasets
    res_kwargs = res_kwargs.copy()
    # update dataset
    res_kwargs['ds'] = ds

    if not isinstance(ds.repo, AnnexRepo):
        # No point in dealing with a special remote when there's no annex.
        # Note that in recursive invocations this might apply to only some of
        # the datasets, which is why it is dealt with here rather than one
        # level up.
        lgr.debug("No annex at %s. Ignoring special remote options.", ds.path)
        storage_sibling = False
        storage_name = None

    # parse target URL
    try:
        ssh_host, base_path, rewritten_url = \
            verify_ria_url(push_url if push_url else url, ds.config)
    except ValueError as e:
        yield get_status_dict(status='error', message=str(e), **res_kwargs)
        return

    base_path = Path(base_path)

    git_url = decode_source_spec(
        # append dataset id to url and use magic from clone-helper:
        url + '#{}'.format(ds.id),
        cfg=ds.config)['giturl']
    git_push_url = decode_source_spec(
        push_url +
        '#{}'.format(ds.id), cfg=ds.config)['giturl'] if push_url else None

    # determine layout locations; go for a v1 store-level layout
    repo_path, _, _ = get_layout_locations(1, base_path, ds.id)

    ds_siblings = [
        r['name'] for r in ds.siblings(result_renderer='disabled',
                                       return_type='generator')
    ]
    # Figure out whether we are supposed to skip this very dataset
    if existing == 'skip' and (name in ds_siblings or
                               (storage_name and storage_name in ds_siblings)):
        yield get_status_dict(status='notneeded',
                              message="Skipped on existing sibling",
                              **res_kwargs)
        # if we skip here, nothing else can change that decision further
        # down
        return

    # figure out whether we need to skip or error due to an existing target
    # repo before we try to init a special remote.
    if ssh_host:
        from datalad import ssh_manager
        ssh = ssh_manager.get_connection(ssh_host,
                                         use_remote_annex_bundle=False)
        ssh.open()

    if existing in ['skip', 'error']:
        config_path = repo_path / 'config'
        # No .git -- if it's an existing repo in a RIA store it should be a
        # bare repo.
        # Theoretically we could have additional checks for whether we have
        # an empty repo dir or a non-bare repo or whatever else.
        if ssh_host:
            try:
                ssh('[ -e {p} ]'.format(p=quote_cmdlinearg(str(config_path))))
                exists = True
            except CommandError:
                exists = False
        else:
            exists = config_path.exists()

        if exists:
            if existing == 'skip':
                # 1. not rendered by default
                # 2. message doesn't show up in ultimate result
                #    record as shown by -f json_pp
                yield get_status_dict(status='notneeded',
                                      message="Skipped on existing remote "
                                      "directory {}".format(repo_path),
                                      **res_kwargs)
                return
            else:  # existing == 'error'
                yield get_status_dict(status='error',
                                      message="remote directory {} already "
                                      "exists.".format(repo_path),
                                      **res_kwargs)
                return

    if storage_sibling == 'only':
        lgr.info("create storage sibling '{}' ...".format(name))
    else:
        lgr.info("create sibling{} '{}'{} ...".format(
            's' if storage_name else '',
            name,
            " and '{}'".format(storage_name) if storage_name else '',
        ))
    create_ds_in_store(SSHRemoteIO(ssh_host) if ssh_host else LocalIO(),
                       base_path,
                       ds.id,
                       '2',
                       '1',
                       alias,
                       init_obj_tree=storage_sibling is not False)
    if storage_sibling:
        # use the main `name` if the only thing we are creating is the
        # storage sibling
        srname = name if storage_sibling == 'only' else storage_name

        lgr.debug('init special remote {}'.format(srname))
        special_remote_options = [
            'type=external', 'externaltype=ora', 'encryption=none',
            'autoenable=true', 'url={}'.format(url)
        ]
        if push_url:
            special_remote_options.append('push-url={}'.format(push_url))
        try:
            ds.repo.init_remote(srname, options=special_remote_options)
        except CommandError as e:
            if existing == 'reconfigure' \
                    and 'git-annex: There is already a special remote' \
                    in e.stderr:
                # run enableremote instead
                lgr.debug(
                    "special remote '%s' already exists. "
                    "Run enableremote instead.", srname)
                # TODO: Use AnnexRepo.enable_remote (which needs to get
                #       `options` first)
                ds.repo.call_annex(['enableremote', srname] +
                                   special_remote_options)
            else:
                yield get_status_dict(
                    status='error',
                    message="initremote failed.\nstdout: %s\nstderr: %s" %
                    (e.stdout, e.stderr),
                    **res_kwargs)
                return

        if trust_level:
            trust_cmd = [trust_level]
            if trust_level == 'trust':
                # Following git-annex 8.20201129-73-g6a0030a11, using `git
                # annex trust` requires --force.
                trust_cmd.append('--force')
            ds.repo.call_annex(trust_cmd + [srname])
        # get uuid for use in bare repo's config
        uuid = ds.config.get("remote.{}.annex-uuid".format(srname))

    if storage_sibling == 'only':
        # we can stop here, the rest of the function is about setting up
        # the git remote part of the sibling
        yield get_status_dict(
            status='ok',
            **res_kwargs,
        )
        return

    # 2. create a bare repository in-store:

    lgr.debug("init bare repository")
    # TODO: we should probably check whether it's there already. How?
    # Note: like the special remote itself, we assume local FS if no
    # SSH host is specified
    disabled_hook = repo_path / 'hooks' / 'post-update.sample'
    enabled_hook = repo_path / 'hooks' / 'post-update'

    if group:
        chgrp_cmd = "chgrp -R {} {}".format(quote_cmdlinearg(str(group)),
                                            quote_cmdlinearg(str(repo_path)))

    if ssh_host:
        ssh('cd {rootdir} && git init --bare{shared}'.format(
            rootdir=quote_cmdlinearg(str(repo_path)),
            shared=" --shared='{}'".format(quote_cmdlinearg(shared))
            if shared else ''))

        if storage_sibling:
            # write the special remote's uuid into git-config, so clone can
            # tell which one it is supposed to be and enable it even with a
            # fallback URL
            ssh("cd {rootdir} && git config datalad.ora-remote.uuid {uuid}"
                "".format(rootdir=quote_cmdlinearg(str(repo_path)), uuid=uuid))

        if post_update_hook:
            ssh('mv {} {}'.format(quote_cmdlinearg(str(disabled_hook)),
                                  quote_cmdlinearg(str(enabled_hook))))

        if group:
            # Whether the repository existed before or a new directory was
            # created for it, set its group to the desired one (if one was
            # provided) with the same chgrp call
            ssh(chgrp_cmd)

        # finally update server
        if post_update_hook:
            # Conditional on post_update_hook, since one w/o the other doesn't
            # seem to make much sense.
            ssh('cd {rootdir} && git update-server-info'.format(
                rootdir=quote_cmdlinearg(str(repo_path))))
    else:
        gr = GitRepo(repo_path,
                     create=True,
                     bare=True,
                     shared=shared if shared else None)
        if storage_sibling:
            # write the special remote's uuid into git-config, so clone can
            # tell which one it is supposed to be and enable it even with a
            # fallback URL
            gr.config.add("datalad.ora-remote.uuid", uuid, scope='local')

        if post_update_hook:
            disabled_hook.rename(enabled_hook)
        if group:
            # No CWD needed here, since `chgrp` is expected to be found via PATH
            # and the path it's operating on is absolute (repo_path). No
            # repository operation involved.
            Runner().run(chgrp_cmd)
        # finally update server
        if post_update_hook:
            # Conditional on post_update_hook, since one w/o the other doesn't
            # seem to make much sense.
            gr.call_git(["update-server-info"])

    # add a git remote to the bare repository
    # Note: needs annex-ignore! Otherwise we might push into the dirhash-lower
    # annex/object tree instead of the mixed one, since it's a bare repo.
    # This in turn would be an issue if we wanted to pack the entire thing
    # into an archive: the special remote would then not be able to access
    # content in the "wrong" place within the archive
    lgr.debug("set up git remote")
    if name in ds_siblings:
        # otherwise we should have skipped or failed before
        assert existing == 'reconfigure'
    ds.config.set("remote.{}.annex-ignore".format(name),
                  value="true",
                  scope="local")
    yield from ds.siblings(
        'configure',
        name=name,
        url=str(repo_path) if url.startswith("ria+file") else git_url,
        pushurl=git_push_url,
        recursive=False,
        # Note that this should be None if storage_sibling was not set
        publish_depends=storage_name,
        result_renderer='disabled',
        return_type='generator',
        # Note that otherwise a subsequent publish will report
        # "notneeded".
        fetch=True)

    yield get_status_dict(
        status='ok',
        **res_kwargs,
    )
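A hedged sketch of driving this generator directly; in DataLad proper it is
invoked by the create-sibling-ria command, and every argument value below is
illustrative rather than canonical:

# `ds` is assumed to be an existing annex-backed Dataset
for res in _create_sibling_ria(
        ds,
        url='ria+file:///tmp/store',   # assumed local RIA store URL
        push_url=None,
        name='ria-sibling',
        storage_sibling=True,
        storage_name='ria-storage',
        alias=None,
        existing='error',
        shared=None,
        group=None,
        post_update_hook=False,
        trust_level=None,
        res_kwargs={'action': 'create-sibling-ria', 'logger': lgr}):
    print(res['status'], res.get('message', ''))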
Example no. 4
def _install_subds_from_flexible_source(ds, sm, **kwargs):
    """Tries to obtain a given subdataset from several meaningful locations

    Parameters
    ----------
    ds : Dataset
      Parent dataset of the to-be-installed subdataset.
    sm : dict
      Submodule record as produced by `subdatasets()`.
    **kwargs
      Passed on to clone()
    """
    sm_path = op.relpath(sm['path'], start=sm['parentds'])
    # compose a list of candidate clone URLs
    clone_urls = _get_flexible_source_candidates_for_submodule(ds, sm)

    # prevent inevitable exception from `clone`
    dest_path = op.join(ds.path, sm_path)
    clone_urls_ = [src['url'] for src in clone_urls if src['url'] != dest_path]

    if not clone_urls:
        # yield error
        yield get_status_dict(
            action='install',
            ds=ds,
            status='error',
            message=("Have got no candidates to install subdataset %s from.",
                     sm_path),
            logger=lgr,
        )
        return

    for res in clone_dataset(clone_urls_,
                             Dataset(dest_path),
                             cfg=ds.config,
                             **kwargs):
        # make sure to fix a detached HEAD before yielding the install success
        # result. Resetting the branch afterwards would undo any change made
        # to the repo by processing done in response to the result
        if res.get('action', None) == 'install' and \
                res.get('status', None) == 'ok' and \
                res.get('type', None) == 'dataset' and \
                res.get('path', None) == dest_path:
            _fixup_submodule_dotgit_setup(ds, sm_path)

            target_commit = sm['gitshasum']
            lgr.debug(
                "Update cloned subdataset {0} in parent".format(dest_path))
            section_name = 'submodule.{}'.format(sm['gitmodule_name'])
            # do not use `git-submodule update --init`; it would make calls
            # to git-config which will not obey datalad inter-process locks
            # for modifying .git/config
            sub = GitRepo(res['path'])
            # record what branch we were on right after the clone
            # TODO instead of the active branch, this should first consider
            # a configured branch in the submodule record of the superdataset
            sub_orig_branch = sub.get_active_branch()
            # if we are on a branch this hexsha will be the tip of that branch
            sub_orig_hexsha = sub.get_hexsha()
            if sub_orig_hexsha != target_commit:
                # make sure we have the desired commit locally
                # expensive and possibly error-prone fetch conditional on cheap
                # local check
                if not sub.commit_exists(target_commit):
                    try:
                        sub.fetch(remote='origin', refspec=target_commit)
                    except CommandError:
                        pass
                    # instead of inspecting the fetch results for all the ways
                    # in which it could have failed to produce the desired
                    # result, let's verify the presence of the commit directly;
                    # we are in expensive-land already anyway
                    if not sub.commit_exists(target_commit):
                        res.update(
                            status='error',
                            message=
                            ('Target commit %s does not exist in the clone, and '
                             'fetching that commit from origin failed',
                             target_commit[:8]),
                        )
                        yield res
                        # there is nothing we can do about this
                        # MIH thinks that removing the clone is not needed, as a likely
                        # next step will have to be a manual recovery intervention
                        # and not another blind attempt
                        continue
                # checkout the desired commit
                sub.call_git(['checkout', target_commit])
                # did we detach?
                # XXX: This is a less generic variant of a part of
                # GitRepo.update_submodule(). It makes use of already available
                # information and trusts the existence of the just cloned repo
                # and avoids (redoing) some safety checks
                if sub_orig_branch and not sub.get_active_branch():
                    # check whether the current state is an ancestor of the
                    # original branch tip
                    lgr.debug(
                        "Detached HEAD after updating submodule %s "
                        "(original branch: %s)", sub, sub_orig_branch)
                    if sub.get_merge_base([sub_orig_hexsha,
                                           target_commit]) == target_commit:
                        # TODO: config option?
                        # MIH: There is no real need here. IMHO this should all not
                        # happen, unless the submodule record has a branch
                        # configured. And Datalad should leave such a record, when
                        # a submodule is registered.

                        # we assume the target_commit to be from the same branch,
                        # because it is an ancestor -- update that original branch
                        # to point to the target_commit, and update HEAD to point to
                        # that location -- this readies the subdataset for
                        # further modification
                        lgr.info(
                            "Reset subdataset branch '%s' to %s (from %s) to "
                            "avoid a detached HEAD", sub_orig_branch,
                            target_commit[:8], sub_orig_hexsha[:8])
                        branch_ref = 'refs/heads/%s' % sub_orig_branch
                        sub.update_ref(branch_ref, target_commit)
                        sub.update_ref('HEAD', branch_ref, symbolic=True)
                    else:
                        lgr.warning(
                            "%s has a detached HEAD, because the recorded "
                            "subdataset state %s has no unique ancestor with "
                            "branch '%s'", sub, target_commit[:8],
                            sub_orig_branch)

            # register the submodule as "active" in the superdataset
            ds.config.set(
                '{}.active'.format(section_name),
                'true',
                reload=False,
                force=True,
                where='local',
            )
            ds.config.set(
                '{}.url'.format(section_name),
                # record the actual source URL of the successful clone
                # and not a funky prediction based on the parent ds
                # like ds.repo.update_submodule() would do (does not
                # accept a URL)
                res['source']['giturl'],
                reload=True,
                force=True,
                where='local',
            )
        yield res

    subds = Dataset(dest_path)
    if not subds.is_installed():
        lgr.debug('Desired subdataset %s did not materialize, stopping', subds)
        return

    # check whether clone URL generators were involved
    cand_cfg = [rec for rec in clone_urls if rec.get('from_config', False)]
    if cand_cfg:
        # get a handle on the configuration that is specified in the
        # dataset itself (local and dataset)
        super_cfg = ConfigManager(dataset=ds, source='dataset-local')
        need_reload = False
        for rec in cand_cfg:
            # check whether any of this configuration originated from the
            # superdataset. If so, inherit the config in the new subdataset
            # clone; if not, keep things clean in order to be able to move
            # with any outside configuration change
            for c in ('datalad.get.subdataset-source-candidate-{}{}'.format(
                    rec['cost'], rec['name']),
                      'datalad.get.subdataset-source-candidate-{}'.format(
                          rec['name'])):
                if c in super_cfg.keys():
                    subds.config.set(c,
                                     super_cfg.get(c),
                                     where='local',
                                     reload=False)
                    need_reload = True
                    break
        if need_reload:
            subds.config.reload(force=True)
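A hedged sketch of how a caller might drive this helper; in DataLad itself it
is used by the get/install machinery, and the submodule records are assumed
here to come from subdatasets() as shown:

# illustrative only: try to install every directly recorded subdataset
for sm in ds.subdatasets(result_renderer='disabled'):
    for res in _install_subds_from_flexible_source(ds, sm, reckless=None):
        print(res.get('action'), res.get('status'), res.get('path'))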