Example #1
0
def _install_subds_from_flexible_source(ds, sm, **kwargs):
    """Tries to obtain a given subdataset from several meaningful locations

    Parameters
    ----------
    ds : Dataset
      Parent dataset of to-be-installed subdataset.
    sm : dict
      Submodule record as produced by `subdatasets()`.
    **kwargs
      Passed onto clone()
    """
    sm_path = op.relpath(sm['path'], start=sm['parentds'])
    # compose a list of candidate clone URLs
    clone_urls = _get_flexible_source_candidates_for_submodule(ds, sm)

    # prevent inevitable exception from `clone`
    dest_path = op.join(ds.path, sm_path)
    clone_urls_ = [src['url'] for src in clone_urls if src['url'] != dest_path]

    if not clone_urls:
        # yield error
        yield get_status_dict(
            action='install',
            ds=ds,
            status='error',
            message=("Have got no candidates to install subdataset %s from.",
                     sm_path),
            logger=lgr,
        )
        return

    for res in clone_dataset(clone_urls_, Dataset(dest_path), **kwargs):
        # make sure to fix a detached HEAD before yielding the install success
        # result. The resetting of the branch would undo any change done
        # to the repo by processing in response to the result
        if res.get('action', None) == 'install' and \
                res.get('status', None) == 'ok' and \
                res.get('type', None) == 'dataset' and \
                res.get('path', None) == dest_path:
            _fixup_submodule_dotgit_setup(ds, sm_path)

            # do fancy update
            lgr.debug(
                "Update cloned subdataset {0} in parent".format(dest_path))
            ds.repo.update_submodule(sm_path, init=True)
        yield res

    subds = Dataset(dest_path)
    if not subds.is_installed():
        lgr.debug('Desired subdataset %s did not materialize, stopping', subds)
        return

    # check whether clone URL generators were involved
    cand_cfg = [rec for rec in clone_urls if rec.get('from_config', False)]
    if cand_cfg:
        # get a handle on the configuration that is specified in the
        # dataset itself (local and dataset)
        super_cfg = ConfigManager(dataset=ds, source='dataset-local')
        need_reload = False
        for rec in cand_cfg:
            # check whether any of this configuration originated from the
            # superdataset. if so, inherit the config in the new subdataset
            # clone. if not, keep things clean in order to be able to move with
            # any outside configuration change
            for c in ('datalad.get.subdataset-source-candidate-{}{}'.format(
                    rec['cost'], rec['name']),
                      'datalad.get.subdataset-source-candidate-{}'.format(
                          rec['name'])):
                if c in super_cfg.keys():
                    subds.config.set(c,
                                     super_cfg.get(c),
                                     where='local',
                                     reload=False)
                    need_reload = True
                    break
        if need_reload:
            subds.config.reload(force=True)
Example #2
0
def _install_subds_from_flexible_source(ds, sm, **kwargs):
    """Tries to obtain a given subdataset from several meaningful locations

    Parameters
    ----------
    ds : Dataset
      Parent dataset of to-be-installed subdataset.
    sm : dict
      Submodule record as produced by `subdatasets()`.
    **kwargs
      Passed onto clone()
    """
    sm_path = op.relpath(sm['path'], start=sm['parentds'])
    # compose a list of candidate clone URLs
    clone_urls = _get_flexible_source_candidates_for_submodule(ds, sm)

    # prevent inevitable exception from `clone`
    dest_path = op.join(ds.path, sm_path)
    clone_urls_ = [src['url'] for src in clone_urls if src['url'] != dest_path]

    if not clone_urls:
        # yield error
        yield get_status_dict(
            action='install',
            ds=ds,
            status='error',
            message=("Have got no candidates to install subdataset %s from.",
                     sm_path),
            logger=lgr,
        )
        return

    for res in clone_dataset(clone_urls_,
                             Dataset(dest_path),
                             cfg=ds.config,
                             checkout_gitsha=sm['gitshasum'],
                             **kwargs):
        if res.get('action', None) == 'install' and \
                res.get('status', None) == 'ok' and \
                res.get('type', None) == 'dataset' and \
                res.get('path', None) == dest_path:
            _fixup_submodule_dotgit_setup(ds, sm_path)

            section_name = 'submodule.{}'.format(sm['gitmodule_name'])
            # register the submodule as "active" in the superdataset
            ds.config.set(
                '{}.active'.format(section_name),
                'true',
                reload=False,
                force=True,
                where='local',
            )
            ds.config.set(
                '{}.url'.format(section_name),
                # record the actual source URL of the successful clone
                # and not a funky prediction based on the parent ds
                # like ds.repo.update_submodule() would do (does not
                # accept a URL)
                res['source']['giturl'],
                reload=True,
                force=True,
                where='local',
            )
        yield res

    subds = Dataset(dest_path)
    if not subds.is_installed():
        lgr.debug('Desired subdataset %s did not materialize, stopping', subds)
        return

    # check whether clone URL generators were involved
    cand_cfg = [rec for rec in clone_urls if rec.get('from_config', False)]
    if cand_cfg:
        # get a handle on the configuration that is specified in the
        # dataset itself (local and dataset)
        super_cfg = ConfigManager(dataset=ds, source='dataset-local')
        need_reload = False
        for rec in cand_cfg:
            # check whether any of this configuration originated from the
            # superdataset. if so, inherit the config in the new subdataset
            # clone. if not, keep things clean in order to be able to move with
            # any outside configuration change
            for c in ('datalad.get.subdataset-source-candidate-{}{}'.format(
                    rec['cost'], rec['name']),
                      'datalad.get.subdataset-source-candidate-{}'.format(
                          rec['name'])):
                if c in super_cfg.keys():
                    subds.config.set(c,
                                     super_cfg.get(c),
                                     where='local',
                                     reload=False)
                    need_reload = True
                    break
        if need_reload:
            subds.config.reload(force=True)
Example #3
0
def _install_subds_from_flexible_source(ds, sm, **kwargs):
    """Tries to obtain a given subdataset from several meaningful locations

    Parameters
    ----------
    ds : Dataset
      Parent dataset of to-be-installed subdataset.
    sm : dict
      Submodule record as produced by `subdatasets()`.
    **kwargs
      Passed onto clone()
    """
    sm_path = op.relpath(sm['path'], start=sm['parentds'])
    # compose a list of candidate clone URLs
    clone_urls = _get_flexible_source_candidates_for_submodule(ds, sm)

    # prevent inevitable exception from `clone`
    dest_path = op.join(ds.path, sm_path)
    clone_urls_ = [src['url'] for src in clone_urls if src['url'] != dest_path]

    if not clone_urls:
        # yield error
        yield get_status_dict(
            action='install',
            ds=ds,
            status='error',
            message=("Have got no candidates to install subdataset %s from.",
                     sm_path),
            logger=lgr,
        )
        return

    for res in clone_dataset(clone_urls_,
                             Dataset(dest_path),
                             cfg=ds.config,
                             **kwargs):
        # make sure to fix a detached HEAD before yielding the install success
        # result. The resetting of the branch would undo any change done
        # to the repo by processing in response to the result
        if res.get('action', None) == 'install' and \
                res.get('status', None) == 'ok' and \
                res.get('type', None) == 'dataset' and \
                res.get('path', None) == dest_path:
            _fixup_submodule_dotgit_setup(ds, sm_path)

            target_commit = sm['gitshasum']
            lgr.debug(
                "Update cloned subdataset {0} in parent".format(dest_path))
            section_name = 'submodule.{}'.format(sm['gitmodule_name'])
            # do not use `git-submodule update --init`, it would make calls
            # to git-config which will not obey datalad inter-process locks for
            # modifying .git/config
            sub = GitRepo(res['path'])
            # record what branch we were on right after the clone
            # TODO instead of the active branch, this should first consider
            # a configured branch in the submodule record of the superdataset
            sub_orig_branch = sub.get_active_branch()
            # if we are on a branch this hexsha will be the tip of that branch
            sub_orig_hexsha = sub.get_hexsha()
            if sub_orig_hexsha != target_commit:
                # make sure we have the desired commit locally
                # expensive and possibly error-prone fetch conditional on cheap
                # local check
                if not sub.commit_exists(target_commit):
                    try:
                        sub.fetch(remote='origin', refspec=target_commit)
                    except CommandError:
                        pass
                    # instead of inspecting the fetch results for possible ways
                    # with which it could failed to produced the desired result
                    # let's verify the presence of the commit directly, we are in
                    # expensive-land already anyways
                    if not sub.commit_exists(target_commit):
                        res.update(
                            status='error',
                            message=
                            ('Target commit %s does not exist in the clone, and '
                             'a fetch that commit from origin failed',
                             target_commit[:8]),
                        )
                        yield res
                        # there is nothing we can do about this
                        # MIH thinks that removing the clone is not needed, as a likely
                        # next step will have to be a manual recovery intervention
                        # and not another blind attempt
                        continue
                # checkout the desired commit
                sub.call_git(['checkout', target_commit])
                # did we detach?
                # XXX: This is a less generic variant of a part of
                # GitRepo.update_submodule(). It makes use of already available
                # information and trusts the existence of the just cloned repo
                # and avoids (redoing) some safety checks
                if sub_orig_branch and not sub.get_active_branch():
                    # trace if current state is a predecessor of the branch_hexsha
                    lgr.debug(
                        "Detached HEAD after updating submodule %s "
                        "(original branch: %s)", sub, sub_orig_branch)
                    if sub.get_merge_base([sub_orig_hexsha,
                                           target_commit]) == target_commit:
                        # TODO: config option?
                        # MIH: There is no real need here. IMHO this should all not
                        # happen, unless the submodule record has a branch
                        # configured. And Datalad should leave such a record, when
                        # a submodule is registered.

                        # we assume the target_commit to be from the same branch,
                        # because it is an ancestor -- update that original branch
                        # to point to the target_commit, and update HEAD to point to
                        # that location -- this readies the subdataset for
                        # further modification
                        lgr.info(
                            "Reset subdataset branch '%s' to %s (from %s) to "
                            "avoid a detached HEAD", sub_orig_branch,
                            target_commit[:8], sub_orig_hexsha[:8])
                        branch_ref = 'refs/heads/%s' % sub_orig_branch
                        sub.update_ref(branch_ref, target_commit)
                        sub.update_ref('HEAD', branch_ref, symbolic=True)
                    else:
                        lgr.warning(
                            "%s has a detached HEAD, because the recorded "
                            "subdataset state %s has no unique ancestor with "
                            "branch '%s'", sub, target_commit[:8],
                            sub_orig_branch)

            # register the submodule as "active" in the superdataset
            ds.config.set(
                '{}.active'.format(section_name),
                'true',
                reload=False,
                force=True,
                where='local',
            )
            ds.config.set(
                '{}.url'.format(section_name),
                # record the actual source URL of the successful clone
                # and not a funky prediction based on the parent ds
                # like ds.repo.update_submodule() would do (does not
                # accept a URL)
                res['source']['giturl'],
                reload=True,
                force=True,
                where='local',
            )
        yield res

    subds = Dataset(dest_path)
    if not subds.is_installed():
        lgr.debug('Desired subdataset %s did not materialize, stopping', subds)
        return

    # check whether clone URL generators were involved
    cand_cfg = [rec for rec in clone_urls if rec.get('from_config', False)]
    if cand_cfg:
        # get a handle on the configuration that is specified in the
        # dataset itself (local and dataset)
        super_cfg = ConfigManager(dataset=ds, source='dataset-local')
        need_reload = False
        for rec in cand_cfg:
            # check whether any of this configuration originated from the
            # superdataset. if so, inherit the config in the new subdataset
            # clone. if not, keep things clean in order to be able to move with
            # any outside configuration change
            for c in ('datalad.get.subdataset-source-candidate-{}{}'.format(
                    rec['cost'], rec['name']),
                      'datalad.get.subdataset-source-candidate-{}'.format(
                          rec['name'])):
                if c in super_cfg.keys():
                    subds.config.set(c,
                                     super_cfg.get(c),
                                     where='local',
                                     reload=False)
                    need_reload = True
                    break
        if need_reload:
            subds.config.reload(force=True)