Example #1
    def __call__(
            action='query',
            dataset=None,
            name=None,
            url=None,
            pushurl=None,
            description=None,
            # TODO consider true, for now like add_sibling
            fetch=False,
            as_common_datasrc=None,
            publish_depends=None,
            publish_by_default=None,
            annex_wanted=None,
            annex_required=None,
            annex_group=None,
            annex_groupwanted=None,
            inherit=False,
            get_annex_info=True,
            recursive=False,
            recursion_limit=None):

        # TODO: Detect malformed URL and fail?
        # XXX possibly fail if fetch is False and as_common_datasrc

        if annex_groupwanted and not annex_group:
            raise InsufficientArgumentsError(
                "To set groupwanted, you need to provide annex_group option")

        # TODO catch invalid action specified
        action_worker_map = {
            'query': _query_remotes,
            'add': _add_remote,
            'configure': _configure_remote,
            'remove': _remove_remote,
            'enable': _enable_remote,
        }
        # all workers strictly operate on a single dataset;
        # anything that deals with hierarchies and/or dataset
        # relationships in general should be dealt with here
        # at the top level, and vice versa
        worker = action_worker_map[action]

        dataset = require_dataset(dataset,
                                  check_installed=False,
                                  purpose='sibling configuration')
        refds_path = dataset.path

        res_kwargs = dict(refds=refds_path, logger=lgr)

        ds_name = basename(dataset.path)

        # do not form a single list of datasets (with recursion results),
        # to give the fastest possible response at the price of a longer
        # overall function call
        ds = dataset
        for r in worker(
                # keep in sync with the recursive call below to avoid bugs!
                ds,
                name,
                ds.repo.get_remotes(),
                # for the top-level dataset there are no layout questions
                _mangle_urls(url, ds_name),
                _mangle_urls(pushurl, ds_name),
                fetch,
                description,
                as_common_datasrc,
                publish_depends,
                publish_by_default,
                annex_wanted,
                annex_required,
                annex_group,
                annex_groupwanted,
                inherit,
                get_annex_info,
                **res_kwargs):
            yield r
        if not recursive:
            return

        # do we have instructions to register siblings with some alternative
        # layout?
        replicate_local_structure = url and "%NAME" not in url

        for subds in dataset.subdatasets(fulfilled=True,
                                         recursive=recursive,
                                         recursion_limit=recursion_limit,
                                         result_xfm='datasets'):
            subds_name = relpath(subds.path, start=dataset.path)
            if replicate_local_structure:
                subds_url = slash_join(url, subds_name)
                subds_pushurl = slash_join(pushurl, subds_name)
            else:
                subds_url = \
                    _mangle_urls(url, '/'.join([ds_name, subds_name]))
                subds_pushurl = \
                    _mangle_urls(pushurl, '/'.join([ds_name, subds_name]))
            for r in worker(
                    # keep in sync with the top-level call above to avoid bugs
                    subds,
                    name,
                    subds.repo.get_remotes(),
                    subds_url,
                    subds_pushurl,
                    fetch,
                    description,
                    as_common_datasrc,
                    publish_depends,
                    publish_by_default,
                    annex_wanted,
                    annex_required,
                    annex_group,
                    annex_groupwanted,
                    inherit,
                    get_annex_info,
                    **res_kwargs):
                yield r
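
A hedged usage sketch for the `siblings` command above, through the
high-level datalad.api binding; the dataset path, sibling name, and URL
are illustrative assumptions, not taken from the source:

    from datalad.api import Dataset

    ds = Dataset('/tmp/myds')  # hypothetical, already-installed dataset
    # 'add' dispatches to _add_remote via action_worker_map; with
    # recursive=True the same worker is re-run on every installed
    # subdataset, with per-dataset URLs derived by the layout logic above
    for res in ds.siblings(
            action='add',
            name='backup',
            url='ssh://server/store/%NAME',  # %NAME substituted per dataset
            recursive=True):
        print(res['status'], res['path'])
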
Example #2
    def __call__(sshurl, name=None, target_dir=None,
                 target_url=None, target_pushurl=None,
                 dataset=None,
                 recursive=False,
                 recursion_limit=None,
                 existing='error',
                 shared=None,
                 group=None,
                 ui=False,
                 as_common_datasrc=None,
                 publish_by_default=None,
                 publish_depends=None,
                 annex_wanted=None, annex_group=None, annex_groupwanted=None,
                 inherit=False,
                 since=None):
        #
        # nothing without a base dataset
        #
        ds = require_dataset(dataset, check_installed=True,
                             purpose='creating a sibling')
        refds_path = ds.path

        #
        # all checks that are possible before we start parsing the dataset
        #

        # possibly use sshurl to get the name, in case it is not specified
        if not sshurl:
            if not inherit:
                raise InsufficientArgumentsError(
                    "needs at least an SSH URL, if no inherit option"
                )
            if name is None:
                raise ValueError(
                    "Neither SSH URL, nor the name of sibling to inherit from "
                    "was specified"
                )
            # It might well be that we already have this remote setup
            try:
                sshurl = CreateSibling._get_remote_url(ds, name)
            except Exception as exc:
                lgr.debug('%s does not know about url for %s: %s', ds, name, exc_str(exc))
        elif inherit:
            raise ValueError(
                "For now, for clarity not allowing specifying a custom sshurl "
                "while inheriting settings"
            )
            # maybe this could be safely dropped -- still WIP

        if not sshurl:
            # TODO: maybe back up more before _prep?
            super_ds = ds.get_superdataset()
            if not super_ds:
                raise ValueError(
                    "Could not determine super dataset for %s to inherit URL"
                    % ds
                )
            super_url = CreateSibling._get_remote_url(super_ds, name)
            # for now assuming hierarchical setup
            # (TODO: to be able to distinguish between the two, probably
            # needs storing datalad.*.target_dir to have %RELNAME in there)
            sshurl = slash_join(super_url, relpath(ds.path, super_ds.path))

        # check the login URL
        sshri = RI(sshurl)
        if not is_ssh(sshri):
            raise ValueError(
                "Unsupported SSH URL: '{0}', "
                "use ssh://host/path or host:path syntax".format(sshurl))

        if not name:
            # use the hostname as default remote name
            name = sshri.hostname
            lgr.debug(
                "No sibling name given, use URL hostname '%s' as sibling name",
                name)

        if since == '':
            # consider creating siblings only since the point of
            # the last update
            # XXX here we assume one to one mapping of names from local branches
            # to the remote
            active_branch = ds.repo.get_active_branch()
            since = '%s/%s' % (name, active_branch)

        #
        # parse the base dataset to find all subdatasets that need processing
        #
        to_process = []
        for ap in AnnotatePaths.__call__(
                dataset=refds_path,
                # only a single path!
                path=refds_path,
                recursive=recursive,
                recursion_limit=recursion_limit,
                action='create_sibling',
                # neither of the next two should happen anyway
                unavailable_path_status='impossible',
                nondataset_path_status='error',
                modified=since,
                return_type='generator',
                on_failure='ignore'):
            if ap.get('status', None):
                # this is done
                yield ap
                continue
            if ap.get('type', None) != 'dataset' or ap.get('state', None) == 'absent':
                # this can happen when there is `since`, but we have no
                # use for anything but datasets here
                continue
            checkds_remotes = Dataset(ap['path']).repo.get_remotes() \
                if ap.get('state', None) != 'absent' \
                else []
            if publish_depends:
                # make sure dependencies are valid
                # TODO: inherit -- we might want to automagically create
                # those dependents as well???
                unknown_deps = set(assure_list(publish_depends)).difference(checkds_remotes)
                if unknown_deps:
                    ap['status'] = 'error'
                    ap['message'] = (
                        'unknown sibling(s) specified as publication dependency: %s',
                        unknown_deps)
                    yield ap
                    continue
            if name in checkds_remotes and existing in ('error', 'skip'):
                ap['status'] = 'error' if existing == 'error' else 'notneeded'
                ap['message'] = (
                    "sibling '%s' already configured (specify alternative name, or force "
                    "reconfiguration via --existing",
                    name)
                yield ap
                continue
            to_process.append(ap)

        if not to_process:
            # we ruled out all possibilities
            # TODO wait for gh-1218 and make better return values
            lgr.info("No datasets qualify for sibling creation. "
                     "Consider different settings for --existing "
                     "or --since if this is unexpected")
            return

        if target_dir is None:
            if sshri.path:
                target_dir = sshri.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = "%RELNAME" not in target_dir

        # request ssh connection:
        lgr.info("Connecting ...")
        assert sshurl is not None  # delayed sanity check
        ssh = ssh_manager.get_connection(sshurl)
        if not ssh.get_annex_version():
            raise MissingExternalDependency(
                'git-annex',
                msg='on the remote system')

        #
        # all checks done and we have a connection, now do something
        #

        # loop over all datasets, ordered from top to bottom, to make the test
        # below valid (existing directories would cause the machinery to halt).
        # But we need to run the post-update hooks in depth-first fashion, so
        # we only collect first and then run (see gh #790)
        yielded = set()
        remote_repos_to_run_hook_for = []
        for currentds_ap in \
                sorted(to_process, key=lambda x: x['path'].count('/')):
            current_ds = Dataset(currentds_ap['path'])

            path = _create_dataset_sibling(
                name,
                current_ds,
                ds.path,
                ssh,
                replicate_local_structure,
                sshri,
                target_dir,
                target_url,
                target_pushurl,
                existing,
                shared,
                group,
                publish_depends,
                publish_by_default,
                ui,
                as_common_datasrc,
                annex_wanted,
                annex_group,
                annex_groupwanted,
                inherit
            )
            if not path:
                # nothing new was created
                # TODO is 'notneeded' appropriate in this case?
                currentds_ap['status'] = 'notneeded'
                # TODO explain status in 'message'
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            remote_repos_to_run_hook_for.append((path, currentds_ap))

            # publish web-interface to root dataset on publication server
            if current_ds.path == ds.path and ui:
                lgr.info("Uploading web interface to %s" % path)
                try:
                    CreateSibling.upload_web_interface(path, ssh, shared, ui)
                except CommandError as e:
                    currentds_ap['status'] = 'error'
                    currentds_ap['message'] = (
                        "failed to push web interface to the remote datalad repository (%s)",
                        exc_str(e))
                    yield currentds_ap
                    yielded.add(currentds_ap['path'])
                    continue

        # iterating in reverse order gives depth-first traversal
        lgr.info("Running post-update hooks in all created siblings")
        # TODO: add progressbar
        for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
            # Trigger the hook
            lgr.debug("Running hook for %s (if exists and executable)", path)
            try:
                ssh("cd {} "
                    "&& ( [ -x hooks/post-update ] && hooks/post-update || : )"
                    "".format(sh_quote(_path_(path, ".git"))))
            except CommandError as e:
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to run post-update hook under remote path %s (%s)",
                    path, exc_str(e))
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            if currentds_ap['path'] not in yielded:
                # if we were silent until now everything is just splendid
                currentds_ap['status'] = 'ok'
                yield currentds_ap
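
A minimal usage sketch for this `create_sibling` variant via the
datalad.api binding; the server URL and sibling name are illustrative
assumptions:

    from datalad.api import Dataset

    ds = Dataset('/tmp/myds')  # hypothetical dataset
    # creates target repositories under /store on the server (replicating
    # the local hierarchy when recursive), registers each as sibling
    # 'server', and finally runs the post-update hooks depth-first
    for res in ds.create_sibling(
            'ssh://server/store/myds',
            name='server',
            existing='skip',  # yield 'notneeded' instead of erroring on re-run
            recursive=True):
        print(res['status'], res['path'])
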
Example #3
    def __call__(sshurl,
                 *,
                 name=None,
                 target_dir=None,
                 target_url=None,
                 target_pushurl=None,
                 dataset=None,
                 recursive=False,
                 recursion_limit=None,
                 existing='error',
                 shared=None,
                 group=None,
                 ui=False,
                 as_common_datasrc=None,
                 publish_by_default=None,
                 publish_depends=None,
                 annex_wanted=None,
                 annex_group=None,
                 annex_groupwanted=None,
                 inherit=False,
                 since=None):
        if ui:
            # the webui has been moved to the deprecated extension
            try:
                from datalad_deprecated.sibling_webui \
                    import upload_web_interface
            except Exception as e:
                # we could just test for ModuleNotFoundError (which should be
                # all that would happen with PY3.6+), but be a little more
                # robust and use the pattern from duecredit
                if type(e).__name__ not in ('ImportError',
                                            'ModuleNotFoundError'):
                    lgr.error(
                        "Failed to import datalad_deprecated.sibling_webui "
                        "due to %s", str(e))
                raise RuntimeError(
                    "The DataLad web UI has been moved to an extension "
                    "package. Please install the Python package "
                    "`datalad_deprecated` to be able to deploy it.")

        # `push` uses '^' to denote the previously pushed committish, and None
        # for the default behavior. '' was/is (to be deprecated) used in
        # `publish` and `create-sibling`. Alert the user about the mistake
        if since == '':
            # deprecation was added prior to 0.16.0
            import warnings
            warnings.warn("'since' should point to a committish or use '^'.",
                          DeprecationWarning)
            since = '^'

        #
        # nothing without a base dataset
        #
        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='create sibling(s)')
        refds_path = ds.path

        #
        # all checks that are possible before we start parsing the dataset
        #
        if since and not recursive:
            raise ValueError("The use of 'since' requires 'recursive'")
        # possibly use sshurl to get the name, in case it is not specified
        if not sshurl:
            if not inherit:
                raise InsufficientArgumentsError(
                    "needs at least an SSH URL, if no inherit option")
            if name is None:
                raise ValueError(
                    "Neither SSH URL, nor the name of sibling to inherit from "
                    "was specified")
            # It might well be that we already have this remote setup
            try:
                sshurl = CreateSibling._get_remote_url(ds, name)
            except Exception as exc:
                ce = CapturedException(exc)
                lgr.debug('%s does not know about url for %s: %s', ds, name,
                          ce)
        elif inherit:
            raise ValueError(
                "For now, for clarity not allowing specifying a custom sshurl "
                "while inheriting settings")
            # maybe this could be safely dropped -- still WIP

        if not sshurl:
            # TODO: maybe back up more before _prep?
            super_ds = ds.get_superdataset()
            if not super_ds:
                raise ValueError(
                    "Could not determine super dataset for %s to inherit URL" %
                    ds)
            super_url = CreateSibling._get_remote_url(super_ds, name)
            # for now assuming hierarchical setup
            # (TODO: to be able to distinguish between the two, probably
            # needs storing datalad.*.target_dir to have %RELNAME in there)
            sshurl = slash_join(super_url, relpath(refds_path, super_ds.path))

        # check the login URL
        sibling_ri = RI(sshurl)
        ssh_sibling = is_ssh(sibling_ri)
        if not (ssh_sibling or isinstance(sibling_ri, PathRI)):
            raise ValueError(
                "Unsupported SSH URL or path: '{0}', "
                "use ssh://host/path, host:path or path syntax".format(sshurl))

        if not name:
            name = sibling_ri.hostname if ssh_sibling else "local"
            lgr.info("No sibling name given. Using %s'%s' as sibling name",
                     "URL hostname " if ssh_sibling else "", name)
        if since == '^':
            # consider creating siblings only since the point of
            # the last update
            # XXX here we assume one to one mapping of names from local branches
            # to the remote
            active_branch = ds.repo.get_active_branch()
            since = '%s/%s' % (name, active_branch)

        to_process = []
        if recursive:
            #
            # parse the base dataset to find all subdatasets that need processing
            #
            cand_ds = [
                Dataset(r['path']) for r in diff_dataset(
                    ds,
                    fr=since,
                    to='HEAD',
                    # w/o False we might not follow into new subdatasets
                    # which do not have that remote yet setup,
                    # see https://github.com/datalad/datalad/issues/6596
                    constant_refs=False,
                    # save cycles, we are only looking for datasets
                    annex=None,
                    untracked='no',
                    recursive=True,
                    datasets_only=True,
                )
                # not-installed subdatasets would be 'clean', so we would skip them
                if r.get('type') == 'dataset'
                and r.get('state', None) != 'clean'
            ]
            if not since:
                # not only subdatasets
                cand_ds = [ds] + cand_ds
        else:
            # only the current ds
            cand_ds = [ds]
        # check remotes setup
        for d in cand_ds:
            d_repo = d.repo
            if d_repo is None:
                continue
            checkds_remotes = d.repo.get_remotes()
            res = dict(
                action='create_sibling',
                path=d.path,
                type='dataset',
            )

            if publish_depends:
                # make sure dependencies are valid
                # TODO: inherit -- we might want to automagically create
                # those dependents as well???
                unknown_deps = set(
                    ensure_list(publish_depends)).difference(checkds_remotes)
                if unknown_deps:
                    yield dict(
                        res,
                        status='error',
                        message=('unknown sibling(s) specified as publication '
                                 'dependency: %s', unknown_deps),
                    )
                    continue
            if name in checkds_remotes and existing in ('error', 'skip'):
                yield dict(
                    res,
                    sibling_name=name,
                    status='error' if existing == 'error' else 'notneeded',
                    message=(
                        "sibling '%s' already configured (specify alternative "
                        "name, or force reconfiguration via --existing", name),
                )
                continue
            to_process.append(res)

        if not to_process:
            # we ruled out all possibilities
            # TODO wait for gh-1218 and make better return values
            lgr.info("No datasets qualify for sibling creation. "
                     "Consider different settings for --existing "
                     "or --since if this is unexpected")
            return

        if ssh_sibling:
            # request ssh connection:
            lgr.info("Connecting ...")
            shell = ssh_manager.get_connection(sshurl)
        else:
            shell = _RunnerAdapter()
            sibling_ri.path = str(resolve_path(sibling_ri.path, dataset))
            if target_dir:
                target_dir = opj(sibling_ri.path, target_dir)

        if target_dir is None:
            if sibling_ri.path:
                target_dir = sibling_ri.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = "%RELNAME" not in target_dir

        if not shell.get_annex_version():
            raise MissingExternalDependency(
                'git-annex',
                msg="It's required on the {} machine to create a sibling".
                format('remote' if ssh_sibling else 'local'))

        #
        # all checks done and we have a connection, now do something
        #

        # loop over all datasets, ordered from top to bottom, to make the test
        # below valid (existing directories would cause the machinery to halt).
        # But we need to run the post-update hooks in depth-first fashion, so
        # we only collect first and then run (see gh #790)
        yielded = set()
        remote_repos_to_run_hook_for = []
        for currentds_ap in \
                sorted(to_process, key=lambda x: x['path'].count('/')):
            current_ds = Dataset(currentds_ap['path'])

            path = _create_dataset_sibling(
                name, current_ds, refds_path, shell, replicate_local_structure,
                sibling_ri, target_dir, target_url, target_pushurl, existing,
                shared, group, publish_depends, publish_by_default, ui,
                as_common_datasrc, annex_wanted, annex_group,
                annex_groupwanted, inherit)
            currentds_ap["sibling_name"] = name
            if not path:
                # nothing new was created
                # TODO is 'notneeded' appropriate in this case?
                currentds_ap['status'] = 'notneeded'
                # TODO explain status in 'message'
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            remote_repos_to_run_hook_for.append((path, currentds_ap))

            # publish web-interface to root dataset on publication server
            if current_ds.path == refds_path and ui:
                from datalad_deprecated.sibling_webui import upload_web_interface
                lgr.info("Uploading web interface to %s", path)
                try:
                    upload_web_interface(path, shell, shared, ui)
                except CommandError as e:
                    ce = CapturedException(e)
                    currentds_ap['status'] = 'error'
                    currentds_ap['message'] = (
                        "failed to push web interface to the remote datalad repository (%s)",
                        ce)
                    currentds_ap['exception'] = ce
                    yield currentds_ap
                    yielded.add(currentds_ap['path'])
                    continue

        # iterating in reverse order gives depth-first traversal
        lgr.info("Running post-update hooks in all created siblings")
        # TODO: add progressbar
        for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
            # Trigger the hook
            lgr.debug("Running hook for %s (if exists and executable)", path)
            try:
                shell(
                    "cd {} "
                    "&& ( [ -x hooks/post-update ] && hooks/post-update || true )"
                    "".format(sh_quote(_path_(path, ".git"))))
            except CommandError as e:
                ce = CapturedException(e)
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to run post-update hook under remote path %s (%s)",
                    path, ce)
                currentds_ap['exception'] = ce
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            if currentds_ap['path'] not in yielded:
                # if we were silent until now everything is just splendid
                currentds_ap['status'] = 'ok'
                yield currentds_ap
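
Unlike example #2, this version also accepts a plain local path in place
of an SSH URL (served by a local runner instead of an SSH connection), and
`since='^'` limits processing to (sub)datasets changed since the last
update of that sibling. A sketch under those assumptions, with an
illustrative path:

    from datalad.api import Dataset

    ds = Dataset('/tmp/myds')  # hypothetical dataset
    for res in ds.create_sibling(
            '/backup/store/myds',  # plain path; default name becomes 'local'
            since='^',             # only what changed since the last update
            recursive=True):       # required whenever 'since' is given
        print(res['status'], res.get('sibling_name'), res['path'])
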
Example #4
File: siblings.py Project: hanke/datalad
    def __call__(
            action='query',
            dataset=None,
            name=None,
            url=None,
            pushurl=None,
            description=None,
            # TODO consider true, for now like add_sibling
            fetch=False,
            as_common_datasrc=None,
            publish_depends=None,
            publish_by_default=None,
            annex_wanted=None,
            annex_required=None,
            annex_group=None,
            annex_groupwanted=None,
            inherit=False,
            get_annex_info=True,
            recursive=False,
            recursion_limit=None):

        # TODO: Detect malformed URL and fail?
        # XXX possibly fail if fetch is False and as_common_datasrc

        if annex_groupwanted and not annex_group:
            raise InsufficientArgumentsError(
                "To set groupwanted, you need to provide annex_group option")

        # TODO catch invalid action specified
        action_worker_map = {
            'query': _query_remotes,
            'add': _add_remote,
            'configure': _configure_remote,
            'remove': _remove_remote,
            'enable': _enable_remote,
        }
        # all workers strictly operate on a single dataset;
        # anything that deals with hierarchies and/or dataset
        # relationships in general should be dealt with here
        # at the top level, and vice versa
        worker = action_worker_map[action]

        dataset = require_dataset(
            dataset, check_installed=False, purpose='sibling configuration')
        refds_path = dataset.path

        res_kwargs = dict(refds=refds_path, logger=lgr)

        ds_name = basename(dataset.path)

        # do not form a single list of datasets (with recursion results),
        # to give the fastest possible response at the price of a longer
        # overall function call
        ds = dataset
        for r in worker(
                # keep in sync with the recursive call below to avoid bugs!
                ds, name,
                ds.repo.get_remotes(),
                # for the top-level dataset there are no layout questions
                _mangle_urls(url, ds_name),
                _mangle_urls(pushurl, ds_name),
                fetch, description,
                as_common_datasrc, publish_depends, publish_by_default,
                annex_wanted, annex_required, annex_group, annex_groupwanted,
                inherit, get_annex_info,
                **res_kwargs):
            yield r
        if not recursive:
            return

        # do we have instructions to register siblings with some alternative
        # layout?
        replicate_local_structure = url and "%NAME" not in url

        for subds in dataset.subdatasets(
                fulfilled=True,
                recursive=recursive, recursion_limit=recursion_limit,
                result_xfm='datasets'):
            subds_name = relpath(subds.path, start=dataset.path)
            if replicate_local_structure:
                subds_url = slash_join(url, subds_name)
                subds_pushurl = slash_join(pushurl, subds_name)
            else:
                subds_url = \
                    _mangle_urls(url, '/'.join([ds_name, subds_name]))
                subds_pushurl = \
                    _mangle_urls(pushurl, '/'.join([ds_name, subds_name]))
            for r in worker(
                    # keep in sync with the top-level call above to avoid bugs
                    subds, name,
                    subds.repo.get_remotes(),
                    subds_url,
                    subds_pushurl,
                    fetch,
                    description,
                    as_common_datasrc, publish_depends, publish_by_default,
                    annex_wanted, annex_required, annex_group, annex_groupwanted,
                    inherit, get_annex_info,
                    **res_kwargs):
                yield r
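
Examples #1 and #4 share the same URL layout logic: when the given URL
contains no '%NAME' template, the local directory structure is replicated
by appending each subdataset's relative path; otherwise '%NAME' is
substituted with the dataset name, with '/' flattened to '-'. A
self-contained sketch with simplified stand-ins for datalad's
slash_join/_mangle_urls helpers (their exact implementations are an
assumption here):

    def slash_join(base, extension):
        # join URL pieces with exactly one '/' between them
        if not extension:
            return base
        return '/'.join((base.rstrip('/'), extension.lstrip('/')))

    def mangle_urls(url, ds_name):
        # substitute the %NAME template; flatten path separators to '-'
        return url.replace('%NAME', ds_name.replace('/', '-')) if url else url

    # replicate_local_structure is True: no %NAME in the URL
    print(slash_join('ssh://server/store/super/', 'sub/ds'))
    # -> ssh://server/store/super/sub/ds

    # replicate_local_structure is False: %NAME template present
    print(mangle_urls('ssh://server/store/%NAME', 'super/sub/ds'))
    # -> ssh://server/store/super-sub-ds
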
Example #5
    def __call__(
            action='query',
            *,
            dataset=None,
            name=None,
            url=None,
            pushurl=None,
            description=None,
            # TODO consider true, for now like add_sibling
            fetch=False,
            as_common_datasrc=None,
            publish_depends=None,
            publish_by_default=None,
            annex_wanted=None,
            annex_required=None,
            annex_group=None,
            annex_groupwanted=None,
            inherit=False,
            get_annex_info=True,
            recursive=False,
            recursion_limit=None):

        # TODO: Detect malformed URL and fail?
        # XXX possibly fail if fetch is False and as_common_datasrc

        if annex_groupwanted and not annex_group:
            raise InsufficientArgumentsError(
                "To set groupwanted, you need to provide annex_group option")

        # TODO catch invalid action specified
        action_worker_map = {
            'query': _query_remotes,
            'add': _add_remote,
            'configure': _configure_remote,
            'remove': _remove_remote,
            'enable': _enable_remote,
        }
        # all workers strictly operate on a single dataset;
        # anything that deals with hierarchies and/or dataset
        # relationships in general should be dealt with here
        # at the top level, and vice versa
        worker = action_worker_map[action]

        ds = require_dataset(
            dataset,
            # it makes no sense to use this command without a dataset
            check_installed=True,
            purpose='configure sibling')
        refds_path = ds.path

        res_kwargs = dict(refds=refds_path, logger=lgr)

        ds_name = op.basename(ds.path)

        # do not form a single list of datasets (with recursion results),
        # to give the fastest possible response at the price of a longer
        # overall function call

        # minimize expensive calls to .repo
        ds_repo = ds.repo

        # prepare common parameterization package for all worker calls
        worker_kwargs = dict(
            name=name,
            fetch=fetch,
            description=description,
            as_common_datasrc=as_common_datasrc,
            publish_depends=publish_depends,
            publish_by_default=publish_by_default,
            annex_wanted=annex_wanted,
            annex_required=annex_required,
            annex_group=annex_group,
            annex_groupwanted=annex_groupwanted,
            inherit=inherit,
            get_annex_info=get_annex_info,
            res_kwargs=res_kwargs,
        )
        yield from worker(
            ds=ds,
            repo=ds_repo,
            known_remotes=ds_repo.get_remotes(),
            # for the top-level dataset there are no layout questions
            url=_mangle_urls(url, ds_name),
            pushurl=_mangle_urls(pushurl, ds_name),
            **worker_kwargs)
        if not recursive:
            return

        # do we have instructions to register siblings with some alternative
        # layout?
        replicate_local_structure = url and "%NAME" not in url

        subds_pushurl = None
        for subds in ds.subdatasets(state='present',
                                    recursive=recursive,
                                    recursion_limit=recursion_limit,
                                    result_xfm='datasets'):
            subds_repo = subds.repo
            subds_name = op.relpath(subds.path, start=ds.path)
            if replicate_local_structure:
                subds_url = slash_join(url, subds_name)
                if pushurl:
                    subds_pushurl = slash_join(pushurl, subds_name)
            else:
                subds_url = \
                    _mangle_urls(url, '/'.join([ds_name, subds_name]))
                subds_pushurl = \
                    _mangle_urls(pushurl, '/'.join([ds_name, subds_name]))
            yield from worker(ds=subds,
                              repo=subds_repo,
                              known_remotes=subds_repo.get_remotes(),
                              url=subds_url,
                              pushurl=subds_pushurl,
                              **worker_kwargs)
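
The "TODO catch invalid action specified" note applies to every variant
above: an unknown `action` currently surfaces as a bare KeyError from the
dictionary dispatch. A hedged sketch of a friendlier guard (not in the
source):

    # instead of: worker = action_worker_map[action]
    try:
        worker = action_worker_map[action]
    except KeyError:
        raise ValueError(
            "unknown action %r, must be one of: %s"
            % (action, ', '.join(sorted(action_worker_map))))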