def __call__(
            reponame,
            dataset=None,
            recursive=False,
            recursion_limit=None,
            name='github',
            existing='error',
            github_login=None,
            github_passwd=None,
            github_organization=None,
            access_protocol='https',
            publish_depends=None,
            dryrun=False):
        try:
            # this is an absolute leaf package, import locally to avoid
            # unnecessary dependencies
            import github as gh
        except ImportError:
            raise MissingExternalDependency(
                'PyGitHub',
                msg='GitHub-related functionality is unavailable without this package')

        # what to operate on
        ds = require_dataset(
            dataset, check_installed=True, purpose='create Github sibling')
        # gather datasets and essential info
        # dataset instance and mountpoint relative to the top
        toprocess = [(ds, '')]
        if recursive:
            for sub in ds.subdatasets(
                    fulfilled=None,  # we want to report on missing datasets here
                    recursive=recursive,
                    recursion_limit=recursion_limit,
                    result_xfm='datasets'):
                if not sub.is_installed():
                    lgr.info('Ignoring unavailable subdataset %s', sub)
                    continue
                toprocess.append((sub, relpath(sub.path, start=ds.path)))

        # check for existing remote configuration
        filtered = []
        for d, mp in toprocess:
            if name in d.repo.get_remotes():
                if existing == 'error':
                    msg = '{} already has a configured sibling "{}"'.format(
                        d, name)
                    if dryrun:
                        lgr.error(msg)
                    else:
                        raise ValueError(msg)
                elif existing == 'skip':
                    continue
            gh_reponame = '{}{}{}'.format(
                reponame,
                '-' if mp else '',
                template_fx(mp))
            filtered.append((d, gh_reponame))

        if not filtered:
            # all skipped
            return []

        # actually make it happen on Github
        rinfo = _make_github_repos(
            gh, github_login, github_passwd, github_organization, filtered,
            existing, access_protocol, dryrun)

        # lastly configure the local datasets
        for d, url, existed in rinfo:
            if not dryrun:
                # first make sure that annex doesn't touch this one
                # but respect any existing config
                ignore_var = 'remote.{}.annex-ignore'.format(name)
                if ignore_var not in d.config:
                    d.config.add(ignore_var, 'true', where='local')
                Siblings()(
                    'configure',
                    dataset=d,
                    name=name,
                    url=url,
                    recursive=False,
                    # TODO fetch=True, maybe only if one existed already
                    publish_depends=publish_depends)

        # TODO let submodule URLs point to Github (optional)
        return rinfo
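
A minimal usage sketch for the interface above, assuming it is exposed
through the DataLad Python API as `create_sibling_github` (the repository
name and dataset path below are hypothetical):

from datalad.api import create_sibling_github

# create GitHub repositories for a dataset and all installed subdatasets;
# datasets that already have a 'github' remote are skipped instead of
# triggering an error
create_sibling_github(
    'myproject',             # hypothetical repository base name
    dataset='/tmp/myds',     # hypothetical local dataset path
    recursive=True,
    existing='skip',
    access_protocol='https',
    dryrun=True,             # only report what would be done
)
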
Example #2
def _create_dataset_sibling(
        name,
        ds,
        hierarchy_basepath,
        ssh,
        replicate_local_structure,
        ssh_url,
        target_dir,
        target_url,
        target_pushurl,
        existing,
        shared,
        group,
        publish_depends,
        publish_by_default,
        install_postupdate_hook,
        as_common_datasrc,
        annex_wanted,
        annex_group,
        annex_groupwanted,
        inherit
):
    """Everyone is very smart here and could figure out the combinatorial
    affluence among provided tiny (just slightly over a dozen) number of options
    and only a few pages of code
    """
    localds_path = ds.path
    ds_name = relpath(localds_path, start=hierarchy_basepath)
    if not replicate_local_structure:
        ds_name = '' if ds_name == curdir \
            else '-{}'.format(ds_name.replace("/", "-"))
        remoteds_path = target_dir.replace(
            "%RELNAME",
            ds_name)
    else:
        # TODO: opj depends on local platform, not the remote one.
        # check how to deal with it. Does windows ssh server accept
        # posix paths? vice versa? Should planned SSH class provide
        # tools for this issue?
        # see gh-1188
        remoteds_path = normpath(opj(target_dir, ds_name))

    # construct a would-be ssh url based on the current dataset's path
    ssh_url.path = remoteds_path
    ds_sshurl = ssh_url.as_str()
    # configure dataset's git-access urls
    ds_target_url = target_url.replace('%RELNAME', ds_name) \
        if target_url else ds_sshurl
    # push, configure only if needed
    ds_target_pushurl = None
    if ds_target_url != ds_sshurl:
        # not guaranteed that we can push via the primary URL
        ds_target_pushurl = target_pushurl.replace('%RELNAME', ds_name) \
            if target_pushurl else ds_sshurl

    lgr.info("Considering to create a target dataset {0} at {1} of {2}".format(
        localds_path, remoteds_path, ssh_url.hostname))
    # Must be set to True only if exists and existing='reconfigure'
    # otherwise we might skip actions if we say existing='reconfigure'
    # but it did not even exist before
    only_reconfigure = False
    if remoteds_path != '.':
        # check if target exists
        # TODO: Is this condition valid for != '.' only?
        path_exists = True
        try:
            out, err = ssh("ls {}".format(sh_quote(remoteds_path)))
        except CommandError as e:
            if "No such file or directory" in e.stderr and \
                    remoteds_path in e.stderr:
                path_exists = False
            else:
                raise  # It's an unexpected failure here

        if path_exists:
            _msg = "Target path %s already exists." % remoteds_path
            # the path might exist but be an empty directory, which is
            # ok to remove
            try:
                lgr.debug(
                    "Trying to rmdir %s on remote since might be an empty dir",
                    remoteds_path
                )
                # should be safe, since rmdir will not remove anything but an empty dir
                ssh("rmdir {}".format(sh_quote(remoteds_path)))
                path_exists = False
            except CommandError as e:
                # If rmdir fails -- the directory either has content or we lack permissions
                # TODO: fixup encode/decode dance again :-/ we should have got
                # unicode/str here by now.  I guess it is the same as
                # https://github.com/ReproNim/niceman/issues/83
                # where I have reused this Runner thing
                try:
                    # ds_name is unicode which makes _msg unicode so we must be
                    # unicode-ready
                    err_str = text_type(e.stderr)
                except UnicodeDecodeError:
                    err_str = e.stderr.decode(errors='replace')
                _msg += " And it fails to rmdir (%s)." % (err_str.strip(),)

        if path_exists:
            if existing == 'error':
                raise RuntimeError(_msg)
            elif existing == 'skip':
                lgr.info(_msg + " Skipping")
                return
            elif existing == 'replace':
                lgr.info(_msg + " Replacing")
                # enable write permissions to allow removing dir
                ssh("chmod +r+w -R {}".format(sh_quote(remoteds_path)))
                # remove target at path
                ssh("rm -rf {}".format(sh_quote(remoteds_path)))
                # if we succeeded in removing it
                path_exists = False
                # Since it is gone now, git-annex also should forget about it
                remotes = ds.repo.get_remotes()
                if name in remotes:
                    # so we had this remote already, we should announce it dead
                    # XXX what if there was some kind of mismatch and this name
                    # doesn't match the actual remote UUID?  Should we have
                    # checked more carefully?
                    lgr.info(
                        "Announcing existing remote %s dead to annex and removing",
                        name
                    )
                    if isinstance(ds.repo, AnnexRepo):
                        ds.repo.set_remote_dead(name)
                    ds.repo.remove_remote(name)
            elif existing == 'reconfigure':
                lgr.info(_msg + " Will only reconfigure")
                only_reconfigure = True
            else:
                raise ValueError(
                    "Do not know how to handle existing={}".format(
                        repr(existing)))

        if not path_exists:
            ssh("mkdir -p {}".format(sh_quote(remoteds_path)))

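    # lazily look up this dataset's superdataset; it is only consulted
    # below when 'inherit' was requested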
    delayed_super = _DelayedSuper(ds)
    if inherit and delayed_super.super:
        if shared is None:
            # here we must analyze current_ds's super, not the super_ds
            # inherit from the setting on remote end
            shared = CreateSibling._get_ds_remote_shared_setting(
                delayed_super, name, ssh)

        if not install_postupdate_hook:
            # Even though the directive from above was False (the user did
            # not explicitly request it), we were asked to inherit the setup,
            # so we might need to install the hook if the super has it
            # enabled on the remote
            install_postupdate_hook = CreateSibling._has_active_postupdate(
                delayed_super, name, ssh)

    if group:
        # Either the repository existed before or a new directory was just
        # created for it; set its group to the desired one, if provided,
        # via chgrp
        ssh("chgrp -R {} {}".format(
            sh_quote(text_type(group)),
            sh_quote(remoteds_path)))
    # don't (re-)initialize dataset if existing == reconfigure
    if not only_reconfigure:
        # init git and possibly annex repo
        if not CreateSibling.init_remote_repo(
                remoteds_path, ssh, shared, ds,
                description=target_url):
            return

        if target_url and not is_ssh(target_url):
            # we are not coming in via SSH, hence cannot assume proper
            # setup for webserver access -> fix
            ssh('git -C {} update-server-info'.format(sh_quote(remoteds_path)))
    else:
        # TODO -- we might still want to reconfigure 'shared' setting!
        pass

    # at this point we have a remote sibling in some shape or form
    # -> add as remote
    lgr.debug("Adding the siblings")
    # TODO generator, yield the now swallowed results
    Siblings.__call__(
        'configure',
        dataset=ds,
        name=name,
        url=ds_target_url,
        pushurl=ds_target_pushurl,
        recursive=False,
        fetch=True,
        as_common_datasrc=as_common_datasrc,
        publish_by_default=publish_by_default,
        publish_depends=publish_depends,
        annex_wanted=annex_wanted,
        annex_group=annex_group,
        annex_groupwanted=annex_groupwanted,
        inherit=inherit
    )

    # check git version on remote end
    lgr.info("Adjusting remote git configuration")
    remote_git_version = ssh.get_git_version()
    if remote_git_version and remote_git_version >= LooseVersion("2.4"):
        # allow for pushing to checked out branch
        try:
            ssh("git -C {} config receive.denyCurrentBranch updateInstead".format(
                sh_quote(remoteds_path)))
        except CommandError as e:
            lgr.error("git config failed at remote location %s.\n"
                      "You will not be able to push to checked out "
                      "branch. Error: %s", remoteds_path, exc_str(e))
    else:
        lgr.error("Git version >= 2.4 needed to configure remote."
                  " Version detected on server: %s\nSkipping configuration"
                  " of receive.denyCurrentBranch - you will not be able to"
                  " publish updates to this repository. Upgrade your git"
                  " and run with --existing=reconfigure",
                  remote_git_version)

    if install_postupdate_hook:
        # enable metadata refresh on dataset updates to publication server
        lgr.info("Enabling git post-update hook ...")
        try:
            CreateSibling.create_postupdate_hook(
                remoteds_path, ssh, ds)
        except CommandError as e:
            lgr.error("Failed to add json creation command to post update "
                      "hook.\nError: %s" % exc_str(e))

    return remoteds_path
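
A small self-contained sketch of the remote-path naming logic at the top of
this helper; the example paths in the comments are hypothetical:

from os.path import curdir, join as opj, normpath, relpath

def expand_remote_path(target_dir, localds_path, hierarchy_basepath,
                       replicate_local_structure):
    # mirror of the logic above: either substitute a '-'-joined suffix for
    # the %RELNAME placeholder, or replicate the local directory layout
    # underneath target_dir
    ds_name = relpath(localds_path, start=hierarchy_basepath)
    if not replicate_local_structure:
        ds_name = '' if ds_name == curdir \
            else '-{}'.format(ds_name.replace('/', '-'))
        return target_dir.replace('%RELNAME', ds_name)
    return normpath(opj(target_dir, ds_name))

# e.g. with target_dir='/srv/repos/proj%RELNAME' and
# hierarchy_basepath='/data/ds':
#   localds_path='/data/ds'          -> '/srv/repos/proj'
#   localds_path='/data/ds/data/raw' -> '/srv/repos/proj-data-raw'
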
Example #3
    def __call__(reponame,
                 dataset=None,
                 recursive=False,
                 recursion_limit=None,
                 name='github',
                 existing='error',
                 github_login=None,
                 github_organization=None,
                 access_protocol='https',
                 publish_depends=None,
                 private=False,
                 dryrun=False,
                 dry_run=False):
        if dryrun and not dry_run:
            # only the deprecated option was given; honor it but warn
            warnings.warn(
                "datalad-create-sibling-github's `dryrun` option is "
                "deprecated and will be removed in a future release, "
                "use the renamed `dry_run/--dry-run` option instead.",
                DeprecationWarning)
            dry_run = dryrun

        # this is an absolute leaf package, import locally to avoid
        # unnecessary dependencies
        from datalad.support.github_ import _make_github_repos_

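        # GitHub restricts the characters allowed in repository names;
        # refuse any name that normalization would have to alter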
        if reponame != normalize_reponame(reponame):
            raise ValueError(
                'Invalid name for a GitHub project: {}'.format(reponame))

        # what to operate on
        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='create GitHub sibling(s)')

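        # properties shared by every result record this command yields
        # (action label, logger, and the reference dataset)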
        res_kwargs = dict(
            action='create_sibling_github [dry-run]'
            if dry_run else 'create_sibling_github',
            logger=lgr,
            refds=ds.path,
        )
        # gather datasets and essential info
        # dataset instance and mountpoint relative to the top
        toprocess = [ds]
        if recursive:
            for sub in ds.subdatasets(
                    fulfilled=None,  # we want to report on missing datasets here
                    recursive=recursive,
                    recursion_limit=recursion_limit,
                    result_xfm='datasets'):
                if not sub.is_installed():
                    lgr.info('Ignoring unavailable subdataset %s', sub)
                    continue
                toprocess.append(sub)

        # check for existing remote configuration
        filtered = []
        for d in toprocess:
            if name in d.repo.get_remotes():
                yield get_status_dict(
                    ds=d,
                    status='error' if existing == 'error' else 'notneeded',
                    message=('already has a configured sibling "%s"', name),
                    **res_kwargs)
                continue
            gh_reponame = reponame if d == ds else \
                '{}-{}'.format(
                    reponame,
                    normalize_reponame(str(d.pathobj.relative_to(ds.pathobj))))
            filtered.append((d, gh_reponame))

        if not filtered:
            # all skipped
            return

        # actually make it happen on GitHub
        for res in _make_github_repos_(github_login, github_organization,
                                       filtered, existing, access_protocol,
                                       private, dry_run):
            # blend reported results with standard properties
            res = dict(res, **res_kwargs)
            if 'message' not in res:
                res['message'] = ("Dataset sibling '%s', project at %s", name,
                                  res['url'])
            # report to caller
            yield get_status_dict(**res)
            if res['status'] not in ('ok', 'notneeded'):
                # something went wrong, do not proceed
                continue
            # lastly configure the local datasets
            if not dry_run:
                extra_remote_vars = {
                    # first make sure that annex doesn't touch this one
                    # but respect any existing config
                    'annex-ignore': 'true',
                    # first push should separately push active branch first
                    # to overcome github issue of choosing "default" branch
                    # alphabetically if its name does not match the default
                    # branch for the user (or organization) which now defaults
                    # to "main"
                    'datalad-push-default-first': 'true'
                }
                for var_name, var_value in extra_remote_vars.items():
                    var = 'remote.{}.{}'.format(name, var_name)
                    if var not in d.config:
                        d.config.add(var, var_value, where='local')
                yield from Siblings()(
                    'configure',
                    dataset=d,
                    name=name,
                    url=res['url'],
                    recursive=False,
                    # TODO fetch=True, maybe only if one existed already
                    publish_depends=publish_depends,
                    result_renderer='disabled')
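
A hedged sketch of consuming the result records this generator yields,
assuming the command is reachable as `datalad.api.create_sibling_github` and
follows DataLad's standard result-record convention (plain dicts carrying at
least 'action' and 'status'; treat the exact keys as assumptions):

from datalad.api import create_sibling_github

# operates on the dataset found in the current working directory, since
# `dataset` is left at its default
for res in create_sibling_github(
        'myproject',        # hypothetical repository base name
        recursive=True,
        existing='skip',
        private=True,
        dry_run=True):      # note the renamed option in this version
    # 'status' is one of 'ok', 'notneeded', 'impossible', or 'error'
    print(res['action'], res['status'], res.get('message'))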