Example #1
    def init_remote_repo(path, ssh, shared, dataset, description=None):
        cmd = "git -C {} init{}".format(
            sh_quote(path),
            " --shared='{}'".format(sh_quote(shared)) if shared else '')
        try:
            ssh(cmd)
        except CommandError as e:
            lgr.error("Initialization of remote git repository failed at %s."
                      "\nError: %s\nSkipping ..." % (path, exc_str(e)))
            return False

        if isinstance(dataset.repo, AnnexRepo):
            # init remote git annex repo (part fix of #463)
            try:
                ssh(
                    "git -C {} annex init {}".format(
                        sh_quote(path),
                        sh_quote(description)
                        if description else '')
                )
            except CommandError as e:
                lgr.error("Initialization of remote git annex repository failed at %s."
                          "\nError: %s\nSkipping ..." % (path, exc_str(e)))
                return False
        return True
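
A minimal sketch of the call pattern above (not part of DataLad): fake_ssh is a hypothetical stand-in for the ssh connection object, and sh_quote is assumed to behave like shlex.quote:

from shlex import quote as sh_quote

def fake_ssh(cmd):
    # stand-in for an SSH connection: just show the remote command line
    print("would run remotely:", cmd)

# a path with a space demonstrates why every interpolated value is quoted
fake_ssh("git -C {} init".format(sh_quote("/srv/repos/my dataset")))
# -> would run remotely: git -C '/srv/repos/my dataset' init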
Example #2
    def upload_web_interface(path, ssh, shared, ui):
        # path to web interface resources on local
        webui_local = opj(dirname(datalad.__file__), 'resources', 'website')
        # local html to dataset
        html_local = opj(webui_local, "index.html")

        # name and location of web-interface html on target
        html_targetname = ui if isinstance(ui, str) else "index.html"
        html_target = opj(path, html_targetname)

        # upload ui html to target
        ssh.copy(html_local, html_target)

        # upload assets to the dataset
        webresources_local = opj(webui_local, 'assets')
        webresources_remote = opj(path, WEB_HTML_DIR)
        ssh('mkdir -p {}'.format(sh_quote(webresources_remote)))
        ssh.copy(webresources_local, webresources_remote, recursive=True)

        # minimize and upload js assets
        for js_file in glob(opj(webresources_local, 'js', '*.js')):
            with open(js_file) as asset:
                try:
                    from jsmin import jsmin
                    # jsmin = lambda x: x   # no minimization
                    minified = jsmin(asset.read())  # minify asset
                except ImportError:
                    lgr.warning(
                        "Will not minify web interface javascript, no jsmin available"
                    )
                    minified = asset.read()  # no minify available
                # write the minified content to a tempfile and upload it
                with make_tempfile(content=minified) as tempf:
                    js_name = js_file.split('/')[-1]
                    ssh.copy(tempf,
                             opj(webresources_remote, 'assets', 'js', js_name))

        # explicitly make web+metadata dir of dataset world-readable, if shared set to 'all'
        mode = None
        if shared in (True, 'true', 'all', 'world', 'everybody'):
            mode = 'a+rX'
        elif shared == 'group':
            mode = 'g+rX'
        elif str(shared).startswith('0'):
            mode = shared

        if mode:
            ssh('chmod {} -R {} {}'.format(
                mode, sh_quote(dirname(webresources_remote)),
                sh_quote(opj(path, 'index.html'))))
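
The shared-to-chmod mapping at the end of upload_web_interface, factored out as a tiny pure function for illustration (a sketch, not part of the DataLad API):

def shared_to_chmod_mode(shared):
    # mirrors the branching above: world-readable, group-readable, or a
    # literal octal mode string
    if shared in (True, 'true', 'all', 'world', 'everybody'):
        return 'a+rX'
    if shared == 'group':
        return 'g+rX'
    if str(shared).startswith('0'):
        return shared  # an octal mode string such as '0775' passes through
    return None  # no chmod needed

assert shared_to_chmod_mode('all') == 'a+rX'
assert shared_to_chmod_mode('0775') == '0775'
assert shared_to_chmod_mode(None) is None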
Example #3
    def upload_web_interface(path, ssh, shared, ui):
        # path to web interface resources on local
        webui_local = opj(dirname(datalad.__file__), 'resources', 'website')
        # local html to dataset
        html_local = opj(webui_local, "index.html")

        # name and location of web-interface html on target
        html_targetname = ui if isinstance(ui, str) else "index.html"
        html_target = opj(path, html_targetname)

        # upload ui html to target
        ssh.put(html_local, html_target)

        # upload assets to the dataset
        webresources_local = opj(webui_local, 'assets')
        webresources_remote = opj(path, WEB_HTML_DIR)
        ssh('mkdir -p {}'.format(sh_quote(webresources_remote)))
        ssh.put(webresources_local, webresources_remote, recursive=True)

        # minimize and upload js assets
        for js_file in glob(opj(webresources_local, 'js', '*.js')):
            with open(js_file) as asset:
                try:
                    from jsmin import jsmin
                    # jsmin = lambda x: x   # no minimization
                    minified = jsmin(asset.read())                      # minify asset
                except ImportError:
                    lgr.warning(
                        "Will not minify web interface javascript, no jsmin available")
                    minified = asset.read()                             # no minify available
                with make_tempfile(content=minified) as tempf:          # write minified to tempfile
                    js_name = js_file.split('/')[-1]
                    ssh.put(tempf, opj(webresources_remote, 'assets', 'js', js_name))  # and upload js

        # explicitly make web+metadata dir of dataset world-readable, if shared set to 'all'
        mode = None
        if shared in (True, 'true', 'all', 'world', 'everybody'):
            mode = 'a+rX'
        elif shared == 'group':
            mode = 'g+rX'
        elif str(shared).startswith('0'):
            mode = shared

        if mode:
            ssh('chmod {} -R {} {}'.format(
                mode,
                sh_quote(dirname(webresources_remote)),
                sh_quote(opj(path, 'index.html'))))
Example #4
def _check_and_update_remote_server_info(ds, remote):
    # if we managed to copy to an "http" url, we should try to trigger the git
    # update-server-info hook on the remote, if an ssh annexurl was defined
    # for it. Apparently we do that already in create_sibling, but here
    # we need more checks and preparation
    remote_url = ds.repo.config.get('remote.%s.url' % remote, None)
    if remote_url:
        remote_url = RI(remote_url)
        if isinstance(remote_url,
                      URL) and remote_url.scheme in ('http', 'https'):
            remote_annexurl = ds.repo.config.get('remote.%s.annexurl' % remote,
                                                 None)
            if remote_annexurl:
                remote_annexurl_ri = RI(remote_annexurl)
                if is_ssh(remote_annexurl_ri):
                    ssh = ssh_manager.get_connection(remote_annexurl_ri)
                    ssh('git -C {} update-server-info'.format(
                        sh_quote(remote_annexurl_ri.path)))
                    return True
                else:
                    lgr.debug(
                        "annexurl %s is defined but is not an ssh URL, "
                        "dunno if we could/should do anything",
                        remote_annexurl)
    return False
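
What the ssh call above amounts to, run locally for illustration (a sketch; the repository path is hypothetical): git update-server-info regenerates info/refs and friends so the repository remains cloneable over "dumb" HTTP.

import subprocess

def update_server_info(repo_path):
    # refresh the auxiliary files required for dumb-HTTP access
    subprocess.run(["git", "-C", repo_path, "update-server-info"], check=True)

# update_server_info("/srv/repos/ds.git")  # hypothetical bare repository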
Example #5
def _check_and_update_remote_server_info(ds, remote):
    # if we managed to copy to an "http" url, we should try to trigger the git
    # update-server-info hook on the remote, if an ssh annexurl was defined
    # for it. Apparently we do that already in create_sibling, but here
    # we need more checks and preparation
    remote_url = ds.repo.config.get('remote.%s.url' % remote, None)
    if remote_url:
        remote_url = RI(remote_url)
        if isinstance(remote_url, URL) and \
                remote_url.scheme in ('http', 'https'):
            remote_annexurl = ds.repo.config.get('remote.%s.annexurl' % remote,
                                                 None)
            if remote_annexurl:
                remote_annexurl_ri = RI(remote_annexurl)
                if is_ssh(remote_annexurl_ri):
                    ssh = ssh_manager.get_connection(remote_annexurl_ri)
                    ssh('git -C {} update-server-info'.format(
                        sh_quote(remote_annexurl_ri.path)))
                    return True
                else:
                    lgr.debug(
                        "annexurl %s is defined but is not an ssh URL, "
                        "dunno if we could/should do anything",
                        remote_annexurl
                    )
    return False
Example #6
    def create_postupdate_hook(path, ssh, dataset):
        # location of post-update hook file, logs folder on remote target
        hooks_remote_dir = opj(path, '.git', 'hooks')
        # make sure hooks directory exists (see #1251)
        ssh('mkdir -p {}'.format(sh_quote(hooks_remote_dir)))
        hook_remote_target = opj(hooks_remote_dir, 'post-update')

        # create json command for current dataset; the '%s' below is a Python
        # placeholder filled with TIMESTAMP_FMT, so the remote shell runs
        # $(date +<TIMESTAMP_FMT>) to timestamp the log file name
        log_filename = 'datalad-publish-hook-$(date +%s).log' % TIMESTAMP_FMT
        hook_content = r'''#!/bin/bash

git update-server-info

#
# DataLad
#
# (Re)generate meta-data for DataLad Web UI and possibly init new submodules
dsdir="$(dirname $0)/../.."
logfile="$dsdir/{WEB_META_LOG}/{log_filename}"

if [ ! -e "$dsdir/.git" ]; then
  echo Assumption of being under .git has failed >&2
  exit 1
fi

mkdir -p "$dsdir/{WEB_META_LOG}"  # assure logs directory exists

# Avoid file name collisions.
suffix=0
logfile_orig="$logfile"
while [ -f "$logfile" ]; do
  suffix=$(( $suffix + 1 ))
  logfile="$logfile_orig.$suffix"
done

( which datalad > /dev/null \
  && ( cd "$dsdir"; GIT_DIR="$PWD/.git" datalad ls -a --json file .; ) \
  || echo "E: no datalad found - skipping generation of indexes for web frontend"; \
) &> "$logfile"
'''.format(WEB_META_LOG=WEB_META_LOG, **locals())

        with make_tempfile(content=hook_content) as tempf:
            # create post_update hook script
            # upload hook to dataset
            ssh.put(tempf, hook_remote_target)
        # and make it executable
        ssh('chmod +x {}'.format(sh_quote(hook_remote_target)))
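
How the hook template above gets its values (a sketch with assumed values): str.format(**locals()) fills the {WEB_META_LOG} and {log_filename} placeholders, and since the bash body only uses $(...) and $((...)) there are no literal braces that would need doubling.

WEB_META_LOG = '.git/datalad/web_meta_log'   # assumed value, for illustration
log_filename = 'datalad-publish-hook-$(date +%Y%m%d).log'
template = 'logfile="$dsdir/{WEB_META_LOG}/{log_filename}"'
print(template.format(**locals()))
# -> logfile="$dsdir/.git/datalad/web_meta_log/datalad-publish-hook-$(date +%Y%m%d).log"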
Example #7
    def create_postupdate_hook(path, ssh, dataset):
        # location of post-update hook file, logs folder on remote target
        hooks_remote_dir = opj(path, '.git', 'hooks')
        # make sure hooks directory exists (see #1251)
        ssh('mkdir -p {}'.format(sh_quote(hooks_remote_dir)))
        hook_remote_target = opj(hooks_remote_dir, 'post-update')

        # create json command for current dataset; the '%s' below is a Python
        # placeholder filled with TIMESTAMP_FMT, so the remote shell runs
        # $(date +<TIMESTAMP_FMT>) to timestamp the log file name
        log_filename = 'datalad-publish-hook-$(date +%s).log' % TIMESTAMP_FMT
        hook_content = r'''#!/bin/bash

git update-server-info

#
# DataLad
#
# (Re)generate meta-data for DataLad Web UI and possibly init new submodules
dsdir="$(dirname $0)/../.."
logfile="$dsdir/{WEB_META_LOG}/{log_filename}"

if [ ! -e "$dsdir/.git" ]; then
  echo Assumption of being under .git has failed >&2
  exit 1
fi

mkdir -p "$dsdir/{WEB_META_LOG}"  # assure logs directory exists

( which datalad > /dev/null \
  && ( cd "$dsdir"; GIT_DIR="$PWD/.git" datalad ls -a --json file .; ) \
  || echo "E: no datalad found - skipping generation of indexes for web frontend"; \
) &> "$logfile"

# Some submodules might have been added and thus we better init them
( cd "$dsdir"; git submodule update --init || : ; ) >> "$logfile" 2>&1
'''.format(WEB_META_LOG=WEB_META_LOG, **locals())

        with make_tempfile(content=hook_content) as tempf:
            # create post_update hook script
            # upload hook to dataset
            ssh.copy(tempf, hook_remote_target)
        # and make it executable
        ssh('chmod +x {}'.format(sh_quote(hook_remote_target)))
Example #8
def _ls_remote_path(ssh, path):
    try:
        # yoh tried ls on mac
        out, err = ssh("ls -A1 {}".format(sh_quote(path)))
        if err:
            # we might even want to raise an exception, but since it was
            # not raised, let's just log a warning
            lgr.warning(
                "There was some output to stderr while running ls on %s via ssh: %s",
                path, err)
    except CommandError as e:
        if "No such file or directory" in e.stderr and \
                path in e.stderr:
            return None
        else:
            raise  # It's an unexpected failure here
    return [l for l in out.split(os.linesep) if l]
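
A local stand-in for the remote "ls -A1" call, useful for exercising the same output parsing without an SSH connection (a sketch; _ls_remote_path itself expects DataLad's SSH connection object):

import os
import subprocess

def ls_local_path(path):
    # -A also lists dotfiles (but not . and ..), -1 forces one name per line
    out = subprocess.run(
        ["ls", "-A1", path], capture_output=True, text=True, check=True
    ).stdout
    return [l for l in out.split(os.linesep) if l]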
Example #9
    def _get_ds_remote_shared_setting(ds, name, ssh):
        """Figure out setting of sharedrepository for dataset's `name` remote"""
        shared = None
        try:
            current_super_url = CreateSibling._get_remote_url(ds, name)
            current_super_ri = RI(current_super_url)
            out, err = ssh('git -C {} config --get core.sharedrepository'.format(
                # TODO -- we might need to expanduser taking .user into account
                # but then it must be done also on remote side
                sh_quote(current_super_ri.path)))
            shared = out.strip()
            if err:
                lgr.warning("Got stderr while calling ssh: %s", err)
        except CommandError as e:
            lgr.debug(
                "Could not figure out remote shared setting of %s for %s due "
                "to %s", ds, name, exc_str(e))
            # could well be ok if e.g. not shared
            # TODO: more detailed analysis may be?
        return shared
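
The remote call reduces to a plain git config query; here is the same query against a local repository for illustration (a sketch): core.sharedrepository is typically unset, or holds e.g. 'group', 'all', or an octal mask.

import subprocess

def get_shared_setting(repo_path):
    res = subprocess.run(
        ["git", "-C", repo_path, "config", "--get", "core.sharedrepository"],
        capture_output=True, text=True)
    # git config --get exits non-zero when the key is unset
    return res.stdout.strip() or None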
Example #10
    def _run_on_ds_ssh_remote(ds, name, ssh, cmd):
        """Given a dataset, and name of the remote, run command via ssh

        Parameters
        ----------
        cmd: str
          Will be .format()'ed given the `path` to the dataset on remote

        Returns
        -------
        out : str
          Standard output of the executed command

        Raises
        ------
        CommandError
        """
        remote_url = CreateSibling._get_remote_url(ds, name)
        remote_ri = RI(remote_url)
        out, err = ssh(cmd.format(path=sh_quote(remote_ri.path)))
        if err:
            lgr.warning("Got stderr while calling ssh: %s", err)
        return out
Example #11
    def _get_ds_remote_shared_setting(ds, name, ssh):
        """Figure out setting of sharedrepository for dataset's `name` remote"""
        shared = None
        try:
            current_super_url = CreateSibling._get_remote_url(
                ds, name)
            current_super_ri = RI(current_super_url)
            out, err = ssh('git -C {} config --get core.sharedrepository'.format(
                # TODO -- we might need to expanduser taking .user into account
                # but then it must be done also on remote side
                sh_quote(current_super_ri.path))
            )
            shared = out.strip()
            if err:
                lgr.warning("Got stderr while calling ssh: %s", err)
        except CommandError as e:
            lgr.debug(
                "Could not figure out remote shared setting of %s for %s due "
                "to %s",
                ds, name, exc_str(e)
            )
            # could well be ok if e.g. not shared
            # TODO: more detailed analysis may be?
        return shared
Example #12
def _create_dataset_sibling(
        name,
        ds,
        hierarchy_basepath,
        ssh,
        replicate_local_structure,
        ssh_url,
        target_dir,
        target_url,
        target_pushurl,
        existing,
        shared,
        group,
        publish_depends,
        publish_by_default,
        install_postupdate_hook,
        as_common_datasrc,
        annex_wanted,
        annex_group,
        annex_groupwanted,
        inherit
):
    """Everyone is very smart here and could figure out the combinatorial
    affluence among provided tiny (just slightly over a dozen) number of options
    and only a few pages of code
    """
    localds_path = ds.path
    ds_name = relpath(localds_path, start=hierarchy_basepath)
    if not replicate_local_structure:
        ds_name = '' if ds_name == curdir \
            else '-{}'.format(ds_name.replace("/", "-"))
        remoteds_path = target_dir.replace(
            "%RELNAME",
            ds_name)
    else:
        # TODO: opj depends on local platform, not the remote one.
        # check how to deal with it. Does windows ssh server accept
        # posix paths? vice versa? Should planned SSH class provide
        # tools for this issue?
        # see gh-1188
        remoteds_path = normpath(opj(target_dir, ds_name))

    # construct a would-be ssh url based on the current dataset's path
    ssh_url.path = remoteds_path
    ds_sshurl = ssh_url.as_str()
    # configure dataset's git-access urls
    ds_target_url = target_url.replace('%RELNAME', ds_name) \
        if target_url else ds_sshurl
    # push, configure only if needed
    ds_target_pushurl = None
    if ds_target_url != ds_sshurl:
        # not guaranteed that we can push via the primary URL
        ds_target_pushurl = target_pushurl.replace('%RELNAME', ds_name) \
            if target_pushurl else ds_sshurl

    lgr.info("Considering creating a target dataset for %s at %s on %s",
             localds_path, remoteds_path, ssh_url.hostname)
    # Must be set to True only if exists and existing='reconfigure'
    # otherwise we might skip actions if we say existing='reconfigure'
    # but it did not even exist before
    only_reconfigure = False
    if remoteds_path != '.':
        # check if target exists
        # TODO: Is this condition valid for != '.' only?
        path_exists = True
        try:
            out, err = ssh("ls {}".format(sh_quote(remoteds_path)))
        except CommandError as e:
            if "No such file or directory" in e.stderr and \
                    remoteds_path in e.stderr:
                path_exists = False
            else:
                raise  # It's an unexpected failure here

        if path_exists:
            _msg = "Target path %s already exists." % remoteds_path
            # path might be existing but be an empty directory, which should be
            # ok to remove
            try:
                lgr.debug(
                    "Trying to rmdir %s on remote since might be an empty dir",
                    remoteds_path
                )
                # should be safe since should not remove anything unless an empty dir
                ssh("rmdir {}".format(sh_quote(remoteds_path)))
                path_exists = False
            except CommandError as e:
                # If rmdir fails, the directory either contains something or
                # we lack permissions
                # TODO: fixup encode/decode dance again :-/ we should have got
                # unicode/str here by now.  I guess it is the same as
                # https://github.com/ReproNim/niceman/issues/83
                # where I have reused this Runner thing
                try:
                    # ds_name is unicode which makes _msg unicode so we must be
                    # unicode-ready
                    err_str = text_type(e.stderr)
                except UnicodeDecodeError:
                    err_str = e.stderr.decode(errors='replace')
                _msg += " And it fails to rmdir (%s)." % (err_str.strip(),)

        if path_exists:
            if existing == 'error':
                raise RuntimeError(_msg)
            elif existing == 'skip':
                lgr.info(_msg + " Skipping")
                return
            elif existing == 'replace':
                lgr.info(_msg + " Replacing")
                # enable write permissions to allow removing dir
                ssh("chmod +r+w -R {}".format(sh_quote(remoteds_path)))
                # remove target at path
                ssh("rm -rf {}".format(sh_quote(remoteds_path)))
                # if we succeeded in removing it
                path_exists = False
                # Since it is gone now, git-annex also should forget about it
                remotes = ds.repo.get_remotes()
                if name in remotes:
                    # so we had this remote already, we should announce it dead
                    # XXX what if there was some kind of mismatch and this name
                    # isn't matching the actual remote UUID?  should have we
                    # checked more carefully?
                    lgr.info(
                        "Announcing existing remote %s dead to annex and removing",
                        name
                    )
                    if isinstance(ds.repo, AnnexRepo):
                        ds.repo.set_remote_dead(name)
                    ds.repo.remove_remote(name)
            elif existing == 'reconfigure':
                lgr.info(_msg + " Will only reconfigure")
                only_reconfigure = True
            else:
                raise ValueError(
                    "Do not know how to handle existing={}".format(
                        repr(existing)))

        if not path_exists:
            ssh("mkdir -p {}".format(sh_quote(remoteds_path)))

    delayed_super = _DelayedSuper(ds)
    if inherit and delayed_super.super:
        if shared is None:
            # here we must analyze current_ds's super, not the super_ds
            # inherit from the setting on remote end
            shared = CreateSibling._get_ds_remote_shared_setting(
                delayed_super, name, ssh)

        if not install_postupdate_hook:
            # Even though directive from above was False due to no UI explicitly
            # requested, we were asked to inherit the setup, so we might need
            # to install the hook, if super has it on remote
            install_postupdate_hook = CreateSibling._has_active_postupdate(
                delayed_super, name, ssh)

    if group:
        # Either the repository existed before or a new directory was created
        # for it; set its group to the desired one, if provided, via chgrp
        ssh("chgrp -R {} {}".format(
            sh_quote(text_type(group)),
            sh_quote(remoteds_path)))
    # don't (re-)initialize dataset if existing == reconfigure
    if not only_reconfigure:
        # init git and possibly annex repo
        if not CreateSibling.init_remote_repo(
                remoteds_path, ssh, shared, ds,
                description=target_url):
            return

        if target_url and not is_ssh(target_url):
            # we are not coming in via SSH, hence cannot assume proper
            # setup for webserver access -> fix
            ssh('git -C {} update-server-info'.format(sh_quote(remoteds_path)))
    else:
        # TODO -- we might still want to reconfigure 'shared' setting!
        pass

    # at this point we have a remote sibling in some shape or form
    # -> add as remote
    lgr.debug("Adding the siblings")
    # TODO generator, yield the now swallowed results
    Siblings.__call__(
        'configure',
        dataset=ds,
        name=name,
        url=ds_target_url,
        pushurl=ds_target_pushurl,
        recursive=False,
        fetch=True,
        as_common_datasrc=as_common_datasrc,
        publish_by_default=publish_by_default,
        publish_depends=publish_depends,
        annex_wanted=annex_wanted,
        annex_group=annex_group,
        annex_groupwanted=annex_groupwanted,
        inherit=inherit
    )

    # check git version on remote end
    lgr.info("Adjusting remote git configuration")
    if ssh.get_git_version() and ssh.get_git_version() >= LooseVersion("2.4"):
        # allow for pushing to checked out branch
        try:
            ssh("git -C {} config receive.denyCurrentBranch updateInstead".format(
                sh_quote(remoteds_path)))
        except CommandError as e:
            lgr.error("git config failed at remote location %s.\n"
                      "You will not be able to push to checked out "
                      "branch. Error: %s", remoteds_path, exc_str(e))
    else:
        lgr.error("Git version >= 2.4 needed to configure remote."
                  " Version detected on server: %s\nSkipping configuration"
                  " of receive.denyCurrentBranch - you will not be able to"
                  " publish updates to this repository. Upgrade your git"
                  " and run with --existing=reconfigure",
                  ssh.get_git_version())

    if install_postupdate_hook:
        # enable metadata refresh on dataset updates to publication server
        lgr.info("Enabling git post-update hook ...")
        try:
            CreateSibling.create_postupdate_hook(
                remoteds_path, ssh, ds)
        except CommandError as e:
            lgr.error("Failed to add json creation command to post update "
                      "hook.\nError: %s" % exc_str(e))

    return remoteds_path
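
The %RELNAME templating used at the top of _create_dataset_sibling, in isolation (a sketch, assuming POSIX paths): the relative path of a subdataset is flattened with '-' and substituted into the target template.

from os import curdir
from os.path import relpath

def expand_relname(template, local_path, base_path):
    ds_name = relpath(local_path, start=base_path)
    ds_name = '' if ds_name == curdir else '-' + ds_name.replace('/', '-')
    return template.replace('%RELNAME', ds_name)

assert expand_relname('/srv/ds%RELNAME', '/data/super/sub/a', '/data/super') \
    == '/srv/ds-sub-a'
assert expand_relname('/srv/ds%RELNAME', '/data/super', '/data/super') \
    == '/srv/ds'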
Example #13
    def __call__(sshurl, name=None, target_dir=None,
                 target_url=None, target_pushurl=None,
                 dataset=None,
                 recursive=False,
                 recursion_limit=None,
                 existing='error',
                 shared=None,
                 group=None,
                 ui=False,
                 as_common_datasrc=None,
                 publish_by_default=None,
                 publish_depends=None,
                 annex_wanted=None, annex_group=None, annex_groupwanted=None,
                 inherit=False,
                 since=None):
        #
        # nothing without a base dataset
        #
        ds = require_dataset(dataset, check_installed=True,
                             purpose='creating a sibling')
        refds_path = ds.path

        #
        # all checks that are possible before we start parsing the dataset
        #

        # possibly use sshurl to get the name in case it was not specified
        if not sshurl:
            if not inherit:
                raise InsufficientArgumentsError(
                    "needs at least an SSH URL, unless the inherit option is given"
                )
            if name is None:
                raise ValueError(
                    "Neither SSH URL, nor the name of sibling to inherit from "
                    "was specified"
                )
            # It might well be that we already have this remote setup
            try:
                sshurl = CreateSibling._get_remote_url(ds, name)
            except Exception as exc:
                lgr.debug('%s does not know about url for %s: %s', ds, name, exc_str(exc))
        elif inherit:
            raise ValueError(
                "For now, for clarity not allowing specifying a custom sshurl "
                "while inheriting settings"
            )
            # may be could be safely dropped -- still WiP

        if not sshurl:
            # TODO: may be more back up before _prep?
            super_ds = ds.get_superdataset()
            if not super_ds:
                raise ValueError(
                    "Could not determine super dataset for %s to inherit URL"
                    % ds
                )
            super_url = CreateSibling._get_remote_url(super_ds, name)
            # for now assuming hierarchical setup
            # (TODO: to be able to distinguish between the two, probably
            # needs storing datalad.*.target_dir to have %RELNAME in there)
            sshurl = slash_join(super_url, relpath(ds.path, super_ds.path))

        # check the login URL
        sshri = RI(sshurl)
        if not is_ssh(sshri):
            raise ValueError(
                "Unsupported SSH URL: '{0}', "
                "use ssh://host/path or host:path syntax".format(sshurl))

        if not name:
            # use the hostname as default remote name
            name = sshri.hostname
            lgr.debug(
                "No sibling name given, use URL hostname '%s' as sibling name",
                name)

        if since == '':
            # consider creating siblings only since the point of
            # the last update
            # XXX here we assume one to one mapping of names from local branches
            # to the remote
            active_branch = ds.repo.get_active_branch()
            since = '%s/%s' % (name, active_branch)

        #
        # parse the base dataset to find all subdatasets that need processing
        #
        to_process = []
        for ap in AnnotatePaths.__call__(
                dataset=refds_path,
                # only a single path!
                path=refds_path,
                recursive=recursive,
                recursion_limit=recursion_limit,
                action='create_sibling',
                # both next should not happen anyways
                unavailable_path_status='impossible',
                nondataset_path_status='error',
                modified=since,
                return_type='generator',
                on_failure='ignore'):
            if ap.get('status', None):
                # this is done
                yield ap
                continue
            if ap.get('type', None) != 'dataset' or ap.get('state', None) == 'absent':
                # this can happen when there is `since`, but we have no
                # use for anything but datasets here
                continue
            checkds_remotes = Dataset(ap['path']).repo.get_remotes() \
                if ap.get('state', None) != 'absent' \
                else []
            if publish_depends:
                # make sure dependencies are valid
                # TODO: inherit -- we might want to automagically create
                # those dependents as well???
                unknown_deps = set(assure_list(publish_depends)).difference(checkds_remotes)
                if unknown_deps:
                    ap['status'] = 'error'
                    ap['message'] = (
                        'unknown sibling(s) specified as publication dependency: %s',
                        unknown_deps)
                    yield ap
                    continue
            if name in checkds_remotes and existing in ('error', 'skip'):
                ap['status'] = 'error' if existing == 'error' else 'notneeded'
                ap['message'] = (
                    "sibling '%s' already configured (specify alternative name, or force "
                    "reconfiguration via --existing)",
                    name)
                yield ap
                continue
            to_process.append(ap)

        if not to_process:
            # we ruled out all possibilities
            # TODO wait for gh-1218 and make better return values
            lgr.info("No datasets qualify for sibling creation. "
                     "Consider different settings for --existing "
                     "or --since if this is unexpected")
            return

        if target_dir is None:
            if sshri.path:
                target_dir = sshri.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = "%RELNAME" not in target_dir

        # request ssh connection:
        lgr.info("Connecting ...")
        assert sshurl is not None  # delayed verification of the analysis above
        ssh = ssh_manager.get_connection(sshurl)
        if not ssh.get_annex_version():
            raise MissingExternalDependency(
                'git-annex',
                msg='on the remote system')

        #
        # all checks done and we have a connection, now do something
        #

        # loop over all datasets, ordered from top to bottom to make test
        # below valid (existing directories would cause the machinery to halt)
        # But we need to run post-update hook in depth-first fashion, so
        # would only collect first and then run (see gh #790)
        yielded = set()
        remote_repos_to_run_hook_for = []
        for currentds_ap in \
                sorted(to_process, key=lambda x: x['path'].count('/')):
            current_ds = Dataset(currentds_ap['path'])

            path = _create_dataset_sibling(
                name,
                current_ds,
                ds.path,
                ssh,
                replicate_local_structure,
                sshri,
                target_dir,
                target_url,
                target_pushurl,
                existing,
                shared,
                group,
                publish_depends,
                publish_by_default,
                ui,
                as_common_datasrc,
                annex_wanted,
                annex_group,
                annex_groupwanted,
                inherit
            )
            if not path:
                # nothing new was created
                # TODO is 'notneeded' appropriate in this case?
                currentds_ap['status'] = 'notneeded'
                # TODO explain status in 'message'
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            remote_repos_to_run_hook_for.append((path, currentds_ap))

            # publish web-interface to root dataset on publication server
            if current_ds.path == ds.path and ui:
                lgr.info("Uploading web interface to %s", path)
                try:
                    CreateSibling.upload_web_interface(path, ssh, shared, ui)
                except CommandError as e:
                    currentds_ap['status'] = 'error'
                    currentds_ap['message'] = (
                        "failed to push web interface to the remote datalad repository (%s)",
                        exc_str(e))
                    yield currentds_ap
                    yielded.add(currentds_ap['path'])
                    continue

        # in reverse order would be depth first
        lgr.info("Running post-update hooks in all created siblings")
        # TODO: add progressbar
        for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
            # Trigger the hook
            lgr.debug("Running hook for %s (if exists and executable)", path)
            try:
                ssh("cd {} "
                    "&& ( [ -x hooks/post-update ] && hooks/post-update || : )"
                    "".format(sh_quote(_path_(path, ".git"))))
            except CommandError as e:
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to run post-update hook under remote path %s (%s)",
                    path, exc_str(e))
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            if currentds_ap['path'] not in yielded:
                # if we were silent until now everything is just splendid
                currentds_ap['status'] = 'ok'
                yield currentds_ap
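
The ordering used above, in isolation (a sketch): siblings are created top-down by sorting on the number of path separators, and iterating the collected list in reverse then yields the depth-first order in which the post-update hooks are triggered.

paths = ['/data/super/sub/a', '/data/super', '/data/super/sub']
top_down = sorted(paths, key=lambda p: p.count('/'))
assert top_down == ['/data/super', '/data/super/sub', '/data/super/sub/a']
# hooks run over the reversed list, i.e. deepest dataset first
assert top_down[::-1] == ['/data/super/sub/a', '/data/super/sub', '/data/super']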
Example #14
    def __call__(sshurl,
                 name=None,
                 target_dir=None,
                 target_url=None,
                 target_pushurl=None,
                 dataset=None,
                 recursive=False,
                 recursion_limit=None,
                 existing='error',
                 shared=None,
                 group=None,
                 ui=False,
                 as_common_datasrc=None,
                 publish_by_default=None,
                 publish_depends=None,
                 annex_wanted=None,
                 annex_group=None,
                 annex_groupwanted=None,
                 inherit=False,
                 since=None):
        if ui:
            # the webui has been moved to the deprecated extension
            try:
                from datalad_deprecated.sibling_webui \
                    import upload_web_interface
            except Exception as e:
                # we could just test for ModuleNotFoundError (which should be
                # all that would happen with PY3.6+), but be a little more
                # robust and use the pattern from duecredit
                if type(e).__name__ not in ('ImportError',
                                            'ModuleNotFoundError'):
                    lgr.error(
                        "Failed to import datalad_deprecated.sibling_webui "
                        "due to %s", str(e))
                raise RuntimeError(
                    "The DataLad web UI has been moved to an extension "
                    "package. Please install the Python package "
                    "`datalad_deprecated` to be able to deploy it.")
        #
        # nothing without a base dataset
        #
        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='create sibling(s)')
        refds_path = ds.path

        #
        # all checks that are possible before we start parsing the dataset
        #

        # possibly use sshurl to get the name in case it was not specified
        if not sshurl:
            if not inherit:
                raise InsufficientArgumentsError(
                    "needs at least an SSH URL, unless the inherit option is given")
            if name is None:
                raise ValueError(
                    "Neither SSH URL, nor the name of sibling to inherit from "
                    "was specified")
            # It might well be that we already have this remote setup
            try:
                sshurl = CreateSibling._get_remote_url(ds, name)
            except Exception as exc:
                lgr.debug('%s does not know about url for %s: %s', ds, name,
                          exc_str(exc))
        elif inherit:
            raise ValueError(
                "For now, for clarity not allowing specifying a custom sshurl "
                "while inheriting settings")
            # may be could be safely dropped -- still WiP

        if not sshurl:
            # TODO: may be more back up before _prep?
            super_ds = ds.get_superdataset()
            if not super_ds:
                raise ValueError(
                    "Could not determine super dataset for %s to inherit URL" %
                    ds)
            super_url = CreateSibling._get_remote_url(super_ds, name)
            # for now assuming hierarchical setup
            # (TODO: to be able to distinguish between the two, probably
            # needs storing datalad.*.target_dir to have %RELNAME in there)
            sshurl = slash_join(super_url, relpath(refds_path, super_ds.path))

        # check the login URL
        sibling_ri = RI(sshurl)
        ssh_sibling = is_ssh(sibling_ri)
        if not (ssh_sibling or isinstance(sibling_ri, PathRI)):
            raise ValueError(
                "Unsupported SSH URL or path: '{0}', "
                "use ssh://host/path, host:path or path syntax".format(sshurl))

        if not name:
            name = sibling_ri.hostname if ssh_sibling else "local"
            lgr.info("No sibling name given. Using %s'%s' as sibling name",
                     "URL hostname " if ssh_sibling else "", name)
        if since == '':
            # consider creating siblings only since the point of
            # the last update
            # XXX here we assume one to one mapping of names from local branches
            # to the remote
            active_branch = ds.repo.get_active_branch()
            since = '%s/%s' % (name, active_branch)

        #
        # parse the base dataset to find all subdatasets that need processing
        #
        to_process = []
        cand_ds = [
            Dataset(r['path']) for r in diff_dataset(
                ds,
                fr=since,
                to=None,
                # make explicit, but doesn't matter, no recursion in diff()
                constant_refs=True,
                # constrain to the paths of all locally existing subdatasets
                path=[
                    sds['path']
                    for sds in ds.subdatasets(recursive=recursive,
                                              recursion_limit=recursion_limit,
                                              fulfilled=True,
                                              result_renderer=None)
                ],
                # save cycles, we are only looking for datasets
                annex=None,
                untracked='no',
                # recursion was done faster by subdatasets()
                recursive=False,
                # save cycles, we are only looking for datasets
                eval_file_type=False,
            ) if r.get('type') == 'dataset' and r.get('state', None) != 'clean'
        ]
        # check remotes setup
        for d in cand_ds if since else ([ds] + cand_ds):
            d_repo = d.repo
            if d_repo is None:
                continue
            checkds_remotes = d.repo.get_remotes()
            res = dict(
                action='create_sibling',
                path=d.path,
                type='dataset',
            )

            if publish_depends:
                # make sure dependencies are valid
                # TODO: inherit -- we might want to automagically create
                # those dependents as well???
                unknown_deps = set(
                    ensure_list(publish_depends)).difference(checkds_remotes)
                if unknown_deps:
                    yield dict(
                        res,
                        status='error',
                        message=('unknown sibling(s) specified as publication '
                                 'dependency: %s', unknown_deps),
                    )
                    continue
            if name in checkds_remotes and existing in ('error', 'skip'):
                yield dict(
                    res,
                    sibling_name=name,
                    status='error' if existing == 'error' else 'notneeded',
                    message=(
                        "sibling '%s' already configured (specify alternative "
                        "name, or force reconfiguration via --existing)", name),
                )
                continue
            to_process.append(res)

        if not to_process:
            # we ruled out all possibilities
            # TODO wait for gh-1218 and make better return values
            lgr.info("No datasets qualify for sibling creation. "
                     "Consider different settings for --existing "
                     "or --since if this is unexpected")
            return

        if ssh_sibling:
            # request ssh connection:
            lgr.info("Connecting ...")
            shell = ssh_manager.get_connection(sshurl)
        else:
            shell = _RunnerAdapter()
            sibling_ri.path = str(resolve_path(sibling_ri.path, dataset))
            if target_dir:
                target_dir = opj(sibling_ri.path, target_dir)

        if target_dir is None:
            if sibling_ri.path:
                target_dir = sibling_ri.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = "%RELNAME" not in target_dir

        if not shell.get_annex_version():
            raise MissingExternalDependency(
                'git-annex',
                msg="It's required on the {} machine to create a sibling".
                format('remote' if ssh_sibling else 'local'))

        #
        # all checks done and we have a connection, now do something
        #

        # loop over all datasets, ordered from top to bottom to make test
        # below valid (existing directories would cause the machinery to halt)
        # But we need to run post-update hook in depth-first fashion, so
        # would only collect first and then run (see gh #790)
        yielded = set()
        remote_repos_to_run_hook_for = []
        for currentds_ap in \
                sorted(to_process, key=lambda x: x['path'].count('/')):
            current_ds = Dataset(currentds_ap['path'])

            path = _create_dataset_sibling(
                name, current_ds, refds_path, shell, replicate_local_structure,
                sibling_ri, target_dir, target_url, target_pushurl, existing,
                shared, group, publish_depends, publish_by_default, ui,
                as_common_datasrc, annex_wanted, annex_group,
                annex_groupwanted, inherit)
            currentds_ap["sibling_name"] = name
            if not path:
                # nothing new was created
                # TODO is 'notneeded' appropriate in this case?
                currentds_ap['status'] = 'notneeded'
                # TODO explain status in 'message'
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            remote_repos_to_run_hook_for.append((path, currentds_ap))

            # publish web-interface to root dataset on publication server
            if current_ds.path == refds_path and ui:
                from datalad_deprecated.sibling_webui import upload_web_interface
                lgr.info("Uploading web interface to %s", path)
                try:
                    upload_web_interface(path, shell, shared, ui)
                except CommandError as e:
                    currentds_ap['status'] = 'error'
                    currentds_ap['message'] = (
                        "failed to push web interface to the remote datalad repository (%s)",
                        exc_str(e))
                    yield currentds_ap
                    yielded.add(currentds_ap['path'])
                    continue

        # in reverse order would be depth first
        lgr.info("Running post-update hooks in all created siblings")
        # TODO: add progressbar
        for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
            # Trigger the hook
            lgr.debug("Running hook for %s (if exists and executable)", path)
            try:
                shell(
                    "cd {} "
                    "&& ( [ -x hooks/post-update ] && hooks/post-update || true )"
                    "".format(sh_quote(_path_(path, ".git"))))
            except CommandError as e:
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to run post-update hook under remote path %s (%s)",
                    path, exc_str(e))
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            if currentds_ap['path'] not in yielded:
                # if we were silent until now everything is just splendid
                currentds_ap['status'] = 'ok'
                yield currentds_ap
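
The guarded import at the top of Example #14, generalized (a sketch; require_extension is a hypothetical helper, not a DataLad API): probe for an optional extension package and fail with an actionable message when it is missing.

def require_extension(module_name, install_hint):
    try:
        __import__(module_name)
    except ImportError as e:
        # surface both the cause and the remedy
        raise RuntimeError(
            "%s is required but could not be imported (%s). %s"
            % (module_name, e, install_hint))

# usage mirroring Example #14:
# require_extension('datalad_deprecated.sibling_webui',
#                   "Install the datalad_deprecated package.")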