Example #1
    def assure_initialized(self):
        """Assures that manager is initialized - knows socket_dir, previous connections
        """
        if self._socket_dir is not None:
            return
        from ..config import ConfigManager
        from os import chmod
        cfg = ConfigManager()
        self._socket_dir = opj(cfg.obtain('datalad.locations.cache'),
                               'sockets')
        assure_dir(self._socket_dir)
        try:
            chmod(self._socket_dir, 0o700)
        except OSError as exc:
            lgr.warning(
                "Failed to (re)set permissions on %s. "
                "Future communication will most likely be impaired or fail. "
                "Original exception: %s", self._socket_dir, exc_str(exc))

        from os import listdir
        from os.path import isdir
        try:
            self._prev_connections = [
                opj(self.socket_dir, p) for p in listdir(self.socket_dir)
                if not isdir(opj(self.socket_dir, p))
            ]
        except OSError as exc:
            self._prev_connections = []
            lgr.warning(
                "Failed to list %s for existing sockets. "
                "Future communication will most likely be impaired or fail. "
                "Original exception: %s", self._socket_dir, exc_str(exc))

        lgr.log(5, "Found %d previous connections",
                len(self._prev_connections))
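
A minimal, self-contained sketch of the lazy-initialization pattern above; the CacheManager name, the cache location, and the use of plain os.makedirs in place of assure_dir are illustrative assumptions, not DataLad API:

import os
from os.path import join as opj

class CacheManager:
    def __init__(self):
        self._socket_dir = None           # sentinel: not initialized yet
        self._prev_connections = None

    def assure_initialized(self):
        if self._socket_dir is not None:  # already initialized -- cheap no-op
            return
        self._socket_dir = opj(os.path.expanduser("~/.cache/demo"), "sockets")
        os.makedirs(self._socket_dir, exist_ok=True)
        os.chmod(self._socket_dir, 0o700)  # sockets must not be world-accessible
        # collect leftover socket files from previous runs
        self._prev_connections = [
            opj(self._socket_dir, p) for p in os.listdir(self._socket_dir)
            if not os.path.isdir(opj(self._socket_dir, p))
        ]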
Example #2
    def socket_dir(self):
        """Return the directory for SSH control sockets, creating it on first access"""
        if self._socket_dir is None:
            from ..config import ConfigManager
            from os import chmod
            cfg = ConfigManager()
            self._socket_dir = opj(cfg.obtain('datalad.locations.cache'),
                                   'sockets')
            assure_dir(self._socket_dir)
            chmod(self._socket_dir, 0o700)

        return self._socket_dir
Example #3
def get_cached_url_content(url, name=None, fetcher=None, maxage=None):
    """Loader of a document from a url, which caches loaded instance on disk

    Doesn't do anything smart about http headers etc which could provide
    information for cache/proxy servers for how long to retain etc

    TODO: theoretically it is not network specific at all -- and just a memoize
    pattern, but may be some time we would make it treat headers etc correctly.
    And ATM would support any URL we support via providers/downloaders

    Parameters
    ----------
    url: str
       URL of the document to load
    name: str, optional
       Name to use when deriving the cache filename, instead of one based on
       the URL alone
    fetcher: callable, optional
       Function to call with the url if the content needs to be (re)fetched
    maxage: float, optional
       Age in days for which the cache remains valid.  A negative value
       retains the cache forever; None consults the config; 0 forces a reload.
    """
    doc_fname = get_url_cache_filename(url, name)
    if maxage is None:
        maxage = float(cfg.get('datalad.locations.cache-maxage'))

    doc = None
    if os.path.exists(doc_fname) and maxage != 0:

        fage = (time.time() - os.stat(doc_fname).st_mtime) / (24. * 3600)
        if maxage < 0 or fage < maxage:
            try:
                lgr.debug("using cached request result for '%s' from %s", url,
                          doc_fname)
                with open(doc_fname, 'rb') as f:
                    doc = pickle.load(f)
            except Exception as e:  # OK to ignore any error and fall back on the true source
                lgr.warning(
                    "cannot load cache from '%s', falling back to download: %s",
                    doc_fname, exc_str(e))

    if doc is None:
        if fetcher is None:
            from datalad.downloaders.providers import Providers
            providers = Providers.from_config_files()
            fetcher = providers.fetch

        doc = fetcher(url)
        assure_dir(dirname(doc_fname))
        # use pickle to store the entire request result dict
        with open(doc_fname, 'wb') as f:
            pickle.dump(doc, f)
        lgr.debug("stored result of request to '%s' in %s", url, doc_fname)
    return doc
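
A hedged usage sketch for get_cached_url_content, assuming the surrounding DataLad configuration is in place; the URL and the requests-based fetcher are illustrative, not part of the function's contract:

import requests  # hypothetical fetcher backend

def my_fetcher(url):
    # the cache pickles whatever the fetcher returns
    return requests.get(url).text

url = "https://example.com/data.json"
doc = get_cached_url_content(url, name="demo", fetcher=my_fetcher)                 # maxage from config
fresh = get_cached_url_content(url, name="demo", fetcher=my_fetcher, maxage=0)     # force reload
forever = get_cached_url_content(url, name="demo", fetcher=my_fetcher, maxage=-1)  # never expire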
Example #4
def _cached_load_document(url):
    """Loader of pyld document from a url, which caches loaded instance on disk
    """
    doc_fname = _get_schema_url_cache_filename(url)

    doc = None
    if os.path.exists(doc_fname):
        try:
            lgr.debug("using cached request result for '%s' from %s", url, doc_fname)
            with open(doc_fname, 'rb') as f:
                doc = pickle.load(f)
        except Exception as e:  # OK to ignore any error and fall back on the true source
            lgr.warning(
                "cannot load cache from '%s', falling back on schema download: %s",
                doc_fname, exc_str(e))

    if doc is None:
        from pyld.jsonld import load_document
        doc = load_document(url)
        assure_dir(dirname(doc_fname))
        # use pickle to store the entire request result dict
        with open(doc_fname, 'wb') as f:
            pickle.dump(doc, f)
        lgr.debug("stored result of request to '%s' in %s", url, doc_fname)
    return doc
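
pyld allows replacing its default document loader, so the cached loader above can be wired in globally; a hedged sketch, assuming a pyld version exposing jsonld.set_document_loader (newer pyld versions pass (url, options) to the loader, older ones only the url, hence the permissive signature):

from pyld import jsonld

def _caching_loader(url, *args, **kwargs):
    return _cached_load_document(url)

jsonld.set_document_loader(_caching_loader)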
Example #5
def _fixup_submodule_dotgit_setup(ds, relativepath):
    """Implementation of our current of .git in a subdataset"""
    # move .git to superrepo's .git/modules, remove .git, create
    # .git-file
    path = opj(ds.path, relativepath)
    subds_git_dir = opj(path, ".git")
    ds_git_dir = get_git_dir(ds.path)
    moved_git_dir = opj(ds.path, ds_git_dir,
                        "modules", relativepath)
    # safety net
    if islink(subds_git_dir) \
            and realpath(subds_git_dir) == moved_git_dir:
        # .git dir is already moved and linked
        # remove link to enable .git replacement logic below
        os.remove(subds_git_dir)
    else:
        # move .git
        from os import rename, listdir, rmdir
        assure_dir(moved_git_dir)
        for dot_git_entry in listdir(subds_git_dir):
            rename(opj(subds_git_dir, dot_git_entry),
                   opj(moved_git_dir, dot_git_entry))
        assert not listdir(subds_git_dir)
        rmdir(subds_git_dir)

    # TODO: symlink or whatever annex does, since annexes beneath
    #       might break
    #       - figure out, what annex does in direct mode
    #         and/or on windows
    #       - for now use .git file on windows and symlink otherwise
    if not on_windows:
        os.symlink(relpath(moved_git_dir, start=path),
                   opj(path, ".git"))
    else:
        with open(opj(path, ".git"), "w") as f:
            f.write("gitdir: {moved}\n".format(moved=relpath(moved_git_dir, start=path)))
Example #6
    def __call__(self, target, collection=curdir, baseurl=None,
                 remote_name=None):
        """
        Returns
        -------
        Collection
        """

        # TODO: Note: Yarik's git mtheirs for publishing branches!

        local_master = get_datalad_master()

        if isdir(abspath(expandvars(expanduser(collection)))):
            c_path = abspath(expandvars(expanduser(collection)))
        elif collection in local_master.git_get_remotes():
            c_path = urlparse(local_master.git_get_remote_url(collection)).path
            if not isdir(c_path):
                raise RuntimeError("Invalid path to collection '%s':\n%s" %
                                   (collection, c_path))
        else:
            raise RuntimeError("Unknown collection '%s'." % collection)

        local_collection_repo = get_repo_instance(
            abspath(expandvars(expanduser(c_path))), CollectionRepo)

        available_handles = [key for key in
                             local_collection_repo.get_handle_list()
                             if exists(urlparse(CollectionRepoHandleBackend(
                                 local_collection_repo, key).url).path)]

        parsed_target = urlparse(target)  # => scheme, path

        from pkg_resources import resource_filename
        prepare_script_path = \
            resource_filename('datalad',
                              'resources/sshserver_prepare_for_publish.sh')
        cleanup_script_path = \
            resource_filename('datalad',
                              'resources/sshserver_cleanup_after_publish.sh')

        from ..cmd import Runner
        runner = Runner()
        if parsed_target.scheme == 'ssh':
            if parsed_target.netloc == '':
                raise RuntimeError("Invalid ssh address: %s" % target)

            if baseurl is None:
                baseurl = target
            collection_url = baseurl + '/' + local_collection_repo.name + \
                             ".datalad-collection"

            # build control master:
            from datalad.utils import assure_dir
            var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
            assure_dir(var_run_user_datalad)
            control_path = "%s/%s" % (var_run_user_datalad, parsed_target.netloc)
            control_path += ":%s" % parsed_target.port if parsed_target.port else ""

            # start controlmaster:

            cmd_str = "ssh -o \"ControlMaster=yes\" -o \"ControlPath=%s\" " \
                      "-o \"ControlPersist=yes\" %s exit" % \
                      (control_path, parsed_target.netloc)
            lgr.error("DEBUG: %s" % cmd_str)
            import subprocess
            proc = subprocess.Popen(cmd_str, shell=True)
            proc.communicate(input="\n")  # unclear why this input is necessary

            # prepare target repositories:

            script_options = "%s %s.datalad-collection" % \
                             (parsed_target.path, local_collection_repo.name)
            for key in available_handles:
                # prepare repos for locally available handles only
                script_options += " %s" % key

            cmd_str = "ssh -S %s %s \'cat | sh /dev/stdin\' %s" % \
                      (control_path, parsed_target.netloc, script_options)
            cmd_str += " < %s" % prepare_script_path
            try:
                out, err = runner.run(cmd_str)
            except CommandError as e:
                lgr.error("Preparation script failed: %s" % str(e))
                out = e.stdout
                err = e.stderr

            # set GIT-SSH:
            environ['GIT_SSH'] = resource_filename('datalad',
                                                   'resources/git_ssh.sh')

        elif parsed_target.scheme == 'file' or parsed_target.scheme == '':
            # we should have a local target path
            if not isdir(abspath(expandvars(expanduser(parsed_target.path)))):
                raise RuntimeError("%s doesn't exist." % parsed_target.path)

            target_path = abspath(expandvars(expanduser(parsed_target.path)))
            if baseurl is None:
                baseurl = target_path
            collection_url = baseurl + '/' + local_collection_repo.name + \
                             ".datalad-collection"

            try:
                out, err = runner.run(["sh", prepare_script_path,
                                       target_path,
                                       local_collection_repo.name +
                                       ".datalad-collection"]
                                      + available_handles)
            except CommandError as e:
                lgr.error("Preparation script failed: %s" % str(e))
                out = e.stdout
                err = e.stderr

        else:
            raise RuntimeError("Don't know scheme '%s'." %
                               parsed_target.scheme)

        # check output:
        results = parse_script_output(out, err)

        script_failed = False
        for name in available_handles + \
                [local_collection_repo.name + ".datalad-collection"]:
            if not results[name]['init']:
                lgr.error("Server setup for %s failed." % name)
                script_failed = True
        # exit here, if something went wrong:
        if script_failed:
            raise RuntimeError("Server setup failed.")

        # Now, all the handles:
        from .publish_handle import PublishHandle
        handle_publisher = PublishHandle()
        for handle_name in available_handles:

            # get location:
            handle_loc = urlparse(CollectionRepoHandleBackend(
                local_collection_repo, handle_name).url).path
            # raise exception if there's no handle at that location:
            try:
                handle_repo = get_repo_instance(handle_loc, HandleRepo)
            except RuntimeError as e:
                lgr.error("'%s': No handle available at %s. Skip." %
                          (handle_name, handle_loc))
                continue

            annex_ssh = "-S %s" % control_path \
                if parsed_target.scheme == 'ssh' else None
            handle_publisher(None, handle=handle_loc,
                             url=baseurl + '/' + handle_name,
                             ssh_options=annex_ssh)

        # TODO: check success => go on with collection

        # prepare publish branch in local collection:
        # check for existing publish branches:
        from random import choice
        from string import ascii_letters
        from six.moves import xrange
        p_branch = "publish_" + ''.join(choice(ascii_letters) for i in xrange(6))
        local_collection_repo.git_checkout(p_branch, '-b')

        importer = CustomImporter('Collection', 'Collection', DLNS.this)
        importer.import_data(local_collection_repo.path)
        graphs = importer.get_graphs()
        orig_uri = graphs[REPO_STD_META_FILE[0:-4]].value(predicate=RDF.type,
                                                          object=DLNS.Collection)

        # correct collection uri
        new_uri = URIRef(collection_url)
        for graph_name in graphs:
            for p, o in graphs[graph_name].predicate_objects(subject=orig_uri):
                graphs[graph_name].remove((orig_uri, p, o))
                graphs[graph_name].add((new_uri, p, o))

        # correct handle uris in hasPart statements:
        replacements = []
        from datalad.support.collection import Collection
        from datalad.support.collectionrepo import CollectionRepoBackend
        col_meta = Collection(CollectionRepoBackend(local_collection_repo))
        for o in graphs[REPO_STD_META_FILE[0:-4]].objects(subject=new_uri,
                                                          predicate=DCTERMS.hasPart):
            from os.path import basename
            path = urlparse(o).path
            if exists(path):
                # local handle
                # retrieve name for that uri:
                # Note: That's an experimental implementation
                hdl_name = None
                for key in col_meta:
                    if urlparse(col_meta[key].url).path == path:
                        hdl_name = col_meta[key].name
                if hdl_name is None:
                    raise RuntimeError("No handle found for path '%s'." % path)

                o_new = URIRef(baseurl + '/' + hdl_name)
                # replacements for collection level:
                replacements.append((o, o_new))

                # replace in collection's handle storage:
                hdl_dir = opj(local_collection_repo.path,
                              local_collection_repo._key2filename(hdl_name))
                hdl_importer = CustomImporter('Collection', 'Handle', o)
                hdl_importer.import_data(hdl_dir)
                hdl_graphs = hdl_importer.get_graphs()
                for g in hdl_graphs:
                    for pre, obj in hdl_graphs[g].predicate_objects(o):
                        hdl_graphs[g].remove((o, pre, obj))
                        hdl_graphs[g].add((o_new, pre, obj))
                hdl_importer.store_data(hdl_dir)
                local_collection_repo.git_add(hdl_dir)

            else:
                # We have a locally not available handle
                # in that collection, that therefore can't be published.
                # Just skip for now and assume uri simply doesn't change.
                continue
        for o, o_new in replacements:
            graphs[REPO_STD_META_FILE[0:-4]].remove((new_uri, DCTERMS.hasPart, o))
            graphs[REPO_STD_META_FILE[0:-4]].add((new_uri, DCTERMS.hasPart, o_new))

        # TODO: add commit reference?

        importer.store_data(local_collection_repo.path)
        for graph_name in graphs:
            local_collection_repo.git_add(graph_name + '.ttl')
        local_collection_repo.git_commit("metadata prepared for publishing")

        # add as remote to local:
        # TODO: Better remote name?
        if remote_name is None:
            remote_name = p_branch
        local_collection_repo.git_remote_add(remote_name, collection_url)

        # push local branch "publish" to remote branch "master"
        # we want to push to master, so a different branch has to be checked
        # out in target; in general we can't explicitly allow for the local
        # repo to push
        local_collection_repo.git_push("%s +%s:master" % (remote_name, p_branch))

        # checkout master in local collection:
        local_collection_repo.git_checkout("master")

        # checkout master in published collection:
        if parsed_target.scheme == 'ssh':
            cmd_str = "ssh -S %s %s \'cat | sh /dev/stdin\' %s" % \
                      (control_path, parsed_target.netloc, script_options)
            cmd_str += " < %s" % cleanup_script_path
            try:
                out, err = runner.run(cmd_str)
            except CommandError as e:
                lgr.error("Clean-up script failed: %s" % str(e))

            # stop controlmaster:
            cmd_str = "ssh -O stop -S %s %s" % (control_path,
                                                parsed_target.netloc)
            try:
                out, err = runner.run(cmd_str)
            except CommandError as e:
                lgr.error("Stopping ssh control master failed: %s" % str(e))

        else:
            try:
                out, err = runner.run(["sh", cleanup_script_path,
                                       target_path,
                                       local_collection_repo.name +
                                       ".datalad-collection"]
                                      + available_handles)
            except CommandError as e:
                lgr.error("Clean-up script failed: %s" % str(e))

        # TODO: final check, whether everything is fine
        # Delete publish branch:
        local_collection_repo._git_custom_command('', 'git branch -D %s'
                                                  % p_branch)

        return Collection(CollectionRepoBackend(local_collection_repo,
                                                remote_name + "/master"))
Example #7
    def __call__(sshurl,
                 target=None,
                 target_dir=None,
                 target_url=None,
                 target_pushurl=None,
                 dataset=None,
                 recursive=False,
                 existing='raise',
                 shared=False):

        if sshurl is None:
            raise ValueError("""insufficient information for target creation
            (needs at least a dataset and a SSH URL).""")

        if target is None and (target_url is not None
                               or target_pushurl is not None):
            raise ValueError("""insufficient information for adding the target
            as a sibling (needs at least a name)""")

        # shortcut
        ds = dataset

        if ds is not None and not isinstance(ds, Dataset):
            ds = Dataset(ds)
        if ds is None:
            # try to find a dataset at or above CWD
            dspath = GitRepo.get_toppath(abspath(getpwd()))
            if dspath is None:
                raise ValueError("""No dataset found
                                 at or above {0}.""".format(getpwd()))
            ds = Dataset(dspath)
            lgr.debug("Resolved dataset for target creation: {0}".format(ds))
        assert (ds is not None and sshurl is not None)

        if not ds.is_installed():
            raise ValueError(
                """Dataset {0} is not installed yet.""".format(ds))
        assert (ds.repo is not None)

        # determine target parameters:
        parsed_target = urlparse(sshurl)
        host_name = parsed_target.netloc

        # TODO: Sufficient to fail on this condition?
        if not parsed_target.netloc:
            raise ValueError("Malformed URL: {0}".format(sshurl))

        if target_dir is None:
            if parsed_target.path:
                target_dir = parsed_target.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = "%NAME" not in target_dir

        # collect datasets to use:
        datasets = dict()
        datasets[basename(ds.path)] = ds
        if recursive:
            for subds in ds.get_dataset_handles(recursive=True):
                sub_path = opj(ds.path, subds)
                # TODO: when enhancing Dataset/*Repo classes and therefore
                # adapt to moved code, make proper distinction between name and
                # path of a submodule, which are technically different. This
                # probably will become important on windows as well as whenever
                # we want to allow for moved worktrees.
                datasets[basename(ds.path) + '/' + subds] = \
                    Dataset(sub_path)

        # setup SSH Connection:
        # TODO: Make the entire setup a helper to use it when pushing via
        # publish?

        # - build control master:
        from datalad.utils import assure_dir
        not_supported_on_windows("TODO")
        from os import geteuid  # POSIX-only import
        var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
        assure_dir(var_run_user_datalad)
        control_path = "%s/%s" % (var_run_user_datalad, host_name)
        control_path += ":%s" % parsed_target.port if parsed_target.port else ""

        # - start control master:
        cmd = "ssh -o ControlMaster=yes -o \"ControlPath=%s\" " \
              "-o ControlPersist=yes %s exit" % (control_path, host_name)
        lgr.debug("Try starting control master by calling:\n%s" % cmd)
        import subprocess
        proc = subprocess.Popen(cmd, shell=True)
        proc.communicate(input="\n")  # unclear why this input is necessary

        runner = Runner()
        ssh_cmd = ["ssh", "-S", control_path, host_name]

        lgr.info("Creating target datasets ...")
        for current_dataset in datasets:
            if not replicate_local_structure:
                path = target_dir.replace("%NAME",
                                          current_dataset.replace("/", "-"))
            else:
                # TODO: opj depends on local platform, not the remote one.
                # check how to deal with it. Does windows ssh server accept
                # posix paths? vice versa? Should planned SSH class provide
                # tools for this issue?
                path = normpath(
                    opj(target_dir,
                        relpath(datasets[current_dataset].path,
                                start=ds.path)))

            if path != '.':
                # check if target exists
                # TODO: Is this condition valid for != '.' only?
                path_exists = True
                cmd = ssh_cmd + ["ls", path]
                try:
                    out, err = runner.run(cmd,
                                          expect_fail=True,
                                          expect_stderr=True)
                except CommandError as e:
                    if "No such file or directory" in e.stderr and \
                                    path in e.stderr:
                        path_exists = False
                    else:
                        raise  # It's an unexpected failure here

                if path_exists:
                    if existing == 'raise':
                        raise RuntimeError(
                            "Target directory %s already exists." % path)
                    elif existing == 'skip':
                        continue
                    elif existing == 'replace':
                        pass
                    else:
                        raise ValueError(
                            "Do not know how to handle existing=%s" %
                            repr(existing))

                cmd = ssh_cmd + ["mkdir", "-p", path]
                try:
                    runner.run(cmd)
                except CommandError as e:
                    lgr.error("Remotely creating target directory failed at "
                              "%s.\nError: %s" % (path, str(e)))
                    continue

            # init git repo
            cmd = ssh_cmd + ["git", "-C", path, "init"]
            if shared:
                cmd.append("--shared=%s" % shared)
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Remotely initializing git repository failed at %s."
                          "\nError: %s\nSkipping ..." % (path, str(e)))
                continue

            # check git version on remote end:
            cmd = ssh_cmd + ["git", "version"]
            try:
                out, err = runner.run(cmd)
                # `out` looks like "git version 2.7.4"; str.lstrip strips a
                # character set rather than a prefix, so split the output
                git_version = out.strip().split()[2]
                lgr.debug("Detected git version on server: %s" % git_version)
                if tuple(int(p) for p in git_version.split(".")[:2]) < (2, 4):
                    lgr.error("Git version >= 2.4 needed to configure remote."
                              " Version detected on server: %s\nSkipping ..." %
                              git_version)
                    continue

            except CommandError as e:
                lgr.warning("Failed to determine git version on remote.\n"
                            "Error: {0}\nTrying to configure anyway "
                            "...".format(str(e)))

            # allow for pushing to checked out branch
            cmd = ssh_cmd + [
                "git", "-C", path, "config", "receive.denyCurrentBranch",
                "updateInstead"
            ]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.warning("git config failed at remote location %s.\n"
                            "You will not be able to push to checked out "
                            "branch." % path)

            # enable post-update hook:
            cmd = ssh_cmd + [
                "mv",
                opj(path, ".git/hooks/post-update.sample"),
                opj(path, ".git/hooks/post-update")
            ]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to enable post update hook.\n"
                          "Error: %s" % e.message)

            # initially update server info "manually":
            cmd = ssh_cmd + ["git", "-C", path, "update-server-info"]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to update server info.\n"
                          "Error: %s" % e.message)

        # stop controlmaster (close ssh connection):
        cmd = ["ssh", "-O", "stop", "-S", control_path, host_name]
        out, err = runner.run(cmd, expect_stderr=True)

        if target:
            # add the sibling(s):
            if target_url is None:
                target_url = sshurl
            if target_pushurl is None:
                target_pushurl = sshurl
            result_adding = AddSibling()(dataset=ds,
                                         name=target,
                                         url=target_url,
                                         pushurl=target_pushurl,
                                         recursive=recursive,
                                         force=existing in {'replace'})
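
Since string comparison of versions is lexicographic ("2.10" would sort before "2.4"), the version check above benefits from numeric parsing; a hedged sketch of a helper that also tolerates suffixes such as "2.7.4 (Apple Git-66)" (the helper name is illustrative):

def parse_git_version(out):
    # "git version 2.7.4" -> (2, 7, 4)
    token = out.strip().split()[2]
    parts = []
    for p in token.split("."):
        if not p.isdigit():
            break
        parts.append(int(p))
    return tuple(parts)

assert parse_git_version("git version 2.10.0") >= (2, 4)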