def __init__(self):
    """Initialize the manager with no known connections and no socket dir.

    `not_supported_on_windows` is called first, so nothing is set up on a
    platform where that guard raises.
    """
    not_supported_on_windows(
        "TODO: Make this an abstraction to "
        "interface platform dependent SSH"
    )

    # Location of the socket directory is not determined here.
    self._socket_dir = None
    # Registry of connections, empty to begin with.
    self._connections = {}
def __init__(self):
    """Create an empty connection registry; the socket directory is unset.

    The platform guard runs before any attribute is assigned.
    """
    unsupported_msg = ("TODO: Make this an abstraction to "
                       "interface platform dependent SSH")
    not_supported_on_windows(unsupported_msg)

    self._connections, self._socket_dir = {}, None
def test_not_supported_on_windows():
    """Check `not_supported_on_windows` with and without a message argument.

    With ``datalad.utils.on_windows`` patched to True the helper must raise
    NotImplementedError; with it patched to False it must return None.
    """
    # Same two call signatures are exercised in both platform states:
    # no arguments, and a single message argument.
    call_variants = ((), ("msg",))

    with patch('datalad.utils.on_windows', True):
        for args in call_variants:
            assert_raises(NotImplementedError, not_supported_on_windows, *args)

    with patch('datalad.utils.on_windows', False):
        for args in call_variants:
            assert_equal(not_supported_on_windows(*args), None)
def __init__(self):
    """Initialize an empty manager; socket dir and previous connections unset.

    The platform guard runs before any attribute is assigned.
    """
    not_supported_on_windows(
        "TODO: Make this an abstraction to "
        "interface platform dependent SSH"
    )

    self._socket_dir = None
    self._connections = {}
    # Deliberately None rather than an empty list: previous connections are
    # meant to be collected during the initial handling of the socket
    # directory, and leaving None here makes code that relies on them
    # before that point fail instead of silently seeing "no connections".
    self._prev_connections = None
def __init__(self):
    """Initialize the manager and record entries already in the socket dir.

    After the platform guard, the connection registry is created empty and
    every non-directory entry found in ``self.socket_dir`` is remembered in
    ``self._prev_connections`` (as a path joined onto the socket dir).
    """
    from os import listdir
    from os.path import isdir

    not_supported_on_windows(
        "TODO: Make this an abstraction to "
        "interface platform dependent SSH"
    )

    self._connections = {}
    self._socket_dir = None

    # NOTE(review): `self.socket_dir` (no underscore) is read right after
    # `_socket_dir` was reset to None -- presumably a property that resolves
    # the directory on access; confirm against the class definition.
    leftovers = []
    for entry in listdir(self.socket_dir):
        if isdir(opj(self.socket_dir, entry)):
            # sub-directories are not connections
            continue
        leftovers.append(opj(self.socket_dir, entry))
    self._prev_connections = leftovers

    lgr.log(5, "Found %d previous connections", len(self._prev_connections))
def __call__(sshurl, target=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None, recursive=False,
             existing='error', shared=False, ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None):
    """Create sibling repositories for a dataset on a host reachable via SSH.

    For the dataset (and, with `recursive`, its subdatasets) a target
    directory is prepared on the remote end, a repository is initialized
    in it, the remote git configuration and post-update hook are set up,
    and the hooks of all created siblings are triggered once at the end
    (deepest first).  If `target` is given, the created locations are then
    registered as siblings through ``AddSibling``.

    Parameters
    ----------
    sshurl
      Location of the remote end, ``ssh://host/path`` or ``host:path``.
    target
      Name under which to register the sibling(s).  Required whenever
      `target_url` or `target_pushurl` is given.
    target_dir
      Remote directory.  Defaults to the path component of `sshurl`, or
      ``'.'`` if there is none.  If it contains ``%NAME``, the placeholder
      is replaced per dataset by its name with ``/`` turned into ``-``;
      otherwise the local dataset hierarchy is replicated underneath it.
    target_url, target_pushurl
      URLs handed to ``AddSibling``.  `target_url` defaults to `sshurl`;
      `target_pushurl` defaults to `sshurl` only if that differs from
      `target_url`.  `target_url` is also passed as ``description`` when
      initializing the remote repository.
    dataset
      Dataset to operate on, resolved via ``require_dataset``.
    recursive
      Also process all subdatasets.
    existing
      What to do if the remote directory exists: ``'error'`` (raise),
      ``'skip'``, ``'replace'`` (remove and recreate) or ``'reconfigure'``
      (keep the repository, only redo the configuration).
    shared, ui
      Passed to the repository initialization / web interface upload.  A
      true `ui` enables the upload of the web interface for the root
      dataset.
    as_common_datasrc, publish_by_default, publish_depends
      Forwarded unchanged to ``AddSibling``.

    Raises
    ------
    ValueError
      Insufficient arguments, unsupported `sshurl`, or unknown `existing`.
    RuntimeError
      Remote directory exists and ``existing == 'error'``.
    CommandError
      The remote existence check failed for an unexpected reason.
    """
    if sshurl is None:
        raise ValueError(
            """insufficient information for target creation (needs at least a dataset and a SSH URL).""")

    if target is None and (target_url is not None
                           or target_pushurl is not None):
        raise ValueError(
            """insufficient information for adding the target as a sibling (needs at least a name)""")

    # shortcut
    ds = require_dataset(dataset, check_installed=True,
                         purpose='creating a sibling')
    assert(ds is not None and sshurl is not None and ds.repo is not None)

    # determine target parameters:
    sshri = RI(sshurl)
    if not isinstance(sshri, SSHRI) \
            and not (isinstance(sshri, URL) and sshri.scheme == 'ssh'):
        raise ValueError(
            "Unsupported SSH URL: '{0}', use ssh://host/path or host:path syntax".format(sshurl))

    if target_dir is None:
        target_dir = sshri.path or '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%NAME" not in target_dir

    # collect datasets to use, keyed by a '/'-separated name whose number
    # of slashes reflects the nesting depth:
    root_name = basename(ds.path)
    datasets = {root_name: ds}
    if recursive:
        for subds in ds.get_subdatasets(recursive=True):
            # TODO: when enhancing Dataset/*Repo classes and therefore
            # adapt to moved code, make proper distinction between name and
            # path of a submodule, which are technically different. This
            # probably will become important on windows as well as whenever
            # we want to allow for moved worktrees.
            datasets[root_name + '/' + subds] = Dataset(opj(ds.path, subds))

    # request ssh connection:
    not_supported_on_windows("TODO")
    lgr.info("Connecting ...")
    ssh = ssh_manager.get_connection(sshurl)
    ssh.open()

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    remote_repos_to_run_hook_for = []
    for current_dspath in sorted(datasets, key=lambda x: x.count('/')):
        current_ds = datasets[current_dspath]
        if not current_ds.is_installed():
            lgr.info("Skipping %s since not installed locally", current_dspath)
            continue

        if not replicate_local_structure:
            path = target_dir.replace("%NAME",
                                      current_dspath.replace("/", "-"))
        else:
            # TODO: opj depends on local platform, not the remote one.
            # check how to deal with it. Does windows ssh server accept
            # posix paths? vice versa? Should planned SSH class provide
            # tools for this issue?
            path = normpath(opj(target_dir,
                                relpath(current_ds.path, start=ds.path)))

        lgr.info("Creating target dataset {0} at {1}".format(current_dspath, path))

        # Must be set to True only if exists and existing='reconfigure'
        # otherwise we might skip actions if we say existing='reconfigure'
        # but it did not even exist before
        only_reconfigure = False
        if path != '.':
            # check if target exists
            # TODO: Is this condition valid for != '.' only?
            path_exists = True
            try:
                ssh(["ls", path])
            except CommandError as e:
                if "No such file or directory" in e.stderr and \
                        path in e.stderr:
                    path_exists = False
                else:
                    raise  # It's an unexpected failure here

            if path_exists:
                if existing == 'error':
                    raise RuntimeError(
                        "Target directory %s already exists." % path)
                elif existing == 'skip':
                    continue
                elif existing == 'replace':
                    # enable write permissions to allow removing dir
                    ssh(["chmod", "+r+w", "-R", path])
                    # remove target at path
                    ssh(["rm", "-rf", path])
                    # if we succeeded in removing it
                    path_exists = False
                elif existing == 'reconfigure':
                    only_reconfigure = True
                else:
                    raise ValueError(
                        "Do not know how to handle existing=%s" % repr(existing))

            if not path_exists:
                try:
                    ssh(["mkdir", "-p", path])
                except CommandError as e:
                    lgr.error("Remotely creating target directory failed at "
                              "%s.\nError: %s" % (path, exc_str(e)))
                    continue

        # don't (re-)initialize dataset if existing == reconfigure
        if not only_reconfigure:
            # init git and possibly annex repo
            if not CreateSibling.init_remote_repo(
                    path, ssh, shared, current_ds,
                    description=target_url):
                continue

        # check git version on remote end
        lgr.info("Adjusting remote git configuration")
        remote_git_version = CreateSibling.get_remote_git_version(ssh)
        if remote_git_version and remote_git_version >= "2.4":
            # allow for pushing to checked out branch
            try:
                ssh(["git", "-C", path] +
                    ["config", "receive.denyCurrentBranch", "updateInstead"])
            except CommandError as e:
                lgr.error("git config failed at remote location %s.\n"
                          "You will not be able to push to checked out "
                          "branch. Error: %s", path, exc_str(e))
        else:
            lgr.error("Git version >= 2.4 needed to configure remote."
                      " Version detected on server: %s\nSkipping configuration"
                      " of receive.denyCurrentBranch - you will not be able to"
                      " publish updates to this repository. Upgrade your git"
                      " and run with --existing=reconfigure"
                      % remote_git_version)

        # enable metadata refresh on dataset updates to publication server
        lgr.info("Enabling git post-update hook ...")
        try:
            CreateSibling.create_postupdate_hook(path, ssh, current_ds)
        except CommandError as e:
            lgr.error("Failed to add json creation command to post update "
                      "hook.\nError: %s" % exc_str(e))

        # publish web-interface to root dataset on publication server.
        # The root is identified by object identity instead of a "first
        # dataset that got this far" flag: if the root was skipped above
        # (e.g. existing='skip' or a failed mkdir/init), the interface must
        # not end up in whichever subdataset happens to come next.
        if ui and current_ds is ds:
            lgr.info("Uploading web interface to %s" % path)
            try:
                CreateSibling.upload_web_interface(path, ssh, shared, ui)
            except CommandError as e:
                lgr.error("Failed to push web interface to the remote "
                          "datalad repository.\nError: %s" % exc_str(e))

        remote_repos_to_run_hook_for.append(path)

    # in reverse order would be depth first
    lgr.debug("Running post-update hooks in all created siblings")
    for path in remote_repos_to_run_hook_for[::-1]:
        # Trigger the hook
        try:
            ssh(
                ["cd '" + _path_(path, ".git") + "' && hooks/post-update"],
                wrap_args=False  # we wrapped here manually
            )
        except CommandError as e:
            lgr.error("Failed to run post-update hook under path %s. "
                      "Error: %s" % (path, exc_str(e)))

    if target:
        # add the sibling(s):
        lgr.debug("Adding the siblings")
        if target_url is None:
            target_url = sshurl
        if target_pushurl is None and sshurl != target_url:
            target_pushurl = sshurl
        AddSibling()(dataset=ds,
                     name=target,
                     url=target_url,
                     pushurl=target_pushurl,
                     recursive=recursive,
                     fetch=True,
                     force=existing in {'replace'},
                     as_common_datasrc=as_common_datasrc,
                     publish_by_default=publish_by_default,
                     publish_depends=publish_depends)
def __call__(sshurl,
             target=None,
             target_dir=None,
             target_url=None,
             target_pushurl=None,
             dataset=None,
             recursive=False,
             existing='error',
             shared=False,
             ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None):
    """Set up remote sibling repositories of a dataset over SSH.

    Every selected dataset gets a directory on the remote end with an
    initialized repository, adjusted git configuration and a post-update
    hook.  Hooks are run once after all siblings exist, deepest dataset
    first.  With a `target` name the result is registered locally through
    ``AddSibling``.

    Parameters
    ----------
    sshurl
      Remote location in ``ssh://host/path`` or ``host:path`` form.
    target
      Sibling name; must be given if `target_url` or `target_pushurl` is.
    target_dir
      Remote directory; falls back to the path of `sshurl`, then ``'.'``.
      A ``%NAME`` placeholder is substituted with the dataset name
      (``/`` replaced by ``-``); without the placeholder the local
      hierarchy is mirrored below `target_dir`.
    target_url, target_pushurl
      URLs for ``AddSibling``.  `target_url` falls back to `sshurl`;
      `target_pushurl` falls back to `sshurl` when that differs from
      `target_url`.  `target_url` doubles as ``description`` for the
      remote repository initialization.
    dataset
      Dataset specification resolved by ``require_dataset``.
    recursive
      Include all subdatasets.
    existing
      Policy for an already existing remote directory: ``'error'``,
      ``'skip'``, ``'replace'`` or ``'reconfigure'``.
    shared, ui
      Forwarded to repository initialization / web interface upload; a
      true `ui` triggers the upload for the root dataset.
    as_common_datasrc, publish_by_default, publish_depends
      Passed through to ``AddSibling``.

    Raises
    ------
    ValueError
      Missing or inconsistent arguments, unsupported URL, unknown
      `existing` value.
    RuntimeError
      Remote directory exists while ``existing == 'error'``.
    CommandError
      Unexpected failure of the remote existence check.
    """
    if sshurl is None:
        raise ValueError(
            """insufficient information for target creation (needs at least a dataset and a SSH URL).""")

    wants_sibling_urls = target_url is not None or target_pushurl is not None
    if target is None and wants_sibling_urls:
        raise ValueError(
            """insufficient information for adding the target as a sibling (needs at least a name)""")

    # shortcut
    ds = require_dataset(dataset,
                         check_installed=True,
                         purpose='creating a sibling')
    assert (ds is not None and sshurl is not None and ds.repo is not None)

    # determine target parameters:
    sshri = RI(sshurl)
    is_ssh_url = isinstance(sshri, URL) and sshri.scheme == 'ssh'
    if not isinstance(sshri, SSHRI) and not is_ssh_url:
        raise ValueError(
            "Unsupported SSH URL: '{0}', use ssh://host/path or host:path syntax"
            .format(sshurl))

    if target_dir is None:
        target_dir = sshri.path or '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%NAME" not in target_dir

    # collect datasets to use; the number of '/' in a key is the depth
    datasets = {basename(ds.path): ds}
    if recursive:
        for subds in ds.get_subdatasets(recursive=True):
            # TODO: when enhancing Dataset/*Repo classes and therefore
            # adapt to moved code, make proper distinction between name and
            # path of a submodule, which are technically different. This
            # probably will become important on windows as well as whenever
            # we want to allow for moved worktrees.
            sub_path = opj(ds.path, subds)
            datasets[basename(ds.path) + '/' + subds] = Dataset(sub_path)

    # request ssh connection:
    not_supported_on_windows("TODO")
    lgr.info("Connecting ...")
    ssh = ssh_manager.get_connection(sshurl)
    ssh.open()

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    remote_repos_to_run_hook_for = []
    for current_dspath in sorted(datasets, key=lambda x: x.count('/')):
        current_ds = datasets[current_dspath]
        if not current_ds.is_installed():
            lgr.info("Skipping %s since not installed locally",
                     current_dspath)
            continue

        if replicate_local_structure:
            # TODO: opj depends on local platform, not the remote one.
            # check how to deal with it. Does windows ssh server accept
            # posix paths? vice versa? Should planned SSH class provide
            # tools for this issue?
            path = normpath(
                opj(target_dir, relpath(current_ds.path, start=ds.path)))
        else:
            path = target_dir.replace("%NAME",
                                      current_dspath.replace("/", "-"))

        lgr.info("Creating target dataset {0} at {1}".format(
            current_dspath, path))

        # Must be set to True only if exists and existing='reconfigure'
        # otherwise we might skip actions if we say existing='reconfigure'
        # but it did not even exist before
        only_reconfigure = False
        if path != '.':
            # check if target exists
            # TODO: Is this condition valid for != '.' only?
            path_exists = True
            try:
                ssh(["ls", path])
            except CommandError as e:
                if "No such file or directory" in e.stderr and \
                        path in e.stderr:
                    path_exists = False
                else:
                    raise  # It's an unexpected failure here

            if path_exists:
                if existing == 'error':
                    raise RuntimeError(
                        "Target directory %s already exists." % path)
                elif existing == 'skip':
                    continue
                elif existing == 'replace':
                    # enable write permissions to allow removing dir
                    ssh(["chmod", "+r+w", "-R", path])
                    # remove target at path
                    ssh(["rm", "-rf", path])
                    # if we succeeded in removing it
                    path_exists = False
                elif existing == 'reconfigure':
                    only_reconfigure = True
                else:
                    raise ValueError(
                        "Do not know how to handle existing=%s" %
                        repr(existing))

            if not path_exists:
                try:
                    ssh(["mkdir", "-p", path])
                except CommandError as e:
                    lgr.error(
                        "Remotely creating target directory failed at "
                        "%s.\nError: %s" % (path, exc_str(e)))
                    continue

        # don't (re-)initialize dataset if existing == reconfigure
        if not only_reconfigure:
            # init git and possibly annex repo
            initialized = CreateSibling.init_remote_repo(
                path, ssh, shared, current_ds, description=target_url)
            if not initialized:
                continue

        # check git version on remote end
        lgr.info("Adjusting remote git configuration")
        remote_git_version = CreateSibling.get_remote_git_version(ssh)
        if remote_git_version and remote_git_version >= "2.4":
            # allow for pushing to checked out branch
            try:
                ssh(["git", "-C", path] + [
                    "config", "receive.denyCurrentBranch", "updateInstead"
                ])
            except CommandError as e:
                lgr.error(
                    "git config failed at remote location %s.\n"
                    "You will not be able to push to checked out "
                    "branch. Error: %s", path, exc_str(e))
        else:
            lgr.error(
                "Git version >= 2.4 needed to configure remote."
                " Version detected on server: %s\nSkipping configuration"
                " of receive.denyCurrentBranch - you will not be able to"
                " publish updates to this repository. Upgrade your git"
                " and run with --existing=reconfigure" % remote_git_version)

        # enable metadata refresh on dataset updates to publication server
        lgr.info("Enabling git post-update hook ...")
        try:
            CreateSibling.create_postupdate_hook(path, ssh, current_ds)
        except CommandError as e:
            lgr.error("Failed to add json creation command to post update "
                      "hook.\nError: %s" % exc_str(e))

        # publish web-interface to root dataset on publication server.
        # Identity with `ds` is what makes a dataset the root; a mere
        # "nothing uploaded yet" flag would misplace the interface into a
        # subdataset whenever the root itself was skipped earlier in this
        # loop (existing='skip', failed mkdir or failed init).
        if ui and current_ds is ds:
            lgr.info("Uploading web interface to %s" % path)
            try:
                CreateSibling.upload_web_interface(path, ssh, shared, ui)
            except CommandError as e:
                lgr.error("Failed to push web interface to the remote "
                          "datalad repository.\nError: %s" % exc_str(e))

        remote_repos_to_run_hook_for.append(path)

    # in reverse order would be depth first
    lgr.debug("Running post-update hooks in all created siblings")
    for path in reversed(remote_repos_to_run_hook_for):
        # Trigger the hook
        try:
            ssh(
                ["cd '" + _path_(path, ".git") + "' && hooks/post-update"],
                wrap_args=False  # we wrapped here manually
            )
        except CommandError as e:
            lgr.error("Failed to run post-update hook under path %s. "
                      "Error: %s" % (path, exc_str(e)))

    if target:
        # add the sibling(s):
        lgr.debug("Adding the siblings")
        if target_url is None:
            target_url = sshurl
        if target_pushurl is None and sshurl != target_url:
            target_pushurl = sshurl
        AddSibling()(dataset=ds,
                     name=target,
                     url=target_url,
                     pushurl=target_pushurl,
                     recursive=recursive,
                     fetch=True,
                     force=existing in {'replace'},
                     as_common_datasrc=as_common_datasrc,
                     publish_by_default=publish_by_default,
                     publish_depends=publish_depends)
def __call__(sshurl,
             target=None,
             target_dir=None,
             target_url=None,
             target_pushurl=None,
             dataset=None,
             recursive=False,
             existing='raise',
             shared=False):
    """Create target repositories for a dataset on an SSH-accessible server.

    An SSH control master is started for the host, then for the dataset
    (and, with `recursive`, its subdatasets) a directory is created
    remotely, ``git init`` is run in it, pushing to the checked out branch
    is enabled, the sample post-update hook is activated and
    ``git update-server-info`` is run.  The control master is stopped
    again afterwards, also when an error aborts the procedure.  With a
    `target` name the locations are registered via ``AddSibling``.

    Parameters
    ----------
    sshurl
      URL of the server; must have a network location when parsed by
      ``urlparse``.
    target
      Sibling name; required if `target_url` or `target_pushurl` is given.
    target_dir
      Remote directory; defaults to the path of `sshurl`, or ``'.'``.
      ``%NAME`` is replaced by the dataset name with ``/`` turned into
      ``-``; without the placeholder the local hierarchy is replicated
      below `target_dir`.
    target_url, target_pushurl
      URLs for ``AddSibling``; both default to `sshurl`.
    dataset
      ``Dataset`` instance or something ``Dataset()`` accepts.  If None, a
      dataset is looked up at or above the current working directory.
    recursive
      Also process the dataset handles reported by the dataset.
    existing
      Policy for an existing remote directory: ``'raise'``, ``'skip'`` or
      ``'replace'`` (proceed with the existing directory).
    shared
      If true, appended to ``git init`` as ``--shared=<value>``.

    Raises
    ------
    ValueError
      Insufficient arguments, no or uninstalled dataset, malformed URL,
      unknown `existing` value.
    RuntimeError
      Remote directory exists and ``existing == 'raise'``.
    CommandError
      Unexpected failure of the remote existence check, or failure to
      stop the control master.
    """
    import re
    import subprocess
    from os import geteuid  # Linux specific import
    from datalad.utils import assure_dir

    if sshurl is None:
        raise ValueError(
            """insufficient information for target creation (needs at least a dataset and a SSH URL).""")

    if target is None and (target_url is not None
                           or target_pushurl is not None):
        raise ValueError(
            """insufficient information for adding the target as a sibling (needs at least a name)""")

    # shortcut
    ds = dataset

    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)
    if ds is None:
        # try to find a dataset at or above CWD
        dspath = GitRepo.get_toppath(abspath(getpwd()))
        if dspath is None:
            raise ValueError(
                """No dataset found at or above {0}.""".format(getpwd()))
        ds = Dataset(dspath)
        lgr.debug("Resolved dataset for target creation: {0}".format(ds))
    assert (ds is not None and sshurl is not None)

    if not ds.is_installed():
        raise ValueError("""Dataset {0} is not installed yet.""".format(ds))
    assert (ds.repo is not None)

    # determine target parameters:
    parsed_target = urlparse(sshurl)
    host_name = parsed_target.netloc

    # TODO: Sufficient to fail on this condition?
    if not parsed_target.netloc:
        raise ValueError("Malformed URL: {0}".format(sshurl))

    if target_dir is None:
        target_dir = parsed_target.path or '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%NAME" not in target_dir

    # collect datasets to use:
    datasets = {basename(ds.path): ds}
    if recursive:
        for subds in ds.get_dataset_handles(recursive=True):
            # TODO: when enhancing Dataset/*Repo classes and therefore
            # adapt to moved code, make proper distinction between name and
            # path of a submodule, which are technically different. This
            # probably will become important on windows as well as whenever
            # we want to allow for moved worktrees.
            sub_path = opj(ds.path, subds)
            datasets[basename(ds.path) + '/' + subds] = Dataset(sub_path)

    # setup SSH Connection:
    # TODO: Make the entire setup a helper to use it when pushing via
    # publish?

    # - build control master:
    not_supported_on_windows("TODO")
    var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
    assure_dir(var_run_user_datalad)
    control_path = "%s/%s" % (var_run_user_datalad, host_name)
    control_path += ":%s" % parsed_target.port if parsed_target.port else ""

    # - start control master:
    # NOTE(review): the command is assembled by string interpolation and
    # executed through a shell; `host_name` comes straight from `sshurl`.
    cmd = "ssh -o ControlMaster=yes -o \"ControlPath=%s\" " \
          "-o ControlPersist=yes %s exit" % (control_path, host_name)
    lgr.debug("Try starting control master by calling:\n%s" % cmd)
    proc = subprocess.Popen(cmd, shell=True)
    # kept as in the original, which found this call to be necessary
    # without knowing why
    proc.communicate(input="\n")

    runner = Runner()
    ssh_cmd = ["ssh", "-S", control_path, host_name]
    stop_master_cmd = ["ssh", "-O", "stop", "-S", control_path, host_name]

    # The master was started with ControlPersist=yes and would stay around
    # indefinitely; make sure it is stopped even if an exception (e.g. the
    # RuntimeError for an existing target directory) aborts the loop.
    try:
        lgr.info("Creating target datasets ...")
        for current_dataset in datasets:
            if not replicate_local_structure:
                path = target_dir.replace("%NAME",
                                          current_dataset.replace("/", "-"))
            else:
                # TODO: opj depends on local platform, not the remote one.
                # check how to deal with it. Does windows ssh server accept
                # posix paths? vice versa? Should planned SSH class provide
                # tools for this issue?
                path = normpath(
                    opj(target_dir,
                        relpath(datasets[current_dataset].path,
                                start=ds.path)))

            if path != '.':
                # check if target exists
                # TODO: Is this condition valid for != '.' only?
                path_exists = True
                cmd = ssh_cmd + ["ls", path]
                try:
                    runner.run(cmd, expect_fail=True, expect_stderr=True)
                except CommandError as e:
                    if "No such file or directory" in e.stderr and \
                            path in e.stderr:
                        path_exists = False
                    else:
                        raise  # It's an unexpected failure here

                if path_exists:
                    if existing == 'raise':
                        raise RuntimeError(
                            "Target directory %s already exists." % path)
                    elif existing == 'skip':
                        continue
                    elif existing != 'replace':
                        raise ValueError(
                            "Do not know how to hand existing=%s" %
                            repr(existing))
                    # 'replace': carry on with the existing directory

                cmd = ssh_cmd + ["mkdir", "-p", path]
                try:
                    runner.run(cmd)
                except CommandError as e:
                    lgr.error("Remotely creating target directory failed at "
                              "%s.\nError: %s" % (path, str(e)))
                    continue

            # init git repo
            cmd = ssh_cmd + ["git", "-C", path, "init"]
            if shared:
                cmd.append("--shared=%s" % shared)
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Remotely initializing git repository failed at %s."
                          "\nError: %s\nSkipping ..." % (path, str(e)))
                continue

            # check git version on remote end:
            cmd = ssh_cmd + ["git", "version"]
            try:
                out, err = runner.run(cmd)
            except CommandError as e:
                lgr.warning("Failed to determine git version on remote.\n"
                            "Error: {0}\nTrying to configure anyway "
                            "...".format(str(e)))
            else:
                # Remove the literal prefix (str.lstrip would strip a set
                # of characters, not a prefix).
                git_version = out.strip()
                if git_version.startswith("git version"):
                    git_version = git_version[len("git version"):].strip()
                lgr.debug("Detected git version on server: %s" % git_version)
                # Compare numerically: as strings "2.10" sorts before "2.4".
                # An output without a leading "<major>.<minor>" is treated
                # as unknown and configuration is attempted anyway.
                numbers = re.match(r"(\d+)\.(\d+)", git_version)
                if numbers and \
                        tuple(int(n) for n in numbers.groups()) < (2, 4):
                    lgr.error("Git version >= 2.4 needed to configure remote."
                              " Version detected on server: %s\nSkipping ..."
                              % git_version)
                    continue

            # allow for pushing to checked out branch
            cmd = ssh_cmd + [
                "git", "-C", path, "config", "receive.denyCurrentBranch",
                "updateInstead"
            ]
            try:
                runner.run(cmd)
            except CommandError:
                lgr.warning("git config failed at remote location %s.\n"
                            "You will not be able to push to checked out "
                            "branch." % path)

            # enable post-update hook:
            cmd = ssh_cmd + [
                "mv",
                opj(path, ".git/hooks/post-update.sample"),
                opj(path, ".git/hooks/post-update")
            ]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to enable post update hook.\n"
                          "Error: %s" % str(e))

            # initially update server info "manually":
            cmd = ssh_cmd + ["git", "-C", path, "update-server-info"]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to update server info.\n"
                          "Error: %s" % str(e))
    finally:
        # stop controlmaster (close ssh connection):
        runner.run(stop_master_cmd, expect_stderr=True)

    if target:
        # add the sibling(s):
        if target_url is None:
            target_url = sshurl
        if target_pushurl is None:
            target_pushurl = sshurl
        AddSibling()(dataset=ds,
                     name=target,
                     url=target_url,
                     pushurl=target_pushurl,
                     recursive=recursive,
                     force=existing in {'replace'})
def __call__(sshurl, name=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None,
             recursive=False,
             recursion_limit=None,
             existing='error',
             shared=None,
             ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None,
             annex_wanted=None, annex_group=None, annex_groupwanted=None,
             inherit=False,
             since=None):
    """Create dataset siblings on an SSH-accessible host, yielding results.

    This is a generator.  For every candidate dataset one annotated-path
    record is yielded, carrying a ``'status'`` (``'ok'``, ``'notneeded'``
    or ``'error'``; records that arrive from path annotation with a status
    already set are passed through) and, where applicable, a
    ``'message'``.  Siblings are created top-down; their post-update hooks
    are run afterwards in reverse order.

    Parameters
    ----------
    sshurl
      ``ssh://host/path`` or ``host:path``.  May be empty only together
      with `inherit`, in which case the URL is taken from the existing
      remote `name` of this dataset or derived from that remote of the
      superdataset.
    name
      Sibling name; defaults to the hostname of the URL.
    target_dir
      Remote directory; defaults to the path of the URL, or ``'.'``.
      Unless it contains ``%RELNAME``, the local structure is replicated.
    target_url, target_pushurl, shared, publish_by_default,
    as_common_datasrc, annex_wanted, annex_group, annex_groupwanted
      Forwarded to ``_create_dataset_sibling``.
    dataset
      Base dataset, resolved via ``require_dataset``.
    recursive, recursion_limit
      Forwarded to path annotation to select subdatasets.
    existing
      With ``'error'`` or ``'skip'`` a dataset that already has a remote
      called `name` is reported as ``'error'`` / ``'notneeded'`` and not
      processed.  The value is also forwarded to
      ``_create_dataset_sibling``.
    ui
      If true, the web interface is uploaded for the base dataset.
    publish_depends
      Sibling name(s) that must already be known remotes of each dataset;
      otherwise an ``'error'`` record is yielded for it.
    inherit
      Take the URL from existing configuration (see `sshurl`); cannot be
      combined with an explicit `sshurl`.
    since
      Forwarded to path annotation as ``modified``.  An empty string is
      replaced by ``'<name>/<active branch>'``.

    Raises
    ------
    InsufficientArgumentsError
      No `sshurl` and no `inherit`.
    ValueError
      Inconsistent arguments, undeterminable superdataset, or unsupported
      URL.
    MissingExternalDependency
      No git-annex version could be obtained from the remote end.
    """
    # there is no point in doing anything further
    not_supported_on_windows(
        "Support for SSH connections is not yet implemented in Windows")

    #
    # nothing without a base dataset
    #
    ds = require_dataset(dataset, check_installed=True,
                         purpose='creating a sibling')
    refds_path = ds.path

    #
    # all checks that are possible before we start parsing the dataset
    #

    # possibly use sshurl to get the name in case if not specified
    if not sshurl:
        if not inherit:
            raise InsufficientArgumentsError(
                "needs at least an SSH URL, if no inherit option"
            )
        if name is None:
            raise ValueError(
                "Neither SSH URL, nor the name of sibling to inherit from "
                "was specified"
            )
        # It might well be that we already have this remote setup
        try:
            sshurl = CreateSibling._get_remote_url(ds, name)
        except Exception as exc:
            # best effort: fall back to the superdataset below
            lgr.debug('%s does not know about url for %s: %s',
                      ds, name, exc_str(exc))
    elif inherit:
        # may be could be safely dropped -- still WiP
        raise ValueError(
            "For now, for clarity not allowing specifying a custom sshurl "
            "while inheriting settings"
        )

    if not sshurl:
        # TODO: may be more back up before _prep?
        super_ds = ds.get_superdataset()
        if not super_ds:
            raise ValueError(
                "Could not determine super dataset for %s to inherit URL"
                % ds
            )
        super_url = CreateSibling._get_remote_url(super_ds, name)
        # for now assuming hierarchical setup
        # (TODO: to be able to distinguish between the two, probably
        # needs storing datalad.*.target_dir to have %RELNAME in there)
        sshurl = slash_join(super_url, relpath(ds.path, super_ds.path))

    # check the login URL
    sshri = RI(sshurl)
    if not is_ssh(sshri):
        raise ValueError(
            "Unsupported SSH URL: '{0}', "
            "use ssh://host/path or host:path syntax".format(sshurl))

    if not name:
        # use the hostname as default remote name
        name = sshri.hostname
        lgr.debug(
            "No sibling name given, use URL hostname '%s' as sibling name",
            name)

    if since == '':
        # default behavior - only updated since last update
        # so we figure out what was the last update
        # XXX here we assume one to one mapping of names from local branches
        # to the remote
        active_branch = ds.repo.get_active_branch()
        since = '%s/%s' % (name, active_branch)

    #
    # parse the base dataset to find all subdatasets that need processing
    #
    to_process = []
    for ap in AnnotatePaths.__call__(
            dataset=refds_path,
            # only a single path!
            path=refds_path,
            recursive=recursive,
            recursion_limit=recursion_limit,
            action='create_sibling',
            # both next should not happen anyways
            unavailable_path_status='impossible',
            nondataset_path_status='error',
            modified=since,
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            # this is done
            yield ap
            continue
        if ap.get('type', None) != 'dataset' or \
                ap.get('state', None) == 'absent':
            # this can happen when there is `since`, but we have no
            # use for anything but datasets here
            continue
        # Absent datasets never get here (see the `continue` above), so the
        # remotes can be queried unconditionally.
        checkds_remotes = Dataset(ap['path']).repo.get_remotes()
        if publish_depends:
            # make sure dependencies are valid
            # TODO: inherit -- we might want to automagically create
            # those dependents as well???
            unknown_deps = set(
                assure_list(publish_depends)).difference(checkds_remotes)
            if unknown_deps:
                ap['status'] = 'error'
                ap['message'] = (
                    'unknown sibling(s) specified as publication dependency: %s',
                    unknown_deps)
                yield ap
                continue
        if name in checkds_remotes and existing in ('error', 'skip'):
            ap['status'] = 'error' if existing == 'error' else 'notneeded'
            ap['message'] = (
                "sibling '%s' already configured (specify alternative name, or force "
                "reconfiguration via --existing",
                name)
            yield ap
            continue
        to_process.append(ap)

    if not to_process:
        # we ruled out all possibilities
        # TODO wait for gh-1218 and make better return values
        lgr.info("No datasets qualify for sibling creation. "
                 "Consider different settings for --existing "
                 "or --since if this is unexpected")
        return

    if target_dir is None:
        if sshri.path:
            target_dir = sshri.path
        else:
            target_dir = '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%RELNAME" not in target_dir

    # request ssh connection:
    lgr.info("Connecting ...")
    assert (sshurl is not None)  # delayed sanity check
    ssh = ssh_manager.get_connection(sshurl)
    if not ssh.get_annex_version():
        raise MissingExternalDependency(
            'git-annex',
            msg='on the remote system')

    #
    # all checks done and we have a connection, now do something
    #

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    yielded = set()
    remote_repos_to_run_hook_for = []
    for currentds_ap in \
            sorted(to_process, key=lambda x: x['path'].count('/')):
        current_ds = Dataset(currentds_ap['path'])

        path = _create_dataset_sibling(
            name,
            current_ds,
            ds.path,
            ssh,
            replicate_local_structure,
            sshri,
            target_dir,
            target_url,
            target_pushurl,
            existing,
            shared,
            publish_depends,
            publish_by_default,
            as_common_datasrc,
            annex_wanted,
            annex_group,
            annex_groupwanted,
            inherit
        )
        if not path:
            # nothing new was created
            # TODO is 'notneeded' appropriate in this case?
            currentds_ap['status'] = 'notneeded'
            # TODO explain status in 'message'
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        remote_repos_to_run_hook_for.append((path, currentds_ap))

        # publish web-interface to root dataset on publication server
        if current_ds.path == ds.path and ui:
            lgr.info("Uploading web interface to %s" % path)
            try:
                CreateSibling.upload_web_interface(path, ssh, shared, ui)
            except CommandError as e:
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to push web interface to the remote datalad repository (%s)",
                    exc_str(e))
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue

    # in reverse order would be depth first
    lgr.info("Running post-update hooks in all created siblings")
    # TODO: add progressbar
    for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
        # Trigger the hook
        lgr.debug("Running hook for %s", path)
        try:
            ssh("cd {} && hooks/post-update".format(
                sh_quote(_path_(path, ".git"))))
        except CommandError as e:
            currentds_ap['status'] = 'error'
            currentds_ap['message'] = (
                "failed to run post-update hook under remote path %s (%s)",
                path, exc_str(e))
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        if currentds_ap['path'] not in yielded:
            # if we were silent until now everything is just splendid
            currentds_ap['status'] = 'ok'
            yield currentds_ap
def __call__(sshurl, target=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None, recursive=False,
             existing='raise', shared=False):
    """Prepare publication targets for a dataset on a server reached by SSH.

    Starts an SSH control master for the host and, for the dataset plus
    (with `recursive`) its subdatasets: creates the remote directory, runs
    ``git init``, sets ``receive.denyCurrentBranch`` to ``updateInstead``,
    activates the sample post-update hook and runs
    ``git update-server-info``.  The control master is always stopped at
    the end, including when an exception interrupts the work.  If `target`
    is given, siblings are added through ``AddSibling``.

    Parameters
    ----------
    sshurl
      Server URL; ``urlparse`` must find a network location in it.
    target
      Name for the sibling(s); mandatory when `target_url` or
      `target_pushurl` is specified.
    target_dir
      Directory on the server; defaults to the URL path, else ``'.'``.
      A ``%NAME`` placeholder is replaced by the dataset name (``/``
      becomes ``-``); without it the local layout is reproduced below
      `target_dir`.
    target_url, target_pushurl
      Passed to ``AddSibling``; each defaults to `sshurl`.
    dataset
      A ``Dataset`` or an argument for ``Dataset()``; if None the dataset
      at or above the current working directory is used.
    recursive
      Include the dataset handles reported by the dataset.
    existing
      ``'raise'``, ``'skip'`` or ``'replace'``: what to do when the remote
      directory already exists.  ``'replace'`` continues with the
      existing directory.
    shared
      When true, ``--shared=<value>`` is appended to ``git init``.

    Raises
    ------
    ValueError
      Missing arguments, dataset not found or not installed, malformed
      URL, or unsupported `existing` value.
    RuntimeError
      Remote directory already exists and ``existing == 'raise'``.
    CommandError
      The remote ``ls`` failed unexpectedly, or the control master could
      not be stopped.
    """
    if sshurl is None:
        raise ValueError(
            """insufficient information for target creation (needs at least a dataset and a SSH URL).""")

    if target is None and (target_url is not None
                           or target_pushurl is not None):
        raise ValueError(
            """insufficient information for adding the target as a sibling (needs at least a name)""")

    # shortcut
    ds = dataset

    if ds is not None and not isinstance(ds, Dataset):
        ds = Dataset(ds)
    if ds is None:
        # try to find a dataset at or above CWD
        dspath = GitRepo.get_toppath(abspath(getpwd()))
        if dspath is None:
            raise ValueError(
                """No dataset found at or above {0}.""".format(getpwd()))
        ds = Dataset(dspath)
        lgr.debug("Resolved dataset for target creation: {0}".format(ds))
    assert(ds is not None and sshurl is not None)

    if not ds.is_installed():
        raise ValueError("""Dataset {0} is not installed yet.""".format(ds))
    assert(ds.repo is not None)

    # determine target parameters:
    parsed_target = urlparse(sshurl)
    host_name = parsed_target.netloc

    # TODO: Sufficient to fail on this condition?
    if not parsed_target.netloc:
        raise ValueError("Malformed URL: {0}".format(sshurl))

    if target_dir is None:
        target_dir = parsed_target.path or '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%NAME" not in target_dir

    # collect datasets to use:
    datasets = dict()
    datasets[basename(ds.path)] = ds
    if recursive:
        for subds in ds.get_dataset_handles(recursive=True):
            sub_path = opj(ds.path, subds)
            # TODO: when enhancing Dataset/*Repo classes and therefore
            # adapt to moved code, make proper distinction between name and
            # path of a submodule, which are technically different. This
            # probably will become important on windows as well as whenever
            # we want to allow for moved worktrees.
            datasets[basename(ds.path) + '/' + subds] = \
                Dataset(sub_path)

    # setup SSH Connection:
    # TODO: Make the entire setup a helper to use it when pushing via
    # publish?

    # - build control master:
    from datalad.utils import assure_dir
    not_supported_on_windows("TODO")
    from os import geteuid  # Linux specific import
    var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
    assure_dir(var_run_user_datalad)
    control_path = "%s/%s" % (var_run_user_datalad, host_name)
    control_path += ":%s" % parsed_target.port if parsed_target.port else ""

    # - start control master:
    # NOTE(review): shell command built by string interpolation from the
    # user-supplied URL (`host_name`).
    cmd = "ssh -o ControlMaster=yes -o \"ControlPath=%s\" " \
          "-o ControlPersist=yes %s exit" % (control_path, host_name)
    lgr.debug("Try starting control master by calling:\n%s" % cmd)
    import subprocess
    proc = subprocess.Popen(cmd, shell=True)
    # original author found this call necessary without knowing why
    proc.communicate(input="\n")

    # used for parsing the remote git version below
    import re

    runner = Runner()
    ssh_cmd = ["ssh", "-S", control_path, host_name]

    # ControlPersist=yes keeps the master alive until told otherwise, so
    # the stop command has to run no matter how the loop below ends (it
    # raises on purpose for an existing target with existing='raise').
    try:
        lgr.info("Creating target datasets ...")
        for current_dataset in datasets:
            if not replicate_local_structure:
                path = target_dir.replace("%NAME",
                                          current_dataset.replace("/", "-"))
            else:
                # TODO: opj depends on local platform, not the remote one.
                # check how to deal with it. Does windows ssh server accept
                # posix paths? vice versa? Should planned SSH class provide
                # tools for this issue?
                path = normpath(opj(target_dir,
                                    relpath(datasets[current_dataset].path,
                                            start=ds.path)))

            if path != '.':
                # check if target exists
                # TODO: Is this condition valid for != '.' only?
                path_exists = True
                cmd = ssh_cmd + ["ls", path]
                try:
                    runner.run(cmd, expect_fail=True, expect_stderr=True)
                except CommandError as e:
                    if "No such file or directory" in e.stderr and \
                            path in e.stderr:
                        path_exists = False
                    else:
                        raise  # It's an unexpected failure here

                if path_exists:
                    if existing == 'raise':
                        raise RuntimeError(
                            "Target directory %s already exists." % path)
                    elif existing == 'skip':
                        continue
                    elif existing == 'replace':
                        pass
                    else:
                        raise ValueError(
                            "Do not know how to hand existing=%s"
                            % repr(existing))

                cmd = ssh_cmd + ["mkdir", "-p", path]
                try:
                    runner.run(cmd)
                except CommandError as e:
                    lgr.error("Remotely creating target directory failed at "
                              "%s.\nError: %s" % (path, str(e)))
                    continue

            # init git repo
            cmd = ssh_cmd + ["git", "-C", path, "init"]
            if shared:
                cmd.append("--shared=%s" % shared)
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Remotely initializing git repository failed at %s."
                          "\nError: %s\nSkipping ..." % (path, str(e)))
                continue

            # check git version on remote end:
            cmd = ssh_cmd + ["git", "version"]
            try:
                out, err = runner.run(cmd)
            except CommandError as e:
                lgr.warning(
                    "Failed to determine git version on remote.\n"
                    "Error: {0}\nTrying to configure anyway "
                    "...".format(str(e)))
            else:
                # Cut off the literal "git version" prefix; str.lstrip()
                # takes a set of characters and is not a prefix remover.
                prefix = "git version"
                git_version = out.strip()
                if git_version.startswith(prefix):
                    git_version = git_version[len(prefix):].strip()
                lgr.debug("Detected git version on server: %s" % git_version)
                # Versions must be compared as numbers -- lexicographically
                # "2.10" < "2.4".  If no "<major>.<minor>" can be parsed,
                # the version counts as unknown and we try to configure.
                parsed = re.match(r"(\d+)\.(\d+)", git_version)
                if parsed is not None:
                    major, minor = (int(part) for part in parsed.groups())
                    if (major, minor) < (2, 4):
                        lgr.error(
                            "Git version >= 2.4 needed to configure remote."
                            " Version detected on server: %s\nSkipping ..."
                            % git_version)
                        continue

            # allow for pushing to checked out branch
            cmd = ssh_cmd + ["git", "-C", path, "config",
                             "receive.denyCurrentBranch",
                             "updateInstead"]
            try:
                runner.run(cmd)
            except CommandError:
                lgr.warning("git config failed at remote location %s.\n"
                            "You will not be able to push to checked out "
                            "branch." % path)

            # enable post-update hook:
            cmd = ssh_cmd + ["mv", opj(path, ".git/hooks/post-update.sample"),
                             opj(path, ".git/hooks/post-update")]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to enable post update hook.\n"
                          "Error: %s" % str(e))

            # initially update server info "manually":
            cmd = ssh_cmd + ["git", "-C", path, "update-server-info"]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to update server info.\n"
                          "Error: %s" % str(e))
    finally:
        # stop controlmaster (close ssh connection):
        cmd = ["ssh", "-O", "stop", "-S", control_path, host_name]
        runner.run(cmd, expect_stderr=True)

    if target:
        # add the sibling(s):
        if target_url is None:
            target_url = sshurl
        if target_pushurl is None:
            target_pushurl = sshurl
        AddSibling()(dataset=ds,
                     name=target,
                     url=target_url,
                     pushurl=target_pushurl,
                     recursive=recursive,
                     force=existing in {'replace'})