class SSHConnection(object):
    """Representation of a (shared) ssh connection.

    Commands are sent through an SSH ControlMaster socket located at
    `ctrl_path`; the master process is started on demand by `open()`.
    """

    def __init__(self, ctrl_path, sshri, identity_file=None,
                 use_remote_annex_bundle=True, force_ip=False):
        """Create a connection handler

        The actual opening of the connection is performed on-demand.

        Parameters
        ----------
        ctrl_path: str
          path to SSH controlmaster
        sshri: SSHRI
          SSH resource identifier (contains all connection-relevant info),
          or another resource identifier that can be converted into an SSHRI.
        identity_file : str or None
          Value to pass to ssh's -i option.
        use_remote_annex_bundle : bool
          If set, look for a git-annex installation on the remote and
          prefer its binaries in the search path (i.e. prefer a bundled
          Git over a system package).
        force_ip : {False, 4, 6}
          Force the use of IPv4 or IPv6 addresses with -4 or -6.

        Raises
        ------
        ValueError
          If `sshri` is not recognized as an SSH resource identifier.
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            raise ValueError(
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
        # only keep the fields that identify the connection itself
        self.sshri = SSHRI(**{k: v for k, v in sshri.fields.items()
                              if k in ('username', 'hostname', 'port')})
        # on windows cmd args lists are always converted into a string using
        # appropriate quoting rules, on other platforms args lists are passed
        # directly and we need to take care of quoting ourselves
        ctrlpath_arg = "ControlPath={}".format(
            ctrl_path if on_windows else sh_quote(str(ctrl_path)))
        self._ssh_args = ["-o", ctrlpath_arg]
        self.ctrl_path = Path(ctrl_path)
        if self.sshri.port:
            self._ssh_args += ['-p', '{}'.format(self.sshri.port)]

        if force_ip:
            self._ssh_args.append("-{}".format(force_ip))
        self._identity_file = identity_file
        self._use_remote_annex_bundle = use_remote_annex_bundle
        # essential properties of the remote system (cache, see get_*())
        self._remote_props = {}
        self._opened_by_us = False

    def __call__(self, cmd, options=None, stdin=None, log_output=True):
        """Executes a command on the remote.

        It is the callers responsibility to properly quote commands
        for remote execution (e.g. filename with spaces or other special
        characters). Use the `sh_quote()` from the module for this purpose.

        Parameters
        ----------
        cmd: str
          command to run on the remote
        options : list of str, optional
          Additional options to pass to the `-o` flag of `ssh`. Note: Many
          (probably most) of the available configuration options should not be
          set here because they can critically change the properties of the
          connection. This exists to allow options like SendEnv to be set.
        stdin : file-like, optional
          Passed on as `stdin` to the runner.
        log_output : bool, optional
          If true, stdout/stderr are captured (`StdOutErrCapture`),
          otherwise the `NoCapture` protocol is used.

        Returns
        -------
        tuple of str
          stdout, stderr of the command run.
        """
        # XXX: check for open socket once
        #      and provide roll back if fails to run and was not explicitly
        #      checked first
        # MIH: this would mean that we would have to distinguish failure
        #      of a payload command from failure of SSH itself. SSH however,
        #      only distinguishes success and failure of the entire operation
        #      Increase in fragility from introspection makes a potential
        #      performance benefit a questionable improvement.

        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()

        # locate annex and set the bundled vs. system Git machinery in motion
        if self._use_remote_annex_bundle:
            remote_annex_installdir = self.get_annex_installdir()
            if remote_annex_installdir:
                # make sure to use the bundled git version if any exists
                cmd = '{}; {}'.format(
                    'export "PATH={}:$PATH"'.format(remote_annex_installdir),
                    cmd)

        # build SSH call, feed remote command as a single last argument
        # whatever it contains will go to the remote machine for execution
        # we cannot perform any sort of escaping, because it will limit
        # what we can do on the remote, e.g. concatenate commands with '&&'
        ssh_cmd = ["ssh"] + self._ssh_args
        for opt in options or []:
            ssh_cmd.extend(["-o", opt])
        ssh_cmd += [self.sshri.as_str(), cmd]

        out = self.runner.run(
            ssh_cmd,
            protocol=StdOutErrCapture if log_output else NoCapture,
            stdin=stdin)
        return out['stdout'], out['stderr']

    @property
    def runner(self):
        """Lazily created `WitlessRunner` used for all local executions."""
        if self._runner is None:
            self._runner = WitlessRunner()
        return self._runner

    def is_open(self):
        """Check whether the ControlMaster for this connection is alive.

        Returns
        -------
        bool
          False if the control path does not exist or `ssh -O check`
          exits with code 255; True if the check command succeeds.

        Raises
        ------
        CommandError
          If the check fails with an exit code other than 255.
        """
        if not self.ctrl_path.exists():
            lgr.log(
                5,
                "Not opening %s for checking since %s does not exist",
                self, self.ctrl_path
            )
            return False
        # check whether controlmaster is still running:
        cmd = ["ssh", "-O", "check"] + self._ssh_args + [self.sshri.as_str()]
        lgr.debug("Checking %s by calling %s", self, cmd)
        try:
            # ssh would announce "Master is running" to stderr, which is
            # normal -- output is captured so it does not leak.
            # An empty temporary file shields the caller's stdin.
            with tempfile.TemporaryFile() as tempf:
                self.runner.run(
                    cmd,
                    # do not leak output
                    protocol=StdOutErrCapture,
                    stdin=tempf)
            res = True
        except CommandError as e:
            if e.code != 255:
                # this is not a normal SSH error, whine ...
                raise
            # SSH died and left socket behind, or server closed connection
            self.close()
            res = False
        lgr.debug(
            "Check of %s has %s",
            self,
            {True: 'succeeded', False: 'failed'}[res])
        return res

    def open(self):
        """Opens the connection.

        In other words: Creates the SSH ControlMaster to be used by this
        connection, if it is not there already.

        Returns
        -------
        bool
          True when SSH reports success opening the connection, False when
          a ControlMaster for an open connection already exists.

        Raises
        ------
        ConnectionOpenFailedError
          When starting the SSH ControlMaster process failed.
        """
        # the socket should vanish almost instantly when the connection closes
        # sending explicit 'check' commands to the control master is expensive
        # (needs tempfile to shield stdin, Runner overhead, etc...)
        # as we do not use any advanced features (forwarding, stopping the
        # master without exiting) it should be relatively safe to just perform
        # the much cheaper check of an existing control path
        if self.ctrl_path.exists():
            return False

        # set control options
        ctrl_options = ["-fN",
                        "-o", "ControlMaster=auto",
                        "-o", "ControlPersist=15m"] + self._ssh_args
        if self._identity_file:
            ctrl_options.extend(["-i", self._identity_file])
        # create ssh control master command
        cmd = ["ssh"] + ctrl_options + [self.sshri.as_str()]

        # start control master:
        lgr.debug("Opening %s by calling %s", self, cmd)
        proc = Popen(cmd)
        # NOTE(review): no pipes are requested from Popen, so (assuming this
        # is subprocess.Popen) `input` is not delivered and stdout/stderr are
        # None here -- confirm before relying on them in the error below.
        stdout, stderr = proc.communicate(input="\n")  # why the f.. this is necessary?

        # wait till the command exits, connection is conclusively
        # open or not at this point
        exit_code = proc.wait()

        if exit_code != 0:
            raise ConnectionOpenFailedError(
                cmd,
                'Failed to open SSH connection (could not start ControlMaster process)',
                exit_code,
                stdout,
                stderr,
            )
        self._opened_by_us = True
        return True

    def close(self):
        """Closes the connection.

        Does nothing unless this instance started the ControlMaster itself.
        If the stop command fails, a left-over control path is removed; the
        error is re-raised unless its exit code is 255.
        """
        if not self._opened_by_us:
            lgr.debug("Not closing %s since was not opened by itself", self)
            return
        # stop controlmaster:
        cmd = ["ssh", "-O", "stop"] + self._ssh_args + [self.sshri.as_str()]
        lgr.debug("Closing %s by calling %s", self, cmd)
        try:
            self.runner.run(cmd, protocol=StdOutErrCapture)
        except CommandError as e:
            lgr.debug("Failed to run close command")
            if self.ctrl_path.exists():
                lgr.debug("Removing existing control path %s", self.ctrl_path)
                # socket need to go in any case
                self.ctrl_path.unlink()
            if e.code != 255:
                # not a "normal" SSH error
                raise

    def _get_scp_command_spec(self, recursive, preserve_attrs):
        """Internal helper for SCP interface methods

        Returns the leading part of an scp command line (a new list):
        ``["scp"]`` plus the connection options and requested flags.
        """
        # Convert ssh's port flag (-p) to scp's (-P).
        scp_options = ["-P" if x == "-p" else x for x in self._ssh_args]
        # add recursive, preserve_attributes flag if recursive,
        # preserve_attrs set and create scp command
        if recursive:
            scp_options.append("-r")
        if preserve_attrs:
            scp_options.append("-p")
        return ["scp"] + scp_options

    def put(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder to destination on the remote.

        Note: this method performs escaping of filenames to an extent that
        moderately weird ones should work (spaces, quotes, pipes, other
        characters with special shell meaning), but more complicated cases
        might require appropriate external preprocessing of filenames.

        Parameters
        ----------
        source : str or list
          file/folder path(s) to copy from on local
        destination : str
          file/folder path to copy to on remote
        recursive : bool
          flag to enable recursive copying of given sources
        preserve_attrs : bool
          preserve modification times, access times, and modes from the
          original file

        Returns
        -------
        tuple of str
          stdout, stderr of the copy operation.
        """
        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()
        scp_cmd = self._get_scp_command_spec(recursive, preserve_attrs)
        # add source filepath(s) to scp command
        scp_cmd += ensure_list(source)
        # add destination path
        scp_cmd += ['%s:%s' % (
            self.sshri.hostname,
            _quote_filename_for_scp(destination),
        )]
        out = self.runner.run(scp_cmd, protocol=StdOutErrCapture)
        return out['stdout'], out['stderr']

    def get(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder from remote to a local destination.

        Note: this method performs escaping of filenames to an extent that
        moderately weird ones should work (spaces, quotes, pipes, other
        characters with special shell meaning), but more complicated cases
        might require appropriate external preprocessing of filenames.

        Parameters
        ----------
        source : str or list
          file/folder path(s) to copy from the remote host
        destination : str
          file/folder path to copy to on the local host
        recursive : bool
          flag to enable recursive copying of given sources
        preserve_attrs : bool
          preserve modification times, access times, and modes from the
          original file

        Returns
        -------
        tuple of str
          stdout, stderr of the copy operation.
        """
        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()
        scp_cmd = self._get_scp_command_spec(recursive, preserve_attrs)
        # add source filepath(s) to scp command, prefixed with the remote host
        scp_cmd += ["%s:%s" % (self.sshri.hostname, _quote_filename_for_scp(s))
                    for s in ensure_list(source)]
        # add destination path
        scp_cmd += [destination]
        out = self.runner.run(scp_cmd, protocol=StdOutErrCapture)
        return out['stdout'], out['stderr']

    def get_annex_installdir(self):
        """Return the directory of the remote git-annex-shell, or None.

        The result (including a failure, stored as None) is cached in
        `_remote_props`.
        """
        key = 'installdir:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        annex_install_dir = None
        # already set here to avoid any sort of recursion until we know
        # more (the query below goes through __call__, which asks for the
        # install dir again)
        self._remote_props[key] = annex_install_dir
        try:
            with tempfile.TemporaryFile() as tempf:
                # TODO does not work on windows
                annex_install_dir = self(
                    # use sh -e to be able to fail at each stage of the process
                    "sh -e -c 'dirname $(readlink -f $(which git-annex-shell))'",
                    stdin=tempf
                )[0].strip()
        except CommandError as e:
            lgr.debug('Failed to locate remote git-annex installation: %s',
                      exc_str(e))
        self._remote_props[key] = annex_install_dir
        return annex_install_dir

    def get_annex_version(self):
        """Return the remote git-annex version, or None if undeterminable.

        The result is cached in `_remote_props`. Output of the fallback
        command that does not have the expected ``label: value`` first line
        yields None instead of raising.
        """
        key = 'cmd:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        try:
            # modern annex versions
            version = self('git annex version --raw')[0]
        except CommandError:
            # either no annex, or old version
            version = None
            try:
                # fall back on method that could work with older installations
                out, err = self('git annex version')
            except CommandError as e:
                lgr.debug('Failed to determine remote git-annex version: %s',
                          exc_str(e))
            else:
                fields = out.split('\n')[0].split(':')
                if len(fields) > 1:
                    version = fields[1].strip()
                else:
                    lgr.debug(
                        'Failed to determine remote git-annex version: '
                        'unexpected output %r', out)
        self._remote_props[key] = version
        return version

    def get_git_version(self):
        """Return the remote Git version, or None if undeterminable.

        The result is cached in `_remote_props`. The version is taken as the
        third whitespace-separated field of `git version` output; shorter
        output yields None instead of raising.
        """
        key = 'cmd:git'
        if key in self._remote_props:
            return self._remote_props[key]
        git_version = None
        try:
            out = self('git version')[0]
        except CommandError as e:
            lgr.debug('Failed to determine Git version: %s',
                      exc_str(e))
        else:
            fields = out.split()
            if len(fields) > 2:
                git_version = fields[2]
            else:
                lgr.debug('Failed to determine Git version: '
                          'unexpected output %r', out)
        self._remote_props[key] = git_version
        return git_version
class SSHConnection(object):
    """Representation of a (shared) ssh connection.

    Commands are sent through an SSH controlmaster socket located at
    `ctrl_path`; the master process is started on demand.
    """

    def __init__(self, ctrl_path, sshri):
        """Create a connection handler

        The actual opening of the connection is performed on-demand.

        Parameters
        ----------
        ctrl_path: str
          path to SSH controlmaster
        sshri: SSHRI
          SSH resource identifier (contains all connection-relevant info),
          or another resource identifier that can be converted into an SSHRI.

        Raises
        ------
        ValueError
          If `sshri` is not recognized as an SSH resource identifier.
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            raise ValueError(
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
        # only keep the fields that identify the connection itself
        self.sshri = SSHRI(**{k: v for k, v in sshri.fields.items()
                              if k in ('username', 'hostname', 'port')})
        self.ctrl_path = ctrl_path
        self._ctrl_options = ["-o", "ControlPath=\"%s\"" % self.ctrl_path]
        if self.sshri.port:
            self._ctrl_options += ['-p', '{}'.format(self.sshri.port)]

        # essential properties of the remote system (cache, see get_*())
        self._remote_props = {}
        self._opened_by_us = False

    def __call__(self, cmd, stdin=None, log_output=True):
        """Executes a command on the remote.

        It is the callers responsibility to properly quote commands
        for remote execution (e.g. filename with spaces or other special
        characters). Use the `sh_quote()` from the module for this purpose.

        Parameters
        ----------
        cmd: str
          command to run on the remote
        stdin : file-like, optional
          Passed on as `stdin` to the runner.
        log_output : bool, optional
          Forwarded to the runner as `log_stdout`/`log_stderr`; its negation
          is forwarded as `log_online`.

        Returns
        -------
        tuple of str
          stdout, stderr of the command run.

        Raises
        ------
        RuntimeError
          If the connection is not open and `open()` reports failure.
        """
        # TODO: do not do all those checks for every invocation!!
        # TODO: check for annex location once, check for open socket once
        #       and provide roll back if fails to run and was not explicitly
        #       checked first
        if not self.is_open():
            if not self.open():
                raise RuntimeError(
                    'Cannot open SSH connection to {}'.format(
                        self.sshri))

        # locate annex and set the bundled vs. system Git machinery in motion
        remote_annex_installdir = self.get_annex_installdir()
        if remote_annex_installdir:
            # make sure to use the bundled git version if any exists
            cmd = '{}; {}'.format(
                'export "PATH={}:$PATH"'.format(remote_annex_installdir),
                cmd)

        # build SSH call, feed remote command as a single last argument
        # whatever it contains will go to the remote machine for execution
        # we cannot perform any sort of escaping, because it will limit
        # what we can do on the remote, e.g. concatenate commands with '&&'
        ssh_cmd = ["ssh"] + self._ctrl_options + [self.sshri.as_str(), cmd]

        kwargs = dict(
            log_stdout=log_output, log_stderr=log_output,
            log_online=not log_output
        )

        # TODO: pass expect parameters from above?
        # Hard to explain to toplevel users ... So for now, just set True
        return self.runner.run(
            ssh_cmd,
            expect_fail=True,
            expect_stderr=True,
            stdin=stdin,
            **kwargs)

    @property
    def runner(self):
        """Lazily created `Runner` used for all local executions."""
        if self._runner is None:
            self._runner = Runner()
        return self._runner

    def is_open(self):
        """Check whether the controlmaster for this connection is alive.

        Returns
        -------
        bool
          False if the control path does not exist or `ssh -O check`
          exits with code 255; True if the check command succeeds.

        Raises
        ------
        CommandError
          If the check fails with an exit code other than 255.
        """
        if not exists(self.ctrl_path):
            lgr.log(
                5,
                "Not opening %s for checking since %s does not exist",
                self, self.ctrl_path
            )
            return False
        # check whether controlmaster is still running:
        cmd = ["ssh", "-O", "check"] + self._ctrl_options + [self.sshri.as_str()]
        lgr.debug("Checking %s by calling %s", self, cmd)
        try:
            # expect_stderr since ssh would announce to stderr
            # "Master is running" and that is normal, not worthy warning about
            # etc -- we are doing the check here for successful operation
            with open('/dev/null') as null:
                self.runner.run(cmd, stdin=null, expect_stderr=True)
            res = True
        except CommandError as e:
            if e.code != 255:
                # this is not a normal SSH error, whine ...
                raise
            # SSH died and left socket behind, or server closed connection
            self.close()
            res = False
        lgr.debug(
            "Check of %s has %s",
            self,
            {True: 'succeeded', False: 'failed'}[res])
        return res

    def open(self):
        """Opens the connection.

        In other words: Creates the SSH controlmaster to be used by this
        connection, if it is not there already.

        Returns
        -------
        bool or None
          Whether SSH reports success opening the connection. None is
          returned (without doing anything) if the connection is already
          open.
        """
        if self.is_open():
            return

        # set control options
        ctrl_options = ["-fN",
                        "-o", "ControlMaster=auto",
                        "-o", "ControlPersist=15m"] + self._ctrl_options
        # create ssh control master command
        cmd = ["ssh"] + ctrl_options + [self.sshri.as_str()]

        # start control master:
        lgr.debug("Opening %s by calling %s", self, cmd)
        proc = Popen(cmd)
        stdout, stderr = proc.communicate(input="\n")  # why the f.. this is necessary?

        # wait till the command exits, connection is conclusively
        # open or not at this point
        exit_code = proc.wait()
        ret = exit_code == 0

        if not ret:
            lgr.warning(
                "Failed to run cmd %s. Exit code=%s\nstdout: %s\nstderr: %s",
                cmd, exit_code, stdout, stderr
            )
        else:
            self._opened_by_us = True
        return ret

    def close(self):
        """Closes the connection.

        Does nothing unless this instance started the controlmaster itself.
        If the stop command fails, a left-over control path is removed; the
        error is re-raised unless its exit code is 255.
        """
        if not self._opened_by_us:
            lgr.debug("Not closing %s since was not opened by itself", self)
            return
        # stop controlmaster:
        cmd = ["ssh", "-O", "stop"] + self._ctrl_options + [self.sshri.as_str()]
        lgr.debug("Closing %s by calling %s", self, cmd)
        try:
            self.runner.run(cmd, expect_stderr=True, expect_fail=True)
        except CommandError as e:
            lgr.debug("Failed to run close command")
            if exists(self.ctrl_path):
                lgr.debug("Removing existing control path %s", self.ctrl_path)
                # socket need to go in any case
                remove(self.ctrl_path)
            if e.code != 255:
                # not a "normal" SSH error
                raise

    def copy(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder to destination on the remote.

        Parameters
        ----------
        source: str or list
          file/folder path(s) to copy from on local
        destination: str
          file/folder path to copy to on remote
        recursive : bool
          If true, pass `-r` to scp.
        preserve_attrs : bool
          If true, pass `-p` to scp.

        Returns
        -------
        Whatever the runner returns for the scp invocation.
        """
        # Always build a NEW list: the connection's own option list must never
        # be modified (appending scp flags to it would corrupt every later
        # ssh call). At the same time convert ssh's port flag (-p) to
        # scp's (-P); scp would read '-p' as "preserve attributes" and the
        # port number as a source path.
        scp_options = ["-P" if opt == "-p" else opt
                       for opt in self._ctrl_options]
        if recursive:
            scp_options.append("-r")
        if preserve_attrs:
            scp_options.append("-p")
        scp_cmd = ["scp"] + scp_options

        # add source filepath(s) to scp command
        scp_cmd += source if isinstance(source, list) else [source]

        # add destination path
        scp_cmd += ['%s:"%s"' % (self.sshri.hostname, destination)]
        return self.runner.run(scp_cmd)

    def get_annex_installdir(self):
        """Return the directory of the remote git-annex-shell, or None.

        The result (including a failure, stored as None) is cached in
        `_remote_props`.
        """
        key = 'installdir:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        annex_install_dir = None
        # already set here to avoid any sort of recursion until we know
        # more (the query below goes through __call__, which asks for the
        # install dir again)
        self._remote_props[key] = annex_install_dir
        try:
            with open('/dev/null') as null:
                annex_install_dir = self(
                    # use sh -e to be able to fail at each stage of the process
                    "sh -e -c 'dirname $(readlink -f $(which git-annex-shell))'",
                    stdin=null
                )[0].strip()
        except CommandError as e:
            lgr.debug('Failed to locate remote git-annex installation: %s',
                      exc_str(e))
        self._remote_props[key] = annex_install_dir
        return annex_install_dir

    def get_annex_version(self):
        """Return the remote git-annex version, or None if undeterminable.

        The result is cached in `_remote_props`. Output of the fallback
        command that does not have the expected ``label: value`` first line
        yields None instead of raising.
        """
        key = 'cmd:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        try:
            # modern annex versions
            version = self('git annex version --raw')[0]
        except CommandError:
            # either no annex, or old version
            version = None
            try:
                # fall back on method that could work with older installations
                out, err = self('git annex version')
            except CommandError as e:
                lgr.debug('Failed to determine remote git-annex version: %s',
                          exc_str(e))
            else:
                fields = out.split('\n')[0].split(':')
                if len(fields) > 1:
                    version = fields[1].strip()
                else:
                    lgr.debug(
                        'Failed to determine remote git-annex version: '
                        'unexpected output %r', out)
        self._remote_props[key] = version
        return version

    def get_git_version(self):
        """Return the remote Git version, or None if undeterminable.

        The result is cached in `_remote_props`. The version is taken as the
        third whitespace-separated field of `git version` output; shorter
        output yields None instead of raising.
        """
        key = 'cmd:git'
        if key in self._remote_props:
            return self._remote_props[key]
        git_version = None
        try:
            out = self('git version')[0]
        except CommandError as e:
            lgr.debug('Failed to determine Git version: %s',
                      exc_str(e))
        else:
            fields = out.split()
            if len(fields) > 2:
                git_version = fields[2]
            else:
                lgr.debug('Failed to determine Git version: '
                          'unexpected output %r', out)
        self._remote_props[key] = git_version
        return git_version
class SSHConnection(object):
    """Representation of a (shared) ssh connection.

    Commands are sent through an SSH controlmaster socket located at
    `ctrl_path`; the master process is started on demand.
    """

    def __init__(self, ctrl_path, sshri):
        """Create a connection handler

        The actual opening of the connection is performed on-demand.

        Parameters
        ----------
        ctrl_path: str
          path to SSH controlmaster
        sshri: SSHRI
          SSH resource identifier (contains all connection-relevant info),
          or another resource identifier that can be converted into an SSHRI.

        Raises
        ------
        ValueError
          If `sshri` is not recognized as an SSH resource identifier.
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            raise ValueError(
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
        # only keep the fields that identify the connection itself
        self.sshri = SSHRI(**{k: v for k, v in sshri.fields.items()
                              if k in ('username', 'hostname', 'port')})
        self.ctrl_path = ctrl_path
        self._ctrl_options = ["-o", "ControlPath=\"%s\"" % self.ctrl_path]
        if self.sshri.port:
            self._ctrl_options += ['-p', '{}'.format(self.sshri.port)]

        # essential properties of the remote system (cache, see get_*())
        self._remote_props = {}
        self._opened_by_us = False

    def __call__(self, cmd, stdin=None, log_output=True):
        """Executes a command on the remote.

        It is the callers responsibility to properly quote commands
        for remote execution (e.g. filename with spaces or other special
        characters). Use the `sh_quote()` from the module for this purpose.

        Parameters
        ----------
        cmd: str
          command to run on the remote
        stdin : file-like, optional
          Passed on as `stdin` to the runner.
        log_output : bool, optional
          Forwarded to the runner as `log_stdout`/`log_stderr`; its negation
          is forwarded as `log_online`.

        Returns
        -------
        tuple of str
          stdout, stderr of the command run.

        Raises
        ------
        RuntimeError
          If the connection is not open and `open()` reports failure.
        """
        # TODO: do not do all those checks for every invocation!!
        # TODO: check for annex location once, check for open socket once
        #       and provide roll back if fails to run and was not explicitly
        #       checked first
        if not self.is_open():
            if not self.open():
                raise RuntimeError(
                    'Cannot open SSH connection to {}'.format(
                        self.sshri))

        # locate annex and set the bundled vs. system Git machinery in motion
        remote_annex_installdir = self.get_annex_installdir()
        if remote_annex_installdir:
            # make sure to use the bundled git version if any exists
            cmd = '{}; {}'.format(
                'export "PATH={}:$PATH"'.format(remote_annex_installdir),
                cmd)

        # build SSH call, feed remote command as a single last argument
        # whatever it contains will go to the remote machine for execution
        # we cannot perform any sort of escaping, because it will limit
        # what we can do on the remote, e.g. concatenate commands with '&&'
        ssh_cmd = ["ssh"] + self._ctrl_options + [self.sshri.as_str(), cmd]

        kwargs = dict(
            log_stdout=log_output, log_stderr=log_output,
            log_online=not log_output
        )

        # TODO: pass expect parameters from above?
        # Hard to explain to toplevel users ... So for now, just set True
        return self.runner.run(
            ssh_cmd,
            expect_fail=True,
            expect_stderr=True,
            stdin=stdin,
            **kwargs)

    @property
    def runner(self):
        """Lazily created `Runner` used for all local executions."""
        if self._runner is None:
            self._runner = Runner()
        return self._runner

    def is_open(self):
        """Check whether the controlmaster for this connection is alive.

        Returns
        -------
        bool
          False if the control path does not exist or `ssh -O check`
          exits with code 255; True if the check command succeeds.

        Raises
        ------
        CommandError
          If the check fails with an exit code other than 255.
        """
        if not exists(self.ctrl_path):
            lgr.log(
                5,
                "Not opening %s for checking since %s does not exist",
                self, self.ctrl_path
            )
            return False
        # check whether controlmaster is still running:
        cmd = ["ssh", "-O", "check"] + self._ctrl_options + [self.sshri.as_str()]
        lgr.debug("Checking %s by calling %s", self, cmd)
        try:
            # /dev/null as stdin so the check never reads the caller's stdin
            with open('/dev/null') as null:
                self.runner.run(cmd, stdin=null)
            res = True
        except CommandError as e:
            if e.code != 255:
                # this is not a normal SSH error, whine ...
                raise
            # SSH died and left socket behind, or server closed connection
            self.close()
            res = False
        lgr.debug(
            "Check of %s has %s",
            self,
            {True: 'succeeded', False: 'failed'}[res])
        return res

    def open(self):
        """Opens the connection.

        In other words: Creates the SSH controlmaster to be used by this
        connection, if it is not there already.

        Returns
        -------
        bool or None
          Whether SSH reports success opening the connection. None is
          returned (without doing anything) if the connection is already
          open.
        """
        if self.is_open():
            return

        # set control options
        ctrl_options = ["-fN",
                        "-o", "ControlMaster=auto",
                        "-o", "ControlPersist=15m"] + self._ctrl_options
        # create ssh control master command
        cmd = ["ssh"] + ctrl_options + [self.sshri.as_str()]

        # start control master:
        lgr.debug("Opening %s by calling %s", self, cmd)
        proc = Popen(cmd)
        stdout, stderr = proc.communicate(input="\n")  # why the f.. this is necessary?

        # wait till the command exits, connection is conclusively
        # open or not at this point
        exit_code = proc.wait()
        ret = exit_code == 0

        if not ret:
            lgr.warning(
                "Failed to run cmd %s. Exit code=%s\nstdout: %s\nstderr: %s",
                cmd, exit_code, stdout, stderr
            )
        else:
            self._opened_by_us = True
        return ret

    def close(self):
        """Closes the connection.

        Does nothing unless this instance started the controlmaster itself.
        If the stop command fails, a left-over control path is removed; the
        error is re-raised unless its exit code is 255.
        """
        if not self._opened_by_us:
            lgr.debug("Not closing %s since was not opened by itself", self)
            return
        # stop controlmaster:
        cmd = ["ssh", "-O", "stop"] + self._ctrl_options + [self.sshri.as_str()]
        lgr.debug("Closing %s by calling %s", self, cmd)
        try:
            self.runner.run(cmd, expect_stderr=True, expect_fail=True)
        except CommandError as e:
            lgr.debug("Failed to run close command")
            if exists(self.ctrl_path):
                lgr.debug("Removing existing control path %s", self.ctrl_path)
                # socket need to go in any case
                remove(self.ctrl_path)
            if e.code != 255:
                # not a "normal" SSH error
                raise

    def copy(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder to destination on the remote.

        Parameters
        ----------
        source: str or list
          file/folder path(s) to copy from on local
        destination: str
          file/folder path to copy to on remote
        recursive : bool
          If true, pass `-r` to scp.
        preserve_attrs : bool
          If true, pass `-p` to scp.

        Returns
        -------
        Whatever the runner returns for the scp invocation.
        """
        # Always build a NEW list: the connection's own option list must never
        # be modified (appending scp flags to it would corrupt every later
        # ssh call). At the same time convert ssh's port flag (-p) to
        # scp's (-P); scp would read '-p' as "preserve attributes" and the
        # port number as a source path.
        scp_options = ["-P" if opt == "-p" else opt
                       for opt in self._ctrl_options]
        if recursive:
            scp_options.append("-r")
        if preserve_attrs:
            scp_options.append("-p")
        scp_cmd = ["scp"] + scp_options

        # add source filepath(s) to scp command
        scp_cmd += source if isinstance(source, list) else [source]

        # add destination path
        scp_cmd += ['%s:"%s"' % (self.sshri.hostname, destination)]
        return self.runner.run(scp_cmd)

    def get_annex_installdir(self):
        """Return the directory of the remote git-annex-shell, or None.

        The result (including a failure, stored as None) is cached in
        `_remote_props`.
        """
        key = 'installdir:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        annex_install_dir = None
        # already set here to avoid any sort of recursion until we know
        # more (the query below goes through __call__, which asks for the
        # install dir again)
        self._remote_props[key] = annex_install_dir
        try:
            with open('/dev/null') as null:
                annex_install_dir = self(
                    # use sh -e to be able to fail at each stage of the process
                    "sh -e -c 'dirname $(readlink -f $(which git-annex-shell))'",
                    stdin=null
                )[0].strip()
        except CommandError as e:
            lgr.debug('Failed to locate remote git-annex installation: %s',
                      exc_str(e))
        self._remote_props[key] = annex_install_dir
        return annex_install_dir

    def get_annex_version(self):
        """Return the remote git-annex version, or None if undeterminable.

        The result is cached in `_remote_props`. Output of the fallback
        command that does not have the expected ``label: value`` first line
        yields None instead of raising.
        """
        key = 'cmd:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        try:
            # modern annex versions
            version = self('git annex version --raw')[0]
        except CommandError:
            # either no annex, or old version
            version = None
            try:
                # fall back on method that could work with older installations
                out, err = self('git annex version')
            except CommandError as e:
                lgr.debug('Failed to determine remote git-annex version: %s',
                          exc_str(e))
            else:
                fields = out.split('\n')[0].split(':')
                if len(fields) > 1:
                    version = fields[1].strip()
                else:
                    lgr.debug(
                        'Failed to determine remote git-annex version: '
                        'unexpected output %r', out)
        self._remote_props[key] = version
        return version

    def get_git_version(self):
        """Return the remote Git version, or None if undeterminable.

        The result is cached in `_remote_props`. The version is taken as the
        third whitespace-separated field of `git version` output; shorter
        output yields None instead of raising.
        """
        key = 'cmd:git'
        if key in self._remote_props:
            return self._remote_props[key]
        git_version = None
        try:
            out = self('git version')[0]
        except CommandError as e:
            lgr.debug('Failed to determine Git version: %s',
                      exc_str(e))
        else:
            fields = out.split()
            if len(fields) > 2:
                git_version = fields[2]
            else:
                lgr.debug('Failed to determine Git version: '
                          'unexpected output %r', out)
        self._remote_props[key] = git_version
        return git_version
class BaseSSHConnection(object):
    """Representation of an SSH connection.

    Base class: `__call__`, `open` and `close` raise NotImplementedError
    here and have to be provided by subclasses.
    """

    def __init__(self, sshri, identity_file=None,
                 use_remote_annex_bundle=None, force_ip=False):
        """Create a connection handler

        The actual opening of the connection is performed on-demand.

        Parameters
        ----------
        sshri: SSHRI
          SSH resource identifier (contains all connection-relevant info),
          or another resource identifier that can be converted into an SSHRI.
        identity_file : str or None
          Value to pass to ssh's -i option.
        use_remote_annex_bundle : bool, optional
          If enabled, look for a git-annex installation on the remote and
          prefer its Git binaries in the search path (i.e. prefer a bundled
          Git over a system package). See also the configuration setting
          datalad.ssh.try-use-annex-bundled-git
        force_ip : {False, 4, 6}
          Force the use of IPv4 or IPv6 addresses with -4 or -6.

        Raises
        ------
        ValueError
          If `sshri` is not recognized as an SSH resource identifier.

        .. versionchanged:: 0.16
           The default for `use_remote_annex_bundle` changed from `True`
           to `None`. Instead of attempting to use a potentially available
           git-annex bundle on the remote host by default, this behavior
           is now conditional on the `datalad.ssh.try-use-annex-bundled-git`
           (off by default).
        """
        self._runner = None
        self._ssh_executable = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            raise ValueError(
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
        # only keep the fields that identify the connection itself
        self.sshri = SSHRI(**{k: v for k, v in sshri.fields.items()
                              if k in ('username', 'hostname', 'port')})
        # arguments only used for opening a connection
        self._ssh_open_args = []
        # arguments for annex ssh invocation
        self._ssh_args = []
        if self.sshri.port:
            self._ssh_open_args.extend(['-p', '{}'.format(self.sshri.port)])
        if force_ip:
            self._ssh_open_args.append("-{}".format(force_ip))
        if identity_file:
            self._ssh_open_args.extend(["-i", identity_file])

        self._use_remote_annex_bundle = use_remote_annex_bundle
        # essential properties of the remote system (cache, see get_*())
        self._remote_props = {}

    def __call__(self, cmd, options=None, stdin=None, log_output=True):
        """Executes a command on the remote.

        It is the callers responsibility to properly quote commands
        for remote execution (e.g. filename with spaces or other special
        characters).

        Parameters
        ----------
        cmd: str
          command to run on the remote
        options : list of str, optional
          Additional options to pass to the `-o` flag of `ssh`. Note: Many
          (probably most) of the available configuration options should not be
          set here because they can critically change the properties of the
          connection. This exists to allow options like SendEnv to be set.

        Returns
        -------
        tuple of str
          stdout, stderr of the command run.
        """
        raise NotImplementedError

    def open(self):
        """Opens the connection.

        Returns
        -------
        bool
          To return True if connection establishes a control socket
          successfully. Return False otherwise
        """
        raise NotImplementedError

    def close(self):
        """Closes the connection.
        """
        raise NotImplementedError

    @property
    def ssh_executable(self):
        """determine which ssh client executable should be used.

        The value is read from the `datalad.ssh.executable` configuration
        on first access and kept for later accesses.
        """
        if not self._ssh_executable:
            from datalad import cfg
            self._ssh_executable = cfg.obtain("datalad.ssh.executable")
        return self._ssh_executable

    @property
    def runner(self):
        """Lazily created `WitlessRunner` used for all local executions."""
        if self._runner is None:
            self._runner = WitlessRunner()
        return self._runner

    def _adjust_cmd_for_bundle_execution(self, cmd):
        """Prefix `cmd` with a PATH export for a remote git-annex bundle.

        The remote git-annex location is only looked up when requested via
        `use_remote_annex_bundle` or, if that is not truthy, via the
        `datalad.ssh.try-use-annex-bundled-git` configuration. If nothing is
        requested or no installation is found, `cmd` is returned unchanged.
        """
        use_bundle = self._use_remote_annex_bundle
        if not use_bundle:
            # only consult the configuration when not explicitly enabled
            from datalad import cfg
            use_bundle = cfg.obtain('datalad.ssh.try-use-annex-bundled-git')
        if not use_bundle:
            return cmd
        # locate annex and set the bundled vs. system Git machinery in motion
        remote_annex_installdir = self.get_annex_installdir()
        if remote_annex_installdir:
            # make sure to use the bundled git version if any exists
            cmd = '{}; {}'.format(
                'export "PATH={}:$PATH"'.format(remote_annex_installdir),
                cmd)
        return cmd

    def _exec_ssh(self, ssh_cmd, cmd, options=None, stdin=None, log_output=True):
        """Run `cmd` on the remote using the given ssh command prefix.

        Parameters
        ----------
        ssh_cmd : list of str
          Leading part of the ssh command line (executable and arguments).
          It is not modified.
        cmd : str
          command to run on the remote
        options : list of str, optional
          Each item is appended as an additional `-o <item>`.
        stdin : file-like, optional
          Passed on as `stdin` to the runner.
        log_output : bool, optional
          If true, stdout/stderr are captured (`StdOutErrCapture`),
          otherwise the `NoCapture` protocol is used.

        Returns
        -------
        tuple of str
          stdout, stderr of the command run.
        """
        cmd = self._adjust_cmd_for_bundle_execution(cmd)

        # work on a copy so the caller's list is left untouched
        ssh_cmd = list(ssh_cmd)
        for opt in options or []:
            ssh_cmd.extend(["-o", opt])

        # build SSH call, feed remote command as a single last argument
        # whatever it contains will go to the remote machine for execution
        # we cannot perform any sort of escaping, because it will limit
        # what we can do on the remote, e.g. concatenate commands with '&&'
        ssh_cmd += [self.sshri.as_str(), cmd]

        lgr.debug("%s is used to run %s", self, ssh_cmd)

        out = self.runner.run(
            ssh_cmd,
            protocol=StdOutErrCapture if log_output else NoCapture,
            stdin=stdin)
        return out['stdout'], out['stderr']

    def _get_scp_command_spec(self, recursive, preserve_attrs):
        """Internal helper for SCP interface methods

        Returns the leading part of an scp command line (a new list):
        ``["scp"]`` plus the connection options and requested flags.
        """
        # Convert ssh's port flag (-p) to scp's (-P).
        scp_options = ["-P" if x == "-p" else x for x in self._ssh_args]
        # add recursive, preserve_attributes flag if recursive,
        # preserve_attrs set and create scp command
        if recursive:
            scp_options.append("-r")
        if preserve_attrs:
            scp_options.append("-p")
        return ["scp"] + scp_options

    def put(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder to destination on the remote.

        Note: this method performs escaping of filenames to an extent that
        moderately weird ones should work (spaces, quotes, pipes, other
        characters with special shell meaning), but more complicated cases
        might require appropriate external preprocessing of filenames.

        Parameters
        ----------
        source : str or list
          file/folder path(s) to copy from on local
        destination : str
          file/folder path to copy to on remote
        recursive : bool
          flag to enable recursive copying of given sources
        preserve_attrs : bool
          preserve modification times, access times, and modes from the
          original file

        Returns
        -------
        tuple of str
          stdout, stderr of the copy operation.
        """
        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()
        scp_cmd = self._get_scp_command_spec(recursive, preserve_attrs)
        # add source filepath(s) to scp command
        scp_cmd += ensure_list(source)
        # add destination path
        scp_cmd += ['%s:%s' % (
            self.sshri.hostname,
            _quote_filename_for_scp(destination),
        )]
        out = self.runner.run(scp_cmd, protocol=StdOutErrCapture)
        return out['stdout'], out['stderr']

    def get(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder from remote to a local destination.

        Note: this method performs escaping of filenames to an extent that
        moderately weird ones should work (spaces, quotes, pipes, other
        characters with special shell meaning), but more complicated cases
        might require appropriate external preprocessing of filenames.

        Parameters
        ----------
        source : str or list
          file/folder path(s) to copy from the remote host
        destination : str
          file/folder path to copy to on the local host
        recursive : bool
          flag to enable recursive copying of given sources
        preserve_attrs : bool
          preserve modification times, access times, and modes from the
          original file

        Returns
        -------
        tuple of str
          stdout, stderr of the copy operation.
        """
        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()
        scp_cmd = self._get_scp_command_spec(recursive, preserve_attrs)
        # add source filepath(s) to scp command, prefixed with the remote host
        scp_cmd += ["%s:%s" % (self.sshri.hostname, _quote_filename_for_scp(s))
                    for s in ensure_list(source)]
        # add destination path
        scp_cmd += [destination]
        out = self.runner.run(scp_cmd, protocol=StdOutErrCapture)
        return out['stdout'], out['stderr']

    def get_annex_installdir(self):
        """Return the directory of the remote git-annex-shell, or None.

        The result (including a failure, stored as None) is cached in
        `_remote_props`.
        """
        key = 'installdir:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        annex_install_dir = None
        # already set here to avoid any sort of recursion until we know
        # more
        self._remote_props[key] = annex_install_dir
        try:
            with tempfile.TemporaryFile() as tempf:
                # TODO does not work on windows
                annex_install_dir = self(
                    # use sh -e to be able to fail at each stage of the process
                    "sh -e -c 'dirname $(readlink -f $(which git-annex-shell))'",
                    stdin=tempf
                )[0].strip()
        except CommandError as e:
            lgr.debug('Failed to locate remote git-annex installation: %s',
                      CapturedException(e))
        self._remote_props[key] = annex_install_dir
        return annex_install_dir

    def get_annex_version(self):
        """Return the remote git-annex version, or None if undeterminable.

        The result is cached in `_remote_props`. Output of the fallback
        command that does not have the expected ``label: value`` first line
        yields None instead of raising.
        """
        key = 'cmd:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        try:
            # modern annex versions
            version = self('git annex version --raw')[0]
        except CommandError:
            # either no annex, or old version
            version = None
            try:
                # fall back on method that could work with older installations
                out, err = self('git annex version')
            except CommandError as e:
                lgr.debug('Failed to determine remote git-annex version: %s',
                          CapturedException(e))
            else:
                fields = out.split('\n')[0].split(':')
                if len(fields) > 1:
                    version = fields[1].strip()
                else:
                    lgr.debug(
                        'Failed to determine remote git-annex version: '
                        'unexpected output %r', out)
        self._remote_props[key] = version
        return version

    def get_git_version(self):
        """Return the remote Git version, or None if undeterminable.

        The result is cached in `_remote_props`. The version is taken as the
        third whitespace-separated field of `git version` output; shorter
        output yields None instead of raising.
        """
        key = 'cmd:git'
        if key in self._remote_props:
            return self._remote_props[key]
        git_version = None
        try:
            out = self('git version')[0]
        except CommandError as e:
            lgr.debug('Failed to determine Git version: %s',
                      CapturedException(e))
        else:
            fields = out.split()
            if len(fields) > 2:
                git_version = fields[2]
            else:
                lgr.debug('Failed to determine Git version: '
                          'unexpected output %r', out)
        self._remote_props[key] = git_version
        return git_version