Example #1
0
    def __init__(self, ctrl_path, sshri):
        """Prepare a handler for an SSH connection.

        Nothing is connected here; the connection itself is opened
        on-demand.

        Parameters
        ----------
        ctrl_path: str
          location of the SSH controlmaster
        sshri: SSHRI
          SSH resource identifier carrying the connection-relevant
          information, or another resource identifier that can be
          converted into an SSHRI.

        Raises
        ------
        ValueError
          if `sshri` is not recognized as an SSH resource identifier.
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            message = (
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
            raise ValueError(message)

        # carry over only the username, hostname and port fields
        endpoint_fields = {}
        for name, value in sshri.fields.items():
            if name in ('username', 'hostname', 'port'):
                endpoint_fields[name] = value
        self.sshri = SSHRI(**endpoint_fields)

        self.ctrl_path = ctrl_path
        options = ["-o", "ControlPath=\"%s\"" % self.ctrl_path]
        port = self.sshri.port
        if port:
            options += ['-p', '{}'.format(port)]
        self._ctrl_options = options

        # essential properties of the remote system
        self._remote_props = {}
        self._opened_by_us = False
Example #2
0
    def __init__(self,
                 sshri,
                 identity_file=None,
                 use_remote_annex_bundle=None,
                 force_ip=False):
        """Prepare a handler for an SSH connection.

        Nothing is connected here; the connection itself is opened
        on-demand.

        Parameters
        ----------
        sshri: SSHRI
          SSH resource identifier carrying the connection-relevant
          information, or another resource identifier that can be
          converted into an SSHRI.
        identity_file : str or None
          Value handed to the -i option of ssh.
        use_remote_annex_bundle : bool, optional
          If enabled, look for a git-annex installation on the remote and
          prefer its Git binaries in the search path (i.e. prefer a bundled
          Git over a system package). See also the configuration setting
          datalad.ssh.try-use-annex-bundled-git
        force_ip : {False, 4, 6}
           Force the use of IPv4 or IPv6 addresses with -4 or -6.

        Raises
        ------
        ValueError
          if `sshri` is not recognized as an SSH resource identifier.

        .. versionchanged:: 0.16
           The default for `use_remote_annex_bundle` changed from `True`
           to `None`. Instead of attempting to use a potentially available
           git-annex bundle on the remote host by default, this behavior
           is now conditional on the `datalad.ssh.try-use-annex-bundled-git`
           (off by default).
        """
        self._runner = None
        self._ssh_executable = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            message = (
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
            raise ValueError(message)

        # carry over only the username, hostname and port fields
        endpoint_fields = {}
        for name, value in sshri.fields.items():
            if name in ('username', 'hostname', 'port'):
                endpoint_fields[name] = value
        self.sshri = SSHRI(**endpoint_fields)

        # arguments only used for opening a connection
        open_args = []
        port = self.sshri.port
        if port:
            open_args += ['-p', '{}'.format(port)]
        if force_ip:
            open_args.append("-{}".format(force_ip))
        if identity_file:
            open_args += ["-i", identity_file]
        self._ssh_open_args = open_args
        # arguments for annex ssh invocation
        self._ssh_args = []

        self._use_remote_annex_bundle = use_remote_annex_bundle
        # essential properties of the remote system
        self._remote_props = {}
    def __init__(self,
                 ctrl_path,
                 sshri,
                 identity_file=None,
                 use_remote_annex_bundle=True,
                 force_ip=False):
        """Prepare a handler for an SSH connection.

        Nothing is connected here; the connection itself is opened
        on-demand.

        Parameters
        ----------
        ctrl_path: str
          location of the SSH controlmaster
        sshri: SSHRI
          SSH resource identifier carrying the connection-relevant
          information, or another resource identifier that can be
          converted into an SSHRI.
        identity_file : str or None
          Value handed to the -i option of ssh.
        use_remote_annex_bundle : bool
          If set, look for a git-annex installation on the remote and
          prefer its binaries in the search path (i.e. prefer a bundled
          Git over a system package).
        force_ip : {False, 4, 6}
           Force the use of IPv4 or IPv6 addresses with -4 or -6.

        Raises
        ------
        ValueError
          if `sshri` is not recognized as an SSH resource identifier.
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            message = (
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
            raise ValueError(message)

        # carry over only the username, hostname and port fields
        endpoint_fields = {}
        for name, value in sshri.fields.items():
            if name in ('username', 'hostname', 'port'):
                endpoint_fields[name] = value
        self.sshri = SSHRI(**endpoint_fields)

        # On windows a list of command arguments is always turned into a
        # string using appropriate quoting rules. On other platforms the
        # list is passed on directly, hence quoting is done here.
        if on_windows:
            ctrl_path_value = ctrl_path
        else:
            ctrl_path_value = sh_quote(str(ctrl_path))
        ssh_args = ["-o", "ControlPath={}".format(ctrl_path_value)]
        self._ssh_args = ssh_args
        self.ctrl_path = Path(ctrl_path)

        # the remaining flags are added to the very same list object
        port = self.sshri.port
        if port:
            ssh_args += ['-p', '{}'.format(port)]
        if force_ip:
            ssh_args.append("-{}".format(force_ip))

        self._identity_file = identity_file
        self._use_remote_annex_bundle = use_remote_annex_bundle

        # essential properties of the remote system
        self._remote_props = {}
        self._opened_by_us = False
Example #4
0
def test_url_eq():
    """Check (in)equality comparisons between resource identifiers."""
    # two separately created empty URLs compare equal
    eq_(URL(), URL())
    # The analogous comparison of two empty RI() objects is left out on
    # purpose: asking for the kind of an empty URL makes no sense.
    host_url = URL(hostname='x')
    neq_(URL(), host_url)
    # identical field values do not make objects of different types equal
    neq_(URL(path='x'), PathRI(path='x'))
    host_sshri = SSHRI(hostname='x')
    neq_(host_url, host_sshri)
    neq_(str(host_url), str(host_sshri))
    def __init__(self,
                 sshri,
                 identity_file=None,
                 use_remote_annex_bundle=True,
                 force_ip=False):
        """Prepare a handler for an SSH connection.

        Nothing is connected here; the connection itself is opened
        on-demand.

        Parameters
        ----------
        sshri: SSHRI
          SSH resource identifier carrying the connection-relevant
          information, or another resource identifier that can be
          converted into an SSHRI.
        identity_file : str or None
          Value handed to the -i option of ssh.
        use_remote_annex_bundle : bool
          If set, look for a git-annex installation on the remote and
          prefer its binaries in the search path (i.e. prefer a bundled
          Git over a system package).
        force_ip : {False, 4, 6}
           Force the use of IPv4 or IPv6 addresses with -4 or -6.

        Raises
        ------
        ValueError
          if `sshri` is not recognized as an SSH resource identifier.
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            message = (
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
            raise ValueError(message)

        # carry over only the username, hostname and port fields
        endpoint_fields = {}
        for name, value in sshri.fields.items():
            if name in ('username', 'hostname', 'port'):
                endpoint_fields[name] = value
        self.sshri = SSHRI(**endpoint_fields)

        # arguments only used for opening a connection
        open_args = []
        port = self.sshri.port
        if port:
            open_args += ['-p', '{}'.format(port)]
        if force_ip:
            open_args.append("-{}".format(force_ip))
        if identity_file:
            open_args += ["-i", identity_file]
        self._ssh_open_args = open_args
        # arguments for annex ssh invocation
        self._ssh_args = []

        self._use_remote_annex_bundle = use_remote_annex_bundle
        # essential properties of the remote system
        self._remote_props = {}
Example #6
0
    def __init__(self, ctrl_path, sshri):
        """Prepare a handler for an SSH connection.

        Nothing is connected here; the connection itself is opened
        on-demand.

        Parameters
        ----------
        ctrl_path: str
          location of the SSH controlmaster
        sshri: SSHRI
          SSH resource identifier carrying the connection-relevant
          information, or another resource identifier that can be
          converted into an SSHRI.

        Raises
        ------
        ValueError
          if `sshri` is not recognized as an SSH resource identifier.
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            message = (
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
            raise ValueError(message)

        # carry over only the username, hostname and port fields
        endpoint_fields = {}
        for name, value in sshri.fields.items():
            if name in ('username', 'hostname', 'port'):
                endpoint_fields[name] = value
        self.sshri = SSHRI(**endpoint_fields)

        self.ctrl_path = ctrl_path
        options = ["-o", "ControlPath=\"%s\"" % self.ctrl_path]
        port = self.sshri.port
        if port:
            options += ['-p', '{}'.format(port)]
        self._ctrl_options = options

        # essential properties of the remote system
        self._remote_props = {}
        self._opened_by_us = False
Example #7
0
    def __init__(self,
                 ctrl_path,
                 sshri,
                 identity_file=None,
                 use_remote_annex_bundle=True):
        """Prepare a handler for an SSH connection.

        Nothing is connected here; the connection itself is opened
        on-demand.

        Parameters
        ----------
        ctrl_path: str
          location of the SSH controlmaster
        sshri: SSHRI
          SSH resource identifier carrying the connection-relevant
          information, or another resource identifier that can be
          converted into an SSHRI.
        identity_file : str or None
          Value handed to the -i option of ssh.
        use_remote_annex_bundle : bool
          If set, look for a git-annex installation on the remote and
          prefer its binaries in the search path (i.e. prefer a bundled
          Git over a system package).

        Raises
        ------
        ValueError
          if `sshri` is not recognized as an SSH resource identifier.
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            message = (
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
            raise ValueError(message)

        # carry over only the username, hostname and port fields
        endpoint_fields = {}
        for name, value in sshri.fields.items():
            if name in ('username', 'hostname', 'port'):
                endpoint_fields[name] = value
        self.sshri = SSHRI(**endpoint_fields)

        options = ["-o", "ControlPath=\"%s\"" % ctrl_path]
        self._ctrl_options = options
        self.ctrl_path = Path(ctrl_path)
        # the port flag is added to the very same list object
        port = self.sshri.port
        if port:
            options += ['-p', '{}'.format(port)]

        self._identity_file = identity_file
        self._use_remote_annex_bundle = use_remote_annex_bundle

        # essential properties of the remote system
        self._remote_props = {}
        self._opened_by_us = False
class SSHConnection(object):
    """Representation of a (shared) ssh connection.

    Remote commands and scp transfers are issued with a `ControlPath`
    option pointing at `ctrl_path`; `open()` starts the corresponding
    SSH ControlMaster when no such path exists yet.
    """
    def __init__(self,
                 ctrl_path,
                 sshri,
                 identity_file=None,
                 use_remote_annex_bundle=True,
                 force_ip=False):
        """Create a connection handler

        The actual opening of the connection is performed on-demand.

        Parameters
        ----------
        ctrl_path: str
          path to SSH controlmaster
        sshri: SSHRI
          SSH resource identifier (contains all connection-relevant info),
          or another resource identifier that can be converted into an SSHRI.
        identity_file : str or None
          Value to pass to ssh's -i option.
        use_remote_annex_bundle : bool
          If set, look for a git-annex installation on the remote and
          prefer its binaries in the search path (i.e. prefer a bundled
          Git over a system package).
        force_ip : {False, 4, 6}
           Force the use of IPv4 or IPv6 addresses with -4 or -6.

        Raises
        ------
        ValueError
          If `sshri` is not recognized as an SSH resource identifier.
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            raise ValueError(
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
        # keep only the fields that describe the connection endpoint
        self.sshri = SSHRI(
            **{
                k: v
                for k, v in sshri.fields.items()
                if k in ('username', 'hostname', 'port')
            })
        # on windows cmd args lists are always converted into a string using appropriate
        # quoting rules, on other platforms args lists are passed directly and we need
        # to take care of quoting ourselves
        ctrlpath_arg = "ControlPath={}".format(
            ctrl_path if on_windows else sh_quote(str(ctrl_path)))
        self._ssh_args = ["-o", ctrlpath_arg]
        self.ctrl_path = Path(ctrl_path)
        if self.sshri.port:
            self._ssh_args += ['-p', '{}'.format(self.sshri.port)]

        if force_ip:
            self._ssh_args.append("-{}".format(force_ip))
        self._identity_file = identity_file
        self._use_remote_annex_bundle = use_remote_annex_bundle

        # essential properties of the remote system
        self._remote_props = {}
        self._opened_by_us = False

    def __call__(self, cmd, options=None, stdin=None, log_output=True):
        """Executes a command on the remote.

        It is the callers responsibility to properly quote commands
        for remote execution (e.g. filename with spaces of other special
        characters). Use the `sh_quote()` from the module for this purpose.

        Parameters
        ----------
        cmd: str
          command to run on the remote
        options : list of str, optional
          Additional options to pass to the `-o` flag of `ssh`. Note: Many
          (probably most) of the available configuration options should not be
          set here because they can critically change the properties of the
          connection. This exists to allow options like SendEnv to be set.
        stdin : optional
          Handed to the runner unchanged as the `stdin` of the `ssh` call.
        log_output : bool, optional
          Selects the runner protocol: `StdOutErrCapture` if true,
          `NoCapture` otherwise.

        Returns
        -------
        tuple of str
          stdout, stderr of the command run.
        """

        # XXX: check for open socket once
        #      and provide roll back if fails to run and was not explicitly
        #      checked first
        # MIH: this would mean that we would have to distinguish failure
        #      of a payload command from failure of SSH itself. SSH however,
        #      only distinguishes success and failure of the entire operation
        #      Increase in fragility from introspection makes a potential
        #      performance benefit a questionable improvement.
        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()

        # locate annex and set the bundled vs. system Git machinery in motion
        if self._use_remote_annex_bundle:
            remote_annex_installdir = self.get_annex_installdir()
            if remote_annex_installdir:
                # make sure to use the bundled git version if any exists
                cmd = '{}; {}'.format(
                    'export "PATH={}:$PATH"'.format(remote_annex_installdir),
                    cmd)

        # build SSH call, feed remote command as a single last argument
        # whatever it contains will go to the remote machine for execution
        # we cannot perform any sort of escaping, because it will limit
        # what we can do on the remote, e.g. concatenate commands with '&&'
        ssh_cmd = ["ssh"] + self._ssh_args
        for opt in options or []:
            ssh_cmd.extend(["-o", opt])

        ssh_cmd += [self.sshri.as_str(), cmd]

        # TODO: pass expect parameters from above?
        # Hard to explain to toplevel users ... So for now, just set True
        out = self.runner.run(
            ssh_cmd,
            protocol=StdOutErrCapture if log_output else NoCapture,
            stdin=stdin)
        return out['stdout'], out['stderr']

    @property
    def runner(self):
        """Runner used for all external calls, created on first access."""
        if self._runner is None:
            self._runner = WitlessRunner()
        return self._runner

    def is_open(self):
        """Report whether the ControlMaster of this connection is running.

        Returns
        -------
        bool
          False if the control path does not exist or the `ssh -O check`
          call exits with code 255 (in which case `close()` is called too),
          True if the check call succeeds.

        Raises
        ------
        CommandError
          If the check call fails with an exit code other than 255.
        """
        if not self.ctrl_path.exists():
            lgr.log(5, "Not opening %s for checking since %s does not exist",
                    self, self.ctrl_path)
            return False
        # check whether controlmaster is still running:
        cmd = ["ssh", "-O", "check"] + self._ssh_args + [self.sshri.as_str()]
        lgr.debug("Checking %s by calling %s", self, cmd)
        try:
            # expect_stderr since ssh would announce to stderr
            # "Master is running" and that is normal, not worthy warning about
            # etc -- we are doing the check here for successful operation
            with tempfile.TemporaryFile() as tempf:
                self.runner.run(
                    cmd,
                    # do not leak output
                    protocol=StdOutErrCapture,
                    stdin=tempf)
            res = True
        except CommandError as e:
            if e.code != 255:
                # this is not a normal SSH error, whine ...
                raise
            # SSH died and left socket behind, or server closed connection
            self.close()
            res = False
        lgr.debug("Check of %s has %s", self,
                  'succeeded' if res else 'failed')
        return res

    def open(self):
        """Opens the connection.

        In other words: Creates the SSH ControlMaster to be used by this
        connection, if it is not there already.

        Returns
        -------
        bool
          True when SSH reports success opening the connection, False when
          a ControlMaster for an open connection already exists.

        Raises
        ------
        ConnectionOpenFailedError
          When starting the SSH ControlMaster process failed.
        """
        # the socket should vanish almost instantly when the connection closes
        # sending explicit 'check' commands to the control master is expensive
        # (needs tempfile to shield stdin, Runner overhead, etc...)
        # as we do not use any advanced features (forwarding, stop[ing the
        # master without exiting) it should be relatively safe to just perform
        # the much cheaper check of an existing control path
        if self.ctrl_path.exists():
            return False

        # set control options
        ctrl_options = [
            "-fN", "-o", "ControlMaster=auto", "-o", "ControlPersist=15m"
        ] + self._ssh_args
        if self._identity_file:
            ctrl_options.extend(["-i", self._identity_file])
        # create ssh control master command
        cmd = ["ssh"] + ctrl_options + [self.sshri.as_str()]

        # start control master:
        lgr.debug("Opening %s by calling %s", self, cmd)
        proc = Popen(cmd)
        # NOTE(review): Popen is created without any pipes. Per the
        # subprocess documentation sending `input` requires stdin=PIPE and
        # the returned stdout/stderr are None without stdout/stderr=PIPE --
        # confirm whether passing `input` here still serves a purpose.
        stdout, stderr = proc.communicate(
            input="\n")  # why the f.. this is necessary?

        # wait till the command exits, connection is conclusively
        # open or not at this point
        exit_code = proc.wait()

        if exit_code != 0:
            raise ConnectionOpenFailedError(
                cmd,
                'Failed to open SSH connection (could not start ControlMaster process)',
                exit_code,
                stdout,
                stderr,
            )
        self._opened_by_us = True
        return True

    def close(self):
        """Closes the connection.

        Does nothing unless this instance started the ControlMaster itself.
        If the `ssh -O stop` call fails, an existing control path is removed.

        Raises
        ------
        CommandError
          If the stop call fails with an exit code other than 255.
        """
        if not self._opened_by_us:
            lgr.debug("Not closing %s since was not opened by itself", self)
            return
        # stop controlmaster:
        cmd = ["ssh", "-O", "stop"] + self._ssh_args + [self.sshri.as_str()]
        lgr.debug("Closing %s by calling %s", self, cmd)
        try:
            self.runner.run(cmd, protocol=StdOutErrCapture)
        except CommandError as e:
            lgr.debug("Failed to run close command")
            if self.ctrl_path.exists():
                lgr.debug("Removing existing control path %s", self.ctrl_path)
                # socket need to go in any case
                self.ctrl_path.unlink()
            if e.code != 255:
                # not a "normal" SSH error
                raise

    def _get_scp_command_spec(self, recursive, preserve_attrs):
        """Internal helper for SCP interface methods

        Returns the leading part of an scp command line as a list: "scp",
        the ssh arguments of this connection with `-p` turned into `-P`,
        then `-r` and/or `-p` when `recursive`/`preserve_attrs` are set.
        """
        # Convert ssh's port flag (-p) to scp's (-P).
        scp_options = ["-P" if x == "-p" else x for x in self._ssh_args]
        # add recursive, preserve_attributes flag if recursive, preserve_attrs set and create scp command
        if recursive:
            scp_options.append("-r")
        if preserve_attrs:
            scp_options.append("-p")
        return ["scp"] + scp_options

    def put(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder to destination on the remote.

        Note: this method performs escaping of filenames to an extent that
        moderately weird ones should work (spaces, quotes, pipes, other
        characters with special shell meaning), but more complicated cases
        might require appropriate external preprocessing of filenames.

        Parameters
        ----------
        source : str or list
          file/folder path(s) to copy from on local
        destination : str
          file/folder path to copy to on remote
        recursive : bool
          flag to enable recursive copying of given sources
        preserve_attrs : bool
          preserve modification times, access times, and modes from the
          original file

        Returns
        -------
        tuple of str
          stdout, stderr of the copy operation.
        """
        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()
        scp_cmd = self._get_scp_command_spec(recursive, preserve_attrs)
        # add source filepath(s) to scp command
        scp_cmd += ensure_list(source)
        # add destination path
        scp_cmd += [
            '%s:%s' % (
                self.sshri.hostname,
                _quote_filename_for_scp(destination),
            )
        ]
        out = self.runner.run(scp_cmd, protocol=StdOutErrCapture)
        return out['stdout'], out['stderr']

    def get(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder from remote to a local destination.

        Note: this method performs escaping of filenames to an extent that
        moderately weird ones should work (spaces, quotes, pipes, other
        characters with special shell meaning), but more complicated cases
        might require appropriate external preprocessing of filenames.

        Parameters
        ----------
        source : str or list
          file/folder path(s) to copy from the remote host
        destination : str
          file/folder path to copy to on the local host
        recursive : bool
          flag to enable recursive copying of given sources
        preserve_attrs : bool
          preserve modification times, access times, and modes from the
          original file

        Returns
        -------
        tuple of str
          stdout, stderr of the copy operation.
        """
        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()
        scp_cmd = self._get_scp_command_spec(recursive, preserve_attrs)
        # add source filepath(s) to scp command, prefixed with the remote host
        scp_cmd += [
            "%s:%s" % (self.sshri.hostname, _quote_filename_for_scp(s))
            for s in ensure_list(source)
        ]
        # add destination path
        scp_cmd += [destination]
        out = self.runner.run(scp_cmd, protocol=StdOutErrCapture)
        return out['stdout'], out['stderr']

    def get_annex_installdir(self):
        """Return the directory of the remote `git-annex-shell`, if any.

        The result is stored in `self._remote_props` and reused on later
        calls.

        Returns
        -------
        str or None
          Stripped output of the remote lookup command, or None when that
          command fails.
        """
        key = 'installdir:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        annex_install_dir = None
        # already set here to avoid any sort of recursion until we know
        # more
        self._remote_props[key] = annex_install_dir
        try:
            with tempfile.TemporaryFile() as tempf:
                # TODO does not work on windows
                annex_install_dir = self(
                    # use sh -e to be able to fail at each stage of the process
                    "sh -e -c 'dirname $(readlink -f $(which git-annex-shell))'",
                    stdin=tempf)[0].strip()
        except CommandError as e:
            lgr.debug('Failed to locate remote git-annex installation: %s',
                      exc_str(e))
        self._remote_props[key] = annex_install_dir
        return annex_install_dir

    def get_annex_version(self):
        """Return the git-annex version reported by the remote.

        The result is stored in `self._remote_props` and reused on later
        calls.

        Returns
        -------
        str or None
          The version, or None when it could not be determined (command
          failure, or output of the fallback command that cannot be parsed).
        """
        key = 'cmd:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        try:
            # modern annex versions
            version = self('git annex version --raw')[0]
        except CommandError:
            # either no annex, or old version
            try:
                # fall back on method that could work with older installations
                out, err = self('git annex version')
                version = out.split('\n')[0].split(':')[1].strip()
            except (CommandError, IndexError) as e:
                # IndexError: first output line has no ':' separated value
                lgr.debug('Failed to determine remote git-annex version: %s',
                          exc_str(e))
                version = None
        self._remote_props[key] = version
        return version

    def get_git_version(self):
        """Return the Git version reported by the remote.

        The result is stored in `self._remote_props` and reused on later
        calls.

        Returns
        -------
        str or None
          Third whitespace-separated token of the `git version` output, or
          None when the command fails or prints fewer than three tokens.
        """
        key = 'cmd:git'
        if key in self._remote_props:
            return self._remote_props[key]
        git_version = None
        try:
            git_version = self('git version')[0].split()[2]
        except (CommandError, IndexError) as e:
            # IndexError: output is not of the form "git version <version>"
            lgr.debug('Failed to determine Git version: %s', exc_str(e))
        self._remote_props[key] = git_version
        return git_version
Example #9
0
def test_url_samples():
    """Check how sample strings are parsed into resource identifiers.

    Every sample goes through `_check_ri` together with the RI class and
    the field values it is expected to yield.
    """
    _check_ri("http://example.com", URL, scheme='http', hostname="example.com")
    # "complete" one for classical http
    _check_ri("http://user:pw@example.com:8080/p/sp?p1=v1&p2=v2#frag",
              URL,
              scheme='http',
              hostname="example.com",
              port=8080,
              username='user',
              password='pw',
              path='/p/sp',
              query='p1=v1&p2=v2',
              fragment='frag')

    # sample one for ssh with specifying the scheme
    # XXX? might be useful?  https://github.com/FriendCode/giturlparse.py
    _check_ri("ssh://host/path/sp1",
              URL,
              scheme='ssh',
              hostname='host',
              path='/path/sp1')
    _check_ri("user@host:path/sp1",
              SSHRI,
              hostname='host',
              path='path/sp1',
              username='user')
    _check_ri("host:path/sp1", SSHRI, hostname='host', path='path/sp1')
    _check_ri("host:path", SSHRI, hostname='host', path='path')
    _check_ri("host:/path", SSHRI, hostname='host', path='/path')
    _check_ri("user@host", SSHRI, hostname='host', username='user')
    # TODO!!!  should this be a legit URL like this?
    # _check_ri("host", SSHRI, hostname='host'))
    eq_(repr(RI("host:path")), "SSHRI(hostname='host', path='path')")

    # And now perspective 'datalad', implicit=True urls pointing to the canonical center location
    _check_ri("///", DataLadRI)
    _check_ri("///p/s1", DataLadRI, path='p/s1')
    # could be considered by someone as "URI reference" relative to scheme
    _check_ri("//a/", DataLadRI, remote='a')
    _check_ri("//a/data", DataLadRI, path='data', remote='a')

    # here we will do custom magic allowing only schemes with + in them, such as dl+archive
    # or not so custom as
    _check_ri("hg+https://host/user/proj",
              URL,
              scheme="hg+https",
              hostname='host',
              path='/user/proj')
    # "old" style
    _check_ri("dl+archive:KEY/path/sp1#size=123",
              URL,
              scheme='dl+archive',
              path='KEY/path/sp1',
              fragment='size=123')
    # "new" style
    _check_ri("dl+archive:KEY#path=path/sp1&size=123",
              URL,
              scheme='dl+archive',
              path='KEY',
              fragment='path=path/sp1&size=123')
    # actually above one is probably wrong since we need to encode the path
    _check_ri("dl+archive:KEY#path=path%2Fbsp1&size=123",
              URL,
              scheme='dl+archive',
              path='KEY',
              fragment='path=path%2Fbsp1&size=123')

    #https://en.wikipedia.org/wiki/File_URI_scheme
    _check_ri("file://host", URL, scheme='file', hostname='host')
    _check_ri("file://host/path/sp1",
              URL,
              scheme='file',
              hostname='host',
              path='/path/sp1')
    # stock libraries of Python aren't quite ready for ipv6
    ipv6address = '2001:db8:85a3::8a2e:370:7334'
    _check_ri("file://%s/path/sp1" % ipv6address,
              URL,
              scheme='file',
              hostname=ipv6address,
              path='/path/sp1')
    for lh in ('localhost', '::1', '', '127.3.4.155'):
        _check_ri("file://%s/path/sp1" % lh,
                  URL,
                  localpath='/path/sp1',
                  scheme='file',
                  hostname=lh,
                  path='/path/sp1')
    _check_ri('http://[1fff:0:a88:85a3::ac1f]:8001/index.html',
              URL,
              scheme='http',
              hostname='1fff:0:a88:85a3::ac1f',
              port=8001,
              path='/index.html')
    _check_ri("file:///path/sp1",
              URL,
              localpath='/path/sp1',
              scheme='file',
              path='/path/sp1')
    # we don't do any magical comprehension for home paths/drives for windows
    # of file:// urls, thus leaving /~ and /c: for now:
    _check_ri("file:///~/path/sp1",
              URL,
              localpath='/~/path/sp1',
              scheme='file',
              path='/~/path/sp1')
    _check_ri("file:///%7E/path/sp1",
              URL,
              localpath='/~/path/sp1',
              scheme='file',
              path='/~/path/sp1',
              exact_str=False)
    # not sure but let's check
    _check_ri("file:///c:/path/sp1",
              URL,
              localpath='/c:/path/sp1',
              scheme='file',
              path='/c:/path/sp1',
              exact_str=False)

    # and now implicit paths or actually they are also "URI references"
    _check_ri("f", PathRI, localpath='f', path='f')
    _check_ri("f/s1", PathRI, localpath='f/s1', path='f/s1')
    _check_ri(PurePosixPath("f"), PathRI, localpath='f', path='f')
    _check_ri(PurePosixPath("f/s1"), PathRI, localpath='f/s1', path='f/s1')
    # colons are problematic and might cause confusion into SSHRI
    _check_ri("f/s:1", PathRI, localpath='f/s:1', path='f/s:1')
    _check_ri("f/s:", PathRI, localpath='f/s:', path='f/s:')
    _check_ri("/f", PathRI, localpath='/f', path='/f')
    _check_ri("/f/s1", PathRI, localpath='/f/s1', path='/f/s1')

    # some github ones, just to make sure
    _check_ri("git://host/user/proj",
              URL,
              scheme="git",
              hostname="host",
              path="/user/proj")
    _check_ri("git@host:user/proj",
              SSHRI,
              hostname="host",
              path="user/proj",
              username='git')

    _check_ri('weired:/', SSHRI, hostname='weired', path='/')
    # since schema is not allowing some symbols so we need to add additional check
    _check_ri('weired_url:/', SSHRI, hostname='weired_url', path='/')
    _check_ri('example.com:/', SSHRI, hostname='example.com', path='/')
    _check_ri('example.com:path/sp1',
              SSHRI,
              hostname='example.com',
              path='path/sp1')
    # the sample contains a literal backslash in front of the colon; it is
    # spelled '\\:' because '\:' is not a valid escape sequence
    _check_ri('example.com/path/sp1\\:fname',
              PathRI,
              localpath='example.com/path/sp1\\:fname',
              path='example.com/path/sp1\\:fname')
    # ssh is as stupid as us, so we will stay "Consistently" dumb
    #
    #   $> ssh example.com/path/sp1:fname
    #   ssh: Could not resolve hostname example.com/path/sp1:fname: Name or service not known
    #
    # edit 20190516 yoh: but this looks like a perfectly valid path.
    # SSH knows that it is not a path but its SSHRI so it can stay dumb.
    # We are trying to be smart and choose between RIs (even when we know that
    # it is e.g. a file).
    _check_ri('e.com/p/sp:f',
              PathRI,
              localpath='e.com/p/sp:f',
              path='e.com/p/sp:f')
    _check_ri('user@someho.st/mydir',
              PathRI,
              localpath='user@someho.st/mydir',
              path='user@someho.st/mydir')

    # SSHRIs have .port, but it is empty
    eq_(SSHRI(hostname='example.com').port, '')

    # check that we are getting a warning logged when url can't be reconstructed
    # precisely
    # actually failed to come up with one -- becomes late here
    #_check_ri("http://host///..//p", scheme='http', path='/..//p')

    # actually this one is good enough to trigger a warning and I still don't know
    # what it should exactly be!?
    with swallow_logs(new_level=logging.DEBUG) as cml:
        weired_str = 'weired://'
        weired_url = RI(weired_str)
        repr(weired_url)
        cml.assert_logged('Parsed version of SSHRI .weired:/. '
                          'differs from original .weired://.')
        # but we store original str
        eq_(str(weired_url), weired_str)
        neq_(weired_url.as_str(), weired_str)

    raise SkipTest(
        "TODO: file://::1/some does complain about parsed version dropping ::1"
    )
Example #10
0
class SSHConnection(object):
    """Representation of a (shared) ssh connection.

    Every ssh/scp call issued through an instance is given the same
    ``-o ControlPath=...`` option (plus ``-p <port>`` if the resource
    identifier carries a port), so all calls refer to one control socket.
    """

    def __init__(self, ctrl_path, sshri):
        """Create a connection handler

        The actual opening of the connection is performed on-demand.

        Parameters
        ----------
        ctrl_path: str
          path to SSH controlmaster
        sshri: SSHRI
          SSH resource identifier (contains all connection-relevant info),
          or another resource identifier that can be converted into an SSHRI.

        Raises
        ------
        ValueError
          if `sshri` is not recognized by `is_ssh()`
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            raise ValueError(
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
        # only keep the fields that identify the connection itself
        self.sshri = SSHRI(**{k: v for k, v in sshri.fields.items()
                              if k in ('username', 'hostname', 'port')})
        self.ctrl_path = ctrl_path
        # options shared by all ssh invocations of this connection; treat
        # this list as read-only outside of the constructor
        self._ctrl_options = ["-o", "ControlPath=\"%s\"" % self.ctrl_path]
        if self.sshri.port:
            self._ctrl_options += ['-p', '{}'.format(self.sshri.port)]

        # essential properties of the remote system
        self._remote_props = {}
        self._opened_by_us = False

    def __call__(self, cmd, stdin=None, log_output=True):
        """Executes a command on the remote.

        It is the callers responsibility to properly quote commands
        for remote execution (e.g. filename with spaces of other special
        characters). Use the `sh_quote()` from the module for this purpose.

        Parameters
        ----------
        cmd: str
          command to run on the remote
        stdin: file-like, optional
          passed on as `stdin` to the runner executing ssh
        log_output: bool, optional
          passed to the runner as `log_stdout` and `log_stderr`;
          `log_online` is set to the opposite value

        Returns
        -------
        tuple of str
          stdout, stderr of the command run.

        Raises
        ------
        RuntimeError
          if the connection is not open and could not be opened
        """

        # TODO:  do not do all those checks for every invocation!!
        # TODO: check for annex location once, check for open socket once
        #       and provide roll back if fails to run and was not explicitly
        #       checked first
        if not self.is_open():
            if not self.open():
                raise RuntimeError(
                    'Cannot open SSH connection to {}'.format(
                        self.sshri))

        # locate annex and set the bundled vs. system Git machinery in motion
        remote_annex_installdir = self.get_annex_installdir()
        if remote_annex_installdir:
            # make sure to use the bundled git version if any exists
            cmd = '{}; {}'.format(
                'export "PATH={}:$PATH"'.format(remote_annex_installdir),
                cmd)

        # build SSH call, feed remote command as a single last argument
        # whatever it contains will go to the remote machine for execution
        # we cannot perform any sort of escaping, because it will limit
        # what we can do on the remote, e.g. concatenate commands with '&&'
        ssh_cmd = ["ssh"] + self._ctrl_options
        ssh_cmd += [self.sshri.as_str()] \
            + [cmd]

        kwargs = dict(
            log_stdout=log_output, log_stderr=log_output,
            log_online=not log_output
        )

        # TODO: pass expect parameters from above?
        # Hard to explain to toplevel users ... So for now, just set True
        return self.runner.run(
            ssh_cmd,
            expect_fail=True,
            expect_stderr=True,
            stdin=stdin,
            **kwargs)

    @property
    def runner(self):
        """Runner used for all external calls; instantiated on first use."""
        if self._runner is None:
            self._runner = Runner()
        return self._runner

    def is_open(self):
        """Check whether the control master of this connection is running.

        Returns
        -------
        bool
          False if the control socket path does not exist, or if
          ``ssh -O check`` exits with code 255 (`close()` is called in that
          case); True if the check command succeeds.

        Raises
        ------
        CommandError
          if the check command fails with an exit code other than 255
        """
        if not exists(self.ctrl_path):
            lgr.log(
                5,
                "Not opening %s for checking since %s does not exist",
                self, self.ctrl_path
            )
            return False
        # check whether controlmaster is still running:
        cmd = ["ssh", "-O", "check"] + self._ctrl_options + [self.sshri.as_str()]
        lgr.debug("Checking %s by calling %s", self, cmd)
        # the context manager guarantees the handle is closed on every path
        with open('/dev/null') as null:
            try:
                # expect_stderr: ssh announces "Master is running" on stderr,
                # which is the normal outcome of a successful check
                self.runner.run(cmd, stdin=null, expect_stderr=True)
                res = True
            except CommandError as e:
                if e.code != 255:
                    # this is not a normal SSH error, whine ...
                    raise
                # SSH died and left socket behind, or server closed connection
                self.close()
                res = False
        lgr.debug("Check of %s has %s", self, {True: 'succeeded', False: 'failed'}[res])
        return res

    def open(self):
        """Opens the connection.

        In other words: Creates the SSH controlmaster to be used by this
        connection, if it is not there already.

        Returns
        -------
        bool or None
          None if the connection was found to be open already (nothing is
          done in that case); otherwise whether the ssh call starting the
          control master exited with code 0.
        """
        if self.is_open():
            return

        # set control options
        ctrl_options = ["-fN",
                        "-o", "ControlMaster=auto",
                        "-o", "ControlPersist=15m"] + self._ctrl_options
        # create ssh control master command
        cmd = ["ssh"] + ctrl_options + [self.sshri.as_str()]

        # start control master:
        lgr.debug("Opening %s by calling %s", self, cmd)
        proc = Popen(cmd)
        # NOTE: no pipes are requested from Popen, hence the values returned
        # by communicate() are None and only serve the log message below
        stdout, stderr = proc.communicate(input="\n")  # why the f.. this is necessary?

        # wait till the command exits, connection is conclusively
        # open or not at this point
        exit_code = proc.wait()
        ret = exit_code == 0

        if not ret:
            lgr.warning(
                "Failed to run cmd %s. Exit code=%s\nstdout: %s\nstderr: %s",
                cmd, exit_code, stdout, stderr
            )
        else:
            # remember that we are responsible for stopping the master
            self._opened_by_us = True
        return ret

    def close(self):
        """Closes the connection.

        Nothing is done unless the control master was started by this
        instance. If the stop command fails, a left-over control socket is
        removed.

        Raises
        ------
        CommandError
          if the stop command fails with an exit code other than 255
        """
        if not self._opened_by_us:
            lgr.debug("Not closing %s since was not opened by itself", self)
            return
        # stop controlmaster:
        cmd = ["ssh", "-O", "stop"] + self._ctrl_options + [self.sshri.as_str()]
        lgr.debug("Closing %s by calling %s", self, cmd)
        try:
            self.runner.run(cmd, expect_stderr=True, expect_fail=True)
        except CommandError as e:
            lgr.debug("Failed to run close command")
            if exists(self.ctrl_path):
                lgr.debug("Removing existing control path %s", self.ctrl_path)
                # socket need to go in any case
                remove(self.ctrl_path)
            if e.code != 255:
                # not a "normal" SSH error
                raise

    def copy(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder to destination on the remote.

        Parameters
        ----------
        source: str or list
          file/folder path(s) to copy from on local
        destination: str
          file/folder path to copy to on remote
        recursive: bool, optional
          add scp's ``-r`` flag
        preserve_attrs: bool, optional
          add scp's ``-p`` flag

        Returns
        -------
        The return value of the runner for the scp call (`__call__`
        documents the runner as returning stdout, stderr).
        """
        # Always build a fresh list: `self._ctrl_options` is shared by all
        # ssh calls of this connection and must not be extended in place.
        # ssh spells the port flag `-p`, whereas scp uses `-P` (`-p` means
        # "preserve attributes" for scp), so translate it on the way.
        scp_options = ["-P" if opt == "-p" else opt
                       for opt in self._ctrl_options]
        if recursive:
            scp_options.append("-r")
        if preserve_attrs:
            scp_options.append("-p")
        scp_cmd = ["scp"] + scp_options

        # add source filepath(s) to scp command
        scp_cmd += source if isinstance(source, list) else [source]

        # add destination path
        scp_cmd += ['%s:"%s"' % (self.sshri.hostname, destination)]
        return self.runner.run(scp_cmd)

    def get_annex_installdir(self):
        """Return the directory of `git-annex-shell` on the remote.

        The result is cached in `self._remote_props`.

        Returns
        -------
        str or None
          stripped output of the remote lookup command, or None if that
          command failed
        """
        key = 'installdir:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        annex_install_dir = None
        # already set here to avoid any sort of recursion until we know
        # more (`__call__` itself asks for the install dir)
        self._remote_props[key] = annex_install_dir
        try:
            with open('/dev/null') as null:
                annex_install_dir = self(
                    # use sh -e to be able to fail at each stage of the process
                    "sh -e -c 'dirname $(readlink -f $(which git-annex-shell))'"
                    , stdin=null
                )[0].strip()
        except CommandError as e:
            lgr.debug('Failed to locate remote git-annex installation: %s',
                      exc_str(e))
        self._remote_props[key] = annex_install_dir
        return annex_install_dir

    def get_annex_version(self):
        """Return the git-annex version reported by the remote.

        The result is cached in `self._remote_props`.

        Returns
        -------
        str or None
          None if the version could not be determined
        """
        key = 'cmd:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        try:
            # modern annex versions
            version = self('git annex version --raw')[0]
        except CommandError:
            # either no annex, or old version
            try:
                # fall back on method that could work with older installations
                out, err = self('git annex version')
            except CommandError as e:
                lgr.debug('Failed to determine remote git-annex version: %s',
                          exc_str(e))
                version = None
            else:
                # expect "<label>: <version>" on the first line; report an
                # unknown version instead of failing with an IndexError
                # when the output has no such structure
                fields = out.split('\n')[0].split(':')
                version = fields[1].strip() if len(fields) > 1 else None
        self._remote_props[key] = version
        return version

    def get_git_version(self):
        """Return the Git version reported by the remote.

        The result is cached in `self._remote_props`.

        Returns
        -------
        str or None
          third whitespace-separated field of the `git version` output, or
          None if the command failed or printed fewer than three fields
        """
        key = 'cmd:git'
        if key in self._remote_props:
            return self._remote_props[key]
        git_version = None
        try:
            out = self('git version')[0]
        except CommandError as e:
            lgr.debug('Failed to determine Git version: %s',
                      exc_str(e))
        else:
            fields = out.split()
            # unexpected (e.g. empty) output means "unknown", not a crash
            if len(fields) > 2:
                git_version = fields[2]
        self._remote_props[key] = git_version
        return git_version
Example #11
0
class SSHConnection(object):
    """Representation of a (shared) ssh connection.

    Every ssh/scp call issued through an instance is given the same
    ``-o ControlPath=...`` option (plus ``-p <port>`` if the resource
    identifier carries a port), so all calls refer to one control socket.
    """

    def __init__(self, ctrl_path, sshri):
        """Create a connection handler

        The actual opening of the connection is performed on-demand.

        Parameters
        ----------
        ctrl_path: str
          path to SSH controlmaster
        sshri: SSHRI
          SSH resource identifier (contains all connection-relevant info),
          or another resource identifier that can be converted into an SSHRI.

        Raises
        ------
        ValueError
          if `sshri` is not recognized by `is_ssh()`
        """
        self._runner = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            raise ValueError(
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
        # only keep the fields that identify the connection itself
        self.sshri = SSHRI(**{k: v for k, v in sshri.fields.items()
                              if k in ('username', 'hostname', 'port')})
        self.ctrl_path = ctrl_path
        # options shared by all ssh invocations of this connection; treat
        # this list as read-only outside of the constructor
        self._ctrl_options = ["-o", "ControlPath=\"%s\"" % self.ctrl_path]
        if self.sshri.port:
            self._ctrl_options += ['-p', '{}'.format(self.sshri.port)]

        # essential properties of the remote system
        self._remote_props = {}
        self._opened_by_us = False

    def __call__(self, cmd, stdin=None, log_output=True):
        """Executes a command on the remote.

        It is the callers responsibility to properly quote commands
        for remote execution (e.g. filename with spaces of other special
        characters). Use the `sh_quote()` from the module for this purpose.

        Parameters
        ----------
        cmd: str
          command to run on the remote
        stdin: file-like, optional
          passed on as `stdin` to the runner executing ssh
        log_output: bool, optional
          passed to the runner as `log_stdout` and `log_stderr`;
          `log_online` is set to the opposite value

        Returns
        -------
        tuple of str
          stdout, stderr of the command run.

        Raises
        ------
        RuntimeError
          if the connection is not open and could not be opened
        """

        # TODO:  do not do all those checks for every invocation!!
        # TODO: check for annex location once, check for open socket once
        #       and provide roll back if fails to run and was not explicitly
        #       checked first
        if not self.is_open():
            if not self.open():
                raise RuntimeError(
                    'Cannot open SSH connection to {}'.format(
                        self.sshri))

        # locate annex and set the bundled vs. system Git machinery in motion
        remote_annex_installdir = self.get_annex_installdir()
        if remote_annex_installdir:
            # make sure to use the bundled git version if any exists
            cmd = '{}; {}'.format(
                'export "PATH={}:$PATH"'.format(remote_annex_installdir),
                cmd)

        # build SSH call, feed remote command as a single last argument
        # whatever it contains will go to the remote machine for execution
        # we cannot perform any sort of escaping, because it will limit
        # what we can do on the remote, e.g. concatenate commands with '&&'
        ssh_cmd = ["ssh"] + self._ctrl_options
        ssh_cmd += [self.sshri.as_str()] \
            + [cmd]

        kwargs = dict(
            log_stdout=log_output, log_stderr=log_output,
            log_online=not log_output
        )

        # TODO: pass expect parameters from above?
        # Hard to explain to toplevel users ... So for now, just set True
        return self.runner.run(
            ssh_cmd,
            expect_fail=True,
            expect_stderr=True,
            stdin=stdin,
            **kwargs)

    @property
    def runner(self):
        """Runner used for all external calls; instantiated on first use."""
        if self._runner is None:
            self._runner = Runner()
        return self._runner

    def is_open(self):
        """Check whether the control master of this connection is running.

        Returns
        -------
        bool
          False if the control socket path does not exist, or if
          ``ssh -O check`` exits with code 255 (`close()` is called in that
          case); True if the check command succeeds.

        Raises
        ------
        CommandError
          if the check command fails with an exit code other than 255
        """
        if not exists(self.ctrl_path):
            lgr.log(
                5,
                "Not opening %s for checking since %s does not exist",
                self, self.ctrl_path
            )
            return False
        # check whether controlmaster is still running:
        cmd = ["ssh", "-O", "check"] + self._ctrl_options + [self.sshri.as_str()]
        lgr.debug("Checking %s by calling %s", self, cmd)
        # the context manager guarantees the handle is closed on every path
        with open('/dev/null') as null:
            try:
                # expect_stderr since ssh would announce to stderr
                # "Master is running" and that is normal, not worthy warning
                # about etc -- we are doing the check here for successful
                # operation
                self.runner.run(cmd, stdin=null, expect_stderr=True)
                res = True
            except CommandError as e:
                if e.code != 255:
                    # this is not a normal SSH error, whine ...
                    raise
                # SSH died and left socket behind, or server closed connection
                self.close()
                res = False
        lgr.debug("Check of %s has %s", self, {True: 'succeeded', False: 'failed'}[res])
        return res

    def open(self):
        """Opens the connection.

        In other words: Creates the SSH controlmaster to be used by this
        connection, if it is not there already.

        Returns
        -------
        bool or None
          None if the connection was found to be open already (nothing is
          done in that case); otherwise whether the ssh call starting the
          control master exited with code 0.
        """
        if self.is_open():
            return

        # set control options
        ctrl_options = ["-fN",
                        "-o", "ControlMaster=auto",
                        "-o", "ControlPersist=15m"] + self._ctrl_options
        # create ssh control master command
        cmd = ["ssh"] + ctrl_options + [self.sshri.as_str()]

        # start control master:
        lgr.debug("Opening %s by calling %s", self, cmd)
        proc = Popen(cmd)
        # NOTE: no pipes are requested from Popen, hence the values returned
        # by communicate() are None and only serve the log message below
        stdout, stderr = proc.communicate(input="\n")  # why the f.. this is necessary?

        # wait till the command exits, connection is conclusively
        # open or not at this point
        exit_code = proc.wait()
        ret = exit_code == 0

        if not ret:
            lgr.warning(
                "Failed to run cmd %s. Exit code=%s\nstdout: %s\nstderr: %s",
                cmd, exit_code, stdout, stderr
            )
        else:
            # remember that we are responsible for stopping the master
            self._opened_by_us = True
        return ret

    def close(self):
        """Closes the connection.

        Nothing is done unless the control master was started by this
        instance. If the stop command fails, a left-over control socket is
        removed.

        Raises
        ------
        CommandError
          if the stop command fails with an exit code other than 255
        """
        if not self._opened_by_us:
            lgr.debug("Not closing %s since was not opened by itself", self)
            return
        # stop controlmaster:
        cmd = ["ssh", "-O", "stop"] + self._ctrl_options + [self.sshri.as_str()]
        lgr.debug("Closing %s by calling %s", self, cmd)
        try:
            self.runner.run(cmd, expect_stderr=True, expect_fail=True)
        except CommandError as e:
            lgr.debug("Failed to run close command")
            if exists(self.ctrl_path):
                lgr.debug("Removing existing control path %s", self.ctrl_path)
                # socket need to go in any case
                remove(self.ctrl_path)
            if e.code != 255:
                # not a "normal" SSH error
                raise

    def copy(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder to destination on the remote.

        Parameters
        ----------
        source: str or list
          file/folder path(s) to copy from on local
        destination: str
          file/folder path to copy to on remote
        recursive: bool, optional
          add scp's ``-r`` flag
        preserve_attrs: bool, optional
          add scp's ``-p`` flag

        Returns
        -------
        The return value of the runner for the scp call (`__call__`
        documents the runner as returning stdout, stderr).
        """
        # Always build a fresh list: `self._ctrl_options` is shared by all
        # ssh calls of this connection and must not be extended in place.
        # ssh spells the port flag `-p`, whereas scp uses `-P` (`-p` means
        # "preserve attributes" for scp), so translate it on the way.
        scp_options = ["-P" if opt == "-p" else opt
                       for opt in self._ctrl_options]
        if recursive:
            scp_options.append("-r")
        if preserve_attrs:
            scp_options.append("-p")
        scp_cmd = ["scp"] + scp_options

        # add source filepath(s) to scp command
        scp_cmd += source if isinstance(source, list) else [source]

        # add destination path
        scp_cmd += ['%s:"%s"' % (self.sshri.hostname, destination)]
        return self.runner.run(scp_cmd)

    def get_annex_installdir(self):
        """Return the directory of `git-annex-shell` on the remote.

        The result is cached in `self._remote_props`.

        Returns
        -------
        str or None
          stripped output of the remote lookup command, or None if that
          command failed
        """
        key = 'installdir:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        annex_install_dir = None
        # already set here to avoid any sort of recursion until we know
        # more (`__call__` itself asks for the install dir)
        self._remote_props[key] = annex_install_dir
        try:
            with open('/dev/null') as null:
                annex_install_dir = self(
                    # use sh -e to be able to fail at each stage of the process
                    "sh -e -c 'dirname $(readlink -f $(which git-annex-shell))'"
                    , stdin=null
                )[0].strip()
        except CommandError as e:
            lgr.debug('Failed to locate remote git-annex installation: %s',
                      exc_str(e))
        self._remote_props[key] = annex_install_dir
        return annex_install_dir

    def get_annex_version(self):
        """Return the git-annex version reported by the remote.

        The result is cached in `self._remote_props`.

        Returns
        -------
        str or None
          None if the version could not be determined
        """
        key = 'cmd:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        try:
            # modern annex versions
            version = self('git annex version --raw')[0]
        except CommandError:
            # either no annex, or old version
            try:
                # fall back on method that could work with older installations
                out, err = self('git annex version')
            except CommandError as e:
                lgr.debug('Failed to determine remote git-annex version: %s',
                          exc_str(e))
                version = None
            else:
                # expect "<label>: <version>" on the first line; report an
                # unknown version instead of failing with an IndexError
                # when the output has no such structure
                fields = out.split('\n')[0].split(':')
                version = fields[1].strip() if len(fields) > 1 else None
        self._remote_props[key] = version
        return version

    def get_git_version(self):
        """Return the Git version reported by the remote.

        The result is cached in `self._remote_props`.

        Returns
        -------
        str or None
          third whitespace-separated field of the `git version` output, or
          None if the command failed or printed fewer than three fields
        """
        key = 'cmd:git'
        if key in self._remote_props:
            return self._remote_props[key]
        git_version = None
        try:
            out = self('git version')[0]
        except CommandError as e:
            lgr.debug('Failed to determine Git version: %s',
                      exc_str(e))
        else:
            fields = out.split()
            # unexpected (e.g. empty) output means "unknown", not a crash
            if len(fields) > 2:
                git_version = fields[2]
        self._remote_props[key] = git_version
        return git_version
Example #12
0
class BaseSSHConnection(object):
    """Representation of an SSH connection.

    This class collects the connection parameters and the helpers built on
    top of command execution; `__call__`, `open` and `close` raise
    NotImplementedError here.
    """
    def __init__(self,
                 sshri,
                 identity_file=None,
                 use_remote_annex_bundle=None,
                 force_ip=False):
        """Create a connection handler

        The actual opening of the connection is performed on-demand.

        Parameters
        ----------
        sshri: SSHRI
          SSH resource identifier (contains all connection-relevant info),
          or another resource identifier that can be converted into an SSHRI.
        identity_file : str or None
          Value to pass to ssh's -i option.
        use_remote_annex_bundle : bool, optional
          If enabled, look for a git-annex installation on the remote and
          prefer its Git binaries in the search path (i.e. prefer a bundled
          Git over a system package). See also the configuration setting
          datalad.ssh.try-use-annex-bundled-git
        force_ip : {False, 4, 6}
           Force the use of IPv4 or IPv6 addresses with -4 or -6.

        Raises
        ------
        ValueError
          if `sshri` is not recognized by `is_ssh()`

        .. versionchanged:: 0.16
           The default for `use_remote_annex_bundle` changed from `True`
           to `None`. Instead of attempting to use a potentially available
           git-annex bundle on the remote host by default, this behavior
           is now conditional on the `datalad.ssh.try-use-annex-bundled-git`
           (off by default).
        """
        self._runner = None
        self._ssh_executable = None

        from datalad.support.network import SSHRI, is_ssh
        if not is_ssh(sshri):
            raise ValueError(
                "Non-SSH resource identifiers are not supported for SSH "
                "connections: {}".format(sshri))
        # only keep the fields that identify the connection itself
        self.sshri = SSHRI(
            **{
                k: v
                for k, v in sshri.fields.items()
                if k in ('username', 'hostname', 'port')
            })
        # arguments only used for opening a connection
        self._ssh_open_args = []
        # arguments for annex ssh invocation
        self._ssh_args = []
        self._ssh_open_args.extend(
            ['-p', '{}'.format(self.sshri.port)] if self.sshri.port else [])
        if force_ip:
            self._ssh_open_args.append("-{}".format(force_ip))
        if identity_file:
            self._ssh_open_args.extend(["-i", identity_file])

        self._use_remote_annex_bundle = use_remote_annex_bundle
        # essential properties of the remote system
        self._remote_props = {}

    def __call__(self, cmd, options=None, stdin=None, log_output=True):
        """Executes a command on the remote.

        It is the callers responsibility to properly quote commands
        for remote execution (e.g. filename with spaces of other special
        characters).

        Parameters
        ----------
        cmd: str
          command to run on the remote
        options : list of str, optional
          Additional options to pass to the `-o` flag of `ssh`. Note: Many
          (probably most) of the available configuration options should not be
          set here because they can critically change the properties of the
          connection. This exists to allow options like SendEnv to be set.

        Returns
        -------
        tuple of str
          stdout, stderr of the command run.
        """
        raise NotImplementedError

    def open(self):
        """Opens the connection.

        Returns
        -------
        bool
          To return True if connection establishes a control socket successfully.
          Return False otherwise
        """

        raise NotImplementedError

    def close(self):
        """Closes the connection.
        """

        raise NotImplementedError

    @property
    def ssh_executable(self):
        """determine which ssh client executable should be used.

        The value is obtained from the `datalad.ssh.executable`
        configuration on first access and kept for later calls.
        """
        if not self._ssh_executable:
            from datalad import cfg
            self._ssh_executable = cfg.obtain("datalad.ssh.executable")
        return self._ssh_executable

    @property
    def runner(self):
        """Runner used for all external calls; instantiated on first use."""
        if self._runner is None:
            self._runner = WitlessRunner()
        return self._runner

    def _adjust_cmd_for_bundle_execution(self, cmd):
        """Prefix `cmd` with a PATH export for a remote git-annex bundle.

        `cmd` is returned unchanged unless bundle use is enabled (via the
        constructor argument or `datalad.ssh.try-use-annex-bundled-git`)
        and a remote git-annex installation directory was found.
        """
        from datalad import cfg
        # locate annex and set the bundled vs. system Git machinery in motion
        if self._use_remote_annex_bundle \
                or cfg.obtain('datalad.ssh.try-use-annex-bundled-git'):
            remote_annex_installdir = self.get_annex_installdir()
            if remote_annex_installdir:
                # make sure to use the bundled git version if any exists
                cmd = '{}; {}'.format(
                    'export "PATH={}:$PATH"'.format(remote_annex_installdir),
                    cmd)
        return cmd

    def _exec_ssh(self,
                  ssh_cmd,
                  cmd,
                  options=None,
                  stdin=None,
                  log_output=True):
        """Run `cmd` on the remote via the given ssh command line.

        Parameters
        ----------
        ssh_cmd : list of str
          ssh executable and its leading arguments; not modified
        cmd : str
          command to run on the remote
        options : list of str, optional
          each item is passed to ssh with a separate `-o` flag
        stdin : file-like, optional
          passed on as `stdin` to the runner
        log_output : bool, optional
          selects the runner protocol: `StdOutErrCapture` if true,
          `NoCapture` otherwise

        Returns
        -------
        tuple
          the 'stdout' and 'stderr' items of the runner result
        """
        cmd = self._adjust_cmd_for_bundle_execution(cmd)

        # work on a copy, the caller's list must not grow as a side effect
        ssh_cmd = list(ssh_cmd)
        for opt in options or []:
            ssh_cmd.extend(["-o", opt])

        # build SSH call, feed remote command as a single last argument
        # whatever it contains will go to the remote machine for execution
        # we cannot perform any sort of escaping, because it will limit
        # what we can do on the remote, e.g. concatenate commands with '&&'
        ssh_cmd += [self.sshri.as_str()] + [cmd]

        lgr.debug("%s is used to run %s", self, ssh_cmd)

        # TODO: pass expect parameters from above?
        # Hard to explain to toplevel users ... So for now, just set True
        out = self.runner.run(
            ssh_cmd,
            protocol=StdOutErrCapture if log_output else NoCapture,
            stdin=stdin)
        return out['stdout'], out['stderr']

    def _get_scp_command_spec(self, recursive, preserve_attrs):
        """Internal helper for SCP interface methods

        Returns the leading part of an scp command line (a new list):
        "scp", the ssh arguments of this connection, and `-r`/`-p` as
        requested.
        """
        # Convert ssh's port flag (-p) to scp's (-P).
        scp_options = ["-P" if x == "-p" else x for x in self._ssh_args]
        # add recursive, preserve_attributes flag if recursive, preserve_attrs set and create scp command
        scp_options += ["-r"] if recursive else []
        scp_options += ["-p"] if preserve_attrs else []
        return ["scp"] + scp_options

    def put(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder to destination on the remote.

        Note: this method performs escaping of filenames to an extent that
        moderately weird ones should work (spaces, quotes, pipes, other
        characters with special shell meaning), but more complicated cases
        might require appropriate external preprocessing of filenames.

        Parameters
        ----------
        source : str or list
          file/folder path(s) to copy from on local
        destination : str
          file/folder path to copy to on remote
        recursive : bool
          flag to enable recursive copying of given sources
        preserve_attrs : bool
          preserve modification times, access times, and modes from the
          original file

        Returns
        -------
        tuple
          stdout, stderr of the copy operation.
        """
        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()
        scp_cmd = self._get_scp_command_spec(recursive, preserve_attrs)
        # add source filepath(s) to scp command
        scp_cmd += ensure_list(source)
        # add destination path
        scp_cmd += [
            '%s:%s' % (
                self.sshri.hostname,
                _quote_filename_for_scp(destination),
            )
        ]
        out = self.runner.run(scp_cmd, protocol=StdOutErrCapture)
        return out['stdout'], out['stderr']

    def get(self, source, destination, recursive=False, preserve_attrs=False):
        """Copies source file/folder from remote to a local destination.

        Note: this method performs escaping of filenames to an extent that
        moderately weird ones should work (spaces, quotes, pipes, other
        characters with special shell meaning), but more complicated cases
        might require appropriate external preprocessing of filenames.

        Parameters
        ----------
        source : str or list
          file/folder path(s) to copy from the remote host
        destination : str
          file/folder path to copy to on the local host
        recursive : bool
          flag to enable recursive copying of given sources
        preserve_attrs : bool
          preserve modification times, access times, and modes from the
          original file

        Returns
        -------
        tuple
          stdout, stderr of the copy operation.
        """
        # make sure we have an open connection, will test if action is needed
        # by itself
        self.open()
        scp_cmd = self._get_scp_command_spec(recursive, preserve_attrs)
        # add source filepath(s) to scp command, prefixed with the remote host
        scp_cmd += [
            "%s:%s" % (self.sshri.hostname, _quote_filename_for_scp(s))
            for s in ensure_list(source)
        ]
        # add destination path
        scp_cmd += [destination]
        out = self.runner.run(scp_cmd, protocol=StdOutErrCapture)
        return out['stdout'], out['stderr']

    def get_annex_installdir(self):
        """Return the directory of `git-annex-shell` on the remote.

        The result is cached in `self._remote_props`.

        Returns
        -------
        str or None
          stripped output of the remote lookup command, or None if that
          command failed
        """
        key = 'installdir:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        annex_install_dir = None
        # already set here to avoid any sort of recursion until we know
        # more
        self._remote_props[key] = annex_install_dir
        try:
            # an (empty) temporary file serves as stdin of the remote call
            with tempfile.TemporaryFile() as tempf:
                # TODO does not work on windows
                annex_install_dir = self(
                    # use sh -e to be able to fail at each stage of the process
                    "sh -e -c 'dirname $(readlink -f $(which git-annex-shell))'",
                    stdin=tempf)[0].strip()
        except CommandError as e:
            lgr.debug('Failed to locate remote git-annex installation: %s',
                      CapturedException(e))
        self._remote_props[key] = annex_install_dir
        return annex_install_dir

    def get_annex_version(self):
        """Return the git-annex version reported by the remote.

        The result is cached in `self._remote_props`.

        Returns
        -------
        str or None
          None if the version could not be determined
        """
        key = 'cmd:annex'
        if key in self._remote_props:
            return self._remote_props[key]
        try:
            # modern annex versions
            version = self('git annex version --raw')[0]
        except CommandError:
            # either no annex, or old version
            try:
                # fall back on method that could work with older installations
                out, err = self('git annex version')
            except CommandError as e:
                lgr.debug('Failed to determine remote git-annex version: %s',
                          CapturedException(e))
                version = None
            else:
                # expect "<label>: <version>" on the first line; report an
                # unknown version instead of failing with an IndexError
                # when the output has no such structure
                fields = out.split('\n')[0].split(':')
                version = fields[1].strip() if len(fields) > 1 else None
        self._remote_props[key] = version
        return version

    def get_git_version(self):
        """Return the Git version reported by the remote.

        The result is cached in `self._remote_props`.

        Returns
        -------
        str or None
          third whitespace-separated field of the `git version` output, or
          None if the command failed or printed fewer than three fields
        """
        key = 'cmd:git'
        if key in self._remote_props:
            return self._remote_props[key]
        git_version = None
        try:
            out = self('git version')[0]
        except CommandError as e:
            lgr.debug('Failed to determine Git version: %s',
                      CapturedException(e))
        else:
            fields = out.split()
            # unexpected (e.g. empty) output means "unknown", not a crash
            if len(fields) > 2:
                git_version = fields[2]
        self._remote_props[key] = git_version
        return git_version