def reconnect(self, password):
    """Attempt to reconnect to the node using the standard connect(), but
    do not do so indefinitely. This imposes a strict number of retry
    attempts before failing out.

    :param password: passed through unchanged to connect() on each attempt
    :returns: True as soon as one connect() call returns a truthy value
    :raises ConnectionException: when every attempt failed; the message
        carries the last exception caught from connect(), or 'unknown'
        if connect() only ever returned a falsy value without raising
    """
    max_attempts = 5
    last_err = 'unknown'
    # Count from 1 through max_attempts inclusive so that the number of
    # tries actually made matches the count reported in the error below
    for attempt in range(1, max_attempts + 1):
        self.log_debug("Attempting reconnect (#%s) to node" % attempt)
        try:
            if self.connect(password):
                return True
        except Exception as err:
            # Any failure is only recorded here; the loop moves on to the
            # next attempt rather than aborting on the first error
            self.log_debug("Attempt #%s exception: %s" % (attempt, err))
            last_err = err
    self.log_error("Unable to reconnect to node after %s attempts, "
                   "aborting." % max_attempts)
    raise ConnectionException("last exception from transport: %s"
                              % last_err)
def _create_ssh_session(self):
    """
    Using ControlPersist, create the initial connection to the node.

    This will generate an OpenSSH ControlPersist socket within the tmp
    directory created or specified for sos-collector to use.

    At most, we will wait 30 seconds for a connection. This involves a 15
    second wait for the initial connection attempt, and a subsequent 15
    second wait for a response when we supply a password.

    Since we connect to nodes in parallel (using the --threads value), this
    means that the time between 'Connecting to nodes...' and 'Beginning
    collection of sosreports' that users see can be up to an amount of time
    equal to 30*(num_nodes/threads) seconds.

    Returns True if session is successfully opened, else raise Exception

    Raises:
        InvalidPasswordException: the supplied password was rejected
        TimeoutPasswordAuthException: no response within 15 seconds of
            sending the password
        PasswordRequestException: ssh prompted for a password but none is
            set on this object
        AuthPermissionDeniedException: ssh reported permission denied
            before any password prompt
        ConnectionException: no route to the host, or the hostname could
            not be resolved
        ConnectionTimeoutException: no recognised output within 15 seconds
            of starting ssh
    """
    # Don't use self.ssh_cmd here as we need to add a few additional
    # parameters to establish the initial connection
    self.log_info('Opening SSH session to create control socket')
    connected = False
    ssh_key = ''
    ssh_port = ''
    if self.opts.ssh_port != 22:
        ssh_port = "-p%s " % self.opts.ssh_port
    if self.opts.ssh_key:
        ssh_key = "-i%s" % self.opts.ssh_key
    cmd = ("ssh %s %s -oControlPersist=600 -oControlMaster=auto "
           "-oStrictHostKeyChecking=no -oControlPath=%s %s@%s "
           "\"echo Connected\"" % (ssh_key,
                                   ssh_port,
                                   self.control_path,
                                   self.opts.ssh_user,
                                   self.address))
    res = pexpect.spawn(cmd, encoding='utf-8')

    # The position of each pattern in this list is significant: the index
    # returned by expect() selects the branch taken below
    connect_expects = [
        u'Connected',
        u'password:',
        u'.*Permission denied.*',
        u'.* port .*: No route to host',
        u'.*Could not resolve hostname.*',
        pexpect.TIMEOUT
    ]

    index = res.expect(connect_expects, timeout=15)

    if index == 0:
        connected = True
    elif index == 1:
        if self._password:
            pass_expects = [
                u'Connected',
                u'Permission denied, please try again.',
                pexpect.TIMEOUT
            ]
            res.sendline(self._password)
            pass_index = res.expect(pass_expects, timeout=15)
            if pass_index == 0:
                connected = True
            elif pass_index == 1:
                # Note that we do not get an exitstatus here, so matching
                # this line means an invalid password will be reported for
                # both invalid passwords and invalid user names
                raise InvalidPasswordException
            elif pass_index == 2:
                raise TimeoutPasswordAuthException
        else:
            # ssh asked for a password but we have none to offer
            raise PasswordRequestException
    elif index == 2:
        raise AuthPermissionDeniedException
    elif index == 3:
        raise ConnectionException(self.address, self.opts.ssh_port)
    elif index == 4:
        raise ConnectionException(self.address)
    elif index == 5:
        raise ConnectionTimeoutException
    else:
        # Defensive fallback for an index outside the list above
        raise Exception("Unknown error, client returned %s" % res.before)
    if connected:
        self.log_debug("Successfully created control socket at %s"
                       % self.control_path)
        return True
    return False