def testTriggerUpdate(self): """Tests that we correctly handle updater errors.""" update_url = 'http://server/test/url' host = self.mox.CreateMockAnything() self.mox.StubOutWithMock(host, 'run') host.hostname = 'test_host' expected_cmd = ('/usr/bin/update_engine_client --check_for_update ' '--omaha_url=http://server/test/url') updater = autoupdater.ChromiumOSUpdater(update_url, host=host) # Test with success. host.run(expected_cmd) # SSH Timeout host.run(expected_cmd).AndRaise( error.AutoservSSHTimeout("ssh timed out", 255)) # SSH Permission Error host.run(expected_cmd).AndRaise( error.AutoservSshPermissionDeniedError("ssh timed out", 255)) # Command Failed Error cmd_result_1 = self.mox.CreateMockAnything() cmd_result_1.exit_status = 1 host.run(expected_cmd).AndRaise( error.AutoservRunError("ssh timed out", cmd_result_1)) # Generic SSH Error (maybe) cmd_result_255 = self.mox.CreateMockAnything() cmd_result_255.exit_status = 255 host.run(expected_cmd).AndRaise( error.AutoservRunError("Sometimes SSH specific result.", cmd_result_255)) self.mox.ReplayAll() # Verify Success. updater.trigger_update() # Verify each type of error listed above. self.assertRaises(autoupdater.RootFSUpdateError, updater.trigger_update) self.assertRaises(autoupdater.RootFSUpdateError, updater.trigger_update) self.assertRaises(autoupdater.RootFSUpdateError, updater.trigger_update) self.assertRaises(autoupdater.RootFSUpdateError, updater.trigger_update) self.mox.VerifyAll()
def _init_transport(self): for path, key in self.keys.iteritems(): try: logging.debug("Connecting with %s", path) transport = self._connect_transport(key) transport.set_keepalive(self.KEEPALIVE_TIMEOUT_SECONDS) self.transport = transport self.pid = os.getpid() return except paramiko.AuthenticationException: logging.debug("Authentication failure") else: raise error.AutoservSshPermissionDeniedError( "Permission denied using all keys available to ParamikoHost", utils.CmdResult())
def _run(self, command, timeout, ignore_status, stdout, stderr, connect_timeout, env, options, stdin, args): """Helper function for run().""" ssh_cmd = self.ssh_command(connect_timeout, options) if not env.strip(): env = "" else: env = "export %s;" % env for arg in args: command += ' "%s"' % utils.sh_escape(arg) full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command)) result = utils.run(full_cmd, timeout, True, stdout, stderr, verbose=False, stdin=stdin, stderr_is_expected=ignore_status) # The error messages will show up in band (indistinguishable # from stuff sent through the SSH connection), so we have the # remote computer echo the message "Connected." before running # any command. Since the following 2 errors have to do with # connecting, it's safe to do these checks. if result.exit_status == 255: if re.search( r'^ssh: connect to host .* port .*: ' r'Connection timed out\r$', result.stderr): raise error.AutoservSSHTimeout("ssh timed out", result) if "Permission denied." in result.stderr: msg = "ssh permission denied" raise error.AutoservSshPermissionDeniedError(msg, result) if not ignore_status and result.exit_status > 0: raise error.AutoservRunError("command execution error", result) return result
def _run(self, command, timeout, ignore_status, stdout, stderr, connect_timeout, env, options, stdin, args, ignore_timeout): """Helper function for run().""" ssh_cmd = self.ssh_command(connect_timeout, options) if not env.strip(): env = "" else: env = "export %s;" % env for arg in args: command += ' "%s"' % utils.sh_escape(arg) full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command)) # TODO(jrbarnette): crbug.com/484726 - When we're in an SSP # container, sometimes shortly after reboot we will see DNS # resolution errors on ssh commands; the problem never # occurs more than once in a row. This especially affects # the autoupdate_Rollback test, but other cases have been # affected, too. # # We work around it by detecting the first DNS resolution error # and retrying exactly one time. dns_retry_count = 2 while True: result = utils.run(full_cmd, timeout, True, stdout, stderr, verbose=False, stdin=stdin, stderr_is_expected=ignore_status, ignore_timeout=ignore_timeout) dns_retry_count -= 1 if (result and result.exit_status == 255 and re.search( r'^ssh: .*: Name or service not known', result.stderr)): if dns_retry_count: logging.debug('Retrying because of DNS failure') continue logging.debug('Retry failed.') autotest_stats.Counter('dns_retry_hack.fail').increment() elif not dns_retry_count: logging.debug('Retry succeeded.') autotest_stats.Counter('dns_retry_hack.pass').increment() break if ignore_timeout and not result: return None # The error messages will show up in band (indistinguishable # from stuff sent through the SSH connection), so we have the # remote computer echo the message "Connected." before running # any command. Since the following 2 errors have to do with # connecting, it's safe to do these checks. if result.exit_status == 255: if re.search( r'^ssh: connect to host .* port .*: ' r'Connection timed out\r$', result.stderr): raise error.AutoservSSHTimeout("ssh timed out", result) if "Permission denied." in result.stderr: msg = "ssh permission denied" raise error.AutoservSshPermissionDeniedError(msg, result) if not ignore_status and result.exit_status > 0: raise error.AutoservRunError("command execution error", result) return result
def testTriggerUpdate(self): """Tests that we correctly handle updater errors.""" update_url = 'http://server/test/url' self.host = self.mox.CreateMockAnything() self.mox.StubOutWithMock(self.host, 'run') self.mox.StubOutWithMock(autoupdater.ChromiumOSUpdater, 'get_last_update_error') self.host.hostname = 'test_host' updater_control_bin = '/usr/bin/update_engine_client' test_url = 'http://server/test/url' expected_wait_cmd = ('%s -status | grep CURRENT_OP' % updater_control_bin) expected_cmd = ('%s --check_for_update --omaha_url=%s' % (updater_control_bin, test_url)) self.mox.StubOutWithMock(time, "sleep") UPDATE_ENGINE_RETRY_WAIT_TIME = 5 # Generic SSH Error. cmd_result_255 = self.mox.CreateMockAnything() cmd_result_255.exit_status = 255 # Command Failed Error cmd_result_1 = self.mox.CreateMockAnything() cmd_result_1.exit_status = 1 # Error 37 cmd_result_37 = self.mox.CreateMockAnything() cmd_result_37.exit_status = 37 updater = autoupdater.ChromiumOSUpdater(update_url, host=self.host) # (SUCCESS) Expect one wait command and one status command. self._host_run_for_update(expected_wait_cmd) self._host_run_for_update(expected_cmd) # (SUCCESS) Test with one retry to wait for update-engine. self._host_run_for_update(expected_wait_cmd, exception=error.AutoservRunError( 'non-zero status', cmd_result_1)) time.sleep(UPDATE_ENGINE_RETRY_WAIT_TIME) self._host_run_for_update(expected_wait_cmd) self._host_run_for_update(expected_cmd) # (SUCCESS) One-time SSH timeout, then success on retry. self._host_run_for_update(expected_wait_cmd) self._host_run_for_update(expected_cmd, exception=error.AutoservSSHTimeout( 'ssh timed out', cmd_result_255)) self._host_run_for_update(expected_cmd) # (SUCCESS) One-time ERROR 37, then success. self._host_run_for_update(expected_wait_cmd) self._host_run_for_update(expected_cmd, exception=error.AutoservRunError( 'ERROR_CODE=37', cmd_result_37)) self._host_run_for_update(expected_cmd) # (FAILURE) Bad status of update engine. self._host_run_for_update(expected_wait_cmd) self._host_run_for_update( expected_cmd, bad_update_status=True, exception=error.InstallError('host is not in installable state')) # (FAILURE) Two-time SSH timeout. self._host_run_for_update(expected_wait_cmd) self._host_run_for_update(expected_cmd, exception=error.AutoservSSHTimeout( 'ssh timed out', cmd_result_255)) self._host_run_for_update(expected_cmd, exception=error.AutoservSSHTimeout( 'ssh timed out', cmd_result_255)) # (FAILURE) SSH Permission Error self._host_run_for_update(expected_wait_cmd) self._host_run_for_update( expected_cmd, exception=error.AutoservSshPermissionDeniedError( 'no permission', cmd_result_255)) # (FAILURE) Other ssh failure self._host_run_for_update(expected_wait_cmd) self._host_run_for_update( expected_cmd, exception=error.AutoservSshPermissionDeniedError( 'no permission', cmd_result_255)) # (FAILURE) Other error self._host_run_for_update(expected_wait_cmd) self._host_run_for_update(expected_cmd, exception=error.AutoservRunError( "unknown error", cmd_result_1)) self.mox.ReplayAll() # Expect success updater.trigger_update() updater.trigger_update() updater.trigger_update() updater.trigger_update() # Expect errors as listed above self.assertRaises(autoupdater.RootFSUpdateError, updater.trigger_update) self.assertRaises(autoupdater.RootFSUpdateError, updater.trigger_update) self.assertRaises(autoupdater.RootFSUpdateError, updater.trigger_update) self.assertRaises(autoupdater.RootFSUpdateError, updater.trigger_update) self.assertRaises(autoupdater.RootFSUpdateError, updater.trigger_update) self.mox.VerifyAll()
def _run(self, command, timeout, ignore_status, stdout, stderr, connect_timeout, env, options, stdin, args, ignore_timeout, ssh_failure_retry_ok): """Helper function for run().""" ssh_cmd = self.ssh_command(connect_timeout, options) if not env.strip(): env = "" else: env = "export %s;" % env for arg in args: command += ' "%s"' % utils.sh_escape(arg) full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command)) # TODO(jrbarnette): crbug.com/484726 - When we're in an SSP # container, sometimes shortly after reboot we will see DNS # resolution errors on ssh commands; the problem never # occurs more than once in a row. This especially affects # the autoupdate_Rollback test, but other cases have been # affected, too. # # We work around it by detecting the first DNS resolution error # and retrying exactly one time. dns_error_retry_count = 1 def counters_inc(counter_name, failure_name): """Helper function to increment metrics counters. @param counter_name: string indicating which counter to use @param failure_name: string indentifying an error, or 'success' """ if counter_name == 'call': # ssh_counter records the outcome of each ssh invocation # inside _run(), including exceptions. ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls') fields = { 'error': failure_name or 'success', 'attempt': ssh_call_count } ssh_counter.increment(fields=fields) if counter_name == 'run': # run_counter records each call to _run() with its result # and how many tries were made. Calls are recorded when # _run() exits (including exiting with an exception) run_counter = metrics.Counter('chromeos/autotest/ssh/runs') fields = { 'error': failure_name or 'success', 'attempt': ssh_call_count } run_counter.increment(fields=fields) # If ssh_failure_retry_ok is True, retry twice on timeouts and generic # error 255: if a simple retry doesn't work, kill the ssh master # connection and try again. (Note that either error could come from # the command running in the DUT, in which case the retry may be # useless but, in theory, also harmless.) if ssh_failure_retry_ok: # Ignore ssh command timeout, even though it could be a timeout due # to the command executing in the remote host. Note that passing # ignore_timeout = True makes utils.run() return None on timeouts # (and only on timeouts). original_ignore_timeout = ignore_timeout ignore_timeout = True ssh_failure_retry_count = 2 else: ssh_failure_retry_count = 0 ssh_call_count = 0 while True: try: # Increment call count first, in case utils.run() throws an # exception. ssh_call_count += 1 result = utils.run(full_cmd, timeout, True, stdout, stderr, verbose=False, stdin=stdin, stderr_is_expected=ignore_status, ignore_timeout=ignore_timeout) except Exception as e: # No retries on exception. counters_inc('call', 'exception') counters_inc('run', 'exception') raise e failure_name = None if result: if result.exit_status == 255: if re.search(r'^ssh: .*: Name or service not known', result.stderr): failure_name = 'dns_failure' else: failure_name = 'error_255' elif result.exit_status > 0: failure_name = 'nonzero_status' else: # result == None failure_name = 'timeout' # Record the outcome of the ssh invocation. counters_inc('call', failure_name) if failure_name: # There was a failure: decide whether to retry. if failure_name == 'dns_failure': if dns_error_retry_count > 0: logging.debug('retrying ssh because of DNS failure') dns_error_retry_count -= 1 continue else: if ssh_failure_retry_count == 2: logging.debug('retrying ssh command after %s', failure_name) ssh_failure_retry_count -= 1 continue elif ssh_failure_retry_count == 1: # After two failures, restart the master connection # before the final try. logging.debug('retry 2: restarting master connection') self.restart_master_ssh() # Last retry: reinstate timeout behavior. ignore_timeout = original_ignore_timeout ssh_failure_retry_count -= 1 continue # No retry conditions occurred. Exit the loop. break # The outcomes of ssh invocations have been recorded. Now record # the outcome of this function. if ignore_timeout and not result: counters_inc('run', 'ignored_timeout') return None # The error messages will show up in band (indistinguishable # from stuff sent through the SSH connection), so we have the # remote computer echo the message "Connected." before running # any command. Since the following 2 errors have to do with # connecting, it's safe to do these checks. if result.exit_status == 255: if re.search( r'^ssh: connect to host .* port .*: ' r'Connection timed out\r$', result.stderr): counters_inc('run', 'final_timeout') raise error.AutoservSSHTimeout("ssh timed out", result) if "Permission denied." in result.stderr: msg = "ssh permission denied" counters_inc('run', 'final_eperm') raise error.AutoservSshPermissionDeniedError(msg, result) if not ignore_status and result.exit_status > 0: counters_inc('run', 'final_run_error') raise error.AutoservRunError("command execution error", result) counters_inc('run', failure_name) return result