def _wait_for_reboot(self, old_boot_id): logging.info("Client is rebooting") logging.info("Waiting for client to halt") if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT, old_boot_id=old_boot_id): err = "%s failed to shutdown after %d" err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT) raise error.AutotestRunError(err) logging.info("Client down, waiting for restart") if not self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT): # since reboot failed # hardreset the machine once if possible # before failing this control file warning = "%s did not come back up, hard resetting" warning %= self.host.hostname logging.warning(warning) try: self.host.hardreset(wait=False) except (AttributeError, error.AutoservUnsupportedError), detail: warning = ("Hard reset unsupported on %s: %s" % (self.hostname, detail)) logging.warning(warning) raise error.AutotestRunError( "%s failed to boot after %ds" % (self.host.hostname, self.host.DEFAULT_REBOOT_TIMEOUT))
def execute_section(self, section, timeout, stderr_redirector,
                    client_disconnect_timeout):
    """Execute one section (phase) of the client control file.

    @param section: Zero-based index of the control file phase to run.
    @param timeout: Seconds allowed for this section, or None for no limit.
    @param stderr_redirector: Logger object whose last_line attribute
        reflects the most recent status line emitted by the client.
    @param client_disconnect_timeout: Timeout forwarded to the daemon
        execution path for client disconnects.

    @returns The redirector's last status line after the section ran.
    @raises error.AutotestRunError: If the client aborted (exit status 1)
        or, in foreground mode, produced no stderr output at all.
    """
    logging.info("Executing %s/autotest %s/control phase %d",
                 self.autodir, self.autodir, section)

    if self.background:
        result = self._execute_in_background(section, timeout)
    else:
        result = self._execute_daemon(section, timeout, stderr_redirector,
                                      client_disconnect_timeout)

    last_line = stderr_redirector.last_line

    # Decide whether the outcome is bad enough to raise.
    err = None
    if result.exit_status == 1:
        err = error.AutotestRunError("client job was aborted")
    elif not self.background and not result.stderr:
        err = error.AutotestRunError(
                "execute_section %s failed to return anything\n"
                "stdout:%s\n" % (section, result.stdout))

    # log something if the client failed AND never finished logging
    if err and not self.is_client_job_finished(last_line):
        self.log_unexpected_abort(stderr_redirector)

    if err:
        raise err
    return stderr_redirector.last_line
def execute_control(self, timeout=None, client_disconnect_timeout=None):
    """Run the client control file section by section until completion.

    Sets up client-side log collection (foreground mode only), then loops
    running execute_section; after each section it inspects the client's
    last status line to decide whether the job finished, is rebooting, or
    aborted unexpectedly.

    @param timeout: Overall wall-clock budget in seconds for all sections,
        or None for no limit.
    @param client_disconnect_timeout: Forwarded to execute_section.

    @raises error.AutotestRunError: On an unexpected final status message
        from the client, or propagated from a failed reboot.
    @raises error.AutotestTimeoutError: If the overall timeout expires.
    """
    if not self.background:
        collector = log_collector(self.host, self.tag, self.results_dir)
        hostname = self.host.hostname
        remote_results = collector.client_results_dir
        local_results = collector.server_results_dir
        self.host.job.add_client_log(hostname, remote_results,
                                     local_results)
        job_record_context = self.host.job.get_record_context()

    section = 0
    start_time = time.time()

    logger = client_logger(self.host, self.tag, self.results_dir)
    try:
        while not timeout or time.time() < start_time + timeout:
            if timeout:
                section_timeout = start_time + timeout - time.time()
            else:
                section_timeout = None
            # Capture the boot id so a client-requested reboot can be
            # detected reliably by _wait_for_reboot.
            boot_id = self.host.get_boot_id()
            last = self.execute_section(section, section_timeout,
                                        logger, client_disconnect_timeout)
            if self.background:
                return
            section += 1
            if self.is_client_job_finished(last):
                logging.info("Client complete")
                return
            elif self.is_client_job_rebooting(last):
                try:
                    self._wait_for_reboot(boot_id)
                # Fix: replaced Python-2-only "except E, e" with
                # "except E as e" (valid on Python 2.6+ and Python 3).
                except error.AutotestRunError as e:
                    self.host.job.record("ABORT", None, "reboot", str(e))
                    self.host.job.record("END ABORT", None, None, str(e))
                    raise
                continue

            # if we reach here, something unexpected happened
            self.log_unexpected_abort(logger)

            # give the client machine a chance to recover from a crash
            self.host.wait_up(self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
            msg = ("Aborting - unexpected final status message from "
                   "client on %s: %s\n") % (self.host.hostname, last)
            raise error.AutotestRunError(msg)
    finally:
        logger.close()
        if not self.background:
            collector.collect_client_job_results()
            collector.remove_redundant_client_logs()
            state_file = os.path.basename(self.remote_control_state)
            state_path = os.path.join(self.results_dir, state_file)
            self.host.job.postprocess_client_state(state_path)
            self.host.job.remove_client_log(hostname, remote_results,
                                            local_results)
            job_record_context.restore()

    # should only get here if we timed out
    assert timeout
    raise error.AutotestTimeoutError()