class OpTestConfiguration():
    """Holds the parsed op-test configuration and builds the system objects.

    ``parse_args`` merges the configuration file(s) and command line into
    ``self.args``, prepares the output/log directories and loggers, and
    waits for any requested environment lock.  ``objs`` then instantiates
    the host/BMC/system objects that match ``self.args.bmc_type``.
    """

    def __init__(self):
        # initialize OpTestUtil with this object the OpTestConfiguration
        self.util = OpTestUtil(self)
        # initialize OpTestCronus with this object the OpTestConfiguration
        self.cronus = OpTestCronus(self)
        self.args = []
        self.remaining_args = []
        self.basedir = os.path.dirname(sys.argv[0])
        self.signal_ready = False  # indicator for properly initialized
        self.atexit_ready = False  # indicator for properly initialized
        self.aes_print_helpers = True  # Need state for locker_wait
        self.dump = True  # Need state for cleanup
        self.lock_dict = {'res_id': None,
                          'name': None,
                          'Group_Name': None,
                          'envs': [],
                          }

        self.util_server = None  # Hostlocker or AES
        self.util_bmc_server = None  # OpenBMC REST Server
        atexit.register(self.cleanup)  # allows cleanup handler to run (OpExit)
        self.firmware_versions = None
        self.nvram_debug_opts = None

        # Import the setup module of every addon directory.  A missing
        # 'addons' directory yields no addons rather than a bare
        # StopIteration from next().
        addons_dir = os.path.join(self.basedir, 'addons')
        for dirname in next(os.walk(addons_dir), (addons_dir, [], []))[1]:
            if dirname == "__pycache__":
                continue
            optAddons[dirname] = importlib.import_module(
                "addons." + dirname + ".OpTest" + dirname + "Setup")

    def cleanup(self):
        """atexit hook: run the util cleanup once parse_args has completed."""
        if self.atexit_ready:
            # calling cleanup before args initialized pointless
            # attribute errors thrown in cleanup, e.g. ./op-test -h
            self.util.cleanup()

    @staticmethod
    def _exc_message(exc):
        """Return the text of *exc*.

        Uses a string ``message`` attribute when the exception provides one
        and falls back to ``str(exc)`` otherwise, because built-in
        exceptions have no ``message`` attribute on Python 3.
        """
        message = getattr(exc, 'message', None)
        return message if isinstance(message, str) else str(exc)

    def _wait_for_lockers(self):
        """Call ``self.util.check_lockers()`` until it succeeds.

        A failure whose text contains "unable to lock" is retried every
        minute until ``--locker-wait`` minutes have elapsed, after which an
        ``OpExit`` carrying ``errno.ETIME`` is raised.  Any other failure is
        logged and re-raised immediately.
        """
        locker_timeout = time.time() + 60*self.args.locker_wait
        locker_code = errno.ETIME  # 62
        locker_message = ("OpTestSystem waited {} minutes but was unable"
                          " to lock environment/host requested,"
                          " either pick another environment/host or increase "
                          "--locker-wait, try --aes q with options for "
                          "--aes-search-args to view availability, or as"
                          " appropriate for your hostlocker"
                          .format(self.args.locker_wait))
        locker_exit_exception = OpExit(message=locker_message,
                                       code=locker_code)
        while True:
            try:
                self.util.check_lockers()
                break
            except Exception as e:
                OpTestLogger.optest_logger_glob.optest_logger.debug(
                    "locker_wait Exception={}".format(e))
                message = self._exc_message(e)
                if "unable to lock" in message:
                    self.aes_print_helpers = False
                    # SystemExit exception needs message to print
                    rollup_message = locker_exit_exception.message
                    rollup_exception = locker_exit_exception
                    rollup_flag = False
                else:
                    rollup_message = message
                    rollup_exception = e
                    rollup_flag = True  # bubble exception out
                if time.time() > locker_timeout or rollup_flag:
                    # if not "unable to lock" we bubble up underlying exception
                    OpTestLogger.optest_logger_glob.optest_logger.warning(
                        "{}".format(rollup_message))
                    raise rollup_exception
                else:
                    OpTestLogger.optest_logger_glob.optest_logger.info(
                        "OpTestSystem waiting for requested environment/host"
                        " total time to wait is {} minutes, we will check"
                        " every minute"
                        .format(self.args.locker_wait))
                    time.sleep(60)

    def parse_args(self, argv=None):
        """Parse configuration files and the command line, then set up logging.

        :param argv: argument list handed to argparse; ``None`` lets
            argparse fall back to ``sys.argv``.
        :returns: tuple ``(self.args, self.remaining_args)``.
        :raises OSError: when ``--config-file`` names an unreadable file.
        """
        conf_parser = argparse.ArgumentParser(add_help=False)

        # We have two parsers so we have correct --help, we need -c in both
        conf_parser.add_argument("-c", "--config-file",
                                 help="Configuration File",
                                 metavar="FILE")

        args, remaining_args = conf_parser.parse_known_args(argv)
        defaults = {}
        # SafeConfigParser was a deprecated alias of ConfigParser and no
        # longer exists in Python 3.12.
        config = configparser.ConfigParser()
        config.read([os.path.expanduser("~/.op-test-framework.conf")])
        if args.config_file:
            if os.access(args.config_file, os.R_OK):
                config.read([args.config_file])
            else:
                raise OSError(errno.ENOENT, os.strerror(
                    errno.ENOENT), args.config_file)
        try:
            defaults = dict(config.items('op-test'))
        except configparser.NoSectionError:
            pass

        parser = get_parser()
        parser.set_defaults(**defaults)

        parser.add_argument("--check-ssh-keys", action='store_true',
                            default=False,
                            help="Check remote host keys when using SSH (auto-yes on new)")
        parser.add_argument("--known-hosts-file",
                            help="Specify a custom known_hosts file")

        self.args, self.remaining_args = parser.parse_known_args(
            remaining_args)

        args_dict = vars(self.args)

        # if we have a bmc_type we start with appropriate template
        if args_dict.get('bmc_type') is not None:
            dict_merge = default_templates.get(
                args_dict.get('bmc_type').lower())
            if dict_merge is not None:
                # overlays dict merge on top of default_val
                # NOTE(review): this updates the shared default_val mapping
                # in place, so the overlay persists across calls.
                default_val.update(dict_merge)

        # Fill every option that is still unset from the defaults.
        for key in default_val:
            if args_dict.get(key) is None:
                args_dict[key] = default_val[key]

        stateMap = {'UNKNOWN': common.OpTestSystem.OpSystemState.UNKNOWN,
                    'UNKNOWN_BAD': common.OpTestSystem.OpSystemState.UNKNOWN_BAD,
                    'OFF': common.OpTestSystem.OpSystemState.OFF,
                    'PETITBOOT': common.OpTestSystem.OpSystemState.PETITBOOT,
                    'PETITBOOT_SHELL': common.OpTestSystem.OpSystemState.PETITBOOT_SHELL,
                    'OS': common.OpTestSystem.OpSystemState.OS
                    }

        # Some quick sanity checking
        if self.args.known_hosts_file and not self.args.check_ssh_keys:
            parser.error("--known-hosts-file requires --check-ssh-keys")

        # Setup some defaults for the output options
        # Order of precedence
        # 1. cmdline arg
        # 2. env variable
        # 3. default path
        if self.args.output:
            outdir = self.args.output
        elif "OP_TEST_OUTPUT" in os.environ:
            outdir = os.environ["OP_TEST_OUTPUT"]
        else:
            outdir = os.path.join(self.basedir, "test-reports")

        self.outsuffix = "test-run-%s" % self.get_suffix()
        outdir = os.path.join(outdir, self.outsuffix)

        # Normalize the path to fully qualified and create if not there
        self.output = os.path.abspath(outdir)
        if not os.path.exists(self.output):
            os.makedirs(self.output)

        if self.args.logdir:
            logdir = self.args.logdir
        elif "OP_TEST_LOGDIR" in os.environ:
            logdir = os.environ["OP_TEST_LOGDIR"]
        else:
            logdir = self.output

        self.logdir = os.path.abspath(logdir)
        if not os.path.exists(self.logdir):
            os.makedirs(self.logdir)

        print("Logs in: {}".format(self.logdir))

        OpTestLogger.optest_logger_glob.logdir = self.logdir

        # Grab the suffix, if not given use current time
        self.outsuffix = self.get_suffix()

        # set up where all the logs go
        logfile = os.path.join(self.output, "%s.log" % self.outsuffix)

        logcmd = "tee %s" % (logfile)
        # we use 'cat -v' to convert control characters
        # to something that won't affect the user's terminal
        if self.args.quiet:
            logcmd = logcmd + "> /dev/null"
            # save sh_level for later refresh loggers
            OpTestLogger.optest_logger_glob.sh_level = logging.ERROR
            OpTestLogger.optest_logger_glob.sh.setLevel(logging.ERROR)
        else:
            logcmd = logcmd + "| sed -u -e 's/\\r$//g'|cat -v"
            # save sh_level for later refresh loggers
            OpTestLogger.optest_logger_glob.sh_level = logging.INFO
            OpTestLogger.optest_logger_glob.sh.setLevel(logging.INFO)

        OpTestLogger.optest_logger_glob.setUpLoggerFile(
            datetime.utcnow().strftime("%Y%m%d%H%M%S%f")+'.main.log')
        OpTestLogger.optest_logger_glob.setUpLoggerDebugFile(
            datetime.utcnow().strftime("%Y%m%d%H%M%S%f")+'.debug.log')
        OpTestLogger.optest_logger_glob.optest_logger.info(
            'TestCase Log files: {}/*'.format(self.output))
        OpTestLogger.optest_logger_glob.optest_logger.info(
            'StreamHandler setup {}'.format(
                'quiet' if self.args.quiet else 'normal'))

        self.logfile_proc = subprocess.Popen(logcmd,
                                             stdin=subprocess.PIPE,
                                             stderr=sys.stderr,
                                             stdout=sys.stdout,
                                             shell=True,
                                             universal_newlines=True,
                                             encoding='utf-8')
        self.logfile = self.logfile_proc.stdin

        # we have enough setup to allow
        # signal handler cleanup to run
        self.signal_ready = True
        # atexit viable for cleanup to run
        self.atexit_ready = True
        # now that we have loggers, dump conf file to help debug later
        OpTestLogger.optest_logger_glob.optest_logger.debug(
            "conf file defaults={}".format(defaults))
        cmd = "git describe --always"
        try:
            git_output = subprocess.check_output(cmd.split())
            # log for triage of how dated the repo is
            OpTestLogger.optest_logger_glob.optest_logger.debug(
                "op-test-framework git level = {}".format(git_output))
        except Exception:
            # best effort only: a missing git binary/repo must not stop a run
            OpTestLogger.optest_logger_glob.optest_logger.debug(
                "Unable to get git describe")

        # setup AES and Hostlocker configs after the logging is setup
        self._wait_for_lockers()

        if self.args.machine_state is None:
            if self.args.bmc_type in ['qemu', 'mambo']:
                # Force UNKNOWN_BAD so that we don't try to setup the console early
                self.startState = common.OpTestSystem.OpSystemState.UNKNOWN_BAD
            else:
                self.startState = common.OpTestSystem.OpSystemState.UNKNOWN
        else:
            self.startState = stateMap[self.args.machine_state]
        return self.args, self.remaining_args

    def get_suffix(self):
        """Return ``--suffix`` when given, else a ``YYYYmmddHHMMSS`` timestamp."""
        # Grab the suffix, if not given use current time
        if self.args.suffix:
            outsuffix = self.args.suffix
        else:
            outsuffix = time.strftime("%Y%m%d%H%M%S")
        return outsuffix

    def objs(self):
        """Create host, BMC and system objects for ``self.args.bmc_type``.

        Returns early (creating nothing) when only listing suites/tests.
        On any failure while building the objects the traceback is printed,
        ``self.util.cleanup()`` is run and the exception is re-raised.

        :raises ParameterCheck: when the BMC does not ping and a
            reservation is in use, or a mambo file check fails.
        :raises Exception: for an unsupported ``bmc_type`` or missing HMC
            credentials.
        """
        if self.args.list_suites or self.args.list_tests:
            return

        # check to see if bmc_ip even pings to validate configuration parms
        try:
            self.util.PingFunc(
                self.args.bmc_ip,
                totalSleepTime=BMC_CONST.PING_RETRY_FOR_STABILITY)
        except Exception as e:
            # we are trying to catch sooner rather than later
            # if we have reservations that need cleaned up
            # otherwise we would have to try/except for cleanup
            # in lots of places
            # testcases.HelloWorld in CI fails if we throw this
            # raise only if we have reservations to cleanup
            if self.args.hostlocker is not None \
                    or self.args.aes is not None \
                    or self.args.aes_search_args is not None:
                self.util.cleanup()
                raise ParameterCheck(message="OpTestSystem PingFunc fails to "
                                     "ping '{}', check your configuration and setup, see "
                                     "Exception details: {}".format(self.args.bmc_ip, e))

        try:
            host = common.OpTestHost.OpTestHost(self.args.host_ip,
                                                self.args.host_user,
                                                self.args.host_password,
                                                self.args.bmc_ip,
                                                self.output,
                                                scratch_disk=self.args.host_scratch_disk,
                                                proxy=self.args.proxy,
                                                logfile=self.logfile,
                                                check_ssh_keys=self.args.check_ssh_keys,
                                                known_hosts_file=self.args.known_hosts_file,
                                                conf=self)
            if self.args.bmc_type in ['AMI', 'SMC']:
                web = OpTestWeb(self.args.bmc_ip,
                                self.args.bmc_usernameipmi,
                                self.args.bmc_passwordipmi)
                bmc = None
                if self.args.bmc_type in ['AMI']:
                    ipmi = OpTestIPMI(self.args.bmc_ip,
                                      self.args.bmc_usernameipmi,
                                      self.args.bmc_passwordipmi,
                                      host=host,
                                      host_console_command=self.args.host_serial_console_command,
                                      logfile=self.logfile,
                                      )

                    bmc = OpTestBMC(ip=self.args.bmc_ip,
                                    username=self.args.bmc_username,
                                    password=self.args.bmc_password,
                                    logfile=self.logfile,
                                    ipmi=ipmi,
                                    web=web,
                                    check_ssh_keys=self.args.check_ssh_keys,
                                    known_hosts_file=self.args.known_hosts_file
                                    )
                elif self.args.bmc_type in ['SMC']:
                    ipmi = OpTestSMCIPMI(self.args.bmc_ip,
                                         self.args.bmc_usernameipmi,
                                         self.args.bmc_passwordipmi,
                                         logfile=self.logfile,
                                         host=host,
                                         )
                    bmc = OpTestSMC(ip=self.args.bmc_ip,
                                    username=self.args.bmc_username,
                                    password=self.args.bmc_password,
                                    ipmi=ipmi,
                                    web=web,
                                    check_ssh_keys=self.args.check_ssh_keys,
                                    known_hosts_file=self.args.known_hosts_file
                                    )
                self.op_system = common.OpTestSystem.OpTestSystem(
                    state=self.startState,
                    bmc=bmc,
                    host=host,
                    conf=self,
                )
                ipmi.set_system(self.op_system)
                bmc.set_system(self.op_system)
            elif self.args.bmc_type in ['FSP']:
                ipmi = OpTestIPMI(self.args.bmc_ip,
                                  None,  # FSP does not use UID
                                  self.args.bmc_passwordipmi,
                                  host=host,
                                  logfile=self.logfile)
                bmc = OpTestFSP(self.args.bmc_ip,
                                self.args.bmc_username,
                                self.args.bmc_password,
                                ipmi=ipmi,
                                )
                self.op_system = common.OpTestSystem.OpTestFSPSystem(
                    state=self.startState,
                    bmc=bmc,
                    host=host,
                    conf=self,
                )
                ipmi.set_system(self.op_system)
            elif self.args.bmc_type in ['FSP_PHYP']:
                hmc = None
                if all(v is not None for v in [self.args.hmc_ip,
                                               self.args.hmc_username,
                                               self.args.hmc_password]):
                    hmc = OpTestHMC(self.args.hmc_ip,
                                    self.args.hmc_username,
                                    self.args.hmc_password,
                                    managed_system=self.args.system_name,
                                    lpar_name=self.args.lpar_name,
                                    lpar_vios=self.args.lpar_vios,
                                    lpar_prof=self.args.lpar_prof,
                                    lpar_user=self.args.host_user,
                                    lpar_password=self.args.host_password,
                                    logfile=self.logfile
                                    )
                else:
                    raise Exception(
                        "HMC IP, username and password is required")
                bmc = OpTestFSP(self.args.bmc_ip,
                                self.args.bmc_username,
                                self.args.bmc_password,
                                )
                self.op_system = common.OpTestSystem.OpTestFSPSystem(
                    state=self.startState,
                    bmc=bmc,
                    host=host,
                    hmc=hmc,
                    conf=self,
                )
                hmc.set_system(self.op_system)
            elif self.args.bmc_type in ['OpenBMC']:
                ipmi = OpTestIPMI(self.args.bmc_ip,
                                  self.args.bmc_usernameipmi,
                                  self.args.bmc_passwordipmi,
                                  host=host,
                                  logfile=self.logfile)
                rest_api = HostManagement(conf=self,
                                          ip=self.args.bmc_ip,
                                          username=self.args.bmc_username,
                                          password=self.args.bmc_password)
                bmc = OpTestOpenBMC(ip=self.args.bmc_ip,
                                    username=self.args.bmc_username,
                                    password=self.args.bmc_password,
                                    ipmi=ipmi,
                                    rest_api=rest_api,
                                    logfile=self.logfile,
                                    check_ssh_keys=self.args.check_ssh_keys,
                                    known_hosts_file=self.args.known_hosts_file)
                self.op_system = common.OpTestSystem.OpTestOpenBMCSystem(
                    host=host,
                    bmc=bmc,
                    state=self.startState,
                    conf=self,
                )
                bmc.set_system(self.op_system)
            elif self.args.bmc_type in ['qemu']:
                print(repr(self.args))
                bmc = OpTestQemu(conf=self,
                                 qemu_binary=self.args.qemu_binary,
                                 pnor=self.args.host_pnor,
                                 skiboot=self.args.flash_skiboot,
                                 kernel=self.args.flash_kernel,
                                 initramfs=self.args.flash_initramfs,
                                 cdrom=self.args.os_cdrom,
                                 logfile=self.logfile)
                self.op_system = common.OpTestSystem.OpTestQemuSystem(host=host,
                                                                      bmc=bmc,
                                                                      state=self.startState,
                                                                      conf=self,
                                                                      )
                bmc.set_system(self.op_system)
            elif self.args.bmc_type in ['mambo']:
                # Guard against an unset/nonexistent path first so the user
                # gets the ParameterCheck below instead of a raw
                # TypeError/OSError from os.stat().
                mambo_binary = self.args.mambo_binary
                if mambo_binary is None \
                        or not os.path.exists(mambo_binary) \
                        or not (os.stat(mambo_binary).st_mode & stat.S_IXOTH):
                    raise ParameterCheck(message="Check that the file exists with X permissions mambo-binary={}"
                                         .format(self.args.mambo_binary))
                if self.args.flash_skiboot is None \
                        or not os.access(self.args.flash_skiboot, os.R_OK):
                    raise ParameterCheck(message="Check that the file exists with R permissions flash-skiboot={}"
                                         .format(self.args.flash_skiboot))
                if self.args.flash_kernel is None \
                        or not os.access(self.args.flash_kernel, os.R_OK):
                    raise ParameterCheck(message="Check that the file exists with R permissions flash-kernel={}"
                                         .format(self.args.flash_kernel))
                bmc = OpTestMambo(mambo_binary=self.args.mambo_binary,
                                  mambo_initial_run_script=self.args.mambo_initial_run_script,
                                  mambo_autorun=self.args.mambo_autorun,
                                  skiboot=self.args.flash_skiboot,
                                  kernel=self.args.flash_kernel,
                                  initramfs=self.args.flash_initramfs,
                                  timeout_factor=self.args.mambo_timeout_factor,
                                  logfile=self.logfile)
                self.op_system = common.OpTestSystem.OpTestMamboSystem(host=host,
                                                                       bmc=bmc,
                                                                       state=self.startState,
                                                                       conf=self,
                                                                       )
                bmc.set_system(self.op_system)

            # Check that the bmc_type exists in our loaded addons then create our objects
            elif self.args.bmc_type in optAddons:
                (bmc, self.op_system) = optAddons[self.args.bmc_type].createSystem(
                    self, host)
            else:
                self.util.cleanup()
                raise Exception("Unsupported BMC Type '{}', check your "
                                "upper/lower cases for bmc_type and verify "
                                "any credentials used from HostLocker or "
                                "AES Version (see aes_get_creds "
                                "version_mappings)".format(self.args.bmc_type))

            host.set_system(self.op_system)
            return
        except Exception:
            traceback.print_exc()
            self.util.cleanup()
            # bare raise keeps the original traceback intact
            raise

    def bmc(self):
        """Return the ``bmc`` attribute of the system object."""
        return self.op_system.bmc

    def hmc(self):
        """Return the ``hmc`` attribute of the system object."""
        return self.op_system.hmc

    def system(self):
        """Return the system object created by ``objs``."""
        return self.op_system

    def host(self):
        """Return the result of the system object's ``host()`` call."""
        return self.op_system.host()

    def ipmi(self):
        """Return the result of the system object's ``ipmi()`` call."""
        return self.op_system.ipmi()

    def lspci_file(self):
        """Return the configured ``host_lspci`` value."""
        return self.args.host_lspci

    def platform(self):
        """Return the configured ``platform`` value."""
        return self.args.platform
class OpTestHMIHandling(unittest.TestCase):
    """HMI (Hypervisor Maintenance Interrupt) error-injection tests.

    Errors are injected by writing SCOM registers with ``putscom`` over the
    IPMI console; the kernel log is then checked for the recovery messages.
    """

    def setUp(self):
        conf = OpTestConfiguration.conf
        self.cv_HOST = conf.host()
        self.cv_IPMI = conf.ipmi()
        self.cv_FSP = conf.bmc()
        self.cv_SYSTEM = conf.system()
        self.bmc_type = conf.args.bmc_type
        self.util = OpTestUtil()

    def ipmi_monitor_sol_ipl(self, console, timeout):
        """Close the console and drive the system back to the OS state.

        ``timeout`` is accepted but not used by this method.
        Returns ``BMC_CONST.FW_SUCCESS``.
        """
        # Error injection causing the SOL console to terminate immediately.
        # So Let's re-connect the console
        console.close()
        self.cv_SYSTEM.set_state(OpSystemState.IPLing)
        try:
            self.cv_SYSTEM.goto_state(OpSystemState.OS)
        except Exception:
            # Fall back to pinging the host; 'except Exception' (not a bare
            # except) so Ctrl-C / SystemExit still propagate.
            self.util.PingFunc(self.cv_HOST.ip,
                               BMC_CONST.PING_RETRY_POWERCYCLE)
            self.cv_SYSTEM.set_state(OpSystemState.OS)
        console.close()
        print("System booted fine to host OS...")
        return BMC_CONST.FW_SUCCESS

    def verify_proc_recovery(self, l_res):
        """Raise unless *l_res* holds both processor-recovery log messages."""
        if any("Processor Recovery done" in line for line in l_res) and \
                any("Harmless Hypervisor Maintenance interrupt [Recovered]" in line for line in l_res):
            print("Processor recovery done")
            return
        else:
            raise Exception(
                "HMI handling failed to log message: for proc_recv_done")

    def verify_timer_facility_recovery(self, l_res):
        """Raise unless *l_res* holds both timer-facility recovery messages."""
        if any("Timer facility experienced an error" in line for line in l_res) and \
                any("Severe Hypervisor Maintenance interrupt [Recovered]" in line for line in l_res):
            print("Timer facility experienced an error and got recovered")
            return
        else:
            raise Exception("HMI handling failed to log message")

    def init_test(self):
        """Collect chip/core ids into ``self.l_dic`` and prepare the host."""
        self.cv_SYSTEM.goto_state(OpSystemState.OS)
        self.proc_gen = self.cv_HOST.host_get_proc_gen()

        # e.g. ['00000000', '00000001', '00000010']
        l_chips = self.cv_HOST.host_get_list_of_chips()
        if not l_chips:
            raise Exception("Getscom failed to list processor chip ids")

        # NOTE(review): indexed below as a sequence of (chip, [core ids])
        # pairs - confirm against host_get_cores().
        l_cores = self.cv_HOST.host_get_cores()
        if not l_cores:
            raise Exception("Failed to get list of core id's")

        print(l_cores)
        # Remove master core where injecting core checkstop leads to IPL expected failures
        # after 2 failures system will starts boot in Golden side of PNOR
        l_cores[0][1].pop(0)
        print(l_cores)

        # self.l_dic is a list of chip id's, core id's . and is of below format
        # [['00000000', ['4', '5', '6', 'c', 'd', 'e']], ['00000001', ['4', '5', '6', 'c', 'd', 'e']], ['00000010', ['4', '5', '6', 'c', 'd', 'e']]]
        self.l_dic = []
        for i, tup in enumerate(l_cores):
            self.l_dic.append([l_chips[i], tup[1]])
        print(self.l_dic)

        # In-order to inject HMI errors on cpu's, cpu should be running, so disabling the sleep states 1 and 2 of all CPU's
        self.disable_cpu_idle_states()

        # Disable kdump to check behaviour of IPL caused due to kernel panic after injection of core/system checkstop
        self.disable_kdump_service()

    def disable_kdump_service(self):
        """Stop the kdump service on the host, ignoring a failure to stop it."""
        l_oslevel = self.cv_HOST.host_get_OS_Level()
        try:
            if "Ubuntu" in l_oslevel:
                self.cv_HOST.host_run_command("service kdump-tools stop")
            else:
                self.cv_HOST.host_run_command("service kdump stop")
        except CommandFailed as cf:
            if cf.exitcode == 5:
                # kdump may not be enabled, so it's not a failure to stop it
                pass
            # NOTE(review): other exit codes are ignored as well; kept as-is.

    def enable_idle_state(self, i_idle):
        """Enable cpuidle state *i_idle* on every CPU via sysfs."""
        l_cmd = "for i in /sys/devices/system/cpu/cpu*/cpuidle/state%s/disable; do echo 0 > $i; done" % i_idle
        self.cv_HOST.host_run_command(l_cmd)

    def disable_idle_state(self, i_idle):
        """Disable cpuidle state *i_idle* on every CPU via sysfs."""
        l_cmd = "for i in /sys/devices/system/cpu/cpu*/cpuidle/state%s/disable; do echo 1 > $i; done" % i_idle
        self.cv_HOST.host_run_command(l_cmd)

    # Disable all CPU idle states except snooze state
    def disable_cpu_idle_states(self):
        """Enable idle state 0 and disable every other state found on the host.

        ``cpupower`` is tried first; on ``CommandFailed`` the sysfs helpers
        are used instead.
        """
        states = self.cv_HOST.host_run_command(
            "find /sys/devices/system/cpu/cpu*/cpuidle/state* -type d | cut -d'/' -f8 | sort -u | sed -e 's/^state//'"
        )
        for state in states:
            # value comparison: 'is' on a str only works by interning accident
            if state == "0":
                try:
                    self.cv_HOST.host_run_command("cpupower idle-set -e 0")
                except CommandFailed:
                    self.enable_idle_state("0")
                continue
            try:
                self.cv_HOST.host_run_command(
                    "cpupower idle-set -d %s" % state)
            except CommandFailed:
                self.disable_idle_state(state)

    def form_scom_addr(self, addr, core):
        """Return the per-core SCOM address for *addr* on *core*.

        POWER8/POWER8E: the core id replaces the second character of *addr*.
        POWER9: ``hex(addr | (((core & 0x1f) + 0x20) << 24))`` with *addr*
        and *core* parsed as hexadecimal.

        :raises Exception: for any other ``self.proc_gen``.
        """
        if self.proc_gen in ["POWER8", "POWER8E"]:
            val = addr[0] + str(core) + addr[2:]
        elif self.proc_gen in ["POWER9"]:
            # plain integer arithmetic instead of eval() on a built string
            val = hex(int(addr, 16) |
                      (((int(core, 16) & 0x1f) + 0x20) << 24))
        else:
            raise Exception(
                "Unsupported processor generation %s" % self.proc_gen)
        print(val)
        return val

    def clearGardEntries(self):
        """Clear GARD records (FSP or opal-gard) and power-cycle the system."""
        self.cv_SYSTEM.goto_state(OpSystemState.OS)
        self.util.PingFunc(self.cv_HOST.ip, BMC_CONST.PING_RETRY_POWERCYCLE)
        if "FSP" in self.bmc_type:
            res = self.cv_FSP.fspc.run_command("gard --clr all")
            self.assertIn("Success in clearing Gard Data", res,
                          "Failed to clear GARD entries")
            print(self.cv_FSP.fspc.run_command("gard --gc cpu"))
        else:
            g = self.cv_HOST.host_run_command(
                "PATH=/usr/local/sbin:$PATH opal-gard list all")
            if "No GARD entries to display" not in g:
                self.cv_HOST.host_run_command(
                    "PATH=/usr/local/sbin:$PATH opal-gard clear all")
                cleared_gard = self.cv_HOST.host_run_command(
                    "PATH=/usr/local/sbin:$PATH opal-gard list")
                self.assertIn("No GARD entries to display", cleared_gard,
                              "Failed to clear GARD entries")
        self.cv_SYSTEM.goto_state(OpSystemState.OFF)
        self.cv_SYSTEM.goto_state(OpSystemState.OS)

    ##
    # @brief This function executes HMI test case based on the i_test value, Before test starts
    #        disabling kdump service to make sure system reboots, after injecting non-recoverable errors.
    #
    # @param i_test @type int: this is the type of test case want to execute
    #                          BMC_CONST.HMI_PROC_RECV_DONE: Processor recovery done
    #                          BMC_CONST.HMI_PROC_RECV_ERROR_MASKED: proc_recv_error_masked
    #                          BMC_CONST.HMI_MALFUNCTION_ALERT: malfunction_alert
    #                          BMC_CONST.HMI_HYPERVISOR_RESOURCE_ERROR: hypervisor resource error
    #                          BMC_CONST.TOD_ERRORS: chip TOD error injections
    #                          BMC_CONST.TFMR_ERRORS: timer facility error injections
    #
    # @return BMC_CONST.FW_SUCCESS, or raises Exception for an unknown i_test
    def _testHMIHandling(self, i_test):
        l_test = i_test
        self.init_test()
        self.cv_SYSTEM.goto_state(OpSystemState.OS)
        self.util.PingFunc(self.cv_HOST.ip, BMC_CONST.PING_RETRY_POWERCYCLE)

        l_con = self.cv_SYSTEM.sys_get_ipmi_console()
        self.cv_SYSTEM.host_console_login()
        self.cv_SYSTEM.host_console_unique_prompt()
        l_con.run_command("uname -a")
        l_con.run_command("cat /etc/os-release")
        l_con.run_command("lscpu")
        l_con.run_command("dmesg -D")
        if l_test == BMC_CONST.HMI_PROC_RECV_DONE:
            self._test_proc_recv_done()
        elif l_test == BMC_CONST.HMI_PROC_RECV_ERROR_MASKED:
            self._test_proc_recv_error_masked()
        elif l_test == BMC_CONST.HMI_MALFUNCTION_ALERT:
            self._test_malfunction_allert()
        elif l_test == BMC_CONST.HMI_HYPERVISOR_RESOURCE_ERROR:
            self._test_hyp_resource_err()
        elif l_test == BMC_CONST.TOD_ERRORS:
            # TOD Error recovery works on systems having more than one chip TOD
            # Skip this test on single chip systems(as recovery fails on 1S systems)
            if len(self.l_dic) == 1:
                l_msg = "This is a single chip system, TOD Error recovery won't work"
                print(l_msg)
                return BMC_CONST.FW_SUCCESS
            elif len(self.l_dic) > 1:
                for l_error in (
                        BMC_CONST.PSS_HAMMING_DISTANCE,
                        BMC_CONST.INTERNAL_PATH_OR_PARITY_ERROR,
                        BMC_CONST.TOD_DATA_PARITY_ERROR,
                        BMC_CONST.TOD_SYNC_CHECK_ERROR,
                        BMC_CONST.FSM_STATE_PARITY_ERROR,
                        BMC_CONST.MASTER_PATH_CONTROL_REGISTER,
                        BMC_CONST.PORT_0_PRIMARY_CONFIGURATION_REGISTER,
                        BMC_CONST.PORT_1_PRIMARY_CONFIGURATION_REGISTER,
                        BMC_CONST.PORT_0_SECONDARY_CONFIGURATION_REGISTER,
                        BMC_CONST.PORT_1_SECONDARY_CONFIGURATION_REGISTER,
                        BMC_CONST.SLAVE_PATH_CONTROL_REGISTER,
                        BMC_CONST.INTERNAL_PATH_CONTROL_REGISTER,
                        BMC_CONST.PR_SC_MS_SL_CONTROL_REGISTER):
                    self._test_tod_errors(l_error)
            else:
                raise Exception("Getting Chip information failed")
        elif l_test == BMC_CONST.TFMR_ERRORS:
            for l_error in (
                    BMC_CONST.TB_PARITY_ERROR,
                    BMC_CONST.TFMR_PARITY_ERROR,
                    BMC_CONST.TFMR_HDEC_PARITY_ERROR,
                    BMC_CONST.TFMR_DEC_PARITY_ERROR,
                    BMC_CONST.TFMR_PURR_PARITY_ERROR,
                    BMC_CONST.TFMR_SPURR_PARITY_ERROR):
                self._testTFMR_Errors(l_error)
        else:
            raise Exception("Please provide valid test case")
        self.cv_HOST.ssh.state = SSHConnectionState.DISCONNECTED

        return BMC_CONST.FW_SUCCESS

    # Inject scom_data at scom_addr on every core of every chip in
    # self.l_dic and verify the processor-recovery messages after each one.
    def _inject_recoverable_error(self, scom_addr, scom_data):
        for l_pair in self.l_dic:
            l_chip = l_pair[0]
            for l_core in l_pair[1]:
                l_reg = self.form_scom_addr(scom_addr, l_core)
                l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s" % (
                    l_chip, l_reg, scom_data)
                console = self.cv_SYSTEM.sys_get_ipmi_console()
                console.run_command("dmesg -C")
                try:
                    l_res = console.run_command(l_cmd, timeout=20)
                except CommandFailed as cf:
                    l_res = cf.output
                    # exit code 1 from putscom is tolerated
                    if cf.exitcode != 1:
                        if any("Kernel panic - not syncing" in line for line in l_res):
                            raise Exception(
                                "Processor recovery failed: Kernel got panic")
                        elif any("Petitboot" in line for line in l_res):
                            raise Exception(
                                "System reached petitboot:Processor recovery failed"
                            )
                        elif any("ISTEP" in line for line in l_res):
                            raise Exception(
                                "System started booting: Processor recovery failed"
                            )
                        else:
                            raise Exception(
                                "Failed to inject thread hang recoverable error %s"
                                % str(cf))
                time.sleep(0.2)
                l_res = console.run_command("dmesg")
                self.verify_proc_recovery(l_res)

    # Send a putscom of scom_data to a random core of a random chip without
    # waiting for output, then wait for the system to come back to the OS.
    def _inject_on_random_core(self, scom_data):
        if self.proc_gen in ["POWER9"]:
            scom_addr = "20010A40"
        elif self.proc_gen in ["POWER8", "POWER8E"]:
            scom_addr = "10013100"
        else:
            return

        # Get random pair of chip vs cores
        l_pair = random.choice(self.l_dic)
        # Get random chip id
        l_chip = l_pair[0]
        # Get random core number
        l_core = random.choice(l_pair[1])

        l_reg = self.form_scom_addr(scom_addr, l_core)
        l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s" % (
            l_chip, l_reg, scom_data)

        console = self.cv_SYSTEM.sys_get_ipmi_console()
        console.sol.sendline(l_cmd)
        self.ipmi_monitor_sol_ipl(console, timeout=600)

    ##
    # @brief This function is used to test HMI: processor recovery done
    #        and also this function injecting error on all the cpus one by one and
    #        verify whether cpu is recovered or not.
    def _test_proc_recv_done(self):
        if self.proc_gen in ["POWER9"]:
            scom_addr = "20010A40"
        elif self.proc_gen in ["POWER8", "POWER8E"]:
            scom_addr = "10013100"
        else:
            return

        self._inject_recoverable_error(scom_addr, "0000000000100000")
        return

    ##
    # @brief This function is used to test HMI: proc_recv_error_masked
    #        Processor went through recovery for an error which is actually masked for reporting
    #        this function also injecting the error on all the cpu's one-by-one.
    def _test_proc_recv_error_masked(self):
        if self.proc_gen in ["POWER8", "POWER8E"]:
            scom_addr = "10013100"
        else:
            return

        self._inject_recoverable_error(scom_addr, "0000000000080000")
        return

    ##
    # @brief This function is used to test hmi malfunction alert:Core checkstop
    #        A processor core in the system has to be checkstopped (failed recovery).
    #        Injecting core checkstop on random core of random chip
    def _test_malfunction_allert(self):
        # Core checkstop will lead to system IPL, so we will wait for certain time for IPL
        # to finish
        self._inject_on_random_core("1000000000000000")

    ##
    # @brief This function is used to test HMI: Hypervisor resource error
    #        Injecting Hypervisor resource error on random core of random chip
    def _test_hyp_resource_err(self):
        self._inject_on_random_core("0000000000008000")

    ##
    # @brief This function tests timer facility related error injections and check
    #        the corresponding error got recovered. And this process is repeated
    #        for all the active cores in all the chips.
    #
    # @param i_error @type string: this is the type of error want to inject
    #                          BMC_CONST.TB_PARITY_ERROR
    #                          BMC_CONST.TFMR_PARITY_ERROR
    #                          BMC_CONST.TFMR_HDEC_PARITY_ERROR
    #                          BMC_CONST.TFMR_DEC_PARITY_ERROR
    #                          BMC_CONST.TFMR_PURR_PARITY_ERROR
    #                          BMC_CONST.TFMR_SPURR_PARITY_ERROR
    def _testTFMR_Errors(self, i_error):
        if self.proc_gen in ["POWER9"]:
            scom_addr = "20010A84"
        elif self.proc_gen in ["POWER8", "POWER8E"]:
            scom_addr = "10013281"
        else:
            return

        l_error = i_error
        for l_pair in self.l_dic:
            l_chip = l_pair[0]
            for l_core in l_pair[1]:
                l_reg = self.form_scom_addr(scom_addr, l_core)
                l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s" % (
                    l_chip, l_reg, l_error)
                console = self.cv_SYSTEM.sys_get_ipmi_console()
                console.run_command("dmesg -C")
                try:
                    l_res = console.run_command(l_cmd, timeout=20)
                except CommandFailed as cf:
                    l_res = cf.output
                    # exit code 1 from putscom is tolerated
                    if cf.exitcode != 1:
                        if any("Kernel panic - not syncing" in line for line in l_res):
                            l_msg = "TFMR error injection: Kernel got panic"
                        elif any("Petitboot" in line for line in l_res):
                            l_msg = "System reached petitboot:TFMR error injection recovery failed"
                        elif any("ISTEP" in line for line in l_res):
                            l_msg = "System started booting: TFMR error injection recovery failed"
                        else:
                            l_msg = "Failed to inject TFMR error %s " % str(cf)
                        # The diagnosis used to be computed and then dropped;
                        # report it like the processor-recovery tests do.
                        raise Exception(l_msg)
                time.sleep(0.2)
                l_res = console.run_command("dmesg")
                self.verify_timer_facility_recovery(l_res)
        return

    ##
    # @brief This function tests chip TOD related error injections and check
    #        the corresponding error got recovered. And this error injection
    #        happening on a random chip. This tod errors should test on systems
    #        having more than one processor socket(chip). On single chip system
    #        TOD error recovery won't work.
    #
    # @param i_error @type string: this is the type of error want to inject
    #                       These errors represented in common/OpTestConstants.py file.
    def _test_tod_errors(self, i_error):
        l_error = i_error
        l_pair = random.choice(self.l_dic)
        # Get random chip id
        l_chip = l_pair[0]
        l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s" % (
            l_chip, BMC_CONST.TOD_ERROR_REG, l_error)
        console = self.cv_SYSTEM.sys_get_ipmi_console()
        console.run_command("dmesg -C")

        # As of now putscom command to TOD register will fail with return code -1.
        # putscom indirectly call getscom to read the value again.
        # But getscom to TOD error reg there is no access
        # TOD Error reg has only WO access and there is no read access
        try:
            l_res = console.run_command(l_cmd, timeout=20)
        except CommandFailed as cf:
            l_res = cf.output
            if cf.exitcode != 1:
                if any("Kernel panic - not syncing" in line for line in l_res):
                    print("TOD ERROR Injection-kernel got panic")
                elif any("login:" in line for line in l_res):
                    print("System booted to host OS without any kernel panic message")
                elif any("Petitboot" in line for line in l_res):
                    print("System reached petitboot without any kernel panic message")
                elif any("ISTEP" in line for line in l_res):
                    print("System started booting without any kernel panic message")
                else:
                    raise Exception(
                        "TOD: PSS Hamming distance error injection failed %s"
                        % str(cf))
        time.sleep(0.2)
        l_res = console.run_command("dmesg")
        self.verify_timer_facility_recovery(l_res)
        return

    ##
    # @brief This function enables a single core
    def host_enable_single_core(self):
        self.cv_HOST.host_enable_single_core()
class OpTestIPMIReprovision():
    ##  Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostIP The IP address of the HOST
    # @param i_hostUser The userid to log into the HOST
    # @param i_hostPasswd The password of the userid to log into the HOST with
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_hostIP=None,
                 i_hostUser=None, i_hostPasswd=None):
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, i_hostIP, i_hostUser, i_hostPasswd)
        self.cv_HOST = OpTestHost(i_hostIP, i_hostUser, i_hostPasswd, i_bmcIP, i_ffdcDir)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      i_hostIP, i_hostUser, i_hostPasswd)
        self.util = OpTestUtil()
        self.opTestHMIHandling = OpTestHMIHandling(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                                   i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                                   i_hostIP, i_hostUser, i_hostPasswd)

    ##
    # @brief Hard reboot the host: IPMI power off, wait for standby, IPMI
    #        power on, check the host status and ping the host.
    #
    # @param i_standby_fail_msg @type string: message of the OpTestError
    #        raised when the standby wait does not report BMC_CONST.FW_SUCCESS
    #
    # @return None or raise OpTestError
    #
    def _power_cycle_host(self, i_standby_fail_msg):
        print("IPMI_Reprovision: Performing a IPMI Power OFF Operation")
        # Perform a IPMI Power OFF Operation(Immediate Shutdown)
        self.cv_IPMI.ipmi_power_off()
        l_rc = int(self.cv_SYSTEM.sys_wait_for_standby_state(
            BMC_CONST.SYSTEM_STANDBY_STATE_DELAY))
        if l_rc != BMC_CONST.FW_SUCCESS:
            raise OpTestError(i_standby_fail_msg)
        print("IPMI_Reprovision: System is in standby/Soft-off state")

        print("IPMI_Reprovision: Performing a IPMI Power ON Operation")
        # Perform a IPMI Power ON Operation
        self.cv_IPMI.ipmi_power_on()
        self.cv_SYSTEM.sys_check_host_status()
        self.util.PingFunc(self.cv_HOST.ip, BMC_CONST.PING_RETRY_POWERCYCLE)

    ##
    # @brief This function will cover following test steps
    #        Testcase: NVRAM Partition-IPMI Reprovision
    #        1. Update NVRAM config data with test config data
    #           i.e "nvram --update-config test-name=test-value"
    #        2. Issue an IPMI PNOR Reprovision request command, to reset NVRAM partition to default.
    #        3. Wait for PNOR Reprovision progress to complete(00).
    #        4. Do a Hard reboot(Power OFF/ON) to avoid nvram cache data.
    #        5. Once system booted, check for NVRAM partition whether the test config data
    #           got erased or not.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_nvram_ipmi_reprovision(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        self.cv_HOST.host_run_command("uname -a")
        self.cv_HOST.host_run_command(BMC_CONST.NVRAM_PRINT_CFG)
        print("IPMI_Reprovision: Updating the nvram partition with test cfg data")
        self.cv_HOST.host_run_command(BMC_CONST.NVRAM_UPDATE_CONFIG_TEST_DATA)
        self.cv_HOST.host_run_command(BMC_CONST.NVRAM_PRINT_CFG)
        print("IPMI_Reprovision: issuing ipmi pnor reprovision request")
        self.cv_SYSTEM.sys_issue_ipmi_pnor_reprovision_request()
        print("IPMI_Reprovision: wait for reprovision to complete")
        self.cv_SYSTEM.sys_wait_for_ipmi_pnor_reprovision_to_complete()
        print("IPMI_Reprovision: gathering the opal message logs")
        self.cv_HOST.host_gather_opal_msg_log()

        self._power_cycle_host(
            "IPMI_Reprovision: System failed to reach standby/Soft-off state after pnor reprovisioning")

        # The test data written before the reprovision must be gone now.
        l_res = self.cv_HOST.host_run_command(BMC_CONST.NVRAM_PRINT_CFG)
        if BMC_CONST.NVRAM_TEST_DATA in l_res:
            l_msg = "NVRAM Partition - IPMI Reprovision not happening, nvram test config data still exists"
            raise OpTestError(l_msg)
        print("NVRAM Partition - IPMI Reprovision is done, cleared the nvram test config data")
        self.cv_HOST.host_gather_opal_msg_log()
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will cover following test steps
    #        Testcase: GARD Partition-IPMI Reprovision
    #        1. Inject core checkstop using existing function from OpTestHMIHandling.py
    #        2. Do a Hard reboot(IPMI Power OFF/ON)
    #        3. Issue an IPMI PNOR Reprovision request command, to reset GUARD partition to default.
    #        4. Wait for IPMI PNOR Reprovision progress to complete(00).
    #        5. Check for GUARD partition whether the existing gard records erased or not.
    #        6. Reboot the system back to see system is booting fine or not.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_gard_ipmi_reprovision(self):
        l_standby_fail_msg = "IPMI_Reprovision: System failed to reach standby/Soft-off state"

        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("IPMI_Reprovision: Injecting system core checkstop to guard the phyisical cpu")
        self.opTestHMIHandling.testHMIHandling(BMC_CONST.HMI_MALFUNCTION_ALERT)

        self._power_cycle_host(l_standby_fail_msg)

        print("IPMI_Reprovision: issuing ipmi pnor reprovision request")
        self.cv_SYSTEM.sys_issue_ipmi_pnor_reprovision_request()
        print("IPMI_Reprovision: wait for reprovision to complete")
        self.cv_SYSTEM.sys_wait_for_ipmi_pnor_reprovision_to_complete()
        print("IPMI_Reprovision: gathering the opal message logs")
        self.cv_HOST.host_gather_opal_msg_log()

        self.cv_HOST.host_get_OS_Level()
        self.cv_HOST.host_clone_skiboot_source(BMC_CONST.CLONE_SKIBOOT_DIR)
        # Compile the necessary tools xscom-utils and gard utility
        self.cv_HOST.host_compile_gard_utility(BMC_CONST.CLONE_SKIBOOT_DIR)

        l_con = self.cv_SYSTEM.sys_get_ipmi_console()
        self.cv_IPMI.ipmi_host_login(l_con)
        self.cv_IPMI.ipmi_host_set_unique_prompt(l_con)
        self.cv_IPMI.run_host_cmd_on_ipmi_console("uname -a")
        l_res = self.cv_SYSTEM.sys_list_gard_records(BMC_CONST.GARD_TOOL_DIR)
        print(l_res)
        if BMC_CONST.NO_GARD_RECORDS not in l_res:
            l_msg = "IPMI: Reprovision not happening, gard records are not erased"
            raise OpTestError(l_msg)
        print("GUARD Partition - IPMI Reprovision is done, cleared HW gard records")

        self._power_cycle_host(l_standby_fail_msg)

        print("IPMI_Reprovision: gathering the opal message logs")
        self.cv_HOST.host_gather_opal_msg_log()
        return BMC_CONST.FW_SUCCESS
class OpTestFlashBase(unittest.TestCase):
    # Shared fixture and helper methods built on the objects handed out by
    # OpTestConfiguration.conf.

    ##
    # @brief Cache the configured system/BMC/host/IPMI objects and the BMC
    #        connection arguments on the test instance.
    #
    def setUp(self):
        conf = OpTestConfiguration.conf
        self.cv_SYSTEM = conf.system()
        self.cv_BMC = conf.bmc()
        self.cv_REST = self.cv_BMC.get_rest_api()
        self.cv_HOST = conf.host()
        self.cv_IPMI = conf.ipmi()
        self.platform = conf.platform()
        self.util = OpTestUtil()
        self.bmc_type = conf.args.bmc_type
        self.bmc_ip = conf.args.bmc_ip
        self.bmc_username = conf.args.bmc_username
        self.bmc_password = conf.args.bmc_password
        self.pupdate_binary = conf.args.pupdate
        self.pflash = conf.args.pflash

    ##
    # @brief Assert that the BMC and PNOR primary sides are active. When the
    #        PNOR reports the golden side, first ask IPMI to switch it back
    #        to the primary side and re-read the active sides.
    #
    def validate_side_activated(self):
        l_bmc_side, l_pnor_side = self.cv_IPMI.ipmi_get_side_activated()
        self.assertIn(BMC_CONST.PRIMARY_SIDE, l_bmc_side,
                      "BMC: Primary side is not active")
        if l_pnor_side == BMC_CONST.GOLDEN_SIDE:
            print("PNOR: Primary side is not active")
            bios_sensor = self.cv_IPMI.ipmi_get_golden_side_sensor_id()
            self.assertNotEqual(bios_sensor, None,
                                "Failed to get the BIOS Golden side sensor id")
            boot_count_sensor = self.cv_IPMI.ipmi_get_boot_count_sensor_id()
            self.assertNotEqual(boot_count_sensor, None,
                                "Failed to get the Boot Count sensor id")
            self.cv_IPMI.ipmi_set_pnor_primary_side(bios_sensor, boot_count_sensor)
            l_bmc_side, l_pnor_side = self.cv_IPMI.ipmi_get_side_activated()
        self.assertIn(BMC_CONST.PRIMARY_SIDE, l_pnor_side,
                      "PNOR: Primary side is not active")

    ##
    # @brief Print whatever ipmi_get_PNOR_level() returns.
    #
    def get_pnor_level(self):
        rc = self.cv_IPMI.ipmi_get_PNOR_level()
        print(rc)

    ##
    # @brief Assert that ping_fail_check() on the BMC host name is true.
    #
    def bmc_down_check(self):
        self.assertTrue(self.util.ping_fail_check(self.cv_BMC.host_name),
                        "FSP/BMC keeps on pinging up")

    ##
    # @brief Copy src_file_path to dst_file_path on the BMC using the
    #        configured BMC credentials.
    #
    def scp_file(self, src_file_path, dst_file_path):
        self.util.copyFilesToDest(src_file_path, self.bmc_username, self.bmc_ip,
                                  dst_file_path, self.bmc_password, "2",
                                  BMC_CONST.SCP_TO_REMOTE)

    ##
    # @brief Return the value of the first "version=" line found in any
    #        member of the tar archive at file_path (text after the last "=").
    #
    # @param file_path @type string: path of the tar archive to inspect
    #
    # @return version @type string, or fail the test when no member
    #         contains a "version=" line
    #
    def get_version_tar(self, file_path):
        version = None
        tar = tarfile.open(file_path)
        try:
            for member in tar.getmembers():
                fd = tar.extractfile(member)
                if fd is None:
                    # Directories and other non-regular members have no data.
                    continue
                content = fd.read()
                if not isinstance(content, str):
                    # Python 3 reads bytes; the matching below needs text.
                    content = content.decode("utf-8", "replace")
                if "version=" in content:
                    matches = [x for x in content.split("\n") if "version=" in x]
                    version = matches[0].split("=")[-1]
                    break
        finally:
            # Close the archive even when reading a member fails.
            tar.close()
        self.assertIsNotNone(version,
                             "No version= entry found in %s" % file_path)
        print(version)
        return version

    ##
    # @brief Return the text after the last "=" in the first "version=" line
    #        that grep finds in the given file on the BMC.
    #
    def get_image_version(self, path):
        output = self.cv_BMC.run_command("cat %s | grep \"version=\"" % path)
        return output[0].split("=")[-1]

    ##
    # @brief Best-effort removal of everything under /tmp/images on the BMC;
    #        a failing command is ignored.
    #
    def delete_images_dir(self):
        try:
            self.cv_BMC.run_command("rm -rf /tmp/images/*")
        except CommandFailed:
            pass

    ##
    # @brief Poll /tmp/images on the BMC (up to 20 times, 5 seconds apart)
    #        for an image directory whose MANIFEST version equals
    #        image_version.
    #
    # @return the matching directory path as listed by ls, or None when no
    #         directory matched within the retry budget
    #
    def get_image_path(self, image_version):
        retry = 0
        while retry < 20:
            image_list = []
            try:
                image_list = self.cv_BMC.run_command(
                    "ls -1 -d /tmp/images/*/ --color=never")
            except CommandFailed:
                # The listing failed (ls reported an error); retry.
                pass
            for image_dir in image_list:
                version = self.get_image_version(image_dir + "MANIFEST")
                if version == image_version:
                    return image_dir
            time.sleep(5)
            retry += 1

    ##
    # @brief Return the image id, i.e. the last directory component of the
    #        path found by get_image_path(version). Fails the test when no
    #        image directory was found.
    #
    def get_image_id(self, version):
        img_path = self.get_image_path(version)
        # get_image_path returns None on timeout; fail clearly instead of
        # crashing with AttributeError on None.split().
        self.assertIsNotNone(img_path,
                             "No image found under /tmp/images for version %s" % version)
        img_id = img_path.split("/")[-2]
        print("Image id for Host image is : %s" % img_id)
        return img_id

    ##
    # @brief Ping the BMC, then wait for BMC runtime through IPMI when the
    #        bmc_type contains "SMC" or through REST when it contains
    #        "OpenBMC".
    #
    def wait_for_bmc_runtime(self):
        self.util.PingFunc(self.bmc_ip, BMC_CONST.PING_RETRY_FOR_STABILITY)
        if "SMC" in self.bmc_type:
            self.cv_IPMI.ipmi_wait_for_bmc_runtime()
        elif "OpenBMC" in self.bmc_type:
            self.cv_REST.wait_for_bmc_runtime()
        return
class OpTestIPMIPowerControl():
    ##  Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostip The IP address of the HOST
    # @param i_hostuser The userid to log into the HOST
    # @param i_hostPasswd The password of the userid to log into the HOST with
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_hostip=None,
                 i_hostuser=None, i_hostPasswd=None):
        l_host_args = (i_hostip, i_hostuser, i_hostPasswd)
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, *l_host_args)
        self.cv_HOST = OpTestHost(i_hostip, i_hostuser, i_hostPasswd, i_bmcIP)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      *l_host_args)
        self.util = OpTestUtil()

    ##
    # @brief Wait for the standby state and raise OpTestError when the wait
    #        does not report 0.
    #
    def _expect_standby(self):
        l_rc = int(self.cv_SYSTEM.sys_wait_for_standby_state(
            BMC_CONST.SYSTEM_STANDBY_STATE_DELAY))
        if l_rc != 0:
            raise OpTestError("System failed to reach standby/Soft-off state")
        print("System is in standby/Soft-off state")

    ##
    # @brief Wait for the boot to complete, then ping the host.
    #
    def _expect_host_up(self):
        self.check_system_status()
        self.util.PingFunc(self.cv_HOST.ip, BMC_CONST.PING_RETRY_POWERCYCLE)

    ##
    # @brief This function will test below system power control operations
    #        IPMI Power ON
    #             Power OFF
    #             Power Soft
    #             Power Cycle
    #             Power Reset
    #        Each operation is issued through an ipmi command, and
    #        check_system_status verifies that FW and Host OS boot
    #        completed after every power-on style operation.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def testIPMIPowerControl(self):
        l_ipmi = self.cv_IPMI
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()

        # Immediate shutdown, then power back on.
        print("Performing a IPMI Power OFF Operation")
        l_ipmi.ipmi_power_off()
        self._expect_standby()
        print("Performing a IPMI Power ON Operation")
        l_ipmi.ipmi_power_on()
        self._expect_host_up()

        # Graceful shutdown, then power back on.
        print("Performing a IPMI Soft Power OFF Operation")
        l_ipmi.ipmi_power_soft()
        self._expect_standby()
        print("Perform a IPMI Power ON Operation")
        l_ipmi.ipmi_power_on()
        self._expect_host_up()

        # Power cycle (soft reboot); only issued while the system is ON.
        print("Performing a IPMI Power Cycle(Soft reboot) Operation ")
        l_ipmi.ipmi_power_cycle()
        self._expect_host_up()

        # Hard reset.
        print("Performing a IPMI Power Hard Reset Operation")
        l_ipmi.ipmi_power_reset()
        self._expect_host_up()

        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will check for system status and wait for
    #        FW and Host OS Boot progress to complete.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def check_system_status(self):
        l_fw_rc = int(self.cv_SYSTEM.sys_ipl_wait_for_working_state())
        if l_fw_rc != 0:
            raise OpTestError("System failed to boot")
        print("System booted to working state")

        l_os_rc = int(self.cv_SYSTEM.sys_wait_for_os_boot_complete())
        if l_os_rc != 0:
            raise OpTestError("System failed to boot Host OS")
        print("System booted to Host OS")

        return BMC_CONST.FW_SUCCESS
class OpTestEnergyScale():
    ##  Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #  @param i_platName Platform name, later passed to get_platform_power_limits
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostip The IP address of the HOST
    # @param i_hostuser The userid to log into the HOST
    # @param i_hostPasswd The password of the userid to log into the HOST with
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_platName=None,
                 i_hostip=None, i_hostuser=None, i_hostPasswd=None):
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, i_hostip, i_hostuser, i_hostPasswd)
        self.cv_HOST = OpTestHost(i_hostip, i_hostuser, i_hostPasswd, i_bmcIP)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      i_hostip, i_hostuser, i_hostPasswd)
        self.cv_PLATFORM = i_platName
        self.util = OpTestUtil()

    ##
    # @brief This function will get and return platform power limits for supported machine
    #
    # @param i_platform type: str platform name to get the power limits
    #
    # @return l_power_limit_low lower platform power limit
    #         l_power_limit_high higher platform power limit
    #         or raise OpTestError if it is a new platform
    #
    def get_platform_power_limits(self, i_platform):
        l_platform = i_platform
        if BMC_CONST.HABANERO in l_platform:
            l_power_limit_high = BMC_CONST.HABANERO_POWER_LIMIT_HIGH
            l_power_limit_low = BMC_CONST.HABANERO_POWER_LIMIT_LOW
        elif BMC_CONST.FIRESTONE in l_platform:
            l_power_limit_high = BMC_CONST.FIRESTONE_POWER_LIMIT_HIGH
            l_power_limit_low = BMC_CONST.FIRESTONE_POWER_LIMIT_LOW
        elif BMC_CONST.GARRISON in l_platform:
            l_power_limit_high = BMC_CONST.GARRISON_POWER_LIMIT_HIGH
            l_power_limit_low = BMC_CONST.GARRISON_POWER_LIMIT_LOW
        else:
            l_msg = "New platform, add power limit support to this platform and retry"
            raise OpTestError(l_msg)
        return l_power_limit_low, l_power_limit_high

    ##
    # @brief IPMI power off (immediate shutdown) and wait for standby.
    #
    # @return None or raise OpTestError when the standby wait does not report 0
    #
    def _power_off_to_standby(self):
        print("Performing a IPMI Power OFF Operation")
        # Perform a IPMI Power OFF Operation(Immediate Shutdown)
        self.cv_IPMI.ipmi_power_off()
        l_rc = int(self.cv_SYSTEM.sys_wait_for_standby_state(
            BMC_CONST.SYSTEM_STANDBY_STATE_DELAY))
        if l_rc != 0:
            raise OpTestError("System failed to reach standby/Soft-off state")
        print("System is in standby/Soft-off state")

    ##
    # @brief IPMI power on, check host status, ping the host and verify that
    #        the OCC status contains BMC_CONST.OCC_DEVICE_ENABLED.
    #
    # @return None or raise OpTestError when the OCC's are not enabled
    #
    def _power_on_and_check_occ(self):
        print("Performing a IPMI Power ON Operation")
        # Perform a IPMI Power ON Operation
        self.cv_IPMI.ipmi_power_on()
        self.cv_SYSTEM.sys_check_host_status()
        self.util.PingFunc(self.cv_HOST.ip, BMC_CONST.PING_RETRY_POWERCYCLE)
        l_status = self.cv_IPMI.ipmi_get_occ_status()
        print(l_status)
        if BMC_CONST.OCC_DEVICE_ENABLED not in l_status:
            raise OpTestError("OCC's are not in active state")
        print("OCC's are up and active")

    ##
    # @brief Print i_banner, then run every dcmi command of the suite
    #        through run_ipmi_cmd, in a fixed order.
    #
    # @param i_banner @type string: line printed before the commands run
    #
    def _run_all_dcmi_cmds(self, i_banner):
        print(i_banner)
        for l_cmd in (BMC_CONST.IPMI_DCMI_DISCOVER,
                      BMC_CONST.IPMI_DCMI_POWER_READING,
                      BMC_CONST.IPMI_DCMI_POWER_GET_LIMIT,
                      BMC_CONST.IPMI_DCMI_SENSORS,
                      BMC_CONST.IPMI_DCMI_GET_MC_ID_STRING,
                      BMC_CONST.IPMI_DCMI_GET_TEMP_READING,
                      BMC_CONST.IPMI_DCMI_GET_CONF_PARAM,
                      BMC_CONST.IPMI_DCMI_OOB_DISCOVER):
            self.run_ipmi_cmd(l_cmd)

    ##
    # @brief This function will test Energy scale features at standby state
    #        1. Power OFF the system.
    #        2. Validate below Energy scale features at standby state
    #           ipmitool dcmi power get_limit                 :Get the configured power limits.
    #           ipmitool dcmi power set_limit limit <value>   :Power Limit Requested in Watts.
    #           ipmitool dcmi power activate                  :Activate the set power limit.
    #           ipmitool dcmi power deactivate                :Deactivate the set power limit.
    #        3. Once platform power limit activated execute below dcmi commands at standby state.
    #           ipmitool dcmi discover                        :This command is used to discover
    #                                                          supported capabilities in DCMI.
    #           ipmitool dcmi power reading                   :Get power related readings from the system.
    #           ipmitool dcmi power get_limit                 :Get the configured power limits.
    #           ipmitool dcmi power sensors                   :Prints the available DCMI sensors.
    #           ipmitool dcmi get_temp_reading                :Get Temperature Sensor Readings.
    #        4. Power ON the system.
    #        5. Check after system booted to runtime, whether occ's are active or not.
    #        6. Again in runtime execute all dcmi commands to check the functionality.
    #
    # @return None or raise OpTestError
    #
    def test_energy_scale_at_standby_state(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Energy Scale Test 1: Get, Set, activate and deactivate platform power limit at power off")
        l_power_limit_low, l_power_limit_high = self.get_platform_power_limits(
            self.cv_PLATFORM)

        self._power_off_to_standby()

        self.cv_IPMI.ipmi_sdr_clear()
        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_set_power_limit(l_power_limit_high)
        self.cv_IPMI.ipmi_activate_power_limit()
        self.cv_IPMI.ipmi_deactivate_power_limit()
        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_activate_power_limit()
        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_deactivate_power_limit()
        self.cv_IPMI.ipmi_set_power_limit(l_power_limit_low)
        self.cv_IPMI.ipmi_activate_power_limit()
        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_set_power_limit(l_power_limit_high)
        self.cv_IPMI.ipmi_get_power_limit()

        self._run_all_dcmi_cmds("Get All dcmi readings at power off")

        self._power_on_and_check_occ()
        print(self.cv_IPMI.ipmi_get_power_limit())

        self._run_all_dcmi_cmds("Get All dcmi readings at runtime")

    ##
    # @brief This function will test Energy scale features at runtime state
    #        1. Power OFF the system.
    #        2. Power On the system to boot to host OS
    #        3. Validate below Energy scale features at runtime state
    #           ipmitool dcmi power get_limit                 :Get the configured power limits.
    #           ipmitool dcmi power set_limit limit <value>   :Power Limit Requested in Watts.
    #           ipmitool dcmi power activate                  :Activate the set power limit.
    #           ipmitool dcmi power deactivate                :Deactivate the set power limit.
    #        4. Once platform power limit activated execute below dcmi commands at runtime state.
    #           ipmitool dcmi discover                        :This command is used to discover
    #                                                          supported capabilities in DCMI.
    #           ipmitool dcmi power reading                   :Get power related readings from the system.
    #           ipmitool dcmi power get_limit                 :Get the configured power limits.
    #           ipmitool dcmi power sensors                   :Prints the available DCMI sensors.
    #           ipmitool dcmi get_temp_reading                :Get Temperature Sensor Readings.
    #        5. Issue Power OFF/ON to check whether system boots after setting platform power limit at runtime.
    #        6. Again in runtime execute all dcmi commands to check the functionality.
    #
    # @return None or raise OpTestError
    #
    def test_energy_scale_at_runtime_state(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Energy Scale Test 2: Get, Set, activate and deactivate platform power limit at runtime")
        l_power_limit_low, l_power_limit_high = self.get_platform_power_limits(
            self.cv_PLATFORM)

        self._power_off_to_standby()
        self._run_all_dcmi_cmds("Get All dcmi readings at power off")
        self.cv_IPMI.ipmi_sdr_clear()
        print(self.cv_IPMI.ipmi_get_power_limit())

        self._power_on_and_check_occ()

        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_set_power_limit(l_power_limit_high)
        self.cv_IPMI.ipmi_activate_power_limit()
        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_deactivate_power_limit()
        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_activate_power_limit()
        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_set_power_limit(l_power_limit_low)
        self.cv_IPMI.ipmi_activate_power_limit()
        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_deactivate_power_limit()
        print(self.cv_IPMI.ipmi_get_power_limit())
        self.cv_IPMI.ipmi_activate_power_limit()
        print(self.cv_IPMI.ipmi_get_power_limit())

        self._run_all_dcmi_cmds("Get All dcmi readings at runtime")

        # Reboot with the power limit still active and re-check everything.
        self._power_off_to_standby()
        self._power_on_and_check_occ()
        self._run_all_dcmi_cmds("Get All dcmi readings at runtime")

    ##
    # @brief This function will test below dcmi commands at both standby and runtime states
    #           ipmitool dcmi discover                        :This command is used to discover
    #                                                          supported capabilities in DCMI.
    #           ipmitool dcmi power reading                   :Get power related readings from the system.
    #           ipmitool dcmi power get_limit                 :Get the configured power limits.
    #           ipmitool dcmi power sensors                   :Prints the available DCMI sensors.
    #           ipmitool dcmi get_temp_reading                :Get Temperature Sensor Readings.
    #           ipmitool dcmi get_mc_id_string                :Get management controller identifier string.
    #           ipmitool dcmi get_conf_param                  :Get DCMI Configuration Parameters.
    #           ipmitool dcmi oob_discover                    :Ping/Pong Message for DCMI Discovery.
    #
    # @return None or raise OpTestError
    #
    def test_dcmi_at_standby_and_runtime_states(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Energy scale Test 3: Get Sensors, Temperature and Power reading's at power off and runtime")
        self._power_off_to_standby()
        self._run_all_dcmi_cmds("Get All dcmi readings at power off")
        self._power_on_and_check_occ()
        self._run_all_dcmi_cmds("Get All dcmi readings at runtime")

    ##
    # @brief It will execute and test the return code of ipmi command.
    #
    # @param i_cmd @type string:The ipmitool command, for example: chassis power on; echo $?
    #
    # @return l_res @type list: output lines of the command, or raise
    #         OpTestError when the last output line is a non-zero integer
    #
    def run_ipmi_cmd(self, i_cmd):
        l_cmd = i_cmd
        l_res = self.cv_IPMI.ipmitool_execute_command(l_cmd)
        print(l_res)
        l_res = l_res.splitlines()
        # The last output line is parsed as the command's exit status.
        if int(l_res[-1]):
            # "%s", not "%c": "%c" only accepts a single character and would
            # raise TypeError here instead of the intended OpTestError.
            l_msg = "IPMI: command failed %s" % l_cmd
            raise OpTestError(l_msg)
        return l_res
class OpTestFWTS():
    ##  Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostip The IP address of the HOST
    # @param i_hostuser The userid to log into the HOST
    # @param i_hostPasswd The password of the userid to log into the HOST with
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_hostip=None,
                 i_hostuser=None, i_hostPasswd=None):
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, i_hostip, i_hostuser, i_hostPasswd)
        self.cv_HOST = OpTestHost(i_hostip, i_hostuser, i_hostPasswd, i_bmcIP, i_ffdcDir)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      i_hostip, i_hostuser, i_hostPasswd)
        self.util = OpTestUtil()
        # Host credentials kept for copying the fwts script in test_fwts.
        self.user = i_hostuser
        self.ip = i_hostip
        self.passwd = i_hostPasswd

    ##
    # @brief This function just brings the system to host OS.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def test_system_reboot(self):
        print("Testing FWTS: Booting system to OS")
        print("Performing a IPMI Power OFF Operation")
        # Perform a IPMI Power OFF Operation(Immediate Shutdown)
        self.cv_IPMI.ipmi_power_off()
        l_rc = int(self.cv_SYSTEM.sys_wait_for_standby_state(
            BMC_CONST.SYSTEM_STANDBY_STATE_DELAY))
        if l_rc == BMC_CONST.FW_SUCCESS:
            print("System is in standby/Soft-off state")
        else:
            l_msg = "System failed to reach standby/Soft-off state"
            raise OpTestError(l_msg)

        self.cv_IPMI.ipmi_power_on()
        self.cv_SYSTEM.sys_check_host_status()
        self.util.PingFunc(self.cv_HOST.ip, BMC_CONST.PING_RETRY_POWERCYCLE)

        self.cv_IPMI.clear_ssh_keys(self.cv_HOST.ip)

        print("Gathering the OPAL msg logs")
        self.cv_HOST.host_gather_opal_msg_log()
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function just executes the fwts_execution.sh on host OS.
    #        It does nothing when the host OS level does not contain "Ubuntu".
    #
    # @return None or raise OpTestError when the script cannot be copied
    #
    def test_fwts(self):
        l_oslevel = self.cv_HOST.host_get_OS_Level()
        if "Ubuntu" not in l_oslevel:
            return

        # Copy the fwts execution file to the tmp folder in the host.
        # base_path is the part of this file's directory that precedes the
        # first "testcases" component.
        base_path = (os.path.dirname(os.path.abspath(__file__))).split('testcases')[0]
        fwts_script = os.path.join(base_path, "testcases", "fwts_execution.sh")
        try:
            self.util.copyFilesToDest(fwts_script, self.user,
                                      self.ip, "/tmp/", self.passwd)
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit must propagate.
            l_msg = "Copying fwts file to host failed"
            print(l_msg)
            raise OpTestError(l_msg)

        l_res = self.cv_HOST.host_run_command("/tmp/fwts_execution.sh")
        print(l_res)
class OpTestFlashBase(unittest.TestCase):
    # Shared fixture and helper methods built on the objects handed out by
    # OpTestConfiguration.conf.

    ##
    # @brief Cache the configured system/BMC/host/IPMI objects and the BMC
    #        connection arguments on the test instance.
    #
    def setUp(self):
        conf = OpTestConfiguration.conf
        self.cv_SYSTEM = conf.system()
        self.cv_BMC = conf.bmc()
        self.cv_REST = self.cv_BMC.get_rest_api()
        self.cv_HOST = conf.host()
        self.cv_IPMI = conf.ipmi()
        self.platform = conf.platform()
        self.util = OpTestUtil()
        self.bmc_type = conf.args.bmc_type
        self.bmc_ip = conf.args.bmc_ip
        self.bmc_username = conf.args.bmc_username
        self.bmc_password = conf.args.bmc_password
        self.pupdate_binary = conf.args.pupdate
        self.pflash = conf.args.pflash

    ##
    # @brief Assert that both the BMC and the PNOR primary sides are active.
    #
    def validate_side_activated(self):
        l_bmc_side, l_pnor_side = self.cv_IPMI.ipmi_get_side_activated()
        self.assertIn(BMC_CONST.PRIMARY_SIDE, l_bmc_side,
                      "BMC: Primary side is not active")
        # TODO force setting of primary side to BIOS Golden side sensor
        self.assertIn(BMC_CONST.PRIMARY_SIDE, l_pnor_side,
                      "PNOR: Primary side is not active")

    ##
    # @brief Print whatever ipmi_get_PNOR_level() returns.
    #
    def get_pnor_level(self):
        rc = self.cv_IPMI.ipmi_get_PNOR_level()
        print(rc)

    ##
    # @brief Assert that ping_fail_check() on the BMC host name is true.
    #
    def bmc_down_check(self):
        self.assertTrue(self.util.ping_fail_check(self.cv_BMC.host_name),
                        "FSP/BMC keeps on pinging up")

    ##
    # @brief Copy src_file_path to dst_file_path on the BMC using the
    #        configured BMC credentials.
    #
    def scp_file(self, src_file_path, dst_file_path):
        self.util.copyFilesToDest(src_file_path, self.bmc_username, self.bmc_ip,
                                  dst_file_path, self.bmc_password, "2",
                                  BMC_CONST.SCP_TO_REMOTE)

    ##
    # @brief Return the value of the first "version=" line found in any
    #        member of the tar archive at file_path (text after the last "=").
    #
    # @param file_path @type string: path of the tar archive to inspect
    #
    # @return version @type string, or fail the test when no member
    #         contains a "version=" line
    #
    def get_version_tar(self, file_path):
        version = None
        tar = tarfile.open(file_path)
        try:
            for member in tar.getmembers():
                fd = tar.extractfile(member)
                if fd is None:
                    # Directories and other non-regular members have no data.
                    continue
                content = fd.read()
                if not isinstance(content, str):
                    # Python 3 reads bytes; the matching below needs text.
                    content = content.decode("utf-8", "replace")
                if "version=" in content:
                    matches = [x for x in content.split("\n") if "version=" in x]
                    version = matches[0].split("=")[-1]
                    break
        finally:
            # Close the archive even when reading a member fails.
            tar.close()
        self.assertIsNotNone(version,
                             "No version= entry found in %s" % file_path)
        print(version)
        return version

    ##
    # @brief Return the text after the last "=" in the first "version=" line
    #        that grep finds in the given file on the BMC.
    #
    def get_image_version(self, path):
        output = self.cv_BMC.run_command("cat %s | grep \"version=\"" % path)
        return output[0].split("=")[-1]

    ##
    # @brief Ping the BMC, then wait for BMC runtime through IPMI when the
    #        bmc_type contains "SMC" or through REST when it contains
    #        "OpenBMC".
    #
    def wait_for_bmc_runtime(self):
        self.util.PingFunc(self.bmc_ip, BMC_CONST.PING_RETRY_FOR_STABILITY)
        if "SMC" in self.bmc_type:
            self.cv_IPMI.ipmi_wait_for_bmc_runtime()
        elif "OpenBMC" in self.bmc_type:
            self.cv_REST.wait_for_bmc_runtime()
        return
class OpTestSystemBootSequence():
    """Boot-sequence and chassis power-restore-policy tests driven over IPMI."""

    ## Initialize this object
    #  @param i_bmcIP The IP address of the BMC
    #  @param i_bmcUser The userid to log into the BMC with
    #  @param i_bmcPasswd The password of the userid to log into the BMC with
    #  @param i_bmcUserIpmi The userid to issue the BMC IPMI commands with
    #  @param i_bmcPasswdIpmi The password of BMC IPMI userid
    #  @param i_ffdcDir Optional param to indicate where to write FFDC
    #
    # "Only required for inband tests" else Default = None
    # @param i_hostip The IP address of the HOST
    # @param i_hostuser The userid to log into the HOST
    # @param i_hostpasswd The password of the userid to log into the HOST with
    #
    def __init__(self, i_bmcIP, i_bmcUser, i_bmcPasswd,
                 i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir=None, i_hostip=None,
                 i_hostuser=None, i_hostpasswd=None):
        self.cv_BMC = OpTestBMC(i_bmcIP, i_bmcUser, i_bmcPasswd, i_ffdcDir)
        self.cv_IPMI = OpTestIPMI(i_bmcIP, i_bmcUserIpmi, i_bmcPasswdIpmi,
                                  i_ffdcDir, i_hostip, i_hostuser, i_hostpasswd)
        self.cv_HOST = OpTestHost(i_hostip, i_hostuser, i_hostpasswd,
                                  i_bmcIP, i_ffdcDir)
        self.cv_SYSTEM = OpTestSystem(i_bmcIP, i_bmcUser, i_bmcPasswd,
                                      i_bmcUserIpmi, i_bmcPasswdIpmi, i_ffdcDir,
                                      i_hostip, i_hostuser, i_hostpasswd)
        self.util = OpTestUtil()

    ##
    # @brief Power the host off over IPMI and wait for the standby state.
    #
    # @param i_standby_rc value sys_wait_for_standby_state() must return
    #        (after int conversion) for the wait to count as successful
    #
    # @return None or raise OpTestError
    #
    def _power_off_to_standby(self, i_standby_rc):
        print("Performing a IPMI Power OFF Operation")
        # Perform a IPMI Power OFF Operation(Immediate Shutdown)
        self.cv_IPMI.ipmi_power_off()
        l_rc = int(self.cv_SYSTEM.sys_wait_for_standby_state(
            BMC_CONST.SYSTEM_STANDBY_STATE_DELAY))
        if l_rc != i_standby_rc:
            l_msg = "System failed to reach standby/Soft-off state"
            raise OpTestError(l_msg)
        print("System is in standby/Soft-off state")

    ##
    # @brief Announce and perform an IPMI power on.
    #
    def _power_on(self):
        print("Performing a IPMI Power ON Operation")
        # Perform a IPMI Power ON Operation
        self.cv_IPMI.ipmi_power_on()

    ##
    # @brief Check host status, ping the host, clear its cached SSH keys
    #        and gather the OPAL msg log.
    #
    def _check_host_and_gather_logs(self):
        self.cv_SYSTEM.sys_check_host_status()
        self.util.PingFunc(self.cv_HOST.ip, BMC_CONST.PING_RETRY_POWERCYCLE)
        self.cv_IPMI.clear_ssh_keys(self.cv_HOST.ip)
        print("Gathering the OPAL msg logs")
        self.cv_HOST.host_gather_opal_msg_log()

    ##
    # @brief This function will test mc cold reset boot sequence
    #        It has below steps
    #        1. Do a system Power OFF(Host should go down)
    #        2. Set auto reboot policy to off(chassis policy always-off)
    #        3. Issue a BMC Cold reset.
    #        4. After BMC comes up, Issue a Power ON of the system
    #        5. Check for system status and gather OPAL msg log.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def testMcColdResetBootSequence(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Testing MC Cold reset boot sequence")
        # This test compares the standby result against literal 0.
        self._power_off_to_standby(0)

        print("Setting the system power policy to always-off")
        self.cv_IPMI.ipmi_set_power_policy("always-off")
        # Perform a BMC Cold Reset Operation
        self.cv_IPMI.ipmi_cold_reset()

        self._power_on()
        self._check_host_and_gather_logs()
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will test mc warm reset boot sequence
    #        It has below steps
    #        1. Do a system Power OFF(Host should go down)
    #        2. Set auto reboot policy to off(chassis policy always-off)
    #        3. Issue a BMC Warm reset.
    #        4. After BMC comes up, Issue a Power ON of the system
    #        5. Check for system status and gather OPAL msg log.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def testMcWarmResetBootSequence(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Testing MC Warm reset boot sequence")
        # This test compares the standby result against literal 0.
        self._power_off_to_standby(0)

        print("Setting the system power policy to always-off")
        self.cv_IPMI.ipmi_set_power_policy("always-off")
        # Perform a BMC Warm Reset Operation
        self.cv_IPMI.ipmi_warm_reset()

        self._power_on()
        self._check_host_and_gather_logs()
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will test system auto reboot policy always-off.
    #        It has below steps
    #        1. Do a system Power OFF(Host should go down)
    #        2. Set auto reboot policy to off(chassis policy always-off)
    #        3. Issue a BMC Cold reset.
    #        4. After BMC comes up, expect the system to not boot.
    #        5. Issue a Power ON of the system
    #        6. Check for system status and gather OPAL msg log.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def testSystemPowerPolicyOff(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Testing System Power Policy:always-off")
        self._power_off_to_standby(BMC_CONST.FW_SUCCESS)

        print("Setting the system power policy to always-off")
        self.cv_IPMI.ipmi_set_power_policy("always-off")
        # Perform a BMC Cold Reset Operation
        if int(self.cv_SYSTEM.sys_cold_reset_bmc()) == BMC_CONST.FW_SUCCESS:
            print("System auto reboot policy for always-off works as expected")
            print("System Power status not changed")
            self._power_on()
        else:
            # A policy failure is reported but does not abort the test.
            print("Power restore policy failed")
            l_msg = "Fail: chassis policy always-off making the system to auto boot"
            print(l_msg)

        self._check_host_and_gather_logs()
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will test system auto reboot policy always-on.
    #        It has below steps
    #        1. Do a system Power OFF(Host should go down)
    #        2. Set auto reboot policy to on(chassis policy always-on)
    #        3. Issue a BMC Cold reset.
    #        4. After BMC comes up, Here expect the system to boot,
    #           If not power policy is not working as expected
    #        5. Check for system status and gather OPAL msg log.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def testSystemPowerPolicyOn(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Testing System Power Policy:Always-ON")
        self._power_off_to_standby(BMC_CONST.FW_SUCCESS)

        print("Setting the system power policy to always-on")
        self.cv_IPMI.ipmi_set_power_policy("always-on")
        # Perform a BMC Cold Reset Operation
        if int(self.cv_SYSTEM.sys_cold_reset_bmc()) == BMC_CONST.FW_FAILED:
            print("System auto reboot policy for always-on works as expected")
            print("System Power status changed as expected")
        else:
            print("Power restore policy failed")
            l_msg = "Fail: chassis policy always-on making the system not to auto boot"
            # Report the failure like the other policy tests do; the message
            # used to be built and then dropped.
            print(l_msg)
            # NOTE(review): the manual power on is taken to belong to this
            # failure branch (the host did not boot by itself) — confirm.
            self._power_on()

        self._check_host_and_gather_logs()
        return BMC_CONST.FW_SUCCESS

    ##
    # @brief This function will test system auto reboot policy previous
    #        It has below steps
    #        1. Do a system Power OFF(Host should go down)
    #        2. Set auto reboot policy to previous(chassis policy previous)
    #        3. Issue a BMC Cold reset.
    #        4. After BMC comes up, system power status will change based on
    #           previous power status before issuing cold reset.
    #        5. Check for system status and gather OPAL msg log.
    #
    # @return BMC_CONST.FW_SUCCESS or raise OpTestError
    #
    def testSystemPowerPolicyPrevious(self):
        self.cv_SYSTEM.sys_bmc_power_on_validate_host()
        print("Testing System Power Policy:previous")
        self._power_off_to_standby(BMC_CONST.FW_SUCCESS)

        print("Setting the system power policy to previous")
        self.cv_IPMI.ipmi_set_power_policy("previous")
        # Perform a BMC Cold Reset Operation
        if int(self.cv_SYSTEM.sys_cold_reset_bmc()) == BMC_CONST.FW_SUCCESS:
            print("System auto reboot policy for previous works as expected")
            print("System Power status not changed")
            self._power_on()
        else:
            print("Power restore policy failed")
            l_msg = "Fail: chassis policy previous making the system to auto boot"
            print(l_msg)

        self._check_host_and_gather_logs()

        # Perform a BMC Cold Reset Operation
        if int(self.cv_SYSTEM.sys_cold_reset_bmc()) == BMC_CONST.FW_SUCCESS:
            print("System auto reboot policy for previous works as expected")
            print("System Power status not changed")
        else:
            print("Power restore policy previous failed")
            l_msg = "Fail: chassis policy previous making the system to change power status"
            print(l_msg)
        # Perform a IPMI Power ON Operation
        # NOTE(review): taken to run on both outcomes so the host is up for
        # the checks below — confirm the intended nesting.
        self.cv_IPMI.ipmi_power_on()

        self._check_host_and_gather_logs()
        # Return the documented status like every other test in this class.
        return BMC_CONST.FW_SUCCESS
class OpTestHMIHandling(unittest.TestCase): def setUp(self): conf = OpTestConfiguration.conf self.cv_HOST = conf.host() self.cv_IPMI = conf.ipmi() self.cv_FSP = conf.bmc() self.cv_SYSTEM = conf.system() self.bmc_type = conf.args.bmc_type self.util = OpTestUtil() def ipmi_monitor_sol_ipl(self, console, timeout): ipl_status = False try: console.sol.expect('login:'******'s(executing getscom -l to get chip id's) l_res = self.cv_HOST.host_run_command( "PATH=/usr/local/sbin:$PATH getscom -l") l_chips = [] for line in l_res: matchObj = re.search("(\d{8}).*processor", line) if matchObj: l_chips.append(matchObj.group(1)) if not l_chips: raise Exception("Getscom failed to list processor chip ids") l_chips.sort() print l_chips # ['00000000', '00000001', '00000010'] # Currently getting the list of active core id's with respect to each chip is by using opal msg log # TODO: Need to identify best way to get list of cores(If Opal msg log is empty) l_cmd = "cat /sys/firmware/opal/msglog | grep -i CHIP" l_res = self.cv_HOST.host_run_command(l_cmd) l_cores = {} self.l_dic = [] for line in l_res: matchObj = re.search("Chip (\d{1,2}) Core ([a-z0-9])", line) if matchObj: if l_cores.has_key(int(matchObj.group(1))): (l_cores[int(matchObj.group(1))]).append(matchObj.group(2)) else: l_cores[int(matchObj.group(1))] = list(matchObj.group(2)) if not l_cores: raise Exception( "Failed in getting core ids information from OPAL msg log") print l_cores # {0: ['4', '5', '6', 'c', 'd', 'e'], 1: ['4', '5', '6', 'c', 'd', 'e'], 10: ['4', '5', '6', 'c', 'd', 'e']} l_cores = sorted(l_cores.iteritems()) # Remove master core where injecting core checkstop leads to IPL expected failures # after 2 failures system will starts boot in Golden side of PNOR l_cores[0][1].pop(0) print l_cores i = 0 for tup in l_cores: new_list = [l_chips[i], tup[1]] self.l_dic.append(new_list) i += 1 print self.l_dic # self.l_dic is a list of chip id's, core id's . 
and is of below format # [['00000000', ['4', '5', '6', 'c', 'd', 'e']], ['00000001', ['4', '5', '6', 'c', 'd', 'e']], ['00000010', ['4', '5', '6', 'c', 'd', 'e']]] # In-order to inject HMI errors on cpu's, cpu should be running, so disabling the sleep states 1 and 2 of all CPU's self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE2) self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE1) self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE0) self.cv_HOST.host_run_command(BMC_CONST.DISABLE_CPU_SLEEP_STATE1) self.cv_HOST.host_run_command(BMC_CONST.DISABLE_CPU_SLEEP_STATE2) self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE2) self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE1) self.cv_HOST.host_run_command(BMC_CONST.GET_CPU_SLEEP_STATE0) l_oslevel = self.cv_HOST.host_get_OS_Level() try: if "Ubuntu" in l_oslevel: self.cv_HOST.host_run_command("service kdump-tools stop") else: self.cv_HOST.host_run_command("service kdump stop") except CommandFailed as cf: if cf.exitcode == 5: # kdump may not be enabled, so it's not a failure to stop it pass def clearGardEntries(self): self.cv_SYSTEM.goto_state(OpSystemState.OS) self.util.PingFunc(self.cv_HOST.ip, BMC_CONST.PING_RETRY_POWERCYCLE) if "FSP" in self.bmc_type: res = self.cv_FSP.fspc.run_command("gard --clr all") self.assertIn("Success in clearing Gard Data", res, "Failed to clear GARD entries") print self.cv_FSP.fspc.run_command("gard --gc cpu") else: g = self.cv_HOST.host_run_command( "PATH=/usr/local/sbin:$PATH opal-gard list all") if "No GARD entries to display" not in g: self.cv_HOST.host_run_command( "PATH=/usr/local/sbin:$PATH opal-gard clear all") cleared_gard = self.cv_HOST.host_run_command( "PATH=/usr/local/sbin:$PATH opal-gard list") self.assertIn("No GARD entries to display", cleared_gard, "Failed to clear GARD entries") self.cv_SYSTEM.goto_state(OpSystemState.OFF) self.cv_SYSTEM.goto_state(OpSystemState.OS) ## # @brief This function executes HMI test case based on the i_test 
# value, Before test starts
# disabling kdump service to make sure system reboots, after injecting non-recoverable errors.
#
# @param i_test @type int: this is the type of test case want to execute
#                          BMC_CONST.HMI_PROC_RECV_DONE: Processor recovery done
#                          BMC_CONST.HMI_PROC_RECV_ERROR_MASKED: proc_recv_error_masked
#                          BMC_CONST.HMI_MALFUNCTION_ALERT: malfunction_alert
#                          BMC_CONST.HMI_HYPERVISOR_RESOURCE_ERROR: hypervisor resource error
def _testHMIHandling(self, i_test):
    """Prepare the host, then run the HMI scenario selected by *i_test*.

    Besides the four values listed above, BMC_CONST.TOD_ERRORS and
    BMC_CONST.TFMR_ERRORS are accepted; each runs a fixed list of
    injections in order.

    Returns:
        BMC_CONST.FW_SUCCESS.

    Raises:
        Exception: *i_test* is not a known scenario, or self.l_dic is empty
            when the TOD scenario is requested.
    """
    l_test = i_test
    self.util.PingFunc(self.cv_HOST.ip, BMC_CONST.PING_RETRY_POWERCYCLE)
    self.init_test()
    self.cv_SYSTEM.goto_state(OpSystemState.OS)
    l_con = self.cv_SYSTEM.sys_get_ipmi_console()
    self.cv_SYSTEM.host_console_login()
    self.cv_SYSTEM.host_console_unique_prompt()
    for l_cmd in ("uname -a", "cat /etc/os-release", "lscpu", "dmesg -D"):
        l_con.run_command(l_cmd)

    if l_test == BMC_CONST.HMI_PROC_RECV_DONE:
        self._test_proc_recv_done()
    elif l_test == BMC_CONST.HMI_PROC_RECV_ERROR_MASKED:
        self._test_proc_recv_error_masked()
    elif l_test == BMC_CONST.HMI_MALFUNCTION_ALERT:
        self._test_malfunction_allert()
    elif l_test == BMC_CONST.HMI_HYPERVISOR_RESOURCE_ERROR:
        self._test_hyp_resource_err()
    elif l_test == BMC_CONST.TOD_ERRORS:
        # TOD Error recovery works on systems having more than one chip TOD
        # Skip this test on single chip systems(as recovery fails on 1S systems)
        l_chip_count = len(self.l_dic)
        if l_chip_count == 1:
            l_msg = "This is a single chip system, TOD Error recovery won't work"
            print(l_msg)
            return BMC_CONST.FW_SUCCESS
        if l_chip_count < 1:
            raise Exception("Getting Chip information failed")
        l_tod_errors = (
            BMC_CONST.PSS_HAMMING_DISTANCE,
            BMC_CONST.INTERNAL_PATH_OR_PARITY_ERROR,
            BMC_CONST.TOD_DATA_PARITY_ERROR,
            BMC_CONST.TOD_SYNC_CHECK_ERROR,
            BMC_CONST.FSM_STATE_PARITY_ERROR,
            BMC_CONST.MASTER_PATH_CONTROL_REGISTER,
            BMC_CONST.PORT_0_PRIMARY_CONFIGURATION_REGISTER,
            BMC_CONST.PORT_1_PRIMARY_CONFIGURATION_REGISTER,
            BMC_CONST.PORT_0_SECONDARY_CONFIGURATION_REGISTER,
            BMC_CONST.PORT_1_SECONDARY_CONFIGURATION_REGISTER,
            BMC_CONST.SLAVE_PATH_CONTROL_REGISTER,
            BMC_CONST.INTERNAL_PATH_CONTROL_REGISTER,
            BMC_CONST.PR_SC_MS_SL_CONTROL_REGISTER,
        )
        for l_error in l_tod_errors:
            self._test_tod_errors(l_error)
    elif l_test == BMC_CONST.TFMR_ERRORS:
        l_tfmr_errors = (
            BMC_CONST.TB_PARITY_ERROR,
            BMC_CONST.TFMR_PARITY_ERROR,
            BMC_CONST.TFMR_HDEC_PARITY_ERROR,
            BMC_CONST.TFMR_DEC_PARITY_ERROR,
            BMC_CONST.TFMR_PURR_PARITY_ERROR,
            BMC_CONST.TFMR_SPURR_PARITY_ERROR,
        )
        for l_error in l_tfmr_errors:
            self._testTFMR_Errors(l_error)
    else:
        raise Exception("Please provide valid test case")

    # Mark the host SSH session as disconnected once the injections are done.
    self.cv_HOST.ssh.state = SSHConnectionState.DISCONNECTED
    return BMC_CONST.FW_SUCCESS

##
# @brief This function is used to test HMI: processor recovery done
#        and also this function injecting error on all the cpus one by one and
#        verify whether cpu is recovered or not.
def _test_proc_recv_done(self):
    """Inject the "processor recovery done" error on every core in self.l_dic.

    For each (chip, core) the kernel log is cleared, the error is injected
    with putscom on the IPMI console, and the fresh dmesg output is passed
    to verify_proc_recovery().

    Raises:
        Exception: putscom failed with an exit code other than 1.
    """
    for l_pair in self.l_dic:
        l_chip = l_pair[0]
        for l_core in l_pair[1]:
            l_reg = "1%s013100" % l_core
            l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s 0000000000100000" % (
                l_chip, l_reg)
            console = self.cv_SYSTEM.sys_get_ipmi_console()
            console.run_command("dmesg -C")
            try:
                console.run_command(l_cmd, timeout=120)
            except CommandFailed as cf:
                # Exit code 1 from putscom is tolerated.
                if cf.exitcode != 1:
                    # Classify the failure from the output of the failed
                    # command itself; the result variable is not assigned
                    # when run_command raises.
                    # assumes CommandFailed exposes the captured console
                    # text as `output` — TODO confirm; without it the
                    # generic failure below is raised.
                    l_out = getattr(cf, "output", None) or []
                    if isinstance(l_out, str):
                        l_out = l_out.splitlines()
                    if any("Kernel panic - not syncing" in line for line in l_out):
                        raise Exception(
                            "Processor recovery failed: Kernel got panic")
                    elif any("Petitboot" in line for line in l_out):
                        raise Exception(
                            "System reached petitboot:Processor recovery failed")
                    elif any("ISTEP" in line for line in l_out):
                        raise Exception(
                            "System started booting: Processor recovery failed")
                    else:
                        raise Exception(
                            "Failed to inject thread hang recoverable error %s"
                            % str(cf))
            time.sleep(0.2)
            l_res = console.run_command("dmesg")
            self.verify_proc_recovery(l_res)
    return

##
# @brief This function is used to test HMI: proc_recv_error_masked
#        Processor went through recovery for an error which is actually masked for reporting
#        this function also injecting the error on all the cpu's one-by-one.
def _test_proc_recv_error_masked(self):
    """Inject the "proc_recv_error_masked" error on every core in self.l_dic.

    For each (chip, core) the kernel log is cleared, the error is injected
    with putscom on the IPMI console, and the fresh dmesg output is passed
    to verify_proc_recovery().

    Raises:
        Exception: putscom failed with an exit code other than 1.
    """
    for l_pair in self.l_dic:
        l_chip = l_pair[0]
        for l_core in l_pair[1]:
            l_reg = "1%s013100" % l_core
            l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s 0000000000080000" % (
                l_chip, l_reg)
            console = self.cv_SYSTEM.sys_get_ipmi_console()
            console.run_command("dmesg -C")
            try:
                console.run_command(l_cmd, timeout=120)
            except CommandFailed as cf:
                # Exit code 1 from putscom is tolerated.
                if cf.exitcode != 1:
                    # Classify the failure from the output of the failed
                    # command itself; the result variable is not assigned
                    # when run_command raises.
                    # assumes CommandFailed exposes the captured console
                    # text as `output` — TODO confirm; without it the
                    # generic failure below is raised.
                    l_out = getattr(cf, "output", None) or []
                    if isinstance(l_out, str):
                        l_out = l_out.splitlines()
                    if any("Kernel panic - not syncing" in line for line in l_out):
                        raise Exception(
                            "Processor recovery failed: Kernel got panic")
                    elif any("Petitboot" in line for line in l_out):
                        raise Exception(
                            "System reached petitboot:Processor recovery failed")
                    elif any("ISTEP" in line for line in l_out):
                        raise Exception(
                            "System started booting: Processor recovery failed")
                    else:
                        raise Exception(
                            "Failed to inject thread hang recoverable error %s"
                            % str(cf))
            time.sleep(0.2)
            l_res = console.run_command("dmesg")
            self.verify_proc_recovery(l_res)
    return

##
# @brief This function is used to test hmi malfunction alert:Core checkstop
#        A processor core in the system has to be checkstopped (failed recovery).
# Injecting core checkstop on random core of random chip
def _test_malfunction_allert(self):
    """Send the core-checkstop putscom to a randomly chosen chip/core, then
    watch the SOL console through ipmi_monitor_sol_ipl() for up to 600s."""
    # One [chip id, core ids] entry first, then one core from that entry.
    chosen = random.choice(self.l_dic)
    core_id = random.choice(chosen[1])
    inject_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s 1%s013100 1000000000000000" % (
        chosen[0], core_id)
    # Core checkstop will lead to system IPL, so the command is only sent
    # (not waited on) and the IPL is monitored afterwards.
    sol_console = self.cv_SYSTEM.sys_get_ipmi_console()
    sol_console.sol.sendline(inject_cmd)
    self.ipmi_monitor_sol_ipl(sol_console, timeout=600)

##
# @brief This function is used to test HMI: Hypervisor resource error
#        Injecting Hypervisor resource error on random core of random chip
def _test_hyp_resource_err(self):
    """Send the hypervisor-resource-error putscom to a randomly chosen
    chip/core, then watch the SOL console through ipmi_monitor_sol_ipl()
    for up to 600s."""
    # One [chip id, core ids] entry first, then one core from that entry.
    chosen = random.choice(self.l_dic)
    core_id = random.choice(chosen[1])
    inject_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s 1%s013100 0000000000008000" % (
        chosen[0], core_id)
    sol_console = self.cv_SYSTEM.sys_get_ipmi_console()
    sol_console.sol.sendline(inject_cmd)
    self.ipmi_monitor_sol_ipl(sol_console, timeout=600)

##
# @brief This function tests timer facility related error injections and check
#        the corresponding error got recovered. And this process is repeated
#        for all the active cores in all the chips.
#
# @param i_error @type string: this is the type of error want to inject
#                          BMC_CONST.TB_PARITY_ERROR
#                          BMC_CONST.TFMR_PARITY_ERROR
#                          BMC_CONST.TFMR_HDEC_PARITY_ERROR
#                          BMC_CONST.TFMR_DEC_PARITY_ERROR
#                          BMC_CONST.TFMR_PURR_PARITY_ERROR
#                          BMC_CONST.TFMR_SPURR_PARITY_ERROR
def _testTFMR_Errors(self, i_error):
    """Inject the timer-facility error *i_error* on every core in self.l_dic.

    For each (chip, core) the kernel log is cleared, the error is injected
    with putscom on the IPMI console, and the fresh dmesg output is passed
    to verify_timer_facility_recovery().

    Raises:
        Exception: putscom failed with an exit code other than 1.
    """
    l_error = i_error
    for l_pair in self.l_dic:
        l_chip = l_pair[0]
        for l_core in l_pair[1]:
            l_reg = "1%s013281" % l_core
            l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s" % (
                l_chip, l_reg, l_error)
            console = self.cv_SYSTEM.sys_get_ipmi_console()
            console.run_command("dmesg -C")
            try:
                console.run_command(l_cmd, timeout=120)
            except CommandFailed as cf:
                # Exit code 1 from putscom is tolerated.
                if cf.exitcode != 1:
                    # Classify the failure from the output of the failed
                    # command itself; the result variable is not assigned
                    # when run_command raises.
                    # assumes CommandFailed exposes the captured console
                    # text as `output` — TODO confirm; without it the
                    # generic failure below is raised.
                    l_out = getattr(cf, "output", None) or []
                    if isinstance(l_out, str):
                        l_out = l_out.splitlines()
                    if any("Kernel panic - not syncing" in line for line in l_out):
                        l_msg = "TFMR error injection: Kernel got panic"
                    elif any("Petitboot" in line for line in l_out):
                        l_msg = "System reached petitboot:TFMR error injection recovery failed"
                    elif any("ISTEP" in line for line in l_out):
                        l_msg = "System started booting: TFMR error injection recovery failed"
                    else:
                        l_msg = "Failed to inject TFMR error %s " % str(cf)
                    # The diagnosis used to be built and then dropped for the
                    # first three cases; every failed injection now fails
                    # the test.
                    raise Exception(l_msg)
            time.sleep(0.2)
            l_res = console.run_command("dmesg")
            self.verify_timer_facility_recovery(l_res)
    return

##
# @brief This function tests chip TOD related error injections and check
#        the corresponding error got recovered. And this error injection
#        happening on a random chip. This tod errors should test on systems
#        having more than one processor socket(chip). On single chip system
#        TOD error recovery won't work.
#
# @param i_error @type string: this is the type of error want to inject
#                       These errors represented in common/OpTestConstants.py file.
def _test_tod_errors(self, i_error):
    """Inject the TOD error *i_error* on a randomly chosen chip.

    The kernel log is cleared, the error is written to
    BMC_CONST.TOD_ERROR_REG with putscom on the IPMI console, and the fresh
    dmesg output is passed to verify_timer_facility_recovery().

    Raises:
        Exception: putscom failed with an exit code other than 1 and its
            output shows none of the recognised console states.
    """
    l_error = i_error
    l_pair = random.choice(self.l_dic)
    # Get random chip id
    l_chip = l_pair[0]
    l_cmd = "PATH=/usr/local/sbin:$PATH putscom -c %s %s %s" % (
        l_chip, BMC_CONST.TOD_ERROR_REG, l_error)
    console = self.cv_SYSTEM.sys_get_ipmi_console()
    console.run_command("dmesg -C")

    # As of now putscom command to TOD register will fail with return code -1.
    # putscom indirectly call getscom to read the value again.
    # But getscom to TOD error reg there is no access
    # TOD Error reg has only WO access and there is no read access
    try:
        console.run_command(l_cmd, timeout=120)
    except CommandFailed as cf:
        # Exit code 1 from putscom is tolerated.
        if cf.exitcode != 1:
            # Classify the failure from the output of the failed command
            # itself; the result variable is not assigned when run_command
            # raises.
            # assumes CommandFailed exposes the captured console text as
            # `output` — TODO confirm; without it the generic failure below
            # is raised.
            l_out = getattr(cf, "output", None) or []
            if isinstance(l_out, str):
                l_out = l_out.splitlines()
            # These console states are only reported here, not raised.
            if any("Kernel panic - not syncing" in line for line in l_out):
                print("TOD ERROR Injection-kernel got panic")
            elif any("login:" in line for line in l_out):
                print("System booted to host OS without any kernel panic message")
            elif any("Petitboot" in line for line in l_out):
                print("System reached petitboot without any kernel panic message")
            elif any("ISTEP" in line for line in l_out):
                print("System started booting without any kernel panic message")
            else:
                raise Exception(
                    "TOD: PSS Hamming distance error injection failed %s"
                    % str(cf))
    time.sleep(0.2)
    l_res = console.run_command("dmesg")
    self.verify_timer_facility_recovery(l_res)
    return

##
# @brief This function enables a single core
def host_enable_single_core(self):
    """Delegate to the host object's host_enable_single_core()."""
    self.cv_HOST.host_enable_single_core()