def test_lab_setup_kpi(collect_kpi):
    """
    This test extracts the time required to run lab_setup.sh only.
    """
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    lab_name = ProjVar.get_var("LAB_NAME")
    log_path = LabSetup.LOG_PATH
    kpi_name = LabSetup.NAME
    host = "controller-0"
    start_pattern = LabSetup.START
    end_pattern = LabSetup.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              log_path=log_path, lab_name=lab_name, host=host,
                              start_pattern=start_pattern,
                              end_pattern=end_pattern, sudo=True, topdown=True,
                              uptime=15, fail_ok=False)
def test_system_install_kpi(collect_kpi):
    """
    This is the time to install the full system from beginning to end.
    The caveat is that it is designed to work with auto-install, due to the
    way the end_pattern is constructed.
    """
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    lab_name = ProjVar.get_var("LAB_NAME")
    host = "controller-0"
    kpi_name = SystemInstall.NAME
    log_path = SystemInstall.LOG_PATH
    start_pattern = SystemInstall.START
    start_path = SystemInstall.START_PATH
    end_pattern = SystemInstall.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              log_path=log_path, lab_name=lab_name, host=host,
                              start_pattern=start_pattern,
                              end_pattern=end_pattern, start_path=start_path,
                              sudo=True, topdown=True, start_pattern_init=True,
                              fail_ok=False)
def test_kpi_cyclictest_vm(collect_kpi, prepare_test_session,
                           get_rt_guest_image, get_hypervisor,
                           add_admin_role_func):
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    hypervisor = get_hypervisor
    testable_hypervisors[hypervisor]['for_vm_test'] = True
    LOG.info('Hypervisor chosen to host rt vm: {}'.format(hypervisor))

    vm_id, vcpu_count, non_rt_core = create_rt_vm(hypervisor)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id)

    cyclictest_dir = '/root/cyclictest/'
    program = os.path.join(os.path.normpath(cyclictest_dir),
                           os.path.basename(CYCLICTEST_EXE))
    program_active_con = os.path.join(os.path.normpath(CYCLICTEST_DIR),
                                      os.path.basename(CYCLICTEST_EXE))
    # All vcpus except the housekeeping (non-RT) core run the cyclictest
    # threads
    cpu_info = {
        'vm_cores': [id_ for id_ in range(vcpu_count) if id_ != non_rt_core]
    }

    with vm_helper.ssh_to_vm_from_natbox(vm_id) as vm_ssh:
        prep_test_on_host(vm_ssh, vm_id, program_active_con,
                          ControllerClient.get_active_controller().host,
                          cyclictest_dir=cyclictest_dir)
        run_log, hist_file = run_cyclictest(vm_ssh, program, vm_id,
                                            cyclictest_dir=cyclictest_dir,
                                            cpu_info=cpu_info)

        LOG.info("Process and upload test results")
        local_run_log, local_hist_file = fetch_results_from_target(
            target_ssh=vm_ssh, target_host=vm_id, run_log=run_log,
            hist_file=hist_file, is_guest=True)

    testable_hypervisors[hypervisor]['for_vm_test'] = False

    avg_val, six_nines_val = calculate_results(run_log=local_run_log,
                                               hist_file=local_hist_file,
                                               cores_to_ignore=None,
                                               num_cores=(vcpu_count - 1))

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_VM_AVG,
                              kpi_val=avg_val, uptime=15,
                              unit=CyclicTest.UNIT)
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_VM_6_NINES,
                              kpi_val=six_nines_val, uptime=15,
                              unit=CyclicTest.UNIT)
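# Note on the two recorded values (an assumption based on standard cyclictest
# reporting, not confirmed by this file): calculate_results() is taken to
# return the mean latency across the tested cores and the "six nines" value,
# i.e. the latency bound covering 99.9999% of the histogram samples, both
# expressed in CyclicTest.UNIT.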
def _test_heat_kpi(collect_kpi):
    """
    Time to launch heat stacks. Only applies to labs where .heat_resources
    is present.
    """
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    lab_name = ProjVar.get_var("LAB_NAME")
    log_path = HeatStacks.LOG_PATH
    kpi_name = HeatStacks.NAME
    host = "controller-0"
    start_pattern = HeatStacks.START
    end_pattern = HeatStacks.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              log_path=log_path, lab_name=lab_name, host=host,
                              start_pattern=start_pattern,
                              end_pattern=end_pattern, sudo=True, topdown=True,
                              start_pattern_init=True, uptime=15,
                              fail_ok=False)
def collected_upgrade_controller0_kpi(lab, collect_kpi, init_time=None):
    """
    Args:
        lab:
        collect_kpi:
        init_time:

    Returns:

    """
    if not collect_kpi:
        LOG.info("KPI only test. Skipping as KPI collection is not enabled.")
        return

    lab_name = lab['short_name']
    log_path = UpgradeController0.LOG_PATH
    kpi_name = UpgradeController0.NAME
    host = "controller-1"
    start_pattern = UpgradeController0.START
    start_path = UpgradeController0.START_PATH
    end_pattern = UpgradeController0.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              log_path=log_path, lab_name=lab_name, host=host,
                              start_pattern=start_pattern,
                              start_path=start_path, end_pattern=end_pattern,
                              init_time=init_time, sudo=True, topdown=True)
def test_node_install_kpi(collect_kpi):
    """
    This test measures the install time for each node in the system.
    """
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    lab_name = ProjVar.get_var("LAB_NAME")
    hosts = system_helper.get_hosts()
    print("System has hosts: {}".format(hosts))

    log_path = NodeInstall.LOG_PATH
    start_cmd = 'head -n 1 {}'.format(log_path)
    end_cmd = 'tail -n 1 {}'.format(log_path)
    date_cmd = '{} -n 1 /var/log/bash.log'

    with host_helper.ssh_to_host('controller-0') as con0_ssh:
        bash_start = con0_ssh.exec_sudo_cmd(date_cmd.format('head'),
                                            fail_ok=False)[1]
        bash_end = con0_ssh.exec_sudo_cmd(date_cmd.format('tail'),
                                          fail_ok=False)[1]
    bash_start = re.findall(TIMESTAMP_PATTERN, bash_start.strip())[0]
    bash_end = re.findall(TIMESTAMP_PATTERN, bash_end.strip())[0]
    date_ = bash_start.split('T')[0]

    def _get_time_delta(start_, end_):
        start_ = start_.replace(',', '.')
        end_ = end_.replace(',', '.')
        start_t = '{}T{}'.format(date_, start_)
        end_t = '{}T{}'.format(date_, end_)

        time_delta = common.get_timedelta_for_isotimes(
            start_t, end_t).total_seconds()
        if time_delta < 0:
            # Negative delta means the install crossed midnight; re-anchor
            # the end time to the last date seen in bash.log
            end_t = '{}T{}'.format(bash_end.split('T')[0], end_)
            time_delta = common.get_timedelta_for_isotimes(
                start_t, end_t).total_seconds()
        return time_delta

    for host in hosts:
        with host_helper.ssh_to_host(hostname=host) as host_ssh:
            start_output = host_ssh.exec_sudo_cmd(start_cmd,
                                                  fail_ok=False)[1].strip()
            end_output = host_ssh.exec_sudo_cmd(end_cmd,
                                                fail_ok=False)[1].strip()

        kpi_name = NodeInstall.NAME.format(host)
        start_time = re.findall(NodeInstall.TIMESTAMP_PATTERN, start_output)[0]
        end_time = re.findall(NodeInstall.TIMESTAMP_PATTERN, end_output)[0]
        install_duration = _get_time_delta(start_time, end_time)
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=kpi_name, log_path=log_path,
                                  lab_name=lab_name, kpi_val=install_duration,
                                  fail_ok=False)
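# Illustrative sketch only (not executed by any test): why _get_time_delta()
# above retries with bash_end's date. Assuming common.get_timedelta_for_isotimes()
# behaves like plain datetime subtraction on ISO-8601 strings, a same-date
# subtraction goes negative whenever the install crosses midnight:
#
#   >>> from datetime import datetime
#   >>> fmt = '%Y-%m-%dT%H:%M:%S.%f'
#   >>> start = datetime.strptime('2020-01-01T23:59:50.0', fmt)
#   >>> (datetime.strptime('2020-01-01T00:00:10.0', fmt) - start).total_seconds()
#   -86380.0
#   >>> (datetime.strptime('2020-01-02T00:00:10.0', fmt) - start).total_seconds()
#   20.0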
def test_kpi_cyclictest_hypervisor(collect_kpi, prepare_test_session,
                                   get_hypervisor):
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    global testable_hypervisors
    chosen_hypervisor = get_hypervisor
    cpu_info = testable_hypervisors[chosen_hypervisor]
    cpu_info['for_host_test'] = True
    LOG.info(
        'Hypervisor chosen to run cyclictest: {}'.format(chosen_hypervisor))

    active_controller_name = system_helper.get_active_controller_name()
    program = os.path.join(os.path.normpath(CYCLICTEST_DIR),
                           os.path.basename(CYCLICTEST_EXE))
    LOG.debug('program={}'.format(program))

    with host_helper.ssh_to_host(chosen_hypervisor) as target_ssh:
        prep_test_on_host(target_ssh, chosen_hypervisor, program,
                          active_controller_name)
        run_log, hist_file = run_cyclictest(target_ssh, program,
                                            chosen_hypervisor,
                                            cpu_info=cpu_info)

        LOG.info("Process and upload test results")
        local_run_log, local_hist_file = fetch_results_from_target(
            target_ssh=target_ssh, target_host=chosen_hypervisor,
            active_con_name=active_controller_name, run_log=run_log,
            hist_file=hist_file)

    testable_hypervisors[chosen_hypervisor]['for_host_test'] = False

    avg_val, six_nines_val = calculate_results(
        run_log=local_run_log, hist_file=local_hist_file,
        cores_to_ignore=None, num_cores=len(cpu_info['vm_cores']))

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_HYPERVISOR_AVG,
                              kpi_val=avg_val, uptime=15,
                              unit=CyclicTest.UNIT)
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_HYPERVISOR_6_NINES,
                              kpi_val=six_nines_val, uptime=15,
                              unit=CyclicTest.UNIT)
def test_swact_controller_platform(wait_for_con_drbd_sync_complete,
                                   collect_kpi):
    """
    Verify swact active controller

    Test Steps:
        - Swact active controller
        - Verify standby controller and active controller are swapped
        - Verify nodes are ready in kubectl get nodes

    """
    if system_helper.is_aio_simplex():
        skip("Simplex system detected")

    if not wait_for_con_drbd_sync_complete:
        skip(SkipSysType.LESS_THAN_TWO_CONTROLLERS)

    LOG.tc_step('retrieve active and available controllers')
    pre_active_controller, pre_standby_controller = \
        system_helper.get_active_standby_controllers()
    assert pre_standby_controller, "No standby controller available"

    collect_kpi = None if container_helper.is_stx_openstack_deployed() else \
        collect_kpi
    init_time = None
    if collect_kpi:
        init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)

    LOG.tc_step(
        "Swact active controller and ensure active controller is changed")
    host_helper.swact_host(hostname=pre_active_controller)

    LOG.tc_step("Check hosts are Ready in kubectl get nodes after swact")
    kube_helper.wait_for_nodes_ready(hosts=(pre_active_controller,
                                            pre_standby_controller),
                                     timeout=30)

    if collect_kpi:
        kpi_name = SwactPlatform.NAME
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=kpi_name, init_time=init_time,
                                  log_path=SwactPlatform.LOG_PATH,
                                  end_pattern=SwactPlatform.END,
                                  host=pre_standby_controller,
                                  start_host=pre_active_controller,
                                  start_pattern=SwactPlatform.START,
                                  start_path=SwactPlatform.START_PATH,
                                  uptime=1, fail_ok=False)
def collect_upgrade_start_kpi(lab, collect_kpi):
    lab_name = lab['short_name']
    log_path = UpgradeStart.LOG_PATH
    kpi_name = UpgradeStart.NAME
    host = "controller-0"
    start_pattern = UpgradeStart.START
    end_pattern = UpgradeStart.END

    try:
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=kpi_name, log_path=log_path,
                                  lab_name=lab_name, host=host,
                                  start_pattern=start_pattern,
                                  end_pattern=end_pattern, sudo=True,
                                  topdown=True, uptime=15)
    except ValueError as evalue:
        LOG.info("Unable to collect upgrade start kpi for lab {}: {}".format(
            lab_name, evalue))
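# The try/except above implies (though this file does not confirm it) that
# kpi_log_parser.record_kpi() raises ValueError when the start/end patterns
# cannot be found in the log; that case is tolerated here because the
# upgrade-start markers may not exist on every lab.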
def collect_upgrade_complete_kpi(lab, collect_kpi):
    """
    This measures the time to run upgrade-activate.
    """
    if not collect_kpi:
        LOG.info("KPI only test. Skipping as KPI collection is not enabled.")
        return

    lab_name = lab['short_name']
    host = "controller-0"
    kpi_name = UpgradeComplete.NAME
    log_path = UpgradeComplete.LOG_PATH
    start_pattern = UpgradeComplete.START
    end_pattern = UpgradeComplete.END

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              log_path=log_path, lab_name=lab_name, host=host,
                              start_pattern=start_pattern,
                              end_pattern=end_pattern, sudo=True, topdown=True,
                              uptime=15)
def test_kpi_cinder_volume_creation(collect_kpi):
    """
    KPI test - cinder volume creation
    Args:
        collect_kpi:

    Test Steps:
        - Create a 20g cinder volume using default tis guest
        - Collect duration kpi from cinder create cli sent to volume available

    """
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    LOG.tc_step("Create a 20g volume from the default tis guest and collect "
                "the total volume creation time")

    # init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)
    image = glance_helper.get_guest_image(guest_os='tis-centos-guest-qcow2',
                                          cleanup='function')
    vol_id = cinder_helper.create_volume(name='20g', source_id=image, size=20,
                                         cleanup='function')[1]
    vol_updated = cinder_helper.get_volume_show_values(
        vol_id, 'updated_at')[0].split('.')[0]

    # Logs no longer available for image downloading and conversion.
    # code_download, out_download = \
    #     kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
    #                               kpi_name=ImageDownload.NAME, host=None,
    #                               log_path=ImageDownload.LOG_PATH,
    #                               end_pattern=ImageDownload.GREP_PATTERN,
    #                               python_pattern=ImageDownload.PYTHON_PATTERN,
    #                               init_time=init_time, uptime=1,
    #                               unit=ImageDownload.UNIT)
    # code_conv, out_conv = \
    #     kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
    #                               kpi_name=ImageConversion.NAME, host=None,
    #                               log_path=ImageConversion.LOG_PATH,
    #                               end_pattern=ImageConversion.GREP_PATTERN,
    #                               python_pattern=ImageConversion.PYTHON_PATTERN,
    #                               init_time=init_time, uptime=1,
    #                               unit=ImageConversion.UNIT)

    code_create, out_create = \
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=VolCreate.NAME, host=None,
                                  log_path=VolCreate.LOG_PATH,
                                  end_pattern=vol_updated,
                                  start_pattern=VolCreate.START, uptime=1)

    # assert code_download == 0, out_download
    # assert code_conv == 0, out_conv
    assert code_create == 0, out_create
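# How the volume-creation KPI above is delimited (inferred from the call):
# VolCreate.START marks the create request in the log at VolCreate.LOG_PATH,
# and the volume's own 'updated_at' timestamp (sub-second part stripped) is
# used verbatim as the end_pattern, so the KPI spans from the cinder create
# request until the volume's final state update.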
def test_kpi_evacuate(self, vm_type, get_hosts, collect_kpi):
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")
    if not system_helper.is_avs() and vm_type in ('dpdk', 'avp'):
        skip('avp vif unsupported by OVS')

    def operation(vm_id_, host_):
        vm_helper.evacuate_vms(host=host_, vms_to_check=vm_id_, ping_vms=True)

    vm_test, vm_observer = vm_helper.launch_vm_pair(
        vm_type=vm_type, storage_backing='local_image')

    host_src_evacuation, host_observer = self._prepare_test(
        vm_test, vm_observer, get_hosts.copy(), with_router=True)
    time.sleep(60)
    with_router_kpi = vm_helper.get_traffic_loss_duration_on_operation(
        vm_test, vm_observer, operation, vm_test, host_src_evacuation)
    assert with_router_kpi > 0, \
        "Traffic loss duration is not properly detected"
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=Evacuate.NAME.format(vm_type, 'with'),
                              kpi_val=with_router_kpi / 1000, uptime=5)

    host_helper.wait_for_hosts_ready(hosts=host_src_evacuation)

    if len(get_hosts) > 2:
        host_src_evacuation, host_observer = self._prepare_test(
            vm_test, vm_observer, get_hosts.copy(), with_router=False)
        time.sleep(60)
        without_router_kpi = vm_helper.get_traffic_loss_duration_on_operation(
            vm_test, vm_observer, operation, vm_test, host_src_evacuation)
        assert without_router_kpi > 0, \
            "Traffic loss duration is not properly detected"
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=Evacuate.NAME.format(vm_type, 'no'),
                                  kpi_val=without_router_kpi / 1000, uptime=5)
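# Unit note (inferred from the /1000 above and the 'Time(ms)' unit used by
# the migration KPIs below): get_traffic_loss_duration_on_operation() reports
# milliseconds, so the evacuation KPI is recorded in seconds.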
def collect_upgrade_orchestration_kpi(lab, collect_kpi):
    """
    Args:
        lab:
        collect_kpi:

    Returns:

    """
    if not collect_kpi:
        LOG.info("KPI only test. Skipping as KPI collection is not enabled.")
        return

    lab_name = lab['short_name']
    print("Upgrade host: {}".format(upgrade_host))
    kpi_name = UpgradeOrchestration.NAME.format(upgrade_host)
    orchestration_duration = \
        orchestration_helper.get_current_strategy_phase_duration(
            "upgrade", "apply")

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              lab_name=lab_name,
                              kpi_val=orchestration_duration)
def test_swact_uncontrolled_kpi_platform(collect_kpi):
    if not collect_kpi or container_helper.is_stx_openstack_deployed():
        skip("KPI test for platform only. Skipping as KPI collection is not "
             "enabled or the openstack application is deployed.")

    start_host, end_host = system_helper.get_active_standby_controllers()
    if not end_host:
        skip("No standby host to swact to")

    init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)
    host_helper.reboot_hosts(hostnames=start_host)
    kpi_name = SwactUncontrolledPlatform.NAME
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              init_time=init_time,
                              log_path=SwactUncontrolledPlatform.LOG_PATH,
                              end_pattern=SwactUncontrolledPlatform.END,
                              host=end_host, start_host=start_host,
                              start_pattern=SwactUncontrolledPlatform.START,
                              start_path=SwactUncontrolledPlatform.START_PATH,
                              uptime=5, fail_ok=False)
def test_drbd_kpi(no_simplex, collect_kpi):
    """
    This test extracts the DRBD sync time from log files
    """
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    lab_name = ProjVar.get_var('LAB_NAME')
    log_path = DRBDSync.LOG_PATH
    kpi_name = DRBDSync.NAME
    end_pattern = DRBDSync.GREP_PATTERN
    python_pattern = DRBDSync.PYTHON_PATTERN

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=kpi_name,
                              log_path=log_path,
                              python_pattern=python_pattern,
                              lab_name=lab_name, unit=DRBDSync.UNIT,
                              average_for_all=True, end_pattern=end_pattern,
                              uptime=15, fail_ok=False)
def test_kpi_vm_launch_migrate_rebuild(ixia_required, collect_kpi,
                                       hosts_per_backing, boot_from):
    """
    KPI test - vm startup time.
    Args:
        collect_kpi:
        hosts_per_backing:
        boot_from:

    Test Steps:
        - Create a flavor with 2 vcpus, dedicated cpu policy and storage
          backing (if boot-from-image)
        - Launch a vm from specified boot source
        - Collect the vm startup time via event log

    """
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    # vm launch KPI
    if boot_from != 'volume':
        storage_backing = boot_from
        hosts = hosts_per_backing.get(boot_from)
        if not hosts:
            skip(SkipStorageBacking.NO_HOST_WITH_BACKING.format(boot_from))

        target_host = hosts[0]
        LOG.tc_step("Clear local storage cache on {}".format(target_host))
        storage_helper.clear_local_storage_cache(host=target_host)

        LOG.tc_step("Create a flavor with 2 vcpus, dedicated cpu policy, "
                    "and {} storage".format(storage_backing))
        boot_source = 'image'
        flavor = nova_helper.create_flavor(
            name=boot_from, vcpus=2, storage_backing=storage_backing)[1]
    else:
        target_host = None
        boot_source = 'volume'
        storage_backing = \
            keywords.host_helper.get_storage_backing_with_max_hosts()[0]
        LOG.tc_step("Create a flavor with 2 vcpus, dedicated cpu policy, "
                    "and {} storage".format(storage_backing))
        flavor = nova_helper.create_flavor(
            vcpus=2, storage_backing=storage_backing)[1]

    ResourceCleanup.add('flavor', flavor)
    nova_helper.set_flavor(flavor, **{FlavorSpec.CPU_POLICY: 'dedicated'})

    host_str = ' on {}'.format(target_host) if target_host else ''
    LOG.tc_step("Boot a vm from {}{} and collect vm startup time".format(
        boot_from, host_str))

    mgmt_net_id = network_helper.get_mgmt_net_id()
    tenant_net_id = network_helper.get_tenant_net_id()
    internal_net_id = network_helper.get_internal_net_id()
    nics = [{'net-id': mgmt_net_id},
            {'net-id': tenant_net_id},
            {'net-id': internal_net_id}]

    vm_id = vm_helper.boot_vm(boot_from, flavor=flavor, source=boot_source,
                              nics=nics, cleanup='function')[1]

    code_boot, out_boot = \
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=VmStartup.NAME.format(boot_from),
                                  log_path=VmStartup.LOG_PATH,
                                  end_pattern=VmStartup.END.format(vm_id),
                                  start_pattern=VmStartup.START.format(vm_id),
                                  uptime=1)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

    # Migration KPI
    if ('ixia_ports' in ProjVar.get_var("LAB")) and \
            (len(hosts_per_backing.get(storage_backing)) >= 2):

        LOG.info("Run migrate tests when 2 or more {} hosts are "
                 "available".format(storage_backing))
        LOG.tc_step("Launch an observer vm")

        mgmt_net_observer = network_helper.get_mgmt_net_id(
            auth_info=Tenant.get_secondary())
        tenant_net_observer = network_helper.get_tenant_net_id(
            auth_info=Tenant.get_secondary())
        nics_observer = [{'net-id': mgmt_net_observer},
                         {'net-id': tenant_net_observer},
                         {'net-id': internal_net_id}]
        vm_observer = vm_helper.boot_vm(
            'observer', flavor=flavor, source=boot_source,
            nics=nics_observer, cleanup='function',
            auth_info=Tenant.get_secondary())[1]

        vm_helper.wait_for_vm_pingable_from_natbox(vm_observer)
        vm_helper.setup_kernel_routing(vm_observer)
        vm_helper.setup_kernel_routing(vm_id)
        vm_helper.route_vm_pair(vm_observer, vm_id)

        if 'local_lvm' != boot_from:
            # live migration unsupported for boot-from-image vm with
            # local_lvm storage
            LOG.tc_step("Collect live migrate KPI for vm booted from "
                        "{}".format(boot_from))

            def operation_live(vm_id_):
                code, msg = vm_helper.live_migrate_vm(vm_id=vm_id_)
                assert 0 == code, msg
                vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id_)
                # kernel routing
                vm_helper.ping_between_routed_vms(vm_id, vm_observer,
                                                  vshell=False)

            time.sleep(30)
            duration = vm_helper.get_traffic_loss_duration_on_operation(
                vm_id, vm_observer, operation_live, vm_id)
            assert duration > 0, "No traffic loss detected during live " \
                                 "migration for {} vm".format(boot_from)
            kpi_log_parser.record_kpi(
                local_kpi_file=collect_kpi,
                kpi_name=LiveMigrate.NAME.format(boot_from),
                kpi_val=duration, uptime=1, unit='Time(ms)')

            vim_duration = vm_helper.get_live_migrate_duration(vm_id=vm_id)
            kpi_log_parser.record_kpi(
                local_kpi_file=collect_kpi,
                kpi_name=LiveMigrate.NOVA_NAME.format(boot_from),
                kpi_val=vim_duration, uptime=1, unit='Time(s)')

        LOG.tc_step("Collect cold migrate KPI for vm booted from "
                    "{}".format(boot_from))

        def operation_cold(vm_id_):
            code, msg = vm_helper.cold_migrate_vm(vm_id=vm_id_)
            assert 0 == code, msg
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id_)
            vm_helper.ping_between_routed_vms(vm_id, vm_observer,
                                              vshell=False)

        time.sleep(30)
        duration = vm_helper.get_traffic_loss_duration_on_operation(
            vm_id, vm_observer, operation_cold, vm_id)
        assert duration > 0, "No traffic loss detected during cold " \
                             "migration for {} vm".format(boot_from)
        kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi,
            kpi_name=ColdMigrate.NAME.format(boot_from),
            kpi_val=duration, uptime=1, unit='Time(ms)')

        vim_duration = vm_helper.get_cold_migrate_duration(vm_id=vm_id)
        kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi,
            kpi_name=ColdMigrate.NOVA_NAME.format(boot_from),
            kpi_val=vim_duration, uptime=1, unit='Time(s)')

    # Rebuild KPI
    if 'volume' != boot_from:
        LOG.info("Run rebuild test for vm booted from image")

        def operation_rebuild(vm_id_):
            code, msg = vm_helper.rebuild_vm(vm_id=vm_id_)
            assert 0 == code, msg
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id_)
            vm_helper.ping_vms_from_vm(vm_id, vm_id,
                                       net_types=('data', 'internal'))

        LOG.tc_step("Collect vm rebuild KPI for vm booted from {}".format(
            boot_from))
        time.sleep(30)
        duration = vm_helper.get_ping_loss_duration_on_operation(
            vm_id, 300, 0.5, operation_rebuild, vm_id)
        assert duration > 0, "No ping loss detected during rebuild for " \
                             "{} vm".format(boot_from)
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=Rebuild.NAME.format(boot_from),
                                  kpi_val=duration, uptime=1,
                                  unit='Time(ms)')

    # Check the vm boot result at the end, after collecting the other KPIs
    assert code_boot == 0, out_boot
def test_idle_kpi(collect_kpi):
    if not collect_kpi:
        skip("KPI only test. Skipping as KPI collection is not enabled.")

    LOG.tc_step("Delete vms and volumes on system if any")
    vm_helper.delete_vms()

    is_aio = system_helper.is_aio_system()
    active_con = system_helper.get_active_controller_name()
    con_ssh = ControllerClient.get_active_controller()
    cpu_arg = ''
    if is_aio:
        LOG.info("AIO system found, check platform cores only")
        cpu_arg = ' -P '
        platform_cores_per_proc = host_helper.get_host_cpu_cores_for_function(
            hostname=active_con, func='Platform', core_type='log_core',
            thread=None, con_ssh=con_ssh)
        platform_cpus = []
        for proc in platform_cores_per_proc:
            platform_cpus += platform_cores_per_proc[proc]

        cpu_arg += ','.join([str(val) for val in platform_cpus])

    LOG.tc_step("Sleep for 5 minutes, then monitor cpu and memory usage "
                "every 10 seconds for 5 minutes")
    time.sleep(300)
    output = con_ssh.exec_cmd(
        'sar -u{} 10 30 -r | grep --color=never "Average"'.format(cpu_arg),
        expect_timeout=600, fail_ok=False)[1]

    # Sample output:
    # controller-1:~$ sar -u -P 0,1 1 3 -r | grep Average
    # Average:  CPU  %user  %nice  %system  %iowait  %steal  %idle
    # Average:    0   8.52   0.00     4.92     1.97    0.00  84.59
    # Average:    1  14.19   0.00     4.73     0.00    0.00  81.08
    # Average:  kbmemfree kbmemused %memused kbbuffers kbcached kbcommit %commit kbactive kbinact kbdirty
    # Average:  105130499  26616873    20.20    203707   782956 63556293   48.24 24702756  529517     579

    lines = output.splitlines()
    start_index = 0
    for i in range(len(lines)):
        if lines[i].startswith('Average:'):
            start_index = i
            break
    lines = lines[start_index:]

    # Parse mem usage stats from the last two lines
    mem_vals = lines.pop(-1).split()
    mem_headers = lines.pop(-1).split()
    mem_usage_index = mem_headers.index('%memused')
    mem_usage = float(mem_vals[mem_usage_index])

    # Parse cpu usage stats
    cpu_headers = lines.pop(0).split()
    cpu_lines = [line.split() for line in lines]
    idle_cpu_index = cpu_headers.index('%idle')
    cpus_idle = [float(cpu_vals[idle_cpu_index]) for cpu_vals in cpu_lines]
    avg_cpu_idle = sum(cpus_idle) / len(cpu_lines)
    avg_cpu_usage = round(100 - avg_cpu_idle, 4)

    cpu_kpi_name = Idle.NAME_CPU
    mem_kpi_name = Idle.NAME_MEM
    if not container_helper.is_stx_openstack_deployed():
        cpu_kpi_name += '_platform'
        mem_kpi_name += '_platform'
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=cpu_kpi_name, kpi_val=avg_cpu_usage,
                              uptime=5, unit='Percentage', fail_ok=False)
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=mem_kpi_name, kpi_val=mem_usage,
                              uptime=5, unit='Percentage', fail_ok=False)
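# Example of the command built above on an AIO node whose platform function
# owns logical cores 0 and 1 (illustrative values):
#   sar -u -P 0,1 10 30 -r | grep --color=never "Average"
# i.e. 30 samples at 10-second intervals, with CPU stats restricted to the
# platform cores (-P) and the memory report (-r) appended; only the
# "Average:" summary rows are kept for parsing.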
def test_lock_with_vms(self, target_hosts, no_simplex, add_admin_role_func,
                       collect_kpi):
    """
    Test lock host with vms on it.

    Args:
        target_hosts (list): targeted host(s) to lock that was prepared by
            the target_hosts test fixture.

    Skip Conditions:
        - Less than 2 hypervisor hosts on the system

    Prerequisites:
        - Hosts' storage backing is pre-configured to the storage backing
          under test, i.e., 2 or more hosts should support the storage
          backing under test.

    Test Setups:
        - Set instances quota to 10 if it was less than 8
        - Determine storage backing(s) under test, i.e., storage backings
          supported by at least 2 hosts on the system
        - Create flavors with storage extra specs set based on storage
          backings under test
        - Create vms_to_test that can be live migrated using created flavors
        - Determine target host(s) to perform lock based on which host(s)
          have the most vms_to_test
        - Live migrate vms to target host(s)

    Test Steps:
        - Lock target host
        - Verify lock succeeded and vms status unchanged
        - Repeat above steps if more than one target host

    Test Teardown:
        - Delete created vms and volumes
        - Delete created flavors
        - Unlock locked target host(s)

    """
    storage_backing, host = target_hosts
    vms_num = 5
    vm_helper.ensure_vms_quotas(vms_num=vms_num)

    LOG.tc_step("Boot {} vms with various storage settings".format(vms_num))
    vms = vm_helper.boot_vms_various_types(cleanup='function',
                                           vms_num=vms_num,
                                           storage_backing=storage_backing,
                                           target_host=host)

    LOG.tc_step("Attempt to lock target host {}...".format(host))
    HostsToRecover.add(host)
    init_time = None
    if collect_kpi:
        init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)
    host_helper.lock_host(host=host, check_first=False, fail_ok=False,
                          swact=True)

    LOG.tc_step("Verify lock succeeded and vms still in good state")
    vm_helper.wait_for_vms_values(vms=vms, fail_ok=False)
    for vm in vms:
        vm_host = vm_helper.get_vm_host(vm_id=vm)
        assert vm_host != host, "VM is still on {} after lock".format(host)
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm_id=vm, timeout=VMTimeout.DHCP_RETRY)

    if collect_kpi:
        LOG.info("Collect kpi for lock host with vms")
        kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi,
            kpi_name=HostLock.WITH_VM.format(storage_backing), host=None,
            log_path=HostLock.LOG_PATH,
            end_pattern=HostLock.END.format(host),
            start_pattern=HostLock.START.format(host),
            start_path=HostLock.START_PATH, init_time=init_time, uptime=5,
            fail_ok=False)
def test_lock_unlock_host(host_type, collect_kpi):
    """
    Verify lock unlock host

    Test Steps:
        - Select a host per given type. If type is controller, select standby
          controller.
        - Lock selected host and ensure it is successfully locked
        - Unlock selected host and ensure it is successfully unlocked

    """
    init_time = None
    if collect_kpi:
        init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)

    LOG.tc_step("Select a {} node from system if any".format(host_type))
    if host_type == 'controller':
        if system_helper.is_aio_simplex():
            host = 'controller-0'
        else:
            host = system_helper.get_standby_controller_name()
            assert host, "No standby controller available"
    else:
        if host_type == 'compute' and (system_helper.is_aio_duplex() or
                                       system_helper.is_aio_simplex()):
            skip("No compute host on AIO system")
        elif host_type == 'storage' and not system_helper.is_storage_system():
            skip("System does not have storage nodes")

        hosts = system_helper.get_hosts(
            personality=host_type, availability=HostAvailState.AVAILABLE,
            operational=HostOperState.ENABLED)
        assert hosts, "No good {} host on system".format(host_type)
        host = hosts[0]

    LOG.tc_step("Lock {} host - {} and ensure it is successfully "
                "locked".format(host_type, host))
    HostsToRecover.add(host)
    host_helper.lock_host(host, swact=False)

    # wait for services to stabilize before unlocking
    time.sleep(20)

    # unlock the host and verify it is successfully unlocked
    LOG.tc_step("Unlock {} host - {} and ensure it is successfully "
                "unlocked".format(host_type, host))
    host_helper.unlock_host(host)

    LOG.tc_step("Check helm list after host unlocked")
    con_ssh = ControllerClient.get_active_controller()
    con_ssh.exec_cmd('helm list', fail_ok=False)

    if collect_kpi:
        lock_kpi_name = HostLock.NAME.format(host_type)
        unlock_kpi_name = HostUnlock.NAME.format(host_type)
        unlock_host_type = host_type
        if container_helper.is_stx_openstack_deployed():
            if system_helper.is_aio_system():
                unlock_host_type = 'compute'
        else:
            lock_kpi_name += '_platform'
            unlock_kpi_name += '_platform'
            if unlock_host_type == 'compute':
                unlock_host_type = 'compute_platform'

        LOG.info("Collect kpi for lock/unlock {}".format(host_type))
        code_lock, out_lock = kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi, kpi_name=lock_kpi_name, host=None,
            log_path=HostLock.LOG_PATH,
            end_pattern=HostLock.END.format(host),
            start_pattern=HostLock.START.format(host),
            start_path=HostLock.START_PATH, init_time=init_time)

        time.sleep(30)  # delay in sysinv log vs nova hypervisor list
        code_unlock, out_unlock = kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi, kpi_name=unlock_kpi_name, host=None,
            log_path=HostUnlock.LOG_PATH,
            end_pattern=HostUnlock.END[unlock_host_type].format(host),
            init_time=init_time,
            start_pattern=HostUnlock.START.format(host),
            start_path=HostUnlock.START_PATH)

        assert code_lock == 0, 'Failed to collect kpi for host-lock {}. ' \
                               'Error:\n{}'.format(host, out_lock)
        assert code_unlock == 0, 'Failed to collect kpi for host-unlock {}. ' \
                                 'Error:\n{}'.format(host, out_unlock)