def test_get_info():
    pv0_storage_nodes = system_helper.get_storage_nodes(pv0)
    assert 'storage-0' in pv0_storage_nodes and 'storage-1' in pv0_storage_nodes
    assert not system_helper.get_storage_nodes(r720)
    assert not system_helper.is_aio_system()

    LOG.tc_func_start()
    assert system_helper.is_aio_system(r720)
    LOG.tc_func_end()

    LOG.tc_func_start()
    assert not system_helper.is_aio_system(r730_3_7)
    LOG.tc_func_end()

    LOG.tc_func_start()
    assert not system_helper.get_storage_nodes(r730_3_7)
    LOG.tc_func_end()
def test_system_type_is_readonly():
    """
    Verify System Type is readonly

    Test Steps:
        - Determine the System Type based on whether the system is CPE or not
        - Attempt to modify the System Type to a different type
        - Compare the types and verify they are the same, fail the test case otherwise

    Notes:
        - Covers SysInv test-case
            71) Verify the system type is read-only and cannot be changed via CLI
    """
    LOG.tc_step('Determine the real System Type for the lab')
    if system_helper.is_aio_system():
        cur_system_type = SystemType.CPE
    else:
        cur_system_type = SystemType.STANDARD

    LOG.tc_step('Attempt to modify System Type')
    change_to_system_type = SystemType.CPE
    if cur_system_type == SystemType.CPE:
        change_to_system_type = SystemType.STANDARD
    code, msg = system_helper.modify_system(
        fail_ok=True, system_mode='{}'.format(change_to_system_type))

    LOG.tc_step('Verify system rejected to change System Type to {}'.format(
        change_to_system_type))
    assert 1 == code, msg
def test_system_type():
    """
    Verify the System Type can be retrieved from SysInv and is correct

    Test Steps:
        - Determine the System Type based on whether the system is CPE or not
        - Retrieve the System Type information from SysInv
        - Compare the types and verify they are the same, fail the test case otherwise

    Notes:
        - Covers SysInv test-cases:
            66) Query the product type on CPE system using CLI
            67) Query the product type on STD system using CLI
    """
    LOG.tc_step('Determine the real System Type for the lab')
    if system_helper.is_aio_system():
        expt_system_type = SystemType.CPE
    else:
        expt_system_type = SystemType.STANDARD

    LOG.tc_step('Get System Type from system inventory')
    table_ = table_parser.table(cli.system('show')[1])
    displayed_system_type = table_parser.get_value_two_col_table(table_, 'system_type')

    LOG.tc_step('Verify the expected System Type is the same as that from System Inventory')
    assert expt_system_type == displayed_system_type, \
        'Expected system_type is: {}; Displayed system type: {}.'.format(
            expt_system_type, displayed_system_type)
def test_cpe_services_and_functions():
    if system_helper.host_exists(host='compute-0'):
        skip("compute-0 exists - skip for non-CPE lab")

    LOG.tc_step("Check controller+compute subfunction via system host-show")
    controllers = system_helper.get_controllers()
    for controller in controllers:
        assert system_helper.is_aio_system(controller=controller), \
            "{} does not have controller+compute subfunction in system host-show".format(controller)

    LOG.tc_step("Check CPE system services via nova service-list")
    check_params = [
        "nova-scheduler",
        # "nova-cert",
        "nova-conductor",
        # "nova-consoleauth",   # removed in Train
        "nova-compute",
    ]

    binaries = nova_helper.get_compute_services(field='Binary')
    assert set(check_params) <= set(binaries), \
        "Not all binaries from {} exist in 'nova service-list'".format(check_params)

    LOG.tc_step("Check all nodes are ready in kubectl get nodes")
    kube_helper.wait_for_nodes_ready(timeout=3)
def sys_lock_unlock_standby(number_of_times=1):
    """
    Lock and unlock the standby controller the given number of times, and
    verify that all VMs remain reachable after each lock and unlock.
    :return:
    """
    timeout = VMTimeout.DHCP_RETRY if system_helper.is_aio_system() else VMTimeout.PING_VM
    for i in range(0, number_of_times):
        active, standby = system_helper.get_active_standby_controllers()
        LOG.tc_step("Iteration {} of {} total iterations".format(i, number_of_times))

        LOG.tc_step("Lock standby controller {}".format(standby))
        host_helper.lock_host(host=standby)

        LOG.tc_step("Check vms status after locking standby")
        vms = get_all_vms()
        vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
        for vm in vms:
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm, timeout=timeout)

        host_helper.unlock_host(host=standby)

        vms = get_all_vms()
        vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
        for vm in vms:
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)
def test_system_persist_over_host_reboot(host_type, stx_openstack_required):
    """
    Validate that the inventory summary persists over a reboot of one of the hosts

    Test Steps:
        - Capture inventory summary for the list of hosts in system service-list and neutron agent-list
        - Reboot the selected host
        - Wait for the reboot to complete
        - Validate that key items from the inventory persist over the reboot
    """
    if host_type == 'controller':
        host = system_helper.get_active_controller_name()
    elif host_type == 'compute':
        if system_helper.is_aio_system():
            skip("No compute host for AIO system")
        host = None
    else:
        hosts = system_helper.get_hosts(personality='storage')
        if not hosts:
            skip(msg="Lab has no storage nodes. Skip rebooting storage node.")
        host = hosts[0]

    LOG.tc_step("Pre-check for system status")
    system_helper.wait_for_services_enable()
    up_hypervisors = host_helper.get_up_hypervisors()
    network_helper.wait_for_agents_healthy(hosts=up_hypervisors)

    LOG.tc_step("Launch a vm")
    vm_id = vm_helper.boot_vm(cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

    if host is None:
        host = vm_helper.get_vm_host(vm_id)

    LOG.tc_step("Reboot a {} node and wait for reboot to complete: {}".format(host_type, host))
    HostsToRecover.add(host)
    host_helper.reboot_hosts(host)
    host_helper.wait_for_hosts_ready(host)

    LOG.tc_step("Check vm is still active and pingable after {} reboot".format(host))
    vm_helper.wait_for_vm_status(vm_id, status=VMStatus.ACTIVE, fail_ok=False)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id, timeout=VMTimeout.DHCP_RETRY)

    LOG.tc_step("Check neutron agents and system services are in good state after {} reboot".format(host))
    network_helper.wait_for_agents_healthy(up_hypervisors)
    system_helper.wait_for_services_enable()

    if host in up_hypervisors:
        LOG.tc_step("Check {} can still host vm after reboot".format(host))
        if not vm_helper.get_vm_host(vm_id) == host:
            time.sleep(30)
            vm_helper.live_migrate_vm(vm_id, destination_host=host)
def get_hosts_with_backing(add_admin_role_module):
    storage_backing, hosts = keywords.host_helper.get_storage_backing_with_max_hosts()
    if len(hosts) < 2:
        skip("Minimum of two hypervisors must support the same storage_backing.")

    if not system_helper.is_aio_system():
        host_under_test = hosts[0]
    else:
        host_under_test = system_helper.get_standby_controller_name()

    return storage_backing, host_under_test
def test_lock_unlock_host(host_type):
    """
    Verify lock unlock host

    Test Steps:
        - Select a host per given type. If type is controller, select standby controller.
        - Lock selected host and ensure it is successfully locked
        - Unlock selected host and ensure it is successfully unlocked
    """
    LOG.tc_step("Select a {} node from system if any".format(host_type))
    if host_type == 'controller':
        if system_helper.is_aio_simplex():
            host = 'controller-0'
        else:
            host = system_helper.get_standby_controller_name()
            assert host, "No standby controller available"
    else:
        if host_type == 'compute' and system_helper.is_aio_system():
            skip("No compute host on AIO system")
        elif host_type == 'storage' and not system_helper.is_storage_system():
            skip("System does not have storage nodes")

        hosts = system_helper.get_hosts(personality=host_type,
                                        availability=HostAvailState.AVAILABLE,
                                        operational=HostOperState.ENABLED)
        assert hosts, "No good {} host on system".format(host_type)
        host = hosts[0]

    LOG.tc_step("Lock {} host - {} and ensure it is successfully "
                "locked".format(host_type, host))
    HostsToRecover.add(host)
    host_helper.lock_host(host, swact=False)

    # wait for services to stabilize before unlocking
    time.sleep(20)

    # unlock standby controller node and verify controller node is
    # successfully unlocked
    LOG.tc_step("Unlock {} host - {} and ensure it is successfully "
                "unlocked".format(host_type, host))
    host_helper.unlock_host(host)
def pre_download_setup():
    lab = InstallVars.get_install_var('LAB')

    # establish ssh connection with controller-0
    controller0_conn = ControllerClient.get_active_controller()

    cpe = system_helper.is_aio_system(controller0_conn)

    bld_server = get_build_server_info(InstallVars.get_install_var('BUILD_SERVER'))
    output_dir = ProjVar.get_var('LOG_DIR')

    current_version = system_helper.get_sw_version(use_existing=False)
    load_path = BuildServerPath.LATEST_HOST_BUILD_PATHS[current_version]

    bld_server_attr = dict()
    bld_server_attr['name'] = bld_server['name']
    bld_server_attr['server_ip'] = bld_server['ip']
    bld_server_attr['prompt'] = Prompt.BUILD_SERVER_PROMPT_BASE.format(
        'svc-cgcsauto', bld_server['name'])

    bld_server_conn = SSHClient(bld_server_attr['name'],
                                user=TestFileServer.get_user(),
                                password=TestFileServer.get_password(),
                                initial_prompt=bld_server_attr['prompt'])
    bld_server_conn.connect()
    bld_server_conn.exec_cmd("bash")
    bld_server_conn.set_prompt(bld_server_attr['prompt'])
    bld_server_conn.deploy_ssh_key(install_helper.get_ssh_public_key())
    bld_server_attr['ssh_conn'] = bld_server_conn
    bld_server_obj = Server(**bld_server_attr)

    _download_setup = {
        'lab': lab,
        'cpe': cpe,
        'output_dir': output_dir,
        'current_version': current_version,
        'build_server': bld_server_obj,
        'load_path': load_path,
    }

    return _download_setup
def revert(request):
    """
    Revert to pre-test mnfa parameters after test
    """
    # skip("Force reboot hosts not ready to test")
    if system_helper.is_aio_system():
        skip("Not applicable on small systems")

    mnfa_threshold_default_val = system_helper.get_service_parameter_values(
        service='platform', section='maintenance', name='mnfa_threshold')
    mnfa_timeout_default_val = system_helper.get_service_parameter_values(
        service='platform', section='maintenance', name='mnfa_timeout')

    def restore_default_parameters():
        LOG.fixture_step('Check MNFA service parameter values and revert if needed')
        mnfa_threshold_current_val = system_helper.get_service_parameter_values(
            service='platform', section='maintenance', name='mnfa_threshold')
        mnfa_timeout_default_current_val = system_helper.get_service_parameter_values(
            service='platform', section='maintenance', name='mnfa_timeout')
        alarms = system_helper.get_alarms(alarm_id=EventLogID.CONFIG_OUT_OF_DATE)
        if alarms or mnfa_threshold_current_val != mnfa_threshold_default_val or \
                mnfa_timeout_default_val != mnfa_timeout_default_current_val:
            system_helper.modify_service_parameter(service='platform',
                                                   section='maintenance',
                                                   name='mnfa_threshold',
                                                   apply=False,
                                                   value=mnfa_threshold_default_val[0])
            system_helper.modify_service_parameter(service='platform',
                                                   check_first=False,
                                                   section='maintenance',
                                                   name='mnfa_timeout',
                                                   apply=True,
                                                   value=mnfa_timeout_default_val[0])

    request.addfinalizer(restore_default_parameters)
def _test_create_partition_and_associate_with_pv_cgts_vg():
    """
    This test attempts to create a partition and then associate it with a PV
    (physical volume), resulting in the partition being In-use.

    Assumptions:
        * There's some free disk space available

    Test steps:
        * Query hosts to determine disk space
        * Create partition
        * Associate it with cgts-vg PV
        * Check the partition is in In-use state
        * Attempt to delete the partition that is in-use. It should fail.
        * Attempt to assign the in-use partition to another PV. It should fail.

    Teardown:
        * None

    DISABLING: This fails since the partition says 'adding on unlock'. Should
    it be in-service? Follow up with dev.
    """
    global partitions_to_restore
    partitions_to_restore = {}

    if not system_helper.is_aio_system():
        skip("This test requires an AIO system.")

    hosts = system_helper.get_controllers()
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        for uuid in free_disks:
            size_gib = float(free_disks[uuid])
            if size_gib <= 1:
                LOG.tc_step("Skip this disk due to insufficient space")
                continue

            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host, uuid, "1")
            uuid = table_parser.get_value_two_col_table(table_parser.table(out), "uuid")
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)

            LOG.tc_step("Associating partition {} with cgts-vg".format(uuid))
            # cmd = "host-pv-add -t partition {} cgts-vg {}".format(host, uuid)
            cmd = "host-pv-add {} cgts-vg {}".format(host, uuid)
            rc, out = cli.system(cmd)
            assert rc == 0, "Associating partition with PV failed"

            LOG.tc_step("Check that partition is In-use state")
            storage_helper.wait_for_host_partition_status(
                host=host, uuid=uuid,
                final_status=PartitionStatus.IN_USE,
                interim_status=PartitionStatus.READY,
                timeout=CP_TIMEOUT)

            LOG.tc_step("Attempt to delete In-Use partition")
            rc, out = storage_helper.delete_host_partition(host, uuid, fail_ok=True)
            assert rc != 0, "Partition deletion was expected to fail but instead passed"

            LOG.tc_step("Attempt to associate the In-Use partition with another PV")
            # cmd = "host-pv-add -t partition {} nova-local {}".format(host, uuid)
            cmd = "host-pv-add {} nova-local {}".format(host, uuid)
            rc, out = cli.system(cmd)
            assert rc != 0, "Partition association succeeded but was expected to fail"

            # Only test one disk on each host
            break
        # Do it on one host only
        break
def _test_storage_profile(personality, from_backing, to_backing):
    """
    This test creates a storage profile and then applies it to a node with
    identical hardware, assuming one exists.

    Storage profiles do not apply on controller nodes. Storage profiles can be
    applied on controller+compute nodes, compute nodes and storage nodes.

    Arguments:
        - personality (string) - controller, compute or storage
        - from_backing (string) - image, remote or None
        - to_backing (string) - image, remote or None

    Test Steps:
        1. Query system and determine which nodes have compatible hardware.
        2. Create a storage profile on one of those nodes
        3. Apply the created storage profile on a compatible node*
        4. Ensure the storage profiles have been successfully applied.

        * If the node is a compute node or a controller+compute, we will also
          change the backend if required for additional coverage.

    Returns:
        - Nothing
    """
    global PROFILES_TO_DELETE
    PROFILES_TO_DELETE = []

    # Skip if test is not applicable to hardware under test
    if personality == 'controller' and not system_helper.is_aio_system():
        skip("Test does not apply to controller hosts without subtype compute")

    hosts = system_helper.get_hosts(personality=personality)
    if not hosts:
        skip("No hosts of type {} available".format(personality))

    if (from_backing == "remote" or to_backing == "remote") and not \
            system_helper.is_storage_system():
        skip("This test doesn't apply to systems without storage hosts")

    LOG.tc_step("Identify hardware compatible hosts")
    hash_to_hosts = get_hw_compatible_hosts(hosts)

    # Pick the hardware group that has the most compatible hosts
    current_size = 0
    candidate_hosts = []
    for value in hash_to_hosts:
        candidate_size = len(hash_to_hosts[value])
        if candidate_size > current_size:
            current_size = candidate_size
            candidate_hosts = hash_to_hosts[value]
    LOG.info("This is the total set of candidate hosts: {}".format(candidate_hosts))

    if len(candidate_hosts) < 2:
        skip("Insufficient hardware compatible hosts to run test")

    # Rsync lab setup dot files between controllers
    con_ssh = ControllerClient.get_active_controller()
    _rsync_files_to_con1(con_ssh=con_ssh, file_to_check="force.txt")

    # Take the hardware compatible hosts and check if any of them already have
    # the backend that we want. This will save us test time.
    new_to_backing = None
    if personality == "compute":
        from_hosts = []
        to_hosts = []
        for host in candidate_hosts:
            host_backing = host_helper.get_host_instance_backing(host)
            if host_backing == from_backing:
                from_hosts.append(host)
            elif host_backing == to_backing:
                to_hosts.append(host)
            else:
                pass
        LOG.info("Candidate hosts that already have the right from backing {}: {}".format(
            from_backing, from_hosts))
        LOG.info("Candidate hosts that already have the right to backing {}: {}".format(
            to_backing, to_hosts))

        # Determine what hosts to use
        if not from_hosts and to_hosts:
            to_host = random.choice(to_hosts)
            candidate_hosts.remove(to_host)
            from_host = random.choice(candidate_hosts)
        elif not to_hosts and from_hosts:
            from_host = random.choice(from_hosts)
            candidate_hosts.remove(from_host)
            to_host = random.choice(candidate_hosts)
        elif not to_hosts and not from_hosts:
            to_host = random.choice(candidate_hosts)
            candidate_hosts.remove(to_host)
            from_host = random.choice(candidate_hosts)
        else:
            to_host = random.choice(to_hosts)
            from_host = random.choice(from_hosts)

        LOG.info("From host is: {}".format(from_host))
        LOG.info("To host is: {}".format(to_host))

        LOG.tc_step("Check from host backing and convert to {} if necessary".format(
            from_backing))
        host_helper.set_host_storage_backing(from_host, from_backing)
        system_helper.wait_for_host_values(from_host,
                                           availability=HostAvailState.AVAILABLE,
                                           timeout=120, fail_ok=False)

        LOG.tc_step("Check to host backing and convert to {} if necessary".format(
            to_backing))
        new_to_backing = host_helper.set_host_storage_backing(to_host, to_backing)
    elif personality == "controller":
        # For now, we don't want to host reinstall controller-0 since it will
        # default to pxeboot, but this could be examined as a possible
        # enhancement.
        from_host = "controller-0"
        to_host = "controller-1"

        LOG.info("From host is: {}".format(from_host))
        LOG.info("To host is: {}".format(to_host))

        LOG.tc_step("Check from host backing and convert to {} if necessary".format(
            from_backing))
        host_helper.set_host_storage_backing(from_host, from_backing)

        LOG.tc_step("Check to host backing and convert to {} if necessary".format(
            to_backing))
        new_to_backing = host_helper.set_host_storage_backing(to_host, to_backing)
    else:
        # Backing doesn't apply to storage nodes so just pick from compatible hardware
        from_host = random.choice(candidate_hosts)
        candidate_hosts.remove(from_host)
        to_host = random.choice(candidate_hosts)

    LOG.tc_step("Create storage and interface profiles on the from host {}".format(
        from_host))
    prof_name = 'storprof_{}_{}'.format(
        from_host, time.strftime('%Y%m%d_%H%M%S', time.localtime()))
    storage_helper.create_storage_profile(from_host, profile_name=prof_name)
    PROFILES_TO_DELETE.append(prof_name)

    # Deleting VMs in case the remaining host(s) cannot handle all VMs
    # migrating on lock, particularly important in the case of AIO-DX systems.
    LOG.tc_step("Delete all VMs and lock the host before applying the storage profile")
    vm_helper.delete_vms()
    HostsToRecover.add(to_host, scope='function')
    system_helper.wait_for_host_values(from_host,
                                       availability=HostAvailState.AVAILABLE,
                                       timeout=120, fail_ok=False)
    system_helper.wait_for_host_values(to_host,
                                       availability=HostAvailState.AVAILABLE,
                                       timeout=120, fail_ok=False)

    # Negative test #1 - attempt to apply profile on unlocked host (should be rejected)
    LOG.tc_step('Apply the storage-profile {} onto unlocked host:{}'.format(
        prof_name, to_host))
    cmd = 'host-apply-storprofile {} {}'.format(to_host, prof_name)
    rc, msg = cli.system(cmd, fail_ok=True)
    assert rc != 0, msg

    host_helper.lock_host(to_host, swact=True)

    # 3 conditions to watch for: no partitions, ready partitions and in-use
    # partitions on the compute. If in-use, delete and freshly install host.
    # If ready, delete all ready partitions to make room for potentially new
    # partitions. If no partitions, just delete nova-local lvg.
    if personality == "compute":
        # Negative test #2 - attempt to apply profile onto host with existing
        # nova-local (should be rejected)
        LOG.tc_step('Apply the storage-profile {} onto host with existing nova-local:{}'.format(
            prof_name, to_host))
        cmd = 'host-apply-storprofile {} {}'.format(to_host, prof_name)
        rc, msg = cli.system(cmd, fail_ok=True)
        assert rc != 0, msg

        # If we were simply switching backing (without applying a storage
        # profile), the nova-local lvg deletion can be omitted according to design
        LOG.tc_step("Delete nova-local lvg on to host {}".format(to_host))
        cli.system("host-lvg-delete {} nova-local".format(to_host))

        in_use = storage_helper.get_host_partitions(to_host, "In-Use")

        if in_use:
            # Negative test #3 - attempt to apply profile onto host with existing
            # in-use partitions (should be rejected)
            LOG.tc_step('Apply the storage-profile {} onto host with existing '
                        'in-use partitions:{}'.format(prof_name, to_host))
            cmd = 'host-apply-storprofile {} {}'.format(to_host, prof_name)
            rc, msg = cli.system(cmd, fail_ok=True)
            assert rc != 0, msg

            LOG.tc_step("In-use partitions found. Must delete the host and freshly "
                        "install before proceeding.")
            LOG.info("Host {} has in-use partitions {}".format(to_host, in_use))
            lab = InstallVars.get_install_var("LAB")
            lab.update(create_node_dict(lab['compute_nodes'], 'compute'))
            lab['boot_device_dict'] = create_node_boot_dict(lab['name'])
            install_helper.open_vlm_console_thread(to_host)

            LOG.tc_step("Delete the host {}".format(to_host))
            cli.system("host-bulk-export")
            cli.system("host-delete {}".format(to_host))
            assert len(system_helper.get_controllers()) > 1, "Host deletion failed"

            cli.system("host-bulk-add hosts.xml")
            system_helper.wait_for_host_values(to_host, timeout=6000,
                                               availability=HostAvailState.ONLINE)

            wait_for_disks(to_host)

        ready = storage_helper.get_host_partitions(to_host, "Ready")
        if ready:
            LOG.tc_step("Ready partitions have been found. Must delete them before "
                        "profile application")
            LOG.info("Host {} has Ready partitions {}".format(to_host, ready))
            for uuid in reversed(ready):
                storage_helper.delete_host_partition(to_host, uuid)
            # Don't bother restoring in this case since the system should be
            # functional after profile is applied.

        LOG.tc_step('Apply the storage-profile {} onto host:{}'.format(prof_name, to_host))
        cli.system('host-apply-storprofile {} {}'.format(to_host, prof_name))

        LOG.tc_step("Unlock to host")
        host_helper.unlock_host(to_host)

        to_host_backing = host_helper.get_host_instance_backing(to_host)
        LOG.info("To host backing was {} and is now {}".format(
            new_to_backing, to_host_backing))
        assert to_host_backing == from_backing, \
            "Host backing was not changed on storage profile application"

    if personality == "storage":
        if not storage_helper.is_ceph_healthy():
            skip("Cannot run test when ceph is not healthy")

        LOG.tc_step("Delete the host {}".format(to_host))
        cli.system("host-bulk-export")
        cli.system("host-delete {}".format(to_host))
        cli.system("host-bulk-add hosts.xml")
        system_helper.wait_for_host_values(to_host, timeout=6000,
                                           availability=HostAvailState.ONLINE)

        wait_for_disks(to_host)

        LOG.tc_step('Apply the storage-profile {} onto host:{}'.format(prof_name, to_host))
        cli.system('host-apply-storprofile {} {}'.format(to_host, prof_name))

        # Re-provision interfaces through lab_setup.sh
        LOG.tc_step("Reprovision the host as necessary")
        files = ['interfaces']
        con_ssh = ControllerClient.get_active_controller()
        delete_lab_setup_files(con_ssh, to_host, files)

        rc, msg = install_helper.run_lab_setup()
        assert rc == 0, msg

        LOG.tc_step("Unlock to host")
        host_helper.unlock_host(to_host)

    if personality == "controller":
        # Note, install helper doesn't work on all labs. Some labs don't
        # display BIOS type which causes install helper to fail
        lab = InstallVars.get_install_var("LAB")
        lab.update(create_node_dict(lab['controller_nodes'], 'controller'))
        lab['boot_device_dict'] = create_node_boot_dict(lab['name'])
        install_helper.open_vlm_console_thread(to_host)

        LOG.tc_step("Delete the host {}".format(to_host))
        cli.system("host-bulk-export")
        cli.system("host-delete {}".format(to_host))
        assert len(system_helper.get_controllers()) > 1, "Host deletion failed"

        cli.system("host-bulk-add hosts.xml")
        system_helper.wait_for_host_values(to_host, timeout=6000,
                                           availability=HostAvailState.ONLINE)

        wait_for_disks(to_host)

        LOG.tc_step("Apply the storage-profile {} onto host:{}".format(prof_name, to_host))
        cli.system("host-apply-storprofile {} {}".format(to_host, prof_name))

        # Need to re-provision everything on node through lab_setup (except storage)
        LOG.tc_step("Reprovision the host as necessary")
        files = ['interfaces', 'cinder_device', 'vswitch_cpus', 'shared_cpus',
                 'extend_cgts_vg', 'addresses']
        con_ssh = ControllerClient.get_active_controller()
        delete_lab_setup_files(con_ssh, to_host, files)

        rc, msg = install_helper.run_lab_setup()
        assert rc == 0, msg

        LOG.tc_step("Unlock to host")
        host_helper.unlock_host(to_host)

        to_host_backing = host_helper.get_host_instance_backing(to_host)
        LOG.info("To host backing was {} and is now {}".format(
            new_to_backing, to_host_backing))
        assert to_host_backing == from_backing, \
            "Host backing was not changed on storage profile application"
def test_idle_kpi(collect_kpi):
    if not collect_kpi:
        skip("KPI only test. Skip due to kpi collection is not enabled")

    LOG.tc_step("Delete vms and volumes on system if any")
    vm_helper.delete_vms()

    is_aio = system_helper.is_aio_system()
    active_con = system_helper.get_active_controller_name()
    con_ssh = ControllerClient.get_active_controller()
    cpu_arg = ''
    if is_aio:
        LOG.info("AIO system found, check platform cores only")
        cpu_arg = ' -P '
        platform_cores_per_proc = host_helper.get_host_cpu_cores_for_function(
            hostname=active_con, func='Platform', core_type='log_core',
            thread=None, con_ssh=con_ssh)
        platform_cpus = []
        for proc in platform_cores_per_proc:
            platform_cpus += platform_cores_per_proc[proc]

        cpu_arg += ','.join([str(val) for val in platform_cpus])

    LOG.tc_step("Sleep for 5 minutes, then monitor cpu and memory usage every 10 seconds for 5 minutes")
    time.sleep(300)
    output = con_ssh.exec_cmd(
        'sar -u{} 10 30 -r | grep --color=never "Average"'.format(cpu_arg),
        expect_timeout=600, fail_ok=False)[1]

    # Sample output:
    # controller-1:~$ sar -u -P 0,1 1 3 -r | grep Average
    # Average:     CPU    %user    %nice   %system   %iowait   %steal    %idle
    # Average:       0     8.52     0.00      4.92      1.97     0.00    84.59
    # Average:       1    14.19     0.00      4.73      0.00     0.00    81.08
    # Average:  kbmemfree kbmemused %memused kbbuffers kbcached kbcommit %commit kbactive kbinact kbdirty
    # Average:  105130499  26616873    20.20    203707   782956 63556293   48.24 24702756  529517     579

    lines = output.splitlines()
    start_index = 0
    for i in range(len(lines)):
        if lines[i].startswith('Average:'):
            start_index = i
            break
    lines = lines[start_index:]

    # Parse mem usage stats
    mem_vals = lines.pop(-1).split()
    mem_headers = lines.pop(-1).split()
    mem_usage_index = mem_headers.index('%memused')
    mem_usage = float(mem_vals[mem_usage_index])

    # Parse cpu usage stats
    cpu_headers = lines.pop(0).split()
    cpu_lines = [line.split() for line in lines]
    idle_cpu_index = cpu_headers.index('%idle')
    cpus_idle = [float(cpu_vals[idle_cpu_index]) for cpu_vals in cpu_lines]
    avg_cpu_idle = sum(cpus_idle) / len(cpu_lines)
    avg_cpu_usage = round(100 - avg_cpu_idle, 4)

    cpu_kpi_name = Idle.NAME_CPU
    mem_kpi_name = Idle.NAME_MEM
    if not container_helper.is_stx_openstack_deployed():
        cpu_kpi_name += '_platform'
        mem_kpi_name += '_platform'
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=cpu_kpi_name,
                              kpi_val=avg_cpu_usage, uptime=5, unit='Percentage',
                              fail_ok=False)
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi, kpi_name=mem_kpi_name,
                              kpi_val=mem_usage, uptime=5, unit='Percentage',
                              fail_ok=False)
def test_lock_unlock_host(host_type, collect_kpi):
    """
    Verify lock unlock host

    Test Steps:
        - Select a host per given type. If type is controller, select standby controller.
        - Lock selected host and ensure it is successfully locked
        - Unlock selected host and ensure it is successfully unlocked
    """
    init_time = None
    if collect_kpi:
        init_time = common.get_date_in_format(date_format=KPI_DATE_FORMAT)

    LOG.tc_step("Select a {} node from system if any".format(host_type))
    if host_type == 'controller':
        if system_helper.is_aio_simplex():
            host = 'controller-0'
        else:
            host = system_helper.get_standby_controller_name()
            assert host, "No standby controller available"
    else:
        if host_type == 'compute' and (system_helper.is_aio_duplex() or
                                       system_helper.is_aio_simplex()):
            skip("No compute host on AIO system")
        elif host_type == 'storage' and not system_helper.is_storage_system():
            skip("System does not have storage nodes")

        hosts = system_helper.get_hosts(personality=host_type,
                                        availability=HostAvailState.AVAILABLE,
                                        operational=HostOperState.ENABLED)
        assert hosts, "No good {} host on system".format(host_type)
        host = hosts[0]

    LOG.tc_step("Lock {} host - {} and ensure it is successfully locked".format(
        host_type, host))
    HostsToRecover.add(host)
    host_helper.lock_host(host, swact=False)

    # wait for services to stabilize before unlocking
    time.sleep(20)

    # unlock standby controller node and verify controller node is successfully unlocked
    LOG.tc_step("Unlock {} host - {} and ensure it is successfully unlocked".format(
        host_type, host))
    host_helper.unlock_host(host)

    LOG.tc_step("Check helm list after host unlocked")
    con_ssh = ControllerClient.get_active_controller()
    con_ssh.exec_cmd('helm list', fail_ok=False)

    if collect_kpi:
        lock_kpi_name = HostLock.NAME.format(host_type)
        unlock_kpi_name = HostUnlock.NAME.format(host_type)
        unlock_host_type = host_type
        if container_helper.is_stx_openstack_deployed():
            if system_helper.is_aio_system():
                unlock_host_type = 'compute'
        else:
            lock_kpi_name += '_platform'
            unlock_kpi_name += '_platform'
            if unlock_host_type == 'compute':
                unlock_host_type = 'compute_platform'

        LOG.info("Collect kpi for lock/unlock {}".format(host_type))
        code_lock, out_lock = kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi, kpi_name=lock_kpi_name, host=None,
            log_path=HostLock.LOG_PATH, end_pattern=HostLock.END.format(host),
            start_pattern=HostLock.START.format(host),
            start_path=HostLock.START_PATH, init_time=init_time)

        time.sleep(30)  # delay in sysinv log vs nova hypervisor list
        code_unlock, out_unlock = kpi_log_parser.record_kpi(
            local_kpi_file=collect_kpi, kpi_name=unlock_kpi_name, host=None,
            log_path=HostUnlock.LOG_PATH,
            end_pattern=HostUnlock.END[unlock_host_type].format(host),
            init_time=init_time, start_pattern=HostUnlock.START.format(host),
            start_path=HostUnlock.START_PATH)

        assert code_lock == 0, 'Failed to collect kpi for host-lock {}. ' \
                               'Error:\n{}'.format(host, out_lock)
        assert code_unlock == 0, 'Failed to collect kpi for host-unlock {}. ' \
                                 'Error:\n{}'.format(host, out_unlock)
def orphan_audit_setup(request):
    """
    SCPs files needed to set up the orphan audit test
    """
    con_ssh = ControllerClient.get_active_controller()
    vm_host = host_helper.get_up_hypervisors()[0]

    LOG.fixture_step("SCP orphan_guest.xml to active controller")
    source_file = TestServerPath.TEST_FILES + 'orphan_guest.xml'
    common.scp_from_test_server_to_active_controller(
        source_file, dest_dir=HostLinuxUser.get_home(),
        dest_name='orphan_guest.xml', timeout=120, con_ssh=None)

    LOG.fixture_step("Change orphan_guest.xml specs to allow a vm to be properly launched")
    LOG.info("If test is running on VBox, change domain type in xml to qemu")
    nat_name = ProjVar.get_var('NATBOX').get('name')
    if nat_name == 'localhost' or nat_name.startswith('128.224.'):
        LOG.info("Changing domain type in xml to qemu")
        con_ssh.exec_sudo_cmd("sed -i 's/kvm/qemu/g' orphan_guest.xml")
        con_ssh.exec_sudo_cmd("sed -i 's/qemu-qemu/qemu-kvm/g' orphan_guest.xml")

    if GuestImages.DEFAULT['guest'] != 'tis-centos-guest':
        LOG.info("Update xml files to use default image")
        con_ssh.exec_sudo_cmd("sed -i 's/tis-centos-guest/{}/g' orphan_guest.xml".format(
            GuestImages.DEFAULT['guest']))

    # Check if system is AIO, skip scp to computes if it is
    if not system_helper.is_aio_system():
        LOG.fixture_step("Non-AIO system detected, SCP files to compute")
        with host_helper.ssh_to_host(vm_host) as host_ssh:
            LOG.info("Create images dir in compute host")
            host_ssh.exec_cmd('mkdir -p images')

        def teardown():
            LOG.fixture_step("Delete all files scp'd over")
            with host_helper.ssh_to_host(vm_host) as host_ssh_:
                host_ssh_.exec_cmd('rm -rf images/{}.img'.format(
                    GuestImages.DEFAULT['guest']))
                host_ssh_.exec_cmd('rm orphan_guest.xml')

        request.addfinalizer(teardown)

        # copy Default guest img and XML file over to compute
        img_path = StxPath.IMAGES + GuestImages.IMAGE_FILES.get(
            GuestImages.DEFAULT['guest'])[2]
        con_ssh.scp_on_source(HostLinuxUser.get_home() + 'orphan_guest.xml',
                              HostLinuxUser.get_user(), vm_host,
                              HostLinuxUser.get_home(),
                              HostLinuxUser.get_password(), timeout=60)
        con_ssh.scp_on_source(img_path, HostLinuxUser.get_user(), vm_host,
                              StxPath.IMAGES, HostLinuxUser.get_password(),
                              timeout=300)
    else:
        vm_host = system_helper.get_active_controller_name()

    return vm_host