def test_heat_vm_scale_after_actions(vm_scaling_stack, actions):
    """
    Test VM auto scaling with swact:
    Create heat stack for auto scaling using NestedAutoScale.yaml, swact and
    perform vm scale up and down.

    Test Steps:
        - Create a heat stack for auto scaling vm (NestedAutoScale.yaml)
        - Verify heat stack is created successfully
        - Verify heat resources are created
        - live migrate the vm if not sx
        - cold migrate the vm if not sx
        - swact if not sx
        - reboot -f vm host
        - trigger auto scale by boosting cpu usage in the vm (using dd)
        - verify it scales up to the max number of vms (3)
        - trigger scale down by killing dd in the vm
        - verify the vm scales down to min number (1)
        - Delete Heat stack and verify resource deletion
    """
    stack_name, vm_id = vm_scaling_stack
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id)

    if not system_helper.is_aio_simplex():
        actions = actions.split('-')
        if "swact" in actions:
            LOG.tc_step("Swact before scale in/out")
            host_helper.swact_host()

        if "live_migrate" in actions:
            LOG.tc_step("live migrate vm before scale in/out")
            vm_helper.live_migrate_vm(vm_id)

        if "cold_migrate" in actions:
            LOG.tc_step("cold migrate vm before scale in/out")
            vm_helper.cold_migrate_vm(vm_id)

    if "host_reboot" in actions:
        if system_helper.is_aio_simplex():
            host_helper.reboot_hosts('controller-0')
            vm_helper.wait_for_vm_status(vm_id, status=VMStatus.ACTIVE,
                                         timeout=600, check_interval=10,
                                         fail_ok=False)
            vm_helper.wait_for_vm_pingable_from_natbox(
                vm_id, timeout=VMTimeout.DHCP_RETRY)
        else:
            LOG.tc_step("evacuate vm before scale in/out")
            vm_host = vm_helper.get_vm_host(vm_id=vm_id)
            vm_helper.evacuate_vms(host=vm_host, vms_to_check=vm_id)

    LOG.tc_step(
        "Wait for {} vms to auto scale out to {} after running dd in "
        "vm(s)".format(stack_name, 3))
    vm_helper.wait_for_auto_vm_scale_out(stack_name, expt_max=3)

    LOG.tc_step(
        "Wait for {} vms to auto scale in to {} after killing dd processes "
        "in vms".format(stack_name, 1))
    vm_helper.wait_for_auto_vm_scale_in(stack_name, expt_min=1)

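# The two waits above poll the heat stack until its scaling group reaches the
# expected member count. A minimal sketch of such a poll loop is below, for
# illustration only; it assumes a hypothetical get_stack_vms() helper that
# returns the stack's current VM members, and the real
# vm_helper.wait_for_auto_vm_scale_out/in helpers may differ.
def _wait_for_stack_vm_count(stack_name, expt_count, timeout=600,
                             check_interval=30):
    """Poll until the stack has expt_count VM members, else time out."""
    end_time = time.time() + timeout
    while time.time() < end_time:
        vms = get_stack_vms(stack_name)  # hypothetical helper
        if len(vms) == expt_count:
            return vms
        time.sleep(check_interval)
    raise TimeoutError("Stack {} did not reach {} vms within {}s".format(
        stack_name, expt_count, timeout))
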
def test_kpi_cyclictest_vm(collect_kpi, prepare_test_session,
                           get_rt_guest_image, get_hypervisor,
                           add_admin_role_func):
    if not collect_kpi:
        skip("KPI only test. Skip since kpi collection is not enabled.")

    hypervisor = get_hypervisor
    testable_hypervisors[hypervisor]['for_vm_test'] = True
    LOG.info('Hypervisor chosen to host rt vm: {}'.format(hypervisor))

    vm_id, vcpu_count, non_rt_core = create_rt_vm(hypervisor)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id)

    cyclictest_dir = '/root/cyclictest/'
    program = os.path.join(os.path.normpath(cyclictest_dir),
                           os.path.basename(CYCLICTEST_EXE))
    program_active_con = os.path.join(os.path.normpath(CYCLICTEST_DIR),
                                      os.path.basename(CYCLICTEST_EXE))

    cpu_info = {
        'vm_cores': [id_ for id_ in range(vcpu_count) if id_ != non_rt_core]
    }

    with vm_helper.ssh_to_vm_from_natbox(vm_id) as vm_ssh:
        prep_test_on_host(vm_ssh, vm_id, program_active_con,
                          ControllerClient.get_active_controller().host,
                          cyclictest_dir=cyclictest_dir)
        run_log, hist_file = run_cyclictest(vm_ssh, program, vm_id,
                                            cyclictest_dir=cyclictest_dir,
                                            cpu_info=cpu_info)

        LOG.info("Process and upload test results")
        local_run_log, local_hist_file = fetch_results_from_target(
            target_ssh=vm_ssh, target_host=vm_id, run_log=run_log,
            hist_file=hist_file, is_guest=True)

    testable_hypervisors[hypervisor]['for_vm_test'] = False

    avg_val, six_nines_val = calculate_results(run_log=local_run_log,
                                               hist_file=local_hist_file,
                                               cores_to_ignore=None,
                                               num_cores=(vcpu_count - 1))

    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_VM_AVG,
                              kpi_val=avg_val, uptime=15,
                              unit=CyclicTest.UNIT)
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_VM_6_NINES,
                              kpi_val=six_nines_val, uptime=15,
                              unit=CyclicTest.UNIT)

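# calculate_results() above distills the cyclictest run into an average and a
# "six nines" (99.9999th percentile) latency. A minimal sketch of that
# percentile computation from a cyclictest histogram file is shown below; it
# assumes single-core "<latency_us> <count>" lines as produced by
# cyclictest -h, and the real helper's parsing may differ.
def _six_nines_latency(hist_file):
    """Return the latency (us) at or below which 99.9999% of samples fall."""
    buckets = []
    with open(hist_file) as f:
        for line in f:
            parts = line.split()
            # Skip comment/header lines; keep "<latency> <count>" pairs
            if len(parts) >= 2 and parts[0].isdigit():
                buckets.append((int(parts[0]), int(parts[1])))
    total = sum(count for _, count in buckets)
    threshold = total * 0.999999
    cumulative = 0
    for latency_us, count in buckets:
        cumulative += count
        if cumulative >= threshold:
            return latency_us
    return buckets[-1][0] if buckets else None
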
def test_migrate_4k_vm_positive(ephemeral, swap, cpu_pol, vcpus, vm_type,
                                ensure_sufficient_4k_pages):
    """
    Test live and cold migrate 4k vm with various vm storage configurations

    Args:
        ephemeral (int):
        swap (int):
        cpu_pol (str):
        vcpus (int):
        vm_type (str): boot-from image or volume vm
        ensure_sufficient_4k_pages (tuple): module test fixture to configure
            4k pages

    Setups:
        - Select at least 2 hosts with specified storage backing. e.g.,
          local_image, or remote
        - Ensure 2 hosts are in nova zone (move rest to cgcsauto zone if
          more than 2)
        - Configure the 2 hosts with large amount of 4k pages

    Test Steps:
        - Create flavor with specified ephemeral, swap, cpu policy and vcpus
        - Boot vm from specified source with above flavor
        - Cold migrate vm, ensure it is pingable and still on 4k pages
        - Live migrate vm, ensure it is pingable and still on 4k pages
    """
    storage_backing, hosts = ensure_sufficient_4k_pages

    vm_id = _boot_vm_under_test(storage_backing, ephemeral, swap, cpu_pol,
                                vcpus, vm_type)

    LOG.tc_step("Cold migrate VM and ensure it succeeded")
    vm_helper.cold_migrate_vm(vm_id)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)
    __check_pagesize(vm_id)

    LOG.tc_step("Attempt to live migrate VM")
    vm_helper.live_migrate_vm(vm_id)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)
    __check_pagesize(vm_id)

def base_setup(self):
    flavor_id = nova_helper.create_flavor(name='dedicated')[1]
    ResourceCleanup.add('flavor', flavor_id, scope='class')

    extra_specs = {FlavorSpec.CPU_POLICY: 'dedicated'}
    nova_helper.set_flavor(flavor=flavor_id, **extra_specs)

    mgmt_net_id = network_helper.get_mgmt_net_id()
    tenant_net_id = network_helper.get_tenant_net_id()
    internal_net_id = network_helper.get_internal_net_id()

    nics = [{'net-id': mgmt_net_id},
            {'net-id': tenant_net_id},
            {'net-id': internal_net_id}]

    LOG.fixture_step(
        "(class) Boot a base vm with following nics: {}".format(nics))
    base_vm = vm_helper.boot_vm(name='multiports_base', flavor=flavor_id,
                                nics=nics, cleanup='class',
                                reuse_vol=False)[1]

    vm_helper.wait_for_vm_pingable_from_natbox(base_vm)
    vm_helper.ping_vms_from_vm(base_vm, base_vm, net_types='data')

    return base_vm, flavor_id, mgmt_net_id, tenant_net_id, internal_net_id

def test_ea_vm_with_crypto_vfs(_flavors, hosts_pci_device_info):
    """
    Verify a guest can be launched with one crypto VF, AVP, VIRTIO, and
    SRIOV interfaces. Verify the device cannot be disabled while in use
    (mainly for labs with two computes).

    Args:
        _flavors:
        hosts_pci_device_info:
    """
    # hosts = list(hosts_pci_device_info.keys())
    vm_name = 'vm_with_pci_device'
    mgmt_net_id = network_helper.get_mgmt_net_id()

    nics = [{'net-id': mgmt_net_id}]

    flavor_id = _flavors['flavor_qat_vf_1']
    LOG.tc_step("Boot a vm {} with pci-sriov nics and flavor "
                "flavor_qat_vf_1".format(vm_name))
    vm_id = vm_helper.boot_vm(vm_name, flavor=flavor_id, nics=nics,
                              cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)
    LOG.info("VM {} booted successfully and became active with crypto "
             "VF".format(vm_name))

    vm_host = vm_helper.get_vm_host(vm_id)
    pci_dev_info = hosts_pci_device_info[vm_host][0]
    # device_address = pci_dev_info['pci_address']
    host_dev_name = pci_dev_info['device_name']
    expt_qat_devs = {host_dev_name: 1}

    check_helper.check_qat_service(vm_id=vm_id, qat_devs=expt_qat_devs)

    _perform_nova_actions(vms_dict={vm_name: vm_id}, flavors=_flavors)

    check_helper.check_qat_service(vm_id=vm_id, qat_devs=expt_qat_devs)

def _test_check_vm_disk_on_compute(storage, hosts_per_backing):
    """
    Tests that the existence of volumes is properly reported for
    lvm-backed vms.

    Skip:
        - Skip if no lvm-configured compute nodes available

    Test steps:
        - Create a flavor for lvm-backed vms and boot a vm from that flavor
        - SSH onto the node hosting the VM and do the following:
            - Run ps aux and confirm that there is a qemu process
            - Run sudo lvs and confirm the existence of a thin pool
            - Run sudo lvs and confirm the existence of a volume for the vm
        - Ensure that the "free" space shown for the hypervisor (obtained by
          running "nova hypervisor-show <compute node>" and then checking
          the "free_disk_gb" field) reflects the space available within the
          thin pool
        - Delete the instance and ensure that space is returned to the
          hypervisor

    Test Teardown:
        - Delete created VM if not already done
    """
    hosts_with_backing = hosts_per_backing.get(storage, [])
    if not hosts_with_backing:
        skip(SkipStorageBacking.NO_HOST_WITH_BACKING.format(storage))

    LOG.tc_step("Create flavor and boot vm")
    flavor = nova_helper.create_flavor(storage_backing=storage)[1]
    ResourceCleanup.add('flavor', flavor, scope='function')
    vm = vm_helper.boot_vm(source='image', flavor=flavor,
                           cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm)
    vm_host = vm_helper.get_vm_host(vm)

    with host_helper.ssh_to_host(vm_host) as compute_ssh:
        LOG.tc_step("Look for qemu process")
        compute_ssh.exec_sudo_cmd(cmd="lvs --units g")
        assert check_for_qemu_process(compute_ssh), \
            "qemu process not found when calling ps"

        LOG.tc_step("Look for pool information")
        thin_pool_size = get_initial_pool_space(compute_ssh, vm)

        vm_vol_name = vm + '_disk'
        raw_vm_volume_output = compute_ssh.exec_sudo_cmd(
            cmd="lvs --units g --noheadings -o lv_size -S "
                "lv_name={}".format(vm_vol_name))[1]
        assert raw_vm_volume_output, "created vm volume not found"
        vm_volume_size = float(raw_vm_volume_output.strip('<g'))

    LOG.tc_step("Calculate compute free disk space and ensure that it "
                "reflects thin pool")
    expected_space_left = int(thin_pool_size - vm_volume_size)
    free_disk_space = get_compute_free_disk_gb(vm_host)
    assert expected_space_left - 1 <= free_disk_space <= \
        expected_space_left + 1, \
        'Hypervisor-show does not reflect space within thin pool'

    LOG.tc_step("Calculate free space following vm deletion (ensure volume "
                "space is returned)")
    vm_helper.delete_vms(vm)
    free_disk_space = get_compute_free_disk_gb(vm_host)
    assert int(thin_pool_size) == free_disk_space, \
        'Space is not properly returned to the hypervisor or hypervisor ' \
        'info does not properly reflect it'

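# The assertions above compare nova's free_disk_gb against lvs output. A
# minimal sketch of deriving free thin-pool space directly on the compute is
# below; the volume group and pool names are assumptions (lvm-backed computes
# typically use a nova-local group), and the real get_initial_pool_space()
# helper may parse differently.
def _thin_pool_free_gb(compute_ssh, vg_pool='nova-local/nova-local-pool'):
    """Free space (GiB) in a thin pool, from total size and used data%."""
    out = compute_ssh.exec_sudo_cmd(
        cmd="lvs --units g --noheadings -o lv_size,data_percent "
            "{}".format(vg_pool))[1]
    size_str, data_pct_str = out.split()
    size_gb = float(size_str.strip('<g'))
    used_fraction = float(data_pct_str) / 100
    return size_gb * (1 - used_fraction)
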
def sys_lock_unlock_standby(number_of_times=1):
    """
    Lock and unlock the standby controller, then verify all vms are still
    in good state and pingable from NatBox.
    :return:
    """
    timeout = VMTimeout.DHCP_RETRY if system_helper.is_aio_system() \
        else VMTimeout.PING_VM
    for i in range(0, number_of_times):
        active, standby = system_helper.get_active_standby_controllers()
        LOG.tc_step("Iteration {} of {}".format(i + 1, number_of_times))

        LOG.tc_step("Lock standby controller {}".format(standby))
        host_helper.lock_host(host=standby)

        LOG.tc_step("Check vms status after locking standby")
        vms = get_all_vms()
        vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)

        for vm in vms:
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm,
                                                       timeout=timeout)

        host_helper.unlock_host(host=standby)
        vms = get_all_vms()
        vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)

        for vm in vms:
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)

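# get_all_vms() used above (and in the functions below) is not shown in this
# section. A minimal sketch of what it likely does, assuming vm_helper
# exposes a get_vms() listing call; the actual helper's signature and filters
# may differ.
def get_all_vms():
    # List every VM on the system under the admin tenant (assumed signature).
    return vm_helper.get_vms(auth_info=Tenant.get('admin'),
                             all_projects=True)
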
def test_boot_and_ping_vm(guest_os, opensuse11_image, opensuse12_image,
                          opensuse13_image, rhel6_image, rhel7_image):
    vm_id = vm_helper.boot_vm(guest_os=guest_os, source='image',
                              cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

def sys_reboot_storage():
    """
    This is to identify the storage nodes and turn them off and on via vlm
    :return:
    """
    controllers, computes, storages = \
        system_helper.get_hosts_per_personality(rtn_tuple=True)

    LOG.info("Online or Available hosts before power-off: {}".format(
        storages))
    LOG.tc_step(
        "Powering off hosts in multi-processes to simulate power outage: "
        "{}".format(storages))
    try:
        vlm_helper.power_off_hosts_simultaneously(storages)
    finally:
        LOG.tc_step(
            "Wait for 60 seconds and power on hosts: {}".format(storages))
        time.sleep(60)
        LOG.info("Hosts to check after power-on: {}".format(storages))
        vlm_helper.power_on_hosts(storages, reserve=False,
                                  reconnect_timeout=HostTimeout.REBOOT +
                                  HostTimeout.REBOOT,
                                  hosts_to_check=storages)

    LOG.tc_step("Check vms status after storage nodes reboot")
    vms = get_all_vms()
    vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)

    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)

def test_boot_windows_guest():
    """
    Boot a windows guest to assist for manual testing on windows guest
    """
    # Change the following parameters to change the vm type.
    guest = 'win_2012'          # such as tis-centos-guest
    storage = 'local_image'     # local_lvm, local_image, or remote
    boot_source = 'image'       # volume or image

    LOG.tc_step("Get/Create {} glance image".format(guest))
    glance_helper.get_guest_image(guest_os=guest)

    LOG.tc_step("Create flavor with {} storage backing".format(storage))
    flv_id = nova_helper.create_flavor(name='{}-{}'.format(storage, guest),
                                       vcpus=4, ram=8192,
                                       storage_backing=storage,
                                       guest_os=guest)[1]
    nova_helper.set_flavor(flv_id, **{FlavorSpec.CPU_POLICY: 'dedicated'})

    LOG.tc_step("Boot {} vm".format(guest))
    vm_id = vm_helper.boot_vm(name='{}-{}'.format(guest, storage),
                              flavor=flv_id, guest_os=guest,
                              source=boot_source)[1]

    LOG.tc_step("Ping vm and ssh to it")
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)
    with vm_helper.ssh_to_vm_from_natbox(vm_id) as vm_ssh:
        code, output = vm_ssh.exec_cmd('pwd', fail_ok=False)
        LOG.info(output)

    LOG.info(
        "{} is successfully booted from {} with {} storage backing".format(
            guest, boot_source, storage))

def _test_migrate_anti_affinity_vms_in_parallel():
    """
    cold-migrate and live-migrate vms from anti-affinity group
    It will check if the heat stack is launched already; if not, it will
    launch the stack, find the vms in the anti-affinity group, and do cold
    and live migration on them
    """
    # First make sure heat stack is there:
    system_test_helper.launch_heat_stack()

    srv_grps_info = nova_helper.get_server_groups_info(
        headers=('Policies', 'Metadata', 'Members'))
    vms = []
    for group in srv_grps_info:
        policies, metadata, members = srv_grps_info[group]
        if members and 'anti-affinity' in policies and \
                metadata['wrs-sg:best_effort'] == 'false':
            if len(members) >= 10:
                vms = members[:9]
                break
    else:
        skip("There are no VMs in anti-affinity server group")

    check_vm_hosts(vms=vms, policy='anti_affinity')
    for vm_id in vms:
        vm_helper.wait_for_vm_status(vm_id=vm_id, check_interval=10)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id)
        vm_helper.live_migrate_vm(vm_id=vm_id)
        vm_helper.cold_migrate_vm(vm_id=vm_id)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id)

    check_vm_hosts(vms=vms, policy='anti_affinity')

def test_migrate_stress(check_hypervisors, boot_source, count):
    LOG.tc_step("Launch a VM from {}".format(boot_source))
    vm = vm_helper.boot_vm(name='{}-stress'.format(boot_source),
                           cleanup='function', source=boot_source)[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)

    block_mig = True if boot_source == 'image' else False
    if not block_mig:
        LOG.tc_step("Attempt to block migrate boot-from-volume VM and "
                    "ensure it fails")
        code = vm_helper.live_migrate_vm(vm_id=vm, block_migrate=True)[0]
        assert code > 0, "Block migration passed unexpectedly for " \
                         "boot-from-volume vm"
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)

    LOG.tc_step("Live migrate and ping vm {} times".format(count))
    for i in range(count):
        LOG.info('Live migration iter{}'.format(i + 1))
        vm_helper.live_migrate_vm(vm)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)

    LOG.tc_step("Cold migrate vm followed by live migrate {} times".format(
        count))
    for i in range(count):
        LOG.info('Cold+live migration iter{}'.format(i + 1))
        vm_helper.cold_migrate_vm(vm_id=vm)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)

        vm_helper.live_migrate_vm(vm)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)

def test_lock_with_max_vms_simplex(self, simplex_only):
    vms_num = host_helper.get_max_vms_supported(host='controller-0')
    vm_helper.ensure_vms_quotas(vms_num=vms_num)

    LOG.tc_step("Boot {} vms with various storage settings".format(vms_num))
    vms = vm_helper.boot_vms_various_types(cleanup='function',
                                           vms_num=vms_num)

    LOG.tc_step("Lock vm host on simplex system")
    HostsToRecover.add('controller-0')
    host_helper.lock_host('controller-0')

    LOG.tc_step("Ensure vms are in {} state after locked host comes "
                "online".format(VMStatus.STOPPED))
    vm_helper.wait_for_vms_values(vms, value=VMStatus.STOPPED,
                                  fail_ok=False)

    LOG.tc_step("Unlock host on simplex system")
    host_helper.unlock_host(host='controller-0')

    LOG.tc_step("Ensure vms are Active and Pingable from NatBox")
    vm_helper.wait_for_vms_values(vms, value=VMStatus.ACTIVE, fail_ok=False,
                                  timeout=600)
    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm, timeout=VMTimeout.DHCP_RETRY)

def test_dc_dead_office_recovery_central(
        reserve_unreserve_all_hosts_module_central):
    """
    Test dead office recovery on the main cloud

    Setups:
        - Reserve all nodes for central cloud in vlm

    Test Steps:
        - Launch various types of VMs in primary clouds.
        - Power off all nodes in vlm using multi-processing to simulate a
          power outage
        - Power on all nodes
        - Wait for nodes to become online/available
        - Check all the subclouds are synced as at the start of the test.
        - Check all the launched VMs are up in the subclouds.
    """
    LOG.tc_step("Boot 5 vms with various boot_source, disks, etc")
    vms = vm_helper.boot_vms_various_types()
    central_auth = Tenant.get('admin_platform',
                              dc_region='SystemController')
    hosts = system_helper.get_hosts(auth_info=central_auth)
    managed_subclouds = dc_helper.get_subclouds(mgmt='managed',
                                                avail='online')
    hosts_to_check = system_helper.get_hosts(
        availability=['available', 'online'], auth_info=central_auth)
    LOG.info("Online or Available hosts before power-off: {}".format(
        hosts_to_check))

    LOG.tc_step(
        "Powering off hosts in multi-processes to simulate power outage: "
        "{}".format(hosts))
    try:
        vlm_helper.power_off_hosts_simultaneously(hosts,
                                                  region='central_region')
    finally:
        LOG.tc_step("Wait for 60 seconds and power on hosts: {}".format(
            hosts))
        time.sleep(60)
        LOG.info("Hosts to check after power-on: {}".format(
            hosts_to_check))
        vlm_helper.power_on_hosts(hosts, reserve=False,
                                  reconnect_timeout=HostTimeout.REBOOT +
                                  HostTimeout.REBOOT,
                                  hosts_to_check=hosts_to_check,
                                  region='central_region')

    LOG.tc_step("Check subclouds managed")
    current_managed_subclouds = dc_helper.get_subclouds(mgmt='managed',
                                                        avail='online')
    assert managed_subclouds == current_managed_subclouds, \
        'Current managed subclouds are different from origin. ' \
        'Origin: {} Current: {}'.format(managed_subclouds,
                                        current_managed_subclouds)

    LOG.tc_step("Check vms are recovered after dead office recovery")
    vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)

    LOG.tc_step("Check vms are reachable after central clouds DOR test")
    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm_id=vm, timeout=VMTimeout.DHCP_RETRY)

def test_evacuate_vms(self, vms_):
    """
    Test evacuated vms

    Args:
        vms_: (fixture to create vms)

    Pre-requisites:
        - At least two up hypervisors on system

    Test Steps:
        - Create vms with various options:
            - vm booted from cinder volume,
            - vm booted from glance image,
            - vm booted from glance image, and have an extra cinder volume
              attached after launch,
            - vm booted from cinder volume with ephemeral and swap disks
        - Move vms onto same hypervisor
        - sudo reboot -f on the host
        - Ensure vms are successfully evacuated to other host
        - Live migrate vms back to original host
        - Check vms can move back, and vms are still reachable from natbox
        - Check system services are enabled and neutron agents are alive
    """
    vms, target_host = vms_

    pre_res_sys, pre_msg_sys = system_helper.wait_for_services_enable(
        timeout=20, fail_ok=True)
    up_hypervisors = host_helper.get_up_hypervisors()
    pre_res_neutron, pre_msg_neutron = \
        network_helper.wait_for_agents_healthy(
            up_hypervisors, timeout=20, fail_ok=True)

    LOG.tc_step(
        "reboot -f on vms host, ensure vms are successfully evacuated and "
        "host is recovered after reboot")
    vm_helper.evacuate_vms(host=target_host, vms_to_check=vms,
                           wait_for_host_up=True, ping_vms=True)

    LOG.tc_step("Check rebooted host can still host vm")
    vm_helper.live_migrate_vm(vms[0], destination_host=target_host)
    vm_helper.wait_for_vm_pingable_from_natbox(vms[0])

    LOG.tc_step("Check system services and neutron agents after {} "
                "reboot".format(target_host))
    post_res_sys, post_msg_sys = system_helper.wait_for_services_enable(
        fail_ok=True)
    post_res_neutron, post_msg_neutron = \
        network_helper.wait_for_agents_healthy(hosts=up_hypervisors,
                                               fail_ok=True)

    assert post_res_sys, "\nPost-evac system services stats: {}" \
                         "\nPre-evac system services stats: {}". \
        format(post_msg_sys, pre_msg_sys)
    assert post_res_neutron, "\nPost-evac neutron agents stats: {}" \
                             "\nPre-evac neutron agents stats: {}". \
        format(post_msg_neutron, pre_msg_neutron)

def test_reboot_only_host(self, get_zone):
    """
    Test rebooting the only hypervisor on the system

    Args:
        get_zone: fixture to create stxauto aggregate, to ensure vms can
            only land on one host

    Setups:
        - If more than 1 hypervisor: Create stxauto aggregate and add one
          host to the aggregate

    Test Steps:
        - Launch various vms on target host
            - vm booted from cinder volume,
            - vm booted from glance image,
            - vm booted from glance image, and have an extra cinder volume
              attached after launch,
            - vm booted from cinder volume with ephemeral and swap disks
        - sudo reboot -f only host
        - Check host is recovered
        - Check vms are recovered and reachable from NatBox
    """
    zone = get_zone

    LOG.tc_step("Launch 5 vms in {} zone".format(zone))
    vms = vm_helper.boot_vms_various_types(avail_zone=zone,
                                           cleanup='function')
    target_host = vm_helper.get_vm_host(vm_id=vms[0])
    for vm in vms[1:]:
        vm_host = vm_helper.get_vm_host(vm)
        assert target_host == vm_host, "VMs are not booted on same host"

    LOG.tc_step("Reboot -f from target host {}".format(target_host))
    HostsToRecover.add(target_host)
    host_helper.reboot_hosts(target_host)

    LOG.tc_step("Check vms are in Active state after host comes back up")
    res, active_vms, inactive_vms = vm_helper.wait_for_vms_values(
        vms=vms, value=VMStatus.ACTIVE, timeout=600)

    vms_host_err = []
    for vm in vms:
        if vm_helper.get_vm_host(vm) != target_host:
            vms_host_err.append(vm)

    assert not vms_host_err, \
        "Following VMs are not on the same host {}: {}\nVMs did not reach " \
        "Active state: {}".format(target_host, vms_host_err, inactive_vms)

    assert not inactive_vms, \
        "VMs did not reach Active state after evacuated to other host: " \
        "{}".format(inactive_vms)

    LOG.tc_step("Check VMs are pingable from NatBox after evacuation")
    vm_helper.wait_for_vm_pingable_from_natbox(
        vms, timeout=VMTimeout.DHCP_RETRY)

def vif_model_check(request):
    vif_model = request.param

    LOG.fixture_step(
        "Get a network that supports {} to boot vm".format(vif_model))
    pci_net = network_helper.get_pci_vm_network(pci_type=vif_model,
                                                net_name='internal0-net')
    if not pci_net:
        skip(SkipHostIf.PCI_IF_UNAVAIL)

    extra_pcipt_net_name = extra_pcipt_net = None
    if not isinstance(pci_net, str):
        pci_net, extra_pcipt_net_name = pci_net
    LOG.info("PCI network selected to boot vm: {}".format(pci_net))

    LOG.fixture_step("Create a flavor with dedicated cpu policy")
    flavor_id = nova_helper.create_flavor(name='dedicated', ram=2048,
                                          cleanup='module')[1]
    extra_specs = {FlavorSpec.CPU_POLICY: 'dedicated'}
    nova_helper.set_flavor(flavor=flavor_id, **extra_specs)

    LOG.fixture_step("Boot a base vm with above flavor and virtio nics")

    mgmt_net_id = network_helper.get_mgmt_net_id()
    pci_net_id, seg_id, pnet_name = network_helper.get_network_values(
        network=pci_net,
        fields=('id', 'provider:segmentation_id',
                'provider:physical_network'))

    nics = [{'net-id': mgmt_net_id}, {'net-id': pci_net_id}]
    nics_to_test = [{'net-id': mgmt_net_id},
                    {'net-id': pci_net_id, 'vif-model': vif_model}]
    pcipt_seg_ids = {}
    if vif_model == 'pci-passthrough':
        pcipt_seg_ids[pci_net] = seg_id
        if extra_pcipt_net_name:
            extra_pcipt_net, seg_id = network_helper.get_network_values(
                network=extra_pcipt_net_name,
                fields=('id', 'provider:segmentation_id'))
            nics.append({'net-id': extra_pcipt_net})
            nics_to_test.append({'net-id': extra_pcipt_net,
                                 'vif-model': vif_model})
            pcipt_seg_ids[extra_pcipt_net_name] = seg_id

    base_vm = vm_helper.boot_vm(flavor=flavor_id, nics=nics,
                                cleanup='module')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(base_vm)
    vm_helper.ping_vms_from_vm(base_vm, base_vm,
                               net_types=['mgmt', 'internal'])

    return vif_model, base_vm, flavor_id, nics_to_test, pcipt_seg_ids, \
        pnet_name, extra_pcipt_net

def _ping_vm_data(vm_under_test, base_vm_id, action):
    LOG.tc_step(
        "Verify ping vm_under_test {} from vm {} over mgmt & data networks "
        "works after {}".format(vm_under_test, base_vm_id, action))
    vm_helper.wait_for_vm_pingable_from_natbox(vm_under_test)
    vm_helper.ping_vms_from_vm(to_vms=vm_under_test, from_vm=base_vm_id,
                               net_types=['data'], retry=10)

def test_non_primary_tenant():
    vm_1 = vm_helper.boot_vm(cleanup='function',
                             auth_info=Tenant.get('tenant1'))[1]
    vm_2 = vm_helper.launch_vms(vm_type='dpdk',
                                auth_info=Tenant.get('tenant1'))[0][0]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_1)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_2)
    vm_helper.ping_vms_from_natbox(vm_ids=vm_2)
    vm_helper.ping_vms_from_vm(vm_2, vm_1, net_types='mgmt')

def test_vm_meta_data_access_after_delete_add_interfaces_router(
        _router_info):
    """
    VM meta-data retrieval

    Test Steps:
        - Launch a boot-from-image vm
        - Retrieve vm meta_data within vm from metadata server
        - Ensure vm uuid from metadata server is the same as nova show
        - Delete all Router Interfaces
        - Re-add Router Interfaces
        - Verify metadata access works
        - Delete Router
        - Create Router and Add Interfaces
        - Verify metadata access works

    Test Teardown:
        - Ensure Router exists
        - Verify the external gateway info matches
        - Ensure all interfaces exist
        - Delete created vm and flavor
    """
    router_id, router_name, gateway_ip, ext_gateway_info, router_subnets, \
        ext_gateway_subnet, is_dvr = _router_info
    LOG.tc_step("Launch a boot-from-image vm")
    vm_id = vm_helper.boot_vm(source='image', cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id, fail_ok=False)

    LOG.tc_step('Retrieve vm meta_data within vm from metadata server '
                'before Interface delete')
    _access_metadata_server_from_vm(vm_id=vm_id)

    LOG.tc_step('Delete Router Interfaces')
    _delete_router_interfaces(router_id, router_subnets,
                              ext_gateway_subnet)
    LOG.tc_step('Re-add Router Interfaces')
    _add_router_interfaces(router_id, router_subnets, ext_gateway_subnet)

    LOG.tc_step('Retrieve vm meta_data within vm from metadata server '
                'after delete/add Router Interfaces')
    _access_metadata_server_from_vm(vm_id=vm_id)

    LOG.tc_step('Delete Router')
    network_helper.delete_router(router=router_id)

    LOG.tc_step('Create Router')
    router_id = network_helper.create_router(name=router_name)[1]

    LOG.tc_step('Set external gateway info for router {}'.format(
        router_id))
    _set_external_gatewayway_info(router_id, ext_gateway_subnet,
                                  gateway_ip, is_dvr)

    LOG.tc_step('Re-add Router Interfaces')
    _add_router_interfaces(router_id, router_subnets, ext_gateway_subnet)

    LOG.tc_step('Retrieve vm meta_data within vm from metadata server '
                'after delete/create Router')
    _access_metadata_server_from_vm(vm_id=vm_id)

def test_snat_reset_router_ext_gateway(snat_setups):
    """
    Test VM external access after resetting the router external gateway.

    Args:
        snat_setups (tuple): returns vm id and fip. Enable snat, create vm
            and attach floating ip.

    Test Setups:
        - Find a tenant router that is dvr or non-dvr based on the parameter
        - Enable SNAT on tenant router
        - boot a vm and attach a floating ip
        - Ping vm from NatBox

    Test Steps:
        - Ping outside from VM
        - Clear router gateway
        - Verify vm cannot be ping'd from NatBox
        - Set router gateway
        - Verify vm can be ping'd from NatBox
        - Verify vm can ping outside

    Test Teardown:
        - Delete the created vm (module)
        - Disable SNAT on router (module)
    """
    vm_, fip = snat_setups
    LOG.tc_step("Ping vm management net ip from NatBox")
    vm_helper.ping_vms_from_natbox(vm_, use_fip=False)
    # vm_helper.ping_vms_from_natbox(vm_, use_fip=True)

    LOG.tc_step("Ping outside from VM {}".format(vm_))
    vm_helper.ping_ext_from_vm(vm_, use_fip=True)

    LOG.tc_step("Disassociate floatingip from vm and verify it's "
                "successful.")
    network_helper.unset_floating_ip(floating_ip=fip, port=True)
    # assert not network_helper.get_floating_ip_info(
    #     fip=fip, field='fixed_ip_address'), \
    #     "Floating ip {} still attached to fixed ip".format(fip)

    LOG.tc_step("Clear router gateway and verify vm cannot be ping'd from "
                "NatBox")
    fixed_ip = network_helper.get_router_ext_gateway_info()[
        'external_fixed_ips'][0]['ip_address']
    network_helper.clear_router_gateway(check_first=False)
    ping_res = vm_helper.ping_vms_from_natbox(vm_, fail_ok=True,
                                              use_fip=False)[0]
    assert ping_res is False, "VM can still be ping'd from outside after " \
                              "clearing router gateway."

    LOG.tc_step("Set router gateway with the same fixed ip")
    network_helper.set_router_gateway(clear_first=False,
                                      fixed_ips=fixed_ip,
                                      enable_snat=True)

    LOG.tc_step("Verify SNAT is enabled by default after setting router "
                "gateway.")
    assert network_helper.get_router_ext_gateway_info()['enable_snat'], \
        "SNAT is not enabled by default."

    LOG.tc_step("Associate floating ip to vm")
    network_helper.associate_floating_ip_to_vm(floating_ip=fip, vm_id=vm_)

    LOG.tc_step("Verify vm can ping to and be ping'd from outside")
    vm_helper.wait_for_vm_pingable_from_natbox(vm_, timeout=60,
                                               fail_ok=False)
    vm_helper.ping_ext_from_vm(vm_, use_fip=True)

def sys_lock_unlock_hosts(number_of_hosts_to_lock):
    """
    This is to test the evacuation of vms due to compute lock/unlock
    :return:
    """
    # identify hosts with at least 5 vms
    vms_by_compute_dic = vm_helper.get_vms_per_host()
    compute_to_lock = []
    vms_to_check = []
    hosts_threads = []
    timeout = 1000

    for k, v in vms_by_compute_dic.items():
        if len(v) >= 5:
            compute_to_lock.append(k)
            vms_to_check.append(v)

    if not compute_to_lock:
        skip("There are no computes with 5 or more vms")

    if len(compute_to_lock) > number_of_hosts_to_lock:
        compute_to_lock = compute_to_lock[0:number_of_hosts_to_lock]
        vms_to_check = vms_to_check[0:number_of_hosts_to_lock]
    else:
        LOG.warning(
            "There are only {} computes available with more than 5 "
            "vms".format(len(compute_to_lock)))

    for host in compute_to_lock:
        new_thread = MThread(host_helper.lock_host, host)
        new_thread.start_thread(timeout=timeout + 30)
        hosts_threads.append(new_thread)

    for host_thr in hosts_threads:
        host_thr.wait_for_thread_end()

    LOG.tc_step("Verify lock succeeded and vms still in good state")
    for vm_list in vms_to_check:
        vm_helper.wait_for_vms_values(vms=vm_list, fail_ok=False)

    for host, vms in zip(compute_to_lock, vms_to_check):
        for vm in vms:
            vm_host = vm_helper.get_vm_host(vm_id=vm)
            assert vm_host != host, \
                "VM is still on {} after lock".format(host)
            vm_helper.wait_for_vm_pingable_from_natbox(
                vm_id=vm, timeout=VMTimeout.DHCP_RETRY)

    hosts_threads = []
    for host in compute_to_lock:
        new_thread = MThread(host_helper.unlock_host, host)
        new_thread.start_thread(timeout=timeout + 30)
        hosts_threads.append(new_thread)

    for host_thr in hosts_threads:
        host_thr.wait_for_thread_end()

def test_system_persist_over_host_reboot(host_type, stx_openstack_required):
    """
    Validate Inventory summary over reboot of one of the hosts, and verify
    that key data persists over the reboot.

    Test Steps:
        - capture Inventory summary for list of hosts, system service-list
          and neutron agent-list
        - reboot the target host
        - Wait for reboot to complete
        - Validate key items from inventory persist over reboot
    """
    if host_type == 'controller':
        host = system_helper.get_active_controller_name()
    elif host_type == 'compute':
        if system_helper.is_aio_system():
            skip("No compute host for AIO system")
        host = None
    else:
        hosts = system_helper.get_hosts(personality='storage')
        if not hosts:
            skip(msg="Lab has no storage nodes. Skip rebooting storage "
                     "node.")
        host = hosts[0]

    LOG.tc_step("Pre-check for system status")
    system_helper.wait_for_services_enable()
    up_hypervisors = host_helper.get_up_hypervisors()
    network_helper.wait_for_agents_healthy(hosts=up_hypervisors)

    LOG.tc_step("Launch a vm")
    vm_id = vm_helper.boot_vm(cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

    if host is None:
        host = vm_helper.get_vm_host(vm_id)

    LOG.tc_step("Reboot a {} node and wait for reboot to complete: "
                "{}".format(host_type, host))
    HostsToRecover.add(host)
    host_helper.reboot_hosts(host)
    host_helper.wait_for_hosts_ready(host)

    LOG.tc_step("Check vm is still active and pingable after {} "
                "reboot".format(host))
    vm_helper.wait_for_vm_status(vm_id, status=VMStatus.ACTIVE,
                                 fail_ok=False)
    vm_helper.wait_for_vm_pingable_from_natbox(
        vm_id=vm_id, timeout=VMTimeout.DHCP_RETRY)

    LOG.tc_step("Check neutron agents and system services are in good "
                "state after {} reboot".format(host))
    network_helper.wait_for_agents_healthy(up_hypervisors)
    system_helper.wait_for_services_enable()

    if host in up_hypervisors:
        LOG.tc_step("Check {} can still host vm after reboot".format(host))
        if vm_helper.get_vm_host(vm_id) != host:
            time.sleep(30)
            vm_helper.live_migrate_vm(vm_id, destination_host=host)

def test_dvr_update_router(router_info, _bring_up_router):
    """
    Test update router to distributed and non-distributed

    Args:
        router_info (str): router id

    Setups:
        - Get the router id and original distributed setting

    Test Steps:
        - Boot a vm before updating router and ping vm from NatBox
        - Change the distributed value of the router and verify it's
          updated successfully
        - Verify router is in ACTIVE state
        - Verify vm can still be ping'd from NatBox
        - Repeat the three steps above with the distributed value reverted
          to original value

    Teardown:
        - Delete vm
        - Revert router to its original distributed setting if not already
          done so
    """
    global result_
    result_ = False
    router_id = router_info

    LOG.tc_step("Boot a vm before updating router and ping vm from NatBox")
    vm_id = vm_helper.boot_vm(name='dvr_update', reuse_vol=False,
                              cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id, fail_ok=False)

    for update_to_val in [False, True]:
        LOG.tc_step("Update router distributed to {}".format(
            update_to_val))
        network_helper.set_router_mode(router_id,
                                       distributed=update_to_val,
                                       enable_on_failure=False)

        # Wait for 30 seconds to allow the router update to complete
        time.sleep(30)
        LOG.tc_step("Verify router is in active state and vm can be "
                    "ping'd from NatBox")
        assert RouterStatus.ACTIVE == \
            network_helper.get_router_values(router_id,
                                             fields='status')[0], \
            "Router is not in active state after updating distributed " \
            "to {}.".format(update_to_val)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id, fail_ok=False)

    result_ = True

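# The fixed 30-second sleep above could instead poll the router status
# actively. A minimal sketch using the same get_router_values() call as the
# assertion above; the timeout and interval values are illustrative.
def _wait_for_router_active(router_id, timeout=60, check_interval=5):
    """Poll router status until ACTIVE or timeout; return True if ACTIVE."""
    end_time = time.time() + timeout
    while time.time() < end_time:
        status = network_helper.get_router_values(router_id,
                                                  fields='status')[0]
        if status == RouterStatus.ACTIVE:
            return True
        time.sleep(check_interval)
    return False
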
def test_force_lock_with_non_mig_vms(add_host_to_zone):
    """
    Test force lock host with non-migrate-able vms on it

    Prerequisites:
        - Minimum of two up hypervisors

    Test Setups:
        - Add admin role to primary tenant
        - Create cgcsauto aggregate
        - Add host_under_test to cgcsauto aggregate
        - Create flavor for vms_to_test with storage_backing supported by
          host_under_test
        - Create vms_to_test on host_under_test that can be live migrated

    Test Steps:
        - Force lock target host
        - Verify force lock returns 0
        - Verify VMs cannot find a host to boot and are in error state
        - Unlock locked target host
        - Verify VMs are active on host once it is up and available
        - Verify VMs can be pinged

    Test Teardown:
        - Remove admin role from primary tenant
        - Delete created vms
        - Remove host_under_test from cgcsauto aggregate
    """
    storage_backing, host_under_test = add_host_to_zone

    # Create flavor with storage_backing the host_under_test supports
    flavor_id = nova_helper.create_flavor(
        storage_backing=storage_backing)[1]

    # Boot VMs on the host using the above flavor.
    LOG.tc_step("Boot VM on {}".format(host_under_test))
    vm_id = vm_helper.boot_vm(vm_host=host_under_test, flavor=flavor_id,
                              avail_zone='cgcsauto',
                              cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

    # Force lock host that VMs are booted on.
    LOG.tc_step("Force lock {}".format(host_under_test))
    HostsToRecover.add(host_under_test)
    lock_code, lock_output = host_helper.lock_host(host_under_test,
                                                   force=True)
    assert lock_code == 0, "Failed to lock {}. Details: {}".format(
        host_under_test, lock_output)

    vm_helper.wait_for_vm_values(vm_id, fail_ok=False,
                                 **{'status': 'ERROR'})

    host_helper.unlock_host(host_under_test)

    vm_helper.wait_for_vm_values(vm_id, timeout=300, fail_ok=False,
                                 **{'status': 'ACTIVE'})
    vm_helper.wait_for_vm_pingable_from_natbox(
        vm_id, timeout=VMTimeout.DHCP_RETRY)

def test_live_migrate_vm_positive(hosts_per_stor_backing, storage_backing,
                                  ephemeral, swap, cpu_pol, vcpus, vm_type,
                                  block_mig):
    """
    Skip Condition:
        - Less than two hosts have specified storage backing

    Test Steps:
        - create flavor with specified vcpus, cpu_policy, ephemeral, swap,
          storage_backing
        - boot vm from specified boot source with above flavor
        - (attach volume to vm if 'image_with_vol', specified in vm_type)
        - Live migrate the vm with specified block_migration flag
        - Verify VM is successfully live migrated to different host

    Teardown:
        - Delete created vm, volume, flavor
    """
    if len(hosts_per_stor_backing.get(storage_backing, [])) < 2:
        skip("Less than two hosts have {} storage backing".format(
            storage_backing))

    vm_id = _boot_vm_under_test(storage_backing, ephemeral, swap, cpu_pol,
                                vcpus, vm_type)

    prev_vm_host = vm_helper.get_vm_host(vm_id)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

    vm_disks = vm_helper.get_vm_devices_via_virsh(vm_id)
    file_paths, content = touch_files_under_vm_disks(vm_id=vm_id,
                                                     ephemeral=ephemeral,
                                                     swap=swap,
                                                     vm_type=vm_type,
                                                     disks=vm_disks)

    LOG.tc_step("Live migrate VM and ensure it succeeded")
    # block_mig = True if boot_source == 'image' else False
    code, output = vm_helper.live_migrate_vm(vm_id,
                                             block_migrate=block_mig)
    assert 0 == code, "Live migrate is not successful. Details: {}".format(
        output)

    post_vm_host = vm_helper.get_vm_host(vm_id)
    assert prev_vm_host != post_vm_host

    LOG.tc_step("Ensure vm is pingable from NatBox after live migration")
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

    LOG.tc_step("Check files after live migrate")
    check_helper.check_vm_files(vm_id=vm_id,
                                storage_backing=storage_backing,
                                ephemeral=ephemeral, swap=swap,
                                vm_type=vm_type, vm_action='live_migrate',
                                file_paths=file_paths, content=content,
                                disks=vm_disks, prev_host=prev_vm_host,
                                post_host=post_vm_host)

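# touch_files_under_vm_disks() and check_helper.check_vm_files() above
# implement a data-integrity check across migration: write a known marker to
# the vm's disks before the action, then re-read it afterwards. A minimal
# sketch of the idea is below; the marker path is an assumption, and the
# real helpers derive per-disk paths from the virsh device listing.
def _write_marker_file(vm_id, path='/tmp/migrate_marker',
                       content='live_migrate_test'):
    with vm_helper.ssh_to_vm_from_natbox(vm_id) as vm_ssh:
        vm_ssh.exec_cmd('echo {} > {}'.format(content, path),
                        fail_ok=False)
        vm_ssh.exec_cmd('sync')


def _check_marker_file(vm_id, path='/tmp/migrate_marker',
                       content='live_migrate_test'):
    with vm_helper.ssh_to_vm_from_natbox(vm_id) as vm_ssh:
        out = vm_ssh.exec_cmd('cat {}'.format(path), fail_ok=False)[1]
        assert content in out, \
            "Marker file {} changed after migration".format(path)
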
def _test_ea_vm_co_existence_with_and_without_crypto_vfs(_flavors):
    """
    Verify guests with crypto VFs can co-exist with guests without
    crypto VFs.

    Args:
        _flavors:

    Returns:
    """
    mgmt_net_id = network_helper.get_mgmt_net_id()
    tenant_net_ids = network_helper.get_tenant_net_ids()
    internal_net_id = network_helper.get_internal_net_id()
    vif_type = get_vif_type()

    vm_params = {
        'vm_no_crypto_1': [_flavors['flavor_none'],
                           [{'net-id': mgmt_net_id},
                            {'net-id': tenant_net_ids[0],
                             'vif-model': vif_type},
                            {'net-id': internal_net_id,
                             'vif-model': vif_type}]],
        'vm_no_crypto_2': [_flavors['flavor_none'],
                           [{'net-id': mgmt_net_id},
                            {'net-id': tenant_net_ids[1],
                             'vif-model': vif_type},
                            {'net-id': internal_net_id,
                             'vif-model': vif_type}]],
        'vm_sriov_crypto': [_flavors['flavor_qat_vf_1'],
                            [{'net-id': mgmt_net_id},
                             {'net-id': tenant_net_ids[2],
                              'vif-model': vif_type},
                             {'net-id': internal_net_id,
                              'vif-model': 'pci-sriov'}]],
        'vm_crypto_1': [_flavors['flavor_qat_vf_1'],
                        [{'net-id': mgmt_net_id},
                         {'net-id': tenant_net_ids[3],
                          'vif-model': vif_type},
                         {'net-id': internal_net_id,
                          'vif-model': vif_type}]],
        'vm_crypto_2': [_flavors['flavor_qat_vf_1'],
                        [{'net-id': mgmt_net_id},
                         {'net-id': tenant_net_ids[4],
                          'vif-model': vif_type},
                         {'net-id': internal_net_id,
                          'vif-model': vif_type}]],
    }

    vms = {}
    vms_qat_devs = {}

    for vm_name, param in vm_params.items():
        LOG.tc_step("Boot vm {} with {} flavor".format(vm_name, param[0]))
        vm_id = vm_helper.boot_vm(vm_name, flavor=param[0], nics=param[1],
                                  cleanup='function')[1]

        LOG.info("Verify VM can be pinged from NAT box...")
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id)
        vms[vm_name] = vm_id
        vm_host = vm_helper.get_vm_host(vm_id)
        host_dev_name = host_helper.get_host_devices(
            vm_host, field='device name',
            **{'class id': DevClassID.QAT_VF})[0]
        expt_qat_devs = {} if '_no_crypto' in vm_name else \
            {host_dev_name: 1}
        vms_qat_devs[vm_id] = expt_qat_devs
        check_helper.check_qat_service(vm_id=vm_id,
                                       qat_devs=expt_qat_devs)

    _perform_nova_actions(vms, flavors=_flavors, vfs=None)

    for vm_id_, expt_qat_devs_ in vms_qat_devs.items():
        check_helper.check_qat_service(vm_id_, qat_devs=expt_qat_devs_)

def test_migration_auto_converge(no_simplex):
    """
    Auto converge a VM with stress-ng running

    Test Steps:
        - Create flavor
        - Create a heat stack (launch a vm with stress-ng)
        - Perform live-migration and verify connectivity

    Test Teardown:
        - Delete stacks, vm, flavors created
    """

    LOG.tc_step("Create a flavor with 2 vcpus")
    flavor_id = nova_helper.create_flavor(vcpus=2, ram=1024,
                                          root_disk=3)[1]
    ResourceCleanup.add('flavor', flavor_id)

    # add migration timeout
    extra_specs = {FlavorSpec.LIVE_MIG_TIME_OUT: 300}
    nova_helper.set_flavor(flavor=flavor_id, **extra_specs)

    LOG.tc_step("Get the heat file name to use")
    heat_template = _get_stress_ng_heat()

    stack_name = vm_name = 'stress_ng'
    LOG.tc_step("Creating heat stack")
    code, msg = heat_helper.create_stack(stack_name=stack_name,
                                         template=heat_template,
                                         parameters={'flavor': flavor_id,
                                                     'name': vm_name},
                                         cleanup='function')
    assert code == 0, "Failed to create heat stack"

    LOG.info("Verifying server creation via heat")
    vm_id = vm_helper.get_vm_id_from_name(vm_name='stress_ng',
                                          strict=False)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id)

    with vm_helper.ssh_to_vm_from_natbox(vm_id) as vm_ssh:
        LOG.tc_step("Check for stress-ng inside vm")
        assert 0 == wait_for_stress_ng(vm_ssh), "stress-ng is not running"

    for vm_actions in [['live_migrate']]:
        LOG.tc_step("Perform following action(s) on vm {}: {}".format(
            vm_id, vm_actions))
        for action in vm_actions:
            vm_helper.perform_action_on_vm(vm_id, action=action)

        LOG.tc_step("Ping vm from natbox")
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

def _perform_nova_actions(vms_dict, flavors, vfs=None):
    for vm_name, vm_id in vms_dict.items():
        LOG.tc_step("Cold migrate VM {} ....".format(vm_name))
        vm_helper.cold_migrate_vm(vm_id=vm_id)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

        LOG.tc_step("Live migrate VM {} ....".format(vm_name))
        expt_codes = [0] if 'vm_no_crypto' in vm_name else [1, 6]
        code, msg = vm_helper.live_migrate_vm(vm_id=vm_id, fail_ok=True)
        assert code in expt_codes, \
            "Expect live migrate to fail for vm with pci device " \
            "attached. Actual: {}".format(msg)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

        LOG.tc_step("Suspend/Resume VM {} ....".format(vm_name))
        vm_helper.suspend_vm(vm_id)
        vm_helper.resume_vm(vm_id)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

        if vfs is None:
            resize_flavor_id = flavors["flavor_resize_qat_vf_1"] if \
                "no_crypto" not in vm_name else \
                flavors["flavor_resize_none"]
        else:
            resize_flavor_id = flavors['flavor_resize_qat_vf_{}'.format(
                vfs)]

        LOG.info("Resizing VM {} to new flavor {} ...".format(
            vm_name, resize_flavor_id))
        vm_helper.resize_vm(vm_id, resize_flavor_id)
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id)

def launch_delete_vm(flavor, end_time, end_event):
    iter_ = 0
    name, flv_id = flavor
    while time.time() < end_time:
        iter_ += 1
        if end_event.is_set():
            assert 0, "Another thread failed. Terminate rest."

        LOG.tc_step("Iter{} - Launch and delete vm on {}".format(iter_,
                                                                 name))
        vm_id = vm_helper.boot_vm(name=name, flavor=flv_id)[1]
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id)

        time.sleep(15)
        vm_helper.delete_vms(vms=vm_id)

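# launch_delete_vm() above is written as a worker for parallel stress runs:
# each thread loops until end_time, or bails out when another thread sets
# end_event on failure. A minimal usage sketch with the MThread wrapper used
# elsewhere in this section is below; it assumes a standard threading.Event
# for end_event and pre-created (name, flavor_id) tuples, and the durations
# are illustrative.
def _run_parallel_launch_delete(flavors, duration=3600):
    import threading

    end_time = time.time() + duration
    end_event = threading.Event()  # set by a failure handler in real runs
    threads = []
    for flavor in flavors:
        thr = MThread(launch_delete_vm, flavor, end_time, end_event)
        thr.start_thread(timeout=duration + 120)
        threads.append(thr)
    for thr in threads:
        thr.wait_for_thread_end()
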