def test_lock_with_max_vms_simplex(self, simplex_only):
    vms_num = host_helper.get_max_vms_supported(host='controller-0')
    vm_helper.ensure_vms_quotas(vms_num=vms_num)

    LOG.tc_step("Boot {} vms with various storage settings".format(vms_num))
    vms = vm_helper.boot_vms_various_types(cleanup='function',
                                           vms_num=vms_num)

    LOG.tc_step("Lock vm host on simplex system")
    HostsToRecover.add('controller-0')
    host_helper.lock_host('controller-0')

    LOG.tc_step("Ensure vms are in {} state after host is locked".format(
        VMStatus.STOPPED))
    vm_helper.wait_for_vms_values(vms, value=VMStatus.STOPPED, fail_ok=False)

    LOG.tc_step("Unlock host on simplex system")
    host_helper.unlock_host(host='controller-0')

    LOG.tc_step("Ensure vms are Active and pingable from NatBox")
    vm_helper.wait_for_vms_values(vms, value=VMStatus.ACTIVE, fail_ok=False,
                                  timeout=600)
    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm, timeout=VMTimeout.DHCP_RETRY)
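
# A minimal sketch of the status-polling pattern that wait_for_vms_values()
# presumably implements; status_fn is a hypothetical accessor supplied by the
# caller, not part of the real vm_helper API.
import time


def wait_for_status(vm_ids, expected, status_fn, timeout=600, interval=10):
    """Poll until every vm reports the expected status, or raise on timeout."""
    end_time = time.time() + timeout
    pending = list(vm_ids)
    while time.time() < end_time:
        pending = [vm for vm in pending if status_fn(vm) != expected]
        if not pending:
            return
        time.sleep(interval)
    raise TimeoutError("VMs not {} within {}s: {}".format(
        expected, timeout, pending))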
def sys_lock_unlock_standby(number_of_times=1):
    """
    Lock and unlock the standby controller, then verify vms stay healthy
    :return:
    """
    timeout = VMTimeout.DHCP_RETRY if system_helper.is_aio_system() \
        else VMTimeout.PING_VM
    for i in range(number_of_times):
        active, standby = system_helper.get_active_standby_controllers()
        LOG.tc_step("Iteration {} of {}".format(i + 1, number_of_times))

        LOG.tc_step("Lock standby controller {}".format(standby))
        host_helper.lock_host(host=standby)

        LOG.tc_step("Check vms status after locking standby")
        vms = get_all_vms()
        vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
        for vm in vms:
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm,
                                                       timeout=timeout)

        host_helper.unlock_host(host=standby)
        vms = get_all_vms()
        vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
        for vm in vms:
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)
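
# The lock/verify/unlock cycle above, reduced to a hedged context-manager
# sketch; lock_fn/unlock_fn are hypothetical callables standing in for
# host_helper.lock_host/unlock_host, so the unlock is guaranteed even if the
# in-between checks raise.
from contextlib import contextmanager


@contextmanager
def locked_host(host, lock_fn, unlock_fn):
    lock_fn(host=host)
    try:
        yield host
    finally:
        unlock_fn(host=host)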
def test_launch_vms_for_traffic():
    stack1 = "/home/sysadmin/lab_setup-tenant1-resources.yaml"
    stack1_name = "lab_setup-tenant1-resources"
    stack2 = "/home/sysadmin/lab_setup-tenant2-resources.yaml"
    stack2_name = "lab_setup-tenant2-resources"
    script_name = "/home/sysadmin/create_resource_stacks.sh"

    con_ssh = ControllerClient.get_active_controller()
    if con_ssh.file_exists(file_path=script_name):
        cmd1 = 'chmod 755 ' + script_name
        con_ssh.exec_cmd(cmd1)
        con_ssh.exec_cmd(script_name, fail_ok=False)

    # may be better to delete all existing tenant stacks first, if any
    heat_helper.create_stack(stack_name=stack1_name, template=stack1,
                             auth_info=Tenant.get('tenant1'), timeout=1000,
                             cleanup=None)
    heat_helper.create_stack(stack_name=stack2_name, template=stack2,
                             auth_info=Tenant.get('tenant2'), timeout=1000,
                             cleanup=None)

    LOG.info("Checking all VMs are in active state")
    vms = system_test_helper.get_all_vms()
    vm_helper.wait_for_vms_values(vms=vms, fail_ok=False)
def launch_lab_setup_tenants_vms():
    home_dir = HostLinuxUser.get_home()
    stack1 = "{}/lab_setup-tenant1-resources.yaml".format(home_dir)
    stack1_name = "lab_setup-tenant1-resources"
    stack2 = "{}/lab_setup-tenant2-resources.yaml".format(home_dir)
    stack2_name = "lab_setup-tenant2-resources"
    script_name = "{}/create_resource_stacks.sh".format(home_dir)

    con_ssh = ControllerClient.get_active_controller()
    if con_ssh.file_exists(file_path=script_name):
        cmd1 = 'chmod 755 ' + script_name
        con_ssh.exec_cmd(cmd1)
        con_ssh.exec_cmd(script_name, fail_ok=False)

    # may be better to delete all existing tenant stacks first, if any
    stack_id_t1 = heat_helper.get_stacks(name=stack1_name,
                                         auth_info=Tenant.get('tenant1'))
    if not stack_id_t1:
        heat_helper.create_stack(stack_name=stack1_name, template=stack1,
                                 auth_info=Tenant.get('tenant1'),
                                 timeout=1000, cleanup=None)

    stack_id_t2 = heat_helper.get_stacks(name=stack2_name,
                                         auth_info=Tenant.get('tenant2'))
    if not stack_id_t2:
        heat_helper.create_stack(stack_name=stack2_name, template=stack2,
                                 auth_info=Tenant.get('tenant2'),
                                 timeout=1000, cleanup=None)

    LOG.info("Checking all VMs are in active state")
    vms = get_all_vms()
    vm_helper.wait_for_vms_values(vms=vms, fail_ok=False)
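
# The get-then-create idempotency check above, factored into a hedged helper
# sketch; get_fn/create_fn are hypothetical stand-ins for
# heat_helper.get_stacks/create_stack.
def ensure_stack(stack_name, template, get_fn, create_fn, **create_kwargs):
    """Create the stack only if no stack with that name already exists."""
    if not get_fn(name=stack_name):
        create_fn(stack_name=stack_name, template=template, **create_kwargs)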
def sys_reboot_storage():
    """
    This is to identify the storage nodes and turn them off and on via vlm
    :return:
    """
    controllers, computes, storages = system_helper.get_hosts_per_personality(
        rtn_tuple=True)

    LOG.info("Online or Available hosts before power-off: {}".format(storages))
    LOG.tc_step(
        "Powering off hosts in multi-processes to simulate power outage: {}".
        format(storages))
    try:
        vlm_helper.power_off_hosts_simultaneously(storages)
    finally:
        LOG.tc_step(
            "Wait for 60 seconds and power on hosts: {}".format(storages))
        time.sleep(60)
        LOG.info("Hosts to check after power-on: {}".format(storages))
        vlm_helper.power_on_hosts(storages, reserve=False,
                                  reconnect_timeout=HostTimeout.REBOOT +
                                  HostTimeout.REBOOT,
                                  hosts_to_check=storages)

    LOG.tc_step("Check vms status after storage nodes reboot")
    vms = get_all_vms()
    vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)
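
# A hedged sketch of the simultaneous power-off pattern that
# vlm_helper.power_off_hosts_simultaneously() presumably uses: one process per
# outlet so power drops at (nearly) the same instant. power_off_fn is a
# hypothetical stand-in for the real VLM call.
from multiprocessing import Process


def power_off_simultaneously(hosts, power_off_fn):
    procs = [Process(target=power_off_fn, args=(host,)) for host in hosts]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()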
def test_dc_dead_office_recovery_central(
        reserve_unreserve_all_hosts_module_central):
    """
    Test dead office recovery on the central cloud

    Setups:
        - Reserve all nodes for central cloud in vlm

    Test Steps:
        - Launch various types of VMs in the primary cloud
        - Power off all nodes in vlm using multi-processing to simulate a
          power outage
        - Power on all nodes
        - Wait for nodes to become online/available
        - Check all the subclouds are managed and online, as at the start
          of the test
        - Check all the launched VMs are up in the subclouds
    """
    LOG.tc_step("Boot 5 vms with various boot_source, disks, etc")
    vms = vm_helper.boot_vms_various_types()

    central_auth = Tenant.get('admin_platform', dc_region='SystemController')
    hosts = system_helper.get_hosts(auth_info=central_auth)
    managed_subclouds = dc_helper.get_subclouds(mgmt='managed', avail='online')
    hosts_to_check = system_helper.get_hosts(
        availability=['available', 'online'], auth_info=central_auth)
    LOG.info("Online or Available hosts before power-off: {}".format(
        hosts_to_check))

    LOG.tc_step(
        "Powering off hosts in multi-processes to simulate power outage: {}".
        format(hosts))
    try:
        vlm_helper.power_off_hosts_simultaneously(hosts,
                                                  region='central_region')
    finally:
        LOG.tc_step("Wait for 60 seconds and power on hosts: {}".format(hosts))
        time.sleep(60)
        LOG.info("Hosts to check after power-on: {}".format(hosts_to_check))
        vlm_helper.power_on_hosts(hosts, reserve=False,
                                  reconnect_timeout=HostTimeout.REBOOT +
                                  HostTimeout.REBOOT,
                                  hosts_to_check=hosts_to_check,
                                  region='central_region')

    LOG.tc_step("Check subclouds are still managed")
    current_managed_subclouds = dc_helper.get_subclouds(mgmt='managed',
                                                        avail='online')
    assert sorted(managed_subclouds) == sorted(current_managed_subclouds), \
        'Managed subclouds changed; original: {}, current: {}'.format(
            managed_subclouds, current_managed_subclouds)

    LOG.tc_step("Check vms are recovered after dead office recovery")
    vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)

    LOG.tc_step("Check vms are reachable after central cloud DOR test")
    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm_id=vm, timeout=VMTimeout.DHCP_RETRY)
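
# An order-insensitive variant of the subcloud check above with a readable
# diff message; a hedged alternative sketch, not the helper the test
# actually uses.
def assert_same_subclouds(before, after):
    missing = set(before) - set(after)
    extra = set(after) - set(before)
    assert not missing and not extra, (
        "Managed subclouds changed; no longer managed: {}, newly managed: "
        "{}".format(missing, extra))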
def sys_lock_unlock_hosts(number_of_hosts_to_lock):
    """
    This is to test the evacuation of vms due to compute lock/unlock
    :return:
    """
    # identify hosts with at least 5 vms
    vms_by_compute_dic = vm_helper.get_vms_per_host()
    compute_to_lock = []
    vms_to_check = []
    hosts_threads = []
    timeout = 1000
    for k, v in vms_by_compute_dic.items():
        if len(v) >= 5:
            compute_to_lock.append(k)
            vms_to_check.append(v)

    if not compute_to_lock:
        skip("There are no computes with 5 or more vms")

    if len(compute_to_lock) > number_of_hosts_to_lock:
        compute_to_lock = compute_to_lock[0:number_of_hosts_to_lock]
        vms_to_check = vms_to_check[0:number_of_hosts_to_lock]
    else:
        LOG.warning(
            "There are only {} computes available with more than 5 vms".
            format(len(compute_to_lock)))

    for host in compute_to_lock:
        new_thread = MThread(host_helper.lock_host, host)
        new_thread.start_thread(timeout=timeout + 30)
        hosts_threads.append(new_thread)

    for host_thr in hosts_threads:
        host_thr.wait_for_thread_end()

    LOG.tc_step("Verify lock succeeded and vms still in good state")
    for vm_list in vms_to_check:
        vm_helper.wait_for_vms_values(vms=vm_list, fail_ok=False)

    for host, vms in zip(compute_to_lock, vms_to_check):
        for vm in vms:
            vm_host = vm_helper.get_vm_host(vm_id=vm)
            assert vm_host != host, "VM is still on {} after lock".format(
                host)
            vm_helper.wait_for_vm_pingable_from_natbox(
                vm_id=vm, timeout=VMTimeout.DHCP_RETRY)

    hosts_threads = []
    for host in compute_to_lock:
        new_thread = MThread(host_helper.unlock_host, host)
        new_thread.start_thread(timeout=timeout + 30)
        hosts_threads.append(new_thread)

    for host_thr in hosts_threads:
        host_thr.wait_for_thread_end()
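
# A stdlib threading sketch of the parallel lock/unlock pattern above,
# assuming a host_fn(host) callable; the test itself uses the in-house
# MThread wrapper rather than threading.Thread directly.
import threading


def run_on_hosts_in_parallel(hosts, host_fn, timeout=1030):
    threads = [threading.Thread(target=host_fn, args=(host,))
               for host in hosts]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join(timeout=timeout)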
def test_dead_office_recovery(reserve_unreserve_all_hosts_module):
    """
    Test dead office recovery with vms

    Args:
        reserve_unreserve_all_hosts_module: test fixture to reserve and
            unreserve all vlm nodes for the lab under test

    Setups:
        - Reserve all nodes in vlm

    Test Steps:
        - Boot 5 vms with various boot_source, disks, etc and ensure they
          can be reached from NatBox
        - Power off all nodes in vlm using multi-processing to simulate a
          power outage
        - Power on all nodes
        - Wait for nodes to become online/available
        - Check vms are recovered after hosts come back up and vms can be
          reached from NatBox
    """
    LOG.tc_step("Boot 5 vms with various boot_source, disks, etc")
    vms = vm_helper.boot_vms_various_types()

    hosts = system_helper.get_hosts()
    hosts_to_check = system_helper.get_hosts(
        availability=['available', 'online'])
    LOG.info("Online or Available hosts before power-off: {}".format(
        hosts_to_check))

    LOG.tc_step(
        "Powering off hosts in multi-processes to simulate power outage: {}".
        format(hosts))
    region = None
    if ProjVar.get_var('IS_DC'):
        region = ProjVar.get_var('PRIMARY_SUBCLOUD')

    try:
        vlm_helper.power_off_hosts_simultaneously(hosts, region=region)
    finally:
        LOG.tc_step("Wait for 60 seconds and power on hosts: {}".format(hosts))
        time.sleep(60)
        LOG.info("Hosts to check after power-on: {}".format(hosts_to_check))
        vlm_helper.power_on_hosts(
            hosts, reserve=False,
            reconnect_timeout=HostTimeout.REBOOT + HostTimeout.REBOOT,
            hosts_to_check=hosts_to_check, region=region)

    LOG.tc_step("Check vms are recovered after dead office recovery")
    vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm_id=vm, timeout=VMTimeout.DHCP_RETRY)

    computes = host_helper.get_hypervisors()
    if len(computes) >= 4:
        system_helper.wait_for_alarm(alarm_id=EventLogID.MULTI_NODE_RECOVERY,
                                     timeout=120)
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.MULTI_NODE_RECOVERY, check_interval=60,
            timeout=1200)
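
# The power-cycle skeleton shared by the DOR tests, reduced to a hedged
# sketch: the finally block guarantees power-on is attempted even if the
# simultaneous power-off raises partway through. power_off_fn/power_on_fn
# are hypothetical stand-ins for the vlm_helper calls.
import time


def power_cycle(hosts, power_off_fn, power_on_fn, settle_time=60):
    try:
        power_off_fn(hosts)
    finally:
        time.sleep(settle_time)
        power_on_fn(hosts)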
def test_reboot_only_host(self, get_zone):
    """
    Test reboot of the only hypervisor on the system

    Args:
        get_zone: fixture to create stxauto aggregate, to ensure vms can
            only be booted on one host

    Setups:
        - If more than 1 hypervisor: Create stxauto aggregate and add one
          host to the aggregate

    Test Steps:
        - Launch various vms on target host
            - vm booted from cinder volume,
            - vm booted from glance image,
            - vm booted from glance image, and have an extra cinder volume
              attached after launch,
            - vm booted from cinder volume with ephemeral and swap disks
        - sudo reboot -f only host
        - Check host is recovered
        - Check vms are recovered and reachable from NatBox
    """
    zone = get_zone

    LOG.tc_step("Launch 5 vms in {} zone".format(zone))
    vms = vm_helper.boot_vms_various_types(avail_zone=zone,
                                           cleanup='function')
    target_host = vm_helper.get_vm_host(vm_id=vms[0])
    for vm in vms[1:]:
        vm_host = vm_helper.get_vm_host(vm)
        assert target_host == vm_host, "VMs are not booted on same host"

    LOG.tc_step("Reboot -f from target host {}".format(target_host))
    HostsToRecover.add(target_host)
    host_helper.reboot_hosts(target_host)

    LOG.tc_step("Check vms are in Active state after host comes back up")
    res, active_vms, inactive_vms = vm_helper.wait_for_vms_values(
        vms=vms, value=VMStatus.ACTIVE, timeout=600)

    vms_host_err = []
    for vm in vms:
        if vm_helper.get_vm_host(vm) != target_host:
            vms_host_err.append(vm)

    assert not vms_host_err, \
        "Following VMs are not on the same host {}: {}\n" \
        "VMs did not reach Active state: {}".format(target_host,
                                                    vms_host_err,
                                                    inactive_vms)
    assert not inactive_vms, \
        "VMs did not reach Active state after host reboot: " \
        "{}".format(inactive_vms)

    LOG.tc_step("Check VMs are pingable from NatBox after host reboot")
    vm_helper.wait_for_vm_pingable_from_natbox(
        vms, timeout=VMTimeout.DHCP_RETRY)
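
# A compact, hedged way to express the co-location check above, assuming a
# hypothetical host_of(vm) accessor in place of vm_helper.get_vm_host().
def assert_vms_on_same_host(vms, host_of):
    hosts = {host_of(vm) for vm in vms}
    assert len(hosts) == 1, "VMs are spread across hosts: {}".format(hosts)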
def test_force_lock_with_mig_vms(get_hosts_with_backing):
    """
    Test force lock host with migrate-able vms on it

    Prerequisites:
        - Minimum of two hosts supporting the same storage backing.

    Test Setups:
        - Add admin role to primary tenant
        - Boot various VMs on host_under_test that can be live migrated

    Test Steps:
        - Get status info from VMs
        - Force lock target host
        - Verify force lock returns 0
        - Wait until VMs are active on a secondary host
        - Verify VMs can be pinged

    Test Teardown:
        - Remove admin role from primary tenant
        - Delete created vms
        - Unlock locked target host(s)
    """
    storage_backing, host_under_test = get_hosts_with_backing

    # Boot VMs on the host.
    LOG.tc_step("Boot VMs on {}".format(host_under_test))
    vm_ids = vm_helper.boot_vms_various_types(
        storage_backing=storage_backing, target_host=host_under_test,
        cleanup='function')

    # Force lock host that VMs are booted on
    LOG.tc_step("Force lock {}".format(host_under_test))
    HostsToRecover.add(host_under_test)
    lock_code, lock_output = host_helper.lock_host(host_under_test,
                                                   force=True,
                                                   check_first=False)
    assert lock_code == 0, "Failed to force lock {}. Details: {}".format(
        host_under_test, lock_output)

    # Expect VMs to migrate off force-locked host (non-gracefully)
    LOG.tc_step(
        "Wait for 'Active' status of VMs after host force lock completes")
    vm_helper.wait_for_vms_values(vm_ids, fail_ok=False)
    for vm in vm_ids:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm, timeout=VMTimeout.DHCP_RETRY)
def sys_uncontrolled_swact(number_of_times=1):
    """
    Force an uncontrolled swact by rebooting the active controller, then
    verify vms stay healthy
    :return:
    """
    for i in range(number_of_times):
        active, standby = system_helper.get_active_standby_controllers()
        LOG.tc_step("Iteration {} of {}".format(i + 1, number_of_times))

        LOG.tc_step("'sudo reboot -f' from active controller {}".format(
            active))
        host_helper.reboot_hosts(hostnames=active)

        LOG.tc_step("Check vms status after controller swact")
        vms = get_all_vms()
        vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
        for vm in vms:
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm)
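
# A hedged sketch of a post-reboot check the loop above could add: wait for
# the swact to complete by polling until the active controller changes.
# get_active_fn is a hypothetical accessor wrapping
# system_helper.get_active_standby_controllers().
import time


def wait_for_swact(old_active, get_active_fn, timeout=300, interval=10):
    end_time = time.time() + timeout
    while time.time() < end_time:
        if get_active_fn() != old_active:
            return
        time.sleep(interval)
    raise TimeoutError(
        "Active controller did not change from {}".format(old_active))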
def test_lock_with_vms(self, target_hosts, no_simplex, add_admin_role_func):
    """
    Test lock host with vms on it.

    Args:
        target_hosts (list): targeted host(s) to lock that was prepared by
            the target_hosts test fixture.

    Skip Conditions:
        - Less than 2 hypervisor hosts on the system

    Prerequisites:
        - Hosts storage backing are pre-configured to storage backing under
          test, i.e., 2 or more hosts should support the storage backing
          under test.

    Test Setups:
        - Set instances quota to 10 if it was less than 8
        - Determine storage backing(s) under test, i.e., storage backings
          supported by at least 2 hosts on the system
        - Create flavors with storage extra specs set based on storage
          backings under test
        - Create vms_to_test that can be live migrated using created flavors
        - Determine target host(s) to perform lock based on which host(s)
          have the most vms_to_test
        - Live migrate vms to target host(s)

    Test Steps:
        - Lock target host
        - Verify lock succeeded and vms status unchanged
        - Repeat above steps if more than one target host

    Test Teardown:
        - Delete created vms and volumes
        - Delete created flavors
        - Unlock locked target host(s)
    """
    storage_backing, host = target_hosts
    vms_num = 5
    vm_helper.ensure_vms_quotas(vms_num=vms_num)

    LOG.tc_step("Boot {} vms with various storage settings".format(vms_num))
    vms = vm_helper.boot_vms_various_types(cleanup='function',
                                           vms_num=vms_num,
                                           storage_backing=storage_backing,
                                           target_host=host)

    LOG.tc_step("Attempt to lock target host {}...".format(host))
    HostsToRecover.add(host)
    host_helper.lock_host(host=host, check_first=False, fail_ok=False,
                          swact=True)

    LOG.tc_step("Verify lock succeeded and vms still in good state")
    vm_helper.wait_for_vms_values(vms=vms, fail_ok=False)
    for vm in vms:
        vm_host = vm_helper.get_vm_host(vm_id=vm)
        assert vm_host != host, "VM is still on {} after lock".format(host)
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm_id=vm, timeout=VMTimeout.DHCP_RETRY)
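
# A hedged sketch of what ensure_vms_quotas() is used for in these tests:
# raise the instances quota when it is below what the test needs.
# get_quota_fn/set_quota_fn are hypothetical accessors, not the real API.
def ensure_instances_quota(needed, get_quota_fn, set_quota_fn):
    if get_quota_fn('instances') < needed:
        set_quota_fn('instances', needed)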
def test_robustness_service_function_chaining(protocol, nsh_aware, same_host,
                                              add_protocol, symmetric,
                                              check_system,
                                              add_admin_role_module):
    """
    Test Service Function Chaining

    Test Steps:
        - Check if the system is compatible
        - Boot the source VM, dest VM & SFC VM in same host or diff host
          based on <same_host: True or False>
        - Install necessary software and packages inside guests for packet
          forwarding test
        - Create port pair using nsh_aware <True:False>
        - Create port pair group
        - Create SFC flow classifier using protocol <tcp:icmp:udp>
        - Create port chain
        - Check packet forwarding from source to dest vm via SFC vm
        - Migrate VM by force_lock compute host
        - Check packet forwarding from source to dest vm via SFC vm
        - Create new flow classifier with new protocol (add_protocol)
        - Update port chain with new flow classifier
        - Check packet forwarding from source to dest vm via SFC vm with
          new classifier
        - Evacuate VM by rebooting compute host
        - Verify VM evacuated
        - Check packet forwarding from source to dest vm via SFC vm with
          new classifier

    Test Teardown:
        - Delete port chain, port pair group, port pair, flow classifier,
          vms, volumes created
    """
    nsh_aware = True if nsh_aware == 'nsh_aware' else False
    same_host = True if same_host == 'same_host' else False
    symmetric = True if symmetric == 'symmetric' else False

    LOG.tc_step("Check if the system is compatible to run this test")
    computes = check_system

    LOG.tc_step("Boot the VMs on the same host: {}".format(same_host))
    hosts_to_boot = [computes[0]] * 3 if same_host else computes[0:3]
    LOG.info("Boot the VMs on the following compute hosts 1:{}, 2:{}, "
             "3:{}".format(hosts_to_boot[0], hosts_to_boot[1],
                           hosts_to_boot[2]))

    LOG.tc_step("Boot the source and dest VM")
    vm_ids = []
    vm_ids, source_vm_id, dest_vm_id, internal_net_id, mgmt_net_id, mgmt_nic = \
        _setup_vm(vm_ids, hosts_to_boot)
    vm_helper.ping_vms_from_vm(to_vms=source_vm_id, from_vm=dest_vm_id,
                               net_types=['mgmt'], retry=10)

    LOG.tc_step("Boot the SFC VM")
    sfc_vm_ids = []
    sfc_vm_ids, sfc_vm_under_test, ingress_port_id, egress_port_id = \
        _setup_sfc_vm(sfc_vm_ids, hosts_to_boot, mgmt_nic, internal_net_id)
    vm_helper.ping_vms_from_vm(to_vms=source_vm_id,
                               from_vm=sfc_vm_under_test,
                               net_types=['mgmt'], retry=10)

    LOG.tc_step("Install software package nc in vms {} {}".format(
        source_vm_id, dest_vm_id))
    _install_sw_packages_in_vm(source_vm_id)
    _install_sw_packages_in_vm(dest_vm_id)

    LOG.tc_step("Copy vxlan tool into sfc vm {}".format(sfc_vm_under_test))
    vm_helper.scp_to_vm_from_natbox(vm_id=sfc_vm_under_test,
                                    source_file='/home/cgcs/sfc/vxlan_tool.py',
                                    dest_file='/root/vxlan_tool.py')

    LOG.tc_step("Create port pair")
    port_pair_ids = []
    port_pair_id = _setup_port_pair(nsh_aware, ingress_port_id,
                                    egress_port_id)
    port_pair_ids.append(port_pair_id)

    LOG.tc_step("Create port pair group")
    port_pair_group_ids = []
    port_pair_group_id = _setup_port_pair_groups(port_pair_id)
    port_pair_group_ids.append(port_pair_group_id)

    name = 'sfc_flow_classifier'
    LOG.tc_step("Create flow classifier: {}".format(name))
    flow_classifier, dest_vm_internal_net_ip = _setup_flow_classifier(
        name, source_vm_id, dest_vm_id, protocol)

    LOG.tc_step("Create port chain")
    port_chain_id = _setup_port_chain(port_pair_group_id, flow_classifier,
                                      symmetric)

    LOG.tc_step(
        "Execute vxlan_tool.py and verify {} packets are received from VM1 "
        "to VM2".format(protocol))
    _check_packets_forwarded_in_sfc_vm(source_vm_id, dest_vm_id, sfc_vm_ids,
                                       dest_vm_internal_net_ip, protocol,
                                       nsh_aware, symmetric,
                                       load_balancing=False)

    LOG.tc_step("Force lock {}".format(hosts_to_boot))
    if not same_host:
        for host_to_boot in hosts_to_boot:
            HostsToRecover.add(host_to_boot)
            lock_code, lock_output = host_helper.lock_host(
                host_to_boot, force=True, check_first=True)
            assert lock_code == 0, \
                "Failed to force lock {}. Details: {}".format(host_to_boot,
                                                              lock_output)
    else:
        HostsToRecover.add(hosts_to_boot[0])
        lock_code, lock_output = host_helper.lock_host(
            hosts_to_boot[0], force=True, check_first=True)
        assert lock_code == 0, \
            "Failed to force lock {}. Details: {}".format(hosts_to_boot[0],
                                                          lock_output)

    # Expect VMs to migrate off force-locked host (non-gracefully)
    LOG.tc_step(
        "Wait for 'Active' status of VMs after host force lock completes")
    vm_helper.wait_for_vms_values(vm_ids, fail_ok=False)

    LOG.tc_step(
        "Execute vxlan_tool.py and verify {} packets are received from VM1 "
        "to VM2".format(protocol))
    _check_packets_forwarded_in_sfc_vm(source_vm_id, dest_vm_id, sfc_vm_ids,
                                       dest_vm_internal_net_ip, protocol,
                                       nsh_aware, symmetric,
                                       load_balancing=False)

    LOG.tc_step(
        "Create new flow classifier with protocol {}".format(add_protocol))
    flow_classifier_name = 'new_sfc_flow_classifier'
    new_flow_classifier, dest_vm_internal_net_ip = _setup_flow_classifier(
        flow_classifier_name, source_vm_id, dest_vm_id, add_protocol)

    LOG.tc_step("Update port chain with new flow classifier: {}".format(
        new_flow_classifier))
    network_helper.set_sfc_port_chain(port_chain_id,
                                      port_pair_groups=port_pair_group_id,
                                      flow_classifiers=new_flow_classifier)

    LOG.tc_step(
        "Execute vxlan_tool.py and verify {} packets are received from VM1 "
        "to VM2".format(add_protocol))
    _check_packets_forwarded_in_sfc_vm(source_vm_id, dest_vm_id, sfc_vm_ids,
                                       dest_vm_internal_net_ip, add_protocol,
                                       nsh_aware, symmetric,
                                       load_balancing=False)

    LOG.info("Get the hosts to reboot where the VMs are launched")
    hosts_to_reboot = vm_helper.get_vms_hosts(vm_ids=vm_ids)

    LOG.tc_step(
        "Reboot VM hosts {} and ensure vms are evacuated to other "
        "hosts".format(hosts_to_reboot))
    vm_helper.evacuate_vms(host=hosts_to_reboot, vms_to_check=vm_ids,
                           ping_vms=True)

    LOG.tc_step(
        "Execute vxlan_tool.py and verify {} packets are received from VM1 "
        "to VM2".format(add_protocol))
    _check_packets_forwarded_in_sfc_vm(source_vm_id, dest_vm_id, sfc_vm_ids,
                                       dest_vm_internal_net_ip, add_protocol,
                                       nsh_aware, symmetric,
                                       load_balancing=False)
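
# A hedged sketch of the SFC object creation order the test follows
# (port pair -> port pair group -> flow classifier -> port chain); the
# create_* callables are hypothetical stand-ins for the _setup_* helpers and
# the neutron networking-sfc client.
def build_port_chain(create_pair, create_group, create_classifier,
                     create_chain, ingress, egress, protocol):
    pair = create_pair(ingress=ingress, egress=egress)
    group = create_group(port_pairs=[pair])
    classifier = create_classifier(protocol=protocol)
    return create_chain(port_pair_groups=[group],
                        flow_classifiers=[classifier])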