def test_host_disk_wipe_rootfs():
    """
    This test attempts to run system host-disk-wipe on a node using the
    rootfs disk. Command format is:

    system host-disk-wipe [--confirm] <hostname or id> <disk uuid>

    Note, host-disk-wipe is only applicable to controller and compute nodes.
    It cannot be used on the rootfs disk. It cannot be used for a disk that
    is used by a PV or has partitions used by a PV.

    Arguments:
    - None

    Test Steps:
    1.  Determine which is the rootfs disk
    2.  Attempt to wipe the disk
    3.  Expect it to fail for every node

    Assumptions:
    - None
    """
    computes = system_helper.get_hosts(personality="compute")
    storage = system_helper.get_hosts(personality="storage")
    hosts = system_helper.get_controllers() + computes + storage

    LOG.tc_step("Gather rootfs disks")
    rootfs = storage_helper.get_hosts_rootfs(hosts)

    for host in rootfs:
        uuid = rootfs[host]
        LOG.tc_step("Attempting to wipe {} from {}".format(uuid[0], host))
        cmd = 'host-disk-wipe --confirm {} {}'.format(host, uuid[0])
        rc, out = cli.system(cmd, fail_ok=True)
        assert rc != 0, "Expected wipe disk to fail but instead succeeded"
def _test_GET_ihosts_host_id_uppercaseUUID(sysinv_rest):
    """
    Test GET of <resource> with valid authentication and upper case UUID
    values. RFC 4122 covers the need for uppercase UUID values.

    Args:
        n/a

    Prerequisites: system is running
    Test Setups:
        n/a
    Test Steps:
        - Using requests GET <resource> with proper authentication
        - Determine if expected status_code of 200 is received
    Test Teardown:
        n/a
    """
    path = "/ihosts/{}/addresses"
    r = sysinv_rest
    LOG.info("This test case will FAIL until CGTS-8265 is resolved")
    LOG.info(system_helper.get_hosts())
    for host in system_helper.get_hosts():
        uuid = system_helper.get_host_values(host, 'uuid')[0]
        message = "Using requests GET {} with proper authentication"
        LOG.tc_step(message.format(path))
        status_code, text = r.get(resource=path.format(uuid.upper()),
                                  auth=True)
        message = "Retrieved: status_code: {} message: {}"
        LOG.info(message.format(status_code, text))
        LOG.tc_step("Determine if expected code of 200 is received")
        message = "Expected code of 200 - received {} and message {}"
        assert status_code == 200, message.format(status_code, text)
def test_dc_dead_office_recovery_central(
        reserve_unreserve_all_hosts_module_central):
    """
    Test dead office recovery for the central cloud

    Args:
    Setups:
        - Reserve all nodes for central cloud in vlm

    Test Steps:
        - Launch various types of VMs in primary clouds.
        - Power off all nodes in vlm using multi-processing to simulate a
          power outage
        - Power on all nodes
        - Wait for nodes to become online/available
        - Check all the subclouds are synced as at the start of the test.
        - Check all the VMs that were launched are up in the subclouds.
    """
    LOG.tc_step("Boot 5 vms with various boot_source, disks, etc")
    vms = vm_helper.boot_vms_various_types()
    central_auth = Tenant.get('admin_platform', dc_region='SystemController')
    hosts = system_helper.get_hosts(auth_info=central_auth)
    managed_subclouds = dc_helper.get_subclouds(mgmt='managed', avail='online')
    hosts_to_check = system_helper.get_hosts(
        availability=['available', 'online'], auth_info=central_auth)
    LOG.info("Online or Available hosts before power-off: {}".format(
        hosts_to_check))

    LOG.tc_step(
        "Powering off hosts in multi-processes to simulate power outage: {}".
        format(hosts))
    try:
        vlm_helper.power_off_hosts_simultaneously(hosts,
                                                  region='central_region')
    except:
        raise
    finally:
        LOG.tc_step("Wait for 60 seconds and power on hosts: {}".format(hosts))
        time.sleep(60)
        LOG.info("Hosts to check after power-on: {}".format(hosts_to_check))
        vlm_helper.power_on_hosts(hosts, reserve=False,
                                  reconnect_timeout=HostTimeout.REBOOT +
                                  HostTimeout.REBOOT,
                                  hosts_to_check=hosts_to_check,
                                  region='central_region')

    LOG.tc_step("Check subclouds managed")
    current_managed_subclouds = dc_helper.get_subclouds(mgmt='managed',
                                                        avail='online')
    assert managed_subclouds == current_managed_subclouds, \
        'Current managed subclouds differ from the original. ' \
        'Original: {} Current: {}'.format(managed_subclouds,
                                          current_managed_subclouds)

    LOG.tc_step("Check vms are recovered after dead office recovery")
    vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)

    LOG.tc_step("Check vms are reachable after central clouds DOR test")
    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm_id=vm, timeout=VMTimeout.DHCP_RETRY)
def test_host_disk_wipe_unassigned_disk():
    """
    This test attempts to run system host-disk-wipe on a node using any
    unassigned disk. Command format is:

    system host-disk-wipe [--confirm] <hostname or id> <disk uuid>

    Note, host-disk-wipe is only applicable to controller and compute nodes.
    It cannot be used on the rootfs disk. It cannot be used for a disk that
    is used by a PV or has partitions used by a PV.

    Arguments:
    - None

    Test Steps:
    1.  Determine which disks are unassigned by comparing size_gib to
        available_gib in system host-disk-list
    2.  Attempt to wipe the disk
    3.  Expect it to pass

    Assumptions:
    - None
    """
    computes = system_helper.get_hosts(personality="compute",
                                       availability="available")
    controllers = system_helper.get_hosts(personality="controller",
                                          availability="available")
    hosts = controllers + computes

    found_disk = False
    for host in hosts:
        LOG.info("Query disks on host {}".format(host))
        disks = storage_helper.get_host_disks(host)
        for disk_uuid in disks:
            cmd = "host-disk-show {} {}".format(host, disk_uuid)
            rc, out = cli.system(cmd)
            size_gib = table_parser.get_value_two_col_table(
                table_parser.table(out), "size_gib")
            available_gib = table_parser.get_value_two_col_table(
                table_parser.table(out), "available_gib")
            if int(float(size_gib)) == int(float(available_gib)):
                found_disk = True
                LOG.tc_step("Attempting to wipe disk {} from host {}".format(
                    disk_uuid, host))
                cmd = 'host-disk-wipe --confirm {} {}'.format(host, disk_uuid)
                rc, out = cli.system(cmd, fail_ok=True)
                assert rc == 0, "Expected wipe disk to pass but instead failed"
                break

    if not found_disk:
        skip("No unassigned disks to run test")
def test_dead_office_recovery(reserve_unreserve_all_hosts_module):
    """
    Test dead office recovery with vms

    Args:
        reserve_unreserve_all_hosts_module: test fixture to reserve and
            unreserve all vlm nodes for the lab under test

    Setups:
        - Reserve all nodes in vlm

    Test Steps:
        - Boot 5 vms with various boot_source, disks, etc and ensure they can
          be reached from NatBox
        - Power off all nodes in vlm using multi-processing to simulate a
          power outage
        - Power on all nodes
        - Wait for nodes to become online/available
        - Check vms are recovered after hosts come back up and vms can be
          reached from NatBox
    """
    LOG.tc_step("Boot 5 vms with various boot_source, disks, etc")
    vms = vm_helper.boot_vms_various_types()

    hosts = system_helper.get_hosts()
    hosts_to_check = system_helper.get_hosts(
        availability=['available', 'online'])

    LOG.info("Online or Available hosts before power-off: {}".format(
        hosts_to_check))

    LOG.tc_step("Powering off hosts in multi-processes to simulate power "
                "outage: {}".format(hosts))
    region = None
    if ProjVar.get_var('IS_DC'):
        region = ProjVar.get_var('PRIMARY_SUBCLOUD')

    try:
        vlm_helper.power_off_hosts_simultaneously(hosts, region=region)
    except:
        raise
    finally:
        LOG.tc_step("Wait for 60 seconds and power on hosts: {}".format(hosts))
        time.sleep(60)
        LOG.info("Hosts to check after power-on: {}".format(hosts_to_check))
        vlm_helper.power_on_hosts(
            hosts, reserve=False,
            reconnect_timeout=HostTimeout.REBOOT + HostTimeout.REBOOT,
            hosts_to_check=hosts_to_check, region=region)

    LOG.tc_step("Check vms are recovered after dead office recovery")
    vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)
    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm_id=vm, timeout=VMTimeout.DHCP_RETRY)

    computes = host_helper.get_hypervisors()
    if len(computes) >= 4:
        system_helper.wait_for_alarm(alarm_id=EventLogID.MULTI_NODE_RECOVERY,
                                     timeout=120)
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.MULTI_NODE_RECOVERY, check_interval=60,
            timeout=1200)
def test_force_reboot_host(host_type):
    """
    Verify force reboot of a host

    Test Steps:
        - Select a host per given type. If type is controller, select standby
          controller.
        - Force reboot the selected host
        - Wait for the host to recover and become ready

    """
    LOG.tc_step("Select a {} node from system if any".format(host_type))
    hosts = system_helper.get_hosts(
        availability=(HostAvailState.AVAILABLE, HostAvailState.DEGRADED),
        personality=host_type)
    if not hosts:
        skip("No available or degraded {} host found on system".format(
            host_type))

    host = hosts[0]
    LOG.tc_step("Force reboot {} host: {}".format(host_type, host))
    HostsToRecover.add(host)
    host_helper.reboot_hosts(hostnames=host)
    host_helper.wait_for_hosts_ready(host)
def test_reboot_hosts(hostnames):
    LOG.tc_step("Processing hostnames provided...")

    system_hosts = system_helper.get_hosts()

    is_str = False
    if isinstance(hostnames, str):
        is_str = True
        hostnames = [hostnames]

    # Iterate over a copy so the 'active_controller'/'standby_controller'
    # placeholders can be replaced in hostnames without mutating the list
    # being iterated.
    tmp_hosts = list(hostnames)
    for host in tmp_hosts:
        if host == 'active_controller':
            hostnames.remove(host)
            host = system_helper.get_active_controller_name()
            hostnames.append(host)
        elif host == 'standby_controller':
            hostnames.remove(host)
            host = system_helper.get_standby_controller_name()
            hostnames.append(host)
        if host not in system_hosts:
            skip("Host(s) not found in system. Host(s) requested: {}. "
                 "Hosts in system: {}".format(hostnames, system_hosts))

    if is_str:
        hostnames = hostnames[0]

    LOG.tc_step("Rebooting following host(s): {}".format(hostnames))
    results = host_helper.reboot_hosts(hostnames)
    LOG.tc_step("Results: {}".format(results))
    assert results[0] == 0
def test_GET_idisks(sysinv_rest):
    """
    Test GET of <resource> with valid authentication.

    Args:
        n/a

    Prerequisites: system is running
    Test Setups:
        n/a
    Test Steps:
        - Using requests GET <resource> with proper authentication
        - Determine if expected status_code of 200 is received
    Test Teardown:
        n/a
    """
    r = sysinv_rest
    path = "/idisks/{}"
    hostnames = system_helper.get_hosts()
    for host in hostnames:
        disk_uuids = storage_helper.get_host_disks(host)
        for disk_uuid in disk_uuids:
            res = path.format(disk_uuid)
            message = "Using requests GET {} with proper authentication"
            LOG.tc_step(message.format(res))
            status_code, text = r.get(resource=res, auth=True)
            message = "Retrieved: status_code: {} message: {}"
            LOG.debug(message.format(status_code, text))
            if status_code == 404:
                pytest.skip("Unsupported resource in this configuration.")
            else:
                message = "Determine if expected code of 200 is received"
                LOG.tc_step(message)
                message = "Expected code of 200 - received {} and message {}"
                assert status_code == 200, message.format(status_code, text)
def test_assign_rootfs_disk_to_pv():
    """
    This test attempts to create a PV with type Disk on the rootfs. This is
    expected to fail.

    Assumptions:
    * None

    Test Steps:
    * Determine which disk is the rootfs
    * Attempt to create a PV on that disk using a PV type of Disk.

    Teardown:
    * None
    """
    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes
    rootfs = storage_helper.get_hosts_rootfs(hosts)

    for host in rootfs:
        uuid = rootfs[host]
        # cmd = "host-pv-add -t disk {} cgts-vg {}".format(host, uuid[0])
        cmd = "host-pv-add {} cgts-vg {}".format(host, uuid[0])
        rc, out = cli.system(cmd, fail_ok=True)
        assert rc != 0, "Expected PV creation to fail but instead succeeded"
def locate_usb(host_type="controller", min_size=13): """ Try to locate a USB device on a host of the type specified. Arguments: - host_type (string) - e.g. controller, compute, storage - min_size (int) - minimum size of USB required (GiB) Returns: - hostname, e.g. controller-0 - usb_device, e.g. /dev/sdb """ LOG.tc_step("Check all hosts of type {} for USB devices".format(host_type)) hosts = system_helper.get_hosts(personality=host_type) for host in hosts: with host_helper.ssh_to_host(host) as host_ssh: cmd = "ls --color=none -ltrd /dev/disk/by-id/usb*" rc, out = host_ssh.exec_cmd(cmd) if rc == 0: usb_device = "/dev/" + (out.splitlines()[0])[-3:] LOG.info("Found USB device {} on host {}".format(usb_device, host)) cmd = "blockdev --getsize64 {}".format(usb_device) usb_bytes = host_ssh.exec_sudo_cmd(cmd)[1] gib = int(usb_bytes) / (1024 * 1024 * 1024) if gib > min_size: LOG.info("Size of USB device is sufficient for test") return host, usb_device else: skip("Size of USB device is insufficient for test") return (None, None)
def test_create_zero_sized_host_partition():
    """
    This test attempts to create a partition of size zero once on each host.
    This should be rejected.

    Test steps:
    * Create partition of size zero
    * Ensure the provisioning is rejected

    Teardown:
    * None
    """
    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        for uuid in disks:
            LOG.tc_step(
                "Attempt to create zero sized partition on uuid {} on host "
                "{}".format(uuid, host))
            rc, out = storage_helper.create_host_partition(host, uuid, "0",
                                                           fail_ok=True)
            assert rc != 0, "Partition creation was expected to fail but " \
                            "instead succeeded"
            # Let's do this for one disk only on each host
            break
def _test_modify_non_existing_cpu_negative(lock_):
    """
    TC1940 cpu data can't be modified for a non-existent cpu

    Test Steps:
        - Choose a host to lock and find how many phys cores it has
        - Attempt to change the cpu settings for a phys core that doesn't
          exist
        - Verify that the cli is rejected
    """
    host = lock_
    table_ = host_helper.get_host_cpu_list_table(host)
    cores = set(table_parser.get_column(table_, 'phy_core'))
    fake_proc_num = 2
    while fake_proc_num in cores:
        fake_proc_num += 1
    fake_proc = 'p{}'.format(fake_proc_num)
    map_ = {fake_proc: 1}

    LOG.tc_step("Attempt to modify fake processor {}'s function to "
                "vSwitch".format(fake_proc))
    code, out = host_helper.modify_host_cpu(host, 'vSwitch', fail_ok=True,
                                            **map_)
    assert 0 != code, "FAIL: Modifying a non-existent processor was not " \
                      "rejected"

    hosts = system_helper.get_hosts()
    name = hosts[len(hosts) - 1] + "a"
    while True:
        if name not in hosts:
            break
        name += "a"

    LOG.tc_step("Attempt to modify fake host {}'s processor p0 function to "
                "vSwitch".format(name))
    code, out = host_helper.modify_host_cpu(name, 'vSwitch', p0=1,
                                            fail_ok=True)

    LOG.tc_step("Verifying that the cli was rejected")
    assert 1 == code, "FAIL: Modifying a cpu on a non-existent host was not " \
                      "rejected"
def test_delete_unlocked_node_negative():
    """
    Attempts to delete each unlocked node.
    Fails if one unlocked node does get deleted.

    Test Steps:
        - Creates a list of every unlocked host
        - Iterate through each host and attempt to delete it
        - Verify that each host rejected the delete request
    """
    hosts = system_helper.get_hosts(administrative='unlocked')

    deleted_nodes = []
    for node in hosts:
        LOG.tc_step("attempting to delete {}".format(node))
        LOG.info("{} state: {}".format(node, system_helper.get_host_values(
            node, fields='administrative')[0]))
        res, out = cli.system('host-delete', node, fail_ok=True)
        LOG.tc_step("Delete request - result: {}\tout: {}".format(res, out))
        assert 1 == res, "FAIL: The delete request for {} was not " \
                         "rejected".format(node)

        LOG.tc_step("Confirming that the node was not deleted")
        res, out = cli.system('host-show', node, fail_ok=True)
        if 'host not found' in out or res != 0:
            # the node was deleted even though it said it wasn't
            LOG.tc_step("{} was deleted.".format(node))
            deleted_nodes.append(node)

    assert not deleted_nodes, "Fail: Delete request for the following " \
                              "node(s) {} was accepted.".format(deleted_nodes)
def test_create_partition_using_non_existent_device_node():
    """
    This test attempts to create a partition using an invalid disk. It is
    expected to fail.

    Arguments:
    * None

    Steps:
    * Attempt to create a partition on a valid host using an invalid device
      node, e.g. /dev/sdz

    Teardown:
    * None
    """
    # Safely hard-coded since we don't have enough physical slots for this to
    # be possible
    device_node = "/dev/sdz"
    size_gib = "1"

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    for host in hosts:
        LOG.tc_step(
            "Creating partition on host {} with size {} using device node "
            "{}".format(host, size_gib, device_node))
        rc, out = storage_helper.create_host_partition(host, device_node,
                                                       size_gib, fail_ok=True)
        assert rc != 0, "Partition creation was expected to fail but succeeded"
def test_ping_hosts():
    con_ssh = ControllerClient.get_active_controller()

    ping_failed_list = []
    for hostname in system_helper.get_hosts():
        LOG.tc_step(
            "Send 100 pings to {} from Active Controller".format(hostname))
        ploss_rate, untran_p = network_helper.ping_server(hostname, con_ssh,
                                                          num_pings=100,
                                                          timeout=300,
                                                          fail_ok=True)
        if ploss_rate > 0:
            if ploss_rate == 100:
                ping_failed_list.append(
                    "{}: All packets dropped.\n".format(hostname))
            else:
                ping_failed_list.append(
                    "{}: Packet loss rate: {}/100\n".format(hostname,
                                                            ploss_rate))
        if untran_p > 0:
            ping_failed_list.append(
                "{}: {}/100 pings are untransmitted within 300 seconds".format(
                    hostname, untran_p))

    LOG.tc_step("Ensure all packets are received.")
    assert not ping_failed_list, "Dropped/Un-transmitted packets detected " \
                                 "when ping hosts. " \
                                 "Details:\n{}".format(ping_failed_list)
def backup_sensor_data_files(hosts=None, con_ssh=None):
    if hosts is None:
        hosts = system_helper.get_hosts()
    elif isinstance(hosts, str):
        hosts = [hosts]

    LOG.info("Check and ensure sensor data files for {} are copied to "
             "{} if available".format(hosts, HostLinuxUser.get_home()))

    hosts_with_file = []
    con_ssh = ControllerClient.get_active_controller() if not con_ssh else \
        con_ssh
    for host in hosts:
        dest_path = "{}/hwmond_{}_sensor_data".format(HostLinuxUser.get_home(),
                                                      host)
        if con_ssh.file_exists(dest_path):
            hosts_with_file.append(host)
        else:
            source_path = BMCPath.SENSOR_DATA_FILE_PATH.format(
                BMCPath.SENSOR_DATA_DIR, host)
            if con_ssh.file_exists(source_path):
                con_ssh.exec_sudo_cmd('cp {} {}'.format(source_path,
                                                        dest_path),
                                      fail_ok=False)
                hosts_with_file.append(host)

    LOG.info("Sensor data files for {} are copied to {}".format(
        hosts, HostLinuxUser.get_home()))
    return hosts
def sensor_data_fit(request):
    LOG.fixture_step("Get hosts with sensor enabled")
    hosts = system_helper.get_hosts()
    bmc_hosts = []
    for host in hosts:
        if bmc_helper.get_sensors_table(host=host)['values']:
            bmc_hosts.append(host)

    if not bmc_hosts:
        skip("No sensor added for any host in system")

    con_ssh = ControllerClient.get_active_controller()
    LOG.fixture_step("(module) Save healthy sensor data files")
    bmc_helper.backup_sensor_data_files(bmc_hosts, con_ssh=con_ssh)

    LOG.fixture_step("(module) touch /var/run/fit/sensor_data")
    con_ssh.exec_sudo_cmd('mkdir -p /var/run/fit/', fail_ok=False)
    con_ssh.exec_sudo_cmd('touch /var/run/fit/sensor_data', fail_ok=False)

    def _revert():
        LOG.fixture_step("(module) rm /var/run/fit/sensor_data")
        con_ssh_ = ControllerClient.get_active_controller()
        con_ssh_.exec_sudo_cmd('rm /var/run/fit/sensor_data', fail_ok=False)

    request.addfinalizer(_revert)

    return bmc_hosts
def test_node_install_kpi(collect_kpi):
    """
    This test measures the install time for each node in the system.
    """
    if not collect_kpi:
        skip("KPI only test. Skip due to kpi collection is not enabled")

    lab_name = ProjVar.get_var("LAB_NAME")
    hosts = system_helper.get_hosts()
    print("System has hosts: {}".format(hosts))

    log_path = NodeInstall.LOG_PATH
    start_cmd = 'head -n 1 {}'.format(log_path)
    end_cmd = 'tail -n 1 {}'.format(log_path)
    date_cmd = '{} -n 1 /var/log/bash.log'

    with host_helper.ssh_to_host('controller-0') as con0_ssh:
        bash_start = con0_ssh.exec_sudo_cmd(date_cmd.format('head'),
                                            fail_ok=False)[1]
        bash_end = con0_ssh.exec_sudo_cmd(date_cmd.format('tail'),
                                          fail_ok=False)[1]
    bash_start = re.findall(TIMESTAMP_PATTERN, bash_start.strip())[0]
    bash_end = re.findall(TIMESTAMP_PATTERN, bash_end.strip())[0]
    date_ = bash_start.split('T')[0]

    def _get_time_delta(start_, end_):
        start_ = start_.replace(',', '.')
        end_ = end_.replace(',', '.')
        start_t = '{}T{}'.format(date_, start_)
        end_t = '{}T{}'.format(date_, end_)

        time_delta = common.get_timedelta_for_isotimes(
            start_t, end_t).total_seconds()
        if time_delta < 0:
            end_t = '{}T{}'.format(bash_end.split('T')[0], end_)
            time_delta = common.get_timedelta_for_isotimes(
                start_t, end_t).total_seconds()
        return time_delta

    for host in hosts:
        with host_helper.ssh_to_host(hostname=host) as host_ssh:
            start_output = host_ssh.exec_sudo_cmd(start_cmd,
                                                  fail_ok=False)[1].strip()
            end_output = host_ssh.exec_sudo_cmd(end_cmd,
                                                fail_ok=False)[1].strip()

        kpi_name = NodeInstall.NAME.format(host)
        start_time = re.findall(NodeInstall.TIMESTAMP_PATTERN, start_output)[0]
        end_time = re.findall(NodeInstall.TIMESTAMP_PATTERN, end_output)[0]
        install_duration = _get_time_delta(start_time, end_time)
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=kpi_name, log_path=log_path,
                                  lab_name=lab_name, kpi_val=install_duration,
                                  fail_ok=False)
def test_modify_second_last_partition():
    """
    This test attempts to modify a partition that is not the last. It is
    expected to fail, since only the very last partition can be modified.

    Arguments:
    * None

    Test steps:
    * Create partition1
    * Create partition2
    * Attempt to modify partition1

    Teardown:
    * None
    """
    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        partitions_to_restore[host] = []
        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            partition_size = "1"
            partition_chunks = size_gib / int(partition_size)
            if partition_chunks < 3:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue

            LOG.info("Creating first partition on {}".format(host))
            uuid = storage_helper.create_host_partition(host, disk_uuid,
                                                        partition_size)[1]
            partitions_to_restore[host].append(uuid)

            LOG.info("Creating second partition on {}".format(host))
            uuid1 = storage_helper.create_host_partition(host, disk_uuid,
                                                         partition_size)[1]
            partitions_to_restore[host].append(uuid1)

            LOG.tc_step(
                "Modifying partition {} from size {} to size {} from host {} "
                "on disk {}".format(uuid, partition_size,
                                    int(partition_size) + 1, host, disk_uuid))
            rc, out = storage_helper.modify_host_partition(
                host, uuid, int(partition_size) + 1, fail_ok=True)
            assert rc != 0, "Partition modification was expected to fail " \
                            "but instead was successful"
def delete_images_from_host_registries(con_ssh=None,
                                       auth_info=Tenant.get('admin_platform')):
    hosts = system_helper.get_hosts(con_ssh=con_ssh, auth_info=auth_info)
    for host in hosts:
        with host_helper.ssh_to_host(hostname=host,
                                     con_ssh=con_ssh) as host_ssh:
            LOG.info("Delete {} images for host: {}".format(
                STX_MONITOR_APP_NAME, host))
            container_helper.remove_docker_images_with_pattern(
                pattern="elastic", con_ssh=host_ssh, timeout=120)
def test_system_persist_over_host_reboot(host_type, stx_openstack_required):
    """
    Validate Inventory summary over reboot of one of the hosts; check whether
    data persists over reboot

    Test Steps:
        - capture Inventory summary for list of hosts on system service-list
          and neutron agent-list
        - reboot the current Controller-Active
        - Wait for reboot to complete
        - Validate key items from inventory persist over reboot

    """
    if host_type == 'controller':
        host = system_helper.get_active_controller_name()
    elif host_type == 'compute':
        if system_helper.is_aio_system():
            skip("No compute host for AIO system")
        host = None
    else:
        hosts = system_helper.get_hosts(personality='storage')
        if not hosts:
            skip(msg="Lab has no storage nodes. Skip rebooting storage node.")
        host = hosts[0]

    LOG.tc_step("Pre-check for system status")
    system_helper.wait_for_services_enable()
    up_hypervisors = host_helper.get_up_hypervisors()
    network_helper.wait_for_agents_healthy(hosts=up_hypervisors)

    LOG.tc_step("Launch a vm")
    vm_id = vm_helper.boot_vm(cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id)

    if host is None:
        host = vm_helper.get_vm_host(vm_id)

    LOG.tc_step("Reboot a {} node and wait for reboot completes: {}".format(
        host_type, host))
    HostsToRecover.add(host)
    host_helper.reboot_hosts(host)
    host_helper.wait_for_hosts_ready(host)

    LOG.tc_step("Check vm is still active and pingable after {} "
                "reboot".format(host))
    vm_helper.wait_for_vm_status(vm_id, status=VMStatus.ACTIVE, fail_ok=False)
    vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_id,
                                               timeout=VMTimeout.DHCP_RETRY)

    LOG.tc_step("Check neutron agents and system services are in good state "
                "after {} reboot".format(host))
    network_helper.wait_for_agents_healthy(up_hypervisors)
    system_helper.wait_for_services_enable()

    if host in up_hypervisors:
        LOG.tc_step("Check {} can still host vm after reboot".format(host))
        if not vm_helper.get_vm_host(vm_id) == host:
            time.sleep(30)
            vm_helper.live_migrate_vm(vm_id, destination_host=host)
def test_host_status():
    """
    System overview
    """
    active_controller_host = system_helper.get_active_controller_name()
    LOG.info("Active Controller: {}".format(active_controller_host))

    standby_controller_host = system_helper.get_standby_controller_name()
    LOG.info("Standby Controller {}".format(standby_controller_host))

    host_list = system_helper.get_hosts()
    for host in host_list:
        LOG.info("Host: {}".format(host))
def test_delete_host_partitions():
    """
    This test creates host partitions and the teardown deletes them.

    Arguments:
    * None

    Test Steps:
    * Create a partition on each host

    Teardown:
    * Delete those partitions
    """
    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    usable_disks = False
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            partition_chunks = int(size_gib)
            if partition_chunks < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue

            usable_disks = True
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host, disk_uuid,
                                                           "1", fail_ok=False,
                                                           wait=False)
            assert rc == 0, "Partition creation was expected to succeed but " \
                            "instead failed"
            # Check that the partition was created
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            storage_helper.wait_for_host_partition_status(host=host, uuid=uuid,
                                                          timeout=CP_TIMEOUT)
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)
            # Only test one disk on each host
            break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")
def test_GET_ihosts_host_id_invalidUUID(sysinv_rest):
    """
    Test GET of <resource> with valid authentication and an invalid UUID
    value. The UUID characters are shifted so the resulting value is not a
    valid RFC 4122 UUID, and the request is expected to be rejected.

    Args:
        n/a

    Prerequisites: system is running
    Test Setups:
        n/a
    Test Steps:
        - Using requests GET <resource> with proper authentication
        - Determine if expected status_code of 400 is received
    Test Teardown:
        n/a
    """
    path = "/ihosts/{}/addresses"
    r = sysinv_rest
    LOG.info(path)
    LOG.info(system_helper.get_hosts())
    for host in system_helper.get_hosts():
        uuid = system_helper.get_host_values(host, 'uuid')[0]
        LOG.info("host: {} uuid: {}".format(host, uuid))
        message = "Using requests GET {} with proper authentication"
        LOG.tc_step(message.format(path))

        # shift a->g, b->h, etc - all to generate an invalid uuid
        shifted_uuid = ''.join(
            map(lambda x: chr((ord(x) - ord('a') + 6) % 26 + ord('a'))
                if x in string.ascii_lowercase else x, uuid.lower()))
        status_code, text = r.get(resource=path.format(shifted_uuid),
                                  auth=True)
        message = "Retrieved: status_code: {} message: {}"
        LOG.info(message.format(status_code, text))
        LOG.tc_step("Determine if expected code of 400 is received")
        message = "Expected code of 400 - received {} and message {}"
        assert status_code == 400, message.format(status_code, text)
def test_kernel_module_signatures():
    """
    Test kernel modules are properly signed on all stx hosts.

    Steps on each host:
        - 'cat /proc/sys/kernel/tainted', ensure value is 4096. If not, do
          following steps:
            - 'grep --color=never -i "module verification failed"
              /var/log/kern.log' to find out failed modules
            - 'modinfo <failed_module> | grep --color=never -E "sig|filename"'
              to display signing info for each module

    """
    hosts = system_helper.get_hosts()
    failed_hosts = {}

    for host in hosts:
        with host_helper.ssh_to_host(host) as host_ssh:
            LOG.tc_step(
                "Check for unsigned kernel modules on {}".format(host))
            output = host_ssh.exec_cmd('cat /proc/sys/kernel/tainted',
                                       fail_ok=False)[1]
            output_binary = '{0:b}'.format(int(output))
            unsigned_module_bit = '0'
            # 14th bit from the right flags an unsigned module
            if len(output_binary) >= 14:
                unsigned_module_bit = output_binary[-14]

            if unsigned_module_bit != '0':
                LOG.error(
                    "Kernel module verification(s) failed on {}. Collecting "
                    "more info".format(host))

                LOG.tc_step(
                    "Check kern.log for modules with failed verification")
                failed_modules = []
                err_out = host_ssh.exec_cmd(
                    'grep --color=never -i "module verification failed" '
                    '/var/log/kern.log')[1]
                for line in err_out.splitlines():
                    module = re.findall(
                        r'\] (.*): module verification failed',
                        line)[0].strip()
                    if module not in failed_modules:
                        failed_modules.append(module)

                failed_hosts[host] = failed_modules
                LOG.tc_step("Display signing info for {} failed kernel "
                            "modules: {}".format(host, failed_modules))
                for module in failed_modules:
                    host_ssh.exec_cmd('modinfo {} | grep --color=never -E '
                                      '"sig|filename"'.format(module))

    assert not failed_hosts, "Kernel module signature verification " \
                             "failed on: {}".format(failed_hosts)
def pre_configs(request):
    """
    Dovetail test fixture
    Args:
        request:

    - configure sshd_config on tis hosts to allow root access
    - update conf files on dovetail test node on cumulus

    """
    if not ComplianceVar.get_var('DOVETAIL_SUITE'):
        skip('--dovetail-suite unspecified.')

    try:
        import yaml
    except ImportError:
        skip('pyyaml package is not installed.')

    computes = host_helper.get_up_hypervisors()
    if len(computes) < 2:
        skip('Less than 2 computes in available states')

    active, standby = system_helper.get_active_standby_controllers()
    if not standby:
        skip('No standby controller on system')

    LOG.fixture_step(
        "Ensure dovetail test node mgmt nic connects to lab under test")
    compliance_helper.update_dovetail_mgmt_interface()

    controllers = [active, standby]
    storages = system_helper.get_hosts(personality='storage',
                                       availability=HostAvailState.AVAILABLE)
    hosts_dict = {
        'controller': controllers,
        'compute': computes,
        'storage': storages
    }
    all_hosts = list(set(controllers + computes + storages))

    LOG.fixture_step(
        "Enable port_security for the system and update existing networks")
    port_security = network_helper.get_network_values(
        'external-net0', 'port_security_enabled')[0]
    port_security = eval(port_security)
    if not port_security:
        system_helper.add_ml2_extension_drivers(drivers='port_security')
        networks = network_helper.get_networks(auth_info=Tenant.get('admin'))
        for net in networks:
            network_helper.set_network(net_id=net, enable_port_security=True)

    configure_tis(all_hosts, request=request)
    configure_dovetail_server(hosts_per_personality=hosts_dict)
def test_create_many_small_host_partitions_on_a_single_host():
    """
    This test attempts to create multiple tiny partitions on a single host.

    Assumptions:
    * There's some free disk space available

    Test steps:
    * Query the hosts to determine disk space
    * Create small partitions until the disk space is consumed
    * Repeat on all applicable hosts

    Teardown:
    * Delete created partitions
    """
    global partitions_to_restore
    partitions_to_restore = {}

    computes = system_helper.get_hosts(personality="compute")
    hosts = system_helper.get_controllers() + computes

    usable_disks = False
    for host in hosts:
        partitions_to_restore[host] = []
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue

        for disk_uuid in free_disks:
            size_gib = float(free_disks[disk_uuid])
            num_partitions = 2
            if size_gib <= num_partitions:
                LOG.info("Skipping disk {} due to insufficient space".format(
                    disk_uuid))
                continue

            partition_chunks = int(size_gib / num_partitions)
            usable_disks = True
            LOG.info("Creating partition on {}".format(host))
            # partitions_to_restore[host] = []
            for i in range(0, num_partitions):
                uuid = storage_helper.create_host_partition(
                    host, disk_uuid, partition_chunks)[1]
                partitions_to_restore[host].append(uuid)
            # Only test one disk on each host
            break

        # Only test one host (otherwise takes too long)
        if usable_disks:
            break

    if not usable_disks:
        skip("Did not find disks with sufficient space to test with.")
def delete_all_monitor_labels(con_ssh=None,
                              auth_info=Tenant.get('admin_platform')):
    LOG.info("Delete monitor labels from hosts")

    host_list = system_helper.get_hosts(con_ssh=con_ssh, auth_info=auth_info)
    for host in host_list:
        # Remove all monitor labels from all hosts on the system
        host_helper.remove_host_labels(host, STX_MONITOR_LABELS, lock=False,
                                       unlock=False, con_ssh=con_ssh,
                                       auth_info=auth_info)
def lock_(request):
    hosts = system_helper.get_hosts()
    host = hosts[0]
    if hosts[0] == system_helper.get_active_controller_name():
        if not system_helper.is_aio_simplex():
            host = hosts[1]

    host_helper.lock_host(host)

    def unlock_():
        host_helper.unlock_host(host)

    request.addfinalizer(unlock_)
    return host
def test_GET_various_host_id_valid(sysinv_rest, path):
    """
    Test GET of <resource> with valid authentication.

    Args:
        sysinv_rest
        path

    Prerequisites: system is running
    Test Setups:
        n/a
    Test Steps:
        - Using requests GET <resource> with proper authentication
        - Determine if expected status_code of 200 is received
    Test Teardown:
        n/a
    """
    r = sysinv_rest
    path = re.sub("-", "{}", path)
    LOG.info(path)
    LOG.info(system_helper.get_hosts())
    for host in system_helper.get_hosts():
        uuid = system_helper.get_host_values(host, 'uuid')[0]
        res = path.format(uuid)
        message = "Using requests GET {} with proper authentication"
        LOG.tc_step(message.format(res))
        status_code, text = r.get(resource=res, auth=True)
        message = "Retrieved: status_code: {} message: {}"
        LOG.info(message.format(status_code, text))
        if status_code == 404:
            pytest.skip("Unsupported resource in this configuration.")
        else:
            message = "Determine if expected code of 200 is received"
            LOG.tc_step(message)
            message = "Expected code of 200 - received {} and message {}"
            assert status_code == 200, message.format(status_code, text)