def _wait_for_config_apply(auth_info_, con_ssh_=None):
    """
    Wait, for each controller, for the config-out-of-date event to be raised
    ('set') and then cleared ('clear').

    NOTE(review): `start_time` is not defined in this function — presumably a
    value from an enclosing/module scope set before this is called; confirm.

    Args:
        auth_info_ (dict): auth info passed through to the system queries.
        con_ssh_: ssh connection to use; None for the default connection.
    """
    controllers = system_helper.get_controllers(auth_info=auth_info_,
                                                con_ssh=con_ssh_)
    for controller in controllers:
        # Wait for the out-of-date event to be raised for this controller
        system_helper.wait_for_events(
            start=start_time,
            fail_ok=False,
            timeout=60,
            entity_instance_id='host=controller',
            event_log_id=EventLogID.CONFIG_OUT_OF_DATE,
            auth_info=auth_info_,
            con_ssh=con_ssh_,
            **{
                'Entity Instance ID': 'host={}'.format(controller),
                'State': 'set'
            })
        # Extend timeout for controller-1 config-out-date clear to 5min due
        # to CGTS-8497
        system_helper.wait_for_events(
            start=start_time,
            fail_ok=False,
            timeout=300,
            entity_instance_id='host=controller',
            event_log_id=EventLogID.CONFIG_OUT_OF_DATE,
            auth_info=auth_info_,
            con_ssh=con_ssh_,
            **{
                'Entity Instance ID': 'host={}'.format(controller),
                'State': 'clear'
            })
def test_create_zero_sized_host_partition():
    """
    Attempt to provision a size-zero partition once per host; every attempt
    must be rejected.

    Test steps:
        * Create partition of size zero
        * Ensure the provisioning is rejected

    Teardown:
        * None
    """
    targets = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute")
    for target in targets:
        for uuid in storage_helper.get_host_disks(target):
            LOG.tc_step(
                "Attempt to create zero sized partition on uuid {} on host {}".
                format(uuid, target))
            code = storage_helper.create_host_partition(target, uuid, "0",
                                                        fail_ok=True)[0]
            assert code != 0, "Partition creation was expected to fail but instead succeeded"
            # One disk per host is enough for this negative check
            break
def test_assign_rootfs_disk_to_pv():
    """
    Attempt to create a PV on each host's rootfs disk; the operation must be
    rejected.

    Assumptions:
        * None

    Test Steps:
        * Determine which disk is the rootfs
        * Attempt to create a PV on that disk using a PV type of Disk.

    Teardown:
        * None
    """
    targets = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute")
    rootfs = storage_helper.get_hosts_rootfs(targets)
    for host, disk_uuids in rootfs.items():
        code = cli.system("host-pv-add {} cgts-vg {}".format(host,
                                                             disk_uuids[0]),
                          fail_ok=True)[0]
        assert code != 0, "Expected PV creation to fail but instead succeeded"
def test_create_partition_using_non_existent_device_node():
    """
    Attempt to create a partition on a valid host using an invalid device
    node; the request must be rejected.

    Arguments:
        * None

    Steps:
        * Attempt to create a partition on a valid host using an invalid
          device node, e.g. /dev/sdz

    Teardown:
        * None
    """
    # Safely hard-coded since we don't have enough physical slots for this
    # device node to ever exist
    bogus_node = "/dev/sdz"
    size_gib = "1"

    targets = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute")
    for host in targets:
        LOG.tc_step(
            "Creating partition on host {} with size {} using device node {}".
            format(host, size_gib, bogus_node))
        code = storage_helper.create_host_partition(host, bogus_node,
                                                    size_gib,
                                                    fail_ok=True)[0]
        assert code != 0, "Partition creation was successful"
def test_cpe_services_and_functions():
    """
    Verify CPE (AIO) subfunctions, nova services and node readiness on a
    combined-load lab; skipped on labs that have dedicated computes.
    """
    if system_helper.host_exists(host='compute-0'):
        skip("compute-0 exists - skip for non-CPE lab")

    LOG.tc_step("Check controller+compute subfunction via system host-show")
    for controller in system_helper.get_controllers():
        assert system_helper.is_aio_system(controller=controller), \
            "{} does not have controller+compute subfunction in system host-show".format(controller)

    LOG.tc_step("Check CPE system services via nova service-list")
    expected_binaries = [
        "nova-scheduler",
        # "nova-cert",
        "nova-conductor",
        # "nova-consoleauth",    # removed in Train
        "nova-compute"
    ]
    actual_binaries = nova_helper.get_compute_services(field='Binary')
    assert set(expected_binaries) <= set(actual_binaries), \
        "Not all binaries from {} exist in 'nova service-list'".format(expected_binaries)

    LOG.tc_step("Check all nodes are ready in kubectl get nodes")
    kube_helper.wait_for_nodes_ready(timeout=3)
def test_host_disk_wipe_rootfs():
    """
    Run ``system host-disk-wipe`` against each node's rootfs disk and verify
    the command is rejected.

    Command format is:
    system host-disk-wipe [--confirm] <hostname or id> <disk uuid>

    Note, host-disk-wipe is only applicable to controller and compute nodes.
    It cannot be used on the rootfs disk.  It cannot be used for a disk that
    is used by a PV or has partitions used by a PV.

    Arguments:
        - None

    Test Steps:
        1.  Determine which is the rootfs disk
        2.  Attempt to wipe the disk
        3.  Expect it to fail for every node

    Assumptions:
        - None
    """
    targets = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute") + \
        system_helper.get_hosts(personality="storage")

    LOG.tc_step("Gather rootfs disks")
    rootfs = storage_helper.get_hosts_rootfs(targets)

    for host, disk_uuids in rootfs.items():
        LOG.tc_step("Attempting to wipe {} from {}".format(disk_uuids[0],
                                                           host))
        code = cli.system('host-disk-wipe --confirm {} {}'.format(
            host, disk_uuids[0]), fail_ok=True)[0]
        assert code != 0, "Expected wipe disk to fail but instead succeeded"
def test_modify_second_last_partition():
    """
    Attempt to modify a partition that is not the last one on the disk; the
    modify must be rejected since only the very last partition is mutable.

    Arguments:
        * None

    Test steps:
        * Create partition1
        * Create partition2
        * Attempt to modify partition1

    Teardown:
        * None
    """
    global partitions_to_restore
    partitions_to_restore = {}

    targets = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute")
    for host in targets:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host,
                                                                   disks)
        if not free_disks:
            continue
        partitions_to_restore[host] = []
        for disk_uuid, free_gib in free_disks.items():
            partition_size = "1"
            # Need room for at least two 1-GiB partitions plus a resize
            if float(free_gib) / int(partition_size) < 3:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue

            LOG.info("Creating first partition on {}".format(host))
            first_uuid = storage_helper.create_host_partition(
                host, disk_uuid, partition_size)[1]
            partitions_to_restore[host].append(first_uuid)

            LOG.info("Creating second partition on {}".format(host))
            second_uuid = storage_helper.create_host_partition(
                host, disk_uuid, partition_size)[1]
            partitions_to_restore[host].append(second_uuid)

            larger_size = int(partition_size) + 1
            LOG.tc_step(
                "Modifying partition {} from size {} to size {} from host {} on disk {}"
                .format(first_uuid, partition_size, larger_size, host,
                        disk_uuid))
            code = storage_helper.modify_host_partition(
                host, first_uuid, larger_size, fail_ok=True)[0]
            assert code != 0, "Partition modification was expected to fail but instead was successful"
def check_host(controller):
    """
    Resolve which controller hostname a test should target.

    Args:
        controller (str): 'standby' to pick a healthy standby controller;
            any other value targets the current active controller.

    Returns:
        str: hostname of the chosen controller.

    Skips the test when a standby controller is requested but none exists in
    a usable state.
    """
    host = system_helper.get_active_controller_name()
    if controller == 'standby':
        controllers = system_helper.get_controllers(
            availability=(HostAvailState.AVAILABLE, HostAvailState.DEGRADED,
                          HostAvailState.ONLINE))
        # Guard the removal: if the active controller is itself in a bad
        # state it won't be in the filtered list, and an unconditional
        # list.remove() would raise ValueError instead of skipping cleanly.
        if host in controllers:
            controllers.remove(host)
        if not controllers:
            skip('Standby controller does not exist or not in good state')
        host = controllers[0]

    return host
def test_delete_host_partitions():
    """
    Create one host partition per host; the teardown deletes them.

    Arguments:
        * None

    Test Steps:
        * Create a partition on each host

    Teardown:
        * Re-create those partitions
    """
    global partitions_to_restore
    partitions_to_restore = {}

    targets = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute")
    found_usable_disk = False
    for host in targets:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host,
                                                                   disks)
        if not free_disks:
            continue
        for disk_uuid, free_gib in free_disks.items():
            if int(float(free_gib)) < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue
            found_usable_disk = True
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host, disk_uuid,
                                                           "1",
                                                           fail_ok=False,
                                                           wait=False)
            assert rc == 0, "Partition creation was expected to succeed but instead failed"
            # Confirm the partition reached its final state before recording
            # it for teardown
            new_uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            storage_helper.wait_for_host_partition_status(host=host,
                                                          uuid=new_uuid,
                                                          timeout=CP_TIMEOUT)
            partitions_to_restore[host] = [new_uuid]
            # Only test one disk on each host
            break

    if not found_usable_disk:
        skip("Did not find disks with sufficient space to test with.")
def test_enable_tpm(swact_first):
    """
    Enable TPM on the lab (disabling it first if already configured) and
    verify the resulting configuration changes take effect.
    """
    con_ssh = ControllerClient.get_active_controller()

    LOG.tc_step('Check if TPM is already configured')
    code, cert_id, cert_type = get_tpm_status(con_ssh)

    if code == 0:
        LOG.info('TPM already configured on the lab, cert_id:{}, cert_type:{}'.
                 format(cert_id, cert_type))

        LOG.tc_step('disable TPM first in order to test enabling TPM')
        code = remove_cert_from_tpm(con_ssh, fail_ok=False,
                                    check_first=False)[0]
        assert 0 == code, 'failed to disable TPM'
        time.sleep(30)

        LOG.info('Waiting alarm: out-of-config cleaned up')
        system_helper.wait_for_alarm_gone(EventLogID.CONFIG_OUT_OF_DATE)
    else:
        LOG.info('TPM is NOT configured on the lab')
        LOG.info('-code:{}, cert_id:{}, cert_type:{}'.format(
            code, cert_id, cert_type))

    if swact_first:
        LOG.tc_step('Swact the active controller as instructed')
        if len(system_helper.get_controllers()) < 2:
            LOG.info('Less than 2 controllers, skip swact')
        else:
            host_helper.swact_host(fail_ok=False)
            copy_config_from_local(
                con_ssh, local_conf_backup_dir,
                os.path.join(HostLinuxUser.get_home(), conf_backup_dir))

    LOG.tc_step('Install HTTPS Certificate into TPM')
    code = store_cert_into_tpm(
        con_ssh,
        check_first=False,
        fail_ok=False,
        pem_password=HostLinuxUser.get_password())[0]
    assert 0 == code, 'Failed to instll certificate into TPM, cert-file'
    LOG.info('OK, certificate is installed into TPM')

    LOG.info('Wait the out-of-config alarm cleared')
    system_helper.wait_for_alarm_gone(EventLogID.CONFIG_OUT_OF_DATE)

    LOG.tc_step(
        'Verify the configurations changes for impacted components, expecting all changes exit'
    )
    verify_configuration_changes(expected=True, connection=con_ssh)
def test_create_many_small_host_partitions_on_a_single_host():
    """
    Create multiple small partitions on a single host.

    Assumptions:
        * There's some free disk space available

    Test steps:
        * Query the hosts to determine disk space
        * Create small partitions until the disk space is consumed
        * Repeat on all applicable hosts

    Teardown:
        * Delete created partitions
    """
    global partitions_to_restore
    partitions_to_restore = {}

    targets = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute")
    found_usable = False
    for host in targets:
        partitions_to_restore[host] = []
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host,
                                                                   disks)
        if not free_disks:
            continue
        for disk_uuid, free_gib in free_disks.items():
            num_partitions = 2
            size_gib = float(free_gib)
            if size_gib <= num_partitions:
                LOG.info("Skipping disk {} due to insufficient space".format(
                    disk_uuid))
                continue
            chunk_gib = int(size_gib / num_partitions)
            found_usable = True
            LOG.info("Creating partition on {}".format(host))
            for _ in range(num_partitions):
                new_uuid = storage_helper.create_host_partition(
                    host, disk_uuid, chunk_gib)[1]
                partitions_to_restore[host].append(new_uuid)
            # Only test one disk on each host
            break
        # Only test one host (otherwise takes too long)
        if found_usable:
            break

    if not found_usable:
        skip("Did not find disks with sufficient space to test with.")
def wait_for_con_drbd_sync_complete():
    """
    Wait for controller-1 drbd sync to finish: alarm cleared, host available
    and sm-dump drbd states as desired.

    Returns:
        bool: False when fewer than two unlocked controllers exist (nothing
        to wait for), True once all waits succeed.
    """
    unlocked = system_helper.get_controllers(
        administrative=HostAdminState.UNLOCKED)
    if len(unlocked) < 2:
        LOG.info(
            "Less than two unlocked controllers on system. Do not wait for drbd sync"
        )
        return False

    host = 'controller-1'
    LOG.fixture_step(
        "Waiting for controller-1 drbd sync alarm gone if present")
    deadline = time.time() + 1200
    while time.time() < deadline:
        remaining = system_helper.get_alarms(
            alarm_id=EventLogID.CON_DRBD_SYNC, reason_text='drbd-',
            entity_id=host, strict=False)
        if not remaining:
            LOG.info("{} drbd sync alarm is cleared".format(host))
            break
        time.sleep(10)
    else:
        assert False, "drbd sync alarm {} is not cleared within timeout".format(
            EventLogID.CON_DRBD_SYNC)

    LOG.fixture_step(
        "Wait for {} becomes available in system host-list".format(host))
    system_helper.wait_for_host_values(host,
                                       availability=HostAvailState.AVAILABLE,
                                       timeout=120,
                                       fail_ok=False,
                                       check_interval=10)

    LOG.fixture_step(
        "Wait for {} drbd-cinder in sm-dump to reach desired state".format(
            host))
    host_helper.wait_for_sm_dump_desired_states(host, 'drbd-', strict=False,
                                                timeout=30, fail_ok=False)
    return True
def wait_for_tmp_status(cert_id, ssh_client=None, expected_status=''):
    """
    Poll helper for TPM certificate state.

    Returns:
        tuple: (0, msg) when the expected status is reached (or, for
        non-'tpm-config-applied' expectations, when no detailed info is
        returned); (1, msg) to indicate the caller should keep waiting.
    """
    rc, actual_id, actual_mode, actual_states = get_cert_info(
        cert_id, con_ssh=ssh_client)
    LOG.info('auctual_id={}, actual_mode={}, actual_states={}'.format(
        actual_id, actual_mode, actual_states))

    controllers = system_helper.get_controllers(con_ssh=ssh_client)
    if expected_status == 'tpm-config-applied':
        for controller in controllers:
            missing = controller not in actual_states['state']
            if missing or actual_states['state'][controller] != expected_status:
                return 1, '{} is not in expected status: {}'.format(
                    controller, expected_status)
        return 0, 'all controllers:{} are in expected status:{}'.format(
            controllers, expected_status)

    if rc != 0:
        return 0, 'no detailed information as expected'

    return 1, 'did not get expected status, continue to wait'
def test_increase_controllerfs():
    """
    Increase the size of every controllerfs filesystem at once.

    Arguments:
        - None

    Test Steps:
        - Query the filesystem for their current size
        - Increase the size of each filesystem at once

    Assumptions:
        - There is sufficient free space to allow for an increase, otherwise
          skip test.
    """
    LOG.tc_step("Determine the space available for each drbd filesystem")
    new_sizes = {}
    for fs in DRBDFS:
        current = storage_helper.get_controllerfs_values(fs)[0]
        LOG.info("Current value of {} is {}".format(fs, current))
        new_sizes[fs] = current + 1
        LOG.info("Will attempt to increase the value of {} to {}".format(
            fs, new_sizes[fs]))

    LOG.tc_step("Increase the size of all filesystems")
    storage_helper.modify_controllerfs(**new_sizes)

    # The resize is applied asynchronously: wait until the config-out-of-date
    # alarms clear before checking the filesystems
    for host in system_helper.get_controllers():
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host),
            timeout=600)

    LOG.tc_step(
        "Confirm the underlying filesystem size matches what is expected")
    storage_helper.check_controllerfs(**new_sizes)
def test_disable_tpm(swact_first):
    """
    Disable TPM on a lab where it is configured and verify none of the
    TPM-related configuration changes remain; skip when TPM is not set up.
    """
    ssh_client = ControllerClient.get_active_controller()

    LOG.tc_step('Check if TPM is already configured')
    code, cert_id, cert_type = get_tpm_status(ssh_client)
    if code != 0:
        # Nothing to disable
        LOG.info('TPM is NOT configured on the lab, skip the test')
        skip('TPM is NOT configured on the lab, skip the test')

    LOG.info('TPM is configured on the lab')

    if swact_first:
        LOG.tc_step('Swact the active controller as instructed')
        if len(system_helper.get_controllers()) < 2:
            LOG.info('Less than 2 controllers, skip swact')
        else:
            host_helper.swact_host(fail_ok=False)
            copy_config_from_local(
                ssh_client, local_conf_backup_dir,
                os.path.join(HostLinuxUser.get_home(), conf_backup_dir))

    LOG.tc_step('Disabling TPM')
    code = remove_cert_from_tpm(ssh_client, fail_ok=False,
                                check_first=False)[0]
    assert 0 == code, 'failed to disable TPM'

    LOG.info('Wait the out-of-config alarm cleared')
    system_helper.wait_for_alarm_gone(EventLogID.CONFIG_OUT_OF_DATE)

    LOG.tc_step(
        'Verify the configurations changes for impacted components, DO NOT expect any of the changes'
    )
    verify_configuration_changes(expected=False, connection=ssh_client)
def test_delete_heat_after_swact(template_name):
    """
    Verify a heat stack can be deleted after a controller swact.

    Args:
        template_name (str): e.g, OS_Cinder_Volume.

    Prerequisites (skip test if not met):
        - at least two hypervisors hosts on the system

    Test Steps:
        - Create a heat stack with the given template
        - Verify heat stack is created sucessfully
        - Verify heat resources are created
        - Swact controllers
        - Delete Heat stack and verify resource deletion
    """
    if len(system_helper.get_controllers()) < 2:
        skip(SkipSysType.LESS_THAN_TWO_CONTROLLERS)

    # add test step
    verify_basic_template(template_name, delete_after_swact=True)
def _test_status_firewall_reboot():
    """
    Test iptables status after reboot of controller

    Test Steps:
        - Stop iptables service
        - Confirm iptables service has stopped
        - Reboot the controller being tested
        - Confirm iptables service is online
        - Repeat for second controller
    """
    LOG.tc_step("Getting the controller(s)")
    controllers = system_helper.get_controllers()
    for controller in controllers:
        with host_helper.ssh_to_host(controller) as con_ssh:
            LOG.tc_step("Stopping iptables service")
            cmd = 'service iptables stop'
            con_ssh.exec_sudo_cmd(cmd)
            LOG.tc_step("checking iptables status")
            cmd = 'service iptables status'
            code, output = con_ssh.exec_sudo_cmd(cmd)
            # BUG FIX: the original condition was
            #   'Active: inactive' or 'Active: failed' in output
            # which always evaluates truthy (a non-empty string literal),
            # so the assert could never fire.  Each substring must be
            # tested against the output individually.
            assert 'Active: inactive' in output or \
                'Active: failed' in output, \
                "iptables service did not stop running on host {}".format(
                    controller)

        LOG.tc_step("Rebooting {}".format(controller))
        HostsToRecover.add(controller)
        host_helper.reboot_hosts(controller)

        with host_helper.ssh_to_host(controller) as con_ssh:
            LOG.tc_step(
                "Checking iptables status on host {} after reboot".format(
                    controller))
            cmd = 'service iptables status | grep --color=never Active'
            code, output = con_ssh.exec_sudo_cmd(cmd)
            assert 'active' in output, "iptables service did not start after reboot on host {}".format(
                controller)
def less_than_two_controllers(con_ssh=None,
                              auth_info=Tenant.get('admin_platform')):
    """Return True when the system has fewer than two controllers."""
    controllers = system_helper.get_controllers(con_ssh=con_ssh,
                                                auth_info=auth_info)
    return len(controllers) < 2
def less_than_two_cons(no_openstack):
    """Return True when fewer than two controllers exist on the system."""
    controller_count = len(system_helper.get_controllers())
    return controller_count < 2
def test_increase_host_partition_size_beyond_avail_disk_space():
    """
    Attempt to grow an existing host partition past the space available on
    its disk; the modify must be rejected.

    Assumptions:
        * Partitions are available in Ready state.

    Test steps:
        * Create partition
        * Modify the partition to consume over than the available disk space

    Teardown:
        * Delete created partitions
    """
    global partitions_to_restore
    partitions_to_restore = {}

    targets = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute")
    found_usable = False
    for host in targets:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host,
                                                                   disks)
        if not free_disks:
            continue
        for disk_uuid, free_gib in free_disks.items():
            size_gib = float(free_gib)
            if int(size_gib) < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue
            found_usable = True
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host, disk_uuid,
                                                           "1",
                                                           fail_ok=False,
                                                           wait=False)
            assert rc == 0, "Partition creation was expected to succeed but instead failed"
            new_uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            storage_helper.wait_for_host_partition_status(host=host,
                                                          uuid=new_uuid,
                                                          timeout=CP_TIMEOUT)
            partitions_to_restore[host] = [new_uuid]

            device_node = storage_helper.get_host_partition_values(
                host, new_uuid, "device_node")[0]
            # Strip the partition number; nvme device nodes end in 'pN', so
            # drop the trailing 'p' as well
            device_node = device_node.rstrip(string.digits)
            if device_node.startswith("/dev/nvme"):
                device_node = device_node[:-1]

            size_gib += 1
            LOG.tc_step(
                "Modifying partition {} from size 1 to size {} from host {} on device node {}"
                .format(new_uuid, int(size_gib), host, device_node))
            rc, out = storage_helper.modify_host_partition(
                host, new_uuid, str(int(size_gib)), fail_ok=True)
            assert rc != 0, "Expected partition modification to fail and instead it succeeded"
            LOG.info(out)
            # Only test one disk on each host
            break

    if not found_usable:
        skip("Did not find disks with sufficient space to test with.")
def test_resize_drbd_filesystem_while_resize_inprogress():
    """
    Attempt to resize a drbd filesystem while another drbd resize is still in
    progress; the second resize must be rejected.

    Arguments:
        - None

    Test steps:
        1.  Increase the size of extension to allow for test to proceed.
        2.  Wait for alarms to clear and then check the underlying filesystem
            is updated
        3.  Resize the database filesystem.  This should be successful.
        4.  Attempt to resize it again immediately.  This should be rejected.

    Assumptions:
        - None
    """
    start_time = common.get_date_in_format()

    fs = "extension"
    LOG.tc_step(
        "Increase the {} size before proceeding with rest of test".format(fs))
    current = storage_helper.get_controllerfs_values(fs)[0]
    LOG.info("Current value of {} is {}".format(fs, current))
    first_change = {fs: int(current) + 5}
    LOG.info("Will attempt to increase the value of {} to {}".format(
        fs, first_change[fs]))

    LOG.tc_step("Increase the size of filesystems")
    storage_helper.modify_controllerfs(**first_change)

    controllers = system_helper.get_controllers()
    for host in controllers:
        system_helper.wait_for_events(
            event_log_id=EventLogID.CONFIG_OUT_OF_DATE,
            start=start_time,
            entity_instance_id="host={}".format(host),
            strict=False,
            **{'state': 'set'})
    for host in controllers:
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host),
            timeout=600)

    LOG.tc_step(
        "Confirm the underlying filesystem size matches what is expected")
    storage_helper.check_controllerfs(**first_change)

    fs = "database"
    LOG.tc_step("Determine the current filesystem size")
    value = storage_helper.get_controllerfs_values(fs)[0]
    LOG.info("Current value of {} is {}".format(fs, value))
    second_change = {fs: int(value) + 1}
    LOG.info("Will attempt to increase the value of {} to {}".format(
        fs, second_change[fs]))

    LOG.tc_step("Increase the size of filesystems")
    storage_helper.modify_controllerfs(**second_change)

    LOG.tc_step("Attempt to increase the size of the filesystem again")
    second_change[fs] = int(second_change[fs]) + 1
    code = storage_helper.modify_controllerfs(fail_ok=True,
                                              **second_change)[0]
    assert 1 == code, "Filesystem modify succeeded while failure is expected: {}".format(
        second_change)

    # Appearance of sync alarm is delayed so wait for it to appear and then
    # clear
    if not system_helper.is_aio_simplex():
        system_helper.wait_for_alarm(alarm_id=EventLogID.CON_DRBD_SYNC,
                                     timeout=300)
        system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CON_DRBD_SYNC,
                                          timeout=300)
def test_modify_drdb_swact_then_reboot():
    """
    Grow the drbd based filesystems, immediately swact, then reboot the
    (new) active controller.

    Arguments:
        - None

    Test Steps:
        - Determine how much free space we have available
        - Increase datebase
        - Increase extension
        - Initiate a controller swact
        - Initiate a controller reboot

    Assumptions:
        - None
    """
    con_ssh = ControllerClient.get_active_controller()

    LOG.tc_step("Determine the available free space on the system")
    cmd = "vgdisplay -C --noheadings --nosuffix -o vg_free --units g cgts-vg"
    free_space = con_ssh.exec_sudo_cmd(cmd)[1].lstrip()
    LOG.info("Available free space on the system is: {}".format(free_space))
    if float(free_space) <= 10:
        skip("Not enough free space to complete test.")

    LOG.tc_step("Determine the space available for each drbd fs")
    current_sizes = {}
    for fs in DRBDFS:
        table_ = table_parser.table(
            cli.system('controllerfs-show {}'.format(fs))[1])
        current_sizes[fs] = table_parser.get_value_two_col_table(table_,
                                                                 'size')
    LOG.info("Current fs values are: {}".format(current_sizes))

    LOG.tc_step("Increase the size of the extension and database filesystem")
    # Give the database a tenth of the free space, and extension half of that
    database_increase = math.trunc(float(free_space) / 10)
    new_database_size = database_increase + int(current_sizes["database"])
    cli.system("controllerfs-modify {}={}".format("database",
                                                  new_database_size))
    extension_increase = math.trunc(database_increase / 2)
    new_extension_size = extension_increase + int(current_sizes["extension"])
    cli.system("controllerfs-modify {}={}".format("extension",
                                                  new_extension_size))

    for host in system_helper.get_controllers():
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host),
            timeout=600)

    standby_cont = system_helper.get_standby_controller_name()
    system_helper.wait_for_host_values(
        standby_cont, availability=HostAvailState.AVAILABLE)
    host_helper.swact_host()

    act_cont = system_helper.get_active_controller_name()
    host_helper.reboot_hosts(act_cont)

    time.sleep(5)

    system_helper.wait_for_alarm_gone(
        alarm_id=EventLogID.HOST_RECOVERY_IN_PROGRESS,
        entity_id="host={}".format(act_cont),
        timeout=600)
def test_decrease_host_partition_size():
    """
    Attempt to shrink an existing host partition; the modify must be rejected
    since decreasing the size of a partition is not supported.

    Test Steps:
        * Create a partition
        * Modify the partition to decrease its size

    Teardown:
        * Delete created partition
    """
    global partitions_to_restore
    partitions_to_restore = {}

    targets = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute")
    found_usable = False
    for host in targets:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host,
                                                                   disks)
        if not free_disks:
            continue
        for disk_uuid, free_gib in free_disks.items():
            if int(float(free_gib)) < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(disk_uuid))
                continue
            found_usable = True
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host, disk_uuid,
                                                           "1",
                                                           fail_ok=False,
                                                           wait=False)
            assert rc == 0, "Partition creation was expected to succeed but instead failed"
            new_uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            storage_helper.wait_for_host_partition_status(host=host,
                                                          uuid=new_uuid,
                                                          timeout=CP_TIMEOUT)
            partitions_to_restore[host] = [new_uuid]

            device_node, size_gib = storage_helper.get_host_partition_values(
                host, new_uuid, ("device_node", "size_gib"))
            smaller_size = int(size_gib) - 1
            LOG.tc_step(
                "Modifying partition {} from size {} to size {} from host {} on device node {}"
                .format(new_uuid, int(size_gib), str(smaller_size), host,
                        device_node[:-1]))
            rc, out = storage_helper.modify_host_partition(
                host, new_uuid, str(smaller_size), fail_ok=True)
            assert rc != 0, "Expected partition modification to fail and instead it succeeded"
            # Only test one disk on each host
            break

    if not found_usable:
        skip("Did not find disks with sufficient space to test with.")
def _test_increase_ceph_mon():
    """
    Increase the size of ceph-mon.  Only applicable to a storage system.

    Fails until CGTS-8216

    Test steps:
    1.  Determine the current size of ceph-mon
    2.  Attempt to modify ceph-mon to invalid values
    3.  Check if there is free space to increase ceph-mon
    4.  Attempt to increase ceph-mon
    5.  Wait for config out-of-date alarms to raise
    6.  Lock/unlock all affected nodes (controllers and storage)
    7.  Wait for alarms to clear
    8.  Check that ceph-mon has the correct updated value

    Enhancement:
    1.  Possibly check there is enough disk space for ceph-mon to increase.
    Not sure if this is required since there always seems to be some space on
    the rootfs.
    """
    table_ = table_parser.table(cli.system("ceph-mon-list")[1])
    ceph_mon_gib = table_parser.get_values(table_, "ceph_mon_gib",
                                           **{"hostname": "controller-0"})[0]
    LOG.info("ceph_mon_gib is currently: {}".format(ceph_mon_gib))

    LOG.tc_step("Attempt to modify ceph-mon to invalid values")
    invalid_cmg = ['19', '41', 'fds']
    for value in invalid_cmg:
        host = "controller-0"
        cli.system("ceph-mon-modify {} ceph_mon_gib={}".format(host, value),
                   fail_ok=True)

    if int(ceph_mon_gib) >= 30:
        skip("Insufficient disk space to execute test")

    ceph_mon_gib_avail = 40 - int(ceph_mon_gib)
    new_ceph_mon_gib = math.trunc(ceph_mon_gib_avail / 10) + int(ceph_mon_gib)

    LOG.tc_step("Increase ceph_mon_gib to {}".format(new_ceph_mon_gib))
    hosts = system_helper.get_controllers()
    for host in hosts:
        cli.system("ceph-mon-modify {} ceph_mon_gib={}".format(
            host, new_ceph_mon_gib))
        # We only need to do this for one controller now and it applies to
        # both
        break

    LOG.info("Wait for expected alarms to appear")
    storage_hosts = system_helper.get_storage_nodes()
    total_hosts = hosts + storage_hosts
    for host in total_hosts:
        system_helper.wait_for_alarm(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                     entity_id="host={}".format(host))

    LOG.tc_step("Lock/unlock all affected nodes")
    for host in storage_hosts:
        HostsToRecover.add(host)
        host_helper.lock_host(host)
        host_helper.unlock_host(host)
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host))
        time.sleep(10)

    standby = system_helper.get_standby_controller_name()
    active = system_helper.get_active_controller_name()
    HostsToRecover.add(standby)
    host_helper.lock_host(standby)
    host_helper.unlock_host(standby)
    system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                      entity_id="host={}".format(standby))
    time.sleep(10)
    host_helper.swact_host(active)
    HostsToRecover.add(active)
    host_helper.lock_host(active)
    host_helper.unlock_host(active)
    system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                      entity_id="host={}".format(active))

    table_ = table_parser.table(cli.system("ceph-mon-list")[1])
    ceph_mon_gib = table_parser.get_values(table_, "ceph_mon_gib",
                                           **{"hostname": "controller-0"})[0]
    # BUG FIX: the original assertion was `ceph_mon_gib != new_ceph_mon_gib`,
    # which passes exactly when the modification did NOT take effect -- and,
    # since the table value is a string compared against an int, it always
    # passed regardless.  Assert the refetched value equals the new target.
    assert int(ceph_mon_gib) == new_ceph_mon_gib, "ceph-mon did not change"
def test_attempt_host_unlock_during_partition_creation():
    """
    Attempt to unlock a host while a partition is being created on it; the
    unlock must be rejected.

    Assumptions:
        * There's some free disk space available

    Test steps:
        * Query the hosts to determine disk space
        * Lock host
        * Create a partition but don't wait for completion
        * Attempt to unlock the host that is hosting the partition that is
          created

    Teardown:
        * Delete created partitions

    DISABLED since unlock while creating is not blocked.
    """
    global partitions_to_restore
    partitions_to_restore = {}

    candidates = system_helper.get_controllers() + \
        system_helper.get_hosts(personality="compute")
    # Filter out active controller
    active_controller = system_helper.get_active_controller_name()
    print("This is active controller: {}".format(active_controller))
    candidates.remove(active_controller)

    found_usable = False
    for host in candidates:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host,
                                                                   disks)
        if not free_disks:
            continue
        for disk_uuid, free_gib in free_disks.items():
            size_gib = float(free_gib)
            if size_gib < 2.0:
                LOG.info("Skip this disk due to insufficient space")
                continue
            LOG.tc_step("Lock {} and create a partition for disk {}".format(
                host, disk_uuid))
            HostsToRecover.add(host)
            host_helper.lock_host(host)
            found_usable = True
            LOG.info("Creating partition on {}".format(host))
            out = storage_helper.create_host_partition(host, disk_uuid,
                                                       int(size_gib),
                                                       wait=False)[1]
            partition_uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            partitions_to_restore[host] = [partition_uuid]

            LOG.tc_step(
                "Attempt to unlock host and ensure it's rejected when partition is "
                "being created")
            rc_ = host_helper.unlock_host(host, fail_ok=True,
                                          check_first=False)[0]
            assert rc_ != 0, "Unlock attempt unexpectedly passed"

            LOG.tc_step("wait for partition to be created")
            storage_helper.wait_for_host_partition_status(
                host=host, uuid=partition_uuid, timeout=CP_TIMEOUT)
            container_helper.wait_for_apps_status(apps='platform-integ-apps',
                                                  status=AppStatus.APPLIED,
                                                  check_interval=10)
            # Only test one disk on each host
            break
        # Do it on one host only
        break

    if not found_usable:
        skip("Did not find disks with sufficient space to test with.")
def _test_create_partition_and_associate_with_pv_cgts_vg():
    """
    This test attempt to create a partition and then associate it with a PV
    (physical volume), resulting in the partition being In-use.

    Assumptions:
    * There's some free disk space available

    Test steps:
    * Query hosts to determine disk space
    * Create partition
    * Associate it with cgts-vg PV
    * Checks the partition is in-use state
    * Attempts to delete the partition that is in-use.  It should fail.
    * Attempt to assign the in-use partition to another PV.  It should fail.

    Teardown:
    * None

    DISABLING: This fails since the partition says 'adding on unlock'.
    Should it be in-service?  Follow up with dev.
    """
    global partitions_to_restore
    partitions_to_restore = {}

    # Restricted to AIO: cgts-vg association on controllers only makes sense
    # on all-in-one deployments here.
    if not system_helper.is_aio_system():
        skip("This test requires an AIO system.")
    hosts = system_helper.get_controllers()
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host,
                                                                   disks)
        if not free_disks:
            continue
        for uuid in free_disks:
            size_gib = float(free_disks[uuid])
            if size_gib <= 1:
                LOG.tc_step("Skip this disk due to insufficient space")
                continue
            LOG.info("Creating partition on {}".format(host))
            rc, out = storage_helper.create_host_partition(host, uuid, "1")
            # NOTE: 'uuid' is rebound from the disk uuid to the newly created
            # partition's uuid for the remainder of this iteration.
            uuid = table_parser.get_value_two_col_table(
                table_parser.table(out), "uuid")
            partitions_to_restore[host] = []
            partitions_to_restore[host].append(uuid)
            LOG.tc_step("Associating partition {} with cgts-vg".format(uuid))
            # Older CLI syntax kept for reference:
            # cmd = "host-pv-add -t partition {} cgts-vg {}".format(host, uuid)
            cmd = "host-pv-add {} cgts-vg {}".format(host, uuid)
            rc, out = cli.system(cmd)
            assert rc == 0, "Associating partition with PV failed"
            LOG.tc_step("Check that partition is In-use state")
            # Partition transitions Ready -> In-Use once the PV association
            # takes effect.
            storage_helper.wait_for_host_partition_status(
                host=host, uuid=uuid,
                final_status=PartitionStatus.IN_USE,
                interim_status=PartitionStatus.READY,
                timeout=CP_TIMEOUT)
            LOG.tc_step("Attempt to delete In-Use partition")
            rc, out = storage_helper.delete_host_partition(host,
                                                           uuid,
                                                           fail_ok=True)
            assert rc != 0, "Partition deletion was expected to fail but instead passed"
            LOG.tc_step(
                "Attempt to associate the In-Use partition with another PV")
            # Older CLI syntax kept for reference:
            # cmd = "host-pv-add -t partition {} nova-local {}".format(host, uuid)
            cmd = "host-pv-add {} nova-local {}".format(host, uuid)
            # NOTE(review): no fail_ok here — presumably cli.system returns a
            # non-zero rc rather than raising on rejection; confirm.
            rc, out = cli.system(cmd)
            assert rc != 0, "Partition association succeeded but was expected to fail"
            # Only test one disk on each host
            break
        # Do it on one host only
        break
def test_increase_extensionfs_with_alarm():
    """
    This test increases the size of the extension controllerfs filesystem
    while there is an alarm condition for the fs.

    Arguments:
    - None

    Test Steps:
    - Query the filesystem for their current size
    - cause an alarm condition by filling the space on that fs
    - verify controller-0 is degraded
    - Increase the size of extension filesystem.
    - Verify alarm is gone

    Assumptions:
    - There is sufficient free space to allow for an increase, otherwise skip
      test.
    """
    file_loc = "/opt/extension"
    cmd = "cd " + file_loc
    file_path = file_loc + "/" + "testFile"
    drbdfs_val = {}
    fs = "extension"

    active_controller = system_helper.get_active_controller_name()

    LOG.tc_step("Determine the space available for extension filesystem")
    drbdfs_val[fs] = storage_helper.get_controllerfs_values(fs)[0]
    LOG.info("Current value of {} is {}".format(fs, drbdfs_val[fs]))

    # Allocate ~91% of the current fs size (in MiB) so that the
    # "fs usage threshold exceeded" alarm (100.104) is raised.
    LOG.info(
        "Will attempt to fill up the space to 90% of fs {} of value of {}".
        format(fs, drbdfs_val[fs]))
    file_size = int((drbdfs_val[fs] * 0.91) * 1000)
    file_size = str(file_size) + "M"
    cmd1 = "fallocate -l {} testFile".format(file_size)
    con_ssh = ControllerClient.get_active_controller()
    con_ssh.exec_cmd(cmd)
    con_ssh.exec_sudo_cmd(cmd1)
    if not con_ssh.file_exists(file_path=file_path):
        LOG.info("File {} is not created".format(file_path))
        # FIX: previously this path did 'return 0', which silently reported
        # the test as PASSED without exercising anything. Skip explicitly.
        skip("Test file {} could not be created to fill {} fs".format(
            file_path, fs))

    LOG.tc_step(
        "Verifying that the alarm is created after filling the fs space in {}".
        format(fs))
    system_helper.wait_for_alarm(alarm_id="100.104",
                                 entity_id=active_controller,
                                 timeout=600,
                                 strict=False)

    # verify the controller is in degraded state
    LOG.tc_step(
        "Verifying controller is degraded after filling the fs space in {}".
        format(fs))
    system_helper.wait_for_host_values(active_controller,
                                       availability='degraded')

    # Grow the fs by 2 GiB to clear the usage alarm.
    drbdfs_val[fs] = drbdfs_val[fs] + 2

    LOG.info("Will attempt to increase the value of {} to {}".format(
        fs, drbdfs_val[fs]))

    LOG.tc_step("Increase the size of extension filesystem")
    storage_helper.modify_controllerfs(**drbdfs_val)

    # Need to wait until the change takes effect before checking the
    # filesystems
    hosts = system_helper.get_controllers()
    for host in hosts:
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host),
            timeout=600)

        LOG.tc_step(
            "Verifying that the alarm is cleared after increasing the fs space in {}"
            .format(fs))
        system_helper.wait_for_alarm_gone(alarm_id="100.104",
                                          entity_id="host={}".format(host),
                                          timeout=600,
                                          strict=False)

    LOG.tc_step(
        "Confirm the underlying filesystem size matches what is expected")
    storage_helper.check_controllerfs(**drbdfs_val)

    # verify the controller is in available state
    LOG.tc_step(
        "Verifying that the controller is in available state after increasing the fs space in {}"
        .format(fs))
    system_helper.wait_for_host_values(active_controller,
                                       availability='available')
def test_create_multiple_partitions_on_single_host():
    """
    Attempt two concurrent partition creations on the same disk of one host.

    While the first partition is still being created, a second creation
    request is issued immediately.  The second request must be rejected;
    the first must complete successfully.

    Assumptions:
    * There's some free disk space available

    Test steps:
    * Query the hosts to determine disk space
    * Create a small partition but don't wait for creation
    * Immediately create a second small partition
    * Check that the second partition creation is rejected
    * Check the first partition was successfully created
    * Repeat on all applicable hosts

    Teardown:
    * Delete created partitions
    """
    global partitions_to_restore
    partitions_to_restore = {}

    candidate_hosts = (system_helper.get_controllers() +
                       system_helper.get_hosts(personality="compute"))
    found_usable_disk = False
    for target_host in candidate_hosts:
        host_disks = storage_helper.get_host_disks(target_host)
        disks_with_space = storage_helper.get_host_disks_with_free_space(
            target_host, host_disks)
        if not disks_with_space:
            continue
        for candidate_uuid, free_space in disks_with_space.items():
            # Need at least 2 GiB: 1 GiB for the partition plus headroom.
            if int(float(free_space)) < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(candidate_uuid))
                continue
            found_usable_disk = True
            LOG.info("Creating first partition on {}".format(target_host))
            first_out = storage_helper.create_host_partition(
                target_host, candidate_uuid, "1", fail_ok=False, wait=False)[1]
            LOG.info("Creating second partition on {}".format(target_host))
            second_rc = storage_helper.create_host_partition(
                target_host, candidate_uuid, "1", fail_ok=True)[0]
            assert second_rc != 0, "Partition creation was expected to fail but was instead successful"
            # Confirm the first request actually produced a partition.
            new_partition_uuid = table_parser.get_value_two_col_table(
                table_parser.table(first_out), "uuid")
            storage_helper.wait_for_host_partition_status(
                host=target_host, uuid=new_partition_uuid, timeout=CP_TIMEOUT)
            partitions_to_restore[target_host] = [new_partition_uuid]
            # Only test one disk on each host
            break

    if not found_usable_disk:
        skip("Did not find disks with sufficient space to test with.")
def test_increase_host_partition_size():
    """
    Create a partition and then modify it to consume the entire disk.

    Arguments:
    * None

    Test Steps:
    * Create a partition
    * Modify the partition so we consume all available space on the disk
    * Check that the disk available space goes to zero
    * Delete the partition
    * Check that the available space is freed

    Teardown:
    * Delete the partitions
    """
    global partitions_to_restore
    partitions_to_restore = {}

    candidate_hosts = (system_helper.get_controllers() +
                       system_helper.get_hosts(personality="compute"))
    found_usable_disk = False
    for target_host in candidate_hosts:
        host_disks = storage_helper.get_host_disks(target_host)
        disks_with_space = storage_helper.get_host_disks_with_free_space(
            target_host, host_disks)
        if not disks_with_space:
            continue
        for candidate_uuid, free_space in disks_with_space.items():
            whole_gib = int(float(free_space))
            if whole_gib < 2:
                LOG.info(
                    "Skip disk {} due to insufficient space".format(candidate_uuid))
                continue
            found_usable_disk = True
            LOG.info("Creating partition on {}".format(target_host))
            create_rc, create_out = storage_helper.create_host_partition(
                target_host, candidate_uuid, "1", fail_ok=False, wait=False)
            assert create_rc == 0, "Partition creation was expected to succeed but instead failed"
            # Confirm the partition was actually created before resizing it.
            new_partition_uuid = table_parser.get_value_two_col_table(
                table_parser.table(create_out), "uuid")
            storage_helper.wait_for_host_partition_status(
                host=target_host, uuid=new_partition_uuid, timeout=CP_TIMEOUT)
            partitions_to_restore[target_host] = [new_partition_uuid]

            # Derive the parent disk's device node from the partition's:
            # strip the trailing partition number, and for nvme devices also
            # drop the 'p' separator (e.g. /dev/nvme0n1p1 -> /dev/nvme0n1).
            device_node = storage_helper.get_host_partition_values(
                target_host, new_partition_uuid, "device_node")[0]
            device_node = device_node.rstrip(string.digits)
            if device_node.startswith("/dev/nvme"):
                device_node = device_node[:-1]

            grown_size = int(float(free_space)) - 2
            LOG.tc_step(
                "Modifying partition {} from size 1 to size {} from host {} on device node {}"
                .format(new_partition_uuid, grown_size, target_host,
                        device_node))
            storage_helper.modify_host_partition(target_host,
                                                 new_partition_uuid,
                                                 str(grown_size))
            remaining_gib = storage_helper.get_host_disk_values(
                target_host, device_node, "available_gib")[0]
            assert 0 <= int(float(remaining_gib)) <= 3, \
                "Expected disk space to be consumed but instead we have {} available".format(remaining_gib)
            # Only test one disk on each host
            break

    if not found_usable_disk:
        skip("Did not find disks with sufficient space to test with.")
def test_swact_100_times():
    """
    Swact the controllers 100 times while a VM continuously writes data.

    Skip Condition:
        - Less than two controllers on system

    Test Steps:
        - Boot a vm and ensure it's pingable
        - Start writing from pre-existed vm before swacting
        - Repeat following steps 100 times:
            - ensure system has standby controller
            - system host-swact
            - ensure all services are active in sudo sm-dump on new active
              controller
            - ensure pre-existed vm is still pingable from NatBox
            - ensure writing did not stop on pre-existed vm
            - ensure new vm can be launched in 2 minutes
            - ensure newly booted vm is pingable from NatBox
            - delete newly booted vm

    Teardown:
        - delete vms, volumes
    """
    if len(system_helper.get_controllers()) < 2:
        skip("Less than two controllers on system")

    if not system_helper.get_standby_controller_name():
        assert False, "No standby controller on system"

    LOG.tc_step("Boot a vm and ensure it's pingable")
    vm_base = vm_helper.boot_vm(name='pre_swact', cleanup='function')[1]

    LOG.tc_step("Start writing from pre-existed vm before swacting")
    end_event = Events("End write in base vm")
    base_vm_thread = vm_helper.write_in_vm(vm_base, end_event=end_event,
                                           expect_timeout=40,
                                           thread_timeout=60 * 100)

    # try/finally guarantees the writer thread is stopped even if an
    # assertion fails mid-loop.  (The previous 'except: raise' clause was a
    # no-op and has been dropped.)
    try:
        for i in range(100):
            iter_str = "Swact iter{}/100 - ".format(i + 1)

            LOG.tc_step("{}Ensure system has standby controller".format(iter_str))
            standby = system_helper.get_standby_controller_name()
            assert standby

            LOG.tc_step("{}Swact active controller and ensure active controller is changed".format(iter_str))
            host_helper.swact_host()

            LOG.tc_step("{}Check all services are up on active controller via sudo sm-dump".format(iter_str))
            host_helper.wait_for_sm_dump_desired_states(controller=standby, fail_ok=False)

            LOG.tc_step("{}Ensure pre-existed vm still pingable post swact".format(iter_str))
            vm_helper.wait_for_vm_pingable_from_natbox(vm_id=vm_base, timeout=45)

            time.sleep(5)
            LOG.tc_step("{}Ensure writing from pre-existed vm resumes after swact".format(iter_str))
            assert base_vm_thread.res is True, "Writing in pre-existed vm stopped after {}".format(iter_str.lower())

            LOG.tc_step("{}Attempt to boot new vm after 2 minutes of post swact and ensure it's pingable".
                        format(iter_str))
            time.sleep(60)
            # Up to 3 boot attempts; fail the test if none succeeds.
            for j in range(3):
                code, vm_new, msg = vm_helper.boot_vm(name='post_swact',
                                                      fail_ok=True,
                                                      cleanup='function')

                if code == 0:
                    break

                LOG.warning("VM failed to boot - attempt{}".format(j + 1))
                vm_helper.delete_vms(vms=vm_new)
                assert j < 2, "No vm can be booted 2+ minutes after swact"

                # FIX: format arguments were swapped — (j+1, iter_str) filled
                # the iteration prefix slot with the attempt number.
                LOG.tc_step("{}VM{} failed to boot, wait for 30 seconds and retry".format(iter_str, j + 1))
                time.sleep(30)

            vm_helper.wait_for_vm_pingable_from_natbox(vm_new)

            LOG.tc_step("{}Delete the vm created".format(iter_str))
            vm_helper.delete_vms(vms=vm_new)
    finally:
        LOG.tc_step("End the base_vm_thread")
        end_event.set()
        base_vm_thread.wait_for_thread_end(timeout=20)

    post_standby = system_helper.get_standby_controller_name()
    assert post_standby, "System does not have standby controller after last swact"