def test_get_info():
    pv0_storage_nodes = system_helper.get_storage_nodes(pv0)
    assert 'storage-0' in pv0_storage_nodes and 'storage-1' in pv0_storage_nodes
    assert not system_helper.get_storage_nodes(r720)
    assert not system_helper.is_aio_system()

    LOG.tc_func_start()
    assert system_helper.is_aio_system(r720)
    LOG.tc_func_end()

    LOG.tc_func_start()
    assert not system_helper.is_aio_system(r730_3_7)
    LOG.tc_func_end()

    LOG.tc_func_start()
    assert not system_helper.get_storage_nodes(r730_3_7)
    LOG.tc_func_end()
def test_create_host_partition_on_storage():
    """
    This test attempts to create a host partition on a storage node. It is
    expected to fail, since host partition creation is only supported on
    controllers and computes.

    Assumptions:
    * We run this on a storage system; otherwise the test is skipped.

    Test steps:
    * Query storage nodes for available disk space
    * Attempt to create a partition on a storage node
    * Check that it is rejected
    """
    hosts = system_helper.get_storage_nodes()
    if not hosts:
        skip("This test requires storage nodes.")

    LOG.tc_step("Gather the disks available on each host")
    for host in hosts:
        disks = storage_helper.get_host_disks(host)
        free_disks = storage_helper.get_host_disks_with_free_space(host, disks)
        if not free_disks:
            continue
        for uuid in free_disks:
            rc, out = storage_helper.create_host_partition(
                host, uuid, int(free_disks[uuid]), fail_ok=True)
            assert rc != 0, \
                "Partition creation was unexpectedly successful on storage " \
                "node {}".format(host)
def is_image_conversion_sufficient(img_file_path=None, guest_os=None,
                                   min_diff=0.05, con_ssh=None,
                                   img_host_ssh=None):
    """
    Check if the image conversion space is sufficient to convert the given
    image to raw format.

    Args:
        img_file_path (str): e.g., ~/images/tis-centos-guest.img
        guest_os (str): has to be specified if img_file_path is unspecified,
            e.g., 'tis-centos-guest'
        min_diff (float): minimum space that must remain after conversion,
            in GB
        con_ssh:
        img_host_ssh:

    Returns (bool):

    """
    if con_ssh is None:
        con_ssh = ControllerClient.get_active_controller()

    if not system_helper.get_storage_nodes(con_ssh=con_ssh):
        return True

    avail_size = get_avail_image_conversion_space(con_ssh=con_ssh)
    file_size = get_image_size(img_file_path=img_file_path, guest_os=guest_os,
                               virtual_size=True, ssh_client=img_host_ssh)

    return avail_size - file_size >= min_diff
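# A minimal sketch of the arithmetic behind the check above, with
# hypothetical sizes (the numbers and the demo function name are
# illustrative, not taken from the helpers):
def _demo_conversion_space_check():
    avail_size = 25.0  # GB available in the image-conversion filesystem
    file_size = 20.0   # GB virtual size of the image to convert
    min_diff = 0.05    # default margin used by is_image_conversion_sufficient
    # 25.0 - 20.0 = 5.0 >= 0.05, so conversion space is considered sufficient
    return avail_size - file_size >= min_diff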
def test_apply_storage_profile_negative(create_storage_profile, personality):

    if personality == 'controller':
        host_name = system_helper.get_standby_controller_name()
        assert host_name, "No standby controller available on system"
    else:
        host_name = host_helper.get_up_hypervisors()[0]

    # For storage systems, skip the test if ceph isn't healthy
    if len(system_helper.get_storage_nodes()) > 0:
        ceph_healthy = storage_helper.is_ceph_healthy()
        if not ceph_healthy:
            skip('Skipping due to ceph not being healthy')

    profile_name = create_storage_profile['profile_name']
    origin_disk_num = create_storage_profile['disk_num']
    disks_num = len(storage_helper.get_host_disks(host_name, 'device_node'))

    expt_err_list = [
        "Please check if host's disks match profile criteria",
        "Failed to create storage function. Host personality must be "
        "'storage'",
    ]
    if disks_num < origin_disk_num - 1:
        expt_err_list.append("profile has more disks than host does")

    positional_arg = host_name + ' ' + profile_name

    HostsToRecover.add(host_name)
    host_helper.lock_host(host_name, swact=True)
    exitcode, output = cli.system('host-apply-storprofile', positional_arg,
                                  fail_ok=True)
    host_helper.unlock_host(host_name)

    assert exitcode == 1 and any(expt in output for expt in expt_err_list), \
        "Expected host-apply-storprofile to be rejected; exitcode={}, " \
        "output={}".format(exitcode, output)
def test_lock_unlock_storage_hosts(no_simplex, no_duplex):
    """
    Lock - Unlock Storage Hosts
    """
    if ProjVar.get_var('SYS_TYPE') != SysType.STORAGE:
        skip('Only applicable to systems with dedicated storage nodes')

    storage_hosts = system_helper.get_storage_nodes()
    LOG.info("Storage nodes found: {}".format(len(storage_hosts)))
    for host in storage_hosts:
        LOG.info("Storage Host: {}".format(host))

        # Lock
        host_helper.lock_host(host=host, fail_ok=False)
        host_helper.wait_for_hosts_ready(hosts=host)
        container_helper.wait_for_apps_status(apps="stx-openstack",
                                              status=AppStatus.APPLIED,
                                              timeout=600,
                                              check_interval=60)
        # Unlock
        host_helper.unlock_host(host=host, fail_ok=False)
        host_helper.wait_for_hosts_ready(hosts=host)
def is_storage_node(self):
    return len(system_helper.get_storage_nodes()) > 0
def storage_node_not_exist():
    return len(system_helper.get_storage_nodes()) == 0
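# A minimal sketch (assumed usage, not taken from the suite) of how these
# predicates would typically gate a storage-only test, mirroring the
# explicit skip() pattern used elsewhere in this module:
def _example_storage_gate():
    if storage_node_not_exist():
        skip("Requires a lab with dedicated storage nodes")
    # ... storage-specific test steps would follow here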
def _test_increase_ceph_mon():
    """
    Increase the size of ceph-mon. Only applicable to a storage system.

    Fails until CGTS-8216

    Test steps:
    1. Determine the current size of ceph-mon
    2. Attempt to modify ceph-mon to invalid values
    3. Check if there is free space to increase ceph-mon
    4. Attempt to increase ceph-mon
    5. Wait for config out-of-date alarms to raise
    6. Lock/unlock all affected nodes (controllers and storage)
    7. Wait for alarms to clear
    8. Check that ceph-mon has the correct updated value

    Enhancement:
    1. Possibly check there is enough disk space for ceph-mon to increase.
       Not sure if this is required since there always seems to be some
       space on the rootfs.
    """
    table_ = table_parser.table(cli.system("ceph-mon-list")[1])
    ceph_mon_gib = table_parser.get_values(table_, "ceph_mon_gib",
                                           **{"hostname": "controller-0"})[0]
    LOG.info("ceph_mon_gib is currently: {}".format(ceph_mon_gib))

    LOG.tc_step("Attempt to modify ceph-mon to invalid values")
    invalid_cmg = ['19', '41', 'fds']
    for value in invalid_cmg:
        host = "controller-0"
        cli.system("ceph-mon-modify {} ceph_mon_gib={}".format(host, value),
                   fail_ok=True)

    if int(ceph_mon_gib) >= 30:
        skip("Insufficient disk space to execute test")

    ceph_mon_gib_avail = 40 - int(ceph_mon_gib)
    new_ceph_mon_gib = math.trunc(ceph_mon_gib_avail / 10) + int(ceph_mon_gib)

    LOG.tc_step("Increase ceph_mon_gib to {}".format(new_ceph_mon_gib))
    hosts = system_helper.get_controllers()
    for host in hosts:
        cli.system("ceph-mon-modify {} ceph_mon_gib={}".format(
            host, new_ceph_mon_gib))
        # We only need to do this for one controller now and it applies to
        # both
        break

    LOG.info("Wait for expected alarms to appear")
    storage_hosts = system_helper.get_storage_nodes()
    total_hosts = hosts + storage_hosts
    for host in total_hosts:
        system_helper.wait_for_alarm(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                     entity_id="host={}".format(host))

    LOG.tc_step("Lock/unlock all affected nodes")
    for host in storage_hosts:
        HostsToRecover.add(host)
        host_helper.lock_host(host)
        host_helper.unlock_host(host)
        system_helper.wait_for_alarm_gone(
            alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
            entity_id="host={}".format(host))
        time.sleep(10)

    standby = system_helper.get_standby_controller_name()
    active = system_helper.get_active_controller_name()
    HostsToRecover.add(standby)
    host_helper.lock_host(standby)
    host_helper.unlock_host(standby)
    system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                      entity_id="host={}".format(standby))
    time.sleep(10)
    host_helper.swact_host(active)
    HostsToRecover.add(active)
    host_helper.lock_host(active)
    host_helper.unlock_host(active)
    system_helper.wait_for_alarm_gone(alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                                      entity_id="host={}".format(active))

    table_ = table_parser.table(cli.system("ceph-mon-list")[1])
    ceph_mon_gib = table_parser.get_values(table_, "ceph_mon_gib",
                                           **{"hostname": "controller-0"})[0]
    assert int(ceph_mon_gib) == new_ceph_mon_gib, "ceph-mon did not change"
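# A worked example (hypothetical starting size of 20 GiB) of the sizing
# arithmetic used in _test_increase_ceph_mon above:
def _demo_ceph_mon_sizing():
    ceph_mon_gib = 20                       # current size (hypothetical)
    ceph_mon_gib_avail = 40 - ceph_mon_gib  # 20 GiB of headroom
    new_ceph_mon_gib = math.trunc(ceph_mon_gib_avail / 10) + ceph_mon_gib
    assert new_ceph_mon_gib == 22           # old size + a truncated tenth of headroom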
def test_ceph_reboot_storage_node(stx_openstack_required):
    """
    us69932_tc2_ceph_mon_process_kill from us69932_ceph_monitoring.odt

    Verify that ceph mon and OSD processes recover when storage nodes are
    rebooted.

    Args:
        - Nothing

    Setup:
        - Requires system with storage nodes

    Test Steps:
        0. Run CEPH pre-check fixture to check:
            - system has storage nodes
            - health of the ceph cluster is okay
            - that we have OSDs provisioned
        1. Delete existing VMs
        2. Boot new VMs and run dd on them
        3. Reboot storage node and ensure both:
            - mon state goes down (if storage-0)
            - OSD state goes down
        4. Ensure mon and OSD state recover afterwards
        5. Cleanup VMs

    Potential rework:
        1. Add the alarms checks for raise and clear
        2. Maybe we don't want to reboot all storage nodes

    What defects this addresses:
        1. CGTS-2975

    Update:
        This test was updated for the Storage and Robustness feature.
    """
    con_ssh = ControllerClient.get_active_controller()

    LOG.tc_step("Delete existing VMs")
    vm_helper.delete_vms()

    LOG.tc_step("Boot various VMs")
    vms = vm_helper.boot_vms_various_types(cleanup="function")

    vm_threads = []
    LOG.tc_step("SSH to VMs and write to disk")
    end_event = Events("End dd in vms")

    try:
        for vm in vms:
            vm_thread = vm_helper.write_in_vm(vm, end_event=end_event,
                                              expect_timeout=40)
            vm_threads.append(vm_thread)

        storage_nodes = system_helper.get_storage_nodes(con_ssh)

        for host in storage_nodes:
            LOG.tc_step('Reboot {}'.format(host))
            HostsToRecover.add(host, scope='function')
            host_helper.reboot_hosts(host, wait_for_offline=True,
                                     wait_for_reboot_finish=False)

            LOG.tc_step('Check health of CEPH cluster')
            ceph_healthy = True
            msg = None
            end_time = time.time() + 10
            while time.time() < end_time:
                ceph_healthy = storage_helper.is_ceph_healthy(con_ssh)
                if not ceph_healthy:
                    msg = 'CEPH cluster is unhealthy as expected after ' \
                          'rebooting {}'.format(host)
                    break

            assert not ceph_healthy, \
                "ceph is unexpectedly still healthy after rebooting " \
                "{}".format(host)
            LOG.info(msg)

            LOG.tc_step('Check that OSDs are down')
            osd_list = storage_helper.get_osds(host, con_ssh)
            all_osds_up = True
            up_list = osd_list.copy()
            end_time = time.time() + 60
            while time.time() < end_time and all_osds_up:
                for osd_id in osd_list:
                    osd_up = storage_helper.is_osd_up(osd_id, con_ssh)
                    if not osd_up:
                        msg = 'OSD ID {} is down as expected'.format(osd_id)
                        LOG.info(msg)
                        up_list.remove(osd_id)
                if len(up_list) > 0:
                    osd_list = up_list.copy()
                else:
                    msg = ' All OSDs are down as expected'
                    LOG.info(msg)
                    all_osds_up = False

            assert not all_osds_up, \
                " One or more OSD(s) {} is(are) up but should be " \
                "down".format(up_list)

            system_helper.wait_for_host_values(host, availability='available')

            LOG.tc_step('Check that OSDs are up')
            osd_list = storage_helper.get_osds(host, con_ssh)
            down_list = osd_list.copy()
            all_osds_up = False
            end_time = time.time() + 60
            while time.time() < end_time and not all_osds_up:
                for osd_id in osd_list:
                    osd_up = storage_helper.is_osd_up(osd_id, con_ssh)
                    if osd_up:
                        msg = 'OSD ID {} is up as expected'.format(osd_id)
                        LOG.info(msg)
                        down_list.remove(osd_id)
                if len(down_list) > 0:
                    osd_list = down_list.copy()
                else:
                    msg = ' All OSDs are up as expected'
                    LOG.info(msg)
                    all_osds_up = True

            assert all_osds_up, \
                " One or more OSD(s) {} is(are) down but should be " \
                "up".format(down_list)

            LOG.tc_step('Check health of CEPH cluster')
            end_time = time.time() + 40
            while time.time() < end_time:
                ceph_healthy = storage_helper.is_ceph_healthy(con_ssh)
                if ceph_healthy is True:
                    break

            assert ceph_healthy, "ceph did not recover to healthy"

        for vm_thread in vm_threads:
            assert vm_thread.res is True, \
                "Writing in vm stopped unexpectedly"
    finally:
        end_event.set()
        for vm_thread in vm_threads:
            vm_thread.wait_for_thread_end(timeout=20)

    LOG.tc_step("Delete existing VMs")
    vm_helper.delete_vms()
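# The OSD up/down checks above follow a simple poll-until-deadline pattern.
# A generic, standalone sketch of that pattern (the helper name is
# illustrative, not part of the suite):
def _wait_until(predicate, timeout, interval=1):
    """Poll predicate() until it returns True or timeout seconds elapse."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if predicate():
            return True
        time.sleep(interval)
    return False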
def pre_system_backup():
    """
    Actions before system backup, including:
        - check the USB device is ready if it is the destination
        - create folder for the backup files on destination server
        - collect logs on the current system

    Args:

    Returns:
    """
    lab = InstallVars.get_install_var('LAB')

    LOG.info("Preparing lab for system backup....")
    backup_dest = BackupVars.get_backup_var("BACKUP_DEST")

    NATBoxClient.set_natbox_client()

    _backup_info = {
        'backup_dest': backup_dest,
        'usb_parts_info': None,
        'backup_dest_full_path': None,
        'dest_server': None
    }

    if backup_dest == 'usb':
        _backup_info['dest'] = 'usb'
        active_controller_name = system_helper.get_active_controller_name()
        if active_controller_name != 'controller-0':
            msg = "controller-0 is not the active controller"
            LOG.info(msg + ", try to swact the host")
            host_helper.swact_host(active_controller_name)
            active_controller_name = \
                system_helper.get_active_controller_name()
            assert active_controller_name == 'controller-0', msg

        LOG.fixture_step(
            "Checking if a USB flash drive is plugged into the controller-0 "
            "node...")
        usb_device = install_helper.get_usb_device_name()
        assert usb_device, "No USB found in controller-0"
        parts_info = install_helper.get_usb_device_partition_info(
            usb_device=usb_device)

        part1 = "{}1".format(usb_device)
        part2 = "{}2".format(usb_device)

        if len(parts_info) < 3:
            skip("USB {} is not partitioned; create two partitions using "
                 "fdisk: partition 1 = {}1, size = 2G, bootable; partition "
                 "2 = {}2, size equal to the available space."
                 .format(usb_device, usb_device, usb_device))

        devices = parts_info.keys()
        LOG.info("Size of {} = {}".format(
            part1, install_helper.get_usb_partition_size(part1)))
        if not (part1 in devices and
                install_helper.get_usb_partition_size(part1) >= 2):
            skip("Insufficient size in {}; at least 2G is required. "
                 "{}".format(part1, parts_info))

        if not (part2 in devices and
                install_helper.get_usb_partition_size(part2) >= 10):
            skip("Insufficient size in {}; at least 10G is required. "
                 "{}".format(part2, parts_info))

        if not install_helper.mount_usb(part2):
            skip("Fail to mount USB for backups")

        LOG.tc_step("Erasing existing files from USB ...")
        assert install_helper.delete_backup_files_from_usb(
            part2), "Fail to erase existing file from USB"
        _backup_info['usb_parts_info'] = parts_info
        _backup_info['backup_dest_full_path'] = BackupRestore.USB_BACKUP_PATH

    elif backup_dest == 'local':
        _backup_info['dest'] = 'local'

        # save backup files on the test server when the destination is local
        backup_dest_path = BackupVars.get_backup_var('BACKUP_DEST_PATH')
        backup_dest_full_path = '{}/{}'.format(backup_dest_path,
                                               lab['short_name'])
        # ssh to test server
        test_server_attr = dict()
        test_server_attr['name'] = TestFileServer.get_hostname().split('.')[0]
        test_server_attr['server_ip'] = TestFileServer.get_server()
        test_server_attr['prompt'] = r'\[{}@{} {}\]\$ ' \
            .format(TestFileServer.get_user(), test_server_attr['name'],
                    TestFileServer.get_user())

        test_server_conn = install_helper.establish_ssh_connection(
            test_server_attr['name'],
            user=TestFileServer.get_user(),
            password=TestFileServer.get_password(),
            initial_prompt=test_server_attr['prompt'])

        test_server_conn.set_prompt(test_server_attr['prompt'])
        test_server_conn.deploy_ssh_key(install_helper.get_ssh_public_key())
        test_server_attr['ssh_conn'] = test_server_conn
        test_server_obj = Server(**test_server_attr)
        _backup_info['dest_server'] = test_server_obj

        # create the backup path for the lab on the test server if it does
        # not already exist
        if test_server_conn.exec_cmd(
                "test -e {}".format(backup_dest_full_path))[0]:
            test_server_conn.exec_cmd(
                "mkdir -p {}".format(backup_dest_full_path))

        # delete any existing files
        test_server_conn.exec_cmd("rm -rf {}/*".format(backup_dest_full_path))

        _backup_info['usb_parts_info'] = None
        _backup_info['backup_dest_full_path'] = backup_dest_full_path

    collect_logs('before_br')

    _backup_info['is_storage_lab'] = \
        (len(system_helper.get_storage_nodes()) > 0)
    return _backup_info
def backup_sysconfig_images(backup_info):
    """
    Backup system images on a storage lab

    Args:
        backup_info - settings for doing system backup

    Returns:
        None
    """

    backup_dest = backup_info['backup_dest']
    backup_dest_path = backup_info['backup_dest_full_path']
    dest_server = backup_info['dest_server']
    copy_to_usb = backup_info['copy_to_usb']

    install_helper.backup_system(backup_dest=backup_dest,
                                 backup_dest_path=backup_dest_path,
                                 dest_server=dest_server,
                                 copy_to_usb=copy_to_usb)

    # Image files are backed up separately on storage labs, i.e. when the
    # number of storage nodes is greater than 0
    if len(system_helper.get_storage_nodes()) > 0:
        LOG.tc_step("Storage lab detected. Copying images to backup.")
        image_ids = glance_helper.get_images()
        for img_id in image_ids:
            prop_key = 'store'
            image_properties = glance_helper.get_image_properties(
                img_id, prop_key, rtn_dict=True)
            LOG.debug('image store backends: {}'.format(image_properties))

            if image_properties and image_properties.get(prop_key,
                                                         None) == 'rbd':
                LOG.info('rbd based image, exporting it: {}, store: '
                         '{}'.format(img_id, image_properties))

                install_helper.export_image(
                    img_id,
                    backup_dest=backup_info['backup_dest'],
                    backup_dest_path=backup_info['backup_dest_full_path'],
                    dest_server=backup_info['dest_server'],
                    copy_to_usb=backup_info['copy_to_usb'])
            else:
                LOG.warn('No store property found for image {}, '
                         'properties: {}'.format(img_id, image_properties))
                prop_key = 'direct_url'
                direct_url = glance_helper.get_image_properties(img_id,
                                                                prop_key)[0]
                if direct_url and direct_url.startswith('rbd://'):
                    LOG.info('found direct_url starting with rbd://, still '
                             'considering it an rbd based image, exporting '
                             'it: {}, stores: {}'.format(img_id,
                                                         image_properties))
                    install_helper.export_image(
                        img_id,
                        backup_dest=backup_dest,
                        backup_dest_path=backup_dest_path,
                        dest_server=dest_server,
                        copy_to_usb=copy_to_usb)
                else:
                    LOG.warn('non-rbd based image, skipping it: {}, '
                             'store: {}'.format(img_id, image_properties))
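# For reference, an rbd-backed glance image typically exposes a direct_url of
# the form rbd://<cluster-fsid>/<pool>/<image-id>/<snapshot-name>, e.g.
# (illustrative IDs) rbd://b9a78d10-..../images/8a9b3c1e-..../snap, which is
# why the fallback above treats a direct_url starting with 'rbd://' as an
# rbd-backed image.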