def test_dc_dead_office_recovery_central(
        reserve_unreserve_all_hosts_module_central):
    """
    Test dead office recovery (DOR) on the central cloud

    Args:
        reserve_unreserve_all_hosts_module_central: module fixture

    Setups:
        - Reserve all nodes for central cloud in vlm

    Test Steps:
        - Launch various types of VMs in the primary cloud
        - Power off all nodes in vlm simultaneously via multi-processing to
          simulate a power outage
        - Power on all nodes
        - Wait for nodes to become online/available
        - Check the managed subclouds are the same as at the start of the test
        - Check all launched VMs are recovered and reachable
    """
    LOG.tc_step("Boot 5 vms with various boot_source, disks, etc")
    vms = vm_helper.boot_vms_various_types()

    central_auth = Tenant.get('admin_platform', dc_region='SystemController')
    hosts = system_helper.get_hosts(auth_info=central_auth)
    managed_subclouds = dc_helper.get_subclouds(mgmt='managed', avail='online')
    hosts_to_check = system_helper.get_hosts(
        availability=['available', 'online'], auth_info=central_auth)
    LOG.info("Online or Available hosts before power-off: {}".format(
        hosts_to_check))

    LOG.tc_step(
        "Powering off hosts in multi-processes to simulate power outage: {}".
        format(hosts))
    try:
        vlm_helper.power_off_hosts_simultaneously(hosts,
                                                  region='central_region')
    finally:
        LOG.tc_step("Wait for 60 seconds and power on hosts: {}".format(hosts))
        time.sleep(60)
        LOG.info("Hosts to check after power-on: {}".format(hosts_to_check))
        vlm_helper.power_on_hosts(hosts,
                                  reserve=False,
                                  reconnect_timeout=HostTimeout.REBOOT +
                                  HostTimeout.REBOOT,
                                  hosts_to_check=hosts_to_check,
                                  region='central_region')

    LOG.tc_step("Check the same subclouds are still managed and online")
    current_managed_subclouds = dc_helper.get_subclouds(mgmt='managed',
                                                        avail='online')
    assert managed_subclouds == current_managed_subclouds, \
        'Managed subclouds changed. Original: {}, current: {}'.format(
            managed_subclouds, current_managed_subclouds)

    LOG.tc_step("Check vms are recovered after dead office recovery")
    vm_helper.wait_for_vms_values(vms, fail_ok=False, timeout=600)

    LOG.tc_step("Check vms are reachable after central cloud DOR test")
    for vm in vms:
        vm_helper.wait_for_vm_pingable_from_natbox(
            vm_id=vm, timeout=VMTimeout.DHCP_RETRY)


def subclouds_to_test(request):
    """
    Collect DNS config and subcloud management info for DNS tests.

    Teardown re-manages the primary subcloud if needed and reverts the DNS
    servers on the SystemController.

    Returns (tuple): (primary_subcloud, other_managed_subclouds)
    """
    LOG.info("Gather DNS config and subcloud management info")
    sc_auth = Tenant.get('admin_platform', dc_region='SystemController')
    dns_servers = system_helper.get_dns_servers(auth_info=sc_auth)

    subcloud = ProjVar.get_var('PRIMARY_SUBCLOUD')

    def revert():
        LOG.fixture_step("Manage {} if unmanaged".format(subcloud))
        dc_helper.manage_subcloud(subcloud)

        LOG.fixture_step("Revert DNS config if changed")
        system_helper.set_dns_servers(nameservers=dns_servers,
                                      auth_info=sc_auth)

    request.addfinalizer(revert)

    managed_subclouds = dc_helper.get_subclouds(mgmt='managed', avail='online')
    if subcloud in managed_subclouds:
        managed_subclouds.remove(subcloud)

    ssh_map = ControllerClient.get_active_controllers_map()
    managed_subclouds = [sc for sc in managed_subclouds if sc in ssh_map]

    return subcloud, managed_subclouds


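# Illustrative only: a minimal sketch of how a test might consume the
# subclouds_to_test fixture above, reusing only helpers that already appear
# in this excerpt. It assumes the fixture is registered with pytest's
# @fixture decorator (not shown here), and the DNS server value is a
# placeholder.
def test_dc_dns_modify_example(subclouds_to_test):
    primary_subcloud, managed_subclouds = subclouds_to_test
    sc_auth = Tenant.get('admin_platform', dc_region='SystemController')

    LOG.tc_step("Modify DNS servers on the system controller")
    new_dns = ['8.8.8.8']  # placeholder value
    system_helper.set_dns_servers(nameservers=new_dns, auth_info=sc_auth)

    LOG.tc_step("Wait for a sync audit, then check the managed primary "
                "subcloud picked up the new DNS config")
    dc_helper.wait_for_sync_audit(subclouds=primary_subcloud)
    subcloud_auth = Tenant.get('admin_platform', dc_region=primary_subcloud)
    assert system_helper.get_dns_servers(auth_info=subcloud_auth) == new_dns

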
def keypair_precheck(request):
    """
    Ensure all online subclouds are managed and keypairs are synced on the
    primary subcloud before keypair tests. Teardown deletes the new keypair
    on the central region and waits for it to sync back.
    """
    LOG.fixture_step("Make sure all online subclouds are managed")
    unmanaged_subclouds = dc_helper.get_subclouds(mgmt='unmanaged',
                                                  avail='online')
    for subcloud in unmanaged_subclouds:
        dc_helper.manage_subcloud(subcloud)

    primary_subcloud = ProjVar.get_var('PRIMARY_SUBCLOUD')
    managed_subclouds = dc_helper.get_subclouds(mgmt='managed', avail='online')
    managed_subclouds.remove(primary_subcloud)
    assert managed_subclouds, "This test needs at least two online " \
                              "subclouds for testing."

    central_auth = Tenant.get('admin', dc_region='SystemController')
    central_keypair = nova_helper.get_keypairs(auth_info=central_auth)

    ssh_map = ControllerClient.get_active_controllers_map()
    managed_subclouds = [subcloud for subcloud in managed_subclouds
                         if subcloud in ssh_map]

    LOG.fixture_step("Ensure keypairs are synced on {}".format(
        primary_subcloud))
    subcloud_auth = Tenant.get('admin', dc_region=primary_subcloud)
    subcloud_keypair = nova_helper.get_keypairs(auth_info=subcloud_auth)

    if sorted(subcloud_keypair) != sorted(central_keypair):
        dc_helper.wait_for_subcloud_keypair(primary_subcloud,
                                            expected_keypair=central_keypair)

    def revert():
        LOG.fixture_step("Manage {} if unmanaged".format(primary_subcloud))
        dc_helper.manage_subcloud(primary_subcloud)

        LOG.fixture_step("Delete new keypair on central region")
        nova_helper.delete_keypairs(keypairs=NEW_KEYPAIR,
                                    auth_info=central_auth)

        LOG.fixture_step("Wait for sync audit on {} and keypair to sync "
                         "over".format(primary_subcloud))
        dc_helper.wait_for_sync_audit(subclouds=primary_subcloud,
                                      filters_regex='keypair')
        dc_helper.wait_for_subcloud_keypair(primary_subcloud,
                                            expected_keypair=central_keypair,
                                            timeout=60,
                                            check_interval=10)

    request.addfinalizer(revert)

    return primary_subcloud, managed_subclouds, central_keypair


def wait_for_image_sync_on_subcloud(image_id, timeout=1000, delete=False):
    """
    Wait for the given image(s) to appear on (or, with delete=True, be
    removed from) the primary subcloud of a DC system. No-op on non-DC
    systems or when the primary subcloud is unmanaged.
    """
    if ProjVar.get_var('IS_DC'):
        if dc_helper.get_subclouds(
                field='management',
                name=ProjVar.get_var('PRIMARY_SUBCLOUD'))[0] == 'managed':
            auth_info = Tenant.get_primary()
            if delete:
                _wait_for_images_deleted(images=image_id,
                                         auth_info=auth_info,
                                         fail_ok=False,
                                         timeout=timeout)
            else:
                wait_for_image_appear(image_id,
                                      auth_info=auth_info,
                                      timeout=timeout)


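# Illustrative only: direct use of wait_for_image_sync_on_subcloud from a
# caller that has just created (or deleted) an image on the
# SystemController. The image id is a placeholder; on non-DC systems or
# with an unmanaged primary subcloud the call returns without waiting.
def example_wait_for_image_sync():
    image_id = '<image-uuid>'  # placeholder for a freshly created image
    # Wait for the image to appear on the managed primary subcloud
    wait_for_image_sync_on_subcloud(image_id)
    # ... or, after the image is deleted centrally, wait for it to be
    # removed from the subcloud as well
    wait_for_image_sync_on_subcloud(image_id, delete=True)

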
def swact_precheck(request):
    """
    Gather subcloud management info for swact tests. Teardown re-manages
    the primary subcloud if it was left unmanaged.

    Returns (tuple): (primary_subcloud, other_managed_subclouds)
    """
    LOG.info("Gather subcloud management info")
    subcloud = ProjVar.get_var('PRIMARY_SUBCLOUD')

    def revert():
        LOG.fixture_step("Manage {} if unmanaged".format(subcloud))
        dc_helper.manage_subcloud(subcloud)

    request.addfinalizer(revert)

    managed_subclouds = dc_helper.get_subclouds(
        mgmt=SubcloudStatus.MGMT_MANAGED,
        avail=SubcloudStatus.AVAIL_ONLINE,
        sync=SubcloudStatus.SYNCED)
    if subcloud in managed_subclouds:
        managed_subclouds.remove(subcloud)

    ssh_map = ControllerClient.get_active_controllers_map()
    managed_subclouds = [sc for sc in managed_subclouds if sc in ssh_map]

    return subcloud, managed_subclouds


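# Illustrative only: a minimal sketch of a test consuming swact_precheck.
# 'host_helper.swact_host' and its auth_info parameter are assumptions
# about the wider framework and are not defined in this excerpt.
def test_swact_subcloud_example(swact_precheck):
    primary_subcloud, managed_subclouds = swact_precheck

    LOG.tc_step("Swact the active controller on {}".format(primary_subcloud))
    subcloud_auth = Tenant.get('admin_platform', dc_region=primary_subcloud)
    host_helper.swact_host(auth_info=subcloud_auth)  # assumed helper

    LOG.tc_step("Verify {} is still managed, online and in-sync".format(
        primary_subcloud))
    assert primary_subcloud in dc_helper.get_subclouds(
        mgmt=SubcloudStatus.MGMT_MANAGED,
        avail=SubcloudStatus.AVAIL_ONLINE,
        sync=SubcloudStatus.SYNCED)

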
def ntp_precheck(request, check_alarms):
    """
    Gather NTP config and subcloud management info for NTP tests; leave at
    most one managed subcloud besides the primary. Teardown reverts the NTP
    config and re-manages any subclouds that were unmanaged here.
    """
    LOG.info("Gather NTP config and subcloud management info")
    central_auth = Tenant.get('admin_platform', dc_region='RegionOne')
    central_ntp = system_helper.get_ntp_servers(auth_info=central_auth)

    primary_subcloud = ProjVar.get_var('PRIMARY_SUBCLOUD')
    subcloud_auth = Tenant.get('admin_platform', dc_region=primary_subcloud)
    subcloud_ntp = system_helper.get_ntp_servers(auth_info=subcloud_auth)

    if central_ntp != subcloud_ntp:
        dc_helper.wait_for_subcloud_ntp_config(subcloud=primary_subcloud)

    managed_subclouds = dc_helper.get_subclouds(mgmt='managed', avail='online')
    ssh_map = ControllerClient.get_active_controllers_map()
    managed_subclouds = [subcloud for subcloud in managed_subclouds
                         if subcloud in ssh_map]
    if primary_subcloud in managed_subclouds:
        managed_subclouds.remove(primary_subcloud)

    managed_subcloud = None
    if managed_subclouds:
        managed_subcloud = managed_subclouds.pop()
        LOG.fixture_step("Leave only one subcloud besides primary subcloud "
                         "to be managed: {}".format(managed_subcloud))

    subclouds_to_revert = []
    if managed_subclouds:
        LOG.info("Unmanage: {}".format(managed_subclouds))
        for subcloud in managed_subclouds:
            if not system_helper.get_alarms(
                    alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                    auth_info=Tenant.get('admin_platform',
                                         dc_region=subcloud)):
                subclouds_to_revert.append(subcloud)
                dc_helper.unmanage_subcloud(subcloud)

    def revert():
        reverted = False
        try:
            LOG.fixture_step("Manage primary subcloud {} if "
                             "unmanaged".format(primary_subcloud))
            dc_helper.manage_subcloud(primary_subcloud)

            LOG.fixture_step("Revert NTP config if changed")
            res = system_helper.modify_ntp(ntp_servers=central_ntp,
                                           auth_info=central_auth,
                                           check_first=True,
                                           clear_alarm=False)[0]
            if res != -1:
                LOG.fixture_step("Lock/unlock config out-of-date hosts in "
                                 "central region")
                system_helper.wait_and_clear_config_out_of_date_alarms(
                    auth_info=central_auth, wait_with_best_effort=True)

                LOG.fixture_step("Lock/unlock config out-of-date hosts in "
                                 "{}".format(primary_subcloud))
                dc_helper.wait_for_subcloud_ntp_config(
                    subcloud=primary_subcloud, expected_ntp=central_ntp,
                    clear_alarm=True)

                if managed_subcloud:
                    LOG.fixture_step("Lock/unlock config out-of-date hosts "
                                     "in {}".format(managed_subcloud))
                    dc_helper.wait_for_subcloud_ntp_config(
                        subcloud=managed_subcloud, expected_ntp=central_ntp,
                        clear_alarm=True)

            if subclouds_to_revert:
                LOG.fixture_step("Manage unmanaged subclouds and check they "
                                 "are unaffected")
                for subcloud in subclouds_to_revert:
                    dc_helper.manage_subcloud(subcloud)
                    assert not system_helper.get_alarms(
                        alarm_id=EventLogID.CONFIG_OUT_OF_DATE,
                        auth_info=Tenant.get('admin_platform',
                                             dc_region=subcloud))

            reverted = True
        finally:
            if not reverted:
                for subcloud in subclouds_to_revert:
                    dc_helper.manage_subcloud(subcloud)

    request.addfinalizer(revert)

    return primary_subcloud, managed_subcloud, central_ntp


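# Illustrative only: a condensed sketch of the NTP modification flow that
# ntp_precheck supports, using only helpers referenced above. The NTP
# server value is a placeholder; clearing the config out-of-date alarms is
# delegated to clear_alarm=True here.
def test_dc_modify_ntp_example(ntp_precheck):
    primary_subcloud, managed_subcloud, central_ntp = ntp_precheck
    central_auth = Tenant.get('admin_platform', dc_region='RegionOne')

    LOG.tc_step("Modify NTP servers on the central region")
    new_ntp = ['0.pool.ntp.org']  # placeholder value
    system_helper.modify_ntp(ntp_servers=new_ntp, auth_info=central_auth,
                             check_first=True, clear_alarm=True)

    LOG.tc_step("Verify the managed primary subcloud syncs the new NTP "
                "config")
    dc_helper.wait_for_subcloud_ntp_config(subcloud=primary_subcloud,
                                           expected_ntp=new_ntp,
                                           clear_alarm=True)

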
def test_launch_app_via_sysinv(copy_test_apps):
    """
    Test upload and apply of a custom app via system cmd on the system
    controller and on each subcloud

    Args:
        copy_test_apps (str): module fixture

    Setups:
        - Copy test files from test server to tis system (module)
        - Remove and delete test app if exists

    Test Steps:
        - system application-upload test app tar file on the system
          controller and wait for it to be uploaded
        - system application-apply test app and wait for it to be applied
        - Check the docker image is stored in the system controller registry
        - Repeat the upload/apply/registry check on each subcloud
    """
    app_dir = copy_test_apps
    app_name = HELM_APP_NAME

    central_ssh = ControllerClient.get_active_controller(name='RegionOne')
    central_auth = Tenant.get('admin_platform', dc_region='SystemController')
    platform_app = container_helper.get_apps(
        auth_info=central_auth, application='platform-integ-apps')
    LOG.info('Check platform-integ-apps is applied on the system controller')
    assert len(platform_app) != 0 and platform_app[0] == 'applied'

    subclouds = dc_helper.get_subclouds()
    LOG.tc_step("Upload and apply {} on system controller".format(app_name))
    container_helper.upload_app(app_name=app_name,
                                app_version=HELM_APP_VERSION,
                                tar_file=os.path.join(app_dir, HELM_TAR),
                                auth_info=central_auth)

    container_helper.apply_app(app_name=app_name, auth_info=central_auth)

    LOG.tc_step(
        "Check docker image is stored in system controller registry.local")
    code, output = cli.system(cmd="registry-image-list | fgrep hellokitty",
                              ssh_client=central_ssh, fail_ok=True)
    assert code == 0
    # LOG.info("code %s, output %s", code, output)

    for subcloud in subclouds:
        subcloud_auth = Tenant.get('admin_platform', dc_region=subcloud)

        LOG.tc_step("Upload/apply custom app on subcloud: {}".format(
            subcloud))
        platform_app = container_helper.get_apps(
            auth_info=subcloud_auth, application='platform-integ-apps')
        LOG.info('Check platform-integ-apps is applied on subcloud '
                 '{}'.format(subcloud))
        assert len(platform_app) != 0 and platform_app[0] == 'applied'

        LOG.tc_step("Upload and apply {} on subcloud: {}".format(
            app_name, subcloud))
        container_helper.upload_app(app_name=app_name,
                                    app_version=HELM_APP_VERSION,
                                    tar_file=os.path.join(app_dir, HELM_TAR),
                                    auth_info=subcloud_auth)
        container_helper.apply_app(app_name=app_name,
                                   auth_info=subcloud_auth)

        LOG.tc_step("Check docker image is stored in {}'s local "
                    "registry".format(subcloud))
        code, output = cli.system(
            cmd="registry-image-list | fgrep hellokitty",
            ssh_client=central_ssh, auth_info=subcloud_auth, fail_ok=True)
        assert code == 0


def delete_images(images, timeout=ImageTimeout.DELETE, check_first=True,
                  fail_ok=False, con_ssh=None, auth_info=Tenant.get('admin'),
                  sys_con_for_dc=True, wait_for_subcloud_sync=True,
                  del_subcloud_cache=True):
    """
    Delete given images

    Args:
        images (list|str): ids of images to delete
        timeout (int): max time to wait for cli to return, and max time to
            wait for images to be removed from openstack image list
        check_first (bool): whether to check if images exist before
            attempting to delete
        fail_ok (bool):
        con_ssh (SSHClient):
        auth_info (dict):
        sys_con_for_dc (bool): for DC system, whether to delete image on
            SystemController
        wait_for_subcloud_sync (bool):
        del_subcloud_cache (bool): whether to delete glance cache on
            subclouds after the glance image is deleted. Otherwise the
            glance image cache only expires on subclouds after 24 hours.

    Returns (tuple):
        (-1, "None of the given image(s) exist on system. Do nothing.")
        (0, "image(s) deleted successfully")
        (1, <stderr>)   # if delete image cli returns stderr
        (2, "Delete image cli ran successfully but some image(s) <ids> did
            not disappear within <timeout> seconds")

    """
    if not images:
        return -1, "No image provided to delete"

    LOG.info("Deleting image(s): {}".format(images))
    if isinstance(images, str):
        images = [images]
    else:
        images = list(images)

    if check_first:
        existing_images = get_images(images=images, auth_info=auth_info,
                                     con_ssh=con_ssh)
        imgs_to_del = list(set(existing_images) & set(images))
        if not imgs_to_del:
            msg = "None of the given image(s) exist on system. Do nothing."
            LOG.info(msg)
            return -1, msg
    else:
        imgs_to_del = list(images)

    args_ = ' '.join(imgs_to_del)

    if sys_con_for_dc and ProjVar.get_var('IS_DC'):
        con_ssh = ControllerClient.get_active_controller('RegionOne')
        auth_info = Tenant.get(tenant_dictname=auth_info['tenant'],
                               dc_region='SystemController')

    exit_code, cmd_output = cli.openstack('image delete', args_,
                                          ssh_client=con_ssh, fail_ok=fail_ok,
                                          auth_info=auth_info,
                                          timeout=timeout)
    if exit_code > 1:
        return 1, cmd_output

    LOG.info("Waiting for images to be removed from openstack image "
             "list: {}".format(imgs_to_del))
    all_deleted, images_deleted = _wait_for_images_deleted(
        imgs_to_del, fail_ok=fail_ok, con_ssh=con_ssh, auth_info=auth_info,
        timeout=timeout)

    if not all_deleted:
        images_undeleted = set(imgs_to_del) - set(images_deleted)
        msg = "Delete image cli ran successfully but some image(s) {} did " \
              "not disappear within {} seconds".format(images_undeleted,
                                                       timeout)
        return 2, msg

    if ProjVar.get_var('IS_DC') and wait_for_subcloud_sync:
        wait_for_image_sync_on_subcloud(images_deleted, timeout=1000,
                                        delete=True)
        if del_subcloud_cache:
            LOG.info("Attempt to delete glance image cache on subclouds.")
            # The glance image cache on a subcloud only expires 24 hours
            # after glance image-delete, so it can fill up the /opt/cgcs
            # filesystem quickly in automated tests. Workaround: manually
            # delete the cached images.
            subclouds = dc_helper.get_subclouds(field='name', avail='online',
                                                mgmt='managed')
            for subcloud in subclouds:
                subcloud_ssh = ControllerClient.get_active_controller(
                    name=subcloud, fail_ok=True)
                if subcloud_ssh:
                    for img in images_deleted:
                        img_path = '/opt/cgcs/glance/image-cache/' \
                                   '{}'.format(img)
                        if subcloud_ssh.file_exists(img_path):
                            subcloud_ssh.exec_sudo_cmd(
                                'rm -f {}'.format(img_path))

    LOG.info("image(s) are successfully deleted: {}".format(imgs_to_del))

    return 0, "image(s) deleted successfully"


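# Illustrative only: typical use of delete_images above with fail_ok=True,
# showing how the (code, msg) return value is meant to be consumed. The
# image ids are placeholders.
def example_delete_images_usage():
    code, msg = delete_images(images=['<image-uuid-1>', '<image-uuid-2>'],
                              fail_ok=True)
    if code == 0:
        LOG.info("Images deleted and removed from subcloud caches")
    elif code == -1:
        LOG.info("Nothing to delete: {}".format(msg))
    else:
        # code 1: cli rejected; code 2: images still listed after timeout
        LOG.info("Image deletion incomplete: {}".format(msg))

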
def test_dc_stress_alarm(subcloud_to_test):
    """
    Test Stress Scenario on Distributed Cloud

    Args:
        subcloud_to_test (str): module fixture

    Setup:
        - Make sure there is consistency between alarm summary on
          Central Cloud and on subclouds

    Test Steps:
        Step1:
        - Trigger a large amount of alarms, quickly, on one subcloud
        - Ensure system alarm-summary on the subcloud matches dcmanager
          alarm summary on the system controller
        Step2:
        - Trigger a large amount of alarms quickly for a long time on all
          subclouds
        - Each alarm summary updates once every 30 seconds until the event
          is over
        - Ensure system alarm-summary on each subcloud matches dcmanager
          alarm summary on the system controller
        Step3:
        - Clear all alarms
        - Ensure system alarm-summary on each subcloud matches dcmanager
          alarm summary on the system controller
    """
    ssh_client = ControllerClient.get_active_controller(name=subcloud_to_test)

    # Step 1
    LOG.tc_step("Trigger large amount of alarms, quickly on one subcloud")
    try:
        for i in range(1, ALARMS_NO + 1):
            ssh_client.exec_cmd(
                "fmClientCli -c \"### ###300.005###clear###system.vm###host="
                "testhost-{}### ###critical### ###processing-error###"
                "cpu-cycles-limit-exceeded"
                "### ###True###True###'\"".format(i),
                fail_ok=False)
    finally:
        for i in range(1, ALARMS_NO + 1):
            ssh_client.exec_cmd('fmClientCli -D host=testhost-{}'.format(i))

    check_alarm_summary_match_subcloud(subcloud_to_test)

    # Step 2
    ssh_client_list = {}
    for subcloud in dc_helper.get_subclouds(mgmt='managed'):
        ssh_client_list[subcloud] = ControllerClient.get_active_controller(
            name=subcloud)

    try:
        LOG.tc_step("Trigger large amount of alarms quickly for a long time "
                    "on all subclouds")
        for subcloud in ssh_client_list:
            subcloud_ssh = ssh_client_list[subcloud]
            for i in range(1, ALARMS_NO + 1):
                subcloud_ssh.exec_cmd(
                    "fmClientCli -c \"### ###300.005###clear###"
                    "system.vm###host=testhost-{}### ###critical### "
                    "###processing-error###"
                    "cpu-cycles-limit-exceeded### ###True###True###'\"".format(
                        i),
                    fail_ok=False)

        for subcloud in ssh_client_list:
            check_alarm_summary_match_subcloud(subcloud)
    finally:
        # Step 3
        LOG.tc_step("Clear all alarms on all subclouds")
        for subcloud in ssh_client_list:
            subcloud_ssh = ssh_client_list[subcloud]
            for i in range(1, ALARMS_NO + 1):
                subcloud_ssh.exec_cmd(
                    'fmClientCli -D host=testhost-{}'.format(i))

        for subcloud in ssh_client_list:
            check_alarm_summary_match_subcloud(subcloud)


def snmp_precheck(request):
    """
    Gather SNMP config and subcloud management info and ensure SNMP
    community strings and trapdests are synced on the primary subcloud.
    Teardown deletes the new SNMP community string and trapdest on the
    central region and waits for them to sync back.
    """
    LOG.info("Gather SNMP config and subcloud management info")
    central_auth = Tenant.get('admin_platform', dc_region='RegionOne')
    central_comms = system_helper.get_snmp_comms(auth_info=central_auth)
    central_trapdests = system_helper.get_snmp_trapdests(
        auth_info=central_auth)

    primary_subcloud = ProjVar.get_var('PRIMARY_SUBCLOUD')
    managed_subclouds = dc_helper.get_subclouds(mgmt='managed', avail='online')
    if primary_subcloud in managed_subclouds:
        managed_subclouds.remove(primary_subcloud)
    else:
        dc_helper.manage_subcloud(primary_subcloud)

    ssh_map = ControllerClient.get_active_controllers_map()
    managed_subclouds = [
        subcloud for subcloud in managed_subclouds if subcloud in ssh_map
    ]

    LOG.fixture_step("Ensure SNMP community strings are synced on {}".format(
        primary_subcloud))
    subcloud_auth = Tenant.get('admin_platform', dc_region=primary_subcloud)
    subcloud_comms = system_helper.get_snmp_comms(auth_info=subcloud_auth)

    if sorted(subcloud_comms) != sorted(central_comms):
        dc_helper.wait_for_subcloud_snmp_comms(primary_subcloud,
                                               expected_comms=central_comms)

    LOG.fixture_step(
        "Ensure SNMP trapdests are synced on {}".format(primary_subcloud))
    subcloud_trapdests = system_helper.get_snmp_trapdests(
        auth_info=subcloud_auth)
    if sorted(subcloud_trapdests) != sorted(central_trapdests):
        dc_helper.wait_for_subcloud_snmp_trapdests(
            primary_subcloud, expected_trapdests=central_trapdests)

    def revert():
        LOG.fixture_step("Manage {} if unmanaged".format(primary_subcloud))
        dc_helper.manage_subcloud(primary_subcloud)

        LOG.fixture_step(
            "Delete new SNMP community string and trapdest on central region")
        system_helper.delete_snmp_comm(comms=SNMP_COMM,
                                       auth_info=central_auth)
        system_helper.delete_snmp_trapdest(ip_addrs=SNMP_TRAPDEST[1],
                                           auth_info=central_auth)

        LOG.fixture_step(
            "Wait for sync audit on {} and SNMP community strings and "
            "trapdests to sync over".format(primary_subcloud))
        dc_helper.wait_for_sync_audit(subclouds=primary_subcloud)
        dc_helper.wait_for_subcloud_snmp_comms(primary_subcloud,
                                               expected_comms=central_comms,
                                               timeout=60,
                                               check_interval=10)
        dc_helper.wait_for_subcloud_snmp_trapdests(
            primary_subcloud, expected_trapdests=central_trapdests,
            timeout=60, check_interval=10)

    request.addfinalizer(revert)

    return primary_subcloud, managed_subclouds, central_comms, \
        central_trapdests