def create_linux_user(user, password, host, verify_cmd='id', fail_ok=False,
                      verify_after_creation=True):
    LOG.info('Creating user:{} with password:{} on host:{}\n'.format(
        user, password, host))

    command = r"useradd '{}'; echo '{}' | sudo passwd '{}' --stdin".format(
        user, password, user)

    with host_helper.ssh_to_host(host) as connection:
        code, output = connection.exec_sudo_cmd(command, fail_ok=fail_ok)

    if verify_after_creation:
        command = "hostname; sudo su - '{}' -c '{}'".format(
            user, verify_cmd if verify_cmd else 'id')
        with host_helper.ssh_to_host(host) as connection:
            code, output = connection.exec_sudo_cmd(command, fail_ok=False)

        LOG.info('OK, verified user:{} with password:{} on host:{} was '
                 'created\n'.format(user, password, host))

    return code, output

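# Usage sketch for create_linux_user() above; the hostname, credentials and
# verify command below are illustrative values, not fixtures from this repo.
def _example_create_linux_user():
    code, output = create_linux_user(user='testuser01', password='Li69nux*',
                                     host='compute-0', verify_cmd='whoami',
                                     fail_ok=True)
    LOG.info('create_linux_user returned code:{}, output:{}'.format(
        code, output))
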
def test_node_install_kpi(collect_kpi):
    """
    This test measures the install time for each node in the system.
    """
    if not collect_kpi:
        skip("KPI only test. Skipped because KPI collection is not enabled")

    lab_name = ProjVar.get_var("LAB_NAME")
    hosts = system_helper.get_hosts()
    print("System has hosts: {}".format(hosts))

    log_path = NodeInstall.LOG_PATH
    start_cmd = 'head -n 1 {}'.format(log_path)
    end_cmd = 'tail -n 1 {}'.format(log_path)
    date_cmd = '{} -n 1 /var/log/bash.log'

    with host_helper.ssh_to_host('controller-0') as con0_ssh:
        bash_start = con0_ssh.exec_sudo_cmd(date_cmd.format('head'),
                                            fail_ok=False)[1]
        bash_end = con0_ssh.exec_sudo_cmd(date_cmd.format('tail'),
                                          fail_ok=False)[1]
    bash_start = re.findall(TIMESTAMP_PATTERN, bash_start.strip())[0]
    bash_end = re.findall(TIMESTAMP_PATTERN, bash_end.strip())[0]
    date_ = bash_start.split('T')[0]

    def _get_time_delta(start_, end_):
        start_ = start_.replace(',', '.')
        end_ = end_.replace(',', '.')
        start_t = '{}T{}'.format(date_, start_)
        end_t = '{}T{}'.format(date_, end_)

        time_delta = common.get_timedelta_for_isotimes(
            start_t, end_t).total_seconds()
        if time_delta < 0:
            end_t = '{}T{}'.format(bash_end.split('T')[0], end_)
            time_delta = common.get_timedelta_for_isotimes(
                start_t, end_t).total_seconds()
        return time_delta

    for host in hosts:
        with host_helper.ssh_to_host(hostname=host) as host_ssh:
            start_output = host_ssh.exec_sudo_cmd(start_cmd,
                                                  fail_ok=False)[1].strip()
            end_output = host_ssh.exec_sudo_cmd(end_cmd,
                                                fail_ok=False)[1].strip()

        kpi_name = NodeInstall.NAME.format(host)
        start_time = re.findall(NodeInstall.TIMESTAMP_PATTERN,
                                start_output)[0]
        end_time = re.findall(NodeInstall.TIMESTAMP_PATTERN, end_output)[0]
        install_duration = _get_time_delta(start_time, end_time)
        kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                                  kpi_name=kpi_name, log_path=log_path,
                                  lab_name=lab_name,
                                  kpi_val=install_duration, fail_ok=False)

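# Standalone sketch of the midnight-rollover handling inside _get_time_delta
# above: when pairing both timestamps with the install-start date yields a
# negative delta, the end timestamp is re-anchored to the later date seen in
# bash.log. Pure standard library; the timestamps are illustrative.
def _example_rollover_time_delta():
    from datetime import datetime
    fmt = '%Y-%m-%dT%H:%M:%S'
    start_t = datetime.strptime('2023-01-01T23:50:00', fmt)
    end_t = datetime.strptime('2023-01-01T00:10:00', fmt)
    delta = (end_t - start_t).total_seconds()
    if delta < 0:
        # end time actually belongs to the following day
        end_t = datetime.strptime('2023-01-02T00:10:00', fmt)
        delta = (end_t - start_t).total_seconds()
    return delta   # 1200.0 seconds (20 minutes)
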
def _test_check_vm_disk_on_compute(storage, hosts_per_backing):
    """
    Tests that the existence of volumes is properly reported for lvm-backed
    vms.

    Skip:
        - Skip if no lvm-configured compute nodes available

    Test steps:
        - Create a flavor for lvm-backed vms and boot a vm from that flavor
        - SSH onto the node hosting the VM and do the following:
            - Run ps aux and confirm that there is a qemu process
            - Run sudo lvs and confirm the existence of a thin pool
            - Run sudo lvs and confirm the existence of a volume for the vm
        - Ensure that the "free" space shown for the hypervisor (obtained by
          running "nova hypervisor-show <compute node>" and then checking
          the "free_disk_gb" field) reflects the space available within the
          thin pool
        - Delete the instance and ensure that the space is returned to the
          hypervisor

    Test Teardown:
        - Delete created VM if not already done

    """
    hosts_with_backing = hosts_per_backing.get(storage, [])
    if not hosts_with_backing:
        skip(SkipStorageBacking.NO_HOST_WITH_BACKING.format(storage))

    LOG.tc_step("Create flavor and boot vm")
    flavor = nova_helper.create_flavor(storage_backing=storage)[1]
    ResourceCleanup.add('flavor', flavor, scope='function')
    vm = vm_helper.boot_vm(source='image', flavor=flavor,
                           cleanup='function')[1]
    vm_helper.wait_for_vm_pingable_from_natbox(vm)
    vm_host = vm_helper.get_vm_host(vm)

    with host_helper.ssh_to_host(vm_host) as compute_ssh:
        LOG.tc_step("Look for qemu process")
        compute_ssh.exec_sudo_cmd(cmd="lvs --units g")
        assert check_for_qemu_process(compute_ssh), \
            "qemu process not found when calling ps"

        LOG.tc_step("Look for pool information")
        thin_pool_size = get_initial_pool_space(compute_ssh, vm)

        vm_vol_name = vm + '_disk'
        raw_vm_volume_output = compute_ssh.exec_sudo_cmd(
            cmd="lvs --units g --noheadings -o lv_size -S "
                "lv_name={}".format(vm_vol_name))[1]
        assert raw_vm_volume_output, "created vm volume not found"
        vm_volume_size = float(raw_vm_volume_output.strip('<g'))

    LOG.tc_step("Calculate compute free disk space and ensure that it "
                "reflects thin pool")
    expected_space_left = int(thin_pool_size - vm_volume_size)
    free_disk_space = get_compute_free_disk_gb(vm_host)
    assert expected_space_left - 1 <= free_disk_space <= \
        expected_space_left + 1, \
        'Hypervisor-show does not reflect space within thin pool'

    LOG.tc_step("Calculate free space following vm deletion (ensure volume "
                "space is returned)")
    vm_helper.delete_vms(vm)
    free_disk_space = get_compute_free_disk_gb(vm_host)
    assert int(thin_pool_size) == free_disk_space, \
        'Space is not properly returned to the hypervisor or hypervisor ' \
        'info does not properly reflect it'

def is_process_running(pid, host, con_ssh=None, retries=3, interval=3):
    """
    Check if a process with the given PID exists on the host

    Args:
        pid (int): process id
        host (str): host the process resides on
        con_ssh: ssh connection/client to the host
        retries (int): times to re-try if no process found before
            returning failure
        interval (int): seconds to wait before the next re-try

    Returns:
        boolean - True if the process exists, False otherwise
        msg (str) - the details of the process or error messages
    """
    cmd = 'ps -p {}'.format(pid)
    for _ in range(retries):
        with host_helper.ssh_to_host(host, con_ssh=con_ssh) as host_ssh:
            code, output = host_ssh.exec_cmd(cmd, fail_ok=True)
            if 0 != code:
                LOG.warn('Process:{} DOES NOT exist, error:{}'.format(
                    pid, output))
            else:
                return True, output
        time.sleep(interval)

    return False, ''

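# Usage sketch for is_process_running() above; the pid and host name are
# illustrative values.
def _example_is_process_running():
    running, msg = is_process_running(pid=1234, host='controller-0',
                                      retries=5, interval=2)
    assert running, 'pid 1234 not found on controller-0: {}'.format(msg)
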
def delete_test_users():
    global _host_users

    restore_sysadmin_password(target_password=TARGET_PASSWORD)

    LOG.info('Deleting users created for testing\n')
    conn_to_ac = ControllerClient.get_active_controller()
    count = 0
    for (host, user), _ in _host_users.items():
        if user == 'sysadmin' or user == HostLinuxUser.get_user():
            LOG.info('-do not delete user:{} on host:{}\n'.format(user,
                                                                  host))
            continue

        LOG.info('-deleting user:{} on host:{}\n'.format(user, host))
        count += 1
        if host == 'active-controller':
            conn_to_ac.exec_sudo_cmd('userdel -r {}'.format(user))
        else:
            # sleep a bit so controller-1 has the same password as
            # controller-0
            time.sleep(30)
            with host_helper.ssh_to_host(host, password='******') as conn:
                LOG.info('TODO: delete user:{} on host:{} by CLI: '
                         'userdel -r {}\n'.format(user, host, user))
                conn.exec_sudo_cmd("userdel -r '{}'".format(user))

    LOG.info('{} test user(s) deleted'.format(count))

def locate_usb(host_type="controller", min_size=13):
    """
    Try to locate a USB device on a host of the type specified.

    Arguments:
        - host_type (string) - e.g. controller, compute, storage
        - min_size (int) - minimum size of USB required (GiB)

    Returns:
        - hostname, e.g. controller-0
        - usb_device, e.g. /dev/sdb
    """
    LOG.tc_step("Check all hosts of type {} for USB devices".format(
        host_type))
    hosts = system_helper.get_hosts(personality=host_type)
    for host in hosts:
        with host_helper.ssh_to_host(host) as host_ssh:
            cmd = "ls --color=none -ltrd /dev/disk/by-id/usb*"
            rc, out = host_ssh.exec_cmd(cmd)
            if rc == 0:
                usb_device = "/dev/" + (out.splitlines()[0])[-3:]
                LOG.info("Found USB device {} on host {}".format(usb_device,
                                                                 host))
                cmd = "blockdev --getsize64 {}".format(usb_device)
                usb_bytes = host_ssh.exec_sudo_cmd(cmd)[1]
                gib = int(usb_bytes) / (1024 * 1024 * 1024)
                if gib > min_size:
                    LOG.info("Size of USB device is sufficient for test")
                    return host, usb_device
                else:
                    skip("Size of USB device is insufficient for test")

    return (None, None)

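# Usage sketch for locate_usb() above: find a USB stick of at least 13 GiB
# on any compute host and skip the test when none is present. The host type
# and size are illustrative values.
def _example_locate_usb():
    host, usb_device = locate_usb(host_type='compute', min_size=13)
    if not host:
        skip('No USB device found on any compute host')
    LOG.info('Using USB device {} on {}'.format(usb_device, host))
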
def test_launch_pod_via_kubectl(copy_test_apps, delete_test_pod, controller):
    """
    Test custom pod apply and delete

    Args:
        copy_test_apps (str): module fixture
        delete_test_pod: fixture
        controller: test param

    Setups:
        - Copy test files from test server to tis system (module)
        - Delete test pod if it already exists on system

    Test Steps:
        - ssh to given controller
        - kubectl apply custom pod yaml and verify custom pod is added to
          both controllers (if applicable)
        - kubectl delete custom pod and verify it is removed from both
          controllers (if applicable)

    """
    host = controller_precheck(controller)

    with host_helper.ssh_to_host(hostname=host) as con_ssh:
        app_path = os.path.join(copy_test_apps, POD_YAML)
        LOG.tc_step('kubectl apply {}, and check {} pod is created and '
                    'running'.format(POD_YAML, POD_NAME))
        kube_helper.apply_pod(file_path=app_path, pod_name=POD_NAME,
                              check_both_controllers=True, con_ssh=con_ssh)

        LOG.tc_step("Delete {} pod and check it's removed from both "
                    "controllers if applicable".format(POD_NAME))
        kube_helper.delete_resources(resource_names=POD_NAME,
                                     con_ssh=con_ssh)

def upload_helm_charts(tar_file, repo=None, delete_first=False, con_ssh=None,
                       timeout=120, fail_ok=False):
    """
    Upload helm charts via helm-upload cmd
    Args:
        tar_file (str): path to the helm charts tarball
        repo (str|None): repo to upload to; defaults to 'starlingx'
        delete_first (bool): remove an existing charts file before uploading
        con_ssh: ssh connection/client to the active controller
        timeout (int): max seconds to wait for the upload command
        fail_ok (bool): whether to return an error code instead of raising

    Returns (tuple):
        (0, <path_to_charts>)
        (1, <std_err>)
        (2, <hostname for host that does not have helm charts in
            expected dir>)

    """
    if not con_ssh:
        con_ssh = ControllerClient.get_active_controller()

    helm_dir = os.path.normpath(StxPath.HELM_CHARTS_DIR)
    if not repo:
        repo = 'starlingx'
    file_path = os.path.join(helm_dir, repo, os.path.basename(tar_file))
    current_host = con_ssh.get_hostname()
    controllers = [current_host]
    if not system_helper.is_aio_simplex(con_ssh=con_ssh):
        con_name = 'controller-1' if controllers[0] == 'controller-0' \
            else 'controller-0'
        controllers.append(con_name)

    if delete_first:
        for host in controllers:
            with host_helper.ssh_to_host(hostname=host,
                                         con_ssh=con_ssh) as host_ssh:
                if host_ssh.file_exists(file_path):
                    host_ssh.exec_sudo_cmd('rm -f {}'.format(file_path))

    code, output = exec_helm_upload_cmd(tarball=tar_file, repo=repo,
                                        timeout=timeout, con_ssh=con_ssh,
                                        fail_ok=fail_ok)
    if code != 0:
        return 1, output

    file_exist = con_ssh.file_exists(file_path)
    if not file_exist:
        raise exceptions.ContainerError(
            "{} not found on {} after helm-upload".format(file_path,
                                                          current_host))

    LOG.info("Helm charts {} uploaded successfully".format(file_path))
    return 0, file_path

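# Usage sketch for the return convention documented above; the tarball path
# is an illustrative value.
def _example_upload_helm_charts():
    code, output = upload_helm_charts(
        tar_file='/home/sysadmin/custom-charts.tgz', delete_first=True,
        fail_ok=True)
    if code == 0:
        LOG.info('helm charts uploaded to {}'.format(output))
    else:
        LOG.info('helm-upload failed: {}'.format(output))
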
def reset():
    app_name = 'stx-openstack'
    post_status = container_helper.get_apps(application=app_name,
                                            field='status')[0]
    if not post_status.endswith('ed'):
        LOG.fixture_step("Wait for application apply to finish")
        container_helper.wait_for_apps_status(apps=app_name,
                                              status=AppStatus.APPLIED,
                                              timeout=1800,
                                              check_interval=15,
                                              fail_ok=False)

    user_overrides = container_helper.get_helm_override_values(
        chart='nova', namespace='openstack', fields='user_overrides')[0]
    if not user_overrides or user_overrides == 'None':
        LOG.info("No change in nova user_overrides. Do nothing.")
        return

    LOG.fixture_step("Update nova helm-override to reset values")
    container_helper.update_helm_override(chart='nova',
                                          namespace='openstack',
                                          reset_vals=True)
    user_overrides = container_helper.get_helm_override_values(
        chart='nova', namespace='openstack', fields='user_overrides')[0]
    assert not user_overrides, "nova helm user_overrides still exist " \
                               "after reset-values"

    LOG.fixture_step("Re-apply stx-openstack application and ensure it is "
                     "applied")
    container_helper.apply_app(app_name='stx-openstack', check_first=False,
                               applied_timeout=1800)

    check_cmd = 'grep foo {}'.format(conf_path)
    LOG.fixture_step("Ensure user_override is removed from {} in "
                     "nova-compute containers".format(conf_path))
    for host in valid_hosts:
        with host_helper.ssh_to_host(host) as host_ssh:
            LOG.info("Wait for nova-compute pods running on {}".format(host))
            kube_helper.wait_for_openstack_pods_status(
                application='nova', component='compute', con_ssh=host_ssh,
                status=PodStatus.RUNNING)

            LOG.info("Check new release generated for nova compute pods "
                     "on {}".format(host))
            nova_compute_pods = kube_helper.get_openstack_pods(
                field='NAME', application='nova', component='compute',
                con_ssh=host_ssh)[0]
            nova_compute_pods = sorted(nova_compute_pods)
            if NEW_NOVA_COMPUTE_PODS:
                assert NEW_NOVA_COMPUTE_PODS != nova_compute_pods, \
                    "No new release generated after reset values"

            LOG.info("Check custom conf is removed from {} in nova compute "
                     "container on {}".format(conf_path, host))
            for nova_compute_pod in nova_compute_pods:
                code, output = kube_helper.exec_cmd_in_container(
                    cmd=check_cmd, pod=nova_compute_pod, fail_ok=True,
                    con_ssh=host_ssh, namespace='openstack',
                    container_name='nova-compute')
                assert code == 1, \
                    "{} on {} still contains user override info after " \
                    "reset nova helm-override values and reapply " \
                    "stx-openstack app: {}".format(conf_path, host, output)

def check_vm_cpu_model(vm_id, vcpu_model, expt_arch=None):
    if vcpu_model == 'Passthrough':
        pattern_ps = 'host'
        pattern_virsh = 'host-passthrough'
        virsh_tag = 'cpu'
        type_ = 'dict'
    elif vcpu_model:
        virsh_tag = 'cpu/model'
        type_ = 'text'
        if vcpu_model == 'Haswell':
            pattern_ps = pattern_virsh = r'(haswell|haswell\-notsx)'
        else:
            pattern_ps = pattern_virsh = vcpu_model.lower()
    else:   # vcpu model is not set
        pattern_ps = None
        pattern_virsh = None
        virsh_tag = 'cpu'
        type_ = 'dict'

    LOG.info("Check vcpu model is successfully applied to vm via ps aux "
             "and virsh dumpxml on vm host")
    host = vm_helper.get_vm_host(vm_id)
    inst_name = vm_helper.get_vm_instance_name(vm_id)
    with host_helper.ssh_to_host(host) as host_ssh:
        output_ps = host_ssh.exec_cmd(
            "ps aux | grep --color='never' -i {}".format(vm_id),
            fail_ok=False)[1]
        output_virsh = host_helper.get_values_virsh_xmldump(
            inst_name, host_ssh, tag_paths=virsh_tag, target_type=type_)
        output_virsh = output_virsh[0]

    if vcpu_model:
        assert re.search(r'\s-cpu\s{}(\s|,)'.format(pattern_ps),
                         output_ps.lower()), \
            'cpu_model {} not found for vm {}'.format(pattern_ps, vm_id)
    else:
        assert '-cpu' not in output_ps, "cpu model is specified in ps aux"

    if vcpu_model == 'Passthrough':
        assert output_virsh['mode'] == 'host-passthrough', \
            'cpu mode is not passthrough in virsh for vm {}'.format(vm_id)

        LOG.info("Check cpu passthrough model from within the vm")
        vm_vcpu_model = vm_helper.get_vcpu_model(vm_id)
        host_cpu_model = host_helper.get_host_cpu_model(host=host)
        assert host_cpu_model == vm_vcpu_model, \
            "VM cpu model is different than host cpu model with cpu " \
            "passthrough"

        if expt_arch:
            assert expt_arch == vm_vcpu_model, \
                "VM cpu model changed. Original: {}. Current: {}".format(
                    expt_arch, vm_vcpu_model)
    elif vcpu_model:
        assert re.search(pattern_virsh, output_virsh.lower()), \
            'cpu model {} is not found in virsh for vm {}'.format(
                pattern_virsh, vm_id)
    else:
        assert output_virsh == {}, "Virsh cpu output: {}".format(
            output_virsh)
        vm_vcpu_model = vm_helper.get_vcpu_model(vm_id)
        assert 'QEMU Virtual CPU' in vm_vcpu_model, \
            "vCPU model is not QEMU Virtual CPU when unspecified"

def teardown():
    global generated_vm_dict
    for host in generated_vm_dict:
        with host_helper.ssh_to_host(host) as host_ssh:
            for vm in generated_vm_dict[host]:
                host_ssh.exec_sudo_cmd('virsh destroy {}'.format(vm))
                host_ssh.exec_sudo_cmd('virsh undefine {}'.format(vm))
    generated_vm_dict = {}

def get_host_and_ns(netid, host_list):
    for host in host_list:
        with host_helper.ssh_to_host(host) as node_ssh:
            cmd = 'ip netns | grep --color=never {}'.format(netid)
            ns = node_ssh.exec_cmd(cmd=cmd)[1]
            if ns and netid in ns.split()[0]:
                return (host, ns.split()[0])
    return (None, None)

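# Usage sketch for get_host_and_ns() above: locate which host holds the
# network namespace for a given network id. The id and host list are
# illustrative values.
def _example_get_host_and_ns():
    host, ns = get_host_and_ns(netid='4e0a5358c190',
                               host_list=['compute-0', 'compute-1'])
    assert host, 'No namespace found for the network on any candidate host'
    LOG.info('namespace {} found on {}'.format(ns, host))
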
def login_as_linux_user(user, password, host, cmd='whoami',
                        expecting_fail=False):
    if is_on_action_controller(host):
        LOG.info('Login to the active controller:{}\n'.format(host))
        if user != HostLinuxUser.get_user():
            skip('Login to the active controller (will not skip if '
                 'controller-1 is active), host:{}, user:{}'.format(host,
                                                                    user))
            return False, ''

    if user == 'sysadmin':
        LOG.info('Login to the host:{} as "sysadmin"!\n'.format(host))

    LOG.info('Attempt to login to host:{}, user:{}, password:{}\n'.format(
        host, user, password))

    # todo: if host is the active-controller, ssh_to_host will ignore the
    # username and use 'sysadmin', which leads to error
    cmd = '(date; uuid; hostname; {}) 2>/dev/null'.format(cmd)
    try:
        with host_helper.ssh_to_host(host, username=user,
                                     password=password) as conn:
            code, output = conn.exec_cmd(cmd, fail_ok=True)
            LOG.info('code={}, output={}\n'.format(code, output))
            if 0 != code:
                msg = 'Failed to execute cmd:{} on host:{} as user:{}, ' \
                      'password:{}'.format(cmd, host, user, password)
                LOG.info(msg)
                assert expecting_fail, msg
                return False, output
            else:
                assert not expecting_fail, \
                    'Expecting the login to fail but it succeeded: ' \
                    'host:{} as user:{} with password:{}'.format(
                        host, user, password)
                return True, output

    except Exception as e:
        # LOG.info('Caught exception:\n{}\n'.format(e))
        msg = 'Expecting to login but failed with exception:{}'.format(e)
        assert expecting_fail, msg

        if 'Permission denied,' not in str(e):
            LOG.warning('Login as {}/{} failed without Permission denied '
                        'error.'.format(user, password))
        else:
            LOG.info('Failed to login as expected on host:{}, user:{}, '
                     'password:{}, for "Permission denied"'.format(
                         host, user, password))

        return False, str(e)

def get_pmon_process_id(pid_file, host, con_ssh=None):
    cmd = 'cat {} 2>/dev/null | head -n1 && echo 2>/dev/null'.format(
        pid_file)

    with host_helper.ssh_to_host(host, con_ssh=con_ssh) as con:
        code, output = con.exec_cmd(cmd)

    if output.strip():
        return int(output.strip())

    return -1

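# Usage sketch chaining the two PMON helpers above: read the pid recorded in
# a pmon pid file, then confirm the process is alive. The pid-file path is
# an illustrative value, not a path asserted by this repo.
def _example_check_pmon_process(host='controller-0'):
    pid = get_pmon_process_id('/var/run/fsmond.pid', host)
    assert pid > 0, 'no pid recorded in pid file'
    running, msg = is_process_running(pid, host)
    assert running, 'pid {} not running on {}: {}'.format(pid, host, msg)
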
def search_file_on_host(host, where, file_pattern, maxdepth=3):
    with host_helper.ssh_to_host(host) as ssh_client:
        cmd = 'find {} -maxdepth {} -name "{}"'.format(where, maxdepth,
                                                       file_pattern)
        LOG.info('searching file using cmd:{}'.format(cmd))
        rc, file = ssh_client.exec_cmd(cmd)
        if rc != 0:
            LOG.info('Failed to find file with pattern:{}, cmd:{}'.format(
                file_pattern, cmd))

    return rc, file

def delete_images_from_host_registries(con_ssh=None,
                                       auth_info=Tenant.get(
                                           'admin_platform')):
    hosts = system_helper.get_hosts(con_ssh=con_ssh, auth_info=auth_info)
    for host in hosts:
        with host_helper.ssh_to_host(hostname=host,
                                     con_ssh=con_ssh) as host_ssh:
            LOG.info("Delete {} images for host: {}".format(
                STX_MONITOR_APP_NAME, host))
            container_helper.remove_docker_images_with_pattern(
                pattern="elastic", con_ssh=host_ssh, timeout=120)

def test_kube_edgex_services(deploy_edgex, controller):
    """
    Test edgex pods are deployed and running

    Args:
        deploy_edgex (str): module fixture
        controller: test param

    Test Steps:
        - ssh to given controller
        - Wait for EdgeX pods deployment
        - Check all EdgeX pods are running
        - Check EdgeX services displayed: 'edgex-core-command',
          'edgex-core-consul', 'edgex-core-data', 'edgex-core-metadata'
        - Check EdgeX deployments displayed: 'edgex-core-command',
          'edgex-core-consul', 'edgex-core-data', 'edgex-core-metadata'

    """
    pods = ('edgex-core-command', 'edgex-core-consul', 'edgex-core-data',
            'edgex-core-metadata')
    services = ('edgex-core-command', 'edgex-core-consul',
                'edgex-core-data', 'edgex-core-metadata')
    deployments = ('edgex-core-command', 'edgex-core-consul',
                   'edgex-core-data', 'edgex-core-metadata')

    host = check_host(controller=controller)

    with host_helper.ssh_to_host(hostname=host) as con_ssh:
        LOG.tc_step("Check EdgeX pods on {}: {}".format(controller, pods))
        edgex_services = kube_helper.get_resources(resource_type='service',
                                                   namespace='default',
                                                   con_ssh=con_ssh)
        edgex_deployments = kube_helper.get_resources(
            resource_type='deployment.apps', namespace='default',
            con_ssh=con_ssh)

        LOG.tc_step("Wait for EdgeX pods Running")
        kube_helper.wait_for_pods_status(partial_names=pods,
                                         namespace='default',
                                         status=PodStatus.RUNNING,
                                         con_ssh=con_ssh, fail_ok=False)

        LOG.tc_step("Check EdgeX services on {}: {}".format(controller,
                                                            services))
        for service in services:
            assert service in edgex_services, \
                "{} not in kube-system service table".format(service)

        LOG.tc_step("Check EdgeX deployments on {}: {}".format(
            controller, deployments))
        for deployment in deployments:
            assert deployment in edgex_deployments, \
                "{} not in kube-system deployment.apps table".format(
                    deployment)

def check_host_vswitch_port_engine_map(host, con_ssh=None):
    with host_helper.ssh_to_host(host, con_ssh=con_ssh) as host_ssh:
        expt_vswitch_map = host_helper.get_expected_vswitch_port_engine_map(
            host_ssh)
        actual_vswitch_map = host_helper.get_vswitch_port_engine_map(
            host_ssh)

    data_ports = host_helper.get_host_ports_for_net_type(host,
                                                         net_type='data',
                                                         ports_only=True)
    all_ports_used = host_helper.get_host_ports_for_net_type(
        host, net_type=None, ports_only=True)
    ports_dict = host_helper.get_host_ports(host, ['device type', 'name'],
                                            if_name=data_ports, strict=True,
                                            rtn_dict=True)

    extra_mt_ports = 0
    for i in range(len(ports_dict['device type'])):
        device_type = ports_dict['device type'][i]
        if re.search(MELLANOX_DEVICE, device_type):
            # Only +1 if the other port of MX-4 is not used. CGTS-8303
            port_name = ports_dict['name'][i]
            dev = port_name[-1]
            other_dev = '0' if dev == '1' else '1'
            other_port = port_name[:-1] + other_dev
            if other_port not in all_ports_used:
                extra_mt_ports += 1

    if extra_mt_ports > 0:
        LOG.info("{}Mellanox devices are used on {} data interfaces. "
                 "Perform loose check on port-engine map.".format(SEP,
                                                                  host))
        # Check that the actual mapping has x more items than the expected
        # mapping, where x is the number of MT pci devices
        assert len(expt_vswitch_map) + extra_mt_ports == \
            len(actual_vswitch_map)

        # Check that the expected mapping is a subset of the actual mapping
        for port, engines in expt_vswitch_map.items():
            assert port in actual_vswitch_map, \
                "port {} is not included in vswitch.ini on {}. Actual " \
                "vSwitch map: {}".format(port, host, actual_vswitch_map)
            assert engines == actual_vswitch_map[port], \
                'engine list for port {} on {} is not as expected. ' \
                'Expected engines: {}; Actual engines: {}'.format(
                    port, host, engines, actual_vswitch_map[port])
    else:
        LOG.info("{}No extra Mellanox device used on {} data interfaces. "
                 "Perform strict check on port-engine map.".format(SEP,
                                                                   host))

        assert expt_vswitch_map == actual_vswitch_map, \
            "vSwitch mapping unexpected. Expect: {}; Actual: {}".format(
                expt_vswitch_map, actual_vswitch_map)

def test_orphan_audit(orphan_audit_setup, clear_virsh_vms):
    """
    Tests the orphan audit by booting an instance directly on a compute
    node to bypass nova, waiting for 5 minutes and ensuring that it gets
    cleaned up (TC2990 on rally)

    Test setup:
        - SCP two files to a compute node:
            - The DEFAULT_GUEST image currently on the controller node
            - An XML file that is on the test server (orphan_guest.xml)
              that will be used to define and start a VM with virsh
        - Change domain type in the XML file to qemu if the test is being
          run in a vbox

    Test steps:
        - Change the vm name in the XML file to an auto-generated name
        - SSH onto the node hosting the VM and run virsh define
          orphan_guest.xml and then virsh start Orphan_VM to start the VM
        - Assert that vm creation was successful by checking the output of
          virsh start. Output of virsh list is logged as well
        - Check virsh list output to make sure that openstack has
          automatically cleaned up the orphan instance by 5.5 minutes.
          This check is done periodically every 10 seconds up to a maximum
          of 5.5 minutes. The test immediately passes if any of the checks
          reports the absence of the orphan_vm and fails if the vm is
          still present in the list after 5.5 minutes.

    Test Teardown:
        - Delete created VMs

    """
    vm_host = orphan_audit_setup

    # Create standalone vm
    vm_name = common.get_unique_name('orphan', resource_type='vm')
    LOG.tc_step("Change orphan_vm name to an auto-generated name")
    with host_helper.ssh_to_host(vm_host) as host_ssh:
        host_ssh.exec_sudo_cmd(
            "sed -r -i 's#<name>.*</name>#<name>{}</name>#g' "
            "orphan_guest.xml".format(vm_name))

        LOG.tc_step("Create a simple orphan vm")
        create_simple_orphan(host_ssh, vm_host, vm_name)

        list_cmd = 'virsh list --all'
        host_ssh.exec_sudo_cmd(list_cmd)

        # wait and check for deletion
        LOG.tc_step("Check for deletion of vm")
        assert wait_for_deletion(host_ssh, vm_name), \
            "{} is still in virsh list after 330 seconds".format(vm_name)

    global generated_vm_dict
    generated_vm_dict[vm_host].remove(vm_name)

def test_kernel_module_signatures():
    """
    Test kernel modules are properly signed on all stx hosts.

    Steps on each host:
        - 'cat /proc/sys/kernel/tainted', ensure value is 4096. If not, do
          the following steps:
            - 'grep --color=never -i "module verification failed"
              /var/log/kern.log' to find out failed modules
            - 'modinfo <failed_module> | grep --color=never -E
              "sig|filename"' to display signing info for each module

    """
    hosts = system_helper.get_hosts()
    failed_hosts = {}

    for host in hosts:
        with host_helper.ssh_to_host(host) as host_ssh:
            LOG.tc_step("Check for unsigned kernel modules on {}".format(
                host))
            output = host_ssh.exec_cmd('cat /proc/sys/kernel/tainted',
                                       fail_ok=False)[1]
            output_binary = '{0:b}'.format(int(output))
            unsigned_module_bit = '0'
            # The 14th bit from the right flags an unsigned module
            if len(output_binary) >= 14:
                unsigned_module_bit = output_binary[-14]
            if unsigned_module_bit != '0':
                LOG.error("Kernel module verification(s) failed on {}. "
                          "Collecting more info".format(host))

                LOG.tc_step("Check kern.log for modules with failed "
                            "verification")
                failed_modules = []
                err_out = host_ssh.exec_cmd(
                    'grep --color=never -i "module verification failed" '
                    '/var/log/kern.log')[1]
                for line in err_out.splitlines():
                    module = re.findall(
                        r'\] (.*): module verification failed',
                        line)[0].strip()
                    if module not in failed_modules:
                        failed_modules.append(module)

                failed_hosts[host] = failed_modules
                LOG.tc_step("Display signing info for {} failed kernel "
                            "modules: {}".format(host, failed_modules))
                for module in failed_modules:
                    host_ssh.exec_cmd('modinfo {} | grep --color=never -E '
                                      '"sig|filename"'.format(module))

    assert not failed_hosts, "Kernel module signature verification " \
                             "failed on: {}".format(failed_hosts)

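# Worked example of the taint-bitmask decoding used above, with no ssh
# involved. A tainted value of 4096 sets only bit 12 (out-of-tree module),
# so the 14th bit from the right -- bit 13, the unsigned-module flag -- is
# still '0'; 4096 + 8192 sets bit 13 and would fail the check.
def _example_unsigned_module_taint_bit(tainted=4096 + 8192):
    bits = '{0:b}'.format(int(tainted))
    return len(bits) >= 14 and bits[-14] == '1'   # True for 12288
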
def check_kern_log():
    cmd = """cat /var/log/kern.log | grep -i --color=never "(i40e): transmit queue" | awk '$0 > "{}"'""".\
        format(start_time)
    i40e_errs = []
    host_helper.wait_for_hosts_ready(hosts=hosts)
    for host in hosts:
        with host_helper.ssh_to_host(hostname=host) as host_ssh:
            output = host_ssh.exec_cmd(cmd)[1]
            if output:
                i40e_errs.append("{}: {}".format(host, output))

    assert not i40e_errs, "i40e errors: {}".format(i40e_errs)

def check_vm_files_on_hypervisor(vm_id, vm_host, instance_name):
    with host_helper.ssh_to_host(vm_host) as host_ssh:
        cmd = "ls /var/lib/nova/instances/{}".format(vm_id)
        cmd_output = host_ssh.exec_cmd(cmd)[1]
        for expt_file in ('console.log', 'disk.config'):
            assert expt_file in cmd_output, \
                "{} is not found for config drive vm {} on " \
                "{}".format(expt_file, vm_id, vm_host)

        output = host_ssh.exec_cmd('ls /run/libvirt/qemu')[1]
        libvirt = "{}.xml".format(instance_name)
        assert libvirt in output, \
            "{} is not found in /run/libvirt/qemu on {}".format(libvirt,
                                                                vm_host)

def test_kpi_cyclictest_hypervisor(collect_kpi, prepare_test_session,
                                   get_hypervisor):
    if not collect_kpi:
        skip("KPI only test. Skipped because KPI collection is not enabled")

    global testable_hypervisors
    chosen_hypervisor = get_hypervisor
    cpu_info = testable_hypervisors[chosen_hypervisor]
    cpu_info['for_host_test'] = True
    LOG.info('Hypervisor chosen to run cyclictest: {}'.format(
        chosen_hypervisor))

    active_controller_name = system_helper.get_active_controller_name()
    program = os.path.join(os.path.normpath(CYCLICTEST_DIR),
                           os.path.basename(CYCLICTEST_EXE))
    LOG.debug('program={}'.format(program))

    with host_helper.ssh_to_host(chosen_hypervisor) as target_ssh:
        prep_test_on_host(target_ssh, chosen_hypervisor, program,
                          active_controller_name)
        run_log, hist_file = run_cyclictest(target_ssh, program,
                                            chosen_hypervisor,
                                            cpu_info=cpu_info)

        LOG.info("Process and upload test results")
        local_run_log, local_hist_file = fetch_results_from_target(
            target_ssh=target_ssh, target_host=chosen_hypervisor,
            active_con_name=active_controller_name, run_log=run_log,
            hist_file=hist_file)

    testable_hypervisors[chosen_hypervisor]['for_host_test'] = False

    avg_val, six_nines_val = calculate_results(
        run_log=local_run_log, hist_file=local_hist_file,
        cores_to_ignore=None, num_cores=len(cpu_info['vm_cores']))

    # Record the average KPI with avg_val; the original passed six_nines_val
    # here, leaving avg_val unused
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_HYPERVISOR_AVG,
                              kpi_val=avg_val, uptime=15,
                              unit=CyclicTest.UNIT)
    kpi_log_parser.record_kpi(local_kpi_file=collect_kpi,
                              kpi_name=CyclicTest.NAME_HYPERVISOR_6_NINES,
                              kpi_val=six_nines_val, uptime=15,
                              unit=CyclicTest.UNIT)

def _test_status_firewall_reboot():
    """
    Test iptables status after reboot of controller

    Test Steps:
        - Stop iptables service
        - Confirm iptables service has stopped
        - Reboot the controller being tested
        - Confirm iptables service is online
        - Repeat for second controller
    """
    LOG.tc_step("Getting the controller(s)")
    controllers = system_helper.get_controllers()
    for controller in controllers:
        with host_helper.ssh_to_host(controller) as con_ssh:
            LOG.tc_step("Stopping iptables service")
            cmd = 'service iptables stop'
            con_ssh.exec_sudo_cmd(cmd)
            LOG.tc_step("Checking iptables status")
            cmd = 'service iptables status'
            code, output = con_ssh.exec_sudo_cmd(cmd)
            # Original check was "'Active: inactive' or 'Active: failed' in
            # output", which is always truthy; check each substring instead
            assert 'Active: inactive' in output or \
                   'Active: failed' in output, \
                "iptables service did not stop running on host {}".format(
                    controller)

        LOG.tc_step("Rebooting {}".format(controller))
        HostsToRecover.add(controller)
        host_helper.reboot_hosts(controller)

        with host_helper.ssh_to_host(controller) as con_ssh:
            LOG.tc_step("Checking iptables status on host {} after "
                        "reboot".format(controller))
            cmd = 'service iptables status | grep --color=never Active'
            code, output = con_ssh.exec_sudo_cmd(cmd)
            assert 'active' in output, \
                "iptables service did not start after reboot on host " \
                "{}".format(controller)

def clear_vxlan_endpoint_stats(compute):
    """
    Clear the vxlan-endpoint-stats

    Args:
        compute (str): compute host to clear the stats on

    Returns:
        code (int): return code of the vshell clear command
    """
    LOG.info("Clearing vshell vxlan-endpoint-stats")
    with host_helper.ssh_to_host(compute) as host_ssh:
        code = host_ssh.exec_cmd('vshell vxlan-endpoint-stats-clear',
                                 fail_ok=False)[0]

    return code

def test_ssh_to_hosts():
    """
    Test ssh to every host on system from active controller
    """
    hosts_to_ssh = system_helper.get_hosts(
        availability=[HostAvailState.AVAILABLE, HostAvailState.ONLINE])
    failed_list = []
    for hostname in hosts_to_ssh:
        LOG.tc_step("Attempt SSH to {}".format(hostname))
        try:
            with host_helper.ssh_to_host(hostname):
                pass
        except Exception as e:
            failed_list.append("\n{}: {}".format(hostname, e.__str__()))

    assert not failed_list, "SSH to host(s) failed: {}".format(failed_list)

def get_pmon_process_info(name, host, conf_file=None, con_ssh=None):
    """
    Get process info from its PMON config file
    Args:
        name (str): name of the PMON process
        host (str): host on which the PMON process is running
        con_ssh: connection to the active controller
        conf_file (str): configuration file for the PMON process

    Returns (dict): settings of the process

    """
    LOG.info('Get PMON process information for {}'.format(name))

    if not conf_file:
        file_name = '{}.conf'.format(name)
    else:
        file_name = conf_file

    cmd = 'cat {}'.format(os.path.join(PMON_PROC_CONF_DIR, file_name))

    with host_helper.ssh_to_host(host, con_ssh=con_ssh) as con0_ssh:
        code, output = con0_ssh.exec_sudo_cmd(cmd)

    if 0 != code or not output.strip():
        LOG.error('Failed to read config file:{}/{} for PMON process:{} '
                  'on host:{}, code:{}, message:{}'.format(
                      PMON_PROC_CONF_DIR, file_name, name, host, code,
                      output))
        return {}

    conf_parser = configparser.ConfigParser()
    conf_parser.read_file(StringIO(output))

    settings = {}

    if 'process' in conf_parser.sections():
        settings = {
            k.strip(): v.split(';')[0].strip()
            for k, v in conf_parser.items('process')
        }

    settings['interval'] = int(settings.get('interval', 5))
    settings['debounce'] = int(settings.get('debounce', 20))
    LOG.debug('process settings:{}'.format(settings))
    return settings

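# Standalone sketch of the conf parsing used above: configparser reads the
# file content from an in-memory string, and inline ';' comments are
# stripped from each value by hand, mirroring the helper. The conf text is
# illustrative.
def _example_parse_pmon_conf():
    import configparser
    from io import StringIO
    conf = '[process]\nprocess = sm ; process name\ninterval = 1 ; secs\n'
    parser = configparser.ConfigParser()
    parser.read_file(StringIO(conf))
    settings = {k.strip(): v.split(';')[0].strip()
                for k, v in parser.items('process')}
    return settings   # -> {'process': 'sm', 'interval': '1'}
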
def get_vxlan_endpoint_stats(compute, field='packets-unicast'):
    """
    Get the stats from vshell for vxlan-endpoint-stats-list

    Args:
        compute (str): compute host to query
        field (str): Filter to use to parse packets

    Returns:
        list
    """
    LOG.info("Getting vshell vxlan-endpoint-stats-list")
    with host_helper.ssh_to_host(compute) as host_ssh:
        table_ = table_parser.table(
            host_ssh.exec_cmd('vshell vxlan-endpoint-stats-list',
                              fail_ok=False)[1])
        packets = table_parser.get_values(table_, field, regex=True)

    return packets

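# Usage sketch combining the two vshell helpers above: clear the counters,
# drive traffic, then read back the unicast packet counts. The compute name
# is illustrative, and the traffic step is left to the caller.
def _example_vxlan_counter_cycle(compute='compute-0'):
    clear_vxlan_endpoint_stats(compute)
    # ... generate traffic over the vxlan data network here ...
    packets = get_vxlan_endpoint_stats(compute, field='packets-unicast')
    LOG.info('unicast packets after traffic: {}'.format(packets))
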
def check_host_file_for_vm(vm_id, expecting=True, host=None, fail_ok=True):
    LOG.info('Verify the file for vTPM exists on the hosting node for '
             'VM:' + vm_id)
    if host is None:
        host = vm_helper.get_vm_host(vm_id)

    active_controller_name = system_helper.get_active_controller_name()

    instance_name = vm_helper.get_vm_instance_name(vm_id)
    vtpm_file = vtpm_base_dir.format(
        vm_id=vm_id, instance_name=instance_name) + '/' + vtpm_file_name

    if host != active_controller_name:
        hosting_node = host
    else:
        hosting_node = active_controller_name

    with host_helper.ssh_to_host(hosting_node) as ssh_client:
        if ssh_client.file_exists(vtpm_file):
            LOG.info('OK, found the file for vTPM:{} on host:{}'.format(
                vtpm_file, host))
            assert expecting is True or fail_ok is True, \
                'FAIL, the files supporting vTPM are found on {} while ' \
                'NOT expected'.format(host)

            if expecting is True:
                LOG.info('-this is expected')
            else:
                LOG.info('-this is NOT expected')

            return True, expecting

        else:
            LOG.info('Cannot find the file for vTPM:{} on host:{}'.format(
                vtpm_file, host))
            assert expecting is False or fail_ok is True, \
                'FAIL, the files should be cleared as expected'

            if expecting is False:
                LOG.info('-this is expected')
            else:
                LOG.info('-this is NOT expected')

            return False, expecting

def test_kube_edgex_services(deploy_edgex, controller):
    """
    Test edgex pods are deployed and running

    Args:
        deploy_edgex (str): module fixture
        controller: test param

    Test Steps:
        - ssh to given controller
        - Wait for EdgeX pods deployment
        - Check all EdgeX pods are running
        - Check EdgeX services displayed: 'edgex-core-command',
          'edgex-core-consul', 'edgex-core-data', 'edgex-core-metadata'
        - Check EdgeX deployments displayed: 'edgex-core-command',
          'edgex-core-consul', 'edgex-core-data', 'edgex-core-metadata'

    """
    host = check_host(controller=controller)
    with host_helper.ssh_to_host(hostname=host) as con_ssh:
        pods = ('edgex-core-command', 'edgex-core-consul',
                'edgex-core-data', 'edgex-core-metadata')
        LOG.tc_step("Check EdgeX pods on {} : {}".format(controller, pods))
        kube_system_info = kube_helper.get_pods_info(
            namespace='default', con_ssh=con_ssh,
            type_names=('pod', 'service', 'deployment.apps'),
            keep_type_prefix=False)
        for pod_info in kube_system_info['pod']:
            res, actual_pod_info = kube_helper.wait_for_pods(
                pod_info['name'], namespace='default', con_ssh=con_ssh)
            assert res, "Pod {} status is {} instead of {}".format(
                actual_pod_info['name'], pod_info['status'],
                PodStatus.RUNNING)

        services = ('edgex-core-command', 'edgex-core-consul',
                    'edgex-core-data', 'edgex-core-metadata')
        LOG.tc_step("Check EdgeX services on {}: {}".format(controller,
                                                            services))
        existing_services = kube_system_info['service']
        existing_services = [service['name'] for service in
                             existing_services]
        for service in services:
            assert service in existing_services, \
                "{} not in kube-system service table".format(service)

        deployments = ('edgex-core-command', 'edgex-core-consul',
                       'edgex-core-data', 'edgex-core-metadata')
        LOG.tc_step("Check kube-system deployments on {}: {}".format(
            controller, deployments))
        existing_deployments = kube_system_info['deployment.apps']
        existing_deployments = [deployment['name'] for deployment in
                                existing_deployments]
        for deployment in deployments:
            assert deployment in existing_deployments, \
                "{} not in kube-system deployment.apps table".format(
                    deployment)