def check_affinity(vm, expect_vcpupin):
    """
    Verify the expected vcpu affinity against several libvirt sources

    For every vcpu in ``expect_vcpupin`` the expected affinity is compared
    with the values obtained from ``affinity_from_vcpuinfo``,
    ``affinity_from_vcpupin`` and, when it returned a non-empty result,
    ``affinity_from_xml``. Each mismatch is logged as an error.

    :param vm: VM object
    :param expect_vcpupin: Expected affinity details, keyed by vcpu
    :return: True if every checked source matches the expectation,
             False if not
    """
    if hasattr(utils, 'total_count'):
        host_cpu_count = utils.total_count()
    else:
        host_cpu_count = utils.total_cpus_count()

    affinity_xml = affinity_from_xml(vm)
    affinity_vcpupin = affinity_from_vcpupin(vm)
    affinity_vcpuinfo = affinity_from_vcpuinfo(vm)

    all_match = True
    for vcpu in list(expect_vcpupin.keys()):
        wanted = cpus_string_to_affinity_list(str(expect_vcpupin[vcpu]),
                                              host_cpu_count)
        vcpu_index = int(vcpu)

        # vcpuinfo and vcpupin results are keyed by the integer vcpu id
        if affinity_vcpuinfo[vcpu_index] != wanted:
            logging.error("CPU affinity in virsh vcpuinfo output"
                          " is unexpected")
            all_match = False

        if affinity_vcpupin[vcpu_index] != wanted:
            logging.error("Virsh vcpupin output is unexpected")
            all_match = False

        # The domain xml result is looked up with the key exactly as given
        if affinity_xml and affinity_xml[vcpu] != wanted:
            logging.error("Affinity in domain XML is unexpected")
            all_match = False

    if all_match:
        logging.debug("Vcpupin info check pass")
    return all_match
def run(test, params, env):
    """
    Check for time jumps in the guest (only for Linux guest):

    1) boot guest with '-rtc base=utc,clock=host,driftfix=slew'
    2) log the clocksource currently used by the guest
    3) pin every vcpu thread to a specific host CPU with taskset
    4) run ``check_cmd`` in the guest and verify that consecutive
       timestamps are either equal or exactly one second apart

    :param test: QEMU test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.
    """
    vm = env.get_vm(params["main_vm"])
    session = vm.wait_for_login()

    error_context.context("Check the clock source currently used on guest",
                          test.log.info)
    clocksource_cmd = ("cat /sys/devices/system/clocksource/"
                       "clocksource0/current_clocksource")
    test.log.info("%s is current clocksource.",
                  session.cmd_output(clocksource_cmd))

    error_context.context("Pin every vcpu to physical cpu", test.log.info)
    host_cpu_num = int(cpu.total_count())
    if len(vm.vcpu_threads) > host_cpu_num:
        # More vcpus than host CPUs: wrap around the host CPU ids
        host_cpus = [index % host_cpu_num
                     for index in range(len(vm.vcpu_threads))]
    else:
        host_cpus = range(host_cpu_num)
    for vcpu, pcpu in list(zip(vm.vcpu_threads, host_cpus)):
        process.system("taskset -p -c %s %s" % (pcpu, vcpu))

    check_cmd = params["check_cmd"]
    output = str(session.cmd_output(check_cmd)).splitlines()
    session.close()

    # Convert every output line to seconds since the epoch
    time_pattern = "%y-%m-%d %H:%M:%S"
    time_list = [time.mktime(time.strptime(stamp, time_pattern))
                 for stamp in output]

    # Each reading must equal its predecessor or be one second later
    for current, following in zip(time_list, time_list[1:]):
        if following not in (current, current + 1):
            test.fail("Test fail, time jumps backward or forward on guest")
def affinity_from_proc(vm):
    """
    Build the vcpu affinity dict from the allowed-cpu list of each vcpu task

    :param vm: VM object
    :return: dict mapping the vcpu index to its affinity list
    """
    pid = vm.get_pid()
    if hasattr(utils, 'total_count'):
        host_cpu_count = utils.total_count()
    else:
        host_cpu_count = utils.total_cpus_count()

    proc_affinity = {}
    for vcpu, vcpu_pid in enumerate(vm.get_vcpus_pid()):
        allowed_cpus = cpu_allowed_list_by_task(pid, vcpu_pid)
        proc_affinity[vcpu] = cpus_string_to_affinity_list(
            allowed_cpus, int(host_cpu_count))
    return proc_affinity
def affinity_from_vcpupin(vm, vcpu=None, options=None):
    """
    Build the vcpu affinity dict from the output of virsh vcpupin

    :param vm: VM object
    :param vcpu: virtual cpu to query
    :param options: --live, --current or --config
    :return: dict mapping the integer vcpu id to its affinity list
    """
    if hasattr(utils, 'total_count'):
        host_cpu_count = utils.total_count()
    else:
        host_cpu_count = utils.total_cpus_count()
    result = virsh.vcpupin(vm.name, vcpu=vcpu, options=options, debug=True)

    # The first two output lines are skipped (presumably the table header);
    # every remaining row is "<vcpu> <cpuset>".
    cpuset_by_vcpu = {}
    for row in result.stdout_text.strip().split('\n')[2:]:
        columns = row.split()
        cpuset = columns[1]
        # On newer version of libvirt, there is no ':' in
        # vcpupin output anymore
        cpuset_by_vcpu[int(columns[0].rstrip(':'))] = cpuset

    return {
        vcpu_id: cpus_string_to_affinity_list(cpuset, host_cpu_count)
        for vcpu_id, cpuset in cpuset_by_vcpu.items()
    }
def test(self):
    """
    Run tbench against a locally forked tbench_srv and log the throughput.

    The process forks: the child replaces itself with ``tbench_srv`` and
    the parent runs the ``tbench`` client, stores its output in
    ``self.results`` and parses throughput and process count from it.
    The server is terminated even when the client command fails.
    """
    # only supports combined server+client model at the moment
    # should support separate I suppose, but nobody uses it
    nprocs = self.params.get('nprocs', default=cpu.total_count())
    args = self.params.get('args', default=None)
    # Without extra arguments only the process count is passed; formatting
    # None with %s would put the literal word "None" on the command line.
    if args:
        args = '%s %s' % (args, nprocs)
    else:
        args = '%s' % nprocs

    pid = os.fork()
    if not pid:  # child
        server = os.path.join(self.sourcedir, 'tbench_srv')
        try:
            os.execlp(server, server)
        finally:
            # Only reached when exec failed: the child must never fall
            # through into the parent's test logic, so leave immediately.
            os._exit(1)

    # parent
    client = os.path.join(self.sourcedir, 'client.txt')
    args = '-c %s %s' % (client, args)
    cmd = os.path.join(self.sourcedir, "tbench") + " " + args
    try:
        # Standard output is verbose and merely makes our debug logs huge
        # so we don't retain it.  It gets parsed for the results.
        self.results = process.system_output(cmd, shell=True).decode()
    finally:
        os.kill(pid, signal.SIGTERM)    # clean up the server

    pattern = re.compile(r"Throughput (.*?) MB/sec (.*?) procs")
    (throughput, procs) = pattern.findall(self.results)[0]
    self.log.info({'throughput': throughput, 'procs': procs})
def affinity_from_xml(vm):
    """
    Build the vcpu affinity dict from the <cputune> pinning in the guest xml

    :param vm: VM object
    :return: dict mapping each pinned vcpu to its affinity list; empty
             when the xml lookup raises LibvirtXMLNotFoundError
    """
    if hasattr(utils, 'total_count'):
        host_cpu_count = utils.total_count()
    else:
        host_cpu_count = utils.total_cpus_count()

    xml_affinity = {}
    try:
        vmxml = libvirt_xml.VMXML.new_from_dumpxml(vm.name)
        vcpupins = vmxml['cputune'].vcpupins
    except LibvirtXMLNotFoundError:
        logging.debug("No <cputune> element find in domain xml")
        return xml_affinity

    # Convert every pinning entry into an affinity list keyed by its vcpu
    for pin in vcpupins:
        affinity = cpus_string_to_affinity_list(pin['cpuset'],
                                                host_cpu_count)
        xml_affinity[pin['vcpu']] = affinity
    return xml_affinity
def _process_lscpu(self):
    """
    Collect CPU topology details from lscpu and the sysfs node directory.

    Sets ``total_cpus`` and ``online_cpus`` from the cpu utility module,
    fills ``model``, ``pchips``, ``psockets`` and ``pcorechips`` for the
    matching lscpu lines, and records the cpulist of every entry under
    /sys/devices/system/node/ whose name contains 'node' in
    ``node_cpu_dict``.
    """
    output = process.system_output("lscpu", shell=True, ignore_status=True)

    # These values do not depend on the lscpu text, so read them once
    # instead of once per output line. This also guarantees they are set
    # when lscpu produced no output (its exit status is ignored above).
    self.total_cpus = cpu.total_count()
    self.online_cpus = cpu.online_list()

    for line in output.decode().splitlines():
        if 'Model name:' in line:
            # Keep only the text before the first '(' of the model name
            self.model = line.split(':')[1].split('(')[0].strip().lower()
            self.log.info("CPU model %s" % self.model)
        if 'Physical chips:' in line:
            self.pchips = int(line.split(':')[1].strip())
            self.log.info("Physical Chips %s" % self.pchips)
        if 'Physical sockets:' in line:
            self.psockets = int(line.split(':')[1].strip())
            self.log.info("Physical Sockets %s" % self.psockets)
        if 'Physical cores/chip:' in line:
            self.pcorechips = int(line.split(':')[1].strip())
            self.log.info("Physical cores/chip %s" % self.pcorechips)

    nodesysfs = '/sys/devices/system/node/'
    for nodefile in os.listdir(nodesysfs):
        if 'node' in nodefile:
            filename = os.path.join(nodesysfs, nodefile, 'cpulist')
            self.node_cpu_dict[nodefile] = genio.read_file(filename)
    self.log.info("Nodes and CPU list: %s" % self.node_cpu_dict)
def run(test, params, env):
    """
    Time drift test (mainly for Windows guests):

    1) Log into a guest.
    2) Take a time reading from the guest and host.
    3) Run load on the guest and host.
    4) Take a second time reading.
    5) Stop the load and rest for a while.
    6) Take a third time reading.
    7) If the drift immediately after load is higher than a user-
       specified value (in %), fail.
       If the drift after the rest period is higher than a user-specified
       value, fail.

    :param test: QEMU test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.
    """
    # Helper functions
    def set_cpu_affinity(pid, mask):
        """
        Set the CPU affinity of all threads of the process with PID pid.
        Do this recursively for all child processes as well.

        :param pid: The process ID.
        :param mask: The CPU affinity mask, either an integer or a
                     hexadecimal string as accepted by taskset.
        :return: A dict containing the previous mask for each thread.
        """
        # taskset parses its mask argument as hexadecimal, so an integer
        # mask must be rendered in hex; "%s" would emit decimal digits
        # that taskset then reads as a different mask.
        if isinstance(mask, int):
            mask = "%x" % mask
        tids = process.run("ps -L --pid=%s -o lwp=" % pid,
                           verbose=False,
                           ignore_status=True).stdout_text.split()
        prev_masks = {}
        for tid in tids:
            # The last word of the taskset query output is kept as the
            # thread's previous mask
            prev_mask = process.run("taskset -p %s" % tid,
                                    verbose=False).stdout_text.split()[-1]
            prev_masks[tid] = prev_mask
            process.system("taskset -p %s %s" % (mask, tid), verbose=False)
        children = process.run("ps --ppid=%s -o pid=" % pid,
                               verbose=False,
                               ignore_status=True).stdout_text.split()
        for child in children:
            prev_masks.update(set_cpu_affinity(child, mask))
        return prev_masks

    def restore_cpu_affinity(prev_masks):
        """
        Restore the CPU affinity of several threads.

        :param prev_masks: A dict containing TIDs as keys and masks as
                           values.
        """
        for tid, mask in prev_masks.items():
            process.system("taskset -p %s %s" % (mask, tid), verbose=False,
                           ignore_status=True)

    vm = env.get_vm(params["main_vm"])
    vm.verify_alive()

    boot_option_added = params.get("boot_option_added")
    boot_option_removed = params.get("boot_option_removed")
    if boot_option_added or boot_option_removed:
        utils_test.update_boot_option(vm,
                                      args_removed=boot_option_removed,
                                      args_added=boot_option_added)

    if params["os_type"] == "windows":
        utils_time.sync_timezone_win(vm)

    timeout = int(params.get("login_timeout", 360))
    session = vm.wait_for_serial_login(timeout=timeout)

    # Collect test parameters:
    # Command to run to get the current time
    time_command = params["time_command"]
    # Filter which should match a string to be passed to time.strptime()
    time_filter_re = params["time_filter_re"]
    # Time format for time.strptime()
    time_format = params["time_format"]
    guest_load_command = params["guest_load_command"]
    guest_load_stop_command = params["guest_load_stop_command"]
    host_load_command = params["host_load_command"]
    guest_load_instances = params["guest_load_instances"]
    host_load_instances = params["host_load_instances"]
    if not guest_load_instances and not host_load_instances:
        # Neither count configured: one load per host CPU / guest vcpu
        host_load_instances = cpu.total_count()
        guest_load_instances = vm.get_cpu_count()
    else:
        host_load_instances = int(host_load_instances)
        guest_load_instances = int(guest_load_instances)
    # CPU affinity mask for taskset
    cpu_mask = int(params.get("cpu_mask", "0xFF"), 16)
    load_duration = float(params.get("load_duration", "30"))
    rest_duration = float(params.get("rest_duration", "10"))
    drift_threshold = float(params.get("drift_threshold", "200"))
    drift_threshold_after_rest = float(
        params.get("drift_threshold_after_rest", "200"))
    test_duration = float(params.get("test_duration", "60"))
    interval_gettime = float(params.get("interval_gettime", "20"))
    guest_load_sessions = []
    host_load_sessions = []

    try:
        # Set the VM's CPU affinity
        prev_affinity = set_cpu_affinity(vm.get_shell_pid(), cpu_mask)

        try:
            # Open shell sessions with the guest
            logging.info("Starting load on guest...")
            for i in range(guest_load_instances):
                load_session = vm.wait_for_login(timeout=timeout)
                # Set output func to None to stop it from being called so we
                # can change the callback function and the parameters it takes
                # with no problems
                load_session.set_output_func(None)
                load_session.set_output_params(())
                load_session.set_output_prefix("(guest load %d) " % i)
                load_session.set_output_func(logging.debug)
                guest_load_sessions.append(load_session)

            # Get time before load
            # (ht stands for host time, gt stands for guest time)
            (ht0, gt0) = utils_test.get_time(session,
                                             time_command,
                                             time_filter_re,
                                             time_format)

            # Run some load on the guest
            if params["os_type"] == "linux":
                # On Linux the load command is formatted with the index
                # of the session that runs it
                for i, load_session in enumerate(guest_load_sessions):
                    load_session.sendline(guest_load_command % i)
            else:
                for load_session in guest_load_sessions:
                    load_session.sendline(guest_load_command)

            # Run some load on the host
            logging.info("Starting load on host...")
            for i in range(host_load_instances):
                load_cmd = aexpect.run_bg(host_load_command,
                                          output_func=logging.debug,
                                          output_prefix="(host load %d) " % i,
                                          timeout=0.5)
                host_load_sessions.append(load_cmd)
                # Set the CPU affinity of the load process; the mask is
                # shifted by the instance index
                pid = load_cmd.get_pid()
                set_cpu_affinity(pid, cpu_mask << i)

            # Sleep for a while (during load)
            logging.info("Sleeping for %s seconds...", load_duration)
            time.sleep(load_duration)

            start_time = time.time()
            while (time.time() - start_time) < test_duration:
                # Get time delta after load
                (ht1, gt1) = utils_test.get_time(session,
                                                 time_command,
                                                 time_filter_re,
                                                 time_format)

                # Report results
                host_delta = ht1 - ht0
                guest_delta = gt1 - gt0
                drift = 100.0 * (host_delta - guest_delta) / host_delta
                logging.info("Host duration: %.2f", host_delta)
                logging.info("Guest duration: %.2f", guest_delta)
                logging.info("Drift: %.2f%%", drift)
                time.sleep(interval_gettime)

        finally:
            logging.info("Cleaning up...")
            # Restore the VM's CPU affinity
            restore_cpu_affinity(prev_affinity)
            # Stop the guest load
            if guest_load_stop_command:
                session.cmd_output(guest_load_stop_command)
            # Close all load shell sessions
            for load_session in guest_load_sessions:
                load_session.close()
            for load_session in host_load_sessions:
                load_session.close()

        # Sleep again (rest)
        logging.info("Sleeping for %s seconds...", rest_duration)
        time.sleep(rest_duration)

        # Get time after rest
        (ht2, gt2) = utils_test.get_time(session,
                                         time_command,
                                         time_filter_re,
                                         time_format)

    finally:
        session.close()
        # remove flags add for this test.
        if boot_option_added or boot_option_removed:
            utils_test.update_boot_option(vm,
                                          args_removed=boot_option_added,
                                          args_added=boot_option_removed)

    # Report results
    host_delta_total = ht2 - ht0
    guest_delta_total = gt2 - gt0
    # NOTE(review): the total drift is divided by host_delta (the last
    # reading taken under load), not by host_delta_total - confirm that
    # this is the intended reference duration.
    drift_total = 100.0 * (host_delta_total - guest_delta_total) / host_delta
    logging.info("Total host duration including rest: %.2f",
                 host_delta_total)
    logging.info("Total guest duration including rest: %.2f",
                 guest_delta_total)
    logging.info("Total drift after rest: %.2f%%", drift_total)

    # Fail the test if necessary
    if abs(drift) > drift_threshold:
        test.fail("Time drift too large: %.2f%%" % drift)
    if abs(drift_total) > drift_threshold_after_rest:
        test.fail("Time drift too large after rest period: %.2f%%" %
                  drift_total)
def run(test, params, env):
    """
    KVM multi test:

    1) Log into guests
    2) Check all the nics available or not
    3) Ping among guest nic and host
       3.1) Ping with different packet size
       3.2) Flood ping test
       3.3) Final ping test
    4) Transfer files among guest nics and host
       4.1) Create file by dd command in guest
       4.2) Transfer file between nics
       4.3) Compare original file and transferred file
    5) ping among different nics
       5.1) Ping with different packet size
       5.2) Flood ping test
       5.3) Final ping test
    6) Transfer files among different nics
       6.1) Create file by dd command in guest
       6.2) Transfer file between nics
       6.3) Compare original file and transferred file
    7) Repeat step 3 - 6 on every nic.

    If irqbalance is active on the host it is stopped for the test and
    started again afterwards, also when the test fails.

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    def ping(session, nic, dst_ip, strick_check, flood_minutes):
        """
        Ping dst_ip through nic: per packet size, flood, then a final run.

        :param session: guest session the ping is run in
        :param nic: name of the guest interface to ping from
        :param dst_ip: address to ping
        :param strick_check: "yes" to fail on any packet loss, otherwise
                             only a non-zero ping status fails the test
        :param flood_minutes: duration of the flood ping in minutes
        """
        d_packet_size = [
            1, 4, 48, 512, 1440, 1500, 1505, 4054, 4055, 4096, 4192, 8878,
            9000, 32767, 65507
        ]
        packet_size = params.get("packet_size", "").split() or d_packet_size
        for size in packet_size:
            error_context.context("Ping with packet size %s" % size,
                                  logging.info)
            status, output = utils_test.ping(dst_ip, 10, interface=nic,
                                             packetsize=size, timeout=30,
                                             session=session)
            # Compare against "yes" exactly like the final ping below; the
            # configured value is a string, so a plain truth test would
            # treat "no" as strict as well.
            if strick_check == "yes":
                ratio = utils_test.get_loss_ratio(output)
                if ratio != 0:
                    test.fail("Loss ratio is %s for packet size"
                              " %s" % (ratio, size))
            elif status != 0:
                test.fail("Ping returns non-zero value %s" % output)

        error_context.context("Flood ping test", logging.info)
        utils_test.ping(dst_ip, None, interface=nic, flood=True,
                        output_func=None, timeout=flood_minutes * 60,
                        session=session)

        error_context.context("Final ping test", logging.info)
        counts = params.get("ping_counts", 100)
        status, output = utils_test.ping(dst_ip, counts, interface=nic,
                                         timeout=float(counts) * 1.5,
                                         session=session)
        if strick_check == "yes":
            ratio = utils_test.get_loss_ratio(output)
            if ratio != 0:
                test.fail("Packet loss ratio is %s after flood" % ratio)
        elif status != 0:
            test.fail("Ping returns non-zero value %s" % output)

    def file_transfer(session, src, dst):
        """
        Copy a generated file from src to dst and back, then compare md5.

        :param session: guest session used to create and checksum files
        :param src: address the file is first copied from
        :param dst: address the file is first copied to
        """
        username = params.get("username", "")
        password = params.get("password", "")
        src_path = "/tmp/1"
        dst_path = "/tmp/2"
        port = int(params["file_transfer_port"])

        cmd = "dd if=/dev/urandom of=%s bs=100M count=1" % src_path
        cmd = params.get("file_create_cmd", cmd)
        error_context.context("Create file by dd command, cmd: %s" % cmd,
                              logging.info)
        session.cmd(cmd)

        transfer_timeout = int(params.get("transfer_timeout"))
        log_filename = "scp-from-%s-to-%s.log" % (src, dst)
        error_context.context("Transfer file from %s to %s" % (src, dst),
                              logging.info)
        remote.scp_between_remotes(src, dst, port, password, password,
                                   username, username, src_path, dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)

        # Send the copy back so both files can be checksummed in one place
        src_path = dst_path
        dst_path = "/tmp/3"
        log_filename = "scp-from-%s-to-%s.log" % (dst, src)
        error_context.context("Transfer file from %s to %s" % (dst, src),
                              logging.info)
        remote.scp_between_remotes(dst, src, port, password, password,
                                   username, username, src_path, dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)

        error_context.context("Compare original file and transferred file",
                              logging.info)
        cmd1 = "md5sum /tmp/1"
        cmd2 = "md5sum /tmp/3"
        md5sum1 = session.cmd(cmd1).split()[0]
        md5sum2 = session.cmd(cmd2).split()[0]
        if md5sum1 != md5sum2:
            test.error("File changed after transfer")

    nic_interface_list = []
    check_irqbalance_cmd = params.get("check_irqbalance_cmd",
                                      "systemctl status irqbalance")
    stop_irqbalance_cmd = params.get("stop_irqbalance_cmd",
                                     "systemctl stop irqbalance")
    start_irqbalance_cmd = params.get("start_irqbalance_cmd",
                                      "systemctl start irqbalance")
    status_irqbalance = params.get("status_irqbalance",
                                   "Active: active|running")
    vms = params["vms"].split()
    host_mem = utils_memory.memtotal() // (1024 * 1024)
    host_cpu_count = cpu.total_count()
    vhost_count = 0
    if params.get("vhost"):
        vhost_count = 1
    if host_cpu_count < (1 + vhost_count) * len(vms):
        test.error("The host don't have enough cpus to start guest"
                   "pcus: %d, minimum of vcpus and vhost: %d" %
                   (host_cpu_count, (1 + vhost_count) * len(vms)))

    # Split host memory and CPUs evenly between the guests
    params['mem'] = host_mem // len(vms) * 1024
    params['smp'] = params['vcpu_maxcpus'] = \
        host_cpu_count // len(vms) - vhost_count
    if params['smp'] % 2 != 0:
        params['vcpu_sockets'] = 1
    params["start_vm"] = "yes"
    for vm_name in vms:
        env_process.preprocess_vm(test, params, env, vm_name)

    timeout = float(params.get("login_timeout", 360))
    strict_check = params.get("strick_check", "no")
    host_ip = utils_net.get_ip_address_by_interface(params.get("netdst"))
    host_ip = params.get("srchost", host_ip)
    flood_minutes = float(params["flood_minutes"])

    error_context.context("Check irqbalance service status", logging.info)
    o = process.system_output(check_irqbalance_cmd, ignore_status=True,
                              shell=True).decode()
    check_stop_irqbalance = False
    if re.findall(status_irqbalance, o):
        logging.debug("stop irqbalance")
        process.run(stop_irqbalance_cmd, shell=True)
        check_stop_irqbalance = True
        o = process.system_output(check_irqbalance_cmd, ignore_status=True,
                                  shell=True).decode()
        if re.findall(status_irqbalance, o):
            test.error("Can not stop irqbalance")

    try:
        thread_list = []
        nic_interface = []
        for vm_name in vms:
            guest_ifname = ""
            guest_ip = ""
            vm = env.get_vm(vm_name)
            session = vm.wait_for_login(timeout=timeout)
            thread_list.extend(vm.vcpu_threads)
            thread_list.extend(vm.vhost_threads)
            error_context.context("Check all the nics available or not",
                                  logging.info)
            for index, nic in enumerate(vm.virtnet):
                guest_ifname = utils_net.get_linux_ifname(session, nic.mac)
                guest_ip = vm.get_address(index)
                if not (guest_ifname and guest_ip):
                    # Append the details instead of overwriting the first
                    # half of the message
                    err_log = "vms %s get ip or ifname failed." % vm_name
                    err_log += " ifname: %s, ip: %s." % (guest_ifname,
                                                         guest_ip)
                    test.fail(err_log)
                nic_interface = [guest_ifname, guest_ip, session]
                nic_interface_list.append(nic_interface)

        error_context.context("Pin vcpus and vhosts to host cpus",
                              logging.info)
        host_numa_nodes = utils_misc.NumaInfo()
        vthread_num = 0
        for numa_node_id in host_numa_nodes.nodes:
            numa_node = host_numa_nodes.nodes[numa_node_id]
            # Pin at most one thread per cpu listed for the node
            for _ in range(len(numa_node.cpus)):
                if vthread_num >= len(thread_list):
                    break
                vcpu_tid = thread_list[vthread_num]
                logging.debug("pin vcpu/vhost thread(%s) to cpu(%s)",
                              vcpu_tid, numa_node.pin_cpu(vcpu_tid))
                vthread_num += 1

        nic_interface_list_len = len(nic_interface_list)
        # ping and file transfer test
        for src_ip_index in range(nic_interface_list_len):
            error_context.context("Ping test from guest to host",
                                  logging.info)
            src_ip_info = nic_interface_list[src_ip_index]
            ping(src_ip_info[2], src_ip_info[0], host_ip, strict_check,
                 flood_minutes)
            error_context.context("File transfer test between guest and host",
                                  logging.info)
            file_transfer(src_ip_info[2], src_ip_info[1], host_ip)
            for dst_ip in nic_interface_list[src_ip_index:]:
                # Skip entries that carry the source's own address
                if src_ip_info[1] == dst_ip[1]:
                    continue
                txt = "Ping test between %s and %s" % (src_ip_info[1],
                                                       dst_ip[1])
                error_context.context(txt, logging.info)
                ping(src_ip_info[2], src_ip_info[0], dst_ip[1], strict_check,
                     flood_minutes)
                txt = "File transfer test between %s " % src_ip_info[1]
                txt += "and %s" % dst_ip[1]
                error_context.context(txt, logging.info)
                file_transfer(src_ip_info[2], src_ip_info[1], dst_ip[1])
    finally:
        # Bring irqbalance back even when the test failed part-way, so the
        # host is not left with the service stopped.
        if check_stop_irqbalance:
            process.run(start_irqbalance_cmd, shell=True)