def check_guest_flags(bash_cmd, flags):
    """
    Check bypass_cache option for single guest.
    """
    # Drop caches.
    drop_caches()
    virsh_cmd = "service libvirt-guests stop"
    check_flags_parallel(virsh_cmd,
                         bash_cmd % (managed_save_file, managed_save_file,
                                     "1", flags),
                         flags)
    ret = utils.run("service libvirt-guests status",
                    ignore_status=True)
    logging.info("status output: %s", ret.stdout)
    if all(["Suspending %s" % vm_name not in ret.stdout,
            "stopped, with saved guests" not in ret.stdout]):
        raise error.TestFail("Can't see messages of suspending vm")
    # status command should return 3.
    if ret.exit_status != 3:
        raise error.TestFail("The exit code %s for libvirt-guests"
                             " status is not correct" % ret.exit_status)

    # Wait for VM in shut off state
    wait_for_state("shut off")
    virsh_cmd = "service libvirt-guests start"
    check_flags_parallel(virsh_cmd,
                         bash_cmd % (managed_save_file, managed_save_file,
                                     "0", flags),
                         flags)

    # Wait for VM in running state
    wait_for_state("running")
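drop_caches() is invoked at the start of every bypass_cache check so page-cache state left over from earlier runs cannot mask the O_DIRECT behaviour. A minimal sketch of what such a helper typically does, assuming the standard Linux drop_caches interface (the real helper lives in the framework's utilities):

import subprocess


def drop_caches():
    # Flush dirty pages first so the page cache can actually be reclaimed.
    subprocess.check_call(["sync"])
    # Writing "3" frees the page cache plus dentries and inodes (proc(5)).
    with open("/proc/sys/vm/drop_caches", "w") as f:
        f.write("3")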
def check_guest_flags(bash_cmd, flags):
    """
    Check bypass_cache option for single guest.
    """
    # Drop caches.
    drop_caches()
    virsh_cmd = "service libvirt-guests stop"
    check_flags_parallel(virsh_cmd,
                         bash_cmd % (managed_save_file, managed_save_file,
                                     "1", flags),
                         flags)
    ret = utils.run("service libvirt-guests status",
                    ignore_status=True)
    logging.info("status output: %s", ret.stdout)
    if not re.findall(r"Suspending %s" % vm_name, ret.stdout, re.M):
        raise error.TestFail("Can't see messages of suspending vm")
    # status command should return 3.
    if ret.exit_status != 3:
        raise error.TestFail("The exit code %s for libvirt-guests"
                             " status is not correct" % ret.exit_status)

    # Wait for VM in shut off state
    utils_misc.wait_for(lambda: vm.state() == "shut off", 10)
    virsh_cmd = "service libvirt-guests start"
    check_flags_parallel(virsh_cmd,
                         bash_cmd % (managed_save_file, managed_save_file,
                                     "0", flags),
                         flags)
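Each of these variants ultimately feeds an octal flags value scraped from /proc/&lt;pid&gt;/fdinfo into a bitmask test against os.O_DIRECT. A self-contained sketch of that check (the fdinfo text below is a made-up example):

import os
import re


def fd_has_o_direct(fdinfo_text):
    # fdinfo reports the open(2) flags in octal, e.g. "flags:\t0140002".
    match = re.search(r"flags:\s*(\d+)", fdinfo_text, re.M)
    return bool(match and int(match.group(1), 8) & os.O_DIRECT)


# O_RDWR | O_DIRECT | O_LARGEFILE on x86_64:
assert fd_has_o_direct("pos:\t0\nflags:\t0140002\n")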
def run(test, params, env):
    """
    KVM boot time test:
    1) Set init run level to 1
    2) Send a shutdown command to the guest, or issue a system_powerdown
       monitor command (depending on the value of shutdown_method)
    3) Boot up the guest and measure the boot time
    4) Set init run level back to the old one

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment
    """
    vm = env.get_vm(params["main_vm"])
    vm.verify_alive()
    timeout = int(params.get("login_timeout", 360))
    session = vm.wait_for_login(timeout=timeout)

    error.context("Set guest run level to 1", logging.info)
    single_user_cmd = params['single_user_cmd']
    session.cmd(single_user_cmd)

    try:
        error.context("Shut down guest", logging.info)
        session.cmd('sync')
        vm.destroy()

        error.context("Boot up guest and measure the boot time",
                      logging.info)
        utils_memory.drop_caches()
        vm.create()
        vm.verify_alive()
        session = vm.wait_for_serial_login(timeout=timeout)
        boot_time = utils_misc.monotonic_time() - vm.start_monotonic_time
        test.write_test_keyval({'result': "%ss" % boot_time})
        expect_time = int(params.get("expect_bootup_time", "17"))
        logging.info("Boot up time: %ss", boot_time)
    finally:
        try:
            error.context("Restore guest run level", logging.info)
            restore_level_cmd = params['restore_level_cmd']
            session.cmd(restore_level_cmd)
            session.cmd('sync')
            vm.destroy(gracefully=False)
            env_process.preprocess_vm(test, params, env, vm.name)
            vm.verify_alive()
            vm.wait_for_login(timeout=timeout)
        except Exception:
            logging.warning("Can not restore guest run level, "
                            "need to restore the image")
            params["restore_image_after_testing"] = "yes"

    if boot_time > expect_time:
        raise error.TestFail("Guest boot up is taking too long: %ss"
                             % boot_time)

    session.close()
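single_user_cmd and restore_level_cmd are taken from the test configuration and are distro dependent; plausible values (assumptions for illustration, not the shipped defaults) look like:

# Hypothetical parameter values; systemd guests switch the default target,
# while SysV guests would edit the initdefault line in /etc/inittab instead.
params_example = {
    "single_user_cmd": "systemctl set-default rescue.target",
    "restore_level_cmd": "systemctl set-default multi-user.target",
}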
def create_backup(self, sync, backup_image_name=""):
    """
    Create live backup with qmp command.
    """
    transaction = self.params.get("transaction", "yes")
    drive_name = self.get_device()
    bitmap_name = self.bitmap_name
    backup_format = self.backup_format
    speed = self.speed
    mode = "existing"
    if sync == "full":
        mode = "absolute-paths"
        granularity = int(self.params.get("granularity", 65536))
        backup_image_name = "images/%s.%s" % (self.image_chain[0],
                                              backup_format)
        backup_image_name = utils_misc.get_path(self.data_dir,
                                                backup_image_name)
        self.trash_files.append(backup_image_name)
        if transaction == "yes":
            args_list = []
            bitmap_args = {"node": drive_name,
                           "name": bitmap_name,
                           "granularity": granularity}
            self.transaction_add(args_list,
                                 "block-dirty-bitmap-add", bitmap_args)
            backup_args = {"device": drive_name,
                           "target": backup_image_name,
                           "format": backup_format,
                           "sync": sync,
                           "mode": mode,
                           "speed": speed}
            self.transaction_add(args_list, "drive-backup", backup_args)
            error_context.context("Create bitmap and drive-backup with "
                                  "transaction for %s" % drive_name,
                                  logging.info)
            self.vm.monitor.transaction(args_list)
            if not self.get_status():
                raise exceptions.TestFail("full backup job not found")
            return None
        error_context.context("Create bitmap for %s" % drive_name,
                              logging.info)
        self.vm.monitor.operate_dirty_bitmap("add", drive_name,
                                             bitmap_name, granularity)
    if not backup_image_name:
        raise exceptions.TestError("No backup target provided.")
    error_context.context("Create %s backup for %s" % (sync, drive_name),
                          logging.info)
    self.vm.monitor.drive_backup(drive_name, backup_image_name,
                                 backup_format, sync, speed, mode,
                                 bitmap_name)
    if not self.get_status():
        raise exceptions.TestFail("%s backup job not found" % sync)
    utils_memory.drop_caches()
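transaction_add() presumably appends QMP transaction actions to args_list; when the list is submitted, the wire-level QMP request looks roughly like the following. The command and action names are QEMU's own ("transaction", "block-dirty-bitmap-add", "drive-backup"); the node, target and granularity values here are hypothetical:

# Roughly the raw QMP payload the transaction path above ends up issuing.
qmp_transaction = {
    "execute": "transaction",
    "arguments": {
        "actions": [
            {"type": "block-dirty-bitmap-add",
             "data": {"node": "drive_image1", "name": "bitmap0",
                      "granularity": 65536}},
            {"type": "drive-backup",
             "data": {"device": "drive_image1",
                      "target": "/tmp/full.qcow2",
                      "format": "qcow2", "sync": "full",
                      "mode": "absolute-paths", "speed": 0}},
        ]
    },
}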
def run(test, params, env):
    """
    KVM restore from file-test:
    1) Pause VM
    2) Save VM to file
    3) Restore VM from file, and measure the time it takes
    4) Remove VM restoration file
    5) Check VM

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment
    """
    vm = env.get_vm(params["main_vm"])
    vm.verify_alive()
    timeout = int(params.get("login_timeout", 360))
    expect_time = int(params.get("expect_restore_time", 25))
    session = vm.wait_for_login(timeout=timeout)
    save_file = params.get(
        "save_file",
        os.path.join("/tmp", utils_misc.generate_random_string(8)))

    try:
        error.context("Pause VM", logging.info)
        vm.pause()
        error.context("Save VM to file", logging.info)
        vm.save_to_file(save_file)
        error.context("Restore VM from file", logging.info)
        time.sleep(10)
        utils_memory.drop_caches()
        vm.restore_from_file(save_file)
        session = vm.wait_for_login(timeout=timeout)
        restore_time = utils_misc.monotonic_time() - vm.start_monotonic_time
        test.write_test_keyval({'result': "%ss" % restore_time})
        logging.info("Restore time: %ss", restore_time)
    finally:
        try:
            error.context("Remove VM restoration file", logging.info)
            os.remove(save_file)
            error.context("Check VM", logging.info)
            vm.verify_alive()
            vm.wait_for_login(timeout=timeout)
        except Exception:
            logging.warning("Unable to restore VM, restoring from image")
            params["restore_image_after_testing"] = "yes"

    if restore_time > expect_time:
        raise error.TestFail("Guest restoration took too long: %ss"
                             % restore_time)
    session.close()
def run(test, params, env):
    """
    Qemu numa basic test:
    1) Get host numa topological structure
    2) Start a guest and bind it on the cpus of one node
    3) Check the memory status of the qemu process. It should mainly use
       the memory of the same node.
    4) Destroy the guest
    5) Repeat steps 2 ~ 4 on every node in host

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    error_context.context("Get host numa topological structure",
                          logging.info)
    timeout = float(params.get("login_timeout", 240))
    host_numa_node = utils_misc.NumaInfo()
    node_list = host_numa_node.online_nodes
    for node_id in node_list:
        error_context.base_context("Bind qemu process to numa node %s"
                                   % node_id, logging.info)
        vm = "vm_bind_to_%s" % node_id
        params['qemu_command_prefix'] = "numactl --cpunodebind=%s" % node_id
        utils_memory.drop_caches()
        node_MemFree = int(host_numa_node.read_from_node_meminfo(node_id,
                                                                 "MemFree"))
        if node_MemFree < int(params["mem"]) * 1024:
            test.cancel("Not enough free memory in node %d." % node_id)
        env_process.preprocess_vm(test, params, env, vm)
        vm = env.get_vm(vm)
        vm.verify_alive()
        session = vm.wait_for_login(timeout=timeout)
        session.close()

        error_context.context("Check the memory use status of qemu process",
                              logging.info)
        memory_status, _ = utils_test.qemu.get_numa_status(host_numa_node,
                                                           vm.get_pid())
        node_used_most = 0
        memory_sz_used_most = 0
        for index in range(len(node_list)):
            if memory_sz_used_most < memory_status[index]:
                memory_sz_used_most = memory_status[index]
                node_used_most = node_list[index]
            logging.debug("Qemu used %s pages in node %s",
                          memory_status[index], node_list[index])
        if node_used_most != node_id:
            test.fail("Qemu still uses memory from another node. "
                      "Expect: %s, used: %s" % (node_id, node_used_most))
        error_context.context("Destroy guest.", logging.info)
        vm.destroy()
def run(test, params, env):
    """
    Qemu numa basic test:
    1) Get host numa topological structure
    2) Start a guest and bind it on the cpus of one node
    3) Check the memory status of the qemu process. It should mainly use
       the memory of the same node.
    4) Destroy the guest
    5) Repeat steps 2 ~ 4 on every node in host

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    error.context("Get host numa topological structure", logging.info)
    timeout = float(params.get("login_timeout", 240))
    host_numa_node = utils_misc.NumaInfo()
    node_list = host_numa_node.online_nodes
    for node_id in node_list:
        error.base_context("Bind qemu process to numa node %s" % node_id,
                           logging.info)
        vm = "vm_bind_to_%s" % node_id
        params['qemu_command_prefix'] = "numactl --cpunodebind=%s" % node_id
        utils_memory.drop_caches()
        env_process.preprocess_vm(test, params, env, vm)
        vm = env.get_vm(vm)
        vm.verify_alive()
        session = vm.wait_for_login(timeout=timeout)
        session.close()

        error.context("Check the memory use status of qemu process",
                      logging.info)
        memory_status, _ = utils_test.qemu.get_numa_status(host_numa_node,
                                                           vm.get_pid())
        node_used_most = 0
        memory_sz_used_most = 0
        for index in range(len(node_list)):
            if memory_sz_used_most < memory_status[index]:
                memory_sz_used_most = memory_status[index]
                node_used_most = node_list[index]
            logging.debug("Qemu used %s pages in node %s",
                          memory_status[index], node_list[index])
        if node_used_most != node_id:
            raise error.TestFail("Qemu still uses memory from another node."
                                 " Expect: %s, used: %s"
                                 % (node_id, node_used_most))
        error.context("Destroy guest.", logging.info)
        vm.destroy()
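Both variants depend on per-node memory accounting; NumaInfo.read_from_node_meminfo() wraps the per-node meminfo file that the kernel exposes under sysfs. An illustrative stand-alone reader (not the framework's implementation):

import re


def node_meminfo(node_id, key):
    # Each line looks like: "Node 0 MemFree:  12345678 kB"
    path = "/sys/devices/system/node/node%d/meminfo" % node_id
    with open(path) as f:
        for line in f:
            m = re.match(r"Node\s+\d+\s+(\w+):\s+(\d+)", line)
            if m and m.group(1) == key:
                return int(m.group(2))  # value in kB
    raise KeyError(key)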
def create_backup(self, sync, backup_image_name=""):
    """
    Create live backup with qmp command.
    """
    transaction = self.params.get("transaction", "yes")
    drive_name = self.get_device()
    bitmap_name = self.bitmap_name
    backup_format = self.backup_format
    speed = self.speed
    mode = "existing"
    if sync == "full":
        mode = "absolute-paths"
        granularity = int(self.params.get("granularity", 65536))
        backup_image_name = "images/%s.%s" % (self.image_chain[0],
                                              backup_format)
        backup_image_name = utils_misc.get_path(self.data_dir,
                                                backup_image_name)
        self.trash_files.append(backup_image_name)
        if transaction == "yes":
            args_list = []
            bitmap_args = {"node": drive_name,
                           "name": bitmap_name,
                           "granularity": granularity}
            self.transaction_add(args_list,
                                 "block-dirty-bitmap-add", bitmap_args)
            backup_args = {"device": drive_name,
                           "target": backup_image_name,
                           "format": backup_format,
                           "sync": sync,
                           "mode": mode,
                           "speed": speed}
            self.transaction_add(args_list, "drive-backup", backup_args)
            logging.info("Create bitmap and drive-backup with transaction "
                         "for %s", drive_name)
            self.vm.monitor.transaction(args_list)
            if not self.get_status():
                self.test.fail("full backup job not found")
            return None
        logging.info("Create bitmap for %s", drive_name)
        self.vm.monitor.operate_dirty_bitmap("add", drive_name,
                                             bitmap_name, granularity)
    if not backup_image_name:
        self.test.error("No backup target provided.")
    logging.info("Create %s backup for %s", sync, drive_name)
    self.vm.monitor.drive_backup(drive_name, backup_image_name,
                                 backup_format, sync, speed, mode,
                                 bitmap_name)
    if not self.get_status():
        self.test.fail("%s backup job not found" % sync)
    utils_memory.drop_caches()
def check_hugepage_file(vm, vmxml, umask):
    drop_caches()
    # Set umask
    process.run("umask %s" % umask, ignore_status=False, shell=True)
    setup_hugepages(2048, 2000)
    modify_domain_xml(vmxml)
    # Start guest
    vm.start()
    vm.wait_for_login()
    # Check the default dac of the hugepage file
    hugepage_file_name = "/dev/hugepages/libvirt"
    # Get the mode of the hugepage file
    f = os.open(hugepage_file_name, 0)
    stat_re = os.fstat(f)
    hugepage_file_mode = oct(stat_re.st_mode & 0o777)
    logging.debug(hugepage_file_mode)
    os.close(f)
    return hugepage_file_mode
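The expected file mode follows the usual permission arithmetic: the creation mode is masked by the umask. A worked sketch, assuming a 0666 base creation mode (the base actually used depends on how qemu/libvirt create the file):

# Worked example of the mode/umask relation the test verifies.
umask = 0o022
base_mode = 0o666
expected_mode = base_mode & ~umask
print(oct(expected_mode))  # 0o644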
def check_guest_flags(bash_cmd, flags):
    """
    Check bypass_cache option for single guest.
    """
    # Drop caches.
    drop_caches()
    # Form the proper parallel command based on whether systemd is used
    is_systemd = utils.run("cat /proc/1/comm").stdout.count("systemd")
    if is_systemd:
        virsh_cmd_stop = "systemctl stop libvirt-guests"
        virsh_cmd_start = "systemctl start libvirt-guests"
    else:
        virsh_cmd_stop = "service libvirt-guests stop"
        virsh_cmd_start = "service libvirt-guests start"

    ret = check_flags_parallel(virsh_cmd_stop,
                               bash_cmd % (managed_save_file,
                                           managed_save_file, "1", flags),
                               flags)
    if is_systemd:
        ret = libvirt_guests.raw_status()
    logging.info("status output: %s", ret.stdout)
    if all(["Suspending %s" % vm_name not in ret.stdout,
            "stopped, with saved guests" not in ret.stdout]):
        raise error.TestFail("Can't see messages of suspending vm")
    # status command should return 3.
    if not is_systemd:
        ret = libvirt_guests.raw_status()
    if ret.exit_status != 3:
        raise error.TestFail("The exit code %s for libvirt-guests"
                             " status is not correct" % ret.exit_status)

    # Wait for VM in shut off state
    wait_for_state("shut off")
    check_flags_parallel(virsh_cmd_start,
                         bash_cmd % (managed_save_file,
                                     managed_save_file, "0", flags),
                         flags)

    # Wait for VM in running state
    wait_for_state("running")
def run(test, params, env):
    """
    Check KSM can be started automatically when the ksmtuned threshold
    is reached

    1. Get the memory of your host and the KSM_THRES_COEF
    2. Boot a guest with memory less than the KSM_THRES_COEF threshold
    3. Get the memory used in host by the qemu-kvm process
    4. Get the free memory in host
    5. If the free memory size is not smaller than the threshold and
       guest used memory + threshold is not bigger than total memory in
       host, check the ksm status in host. Ksm should not start in the
       host.
    6. Repeat steps 2~5, this time breaking the rule in step 5

    :param test: kvm test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.
    """
    def check_ksm(mem, stress=False):
        """
        :param mem: Boot guest with given memory, in KB
        :param stress: Load stress or not
        """
        params['mem'] = mem // 1024
        params['start_vm'] = 'yes'
        vm_name = params['main_vm']
        env_process.preprocess_vm(test, params, env, vm_name)
        vm = env.get_vm(vm_name)
        vm.wait_for_login()
        if stress:
            params['stress_args'] = ('--cpu 4 --io 4 --vm 2 --vm-bytes '
                                     '%sM' % (int(params['mem']) // 2))
            stress_test = VMStress(vm, "stress", params)
            stress_test.load_stress_tool()
            time.sleep(30)
        qemu_pid = vm.get_pid()
        qemu_used_page = utils_misc.normalize_data_size(
            process.getoutput(params['cmd_get_qemu_used_mem'] % qemu_pid,
                              shell=True) + 'K', 'B')
        pagesize = utils_memory.getpagesize()
        qemu_used_mem = int(float(qemu_used_page)) * pagesize
        free_mem_host = utils_memory.freememtotal()
        ksm_status = process.getoutput(params['cmd_check_ksm_status'])
        vm.destroy()
        logging.info('The ksm threshold is %s, the memory allocated by '
                     'qemu is %s, and the total free memory on host is %s.',
                     ksm_thres, qemu_used_mem, free_mem_host)
        if free_mem_host >= ksm_thres:
            if ksm_status != '0':
                test.fail('Ksm should not start.')
            if stress:
                test.error('The host resource is not consumed as expected.')
        elif ksm_status == '0':
            test.fail('Ksm should start but it does not.')

    total_mem_host = utils_memory.memtotal()
    utils_memory.drop_caches()
    free_mem_host = utils_memory.freememtotal()
    ksm_thres = process.getoutput(params['cmd_get_thres'], shell=True)
    ksm_thres = int(total_mem_host *
                    (int(re.findall('\\d+', ksm_thres)[0]) / 100))
    guest_mem = (free_mem_host - ksm_thres) // 2
    if arch.ARCH in ('ppc64', 'ppc64le'):
        guest_mem = guest_mem - guest_mem % (256 * 1024)
    status_ksm_service = process.system(params['cmd_status_ksmtuned'],
                                        ignore_status=True)
    if status_ksm_service != 0:
        process.run(params['cmd_start_ksmtuned'])
    check_ksm(guest_mem)

    ksm_config_file = params['ksm_config_file']
    backup_file = ksm_config_file + '.backup'
    copyfile(ksm_config_file, backup_file)
    threshold = params.get_numeric('ksm_threshold')
    with open(ksm_config_file, "a+") as f:
        f.write('%s=%s' % (params['ksm_thres_conf'], threshold))
    process.run(params['cmd_restart_ksmtuned'])
    ksm_thres = total_mem_host * (threshold / 100)
    guest_mem = total_mem_host - ksm_thres // 2
    if arch.ARCH in ('ppc64', 'ppc64le'):
        guest_mem = guest_mem - guest_mem % (256 * 1024)
    try:
        check_ksm(guest_mem, stress=True)
    finally:
        copyfile(backup_file, ksm_config_file)
        os.remove(backup_file)
        if status_ksm_service != 0:
            process.run(params['cmd_stop_ksmtuned'])
        else:
            process.run(params['cmd_restart_ksmtuned'])
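ksmtuned starts ksm once free memory falls below KSM_THRES_COEF percent of total memory, which is the arithmetic the test mirrors when sizing its guests. A worked example with illustrative numbers:

# Worked example of the ksmtuned threshold arithmetic (numbers are
# illustrative, all values in KB).
total_mem_host = 16384 * 1024       # 16 GiB host
ksm_thres_coef = 20                 # KSM_THRES_COEF, percent
ksm_thres = total_mem_host * ksm_thres_coef // 100

# First pass: keep free memory above the threshold -> ksm must stay off.
guest_mem = (total_mem_host - ksm_thres) // 2
# Second pass: size the guest so free memory drops below the threshold
# -> ksmtuned is expected to start ksm.
guest_mem_stress = total_mem_host - ksm_thres // 2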
def run(test, params, env):
    """
    Test command: virsh managedsave.

    This command can save and destroy a running domain, so it can be
    restarted from the same state at a later time.
    """
    vm_name = params.get("main_vm")
    vm = env.get_vm(vm_name)
    managed_save_file = "/var/lib/libvirt/qemu/save/%s.save" % vm_name

    # define function
    def vm_recover_check(option, libvirtd, check_shutdown=False):
        """
        Check if the vm can be recovered correctly.

        :param option: managedsave command option.
        :param libvirtd: libvirtd service instance.
        :param check_shutdown: whether to shut down and reboot the guest
                               after it is recovered.
        """
        # At this point the vm should not be running
        if vm.is_alive():
            test.fail("Guest should be inactive")
        # Check vm managed save state.
        ret = virsh.dom_list("--managed-save --inactive", debug=True)
        vm_state1 = re.findall(r".*%s.*" % vm_name,
                               ret.stdout.strip())[0].split()[2]
        ret = virsh.dom_list("--managed-save --all", debug=True)
        vm_state2 = re.findall(r".*%s.*" % vm_name,
                               ret.stdout.strip())[0].split()[2]
        if vm_state1 != "saved" or vm_state2 != "saved":
            test.fail("Guest state should be saved")

        virsh.start(vm_name, debug=True)
        # This time vm should be in the list
        if vm.is_dead():
            test.fail("Guest should be active")
        # Restart libvirtd and check vm status again.
        libvirtd.restart()
        if vm.is_dead():
            test.fail("Guest should be active after"
                      " restarting libvirtd")
        # Check managed save file:
        if os.path.exists(managed_save_file):
            test.fail("Managed save image exists"
                      " after starting the domain")
        if option:
            if option.count("running"):
                if vm.is_dead() or vm.is_paused():
                    test.fail("Guest state should be"
                              " running after started"
                              " because of '--running' option")
            elif option.count("paused"):
                if not vm.is_paused():
                    test.fail("Guest state should be"
                              " paused after started"
                              " because of '--paused' option")
        else:
            if params.get("paused_after_start_vm") == "yes":
                if not vm.is_paused():
                    test.fail("Guest state should be"
                              " paused after started"
                              " because of initial guest state")
        if check_shutdown:
            # Resume the domain.
            if vm.is_paused():
                vm.resume()
            vm.wait_for_login()
            # Shutdown and start the domain; it should end up in
            # running state and accept logins.
            vm.shutdown()
            vm.wait_for_shutdown()
            vm.start()
            vm.wait_for_login()

    def vm_undefine_check(vm_name):
        """
        Check if vm can be undefined with the managed-save option
        """
        # Backup xml file
        xml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name)
        if not os.path.exists(managed_save_file):
            test.fail("Can't find managed save image")
        # Undefine domain with no options.
        if not virsh.undefine(vm_name, options=None,
                              ignore_status=True).exit_status:
            test.fail("Guest shouldn't be undefined"
                      " while domain managed save image exists")
        # Undefine domain with managed-save option.
        if virsh.undefine(vm_name, options="--managed-save",
                          ignore_status=True).exit_status:
            test.fail("Guest can't be undefined with"
                      " managed-save option")
        if os.path.exists(managed_save_file):
            test.fail("Managed save image exists"
                      " after undefining vm")
        # Restore and start the vm.
        xml_backup.define()
        vm.start()

    def check_flags_parallel(virsh_cmd, bash_cmd, flags):
        """
        Run the commands in parallel and check the output.
        """
        cmd = ("%s & %s" % (virsh_cmd, bash_cmd))
        ret = process.run(cmd, ignore_status=True, shell=True,
                          ignore_bg_processes=True)
        output = ret.stdout_text.strip()
        logging.debug("check flags output: %s", output)
        lines = re.findall(r"flags:.(\d+)", output, re.M)
        logging.debug("Find all fdinfo flags: %s", lines)
        lines = [int(i, 8) & flags for i in lines]
        if flags not in lines:
            test.fail("Checking flags %s failed" % flags)
        return ret

    def check_multi_guests(guests, start_delay, libvirt_guests):
        """
        Check start_delay option for multiple guests.
        """
        # Destroy vm first
        if vm.is_alive():
            vm.destroy(gracefully=False)
        # Clone the given number of guests
        timeout = params.get("clone_timeout", 360)
        for i in range(int(guests)):
            dst_vm = "%s_%s" % (vm_name, i)
            utils_libguestfs.virt_clone_cmd(vm_name, dst_vm,
                                            True, timeout=timeout)
            virsh.start(dst_vm, debug=True)

        # Wait 10 seconds for vm to start
        time.sleep(10)
        is_systemd = process.run("cat /proc/1/comm",
                                 shell=True).stdout_text.count("systemd")
        if is_systemd:
            libvirt_guests.restart()
            pattern = r'(.+ \d\d:\d\d:\d\d).+: Resuming guest.+done'
        else:
            cmd = ("service libvirt-guests restart | awk '{ print "
                   "strftime(\"%b %y %H:%M:%S\"), $0; fflush(); }'")
            ret = process.run(cmd, shell=True)
            pattern = r'(.+ \d\d:\d\d:\d\d)+ Resuming guest.+done'

        # The libvirt-guests status command reads messages from the
        # systemd journal; in case the messages are not ready in time,
        # add a short wait here.
        def wait_func():
            return libvirt_guests.raw_status().stdout.count("Resuming guest")

        utils_misc.wait_for(wait_func, 5)
        if is_systemd:
            ret = libvirt_guests.raw_status()
        logging.info("status output: %s", ret.stdout_text)
        resume_time = re.findall(pattern, ret.stdout_text, re.M)
        if not resume_time:
            test.fail("Can't see messages of resuming guest")

        # Convert time strings to seconds
        resume_seconds = [time.mktime(time.strptime(tm, "%b %y %H:%M:%S"))
                          for tm in resume_time]
        logging.info("Resume time in seconds: %s", resume_seconds)
        # Check if start_delay took effect
        for i in range(len(resume_seconds) - 1):
            if resume_seconds[i + 1] - resume_seconds[i] < int(start_delay):
                test.fail("Checking start_delay failed")

    def wait_for_state(vm_state):
        """
        Wait for vm state to be ready.
        """
        utils_misc.wait_for(lambda: vm.state() == vm_state, 10)

    def check_guest_flags(bash_cmd, flags):
        """
        Check bypass_cache option for single guest.
        """
        # Drop caches.
        drop_caches()
        # Form the proper parallel command based on whether systemd is used
        is_systemd = process.run("cat /proc/1/comm",
                                 shell=True).stdout_text.count("systemd")
        if is_systemd:
            virsh_cmd_stop = "systemctl stop libvirt-guests"
            virsh_cmd_start = "systemctl start libvirt-guests"
        else:
            virsh_cmd_stop = "service libvirt-guests stop"
            virsh_cmd_start = "service libvirt-guests start"

        ret = check_flags_parallel(virsh_cmd_stop,
                                   bash_cmd % (managed_save_file,
                                               managed_save_file, "1"),
                                   flags)
        if is_systemd:
            ret = libvirt_guests.raw_status()
        logging.info("status output: %s", ret.stdout_text)
        if all(["Suspending %s" % vm_name not in ret.stdout_text,
                "stopped, with saved guests" not in ret.stdout_text]):
            test.fail("Can't see messages of suspending vm")
        # status command should return 3.
        if not is_systemd:
            ret = libvirt_guests.raw_status()
        if ret.exit_status != 3:
            test.fail("The exit code %s for libvirt-guests"
                      " status is not correct" % ret.exit_status)

        # Wait for VM in shut off state
        wait_for_state("shut off")
        check_flags_parallel(virsh_cmd_start,
                             bash_cmd % (managed_save_file,
                                         managed_save_file, "0"),
                             flags)
        # Wait for VM in running state
        wait_for_state("running")

    def vm_msave_remove_check(vm_name):
        """
        Check the managedsave-remove command.
        """
        if not os.path.exists(managed_save_file):
            test.fail("Can't find managed save image")
        virsh.managedsave_remove(vm_name, debug=True)
        if os.path.exists(managed_save_file):
            test.fail("Managed save image still exists")
        virsh.start(vm_name, debug=True)
        # The domain state should be running
        if vm.state() != "running":
            test.fail("Guest state should be"
                      " running after started")

    def vm_managedsave_loop(vm_name, loop_range, libvirtd):
        """
        Run a loop of managedsave commands and check the results.
        """
        if vm.is_dead():
            virsh.start(vm_name, debug=True)
        for i in range(int(loop_range)):
            logging.debug("Test loop: %s", i)
            virsh.managedsave(vm_name, debug=True)
            virsh.start(vm_name, debug=True)
        # Check libvirtd status.
        if not libvirtd.is_running():
            test.fail("libvirtd is stopped after cmd")
        # Check vm status.
        if vm.state() != "running":
            test.fail("Guest isn't in running state")

    def build_vm_xml(vm_name, **dargs):
        """
        Build the new domain xml and define it.
        """
        try:
            # Stop the vm before doing any change to the xml
            if vm.is_alive():
                vm.destroy(gracefully=False)
            vmxml = vm_xml.VMXML.new_from_dumpxml(vm_name)
            if dargs.get("cpu_mode"):
                if "cpu" in vmxml:
                    del vmxml.cpu
                cpuxml = vm_xml.VMCPUXML()
                cpuxml.mode = params.get("cpu_mode", "host-model")
                cpuxml.match = params.get("cpu_match", "exact")
                cpuxml.fallback = params.get("cpu_fallback", "forbid")
                cpu_topology = {}
                cpu_topology_sockets = params.get("cpu_topology_sockets")
                if cpu_topology_sockets:
                    cpu_topology["sockets"] = cpu_topology_sockets
                cpu_topology_cores = params.get("cpu_topology_cores")
                if cpu_topology_cores:
                    cpu_topology["cores"] = cpu_topology_cores
                cpu_topology_threads = params.get("cpu_topology_threads")
                if cpu_topology_threads:
                    cpu_topology["threads"] = cpu_topology_threads
                if cpu_topology:
                    cpuxml.topology = cpu_topology
                vmxml.cpu = cpuxml
                vmxml.vcpu = int(params.get("vcpu_nums"))
            if dargs.get("sec_driver"):
                seclabel_dict = {"type": "dynamic", "model": "selinux",
                                 "relabel": "yes"}
                vmxml.set_seclabel([seclabel_dict])
            vmxml.sync()
            vm.start()
        except Exception as e:
            logging.error(str(e))
            test.cancel("Build domain xml failed")

    status_error = ("yes" == params.get("status_error", "no"))
    vm_ref = params.get("managedsave_vm_ref", "name")
    libvirtd_state = params.get("libvirtd", "on")
    extra_param = params.get("managedsave_extra_param", "")
    progress = ("yes" == params.get("managedsave_progress", "no"))
    cpu_mode = "yes" == params.get("managedsave_cpumode", "no")
    test_undefine = "yes" == params.get("managedsave_undefine", "no")
    test_bypass_cache = "yes" == params.get("test_bypass_cache", "no")
    autostart_bypass_cache = params.get("autostart_bypass_cache", "")
    multi_guests = params.get("multi_guests", "")
    test_libvirt_guests = params.get("test_libvirt_guests", "")
    check_flags = "yes" == params.get("check_flags", "no")
    security_driver = params.get("security_driver", "")
    remove_after_cmd = "yes" == params.get("remove_after_cmd", "no")
    option = params.get("managedsave_option", "")
    check_shutdown = "yes" == params.get("shutdown_after_cmd", "no")
    pre_vm_state = params.get("pre_vm_state", "")
    move_saved_file = "yes" == params.get("move_saved_file", "no")
    test_loop_cmd = "yes" == params.get("test_loop_cmd", "no")

    if option:
        if not virsh.has_command_help_match('managedsave', option):
            # Older libvirt does not have this option
            test.cancel("Older libvirt does not"
                        " handle arguments consistently")

    # Backup xml file.
    vmxml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name)
    # Get the libvirtd service
    libvirtd = utils_libvirtd.Libvirtd()
    # Get config files.
    qemu_config = utils_config.LibvirtQemuConfig()
    libvirt_guests_config = utils_config.LibvirtGuestsConfig()
    # Get libvirt-guests service
    libvirt_guests = Factory.create_service("libvirt-guests")

    try:
        # Destroy vm first for setting the configuration file
        if vm.state() == "running":
            vm.destroy(gracefully=False)
        # Prepare test environment.
        if libvirtd_state == "off":
            libvirtd.stop()
        if autostart_bypass_cache:
            ret = virsh.autostart(vm_name, "", ignore_status=True,
                                  debug=True)
            libvirt.check_exit_status(ret)
            qemu_config.auto_start_bypass_cache = autostart_bypass_cache
            libvirtd.restart()
        if security_driver:
            qemu_config.security_driver = [security_driver]
        if test_libvirt_guests:
            if multi_guests:
                start_delay = params.get("start_delay", "20")
                libvirt_guests_config.START_DELAY = start_delay
            if check_flags:
                libvirt_guests_config.BYPASS_CACHE = "1"
            # The config file format should be "x=y" instead of "x = y"
            process.run("sed -i -e 's/ = /=/g' "
                        "/etc/sysconfig/libvirt-guests", shell=True)
            libvirt_guests.restart()

        # Change domain xml.
        if cpu_mode:
            build_vm_xml(vm_name, cpu_mode=True)
        if security_driver:
            build_vm_xml(vm_name, sec_driver=True)

        # Turn VM into certain state.
        if pre_vm_state == "transient":
            logging.info("Creating %s...", vm_name)
            vmxml_for_test = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name)
            if vm.is_alive():
                vm.destroy(gracefully=False)
                # Wait for VM to be in shut off state
                utils_misc.wait_for(lambda: vm.state() == "shut off", 10)
            vm.undefine()
            if virsh.create(vmxml_for_test.xml, ignore_status=True,
                            debug=True).exit_status:
                vmxml_backup.define()
                test.cancel("Can't create the domain")

        # Wait for vm in stable state
        if params.get("start_vm") == "yes":
            if vm.state() == "shut off":
                vm.start()
                vm.wait_for_login()

        # run test case
        domid = vm.get_id()
        domuuid = vm.get_uuid()
        if vm_ref == "id":
            vm_ref = domid
        elif vm_ref == "uuid":
            vm_ref = domuuid
        elif vm_ref == "hex_id":
            vm_ref = hex(int(domid))
        elif vm_ref.count("invalid"):
            vm_ref = params.get(vm_ref)
        elif vm_ref == "name":
            vm_ref = vm_name

        # Ignore exception with "ignore_status=True"
        if progress:
            option += " --verbose"
        option += extra_param

        # For the bypass_cache test, run a shell command to check fd flags
        # while executing the managedsave command
        software_mgr = software_manager.SoftwareManager()
        if not software_mgr.check_installed('lsof'):
            logging.info('Installing lsof package:')
            software_mgr.install('lsof')
        bash_cmd = ("let i=1; while((i++<400)); do if [ -e %s ]; then "
                    "(cat /proc/$(lsof -w %s|awk '/libvirt_i/{print $2}')"
                    "/fdinfo/*%s* | grep 'flags:.*') && break; "
                    "else sleep 0.05; fi; done;")
        # Flags to check bypass cache take effect
        flags = os.O_DIRECT
        if test_bypass_cache:
            # Drop caches.
            drop_caches()
            virsh_cmd = "virsh managedsave %s %s" % (option, vm_name)
            check_flags_parallel(virsh_cmd,
                                 bash_cmd % (managed_save_file,
                                             managed_save_file, "1"),
                                 flags)
            # Wait for VM in shut off state
            wait_for_state("shut off")
            virsh_cmd = "virsh start %s %s" % (option, vm_name)
            check_flags_parallel(virsh_cmd,
                                 bash_cmd % (managed_save_file,
                                             managed_save_file, "0"),
                                 flags)
            # Wait for VM in running state
            wait_for_state("running")
        elif test_libvirt_guests:
            logging.debug("libvirt-guests status: %s",
                          libvirt_guests.status())
            if multi_guests:
                check_multi_guests(multi_guests,
                                   start_delay, libvirt_guests)
            if check_flags:
                check_guest_flags(bash_cmd, flags)
        else:
            # Ensure VM is running
            utils_misc.wait_for(lambda: vm.state() == "running", 10)
            ret = virsh.managedsave(vm_ref, options=option,
                                    ignore_status=True, debug=True)
            status = ret.exit_status
            # The progress information is output in the error message
            error_msg = ret.stderr.strip()
            if move_saved_file:
                cmd = "echo > %s" % managed_save_file
                process.run(cmd, shell=True)

            # Restart the libvirtd service if it was stopped above
            if libvirtd_state == "off":
                libvirtd.start()

            if status_error:
                if not status:
                    test.fail("Run successfully with wrong command!")
            else:
                if status:
                    test.fail("Run failed with right command")
                if progress:
                    if not error_msg.count("Managedsave:"):
                        test.fail("Got invalid progress output")
                if remove_after_cmd:
                    vm_msave_remove_check(vm_name)
                elif test_undefine:
                    vm_undefine_check(vm_name)
                elif autostart_bypass_cache:
                    libvirtd.stop()
                    virsh_cmd = ("(service libvirtd start)")
                    check_flags_parallel(virsh_cmd,
                                         bash_cmd % (managed_save_file,
                                                     managed_save_file,
                                                     "0"),
                                         flags)
                elif test_loop_cmd:
                    loop_range = params.get("loop_range", "20")
                    vm_managedsave_loop(vm_name, loop_range, libvirtd)
                else:
                    vm_recover_check(option, libvirtd, check_shutdown)
    finally:
        # Restore test environment.
        # Restart the libvirtd service
        qemu_config.restore()
        libvirt_guests_config.restore()
        libvirtd.restart()
        if autostart_bypass_cache:
            virsh.autostart(vm_name, "--disable",
                            ignore_status=True, debug=True)
        vm.destroy(gracefully=False)
        virsh.managedsave_remove(vm_name, debug=True)
        vmxml_backup.sync()
        if multi_guests:
            for i in range(int(multi_guests)):
                virsh.remove_domain("%s_%s" % (vm_name, i),
                                    "--remove-all-storage",
                                    debug=True)
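Stripped of the framework plumbing, the managedsave round-trip this test automates reduces to the following (the domain name is hypothetical; the save path is the one used above):

import os
import subprocess

vm_name = "avocado-vt-vm1"  # hypothetical domain name
save_file = "/var/lib/libvirt/qemu/save/%s.save" % vm_name

subprocess.check_call(["virsh", "managedsave", vm_name])
assert os.path.exists(save_file)       # image created, domain is now off
subprocess.check_call(["virsh", "start", vm_name])
assert not os.path.exists(save_file)   # image consumed when the vm starts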
def run(test, params, env):
    """
    Test memory device hot-plug and hot-unplug.

    1. Prepare test environment, destroy or suspend a VM.
    2. Prepare the memory device XML.
    3. Edit the domain XML and start the domain.
    4. Perform test operations (attach/detach, save/restore, checks).
    5. Recover test environment.
    """
    vm_name = params.get("main_vm")
    vm = env.get_vm(vm_name)
    virsh_dargs = {'debug': True, 'ignore_status': True}

    # Global variables to store max/current memory;
    # they may change after attach/detach
    new_max_mem = None
    new_cur_mem = None

    def get_vm_memtotal(session):
        """
        Get guest total memory
        """
        proc_meminfo = session.cmd_output("cat /proc/meminfo")
        # verify format and units are expected
        return int(re.search(r'MemTotal:\s+(\d+)\s+[kK]B',
                             proc_meminfo).group(1))

    def consume_vm_mem(size=1000, timeout=360):
        """
        To consume guest memory, default size is 1000M
        """
        session = vm.wait_for_login()
        # Mount tmpfs on /mnt and write to a file on it;
        # that is the memory operation
        sh_cmd = ("swapoff -a; mount -t tmpfs -o size={0}M tmpfs "
                  "/mnt; dd if=/dev/urandom of=/mnt/test bs=1M"
                  " count={0}".format(size))
        session.cmd(sh_cmd, timeout=timeout)
        session.close()

    def check_qemu_cmd():
        """
        Check qemu command line options.
        """
        cmd = ("ps -ef | grep %s | grep -v grep " % vm_name)
        if max_mem_rt:
            cmd += (" | grep 'slots=%s,maxmem=%sk'"
                    % (max_mem_slots, max_mem_rt))
        if tg_size:
            size = int(tg_size) * 1024
            cmd_str = 'memdimm.\|memory-backend-ram,id=ram-node.'
            cmd += (" | grep 'memory-backend-ram,id=%s' | grep 'size=%s"
                    % (cmd_str, size))
            if pg_size:
                cmd += ",host-nodes=%s" % node_mask
                if numa_memnode:
                    for node in numa_memnode:
                        if ('nodeset' in node and
                                node['nodeset'] in node_mask):
                            cmd += ",policy=%s" % node['mode']
            cmd += ".*pc-dimm,node=%s" % tg_node
            if mem_addr:
                cmd += (".*slot=%s,addr=%s"
                        % (mem_addr['slot'], int(mem_addr['base'], 16)))
            cmd += "'"
        # Run the command
        process.run(cmd, shell=True)

    def check_guest_meminfo(old_mem, check_option):
        """
        Check meminfo on guest.
        """
        assert old_mem is not None
        session = vm.wait_for_login()
        # Hot-plugged memory should be brought online by udev rules
        udev_file = "/lib/udev/rules.d/80-hotplug-cpu-mem.rules"
        udev_rules = ('SUBSYSTEM=="memory", ACTION=="add", TEST=="state",'
                      ' ATTR{state}=="offline", ATTR{state}="online"')
        cmd = ("grep memory %s || echo '%s' >> %s"
               % (udev_file, udev_rules, udev_file))
        session.cmd(cmd)
        # Wait a while for the new memory to be detected.
        utils_misc.wait_for(
            lambda: get_vm_memtotal(session) != int(old_mem), 20,
            first=15.0)
        new_mem = get_vm_memtotal(session)
        session.close()
        logging.debug("Memtotal on guest: %s", new_mem)
        no_of_times = 1
        if at_times:
            no_of_times = at_times
        if check_option == "attach":
            if new_mem != int(old_mem) + (int(tg_size) * no_of_times):
                test.fail("Total memory on guest did not change after "
                          "attaching the memory device")
        if check_option == "detach":
            if new_mem != int(old_mem) - (int(tg_size) * no_of_times):
                test.fail("Total memory on guest did not change after "
                          "detaching the memory device")

    def check_dom_xml(at_mem=False, dt_mem=False):
        """
        Check domain xml options.
        """
        # Global variables to store max/current memory
        global new_max_mem
        global new_cur_mem
        if attach_option.count("config"):
            dom_xml = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name)
        else:
            dom_xml = vm_xml.VMXML.new_from_dumpxml(vm_name)
        try:
            xml_max_mem_rt = int(dom_xml.max_mem_rt)
            xml_max_mem = int(dom_xml.max_mem)
            xml_cur_mem = int(dom_xml.current_mem)
            assert int(max_mem_rt) == xml_max_mem_rt
            # Check attached/detached memory
            if at_mem:
                if at_times:
                    assert int(max_mem) + (int(tg_size) *
                                           at_times) == xml_max_mem
                else:
                    assert int(max_mem) + int(tg_size) == xml_max_mem
                # Bug 1220702, skip the check for current memory
                if at_times:
                    assert int(cur_mem) + (int(tg_size) *
                                           at_times) == xml_cur_mem
                else:
                    assert int(cur_mem) + int(tg_size) == xml_cur_mem
                new_max_mem = xml_max_mem
                new_cur_mem = xml_cur_mem
                mem_dev = dom_xml.get_devices("memory")
                memory_devices = 1
                if at_times:
                    memory_devices = at_times
                if len(mem_dev) != memory_devices:
                    test.fail("Found wrong number of memory devices")
                assert int(tg_size) == int(mem_dev[0].target.size)
                assert int(tg_node) == int(mem_dev[0].target.node)
            elif dt_mem:
                if at_times:
                    assert int(new_max_mem) - (int(tg_size) *
                                               at_times) == xml_max_mem
                    assert int(new_cur_mem) - (int(tg_size) *
                                               at_times) == xml_cur_mem
                else:
                    assert int(new_max_mem) - int(tg_size) == xml_max_mem
                    # Bug 1220702, skip the check for current memory
                    assert int(new_cur_mem) - int(tg_size) == xml_cur_mem
        except AssertionError:
            utils_misc.log_last_traceback()
            test.fail("Found unmatched memory setting from domain xml")

    def check_save_restore():
        """
        Test save and restore operation
        """
        save_file = os.path.join(data_dir.get_tmp_dir(),
                                 "%s.save" % vm_name)
        ret = virsh.save(vm_name, save_file, **virsh_dargs)
        libvirt.check_exit_status(ret)
        ret = virsh.restore(save_file, **virsh_dargs)
        libvirt.check_exit_status(ret)
        if os.path.exists(save_file):
            os.remove(save_file)
        # Login to check vm status
        vm.wait_for_login().close()

    def create_mem_xml():
        """
        Create memory device xml.
        """
        mem_xml = memory.Memory()
        mem_model = params.get("mem_model", "dimm")
        mem_xml.mem_model = mem_model
        if tg_size:
            tg_xml = memory.Memory.Target()
            tg_xml.size = int(tg_size)
            tg_xml.size_unit = tg_sizeunit
            # There is support for non-numa nodes
            if numa_cells:
                tg_xml.node = int(tg_node)
            mem_xml.target = tg_xml
        if pg_size:
            src_xml = memory.Memory.Source()
            src_xml.pagesize = int(pg_size)
            src_xml.pagesize_unit = pg_unit
            src_xml.nodemask = node_mask
            mem_xml.source = src_xml
        if mem_addr:
            mem_xml.address = mem_xml.new_mem_address(**{"attrs": mem_addr})
        logging.debug("Memory device xml: %s", mem_xml)
        return mem_xml.copy()

    def add_device(dev_xml, at_error=False):
        """
        Add memory device by attachment or by modifying the domain xml.
        """
        if attach_device:
            ret = virsh.attach_device(vm_name, dev_xml.xml,
                                      flagstr=attach_option)
            libvirt.check_exit_status(ret, at_error)
        else:
            vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm.name)
            if numa_cells:
                del vmxml.max_mem
                del vmxml.current_mem
            vmxml.add_device(dev_xml)
            vmxml.sync()

    def modify_domain_xml():
        """
        Modify domain xml and define it.
        """
        vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm.name)
        mem_unit = params.get("mem_unit", "KiB")
        vcpu = params.get("vcpu", "4")
        if max_mem_rt:
            vmxml.max_mem_rt = int(max_mem_rt)
            vmxml.max_mem_rt_slots = max_mem_slots
            vmxml.max_mem_rt_unit = mem_unit
        if vcpu:
            vmxml.vcpu = int(vcpu)
            vcpu_placement = params.get("vcpu_placement", "static")
            vmxml.placement = vcpu_placement
        if numa_memnode:
            vmxml.numa_memory = {}
            vmxml.numa_memnode = numa_memnode
        else:
            try:
                del vmxml.numa_memory
                del vmxml.numa_memnode
            except Exception:
                # Not exists
                pass
        if numa_cells:
            cells = [ast.literal_eval(x) for x in numa_cells]
            # Round the numa memory values
            if align_mem_values:
                for cell in range(cells.__len__()):
                    memory_value = str(utils_numeric.align_value(
                        cells[cell]["memory"], align_to_value))
                    cells[cell]["memory"] = memory_value
            cpu_xml = vm_xml.VMCPUXML()
            cpu_xml.xml = "<cpu><numa/></cpu>"
            cpu_mode = params.get("cpu_mode")
            model_fallback = params.get("model_fallback")
            if cpu_mode:
                cpu_xml.mode = cpu_mode
            if model_fallback:
                cpu_xml.fallback = model_fallback
            cpu_xml.numa_cell = cells
            vmxml.cpu = cpu_xml
            # Delete the memory and currentMemory tags;
            # libvirt will fill them in automatically
            del vmxml.max_mem
            del vmxml.current_mem
        # hugepages setting
        if huge_pages:
            membacking = vm_xml.VMMemBackingXML()
            hugepages = vm_xml.VMHugepagesXML()
            pagexml_list = []
            for i in range(len(huge_pages)):
                pagexml = hugepages.PageXML()
                pagexml.update(huge_pages[i])
                pagexml_list.append(pagexml)
            hugepages.pages = pagexml_list
            membacking.hugepages = hugepages
            vmxml.mb = membacking
        logging.debug("vm xml: %s", vmxml)
        vmxml.sync()

    pre_vm_state = params.get("pre_vm_state", "running")
    attach_device = "yes" == params.get("attach_device", "no")
    detach_device = "yes" == params.get("detach_device", "no")
    attach_error = "yes" == params.get("attach_error", "no")
    start_error = "yes" == params.get("start_error", "no")
    detach_error = "yes" == params.get("detach_error", "no")
    maxmem_error = "yes" == params.get("maxmem_error", "no")
    attach_option = params.get("attach_option", "")
    test_qemu_cmd = "yes" == params.get("test_qemu_cmd", "no")
    test_managedsave = "yes" == params.get("test_managedsave", "no")
    test_save_restore = "yes" == params.get("test_save_restore", "no")
    test_mem_binding = "yes" == params.get("test_mem_binding", "no")
    restart_libvirtd = "yes" == params.get("restart_libvirtd", "no")
    add_mem_device = "yes" == params.get("add_mem_device", "no")
    test_dom_xml = "yes" == params.get("test_dom_xml", "no")
    max_mem = params.get("max_mem")
    max_mem_rt = params.get("max_mem_rt")
    max_mem_slots = params.get("max_mem_slots", "16")
    cur_mem = params.get("current_mem")
    numa_cells = params.get("numa_cells", "").split()
    set_max_mem = params.get("set_max_mem")
    align_mem_values = "yes" == params.get("align_mem_values", "no")
    align_to_value = int(params.get("align_to_value", "65536"))
    known_unplug_errors = []
    known_unplug_errors.append(params.get("known_unplug_errors"))

    # params for the attached device
    tg_size = params.get("tg_size")
    tg_sizeunit = params.get("tg_sizeunit", 'KiB')
    tg_node = params.get("tg_node", 0)
    pg_size = params.get("page_size")
    pg_unit = params.get("page_unit", "KiB")
    node_mask = params.get("node_mask", "0")
    mem_addr = ast.literal_eval(params.get("memory_addr", "{}"))
    huge_pages = [ast.literal_eval(x)
                  for x in params.get("huge_pages", "").split()]
    numa_memnode = [ast.literal_eval(x)
                    for x in params.get("numa_memnode", "").split()]
    at_times = int(params.get("attach_times", 1))

    # Back up xml file.
    vmxml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name)

    if not libvirt_version.version_compare(1, 2, 14):
        test.cancel("Memory hotplug not supported in current libvirt "
                    "version.")

    if align_mem_values:
        # Round the following values to 'align_to_value'
        max_mem = utils_numeric.align_value(max_mem, align_to_value)
        max_mem_rt = utils_numeric.align_value(max_mem_rt, align_to_value)
        cur_mem = utils_numeric.align_value(cur_mem, align_to_value)
        tg_size = utils_numeric.align_value(tg_size, align_to_value)

    try:
        # Drop caches first so the host has enough free memory
        drop_caches()
        # Destroy domain first
        if vm.is_alive():
            vm.destroy(gracefully=False)
        modify_domain_xml()

        # Start the domain anyway when attaching a memory device
        old_mem_total = None
        if attach_device:
            vm.start()
            session = vm.wait_for_login()
            old_mem_total = get_vm_memtotal(session)
            logging.debug("Memtotal on guest: %s", old_mem_total)
            session.close()
        dev_xml = None
        # To attach the memory device.
        if add_mem_device:
            at_times = int(params.get("attach_times", 1))
            dev_xml = create_mem_xml()
            for x in xrange(at_times):
                # If an error is expected, the command status should be
                # checked on the last iteration
                if x == at_times - 1:
                    add_device(dev_xml, attach_error)
                else:
                    add_device(dev_xml)
        # Check domain xml after attaching the device.
        if test_dom_xml:
            check_dom_xml(at_mem=attach_device)
        # Set domain state
        if pre_vm_state == "transient":
            logging.info("Creating %s...", vm_name)
            vmxml_for_test = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name)
            if vm.is_alive():
                vm.destroy(gracefully=False)
            vm.undefine()
            if virsh.create(vmxml_for_test.xml, **virsh_dargs).exit_status:
                vmxml_backup.define()
                test.fail("Can't create the domain")
        elif vm.is_dead():
            try:
                vm.start()
                vm.wait_for_login().close()
            except virt_vm.VMStartError as detail:
                if start_error:
                    pass
                else:
                    test.fail(detail)
        # Set memory operation
        if set_max_mem:
            max_mem_option = params.get("max_mem_option", "")
            ret = virsh.setmaxmem(vm_name, set_max_mem,
                                  flagstr=max_mem_option)
            libvirt.check_exit_status(ret, maxmem_error)
        # Check domain xml after starting the domain.
        if test_dom_xml:
            check_dom_xml(at_mem=attach_device)
        # Check qemu command line
        if test_qemu_cmd:
            check_qemu_cmd()
        # Check guest meminfo after attachment
        if (attach_device and not attach_option.count("config") and
                not any([attach_error, start_error])):
            check_guest_meminfo(old_mem_total, check_option="attach")
        # Consume memory on the guest to verify memory changes by numastat
        if test_mem_binding:
            pid = vm.get_pid()
            old_numastat = read_from_numastat(pid, "Total")
            logging.debug("Numastat: %s", old_numastat)
            consume_vm_mem()
            new_numastat = read_from_numastat(pid, "Total")
            logging.debug("Numastat: %s", new_numastat)
            # Only check total memory, which is the last element
            if float(new_numastat[-1]) - float(old_numastat[-1]) < 0:
                test.fail("Numa memory can't be consumed on guest")
        # Run the managedsave command to check domain xml.
        if test_managedsave:
            ret = virsh.managedsave(vm_name, **virsh_dargs)
            libvirt.check_exit_status(ret)
            vm.start()
            vm.wait_for_login().close()
            if test_dom_xml:
                check_dom_xml(at_mem=attach_device)
        # Run save and restore commands to check domain xml
        if test_save_restore:
            check_save_restore()
            if test_dom_xml:
                check_dom_xml(at_mem=attach_device)
        # Check domain xml after restarting libvirtd
        if restart_libvirtd:
            libvirtd = utils_libvirtd.Libvirtd()
            libvirtd.restart()
            if test_dom_xml:
                check_dom_xml(at_mem=attach_device)
        # Detach the memory device
        unplug_failed_with_known_error = False
        if detach_device:
            if not dev_xml:
                dev_xml = create_mem_xml()
            for x in xrange(at_times):
                ret = virsh.detach_device(vm_name, dev_xml.xml,
                                          flagstr=attach_option)
                try:
                    libvirt.check_exit_status(ret, detach_error)
                except Exception as detail:
                    dmesg_file = tempfile.mktemp(dir=data_dir.get_tmp_dir())
                    try:
                        session = vm.wait_for_login()
                        utils_misc.verify_dmesg(dmesg_log_file=dmesg_file,
                                                ignore_result=True,
                                                session=session,
                                                level_check=5)
                    except Exception:
                        session.close()
                        test.fail("After memory unplug, unable to connect"
                                  " to VM or unable to collect dmesg")
                    session.close()
                    if os.path.exists(dmesg_file):
                        with open(dmesg_file, 'r') as f:
                            flag = re.findall(
                                r'memory memory\d+?: Offline failed',
                                f.read())
                        if not flag:
                            # The attached memory is used by the vm and
                            # could not be unplugged; the result is expected
                            os.remove(dmesg_file)
                            test.fail(detail)
                        unplug_failed_with_known_error = True
                        os.remove(dmesg_file)
            # Check whether a known error occurred or not
            dmesg_file = tempfile.mktemp(dir=data_dir.get_tmp_dir())
            try:
                session = vm.wait_for_login()
                utils_misc.verify_dmesg(dmesg_log_file=dmesg_file,
                                        ignore_result=True,
                                        session=session,
                                        level_check=4)
            except Exception:
                session.close()
                test.fail("After memory unplug, unable to connect to VM"
                          " or unable to collect dmesg")
            session.close()
            if known_unplug_errors and os.path.exists(dmesg_file):
                for known_error in known_unplug_errors:
                    if (known_error[0] == known_error[-1]) and \
                       known_error.startswith(("'")):
                        known_error = known_error[1:-1]
                    with open(dmesg_file, 'r') as f:
                        if known_error in f.read():
                            unplug_failed_with_known_error = True
                            logging.debug("Known error occurred during hot"
                                          " unplug: %s", known_error)
            if test_dom_xml and not unplug_failed_with_known_error:
                check_dom_xml(dt_mem=detach_device)
            # Remove dmesg temp file
            if os.path.exists(dmesg_file):
                os.remove(dmesg_file)
    finally:
        # Delete snapshots.
        snapshot_lists = virsh.snapshot_list(vm_name)
        if len(snapshot_lists) > 0:
            libvirt.clean_up_snapshots(vm_name, snapshot_lists)
            for snap in snapshot_lists:
                virsh.snapshot_delete(vm_name, snap, "--metadata")

        # Recover VM.
        if vm.is_alive():
            vm.destroy(gracefully=False)
        logging.info("Restoring vm...")
        vmxml_backup.sync()
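For reference, the device XML that create_mem_xml() above serializes to looks roughly like this (the size and node values are placeholders; the element names follow libvirt's memory device schema):

# Roughly the <memory> device XML produced for a dimm hot-plug.
mem_device_xml = """
<memory model='dimm'>
  <target>
    <size unit='KiB'>524288</size>
    <node>0</node>
  </target>
</memory>
"""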
def run(test, params, env):
    """
    KVM reboot time test:
    1) Set init run level to 1
    2) Restart guest
    3) Wait for the console
    4) Send a 'reboot' command to the guest
    5) Boot up the guest and measure the boot time
    6) Restore guest run level

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment
    """
    vm = env.get_vm(params["main_vm"])
    vm.verify_alive()
    timeout = int(params.get("login_timeout", 360))
    session = vm.wait_for_login(timeout=timeout)

    error_context.context("Set guest run level to 1", logging.info)
    single_user_cmd = params['single_user_cmd']
    session.cmd(single_user_cmd)

    try:
        error_context.context("Restart guest", logging.info)
        session.cmd('sync')
        vm.destroy()

        error_context.context("Boot up guest", logging.info)
        vm.create()
        vm.verify_alive()
        session = vm.wait_for_serial_login(timeout=timeout)

        error_context.context("Send a 'reboot' command to the guest",
                              logging.info)
        utils_memory.drop_caches()
        session.cmd('reboot & exit', timeout=1, ignore_all_errors=True)
        before_reboot_stamp = utils_misc.monotonic_time()

        error_context.context("Boot up the guest and measure the boot time",
                              logging.info)
        session = vm.wait_for_serial_login(timeout=timeout)
        reboot_time = utils_misc.monotonic_time() - before_reboot_stamp
        test.write_test_keyval({'result': "%ss" % reboot_time})
        expect_time = int(params.get("expect_reboot_time", "30"))
        logging.info("Reboot time: %ss", reboot_time)
    finally:
        try:
            error_context.context("Restore guest run level", logging.info)
            restore_level_cmd = params['restore_level_cmd']
            session.cmd(restore_level_cmd)
            session.cmd('sync')
            vm.destroy(gracefully=False)
            env_process.preprocess_vm(test, params, env, vm.name)
            vm.verify_alive()
            vm.wait_for_login(timeout=timeout)
        except Exception:
            logging.warning("Can not restore guest run level, "
                            "need to restore the image")
            params["restore_image_after_testing"] = "yes"

    if reboot_time > expect_time:
        test.fail("Guest reboot is taking too long: %ss" % reboot_time)

    session.close()
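The timing pattern in these boot/reboot tests is a monotonic-clock delta around the serial re-login; a minimal stand-alone version, using the standard library's monotonic clock in place of utils_misc.monotonic_time():

import time

before_reboot_stamp = time.monotonic()
# ... trigger the reboot, then block until the console accepts a login ...
reboot_time = time.monotonic() - before_reboot_stamp
print("Reboot time: %ss" % reboot_time)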
def run(test, params, env): """ Tests KSM (Kernel Shared Memory) capability by allocating and filling KVM guests memory using various values. KVM sets the memory as MADV_MERGEABLE so all VM's memory can be merged. The workers in guest writes to tmpfs filesystem thus allocations are not limited by process max memory, only by VM's memory. Two test modes are supported - serial and parallel. Serial mode - uses multiple VMs, allocates memory per guest and always verifies the correct number of shared memory. 0) Prints out the setup and initialize guest(s) 1) Fills guest with the same number (S1) 2) Random fill on the first guest 3) Random fill of the remaining VMs one by one until the memory is completely filled (KVM stops machines which asks for additional memory until there is available memory) (S2, shouldn't finish) 4) Destroy all VMs but the last one 5) Checks the last VMs memory for corruption Parallel mode - uses one VM with multiple allocator workers. Executes scenarios in parallel to put more stress on the KVM. 0) Prints out the setup and initialize guest(s) 1) Fills memory with the same number (S1) 2) Fills memory with random numbers (S2) 3) Verifies all pages 4) Fills memory with the same number (S2) 5) Changes the last 96B (S3) Scenarios: S1) Fill all vms with the same value (all pages should be merged into 1) S2) Random fill (all pages should be splitted) S3) Fill last 96B (change only last 96B of each page; some pages will be merged; there was a bug with data corruption) Every worker has unique random key so we are able to verify the filled values. :param test: kvm test object. :param params: Dictionary with test parameters. :param env: Dictionary with the test environment. :param cfg: ksm_swap - use swap? :param cfg: ksm_overcommit_ratio - memory overcommit (serial mode only) :param cfg: ksm_parallel_ratio - number of workers (parallel mode only) :param cfg: ksm_host_reserve - override memory reserve on host in MB :param cfg: ksm_guest_reserve - override memory reserve on guests in MB :param cfg: ksm_mode - test mode {serial, parallel} :param cfg: ksm_perf_ratio - performance ratio, increase it when your machine is too slow """ def _start_allocator(vm, session, timeout): """ Execute ksm_overcommit_guest.py on guest, wait until it's initialized. :param vm: VM object. :param session: Remote session to a VM object. :param timeout: Timeout that will be used to verify if ksm_overcommit_guest.py started properly. """ logging.debug("Starting ksm_overcommit_guest.py on guest %s", vm.name) session.sendline("python /tmp/ksm_overcommit_guest.py") try: session.read_until_last_line_matches(["PASS:"******"FAIL:"], timeout) except aexpect.ExpectProcessTerminatedError as details: e_msg = ("Command ksm_overcommit_guest.py on vm '%s' failed: %s" % (vm.name, str(details))) test.fail(e_msg) def _execute_allocator(command, vm, session, timeout): """ Execute a given command on ksm_overcommit_guest.py main loop, indicating the vm the command was executed on. :param command: Command that will be executed. :param vm: VM object. :param session: Remote session to VM object. :param timeout: Timeout used to verify expected output. 
:return: Tuple (match index, data) """ logging.debug("Executing '%s' on ksm_overcommit_guest.py loop, " "vm: %s, timeout: %s", command, vm.name, timeout) session.sendline(command) try: (match, data) = session.read_until_last_line_matches( ["PASS:", "FAIL:"], timeout) except aexpect.ExpectProcessTerminatedError as details: e_msg = ("Failed to execute command '%s' on " "ksm_overcommit_guest.py, vm '%s': %s" % (command, vm.name, str(details))) test.fail(e_msg) return (match, data) def get_ksmstat(): """ Return memory shared by KSM in MB :return: memory in MB """ fpages = open('/sys/kernel/mm/ksm/pages_sharing') ksm_pages = int(fpages.read()) fpages.close() return ((ksm_pages * 4096) / 1e6) def initialize_guests(): """ Initialize guests (fill their memories with specified patterns). """ logging.info("Phase 1: filling guest memory pages") for session in lsessions: vm = lvms[lsessions.index(session)] logging.debug("Turning off swap on vm %s", vm.name) session.cmd("swapoff -a", timeout=300) # Start the allocator _start_allocator(vm, session, 60 * perf_ratio) # Execute allocator on guests for i in range(0, vmsc): vm = lvms[i] cmd = "mem = MemFill(%d, %s, %s)" % (ksm_size, skeys[i], dkeys[i]) _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio) cmd = "mem.value_fill(%d)" % skeys[0] _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * 2 * perf_ratio) # Let ksm_overcommit_guest.py do its job # (until shared mem reaches expected value) shm = 0 j = 0 logging.debug("Target shared meminfo for guest %s: %s", vm.name, ksm_size) while ((new_ksm and (shm < (ksm_size * (i + 1)))) or (not new_ksm and (shm < (ksm_size)))): if j > 64: logging.debug(utils_test.get_memory_info(lvms)) test.error("SHM didn't merge the memory until " "the DL on guest: %s" % vm.name) pause = ksm_size / 200 * perf_ratio logging.debug("Waiting %ds before proceeding...", pause) time.sleep(pause) if (new_ksm): shm = get_ksmstat() else: shm = vm.get_shared_meminfo() logging.debug("Shared meminfo for guest %s after " "iteration %s: %s", vm.name, j, shm) j += 1 # Keep some reserve pause = ksm_size / 200 * perf_ratio logging.debug("Waiting %ds before proceeding...", pause) time.sleep(pause) logging.debug(utils_test.get_memory_info(lvms)) logging.info("Phase 1: PASS") def separate_first_guest(): """ Separate memory of the first guest by generating special random series """ logging.info("Phase 2: Split the pages on the first guest") cmd = "mem.static_random_fill()" data = _execute_allocator(cmd, lvms[0], lsessions[0], fill_base_timeout * 2 * perf_ratio)[1] r_msg = data.splitlines()[-1] logging.debug("Return message of static_random_fill: %s", r_msg) out = int(r_msg.split()[4]) logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", ksm_size, out, (ksm_size * 1000 / out)) logging.debug(utils_test.get_memory_info(lvms)) logging.debug("Phase 2: PASS") def split_guest(): """ Sequential split of pages on guests up to memory limit """ logging.info("Phase 3a: Sequential split of pages on guests up to " "memory limit") last_vm = 0 session = None vm = None for i in range(1, vmsc): # Check VMs for j in range(0, vmsc): if not lvms[j].is_alive(): e_msg = ("VM %d died while executing static_random_fill on" " VM %d in allocator loop" % (j, i)) test.fail(e_msg) vm = lvms[i] session = lsessions[i] cmd = "mem.static_random_fill()" logging.debug("Executing %s on ksm_overcommit_guest.py loop, " "vm: %s", cmd, vm.name) session.sendline(cmd) out = "" try: logging.debug("Watching host mem while filling vm %s memory", vm.name) while (not
out.startswith("PASS") and not out.startswith("FAIL")): if not vm.is_alive(): e_msg = ("VM %d died while executing " "static_random_fill on allocator loop" % i) test.fail(e_msg) free_mem = int(utils_memory.read_from_meminfo("MemFree")) if (ksm_swap): free_mem = (free_mem + int(utils_memory.read_from_meminfo("SwapFree"))) logging.debug("Free memory on host: %d", free_mem) # We need to keep some memory for python to run. if (free_mem < 64000) or (ksm_swap and free_mem < (450000 * perf_ratio)): vm.pause() for j in range(0, i): lvms[j].destroy(gracefully=False) time.sleep(20) vm.resume() logging.debug("Only %s free memory, killing %d guests", free_mem, (i - 1)) last_vm = i out = session.read_nonblocking(0.1, 1) time.sleep(2) except OSError: logging.debug("Only %s host free memory, killing %d guests", free_mem, (i - 1)) logging.debug("Stopping %s", vm.name) vm.pause() for j in range(0, i): logging.debug("Destroying %s", lvms[j].name) lvms[j].destroy(gracefully=False) time.sleep(20) vm.resume() last_vm = i if last_vm != 0: break logging.debug("Memory filled for guest %s", vm.name) logging.info("Phase 3a: PASS") logging.info("Phase 3b: Verify memory of the max stressed VM") for i in range(last_vm + 1, vmsc): lsessions[i].close() if i == (vmsc - 1): logging.debug(utils_test.get_memory_info([lvms[i]])) logging.debug("Destroying guest %s", lvms[i].name) lvms[i].destroy(gracefully=False) # Verify last machine with randomly generated memory cmd = "mem.static_random_verify()" _execute_allocator(cmd, lvms[last_vm], lsessions[last_vm], (mem / 200 * 50 * perf_ratio)) logging.debug(utils_test.get_memory_info([lvms[last_vm]])) lsessions[last_vm].cmd_output("die()", 20) lvms[last_vm].destroy(gracefully=False) logging.info("Phase 3b: PASS") def split_parallel(): """ Parallel page spliting """ logging.info("Phase 1: parallel page spliting") # We have to wait until allocator is finished (it waits 5 seconds to # clean the socket session = lsessions[0] vm = lvms[0] for i in range(1, max_alloc): lsessions.append(vm.wait_for_login(timeout=360)) session.cmd("swapoff -a", timeout=300) for i in range(0, max_alloc): # Start the allocator _start_allocator(vm, lsessions[i], 60 * perf_ratio) logging.info("Phase 1: PASS") logging.info("Phase 2a: Simultaneous merging") logging.debug("Memory used by allocator on guests = %dMB", (ksm_size / max_alloc)) for i in range(0, max_alloc): cmd = "mem = MemFill(%d, %s, %s)" % ((ksm_size / max_alloc), skeys[i], dkeys[i]) _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio) cmd = "mem.value_fill(%d)" % (skeys[0]) _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * perf_ratio) # Wait until ksm_overcommit_guest.py merges pages (3 * ksm_size / 3) shm = 0 i = 0 logging.debug("Target shared memory size: %s", ksm_size) while (shm < ksm_size): if i > 64: logging.debug(utils_test.get_memory_info(lvms)) test.error("SHM didn't merge the memory until DL") pause = ksm_size / 200 * perf_ratio logging.debug("Waiting %ds before proceed...", pause) time.sleep(pause) if (new_ksm): shm = get_ksmstat() else: shm = vm.get_shared_meminfo() logging.debug("Shared meminfo after attempt %s: %s", i, shm) i += 1 logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2a: PASS") logging.info("Phase 2b: Simultaneous spliting") # Actual splitting for i in range(0, max_alloc): cmd = "mem.static_random_fill()" data = _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * perf_ratio)[1] data = data.splitlines()[-1] logging.debug(data) out = int(data.split()[4]) 
logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", (ksm_size / max_alloc), out, (ksm_size * 1000 / out / max_alloc)) logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2b: PASS") logging.info("Phase 2c: Simultaneous verification") for i in range(0, max_alloc): cmd = "mem.static_random_verify()" data = _execute_allocator(cmd, vm, lsessions[i], (mem / 200 * 50 * perf_ratio))[1] logging.info("Phase 2c: PASS") logging.info("Phase 2d: Simultaneous merging") # Actual splitting for i in range(0, max_alloc): cmd = "mem.value_fill(%d)" % skeys[0] data = _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * 2 * perf_ratio)[1] logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2d: PASS") logging.info("Phase 2e: Simultaneous verification") for i in range(0, max_alloc): cmd = "mem.value_check(%d)" % skeys[0] data = _execute_allocator(cmd, vm, lsessions[i], (mem / 200 * 50 * perf_ratio))[1] logging.info("Phase 2e: PASS") logging.info("Phase 2f: Simultaneous spliting last 96B") for i in range(0, max_alloc): cmd = "mem.static_random_fill(96)" data = _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * perf_ratio)[1] data = data.splitlines()[-1] out = int(data.split()[4]) logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", ksm_size / max_alloc, out, (ksm_size * 1000 / out / max_alloc)) logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2f: PASS") logging.info("Phase 2g: Simultaneous verification last 96B") for i in range(0, max_alloc): cmd = "mem.static_random_verify(96)" _, data = _execute_allocator(cmd, vm, lsessions[i], (mem / 200 * 50 * perf_ratio)) logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2g: PASS") logging.debug("Cleaning up...") for i in range(0, max_alloc): lsessions[i].cmd_output("die()", 20) session.close() vm.destroy(gracefully=False) # Main test code logging.info("Starting phase 0: Initialization") if process.run("ps -C ksmtuned", ignore_status=True).exit_status == 0: logging.info("Killing ksmtuned...") process.run("killall ksmtuned") new_ksm = False if (os.path.exists("/sys/kernel/mm/ksm/run")): process.run("echo 50 > /sys/kernel/mm/ksm/sleep_millisecs", shell=True) process.run("echo 5000 > /sys/kernel/mm/ksm/pages_to_scan", shell=True) process.run("echo 1 > /sys/kernel/mm/ksm/run", shell=True) e_up = "/sys/kernel/mm/transparent_hugepage/enabled" e_rh = "/sys/kernel/mm/redhat_transparent_hugepage/enabled" if os.path.exists(e_up): process.run("echo 'never' > %s" % e_up, shell=True) if os.path.exists(e_rh): process.run("echo 'never' > %s" % e_rh, shell=True) new_ksm = True else: try: process.run("modprobe ksm") process.run("ksmctl start 5000 100") except process.CmdError as details: test.fail("Failed to load KSM: %s" % details) # host_reserve: mem reserve kept for the host system to run host_reserve = int(params.get("ksm_host_reserve", -1)) if (host_reserve == -1): try: available = utils_memory.read_from_meminfo("MemAvailable") except process.CmdError: # ancient kernels utils_memory.drop_caches() available = utils_memory.read_from_meminfo("MemFree") # default host_reserve = UsedMem + one_minimal_guest(128MB) # later we add 64MB per additional guest host_reserve = ((utils_memory.memtotal() - available) / 1024 + 128) # using default reserve _host_reserve = True else: _host_reserve = False # guest_reserve: mem reserve kept to avoid guest OS to kill processes guest_reserve = int(params.get("ksm_guest_reserve", -1)) if (guest_reserve == -1): # default guest_reserve = minimal_system_mem(256MB) # 
later we add tmpfs overhead guest_reserve = 256 # using default reserve _guest_reserve = True else: _guest_reserve = False max_vms = int(params.get("max_vms", 2)) overcommit = float(params.get("ksm_overcommit_ratio", 2.0)) max_alloc = int(params.get("ksm_parallel_ratio", 1)) # vmsc: count of all used VMs vmsc = int(overcommit) + 1 vmsc = max(vmsc, max_vms) if (params['ksm_mode'] == "serial"): max_alloc = vmsc if _host_reserve: # First round of additional guest reserves host_reserve += vmsc * 64 _host_reserve = vmsc host_mem = (int(utils_memory.memtotal()) / 1024 - host_reserve) ksm_swap = False if params.get("ksm_swap") == "yes": ksm_swap = True # Performance ratio perf_ratio = params.get("ksm_perf_ratio") if perf_ratio: perf_ratio = float(perf_ratio) else: perf_ratio = 1 if (params['ksm_mode'] == "parallel"): vmsc = 1 overcommit = 1 mem = host_mem # 32bit system adjustment if "64" not in params.get("vm_arch_name"): logging.debug("Probably i386 guest architecture, " "max allocator mem = 2G") # Guest can have more than 2G but # kvm mem + 1MB (allocator itself) can't if (host_mem > 3100): mem = 3100 if os.popen("uname -i").readline().startswith("i386"): logging.debug("Host is i386 architecture, max guest mem is 2G") # Guest system with qemu overhead (64M) can't have more than 2G if mem > 3100 - 64: mem = 3100 - 64 else: # mem: Memory of the guest systems. Maximum must be less than # host's physical ram mem = int(overcommit * host_mem / vmsc) # 32bit system adjustment if not params['image_name'].endswith("64"): logging.debug("Probably i386 guest architecture, " "max allocator mem = 2G") # Guest can have more than 2G but # kvm mem + 1MB (allocator itself) can't if mem - guest_reserve - 1 > 3100: vmsc = int(math.ceil((host_mem * overcommit) / (3100 + guest_reserve))) if _host_reserve: host_reserve += (vmsc - _host_reserve) * 64 host_mem -= (vmsc - _host_reserve) * 64 _host_reserve = vmsc mem = int(math.floor(host_mem * overcommit / vmsc)) if os.popen("uname -i").readline().startswith("i386"): logging.debug("Host is i386 architecture, max guest mem is 2G") # Guest system with qemu overhead (64M) can't have more than 2G if mem > 3100 - 64: vmsc = int(math.ceil((host_mem * overcommit) / (3100 - 64.0))) if _host_reserve: host_reserve += (vmsc - _host_reserve) * 64 host_mem -= (vmsc - _host_reserve) * 64 _host_reserve = vmsc mem = int(math.floor(host_mem * overcommit / vmsc)) # 0.055 represents OS + TMPFS additional reserve per guest ram MB if _guest_reserve: guest_reserve += math.ceil(mem * 0.055) swap = int(utils_memory.read_from_meminfo("SwapTotal")) / 1024 logging.debug("Overcommit = %f", overcommit) logging.debug("True overcommit = %f ", (float(vmsc * mem) / float(host_mem))) logging.debug("Host memory = %dM", host_mem) logging.debug("Guest memory = %dM", mem) logging.debug("Using swap = %s", ksm_swap) logging.debug("Swap = %dM", swap) logging.debug("max_vms = %d", max_vms) logging.debug("Count of all used VMs = %d", vmsc) logging.debug("Performance_ratio = %f", perf_ratio) # Generate unique keys for random series skeys = [] dkeys = [] for i in range(0, max(vmsc, max_alloc)): key = random.randrange(0, 255) while key in skeys: key = random.randrange(0, 255) skeys.append(key) key = random.randrange(0, 999) while key in dkeys: key = random.randrange(0, 999) dkeys.append(key) logging.debug("skeys: %s", skeys) logging.debug("dkeys: %s", dkeys) lvms = [] lsessions = [] # As we don't know the number and memory amount of VMs in advance, # we need to specify and create them here vm_name = 
params["main_vm"] params['mem'] = mem params['vms'] = vm_name # Associate pidfile name params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(vm_name, 'pid') if not params.get('extra_params'): params['extra_params'] = ' ' params['extra_params_' + vm_name] = params.get('extra_params') params['extra_params_' + vm_name] += (" -pidfile %s" % (params.get('pid_' + vm_name))) params['extra_params'] = params.get('extra_params_' + vm_name) # ksm_size: amount of memory used by allocator ksm_size = mem - guest_reserve logging.debug("Memory used by allocator on guests = %dM", ksm_size) fill_base_timeout = ksm_size / 10 # Creating the first guest env_process.preprocess_vm(test, params, env, vm_name) lvms.append(env.get_vm(vm_name)) if not lvms[0]: test.error("VM object not found in environment") if not lvms[0].is_alive(): test.error("VM seems to be dead; Test requires a living VM") logging.debug("Booting first guest %s", lvms[0].name) lsessions.append(lvms[0].wait_for_login(timeout=360)) # Associate vm PID try: tmp = open(params.get('pid_' + vm_name), 'r') params['pid_' + vm_name] = int(tmp.readline()) except Exception: test.fail("Could not get PID of %s" % (vm_name)) # Creating other guest systems for i in range(1, vmsc): vm_name = "vm" + str(i + 1) params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(vm_name, 'pid') params['extra_params_' + vm_name] = params.get('extra_params') params['extra_params_' + vm_name] += (" -pidfile %s" % (params.get('pid_' + vm_name))) params['extra_params'] = params.get('extra_params_' + vm_name) # Last VM is later used to run more allocators simultaneously lvms.append(lvms[0].clone(vm_name, params)) env.register_vm(vm_name, lvms[i]) params['vms'] += " " + vm_name logging.debug("Booting guest %s", lvms[i].name) lvms[i].create() if not lvms[i].is_alive(): test.error("VM %s seems to be dead; Test requires a" "living VM" % lvms[i].name) lsessions.append(lvms[i].wait_for_login(timeout=360)) try: tmp = open(params.get('pid_' + vm_name), 'r') params['pid_' + vm_name] = int(tmp.readline()) except Exception: test.fail("Could not get PID of %s" % (vm_name)) # Let guests rest a little bit :-) pause = vmsc * 2 * perf_ratio logging.debug("Waiting %ds before proceed", pause) time.sleep(vmsc * 2 * perf_ratio) logging.debug(utils_test.get_memory_info(lvms)) # Copy ksm_overcommit_guest.py into guests vksmd_src = os.path.join(data_dir.get_shared_dir(), "scripts", "ksm_overcommit_guest.py") dst_dir = "/tmp" for vm in lvms: vm.copy_files_to(vksmd_src, dst_dir) logging.info("Phase 0: PASS") if params['ksm_mode'] == "parallel": logging.info("Starting KSM test parallel mode") split_parallel() logging.info("KSM test parallel mode: PASS") elif params['ksm_mode'] == "serial": logging.info("Starting KSM test serial mode") initialize_guests() separate_first_guest() split_guest() logging.info("KSM test serial mode: PASS")
def run(test, params, env): """ Test rbd disk device. 1.Prepare test environment,destroy or suspend a VM. 2.Prepare disk image. 3.Edit disks xml and start the domain. 4.Perform test operation. 5.Recover test environment. """ vm_name = params.get("main_vm") vm = env.get_vm(vm_name) virsh_dargs = {'debug': True, 'ignore_status': True} # Global variable to store max/current memory, # it may change after attach/detach new_max_mem = None new_cur_mem = None def consume_vm_mem(size=1000, timeout=360): """ To consume guest memory, default size is 1000M """ session = vm.wait_for_login() # Mount tmpfs on /mnt and write to a file on it, # it is the memory operation sh_cmd = ("swapoff -a; mount -t tmpfs -o size={0}M tmpfs " "/mnt; dd if=/dev/urandom of=/mnt/test bs=1M" " count={0}".format(size)) session.cmd(sh_cmd, timeout=timeout) session.close() def mount_hugepages(page_size): """ To mount hugepages :param page_size: unit is kB, it can be 4,2048,1048576,etc """ if page_size == 4: perm = "" else: perm = "pagesize=%dK" % page_size tlbfs_status = utils_misc.is_mounted("hugetlbfs", "/dev/hugepages", "hugetlbfs") if tlbfs_status: utils_misc.umount("hugetlbfs", "/dev/hugepages", "hugetlbfs") utils_misc.mount("hugetlbfs", "/dev/hugepages", "hugetlbfs", perm) def setup_hugepages(page_size=2048, shp_num=2000): """ To setup hugepages :param page_size: unit is kB, it can be 4,2048,1048576,etc :param shp_num: number of hugepage, string type """ mount_hugepages(page_size) utils_memory.set_num_huge_pages(shp_num) config.hugetlbfs_mount = ["/dev/hugepages"] utils_libvirtd.libvirtd_restart() def restore_hugepages(page_size=4): """ To recover hugepages :param page_size: unit is kB, it can be 4,2048,1048576,etc """ mount_hugepages(page_size) config.restore() utils_libvirtd.libvirtd_restart() def check_qemu_cmd(max_mem_rt, tg_size): """ Check qemu command line options. :param max_mem_rt: size of max memory :param tg_size: Target hotplug memory size :return: None """ cmd = ("ps -ef | grep %s | grep -v grep " % vm_name) if discard: if libvirt_version.version_compare(7, 3, 0): cmd = cmd + " | grep " + '\\"discard-data\\":true' else: cmd += " | grep 'discard-data=yes'" elif max_mem_rt: cmd += (" | grep 'slots=%s,maxmem=%sk'" % (max_mem_slots, max_mem_rt)) if tg_size: size = int(tg_size) * 1024 if huge_pages or discard or cold_plug_discard: cmd_str = 'memdimm.\|memory-backend-file,id=ram-node.' cmd += ( " | grep 'memory-backend-file,id=%s' | grep 'size=%s" % (cmd_str, size)) else: cmd_str = 'mem.\|memory-backend-ram,id=ram-node.' cmd += ( " | grep 'memory-backend-ram,id=%s' | grep 'size=%s" % (cmd_str, size)) if pg_size: cmd += ",host-nodes=%s" % node_mask if numa_memnode: for node in numa_memnode: if ('nodeset' in node and node['nodeset'] in node_mask): cmd += ",policy=%s" % node['mode'] cmd += ".*pc-dimm,node=%s" % tg_node if mem_addr: cmd += (".*slot=%s" % (mem_addr['slot'])) cmd += "'" if cold_plug_discard: cmd += " | grep 'discard-data=yes'" # Run the command result = process.run(cmd, shell=True, verbose=True, ignore_status=True) if result.exit_status: test.fail('Qemu command check fail.') def check_guest_meminfo(old_mem, check_option): """ Check meminfo on guest. 
""" assert old_mem is not None session = vm.wait_for_login() # Hot-plugged memory should be online by udev rules udev_file = "/lib/udev/rules.d/80-hotplug-cpu-mem.rules" udev_rules = ('SUBSYSTEM=="memory", ACTION=="add", TEST=="state",' ' ATTR{state}=="offline", ATTR{state}="online"') cmd = ("grep memory %s || echo '%s' >> %s" % (udev_file, udev_rules, udev_file)) session.cmd(cmd) # Wait a while for new memory to be detected. utils_misc.wait_for( lambda: vm.get_totalmem_sys(online) != int(old_mem), 30, first=20.0) new_mem = vm.get_totalmem_sys(online) session.close() logging.debug("Memtotal on guest: %s", new_mem) no_of_times = 1 if at_times: no_of_times = at_times if check_option == "attach": if new_mem != int(old_mem) + (int(tg_size) * no_of_times): test.fail("Total memory on guest couldn't changed after " "attach memory device") if check_option == "detach": if new_mem != int(old_mem) - (int(tg_size) * no_of_times): test.fail("Total memory on guest couldn't changed after " "detach memory device") def check_dom_xml(at_mem=False, dt_mem=False): """ Check domain xml options. """ # Global variable to store max/current memory global new_max_mem global new_cur_mem if attach_option.count("config"): dom_xml = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) else: dom_xml = vm_xml.VMXML.new_from_dumpxml(vm_name) try: xml_max_mem_rt = int(dom_xml.max_mem_rt) xml_max_mem = int(dom_xml.max_mem) xml_cur_mem = int(dom_xml.current_mem) assert int(max_mem_rt) == xml_max_mem_rt # Check attached/detached memory logging.info("at_mem=%s,dt_mem=%s", at_mem, dt_mem) logging.info("detach_device is %s", detach_device) if at_mem: if at_times: assert int(max_mem) + (int(tg_size) * at_times) == xml_max_mem else: assert int(max_mem) + int(tg_size) == xml_max_mem # Bug 1220702, skip the check for current memory if at_times: assert int(cur_mem) + (int(tg_size) * at_times) == xml_cur_mem else: assert int(cur_mem) + int(tg_size) == xml_cur_mem new_max_mem = xml_max_mem new_cur_mem = xml_cur_mem mem_dev = dom_xml.get_devices("memory") memory_devices = 1 if at_times: memory_devices = at_times if len(mem_dev) != memory_devices: test.fail("Found wrong number of memory device") assert int(tg_size) == int(mem_dev[0].target.size) assert int(tg_node) == int(mem_dev[0].target.node) elif dt_mem: if at_times: assert int(new_max_mem) - (int(tg_size) * at_times) == xml_max_mem assert int(new_cur_mem) - (int(tg_size) * at_times) == xml_cur_mem else: assert int(new_max_mem) - int(tg_size) == xml_max_mem # Bug 1220702, skip the check for current memory assert int(new_cur_mem) - int(tg_size) == xml_cur_mem except AssertionError: utils_misc.log_last_traceback() test.fail("Found unmatched memory setting from domain xml") def check_mem_align(): """ Check if set memory align to 256 """ dom_xml = vm_xml.VMXML.new_from_dumpxml(vm_name) dom_mem = {} dom_mem['maxMemory'] = int(dom_xml.max_mem_rt) dom_mem['memory'] = int(dom_xml.memory) dom_mem['currentMemory'] = int(dom_xml.current_mem) cpuxml = dom_xml.cpu numa_cell = cpuxml.numa_cell dom_mem['numacellMemory'] = int(numa_cell[0]['memory']) sum_numa_mem = sum([int(cell['memory']) for cell in numa_cell]) attached_mem = dom_xml.get_devices(device_type='memory')[0] dom_mem['attached_mem'] = attached_mem.target.size all_align = True for key in dom_mem: logging.info('%-20s:%15d', key, dom_mem[key]) if dom_mem[key] % 262144: logging.error('%s not align to 256', key) if key == 'currentMemory': continue all_align = False if not all_align: test.fail('Memory not align to 256') if dom_mem['memory'] == 
sum_numa_mem + dom_mem['attached_mem']: logging.info( 'Check Pass: Memory is equal to (all numa memory + memory device)' ) else: test.fail( 'Memory is not equal to (all numa memory + memory device)') return dom_mem def check_save_restore(): """ Test save and restore operation """ save_file = os.path.join(data_dir.get_tmp_dir(), "%s.save" % vm_name) ret = virsh.save(vm_name, save_file, **virsh_dargs) libvirt.check_exit_status(ret) def _wait_for_restore(): try: virsh.restore(save_file, debug=True, ignore_status=False) return True except Exception as e: logging.error(e) utils_misc.wait_for(_wait_for_restore, 30, step=5) if os.path.exists(save_file): os.remove(save_file) # Login to check vm status vm.wait_for_login().close() def add_device(dev_xml, attach, at_error=False): """ Add memory device by attachment or modify domain xml. """ if attach: ret = virsh.attach_device(vm_name, dev_xml.xml, flagstr=attach_option, debug=True) libvirt.check_exit_status(ret, at_error) else: vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm.name) if numa_cells: del vmxml.max_mem del vmxml.current_mem vmxml.add_device(dev_xml) vmxml.sync() def modify_domain_xml(): """ Modify domain xml and define it. """ vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm.name) mem_unit = params.get("mem_unit", "KiB") vcpu = params.get("vcpu", "4") if max_mem_rt: vmxml.max_mem_rt = int(max_mem_rt) vmxml.max_mem_rt_slots = max_mem_slots vmxml.max_mem_rt_unit = mem_unit if max_mem: vmxml.max_mem = int(max_mem) if cur_mem: vmxml.current_mem = int(cur_mem) if memory_val: vmxml.memory = int(memory_val) if vcpu: vmxml.vcpu = int(vcpu) vcpu_placement = params.get("vcpu_placement", "static") vmxml.placement = vcpu_placement if numa_memnode: vmxml.numa_memory = {} vmxml.numa_memnode = numa_memnode else: try: del vmxml.numa_memory del vmxml.numa_memnode except Exception: # Not exists pass if numa_cells: cells = [ast.literal_eval(x) for x in numa_cells] # Rounding the numa memory values if align_mem_values: for cell in range(cells.__len__()): memory_value = str( utils_numeric.align_value(cells[cell]["memory"], align_to_value)) cells[cell]["memory"] = memory_value cpu_xml = vm_xml.VMCPUXML() cpu_xml.xml = "<cpu mode='host-model'><numa/></cpu>" cpu_mode = params.get("cpu_mode") model_fallback = params.get("model_fallback") if cpu_mode: cpu_xml.mode = cpu_mode if model_fallback: cpu_xml.fallback = model_fallback cpu_xml.numa_cell = cpu_xml.dicts_to_cells(cells) vmxml.cpu = cpu_xml # Delete memory and currentMemory tag, # libvirt will fill it automatically del vmxml.max_mem del vmxml.current_mem # hugepages setting if huge_pages or discard or cold_plug_discard: membacking = vm_xml.VMMemBackingXML() membacking.discard = True membacking.source = '' membacking.source_type = 'file' if huge_pages: hugepages = vm_xml.VMHugepagesXML() pagexml_list = [] for i in range(len(huge_pages)): pagexml = hugepages.PageXML() pagexml.update(huge_pages[i]) pagexml_list.append(pagexml) hugepages.pages = pagexml_list membacking.hugepages = hugepages vmxml.mb = membacking logging.debug("vm xml: %s", vmxml) vmxml.sync() pre_vm_state = params.get("pre_vm_state", "running") attach_device = "yes" == params.get("attach_device", "no") detach_device = "yes" == params.get("detach_device", "no") detach_alias = "yes" == params.get("detach_alias", "no") detach_alias_options = params.get("detach_alias_options") attach_error = "yes" == params.get("attach_error", "no") start_error = "yes" == params.get("start_error", "no") define_error = "yes" == params.get("define_error", "no") 
detach_error = "yes" == params.get("detach_error", "no") maxmem_error = "yes" == params.get("maxmem_error", "no") attach_option = params.get("attach_option", "") test_qemu_cmd = "yes" == params.get("test_qemu_cmd", "no") wait_before_save_secs = int(params.get("wait_before_save_secs", 0)) test_managedsave = "yes" == params.get("test_managedsave", "no") test_save_restore = "yes" == params.get("test_save_restore", "no") test_mem_binding = "yes" == params.get("test_mem_binding", "no") restart_libvirtd = "yes" == params.get("restart_libvirtd", "no") add_mem_device = "yes" == params.get("add_mem_device", "no") test_dom_xml = "yes" == params.get("test_dom_xml", "no") max_mem = params.get("max_mem") max_mem_rt = params.get("max_mem_rt") max_mem_slots = params.get("max_mem_slots", "16") memory_val = params.get('memory_val', '') mem_align = 'yes' == params.get('mem_align', 'no') hot_plug = 'yes' == params.get('hot_plug', 'no') cur_mem = params.get("current_mem") numa_cells = params.get("numa_cells", "").split() set_max_mem = params.get("set_max_mem") align_mem_values = "yes" == params.get("align_mem_values", "no") align_to_value = int(params.get("align_to_value", "65536")) hot_reboot = "yes" == params.get("hot_reboot", "no") rand_reboot = "yes" == params.get("rand_reboot", "no") guest_known_unplug_errors = [] guest_known_unplug_errors.append(params.get("guest_known_unplug_errors")) host_known_unplug_errors = [] host_known_unplug_errors.append(params.get("host_known_unplug_errors")) discard = "yes" == params.get("discard", "no") cold_plug_discard = "yes" == params.get("cold_plug_discard", "no") if cold_plug_discard or discard: mem_discard = 'yes' else: mem_discard = 'no' # params for attached device mem_model = params.get("mem_model", "dimm") tg_size = params.get("tg_size") tg_sizeunit = params.get("tg_sizeunit", 'KiB') tg_node = params.get("tg_node", 0) pg_size = params.get("page_size") pg_unit = params.get("page_unit", "KiB") huge_page_num = int(params.get('huge_page_num', 2000)) node_mask = params.get("node_mask", "0") mem_addr = ast.literal_eval(params.get("memory_addr", "{}")) huge_pages = [ ast.literal_eval(x) for x in params.get("huge_pages", "").split() ] numa_memnode = [ ast.literal_eval(x) for x in params.get("numa_memnode", "").split() ] at_times = int(params.get("attach_times", 1)) online = params.get("mem_online", "no") config = utils_config.LibvirtQemuConfig() setup_hugepages_flag = params.get("setup_hugepages") if (setup_hugepages_flag == "yes"): cpu_arch = cpu_util.get_family() if hasattr(cpu_util, 'get_family')\ else cpu_util.get_cpu_arch() if cpu_arch == 'power8': pg_size = '16384' huge_page_num = 200 elif cpu_arch == 'power9': pg_size = '2048' huge_page_num = 2000 [x.update({'size': pg_size}) for x in huge_pages] setup_hugepages(int(pg_size), shp_num=huge_page_num) # Back up xml file. 
vmxml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) if not libvirt_version.version_compare(1, 2, 14): test.cancel("Memory hotplug not supported in current libvirt version.") if 'align_256m' in params.get('name', ''): arch = platform.machine() if arch.lower() != 'ppc64le': test.cancel('This case is for ppc64le only.') if align_mem_values: # Rounding the following values to 'align' max_mem = utils_numeric.align_value(max_mem, align_to_value) max_mem_rt = utils_numeric.align_value(max_mem_rt, align_to_value) cur_mem = utils_numeric.align_value(cur_mem, align_to_value) tg_size = utils_numeric.align_value(tg_size, align_to_value) try: # Drop caches first for host has enough memory drop_caches() # Destroy domain first if vm.is_alive(): vm.destroy(gracefully=False) modify_domain_xml() numa_info = utils_misc.NumaInfo() logging.debug(numa_info.get_all_node_meminfo()) # Start the domain any way if attach memory device old_mem_total = None if attach_device: vm.start() session = vm.wait_for_login() old_mem_total = vm.get_totalmem_sys(online) logging.debug("Memtotal on guest: %s", old_mem_total) session.close() elif discard: vm.start() session = vm.wait_for_login() check_qemu_cmd(max_mem_rt, tg_size) dev_xml = None # To attach the memory device. if (add_mem_device and not hot_plug) or cold_plug_discard: at_times = int(params.get("attach_times", 1)) randvar = 0 if rand_reboot: rand_value = random.randint(15, 25) logging.debug("reboots at %s", rand_value) for x in xrange(at_times): # If any error excepted, command error status should be # checked in the last time device_alias = "ua-" + str(uuid.uuid4()) dev_xml = utils_hotplug.create_mem_xml( tg_size, pg_size, mem_addr, tg_sizeunit, pg_unit, tg_node, node_mask, mem_model, mem_discard, device_alias) randvar = randvar + 1 logging.debug("attaching device count = %s", x) if x == at_times - 1: add_device(dev_xml, attach_device, attach_error) else: add_device(dev_xml, attach_device) if hot_reboot: vm.reboot() vm.wait_for_login() if rand_reboot and randvar == rand_value: vm.reboot() vm.wait_for_login() randvar = 0 rand_value = random.randint(15, 25) logging.debug("reboots at %s", rand_value) # Check domain xml after attach device. if test_dom_xml: check_dom_xml(at_mem=attach_device) # Set domain state if pre_vm_state == "transient": logging.info("Creating %s...", vm_name) vmxml_for_test = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) if vm.is_alive(): vm.destroy(gracefully=False) vm.undefine() if virsh.create(vmxml_for_test.xml, **virsh_dargs).exit_status: vmxml_backup.define() test.fail("Can't create the domain") elif vm.is_dead(): try: vm.start() vm.wait_for_login().close() except virt_vm.VMStartError as detail: if start_error: pass else: except_msg = "memory hotplug isn't supported by this QEMU binary" if except_msg in detail.reason: test.cancel(detail) test.fail(detail) # Set memory operation if set_max_mem: max_mem_option = params.get("max_mem_option", "") ret = virsh.setmaxmem(vm_name, set_max_mem, flagstr=max_mem_option) libvirt.check_exit_status(ret, maxmem_error) # Hotplug memory device if add_mem_device and hot_plug: process.run('ps -ef|grep qemu', shell=True, verbose=True) session = vm.wait_for_login() original_mem = vm.get_totalmem_sys() dev_xml = utils_hotplug.create_mem_xml(tg_size, pg_size, mem_addr, tg_sizeunit, pg_unit, tg_node, node_mask, mem_model) add_device(dev_xml, True) mem_after = vm.get_totalmem_sys() params['delta'] = mem_after - original_mem # Check domain xml after start the domain. 
if test_dom_xml: check_dom_xml(at_mem=attach_device) if mem_align: dom_mem = check_mem_align() check_qemu_cmd(dom_mem['maxMemory'], dom_mem['attached_mem']) if hot_plug and params['delta'] != dom_mem['attached_mem']: test.fail( 'Memory after attach not equal to original mem + attached mem' ) # Check qemu command line if test_qemu_cmd: check_qemu_cmd(max_mem_rt, tg_size) # Check guest meminfo after attachment if (attach_device and not attach_option.count("config") and not any([attach_error, start_error])): check_guest_meminfo(old_mem_total, check_option="attach") # Consuming memory on guest, # to verify memory changes by numastat if test_mem_binding: pid = vm.get_pid() old_numastat = read_from_numastat(pid, "Total") logging.debug("Numastat: %s", old_numastat) # Increase the memory consumed to 1500 consume_vm_mem(1500) new_numastat = read_from_numastat(pid, "Total") logging.debug("Numastat: %s", new_numastat) # Only check total memory which is the last element if float(new_numastat[-1]) - float(old_numastat[-1]) < 0: test.fail("Numa memory can't be consumed on guest") # Run managedsave command to check domain xml. if test_managedsave: # Wait 10s for vm to be ready before managedsave time.sleep(wait_before_save_secs) ret = virsh.managedsave(vm_name, **virsh_dargs) libvirt.check_exit_status(ret) def _wait_for_vm_start(): try: vm.start() return True except Exception as e: logging.error(e) utils_misc.wait_for(_wait_for_vm_start, timeout=30, step=5) vm.wait_for_login().close() if test_dom_xml: check_dom_xml(at_mem=attach_device) # Run save and restore command to check domain xml if test_save_restore: # Wait 10s for vm to be ready before save time.sleep(wait_before_save_secs) check_save_restore() if test_dom_xml: check_dom_xml(at_mem=attach_device) # Check domain xml after restarting libvirtd if restart_libvirtd: libvirtd = utils_libvirtd.Libvirtd() libvirtd.restart() if test_dom_xml: check_dom_xml(at_mem=attach_device) # Detach the memory device unplug_failed_with_known_error = False if detach_device: dev_xml = utils_hotplug.create_mem_xml(tg_size, pg_size, mem_addr, tg_sizeunit, pg_unit, tg_node, node_mask, mem_model, mem_discard) for x in xrange(at_times): if not detach_alias: ret = virsh.detach_device(vm_name, dev_xml.xml, flagstr=attach_option, debug=True) else: ret = virsh.detach_device_alias(vm_name, device_alias, detach_alias_options, debug=True) if ret.stderr and host_known_unplug_errors: for known_error in host_known_unplug_errors: if (known_error[0] == known_error[-1]) and \ known_error.startswith(("'")): known_error = known_error[1:-1] if known_error in ret.stderr: unplug_failed_with_known_error = True logging.debug( "Known error occurred in Host, while" " hot unplug: %s", known_error) if unplug_failed_with_known_error: break try: libvirt.check_exit_status(ret, detach_error) except Exception as detail: dmesg_file = tempfile.mktemp(dir=data_dir.get_tmp_dir()) try: session = vm.wait_for_login() utils_misc.verify_dmesg(dmesg_log_file=dmesg_file, ignore_result=True, session=session, level_check=5) except Exception: session.close() test.fail("After memory unplug Unable to connect to VM" " or unable to collect dmesg") session.close() if os.path.exists(dmesg_file): with open(dmesg_file, 'r') as f: flag = re.findall( r'memory memory\d+?: Offline failed', f.read()) if not flag: # The attached memory is used by vm, and it could # not be unplugged.The result is expected os.remove(dmesg_file) test.fail(detail) unplug_failed_with_known_error = True os.remove(dmesg_file) # Check whether a known 
error occurred or not dmesg_file = tempfile.mktemp(dir=data_dir.get_tmp_dir()) try: session = vm.wait_for_login() utils_misc.verify_dmesg(dmesg_log_file=dmesg_file, ignore_result=True, session=session, level_check=4) except Exception: session.close() test.fail("After memory unplug Unable to connect to VM" " or unable to collect dmesg") session.close() if guest_known_unplug_errors and os.path.exists(dmesg_file): for known_error in guest_known_unplug_errors: if (known_error[0] == known_error[-1]) and \ known_error.startswith(("'")): known_error = known_error[1:-1] with open(dmesg_file, 'r') as f: if known_error in f.read(): unplug_failed_with_known_error = True logging.debug( "Known error occurred, while hot" " unplug: %s", known_error) if test_dom_xml and not unplug_failed_with_known_error: check_dom_xml(dt_mem=detach_device) # Remove dmesg temp file if os.path.exists(dmesg_file): os.remove(dmesg_file) except xcepts.LibvirtXMLError: if define_error: pass finally: # Delete snapshots. snapshot_lists = virsh.snapshot_list(vm_name) if len(snapshot_lists) > 0: libvirt.clean_up_snapshots(vm_name, snapshot_lists) for snap in snapshot_lists: virsh.snapshot_delete(vm_name, snap, "--metadata") # Recover VM. if vm.is_alive(): vm.destroy(gracefully=False) logging.info("Restoring vm...") if (setup_hugepages_flag == "yes"): restore_hugepages() vmxml_backup.sync()
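# The attach/detach flow above revolves around a <memory model='dimm'> device
# XML that utils_hotplug.create_mem_xml() builds from tg_size, tg_node and
# friends. This is a minimal hand-rolled sketch of that document shape using
# only the standard library; the element layout follows libvirt's memory
# device schema, while the helper name and example values are illustrative.
import xml.etree.ElementTree as ET


def build_dimm_xml(tg_size, tg_node, size_unit="KiB", mem_model="dimm"):
    """Return a libvirt <memory> device element for a hotpluggable DIMM."""
    mem = ET.Element("memory", model=mem_model)
    target = ET.SubElement(mem, "target")
    size = ET.SubElement(target, "size", unit=size_unit)
    size.text = str(tg_size)
    node = ET.SubElement(target, "node")
    node.text = str(tg_node)  # guest NUMA node that receives the DIMM
    return ET.tostring(mem, encoding="unicode")


# e.g. write the result to a file and feed it to "virsh attach-device":
print(build_dimm_xml(tg_size=524288, tg_node=0))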
def run(test, params, env): """ Qemu numa stress test: 1) Boot up a guest and find the node it used 2) Try to allocate memory in that node 3) Run memory heavy stress inside guest 4) Check the memory use status of qemu process 5) Repeat step 2 ~ 4 several times :param test: QEMU test object :param params: Dictionary with the test parameters :param env: Dictionary with test environment. """ host_numa_node = utils_misc.NumaInfo() if len(host_numa_node.online_nodes) < 2: raise error.TestNAError("Host only has one NUMA node, " "skipping test...") timeout = float(params.get("login_timeout", 240)) test_count = int(params.get("test_count", 4)) vm = env.get_vm(params["main_vm"]) vm.verify_alive() session = vm.wait_for_login(timeout=timeout) qemu_pid = vm.get_pid() if test_count < len(host_numa_node.online_nodes): test_count = len(host_numa_node.online_nodes) tmpfs_size = 0 for node in host_numa_node.nodes: node_mem = int(host_numa_node.read_from_node_meminfo(node, "MemTotal")) if tmpfs_size < node_mem: tmpfs_size = node_mem tmpfs_path = params.get("tmpfs_path", "tmpfs_numa_test") tmpfs_path = utils_misc.get_path(data_dir.get_tmp_dir(), tmpfs_path) tmpfs_write_speed = int(params.get("tmpfs_write_speed", 10240)) dd_timeout = tmpfs_size / tmpfs_write_speed * 1.5 mount_fs_size = "size=%dK" % tmpfs_size memory_file = utils_misc.get_path(tmpfs_path, "test") dd_cmd = "dd if=/dev/urandom of=%s bs=1k count=%s" % (memory_file, tmpfs_size) if not os.path.isdir(tmpfs_path): os.mkdir(tmpfs_path) numa_node_malloc = -1 most_used_node, memory_used = max_mem_map_node(host_numa_node, qemu_pid) for test_round in range(test_count): if utils_memory.freememtotal() < tmpfs_size: raise error.TestError("Don't have enough memory to execute this " "test after %s round" % test_round) error.context("Executing stress test round: %s" % test_round, logging.info) numa_node_malloc = most_used_node numa_dd_cmd = "numactl -m %s %s" % (numa_node_malloc, dd_cmd) error.context("Try to allocate memory in node %s" % numa_node_malloc, logging.info) try: utils_misc.mount("none", tmpfs_path, "tmpfs", perm=mount_fs_size) funcatexit.register(env, params.get("type"), utils_misc.umount, "none", tmpfs_path, "tmpfs") utils.system(numa_dd_cmd, timeout=dd_timeout) except Exception as error_msg: if "No space" in str(error_msg): pass else: raise error.TestFail("Can not allocate memory in node %s." " Error message:%s" % (numa_node_malloc, str(error_msg))) error.context("Run memory heavy stress in guest", logging.info) autotest_control.run(test, params, env) error.context("Get the qemu process memory use status", logging.info) node_after, memory_after = max_mem_map_node(host_numa_node, qemu_pid) if node_after == most_used_node and memory_after >= memory_used: raise error.TestFail("Memory still stick in " "node %s" % numa_node_malloc) else: most_used_node = node_after memory_used = memory_after utils_misc.umount("none", tmpfs_path, "tmpfs") funcatexit.unregister(env, params.get("type"), utils_misc.umount, "none", tmpfs_path, "tmpfs") session.cmd("sync; echo 3 > /proc/sys/vm/drop_caches") utils_memory.drop_caches()
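# The allocation probe in the test above reduces to "numactl -m <node> dd ..."
# into a size-capped tmpfs, so every written page must come from one host NUMA
# node. A standalone sketch of that command construction; the helper name and
# paths are illustrative, and a full tmpfs (ENOSPC) is tolerated the same way
# the test tolerates "No space" errors.
import subprocess


def numa_bound_fill(node, memory_file, count_kb):
    """Write count_kb KB of random data with pages bound to one NUMA node."""
    dd_cmd = ("numactl -m %d dd if=/dev/urandom of=%s bs=1k count=%d"
              % (node, memory_file, count_kb))
    # A non-zero exit caused by a full tmpfs is acceptable here.
    return subprocess.call(dd_cmd, shell=True)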
def run(test, params, env): """ KVM reboot time test: 1) Set init run level to 1 2) Restart guest 3) Wait for the console 4) Send a 'reboot' command to the guest 5) Boot up the guest and measure the boot time 6) Restore guest run level :param test: QEMU test object :param params: Dictionary with the test parameters :param env: Dictionary with test environment """ vm = env.get_vm(params["main_vm"]) vm.verify_alive() timeout = int(params.get("login_timeout", 360)) session = vm.wait_for_login(timeout=timeout) error_context.context("Set guest run level to 1", logging.info) single_user_cmd = params['single_user_cmd'] session.cmd(single_user_cmd) try: error_context.context("Restart guest", logging.info) session.cmd('sync') vm.destroy() error_context.context("Boot up guest", logging.info) vm.create() vm.verify_alive() session = vm.wait_for_serial_login(timeout=timeout) error_context.context("Send a 'reboot' command to the guest", logging.info) utils_memory.drop_caches() session.cmd('reboot & exit', timeout=1, ignore_all_errors=True) before_reboot_stamp = utils_misc.monotonic_time() error_context.context("Boot up the guest and measure the boot time", logging.info) session = vm.wait_for_serial_login(timeout=timeout) reboot_time = utils_misc.monotonic_time() - before_reboot_stamp test.write_test_keyval({'result': "%ss" % reboot_time}) expect_time = int(params.get("expect_reboot_time", "30")) logging.info("Reboot time: %ss", reboot_time) finally: try: error_context.context("Restore guest run level", logging.info) restore_level_cmd = params['restore_level_cmd'] session.cmd(restore_level_cmd) session.cmd('sync') vm.destroy(gracefully=False) env_process.preprocess_vm(test, params, env, vm.name) vm.verify_alive() vm.wait_for_login(timeout=timeout) except Exception: logging.warning("Can not restore guest run level, " "need restore the image") params["restore_image_after_testing"] = "yes" if reboot_time > expect_time: test.fail("Guest reboot is taking too long: %ss" % reboot_time) session.close()
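# The reboot-time figure above is a plain monotonic-clock delta: stamp right
# after the in-guest "reboot" is issued, stamp again once the serial console
# accepts a login. A standalone sketch of the same measurement pattern with
# the standard library; utils_misc.monotonic_time() plays the time.monotonic()
# role in the test, and the helper name here is illustrative.
import time


def measure(action, *args, **kwargs):
    """Run action() and return (result, elapsed seconds) on a clock that
    wall-clock adjustments inside or outside the guest cannot skew."""
    before = time.monotonic()
    result = action(*args, **kwargs)
    return result, time.monotonic() - before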
def run(test, params, env): """ Qemu numa stress test: 1) Boot up a guest and find the node it used 2) Try to allocate memory in that node 3) Run memory heavy stress inside guest 4) Check the memory use status of qemu process 5) Repeat step 2 ~ 4 several times :param test: QEMU test object :param params: Dictionary with the test parameters :param env: Dictionary with test environment. """ host_numa_node = utils_misc.NumaInfo() if len(host_numa_node.online_nodes) < 2: test.cancel("Host only has one NUMA node, skipping test...") timeout = float(params.get("login_timeout", 240)) test_count = int(params.get("test_count", 4)) vm = env.get_vm(params["main_vm"]) vm.verify_alive() session = vm.wait_for_login(timeout=timeout) qemu_pid = vm.get_pid() if test_count < len(host_numa_node.online_nodes): test_count = len(host_numa_node.online_nodes) tmpfs_size = params.get_numeric("tmpfs_size") for node in host_numa_node.nodes: node_mem = int(host_numa_node.read_from_node_meminfo(node, "MemTotal")) if tmpfs_size == 0: tmpfs_size = node_mem tmpfs_path = params.get("tmpfs_path", "tmpfs_numa_test") tmpfs_path = utils_misc.get_path(data_dir.get_tmp_dir(), tmpfs_path) tmpfs_write_speed = get_tmpfs_write_speed() dd_timeout = tmpfs_size / tmpfs_write_speed * 1.5 mount_fs_size = "size=%dK" % tmpfs_size memory_file = utils_misc.get_path(tmpfs_path, "test") dd_cmd = "dd if=/dev/urandom of=%s bs=1k count=%s" % (memory_file, tmpfs_size) utils_memory.drop_caches() if utils_memory.freememtotal() < tmpfs_size: test.cancel("Host does not have enough free memory to run the test, " "skipping test...") if not os.path.isdir(tmpfs_path): os.mkdir(tmpfs_path) test_mem = float(params.get("mem")) * float(params.get("mem_ratio", 0.8)) stress_args = "--cpu 4 --io 4 --vm 2 --vm-bytes %sM" % int(test_mem / 2) most_used_node, memory_used = max_mem_map_node(host_numa_node, qemu_pid) for test_round in range(test_count): if os.path.exists(memory_file): os.remove(memory_file) utils_memory.drop_caches() if utils_memory.freememtotal() < tmpfs_size: test.error("Don't have enough memory to execute this " "test after %s round" % test_round) error_context.context("Executing stress test round: %s" % test_round, logging.info) numa_node_malloc = most_used_node numa_dd_cmd = "numactl -m %s %s" % (numa_node_malloc, dd_cmd) error_context.context( "Try to allocate memory in node %s" % numa_node_malloc, logging.info) try: utils_misc.mount("none", tmpfs_path, "tmpfs", perm=mount_fs_size) funcatexit.register(env, params.get("type"), utils_misc.umount, "none", tmpfs_path, "tmpfs") process.system(numa_dd_cmd, timeout=dd_timeout, shell=True) except Exception as error_msg: if "No space" in str(error_msg): pass else: test.fail("Can not allocate memory in node %s." 
" Error message:%s" % (numa_node_malloc, str(error_msg))) error_context.context("Run memory heavy stress in guest", logging.info) stress_test = utils_test.VMStress(vm, "stress", params, stress_args=stress_args) stress_test.load_stress_tool() error_context.context("Get the qemu process memory use status", logging.info) node_after, memory_after = max_mem_map_node(host_numa_node, qemu_pid) if node_after == most_used_node and memory_after >= memory_used: test.fail("Memory still stick in node %s" % numa_node_malloc) else: most_used_node = node_after memory_used = memory_after stress_test.unload_stress() stress_test.clean() utils_misc.umount("none", tmpfs_path, "tmpfs") funcatexit.unregister(env, params.get("type"), utils_misc.umount, "none", tmpfs_path, "tmpfs") session.cmd("sync; echo 3 > /proc/sys/vm/drop_caches") utils_memory.drop_caches() session.close()
# Ignore exception with "ignore_status=True" if progress: option += " --verbose" option += extra_param # For bypass_cache test. Run a shell command to check fd flags while # executing managedsave command bash_cmd = ("let i=1; while((i++<400)); do if [ -e %s ]; then (cat /proc" "/$(lsof -w %s|awk '/libvirt_i/{print $2}')/fdinfo/*%s* |" "grep 'flags:.*%s') && break; else sleep 0.05; fi; done;") # Flags to check that bypass_cache takes effect flags = "014" if test_bypass_cache: # Drop caches. drop_caches() virsh_cmd = "virsh managedsave %s %s" % (option, vm_name) check_flags_parallel(virsh_cmd, bash_cmd % (managed_save_file, managed_save_file, "1", flags), flags) # Wait for VM in shut off state wait_for_state("shut off") virsh_cmd = "virsh start %s %s" % (option, vm_name) check_flags_parallel(virsh_cmd, bash_cmd % (managed_save_file, managed_save_file, "0", flags), flags) # Wait for VM in running state wait_for_state("running") elif test_libvirt_guests: logging.debug("libvirt-guests status: %s", libvirt_guests.status()) if multi_guests:
def run(test, params, env): """ Qemu numa consistency test: 1) Get host numa topological structure 2) Start a guest with the same node as the host, each node has one cpu 3) Get the vcpu thread used cpu id in host and the cpu belongs which node 4) Allocate memory inside guest and bind the allocate process to one of its vcpu. 5) The memory used in host should increase in the same node if the vcpu thread is not switch to other node. 6) Repeat step 3~5 for each vcpu thread of the guest. :param test: QEMU test object :param params: Dictionary with the test parameters :param env: Dictionary with test environment. """ def get_vcpu_used_node(numa_node_info, vcpu_thread): cpu_used_host = utils_misc.get_thread_cpu(vcpu_thread)[0] node_used_host = ([ _ for _ in node_list if cpu_used_host in numa_node_info.nodes[_].cpus ][0]) return node_used_host error.context("Get host numa topological structure", logging.info) timeout = float(params.get("login_timeout", 240)) host_numa_node = utils_misc.NumaInfo() node_list = host_numa_node.online_nodes if len(node_list) < 2: raise error.TestNAError("This host only has one NUMA node, " "skipping test...") node_list.sort() params['smp'] = len(node_list) params['vcpu_cores'] = 1 params['vcpu_threads'] = 1 params['vcpu_sockets'] = params['smp'] params['guest_numa_nodes'] = "" for node_id in range(len(node_list)): params['guest_numa_nodes'] += " node%d" % node_id params['start_vm'] = 'yes' utils_memory.drop_caches() vm = params['main_vm'] env_process.preprocess_vm(test, params, env, vm) vm = env.get_vm(vm) vm.verify_alive() vcpu_threads = vm.vcpu_threads session = vm.wait_for_login(timeout=timeout) dd_size = 256 if dd_size * len(vcpu_threads) > int(params['mem']): dd_size = int(int(params['mem']) / 2 / len(vcpu_threads)) mount_size = dd_size * len(vcpu_threads) mount_cmd = "mount -o size=%dM -t tmpfs none /tmp" % mount_size qemu_pid = vm.get_pid() drop = 0 for cpuid in range(len(vcpu_threads)): error.context("Get vcpu %s used numa node." % cpuid, logging.info) memory_status, _ = utils_test.qemu.get_numa_status( host_numa_node, qemu_pid) node_used_host = get_vcpu_used_node(host_numa_node, vcpu_threads[cpuid]) node_used_host_index = node_list.index(node_used_host) memory_used_before = memory_status[node_used_host_index] error.context("Allocate memory in guest", logging.info) session.cmd(mount_cmd) binded_dd_cmd = "taskset %s" % str(2**int(cpuid)) binded_dd_cmd += " dd if=/dev/urandom of=/tmp/%s" % cpuid binded_dd_cmd += " bs=1M count=%s" % dd_size session.cmd(binded_dd_cmd) error.context("Check qemu process memory use status", logging.info) node_after = get_vcpu_used_node(host_numa_node, vcpu_threads[cpuid]) if node_after != node_used_host: logging.warn("Node used by vcpu thread changed. 
So drop the" " results in this round.") drop += 1 continue memory_status, _ = utils_test.qemu.get_numa_status( host_numa_node, qemu_pid) memory_used_after = memory_status[node_used_host_index] page_size = resource.getpagesize() / 1024 memory_allocated = (memory_used_after - memory_used_before) * page_size / 1024 if 1 - float(memory_allocated) / float(dd_size) > 0.05: numa_hardware_cmd = params.get("numa_hardware_cmd") if numa_hardware_cmd: numa_info = utils.system_output(numa_hardware_cmd, ignore_status=True) msg = "Expect malloc %sM memory in node %s," % (dd_size, node_used_host) msg += "but only malloc %sM \n" % memory_allocated msg += "Please check more details of the numa node: %s" % numa_info raise error.TestFail(msg) session.close() if drop == len(vcpu_threads): raise error.TestError("All test rounds are dropped." " Please test it again.")
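# get_vcpu_used_node() above answers "which host CPU is this vcpu thread on,
# and which NUMA node owns that CPU". Without the avocado helpers, the current
# CPU is the `processor` field of /proc/<pid>/task/<tid>/stat, and node
# ownership is visible as cpuN symlinks under /sys/devices/system/node. A
# standalone sketch (assumes Linux; helper names are illustrative):
import glob
import os


def thread_current_cpu(pid, tid):
    """Return the host CPU the thread last ran on."""
    with open("/proc/%d/task/%d/stat" % (pid, tid)) as f:
        stat = f.read()
    # Split after the ")" that closes the comm field, which may hold spaces.
    fields = stat.rsplit(")", 1)[1].split()
    return int(fields[36])  # `processor` is field 39 of proc stat(5)


def cpu_to_node(cpu):
    """Return the NUMA node id that owns the given CPU id, or None."""
    for link in glob.glob("/sys/devices/system/node/node*/cpu%d" % cpu):
        return int(os.path.basename(os.path.dirname(link))[4:])
    return None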
def run(test, params, env): """ Check KSM can be started automatically when the ksmtuned threshold is reached 1. Get the memory of your host and the KSM_THRES_COEF 2. Boot a guest with memory less than KSM_THRES_COEF threshold 3. Get the memory used in host of process qemu-kvm 4. Get the free memory in host 5. If the free memory size is not smaller than the threshold and guest used memory + threshold is not bigger than total memory in host, check the ksm status in host; KSM should not start in the host 6. Repeat step 2~5 until the rule in step 5 is broken :param test: kvm test object. :param params: Dictionary with test parameters. :param env: Dictionary with the test environment. """ def check_ksm(mem, threshold_reached=False): """ :param mem: Boot guest with given memory, in KB :param threshold_reached: whether the ksmtuned threshold is reached """ def heavyload_install(): if session.cmd_status(test_install_cmd) != 0: logging.warning("Could not find installed heavyload in guest, " "will install it via winutils.iso ") winutil_drive = utils_misc.get_winutils_vol(session) if not winutil_drive: test.cancel("WIN_UTILS CDROM not found.") install_cmd = params["install_cmd"] % winutil_drive session.cmd(install_cmd) def check_qemu_used_mem(qemu_pid, mem): qemu_used_page = process.getoutput(get_qemu_used_mem % qemu_pid, shell=True) qemu_used_mem = float(qemu_used_page) * pagesize if qemu_used_mem < mem * mem_thres: return False return True params['mem'] = mem // 1024 params['start_vm'] = 'yes' vm_name = params['main_vm'] env_process.preprocess_vm(test, params, env, vm_name) vm = env.get_vm(vm_name) session = vm.wait_for_login() qemu_pid = vm.get_pid() if params["os_type"] == "linux": params['stress_args'] = ('--cpu 4 --io 4 --vm 2 --vm-bytes %sM' % (int(params['mem']) // 2)) stress_test = VMStress(vm, "stress", params) stress_test.load_stress_tool() else: install_path = params["install_path"] test_install_cmd = 'dir "%s" | findstr /I heavyload' % install_path heavyload_install() heavyload_bin = r'"%s\heavyload.exe" ' % install_path heavyload_options = ["/MEMORY 100", "/START"] start_cmd = heavyload_bin + " ".join(heavyload_options) stress_tool = BackgroundTest( session.cmd, (start_cmd, stress_timeout, stress_timeout)) stress_tool.start() if not utils_misc.wait_for(stress_tool.is_alive, stress_timeout): test.error("Failed to start heavyload process") if not utils_misc.wait_for(lambda: check_qemu_used_mem(qemu_pid, mem), stress_timeout, 10, 10): test.error("QEMU used memory doesn't reach %s of guest mem %sM in " "%ss" % (mem_thres, mem // 1024, stress_timeout)) time.sleep(30) free_mem_host = utils_memory.freememtotal() ksm_status = process.getoutput(params['cmd_check_ksm_status']) vm.destroy() logging.info( "The ksm threshold is %sM, QEMU used memory is %sM, " "and the total free memory on host is %sM", ksm_thres // 1024, mem // 1024, free_mem_host // 1024) if threshold_reached: if free_mem_host > ksm_thres: test.error("Host memory is not consumed as much as expected") if ksm_status == '0': test.fail("KSM should be running") else: if free_mem_host < ksm_thres: test.error("Host memory is consumed too much more than " "expected") if ksm_status != '0': test.fail("KSM should not be running") total_mem_host = utils_memory.memtotal() utils_memory.drop_caches() free_mem_host = utils_memory.freememtotal() ksm_thres = process.getoutput(params['cmd_get_thres'], shell=True) ksm_thres = int(total_mem_host * (int(re.findall('\\d+', ksm_thres)[0]) / 100)) guest_mem = (free_mem_host - ksm_thres) // 2 if arch.ARCH in ('ppc64',
'ppc64le'): guest_mem = guest_mem - guest_mem % (256 * 1024) status_ksm_service = process.system(params['cmd_status_ksmtuned'], ignore_status=True) if status_ksm_service != 0: process.run(params['cmd_start_ksmtuned']) stress_timeout = params.get("stress_timeout", 1800) mem_thres = float(params.get("mem_thres", 0.95)) get_qemu_used_mem = params['cmd_get_qemu_used_mem'] pagesize = utils_memory.getpagesize() check_ksm(guest_mem) ksm_config_file = params['ksm_config_file'] backup_file = ksm_config_file + '.backup' copyfile(ksm_config_file, backup_file) threshold = params.get_numeric('ksm_threshold') with open(ksm_config_file, "a+") as f: f.write('%s=%s' % (params['ksm_thres_conf'], threshold)) process.run(params['cmd_restart_ksmtuned']) ksm_thres = total_mem_host * (threshold / 100) guest_mem = total_mem_host - ksm_thres // 2 if arch.ARCH in ('ppc64', 'ppc64le'): guest_mem = guest_mem - guest_mem % (256 * 1024) try: check_ksm(guest_mem, threshold_reached=True) finally: copyfile(backup_file, ksm_config_file) os.remove(backup_file) if status_ksm_service != 0: process.run(params['cmd_stop_ksmtuned']) else: process.run(params['cmd_restart_ksmtuned'])
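# The trigger the test above exercises: ksmtuned reads KSM_THRES_COEF as a
# percentage of total host memory and is expected to start ksmd once free
# memory drops below that share. A standalone sketch of the same arithmetic
# over /proc/meminfo (helper names are illustrative):
import re


def meminfo_kb(key):
    """Read one numeric field, e.g. MemTotal or MemFree, from /proc/meminfo."""
    with open("/proc/meminfo") as f:
        return int(re.search(r"^%s:\s+(\d+)\s+kB" % key,
                             f.read(), re.M).group(1))


def ksm_expected_running(thres_coef_percent):
    """True when free memory has fallen below the ksmtuned threshold."""
    threshold_kb = meminfo_kb("MemTotal") * thres_coef_percent // 100
    return meminfo_kb("MemFree") < threshold_kb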
def run(test, params, env): """ Test rbd disk device. 1.Prepare test environment,destroy or suspend a VM. 2.Prepare disk image. 3.Edit disks xml and start the domain. 4.Perform test operation. 5.Recover test environment. """ vm_name = params.get("main_vm") vm = env.get_vm(vm_name) virsh_dargs = {'debug': True, 'ignore_status': True} # Global variable to store max/current memory, # it may change after attach/detach new_max_mem = None new_cur_mem = None def get_vm_memtotal(session): """ Get guest total memory """ proc_meminfo = session.cmd_output("cat /proc/meminfo") # verify format and units are expected return int(re.search(r'MemTotal:\s+(\d+)\s+[kK]B', proc_meminfo).group(1)) def consume_vm_mem(size=1000, timeout=360): """ To consume guest memory, default size is 1000M """ session = vm.wait_for_login() # Mount tmpfs on /mnt and write to a file on it, # it is the memory operation sh_cmd = ("swapoff -a; mount -t tmpfs -o size={0}M tmpfs " "/mnt; dd if=/dev/urandom of=/mnt/test bs=1M" " count={0}".format(size)) session.cmd(sh_cmd, timeout=timeout) session.close() def check_qemu_cmd(): """ Check qemu command line options. """ cmd = ("ps -ef | grep %s | grep -v grep " % vm_name) if max_mem_rt: cmd += (" | grep 'slots=%s,maxmem=%sk'" % (max_mem_slots, max_mem_rt)) if tg_size: size = int(tg_size) * 1024 cmd += (" | grep 'memory-backend-ram,id=memdimm0,size=%s" % size) if pg_size: cmd += ",host-nodes=%s" % node_mask if numa_memnode: for node in numa_memnode: if ('nodeset' in node and node['nodeset'] in node_mask): cmd += ",policy=%s" % node['mode'] cmd += ".*pc-dimm,node=%s" % tg_node if mem_addr: cmd += (".*slot=%s,addr=%s" % (mem_addr['slot'], int(mem_addr['base'], 16))) cmd += "'" # Run the command utils.run(cmd) def check_guest_meminfo(old_mem): """ Check meminfo on guest. """ assert old_mem is not None session = vm.wait_for_login() # Hot-plugged memory should be online by udev rules udev_file = "/lib/udev/rules.d/80-hotplug-cpu-mem.rules" udev_rules = ('SUBSYSTEM=="memory", ACTION=="add", TEST=="state",' ' ATTR{state}=="offline", ATTR{state}="online"') cmd = ("grep memory %s || echo '%s' >> %s" % (udev_file, udev_rules, udev_file)) session.cmd(cmd) # Wait a while for new memory to be detected. utils_misc.wait_for( lambda: get_vm_memtotal(session) != int(old_mem), 5) new_mem = get_vm_memtotal(session) session.close() logging.debug("Memtotal on guest: %s", new_mem) if new_mem != int(old_mem) + int(tg_size): raise error.TestFail("Total memory on guest couldn't" " changed after attach memory " "device") def check_dom_xml(at_mem=False, dt_mem=False): """ Check domain xml options. 
""" # Global variable to store max/current memory global new_max_mem global new_cur_mem if attach_option.count("config"): dom_xml = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) else: dom_xml = vm_xml.VMXML.new_from_dumpxml(vm_name) try: xml_max_mem_rt = int(dom_xml.max_mem_rt) xml_max_mem = int(dom_xml.max_mem) xml_cur_mem = int(dom_xml.current_mem) assert int(max_mem_rt) == xml_max_mem_rt # Check attached/detached memory if at_mem: assert int(max_mem) + int(tg_size) == xml_max_mem # Bug 1220702, skip the check for current memory #assert int(cur_mem) + int(tg_size) == xml_cur_mem new_max_mem = xml_max_mem new_cur_mem = xml_cur_mem mem_dev = dom_xml.get_devices("memory") if len(mem_dev) != 1: raise error.TestFail("Found wrong number of" " memory device") assert int(tg_size) == int(mem_dev[0].target.size) assert int(tg_node) == int(mem_dev[0].target.node) elif dt_mem: assert int(new_max_mem) - int(tg_size) == xml_max_mem # Bug 1220702, skip the check for current memory #assert int(new_cur_mem) - int(tg_size) == xml_cur_mem except AssertionError: utils.log_last_traceback() raise error.TestFail("Found unmatched memory setting" " from domain xml") def check_save_restore(): """ Test save and restore operation """ save_file = os.path.join(test.tmpdir, "%s.save" % vm_name) ret = virsh.save(vm_name, save_file, **virsh_dargs) libvirt.check_exit_status(ret) ret = virsh.restore(save_file, **virsh_dargs) libvirt.check_exit_status(ret) if os.path.exists(save_file): os.remove(save_file) # Login to check vm status vm.wait_for_login().close() def create_mem_xml(): """ Create memory device xml. """ mem_xml = memory.Memory() mem_model = params.get("mem_model", "dimm") mem_xml.mem_model = mem_model if tg_size: tg_xml = memory.Memory.Target() tg_xml.size = int(tg_size) tg_xml.size_unit = tg_sizeunit tg_xml.node = int(tg_node) mem_xml.target = tg_xml if pg_size: src_xml = memory.Memory.Source() src_xml.pagesize = int(pg_size) src_xml.pagesize_unit = pg_unit src_xml.nodemask = node_mask mem_xml.source = src_xml if mem_addr: mem_xml.address = mem_xml.new_mem_address( **{"attrs": mem_addr}) logging.debug("Memory device xml: %s", mem_xml) return mem_xml.copy() def add_device(dev_xml, at_error=False): """ Add memory device by attachment or modify domain xml. """ if attach_device: ret = virsh.attach_device(vm_name, dev_xml.xml, flagstr=attach_option) libvirt.check_exit_status(ret, at_error) else: vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm.name) if numa_cells: del vmxml.max_mem del vmxml.current_mem vmxml.add_device(dev_xml) vmxml.sync() def modify_domain_xml(): """ Modify domain xml and define it. 
""" vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm.name) mem_unit = params.get("mem_unit", "KiB") vcpu = params.get("vcpu", "4") if max_mem_rt: vmxml.max_mem_rt = int(max_mem_rt) vmxml.max_mem_rt_slots = max_mem_slots vmxml.max_mem_rt_unit = mem_unit if vcpu: vmxml.vcpu = int(vcpu) vcpu_placement = params.get("vcpu_placement", "static") vmxml.placement = vcpu_placement if numa_memnode: vmxml.numa_memory = {} vmxml.numa_memnode = numa_memnode else: try: del vmxml.numa_memory del vmxml.numa_memnode except: # Not exists pass if numa_cells: cells = [ast.literal_eval(x) for x in numa_cells] cpu_xml = vm_xml.VMCPUXML() cpu_xml.xml = "<cpu><numa/></cpu>" cpu_mode = params.get("cpu_mode") model_fallback = params.get("model_fallback") if cpu_mode: cpu_xml.mode = cpu_mode if model_fallback: cpu_xml.fallback = model_fallback cpu_xml.numa_cell = cells vmxml.cpu = cpu_xml # Delete memory and currentMemory tag, # libvirt will fill it automatically del vmxml.max_mem del vmxml.current_mem # hugepages setting if huge_pages: membacking = vm_xml.VMMemBackingXML() hugepages = vm_xml.VMHugepagesXML() pagexml_list = [] for i in range(len(huge_pages)): pagexml = hugepages.PageXML() pagexml.update(huge_pages[i]) pagexml_list.append(pagexml) hugepages.pages = pagexml_list membacking.hugepages = hugepages vmxml.mb = membacking logging.debug("vm xml: %s", vmxml) vmxml.sync() pre_vm_state = params.get("pre_vm_state", "running") attach_device = "yes" == params.get("attach_device", "no") detach_device = "yes" == params.get("detach_device", "no") attach_error = "yes" == params.get("attach_error", "no") start_error = "yes" == params.get("start_error", "no") detach_error = "yes" == params.get("detach_error", "no") maxmem_error = "yes" == params.get("maxmem_error", "no") attach_option = params.get("attach_option", "") test_qemu_cmd = "yes" == params.get("test_qemu_cmd", "no") test_managedsave = "yes" == params.get("test_managedsave", "no") test_save_restore = "yes" == params.get("test_save_restore", "no") test_mem_binding = "yes" == params.get("test_mem_binding", "no") restart_libvirtd = "yes" == params.get("restart_libvirtd", "no") add_mem_device = "yes" == params.get("add_mem_device", "no") test_dom_xml = "yes" == params.get("test_dom_xml", "no") max_mem = params.get("max_mem") max_mem_rt = params.get("max_mem_rt") max_mem_slots = params.get("max_mem_slots", "16") #cur_mem = params.get("current_mem") numa_cells = params.get("numa_cells", "").split() set_max_mem = params.get("set_max_mem") # params for attached device tg_size = params.get("tg_size") tg_sizeunit = params.get("tg_sizeunit", 'KiB') tg_node = params.get("tg_node", 0) pg_size = params.get("page_size") pg_unit = params.get("page_unit", "KiB") node_mask = params.get("node_mask", "0") mem_addr = ast.literal_eval(params.get("memory_addr", "{}")) huge_pages = [ast.literal_eval(x) for x in params.get("huge_pages", "").split()] numa_memnode = [ast.literal_eval(x) for x in params.get("numa_memnode", "").split()] # Back up xml file. vmxml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) try: # Drop caches first for host has enough memory drop_caches() # Destroy domain first if vm.is_alive(): vm.destroy(gracefully=False) modify_domain_xml() # Start the domain any way if attach memory device old_mem_total = None if attach_device: vm.start() session = vm.wait_for_login() old_mem_total = get_vm_memtotal(session) logging.debug("Memtotal on guest: %s", old_mem_total) session.close() dev_xml = None # To attach the memory device. 
if add_mem_device: at_times = int(params.get("attach_times", 1)) dev_xml = create_mem_xml() for x in xrange(at_times): # If any error is expected, the command status should be # checked on the last attach if x == at_times - 1: add_device(dev_xml, attach_error) else: add_device(dev_xml) # Check domain xml after attach device. if test_dom_xml: check_dom_xml(at_mem=attach_device) # Set domain state if pre_vm_state == "transient": logging.info("Creating %s...", vm_name) vmxml_for_test = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) if vm.is_alive(): vm.destroy(gracefully=False) vm.undefine() if virsh.create(vmxml_for_test.xml, **virsh_dargs).exit_status: vmxml_backup.define() raise error.TestFail("Can't create the domain") elif vm.is_dead(): try: vm.start() vm.wait_for_login().close() except virt_vm.VMStartError: if start_error: pass else: raise error.TestFail("VM failed to start" " for some reason!") # Set memory operation if set_max_mem: max_mem_option = params.get("max_mem_option", "") ret = virsh.setmaxmem(vm_name, set_max_mem, flagstr=max_mem_option) libvirt.check_exit_status(ret, maxmem_error) # Check domain xml after starting the domain. if test_dom_xml: check_dom_xml(at_mem=attach_device) # Check qemu command line if test_qemu_cmd: check_qemu_cmd() # Check guest meminfo after attachment if (attach_device and not attach_option.count("config") and not any([attach_error, start_error])): check_guest_meminfo(old_mem_total) # Consuming memory on guest, # to verify memory changes by numastat if test_mem_binding: pid = vm.get_pid() old_numastat = read_from_numastat(pid, "Total") logging.debug("Numastat: %s", old_numastat) consume_vm_mem() new_numastat = read_from_numastat(pid, "Total") logging.debug("Numastat: %s", new_numastat) # Only check total memory which is the last element if float(new_numastat[-1]) - float(old_numastat[-1]) < 0: raise error.TestFail("Numa memory can't be consumed" " on guest") # Run managedsave command to check domain xml. if test_managedsave: ret = virsh.managedsave(vm_name, **virsh_dargs) libvirt.check_exit_status(ret) vm.start() vm.wait_for_login().close() if test_dom_xml: check_dom_xml(at_mem=attach_device) # Run save and restore command to check domain xml if test_save_restore: check_save_restore() if test_dom_xml: check_dom_xml(at_mem=attach_device) # Check domain xml after restarting libvirtd if restart_libvirtd: libvirtd = utils_libvirtd.Libvirtd() libvirtd.restart() if test_dom_xml: check_dom_xml(at_mem=attach_device) # Detach the memory device if detach_device: if not dev_xml: dev_xml = create_mem_xml() ret = virsh.detach_device(vm_name, dev_xml.xml, flagstr=attach_option) libvirt.check_exit_status(ret, detach_error) if test_dom_xml: check_dom_xml(dt_mem=detach_device) finally: # Delete snapshots. snapshot_lists = virsh.snapshot_list(vm_name) if len(snapshot_lists) > 0: libvirt.clean_up_snapshots(vm_name, snapshot_lists) for snap in snapshot_lists: virsh.snapshot_delete(vm_name, snap, "--metadata") # Recover VM. if vm.is_alive(): vm.destroy(gracefully=False) logging.info("Restoring vm...") vmxml_backup.sync()
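# For reference (illustrative values, not emitted by the test): the device
# XML that create_mem_xml() above assembles follows libvirt's
# <memory model='dimm'> schema; tg_size/tg_node map to <target>, and the
# optional pg_size/node_mask pair maps to <source>.
EXAMPLE_DIMM_XML = """
<memory model='dimm'>
  <source>
    <pagesize unit='KiB'>2048</pagesize>
    <nodemask>0</nodemask>
  </source>
  <target>
    <size unit='KiB'>524288</size>
    <node>0</node>
  </target>
</memory>
"""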
def run(test, params, env): """ Test memory device attach/detach. 1.Prepare test environment, destroy or suspend a VM. 2.Prepare memory device xml. 3.Edit domain xml and start the domain. 4.Perform test operation. 5.Recover test environment. """ vm_name = params.get("main_vm") vm = env.get_vm(vm_name) virsh_dargs = {'debug': True, 'ignore_status': True} # Global variable to store max/current memory, # it may change after attach/detach new_max_mem = None new_cur_mem = None def consume_vm_mem(size=1000, timeout=360): """ To consume guest memory, default size is 1000M """ session = vm.wait_for_login() # Mount tmpfs on /mnt and write to a file on it, # it is the memory operation sh_cmd = ("swapoff -a; mount -t tmpfs -o size={0}M tmpfs " "/mnt; dd if=/dev/urandom of=/mnt/test bs=1M" " count={0}".format(size)) session.cmd(sh_cmd, timeout=timeout) session.close() def mount_hugepages(page_size): """ To mount hugepages :param page_size: unit is kB, it can be 4,2048,1048576,etc """ if page_size == 4: perm = "" else: perm = "pagesize=%dK" % page_size tlbfs_status = utils_misc.is_mounted("hugetlbfs", "/dev/hugepages", "hugetlbfs") if tlbfs_status: utils_misc.umount("hugetlbfs", "/dev/hugepages", "hugetlbfs") utils_misc.mount("hugetlbfs", "/dev/hugepages", "hugetlbfs", perm) def setup_hugepages(page_size=2048, shp_num=2000): """ To setup hugepages :param page_size: unit is kB, it can be 4,2048,1048576,etc :param shp_num: number of hugepages, string type """ mount_hugepages(page_size) utils_memory.set_num_huge_pages(shp_num) config.hugetlbfs_mount = ["/dev/hugepages"] utils_libvirtd.libvirtd_restart() def restore_hugepages(page_size=4): """ To recover hugepages :param page_size: unit is kB, it can be 4,2048,1048576,etc """ mount_hugepages(page_size) config.restore() utils_libvirtd.libvirtd_restart() def check_qemu_cmd(max_mem_rt, tg_size): """ Check qemu command line options. :param max_mem_rt: size of max memory :param tg_size: Target hotplug memory size :return: None """ cmd = ("ps -ef | grep %s | grep -v grep " % vm_name) if max_mem_rt: cmd += (" | grep 'slots=%s,maxmem=%sk'" % (max_mem_slots, max_mem_rt)) if tg_size: size = int(tg_size) * 1024 cmd_str = 'memdimm.\|memory-backend-ram,id=ram-node.' cmd += (" | grep 'memory-backend-ram,id=%s' | grep 'size=%s" % (cmd_str, size)) if pg_size: cmd += ",host-nodes=%s" % node_mask if numa_memnode: for node in numa_memnode: if ('nodeset' in node and node['nodeset'] in node_mask): cmd += ",policy=%s" % node['mode'] cmd += ".*pc-dimm,node=%s" % tg_node if mem_addr: cmd += (".*slot=%s,addr=%s" % (mem_addr['slot'], int(mem_addr['base'], 16))) cmd += "'" # Run the command result = process.run(cmd, shell=True, verbose=True, ignore_status=True) if result.exit_status: test.fail('Qemu command check fail.') def check_guest_meminfo(old_mem, check_option): """ Check meminfo on guest. """ assert old_mem is not None session = vm.wait_for_login() # Hot-plugged memory should be onlined by udev rules udev_file = "/lib/udev/rules.d/80-hotplug-cpu-mem.rules" udev_rules = ('SUBSYSTEM=="memory", ACTION=="add", TEST=="state",' ' ATTR{state}=="offline", ATTR{state}="online"') cmd = ("grep memory %s || echo '%s' >> %s" % (udev_file, udev_rules, udev_file)) session.cmd(cmd) # Wait a while for new memory to be detected. 
utils_misc.wait_for( lambda: vm.get_totalmem_sys(online) != int(old_mem), 30, first=20.0) new_mem = vm.get_totalmem_sys(online) session.close() logging.debug("Memtotal on guest: %s", new_mem) no_of_times = 1 if at_times: no_of_times = at_times if check_option == "attach": if new_mem != int(old_mem) + (int(tg_size) * no_of_times): test.fail("Total memory on guest didn't change after " "attaching the memory device") if check_option == "detach": if new_mem != int(old_mem) - (int(tg_size) * no_of_times): test.fail("Total memory on guest didn't change after " "detaching the memory device") def check_dom_xml(at_mem=False, dt_mem=False): """ Check domain xml options. """ # Global variable to store max/current memory global new_max_mem global new_cur_mem if attach_option.count("config"): dom_xml = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) else: dom_xml = vm_xml.VMXML.new_from_dumpxml(vm_name) try: xml_max_mem_rt = int(dom_xml.max_mem_rt) xml_max_mem = int(dom_xml.max_mem) xml_cur_mem = int(dom_xml.current_mem) assert int(max_mem_rt) == xml_max_mem_rt # Check attached/detached memory if at_mem: if at_times: assert int(max_mem) + (int(tg_size) * at_times) == xml_max_mem else: assert int(max_mem) + int(tg_size) == xml_max_mem # Bug 1220702, skip the check for current memory if at_times: assert int(cur_mem) + (int(tg_size) * at_times) == xml_cur_mem else: assert int(cur_mem) + int(tg_size) == xml_cur_mem new_max_mem = xml_max_mem new_cur_mem = xml_cur_mem mem_dev = dom_xml.get_devices("memory") memory_devices = 1 if at_times: memory_devices = at_times if len(mem_dev) != memory_devices: test.fail("Found wrong number of memory devices") assert int(tg_size) == int(mem_dev[0].target.size) assert int(tg_node) == int(mem_dev[0].target.node) elif dt_mem: if at_times: assert int(new_max_mem) - (int(tg_size) * at_times) == xml_max_mem assert int(new_cur_mem) - (int(tg_size) * at_times) == xml_cur_mem else: assert int(new_max_mem) - int(tg_size) == xml_max_mem # Bug 1220702, skip the check for current memory assert int(new_cur_mem) - int(tg_size) == xml_cur_mem except AssertionError: utils_misc.log_last_traceback() test.fail("Found unmatched memory setting from domain xml") def check_mem_align(): """ Check if memory values align to 256 """ dom_xml = vm_xml.VMXML.new_from_dumpxml(vm_name) dom_mem = {} dom_mem['maxMemory'] = int(dom_xml.max_mem_rt) dom_mem['memory'] = int(dom_xml.memory) dom_mem['currentMemory'] = int(dom_xml.current_mem) cpuxml = dom_xml.cpu numa_cell = cpuxml.numa_cell dom_mem['numacellMemory'] = int(numa_cell[0]['memory']) sum_numa_mem = sum([int(cell['memory']) for cell in numa_cell]) attached_mem = dom_xml.get_devices(device_type='memory')[0] dom_mem['attached_mem'] = attached_mem.target.size all_align = True for key in dom_mem: logging.info('%-20s:%15d', key, dom_mem[key]) if dom_mem[key] % 256: logging.error('%s not align to 256', key) all_align = False if not all_align: test.fail('Memory not align to 256') if dom_mem['memory'] == sum_numa_mem + dom_mem['attached_mem']: logging.info('Check Pass: Memory is equal to (all numa memory + memory device)') else: test.fail('Memory is not equal to (all numa memory + memory device)') return dom_mem def check_save_restore(): """ Test save and restore operation """ save_file = os.path.join(data_dir.get_tmp_dir(), "%s.save" % vm_name) ret = virsh.save(vm_name, save_file, **virsh_dargs) libvirt.check_exit_status(ret) ret = virsh.restore(save_file, **virsh_dargs) libvirt.check_exit_status(ret) if os.path.exists(save_file): os.remove(save_file) # Login to check 
vm status vm.wait_for_login().close() def add_device(dev_xml, attach, at_error=False): """ Add memory device by attachment or modify domain xml. """ if attach: ret = virsh.attach_device(vm_name, dev_xml.xml, flagstr=attach_option, debug=True) libvirt.check_exit_status(ret, at_error) else: vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm.name) if numa_cells: del vmxml.max_mem del vmxml.current_mem vmxml.add_device(dev_xml) vmxml.sync() def modify_domain_xml(): """ Modify domain xml and define it. """ vmxml = vm_xml.VMXML.new_from_inactive_dumpxml(vm.name) mem_unit = params.get("mem_unit", "KiB") vcpu = params.get("vcpu", "4") if max_mem_rt: vmxml.max_mem_rt = int(max_mem_rt) vmxml.max_mem_rt_slots = max_mem_slots vmxml.max_mem_rt_unit = mem_unit if memory_val: vmxml.memory = int(memory_val) if vcpu: vmxml.vcpu = int(vcpu) vcpu_placement = params.get("vcpu_placement", "static") vmxml.placement = vcpu_placement if numa_memnode: vmxml.numa_memory = {} vmxml.numa_memnode = numa_memnode else: try: del vmxml.numa_memory del vmxml.numa_memnode except Exception: # Not exists pass if numa_cells: cells = [ast.literal_eval(x) for x in numa_cells] # Rounding the numa memory values if align_mem_values: for cell in range(cells.__len__()): memory_value = str(utils_numeric.align_value( cells[cell]["memory"], align_to_value)) cells[cell]["memory"] = memory_value cpu_xml = vm_xml.VMCPUXML() cpu_xml.xml = "<cpu><numa/></cpu>" cpu_mode = params.get("cpu_mode") model_fallback = params.get("model_fallback") if cpu_mode: cpu_xml.mode = cpu_mode if model_fallback: cpu_xml.fallback = model_fallback cpu_xml.numa_cell = cells vmxml.cpu = cpu_xml # Delete memory and currentMemory tag, # libvirt will fill it automatically del vmxml.max_mem del vmxml.current_mem # hugepages setting if huge_pages: membacking = vm_xml.VMMemBackingXML() hugepages = vm_xml.VMHugepagesXML() pagexml_list = [] for i in range(len(huge_pages)): pagexml = hugepages.PageXML() pagexml.update(huge_pages[i]) pagexml_list.append(pagexml) hugepages.pages = pagexml_list membacking.hugepages = hugepages vmxml.mb = membacking logging.debug("vm xml: %s", vmxml) vmxml.sync() pre_vm_state = params.get("pre_vm_state", "running") attach_device = "yes" == params.get("attach_device", "no") detach_device = "yes" == params.get("detach_device", "no") attach_error = "yes" == params.get("attach_error", "no") start_error = "yes" == params.get("start_error", "no") detach_error = "yes" == params.get("detach_error", "no") maxmem_error = "yes" == params.get("maxmem_error", "no") attach_option = params.get("attach_option", "") test_qemu_cmd = "yes" == params.get("test_qemu_cmd", "no") test_managedsave = "yes" == params.get("test_managedsave", "no") test_save_restore = "yes" == params.get("test_save_restore", "no") test_mem_binding = "yes" == params.get("test_mem_binding", "no") restart_libvirtd = "yes" == params.get("restart_libvirtd", "no") add_mem_device = "yes" == params.get("add_mem_device", "no") test_dom_xml = "yes" == params.get("test_dom_xml", "no") max_mem = params.get("max_mem") max_mem_rt = params.get("max_mem_rt") max_mem_slots = params.get("max_mem_slots", "16") memory_val = params.get('memory_val', '') mem_align = 'yes' == params.get('mem_align', 'no') hot_plug = 'yes' == params.get('hot_plug', 'no') cur_mem = params.get("current_mem") numa_cells = params.get("numa_cells", "").split() set_max_mem = params.get("set_max_mem") align_mem_values = "yes" == params.get("align_mem_values", "no") align_to_value = int(params.get("align_to_value", "65536")) hot_reboot 
= "yes" == params.get("hot_reboot", "no") rand_reboot = "yes" == params.get("rand_reboot", "no") guest_known_unplug_errors = [] guest_known_unplug_errors.append(params.get("guest_known_unplug_errors")) host_known_unplug_errors = [] host_known_unplug_errors.append(params.get("host_known_unplug_errors")) # params for attached device mem_model = params.get("mem_model", "dimm") tg_size = params.get("tg_size") tg_sizeunit = params.get("tg_sizeunit", 'KiB') tg_node = params.get("tg_node", 0) pg_size = params.get("page_size") pg_unit = params.get("page_unit", "KiB") node_mask = params.get("node_mask", "0") mem_addr = ast.literal_eval(params.get("memory_addr", "{}")) huge_pages = [ast.literal_eval(x) for x in params.get("huge_pages", "").split()] numa_memnode = [ast.literal_eval(x) for x in params.get("numa_memnode", "").split()] at_times = int(params.get("attach_times", 1)) online = params.get("mem_online", "no") config = utils_config.LibvirtQemuConfig() setup_hugepages_flag = params.get("setup_hugepages") if (setup_hugepages_flag == "yes"): setup_hugepages(int(pg_size)) # Back up xml file. vmxml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) if not libvirt_version.version_compare(1, 2, 14): test.cancel("Memory hotplug not supported in current libvirt version.") if 'align_256m' in params.get('name', ''): arch = platform.machine() if arch.lower() != 'ppc64le': test.cancel('This case is for ppc64le only.') if align_mem_values: # Rounding the following values to 'align' max_mem = utils_numeric.align_value(max_mem, align_to_value) max_mem_rt = utils_numeric.align_value(max_mem_rt, align_to_value) cur_mem = utils_numeric.align_value(cur_mem, align_to_value) tg_size = utils_numeric.align_value(tg_size, align_to_value) try: # Drop caches first for host has enough memory drop_caches() # Destroy domain first if vm.is_alive(): vm.destroy(gracefully=False) modify_domain_xml() # Start the domain any way if attach memory device old_mem_total = None if attach_device: vm.start() session = vm.wait_for_login() old_mem_total = vm.get_totalmem_sys(online) logging.debug("Memtotal on guest: %s", old_mem_total) session.close() dev_xml = None # To attach the memory device. if add_mem_device and not hot_plug: at_times = int(params.get("attach_times", 1)) dev_xml = utils_hotplug.create_mem_xml(tg_size, pg_size, mem_addr, tg_sizeunit, pg_unit, tg_node, node_mask, mem_model) randvar = 0 rand_value = random.randint(15, 25) logging.debug("reboots at %s", rand_value) for x in xrange(at_times): # If any error excepted, command error status should be # checked in the last time randvar = randvar + 1 logging.debug("attaching device count = %s", x) if x == at_times - 1: add_device(dev_xml, attach_device, attach_error) else: add_device(dev_xml, attach_device) if hot_reboot: vm.reboot() vm.wait_for_login() if rand_reboot and randvar == rand_value: randvar = 0 rand_value = random.randint(15, 25) logging.debug("reboots at %s", rand_value) vm.reboot() vm.wait_for_login() # Check domain xml after attach device. 
if test_dom_xml: check_dom_xml(at_mem=attach_device) # Set domain state if pre_vm_state == "transient": logging.info("Creating %s...", vm_name) vmxml_for_test = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) if vm.is_alive(): vm.destroy(gracefully=False) vm.undefine() if virsh.create(vmxml_for_test.xml, **virsh_dargs).exit_status: vmxml_backup.define() test.fail("Can't create the domain") elif vm.is_dead(): try: vm.start() vm.wait_for_login().close() except virt_vm.VMStartError as detail: if start_error: pass else: except_msg = "memory hotplug isn't supported by this QEMU binary" if except_msg in detail.reason: test.cancel(detail) test.fail(detail) # Set memory operation if set_max_mem: max_mem_option = params.get("max_mem_option", "") ret = virsh.setmaxmem(vm_name, set_max_mem, flagstr=max_mem_option) libvirt.check_exit_status(ret, maxmem_error) # Hotplug memory device if add_mem_device and hot_plug: process.run('ps -ef|grep qemu', shell=True, verbose=True) session = vm.wait_for_login() original_mem = vm.get_totalmem_sys() dev_xml = utils_hotplug.create_mem_xml(tg_size, pg_size, mem_addr, tg_sizeunit, pg_unit, tg_node, node_mask, mem_model) add_device(dev_xml, True) mem_after = vm.get_totalmem_sys() params['delta'] = mem_after - original_mem # Check domain xml after starting the domain. if test_dom_xml: check_dom_xml(at_mem=attach_device) if mem_align: dom_mem = check_mem_align() check_qemu_cmd(dom_mem['maxMemory'], dom_mem['attached_mem']) if hot_plug and params['delta'] != dom_mem['attached_mem']: test.fail('Memory after attach not equal to original mem + attached mem') # Check qemu command line if test_qemu_cmd: check_qemu_cmd(max_mem_rt, tg_size) # Check guest meminfo after attachment if (attach_device and not attach_option.count("config") and not any([attach_error, start_error])): check_guest_meminfo(old_mem_total, check_option="attach") # Consuming memory on guest, # to verify memory changes by numastat if test_mem_binding: pid = vm.get_pid() old_numastat = read_from_numastat(pid, "Total") logging.debug("Numastat: %s", old_numastat) consume_vm_mem() new_numastat = read_from_numastat(pid, "Total") logging.debug("Numastat: %s", new_numastat) # Only check total memory which is the last element if float(new_numastat[-1]) - float(old_numastat[-1]) < 0: test.fail("Numa memory can't be consumed on guest") # Run managedsave command to check domain xml. 
if test_managedsave: ret = virsh.managedsave(vm_name, **virsh_dargs) libvirt.check_exit_status(ret) vm.start() vm.wait_for_login().close() if test_dom_xml: check_dom_xml(at_mem=attach_device) # Run save and restore command to check domain xml if test_save_restore: check_save_restore() if test_dom_xml: check_dom_xml(at_mem=attach_device) # Check domain xml after restarting libvirtd if restart_libvirtd: libvirtd = utils_libvirtd.Libvirtd() libvirtd.restart() if test_dom_xml: check_dom_xml(at_mem=attach_device) # Detach the memory device unplug_failed_with_known_error = False if detach_device: if not dev_xml: dev_xml = utils_hotplug.create_mem_xml(tg_size, pg_size, mem_addr, tg_sizeunit, pg_unit, tg_node, node_mask, mem_model) for x in xrange(at_times): ret = virsh.detach_device(vm_name, dev_xml.xml, flagstr=attach_option) if ret.stderr and host_known_unplug_errors: for known_error in host_known_unplug_errors: if (known_error[0] == known_error[-1]) and \ known_error.startswith(("'")): known_error = known_error[1:-1] if known_error in ret.stderr: unplug_failed_with_known_error = True logging.debug("Known error occurred in host while" " hot unplug: %s", known_error) if unplug_failed_with_known_error: break try: libvirt.check_exit_status(ret, detach_error) except Exception as detail: dmesg_file = tempfile.mktemp(dir=data_dir.get_tmp_dir()) try: session = vm.wait_for_login() utils_misc.verify_dmesg(dmesg_log_file=dmesg_file, ignore_result=True, session=session, level_check=5) except Exception: session.close() test.fail("Unable to connect to VM or collect" " dmesg after memory unplug") session.close() if os.path.exists(dmesg_file): with open(dmesg_file, 'r') as f: flag = re.findall( r'memory memory\d+?: Offline failed', f.read()) if not flag: # The attached memory is used by vm, and it could not be unplugged # The result is expected os.remove(dmesg_file) test.fail(detail) unplug_failed_with_known_error = True os.remove(dmesg_file) # Check whether a known error occurred or not dmesg_file = tempfile.mktemp(dir=data_dir.get_tmp_dir()) try: session = vm.wait_for_login() utils_misc.verify_dmesg(dmesg_log_file=dmesg_file, ignore_result=True, session=session, level_check=4) except Exception: session.close() test.fail("Unable to connect to VM or collect" " dmesg after memory unplug") session.close() if guest_known_unplug_errors and os.path.exists(dmesg_file): for known_error in guest_known_unplug_errors: if (known_error[0] == known_error[-1]) and \ known_error.startswith(("'")): known_error = known_error[1:-1] with open(dmesg_file, 'r') as f: if known_error in f.read(): unplug_failed_with_known_error = True logging.debug("Known error occurred while hot unplug" ": %s", known_error) if test_dom_xml and not unplug_failed_with_known_error: check_dom_xml(dt_mem=detach_device) # Remove dmesg temp file if os.path.exists(dmesg_file): os.remove(dmesg_file) finally: # Delete snapshots. snapshot_lists = virsh.snapshot_list(vm_name) if len(snapshot_lists) > 0: libvirt.clean_up_snapshots(vm_name, snapshot_lists) for snap in snapshot_lists: virsh.snapshot_delete(vm_name, snap, "--metadata") # Recover VM. if vm.is_alive(): vm.destroy(gracefully=False) logging.info("Restoring vm...") if (setup_hugepages_flag == "yes"): restore_hugepages() vmxml_backup.sync()
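# Illustrative guest-side sketch (not used by the test above, helper name
# hypothetical): equivalent to the udev rule check_guest_meminfo() installs,
# it onlines any memory block the kernel registered but left offline after
# a DIMM hot-plug, using the standard sysfs memory-block interface.
import glob

def online_hotplugged_memory():
    for path in glob.glob("/sys/devices/system/memory/memory*/state"):
        with open(path) as state:
            offline = state.read().strip() == "offline"
        if offline:
            with open(path, "w") as state:
                state.write("online")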
def run(test, params, env): """ Tests KSM (Kernel Shared Memory) capability by allocating and filling KVM guests memory using various values. KVM sets the memory as MADV_MERGEABLE so all VM's memory can be merged. The workers in the guest write to a tmpfs filesystem, so allocations are not limited by process max memory, only by VM's memory. Two test modes are supported - serial and parallel. Serial mode - uses multiple VMs, allocates memory per guest and always verifies the correct number of shared memory. 0) Prints out the setup and initialize guest(s) 1) Fills guest with the same number (S1) 2) Random fill on the first guest 3) Random fill of the remaining VMs one by one until the memory is completely filled (KVM stops machines which ask for additional memory until there is available memory) (S2, shouldn't finish) 4) Destroy all VMs but the last one 5) Checks the last VMs memory for corruption Parallel mode - uses one VM with multiple allocator workers. Executes scenarios in parallel to put more stress on the KVM. 0) Prints out the setup and initialize guest(s) 1) Fills memory with the same number (S1) 2) Fills memory with random numbers (S2) 3) Verifies all pages 4) Fills memory with the same number (S2) 5) Changes the last 96B (S3) Scenarios: S1) Fill all vms with the same value (all pages should be merged into 1) S2) Random fill (all pages should be split) S3) Fill last 96B (change only last 96B of each page; some pages will be merged; there was a bug with data corruption) Every worker has a unique random key so we are able to verify the filled values. :param test: kvm test object. :param params: Dictionary with test parameters. :param env: Dictionary with the test environment. :param cfg: ksm_swap - use swap? :param cfg: ksm_overcommit_ratio - memory overcommit (serial mode only) :param cfg: ksm_parallel_ratio - number of workers (parallel mode only) :param cfg: ksm_host_reserve - override memory reserve on host in MB :param cfg: ksm_guest_reserve - override memory reserve on guests in MB :param cfg: ksm_mode - test mode {serial, parallel} :param cfg: ksm_perf_ratio - performance ratio, increase it when your machine is too slow """ def _start_allocator(vm, session, timeout): """ Execute ksm_overcommit_guest.py on guest, wait until it's initialized. :param vm: VM object. :param session: Remote session to a VM object. :param timeout: Timeout that will be used to verify if ksm_overcommit_guest.py started properly. """ logging.debug("Starting ksm_overcommit_guest.py on guest %s", vm.name) session.sendline("python /tmp/ksm_overcommit_guest.py") try: session.read_until_last_line_matches(["PASS:", "FAIL:"], timeout) except aexpect.ExpectProcessTerminatedError as details: e_msg = ("Command ksm_overcommit_guest.py on vm '%s' failed: %s" % (vm.name, str(details))) test.fail(e_msg) def _execute_allocator(command, vm, session, timeout): """ Execute a given command on ksm_overcommit_guest.py main loop, indicating the vm the command was executed on. :param command: Command that will be executed. :param vm: VM object. :param session: Remote session to VM object. :param timeout: Timeout used to verify expected output. 
:return: Tuple (match index, data) """ logging.debug( "Executing '%s' on ksm_overcommit_guest.py loop, " "vm: %s, timeout: %s", command, vm.name, timeout) session.sendline(command) try: (match, data) = session.read_until_last_line_matches(["PASS:", "FAIL:"], timeout) except aexpect.ExpectProcessTerminatedError as details: e_msg = ("Failed to execute command '%s' on " "ksm_overcommit_guest.py, vm '%s': %s" % (command, vm.name, str(details))) test.fail(e_msg) return (match, data) def get_ksmstat(): """ Return sharing memory by ksm in MB :return: memory in MB """ fpages = open('/sys/kernel/mm/ksm/pages_sharing') ksm_pages = int(fpages.read()) fpages.close() return ((ksm_pages * 4096) / 1e6) def initialize_guests(): """ Initialize guests (fill their memories with specified patterns). """ logging.info("Phase 1: filling guest memory pages") for session in lsessions: vm = lvms[lsessions.index(session)] logging.debug("Turning off swap on vm %s", vm.name) session.cmd("swapoff -a", timeout=300) # Start the allocator _start_allocator(vm, session, 60 * perf_ratio) # Execute allocator on guests for i in range(0, vmsc): vm = lvms[i] cmd = "mem = MemFill(%d, %s, %s)" % (ksm_size, skeys[i], dkeys[i]) _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio) cmd = "mem.value_fill(%d)" % skeys[0] _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * 2 * perf_ratio) # Let ksm_overcommit_guest.py do its job # (until shared mem reaches expected value) shm = 0 j = 0 logging.debug("Target shared meminfo for guest %s: %s", vm.name, ksm_size) while ((new_ksm and (shm < (ksm_size * (i + 1)))) or (not new_ksm and (shm < (ksm_size)))): if j > 64: logging.debug(utils_test.get_memory_info(lvms)) test.error("SHM didn't merge the memory until " "the DL on guest: %s" % vm.name) pause = ksm_size / 200 * perf_ratio logging.debug("Waiting %ds before proceeding...", pause) time.sleep(pause) if (new_ksm): shm = get_ksmstat() else: shm = vm.get_shared_meminfo() logging.debug( "Shared meminfo for guest %s after " "iteration %s: %s", vm.name, j, shm) j += 1 # Keep some reserve pause = ksm_size / 200 * perf_ratio logging.debug("Waiting %ds before proceeding...", pause) time.sleep(pause) logging.debug(utils_test.get_memory_info(lvms)) logging.info("Phase 1: PASS") def separate_first_guest(): """ Separate memory of the first guest by generating special random series """ logging.info("Phase 2: Split the pages on the first guest") cmd = "mem.static_random_fill()" data = _execute_allocator(cmd, lvms[0], lsessions[0], fill_base_timeout * 2 * perf_ratio)[1] r_msg = data.splitlines()[-1] logging.debug("Return message of static_random_fill: %s", r_msg) out = int(r_msg.split()[4]) logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", ksm_size, out, (ksm_size * 1000 / out)) logging.debug(utils_test.get_memory_info(lvms)) logging.debug("Phase 2: PASS") def split_guest(): """ Sequential split of pages on guests up to memory limit """ logging.info("Phase 3a: Sequential split of pages on guests up to " "memory limit") last_vm = 0 session = None vm = None for i in range(1, vmsc): # Check VMs for j in range(0, vmsc): if not lvms[j].is_alive: e_msg = ("VM %d died while executing static_random_fill on" " VM %d in allocator loop" % (j, i)) test.fail(e_msg) vm = lvms[i] session = lsessions[i] cmd = "mem.static_random_fill()" logging.debug( "Executing %s on ksm_overcommit_guest.py loop, " "vm: %s", cmd, vm.name) session.sendline(cmd) out = "" try: logging.debug("Watching host mem while filling vm %s memory", vm.name) while (not 
out.startswith("PASS") and not out.startswith("FAIL")): if not vm.is_alive(): e_msg = ("VM %d died while executing " "static_random_fill on allocator loop" % i) test.fail(e_msg) free_mem = int(utils_memory.read_from_meminfo("MemFree")) if (ksm_swap): free_mem = ( free_mem + int(utils_memory.read_from_meminfo("SwapFree"))) logging.debug("Free memory on host: %d", free_mem) # We need to keep some memory for python to run. if (free_mem < 64000) or (ksm_swap and free_mem < (450000 * perf_ratio)): vm.pause() for j in range(0, i): lvms[j].destroy(gracefully=False) time.sleep(20) vm.resume() logging.debug("Only %s free memory, killing %d guests", free_mem, (i - 1)) last_vm = i out = session.read_nonblocking(0.1, 1) time.sleep(2) except OSError: logging.debug("Only %s host free memory, killing %d guests", free_mem, (i - 1)) logging.debug("Stopping %s", vm.name) vm.pause() for j in range(0, i): logging.debug("Destroying %s", lvms[j].name) lvms[j].destroy(gracefully=False) time.sleep(20) vm.resume() last_vm = i if last_vm != 0: break logging.debug("Memory filled for guest %s", vm.name) logging.info("Phase 3a: PASS") logging.info("Phase 3b: Verify memory of the max stressed VM") for i in range(last_vm + 1, vmsc): lsessions[i].close() if i == (vmsc - 1): logging.debug(utils_test.get_memory_info([lvms[i]])) logging.debug("Destroying guest %s", lvms[i].name) lvms[i].destroy(gracefully=False) # Verify last machine with randomly generated memory cmd = "mem.static_random_verify()" _execute_allocator(cmd, lvms[last_vm], lsessions[last_vm], (mem / 200 * 50 * perf_ratio)) logging.debug(utils_test.get_memory_info([lvms[last_vm]])) lsessions[last_vm].cmd_output("die()", 20) lvms[last_vm].destroy(gracefully=False) logging.info("Phase 3b: PASS") def split_parallel(): """ Parallel page spliting """ logging.info("Phase 1: parallel page spliting") # We have to wait until allocator is finished (it waits 5 seconds to # clean the socket session = lsessions[0] vm = lvms[0] for i in range(1, max_alloc): lsessions.append(vm.wait_for_login(timeout=360)) session.cmd("swapoff -a", timeout=300) for i in range(0, max_alloc): # Start the allocator _start_allocator(vm, lsessions[i], 60 * perf_ratio) logging.info("Phase 1: PASS") logging.info("Phase 2a: Simultaneous merging") logging.debug("Memory used by allocator on guests = %dMB", (ksm_size / max_alloc)) for i in range(0, max_alloc): cmd = "mem = MemFill(%d, %s, %s)" % ( (ksm_size / max_alloc), skeys[i], dkeys[i]) _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio) cmd = "mem.value_fill(%d)" % (skeys[0]) _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * perf_ratio) # Wait until ksm_overcommit_guest.py merges pages (3 * ksm_size / 3) shm = 0 i = 0 logging.debug("Target shared memory size: %s", ksm_size) while (shm < ksm_size): if i > 64: logging.debug(utils_test.get_memory_info(lvms)) test.error("SHM didn't merge the memory until DL") pause = ksm_size / 200 * perf_ratio logging.debug("Waiting %ds before proceed...", pause) time.sleep(pause) if (new_ksm): shm = get_ksmstat() else: shm = vm.get_shared_meminfo() logging.debug("Shared meminfo after attempt %s: %s", i, shm) i += 1 logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2a: PASS") logging.info("Phase 2b: Simultaneous spliting") # Actual splitting for i in range(0, max_alloc): cmd = "mem.static_random_fill()" data = _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * perf_ratio)[1] data = data.splitlines()[-1] logging.debug(data) out = int(data.split()[4]) 
logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", (ksm_size / max_alloc), out, (ksm_size * 1000 / out / max_alloc)) logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2b: PASS") logging.info("Phase 2c: Simultaneous verification") for i in range(0, max_alloc): cmd = "mem.static_random_verify()" data = _execute_allocator(cmd, vm, lsessions[i], (mem / 200 * 50 * perf_ratio))[1] logging.info("Phase 2c: PASS") logging.info("Phase 2d: Simultaneous merging") # Actual splitting for i in range(0, max_alloc): cmd = "mem.value_fill(%d)" % skeys[0] data = _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * 2 * perf_ratio)[1] logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2d: PASS") logging.info("Phase 2e: Simultaneous verification") for i in range(0, max_alloc): cmd = "mem.value_check(%d)" % skeys[0] data = _execute_allocator(cmd, vm, lsessions[i], (mem / 200 * 50 * perf_ratio))[1] logging.info("Phase 2e: PASS") logging.info("Phase 2f: Simultaneous spliting last 96B") for i in range(0, max_alloc): cmd = "mem.static_random_fill(96)" data = _execute_allocator(cmd, vm, lsessions[i], fill_base_timeout * perf_ratio)[1] data = data.splitlines()[-1] out = int(data.split()[4]) logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", ksm_size / max_alloc, out, (ksm_size * 1000 / out / max_alloc)) logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2f: PASS") logging.info("Phase 2g: Simultaneous verification last 96B") for i in range(0, max_alloc): cmd = "mem.static_random_verify(96)" _, data = _execute_allocator(cmd, vm, lsessions[i], (mem / 200 * 50 * perf_ratio)) logging.debug(utils_test.get_memory_info([vm])) logging.info("Phase 2g: PASS") logging.debug("Cleaning up...") for i in range(0, max_alloc): lsessions[i].cmd_output("die()", 20) session.close() vm.destroy(gracefully=False) # Main test code logging.info("Starting phase 0: Initialization") if process.run("ps -C ksmtuned", ignore_status=True).exit_status == 0: logging.info("Killing ksmtuned...") process.run("killall ksmtuned") new_ksm = False if (os.path.exists("/sys/kernel/mm/ksm/run")): process.run("echo 50 > /sys/kernel/mm/ksm/sleep_millisecs", shell=True) process.run("echo 5000 > /sys/kernel/mm/ksm/pages_to_scan", shell=True) process.run("echo 1 > /sys/kernel/mm/ksm/run", shell=True) e_up = "/sys/kernel/mm/transparent_hugepage/enabled" e_rh = "/sys/kernel/mm/redhat_transparent_hugepage/enabled" if os.path.exists(e_up): process.run("echo 'never' > %s" % e_up, shell=True) if os.path.exists(e_rh): process.run("echo 'never' > %s" % e_rh, shell=True) new_ksm = True else: try: process.run("modprobe ksm") process.run("ksmctl start 5000 100") except process.CmdError as details: test.fail("Failed to load KSM: %s" % details) # host_reserve: mem reserve kept for the host system to run host_reserve = int(params.get("ksm_host_reserve", -1)) if (host_reserve == -1): try: available = utils_memory.read_from_meminfo("MemAvailable") except process.CmdError: # ancient kernels utils_memory.drop_caches() available = utils_memory.read_from_meminfo("MemFree") # default host_reserve = UsedMem + one_minimal_guest(128MB) # later we add 64MB per additional guest host_reserve = ((utils_memory.memtotal() - available) / 1024 + 128) # using default reserve _host_reserve = True else: _host_reserve = False # guest_reserve: mem reserve kept to avoid guest OS to kill processes guest_reserve = int(params.get("ksm_guest_reserve", -1)) if (guest_reserve == -1): # default guest_reserve = minimal_system_mem(256MB) # 
later we add tmpfs overhead guest_reserve = 256 # using default reserve _guest_reserve = True else: _guest_reserve = False max_vms = int(params.get("max_vms", 2)) overcommit = float(params.get("ksm_overcommit_ratio", 2.0)) max_alloc = int(params.get("ksm_parallel_ratio", 1)) # vmsc: count of all used VMs vmsc = int(overcommit) + 1 vmsc = max(vmsc, max_vms) if (params['ksm_mode'] == "serial"): max_alloc = vmsc if _host_reserve: # First round of additional guest reserves host_reserve += vmsc * 64 _host_reserve = vmsc host_mem = (int(utils_memory.memtotal()) / 1024 - host_reserve) ksm_swap = False if params.get("ksm_swap") == "yes": ksm_swap = True # Performance ratio perf_ratio = params.get("ksm_perf_ratio") if perf_ratio: perf_ratio = float(perf_ratio) else: perf_ratio = 1 if (params['ksm_mode'] == "parallel"): vmsc = 1 overcommit = 1 mem = host_mem # 32bit system adjustment if "64" not in params.get("vm_arch_name"): logging.debug("Probably i386 guest architecture, " "max allocator mem = 2G") # Guest can have more than 2G but # kvm mem + 1MB (allocator itself) can't if (host_mem > 3100): mem = 3100 if os.popen("uname -i").readline().startswith("i386"): logging.debug("Host is i386 architecture, max guest mem is 2G") # Guest system with qemu overhead (64M) can't have more than 2G if mem > 3100 - 64: mem = 3100 - 64 else: # mem: Memory of the guest systems. Maximum must be less than # host's physical ram mem = int(overcommit * host_mem / vmsc) # 32bit system adjustment if not params['image_name'].endswith("64"): logging.debug("Probably i386 guest architecture, " "max allocator mem = 2G") # Guest can have more than 2G but # kvm mem + 1MB (allocator itself) can't if mem - guest_reserve - 1 > 3100: vmsc = int( math.ceil( (host_mem * overcommit) / (3100 + guest_reserve))) if _host_reserve: host_reserve += (vmsc - _host_reserve) * 64 host_mem -= (vmsc - _host_reserve) * 64 _host_reserve = vmsc mem = int(math.floor(host_mem * overcommit / vmsc)) if os.popen("uname -i").readline().startswith("i386"): logging.debug("Host is i386 architecture, max guest mem is 2G") # Guest system with qemu overhead (64M) can't have more than 2G if mem > 3100 - 64: vmsc = int(math.ceil((host_mem * overcommit) / (3100 - 64.0))) if _host_reserve: host_reserve += (vmsc - _host_reserve) * 64 host_mem -= (vmsc - _host_reserve) * 64 _host_reserve = vmsc mem = int(math.floor(host_mem * overcommit / vmsc)) # 0.055 represents OS + TMPFS additional reserve per guest ram MB if _guest_reserve: guest_reserve += math.ceil(mem * 0.055) swap = int(utils_memory.read_from_meminfo("SwapTotal")) / 1024 logging.debug("Overcommit = %f", overcommit) logging.debug("True overcommit = %f ", (float(vmsc * mem) / float(host_mem))) logging.debug("Host memory = %dM", host_mem) logging.debug("Guest memory = %dM", mem) logging.debug("Using swap = %s", ksm_swap) logging.debug("Swap = %dM", swap) logging.debug("max_vms = %d", max_vms) logging.debug("Count of all used VMs = %d", vmsc) logging.debug("Performance_ratio = %f", perf_ratio) # Generate unique keys for random series skeys = [] dkeys = [] for i in range(0, max(vmsc, max_alloc)): key = random.randrange(0, 255) while key in skeys: key = random.randrange(0, 255) skeys.append(key) key = random.randrange(0, 999) while key in dkeys: key = random.randrange(0, 999) dkeys.append(key) logging.debug("skeys: %s", skeys) logging.debug("dkeys: %s", dkeys) lvms = [] lsessions = [] # As we don't know the number and memory amount of VMs in advance, # we need to specify and create them here vm_name = 
params["main_vm"] params['mem'] = mem params['vms'] = vm_name # Associate pidfile name params['pid_' + vm_name] = utils_misc.generate_tmp_file_name( vm_name, 'pid') if not params.get('extra_params'): params['extra_params'] = ' ' params['extra_params_' + vm_name] = params.get('extra_params') params['extra_params_' + vm_name] += (" -pidfile %s" % (params.get('pid_' + vm_name))) params['extra_params'] = params.get('extra_params_' + vm_name) # ksm_size: amount of memory used by allocator ksm_size = mem - guest_reserve logging.debug("Memory used by allocator on guests = %dM", ksm_size) fill_base_timeout = ksm_size / 10 # Creating the first guest env_process.preprocess_vm(test, params, env, vm_name) lvms.append(env.get_vm(vm_name)) if not lvms[0]: test.error("VM object not found in environment") if not lvms[0].is_alive(): test.error("VM seems to be dead; Test requires a living VM") logging.debug("Booting first guest %s", lvms[0].name) lsessions.append(lvms[0].wait_for_login(timeout=360)) # Associate vm PID try: tmp = open(params.get('pid_' + vm_name), 'r') params['pid_' + vm_name] = int(tmp.readline()) except Exception: test.fail("Could not get PID of %s" % (vm_name)) # Creating other guest systems for i in range(1, vmsc): vm_name = "vm" + str(i + 1) params['pid_' + vm_name] = utils_misc.generate_tmp_file_name( vm_name, 'pid') params['extra_params_' + vm_name] = params.get('extra_params') params['extra_params_' + vm_name] += (" -pidfile %s" % (params.get('pid_' + vm_name))) params['extra_params'] = params.get('extra_params_' + vm_name) # Last VM is later used to run more allocators simultaneously lvms.append(lvms[0].clone(vm_name, params)) env.register_vm(vm_name, lvms[i]) params['vms'] += " " + vm_name logging.debug("Booting guest %s", lvms[i].name) lvms[i].create() if not lvms[i].is_alive(): test.error("VM %s seems to be dead; Test requires a" "living VM" % lvms[i].name) lsessions.append(lvms[i].wait_for_login(timeout=360)) try: tmp = open(params.get('pid_' + vm_name), 'r') params['pid_' + vm_name] = int(tmp.readline()) except Exception: test.fail("Could not get PID of %s" % (vm_name)) # Let guests rest a little bit :-) pause = vmsc * 2 * perf_ratio logging.debug("Waiting %ds before proceed", pause) time.sleep(vmsc * 2 * perf_ratio) logging.debug(utils_test.get_memory_info(lvms)) # Copy ksm_overcommit_guest.py into guests vksmd_src = os.path.join(data_dir.get_shared_dir(), "scripts", "ksm_overcommit_guest.py") dst_dir = "/tmp" for vm in lvms: vm.copy_files_to(vksmd_src, dst_dir) logging.info("Phase 0: PASS") if params['ksm_mode'] == "parallel": logging.info("Starting KSM test parallel mode") split_parallel() logging.info("KSM test parallel mode: PASS") elif params['ksm_mode'] == "serial": logging.info("Starting KSM test serial mode") initialize_guests() separate_first_guest() split_guest() logging.info("KSM test serial mode: PASS")
def run(test, params, env): """ Test command: virsh managedsave. This command can save and destroy a running domain, so it can be restarted from the same state at a later time. """ vm_name = params.get("main_vm") vm = env.get_vm(vm_name) managed_save_file = "/var/lib/libvirt/qemu/save/%s.save" % vm_name shutdown_timeout = int(params.get('shutdown_timeout', 60)) # define function def vm_recover_check(option, libvirtd, check_shutdown=False): """ Check if the vm can be recovered correctly. :param option : managedsave command option. :param libvirtd : libvirtd service helper. :param check_shutdown : whether to shut down and restart the guest afterwards. """ # By this time the vm should have been shut down if vm.is_alive(): test.fail("Guest should be inactive") # Check vm managed save state. ret = virsh.dom_list("--managed-save --inactive", debug=True) vm_state1 = re.findall(r".*%s.*" % vm_name, ret.stdout.strip())[0].split()[2] ret = virsh.dom_list("--managed-save --all", debug=True) vm_state2 = re.findall(r".*%s.*" % vm_name, ret.stdout.strip())[0].split()[2] if vm_state1 != "saved" or vm_state2 != "saved": test.fail("Guest state should be saved") virsh.start(vm_name, debug=True) # This time vm should be active if vm.is_dead(): test.fail("Guest should be active") # Restart libvirtd and check vm status again. libvirtd.restart() if vm.is_dead(): test.fail("Guest should be active after" " restarting libvirtd") # Check managed save file: if os.path.exists(managed_save_file): test.fail("Managed save image exists " "after starting the domain") if option: if option.count("running"): if vm.is_dead() or vm.is_paused(): test.fail("Guest state should be" " running after started" " because of '--running' option") elif option.count("paused"): if not vm.is_paused(): test.fail("Guest state should be" " paused after started" " because of '--paused' option") else: if params.get("paused_after_start_vm") == "yes": if not vm.is_paused(): test.fail("Guest state should be" " paused after started" " because of initial guest state") if check_shutdown: # Resume the domain. if vm.is_paused(): vm.resume() vm.wait_for_login() # Shutdown and start the domain, # it should be in running state and allow login. vm.shutdown() if not vm.wait_for_shutdown(shutdown_timeout): test.fail('VM failed to shutdown') vm.start() vm.wait_for_login() def vm_undefine_check(vm_name): """ Check if vm can be undefined with managed-save option """ # Backup xml file xml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) if not os.path.exists(managed_save_file): test.fail("Can't find managed save image") # Undefine domain with no options. if not virsh.undefine(vm_name, options=None, ignore_status=True).exit_status: test.fail("Guest shouldn't be undefined " "while domain managed save image exists") # Undefine domain with managed-save option. if virsh.undefine(vm_name, options="--managed-save", ignore_status=True).exit_status: test.fail("Guest can't be undefined with " "managed-save option") if os.path.exists(managed_save_file): test.fail("Managed save image exists" " after undefining vm") # Restore and start the vm. xml_backup.define() vm.start() def check_flags_parallel(virsh_cmd, bash_cmd, flags): """ Run the commands in parallel and check the output. 
""" cmd = ("%s & %s" % (virsh_cmd, bash_cmd)) ret = process.run(cmd, ignore_status=True, shell=True, ignore_bg_processes=True) output = ret.stdout_text.strip() logging.debug("check flags output: %s" % output) lines = re.findall(r"flags:.(\d+)", output, re.M) logging.debug("Find all fdinfo flags: %s" % lines) lines = [int(i, 8) & flags for i in lines] if flags not in lines: test.fail("Checking flags %s failed" % flags) return ret def check_multi_guests(guests, start_delay, libvirt_guests): """ Check start_delay option for multiple guests. """ # Destroy vm first if vm.is_alive(): vm.destroy(gracefully=False) # Clone given number of guests timeout = params.get("clone_timeout", 360) for i in range(int(guests)): dst_vm = "%s_%s" % (vm_name, i) utils_libguestfs.virt_clone_cmd(vm_name, dst_vm, True, timeout=timeout) virsh.start(dst_vm, debug=True) # Wait 10 seconds for vm to start time.sleep(10) is_systemd = process.run("cat /proc/1/comm", shell=True).stdout_text.count("systemd") if is_systemd: libvirt_guests.restart() pattern = r'(.+ \d\d:\d\d:\d\d).+: Resuming guest.+done' else: ret = process.run("service libvirt-guests restart | \ awk '{ print strftime(\"%b %y %H:%M:%S\"), \ $0; fflush(); }'", shell=True) pattern = r'(.+ \d\d:\d\d:\d\d)+ Resuming guest.+done' # libvirt-guests status command read messages from systemd # journal, in cases of messages are not ready in time, # add a time wait here. def wait_func(): return libvirt_guests.raw_status().stdout.count("Resuming guest") utils_misc.wait_for(wait_func, 5) if is_systemd: ret = libvirt_guests.raw_status() logging.info("status output: %s", ret.stdout_text) resume_time = re.findall(pattern, ret.stdout_text, re.M) if not resume_time: test.fail("Can't see messages of resuming guest") # Convert time string to int resume_seconds = [ time.mktime(time.strptime(tm, "%b %y %H:%M:%S")) for tm in resume_time ] logging.info("Resume time in seconds: %s", resume_seconds) # Check if start_delay take effect for i in range(len(resume_seconds) - 1): if resume_seconds[i + 1] - resume_seconds[i] < int(start_delay): test.fail("Checking start_delay failed") def wait_for_state(vm_state): """ Wait for vm state is ready. """ utils_misc.wait_for(lambda: vm.state() == vm_state, 10) def check_guest_flags(bash_cmd, flags): """ Check bypass_cache option for single guest. """ # Drop caches. drop_caches() # form proper parallel command based on if systemd is used or not is_systemd = process.run("cat /proc/1/comm", shell=True).stdout_text.count("systemd") if is_systemd: virsh_cmd_stop = "systemctl stop libvirt-guests" virsh_cmd_start = "systemctl start libvirt-guests" else: virsh_cmd_stop = "service libvirt-guests stop" virsh_cmd_start = "service libvirt-guests start" ret = check_flags_parallel( virsh_cmd_stop, bash_cmd % (managed_save_file, managed_save_file, "1"), flags) if is_systemd: ret = libvirt_guests.raw_status() logging.info("status output: %s", ret.stdout_text) if all([ "Suspending %s" % vm_name not in ret.stdout_text, "stopped, with saved guests" not in ret.stdout_text ]): test.fail("Can't see messages of suspending vm") # status command should return 3. 
if not is_systemd: ret = libvirt_guests.raw_status() if ret.exit_status != 3: test.fail("The exit code %s for libvirt-guests" " status is not correct" % ret) # Wait for VM in shut off state wait_for_state("shut off") check_flags_parallel( virsh_cmd_start, bash_cmd % (managed_save_file, managed_save_file, "0"), flags) # Wait for VM in running state wait_for_state("running") def vm_msave_remove_check(vm_name): """ Check managed save remove command. """ if not os.path.exists(managed_save_file): test.fail("Can't find managed save image") virsh.managedsave_remove(vm_name, debug=True) if os.path.exists(managed_save_file): test.fail("Managed save image still exists") virsh.start(vm_name, debug=True) # The domain state should be running if vm.state() != "running": test.fail("Guest state should be" " running after started") def vm_managedsave_loop(vm_name, loop_range, libvirtd): """ Run a loop of managedsave command and check its result. """ if vm.is_dead(): virsh.start(vm_name, debug=True) for i in range(int(loop_range)): logging.debug("Test loop: %s" % i) virsh.managedsave(vm_name, debug=True) virsh.start(vm_name, debug=True) # Check libvirtd status. if not libvirtd.is_running(): test.fail("libvirtd is stopped after cmd") # Check vm status. if vm.state() != "running": test.fail("Guest isn't in running state") def build_vm_xml(vm_name, **dargs): """ Build the new domain xml and define it. """ try: # stop vm before doing any change to xml if vm.is_alive(): vm.destroy(gracefully=False) vmxml = vm_xml.VMXML.new_from_dumpxml(vm_name) if dargs.get("cpu_mode"): if "cpu" in vmxml: del vmxml.cpu cpuxml = vm_xml.VMCPUXML() cpuxml.mode = params.get("cpu_mode", "host-model") cpuxml.match = params.get("cpu_match", "exact") cpuxml.fallback = params.get("cpu_fallback", "forbid") cpu_topology = {} cpu_topology_sockets = params.get("cpu_topology_sockets") if cpu_topology_sockets: cpu_topology["sockets"] = cpu_topology_sockets cpu_topology_cores = params.get("cpu_topology_cores") if cpu_topology_cores: cpu_topology["cores"] = cpu_topology_cores cpu_topology_threads = params.get("cpu_topology_threads") if cpu_topology_threads: cpu_topology["threads"] = cpu_topology_threads if cpu_topology: cpuxml.topology = cpu_topology vmxml.cpu = cpuxml vmxml.vcpu = int(params.get("vcpu_nums")) if dargs.get("sec_driver"): seclabel_dict = { "type": "dynamic", "model": "selinux", "relabel": "yes" } vmxml.set_seclabel([seclabel_dict]) vmxml.sync() vm.start() except Exception as e: logging.error(str(e)) test.cancel("Build domain xml failed") status_error = ("yes" == params.get("status_error", "no")) vm_ref = params.get("managedsave_vm_ref", "name") libvirtd_state = params.get("libvirtd", "on") extra_param = params.get("managedsave_extra_param", "") progress = ("yes" == params.get("managedsave_progress", "no")) cpu_mode = "yes" == params.get("managedsave_cpumode", "no") test_undefine = "yes" == params.get("managedsave_undefine", "no") test_bypass_cache = "yes" == params.get("test_bypass_cache", "no") autostart_bypass_cache = params.get("autostart_bypass_cache", "") multi_guests = params.get("multi_guests", "") test_libvirt_guests = params.get("test_libvirt_guests", "") check_flags = "yes" == params.get("check_flags", "no") security_driver = params.get("security_driver", "") remove_after_cmd = "yes" == params.get("remove_after_cmd", "no") option = params.get("managedsave_option", "") check_shutdown = "yes" == params.get("shutdown_after_cmd", "no") pre_vm_state = params.get("pre_vm_state", "") move_saved_file = "yes" == 
params.get("move_saved_file", "no") test_loop_cmd = "yes" == params.get("test_loop_cmd", "no") if option: if not virsh.has_command_help_match('managedsave', option): # Older libvirt does not have this option test.cancel("Older libvirt does not" " handle arguments consistently") # Backup xml file. vmxml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) # Get the libvirtd service libvirtd = utils_libvirtd.Libvirtd() # Get config files. qemu_config = utils_config.LibvirtQemuConfig() libvirt_guests_config = utils_config.LibvirtGuestsConfig() # Get libvirt-guests service libvirt_guests = Factory.create_service("libvirt-guests") try: # Destroy vm first for setting configuration file if vm.state() == "running": vm.destroy(gracefully=False) # Prepare test environment. if libvirtd_state == "off": libvirtd.stop() if autostart_bypass_cache: ret = virsh.autostart(vm_name, "", ignore_status=True, debug=True) libvirt.check_exit_status(ret) qemu_config.auto_start_bypass_cache = autostart_bypass_cache libvirtd.restart() if security_driver: qemu_config.security_driver = [security_driver] if test_libvirt_guests: if multi_guests: start_delay = params.get("start_delay", "20") libvirt_guests_config.START_DELAY = start_delay if check_flags: libvirt_guests_config.BYPASS_CACHE = "1" # The config file format should be "x=y" instead of "x = y" process.run( "sed -i -e 's/ = /=/g' " "/etc/sysconfig/libvirt-guests", shell=True) libvirt_guests.restart() # Change domain xml. if cpu_mode: build_vm_xml(vm_name, cpu_mode=True) if security_driver: build_vm_xml(vm_name, sec_driver=True) # Turn VM into certain state. if pre_vm_state == "transient": logging.info("Creating %s..." % vm_name) vmxml_for_test = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name) if vm.is_alive(): vm.destroy(gracefully=False) # Wait for VM to be in shut off state utils_misc.wait_for(lambda: vm.state() == "shut off", 10) vm.undefine() if virsh.create(vmxml_for_test.xml, ignore_status=True, debug=True).exit_status: vmxml_backup.define() test.cancel("Cann't create the domain") # Wait for vm in stable state if params.get("start_vm") == "yes": if vm.state() == "shut off": vm.start() vm.wait_for_login() # run test case domid = vm.get_id() domuuid = vm.get_uuid() if vm_ref == "id": vm_ref = domid elif vm_ref == "uuid": vm_ref = domuuid elif vm_ref == "hex_id": vm_ref = hex(int(domid)) elif vm_ref.count("invalid"): vm_ref = params.get(vm_ref) elif vm_ref == "name": vm_ref = vm_name # Ignore exception with "ignore_status=True" if progress: option += " --verbose" option += extra_param # For bypass_cache test. Run a shell command to check fd flags while # excuting managedsave command software_mgr = software_manager.SoftwareManager() if not software_mgr.check_installed('lsof'): logging.info('Installing lsof package:') software_mgr.install('lsof') bash_cmd = ( "let i=1; while((i++<400)); do if [ -e %s ]; then (cat /proc" "/$(lsof -w %s|awk '/libvirt_i/{print $2}')/fdinfo/%s |" "grep 'flags:.*') && break; else sleep 0.05; fi; done;") # Flags to check bypass cache take effect flags = os.O_DIRECT if test_bypass_cache: # Drop caches. 
            drop_caches()
            virsh_cmd = "virsh managedsave %s %s" % (option, vm_name)
            check_flags_parallel(virsh_cmd, bash_cmd %
                                 (managed_save_file, managed_save_file,
                                  "1"), flags)
            # Wait for VM in shut off state
            wait_for_state("shut off")
            virsh_cmd = "virsh start %s %s" % (option, vm_name)
            check_flags_parallel(virsh_cmd, bash_cmd %
                                 (managed_save_file, managed_save_file,
                                  "0"), flags)
            # Wait for VM in running state
            wait_for_state("running")
        elif test_libvirt_guests:
            logging.debug("libvirt-guests status: %s", libvirt_guests.status())
            if multi_guests:
                check_multi_guests(multi_guests,
                                   start_delay, libvirt_guests)
            if check_flags:
                check_guest_flags(bash_cmd, flags)
        else:
            # Ensure VM is running
            utils_misc.wait_for(lambda: vm.state() == "running", 10)
            ret = virsh.managedsave(vm_ref, options=option,
                                    ignore_status=True, debug=True)
            status = ret.exit_status
            # The progress information is output in the error message
            error_msg = ret.stderr.strip()
            if move_saved_file:
                cmd = "echo > %s" % managed_save_file
                process.run(cmd, shell=True)

            # recover libvirtd service start
            if libvirtd_state == "off":
                libvirtd.start()

            if status_error:
                if not status:
                    if libvirtd_state == "off" and libvirt_version.version_compare(
                            5, 6, 0):
                        logging.info("From libvirt version 5.6.0 libvirtd is "
                                     "restarted and command should succeed")
                    else:
                        test.fail("Run successfully with wrong command!")
            else:
                if status:
                    test.fail("Run failed with right command")
                if progress:
                    if not error_msg.count("Managedsave:"):
                        test.fail("Got invalid progress output")
                if remove_after_cmd:
                    vm_msave_remove_check(vm_name)
                elif test_undefine:
                    vm_undefine_check(vm_name)
                elif autostart_bypass_cache:
                    # rhbz#1755303
                    if libvirt_version.version_compare(5, 6, 0):
                        os.remove("/run/libvirt/qemu/autostarted")
                    libvirtd.stop()
                    virsh_cmd = ("(service libvirtd start)")
                    check_flags_parallel(virsh_cmd, bash_cmd %
                                         (managed_save_file, managed_save_file,
                                          "0"), flags)
                elif test_loop_cmd:
                    loop_range = params.get("loop_range", "20")
                    vm_managedsave_loop(vm_name, loop_range, libvirtd)
                else:
                    vm_recover_check(option, libvirtd, check_shutdown)
    finally:
        # Restore test environment.
        # Restart libvirtd.service
        qemu_config.restore()
        libvirt_guests_config.restore()
        libvirtd.restart()
        if autostart_bypass_cache:
            virsh.autostart(vm_name, "--disable",
                            ignore_status=True, debug=True)
        vm.destroy(gracefully=False)
        virsh.managedsave_remove(vm_name, debug=True)
        vmxml_backup.sync()
        if multi_guests:
            for i in range(int(multi_guests)):
                virsh.remove_domain("%s_%s" % (vm_name, i),
                                    "--remove-all-storage",
                                    debug=True)
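# Illustrative aside, not part of the test above: a minimal standalone sketch
# of the fdinfo technique that check_flags_parallel relies on. It assumes the
# pid and fd of the open save file are already known; the helper name
# has_o_direct and its arguments are hypothetical.
def has_o_direct(pid, fd):
    import os
    import re
    with open("/proc/%s/fdinfo/%s" % (pid, fd)) as fdinfo:
        match = re.search(r"^flags:\s*(\d+)", fdinfo.read(), re.M)
    # The kernel prints the flags field in octal; mask it against O_DIRECT.
    return bool(match) and (int(match.group(1), 8) & os.O_DIRECT) == os.O_DIRECT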
def run_numa_consistency(test, params, env):
    """
    Qemu numa consistency test:
    1) Get host numa topological structure
    2) Start a guest with the same nodes as the host, each node with one cpu
    3) Get the cpu id used by each vcpu thread in the host and the node that
       cpu belongs to
    4) Allocate memory inside the guest and bind the allocating process to
       one of its vcpus
    5) The memory used in the host should increase on the same node if the
       vcpu thread has not switched to another node
    6) Repeat steps 3~5 for each vcpu thread of the guest

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    def get_vcpu_used_node(numa_node_info, vcpu_thread):
        cpu_used_host = utils_misc.get_thread_cpu(vcpu_thread)[0]
        node_used_host = ([_ for _ in node_list if cpu_used_host
                           in numa_node_info.nodes[_].cpus][0])
        return node_used_host

    error.context("Get host numa topological structure", logging.info)
    timeout = float(params.get("login_timeout", 240))
    host_numa_node = utils_misc.NumaInfo()
    node_list = host_numa_node.online_nodes
    if len(node_list) < 2:
        raise error.TestNAError("This host only has one NUMA node, "
                                "skipping test...")
    node_list.sort()
    params['smp'] = len(node_list)
    params['vcpu_cores'] = 1
    params['vcpu_threads'] = 1
    params['vcpu_sockets'] = params['smp']
    params['guest_numa_nodes'] = ""
    for node_id in range(len(node_list)):
        params['guest_numa_nodes'] += " node%d" % node_id
    params['start_vm'] = 'yes'

    utils_memory.drop_caches()
    vm = params['main_vm']
    env_process.preprocess_vm(test, params, env, vm)
    vm = env.get_vm(vm)
    vm.verify_alive()
    vcpu_threads = vm.vcpu_threads
    session = vm.wait_for_login(timeout=timeout)

    dd_size = 256
    if dd_size * len(vcpu_threads) > int(params['mem']):
        dd_size = int(int(params['mem']) / 2 / len(vcpu_threads))

    mount_size = dd_size * len(vcpu_threads)
    mount_cmd = "mount -o size=%dM -t tmpfs none /tmp" % mount_size

    qemu_pid = vm.get_pid()
    drop = 0
    for cpuid in range(len(vcpu_threads)):
        error.context("Get vcpu %s used numa node." % cpuid, logging.info)
        memory_status, _ = utils_test.get_qemu_numa_status(host_numa_node,
                                                           qemu_pid)
        node_used_host = get_vcpu_used_node(host_numa_node,
                                            vcpu_threads[cpuid])
        memory_used_before = memory_status[node_used_host]
        error.context("Allocate memory in guest", logging.info)
        session.cmd(mount_cmd)
        # taskset interprets the affinity mask as hexadecimal, so pass an
        # explicit one-hot hex mask for the target vcpu
        binded_dd_cmd = "taskset %s" % hex(2 ** cpuid)
        binded_dd_cmd += " dd if=/dev/urandom of=/tmp/%s" % cpuid
        binded_dd_cmd += " bs=1M count=%s" % dd_size
        session.cmd(binded_dd_cmd)
        error.context("Check qemu process memory use status", logging.info)
        node_after = get_vcpu_used_node(host_numa_node, vcpu_threads[cpuid])
        if node_after != node_used_host:
            logging.warning("Node used by vcpu thread changed, so drop the"
                            " results in this round.")
            drop += 1
            continue
        memory_status, _ = utils_test.get_qemu_numa_status(host_numa_node,
                                                           qemu_pid)
        memory_used_after = memory_status[node_used_host]
        memory_allocated = (memory_used_after - memory_used_before) * 4 / 1024
        if 1 - float(memory_allocated) / float(dd_size) > 0.05:
            raise error.TestFail("Expect malloc %sM memory in node %s, but "
                                 "only malloc %sM" %
                                 (dd_size, node_used_host, memory_allocated))
    session.close()
    if drop == len(vcpu_threads):
        raise error.TestError("All test rounds are dropped."
                              " Please test it again.")
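# Illustrative aside, not part of the test above: the 5% accounting check
# assumes utils_test.get_qemu_numa_status() reports per-node usage in 4 KiB
# pages, which is what the "* 4 / 1024" conversion implies. A hypothetical
# helper making that unit conversion explicit:
def pages_to_mb(pages_before, pages_after):
    # 4 KiB pages -> MB: multiply by 4 (KiB per page), divide by 1024 (KiB per MB)
    return (pages_after - pages_before) * 4 / 1024.0

# e.g. a delta of 65536 pages gives pages_to_mb(0, 65536) == 256.0 MB,
# matching the default dd_size of 256.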