def output_check(nodeinfo_output):
    """
    Verify the fields reported by virsh nodeinfo against the host.

    Every field is extracted from nodeinfo_output with _check_nodeinfo()
    and compared with a value obtained from the host (kernel arch, CPU
    count, /proc/cpuinfo and total memory).

    :param nodeinfo_output: virsh nodeinfo output, passed unchanged to
                            _check_nodeinfo().
    :raise error.TestFail: when a nodeinfo field does not match the value
                           found on the host.
    """
    # Check CPU model
    cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
    cpu_model_os = utils.get_current_kernel_arch()
    # NOTE: re.match() uses the nodeinfo value as a regular expression
    # anchored at the start of the host value, so this check (like the
    # frequency and cores checks below) is a prefix match, not equality.
    if not re.match(cpu_model_nodeinfo, cpu_model_os):
        raise error.TestFail(
            "Virsh nodeinfo output didn't match CPU model")

    # Check number of CPUs
    cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
    cpus_os = utils.count_cpus()
    if int(cpus_nodeinfo) != cpus_os:
        raise error.TestFail(
            "Virsh nodeinfo output didn't match number of "
            "CPU(s)")

    # Check CPU frequency
    cpu_frequency_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                             'CPU frequency', 3)
    # Integer part of the first 'cpu MHz' entry in /proc/cpuinfo
    cmd = ("cat /proc/cpuinfo | grep 'cpu MHz' | head -n1 | "
           "awk '{print $4}' | awk -F. '{print $1}'")
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_frequency_os = cmd_result.stdout.strip()
    # Call form of print is valid on both Python 2 and Python 3
    print(cpu_frequency_os)
    if not re.match(cpu_frequency_nodeinfo, cpu_frequency_os):
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "frequency")

    # Check CPU socket(s)
    cpu_sockets_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
    cmd = "grep 'physical id' /proc/cpuinfo | uniq | sort | uniq |wc -l"
    cmd_result = utils.run(cmd, ignore_status=True)
    cpu_NUMA_nodeinfo = _check_nodeinfo(nodeinfo_output, 'NUMA cell(s)', 3)
    # The host-wide number of distinct physical ids is divided by the NUMA
    # cell count before comparing. Floor division keeps the result an
    # integer on Python 3 too (plain '/' would yield a float there).
    cpu_sockets_os = int(
        cmd_result.stdout.strip()) // int(cpu_NUMA_nodeinfo)
    if cpu_sockets_os != cpu_sockets_nodeinfo:
        raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                             "socket(s)")

    # Check Core(s) per socket
    cores_per_socket_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                'Core(s) per socket', 4)
    cmd = "grep 'cpu cores' /proc/cpuinfo | head -n1 | awk '{print $4}'"
    cmd_result = utils.run(cmd, ignore_status=True)
    cores_per_socket_os = cmd_result.stdout.strip()
    if not re.match(cores_per_socket_nodeinfo, cores_per_socket_os):
        raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                             "per socket")

    # Check Memory size
    memory_size_nodeinfo = int(
        _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
    memory_size_os = utils_memory.memtotal()
    if memory_size_nodeinfo != memory_size_os:
        raise error.TestFail("Virsh nodeinfo output didn't match "
                             "Memory size")
Example #2
0
    def output_check(nodeinfo_output):
        """
        Compare each virsh nodeinfo field with the value seen on the host.

        Fields are read from nodeinfo_output through _check_nodeinfo() and
        checked against the kernel arch, the CPU count, /proc/cpuinfo and
        the total memory of the host.

        :param nodeinfo_output: virsh nodeinfo output, passed unchanged to
                                _check_nodeinfo().
        :raise error.TestFail: on the first field that does not match.
        """
        # Check CPU model
        cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
        cpu_model_os = utils.get_current_kernel_arch()
        # NOTE: the nodeinfo value is used as a regular expression matched
        # at the start of the host value (prefix match, not equality); the
        # frequency and cores checks below work the same way.
        if not re.match(cpu_model_nodeinfo, cpu_model_os):
            raise error.TestFail(
                "Virsh nodeinfo output didn't match CPU model")

        # Check number of CPUs
        cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
        cpus_os = utils.count_cpus()
        if int(cpus_nodeinfo) != cpus_os:
            raise error.TestFail("Virsh nodeinfo output didn't match number of "
                                 "CPU(s)")

        # Check CPU frequency
        cpu_frequency_nodeinfo = _check_nodeinfo(
            nodeinfo_output, 'CPU frequency', 3)
        # Integer part of the first 'cpu MHz' entry in /proc/cpuinfo
        cmd = ("cat /proc/cpuinfo | grep 'cpu MHz' | head -n1 | "
               "awk '{print $4}' | awk -F. '{print $1}'")
        cmd_result = utils.run(cmd, ignore_status=True)
        cpu_frequency_os = cmd_result.stdout.strip()
        # Call form of print is valid on both Python 2 and Python 3
        print(cpu_frequency_os)
        if not re.match(cpu_frequency_nodeinfo, cpu_frequency_os):
            raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                                 "frequency")

        # Check CPU socket(s)
        cpu_sockets_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
        cmd = "grep 'physical id' /proc/cpuinfo | uniq | sort | uniq |wc -l"
        cmd_result = utils.run(cmd, ignore_status=True)
        cpu_NUMA_nodeinfo = _check_nodeinfo(nodeinfo_output, 'NUMA cell(s)', 3)
        # Distinct physical ids on the host divided by the NUMA cell count.
        # '//' keeps an integer result on Python 3 as well as Python 2.
        cpu_sockets_os = int(
            cmd_result.stdout.strip()) // int(cpu_NUMA_nodeinfo)
        if cpu_sockets_os != cpu_sockets_nodeinfo:
            raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                                 "socket(s)")

        # Check Core(s) per socket
        cores_per_socket_nodeinfo = _check_nodeinfo(
            nodeinfo_output, 'Core(s) per socket', 4)
        cmd = "grep 'cpu cores' /proc/cpuinfo | head -n1 | awk '{print $4}'"
        cmd_result = utils.run(cmd, ignore_status=True)
        cores_per_socket_os = cmd_result.stdout.strip()
        if not re.match(cores_per_socket_nodeinfo, cores_per_socket_os):
            raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                                 "per socket")

        # Check Memory size
        memory_size_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
        memory_size_os = utils_memory.memtotal()
        if memory_size_nodeinfo != memory_size_os:
            raise error.TestFail("Virsh nodeinfo output didn't match "
                                 "Memory size")
Example #3
0
    def output_check(nodeinfo_output):
        """
        Compare each virsh nodeinfo field with the value seen on the host.

        Fields are read from nodeinfo_output through _check_nodeinfo().
        All checks except CPU frequency raise on mismatch; the frequency
        check only logs a debug message.

        :param nodeinfo_output: virsh nodeinfo output, passed unchanged to
                                _check_nodeinfo().
        :raise error.TestFail: when a checked field does not match the host.
        """
        # Check CPU model
        cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
        cpu_model_os = utils.get_current_kernel_arch()
        if not re.match(cpu_model_nodeinfo, cpu_model_os):
            raise error.TestFail("Virsh nodeinfo output didn't match CPU model")

        # Check number of CPUs
        cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
        cpus_os = utils.count_cpus()
        if int(cpus_nodeinfo) != cpus_os:
            raise error.TestFail("Virsh nodeinfo output didn't match number of "
                                 "CPU(s)")

        # Check CPU frequency
        cpu_frequency_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                 "CPU frequency", 3)
        # Integer part of the first 'cpu MHz' entry in /proc/cpuinfo
        cmd = ("cat /proc/cpuinfo | grep 'cpu MHz' | head -n1 | "
               "awk '{print $4}' | awk -F. '{print $1}'")
        cmd_result = utils.run(cmd, ignore_status=True)
        cpu_frequency_os = cmd_result.stdout.strip()
        logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                      cpu_frequency_nodeinfo, cpu_frequency_os)
        #
        # Matching CPU Frequency is not an exact science in todays modern
        # processors and OS's. CPU's can have their execution speed varied
        # based on current workload in order to save energy and keep cool.
        # Thus since we're getting the values at disparate points in time,
        # we cannot necessarily do a pure comparison.
        # So, let's get the absolute value of the difference and ensure
        # that it's within 20 percent of each value to give us enough of
        # a "fudge" factor to declare "close enough". Don't return a failure
        # just print a debug message and move on.
        #
        # Because this check must never fail the test, values that cannot
        # be parsed (e.g. no 'cpu MHz' line, so the command printed nothing)
        # or that are zero are reported and skipped instead of raising
        # ValueError / ZeroDivisionError.
        try:
            freq_nodeinfo = int(cpu_frequency_nodeinfo)
            freq_os = int(cpu_frequency_os)
        except (TypeError, ValueError):
            freq_nodeinfo = freq_os = 0
        if freq_nodeinfo == 0 or freq_os == 0:
            logging.debug("Skipping CPU frequency comparison, no usable "
                          "frequency values")
        else:
            diffval = abs(freq_nodeinfo - freq_os)
            if (float(diffval) / float(freq_nodeinfo) > 0.20 or
                    float(diffval) / float(freq_os) > 0.20):
                logging.debug("Virsh nodeinfo output didn't match CPU "
                              "frequency within 20 percent")

        # Check CPU socket(s)
        cpu_sockets_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, "CPU socket(s)", 3))
        cmd = "grep 'physical id' /proc/cpuinfo | uniq | sort | uniq |wc -l"
        cmd_result = utils.run(cmd, ignore_status=True)
        cpu_NUMA_nodeinfo = _check_nodeinfo(nodeinfo_output, "NUMA cell(s)", 3)
        # Distinct physical ids on the host divided by the NUMA cell count.
        # '//' keeps an integer result on Python 3 as well as Python 2.
        cpu_sockets_os = (int(cmd_result.stdout.strip()) //
                          int(cpu_NUMA_nodeinfo))
        if cpu_sockets_os != cpu_sockets_nodeinfo:
            raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                                 "socket(s)")

        # Check Core(s) per socket
        cores_per_socket_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                    "Core(s) per socket", 4)
        cmd = "grep 'cpu cores' /proc/cpuinfo | head -n1 | awk '{print $4}'"
        cmd_result = utils.run(cmd, ignore_status=True)
        cores_per_socket_os = cmd_result.stdout.strip()
        if not re.match(cores_per_socket_nodeinfo, cores_per_socket_os):
            raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                                 "per socket")

        # Check Memory size
        memory_size_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, "Memory size", 3))
        memory_size_os = utils_memory.memtotal()
        if memory_size_nodeinfo != memory_size_os:
            raise error.TestFail("Virsh nodeinfo output didn't match "
                                 "Memory size")
def run(test, params, env):
    """
    Test migration under stress.

    Reads the migration and stress settings from params, works out the
    memory amount handed to the stress tool and, when host stress is
    requested, loads the stress tool on the host.

    :param test: test object; test.cancel() and test.error() are used to
                 report unusable configuration and stress setup errors.
    :param params: test parameters (e.g. "vms", "migrate_dest_host",
                   "mem", "stress_tool", "stress_vm_bytes").
    :param env: test environment providing the VM objects.
    """
    vm_names = params.get("vms").split()
    if len(vm_names) < 2:
        test.cancel("Provide enough vms for migration")

    src_uri = "qemu:///system"
    dest_uri = libvirt_vm.complete_uri(params.get("migrate_dest_host",
                                                  "EXAMPLE"))
    # A local ('///') URI or the untouched placeholder is not a usable
    # migration destination
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        test.cancel("The dest_uri '%s' is invalid" % dest_uri)

    # Migrated vms' instance
    vms = env.get_all_vms()
    params["load_vms"] = list(vms)

    cpu = int(params.get("smp", 1))
    # presumably "mem" is in MB and this converts to KB - TODO confirm
    memory = int(params.get("mem")) * 1024
    stress_tool = params.get("stress_tool", "")
    remote_stress = params.get("migration_stress_remote", "no") == "yes"
    host_stress = params.get("migration_stress_host", "no") == "yes"
    vms_stress = params.get("migration_stress_vms", "no") == "yes"
    vm_bytes = params.get("stress_vm_bytes", "128M")
    stress_args = params.get("%s_args" % stress_tool)
    migration_type = params.get("migration_type")
    start_migration_vms = params.get("start_migration_vms", "yes") == "yes"
    thread_timeout = int(params.get("thread_timeout", 120))
    ubuntu_dep = ['build-essential', 'git']
    hstress = rstress = None
    vstress = {}

    # Set vm_bytes for start_cmd
    mem_total = utils_memory.memtotal()
    vm_reserved = len(vms) * memory
    if vm_bytes == "half":
        # Floor division keeps an integer byte count on Python 3 as well
        vm_bytes = (mem_total - vm_reserved) // 2
    elif vm_bytes == "shortage":
        # Ask for more than what is left after the guests' reservation
        vm_bytes = mem_total - vm_reserved + 524288
    # stress_args is None when no "<tool>_args" parameter is configured;
    # skip the substitution instead of failing on the membership test
    if stress_args and "vm-bytes" in stress_args:
        params["%s_args" % stress_tool] = stress_args % vm_bytes

    # Ensure stress tool is available in host
    if host_stress:
        # remove package manager installed tool to avoid conflict
        if not utils_package.package_remove(stress_tool):
            logging.error("Existing %s is not removed", stress_tool)
        if "stress-ng" in stress_tool and 'Ubuntu' in utils_misc.get_distro():
            params['stress-ng_dependency_packages_list'] = ubuntu_dep
        try:
            hstress = utils_test.HostStress(stress_tool, params)
            hstress.load_stress_tool()
        except utils_test.StressError as info:
            test.error(info)
def run(test, params, env):
    """
    Test migration under stress.

    Validates the migration parameters, derives the memory amount passed
    to the stress tool and, if host stress is enabled, loads the stress
    tool on the host.

    :param test: test object; test.cancel() and test.error() are used to
                 report unusable configuration and stress setup errors.
    :param params: test parameters (e.g. "vms", "migrate_dest_host",
                   "mem", "stress_tool", "stress_vm_bytes").
    :param env: test environment providing the VM objects.
    """
    vm_names = params.get("vms").split()
    if len(vm_names) < 2:
        test.cancel("Provide enough vms for migration")

    src_uri = "qemu:///system"
    dest_uri = libvirt_vm.complete_uri(params.get("migrate_dest_host",
                                                  "EXAMPLE"))
    # Reject a local ('///') URI and the untouched default placeholder
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        test.cancel("The dest_uri '%s' is invalid" % dest_uri)

    # Migrated vms' instance
    vms = env.get_all_vms()
    params["load_vms"] = list(vms)

    cpu = int(params.get("smp", 1))
    # presumably "mem" is in MB and this converts to KB - TODO confirm
    memory = int(params.get("mem")) * 1024
    stress_tool = params.get("stress_tool", "")
    remote_stress = params.get("migration_stress_remote", "no") == "yes"
    host_stress = params.get("migration_stress_host", "no") == "yes"
    vms_stress = params.get("migration_stress_vms", "no") == "yes"
    vm_bytes = params.get("stress_vm_bytes", "128M")
    stress_args = params.get("%s_args" % stress_tool)
    migration_type = params.get("migration_type")
    start_migration_vms = params.get("start_migration_vms", "yes") == "yes"
    thread_timeout = int(params.get("thread_timeout", 120))
    ubuntu_dep = ['build-essential', 'git']
    hstress = rstress = None
    vstress = {}

    # Set vm_bytes for start_cmd
    mem_total = utils_memory.memtotal()
    vm_reserved = len(vms) * memory
    if vm_bytes == "half":
        # '//' yields an integer on both Python 2 and Python 3
        vm_bytes = (mem_total - vm_reserved) // 2
    elif vm_bytes == "shortage":
        # Request more than the memory left after the guests' reservation
        vm_bytes = mem_total - vm_reserved + 524288
    # Without a "<tool>_args" parameter stress_args is None; there is
    # nothing to substitute in that case
    if stress_args and "vm-bytes" in stress_args:
        params["%s_args" % stress_tool] = stress_args % vm_bytes

    # Ensure stress tool is available in host
    if host_stress:
        # remove package manager installed tool to avoid conflict
        if not utils_package.package_remove(stress_tool):
            logging.error("Existing %s is not removed", stress_tool)
        if "stress-ng" in stress_tool and 'Ubuntu' in utils_misc.get_distro():
            params['stress-ng_dependency_packages_list'] = ubuntu_dep
        try:
            hstress = utils_test.HostStress(stress_tool, params)
            hstress.load_stress_tool()
        except utils_test.StressError as info:
            test.error(info)
Example #6
0
def run(test, params, env):
    """
    Test the command virsh nodememstats

    (1) Call the virsh nodememstats command
    (2) Get the output
    (3) Check it against /proc/meminfo output
    (4) Call the virsh nodememstats command with an unexpected option
    (5) Call the virsh nodememstats command with libvirtd service stop

    :param test: test object; test.fail() is used to report failures.
    :param params: test parameters; reads "itr", "libvirtd",
                   "virsh_nodememstats_options", "status_error" and "delta".
    :param env: test environment (not used by this test).
    """

    # Initialize the variables
    expected = {}
    actual = {}
    deltas = []
    name_stats = ['total', 'free', 'buffers', 'cached']
    itr = int(params.get("itr"))

    def virsh_check_nodememtats(actual_stats, expected_stats, delta):
        """
        Check the nodememstats output value with /proc/meminfo value

        :param actual_stats: dict with one value per entry of name_stats.
        :param expected_stats: dict with one value per entry of name_stats.
        :param delta: largest tolerated absolute difference for every
                      statistic except 'total', which must match exactly.
        :return: dict mapping each statistic name to its absolute difference.
        """

        delta_stats = {}
        for name in name_stats:
            delta_stats[name] = abs(actual_stats[name] - expected_stats[name])
            if 'total' in name:
                if not delta_stats[name] == 0:
                    test.fail("Command 'virsh nodememstats' not"
                              " succeeded as the value for %s is "
                              "deviated by %d\nThe total memory "
                              "value is deviating-check" %
                              (name, delta_stats[name]))
            else:
                if delta_stats[name] > delta:
                    test.fail("Command 'virsh nodememstats' not "
                              "succeeded as the value for %s"
                              " is deviated by %d" % (name, delta_stats[name]))
        return delta_stats

    # Prepare libvirtd service
    # Always bind libvirtd (None when the parameter is absent): it is read
    # again in the loop and during recovery below.
    libvirtd = params.get("libvirtd")
    if libvirtd == "off":
        utils_libvirtd.libvirtd_stop()

    # Get the option for the test case
    option = params.get("virsh_nodememstats_options")
    if option == "max":
        cell_dict = utils_test.libvirt.get_all_cells()
        option = len(list(cell_dict.keys()))

    # Get status_error option for the test case. Read once, before the
    # loop, so it is also defined for the final report when itr is 0.
    status_error = params.get("status_error")

    # Run test case for 10 iterations
    # (default can be changed in subtests.cfg file)
    # and print the final statistics
    for i in range(itr):
        output = virsh.nodememstats(option)

        # Get the status of the virsh command executed
        status = output.exit_status

        if status_error == "yes":
            if status == 0:
                if libvirtd == "off":
                    # Restore the service before reporting the failure
                    utils_libvirtd.libvirtd_start()
                    test.fail("Command 'virsh nodememstats' "
                              "succeeded with libvirtd service"
                              " stopped, incorrect")
                else:
                    test.fail("Command 'virsh nodememstats %s' "
                              "succeeded (incorrect command)" % option)

        elif status_error == "no":
            if status == 0:
                # With an option set only the exit status is checked
                if option:
                    return
                # From the beginning of a line, group 1 is one or
                # more word-characters, followed by zero or more
                # whitespace characters and a ':', then one or
                # more whitespace characters, followed by group 2,
                # which is one or more digit characters,
                # then one whitespace character followed by
                # a unit word such as 'kB' or 'KiB', e.g as below
                # total  :              3809340 kB
                # total  :              3809340 KiB
                # Normalise the value to MBs
                regex_obj = re.compile(r"^(\w+)\s*:\s+(\d+)\s\w+")
                expected = {}

                for line in output.stdout.split('\n'):
                    match_obj = regex_obj.search(line)
                    # Due to the extra space in the list
                    if match_obj is not None:
                        name = match_obj.group(1)
                        value = match_obj.group(2)
                        expected[name] = int(value) // 1024

                # Get the actual value from /proc/meminfo and normalise to MBs
                actual['total'] = int(utils_memory.memtotal()) // 1024
                actual['free'] = int(utils_memory.freememtotal()) // 1024
                actual['buffers'] = int(
                    utils_memory.read_from_meminfo('Buffers')) // 1024
                actual['cached'] = int(
                    utils_memory.read_from_meminfo('Cached')) // 1024

                # Currently the delta value is kept at 200 MB this can be
                # tuned based on the accuracy
                # Check subtests.cfg for more details
                delta = int(params.get("delta"))
                delta_stats = virsh_check_nodememtats(actual, expected, delta)
                deltas.append(delta_stats)

            else:
                test.fail("Command virsh nodememstats %s not "
                          "succeeded:\n%s" % (option, status))

    # Recover libvirtd service start
    if libvirtd == "off":
        utils_libvirtd.libvirtd_start()

    # Print the deviated values for all iterations
    if status_error == "no":
        logging.debug("The following is the deviations from "
                      "the actual(/proc/meminfo) and expected"
                      " value(output of virsh nodememstats)")

        # Walk the collected results directly rather than indexing by
        # range(itr), so the report can never run past the end of deltas.
        for i, delta_stats in enumerate(deltas):
            logging.debug("iteration %d:", i)
            for name in name_stats:
                logging.debug("%19s : %d", name, delta_stats[name])
Example #7
0
    def output_check(nodeinfo_output):
        """
        Check the fields of virsh nodeinfo against the host and against
        the CPU topology from virsh capabilities.

        Fields are read from nodeinfo_output through _check_nodeinfo().
        Every mismatch except CPU frequency is reported with test.fail();
        the frequency check only logs a debug message.

        :param nodeinfo_output: virsh nodeinfo output, passed unchanged to
                                _check_nodeinfo().
        """
        # Check CPU model
        cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
        cpu_arch = platform.machine()
        if not re.match(cpu_model_nodeinfo, cpu_arch):
            test.fail("Virsh nodeinfo output didn't match CPU model")

        # Check number of CPUs, nodeinfo CPUs represent online threads in the
        # system, check all online cpus in sysfs
        cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
        # stdout_text (as used for every other command in this function)
        # gives text that can be compared with the nodeinfo string below
        cmd = "cat /sys/devices/system/cpu/cpu*/online | grep 1 | wc -l"
        cpus_online = process.run(cmd, ignore_status=True,
                                  shell=True).stdout_text.strip()
        cmd = "cat /sys/devices/system/cpu/cpu*/online | wc -l"
        cpus_total = process.run(cmd, ignore_status=True,
                                 shell=True).stdout_text.strip()
        # Without an 'online' file for cpu0 the globs above did not see it;
        # it is counted as one more online cpu in both totals
        if not os.path.exists('/sys/devices/system/cpu/cpu0/online'):
            cpus_online = str(int(cpus_online) + 1)
            cpus_total = str(int(cpus_total) + 1)

        logging.debug("host online cpus are %s", cpus_online)
        logging.debug("host total cpus are %s", cpus_total)

        if cpus_nodeinfo != cpus_online:
            if 'ppc' in cpu_arch:
                # On ppc the value is also accepted when it equals the
                # total number of cpus
                if cpus_nodeinfo != cpus_total:
                    test.fail("Virsh nodeinfo output of CPU(s) on"
                              " ppc did not match all threads in "
                              "the system")
            else:
                test.fail("Virsh nodeinfo output didn't match "
                          "number of CPU(s)")

        # Check CPU frequency, frequency is under clock for ppc
        cpu_frequency_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                 'CPU frequency', 3)
        cmd = ("cat /proc/cpuinfo | grep -E 'cpu MHz|clock|BogoMIPS' | "
               "head -n1 | awk -F: '{print $2}' | awk -F. '{print $1}'")
        cmd_result = process.run(cmd, ignore_status=True, shell=True)
        cpu_frequency_os = cmd_result.stdout_text.strip()
        logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                      cpu_frequency_nodeinfo, cpu_frequency_os)
        #
        # Matching CPU Frequency is not an exact science in todays modern
        # processors and OS's. CPU's can have their execution speed varied
        # based on current workload in order to save energy and keep cool.
        # Thus since we're getting the values at disparate points in time,
        # we cannot necessarily do a pure comparison.
        # So, let's get the absolute value of the difference and ensure
        # that it's within 20 percent of each value to give us enough of
        # a "fudge" factor to declare "close enough". Don't return a failure
        # just print a debug message and move on.
        diffval = abs(int(cpu_frequency_nodeinfo) - int(cpu_frequency_os))
        if (float(diffval) / float(cpu_frequency_nodeinfo) > 0.20
                or float(diffval) / float(cpu_frequency_os) > 0.20):
            logging.debug("Virsh nodeinfo output didn't match CPU "
                          "frequency within 20 percent")

        # Get CPU topology from virsh capabilities xml
        cpu_topology = capability_xml.CapabilityXML()['cpu_topology']
        logging.debug("Cpu topology in virsh capabilities output: %s",
                      cpu_topology)

        # Check CPU socket(s)
        cpu_sockets_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
        # CPU socket(s) in virsh nodeinfo is Total sockets in each node, not
        # total sockets in the system, so get total sockets in one node and
        # check with it
        node_info = utils_misc.NumaInfo()
        node_online_list = node_info.get_online_nodes()
        cmd = "cat /sys/devices/system/node/node%s" % node_online_list[0]
        # 'sort -u' counts distinct package ids; plain 'uniq' only merges
        # adjacent duplicates and over-counts when the ids are interleaved
        cmd += "/cpu*/topology/physical_package_id | sort -u | wc -l"
        cmd_result = process.run(cmd, ignore_status=True, shell=True)
        total_sockets_in_node = int(cmd_result.stdout_text.strip())
        if total_sockets_in_node != cpu_sockets_nodeinfo:
            test.fail("Virsh nodeinfo output didn't match CPU "
                      "socket(s) of host OS")
        if cpu_sockets_nodeinfo != int(cpu_topology['sockets']):
            test.fail("Virsh nodeinfo output didn't match CPU "
                      "socket(s) of virsh capabilities output")

        # Check Core(s) per socket
        cores_per_socket_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                    'Core(s) per socket', 4)
        cmd = "lscpu | grep 'Core(s) per socket' | head -n1 | awk '{print $4}'"
        cmd_result = process.run(cmd, ignore_status=True, shell=True)
        cores_per_socket_os = cmd_result.stdout_text.strip()
        spec_numa = False
        if not re.match(cores_per_socket_nodeinfo, cores_per_socket_os):
            # for spec NUMA arch, the output of nodeinfo is in a spec format
            cpus_os = utils_misc.get_cpu_info().get("CPU(s)")
            numa_cells_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                  'NUMA cell(s)', 3)
            # Accepted layout: a single NUMA cell whose cores-per-socket
            # value matches the host CPU count
            if (re.match(cores_per_socket_nodeinfo, cpus_os)
                    and re.match(numa_cells_nodeinfo, "1")):
                spec_numa = True
            else:
                test.fail("Virsh nodeinfo output didn't match "
                          "CPU(s) or Core(s) per socket of host OS")
        if cores_per_socket_nodeinfo != cpu_topology['cores']:
            test.fail("Virsh nodeinfo output didn't match Core(s) "
                      "per socket of virsh capabilities output")
        # Check Thread(s) per core
        threads_per_core_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                    'Thread(s) per core', 4)
        if not spec_numa:
            if threads_per_core_nodeinfo != cpu_topology['threads']:
                test.fail("Virsh nodeinfo output didn't match "
                          "Thread(s) per core of virsh "
                          "capabilities output")
        else:
            # In the spec NUMA layout exactly one thread per core is expected
            if threads_per_core_nodeinfo != "1":
                test.fail("Virsh nodeinfo output didn't match "
                          "Thread(s) per core of virsh "
                          "capabilities output")
        # Check Memory size
        memory_size_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
        memory_size_os = 0
        if libvirt_version.version_compare(2, 0, 0):
            # Sum MemTotal over all online NUMA nodes
            for i in node_online_list:
                node_memory = node_info.read_from_node_meminfo(i, 'MemTotal')
                memory_size_os += int(node_memory)
        else:
            memory_size_os = utils_memory.memtotal()
        logging.debug('The host total memory from nodes is %s', memory_size_os)

        if memory_size_nodeinfo != memory_size_os:
            test.fail("Virsh nodeinfo output didn't match " "Memory size")
Example #8
0
def run_stress_kernel_compile(tests, params, env):
    """
    Boot VMs and run kernel compile inside VM parallel.

    1) Boot up VMs:
       Every VM has 4G vmem, the total vmem of VMs' are
       $overcommit times as host's mem.
    2) Launch kernel compile inside every guest.

    :param tests: QEMU test object.
    :param params: Dictionary with the test parameters.
    :param env: Dictionary with test environment.
    """
    # Local import: only the polling loop below needs it
    import time

    def kernelcompile(session, vm_name):
        """
        Download the kernel source in one guest and run the test command.

        :param session: logged-in session of the guest.
        :param vm_name: name of the guest the session belongs to.
        :raise error.TestFail: if the download fails or the guest does not
                               answer ping afterwards.
        """
        vm = env.get_vm(vm_name)
        ip = vm.get_address()
        path = params.get("download_url")
        logging.info("kernel path = %s", path)
        get_kernel_cmd = "wget %s" % path
        try:
            status, output = session.cmd_status_output(get_kernel_cmd,
                                                       timeout=240)
            if status != 0:
                logging.error(output)
                raise error.TestFail("Fail to download the kernel"
                                     " in %s" % vm_name)
            else:
                logging.info("Completed download the kernel src"
                             " in %s", vm_name)
            test_cmd = params.get("test_cmd")
            status, output = session.cmd_status_output(test_cmd, timeout=1200)
            # A failing test command is only logged, it does not fail the test
            if status != 0:
                logging.error(output)
        finally:
            # Whatever happened above, the guest must still answer ping
            status, _ = utils_test.ping(ip, count=10, timeout=30)
            if status != 0:
                raise error.TestFail("vm no response, pls check serial log")

    over_c = float(params.get("overcommit", 1.5))
    guest_number = int(params.get("guest_number", "1"))

    if guest_number < 1:
        logging.warning("At least boot up one guest for this test,"
                        " set up guest number to 1")
        guest_number = 1

    # The first guest is already listed in params["vms"]; add the rest
    for tag in range(1, guest_number):
        params["vms"] += " stress_guest_%s" % tag

    mem_host = utils_memory.memtotal() / 1024
    vmem = int(mem_host * over_c / guest_number)

    if vmem < 256:
        raise error.TestNAError("The memory size set for guest is too small."
                                " Please try less than %s guests"
                                " in this host." % guest_number)
    params["mem"] = vmem
    params["start_vm"] = "yes"
    login_timeout = int(params.get("login_timeout", 360))

    env_process.preprocess(tests, params, env)

    sessions_info = []
    for vm_name in params["vms"].split():
        vm = env.get_vm(vm_name)
        vm.verify_alive()
        session = vm.wait_for_login(timeout=login_timeout)
        if not session:
            raise error.TestFail("Could not log into guest %s" % vm_name)

        sessions_info.append([session, vm_name])

    # run kernel compile in vms
    # Bound before the try block so the cleanup in 'finally' can always
    # iterate over it
    bg_threads = []
    try:
        logging.info("run kernel compile in vms")
        for session, vm_name in sessions_info:
            bg_thread = utils_test.BackgroundTest(kernelcompile,
                                                  (session, vm_name))
            bg_thread.start()
            bg_threads.append(bg_thread)

        # Wait until every background test has finished; sleep between
        # polls instead of spinning on is_alive()
        while any(bg_thread.is_alive() for bg_thread in bg_threads):
            time.sleep(1)
    finally:
        try:
            for bg_thread in bg_threads:
                if bg_thread:
                    bg_thread.join()
        finally:
            for session_info in sessions_info:
                session_info[0].close()
Example #9
0
def run(test, params, env):
    """
    Check KSM can be started automatically when ksmtuned threshold is reached

    1. Get the memory of your host and the KSM_THRES_COEF
    2. Boot a guest with memory less than KSM_THRES_COEF threshold
    3. Get the memory used in host of process qemu-kvm
    4. Get the free memory in host
    5. If both the free memory size is not smaller than the threshold and guest
        used memory + threshold is not bigger than total memory in host. Check
        the ksm status in host. Ksm should not start in the host
    6. Repeat step 2~5 with a guest big enough to break the rule in step 5

    :param test: kvm test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.
    """
    def check_ksm(mem, threshold_reached=False):
        """
        Boot a guest, put it under memory stress and verify the KSM status
        reported on the host against the expectation.

        :param mem: Boot guest with given memory, in KB
        :param threshold_reached: True when the ksmtuned threshold is expected
                                  to be reached (KSM should be running),
                                  False when KSM should stay off
        """
        def heavyload_install():
            # Install heavyload from the winutils volume when the guest does
            # not have it yet; cancel the test if that volume is missing.
            if session.cmd_status(test_install_cmd) != 0:
                logging.warning("Could not find installed heavyload in guest, "
                                "will install it via winutils.iso ")
                winutil_drive = utils_misc.get_winutils_vol(session)
                if not winutil_drive:
                    test.cancel("WIN_UTILS CDROM not found.")
                install_cmd = params["install_cmd"] % winutil_drive
                session.cmd(install_cmd)

        def check_qemu_used_mem(qemu_pid, mem):
            # True once the value reported for the QEMU process (scaled by
            # the page size) reaches the ``mem_thres`` fraction of ``mem``.
            qemu_used_page = process.getoutput(get_qemu_used_mem % qemu_pid,
                                               shell=True)
            qemu_used_mem = float(qemu_used_page) * pagesize
            return qemu_used_mem >= mem * mem_thres

        params['mem'] = mem // 1024
        params['start_vm'] = 'yes'
        vm_name = params['main_vm']
        env_process.preprocess_vm(test, params, env, vm_name)
        vm = env.get_vm(vm_name)
        session = vm.wait_for_login()
        qemu_pid = vm.get_pid()
        if params["os_type"] == "linux":
            params['stress_args'] = ('--cpu 4 --io 4 --vm 2 --vm-bytes %sM' %
                                     (int(params['mem']) // 2))
            stress_test = VMStress(vm, "stress", params)
            stress_test.load_stress_tool()
        else:
            install_path = params["install_path"]
            test_install_cmd = 'dir "%s" | findstr /I heavyload' % install_path
            heavyload_install()
            heavyload_bin = r'"%s\heavyload.exe" ' % install_path
            heavyload_options = ["/MEMORY 100", "/START"]
            start_cmd = heavyload_bin + " ".join(heavyload_options)
            stress_tool = BackgroundTest(
                session.cmd, (start_cmd, stress_timeout, stress_timeout))
            stress_tool.start()
            if not utils_misc.wait_for(stress_tool.is_alive, stress_timeout):
                test.error("Failed to start heavyload process")
        if not utils_misc.wait_for(lambda: check_qemu_used_mem(qemu_pid, mem),
                                   stress_timeout, 10, 10):
            test.error("QEMU used memory doesn't reach %s of guest mem %sM in "
                       "%ss" % (mem_thres, mem // 1024, stress_timeout))
        time.sleep(30)
        free_mem_host = utils_memory.freememtotal()
        ksm_status = process.getoutput(params['cmd_check_ksm_status'])
        vm.destroy()
        logging.info(
            "The ksm threshold is %sM, QEMU used memory is %sM, "
            "and the total free memory on host is %sM", ksm_thres // 1024,
            mem // 1024, free_mem_host // 1024)
        if threshold_reached:
            if free_mem_host > ksm_thres:
                test.error("Host memory is not consumed as much as expected")
            if ksm_status == '0':
                test.fail("KSM should be running")
        else:
            if free_mem_host < ksm_thres:
                test.error("Host memory is consumed too much more than "
                           "expected")
            if ksm_status != '0':
                test.fail("KSM should not be running")

    total_mem_host = utils_memory.memtotal()
    utils_memory.drop_caches()
    free_mem_host = utils_memory.freememtotal()
    ksm_thres = process.getoutput(params['cmd_get_thres'], shell=True)
    ksm_thres = int(total_mem_host *
                    (int(re.findall('\\d+', ksm_thres)[0]) / 100))
    guest_mem = (free_mem_host - ksm_thres) // 2
    if arch.ARCH in ('ppc64', 'ppc64le'):
        guest_mem = guest_mem - guest_mem % (256 * 1024)
    status_ksm_service = process.system(params['cmd_status_ksmtuned'],
                                        ignore_status=True)
    if status_ksm_service != 0:
        process.run(params['cmd_start_ksmtuned'])
    # A configured value arrives as a string; coerce it because it is used
    # as a numeric timeout by wait_for() and session.cmd().
    stress_timeout = int(params.get("stress_timeout", 1800))
    mem_thres = float(params.get("mem_thres", 0.95))
    get_qemu_used_mem = params['cmd_get_qemu_used_mem']
    pagesize = utils_memory.getpagesize()
    check_ksm(guest_mem)

    ksm_config_file = params['ksm_config_file']
    backup_file = ksm_config_file + '.backup'
    copyfile(ksm_config_file, backup_file)
    threshold = params.get_numeric('ksm_threshold')
    with open(ksm_config_file, "a+") as f:
        # Surround the entry with newlines so it cannot be glued onto the
        # last existing line when the file lacks a trailing newline.
        f.write('\n%s=%s\n' % (params['ksm_thres_conf'], threshold))
    process.run(params['cmd_restart_ksmtuned'])
    # Keep the threshold an integer, as in the first phase, so the guest
    # memory derived from it (and params['mem']) is not a float.
    # check_ksm() reads this rebound value through its closure.
    ksm_thres = int(total_mem_host * (threshold / 100))
    guest_mem = total_mem_host - ksm_thres // 2
    if arch.ARCH in ('ppc64', 'ppc64le'):
        guest_mem = guest_mem - guest_mem % (256 * 1024)
    try:
        check_ksm(guest_mem, threshold_reached=True)
    finally:
        # Restore the original configuration and the initial service state.
        copyfile(backup_file, ksm_config_file)
        os.remove(backup_file)
        if status_ksm_service != 0:
            process.run(params['cmd_stop_ksmtuned'])
        else:
            process.run(params['cmd_restart_ksmtuned'])
Example #10
0
        if os.path.exists(e_rh):
            utils.run("echo 'never' > %s" % e_rh)
        new_ksm = True
    else:
        try:
            utils.run("modprobe ksm")
            utils.run("ksmctl start 5000 100")
        except error.CmdError, details:
            raise error.TestFail("Failed to load KSM: %s" % details)

    # host_reserve: mem reserve kept for the host system to run
    host_reserve = int(params.get("ksm_host_reserve", -1))
    if (host_reserve == -1):
        # default host_reserve = MemAvailable + one_minimal_guest(128MB)
        # later we add 64MB per additional guest
        host_reserve = ((utils_memory.memtotal()
                         - utils_memory.read_from_meminfo("MemFree"))
                        / 1024 + 128)
        # using default reserve
        _host_reserve = True
    else:
        _host_reserve = False

    # guest_reserve: mem reserve kept to avoid guest OS to kill processes
    guest_reserve = int(params.get("ksm_guest_reserve", -1))
    if (guest_reserve == -1):
        # default guest_reserve = minimal_system_mem(256MB)
        # later we add tmpfs overhead
        guest_reserve = 256
        # using default reserve
        _guest_reserve = True
Example #11
0
def run(test, params, env):
    """
    KVM multi test:
    1) Log into guests
    2) Check all the nics available or not
    3) Ping among guest nic and host
       3.1) Ping with different packet size
       3.2) Flood ping test
       3.3) Final ping test
    4) Transfer files among guest nics and host
       4.1) Create file by dd command in guest
       4.2) Transfer file between nics
       4.3) Compare original file and transferred file
    5) ping among different nics
       5.1) Ping with different packet size
       5.2) Flood ping test
       5.3) Final ping test
    6) Transfer files among different nics
       6.1) Create file by dd command in guest
       6.2) Transfer file between nics
       6.3) Compare original file and transferred file
    7) Repeat step 3 - 6 on every nic.

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """

    def ping(session, nic, dst_ip, strict_check, flood_minutes):
        """
        Ping ``dst_ip`` through ``nic`` with several packet sizes, then run a
        flood ping followed by a final ping.

        :param session: guest session used to run ping
        :param nic: interface name to ping from
        :param dst_ip: address to ping
        :param strict_check: "yes" to fail on any packet loss; any other
                             value only fails on a non-zero ping status
        :param flood_minutes: flood ping duration, in minutes
        :raise error.TestFail: when a ping check fails
        """
        # The flag is a "yes"/"no" string, so it must be compared explicitly:
        # a bare truth test would treat "no" as enabled.
        strict = strict_check == "yes"
        d_packet_size = [1, 4, 48, 512, 1440, 1500, 1505, 4054, 4055, 4096,
                         4192, 8878, 9000, 32767, 65507]
        packet_size = params.get("packet_size", "").split() or d_packet_size
        for size in packet_size:
            error.context("Ping with packet size %s" % size, logging.info)
            status, output = utils_test.ping(dst_ip, 10, interface=nic,
                                             packetsize=size, timeout=30,
                                             session=session)
            if strict:
                ratio = utils_test.get_loss_ratio(output)
                if ratio != 0:
                    raise error.TestFail("Loss ratio is %s for packet size"
                                         " %s" % (ratio, size))
            else:
                if status != 0:
                    raise error.TestFail("Ping returns non-zero value %s" %
                                         output)

        error.context("Flood ping test", logging.info)
        utils_test.ping(dst_ip, None, interface=nic, flood=True,
                        output_func=None, timeout=flood_minutes * 60,
                        session=session)
        error.context("Final ping test", logging.info)
        counts = params.get("ping_counts", 100)
        status, output = utils_test.ping(dst_ip, counts, interface=nic,
                                         timeout=float(counts) * 1.5,
                                         session=session)
        if strict:
            ratio = utils_test.get_loss_ratio(output)
            if ratio != 0:
                raise error.TestFail("Packet loss ratio is %s after flood"
                                     % ratio)
        else:
            if status != 0:
                raise error.TestFail("Ping returns non-zero value %s" %
                                     output)

    def file_transfer(session, src, dst):
        """
        Create a file in the guest, copy it from ``src`` to ``dst`` and back,
        then compare the md5sum of the original and the round-tripped copy.

        :param session: guest session used to create and checksum the files
        :param src: source address of the first transfer
        :param dst: destination address of the first transfer
        :raise error.TestError: when the checksums differ
        """
        username = params.get("username", "")
        password = params.get("password", "")
        src_path = "/tmp/1"
        dst_path = "/tmp/2"
        port = int(params["file_transfer_port"])

        cmd = "dd if=/dev/urandom of=%s bs=100M count=1" % src_path
        cmd = params.get("file_create_cmd", cmd)

        error.context("Create file by dd command, cmd: %s" % cmd, logging.info)
        session.cmd(cmd)

        transfer_timeout = int(params.get("transfer_timeout"))
        log_filename = "scp-from-%s-to-%s.log" % (src, dst)
        error.context("Transfer file from %s to %s" % (src, dst), logging.info)
        remote.scp_between_remotes(src, dst, port, password, password,
                                   username, username, src_path, dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)
        # Second leg: send the received copy back to the source side.
        src_path = dst_path
        dst_path = "/tmp/3"
        log_filename = "scp-from-%s-to-%s.log" % (dst, src)
        error.context("Transfer file from %s to %s" % (dst, src), logging.info)
        remote.scp_between_remotes(dst, src, port, password, password,
                                   username, username, src_path, dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)
        error.context("Compare original file and transferred file",
                      logging.info)

        cmd1 = "md5sum /tmp/1"
        cmd2 = "md5sum /tmp/3"
        md5sum1 = session.cmd(cmd1).split()[0]
        md5sum2 = session.cmd(cmd2).split()[0]
        if md5sum1 != md5sum2:
            raise error.TestError("File changed after transfer")

    nic_interface_list = []
    check_irqbalance_cmd = params.get("check_irqbalance_cmd")
    stop_irqbalance_cmd = params.get("stop_irqbalance_cmd")
    start_irqbalance_cmd = params.get("start_irqbalance_cmd")
    status_irqbalance = params.get("status_irqbalance")
    vms = params["vms"].split()
    host_mem = utils_memory.memtotal() / (1024 * 1024)
    host_cpu_count = len(utils_misc.get_cpu_processors())
    vhost_count = 0
    if params.get("vhost"):
        vhost_count = 1
    if host_cpu_count < (1 + vhost_count) * len(vms):
        raise error.TestError("The host don't have enough cpus to start guest "
                              "pcus: %d, minimum of vcpus and vhost: %d" %
                              (host_cpu_count, (1 + vhost_count) * len(vms)))
    # Split host memory and cpus evenly between the guests.
    params['mem'] = host_mem / len(vms) * 1024
    params['smp'] = host_cpu_count / len(vms) - vhost_count
    if params['smp'] % 2 != 0:
        params['vcpu_sockets'] = 1
    params["start_vm"] = "yes"
    for vm_name in vms:
        env_process.preprocess_vm(test, params, env, vm_name)
    timeout = float(params.get("login_timeout", 360))
    # NOTE: the configuration key really is spelled "strick_check".
    strict_check = params.get("strick_check", "no")
    host_ip = utils_net.get_ip_address_by_interface(params.get("netdst"))
    host_ip = params.get("srchost", host_ip)
    flood_minutes = float(params["flood_minutes"])
    error.context("Check irqbalance service status", logging.info)
    o = process.system_output(check_irqbalance_cmd, ignore_status=True)
    check_stop_irqbalance = False
    if re.findall(status_irqbalance, o):
        logging.debug("stop irqbalance")
        process.run(stop_irqbalance_cmd)
        check_stop_irqbalance = True
        o = process.system_output(check_irqbalance_cmd, ignore_status=True)
        if re.findall(status_irqbalance, o):
            raise error.TestError("Can not stop irqbalance")
    try:
        thread_list = []
        for vm_name in vms:
            vm = env.get_vm(vm_name)
            session = vm.wait_for_login(timeout=timeout)
            thread_list.extend(vm.vcpu_threads)
            thread_list.extend(vm.vhost_threads)
            error.context("Check all the nics available or not", logging.info)
            for index, nic in enumerate(vm.virtnet):
                guest_ifname = utils_net.get_linux_ifname(session, nic.mac)
                guest_ip = vm.get_address(index)
                if not (guest_ifname and guest_ip):
                    # Append rather than overwrite so the VM name is kept.
                    err_log = "vms %s get ip or ifname failed." % vm_name
                    err_log += " ifname: %s, ip: %s." % (guest_ifname,
                                                         guest_ip)
                    raise error.TestFail(err_log)
                nic_interface = [guest_ifname, guest_ip, session]
                nic_interface_list.append(nic_interface)
        error.context("Pin vcpus and vhosts to host cpus", logging.info)
        host_numa_nodes = utils_misc.NumaInfo()
        vthread_num = 0
        for numa_node_id in host_numa_nodes.nodes:
            numa_node = host_numa_nodes.nodes[numa_node_id]
            for _ in range(len(numa_node.cpus)):
                if vthread_num >= len(thread_list):
                    break
                vcpu_tid = thread_list[vthread_num]
                logging.debug("pin vcpu/vhost thread(%s) to cpu(%s)" %
                              (vcpu_tid, numa_node.pin_cpu(vcpu_tid)))
                vthread_num += 1

        nic_interface_list_len = len(nic_interface_list)
        # ping and file transfer test
        for src_ip_index in range(nic_interface_list_len):
            error.context("Ping test from guest to host", logging.info)
            src_ip_info = nic_interface_list[src_ip_index]
            ping(src_ip_info[2], src_ip_info[0], host_ip, strict_check,
                 flood_minutes)
            error.context("File transfer test between guest and host",
                          logging.info)
            file_transfer(src_ip_info[2], src_ip_info[1], host_ip)
            for dst_ip in nic_interface_list[src_ip_index:]:
                if src_ip_info[1] == dst_ip[1]:
                    continue
                txt = "Ping test between %s and %s" % (src_ip_info[1],
                                                       dst_ip[1])
                error.context(txt, logging.info)
                ping(src_ip_info[2], src_ip_info[0], dst_ip[1], strict_check,
                     flood_minutes)
                txt = "File transfer test between %s " % src_ip_info[1]
                txt += "and %s" % dst_ip[1]
                error.context(txt, logging.info)
                file_transfer(src_ip_info[2], src_ip_info[1], dst_ip[1])
    finally:
        # Restart irqbalance even when the test fails, so the host is not
        # left with the service stopped by this test.
        if check_stop_irqbalance:
            process.run(start_irqbalance_cmd)
Example #12
0
    def output_check(nodeinfo_output):
        """
        Compare the fields of ``virsh nodeinfo`` output with the values
        reported by the host OS and by the virsh capabilities XML.

        :param nodeinfo_output: output of the virsh nodeinfo command
        :raise error.TestFail: when a checked field does not match
        """
        # Check CPU model
        # NOTE(review): the nodeinfo value is used as a regex matched at the
        # start of the kernel arch, so this is a prefix match; left as is
        # because it may be relied upon -- confirm.
        cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
        cpu_model_os = utils.get_current_kernel_arch()
        if not re.match(cpu_model_nodeinfo, cpu_model_os):
            raise error.TestFail(
                "Virsh nodeinfo output didn't match CPU model")

        # Check number of CPUs
        cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
        cpus_os = utils.count_cpus()
        if int(cpus_nodeinfo) != cpus_os:
            raise error.TestFail(
                "Virsh nodeinfo output didn't match number of "
                "CPU(s)")

        # Check CPU frequency
        cpu_frequency_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                 'CPU frequency', 3)
        cmd = ("cat /proc/cpuinfo | grep 'cpu MHz' | head -n1 | "
               "awk '{print $4}' | awk -F. '{print $1}'")
        cmd_result = utils.run(cmd, ignore_status=True)
        cpu_frequency_os = cmd_result.stdout.strip()
        logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                      cpu_frequency_nodeinfo, cpu_frequency_os)
        #
        # Matching CPU Frequency is not an exact science in todays modern
        # processors and OS's. CPU's can have their execution speed varied
        # based on current workload in order to save energy and keep cool.
        # Thus since we're getting the values at disparate points in time,
        # we cannot necessarily do a pure comparison.
        # So, let's get the absolute value of the difference and ensure
        # that it's within 20 percent of each value to give us enough of
        # a "fudge" factor to declare "close enough". Don't return a failure
        # just print a debug message and move on.
        try:
            freq_nodeinfo = int(cpu_frequency_nodeinfo)
            freq_os = int(cpu_frequency_os)
            diffval = abs(freq_nodeinfo - freq_os)
            if float(diffval) / float(freq_nodeinfo) > 0.20 or \
               float(diffval) / float(freq_os) > 0.20:
                logging.debug("Virsh nodeinfo output didn't match CPU "
                              "frequency within 20 percent")
        except (ValueError, ZeroDivisionError):
            # This check is advisory only: an empty or zero frequency (for
            # example no 'cpu MHz' line in /proc/cpuinfo) must not abort
            # the remaining checks.
            logging.debug("Could not compare CPU frequency: nodeinfo=%r "
                          "os=%r", cpu_frequency_nodeinfo, cpu_frequency_os)

        # Get CPU topology from virsh capabilities xml
        # (the 'cpu_topolopy' spelling is the key the XML wrapper is read by)
        cpu_topolopy = capability_xml.CapabilityXML()['cpu_topolopy']
        logging.debug("Cpu topolopy in virsh capabilities output: %s",
                      cpu_topolopy)

        # Check CPU socket(s)
        cpu_sockets_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
        cmd = "grep 'physical id' /proc/cpuinfo | uniq | sort | uniq |wc -l"
        cmd_result = utils.run(cmd, ignore_status=True)
        cpu_NUMA_nodeinfo = _check_nodeinfo(nodeinfo_output, 'NUMA cell(s)', 3)
        # Floor division keeps the per-cell socket count an integer under
        # both Python 2 and Python 3 division rules.
        cpu_sockets_os = int(
            cmd_result.stdout.strip()) // int(cpu_NUMA_nodeinfo)
        if cpu_sockets_os != cpu_sockets_nodeinfo:
            raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                                 "socket(s) of host OS")
        if cpu_sockets_nodeinfo != int(cpu_topolopy['sockets']):
            raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                                 "socket(s) of virsh capabilities output")

        # Check Core(s) per socket
        cores_per_socket_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                    'Core(s) per socket', 4)
        cmd = "grep 'cpu cores' /proc/cpuinfo | head -n1 | awk '{print $4}'"
        cmd_result = utils.run(cmd, ignore_status=True)
        cores_per_socket_os = cmd_result.stdout.strip()
        # Compare exactly: a regex prefix match would let "1" pass for "16".
        if cores_per_socket_nodeinfo != cores_per_socket_os:
            raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                                 "per socket of host OS")
        if cores_per_socket_nodeinfo != cpu_topolopy['cores']:
            raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                                 "per socket of virsh capabilities output")

        # Check Thread(s) per core
        threads_per_core_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                    'Thread(s) per core', 4)
        if threads_per_core_nodeinfo != cpu_topolopy['threads']:
            raise error.TestFail(
                "Virsh nodeinfo output didn't match Thread(s) "
                "per core of virsh capabilities output")

        # Check Memory size
        memory_size_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
        memory_size_os = utils_memory.memtotal()
        if memory_size_nodeinfo != memory_size_os:
            raise error.TestFail("Virsh nodeinfo output didn't match "
                                 "Memory size")
Example #13
0
def run(test, params, env):
    """
    Check KSM can be started automatically when ksmtuned threshold is reached

    1. Get the memory of your host and the KSM_THRES_COEF
    2. Boot a guest with memory less than KSM_THRES_COEF threshold
    3. Get the memory used in host of process qemu-kvm
    4. Get the free memory in host
    5. If both the free memory size is not smaller than the threshold and guest
        used memory + threshold is not bigger than total memory in host. Check
        the ksm status in host. Ksm should not start in the host
    6. Repeat step 2~5 with a guest big enough to break the rule in step 5

    :param test: kvm test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.
    """
    def check_ksm(mem, stress=False):
        """
        Boot a guest, optionally load it with stress, and verify the KSM
        status on the host against the current free memory and threshold.

        :param mem: Boot guest with given memory, in KB
        :param stress: Load stress or not
        """
        params['mem'] = mem // 1024
        params['start_vm'] = 'yes'
        vm_name = params['main_vm']
        env_process.preprocess_vm(test, params, env, vm_name)
        vm = env.get_vm(vm_name)
        vm.wait_for_login()
        if stress:
            params['stress_args'] = ('--cpu 4 --io 4 --vm 2 --vm-bytes %sM' %
                                     (int(params['mem']) // 2))
            stress_test = VMStress(vm, "stress", params)
            stress_test.load_stress_tool()
            time.sleep(30)
        qemu_pid = vm.get_pid()
        qemu_used_page = utils_misc.normalize_data_size(process.getoutput(
            params['cmd_get_qemu_used_mem'] % qemu_pid, shell=True) + 'K', 'B')
        pagesize = utils_memory.getpagesize()
        # This value is only reported in the log line below.
        qemu_used_mem = int(float(qemu_used_page)) * pagesize
        free_mem_host = utils_memory.freememtotal()
        ksm_status = process.getoutput(params['cmd_check_ksm_status'])
        vm.destroy()
        logging.info('The ksm threshold is %s, the memory allocated by qemu is'
                     ' %s, and the total free memory on host is %s.',
                     ksm_thres, qemu_used_mem, free_mem_host)
        if free_mem_host >= ksm_thres:
            if ksm_status != '0':
                test.fail('Ksm should not start.')
            if stress:
                test.error('The host resource is not consumed as expected.')
        elif ksm_status == '0':
            test.fail('Ksm should start but it does not.')

    total_mem_host = utils_memory.memtotal()
    utils_memory.drop_caches()
    free_mem_host = utils_memory.freememtotal()
    ksm_thres = process.getoutput(params['cmd_get_thres'], shell=True)
    ksm_thres = int(total_mem_host *
                    (int(re.findall('\\d+', ksm_thres)[0]) / 100))
    guest_mem = (free_mem_host - ksm_thres) // 2
    if arch.ARCH in ('ppc64', 'ppc64le'):
        guest_mem = guest_mem - guest_mem % (256 * 1024)
    status_ksm_service = process.system(
        params['cmd_status_ksmtuned'], ignore_status=True)
    if status_ksm_service != 0:
        process.run(params['cmd_start_ksmtuned'])
    check_ksm(guest_mem)

    ksm_config_file = params['ksm_config_file']
    backup_file = ksm_config_file + '.backup'
    copyfile(ksm_config_file, backup_file)
    threshold = params.get_numeric('ksm_threshold')
    with open(ksm_config_file, "a+") as f:
        # Surround the entry with newlines so it cannot be glued onto the
        # last existing line when the file lacks a trailing newline.
        f.write('\n%s=%s\n' % (params['ksm_thres_conf'], threshold))
    process.run(params['cmd_restart_ksmtuned'])
    # Keep the threshold an integer, as in the first phase, so the guest
    # memory derived from it (and params['mem']) is not a float.
    # check_ksm() reads this rebound value through its closure.
    ksm_thres = int(total_mem_host * (threshold / 100))
    guest_mem = total_mem_host - ksm_thres // 2
    if arch.ARCH in ('ppc64', 'ppc64le'):
        guest_mem = guest_mem - guest_mem % (256 * 1024)
    try:
        check_ksm(guest_mem, stress=True)
    finally:
        # Restore the original configuration and the initial service state.
        copyfile(backup_file, ksm_config_file)
        os.remove(backup_file)
        if status_ksm_service != 0:
            process.run(params['cmd_stop_ksmtuned'])
        else:
            process.run(params['cmd_restart_ksmtuned'])
Example #14
0
def run(test, params, env):
    """
    Test migration under stress.

    Configures cpu/memory on every vm in the environment, optionally loads a
    stress tool on the host or inside the vms, runs the stress migration and
    finally cleans up the destination and restarts the vms on the source.

    :param test: test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.
    """
    vm_names = params.get("vms").split()
    if len(vm_names) < 2:
        test.cancel("Provide enough vms for migration")

    src_uri = "qemu:///system"
    dest_uri = libvirt_vm.complete_uri(
        params.get("migrate_dest_host", "EXAMPLE"))
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        test.cancel("The dest_uri '%s' is invalid" % dest_uri)

    # Migrated vms' instance
    vms = env.get_all_vms()
    params["load_vms"] = list(vms)

    cpu = int(params.get("smp", 1))
    memory = int(params.get("mem")) * 1024
    stress_tool = params.get("stress_tool", "")
    stress_type = params.get("migration_stress_type")
    require_stress_tool = "stress" in stress_tool
    vm_bytes = params.get("stress_vm_bytes", "128M")
    stress_args = params.get("stress_args")
    migration_type = params.get("migration_type")
    start_migration_vms = params.get("start_migration_vms", "yes") == "yes"
    thread_timeout = int(params.get("thread_timeout", 120))

    # Set vm_bytes for start_cmd
    mem_total = utils_memory.memtotal()
    vm_reserved = len(vms) * memory
    if vm_bytes == "half":
        # Floor division: the value is substituted into the stress command
        # line, so it must stay a whole number.
        vm_bytes = (mem_total - vm_reserved) // 2
    elif vm_bytes == "shortage":
        vm_bytes = mem_total - vm_reserved + 524288
    # stress_args is optional; guard against None before the substring test.
    if stress_args and "vm-bytes" in stress_args:
        params["stress_args"] = stress_args % vm_bytes

    # Ensure stress tool is available in host
    if require_stress_tool and stress_type == "stress_on_host":
        utils_test.load_stress("stress_on_host", params)

    for vm in vms:
        # Keep vm dead for edit
        if vm.is_alive():
            vm.destroy()
        set_cpu_memory(vm.name, cpu, memory)

    try:
        if start_migration_vms:
            for vm in vms:
                vm.start()
                vm.wait_for_login()

        # configure stress in VM
        if require_stress_tool and stress_type == "stress_in_vms":
            utils_test.load_stress("stress_in_vms", params, vms)

        do_stress_migration(vms, src_uri, dest_uri, migration_type, test,
                            params, thread_timeout)
    finally:
        logging.debug("Cleanup vms...")
        params["connect_uri"] = src_uri
        for vm in vms:
            utils_test.libvirt.MigrationTest().cleanup_dest_vm(
                vm, None, dest_uri)
            # Try to start vms in source once vms in destination are
            # cleaned up
            if not vm.is_alive():
                vm.start()
                vm.wait_for_login()
        utils_test.unload_stress(stress_type, params, vms)
Example #15
0
    def output_check(nodeinfo_output):
        """
        Compare the fields of ``virsh nodeinfo`` output with the values
        reported by the host OS (sysfs, /proc/cpuinfo, lscpu) and by the
        virsh capabilities XML.

        :param nodeinfo_output: output of the virsh nodeinfo command
        :raise error.TestFail: when a checked field does not match
        """
        # Check CPU model
        # NOTE(review): the nodeinfo value is used as a regex matched at the
        # start of the kernel arch, so this is a prefix match; left as is
        # because it may be relied upon -- confirm.
        cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
        cpu_model_os = utils.get_current_kernel_arch()
        if not re.match(cpu_model_nodeinfo, cpu_model_os):
            raise error.TestFail(
                "Virsh nodeinfo output didn't match CPU model")

        # Check number of CPUs, nodeinfo CPUs represent online threads in the
        # system, check all online cpus in sysfs
        cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
        cmd = "cat /sys/devices/system/cpu/cpu*/online | grep 1 | wc -l"
        cpus_online = utils.run(cmd, ignore_status=True)
        cmd = "cat /sys/devices/system/cpu/cpu*/online | wc -l"
        cpus_total = utils.run(cmd, ignore_status=True)
        if cpus_nodeinfo != cpus_online.stdout.strip():
            # On power hosts the value is additionally accepted when it
            # equals the total number of threads.
            if 'power' in cpu_util.get_cpu_arch():
                if cpus_nodeinfo != cpus_total.stdout.strip():
                    raise error.TestFail("Virsh nodeinfo output of CPU(s) on"
                                         " ppc did not match all threads in "
                                         "the system")
            else:
                raise error.TestFail("Virsh nodeinfo output didn't match "
                                     "number of CPU(s)")

        # Check CPU frequency, frequency is under clock for ppc
        cpu_frequency_nodeinfo = _check_nodeinfo(
            nodeinfo_output, 'CPU frequency', 3)
        cmd = ("cat /proc/cpuinfo | grep -E 'cpu MHz|clock' | head -n1 | "
               "awk -F: '{print $2}' | awk -F. '{print $1}'")
        cmd_result = utils.run(cmd, ignore_status=True)
        cpu_frequency_os = cmd_result.stdout.strip()
        logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                      cpu_frequency_nodeinfo, cpu_frequency_os)
        #
        # Matching CPU Frequency is not an exact science in todays modern
        # processors and OS's. CPU's can have their execution speed varied
        # based on current workload in order to save energy and keep cool.
        # Thus since we're getting the values at disparate points in time,
        # we cannot necessarily do a pure comparison.
        # So, let's get the absolute value of the difference and ensure
        # that it's within 20 percent of each value to give us enough of
        # a "fudge" factor to declare "close enough". Don't return a failure
        # just print a debug message and move on.
        try:
            freq_nodeinfo = int(cpu_frequency_nodeinfo)
            freq_os = int(cpu_frequency_os)
            diffval = abs(freq_nodeinfo - freq_os)
            if float(diffval) / float(freq_nodeinfo) > 0.20 or \
               float(diffval) / float(freq_os) > 0.20:
                logging.debug("Virsh nodeinfo output didn't match CPU "
                              "frequency within 20 percent")
        except (ValueError, ZeroDivisionError):
            # This check is advisory only: an empty, non-numeric or zero
            # frequency must not abort the remaining checks.
            logging.debug("Could not compare CPU frequency: nodeinfo=%r "
                          "os=%r", cpu_frequency_nodeinfo, cpu_frequency_os)

        # Get CPU topology from virsh capabilities xml
        cpu_topology = capability_xml.CapabilityXML()['cpu_topology']
        logging.debug("Cpu topology in virsh capabilities output: %s",
                      cpu_topology)

        # Check CPU socket(s)
        cpu_sockets_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
        # CPU socket(s) in virsh nodeinfo is Total sockets in each node, not
        # total sockets in the system, so get total sockets in one node and
        # check with it
        node_info = utils_misc.NumaInfo()
        node_online_list = node_info.get_online_nodes()
        cmd = "cat /sys/devices/system/node/node%s" % node_online_list[0]
        # Sort before de-duplicating: uniq alone only collapses adjacent
        # duplicates, so interleaved package ids would be overcounted.
        cmd += "/cpu*/topology/physical_package_id | sort -u | wc -l"
        cmd_result = utils.run(cmd, ignore_status=True)
        total_sockets_in_node = int(cmd_result.stdout.strip())
        if total_sockets_in_node != cpu_sockets_nodeinfo:
            raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                                 "socket(s) of host OS")
        if cpu_sockets_nodeinfo != int(cpu_topology['sockets']):
            raise error.TestFail("Virsh nodeinfo output didn't match CPU "
                                 "socket(s) of virsh capabilities output")

        # Check Core(s) per socket
        cores_per_socket_nodeinfo = _check_nodeinfo(
            nodeinfo_output, 'Core(s) per socket', 4)
        cmd = "lscpu | grep 'Core(s) per socket' | head -n1 | awk '{print $4}'"
        cmd_result = utils.run(cmd, ignore_status=True)
        cores_per_socket_os = cmd_result.stdout.strip()
        # Compare exactly: a regex prefix match would let "1" pass for "16".
        if cores_per_socket_nodeinfo != cores_per_socket_os:
            raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                                 "per socket of host OS")
        if cores_per_socket_nodeinfo != cpu_topology['cores']:
            raise error.TestFail("Virsh nodeinfo output didn't match Core(s) "
                                 "per socket of virsh capabilities output")

        # Check Thread(s) per core
        threads_per_core_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                    'Thread(s) per core', 4)
        if threads_per_core_nodeinfo != cpu_topology['threads']:
            raise error.TestFail("Virsh nodeinfo output didn't match Thread(s) "
                                 "per core of virsh capabilities output")

        # Check Memory size
        memory_size_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
        memory_size_os = utils_memory.memtotal()
        if memory_size_nodeinfo != memory_size_os:
            raise error.TestFail("Virsh nodeinfo output didn't match "
                                 "Memory size")
def run(test, params, env):
    """
    Test migration under stress.

    Steps, as performed below:

    1) Validate the list of migration VMs and the source/destination URIs;
       skip the test when they are unusable.
    2) Fill in the NFS/SSH related params and set up the NFS client.
    3) Build VM objects for the migrated VMs and for the load VMs.
    4) Work out the stress memory size and update ``stress_args``.
    5) Shut down the migration VMs and apply the cpu/memory settings.
    6) Optionally start the VMs, set up the SSH key, run the stress
       migration and check the guests' network on the destination.
    7) Always clean up the VMs, the NFS client and the env objects.

    :param test: test object (``test.bindir`` is used to build VM objects)
    :param params: dictionary-like object with the test parameters
    :param env: test environment object
    :raise exceptions.TestSkipError: if fewer than two VMs are provided or
                                     a migration URI is invalid
    """
    vm_names = params.get("migration_vms").split()
    if len(vm_names) < 2:
        raise exceptions.TestSkipError("Provide enough vms for migration")

    src_uri = libvirt_vm.complete_uri(
        params.get("migrate_source_host", "EXAMPLE"))
    if src_uri.count('///') or src_uri.count('EXAMPLE'):
        raise exceptions.TestSkipError("The src_uri '%s' is invalid" % src_uri)

    dest_uri = libvirt_vm.complete_uri(
        params.get("migrate_dest_host", "EXAMPLE"))
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        raise exceptions.TestSkipError("The dest_uri '%s' is invalid" %
                                       dest_uri)

    # Params for NFS and SSH setup
    params["server_ip"] = params.get("migrate_dest_host")
    params["server_user"] = "******"
    params["server_pwd"] = params.get("migrate_dest_pwd")
    params["client_ip"] = params.get("migrate_source_host")
    params["client_user"] = "******"
    params["client_pwd"] = params.get("migrate_source_pwd")
    params["nfs_client_ip"] = params.get("migrate_dest_host")
    params["nfs_server_ip"] = params.get("migrate_source_host")

    # Configure NFS client on remote host
    nfs_client = nfs.NFSClient(params)
    nfs_client.setup()

    # Everything after the NFS setup runs inside this try block, so the NFS
    # client is torn down even when the preparation steps below fail.
    try:
        address_cache = env.get("address_cache")

        # Migrated vms' instance
        vms = [libvirt_vm.VM(vm_name, params, test.bindir, address_cache)
               for vm_name in vm_names]

        # vms for load
        load_vm_names = params.get("load_vms").split()
        load_vms = [libvirt_vm.VM(vm_name, params, test.bindir, address_cache)
                    for vm_name in load_vm_names]
        params['load_vms'] = load_vms

        cpu = int(params.get("smp", 1))
        memory = int(params.get("mem")) * 1024
        stress_type = params.get("migration_stress_type")
        vm_bytes = params.get("stress_vm_bytes")
        stress_args = params.get("stress_args")
        migration_type = params.get("migration_type")
        start_migration_vms = "yes" == params.get("start_migration_vms", "yes")
        thread_timeout = int(params.get("thread_timeout", 120))
        remote_host = params.get("migrate_dest_host")
        username = params.get("migrate_dest_user", "root")
        password = params.get("migrate_dest_pwd")
        prompt = params.get("shell_prompt", r"[\#\$]")

        # Set vm_bytes for start_cmd
        mem_total = utils_memory.memtotal()
        vm_reserved = len(vms) * memory
        if vm_bytes == "half":
            # Floor division keeps the value an integer on Python 3 as well
            vm_bytes = (mem_total - vm_reserved) // 2
        elif vm_bytes == "shortage":
            vm_bytes = mem_total - vm_reserved + 524288
        if vm_bytes is not None:
            params["stress_args"] = stress_args % vm_bytes

        for vm in vms:
            # Keep vm dead for edit
            if vm.is_alive():
                vm.destroy()
            set_cpu_memory(vm.name, cpu, memory)

        try:
            vm_ipaddr = {}
            if start_migration_vms:
                for vm in vms:
                    vm.start()
                    vm.wait_for_login()
                    vm_ipaddr[vm.name] = vm.get_address()
                    # TODO: recover vm if start failed?
            # Config ssh autologin for remote host
            ssh_key.setup_ssh_key(remote_host, username, password, port=22)

            do_stress_migration(vms, src_uri, dest_uri, stress_type,
                                migration_type, params, thread_timeout)
            # Check network of vms on destination
            if start_migration_vms and migration_type != "cross":
                for vm in vms:
                    utils_test.check_dest_vm_network(vm, vm_ipaddr[vm.name],
                                                     remote_host, username,
                                                     password, prompt)
        finally:
            logging.debug("Cleanup vms...")
            for vm_name in vm_names:
                vm = libvirt_vm.VM(vm_name, params, test.bindir,
                                   env.get("address_cache"))
                utlv.MigrationTest().cleanup_dest_vm(vm, None, dest_uri)
                if vm.is_alive():
                    vm.destroy(gracefully=False)
    finally:
        # Runs even if the VM cleanup above raised
        if nfs_client:
            logging.info("Cleanup NFS client environment...")
            nfs_client.cleanup()
        env.clean_objects()
Example #17
0
        if os.path.exists(e_rh):
            utils.run("echo 'never' > %s" % e_rh)
        new_ksm = True
    else:
        try:
            utils.run("modprobe ksm")
            utils.run("ksmctl start 5000 100")
        except error.CmdError, details:
            raise error.TestFail("Failed to load KSM: %s" % details)

    # host_reserve: mem reserve kept for the host system to run
    host_reserve = int(params.get("ksm_host_reserve", -1))
    if (host_reserve == -1):
        # default host_reserve = MemAvailable + one_minimal_guest(128MB)
        # later we add 64MB per additional guest
        host_reserve = ((utils_memory.memtotal() -
                         utils_memory.read_from_meminfo("MemFree")) / 1024 +
                        128)
        # using default reserve
        _host_reserve = True
    else:
        _host_reserve = False

    # guest_reserve: mem reserve kept to avoid guest OS to kill processes
    guest_reserve = int(params.get("ksm_guest_reserve", -1))
    if (guest_reserve == -1):
        # default guest_reserve = minimal_system_mem(256MB)
        # later we add tmpfs overhead
        guest_reserve = 256
        # using default reserve
        _guest_reserve = True
Example #18
0
        if not (mb_enable and not tlbfs_enable):
            logging.debug("starting analyzing the hugepage usage...")
            pid = vms[-1].get_pid()
            started_free = utils_memory.get_num_huge_pages_free()
            # Get the thp usage from /proc/pid/smaps
            started_anon = utils_memory.get_num_anon_huge_pages(pid)
            static_used = non_started_free - started_free
            hugepage_used = static_used * page_size

            if test_type == "contrast":
                # get qemu-kvm memory consumption by top
                cmd = "top -b -n 1|awk '$1 == %s {print $10}'" % pid
                rate = utils.run(cmd, ignore_status=False,
                                 verbose=True).stdout.strip()
                qemu_kvm_used = (utils_memory.memtotal() * float(rate)) / 100
                logging.debug("rate: %s, used-by-qemu-kvm: %f, used-by-vm: %d",
                              rate, qemu_kvm_used, hugepage_used)
                if abs(qemu_kvm_used - hugepage_used) > hugepage_used * (err_range - 1):
                    raise error.TestFail("Error for hugepage usage")
            if test_type == "stress":
                if non_started_free <= started_free:
                    logging.debug("hugepage usage:%d -> %d", non_started_free,
                                  started_free)
                    raise error.TestFail("Error for hugepage usage with stress")
            if mb_enable is not True:
                if static_used > 0:
                    raise error.TestFail("VM use static hugepage without"
                                         " memoryBacking element")
                if thp_enable is not True and started_anon > 0:
                    raise error.TestFail("VM use transparent hugepage, while"
Example #19
0
def run(test, params, env):
    """
    Check the hugepage usage of VM(s) under different host settings.

    Test steps:

    1) Read the test settings from params.
    2) Check the environment (the "contrast" test needs two VMs and no
       running domain).
    3) Back up the host hugepage settings, then apply the requested
       hugetlbfs mount state, static hugepage number, transparent hugepage
       mode and memoryBacking tag.
    4) Start the VM(s) and check whether they started successfully;
       optionally run a stress program or unixbench inside the guest.
    5) Check the hugepage memory usage against the applied settings.
    6) Clean up: close sessions, destroy VMs and restore the host settings.

    :param test: test object (used for cancel/fail and ``virtdir``)
    :param params: dictionary-like object with the test parameters
    :param env: test environment object providing the VM objects
    """
    test_type = params.get("test_type", 'normal')
    tlbfs_enable = 'yes' == params.get("hugetlbfs_enable", 'no')
    shp_num = int(params.get("static_hugepage_num", 1024))
    thp_enable = 'yes' == params.get("trans_hugepage_enable", 'no')
    mb_enable = 'yes' == params.get("mb_enable", 'yes')
    delay = int(params.get("delay_time", 10))

    # Skip cases early
    vm_names = []
    if test_type == "contrast":
        # Only the first two VMs are used by the contrast test
        vm_names = params.get("vms").split()[:2]
        if len(vm_names) < 2:
            test.cancel("This test requires two VMs")
        # confirm no VM running
        allvms = virsh.dom_list('--name').stdout.strip()
        if allvms != '':
            test.cancel("one or more VMs are alive")
        # err_range is only defined (and only used) for the contrast test
        err_range = float(params.get("mem_error_range", 1.25))
    else:
        vm_names.append(params.get("main_vm"))
        # target_path / unixbench_control_file are only defined for their
        # own test type; they are used below under the same conditions
        if test_type == "stress":
            target_path = params.get("target_path", "/tmp/test.out")
        elif test_type == "unixbench":
            unixbench_control_file = params.get("unixbench_controle_file",
                                                "unixbench5.control")

    # backup original setting
    shp_orig_num = utils_memory.get_num_huge_pages()
    thp_orig_status = utils_memory.get_transparent_hugepage()
    page_size = utils_memory.get_huge_page_size()

    # mount/umount hugetlbfs
    tlbfs_status = utils_misc.is_mounted("hugetlbfs", "/dev/hugepages",
                                         "hugetlbfs")
    if tlbfs_enable is True:
        if tlbfs_status is not True:
            utils_misc.mount("hugetlbfs", "/dev/hugepages", "hugetlbfs")
    else:
        if tlbfs_status is True:
            utils_misc.umount("hugetlbfs", "/dev/hugepages", "hugetlbfs")

    # set static hugepage
    utils_memory.set_num_huge_pages(shp_num)

    # enable/disable transparent hugepage
    if thp_enable:
        utils_memory.set_transparent_hugepage('always')
    else:
        utils_memory.set_transparent_hugepage('never')

    # set/del memoryBacking tag
    for vm_name in vm_names:
        if mb_enable:
            vm_xml.VMXML.set_memoryBacking_tag(vm_name)
        else:
            vm_xml.VMXML.del_memoryBacking_tag(vm_name)

    utils_libvirtd.libvirtd_restart()
    # Baseline taken before any VM is started; compared with the value read
    # after the VMs are up to derive static_used
    non_started_free = utils_memory.get_num_huge_pages_free()

    vms = []
    sessions = []
    try:
        for vm_name in vm_names:
            # try to start vm and login
            try:
                vm = env.get_vm(vm_name)
                vm.start()
            except VMError as e:
                if mb_enable and not tlbfs_enable:
                    # if hugetlbfs not be mounted,
                    # VM start with memoryBacking tag will fail
                    logging.debug(e)
                else:
                    error_msg = "Test failed in positive case. error: %s\n" % e
                    test.fail(error_msg)
            # Stop handling further VMs once one of them is not running
            if vm.is_alive() is not True:
                break
            vms.append(vm)

            # try to login and run some program
            try:
                session = vm.wait_for_login()
            except (LoginError, ShellError) as e:
                error_msg = "Test failed in positive case.\n error: %s\n" % e
                test.fail(error_msg)
            sessions.append(session)

            if test_type == "stress":
                # prepare file for increasing stress
                stress_path = prepare_c_file()
                remote.scp_to_remote(vm.get_address(), 22,
                                     'root', params.get('password'),
                                     stress_path, "/tmp/")
                # Try to install gcc on guest first
                utils_package.package_install(["gcc"], session, 360)
                # increasing workload
                session.cmd("gcc %s -o %s" % (stress_path, target_path))
                session.cmd("%s &" % target_path)

            if test_type == "unixbench":
                params["main_vm"] = vm_name
                params["test_control_file"] = unixbench_control_file

                control_path = os.path.join(test.virtdir, "control",
                                            unixbench_control_file)
                # unixbench test need 'patch' and 'perl' commands installed
                utils_package.package_install(["patch", "perl"], session, 360)
                command = utils_test.run_autotest(vm, session, control_path,
                                                  None, None, params,
                                                  copy_only=True)
                session.cmd("%s &" % command, ignore_all_errors=True)
                # wait for autotest running on vm
                time.sleep(delay)

                def _is_unixbench_running():
                    # True when the grep pipeline exits with status 0
                    cmd = "ps -ef | grep perl | grep Run"
                    return not session.cmd_status(cmd)
                if not utils_misc.wait_for(_is_unixbench_running, timeout=240):
                    test.cancel("Failed to run unixbench in guest,"
                                " please make sure some necessary"
                                " packages are installed in guest,"
                                " such as gcc, tar, bzip2")
                logging.debug("Unixbench test is running in VM")

        if test_type == "contrast":
            # wait for vm finish starting completely
            time.sleep(delay)

        # Skipped only for memoryBacking without hugetlbfs, the combination
        # in which the VM start failure above is tolerated
        if not (mb_enable and not tlbfs_enable):
            logging.debug("starting analyzing the hugepage usage...")
            # NOTE(review): raises IndexError if no VM was appended to vms
            pid = vms[-1].get_pid()
            started_free = utils_memory.get_num_huge_pages_free()
            # Get the thp usage from /proc/pid/smaps
            started_anon = utils_memory.get_num_anon_huge_pages(pid)
            # Drop in free static hugepages since the pre-start baseline
            static_used = non_started_free - started_free
            hugepage_used = static_used * page_size

            if test_type == "contrast":
                # get qemu-kvm memory consumption by top
                cmd = "top -b -n 1|awk '$1 == %s {print $10}'" % pid
                rate = process.run(cmd, ignore_status=False,
                                   verbose=True, shell=True).stdout_text.strip()
                qemu_kvm_used = (utils_memory.memtotal() * float(rate)) / 100
                logging.debug("rate: %s, used-by-qemu-kvm: %f, used-by-vm: %d",
                              rate, qemu_kvm_used, hugepage_used)
                # Fail when the two figures differ by more than
                # (err_range - 1) times the hugepage figure
                if abs(qemu_kvm_used - hugepage_used) > hugepage_used * (err_range - 1):
                    test.fail("Error for hugepage usage")
            if test_type == "stress":
                # The free hugepage count must have dropped after VM start
                if non_started_free <= started_free:
                    logging.debug("hugepage usage:%d -> %d", non_started_free,
                                  started_free)
                    test.fail("Error for hugepage usage with stress")
            if mb_enable is not True:
                # No memoryBacking tag: static hugepages must stay unused
                if static_used > 0:
                    test.fail("VM use static hugepage without"
                              " memoryBacking element")
                if thp_enable is not True and started_anon > 0:
                    test.fail("VM use transparent hugepage, while"
                              " it's disabled")
            else:
                if tlbfs_enable is not True:
                    # NOTE(review): mb_enable with tlbfs disabled is excluded
                    # by the enclosing condition, so this branch looks
                    # unreachable - confirm
                    if static_used > 0:
                        test.fail("VM use static hugepage without tlbfs"
                                  " mounted")
                    if thp_enable and started_anon <= 0:
                        test.fail("VM doesn't use transparent"
                                  " hugepage")
                else:
                    # memoryBacking tag set and hugetlbfs mounted
                    if shp_num > 0:
                        if static_used <= 0:
                            test.fail("VM doesn't use static"
                                      " hugepage")
                    else:
                        if static_used > 0:
                            test.fail("VM use static hugepage,"
                                      " while it's set to zero")
                    if thp_enable is not True:
                        if started_anon > 0:
                            test.fail("VM use transparent hugepage,"
                                      " while it's disabled")
                    else:
                        if shp_num == 0 and started_anon <= 0:
                            test.fail("VM doesn't use transparent"
                                      " hugepage, while static"
                                      " hugepage is disabled")
    finally:
        # end up session
        for session in sessions:
            session.close()

        for vm in vms:
            if vm.is_alive():
                vm.destroy()

        # Apply the opposite memoryBacking operation to the one done during
        # setup. NOTE(review): this assumes the domain originally had the
        # opposite setting - confirm
        for vm_name in vm_names:
            if mb_enable:
                vm_xml.VMXML.del_memoryBacking_tag(vm_name)
            else:
                vm_xml.VMXML.set_memoryBacking_tag(vm_name)

        utils_libvirtd.libvirtd_restart()

        # Undo the hugetlbfs mount/umount only if setup changed it
        if tlbfs_enable is True:
            if tlbfs_status is not True:
                utils_misc.umount("hugetlbfs", "/dev/hugepages", "hugetlbfs")
        else:
            if tlbfs_status is True:
                utils_misc.mount("hugetlbfs", "/dev/hugepages", "hugetlbfs")
        # Restore the hugepage settings backed up before the test
        utils_memory.set_num_huge_pages(shp_orig_num)
        utils_memory.set_transparent_hugepage(thp_orig_status)
Example #20
0
def run(test, params, env):
    """
    Network stress with multi nics test with netperf.

    1) Start multi vm(s) guest.
    2) Select multi vm(s) or host to setup netperf server/client.
    3) Execute netperf  stress on multi nics.
    4) Ping test after netperf testing, check whether nics still work.

    Every serial session opened on a netperf server guest is tracked and
    closed during cleanup, and guest names are matched against the list of
    names in "vms" rather than by substring.

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    login_timeout = float(params.get("login_timeout", 360))
    netperf_server = params.get("netperf_server").split()
    netperf_client = params.get("netperf_client")
    guest_username = params.get("username", "")
    guest_password = params.get("password", "")
    shell_client = params.get("shell_client")
    shell_port = params.get("shell_port")
    os_type = params.get("os_type")
    shell_prompt = params.get("shell_prompt", r"^root@.*[\#\$]\s*$|#")
    disable_firewall = params.get("disable_firewall", "")
    linesep = params.get("shell_linesep",
                         "\n").encode().decode('unicode_escape')
    status_test_command = params.get("status_test_command", "echo $?")
    ping_count = int(params.get("ping_count", 10))
    compile_option_client = params.get("compile_option_client", "")
    compile_option_server = params.get("compile_option_server", "")

    vms = params.get("vms")
    # Match guest names against whole names, not substrings of the raw string
    vm_names = vms.split()
    server_infos = []
    client_infos = []
    server_ips = []
    client_ips = []
    # Serial sessions opened on the server guests, closed in the cleanup
    server_ctls = []

    def _guest_info(name, ip):
        """Collect the login settings of guest ``name``, with global fallbacks."""
        return {
            "ip": ip,
            "os_type": params.get("os_type_%s" % name, os_type),
            "username": params.get("username_%s" % name, guest_username),
            "password": params.get("password_%s" % name, guest_password),
            "shell_client": params.get("shell_client_%s" % name,
                                       shell_client),
            "shell_port": params.get("shell_port_%s" % name, shell_port),
            "shell_prompt": params.get("shell_prompt_%s" % name,
                                       shell_prompt),
            "linesep": params.get("linesep_%s" % name, linesep),
            "status_test_command": params.get(
                "status_test_command_%s" % name, status_test_command),
        }

    if os_type == "windows":
        host_mem = utils_memory.memtotal() // (1024 * 1024)
        vm_mem = host_mem / (len(vm_names) + 1) * 1024
        if vm_mem < params.get_numeric("min_mem"):
            test.cancel("Host total memory is insufficient for this test case,"
                        "each VM's memory can not meet guest OS's requirement")
        params["mem"] = vm_mem
    params["start_vm"] = "yes"

    env_process.preprocess(test, params, env)
    for server in netperf_server:
        if server not in vm_names:
            test.error("Only support setup netperf server in guest.")
        server_vm = env.get_vm(server)
        server_vm.verify_alive()
        server_ctl = server_vm.wait_for_serial_login(timeout=login_timeout)
        server_ctls.append(server_ctl)
        error_context.context("Stop fireware on netperf server guest.",
                              logging.info)
        server_ctl.cmd(disable_firewall, ignore_all_errors=True)
        server_ip = server_vm.get_address()
        server_ips.append(server_ip)
        server_infos.append(_guest_info(server, server_ip))

    client = netperf_client.strip()
    if client not in vm_names:
        test.error("Only support setup netperf client in guest.")
    client_vm = env.get_vm(client)
    client_vm.verify_alive()
    client_ctl = client_vm.wait_for_serial_login(timeout=login_timeout)
    if params.get("dhcp_cmd"):
        status, output = client_ctl.cmd_status_output(params["dhcp_cmd"],
                                                      timeout=600)
        if status:
            logging.warning("Failed to execute dhcp-command, output:\n %s",
                            output)
    error_context.context("Stop fireware on netperf client guest.",
                          logging.info)
    client_ctl.cmd(disable_firewall, ignore_all_errors=True)

    client_ip = client_vm.get_address()
    client_ips.append(client_ip)
    params_client_nic = params.object_params(client)
    nics_count = len(params_client_nic.get("nics", "").split())
    # The first nic was handled above; log into every additional one
    for i in range(1, nics_count):
        client_vm.wait_for_login(nic_index=i, timeout=login_timeout)
        client_ips.append(client_vm.get_address(index=i))
    client_infos.append(_guest_info(client, client_ip))

    if params.get("os_type") == "linux":
        error_context.context("Config static route in netperf server guest.",
                              logging.info)
        nics_list = utils_net.get_linux_ifname(client_ctl)
        for ip in server_ips:
            index = server_ips.index(ip) % len(nics_list)
            client_ctl.cmd("route add  -host %s %s" % (ip, nics_list[index]))

    netperf_link = params.get("netperf_link")
    netperf_link = os.path.join(data_dir.get_deps_dir("netperf"), netperf_link)
    md5sum = params.get("pkg_md5sum")
    netperf_server_link = params.get("netperf_server_link_win", netperf_link)
    netperf_server_link = os.path.join(data_dir.get_deps_dir("netperf"),
                                       netperf_server_link)
    server_md5sum = params.get("server_md5sum")
    netperf_client_link = params.get("netperf_client_link_win", netperf_link)
    netperf_client_link = os.path.join(data_dir.get_deps_dir("netperf"),
                                       netperf_client_link)
    client_md5sum = params.get("client_md5sum")

    server_path_linux = params.get("server_path", "/var/tmp")
    client_path_linux = params.get("client_path", "/var/tmp")
    server_path_win = params.get("server_path_win", "c:\\")
    client_path_win = params.get("client_path_win", "c:\\")

    netperf_clients = []
    netperf_servers = []
    error_context.context("Setup netperf guest.", logging.info)
    for c_info in client_infos:
        # Pick the package per guest; the shared md5sum is never overwritten,
        # so a Windows guest cannot leak its checksum into a Linux guest
        if c_info["os_type"] == "windows":
            netperf_link_c = netperf_client_link
            client_path = client_path_win
            md5sum_c = client_md5sum
        else:
            netperf_link_c = netperf_link
            client_path = client_path_linux
            md5sum_c = md5sum
        n_client = utils_netperf.NetperfClient(
            c_info["ip"],
            client_path,
            md5sum_c,
            netperf_link_c,
            client=c_info["shell_client"],
            port=c_info["shell_port"],
            username=c_info["username"],
            password=c_info["password"],
            prompt=c_info["shell_prompt"],
            linesep=c_info["linesep"],
            status_test_command=c_info["status_test_command"],
            compile_option=compile_option_client)
        netperf_clients.append(n_client)
    error_context.context("Setup netperf server.", logging.info)
    for s_info in server_infos:
        if s_info["os_type"] == "windows":
            netperf_link_s = netperf_server_link
            server_path = server_path_win
            md5sum_s = server_md5sum
        else:
            netperf_link_s = netperf_link
            server_path = server_path_linux
            md5sum_s = md5sum
        n_server = utils_netperf.NetperfServer(
            s_info["ip"],
            server_path,
            md5sum_s,
            netperf_link_s,
            client=s_info["shell_client"],
            port=s_info["shell_port"],
            username=s_info["username"],
            password=s_info["password"],
            prompt=s_info["shell_prompt"],
            linesep=s_info["linesep"],
            status_test_command=s_info["status_test_command"],
            compile_option=compile_option_server)
        netperf_servers.append(n_server)

    try:
        error_context.context("Start netperf server.", logging.info)
        for n_server in netperf_servers:
            n_server.start()
        test_duration = int(params.get("netperf_test_duration", 60))
        test_protocols = params.get("test_protocols", "TCP_STREAM")
        netperf_sessions = params.get("netperf_sessions", "1")
        p_sizes = params.get("package_sizes")
        netperf_cmd_prefix = params.get("netperf_cmd_prefix", "")
        error_context.context("Start netperf clients.", logging.info)
        for protocol in test_protocols.split():
            error_context.context("Testing %s protocol" % protocol,
                                  logging.info)
            sessions_test = netperf_sessions.split()
            sizes_test = p_sizes.split()
            for size in sizes_test:
                for sess in sessions_test:
                    test_option = params.get("test_option", "")
                    test_option += " -t %s -l %s " % (protocol, test_duration)
                    test_option += " -- -m %s" % size
                    launch_netperf_client(test, server_ips, netperf_clients,
                                          test_option, test_duration, sess,
                                          netperf_cmd_prefix, params)
        error_context.context("Ping test after netperf testing.", logging.info)
        for s_ip in server_ips:
            status, output = utils_test.ping(s_ip,
                                             ping_count,
                                             timeout=float(ping_count) * 1.5)
            if status != 0:
                test.fail("Ping returns non-zero value %s" % output)

            package_lost = utils_test.get_loss_ratio(output)
            if package_lost != 0:
                # Report the address that was pinged, not a stale guest name
                test.fail("%s packeage lost when ping server ip %s " %
                          (package_lost, s_ip))
    finally:
        for n_server in netperf_servers:
            n_server.stop()
            n_server.cleanup(True)
        for n_client in netperf_clients:
            n_client.stop()
            n_client.cleanup(True)
        for ctl in server_ctls:
            if ctl:
                ctl.close()
        if client_ctl:
            client_ctl.close()
Example #21
0
def run(test, params, env):
    """
    Test migration under stress.

    Steps, as performed below:

    1) Validate the VM list and the destination URI; cancel the test when
       they are unusable.
    2) Work out the stress memory size and update the "<stress_tool>_args"
       param when it contains "vm-bytes".
    3) Optionally prepare the stress tool on the local host and on the
       remote host.
    4) Shut down the VMs and apply the cpu/memory settings.
    5) Optionally start the VMs and prepare the stress tool inside them.
    6) Run the stress migration.
    7) Always clean up the destination VMs, restart the VMs on the source
       and unload every stress tool that was loaded.

    :param test: test object (used for cancel/error)
    :param params: dictionary-like object with the test parameters
    :param env: test environment object providing the VM objects
    """
    vm_names = params.get("vms").split()
    if len(vm_names) < 2:
        test.cancel("Provide enough vms for migration")

    src_uri = "qemu:///system"
    dest_uri = libvirt_vm.complete_uri(
        params.get("migrate_dest_host", "EXAMPLE"))
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        test.cancel("The dest_uri '%s' is invalid" % dest_uri)

    # Migrated vms' instance
    vms = env.get_all_vms()
    params["load_vms"] = list(vms)

    cpu = int(params.get("smp", 1))
    memory = int(params.get("mem")) * 1024
    stress_tool = params.get("stress_tool", "")
    remote_stress = params.get("migration_stress_remote", "no") == "yes"
    host_stress = params.get("migration_stress_host", "no") == "yes"
    vms_stress = params.get("migration_stress_vms", "no") == "yes"
    vm_bytes = params.get("stress_vm_bytes", "128M")
    # A missing "<tool>_args" param is treated as an empty argument string
    stress_args = params.get("%s_args" % stress_tool) or ""
    migration_type = params.get("migration_type")
    start_migration_vms = params.get("start_migration_vms", "yes") == "yes"
    thread_timeout = int(params.get("thread_timeout", 120))
    ubuntu_dep = ['build-essential', 'git']
    hstress = rstress = None
    vstress = {}

    # Set vm_bytes for start_cmd
    mem_total = utils_memory.memtotal()
    vm_reserved = len(vms) * memory
    if vm_bytes == "half":
        # Floor division keeps the value an integer on Python 3
        vm_bytes = (mem_total - vm_reserved) // 2
    elif vm_bytes == "shortage":
        vm_bytes = mem_total - vm_reserved + 524288
    if "vm-bytes" in stress_args:
        params["%s_args" % stress_tool] = stress_args % vm_bytes

    # Ensure stress tool is available in host
    if host_stress:
        # remove package manager installed tool to avoid conflict
        if not utils_package.package_remove(stress_tool):
            logging.error("Existing %s is not removed", stress_tool)
        if "stress-ng" in stress_tool and 'Ubuntu' in utils_misc.get_distro():
            params['stress-ng_dependency_packages_list'] = ubuntu_dep
        try:
            hstress = utils_test.HostStress(stress_tool, params)
            hstress.load_stress_tool()
        except utils_test.StressError as info:
            test.error(info)

    if remote_stress:
        server_ip = params['remote_ip']
        server_pwd = params['remote_pwd']
        server_user = params.get('remote_user', 'root')
        remote_session = remote.wait_for_login('ssh', server_ip, '22',
                                               server_user, server_pwd,
                                               r"[\#\$]\s*$")
        try:
            # remove package manager installed tool to avoid conflict
            if not utils_package.package_remove(stress_tool,
                                                session=remote_session):
                logging.error("Existing %s is not removed", stress_tool)
            if ("stress-ng" in stress_tool and 'Ubuntu'
                    in utils_misc.get_distro(session=remote_session)):
                params['stress-ng_dependency_packages_list'] = ubuntu_dep

            rstress = utils_test.HostStress(stress_tool,
                                            params,
                                            remote_server=True)
            rstress.load_stress_tool()
        except utils_test.StressError as info:
            test.error(info)
        finally:
            # Close the session on every exit path, not only on StressError
            remote_session.close()

    for vm in vms:
        # Keep vm dead for edit
        if vm.is_alive():
            vm.destroy()
        set_cpu_memory(vm.name, cpu, memory)

    try:
        if start_migration_vms:
            for vm in vms:
                vm.start()
                session = vm.wait_for_login()
                try:
                    # remove package manager installed tool to avoid conflict
                    if not utils_package.package_remove(stress_tool,
                                                        session=session):
                        logging.error("Existing %s is not removed",
                                      stress_tool)
                    # configure stress in VM
                    if vms_stress:
                        if ("stress-ng" in stress_tool and 'Ubuntu'
                                in utils_misc.get_distro(session=session)):
                            params[
                                'stress-ng_dependency_packages_list'] = ubuntu_dep
                        try:
                            vstress[vm.name] = utils_test.VMStress(
                                vm, stress_tool, params)
                            vstress[vm.name].load_stress_tool()
                        except utils_test.StressError as info:
                            test.error(info)
                finally:
                    session.close()

        do_stress_migration(vms, src_uri, dest_uri, migration_type, test,
                            params, thread_timeout)
    finally:
        logging.debug("Cleanup vms...")
        for vm in vms:
            utils_test.libvirt.MigrationTest().cleanup_dest_vm(
                vm, None, dest_uri)
            # Try to start vms in source once vms in destination are
            # cleaned up
            if not vm.is_alive():
                vm.start()
                vm.wait_for_login()
            # Only VMs that got a stress object need unloading
            vm_stress = vstress.get(vm.name)
            if vm_stress:
                vm_stress.unload_stress()

        if rstress:
            rstress.unload_stress()

        if hstress:
            hstress.unload_stress()
Example #22
0
def run(test, params, env):
    """
    KVM multi test:
    1) Log into guests
    2) Check all the nics available or not
    3) Ping among guest nic and host
       3.1) Ping with different packet size
       3.2) Flood ping test
       3.3) Final ping test
    4) Transfer files among guest nics and host
       4.1) Create file by dd command in guest
       4.2) Transfer file between nics
       4.3) Compare original file and transferred file
    5) ping among different nics
       5.1) Ping with different packet size
       5.2) Flood ping test
       5.3) Final ping test
    6) Transfer files among different nics
       6.1) Create file by dd command in guest
       6.2) Transfer file between nics
       6.3) Compare original file and transferred file
    7) Repeat step 3 - 6 on every nic.

    :param test: QEMU test object
    :param params: Dictionary with the test parameters
    :param env: Dictionary with test environment.
    """
    def ping(session, nic, dst_ip, strick_check, flood_minutes):
        """
        Ping ``dst_ip`` from a guest nic: once per packet size, then a flood
        ping, then a final ping run.

        :param session: Session handed to utils_test.ping for every ping.
        :param nic: Interface name used as the ping source interface.
        :param dst_ip: Address to ping.
        :param strick_check: "yes" fails the test on any packet loss; any
                             other value only requires a zero ping status.
        :param flood_minutes: Flood ping timeout, in minutes.
        """
        # The flag arrives as the raw "yes"/"no" config string; convert it
        # once so that a non-empty "no" is not mistaken for a true value.
        strict = strick_check == "yes"
        d_packet_size = [
            1, 4, 48, 512, 1440, 1500, 1505, 4054, 4055, 4096, 4192, 8878,
            9000, 32767, 65507
        ]
        packet_size = params.get("packet_size", "").split() or d_packet_size
        for size in packet_size:
            error_context.context("Ping with packet size %s" % size,
                                  logging.info)
            status, output = utils_test.ping(dst_ip,
                                             10,
                                             interface=nic,
                                             packetsize=size,
                                             timeout=30,
                                             session=session)
            if strict:
                ratio = utils_test.get_loss_ratio(output)
                if ratio != 0:
                    test.fail("Loss ratio is %s for packet size"
                              " %s" % (ratio, size))
            elif status != 0:
                test.fail("Ping returns non-zero value %s" % output)

        error_context.context("Flood ping test", logging.info)
        utils_test.ping(dst_ip,
                        None,
                        interface=nic,
                        flood=True,
                        output_func=None,
                        timeout=flood_minutes * 60,
                        session=session)
        error_context.context("Final ping test", logging.info)
        counts = params.get("ping_counts", 100)
        status, output = utils_test.ping(dst_ip,
                                         counts,
                                         interface=nic,
                                         timeout=float(counts) * 1.5,
                                         session=session)
        if strict:
            ratio = utils_test.get_loss_ratio(output)
            if ratio != 0:
                test.fail("Packet loss ratio is %s after flood" % ratio)
        elif status != 0:
            test.fail("Ping returns non-zero value %s" % output)

    def file_transfer(session, src, dst):
        """
        Create a file in the guest, copy it src -> dst -> src with scp and
        compare the md5sum of the original and the round-tripped copy.

        :param session: Guest session used to create the file and run md5sum.
        :param src: Address of the transfer source.
        :param dst: Address of the transfer destination.
        """
        username = params.get("username", "")
        password = params.get("password", "")
        src_path = "/tmp/1"
        dst_path = "/tmp/2"
        port = int(params["file_transfer_port"])

        cmd = "dd if=/dev/urandom of=%s bs=100M count=1" % src_path
        cmd = params.get("file_create_cmd", cmd)

        error_context.context("Create file by dd command, cmd: %s" % cmd,
                              logging.info)
        session.cmd(cmd)

        transfer_timeout = int(params.get("transfer_timeout"))
        log_filename = "scp-from-%s-to-%s.log" % (src, dst)
        error_context.context("Transfer file from %s to %s" % (src, dst),
                              logging.info)
        remote.scp_between_remotes(src,
                                   dst,
                                   port,
                                   password,
                                   password,
                                   username,
                                   username,
                                   src_path,
                                   dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)
        # Send the received copy back so both files can be hashed through
        # the same session.
        src_path = dst_path
        dst_path = "/tmp/3"
        log_filename = "scp-from-%s-to-%s.log" % (dst, src)
        error_context.context("Transfer file from %s to %s" % (dst, src),
                              logging.info)
        remote.scp_between_remotes(dst,
                                   src,
                                   port,
                                   password,
                                   password,
                                   username,
                                   username,
                                   src_path,
                                   dst_path,
                                   log_filename=log_filename,
                                   timeout=transfer_timeout)
        error_context.context("Compare original file and transferred file",
                              logging.info)

        cmd1 = "md5sum /tmp/1"
        cmd2 = "md5sum /tmp/3"
        md5sum1 = session.cmd(cmd1).split()[0]
        md5sum2 = session.cmd(cmd2).split()[0]
        if md5sum1 != md5sum2:
            test.error("File changed after transfer")

    nic_interface_list = []
    check_irqbalance_cmd = params.get("check_irqbalance_cmd")
    stop_irqbalance_cmd = params.get("stop_irqbalance_cmd")
    start_irqbalance_cmd = params.get("start_irqbalance_cmd")
    status_irqbalance = params.get("status_irqbalance")
    vms = params["vms"].split()
    # Floor division keeps the memory and vcpu figures integral on Python 3
    # (plain "/" would turn them into floats there).
    host_mem = utils_memory.memtotal() // (1024 * 1024)
    host_cpu_count = len(utils_misc.get_cpu_processors())
    vhost_count = 1 if params.get("vhost") else 0
    min_cpus = (1 + vhost_count) * len(vms)
    if host_cpu_count < min_cpus:
        test.error("The host don't have enough cpus to start guest "
                   "pcus: %d, minimum of vcpus and vhost: %d" %
                   (host_cpu_count, min_cpus))
    params['mem'] = host_mem // len(vms) * 1024
    params['smp'] = host_cpu_count // len(vms) - vhost_count
    if params['smp'] % 2 != 0:
        params['vcpu_sockets'] = 1
    params["start_vm"] = "yes"
    for vm_name in vms:
        env_process.preprocess_vm(test, params, env, vm_name)
    timeout = float(params.get("login_timeout", 360))
    strict_check = params.get("strick_check", "no")
    host_ip = utils_net.get_ip_address_by_interface(params.get("netdst"))
    host_ip = params.get("srchost", host_ip)
    flood_minutes = float(params["flood_minutes"])
    error_context.context("Check irqbalance service status", logging.info)
    o = process.system_output(check_irqbalance_cmd,
                              ignore_status=True,
                              shell=True)
    check_stop_irqbalance = False
    if re.findall(status_irqbalance, o):
        logging.debug("stop irqbalance")
        process.run(stop_irqbalance_cmd, shell=True)
        check_stop_irqbalance = True
        o = process.system_output(check_irqbalance_cmd,
                                  ignore_status=True,
                                  shell=True)
        if re.findall(status_irqbalance, o):
            test.error("Can not stop irqbalance")

    # From here on irqbalance may have been stopped by this test, so make
    # sure it is started again even when a check below fails.
    try:
        thread_list = []
        for vm_name in vms:
            vm = env.get_vm(vm_name)
            session = vm.wait_for_login(timeout=timeout)
            thread_list.extend(vm.vcpu_threads)
            thread_list.extend(vm.vhost_threads)
            error_context.context("Check all the nics available or not",
                                  logging.info)
            for index, nic in enumerate(vm.virtnet):
                guest_ifname = utils_net.get_linux_ifname(session, nic.mac)
                guest_ip = vm.get_address(index)
                if not (guest_ifname and guest_ip):
                    err_log = "vms %s get ip or ifname failed." % vm_name
                    err_log += " ifname: %s, ip: %s." % (guest_ifname,
                                                         guest_ip)
                    test.fail(err_log)
                # Each entry is [interface name, ip address, session].
                nic_interface_list.append([guest_ifname, guest_ip, session])
        error_context.context("Pin vcpus and vhosts to host cpus",
                              logging.info)
        host_numa_nodes = utils_misc.NumaInfo()
        vthread_num = 0
        for numa_node_id in host_numa_nodes.nodes:
            numa_node = host_numa_nodes.nodes[numa_node_id]
            for _ in range(len(numa_node.cpus)):
                if vthread_num >= len(thread_list):
                    break
                vcpu_tid = thread_list[vthread_num]
                pinned_cpu = numa_node.pin_cpu(vcpu_tid)
                logging.debug("pin vcpu/vhost thread(%s) to cpu(%s)",
                              vcpu_tid, pinned_cpu)
                vthread_num += 1

        # ping and file transfer test
        for src_ip_index, src_ip_info in enumerate(nic_interface_list):
            error_context.context("Ping test from guest to host",
                                  logging.info)
            ping(src_ip_info[2], src_ip_info[0], host_ip, strict_check,
                 flood_minutes)
            error_context.context("File transfer test between guest and host",
                                  logging.info)
            file_transfer(src_ip_info[2], src_ip_info[1], host_ip)
            # Only pair this nic with itself and later entries; equal
            # addresses are skipped.
            for dst_ip in nic_interface_list[src_ip_index:]:
                if src_ip_info[1] == dst_ip[1]:
                    continue
                txt = "Ping test between %s and %s" % (src_ip_info[1],
                                                       dst_ip[1])
                error_context.context(txt, logging.info)
                ping(src_ip_info[2], src_ip_info[0], dst_ip[1], strict_check,
                     flood_minutes)
                txt = "File transfer test between %s " % src_ip_info[1]
                txt += "and %s" % dst_ip[1]
                error_context.context(txt, logging.info)
                file_transfer(src_ip_info[2], src_ip_info[1], dst_ip[1])
    finally:
        if check_stop_irqbalance:
            process.run(start_irqbalance_cmd, shell=True)
Example #23
0
    def output_check(nodeinfo_output):
        """
        Compare the fields of ``virsh nodeinfo`` output with values read from
        the host (sysfs, /proc/cpuinfo, lscpu, NUMA meminfo) and with the CPU
        topology reported by virsh capabilities.

        Any mismatch except CPU frequency fails the test through test.fail();
        a frequency mismatch is only logged.

        :param nodeinfo_output: Output of virsh nodeinfo, handed to
                                _check_nodeinfo to pick out each field.
        """
        # Check CPU model
        cpu_model_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU model", 3)
        cpu_arch = platform.machine()
        if not re.match(cpu_model_nodeinfo, cpu_arch):
            test.fail(
                "Virsh nodeinfo output didn't match CPU model")

        # Check number of CPUs, nodeinfo CPUs represent online threads in the
        # system, check all online cpus in sysfs
        cpus_nodeinfo = _check_nodeinfo(nodeinfo_output, "CPU(s)", 2)
        # Use stdout_text (as the rest of this function does) so the counts
        # are str and can be compared with the nodeinfo strings.
        cmd = "cat /sys/devices/system/cpu/cpu*/online | grep 1 | wc -l"
        cpus_online = process.run(cmd, ignore_status=True,
                                  shell=True).stdout_text.strip()
        cmd = "cat /sys/devices/system/cpu/cpu*/online | wc -l"
        cpus_total = process.run(cmd, ignore_status=True,
                                 shell=True).stdout_text.strip()
        # When cpu0 has no "online" file it is missing from both counts
        # above, so add it back.
        if not os.path.exists('/sys/devices/system/cpu/cpu0/online'):
            cpus_online = str(int(cpus_online) + 1)
            cpus_total = str(int(cpus_total) + 1)

        logging.debug("host online cpus are %s", cpus_online)
        logging.debug("host total cpus are %s", cpus_total)

        if cpus_nodeinfo != cpus_online:
            if 'ppc' in cpu_arch:
                if cpus_nodeinfo != cpus_total:
                    test.fail("Virsh nodeinfo output of CPU(s) on"
                              " ppc did not match all threads in "
                              "the system")
            else:
                test.fail("Virsh nodeinfo output didn't match "
                          "number of CPU(s)")

        # Check CPU frequency, frequency is under clock for ppc
        cpu_frequency_nodeinfo = _check_nodeinfo(
            nodeinfo_output, 'CPU frequency', 3)
        cmd = ("cat /proc/cpuinfo | grep -E 'cpu MHz|clock|BogoMIPS' | "
               "head -n1 | awk -F: '{print $2}' | awk -F. '{print $1}'")
        cmd_result = process.run(cmd, ignore_status=True, shell=True)
        cpu_frequency_os = cmd_result.stdout_text.strip()
        logging.debug("cpu_frequency_nodeinfo=%s cpu_frequency_os=%s",
                      cpu_frequency_nodeinfo, cpu_frequency_os)
        #
        # Matching CPU Frequency is not an exact science in todays modern
        # processors and OS's. CPU's can have their execution speed varied
        # based on current workload in order to save energy and keep cool.
        # Thus since we're getting the values at disparate points in time,
        # we cannot necessarily do a pure comparison.
        # So, let's get the absolute value of the difference and ensure
        # that it's within 20 percent of each value to give us enough of
        # a "fudge" factor to declare "close enough". Don't return a failure
        # just print a debug message and move on.
        diffval = abs(int(cpu_frequency_nodeinfo) - int(cpu_frequency_os))
        if (float(diffval) / float(cpu_frequency_nodeinfo) > 0.20 or
                float(diffval) / float(cpu_frequency_os) > 0.20):
            logging.debug("Virsh nodeinfo output didn't match CPU "
                          "frequency within 20 percent")

        # Get CPU topology from virsh capabilities xml
        cpu_topology = capability_xml.CapabilityXML()['cpu_topology']
        logging.debug("Cpu topology in virsh capabilities output: %s",
                      cpu_topology)

        # Check CPU socket(s)
        cpu_sockets_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'CPU socket(s)', 3))
        # CPU socket(s) in virsh nodeinfo is Total sockets in each node, not
        # total sockets in the system, so get total sockets in one node and
        # check with it
        node_info = utils_misc.NumaInfo()
        node_online_list = node_info.get_online_nodes()
        # "sort -u" counts distinct package ids; plain "uniq" only collapses
        # adjacent duplicates and the cpu* glob is not ordered by package.
        cmd = "cat /sys/devices/system/node/node%s" % node_online_list[0]
        cmd += "/cpu*/topology/physical_package_id | sort -u | wc -l"
        cmd_result = process.run(cmd, ignore_status=True, shell=True)
        total_sockets_in_node = int(cmd_result.stdout_text.strip())
        if total_sockets_in_node != cpu_sockets_nodeinfo:
            test.fail("Virsh nodeinfo output didn't match CPU "
                      "socket(s) of host OS")
        if cpu_sockets_nodeinfo != int(cpu_topology['sockets']):
            test.fail("Virsh nodeinfo output didn't match CPU "
                      "socket(s) of virsh capabilities output")

        # Check Core(s) per socket
        cores_per_socket_nodeinfo = _check_nodeinfo(
            nodeinfo_output, 'Core(s) per socket', 4)
        cmd = "lscpu | grep 'Core(s) per socket' | head -n1 | awk '{print $4}'"
        cmd_result = process.run(cmd, ignore_status=True, shell=True)
        cores_per_socket_os = cmd_result.stdout_text.strip()
        spec_numa = False
        if not re.match(cores_per_socket_nodeinfo, cores_per_socket_os):
            # for spec NUMA arch, the output of nodeinfo is in a spec format
            cpus_os = utils_misc.get_cpu_info().get("CPU(s)")
            numa_cells_nodeinfo = _check_nodeinfo(
                nodeinfo_output, 'NUMA cell(s)', 3)
            if (re.match(cores_per_socket_nodeinfo, cpus_os) and
                    re.match(numa_cells_nodeinfo, "1")):
                spec_numa = True
            else:
                test.fail("Virsh nodeinfo output didn't match "
                          "CPU(s) or Core(s) per socket of host OS")
        if cores_per_socket_nodeinfo != cpu_topology['cores']:
            test.fail("Virsh nodeinfo output didn't match Core(s) "
                      "per socket of virsh capabilities output")
        # Check Thread(s) per core
        threads_per_core_nodeinfo = _check_nodeinfo(nodeinfo_output,
                                                    'Thread(s) per core', 4)
        if not spec_numa:
            if threads_per_core_nodeinfo != cpu_topology['threads']:
                test.fail("Virsh nodeinfo output didn't match "
                          "Thread(s) per core of virsh "
                          "capabilities output")
        else:
            # In the special NUMA format a single thread per core is expected.
            if threads_per_core_nodeinfo != "1":
                test.fail("Virsh nodeinfo output didn't match "
                          "Thread(s) per core of virsh "
                          "capabilities output")
        # Check Memory size
        memory_size_nodeinfo = int(
            _check_nodeinfo(nodeinfo_output, 'Memory size', 3))
        memory_size_os = 0
        # When the libvirt version check passes, sum MemTotal over the online
        # NUMA nodes; otherwise fall back to the host-wide total.
        if libvirt_version.version_compare(2, 0, 0):
            for i in node_online_list:
                node_memory = node_info.read_from_node_meminfo(i, 'MemTotal')
                memory_size_os += int(node_memory)
        else:
            memory_size_os = utils_memory.memtotal()
        logging.debug('The host total memory from nodes is %s', memory_size_os)

        if memory_size_nodeinfo != memory_size_os:
            test.fail("Virsh nodeinfo output didn't match "
                      "Memory size")
Example #24
0
def run(test, params, env):
    """
    Tests KSM (Kernel Shared Memory) capability by allocating and filling
    KVM guests memory using various values. KVM sets the memory as
    MADV_MERGEABLE so all VM's memory can be merged. The workers in
    guest writes to tmpfs filesystem thus allocations are not limited
    by process max memory, only by VM's memory. Two test modes are supported -
    serial and parallel.

    Serial mode - uses multiple VMs, allocates memory per guest and always
                  verifies the correct number of shared memory.
                  0) Prints out the setup and initialize guest(s)
                  1) Fills guest with the same number (S1)
                  2) Random fill on the first guest
                  3) Random fill of the remaining VMs one by one until the
                     memory is completely filled (KVM stops machines which
                     asks for additional memory until there is available
                     memory) (S2, shouldn't finish)
                  4) Destroy all VMs but the last one
                  5) Checks the last VMs memory for corruption
    Parallel mode - uses one VM with multiple allocator workers. Executes
                   scenarios in parallel to put more stress on the KVM.
                   0) Prints out the setup and initialize guest(s)
                   1) Fills memory with the same number (S1)
                   2) Fills memory with random numbers (S2)
                   3) Verifies all pages
                   4) Fills memory with the same number (S2)
                   5) Changes the last 96B (S3)

    Scenarios:
    S1) Fill all vms with the same value (all pages should be merged into 1)
    S2) Random fill (all pages should be splitted)
    S3) Fill last 96B (change only last 96B of each page; some pages will be
                      merged; there was a bug with data corruption)
    Every worker has unique random key so we are able to verify the filled
    values.

    :param test: kvm test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.

    :param cfg: ksm_swap - use swap?
    :param cfg: ksm_overcommit_ratio - memory overcommit (serial mode only)
    :param cfg: ksm_parallel_ratio - number of workers (parallel mode only)
    :param cfg: ksm_host_reserve - override memory reserve on host in MB
    :param cfg: ksm_guest_reserve - override memory reserve on guests in MB
    :param cfg: ksm_mode - test mode {serial, parallel}
    :param cfg: ksm_perf_ratio - performance ratio, increase it when your
                                 machine is too slow
    """
    def _start_allocator(vm, session, timeout):
        """
        Execute ksm_overcommit_guest.py on guest, wait until it's initialized.

        The script is considered started once the last line of the session
        output matches "PASS:" or "FAIL:". If the session process terminates
        first, the test is failed.

        :param vm: VM object.
        :param session: Remote session to a VM object.
        :param timeout: Timeout that will be used to verify if
                ksm_overcommit_guest.py started properly.
        """
        logging.debug("Starting ksm_overcommit_guest.py on guest %s", vm.name)
        session.sendline("python /tmp/ksm_overcommit_guest.py")
        try:
            session.read_until_last_line_matches(["PASS:", "FAIL:"], timeout)
        except aexpect.ExpectProcessTerminatedError as details:
            e_msg = ("Command ksm_overcommit_guest.py on vm '%s' failed: %s" %
                     (vm.name, str(details)))
            test.fail(e_msg)

    def _execute_allocator(command, vm, session, timeout):
        """
        Execute a given command on ksm_overcommit_guest.py main loop,
        indicating the vm the command was executed on.

        The command is finished once the last line of the session output
        matches "PASS:" or "FAIL:". If the session process terminates first,
        the test is failed.

        :param command: Command that will be executed.
        :param vm: VM object.
        :param session: Remote session to VM object.
        :param timeout: Timeout used to verify expected output.

        :return: Tuple (match index, data)
        """
        logging.debug("Executing '%s' on ksm_overcommit_guest.py loop, "
                      "vm: %s, timeout: %s", command, vm.name, timeout)
        session.sendline(command)
        try:
            (match, data) = session.read_until_last_line_matches(
                ["PASS:", "FAIL:"],
                timeout)
        except aexpect.ExpectProcessTerminatedError as details:
            e_msg = ("Failed to execute command '%s' on "
                     "ksm_overcommit_guest.py, vm '%s': %s" %
                     (command, vm.name, str(details)))
            test.fail(e_msg)
        return (match, data)

    def get_ksmstat():
        """
        Return sharing memory by ksm in MB

        Reads the page count from /sys/kernel/mm/ksm/pages_sharing.

        :return: memory in MB
        """
        # The context manager closes the file even if read()/int() raises.
        with open('/sys/kernel/mm/ksm/pages_sharing') as fpages:
            ksm_pages = int(fpages.read())
        # NOTE(review): the conversion assumes 4096-byte pages and divides
        # by 1e6 (decimal megabytes) - confirm for hosts with other page sizes.
        return ((ksm_pages * 4096) / 1e6)

    def initialize_guests():
        """
        Phase 1: start the allocator on every guest, fill each guest with the
        same value and wait for the shared memory figure to reach its target.

        Raises a test error when a guest has not reached the target after
        more than 64 polling rounds.
        """
        def _settle():
            # Wait time is derived from the fill size and the perf ratio.
            delay = ksm_size / 200 * perf_ratio
            logging.debug("Waiting %ds before proceeding...", delay)
            time.sleep(delay)

        logging.info("Phase 1: filling guest memory pages")
        for guest_session in lsessions:
            guest = lvms[lsessions.index(guest_session)]

            logging.debug("Turning off swap on vm %s", guest.name)
            guest_session.cmd("swapoff -a", timeout=300)

            # Bring up the allocator script inside the guest
            _start_allocator(guest, guest_session, 60 * perf_ratio)

        # Drive the allocator on each guest in turn
        for idx in range(vmsc):
            guest = lvms[idx]

            fill_cmd = "mem = MemFill(%d, %s, %s)" % (ksm_size, skeys[idx],
                                                      dkeys[idx])
            _execute_allocator(fill_cmd, guest, lsessions[idx],
                               60 * perf_ratio)

            fill_cmd = "mem.value_fill(%d)" % skeys[0]
            _execute_allocator(fill_cmd, guest, lsessions[idx],
                               fill_base_timeout * 2 * perf_ratio)

            # Poll until the shared memory reaches the expected value. With
            # new_ksm the host-wide figure is used, so the target grows with
            # every guest already filled.
            shared = 0
            rounds = 0
            logging.debug("Target shared meminfo for guest %s: %s",
                          guest.name, ksm_size)
            while True:
                if new_ksm:
                    target = ksm_size * (idx + 1)
                else:
                    target = ksm_size
                if not (shared < target):
                    break
                if rounds > 64:
                    logging.debug(utils_test.get_memory_info(lvms))
                    test.error("SHM didn't merge the memory until "
                               "the DL on guest: %s" % guest.name)
                _settle()
                if new_ksm:
                    shared = get_ksmstat()
                else:
                    shared = guest.get_shared_meminfo()
                logging.debug("Shared meminfo for guest %s after "
                              "iteration %s: %s", guest.name, rounds, shared)
                rounds += 1

        # One more pause as a safety margin
        _settle()

        logging.debug(utils_test.get_memory_info(lvms))
        logging.info("Phase 1: PASS")

    def separate_first_guest():
        """
        Phase 2: run static_random_fill on the first guest and log the
        throughput derived from the allocator's reply.
        """
        logging.info("Phase 2: Split the pages on the first guest")

        reply = _execute_allocator("mem.static_random_fill()", lvms[0],
                                   lsessions[0],
                                   fill_base_timeout * 2 * perf_ratio)[1]

        last_line = reply.splitlines()[-1]
        logging.debug("Return message of static_random_fill: %s", last_line)
        # The fifth whitespace-separated token of the reply is logged as the
        # elapsed time in ms below.
        elapsed = int(last_line.split()[4])
        throughput = ksm_size * 1000 / elapsed
        logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", ksm_size,
                      elapsed, throughput)
        logging.debug(utils_test.get_memory_info(lvms))
        logging.debug("Phase 2: PASS")

    def split_guest():
        """
        Sequential split of pages on guests up to memory limit

        Phase 3a runs static_random_fill on guests 1..vmsc-1 one after the
        other while watching host free memory. When memory runs low (or an
        OSError is raised) the guests before the current one are destroyed
        and the current guest becomes the "last" VM.
        Phase 3b destroys the guests after the last VM, verifies the last
        VM's memory with static_random_verify and destroys it.
        """
        logging.info("Phase 3a: Sequential split of pages on guests up to "
                     "memory limit")
        last_vm = 0
        session = None
        vm = None
        for i in range(1, vmsc):
            # Check VMs
            for j in range(0, vmsc):
                # is_alive must be called; the bare method object is always
                # truthy, which would make this check a no-op.
                if not lvms[j].is_alive():
                    e_msg = ("VM %d died while executing static_random_fill on"
                             " VM %d in allocator loop" % (j, i))
                    test.fail(e_msg)
            vm = lvms[i]
            session = lsessions[i]
            cmd = "mem.static_random_fill()"
            logging.debug("Executing %s on ksm_overcommit_guest.py loop, "
                          "vm: %s", cmd, vm.name)
            session.sendline(cmd)

            out = ""
            try:
                logging.debug("Watching host mem while filling vm %s memory",
                              vm.name)
                # Poll the session until the allocator reports PASS or FAIL.
                while (not out.startswith("PASS") and
                       not out.startswith("FAIL")):
                    if not vm.is_alive():
                        e_msg = ("VM %d died while executing "
                                 "static_random_fill on allocator loop" % i)
                        test.fail(e_msg)
                    free_mem = int(utils_memory.read_from_meminfo("MemFree"))
                    if (ksm_swap):
                        free_mem = (free_mem +
                                    int(utils_memory.read_from_meminfo("SwapFree")))
                    logging.debug("Free memory on host: %d", free_mem)

                    # We need to keep some memory for python to run.
                    if (free_mem < 64000) or (ksm_swap and
                                              free_mem < (450000 * perf_ratio)):
                        # Pause the filling guest while the earlier guests
                        # are destroyed to release host memory.
                        vm.pause()
                        for j in range(0, i):
                            lvms[j].destroy(gracefully=False)
                        time.sleep(20)
                        vm.resume()
                        logging.debug("Only %s free memory, killing %d guests",
                                      free_mem, (i - 1))
                        last_vm = i
                    out = session.read_nonblocking(0.1, 1)
                    time.sleep(2)
            except OSError:
                logging.debug("Only %s host free memory, killing %d guests",
                              free_mem, (i - 1))
                logging.debug("Stopping %s", vm.name)
                vm.pause()
                for j in range(0, i):
                    logging.debug("Destroying %s", lvms[j].name)
                    lvms[j].destroy(gracefully=False)
                time.sleep(20)
                vm.resume()
                last_vm = i

            if last_vm != 0:
                break
            logging.debug("Memory filled for guest %s", vm.name)

        logging.info("Phase 3a: PASS")

        logging.info("Phase 3b: Verify memory of the max stressed VM")
        for i in range(last_vm + 1, vmsc):
            lsessions[i].close()
            if i == (vmsc - 1):
                logging.debug(utils_test.get_memory_info([lvms[i]]))
            logging.debug("Destroying guest %s", lvms[i].name)
            lvms[i].destroy(gracefully=False)

        # Verify last machine with randomly generated memory
        cmd = "mem.static_random_verify()"
        _execute_allocator(cmd, lvms[last_vm], lsessions[last_vm],
                           (mem / 200 * 50 * perf_ratio))
        logging.debug(utils_test.get_memory_info([lvms[last_vm]]))

        lsessions[last_vm].cmd_output("die()", 20)
        lvms[last_vm].destroy(gracefully=False)
        logging.info("Phase 3b: PASS")

    def split_parallel():
        """
        Parallel page splitting.

        Runs max_alloc allocator sessions on the first VM and walks them
        through the phases logged below: start the allocators (1), fill with
        one value and wait for merging (2a), random fill (2b) and verify (2c),
        fill with one value again (2d) and check it (2e), random-fill the last
        96B (2f) and verify it (2g). Finally every allocator is told to die
        and the VM is destroyed.

        Raises a test error when shared memory has not reached ksm_size after
        more than 64 polling rounds in phase 2a.
        """
        logging.info("Phase 1: parallel page spliting")
        # We have to wait until allocator is finished (it waits 5 seconds to
        # clean the socket)

        session = lsessions[0]
        vm = lvms[0]
        # One session already exists; open the remaining max_alloc - 1
        # sessions on the same VM.
        for i in range(1, max_alloc):
            lsessions.append(vm.wait_for_login(timeout=360))

        session.cmd("swapoff -a", timeout=300)

        for i in range(0, max_alloc):
            # Start the allocator
            _start_allocator(vm, lsessions[i], 60 * perf_ratio)

        logging.info("Phase 1: PASS")

        logging.info("Phase 2a: Simultaneous merging")
        logging.debug("Memory used by allocator on guests = %dMB",
                      (ksm_size / max_alloc))

        # Each allocator gets an equal share of ksm_size and fills it with
        # the same value (skeys[0]).
        for i in range(0, max_alloc):
            cmd = "mem = MemFill(%d, %s, %s)" % ((ksm_size / max_alloc),
                                                 skeys[i], dkeys[i])
            _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio)

            cmd = "mem.value_fill(%d)" % (skeys[0])
            _execute_allocator(cmd, vm, lsessions[i],
                               fill_base_timeout * perf_ratio)

        # Wait until ksm_overcommit_guest.py merges pages (3 * ksm_size / 3)
        shm = 0
        i = 0
        logging.debug("Target shared memory size: %s", ksm_size)
        while (shm < ksm_size):
            # Give up after more than 64 polling rounds.
            if i > 64:
                logging.debug(utils_test.get_memory_info(lvms))
                test.error("SHM didn't merge the memory until DL")
            pause = ksm_size / 200 * perf_ratio
            logging.debug("Waiting %ds before proceed...", pause)
            time.sleep(pause)
            # The shared figure comes from the host KSM counter with
            # new_ksm, otherwise from the VM object.
            if (new_ksm):
                shm = get_ksmstat()
            else:
                shm = vm.get_shared_meminfo()
            logging.debug("Shared meminfo after attempt %s: %s", i, shm)
            i += 1

        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2a: PASS")

        logging.info("Phase 2b: Simultaneous spliting")
        # Actual splitting
        for i in range(0, max_alloc):
            cmd = "mem.static_random_fill()"
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      fill_base_timeout * perf_ratio)[1]

            # The fifth token of the last reply line is logged as the
            # elapsed time in ms.
            data = data.splitlines()[-1]
            logging.debug(data)
            out = int(data.split()[4])
            logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s",
                          (ksm_size / max_alloc), out,
                          (ksm_size * 1000 / out / max_alloc))
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2b: PASS")

        logging.info("Phase 2c: Simultaneous verification")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_verify()"
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      (mem / 200 * 50 * perf_ratio))[1]
        logging.info("Phase 2c: PASS")

        logging.info("Phase 2d: Simultaneous merging")
        # Actual merging: refill every allocator with the same value
        for i in range(0, max_alloc):
            cmd = "mem.value_fill(%d)" % skeys[0]
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      fill_base_timeout * 2 * perf_ratio)[1]
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2d: PASS")

        logging.info("Phase 2e: Simultaneous verification")
        for i in range(0, max_alloc):
            cmd = "mem.value_check(%d)" % skeys[0]
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      (mem / 200 * 50 * perf_ratio))[1]
        logging.info("Phase 2e: PASS")

        logging.info("Phase 2f: Simultaneous spliting last 96B")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_fill(96)"
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      fill_base_timeout * perf_ratio)[1]

            data = data.splitlines()[-1]
            out = int(data.split()[4])
            logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s",
                          ksm_size / max_alloc, out,
                          (ksm_size * 1000 / out / max_alloc))

        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2f: PASS")

        logging.info("Phase 2g: Simultaneous verification last 96B")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_verify(96)"
            _, data = _execute_allocator(cmd, vm, lsessions[i],
                                         (mem / 200 * 50 * perf_ratio))
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2g: PASS")

        logging.debug("Cleaning up...")
        # Ask every allocator to exit, then close the first session and
        # destroy the VM. NOTE(review): only the first session is closed
        # explicitly here - confirm the extra sessions are cleaned up elsewhere.
        for i in range(0, max_alloc):
            lsessions[i].cmd_output("die()", 20)
        session.close()
        vm.destroy(gracefully=False)

    # Main test code
    logging.info("Starting phase 0: Initialization")
    if process.run("ps -C ksmtuned", ignore_status=True).exit_status == 0:
        logging.info("Killing ksmtuned...")
        process.run("killall ksmtuned")
    new_ksm = False
    if (os.path.exists("/sys/kernel/mm/ksm/run")):
        process.run("echo 50 > /sys/kernel/mm/ksm/sleep_millisecs", shell=True)
        process.run("echo 5000 > /sys/kernel/mm/ksm/pages_to_scan", shell=True)
        process.run("echo 1 > /sys/kernel/mm/ksm/run", shell=True)

        e_up = "/sys/kernel/mm/transparent_hugepage/enabled"
        e_rh = "/sys/kernel/mm/redhat_transparent_hugepage/enabled"
        if os.path.exists(e_up):
            process.run("echo 'never' > %s" % e_up, shell=True)
        if os.path.exists(e_rh):
            process.run("echo 'never' > %s" % e_rh, shell=True)
        new_ksm = True
    else:
        try:
            process.run("modprobe ksm")
            process.run("ksmctl start 5000 100")
        except process.CmdError as details:
            test.fail("Failed to load KSM: %s" % details)

    # host_reserve: mem reserve kept for the host system to run
    host_reserve = int(params.get("ksm_host_reserve", -1))
    if (host_reserve == -1):
        try:
            available = utils_memory.read_from_meminfo("MemAvailable")
        except process.CmdError:  # ancient kernels
            utils_memory.drop_caches()
            available = utils_memory.read_from_meminfo("MemFree")
        # default host_reserve = UsedMem + one_minimal_guest(128MB)
        # later we add 64MB per additional guest
        host_reserve = ((utils_memory.memtotal() - available) / 1024 + 128)
        # using default reserve
        _host_reserve = True
    else:
        _host_reserve = False

    # guest_reserve: mem reserve kept to avoid guest OS to kill processes
    guest_reserve = int(params.get("ksm_guest_reserve", -1))
    if (guest_reserve == -1):
        # default guest_reserve = minimal_system_mem(256MB)
        # later we add tmpfs overhead
        guest_reserve = 256
        # using default reserve
        _guest_reserve = True
    else:
        _guest_reserve = False

    max_vms = int(params.get("max_vms", 2))
    overcommit = float(params.get("ksm_overcommit_ratio", 2.0))
    max_alloc = int(params.get("ksm_parallel_ratio", 1))

    # vmsc: count of all used VMs
    vmsc = int(overcommit) + 1
    vmsc = max(vmsc, max_vms)

    if (params['ksm_mode'] == "serial"):
        max_alloc = vmsc
        if _host_reserve:
            # First round of additional guest reserves
            host_reserve += vmsc * 64
            _host_reserve = vmsc

    host_mem = (int(utils_memory.memtotal()) / 1024 - host_reserve)

    ksm_swap = False
    if params.get("ksm_swap") == "yes":
        ksm_swap = True

    # Performance ratio
    perf_ratio = params.get("ksm_perf_ratio")
    if perf_ratio:
        perf_ratio = float(perf_ratio)
    else:
        perf_ratio = 1

    if (params['ksm_mode'] == "parallel"):
        vmsc = 1
        overcommit = 1
        mem = host_mem
        # 32bit system adjustment
        if "64" not in params.get("vm_arch_name"):
            logging.debug("Probably i386 guest architecture, "
                          "max allocator mem = 2G")
            # Guest can have more than 2G but
            # kvm mem + 1MB (allocator itself) can't
            if (host_mem > 3100):
                mem = 3100

        if os.popen("uname -i").readline().startswith("i386"):
            logging.debug("Host is i386 architecture, max guest mem is 2G")
            # Guest system with qemu overhead (64M) can't have more than 2G
            if mem > 3100 - 64:
                mem = 3100 - 64

    else:
        # mem: Memory of the guest systems. Maximum must be less than
        # host's physical ram
        mem = int(overcommit * host_mem / vmsc)

        # 32bit system adjustment
        if not params['image_name'].endswith("64"):
            logging.debug("Probably i386 guest architecture, "
                          "max allocator mem = 2G")
            # Guest can have more than 2G but
            # kvm mem + 1MB (allocator itself) can't
            if mem - guest_reserve - 1 > 3100:
                vmsc = int(math.ceil((host_mem * overcommit) /
                                     (3100 + guest_reserve)))
                if _host_reserve:
                    host_reserve += (vmsc - _host_reserve) * 64
                    host_mem -= (vmsc - _host_reserve) * 64
                    _host_reserve = vmsc
                mem = int(math.floor(host_mem * overcommit / vmsc))

        if os.popen("uname -i").readline().startswith("i386"):
            logging.debug("Host is i386 architecture, max guest mem is 2G")
            # Guest system with qemu overhead (64M) can't have more than 2G
            if mem > 3100 - 64:
                vmsc = int(math.ceil((host_mem * overcommit) /
                                     (3100 - 64.0)))
                if _host_reserve:
                    host_reserve += (vmsc - _host_reserve) * 64
                    host_mem -= (vmsc - _host_reserve) * 64
                    _host_reserve = vmsc
                mem = int(math.floor(host_mem * overcommit / vmsc))

    # 0.055 represents OS + TMPFS additional reserve per guest ram MB
    if _guest_reserve:
        guest_reserve += math.ceil(mem * 0.055)

    swap = int(utils_memory.read_from_meminfo("SwapTotal")) / 1024

    logging.debug("Overcommit = %f", overcommit)
    logging.debug("True overcommit = %f ", (float(vmsc * mem) /
                                            float(host_mem)))
    logging.debug("Host memory = %dM", host_mem)
    logging.debug("Guest memory = %dM", mem)
    logging.debug("Using swap = %s", ksm_swap)
    logging.debug("Swap = %dM", swap)
    logging.debug("max_vms = %d", max_vms)
    logging.debug("Count of all used VMs = %d", vmsc)
    logging.debug("Performance_ratio = %f", perf_ratio)

    # Generate unique keys for random series
    skeys = []
    dkeys = []
    for i in range(0, max(vmsc, max_alloc)):
        key = random.randrange(0, 255)
        while key in skeys:
            key = random.randrange(0, 255)
        skeys.append(key)

        key = random.randrange(0, 999)
        while key in dkeys:
            key = random.randrange(0, 999)
        dkeys.append(key)

    logging.debug("skeys: %s", skeys)
    logging.debug("dkeys: %s", dkeys)

    lvms = []
    lsessions = []

    # As we don't know the number and memory amount of VMs in advance,
    # we need to specify and create them here
    vm_name = params["main_vm"]
    params['mem'] = mem
    params['vms'] = vm_name
    # Associate pidfile name
    params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(vm_name,
                                                                 'pid')
    if not params.get('extra_params'):
        params['extra_params'] = ' '
    params['extra_params_' + vm_name] = params.get('extra_params')
    params['extra_params_' + vm_name] += (" -pidfile %s" %
                                          (params.get('pid_' + vm_name)))
    params['extra_params'] = params.get('extra_params_' + vm_name)

    # ksm_size: amount of memory used by allocator
    ksm_size = mem - guest_reserve
    logging.debug("Memory used by allocator on guests = %dM", ksm_size)
    fill_base_timeout = ksm_size / 10

    # Creating the first guest
    env_process.preprocess_vm(test, params, env, vm_name)
    lvms.append(env.get_vm(vm_name))
    if not lvms[0]:
        test.error("VM object not found in environment")
    if not lvms[0].is_alive():
        test.error("VM seems to be dead; Test requires a living VM")

    logging.debug("Booting first guest %s", lvms[0].name)

    lsessions.append(lvms[0].wait_for_login(timeout=360))
    # Associate vm PID
    try:
        tmp = open(params.get('pid_' + vm_name), 'r')
        params['pid_' + vm_name] = int(tmp.readline())
    except Exception:
        test.fail("Could not get PID of %s" % (vm_name))

    # Creating other guest systems
    for i in range(1, vmsc):
        vm_name = "vm" + str(i + 1)
        params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(vm_name,
                                                                     'pid')
        params['extra_params_' + vm_name] = params.get('extra_params')
        params['extra_params_' + vm_name] += (" -pidfile %s" %
                                              (params.get('pid_' + vm_name)))
        params['extra_params'] = params.get('extra_params_' + vm_name)

        # Last VM is later used to run more allocators simultaneously
        lvms.append(lvms[0].clone(vm_name, params))
        env.register_vm(vm_name, lvms[i])
        params['vms'] += " " + vm_name

        logging.debug("Booting guest %s", lvms[i].name)
        lvms[i].create()
        if not lvms[i].is_alive():
            test.error("VM %s seems to be dead; Test requires a"
                       "living VM" % lvms[i].name)

        lsessions.append(lvms[i].wait_for_login(timeout=360))
        try:
            tmp = open(params.get('pid_' + vm_name), 'r')
            params['pid_' + vm_name] = int(tmp.readline())
        except Exception:
            test.fail("Could not get PID of %s" % (vm_name))

    # Let guests rest a little bit :-)
    pause = vmsc * 2 * perf_ratio
    logging.debug("Waiting %ds before proceed", pause)
    time.sleep(vmsc * 2 * perf_ratio)
    logging.debug(utils_test.get_memory_info(lvms))

    # Copy ksm_overcommit_guest.py into guests
    vksmd_src = os.path.join(data_dir.get_shared_dir(),
                             "scripts", "ksm_overcommit_guest.py")
    dst_dir = "/tmp"
    for vm in lvms:
        vm.copy_files_to(vksmd_src, dst_dir)
    logging.info("Phase 0: PASS")

    if params['ksm_mode'] == "parallel":
        logging.info("Starting KSM test parallel mode")
        split_parallel()
        logging.info("KSM test parallel mode: PASS")
    elif params['ksm_mode'] == "serial":
        logging.info("Starting KSM test serial mode")
        initialize_guests()
        separate_first_guest()
        split_guest()
        logging.info("KSM test serial mode: PASS")
Example #25
0
def run(test, params, env):
    """
    Tests KSM (Kernel Shared Memory) capability by allocating and filling
    KVM guests memory using various values. KVM sets the memory as
    MADV_MERGEABLE so all VM's memory can be merged. The workers in
    guest writes to tmpfs filesystem thus allocations are not limited
    by process max memory, only by VM's memory. Two test modes are supported -
    serial and parallel.

    Serial mode - uses multiple VMs, allocates memory per guest and always
                  verifies the correct number of shared memory.
                  0) Prints out the setup and initialize guest(s)
                  1) Fills guest with the same number (S1)
                  2) Random fill on the first guest
                  3) Random fill of the remaining VMs one by one until the
                     memory is completely filled (KVM stops machines which
                     asks for additional memory until there is available
                     memory) (S2, shouldn't finish)
                  4) Destroy all VMs but the last one
                  5) Checks the last VMs memory for corruption
    Parallel mode - uses one VM with multiple allocator workers. Executes
                   scenarios in parallel to put more stress on the KVM.
                   0) Prints out the setup and initialize guest(s)
                   1) Fills memory with the same number (S1)
                   2) Fills memory with random numbers (S2)
                   3) Verifies all pages
                   4) Fills memory with the same number (S2)
                   5) Changes the last 96B (S3)

    Scenarios:
    S1) Fill all vms with the same value (all pages should be merged into 1)
    S2) Random fill (all pages should be splitted)
    S3) Fill last 96B (change only last 96B of each page; some pages will be
                      merged; there was a bug with data corruption)
    Every worker has unique random key so we are able to verify the filled
    values.

    :param test: kvm test object.
    :param params: Dictionary with test parameters.
    :param env: Dictionary with the test environment.

    :param cfg: ksm_swap - use swap?
    :param cfg: ksm_overcommit_ratio - memory overcommit (serial mode only)
    :param cfg: ksm_parallel_ratio - number of workers (parallel mode only)
    :param cfg: ksm_host_reserve - override memory reserve on host in MB
    :param cfg: ksm_guest_reserve - override memory reserve on guests in MB
    :param cfg: ksm_mode - test mode {serial, parallel}
    :param cfg: ksm_perf_ratio - performance ratio, increase it when your
                                 machine is too slow
    """
    def _start_allocator(vm, session, timeout):
        """
        Execute ksm_overcommit_guest.py on guest, wait until it's initialized.

        The script is launched through the remote session, then the session
        output is read until its last line matches "PASS:" or "FAIL:".
        The match result is not inspected here.

        :param vm: VM object (only its name is used, for log/error messages).
        :param session: Remote session to a VM object.
        :param timeout: Timeout that will be used to verify if
                ksm_overcommit_guest.py started properly.
        """
        logging.debug("Starting ksm_overcommit_guest.py on guest %s", vm.name)
        session.sendline("python /tmp/ksm_overcommit_guest.py")
        try:
            # Two separate patterns: the allocator's status line starts
            # with either "PASS:" or "FAIL:".
            session.read_until_last_line_matches(["PASS:", "FAIL:"], timeout)
        except aexpect.ExpectProcessTerminatedError as details:
            # The session process went away before any status line showed up.
            e_msg = ("Command ksm_overcommit_guest.py on vm '%s' failed: %s" %
                     (vm.name, str(details)))
            test.fail(e_msg)

    def _execute_allocator(command, vm, session, timeout):
        """
        Execute a given command on ksm_overcommit_guest.py main loop,
        indicating the vm the command was executed on.

        The command is sent through the session and output is read until the
        last line matches "PASS:" or "FAIL:". A "FAIL:" match is not treated
        as an error here; the match result is simply returned to the caller.

        :param command: Command that will be executed.
        :param vm: VM object (only its name is used, for log/error messages).
        :param session: Remote session to VM object.
        :param timeout: Timeout used to verify expected output.

        :return: Tuple (match index, data)
        """
        logging.debug(
            "Executing '%s' on ksm_overcommit_guest.py loop, "
            "vm: %s, timeout: %s", command, vm.name, timeout)
        session.sendline(command)
        try:
            # Two separate patterns: the allocator's status line starts
            # with either "PASS:" or "FAIL:".
            (match,
             data) = session.read_until_last_line_matches(["PASS:", "FAIL:"],
                                                          timeout)
        except aexpect.ExpectProcessTerminatedError as details:
            # The session process went away before any status line showed up.
            e_msg = ("Failed to execute command '%s' on "
                     "ksm_overcommit_guest.py, vm '%s': %s" %
                     (command, vm.name, str(details)))
            test.fail(e_msg)
        return (match, data)

    def get_ksmstat():
        """
        Return sharing memory by ksm in MB

        Reads the page count from /sys/kernel/mm/ksm/pages_sharing and
        converts it using a hard-coded 4096 bytes per page and 1e6 bytes
        per MB.

        :return: memory in MB (float)
        """
        # Context manager guarantees the sysfs file is closed even when
        # reading or parsing the value raises.
        with open('/sys/kernel/mm/ksm/pages_sharing') as fpages:
            ksm_pages = int(fpages.read())
        return ((ksm_pages * 4096) / 1e6)

    def initialize_guests():
        """
        Phase 1: start the allocator on every guest and fill guest memory.

        For each session, swap is turned off in the guest and
        ksm_overcommit_guest.py is started. Then, guest by guest, a MemFill
        object is created and filled with the first static key, and the
        function polls the shared-memory figure until it reaches the target
        for that guest (at most 65 polls, after which test.error() is called).
        """
        logging.info("Phase 1: filling guest memory pages")

        # These values do not change while the phase runs.
        start_timeout = 60 * perf_ratio
        fill_timeout = fill_base_timeout * 2 * perf_ratio
        poll_delay = ksm_size / 200 * perf_ratio

        for guest_session in lsessions:
            guest = lvms[lsessions.index(guest_session)]

            logging.debug("Turning off swap on vm %s", guest.name)
            guest_session.cmd("swapoff -a", timeout=300)

            # Start the allocator
            _start_allocator(guest, guest_session, start_timeout)

        # Execute allocator on guests
        for idx in range(vmsc):
            guest = lvms[idx]
            guest_session = lsessions[idx]

            create_cmd = "mem = MemFill(%d, %s, %s)" % (ksm_size, skeys[idx],
                                                        dkeys[idx])
            _execute_allocator(create_cmd, guest, guest_session, start_timeout)

            fill_cmd = "mem.value_fill(%d)" % skeys[0]
            _execute_allocator(fill_cmd, guest, guest_session, fill_timeout)

            # With the sysfs KSM interface the figure read is host-wide, so
            # the target grows with every filled guest; otherwise the figure
            # is read per guest and the target is one guest's worth.
            if new_ksm:
                target = ksm_size * (idx + 1)
            else:
                target = ksm_size

            # Let ksm_overcommit_guest.py do its job
            # (until shared mem reaches expected value)
            shared = 0
            attempt = 0
            logging.debug("Target shared meminfo for guest %s: %s",
                          guest.name, ksm_size)
            while shared < target:
                if attempt > 64:
                    logging.debug(utils_test.get_memory_info(lvms))
                    test.error("SHM didn't merge the memory until "
                               "the DL on guest: %s" % guest.name)
                logging.debug("Waiting %ds before proceeding...", poll_delay)
                time.sleep(poll_delay)
                shared = (get_ksmstat() if new_ksm
                          else guest.get_shared_meminfo())
                logging.debug(
                    "Shared meminfo for guest %s after "
                    "iteration %s: %s", guest.name, attempt, shared)
                attempt += 1

        # Keep some reserve
        logging.debug("Waiting %ds before proceeding...", poll_delay)
        time.sleep(poll_delay)

        logging.debug(utils_test.get_memory_info(lvms))
        logging.info("Phase 1: PASS")

    def separate_first_guest():
        """
        Phase 2: split the pages of the first guest.

        Runs static_random_fill() on the first guest's allocator, then logs
        the returned status line and the throughput derived from it.
        """
        logging.info("Phase 2: Split the pages on the first guest")

        first_vm = lvms[0]
        first_session = lsessions[0]
        timeout = fill_base_timeout * 2 * perf_ratio
        _, output = _execute_allocator("mem.static_random_fill()", first_vm,
                                       first_session, timeout)

        last_line = output.splitlines()[-1]
        logging.debug("Return message of static_random_fill: %s", last_line)

        # The fifth whitespace-separated token of the status line is parsed
        # as the elapsed time (logged below as milliseconds).
        elapsed_ms = int(last_line.split()[4])
        throughput = ksm_size * 1000 / elapsed_ms
        logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s", ksm_size,
                      elapsed_ms, throughput)

        logging.debug(utils_test.get_memory_info(lvms))
        logging.debug("Phase 2: PASS")

    def split_guest():
        """
        Sequential split of pages on guests up to memory limit

        Phase 3a: for every guest except the first, static_random_fill() is
        sent to its allocator and host free memory is polled while it runs.
        When free memory drops below the threshold (or an OSError is raised
        while polling), the guest being filled is paused, all guests with a
        lower index are destroyed, and the guest is resumed. That guest
        becomes the "last VM" and no further guest is filled.

        Phase 3b: guests with a higher index than the last VM are destroyed,
        the random pattern is verified on the last VM, and finally its
        allocator is told to die and the VM is destroyed.
        """
        logging.info("Phase 3a: Sequential split of pages on guests up to "
                     "memory limit")
        last_vm = 0
        session = None
        vm = None
        # Pre-set so the OSError handler below can always log it, even when
        # the error is raised before the first meminfo reading.
        free_mem = None
        for i in range(1, vmsc):
            # Check VMs
            for j in range(0, vmsc):
                # is_alive is a method; without the call the bound method
                # object is always truthy and a dead VM is never detected.
                if not lvms[j].is_alive():
                    e_msg = ("VM %d died while executing static_random_fill on"
                             " VM %d in allocator loop" % (j, i))
                    test.fail(e_msg)
            vm = lvms[i]
            session = lsessions[i]
            cmd = "mem.static_random_fill()"
            logging.debug(
                "Executing %s on ksm_overcommit_guest.py loop, "
                "vm: %s", cmd, vm.name)
            session.sendline(cmd)

            out = ""
            try:
                logging.debug("Watching host mem while filling vm %s memory",
                              vm.name)
                # Poll until the allocator prints its status line.
                while (not out.startswith("PASS")
                       and not out.startswith("FAIL")):
                    if not vm.is_alive():
                        e_msg = ("VM %d died while executing "
                                 "static_random_fill on allocator loop" % i)
                        test.fail(e_msg)
                    free_mem = int(utils_memory.read_from_meminfo("MemFree"))
                    if (ksm_swap):
                        free_mem = (
                            free_mem +
                            int(utils_memory.read_from_meminfo("SwapFree")))
                    logging.debug("Free memory on host: %d", free_mem)

                    # We need to keep some memory for python to run.
                    if (free_mem < 64000) or (ksm_swap and free_mem <
                                              (450000 * perf_ratio)):
                        # Freeze the guest being filled while the guests
                        # with a lower index are torn down.
                        vm.pause()
                        for j in range(0, i):
                            lvms[j].destroy(gracefully=False)
                        time.sleep(20)
                        vm.resume()
                        # range(0, i) destroys i guests (indexes 0 .. i-1).
                        logging.debug("Only %s free memory, killing %d guests",
                                      free_mem, i)
                        last_vm = i
                    out = session.read_nonblocking(0.1, 1)
                    time.sleep(2)
            except OSError:
                # Handled the same way as the low-memory branch above.
                logging.debug("Only %s host free memory, killing %d guests",
                              free_mem, i)
                logging.debug("Stopping %s", vm.name)
                vm.pause()
                for j in range(0, i):
                    logging.debug("Destroying %s", lvms[j].name)
                    lvms[j].destroy(gracefully=False)
                time.sleep(20)
                vm.resume()
                last_vm = i

            # Once guests had to be destroyed, stop filling further guests.
            if last_vm != 0:
                break
            logging.debug("Memory filled for guest %s", vm.name)

        logging.info("Phase 3a: PASS")

        logging.info("Phase 3b: Verify memory of the max stressed VM")
        # Guests after the last filled one are no longer needed.
        for i in range(last_vm + 1, vmsc):
            lsessions[i].close()
            if i == (vmsc - 1):
                logging.debug(utils_test.get_memory_info([lvms[i]]))
            logging.debug("Destroying guest %s", lvms[i].name)
            lvms[i].destroy(gracefully=False)

        # Verify last machine with randomly generated memory
        cmd = "mem.static_random_verify()"
        _execute_allocator(cmd, lvms[last_vm], lsessions[last_vm],
                           (mem / 200 * 50 * perf_ratio))
        logging.debug(utils_test.get_memory_info([lvms[last_vm]]))

        lsessions[last_vm].cmd_output("die()", 20)
        lvms[last_vm].destroy(gracefully=False)
        logging.info("Phase 3b: PASS")

    def split_parallel():
        """
        Parallel page spliting

        Runs max_alloc allocator workers inside the first (only) guest, each
        over its own remote session, and drives them through these phases:

        1)  open the extra sessions, turn swap off, start the allocators
        2a) fill every worker with the same value and wait for merging
        2b) random fill (split) and log throughput
        2c) verify the random fill
        2d) fill with the same value again (merge)
        2e) verify the value fill
        2f) random fill of the last 96B and log throughput
        2g) verify the last-96B random fill

        Finally every allocator is told to die, the sessions are closed and
        the VM is destroyed.
        """
        logging.info("Phase 1: parallel page spliting")
        # We have to wait until allocator is finished (it waits 5 seconds to
        # clean the socket)

        session = lsessions[0]
        vm = lvms[0]
        # One additional session per additional allocator worker.
        for i in range(1, max_alloc):
            lsessions.append(vm.wait_for_login(timeout=360))

        session.cmd("swapoff -a", timeout=300)

        for i in range(0, max_alloc):
            # Start the allocator
            _start_allocator(vm, lsessions[i], 60 * perf_ratio)

        logging.info("Phase 1: PASS")

        logging.info("Phase 2a: Simultaneous merging")
        logging.debug("Memory used by allocator on guests = %dMB",
                      (ksm_size / max_alloc))

        for i in range(0, max_alloc):
            # Each worker gets an equal share of the allocator memory.
            cmd = "mem = MemFill(%d, %s, %s)" % (
                (ksm_size / max_alloc), skeys[i], dkeys[i])
            _execute_allocator(cmd, vm, lsessions[i], 60 * perf_ratio)

            cmd = "mem.value_fill(%d)" % (skeys[0])
            _execute_allocator(cmd, vm, lsessions[i],
                               fill_base_timeout * perf_ratio)

        # Wait until ksm_overcommit_guest.py merges pages (3 * ksm_size / 3)
        shm = 0
        i = 0
        logging.debug("Target shared memory size: %s", ksm_size)
        while (shm < ksm_size):
            # Give up after 65 polls.
            if i > 64:
                logging.debug(utils_test.get_memory_info(lvms))
                test.error("SHM didn't merge the memory until DL")
            pause = ksm_size / 200 * perf_ratio
            logging.debug("Waiting %ds before proceed...", pause)
            time.sleep(pause)
            if (new_ksm):
                shm = get_ksmstat()
            else:
                shm = vm.get_shared_meminfo()
            logging.debug("Shared meminfo after attempt %s: %s", i, shm)
            i += 1

        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2a: PASS")

        logging.info("Phase 2b: Simultaneous spliting")
        # Actual splitting
        for i in range(0, max_alloc):
            cmd = "mem.static_random_fill()"
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      fill_base_timeout * perf_ratio)[1]

            # The fifth token of the last output line is parsed as the
            # elapsed time (logged below as milliseconds).
            data = data.splitlines()[-1]
            logging.debug(data)
            out = int(data.split()[4])
            logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s",
                          (ksm_size / max_alloc), out,
                          (ksm_size * 1000 / out / max_alloc))
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2b: PASS")

        logging.info("Phase 2c: Simultaneous verification")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_verify()"
            _execute_allocator(cmd, vm, lsessions[i],
                               (mem / 200 * 50 * perf_ratio))
        logging.info("Phase 2c: PASS")

        logging.info("Phase 2d: Simultaneous merging")
        # Actual merging: every worker is refilled with the same value
        for i in range(0, max_alloc):
            cmd = "mem.value_fill(%d)" % skeys[0]
            _execute_allocator(cmd, vm, lsessions[i],
                               fill_base_timeout * 2 * perf_ratio)
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2d: PASS")

        logging.info("Phase 2e: Simultaneous verification")
        for i in range(0, max_alloc):
            cmd = "mem.value_check(%d)" % skeys[0]
            _execute_allocator(cmd, vm, lsessions[i],
                               (mem / 200 * 50 * perf_ratio))
        logging.info("Phase 2e: PASS")

        logging.info("Phase 2f: Simultaneous spliting last 96B")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_fill(96)"
            data = _execute_allocator(cmd, vm, lsessions[i],
                                      fill_base_timeout * perf_ratio)[1]

            data = data.splitlines()[-1]
            out = int(data.split()[4])
            logging.debug("Performance: %dMB * 1000 / %dms = %dMB/s",
                          ksm_size / max_alloc, out,
                          (ksm_size * 1000 / out / max_alloc))

        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2f: PASS")

        logging.info("Phase 2g: Simultaneous verification last 96B")
        for i in range(0, max_alloc):
            cmd = "mem.static_random_verify(96)"
            _execute_allocator(cmd, vm, lsessions[i],
                               (mem / 200 * 50 * perf_ratio))
        logging.debug(utils_test.get_memory_info([vm]))
        logging.info("Phase 2g: PASS")

        logging.debug("Cleaning up...")
        for i in range(0, max_alloc):
            lsessions[i].cmd_output("die()", 20)
        session.close()
        # Also close the additional worker sessions opened in phase 1 so
        # they are not left dangling once the VM is gone.
        for extra_session in lsessions[1:max_alloc]:
            extra_session.close()
        vm.destroy(gracefully=False)

    # Main test code
    logging.info("Starting phase 0: Initialization")
    if process.run("ps -C ksmtuned", ignore_status=True).exit_status == 0:
        logging.info("Killing ksmtuned...")
        process.run("killall ksmtuned")
    new_ksm = False
    if (os.path.exists("/sys/kernel/mm/ksm/run")):
        process.run("echo 50 > /sys/kernel/mm/ksm/sleep_millisecs", shell=True)
        process.run("echo 5000 > /sys/kernel/mm/ksm/pages_to_scan", shell=True)
        process.run("echo 1 > /sys/kernel/mm/ksm/run", shell=True)

        e_up = "/sys/kernel/mm/transparent_hugepage/enabled"
        e_rh = "/sys/kernel/mm/redhat_transparent_hugepage/enabled"
        if os.path.exists(e_up):
            process.run("echo 'never' > %s" % e_up, shell=True)
        if os.path.exists(e_rh):
            process.run("echo 'never' > %s" % e_rh, shell=True)
        new_ksm = True
    else:
        try:
            process.run("modprobe ksm")
            process.run("ksmctl start 5000 100")
        except process.CmdError as details:
            test.fail("Failed to load KSM: %s" % details)

    # host_reserve: mem reserve kept for the host system to run
    host_reserve = int(params.get("ksm_host_reserve", -1))
    if (host_reserve == -1):
        try:
            available = utils_memory.read_from_meminfo("MemAvailable")
        except process.CmdError:  # ancient kernels
            utils_memory.drop_caches()
            available = utils_memory.read_from_meminfo("MemFree")
        # default host_reserve = UsedMem + one_minimal_guest(128MB)
        # later we add 64MB per additional guest
        host_reserve = ((utils_memory.memtotal() - available) / 1024 + 128)
        # using default reserve
        _host_reserve = True
    else:
        _host_reserve = False

    # guest_reserve: mem reserve kept to avoid guest OS to kill processes
    guest_reserve = int(params.get("ksm_guest_reserve", -1))
    if (guest_reserve == -1):
        # default guest_reserve = minimal_system_mem(256MB)
        # later we add tmpfs overhead
        guest_reserve = 256
        # using default reserve
        _guest_reserve = True
    else:
        _guest_reserve = False

    max_vms = int(params.get("max_vms", 2))
    overcommit = float(params.get("ksm_overcommit_ratio", 2.0))
    max_alloc = int(params.get("ksm_parallel_ratio", 1))

    # vmsc: count of all used VMs
    vmsc = int(overcommit) + 1
    vmsc = max(vmsc, max_vms)

    if (params['ksm_mode'] == "serial"):
        max_alloc = vmsc
        if _host_reserve:
            # First round of additional guest reserves
            host_reserve += vmsc * 64
            _host_reserve = vmsc

    host_mem = (int(utils_memory.memtotal()) / 1024 - host_reserve)

    ksm_swap = False
    if params.get("ksm_swap") == "yes":
        ksm_swap = True

    # Performance ratio
    perf_ratio = params.get("ksm_perf_ratio")
    if perf_ratio:
        perf_ratio = float(perf_ratio)
    else:
        perf_ratio = 1

    if (params['ksm_mode'] == "parallel"):
        vmsc = 1
        overcommit = 1
        mem = host_mem
        # 32bit system adjustment
        if "64" not in params.get("vm_arch_name"):
            logging.debug("Probably i386 guest architecture, "
                          "max allocator mem = 2G")
            # Guest can have more than 2G but
            # kvm mem + 1MB (allocator itself) can't
            if (host_mem > 3100):
                mem = 3100

        if os.popen("uname -i").readline().startswith("i386"):
            logging.debug("Host is i386 architecture, max guest mem is 2G")
            # Guest system with qemu overhead (64M) can't have more than 2G
            if mem > 3100 - 64:
                mem = 3100 - 64

    else:
        # mem: Memory of the guest systems. Maximum must be less than
        # host's physical ram
        mem = int(overcommit * host_mem / vmsc)

        # 32bit system adjustment
        if not params['image_name'].endswith("64"):
            logging.debug("Probably i386 guest architecture, "
                          "max allocator mem = 2G")
            # Guest can have more than 2G but
            # kvm mem + 1MB (allocator itself) can't
            if mem - guest_reserve - 1 > 3100:
                vmsc = int(
                    math.ceil(
                        (host_mem * overcommit) / (3100 + guest_reserve)))
                if _host_reserve:
                    host_reserve += (vmsc - _host_reserve) * 64
                    host_mem -= (vmsc - _host_reserve) * 64
                    _host_reserve = vmsc
                mem = int(math.floor(host_mem * overcommit / vmsc))

        if os.popen("uname -i").readline().startswith("i386"):
            logging.debug("Host is i386 architecture, max guest mem is 2G")
            # Guest system with qemu overhead (64M) can't have more than 2G
            if mem > 3100 - 64:
                vmsc = int(math.ceil((host_mem * overcommit) / (3100 - 64.0)))
                if _host_reserve:
                    host_reserve += (vmsc - _host_reserve) * 64
                    host_mem -= (vmsc - _host_reserve) * 64
                    _host_reserve = vmsc
                mem = int(math.floor(host_mem * overcommit / vmsc))

    # 0.055 represents OS + TMPFS additional reserve per guest ram MB
    if _guest_reserve:
        guest_reserve += math.ceil(mem * 0.055)

    swap = int(utils_memory.read_from_meminfo("SwapTotal")) / 1024

    logging.debug("Overcommit = %f", overcommit)
    logging.debug("True overcommit = %f ",
                  (float(vmsc * mem) / float(host_mem)))
    logging.debug("Host memory = %dM", host_mem)
    logging.debug("Guest memory = %dM", mem)
    logging.debug("Using swap = %s", ksm_swap)
    logging.debug("Swap = %dM", swap)
    logging.debug("max_vms = %d", max_vms)
    logging.debug("Count of all used VMs = %d", vmsc)
    logging.debug("Performance_ratio = %f", perf_ratio)

    # Generate unique keys for random series
    skeys = []
    dkeys = []
    for i in range(0, max(vmsc, max_alloc)):
        key = random.randrange(0, 255)
        while key in skeys:
            key = random.randrange(0, 255)
        skeys.append(key)

        key = random.randrange(0, 999)
        while key in dkeys:
            key = random.randrange(0, 999)
        dkeys.append(key)

    logging.debug("skeys: %s", skeys)
    logging.debug("dkeys: %s", dkeys)

    lvms = []
    lsessions = []

    # As we don't know the number and memory amount of VMs in advance,
    # we need to specify and create them here
    vm_name = params["main_vm"]
    params['mem'] = mem
    params['vms'] = vm_name
    # Associate pidfile name
    params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(
        vm_name, 'pid')
    if not params.get('extra_params'):
        params['extra_params'] = ' '
    params['extra_params_' + vm_name] = params.get('extra_params')
    params['extra_params_' + vm_name] += (" -pidfile %s" %
                                          (params.get('pid_' + vm_name)))
    params['extra_params'] = params.get('extra_params_' + vm_name)

    # ksm_size: amount of memory used by allocator
    ksm_size = mem - guest_reserve
    logging.debug("Memory used by allocator on guests = %dM", ksm_size)
    fill_base_timeout = ksm_size / 10

    # Creating the first guest
    env_process.preprocess_vm(test, params, env, vm_name)
    lvms.append(env.get_vm(vm_name))
    if not lvms[0]:
        test.error("VM object not found in environment")
    if not lvms[0].is_alive():
        test.error("VM seems to be dead; Test requires a living VM")

    logging.debug("Booting first guest %s", lvms[0].name)

    lsessions.append(lvms[0].wait_for_login(timeout=360))
    # Associate vm PID
    try:
        tmp = open(params.get('pid_' + vm_name), 'r')
        params['pid_' + vm_name] = int(tmp.readline())
    except Exception:
        test.fail("Could not get PID of %s" % (vm_name))

    # Creating other guest systems
    for i in range(1, vmsc):
        vm_name = "vm" + str(i + 1)
        params['pid_' + vm_name] = utils_misc.generate_tmp_file_name(
            vm_name, 'pid')
        params['extra_params_' + vm_name] = params.get('extra_params')
        params['extra_params_' + vm_name] += (" -pidfile %s" %
                                              (params.get('pid_' + vm_name)))
        params['extra_params'] = params.get('extra_params_' + vm_name)

        # Last VM is later used to run more allocators simultaneously
        lvms.append(lvms[0].clone(vm_name, params))
        env.register_vm(vm_name, lvms[i])
        params['vms'] += " " + vm_name

        logging.debug("Booting guest %s", lvms[i].name)
        lvms[i].create()
        if not lvms[i].is_alive():
            test.error("VM %s seems to be dead; Test requires a"
                       "living VM" % lvms[i].name)

        lsessions.append(lvms[i].wait_for_login(timeout=360))
        try:
            tmp = open(params.get('pid_' + vm_name), 'r')
            params['pid_' + vm_name] = int(tmp.readline())
        except Exception:
            test.fail("Could not get PID of %s" % (vm_name))

    # Let guests rest a little bit :-)
    pause = vmsc * 2 * perf_ratio
    logging.debug("Waiting %ds before proceed", pause)
    time.sleep(vmsc * 2 * perf_ratio)
    logging.debug(utils_test.get_memory_info(lvms))

    # Copy ksm_overcommit_guest.py into guests
    vksmd_src = os.path.join(data_dir.get_shared_dir(), "scripts",
                             "ksm_overcommit_guest.py")
    dst_dir = "/tmp"
    for vm in lvms:
        vm.copy_files_to(vksmd_src, dst_dir)
    logging.info("Phase 0: PASS")

    if params['ksm_mode'] == "parallel":
        logging.info("Starting KSM test parallel mode")
        split_parallel()
        logging.info("KSM test parallel mode: PASS")
    elif params['ksm_mode'] == "serial":
        logging.info("Starting KSM test serial mode")
        initialize_guests()
        separate_first_guest()
        split_guest()
        logging.info("KSM test serial mode: PASS")
Example #26
0
def run_virsh_nodememstats(test, params, env):
    """
    Test the command virsh nodememstats

    (1) Call the virsh nodememstats command
    (2) Get the output
    (3) Check the output against /proc/meminfo
    (4) Call the virsh nodememstats command with an unexpected option
    (5) Call the virsh nodememstats command with libvirtd service stop

    :param test: test object (not used by this function)
    :param params: dictionary-like test parameters; the keys read here are
                   "itr", "libvirtd", "virsh_nodememstats_options",
                   "status_error" and "delta"
    :param env: test environment (not used by this function)
    :raise error.TestFail: if the command succeeds when it is expected to
                           fail, fails when it is expected to succeed, or
                           reports values deviating from /proc/meminfo
    """

    # Statistics compared between the virsh output and /proc/meminfo
    name_stats = ['total', 'free', 'buffers', 'cached']
    itr = int(params.get("itr"))
    # One dictionary of per-statistic deviations for every iteration
    deltas = []

    def check_nodememstats(actual_stats, expected_stats, delta):
        """
        Check the nodememstats output value with /proc/meminfo value

        'total' has to match exactly, every other statistic may deviate
        by at most `delta`.

        :return: dictionary mapping each statistic name to its deviation
        """
        delta_stats = {}
        for name in name_stats:
            delta_stats[name] = abs(actual_stats[name] - expected_stats[name])
            if name == 'total':
                if delta_stats[name] != 0:
                    raise error.TestFail("Command 'virsh nodememstats' not"
                                         " succeeded as the value for %s is "
                                         "deviated by %d\nThe total memory "
                                         "value is deviating-check"
                                         % (name, delta_stats[name]))
            elif delta_stats[name] > delta:
                raise error.TestFail("Command 'virsh nodememstats' not "
                                     "succeeded as the value for %s"
                                     " is deviated by %d"
                                     % (name, delta_stats[name]))
        return delta_stats

    # Read every loop-invariant parameter up front. params.get() yields
    # None for a missing key, so `libvirtd` and `status_error` are always
    # bound, even when the key is absent or no iteration is executed.
    libvirtd = params.get("libvirtd")
    option = params.get("virsh_nodememstats_options")
    status_error = params.get("status_error")

    # From the beginning of a line, group 1 is one or more word-characters,
    # followed by zero or more whitespace characters and a ':', then one or
    # more whitespace characters, followed by group 2, which is one or more
    # digit characters, then one whitespace character followed by the unit
    # (a literal 'kB' or 'KiB' sequence), e.g as below
    # total  :              3809340 kB
    # total  :              3809340 KiB
    regex_obj = re.compile(r"^(\w+)\s*:\s+(\d+)\s\w+")

    # Prepare libvirtd service
    if libvirtd == "off":
        utils_libvirtd.libvirtd_stop()

    try:
        # Run the test case `itr` times (configured in subtests.cfg)
        # and collect the deviations of every iteration
        for _ in range(itr):
            result = virsh.nodememstats(option)
            status = result.exit_status

            if status_error == "yes":
                if status == 0:
                    if libvirtd == "off":
                        raise error.TestFail("Command 'virsh nodememstats' "
                                             "succeeded with libvirtd service"
                                             " stopped, incorrect")
                    raise error.TestFail("Command 'virsh nodememstats %s' "
                                         "succeeded (incorrect command)"
                                         % option)

            elif status_error == "no":
                if status != 0:
                    raise error.TestFail("Command virsh nodememstats %s not "
                                         "succeeded:\n%s" % (option, status))

                # Values reported by virsh, normalised to MBs. Lines which
                # do not match (e.g. empty ones) are skipped.
                expected = {}
                for line in result.stdout.split('\n'):
                    match_obj = regex_obj.search(line)
                    if match_obj is not None:
                        name = match_obj.group(1)
                        value = match_obj.group(2)
                        # Floor division keeps the value an integer
                        # regardless of the interpreter version
                        expected[name] = int(value) // 1024

                # Get the actual value from /proc/meminfo and normalise to MBs
                actual = {
                    'total': int(utils_memory.memtotal()) // 1024,
                    'free': int(utils_memory.freememtotal()) // 1024,
                    'buffers': int(
                        utils_memory.read_from_meminfo('Buffers')) // 1024,
                    'cached': int(
                        utils_memory.read_from_meminfo('Cached')) // 1024,
                }

                # Currently the delta value is kept at 200 MB this can be
                # tuned based on the accuracy
                # Check subtests.cfg for more details
                delta = int(params.get("delta"))
                deltas.append(check_nodememstats(actual, expected, delta))
    finally:
        # Recover libvirtd service start, no matter how the loop ended, so
        # that a failing test does not leave the service stopped
        if libvirtd == "off":
            utils_libvirtd.libvirtd_start()

    # Print the deviated values for all iterations
    if status_error == "no":
        logging.debug("The following is the deviations from "
                      "the actual(/proc/meminfo) and expected"
                      " value(output of virsh nodememstats)")

        for i, delta_stats in enumerate(deltas):
            logging.debug("iteration %d:", i)
            for name in name_stats:
                logging.debug("%19s : %d", name, delta_stats[name])
def run(test, params, env):
    """
    Test migration under stress.

    (1) Validate the migration VMs and the source/destination URIs
    (2) Set up the NFS client on the remote host
    (3) Redefine cpu/memory of the migration VMs and start them
    (4) Set up ssh autologin and migrate the VMs under stress
    (5) Check the network of the migrated VMs on the destination
    (6) Clean up the VMs, the NFS client and the environment objects

    :param test: test object; `test.bindir` is passed to the VM objects
    :param params: dictionary-like test parameters; besides being read, it
                   is updated with the NFS/SSH setup keys, "load_vms"
                   (replaced by a list of VM objects) and "stress_args"
    :param env: test environment; provides the "address_cache" entry and
                is cleaned at the end
    :raise exceptions.TestSkipError: if fewer than two migration VMs or an
                                     invalid source/destination URI is
                                     configured
    """
    vm_names = params.get("migration_vms").split()
    if len(vm_names) < 2:
        raise exceptions.TestSkipError("Provide enough vms for migration")

    src_uri = libvirt_vm.complete_uri(params.get("migrate_source_host",
                                                 "EXAMPLE"))
    if src_uri.count('///') or src_uri.count('EXAMPLE'):
        raise exceptions.TestSkipError("The src_uri '%s' is invalid" % src_uri)

    dest_uri = libvirt_vm.complete_uri(params.get("migrate_dest_host",
                                                  "EXAMPLE"))
    if dest_uri.count('///') or dest_uri.count('EXAMPLE'):
        raise exceptions.TestSkipError("The dest_uri '%s' is invalid" %
                                       dest_uri)

    # Params for NFS and SSH setup
    # NOTE(review): the two user names look like redacted placeholders;
    # confirm the intended account names against the test configuration.
    params["server_ip"] = params.get("migrate_dest_host")
    params["server_user"] = "******"
    params["server_pwd"] = params.get("migrate_dest_pwd")
    params["client_ip"] = params.get("migrate_source_host")
    params["client_user"] = "******"
    params["client_pwd"] = params.get("migrate_source_pwd")
    params["nfs_client_ip"] = params.get("migrate_dest_host")
    params["nfs_server_ip"] = params.get("migrate_source_host")

    # Configure NFS client on remote host
    nfs_client = nfs.NFSClient(params)
    nfs_client.setup()

    # Everything after the NFS setup is guarded, so that a failure in the
    # preparation steps still runs the cleanup in the finally clause
    try:
        # Migrated vms' instance
        vms = []
        for vm_name in vm_names:
            vms.append(libvirt_vm.VM(vm_name, params, test.bindir,
                                     env.get("address_cache")))

        load_vm_names = params.get("load_vms").split()
        # vms for load
        load_vms = []
        for vm_name in load_vm_names:
            load_vms.append(libvirt_vm.VM(vm_name, params, test.bindir,
                                          env.get("address_cache")))
        # From here on "load_vms" holds VM objects instead of names
        params['load_vms'] = load_vms

        cpu = int(params.get("smp", 1))
        memory = int(params.get("mem")) * 1024
        stress_type = params.get("migration_stress_type")
        vm_bytes = params.get("stress_vm_bytes")
        stress_args = params.get("stress_args")
        migration_type = params.get("migration_type")
        start_migration_vms = "yes" == params.get("start_migration_vms",
                                                  "yes")
        thread_timeout = int(params.get("thread_timeout", 120))
        remote_host = params.get("migrate_dest_host")
        username = params.get("migrate_dest_user", "root")
        password = params.get("migrate_dest_pwd")
        prompt = params.get("shell_prompt", r"[\#\$]")

        # Set vm_bytes for start_cmd
        mem_total = utils_memory.memtotal()
        vm_reserved = len(vms) * memory
        if vm_bytes == "half":
            # Floor division keeps the size an integer regardless of the
            # interpreter version
            vm_bytes = (mem_total - vm_reserved) // 2
        elif vm_bytes == "shortage":
            vm_bytes = mem_total - vm_reserved + 524288
        if vm_bytes is not None:
            params["stress_args"] = stress_args % vm_bytes

        for vm in vms:
            # Keep vm dead for edit
            if vm.is_alive():
                vm.destroy()
            set_cpu_memory(vm.name, cpu, memory)

        vm_ipaddr = {}
        if start_migration_vms:
            for vm in vms:
                vm.start()
                vm.wait_for_login()
                vm_ipaddr[vm.name] = vm.get_address()
                # TODO: recover vm if start failed?
        # Config ssh autologin for remote host
        ssh_key.setup_ssh_key(remote_host, username, password, port=22)

        do_stress_migration(vms, src_uri, dest_uri, stress_type,
                            migration_type, params, thread_timeout)
        # Check network of vms on destination
        if start_migration_vms and migration_type != "cross":
            for vm in vms:
                utils_test.check_dest_vm_network(vm, vm_ipaddr[vm.name],
                                                 remote_host,
                                                 username, password, prompt)
    finally:
        logging.debug("Cleanup vms...")
        # Fresh VM objects are built from the names, so the cleanup does
        # not depend on how far the preparation above got
        for vm_name in vm_names:
            vm = libvirt_vm.VM(vm_name, params, test.bindir,
                               env.get("address_cache"))
            utlv.MigrationTest().cleanup_dest_vm(vm, None, dest_uri)
            if vm.is_alive():
                vm.destroy(gracefully=False)

        if nfs_client:
            logging.info("Cleanup NFS client environment...")
            nfs_client.cleanup()
        env.clean_objects()