def get_cpu_cores_per_run(coreLimit, num_of_threads, my_cgroups):
    """Assign the available CPU cores to a number of parallel benchmark runs.

    Every run receives its own set of cores; no core is shared between runs.
    On hyper-threading machines the assignment tries to keep two different
    runs off sibling cores of the same physical core (not guaranteed if the
    number of parallel runs is too high).  On multi-CPU machines a run is
    kept on a single CPU whenever the cores-per-run count fits on one CPU,
    and runs are spread evenly over all CPUs.  Some theoretically possible
    layouts (e.g. three 10-core runs on two 16-core CPUs) are intentionally
    not supported because they would be unfair anyway.

    The available cores are read from the cgroup file system, so the result
    is always a subset of the cores this process may use.  Asymmetric core
    distributions over CPUs (e.g. 3 cores on one CPU, 5 on another) are not
    supported.

    @param coreLimit: the number of cores for each run
    @param num_of_threads: the number of parallel benchmark executions
    @return a list of lists, where each inner list contains the cores for one run
    """
    try:
        # Cores this process is allowed to use, according to the cpuset cgroup.
        allCpus = util.parse_int_list(my_cgroups.get_value(cgroups.CPUSET, "cpus"))
        logging.debug("List of available CPU cores is %s.", allCpus)

        # Group the cores by the physical package (CPU socket) they sit on.
        cores_of_package = collections.defaultdict(list)
        for core in allCpus:
            cores_of_package[get_cpu_package_for_core(core)].append(core)
        logging.debug("Physical packages of cores are %s.", cores_of_package)

        # Hyper-threading topology: for each core, the sibling cores that
        # share its physical core (the list includes the core itself).
        siblings_of_core = {
            core: util.parse_int_list(
                util.read_file(
                    "/sys/devices/system/cpu/cpu{0}/topology/thread_siblings_list".format(
                        core
                    )
                )
            )
            for core in allCpus
        }
        logging.debug("Siblings of cores are %s.", siblings_of_core)
    except ValueError as e:
        sys.exit("Could not read CPU information from kernel: {0}".format(e))

    return _get_cpu_cores_per_run0(
        coreLimit, num_of_threads, allCpus, cores_of_package, siblings_of_core
    )
def get_cpu_cores_per_run(coreLimit, num_of_threads, my_cgroups):
    """Compute a non-overlapping assignment of CPU cores to parallel runs.

    Each benchmark execution gets a private set of cores.  The algorithm
    avoids placing two runs on hyper-threading siblings of one physical
    core where possible, avoids splitting a run across CPUs when it fits
    on one, and balances runs across all CPUs.  Exotic layouts (such as
    three 10-core runs on two 16-core CPUs) are deliberately unsupported.

    The set of usable cores comes from the cgroup file system, so the
    result is restricted to cores the current process may use.  Machines
    where the usable cores are split asymmetrically over CPUs are not
    supported.

    @param coreLimit: the number of cores for each run
    @param num_of_threads: the number of parallel benchmark executions
    @return a list of lists, where each inner list contains the cores for one run
    """
    try:
        # Determine which cores the cpuset cgroup allows us to use.
        allCpus = util.parse_int_list(my_cgroups.get_value(cgroups.CPUSET, "cpus"))
        logging.debug("List of available CPU cores is %s.", allCpus)

        # Map each core to its physical package ("CPU socket") by reading
        # the kernel's sysfs topology entry, grouping as we go.
        cores_of_package = collections.defaultdict(list)
        for core in allCpus:
            package_id = int(
                util.read_file(
                    "/sys/devices/system/cpu/cpu{0}/topology/physical_package_id".format(
                        core
                    )
                )
            )
            cores_of_package[package_id].append(core)
        logging.debug("Physical packages of cores are %s.", cores_of_package)

        # For every core, record the sibling cores that share its physical
        # core (hyper-threading); the sibling list includes the core itself.
        siblings_of_core = {}
        for core in allCpus:
            sibling_list = util.read_file(
                "/sys/devices/system/cpu/cpu{0}/topology/thread_siblings_list".format(
                    core
                )
            )
            siblings_of_core[core] = util.parse_int_list(sibling_list)
        logging.debug("Siblings of cores are %s.", siblings_of_core)
    except ValueError as e:
        sys.exit("Could not read CPU information from kernel: {0}".format(e))

    return _get_cpu_cores_per_run0(
        coreLimit, num_of_threads, allCpus, cores_of_package, siblings_of_core
    )
def check_cgroup_availability(wait=1):
    """
    Basic utility to check the availability and permissions of cgroups.
    This will log some warnings for the user if necessary.
    On some systems, daemons such as cgrulesengd might interfere with the cgroups
    of a process soon after it was started. Thus this function starts a process,
    waits a configurable amount of time, and checks whether the cgroups have been changed.
    @param wait: a non-negative int that is interpreted as seconds to wait during the check
    @raise SystemExit: if cgroups are not usable
    """
    logging.basicConfig(format="%(levelname)s: %(message)s")
    runexecutor = RunExecutor(use_namespaces=False)
    my_cgroups = runexecutor.cgroups

    # Require the cgroup subsystems that benchmarking depends on.
    if not (
        CPUACCT in my_cgroups
        and CPUSET in my_cgroups
        # and FREEZER in my_cgroups # For now, we do not require freezer
        and MEMORY in my_cgroups
    ):
        sys.exit(1)

    with tempfile.NamedTemporaryFile(mode="rt") as tmp:
        # Run a child that sleeps and then prints its own cgroup membership;
        # the sleep gives interfering daemons time to move the process.
        runexecutor.execute_run(
            ["sh", "-c", "sleep {0}; cat /proc/self/cgroup".format(wait)],
            tmp.name,
            memlimit=1024 * 1024,  # set memlimit to force check for swapaccount
            # set cores and memory_nodes to force usage of CPUSET
            cores=util.parse_int_list(my_cgroups.get_value(CPUSET, "cpus")),
            memory_nodes=my_cgroups.read_allowed_memory_banks(),
        )
        lines = []
        for line in tmp:
            line = line.strip()
            # Skip empty lines, the echoed command line, and separator lines
            # consisting only of dashes; keep the /proc/self/cgroup entries.
            if (
                line
                and not line == "sh -c 'sleep {0}; cat /proc/self/cgroup'".format(wait)
                and not all(c == "-" for c in line)
            ):
                lines.append(line)
    task_cgroups = find_my_cgroups(lines)

    fail = False
    for subsystem in CPUACCT, CPUSET, MEMORY, FREEZER:
        if subsystem in my_cgroups:
            # The task should still be inside a "benchmark_" sub-cgroup of
            # our own cgroup; anything else suggests outside interference.
            if not task_cgroups[subsystem].startswith(
                os.path.join(my_cgroups[subsystem], "benchmark_")
            ):
                logging.warning(
                    "Task was in cgroup %s for subsystem %s, "
                    "which is not the expected sub-cgroup of %s. "
                    "Maybe some other program is interfering with cgroup management?",
                    task_cgroups[subsystem],
                    subsystem,
                    my_cgroups[subsystem],
                )
                fail = True
    if fail:
        sys.exit(1)
def check_cgroup_availability(wait=1):
    """Check that cgroups are available and usable, exiting on failure.

    Starts a short-lived child process, waits the given number of seconds,
    and then verifies that the child's cgroup membership was not changed by
    an outside agent (e.g. a daemon like cgrulesengd).  Logs warnings when
    something looks wrong.

    @param wait: a non-negative int that is interpreted as seconds to wait during the check
    @raise SystemExit: if cgroups are not usable
    """
    logging.basicConfig(format="%(levelname)s: %(message)s")
    executor = RunExecutor()
    own_cgroups = executor.cgroups

    # Freezer is currently not required, hence it is absent from this check.
    required = (CPUACCT, CPUSET, MEMORY)
    if not all(subsystem in own_cgroups for subsystem in required):
        sys.exit(1)

    command = "sleep {0}; cat /proc/self/cgroup".format(wait)
    with tempfile.NamedTemporaryFile(mode="rt") as tmp:
        executor.execute_run(
            ["sh", "-c", command],
            tmp.name,
            memlimit=1024 * 1024,  # set memlimit to force check for swapaccount
            # set cores and memory_nodes to force usage of CPUSET
            cores=util.parse_int_list(own_cgroups.get_value(CPUSET, "cpus")),
            memory_nodes=own_cgroups.read_allowed_memory_banks(),
        )
        echoed_cmdline = "sh -c 'sleep {0}; cat /proc/self/cgroup'".format(wait)
        # Keep only the actual /proc/self/cgroup lines: drop blanks, the
        # echoed command line, and dash-only separator lines.
        lines = [
            stripped
            for stripped in (raw.strip() for raw in tmp)
            if stripped
            and stripped != echoed_cmdline
            and not all(c == "-" for c in stripped)
        ]
    task_cgroups = find_my_cgroups(lines)

    fail = False
    for subsystem in (CPUACCT, CPUSET, MEMORY, FREEZER):
        if subsystem not in own_cgroups:
            continue
        expected_prefix = os.path.join(own_cgroups[subsystem], "benchmark_")
        if not task_cgroups[subsystem].startswith(expected_prefix):
            logging.warning(
                "Task was in cgroup %s for subsystem %s, "
                "which is not the expected sub-cgroup of %s. "
                "Maybe some other program is interfering with cgroup management?",
                task_cgroups[subsystem],
                subsystem,
                own_cgroups[subsystem],
            )
            fail = True
    if fail:
        sys.exit(1)
def get_cores_of_same_package_as(core):
    """Return the list of core ids on the same physical package as *core*.

    Reads the kernel's sysfs topology entry for the given core.
    """
    sibling_list = util.read_file(
        f"/sys/devices/system/cpu/cpu{core}/topology/core_siblings_list"
    )
    return util.parse_int_list(sibling_list)
def get_cpu_cores_per_run(
    coreLimit, num_of_threads, use_hyperthreading, my_cgroups, coreSet=None
):
    """
    Calculate an assignment of the available CPU cores to a number
    of parallel benchmark executions
    such that each run gets its own cores
    without overlapping of cores between runs.
    In case the machine has hyper-threading,
    this method tries to avoid putting two different runs on the same physical core
    (but it does not guarantee this if the number of parallel runs is too high to avoid it).
    In case the machine has multiple CPUs, this method avoids
    splitting a run across multiple CPUs if the number of cores per run
    is lower than the number of cores per CPU
    (splitting a run over multiple CPUs provides worse performance).
    It will also try to split the runs evenly across all available CPUs.

    A few theoretically-possible cases are not implemented,
    for example assigning three 10-core runs on a machine
    with two 16-core CPUs (this would have unfair core assignment
    and thus undesirable performance characteristics anyway).

    The list of available cores is read from the cgroup file system,
    such that the assigned cores are a subset of the cores
    that the current process is allowed to use.
    This script does currently not support situations
    where the available cores are asymmetrically split over CPUs,
    e.g. 3 cores on one CPU and 5 on another.

    @param coreLimit: the number of cores for each run
    @param num_of_threads: the number of parallel benchmark executions
    @param use_hyperthreading: forwarded to the core-assignment algorithm
        (presumably controls whether sibling cores may be used — confirm
        against _get_cpu_cores_per_run0)
    @param coreSet: the list of CPU cores identifiers provided by a user,
        None makes benchexec using all cores
    @return a list of lists, where each inner list contains the cores for one run
    """
    try:
        # read list of available CPU cores (from the cpuset cgroup)
        allCpus = util.parse_int_list(my_cgroups.get_value(cgroups.CPUSET, "cpus"))

        # Filter CPU cores according to the list of identifiers provided by a user
        if coreSet:
            # Reject core ids the user asked for that the cgroup does not allow.
            invalid_cores = sorted(set(coreSet).difference(set(allCpus)))
            if len(invalid_cores) > 0:
                raise ValueError(
                    "The following provided CPU cores are not available: "
                    + ", ".join(map(str, invalid_cores))
                )
            allCpus = [core for core in allCpus if core in coreSet]

        logging.debug("List of available CPU cores is %s.", allCpus)

        # read mapping of core to memory region (NUMA node), grouping the
        # cores by the first memory bank listed in their sysfs directory
        cores_of_memory_region = collections.defaultdict(list)
        for core in allCpus:
            coreDir = f"/sys/devices/system/cpu/cpu{core}/"
            memory_regions = _get_memory_banks_listed_in_dir(coreDir)
            if memory_regions:
                cores_of_memory_region[memory_regions[0]].append(core)
            else:
                # If some cores do not have NUMA information, skip using it completely
                logging.warning(
                    "Kernel does not have NUMA support. Use benchexec at your own risk."
                )
                cores_of_memory_region = {}
                break
        logging.debug("Memory regions of cores are %s.", cores_of_memory_region)

        # read mapping of core to CPU ("physical package")
        cores_of_package = collections.defaultdict(list)
        for core in allCpus:
            package = get_cpu_package_for_core(core)
            cores_of_package[package].append(core)
        logging.debug("Physical packages of cores are %s.", cores_of_package)

        # select the more fine grained division among memory regions and physical package
        # (more units means a finer division; NUMA info wins on a tie)
        if len(cores_of_memory_region) >= len(cores_of_package):
            cores_of_unit = cores_of_memory_region
            logging.debug("Using memory regions as the basis for cpu core division")
        else:
            cores_of_unit = cores_of_package
            logging.debug("Using physical packages as the basis for cpu core division")

        # read hyper-threading information (sibling cores sharing the same physical core)
        siblings_of_core = {}
        for core in allCpus:
            siblings = util.parse_int_list(
                util.read_file(
                    f"/sys/devices/system/cpu/cpu{core}/topology/thread_siblings_list"
                )
            )
            siblings_of_core[core] = siblings
        logging.debug("Siblings of cores are %s.", siblings_of_core)
    except ValueError as e:
        # Covers both malformed sysfs/cgroup values and the coreSet check above.
        sys.exit(f"Could not read CPU information from kernel: {e}")
    return _get_cpu_cores_per_run0(
        coreLimit,
        num_of_threads,
        use_hyperthreading,
        allCpus,
        cores_of_unit,
        siblings_of_core,
    )
def read_allowed_memory_banks(self):
    """Return the memory banks allowed by this cgroup as a list of ints."""
    mems_value = self.get_value(CPUSET, "mems")
    return util.parse_int_list(mems_value)
def get_cores_of_same_package_as(core):
    """Return all core ids that share the physical package of the given core."""
    path = "/sys/devices/system/cpu/cpu{0}/topology/core_siblings_list".format(core)
    return util.parse_int_list(util.read_file(path))
def read_allowed_memory_banks(self):
    """List every memory bank that this cgroup allows."""
    allowed = self.get_value(CPUSET, 'mems')
    return util.parse_int_list(allowed)