def execute_benchmark(benchmark, output_handler):
    """Execute all run sets of ``benchmark`` locally with worker threads.

    Before any run is started, the function
    - warns if resource requirements were specified that differ from the
      resource limits (only the limits are used here),
    - computes the per-run core and memory-bank assignment if a core limit is
      set (this needs the cpuset cgroup subsystem, otherwise the process exits),
    - checks the available memory if a memory limit is set,
    - warns about enabled turbo boost when more than one thread is used.

    For every run set that should be executed and has runs, all runs are put
    into ``_Worker.working_queue``, workers are created and joined, and the
    consumed CPU time, wall time and (if supported) energy are passed to
    ``output_handler.output_after_run_set``.

    @param benchmark: the benchmark to execute; this function reads its
        ``num_of_threads``, ``requirements``, ``rlimits``, ``config`` and
        ``run_sets`` attributes
    @param output_handler: receives the progress and result callbacks
    @return: always 0
    @raise SystemExit: (via ``sys.exit``) if a core limit is given but the
        cpuset cgroup subsystem is unavailable, or if a core set is configured
        without a core limit
    """
    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    if (benchmark.requirements.cpu_model
            or benchmark.requirements.cpu_cores != benchmark.rlimits.get(CORELIMIT, None)
            or benchmark.requirements.memory != benchmark.rlimits.get(MEMLIMIT, None)):
        logging.warning(
            "Ignoring specified resource requirements in local-execution mode, "
            "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()

    coreAssignment = None  # cores per run
    memoryAssignment = None  # memory banks per run
    cpu_packages = None
    pqos = Pqos(show_warnings=True)  # The pqos class instance for cache allocation
    pqos.reset_monitoring()

    if CORELIMIT in benchmark.rlimits:
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            sys.exit(
                "Cgroup subsystem cpuset is required for limiting the number of CPU cores/memory nodes."
            )
        coreAssignment = get_cpu_cores_per_run(
            benchmark.rlimits[CORELIMIT],
            benchmark.num_of_threads,
            benchmark.config.use_hyperthreading,
            my_cgroups,
            benchmark.config.coreset,
        )
        pqos.allocate_l3ca(coreAssignment)
        memoryAssignment = get_memory_banks_per_run(coreAssignment, my_cgroups)
        # Set of CPU packages of all assigned cores,
        # used below to filter the energy measurements.
        cpu_packages = {
            get_cpu_package_for_core(core)
            for cores_of_run in coreAssignment
            for core in cores_of_run
        }
    elif benchmark.config.coreset:
        sys.exit(
            "Please limit the number of cores first if you also want to limit the set of available cores."
        )

    if MEMLIMIT in benchmark.rlimits:
        # check whether we have enough memory in the used memory banks for all runs
        check_memory_size(
            benchmark.rlimits[MEMLIMIT],
            benchmark.num_of_threads,
            memoryAssignment,
            my_cgroups,
        )

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning(
            "Turbo boost of CPU is enabled. "
            "Starting more than one benchmark in parallel affects the CPU frequency "
            "and thus makes the performance unreliable.")

    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(
                runSet, "because it has no files")

        else:
            # get times before runSet
            energy_measurement = EnergyMeasurement.create_if_supported()
            ruBefore = resource.getrusage(resource.RUSAGE_CHILDREN)
            walltime_before = util.read_monotonic_time()
            if energy_measurement:
                energy_measurement.start()

            output_handler.output_before_run_set(runSet)

            # put all runs into a queue
            for run in runSet.runs:
                _Worker.working_queue.put(run)

            # keep a counter of unfinished runs for the below assertion
            unfinished_runs = len(runSet.runs)
            unfinished_runs_lock = threading.Lock()

            def run_finished():
                nonlocal unfinished_runs
                with unfinished_runs_lock:
                    unfinished_runs -= 1

            use_switch_interval_workaround = (
                not containerexecutor.NATIVE_CLONE_CALLBACK_SUPPORTED)
            if use_switch_interval_workaround:
                logging.debug(
                    "Using sys.setswitchinterval() workaround for #435 in container "
                    "mode because native callback is not available.")
                py_switch_interval = sys.getswitchinterval()
                sys.setswitchinterval(1000)

            # The switch interval is a process-wide setting, so it must be
            # restored even if creating/joining the workers or the assertion
            # below raises; hence the try/finally.
            try:
                # create some workers
                for i in range(min(benchmark.num_of_threads, unfinished_runs)):
                    if STOPPED_BY_INTERRUPT:
                        break
                    cores = coreAssignment[i] if coreAssignment else None
                    memBanks = memoryAssignment[i] if memoryAssignment else None
                    WORKER_THREADS.append(
                        _Worker(benchmark, cores, memBanks, output_handler,
                                run_finished))

                # wait until workers are finished (all tasks done or STOPPED_BY_INTERRUPT)
                for worker in WORKER_THREADS:
                    worker.join()
                assert unfinished_runs == 0 or STOPPED_BY_INTERRUPT

                # get times after runSet
                walltime_after = util.read_monotonic_time()
                energy = energy_measurement.stop() if energy_measurement else None
                usedWallTime = walltime_after - walltime_before
                ruAfter = resource.getrusage(resource.RUSAGE_CHILDREN)
                usedCpuTime = (ruAfter.ru_utime + ruAfter.ru_stime) - (
                    ruBefore.ru_utime + ruBefore.ru_stime)
                if energy and cpu_packages:
                    # keep only the measurements of packages we actually used
                    energy = {
                        pkg: energy[pkg] for pkg in energy if pkg in cpu_packages
                    }
            finally:
                if use_switch_interval_workaround:
                    sys.setswitchinterval(py_switch_interval)

            if STOPPED_BY_INTERRUPT:
                output_handler.set_error("interrupted", runSet)
            output_handler.output_after_run_set(
                runSet, cputime=usedCpuTime, walltime=usedWallTime, energy=energy)

    if throttle_check.has_throttled():
        logging.warning(
            "CPU throttled itself during benchmarking due to overheating. "
            "Benchmark results are unreliable!")
    if swap_check.has_swapped():
        logging.warning("System has swapped during benchmarking. "
                        "Benchmark results are unreliable!")

    pqos.reset_resources()
    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)

    return 0
def execute_benchmark(benchmark, output_handler):
    """Execute all run sets of ``benchmark`` locally with worker threads.

    Before any run is started, the function
    - warns if resource requirements were specified that differ from the
      resource limits (only the limits are used here),
    - computes the per-run core and memory-bank assignment if a core limit is
      set (this needs the cpuset cgroup subsystem, otherwise the process exits),
    - checks the available memory if a memory limit is set,
    - warns about enabled turbo boost when more than one thread is used,
    - validates the configured user accounts for parallel execution; a single
      account is replicated so that each worker index has an entry.

    For every run set that should be executed and has runs, all runs are put
    into ``_Worker.working_queue``, one worker per thread is created, and the
    function polls the queue until no tasks are unfinished or execution was
    interrupted. CPU time, wall time and energy are then passed to
    ``output_handler.output_after_run_set``.

    @param benchmark: the benchmark to execute; this function reads its
        ``num_of_threads``, ``requirements``, ``rlimits``, ``config`` and
        ``run_sets`` attributes and may overwrite ``benchmark.config.users``
    @param output_handler: receives the progress and result callbacks
    @return: always 0
    @raise SystemExit: (via ``sys.exit``) if a core limit is given but the
        cpuset cgroup subsystem is unavailable, if fewer user accounts than
        threads (but more than one) are given, or if a user account is
        specified more than once
    """
    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    if (benchmark.requirements.cpu_model
            or benchmark.requirements.cpu_cores != benchmark.rlimits.get(CORELIMIT, None)
            or benchmark.requirements.memory != benchmark.rlimits.get(MEMLIMIT, None)):
        logging.warning("Ignoring specified resource requirements in local-execution mode, "
                        "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()

    coreAssignment = None  # cores per run
    memoryAssignment = None  # memory banks per run

    if CORELIMIT in benchmark.rlimits:
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            sys.exit("Cgroup subsystem cpuset is required for limiting the number of CPU cores/memory nodes.")
        coreAssignment = get_cpu_cores_per_run(
            benchmark.rlimits[CORELIMIT], benchmark.num_of_threads, my_cgroups)
        memoryAssignment = get_memory_banks_per_run(coreAssignment, my_cgroups)

    if MEMLIMIT in benchmark.rlimits:
        # check whether we have enough memory in the used memory banks for all runs
        check_memory_size(benchmark.rlimits[MEMLIMIT], benchmark.num_of_threads,
                          memoryAssignment, my_cgroups)

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning("Turbo boost of CPU is enabled. "
                        "Starting more than one benchmark in parallel affects the CPU frequency "
                        "and thus makes the performance unreliable.")

    if benchmark.num_of_threads > 1 and benchmark.config.users:
        if len(benchmark.config.users) == 1:
            logging.warning(
                'Executing multiple parallel benchmarks under same user account. '
                'Consider specifying multiple user accounts for increased separation of runs.')
            # Replicate the single account so that users[i] exists for every worker.
            benchmark.config.users = (
                [benchmark.config.users[0]] * benchmark.num_of_threads)
        elif len(benchmark.config.users) < benchmark.num_of_threads:
            sys.exit(
                ('Distributing parallel runs to different user accounts was requested, '
                 'but not enough accounts were given. '
                 'Please specify {} user accounts, or only one account.'
                 ).format(benchmark.num_of_threads))
        elif len(benchmark.config.users) != len(set(benchmark.config.users)):
            sys.exit(
                ('Same user account was specified multiple times, '
                 'please specify {} separate accounts, or only one account.'
                 ).format(benchmark.num_of_threads))

    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(runSet, "because it has no files")

        else:
            # get times before runSet
            ruBefore = resource.getrusage(resource.RUSAGE_CHILDREN)
            walltime_before = util.read_monotonic_time()
            energyBefore = util.measure_energy()

            output_handler.output_before_run_set(runSet)

            # put all runs into a queue
            for run in runSet.runs:
                _Worker.working_queue.put(run)

            # create some workers
            for i in range(benchmark.num_of_threads):
                cores = coreAssignment[i] if coreAssignment else None
                memBanks = memoryAssignment[i] if memoryAssignment else None
                user = benchmark.config.users[i] if benchmark.config.users else None
                WORKER_THREADS.append(
                    _Worker(benchmark, cores, memBanks, user, output_handler))

            # wait until all tasks are done,
            # instead of queue.join(), we use a loop and sleep(0.1) to handle KeyboardInterrupt
            all_tasks_done = _Worker.working_queue.all_tasks_done
            finished = False
            while not finished and not STOPPED_BY_INTERRUPT:
                # "with" releases the lock only if it was actually acquired;
                # acquiring inside a try whose finally releases would raise on
                # an un-acquired lock if acquire() itself was interrupted.
                with all_tasks_done:
                    finished = (_Worker.working_queue.unfinished_tasks == 0)

                try:
                    time.sleep(0.1)  # sleep some time
                except KeyboardInterrupt:
                    stop()

            # get times after runSet
            walltime_after = util.read_monotonic_time()
            energy = util.measure_energy(energyBefore)
            usedWallTime = walltime_after - walltime_before
            ruAfter = resource.getrusage(resource.RUSAGE_CHILDREN)
            usedCpuTime = (ruAfter.ru_utime + ruAfter.ru_stime) \
                - (ruBefore.ru_utime + ruBefore.ru_stime)

            if STOPPED_BY_INTERRUPT:
                output_handler.set_error('interrupted', runSet)
            output_handler.output_after_run_set(
                runSet, cputime=usedCpuTime, walltime=usedWallTime, energy=energy)

    for worker in WORKER_THREADS:
        worker.cleanup()

    if throttle_check.has_throttled():
        logging.warning('CPU throttled itself during benchmarking due to overheating. '
                        'Benchmark results are unreliable!')
    if swap_check.has_swapped():
        logging.warning('System has swapped during benchmarking. '
                        'Benchmark results are unreliable!')

    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)

    return 0
def execute_benchmark(benchmark, output_handler):
    """Run every executable run set of ``benchmark`` on the local machine.

    Preparation: resource requirements that deviate from the resource limits
    only produce a warning. For each configured limit (CPU cores, memory,
    CPU time) the needed cgroup subsystem is requested; subsystems that are
    not available are logged, collected, and finally handed to
    ``my_cgroups.handle_errors``. With a core limit and an available cpuset
    subsystem, the per-run core and memory-bank assignment and the set of
    used CPU packages are computed.

    Execution: run sets that should not be executed or that have no runs are
    reported as skipped; all others are delegated to ``_execute_run_set``.

    @param benchmark: the benchmark to execute; this function reads its
        ``num_of_threads``, ``requirements``, ``rlimits``, ``config`` and
        ``run_sets`` attributes
    @param output_handler: receives the progress and result callbacks
    @return: always 0
    @raise SystemExit: (via ``sys.exit``) if a core set is configured
        without a core limit
    """
    # Counts the executed run sets; the value is not read within this function.
    run_sets_executed = 0

    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    requirements = benchmark.requirements
    limits = benchmark.rlimits
    requirements_differ = (
        requirements.cpu_model
        or requirements.cpu_cores != limits.cpu_cores
        or requirements.memory != limits.memory
    )
    if requirements_differ:
        logging.warning(
            "Ignoring specified resource requirements in local-execution mode, "
            "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()
    missing_subsystems = set()

    core_assignment = None  # cores per run
    memory_assignment = None  # memory banks per run
    cpu_packages = None
    # The pqos class instance for cache allocation
    pqos = Pqos(show_warnings=True)
    pqos.reset_monitoring()

    if limits.cpu_cores:
        if my_cgroups.require_subsystem(cgroups.CPUSET):
            core_assignment = resources.get_cpu_cores_per_run(
                limits.cpu_cores,
                benchmark.num_of_threads,
                benchmark.config.use_hyperthreading,
                my_cgroups,
                benchmark.config.coreset,
            )
            pqos.allocate_l3ca(core_assignment)
            memory_assignment = resources.get_memory_banks_per_run(
                core_assignment, my_cgroups)
            # Collect the CPU package of every assigned core.
            cpu_packages = set()
            for cores_of_run in core_assignment:
                for core in cores_of_run:
                    cpu_packages.add(resources.get_cpu_package_for_core(core))
        else:
            missing_subsystems.add(cgroups.CPUSET)
            logging.error("Cgroup subsystem cpuset is required "
                          "for limiting the number of CPU cores/memory nodes.")
    elif benchmark.config.coreset:
        sys.exit(
            "Please limit the number of cores first if you also want to limit the set of available cores."
        )

    if limits.memory:
        if my_cgroups.require_subsystem(cgroups.MEMORY):
            # check whether we have enough memory in the used memory banks for all runs
            resources.check_memory_size(
                limits.memory,
                benchmark.num_of_threads,
                memory_assignment,
                my_cgroups,
            )
        else:
            missing_subsystems.add(cgroups.MEMORY)
            logging.error(
                "Cgroup subsystem memory is required for memory limit.")

    if limits.cputime and not my_cgroups.require_subsystem(cgroups.CPUACCT):
        missing_subsystems.add(cgroups.CPUACCT)
        logging.error(
            "Cgroup subsystem cpuacct is required for cputime limit.")

    my_cgroups.handle_errors(missing_subsystems)

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning(
            "Turbo boost of CPU is enabled. "
            "Starting more than one benchmark in parallel affects the CPU frequency "
            "and thus makes the performance unreliable.")

    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for run_set in benchmark.run_sets:
        if STOPPED_BY_INTERRUPT:
            break

        if not run_set.should_be_executed():
            output_handler.output_for_skipping_run_set(run_set)
            continue

        if not run_set.runs:
            output_handler.output_for_skipping_run_set(
                run_set, "because it has no files")
            continue

        run_sets_executed += 1
        _execute_run_set(
            run_set,
            benchmark,
            output_handler,
            core_assignment,
            memory_assignment,
            cpu_packages,
        )

    if throttle_check.has_throttled():
        logging.warning(
            "CPU throttled itself during benchmarking due to overheating. "
            "Benchmark results are unreliable!")
    if swap_check.has_swapped():
        logging.warning("System has swapped during benchmarking. "
                        "Benchmark results are unreliable!")

    pqos.reset_resources()
    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)

    return 0