def execute_benchmark(benchmark, output_handler):
    """Execute all run sets of *benchmark* locally and report via *output_handler*.

    Runs up to ``benchmark.num_of_threads`` runs in parallel using worker
    threads, optionally pinning each worker slot to a fixed set of CPU cores
    and memory banks (via the cpuset cgroup) and measuring per-run-set wall
    time, CPU time, and (if supported) energy consumption.

    @param benchmark: the benchmark description (run sets, rlimits, config)
    @param output_handler: receives all progress and result notifications
    @return: 0 (an interrupt is reported through output_handler, not the
        return code)
    """

    run_sets_executed = 0

    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    # Resource *requirements* are not enforced locally; only resource *limits*
    # are, so warn if the two disagree.
    if (benchmark.requirements.cpu_model
            or benchmark.requirements.cpu_cores != benchmark.rlimits.get(
                CORELIMIT, None)
            or benchmark.requirements.memory != benchmark.rlimits.get(
                MEMLIMIT, None)):
        logging.warning(
            "Ignoring specified resource requirements in local-execution mode, "
            "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()

    coreAssignment = None  # cores per run
    memoryAssignment = None  # memory banks per run
    cpu_packages = None  # CPU packages our runs use; filters energy results below
    pqos = Pqos(
        show_warnings=True)  # The pqos class instance for cache allocation
    pqos.reset_monitoring()

    if CORELIMIT in benchmark.rlimits:
        # Core pinning is implemented via the cpuset cgroup subsystem.
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            sys.exit(
                "Cgroup subsystem cpuset is required for limiting the number of CPU cores/memory nodes."
            )
        # Pre-compute one fixed core set per parallel worker slot.
        coreAssignment = get_cpu_cores_per_run(
            benchmark.rlimits[CORELIMIT],
            benchmark.num_of_threads,
            benchmark.config.use_hyperthreading,
            my_cgroups,
            benchmark.config.coreset,
        )
        pqos.allocate_l3ca(coreAssignment)  # L3 cache allocation per core set
        memoryAssignment = get_memory_banks_per_run(coreAssignment, my_cgroups)
        cpu_packages = {
            get_cpu_package_for_core(core)
            for cores_of_run in coreAssignment for core in cores_of_run
        }
    elif benchmark.config.coreset:
        # Restricting the core *set* is only supported together with a core limit.
        sys.exit(
            "Please limit the number of cores first if you also want to limit the set of available cores."
        )

    if MEMLIMIT in benchmark.rlimits:
        # check whether we have enough memory in the used memory banks for all runs
        check_memory_size(
            benchmark.rlimits[MEMLIMIT],
            benchmark.num_of_threads,
            memoryAssignment,
            my_cgroups,
        )

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning(
            "Turbo boost of CPU is enabled. "
            "Starting more than one benchmark in parallel affects the CPU frequency "
            "and thus makes the performance unreliable.")

    # Created before the benchmark and queried afterwards so they can report
    # whether throttling/swapping occurred *during* the measurements.
    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(
                runSet, "because it has no files")

        else:
            run_sets_executed += 1
            # get times before runSet
            energy_measurement = EnergyMeasurement.create_if_supported()
            # RUSAGE_CHILDREN: accumulate CPU time of the benchmarked child processes.
            ruBefore = resource.getrusage(resource.RUSAGE_CHILDREN)
            walltime_before = util.read_monotonic_time()
            if energy_measurement:
                energy_measurement.start()

            output_handler.output_before_run_set(runSet)

            # put all runs into a queue
            for run in runSet.runs:
                _Worker.working_queue.put(run)

            # keep a counter of unfinished runs for the below assertion
            unfinished_runs = len(runSet.runs)
            unfinished_runs_lock = threading.Lock()

            def run_finished():
                # Callback passed to each worker; called once per completed run.
                nonlocal unfinished_runs
                with unfinished_runs_lock:
                    unfinished_runs -= 1

            if not containerexecutor.NATIVE_CLONE_CALLBACK_SUPPORTED:
                logging.debug(
                    "Using sys.setswitchinterval() workaround for #435 in container "
                    "mode because native callback is not available.")
                # Effectively suppress GIL thread switching (interval of 1000s)
                # while worker threads start containers; restored further below.
                py_switch_interval = sys.getswitchinterval()
                sys.setswitchinterval(1000)

            # create some workers (never more workers than there are runs)
            for i in range(min(benchmark.num_of_threads, unfinished_runs)):
                if STOPPED_BY_INTERRUPT:
                    break
                cores = coreAssignment[i] if coreAssignment else None
                memBanks = memoryAssignment[i] if memoryAssignment else None
                WORKER_THREADS.append(
                    _Worker(benchmark, cores, memBanks, output_handler,
                            run_finished))

            # wait until workers are finished (all tasks done or STOPPED_BY_INTERRUPT)
            for worker in WORKER_THREADS:
                worker.join()
            assert unfinished_runs == 0 or STOPPED_BY_INTERRUPT

            # get times after runSet
            walltime_after = util.read_monotonic_time()
            energy = energy_measurement.stop() if energy_measurement else None
            usedWallTime = walltime_after - walltime_before
            ruAfter = resource.getrusage(resource.RUSAGE_CHILDREN)
            usedCpuTime = (ruAfter.ru_utime + ruAfter.ru_stime) - (
                ruBefore.ru_utime + ruBefore.ru_stime)
            if energy and cpu_packages:
                # Keep only energy readings for CPU packages our runs executed on.
                energy = {
                    pkg: energy[pkg]
                    for pkg in energy if pkg in cpu_packages
                }

            if not containerexecutor.NATIVE_CLONE_CALLBACK_SUPPORTED:
                # Restore the interpreter's original thread-switch interval.
                sys.setswitchinterval(py_switch_interval)

            if STOPPED_BY_INTERRUPT:
                output_handler.set_error("interrupted", runSet)
            output_handler.output_after_run_set(runSet,
                                                cputime=usedCpuTime,
                                                walltime=usedWallTime,
                                                energy=energy)

    if throttle_check.has_throttled():
        logging.warning(
            "CPU throttled itself during benchmarking due to overheating. "
            "Benchmark results are unreliable!")
    if swap_check.has_swapped():
        logging.warning("System has swapped during benchmarking. "
                        "Benchmark results are unreliable!")
    pqos.reset_resources()
    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)

    return 0
Example #2
0
def execute_benchmark(benchmark, output_handler):
    """Execute all run sets of *benchmark* locally and report via *output_handler*.

    Runs up to ``benchmark.num_of_threads`` runs in parallel using worker
    threads, optionally pinning each worker slot to fixed CPU cores / memory
    banks (via cgroups) and optionally executing each parallel worker under a
    separate user account.

    @param benchmark: the benchmark description (run sets, rlimits, config)
    @param output_handler: receives all progress and result notifications
    @return: 0 (an interrupt is reported through output_handler, not the
        return code)
    """

    run_sets_executed = 0

    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    # Resource *requirements* are not enforced locally; only resource *limits*
    # are, so warn if the two disagree.
    if benchmark.requirements.cpu_model \
            or benchmark.requirements.cpu_cores != benchmark.rlimits.get(CORELIMIT, None) \
            or benchmark.requirements.memory != benchmark.rlimits.get(MEMLIMIT, None):
        logging.warning("Ignoring specified resource requirements in local-execution mode, "
                        "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()

    coreAssignment = None # cores per run
    memoryAssignment = None # memory banks per run
    if CORELIMIT in benchmark.rlimits:
        # Core pinning is implemented via the cpuset cgroup subsystem.
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            sys.exit("Cgroup subsystem cpuset is required for limiting the number of CPU cores/memory nodes.")
        # Pre-compute one fixed core set (and its memory banks) per worker slot.
        coreAssignment = get_cpu_cores_per_run(benchmark.rlimits[CORELIMIT], benchmark.num_of_threads, my_cgroups)
        memoryAssignment = get_memory_banks_per_run(coreAssignment, my_cgroups)

    if MEMLIMIT in benchmark.rlimits:
        # check whether we have enough memory in the used memory banks for all runs
        check_memory_size(benchmark.rlimits[MEMLIMIT], benchmark.num_of_threads,
                          memoryAssignment, my_cgroups)

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning("Turbo boost of CPU is enabled. "
                        "Starting more than one benchmark in parallel affects the CPU frequency "
                        "and thus makes the performance unreliable.")

    # Validate the per-thread user accounts: either a single account reused by
    # every worker (with a warning), or exactly one distinct account per thread.
    if benchmark.num_of_threads > 1 and benchmark.config.users:
        if len(benchmark.config.users) == 1:
            logging.warning(
                'Executing multiple parallel benchmarks under same user account. '
                'Consider specifying multiple user accounts for increased separation of runs.')
            # Replicate the single account so indexing by worker number works below.
            benchmark.config.users = [benchmark.config.users[0] for i in range(benchmark.num_of_threads)]
        elif len(benchmark.config.users) < benchmark.num_of_threads:
            sys.exit('Distributing parallel runs to different user accounts was requested, but not enough accounts were given. Please specify {} user accounts, or only one account.'.format(benchmark.num_of_threads))
        elif len(benchmark.config.users) != len(set(benchmark.config.users)):
            sys.exit('Same user account was specified multiple times, please specify {} separate accounts, or only one account.'.format(benchmark.num_of_threads))

    # Created before the benchmark and queried afterwards so they can report
    # whether throttling/swapping occurred *during* the measurements.
    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(runSet, "because it has no files")

        else:
            run_sets_executed += 1
            # get times before runSet
            # RUSAGE_CHILDREN: accumulate CPU time of the benchmarked child processes.
            ruBefore = resource.getrusage(resource.RUSAGE_CHILDREN)
            walltime_before = util.read_monotonic_time()
            energyBefore = util.measure_energy()

            output_handler.output_before_run_set(runSet)

            # put all runs into a queue
            for run in runSet.runs:
                _Worker.working_queue.put(run)

            # create some workers, one per parallel thread
            for i in range(benchmark.num_of_threads):
                cores = coreAssignment[i] if coreAssignment else None
                memBanks = memoryAssignment[i] if memoryAssignment else None
                user = benchmark.config.users[i] if benchmark.config.users else None
                WORKER_THREADS.append(_Worker(benchmark, cores, memBanks, user, output_handler))

            # wait until all tasks are done,
            # instead of queue.join(), we use a loop and sleep(1) to handle KeyboardInterrupt
            finished = False
            while not finished and not STOPPED_BY_INTERRUPT:
                try:
                    # Read the queue's unfinished-task count under its condition lock.
                    _Worker.working_queue.all_tasks_done.acquire()
                    finished = (_Worker.working_queue.unfinished_tasks == 0)
                finally:
                    _Worker.working_queue.all_tasks_done.release()

                try:
                    time.sleep(0.1) # sleep some time
                except KeyboardInterrupt:
                    stop()

            # get times after runSet
            walltime_after = util.read_monotonic_time()
            # Passing the "before" reading yields the energy delta for this run set.
            energy = util.measure_energy(energyBefore)
            usedWallTime = walltime_after - walltime_before
            ruAfter = resource.getrusage(resource.RUSAGE_CHILDREN)
            usedCpuTime = (ruAfter.ru_utime + ruAfter.ru_stime) \
                        - (ruBefore.ru_utime + ruBefore.ru_stime)

            if STOPPED_BY_INTERRUPT:
                output_handler.set_error('interrupted', runSet)
            output_handler.output_after_run_set(runSet, cputime=usedCpuTime, walltime=usedWallTime, energy=energy)

            # Release per-worker resources after each run set.
            for worker in WORKER_THREADS:
                worker.cleanup()

    if throttle_check.has_throttled():
        logging.warning('CPU throttled itself during benchmarking due to overheating. '
                        'Benchmark results are unreliable!')
    if swap_check.has_swapped():
        logging.warning('System has swapped during benchmarking. '
                        'Benchmark results are unreliable!')

    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)

    return 0
Example #3
0
def execute_benchmark(benchmark, output_handler):
    """Execute all run sets of *benchmark* locally and report via *output_handler*.

    Validates the required cgroup subsystems (collecting all missing ones
    before failing via ``my_cgroups.handle_errors``), optionally pins worker
    slots to fixed CPU cores / memory banks, and delegates the execution of
    each run set to ``_execute_run_set``.

    @param benchmark: the benchmark description (run sets, rlimits, config)
    @param output_handler: receives all progress and result notifications
    @return: 0 (an interrupt is reported through output_handler, not the
        return code)
    """

    run_sets_executed = 0

    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    # Resource *requirements* are not enforced locally; only resource *limits*
    # are, so warn if the two disagree.
    if (benchmark.requirements.cpu_model
            or benchmark.requirements.cpu_cores != benchmark.rlimits.cpu_cores
            or benchmark.requirements.memory != benchmark.rlimits.memory):
        logging.warning(
            "Ignoring specified resource requirements in local-execution mode, "
            "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()
    # Missing cgroup subsystems are collected here and reported together below,
    # instead of aborting on the first one.
    required_cgroups = set()

    coreAssignment = None  # cores per run
    memoryAssignment = None  # memory banks per run
    cpu_packages = None  # CPU packages our runs use (informs _execute_run_set)
    pqos = Pqos(
        show_warnings=True)  # The pqos class instance for cache allocation
    pqos.reset_monitoring()

    if benchmark.rlimits.cpu_cores:
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            required_cgroups.add(cgroups.CPUSET)
            logging.error("Cgroup subsystem cpuset is required "
                          "for limiting the number of CPU cores/memory nodes.")
        else:
            # Pre-compute one fixed core set per parallel worker slot.
            coreAssignment = resources.get_cpu_cores_per_run(
                benchmark.rlimits.cpu_cores,
                benchmark.num_of_threads,
                benchmark.config.use_hyperthreading,
                my_cgroups,
                benchmark.config.coreset,
            )
            pqos.allocate_l3ca(coreAssignment)  # L3 cache allocation per core set
            memoryAssignment = resources.get_memory_banks_per_run(
                coreAssignment, my_cgroups)
            cpu_packages = {
                resources.get_cpu_package_for_core(core)
                for cores_of_run in coreAssignment for core in cores_of_run
            }
    elif benchmark.config.coreset:
        # Restricting the core *set* is only supported together with a core limit.
        sys.exit(
            "Please limit the number of cores first if you also want to limit the set of available cores."
        )

    if benchmark.rlimits.memory:
        if not my_cgroups.require_subsystem(cgroups.MEMORY):
            required_cgroups.add(cgroups.MEMORY)
            logging.error(
                "Cgroup subsystem memory is required for memory limit.")
        else:
            # check whether we have enough memory in the used memory banks for all runs
            resources.check_memory_size(
                benchmark.rlimits.memory,
                benchmark.num_of_threads,
                memoryAssignment,
                my_cgroups,
            )

    if benchmark.rlimits.cputime:
        if not my_cgroups.require_subsystem(cgroups.CPUACCT):
            required_cgroups.add(cgroups.CPUACCT)
            logging.error(
                "Cgroup subsystem cpuacct is required for cputime limit.")

    # Report all missing cgroup subsystems at once.
    my_cgroups.handle_errors(required_cgroups)

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning(
            "Turbo boost of CPU is enabled. "
            "Starting more than one benchmark in parallel affects the CPU frequency "
            "and thus makes the performance unreliable.")

    # Created before the benchmark and queried afterwards so they can report
    # whether throttling/swapping occurred *during* the measurements.
    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(
                runSet, "because it has no files")

        else:
            run_sets_executed += 1
            # Per-run-set execution (workers, timing, energy) lives in a helper.
            _execute_run_set(
                runSet,
                benchmark,
                output_handler,
                coreAssignment,
                memoryAssignment,
                cpu_packages,
            )

    if throttle_check.has_throttled():
        logging.warning(
            "CPU throttled itself during benchmarking due to overheating. "
            "Benchmark results are unreliable!")
    if swap_check.has_swapped():
        logging.warning("System has swapped during benchmarking. "
                        "Benchmark results are unreliable!")
    pqos.reset_resources()
    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)

    return 0