def run(self):
    """Periodically scan the file hierarchy below self._path and enforce limits.

    Wakes up every _CHECK_INTERVAL_SECONDS (or immediately when
    self._finished is set) and counts all regular, non-symlink files that
    are not container system-config files. If a size limit is configured,
    also sums the file sizes. self._check_limit() is expected to enforce
    the limits (it targets self._pid_to_kill) and return True when a limit
    was exceeded, in which case this thread terminates.
    """
    while not self._finished.is_set():
        self._finished.wait(_CHECK_INTERVAL_SECONDS)
        files_count = 0
        files_size = 0
        start_time = util.read_monotonic_time()
        for current_dir, dirs, files in os.walk(self._path):
            for file in files:
                abs_file = os.path.join(current_dir, file)
                # Path as visible for the tool inside the container.
                # BUGFIX: relpath must start from the absolute path, not the
                # bare file name — relpath(file, self._path) would produce a
                # bogus "../.."-style path, so the system-config check below
                # could never match.
                file = '/' + os.path.relpath(abs_file, self._path)  # as visible for tool
                if (not container.is_container_system_config_file(file)
                        and os.path.isfile(abs_file)
                        and not os.path.islink(abs_file)):
                    files_count += 1
                    if self._files_size_limit:
                        try:
                            files_size += os.path.getsize(abs_file)
                        except OSError:
                            # possibly just deleted; best effort, keep scanning
                            pass
                # Check after every file so a massive directory cannot delay
                # enforcement until the walk completes.
                if self._check_limit(files_count, files_size):
                    return

        duration = util.read_monotonic_time() - start_time
        logging.debug(
            "FileHierarchyLimitThread for process %d: "
            "files count: %d, files size: %d, scan duration %fs",
            self._pid_to_kill, files_count, files_size, duration)
        if duration > _DURATION_WARNING_THRESHOLD:
            logging.warning(
                "Scanning file hierarchy for enforcement of limits took %ds.",
                duration)
def output_after_run(self, run):
    """
    The method output_after_run() prints filename, result, time and status
    of a run to terminal and stores all data in XML
    """
    # format times, type is changed from float to string!
    cputime_str = util.format_number(run.cputime, TIME_PRECISION)
    walltime_str = util.format_number(run.walltime, TIME_PRECISION)

    # format numbers, number_of_digits is optional, so it can be None
    for column in run.columns:
        if column.number_of_digits is not None:
            # if the number ends with "s" or another letter, remove it
            if (not column.value.isdigit()) and column.value[-2:-1].isdigit():
                column.value = column.value[:-1]
            try:
                floatValue = float(column.value)
                column.value = util.format_number(floatValue, column.number_of_digits)
            except ValueError:
                # if value is no float, don't format it
                pass

    # store information in run
    run.resultline = self.create_output_line(
        run.runSet, run.identifier, run.status, cputime_str, walltime_str,
        run.values.get('host'), run.columns)
    self.add_values_to_run_xml(run)

    # output in terminal/console
    if USE_COLORS and sys.stdout.isatty():  # is terminal, not file
        statusStr = COLOR_DIC[run.category].format(run.status.ljust(LEN_OF_STATUS))
    else:
        statusStr = run.status.ljust(LEN_OF_STATUS)

    # "with" guarantees the lock is released even if printing or writing
    # raises (the original manual acquire()/release() pair did the same,
    # but only via an explicit try/finally).
    with OutputHandler.print_lock:
        valueStr = statusStr + cputime_str.rjust(8) + walltime_str.rjust(8)
        if self.benchmark.num_of_threads == 1:
            util.printOut(valueStr)
        else:
            timeStr = time.strftime("%H:%M:%S", time.localtime()) + " "*14
            util.printOut(timeStr + self.format_sourcefile_name(run.identifier, run.runSet) + valueStr)

        # write result in txt_file and XML
        self.txt_file.append(self.run_set_to_text(run.runSet), False)
        self.statistics.add_result(run)

        # We don't want to write this file too often, it can slow down the
        # whole script, so we wait at least 60 seconds between two
        # write-actions.
        # BUGFIX: the comment previously claimed "10 seconds", but the code
        # checks for 60 — the comment was stale and misleading.
        currentTime = util.read_monotonic_time()
        if currentTime - run.runSet.xml_file.lastModifiedTime > 60:
            run.runSet.xml_file.replace(self._result_xml_to_string(run.runSet.xml))
            run.runSet.xml_file.lastModifiedTime = util.read_monotonic_time()
def output_before_run_set(self, runSet):
    """
    The method output_before_run_set() calculates the length of the
    first column for the output in terminal and stores information
    about the runSet in XML.
    @param runSet: current run set
    """
    sourcefiles = [run.identifier for run in runSet.runs]

    # common prefix of file names
    # NOTE(review): if sourcefiles is empty, common_prefix becomes just
    # os.path.sep — harmless for the width computation below, but verify
    # against format_sourcefile_name's use of common_prefix.
    runSet.common_prefix = util.common_base_dir(sourcefiles) + os.path.sep

    # length of the first column in terminal (at least 20 characters,
    # measured after stripping the common prefix)
    runSet.max_length_of_filename = max(len(file) for file in sourcefiles) if sourcefiles else 20
    runSet.max_length_of_filename = max(20, runSet.max_length_of_filename - len(runSet.common_prefix))

    # write run set name to terminal
    numberOfFiles = len(runSet.runs)
    numberOfFilesStr = (" (1 file)" if numberOfFiles == 1
                        else " ({0} files)".format(numberOfFiles))
    # TERMINAL_TITLE is only emitted on a color-capable tty (escape sequence
    # that sets the terminal window title).
    util.printOut("\nexecuting run set"
                  + (" '" + runSet.name + "'" if runSet.name else "")
                  + numberOfFilesStr
                  + (TERMINAL_TITLE.format(runSet.full_name) if USE_COLORS and sys.stdout.isatty() else ""))

    # write information about the run set into txt_file
    self.writeRunSetInfoToLog(runSet)

    # prepare information for text output
    for run in runSet.runs:
        run.resultline = self.format_sourcefile_name(run.identifier, runSet)

        # prepare XML structure for each run and runSet
        run.xml = ET.Element("run",
                             {"name": run.identifier,
                              "files": "[" + ", ".join(run.sourcefiles) + "]"})
        if run.specific_options:
            run.xml.set("options", " ".join(run.specific_options))
        if run.properties:
            run.xml.set("properties", " ".join(sorted(run.properties)))
        # placeholder children so the XML schema is complete before results exist
        run.xml.extend(self.xml_dummy_elements)

    runSet.xml = self.runs_to_xml(runSet, runSet.runs)

    # write (empty) results to txt_file and XML
    self.txt_file.append(self.run_set_to_text(runSet), False)
    xml_file_name = self.get_filename(runSet.name, "xml")
    runSet.xml_file = filewriter.FileWriter(xml_file_name,
                                            self._result_xml_to_string(runSet.xml))
    # timestamp used by output_after_run() to rate-limit XML rewrites
    runSet.xml_file.lastModifiedTime = util.read_monotonic_time()
    self.all_created_files.append(xml_file_name)
    self.xml_file_names.append(xml_file_name)
def output_before_run_set(self, runSet):
    """
    Prepare terminal and XML output for a run set.

    Computes the common filename prefix and the first-column width for
    terminal output, announces the run set on the terminal, logs its
    metadata, builds one <run> XML element per run, and writes the
    initially empty result files.
    @param runSet: current run set
    """
    sourcefiles = [run.identifier for run in runSet.runs]

    # common prefix of file names
    runSet.common_prefix = util.common_base_dir(sourcefiles) + os.path.sep

    # length of the first column in terminal: at least 20 characters,
    # otherwise the longest name minus the shared prefix
    if sourcefiles:
        longest = max(len(name) for name in sourcefiles)
    else:
        longest = 20
    runSet.max_length_of_filename = longest
    runSet.max_length_of_filename = max(20, longest - len(runSet.common_prefix))

    # write run set name to terminal
    run_count = len(runSet.runs)
    if run_count == 1:
        count_suffix = " (1 file)"
    else:
        count_suffix = " ({0} files)".format(run_count)
    name_part = " '" + runSet.name + "'" if runSet.name else ""
    title_part = ""
    if USE_COLORS and sys.stdout.isatty():
        title_part = TERMINAL_TITLE.format(runSet.full_name)
    util.printOut("\nexecuting run set" + name_part + count_suffix + title_part)

    # write information about the run set into txt_file
    self.writeRunSetInfoToLog(runSet)

    # prepare text output and the per-run XML skeleton
    for run in runSet.runs:
        run.resultline = self.format_sourcefile_name(run.identifier, runSet)

        attributes = {
            "name": run.identifier,
            "files": "[" + ", ".join(run.sourcefiles) + "]",
        }
        run.xml = ET.Element("run", attributes)
        if run.specific_options:
            run.xml.set("options", " ".join(run.specific_options))
        if run.properties:
            run.xml.set("properties", " ".join(sorted(run.properties)))
        run.xml.extend(self.xml_dummy_elements)

    runSet.xml = self.runs_to_xml(runSet, runSet.runs)

    # write (empty) results to txt_file and XML
    self.txt_file.append(self.run_set_to_text(runSet), False)
    xml_file_name = self.get_filename(runSet.name, "xml")
    runSet.xml_file = filewriter.FileWriter(
        xml_file_name, self._result_xml_to_string(runSet.xml))
    runSet.xml_file.lastModifiedTime = util.read_monotonic_time()
    self.all_created_files.append(xml_file_name)
    self.xml_file_names.append(xml_file_name)
def output_after_run(self, run):
    """
    The method output_after_run() prints filename, result, time and status
    of a run to terminal and stores all data in XML
    """
    # format times, type is changed from float to string!
    cputime_str = util.format_number(run.cputime, TIME_PRECISION)
    walltime_str = util.format_number(run.walltime, TIME_PRECISION)

    # format numbers, number_of_digits is optional, so it can be None
    for column in run.columns:
        if column.number_of_digits is not None:
            # if the number ends with "s" or another letter, remove it
            if (not column.value.isdigit()) and column.value[-2:-1].isdigit():
                column.value = column.value[:-1]
            try:
                floatValue = float(column.value)
                column.value = util.format_number(floatValue, column.number_of_digits)
            except ValueError:
                # if value is no float, don't format it
                pass

    # store information in run
    run.resultline = self.create_output_line(
        run.runSet, run.identifier, run.status, cputime_str, walltime_str,
        run.values.get('host'), run.columns)
    self.add_values_to_run_xml(run)

    # output in terminal/console
    statusStr = COLOR_DIC[run.category].format(run.status.ljust(LEN_OF_STATUS))

    # "with" guarantees the lock is released even if printing or writing
    # raises (replaces the manual acquire()/try/finally/release() pattern).
    with OutputHandler.print_lock:
        valueStr = statusStr + cputime_str.rjust(8) + walltime_str.rjust(8)
        if self.benchmark.num_of_threads == 1:
            util.printOut(valueStr)
        else:
            timeStr = time.strftime("%H:%M:%S", time.localtime()) + " "*14
            util.printOut(timeStr + self.format_sourcefile_name(run.identifier, run.runSet) + valueStr)

        # write result in txt_file and XML
        self.txt_file.append(self.run_set_to_text(run.runSet), False)
        self.statistics.add_result(run)

        # We don't want to write this file too often, it can slow down the
        # whole script, so we wait at least 60 seconds between two
        # write-actions.
        # BUGFIX: the comment previously claimed "10 seconds", but the code
        # checks for 60 — the comment was stale and misleading.
        currentTime = util.read_monotonic_time()
        if currentTime - run.runSet.xml_file_last_modified_time > 60:
            self._write_rough_result_xml_to_file(run.runSet.xml, run.runSet.xml_file_name)
            run.runSet.xml_file_last_modified_time = util.read_monotonic_time()

    # Log-file handling happens outside the print lock, as in the original:
    # only the zip archive needs its own lock.
    if self.compress_results:
        log_file_path = os.path.relpath(
            run.log_file, os.path.join(self.benchmark.log_folder, os.pardir))
        with self.log_zip_lock:
            self.log_zip.write(run.log_file, log_file_path)
        os.remove(run.log_file)
    else:
        self.all_created_files.add(run.log_file)

    if os.path.isdir(run.result_files_folder):
        self.all_created_files.add(run.result_files_folder)
def output_before_run_set(self, runSet):
    """
    The method output_before_run_set() calculates the length of the
    first column for the output in terminal and stores information
    about the runSet in XML.
    @param runSet: current run set
    """
    # The XML file name is needed early: identifiers and source files are
    # stored relative to it below.
    xml_file_name = self.get_filename(runSet.name, "xml")

    identifier_names = [run.identifier for run in runSet.runs]

    # common prefix of file names (may be empty, e.g. for an empty run set)
    runSet.common_prefix = util.common_base_dir(identifier_names)
    if runSet.common_prefix:
        runSet.common_prefix += os.path.sep

    # length of the first column in terminal (at least 20 characters,
    # measured after stripping the common prefix)
    runSet.max_length_of_filename = max(len(file) for file in identifier_names) if identifier_names else 20
    runSet.max_length_of_filename = max(20, runSet.max_length_of_filename - len(runSet.common_prefix))

    # write run set name to terminal
    numberOfFiles = len(runSet.runs)
    numberOfFilesStr = (" (1 file)" if numberOfFiles == 1
                        else " ({0} files)".format(numberOfFiles))
    util.printOut("\nexecuting run set"
                  + (" '" + runSet.name + "'" if runSet.name else "")
                  + numberOfFilesStr
                  + TERMINAL_TITLE.format(runSet.full_name))

    # write information about the run set into txt_file
    self.writeRunSetInfoToLog(runSet)

    # prepare information for text output
    for run in runSet.runs:
        run.resultline = self.format_sourcefile_name(run.identifier, runSet)

        if run.sourcefiles:
            adjusted_identifier = util.relative_path(run.identifier, xml_file_name)
        else:
            # If no source files exist the task doesn't point to any file
            # that could be downloaded. In this case, the name doesn't have
            # to be adjusted because it's no path.
            adjusted_identifier = run.identifier

        # prepare XML structure for each run and runSet
        run_attributes = {'name': adjusted_identifier}
        if run.sourcefiles:
            adjusted_sourcefiles = [util.relative_path(s, xml_file_name) for s in run.sourcefiles]
            run_attributes['files'] = '[' + ', '.join(adjusted_sourcefiles) + ']'
        run.xml = ET.Element("run", run_attributes)
        if run.specific_options:
            run.xml.set("options", " ".join(run.specific_options))
        if run.properties:
            # each property object carries possibly several names
            all_properties = [prop_name for prop in run.properties for prop_name in prop.names]
            run.xml.set("properties", " ".join(sorted(all_properties)))
        # placeholder children so the XML schema is complete before results exist
        run.xml.extend(self.xml_dummy_elements)

    # a single block gets its name attached to the run-set XML
    block_name = runSet.blocks[0].name if len(runSet.blocks) == 1 else None
    runSet.xml = self.runs_to_xml(runSet, runSet.runs, block_name)

    # write (empty) results to txt_file and XML
    self.txt_file.append(self.run_set_to_text(runSet), False)
    runSet.xml_file_name = xml_file_name
    self._write_rough_result_xml_to_file(runSet.xml, runSet.xml_file_name)
    # timestamp used by output_after_run() to rate-limit XML rewrites
    runSet.xml_file_last_modified_time = util.read_monotonic_time()
    self.all_created_files.add(runSet.xml_file_name)
    self.xml_file_names.append(runSet.xml_file_name)
def execute_benchmark(benchmark, output_handler):
    """Execute all run sets of a benchmark locally using parallel worker threads.

    Sets up per-run core/memory-bank assignments via cgroups (if limits are
    configured), optionally distributes runs over multiple user accounts,
    executes each run set through a queue of _Worker threads, and reports
    per-run-set wall time, CPU time (via getrusage of children), and energy.
    Returns 0.
    """
    run_sets_executed = 0

    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    # Resource *requirements* only matter in remote/cloud modes; locally we
    # honor resource *limits* and warn if requirements differ.
    if benchmark.requirements.cpu_model \
            or benchmark.requirements.cpu_cores != benchmark.rlimits.get(CORELIMIT, None) \
            or benchmark.requirements.memory != benchmark.rlimits.get(MEMLIMIT, None):
        logging.warning("Ignoring specified resource requirements in local-execution mode, "
                        "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()

    coreAssignment = None  # cores per run
    memoryAssignment = None  # memory banks per run
    if CORELIMIT in benchmark.rlimits:
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            sys.exit("Cgroup subsystem cpuset is required for limiting the number of CPU cores/memory nodes.")
        coreAssignment = get_cpu_cores_per_run(benchmark.rlimits[CORELIMIT], benchmark.num_of_threads, my_cgroups)
        memoryAssignment = get_memory_banks_per_run(coreAssignment, my_cgroups)

    if MEMLIMIT in benchmark.rlimits:
        # check whether we have enough memory in the used memory banks for all runs
        check_memory_size(benchmark.rlimits[MEMLIMIT], benchmark.num_of_threads,
                          memoryAssignment, my_cgroups)

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning("Turbo boost of CPU is enabled. "
                        "Starting more than one benchmark in parallel affects the CPU frequency "
                        "and thus makes the performance unreliable.")

    # Optional multi-user separation: one account per worker thread.
    if benchmark.num_of_threads > 1 and benchmark.config.users:
        if len(benchmark.config.users) == 1:
            logging.warning(
                'Executing multiple parallel benchmarks under same user account. '
                'Consider specifying multiple user accounts for increased separation of runs.')
            benchmark.config.users = [benchmark.config.users[0] for i in range(benchmark.num_of_threads)]
        elif len(benchmark.config.users) < benchmark.num_of_threads:
            sys.exit('Distributing parallel runs to different user accounts was requested, '
                     'but not enough accounts were given. '
                     'Please specify {} user accounts, or only one account.'.format(benchmark.num_of_threads))
        elif len(benchmark.config.users) != len(set(benchmark.config.users)):
            sys.exit('Same user account was specified multiple times, please specify {} separate accounts, or only one account.'.format(benchmark.num_of_threads))

    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(runSet, "because it has no files")

        else:
            run_sets_executed += 1
            # get times before runSet
            ruBefore = resource.getrusage(resource.RUSAGE_CHILDREN)
            walltime_before = util.read_monotonic_time()
            energyBefore = util.measure_energy()

            output_handler.output_before_run_set(runSet)

            # put all runs into a queue
            for run in runSet.runs:
                _Worker.working_queue.put(run)

            # create some workers; each worker gets its (optional) core set,
            # memory banks, and user account by thread index
            for i in range(benchmark.num_of_threads):
                cores = coreAssignment[i] if coreAssignment else None
                memBanks = memoryAssignment[i] if memoryAssignment else None
                user = benchmark.config.users[i] if benchmark.config.users else None
                WORKER_THREADS.append(_Worker(benchmark, cores, memBanks, user, output_handler))

            # wait until all tasks are done,
            # instead of queue.join(), we use a loop and sleep(1) to handle KeyboardInterrupt
            finished = False
            while not finished and not STOPPED_BY_INTERRUPT:
                try:
                    # peek at the queue's internal counter under its condition lock
                    _Worker.working_queue.all_tasks_done.acquire()
                    finished = (_Worker.working_queue.unfinished_tasks == 0)
                finally:
                    _Worker.working_queue.all_tasks_done.release()

                try:
                    time.sleep(0.1)  # sleep some time
                except KeyboardInterrupt:
                    stop()

            # get times after runSet
            walltime_after = util.read_monotonic_time()
            energy = util.measure_energy(energyBefore)
            usedWallTime = walltime_after - walltime_before
            ruAfter = resource.getrusage(resource.RUSAGE_CHILDREN)
            usedCpuTime = (ruAfter.ru_utime + ruAfter.ru_stime) \
                        - (ruBefore.ru_utime + ruBefore.ru_stime)

            if STOPPED_BY_INTERRUPT:
                output_handler.set_error('interrupted', runSet)
            output_handler.output_after_run_set(runSet, cputime=usedCpuTime,
                                                walltime=usedWallTime, energy=energy)

    for worker in WORKER_THREADS:
        worker.cleanup()

    if throttle_check.has_throttled():
        logging.warning('CPU throttled itself during benchmarking due to overheating. '
                        'Benchmark results are unreliable!')
    if swap_check.has_swapped():
        logging.warning('System has swapped during benchmarking. '
                        'Benchmark results are unreliable!')

    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)

    return 0
def execute_benchmark(benchmark, output_handler):
    """Execute all run sets of a benchmark locally using parallel worker threads.

    Sets up per-run core/memory-bank assignments via cgroups and (optionally)
    Intel CAT cache allocation through pqos, runs each run set through a
    queue of _Worker threads, measures wall time, CPU time (getrusage of
    children) and energy per run set, and reports results through
    output_handler. Returns 0.
    """
    run_sets_executed = 0

    logging.debug("I will use %s threads.", benchmark.num_of_threads)

    # Resource *requirements* only matter in remote/cloud modes; locally we
    # honor resource *limits* and warn if requirements differ.
    if (benchmark.requirements.cpu_model
            or benchmark.requirements.cpu_cores != benchmark.rlimits.get(CORELIMIT, None)
            or benchmark.requirements.memory != benchmark.rlimits.get(MEMLIMIT, None)):
        logging.warning(
            "Ignoring specified resource requirements in local-execution mode, "
            "only resource limits are used.")

    my_cgroups = cgroups.find_my_cgroups()

    coreAssignment = None  # cores per run
    memoryAssignment = None  # memory banks per run
    cpu_packages = None
    pqos = Pqos(show_warnings=True)  # The pqos class instance for cache allocation
    pqos.reset_monitoring()
    if CORELIMIT in benchmark.rlimits:
        if not my_cgroups.require_subsystem(cgroups.CPUSET):
            sys.exit(
                "Cgroup subsystem cpuset is required for limiting the number of CPU cores/memory nodes."
            )
        coreAssignment = get_cpu_cores_per_run(
            benchmark.rlimits[CORELIMIT],
            benchmark.num_of_threads,
            benchmark.config.use_hyperthreading,
            my_cgroups,
            benchmark.config.coreset,
        )
        # partition the L3 cache between the parallel runs
        pqos.allocate_l3ca(coreAssignment)
        memoryAssignment = get_memory_banks_per_run(coreAssignment, my_cgroups)
        # CPU packages actually used; energy results are filtered to these below
        cpu_packages = {
            get_cpu_package_for_core(core)
            for cores_of_run in coreAssignment
            for core in cores_of_run
        }
    elif benchmark.config.coreset:
        sys.exit(
            "Please limit the number of cores first if you also want to limit the set of available cores."
        )

    if MEMLIMIT in benchmark.rlimits:
        # check whether we have enough memory in the used memory banks for all runs
        check_memory_size(
            benchmark.rlimits[MEMLIMIT],
            benchmark.num_of_threads,
            memoryAssignment,
            my_cgroups,
        )

    if benchmark.num_of_threads > 1 and systeminfo.is_turbo_boost_enabled():
        logging.warning(
            "Turbo boost of CPU is enabled. "
            "Starting more than one benchmark in parallel affects the CPU frequency "
            "and thus makes the performance unreliable.")

    throttle_check = systeminfo.CPUThrottleCheck()
    swap_check = systeminfo.SwapCheck()

    # iterate over run sets
    for runSet in benchmark.run_sets:

        if STOPPED_BY_INTERRUPT:
            break

        if not runSet.should_be_executed():
            output_handler.output_for_skipping_run_set(runSet)

        elif not runSet.runs:
            output_handler.output_for_skipping_run_set(runSet, "because it has no files")

        else:
            run_sets_executed += 1
            # get times before runSet
            energy_measurement = EnergyMeasurement.create_if_supported()
            ruBefore = resource.getrusage(resource.RUSAGE_CHILDREN)
            walltime_before = util.read_monotonic_time()
            if energy_measurement:
                energy_measurement.start()

            output_handler.output_before_run_set(runSet)

            # put all runs into a queue
            for run in runSet.runs:
                _Worker.working_queue.put(run)

            # keep a counter of unfinished runs for the below assertion
            unfinished_runs = len(runSet.runs)
            unfinished_runs_lock = threading.Lock()

            def run_finished():
                # callback invoked by each worker when a run completes
                nonlocal unfinished_runs
                with unfinished_runs_lock:
                    unfinished_runs -= 1

            if not containerexecutor.NATIVE_CLONE_CALLBACK_SUPPORTED:
                logging.debug(
                    "Using sys.setswitchinterval() workaround for #435 in container "
                    "mode because native callback is not available.")
                py_switch_interval = sys.getswitchinterval()
                sys.setswitchinterval(1000)

            # create some workers (no more workers than remaining runs)
            for i in range(min(benchmark.num_of_threads, unfinished_runs)):
                if STOPPED_BY_INTERRUPT:
                    break
                cores = coreAssignment[i] if coreAssignment else None
                memBanks = memoryAssignment[i] if memoryAssignment else None
                WORKER_THREADS.append(
                    _Worker(benchmark, cores, memBanks, output_handler, run_finished))

            # wait until workers are finished (all tasks done or STOPPED_BY_INTERRUPT)
            for worker in WORKER_THREADS:
                worker.join()
            assert unfinished_runs == 0 or STOPPED_BY_INTERRUPT

            # get times after runSet
            walltime_after = util.read_monotonic_time()
            energy = energy_measurement.stop() if energy_measurement else None
            usedWallTime = walltime_after - walltime_before
            ruAfter = resource.getrusage(resource.RUSAGE_CHILDREN)
            usedCpuTime = (ruAfter.ru_utime + ruAfter.ru_stime) - (
                ruBefore.ru_utime + ruBefore.ru_stime)
            # report energy only for CPU packages our runs actually used
            if energy and cpu_packages:
                energy = {pkg: energy[pkg] for pkg in energy if pkg in cpu_packages}

            if not containerexecutor.NATIVE_CLONE_CALLBACK_SUPPORTED:
                # restore the interpreter's normal thread-switch interval
                sys.setswitchinterval(py_switch_interval)

            if STOPPED_BY_INTERRUPT:
                output_handler.set_error("interrupted", runSet)
            output_handler.output_after_run_set(runSet, cputime=usedCpuTime,
                                                walltime=usedWallTime, energy=energy)

    if throttle_check.has_throttled():
        logging.warning(
            "CPU throttled itself during benchmarking due to overheating. "
            "Benchmark results are unreliable!")
    if swap_check.has_swapped():
        logging.warning("System has swapped during benchmarking. "
                        "Benchmark results are unreliable!")

    # undo the L3-cache allocation done above
    pqos.reset_resources()
    output_handler.output_after_benchmark(STOPPED_BY_INTERRUPT)
    return 0