def test_write_results_to_disk(self, mock_platform, no_results_instantiation_check):
    """Round-trip a Results instance through write_to_file().

    Populates a Results object (including per-core cycle/aperf/mperf
    counters), serialises it, re-loads it from the generated file and
    checks the two compare equal.
    """
    config = Config("krun/tests/example.krun")
    mock_platform.num_cpus = 4
    mock_platform.num_per_core_measurements = mock_platform.num_cpus
    out_file = "krun/tests/example_results.json.bz2"

    key = u"dummy:Java:default-java"
    results0 = Results(config, mock_platform)
    results0.audit = dict()
    results0.starting_temperatures = [4355, 9879]
    results0.wallclock_times = {key: [[1.000726]]}
    results0.eta_estimates = {key: [1.1]}
    results0.core_cycle_counts = {key: [[[2], [3], [4], [5]]]}
    results0.aperf_counts = {key: [[[3], [4], [5], [6]]]}
    results0.mperf_counts = {key: [[[4], [5], [6], [7]]]}
    results0.reboots = 5
    results0.error_flag = False

    results0.write_to_file()
    try:
        results1 = Results(config, None, results_file=out_file)
        assert results0 == results1
    finally:
        # Clean up the generated file even when the assertion (or the
        # re-load) fails, so a failing run does not leave a stale results
        # file behind to confuse subsequent test runs.
        os.unlink(out_file)
def test_write_results_to_disk(self, mock_platform):
    """Round-trip a Results instance through write_to_file().

    Populates a Results object, serialises it, re-loads it from the
    generated file and checks the two compare equal.
    """
    config = Config("krun/tests/example.krun")
    out_file = "krun/tests/example_results.json.bz2"

    results0 = Results(config, mock_platform)
    results0.audit = dict()
    results0.starting_temperatures = [4355, 9879]
    results0.data = {u"dummy:Java:default-java": [[1.000726]]}
    results0.eta_estimates = {u"dummy:Java:default-java": [1.1]}
    results0.reboots = 5
    results0.error_flag = False

    results0.write_to_file()
    try:
        results1 = Results(config, None, results_file=out_file)
        assert results0 == results1
    finally:
        # Clean up the generated file even when the assertion (or the
        # re-load) fails, so a failing run does not leave a stale results
        # file behind to confuse subsequent test runs.
        os.unlink(out_file)
def test_write_results_to_disk(self, mock_platform, no_results_instantiation_check):
    """Round-trip a Results instance through write_to_file().

    Populates a Results object (including per-core cycle/aperf/mperf
    counters), serialises it, re-loads it from the generated file and
    checks the two compare equal.
    """
    config = Config("krun/tests/example.krun")
    mock_platform.num_cpus = 4
    mock_platform.num_per_core_measurements = mock_platform.num_cpus
    out_file = "krun/tests/example_results.json.bz2"

    results0 = Results(config, mock_platform)
    results0.audit = dict()
    results0.starting_temperatures = [4355, 9879]
    results0.wallclock_times = {u"dummy:Java:default-java": [[1.000726]]}
    results0.eta_estimates = {u"dummy:Java:default-java": [1.1]}
    results0.core_cycle_counts = {u"dummy:Java:default-java": [[[2], [3], [4], [5]]]}
    results0.aperf_counts = {u"dummy:Java:default-java": [[[3], [4], [5], [6]]]}
    results0.mperf_counts = {u"dummy:Java:default-java": [[[4], [5], [6], [7]]]}
    results0.reboots = 5
    results0.error_flag = False

    results0.write_to_file()
    try:
        results1 = Results(config, None, results_file=out_file)
        assert results0 == results1
    finally:
        # Clean up the generated file even when the assertion (or the
        # re-load) fails, so a failing run does not leave a stale results
        # file behind to confuse subsequent test runs.
        os.unlink(out_file)
def run(self):
    """Benchmark execution starts here.

    Runs exactly one process execution (the next one named in the
    manifest): waits for the machine to be ready, runs the user's
    pre-exec hooks, executes the benchmark job, records measurements
    and ETA data into the results file, runs the post-exec hooks, and
    finally either reboots for the next execution or finishes the
    session with a summary mail.
    """
    # In reboot mode, wait for the system to come up before we proceed
    if self.platform.hardware_reboots:
        debug("Waiting %s seconds for the system to come up." %
              str(STARTUP_WAIT_SECONDS))
        self.platform.sleep(STARTUP_WAIT_SECONDS)

    # Important that the dmesg is collected after the above startup wait.
    # Otherwise we get spurious dmesg changes.
    self.platform.collect_starting_dmesg()

    assert self.manifest.num_execs_left > 0
    self.platform.wait_for_temperature_sensors()

    # Manifest keys look like "bench:vm:variant" — see the split below.
    bench, vm, variant = self.manifest.next_exec_key.split(":")
    key_pexec_idx = self.manifest.next_exec_key_index()
    job = ExecutionJob(self, vm, self.config.VMS[vm], bench, variant,
                       self.config.BENCHMARKS[bench], key_pexec_idx)

    # Default to error state. This is the value the finally block will see
    # if an exception is raised inside the try block, otherwise it is
    # re-assigned based on the result of running the benchmark.
    flag = 'E'

    # Run the pre-exec commands, the benchmark and the post-exec commands.
    # These are wrapped in a try/except, so that the post-exec commands
    # are always executed, even if an exception has occurred. We only
    # reboot /after/ the post-exec commands have completed.
    results = None
    try:
        # Run the user's pre-process-execution commands. We can't put an
        # ETA estimate in the environment for the pre-commands as we have
        # not (and should not) load the results file into memory yet.
        #
        # It might seem tempting to move this outside the try block, to
        # ensure that post-hooks are only run if pre-hooks ran. We don't,
        # thus avoiding the case where only *part* of the pre-hooks run,
        # but the post-hooks then don't run.
        util.run_shell_cmd_list(self.config.PRE_EXECUTION_CMDS,)

        # We collect rough execution times separate from real results. The
        # reason for this is that, even if a benchmark crashes it takes
        # time and we need to account for this when making estimates. A
        # crashing benchmark will give an empty list of iteration times,
        # meaning we can't use 'raw_exec_result' below for estimates.
        exec_start_time = time.time()
        measurements, instr_data, flag = job.run(self.mailer, self.dry_run)
        exec_end_time = time.time()

        # Only now is it OK to load the results file into memory.
        Results.ok_to_instantiate = True
        results = Results(self.config, self.platform,
                          results_file=self.config.results_filename())

        # Bail early if the process execution needs to be re-run
        # (flag "O" — presumably "outlier"/re-run; confirm against
        # ExecutionJob.run()).
        if flag == "O":
            util.run_shell_cmd_list(
                self.config.POST_EXECUTION_CMDS,
                extra_env=self._make_post_cmd_env(results)
            )
            info("Rebooting to re-run previous process execution")
            util.reboot(self.manifest, self.platform, update_count=False)
            # reboot() does not return
            raise RuntimeError("reached unreachable code!")

        # Store new result.
        results.append_exec_measurements(job.key, measurements, flag)

        # Store instrumentation data in a separate file
        if job.vm_info["vm_def"].instrument:
            key_exec_num = self.manifest.completed_exec_counts[job.key]
            util.dump_instr_json(job.key, key_exec_num, self.config,
                                 instr_data)

        # ETA bookkeeping: wall-clock duration of this execution, plus
        # the fixed startup wait when every execution costs a reboot.
        eta_info = exec_end_time - exec_start_time
        if self.platform.hardware_reboots:
            # Add time taken to wait for system to come up if we are in
            # hardware-reboot mode.
            eta_info += STARTUP_WAIT_SECONDS
        results.eta_estimates[job.key].append(eta_info)
        self.manifest.update(flag)
    except Exception:
        # Re-raise unchanged; the finally block below still dumps the
        # results file and runs the user's post-exec commands first.
        raise
    finally:
        # Run the user's post-process-execution commands with updated
        # ETA estimates. Important that this happens *after* dumping
        # results, as the user is likely copying intermediate results to
        # another host.

        # _make_post_cmd_env() needs the results to make an ETA. If an
        # exception occurred in the above try block, there's a chance that
        # they have not have been loaded.
        if results is None:
            Results.ok_to_instantiate = True
            results = Results(self.config, self.platform,
                              results_file=self.config.results_filename())

        # If errors occured, set error flag in results file
        if self.platform.check_dmesg_for_changes(self.manifest) or \
                flag == 'E':
            results.error_flag = True

        results.write_to_file()
        util.run_shell_cmd_list(
            self.config.POST_EXECUTION_CMDS,
            extra_env=self._make_post_cmd_env(results)
        )

    tfmt = self.get_overall_time_estimate_formatter(results)

    if self.manifest.eta_avail_idx == self.manifest.next_exec_idx:
        # We just found out roughly how long the session has left, mail out.
        msg = "ETA for current session now known: %s" % tfmt.finish_str
        util.log_and_mail(self.mailer, debug,
                          "ETA for Current Session Available",
                          msg, bypass_limiter=True)

    info("{:<25s}: {} ({} from now)".format(
        "Estimated completion (whole session)", tfmt.finish_str,
        tfmt.delta_str))

    info("%d executions left in scheduler queue" %
         self.manifest.num_execs_left)

    if self.manifest.num_execs_left > 0 and \
            self.manifest.eta_avail_idx > self.manifest.next_exec_idx:
        info("Executions until ETA known: %s" %
             (self.manifest.eta_avail_idx - self.manifest.next_exec_idx))

    # Although it would have been nice to have checked this prior to
    # running the execution, it depends on the results file, which we
    # should not load prior to the process execution.
    util.check_audit_unchanged(results, self.platform)

    assert self.manifest.num_execs_left >= 0
    if self.manifest.num_execs_left > 0:
        # print info about the next job
        benchmark, vm_name, variant = \
            self.manifest.next_exec_key.split(":")
        info("Next execution is '%s(%d)' (%s variant) under '%s'" %
             (benchmark, self.config.BENCHMARKS[benchmark],
              variant, vm_name))

        tfmt = self.get_exec_estimate_time_formatter(job.key, results)
        info("{:<35s}: {} ({} from now)".format(
            "Estimated completion (next execution)",
            tfmt.finish_str,
            tfmt.delta_str))

        info("Reboot in preparation for next execution")
        util.reboot(self.manifest, self.platform)
    elif self.manifest.num_execs_left == 0:
        # Session over: restore power management / CPU pinning and tell
        # the user where the results landed.
        self.platform.save_power()
        if self.config.ENABLE_PINNING:
            self.platform.clear_cpu_pinning()

        info("Done: Results dumped to %s" %
             self.config.results_filename())
        err_msg = "Errors/warnings occurred -- read the log!"
        if results.error_flag:
            warn(err_msg)

        msg = "Session completed. Log file at: '%s'" % (self.log_path)

        if results.error_flag:
            msg += "\n\n%s" % err_msg

        msg += "\n\nDon't forget to disable Krun at boot."

        util.log_and_mail(self.mailer, info, "Benchmarks Complete",
                          msg, bypass_limiter=True)