def test_validation_callback_error_iter():
    """Test error propagation from custom validation callback using iterable."""

    def a(env):
        yield ValidationError(type="Compilation Error")
        yield ValidationError(type="Runtime Error")

    benchmark = Benchmark(BenchmarkProto(uri="benchmark://example-v0/foobar"))
    benchmark.add_validation_callback(a)

    errors = benchmark.ivalidate(env=None)
    assert next(errors) == ValidationError(type="Compilation Error")
    assert next(errors) == ValidationError(type="Runtime Error")
def test_validation_callback_error():
    """Test error propagation from custom validation callback."""

    def a(env):
        yield ValidationError(type="Compilation Error")
        yield ValidationError(type="Runtime Error")

    benchmark = Benchmark(BenchmarkProto(uri="benchmark://example-v0/foobar"))
    benchmark.add_validation_callback(a)

    errors = benchmark.validate(env=None)
    assert errors == [
        ValidationError(type="Compilation Error"),
        ValidationError(type="Runtime Error"),
    ]
def test_validation_callback_flaky():
    """Test error propagation on callback which *may* fail."""
    flaky = False

    def a(env):
        nonlocal flaky
        del env
        if flaky:
            yield ValidationError(type="Runtime Error")

    benchmark = Benchmark(BenchmarkProto(uri="benchmark://example-v0/foobar"))
    benchmark.add_validation_callback(a)

    errors = benchmark.validate(env=None)
    assert errors == []

    flaky = True
    errors = benchmark.validate(env=None)
    assert errors == [
        ValidationError(type="Runtime Error"),
    ]
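# A brief note on the two entry points exercised by the tests above, assuming
# the same module context: validate() collects every error from the registered
# callbacks into a list, while ivalidate() returns a lazy iterator so that
# errors surface one at a time. The callback `a` below is illustrative and
# mirrors the ones defined inside the tests.
def a(env):
    yield ValidationError(type="Compilation Error")
    yield ValidationError(type="Runtime Error")


benchmark = Benchmark(BenchmarkProto(uri="benchmark://example-v0/foobar"))
benchmark.add_validation_callback(a)

eager = benchmark.validate(env=None)  # list of ValidationError
lazy = benchmark.ivalidate(env=None)  # iterator of ValidationError
assert eager == list(lazy)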
def _compile_and_run_bitcode_file(
    bitcode_file: Path,
    cmd: str,
    cwd: Path,
    linkopts: List[str],
    env: Dict[str, str],
    num_runs: int,
    sanitizer: Optional[LlvmSanitizer] = None,
    timeout_seconds: float = 300,
    compilation_timeout_seconds: float = 60,
) -> BenchmarkExecutionResult:
    """Run the given cBench benchmark."""
    # cBench benchmarks expect that a file _finfo_dataset exists in the
    # current working directory and contains the number of benchmark
    # iterations in it.
    with open(cwd / "_finfo_dataset", "w") as f:
        print(num_runs, file=f)

    # Create a barebones execution environment for the benchmark.
    run_env = {
        "TMPDIR": os.environ.get("TMPDIR", ""),
        "HOME": os.environ.get("HOME", ""),
        "USER": os.environ.get("USER", ""),
        # Disable all logging from GRPC. In the past I have had false-positive
        # "Wrong output" errors caused by GRPC error messages being logged to
        # stderr.
        "GRPC_VERBOSITY": "NONE",
    }
    run_env.update(env)

    error_data = {}

    if sanitizer:
        clang_path = llvm.clang_path()
        binary = cwd / "a.out"
        error_data["run_cmd"] = cmd.replace("$BIN", "./a.out")
        # Generate the a.out binary file.
        compile_cmd = (
            [clang_path.name, str(bitcode_file), "-o", str(binary)]
            + _COMPILE_ARGS
            + list(linkopts)
            + _SANITIZER_FLAGS.get(sanitizer, [])
        )
        error_data["compile_cmd"] = compile_cmd
        logger.debug("compile: %s", compile_cmd)
        assert not binary.is_file()
        try:
            with Popen(
                compile_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
                env={"PATH": f"{clang_path.parent}:{os.environ.get('PATH', '')}"},
            ) as clang:
                output, _ = clang.communicate(timeout=compilation_timeout_seconds)
                if clang.returncode:
                    error_data["output"] = output
                    return BenchmarkExecutionResult(
                        walltime_seconds=timeout_seconds,
                        error=ValidationError(
                            type="Compilation failed",
                            data=error_data,
                        ),
                    )
        except subprocess.TimeoutExpired:
            error_data["timeout"] = compilation_timeout_seconds
            return BenchmarkExecutionResult(
                walltime_seconds=timeout_seconds,
                error=ValidationError(
                    type="Compilation timeout",
                    data=error_data,
                ),
            )
        assert binary.is_file()
    else:
        lli_path = llvm.lli_path()
        error_data["run_cmd"] = cmd.replace("$BIN", f"{lli_path.name} benchmark.bc")
        run_env["PATH"] = str(lli_path.parent)

    logger.debug("exec: %s", error_data["run_cmd"])
    try:
        with Timer() as timer, Popen(
            error_data["run_cmd"],
            shell=True,
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            env=run_env,
            cwd=cwd,
        ) as process:
            stdout, _ = process.communicate(timeout=timeout_seconds)
    except subprocess.TimeoutExpired:
        error_data["timeout_seconds"] = timeout_seconds
        return BenchmarkExecutionResult(
            walltime_seconds=timeout_seconds,
            error=ValidationError(
                type="Execution timeout",
                data=error_data,
            ),
        )
    finally:
        if sanitizer:
            binary.unlink()

    try:
        output = stdout.decode("utf-8")
    except UnicodeDecodeError:
        output = "<binary>"

    if process.returncode:
        # Runtime error.
        if sanitizer == LlvmSanitizer.ASAN and "LeakSanitizer" in output:
            error_type = "Memory leak"
        elif sanitizer == LlvmSanitizer.ASAN and "AddressSanitizer" in output:
            error_type = "Memory error"
        elif sanitizer == LlvmSanitizer.MSAN and "MemorySanitizer" in output:
            error_type = "Memory error"
        elif "Segmentation fault" in output:
            error_type = "Segmentation fault"
        elif "Illegal Instruction" in output:
            error_type = "Illegal Instruction"
        else:
            error_type = f"Runtime error ({process.returncode})"

        error_data["return_code"] = process.returncode
        error_data["output"] = output
        return BenchmarkExecutionResult(
            walltime_seconds=timer.time,
            error=ValidationError(
                type=error_type,
                data=error_data,
            ),
        )

    return BenchmarkExecutionResult(walltime_seconds=timer.time, output=output)
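# A minimal sketch of calling _compile_and_run_bitcode_file directly, assuming
# the same module context as above. The paths and command here are hypothetical
# placeholders; in this module the real values are supplied by validator_cb
# below. Without a sanitizer, "$BIN" in cmd is rewritten to run the bitcode
# under lli rather than a compiled a.out binary.
result = _compile_and_run_bitcode_file(
    bitcode_file=Path("/tmp/work/benchmark.bc"),  # hypothetical bitcode path
    cmd="$BIN 20",                                # hypothetical benchmark command
    cwd=Path("/tmp/work"),                        # hypothetical working directory
    linkopts=[],
    env={},
    num_runs=1,
)
if result.error:
    print("validation error:", result.error.type)
else:
    print(f"ran in {result.walltime_seconds:.3f}s")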
def validator_cb(env: "LlvmEnv") -> Optional[ValidationError]:  # noqa: F821
    """The validation callback."""
    with _CBENCH_DOWNLOAD_THREAD_LOCK:
        with fasteners.InterProcessLock(cache_path(".cbench-v1-runtime-data.LOCK")):
            download_cBench_runtime_data()

    cbench_data = site_data_path("llvm-v0/cbench-v1-runtime-data/runtime_data")
    for input_file_name in input_files:
        path = cbench_data / input_file_name
        if not path.is_file():
            raise FileNotFoundError(f"Required benchmark input not found: {path}")

    # Create a temporary working directory to execute the benchmark in.
    with tempfile.TemporaryDirectory(dir=env.service.connection.cache.path) as d:
        cwd = Path(d)

        # Expand shell variable substitutions in the benchmark command.
        expanded_command = cmd.replace("$D", str(cbench_data))

        # Translate the output file names into paths inside the working
        # directory.
        output_paths = [cwd / o for o in output_files]

        if pre_execution_callback:
            pre_execution_callback(cwd)

        # Produce a gold-standard output using a reference version of
        # the benchmark.
        if compare_output or output_files:
            gs_env = env.fork()
            try:
                # Reset to the original benchmark state and compile it.
                gs_env.reset(benchmark=env.benchmark)
                gs_env.write_bitcode(cwd / "benchmark.bc")
                gold_standard = _compile_and_run_bitcode_file(
                    bitcode_file=cwd / "benchmark.bc",
                    cmd=expanded_command,
                    cwd=cwd,
                    num_runs=1,
                    # Use default optimizations for gold standard.
                    linkopts=linkopts + ["-O2"],
                    # Always assume safe.
                    sanitizer=None,
                    env=os_env,
                )
                if gold_standard.error:
                    return ValidationError(
                        type=f"Gold standard: {gold_standard.error.type}",
                        data=gold_standard.error.data,
                    )
            finally:
                gs_env.close()

            # Check that the reference run produced the expected output
            # files.
            for path in output_paths:
                if not path.is_file():
                    try:
                        output = gold_standard.output
                    except UnicodeDecodeError:
                        output = "<binary>"
                    raise FileNotFoundError(
                        f"Expected file '{path.name}' not generated\n"
                        f"Benchmark: {env.benchmark}\n"
                        f"Command: {cmd}\n"
                        f"Output: {output}"
                    )
                path.rename(f"{path}.gold_standard")

        # Serialize the benchmark to a bitcode file that will then be
        # compiled to a binary.
        env.write_bitcode(cwd / "benchmark.bc")
        outcome = _compile_and_run_bitcode_file(
            bitcode_file=cwd / "benchmark.bc",
            cmd=expanded_command,
            cwd=cwd,
            num_runs=num_runs,
            linkopts=linkopts,
            sanitizer=sanitizer,
            env=os_env,
        )

        if outcome.error:
            return outcome.error

        # Run a user-specified validation hook.
        if validate_result:
            validate_result(outcome)

        # Difftest the console output.
        if compare_output and gold_standard.output != outcome.output:
            return ValidationError(
                type="Wrong output",
                data={"expected": gold_standard.output, "actual": outcome.output},
            )

        # Difftest the output files.
        for path in output_paths:
            if not path.is_file():
                return ValidationError(
                    type="Output not generated",
                    data={"path": path.name, "command": cmd},
                )
            with Popen(
                ["diff", str(path), f"{path}.gold_standard"],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            ) as diff:
                stdout, _ = diff.communicate(timeout=300)
                if diff.returncode:
                    try:
                        stdout = stdout.decode("utf-8")
                        return ValidationError(
                            type="Wrong output (file)",
                            data={"path": path.name, "diff": stdout},
                        )
                    except UnicodeDecodeError:
                        return ValidationError(
                            type="Wrong output (file)",
                            data={"path": path.name, "diff": "<binary>"},
                        )
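# A minimal sketch, assuming validator_cb is intended to be produced by an
# enclosing factory that supplies the names it closes over (cmd, input_files,
# output_files, linkopts, os_env, num_runs, compare_output, sanitizer,
# validate_result, pre_execution_callback). The factory name and signature
# below are hypothetical illustrations, not taken from the code above.
def make_validator_cb(
    cmd,
    input_files,
    output_files,
    linkopts,
    os_env,
    num_runs=1,
    compare_output=True,
    sanitizer=None,
    validate_result=None,
    pre_execution_callback=None,
):
    def validator_cb(env):
        ...  # body as defined above, closing over the factory arguments

    return validator_cb


# Hypothetical usage: attach the configured callback to a benchmark so that
# benchmark.validate(env) runs it.
# benchmark.add_validation_callback(make_validator_cb(cmd="$BIN $D/input.txt", ...))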