def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
    """Return the benchmark named by ``uri``, compiling it on demand.

    The URI path is resolved against ``self.dataset_root``. If the
    corresponding ``.bc`` file does not exist yet, the sibling ``.cl`` file
    is compiled with clang and the result is written to the ``.bc`` path
    through ``atomic_file_write``.

    :param uri: The parsed benchmark URI. Only its ``path`` is used to
        locate the files.

    :return: The value of ``BenchmarkWithSource.create`` for the bitcode
        file, with the OpenCL source attached as ``kernel.cl``.

    :raises LookupError: If the URI path names no benchmark, or if neither
        the ``.bc`` nor the ``.cl`` file exists.

    :raises BenchmarkInitError: If the compilation times out or clang exits
        with a non-zero return code.
    """
    self.install()

    benchmark_name = uri.path[1:]
    if not benchmark_name:
        raise LookupError(f"No benchmark specified: {uri}")

    # The absolute path of the file, without an extension.
    path_stem = os.path.normpath(f"{self.dataset_root}/{uri.path}")
    bc_path, cl_path = Path(f"{path_stem}.bc"), Path(f"{path_stem}.cl")

    # If the file does not exist, compile it on-demand.
    if not bc_path.is_file():
        if not cl_path.is_file():
            raise LookupError(
                f"Benchmark not found: {uri} (file not found: {cl_path}, path_stem {path_stem})"
            )

        # Compile the OpenCL kernel into a bitcode file.
        with atomic_file_write(bc_path) as tmp_bc_path:
            compile_command: List[str] = ClangInvocation.from_c_file(
                cl_path,
                copt=[
                    "-isystem",
                    str(self.libclc_dir),
                    "-include",
                    str(self.opencl_h_path),
                    "-target",
                    "nvptx64-nvidia-nvcl",
                    "-ferror-limit=1",  # Stop on first error.
                    "-w",  # No warnings.
                ],
            ).command(outpath=tmp_bc_path)
            logger.debug("Exec %s", compile_command)
            try:
                with Popen(
                    compile_command,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                ) as clang:
                    _, stderr = communicate(clang, timeout=300)
            except subprocess.TimeoutExpired as e:
                # Chain the cause so the underlying timeout stays visible.
                raise BenchmarkInitError(
                    f"Benchmark compilation timed out: {uri}") from e

        if clang.returncode:
            # Use a separate name rather than rebinding the List[str]
            # command to a string.
            command_str = " ".join(compile_command)
            # Compiler output is not guaranteed to be valid UTF-8. Replace
            # undecodable bytes so that a decoding error cannot mask the
            # actual compilation failure.
            error = truncate(
                stderr.decode("utf-8", errors="replace"),
                max_lines=20,
                max_line_len=20000,
            )
            raise BenchmarkInitError(f"Compilation job failed!\n"
                                     f"Command: {command_str}\n"
                                     f"Error: {error}")

    return BenchmarkWithSource.create(uri, bc_path, "kernel.cl", cl_path)
def get_compiler_includes(compiler: str) -> Iterable[Path]:
    """Run the system compiler in verbose mode on a dummy input to get the
    system header search path.

    This is a generator: the compiler is only invoked once iteration starts.

    :param compiler: The compiler executable to invoke.

    :return: An iterable of the include directories listed by the compiler.
        For each directory that contains a ``machine`` subdirectory, that
        subdirectory is yielded as well.

    :raises OSError: If the compiler cannot be started, exits with a
        non-zero return code, or its output does not contain a terminated
        ``#include <...>`` search list.
    """
    # Create a temporary directory to write the compiled 'binary' to, since
    # GNU assembler does not support piping to stdout.
    with tempfile.TemporaryDirectory() as d:
        try:
            process = subprocess.Popen(
                [
                    compiler, "-xc++", "-v", "-c", "-", "-o",
                    str(Path(d) / "a.out")
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE,
                universal_newlines=True,
            )
        except FileNotFoundError as e:
            raise OSError(f"Failed to invoke {compiler}. "
                          f"Is there a working system compiler?\n"
                          f"Error: {e}") from e

        # Use the process as a context manager so that its pipes are closed
        # even if communicate() raises.
        # NOTE(review): this relies on communicate() terminating the process
        # when the timeout expires; otherwise leaving the context would wait
        # on a live child. Confirm against the helper.
        with process:
            _, stderr = communicate(process, input="", timeout=30)

        if process.returncode:
            raise OSError(f"Failed to invoke {compiler}. "
                          f"Is there a working system compiler?\n"
                          f"Error: {stderr.strip()}")

    # Parse the compiler output that matches the conventional output format
    # used by clang and GCC:
    #
    #     #include <...> search starts here:
    #     /path/1
    #     /path/2
    #     End of search list
    in_search_list = False
    for line in stderr.split("\n"):
        if in_search_list and line.startswith("End of search list"):
            break
        elif in_search_list:
            # We have an include path to return.
            path = Path(line.strip())
            yield path
            # Compatibility fix for compiling benchmark sources which use the
            # '#include <endian.h>' header, which on macOS is located in a
            # 'machine/endian.h' directory.
            if (path / "machine").is_dir():
                yield path / "machine"
        elif line.startswith("#include <...> search starts here:"):
            in_search_list = True
    else:
        # The loop finished without seeing the end-of-list marker.
        msg = f"Failed to parse '#include <...>' search paths from {compiler}"
        stderr = stderr.strip()
        if stderr:
            msg += f":\n{stderr}"
        raise OSError(msg)
def _get_system_library_flags(compiler: str) -> Iterable[str]:
    """Private implementation function.

    Invokes ``compiler`` in verbose mode on a trivial program and yields
    ``-isystem``/directory flag pairs for every entry of the include search
    list printed on stderr. On macOS a trailing ``-L`` flag is also yielded.

    :param compiler: The compiler executable to invoke.

    :return: An iterable of command line flags.

    :raises HostCompilerFailure: If the compiler cannot be started, exits
        with a non-zero return code, or times out on all three attempts.

    :raises UnableToParseHostCompilerOutput: If no terminated include search
        list is found in the compiler output.
    """
    # The compiled binary is written to a temporary file because GNU
    # assembler does not support piping to stdout.
    cache_dir = transient_cache_path(".")
    cache_dir.mkdir(parents=True, exist_ok=True)
    with tempfile.NamedTemporaryFile(dir=cache_dir) as out_file:
        cmd = [compiler, "-xc++", "-v", "-", "-o", out_file.name]
        # On macOS we need to compile a binary to invoke the linker.
        if sys.platform != "darwin":
            cmd.append("-c")

        # Timeouts are retried, as they have been observed on heavily
        # overloaded systems (e.g. CI machines).
        completed = False
        for _ in range(3):
            try:
                with Popen(
                    cmd,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.PIPE,
                    stdin=subprocess.PIPE,
                    universal_newlines=True,
                ) as process:
                    _, stderr = communicate(
                        process=process,
                        input="int main(){return 0;}",
                        timeout=30,
                    )
                    if process.returncode:
                        raise HostCompilerFailure(
                            f"Failed to invoke '{compiler}'. "
                            f"Is there a working system compiler?\n"
                            f"Error: {stderr.strip()}"
                        )
                completed = True
                break
            except subprocess.TimeoutExpired:
                continue
            except FileNotFoundError as e:
                raise HostCompilerFailure(
                    f"Failed to invoke '{compiler}'. "
                    f"Is there a working system compiler?\n"
                    f"Error: {e}"
                ) from e

        if not completed:
            raise HostCompilerFailure(
                f"Compiler invocation '{join_cmd(cmd)}' timed out after 3 attempts."
            )

    # Parse the compiler output that matches the conventional output format
    # used by clang and GCC:
    #
    #     #include <...> search starts here:
    #     /path/1
    #     /path/2
    #     End of search list
    header_seen = False
    list_terminated = False
    for raw_line in stderr.split("\n"):
        if not header_seen:
            # Skip everything up to and including the header line.
            header_seen = raw_line.startswith("#include <...> search starts here:")
            continue
        if raw_line.startswith("End of search list"):
            list_terminated = True
            break

        # We have an include path to return.
        include_dir = Path(raw_line.strip())
        yield "-isystem"
        yield str(include_dir)
        # Compatibility fix for compiling benchmark sources which use the
        # '#include <endian.h>' header, which on macOS is located in a
        # 'machine/endian.h' directory.
        machine_dir = include_dir / "machine"
        if machine_dir.is_dir():
            yield "-isystem"
            yield str(machine_dir)

    if not list_terminated:
        msg = f"Failed to parse '#include <...>' search paths from '{compiler}'"
        stderr = stderr.strip()
        if stderr:
            msg += f":\n{stderr}"
        raise UnableToParseHostCompilerOutput(msg)

    if sys.platform == "darwin":
        yield "-L/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib"
def make_benchmark(
    inputs: Union[str, Path, ClangInvocation, List[Union[str, Path, ClangInvocation]]],
    copt: Optional[List[str]] = None,
    system_includes: bool = True,
    timeout: int = 600,
) -> Benchmark:
    """Create a benchmark for use by LLVM environments.

    This function takes one or more inputs and uses them to create a benchmark
    that can be passed to :meth:`compiler_gym.envs.LlvmEnv.reset`.

    For single-source C/C++ programs, you can pass the path of the source file:

        >>> benchmark = make_benchmark('my_app.c')
        >>> env = gym.make("llvm-v0")
        >>> env.reset(benchmark=benchmark)

    The clang invocation used is roughly equivalent to:

    .. code-block::

        $ clang my_app.c -O0 -c -emit-llvm -o benchmark.bc

    Additional compile-time arguments to clang can be provided using the
    :code:`copt` argument:

        >>> benchmark = make_benchmark('/path/to/my_app.cpp', copt=['-O2'])

    If you need more fine-grained control over the options, you can directly
    construct a :class:`ClangInvocation
    <compiler_gym.envs.llvm.ClangInvocation>` to pass a list of arguments to
    clang:

        >>> benchmark = make_benchmark(
            ClangInvocation(['/path/to/my_app.c'], timeout=10)
        )

    For multi-file programs, pass a list of inputs that will be compiled
    separately and then linked to a single module:

        >>> benchmark = make_benchmark([
            'main.c',
            'lib.cpp',
            'lib2.bc',
        ])

    If you already have prepared bitcode files, those can be linked and used
    directly:

        >>> benchmark = make_benchmark([
            'bitcode1.bc',
            'bitcode2.bc',
        ])

    Text-format LLVM assembly can also be used:

        >>> benchmark = make_benchmark('module.ll')

    .. note::

        LLVM bitcode compatibility is
        `not guaranteed <https://llvm.org/docs/DeveloperPolicy.html#ir-backwards-compatibility>`_,
        so you must ensure that any precompiled bitcodes are compatible with the
        LLVM version used by CompilerGym, which can be queried using
        :func:`env.compiler_version <compiler_gym.envs.CompilerEnv.compiler_version>`.

    :param inputs: An input, or list of inputs.

    :param copt: A list of command line options to pass to clang when
        compiling source files.

    :param system_includes: Whether to include the system standard libraries
        during compilation jobs. This requires a system toolchain. See
        :func:`get_system_includes`.

    :param timeout: The maximum number of seconds to allow clang to run before
        terminating.

    :return: A :code:`Benchmark` instance.

    :raises FileNotFoundError: If any input sources are not found.

    :raises TypeError: If the inputs are of unsupported types.

    :raises ValueError: If an input file has an unrecognized extension, or if
        no inputs are given.

    :raises OSError: If a suitable compiler cannot be found.

    :raises BenchmarkInitError: If a compilation job fails.

    :raises TimeoutExpired: If a compilation job exceeds :code:`timeout`
        seconds.
    """
    copt = copt or []

    bitcodes: List[Path] = []
    clang_jobs: List[ClangInvocation] = []
    ll_paths: List[Path] = []

    def _add_path(path: Path):
        """Sort a file into bitcodes, clang jobs, or LLVM assembly by suffix."""
        if not path.is_file():
            raise FileNotFoundError(path)

        if path.suffix == ".bc":
            bitcodes.append(path.absolute())
        elif path.suffix in {".c", ".cxx", ".cpp", ".cc"}:
            clang_jobs.append(
                ClangInvocation.from_c_file(
                    path,
                    copt=copt,
                    system_includes=system_includes,
                    timeout=timeout,
                )
            )
        elif path.suffix == ".ll":
            ll_paths.append(path)
        else:
            raise ValueError(f"Unrecognized file type: {path.name}")

    # Determine from inputs the list of pre-compiled bitcodes and the clang
    # invocations required to compile the bitcodes.
    if isinstance(inputs, (str, Path)):
        _add_path(Path(inputs))
    elif isinstance(inputs, ClangInvocation):
        clang_jobs.append(inputs)
    else:
        for item in inputs:
            if isinstance(item, (str, Path)):
                _add_path(Path(item))
            elif isinstance(item, ClangInvocation):
                clang_jobs.append(item)
            else:
                raise TypeError(f"Invalid input type: {type(item).__name__}")

    # Shortcut if we only have a single pre-compiled bitcode.
    if len(bitcodes) == 1 and not clang_jobs and not ll_paths:
        bitcode = bitcodes[0]
        return Benchmark.from_file(uri=f"benchmark://file-v0{bitcode}", path=bitcode)

    tmpdir_root = transient_cache_path(".")
    tmpdir_root.mkdir(exist_ok=True, parents=True)
    with tempfile.TemporaryDirectory(
        dir=tmpdir_root, prefix="llvm-make_benchmark-"
    ) as d:
        working_dir = Path(d)

        clang_outs = [
            working_dir / f"clang-out-{i}.bc" for i in range(1, len(clang_jobs) + 1)
        ]
        llvm_as_outs = [
            working_dir / f"llvm-as-out-{i}.bc" for i in range(1, len(ll_paths) + 1)
        ]

        # Run the clang and llvm-as invocations in parallel. Avoid running this
        # code path if possible as get_thread_pool_executor() requires locking.
        if clang_jobs or ll_paths:
            llvm_as_path = str(llvm.llvm_as_path())
            executor = get_thread_pool_executor()

            # Every argument is passed as a string so that the command can
            # be executed and printed uniformly.
            llvm_as_commands = [
                [llvm_as_path, str(ll_path), "-o", str(bc_path)]
                for ll_path, bc_path in zip(ll_paths, llvm_as_outs)
            ]

            # Fire off the clang and llvm-as jobs.
            futures = [
                executor.submit(run_command, job.command(out), job.timeout)
                for job, out in zip(clang_jobs, clang_outs)
            ] + [
                executor.submit(run_command, command, timeout)
                for command in llvm_as_commands
            ]

            # Block until finished. result() re-raises any job exception.
            for future in as_completed(futures):
                future.result()

            # Check that the expected files were generated.
            for clang_job, bc_path in zip(clang_jobs, clang_outs):
                if not bc_path.is_file():
                    raise BenchmarkInitError(
                        f"clang failed: {' '.join(clang_job.command(bc_path))}"
                    )
            for command, bc_path in zip(llvm_as_commands, llvm_as_outs):
                if not bc_path.is_file():
                    raise BenchmarkInitError(f"llvm-as failed: {command}")

        all_outs = bitcodes + clang_outs + llvm_as_outs
        if not all_outs:
            raise ValueError("No inputs")
        elif len(all_outs) == 1:
            # We only have a single bitcode so read it.
            with open(str(all_outs[0]), "rb") as f:
                bitcode = f.read()
        else:
            # Link all of the bitcodes into a single module. This must
            # include the bitcodes assembled from .ll inputs, otherwise
            # they would be silently dropped from the result.
            llvm_link_cmd = [str(llvm.llvm_link_path()), "-o", "-"] + [
                str(path) for path in all_outs
            ]
            # The context manager closes the pipes even if communicate()
            # raises.
            with subprocess.Popen(
                llvm_link_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE
            ) as llvm_link:
                bitcode, stderr = communicate(llvm_link, timeout=timeout)
            if llvm_link.returncode:
                raise BenchmarkInitError(
                    "Failed to link LLVM bitcodes with error: "
                    f"{stderr.decode('utf-8', errors='replace')}"
                )

    # Year, month, day, then time of day. A random suffix distinguishes
    # benchmarks created within the same second.
    timestamp = datetime.now().strftime("%Y%m%dT%H%M%S")
    uri = f"benchmark://user-v0/{timestamp}-{random.randrange(16**4):04x}"
    return Benchmark.from_file_contents(uri, bitcode)
def test_popen():
    """A process driven by communicate() in a Popen context is dead afterwards."""
    with Popen(["echo"]) as echo_process:
        communicate(echo_process, timeout=60)
    exit_status = echo_process.poll()
    assert exit_status is not None  # Process is dead.
def test_communicate_timeout():
    """communicate() raises TimeoutExpired and leaves no live process behind."""
    sleep_cmd = ["sleep", "60"]
    with pytest.raises(subprocess.TimeoutExpired), subprocess.Popen(
        sleep_cmd
    ) as sleeper:
        communicate(sleeper, timeout=1)
    exit_status = sleeper.poll()
    assert exit_status is not None  # Process is dead.
def benchmark_from_seed(self, seed: int, max_retries: int = 3, retry_count: int = 0) -> CsmithBenchmark:
    """Get a benchmark from a uint32 seed.

    Csmith is run with the given seed and its output is piped to clang to
    produce a bitcode. If Csmith exits with a non-zero return code, the
    call is retried with the same seed until ``max_retries`` is reached.

    :param seed: A number in the range 0 <= n < 2^32.

    :param max_retries: The number of Csmith attempts after which to give
        up.

    :param retry_count: The number of attempts already made. Used by the
        recursive retry; callers normally leave it at its default.

    :return: A benchmark instance.

    :raises OSError: If Csmith fails.

    :raises BenchmarkInitError: If the C program generated by Csmith cannot
        be lowered to LLVM-IR, or if generation times out.
    """
    if retry_count >= max_retries:
        raise OSError(
            f"Csmith failed after {retry_count} {plural(retry_count, 'attempt', 'attempts')} "
            f"with seed {seed}")

    self.install()

    # Run csmith with the given seed and pipe the output to clang to
    # assemble a bitcode.
    logger.debug("Exec csmith --seed %d", seed)
    try:
        with Popen(
            [str(self.csmith_bin_path), "--seed", str(seed)],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        ) as csmith:
            # Generate the C source.
            src, stderr = communicate(csmith, timeout=300)
            if csmith.returncode:
                try:
                    # NOTE(review): truncate() is assumed to return a single
                    # string. Joining a string with "\n" would place every
                    # character on its own line, so the result is logged
                    # as-is.
                    stderr = truncate(stderr.decode("utf-8"),
                                      max_line_len=200,
                                      max_lines=20)
                    logger.warning("Csmith failed with seed %d: %s", seed,
                                   stderr)
                except UnicodeDecodeError:
                    # Failed to interpret the stderr output, generate a generic
                    # error message.
                    logger.warning("Csmith failed with seed %d", seed)
                return self.benchmark_from_seed(seed,
                                                max_retries=max_retries,
                                                retry_count=retry_count + 1)

        # Compile to IR.
        with Popen(
            self.clang_compile_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        ) as clang:
            stdout, _ = communicate(clang, input=src, timeout=300)
            if clang.returncode:
                compile_cmd = " ".join(self.clang_compile_command)
                raise BenchmarkInitError(f"Compilation job failed!\n"
                                         f"Csmith seed: {seed}\n"
                                         f"Command: {compile_cmd}\n")
    except subprocess.TimeoutExpired as e:
        # Chain the cause so the underlying timeout stays visible.
        raise BenchmarkInitError(
            f"Benchmark generation using seed {seed} timed out") from e

    return self.benchmark_class.create(f"{self.name}/{seed}", stdout, src)