def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
    """Resolve ``uri`` to a benchmark, building its bitcode file on first use.

    The URI path, minus its first character, is used as an extension-less
    file stem beneath ``self.dataset_root``. When ``<stem>.bc`` is missing it
    is produced from ``<stem>.c`` by running the clang command assembled by
    ``ClangInvocation``.

    :param uri: The parsed benchmark URI.
    :return: A benchmark created from the bitcode file and its C source.
    :raises LookupError: If the URI names no benchmark, or if neither the
        bitcode file nor the C source file exists.
    """
    self.install()

    name = uri.path[1:]
    if not name:
        raise LookupError(f"No benchmark specified: {uri}")

    # Bitcode and source share one stem and differ only by extension.
    stem = self.dataset_root / name
    bitcode_file = Path(f"{stem}.bc")
    source_file = Path(f"{stem}.c")

    # Fast path: the bitcode was already built by an earlier call.
    if bitcode_file.is_file():
        return BenchmarkWithSource.create(
            uri, bitcode_file, "function.c", source_file
        )

    if not source_file.is_file():
        raise LookupError(
            f"Benchmark not found: {uri} (file not found: {source_file})"
        )

    clang_flags = [
        "-ferror-limit=1",  # Stop on first error.
        "-w",  # No warnings.
    ]
    # The compiler writes to a temporary path handed out by
    # atomic_file_write() rather than to the final bitcode path.
    with atomic_file_write(bitcode_file) as tmp_out:
        invocation = ClangInvocation.from_c_file(source_file, copt=clang_flags)
        subprocess.check_call(invocation.command(outpath=tmp_out), timeout=300)

    return BenchmarkWithSource.create(uri, bitcode_file, "function.c", source_file)
def benchmark(self, uri: Optional[str] = None) -> Benchmark:
    """Return the benchmark named by ``uri``, compiling it on first use.

    When ``uri`` is ``None`` or is no longer than ``self.name`` plus one
    character, a benchmark is chosen by a random index instead. Otherwise
    the text after that prefix is used as an extension-less file stem under
    ``self.dataset_root``: ``<stem>.txt`` holds the C++ source and
    ``<stem>.bc`` the compiled bitcode. A missing bitcode file is built by
    piping the pre-processed source to clang on stdin.

    :param uri: The benchmark URI, or ``None`` to select at random.
    :return: The selected benchmark.
    :raises LookupError: If neither the bitcode nor the source file exists.
    :raises BenchmarkInitError: If clang exits with a nonzero status, or if
        no bitcode file exists after compilation.
    :raises subprocess.TimeoutExpired: If clang does not finish within 300
        seconds. The child process is killed and reaped before this
        propagates.
    """
    self.install()
    if uri is None or len(uri) <= len(self.name) + 1:
        return self._get_benchmark_by_index(self.random.integers(self.size))

    # The absolute path of the file, without an extension.
    path_stem = self.dataset_root / uri[len(self.name) + 1 :]

    # If the file does not exist, compile it on-demand.
    bitcode_path = Path(f"{path_stem}.bc")
    cc_file_path = Path(f"{path_stem}.txt")

    if not bitcode_path.is_file():
        if not cc_file_path.is_file():
            raise LookupError(
                f"Benchmark not found: {uri} (file not found: {cc_file_path})"
            )

        # Load the C++ source into memory and pre-process it.
        with open(cc_file_path) as f:
            src = self.preprocess_poj104_source(f.read())

        # Compile the C++ source into a bitcode file.
        with atomic_file_write(bitcode_path) as tmp_bitcode_path:
            compile_cmd = ClangInvocation.from_c_file(
                "-",
                copt=[
                    "-xc++",
                    "-ferror-limit=1",  # Stop on first error.
                    "-w",  # No warnings.
                    # Some of the programs use the gets() function that was
                    # deprecated in C++11 and removed in C++14.
                    "-std=c++11",
                ],
            ).command(outpath=tmp_bitcode_path)
            logger.debug("Exec %s", compile_cmd)
            # The context manager closes the three pipes once the child has
            # exited.
            with subprocess.Popen(
                compile_cmd,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            ) as clang:
                try:
                    _, stderr = clang.communicate(src.encode("utf-8"), timeout=300)
                except subprocess.TimeoutExpired:
                    # communicate() does not stop the child on timeout, so
                    # kill it and reap it before propagating the error.
                    clang.kill()
                    clang.communicate()
                    # NOTE(review): assumes atomic_file_write() tolerates the
                    # temporary file being removed before the context exits
                    # - confirm against its implementation.
                    if tmp_bitcode_path.is_file():
                        tmp_bitcode_path.unlink()
                    raise

            if clang.returncode:
                compile_cmd = " ".join(compile_cmd)
                error = truncate(
                    stderr.decode("utf-8"), max_lines=20, max_line_len=100
                )
                # Drop the output of the failed job so that it cannot be
                # picked up as a compiled benchmark.
                if tmp_bitcode_path.is_file():
                    tmp_bitcode_path.unlink()
                raise BenchmarkInitError(
                    f"Compilation job failed!\n"
                    f"Command: {compile_cmd}\n"
                    f"Error: {error}"
                )

        if not bitcode_path.is_file():
            raise BenchmarkInitError(
                f"Compilation job failed to produce output file!\nCommand: {compile_cmd}"
            )

    return BenchmarkWithSource.create(uri, bitcode_path, "source.cc", cc_file_path)
def test_atomic_file_write_text_io(tmpwd: Path):
    """Text written through a ``mode="w"`` file object ends up at the target."""
    target = Path("a").resolve()

    with filesystem.atomic_file_write(target, fileobj=True, mode="w") as handle:
        handle.write("Hello!")

    # Read the target back in text mode and compare.
    assert target.read_text() == "Hello!"
def test_atomic_file_write_binary_io(tmpwd: Path):
    """Bytes written through the default-mode file object end up at the target."""
    target = Path("a").resolve()

    # No mode is passed, and the object accepts bytes.
    with filesystem.atomic_file_write(target, fileobj=True) as handle:
        handle.write(b"Hello!")

    # The payload is read back as text.
    assert target.read_text() == "Hello!"
def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
    """Return the OpenCL benchmark named by ``uri``, compiling it on demand.

    The URI path is joined onto ``self.dataset_root`` and normalized to give
    an extension-less file stem: ``<stem>.cl`` is the kernel source and
    ``<stem>.bc`` the compiled bitcode. A missing bitcode file is built by
    running the clang command assembled by ``ClangInvocation``.

    :param uri: The parsed benchmark URI.
    :return: A benchmark created from the bitcode file and its kernel source.
    :raises LookupError: If the URI names no benchmark, or if neither the
        bitcode nor the kernel source file exists.
    :raises BenchmarkInitError: If compilation times out after 300 seconds
        or clang exits with a nonzero status.
    """
    self.install()

    benchmark_name = uri.path[1:]
    if not benchmark_name:
        raise LookupError(f"No benchmark specified: {uri}")

    # The absolute path of the file, without an extension.
    path_stem = os.path.normpath(f"{self.dataset_root}/{uri.path}")
    bc_path, cl_path = Path(f"{path_stem}.bc"), Path(f"{path_stem}.cl")

    # If the file does not exist, compile it on-demand.
    if not bc_path.is_file():
        if not cl_path.is_file():
            raise LookupError(
                f"Benchmark not found: {uri} (file not found: {cl_path}, path_stem {path_stem})"
            )

        # Compile the OpenCL kernel into a bitcode file.
        with atomic_file_write(bc_path) as tmp_bc_path:
            compile_command: List[str] = ClangInvocation.from_c_file(
                cl_path,
                copt=[
                    "-isystem",
                    str(self.libclc_dir),
                    "-include",
                    str(self.opencl_h_path),
                    "-target",
                    "nvptx64-nvidia-nvcl",
                    "-ferror-limit=1",  # Stop on first error.
                    "-w",  # No warnings.
                ],
            ).command(outpath=tmp_bc_path)
            logger.debug("Exec %s", compile_command)
            try:
                with Popen(
                    compile_command,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                ) as clang:
                    _, stderr = communicate(clang, timeout=300)
            except subprocess.TimeoutExpired as err:
                # Drop whatever the interrupted job wrote so that it cannot
                # be picked up as a compiled benchmark.
                # NOTE(review): assumes atomic_file_write() tolerates the
                # temporary file being removed before the context exits -
                # confirm against its implementation.
                if tmp_bc_path.is_file():
                    tmp_bc_path.unlink()
                raise BenchmarkInitError(
                    f"Benchmark compilation timed out: {uri}"
                ) from err

            if clang.returncode:
                compile_command = " ".join(compile_command)
                error = truncate(
                    stderr.decode("utf-8"), max_lines=20, max_line_len=20000
                )
                # Same cleanup as on timeout: a failed job must not leave
                # output behind.
                if tmp_bc_path.is_file():
                    tmp_bc_path.unlink()
                raise BenchmarkInitError(
                    f"Compilation job failed!\n"
                    f"Command: {compile_command}\n"
                    f"Error: {error}"
                )

    return BenchmarkWithSource.create(uri, bc_path, "kernel.cl", cl_path)
def test_atomic_file_write_path(tmpwd: Path):
    """Writing via the yielded temporary path publishes content to the target.

    Checks that the temporary path differs from the target but shares its
    parent directory, and that after the block the target holds the content
    while the temporary path is no longer a file.
    """
    target = Path("a").resolve()
    assert not target.is_file()

    with filesystem.atomic_file_write(target) as scratch:
        assert scratch != target
        assert scratch.parent == target.parent
        # Write to the temporary file as normal.
        scratch.write_text("Hello!")

    assert target.read_text() == "Hello!"
    assert not scratch.is_file()
def _get_spec(gcc: Gcc, cache_dir: Path) -> Optional[GccSpec]:
    """Get the specification for a GCC executable.

    A pickled spec is looked up under ``cache_dir`` in a subdirectory named
    after a hash of the GCC version. If none can be read, the spec is built
    by parsing the optimization and parameter options of ``gcc`` and is then
    written to the cache.

    :param gcc: The executable.
    :param cache_dir: The directory that holds cached versions of the spec.
    :return: The spec, or ``None`` if a freshly parsed spec has no options.
    """
    # Get the version
    version = _gcc_get_version(gcc)

    spec = None
    # See if there is a pickled spec in the cache_dir. First we use a hash to
    # name the file.
    spec_path = cache_dir / _version_hash(version) / "spec.pkl"

    # Try to get the pickled version
    if os.path.isfile(spec_path):
        try:
            # NOTE(review): pickle.load() can execute arbitrary code; this
            # is only safe if cache_dir is writable by trusted users alone -
            # confirm.
            with open(spec_path, "rb") as f:
                spec = pickle.load(f)
            # Rebind the cached data to the executable passed in.
            spec = GccSpec(gcc=gcc, version=spec.version, options=spec.options)
            logger.debug("GccSpec for version '%s' read from %s", version, spec_path)
        except (pickle.UnpicklingError, EOFError) as e:
            logger.warning("Unable to read spec from '%s': %s", spec_path, e)

    if spec is None:
        # Pickle doesn't exist, parse
        optim_opts = _gcc_parse_optimize(gcc)
        param_opts = _gcc_parse_params(gcc)
        options = _fix_options(optim_opts + param_opts)
        spec = GccSpec(gcc, version, options)
        if not spec.options:
            return None

        # Cache the spec file for future.
        spec_path.parent.mkdir(exist_ok=True, parents=True)
        with atomic_file_write(spec_path, fileobj=True) as f:
            pickle.dump(spec, f)
        logger.debug("GccSpec for %s written to %s", version, spec_path)

    # The message reports a power of ten, so the exponent must be the
    # base-10 logarithm rather than the natural logarithm.
    logger.debug(
        "GccSpec size is approximately 10^%.0f", round(math.log10(spec.size))
    )

    return spec
def benchmark(self, uri: str) -> Benchmark:
    """Return the benchmark named by ``uri``, preprocessing its source on demand.

    The text after ``self.name`` plus one separator character names a
    directory under ``self.dataset_root``. The preprocessed source lives in
    that directory as ``src.c``; when it is missing it is generated from the
    directory's C source file by running ``self.gcc`` with ``-E``.

    :param uri: The benchmark URI.
    :return: A benchmark loaded from the preprocessed source file.
    :raises LookupError: If the URI names no benchmark, or if neither the
        preprocessed file nor the C source file exists.
    """
    self.install()

    name = uri[len(self.name) + 1 :]
    if not name:
        raise LookupError(f"No benchmark specified: {uri}")

    # Most of the source files are named after the parent directory, but not
    # all.
    irregular_sources = {
        "blowfish": "bf.c",
        "motion": "mpeg2.c",
        "sha": "sha_driver.c",
        "jpeg": "main.c",
    }
    c_file_name = irregular_sources.get(name, f"{name}.c")

    benchmark_dir = self.dataset_root / name
    original_source = benchmark_dir / c_file_name
    preprocessed_source = benchmark_dir / "src.c"

    # Fast path: the preprocessed file already exists.
    if preprocessed_source.is_file():
        return Benchmark.from_file(uri, preprocessed_source)

    if not original_source.is_file():
        raise LookupError(
            f"Benchmark not found: {uri} (file not found: {original_source})"
        )

    with atomic_file_write(preprocessed_source) as tmp_path:
        # TODO(github.com/facebookresearch/CompilerGym/issues/325): Send
        # over the unprocessed code to the service, have the service
        # preprocess. Until then, we do it client side with GCC having
        # to be fixed by an environment variable.
        #
        # Only file names are passed because gcc runs with its working
        # directory set to the benchmark directory.
        self.gcc(
            "-E",
            "-o",
            tmp_path.name,
            c_file_name,
            cwd=benchmark_dir,
            timeout=300,
        )

    return Benchmark.from_file(uri, preprocessed_source)
def _do_download_attempt(url: str, sha256: Optional[str]) -> bytes:
    """Download ``url`` once, verifying and caching it when a checksum is given.

    :param url: The URL to fetch.
    :param sha256: The expected SHA-256 hex digest of the content. When
        falsy, the content is neither verified nor cached.
    :return: The downloaded content.
    :raises DownloadFailed: If the digest of the content differs from
        ``sha256``.
    """
    logger.info("Downloading %s ...", url)
    payload = _get_url_data(url)

    if sha256:
        # Validate the checksum.
        digest = hashlib.sha256(payload).hexdigest()
        if digest != sha256:
            raise DownloadFailed(
                f"Checksum of download does not match:\n"
                f"Url: {url}\n"
                f"Expected: {sha256}\n"
                f"Actual: {digest}"
            )

        # Cache the downloaded file under a path keyed by its checksum.
        destination = cache_path(f"downloads/{sha256}")
        destination.parent.mkdir(parents=True, exist_ok=True)
        with atomic_file_write(destination, fileobj=True) as out:
            out.write(payload)

    logger.debug(f"Downloaded {url}")
    return payload
def _benchmark_uris(self) -> List[str]:
    """Fetch or download the URI list.

    If the manifest file already exists it is read and returned. Otherwise
    the compressed manifest is downloaded, decompressed according to
    ``self.manifest_compression``, written to ``self._manifest_path`` and
    parsed.

    :return: The benchmark URIs listed in the manifest.
    :raises TypeError: If ``self.manifest_compression`` is neither ``"bz2"``
        nor ``"gz"``.
    """
    if self._manifest_path.is_file():
        return self._read_manifest_file()

    # Thread-level and process-level locks to prevent races.
    with _TAR_MANIFEST_INSTALL_LOCK, InterProcessLock(self._manifest_lockfile):
        # Now that we have acquired the lock, repeat the check, since
        # another thread may have downloaded the manifest.
        if self._manifest_path.is_file():
            return self._read_manifest_file()

        # Determine how to decompress the manifest data.
        decompressor = {
            "bz2": lambda compressed_data: bz2.BZ2File(compressed_data),
            # GzipFile's first positional parameter is a file *name*; an
            # in-memory stream has to be passed as ``fileobj``.
            "gz": lambda compressed_data: gzip.GzipFile(fileobj=compressed_data),
        }.get(self.manifest_compression, None)
        if not decompressor:
            raise TypeError(
                f"Unknown manifest compression: {self.manifest_compression}"
            )

        # Decompress the manifest data.
        logger.debug("Downloading %s manifest", self.name)
        manifest_data = io.BytesIO(
            download(self.manifest_urls, self.manifest_sha256)
        )
        with decompressor(manifest_data) as f:
            manifest_data = f.read()

        # Although we have exclusive-execution locks, we still need to
        # create the manifest atomically to prevent calls to _benchmark_uris
        # racing to read an incompletely written manifest.
        with atomic_file_write(self._manifest_path, fileobj=True) as f:
            f.write(manifest_data)

        uris = self._read_manifest(manifest_data.decode("utf-8"))
        logger.debug("Downloaded %s manifest, %d entries", self.name, len(uris))
        return uris
def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
    """Resolve ``uri`` to a benchmark, building its bitcode file on first use.

    The URI path, minus its first character, is the benchmark name. The
    bitcode is stored as ``<name>.bc`` directly under ``self.dataset_root``
    and the C source inside the ``<name>`` subdirectory. A missing bitcode
    file is built by running the clang command assembled by
    ``ClangInvocation``.

    :param uri: The parsed benchmark URI.
    :return: A benchmark created from the bitcode file and its C source.
    :raises LookupError: If the URI names no benchmark, or if neither the
        bitcode file nor the C source file exists.
    """
    self.install()

    name = uri.path[1:]
    if not name:
        raise LookupError(f"No benchmark specified: {uri}")

    # Most of the source files are named after the parent directory, but not
    # all.
    irregular_sources = {
        "blowfish": "bf.c",
        "motion": "mpeg2.c",
        "sha": "sha_driver.c",
        "jpeg": "main.c",
    }
    source_name = irregular_sources.get(name, f"{name}.c")

    bitcode_file = self.dataset_root / f"{name}.bc"
    source_file = self.dataset_root / name / source_name

    # Fast path: the bitcode was already built by an earlier call.
    if bitcode_file.is_file():
        return BenchmarkWithSource.create(
            uri, bitcode_file, "function.c", source_file
        )

    if not source_file.is_file():
        raise LookupError(
            f"Benchmark not found: {uri} (file not found: {source_file})"
        )

    clang_flags = [
        "-ferror-limit=1",  # Stop on first error.
        "-w",  # No warnings.
    ]
    # The compiler writes to a temporary path handed out by
    # atomic_file_write() rather than to the final bitcode path.
    with atomic_file_write(bitcode_file) as tmp_out:
        invocation = ClangInvocation.from_c_file(source_file, copt=clang_flags)
        subprocess.check_call(invocation.command(outpath=tmp_out), timeout=300)

    return BenchmarkWithSource.create(uri, bitcode_file, "function.c", source_file)
def main(argv):
    """Run the RPC service until a shutdown signal is received.

    Sets up logging under the working directory, starts a gRPC server that
    hosts a ``CompilerGymService``, writes the bound port and the process ID
    to ``port.txt`` and ``pid.txt`` in the working directory, and then blocks
    until the shutdown signal fires.

    Exits with status 1 if an unrecognized command line argument is given,
    and with status 6 if sessions are still registered on the service after
    shutdown.

    :param argv: The command line arguments. Blank entries are ignored; no
        more than one non-blank entry may remain.
    """
    # Register a signal handler for SIGTERM that will set the shutdownSignal
    # future value.
    signal(SIGTERM, _shutdown_handler)

    argv = [x for x in argv if x.strip()]
    if len(argv) > 1:
        print(
            f"ERROR: Unrecognized command line argument '{argv[1]}'",
            file=sys.stderr,
        )
        sys.exit(1)

    working_dir = Path(FLAGS.working_dir or mkdtemp(prefix="compiler_gym-service-"))
    (working_dir / "logs").mkdir(exist_ok=True, parents=True)

    FLAGS.log_dir = str(working_dir / "logs")
    logging.get_absl_handler().use_absl_log_file()
    logging.set_verbosity(dbg.get_logging_level())

    # Create the service.
    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=FLAGS.rpc_service_threads),
        options=connection.GRPC_CHANNEL_OPTIONS,
    )
    # NOTE(review): compilation_session_type is not defined in this function;
    # presumably it is supplied by an enclosing scope - confirm.
    service = CompilerGymService(
        working_directory=working_dir,
        compilation_session_type=compilation_session_type,
    )
    compiler_gym_service_pb2_grpc.add_CompilerGymServiceServicer_to_server(
        service, server
    )

    address = f"0.0.0.0:{FLAGS.port}" if FLAGS.port else "0.0.0.0:0"
    # The value returned by add_insecure_port() is what gets published.
    port = server.add_insecure_port(address)

    with atomic_file_write(working_dir / "port.txt", fileobj=True, mode="w") as f:
        f.write(str(port))

    with atomic_file_write(working_dir / "pid.txt", fileobj=True, mode="w") as f:
        f.write(str(os.getpid()))

    logging.info(
        "Service %s listening on %d, PID = %d", working_dir, port, os.getpid()
    )

    server.start()

    # Block on the RPC service in a separate thread. This enables the
    # current thread to handle the shutdown routine.
    server_thread = Thread(target=server.wait_for_termination)
    server_thread.start()

    # Block until the shutdown signal is received.
    shutdown_signal.wait()
    logging.info("Shutting down the RPC service")
    server.stop(60).wait()
    server_thread.join()
    logging.info("Service closed")

    if len(service.sessions):
        # Count the same ``sessions`` collection that the check above reads.
        print(
            "ERROR: Killing a service with",
            plural(len(service.sessions), "active session", "active sessions"),
            file=sys.stderr,
        )
        sys.exit(6)
def benchmark_from_parsed_uri(self, uri: BenchmarkUri) -> Benchmark:
    """Return the benchmark named by ``uri``, compiling it on first use.

    The URI path is joined onto ``self.dataset_root`` and normalized to give
    an extension-less file stem: ``<stem>.txt`` holds the C++ source and
    ``<stem>.bc`` the compiled bitcode. A missing bitcode file is built by
    piping the pre-processed source to clang on stdin.

    :param uri: The parsed benchmark URI.
    :return: A benchmark created from the bitcode file and its source.
    :raises LookupError: If neither the bitcode nor the source file exists.
    :raises BenchmarkInitError: If compilation times out after 300 seconds,
        clang exits with a nonzero status, or no bitcode file exists after
        compilation.
    """
    self.install()

    # The absolute path of the file, without an extension.
    path_stem = os.path.normpath(f"{self.dataset_root}/{uri.path}")

    # If the file does not exist, compile it on-demand.
    bitcode_path = Path(f"{path_stem}.bc")
    cc_file_path = Path(f"{path_stem}.txt")

    if not bitcode_path.is_file():
        if not cc_file_path.is_file():
            raise LookupError(
                f"Benchmark not found: {uri} (file not found: {cc_file_path})"
            )

        # Load the C++ source into memory and pre-process it.
        with open(cc_file_path) as f:
            src = self.preprocess_poj104_source(f.read())

        # Compile the C++ source into a bitcode file.
        with atomic_file_write(bitcode_path) as tmp_bitcode_path:
            compile_cmd = ClangInvocation.from_c_file(
                "-",
                copt=[
                    "-xc++",
                    "-ferror-limit=1",  # Stop on first error.
                    "-w",  # No warnings.
                    # Some of the programs use the gets() function that was
                    # deprecated in C++11 and removed in C++14.
                    "-std=c++11",
                ],
            ).command(outpath=tmp_bitcode_path)
            logger.debug("Exec %s", compile_cmd)
            try:
                with Popen(
                    compile_cmd,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                ) as clang:
                    _, stderr = clang.communicate(
                        input=src.encode("utf-8"), timeout=300
                    )
            except subprocess.TimeoutExpired as err:
                # Apply the same cleanup as the nonzero-exit path below so
                # that an interrupted job leaves no output behind.
                if tmp_bitcode_path.is_file():
                    tmp_bitcode_path.unlink()
                raise BenchmarkInitError(
                    f"Benchmark compilation timed out: {uri}"
                ) from err

            # The failure check runs while the temporary path is still owned
            # by this context, so the unlink acts on the job's own output.
            # NOTE(review): assumes atomic_file_write() tolerates the
            # temporary file being removed before the context exits -
            # confirm against its implementation.
            if clang.returncode:
                compile_cmd = " ".join(compile_cmd)
                error = truncate(
                    stderr.decode("utf-8"), max_lines=20, max_line_len=100
                )
                if tmp_bitcode_path.is_file():
                    tmp_bitcode_path.unlink()
                raise BenchmarkInitError(
                    f"Compilation job failed!\n"
                    f"Command: {compile_cmd}\n"
                    f"Error: {error}"
                )

        if not bitcode_path.is_file():
            raise BenchmarkInitError(
                f"Compilation job failed to produce output file!\nCommand: {compile_cmd}"
            )

    return BenchmarkWithSource.create(uri, bitcode_path, "source.cc", cc_file_path)