def download_llvm_files() -> Path:
    """Download and unpack the LLVM data pack."""
    global _LLVM_UNPACKED_LOCATION

    unpacked_location = site_data_path("llvm-v0")
    # Fast path for repeated calls.
    if _LLVM_UNPACKED_LOCATION == unpacked_location:
        return unpacked_location

    with _LLVM_DOWNLOAD_LOCK:
        # Fast path for first call. This check will be repeated inside the
        # locked region if required.
        if (unpacked_location / ".unpacked").is_file():
            _LLVM_UNPACKED_LOCATION = unpacked_location
            return unpacked_location

        with InterProcessLock(cache_path(".llvm-v0-install.LOCK")):
            # Now that the lock is acquired, repeat the check to see if it is
            # necessary to download the dataset.
            if (unpacked_location / ".unpacked").is_file():
                return unpacked_location
            _download_llvm_files(unpacked_location)
            # Create the marker file to indicate that the directory is
            # unpacked and ready to go.
            (unpacked_location / ".unpacked").touch()
            _LLVM_UNPACKED_LOCATION = unpacked_location
            return unpacked_location
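# Hedged usage sketch, not part of the original module: concurrent callers
# all resolve to the same unpacked location, and the download runs at most
# once thanks to the thread lock + inter-process lock + ".unpacked" marker
# above. Only download_llvm_files() is taken from the code above; the helper
# name and thread count are illustrative.
from concurrent.futures import ThreadPoolExecutor


def warm_llvm_cache(num_threads: int = 8):
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        paths = list(
            executor.map(lambda _: download_llvm_files(), range(num_threads))
        )
    # Every call returns the identical site data path.
    assert len(set(paths)) == 1
    return paths[0]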
def _get_gcc_datasets(
    gcc_bin: Union[str, Path], site_data_base: Optional[Path] = None
) -> Iterable[Dataset]:
    site_data_base = site_data_base or site_data_path("gcc-v0")

    yield CHStoneDataset(gcc_bin=gcc_bin, site_data_base=site_data_base)
    yield AnghaBenchDataset(site_data_base=site_data_base)
    yield CsmithDataset(gcc_bin=gcc_bin, site_data_base=site_data_base)
def get_mlir_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset]:
    """Instantiate the builtin datasets.

    :param site_data_base: The root of the site data path.

    :return: An iterable sequence of :class:`Dataset
        <compiler_gym.datasets.Dataset>` instances.
    """
    site_data_base = site_data_base or site_data_path("mlir-v0")
    yield MatmulDataset(site_data_base=site_data_base)
def get_storage_paths() -> List[Path]:
    """Return the list of paths used by CompilerGym for filesystem storage.

    :return: A list of filesystem paths that CompilerGym uses to store files.
    """
    return sorted(
        {
            runfiles_path.cache_path("."),
            runfiles_path.transient_cache_path("."),
            runfiles_path.site_data_path("."),
        }
    )
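# Illustrative consumer of get_storage_paths(), not part of the original
# API: report how much disk space each CompilerGym storage root currently
# uses. The helper name is hypothetical.
import os
from pathlib import Path


def print_storage_usage() -> None:
    for root in get_storage_paths():
        size = sum(
            (Path(dirpath) / f).stat().st_size
            for dirpath, _, files in os.walk(root)
            for f in files
        )
        print(f"{root}: {size / 1024 ** 2:.1f} MB")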
def __init__(self, bin: Union[str, Path]):
    self.bin = str(bin)
    self.image = self.bin[len("docker:"):]
    if self.bin.startswith("docker:"):
        pull_docker_image(self.image)
        self.call = self._docker_run
    else:
        self.call = self._subprocess_run
    self.spec = _get_spec(self, cache_dir=site_data_path("gcc-v0"))
def setup(cwd: Path):
    cbench_data = site_data_path("llvm-v0/cbench-v1-runtime-data/runtime_data")
    # Copy the input data file into the current directory since ghostscript
    # doesn't like long input paths.
    shutil.copyfile(
        cbench_data / "office_data" / f"{dataset_id}.ps", cwd / "input.ps"
    )
    # Ghostscript doesn't like the library files being symlinks so copy them
    # into the working directory as regular files.
    for path in (cbench_data / "ghostscript").iterdir():
        if path.name.endswith(".ps"):
            shutil.copyfile(path, cwd / path.name)
def __init__(self, *args, **kwargs):
    super().__init__(
        name="benchmark://unrolling-v0",
        license="MIT",
        description="Unrolling example dataset",
        # TODO: what should we set this to? we are not using it
        site_data_base=site_data_path("example_dataset"),
    )
    self._benchmarks = {
        "/offsets1": Benchmark.from_file_contents(
            "benchmark://unrolling-v0/offsets1",
            self.preprocess(BENCHMARKS_PATH / "offsets1.c"),
        ),
        "/conv2d": Benchmark.from_file_contents(
            "benchmark://unrolling-v0/conv2d",
            self.preprocess(BENCHMARKS_PATH / "conv2d.c"),
        ),
    }
def download_cBench_runtime_data() -> bool:
    """Download and unpack the cBench runtime dataset."""
    cbench_data = site_data_path("llvm-v0/cbench-v1-runtime-data/runtime_data")
    if (cbench_data / "unpacked").is_file():
        return False
    else:
        # Clean up any partially-extracted data directory.
        if cbench_data.is_dir():
            shutil.rmtree(cbench_data)
        url, sha256 = _CBENCH_RUNTIME_DATA
        tar_contents = io.BytesIO(download(url, sha256))
        with tarfile.open(fileobj=tar_contents, mode="r:bz2") as tar:
            cbench_data.parent.mkdir(parents=True, exist_ok=True)
            tar.extractall(cbench_data.parent)
        assert cbench_data.is_dir()
        # Create the marker file to indicate that the directory is unpacked
        # and ready to go.
        (cbench_data / "unpacked").touch()
        return True
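# Generic sketch of the download-extract-marker idiom used above. This is a
# hypothetical helper, not part of the original module; it assumes only the
# module's own download(url, sha256) function. A marker file signals that an
# archive was fully extracted, so interrupted runs are cleaned up and retried.
import io
import shutil
import tarfile
from pathlib import Path


def ensure_unpacked(dest: Path, url: str, sha256: str) -> bool:
    marker = dest / "unpacked"
    if marker.is_file():
        return False  # Already extracted; nothing to do.
    if dest.is_dir():
        shutil.rmtree(dest)  # Clean up a partial extraction.
    tar_contents = io.BytesIO(download(url, sha256))
    with tarfile.open(fileobj=tar_contents, mode="r:bz2") as tar:
        dest.parent.mkdir(parents=True, exist_ok=True)
        tar.extractall(dest.parent)
    marker.touch()  # Mark the directory as complete.
    return True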
def test_download_llvm_threaded_load_test(temporary_environ, tmpwd: Path, mocker):
    """A load test for download_llvm_files() that checks that redundant
    downloads are not performed when multiple simultaneous calls to
    download_llvm_files() are issued.
    """
    mocker.spy(llvm, "_download_llvm_files")
    mocker.spy(llvm, "download")

    # Force the LLVM download function to run.
    llvm._LLVM_UNPACKED_LOCATION = None

    # Force a temporary new site data path and sanity check it.
    temporary_environ["COMPILER_GYM_SITE_DATA"] = str(tmpwd)
    assert str(site_data_path(".")).endswith(str(tmpwd))

    # Perform a bunch of concurrent calls to download_llvm_files().
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(llvm.download_llvm_files) for _ in range(100)]
        for future in futures:
            future.result()

    # For debugging in case of error.
    print("Downloads:", llvm._download_llvm_files.call_count)  # pylint: disable=no-member
    for root, _, filenames in os.walk(tmpwd):
        print(root)
        for filename in filenames:
            print(Path(root) / filename)

    # Check that the files were unpacked.
    assert (tmpwd / "llvm-v0" / "LICENSE").is_file()
    assert (tmpwd / "llvm-v0" / "bin" / "clang").is_file()

    # Check that the underlying download implementation was only called a
    # single time.
    assert llvm._download_llvm_files.call_count == 1  # pylint: disable=no-member
    assert llvm.download.call_count == 1
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.actions: List[int] = []
    self.datasets_site_path = site_data_path("llvm/10.0.0/bitcode_benchmarks")

    # Register the LLVM datasets.
    self.datasets_site_path.mkdir(parents=True, exist_ok=True)
    self.inactive_datasets_site_path.mkdir(parents=True, exist_ok=True)
    for dataset in LLVM_DATASETS:
        self.register_dataset(dataset)

    self.inst2vec = _INST2VEC_ENCODER

    self.observation.spaces["CpuInfo"].space = DictSpace(
        {
            "name": Sequence(size_range=(0, None), dtype=str),
            "cores_count": Scalar(min=None, max=None, dtype=int),
            "l1i_cache_size": Scalar(min=None, max=None, dtype=int),
            "l1i_cache_count": Scalar(min=None, max=None, dtype=int),
            "l1d_cache_size": Scalar(min=None, max=None, dtype=int),
            "l1d_cache_count": Scalar(min=None, max=None, dtype=int),
            "l2_cache_size": Scalar(min=None, max=None, dtype=int),
            "l2_cache_count": Scalar(min=None, max=None, dtype=int),
            "l3_cache_size": Scalar(min=None, max=None, dtype=int),
            "l3_cache_count": Scalar(min=None, max=None, dtype=int),
            "l4_cache_size": Scalar(min=None, max=None, dtype=int),
            "l4_cache_count": Scalar(min=None, max=None, dtype=int),
        }
    )

    self.observation.add_derived_space(
        id="Inst2vecPreprocessedText",
        base_id="Ir",
        space=Sequence(size_range=(0, None), dtype=str),
        cb=lambda base_observation: self.inst2vec.preprocess(base_observation),
        default_value="",
    )
    self.observation.add_derived_space(
        id="Inst2vecEmbeddingIndices",
        base_id="Ir",
        space=Sequence(size_range=(0, None), dtype=np.int32),
        cb=lambda base_observation: self.inst2vec.encode(
            self.inst2vec.preprocess(base_observation)
        ),
        default_value=np.array([self.inst2vec.vocab["!UNK"]]),
    )
    self.observation.add_derived_space(
        id="Inst2vec",
        base_id="Ir",
        space=Sequence(size_range=(0, None), dtype=np.ndarray),
        cb=lambda base_observation: self.inst2vec.embed(
            self.inst2vec.encode(self.inst2vec.preprocess(base_observation))
        ),
        default_value=np.vstack(
            [self.inst2vec.embeddings[self.inst2vec.vocab["!UNK"]]]
        ),
    )

    self.observation.add_derived_space(
        id="AutophaseDict",
        base_id="Autophase",
        space=DictSpace(
            {
                name: Scalar(min=0, max=None, dtype=int)
                for name in AUTOPHASE_FEATURE_NAMES
            }
        ),
        cb=lambda base_observation: {
            name: val
            for name, val in zip(AUTOPHASE_FEATURE_NAMES, base_observation)
        },
    )
def __init__(
    self,
    local_service_binary: Path,
    port_init_max_seconds: float,
    rpc_init_max_seconds: float,
    process_exit_max_seconds: float,
    script_args: List[str],
    script_env: Dict[str, str],
):
    """Constructor.

    :param local_service_binary: The path of the service binary.

    :raises TimeoutError: If fails to establish connection within a
        specified time limit.
    """
    self.process_exit_max_seconds = process_exit_max_seconds

    if not Path(local_service_binary).is_file():
        raise FileNotFoundError(f"File not found: {local_service_binary}")

    self.cache = ServiceCache()

    # The command that will be executed. The working directory of this
    # command will be set to the local_service_binary's parent, so we can
    # use the relpath for a neater `ps aux` view.
    cmd = [
        f"./{local_service_binary.name}",
        f"--working_dir={self.cache.path}",
    ]
    # Add any custom arguments.
    cmd += script_args

    # Set the root of the runfiles directory.
    env = os.environ.copy()
    env["COMPILER_GYM_RUNFILES"] = str(runfiles_path("."))
    env["COMPILER_GYM_SITE_DATA"] = str(site_data_path("."))
    # Set the pythonpath so that executable python scripts can use absolute
    # import paths like `from compiler_gym.envs.foo import bar`.
    if "PYTHONPATH" in env:
        env["PYTHONPATH"] = f'{env["PYTHONPATH"]}:{env["COMPILER_GYM_RUNFILES"]}'
    else:
        env["PYTHONPATH"] = env["COMPILER_GYM_RUNFILES"]

    # Set the verbosity of the service. The logging level of the service is
    # the debug level - 1, so that COMPILER_GYM_DEBUG=3 will cause VLOG(2)
    # and lower to be logged to stdout.
    debug_level = max(
        get_debug_level(), logging_level_to_debug_level(logger.getEffectiveLevel())
    )
    if debug_level > 0:
        cmd.append("--alsologtostderr")
        cmd.append(f"-v={debug_level - 1}")
        # If we are debugging the backend, set the logbuflevel to a low
        # value to disable buffering of logging messages. This removes any
        # buffering between `LOG(INFO) << "..."` and the message being
        # emitted to stderr.
        cmd.append("--logbuflevel=-1")
    else:
        # Silence the gRPC logs as we will do our own error reporting, but
        # don't override any existing value so that the user may debug the
        # gRPC backend by setting GRPC_VERBOSITY to ERROR, INFO, or DEBUG.
        if not os.environ.get("GRPC_VERBOSITY"):
            env["GRPC_VERBOSITY"] = "NONE"

    # Set environment variable COMPILER_GYM_SERVICE_ARGS to pass
    # additional arguments to the service.
    args = os.environ.get("COMPILER_GYM_SERVICE_ARGS", "")
    if args:
        cmd.append(args)

    # Add any custom environment variables.
    env.update(script_env)

    logger.debug(
        "Exec `%s%s`",
        " ".join(f"{k}={v}" for k, v in script_env.items()) + " "
        if script_env
        else "",
        join_cmd(cmd),
    )

    self.process = subprocess.Popen(
        cmd,
        env=env,
        cwd=local_service_binary.parent,
    )
    self._process_returncode_exception_raised = False

    # Read the port from a file generated by the service.
    wait_secs = 0.1
    port_path = self.cache / "port.txt"
    end_time = time() + port_init_max_seconds
    while time() < end_time:
        returncode = self.process.poll()
        if returncode is not None:
            try:
                # Try and decode the name of a signal. Signal returncodes
                # are negative.
                returncode = f"{returncode} ({Signals(abs(returncode)).name})"
            except ValueError:
                pass
            msg = f"Service terminated with returncode: {returncode}"
            # Attach any logs from the service if available.
            logs = truncate_lines(
                self.loglines(), max_line_len=100, max_lines=25, tail=True
            )
            if logs:
                msg = f"{msg}\nService logs:\n{logs}"
            self.cache.close()
            raise ServiceError(msg)
        if port_path.is_file():
            try:
                with open(port_path) as f:
                    self.port = int(f.read().rstrip())
                break
            except ValueError:
                # ValueError is raised by int(...) on invalid input. In
                # that case, wait for longer.
                pass
        sleep(wait_secs)
        wait_secs *= 1.2
    else:
        # kill() was added in Python 3.7.
        if sys.version_info >= (3, 7, 0):
            self.process.kill()
        else:
            self.process.terminate()
        self.process.communicate(timeout=rpc_init_max_seconds)
        self.cache.close()
        raise TimeoutError(
            "Service failed to produce port file after "
            f"{port_init_max_seconds:.1f} seconds"
        )

    url = f"localhost:{self.port}"

    wait_secs = 0.1
    attempts = 0
    end_time = time() + rpc_init_max_seconds
    while time() < end_time:
        try:
            channel = grpc.insecure_channel(
                url,
                options=GRPC_CHANNEL_OPTIONS,
            )
            channel_ready = grpc.channel_ready_future(channel)
            attempts += 1
            channel_ready.result(timeout=wait_secs)
            break
        except (grpc.FutureTimeoutError, grpc.RpcError) as e:
            logger.debug(
                "Connection attempt %d = %s %s", attempts, type(e).__name__, str(e)
            )
            wait_secs *= 1.2
    else:
        # kill() was added in Python 3.7.
        if sys.version_info >= (3, 7, 0):
            self.process.kill()
        else:
            self.process.terminate()
        self.process.communicate(timeout=process_exit_max_seconds)

        # Include the last few lines of logs generated by the compiler
        # service, if any.
        logs = truncate_lines(
            self.loglines(), max_line_len=100, max_lines=25, tail=True
        )
        logs_message = f" Service logs:\n{logs}" if logs else ""

        self.cache.close()
        raise TimeoutError(
            "Failed to connect to RPC service after "
            f"{rpc_init_max_seconds:.1f} seconds.{logs_message}"
        )

    super().__init__(channel, url)
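# Generic sketch of the polling-with-backoff idiom used twice in the
# constructor above (once for the port file, once for the gRPC channel).
# This is a hypothetical helper, not part of the original module: retry a
# predicate until it succeeds or a deadline passes, growing the sleep
# interval by 20% per attempt, as the code above does.
from time import sleep, time


def poll_until(predicate, max_seconds: float, initial_wait: float = 0.1) -> bool:
    wait_secs = initial_wait
    end_time = time() + max_seconds
    while time() < end_time:
        if predicate():
            return True
        sleep(wait_secs)
        wait_secs *= 1.2  # Exponential backoff.
    return False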
def benchmark_from_size(
    self, mnk, max_retries: int = 3, retry_count: int = 0
) -> MatmulBenchmark:
    """Get a benchmark from a 3-tuple of matmul dimensions.

    :param mnk: 3-tuple containing m, n, k sizes of the matmul.

    :return: A benchmark instance.

    :raises OSError: If matmul fails.

    :raises BenchmarkInitError: If the C program generated by matmul cannot
        be lowered to mlir-IR.
    """
    if retry_count >= max_retries:
        raise OSError(
            f"matmul failed after {retry_count} {plural(retry_count, 'attempt', 'attempts')} "
            f"with size {mnk}"
        )

    self.install()
    mnk = list(mnk)
    # Run matmul with the given size and regex to produce the correct mlir.
    logger.debug("Exec matmul --mnk %s", mnk)
    # TODO(kyleherndon): refactor these to another location.
    src_content = """
func @matmul(%a: tensor<${M}x${K}xf32> {linalg.buffer_layout = affine_map<(i, j)[s0, s1] -> (i, j)>},
             %b: tensor<${K}x${N}xf32> {linalg.buffer_layout = affine_map<(i, j)[s0, s1] -> (i, j)>},
             %c: tensor<${M}x${N}xf32> {linalg.buffer_layout = affine_map<(i, j)[s0, s1] -> (i, j)>})
    -> tensor<${M}x${N}xf32>
  attributes { passthrough = [["target-cpu", "haswell"], ["prefer-vector-width", "256"]]}
{
  %f0 = arith.constant 0.0 : f32
  %f1 = linalg.fill(%f0, %c) : f32, tensor<${M}x${N}xf32> -> tensor<${M}x${N}xf32>
  %d = linalg.matmul ins(%a, %b : tensor<${M}x${K}xf32>, tensor<${K}x${N}xf32>)
                     outs(%f1: tensor<${M}x${N}xf32>) -> tensor<${M}x${N}xf32>
  return %d : tensor<${M}x${N}xf32>
}"""

    cc_src = """
#include <benchmark/benchmark.h>
#include <mlir/ExecutionEngine/RunnerUtils.h>

#include <cstdio>
#include <vector>

void naive_matmul(const float* a, const float* b, float* c, size_t m, size_t k, size_t n) {
  // correctness check
  for (size_t i = 0; i < m; i++) {
    for (size_t j = 0; j < n; j++) {
#ifdef COLUMN_MAJOR
      size_t ci = i + j * m;
#else
      size_t ci = i * n + j;
#endif
      c[ci] = 0.0f;
      for (size_t p = 0; p < k; p++) {
#ifdef COLUMN_MAJOR
        c[ci] += a[i + p * m] * b[p + j * k];
#else
        c[ci] += a[i * k + p] * b[p * n + j];
#endif
      }
    }
  }
}

void init_matrix(float* a, int nrows, int ncols) {
  for (int j = 0; j < ncols; j++) {
    for (int i = 0; i < nrows; i++) {
      a[i + j * nrows] = ((float)rand() / (float)RAND_MAX);
    }
  }
}

extern "C" {
void matmul(float* aligned_a, float* allocated_a, int64_t offset_a, int64_t size_a0,
            int64_t size_a1, int64_t strides_a0, int64_t strides_a1, float* aligned_b,
            float* allocated_b, int64_t offset_b, int64_t size_b0, int64_t size_b1,
            int64_t strides_b0, int64_t strides_b1, float* aligned_c, float* allocated_c,
            int64_t offset_c, int64_t size_c0, int64_t size_c1, int64_t strides_c0,
            int64_t strides_c1);
}

size_t g_errors = 0;
static void BenchmarkFunction(benchmark::State& state) {
  // TODO(boian): pass these as command line arguments
  int MDIM = ${M};
  int NDIM = ${N};
  int KDIM = ${K};
  std::vector<float> a(MDIM * KDIM);
  std::vector<float> b(KDIM * NDIM);
  std::vector<float> c(MDIM * NDIM);
  float *A = a.data(), *B = b.data(), *C = c.data();
  // a[0] = 1; b[0] = 2;
  init_matrix(A, MDIM, KDIM);
  init_matrix(B, KDIM, NDIM);
  init_matrix(C, MDIM, NDIM);
  int LDA = KDIM;
  int LDB = NDIM;
  int LDC = NDIM;

  for (auto _ : state) {
    matmul(A, A, 0, MDIM, KDIM, LDA, 1, B, B, 0, KDIM, NDIM, LDB, 1, C, C, 0, MDIM, NDIM, LDC, 1);
  }

  std::vector<float> c2(MDIM * NDIM);
  float* C2 = c2.data();
  size_t errors = 0;
  naive_matmul(A, B, C2, MDIM, KDIM, NDIM);
  for (size_t i = 0; i < MDIM; i++) {
    for (size_t j = 0; j < NDIM; j++) {
      size_t ci = i + j * MDIM;
      if (std::abs(C[ci] - C2[ci]) > 0.01f) {
        if (errors == 0) {
          fprintf(stderr, "Incorrect result at index %ld,%ld: C=%0.2f C2=%0.2f\\n", i, j, C[ci], C2[ci]);
        }
        errors++;
      }
    }
  }
  fprintf(stderr, "Detected %ld errors.\\n", errors);
  g_errors = errors;
}

int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  benchmark::RegisterBenchmark("BM_Matmul", BenchmarkFunction)
      ->MeasureProcessCPUTime()
      ->UseRealTime();
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return g_errors != 0;
}
"""
    mlir_site_dir = site_data_path("mlir-v0")
    mlir_site_dir.mkdir(parents=True, exist_ok=True)
    mlir_file_path = mlir_site_dir / "matmul.mlir.template"
    with open(mlir_file_path, "w") as mlir_file:
        mlir_file.write(src_content)
    cc_file_path = mlir_site_dir / "benchmark_main.cc.template"
    with open(cc_file_path, "w") as cc_file:
        cc_file.write(cc_src)

    content = (
        src_content.replace("${M}", str(mnk[0]))
        .replace("${N}", str(mnk[1]))
        .replace("${K}", str(mnk[2]))
    )

    return self.benchmark_class.create(
        self.name_from_size(mnk),
        bytes(content, "utf-8"),
        bytes(src_content, "utf-8"),
    )
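# Hedged usage sketch, not part of the original module: obtain a matmul
# benchmark for m = n = k = 64. Assumes the MatmulDataset yielded by
# get_mlir_datasets() above exposes the benchmark_from_size() method; the
# size tuple is an arbitrary example.
dataset = next(iter(get_mlir_datasets()))
benchmark = dataset.benchmark_from_size((64, 64, 64))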
def validator_cb(env: "LlvmEnv") -> Optional[ValidationError]:  # noqa: F821
    """The validation callback."""
    with _CBENCH_DOWNLOAD_THREAD_LOCK:
        with fasteners.InterProcessLock(cache_path(".cbench-v1-runtime-data.LOCK")):
            download_cBench_runtime_data()

    cbench_data = site_data_path("llvm-v0/cbench-v1-runtime-data/runtime_data")
    for input_file_name in input_files:
        path = cbench_data / input_file_name
        if not path.is_file():
            raise FileNotFoundError(f"Required benchmark input not found: {path}")

    # Create a temporary working directory to execute the benchmark in.
    with tempfile.TemporaryDirectory(dir=env.service.connection.working_dir) as d:
        cwd = Path(d)

        # Expand shell variable substitutions in the benchmark command.
        expanded_command = cmd.replace("$D", str(cbench_data))

        # Translate the output file names into paths inside the working
        # directory.
        output_paths = [cwd / o for o in output_files]

        if pre_execution_callback:
            pre_execution_callback(cwd)

        # Produce a gold-standard output using a reference version of
        # the benchmark.
        if compare_output or output_files:
            gs_env = env.fork()
            try:
                # Reset to the original benchmark state and compile it.
                gs_env.reset(benchmark=env.benchmark)
                gs_env.write_bitcode(cwd / "benchmark.bc")
                gold_standard = _compile_and_run_bitcode_file(
                    bitcode_file=cwd / "benchmark.bc",
                    cmd=expanded_command,
                    cwd=cwd,
                    num_runs=1,
                    # Use default optimizations for gold standard.
                    linkopts=linkopts + ["-O2"],
                    # Always assume safe.
                    sanitizer=None,
                    env=os_env,
                )
                if gold_standard.error:
                    return ValidationError(
                        type=f"Gold standard: {gold_standard.error.type}",
                        data=gold_standard.error.data,
                    )
            finally:
                gs_env.close()

            # Check that the reference run produced the expected output
            # files.
            for path in output_paths:
                if not path.is_file():
                    try:
                        output = gold_standard.output
                    except UnicodeDecodeError:
                        output = "<binary>"
                    raise FileNotFoundError(
                        f"Expected file '{path.name}' not generated\n"
                        f"Benchmark: {env.benchmark}\n"
                        f"Command: {cmd}\n"
                        f"Output: {output}"
                    )
                path.rename(f"{path}.gold_standard")

        # Serialize the benchmark to a bitcode file that will then be
        # compiled to a binary.
        env.write_bitcode(cwd / "benchmark.bc")
        outcome = _compile_and_run_bitcode_file(
            bitcode_file=cwd / "benchmark.bc",
            cmd=expanded_command,
            cwd=cwd,
            num_runs=num_runs,
            linkopts=linkopts,
            sanitizer=sanitizer,
            env=os_env,
        )

        if outcome.error:
            return outcome.error

        # Run a user-specified validation hook.
        if validate_result:
            validate_result(outcome)

        # Difftest the console output.
        if compare_output and gold_standard.output != outcome.output:
            return ValidationError(
                type="Wrong output",
                data={"expected": gold_standard.output, "actual": outcome.output},
            )

        # Difftest the output files.
        for path in output_paths:
            if not path.is_file():
                return ValidationError(
                    type="Output not generated",
                    data={"path": path.name, "command": cmd},
                )
            diff = subprocess.Popen(
                ["diff", str(path), f"{path}.gold_standard"],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            stdout, _ = diff.communicate()
            if diff.returncode:
                try:
                    stdout = stdout.decode("utf-8")
                    return ValidationError(
                        type="Wrong output (file)",
                        data={"path": path.name, "diff": stdout},
                    )
                except UnicodeDecodeError:
                    return ValidationError(
                        type="Wrong output (file)",
                        data={"path": path.name, "diff": "<binary>"},
                    )
import sys
import tempfile
from collections import Counter, defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Callable, Dict, List, NamedTuple, Optional

import fasteners

from compiler_gym.datasets.dataset import Dataset
from compiler_gym.util.download import download
from compiler_gym.util.runfiles_path import cache_path, runfiles_path, site_data_path
from compiler_gym.util.timer import Timer

_CLANG = runfiles_path("CompilerGym/compiler_gym/third_party/llvm/clang")
_CBENCH_DATA = site_data_path("llvm/cBench-v0-runtime-data/runtime_data")
_CBENCH_DATA_URL = (
    "https://dl.fbaipublicfiles.com/compiler_gym/cBench-v0-runtime-data.tar.bz2"
)
_CBENCH_DATA_SHA256 = "a1b5b5d6b115e5809ccaefc2134434494271d184da67e2ee43d7f84d07329055"

if sys.platform == "darwin":
    _COMPILE_ARGS = [
        "-L",
        "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib",
    ]
else:
    _COMPILE_ARGS = []

LLVM_DATASETS = [
    Dataset(
def validator(
    benchmark: str,
    cmd: str,
    data: Optional[List[str]] = None,
    outs: Optional[List[str]] = None,
    platforms: Optional[List[str]] = None,
    compare_output: bool = True,
    validate_result: Optional[
        Callable[[BenchmarkExecutionResult], Optional[str]]
    ] = None,
    linkopts: Optional[List[str]] = None,
    env: Optional[Dict[str, str]] = None,
    pre_execution_callback: Optional[Callable[[Path], None]] = None,
    sanitizers: Optional[List[LlvmSanitizer]] = None,
) -> bool:
    """Declare a new benchmark validator.

    TODO(cummins): Pull this out into a public API.

    :param benchmark: The name of the benchmark that this validator supports.

    :param cmd: The shell command to run the validation. Variable substitution
        is applied to this value as follows: :code:`$BIN` is replaced by the
        path of the compiled binary and :code:`$D` is replaced with the path
        to the benchmark's runtime data directory.

    :param data: A list of paths to input files.

    :param outs: A list of paths to output files.

    :return: :code:`True` if the new validator was registered, else
        :code:`False`.
    """
    platforms = platforms or ["linux", "macos"]
    if {"darwin": "macos"}.get(sys.platform, sys.platform) not in platforms:
        return False
    infiles = data or []
    outfiles = [Path(p) for p in outs or []]
    linkopts = linkopts or []
    env = env or {}
    if sanitizers is None:
        sanitizers = LlvmSanitizer

    VALIDATORS[benchmark].append(
        _make_cBench_validator(
            cmd=cmd,
            input_files=infiles,
            output_files=outfiles,
            compare_output=compare_output,
            validate_result=validate_result,
            linkopts=linkopts,
            os_env=env,
            pre_execution_callback=pre_execution_callback,
        )
    )

    # Register additional validators using the sanitizers.
    if sys.platform.startswith("linux"):
        for sanitizer in sanitizers:
            VALIDATORS[benchmark].append(
                _make_cBench_validator(
                    cmd=cmd,
                    input_files=infiles,
                    output_files=outfiles,
                    compare_output=compare_output,
                    validate_result=validate_result,
                    linkopts=linkopts,
                    os_env=env,
                    pre_execution_callback=pre_execution_callback,
                    sanitizer=sanitizer,
                )
            )

    # Create the BenchmarkDynamicConfig object.
    cbench_data = site_data_path("llvm-v0/cbench-v1-runtime-data/runtime_data")
    DYNAMIC_CONFIGS[benchmark] = BenchmarkDynamicConfig(
        build_cmd=Command(
            argument=["$CC", "$IN"] + linkopts,
            timeout_seconds=60,
            outfile=["a.out"],
        ),
        run_cmd=Command(
            argument=cmd.replace("$BIN", "./a.out")
            .replace("$D", str(cbench_data))
            .split(),
            timeout_seconds=300,
            infile=["a.out", "_finfo_dataset"],
            outfile=[str(s) for s in outfiles],
        ),
        pre_run_cmd=[
            Command(argument=["echo", "1", ">_finfo_dataset"], timeout_seconds=30),
        ],
    )

    return True
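# Illustrative registration showing the $BIN/$D substitution described in the
# docstring above. The benchmark name and data file follow the cBench
# conventions used throughout this module, but this specific entry is an
# example, not taken from the surrounding file.
validator(
    benchmark="benchmark://cbench-v1/crc32",
    cmd="$BIN $D/telecom_data/1.pcm",
    data=["telecom_data/1.pcm"],
)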
def get_llvm_datasets(site_data_base: Optional[Path] = None) -> Iterable[Dataset]:
    """Instantiate the builtin LLVM datasets.

    :param site_data_base: The root of the site data path.

    :return: An iterable sequence of :class:`Dataset
        <compiler_gym.datasets.Dataset>` instances.
    """
    site_data_base = site_data_base or site_data_path("llvm-v0")

    yield AnghaBenchDataset(site_data_base=site_data_base, sort_order=0)
    # Add legacy version of Anghabench using an old manifest.
    anghabench_v0_manifest_url, anghabench_v0_manifest_sha256 = {
        "darwin": (
            "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-macos-manifest.bz2",
            "39464256405aacefdb7550a7f990c9c578264c132804eec3daac091fa3c21bd1",
        ),
        "linux": (
            "https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-anghabench-v0-linux-manifest.bz2",
            "a038d25d39ee9472662a9704dfff19c9e3512ff6a70f1067af85c5cb3784b477",
        ),
    }[sys.platform]
    yield AnghaBenchDataset(
        name="benchmark://anghabench-v0",
        site_data_base=site_data_base,
        sort_order=0,
        manifest_url=anghabench_v0_manifest_url,
        manifest_sha256=anghabench_v0_manifest_sha256,
        deprecated="Please use anghabench-v1",
    )
    yield BlasDataset(site_data_base=site_data_base, sort_order=0)
    yield CLgenDataset(site_data_base=site_data_base, sort_order=0)
    yield CBenchDataset(site_data_base=site_data_base)
    # Add legacy version of cbench-v1 in which the 'b' was capitalized. This
    # is deprecated and will be removed no earlier than v0.1.10.
    yield CBenchLegacyDataset2(
        site_data_base=site_data_base,
        name="benchmark://cBench-v1",
        deprecated=(
            "Please use 'benchmark://cbench-v1' (note the lowercase name). "
            "The dataset is the same, only the name has changed"
        ),
        manifest_url="https://dl.fbaipublicfiles.com/compiler_gym/llvm_bitcodes-10.0.0-cBench-v1-manifest.bz2",
        manifest_sha256="635b94eeb2784dfedb3b53fd8f84517c3b4b95d851ddb662d4c1058c72dc81e0",
        sort_order=100,
    )
    yield CBenchLegacyDataset(site_data_base=site_data_base)
    yield CHStoneDataset(site_data_base=site_data_base)
    yield CsmithDataset(site_data_base=site_data_base, sort_order=0)
    yield GitHubDataset(site_data_base=site_data_base, sort_order=0)
    yield LinuxDataset(site_data_base=site_data_base, sort_order=0)
    yield LlvmStressDataset(site_data_base=site_data_base, sort_order=0)
    yield MibenchDataset(site_data_base=site_data_base, sort_order=0)
    yield MibenchV0Dataset(site_data_base=site_data_base, sort_order=100)
    yield NPBDataset(site_data_base=site_data_base, sort_order=0)
    yield OpenCVDataset(site_data_base=site_data_base, sort_order=0)
    yield POJ104Dataset(site_data_base=site_data_base, sort_order=0)
    yield POJ104LegacyDataset(site_data_base=site_data_base, sort_order=100)
    yield TensorFlowDataset(site_data_base=site_data_base, sort_order=0)
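# Hedged usage sketch (helper name is illustrative; assumes the Dataset
# class exposes a `deprecated` property): enumerate the builtin LLVM
# datasets yielded above, separating active entries from deprecated ones.
def summarize_llvm_datasets() -> None:
    for dataset in get_llvm_datasets():
        status = "deprecated" if dataset.deprecated else "active"
        print(f"{dataset.name}: {status}")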