def test_deterministic_action(env: LlvmEnv, benchmark_name: str, action_name: str):
    """Run an action multiple times from the same starting state and check that
    the generated LLVM-IR is the same.

    Do this for every combination of benchmark and action. This generates many
    tests.

    Caveats of this test:

        * The initial states are all unoptimized benchmarks. If a pass depends
          on other passes to take effect it will not be tested.

        * Non-determinism is tested by running the action 20 times. Extremely
          unlikely non-determinism may not be detected.
    """
    env.observation_space = "Ir"

    checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        ir = env.reset(benchmark=benchmark_name)
        checksum_before = sha1(ir)

        ir, _, done, _ = env.step(env.action_space.names.index(action_name))
        assert not done
        checksums.add(sha1(ir))

        if len(checksums) != 1:
            pytest.fail(
                f"Repeating the {action_name} action {i} times on "
                f"{benchmark_name} produced different states"
            )

        # An action which has no effect is not likely to be nondeterministic.
        if list(checksums)[0] == checksum_before:
            break
def test_fuzz(env: LlvmEnv):
    """Run an action multiple times from the same starting state and check that
    the generated LLVM-IR is the same.

    Caveats of this test:

        * The initial state is an unoptimized benchmark. If a pass depends on
          other passes to take effect it will not be tested.

        * Non-determinism is tested by running the action 20 times. Extremely
          unlikely non-determinism may not be detected.
    """
    action = env.action_space.sample()
    action_name = env.action_space.names[action]
    benchmark = random.choice(BENCHMARK_NAMES)
    env.observation_space = "Ir"

    checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        ir = env.reset(benchmark=benchmark)
        checksum_before = sha1(ir)

        ir, _, done, _ = env.step(action)
        assert not done
        checksums.add(sha1(ir))

        if len(checksums) != 1:
            pytest.fail(
                f"Repeating the {action_name} action {i} times on "
                f"{benchmark} produced different states"
            )

        # An action which has no effect is not likely to be nondeterministic.
        if list(checksums)[0] == checksum_before:
            break
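# The two fuzz tests above call a sha1() helper that is not shown in this
# section. A minimal sketch of what such a helper might look like, assuming
# the "Ir" observation is returned as a string (hypothetical, for context):
import hashlib


def sha1(ir: str) -> str:
    """Return the hex SHA1 digest of an IR string."""
    return hashlib.sha1(ir.encode("utf-8")).hexdigest()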
def test_fork_child_process_is_not_orphaned(env: LlvmEnv):
    env.reset("cbench-v1/crc32")
    with env.fork() as fkd:
        # Check that both environments share the same service.
        assert isinstance(env.service.connection.process, subprocess.Popen)
        assert isinstance(fkd.service.connection.process, subprocess.Popen)
        assert env.service.connection.process.pid == fkd.service.connection.process.pid
        process = env.service.connection.process

        # Sanity check that both services are alive.
        assert not env.service.connection.process.poll()
        assert not fkd.service.connection.process.poll()

        # Close the parent service.
        env.close()

        # Check that the service is still alive.
        assert not env.service
        assert not fkd.service.connection.process.poll()

        # Close the forked service.
        fkd.close()

        # Check that the service has been killed.
        assert process.poll() is not None
def test_invalid_benchmark_missing_file(env: LlvmEnv):
    benchmark = Benchmark(uri="benchmark://new")

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == "No program set"
def test_fork_chain_child_processes_are_not_orphaned(env: LlvmEnv):
    env.reset("cbench-v1/crc32")

    # Create a chain of forked environments.
    a = env.fork()
    b = a.fork()
    c = b.fork()
    d = c.fork()

    try:
        # Sanity check that they share the same underlying service.
        assert (
            env.service.connection.process
            == a.service.connection.process
            == b.service.connection.process
            == c.service.connection.process
            == d.service.connection.process
        )
        proc = env.service.connection.process

        # Kill the forked environments one by one.
        a.close()
        assert proc.poll() is None
        b.close()
        assert proc.poll() is None
        c.close()
        assert proc.poll() is None
        d.close()
        assert proc.poll() is None

        # Kill the final environment, refcount 0, service is closed.
        env.close()
        assert proc.poll() is not None
    finally:
        a.close()
        b.close()
        c.close()
        d.close()
def test_fork_twice_test(env: LlvmEnv):
    """Test that fork() on a forked environment works."""
    env.reset(benchmark="cbench-v1/crc32")
    with env.fork() as fork_a:
        with fork_a.fork() as fork_b:
            assert env.state == fork_a.state
            assert fork_a.state == fork_b.state
def test_validate_known_bad_trajectory(env: LlvmEnv, state):
    env.apply(state)
    for _ in range(VALIDATION_FLAKINESS):
        result = env.validate()
        if result.okay():
            pytest.fail("Validation succeeded on state where it should have failed")
def test_non_validatable_benchmark_callback(
    env: LlvmEnv, non_validatable_benchmark_name: str
):
    """Check that non-validatable benchmarks do not have a validation callback."""
    env.reset(benchmark=non_validatable_benchmark_name)
    cb = get_llvm_benchmark_validation_callback(env)
    assert cb is None
def test_fork_state_fuzz_test(env: LlvmEnv):
    """Run random episodes and check that fork() produces equivalent state."""
    end_time = time() + FUZZ_TIME_SECONDS
    while time() < end_time:
        env.reset(benchmark="cBench-v0/dijkstra")

        # Take a few warmup steps to get an environment in a random state.
        for _ in range(A_FEW_RANDOM_ACTIONS):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:  # Broken episode, restart.
                break
        else:
            # Fork the environment and check that the states are equivalent.
            new_env = env.fork()
            try:
                assert env.state == new_env.state
                # Check that environment states remain equal if identical
                # subsequent steps are taken.
                for _ in range(A_FEW_RANDOM_ACTIONS):
                    action = env.action_space.sample()
                    _, _, done_a, _ = env.step(action)
                    _, _, done_b, _ = new_env.step(action)
                    assert done_a == done_b
                    if done_a:  # Broken episode, restart.
                        break
                    assert env.state == new_env.state
            finally:
                new_env.close()
def test_make_benchmark_from_command_line_system_includes(env: LlvmEnv):
    with temporary_working_directory():
        with open("in.c", "w") as f:
            f.write(
                """
#include <stdio.h>

int main() {
    return 0;
}
"""
            )
        env.make_benchmark_from_command_line("gcc in.c")
def test_fork_state(env: LlvmEnv):
    env.reset("cbench-v1/crc32")
    env.step(0)
    assert env.actions == [0]

    with env.fork() as fkd:
        assert fkd.benchmark == env.benchmark
        assert fkd.actions == env.actions
def test_invalid_benchmark_data(env: LlvmEnv):
    benchmark = Benchmark.from_file_contents(
        "benchmark://new", "Invalid bitcode".encode("utf-8")
    )

    with pytest.raises(
        ValueError, match='Failed to parse LLVM bitcode: "benchmark://new"'
    ):
        env.reset(benchmark=benchmark)
def test_with_statement(env: LlvmEnv):
    """Test that the `with` statement context manager works on forks."""
    env.reset("cbench-v1/crc32")
    env.step(0)
    with env.fork() as fkd:
        assert fkd.in_episode
        assert fkd.actions == [0]
    assert not fkd.in_episode
    assert env.in_episode
def test_fork_spaces_are_same(env: LlvmEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")

    with env.fork() as fkd:
        assert fkd.observation_space == env.observation_space
        assert fkd.reward_space == env.reward_space
        assert fkd.benchmark == env.benchmark
def test_invalid_benchmark_missing_file(env: LlvmEnv):
    benchmark = Benchmark(
        BenchmarkProto(
            uri="benchmark://new",
        )
    )

    with pytest.raises(ValueError, match="No program set"):
        env.reset(benchmark=benchmark)
def test_invalid_benchmark_data(env: LlvmEnv):
    benchmark = Benchmark(
        uri="benchmark://new",
        program=File(contents="Invalid bitcode".encode("utf-8")),
    )

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert str(ctx.value) == 'Failed to parse LLVM bitcode: "benchmark://new"'
def test_benchmark_path_empty_file(env: LlvmEnv):
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        (tmpdir / "test.bc").touch()

        benchmark = Benchmark.from_file("benchmark://new", tmpdir / "test.bc")

        with pytest.raises(ValueError, match="Failed to parse LLVM bitcode"):
            env.reset(benchmark=benchmark)
def test_benchmark_path_invalid_protocol(env: LlvmEnv):
    benchmark = Benchmark(
        uri="benchmark://new", program=File(uri="invalid_protocol://test")
    )

    with pytest.raises(ValueError) as ctx:
        env.reset(benchmark=benchmark)

    assert (
        str(ctx.value)
        == 'Unsupported benchmark URI protocol: "invalid_protocol://test"'
    )
def test_make_benchmark_single_bitcode(env: LlvmEnv):
    benchmark = llvm.make_benchmark(EXAMPLE_BITCODE_FILE)

    assert benchmark.uri == f"file:///{EXAMPLE_BITCODE_FILE}"
    assert benchmark.program.uri == f"file:///{EXAMPLE_BITCODE_FILE}"

    env.reset(benchmark=benchmark)
    assert env.benchmark == benchmark.uri
    assert env.observation["IrInstructionCount"] == EXAMPLE_BITCODE_IR_INSTRUCTION_COUNT
def test_invalid_benchmark_path_contents(env: LlvmEnv):
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        with open(str(tmpdir / "test.bc"), "w") as f:
            f.write("Invalid bitcode")

        benchmark = Benchmark.from_file("benchmark://new", tmpdir / "test.bc")

        with pytest.raises(ValueError, match="Failed to parse LLVM bitcode"):
            env.reset(benchmark=benchmark)
def test_validate_cBench_unoptimized(env: LlvmEnv, benchmark_name: str):
    """Run the validation routine on unoptimized versions of all cBench benchmarks."""
    env.reset(benchmark=benchmark_name)
    cb = datasets.get_llvm_benchmark_validation_callback(env)

    if benchmark_name in CBENCH_VALIDATION_EXCLUDE_LIST:
        assert cb is None
    else:
        assert cb
        assert cb(env) is None
def train_and_run(env: LlvmEnv) -> None:
    """Run tabular Q learning on an environment"""
    FLAGS.log_every = 0  # Disable printing to stdout

    q_table: Dict[StateActionTuple, float] = {}
    env.observation_space = "Autophase"
    training_env = env.fork()
    train(q_table, training_env)
    training_env.close()

    rollout(q_table, env, printout=False)
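# A hypothetical usage sketch for train_and_run() (not part of the original
# code): create a registered "llvm-v0" environment on the cbench-v1/crc32
# benchmark used elsewhere in these tests and hand it to the helper. The
# FLAGS, train(), and rollout() names are assumed to come from the
# surrounding module.
import compiler_gym

with compiler_gym.make("llvm-v0", benchmark="cbench-v1/crc32") as example_env:
    train_and_run(example_env)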
def test_make_benchmark_from_command_line_no_system_includes(env: LlvmEnv):
    with temporary_working_directory():
        with open("in.c", "w") as f:
            f.write(
                """
#include <stdio.h>

int main() {
    return 0;
}
"""
            )
        with pytest.raises(BenchmarkInitError, match="stdio.h"):
            env.make_benchmark_from_command_line("gcc in.c", system_includes=False)
def test_select_best_action_closed_environment(env: LlvmEnv):
    """Test that select_best_action() recovers from an environment whose service
    has closed."""
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")
    with ThreadPoolExecutor() as executor:
        best_a = select_best_action(env, executor)
        env.close()
        best_b = select_best_action(env, executor)
        assert best_a == best_b
def test_benchmark_path_not_found(env: LlvmEnv):
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        benchmark = Benchmark(
            uri="benchmark://new", program=File(uri=f"file:///{tmpdir}/not_found")
        )

        with pytest.raises(FileNotFoundError) as ctx:
            env.reset(benchmark=benchmark)

        assert str(ctx.value) == f'File not found: "{tmpdir}/not_found"'
def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float:
    """Compute the final reward of the environment.

    Note that this may modify the environment state. You should call
    :code:`reset()` before continuing to use the environment after this.
    """
    # Reapply the environment state in a retry loop.
    actions = list(env.actions)
    env.reset()
    for i in range(1, 5 + 1):
        _, _, done, info = env.step(actions)
        if not done:
            break
        logger.warning(
            "Attempt %d to apply actions during final reward failed: %s",
            i,
            info.get("error_details"),
        )
    else:
        raise ValueError("Failed to replay environment's actions")

    if self.value == OptimizationTarget.CODESIZE:
        return env.observation.IrInstructionCountOz() / max(
            env.observation.IrInstructionCount(), 1
        )

    if self.value == OptimizationTarget.BINSIZE:
        return env.observation.ObjectTextSizeOz() / max(
            env.observation.ObjectTextSizeBytes(), 1
        )

    if self.value == OptimizationTarget.RUNTIME:
        with _RUNTIME_LOCK:
            with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env:
                new_env.reset()
                new_env.runtime_observation_count = runtime_count
                new_env.runtime_warmup_count = 0
                new_env.apply(env.state)
                final_runtimes = new_env.observation.Runtime()
                assert len(final_runtimes) == runtime_count

                new_env.reset()
                new_env.send_param("llvm.apply_baseline_optimizations", "-O3")
                o3_runtimes = new_env.observation.Runtime()
                assert len(o3_runtimes) == runtime_count

                logger.debug("O3 runtimes: %s", o3_runtimes)
                logger.debug("Final runtimes: %s", final_runtimes)
                speedup = np.median(o3_runtimes) / max(np.median(final_runtimes), 1e-12)
                logger.debug("Speedup: %.4f", speedup)

                return speedup

    assert False, f"Unknown OptimizationTarget: {self.value}"
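# A hypothetical usage sketch for final_reward() (not part of the original
# code): run a short episode, then score it against the codesize target.
# This assumes final_reward() is a method on an OptimizationTarget enum, as
# the self.value comparisons above suggest.
import compiler_gym

with compiler_gym.make("llvm-v0", benchmark="cbench-v1/crc32") as example_env:
    example_env.reset()
    example_env.step(example_env.action_space.sample())
    reward = OptimizationTarget.CODESIZE.final_reward(example_env)
    print(f"Final codesize reward: {reward:.4f}")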
def test_fork_state(env: LlvmEnv):
    env.reset("cBench-v0/crc32")
    env.step(0)
    assert env.actions == [0]

    new_env = env.fork()
    try:
        assert new_env.benchmark == env.benchmark
        assert new_env.actions == env.actions
    finally:
        new_env.close()
def test_no_validation_callback_for_custom_benchmark(env: LlvmEnv):
    """Test that a custom benchmark has no validation callback."""
    with tempfile.TemporaryDirectory() as d:
        p = Path(d) / "example.c"
        with open(p, "w") as f:
            print("int main() {return 0;}", file=f)
        benchmark = env.make_benchmark(p)

        env.reset(benchmark=benchmark)

        assert not env.benchmark.is_validatable()
def test_episode_reward_with_non_default_reward_space(env: LlvmEnv):
    """Test that episode_reward is not updated when custom rewards are passed to
    step()."""
    env.reward_space = "IrInstructionCountOz"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0

    _, rewards, _, _ = env.step(
        env.action_space["-mem2reg"],
        rewards=["IrInstructionCount"],
    )
    assert rewards[0] > 0
    assert env.episode_reward == 0
def test_benchmark_path_empty_file(env: LlvmEnv):
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir = Path(tmpdir)
        (tmpdir / "test.bc").touch()

        benchmark = Benchmark(
            uri="benchmark://new", program=File(uri=f"file:///{tmpdir}/test.bc")
        )

        with pytest.raises(ValueError) as ctx:
            env.reset(benchmark=benchmark)

        assert str(ctx.value) == f'File is empty: "{tmpdir}/test.bc"'