def test_fork_state_fuzz_test(env: LlvmEnv):
    """Fuzz fork(): drive episodes to random states, fork, and compare states.

    Keeps starting new episodes until FUZZ_TIME_SECONDS of wall-clock time
    have elapsed. In each episode the parent and the fork must report equal
    states right after the fork and after every identical follow-up action.
    """
    deadline = time() + FUZZ_TIME_SECONDS
    while time() < deadline:
        env.reset(benchmark="cBench-v0/dijkstra")

        # Warm up with random actions so the fork is taken from a random
        # state rather than from the freshly reset one.
        warmup_completed = True
        for _ in range(A_FEW_RANDOM_ACTIONS):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:
                # The episode ended during warmup; start a new one.
                warmup_completed = False
                break
        if not warmup_completed:
            continue

        forked = env.fork()
        try:
            assert env.state == forked.state

            # Apply the same random actions to both environments and check
            # that they stay in lockstep.
            for _ in range(A_FEW_RANDOM_ACTIONS):
                action = env.action_space.sample()
                _, _, parent_done, _ = env.step(action)
                _, _, forked_done, _ = forked.step(action)
                assert parent_done == forked_done
                if parent_done:
                    # The episode ended; start a new one.
                    break
                assert env.state == forked.state
        finally:
            # Always release the fork, even when an assertion fails.
            forked.close()
def test_fork_state(env: LlvmEnv):
    """Test that a fork carries over the parent's benchmark and action history."""
    env.reset("cbench-v1/crc32")
    env.step(0)
    assert env.actions == [0]

    with env.fork() as fkd:
        # Compare the fork against the parent: comparing the fork's benchmark
        # with itself would be vacuously true and could never fail.
        assert fkd.benchmark == env.benchmark
        assert fkd.actions == env.actions
def test_with_statement(env: LlvmEnv):
    """Check that a fork can be used as a `with` statement context manager.

    Inside the block the fork is in an episode and has the parent's actions;
    after the block the fork is no longer in an episode while the parent
    still is.
    """
    env.reset("cbench-v1/crc32")
    env.step(0)

    with env.fork() as forked:
        assert forked.in_episode
        assert forked.actions == [0]

    # Leaving the block must end the fork's episode only.
    assert not forked.in_episode
    assert env.in_episode
def test_fork_previous_cost_reward_update(env: LlvmEnv):
    """Check that parent and fork return the same reward for the same action.

    Uses the "IrInstructionCount" reward space: one -mem2reg step is taken
    before forking, then one more in each environment.
    """
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")

    mem2reg = env.action_space.flags.index("-mem2reg")
    env.step(mem2reg)

    with env.fork() as forked:
        _, parent_reward, _, _ = env.step(mem2reg)
        _, forked_reward, _, _ = forked.step(mem2reg)
        assert parent_reward == forked_reward
def test_fork_state(env: LlvmEnv):
    """Test that a fork carries over the parent's benchmark and action history."""
    env.reset("cBench-v0/crc32")
    env.step(0)
    assert env.actions == [0]

    new_env = env.fork()
    try:
        # Compare the fork against the parent: comparing the fork's benchmark
        # with itself would be vacuously true and could never fail.
        assert new_env.benchmark == env.benchmark
        assert new_env.actions == env.actions
    finally:
        # Always release the fork, even when an assertion fails.
        new_env.close()
def test_fork_previous_cost_lazy_reward_update(env: LlvmEnv):
    """Check that an on-demand "IrInstructionCount" reward matches in parent and fork.

    No reward space is set on the environment; rewards are only read through
    `env.reward[...]`.
    """
    env.reset("cbench-v1/crc32")

    mem2reg = env.action_space.flags.index("-mem2reg")
    env.step(mem2reg)

    # The value is deliberately discarded. NOTE(review): presumably this
    # lookup primes the lazily computed reward before forking (see the test
    # name) -- confirm against the reward implementation.
    env.reward["IrInstructionCount"]

    with env.fork() as forked:
        env.step(mem2reg)
        forked.step(mem2reg)

        parent_reward = env.reward["IrInstructionCount"]
        forked_reward = forked.reward["IrInstructionCount"]
        assert parent_reward == forked_reward
def e_greedy_search(env: LlvmEnv) -> None:
    """Run an ϵ-greedy search on an environment.

    On every step, with probability FLAGS.epsilon a random action is applied;
    otherwise the action chosen by `select_best_action` is applied. The search
    returns when a step reports `done`, or when the best available action has
    a non-positive reward.
    """
    step_count = 0
    with ThreadPoolExecutor(max_workers=FLAGS.nproc) as executor:
        while True:
            step_count += 1
            explore = random.random() < FLAGS.epsilon

            if not explore:
                # Greedy step: apply the best action, or stop searching when
                # no positive reward is attainable.
                best = select_best_action(env, executor)
                if best.reward <= 0:
                    logging.debug(
                        "Greedy search terminated after %d steps, "
                        "no further reward attainable",
                        step_count,
                    )
                    return

                _, reward, done, _ = env.step(best.action)
                logging.debug(
                    "Step %d, greedy action %s, reward %.4f, cumulative %.4f",
                    step_count,
                    env.action_space.flags[best.action],
                    reward,
                    env.episode_reward,
                )
                # For a deterministic reward space, applying the action
                # should reproduce the reward reported by the selection.
                if env.reward_space.deterministic and reward != best.reward:
                    logging.warning(
                        "Action %s produced different reward on replay, %.4f != %.4f",
                        env.action_space.flags[best.action],
                        best.reward,
                        reward,
                    )
            else:
                # Exploratory step: apply a randomly sampled action.
                action = env.action_space.sample()
                _, reward, done, _ = env.step(action)
                logging.debug(
                    "Step %d, exploratory action %s, reward %.4f, cumulative %.4f",
                    step_count,
                    env.action_space.flags[action],
                    reward,
                    env.episode_reward,
                )

            # A terminal state ends the search.
            if done:
                return
def test_fork_reset(env: LlvmEnv):
    """Check that stepping and resetting a fork leaves the parent's actions intact."""
    env.reset("cbench-v1/crc32")
    for action in (0, 1, 2):
        env.step(action)

    with env.fork() as forked:
        # Stepping the fork extends only the fork's history.
        forked.step(3)
        assert env.actions == [0, 1, 2]
        assert forked.actions == [0, 1, 2, 3]

        # Resetting the fork clears only the fork's history.
        forked.reset()
        assert env.actions == [0, 1, 2]
        assert forked.actions == []
def test_fuzz(env: LlvmEnv, reward_space: str):
    """Record a random trajectory on a random csmith benchmark, then replay it.

    After resetting to the same benchmark, every replayed step must report
    the same `done` flag and (almost) equal observation and reward as the
    recorded step. The test returns early, without checking anything, when
    the benchmark fails to initialize.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space

    benchmark = env.datasets["generator://csmith-v0"].random_benchmark()
    print(benchmark.uri)  # For debugging in case of failure.

    try:
        env.reset(benchmark=benchmark)
    except BenchmarkInitError:
        # Nothing to test if the benchmark cannot be initialized.
        return

    recorded_steps = apply_random_trajectory(
        env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE
    )
    print(env.state)  # For debugging in case of failure.

    # Replay from the same starting point and compare step by step.
    env.reset(benchmark=benchmark)
    for step_number, recorded in enumerate(recorded_steps, start=1):
        action, observation, reward, done = recorded
        print(f"Replaying step {step_number}: {env.action_space.flags[action]}")

        replay_observation, replay_reward, replay_done, info = env.step(action)
        assert done == replay_done, info
        np.testing.assert_array_almost_equal(observation, replay_observation)
        np.testing.assert_almost_equal(reward, replay_reward)
def random_search(env: LlvmEnv) -> None:
    """Run a random search on the given environment.

    Starts FLAGS.nproc `RandomAgentWorker`s, each building its own
    "llvm-ic-v0" environment on `env.benchmark`, lets them run for
    FLAGS.search_time seconds, then replays the best action sequence found
    on `env`.
    """
    patience = int(env.action_space.n * FLAGS.patience_ratio)

    def make_worker_env():
        # The benchmark is read from `env` at the time this is called.
        return gym.make("llvm-ic-v0", benchmark=env.benchmark)

    # Start parallel random search workers.
    workers = []
    for _ in range(FLAGS.nproc):
        workers.append(RandomAgentWorker(make_env=make_worker_env, patience=patience))
    for worker in workers:
        worker.start()

    sleep(FLAGS.search_time)

    # Signal every worker to stop before waiting on any of them.
    for worker in workers:
        worker.alive = False
    for worker in workers:
        worker.join()

    # Keep the action sequence of the worker with the highest return; on a
    # tie the earlier worker wins.
    top_reward = -float("inf")
    top_actions = []
    for worker in workers:
        if worker.best_returns > top_reward:
            top_reward = worker.best_returns
            top_actions = list(worker.best_actions)

    # Replay the winning sequence so that `env` ends in the final state.
    for action in top_actions:
        _, _, done, _ = env.step(action)
        assert not done
def test_fuzz(env: LlvmEnv):
    """Apply one random action repeatedly from a fresh reset of one random
    benchmark and fail if the resulting LLVM-IR checksums ever differ.

    Caveats of this test:

    * The initial state is an unoptimized benchmark. If a pass depends on
      other passes to take effect it will not be tested.
    * Non-determinism is tested by running the action at most
      ACTION_REPTITION_COUNT times. Extremely unlikely non-determinism may
      not be detected.
    """
    action = env.action_space.sample()
    action_name = env.action_space.names[action]
    benchmark = random.choice(BENCHMARK_NAMES)
    env.observation_space = "Ir"

    seen_checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        initial_ir = env.reset(benchmark=benchmark)
        checksum_before = sha1(initial_ir)

        stepped_ir, _, done, _ = env.step(action)
        assert not done
        seen_checksums.add(sha1(stepped_ir))

        if len(seen_checksums) != 1:
            pytest.fail(f"Repeating the {action_name} action {i} times on "
                        f"{benchmark} produced different states")

        # Exactly one checksum remains here. If it equals the checksum of the
        # untouched IR the action had no effect, and an action with no effect
        # is not likely to be nondeterministic.
        if checksum_before in seen_checksums:
            break
def test_deterministic_action(env: LlvmEnv, benchmark_name: str,
                              action_name: str):
    """Apply the named action repeatedly from a fresh reset of the named
    benchmark and fail if the resulting LLVM-IR checksums ever differ.

    Do this for every combination of benchmark and action. This generates
    many tests.

    Caveats of this test:

    * The initial states are all unoptimized benchmarks. If a pass depends
      on other passes to take effect it will not be tested.
    * Non-determinism is tested by running the action at most
      ACTION_REPTITION_COUNT times. Extremely unlikely non-determinism may
      not be detected.
    """
    env.observation_space = "Ir"

    seen_checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        initial_ir = env.reset(benchmark=benchmark_name)
        checksum_before = sha1(initial_ir)

        action = env.action_space.names.index(action_name)
        stepped_ir, _, done, _ = env.step(action)
        assert not done
        seen_checksums.add(sha1(stepped_ir))

        if len(seen_checksums) != 1:
            pytest.fail(f"Repeating the {action_name} action {i} times on "
                        f"{benchmark_name} produced different states")

        # Exactly one checksum remains here. If it equals the checksum of the
        # untouched IR the action had no effect, and an action with no effect
        # is not likely to be nondeterministic.
        if checksum_before in seen_checksums:
            break
def process_worker_with_env(env: LlvmEnv, actions: List[int], queue: mp.Queue):
    """Apply `actions` to `env` in order and report the last step on `queue`.

    Args:
        env: The environment to step.
        actions: A non-empty list of actions.
        queue: Receives one tuple ``(env, observation, reward, done, info)``
            holding the environment and the result of the final step.
    """
    # At least one action is required, otherwise there is no step result to
    # put on the queue.
    assert actions

    for current_action in actions:
        step_result = env.step(current_action)
        observation, reward, done, info = step_result
        assert not done

    payload = (env, observation, reward, done, info)
    queue.put(payload)
def test_episode_reward_init_zero(env: LlvmEnv):
    """Check that episode_reward is zero after reset() and tracks step rewards."""
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0

    # After a single step the episode reward equals that step's reward.
    mem2reg = env.action_space["-mem2reg"]
    _, step_reward, _, _ = env.step(mem2reg)
    assert step_reward > 0
    assert env.episode_reward == step_reward

    # Resetting starts again from zero.
    env.reset()
    assert env.episode_reward == 0
def test_fork_reset(env: LlvmEnv):
    """Check that stepping and resetting a fork leaves the parent's actions intact."""
    env.reset("cBench-v0/crc32")
    for action in (0, 1, 2):
        env.step(action)

    forked = env.fork()
    try:
        # Stepping the fork extends only the fork's history.
        forked.step(3)
        assert env.actions == [0, 1, 2]
        assert forked.actions == [0, 1, 2, 3]

        # Resetting the fork clears only the fork's history.
        forked.reset()
        assert env.actions == [0, 1, 2]
        assert forked.actions == []
    finally:
        # Always release the fork, even when an assertion fails.
        forked.close()
def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float:
    """Compute the final reward of the environment.

    Note that this may modify the environment state. You should call
    :code:`reset()` before continuing to use the environment after this.

    The environment's current actions are first replayed from a reset, with
    up to five attempts. The reward then depends on :code:`self.value`:

    * CODESIZE: :code:`IrInstructionCountOz / max(IrInstructionCount, 1)`.
    * BINSIZE: :code:`ObjectTextSizeOz / max(ObjectTextSizeBytes, 1)`.
    * RUNTIME: the median runtime after the "-O3" baseline optimizations
      divided by the median runtime of the environment's state, both
      measured in a separate "llvm-v0" environment.

    Args:
        env: The environment to compute the final reward of.
        runtime_count: The number of runtime observations to collect for
            each of the two measurements. Only used for RUNTIME.

    Returns:
        The final reward, as described above.

    Raises:
        ValueError: If the actions could not be replayed without the episode
            ending in any of the attempts.
        AssertionError: If :code:`self.value` is not a handled target, or if
            a runtime observation does not contain :code:`runtime_count`
            values.
    """
    actions = list(env.actions)
    max_attempts = 5
    for attempt in range(1, max_attempts + 1):
        # Reset before every attempt, not just the first, so that a retry
        # never applies the actions on top of whatever a failed attempt left
        # behind.
        env.reset()
        _, _, done, info = env.step(actions)
        if not done:
            break
        logger.warning(
            "Attempt %d to apply actions during final reward failed: %s",
            attempt,
            info.get("error_details"),
        )
    else:
        raise ValueError("Failed to replay environment's actions")

    if self.value == OptimizationTarget.CODESIZE:
        # max(..., 1) guards against division by zero.
        return env.observation.IrInstructionCountOz() / max(
            env.observation.IrInstructionCount(), 1)

    if self.value == OptimizationTarget.BINSIZE:
        return env.observation.ObjectTextSizeOz() / max(
            env.observation.ObjectTextSizeBytes(), 1)

    if self.value == OptimizationTarget.RUNTIME:
        # NOTE(review): the lock presumably serializes runtime measurements
        # so that they do not disturb each other -- confirm.
        with _RUNTIME_LOCK:
            with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env:
                new_env.reset()
                new_env.runtime_observation_count = runtime_count
                new_env.runtime_warmup_count = 0

                # Measure the runtime of the state being evaluated.
                new_env.apply(env.state)
                final_runtimes = new_env.observation.Runtime()
                assert len(final_runtimes) == runtime_count

                # Measure the -O3 baseline from a fresh reset.
                new_env.reset()
                new_env.send_param("llvm.apply_baseline_optimizations", "-O3")
                o3_runtimes = new_env.observation.Runtime()
                assert len(o3_runtimes) == runtime_count

            logger.debug("O3 runtimes: %s", o3_runtimes)
            logger.debug("Final runtimes: %s", final_runtimes)
            # The 1e-12 floor guards against division by zero.
            speedup = np.median(o3_runtimes) / max(
                np.median(final_runtimes), 1e-12)
            logger.debug("Speedup: %.4f", speedup)

            return speedup

    # Raised explicitly rather than via `assert False`, which is stripped
    # when Python runs with -O and would let the method return None.
    raise AssertionError(f"Unknown OptimizationTarget: {self.value}")
def test_fuzz(env: LlvmEnv, reward_space: str):
    """Take random warmup steps, fork, and check that the fork is equivalent.

    After forking, the same random actions are applied to the parent and the
    fork; their `done` flags, observations, rewards and states must agree.
    If the episode ends during warmup the test finishes without forking.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space
    env.reset()
    print(f"Running fuzz test of environment {env.benchmark}")

    # Warm up with random actions so the fork is taken from a random state.
    for _ in range(PRE_FORK_ACTIONS):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            # The episode ended before a fork could be taken; nothing to test.
            return

    forked = env.fork()
    try:
        print(env.state)  # For debugging in case of error.
        assert env.state == forked.state

        # Apply identical actions to both and check they stay in lockstep.
        for _ in range(POST_FORK_ACTIONS):
            action = env.action_space.sample()
            parent_observation, parent_reward, parent_done, _ = env.step(action)
            forked_observation, forked_reward, forked_done, _ = forked.step(action)

            print(env.state)  # For debugging in case of error.

            assert parent_done == forked_done
            np.testing.assert_array_almost_equal(parent_observation,
                                                 forked_observation)
            if parent_reward != forked_reward:
                pytest.fail(
                    f"Parent environment produced reward {parent_reward}, fork produced reward {forked_reward}"
                )
            if parent_done:
                # The episode ended; there is nothing further to compare.
                break
            assert env.state == forked.state
    finally:
        # Always release the fork, even when a check fails.
        forked.close()
def test_episode_reward_with_non_default_reward_space(env: LlvmEnv):
    """Check that episode_reward stays at zero when step() is given an
    explicit list of reward spaces instead of using the environment's own."""
    env.reward_space = "IrInstructionCountOz"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0

    mem2reg = env.action_space["-mem2reg"]
    _, custom_rewards, _, _ = env.step(mem2reg, rewards=["IrInstructionCount"])

    # The requested reward is positive, yet the episode total is unchanged.
    assert custom_rewards[0] > 0
    assert env.episode_reward == 0
def test_fork_closed_service(env: LlvmEnv):
    """Check that fork() works after the parent has been closed, and that
    both environments still report the parent's action history."""
    env.reset(benchmark="cbench-v1/crc32")

    _, _, done, _ = env.step(0)
    assert not done
    assert env.actions == [0]

    # Close the parent; afterwards it has no service.
    env.close()
    assert not env.service

    with env.fork() as forked:
        assert env.actions == [0]
        assert forked.actions == [0]
def test_fork_modified_ir_is_the_same(env: LlvmEnv):
    """Test that the IR of a forked environment is the same.

    The IR is compared right after forking a modified environment, and again
    after applying one further action to both the parent and the fork.
    """

    def ir_without_first_line(e) -> str:
        # The first line of the IR is excluded from the comparison.
        # NOTE(review): presumably it holds a module identifier that differs
        # between environments -- confirm.
        return "\n".join(e.ir.split("\n")[1:])

    env.reset("cbench-v1/crc32")

    # Apply an action that modifies the benchmark.
    _, _, done, info = env.step(env.action_space.flags.index("-mem2reg"))
    assert not done
    assert not info["action_had_no_effect"]

    with env.fork() as fkd:
        assert ir_without_first_line(env) == ir_without_first_line(fkd)

        # Apply another action. The two step results are kept in separate
        # names so that the parent's result is checked as well, rather than
        # being overwritten by the fork's.
        _, _, env_done, env_info = env.step(env.action_space.flags.index("-gvn"))
        _, _, fkd_done, fkd_info = fkd.step(fkd.action_space.flags.index("-gvn"))
        assert not env_done
        assert not env_info["action_had_no_effect"]
        assert not fkd_done
        assert not fkd_info["action_had_no_effect"]

        # Check that IRs are still equivalent.
        assert ir_without_first_line(env) == ir_without_first_line(fkd)
def test_fork_regression_test(env: LlvmEnv, test: ForkRegressionTest):
    """Run the fork regression test:

    1. Initialize an environment.
    2. Apply a "pre_fork" sequence of actions.
    3. Create a fork of the environment.
    4. Apply a "post_fork" sequence of actions in both the fork and parent.
    5. Verify that the environment states have gone out of sync.
    """

    def to_actions(flags: str):
        # Map a whitespace-separated string of flags to actions.
        return [env.action_space[flag] for flag in flags.split()]

    env.reward_space = test.reward_space
    env.reset(test.benchmark)

    pre_fork_actions = to_actions(test.pre_fork)
    post_fork_actions = to_actions(test.post_fork)

    _, _, done, info = env.step(pre_fork_actions)
    assert not done, info

    with env.fork() as forked:
        # Sanity check: the states agree immediately after the fork.
        assert env.state == forked.state

        env.step(post_fork_actions)
        forked.step(post_fork_actions)

        # Verify that the environment states no longer line up.
        assert env.state != forked.state
def test_fuzz(env: LlvmEnv):
    """Apply a random trajectory to a random validatable cBench benchmark and
    check that `env.validate()` reports the resulting state as okay.

    A trajectory that ends the episode early is discarded and a new one is
    generated with the same benchmark and length.
    """
    benchmark = random.choice(VALIDATABLE_CBENCH_URIS)
    num_actions = random.randint(*RANDOM_TRAJECTORY_LENGTH_RANGE)
    print(benchmark)

    while True:
        env.reset(benchmark=benchmark)

        trajectory_completed = True
        for _ in range(num_actions):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:
                # Broken trajectory, retry.
                trajectory_completed = False
                break
        if not trajectory_completed:
            continue

        print(f"Validating state {env.state}")
        result = env.validate()
        assert result.okay(), result
        # One validated trajectory is enough; stop the test.
        return
def test_fuzz(env: LlvmEnv):
    """Apply a random trajectory to a random validatable benchmark and check
    that the benchmark's validation callback returns None for the result.

    A trajectory that ends the episode early is discarded and a new one is
    generated with the same benchmark and length.
    """
    benchmark = random.choice(VALIDATABLE_BENCHMARKS)
    num_actions = random.randint(*RANDOM_TRAJECTORY_LENGTH_RANGE)

    while True:
        env.reset(benchmark=benchmark)

        trajectory_completed = True
        for _ in range(num_actions):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:
                # Broken trajectory, retry.
                trajectory_completed = False
                break
        if not trajectory_completed:
            continue

        print(f"Validating state {env.state}")
        validation_callback = get_llvm_benchmark_validation_callback(env)
        # A callback must exist for the chosen benchmark.
        assert validation_callback
        assert validation_callback(env) is None
        # One validated trajectory is enough; stop the test.
        return
def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Check that parent and fork return equal rewards for the same actions.

    The fork is taken right after reset; -mem2reg and -simplifycfg are then
    applied to both environments in turn.
    """
    env.reward_space = reward_space
    env.reset("cbench-v1/dijkstra")

    flags = env.action_space.flags
    actions = []
    for flag_name in ["-mem2reg", "-simplifycfg"]:
        actions.append(flags.index(flag_name))

    fkd = env.fork()
    try:
        for action in actions:
            _, parent_reward, parent_done, _ = env.step(action)
            _, fork_reward, fork_done, _ = fkd.step(action)
            assert parent_done is False
            assert fork_done is False
            assert parent_reward == fork_reward
    finally:
        # Always release the fork, even when an assertion fails.
        fkd.close()
def test_gvn_sink_non_determinism(env: LlvmEnv, benchmark_name: str):
    """Regression test for -gvn-sink non-determinism.

    Applies -gvn-sink from a fresh reset ACTION_REPTITION_COUNT times and
    fails as soon as two runs produce IR with different SHA-1 checksums.

    See: https://github.com/facebookresearch/CompilerGym/issues/46
    """
    env.observation_space = "Ir"

    seen_digests = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        env.reset(benchmark=benchmark_name)

        gvn_sink = env.action_space.names.index("-gvn-sink")
        ir, _, done, _ = env.step(gvn_sink)
        assert not done

        seen_digests.add(hashlib.sha1(ir.encode("utf-8")).hexdigest())

        if len(seen_digests) != 1:
            pytest.fail(
                f"Repeating the -gvn-sink action {i} times on {benchmark_name} "
                "produced different states")
def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Test that rewards of a parent environment and its fork are equal.

    For each of -mem2reg and -simplifycfg in turn, a new fork of the parent
    is created, the action is applied to both, and the two step rewards are
    compared.
    """
    env.reward_space = reward_space
    env.reset("cBench-v0/dijkstra")

    # Resolve every action up front so that an unknown name fails before any
    # step is taken.
    names = env.action_space.names
    actions = [names.index(name) for name in ("-mem2reg", "-simplifycfg")]

    for action in actions:
        # A new fork is taken from the parent's current state on every
        # iteration.
        forked = env.fork()
        try:
            _, env_reward, _, _ = env.step(action)
            _, fkd_reward, _, _ = forked.step(action)
            assert env_reward == fkd_reward
        finally:
            # Always release the fork, even when the assertion fails.
            forked.close()
def test_fuzz(env: LlvmEnv, reward_space: str):
    """Record a random trajectory on a random benchmark, then replay it.

    After a reset, every replayed step must report the same `done` flag and
    (almost) equal observation and reward as the recorded step.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space

    env.reset(benchmark=random.choice(BENCHMARK_NAMES))
    recorded_steps = apply_random_trajectory(
        env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE)
    print(env.state)  # For debugging in case of failure.

    # Replay from a reset and compare step by step.
    env.reset()
    for step_number, recorded in enumerate(recorded_steps, start=1):
        action, observation, reward, done = recorded
        print(f"Replaying step {step_number}: {env.action_space.flags[action]}")

        replay_observation, replay_reward, replay_done, info = env.step(action)
        assert done == replay_done, info
        np.testing.assert_array_almost_equal(observation, replay_observation)
        np.testing.assert_almost_equal(reward, replay_reward)