def test_fuzz(env: LlvmEnv, reward_space: str): """This test produces a random trajectory, resets the environment, then replays the trajectory and checks that it produces the same state. """ env.observation_space = "Autophase" env.reward_space = reward_space benchmark = env.datasets["generator://csmith-v0"].random_benchmark() print(benchmark.uri) # For debugging in case of failure. try: env.reset(benchmark=benchmark) except BenchmarkInitError: return trajectory = apply_random_trajectory( env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE ) print(env.state) # For debugging in case of failure. env.reset(benchmark=benchmark) for i, (action, observation, reward, done) in enumerate(trajectory, start=1): print(f"Replaying step {i}: {env.action_space.flags[action]}") replay_observation, replay_reward, replay_done, info = env.step(action) assert done == replay_done, info np.testing.assert_array_almost_equal(observation, replay_observation) np.testing.assert_almost_equal(reward, replay_reward)
def test_episode_reward_init_zero(env: LlvmEnv):
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0

    _, reward, _, _ = env.step(env.action_space["-mem2reg"])
    assert reward > 0
    assert env.episode_reward == reward

    env.reset()
    assert env.episode_reward == 0

def test_fork_previous_cost_reward_update(env: LlvmEnv):
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")

    env.step(env.action_space.flags.index("-mem2reg"))
    with env.fork() as fkd:
        _, a, _, _ = env.step(env.action_space.flags.index("-mem2reg"))
        _, b, _, _ = fkd.step(env.action_space.flags.index("-mem2reg"))
        assert a == b

def test_fork_spaces_are_same(env: LlvmEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")

    with env.fork() as fkd:
        assert fkd.observation_space == env.observation_space
        assert fkd.reward_space == env.reward_space
        assert fkd.benchmark == env.benchmark

def test_select_best_action_closed_environment(env: LlvmEnv):
    """Test that select_best_action() recovers from an environment whose
    service has closed."""
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")
    with ThreadPoolExecutor() as executor:
        best_a = select_best_action(env, executor)
        env.close()
        best_b = select_best_action(env, executor)
        assert best_a == best_b

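# select_best_action() exercised above is defined elsewhere; it is not part of
# this file. As a purely hypothetical illustration of the kind of one-step
# greedy lookahead such a helper might perform (and of why it needs a live
# backend service, which is what the recovery test above exercises), a sketch
# could look like the following. The name and behavior are assumptions.
from concurrent.futures import ThreadPoolExecutor


def select_best_action_sketch(env, executor: ThreadPoolExecutor) -> int:
    """Return the index of the action with the greatest one-step reward."""

    def try_action(action: int) -> float:
        # Evaluate the action in a fork so that the parent state is untouched.
        with env.fork() as fkd:
            _, reward, _, _ = fkd.step(action)
        return reward

    actions = list(range(env.action_space.n))
    rewards = list(executor.map(try_action, actions))
    return max(actions, key=lambda a: rewards[a])
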
def test_episode_reward_with_non_default_reward_space(env: LlvmEnv):
    """Test that episode_reward is not updated when custom rewards are passed
    to step()."""
    env.reward_space = "IrInstructionCountOz"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0

    _, rewards, _, _ = env.step(
        env.action_space["-mem2reg"],
        rewards=["IrInstructionCount"],
    )
    assert rewards[0] > 0
    assert env.episode_reward == 0

def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Test that rewards are equal after fork() is called."""
    env.reward_space = reward_space
    env.reset("cbench-v1/dijkstra")

    actions = [env.action_space.flags.index(n) for n in ["-mem2reg", "-simplifycfg"]]

    forked = env.fork()
    try:
        for action in actions:
            _, env_reward, env_done, _ = env.step(action)
            _, fkd_reward, fkd_done, _ = forked.step(action)
            assert env_done is False
            assert fkd_done is False
            assert env_reward == fkd_reward
    finally:
        forked.close()

def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Test that rewards are equal in the parent and forked environments."""
    env.reward_space = reward_space
    env.reset("cBench-v0/dijkstra")

    act_names = ["-mem2reg", "-simplifycfg"]
    act_indcs = [env.action_space.names.index(n) for n in act_names]

    for act_indc in act_indcs:
        forked = env.fork()
        try:
            _, env_reward, _, _ = env.step(act_indc)
            _, fkd_reward, _, _ = forked.step(act_indc)
            assert env_reward == fkd_reward
        finally:
            forked.close()

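# The fuzz test below steps the environment a fixed number of times before and
# after forking. PRE_FORK_ACTIONS and POST_FORK_ACTIONS are module-level
# constants in the original test file; the values here are representative
# assumptions, not the canonical settings:
PRE_FORK_ACTIONS = 10
POST_FORK_ACTIONS = 10
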
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test generates a random trajectory and checks that fork() produces
    an equivalent state. It then runs a second trajectory on the two
    environments to check that behavior is consistent across them.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space
    env.reset()
    print(f"Running fuzz test of environment {env.benchmark}")

    # Take a few warmup steps to get an environment in a random state.
    for _ in range(PRE_FORK_ACTIONS):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            break  # Broken episode, restart.
    else:
        # Fork the environment and check that the states are equivalent.
        fkd = env.fork()
        try:
            print(env.state)  # For debugging in case of error.
            assert env.state == fkd.state
            # Check that environment states remain equal if identical
            # subsequent steps are taken.
            for _ in range(POST_FORK_ACTIONS):
                action = env.action_space.sample()
                observation_a, reward_a, done_a, _ = env.step(action)
                observation_b, reward_b, done_b, _ = fkd.step(action)
                print(env.state)  # For debugging in case of error.
                assert done_a == done_b
                np.testing.assert_array_almost_equal(observation_a, observation_b)
                if reward_a != reward_b:
                    pytest.fail(
                        f"Parent environment produced reward {reward_a}, "
                        f"fork produced reward {reward_b}"
                    )
                if done_a:
                    break  # Broken episode, we're done.
            assert env.state == fkd.state
        finally:
            fkd.close()

def test_fuzz(env: LlvmEnv, observation_space: str, reward_space: str):
    """This test produces a random trajectory using a program generated with
    llvm-stress.
    """
    benchmark = env.datasets["generator://llvm-stress-v0"].random_benchmark()
    print(benchmark.uri)  # For debugging in case of failure.

    env.observation_space = observation_space
    env.reward_space = reward_space
    try:
        env.reset(benchmark=benchmark)
    except BenchmarkInitError:
        return  # Benchmark is invalid.

    apply_random_trajectory(
        env,
        random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE,
        timeout=10,
    )
    print(env.state)  # For debugging in case of failure.

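# The fuzz test below picks a benchmark at random from BENCHMARK_NAMES, a
# module-level list of benchmark URIs defined elsewhere. The entries here are
# illustrative assumptions, not the actual list:
BENCHMARK_NAMES = [
    "benchmark://cbench-v1/crc32",
    "benchmark://cbench-v1/dijkstra",
    "benchmark://cbench-v1/qsort",
]
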
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test produces a random trajectory, resets the environment, then
    replays the trajectory and checks that it produces the same state.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space

    env.reset(benchmark=random.choice(BENCHMARK_NAMES))
    trajectory = apply_random_trajectory(
        env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE
    )
    print(env.state)  # For debugging in case of failure.
    env.reset()

    for i, (action, observation, reward, done) in enumerate(trajectory, start=1):
        print(f"Replaying step {i}: {env.action_space.flags[action]}")
        replay_observation, replay_reward, replay_done, info = env.step(action)
        assert done == replay_done, info
        np.testing.assert_array_almost_equal(observation, replay_observation)
        np.testing.assert_almost_equal(reward, replay_reward)

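# The regression test below is parametrized with a ForkRegressionTest record
# defined elsewhere in the test suite. The field names are inferred from the
# attribute accesses in the test body; this sketch of its shape is an
# assumption:
from typing import NamedTuple


class ForkRegressionTest(NamedTuple):
    benchmark: str
    pre_fork: str  # Space-separated sequence of flags applied before fork().
    post_fork: str  # Space-separated sequence of flags applied after fork().
    reward_space: str
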
def test_fork_regression_test(env: LlvmEnv, test: ForkRegressionTest):
    """Run the fork regression test:

    1. Initialize an environment.
    2. Apply a "pre_fork" sequence of actions.
    3. Create a fork of the environment.
    4. Apply a "post_fork" sequence of actions in both the fork and parent.
    5. Verify that the environment states have gone out of sync.
    """
    env.reward_space = test.reward_space
    env.reset(test.benchmark)
    pre_fork = [env.action_space[f] for f in test.pre_fork.split()]
    post_fork = [env.action_space[f] for f in test.post_fork.split()]

    _, _, done, info = env.multistep(pre_fork)
    assert not done, info

    with env.fork() as fkd:
        assert env.state == fkd.state  # Sanity check.
        env.multistep(post_fork)
        fkd.multistep(post_fork)
        # Verify that the environment states no longer line up.
        assert env.state != fkd.state