def test_fork_child_process_is_not_orphaned(env: LlvmEnv):
    """Closing the parent env must not kill the service shared with a fork."""
    env.reset("cbench-v1/crc32")
    with env.fork() as fkd:
        # Both environments must share a single backend service process.
        assert isinstance(env.service.connection.process, subprocess.Popen)
        assert isinstance(fkd.service.connection.process, subprocess.Popen)
        assert env.service.connection.process.pid == fkd.service.connection.process.pid
        shared_proc = env.service.connection.process

        # Sanity check: the shared service is currently running.
        assert not env.service.connection.process.poll()
        assert not fkd.service.connection.process.poll()

        # Closing the parent must leave the shared service alive for the fork.
        env.close()
        assert not env.service
        assert not fkd.service.connection.process.poll()

        # Closing the last user of the service shuts it down.
        fkd.close()
        assert shared_proc.poll() is not None
def test_fork_state_fuzz_test(env: LlvmEnv):
    """Run random episodes and check that fork() produces equivalent state."""
    deadline = time() + FUZZ_TIME_SECONDS
    while time() < deadline:
        env.reset(benchmark="cBench-v0/dijkstra")

        # Take a few warmup steps to get an environment in a random state.
        for _ in range(A_FEW_RANDOM_ACTIONS):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:
                break  # Broken episode, restart.
        else:
            # Fork the environment and check that the states are equivalent.
            new_env = env.fork()
            try:
                assert env.state == new_env.state
                # Check that environment states remain equal if identical
                # subsequent steps are taken.
                for _ in range(A_FEW_RANDOM_ACTIONS):
                    action = env.action_space.sample()
                    _, _, done_a, _ = env.step(action)
                    _, _, done_b, _ = new_env.step(action)
                    assert done_a == done_b
                    if done_a:
                        break  # Broken episode, restart.
                assert env.state == new_env.state
            finally:
                new_env.close()
def test_fork_chain_child_processes_are_not_orphaned(env: LlvmEnv):
    """A chain of forks shares one service that dies only with the last env."""
    env.reset("cbench-v1/crc32")
    # Create a chain of forked environments.
    a = env.fork()
    b = a.fork()
    c = b.fork()
    d = c.fork()
    try:
        # Sanity check that all five environments share the same service.
        proc = env.service.connection.process
        for forked in (a, b, c, d):
            assert forked.service.connection.process == proc

        # Closing the forks one at a time keeps the shared service alive.
        for forked in (a, b, c, d):
            forked.close()
            assert proc.poll() is None

        # Closing the final environment drops the refcount to 0 and the
        # service shuts down.
        env.close()
        assert proc.poll() is not None
    finally:
        # close() is safe to call on an already-closed environment.
        a.close()
        b.close()
        c.close()
        d.close()
def test_fork_twice_test(env: LlvmEnv):
    """Test that fork() on a forked environment works."""
    env.reset(benchmark="cbench-v1/crc32")
    # Fork the environment, then fork the fork, closing both on exit.
    with env.fork() as fork_a, fork_a.fork() as fork_b:
        assert env.state == fork_a.state
        assert fork_a.state == fork_b.state
def test_fork_state(env: LlvmEnv):
    """Test that a fork inherits the parent's benchmark and action history."""
    env.reset("cbench-v1/crc32")
    env.step(0)
    assert env.actions == [0]

    with env.fork() as fkd:
        # BUG FIX: the original asserted `fkd.benchmark == fkd.benchmark`,
        # a tautology; the fork must be compared against the parent.
        assert fkd.benchmark == env.benchmark
        assert fkd.actions == env.actions
def test_fork_previous_cost_reward_update(env: LlvmEnv):
    """Identical post-fork steps must produce identical rewards."""
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")

    mem2reg = env.action_space.flags.index("-mem2reg")
    env.step(mem2reg)
    with env.fork() as fkd:
        _, parent_reward, _, _ = env.step(mem2reg)
        _, fork_reward, _, _ = fkd.step(mem2reg)
        assert parent_reward == fork_reward
def test_fork_spaces_are_same(env: LlvmEnv):
    """A fork must inherit the parent's spaces and benchmark."""
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")

    with env.fork() as fkd:
        # The fork mirrors each of these attributes of the parent.
        for attr in ("observation_space", "reward_space", "benchmark"):
            assert getattr(fkd, attr) == getattr(env, attr)
def test_with_statement(env: LlvmEnv):
    """Test that the `with` statement context manager works on forks."""
    env.reset("cbench-v1/crc32")
    env.step(0)
    with env.fork() as forked:
        assert forked.in_episode
        assert forked.actions == [0]
    # Leaving the with-block closes the fork, but not the parent.
    assert not forked.in_episode
    assert env.in_episode
def train_and_run(env: LlvmEnv) -> None:
    """Run tabular Q learning on an environment.

    :param env: The environment to train on and then roll out the learned
        policy against.
    """
    FLAGS.log_every = 0  # Disable printing to stdout
    q_table: Dict[StateActionTuple, float] = {}
    env.observation_space = "Autophase"
    # FIX: use a context manager so the forked training environment is
    # closed even if train() raises; the original leaked it on error.
    with env.fork() as training_env:
        train(q_table, training_env)
    rollout(q_table, env, printout=False)
def test_fork_state(env: LlvmEnv):
    """Test that a fork inherits the parent's benchmark and action history."""
    env.reset("cBench-v0/crc32")
    env.step(0)
    assert env.actions == [0]

    new_env = env.fork()
    try:
        # BUG FIX: the original asserted `new_env.benchmark == new_env.benchmark`,
        # a tautology; the fork must be compared against the parent.
        assert new_env.benchmark == env.benchmark
        assert new_env.actions == env.actions
    finally:
        new_env.close()
def test_fork_previous_cost_lazy_reward_update(env: LlvmEnv):
    """Lazily-evaluated rewards must stay in sync across a fork."""
    env.reset("cbench-v1/crc32")
    mem2reg = env.action_space.flags.index("-mem2reg")
    env.step(mem2reg)
    # Force evaluation of the lazy reward before forking.
    env.reward["IrInstructionCount"]
    with env.fork() as fkd:
        env.step(mem2reg)
        fkd.step(mem2reg)
        assert env.reward["IrInstructionCount"] == fkd.reward["IrInstructionCount"]
def test_fork_custom_benchmark(env: LlvmEnv):
    """Forking a custom benchmark must produce identical IR, even after reset."""
    benchmark = env.make_benchmark(EXAMPLE_BITCODE_FILE)
    env.reset(benchmark=benchmark)

    def stripped_ir(e):
        """Strip the ModuleID line from IR."""
        return "\n".join(e.ir.split("\n")[1:])

    with env.fork() as fkd:
        assert stripped_ir(env) == stripped_ir(fkd)
        fkd.reset()
        assert stripped_ir(env) == stripped_ir(fkd)
def test_fork_closed_service(env: LlvmEnv):
    """Forking an environment whose service was closed must still work."""
    env.reset(benchmark="cbench-v1/crc32")

    _, _, done, _ = env.step(0)
    assert not done
    assert env.actions == [0]

    # Shut down the backend service.
    env.close()
    assert not env.service

    # The fork must reproduce the parent's action history on a new service.
    with env.fork() as fkd:
        assert env.actions == [0]
        assert fkd.actions == [0]
def test_fork_twice_test(env: LlvmEnv):
    """Test that fork() on a forked environment works."""
    env.reset(benchmark="cBench-v0/crc32")
    # Context managers close each fork on exit, replacing nested try/finally.
    with env.fork() as fork_a:
        with fork_a.fork() as fork_b:
            assert env.state == fork_a.state
            assert fork_a.state == fork_b.state
def test_fork_reset(env: LlvmEnv):
    """reset() on a fork must not reset the parent environment."""
    env.reset("cbench-v1/crc32")
    for action in (0, 1, 2):
        env.step(action)

    with env.fork() as fkd:
        fkd.step(3)
        assert env.actions == [0, 1, 2]
        assert fkd.actions == [0, 1, 2, 3]

        fkd.reset()
        assert env.actions == [0, 1, 2]
        assert fkd.actions == []
def test_fork_custom_benchmark(env: LlvmEnv):
    """Forking a custom benchmark must produce identical IR, even after reset."""
    benchmark = env.make_benchmark(EXAMPLE_BITCODE_FILE)
    env.reset(benchmark=benchmark)

    def ir(env):
        """Strip the ModuleID line from IR."""
        return "\n".join(env.ir.split("\n")[1:])

    # BUG FIX: fork() was previously called inside the try-block, so a
    # failure in fork() left `new_env` unbound and the finally-clause
    # raised NameError instead of the real error.
    new_env = env.fork()
    try:
        assert ir(env) == ir(new_env)
        new_env.reset()
        assert ir(env) == ir(new_env)
    finally:
        new_env.close()
def test_fork_reset(env: LlvmEnv):
    """reset() on a fork must not reset the parent environment."""
    env.reset("cBench-v0/crc32")
    for action in (0, 1, 2):
        env.step(action)

    new_env = env.fork()
    try:
        new_env.step(3)
        assert env.actions == [0, 1, 2]
        assert new_env.actions == [0, 1, 2, 3]

        new_env.reset()
        assert env.actions == [0, 1, 2]
        assert new_env.actions == []
    finally:
        new_env.close()
def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Test that rewards are equal after fork() is called."""
    env.reward_space = reward_space
    env.reset("cbench-v1/dijkstra")

    flag_indices = [
        env.action_space.flags.index(flag)
        for flag in ("-mem2reg", "-simplifycfg")
    ]

    with env.fork() as fkd:
        # Step both environments in lockstep and compare rewards.
        for action in flag_indices:
            _, env_reward, env_done, _ = env.step(action)
            _, fkd_reward, fkd_done, _ = fkd.step(action)
            assert env_done is False
            assert fkd_done is False
            assert env_reward == fkd_reward
def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Test that identical post-fork steps yield identical rewards.

    :param env: The environment under test.
    :param reward_space: The name of the reward space to use.
    """
    env.reward_space = reward_space
    env.reset("cBench-v0/dijkstra")

    # FIX: completed the truncated docstring and replaced the
    # `for i in range(len(...))` index loop with direct iteration.
    action_names = ["-mem2reg", "-simplifycfg"]
    action_indices = [env.action_space.names.index(n) for n in action_names]

    for action_index in action_indices:
        forked = env.fork()
        try:
            _, env_reward, _, _ = env.step(action_index)
            _, fkd_reward, _, _ = forked.step(action_index)
            assert env_reward == fkd_reward
        finally:
            forked.close()
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test generates a random trajectory and checks that fork() produces
    an equivalent state. It then runs a second trajectory on the two
    environments to check that behavior is consistent across them.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space
    env.reset()
    print(f"Running fuzz test of environment {env.benchmark}")

    # Take a few warmup steps to get an environment in a random state.
    for _ in range(PRE_FORK_ACTIONS):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            break  # Broken episode, restart.
    else:
        # Fork the environment and check that the states are equivalent.
        forked = env.fork()
        try:
            print(env.state)  # For debugging in case of error.
            assert env.state == forked.state
            # Check that environment states remain equal if identical
            # subsequent steps are taken.
            for _ in range(POST_FORK_ACTIONS):
                action = env.action_space.sample()
                obs_a, reward_a, done_a, _ = env.step(action)
                obs_b, reward_b, done_b, _ = forked.step(action)
                print(env.state)  # For debugging in case of error.
                assert done_a == done_b
                np.testing.assert_array_almost_equal(obs_a, obs_b)
                if reward_a != reward_b:
                    pytest.fail(
                        f"Parent environment produced reward {reward_a}, fork produced reward {reward_b}"
                    )
                if done_a:
                    break  # Broken episode, we're done.
            assert env.state == forked.state
        finally:
            forked.close()
def test_fork_modified_ir_is_the_same(env: LlvmEnv):
    """Test that the IR of a forked environment is the same."""
    env.reset("cbench-v1/crc32")

    def without_module_id(ir: str) -> str:
        # Drop the first line (the ModuleID header) before comparing IR.
        return "\n".join(ir.split("\n")[1:])

    # Apply an action that modifies the benchmark.
    _, _, done, info = env.step(env.action_space.flags.index("-mem2reg"))
    assert not done
    assert not info["action_had_no_effect"]

    with env.fork() as fkd:
        assert without_module_id(env.ir) == without_module_id(fkd.ir)

        # Apply another action.
        _, _, done, info = env.step(env.action_space.flags.index("-gvn"))
        _, _, done, info = fkd.step(fkd.action_space.flags.index("-gvn"))
        assert not done
        assert not info["action_had_no_effect"]

        # Check that IRs are still equivalent.
        assert without_module_id(env.ir) == without_module_id(fkd.ir)
def test_fork_regression_test(env: LlvmEnv, test: ForkRegressionTest):
    """Run the fork regression test:

    1. Initialize an environment.
    2. Apply a "pre_fork" sequence of actions.
    3. Create a fork of the environment.
    4. Apply a "post_fork" sequence of actions in both the fork and parent.
    5. Verify that the environment states have gone out of sync.
    """
    env.reward_space = test.reward_space
    env.reset(test.benchmark)

    pre_actions = [env.action_space[f] for f in test.pre_fork.split()]
    post_actions = [env.action_space[f] for f in test.post_fork.split()]

    _, _, done, info = env.multistep(pre_actions)
    assert not done, info

    with env.fork() as forked:
        assert env.state == forked.state  # Sanity check
        env.multistep(post_actions)
        forked.multistep(post_actions)
        # Verify that the environment states no longer line up.
        assert env.state != forked.state
def test_fork_before_reset(env: LlvmEnv):
    """Test that fork() before reset() starts an episode."""
    assert not env.in_episode
    with env.fork() as forked:
        # Forking implicitly starts an episode in both environments.
        assert env.in_episode
        assert forked.in_episode