def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float:
    """Compute the final reward of the environment.

    Note that this may modify the environment state. You should call
    :code:`reset()` before continuing to use the environment after this.

    :param env: The environment to score.
    :param runtime_count: The number of runtime observations to gather when
        the optimization target is runtime.
    :return: The final scalar reward.
    :raises ValueError: If the environment's actions cannot be replayed, or
        if the optimization target is unrecognized.
    """
    # Reapply the environment state in a retry loop.
    actions = list(env.actions)
    env.reset()
    for i in range(1, 5 + 1):
        _, _, done, info = env.multistep(actions)
        if not done:
            break
        logger.warning(
            "Attempt %d to apply actions during final reward failed: %s",
            i,
            info.get("error_details"),
        )
        # Fix: start a fresh episode before retrying. Without this reset the
        # retry replays the actions onto an already-terminated episode, so
        # every subsequent attempt is doomed to fail in the same way.
        env.reset()
    else:
        raise ValueError("Failed to replay environment's actions")

    if self.value == OptimizationTarget.CODESIZE:
        # Ratio of -Oz instruction count to achieved instruction count;
        # max(..., 1) guards against division by zero on empty modules.
        return env.observation.IrInstructionCountOz() / max(
            env.observation.IrInstructionCount(), 1
        )

    if self.value == OptimizationTarget.BINSIZE:
        # Same ratio structure, but over .text section sizes.
        return env.observation.ObjectTextSizeOz() / max(
            env.observation.ObjectTextSizeBytes(), 1
        )

    if self.value == OptimizationTarget.RUNTIME:
        # Runtime measurement is serialized across threads to avoid noisy
        # timings from concurrent benchmark executions.
        with _RUNTIME_LOCK:
            with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env:
                # Measure the runtime of the state found by the search.
                new_env.reset()
                new_env.runtime_observation_count = runtime_count
                new_env.runtime_warmup_count = 0
                new_env.apply(env.state)
                final_runtimes = new_env.observation.Runtime()
                assert len(final_runtimes) == runtime_count

                # Measure the -O3 baseline on the same benchmark.
                new_env.reset()
                new_env.send_param("llvm.apply_baseline_optimizations", "-O3")
                o3_runtimes = new_env.observation.Runtime()
                assert len(o3_runtimes) == runtime_count

                logger.debug("O3 runtimes: %s", o3_runtimes)
                logger.debug("Final runtimes: %s", final_runtimes)
                # Medians resist outlier runs; the epsilon floor prevents a
                # zero-division on a degenerate all-zero timing sample.
                speedup = np.median(o3_runtimes) / max(np.median(final_runtimes), 1e-12)
                logger.debug("Speedup: %.4f", speedup)
                return speedup

    # Fix: raise instead of `assert False` — asserts are stripped under
    # `python -O`, which would silently return None here.
    raise ValueError(f"Unknown OptimizationTarget: {self.value}")
def test_fork_regression_test(env: LlvmEnv, test: ForkRegressionTest):
    """Check that a fork diverges from its parent after post-fork actions.

    The scenario: initialize an environment, apply a shared "pre_fork"
    action sequence, fork it, then apply the same "post_fork" sequence to
    both parent and fork and assert that their states have gone out of sync.
    """
    env.reward_space = test.reward_space
    env.reset(test.benchmark)

    def to_actions(names: str):
        # Translate a whitespace-separated list of action names into the
        # corresponding action-space indices.
        return [env.action_space[name] for name in names.split()]

    pre_fork_actions = to_actions(test.pre_fork)
    post_fork_actions = to_actions(test.post_fork)

    _, _, done, info = env.multistep(pre_fork_actions)
    assert not done, info

    with env.fork() as child:
        # Sanity check: immediately after forking the states must agree.
        assert env.state == child.state
        env.multistep(post_fork_actions)
        child.multistep(post_fork_actions)
        # The regression being guarded against: states should now differ.
        assert env.state != child.state