Example #1
0
    def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float:
        """Compute the final reward of the environment.

        Note that this may modify the environment state. You should call
        :code:`reset()` before continuing to use the environment after this.

        :param env: The environment whose accumulated actions are scored.

        :param runtime_count: The number of runtime observations to collect
            when the optimization target is runtime.

        :return: The final reward: a size ratio relative to the -Oz baseline
            for the code-size and binary-size targets, or the median speedup
            over the -O3 baseline for the runtime target.

        :raises ValueError: If the environment's actions cannot be replayed
            after five attempts, or if the optimization target is unknown.
        """
        # Reapply the environment state in a retry loop.
        actions = list(env.actions)
        env.reset()
        for i in range(1, 5 + 1):
            _, _, done, info = env.multistep(actions)
            if not done:
                break
            logger.warning(
                "Attempt %d to apply actions during final reward failed: %s",
                i,
                info.get("error_details"),
            )
            # Fix: restore a clean state before retrying. Without this, the
            # next multistep() would replay the actions on top of whatever
            # partially-applied / terminated state the failed attempt left.
            env.reset()
        else:
            raise ValueError("Failed to replay environment's actions")

        if self.value == OptimizationTarget.CODESIZE:
            # Ratio of the -Oz baseline instruction count to the achieved
            # count; max(..., 1) guards against division by zero.
            return env.observation.IrInstructionCountOz() / max(
                env.observation.IrInstructionCount(), 1
            )

        if self.value == OptimizationTarget.BINSIZE:
            # Same ratio as above, but over object .text section sizes.
            return env.observation.ObjectTextSizeOz() / max(
                env.observation.ObjectTextSizeBytes(), 1
            )

        if self.value == OptimizationTarget.RUNTIME:
            # Serialize runtime measurement so concurrent workloads do not
            # perturb the timings.
            with _RUNTIME_LOCK:
                with compiler_gym.make("llvm-v0", benchmark=env.benchmark) as new_env:
                    # Measure the runtime of the final (optimized) state in a
                    # fresh environment, without warmup runs.
                    new_env.reset()
                    new_env.runtime_observation_count = runtime_count
                    new_env.runtime_warmup_count = 0
                    new_env.apply(env.state)
                    final_runtimes = new_env.observation.Runtime()
                    assert len(final_runtimes) == runtime_count

                    # Measure the -O3 baseline on a fresh state of the same
                    # benchmark; this is the numerator of the speedup.
                    new_env.reset()
                    new_env.send_param("llvm.apply_baseline_optimizations", "-O3")
                    o3_runtimes = new_env.observation.Runtime()
                    assert len(o3_runtimes) == runtime_count

                logger.debug("O3 runtimes: %s", o3_runtimes)
                logger.debug("Final runtimes: %s", final_runtimes)
                # Median is robust to outlier timings; the epsilon guards
                # against a zero median in degenerate measurements.
                speedup = np.median(o3_runtimes) / max(np.median(final_runtimes), 1e-12)
                logger.debug("Speedup: %.4f", speedup)

                return speedup

        # Fix: raise instead of `assert False` — asserts are stripped under
        # `python -O`, which would let an unknown target fall through and
        # silently return None.
        raise ValueError(f"Unknown OptimizationTarget: {self.value}")
def test_fork_regression_test(env: LlvmEnv, test: ForkRegressionTest):
    """Check that a forked environment diverges from its parent.

    The environment is initialized and a "pre_fork" action sequence is
    applied, then the environment is forked. The same "post_fork" action
    sequence is then replayed in both the parent and the fork, after which
    the two environment states are expected to have gone out of sync.
    """
    env.reward_space = test.reward_space
    env.reset(test.benchmark)
    pre_fork_actions = [env.action_space[name] for name in test.pre_fork.split()]
    post_fork_actions = [env.action_space[name] for name in test.post_fork.split()]

    _, _, done, info = env.multistep(pre_fork_actions)
    assert not done, info

    with env.fork() as child:
        # Sanity check: immediately after forking, both states must match.
        assert env.state == child.state

        env.multistep(post_fork_actions)
        child.multistep(post_fork_actions)
        # Replaying the same post-fork actions in both environments should
        # have desynchronized their states.
        assert env.state != child.state