def test_fork_state_fuzz_test(env: LlvmEnv):
    """Run random episodes and check that fork() produces equivalent state."""
    end_time = time() + FUZZ_TIME_SECONDS
    while time() < end_time:
        env.reset(benchmark="cBench-v0/dijkstra")

        # Take a few warmup steps to get an environment in a random state.
        for _ in range(A_FEW_RANDOM_ACTIONS):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:  # Broken episode, restart.
                break
        else:
            # Fork the environment and check that the states are equivalent.
            new_env = env.fork()
            try:
                assert env.state == new_env.state
                # Check that environment states remain equal if identical
                # subsequent steps are taken.
                for _ in range(A_FEW_RANDOM_ACTIONS):
                    action = env.action_space.sample()
                    _, _, done_a, _ = env.step(action)
                    _, _, done_b, _ = new_env.step(action)
                    assert done_a == done_b
                    if done_a:  # Broken episode, restart.
                        break
                    assert env.state == new_env.state
            finally:
                new_env.close()
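The test above relies on module-level constants from the test harness. A minimal sketch of the assumed imports and values (the numbers are illustrative, not taken from the source):

from time import time

# Illustrative values; the real test module defines its own.
FUZZ_TIME_SECONDS = 60       # Wall-clock budget for the fuzz loop.
A_FEW_RANDOM_ACTIONS = 10    # Steps taken before and after fork().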
Example #2
def test_fork_state(env: LlvmEnv):
    env.reset("cbench-v1/crc32")
    env.step(0)
    assert env.actions == [0]

    with env.fork() as fkd:
        assert fkd.benchmark == env.benchmark
        assert fkd.actions == env.actions
Example #3
def test_with_statement(env: LlvmEnv):
    """Test that the `with` statement context manager works on forks."""
    env.reset("cbench-v1/crc32")
    env.step(0)
    with env.fork() as fkd:
        assert fkd.in_episode
        assert fkd.actions == [0]
    assert not fkd.in_episode
    assert env.in_episode
Example #4
def test_fork_previous_cost_reward_update(env: LlvmEnv):
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")

    env.step(env.action_space.flags.index("-mem2reg"))
    with env.fork() as fkd:
        _, a, _, _ = env.step(env.action_space.flags.index("-mem2reg"))
        _, b, _, _ = fkd.step(env.action_space.flags.index("-mem2reg"))
        assert a == b
Example #5
def test_fork_state(env: LlvmEnv):
    env.reset("cBench-v0/crc32")
    env.step(0)
    assert env.actions == [0]

    new_env = env.fork()
    try:
        assert new_env.benchmark == env.benchmark
        assert new_env.actions == env.actions
    finally:
        new_env.close()
Example #6
def test_fork_previous_cost_lazy_reward_update(env: LlvmEnv):
    env.reset("cbench-v1/crc32")

    env.step(env.action_space.flags.index("-mem2reg"))
    env.reward["IrInstructionCount"]  # Force the lazy reward to be computed.
    with env.fork() as fkd:
        env.step(env.action_space.flags.index("-mem2reg"))
        fkd.step(env.action_space.flags.index("-mem2reg"))

        assert env.reward["IrInstructionCount"] == fkd.reward[
            "IrInstructionCount"]
Example #7
def e_greedy_search(env: LlvmEnv) -> None:
    """Run an ϵ-greedy search on an environment."""
    step_count = 0
    with ThreadPoolExecutor(max_workers=FLAGS.nproc) as executor:
        while True:
            step_count += 1

            if random.random() < FLAGS.epsilon:
                # Exploratory step. Randomly select and apply an action.
                action = env.action_space.sample()
                _, reward, done, _ = env.step(action)
                logging.debug(
                    "Step %d, exploratory action %s, reward %.4f, cumulative %.4f",
                    step_count,
                    env.action_space.flags[action],
                    reward,
                    env.episode_reward,
                )
            else:
                # Select the best reward and apply it, or terminate the search
                # if no positive reward is attainable.
                best = select_best_action(env, executor)
                if best.reward <= 0:
                    logging.debug(
                        "Greedy search terminated after %d steps, "
                        "no further reward attainable",
                        step_count,
                    )
                    done = True
                else:
                    _, reward, done, _ = env.step(best.action)
                    logging.debug(
                        "Step %d, greedy action %s, reward %.4f, cumulative %.4f",
                        step_count,
                        env.action_space.flags[best.action],
                        reward,
                        env.episode_reward,
                    )
                    if env.reward_space.deterministic and reward != best.reward:
                        logging.warning(
                            "Action %s produced different reward on replay, %.4f != %.4f",
                            env.action_space.flags[best.action],
                            best.reward,
                            reward,
                        )

                # Stop the search if we have reached a terminal state.
                if done:
                    return
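select_best_action() is defined elsewhere in the script. A minimal sketch of one way it could work, scoring every action on a fork of the environment through the thread pool; the BestAction tuple and the exhaustive one-step evaluation are assumptions, not the script's actual implementation:

from concurrent.futures import ThreadPoolExecutor
from typing import NamedTuple


class BestAction(NamedTuple):
    action: int
    reward: float


def _eval_action(env, action: int) -> BestAction:
    """Apply one action on a fork of `env` and report the reward it yields."""
    with env.fork() as fkd:
        _, reward, _, _ = fkd.step(action)
    return BestAction(action=action, reward=reward)


def select_best_action(env, executor: ThreadPoolExecutor) -> BestAction:
    """Return the single action with the greatest one-step reward."""
    futures = [
        executor.submit(_eval_action, env, action)
        for action in range(env.action_space.n)
    ]
    return max((f.result() for f in futures), key=lambda best: best.reward)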
Example #8
def test_fork_reset(env: LlvmEnv):
    env.reset("cbench-v1/crc32")
    env.step(0)
    env.step(1)
    env.step(2)

    with env.fork() as fkd:
        fkd.step(3)

        assert env.actions == [0, 1, 2]
        assert fkd.actions == [0, 1, 2, 3]

        fkd.reset()
        assert env.actions == [0, 1, 2]
        assert fkd.actions == []
Example #9
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test produces a random trajectory, resets the environment, then
    replays the trajectory and checks that it produces the same state.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space
    benchmark = env.datasets["generator://csmith-v0"].random_benchmark()
    print(benchmark.uri)  # For debugging in case of failure.

    try:
        env.reset(benchmark=benchmark)
    except BenchmarkInitError:
        return

    trajectory = apply_random_trajectory(
        env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE
    )
    print(env.state)  # For debugging in case of failure.
    env.reset(benchmark=benchmark)

    for i, (action, observation, reward, done) in enumerate(trajectory, start=1):
        print(f"Replaying step {i}: {env.action_space.flags[action]}")
        replay_observation, replay_reward, replay_done, info = env.step(action)
        assert done == replay_done, info

        np.testing.assert_array_almost_equal(observation, replay_observation)
        np.testing.assert_almost_equal(reward, replay_reward)
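apply_random_trajectory() is a shared fuzzing helper. A minimal sketch consistent with how the test consumes its result, returning the (action, observation, reward, done) tuples it generated; the exact signature and return type are assumptions:

import random
from typing import List, Tuple


def apply_random_trajectory(
    env, random_trajectory_length_range: Tuple[int, int]
) -> List[Tuple]:
    """Take a random number of random steps, recording every transition."""
    trajectory = []
    for _ in range(random.randint(*random_trajectory_length_range)):
        action = env.action_space.sample()
        observation, reward, done, _ = env.step(action)
        trajectory.append((action, observation, reward, done))
        if done:
            break  # Broken episode; return the partial trajectory.
    return trajectory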
Example #10
def random_search(env: LlvmEnv) -> None:
    """Run a random search on the given environment."""
    patience = int(env.action_space.n * FLAGS.patience_ratio)

    # Start parallel random search workers.
    workers = [
        RandomAgentWorker(
            make_env=lambda: gym.make("llvm-ic-v0", benchmark=env.benchmark),
            patience=patience,
        ) for _ in range(FLAGS.nproc)
    ]
    for worker in workers:
        worker.start()

    sleep(FLAGS.search_time)

    # Stop the workers.
    for worker in workers:
        worker.alive = False
    for worker in workers:
        worker.join()

    # Aggregate the best results.
    best_actions = []
    best_reward = -float("inf")
    for worker in workers:
        if worker.best_returns > best_reward:
            best_reward, best_actions = worker.best_returns, list(
                worker.best_actions)

    # Replay the best sequence of actions to produce the final environment
    # state.
    for action in best_actions:
        _, _, done, _ = env.step(action)
        assert not done
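RandomAgentWorker is defined elsewhere. A minimal sketch of a thread that runs random episodes, tracking the best episode return seen so far and the actions that produced it; the attribute names follow their use above, everything else is an assumption:

from threading import Thread


class RandomAgentWorker(Thread):
    """Run random episodes until `alive` is set to False."""

    def __init__(self, make_env, patience: int):
        super().__init__()
        self.make_env = make_env
        self.patience = patience
        self.alive = True
        self.best_returns = -float("inf")
        self.best_actions = []

    def run(self):
        with self.make_env() as env:
            while self.alive:
                env.reset()
                returns, actions, steps_since_best = 0.0, [], 0
                while self.alive and steps_since_best < self.patience:
                    action = env.action_space.sample()
                    _, reward, done, _ = env.step(action)
                    if done:
                        break
                    returns += reward
                    actions.append(action)
                    if returns > self.best_returns:
                        self.best_returns = returns
                        self.best_actions = list(actions)
                        steps_since_best = 0
                    else:
                        steps_since_best += 1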
Example #11
def test_fuzz(env: LlvmEnv):
    """Run an action multiple times from the same starting state and check that
    the generated LLVM-IR is the same.

    Caveats of this test:

        * The initial state is an unoptimized benchmark. If a pass depends
          on other passes to take effect it will not be tested.

        * Non-determinism is tested by running the action 20 times. Extremely
          unlikely non-determinism may not be detected.
    """
    action = env.action_space.sample()
    action_name = env.action_space.names[action]
    benchmark = random.choice(BENCHMARK_NAMES)

    env.observation_space = "Ir"

    checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        ir = env.reset(benchmark=benchmark)
        checksum_before = sha1(ir)

        ir, _, done, _ = env.step(action)
        assert not done
        checksums.add(sha1(ir))

        if len(checksums) != 1:
            pytest.fail(f"Repeating the {action_name} action {i} times on "
                        f"{benchmark} produced different states")

        # An action which has no effect is not likely to be nondeterministic.
        if list(checksums)[0] == checksum_before:
            break
Example #12

def test_deterministic_action(env: LlvmEnv, benchmark_name: str,
                              action_name: str):
    """Run an action multiple times from the same starting state and check that
    the generated LLVM-IR is the same.

    Do this for every combination of benchmark and action. This generates many
    tests.

    Caveats of this test:

        * The initial states are all unoptimized benchmarks. If a pass depends
          on other passes to take effect it will not be tested.

        * Non-determinism is tested by running the action 20 times. Extremely
          unlikely non-determinism may not be detected.
    """
    env.observation_space = "Ir"

    checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        ir = env.reset(benchmark=benchmark_name)
        checksum_before = sha1(ir)

        ir, _, done, _ = env.step(env.action_space.names.index(action_name))
        assert not done
        checksums.add(sha1(ir))

        if len(checksums) != 1:
            pytest.fail(f"Repeating the {action_name} action {i} times on "
                        f"{benchmark_name} produced different states")

        # An action which has no effect is not likely to be nondeterministic.
        if list(checksums)[0] == checksum_before:
            break
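Both tests above hash the IR through a sha1() helper. A minimal sketch of the assumed helper, mirroring the inline hashing used in the -gvn-sink regression test further down:

import hashlib


def sha1(ir: str) -> str:
    """Return the hex SHA-1 digest of an LLVM-IR string (assumed helper)."""
    return hashlib.sha1(ir.encode("utf-8")).hexdigest()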
Example #13
def process_worker_with_env(env: LlvmEnv, actions: List[int], queue: mp.Queue):
    assert actions

    for action in actions:
        observation, reward, done, info = env.step(action)
        assert not done

    queue.put((env, observation, reward, done, info))
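A hypothetical driver for the worker above, spawning it in a child process and collecting the final transition from the queue. Whether the environment survives pickling into a subprocess depends on the multiprocessing start method; this sketch simply assumes it does:

def run_worker_in_process(env: LlvmEnv, actions: List[int]):
    """Illustrative only: run the worker in a subprocess and fetch its result."""
    queue = mp.Queue(maxsize=1)
    process = mp.Process(
        target=process_worker_with_env, args=(env, actions, queue))
    process.start()
    try:
        return queue.get(timeout=60)
    finally:
        process.join(timeout=60)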
Example #14
def test_episode_reward_init_zero(env: LlvmEnv):
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0
    _, reward, _, _ = env.step(env.action_space["-mem2reg"])
    assert reward > 0
    assert env.episode_reward == reward
    env.reset()
    assert env.episode_reward == 0
Example #15
def test_fork_reset(env: LlvmEnv):
    env.reset("cBench-v0/crc32")
    env.step(0)
    env.step(1)
    env.step(2)

    new_env = env.fork()
    try:
        new_env.step(3)

        assert env.actions == [0, 1, 2]
        assert new_env.actions == [0, 1, 2, 3]

        new_env.reset()
        assert env.actions == [0, 1, 2]
        assert new_env.actions == []
    finally:
        new_env.close()
Example #16
    def final_reward(self, env: LlvmEnv, runtime_count: int = 30) -> float:
        """Compute the final reward of the environment.

        Note that this may modify the environment state. You should call
        :code:`reset()` before continuing to use the environment after this.
        """
        # Reapply the environment state in a retry loop.
        actions = list(env.actions)
        env.reset()
        for i in range(1, 5 + 1):
            _, _, done, info = env.step(actions)
            if not done:
                break
            logger.warning(
                "Attempt %d to apply actions during final reward failed: %s",
                i,
                info.get("error_details"),
            )
        else:
            raise ValueError("Failed to replay environment's actions")

        if self.value == OptimizationTarget.CODESIZE:
            return env.observation.IrInstructionCountOz() / max(
                env.observation.IrInstructionCount(), 1)

        if self.value == OptimizationTarget.BINSIZE:
            return env.observation.ObjectTextSizeOz() / max(
                env.observation.ObjectTextSizeBytes(), 1)

        if self.value == OptimizationTarget.RUNTIME:
            with _RUNTIME_LOCK:
                with compiler_gym.make("llvm-v0",
                                       benchmark=env.benchmark) as new_env:
                    new_env.reset()
                    new_env.runtime_observation_count = runtime_count
                    new_env.runtime_warmup_count = 0
                    new_env.apply(env.state)
                    final_runtimes = new_env.observation.Runtime()
                    assert len(final_runtimes) == runtime_count

                    new_env.reset()
                    new_env.send_param("llvm.apply_baseline_optimizations",
                                       "-O3")
                    o3_runtimes = new_env.observation.Runtime()
                    assert len(o3_runtimes) == runtime_count

                logger.debug("O3 runtimes: %s", o3_runtimes)
                logger.debug("Final runtimes: %s", final_runtimes)
                speedup = np.median(o3_runtimes) / max(
                    np.median(final_runtimes), 1e-12)
                logger.debug("Speedup: %.4f", speedup)

                return speedup

        assert False, f"Unknown OptimizationTarget: {self.value}"
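_RUNTIME_LOCK serializes runtime benchmarking across threads. A minimal sketch of the assumed module-level lock:

from threading import Lock

# Assumed: runtime measurements are noisy, so hold a global lock to ensure only
# one environment benchmarks at a time.
_RUNTIME_LOCK = Lock()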
Example #17
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test generates a random trajectory and checks that fork() produces
    an equivalent state. It then runs a second trajectory on the two
    environments to check that behavior is consistent across them.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space
    env.reset()
    print(f"Running fuzz test of environment {env.benchmark}")

    # Take a few warmup steps to get an environment in a random state.
    for _ in range(PRE_FORK_ACTIONS):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:  # Broken episode, restart.
            break
    else:
        # Fork the environment and check that the states are equivalent.
        fkd = env.fork()
        try:
            print(env.state)  # For debugging in case of error.
            assert env.state == fkd.state
            # Check that environment states remain equal if identical
            # subsequent steps are taken.
            for _ in range(POST_FORK_ACTIONS):
                action = env.action_space.sample()
                observation_a, reward_a, done_a, _ = env.step(action)
                observation_b, reward_b, done_b, _ = fkd.step(action)

                print(env.state)  # For debugging in case of error.
                assert done_a == done_b
                np.testing.assert_array_almost_equal(observation_a,
                                                     observation_b)
                if reward_a != reward_b:
                    pytest.fail(
                        f"Parent environment produced reward {reward_a}, fork produced reward {reward_b}"
                    )
                if done_a:
                    break  # Broken episode, we're done.
                assert env.state == fkd.state
        finally:
            fkd.close()
Example #18
def test_episode_reward_with_non_default_reward_space(env: LlvmEnv):
    """Test that episode_reward is not updated when custom rewards passed to
    step()."""
    env.reward_space = "IrInstructionCountOz"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0
    _, rewards, _, _ = env.step(
        env.action_space["-mem2reg"],
        rewards=["IrInstructionCount"],
    )
    assert rewards[0] > 0
    assert env.episode_reward == 0
Example #19
def test_fork_closed_service(env: LlvmEnv):
    env.reset(benchmark="cbench-v1/crc32")

    _, _, done, _ = env.step(0)
    assert not done
    assert env.actions == [0]

    env.close()
    assert not env.service

    with env.fork() as fkd:
        assert env.actions == [0]
        assert fkd.actions == [0]
Example #20
def test_fork_modified_ir_is_the_same(env: LlvmEnv):
    """Test that the IR of a forked environment is the same."""
    env.reset("cbench-v1/crc32")

    # Apply an action that modifies the benchmark.
    _, _, done, info = env.step(env.action_space.flags.index("-mem2reg"))
    assert not done
    assert not info["action_had_no_effect"]

    with env.fork() as fkd:
        assert "\n".join(env.ir.split("\n")[1:]) == "\n".join(
            fkd.ir.split("\n")[1:])

        # Apply another action.
        _, _, done, info = env.step(env.action_space.flags.index("-gvn"))
        _, _, done, info = fkd.step(fkd.action_space.flags.index("-gvn"))
        assert not done
        assert not info["action_had_no_effect"]

        # Check that IRs are still equivalent.
        assert "\n".join(env.ir.split("\n")[1:]) == "\n".join(
            fkd.ir.split("\n")[1:])
Example #21

def test_fork_regression_test(env: LlvmEnv, test: ForkRegressionTest):
    """Run the fork regression test:

    1. Initialize an environment.
    2. Apply a "pre_fork" sequence of actions.
    3. Create a fork of the environment.
    4. Apply a "post_fork" sequence of actions in both the fork and parent.
    5. Verify that the environment states have gone out of sync.
    """
    env.reward_space = test.reward_space
    env.reset(test.benchmark)
    pre_fork = [env.action_space[f] for f in test.pre_fork.split()]
    post_fork = [env.action_space[f] for f in test.post_fork.split()]

    _, _, done, info = env.step(pre_fork)
    assert not done, info

    with env.fork() as fkd:
        assert env.state == fkd.state  # Sanity check

        env.step(post_fork)
        fkd.step(post_fork)
        # Verify that the environment states no longer line up.
        assert env.state != fkd.state
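ForkRegressionTest is the parametrized test-case type. A minimal sketch of a shape consistent with the fields used above; the tuple layout itself is an assumption:

from typing import NamedTuple


class ForkRegressionTest(NamedTuple):
    benchmark: str
    pre_fork: str    # Space-separated action flags applied before fork().
    post_fork: str   # Space-separated action flags applied after fork().
    reward_space: str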
Example #22

def test_fuzz(env: LlvmEnv):
    """This test generates a random trajectory and validates the semantics."""
    benchmark = random.choice(VALIDATABLE_CBENCH_URIS)
    num_actions = random.randint(*RANDOM_TRAJECTORY_LENGTH_RANGE)
    print(benchmark)

    while True:
        env.reset(benchmark=benchmark)
        for _ in range(num_actions):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:
                break  # Broken trajectory, retry.
        else:
            print(f"Validating state {env.state}")
            result = env.validate()
            assert result.okay(), result
            # Stop the test.
            break
Example #23
def test_fuzz(env: LlvmEnv):
    """This test generates a random trajectory and validates the semantics."""
    benchmark = random.choice(VALIDATABLE_BENCHMARKS)
    num_actions = random.randint(*RANDOM_TRAJECTORY_LENGTH_RANGE)

    while True:
        env.reset(benchmark=benchmark)
        for _ in range(num_actions):
            _, _, done, _ = env.step(env.action_space.sample())
            if done:
                break  # Broken trajectory, retry.
        else:
            print(f"Validating state {env.state}")
            cb = get_llvm_benchmark_validation_callback(env)
            assert cb
            assert cb(env) is None
            # Stop the test.
            break
Example #24
def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Test that rewards are equal after fork() is called."""
    env.reward_space = reward_space
    env.reset("cbench-v1/dijkstra")

    actions = [
        env.action_space.flags.index(n) for n in ["-mem2reg", "-simplifycfg"]
    ]

    forked = env.fork()
    try:
        for action in actions:
            _, env_reward, env_done, _ = env.step(action)
            _, fkd_reward, fkd_done, _ = forked.step(action)
            assert env_done is False
            assert fkd_done is False
            assert env_reward == fkd_reward
    finally:
        forked.close()
Example #25
def test_gvn_sink_non_determinism(env: LlvmEnv, benchmark_name: str):
    """Regression test for -gvn-sink non-determinism.
    See: https://github.com/facebookresearch/CompilerGym/issues/46
    """
    env.observation_space = "Ir"

    checksums = set()
    for i in range(1, ACTION_REPTITION_COUNT + 1):
        env.reset(benchmark=benchmark_name)
        ir, _, done, _ = env.step(env.action_space.names.index("-gvn-sink"))
        assert not done
        sha1 = hashlib.sha1()
        sha1.update(ir.encode("utf-8"))
        checksums.add(sha1.hexdigest())

        if len(checksums) != 1:
            pytest.fail(
                f"Repeating the -gvn-sink action {i} times on {benchmark_name} "
                "produced different states")
Example #26
def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Test that rewards of """
    env.reward_space = reward_space
    env.reset("cBench-v0/dijkstra")

    actions = env.action_space.names
    act_names = ["-mem2reg", "-simplifycfg"]
    act_indcs = [actions.index(n) for n in act_names]

    for act_indc in act_indcs:

        forked = env.fork()
        try:
            _, env_reward, _, _ = env.step(act_indc)
            _, fkd_reward, _, _ = forked.step(act_indc)
            assert env_reward == fkd_reward
        finally:
            forked.close()
Example #27
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test produces a random trajectory, resets the environment, then
    replays the trajectory and checks that it produces the same state.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space

    env.reset(benchmark=random.choice(BENCHMARK_NAMES))
    trajectory = apply_random_trajectory(
        env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE)
    print(env.state)  # For debugging in case of failure.
    env.reset()

    for i, (action, observation, reward, done) in enumerate(trajectory,
                                                            start=1):
        print(f"Replaying step {i}: {env.action_space.flags[action]}")
        replay_observation, replay_reward, replay_done, info = env.step(action)
        assert done == replay_done, info

        np.testing.assert_array_almost_equal(observation, replay_observation)
        np.testing.assert_almost_equal(reward, replay_reward)