Example #1
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test produces a random trajectory, resets the environment, then
    replays the trajectory and checks that it produces the same state.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space
    benchmark = env.datasets["generator://csmith-v0"].random_benchmark()
    print(benchmark.uri)  # For debugging in case of failure.

    try:
        env.reset(benchmark=benchmark)
    except BenchmarkInitError:
        return

    trajectory = apply_random_trajectory(
        env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE
    )
    print(env.state)  # For debugging in case of failure.
    env.reset(benchmark=benchmark)

    for i, (action, observation, reward, done) in enumerate(trajectory, start=1):
        print(f"Replaying step {i}: {env.action_space.flags[action]}")
        replay_observation, replay_reward, replay_done, info = env.step(action)
        assert done == replay_done, info

        np.testing.assert_array_almost_equal(observation, replay_observation)
        np.testing.assert_almost_equal(reward, replay_reward)
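Note: apply_random_trajectory() is a helper from the test harness, not part of the CompilerGym API. A minimal sketch of what it might look like, assuming it returns the recorded (action, observation, reward, done) tuples and that RANDOM_TRAJECTORY_LENGTH_RANGE is a (min, max) pair of step counts (the names, defaults, and timeout handling below are assumptions):

# Hypothetical sketch of the test-harness helper; details are assumptions.
import random
import time
from typing import List, Tuple

RANDOM_TRAJECTORY_LENGTH_RANGE = (1, 50)  # Assumed (min, max) number of steps.


def apply_random_trajectory(
    env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE, timeout=None
) -> List[Tuple[int, object, float, bool]]:
    """Take a random number of random steps, recording each transition."""
    num_steps = random.randint(*random_trajectory_length_range)
    end_time = None if timeout is None else time.time() + timeout
    trajectory = []
    for _ in range(num_steps):
        if end_time is not None and time.time() > end_time:
            break  # Wall-clock budget exhausted.
        action = env.action_space.sample()
        observation, reward, done, _ = env.step(action)
        trajectory.append((action, observation, reward, done))
        if done:
            break  # Broken episode, stop early.
    return trajectory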
Example #2
def test_episode_reward_init_zero(env: LlvmEnv):
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0
    _, reward, _, _ = env.step(env.action_space["-mem2reg"])
    assert reward > 0
    assert env.episode_reward == reward
    env.reset()
    assert env.episode_reward == 0
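These tests receive env (and, in some cases, reward_space) as pytest fixtures. A plausible conftest.py sketch, assuming the standard compiler_gym.make("llvm-v0") entry point; the fixture scope and the parametrized reward spaces are illustrative guesses, not the project's actual configuration:

# Hypothetical conftest.py sketch; the real test suite's fixtures may differ.
import compiler_gym
import pytest
from compiler_gym.envs import LlvmEnv


@pytest.fixture(scope="function")
def env() -> LlvmEnv:
    """Yield a fresh LLVM environment and close it when the test ends."""
    with compiler_gym.make("llvm-v0") as env_:
        yield env_


@pytest.fixture(params=["IrInstructionCount", "IrInstructionCountOz"])
def reward_space(request) -> str:
    """Parametrize tests over a couple of reward spaces (an assumed subset)."""
    return request.param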
Example #3
def test_fork_previous_cost_reward_update(env: LlvmEnv):
    env.reward_space = "IrInstructionCount"
    env.reset("cbench-v1/crc32")

    env.step(env.action_space.flags.index("-mem2reg"))
    with env.fork() as fkd:
        _, a, _, _ = env.step(env.action_space.flags.index("-mem2reg"))
        _, b, _, _ = fkd.step(env.action_space.flags.index("-mem2reg"))
        assert a == b
Example #4
def test_fork_spaces_are_same(env: LlvmEnv):
    env.observation_space = "Autophase"
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")

    with env.fork() as fkd:
        assert fkd.observation_space == env.observation_space
        assert fkd.reward_space == env.reward_space
        assert fkd.benchmark == env.benchmark
Example #5
def test_select_best_action_closed_environment(env: LlvmEnv):
    """Test that select_best_action() recovers from an environment whose service
    has closed."""
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")
    with ThreadPoolExecutor() as executor:
        best_a = select_best_action(env, executor)
        env.close()
        best_b = select_best_action(env, executor)
        assert best_a == best_b
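select_best_action() is another test-harness helper rather than a core API. A rough sketch of a one-step greedy lookahead with the same shape, assuming it scores each action in a forked copy of the environment and returns the index with the highest reward (the real helper presumably also reconnects to a closed service, which is what the test above exercises):

# Hypothetical greedy one-step lookahead; not the actual helper's implementation.
from concurrent.futures import ThreadPoolExecutor

from compiler_gym.envs import LlvmEnv


def _eval_action(env: LlvmEnv, action: int) -> float:
    """Return the reward of applying a single action to a fork of env."""
    with env.fork() as fkd:
        _, reward, _, _ = fkd.step(action)
        return reward


def select_best_action(env: LlvmEnv, executor: ThreadPoolExecutor) -> int:
    """Score every action in forked copies of env and return the best index."""
    actions = list(range(len(env.action_space.flags)))
    rewards = list(executor.map(lambda a: _eval_action(env, a), actions))
    return max(zip(rewards, actions))[1]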
Example #6
def test_episode_reward_with_non_default_reward_space(env: LlvmEnv):
    """Test that episode_reward is not updated when custom rewards passed to
    step()."""
    env.reward_space = "IrInstructionCountOz"
    env.reset("cbench-v1/crc32")
    assert env.episode_reward == 0
    _, rewards, _, _ = env.step(
        env.action_space["-mem2reg"],
        rewards=["IrInstructionCount"],
    )
    assert rewards[0] > 0
    assert env.episode_reward == 0
Example #7
def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Test that rewards are equal after fork() is called."""
    env.reward_space = reward_space
    env.reset("cbench-v1/dijkstra")

    actions = [
        env.action_space.flags.index(n) for n in ["-mem2reg", "-simplifycfg"]
    ]

    forked = env.fork()
    try:
        for action in actions:
            _, env_reward, env_done, _ = env.step(action)
            _, fkd_reward, fkd_done, _ = forked.step(action)
            assert env_done is False
            assert fkd_done is False
            assert env_reward == fkd_reward
    finally:
        forked.close()
Example #8
def test_fork_rewards(env: LlvmEnv, reward_space: str):
    """Test that rewards of """
    env.reward_space = reward_space
    env.reset("cBench-v0/dijkstra")

    actions = env.action_space.names
    act_names = ["-mem2reg", "-simplifycfg"]
    act_indcs = [actions.index(n) for n in act_names]

    for act_indc in act_indcs:

        forked = env.fork()
        try:
            _, env_reward, _, _ = env.step(act_indc)
            _, fkd_reward, _, _ = forked.step(act_indc)
            assert env_reward == fkd_reward
        finally:
            forked.close()
Example #9
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test generates a random trajectory and checks that fork() produces
    an equivalent state. It then runs a second trajectory on the two
    environments to check that behavior is consistent across them.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space
    env.reset()
    print(f"Running fuzz test of environment {env.benchmark}")

    # Take a few warmup steps to get an environment in a random state.
    for _ in range(PRE_FORK_ACTIONS):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:  # Broken episode, restart.
            break
    else:
        # Fork the environment and check that the states are equivalent.
        fkd = env.fork()
        try:
            print(env.state)  # For debugging in case of error.
            assert env.state == fkd.state
            # Check that environment states remain equal if identical
            # subsequent steps are taken.
            for _ in range(POST_FORK_ACTIONS):
                action = env.action_space.sample()
                observation_a, reward_a, done_a, _ = env.step(action)
                observation_b, reward_b, done_b, _ = fkd.step(action)

                print(env.state)  # For debugging in case of error.
                assert done_a == done_b
                np.testing.assert_array_almost_equal(observation_a,
                                                     observation_b)
                if reward_a != reward_b:
                    pytest.fail(
                        f"Parent environment produced reward {reward_a}, fork produced reward {reward_b}"
                    )
                if done_a:
                    break  # Broken episode, we're done.
                assert env.state == fkd.state
        finally:
            fkd.close()
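PRE_FORK_ACTIONS and POST_FORK_ACTIONS are module-level constants of the test file; plausible (assumed) values would be something like:

# Assumed values; the real test module may use different step counts.
PRE_FORK_ACTIONS = 10   # Warmup steps taken before fork().
POST_FORK_ACTIONS = 10  # Lockstep steps taken in parent and fork afterwards.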
Example #10
def test_fuzz(env: LlvmEnv, observation_space: str, reward_space: str):
    """This test produces a random trajectory using a program generated using
    llvm-stress.
    """
    benchmark = env.datasets["generator://llvm-stress-v0"].random_benchmark()
    print(benchmark.uri)  # For debugging in case of failure.

    env.observation_space = observation_space
    env.reward_space = reward_space

    try:
        env.reset(benchmark=benchmark)
    except BenchmarkInitError:
        return  # Benchmark is invalid.

    apply_random_trajectory(
        env,
        random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE,
        timeout=10,
    )
    print(env.state)  # For debugging in case of failure.
Example #11
def test_fuzz(env: LlvmEnv, reward_space: str):
    """This test produces a random trajectory, resets the environment, then
    replays the trajectory and checks that it produces the same state.
    """
    env.observation_space = "Autophase"
    env.reward_space = reward_space

    env.reset(benchmark=random.choice(BENCHMARK_NAMES))
    trajectory = apply_random_trajectory(
        env, random_trajectory_length_range=RANDOM_TRAJECTORY_LENGTH_RANGE)
    print(env.state)  # For debugging in case of failure.
    env.reset()

    for i, (action, observation, reward, done) in enumerate(trajectory,
                                                            start=1):
        print(f"Replaying step {i}: {env.action_space.flags[action]}")
        replay_observation, replay_reward, replay_done, info = env.step(action)
        assert done == replay_done, info

        np.testing.assert_array_almost_equal(observation, replay_observation)
        np.testing.assert_almost_equal(reward, replay_reward)
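BENCHMARK_NAMES is presumably a list of benchmark URIs to sample from; a minimal sketch using a few cBench programs (the real list is likely derived from the installed datasets):

# Assumed contents; the real list is presumably built from installed datasets.
BENCHMARK_NAMES = [
    "benchmark://cbench-v1/crc32",
    "benchmark://cbench-v1/dijkstra",
    "benchmark://cbench-v1/qsort",
]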
Example #12
def test_fork_regression_test(env: LlvmEnv, test: ForkRegressionTest):
    """Run the fork regression test:

    1. Initialize an environment.
    2. Apply a "pre_fork" sequence of actions.
    3. Create a fork of the environment.
    4. Apply a "post_fork" sequence of actions in both the fork and parent.
    5. Verify that the environment states have gone out of sync.
    """
    env.reward_space = test.reward_space
    env.reset(test.benchmark)
    pre_fork = [env.action_space[f] for f in test.pre_fork.split()]
    post_fork = [env.action_space[f] for f in test.post_fork.split()]

    _, _, done, info = env.multistep(pre_fork)
    assert not done, info

    with env.fork() as fkd:
        assert env.state == fkd.state  # Sanity check

        env.multistep(post_fork)
        fkd.multistep(post_fork)
        # Verify that the environment states no longer line up.
        assert env.state != fkd.state
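ForkRegressionTest is the parameter object supplied by this test's parametrization. A plausible definition, with field names inferred from the test body and an illustrative (not real) regression case:

# Hypothetical definition inferred from how the test uses it.
from typing import NamedTuple


class ForkRegressionTest(NamedTuple):
    benchmark: str     # URI of the benchmark to reset() to.
    pre_fork: str      # Space-separated flags applied before fork().
    post_fork: str     # Space-separated flags applied after the fork.
    reward_space: str  # Name of the reward space to use.


# Illustrative parametrization only; real regression cases will differ.
FORK_REGRESSION_TESTS = [
    ForkRegressionTest(
        benchmark="benchmark://cbench-v1/crc32",
        pre_fork="-mem2reg -gvn",
        post_fork="-simplifycfg",
        reward_space="IrInstructionCount",
    ),
]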