Example #1
0
def test_apply_state(env: LlvmEnv):
    """Check that apply() on a fresh environment reproduces the source state.

    One ``-mem2reg`` step is taken on ``cbench-v1/crc32``; the resulting state
    is then applied to a newly made ``llvm-v0`` environment and the two states
    are compared for equality.
    """
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")
    mem2reg = env.action_space.flags.index("-mem2reg")
    env.step(mem2reg)

    # The second environment is only alive for the duration of the comparison.
    with gym.make("llvm-v0", reward_space="IrInstructionCount") as replica:
        replica.apply(env.state)
        assert replica.state == env.state
Example #2
0
def test_commandline(env: LlvmEnv):
    """Check commandline() output and its round trip through
    commandline_to_actions().
    """
    env.reset(benchmark="cbench-v1/crc32")

    # Apply the two passes in order, looking each flag up by name.
    flag_names = ("-mem2reg", "-reg2mem")
    for flag in flag_names:
        env.step(env.action_space.flags.index(flag))

    assert env.commandline() == "opt -mem2reg -reg2mem input.bc -o output.bc"

    # Parsing the command line back must give the same action indices.
    recovered = env.commandline_to_actions(env.commandline())
    assert recovered == [
        env.action_space.flags.index(flag) for flag in flag_names
    ]
Example #3
0
def test_same_reward_after_reset(env: LlvmEnv):
    """Check that an action yields an identical reward on either side of a
    reset().
    """
    env.reward_space = "IrInstructionCount"
    env.benchmark = "cbench-v1/dijkstra"

    instcombine = env.action_space.flags.index("-instcombine")

    # First episode.
    env.reset()
    _obs, first_reward, _done, _info = env.step(instcombine)
    assert first_reward, "Sanity check that action produces a reward"

    # Second episode: same action from a freshly reset environment.
    env.reset()
    _obs, second_reward, _done, _info = env.step(instcombine)
    assert first_reward == second_reward
Example #4
0
def test_instruction_count_reward(env: LlvmEnv):
    """Check the IrInstructionCount reward variants after one -reg2mem step
    on cbench-v1/crc32.
    """
    env.reset(benchmark="cbench-v1/crc32")

    # Instruction count before any action has been applied.
    assert env.observation.IrInstructionCount() == CRC32_INSTRUCTION_COUNT

    reg2mem = env.action_space.flags.index("-reg2mem")
    env.step(reg2mem)
    assert env.observation.IrInstructionCount() == CRC32_INSTRUCTION_COUNT_AFTER_REG2MEM

    # Raw reward: initial count minus the count after the step.
    delta = CRC32_INSTRUCTION_COUNT - CRC32_INSTRUCTION_COUNT_AFTER_REG2MEM
    assert env.reward.IrInstructionCount() == delta
    # Normalized reward: the same difference divided by the initial count.
    assert env.reward.IrInstructionCountNorm() == delta / CRC32_INSTRUCTION_COUNT

    # Rewards scaled against the -O3 and -Oz reference counts.
    gain_o3 = CRC32_INSTRUCTION_COUNT - CRC32_INSTRUCTION_COUNT_O3
    assert env.reward.IrInstructionCountO3() == delta / gain_o3

    gain_oz = CRC32_INSTRUCTION_COUNT - CRC32_INSTRUCTION_COUNT_OZ
    assert env.reward.IrInstructionCountOz() == delta / gain_oz
Example #5
0
def test_step_multiple_actions_list(env: LlvmEnv):
    """Check that step() accepts a list of actions and records all of them."""
    env.reset(benchmark="cbench-v1/crc32")

    actions = [
        env.action_space.flags.index(flag) for flag in ("-mem2reg", "-reg2mem")
    ]

    _obs, _reward, done, _info = env.step(actions)
    assert not done
    # The environment's action history must match the list that was passed.
    assert env.actions == actions
Example #6
0
def test_ir_sha1(env: LlvmEnv, tmpwd: Path):
    """Check that ir_sha1 differs before and after an effective -mem2reg step.

    ``tmpwd`` is not referenced in the body; presumably it is a fixture that
    provides a temporary working directory (confirm against the fixture).
    """
    env.reset(benchmark="cbench-v1/crc32")
    sha1_before = env.ir_sha1

    mem2reg = env.action_space.flags.index("-mem2reg")
    _, _, done, info = env.step(mem2reg)
    assert not done, info

    # The hash comparison below is only meaningful if the step did something.
    had_no_effect = info["action_had_no_effect"]
    assert not had_no_effect, "sanity check failed, action had no effect"

    sha1_after = env.ir_sha1
    assert sha1_before != sha1_after
def test_connection_dies_default_reward_negated(env: LlvmEnv):
    """Check the reward returned by step() after the service has been closed
    when the reward space negates returns.

    With ``default_value`` set to 2.5, ``default_negates_returns`` enabled and
    ``episode_reward`` set to 10, the step after the service is closed is
    expected to end the episode with a reward of -7.5.
    """
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")

    env.reward_space.default_negates_returns = True
    env.reward_space.default_value = 2.5
    env.episode_reward = 10

    # Kill the service. Note killing the service for a ManagedConnection will
    # result in a ServiceError because we have not ended the session we started
    # with env.reset() above. For UnmanagedConnection, this error will not be
    # raised.
    try:
        env.service.close()
    except ServiceError as e:
        assert "Service exited with returncode " in str(e)

    _, reward, done, _ = env.step(0)
    assert done

    assert reward == -7.5  # negates reward.
Example #8
0
def test_connection_dies_default_reward(env: LlvmEnv):
    """Check the reward returned by step() after the service has been closed
    when the reward space does not negate returns.

    With ``default_value`` set to 2.5, ``default_negates_returns`` disabled
    and ``episode_reward`` set to 10, the step after the service is closed is
    expected to end the episode with a reward of 2.5.
    """
    env.reward_space = "IrInstructionCount"
    env.reset(benchmark="cbench-v1/crc32")

    env.reward_space.default_negates_returns = False
    env.reward_space.default_value = 2.5
    env.episode_reward = 10

    # Shut the service down underneath the environment. A ManagedConnection
    # raises ServiceError here because the session opened by env.reset() above
    # was never ended; an UnmanagedConnection does not raise.
    try:
        env.service.close()
    except ServiceError as error:
        message = str(error)
        assert "Service exited with returncode " in message

    _obs, reward, done, _info = env.step(0)
    assert done

    assert reward == 2.5