def test_frozenlake_transitions():
    """Check the transition array exposed for 'FrozenLake-v0'.

    Verifies the array's type and shape, that every (state, action) row is
    a probability distribution, and how probability mass starting in state 0
    spreads under a uniformly random policy.
    """
    env = make_env('FrozenLake-v0')
    env = DiscreteEnvModelWrapper(env)
    transitions = env.get_transition_array()

    # Validate type and shape first, so that the numeric checks below fail
    # for the right reason if the wrapper returns something unexpected.
    assert isinstance(transitions, np.ndarray)
    assert transitions.shape == (17, 4, 17)

    # Every (state, action) row must sum to 1. Compare with a tolerance:
    # sums of floating point probabilities need not be exactly 1.0.
    assert np.allclose(transitions.sum(axis=-1), 1.0)

    # check state distribution under random policy:
    start = np.zeros(17)
    start[0] = 1.
    # Averaging over the action axis gives the state-to-state matrix of a
    # policy that picks each of the actions with equal probability.
    transitions_random_pol = transitions.mean(axis=1)

    S_1 = start.dot(np.linalg.matrix_power(transitions_random_pol, 1))
    S_2 = start.dot(np.linalg.matrix_power(transitions_random_pol, 2))
    S_50 = start.dot(np.linalg.matrix_power(transitions_random_pol, 50))
    # The last state (presumably an added absorbing state -- TODO confirm)
    # is expected to hold no mass after one or two steps and almost all of
    # it after fifty.
    assert S_1[-1] == 0
    assert S_2[-1] == 0
    assert S_50[-1] > .999

    # The distribution after 0, 1, ..., 49 steps must stay normalised.
    # Propagate it one step at a time instead of recomputing a matrix power
    # on every iteration.
    S_i = start
    for _ in range(50):
        assert np.isclose(np.sum(S_i), 1.)
        S_i = S_i.dot(transitions_random_pol)
Ejemplo n.º 2
0
def test_make_maze1():
    """Check that 'MazeWorld1-v0' wraps a MazeWorld built from MAP1.

    The unwrapped environment's wall and reward maps must equal the ones
    returned by ``get_maps(MAP1)``.
    """
    env = make_env('MazeWorld1-v0')
    assert is_unwrappable_to(env, MazeWorld)

    expected_walls, expected_rewards = get_maps(MAP1)
    unwrapped = unwrap_env(env, MazeWorld)

    # Element-wise comparison of both maps against the reference.
    assert np.all(unwrapped.map_walls == expected_walls)
    assert np.all(unwrapped.map_rewards == expected_rewards)
Ejemplo n.º 3
0
def test_is_unwrappable_to():
    """Check ``is_unwrappable_to`` on plain, feature- and reward-wrapped envs."""
    # Each case builds a fresh environment with the given factory and names
    # a class the result must be unwrappable to.
    fresh_env_cases = (
        (make_env, 'FrozenLake-v0', TimeLimit),
        (make_env, 'FrozenLake-v0', DiscreteEnv),
        (feature_wrapper.make, 'FrozenLake-v0', FrozenLakeFeatureWrapper),
        (feature_wrapper.make, 'FrozenLake8x8-v0', FrozenLakeFeatureWrapper),
        (feature_wrapper.make, 'FrozenLake-v0',
         feature_wrapper.FeatureWrapper),
    )
    for build, env_id, target_cls in fresh_env_cases:
        assert is_unwrappable_to(build(env_id), target_cls)

    # Put a reward wrapper on top of a feature-wrapped environment; every
    # class listed below must still be reachable from the outermost wrapper.
    featured = feature_wrapper.make('FrozenLake-v0')
    reward_wrapped = RewardWrapper(
        featured, FeatureBasedRewardFunction(featured, 'random'))
    reachable = (RewardWrapper, feature_wrapper.FeatureWrapper,
                 DiscreteEnv, gym.Env)
    for target_cls in reachable:
        assert is_unwrappable_to(reward_wrapped, target_cls)
Ejemplo n.º 4
0
def test_unwrap():
    """Check which object ``unwrap_env`` returns, with and without a target.

    Covers the environment returned by ``make_env`` and the same environment
    wrapped once more in a ``FrozenLakeFeatureWrapper``.
    """
    outer = make_env('FrozenLake-v0')

    # Asking for DiscreteEnv yields the object one level below the outer env.
    assert unwrap_env(outer, DiscreteEnv) is outer.env
    # The outer env already is a gym.Env, so it comes back unchanged.
    assert unwrap_env(outer, gym.Env) is outer
    # Without a target class the env is unwrapped all the way.
    assert unwrap_env(outer) is outer.env

    featured = FrozenLakeFeatureWrapper(outer)

    # With the extra wrapper, the DiscreteEnv sits two levels down.
    assert unwrap_env(featured, DiscreteEnv) is featured.env.env
    # The outermost object already has the requested type.
    assert unwrap_env(featured, FrozenLakeFeatureWrapper) is featured
    # Unwrapping all the way again ends two levels down.
    assert unwrap_env(featured) is featured.env.env

    # The returned objects must be instances of the requested classes.
    as_discrete = unwrap_env(featured, DiscreteEnv)
    assert isinstance(as_discrete, DiscreteEnv)

    as_feature = unwrap_env(featured, feature_wrapper.FeatureWrapper)
    assert isinstance(as_feature, feature_wrapper.FeatureWrapper)

    as_frozen_feature = unwrap_env(featured, FrozenLakeFeatureWrapper)
    assert isinstance(as_frozen_feature, FrozenLakeFeatureWrapper)

    as_frozen_feature = unwrap_env(featured, FrozenLakeFeatureWrapper)
    assert isinstance(as_frozen_feature, feature_wrapper.FeatureWrapper)

    assert isinstance(unwrap_env(featured), gym.Env)
def test_frozenlake8_transitions():
    """Check the transition array exposed for 'FrozenLake8x8-v0'.

    Verifies the array's type and shape and that every (state, action) row
    sums to one.
    """
    env = make_env('FrozenLake8x8-v0')
    env = DiscreteEnvModelWrapper(env)
    transitions = env.get_transition_array()

    # Validate type and shape first, so that the numeric check below fails
    # for the right reason if the wrapper returns something unexpected.
    assert isinstance(transitions, np.ndarray)
    assert transitions.shape == (65, 4, 65)

    # Every (state, action) row must sum to 1. Compare with a tolerance:
    # sums of floating point probabilities need not be exactly 1.0.
    assert np.allclose(transitions.sum(axis=-1), 1.0)
def test_frozenlake8x8_rewards():
    """Check the reward array of 'FrozenLake8x8-v0' against its transitions.

    For every (state, action) pair the reward must equal the expected
    per-state reward under the transition probabilities, where only the
    second-to-last state carries a reward of 1.
    """
    model = DiscreteEnvModelWrapper(make_env('FrozenLake8x8-v0'))
    transitions = model.get_transition_array()
    rewards = model.get_reward_array()

    n_states = 64 + 1
    assert rewards.shape == (n_states, 4)
    assert transitions.shape == (n_states, 4, n_states)

    # Index -2 is used because index -1 is the added absorbing state.
    true_rews = np.zeros(n_states)
    true_rews[-2] = 1.0

    for state in range(n_states):
        # One expected value per action: probabilities times state rewards.
        expected = transitions[state].dot(true_rews)
        assert np.all(np.isclose(rewards[state], expected))
Ejemplo n.º 7
0
def test_make_frozen8():
    """Check that 'FrozenLake8x8-v0' can be unwrapped to a FrozenLakeEnv."""
    assert is_unwrappable_to(make_env('FrozenLake8x8-v0'), FrozenLakeEnv)
Ejemplo n.º 8
0
 def wrapper_factory():
     """Return ``decorated_function`` applied to a new env made for ``key``.

     Both ``decorated_function`` and ``key`` are read from the enclosing
     scope.
     """
     # make_env is the unified way of creating environments (usually via
     # gym.make, with some exceptions). It is imported inside the function
     # rather than at module level.
     from irl_benchmark.envs import make_env

     # Build a new environment on every call, then wrap it.
     base_env = make_env(key)
     return decorated_function(base_env)
Ejemplo n.º 9
0
def test_random_tabular_function():
    env = make_env('FrozenLake-v0')
    rf = TabularRewardFunction(env, 'random')