def test_frozenlake_transitions():
    """Check the transition array built for 'FrozenLake-v0'.

    Verifies that the array returned by ``get_transition_array`` is a numpy
    array of shape ``(17, 4, 17)``, that every ``transitions[s, a]`` row sums
    to one, and that pushing the start distribution through a uniformly
    random policy yields valid distributions whose mass ends up in the last
    state index (presumably the added absorbing state -- confirm against
    DiscreteEnvModelWrapper).
    """
    env = make_env('FrozenLake-v0')
    env = DiscreteEnvModelWrapper(env)
    transitions = env.get_transition_array()

    # Check type and shape first, so a malformed result fails here rather
    # than with an indexing error further down.
    assert isinstance(transitions, np.ndarray)
    assert transitions.shape == (17, 4, 17)

    # Each (state, action) row has to sum to 1. Compare with a tolerance:
    # exact equality on a sum of floats depends on summation order.
    assert np.allclose(transitions.sum(axis=2), 1.0)

    # check state distribution under random policy:
    S = np.zeros(17)
    S[0] = 1.
    # Each of the 4 actions is taken with probability 0.25.
    transitions_random_pol = np.sum(transitions, axis=1) * 0.25
    S_1 = S.dot(np.linalg.matrix_power(transitions_random_pol, 1))
    S_2 = S.dot(np.linalg.matrix_power(transitions_random_pol, 2))
    S_50 = S.dot(np.linalg.matrix_power(transitions_random_pol, 50))
    # The last state cannot be reached within two steps ...
    assert S_1[-1] == 0
    assert S_2[-1] == 0
    # ... but holds almost all probability mass after 50 steps.
    assert S_50[-1] > .999

    # The state distribution after 0..49 steps must stay normalized.
    # Propagate one step per iteration instead of recomputing a matrix power.
    S_i = S
    for _ in range(50):
        assert np.isclose(np.sum(S_i), 1.)
        S_i = S_i.dot(transitions_random_pol)
def test_make_maze1():
    """Check that 'MazeWorld1-v0' wraps a MazeWorld built from MAP1."""
    wrapped_env = make_env('MazeWorld1-v0')
    assert is_unwrappable_to(wrapped_env, MazeWorld)

    # Reference wall / reward maps derived from the MAP1 layout.
    expected_walls, expected_rewards = get_maps(MAP1)

    # The underlying maze must expose exactly those maps.
    maze = unwrap_env(wrapped_env, MazeWorld)
    assert np.all(maze.map_walls == expected_walls)
    assert np.all(maze.map_rewards == expected_rewards)
def test_is_unwrappable_to():
    """Exercise is_unwrappable_to on plain, feature-wrapped and reward-wrapped envs."""
    # Environments created through make_env; a fresh one per check.
    for target_cls in (TimeLimit, DiscreteEnv):
        assert is_unwrappable_to(make_env('FrozenLake-v0'), target_cls)

    # Environments created through feature_wrapper.make; a fresh one per check.
    feature_cases = (
        ('FrozenLake-v0', FrozenLakeFeatureWrapper),
        ('FrozenLake8x8-v0', FrozenLakeFeatureWrapper),
        ('FrozenLake-v0', feature_wrapper.FeatureWrapper),
    )
    for env_id, target_cls in feature_cases:
        assert is_unwrappable_to(feature_wrapper.make(env_id), target_cls)

    # Put a RewardWrapper on top of a feature-wrapped environment and make
    # sure every class listed below is still found.
    feature_env = feature_wrapper.make('FrozenLake-v0')
    random_reward = FeatureBasedRewardFunction(feature_env, 'random')
    stacked_env = RewardWrapper(feature_env, random_reward)
    for target_cls in (
            RewardWrapper,
            feature_wrapper.FeatureWrapper,
            DiscreteEnv,
            gym.Env,
    ):
        assert is_unwrappable_to(stacked_env, target_cls)
def test_unwrap():
    """Check which object unwrap_env returns for different target classes."""
    outer = make_env('FrozenLake-v0')
    one_level_down = outer.env

    # Asking for DiscreteEnv peels off exactly one layer.
    assert one_level_down is unwrap_env(outer, DiscreteEnv)
    # The outermost object already is a gym.Env, so nothing is removed.
    assert outer is unwrap_env(outer, gym.Env)
    # Without a target class the environment is unwrapped completely.
    assert one_level_down is unwrap_env(outer)

    # Same checks with an additional feature wrapper on top.
    featured = FrozenLakeFeatureWrapper(outer)
    two_levels_down = featured.env.env
    assert two_levels_down is unwrap_env(featured, DiscreteEnv)
    # The outermost object already is the requested wrapper.
    assert featured is unwrap_env(featured, FrozenLakeFeatureWrapper)
    # Without a target class the environment is unwrapped completely.
    assert two_levels_down is unwrap_env(featured)

    # The returned object must be an instance of the expected class;
    # each pair is (class to unwrap to, class the result must be).
    type_cases = (
        (DiscreteEnv, DiscreteEnv),
        (feature_wrapper.FeatureWrapper, feature_wrapper.FeatureWrapper),
        (FrozenLakeFeatureWrapper, FrozenLakeFeatureWrapper),
        (FrozenLakeFeatureWrapper, feature_wrapper.FeatureWrapper),
    )
    for target_cls, expected_cls in type_cases:
        assert isinstance(unwrap_env(featured, target_cls), expected_cls)
    assert isinstance(unwrap_env(featured), gym.Env)
def test_frozenlake8_transitions():
    """Check the transition array built for 'FrozenLake8x8-v0'.

    Verifies that ``get_transition_array`` returns a numpy array of shape
    ``(65, 4, 65)`` and that every ``transitions[s, a]`` row sums to one.
    """
    env = make_env('FrozenLake8x8-v0')
    env = DiscreteEnvModelWrapper(env)
    transitions = env.get_transition_array()

    # Check type and shape first, so a malformed result fails here rather
    # than with an indexing error further down.
    assert isinstance(transitions, np.ndarray)
    assert transitions.shape == (65, 4, 65)

    # Each (state, action) row has to sum to 1. Compare with a tolerance:
    # exact equality on a sum of floats depends on summation order.
    assert np.allclose(transitions.sum(axis=2), 1.0)
def test_frozenlake8x8_rewards():
    """Check that the reward array matches the expected reward under the transitions.

    For every (state, action) pair, ``rewards[s, a]`` must be close to the
    dot product of ``transitions[s, a, :]`` with a per-state reward vector
    that is 1.0 at index -2 and 0 everywhere else.
    """
    model_env = DiscreteEnvModelWrapper(make_env('FrozenLake8x8-v0'))
    transitions = model_env.get_transition_array()
    rewards = model_env.get_reward_array()

    assert rewards.shape == (65, 4)
    assert transitions.shape == (65, 4, 65)

    # Per-state rewards: 64 grid states plus one extra state.
    state_rewards = np.zeros(64 + 1)
    # [-2] since [-1] is the added absorbing state
    state_rewards[-2] = 1.0

    # Visit every (state, action) pair in row-major order.
    for state, action in np.ndindex(64 + 1, 4):
        expected_reward = transitions[state, action, :].dot(state_rewards)
        assert np.isclose(rewards[state, action], expected_reward)
def test_make_frozen8():
    """Check that 'FrozenLake8x8-v0' can be unwrapped to a FrozenLakeEnv."""
    frozen8_env = make_env('FrozenLake8x8-v0')
    can_unwrap = is_unwrappable_to(frozen8_env, FrozenLakeEnv)
    assert can_unwrap
def wrapper_factory():
    """Create a new environment for ``key`` and pass it to ``decorated_function``.

    NOTE(review): ``key`` and ``decorated_function`` are not defined in this
    block; presumably they are captured from an enclosing scope -- confirm.
    """
    # Import the unified way of creating environments (usually using
    # gym.make, with some exceptions). The import is local to this function.
    from irl_benchmark.envs import make_env

    # Build a new gym environment ...
    base_env = make_env(key)
    # ... and return a new feature wrapper around it.
    return decorated_function(base_env)
def test_random_tabular_function():
    """Construct a 'random' TabularRewardFunction for 'FrozenLake-v0'.

    No assertions are visible here; the test passes as long as construction
    does not raise.
    """
    frozen_lake_env = make_env('FrozenLake-v0')
    reward_function = TabularRewardFunction(frozen_lake_env, 'random')