def test_flatten_observation(self):
    """Check that FlattenObservation collapses a Pong frame to one axis.

    The wrapped environment is first reset on its own to confirm the raw
    observation shape, then wrapped again and reset to confirm the
    flattened shape has the same number of elements.
    """
    raw_shape = (210, 160, 3)
    flat_shape = (210 * 160 * 3, )

    wrapped = GymWrapper(gym.make('Pong-v0'))
    raw_obs = wrapped.reset()
    assert raw_obs.shape == raw_shape

    flattened = FlattenObservation(wrapped)
    flat_obs = flattened.reset()
    assert flat_obs.shape == flat_shape
def test_scale_image_observation(self):
    """Check that ScaleImageObservation keeps Pong pixels within [0, 1].

    Verifies both the advertised observation-space bounds and the actual
    observations returned by ``reset`` and by one random ``step``.
    """
    env = gym.make('Pong-v0')
    env = GymWrapper(env)
    env = ScaleImageObservation(env)

    # The wrapper must report the rescaled bounds on its observation space.
    assert np.allclose(env.observation_space.high, 1.0)
    assert np.allclose(env.observation_space.low, 0.0)

    # np.all is used instead of np.alltrue: the latter was a deprecated
    # alias that has been removed in NumPy 2.0.
    obs = env.reset()
    assert np.all(obs <= 1.0) and np.all(obs >= 0.0)

    obs, _, _, _ = env.step(env.action_space.sample())
    assert np.all(obs <= 1.0) and np.all(obs >= 0.0)
def test_clip_reward(self, env_id):
    """Check that ClipReward keeps every reward inside [-1, 1].

    Takes up to 100 random steps in the environment named by ``env_id``
    and stops early when the episode reports ``done``.

    Args:
        env_id: Gym environment id passed to ``gym.make``.
    """
    clipped = ClipReward(GymWrapper(gym.make(env_id)))
    clipped.reset()

    for _step in range(100):
        action = clipped.action_space.sample()
        _, reward, done, _ = clipped.step(action)
        assert -1.0 <= reward <= 1.0
        if done:
            break
def test_frame_stack(self):
    """Check FrameStack's buffer layout and contents on seeded CartPole.

    The hard-coded vectors below depend on ``seed(1)`` and on the exact
    sequence of actions (0, 1, 1, 1), so the order of calls must not change.
    """
    # Seed-dependent reference frames compared against buffer columns.
    # NOTE(review): the first looks like the reset observation and the
    # second like the observation after the first step -- confirm.
    frame_a = [0.03073904, 0.00145001, -0.03088818, -0.03131252]
    frame_b = [0.03076804, -0.19321568, -0.03151444, 0.25146705]

    stacked = FrameStack(GymWrapper(gym.make('CartPole-v1')), num_stack=4)
    stacked.seed(1)

    # Type and shape bookkeeping before any interaction.
    assert isinstance(stacked, Env) and isinstance(stacked, FrameStack)
    assert stacked.num_stack == 4
    assert stacked.observation_space.shape == (4, 4)

    # The buffer is expected to be a zero-filled float32 array.
    buffer = stacked.stack_buffer
    assert isinstance(buffer, np.ndarray)
    assert buffer.shape == (4, 4)
    assert np.all(buffer == 0.0)
    assert buffer.dtype == np.float32

    assert stacked.reset().shape == (4, 4)

    # After reset plus one step, only the first two columns are populated.
    frames = stacked.step(0)[0]
    assert frames[:, 0].sum() != 0.0
    assert frames[:, 1].sum() != 0.0
    assert np.all(frames[:, 2:] == 0.0)
    assert np.any(frames[:, 0] != frames[:, 1])

    # Two more steps; only the result of the second one is inspected.
    for _ in range(2):
        frames = stacked.step(1)[0]
    assert np.allclose(frames[:, -1], frame_a)
    assert np.allclose(frames[:, 2], frame_b)

    # One further step moves what was in column 2 into the last column.
    frames = stacked.step(1)[0]
    assert np.allclose(frames[:, -1], frame_b)
def test_env_spec(self):
    """Check the fields EnvSpec exposes for a wrapped CartPole-v1."""
    wrapped = GymWrapper(gym.make('CartPole-v1'))
    wrapped.seed(0)
    spec = EnvSpec(wrapped)

    # Space types and the derived control type.
    assert isinstance(spec.observation_space, Box)
    assert isinstance(spec.action_space, Discrete)
    assert spec.control_type == 'Discrete'

    # Episode limits and reward bounds.
    assert spec.T == 500
    assert spec.max_episode_reward == 475.0
    unbounded = float('inf')
    assert spec.reward_range == (-unbounded, unbounded)
def test_reward_scale(self):
    """Check that RewardScale with scale=0.02 yields a reward of 0.02.

    One random step is taken on seeded CartPole-v1 after a reset.
    """
    factor = 0.02
    scaled = RewardScale(GymWrapper(gym.make('CartPole-v1')), scale=factor)
    scaled.seed(1)
    scaled.reset()

    action = scaled.action_space.sample()
    _, reward, _, _ = scaled.step(action)
    assert reward == 0.02
def test_time_aware_observation(self, env_id):
    """Check that TimeAwareObservation appends a step counter.

    The wrapped observation must be one element longer than the base
    observation, its last element must equal the wrapper's ``t``, and
    both must go 0 -> 1 -> 2 over two steps and return to 0 on reset.

    Args:
        env_id: Gym environment id passed to ``gym.make``.
    """
    base = GymWrapper(gym.make(env_id))
    timed = TimeAwareObservation(base)

    base_dim = base.observation_space.shape[0]
    assert base_dim + 1 == timed.observation_space.shape[0]

    # Reset the base wrapper first, then the time-aware wrapper.
    base_obs = base.reset()
    obs_t = timed.reset()
    expected_len = base_obs.shape[0] + 1
    assert expected_len == obs_t.shape[0]
    assert obs_t[-1] == 0.0
    assert timed.t == 0.0

    # First step: length is still base + 1 and the counter reads 1.
    obs_t, _, _, _ = timed.step(timed.action_space.sample())
    assert expected_len == obs_t.shape[0]
    assert obs_t[-1] == 1.0
    assert timed.t == 1.0

    # Second step: the counter reads 2.
    obs_t, _, _, _ = timed.step(timed.action_space.sample())
    assert obs_t[-1] == 2.0
    assert timed.t == 2.0

    # Reset puts the counter back to 0.
    obs_t = timed.reset()
    assert obs_t[-1] == 0.0
    assert timed.t == 0.0
def test_gym_wrapper(self):
    """Check GymWrapper on a discrete and a continuous environment.

    The same sequence of assertions is run for CartPole-v1 and
    Pendulum-v0; only the expected values differ per environment.
    """
    unbounded = float('inf')
    # (env id, reset shape, action space type, T, max_episode_reward)
    cases = (
        ('CartPole-v1', (4, ), Discrete, 500, 475.0),
        ('Pendulum-v0', (3, ), Box, 200, None),
    )

    for env_id, obs_shape, action_cls, horizon, best_reward in cases:
        gym_env = gym.make(env_id)
        env = GymWrapper(gym_env)

        # Type relationships between the wrapper and the wrapped env.
        assert isinstance(env, GymWrapper) and isinstance(env, Wrapper)
        assert isinstance(gym_env, gym.Env) and isinstance(
            env.unwrapped, gym.Env)
        assert isinstance(env.env, gym.Wrapper)

        # Basic interaction API.
        assert env.reset().shape == obs_shape
        assert isinstance(env.observation_space, Box)
        assert isinstance(env.action_space, action_cls)
        assert len(env.step(env.action_space.sample())) == 4
        assert env.seed(3) == [3]

        # Episode metadata exposed by the wrapper.
        assert env.T == horizon
        if best_reward is None:
            assert env.max_episode_reward is None
        else:
            assert env.max_episode_reward == best_reward
        assert env.reward_range == (-unbounded, unbounded)

        # Drop both references before the next environment is created.
        del gym_env
        del env