コード例 #1
0
ファイル: test_envs.py プロジェクト: lewisKit/lagom
    def test_flatten_observation(self):
        """Check that FlattenObservation turns Pong frames into 1-D vectors.

        The GymWrapper-wrapped Pong environment must first yield an
        observation of shape (210, 160, 3) on reset; once FlattenObservation
        is applied, the reset observation must have a single axis with
        210 * 160 * 3 entries.
        """
        wrapped = GymWrapper(gym.make('Pong-v0'))

        # Before flattening the observation keeps its image layout.
        raw_obs = wrapped.reset()
        assert raw_obs.shape == (210, 160, 3)

        # After flattening there is one axis with the same number of entries.
        flat_env = FlattenObservation(wrapped)
        flat_obs = flat_env.reset()
        expected_shape = (210 * 160 * 3, )
        assert flat_obs.shape == expected_shape
コード例 #2
0
 def test_scale_image_observation(self):
     """Check that ScaleImageObservation keeps Pong observations in [0, 1].

     Verifies that the wrapped observation space reports bounds close to
     0.0 (low) and 1.0 (high), and that the observations returned by both
     ``reset`` and a random ``step`` lie entirely within that interval.
     """
     env = gym.make('Pong-v0')
     env = GymWrapper(env)
     env = ScaleImageObservation(env)

     # The advertised bounds of the observation space.
     assert np.allclose(env.observation_space.high, 1.0)
     assert np.allclose(env.observation_space.low, 0.0)

     # np.all replaces np.alltrue, a deprecated alias removed in NumPy 2.0.
     obs = env.reset()
     assert np.all(obs <= 1.0) and np.all(obs >= 0.0)

     obs, _, _, _ = env.step(env.action_space.sample())
     assert np.all(obs <= 1.0) and np.all(obs >= 0.0)
コード例 #3
0
 def test_clip_reward(self, env_id):
     """Check that ClipReward keeps every step reward within [-1, 1].

     Takes up to 100 randomly sampled actions in the environment created
     from ``env_id``, asserting the reward bound after each step and
     stopping early once the episode reports ``done``.
     """
     clipped_env = ClipReward(GymWrapper(gym.make(env_id)))
     clipped_env.reset()
     for _ in range(100):
         action = clipped_env.action_space.sample()
         _obs, reward, done, _info = clipped_env.step(action)
         assert -1.0 <= reward <= 1.0
         if done:
             break
コード例 #4
0
ファイル: test_envs.py プロジェクト: lewisKit/lagom
 def test_frame_stack(self):
     """Exercise FrameStack with num_stack=4 on a seeded CartPole-v1.

     Checks the wrapper's type, its (4, 4) observation space and its
     zero-initialised float32 stack buffer, then steps the environment and
     checks which columns of the stacked observation are populated. The
     hard-coded vectors are the values expected with seed 1 and the exact
     action sequence used below.
     """
     stacked = FrameStack(GymWrapper(gym.make('CartPole-v1')), num_stack=4)
     stacked.seed(1)

     # Wrapper identity and configuration.
     assert isinstance(stacked, Env)
     assert isinstance(stacked, FrameStack)
     assert stacked.num_stack == 4
     assert stacked.observation_space.shape == (4, 4)

     # The internal buffer starts as a (4, 4) float32 array of zeros.
     buffer = stacked.stack_buffer
     assert isinstance(buffer, np.ndarray)
     assert buffer.shape == (4, 4)
     assert np.all(buffer == 0.0)
     assert buffer.dtype == np.float32

     assert stacked.reset().shape == (4, 4)

     # After reset plus one step only the first two columns hold data.
     frames = stacked.step(0)[0]
     assert frames[:, 0].sum() != 0.0
     assert frames[:, 1].sum() != 0.0
     assert np.all(frames[:, 2:] == 0.0)
     assert np.any(frames[:, 0] != frames[:, 1])

     expected_last = [0.03073904, 0.00145001, -0.03088818, -0.03131252]
     expected_shifted = [0.03076804, -0.19321568, -0.03151444, 0.25146705]

     # Two more steps; only the observation from the second one is inspected.
     stacked.step(1)
     frames = stacked.step(1)[0]
     assert np.allclose(frames[:, -1], expected_last)
     assert np.allclose(frames[:, 2], expected_shifted)

     # One step later the former column 2 is expected in the last column.
     frames = stacked.step(1)[0]
     assert np.allclose(frames[:, -1], expected_shifted)
コード例 #5
0
ファイル: test_envs.py プロジェクト: lewisKit/lagom
    def test_env_spec(self):
        """Check the attributes EnvSpec exposes for a wrapped CartPole-v1.

        Expects a Box observation space, a Discrete action space, a
        'Discrete' control type, a horizon T of 500, a maximum episode
        reward of 475.0 and an unbounded reward range.
        """
        wrapped = GymWrapper(gym.make('CartPole-v1'))
        wrapped.seed(0)

        spec = EnvSpec(wrapped)

        # Space types and the control type derived from them.
        assert isinstance(spec.observation_space, Box)
        assert isinstance(spec.action_space, Discrete)
        assert spec.control_type == 'Discrete'

        # Episode-level metadata.
        assert spec.T == 500
        assert spec.max_episode_reward == 475.0
        unbounded = (-float('inf'), float('inf'))
        assert spec.reward_range == unbounded
コード例 #6
0
ファイル: test_envs.py プロジェクト: lewisKit/lagom
 def test_reward_scale(self):
     """Check that RewardScale with scale=0.02 yields a step reward of 0.02.

     Uses a seeded CartPole-v1 environment and a single randomly sampled
     action.
     """
     scaled_env = RewardScale(GymWrapper(gym.make('CartPole-v1')), scale=0.02)
     scaled_env.seed(1)
     scaled_env.reset()

     action = scaled_env.action_space.sample()
     # Only the reward of the four-element step result is inspected.
     _, reward, _, _ = scaled_env.step(action)
     assert reward == 0.02
コード例 #7
0
    def test_time_aware_observation(self, env_id):
        """Check that TimeAwareObservation appends a step counter.

        For the environment created from ``env_id`` the wrapped observation
        must be one entry longer than the base observation. Its last entry,
        and the wrapper's ``t`` attribute, must read 0 after a reset and
        increase by one with each step.
        """
        base_env = GymWrapper(gym.make(env_id))
        timed = TimeAwareObservation(base_env)

        # The observation space grows by exactly one entry.
        base_dim = base_env.observation_space.shape[0]
        assert base_dim + 1 == timed.observation_space.shape[0]

        # Reset: the counter starts at zero.
        base_obs = base_env.reset()
        augmented = timed.reset()
        assert base_obs.shape[0] + 1 == augmented.shape[0]
        assert augmented[-1] == 0.0
        assert timed.t == 0.0

        # First step: the counter reads one and the length is unchanged.
        augmented, _, _, _ = timed.step(timed.action_space.sample())
        assert base_obs.shape[0] + 1 == augmented.shape[0]
        assert augmented[-1] == 1.0
        assert timed.t == 1.0

        # Second step: the counter reads two.
        augmented, _, _, _ = timed.step(timed.action_space.sample())
        assert augmented[-1] == 2.0
        assert timed.t == 2.0

        # Another reset puts the counter back to zero.
        augmented = timed.reset()
        assert augmented[-1] == 0.0
        assert timed.t == 0.0
コード例 #8
0
ファイル: test_envs.py プロジェクト: lewisKit/lagom
    def test_gym_wrapper(self):
        """Check GymWrapper on CartPole-v1 and then on Pendulum-v0.

        For each environment this verifies the wrapper's type relations,
        the shape of the reset observation, the space types, the length of
        the step result, the return value of ``seed``, and the ``T``,
        ``max_episode_reward`` and ``reward_range`` attributes.
        """
        # --- CartPole-v1: Discrete actions, 4-dimensional observation ---
        raw_env = gym.make('CartPole-v1')
        wrapped = GymWrapper(raw_env)
        assert isinstance(wrapped, GymWrapper)
        assert isinstance(wrapped, Wrapper)
        assert isinstance(raw_env, gym.Env)
        assert isinstance(wrapped.unwrapped, gym.Env)
        assert isinstance(wrapped.env, gym.Wrapper)
        assert wrapped.reset().shape == (4, )
        assert isinstance(wrapped.observation_space, Box)
        assert isinstance(wrapped.action_space, Discrete)
        step_result = wrapped.step(wrapped.action_space.sample())
        assert len(step_result) == 4
        assert wrapped.seed(3) == [3]
        assert wrapped.T == 500
        assert wrapped.max_episode_reward == 475.0
        assert wrapped.reward_range == (-float('inf'), float('inf'))

        del raw_env
        del wrapped

        # --- Pendulum-v0: Box actions, 3-dimensional observation ---
        raw_env = gym.make('Pendulum-v0')
        wrapped = GymWrapper(raw_env)
        assert isinstance(wrapped, GymWrapper)
        assert isinstance(wrapped, Wrapper)
        assert isinstance(raw_env, gym.Env)
        assert isinstance(wrapped.unwrapped, gym.Env)
        assert isinstance(wrapped.env, gym.Wrapper)
        assert wrapped.reset().shape == (3, )
        assert isinstance(wrapped.observation_space, Box)
        assert isinstance(wrapped.action_space, Box)
        step_result = wrapped.step(wrapped.action_space.sample())
        assert len(step_result) == 4
        assert wrapped.seed(3) == [3]
        assert wrapped.T == 200
        # For Pendulum-v0 the expected maximum episode reward is None.
        assert wrapped.max_episode_reward is None
        assert wrapped.reward_range == (-float('inf'), float('inf'))

        del raw_env
        del wrapped