Example #1
0
    def test_sample_batch_as_Transition_2(self):
        """Check how ``sample_batch_as_Transition`` splits a full buffer.

        A ``TransitionData`` buffer of size 100 is filled with one 100-step
        rollout on ``ModifiedHalfCheetah`` driven by
        ``self.RandomController_get_action``. All data is then requested as
        batches of 32 (``all_as_batch=True``); the test expects 4 batches,
        the last of which holds 4 transitions.
        """
        buffer_size = 100
        env = make('ModifiedHalfCheetah')
        env.init()
        env_spec = env.env_spec
        random_buffer = TransitionData(env_spec=env_spec,
                                       obs_shape=env_spec.obs_shape,
                                       action_shape=env_spec.action_shape,
                                       size=buffer_size)
        print("====> Random Sample")
        num_trajectory = 1
        max_step = 100

        for _trajectory in range(num_trajectory):
            obs = env.reset()
            for _step in range(max_step):
                act = self.RandomController_get_action(env=env, state=obs)
                obs_, reward, done, _ = env.step(act)
                random_buffer.append(obs, act, obs_, done, reward)
                # The rollout must not end early. Use the unittest assertion
                # rather than a bare ``assert`` so the check is not stripped
                # when Python runs with -O.
                self.assertFalse(done)
                obs = obs_

        batch_data = random_buffer.sample_batch_as_Transition(
            batch_size=32, shuffle_flag=True, all_as_batch=True)
        # 100 stored transitions in batches of 32: expected 32 + 32 + 32 + 4.
        self.assertEqual(len(batch_data), 4)
        self.assertEqual(len(batch_data[-1]), 4)
Example #2
0
    def test_StandScaler(self):
        """Update ``RunningStandardScaler`` batch-wise and in one shot.

        Ten transitions with sampled actions are stored in a
        ``TransitionData`` buffer. One scaler is updated with the actions of
        each batch returned by ``sample_batch_as_Transition`` (batch size 4),
        a second scaler is updated once with ``buffer.action_set``. The mean
        and variance of both scalers are printed.

        NOTE(review): the two sets of statistics are only printed, never
        compared -- presumably they are meant to agree; consider asserting
        that once a tolerance is agreed on.
        """
        env = make('ModifiedHalfCheetah')
        env_spec = env.env_spec
        self.assertEqual(env_spec.flat_obs_dim, 18)
        self.assertEqual(env_spec.flat_action_dim, 6)

        buffer_size = 10
        buffer = TransitionData(env_spec=env_spec,
                                obs_shape=env_spec.obs_shape,
                                action_shape=env_spec.action_shape,
                                size=buffer_size)
        obs = env.reset()
        for _step in range(buffer_size):
            act = env.action_space.sample()
            obs_, rew, done, _ = env.step(act)
            buffer.append(obs, act, obs_, done, rew)
            # Advance the current observation so each stored transition
            # starts from the state the previous step ended in.
            obs = obs_

        batch_list = buffer.sample_batch_as_Transition(4, all_as_batch=True)
        # Despite the "state_input" name, both scalers are sized by the
        # action dimension and are fed action data.
        state_input_scaler_1 = RunningStandardScaler(env_spec.flat_action_dim)

        for batch_data in batch_list:
            state_input_scaler_1.update_scaler(batch_data.action_set)

        mean_1 = state_input_scaler_1._mean
        var_1 = state_input_scaler_1._var

        print(mean_1)
        print(var_1)

        state_input_scaler_2 = RunningStandardScaler(env_spec.flat_action_dim)
        state_input_scaler_2.update_scaler(buffer.action_set)
        mean_2 = state_input_scaler_2._mean
        var_2 = state_input_scaler_2._var
        print(mean_2)
        print(var_2)