def test_sample_batch_as_Transition_2(self):
    """Check how ``sample_batch_as_Transition`` splits a full buffer.

    Fills a ``TransitionData`` buffer of size 100 with a single 100-step
    rollout whose actions come from ``self.RandomController_get_action``,
    then samples it with ``batch_size=32``, ``shuffle_flag=True`` and
    ``all_as_batch=True``. The test asserts that 4 batches are returned
    and that the last batch has length 4 (presumably 100 = 3 * 32 + 4).
    """
    env = make('ModifiedHalfCheetah')
    env.init()
    env_spec = env.env_spec
    random_buffer = TransitionData(
        env_spec=env_spec,
        obs_shape=env_spec.obs_shape,
        action_shape=env_spec.action_shape,
        size=100,
    )

    print("====> Random Sample")
    num_trajectory = 1
    max_step = 100
    for _trajectory in range(num_trajectory):
        obs = env.reset()
        for _step in range(max_step):
            act = self.RandomController_get_action(env=env, state=obs)
            obs_, reward, done, _ = env.step(act)
            random_buffer.append(obs, act, obs_, done, reward)
            # Use the TestCase assertion instead of a bare `assert`: it is
            # not stripped under `python -O` and reports like the other
            # checks in this test. The expected batch counts below rely on
            # the rollout running for the full max_step steps.
            self.assertFalse(
                done, "episode terminated before max_step was reached")
            obs = obs_

    batch_data = random_buffer.sample_batch_as_Transition(
        batch_size=32, shuffle_flag=True, all_as_batch=True)

    self.assertEqual(len(batch_data), 4)
    self.assertEqual(len(batch_data[-1]), 4)
def test_StandScaler(self):
    """Update two ``RunningStandardScaler`` objects from buffered actions.

    Ten transitions with randomly sampled actions are stored in a
    ``TransitionData`` buffer. One scaler is updated batch by batch using
    batches of size 4 from ``sample_batch_as_Transition``; the other is
    updated once with the buffer's whole ``action_set``. The resulting
    ``_mean`` / ``_var`` values are printed for manual comparison only;
    the test does not assert on them.
    """
    env = make('ModifiedHalfCheetah')
    # NOTE(review): env.init() is not called before env.reset() here;
    # confirm that it is not required for this environment.
    env_spec = env.env_spec
    self.assertEqual(env_spec.flat_obs_dim, 18)
    self.assertEqual(env_spec.flat_action_dim, 6)

    buffer_size = 10
    buffer = TransitionData(
        env_spec=env_spec,
        obs_shape=env_spec.obs_shape,
        action_shape=env_spec.action_shape,
        size=buffer_size,
    )
    obs = env.reset()
    for _step in range(buffer_size):
        act = env.action_space.sample()
        obs_, rew, done, _ = env.step(act)
        buffer.append(obs, act, obs_, done, rew)
        # Advance the current observation; without this every stored
        # transition would record the initial reset observation as its
        # state.
        obs = obs_

    # Scaler fed incrementally, one sampled batch at a time.
    batch_list = buffer.sample_batch_as_Transition(4, all_as_batch=True)
    batched_scaler = RunningStandardScaler(env_spec.flat_action_dim)
    for batch_data in batch_list:
        batched_scaler.update_scaler(batch_data.action_set)
    mean_1 = batched_scaler._mean
    var_1 = batched_scaler._var

    print(mean_1)
    print(var_1)

    # Scaler fed the buffer's whole action set in a single update.
    full_scaler = RunningStandardScaler(env_spec.flat_action_dim)
    full_scaler.update_scaler(buffer.action_set)
    mean_2 = full_scaler._mean
    var_2 = full_scaler._var

    print(mean_2)
    print(var_2)