def test_rms_normalizer_box(self):
    """Exercise ``utils.RMSNormalizer`` on image and non-image Box spaces.

    The test asserts, in order:

    * for a uint8 image Box, the normalizer reports ``enabled == False``
      and ``fixed == True`` and ``normalize`` returns its input unchanged;
    * for a float32 Box, the normalizer reports ``enabled == True`` and
      ``fixed == False``, and after one ``update`` its running mean and
      variance equal the per-element statistics of that batch;
    * after a second ``update`` the statistics equal those of both
      batches concatenated;
    * a save/load round trip reproduces the normalized output, and
      setting ``enabled = False`` makes ``normalize`` return its input.
    """
    # Local import so this block does not depend on the file's import header.
    import os

    batch_size = 3

    # image box
    space = gym.spaces.Box(
        low=np.zeros((64, 64, 3)),
        high=np.ones((64, 64, 3)) * 255,
        dtype=np.uint8,
    )
    rms_norm = utils.RMSNormalizer(space)
    self.assertFalse(rms_norm.enabled)
    self.assertTrue(rms_norm.fixed)
    obs = utils.stack_obs([space.sample() for _ in range(batch_size)], space)
    res_obs = rms_norm.normalize(obs)
    self.assertArrayEqual(obs, res_obs)

    # non-image box
    space = gym.spaces.Box(
        low=np.ones((64, 64, 3)) * 0.0,
        high=np.ones((64, 64, 3)) * 1.0,
        dtype=np.float32,
    )
    rms_norm = utils.RMSNormalizer(space)
    self.assertTrue(rms_norm.enabled)
    self.assertFalse(rms_norm.fixed)
    obs = utils.stack_obs([space.sample() for _ in range(batch_size)], space)
    rms_norm.update(obs)
    res_obs = rms_norm.normalize(obs)
    obs_mean = np.mean(obs, axis=0)
    obs_var = np.var(obs, axis=0)
    self.assertArrayClose(rms_norm.rms.mean, obs_mean)
    self.assertArrayClose(rms_norm.rms.var, obs_var)
    # Reference normalization the test compares against:
    # (x - mean) / sqrt(var + float32 machine epsilon).
    eps = np.finfo(np.float32).eps.item()
    obs_norm = (obs - obs_mean) / np.sqrt(obs_var + eps)
    self.assertArrayClose(obs_norm, res_obs, decimal=3)

    # sampling: a second update must accumulate with the first batch
    obs2 = utils.stack_obs([space.sample() for _ in range(batch_size)], space)
    rms_norm.update(obs2)
    res_obs = rms_norm.normalize(obs2)
    concat_obs = np.concatenate((obs, obs2), axis=0)
    obs_mean = np.mean(concat_obs, axis=0)
    obs_var = np.var(concat_obs, axis=0)
    self.assertArrayClose(rms_norm.rms.mean, obs_mean)
    self.assertArrayClose(rms_norm.rms.var, obs_var)
    obs_norm = (obs2 - obs_mean) / np.sqrt(obs_var + eps)
    self.assertArrayClose(obs_norm, res_obs, decimal=3)

    # test save/load
    # A path inside a temporary directory is used instead of reopening a
    # still-open NamedTemporaryFile by name, which is not portable
    # (it fails on Windows). The directory and its contents are removed
    # when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        save_path = os.path.join(tmp_dir, 'rms_norm')
        rms_norm.save(save_path)
        new_rms_norm = utils.RMSNormalizer(space).load(save_path)
        new_rms_norm.fixed = True
        new_rms_norm.enabled = True
        res_obs = new_rms_norm.normalize(obs2)
        self.assertArrayClose(obs_norm, res_obs, decimal=3)
        new_rms_norm.enabled = False
        raw_obs = new_rms_norm.normalize(obs2)
        self.assertArrayClose(obs2, raw_obs, decimal=3)
def test_stack_obs_box(self):
    """Check ``utils.stack_obs`` on a uint8 Box space.

    Two sampled observations are stacked and compared against
    ``np.stack``; passing a single bare observation or an empty list
    is expected to raise ``ValueError``.
    """
    box = gym.spaces.Box(
        low=np.zeros((64, 64, 3)),
        high=np.ones((64, 64, 3)) * 255,
        dtype=np.uint8,
    )
    first = box.sample()
    second = box.sample()
    pair = (first, second)

    result = utils.stack_obs(pair, box)
    self.assertArrayEqual(result, np.stack(pair))

    # test exceptions
    for bad_input in (first, []):
        with self.assertRaises(ValueError):
            utils.stack_obs(bad_input, box)
def test_stack_obs_tuple(self):
    """Check ``utils.stack_obs`` on a Tuple space of (Box, MultiBinary).

    Each component of the stacked result is compared against
    ``np.stack`` of the matching components of the two samples.
    Stacking the same tuple observations against a Dict space is
    expected to raise ``ValueError``.
    """
    space1 = gym.spaces.Box(
        low=np.zeros((64, 64, 3)),
        high=np.ones((64, 64, 3)) * 255,
        dtype=np.uint8,
    )
    space2 = gym.spaces.MultiBinary(5)
    space = gym.spaces.Tuple((space1, space2))
    obs1 = space.sample()
    obs2 = space.sample()

    stacked = utils.stack_obs((obs1, obs2), space)
    self.assertArrayEqual(stacked[0], np.stack((obs1[0], obs2[0])))
    self.assertArrayEqual(stacked[1], np.stack((obs1[1], obs2[1])))

    # test exceptions
    # Build the fixture outside the assertRaises block so that only
    # stack_obs can satisfy the expected ValueError; an error raised
    # while constructing the Dict space must not make the test pass.
    dict_space = gym.spaces.Dict({'pov': space1, 'vec': space2})
    with self.assertRaises(ValueError):
        utils.stack_obs((obs1, obs2), dict_space)
def test_running_mean_std_3d(self):
    """Check ``utils.RunningMeanStd`` after one update with a 3-sample batch.

    The tracker is constructed with the positional arguments ``0.0`` and
    ``1.0``. After one ``update``, its ``mean`` and ``var`` are expected
    to match the per-element batch mean and variance to 6 decimals.
    """
    num_samples = 3
    box = gym.spaces.Box(
        low=np.ones((64, 64, 3)) * 0.0,
        high=np.ones((64, 64, 3)) * 1.0,
        dtype=np.float32,
    )
    samples = [box.sample() for _ in range(num_samples)]
    batch = utils.stack_obs(samples, box)

    tracker = utils.RunningMeanStd(0.0, 1.0)
    tracker.update(batch)

    expected_mean = np.mean(batch, axis=0)
    self.assertArrayClose(tracker.mean, expected_mean, decimal=6)
    expected_var = np.var(batch, axis=0)
    self.assertArrayClose(tracker.var, expected_var, decimal=6)
def test_preprocess_observation_box_bounded(self):
    """Check ``utils.preprocess_observation`` on a bounded uint8 Box.

    For both a single observation and a stacked batch of 8, the result
    is expected to match ``utils.normalize`` called with the space's
    ``low``/``high`` bounds and the target range arguments ``0.`` and
    ``1.``.
    """
    box = gym.spaces.Box(
        low=np.zeros((64, 64, 3)),
        high=np.ones((64, 64, 3)) * 255,
        dtype=np.uint8,
    )

    def check(observation):
        # Reference value first, then the function under test.
        expected = utils.normalize(observation, box.low, box.high, 0., 1.)
        actual = utils.preprocess_observation(observation, box)
        self.assertArrayClose(expected, actual)

    # single observation
    check(box.sample())

    # batch
    num_samples = 8
    samples = [box.sample() for _ in range(num_samples)]
    check(utils.stack_obs(samples, box))
def test_preprocess_observation_discrete(self):
    """Check ``utils.preprocess_observation`` on a Discrete(5) space.

    The expected output is a float32 one-hot encoding: a length-5
    vector for a single observation, and one row per observation for a
    stacked batch of 8.
    """
    num_classes = 5
    discrete = gym.spaces.Discrete(num_classes)

    # single observation -> one-hot vector
    sample = discrete.sample()
    expected = np.zeros((num_classes,), dtype=np.float32)
    expected[sample] = 1.0
    actual = utils.preprocess_observation(sample, discrete)
    self.assertArrayClose(expected, actual)

    # batch -> one one-hot row per observation
    num_samples = 8
    samples = [discrete.sample() for _ in range(num_samples)]
    batch = utils.stack_obs(samples, discrete)
    rows = batch.size
    expected = np.zeros((rows, num_classes), dtype=np.float32)
    expected[np.arange(rows), batch] = 1.0
    actual = utils.preprocess_observation(batch, discrete)
    self.assertArrayClose(expected, actual)
def test_preprocess_observation_multidiscrete(self):
    """Check ``utils.preprocess_observation`` on MultiDiscrete([4, 7]).

    The expected output concatenates one one-hot segment per
    sub-dimension (total width 4 + 7), as a float32 vector for a single
    observation and as one row per observation for a stacked batch of 8.
    """
    dims = [4, 7]
    multi = gym.spaces.MultiDiscrete(dims)
    sample = multi.sample()

    # Start index of each sub-dimension's segment in the flat encoding.
    starts = np.cumsum([0] + dims)[:len(dims)]
    width = np.sum(dims)

    # single observation
    expected = np.zeros((width,), dtype=np.float32)
    expected[sample + starts] = 1.0
    actual = utils.preprocess_observation(sample, multi)
    self.assertArrayClose(expected, actual)

    # batch
    num_samples = 8
    samples = [multi.sample() for _ in range(num_samples)]
    batch = utils.stack_obs(samples, multi)
    expected = np.zeros((num_samples, width), dtype=np.float32)
    for row, hot_indices in enumerate(batch + starts):
        expected[row, hot_indices] = 1.0
    actual = utils.preprocess_observation(batch, multi)
    self.assertArrayClose(expected, actual)