def test_fit_unnormalized(self):
    """Fit with normalization disabled and check the stored statistics.

    Builds a baseline with ``normalize_inputs=False`` and
    ``normalize_outputs=False``, fits it 150 times on the training paths
    and asserts the predictions are within 0.1 (absolute) of the expected
    values. Afterwards the network's input statistics must equal
    zeros/ones exactly and its output statistics must be close to
    zeros/ones.
    """
    env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    baseline = GaussianMLPBaseline(env_spec=env_spec,
                                   subsample_factor=0.9,
                                   normalize_inputs=False,
                                   normalize_outputs=False)
    train_paths, _obs, _returns, paths, expected = get_train_test_data()

    n_fits = 150
    for _ in range(n_fits):
        baseline.fit(train_paths)

    predicted = baseline.predict(paths)
    assert np.allclose(predicted, expected, rtol=0, atol=0.1)

    network = baseline._networks['default']

    # Input statistics are compared exactly.
    x_mean = self.sess.run(network.x_mean)
    x_std = self.sess.run(network.x_std)
    assert np.array_equal(x_mean, np.zeros_like(x_mean))
    assert np.array_equal(x_std, np.ones_like(x_std))

    # Output statistics are compared with the default allclose tolerance.
    y_mean = self.sess.run(network.y_mean)
    y_std = self.sess.run(network.y_std)
    assert np.allclose(y_mean, np.zeros_like(y_mean))
    assert np.allclose(y_std, np.ones_like(y_std))
def test_fit_normalized(self):
    """Fit with default settings and check the stored statistics.

    Fits a default-constructed baseline 150 times and asserts the
    predictions are within 0.1 (absolute) of the expected values. The
    network's input statistics are then compared against the mean/std of
    the training observations, and its output statistics against the
    mean/std of the training returns (both taken over axis 0).
    """
    env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    baseline = GaussianMLPBaseline(env_spec=env_spec)
    train_paths, observations, returns, paths, expected = (
        get_train_test_data())

    n_fits = 150
    for _ in range(n_fits):
        baseline.fit(train_paths)

    predicted = baseline.predict(paths)
    assert np.allclose(predicted, expected, rtol=0, atol=0.1)

    network = baseline._networks['default']

    # Input statistics versus the training observations.
    x_mean = self.sess.run(network.x_mean)
    x_std = self.sess.run(network.x_std)
    assert np.allclose(x_mean, np.mean(observations, axis=0, keepdims=True))
    assert np.allclose(x_std, np.std(observations, axis=0, keepdims=True))

    # Output statistics versus the training returns.
    y_mean = self.sess.run(network.y_mean)
    y_std = self.sess.run(network.y_std)
    assert np.allclose(y_mean, np.mean(returns, axis=0, keepdims=True))
    assert np.allclose(y_std, np.std(returns, axis=0, keepdims=True))
def test_unflattened_input(self):
    """Fit and predict on an observation from a ``(2, 2)`` box env.

    Takes a single step in the environment, fits the baseline on that one
    observation/reward pair, and asserts that the prediction for the same
    observation is close to zero.
    """
    env = GymEnv(DummyBoxEnv(obs_dim=(2, 2)))
    baseline = GaussianMLPBaseline(env_spec=env.spec)

    env.reset()
    step = env.step(1)
    obs = step.observation
    rewards = step.reward

    baseline.fit([{'observations': [obs], 'returns': [rewards]}])
    prediction = baseline.predict({'observations': [obs]})

    assert np.allclose(0., prediction)
def test_fit_smaller_subsample_factor(self):
    """Fit with ``subsample_factor=0.9`` and check the predictions.

    After 150 fits on the training paths, predictions must be within 0.1
    (absolute) of the expected values.
    """
    env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    baseline = GaussianMLPBaseline(env_spec=env_spec, subsample_factor=0.9)
    train_paths, _obs, _returns, paths, expected = get_train_test_data()

    n_fits = 150
    for _ in range(n_fits):
        baseline.fit(train_paths)

    predicted = baseline.predict(paths)
    assert np.allclose(predicted, expected, rtol=0, atol=0.1)
def test_fit_without_trusted_region(self):
    """Fit with ``use_trust_region=False`` and check the predictions.

    After 150 fits on the training paths, predictions must be within 0.1
    (absolute) of the expected values.
    """
    env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    baseline = GaussianMLPBaseline(env_spec=env_spec, use_trust_region=False)
    train_paths, _obs, _returns, paths, expected = get_train_test_data()

    n_fits = 150
    for _ in range(n_fits):
        baseline.fit(train_paths)

    predicted = baseline.predict(paths)
    assert np.allclose(predicted, expected, rtol=0, atol=0.1)
def test_fit(self, obs_dim):
    """Fit on two single-step paths and predict their returns exactly.

    ``GaussianMLPRegressor`` in ``garage.tf.baselines.gaussian_mlp_baseline``
    is patched with ``SimpleGaussianMLPRegressor`` while the baseline is
    constructed. The baseline is fitted on observations filled with 1 and
    2 (returns 1 and 2) and must predict ``[1, 2]`` for those observations.

    Args:
        obs_dim: Observation shape passed to ``DummyBoxEnv`` and
            ``np.full``.
    """
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim))
    regressor_target = ('garage.tf.baselines.'
                        'gaussian_mlp_baseline.'
                        'GaussianMLPRegressor')
    with mock.patch(regressor_target, new=SimpleGaussianMLPRegressor):
        baseline = GaussianMLPBaseline(env_spec=env.spec)

    fill_values = (1, 2)
    train_paths = [{
        'observations': [np.full(obs_dim, value)],
        'returns': [value]
    } for value in fill_values]
    baseline.fit(train_paths)

    query = {
        'observations': [np.full(obs_dim, value) for value in fill_values]
    }
    prediction = baseline.predict(query)
    assert np.array_equal(prediction, [1, 2])
def test_is_pickleable(self):
    """Check that a pickled baseline predicts the same as the original.

    The first hidden-layer bias of the mean network is overwritten with
    ones before predicting. The baseline is then pickled and restored
    inside a new session on a new graph, and the restored baseline's
    prediction must equal the original one.
    """
    env_spec = GarageEnv(DummyBoxEnv(obs_dim=(2, ))).spec
    baseline = GaussianMLPBaseline(env_spec=env_spec)
    _train, _obs, _returns, paths, _expected = get_train_test_data()

    # Look the variable up by name inside the baseline's variable scope.
    with tf.compat.v1.variable_scope('GaussianMLPBaseline', reuse=True):
        bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/hidden_0/bias')
    ones = tf.ones_like(bias).eval()
    bias.load(ones)

    before = baseline.predict(paths)
    serialized = pickle.dumps(baseline)

    with tf.compat.v1.Session(graph=tf.Graph()):
        restored = pickle.loads(serialized)
        after = restored.predict(paths)
        assert np.array_equal(before, after)
def test_is_pickleable(self):
    """Check that a pickled baseline predicts the same as the original.

    ``GaussianMLPRegressor`` in ``garage.tf.baselines.gaussian_mlp_baseline``
    is patched with ``SimpleGaussianMLPRegressor`` while the baseline is
    constructed. ``return_var`` is set to 1.0 before predicting. The
    baseline is then pickled and restored inside a new session on a new
    graph, and the restored baseline's prediction must equal the original
    one.
    """
    # NOTE(review): another ``test_is_pickleable`` appears earlier in this
    # source; if both live in the same class this definition shadows it —
    # confirm.
    env = TfEnv(DummyBoxEnv(obs_dim=(1, )))
    regressor_target = ('garage.tf.baselines.'
                        'gaussian_mlp_baseline.'
                        'GaussianMLPRegressor')
    with mock.patch(regressor_target, new=SimpleGaussianMLPRegressor):
        baseline = GaussianMLPBaseline(env_spec=env.spec)

    query = {'observations': [np.full(1, 1), np.full(1, 1)]}

    # Look the variable up by name inside the baseline's variable scope.
    with tf.compat.v1.variable_scope('GaussianMLPBaseline', reuse=True):
        return_var = tf.compat.v1.get_variable(
            'SimpleGaussianMLPModel/return_var')
    return_var.load(1.0)

    before = baseline.predict(query)
    serialized = pickle.dumps(baseline)

    with tf.compat.v1.Session(graph=tf.Graph()):
        restored = pickle.loads(serialized)
        after = restored.predict(query)
        assert np.array_equal(before, after)