def test_param_values(self):
    """Parameter values round-trip between baselines via get/set."""
    cnn_kwargs = dict(filters=((3, (3, 3)), (6, (3, 3))),
                      strides=(1, 1),
                      padding='SAME',
                      hidden_sizes=(32, ),
                      adaptive_std=False,
                      use_trust_region=False)
    gcb = GaussianCNNBaseline(env_spec=test_env_spec, **cnn_kwargs)
    new_gcb = GaussianCNNBaseline(env_spec=test_env_spec,
                                  name='GaussianCNNBaseline2',
                                  **cnn_kwargs)

    # Manual change the parameter of GaussianCNNBaseline
    with tf.compat.v1.variable_scope('GaussianCNNBaseline', reuse=True):
        bias_var = tf.compat.v1.get_variable(
            'dist_params/mean_network/hidden_0/bias')
        bias_var.load(tf.ones_like(bias_var).eval())

    old_values = gcb.get_param_values()
    # The two networks start out different...
    assert not np.array_equal(old_values, new_gcb.get_param_values())
    # ...and become identical after copying the parameters over.
    new_gcb.set_param_values(old_values)
    assert np.array_equal(old_values, new_gcb.get_param_values())
def test_get_params(self):
    """get_params() returns exactly the scope's trainable variables."""
    gcb = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              adaptive_std=False,
                              use_trust_region=False)
    # Fixed misspelled local name ('params_interal').
    params_internal = gcb.get_params()
    trainable_params = tf.compat.v1.trainable_variables(
        scope='GaussianCNNBaseline')
    assert np.array_equal(params_internal, trainable_params)
def test_clone(self):
    """A cloned model starts with the same parameter values."""
    gcb = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              adaptive_std=False,
                              use_trust_region=False)
    clone = gcb.clone_model(name='cloned_model')
    # Every parameter of the clone must match the source pairwise.
    for clone_param, source_param in zip(clone.parameters.values(),
                                         gcb.parameters.values()):
        assert np.array_equal(clone_param, source_param)
def test_get_params_internal(self, obs_dim):
    """get_params_internal() returns the scope's trainable variables."""
    box_env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim))
    with mock.patch(('garage.tf.baselines.'
                     'gaussian_cnn_baseline.'
                     'GaussianCNNRegressor'),
                    new=SimpleGaussianCNNRegressor):
        gcb = GaussianCNNBaseline(env_spec=box_env.spec,
                                  regressor_args=dict())
    # Fixed misspelled local name ('params_interal').
    params_internal = gcb.get_params_internal()
    trainable_params = tf.compat.v1.trainable_variables(
        scope='GaussianCNNBaseline')
    assert np.array_equal(params_internal, trainable_params)
def test_flattened_image_input(self):
    """Baseline accepts flattened image observations for fit/predict."""
    env = GymEnv(DummyDiscretePixelEnv(), is_image=True)
    gcb = GaussianCNNBaseline(env_spec=env.spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ))
    env.reset()
    step = env.step(1)
    flat_obs = step.observation.flatten()
    # Train on a single flattened-pixel observation...
    gcb.fit([{'observations': [flat_obs], 'returns': [step.reward]}])
    # ...and predict on the same flattened input.
    prediction = gcb.predict({'observations': [flat_obs]})
    assert np.allclose(0., prediction)
def run_task(snapshot_config, *_):
    """Run task.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): Snapshot
            configuration used by LocalTFRunner to create the snapshotter.
        *_ (object): Ignored by this function.

    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(normalize(gym.make('MemorizeDigits-v0')))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      conv_filters=(32, 64, 64),
                                      conv_filter_sizes=(5, 3, 2),
                                      conv_strides=(4, 2, 1),
                                      conv_pad='VALID',
                                      hidden_sizes=(256, ))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       regressor_args=dict(
                                           num_filters=(32, 64, 64),
                                           filter_dims=(5, 3, 2),
                                           strides=(4, 2, 1),
                                           padding='VALID',
                                           hidden_sizes=(256, ),
                                           use_trust_region=True))
        algo = PPO(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=100,
                   discount=0.99,
                   max_kl_step=0.01,
                   flatten_input=False)
        runner.setup(algo, env)
        runner.train(n_epochs=1000, batch_size=2048)
def test_trpo_cnn_cubecrash(self):
    """TRPO with a CNN policy/baseline learns on CubeCrash-v0."""
    with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
        env = TfEnv(normalize(gym.make('CubeCrash-v0')))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      conv_filters=(32, 64),
                                      conv_filter_sizes=(8, 4),
                                      conv_strides=(4, 2),
                                      conv_pad='VALID',
                                      hidden_sizes=(32, 32))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       regressor_args=dict(
                                           num_filters=(32, 64),
                                           filter_dims=(8, 4),
                                           strides=(4, 2),
                                           padding='VALID',
                                           hidden_sizes=(32, 32),
                                           use_trust_region=True))
        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=100,
                    discount=0.99,
                    gae_lambda=0.98,
                    max_kl_step=0.01,
                    policy_ent_coeff=0.0,
                    flatten_input=False)
        runner.setup(algo, env)
        # A short run should already beat this return threshold.
        last_avg_ret = runner.train(n_epochs=10, batch_size=2048)
        assert last_avg_ret > -0.9
        env.close()
def run_task(snapshot_config, *_):
    """Run task.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): Snapshot
            configuration used by LocalTFRunner to create the snapshotter.
        *_ (object): Ignored by this function.

    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(normalize(gym.make('CubeCrash-v0')))
        # Removed leftover debug print of env.spec.observation_space.shape.
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      conv_filters=(32, 64),
                                      conv_filter_sizes=(8, 4),
                                      conv_strides=(4, 2),
                                      conv_pad='VALID',
                                      hidden_sizes=(32, 32))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       regressor_args=dict(
                                           num_filters=(32, 64),
                                           filter_dims=(8, 4),
                                           strides=(4, 2),
                                           padding='VALID',
                                           hidden_sizes=(32, 32),
                                           use_trust_region=True))
        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=100,
                    discount=0.99,
                    max_kl_step=0.01,
                    flatten_input=False)
        runner.setup(algo, env)
        runner.train(n_epochs=100, batch_size=4000)
def test_trpo_cnn_cubecrash(self):
    """TRPO with a CNN policy/baseline learns on CubeCrash-v0."""
    with TFTrainer(snapshot_config, sess=self.sess) as trainer:
        env = normalize(GymEnv('CubeCrash-v0', max_episode_length=100))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=((32, (8, 8)), (64, (4, 4))),
                                      strides=(4, 2),
                                      padding='VALID',
                                      hidden_sizes=(32, 32))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       filters=((32, (8, 8)), (64, (4, 4))),
                                       strides=(4, 2),
                                       padding='VALID',
                                       hidden_sizes=(32, 32),
                                       use_trust_region=True)
        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    discount=0.99,
                    gae_lambda=0.98,
                    max_kl_step=0.01,
                    policy_ent_coeff=0.0)
        trainer.setup(algo, env, sampler_cls=LocalSampler)
        # A short run should already beat this return threshold.
        last_avg_ret = trainer.train(n_epochs=10, batch_size=2048)
        assert last_avg_ret > -1.5
        env.close()
def ppo_memorize_digits(ctxt=None,
                        seed=1,
                        batch_size=4000,
                        max_episode_length=100):
    """Train PPO on MemorizeDigits-v0 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by Trainer to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
        batch_size (int): Number of timesteps to use in each training step.
        max_episode_length (int): Max number of timesteps in an episode.

    """
    set_seed(seed)
    with TFTrainer(ctxt) as trainer:
        env = normalize(
            GymEnv('MemorizeDigits-v0',
                   is_image=True,
                   max_episode_length=max_episode_length))
        # Policy and baseline share the same convolutional architecture.
        conv_filters = ((32, (5, 5)), (64, (3, 3)), (64, (2, 2)))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=conv_filters,
                                      strides=(4, 2, 1),
                                      padding='VALID',
                                      hidden_sizes=(256, ))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       filters=conv_filters,
                                       strides=(4, 2, 1),
                                       padding='VALID',
                                       hidden_sizes=(256, ),
                                       use_trust_region=True)
        algo = PPO(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   discount=0.99,
                   gae_lambda=0.95,
                   lr_clip_range=0.2,
                   policy_ent_coeff=0.0,
                   optimizer_args=dict(
                       batch_size=32,
                       max_optimization_epochs=10,
                       learning_rate=1e-3,
                   ))
        trainer.setup(algo, env)
        trainer.train(n_epochs=1000, batch_size=batch_size)
def test_fit(self):
    """fit() followed by predict() reproduces the training returns."""
    box_env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim))
    with mock.patch(('garage.tf.baselines.'
                     'gaussian_cnn_baseline.'
                     'GaussianCNNRegressor'),
                    new=SimpleGaussianCNNRegressor):
        gcb = GaussianCNNBaseline(env_spec=box_env.spec)
    # Two single-step paths with distinct observations and returns.
    paths = [{
        'observations': [np.full(obs_dim, 1)],
        'returns': [1]
    }, {
        'observations': [np.full(obs_dim, 2)],
        'returns': [2]
    }]
    gcb.fit(paths)
    obs = {'observations': [np.full(obs_dim, 1), np.full(obs_dim, 2)]}
    assert np.array_equal(gcb.predict(obs), [1, 2])
def gaussian_cnn_baseline(ctxt, env_id, seed):
    """Create Gaussian CNN Baseline on TF-PPO.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by Trainer to create the snapshotter.
        env_id (str): Environment id of the task.
        seed (int): Random positive integer for the trial.

    """
    deterministic.set_seed(seed)
    with TFTrainer(ctxt) as trainer:
        env = normalize(GymEnv(env_id))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=params['conv_filters'],
                                      strides=params['conv_strides'],
                                      padding=params['conv_pad'],
                                      hidden_sizes=params['hidden_sizes'])
        baseline = GaussianCNNBaseline(
            env_spec=env.spec,
            filters=params['conv_filters'],
            strides=params['conv_strides'],
            padding=params['conv_pad'],
            hidden_sizes=params['hidden_sizes'],
            use_trust_region=params['use_trust_region'])
        sampler = RaySampler(agents=policy,
                             envs=env,
                             max_episode_length=env.spec.max_episode_length,
                             is_tf_worker=True)
        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            sampler=sampler,
            discount=0.99,
            gae_lambda=0.95,
            lr_clip_range=0.2,
            policy_ent_coeff=0.0,
            optimizer_args=dict(
                batch_size=32,
                max_optimization_epochs=10,
                learning_rate=1e-3,
            ),
        )
        trainer.setup(algo, env)
        trainer.train(n_epochs=params['n_epochs'],
                      batch_size=params['batch_size'])
def categorical_cnn_policy(ctxt, env_id, seed):
    """Create Categorical CNN Policy on TF-PPO.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        env_id (str): Environment id of the task.
        seed (int): Random positive integer for the trial.

    """
    deterministic.set_seed(seed)
    with LocalTFRunner(ctxt, max_cpus=12) as runner:
        env = TfEnv(normalize(gym.make(env_id)))
        policy = CategoricalCNNPolicy(
            env_spec=env.spec,
            conv_filters=hyper_params['conv_filters'],
            conv_filter_sizes=hyper_params['conv_filter_sizes'],
            conv_strides=hyper_params['conv_strides'],
            conv_pad=hyper_params['conv_pad'],
            hidden_sizes=hyper_params['hidden_sizes'])
        baseline = GaussianCNNBaseline(
            env_spec=env.spec,
            regressor_args=dict(
                num_filters=hyper_params['conv_filters'],
                filter_dims=hyper_params['conv_filter_sizes'],
                strides=hyper_params['conv_strides'],
                padding=hyper_params['conv_pad'],
                hidden_sizes=hyper_params['hidden_sizes'],
                use_trust_region=hyper_params['use_trust_region']))
        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.95,
            lr_clip_range=0.2,
            policy_ent_coeff=0.0,
            optimizer_args=dict(
                batch_size=32,
                max_epochs=10,
                tf_optimizer_args=dict(learning_rate=1e-3),
            ),
            flatten_input=False,
        )
        runner.setup(algo, env)
        runner.train(n_epochs=hyper_params['n_epochs'],
                     batch_size=hyper_params['batch_size'])
def test_obs_not_image(self):
    """Pixel normalization is skipped when the env is not an image env."""
    env = GarageEnv(DummyDiscretePixelEnv(), is_image=False)
    with mock.patch(('garage.tf.baselines.'
                     'gaussian_cnn_baseline.'
                     'GaussianCNNRegressor'),
                    new=SimpleGaussianCNNRegressor):
        with mock.patch(
                'garage.tf.baselines.'
                'gaussian_cnn_baseline.'
                'normalize_pixel_batch',
                side_effect=normalize_pixel_batch) as npb:
            gcb = GaussianCNNBaseline(env_spec=env.spec)
            obs_dim = env.spec.observation_space.shape
            paths = [{
                'observations': [np.full(obs_dim, 1)],
                'returns': [1]
            }, {
                'observations': [np.full(obs_dim, 2)],
                'returns': [2]
            }]
            gcb.fit(paths)
            obs = {
                'observations':
                [np.full(obs_dim, 1),
                 np.full(obs_dim, 2)]
            }
            gcb.predict(obs)
            # Neither fit() nor predict() normalized the pixel batch.
            assert not npb.called
def test_fit_unnormalized(self):
    """With normalization off, the network's stats stay at identity."""
    gcr = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              adaptive_std=True,
                              normalize_inputs=False,
                              normalize_outputs=False)
    train_data, test_data = get_train_test_data()
    train_paths, _, _ = train_data
    for _ in range(20):
        gcr.fit(train_paths)
    test_paths, expected = test_data
    prediction = gcr.predict(test_paths)
    # Mean absolute prediction error over the test set.
    average_error = 0.0
    for i, exp in enumerate(expected):
        average_error += np.abs(exp - prediction[i])
    average_error /= len(expected)
    assert average_error <= 0.1

    # Input stats must remain the identity transform (mean 0, std 1).
    x_mean = self.sess.run(gcr._networks['default'].x_mean)
    x_std = self.sess.run(gcr._networks['default'].x_std)
    assert np.array_equal(x_mean, np.zeros_like(x_mean))
    assert np.array_equal(x_std, np.ones_like(x_std))

    # Output stats as well.
    y_mean = self.sess.run(gcr._networks['default'].y_mean)
    y_std = self.sess.run(gcr._networks['default'].y_std)
    assert np.allclose(y_mean, np.zeros_like(y_mean))
    assert np.allclose(y_std, np.ones_like(y_std))
def test_fit_without_trusted_region(self):
    """Baseline fits accurately with the trust region disabled."""
    gcr = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              adaptive_std=False,
                              use_trust_region=False)
    train_data, test_data = get_train_test_data()
    train_paths, _, _ = train_data
    for _ in range(20):
        gcr.fit(train_paths)
    test_paths, expected = test_data
    prediction = gcr.predict(test_paths)
    # Mean absolute prediction error over the test set.
    average_error = 0.0
    for i, exp in enumerate(expected):
        average_error += np.abs(exp - prediction[i])
    average_error /= len(expected)
    assert average_error <= 0.1
def test_fit_normalized(self):
    """Fitting updates the normalization stats to match the data."""
    gcr = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              adaptive_std=False,
                              use_trust_region=True)
    train_data, test_data = get_train_test_data()
    train_paths, observations, returns = train_data
    for _ in range(20):
        gcr.fit(train_paths)
    test_paths, expected = test_data
    prediction = gcr.predict(test_paths)
    # Mean absolute prediction error over the test set.
    average_error = 0.0
    for i, exp in enumerate(expected):
        average_error += np.abs(exp - prediction[i])
    average_error /= len(expected)
    assert average_error <= 0.1

    # Input stats should track the empirical mean/std of the observations.
    x_mean = self.sess.run(gcr._networks['default'].x_mean)
    x_std = self.sess.run(gcr._networks['default'].x_std)
    assert np.allclose(x_mean, np.mean(observations, axis=0, keepdims=True))
    assert np.allclose(x_std, np.std(observations, axis=0, keepdims=True))

    # Output stats should track the empirical mean/std of the returns.
    y_mean = self.sess.run(gcr._networks['default'].y_mean)
    y_std = self.sess.run(gcr._networks['default'].y_std)
    assert np.allclose(y_mean, np.mean(returns, axis=0, keepdims=True))
    assert np.allclose(y_std, np.std(returns, axis=0, keepdims=True))
def test_optimizer_args(self, mock_lbfgs):
    """optimizer_args are forwarded verbatim to the optimizer class."""
    lbfgs_args = dict(max_opt_itr=25)
    gcr = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              optimizer=LbfgsOptimizer,
                              optimizer_args=lbfgs_args,
                              use_trust_region=True)
    # The baseline must hold the instance produced by the mocked class,
    # constructed with exactly the arguments passed in.
    assert mock_lbfgs.return_value is gcr._optimizer
    mock_lbfgs.assert_called_with(max_opt_itr=25)
def test_is_pickleable(self):
    """A pickled and restored baseline predicts identically."""
    box_env = GarageEnv(DummyBoxEnv(obs_dim=(1, 1)))
    with mock.patch(('garage.tf.baselines.'
                     'gaussian_cnn_baseline.'
                     'GaussianCNNRegressor'),
                    new=SimpleGaussianCNNRegressor):
        gcb = GaussianCNNBaseline(env_spec=box_env.spec)
    obs = {'observations': [np.full((1, 1), 1), np.full((1, 1), 1)]}

    # Set the model's return variable so predictions are non-trivial.
    with tf.compat.v1.variable_scope('GaussianCNNBaseline', reuse=True):
        return_var = tf.compat.v1.get_variable(
            'SimpleGaussianCNNModel/return_var')
    return_var.load(1.0)

    prediction = gcb.predict(obs)
    pickled = pickle.dumps(gcb)
    with tf.compat.v1.Session(graph=tf.Graph()):
        gcb_pickled = pickle.loads(pickled)
        assert np.array_equal(prediction, gcb_pickled.predict(obs))
def test_is_pickleable(self):
    """A pickled and restored baseline predicts identically."""
    gcr = GaussianCNNBaseline(env_spec=test_env_spec,
                              filters=((3, (3, 3)), (6, (3, 3))),
                              strides=(1, 1),
                              padding='SAME',
                              hidden_sizes=(32, ),
                              adaptive_std=False,
                              use_trust_region=False)

    # Perturb a parameter so the prediction isn't trivially zero.
    with tf.compat.v1.variable_scope('GaussianCNNBaseline', reuse=True):
        bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/hidden_0/bias')
    bias.load(tf.ones_like(bias).eval())

    _, test_data = get_train_test_data()
    test_paths, _ = test_data
    before = gcr.predict(test_paths)
    pickled = pickle.dumps(gcr)
    with tf.compat.v1.Session(graph=tf.Graph()):
        restored = pickle.loads(pickled)
        assert np.array_equal(before, restored.predict(test_paths))
def ppo_memorize_digits(ctxt=None, seed=1, batch_size=4000):
    """Train PPO on MemorizeDigits-v0 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
        batch_size (int): Number of timesteps to use in each training step.

    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:
        env = TfEnv(normalize(gym.make('MemorizeDigits-v0')), is_image=True)
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      num_filters=(32, 64, 64),
                                      filter_dims=(5, 3, 2),
                                      strides=(4, 2, 1),
                                      padding='VALID',
                                      hidden_sizes=(256, ))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       regressor_args=dict(
                                           num_filters=(32, 64, 64),
                                           filter_dims=(5, 3, 2),
                                           strides=(4, 2, 1),
                                           padding='VALID',
                                           hidden_sizes=(256, ),
                                           use_trust_region=True))
        algo = PPO(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=100,
                   discount=0.99,
                   gae_lambda=0.95,
                   lr_clip_range=0.2,
                   policy_ent_coeff=0.0,
                   optimizer_args=dict(
                       batch_size=32,
                       max_epochs=10,
                       tf_optimizer_args=dict(learning_rate=1e-3),
                   ),
                   flatten_input=False)
        runner.setup(algo, env)
        runner.train(n_epochs=1000, batch_size=batch_size)
def test_param_values(self, obs_dim):
    """Parameter values round-trip between baselines via get/set."""
    box_env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim))
    with mock.patch(('garage.tf.baselines.'
                     'gaussian_cnn_baseline.'
                     'GaussianCNNRegressor'),
                    new=SimpleGaussianCNNRegressor):
        gcb = GaussianCNNBaseline(env_spec=box_env.spec)
        new_gcb = GaussianCNNBaseline(env_spec=box_env.spec,
                                      name='GaussianCNNBaseline2')

    # Manual change the parameter of GaussianCNNBaseline
    with tf.compat.v1.variable_scope('GaussianCNNBaseline', reuse=True):
        return_var = tf.compat.v1.get_variable(
            'SimpleGaussianCNNModel/return_var')
    return_var.load(1.0)

    old_values = gcb.get_param_values()
    # The two baselines start out different...
    assert not np.array_equal(old_values, new_gcb.get_param_values())
    # ...and become identical after copying the parameters over.
    new_gcb.set_param_values(old_values)
    assert np.array_equal(old_values, new_gcb.get_param_values())
def trpo_cubecrash(ctxt=None, seed=1, max_episode_length=5, batch_size=4000):
    """Train TRPO with CubeCrash-v0 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by Trainer to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
        max_episode_length (int): Maximum length of a single episode.
        batch_size (int): Number of timesteps to use in each training step.

    """
    set_seed(seed)
    with TFTrainer(ctxt) as trainer:
        env = normalize(
            GymEnv('CubeCrash-v0', max_episode_length=max_episode_length))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=((32, (8, 8)), (64, (4, 4))),
                                      strides=(4, 2),
                                      padding='VALID',
                                      hidden_sizes=(32, 32))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       filters=((32, (8, 8)), (64, (4, 4))),
                                       strides=(4, 2),
                                       padding='VALID',
                                       hidden_sizes=(32, 32),
                                       use_trust_region=True)
        sampler = RaySampler(agents=policy,
                             envs=env,
                             max_episode_length=env.spec.max_episode_length,
                             is_tf_worker=True)
        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    sampler=sampler,
                    discount=0.99,
                    gae_lambda=0.95,
                    lr_clip_range=0.2,
                    policy_ent_coeff=0.0)
        trainer.setup(algo, env)
        trainer.train(n_epochs=100, batch_size=batch_size)
def run_task(snapshot_config, variant_data, *_):
    """Run task.

    Args:
        snapshot_config (garage.experiment.SnapshotConfig): The snapshot
            configuration used by LocalRunner to create the snapshotter.
        variant_data (dict): Custom arguments for the task.
        *_ (object): Ignored by this function.

    """
    with LocalTFRunner(snapshot_config=snapshot_config) as runner:
        env = TfEnv(normalize(gym.make('MemorizeDigits-v0')), is_image=True)
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      conv_filters=(32, 64, 64),
                                      conv_filter_sizes=(5, 3, 2),
                                      conv_strides=(4, 2, 1),
                                      conv_pad='VALID',
                                      hidden_sizes=(256, ))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       regressor_args=dict(
                                           num_filters=(32, 64, 64),
                                           filter_dims=(5, 3, 2),
                                           strides=(4, 2, 1),
                                           padding='VALID',
                                           hidden_sizes=(256, ),
                                           use_trust_region=True))
        algo = PPO(env_spec=env.spec,
                   policy=policy,
                   baseline=baseline,
                   max_path_length=100,
                   discount=0.99,
                   max_kl_step=0.01,
                   flatten_input=False)
        runner.setup(algo, env)
        runner.train(n_epochs=1000, batch_size=variant_data['batch_size'])
def trpo_cubecrash(ctxt=None, seed=1, batch_size=4000):
    """Train TRPO with CubeCrash-v0 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
        batch_size (int): Number of timesteps to use in each training step.

    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:
        env = TfEnv(normalize(gym.make('CubeCrash-v0')))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      conv_filters=(32, 64),
                                      conv_filter_sizes=(8, 4),
                                      conv_strides=(4, 2),
                                      conv_pad='VALID',
                                      hidden_sizes=(32, 32))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       regressor_args=dict(
                                           num_filters=(32, 64),
                                           filter_dims=(8, 4),
                                           strides=(4, 2),
                                           padding='VALID',
                                           hidden_sizes=(32, 32),
                                           use_trust_region=True))
        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=100,
                    discount=0.99,
                    max_kl_step=0.01,
                    flatten_input=False)
        runner.setup(algo, env)
        runner.train(n_epochs=100, batch_size=batch_size)
def trpo_cubecrash(ctxt=None, seed=1, batch_size=4000):
    """Train TRPO with CubeCrash-v0 environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
        batch_size (int): Number of timesteps to use in each training step.

    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:
        env = GarageEnv(normalize(gym.make('CubeCrash-v0')))
        # Policy and baseline share the same convolutional architecture.
        conv_filters = ((32, (8, 8)), (64, (4, 4)))
        policy = CategoricalCNNPolicy(env_spec=env.spec,
                                      filters=conv_filters,
                                      strides=(4, 2),
                                      padding='VALID',
                                      hidden_sizes=(32, 32))
        baseline = GaussianCNNBaseline(env_spec=env.spec,
                                       filters=conv_filters,
                                       strides=(4, 2),
                                       padding='VALID',
                                       hidden_sizes=(32, 32),
                                       use_trust_region=True)
        algo = TRPO(env_spec=env.spec,
                    policy=policy,
                    baseline=baseline,
                    max_path_length=100,
                    discount=0.99,
                    gae_lambda=0.95,
                    lr_clip_range=0.2,
                    policy_ent_coeff=0.0)
        runner.setup(algo, env)
        runner.train(n_epochs=100, batch_size=batch_size)
def test_obs_is_image(self):
    """Pixel observations are normalized in both fit() and predict()."""
    env = GarageEnv(DummyDiscretePixelEnv(), is_image=True)
    with mock.patch(('garage.tf.baselines.'
                     'gaussian_cnn_baseline.'
                     'GaussianCNNRegressor'),
                    new=SimpleGaussianCNNRegressor):
        with mock.patch(
                'garage.tf.baselines.'
                'gaussian_cnn_baseline.'
                'normalize_pixel_batch',
                side_effect=normalize_pixel_batch) as npb:
            gcb = GaussianCNNBaseline(env_spec=env.spec)
            obs_dim = env.spec.observation_space.shape
            paths = [{
                'observations': [np.full(obs_dim, 1)],
                'returns': [1]
            }, {
                'observations': [np.full(obs_dim, 2)],
                'returns': [2]
            }]
            gcb.fit(paths)
            observations = np.concatenate(
                [p['observations'] for p in paths])
            assert npb.call_count == 1, (
                "Expected '%s' to have been called once. Called %s times." %
                (npb._mock_name or 'mock', npb.call_count))
            assert (npb.call_args_list[0][0][0] == observations).all()
            obs = {
                'observations': [np.full(obs_dim, 1),
                                 np.full(obs_dim, 2)]
            }
            observations = obs['observations']
            gcb.predict(obs)
            # Fix: the original bare `==` either relied on object identity
            # or produced a multi-element boolean array, which makes
            # `assert` raise ValueError. Compare contents explicitly,
            # consistent with the fit() assertion above.
            assert np.array_equal(npb.call_args_list[1][0][0], observations)
def run_garage(env, seed, log_dir):
    """Create garage model and training.

    Replace the ppo with the algorithm you want to run.

    :param env: Environment of the task.
    :param seed: Random seed for the trial.
    :param log_dir: Log dir path.
    :return: Path of the tabular (CSV) progress log.
    """
    deterministic.set_seed(seed)
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=12,
                            inter_op_parallelism_threads=12)
    sess = tf.Session(config=config)
    with LocalTFRunner(snapshot_config, sess=sess, max_cpus=12) as runner:
        env = TfEnv(normalize(env))
        policy = CategoricalCNNPolicy(
            env_spec=env.spec,
            conv_filters=params['conv_filters'],
            conv_filter_sizes=params['conv_filter_sizes'],
            conv_strides=params['conv_strides'],
            conv_pad=params['conv_pad'],
            hidden_sizes=params['hidden_sizes'])
        baseline = GaussianCNNBaseline(
            env_spec=env.spec,
            regressor_args=dict(num_filters=params['conv_filters'],
                                filter_dims=params['conv_filter_sizes'],
                                strides=params['conv_strides'],
                                padding=params['conv_pad'],
                                hidden_sizes=params['hidden_sizes'],
                                use_trust_region=params['use_trust_region']))
        algo = PPO(
            env_spec=env.spec,
            policy=policy,
            baseline=baseline,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.95,
            lr_clip_range=0.2,
            policy_ent_coeff=0.0,
            flatten_input=False,
            optimizer_args=dict(
                batch_size=32,
                max_epochs=10,
                tf_optimizer_args=dict(learning_rate=1e-3),
            ),
        )

        # Set up logger since we are not using run_experiment
        tabular_log_file = osp.join(log_dir, 'progress.csv')
        dowel_logger.add_output(dowel.StdOutput())
        dowel_logger.add_output(dowel.CsvOutput(tabular_log_file))
        dowel_logger.add_output(dowel.TensorBoardOutput(log_dir))

        runner.setup(algo, env)
        runner.train(n_epochs=params['n_epochs'],
                     batch_size=params['batch_size'])

        dowel_logger.remove_all()
        return tabular_log_file
def test_invalid_obs_shape(self, obs_dim):
    """Unsupported observation shapes raise ValueError at construction."""
    box_env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim))
    with pytest.raises(ValueError):
        GaussianCNNBaseline(env_spec=box_env.spec)