class TestPPO:
    """Test class for PPO."""

    def setup_method(self):
        """Create the environment, policy and baseline before every test."""
        self.env = GarageEnv(
            normalize(gym.make('InvertedDoublePendulum-v2')))
        policy_kwargs = dict(
            env_spec=self.env.spec,
            hidden_sizes=(64, 64),
            hidden_nonlinearity=torch.tanh,
            output_nonlinearity=None,
        )
        self.policy = GaussianMLPPolicy(**policy_kwargs)
        self.baseline = LinearFeatureBaseline(env_spec=self.env.spec)

    def teardown_method(self):
        """Close the environment after every test."""
        self.env.close()

    def test_ppo_pendulum(self):
        """Test PPO with the InvertedDoublePendulum environment.

        Trains for 10 epochs and requires the value returned by
        ``runner.train`` to be positive.
        """
        deterministic.set_seed(0)
        trainer = LocalRunner(snapshot_config)
        ppo = PPO(
            env_spec=self.env.spec,
            policy=self.policy,
            baseline=self.baseline,
            optimizer=torch.optim.Adam,
            max_path_length=100,
            discount=0.99,
            gae_lambda=0.97,
            lr_clip_range=2e-1,
            policy_lr=3e-4,
        )
        trainer.setup(ppo, self.env)
        final_return = trainer.train(n_epochs=10, batch_size=100)
        assert final_return > 0
class TestTRPO:
    """Test class for TRPO."""

    def setup_method(self):
        """Create a fresh environment, policy and value function per test."""
        gym_env = gym.make('InvertedDoublePendulum-v2')
        self.env = GarageEnv(normalize(gym_env))
        spec = self.env.spec
        self.policy = GaussianMLPPolicy(env_spec=spec,
                                        hidden_sizes=(64, 64),
                                        hidden_nonlinearity=torch.tanh,
                                        output_nonlinearity=None)
        self.value_function = LinearFeatureBaseline(env_spec=spec)

    def teardown_method(self):
        """Release the environment once a test has finished."""
        self.env.close()

    @pytest.mark.mujoco
    def test_trpo_pendulum(self):
        """Test TRPO with the InvertedDoublePendulum environment.

        Trains for 10 epochs and requires the value returned by
        ``runner.train`` to exceed 50.
        """
        deterministic.set_seed(0)

        trainer = LocalRunner(snapshot_config)
        trpo_kwargs = dict(env_spec=self.env.spec,
                           policy=self.policy,
                           value_function=self.value_function,
                           max_path_length=100,
                           discount=0.99,
                           gae_lambda=0.98)
        trpo = TRPO(**trpo_kwargs)
        trainer.setup(trpo, self.env)

        final_return = trainer.train(n_epochs=10, batch_size=100)
        assert final_return > 50
def test_maml_trpo_pendulum():
    """Test MAML-TRPO with the HalfCheetahDir environment.

    Trains for 5 epochs with a batch of ``rollouts_per_task *
    max_path_length`` samples and requires the value returned by
    ``runner.train`` to exceed -5.

    The environment is closed in a ``finally`` clause so that it is released
    even when training raises or the assertion fails.
    """
    env = GarageEnv(normalize(HalfCheetahDirEnv(), expected_action_scale=10.))
    try:
        policy = GaussianMLPPolicy(
            env_spec=env.spec,
            hidden_sizes=(64, 64),
            hidden_nonlinearity=torch.tanh,
            output_nonlinearity=None,
        )
        baseline = LinearFeatureBaseline(env_spec=env.spec)

        rollouts_per_task = 5
        max_path_length = 100

        runner = LocalRunner(snapshot_config)
        algo = MAMLTRPO(env=env,
                        policy=policy,
                        baseline=baseline,
                        max_path_length=max_path_length,
                        meta_batch_size=5,
                        discount=0.99,
                        gae_lambda=1.,
                        inner_lr=0.1,
                        num_grad_updates=1)

        runner.setup(algo, env)
        last_avg_ret = runner.train(n_epochs=5,
                                    batch_size=rollouts_per_task *
                                    max_path_length)

        assert last_avg_ret > -5
    finally:
        # Previously skipped whenever the assertion above failed.
        env.close()
class TestMAMLVPG:
    """Test class for MAML-VPG."""

    def setup_method(self):
        """Create the environment, policy and value function per test."""
        half_cheetah = normalize(HalfCheetahDirEnv(),
                                 expected_action_scale=10.)
        self.env = GarageEnv(half_cheetah)
        spec = self.env.spec
        self.policy = GaussianMLPPolicy(env_spec=spec,
                                        hidden_sizes=(64, 64),
                                        hidden_nonlinearity=torch.tanh,
                                        output_nonlinearity=None)
        self.value_function = LinearFeatureBaseline(env_spec=spec)

    def teardown_method(self):
        """Close the environment after every test."""
        self.env.close()

    def test_ppo_pendulum(self):
        """Test MAML-VPG with the HalfCheetahDir environment.

        The method name is kept as-is so the test ID stays stable. Trains
        for 10 epochs with a meta-evaluator attached and requires the value
        returned by ``runner.train`` to exceed -5.
        """
        deterministic.set_seed(0)

        horizon = 100
        n_rollouts_per_task = 5

        # Each sampled test task gets its own freshly built environment.
        test_sampler = SetTaskSampler(lambda: GarageEnv(
            normalize(HalfCheetahDirEnv(), expected_action_scale=10.)))
        evaluator = MetaEvaluator(test_task_sampler=test_sampler,
                                  max_path_length=horizon,
                                  n_test_tasks=1,
                                  n_test_rollouts=10)

        trainer = LocalRunner(snapshot_config)
        maml_vpg = MAMLVPG(env=self.env,
                           policy=self.policy,
                           value_function=self.value_function,
                           max_path_length=horizon,
                           meta_batch_size=5,
                           discount=0.99,
                           gae_lambda=1.,
                           inner_lr=0.1,
                           num_grad_updates=1,
                           meta_evaluator=evaluator)
        trainer.setup(maml_vpg, self.env)

        final_return = trainer.train(
            n_epochs=10, batch_size=n_rollouts_per_task * horizon)
        assert final_return > -5
def test_ddpg_pendulum(self):
    """Test DDPG with the InvertedPendulum environment.

    Per the original author's note, this environment has a [-3, 3]
    action_space bound (not verified here).

    Trains for 10 epochs of 20 cycles each and requires the value returned
    by ``runner.train`` to exceed 10. The environment is closed in a
    ``finally`` clause so that it is released even when training raises or
    the assertion fails.
    """
    runner = LocalRunner()
    env = GarageEnv(normalize(gym.make('InvertedPendulum-v2')))
    try:
        action_noise = OUStrategy(env.spec, sigma=0.2)

        policy = DeterministicMLPPolicy(env_spec=env.spec,
                                        hidden_sizes=[64, 64],
                                        hidden_nonlinearity=F.relu,
                                        output_nonlinearity=torch.tanh)

        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=[64, 64],
                                    hidden_nonlinearity=F.relu)

        replay_buffer = SimpleReplayBuffer(env_spec=env.spec,
                                           size_in_transitions=int(1e6),
                                           time_horizon=100)

        algo = DDPG(env_spec=env.spec,
                    policy=policy,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    n_train_steps=50,
                    min_buffer_size=int(1e4),
                    exploration_strategy=action_noise,
                    target_update_tau=1e-2,
                    policy_lr=1e-4,
                    qf_lr=1e-3,
                    discount=0.9)

        runner.setup(algo, env)
        last_avg_ret = runner.train(n_epochs=10,
                                    n_epoch_cycles=20,
                                    batch_size=100)
        assert last_avg_ret > 10
    finally:
        # Previously skipped whenever the assertion above failed.
        env.close()
class TestLocalRunner:
    """Test class for LocalRunner."""

    def setup_method(self):
        """Create the environment, policy and baseline before every test."""
        self.env = GarageEnv(
            normalize(gym.make('InvertedDoublePendulum-v2')))
        policy_kwargs = {
            'env_spec': self.env.spec,
            'hidden_sizes': (64, 64),
            'hidden_nonlinearity': torch.tanh,
            'output_nonlinearity': None,
        }
        self.policy = GaussianMLPPolicy(**policy_kwargs)
        self.baseline = LinearFeatureBaseline(env_spec=self.env.spec)

    def teardown_method(self):
        """Close the environment after every test."""
        self.env.close()

    @pytest.mark.mujoco
    def test_set_plot(self):
        """Check that training with ``plot=True`` installs a ``Plotter``.

        Runs a single PPO epoch with plotting enabled and then inspects the
        runner's private ``_plotter`` attribute.
        """
        deterministic.set_seed(0)

        trainer = LocalRunner(snapshot_config)
        ppo = PPO(env_spec=self.env.spec,
                  policy=self.policy,
                  baseline=self.baseline,
                  max_path_length=100,
                  discount=0.99,
                  gae_lambda=0.97,
                  lr_clip_range=2e-1)
        trainer.setup(ppo, self.env)
        trainer.train(n_epochs=1, batch_size=100, plot=True)

        failure_msg = 'self.plotter in LocalRunner should be set to Plotter.'
        assert isinstance(trainer._plotter, Plotter), failure_msg
def test_ddpg_double_pendulum(self):
    """Test DDPG with the InvertedDoublePendulum environment.

    Trains for 10 epochs and requires the value returned by
    ``runner.train`` to exceed 45. The environment is closed in a
    ``finally`` clause so that it is released even when training raises or
    the assertion fails.
    """
    deterministic.set_seed(0)
    runner = LocalRunner(snapshot_config)
    env = GarageEnv(gym.make('InvertedDoublePendulum-v2'))
    try:
        action_noise = OUStrategy(env.spec, sigma=0.2)

        policy = DeterministicMLPPolicy(env_spec=env.spec,
                                        hidden_sizes=[64, 64],
                                        hidden_nonlinearity=F.relu,
                                        output_nonlinearity=torch.tanh)

        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=[64, 64],
                                    hidden_nonlinearity=F.relu)

        replay_buffer = SimpleReplayBuffer(env_spec=env.spec,
                                           size_in_transitions=int(1e6),
                                           time_horizon=100)

        algo = DDPG(env_spec=env.spec,
                    policy=policy,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    steps_per_epoch=20,
                    n_train_steps=50,
                    min_buffer_size=int(1e4),
                    exploration_strategy=action_noise,
                    target_update_tau=1e-2,
                    discount=0.9)

        runner.setup(algo, env)
        last_avg_ret = runner.train(n_epochs=10, batch_size=100)
        assert last_avg_ret > 45
    finally:
        # Previously skipped whenever the assertion above failed.
        env.close()
class TestVPG:
    """Test class for VPG using a linear feature baseline."""

    @classmethod
    def setup_class(cls):
        """Seed the random number generators once for the whole class."""
        deterministic.set_seed(0)

    def setup_method(self):
        """Create the environment, runner and VPG keyword arguments."""
        self._env = GarageEnv(gym.make('InvertedDoublePendulum-v2'))
        self._runner = LocalRunner(snapshot_config)

        spec = self._env.spec
        policy = GaussianMLPPolicy(env_spec=spec,
                                   hidden_sizes=[64, 64],
                                   hidden_nonlinearity=torch.tanh,
                                   output_nonlinearity=None)
        # Shared constructor arguments; each test adds its own overrides.
        self._params = dict(
            env_spec=spec,
            policy=policy,
            optimizer=torch.optim.Adam,
            baseline=LinearFeatureBaseline(env_spec=spec),
            max_path_length=100,
            discount=0.99,
            policy_lr=1e-2,
        )

    def teardown_method(self):
        """Close the environment after every test."""
        self._env.close()

    def test_vpg_no_entropy(self):
        """Test VPG with no_entropy."""
        self._params.update(positive_adv=True, use_softplus_entropy=True)
        vpg = VPG(**self._params)
        self._runner.setup(vpg, self._env)
        final_return = self._runner.train(n_epochs=10, batch_size=100)
        assert final_return > 0

    def test_vpg_max(self):
        """Test VPG with maximum entropy."""
        self._params.update(center_adv=False,
                            stop_entropy_gradient=True,
                            entropy_method='max')
        vpg = VPG(**self._params)
        self._runner.setup(vpg, self._env)
        final_return = self._runner.train(n_epochs=10, batch_size=100)
        assert final_return > 0

    def test_vpg_regularized(self):
        """Test VPG with entropy_regularized."""
        self._params.update(entropy_method='regularized')
        vpg = VPG(**self._params)
        self._runner.setup(vpg, self._env)
        final_return = self._runner.train(n_epochs=10, batch_size=100)
        assert final_return > 30

    @pytest.mark.parametrize('algo_param, error, msg', INVALID_ENTROPY_CONFIG)
    def test_invalid_entropy_config(self, algo_param, error, msg):
        """Test that an invalid entropy config makes the constructor raise."""
        self._params.update(algo_param)
        expected_failure = pytest.raises(error, match=msg)
        with expected_failure:
            VPG(**self._params)
class TestMAML:
    """Test class for MAML."""

    def setup_method(self):
        """Create the environment, policy, value function and MAML-PPO algo."""
        half_cheetah = normalize(HalfCheetahDirEnv(),
                                 expected_action_scale=10.)
        self.env = GarageEnv(half_cheetah)
        spec = self.env.spec
        self.policy = GaussianMLPPolicy(env_spec=spec,
                                        hidden_sizes=(64, 64),
                                        hidden_nonlinearity=torch.tanh,
                                        output_nonlinearity=None)
        self.value_function = GaussianMLPValueFunction(env_spec=spec,
                                                       hidden_sizes=(32, 32))
        self.algo = MAMLPPO(env=self.env,
                            policy=self.policy,
                            value_function=self.value_function,
                            max_path_length=100,
                            meta_batch_size=5,
                            discount=0.99,
                            gae_lambda=1.,
                            inner_lr=0.1,
                            num_grad_updates=1)

    def teardown_method(self):
        """Close the environment after every test."""
        self.env.close()

    @staticmethod
    def _set_params(v, m):
        """Fill the weight and bias of a ``torch.nn.Linear`` module with v.

        Modules of any other type are left untouched.
        """
        if not isinstance(m, torch.nn.Linear):
            return
        m.weight.data.fill_(v)
        m.bias.data.fill_(v)

    @staticmethod
    def _test_params(v, m):
        """Assert that a ``torch.nn.Linear`` module's weight and bias equal v.

        Modules of any other type are skipped.
        """
        if not isinstance(m, torch.nn.Linear):
            return
        assert torch.all(torch.eq(m.weight.data, v))
        assert torch.all(torch.eq(m.bias.data, v))

    def test_get_exploration_policy(self):
        """Test if an independent copy of policy is returned."""
        fill_original = partial(self._set_params, 0.1)
        fill_copy = partial(self._set_params, 0.2)

        self.policy.apply(fill_original)
        explorer = self.algo.get_exploration_policy()
        explorer.apply(fill_copy)

        # Writing to the copy must not leak into the original policy.
        self.policy.apply(partial(self._test_params, 0.1))
        explorer.apply(partial(self._test_params, 0.2))

    def test_adapt_policy(self):
        """Test if policy can adapt to samples."""
        factory = WorkerFactory(seed=100, max_path_length=100)
        sampler = LocalSampler.from_worker_factory(factory, self.policy,
                                                   self.env)

        self.policy.apply(partial(self._set_params, 0.1))
        explorer = self.algo.get_exploration_policy()
        samples = sampler.obtain_samples(0, 100, explorer)
        self.algo.adapt_policy(explorer, samples)

        # The original policy must still hold the values written above.
        self.policy.apply(partial(self._test_params, 0.1))

        # At least one parameter of the adapted policy has to differ from
        # its counterpart in the original policy.
        param_pairs = zip(explorer.parameters(), self.policy.parameters())
        changed = any(
            adapted.data.ne(original.data).sum() > 0
            for adapted, original in param_pairs)
        if not changed:
            pytest.fail('Parameters of adapted policy should not be '
                        'identical to the old policy.')
class TestVPG:
    """Test class for VPG."""

    @classmethod
    def setup_class(cls):
        """Seed the random number generators once for the whole class."""
        deterministic.set_seed(0)

    def setup_method(self):
        """Create the environment, runner, policy and VPG keyword arguments."""
        self._env = GarageEnv(gym.make('InvertedDoublePendulum-v2'))
        self._runner = LocalRunner(snapshot_config)

        spec = self._env.spec
        self._policy = GaussianMLPPolicy(env_spec=spec,
                                         hidden_sizes=[64, 64],
                                         hidden_nonlinearity=torch.tanh,
                                         output_nonlinearity=None)
        # Shared constructor arguments; each test adds its own overrides.
        self._params = dict(
            env_spec=spec,
            policy=self._policy,
            value_function=GaussianMLPValueFunction(env_spec=spec),
            max_path_length=100,
            discount=0.99,
        )

    def teardown_method(self):
        """Close the environment after every test."""
        self._env.close()

    @pytest.mark.mujoco
    def test_vpg_no_entropy(self):
        """Test VPG with no_entropy."""
        self._params.update(positive_adv=True, use_softplus_entropy=True)
        vpg = VPG(**self._params)
        self._runner.setup(vpg, self._env)
        final_return = self._runner.train(n_epochs=10, batch_size=100)
        assert final_return > 0

    @pytest.mark.mujoco
    def test_vpg_max(self):
        """Test VPG with maximum entropy."""
        self._params.update(center_adv=False,
                            stop_entropy_gradient=True,
                            entropy_method='max')
        vpg = VPG(**self._params)
        self._runner.setup(vpg, self._env)
        final_return = self._runner.train(n_epochs=10, batch_size=100)
        assert final_return > 0

    @pytest.mark.mujoco
    def test_vpg_regularized(self):
        """Test VPG with entropy_regularized."""
        self._params.update(entropy_method='regularized')
        vpg = VPG(**self._params)
        self._runner.setup(vpg, self._env)
        final_return = self._runner.train(n_epochs=10, batch_size=100)
        assert final_return > 0

    @pytest.mark.mujoco
    @pytest.mark.parametrize('algo_param, error, msg', INVALID_ENTROPY_CONFIG)
    def test_invalid_entropy_config(self, algo_param, error, msg):
        """Test VPG with invalid entropy config."""
        self._params.update(algo_param)
        expected_failure = pytest.raises(error, match=msg)
        with expected_failure:
            VPG(**self._params)