def test_tnpg_inverted_pendulum(self):
    """Test TNPG with InvertedPendulum-v2 environment.

    Builds a normalized ``InvertedPendulum-v2`` environment, a
    ``GaussianMLPPolicy`` (``hidden_sizes=(32, 32)``), a
    ``LinearFeatureBaseline`` and a ``LocalSampler``, then trains TNPG
    through the trainer for 10 epochs with a batch size of 10000.

    The test passes when the value returned by ``trainer.train`` is
    greater than 15.
    """
    with TFTrainer(snapshot_config, sess=self.sess) as trainer:
        env = normalize(GymEnv('InvertedPendulum-v2'))
        # Close the environment even when training raises or the
        # assertion fails, so a failing run does not leave it open.
        try:
            policy = GaussianMLPPolicy(name='policy',
                                       env_spec=env.spec,
                                       hidden_sizes=(32, 32))

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            sampler = LocalSampler(
                agents=policy,
                envs=env,
                max_episode_length=env.spec.max_episode_length,
                is_tf_worker=True)

            algo = TNPG(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        sampler=sampler,
                        discount=0.99,
                        optimizer_args=dict(reg_coeff=5e-1))

            trainer.setup(algo, env)

            last_avg_ret = trainer.train(n_epochs=10, batch_size=10000)
            assert last_avg_ret > 15
        finally:
            env.close()
def test_tnpg_cartpole(self):
    """Check that TNPG trains on the Cartpole environment.

    Resets the logger, wraps a normalized ``CartpoleEnv`` in ``TfEnv``,
    and runs TNPG with the hyperparameters below using the test's
    session. The value returned by ``train`` must be greater than 40.
    """
    logger.reset()

    # NOTE(review): unlike the InvertedPendulum test, this environment is
    # never closed here -- confirm whether that is intentional.
    cartpole_env = TfEnv(normalize(CartpoleEnv()))
    env_spec = cartpole_env.spec

    mlp_policy = GaussianMLPPolicy(
        name="policy",
        env_spec=env_spec,
        hidden_sizes=(32, 32),
    )
    linear_baseline = LinearFeatureBaseline(env_spec=env_spec)

    # Training hyperparameters, gathered in one place.
    hyperparams = dict(
        batch_size=10000,
        max_path_length=100,
        n_itr=10,
        discount=0.99,
        optimizer_args=dict(reg_coeff=5e-2),
    )
    tnpg = TNPG(
        env=cartpole_env,
        policy=mlp_policy,
        baseline=linear_baseline,
        **hyperparams,
    )

    final_avg_return = tnpg.train(sess=self.sess)
    assert final_avg_return > 40
def test_tnpg_inverted_pendulum(self):
    """Test TNPG with InvertedPendulum-v2 environment.

    Wraps a normalized ``gym.make("InvertedPendulum-v2")`` environment in
    ``TfEnv``, builds a ``GaussianMLPPolicy`` (``hidden_sizes=(32, 32)``)
    and a ``LinearFeatureBaseline``, and trains TNPG for ``n_itr=10`` with
    ``batch_size=10000`` and ``max_path_length=100`` using the test's
    session.

    The test passes when the value returned by ``algo.train`` is greater
    than 30.
    """
    env = TfEnv(normalize(gym.make("InvertedPendulum-v2")))
    # Close the environment even when training raises or the assertion
    # fails, so a failing run does not leave it open.
    try:
        policy = GaussianMLPPolicy(name="policy",
                                   env_spec=env.spec,
                                   hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TNPG(env=env,
                    policy=policy,
                    baseline=baseline,
                    batch_size=10000,
                    max_path_length=100,
                    n_itr=10,
                    discount=0.99,
                    optimizer_args=dict(reg_coeff=5e-1))

        last_avg_ret = algo.train(sess=self.sess)
        assert last_avg_ret > 30
    finally:
        env.close()
def test_tnpg_inverted_pendulum(self):
    """Test TNPG with InvertedPendulum-v2 environment.

    Wraps a normalized ``gym.make('InvertedPendulum-v2')`` environment in
    ``GarageEnv``, builds a ``GaussianMLPPolicy``
    (``hidden_sizes=(32, 32)``) and a ``LinearFeatureBaseline``, then
    trains TNPG through the runner for 10 epochs with a batch size of
    10000 and ``max_path_length=100``.

    The test passes when the value returned by ``runner.train`` is
    greater than 15.
    """
    with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
        env = GarageEnv(normalize(gym.make('InvertedPendulum-v2')))
        # Close the environment even when training raises or the
        # assertion fails, so a failing run does not leave it open.
        try:
            policy = GaussianMLPPolicy(name='policy',
                                       env_spec=env.spec,
                                       hidden_sizes=(32, 32))

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            algo = TNPG(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        max_path_length=100,
                        discount=0.99,
                        optimizer_args=dict(reg_coeff=5e-1))

            runner.setup(algo, env)

            last_avg_ret = runner.train(n_epochs=10, batch_size=10000)
            assert last_avg_ret > 15
        finally:
            env.close()