예제 #1
0
    def test_tnpg_inverted_pendulum(self):
        """Test TNPG with InvertedPendulum-v2 environment."""
        with TFTrainer(snapshot_config, sess=self.sess) as trainer:
            env = normalize(GymEnv('InvertedPendulum-v2'))

            policy = GaussianMLPPolicy(name='policy',
                                       env_spec=env.spec,
                                       hidden_sizes=(32, 32))

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            sampler = LocalSampler(
                agents=policy,
                envs=env,
                max_episode_length=env.spec.max_episode_length,
                is_tf_worker=True)

            algo = TNPG(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        sampler=sampler,
                        discount=0.99,
                        optimizer_args=dict(reg_coeff=5e-1))

            trainer.setup(algo, env)

            last_avg_ret = trainer.train(n_epochs=10, batch_size=10000)
            assert last_avg_ret > 15

            env.close()
예제 #2
0
    def test_tnpg_cartpole(self):
        """Test TNPG with Cartpole environment."""
        logger.reset()
        env = TfEnv(normalize(CartpoleEnv()))

        policy = GaussianMLPPolicy(name="policy",
                                   env_spec=env.spec,
                                   hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TNPG(env=env,
                    policy=policy,
                    baseline=baseline,
                    batch_size=10000,
                    max_path_length=100,
                    n_itr=10,
                    discount=0.99,
                    optimizer_args=dict(reg_coeff=5e-2))

        last_avg_ret = algo.train(sess=self.sess)
        assert last_avg_ret > 40
예제 #3
0
    def test_tnpg_inverted_pendulum(self):
        """Test TNPG with InvertedPendulum-v2 environment."""
        env = TfEnv(normalize(gym.make("InvertedPendulum-v2")))

        policy = GaussianMLPPolicy(name="policy",
                                   env_spec=env.spec,
                                   hidden_sizes=(32, 32))

        baseline = LinearFeatureBaseline(env_spec=env.spec)

        algo = TNPG(env=env,
                    policy=policy,
                    baseline=baseline,
                    batch_size=10000,
                    max_path_length=100,
                    n_itr=10,
                    discount=0.99,
                    optimizer_args=dict(reg_coeff=5e-1))

        last_avg_ret = algo.train(sess=self.sess)
        assert last_avg_ret > 30

        env.close()
예제 #4
0
    def test_tnpg_inverted_pendulum(self):
        """Test TNPG with InvertedPendulum-v2 environment."""
        with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
            env = GarageEnv(normalize(gym.make('InvertedPendulum-v2')))

            policy = GaussianMLPPolicy(name='policy',
                                       env_spec=env.spec,
                                       hidden_sizes=(32, 32))

            baseline = LinearFeatureBaseline(env_spec=env.spec)

            algo = TNPG(env_spec=env.spec,
                        policy=policy,
                        baseline=baseline,
                        max_path_length=100,
                        discount=0.99,
                        optimizer_args=dict(reg_coeff=5e-1))

            runner.setup(algo, env)

            last_avg_ret = runner.train(n_epochs=10, batch_size=10000)
            assert last_avg_ret > 15

            env.close()