Python GaussianMLPBaselineWithModel 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: garage.tf.baselines

hotexamples.com에서의 예제들: 7

Python GaussianMLPBaselineWithModel - 7개의 예제를 찾았습니다. 오픈소스 프로젝트에서 추출한, garage.tf.baselines.GaussianMLPBaselineWithModel의 실제 사용 사례 가운데 평가가 가장 높은 Python 예제들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

GaussianMLPBaselineWithModel(4)

get_param_values(2)

predict(2)

set_param_values(2)

fit(1)

get_params_internal(1)

예제 #1

파일 보기

 def test_get_params_internal(self, obs_dim):
     """Compare get_params_internal() with the TF trainable variables.

     The baseline is constructed while the regressor class is patched
     with SimpleGaussianMLPRegressor; its internal parameters must equal
     the trainable variables under the 'GaussianMLPBaselineWithModel'
     scope.
     """
     regressor_target = ('garage.tf.baselines.'
                         'gaussian_mlp_baseline_with_model.'
                         'GaussianMLPRegressorWithModel')
     env = TfEnv(DummyBoxEnv(obs_dim=obs_dim))
     with mock.patch(regressor_target, new=SimpleGaussianMLPRegressor):
         baseline = GaussianMLPBaselineWithModel(env_spec=env.spec,
                                                 regressor_args={})
     internal_params = baseline.get_params_internal()
     scoped_variables = tf.compat.v1.trainable_variables(
         scope='GaussianMLPBaselineWithModel')
     assert np.array_equal(internal_params, scoped_variables)

예제 #2

파일 보기

    def test_ppo_pendulum_with_model(self):
        """Run PPO with the *WithModel classes on InvertedDoublePendulum-v2.

        Trains for 10 epochs and requires the value returned by
        ``runner.train`` to be greater than 30.
        """
        regressor_args = {'hidden_sizes': (32, 32)}
        optimizer_args = {'batch_size': 32, 'max_epochs': 10}

        with LocalRunner(self.sess) as runner:
            gym_env = gym.make('InvertedDoublePendulum-v2')
            env = TfEnv(normalize(gym_env))

            policy = GaussianMLPPolicyWithModel(
                env_spec=env.spec,
                hidden_sizes=(64, 64),
                hidden_nonlinearity=tf.nn.tanh,
                output_nonlinearity=None,
            )
            baseline = GaussianMLPBaselineWithModel(
                env_spec=env.spec,
                regressor_args=regressor_args,
            )
            algo = PPO(
                env_spec=env.spec,
                policy=policy,
                baseline=baseline,
                max_path_length=100,
                discount=0.99,
                lr_clip_range=0.01,
                optimizer_args=optimizer_args,
            )

            runner.setup(algo, env)
            final_return = runner.train(n_epochs=10, batch_size=2048)
            assert final_return > 30

            env.close()

예제 #3

파일 보기

    def test_ppo_pendulum_gru_with_model(self):
        """Run PPO with a GRU policy on InvertedDoublePendulum-v2.

        Uses GaussianGRUPolicyWithModel together with
        GaussianMLPBaselineWithModel, trains for 10 epochs and requires
        the value returned by ``runner.train`` to be greater than 80.
        """
        regressor_args = {'hidden_sizes': (32, 32)}
        optimizer_args = {'batch_size': 32, 'max_epochs': 10}

        with LocalTFRunner(sess=self.sess) as runner:
            gym_env = gym.make('InvertedDoublePendulum-v2')
            env = TfEnv(normalize(gym_env))

            policy = GaussianGRUPolicyWithModel(env_spec=env.spec)
            baseline = GaussianMLPBaselineWithModel(
                env_spec=env.spec,
                regressor_args=regressor_args,
            )
            algo = PPO(
                env_spec=env.spec,
                policy=policy,
                baseline=baseline,
                max_path_length=100,
                discount=0.99,
                gae_lambda=0.95,
                lr_clip_range=0.2,
                optimizer_args=optimizer_args,
                stop_entropy_gradient=True,
                entropy_method='max',
                policy_ent_coeff=0.02,
                center_adv=False,
            )

            runner.setup(algo, env)
            final_return = runner.train(n_epochs=10, batch_size=2048)
            assert final_return > 80

            env.close()

예제 #4

파일 보기

 def test_param_values(self, obs_dim):
     """Copy parameter values from one baseline to another.

     Two baselines are built while the regressor class is patched with
     SimpleGaussianMLPRegressor. Their parameter values must differ at
     first and be equal once the first baseline's values have been set
     on the second.
     """
     regressor_target = ('garage.tf.baselines.'
                         'gaussian_mlp_baseline_with_model.'
                         'GaussianMLPRegressorWithModel')
     env = TfEnv(DummyBoxEnv(obs_dim=obs_dim))
     with mock.patch(regressor_target, new=SimpleGaussianMLPRegressor):
         source = GaussianMLPBaselineWithModel(env_spec=env.spec)
         target = GaussianMLPBaselineWithModel(
             env_spec=env.spec, name='GaussianMLPBaselineWithModel2')

     source_values = source.get_param_values()
     target_values = target.get_param_values()
     assert not np.array_equal(source_values, target_values)

     target.set_param_values(source_values)
     target_values = target.get_param_values()
     assert np.array_equal(source_values, target_values)

예제 #5

파일 보기

    def test_fit(self, obs_dim):
        """Fit the baseline on two one-step paths and check predictions.

        The paths pair an all-ones observation with return 1 and an
        all-twos observation with return 2; predicting on the same two
        observations must give [1, 2].
        """
        regressor_target = ('garage.tf.baselines.'
                            'gaussian_mlp_baseline_with_model.'
                            'GaussianMLPRegressorWithModel')
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim))
        with mock.patch(regressor_target, new=SimpleGaussianMLPRegressor):
            baseline = GaussianMLPBaselineWithModel(env_spec=env.spec)

        training_paths = [{
            'observations': [np.full(obs_dim, value)],
            'returns': [value]
        } for value in (1, 2)]
        baseline.fit(training_paths)

        query = {
            'observations': [np.full(obs_dim, value) for value in (1, 2)]
        }
        predicted = baseline.predict(query)
        assert np.array_equal(predicted, [1, 2])

예제 #6

파일 보기

파일: test_gaussian_mlp_baseline_with_model.py 프로젝트: yanxg/garage

    def test_is_pickleable(self):
        """Check that predictions are unchanged by a pickle round trip.

        The 'return_var' variable of the patched regressor model is set
        to 1.0 before the first prediction; the baseline is then pickled,
        restored inside a session on a new graph, and asked to predict
        again on the same observations.
        """
        regressor_target = ('garage.tf.baselines.'
                            'gaussian_mlp_baseline_with_model.'
                            'GaussianMLPRegressorWithModel')
        env = TfEnv(DummyBoxEnv(obs_dim=(1, )))
        with mock.patch(regressor_target, new=SimpleGaussianMLPRegressor):
            baseline = GaussianMLPBaselineWithModel(env_spec=env.spec)
        query = {'observations': [np.full(1, 1), np.full(1, 1)]}

        # Change the variable's value before taking the reference prediction.
        with tf.variable_scope('GaussianMLPBaselineWithModel', reuse=True):
            return_var = tf.get_variable('SimpleGaussianMLPModel/return_var')
        return_var.load(1.0)

        expected = baseline.predict(query)
        serialized = pickle.dumps(baseline)

        with tf.Session(graph=tf.Graph()):
            restored = pickle.loads(serialized)
            actual = restored.predict(query)

            assert np.array_equal(expected, actual)

예제 #7

파일 보기

파일: test_gaussian_mlp_baseline_with_model.py 프로젝트: yanxg/garage

    def test_param_values(self, obs_dim):
        """Copy parameter values from one baseline to another.

        Two baselines are built while the regressor class is patched with
        SimpleGaussianMLPRegressor, and the first baseline's 'return_var'
        is set to 1.0. The parameter values must differ at that point and
        be equal once the first baseline's values have been set on the
        second.
        """
        regressor_target = ('garage.tf.baselines.'
                            'gaussian_mlp_baseline_with_model.'
                            'GaussianMLPRegressorWithModel')
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim))
        with mock.patch(regressor_target, new=SimpleGaussianMLPRegressor):
            source = GaussianMLPBaselineWithModel(env_spec=env.spec)
            target = GaussianMLPBaselineWithModel(
                env_spec=env.spec, name='GaussianMLPBaselineWithModel2')

        # Manually change a parameter of the first baseline.
        with tf.variable_scope('GaussianMLPBaselineWithModel', reuse=True):
            return_var = tf.get_variable('SimpleGaussianMLPModel/return_var')
        return_var.load(1.0)

        source_values = source.get_param_values()
        target_values = target.get_param_values()
        assert not np.array_equal(source_values, target_values)

        target.set_param_values(source_values)
        target_values = target.get_param_values()
        assert np.array_equal(source_values, target_values)