def test_is_pickleable(self, obs_dim, action_dim, mock_rand):
    """A pickled policy must reproduce the original's actions and probs."""
    # Pin the mocked RNG so get_action deterministically selects index 0.
    mock_rand.return_value = 0
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    # Substitute the real MLPModel with a stub that emits a known output.
    with mock.patch(('garage.tf.policies.'
                     'categorical_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)

    # The stub model produces a constant probability of 0.5 per action.
    expected = np.full(action_dim, 0.5)

    pickled = pickle.dumps(policy)
    with tf.Session(graph=tf.Graph()):
        restored = pickle.loads(pickled)
        act, dist = restored.get_action(obs)
        assert env.action_space.contains(act)
        assert act == 0
        assert np.array_equal(dist['prob'], expected)

        # Both policies must agree on dist_info for the same observation.
        orig_info = policy.dist_info([obs.flatten()])
        restored_info = restored.dist_info([obs.flatten()])
        assert np.array_equal(orig_info['prob'], restored_info['prob'])
        assert np.array_equal(restored_info['prob'][0], expected)
def test_dist_info(self, obs_dim, action_dim):
    """dist_info should return the stub model's constant probabilities."""
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    # Swap the real MLPModel for a stub with a deterministic output.
    with mock.patch(('garage.tf.policies.'
                     'categorical_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)

    # Stub model emits 0.5 for every action dimension.
    expected = np.full(action_dim, 0.5)
    info = policy.dist_info([obs.flatten()])
    assert np.array_equal(info['prob'][0], expected)
def test_is_pickleable(self, obs_dim, action_dim):
    """Pickling round-trip must preserve the model's parameter values.

    Sets the stub model's variable to a known value, records the output,
    then unpickles the policy in a fresh graph/session and checks the
    outputs are identical.
    """
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    # Replace the real MLPModel with a stub exposing a 'return_var' variable.
    with mock.patch(('garage.tf.policies.'
                     'categorical_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)
    # Re-enter the model's scope to grab its variable for mutation.
    with tf.variable_scope('CategoricalMLPPolicy/MLPModel', reuse=True):
        return_var = tf.get_variable('return_var')
    # assign it to all one
    return_var.load(tf.ones_like(return_var).eval())
    # Output of the original policy after the parameter change.
    output1 = self.sess.run(
        policy.model.outputs,
        feed_dict={policy.model.input: [obs.flatten()]})
    p = pickle.dumps(policy)
    # Unpickle into a brand-new graph and session; the restored model must
    # reproduce the mutated parameters, not the initial ones.
    with tf.Session(graph=tf.Graph()) as sess:
        policy_pickled = pickle.loads(p)
        output2 = sess.run(
            policy_pickled.model.outputs,
            feed_dict={policy_pickled.model.input: [obs.flatten()]})
        assert np.array_equal(output1, output2)
def test_dist_info_sym(self, obs_dim, action_dim):
    """dist_info_sym over a fresh placeholder must match the stub output."""
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    # Use a stub model so the symbolic output has a known constant value.
    with mock.patch(('garage.tf.policies.'
                     'categorical_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)

    expected = np.full(action_dim, 0.5)

    # Build the symbolic dist_info graph on a brand-new input placeholder.
    flat_dim = env.spec.observation_space.flat_dim
    state_input = tf.placeholder(tf.float32, shape=(None, flat_dim))
    dist = policy.dist_info_sym(state_input, name='policy2')

    prob = self.sess.run(dist['prob'],
                         feed_dict={state_input: [obs.flatten()]})
    assert np.array_equal(prob[0], expected)
def test_get_action(self, mock_rand, obs_dim, action_dim):
    """get_action/get_actions return action 0 and the stub probabilities."""
    # Fix the mocked RNG so sampling always lands on index 0.
    mock_rand.return_value = 0
    env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
    # Patch in a stub model with a deterministic output distribution.
    with mock.patch(('garage.tf.policies.'
                     'categorical_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = CategoricalMLPPolicyWithModel(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)

    expected = np.full(action_dim, 0.5)

    # Single-observation path.
    act, info = policy.get_action(obs)
    assert env.action_space.contains(act)
    assert act == 0
    assert np.array_equal(info['prob'], expected)

    # Batched path: every action/prob pair must match the single-obs result.
    acts, infos = policy.get_actions([obs, obs, obs])
    for act, prob in zip(acts, infos['prob']):
        assert env.action_space.contains(act)
        assert act == 0
        assert np.array_equal(prob, expected)