Example no. 1
0
    def test_get_action(self, obs_dim, action_dim, filter_dims, filter_sizes,
                        strides, padding, hidden_sizes, mock_rand):
        """Single and batched actions match the deterministic fake models."""
        mock_rand.return_value = 0
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        # Substitute lightweight fakes for the real CNN/MLP models so the
        # policy's output distribution is cheap and fully predictable.
        with mock.patch(('garage.tf.policies.'
                         'categorical_conv_policy_with_model.MLPModel'),
                        new=SimpleMLPModel), \
                mock.patch(('garage.tf.policies.'
                            'categorical_conv_policy_with_model.CNNModel'),
                           new=SimpleCNNModel):
            policy = CategoricalConvPolicyWithModel(
                env_spec=env.spec,
                conv_filters=filter_dims,
                conv_filter_sizes=filter_sizes,
                conv_strides=strides,
                conv_pad=padding,
                hidden_sizes=hidden_sizes)

        env.reset()
        obs, _, _, _ = env.step(1)

        # The fake model emits a uniform distribution over actions.
        expected_prob = np.full(action_dim, 0.5)

        single_action, single_info = policy.get_action(obs)
        assert env.action_space.contains(single_action)
        assert single_action == 0
        assert np.array_equal(single_info['prob'], expected_prob)

        batch_actions, batch_info = policy.get_actions([obs, obs, obs])
        for act, dist in zip(batch_actions, batch_info['prob']):
            assert env.action_space.contains(act)
            assert act == 0
            assert np.array_equal(dist, expected_prob)
Example no. 2
0
    def test_dist_info_sym(self, obs_dim, action_dim, filter_dims,
                           filter_sizes, strides, padding, hidden_sizes):
        """dist_info_sym builds a symbolic graph matching the fake models."""
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        # Patch in simple deterministic stand-ins for the real models.
        with mock.patch(('garage.tf.policies.'
                         'categorical_conv_policy_with_model.MLPModel'),
                        new=SimpleMLPModel), \
                mock.patch(('garage.tf.policies.'
                            'categorical_conv_policy_with_model.CNNModel'),
                           new=SimpleCNNModel):
            policy = CategoricalConvPolicyWithModel(
                env_spec=env.spec,
                conv_filters=filter_dims,
                conv_filter_sizes=filter_sizes,
                conv_strides=strides,
                conv_pad=padding,
                hidden_sizes=hidden_sizes)

        env.reset()
        obs, _, _, _ = env.step(1)

        # The fake model yields a uniform distribution over actions.
        expected_prob = np.full(action_dim, 0.5)

        obs_space_shape = env.spec.observation_space.shape
        state_input = tf.placeholder(tf.float32,
                                     shape=(None, ) + obs_space_shape)
        dist_sym = policy.dist_info_sym(state_input, name='policy2')

        prob = self.sess.run(dist_sym['prob'],
                             feed_dict={state_input: [obs]})
        assert np.array_equal(prob[0], expected_prob)
Example no. 3
0
    def test_is_pickleable(self, obs_dim, action_dim, mock_rand):
        """A pickle round-trip preserves the policy's model outputs."""
        mock_rand.return_value = 0
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        # Use fake models so the graph is tiny and the output deterministic.
        with mock.patch(('garage.tf.policies.'
                         'categorical_conv_policy_with_model.MLPModel'),
                        new=SimpleMLPModel), \
                mock.patch(('garage.tf.policies.'
                            'categorical_conv_policy_with_model.CNNModel'),
                           new=SimpleCNNModel):
            policy = CategoricalConvPolicyWithModel(
                env_spec=env.spec,
                conv_filters=(32, ),
                conv_filter_sizes=(3, ),
                conv_strides=(1, ),
                conv_pad='SAME',
                hidden_sizes=(4, ))
        env.reset()
        obs, _, _, _ = env.step(1)

        with tf.variable_scope('CategoricalConvPolicy/Sequential/MLPModel',
                               reuse=True):
            model_var = tf.get_variable('return_var')
        # Move the variable away from its initial value so the test can
        # detect whether parameter values actually survive pickling.
        self.sess.run(tf.assign(model_var, tf.ones_like(model_var)))
        output_before = self.sess.run(policy.model.outputs,
                                      feed_dict={policy.model.input: [obs]})
        pickled = pickle.dumps(policy)

        # Restore into a fresh graph/session to prove nothing leaks from
        # the original graph.
        with tf.Session(graph=tf.Graph()) as sess:
            restored = pickle.loads(pickled)
            output_after = sess.run(restored.model.outputs,
                                    feed_dict={restored.model.input: [obs]})
            assert np.array_equal(output_before, output_after)
Example no. 4
0
    def test_dist_info(self, obs_dim, action_dim, filter_dims, filter_sizes,
                       strides, padding, hidden_sizes):
        """dist_info returns the uniform distribution from the fake models."""
        env = TfEnv(DummyDiscreteEnv(obs_dim=obs_dim, action_dim=action_dim))
        # Replace the real models with deterministic stand-ins.
        with mock.patch(('garage.tf.policies.'
                         'categorical_conv_policy_with_model.MLPModel'),
                        new=SimpleMLPModel), \
                mock.patch(('garage.tf.policies.'
                            'categorical_conv_policy_with_model.CNNModel'),
                           new=SimpleCNNModel):
            policy = CategoricalConvPolicyWithModel(
                env_spec=env.spec,
                conv_filters=filter_dims,
                conv_filter_sizes=filter_sizes,
                conv_strides=strides,
                conv_pad=padding,
                hidden_sizes=hidden_sizes)

        env.reset()
        obs, _, _, _ = env.step(1)

        # The fake model emits a uniform distribution over actions.
        expected_prob = np.full(action_dim, 0.5)

        info = policy.dist_info([obs])
        assert np.array_equal(info['prob'][0], expected_prob)