import pickle
from unittest import mock

import numpy as np
import pytest
import tensorflow as tf

from garage.tf.envs import TfEnv
from garage.tf.policies import DeterministicMLPPolicy
from garage.tf.policies import DeterministicMLPPolicyWithModel
from tests.fixtures import TfGraphTestCase
from tests.fixtures.envs.dummy import DummyBoxEnv
from tests.fixtures.models import SimpleMLPModel


class TestDeterministicMLPPolicyWithModel(TfGraphTestCase):
    @pytest.mark.parametrize('obs_dim, action_dim', [
        ((1, ), (1, )),
        ((1, ), (2, )),
        ((2, ), (2, )),
        ((1, 1), (1, 1)),
        ((1, 1), (2, 2)),
        ((2, 2), (2, 2)),
    ])
    def test_is_pickleable(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
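        # Swap the policy's MLPModel for the SimpleMLPModel test fixture so
        # the network's output is a known constant instead of depending on
        # random weight initialization.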
        with mock.patch(('garage.tf.policies.'
                         'deterministic_mlp_policy_with_model.MLPModel'),
                        new=SimpleMLPModel):
            policy = DeterministicMLPPolicyWithModel(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

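        # Overwrite a model parameter before pickling so we can verify that
        # the pickled copy carries the updated values, not the initial ones.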
        with tf.variable_scope('DeterministicMLPPolicy/MLPModel', reuse=True):
            return_var = tf.get_variable('return_var')
        # assign it to all ones
        return_var.load(tf.ones_like(return_var).eval())
        output1 = self.sess.run(
            policy.model.outputs,
            feed_dict={policy.model.input: [obs.flatten()]})

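        # Round-trip the policy through pickle into a fresh graph and
        # session; the outputs must be identical.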
        p = pickle.dumps(policy)
        with tf.Session(graph=tf.Graph()) as sess:
            policy_pickled = pickle.loads(p)
            output2 = sess.run(
                policy_pickled.model.outputs,
                feed_dict={policy_pickled.model.input: [obs.flatten()]})
            assert np.array_equal(output1, output2)

    @pytest.mark.parametrize('obs_dim, action_dim', [
        ((1, ), (1, )),
        ((1, ), (2, )),
        ((2, ), (2, )),
        ((1, 1), (1, 1)),
        ((1, 1), (2, 2)),
        ((2, 2), (2, 2)),
    ])
    def test_get_action_is_pickleable(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'deterministic_mlp_policy_with_model.MLPModel'),
                        new=SimpleMLPModel):
            policy = DeterministicMLPPolicyWithModel(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        action1, _ = policy.get_action(obs)

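        # The restored policy must reproduce the same action in a brand-new
        # graph and session.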
        p = pickle.dumps(policy)
        with tf.Session(graph=tf.Graph()):
            policy_pickled = pickle.loads(p)
            action2, _ = policy_pickled.get_action(obs)
            assert env.action_space.contains(action2)
            assert np.array_equal(action1, action2)

    @pytest.mark.parametrize('obs_dim, action_dim', [
        ((1, ), (1, )),
        ((1, ), (2, )),
        ((2, ), (2, )),
        ((1, 1), (1, 1)),
        ((1, 1), (2, 2)),
        ((2, 2), (2, 2)),
    ])
    def test_get_action_sym(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'deterministic_mlp_policy_with_model.MLPModel'),
                        new=SimpleMLPModel):
            policy = DeterministicMLPPolicyWithModel(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

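        # Build the policy's symbolic action op on a fresh state placeholder.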
        obs_dim = env.spec.observation_space.flat_dim
        state_input = tf.placeholder(tf.float32, shape=(None, obs_dim))
        action_sym = policy.get_action_sym(state_input, name='action_sym')

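        # The SimpleMLPModel fixture is expected to emit a constant 0.5 for
        # every action dimension.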
        expected_action = np.full(action_dim, 0.5)

        action = self.sess.run(action_sym,
                               feed_dict={state_input: [obs.flatten()]})
        # sess.run returns a batch of one flat action; unflatten it back to
        # the action space's shape before comparing with expected_action.
        action = env.action_space.unflatten(action)
        assert env.action_space.contains(action)
        assert np.array_equal(action, expected_action)

    @pytest.mark.parametrize('obs_dim, action_dim', [
        ((1, ), (1, )),
        ((1, ), (2, )),
        ((2, ), (2, )),
        ((1, 1), (1, 1)),
        ((1, 1), (2, 2)),
        ((2, 2), (2, 2)),
    ])
    def test_get_action(self, obs_dim, action_dim):
        env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
        with mock.patch(('garage.tf.policies.'
                         'deterministic_mlp_policy_with_model.MLPModel'),
                        new=SimpleMLPModel):
            policy = DeterministicMLPPolicyWithModel(env_spec=env.spec)

        env.reset()
        obs, _, _, _ = env.step(1)

        action, _ = policy.get_action(obs)

        expected_action = np.full(action_dim, 0.5)

        assert env.action_space.contains(action)
        assert np.array_equal(action, expected_action)

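        # get_actions vectorizes over a batch and should return the same
        # constant action for every observation.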
        actions, _ = policy.get_actions([obs, obs, obs])
        for action in actions:
            assert env.action_space.contains(action)
            assert np.array_equal(action, expected_action)


class TestDeterministicMLPPolicyWithModelTransit(TfGraphTestCase):
    def setup_method(self):
        with mock.patch('tensorflow.random.normal') as mock_rand:
            mock_rand.return_value = 0.5
            super().setup_method()
            self.box_env = TfEnv(DummyBoxEnv())
            self.policy1 = DeterministicMLPPolicy(env_spec=self.box_env,
                                                  hidden_sizes=(32, 32),
                                                  name='P1')
            self.policy2 = DeterministicMLPPolicy(env_spec=self.box_env,
                                                  hidden_sizes=(64, 64),
                                                  name='P2')
            self.policy3 = DeterministicMLPPolicyWithModel(
                env_spec=self.box_env, hidden_sizes=(32, 32), name='P3')
            self.policy4 = DeterministicMLPPolicyWithModel(
                env_spec=self.box_env, hidden_sizes=(64, 64), name='P4')

            self.sess.run(tf.global_variables_initializer())
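            # Copy the weights so each *WithModel policy starts out
            # identical to its primal counterpart.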
            for a, b in zip(self.policy3.get_params(),
                            self.policy1.get_params()):
                self.sess.run(a.assign(b))
            for a, b in zip(self.policy4.get_params(),
                            self.policy2.get_params()):
                self.sess.run(a.assign(b))

            self.obs = [self.box_env.reset()]

            assert self.policy1.vectorized == self.policy2.vectorized
            assert self.policy3.vectorized == self.policy4.vectorized

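    # numpy's random generator is patched so that any stochastic component
    # of get_action is identical across all four policies.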
    @mock.patch('numpy.random.normal')
    def test_get_action(self, mock_rand):
        mock_rand.return_value = 0.5
        action1, _ = self.policy1.get_action(self.obs)
        action2, _ = self.policy2.get_action(self.obs)
        action3, _ = self.policy3.get_action(self.obs)
        action4, _ = self.policy4.get_action(self.obs)

        assert np.array_equal(action1, action3)
        assert np.array_equal(action2, action4)

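        # The same parity must hold for batched observations.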
        actions1, _ = self.policy1.get_actions([self.obs, self.obs])
        actions2, _ = self.policy2.get_actions([self.obs, self.obs])
        actions3, _ = self.policy3.get_actions([self.obs, self.obs])
        actions4, _ = self.policy4.get_actions([self.obs, self.obs])

        assert np.array_equal(actions1, actions3)
        assert np.array_equal(actions2, actions4)

    def test_get_action_sym(self):
        obs_dim = self.box_env.spec.observation_space.flat_dim
        state_input = tf.placeholder(tf.float32, shape=(None, obs_dim))

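        # Build one symbolic action per policy from the shared placeholder.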
        action_sym1 = self.policy1.get_action_sym(state_input,
                                                  name='action_sym')
        action_sym2 = self.policy2.get_action_sym(state_input,
                                                  name='action_sym')
        action_sym3 = self.policy3.get_action_sym(state_input,
                                                  name='action_sym')
        action_sym4 = self.policy4.get_action_sym(state_input,
                                                  name='action_sym')

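        # Evaluating the primal policies and their *WithModel counterparts
        # on the same observation must give identical actions.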
        action1 = self.sess.run(action_sym1, feed_dict={state_input: self.obs})
        action2 = self.sess.run(action_sym2, feed_dict={state_input: self.obs})
        action3 = self.sess.run(action_sym3, feed_dict={state_input: self.obs})
        action4 = self.sess.run(action_sym4, feed_dict={state_input: self.obs})

        assert np.array_equal(action1, action3)
        assert np.array_equal(action2, action4)