def setup_method(self):
    """Build two pairs of MLP policies sharing identical weights.

    Each legacy ``DeterministicMLPPolicy`` (P1, P2) gets a model-based
    twin (P3, P4) whose parameters are copied from it, so the two
    implementations can be compared output-for-output.
    """
    with mock.patch('tensorflow.random.normal') as mock_rand:
        # Pin TF's weight-initialization randomness so the policies start
        # from reproducible parameters.
        mock_rand.return_value = 0.5
        super().setup_method()
        self.box_env = TfEnv(DummyBoxEnv())
        self.policy1 = DeterministicMLPPolicy(
            env_spec=self.box_env, hidden_sizes=(32, 32), name='P1')
        self.policy2 = DeterministicMLPPolicy(
            env_spec=self.box_env, hidden_sizes=(64, 64), name='P2')
        self.policy3 = DeterministicMLPPolicyWithModel(
            env_spec=self.box_env, hidden_sizes=(32, 32), name='P3')
        self.policy4 = DeterministicMLPPolicyWithModel(
            env_spec=self.box_env, hidden_sizes=(64, 64), name='P4')
        self.sess.run(tf.global_variables_initializer())
        # Copy each legacy policy's weights into its model-based twin:
        # P1 -> P3, then P2 -> P4.
        twin_pairs = ((self.policy3, self.policy1),
                      (self.policy4, self.policy2))
        for dst_policy, src_policy in twin_pairs:
            for dst, src in zip(dst_policy.get_params(),
                                src_policy.get_params()):
                self.sess.run(dst.assign(src))

    self.obs = [self.box_env.reset()]

    assert self.policy1.vectorized == self.policy2.vectorized
    assert self.policy3.vectorized == self.policy4.vectorized
def test_is_pickleable(self, obs_dim, action_dim):
    """Model outputs must be identical before and after pickling."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'deterministic_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = DeterministicMLPPolicyWithModel(env_spec=env.spec)

    env.reset()
    obs, _, _, _ = env.step(1)
    flat_obs = obs.flatten()

    with tf.variable_scope('DeterministicMLPPolicy/MLPModel', reuse=True):
        return_var = tf.get_variable('return_var')
        # Overwrite the dummy model's output variable with all ones so the
        # pickled copy is checked against a non-default state.
        return_var.load(tf.ones_like(return_var).eval())

    before = self.sess.run(
        policy.model.outputs,
        feed_dict={policy.model.input: [flat_obs]})
    pickled = pickle.dumps(policy)

    with tf.Session(graph=tf.Graph()) as sess:
        restored = pickle.loads(pickled)
        after = sess.run(
            restored.model.outputs,
            feed_dict={restored.model.input: [flat_obs]})
        assert np.array_equal(before, after)
def test_is_pickleable(self, obs_dim, action_dim):
    """get_action must return the same action before and after pickling."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'deterministic_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = DeterministicMLPPolicyWithModel(env_spec=env.spec)

    env.reset()
    obs, _, _, _ = env.step(1)
    action_before, _ = policy.get_action(obs)

    serialized = pickle.dumps(policy)
    # Unpickle into a fresh graph/session to prove the policy carries all
    # the state it needs.
    with tf.Session(graph=tf.Graph()):
        restored = pickle.loads(serialized)
        action_after, _ = restored.get_action(obs)
        assert env.action_space.contains(action_after)
        assert np.array_equal(action_before, action_after)
def test_get_action_sym(self, obs_dim, action_dim):
    """get_action_sym must yield the stub model's constant action.

    Builds the policy with `SimpleMLPModel` patched in, then evaluates
    the symbolic action for a real observation and checks it against the
    value the stub is expected to produce (0.5 in every dimension).
    """
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'deterministic_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = DeterministicMLPPolicyWithModel(env_spec=env.spec)

    env.reset()
    obs, _, _, _ = env.step(1)

    # Use a distinct name for the flattened dimension: the original code
    # shadowed the parametrized `obs_dim` shape tuple with this int.
    flat_obs_dim = env.spec.observation_space.flat_dim
    state_input = tf.placeholder(tf.float32, shape=(None, flat_obs_dim))
    action_sym = policy.get_action_sym(state_input, name='action_sym')

    expected_action = np.full(action_dim, 0.5)
    action = self.sess.run(action_sym,
                           feed_dict={state_input: [obs.flatten()]})
    assert env.action_space.contains(action)
    assert np.array_equal(action, expected_action)
def test_get_action(self, obs_dim, action_dim):
    """get_action and get_actions must return the stub's 0.5 action."""
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'deterministic_mlp_policy_with_model.MLPModel'),
                    new=SimpleMLPModel):
        policy = DeterministicMLPPolicyWithModel(env_spec=env.spec)

    env.reset()
    obs, _, _, _ = env.step(1)
    expected_action = np.full(action_dim, 0.5)

    # Single-observation path.
    single_action, _ = policy.get_action(obs)
    assert env.action_space.contains(single_action)
    assert np.array_equal(single_action, expected_action)

    # Batched path: every action in the batch must match as well.
    batch_actions, _ = policy.get_actions([obs, obs, obs])
    for batch_action in batch_actions:
        assert env.action_space.contains(batch_action)
        assert np.array_equal(batch_action, expected_action)
class TestDeterministicMLPPolicyWithModelTransit(TfGraphTestCase):
    """Parity tests between the legacy and model-based MLP policies.

    Two ``DeterministicMLPPolicy`` instances (P1, P2) and two
    ``DeterministicMLPPolicyWithModel`` instances (P3, P4) are created with
    matching hidden sizes; P3/P4 have their parameters copied from P1/P2,
    after which both implementations must produce identical actions.
    """

    def setup_method(self):
        with mock.patch('tensorflow.random.normal') as mock_rand:
            # Pin TF's weight-init randomness so parameters are
            # reproducible before they are copied between policies.
            mock_rand.return_value = 0.5
            super().setup_method()
            self.box_env = TfEnv(DummyBoxEnv())
            self.policy1 = DeterministicMLPPolicy(
                env_spec=self.box_env, hidden_sizes=(32, 32), name='P1')
            self.policy2 = DeterministicMLPPolicy(
                env_spec=self.box_env, hidden_sizes=(64, 64), name='P2')
            self.policy3 = DeterministicMLPPolicyWithModel(
                env_spec=self.box_env, hidden_sizes=(32, 32), name='P3')
            self.policy4 = DeterministicMLPPolicyWithModel(
                env_spec=self.box_env, hidden_sizes=(64, 64), name='P4')
            self.sess.run(tf.global_variables_initializer())
            # Make P3 mirror P1 and P4 mirror P2 by copying parameters
            # one variable at a time.
            for a, b in zip(self.policy3.get_params(),
                            self.policy1.get_params()):
                self.sess.run(a.assign(b))
            for a, b in zip(self.policy4.get_params(),
                            self.policy2.get_params()):
                self.sess.run(a.assign(b))

        self.obs = [self.box_env.reset()]

        assert self.policy1.vectorized == self.policy2.vectorized
        assert self.policy3.vectorized == self.policy4.vectorized

    @mock.patch('numpy.random.normal')
    def test_get_action(self, mock_rand):
        """Paired policies must return equal actions for the same input."""
        mock_rand.return_value = 0.5
        action1, _ = self.policy1.get_action(self.obs)
        action2, _ = self.policy2.get_action(self.obs)
        action3, _ = self.policy3.get_action(self.obs)
        action4, _ = self.policy4.get_action(self.obs)
        assert np.array_equal(action1, action3)
        assert np.array_equal(action2, action4)
        # Same check through the batched API.
        actions1, _ = self.policy1.get_actions([self.obs, self.obs])
        actions2, _ = self.policy2.get_actions([self.obs, self.obs])
        actions3, _ = self.policy3.get_actions([self.obs, self.obs])
        actions4, _ = self.policy4.get_actions([self.obs, self.obs])
        assert np.array_equal(actions1, actions3)
        assert np.array_equal(actions2, actions4)

    def test_get_action_sym(self):
        """Paired policies must build equivalent symbolic action outputs."""
        obs_dim = self.box_env.spec.observation_space.flat_dim
        state_input = tf.placeholder(tf.float32, shape=(None, obs_dim))
        action_sym1 = self.policy1.get_action_sym(state_input,
                                                  name='action_sym')
        action_sym2 = self.policy2.get_action_sym(state_input,
                                                  name='action_sym')
        action_sym3 = self.policy3.get_action_sym(state_input,
                                                  name='action_sym')
        action_sym4 = self.policy4.get_action_sym(state_input,
                                                  name='action_sym')
        action1 = self.sess.run(action_sym1,
                                feed_dict={state_input: self.obs})
        action2 = self.sess.run(action_sym2,
                                feed_dict={state_input: self.obs})
        action3 = self.sess.run(action_sym3,
                                feed_dict={state_input: self.obs})
        action4 = self.sess.run(action_sym4,
                                feed_dict={state_input: self.obs})
        assert np.array_equal(action1, action3)
        assert np.array_equal(action2, action4)