def test_continuous_mlp_policy(self):
    continuous_mlp_policy = ContinuousMLPPolicy(env_spec=self.env,
                                                hidden_sizes=(1, ))
    self.sess.run(tf.global_variables_initializer())
    obs = self.env.observation_space.high
    assert continuous_mlp_policy.get_action(obs)
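# `self.env` and `self.sess` are not defined in the snippet above; a
# minimal sketch of the enclosing fixture it appears to assume (the
# class name and the dummy env are assumptions, chosen to match the
# other snippets in this file):
from tests.fixtures import TfGraphTestCase
from tests.fixtures.envs.dummy import DummyBoxEnv


class TestContinuousMLPPolicy(TfGraphTestCase):

    def setup_method(self):
        super().setup_method()  # creates self.graph and self.sess
        self.env = TfEnv(DummyBoxEnv())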
def test_get_action(self, obs_dim, action_dim):
    """Test get_action method"""
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    policy = ContinuousMLPPolicy(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)

    action, _ = policy.get_action(obs.flatten())
    assert env.action_space.contains(action)

    actions, _ = policy.get_actions(
        [obs.flatten(), obs.flatten(), obs.flatten()])
    for action in actions:
        assert env.action_space.contains(action)
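# obs_dim and action_dim reach this test through pytest parametrization;
# the decorator is not shown above, so the dimension pairs here are an
# illustrative assumption:
import pytest


@pytest.mark.parametrize('obs_dim, action_dim', [
    ((1, ), (1, )),
    ((2, ), (2, )),
    ((1, 1), (1, 1)),
    ((2, 2), (2, 2)),
])
def test_get_action(self, obs_dim, action_dim):
    ...  # body as above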
def test_get_action(self, obs_dim, action_dim):
    env = TfEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    with mock.patch(('garage.tf.policies.'
                     'continuous_mlp_policy.MLPModel'),
                    new=SimpleMLPModel):
        policy = ContinuousMLPPolicy(env_spec=env.spec)
    env.reset()
    obs, _, _, _ = env.step(1)

    action, _ = policy.get_action(obs)
    expected_action = np.full(action_dim, 0.5)
    assert env.action_space.contains(action)
    assert np.array_equal(action, expected_action)

    actions, _ = policy.get_actions([obs, obs, obs])
    for action in actions:
        assert env.action_space.contains(action)
        assert np.array_equal(action, expected_action)
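# The mock.patch above swaps the policy's MLPModel for SimpleMLPModel,
# a test double that always outputs 0.5, which is why expected_action
# is np.full(action_dim, 0.5). A hedged sketch of such a fixture,
# assuming garage's Model interface (the real fixture lives in the test
# suite and may differ):
from garage.tf.models import Model


class SimpleMLPModel(Model):
    """Stand-in for MLPModel whose output is the constant 0.5."""

    def __init__(self, output_dim, *args, name=None, **kwargs):
        super().__init__(name)
        self.output_dim = output_dim

    def _build(self, obs_input, name=None):
        # Broadcast the constant to the input's batch dimension.
        return tf.fill((tf.shape(obs_input)[0], self.output_dim), 0.5)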
def test_get_action(self, obs_dim, action_dim, obs_type):
    """Test get_action method"""
    assert obs_type in ['box', 'dict']
    if obs_type == 'box':
        env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    else:
        env = GymEnv(
            DummyDictEnv(obs_space_type='box', act_space_type='box'))
    policy = ContinuousMLPPolicy(env_spec=env.spec)
    env.reset()
    obs = env.step(1).observation
    if obs_type == 'box':
        obs = obs.flatten()

    action, _ = policy.get_action(obs)
    assert env.action_space.contains(action)

    actions, _ = policy.get_actions([obs, obs, obs])
    for action in actions:
        assert env.action_space.contains(action)
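# This variant adds obs_type to the parametrization; an assumed example
# covering both branches (the dict branch ignores obs_dim/action_dim):
@pytest.mark.parametrize('obs_dim, action_dim, obs_type', [
    ((1, ), (1, ), 'box'),
    ((2, 2), (2, 2), 'box'),
    ((1, ), (1, ), 'dict'),
])
def test_get_action(self, obs_dim, action_dim, obs_type):
    ...  # body as above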
def osimArm(ctxt=None, seed=1):
    """Train DDPG with the osim-rl Arm2DVecEnv environment.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalTFRunner to create the
            snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:
        env = GarageEnv(Arm2DVecEnv())
        env.reset()
        policy = ContinuousMLPPolicy(env_spec=env.spec,
                                     hidden_sizes=[64, 64],
                                     hidden_nonlinearity=tf.nn.relu,
                                     output_nonlinearity=tf.nn.tanh)
        exploration_policy = AddOrnsteinUhlenbeckNoise(env.spec,
                                                       policy,
                                                       sigma=0.2)
        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=[64, 64],
                                    hidden_nonlinearity=tf.nn.relu)
        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))
        ddpg = DDPG(env_spec=env.spec,
                    policy=policy,
                    policy_lr=1e-4,
                    qf_lr=1e-3,
                    max_path_length=200,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    steps_per_epoch=20,
                    target_update_tau=1e-2,
                    n_train_steps=50,
                    discount=0.9,
                    min_buffer_size=int(1e4),
                    exploration_policy=exploration_policy,
                    policy_optimizer=tf.compat.v1.train.AdamOptimizer,
                    qf_optimizer=tf.compat.v1.train.AdamOptimizer)

        # Preview: roll out the (still untrained) policy for n_steps
        # frames, rendering each step, before handing off to the runner.
        env.render()
        obs = env.step(env.action_space.sample())
        steps = 0
        n_steps = 1000
        while True:
            if steps == n_steps:
                env.close()
                break
            action, _ = policy.get_action(obs[0])
            obs = env.step(action)
            env.render()
            steps += 1

        runner.setup(algo=ddpg, env=env)
        runner.train(n_epochs=500, batch_size=100)
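# The (ctxt=None, seed=1) signature follows garage's wrap_experiment
# launcher convention; a minimal sketch of how this script would be
# invoked, assuming osimArm is meant to be wrapped (the decorator is
# not shown in the snippet):
from garage import wrap_experiment

if __name__ == '__main__':
    wrap_experiment(osimArm)(seed=1)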
class TestContinuousMLPPolicyWithModelTransit(TfGraphTestCase):

    def setup_method(self):
        with mock.patch('tensorflow.random.normal') as mock_rand:
            mock_rand.return_value = 0.5
            super().setup_method()
            self.box_env = TfEnv(DummyBoxEnv())
            self.policy1 = ContinuousMLPPolicy(env_spec=self.box_env,
                                               hidden_sizes=(32, 32),
                                               name='P1')
            self.policy2 = ContinuousMLPPolicy(env_spec=self.box_env,
                                               hidden_sizes=(64, 64),
                                               name='P2')
            self.policy3 = ContinuousMLPPolicyWithModel(
                env_spec=self.box_env, hidden_sizes=(32, 32), name='P3')
            self.policy4 = ContinuousMLPPolicyWithModel(
                env_spec=self.box_env, hidden_sizes=(64, 64), name='P4')

        self.sess.run(tf.compat.v1.global_variables_initializer())

        # Copy the weights of each plain policy into its WithModel
        # counterpart so the two implementations can be compared.
        for a, b in zip(self.policy3.get_params(),
                        self.policy1.get_params()):
            self.sess.run(a.assign(b))
        for a, b in zip(self.policy4.get_params(),
                        self.policy2.get_params()):
            self.sess.run(a.assign(b))

        self.obs = self.box_env.reset()
        self.action_bound = self.box_env.action_space.high

        assert self.policy1.vectorized == self.policy2.vectorized
        assert self.policy3.vectorized == self.policy4.vectorized

    @mock.patch('numpy.random.normal')
    def test_get_action(self, mock_rand):
        mock_rand.return_value = 0.5
        action1, _ = self.policy1.get_action(self.obs)
        action2, _ = self.policy2.get_action(self.obs)
        action3, _ = self.policy3.get_action(self.obs)
        action4, _ = self.policy4.get_action(self.obs)

        # The WithModel policies emit unscaled actions; multiplying by
        # the action bound should recover the original policies' output.
        assert np.array_equal(action1, action3 * self.action_bound)
        assert np.array_equal(action2, action4 * self.action_bound)

        actions1, _ = self.policy1.get_actions([self.obs, self.obs])
        actions2, _ = self.policy2.get_actions([self.obs, self.obs])
        actions3, _ = self.policy3.get_actions([self.obs, self.obs])
        actions4, _ = self.policy4.get_actions([self.obs, self.obs])

        assert np.array_equal(actions1, actions3 * self.action_bound)
        assert np.array_equal(actions2, actions4 * self.action_bound)

    def test_get_action_sym(self):
        obs_dim = self.box_env.spec.observation_space.flat_dim
        state_input = tf.compat.v1.placeholder(tf.float32,
                                               shape=(None, obs_dim))

        action_sym1 = self.policy1.get_action_sym(state_input,
                                                  name='action_sym')
        action_sym2 = self.policy2.get_action_sym(state_input,
                                                  name='action_sym')
        action_sym3 = self.policy3.get_action_sym(state_input,
                                                  name='action_sym')
        action_sym4 = self.policy4.get_action_sym(state_input,
                                                  name='action_sym')

        action1 = self.sess.run(action_sym1,
                                feed_dict={state_input: [self.obs]})
        action2 = self.sess.run(action_sym2,
                                feed_dict={state_input: [self.obs]})
        action3 = self.sess.run(action_sym3,
                                feed_dict={state_input: [self.obs]})
        action4 = self.sess.run(action_sym4,
                                feed_dict={state_input: [self.obs]})

        assert np.array_equal(action1, action3 * self.action_bound)
        assert np.array_equal(action2, action4 * self.action_bound)
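# get_action_sym is the piece off-policy algorithms rely on: it returns
# the action as a symbolic tensor so critic gradients can flow into the
# policy. A minimal sketch of that use, with the q-function and its
# get_qval_sym method assumed for illustration:
def make_actor_loss(policy, qf, obs_dim):
    """Build a DDPG-style actor loss that maximizes Q(s, pi(s))."""
    obs_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, obs_dim))
    action_sym = policy.get_action_sym(obs_ph, name='actor_out')
    # Minimizing -Q pushes the policy toward higher-value actions.
    return obs_ph, -tf.reduce_mean(qf.get_qval_sym(obs_ph, action_sym))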