def osimArmResume(ctxt=None, snapshot_dir='data/local/experiment/osimArm_153', seed=1):
    """Resume a saved DDPG run and roll out its policy with visualization.

    Restores the algorithm from ``snapshot_dir``, rebuilds the Arm2D
    environment with rendering enabled, and steps the restored policy
    for a fixed number of steps while rendering each frame.

    Args:
        ctxt (garage.experiment.ExperimentContext): Snapshot configuration
            used by LocalTFRunner to locate/restore the experiment.
        snapshot_dir (str): Directory containing the saved snapshot.
        seed (int): Seed for the random number generators, for determinism.
    """
    set_seed(seed)
    with LocalTFRunner(snapshot_config=ctxt) as runner:
        runner.restore(snapshot_dir)
        # NOTE(review): reaches into the private `_algo` attribute of the
        # runner — there is no public accessor for the restored algorithm.
        ddpg = runner._algo
        env = GarageEnv(Arm2DVecEnv(visualize=True))
        env.reset()
        policy = ddpg.policy
        env.render()
        # Bootstrap with a random action; subsequent actions come from the
        # restored policy.  `env.step` returns a (obs, reward, done, info)
        # tuple, so obs[0] below is the observation.
        obs = env.step(env.action_space.sample())
        n_steps = 100
        for _ in range(n_steps):
            # get_action returns (action, agent_info); take the action only.
            action = policy.get_action(obs[0])[0]
            obs = env.step(action)
            env.render()
        env.close()
class TestNormalizedGym:
    """Tests for the `normalize` wrapper around a gym environment."""

    def setup_method(self):
        # Fresh normalized Pendulum env for every test method.
        self.env = GarageEnv(
            normalize(gym.make('Pendulum-v0'),
                      normalize_reward=True,
                      normalize_obs=True,
                      flatten_obs=True))

    def teardown_method(self):
        self.env.close()

    def test_does_not_modify_action(self):
        a = self.env.action_space.sample()
        # BUG FIX: `a_copy = a` only aliased the same array, so the
        # assertion could never detect in-place mutation of the action.
        # Copy the sampled action before stepping.
        a_copy = a.copy()
        self.env.reset()
        self.env.step(a)
        assert (a == a_copy).all()

    def test_flatten(self):
        for _ in range(10):
            self.env.reset()
            for _ in range(5):
                self.env.render()
                action = self.env.action_space.sample()
                next_obs, _, done, _ = self.env.step(action)
                # Flattened observation must match the declared space shape.
                assert next_obs.shape == self.env.observation_space.low.shape
                if done:
                    break

    def test_unflatten(self):
        for _ in range(10):
            self.env.reset()
            for _ in range(5):
                action = self.env.action_space.sample()
                next_obs, _, done, _ = self.env.step(action)
                # yapf: disable
                assert (self.env.observation_space.flatten(next_obs).shape
                        == self.env.observation_space.flat_dim)
                # yapf: enable
                if done:
                    break
def test_closes_box2d(self):
    """Closing a rendered Box2D env must tear down its viewer."""
    env = GarageEnv(env_name='CarRacing-v0')
    env.render()
    assert env.env.viewer is not None
    env.close()
    assert env.env.viewer is None
def test_closes_mujoco(self):
    """Closing a rendered MuJoCo env must tear down its viewer."""
    env = GarageEnv(env_name='Ant-v2')
    env.render()
    assert env.env.viewer is not None
    env.close()
    assert env.env.viewer is None
def osimArm(ctxt=None, seed=1):
    """Train DDPG on the osim Arm2D environment.

    Builds a continuous-control DDPG agent (MLP policy and Q-function
    with Ornstein-Uhlenbeck exploration noise), runs a short rendered
    rollout of the untrained policy, then trains with LocalTFRunner.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.
    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:
        env = GarageEnv(Arm2DVecEnv())
        env.reset()
        policy = ContinuousMLPPolicy(env_spec=env.spec,
                                     hidden_sizes=[64, 64],
                                     hidden_nonlinearity=tf.nn.relu,
                                     output_nonlinearity=tf.nn.tanh)
        exploration_policy = AddOrnsteinUhlenbeckNoise(env.spec,
                                                       policy,
                                                       sigma=0.2)
        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=[64, 64],
                                    hidden_nonlinearity=tf.nn.relu)
        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))
        ddpg = DDPG(env_spec=env.spec,
                    policy=policy,
                    policy_lr=1e-4,
                    qf_lr=1e-3,
                    max_path_length=200,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    steps_per_epoch=20,
                    target_update_tau=1e-2,
                    n_train_steps=50,
                    discount=0.9,
                    min_buffer_size=int(1e4),
                    exploration_policy=exploration_policy,
                    policy_optimizer=tf.compat.v1.train.AdamOptimizer,
                    qf_optimizer=tf.compat.v1.train.AdamOptimizer)
        # Demo rollout of the (still untrained) policy with rendering.
        env.render()
        # `env.step` returns (obs, reward, done, info); obs[0] is the
        # observation fed back into the policy.
        obs = env.step(env.action_space.sample())
        n_steps = 1000
        for _ in range(n_steps):
            # get_action returns (action, agent_info); take the action only.
            action = policy.get_action(obs[0])[0]
            obs = env.step(action)
            env.render()
        env.close()
        # NOTE(review): the env was closed by the demo loop above but is
        # reused for training below — confirm GarageEnv tolerates stepping
        # after close(), otherwise the rollout should use a separate env.
        runner.setup(algo=ddpg, env=env)
        runner.train(n_epochs=500, batch_size=100)