Example #1
# Imports assumed for the garage (~2020.x) TF API and osim-rl; exact paths may differ.
from garage.envs import GarageEnv
from garage.experiment import LocalTFRunner
from garage.experiment.deterministic import set_seed
from osim.env.arm import Arm2DVecEnv


def osimArmResume(ctxt=None,
                  snapshot_dir='data/local/experiment/osimArm_153',
                  seed=1):
    set_seed(seed)
    with LocalTFRunner(snapshot_config=ctxt) as runner:
        # Restore the saved experiment and pull the trained algorithm off the
        # runner (_algo is a private attribute of the runner).
        runner.restore(snapshot_dir)
        ddpg = runner._algo

        env = GarageEnv(Arm2DVecEnv(visualize=True))
        env.reset()

        policy = ddpg.policy

        env.render()
        # Take one random step; env.step() returns an (obs, reward, done, info) tuple.
        obs = env.step(env.action_space.sample())
        steps = 0
        n_steps = 100

        while True:
            if steps == n_steps:
                env.close()
                break
            # get_action() returns (action, agent_info) and step() returns
            # (obs, reward, done, info), so index 0 picks the action/observation.
            temp = policy.get_action(obs[0])
            obs = env.step(temp[0])
            env.render()
            steps += 1
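The snippet above only restores the snapshot and rolls the recovered policy out for visualization. If the goal were instead to keep training from the saved state, garage's runner also exposes a resume path. A minimal sketch, reusing the imports from the example above and assuming the same snapshot layout and that LocalTFRunner.resume is available in this garage release (the epoch and batch-size values are placeholders, not from the source project):

def osimArmContinueTraining(ctxt=None,
                            snapshot_dir='data/local/experiment/osimArm_153',
                            seed=1):
    """Sketch: resume DDPG training from a saved snapshot (not from the source)."""
    set_seed(seed)
    with LocalTFRunner(snapshot_config=ctxt) as runner:
        runner.restore(snapshot_dir)
        # Placeholder budget for the remaining training.
        runner.resume(n_epochs=100, batch_size=100)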
Example #2
# Imports assumed for the garage ~2020.x API; exact paths may differ.
import gym

from garage.envs import GarageEnv, normalize


class TestNormalizedGym:
    def setup_method(self):
        # Every test gets a Pendulum-v0 env with normalized observations and
        # rewards, and observations flattened to a 1-D vector.
        self.env = GarageEnv(
            normalize(gym.make('Pendulum-v0'),
                      normalize_reward=True,
                      normalize_obs=True,
                      flatten_obs=True))

    def teardown_method(self):
        self.env.close()

    def test_does_not_modify_action(self):
        a = self.env.action_space.sample()
        # Take a real copy; aliasing (a_copy = a) could never detect in-place edits.
        a_copy = a.copy()
        self.env.reset()
        self.env.step(a)
        assert (a == a_copy).all()

    def test_flatten(self):
        for _ in range(10):
            self.env.reset()
            for _ in range(5):
                self.env.render()
                action = self.env.action_space.sample()
                next_obs, _, done, _ = self.env.step(action)
                assert next_obs.shape == self.env.observation_space.low.shape
                if done:
                    break

    def test_unflatten(self):
        for _ in range(10):
            self.env.reset()
            for _ in range(5):
                action = self.env.action_space.sample()
                next_obs, _, done, _ = self.env.step(action)
                # yapf: disable
                # flat_dim is an int, so compare element counts, not shape tuples.
                assert (self.env.observation_space.flatten(next_obs).size
                        == self.env.observation_space.flat_dim)
                # yapf: enable
                if done:
                    break
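Outside of a test class, the same normalize wrapper is applied once when the environment is built. A minimal standalone sketch under the same garage ~2020.x import assumptions as above:

import gym

from garage.envs import GarageEnv, normalize

# Wrap Pendulum-v0 so observations and rewards are normalized and observations
# are flattened to a 1-D vector, then take one step and clean up.
env = GarageEnv(
    normalize(gym.make('Pendulum-v0'),
              normalize_reward=True,
              normalize_obs=True,
              flatten_obs=True))
obs = env.reset()
next_obs, reward, done, info = env.step(env.action_space.sample())
env.close()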
Example #3
    def test_closes_box2d(self):
        garage_env = GarageEnv(env_name='CarRacing-v0')
        garage_env.render()
        assert garage_env.env.viewer is not None
        garage_env.close()
        assert garage_env.env.viewer is None
Example #4
    def test_closes_mujoco(self):
        garage_env = GarageEnv(env_name='Ant-v2')
        garage_env.render()
        assert garage_env.env.viewer is not None
        garage_env.close()
        assert garage_env.env.viewer is None
Example #5
File: osimArm.py Project: j-donahue/garage
# Imports assumed for the garage (~2020.x) TF API and osim-rl; exact paths may differ.
import tensorflow as tf

from garage.envs import GarageEnv
from garage.experiment import LocalTFRunner
from garage.experiment.deterministic import set_seed
from garage.np.exploration_policies import AddOrnsteinUhlenbeckNoise
from garage.replay_buffer import PathBuffer
from garage.tf.algos import DDPG
from garage.tf.policies import ContinuousMLPPolicy
from garage.tf.q_functions import ContinuousMLPQFunction
from osim.env.arm import Arm2DVecEnv


def osimArm(ctxt=None, seed=1):
    """Train DDPG on the osim-rl 2D arm environment (Arm2DVecEnv).

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration used by LocalTFRunner to create the snapshotter.
        seed (int): Used to seed the random number generator to produce
            determinism.

    """
    set_seed(seed)
    with LocalTFRunner(ctxt) as runner:

        env = GarageEnv(Arm2DVecEnv())
        env.reset()

        policy = ContinuousMLPPolicy(env_spec=env.spec,
                                     hidden_sizes=[64, 64],
                                     hidden_nonlinearity=tf.nn.relu,
                                     output_nonlinearity=tf.nn.tanh)

        exploration_policy = AddOrnsteinUhlenbeckNoise(env.spec,
                                                       policy,
                                                       sigma=0.2)

        qf = ContinuousMLPQFunction(env_spec=env.spec,
                                    hidden_sizes=[64, 64],
                                    hidden_nonlinearity=tf.nn.relu)

        replay_buffer = PathBuffer(capacity_in_transitions=int(1e6))

        ddpg = DDPG(env_spec=env.spec,
                    policy=policy,
                    policy_lr=1e-4,
                    qf_lr=1e-3,
                    max_path_length=200,
                    qf=qf,
                    replay_buffer=replay_buffer,
                    steps_per_epoch=20,
                    target_update_tau=1e-2,
                    n_train_steps=50,
                    discount=0.9,
                    min_buffer_size=int(1e4),
                    exploration_policy=exploration_policy,
                    policy_optimizer=tf.compat.v1.train.AdamOptimizer,
                    qf_optimizer=tf.compat.v1.train.AdamOptimizer)

        # Visualize a short rollout of the still-untrained policy before training
        # starts; note that runner.setup() has not been called yet at this point.
        env.render()
        obs = env.step(env.action_space.sample())
        steps = 0
        n_steps = 1000

        while True:
            if steps == n_steps:
                env.close()
                break
            temp = policy.get_action(obs[0])
            obs = env.step(temp[0])
            env.render()
            steps += 1

        runner.setup(algo=ddpg, env=env)

        runner.train(n_epochs=500, batch_size=100)
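As shown, osimArm only defines the experiment function. In garage's ~2020.x examples such functions are normally wrapped with wrap_experiment, which supplies the ctxt argument (the ExperimentContext) and a snapshot directory before the function runs. A minimal launch sketch under that assumption; the snapshot_mode value and the module-level call are illustrative, not from the source project:

from garage import wrap_experiment

# Wrap the experiment function defined above so garage creates the
# ExperimentContext and snapshot directory, then launch a single run.
osimArm = wrap_experiment(snapshot_mode='last')(osimArm)

if __name__ == '__main__':
    osimArm(seed=1)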