def test_all_does_not_modify_actions(self, domain_name, task_name):
    """Verify that env.step() does not mutate the action passed to it."""
    # collections.Iterable was deprecated in 3.3 and removed in Python 3.10.
    from collections.abc import Iterable

    env = DmControlEnv(domain_name, task_name)
    a = env.action_space.sample()
    a_copy = copy(a)
    env.step(a)
    if isinstance(a, Iterable):
        # Compare element-wise.  The original `assertEquals(a.all(),
        # a_copy.all())` only compared the truthiness of each array, not
        # their contents, and `assertEquals` is a deprecated alias.
        self.assertTrue((a == a_copy).all())
    else:
        self.assertEqual(a, a_copy)
 def test_can_step_and_render(self, domain_name, task_name):
     env = DmControlEnv(domain_name, task_name)
     ob_space = env.observation_space
     act_space = env.action_space
     ob = env.reset()
     assert ob_space.contains(ob)
     a = act_space.sample()
     assert act_space.contains(a)
     step_env(env, n=10, render=True)
    def test_dm_control_tf_policy(self):
        """Smoke-test TRPO with a Gaussian MLP policy on one suite task."""
        chosen_task = ALL_TASKS[0]

        with LocalTFRunner(snapshot_config, sess=self.sess) as runner:
            env = TfEnv(DmControlEnv.from_suite(*chosen_task))

            mlp_policy = GaussianMLPPolicy(env_spec=env.spec,
                                           hidden_sizes=(32, 32))
            linear_baseline = LinearFeatureBaseline(env_spec=env.spec)

            trpo = TRPO(env_spec=env.spec,
                        policy=mlp_policy,
                        baseline=linear_baseline,
                        max_path_length=5,
                        discount=0.99,
                        max_kl_step=0.01)

            # One tiny epoch is enough to exercise the whole training loop.
            runner.setup(trpo, env)
            runner.train(n_epochs=1, batch_size=10)

            env.close()
Example #4
0
    def test_dm_control_tf_policy(self):
        """Run a single TRPO iteration on the first dm_control suite task."""
        domain_and_task = ALL_TASKS[0]

        with self.graph.as_default():
            env = TfEnv(DmControlEnv.from_suite(*domain_and_task))

            gaussian_policy = GaussianMLPPolicy(env_spec=env.spec,
                                                hidden_sizes=(32, 32))
            feature_baseline = LinearFeatureBaseline(env_spec=env.spec)

            # Tiny batch/path settings keep this smoke test fast.
            trpo_algo = TRPO(env=env,
                             policy=gaussian_policy,
                             baseline=feature_baseline,
                             batch_size=10,
                             max_path_length=5,
                             n_itr=1,
                             discount=0.99,
                             step_size=0.01)
            trpo_algo.train()
            env.close()
    def test_dm_control_tf_policy(self):
        """Train TRPO for a single epoch through a LocalRunner."""
        first_task = ALL_TASKS[0]

        env = TfEnv(DmControlEnv.from_suite(*first_task))

        policy_net = GaussianMLPPolicy(env_spec=env.spec,
                                       hidden_sizes=(32, 32))
        value_baseline = LinearFeatureBaseline(env_spec=env.spec)

        trpo = TRPO(env=env,
                    policy=policy_net,
                    baseline=value_baseline,
                    batch_size=10,
                    max_path_length=5,
                    n_itr=1,
                    discount=0.99,
                    step_size=0.01)

        # Drive training via the runner rather than calling algo.train().
        runner = LocalRunner(self.sess)
        runner.setup(trpo, env)
        runner.train(n_epochs=1, batch_size=10)

        env.close()
Example #6
0
 def test_all_pickleable(self, domain_name, task_name):
     """Check that an environment survives a pickle round trip intact."""
     original = DmControlEnv.from_suite(domain_name, task_name)
     restored = pickle.loads(pickle.dumps(original))
     assert restored
     # Rendering is skipped because it makes TravisCI run out of memory.
     step_env(restored, render=False)
     restored.close()
     original.close()
Example #7
0
 def test_all_does_not_modify_actions(self, domain_name, task_name):
     """Verify that stepping the env leaves the sampled action unmodified."""
     # collections.Iterable was deprecated in 3.3 and removed in Python 3.10.
     from collections.abc import Iterable

     env = DmControlEnv.from_suite(domain_name, task_name)
     a = env.action_space.sample()
     a_copy = copy(a)
     env.step(a)
     if isinstance(a, Iterable):
         # Compare element-wise; `a.all() == a_copy.all()` only compared
         # the truthiness of each array, not the actual values.
         assert (a == a_copy).all()
     else:
         assert a == a_copy
     env.close()
Example #8
0
 def test_all_can_step(self, domain_name, task_name):
     """Reset the env, sample a valid action, and step without rendering."""
     environment = DmControlEnv.from_suite(domain_name, task_name)
     observation = environment.reset()
     assert environment.observation_space.contains(observation)
     action = environment.action_space.sample()
     assert environment.action_space.contains(action)
     # Rendering is skipped because it makes TravisCI run out of memory.
     step_env(environment, render=False)
     environment.close()
Example #9
0
 def test_does_not_modify_actions(self):
     """Verify env.step() does not mutate the action passed to it."""
     # collections.Iterable was deprecated in 3.3 and removed in Python 3.10.
     from collections.abc import Iterable

     domain_name, task_name = dm_control.suite.ALL_TASKS[0]
     env = DmControlEnv.from_suite(domain_name, task_name)
     a = env.action_space.sample()
     a_copy = copy(a)
     env.step(a)
     if isinstance(a, Iterable):
         # Element-wise comparison; comparing a.all() to a_copy.all()
         # only checked the arrays' truthiness, not their contents.
         self.assertTrue((a == a_copy).all())
     else:
         self.assertEqual(a, a_copy)
     env.close()
Example #10
0
def run_task(domain_name, task_name):
    """Create, render, and randomly step a normalized dm_control env."""
    print("run: domain %s task %s" % (domain_name, task_name))

    env = normalize(
        DmControlEnv(
            domain_name=domain_name,
            task_name=task_name,
            plot=True,
            width=600,
            height=400),
        normalize_obs=False,
        normalize_reward=False)

    env.reset()
    action_space = env.action_space
    # Take up to five random steps, stopping early if the episode ends.
    for _ in range(5):
        env.render()
        step_result = env.step(action_space.sample())
        done = step_result[2]
        if done:
            break

    env.close()
Example #11
0
    def test_dm_control_theano_policy(self):
        """Run a single TRPO iteration against a Theano-wrapped env."""
        domain, task_name = ALL_TASKS[0]

        env = TheanoEnv(DmControlEnv(domain_name=domain, task_name=task_name))

        mlp_policy = GaussianMLPPolicy(env_spec=env.spec,
                                       hidden_sizes=(32, 32))
        feature_baseline = LinearFeatureBaseline(env_spec=env.spec)

        # Tiny batch and path length: this is a smoke test, not a benchmark.
        trpo = TRPO(env=env,
                    policy=mlp_policy,
                    baseline=feature_baseline,
                    batch_size=10,
                    max_path_length=5,
                    n_itr=1,
                    discount=0.99,
                    step_size=0.01)
        trpo.train()
def run_task(*_):
    """Train TRPO on the dm_control cartpole-balance task, with plotting."""
    with LocalRunner() as runner:
        env = normalize(DmControlEnv.from_suite('cartpole', 'balance'))

        gaussian_policy = GaussianMLPPolicy(env_spec=env.spec,
                                            hidden_sizes=(32, 32))
        linear_baseline = LinearFeatureBaseline(env_spec=env.spec)

        trpo = TRPO(env=env,
                    policy=gaussian_policy,
                    baseline=linear_baseline,
                    max_path_length=100,
                    discount=0.99,
                    max_kl_step=0.01)

        runner.setup(trpo, env)
        runner.train(n_epochs=400, batch_size=4000, plot=True)
 def test_pickling(self, domain_name, task_name):
     """Environments should survive a pickle round trip and still step."""
     env = DmControlEnv(domain_name, task_name)
     round_trip = pickle.loads(pickle.dumps(env))
     assert round_trip
     step_env(round_trip)
     # Close both copies (as test_all_pickleable does) so the underlying
     # dm_control resources are released.
     round_trip.close()
     env.close()
Example #14
0
"""Example of how to load, step, and visualize an environment.

This example requires that garage[dm_control] be installed.
"""
import argparse

from garage.envs.dm_control import DmControlEnv

parser = argparse.ArgumentParser()
parser.add_argument('--n_steps',
                    type=int,
                    default=1000,
                    help='Number of steps to run')
args = parser.parse_args()

# Construct the environment
env = DmControlEnv.from_suite('walker', 'run')

# Reset the environment and launch the viewer
env.reset()
env.render()

# Take random actions for the requested number of steps, rendering after
# each one.  A bounded for-loop replaces the manual while/counter/break
# pattern, which also avoids looping forever if --n_steps is negative.
for _ in range(args.n_steps):
    env.step(env.action_space.sample())
    env.render()
Example #15
0
 def __init__(self, method_name='runTest', param=ALL_TASKS[0]):
     """Build the test case and the dm_control environment for *param*."""
     super().__init__(method_name)
     domain, task = param
     self.env = DmControlEnv(domain_name=domain, task_name=task)