Ejemplo n.º 1
0
 def loader():
   """Create the suite environment selected in the enclosing scope.

   Loads the environment with `suite.load` using the `domain_name`,
   `task_name` and `task_kwargs` captured from the enclosing scope, copies
   the `visualize_reward` flag onto the task, and wraps the environment in
   `action_noise.Wrapper` when the `action_noise` flag is positive.

   Returns:
     The loaded environment, wrapped with action noise if requested.
   """
   environment = suite.load(
       domain_name=domain_name,
       task_name=task_name,
       task_kwargs=task_kwargs,
   )
   environment.task.visualize_reward = FLAGS.visualize_reward
   noise_scale = FLAGS.action_noise
   if noise_scale > 0:
     return action_noise.Wrapper(environment, scale=noise_scale)
   return environment
Ejemplo n.º 2
0
 def test_error_if_action_bounds_non_finite(self, lower, upper):
   """Wrapper construction must raise `ValueError` for the given bounds.

   The expected message is `action_noise._BOUNDS_MUST_BE_FINITE` formatted
   with the action spec built from `lower` and `upper`. The values of
   `lower` and `upper` are supplied by the caller (presumably a
   parameterized decorator outside this view).
   """
   spec = self.make_action_spec(lower=lower, upper=upper)
   mock_env = self.make_mock_env(action_spec=spec)
   expected_message = action_noise._BOUNDS_MUST_BE_FINITE.format(
       action_spec=spec)
   with self.assertRaisesWithLiteralMatch(ValueError, expected_message):
     action_noise.Wrapper(mock_env)
Ejemplo n.º 3
0
 def test_action_spec(self):
     """The wrapper's `action_spec()` forwards to the wrapped environment."""
     inner_env = self.make_mock_env()
     wrapper = action_noise.Wrapper(inner_env)
     # `Wrapper.__init__()` already calls `env.action_spec()`, so clear the
     # recorded calls before checking the forwarding behaviour.
     inner_env.action_spec.reset_mock()
     returned_spec = wrapper.action_spec()
     inner_env.action_spec.assert_called_once_with()
     self.assertIs(returned_spec, inner_env.action_spec())
Ejemplo n.º 4
0
 def test_action_clipping(self, action, noise):
     """The noisy action passed to the wrapped env is clipped to [-1, 1].

     The task's random state is replaced by a mock whose `normal()` returns
     `noise`, so the expected action is `clip(action + noise, -1, 1)`.
     """
     low, high = -1., 1.
     clipped_action = np.clip(action + noise, low, high)
     # Stub the task's random state so that `normal()` yields exactly `noise`.
     fake_task = mock.Mock(spec=control.Task)
     fake_task.random = mock.Mock(spec=np.random.RandomState)
     fake_task.random.normal.return_value = noise
     spec = self.make_action_spec(lower=low, upper=high)
     inner_env = self.make_mock_env(action_spec=spec)
     inner_env.task = fake_task
     wrapper = action_noise.Wrapper(inner_env)
     result = wrapper.step(action)
     self.assertStepCalledOnceWithCorrectAction(inner_env, clipped_action)
     self.assertIs(result, inner_env.step(clipped_action))
Ejemplo n.º 5
0
    def loader():
        """Create the environment, falling back to `custom_suite` if needed.

        First tries `suite.load` with the `domain_name`, `task_name` and
        `task_kwargs` captured from the enclosing scope. If that raises
        `ValueError`, loads from `custom_suite` instead, with `params` set to
        `[0.25, 1.0, 8]` in the task kwargs. The `visualize_reward` flag is
        then copied onto the task, and the environment is wrapped in
        `action_noise.Wrapper` when the `action_noise` flag is positive.

        Returns:
            The loaded environment, wrapped with action noise if requested.
        """
        try:
            env = suite.load(domain_name=domain_name,
                             task_name=task_name,
                             task_kwargs=task_kwargs)
        except ValueError:
            # Build the fallback kwargs on a copy: writing into the shared
            # `task_kwargs` would leak `params` into every later use of that
            # dict, and would fail outright if `task_kwargs` were None.
            fallback_kwargs = dict(task_kwargs or {})
            fallback_kwargs['params'] = [0.25, 1.0, 8]
            env = custom_suite.load(domain_name=domain_name,
                                    task_name=task_name,
                                    task_kwargs=fallback_kwargs)

        env.task.visualize_reward = FLAGS.visualize_reward
        if FLAGS.action_noise > 0:
            env = action_noise.Wrapper(env, scale=FLAGS.action_noise)
        return env
Ejemplo n.º 6
0
 def test_step(self, lower, upper, scale):
     """`step()` forwards the clipped, noise-perturbed action to the env.

     The expected noise is a normal sample with standard deviation
     `scale * (upper - lower)` drawn from a `RandomState` seeded with 0; the
     task's random state is seeded identically, so the wrapper is expected
     to draw the same sample.
     """
     rng_seed = 0
     noise_std = scale * (upper - lower)
     # Each value comes from its own freshly seeded generator.
     noise_sample = np.random.RandomState(rng_seed).normal(scale=noise_std)
     raw_action = np.random.RandomState(rng_seed).uniform(lower, upper)
     clipped_action = np.clip(raw_action + noise_sample, lower, upper)
     fake_task = mock.Mock(spec=control.Task)
     fake_task.random = np.random.RandomState(rng_seed)
     spec = self.make_action_spec(lower=lower, upper=upper)
     inner_env = self.make_mock_env(action_spec=spec)
     inner_env.task = fake_task
     wrapper = action_noise.Wrapper(inner_env, scale=scale)
     result = wrapper.step(raw_action)
     self.assertStepCalledOnceWithCorrectAction(inner_env, clipped_action)
     self.assertIs(result, inner_env.step(clipped_action))
Ejemplo n.º 7
0
 def __init__(self,
              env,
              symbolic,
              seed,
              max_episode_length,
              action_repeat,
              bit_depth,
              action_noise_scale=None,
              render_size=64,
              use_rgbgr=False):
     """Load a dm_control suite task and record the adapter settings.

     Args:
         env: Environment id of the form ``'<domain>-<task>'``; it is split
             on ``'-'`` into exactly two parts.
         symbolic: If falsy, the environment is wrapped in
             ``pixels.Wrapper``.
         seed: Passed to ``suite.load`` as ``task_kwargs={'random': seed}``.
         max_episode_length: Stored as ``self.max_episode_length``.
         action_repeat: Stored as ``self.action_repeat``. A negative value
             selects the entry for the domain in
             ``CONTROL_SUITE_ACTION_REPEATS``, or 2 if the domain has none.
         bit_depth: Stored as ``self.bit_depth``.
         action_noise_scale: If not None, the environment is wrapped in
             ``action_noise.Wrapper`` with this ``scale``.
         render_size: Side length stored as ``self.render_size`` and used in
             ``self.obs_tuple``; a falsy value falls back to 64.
         use_rgbgr: If truthy, ``self.obs_tuple`` has 4 channels instead
             of 3.
     """
     from dm_control import suite
     from dm_control.suite.wrappers import pixels
     from dm_control.suite.wrappers import action_noise
     domain, task = env.split('-')
     self.symbolic = symbolic
     self.render_size = render_size if render_size else 64
     self.use_rgbgr = use_rgbgr
     # Use the normalised size so obs_tuple always agrees with
     # self.render_size, including when render_size was passed as None/0.
     channels = 4 if self.use_rgbgr else 3
     self.obs_tuple = (channels, self.render_size, self.render_size)
     self.action_noise_scale = action_noise_scale
     self._env = suite.load(domain_name=domain,
                            task_name=task,
                            task_kwargs={'random': seed})
     if not symbolic:
         self._env = pixels.Wrapper(self._env)
     if self.action_noise_scale is not None:
         self._env = action_noise.Wrapper(self._env,
                                          scale=self.action_noise_scale)
     self.max_episode_length = max_episode_length
     # Look the domain up once; None means there is no recommendation.
     try:
         recommended_repeat = CONTROL_SUITE_ACTION_REPEATS[domain]
     except KeyError:
         recommended_repeat = None
     if action_repeat < 0:
         action_repeat = 2 if recommended_repeat is None else recommended_repeat
     self.action_repeat = action_repeat
     if recommended_repeat is not None and action_repeat != recommended_repeat:
         print(
             'Using action repeat %d; recommended action repeat for domain is %d'
             % (action_repeat, recommended_repeat))
     self.bit_depth = bit_depth
Ejemplo n.º 8
0
 def test_getattr(self, attribute_name):
     """Attribute lookups on the wrapper return the wrapped env's attribute."""
     inner_env = self.make_mock_env()
     wrapper = action_noise.Wrapper(inner_env)
     from_wrapper = getattr(wrapper, attribute_name)
     from_inner = getattr(inner_env, attribute_name)
     self.assertIs(from_wrapper, from_inner)
Ejemplo n.º 9
0
 def test_observation_spec(self):
     """The wrapper's `observation_spec()` forwards to the wrapped env."""
     inner_env = self.make_mock_env()
     wrapper = action_noise.Wrapper(inner_env)
     returned_spec = wrapper.observation_spec()
     # Check the forwarding call before the comparison below calls it again.
     inner_env.observation_spec.assert_called_once_with()
     self.assertIs(returned_spec, inner_env.observation_spec())
Ejemplo n.º 10
0
 def test_reset(self):
     """The wrapper's `reset()` forwards to the wrapped env's `reset()`."""
     inner_env = self.make_mock_env()
     wrapper = action_noise.Wrapper(inner_env)
     first_step = wrapper.reset()
     # Check the forwarding call before the comparison below calls it again.
     inner_env.reset.assert_called_once_with()
     self.assertIs(first_step, inner_env.reset())