def test_default_batch_properties(self):
    """Checks that the base wrapper mirrors the batch properties of an
    unbatched gym environment.
    """
    gym_env = gym.spec('CartPole-v1').make()
    alf_env = alf_gym_wrapper.AlfGymWrapper(gym_env)

    # The wrapped gym environment is expected to be unbatched with size 1.
    self.assertFalse(alf_env.batched)
    self.assertEqual(alf_env.batch_size, 1)

    # Wrapping once more must not change either property.
    wrapped = alf_wrappers.AlfEnvironmentBaseWrapper(alf_env)
    for attr in ('batched', 'batch_size'):
        self.assertEqual(getattr(wrapped, attr), getattr(alf_env, attr))
def test_batch_properties(self, batch_size):
    """Checks that the base wrapper mirrors ``batched`` and ``batch_size`` of
    a ``RandomAlfEnvironment`` built with the given ``batch_size``.

    Args:
        batch_size: batch size forwarded to ``RandomAlfEnvironment``.
    """
    observation_spec = ts.BoundedTensorSpec((2, 3), torch.int32, -10, 10)
    act_spec = ts.BoundedTensorSpec((1, ), torch.int64, -10, 10)

    def constant_reward(*_):
        # The reward is a fixed value regardless of the arguments.
        return torch.tensor([1.0], dtype=torch.float32)

    base_env = RandomAlfEnvironment(
        observation_spec,
        act_spec,
        reward_fn=constant_reward,
        batch_size=batch_size)
    wrapped = alf_wrappers.AlfEnvironmentBaseWrapper(base_env)

    for attr in ('batched', 'batch_size'):
        self.assertEqual(getattr(wrapped, attr), getattr(base_env, attr))
def load(env_name,
         env_id=None,
         discount=1.0,
         max_episode_steps=None,
         use_success_wrapper=True,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         wrap_with_process=False):
    """Loads the selected environment and wraps it with the specified wrappers.

    Args:
        env_name (str): Which environment to build. Must be ``"Pusher"`` or
            ``"Reacher"``.
        env_id (int): (optional) ID of the environment, forwarded to
            ``suite_gym.wrap_env``. It is only used when the environment is
            not wrapped in a process.
        discount (float): Discount to use for the environment.
        max_episode_steps (int): Forwarded to ``PETSSuccessWrapper`` (when
            enabled) and to ``suite_gym.wrap_env``.
            NOTE(review): ``None`` is forwarded as-is; confirm both callees
            accept it.
        use_success_wrapper (bool): If True, wraps the environment with
            ``PETSSuccessWrapper``.
        gym_env_wrappers: Iterable with references to wrapper classes to use
            directly on the gym environment.
        alf_env_wrappers: Iterable with references to wrapper classes to use on
            the torch environment.
        wrap_with_process (bool): Whether to wrap the environment in a
            ``ProcessEnvironment``.

    Returns:
        An AlfEnvironment instance.

    Raises:
        ValueError: If ``env_name`` is not one of the supported names.
    """
    # Fail fast, before any checker state is touched or any environment is
    # built: an unknown name would otherwise leave ``env`` unbound and surface
    # later as an opaque UnboundLocalError.
    supported_names = ("Pusher", "Reacher")
    if env_name not in supported_names:
        raise ValueError(
            "Unsupported env_name %r; expected one of %s" %
            (env_name, ", ".join(supported_names)))

    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    if env_name == "Pusher":
        env = PusherEnv()
    else:
        env = Reacher3DEnv()

    env = ActionScalingWrapper(env)
    if use_success_wrapper:
        env = PETSSuccessWrapper(env, max_episode_steps)

    def env_ctor(env_id=None):
        # ``env`` is the fully wrapped gym environment built above.
        return suite_gym.wrap_env(
            env,
            env_id=env_id,
            discount=discount,
            max_episode_steps=max_episode_steps,
            gym_env_wrappers=gym_env_wrappers,
            alf_env_wrappers=alf_env_wrappers)

    if wrap_with_process:
        process_env = process_environment.ProcessEnvironment(
            functools.partial(env_ctor))
        process_env.start()
        torch_env = alf_wrappers.AlfEnvironmentBaseWrapper(process_env)
    else:
        torch_env = env_ctor(env_id=env_id)
    return torch_env
def test_wrapped_method_propagation(self):
    """Checks that ``reset``, ``step``, ``seed``, ``render`` and ``close``
    called on the base wrapper each reach the wrapped environment once.
    """
    inner = mock.MagicMock()
    wrapper = alf_wrappers.AlfEnvironmentBaseWrapper(inner)

    wrapper.reset()
    self.assertEqual(1, inner.reset.call_count)

    zero_action = np.array(0, dtype=np.int64)
    wrapper.step(zero_action)
    self.assertEqual(1, inner.step.call_count)
    inner.step.assert_called_with(0)

    wrapper.seed(0)
    self.assertEqual(1, inner.seed.call_count)
    inner.seed.assert_called_with(0)

    wrapper.render()
    self.assertEqual(1, inner.render.call_count)

    wrapper.close()
    self.assertEqual(1, inner.close.call_count)
def load(scene,
         env_id=None,
         discount=1.0,
         frame_skip=4,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         wrap_with_process=False,
         max_episode_steps=None):
    """Builds a DeepMind Lab environment and wraps it as an ALF environment.

    Args:
        scene (str): script for the deepmind_lab env. See available scripts:
            `<https://github.com/deepmind/lab/tree/master/game_scripts/levels>`_
        env_id (int): (optional) ID of the environment. Only used when the
            environment is not wrapped in a process.
        discount (float): Discount to use for the environment.
        frame_skip (int): passed to ``DeepmindLabEnv`` as ``action_repeat``.
        gym_env_wrappers (Iterable): Iterable with references to gym_wrappers,
            classes to use directly on the gym environment.
        alf_env_wrappers (Iterable): Iterable with references to alf_wrappers
            classes to use on the ALF environment.
        wrap_with_process (bool): Whether to wrap the env in a process.
        max_episode_steps (int): max episode step limit; ``None`` is treated
            as 0.

    Returns:
        An AlfEnvironment instance.
    """
    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    step_limit = 0 if max_episode_steps is None else max_episode_steps

    def env_ctor(env_id=None):
        lab_env = DeepmindLabEnv(scene=scene, action_repeat=frame_skip)
        return suite_gym.wrap_env(
            lab_env,
            env_id=env_id,
            discount=discount,
            max_episode_steps=step_limit,
            gym_env_wrappers=gym_env_wrappers,
            alf_env_wrappers=alf_env_wrappers)

    if not wrap_with_process:
        return env_ctor(env_id=env_id)

    # The constructor is handed to the process wrapper, which is started
    # before being exposed through the base ALF wrapper.
    process_env = process_environment.ProcessEnvironment(
        functools.partial(env_ctor))
    process_env.start()
    return alf_wrappers.AlfEnvironmentBaseWrapper(process_env)
def load(environment_name,
         env_id=None,
         concat_desired_goal=True,
         discount=1.0,
         max_episode_steps=None,
         sparse_reward=False,
         use_success_wrapper=True,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         wrap_with_process=False):
    """Builds a Fetch or ShadowHand gym environment and wraps it for ALF.

    Args:
        environment_name (str): Name of the environment to load. Must start
            with ``"Fetch"`` or ``"HandManipulate"``.
        env_id: ID of the environment, forwarded to ``suite_gym.wrap_env``.
            Only used when the environment is not wrapped in a process.
        concat_desired_goal (bool): If True, keeps only the ``"observation"``
            and ``"desired_goal"`` entries of the observation and flattens
            them.
        discount (float): Discount to use for the environment.
        max_episode_steps (int): If None, the value from the environment's gym
            spec is used, or 0 if the spec does not define one.
        sparse_reward (bool): If True, wraps the environment with
            ``SparseReward``.
        use_success_wrapper (bool): If True, wraps the environment with
            ``SuccessWrapper``.
        gym_env_wrappers: Iterable with references to wrapper classes to use
            directly on the gym environment.
        alf_env_wrappers: Iterable with references to wrapper classes to use on
            the torch environment.
        wrap_with_process (bool): Whether to wrap the env in a process.

    Returns:
        An AlfEnvironment instance.
    """
    supported_prefixes = ("Fetch", "HandManipulate")
    assert environment_name.startswith(supported_prefixes), (
        "This suite only supports OpenAI's Fetch and ShadowHand envs!")

    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    gym_spec = gym.spec(environment_name)
    gym_env = gym_spec.make()

    if max_episode_steps is None:
        spec_limit = gym_spec.max_episode_steps
        max_episode_steps = 0 if spec_limit is None else spec_limit

    # concat robot's observation and the goal location
    if concat_desired_goal:
        keys = ["observation", "desired_goal"]
        try:
            # for modern Gym (>=0.15.4)
            from gym.wrappers import FilterObservation, FlattenObservation
            gym_env = FlattenObservation(FilterObservation(gym_env, keys))
        except ImportError:
            # for older gym (<=0.15.3)
            from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
            gym_env = FlattenDictWrapper(gym_env, keys)

    # Wrapper order matters: success tracking, then observation clipping,
    # then the optional sparse reward.
    if use_success_wrapper:
        gym_env = SuccessWrapper(gym_env, max_episode_steps)
    gym_env = ObservationClipWrapper(gym_env)
    if sparse_reward:
        gym_env = SparseReward(gym_env)

    def env_ctor(env_id=None):
        return suite_gym.wrap_env(
            gym_env,
            env_id=env_id,
            discount=discount,
            max_episode_steps=max_episode_steps,
            gym_env_wrappers=gym_env_wrappers,
            alf_env_wrappers=alf_env_wrappers,
            image_channel_first=False)

    if not wrap_with_process:
        return env_ctor(env_id=env_id)

    process_env = process_environment.ProcessEnvironment(
        functools.partial(env_ctor))
    process_env.start()
    return alf_wrappers.AlfEnvironmentBaseWrapper(process_env)
def load(game,
         env_id=None,
         state=None,
         discount=1.0,
         wrap_with_process=False,
         frame_skip=4,
         frame_stack=4,
         data_format='channels_last',
         record=False,
         crop=True,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         max_episode_steps=4500):
    """Loads the selected mario game and wraps it.

    Args:
        game (str): Name for the environment to load.
        env_id (int): (optional) ID of the environment. Only used when the
            environment is not wrapped in a process.
        state (str): game state (level). When falsy, only ``game`` is passed
            to ``retro.make``.
        discount (float): Discount to use for the environment.
        wrap_with_process (bool): Whether to wrap the env in a process.
        frame_skip (int): the frequency at which the agent experiences the
            game. No ``FrameSkip`` wrapper is applied when falsy.
        frame_stack (int): Stack k last frames. No ``FrameStack`` wrapper is
            applied when falsy.
        data_format (str): one of `channels_last` (default) or
            `channels_first`. The ordering of the dimensions in the inputs.
        record (bool): Record the gameplay, see
            retro.retro_env.RetroEnv.record. `False` for no recording,
            otherwise record to the current working directory or the
            specified directory.
        crop (bool): whether to crop frame to fixed size.
        gym_env_wrappers (Iterable): Iterable with references to gym_wrappers,
            classes to use directly on the gym environment.
        alf_env_wrappers (Iterable): Iterable with references to alf_wrappers
            classes to use on the ALF environment.
        max_episode_steps (int): max episode step limit; ``None`` is treated
            as 0.

    Returns:
        An AlfEnvironment instance.
    """
    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    step_limit = 0 if max_episode_steps is None else max_episode_steps

    def env_ctor(env_id=None):
        make_args = (game, state) if state else (game, )
        retro_env = retro.make(*make_args, record=record)
        # Read the button list from the raw emulator env before wrapping it.
        buttons = retro_env.buttons

        wrapped = MarioXReward(retro_env)
        if frame_skip:
            wrapped = FrameSkip(wrapped, frame_skip)
        wrapped = ProcessFrame84(wrapped, crop=crop)
        if frame_stack:
            wrapped = FrameStack(wrapped, stack_size=frame_stack)
        wrapped = FrameFormat(wrapped, data_format=data_format)
        wrapped = LimitedDiscreteActions(wrapped, buttons)

        return suite_gym.wrap_env(
            wrapped,
            env_id=env_id,
            discount=discount,
            max_episode_steps=step_limit,
            gym_env_wrappers=gym_env_wrappers,
            alf_env_wrappers=alf_env_wrappers,
            auto_reset=True)

    if not wrap_with_process:
        return env_ctor(env_id=env_id)

    # wrap each env in a new process when parallel envs are used
    # since it cannot create multiple emulator instances per process
    process_env = process_environment.ProcessEnvironment(
        functools.partial(env_ctor))
    process_env.start()
    return alf_wrappers.AlfEnvironmentBaseWrapper(process_env)