Example 1
def test_default_batch_properties(self):
    # A raw gym env is unbatched; the base wrapper must report the same
    # ``batched``/``batch_size`` as the env it wraps.
    cartpole_env = gym.spec('CartPole-v1').make()
    env = alf_gym_wrapper.AlfGymWrapper(cartpole_env)
    self.assertFalse(env.batched)
    self.assertEqual(env.batch_size, 1)
    wrap_env = alf_wrappers.AlfEnvironmentBaseWrapper(env)
    self.assertEqual(wrap_env.batched, env.batched)
    self.assertEqual(wrap_env.batch_size, env.batch_size)
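This test passes only because ``AlfEnvironmentBaseWrapper`` forwards
``batched`` and ``batch_size`` to the environment it wraps. A minimal sketch
of that delegation pattern (the class name is hypothetical; only the two
properties mirror what the test exercises):

class DelegatingWrapperSketch(object):
    """Hypothetical wrapper that defers batch properties to the wrapped env."""

    def __init__(self, env):
        self._env = env

    @property
    def batched(self):
        # Forwarded unchanged, as asserted by the test.
        return self._env.batched

    @property
    def batch_size(self):
        return self._env.batch_size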
Example 2
def test_batch_properties(self, batch_size):
    # A batched random env; the wrapper must forward whatever batch size
    # the wrapped env reports.
    obs_spec = ts.BoundedTensorSpec((2, 3), torch.int32, -10, 10)
    action_spec = ts.BoundedTensorSpec((1, ), torch.int64, -10, 10)
    env = RandomAlfEnvironment(
        obs_spec,
        action_spec,
        reward_fn=lambda *_: torch.tensor([1.0], dtype=torch.float32),
        batch_size=batch_size)
    wrap_env = alf_wrappers.AlfEnvironmentBaseWrapper(env)
    self.assertEqual(wrap_env.batched, env.batched)
    self.assertEqual(wrap_env.batch_size, env.batch_size)
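The extra ``batch_size`` argument indicates this test is parameterized; the
decorator is not shown in the snippet. A plausible setup, assuming absl's
``parameterized`` test library (the class name and batch sizes are
assumptions):

from absl.testing import parameterized

class BatchPropertiesTest(parameterized.TestCase):

    @parameterized.parameters(1, 4)  # assumed batch sizes
    def test_batch_properties(self, batch_size):
        ...  # body as in Example 2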
Example 3
def load(env_name,
         env_id=None,
         discount=1.0,
         max_episode_steps=None,
         use_success_wrapper=True,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         wrap_with_process=False):
    """Loads the selected environment and wraps it with the specified wrappers.

    Note that a ``TimeLimit`` wrapper is used to limit episode lengths when
    ``max_episode_steps`` is set.

    Args:
        env_name: Name of the environment to load; one of "Pusher" or
            "Reacher".
        env_id: (optional) ID of the environment.
        discount: Discount to use for the environment.
        max_episode_steps: Step limit applied by the ``TimeLimit`` wrapper.
            No limit is applied if set to 0 or None; these environments are
            not created from a registered gym spec, so there is no default
            step limit to fall back to.
        use_success_wrapper: If True, wraps the environment with
            ``PETSSuccessWrapper`` to record success info.
        gym_env_wrappers: Iterable with references to wrapper classes to use
            directly on the gym environment.
        alf_env_wrappers: Iterable with references to wrapper classes to use on
            the torch environment.
        wrap_with_process: Whether to wrap the environment in a separate
            process.

    Returns:
        An AlfEnvironment instance.
    """
    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    # Construct the base env first so that ``env_ctor`` below closes over a
    # bound variable.
    if env_name == "Pusher":
        env = PusherEnv()
    elif env_name == "Reacher":
        env = Reacher3DEnv()
    else:
        raise ValueError(
            "Unsupported env_name: %s (expected 'Pusher' or 'Reacher')" %
            env_name)
    env = ActionScalingWrapper(env)
    if use_success_wrapper:
        env = PETSSuccessWrapper(env, max_episode_steps)

    def env_ctor(env_id=None):
        return suite_gym.wrap_env(env,
                                  env_id=env_id,
                                  discount=discount,
                                  max_episode_steps=max_episode_steps,
                                  gym_env_wrappers=gym_env_wrappers,
                                  alf_env_wrappers=alf_env_wrappers)

    if wrap_with_process:
        process_env = process_environment.ProcessEnvironment(
            functools.partial(env_ctor))
        process_env.start()
        torch_env = alf_wrappers.AlfEnvironmentBaseWrapper(process_env)
    else:
        torch_env = env_ctor(env_id=env_id)

    return torch_env
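A minimal usage sketch (assuming this ``load`` is in scope; the step limit is
an arbitrary choice):

env = load("Pusher", max_episode_steps=100)
time_step = env.reset()
print(env.batch_size)  # 1: a single, unbatched environment
env.close()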
Example 4
def test_wrapped_method_propagation(self):
    # Every public method of the wrapper should be forwarded exactly once,
    # with the same arguments, to the wrapped env.
    mock_env = mock.MagicMock()
    env = alf_wrappers.AlfEnvironmentBaseWrapper(mock_env)
    env.reset()
    self.assertEqual(1, mock_env.reset.call_count)
    action = np.array(0, dtype=np.int64)
    env.step(action)
    self.assertEqual(1, mock_env.step.call_count)
    mock_env.step.assert_called_with(0)
    env.seed(0)
    self.assertEqual(1, mock_env.seed.call_count)
    mock_env.seed.assert_called_with(0)
    env.render()
    self.assertEqual(1, mock_env.render.call_count)
    env.close()
    self.assertEqual(1, mock_env.close.call_count)
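One subtlety: ``step`` is called with ``np.array(0)`` but asserted with the
plain integer ``0``. This works because ``mock`` compares recorded call
arguments with ``==``, and a 0-d numpy array compares equal (and truthy)
against the matching scalar:

import numpy as np
from unittest import mock

m = mock.MagicMock()
m(np.array(0, dtype=np.int64))
m.assert_called_with(0)  # passes: bool(np.array(0) == 0) is True
# Note: arrays with more than one element would raise here, since their
# elementwise comparison result cannot be coerced to a single bool.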
Example 5
def load(scene,
         env_id=None,
         discount=1.0,
         frame_skip=4,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         wrap_with_process=False,
         max_episode_steps=None):
    """Load deepmind lab envs.
    Args:
        scene (str): script for the deepmind_lab env. See available script:
            `<https://github.com/deepmind/lab/tree/master/game_scripts/levels>`_
        env_id (int): (optional) ID of the environment.
        discount (float): Discount to use for the environment.
        frame_skip (int): the frequency at which the agent experiences the game
        gym_env_wrappers (Iterable): Iterable with references to gym_wrappers,
            classes to use directly on the gym environment.
        alf_env_wrappers (Iterable): Iterable with references to alf_wrappers
            classes to use on the ALF environment.
        wrap_with_process (bool): Whether wrap env in a process
        max_episode_steps (int): max episode step limit
    Returns:
        An AlfEnvironment instance.
    """
    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    if max_episode_steps is None:
        max_episode_steps = 0

    def env_ctor(env_id=None):
        return suite_gym.wrap_env(
            DeepmindLabEnv(scene=scene, action_repeat=frame_skip),
            env_id=env_id,
            discount=discount,
            max_episode_steps=max_episode_steps,
            gym_env_wrappers=gym_env_wrappers,
            alf_env_wrappers=alf_env_wrappers)

    if wrap_with_process:
        process_env = process_environment.ProcessEnvironment(
            functools.partial(env_ctor))
        process_env.start()
        torch_env = alf_wrappers.AlfEnvironmentBaseWrapper(process_env)
    else:
        torch_env = env_ctor(env_id=env_id)
    return torch_env
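A usage sketch, assuming DeepMind Lab is installed ('lt_chasm' is one of the
stock level scripts):

env = load('lt_chasm', frame_skip=4, max_episode_steps=1000,
           wrap_with_process=True)
time_step = env.reset()
env.close()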
Example 6
def load(environment_name,
         env_id=None,
         concat_desired_goal=True,
         discount=1.0,
         max_episode_steps=None,
         sparse_reward=False,
         use_success_wrapper=True,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         wrap_with_process=False):
    """Loads the selected environment and wraps it with the specified wrappers.

    Note that by default a ``TimeLimit`` wrapper is used to limit episode lengths
    to the default benchmarks defined by the registered environments.

    Args:
        environment_name: Name for the environment to load.
        env_id: A scalar ``Tensor`` of the environment ID of the time step.
        concat_desired_goal: If True, the robot's observation and the desired
            goal are filtered out of the observation dict and flattened into
            a single vector.
        discount: Discount to use for the environment.
        max_episode_steps: If None the ``max_episode_steps`` will be set to the
            default step limit defined in the environment's spec. No limit is
            applied if set to 0 or if there is no ``timestep_limit`` set in the
            environment's spec.
        sparse_reward (bool): If True, the game ends once the goal is achieved,
            and rewards are shifted by +1, i.e., changed from -1/0 to 0/1.
        use_success_wrapper (bool): If True, wraps the environment with the
            ``SuccessWrapper``, which records success info after a specified
            number of timesteps.
        gym_env_wrappers: Iterable with references to wrapper classes to use
            directly on the gym environment.
        alf_env_wrappers: Iterable with references to wrapper classes to use on
            the torch environment.
        wrap_with_process: Whether to wrap the environment in a separate
            process.

    Returns:
        An AlfEnvironment instance.
    """
    assert (environment_name.startswith("Fetch")
            or environment_name.startswith("HandManipulate")), (
                "This suite only supports OpenAI's Fetch and ShadowHand envs!")

    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    gym_spec = gym.spec(environment_name)
    env = gym_spec.make()

    if max_episode_steps is None:
        if gym_spec.max_episode_steps is not None:
            max_episode_steps = gym_spec.max_episode_steps
        else:
            max_episode_steps = 0

    def env_ctor(env_id=None):
        return suite_gym.wrap_env(
            env,
            env_id=env_id,
            discount=discount,
            max_episode_steps=max_episode_steps,
            gym_env_wrappers=gym_env_wrappers,
            alf_env_wrappers=alf_env_wrappers,
            image_channel_first=False)

    # Concatenate the robot's observation and the goal location.
    if concat_desired_goal:
        keys = ["observation", "desired_goal"]
        try:  # for modern Gym (>=0.15.4)
            from gym.wrappers import FilterObservation, FlattenObservation
            env = FlattenObservation(FilterObservation(env, keys))
        except ImportError:  # for older gym (<=0.15.3)
            from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
            env = FlattenDictWrapper(env, keys)
    if use_success_wrapper:
        env = SuccessWrapper(env, max_episode_steps)
    env = ObservationClipWrapper(env)
    if sparse_reward:
        env = SparseReward(env)

    if wrap_with_process:
        process_env = process_environment.ProcessEnvironment(
            functools.partial(env_ctor))
        process_env.start()
        torch_env = alf_wrappers.AlfEnvironmentBaseWrapper(process_env)
    else:
        torch_env = env_ctor(env_id=env_id)

    return torch_env
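A usage sketch, assuming gym's robotics extras are installed
('FetchReach-v1' is a stock Fetch env):

env = load('FetchReach-v1', sparse_reward=True)
# With concat_desired_goal=True (the default), the dict observation is
# flattened into a single vector spec.
print(env.observation_spec())
env.close()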
Example 7
def load(game,
         env_id=None,
         state=None,
         discount=1.0,
         wrap_with_process=False,
         frame_skip=4,
         frame_stack=4,
         data_format='channels_last',
         record=False,
         crop=True,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         max_episode_steps=4500):
    """Loads the selected mario game and wraps it .
    Args:
        game (str): Name for the environment to load.
        env_id (int): (optional) ID of the environment.
        state (str): game state (level)
        wrap_with_process (bool): Whether wrap env in a process
        discount (float): Discount to use for the environment.
        frame_skip (int): the frequency at which the agent experiences the game
        frame_stack (int): Stack k last frames
        data_format (str): one of `channels_last` (default) or `channels_first`.
                    The ordering of the dimensions in the inputs.
        record (bool): Record the gameplay , see retro.retro_env.RetroEnv.record
               `False` for not record otherwise record to current working directory or
               specified director
        crop (bool): whether to crop frame to fixed size
        gym_env_wrappers (Iterable): Iterable with references to gym_wrappers,
            classes to use directly on the gym environment.
        alf_env_wrappers (Iterable): Iterable with references to alf_wrappers
            classes to use on the ALF environment.
        max_episode_steps (int): max episode step limit

    Returns:
        An AlfEnvironment instance.
    """
    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    if max_episode_steps is None:
        max_episode_steps = 0

    def env_ctor(env_id=None):
        env_args = [game, state] if state else [game]
        env = retro.make(*env_args, record=record)
        buttons = env.buttons
        env = MarioXReward(env)
        if frame_skip:
            env = FrameSkip(env, frame_skip)
        env = ProcessFrame84(env, crop=crop)
        if frame_stack:
            env = FrameStack(env, stack_size=frame_stack)
        env = FrameFormat(env, data_format=data_format)
        env = LimitedDiscreteActions(env, buttons)
        return suite_gym.wrap_env(
            env,
            env_id=env_id,
            discount=discount,
            max_episode_steps=max_episode_steps,
            gym_env_wrappers=gym_env_wrappers,
            alf_env_wrappers=alf_env_wrappers,
            auto_reset=True)

    # Wrap each env in a new process when parallel envs are used, since the
    # emulator cannot create multiple instances within one process.
    if wrap_with_process:
        process_env = process_environment.ProcessEnvironment(
            functools.partial(env_ctor))
        process_env.start()
        torch_env = alf_wrappers.AlfEnvironmentBaseWrapper(process_env)
    else:
        torch_env = env_ctor(env_id=env_id)
    return torch_env
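A usage sketch, assuming gym-retro and the (separately obtained)
SuperMarioBros ROM are installed; the game and state names follow retro's
naming convention:

# Each process can host only one emulator, hence wrap_with_process=True
# when running several environments in parallel.
env = load('SuperMarioBros-Nes',
           state='Level1-1',
           frame_skip=4,
           frame_stack=4,
           wrap_with_process=True)
time_step = env.reset()
env.close()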