Example 1
def env_ctor():
    return suite_gym.wrap_env(DeepmindLabEnv(scene=scene,
                                             action_repeat=frame_skip),
                              discount=discount,
                              max_episode_steps=max_episode_steps,
                              gym_env_wrappers=gym_env_wrappers,
                              env_wrappers=env_wrappers)
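
A zero-argument constructor like this is what parallel-environment launchers expect: each worker process calls it to build its own wrapped copy. A minimal sketch, assuming tf_agents' ``ParallelPyEnvironment`` (this example's ``env_wrappers`` argument suggests the tf_agents flavor of ``suite_gym``) and a ``num_parallel_environments`` placeholder:

    from tf_agents.environments import parallel_py_environment

    # Each worker process invokes env_ctor() to construct its own environment.
    parallel_env = parallel_py_environment.ParallelPyEnvironment(
        [env_ctor] * num_parallel_environments)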
Example 2
def env_ctor(env_id=None):
    return suite_gym.wrap_env(env,
                              env_id=env_id,
                              discount=discount,
                              max_episode_steps=max_episode_steps,
                              gym_env_wrappers=gym_env_wrappers,
                              alf_env_wrappers=alf_env_wrappers)
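
The optional ``env_id`` argument lets a launcher tag each instance in a batch of environments. A sketch of that pattern (``num_envs`` is a placeholder), binding one id per constructor with ``functools.partial``:

    import functools

    # One constructor per environment, each pre-bound to a distinct id.
    env_ctors = [functools.partial(env_ctor, env_id=i) for i in range(num_envs)]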
Example 3
def env_ctor(port):
    gym_env = gym_spec.make(port=port)
    return suite_gym.wrap_env(gym_env,
                              discount=discount,
                              max_episode_steps=max_episode_steps,
                              gym_env_wrappers=gym_env_wrappers,
                              env_wrappers=env_wrappers,
                              spec_dtype_map=spec_dtype_map)
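
Each instance here also needs its own server port, since ``gym_spec.make(port=port)`` starts a simulation server that binds a socket. A sketch of handing out non-conflicting ports (``base_port`` and ``num_envs`` are placeholders):

    # Give each simulation server a distinct port so the sockets don't collide.
    envs = [env_ctor(port=base_port + i) for i in range(num_envs)]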
Example 4
def load(environment_name,
         env_id=None,
         discount=1.0,
         max_episode_steps=None,
         unconstrained=False,
         gym_env_wrappers=(),
         alf_env_wrappers=()):
    """Loads the selected environment and wraps it with the specified wrappers.

    Note that by default a ``TimeLimit`` wrapper is used to limit episode lengths
    to the default benchmarks defined by the registered environments.

    Args:
        environment_name: Name for the environment to load.
        env_id: A scalar ``Tensor`` holding the environment ID of the time
            step.
        discount: Discount to use for the environment.
        max_episode_steps: If None or 0, ``max_episode_steps`` will be set to
            one less than the environment's default step limit (see the comment
            in the body below). Otherwise it will be set to the smaller of the
            two values.
        unconstrained (bool): if True, the suite will be used just as an
            unconstrained environment. The reward will always be scalar without
            including constraints.
        gym_env_wrappers: Iterable with references to wrapper classes to use
            directly on the gym environment.
        alf_env_wrappers: Iterable with references to wrapper classes to use on
            the torch environment.

    Returns:
        An AlfEnvironment instance.
    """

    # We can directly make the env here because none of the safety gym tasks
    # is registered with a ``max_episode_steps`` argument, so
    # ``gym.wrappers.time_limit.TimeLimit`` won't be applied. Each task instead
    # manages its own time limit through ``env.num_steps``.
    env = gym.make(environment_name)

    # fill all env info with default values
    env = CompleteEnvInfo(env, environment_name)

    # make vector reward
    if not unconstrained:
        env = VectorReward(env)

    # We subtract 1 from the original env's max steps here, because the
    # underlying gym env outputs ``done=True`` when reaching the time limit
    # ``env.num_steps`` (before the ``AlfGymWrapper`` sees it), which is
    # incorrect:
    # https://github.com/openai/safety-gym/blob/f31042f2f9ee61b9034dd6a416955972911544f5/safety_gym/envs/engine.py#L1302
    if not max_episode_steps:  # None or 0
        max_episode_steps = env.num_steps - 1
    max_episode_steps = min(env.num_steps - 1, max_episode_steps)

    return suite_gym.wrap_env(env,
                              env_id=env_id,
                              discount=discount,
                              max_episode_steps=max_episode_steps,
                              gym_env_wrappers=gym_env_wrappers,
                              alf_env_wrappers=alf_env_wrappers)
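
A hedged usage sketch, assuming Safety Gym's registered task names such as ``Safexp-PointGoal1-v0``; with ``unconstrained=False`` the ``VectorReward`` wrapper produces a multi-dimensional reward carrying the constraint signal alongside the task reward:

    env = load("Safexp-PointGoal1-v0", discount=0.99)
    print(env.reward_spec())  # vector-valued unless unconstrained=True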
Example 5
def env_ctor(env_id=None):
    return suite_gym.wrap_env(
        env,
        env_id=env_id,
        discount=discount,
        max_episode_steps=max_episode_steps,
        gym_env_wrappers=gym_env_wrappers,
        alf_env_wrappers=alf_env_wrappers,
        # Keep image observations in channels-last (HWC) order.
        image_channel_first=False)
Example 6
def load(game,
         env_args=dict(),
         discount=1.0,
         frame_skip=None,
         frame_stack=None,
         gym_env_wrappers=(),
         env_wrappers=(),
         max_episode_steps=0,
         spec_dtype_map=None):
    """Loads the specified simple game and wraps it.
    Args:
        game (str): name for the environment to load. The game should have been
            defined in the sub-directory './simple/'.
        env_args (dict): extra args for creating the game.
        discount (float): discount to use for the environment.
        frame_skip (int): repeat each action for this many frames; the agent
            experiences the game at this time interval.
        frame_stack (int): stack this many of the latest frames as the
            observation input.
        gym_env_wrappers (list): list of gym env wrappers.
        env_wrappers (list): list of tf_agents env wrappers.
        max_episode_steps (int): max number of steps for an episode.
        spec_dtype_map (dict): a dict that maps gym specs to tf dtypes to use as
            the default dtype for the tensors. An easy way to configure a
            custom mapping through Gin is to define a gin-configurable function
            that returns the desired mapping and call it in your Gin config
            file, for example:
            `suite_socialbot.load.spec_dtype_map = @get_custom_mapping()`.

    Returns:
        A PyEnvironmentBase instance.
    """

    if spec_dtype_map is None:
        spec_dtype_map = {gym.spaces.Box: np.float32}

    if game == "NoisyArray":
        env = NoisyArray(**env_args)
    else:
        raise ValueError("No such simple environment: %s" % game)
    if frame_skip:
        env = FrameSkip(env, frame_skip)
    if frame_stack:
        env = FrameStack(env, stack_size=frame_stack)
    return suite_gym.wrap_env(env,
                              discount=discount,
                              max_episode_steps=max_episode_steps,
                              gym_env_wrappers=gym_env_wrappers,
                              env_wrappers=env_wrappers,
                              spec_dtype_map=spec_dtype_map,
                              auto_reset=True)
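
The ``spec_dtype_map`` Gin pattern mentioned in the docstring would look roughly like this (``get_custom_mapping`` is the docstring's hypothetical name):

    import gin
    import gym
    import numpy as np

    @gin.configurable
    def get_custom_mapping():
        # Default all Box spaces to float32 tensors.
        return {gym.spaces.Box: np.float32}

    # In the Gin config file:
    #   suite_socialbot.load.spec_dtype_map = @get_custom_mapping()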
Example 7
def env_ctor(env_id=None):
    env_args = [game, state] if state else [game]
    env = retro.make(*env_args, record=record)
    # Capture the original button layout before any wrapper changes the
    # action space; LimitedDiscreteActions needs it below.
    buttons = env.buttons
    # Reward shaping wraps the raw emulator env, before frame processing.
    env = MarioXReward(env)
    if frame_skip:
        env = FrameSkip(env, frame_skip)
    env = ProcessFrame84(env, crop=crop)
    if frame_stack:
        env = FrameStack(env, stack_size=frame_stack)
    env = FrameFormat(env, data_format=data_format)
    env = LimitedDiscreteActions(env, buttons)
    return suite_gym.wrap_env(
        env,
        env_id=env_id,
        discount=discount,
        max_episode_steps=max_episode_steps,
        gym_env_wrappers=gym_env_wrappers,
        alf_env_wrappers=alf_env_wrappers,
        auto_reset=True)
Example 8
def load(game,
         env_args=dict(),
         discount=1.0,
         frame_skip=None,
         frame_stack=None,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         max_episode_steps=0):
    """Loads the specified simple game and wraps it.
    Args:
        game (str): name for the environment to load. The game should have been
            defined in the sub-directory ``./simple/``.
        env_args (dict): extra args for creating the game.
        discount (float): discount to use for the environment.
        frame_skip (int): repeat each action for this many frames; the agent
            experiences the game at this time interval.
        frame_stack (int): stack this many of the latest frames as the
            observation input.
        gym_env_wrappers (list): list of gym env wrappers.
        alf_env_wrappers (list): list of ALF env wrappers.
        max_episode_steps (int): max number of steps for an episode.

    Returns:
        An AlfEnvironment instance.
    """

    if game == "NoisyArray":
        env = NoisyArray(**env_args)
    else:
        raise ValueError("No such simple environment: %s" % game)
    if frame_skip:
        env = FrameSkip(env, frame_skip)
    if frame_stack:
        env = FrameStack(env, stack_size=frame_stack)
    return suite_gym.wrap_env(
        env,
        discount=discount,
        max_episode_steps=max_episode_steps,
        gym_env_wrappers=gym_env_wrappers,
        alf_env_wrappers=alf_env_wrappers,
        auto_reset=True)
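
A hedged usage sketch, assuming ``NoisyArray`` is constructible with its defaults (``env_args`` stays empty):

    env = load("NoisyArray",
               frame_skip=2,           # act every 2 frames
               frame_stack=4,          # observe the 4 latest frames
               max_episode_steps=100)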
Example 9
def load(environment_name,
         env_id=None,
         discount=1.0,
         max_episode_steps=None,
         gym_env_wrappers=(),
         alf_env_wrappers=(),
         env_config=None):
    """Loads the selected environment and wraps it with the specified wrappers.

    Note that by default a ``TimeLimit`` wrapper is used to limit episode lengths
    to the default benchmarks defined by the registered environments.

    Args:
        environment_name (str): Name for the environment to load.
        env_id (int): (optional) ID of the environment.
        discount (float): Discount to use for the environment.
        max_episode_steps (int): If None or 0, ``max_episode_steps`` will be set
            to one less than the step limit defined by the environment config
            (see the comment in the body below). Otherwise it will be set to
            the smaller of the two values.
        gym_env_wrappers (Iterable): Iterable with references to gym_wrappers
            classes to use directly on the gym environment.
        alf_env_wrappers (Iterable): Iterable with references to alf_wrappers
            classes to use on the ALF environment.
        env_config (dict|None): a dictionary for configuring some aspects of the
            environment. If None, the default configuration will be used.
            Please refer to the ``default_env_config`` below for
            an example config and the doc for more details:
            https://highway-env.readthedocs.io/en/latest/user_guide.html

    Returns:
        An AlfEnvironment instance.
    """
    assert environment_name in {
        "highway-v0", "merge-v0", "roundabout-v0", "intersection-v0",
        "parking-v0"
    }, "wrong highway environment name"

    gym_spec = gym.spec(environment_name)
    gym_env = gym_spec.make()

    if env_config is None:
        default_env_config = {
            "observation": {
                "type": "Kinematics",
                "vehicles_count": 5,
                "features": [
                    "presence", "x", "y", "vx", "vy", "cos_h", "sin_h"
                ],
                "features_range": {
                    "x": [-100, 100],
                    "y": [-100, 100],
                    "vx": [-20, 20],
                    "vy": [-20, 20]
                },
                "absolute": False,
                "order": "sorted"
            },
            "action": {
                "type": "ContinuousAction"
            }
        }
        env_config = default_env_config

    gym_env.configure(env_config)
    gym_env.reset()

    # Currently we flatten the observations; other observation formats may be
    # supported later.
    gym_env = FlattenObservation(gym_env)
    gym_env = RemoveActionEnvInfo(gym_env)
    gym_env = ActionScalarization(gym_env)

    # In the original environment, the last step due to time limit is not
    # differentiated from those due to other reasons (e.g. crash):
    # https://github.com/eleurent/highway-env/blob/ede285567a164a58b5bf8a78f1a6792f5a13a3fb/highway_env/envs/highway_env.py#L97-L99
    # Here we subtract 1 from the max steps specified by config["duration"] and
    # use ALF's time limit wrapper to handle the last step correctly.
    if not max_episode_steps:
        max_episode_steps = gym_env.config["duration"] - 1

    max_episode_steps = min(gym_env.config["duration"] - 1, max_episode_steps)

    return suite_gym.wrap_env(
        gym_env,
        env_id=env_id,
        discount=discount,
        max_episode_steps=max_episode_steps,
        gym_env_wrappers=gym_env_wrappers,
        alf_env_wrappers=alf_env_wrappers)
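
A hedged usage sketch: override the observation type while letting the loader derive the time limit from ``config["duration"]`` (keys follow highway-env's user guide):

    env = load(
        "highway-v0",
        max_episode_steps=0,  # fall back to config["duration"] - 1
        env_config={
            "observation": {"type": "Kinematics"},
            "action": {"type": "ContinuousAction"},
        })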