Example #1
# gym, os, and the helper wrappers used below (logger, Monitor, EnvNormalize)
# are assumed to be imported from the surrounding project.
def env_maker(environment_id,
              seed,
              serial_id,
              monitor=False,
              allow_early_resets=False,
              normalize_observations=False,
              normalize_returns=False,
              normalize_gamma=0.99):
    """ Create a relatively raw atari environment """
    env = gym.make(environment_id)
    env.seed(seed + serial_id)

    # Monitoring the env
    if monitor:
        logdir = logger.get_dir() and os.path.join(logger.get_dir(),
                                                   str(serial_id))
    else:
        logdir = None

    env = Monitor(env, logdir, allow_early_resets=allow_early_resets)

    if normalize_observations or normalize_returns:
        env = EnvNormalize(env,
                           normalize_observations=normalize_observations,
                           normalize_returns=normalize_returns,
                           gamma=normalize_gamma)

    return env
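A minimal usage sketch for the factory above (the game id and the one-instance-per-worker indexing are illustrative assumptions, not part of the original snippet):

# Four differently-seeded copies of the same game, one per worker index.
envs = [env_maker('BreakoutNoFrameskip-v4', seed=1, serial_id=i)
        for i in range(4)]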
Example #2
    # Env, RandomizedRubiksCube, and ClipEpisodeLengthWrapper come from the
    # surrounding project; this method belongs to an environment-factory class.
    def instantiate(self,
                    seed=0,
                    serial_id=1,
                    preset='default',
                    extra_args=None) -> Env:
        """ Create a new Env instance """
        settings = self.settings[preset]

        env = RandomizedRubiksCube(parameters=settings['parameters'],
                                   constants=settings['constants'])

        env = ClipEpisodeLengthWrapper(
            env, max_episode_length=settings['max_episode_frames'])

        # Monitoring the env
        if settings['monitor']:
            logdir = logger.get_dir() and os.path.join(logger.get_dir(),
                                                       str(serial_id))
        else:
            logdir = None

        env = Monitor(env,
                      logdir,
                      allow_early_resets=settings['allow_early_resets'])

        return env
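A hypothetical call, assuming `factory` is an instance of the (not shown) class this method belongs to and that its settings dict defines a 'default' preset:

env = factory.instantiate(seed=42, serial_id=0, preset='default')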
Example #3
def wrapped_env_maker(environment_id,
                      seed,
                      serial_id,
                      disable_reward_clipping=False,
                      disable_episodic_life=False,
                      monitor=False,
                      allow_early_resets=False,
                      scale_float_frames=False,
                      max_episode_frames=10000,
                      frame_stack=None):
    """ Wrap atari environment so that it's nicer to learn RL algorithms """
    env = env_maker(environment_id)
    env.seed(seed + serial_id)

    if max_episode_frames is not None:
        env = ClipEpisodeLengthWrapper(env,
                                       max_episode_length=max_episode_frames)

    # Monitoring the env
    if monitor:
        logdir = logger.get_dir() and os.path.join(logger.get_dir(),
                                                   str(serial_id))
    else:
        logdir = None

    env = Monitor(env, logdir, allow_early_resets=allow_early_resets)

    if not disable_episodic_life:
        # Make end-of-life == end-of-episode, but only reset on true game over.
        # Done by DeepMind for the DQN and co. since it helps value estimation.
        env = EpisodicLifeEnv(env)

    if 'FIRE' in env.unwrapped.get_action_meanings():
        # Take action on reset for environments that are fixed until firing.
        if disable_episodic_life:
            env = FireEpisodicLifeEnv(env)
        else:
            env = FireResetEnv(env)

    # Warp frames to 84x84 as done in the Nature paper and later work.
    env = WarpFrame(env)

    if scale_float_frames:
        env = ScaledFloatFrame(env)

    if not disable_reward_clipping:
        # Bin reward to {+1, 0, -1} by its sign.
        env = ClipRewardEnv(env)

    if frame_stack is not None:
        env = FrameStack(env, frame_stack)

    return env
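A usage sketch with a typical DeepMind-style setup (the game id and the four-frame stack are illustrative assumptions):

# Episodic life, reward clipping, and 84x84 frame warping are on by default here.
env = wrapped_env_maker('BreakoutNoFrameskip-v4', seed=1, serial_id=0,
                        frame_stack=4)
obs = env.reset()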
Example #4
import os

import gym
from gym.wrappers import FlattenDictWrapper

from baselines import logger
from baselines.bench import Monitor
from baselines.common import set_global_seeds


def make_robotics_env(env_id, seed, rank=0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = FlattenDictWrapper(env, ['observation', 'desired_goal'])
    env = Monitor(env,
                  logger.get_dir()
                  and os.path.join(logger.get_dir(), str(rank)),
                  info_keywords=('is_success',))
    env.seed(seed)
    return env
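For example (the Fetch task id is an assumption; any goal-based robotics env that reports 'is_success' in its info dict fits):

env = make_robotics_env('FetchReach-v1', seed=0, rank=0)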
Example #5
def env_maker(environment_id,
              seed,
              serial_id,
              monitor=False,
              allow_early_resets=False):
    """ Create a classic control environment with basic set of wrappers """
    env = gym.make(environment_id)
    env.seed(seed + serial_id)

    # Monitoring the env
    if monitor:
        logdir = logger.get_dir() and os.path.join(logger.get_dir(),
                                                   str(serial_id))
    else:
        logdir = None

    env = Monitor(env, logdir, allow_early_resets=allow_early_resets)

    return env
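A quick sketch (the CartPole id is illustrative):

env = env_maker('CartPole-v1', seed=7, serial_id=0)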
Example #6
import os

import gym
from mpi4py import MPI

from baselines import logger
from baselines.bench import Monitor
from baselines.common import set_global_seeds


def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(
        logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
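Called once per MPI worker, for example (the task id and reward scale are illustrative):

env = make_mujoco_env('HalfCheetah-v2', seed=0, reward_scale=0.1)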
Example #7
import os

import gym
import gym.wrappers

from baselines import logger
from baselines.bench import Monitor
from baselines.common import retro_wrappers
from baselines.common.atari_wrappers import make_atari, wrap_deepmind

try:
    from mpi4py import MPI
except ImportError:
    MPI = None


def make_env(env_id,
             env_type,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             gamestate=None,
             flatten_dict_observations=True,
             wrapper_kwargs=None):
    """ Create a monitored, optionally reward-scaled env for Atari, Retro, or plain gym ids """
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir()
                  and os.path.join(logger.get_dir(),
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
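A sketch of the Atari path (the game id and the frame-stack flag are assumptions; wrapper_kwargs is forwarded to wrap_deepmind):

env = make_env('PongNoFrameskip-v4', 'atari', subrank=0, seed=0,
               wrapper_kwargs={'frame_stack': True})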