Example #1
0
 def make_env(self, env_id, seed, logger_dir=None, reward_scale=1.0, mpi_rank=0, subrank=0, info_keywords=()):
     """
     Create a wrapped, monitored gym.Env for safety.

     Loads the particle scenario named by ``env_id``, builds its world, and
     wraps the resulting multi-agent env with Monitor, action clipping, and
     optional reward scaling.
     """
     scenario = scenarios.load('{}.py'.format(env_id)).Scenario()
     world = scenario.make_world()
     # Hand the scenario callbacks to the multi-agent environment.
     env = gym.make(
         'MultiAgent-v0',
         world=world,
         reset_callback=scenario.reset_world,
         reward_callback=scenario.reward,
         observation_callback=scenario.observation,
         info_callback=None,
         done_callback=scenario.done,
         shared_viewer=True,
     )
     # Offset the seed per subrank so parallel workers get distinct RNG streams.
     env.seed(seed + subrank if seed is not None else None)
     # Monitor log path is "<mpi_rank>.<subrank>"; falsy logger_dir disables it.
     monitor_path = logger_dir and os.path.join(
         logger_dir, str(mpi_rank) + '.' + str(subrank))
     env = Monitor(env, monitor_path,
                   allow_early_resets=True,
                   info_keywords=info_keywords)
     env = ClipActionsWrapper(env)
     if reward_scale != 1.0:
         from baselines.common.retro_wrappers import RewardScaler
         env = RewardScaler(env, reward_scale)
     return env
Example #2
0
 def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
     """Apply the standard N64 preprocessing stack to ``env``.

     Skips/max-pools every 4 frames, resizes to 150x100, stacks
     ``frame_stack`` frames, scales pixels to floats, and scales rewards.

     :param env: the environment to wrap.
     :param reward_scale: multiplier applied to every reward.
     :param frame_stack: number of consecutive frames to stack.
     :param grayscale: convert observations to grayscale if True.
     :return: the fully wrapped environment.
     """
     env = MaxAndSkipEnv(env, skip=4)
     env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
     env = FrameStack(env, frame_stack)
     env = ScaledFloatFrame(env)
     # Bug fix: reward_scale was previously ignored in favor of a
     # hard-coded 1 / 100.0.
     env = RewardScaler(env, scale=reward_scale)
     return env
Example #3
0
 def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, normalize_observations=True):
     """Preprocess an N64 env: frame-skip, resize to 450x300 RGB,
     float-scale, optional mean-image normalization, then reward scaling.

     NOTE(review): ``frame_stack`` is accepted but never used here —
     confirm whether a FrameStack wrapper was intended.
     """
     wrapped = MaxAndSkipEnv(env, skip=4)
     wrapped = WarpFrame(wrapped, width=450, height=300, grayscale=False)
     wrapped = ScaledFloatFrame(wrapped)
     if normalize_observations:
         wrapped = ImageNormalizer(wrapped, mean=SSB64_IMAGE_MEAN)
     return RewardScaler(wrapped, scale=reward_scale)
Example #4
0
        def _thunk():
            """Build, seed, and monitor one worker env (closure over
            env_id / env_type / seed / mpi_rank / rank / wrapper_kwargs /
            reward_scale)."""
            if env_type == 'atari':
                env = make_atari(env_id)
            else:
                env = gym.make(env_id)
            # Unique per-worker seed derived from MPI rank and subrank.
            env.seed(seed + 10000*mpi_rank + rank if seed is not None else None)
            monitor_file = logger.get_dir() and os.path.join(
                logger.get_dir(), str(mpi_rank) + '.' + str(rank))
            env = Monitor(env, monitor_file, allow_early_resets=True)
            if env_type == 'atari':
                return wrap_deepmind(env, **wrapper_kwargs)
            if reward_scale != 1:
                return RewardScaler(env, reward_scale)
            return env
Example #5
0
def make_env(env_id, seed, train=True, logger_dir=None, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for safety.

    Seeds the env, clips continuous actions to the action-space bounds,
    and optionally scales rewards.
    """
    env = gym.make(env_id, train=train)
    # tuple("s") in the original == ('s',): a single info keyword.
    env = Monitor(env, logger_dir, allow_early_resets=True, info_keywords=('s',))
    env.seed(seed)
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale == 1.0:
        return env
    from baselines.common.retro_wrappers import RewardScaler
    return RewardScaler(env, reward_scale)
Example #6
0
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.

    Each MPI rank derives its own global seed (seed + 1000 * rank) and
    writes its own Monitor log. ``reward_scale`` multiplies every reward
    when it differs from 1.0.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    # Bug fix: guard against logger.get_dir() returning None, which would
    # crash os.path.join (same guard as the other make_mujoco_env variant).
    logger_path = None if logger.get_dir() is None else os.path.join(
        logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
Example #7
0
def wrap_n64(env,
             reward_scale=1 / 100.0,
             frame_skip=4,
             width=150,
             height=100,
             grayscale=True,
             normalize_observations=True):
    """Apply the N64 preprocessing stack to ``env``.

    :param env: the environment to wrap.
    :param reward_scale: multiplier applied to every reward.
    :param frame_skip: number of frames to skip/max-pool per step.
    :param width: resized observation width.
    :param height: resized observation height.
    :param grayscale: convert observations to grayscale if True.
    :param normalize_observations: subtract the SSB64 mean image if True.
    :return: the fully wrapped environment.
    """
    env = MaxAndSkipEnv(env, skip=frame_skip)
    env = WarpFrame(env, width=width, height=height, grayscale=grayscale)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    # Bug fix: reward_scale was previously ignored in favor of a
    # hard-coded 1 / 100.0.
    env = RewardScaler(env, scale=reward_scale)
    return env
Example #8
0
def make_env(env_id, seed, train=True, logger_dir=None, reward_scale=1.0, mpi_rank=0, subrank=0):
    """
    Create a wrapped, monitored gym.Env for safety.

    Seeds the env once per worker (offset by subrank), attaches a Monitor
    logging to "<mpi_rank>.<subrank>", clips actions, and optionally
    scales rewards.
    """
    env = gym.make(env_id, **{"train": train})
    # Bug fix: the original called env.seed(seed) again after Monitor,
    # overriding this per-worker seed and making the subrank offset a no-op.
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
Example #9
0
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """Build a monitored MuJoCo gym.Env with per-rank seeding.

    (Daniel: doesn't seem to get called, at least for DDPG on
    HalfCheetah-v2.)
    """
    rank = MPI.COMM_WORLD.Get_rank()
    if seed is None:
        myseed = None
    else:
        myseed = seed + 1000 * rank
    set_global_seeds(myseed)
    env = gym.make(env_id)
    # Only build a log path when the logger actually has a directory.
    if logger.get_dir() is None:
        logger_path = None
    else:
        logger_path = os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale == 1.0:
        return env
    from baselines.common.retro_wrappers import RewardScaler
    return RewardScaler(env, reward_scale)
        def _thunk():
            """Build one worker env (Unity or Atari/gym), seed it per rank,
            and wrap it with Monitor plus the usual reward wrappers."""
            if env_type == 'unity':
                from gym_unity.envs import UnityEnv
                import random
                # Random offset keeps concurrent Unity workers apart.
                offset = random.randint(64, 164)
                print("***** HELLO", mpi_rank + offset)
                env = UnityEnv(env_id, mpi_rank + offset)
            elif env_type == 'atari':
                env = make_atari(env_id)
            else:
                env = gym.make(env_id)
            env.seed(seed + 10000*mpi_rank + rank if seed is not None else None)
            monitor_file = logger.get_dir() and os.path.join(
                logger.get_dir(), str(mpi_rank) + '.' + str(rank))
            env = Monitor(env, monitor_file, allow_early_resets=True)
            if env_type == 'atari':
                return wrap_deepmind(env, **wrapper_kwargs)
            if reward_scale != 1:
                return RewardScaler(env, reward_scale)
            return env
Example #11
0
        def _thunk():
            """Build one ProstheticsEnv worker: dict observations forced,
            flattened to lists, made JSON-serializable, seeded per rank,
            and monitored."""
            env = ProstheticsEnv(visualize=False)
            worker_seed = (seed + 10000 * mpi_rank + rank
                           if seed is not None else None)
            env.seed(worker_seed)
            for wrapper in (ForceDictObservation, DictToListFull, JSONable):
                env = wrapper(env)
            monitor_file = logger.get_dir() and os.path.join(
                logger.get_dir(), str(mpi_rank) + '.' + str(rank))
            env = Monitor(env, monitor_file)
            if reward_scale != 1:
                return RewardScaler(env, reward_scale)
            return env
Example #12
0
def make_env(env_id,
             seed,
             train=True,
             logger_dir=None,
             mpi_rank=0,
             subrank=0,
             reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    # NOTE(review): `penalty` is not a parameter of this function and is not
    # defined in the visible scope — unless it is a module-level global,
    # this line raises NameError. Confirm, and consider adding it as a
    # keyword argument with a default.
    env = gym.make(env_id, **{"train": train, "penalty": penalty})
    # Monitor log path is "<mpi_rank>.<subrank>"; a falsy logger_dir
    # disables file logging (the `and` expression yields logger_dir itself).
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    env.seed(seed)
    # Clip continuous actions to the action-space bounds.
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env