def make_env(self, env_id, seed, logger_dir=None, reward_scale=1.0, mpi_rank=0, subrank=0, info_keywords=()):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    scenario = scenarios.load('{}.py'.format(env_id)).Scenario()
    world = scenario.make_world()
    env_dict = {
        "world": world,
        'reset_callback': scenario.reset_world,
        'reward_callback': scenario.reward,
        'observation_callback': scenario.observation,
        'info_callback': None,
        'done_callback': scenario.done,
        'shared_viewer': True
    }
    env = gym.make('MultiAgent-v0', **env_dict)
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True,
                  info_keywords=info_keywords)
    env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
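# Hedged usage sketch (not from the original source): the multi-agent make_env above
# follows the baselines pattern of building one env per (rank, subrank) and vectorizing
# the factories. DummyVecEnv is the real baselines class; the `builder.make_env` call and
# the 'simple_spread' scenario name are illustrative assumptions.
def example_vectorized_multiagent(builder, num_envs=4, seed=0):
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    env_fns = [
        (lambda i=i: builder.make_env('simple_spread', seed=seed, subrank=i))
        for i in range(num_envs)
    ]
    return DummyVecEnv(env_fns)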
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
    env = FrameStack(env, frame_stack)
    env = ScaledFloatFrame(env)
    # Use the reward_scale argument rather than a hard-coded constant.
    env = RewardScaler(env, scale=reward_scale)
    return env
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, normalize_observations=True):
    # NOTE: frame_stack is accepted but not applied in this variant.
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=450, height=300, grayscale=False)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    env = RewardScaler(env, scale=reward_scale)
    return env
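# Hedged sketch (assumption, not the original implementation): wrap_deepmind_n64 and
# wrap_n64 assume an ImageNormalizer wrapper and an SSB64_IMAGE_MEAN constant that are
# not shown here. A minimal mean-subtracting ObservationWrapper consistent with that
# call signature could look like this.
import gym
import numpy as np

class ImageNormalizer(gym.ObservationWrapper):
    """Subtract a fixed (per-pixel or per-channel) mean from image observations."""
    def __init__(self, env, mean):
        super().__init__(env)
        self.mean = np.asarray(mean, dtype=np.float32)

    def observation(self, obs):
        return np.asarray(obs, dtype=np.float32) - self.mean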
def _thunk():
    env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        return wrap_deepmind(env, **wrapper_kwargs)
    elif reward_scale != 1:
        return RewardScaler(env, reward_scale)
    else:
        return env
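# Hedged usage sketch (assumption): _thunk above is the zero-argument, per-worker closure
# that baselines-style code hands to a vectorized environment. SubprocVecEnv is the real
# baselines class; the surrounding make_vec_env-style wiring here is illustrative only,
# with make_thunk(rank) assumed to return a closure like _thunk.
def example_make_vec_env(make_thunk, num_envs=8):
    from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
    return SubprocVecEnv([make_thunk(rank) for rank in range(num_envs)])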
def make_env(env_id, seed, train=True, logger_dir=None, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    env = gym.make(env_id, **{"train": train})
    # tuple("s") == ('s',): log the 's' key from the info dict in the monitor file.
    env = Monitor(env, logger_dir, allow_early_resets=True, info_keywords=tuple("s"))
    env.seed(seed)
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
def make_mujoco_env(env_id, seed, reward_scale=1.0): """ Create a wrapped, monitored gym.Env for MuJoCo. """ rank = MPI.COMM_WORLD.Get_rank() myseed = seed + 1000 * rank if seed is not None else None set_global_seeds(myseed) env = gym.make(env_id) env = Monitor(env, os.path.join(logger.get_dir(), str(rank)), allow_early_resets=True) env.seed(seed) if reward_scale != 1.0: from baselines.common.retro_wrappers import RewardScaler env = RewardScaler(env, reward_scale) return env
def wrap_n64(env, reward_scale=1 / 100.0, frame_skip=4, width=150, height=100, grayscale=True, normalize_observations=True):
    env = MaxAndSkipEnv(env, skip=frame_skip)
    env = WarpFrame(env, width=width, height=height, grayscale=grayscale)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    # Use the reward_scale argument rather than a hard-coded constant.
    env = RewardScaler(env, scale=reward_scale)
    return env
def make_env(env_id, seed, train=True, logger_dir=None, reward_scale=1.0, mpi_rank=0, subrank=0):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    env = gym.make(env_id, **{"train": train})
    # Seed once, offset by subrank; the original re-seeded with the bare seed afterwards,
    # which would have undone the per-worker offset.
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
def make_mujoco_env(env_id, seed, reward_scale=1.0): """Create a wrapped, monitored gym.Env for MuJoCo. Daniel: doesn't seem to get called, at least for DDPG on HalfCheetah-v2. """ rank = MPI.COMM_WORLD.Get_rank() myseed = seed + 1000 * rank if seed is not None else None set_global_seeds(myseed) env = gym.make(env_id) logger_path = None if logger.get_dir() is None else os.path.join( logger.get_dir(), str(rank)) env = Monitor(env, logger_path, allow_early_resets=True) env.seed(seed) if reward_scale != 1.0: from baselines.common.retro_wrappers import RewardScaler env = RewardScaler(env, reward_scale) return env
def _thunk():
    if env_type == 'unity':
        from gym_unity.envs import UnityEnv
        import random
        # Offset the worker id randomly, presumably to avoid Unity worker/port collisions.
        r = random.randint(64, 164)
        print("***** HELLO", mpi_rank + r)
        env = UnityEnv(env_id, mpi_rank + r)
    else:
        env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        return wrap_deepmind(env, **wrapper_kwargs)
    elif reward_scale != 1:
        return RewardScaler(env, reward_scale)
    else:
        return env
def _thunk():
    env = ProstheticsEnv(visualize=False)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    env = ForceDictObservation(env)
    env = DictToListFull(env)
    env = JSONable(env)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)))
    if reward_scale != 1:
        return RewardScaler(env, reward_scale)
    else:
        return env
def make_env(env_id, seed, train=True, penalty=0.0, logger_dir=None, mpi_rank=0, subrank=0, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for safety.
    """
    # `penalty` was referenced but missing from the signature; the 0.0 default is an assumption.
    env = gym.make(env_id, **{"train": train, "penalty": penalty})
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    env.seed(seed)
    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
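# Hedged sketch (paraphrase): ClipActionsWrapper, used by several make_env variants above,
# clips continuous actions to the env's Box bounds before stepping, roughly along the lines
# of the baselines.common.wrappers implementation.
import gym
import numpy as np

class ClipActionsWrapperSketch(gym.Wrapper):
    def step(self, action):
        # Replace NaNs and clamp each action dimension to [low, high].
        action = np.nan_to_num(action)
        action = np.clip(action, self.action_space.low, self.action_space.high)
        return self.env.step(action)

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)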