def wrap_deepmind(env, downsample=True, episode_life=True, clip_rewards=True,
                  frame_stack=False, scale=False, color=False):
    """Configure environment for DeepMind-style Atari."""
    env_id = str(env.spec.id).lower()
    # VideoPinball and Tennis first get a full-size 160x210 color warp.
    if "videopinball" in env_id or 'tennis' in env_id:
        env = WarpFrame(env, width=160, height=210, grayscale=False)
    if episode_life:
        env = EpisodicLifeEnv(env)
    # Auto-press FIRE on reset for games that need it to start.
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    if downsample:
        env = WarpFrame(env, grayscale=False)
    if not color:
        env = GrayscaleWrapper(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
def make_sonic_env(game, state, remote_env=False, scale_rew=True,
                   video_dir="", short_life=False, backtracking=False):
    """
    Create an environment with some standard wrappers.
    """
    # Connect to the remote contest socket, or build the game locally.
    if remote_env:
        wrapped = grc.RemoteEnv("tmp/sock")
    else:
        wrapped = make(game=game, state=state, bk2dir=video_dir)
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    if short_life:
        wrapped = ShortLife(wrapped)
    if backtracking:
        wrapped = AllowBacktracking(wrapped)
    return wrapped
def wrap_modified_rr(env, episode_life=True, episode_reward=False,
                     episode_frame=False, norm_rewards=True,
                     frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari modified as described
    in RUDDER paper;
    """
    if episode_life:
        print("Episode Life")
        env = EpisodicLifeEnv(env)
    if episode_reward:
        print("Episode Reward")
        env = EpisodicRewardEnv(env)
    if episode_frame:
        print("Episode Frame")
        env = EpisodicFrameEnv(env)
    # Games whose raw reward scale is left untouched (no normalization).
    _ori_r_games = ('DoubleDunk', 'Boxing', 'Freeway', 'Pong', 'Bowling',
                    'Skiing', 'IceHockey', 'Enduro')
    original_reward = any(name in env.spec.id for name in _ori_r_games)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if norm_rewards and not original_reward:
        print("Normalizing reward....")
        env = NormRewardEnv(env, 100.)
    else:
        print("Normal reward")
    if frame_stack:
        env = FrameStack(env, 4)
    return env
def main():
    # Create the game environment.
    # To use make_atari from baselines the name must contain "NoFrameskip".
    env = make_atari("BreakoutNoFrameskip-v0")
    # Convert it to gray scale and resize it to 84x84.
    env = WarpFrame(env)
    # Stack the last 4 frames to create a history.
    env = FrameStack(env, k=4)
    # Initialize the model.
    # Image input, so use a CNN; convs = [(n_outputs, kernel_size, stride)].
    model = deepq.models.cnn_to_mlp(convs=[(32, 3, 1), (32, 3, 1)],
                                    hiddens=[256])
    # Train the model.
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-2,
        # number of iterations to optimize for
        max_timesteps=10000,
        buffer_size=1000,
        # fraction of entire training period over which the exploration rate is annealed
        exploration_fraction=0.1,
        # final value of random action probability
        exploration_final_eps=0.01,
        print_freq=10)
    print("Saving model to breakout_model.pkl")
    act.save("breakout_model.pkl")
def _thunk():
    """Build one SuperMarioWorld worker environment."""
    # args.level == 0 means "distribute levels across worker ranks";
    # otherwise every worker plays the same requested level.
    chosen = (rank if args.level == 0 else args.level) % len(LEVELS)
    env = retro.make(game='SuperMarioWorld-Snes', state=LEVELS[chosen])
    env = SnesDiscretizer(env)
    env = WarpFrame(env)
    # Sticky-action frame skip: standard for normal play but not always
    # good for 'exploitation'.
    if args.skip:
        env = StochasticFrameSkip(env, n=4, stickprob=0.25)
    env = TransposeImage(env, op=[2, 0, 1])
    env = TimeLimit(env, max_episode_steps=args.episode_length)
    env = ProcessFrameMario(env)
    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)),
                            allow_early_resets=True)
    return env
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1,
                      grayscale=False):
    """Configure an N64 environment DeepMind-style.

    Applies frame skipping, resizes frames to 150x100, stacks
    ``frame_stack`` observations, converts pixels to scaled floats and
    rescales rewards.

    Args:
        env: environment to wrap.
        reward_scale: multiplier applied to every reward.
        frame_stack: number of consecutive observations to stack.
        grayscale: convert frames to grayscale when True.

    Returns:
        The wrapped environment.
    """
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
    env = FrameStack(env, frame_stack)
    env = ScaledFloatFrame(env)
    # Fix: the reward_scale argument was previously ignored in favor of a
    # hard-coded 1 / 100.0 — honor the caller's value.
    env = RewardScaler(env, scale=reward_scale)
    return env
def make_env(stack=True, scale_rew=True, game=None, state=None, seed=0,
             render=False):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = make(game=game, state=state)
    wrapped.seed(seed)
    wrapped = AllowBacktracking(wrapped)
    # Only write monitor files when a logger directory is configured.
    monitor_path = logger.get_dir() and os.path.join(logger.get_dir(),
                                                     str(seed))
    wrapped = Monitor(wrapped, monitor_path, allow_early_resets=True)
    wrapped = SonicDiscretizer(wrapped, render)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def _make_dqn(unity_env, train_mode, reward_range=(-np.inf, np.inf)):
    """Wrap a Unity ML-Agents environment for DQN-style training."""
    wrapped = MLToGymEnv(unity_env, train_mode=train_mode,
                         reward_range=reward_range)
    wrapped = FloatToUInt8Frame(wrapped)
    wrapped = WarpFrame(wrapped)   # makes sure we have 84 x 84 b&w
    return FrameStack(wrapped, 4)  # stack last 4 frames
def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1,
                      normalize_observations=True):
    """DeepMind-style preprocessing for an N64 env at 450x300 RGB."""
    # NOTE(review): the frame_stack parameter is accepted but never used
    # here — confirm whether stacking was intended.
    env = MaxAndSkipEnv(env, skip=4)
    env = WarpFrame(env, width=450, height=300, grayscale=False)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    return RewardScaler(env, scale=reward_scale)
def make_goalPacman():
    """Build the goal-conditioned MsPacman environment."""
    env = make_atari('MsPacmanNoFrameskip-v4')
    # Life-loss signal, cropping and warping before stacking frames.
    for wrapper in (LifeLossEnv, CroppedFrame, WarpFrame):
        env = wrapper(env)
    env = FrameStack(env, 4)
    return GoalMsPacman(env)
def _thunk():
    """Create one Neyboy worker env with warped (resized) frames."""
    base = make_neyboy_environment(env_id, seed, rank, allow_early_resets,
                                   frame_skip=frame_skip, save_obs=save_obs)
    return WarpFrame(base)
def _thunk():
    """Create one Neyboy worker env (video capture variant) with warped frames."""
    base = make_neyboy_environment(env_id, seed, rank, allow_early_resets,
                                   frame_skip=frame_skip,
                                   save_video=save_video)
    return WarpFrame(base)
def wrap_deepmind_retro(env, scale=True, frame_stack=0):
    """
    Configure environment for retro games, using config similar to
    DeepMind-style Atari in wrap_deepmind
    """
    env = ClipRewardEnv(WarpFrame(env, grayscale=False))
    if frame_stack > 1:
        env = FrameStack(env, frame_stack)
    return ScaledFloatFrame(env) if scale else env
def wrap_env(env, episode_life=False):
    """Standard Atari preprocessing: noop resets, frame skip, FIRE-on-reset,
    warped frames, 4-frame stack and float scaling."""
    if episode_life:
        env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, 30)
    env = MaxAndSkipEnv(env, 4)
    # Press FIRE on reset when the game requires it to start.
    if env.unwrapped.get_action_meanings()[1] == 'FIRE':
        env = FireResetEnv(env)
    env = WarpFrame(env)  # default size (84x84)
    env = FrameStack(env, 4)
    return ScaledFloatFrame(env)
def make_env(env, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = SonicDiscretizer(env)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def make_env_ice(game_name):
    """Create an IceLake gym environment with warp/scale/stack wrappers."""
    from baselines.common.atari_wrappers import (FrameStack, WarpFrame,
                                                 MaxAndSkipEnv,
                                                 ScaledFloatFrame)
    import gym
    import cvar.dqn.ice_lake  # registers the IceLake envs with gym

    env = gym.make(game_name)
    env = ScaledFloatFrame(WarpFrame(env))
    return FrameStack(env, 4)
def make_env_joint(game, state, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = AllowBacktracking(make(game=game, state=state))
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def _thunk():
    """Build one worker env with the full Unreal-style wrapper chain."""
    pipeline = (
        lambda e: NoopResetEnv(e, noop_max=30),
        lambda e: MaxAndSkipEnv(e, skip=4),
        RewardCollector,
        EpisodicLifeEnv,
        ClipRewardEnv,
        WarpFrame,
        ScaledFloatFrame,
        TransposeImage,
        UnrealEnvBaseWrapper,
    )
    env = gym.make(**env_base)
    for stage in pipeline:
        env = stage(env)
    return env
def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = grc.RemoteEnv('tmp/sock')
    wrapped = CustomSonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = retro.make('SonicTheHedgehog-Genesis',
                         'GreenHillZone.Act1.state')
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def make_local_env(game, state, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers, without requiring
    a container environment.
    """
    wrapped = make(game, state)
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def make_remote_env(stack=True, scale_rew=True, socket_dir='/tmp'):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = grc.RemoteEnv(socket_dir)
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    if stack:
        wrapped = FrameStack(wrapped, 4)
    return EpisodeInfo(wrapped)
def create_env(self, env):
    """Build the fully wrapped Atari environment from an env-id string."""
    env = gym.make(env)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    for stage in (RewardCollector, EpisodicLifeEnv, ClipRewardEnv,
                  WarpFrame):
        env = stage(env)
    env = FrameStack(env, 4)
    for stage in (ConvertToNumpy, TransposeImage, ScaledFloatFrame):
        env = stage(env)
    return env
def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = grc.RemoteEnv('tmp/sock')
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def make_env_GHZ1(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = make(game='SonicTheHedgehog-Genesis',
                   state='GreenHillZone.Act1')
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
def _thunk():
    # Resolve the base environment: dm_control tasks use the
    # "dm.<domain>.<task>" id form; everything else goes through gym.
    if env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    else:
        env = gym.make(env_id)
    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    # Atari envs are re-created through make_atari for standard wrapping.
    if is_atari:
        env = make_atari(env_id)
    env.seed(seed + rank)
    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)
    # minigrid
    keep_classes = ['agent', 'goal', 'wall', 'empty']
    if 'key' in env_id.lower():
        keep_classes.extend(['door', 'key'])
    if env_id.startswith('MiniGrid'):
        env = mgwr.FullyObsWrapper(env)
        env = mgwr.ImgObsWrapper(env)
        env = mgwr.FullyObsOneHotWrapper(env, drop_color=1,
                                         keep_classes=keep_classes,
                                         flatten=False)
    if log_dir is not None:
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)),
                            allow_early_resets=allow_early_resets)
    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)
    elif len(env.observation_space.shape) == 3:
        # Non-Atari pixel envs: only CarRacing is supported explicitly.
        if env_id.startswith('CarRacing'):
            env = WarpFrame(env, width=96, height=96, grayscale=True)
            env = ScaledFloatFrame(env)
        else:
            raise NotImplementedError
    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3:
        env = TransposeImage(env, op=[2, 0, 1])
    return env
def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    # NOTE(review): the stack parameter is accepted but never used here —
    # confirm whether a FrameStack step was intended.
    # Sample a random (game, state) pair from the training set.
    start_state = train_states.sample().iloc[0]
    wrapped = make(game=start_state.game, state=start_state.state,
                   max_episode_steps=600)
    wrapped = SonicDiscretizer(wrapped)
    wrapped = RandomGameReset(wrapped)
    wrapped = EpisodeInfo(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    return WarpFrame(wrapped)
def _thunk():
    # Resolve the base environment: dm_control ids look like
    # "dm.<domain>.<task>"; "tetris_single" is a custom env; the rest
    # go through gym's registry.
    if env_id.startswith("dm"):
        _, domain, task = env_id.split('.')
        env = dm_control2gym.make(domain_name=domain, task_name=task)
    elif env_id == "tetris_single":
        env = TetrisSingleEnv(obs_type=obs_type, mode=mode)
    else:
        env = gym.make(env_id)
    is_atari = hasattr(gym.envs, 'atari') and isinstance(
        env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
    if is_atari:
        env = make_atari(env_id)
    env.seed(seed + rank)
    obs_shape = env.observation_space.shape
    if str(env.__class__.__name__).find('TimeLimit') >= 0:
        env = TimeLimitMask(env)
    # if log_dir is not None:
    #     env = bench.Monitor(
    #         env,
    #         os.path.join(log_dir, str(rank)),
    #         allow_early_resets=allow_early_resets)
    if is_atari:
        if len(env.observation_space.shape) == 3:
            env = wrap_deepmind(env)
    elif env_id.startswith("tetris"):
        # env = wrap_deepmind(env, episode_life=False, clip_rewards=False, frame_stack=False, scale=False)
        # env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=skip_frames)
        # pass
        # Image observations are resized to 224x224 for the CNN.
        if obs_type == "image":
            env = WarpFrame(env, 224, 224)
    elif len(env.observation_space.shape) == 3:
        raise NotImplementedError(
            "CNN models work only for atari,\n"
            "please use a custom wrapper for a custom pixel input env.\n"
            "See wrap_deepmind for an example.")
    # If the input has shape (W,H,3), wrap for PyTorch convolutions
    obs_shape = env.observation_space.shape
    if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        env = TransposeImage(env, op=[2, 0, 1])
    return env
def wrap_n64(env, reward_scale=1 / 100.0, frame_skip=4, width=150,
             height=100, grayscale=True, normalize_observations=True):
    """Wrap an N64 environment with the standard preprocessing chain.

    Args:
        env: environment to wrap.
        reward_scale: multiplier applied to every reward.
        frame_skip: number of frames to skip/max-pool per step.
        width, height: target frame size.
        grayscale: convert frames to grayscale when True.
        normalize_observations: subtract the SSB64 image mean when True.

    Returns:
        The wrapped environment.
    """
    env = MaxAndSkipEnv(env, skip=frame_skip)
    env = WarpFrame(env, width=width, height=height, grayscale=grayscale)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    # Fix: the reward_scale argument was previously ignored in favor of a
    # hard-coded 1 / 100.0 — honor the caller's value.
    env = RewardScaler(env, scale=reward_scale)
    return env
def wrap_deepmind(env, episode_life=True, clip_rewards=True,
                  frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari."""
    if episode_life:
        env = EpisodicLifeEnv(env)
    # Auto-press FIRE on reset for games that need it to start.
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    for enabled, wrapper in ((scale, ScaledFloatFrame),
                             (clip_rewards, ClipRewardEnv)):
        if enabled:
            env = wrapper(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env