Beispiel #1
0
def wrap_deepmind(env,
                  downsample=True,
                  episode_life=True,
                  clip_rewards=True,
                  frame_stack=False,
                  scale=False,
                  color=False):
    """Configure an Atari environment with the DeepMind-style wrapper stack.

    Wrappers are applied in order: episodic-life handling, FIRE-on-reset,
    optional downsampling, optional grayscale conversion, optional float
    scaling, optional reward clipping, optional 4-frame stacking.
    """
    env_name = str(env.spec.id).lower()
    # These two games keep the full 160x210 colour frame before any
    # further processing.
    if "videopinball" in env_name or 'tennis' in env_name:
        env = WarpFrame(env, width=160, height=210, grayscale=False)
    if episode_life:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    if downsample:
        env = WarpFrame(env, grayscale=False)
    if not color:
        env = GrayscaleWrapper(env)
    if scale:
        env = ScaledFloatFrame(env)
    if clip_rewards:
        env = ClipRewardEnv(env)
    if frame_stack:
        env = FrameStack(env, 4)
    return env
Beispiel #2
0
def make_sonic_env(
    game,
    state,
    remote_env=False,
    scale_rew=True,
    video_dir="",
    short_life=False,
    backtracking=False,
):
    """
    Create a Sonic environment wrapped with the standard preprocessing.
    """
    # Either connect to a remote contest environment or build a local one.
    if remote_env:
        wrapped = grc.RemoteEnv("tmp/sock")
    else:
        wrapped = make(game=game, state=state, bk2dir=video_dir)
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    if short_life:
        wrapped = ShortLife(wrapped)
    if backtracking:
        wrapped = AllowBacktracking(wrapped)
    return wrapped
Beispiel #3
0
def wrap_modified_rr(env, episode_life=True, episode_reward=False, episode_frame=False,
                     norm_rewards=True,
                     frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari, modified as described
    in the RUDDER paper.

    Games listed in ``_ori_r_games`` keep their original rewards; all other
    games are passed through ``NormRewardEnv(env, 100.)`` when
    ``norm_rewards`` is set.
    """
    if episode_life:
        print("Episode Life")
        env = EpisodicLifeEnv(env)
    if episode_reward:
        print("Episode Reward")
        env = EpisodicRewardEnv(env)
    if episode_frame:
        print("Episode Frame")
        env = EpisodicFrameEnv(env)
    # Games whose reward scale is left untouched (per the RUDDER setup).
    _ori_r_games = ['DoubleDunk', 'Boxing', 'Freeway', 'Pong',
                    'Bowling', 'Skiing', 'IceHockey', 'Enduro']
    # Generator expression: any() short-circuits, no need to build a list.
    original_reward = any(game in env.spec.id for game in _ori_r_games)

    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)
    if scale:
        env = ScaledFloatFrame(env)
    if norm_rewards and not original_reward:
        print("Normalizing reward....")
        env = NormRewardEnv(env, 100.)
    else:
        print("Normal reward")
    if frame_stack:
        env = FrameStack(env, 4)
    return env
def main():
    """Train a DQN agent on Breakout with baselines.deepq and save the policy."""
    # create the game environment
    # To use make_atari from baselines the name must contain "NoFrameskip"
    env = make_atari("BreakoutNoFrameskip-v0")
    # Convert it to gray scale and resize it to 84x84
    env = WarpFrame(env)
    # Stack last 4 frames to create history
    env = FrameStack(env, k=4)
    # initialize the model
    # image input so use a CNN
    # convs = [(n_outputs, kernel_size, stride), ...]
    model = deepq.models.cnn_to_mlp(convs=[(32, 3, 1), (32, 3, 1)],
                                    hiddens=[256])
    # train the model
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-2,
        # number of iterations to optimize for
        max_timesteps=10000,
        buffer_size=1000,
        # fraction of entire training period over which the exploration rate is annealed
        exploration_fraction=0.1,
        # final value of random action probability
        exploration_final_eps=0.01,
        print_freq=10)
    print("Saving model to breakout_model.pkl")
    act.save("breakout_model.pkl")
Beispiel #5
0
    def _thunk():
        """Build one SuperMarioWorld-Snes env for a worker.

        Uses ``args``, ``rank``, ``log_dir`` and ``LEVELS`` from the
        enclosing scope.
        """
        # Pick a level: cycle by worker rank unless a specific level was
        # requested via args.level.
        if args.level == 0:
            mrank = rank % len(LEVELS)
        else:
            mrank = args.level % len(LEVELS)

        env = retro.make(game='SuperMarioWorld-Snes', state=LEVELS[mrank])

        env = SnesDiscretizer(env)
        env = WarpFrame(env)

        # Repeat each action for ~4 frames -- standard for normal play but not always good for 'exploitation'
        if args.skip:
            # env = MaxAndSkipEnv(env)
            env = StochasticFrameSkip(env, n=4, stickprob=0.25)

        # Channels-first for PyTorch convolutions.
        env = TransposeImage(env, op=[2, 0, 1])
        env = TimeLimit(env, max_episode_steps=args.episode_length)
        env = ProcessFrameMario(env)

        # Log per-episode stats to <log_dir>/<rank> when monitoring is enabled.
        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=True)

        # env = TransposeImage(env, op=[2, 0, 1])

        return env
Beispiel #6
0
 def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
     """Preprocess an N64 env: 4-frame skip, 150x100 warp, frame stacking,
     float scaling, and reward scaling.

     Parameters
     ----------
     reward_scale : float
         Multiplier applied to rewards.
     frame_stack : int
         Number of frames stacked into each observation.
     grayscale : bool
         Whether WarpFrame converts frames to grayscale.
     """
     env = MaxAndSkipEnv(env, skip=4)
     env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
     env = FrameStack(env, frame_stack)
     env = ScaledFloatFrame(env)
     # Bug fix: honor the reward_scale argument; it was previously ignored
     # in favor of a hard-coded 1/100.
     env = RewardScaler(env, scale=reward_scale)
     return env
Beispiel #7
0
def make_env(stack=True, scale_rew=True, game=None, state=None, seed=0, render=False):
    """
    Create a Sonic environment with the standard preprocessing wrappers.

    The env is seeded, allowed to backtrack, monitored to the logger
    directory (when one is configured), discretized, warped, and
    optionally reward-scaled and frame-stacked.
    """
    base = make(game=game, state=state)
    base.seed(seed)
    wrapped = AllowBacktracking(base)
    log_path = logger.get_dir() and os.path.join(logger.get_dir(), str(seed))
    wrapped = Monitor(wrapped, log_path, allow_early_resets=True)
    wrapped = SonicDiscretizer(wrapped, render)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    if stack:
        wrapped = FrameStack(wrapped, 4)
    return wrapped
Beispiel #8
0
def _make_dqn(unity_env, train_mode, reward_range=(-np.inf, np.inf)):
    """Wrap a Unity ML-Agents env so it matches the DQN input contract."""
    wrapped = MLToGymEnv(unity_env,
                         train_mode=train_mode,
                         reward_range=reward_range)
    wrapped = FloatToUInt8Frame(wrapped)
    # 84x84 b&w frames, then a rolling window of the last 4 of them.
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4)
Beispiel #9
0
 def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, normalize_observations=True):
     """Preprocess an N64 env: 4-frame skip, 450x300 colour warp, optional
     frame stacking, float scaling, optional image normalization, and
     reward scaling.

     Parameters
     ----------
     reward_scale : float
         Multiplier applied to rewards.
     frame_stack : int
         Number of frames stacked into each observation; 1 (the default)
         disables stacking, preserving the previous behavior.
     normalize_observations : bool
         Subtract the SSB64 mean image from observations.
     """
     env = MaxAndSkipEnv(env, skip=4)
     env = WarpFrame(env, width=450, height=300, grayscale=False)
     # Bug fix: the frame_stack parameter was accepted but never used
     # (compare the sibling wrap_deepmind_n64 that applies FrameStack).
     if frame_stack > 1:
         env = FrameStack(env, frame_stack)
     env = ScaledFloatFrame(env)
     if normalize_observations:
         env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
     env = RewardScaler(env, scale=reward_scale)
     return env
def make_goalPacman():
    """Build the goal-conditioned MsPacman environment."""
    env = make_atari('MsPacmanNoFrameskip-v4')
    # Single-argument preprocessing wrappers, applied in order.
    for wrapper in (LifeLossEnv, CroppedFrame, WarpFrame):
        env = wrapper(env)
    env = FrameStack(env, 4)
    return GoalMsPacman(env)
Beispiel #11
0
 def _thunk():
     """Create one wrapped Neyboy environment for worker ``rank``.

     Uses ``env_id``, ``seed``, ``rank``, ``allow_early_resets``,
     ``frame_skip`` and ``save_obs`` from the enclosing scope.
     """
     env = make_neyboy_environment(env_id,
                                   seed,
                                   rank,
                                   allow_early_resets,
                                   frame_skip=frame_skip,
                                   save_obs=save_obs)
     # Standard frame warping before feeding the policy.
     env = WarpFrame(env)
     return env
Beispiel #12
0
 def _thunk():
     """Create one wrapped Neyboy environment for worker ``rank``.

     Uses ``env_id``, ``seed``, ``rank``, ``allow_early_resets``,
     ``frame_skip`` and ``save_video`` from the enclosing scope.
     """
     env = make_neyboy_environment(env_id,
                                   seed,
                                   rank,
                                   allow_early_resets,
                                   frame_skip=frame_skip,
                                   save_video=save_video)
     # env = Cropper(env)
     # Standard frame warping before feeding the policy.
     env = WarpFrame(env)
     return env
Beispiel #13
0
def wrap_deepmind_retro(env, scale=True, frame_stack=0):
    """
    Configure environment for retro games, using config similar to DeepMind-style Atari in wrap_deepmind
    """
    wrapped = ClipRewardEnv(WarpFrame(env, grayscale=False))
    if frame_stack > 1:
        wrapped = FrameStack(wrapped, frame_stack)
    return ScaledFloatFrame(wrapped) if scale else wrapped
def wrap_env(env, episode_life=False):
    """Apply the standard Atari preprocessing stack.

    Noop resets, 4-frame skip, FIRE-on-reset when the env supports it,
    frame warping, 4-frame stacking and float scaling; episodic-life
    handling is optional.
    """
    if episode_life:
        env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, 30)
    env = MaxAndSkipEnv(env, 4)
    # Membership test instead of indexing action 1 directly: indexing
    # raises IndexError on envs with fewer than two actions, and the
    # membership form matches the FIRE check used elsewhere in this file.
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env)  # , width=84, height=84)
    env = FrameStack(env, 4)
    env = ScaledFloatFrame(env)
    return env
Beispiel #15
0
def make_env(env, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = SonicDiscretizer(env)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
Beispiel #16
0
def make_env_ice(game_name):
    """Create an IceLake environment with frame preprocessing."""
    from baselines.common.atari_wrappers import FrameStack, WarpFrame, MaxAndSkipEnv, ScaledFloatFrame
    import gym
    import cvar.dqn.ice_lake

    base = gym.make(game_name)
    # env = MaxAndSkipEnv(env, skip=4)
    # Warp frames, scale pixels to floats, then stack the last 4 frames.
    wrapped = ScaledFloatFrame(WarpFrame(base))
    return FrameStack(wrapped, 4)
Beispiel #17
0
def make_env_joint(game, state, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = AllowBacktracking(make(game=game, state=state))
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
Beispiel #18
0
 def _thunk():
     """Construct one fully wrapped env instance (uses ``env_base`` from
     the enclosing scope)."""
     wrapped = gym.make(**env_base)
     wrapped = NoopResetEnv(wrapped, noop_max=30)
     wrapped = MaxAndSkipEnv(wrapped, skip=4)
     # The remaining wrappers all take only the env argument.
     for wrapper in (RewardCollector, EpisodicLifeEnv, ClipRewardEnv,
                     WarpFrame, ScaledFloatFrame, TransposeImage,
                     UnrealEnvBaseWrapper):
         wrapped = wrapper(wrapped)
     return wrapped
def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = grc.RemoteEnv('tmp/sock')
    wrapped = CustomSonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
Beispiel #20
0
def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = retro.make('SonicTheHedgehog-Genesis', 'GreenHillZone.Act1.state')
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
Beispiel #21
0
def make_local_env(game, state, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers, without requiring a container environment.
    """
    wrapped = SonicDiscretizer(make(game, state))
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
Beispiel #22
0
def make_remote_env(stack=True, scale_rew=True, socket_dir='/tmp'):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = SonicDiscretizer(grc.RemoteEnv(socket_dir))
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    if stack:
        wrapped = FrameStack(wrapped, 4)
    return EpisodeInfo(wrapped)
 def create_env(self, env):
     """Build a fully wrapped Atari env from a gym env id."""
     wrapped = gym.make(env)
     wrapped = NoopResetEnv(wrapped, noop_max=30)
     wrapped = MaxAndSkipEnv(wrapped, skip=4)
     # Single-argument wrappers before the frame stack...
     for wrapper in (RewardCollector, EpisodicLifeEnv, ClipRewardEnv,
                     WarpFrame):
         wrapped = wrapper(wrapped)
     wrapped = FrameStack(wrapped, 4)
     # ...and after it.
     for wrapper in (ConvertToNumpy, TransposeImage, ScaledFloatFrame):
         wrapped = wrapper(wrapped)
     return wrapped
def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    wrapped = grc.RemoteEnv('tmp/sock')
#    wrapped = make(game='SonicTheHedgehog-Genesis', state='LabyrinthZone.Act1')
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
Beispiel #25
0
def make_env_GHZ1(stack=True, scale_rew=True):
    """
    Create the Green Hill Zone Act 1 environment with standard wrappers.
    """
    wrapped = make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')
    wrapped = SonicDiscretizer(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    wrapped = WarpFrame(wrapped)
    return FrameStack(wrapped, 4) if stack else wrapped
Beispiel #26
0
    def _thunk():
        """Create one env (dm_control, MiniGrid, Atari, or plain gym) for
        worker ``rank``.

        Uses ``env_id``, ``seed``, ``rank``, ``log_dir`` and
        ``allow_early_resets`` from the enclosing scope.
        """
        if env_id.startswith("dm"):
            # dm_control ids look like "dm.<domain>.<task>".
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            # Re-create the env via baselines' make_atari setup.
            env = make_atari(env_id)

        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        # minigrid: object classes kept in the one-hot observation.
        keep_classes = ['agent', 'goal', 'wall', 'empty']
        if 'key' in env_id.lower():
            keep_classes.extend(['door', 'key'])

        if env_id.startswith('MiniGrid'):
            env = mgwr.FullyObsWrapper(env)
            env = mgwr.ImgObsWrapper(env)
            env = mgwr.FullyObsOneHotWrapper(env,
                                             drop_color=1,
                                             keep_classes=keep_classes,
                                             flatten=False)

        # Log per-episode stats to <log_dir>/<rank> when monitoring is enabled.
        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3:
            # Only CarRacing is supported among non-Atari pixel envs.
            if env_id.startswith('CarRacing'):
                env = WarpFrame(env, width=96, height=96, grayscale=True)
                env = ScaledFloatFrame(env)
            else:
                raise NotImplementedError

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Beispiel #27
0
def make_env(stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    # Sample a (game, state) pair from the training set each call.
    start_state = train_states.sample().iloc[0]
    wrapped = make(game=start_state.game, state=start_state.state, max_episode_steps=600)
    wrapped = SonicDiscretizer(wrapped)
    wrapped = RandomGameReset(wrapped)
    wrapped = EpisodeInfo(wrapped)
    if scale_rew:
        wrapped = RewardScaler(wrapped)
    return WarpFrame(wrapped)
Beispiel #28
0
    def _thunk():
        """Create one env (dm_control, Tetris, Atari, or plain gym) for
        worker ``rank``.

        Uses ``env_id``, ``obs_type``, ``mode``, ``seed``, ``rank`` and
        ``skip_frames`` from the enclosing scope.
        """
        if env_id.startswith("dm"):
            # dm_control ids look like "dm.<domain>.<task>".
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)

        elif env_id == "tetris_single":
            env = TetrisSingleEnv(obs_type=obs_type, mode=mode)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        # NOTE(review): this obs_shape is never read; it is recomputed
        # below after all wrapping.
        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        # if log_dir is not None:
        #     env = bench.Monitor(
        #         env,
        #         os.path.join(log_dir, str(rank)),
        #         allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif env_id.startswith("tetris"):
            # env = wrap_deepmind(env, episode_life=False, clip_rewards=False, frame_stack=False, scale=False)
            # env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=skip_frames)
            # pass
            if obs_type == "image":
                env = WarpFrame(env, 224, 224)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Beispiel #29
0
def wrap_n64(env,
             reward_scale=1 / 100.0,
             frame_skip=4,
             width=150,
             height=100,
             grayscale=True,
             normalize_observations=True):
    """Preprocess an N64 env: frame skip, warp, float scaling, optional
    image normalization, and reward scaling.

    Parameters
    ----------
    reward_scale : float
        Multiplier applied to rewards.
    frame_skip : int
        Number of frames each action is repeated for.
    width, height : int
        Target frame size for WarpFrame.
    grayscale : bool
        Whether WarpFrame converts frames to grayscale.
    normalize_observations : bool
        Subtract the SSB64 mean image from observations.
    """
    env = MaxAndSkipEnv(env, skip=frame_skip)
    env = WarpFrame(env, width=width, height=height, grayscale=grayscale)
    env = ScaledFloatFrame(env)
    if normalize_observations:
        env = ImageNormalizer(env, mean=SSB64_IMAGE_MEAN)
    # Bug fix: honor the reward_scale argument; it was previously ignored
    # in favor of a hard-coded 1/100.
    env = RewardScaler(env, scale=reward_scale)
    return env
Beispiel #30
0
def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False):
    """Configure environment for DeepMind-style Atari.

    Applies, in order: optional episodic-life handling, FIRE-on-reset
    where supported, frame warping (WarpFrame defaults), optional float
    scaling, optional reward clipping, and optional 4-frame stacking.
    """
    wrapped = EpisodicLifeEnv(env) if episode_life else env
    if 'FIRE' in wrapped.unwrapped.get_action_meanings():
        wrapped = FireResetEnv(wrapped)
    wrapped = WarpFrame(wrapped)
    if scale:
        wrapped = ScaledFloatFrame(wrapped)
    if clip_rewards:
        wrapped = ClipRewardEnv(wrapped)
    if frame_stack:
        wrapped = FrameStack(wrapped, 4)
    return wrapped