Example 1
    def _thunk():
        env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)

        if is_atari:
            # Inlined from make_atari (instead of rebuilding the env with it):
            # random no-ops on reset plus frame skipping.
            assert 'NoFrameskip' in env.spec.id
            env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)

        env.seed(seed + rank)

        if log_dir != '':
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))

        if is_atari:
            env = wrap_deepmind(env)
            env = WrapPyTorch(env)

        return env
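Several of the snippets above and below end with a WrapPyTorch or TransposeImage step whose definition is not included. As a rough sketch (assuming gym's ObservationWrapper API; the real wrappers in the original repositories may differ in details), such a wrapper simply moves the channel axis first so the (H, W, C) Atari frames match PyTorch's (C, H, W) layout:

import numpy as np
import gym
from gym import spaces

class TransposeImage(gym.ObservationWrapper):
    """Reorder image observations from (H, W, C) to (C, H, W) for PyTorch."""

    def __init__(self, env, op=(2, 0, 1)):
        super().__init__(env)
        self.op = op
        shape = self.observation_space.shape
        self.observation_space = spaces.Box(
            low=self.observation_space.low.min(),
            high=self.observation_space.high.max(),
            shape=(shape[op[0]], shape[op[1]], shape[op[2]]),
            dtype=self.observation_space.dtype)

    def observation(self, observation):
        return np.transpose(observation, self.op)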
Example 2
    def _thunk():
        random_seed(seed)
        if env_id.startswith("dm"):
            import dm_control2gym
            _, domain, task = env_id.split('-')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)
        env.seed(seed + rank)
        env = OriginalReturnWrapper(env)
        if is_atari:
            env = wrap_deepmind(env,
                                episode_life=episode_life,
                                clip_rewards=False,
                                frame_stack=False,
                                scale=False)
            obs_shape = env.observation_space.shape
            if len(obs_shape) == 3:
                env = TransposeImage(env)
            env = FrameStack(env, 4)

        return env
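Example 2 also passes rewards through an OriginalReturnWrapper before wrap_deepmind gets a chance to clip them; its definition is not part of the snippet. A minimal sketch of what such a return-tracking wrapper typically looks like (the 'episodic_return' info key is an assumption here, not necessarily the name used in the original codebase):

import gym

class OriginalReturnWrapper(gym.Wrapper):
    """Accumulate the unclipped episode return and report it through the info dict."""

    def __init__(self, env):
        super().__init__(env)
        self.total_reward = 0.0

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        self.total_reward += reward
        # Only expose the full return once the episode has finished.
        info['episodic_return'] = self.total_reward if done else None
        if done:
            self.total_reward = 0.0
        return obs, reward, done, info

    def reset(self, **kwargs):
        self.total_reward = 0.0
        return self.env.reset(**kwargs)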
Example 3
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank() if seed is not None else None
    set_global_seeds(workerseed)
    env = make_atari(env_id)
    def policy_fn(name, ob_space, ac_space): #pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)
    env = bench.Monitor(env, logger.get_dir() and
        osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)

    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env, policy_fn,
        max_timesteps=int(num_timesteps * 1.1),
        timesteps_per_actorbatch=256,
        clip_param=0.2, entcoeff=0.01,
        optim_epochs=4, optim_stepsize=1e-3, optim_batchsize=64,
        gamma=0.99, lam=0.95,
        schedule='linear'
    )
    env.close()
Example 4
def gen_frequencies():
    NUM_STEPS = 10000
    results = {}
    for game in GAMES:
        print()
        env = gym.make(game)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(game)
        env = wrap_deepmind(env)
        num_actions = env.action_space.n

        total_num_rewards = 0
        env.reset()
        for step in range(NUM_STEPS):

            obs, reward, done, info = env.step(np.random.randint(num_actions))
            if step == 0:
                print(info)
                print()
            print(game, step, end="\r")
            if reward != 0:
                total_num_rewards += 1

            if info['ale.lives'] == 0:
                env.reset()

        print(game, total_num_rewards / NUM_STEPS)
        results[game] = total_num_rewards / NUM_STEPS

    with open('frequency.pkl', 'wb') as handle:
        pickle.dump(results, handle)
Example 5
def make_env(env_id, env_type, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, silent_monitor=False):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)

    monitor = MonitorFactory(silent_monitor)
    env = monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example 6
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = make_atari(env_id)
    def policy_fn(name, ob_space, ac_space): #pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)
    env = bench.Monitor(env, logger.get_dir() and
        osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)

    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env, policy_fn,
        max_timesteps=int(num_timesteps * 1.1),
        timesteps_per_actorbatch=256,
        clip_param=0.2, entcoeff=0.01,
        optim_epochs=4, optim_stepsize=1e-3, optim_batchsize=64,
        gamma=0.99, lam=0.95,
        schedule='linear'
    )
    env.close()
Example 7
def main():
    env = gym.make("PongNoFrameskip-v4")
    env = ScaledFloatFrame(wrap_deepmind(env))
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=10000000,
        buffer_size=500000,
        exploration_fraction=0.1,
        exploration_final_eps=0.1,
        train_freq=4,
        print_freq=1,
        learning_starts=10000,
        target_network_update_freq=10000,
        gamma=0.99,
        prioritized_replay=True  #True
    )
    act.save("pong_model.pkl")
    env.close()
Example 8
    def _thunk():
        env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(env.unwrapped, gym.envs.atari.atari_env.AtariEnv)

        if is_atari:
            # Inlined from make_atari (instead of rebuilding the env with it):
            # random no-ops on reset plus frame skipping.
            assert 'NoFrameskip' in env.spec.id
            env = NoopResetEnv(env, noop_max=30)
            env = MaxAndSkipEnv(env, skip=4)

        env.seed(seed + rank)

        if log_dir != '':
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))

        if is_atari:
            # This codebase's wrap_deepmind accepts a warp flag; frame warping is disabled here.
            env = wrap_deepmind(env, warp=False)
            env = WrapPyTorch(env)

        return env
Example 9
 def _thunk():
     env = gym.make(env_id)
     env.seed(seed + rank)
     if logger.get_dir():
         env = bench.Monitor(env, os.path.join(logger.get_dir(), "{}.monitor.json".format(rank)))
     gym.logger.setLevel(logging.WARN)
     return wrap_deepmind(env)
Example 10
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        elif 'Mini' in env_id:
            import gym_minigrid
            env = gym_minigrid.envs.dynamicobstacles.DynamicObstaclesEnv(
                size=5, n_obstacles=1)
            # env = gym_minigrid.envs.multiroom.MultiRoomEnv(
            #     minNumRooms=2, maxNumRooms=2, maxRoomSize=4
            # )
            env = gym_minigrid.wrappers.NoOpAsync(env,
                                                  costs=[2, 1],
                                                  which_obs='first')

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        is_minigrid = 'minigrid' in env_id.lower()

        env.seed(seed + rank)

        if is_minigrid:
            from gym_minigrid.wrappers import ImgObsWrapper, RGBImgObsWrapper, RGBImgPartialObsWrapper, FlatObsWrapper
            # env = RGBImgPartialObsWrapper(
            #     env, tile_size=2)
            # env = ImgObsWrapper(env)
            env = FlatObsWrapper(env)
            # env.observation_space = env.observation_space['image']

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif is_minigrid:
            pass
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Example 11
    def _thunk():
        # random_seed(seed)
        if env_id.startswith("dm"):
            import dm_control2gym
            _, domain, task = env_id.split('-')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)
        env.seed(seed + rank)

        if log_dir is not None:
            # env = Monitor(env=env, filename=os.path.join(log_dir, str(rank)), allow_early_resets=True)
            env = bench.Monitor(env=env,
                                filename=os.path.join(log_dir, str(rank)),
                                allow_early_resets=True)
        if is_atari:
            env = wrap_deepmind(env,
                                episode_life=episode_life,
                                clip_rewards=False,
                                frame_stack=False,
                                scale=False)
            obs_shape = env.observation_space.shape
            if len(obs_shape) == 3:
                env = TransposeImage(env)
            env = FrameStack(env, 4)

        return env
Example 12
def enjoy(env_id, seed, policy, model_filename, fps=100):
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy

    env = wrap_deepmind(make_atari(env_id), clip_rewards=False, frame_stack=True)
    env.seed(seed)

    tf.reset_default_graph()
    ob_space = env.observation_space
    ac_space = env.action_space
    nsteps = 5  # default value, change if needed

    model = Model(policy=policy_fn, ob_space=ob_space, ac_space=ac_space, nenvs=1, nsteps=nsteps)
    model.load(model_filename)

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            time.sleep(1.0 / fps)
            action, _, _, _ = model.step_model.step([obs.__array__()])
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
        print('Episode reward:', episode_rew)

    env.close()
Example 13
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example 14
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return wrap_deepmind(env, **wrapper_kwargs)
Example 15
 def _thunk():
     env = gym.make(env_id)
     env.seed(seed + rank)
     if logger.get_dir():
         env = bench.Monitor(env, os.path.join(logger.get_dir(), "{}.monitor.json".format(rank)))
     gym.logger.setLevel(logging.WARN)
     return wrap_deepmind(env)
Example 16
def make_env(env_id, env_type, subrank=0, seed=None, reward_scale=1.0, gamestate=None, wrapper_kwargs=None):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        env = gym.make(env_id)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example 17
 def _thunk():
     full_name = f"{name}NoFrameskip-v4"
     env = make_atari(full_name, max_episode_steps=max_ep_len)
     env.seed(seed + rank)
     env = bench.Monitor(env, None)
     env = wrap_deepmind(env, episode_life=True, clip_rewards=False)
     return env
Example 18
def main(arguments: argparse.Namespace) -> None:
    """
    Play the game
    :param arguments: User input
    """
    print(f'Playing {arguments.game} using {"cpu" if arguments.cpu else "gpu"}')
    env = wrap_deepmind(make_atari(env_id=arguments.game), frame_stack=True)
    agent = DeepLearningAgent(
        observation_space=env.observation_space,
        action_space=int(env.action_space.n),
        n_envs=1,  # While playing, one environment at a time
        n_steps=1,  # Dummy value, we are not training
        model_path=arguments.model,
        use_cpu=arguments.cpu)
    # This is the current state (or observation)
    observations = reshape_observations_single_env(env.reset())
    actions = agent.get_action(observations, play=False)

    # Play maximum 10 games
    episodes = 10

    while episodes > 0:
        new_observations, rewards, done, info = env.step(actions[0])
        new_observations = reshape_observations_single_env(new_observations)
        actions = agent.get_action(new_observations, play=False)

        env.render()
        if done:
            episodes -= 1
            env.reset()
    env.close()
Example 19
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        env = bench.Monitor(env, "", allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Example 20
    def _thunk():
        if env_id.startswith("ng_Raw"):
            env = MaintenanceEnv(env_config)
        elif env_id.startswith("ng_Worker"):
            env = WorkerMaintenanceEnv(env_config)
        elif env_id.startswith("ng_Feudal"):
            env = FeudalMaintenanceEnv(env_config)
        elif env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)
        env.seed(seed + rank)

        obs_shape = env.observation_space.shape
        if add_timestep and len(
                obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        if log_dir is not None:
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))

        if is_atari:
            env = wrap_deepmind(env)

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = WrapPyTorch(env)

        return env
Example 21
    def _thunk():
        if callable(env_id):
            env = env_id()
        elif env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if add_timestep and len(
                obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)

        return env
Example 22
def main():
    args = parse_args()
    
    env = make_atari(args.env)
    env = wrap_deepmind(env)
    
    # setup the model to process actions for one environment and one step at a time
    model = acktr_disc.Model(policies.CnnPolicy, env.observation_space, env.action_space, 1, 1)
    # load the trainable parameters from our trained file
    model.load(args.model_path)
    # keep track of the last 4 frames of observations
    env_width = env.observation_space.shape[0]
    env_height = env.observation_space.shape[1]
    obs_history = np.zeros((1, env_width, env_height, 4), dtype=np.uint8)

    # if we're supposed to show how the model sees the game
    if args.show_observation:
        obs = env.reset()
        import pygame
        from pygame import surfarray
        # the default size is too small, scale it up
        scale_factor = args.scale_factor
        screen = pygame.display.set_mode((env_width*scale_factor, env_height*scale_factor), 0, 8)
        # setup a gray palette
        pygame.display.set_palette(tuple([(i, i, i) for i in range(256)]))
        
    # if we're supposed to record video
    video_path = args.video_path
    if video_path is not None:
        video_recorder = VideoRecorder(
            env, base_path=video_path, enabled=video_path is not None)
        
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            if args.show_observation:
                # use the Kronecker product to scale the array up for display, and also transpose x/y axes because pygame
                # displays as column/row instead of gym's row/column
                transposed = obs_history[0,:,:,-1].transpose((1,0))
                scaled_array = np.uint8(np.kron(transposed, np.ones((scale_factor, scale_factor))))
                surfarray.blit_array(screen, scaled_array)
                pygame.display.flip()
            if video_path is not None:
                video_recorder.capture_frame()
            # add the current observation onto our history list
            obs_history = np.roll(obs_history, shift=-1, axis=3)
            obs_history[:, :, :, -1] = obs[None][:, :, :, 0]
            # get the suggested action for the current observation history
            action, v, _ = model.step(obs_history)
            
            obs, rew, done, info = env.step(action)
            episode_rew += rew
        print("Episode reward", episode_rew)
        # If we're recording video and we're out of lives (or the game has no lives), stop the recorder and clear the path so no more frames are added.
        if video_path is not None and ('ale.lives' not in info or info['ale.lives'] == 0):
            video_path = None
            video_recorder.close()
Example 23
        def _thunk():
            # import gym
            # return gym.make(env_id)

            env = make_atari(env_id)
            env.seed(seed + rank)
            # env = Monitor(env, os.path.join(monitor_dir, str(rank)))
            return wrap_deepmind(env, scale=True, frame_stack=False)
Example 24
def make_env_atari(game_name, random_action_eps=0.):
    from baselines.common.atari_wrappers import wrap_deepmind, make_atari
    env = make_atari(game_name + "NoFrameskip-v4")
    if random_action_eps > 0:
        env = ActionRandomizer(env, random_action_eps)
    monitored_env = SimpleMonitor(env)
    env = wrap_deepmind(monitored_env, frame_stack=True, scale=True)
    return env, monitored_env
Example 25
def get_env(name_env):
    env = wrap_deepmind(gym.make(name_env),
                        episode_life=True,
                        clip_rewards=False,
                        frame_stack=True,
                        scale=False)
    env.spec.max_episode_steps = 27000  # 108K frames cap
    return env
Example 26
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     # Monitor wraps the gym env and mainly records per-episode information (reward, length) when an episode ends.
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return wrap_deepmind(env, **wrapper_kwargs)
Example 27
 def env_fn():
     env = make_atari(env_id)
     env.seed(1 + rank)
     env = bench.Monitor(
         env,
         logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
     env = wrap_deepmind(env)
     return env
Example 28
 def __init__(self, name: str, **kwargs):
     self._raw_env = make_atari(name)
     self._env = wrap_deepmind(self._raw_env,
                               episode_life=True,
                               clip_rewards=True,
                               frame_stack=True,
                               scale=False)
     super().__init__(name, do_not_normalize=True, **kwargs)
Example 29
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type == 'atari':
        if alg == 'acer':
            env = make_vec_env(env_id, env_type, nenv, seed)
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True, scale=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_vec_env(env_id, env_type, nenv, seed), frame_stack_size)

    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or 'Level1-1'
        env = retro_wrappers.make_retro(game=args.env, state=gamestate, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)

    else:
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale,
                        steps_until_done=args.env_steps, cont=args.env_cont, norm=args.env_norm,
                        start_index=args.start_index)

        if env_type == 'mujoco':
            env = VecNormalize(env)
    return env
Example 30
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     env = bench.Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     gym.logger.setLevel(logging.WARN)
     return wrap_deepmind(env)
Example 31
    def f():
        env_a = make_atari(env_id)
        env_a.seed(SEED + rank)
        # Setup the environment using the deepmind standards
        # Each observation contains 4 stacked frames
        env_a = wrap_deepmind(env_a, frame_stack=True)

        return env_a
Example 32
 def InnerFunc():
     env_id = "PongNoFrameskip-v4"
     oEnv = make_atari(env_id)
     oEnv.seed(iSeed)
     print("set seed", iSeed)
     oEnv = wrap_deepmind(oEnv, frame_stack=True)
     oEnv = MyReward(oEnv)
     return oEnv
Example 33
    def _thunk():

        env = make_atari(env_name)

        env.seed(seed + rank)
        env = bench.Monitor(env, os.path.join(log_dir, str(rank)))

        env = wrap_deepmind(env, scale=True)
        return env
Example 34
def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    monitored_env = bench.Monitor(
        env, logger.get_dir()
    )  # puts rewards and number of steps in info, before environment is wrapped
    env = wrap_deepmind(
        monitored_env
    )  # applies a bunch of modification to simplify the observation space (downsample, make b/w)
    return env, monitored_env
Example 35
def make_both_env_types(env_name):
    env = gym.make(env_name)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(env.unwrapped, gym.envs.atari.atari_env.AtariEnv)

    if is_atari:
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=4)
        # env.seed(seed + rank)

        env2 = wrap_deepmind(env)
        env2 = WrapPyTorch(env2)

    return env, env2  # NOTE: env2 is only defined when is_atari is True
Example 36
def make_env_monitor(env_name, save_dir):
    env = gym.make(env_name)

    is_atari = hasattr(gym.envs, 'atari') and isinstance(env.unwrapped, gym.envs.atari.atari_env.AtariEnv)

    if is_atari:
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=4)

    if is_atari:
        env = wrap_deepmind(env)
        env = WrapPyTorch(env)

    env = gym.wrappers.Monitor(env, save_dir+'/videos/', video_callable=lambda x: True, force=True)
    return env
Example 37
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return wrap_deepmind(env, **wrapper_kwargs)
Example 38
def wrap_train(env):
    from baselines.common.atari_wrappers import (wrap_deepmind, FrameStack)
    env = wrap_deepmind(env, clip_rewards=True)
    env = FrameStack(env, 4)
    return env
Example 39
import gym
from baselines.common.atari_wrappers import make_atari, wrap_deepmind

ENV = 'BreakoutNoFrameskip-v4'
# env = gym.make(ENV)
env = wrap_deepmind(make_atari(ENV), frame_stack=True)
env.reset()
# for _ in range(1000):
index = 0
while True:
    env.render()
    _, reward, done, info = env.step(env.action_space.sample())
    print(index, reward, done, info)
    if done:
        break

    index += 1

env.close()
Example 40
def wrap_atari_dqn(env):
    from baselines.common.atari_wrappers import wrap_deepmind
    return wrap_deepmind(env, frame_stack=True, scale=True)