Example 1
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank() if seed is not None else None
    set_global_seeds(workerseed)
    env = make_atari(env_id)
    def policy_fn(name, ob_space, ac_space): #pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)
    env = bench.Monitor(env, logger.get_dir() and
        osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)

    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env, policy_fn,
        max_timesteps=int(num_timesteps * 1.1),
        timesteps_per_actorbatch=256,
        clip_param=0.2, entcoeff=0.01,
        optim_epochs=4, optim_stepsize=1e-3, optim_batchsize=64,
        gamma=0.99, lam=0.95,
        schedule='linear'
    )
    env.close()
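
These snippets are lifted from larger training scripts, so their module-level imports are omitted. A minimal sketch of the imports Example 1 appears to rely on (inferred from the names it references; the exact module layout may differ across baselines versions):

import os.path as osp
from mpi4py import MPI
from baselines import bench, logger
from baselines.common import set_global_seeds
from baselines.common.atari_wrappers import make_atari, wrap_deepmind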
Example 2
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
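
A possible call to this make_env (a sketch, not taken from the original source; assumes baselines' logger has been configured and passes a standard wrap_deepmind keyword argument through wrapper_kwargs):

env = make_env('BreakoutNoFrameskip-v4', 'atari',
               mpi_rank=0, subrank=0, seed=42,
               wrapper_kwargs={'frame_stack': True},
               logger_dir=logger.get_dir())
obs = env.reset()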
Example 3
def main():
    logger.configure()
    env = make_atari('PongNoFrameskip-v4')
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)

    model = deepq.learn(
        env,
        "conv_only",
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True,
        lr=1e-4,
        total_timesteps=int(1e7),
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
    )

    model.save('pong_model.pkl')
    env.close()
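
A model saved this way can typically be restored by calling deepq.learn again with the same network arguments. The sketch below assumes the load_path argument of baselines' deepq.learn and an env wrapped as in main() above; it runs one episode without further training:

model = deepq.learn(
    env,
    "conv_only",
    convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
    hiddens=[256],
    dueling=True,
    total_timesteps=0,           # no additional training
    load_path='pong_model.pkl',  # restore the saved weights
)
obs, done = env.reset(), False
while not done:
    obs, rew, done, _ = env.step(model(obs[None])[0])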
Example 4
    def _thunk():
        env = make_atari(env_id)
        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if log_dir is not None:
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))

        env = wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=True)

        obs_shape = env.observation_space.shape
        env = WrapPyTorch(env)

        return env
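
_thunk is a closure returned by an enclosing environment factory, and that factory is usually fanned out over several worker processes. A hedged sketch of the outer pattern (assumes baselines' SubprocVecEnv and a hypothetical make_env factory wrapping the _thunk above):

from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

def make_env(env_id, seed, rank, log_dir):
    def _thunk():
        # same body as Example 4 above
        env = make_atari(env_id)
        env.seed(seed + rank)
        if log_dir is not None:
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
        env = wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=True)
        return WrapPyTorch(env)
    return _thunk

# One callable per worker; SubprocVecEnv steps them in parallel subprocesses.
envs = SubprocVecEnv([make_env('PongNoFrameskip-v4', seed=0, rank=i, log_dir=None)
                      for i in range(8)])
obs = envs.reset()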
Example 5
    def _thunk():
        env = make_atari(env_id)
        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if log_dir is not None:
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))

        env = wrap_deepmind(env)

        obs_shape = env.observation_space.shape
        env = WrapPyTorch(env)

        return env
Example 6
    def _thunk():
        env = gym.make(env_id)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)
        env.seed(seed + rank)
        if log_dir is not None:
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
        if is_atari:
            env = wrap_deepmind(env)
        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = WrapPyTorch(env)
        return env
Example 7
        def _thunk():
            if env_type == 'unity':
                worker_id = 32 + rank
                print ("***** UnityEnv", env_id, worker_id, rank)
                env = UnityEnv(env_id, worker_id)
            else:
                env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000*mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)

            if env_type == 'atari': return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example 8
def make_env(env_id,
             env_type,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             distance_threshold=None,
             gamestate=None,
             flatten_dict_observations=True,
             wrapper_kwargs=None):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    else:
        if distance_threshold is not None:
            env = gym.make(env_id, distance_threshold=distance_threshold)
        else:
            env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir()
                  and os.path.join(logger.get_dir(),
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example 9
def train(env_id, num_timesteps, seed, give_state, vf_iters, trial, nsteps, method, hist_len):
    from baselines.guidedcarla.nosharing_cnn_policy import CnnPolicy
    from baselines.guidedcarla import copos_mpi
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    # if rank == 0:
    #     logger.configure()
    # else:
    #     logger.configure(format_strs=[])

    workerseed = seed * 10000
    set_global_seeds(workerseed)

    #TODO:change the environment to carla
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space, ob_name, hist_len): #pylint: disable=W0613
        return CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space, ob_name=ob_name, hist_len=hist_len)

    #TODO: check if monitor can deal with carla
    env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)

    #TODO: check wrap deepmind and carla
    env = wrap_deepmind(env)
    env.seed(workerseed)

    timesteps_per_batch=nsteps
    beta = -1
    if beta < 0:
        nr_episodes = num_timesteps // timesteps_per_batch
        # Automatically compute beta based on initial entropy and number of iterations
        tmp_pi = policy_fn("tmp_pi", env.observation_space, env.action_space, ob_name="tmp_ob", hist_len=hist_len)
        sess.run(tf.global_variables_initializer())

        tmp_ob = np.zeros((1,) + env.observation_space.shape)
        entropy = sess.run(tmp_pi.pd.entropy(), feed_dict={tmp_pi.ob: tmp_ob})
        beta = 2 * entropy / nr_episodes
        print("Initial entropy: " + str(entropy) + ", episodes: " + str(nr_episodes))
        print("Automatically set beta: " + str(beta))

    copos_mpi.learn(env, policy_fn, timesteps_per_batch=timesteps_per_batch, epsilon=0.01, beta=beta,
                    cg_iters=10, cg_damping=0.1, method=method,
                    max_timesteps=num_timesteps, gamma=0.99, lam=0.98, vf_iters=vf_iters, vf_stepsize=1e-3,
                    trial=trial, crosskl_coeff=0.01, kl_target=0.01, sess=sess)
    env.close()
Example 10
    def _thunk():
        #print("Make envs", params)
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id, **TspParams.current().ENVIRONMENT_KWARGS)

        is_atari = (hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv))
        if is_atari:
            env = make_atari(env_id)

        is_minigrid = "MiniGrid" in env_id

        if set_time_limit is not None:
            env = TimeLimit(env, set_time_limit)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find(
                'TimeLimit') >= 0 or set_time_limit is not None:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif is_minigrid:
            pass
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Example 11
        def _thunk():
            if env_type == 'unity':
                from gym_unity.envs import UnityEnv
                import random; r=random.randint(64,164)
                print ("***** HELLO", mpi_rank + r)
                env = UnityEnv(env_id, mpi_rank + r)
            else:
                env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000*mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)

            if env_type == 'atari': return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example 12
def main():

    env = make_atari("BreakoutNoFrameskip-v0")
    env = WarpFrame(env)
    env = FrameStack(env, k=4)

    act = deepq.load("breakout_model.pkl")

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example 13
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(
                env,
                logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))

            if rank == start_index and 'video_log_dir' in kwargs:
                env = VideoLogMonitor(
                    env,
                    kwargs['video_log_dir'] + '_rgb',
                    write_attention_video=kwargs['write_attention_video'],
                    hparams=hparams,
                    nsteps=nsteps)

            return wrap_deepmind(env, **wrapper_kwargs)
Example 14
def train(env_id, num_timesteps, seed):
    """
    Train TRPO model for the atari environment, for testing purposes

    :param env_id: (str) Environment ID
    :param num_timesteps: (int) The total number of samples
    :param seed: (int) The initial seed for training
    """
    rank = MPI.COMM_WORLD.Get_rank()

    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])

    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space, sess=None, placeholders=None):  # pylint: disable=W0613
        return CnnPolicy(name=name,
                         ob_space=ob_space,
                         ac_space=ac_space,
                         sess=sess,
                         placeholders=placeholders)

    env = bench.Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)

    env = wrap_deepmind(env)
    env.seed(workerseed)

    trpo_mpi.learn(env,
                   policy_fn,
                   timesteps_per_batch=512,
                   max_kl=0.001,
                   cg_iters=10,
                   cg_damping=1e-3,
                   max_timesteps=int(num_timesteps * 1.1),
                   gamma=0.98,
                   lam=1.0,
                   vf_iters=3,
                   vf_stepsize=1e-4,
                   entcoeff=0.00)
    env.close()
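
Given the docstring, a minimal invocation might look like this (a sketch; assumes mpi4py is installed and the script is run directly or under mpirun):

if __name__ == '__main__':
    train('PongNoFrameskip-v4', num_timesteps=int(1e6), seed=0)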
Example 15
def make_dataset(env_name):
    # Hyper-parameters
    total_frames_to_generate = 100000
    #env_id = ['PongNoFrameskip-v4', 'SeaquestNoFrameskip-v4'][1]
    env_id = env_name
    save_path = './data/{}/sfmnet/episodes'.format(env_id)
    seed = 0

    # Track how many frames we have created.
    total_frames_generated = 0
    episode_index = 0

    # Create and set-up the environment.
    env = make_atari(env_id)
    env = wrap_deepmind(env)
    env.seed(seed)
    set_global_seeds(seed)

    # Generate frames.
    while total_frames_generated < total_frames_to_generate:
        print("Starting episode {}".format(episode_index))

        obs = env.reset()
        frame_index = 0
        done = False

        while not done and total_frames_generated < total_frames_to_generate:
            # Take a random action.
            action = env.action_space.sample()
            obs, reward, done, info = env.step(action)

            # Create a directory to save frames to for this episode.
            episode_save_path = os.path.join(save_path, str(episode_index))
            if not os.path.exists(episode_save_path):
                os.makedirs(episode_save_path)

            # Save the frame
            img = Image.fromarray(np.squeeze(obs), mode='L')
            img.save(
                os.path.join(
                    episode_save_path,
                    '{}_{}_{}.png'.format(frame_index, action, reward)))
            frame_index += 1
            total_frames_generated += 1

        # Start a new episode.
        episode_index += 1
Example 16
def main():
    global abort_training
    global q_pressed

    abort_training = False
    q_pressed = False

    listener = keyboard.Listener(on_press=on_press, on_release=on_release)
    listener.start()

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env',
                        help='environment ID',
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = deepq.learn(env,
                      q_func=model,
                      lr=1e-4,
                      max_timesteps=args.num_timesteps,
                      buffer_size=10000,
                      exploration_fraction=0.1,
                      exploration_final_eps=0.01,
                      train_freq=4,
                      learning_starts=10000,
                      target_network_update_freq=1000,
                      gamma=0.99,
                      print_freq=1,
                      prioritized_replay=bool(args.prioritized),
                      callback=callback)
    print("Saving model to pong_model.pkl")
    act.save("pong_model.pkl")
    env.close()
Example 17
def main(seed=0, n_episodes=100, epsilon=0.05, occlusion=0):
    np.random.seed(seed)
    logger.configure(dir="breakout_train_log")
    env = make_atari('BreakoutNoFrameskip-v4')
    env = bench.Monitor(env, logger.get_dir(), allow_early_resets=True)
    env = wrap_deepmind(env,
                        frame_stack=True,
                        scale=False,
                        episode_life=False,
                        clip_rewards=False)
    ANN = Net()
    ANN.load_state_dict(
        torch.load('../trained_networks/pytorch_breakout_dqn.pt'))

    if not os.path.isdir("results"):
        os.mkdir("results")

    rewards = np.zeros(n_episodes)
    # outputs = []

    for episode in range(n_episodes):
        obs, done = env.reset(), False
        episode_rew = 0
        index_array = np.array(range(80 * 80))
        index_array = np.reshape(index_array, [80, 80])
        positions = np.random.choice(80 * 80,
                                     size=int(6400 * occlusion / 100),
                                     replace=False)
        indices = np.isin(index_array, positions)
        indices = np.repeat(np.expand_dims(indices, axis=0), 4, axis=0)
        indices = np.expand_dims(indices, axis=0)
        while not done:
            state = torch.tensor(obs[None],
                                 dtype=torch.float).permute(0, 3, 1, 2)
            state[np.where(indices)] = 0
            probabilities = policy(ANN(state)[0], epsilon)
            action = np.random.choice(np.arange(len(probabilities)),
                                      p=probabilities)
            obs, rew, done, _ = env.step(action)
            episode_rew += rew
        rewards[episode] = episode_rew
        print("Episode " + str(episode) + " reward", episode_rew)

    np.savetxt(
        'results/occlusion_' + str(occlusion) + '_' + str(seed) + '.txt',
        rewards)
    env.close()
Example 18
    def get_player(self, train=False):
        if self.env:
            return self.env

        if self.config['ENV_TYPE'] == 'Classic':
            env = gym.make(self.config['ENV_NAME'])
        elif self.config['ENV_TYPE'] == 'Atari':
            if train:
                env = make_atari(self.config['ENV_NAME'])
                env = bench.Monitor(env, self.logger.get_dir())
                env = deepq.wrap_atari_dqn(env)
            else:
                env = gym.make(self.config['ENV_NAME'])
                env = deepq.wrap_atari_dqn(env)
        else:
            raise Exception('Environment Type %s - Not Supported' % self.config['ENV_TYPE'])
        return env
Example 19
        def _thunk():
            if env_type == 'CustomEnv':
                env = make_virtual_env(steps_until_done, rank)
            else:
                env = make_atari(env_id) if env_type == 'atari' else gym.make(
                    env_id)
            env.seed(seed + 10000 * mpi_rank +
                     rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir()
                          and os.path.join(logger.get_dir(),
                                           str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)

            if env_type == 'atari': return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example 20
def make_env(env_id,
             env_type,
             mpi_rank=0,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             gamestate=None,
             flatten_dict_observations=True,
             wrapper_kwargs=None,
             logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        env = FlattenObservation(env)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example 21
def train(env_id, args):
    from baselines.ppo1 import cnn_policy
    import baselines.common.tf_util as U
    if args.nokl:
        from baselines.ppo1 import nokl_pposgd_simple as pposgd_simple
    else:
        from baselines.ppo1 import pposgd_simple

    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    print('_'.join([str(arg) for arg in vars(args)]))
    logdir = osp.join(
        './result/', '_'.join([str(getattr(args, arg)) for arg in vars(args)]))
    logger.configure(dir=logdir)
    workerseed = args.seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space):  #pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name,
                                    ob_space=ob_space,
                                    ac_space=ac_space)

    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)

    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env,
                        policy_fn,
                        max_timesteps=int(args.num_timesteps * 1.1),
                        timesteps_per_actorbatch=args.timesteps_per_actorbatch,
                        clip_param=args.clip,
                        entcoeff=args.entcoeff,
                        optim_epochs=args.optim_epochs,
                        optim_stepsize=args.optim_stepsize,
                        optim_batchsize=args.optim_batchsize,
                        gamma=0.99,
                        lam=0.95,
                        schedule='linear')
    env.close()
Example 22
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, env_kwargs=None, logger_dir=None, initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*','',env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        env = FlattenObservation(env)
#        keys = env.observation_space.spaces.keys()
#        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)


    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example 23
def make_eval_env(env_id, dumpdir=None, wrapper_kwargs=None, seed=None):
    if wrapper_kwargs is None: wrapper_kwargs = {}
    wrapper_kwargs['is_monte'] = 'MontezumaRevenge' in env_id
    wrapper_kwargs['is_pong'] = 'Pong' in env_id
    env = make_atari(env_id)
    if seed is not None:
        env.seed(seed)

    env = LimitLength(env, 20000, timeout_penalty=0.0)
    env = gym.wrappers.Monitor(env,
                               dumpdir,
                               video_callable=lambda x: True,
                               force=True)
    return wrap_deepmind(env,
                         frame_stack=True,
                         save_original_reward=True,
                         **wrapper_kwargs)
Example 24
    def _thunk():
        env = make_atari(env_id)
        env.seed(seed + rank if seed is not None else None)
        if random_action:
            env = RandomRepeat(env)
        if eps_random:
            env = EpsRandom(env)
        if random_fix:
            env = RandomFix(env)
        env = Monitor(env,
                      logger.get_dir()
                      and os.path.join(logger.get_dir(), str(rank)),
                      allow_early_resets=True)
        return wrap_deepmind_custom(env,
                                    episode_life=episode_life,
                                    clip_rewards=clip_rewards,
                                    **wrapper_kwargs)
Example 25
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env',
                        help='environment ID',
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--checkpoint-freq', type=int, default=10000)
    parser.add_argument('--checkpoint-path', type=str, default=None)

    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )

    deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        prioritized_replay_alpha=args.prioritized_replay_alpha,
        checkpoint_freq=args.checkpoint_freq,
        checkpoint_path=args.checkpoint_path,
    )

    env.close()
Example 26
    def _thunk():
        print("CUSTOM GYM:", custom_gym)
        if custom_gym is not None and custom_gym != "":
            module = importlib.import_module(custom_gym, package=None)
            print("imported env '{}'".format((custom_gym)))

        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)
        if not is_atari and scale_img:
            env = WarpFrame(env, color_img)

        if duckietown:
            env = DuckietownRewardWrapper(env)
            if dt_discrete:
                env = DuckietownDiscreteWrapper(env)

        env = Normalize(env)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape
        if add_timestep and len(
                obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=True)
        if is_atari:
            env = wrap_deepmind(env)

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = WrapPyTorch(env)

        return env
Example 27
    def _thunk():
        print("CUSTOM GYM:", custom_gym)
        if custom_gym is not None and custom_gym != "":
            module = importlib.import_module(custom_gym, package=None)
            print("imported env '{}'".format((custom_gym)))

        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if not navi:
            if is_atari:
                if len(env.observation_space.shape) == 3:
                    env = wrap_deepmind(env)
            elif len(env.observation_space.shape) == 3:
                raise NotImplementedError(
                    "CNN models work only for atari,\n"
                    "please use a custom wrapper for a custom pixel input env.\n"
                    "See wrap_deepmind for an example.")

            # If the input has shape (W,H,3), wrap for PyTorch convolutions
            obs_shape = env.observation_space.shape
            if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
                env = TransposeImage(env, op=[2, 0, 1])

        return env
Example 28
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env',
                        help='environment ID',
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--train-with-latency', type=int, default=0)
    parser.add_argument('--train-with-all-latency-mode', type=int, default=0)
    args = parser.parse_args()
    loggerid = "L" + (("M" + str(args.train_with_all_latency_mode)) if
                      (args.train_with_all_latency_mode != 0) else str(
                          args.train_with_latency))
    loggerdir = "./data." + loggerid + "/"
    logger.configure(dir=loggerdir)
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        print_freq=1,
        train_with_latency=args.train_with_latency,
        train_with_all_latency_mode=args.train_with_all_latency_mode)
    act.save(loggerdir + args.env + "." + loggerid + ".pkl")
    env.close()
Example 29
def test(model, save_path):
    # setting a seed
    seed = 42

    # setting the video save path
    video_save_path = '/'.join((save_path, 'video'))

    # initializing the environment
    env = make_atari("BreakoutNoFrameskip-v4")

    # Wrapping the env with deepmind wrapper
    env = wrap_deepmind(env, frame_stack=True, scale=True)
    env.seed(seed)

    # Adding the monitor as a wrapper to the environment
    env = gym.wrappers.Monitor(env,
                               video_save_path,
                               video_callable=lambda episode_id: True,
                               force=True)

    # setting the return parameters
    n_episodes = 10
    rewards = np.zeros(n_episodes, dtype=float)

    for i in range(n_episodes):
        # Resetting the state for each episode
        state = np.array(env.reset())
        done = False

        while not done:
            # Choosing an action based on greedy policy
            state_tensor = tf.convert_to_tensor(state)
            state_tensor = tf.expand_dims(state_tensor, 0)
            action_values = model.predict(state_tensor)
            action = np.argmax(action_values)

            # Perform action and get next state, reward and done
            state_next, reward, done, _ = env.step(action)
            state = np.array(state_next)

            # Update the reward observed at episode i
            rewards[i] += reward

    env.close()
    return rewards
Example 30
def train(env_id, num_timesteps, seed):
    from baselines.ppo1_cmaes_layer_pl import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure(filename="PPO1-" + env_id,
                         format_strs=['stdout', 'log', 'csv'])
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space):  # pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name,
                                    ob_space=ob_space,
                                    ac_space=ac_space)

    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    # test_env = bench.Monitor(test_env, logger.get_dir() and
    #     osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)

    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env,
                        policy_fn,
                        max_timesteps=int(num_timesteps * 1.1),
                        timesteps_per_actorbatch=256,
                        clip_param=0.1,
                        entcoeff=0.01,
                        optim_epochs=4,
                        optim_stepsize=1e-6,
                        optim_batchsize=64,
                        gamma=0.99,
                        lam=0.95,
                        schedule='linear',
                        seed=seed,
                        env_id=env_id)
    env.close()
Example 31
def worker_process(remote: multiprocessing.connection.Connection, parameters,
                   worker_id, seed):
    """
    This function is used as target by each of the threads in the multiprocess
    to build environment instances and define the commands that can be executed
    by each of the workers.
    """
    # The Atari wrappers are now imported from openAI baselines
    # https://github.com/openai/baselines
    log_dir = './log'
    if parameters['env_type'] == 'atari':
        env = make_atari(parameters['scene'])
        env = bench.Monitor(
                    env,
                    os.path.join(log_dir, str(worker_id)),
                    allow_early_resets=False)
        env = wrap_deepmind(env, True)
    if parameters['env_type'] == 'warehouse':
        env = Warehouse(seed, parameters)
    if parameters['env_type'] == 'sumo':
        env = LoopNetwork(parameters, seed)
    if parameters['env_type'] == 'minigrid':
        env = gym.make(parameters['scene'])
        # env = RGBImgPartialObsWrapper(env, tile_size=12) # Get pixel observations
        env = ImgObsWrapper(env) # Get rid of the 'mission' field
        env = wrappers.GrayScaleObservation(env, keep_dim=True) # Gray scale
        env = FeatureVectorWrapper(env)
        env.seed(seed)
        
    while True:
        cmd, data = remote.recv()
        if cmd == 'step':
            obs, reward, done, info = env.step(data)
            if done:
                obs = env.reset()
            remote.send((obs, reward, done, info))
        elif cmd == 'reset':
            remote.send(env.reset())
        elif cmd == 'action_space':
            remote.send(env.action_space.n)
        elif cmd == 'close':
            remote.close()
            break
        else:
            raise NotImplementedError
Example 32
    def _thunk():
        info_keywords = ()
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        elif env_id.startswith("osim"):
            info_keywords = ('rb', )
            # https://github.com/stanfordnmbl/osim-rl
            _, task = env_id.split('.')
            if task == "Prosthetics":
                env = MyProstheticsEnv(integrator_accuracy=1e-4, **kwargs)
            elif task == "Arm2D":
                env = Arm2DEnv(integrator_accuracy=1e-4, **kwargs)
            else:  # task == "L2Run"
                assert task == "L2Run"
                env = L2RunEnv(integrator_accuracy=1e-4, **kwargs)
        else:
            env = gym.make(env_id)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)
        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if add_timestep and len(
                obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        if log_dir is not None:
            env = Monitor(env,
                          os.path.join(log_dir, str(rank)),
                          info_keywords=info_keywords,
                          allow_early_resets=allow_early_resets)

        if is_atari:
            env = wrap_deepmind(env)

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env)

        return env
Example 33
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        # additional code
        env.init_dart()
        env.init_sim(True, False)

        #env.start_render()

        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Example 34
def main():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--checkpoint-freq', type=int, default=10000)
    parser.add_argument('--checkpoint-path', type=str, default=None)

    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )

    deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        prioritized_replay_alpha=args.prioritized_replay_alpha,
        checkpoint_freq=args.checkpoint_freq,
        checkpoint_path=args.checkpoint_path,
    )

    env.close()
Example 35
    def _thunk():
        env = make_atari(env_id)
        env.seed(seed + rank)
        env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
        return wrap_deepmind(env, **wrapper_kwargs)
Example 36
import gym
from baselines.common.atari_wrappers import make_atari, wrap_deepmind

ENV = 'BreakoutNoFrameskip-v4'
# env = gym.make(ENV)
env = wrap_deepmind(make_atari(ENV), frame_stack=True)
env.reset()
# for _ in range(1000):
index = 0
while True:
    env.render()
    _, reward, done, info = env.step(env.action_space.sample())
    print(index, reward, done, info)
    if done:
        break

    index += 1

env.close()