Code example #1
    def _thunk():
        env = gym.make(env_id)
        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if n_reactive > 1:
            # Reactive policy needs fixed-length histories
            env = HistoryWrapper(env, n_reactive)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        return env
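Most of the `_thunk` examples on this page are factory closures returned by an enclosing `make_env`-style helper and then consumed by a vectorized environment. A minimal sketch of that consumer side, assuming a `make_env(env_id, seed, rank, log_dir)` helper that returns such a thunk (the helper name and the worker count are assumptions, not part of the snippet above):

# Hedged sketch: make_env is assumed to be the enclosing factory that returns _thunk.
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

num_processes = 8
envs = SubprocVecEnv(
    [make_env(env_id, seed, rank, log_dir) for rank in range(num_processes)])
obs = envs.reset()  # one observation per worker process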
Code example #2
def train(env_id, num_timesteps, seed, theta, name, decay, lr, time_param):
    import os.path
    import datetime
    print(name)
    if name != "":
        name = name + "theta-" + str(theta) + "-decay-" + str(
            decay) + "-lr-" + str(lr) + "-time_param-" + str(time_param) + '/'
    print(name)
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(seed)
    env = gym.make(env_id)

    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name,
                                    ob_space=ob_space,
                                    ac_space=ac_space,
                                    hid_size=64,
                                    num_hid_layers=2)

    tmp = os.path.join(
        './../LOG/' + name + 'mujoco_' + str(env_id) + '/',
        str(seed) + '--' +
        datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"))
    print(tmp)
    logger.configure(dir=tmp)
    env = bench.Monitor(env, tmp)
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    print("Starting training")
    pposgd_simple.learn(env,
                        policy_fn,
                        max_timesteps=num_timesteps,
                        timesteps_per_actorbatch=2048,
                        clip_param=0.2,
                        entcoeff=0.0,
                        optim_epochs=10,
                        optim_stepsize=lr,
                        optim_batchsize=64,
                        gamma=0.99,
                        lam=0.95,
                        schedule='linear',
                        theta=theta,
                        decay=decay,
                        time_param=time_param)
    env.close()
Code example #3
File: envs.py Project: tailintalent/mela
 def _thunk():
     if "Custom" in env_id:
         env = gym.make(env_id, env_settings=env_settings)
     else:
         env = gym.make(env_id)
     is_atari = hasattr(gym.envs, 'atari') and isinstance(
         env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
     if is_atari:
         env = make_atari(env_id)
     env.seed(seed + rank)
     env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
     if is_atari:
         env = wrap_deepmind(env,
                             clip_rewards=clip_rewards,
                             env_settings=env_settings)
         env = WrapPyTorch(env, env_settings=env_settings)
     return env
Code example #4
File: test_benchmark_ppo.py Project: yus-nas/garage
def run_baselines(env, seed, log_dir):
    """Create baselines model and training.

    Args:
        env (gym.Env): Environment of the task.
        seed (int): Random positive integer for the trial.
        log_dir (str): Log dir path.

    Returns:
        str: Path to output csv file

    """
    ncpu = max(multiprocessing.cpu_count() // 2, 1)
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    tf.compat.v1.Session(config=config).__enter__()

    # Set up logger for baselines
    configure(dir=log_dir, format_strs=['stdout', 'log', 'csv', 'tensorboard'])
    baselines_logger.info('rank {}: seed={}, logdir={}'.format(
        0, seed, baselines_logger.get_dir()))

    env = DummyVecEnv([
        lambda: bench.Monitor(
            env, baselines_logger.get_dir(), allow_early_resets=True)
    ])

    set_global_seeds(seed)
    policy = MlpPolicy

    ppo2.learn(policy=policy,
               env=env,
               nsteps=hyper_parameters['batch_size'],
               nminibatches=32,
               lam=0.95,
               gamma=0.99,
               noptepochs=10,
               log_interval=1,
               ent_coef=0.0,
               max_grad_norm=None,
               lr=3e-4,
               cliprange=0.2,
               total_timesteps=hyper_parameters['batch_size'] * hyper_parameters['n_epochs'])  # yapf: disable  # noqa: E501

    return osp.join(log_dir, 'progress.csv')
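A hedged usage sketch for the function above (the environment name, seed, and log directory are illustrative placeholders, and `hyper_parameters` is assumed to be a module-level dict as in the benchmark code):

import gym

env = gym.make('HalfCheetah-v2')  # placeholder task
csv_path = run_baselines(env, seed=1, log_dir='/tmp/ppo_benchmark')
print(csv_path)  # .../progress.csv written by the baselines logger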
Code example #5
    def _thunk():
        env = gym.make(env_id, **kwargs)

        env.seed(seed + rank)

        # obs_shape = env.observation_space.shape

        # amirabdi: my understanding is that timestep is when "time" is part of the
        # state definition... I don't see it play a role anytime soon.
        # if add_timestep and len(obs_shape) == 1 and str(env).find('TimeLimit') > -1:
        #     env = AddTimestep(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)
        return env
Code example #6
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)
            if 'micropolis' in env_id.lower():
                print("ENV RANK: ", rank)
                if rank == 0:
                    env.setMapSize(map_width,
                                   print_map=print_map,
                                   parallel_gui=parallel_py2gui,
                                   render_gui=render_gui,
                                   empty_start=True,
                                   noreward=noreward,
                                   max_step=max_step,
                                   rank=rank)
                else:
                    env.setMapSize(map_width, rank=rank)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)
        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if add_timestep and len(
                obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            env = wrap_deepmind(env)

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env)

        return env
Code example #7
    def _thunk():

        env = gym_super_mario_bros.make(env_id)
        env.seed(seed + rank)

        env = BinarySpaceToDiscreteSpaceEnv(env, ACTIONS)

        if log_dir is not None:
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))

        env = ProcessFrameMario(env, reward_type=reward_type)
        env = smb_warp_frame(env)
        env = smb_scale_frame(env)
        env = smb_stack_and_repeat(env, stack_frames, action_repeat)
        env = WrapPyTorch(env)

        return env
Code example #8
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return mlp_policy.MlpPolicy(name=name,
                                    ob_space=ob_space,
                                    ac_space=ac_space,
                                    reuse=reuse,
                                    hid_size=args.policy_hidden_size,
                                    num_hid_layers=2)

    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    print("task name: {}".format(task_name))
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)

    if args.task == 'train':
        # dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
        # reward_giver = TransitionClassifier(env, args.adversary_hidden_size, entcoeff=args.adversary_entcoeff)
        train(env, args.seed, policy_fn, args.algo, args.policy_entcoeff,
              args.num_timesteps, args.save_per_iter, args.checkpoint_dir,
              args.log_dir, task_name)
    elif args.task == 'visualize':
        policy_run(env,
                   policy_fn,
                   args.checkpoint_dir,
                   number_rollouts=10,
                   stochastic_policy=args.stochastic_policy)
    elif args.task == 'evaluate':
        runner(env,
               policy_fn,
               args.load_model_path,
               timesteps_per_batch=1024,
               number_trajs=10,
               stochastic_policy=args.stochastic_policy,
               save=args.save_sample)
    else:
        raise NotImplementedError
    env.close()
Code example #9
File: envs.py Project: Silviatulli/LOGEL
  def _thunk():
    """Creates an env and manualy sets its seed, log directory and timestep."""
    # env_id = 'Reacher'
    env = gym.make(env_id)
    env.seed(seed + rank)

    obs_shape = env.observation_space.shape

    if add_timestep and len(
        obs_shape) == 1 and str(env).find('TimeLimit') > -1:
      env = AddTimestep(env)

    if log_dir is not None:
      env = bench.Monitor(env, os.path.join(log_dir, str(rank)),
                          allow_early_resets=allow_early_resets)

    return env
Code example #10
File: run_mujoco.py Project: o7s8r6/gail-tf
def train(args):
    import baselines.common.tf_util as U
    sess = U.single_threaded_session()
    sess.__enter__()

    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)
    workerseed = args.seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = gym.make(args.env_id)

    def policy_fn(name, ob_space, ac_space):
        return MlpPolicy(name=name,
                         ob_space=env.observation_space,
                         ac_space=env.action_space,
                         hid_size=32,
                         num_hid_layers=2)

    env = bench.Monitor(
        env,
        logger.get_dir()
        and osp.join(logger.get_dir(), "%i.monitor.json" % rank))
    env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)

    task_name = "trpo." + args.env_id.split("-")[0] + "." + ("%.2f" %
                                                             args.entcoeff)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    trpo_mpi.learn(env,
                   policy_fn,
                   timesteps_per_batch=1024,
                   max_kl=0.01,
                   cg_iters=10,
                   cg_damping=0.1,
                   max_timesteps=args.num_timesteps,
                   gamma=0.99,
                   lam=0.98,
                   vf_iters=5,
                   vf_stepsize=1e-3,
                   sample_stochastic=args.sample_stochastic,
                   task_name=task_name,
                   save_per_iter=args.save_per_iter,
                   ckpt_dir=args.checkpoint_dir,
                   load_model_path=args.load_model_path)
    env.close()
Code example #11
    def get_player(self, train=False):
        if self.env:
            return self.env

        if self.config['ENV_TYPE'] == 'Classic':
            env = gym.make(self.config['ENV_NAME'])
        elif self.config['ENV_TYPE'] == 'Atari':
            if train:
                env = make_atari(self.config['ENV_NAME'])
                env = bench.Monitor(env, self.logger.get_dir())
                env = deepq.wrap_atari_dqn(env)
            else:
                env = gym.make(self.config['ENV_NAME'])
                env = deepq.wrap_atari_dqn(env)
        else:
            raise Exception('Environment Type %s - Not Supported' % self.config['ENV_TYPE'])
        return env
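A hedged example of the config dict this factory expects (the keys come from the code above; the concrete values are assumptions):

config = {
    'ENV_TYPE': 'Atari',               # 'Classic' or 'Atari'
    'ENV_NAME': 'PongNoFrameskip-v4',  # placeholder environment id
}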
Code example #12
File: envs.py Project: Mee321/a2c_hapg_storm
    def _thunk():
        env = gym.make(env_id)
        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        # obs_shape = env.observation_space.shape
        # if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        #     env = TransposeImage(env, op=[2, 0, 1])

        return env
Code example #13
 def _thunk():
     env = gym.make(env_id)
     is_atari = hasattr(gym.envs, 'atari') and isinstance(
         env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
     if is_atari:
         env = make_atari(env_id)
     env.seed(seed + rank)
     if log_dir is not None:
         env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
     if is_atari:
         env = wrap_deepmind(env)
     # If the input has shape (W,H,3), wrap for PyTorch convolutions
     print(env.observation_space)
     obs_shape = env.observation_space.shape
     if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
         env = WrapPyTorch(env)
     return env
Code example #14
def train(env_id, args):
    from baselines.ppo1 import cnn_policy
    import baselines.common.tf_util as U
    if args.nokl:
        from baselines.ppo1 import nokl_pposgd_simple as pposgd_simple
    else:
        from baselines.ppo1 import pposgd_simple

    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    print('_'.join([str(arg) for arg in vars(args)]))
    logdir = osp.join(
        './result/', '_'.join([str(getattr(args, arg)) for arg in vars(args)]))
    logger.configure(dir=logdir)
    workerseed = args.seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space):  #pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name,
                                    ob_space=ob_space,
                                    ac_space=ac_space)

    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)

    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env,
                        policy_fn,
                        max_timesteps=int(args.num_timesteps * 1.1),
                        timesteps_per_actorbatch=args.timesteps_per_actorbatch,
                        clip_param=args.clip,
                        entcoeff=args.entcoeff,
                        optim_epochs=args.optim_epochs,
                        optim_stepsize=args.optim_stepsize,
                        optim_batchsize=args.optim_batchsize,
                        gamma=0.99,
                        lam=0.95,
                        schedule='linear')
    env.close()
Code example #15
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env',
                        help='environment ID',
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--train-with-latency', type=int, default=0)
    parser.add_argument('--train-with-all-latency-mode', type=int, default=0)
    args = parser.parse_args()
    loggerid = "L" + (("M" + str(args.train_with_all_latency_mode)) if
                      (args.train_with_all_latency_mode != 0) else str(
                          args.train_with_latency))
    loggerdir = "./data." + loggerid + "/"
    logger.configure(dir=loggerdir)
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )
    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        print_freq=1,
        train_with_latency=args.train_with_latency,
        train_with_all_latency_mode=args.train_with_all_latency_mode)
    act.save(loggerdir + args.env + "." + loggerid + ".pkl")
    env.close()
Code example #16
def worker_process(remote: multiprocessing.connection.Connection, parameters,
                   worker_id, seed):
    """
    This function is used as the target of each worker process: it builds an
    environment instance and handles the commands that the parent process can
    send to each worker.
    """
    # The Atari wrappers are now imported from openAI baselines
    # https://github.com/openai/baselines
    log_dir = './log'
    if parameters['env_type'] == 'atari':
        env = make_atari(parameters['scene'])
        env = bench.Monitor(
                    env,
                    os.path.join(log_dir, str(worker_id)),
                    allow_early_resets=False)
        env = wrap_deepmind(env, True)
    if parameters['env_type'] == 'warehouse':
        env = Warehouse(seed, parameters)
    if parameters['env_type'] == 'sumo':
        env = LoopNetwork(parameters, seed)
    if parameters['env_type'] == 'minigrid':
        env = gym.make(parameters['scene'])
        # env = RGBImgPartialObsWrapper(env, tile_size=12) # Get pixel observations
        env = ImgObsWrapper(env) # Get rid of the 'mission' field
        env = wrappers.GrayScaleObservation(env, keep_dim=True) # Gray scale
        env = FeatureVectorWrapper(env)
        env.seed(seed)
        
    while True:
        cmd, data = remote.recv()
        if cmd == 'step':
            obs, reward, done, info = env.step(data)
            if done:
                obs = env.reset()
            remote.send((obs, reward, done, info))
        elif cmd == 'reset':
            remote.send(env.reset())
        elif cmd == 'action_space':
            remote.send(env.action_space.n)
        elif cmd == 'close':
            remote.close()
            break
        else:
            raise NotImplementedError
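A hedged sketch of the parent side of this worker protocol (the worker count, seed, and `parameters` dict are assumptions; the command names come from the loop above):

import multiprocessing

parameters = {'env_type': 'atari', 'scene': 'BreakoutNoFrameskip-v4'}  # assumed config
seed = 0
n_workers = 4

# One Pipe per worker: the parent keeps one end, the worker gets the other.
remotes, worker_ends = zip(*[multiprocessing.Pipe() for _ in range(n_workers)])
workers = [multiprocessing.Process(target=worker_process,
                                   args=(worker_end, parameters, worker_id, seed + worker_id))
           for worker_id, worker_end in enumerate(worker_ends)]
for w in workers:
    w.start()

# Query the action space of the first worker, then reset and close all of them.
remotes[0].send(('action_space', None))
n_actions = remotes[0].recv()

for remote in remotes:
    remote.send(('reset', None))
observations = [remote.recv() for remote in remotes]

for remote in remotes:
    remote.send(('close', None))
for w in workers:
    w.join()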
Code example #17
    def _thunk():
        env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(
                env_id
            )  # --> check that NoFrameskip-v4-env has been chosen; NoopReset
            if train_mode == "probe":
                env = AtariARIWrapper(
                    env
                )  # --> add the labels based on the RAM-state to the info-dict
            elif train_mode == "train_encoder":
                pass
            else:
                raise ValueError

        env.seed(seed + rank)

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=False)

        # env = gym.wrappers.Monitor(env, '/home/cathrin/MA/datadump/videos/' + env_id, force=True)

        # --> in the following order:
        # EpisodicLifeEnv; FireResetEnv, Grayscaling, just for Pong: overlay Scores
        # ScaleObservations to [0, 1]; ClipRewards;
        # for framestacking: MaxAndSkipAndFramestack, without Framstacking: MaxAndSkipEnv
        env = wrap_deepmind(env,
                            downsample=downsample,
                            color=color,
                            frame_stack=frame_stack,
                            use_extended_wrapper=use_extended_wrapper,
                            train_mode=train_mode)

        # convert to pytorch-style (C, H, W)
        env = ImageToPyTorch(env)

        return env
Code example #18
    def _thunk():
        print("CUSTOM GYM:", custom_gym)
        if custom_gym is not None and custom_gym != "":
            module = importlib.import_module(custom_gym, package=None)
            print("imported env '{}'".format((custom_gym)))

        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if not navi:
            if is_atari:
                if len(env.observation_space.shape) == 3:
                    env = wrap_deepmind(env)
            elif len(env.observation_space.shape) == 3:
                raise NotImplementedError(
                    "CNN models work only for atari,\n"
                    "please use a custom wrapper for a custom pixel input env.\n"
                    "See wrap_deepmind for an example.")

            # If the input has shape (W,H,3), wrap for PyTorch convolutions
            obs_shape = env.observation_space.shape
            if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
                env = TransposeImage(env, op=[2, 0, 1])

        return env
Code example #19
def train(env_id, num_timesteps, seed):
    from baselines.ppo1_cmaes_layer_pl import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure(filename="PPO1-" + env_id,
                         format_strs=['stdout', 'log', 'csv'])
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = make_atari(env_id)

    def policy_fn(name, ob_space, ac_space):  # pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name,
                                    ob_space=ob_space,
                                    ac_space=ac_space)

    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    # test_env = bench.Monitor(test_env, logger.get_dir() and
    #     osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)

    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env,
                        policy_fn,
                        max_timesteps=int(num_timesteps * 1.1),
                        timesteps_per_actorbatch=256,
                        clip_param=0.1,
                        entcoeff=0.01,
                        optim_epochs=4,
                        optim_stepsize=1e-6,
                        optim_batchsize=64,
                        gamma=0.99,
                        lam=0.95,
                        schedule='linear',
                        seed=seed,
                        env_id=env_id)
    env.close()
Code example #20
File: run_atari.py Project: wjh720/12
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env',
                        help='environment ID',
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--checkpoint-freq', type=int, default=10000)
    parser.add_argument('--checkpoint-path', type=str, default=None)

    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=bool(args.dueling),
    )

    deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=args.num_timesteps,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        prioritized_replay_alpha=args.prioritized_replay_alpha,
        checkpoint_freq=args.checkpoint_freq,
        checkpoint_path=args.checkpoint_path,
    )

    env.close()
Code example #21
    def _thunk():
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        # additional code
        env.init_dart()
        env.init_sim(True, False)

        #env.start_render()

        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Code example #22
    def _thunk():
        print("CUSTOM GYM:", custom_gym)
        if custom_gym is not None and custom_gym != "":
            module = importlib.import_module(custom_gym, package=None)
            print("imported env '{}'".format((custom_gym)))

        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)
        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)
        if not is_atari and scale_img:
            env = WarpFrame(env, color_img)

        if duckietown:
            env = DuckietownRewardWrapper(env)
            if dt_discrete:
                env = DuckietownDiscreteWrapper(env)

        env = Normalize(env)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape
        if add_timestep and len(
                obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=True)
        if is_atari:
            env = wrap_deepmind(env)

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = WrapPyTorch(env)

        return env
Code example #23
    def _thunk():
        env = EnvInterface(
            use_vision=use_vision,
            use_pos=use_pos,
            episode_length=episode_length,
            level=level_script,
        )
        random_seed(seed)

        env.seed(seed + rank)

        if log_dir is not None:
            # env = Monitor(env=env, filename=os.path.join(log_dir, str(rank)), allow_early_resets=True)
            env = bench.Monitor(env=env,
                                filename=os.path.join(log_dir, str(rank)),
                                allow_early_resets=True)

        return env
Code example #24
    def __init__(self, sc2env=None, thread_num=999, log_data=False, brain=None, stop=None, t_queue=None, none_state=None):
        super(Environment, self).__init__()
        self.logger = logging.getLogger('sc2rl.' + __name__ + " | " + str(thread_num))
        self.start_time = time.time()
        
        self.episodes = 0
        self.rewards = []
        self.steps = []
        self.log_data = log_data
        self.brain = brain
        self.stop = stop

        if sc2env is not None:
            self.env = sc2env
        else:
            self.env = bench.Monitor(helpers.get_env_wrapper(render=FLAGS.render), os.path.join('logs/', '{}.monitor.json'.format(thread_num)))

        self.agent = Agent(self.env.action_space.n, brain=brain, t_queue=t_queue, none_state=none_state)
Code example #25
    def _thunk():

        if rep_type == "seg":
            n_channels = 1
        else:
            n_channels = 3


        env = VizdoomEnv(env_id=env_id,
                         scenario=scenario,
                         seed=seed,
                         rep_type=rep_type,
                         resolution=resolution,
                         n_channels=n_channels,
                         patch_count=patch_count,
                         reverse_green=reverse_green
                         )
        env.seed(seed + rank)
        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find('TimeLimit') >= 0:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(
                env,
                os.path.join(log_dir, str(rank)),
                allow_early_resets=allow_early_resets)

        # if is_atari:
        #     if len(env.observation_space.shape) == 3:
        #         env = wrap_deepmind(env)
        # elif len(env.observation_space.shape) == 3:
        #     raise NotImplementedError(
        #         "CNN models work only for atari,\n"
        #         "please use a custom wrapper for a custom pixel input env.\n"
        #         "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        #obs_shape = env.observation_space.shape
        #if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
        #    env = TransposeImage(env, op=[2, 0, 1])

        return env
Code example #26
File: envs.py Project: aupilot/a2c
    def _thunk():
        if env_id == 'MinitaurKirEnv':
            # env = gym.make(env_id)
            env = MinitaurKirEnv(energy_weight=energy)

        elif env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id)
            env.energy_weight = energy

        # is_atari = hasattr(gym.envs, 'atari') and isinstance(
        #     env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        # if is_atari:
        #     env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if add_timestep and len(
                obs_shape) == 1 and str(env).find('TimeLimit') > -1:
            env = AddTimestep(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        # if is_atari:
        #     if len(env.observation_space.shape) == 3:
        #         env = wrap_deepmind(env)
        # elif len(env.observation_space.shape) == 3:
        #     raise NotImplementedError("CNN models work only for atari,\n"
        #         "please use a custom wrapper for a custom pixel input env.\n"
        #         "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env)

        return env
Code example #27
def main(envName='BreakoutNoFrameskip-v4', bufferSize=10000, timesteps=3e6):
    # parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    # parser.add_argument('--buffer', type=int, default=10000)
    # parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    # parser.add_argument('--prioritized', type=int, default=1)
    # parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    # parser.add_argument('--dueling', type=int, default=1)
    # parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    # parser.add_argument('--checkpoint-freq', type=int, default=10000)
    # parser.add_argument('--checkpoint-path', type=str, default=os.getcwd()+"/logs")
    # args = parser.parse_args()
    # logger.configure(dir=args.checkpoint_path)
    logger.configure(dir=os.getcwd() + "/logs/" + str(envName) + "_" +
                     str(bufferSize))
    set_global_seeds(0)
    env = make_atari(envName)
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)
    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[512],
        dueling=bool(1),
    )

    act = deepq.learn(
        env,
        q_func=model,
        lr=1e-4,
        max_timesteps=int(timesteps),
        buffer_size=bufferSize,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(1),
        prioritized_replay_alpha=0.6,
        checkpoint_freq=10000,
    )
    act.save(os.getcwd() + "/logs/" + str(envName) + "_" + str(bufferSize) +
             "/model.pkl")
    env.close()
Code example #28
def train(env_id, num_timesteps, seed):
    from baselines.ppo_abysmal2 import mlp_policy, pposgd_simple_test, cnn_policy, capsule_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    #env = make_atari(env_id)
    env = gym.make('Abysmal-v0')

    def policy_fn(name, ob_space, ac_space):  #pylint: disable=W0613
        return capsule_policy.Capsule_policy(name=name,
                                             ob_space=ob_space,
                                             ac_space=ac_space)
        #return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space, hid_size=256, num_hid_layers=4)
        #return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)

    env = bench.Monitor(
        env,
        logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
    #env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)

    #env = wrap_deepmind(env)
    #env.seed(workerseed)

    pposgd_simple_test.learn(env,
                             policy_fn,
                             max_timesteps=int(num_timesteps * 1.1),
                             timesteps_per_actorbatch=0,
                             clip_param=0.2,
                             entcoeff=0.01,
                             optim_epochs=10,
                             optim_stepsize=1e-3,
                             optim_batchsize=32,
                             gamma=0.99,
                             lam=0.95,
                             schedule='linear')
    env.close()
Code example #29
File: run_isaac_acktr.py Project: james/baselines
def train(env_id, num_timesteps, timesteps_per_batch, seed, num_cpu, hid_size,
          num_hid_layers, resume, agentName, logdir, desired_kl, gamma, lam,
          portnum, num_parallel):
    if num_parallel > 1:
        env = CustomParallelEnv(num_parallel)
    else:
        env = gym.make(env_id)
        env.seed(seed)  # Todo: add seed to the random env too

    if logger.get_dir():
        env = bench.Monitor(env, os.path.join(logger.get_dir(),
                                              "monitor.json"))
    set_global_seeds(seed)

    gym.logger.setLevel(logging.WARN)

    with tf.Session(config=tf.ConfigProto()) as session:
        ob_dim = env.observation_space.shape[0]
        ac_dim = env.action_space.shape[0]
        with tf.variable_scope("vf"):
            vf = NeuralNetValueFunction(ob_dim,
                                        ac_dim,
                                        hid_size=512,
                                        num_hid_layers=2)
        with tf.variable_scope("pi"):
            policy = GaussianMlpPolicy(ob_dim,
                                       ac_dim,
                                       hid_size=512,
                                       num_hid_layers=2)

        learn(env,
              policy=policy,
              vf=vf,
              gamma=gamma,
              lam=0.97,
              timesteps_per_batch=timesteps_per_batch,
              resume=resume,
              desired_kl=desired_kl,
              agentName=agentName,
              logdir=logdir,
              num_timesteps=num_timesteps,
              animate=False)

        env.close()
Code example #30
    def ppo_baselines(log_dir, env_id, seed):
        """Create baselines model and training.

        Args:
            log_dir (str): Experiment log directory.
            env_id (str): Environment id of the task.
            seed (int): Random positive integer for the trial.

        """
        # Set up TF Session
        ncpu = max(multiprocessing.cpu_count() // 2, 1)
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=ncpu,
                                inter_op_parallelism_threads=ncpu)
        tf.compat.v1.Session(config=config).__enter__()

        # Set up baselines logger
        configure(dir=log_dir,
                  format_strs=['stdout', 'log', 'csv', 'tensorboard'])
        baselines_logger.info('rank {}: seed={}, logdir={}'.format(
            0, seed, baselines_logger.get_dir()))

        set_global_seeds(seed)

        env = DummyVecEnv([
            lambda: bench.Monitor(gym.make(env_id),
                                  baselines_logger.get_dir(),
                                  allow_early_resets=True)
        ])

        ppo2.learn(network='mlp',
                   env=env,
                   nsteps=hyper_parameters['batch_size'],
                   nminibatches=32,
                   lam=0.95,
                   gamma=0.99,
                   noptepochs=10,
                   log_interval=1,
                   ent_coef=0.0,
                   max_grad_norm=None,
                   lr=3e-4,
                   cliprange=0.2,
                   total_timesteps=(hyper_parameters['batch_size'] *
                                    hyper_parameters['n_epochs']))