예제 #1
0
def make_mujoco_env(env_id, seed, normalize=False, training=True):
    """Build a single seeded MuJoCo gym env wrapped in a DummyVecEnv.

    Also seeds numpy's and torch's global RNGs for reproducibility.
    When *normalize* is true, the vec env is additionally wrapped in
    VecNormalize with the given *training* flag.
    """
    def _thunk():
        single_env = gym.make(env_id)
        single_env.seed(seed)
        return single_env

    vec_env = DummyVecEnv([_thunk])
    # Global RNG seeding happens after env construction, matching the
    # point in the pipeline where randomness starts to matter.
    np.random.seed(seed)
    torch.manual_seed(seed)
    return VecNormalize(vec_env, training=training) if normalize else vec_env
예제 #2
0
def build_env4gail(args, nenv):
    """Build a normalized SubprocVecEnv of *nenv* monitored gym envs for GAIL.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``env_id`` and ``seed``.
    nenv : int
        Number of parallel worker environments.

    Returns
    -------
    VecNormalize-wrapped SubprocVecEnv.
    """
    def make_env(rank):
        def _thunk():
            env = gym.make(args.env_id)
            # Offset the seed by the worker rank: seeding every worker with
            # the identical seed makes the parallel envs step in lockstep,
            # defeating the purpose of running more than one. rank 0 keeps
            # the original args.seed, so single-env runs are unchanged.
            env.seed(args.seed + rank)
            # NOTE(review): all workers share one Monitor log dir — presumably
            # the Monitor implementation disambiguates files; verify.
            env = Monitor(env, logger.get_dir(), allow_early_resets=True)
            return env

        return _thunk

    envs = SubprocVecEnv([make_env(rank) for rank in range(nenv)])
    return VecNormalize(envs)
예제 #3
0
파일: run.py 프로젝트: Caiit/RL_project
def build_env(args):
    """Construct the (vectorized) training environment described by *args*.

    Atari/retro envs get algorithm-specific wrapping (frame stacking for
    most algorithms); everything else gets a single-threaded TF session
    plus observation/return normalization for MuJoCo.
    """
    cpu_count = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        cpu_count //= 2
    nenv = args.num_env or cpu_count
    alg, seed = args.alg, args.seed

    env_type, env_id = get_env_type(args.env)

    if env_type not in {'atari', 'retro'}:
        # Non-pixel envs: pin TF to single-threaded ops, let GPU memory grow.
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)
        if env_type == 'mujoco':
            env = VecNormalize(env)
        return env

    # Atari / retro branch.
    if alg == 'deepq':
        return make_env(env_id, env_type, seed=seed,
                        wrapper_kwargs={'frame_stack': True})
    if alg == 'trpo_mpi':
        return make_env(env_id, env_type, seed=seed)

    env = make_vec_env(env_id, env_type, nenv, seed,
                       gamestate=args.gamestate,
                       reward_scale=args.reward_scale)
    return VecFrameStack(env, 4)
예제 #4
0
def main():
    """Parse the --platform choice and launch PPO training for it.

    Dispatches to one private helper per supported platform; 'humanoid'
    and 'robotics' are accepted choices but (as in the original) have no
    handler yet.
    """
    parser = arg_parser()
    parser.add_argument('--platform', help='environment choice',
                        choices=['atari', 'mujoco', 'humanoid', 'robotics'],
                        default='atari')
    platform_args, environ_args = parser.parse_known_args()
    platform = platform_args.platform
    logger.configure()

    if platform == 'atari':
        _run_atari()
    if platform == 'mujoco':
        _run_mujoco()


def _run_atari():
    """Train PPO on a frame-stacked Atari env, then tear down cleanly."""
    parser = atari_arg_parser()
    parser.add_argument('--policy', help='Policy architecture',
                        choices=['cnn', 'lstm', 'lnlstm', 'mlp'],
                        default='cnn')
    args = parser.parse_known_args()[0]

    sess = Agent().init_session().__enter__()
    # 8 parallel envs, 4 stacked frames — standard PPO Atari setup.
    env = VecFrameStack(make_atari_env(args.env, 8, args.seed), 4)
    policy = {'cnn': Convnet,
              'lstm': Lstm,
              'lnlstm': LnLstm,
              'mlp': Mlp}[args.policy]

    fit(
        policy=policy,
        env=env,
        nsteps=128,
        nminibatches=8,
        lam=0.95,
        gamma=0.99,
        noptepochs=4,
        log_interval=1,
        ent_coef=.01,
        lr=lambda f: f * 2.5e-4,           # linearly annealed learning rate
        cliprange=lambda f: f * 0.1,       # linearly annealed clip range
        total_timesteps=int(args.num_timesteps * 1.1)
    )

    # Close the env before the TF session it may still reference.
    env.close()
    sess.close()
    del sess


def _run_mujoco():
    """Train PPO on a normalized MuJoCo env; optionally replay the policy.

    When --play is set, the replay loop runs forever (original behavior),
    so the cleanup below is only reached in the non-play path.
    """
    args = mujoco_arg_parser().parse_known_args()[0]

    sess = Agent().init_session().__enter__()
    from utils.monitor import Monitor

    def make_env():
        env = make_mujoco_env(args.env, args.seed)
        env = Monitor(env, logger.get_dir(), allow_early_resets=True)
        return env

    env = DummyVecEnv([make_env])
    env = VecNormalize(env)

    model = fit(
        policy=Mlp,
        env=env,
        nsteps=2048,
        nminibatches=32,
        lam=0.95,
        gamma=0.99,
        noptepochs=10,
        log_interval=1,
        ent_coef=0.0,
        lr=3e-4,
        cliprange=0.2,
        total_timesteps=args.num_timesteps
    )

    if args.play:
        logger.log("Running trained model")
        obs = np.zeros((env.num_envs,) + env.observation_space.shape)
        obs[:] = env.reset()
        while True:
            actions = model.step(obs)[0]
            obs[:] = env.step(actions)[0]
            env.render()

    # Close the env before the TF session it may still reference.
    env.close()
    sess.close()
    del sess