Example #1
0
import argparse as arp
import os

from on_policy_experiments import make_env, env_list, generate_traj

if __name__ == '__main__':

    # Train an expert PPO policy on one of the benchmark environments and
    # periodically checkpoint it under <output>/<env>/<algo>/<policy>_expert/.
    parser = arp.ArgumentParser(description='Create an expert policy')
    parser.add_argument('-e', '--env', help='Environment index', type=int, default=0)
    parser.add_argument('-n', '--nenvs', help='Number of environments', type=int, default=16)
    parser.add_argument('-s', '--steps', help='Number of episode steps', type=int, default=64)
    parser.add_argument('-u', '--updates', help='Number of updates', type=int, default=1000)
    parser.add_argument('-o', '--output', help='Output directory', default='models')
    # BUG fix: argparse's type=bool treats ANY non-empty string as True
    # (bool('False') == True), so '-c False' would enable CUDA. Parse the
    # string explicitly instead; bare invocation still defaults to False.
    parser.add_argument('-c', '--cuda', help='Use CUDA', default=False,
                        type=lambda s: str(s).strip().lower() in ('1', 'true', 'yes', 'y'))
    args = parser.parse_args()

    # Hide all GPUs from the DL framework when CUDA is disabled.
    if not args.cuda:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    env_class = env_list[args.env]
    nenvs = args.nenvs
    algorithm = ppo
    # Total env steps = steps per update per worker x updates x workers.
    totalsteps = args.steps * args.updates * nenvs
    # One factory per parallel worker; SubprocVecEnv runs them in subprocesses.
    env_fns = [make_env(env_class) for _ in range(nenvs)]
    env = SubprocVecEnv(env_fns)

    postfix = 'expert'
    logdir = f'{args.output}/{env_class.__name__}/{algorithm.__name__}/{policy.__name__}_{postfix}/'
    # BUG fix: os.getenv('') looked up an empty-named variable, so the default
    # was always used. OPENAI_LOG_FORMAT is the conventional override key for
    # the stable-baselines logger output formats.
    format_strs = os.getenv('OPENAI_LOG_FORMAT', 'stdout,log,csv').split(',')
    logger.configure(os.path.abspath(logdir), format_strs)
    model = algorithm(policy, env, n_steps=args.steps, verbose=1)
    # Save a checkpoint once per update (steps * nenvs env steps).
    cb = CheckpointCallback(args.steps * nenvs, logdir, verbose=1)
    model.learn(total_timesteps=totalsteps, callback=cb)
Example #2
0
                        type=bool)
    # Path (relative to the output dir / env dir) of the expert model whose
    # checkpoints will be used as the behavioural-cloning trainer.
    parser.add_argument('-t',
                        '--trainer',
                        help='Expert model',
                        default='PPO2/policy_1_expert')
    args = parser.parse_args()

    # Hide all GPUs from the DL framework when CUDA is disabled.
    if not args.cuda:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    # Resolve the environment class and algorithm from their index lists.
    env_class = env_list[args.env]
    nenvs = args.nenvs
    algorithm = algorithm_list[args.algorithm]
    # Total env steps = steps per update per worker x updates x workers.
    totalsteps = args.steps * args.updates * nenvs
    env_fns = [make_env(env_class) for _ in range(nenvs)]
    env = SubprocVecEnv(env_fns)
    # A single-worker copy of the environment used for evaluation only.
    eval_env_fns = [make_env(env_class) for _ in range(1)]
    eval_env = SubprocVecEnv(eval_env_fns)

    if args.trainer is not None:

        postfix = 'bc'
        #checkpoint_file = f'{args.output}/{env_class.__name__}/{args.trainer}/rl_model_{good_checkpoints[args.env]}_steps.zip'
        # Instead of a hard-coded checkpoint index (commented out above),
        # pick the checkpoint with the best logged explained variance.
        checkpoint_file = find_checkpoint_with_highest_explained_variance(
            f'{args.output}/{env_class.__name__}/{args.trainer}')
        trainer_model = ppo.load(checkpoint_file)
        # Attach the vectorized training env so the expert can be rolled out.
        trainer_model.set_env(env)
        print('Expert model has been successfully loaded from {0}'.format(
            checkpoint_file))

        # Expert trajectories collected below (continues past this excerpt).
        trajs = []