import os
import argparse as arp

from stable_baselines import logger
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.common.callbacks import CheckpointCallback

# ppo and policy are used below but were not imported in the original;
# they are assumed to live in the experiment module alongside make_env
from on_policy_experiments import make_env, env_list, generate_traj, ppo, policy

if __name__ == '__main__':

    parser = arp.ArgumentParser(description='Create an expert policy')
    parser.add_argument('-e', '--env', help='Environment index', type=int, default=0)
    parser.add_argument('-n', '--nenvs', help='Number of environments', type=int, default=16)
    parser.add_argument('-s', '--steps', help='Number of episode steps', type=int, default=64)
    parser.add_argument('-u', '--updates', help='Number of updates', type=int, default=1000)
    parser.add_argument('-o', '--output', help='Output directory', default='models')
    # type=bool is a bug: argparse would turn any non-empty string
    # (including 'False') into True, so use a store_true flag instead
    parser.add_argument('-c', '--cuda', help='Use CUDA', action='store_true')
    args = parser.parse_args()

    # hide all GPUs from TensorFlow when CUDA is disabled
    if not args.cuda:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    env_class = env_list[args.env]
    nenvs = args.nenvs
    algorithm = ppo
    totalsteps = args.steps * args.updates * nenvs

    # vectorized training environments, one subprocess per environment
    env_fns = [make_env(env_class) for _ in range(nenvs)]
    env = SubprocVecEnv(env_fns)

    postfix = 'expert'
    logdir = f'{args.output}/{env_class.__name__}/{algorithm.__name__}/{policy.__name__}_{postfix}/'
    # the original read os.getenv('', ...), which always falls back to the
    # default; OPENAI_LOG_FORMAT is the variable the baselines logger expects
    format_strs = os.getenv('OPENAI_LOG_FORMAT', 'stdout,log,csv').split(',')
    logger.configure(os.path.abspath(logdir), format_strs)

    # train the expert, saving a checkpoint once per update
    model = algorithm(policy, env, n_steps=args.steps, verbose=1)
    cb = CheckpointCallback(args.steps * nenvs, logdir, verbose=1)
    model.learn(total_timesteps=totalsteps, callback=cb)
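# Example invocation (the script file name create_expert.py is hypothetical;
# the flags are the ones defined above): trains a PPO expert on environment 0
# with 16 parallel envs and checkpoints under models/:
#
#   python create_expert.py -e 0 -n 16 -s 64 -u 1000 -o models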
# --- separate script: training with a pre-trained expert. The original
# fragment begins mid-parser (at the tail of the --cuda argument), so the
# imports and the earlier parser arguments below are reconstructed from the
# expert script above and from how args is used further down. ---

import os
import argparse as arp

from stable_baselines.common.vec_env import SubprocVecEnv

# assumed to live in the experiment module; only their call sites survive
# in the original fragment
from on_policy_experiments import (make_env, env_list, algorithm_list, ppo,
                                   find_checkpoint_with_highest_explained_variance)

if __name__ == '__main__':

    parser = arp.ArgumentParser(description='Train a policy with an expert model')
    parser.add_argument('-e', '--env', help='Environment index', type=int, default=0)
    parser.add_argument('-a', '--algorithm', help='Algorithm index', type=int, default=0)
    parser.add_argument('-n', '--nenvs', help='Number of environments', type=int, default=16)
    parser.add_argument('-s', '--steps', help='Number of episode steps', type=int, default=64)
    parser.add_argument('-u', '--updates', help='Number of updates', type=int, default=1000)
    parser.add_argument('-o', '--output', help='Output directory', default='models')
    parser.add_argument('-c', '--cuda', help='Use CUDA', action='store_true')
    parser.add_argument('-t', '--trainer', help='Expert model', default='PPO2/policy_1_expert')
    args = parser.parse_args()

    # hide all GPUs from TensorFlow when CUDA is disabled
    if not args.cuda:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    env_class = env_list[args.env]
    nenvs = args.nenvs
    algorithm = algorithm_list[args.algorithm]
    totalsteps = args.steps * args.updates * nenvs

    # vectorized training environments plus a single evaluation environment
    env_fns = [make_env(env_class) for _ in range(nenvs)]
    env = SubprocVecEnv(env_fns)
    eval_env_fns = [make_env(env_class) for _ in range(1)]
    eval_env = SubprocVecEnv(eval_env_fns)

    if args.trainer is not None:
        postfix = 'bc'
        # checkpoint_file = f'{args.output}/{env_class.__name__}/{args.trainer}/rl_model_{good_checkpoints[args.env]}_steps.zip'
        checkpoint_file = find_checkpoint_with_highest_explained_variance(
            f'{args.output}/{env_class.__name__}/{args.trainer}')
        trainer_model = ppo.load(checkpoint_file)
        trainer_model.set_env(env)
        print(f'Expert model has been successfully loaded from {checkpoint_file}')

    trajs = []
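    # A minimal sketch (not part of the original fragment) of how the empty
    # trajs buffer might be filled next, assuming generate_traj(model, env,
    # n_steps) from on_policy_experiments performs one rollout with the
    # loaded expert and returns it; both the helper's signature and the loop
    # bound are assumptions:
    #
    #   for _ in range(args.updates):
    #       trajs.append(generate_traj(trainer_model, eval_env, args.steps))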