type=int) parser.add_argument('-vae', '--vae-path', help='Path to saved VAE', type=str, default='') args = parser.parse_args() algo = args.algo folder = args.folder model = None vae = None if algo != '': if args.exp_id == 0: args.exp_id = get_latest_run_id(os.path.join(folder, algo), ENV_ID) print('Loading latest experiment, id={}'.format(args.exp_id)) # Sanity checks if args.exp_id > 0: log_path = os.path.join(folder, algo, '{}_{}'.format(ENV_ID, args.exp_id)) else: log_path = os.path.join(folder, algo) model_path = "{}/{}.pkl".format(log_path, ENV_ID) assert os.path.isdir(log_path), "The {} folder was not found".format( log_path) assert os.path.isfile( model_path), "No model found for {} on {}, path: {}".format(
skip_eval = len(x) > 0 except (json.JSONDecodeError, pd.errors.EmptyDataError, TypeError): pass if skip_eval: print("Skipping eval...") else: return_code = subprocess.call(["python", "enjoy.py"] + arguments) if return_code != 0: print("Error during evaluation, skipping...") continue x, y = ts2xy(load_results(reward_log), "timesteps") if len(x) > 0: # Retrieve training timesteps from config exp_id = get_latest_run_id(os.path.join(args.log_dir, algo), env_id) log_path = os.path.join(args.log_dir, algo, f"{env_id}_{exp_id}", env_id) hyperparams, _ = get_saved_hyperparams(log_path) # Hack to format it properly if hyperparams["n_timesteps"] < 1e6: n_training_timesteps = f"{int(hyperparams['n_timesteps'] / 1e3)}k" else: n_training_timesteps = f"{int(hyperparams['n_timesteps'] / 1e6)}M" mean_reward = np.mean(y) std_reward = np.std(y) results["algo"].append(algo) results["env_id"].append(env_id) results["mean_reward"].append(mean_reward) results["std_reward"].append(std_reward)
for key in sorted(hyperparams.keys())]) # save vae path saved_hyperparams['vae_path'] = args.vae_path if vae is not None: saved_hyperparams['z_size'] = vae.z_size # Save simulation params for key in SIM_PARAMS: saved_hyperparams[key] = eval(key) pprint(saved_hyperparams) # Compute and create log path log_path = os.path.join(args.log_folder, args.algo) save_path = os.path.join( log_path, "{}_{}".format(ENV_ID, get_latest_run_id(log_path, ENV_ID) + 1)) params_path = os.path.join(save_path, ENV_ID) os.makedirs(params_path, exist_ok=True) # Create learning rate schedules for ppo2 and sac if args.algo in ["ppo2", "sac"]: for key in ['learning_rate', 'cliprange']: if key not in hyperparams: continue if isinstance(hyperparams[key], str): schedule, initial_value = hyperparams[key].split('_') initial_value = float(initial_value) hyperparams[key] = linear_schedule(initial_value) elif isinstance(hyperparams[key], float): hyperparams[key] = constfn(hyperparams[key]) else:
# Load hyperparameters from yaml file with open('hyperparams/{}.yml'.format(args.algo), 'r') as f: hyperparams = yaml.load(f)[BASE_ENV] # Sort hyperparams that will be saved saved_hyperparams = OrderedDict([(key, hyperparams[key]) for key in sorted(hyperparams.keys())]) # save vae path saved_hyperparams['vae_path'] = args.vae_path if vae is not None: saved_hyperparams['z_size'] = vae.z_size # Compute and create log path log_path = os.path.join(args.log_folder, args.algo) save_path = os.path.join(log_path, "{}_{}".format(ENV_ID, get_latest_run_id(log_path, ENV_ID) + 1)) params_path = os.path.join(save_path, ENV_ID) os.makedirs(params_path, exist_ok=True) # Create learning rate schedules for ppo2 and sac if args.algo in ["ppo2", "sac"]: for key in ['learning_rate', 'cliprange']: if key not in hyperparams: continue if isinstance(hyperparams[key], str): schedule, initial_value = hyperparams[key].split('_') initial_value = float(initial_value) hyperparams[key] = linear_schedule(initial_value) elif isinstance(hyperparams[key], float): hyperparams[key] = constfn(hyperparams[key]) else:
# Sort hyperparams that will be saved saved_hyperparams = OrderedDict([(key, hyperparams[key]) for key in sorted(hyperparams.keys())]) # save vae path saved_hyperparams['vae_path'] = args.vae_path if vae is not None: saved_hyperparams['z_size'] = vae.z_size # Save simulation params for key in SIM_PARAMS: saved_hyperparams[key] = eval(key) saved_hyperparams['seed'] = args.seed pprint(saved_hyperparams) # Compute and create log path log_path = os.path.join(args.log_folder, args.algo) save_path = os.path.join(log_path, "{}_{}".format(ID_ENV+'_'+str(SIZE_Z), get_latest_run_id(log_path, ID_ENV) + 1)) params_path = os.path.join(save_path, ID_ENV) os.makedirs(params_path, exist_ok=True) # Create learning rate schedules for ppo2 and sac if args.algo in ["ppo2", "sac"]: for key in ['learning_rate', 'cliprange']: if key not in hyperparams: continue if isinstance(hyperparams[key], str): schedule, initial_value = hyperparams[key].split('_') initial_value = float(initial_value) hyperparams[key] = linear_schedule(initial_value) elif isinstance(hyperparams[key], float): hyperparams[key] = constfn(hyperparams[key]) else:
def record_video(env_id: str = "CartPole-v1",
                 algo: str = "ppo",
                 folder: str = "rl-trained-agents",
                 video_folder: str = "logs/videos/",
                 video_length: int = 1000,
                 n_envs: int = 1,
                 deterministic: bool = False,
                 seed: int = 0,
                 no_render: bool = False,
                 exp_id: int = 0):
    """Replay a saved agent and record the rollout as a video.

    Locates the saved run for ``algo``/``env_id`` under ``folder``, loads the
    model and its saved hyperparameters, wraps the evaluation env in a
    ``VecVideoRecorder``, and steps the policy for ``video_length`` steps so
    the recorder writes a video into ``video_folder``.

    :param env_id: environment id; used both to build the env and to locate
        the saved model/stats on disk.
    :param algo: algorithm name; used as a subfolder name and as a key into
        the ``ALGOS`` registry to pick the loader class.
    :param folder: root folder containing trained agents (``folder/algo/...``).
    :param video_folder: output directory passed to ``VecVideoRecorder``.
    :param video_length: number of recorded frames; the policy is stepped
        ``video_length + 1`` times.
    :param n_envs: number of parallel envs for ``create_test_env``.
    :param deterministic: passed to ``model.predict``.
    :param seed: env seed passed to ``create_test_env``.
    :param no_render: when True, ``create_test_env`` is told not to render.
    :param exp_id: experiment id. 0 means "resolve to the latest run id";
        a positive id selects ``folder/algo/{env_id}_{exp_id}``; any other
        value falls back to ``folder/algo`` directly.
    """
    # exp_id == 0 is a sentinel: resolve it to the most recent run on disk.
    if exp_id == 0:
        exp_id = get_latest_run_id(os.path.join(folder, algo), env_id)
        print(f"Loading latest experiment, id={exp_id}")
    # Sanity checks
    if exp_id > 0:
        log_path = os.path.join(folder, algo, f"{env_id}_{exp_id}")
    else:
        # Non-positive id: load straight from the algo folder (no run suffix).
        log_path = os.path.join(folder, algo)

    model_path = os.path.join(log_path, f"{env_id}.zip")
    # get_saved_hyperparams also rewrites stats_path (e.g. to the actual
    # stats location) — note the tuple unpacking below.
    stats_path = os.path.join(log_path, env_id)
    hyperparams, stats_path = get_saved_hyperparams(stats_path)

    # Heuristic: Atari envs are identified by the "NoFrameskip" suffix;
    # used later to pick the env-closing strategy.
    is_atari = "NoFrameskip" in env_id

    env = create_test_env(
        env_id,
        n_envs=n_envs,
        stats_path=stats_path,
        seed=seed,
        log_dir=None,
        should_render=not no_render,
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(model_path)

    # First observation comes from the *unwrapped* env; the recorder wrapper
    # below is reset separately and then stepped with this obs.
    obs = env.reset()

    # Note: apparently it renders by default
    env = VecVideoRecorder(
        env,
        video_folder,
        # Start recording immediately (trigger fires at step 0 only).
        record_video_trigger=lambda x: x == 0,
        video_length=video_length,
        name_prefix=f"{algo}-{env_id}",
    )

    env.reset()
    for _ in range(video_length + 1):
        action, _ = model.predict(obs, deterministic=deterministic)
        obs, _, _, _ = env.step(action)

    # Workaround for https://github.com/openai/gym/issues/893
    # For a single non-Bullet, non-Atari env, unwrap down to the innermost
    # vec-env and close the raw env directly instead of the wrapper stack.
    if n_envs == 1 and "Bullet" not in env_id and not is_atari:
        env = env.venv
        # DummyVecEnv
        while isinstance(env, VecEnvWrapper):
            env = env.venv
        if isinstance(env, DummyVecEnv):
            env.envs[0].env.close()
        else:
            env.close()
    else:
        # SubprocVecEnv
        env.close()