Example #1
    parser.add_argument('--exp-id',
                        help='Experiment ID (0: latest)',
                        default=0,
                        type=int)
    parser.add_argument('-vae',
                        '--vae-path',
                        help='Path to saved VAE',
                        type=str,
                        default='')
    args = parser.parse_args()

    algo = args.algo
    folder = args.folder
    model = None
    vae = None

    if algo != '':
        if args.exp_id == 0:
            args.exp_id = get_latest_run_id(os.path.join(folder, algo), ENV_ID)
            print('Loading latest experiment, id={}'.format(args.exp_id))

        # Sanity checks
        if args.exp_id > 0:
            log_path = os.path.join(folder, algo,
                                    '{}_{}'.format(ENV_ID, args.exp_id))
        else:
            log_path = os.path.join(folder, algo)

        model_path = "{}/{}.pkl".format(log_path, ENV_ID)

        assert os.path.isdir(log_path), "The {} folder was not found".format(
            log_path)
        assert os.path.isfile(
            model_path), "No model found for {} on {}, path: {}".format(
                algo, ENV_ID, model_path)
Example #2
        skip_eval = False
        try:
            x, y = ts2xy(load_results(reward_log), "timesteps")
            skip_eval = len(x) > 0
        except (json.JSONDecodeError, pd.errors.EmptyDataError, TypeError):
            pass

    if skip_eval:
        print("Skipping eval...")
    else:
        return_code = subprocess.call(["python", "enjoy.py"] + arguments)
        if return_code != 0:
            print("Error during evaluation, skipping...")
            continue
        x, y = ts2xy(load_results(reward_log), "timesteps")

    if len(x) > 0:
        # Retrieve training timesteps from config
        exp_id = get_latest_run_id(os.path.join(args.log_dir, algo), env_id)
        log_path = os.path.join(args.log_dir, algo, f"{env_id}_{exp_id}",
                                env_id)
        hyperparams, _ = get_saved_hyperparams(log_path)
        # Hack to format it properly
        if hyperparams["n_timesteps"] < 1e6:
            n_training_timesteps = f"{int(hyperparams['n_timesteps'] / 1e3)}k"
        else:
            n_training_timesteps = f"{int(hyperparams['n_timesteps'] / 1e6)}M"

        mean_reward = np.mean(y)
        std_reward = np.std(y)
        results["algo"].append(algo)
        results["env_id"].append(env_id)
        results["mean_reward"].append(mean_reward)
        results["std_reward"].append(std_reward)
Example #3
# Sort hyperparams that will be saved
saved_hyperparams = OrderedDict([(key, hyperparams[key])
                                 for key in sorted(hyperparams.keys())])
# Save VAE path
saved_hyperparams['vae_path'] = args.vae_path
if vae is not None:
    saved_hyperparams['z_size'] = vae.z_size

# Save simulation params
for key in SIM_PARAMS:
    saved_hyperparams[key] = eval(key)  # look up the module-level constant named by key
pprint(saved_hyperparams)

# Compute and create log path
log_path = os.path.join(args.log_folder, args.algo)
save_path = os.path.join(
    log_path, "{}_{}".format(ENV_ID,
                             get_latest_run_id(log_path, ENV_ID) + 1))
params_path = os.path.join(save_path, ENV_ID)
os.makedirs(params_path, exist_ok=True)

# Create learning rate schedules for ppo2 and sac
if args.algo in ["ppo2", "sac"]:
    for key in ['learning_rate', 'cliprange']:
        if key not in hyperparams:
            continue
        if isinstance(hyperparams[key], str):
            schedule, initial_value = hyperparams[key].split('_')
            initial_value = float(initial_value)
            hyperparams[key] = linear_schedule(initial_value)
        elif isinstance(hyperparams[key], float):
            hyperparams[key] = constfn(hyperparams[key])
        else:
            raise ValueError('Invalid value for {}: {}'.format(key, hyperparams[key]))
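The schedule code above calls `linear_schedule` and `constfn` without defining them. A minimal sketch consistent with that usage (assumed helpers, not necessarily the project's exact implementations):

def linear_schedule(initial_value):
    """Return a schedule that decays linearly from initial_value to 0."""
    if isinstance(initial_value, str):
        initial_value = float(initial_value)

    def func(progress):
        # progress is expected to go from 1 (start) to 0 (end of training)
        return progress * initial_value

    return func


def constfn(val):
    """Return a schedule that always yields the same value."""
    def func(_):
        return val

    return func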
Example #4
# Load hyperparameters from yaml file
with open('hyperparams/{}.yml'.format(args.algo), 'r') as f:
    hyperparams = yaml.safe_load(f)[BASE_ENV]


# Sort hyperparams that will be saved
saved_hyperparams = OrderedDict([(key, hyperparams[key]) for key in sorted(hyperparams.keys())])
# Save VAE path
saved_hyperparams['vae_path'] = args.vae_path
if vae is not None:
    saved_hyperparams['z_size'] = vae.z_size

# Compute and create log path
log_path = os.path.join(args.log_folder, args.algo)
save_path = os.path.join(log_path, "{}_{}".format(ENV_ID, get_latest_run_id(log_path, ENV_ID) + 1))
params_path = os.path.join(save_path, ENV_ID)
os.makedirs(params_path, exist_ok=True)

# Create learning rate schedules for ppo2 and sac
if args.algo in ["ppo2", "sac"]:
    for key in ['learning_rate', 'cliprange']:
        if key not in hyperparams:
            continue
        if isinstance(hyperparams[key], str):
            schedule, initial_value = hyperparams[key].split('_')
            initial_value = float(initial_value)
            hyperparams[key] = linear_schedule(initial_value)
        elif isinstance(hyperparams[key], float):
            hyperparams[key] = constfn(hyperparams[key])
        else:
            raise ValueError('Invalid value for {}: {}'.format(key, hyperparams[key]))
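With `saved_hyperparams` and `params_path` in hand, a natural follow-up is to persist the config next to the model so the run can be reproduced. A sketch (the `config.yml` file name is an assumption):

import os
import yaml

# Sketch: write the sorted hyperparams to disk for reproducibility.
with open(os.path.join(params_path, 'config.yml'), 'w') as f:
    yaml.dump(saved_hyperparams, f)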
Example #5
# Sort hyperparams that will be saved
saved_hyperparams = OrderedDict([(key, hyperparams[key]) for key in sorted(hyperparams.keys())])
# Save VAE path
saved_hyperparams['vae_path'] = args.vae_path
if vae is not None:
    saved_hyperparams['z_size'] = vae.z_size

# Save simulation params
for key in SIM_PARAMS:
    saved_hyperparams[key] = eval(key)  # look up the module-level constant named by key
saved_hyperparams['seed'] = args.seed
pprint(saved_hyperparams)

# Compute and create log path
log_path = os.path.join(args.log_folder, args.algo)
save_path = os.path.join(log_path, "{}_{}".format(ID_ENV + '_' + str(SIZE_Z), get_latest_run_id(log_path, ID_ENV) + 1))
params_path = os.path.join(save_path, ID_ENV)
os.makedirs(params_path, exist_ok=True)

# Create learning rate schedules for ppo2 and sac
if args.algo in ["ppo2", "sac"]:
    for key in ['learning_rate', 'cliprange']:
        if key not in hyperparams:
            continue
        if isinstance(hyperparams[key], str):
            schedule, initial_value = hyperparams[key].split('_')
            initial_value = float(initial_value)
            hyperparams[key] = linear_schedule(initial_value)
        elif isinstance(hyperparams[key], float):
            hyperparams[key] = constfn(hyperparams[key])
        else:
            raise ValueError('Invalid value for {}: {}'.format(key, hyperparams[key]))
Example #6
def record_video(env_id: str = "CartPole-v1",
                 algo: str = "ppo",
                 folder: str = "rl-trained-agents",
                 video_folder: str = "logs/videos/",
                 video_length: int = 1000,
                 n_envs: int = 1,
                 deterministic: bool = False,
                 seed: int = 0,
                 no_render: bool = False,
                 exp_id: int = 0):

    if exp_id == 0:
        exp_id = get_latest_run_id(os.path.join(folder, algo), env_id)
        print(f"Loading latest experiment, id={exp_id}")
    # Sanity checks
    if exp_id > 0:
        log_path = os.path.join(folder, algo, f"{env_id}_{exp_id}")
    else:
        log_path = os.path.join(folder, algo)

    model_path = os.path.join(log_path, f"{env_id}.zip")

    stats_path = os.path.join(log_path, env_id)
    hyperparams, stats_path = get_saved_hyperparams(stats_path)

    is_atari = "NoFrameskip" in env_id

    env = create_test_env(
        env_id,
        n_envs=n_envs,
        stats_path=stats_path,
        seed=seed,
        log_dir=None,
        should_render=not no_render,
        hyperparams=hyperparams,
    )

    model = ALGOS[algo].load(model_path)

    obs = env.reset()

    # Note: apparently it renders by default
    env = VecVideoRecorder(
        env,
        video_folder,
        record_video_trigger=lambda x: x == 0,
        video_length=video_length,
        name_prefix=f"{algo}-{env_id}",
    )

    env.reset()
    for _ in range(video_length + 1):
        action, _ = model.predict(obs, deterministic=deterministic)
        obs, _, _, _ = env.step(action)

    # Workaround for https://github.com/openai/gym/issues/893
    if n_envs == 1 and "Bullet" not in env_id and not is_atari:
        env = env.venv
        # DummyVecEnv
        while isinstance(env, VecEnvWrapper):
            env = env.venv
        if isinstance(env, DummyVecEnv):
            env.envs[0].env.close()
        else:
            env.close()
    else:
        # SubprocVecEnv
        env.close()
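A quick usage sketch for `record_video` (all argument values are placeholders; point them at your own trained runs):

if __name__ == "__main__":
    record_video(
        env_id="CartPole-v1",
        algo="ppo",
        folder="rl-trained-agents",
        video_folder="logs/videos/",
        video_length=500,
        deterministic=True,
    )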