Example #1
import torch
from torch.utils.tensorboard import SummaryWriter
# PPO is the project-local agent class; its import path depends on the repository layout.


def main(env_id, render, num_process, lr_p, lr_v, gamma, tau, epsilon,
         batch_size, ppo_mini_batch_size, ppo_epochs, max_iter, eval_iter,
         save_iter, model_path, log_path, seed):
    base_dir = log_path + env_id + "/PPO_exp{}".format(seed)
    writer = SummaryWriter(base_dir)

    ppo = PPO(env_id=env_id,
              render=render,
              num_process=1,  # evaluation runs a single worker; the num_process argument is not used here
              min_batch_size=batch_size,
              lr_p=lr_p,
              lr_v=lr_v,
              gamma=gamma,
              tau=tau,
              clip_epsilon=epsilon,
              ppo_epochs=ppo_epochs,
              ppo_mini_batch_size=ppo_mini_batch_size,
              seed=seed,
              model_path='trained_models')  # fixed checkpoint directory; the model_path argument is not used here

    for i_iter in range(1, 6):  # five evaluation rounds; max_iter is not used here

        ppo.eval(i_iter, render=True)

        torch.cuda.empty_cache()  # release cached GPU memory between rounds
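
One way to drive this evaluation-only entry point is a small script that supplies the expected keyword arguments. A minimal sketch follows; the environment id, learning rates, and paths are illustrative values, not taken from the original project.

if __name__ == "__main__":
    # Hypothetical driver for the eval-only main() above; every value is illustrative.
    main(env_id="BipedalWalker-v3",
         render=True,
         num_process=1,
         lr_p=3e-4,
         lr_v=3e-4,
         gamma=0.99,
         tau=0.95,
         epsilon=0.2,
         batch_size=2048,
         ppo_mini_batch_size=64,
         ppo_epochs=10,
         max_iter=5,
         eval_iter=1,
         save_iter=50,
         model_path="trained_models",
         log_path="./log/",
         seed=1)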
Example #2
import pickle

import torch
from torch.utils.tensorboard import SummaryWriter
# PPO is the project-local agent class; its import path depends on the repository layout.


def main(env_id, dim_latent, render, num_process, lr_p, lr_v, gamma, tau,
         epsilon, batch_size, ppo_mini_batch_size, ppo_epochs, max_iter,
         eval_iter, save_iter, model_path, log_path, seed):
    base_dir = log_path + env_id + "/PPO_encoder_exp{}".format(seed)
    writer = SummaryWriter(base_dir)

    ppo = PPO(
        env_id=env_id,
        dim_latent=dim_latent,
        render=render,
        num_process=20,  # fixed number of rollout workers; cpu_count() could be used instead
        min_batch_size=batch_size,
        lr_p=lr_p,
        lr_v=lr_v,
        gamma=gamma,
        tau=tau,
        clip_epsilon=epsilon,
        ppo_epochs=ppo_epochs,
        ppo_mini_batch_size=ppo_mini_batch_size,
        seed=seed)

    for i_iter in range(1, max_iter + 1):
        ppo.learn(writer, i_iter)

        if i_iter % eval_iter == 0:  # periodic evaluation
            ppo.eval(i_iter, render=render)

        if i_iter % save_iter == 0:  # periodic checkpointing
            ppo.save(model_path)

            # persist the full agent object alongside the regular checkpoint
            with open('{}/{}_ppo_encoder.p'.format(model_path, env_id),
                      'wb') as f:
                pickle.dump(ppo, f)

        torch.cuda.empty_cache()
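
The pickled agent written at each save step can later be restored without rebuilding the constructor arguments. A minimal sketch, assuming the same path pattern as the dump above and an illustrative env_id; the eval() call presumes the restored object keeps the interface used in the training loop.

import pickle

model_path = "trained_models"  # same directory that was passed to main()
env_id = "BipedalWalker-v3"    # illustrative environment id

with open('{}/{}_ppo_encoder.p'.format(model_path, env_id), 'rb') as f:
    ppo = pickle.load(f)  # restores the full PPO object, networks included

ppo.eval(1, render=True)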
Example #3
            v_lr=args.v_lr,
            dyn_lr=args.dyn_lr,
            clip_param=args.clip_param,
            value_coef=args.value_coef,
            entropy_coef=args.entropy_coef,
            grad_norm_max=args.grad_norm_max,
            use_clipped_value_loss=True,
            use_tensorboard=args.use_tensorboard,
            add_intrinsic_reward=args.add_intrinsic_reward,
            predict_delta_obs=args.predict_delta_obs,
            device=device,
            share_optim=args.share_optim,
            debug=None)

    ob_rms = agent.load_checkpoint(checkpoint_dir)
    agent.eval()

    # set same statistics for normalization as in training
    if ob_rms is not None and isinstance(envs.venv, VecNormalize):
        envs.venv.ob_rms = ob_rms

    # start testing
    start = time.time()

    for trial in range(args.num_evals):
        print('Trial', trial, 'of', args.num_evals)

        obs = envs.reset()
        agent.rollouts.obs[0].copy_(obs[1])
        agent.rollouts.to(device)
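
Copying ob_rms into the evaluation wrapper matters because VecNormalize standardizes each observation with the running mean and variance collected during training. A minimal numpy sketch of that transform; the clip range and epsilon are the usual baselines defaults and are assumed here rather than read from the snippet.

import numpy as np

def normalize_obs(obs, ob_rms, clip_obs=10.0, epsilon=1e-8):
    # Standardize with training-time running statistics, then clip to a fixed range.
    return np.clip((obs - ob_rms.mean) / np.sqrt(ob_rms.var + epsilon),
                   -clip_obs, clip_obs)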