def main(env_id, render, num_process, lr_p, lr_v, gamma, tau, epsilon,
         batch_size, ppo_mini_batch_size, ppo_epochs, max_iter, eval_iter,
         save_iter, model_path, log_path, seed):
    """Load a pre-trained PPO agent for ``env_id`` and run rendered eval episodes.

    Fixes relative to the previous version:
      * ``model_path`` was ignored in favour of a hard-coded
        ``'trained_models'`` directory — the argument is now honoured
        (matching the training entry point, which saves to ``model_path``).
      * An unused ``SummaryWriter`` — which only created an empty event
        directory under ``log_path`` — was removed.

    The remaining arguments (``num_process``, learning rates, ``gamma``,
    ``tau``, ``epsilon``, batch sizes, iteration counts) are accepted for
    interface compatibility with the training ``main``; evaluation itself
    runs single-process and always renders.
    """
    ppo = PPO(env_id=env_id,
              render=render,
              num_process=1,  # eval renders on screen — keep it single-process
              min_batch_size=batch_size,
              lr_p=lr_p,
              lr_v=lr_v,
              gamma=gamma,
              tau=tau,
              clip_epsilon=epsilon,
              ppo_epochs=ppo_epochs,
              ppo_mini_batch_size=ppo_mini_batch_size,
              seed=seed,
              model_path=model_path)  # fix: was hard-coded to 'trained_models'

    # Run five rendered evaluation episodes of the restored policy, freeing
    # cached GPU memory after each one.
    for i_iter in range(1, 6):
        ppo.eval(i_iter, render=True)  # forced on: rendering is the point here
        torch.cuda.empty_cache()
def main(env_id, dim_latent, render, num_process, lr_p, lr_v, gamma, tau,
         epsilon, batch_size, ppo_mini_batch_size, ppo_epochs, max_iter,
         eval_iter, save_iter, model_path, log_path, seed):
    """Train a PPO agent with a ``dim_latent``-dimensional state encoder.

    Logs TensorBoard scalars under
    ``{log_path}{env_id}/PPO_encoder_exp{seed}``, evaluates every
    ``eval_iter`` iterations, and checkpoints every ``save_iter`` iterations
    (both via ``ppo.save`` and as a pickled agent object).

    Fixes relative to the previous version:
      * ``num_process`` was ignored (hard-coded to 20, with commented-out
        ``cpu_count()`` residue) — the argument is now honoured.
      * The pickle file handle was opened without ever being closed; it is
        now managed with a ``with`` block.
    """
    base_dir = log_path + env_id + "/PPO_encoder_exp{}".format(seed)
    writer = SummaryWriter(base_dir)

    ppo = PPO(env_id=env_id,
              dim_latent=dim_latent,
              render=render,
              num_process=num_process,  # fix: was hard-coded to 20
              min_batch_size=batch_size,
              lr_p=lr_p,
              lr_v=lr_v,
              gamma=gamma,
              tau=tau,
              clip_epsilon=epsilon,
              ppo_epochs=ppo_epochs,
              ppo_mini_batch_size=ppo_mini_batch_size,
              seed=seed)

    for i_iter in range(1, max_iter + 1):
        ppo.learn(writer, i_iter)

        if i_iter % eval_iter == 0:
            ppo.eval(i_iter, render=render)

        if i_iter % save_iter == 0:
            ppo.save(model_path)
            # Persist the whole agent so training can be resumed/inspected.
            with open('{}/{}_ppo_encoder.p'.format(model_path, env_id),
                      'wb') as f:
                pickle.dump(ppo, f)  # fix: file handle was previously leaked

        # Release cached GPU memory between iterations to keep usage bounded.
        torch.cuda.empty_cache()
v_lr=args.v_lr, dyn_lr=args.dyn_lr, clip_param=args.clip_param, value_coef=args.value_coef, entropy_coef=args.entropy_coef, grad_norm_max=args.grad_norm_max, use_clipped_value_loss=True, use_tensorboard=args.use_tensorboard, add_intrinsic_reward=args.add_intrinsic_reward, predict_delta_obs=args.predict_delta_obs, device=device, share_optim=args.share_optim, debug=None) ob_rms = agent.load_checkpoint(checkpoint_dir) agent.eval() # set same statistics for normalization as in training if ob_rms is not None and isinstance(envs.venv, VecNormalize): envs.venv.ob_rms = ob_rms # start testing start = time.time() for trial in range(args.num_evals): print('Trial ', trial, 'of', args.num_evals) obs = envs.reset() agent.rollouts.obs[0].copy_(obs[1]) agent.rollouts.to(device)