import pickle

import torch
from torch.utils.tensorboard import SummaryWriter

from ppo import PPO  # project-local PPO agent; adjust the import path to your layout


def main(env_id, render, num_process, lr_p, lr_v, gamma, tau, epsilon,
         batch_size, ppo_mini_batch_size, ppo_epochs, max_iter, eval_iter,
         save_iter, model_path, log_path, seed):
    # TensorBoard logs go to <log_path><env_id>/PPO_exp<seed>
    base_dir = log_path + env_id + "/PPO_exp{}".format(seed)
    writer = SummaryWriter(base_dir)

    ppo = PPO(env_id=env_id,
              render=render,
              num_process=num_process,
              min_batch_size=batch_size,
              lr_p=lr_p,
              lr_v=lr_v,
              gamma=gamma,
              tau=tau,
              clip_epsilon=epsilon,
              ppo_epochs=ppo_epochs,
              ppo_mini_batch_size=ppo_mini_batch_size,
              seed=seed)

    for i_iter in range(1, max_iter + 1):
        ppo.learn(writer, i_iter)  # collect a batch and run the PPO updates

        if i_iter % eval_iter == 0:
            ppo.eval(i_iter, render=render)

        if i_iter % save_iter == 0:
            ppo.save(model_path)
            # also pickle the whole agent so training can be resumed later
            with open('{}/{}_ppo.p'.format(model_path, env_id), 'wb') as f:
                pickle.dump(ppo, f)

        torch.cuda.empty_cache()  # release cached GPU memory between iterations
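# A minimal sketch of how this training entry point might be invoked from the
# command line. The argparse wiring and every default value below are
# illustrative assumptions, not the project's actual CLI.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Train PPO")
    parser.add_argument("--env_id", type=str, default="HalfCheetah-v2")
    parser.add_argument("--render", action="store_true")
    parser.add_argument("--num_process", type=int, default=1)
    parser.add_argument("--lr_p", type=float, default=3e-4)    # policy learning rate
    parser.add_argument("--lr_v", type=float, default=3e-4)    # value learning rate
    parser.add_argument("--gamma", type=float, default=0.99)   # discount factor
    parser.add_argument("--tau", type=float, default=0.95)     # GAE lambda
    parser.add_argument("--epsilon", type=float, default=0.2)  # PPO clip range
    parser.add_argument("--batch_size", type=int, default=2048)
    parser.add_argument("--ppo_mini_batch_size", type=int, default=64)
    parser.add_argument("--ppo_epochs", type=int, default=10)
    parser.add_argument("--max_iter", type=int, default=1000)
    parser.add_argument("--eval_iter", type=int, default=50)
    parser.add_argument("--save_iter", type=int, default=50)
    parser.add_argument("--model_path", type=str, default="trained_models")
    parser.add_argument("--log_path", type=str, default="./log/")
    parser.add_argument("--seed", type=int, default=1)
    args = parser.parse_args()

    main(**vars(args))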
# Evaluation entry point, defined in a separate script from the training
# entry point above (hence the second `main`).
def main(env_id, render, num_process, lr_p, lr_v, gamma, tau, epsilon,
         batch_size, ppo_mini_batch_size, ppo_epochs, model_path, seed,
         test_epochs):
    # rebuild the agent; passing model_path makes PPO load the trained weights
    ppo = PPO(env_id=env_id,
              render=render,
              num_process=num_process,
              min_batch_size=batch_size,
              lr_p=lr_p,
              lr_v=lr_v,
              gamma=gamma,
              tau=tau,
              clip_epsilon=epsilon,
              ppo_epochs=ppo_epochs,
              ppo_mini_batch_size=ppo_mini_batch_size,
              seed=seed,
              model_path=model_path)

    # run exactly test_epochs evaluation rounds, matching the 1-based loop
    # convention of the training script
    for i_iter in range(1, test_epochs + 1):
        ppo.eval(i_iter)
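# A minimal sketch of calling the evaluation entry point directly; every value
# here is an illustrative assumption, not a project default.
if __name__ == "__main__":
    main(env_id="HalfCheetah-v2", render=False, num_process=1,
         lr_p=3e-4, lr_v=3e-4, gamma=0.99, tau=0.95, epsilon=0.2,
         batch_size=2048, ppo_mini_batch_size=64, ppo_epochs=10,
         model_path="trained_models", seed=1, test_epochs=10)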