Example #1
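This example wires up a full PPO training run: it builds the agent, then alternates learning iterations with periodic evaluation and checkpointing.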
import pickle

import torch
from torch.utils.tensorboard import SummaryWriter

from ppo import PPO  # assumed module path; adjust to wherever PPO is defined


def main(env_id, render, num_process, lr_p, lr_v, gamma, tau, epsilon, batch_size,
         ppo_mini_batch_size, ppo_epochs, max_iter, eval_iter, save_iter, model_path, log_path, seed):
    # TensorBoard logs go under <log_path><env_id>/PPO_exp<seed>
    base_dir = log_path + env_id + "/PPO_exp{}".format(seed)
    writer = SummaryWriter(base_dir)

    ppo = PPO(env_id=env_id,
              render=render,
              num_process=num_process,
              min_batch_size=batch_size,
              lr_p=lr_p,
              lr_v=lr_v,
              gamma=gamma,
              tau=tau,
              clip_epsilon=epsilon,
              ppo_epochs=ppo_epochs,
              ppo_mini_batch_size=ppo_mini_batch_size,
              seed=seed)

    for i_iter in range(1, max_iter + 1):
        ppo.learn(writer, i_iter)

        # Evaluate the current policy every eval_iter iterations.
        if i_iter % eval_iter == 0:
            ppo.eval(i_iter, render=render)

        # Checkpoint every save_iter iterations: save the network weights,
        # then pickle the whole agent for easy reloading.
        if i_iter % save_iter == 0:
            ppo.save(model_path)

            with open('{}/{}_ppo.p'.format(model_path, env_id), 'wb') as f:
                pickle.dump(ppo, f)

        torch.cuda.empty_cache()
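
For context, here is one way this main could be driven from the command line. This is a minimal sketch using argparse; the flag names mirror the parameters above, but the defaults (including the env_id) are illustrative assumptions, not the original project's values.

import argparse

if __name__ == '__main__':
    # Illustrative CLI wiring only; defaults are assumptions, not the originals.
    parser = argparse.ArgumentParser(description="Train PPO")
    parser.add_argument('--env_id', type=str, default='HalfCheetah-v2')
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--num_process', type=int, default=1)
    parser.add_argument('--lr_p', type=float, default=3e-4)
    parser.add_argument('--lr_v', type=float, default=3e-4)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--tau', type=float, default=0.95)
    parser.add_argument('--epsilon', type=float, default=0.2)
    parser.add_argument('--batch_size', type=int, default=2048)
    parser.add_argument('--ppo_mini_batch_size', type=int, default=64)
    parser.add_argument('--ppo_epochs', type=int, default=10)
    parser.add_argument('--max_iter', type=int, default=1000)
    parser.add_argument('--eval_iter', type=int, default=50)
    parser.add_argument('--save_iter', type=int, default=50)
    parser.add_argument('--model_path', type=str, default='trained_models')
    parser.add_argument('--log_path', type=str, default='./log/')
    parser.add_argument('--seed', type=int, default=1)
    args = parser.parse_args()
    main(**vars(args))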
Example #2
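This example reuses the same constructor to run evaluation only: it points the agent at a saved model via model_path and repeatedly calls eval.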
from ppo import PPO  # assumed module path; adjust to wherever PPO is defined


def main(env_id, render, num_process, lr_p, lr_v, gamma, tau, epsilon, batch_size,
         ppo_mini_batch_size, ppo_epochs, model_path, seed, test_epochs):
    # model_path presumably points at weights saved during training (see Example #1).
    ppo = PPO(env_id=env_id,
              render=render,
              num_process=num_process,
              min_batch_size=batch_size,
              lr_p=lr_p,
              lr_v=lr_v,
              gamma=gamma,
              tau=tau,
              clip_epsilon=epsilon,
              ppo_epochs=ppo_epochs,
              ppo_mini_batch_size=ppo_mini_batch_size,
              seed=seed,
              model_path=model_path)

    # Evaluate the restored policy for test_epochs episodes
    # (inclusive range, as in Example #1).
    for i_iter in range(1, test_epochs + 1):
        ppo.eval(i_iter)
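
Neither example defines PPO itself. From the calls made above, the agent is assumed to expose roughly the following interface; this is a reconstruction for reference, not the original implementation, and the docstrings describe only what the examples rely on.

class PPO:
    """Interface implied by the two examples above (sketch only)."""

    def __init__(self, env_id, render, num_process, min_batch_size, lr_p, lr_v,
                 gamma, tau, clip_epsilon, ppo_epochs, ppo_mini_batch_size,
                 seed, model_path=None):
        # Example #2 additionally passes model_path, so it must be optional here.
        ...

    def learn(self, writer, i_iter):
        """Run one training iteration and log metrics to the SummaryWriter."""
        ...

    def eval(self, i_iter, render=False):
        """Roll out the current policy once for evaluation."""
        ...

    def save(self, save_path):
        """Persist the trained networks under save_path."""
        ...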