# (Tail of the Updater constructor call begun in the preceding section;
#  assumes gc, resource, numpy as np, and torch are imported earlier.)
                  lr, entropy_const=entropy_const, value_const=val_const,
                  gamma=gamma, _lambda=_lambda, max_norm=max_norm,
                  norm_advs=norm_advs)

# Optionally resume from saved network and optimizer state dicts
if resume:
    updater.net.load_state_dict(torch.load(exp_name + '_net.p'))
    updater.optim.load_state_dict(torch.load(exp_name + '_optim.p'))
updater.optim.zero_grad()

epoch = 0
while True:
    epoch += 1
    print("Begin Epoch", epoch, "– T =", collector.T)
    for rollout in range(n_rollouts):
        # Collect a rollout, accumulate the loss, and step the optimizer
        data = collector.get_data(render)
        updater.calc_loss(*data, gae, reinforce)
        updater.update_model()
        updater.save_model(net_save_file, optim_save_file)
        updater.print_statistics()
        print("Grad Norm:", updater.norm, "– Avg Action:", np.mean(data[3]))
        print("Average Reward:", collector.avg_reward, end='\n\n')

    # Check for memory leaks (ru_maxrss is reported in kilobytes on Linux)
    gc.collect()
    max_mem_used = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    print("Memory Used: {:.2f} MB\n".format(max_mem_used / 1024))
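# The `gae` flag and the `gamma`/`_lambda` hyperparameters above suggest the
# advantages fed to calc_loss come from Generalized Advantage Estimation.
# A minimal, self-contained sketch of that computation follows; the array
# names and shapes are assumptions for illustration, not the repo's API.
import numpy as np

def gae_advantages(rewards, values, dones, gamma=0.99, _lambda=0.95):
    """Generalized Advantage Estimation (Schulman et al., 2016).

    rewards, dones: length-T arrays for one rollout; values: length T+1,
    where the extra entry bootstraps from the final state.
    """
    T = len(rewards)
    advs = np.zeros(T, dtype=np.float32)
    running = 0.0
    for t in reversed(range(T)):
        nonterminal = 1.0 - dones[t]
        # One-step TD error: r_t + gamma * V(s_{t+1}) - V(s_t)
        delta = rewards[t] + gamma * values[t + 1] * nonterminal - values[t]
        # Discounted, lambda-weighted sum of future TD errors
        running = delta + gamma * _lambda * nonterminal * running
        advs[t] = running
    return advs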
T = 0
avg_rew = 0.5
epoch = 0
trial = 0
while T < total_steps:
    epoch += 1

    # Collect Rollout
    data = collector.rollout(n_tsteps)
    avg_rew = .99 * avg_rew + .01 * np.mean(data['rewards'])  # exponential moving average
    T += n_tsteps * n_envs

    # Update Model
    if evaluate_mode:
        logger.data["Trial " + str(epoch)] = np.asarray(data['rewards'])
    else:
        updater.calc_loss(data)
        if epoch % epochs_per_update == 0:
            print("\nStep:", T, " – Epoch", epoch)
            avg_val = np.mean(data['values'])
            avg_action = np.mean(data['sparse_actions'])
            print("Avg Rew:", avg_rew, " – Avg Act:", avg_action, " – Avg Val:", avg_val)
            updater.update_model(calc_grad=True)

            # Track Stats
            updater.print_stats()
            updater.info["Avg Reward"] = avg_rew
            updater.info["Avg Action"] = avg_action
            updater.info["Avg Value Pred"] = avg_val
            logger.append(updater.info, x_val=T)
            updater.save_model(save_file)
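# The `entropy_const`, `val_const`, and `norm_advs` arguments imply the
# Updater optimizes a standard actor-critic objective: policy gradient plus
# value regression minus an entropy bonus. The sketch below is illustrative
# only and assumes discrete actions; the repo's calc_loss may differ.
import torch
import torch.nn.functional as F

def a2c_loss(logits, values, actions, returns, advs,
             entropy_const=0.01, value_const=0.5, norm_advs=True):
    """Policy-gradient loss + value regression - entropy bonus."""
    if norm_advs:
        # Normalizing advantages stabilizes the policy-gradient scale
        advs = (advs - advs.mean()) / (advs.std() + 1e-8)
    log_probs = F.log_softmax(logits, dim=-1)
    action_log_probs = log_probs.gather(1, actions.unsqueeze(1)).squeeze(1)
    policy_loss = -(action_log_probs * advs).mean()
    value_loss = F.mse_loss(values.squeeze(-1), returns)
    entropy = -(log_probs.exp() * log_probs).sum(dim=-1).mean()
    return policy_loss + value_const * value_loss - entropy_const * entropy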