# Per-episode totals: sum the 'rews' entry of every episode in each batch.
rewards1 = [np.sum(epi['rews']) for epi in epis1]
rewards2 = [np.sum(epi['rews']) for epi in epis2]

# Mean over the episodes of both batches taken together (list concatenation).
mean_rew = np.mean(rewards1 + rewards2)

logger.record_tabular_misc_stat('Reward1', rewards1)
logger.record_tabular_misc_stat('Reward2', rewards2)
logger.record_results(
    args.log,
    result_dict,
    score_file,
    total_epi,
    step,
    total_step,
    rewards1 + rewards2,
    plot_title='humanoid',
)

# NOTE(review): the source arrived with its whitespace collapsed, so the block
# structure below is reconstructed: `max_rew = mean_rew` is assumed to belong
# to the `if` body and the '*_last' saves are assumed to run unconditionally.
# Confirm against the original file.
models_dir = os.path.join(args.log, 'models')

if mean_rew > max_rew:
    # New best mean reward: snapshot both networks and both optimizers.
    best_checkpoints = (
        (pol, 'pol_max.pkl'),
        (vf, 'vf_max.pkl'),
        (optim_pol, 'optim_pol_max.pkl'),
        (optim_vf, 'optim_vf_max.pkl'),
    )
    for component, filename in best_checkpoints:
        torch.save(component.state_dict(),
                   os.path.join(models_dir, filename))
    max_rew = mean_rew

# Always overwrite the "last" snapshots with the current state.
# NOTE(review): no 'optim_vf_last.pkl' save is visible in this fragment,
# unlike the '*_max' set — confirm whether it follows outside this view.
latest_checkpoints = (
    (pol, 'pol_last.pkl'),
    (vf, 'vf_last.pkl'),
    (optim_pol, 'optim_pol_last.pkl'),
)
for component, filename in latest_checkpoints:
    torch.save(component.state_dict(),
               os.path.join(models_dir, filename))
# Advance the running counters using the episode/step counts held by traj1.
total_epi += traj1.num_epi
step = traj1.num_step
total_step += step

# Per-episode totals: sum the 'rews' entry of every episode in each batch.
rewards1 = [np.sum(epi['rews']) for epi in epis1]
rewards2 = [np.sum(epi['rews']) for epi in epis2]

# Mean over the episodes of both batches taken together (list concatenation).
mean_rew = np.mean(rewards1 + rewards2)

logger.record_tabular_misc_stat('Reward1', rewards1)
logger.record_tabular_misc_stat('Reward2', rewards2)
logger.record_results(
    args.log,
    result_dict,
    score_file,
    total_epi,
    step,
    total_step,
    rewards1 + rewards2,
    plot_title='humanoid',
)

# NOTE(review): the source arrived with its whitespace collapsed, so the block
# structure below is reconstructed: `max_rew = mean_rew` is assumed to belong
# to the `if` body and the '*_last' saves are assumed to run unconditionally.
# Confirm against the original file.
models_dir = os.path.join(args.log, 'models')

if mean_rew > max_rew:
    # New best mean reward: snapshot both networks and both optimizers.
    best_checkpoints = (
        (pol, 'pol_max.pkl'),
        (vf, 'vf_max.pkl'),
        (optim_pol, 'optim_pol_max.pkl'),
        (optim_vf, 'optim_vf_max.pkl'),
    )
    for component, filename in best_checkpoints:
        torch.save(component.state_dict(),
                   os.path.join(models_dir, filename))
    max_rew = mean_rew

# Always overwrite the "last" snapshots with the current state.
# NOTE(review): no 'optim_vf_last.pkl' save is visible in this fragment,
# unlike the '*_max' set — confirm whether it follows outside this view.
latest_checkpoints = (
    (pol, 'pol_last.pkl'),
    (vf, 'vf_last.pkl'),
    (optim_pol, 'optim_pol_last.pkl'),
)
for component, filename in latest_checkpoints:
    torch.save(component.state_dict(),
               os.path.join(models_dir, filename))