writer.add_scalar("curious_actor_loss", metrics['curious_actor_loss'][-1][0], metrics['steps'][-1]) writer.add_scalar("curious_value_loss", metrics['curious_value_loss'][-1][0], metrics['steps'][-1]) print("episodes: {}, total_steps: {}, train_reward: {} ".format(metrics['episodes'][-1], metrics['steps'][-1], metrics['train_rewards'][-1])) # Checkpoint models if episode % args.checkpoint_interval == 0: # print("checkpoint saving model") torch.save({'transition_model': transition_model.state_dict(), 'observation_model': observation_model.state_dict(), 'reward_model': reward_model.state_dict(), 'encoder': encoder.state_dict(), 'model_optimizer': model_optimizer.state_dict(), }, os.path.join(results_dir, 'models_%d.pth' % episode)) if args.algo=="p2e" or args.algo=="dreamer": # print("checkpoint saving model") torch.save({'actor_model': actor_model.state_dict(), 'value_model': value_model.state_dict(), 'actor_optimizer': actor_optimizer.state_dict(), 'value_optimizer': value_optimizer.state_dict(), }, os.path.join(results_dir, 'actorvalue_models_%d.pth' % episode)) if args.algo=="p2e": # print("checkpoint saving model") torch.save({'curious_actor_model': actor_model.state_dict(), 'curious_value_model': value_model.state_dict(), 'curious_actor_optimizer': actor_optimizer.state_dict(), 'curious_value_optimizer': value_optimizer.state_dict(), }, os.path.join(results_dir, 'curious_models_%d.pth' % episode)) onestep_model_dict = {'onestep_model{}'.format(i) : x.state_dict() for i,x in enumerate(onestep_models)} onestep_model_dict['onestep_optimizer'] = onestep_optimizer.state_dict() torch.save(onestep_model_dict, os.path.join(results_dir, 'onestep_models_%d.pth' % episode))
writer.add_scalar("kl_loss", metrics['kl_loss'][0][-1], metrics['steps'][-1]) writer.add_scalar("actor_loss", metrics['actor_loss'][0][-1], metrics['steps'][-1]) writer.add_scalar("value_loss", metrics['value_loss'][0][-1], metrics['steps'][-1]) print("episodes: {}, total_steps: {}, train_reward: {} ".format( metrics['episodes'][-1], metrics['steps'][-1], metrics['train_rewards'][-1])) # Checkpoint models if episode % args.checkpoint_interval == 0: torch.save( { 'transition_model': transition_model.state_dict(), 'observation_model': observation_model.state_dict(), 'reward_model': reward_model.state_dict(), 'encoder': encoder.state_dict(), 'actor_model': actor_model.state_dict(), 'value_model': value_model.state_dict(), 'model_optimizer': model_optimizer.state_dict(), 'actor_optimizer': actor_optimizer.state_dict(), 'value_optimizer': value_optimizer.state_dict() }, os.path.join(results_dir, 'models_%d.pth' % episode)) if args.checkpoint_experience: torch.save( D, os.path.join(results_dir, 'experience.pth') ) # Warning: will fail with MemoryError with large memory sizes # Close training environment env.close()
# Checkpoint models
if episode % args.checkpoint_interval == 0:
    torch.save({'transition_model': transition_model.state_dict(),
                'observation_model': observation_model.state_dict(),
                'reward_model': reward_model.state_dict(),
                'encoder': encoder.state_dict(),
                'model_optimizer': model_optimizer.state_dict()},
               os.path.join(results_dir, 'models_%d.pth' % episode))
    if args.algo == "p2e" or args.algo == "dreamer":
        torch.save({'actor_model': actor_model.state_dict(),
                    'value_model': value_model.state_dict(),
                    'actor_optimizer': actor_optimizer.state_dict(),
                    'value_optimizer': value_optimizer.state_dict()},
                   os.path.join(results_dir, 'actorvalue_models_%d.pth' % episode))
    if args.algo == "p2e":
        # As above: assumes the curious actor-critic and its optimizers exist in scope
        torch.save({'curious_actor_model': curious_actor_model.state_dict(),
                    'curious_value_model': curious_value_model.state_dict(),
                    'curious_actor_optimizer': curious_actor_optimizer.state_dict(),
                    'curious_value_optimizer': curious_value_optimizer.state_dict()},
                   os.path.join(results_dir, 'curious_models_%d.pth' % episode))