Example #1
  writer.add_scalar("curious_actor_loss", metrics['curious_actor_loss'][-1][0], metrics['steps'][-1]) 
  writer.add_scalar("curious_value_loss", metrics['curious_value_loss'][-1][0], metrics['steps'][-1]) 
  print("episodes: {}, total_steps: {}, train_reward: {} ".format(metrics['episodes'][-1], metrics['steps'][-1], metrics['train_rewards'][-1]))

  # Checkpoint models
  if episode % args.checkpoint_interval == 0:
    # print("checkpoint saving model")
    torch.save({'transition_model': transition_model.state_dict(),
            'observation_model': observation_model.state_dict(),
            'reward_model': reward_model.state_dict(),
            'encoder': encoder.state_dict(),
            'model_optimizer': model_optimizer.state_dict(),
            }, os.path.join(results_dir, 'models_%d.pth' % episode))
    if args.algo=="p2e" or args.algo=="dreamer":
      # print("checkpoint saving model")
      torch.save({'actor_model': actor_model.state_dict(),
                  'value_model': value_model.state_dict(),
                  'actor_optimizer': actor_optimizer.state_dict(),
                  'value_optimizer': value_optimizer.state_dict(),
                  }, os.path.join(results_dir, 'actorvalue_models_%d.pth' % episode))
    if args.algo=="p2e":
      # print("checkpoint saving model")
      torch.save({'curious_actor_model': curious_actor_model.state_dict(),
                  'curious_value_model': curious_value_model.state_dict(),
                  'curious_actor_optimizer': curious_actor_optimizer.state_dict(),
                  'curious_value_optimizer': curious_value_optimizer.state_dict(),
                  }, os.path.join(results_dir, 'curious_models_%d.pth' % episode))
      onestep_model_dict = {'onestep_model{}'.format(i) : x.state_dict() for i,x in enumerate(onestep_models)}
      onestep_model_dict['onestep_optimizer'] = onestep_optimizer.state_dict()
      torch.save(onestep_model_dict, os.path.join(results_dir, 'onestep_models_%d.pth' % episode))
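
A checkpoint written with this pattern can later be restored with torch.load and load_state_dict. Below is a minimal, self-contained sketch of the same save/restore round trip; the directory name, episode value, and toy nn.Linear modules are illustrative stand-ins, not the PlaNet/Dreamer models used above.

import os
import torch
import torch.nn as nn

# Minimal save/restore sketch of the checkpoint pattern above.
# Toy nn.Linear modules stand in for the real transition/reward models.
results_dir = 'results_demo'   # illustrative directory, not from the original script
os.makedirs(results_dir, exist_ok=True)
episode = 0

transition_model = nn.Linear(8, 8)
reward_model = nn.Linear(8, 1)
model_optimizer = torch.optim.Adam(
    list(transition_model.parameters()) + list(reward_model.parameters()))

torch.save({'transition_model': transition_model.state_dict(),
            'reward_model': reward_model.state_dict(),
            'model_optimizer': model_optimizer.state_dict()},
           os.path.join(results_dir, 'models_%d.pth' % episode))

# Restore into freshly constructed modules of the same architecture.
checkpoint = torch.load(os.path.join(results_dir, 'models_%d.pth' % episode),
                        map_location='cpu')
transition_model.load_state_dict(checkpoint['transition_model'])
reward_model.load_state_dict(checkpoint['reward_model'])
model_optimizer.load_state_dict(checkpoint['model_optimizer'])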
Example #2
    writer.add_scalar("kl_loss", metrics['kl_loss'][0][-1],
                      metrics['steps'][-1])
    writer.add_scalar("actor_loss", metrics['actor_loss'][0][-1],
                      metrics['steps'][-1])
    writer.add_scalar("value_loss", metrics['value_loss'][0][-1],
                      metrics['steps'][-1])
    print("episodes: {}, total_steps: {}, train_reward: {} ".format(
        metrics['episodes'][-1], metrics['steps'][-1],
        metrics['train_rewards'][-1]))

    # Checkpoint models
    if episode % args.checkpoint_interval == 0:
        torch.save(
            {
                'transition_model': transition_model.state_dict(),
                'observation_model': observation_model.state_dict(),
                'reward_model': reward_model.state_dict(),
                'encoder': encoder.state_dict(),
                'actor_model': actor_model.state_dict(),
                'value_model': value_model.state_dict(),
                'model_optimizer': model_optimizer.state_dict(),
                'actor_optimizer': actor_optimizer.state_dict(),
                'value_optimizer': value_optimizer.state_dict()
            }, os.path.join(results_dir, 'models_%d.pth' % episode))
        if args.checkpoint_experience:
            torch.save(
                D, os.path.join(results_dir, 'experience.pth')
            )  # Warning: will fail with MemoryError with large memory sizes

# Close training environment
env.close()
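
The writer used in these examples is presumably a SummaryWriter (from torch.utils.tensorboard or tensorboardX; both expose the same add_scalar call). A minimal sketch of the logging pattern follows, with toy values in place of the metrics dict and an illustrative log directory:

from torch.utils.tensorboard import SummaryWriter

# Minimal logging sketch mirroring the add_scalar calls above (toy values).
writer = SummaryWriter(log_dir='results_demo/tensorboard')  # illustrative path
for step, loss in enumerate([0.9, 0.6, 0.4]):
    writer.add_scalar('value_loss', loss, step)  # tag, scalar value, global step
writer.close()
# Inspect the curves with: tensorboard --logdir results_demo/tensorboard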
Example #3
 # Checkpoint models
 if episode % args.checkpoint_interval == 0:
     # print("checkpoint saving model")
     torch.save(
         {
             'transition_model': transition_model.state_dict(),
             'observation_model': observation_model.state_dict(),
             'reward_model': reward_model.state_dict(),
             'encoder': encoder.state_dict(),
             'model_optimizer': model_optimizer.state_dict(),
         }, os.path.join(results_dir, 'models_%d.pth' % episode))
     if args.algo == "p2e" or args.algo == "dreamer":
         # print("checkpoint saving model")
         torch.save(
             {
                 'actor_model': actor_model.state_dict(),
                 'value_model': value_model.state_dict(),
                 'actor_optimizer': actor_optimizer.state_dict(),
                 'value_optimizer': value_optimizer.state_dict(),
             },
             os.path.join(results_dir,
                          'actorvalue_models_%d.pth' % episode))
     if args.algo == "p2e":
         # print("checkpoint saving model")
         torch.save(
             {
                 'curious_actor_model': curious_actor_model.state_dict(),
                 'curious_value_model': curious_value_model.state_dict(),
                 'curious_actor_optimizer': curious_actor_optimizer.state_dict(),
                 'curious_value_optimizer': curious_value_optimizer.state_dict(),
             }, os.path.join(results_dir,
                             'curious_models_%d.pth' % episode))