Example #1
        lineplot(np.asarray(metrics['steps'])[np.asarray(metrics['test_episodes']) - 1],
                 metrics['test_rewards'], 'test_rewards_steps', results_dir,
                 xaxis='step')
        if not args.symbolic_env:
            episode_str = str(episode).zfill(len(str(args.episodes)))
            write_video(video_frames, 'test_episode_%s' % episode_str,
                        results_dir)  # Lossy compression
            save_image(
                torch.as_tensor(video_frames[-1]),
                os.path.join(results_dir, 'test_episode_%s.png' % episode_str))
        torch.save(metrics, os.path.join(results_dir, 'metrics.pth'))

        # Set models to train mode
        transition_model.train()
        observation_model.train()
        reward_model.train()
        encoder.train()
        actor_model.train()
        value_model.train()
        # Close test environments
        test_envs.close()

    writer.add_scalar("train_reward", metrics['train_rewards'][-1],
                      metrics['steps'][-1])
    writer.add_scalar("train/episode_reward", metrics['train_rewards'][-1],
                      metrics['steps'][-1] * args.action_repeat)
    writer.add_scalar("observation_loss", metrics['observation_loss'][0][-1],
                      metrics['steps'][-1])
    writer.add_scalar("reward_loss", metrics['reward_loss'][0][-1],
                      metrics['steps'][-1])
    writer.add_scalar("kl_loss", metrics['kl_loss'][0][-1],
                      metrics['steps'][-1])
    writer.add_scalar("actor_loss", metrics['actor_loss'][0][-1],
Example #2
    # if not args.symbolic_env:
    if True:  # always save the video/image, even when args.symbolic_env is set
      episode_str = str(episode).zfill(len(str(args.episodes)))
      write_video(video_frames, 'test_episode_%s' % episode_str, results_dir)  # Lossy compression
      save_image(torch.as_tensor(video_frames[-1]), os.path.join(results_dir, 'test_episode_%s.png' % episode_str))
    torch.save(metrics, os.path.join(results_dir, 'metrics.pth'))
    test_reward_sum = sum(metrics['test_rewards'][-1])
    writer.add_scalar("test/episode_reward", test_reward_sum/args.test_episodes, metrics['steps'][-1]*args.action_repeat)

    # Set models to train mode
    transition_model.train()
    observation_model.train()
    reward_model.train()
    encoder.train()
    if args.algo=="p2e" or args.algo=="dreamer":
      actor_model.train()
      value_model.train()
      if args.algo=="p2e":
        curious_actor_model.train()
        curious_value_model.train()
    # Close test environments
    test_envs.close()

  writer.add_scalar("train_reward", metrics['train_rewards'][-1], metrics['steps'][-1])
  writer.add_scalar("train/episode_reward", metrics['train_rewards'][-1], metrics['steps'][-1]*args.action_repeat)
  writer.add_scalar("observation_loss", metrics['observation_loss'][-1][0], metrics['steps'][-1])
  writer.add_scalar("reward_loss", metrics['reward_loss'][-1][0], metrics['steps'][-1])
  writer.add_scalar("kl_loss", metrics['kl_loss'][-1][0], metrics['steps'][-1])
  writer.add_scalar("actor_loss", metrics['actor_loss'][-1][0], metrics['steps'][-1])
  writer.add_scalar("value_loss", metrics['value_loss'][-1][0], metrics['steps'][-1])
  writer.add_scalar("onestep_loss", metrics['onestep_loss'][-1][0], metrics['steps'][-1]) 
  writer.add_scalar("curious_actor_loss", metrics['curious_actor_loss'][-1][0], metrics['steps'][-1])