xaxis='step')
# Save test-episode video and final frame (image-based environments only)
if not args.symbolic_env:
    episode_str = str(episode).zfill(len(str(args.episodes)))
    write_video(video_frames, 'test_episode_%s' % episode_str, results_dir)  # Lossy compression
    save_image(torch.as_tensor(video_frames[-1]),
               os.path.join(results_dir, 'test_episode_%s.png' % episode_str))
torch.save(metrics, os.path.join(results_dir, 'metrics.pth'))

# Set models to train mode
transition_model.train()
observation_model.train()
reward_model.train()
encoder.train()
actor_model.train()
value_model.train()
# Close test environments
test_envs.close()

writer.add_scalar("train_reward", metrics['train_rewards'][-1], metrics['steps'][-1])
writer.add_scalar("train/episode_reward", metrics['train_rewards'][-1],
                  metrics['steps'][-1] * args.action_repeat)
writer.add_scalar("observation_loss", metrics['observation_loss'][0][-1], metrics['steps'][-1])
writer.add_scalar("reward_loss", metrics['reward_loss'][0][-1], metrics['steps'][-1])
writer.add_scalar("kl_loss", metrics['kl_loss'][0][-1], metrics['steps'][-1])
writer.add_scalar("actor_loss", metrics['actor_loss'][0][-1], metrics['steps'][-1])
# if not args.symbolic_env:
if True:  # Saving guard disabled; originally skipped for symbolic envs
    episode_str = str(episode).zfill(len(str(args.episodes)))
    write_video(video_frames, 'test_episode_%s' % episode_str, results_dir)  # Lossy compression
    save_image(torch.as_tensor(video_frames[-1]),
               os.path.join(results_dir, 'test_episode_%s.png' % episode_str))
torch.save(metrics, os.path.join(results_dir, 'metrics.pth'))

# Log mean test reward across the test episodes
test_reward_sum = sum(metrics['test_rewards'][-1])
writer.add_scalar("test/episode_reward", test_reward_sum / args.test_episodes,
                  metrics['steps'][-1] * args.action_repeat)

# Set models to train mode
transition_model.train()
observation_model.train()
reward_model.train()
encoder.train()
if args.algo == "p2e" or args.algo == "dreamer":
    actor_model.train()
    value_model.train()
if args.algo == "p2e":
    curious_actor_model.train()
    curious_value_model.train()
# Close test environments
test_envs.close()

writer.add_scalar("train_reward", metrics['train_rewards'][-1], metrics['steps'][-1])
writer.add_scalar("train/episode_reward", metrics['train_rewards'][-1],
                  metrics['steps'][-1] * args.action_repeat)
writer.add_scalar("observation_loss", metrics['observation_loss'][-1][0], metrics['steps'][-1])
writer.add_scalar("reward_loss", metrics['reward_loss'][-1][0], metrics['steps'][-1])
writer.add_scalar("kl_loss", metrics['kl_loss'][-1][0], metrics['steps'][-1])
writer.add_scalar("actor_loss", metrics['actor_loss'][-1][0], metrics['steps'][-1])
writer.add_scalar("value_loss", metrics['value_loss'][-1][0], metrics['steps'][-1])
writer.add_scalar("onestep_loss", metrics['onestep_loss'][-1][0], metrics['steps'][-1])
writer.add_scalar("curious_actor_loss", metrics['curious_actor_loss'][-1][0], metrics['steps'][-1])
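# A minimal refactor sketch, not part of the original file: the repeated
# writer.add_scalar loss calls above could be collapsed into a loop over the
# shared keys. The key tuple below is an assumption inferred from the metrics
# logged above; adjust it if the metrics dict differs.
# for name in ('observation_loss', 'reward_loss', 'kl_loss', 'actor_loss',
#              'value_loss', 'onestep_loss', 'curious_actor_loss'):
#     writer.add_scalar(name, metrics[name][-1][0], metrics['steps'][-1])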