def train_model(self):
    """Main training loop: collect rollouts, update the policy, and periodically log,
    checkpoint, and evaluate."""
    episode_rewards = deque(maxlen=10)
    current_episode_rewards = np.zeros(self.shell_args.num_processes)
    episode_lengths = deque(maxlen=10)
    current_episode_lengths = np.zeros(self.shell_args.num_processes)
    current_rewards = np.zeros(self.shell_args.num_processes)
    total_num_steps = self.start_iter
    fps_timer = [time.time(), total_num_steps]
    timers = np.zeros(3)
    egomotion_loss = 0
    video_frames = []
    num_episodes = 0

    # self.evaluate_model()

    obs = self.envs.reset()
    if self.compute_surface_normals:
        obs["surface_normals"] = pt_util.depth_to_surface_normals(obs["depth"].to(self.device))
    obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
    if self.shell_args.algo == "supervised":
        obs["best_next_action"] = pt_util.from_numpy(obs["best_next_action"][:, ACTION_SPACE])
    self.rollouts.copy_obs(obs, 0)
    distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])
    self.train_stats["start_geodesic_distance"][:] = distances
    previous_visual_features = None
    egomotion_pred = None
    prev_action = None
    prev_action_probs = None
    num_updates = (
        int(self.shell_args.num_env_steps) // self.shell_args.num_forward_rollout_steps
    ) // self.shell_args.num_processes

    try:
        for iter_count in range(num_updates):
            if self.shell_args.tensorboard:
                if iter_count % 500 == 0:
                    print("Logging conv summaries")
                    self.logger.network_conv_summary(self.agent, total_num_steps)
                elif iter_count % 100 == 0:
                    print("Logging variable summaries")
                    self.logger.network_variable_summary(self.agent, total_num_steps)

            if self.shell_args.use_linear_lr_decay:
                # Decrease learning rate linearly.
                update_linear_schedule(self.optimizer.optimizer, iter_count, num_updates, self.shell_args.lr)

            if self.shell_args.algo == "ppo" and self.shell_args.use_linear_clip_decay:
                self.optimizer.clip_param = self.shell_args.clip_param * (1 - iter_count / float(num_updates))

            if hasattr(self.agent.base, "enable_decoder"):
                if self.shell_args.record_video:
                    self.agent.base.enable_decoder()
                else:
                    self.agent.base.disable_decoder()

            for step in range(self.shell_args.num_forward_rollout_steps):
                with torch.no_grad():
                    start_t = time.time()
                    value, action, action_log_prob, recurrent_hidden_states = self.agent.act(
                        {
                            "images": self.rollouts.obs[step],
                            "target_vector": self.rollouts.additional_observations_dict["pointgoal"][step],
                            "prev_action_one_hot": self.rollouts.additional_observations_dict[
                                "prev_action_one_hot"
                            ][step],
                        },
                        self.rollouts.recurrent_hidden_states[step],
                        self.rollouts.masks[step],
                    )
                    action_cpu = pt_util.to_numpy_array(action.squeeze(1))
                    translated_action_space = ACTION_SPACE[action_cpu]

                    if not self.shell_args.end_to_end:
                        self.rollouts.additional_observations_dict["visual_encoder_features"][
                            self.rollouts.step
                        ].copy_(self.agent.base.visual_encoder_features)

                    if self.shell_args.use_motion_loss:
                        if self.shell_args.record_video:
                            if previous_visual_features is not None:
                                egomotion_pred = self.agent.base.predict_egomotion(
                                    self.agent.base.visual_features, previous_visual_features
                                )
                            previous_visual_features = self.agent.base.visual_features.detach()

                    timers[1] += time.time() - start_t

                    if self.shell_args.record_video:
                        # Copy so we don't mess with obs itself
                        draw_obs = OrderedDict()
                        for key, val in obs.items():
                            draw_obs[key] = pt_util.to_numpy_array(val).copy()
                        best_next_action = draw_obs.pop("best_next_action", None)

                        if prev_action is not None:
                            draw_obs["action_taken"] = pt_util.to_numpy_array(self.agent.last_dist.probs).copy()
                            draw_obs["action_taken"][:] = 0
                            draw_obs["action_taken"][np.arange(self.shell_args.num_processes), prev_action] = 1
                            draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[
                                ACTION_SPACE_TO_SIM_ACTION[ACTION_SPACE[prev_action.squeeze()]]
                            ]
                            draw_obs["action_prob"] = pt_util.to_numpy_array(prev_action_probs).copy()
                        else:
                            draw_obs["action_taken"] = None
                            draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[SimulatorActions.STOP]
                            draw_obs["action_prob"] = None
                        prev_action = action_cpu
                        prev_action_probs = self.agent.last_dist.probs.detach()

                        if (
                            hasattr(self.agent.base, "decoder_outputs")
                            and self.agent.base.decoder_outputs is not None
                        ):
                            min_channel = 0
                            for key, num_channels in self.agent.base.decoder_output_info:
                                outputs = self.agent.base.decoder_outputs[
                                    :, min_channel : min_channel + num_channels, ...
                                ]
                                draw_obs["output_" + key] = pt_util.to_numpy_array(outputs).copy()
                                min_channel += num_channels

                        draw_obs["rewards"] = current_rewards.copy()
                        draw_obs["step"] = current_episode_lengths.copy()
                        draw_obs["method"] = self.shell_args.method_name
                        if best_next_action is not None:
                            draw_obs["best_next_action"] = best_next_action
                        if self.shell_args.use_motion_loss:
                            if egomotion_pred is not None:
                                draw_obs["egomotion_pred"] = pt_util.to_numpy_array(
                                    F.softmax(egomotion_pred, dim=1)
                                ).copy()
                            else:
                                draw_obs["egomotion_pred"] = None

                        images, titles, normalize = draw_outputs.obs_to_images(draw_obs)
                        if self.shell_args.algo == "supervised":
                            im_inds = [0, 2, 3, 1, 9, 6, 7, 8, 5, 4]
                        else:
                            im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                        height, width = images[0].shape[:2]
                        subplot_image = drawing.subplot(
                            images,
                            2,
                            5,
                            titles=titles,
                            normalize=normalize,
                            order=im_inds,
                            output_width=max(width, 320),
                            output_height=max(height, 320),
                        )
                        video_frames.append(subplot_image)

                    # Save dists from previous step or else on reset they will be overwritten.
                    distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])

                    start_t = time.time()
                    obs, rewards, dones, infos = self.envs.step(translated_action_space)
                    timers[0] += time.time() - start_t
                    obs["reward"] = rewards
                    if self.shell_args.algo == "supervised":
                        obs["best_next_action"] = pt_util.from_numpy(
                            obs["best_next_action"][:, ACTION_SPACE]
                        ).to(torch.float32)
                    obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
                    rewards *= REWARD_SCALAR
                    rewards = np.clip(rewards, -10, 10)

                    if self.shell_args.record_video and not dones[0]:
                        obs["top_down_map"] = infos[0]["top_down_map"]

                    if self.compute_surface_normals:
                        obs["surface_normals"] = pt_util.depth_to_surface_normals(obs["depth"].to(self.device))

                    current_rewards = pt_util.to_numpy_array(rewards)
                    current_episode_rewards += pt_util.to_numpy_array(rewards).squeeze()
                    current_episode_lengths += 1

                    for ii, done_e in enumerate(dones):
                        if done_e:
                            num_episodes += 1
                            if self.shell_args.record_video:
                                final_rgb = draw_obs["rgb"].transpose(0, 2, 3, 1).squeeze(0)
                                if self.shell_args.task == "pointnav":
                                    if infos[ii]["spl"] > 0:
                                        draw_obs["action_taken_name"] = "Stop. Success"
                                        draw_obs["reward"] = [self.configs[0].TASK.SUCCESS_REWARD]
                                        final_rgb[:] = final_rgb * np.float32(0.5) + np.tile(
                                            np.array([0, 128, 0], dtype=np.uint8),
                                            (final_rgb.shape[0], final_rgb.shape[1], 1),
                                        )
                                    else:
                                        draw_obs["action_taken_name"] = "Timeout. Failed"
                                        final_rgb[:] = final_rgb * np.float32(0.5) + np.tile(
                                            np.array([128, 0, 0], dtype=np.uint8),
                                            (final_rgb.shape[0], final_rgb.shape[1], 1),
                                        )
                                elif self.shell_args.task == "exploration" or self.shell_args.task == "flee":
                                    draw_obs["action_taken_name"] = "End of episode."

                                final_rgb = final_rgb[np.newaxis, ...].transpose(0, 3, 1, 2)
                                draw_obs["rgb"] = final_rgb

                                images, titles, normalize = draw_outputs.obs_to_images(draw_obs)
                                im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                                height, width = images[0].shape[:2]
                                subplot_image = drawing.subplot(
                                    images,
                                    2,
                                    5,
                                    titles=titles,
                                    normalize=normalize,
                                    order=im_inds,
                                    output_width=max(width, 320),
                                    output_height=max(height, 320),
                                )
                                video_frames.extend(
                                    [subplot_image]
                                    * (self.configs[0].ENVIRONMENT.MAX_EPISODE_STEPS + 30 - len(video_frames))
                                )

                                if "top_down_map" in infos[0]:
                                    video_dir = os.path.join(self.shell_args.log_prefix, "videos")
                                    if not os.path.exists(video_dir):
                                        os.makedirs(video_dir)
                                    im_path = os.path.join(
                                        self.shell_args.log_prefix,
                                        "videos",
                                        "total_steps_%d.png" % total_num_steps,
                                    )
                                    from habitat.utils.visualizations import maps
                                    import imageio

                                    top_down_map = maps.colorize_topdown_map(infos[0]["top_down_map"]["map"])
                                    imageio.imsave(im_path, top_down_map)

                                images_to_video(
                                    video_frames,
                                    os.path.join(self.shell_args.log_prefix, "videos"),
                                    "total_steps_%d" % total_num_steps,
                                )
                                video_frames = []

                            if self.shell_args.task == "pointnav":
                                print(
                                    "FINISHED EPISODE %d Length %d Reward %.3f SPL %.4f"
                                    % (
                                        num_episodes,
                                        current_episode_lengths[ii],
                                        current_episode_rewards[ii],
                                        infos[ii]["spl"],
                                    )
                                )
                                self.train_stats["spl"][ii] = infos[ii]["spl"]
                                self.train_stats["success"][ii] = self.train_stats["spl"][ii] > 0
                                self.train_stats["end_geodesic_distance"][ii] = (
                                    distances[ii] - self.configs[0].SIMULATOR.FORWARD_STEP_SIZE
                                )
                                self.train_stats["delta_geodesic_distance"][ii] = (
                                    self.train_stats["start_geodesic_distance"][ii]
                                    - self.train_stats["end_geodesic_distance"][ii]
                                )
                                self.train_stats["num_steps"][ii] = current_episode_lengths[ii]
                            elif self.shell_args.task == "exploration":
                                print(
                                    "FINISHED EPISODE %d Reward %.3f States Visited %d"
                                    % (num_episodes, current_episode_rewards[ii], infos[ii]["visited_states"])
                                )
                                self.train_stats["visited_states"][ii] = infos[ii]["visited_states"]
                            elif self.shell_args.task == "flee":
                                print(
                                    "FINISHED EPISODE %d Reward %.3f Distance from start %.4f"
                                    % (num_episodes, current_episode_rewards[ii], infos[ii]["distance_from_start"])
                                )
                                self.train_stats["distance_from_start"][ii] = infos[ii]["distance_from_start"]

                            self.train_stats["num_episodes"][ii] += 1
                            self.train_stats["reward"][ii] = current_episode_rewards[ii]

                            if self.shell_args.tensorboard:
                                log_dict = {"single_episode/reward": self.train_stats["reward"][ii]}
                                if self.shell_args.task == "pointnav":
                                    log_dict.update(
                                        {
                                            "single_episode/num_steps": self.train_stats["num_steps"][ii],
                                            "single_episode/spl": self.train_stats["spl"][ii],
                                            "single_episode/success": self.train_stats["success"][ii],
                                            "single_episode/start_geodesic_distance": self.train_stats[
                                                "start_geodesic_distance"
                                            ][ii],
                                            "single_episode/end_geodesic_distance": self.train_stats[
                                                "end_geodesic_distance"
                                            ][ii],
                                            "single_episode/delta_geodesic_distance": self.train_stats[
                                                "delta_geodesic_distance"
                                            ][ii],
                                        }
                                    )
                                elif self.shell_args.task == "exploration":
                                    log_dict["single_episode/visited_states"] = self.train_stats[
                                        "visited_states"
                                    ][ii]
                                elif self.shell_args.task == "flee":
                                    log_dict["single_episode/distance_from_start"] = self.train_stats[
                                        "distance_from_start"
                                    ][ii]
                                self.logger.dict_log(
                                    log_dict,
                                    step=(total_num_steps + self.shell_args.num_processes * step + ii),
                                )

                            episode_rewards.append(current_episode_rewards[ii])
                            current_episode_rewards[ii] = 0
                            episode_lengths.append(current_episode_lengths[ii])
                            current_episode_lengths[ii] = 0
                            self.train_stats["start_geodesic_distance"][ii] = obs["goal_geodesic_distance"][ii]

                    # If done then clean the history of observations.
                    masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in dones])
                    bad_masks = torch.FloatTensor(
                        [[0.0] if "bad_transition" in info.keys() else [1.0] for info in infos]
                    )
                    self.rollouts.insert(
                        obs, recurrent_hidden_states, action, action_log_prob, value, rewards, masks, bad_masks
                    )

            with torch.no_grad():
                start_t = time.time()
                next_value = self.agent.get_value(
                    {
                        "images": self.rollouts.obs[-1],
                        "target_vector": self.rollouts.additional_observations_dict["pointgoal"][-1],
                        "prev_action_one_hot": self.rollouts.additional_observations_dict[
                            "prev_action_one_hot"
                        ][-1],
                    },
                    self.rollouts.recurrent_hidden_states[-1],
                    self.rollouts.masks[-1],
                ).detach()
                timers[1] += time.time() - start_t

            self.rollouts.compute_returns(
                next_value, self.shell_args.use_gae, self.shell_args.gamma, self.shell_args.tau
            )

            if not self.shell_args.no_weight_update:
                start_t = time.time()
                if self.shell_args.algo == "supervised":
                    (
                        total_loss,
                        action_loss,
                        visual_loss_total,
                        visual_loss_dict,
                        egomotion_loss,
                        forward_model_loss,
                    ) = self.optimizer.update(self.rollouts, self.shell_args)
                else:
                    (
                        total_loss,
                        value_loss,
                        action_loss,
                        dist_entropy,
                        visual_loss_total,
                        visual_loss_dict,
                        egomotion_loss,
                        forward_model_loss,
                    ) = self.optimizer.update(self.rollouts, self.shell_args)
                timers[2] += time.time() - start_t

            self.rollouts.after_update()

            # Save for every interval-th episode or for the last epoch.
            if iter_count % self.shell_args.save_interval == 0 or iter_count == num_updates - 1:
                self.save_checkpoint(5, total_num_steps)

            total_num_steps += self.shell_args.num_processes * self.shell_args.num_forward_rollout_steps

            if not self.shell_args.no_weight_update and iter_count % self.shell_args.log_interval == 0:
                log_dict = {}
                if len(episode_rewards) > 1:
                    end = time.time()
                    nsteps = total_num_steps - fps_timer[1]
                    fps = int((total_num_steps - fps_timer[1]) / (end - fps_timer[0]))
                    timers /= nsteps
                    env_spf = timers[0]
                    forward_spf = timers[1]
                    backward_spf = timers[2]
                    print(
                        (
                            "{} Updates {}, num timesteps {}, FPS {}, Env FPS "
                            "{}, \n Last {} training episodes: mean/median reward "
                            "{:.3f}/{:.3f}, min/max reward {:.3f}/{:.3f}\n"
                        ).format(
                            datetime.datetime.now(),
                            iter_count,
                            total_num_steps,
                            fps,
                            int(1.0 / env_spf),
                            len(episode_rewards),
                            np.mean(episode_rewards),
                            np.median(episode_rewards),
                            np.min(episode_rewards),
                            np.max(episode_rewards),
                        )
                    )
                    if self.shell_args.tensorboard:
                        log_dict.update(
                            {
                                "stats/full_spf": 1.0 / (fps + 1e-10),
                                "stats/env_spf": env_spf,
                                "stats/forward_spf": forward_spf,
                                "stats/backward_spf": backward_spf,
                                "stats/full_fps": fps,
                                "stats/env_fps": 1.0 / (env_spf + 1e-10),
                                "stats/forward_fps": 1.0 / (forward_spf + 1e-10),
                                "stats/backward_fps": 1.0 / (backward_spf + 1e-10),
                                "episode/mean_rewards": np.mean(episode_rewards),
                                "episode/median_rewards": np.median(episode_rewards),
                                "episode/min_rewards": np.min(episode_rewards),
                                "episode/max_rewards": np.max(episode_rewards),
                                "episode/mean_lengths": np.mean(episode_lengths),
                                "episode/median_lengths": np.median(episode_lengths),
                                "episode/min_lengths": np.min(episode_lengths),
                                "episode/max_lengths": np.max(episode_lengths),
                            }
                        )
                    fps_timer[0] = time.time()
                    fps_timer[1] = total_num_steps
                    timers[:] = 0

                if self.shell_args.tensorboard:
                    log_dict.update(
                        {
                            "loss/action": action_loss,
                            "loss/0_total": total_loss,
                            "loss/visual/0_total": visual_loss_total,
                            "loss/exploration/egomotion": egomotion_loss,
                            "loss/exploration/forward_model": forward_model_loss,
                        }
                    )
                    if self.shell_args.algo != "supervised":
                        log_dict.update({"loss/entropy": dist_entropy, "loss/value": value_loss})
                    for key, val in visual_loss_dict.items():
                        log_dict["loss/visual/" + key] = val
                    self.logger.dict_log(log_dict, step=total_num_steps)

            if self.shell_args.eval_interval is not None and total_num_steps % self.shell_args.eval_interval < (
                self.shell_args.num_processes * self.shell_args.num_forward_rollout_steps
            ):
                self.save_checkpoint(-1, total_num_steps)
                self.set_log_iter(total_num_steps)
                self.evaluate_model()
                # Reset the env datasets.
                self.envs.unwrapped.call(
                    ["switch_dataset"] * self.shell_args.num_processes,
                    [("train",)] * self.shell_args.num_processes,
                )
                obs = self.envs.reset()
                if self.compute_surface_normals:
                    obs["surface_normals"] = pt_util.depth_to_surface_normals(obs["depth"].to(self.device))
                obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
                if self.shell_args.algo == "supervised":
                    obs["best_next_action"] = pt_util.from_numpy(obs["best_next_action"][:, ACTION_SPACE])
                self.rollouts.copy_obs(obs, 0)
                distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])
                self.train_stats["start_geodesic_distance"][:] = distances
                previous_visual_features = None
                egomotion_pred = None
                prev_action = None
                prev_action_probs = None
    except:
        # Catch all exceptions so a final save can be performed
        import traceback

        traceback.print_exc()
    finally:
        self.save_checkpoint(-1, total_num_steps)
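
# NOTE: The helper below is an illustrative sketch, not part of the original class.
# train_model and evaluate_model repeat the same observation preprocessing (surface
# normals from depth, slicing the previous-action one-hot down to ACTION_SPACE, and,
# for supervised training, converting best_next_action). The hypothetical function
# _preprocess_obs_sketch shows that shared pattern, assuming only names already used
# in this file (pt_util, ACTION_SPACE, torch).
def _preprocess_obs_sketch(obs, device, compute_surface_normals, supervised=False):
    # Derive surface normals from the depth channel when requested.
    if compute_surface_normals:
        obs["surface_normals"] = pt_util.depth_to_surface_normals(obs["depth"].to(device))
    # Restrict the previous-action one-hot to the reduced action space.
    obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
    # Supervised training additionally consumes the teacher's best next action.
    if supervised:
        obs["best_next_action"] = pt_util.from_numpy(obs["best_next_action"][:, ACTION_SPACE])
    return obs
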
def evaluate_model(self):
    """Evaluate the current agent on the val split, writing per-episode results to a CSV
    and summary statistics to tensorboard."""
    self.envs.unwrapped.call(
        ["switch_dataset"] * self.shell_args.num_processes, [("val",)] * self.shell_args.num_processes
    )

    if not os.path.exists(self.eval_dir):
        os.makedirs(self.eval_dir)

    try:
        eval_net_file_name = sorted(
            glob.glob(
                os.path.join(self.shell_args.log_prefix, self.shell_args.checkpoint_dirname, "*") + "/*.pt"
            ),
            key=os.path.getmtime,
        )[-1]
        eval_net_file_name = (
            self.shell_args.log_prefix.replace(os.sep, "_")
            + "_"
            + "_".join(eval_net_file_name.split(os.sep)[-2:])[:-3]
        )
    except IndexError:
        print("Warning, no weights found")
        eval_net_file_name = "random_weights"

    eval_output_file = open(os.path.join(self.eval_dir, eval_net_file_name + ".csv"), "w")
    print("Writing results to", eval_output_file.name)

    # Save the evaled net for posterity
    if self.shell_args.save_checkpoints:
        save_model = self.agent
        pt_util.save(
            save_model,
            os.path.join(self.shell_args.log_prefix, self.shell_args.checkpoint_dirname, "eval_weights"),
            num_to_keep=-1,
            iteration=self.log_iter,
        )
        print("Wrote model to file for safe keeping")

    obs = self.envs.reset()
    if self.compute_surface_normals:
        obs["surface_normals"] = pt_util.depth_to_surface_normals(obs["depth"].to(self.device))
    obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
    recurrent_hidden_states = torch.zeros(
        self.shell_args.num_processes,
        self.agent.recurrent_hidden_state_size,
        dtype=torch.float32,
        device=self.device,
    )
    masks = torch.ones(self.shell_args.num_processes, 1, dtype=torch.float32, device=self.device)

    episode_rewards = deque(maxlen=10)
    current_episode_rewards = np.zeros(self.shell_args.num_processes)
    episode_lengths = deque(maxlen=10)
    current_episode_lengths = np.zeros(self.shell_args.num_processes)

    total_num_steps = self.log_iter
    fps_timer = [time.time(), total_num_steps]
    timers = np.zeros(3)
    num_episodes = 0

    print("Config\n", self.configs[0])

    # Initialize every time eval is run rather than just at the start
    dataset_sizes = np.array([len(dataset.episodes) for dataset in self.eval_datasets])
    eval_stats = dict(
        episode_ids=[None for _ in range(self.shell_args.num_processes)],
        num_episodes=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        num_steps=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        reward=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        spl=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        visited_states=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        success=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        end_geodesic_distance=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        start_geodesic_distance=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        delta_geodesic_distance=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        distance_from_start=np.zeros(self.shell_args.num_processes, dtype=np.float32),
    )
    eval_stats_means = dict(
        num_episodes=0,
        num_steps=0,
        reward=0,
        spl=0,
        visited_states=0,
        success=0,
        end_geodesic_distance=0,
        start_geodesic_distance=0,
        delta_geodesic_distance=0,
        distance_from_start=0,
    )

    eval_output_file.write("name,%s,iter,%d\n\n" % (eval_net_file_name, self.log_iter))
    if self.shell_args.task == "pointnav":
        eval_output_file.write(
            (
                "episode_id,num_steps,reward,spl,success,start_geodesic_distance,"
                "end_geodesic_distance,delta_geodesic_distance\n"
            )
        )
    elif self.shell_args.task == "exploration":
        eval_output_file.write("episode_id,reward,visited_states\n")
    elif self.shell_args.task == "flee":
        eval_output_file.write("episode_id,reward,distance_from_start\n")

    distances = pt_util.to_numpy(obs["goal_geodesic_distance"])
    eval_stats["start_geodesic_distance"][:] = distances

    progress_bar = tqdm.tqdm(total=self.num_eval_episodes_total)
    all_done = False
    iter_count = 0
    video_frames = []
    previous_visual_features = None
    egomotion_pred = None
    prev_action = None
    prev_action_probs = None

    if hasattr(self.agent.base, "enable_decoder"):
        if self.shell_args.record_video:
            self.agent.base.enable_decoder()
        else:
            self.agent.base.disable_decoder()

    while not all_done:
        with torch.no_grad():
            start_t = time.time()
            value, action, action_log_prob, recurrent_hidden_states = self.agent.act(
                {
                    "images": obs["rgb"].to(self.device),
                    "target_vector": obs["pointgoal"].to(self.device),
                    "prev_action_one_hot": obs["prev_action_one_hot"].to(self.device),
                },
                recurrent_hidden_states,
                masks,
            )
            action_cpu = pt_util.to_numpy(action.squeeze(1))
            translated_action_space = ACTION_SPACE[action_cpu]
            timers[1] += time.time() - start_t

            if self.shell_args.record_video:
                if self.shell_args.use_motion_loss:
                    if previous_visual_features is not None:
                        egomotion_pred = self.agent.base.predict_egomotion(
                            self.agent.base.visual_features, previous_visual_features
                        )
                    previous_visual_features = self.agent.base.visual_features.detach()

                # Copy so we don't mess with obs itself
                draw_obs = OrderedDict()
                for key, val in obs.items():
                    draw_obs[key] = pt_util.to_numpy(val).copy()
                best_next_action = draw_obs.pop("best_next_action", None)

                if prev_action is not None:
                    draw_obs["action_taken"] = pt_util.to_numpy(self.agent.last_dist.probs).copy()
                    draw_obs["action_taken"][:] = 0
                    draw_obs["action_taken"][np.arange(self.shell_args.num_processes), prev_action] = 1
                    draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[draw_obs["prev_action"].item()]
                    draw_obs["action_prob"] = pt_util.to_numpy(prev_action_probs).copy()
                else:
                    draw_obs["action_taken"] = None
                    draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[SimulatorActions.STOP]
                    draw_obs["action_prob"] = None
                prev_action = action_cpu
                prev_action_probs = self.agent.last_dist.probs.detach()

                if hasattr(self.agent.base, "decoder_outputs") and self.agent.base.decoder_outputs is not None:
                    min_channel = 0
                    for key, num_channels in self.agent.base.decoder_output_info:
                        outputs = self.agent.base.decoder_outputs[
                            :, min_channel : min_channel + num_channels, ...
                        ]
                        draw_obs["output_" + key] = pt_util.to_numpy(outputs).copy()
                        min_channel += num_channels

                draw_obs["rewards"] = eval_stats["reward"]
                draw_obs["step"] = current_episode_lengths.copy()
                draw_obs["method"] = self.shell_args.method_name
                if best_next_action is not None:
                    draw_obs["best_next_action"] = best_next_action
                if self.shell_args.use_motion_loss:
                    if egomotion_pred is not None:
                        draw_obs["egomotion_pred"] = pt_util.to_numpy(F.softmax(egomotion_pred, dim=1)).copy()
                    else:
                        draw_obs["egomotion_pred"] = None

                images, titles, normalize = draw_outputs.obs_to_images(draw_obs)
                im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                height, width = images[0].shape[:2]
                subplot_image = drawing.subplot(
                    images,
                    2,
                    4,
                    titles=titles,
                    normalize=normalize,
                    output_width=max(width, 320),
                    output_height=max(height, 320),
                    order=im_inds,
                    fancy_text=True,
                )
                video_frames.append(subplot_image)

            # Save dists from previous step or else on reset they will be overwritten.
            distances = pt_util.to_numpy(obs["goal_geodesic_distance"])

            start_t = time.time()
            obs, rewards, dones, infos = self.envs.step(translated_action_space)
            timers[0] += time.time() - start_t
            obs["prev_action_one_hot"] = obs["prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
            rewards *= REWARD_SCALAR
            rewards = np.clip(rewards, -10, 10)

            if self.shell_args.record_video and not dones[0]:
                obs["top_down_map"] = infos[0]["top_down_map"]

            if self.compute_surface_normals:
                obs["surface_normals"] = pt_util.depth_to_surface_normals(obs["depth"].to(self.device))

            current_episode_rewards += pt_util.to_numpy(rewards).squeeze()
            current_episode_lengths += 1
            to_pause = []

            for ii, done_e in enumerate(dones):
                if done_e:
                    num_episodes += 1
                    if self.shell_args.record_video:
                        if "top_down_map" in infos[ii]:
                            video_dir = os.path.join(self.shell_args.log_prefix, "videos")
                            if not os.path.exists(video_dir):
                                os.makedirs(video_dir)
                            im_path = os.path.join(
                                self.shell_args.log_prefix, "videos", "total_steps_%d.png" % total_num_steps
                            )
                            top_down_map = maps.colorize_topdown_map(infos[ii]["top_down_map"]["map"])
                            imageio.imsave(im_path, top_down_map)

                        images_to_video(
                            video_frames,
                            os.path.join(self.shell_args.log_prefix, "videos"),
                            "total_steps_%d" % total_num_steps,
                        )
                        video_frames = []

                    eval_stats["episode_ids"][ii] = infos[ii]["episode_id"]

                    if self.shell_args.task == "pointnav":
                        print(
                            "FINISHED EPISODE %d Length %d Reward %.3f SPL %.4f"
                            % (
                                num_episodes,
                                current_episode_lengths[ii],
                                current_episode_rewards[ii],
                                infos[ii]["spl"],
                            )
                        )
                        eval_stats["spl"][ii] = infos[ii]["spl"]
                        eval_stats["success"][ii] = eval_stats["spl"][ii] > 0
                        eval_stats["num_steps"][ii] = current_episode_lengths[ii]
                        eval_stats["end_geodesic_distance"][ii] = (
                            infos[ii]["final_distance"] if eval_stats["success"][ii] else distances[ii]
                        )
                        eval_stats["delta_geodesic_distance"][ii] = (
                            eval_stats["start_geodesic_distance"][ii] - eval_stats["end_geodesic_distance"][ii]
                        )
                    elif self.shell_args.task == "exploration":
                        print(
                            "FINISHED EPISODE %d Reward %.3f States Visited %d"
                            % (num_episodes, current_episode_rewards[ii], infos[ii]["visited_states"])
                        )
                        eval_stats["visited_states"][ii] = infos[ii]["visited_states"]
                    elif self.shell_args.task == "flee":
                        print(
                            "FINISHED EPISODE %d Reward %.3f Distance from start %.4f"
                            % (num_episodes, current_episode_rewards[ii], infos[ii]["distance_from_start"])
                        )
                        eval_stats["distance_from_start"][ii] = infos[ii]["distance_from_start"]

                    eval_stats["num_episodes"][ii] += 1
                    eval_stats["reward"][ii] = current_episode_rewards[ii]

                    if eval_stats["num_episodes"][ii] <= dataset_sizes[ii]:
                        progress_bar.update(1)
                        eval_stats_means["num_episodes"] += 1
                        eval_stats_means["reward"] += eval_stats["reward"][ii]
                        if self.shell_args.task == "pointnav":
                            eval_output_file.write(
                                "%s,%d,%f,%f,%d,%f,%f,%f\n"
                                % (
                                    eval_stats["episode_ids"][ii],
                                    eval_stats["num_steps"][ii],
                                    eval_stats["reward"][ii],
                                    eval_stats["spl"][ii],
                                    eval_stats["success"][ii],
                                    eval_stats["start_geodesic_distance"][ii],
                                    eval_stats["end_geodesic_distance"][ii],
                                    eval_stats["delta_geodesic_distance"][ii],
                                )
                            )
                            eval_stats_means["num_steps"] += eval_stats["num_steps"][ii]
                            eval_stats_means["spl"] += eval_stats["spl"][ii]
                            eval_stats_means["success"] += eval_stats["success"][ii]
                            eval_stats_means["start_geodesic_distance"] += eval_stats[
                                "start_geodesic_distance"
                            ][ii]
                            eval_stats_means["end_geodesic_distance"] += eval_stats["end_geodesic_distance"][ii]
                            eval_stats_means["delta_geodesic_distance"] += eval_stats[
                                "delta_geodesic_distance"
                            ][ii]
                        elif self.shell_args.task == "exploration":
                            eval_output_file.write(
                                "%s,%f,%d\n"
                                % (
                                    eval_stats["episode_ids"][ii],
                                    eval_stats["reward"][ii],
                                    eval_stats["visited_states"][ii],
                                )
                            )
                            eval_stats_means["visited_states"] += eval_stats["visited_states"][ii]
                        elif self.shell_args.task == "flee":
                            eval_output_file.write(
                                "%s,%f,%f\n"
                                % (
                                    eval_stats["episode_ids"][ii],
                                    eval_stats["reward"][ii],
                                    eval_stats["distance_from_start"][ii],
                                )
                            )
                            eval_stats_means["distance_from_start"] += eval_stats["distance_from_start"][ii]
                        eval_output_file.flush()

                    if eval_stats["num_episodes"][ii] == dataset_sizes[ii]:
                        to_pause.append(ii)

                    episode_rewards.append(current_episode_rewards[ii])
                    current_episode_rewards[ii] = 0
                    episode_lengths.append(current_episode_lengths[ii])
                    current_episode_lengths[ii] = 0
                    eval_stats["start_geodesic_distance"][ii] = obs["goal_geodesic_distance"][ii]

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in dones]).to(self.device)

            # Reverse in order to maintain order in case of multiple.
            to_pause.reverse()
            for ii in to_pause:
                # Pause the environments that are done from the vectorenv.
                print("Pausing env", ii)
                self.envs.unwrapped.pause_at(ii)
                current_episode_rewards = np.concatenate(
                    (current_episode_rewards[:ii], current_episode_rewards[ii + 1:])
                )
                current_episode_lengths = np.concatenate(
                    (current_episode_lengths[:ii], current_episode_lengths[ii + 1:])
                )
                for key in eval_stats:
                    eval_stats[key] = np.concatenate((eval_stats[key][:ii], eval_stats[key][ii + 1:]))
                dataset_sizes = np.concatenate((dataset_sizes[:ii], dataset_sizes[ii + 1:]))
                for key in obs:
                    if type(obs[key]) == torch.Tensor:
                        obs[key] = torch.cat((obs[key][:ii], obs[key][ii + 1:]), dim=0)
                    else:
                        obs[key] = np.concatenate((obs[key][:ii], obs[key][ii + 1:]), axis=0)
                recurrent_hidden_states = torch.cat(
                    (recurrent_hidden_states[:ii], recurrent_hidden_states[ii + 1:]), dim=0
                )
                masks = torch.cat((masks[:ii], masks[ii + 1:]), dim=0)

            if len(dataset_sizes) == 0:
                progress_bar.close()
                all_done = True

            total_num_steps += self.shell_args.num_processes

            if iter_count % (self.shell_args.log_interval * 100) == 0:
                log_dict = {}
                if len(episode_rewards) > 1:
                    end = time.time()
                    nsteps = total_num_steps - fps_timer[1]
                    fps = int((total_num_steps - fps_timer[1]) / (end - fps_timer[0]))
                    timers /= nsteps
                    env_spf = timers[0]
                    forward_spf = timers[1]
                    print(
                        (
                            "{} Updates {}, num timesteps {}, FPS {}, Env FPS {}, "
                            "\n Last {} training episodes: mean/median reward {:.3f}/{:.3f}, "
                            "min/max reward {:.3f}/{:.3f}\n"
                        ).format(
                            datetime.datetime.now(),
                            iter_count,
                            total_num_steps,
                            fps,
                            int(1.0 / env_spf),
                            len(episode_rewards),
                            np.mean(episode_rewards),
                            np.median(episode_rewards),
                            np.min(episode_rewards),
                            np.max(episode_rewards),
                        )
                    )
                    if self.shell_args.tensorboard:
                        log_dict.update(
                            {
                                "stats/full_spf": 1.0 / (fps + 1e-10),
                                "stats/env_spf": env_spf,
                                "stats/forward_spf": forward_spf,
                                "stats/full_fps": fps,
                                "stats/env_fps": 1.0 / (env_spf + 1e-10),
                                "stats/forward_fps": 1.0 / (forward_spf + 1e-10),
                                "episode/mean_rewards": np.mean(episode_rewards),
                                "episode/median_rewards": np.median(episode_rewards),
                                "episode/min_rewards": np.min(episode_rewards),
                                "episode/max_rewards": np.max(episode_rewards),
                                "episode/mean_lengths": np.mean(episode_lengths),
                                "episode/median_lengths": np.median(episode_lengths),
                                "episode/min_lengths": np.min(episode_lengths),
                                "episode/max_lengths": np.max(episode_lengths),
                            }
                        )
                        self.eval_logger.dict_log(log_dict, step=self.log_iter)
                    fps_timer[0] = time.time()
                    fps_timer[1] = total_num_steps
                    timers[:] = 0
            iter_count += 1

    print("Finished testing")
    print("Wrote results to", eval_output_file.name)

    eval_stats_means = {
        key: val / eval_stats_means["num_episodes"] for key, val in eval_stats_means.items()
    }
    if self.shell_args.tensorboard:
        log_dict = {"single_episode/reward": eval_stats_means["reward"]}
        if self.shell_args.task == "pointnav":
            log_dict.update(
                {
                    "single_episode/num_steps": eval_stats_means["num_steps"],
                    "single_episode/spl": eval_stats_means["spl"],
                    "single_episode/success": eval_stats_means["success"],
                    "single_episode/start_geodesic_distance": eval_stats_means["start_geodesic_distance"],
                    "single_episode/end_geodesic_distance": eval_stats_means["end_geodesic_distance"],
                    "single_episode/delta_geodesic_distance": eval_stats_means["delta_geodesic_distance"],
                }
            )
        elif self.shell_args.task == "exploration":
            log_dict["single_episode/visited_states"] = eval_stats_means["visited_states"]
        elif self.shell_args.task == "flee":
            log_dict["single_episode/distance_from_start"] = eval_stats_means["distance_from_start"]
        self.eval_logger.dict_log(log_dict, step=self.log_iter)

    self.envs.unwrapped.resume_all()