def observations_to_image(observation: Dict, info: Dict) -> np.ndarray:
    r"""Generate image of single frame from observation and info returned
    from a single environment step().

    Args:
        observation: observation returned from an environment step().
        info: info returned from an environment step().

    Returns:
        generated image of a single frame.
    """
    egocentric_view = []
    if "rgb" in observation:
        observation_size = observation["rgb"].shape[0]
        egocentric_view.append(observation["rgb"][:, :, :3])

    # draw depth map if observation has depth info
    if "depth" in observation:
        observation_size = observation["depth"].shape[0]
        depth_map = (observation["depth"].squeeze() * 255).astype(np.uint8)
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view.append(depth_map)

    assert len(egocentric_view) > 0, "Expected at least one visual sensor enabled."
    egocentric_view = np.concatenate(egocentric_view, axis=1)

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    frame = egocentric_view
    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"])
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )

        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        old_h, old_w, _ = top_down_map.shape
        top_down_height = observation_size
        top_down_width = int(float(top_down_height) / old_h * old_w)
        # cv2 resize (dsize is width first)
        top_down_map = cv2.resize(
            top_down_map,
            (top_down_width, top_down_height),
            interpolation=cv2.INTER_CUBIC,
        )
        frame = np.concatenate((egocentric_view, top_down_map), axis=1)
    return frame

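# Usage sketch for observations_to_image (assumptions: a habitat `env` whose
# task config enables the TOP_DOWN_MAP and COLLISIONS measures, and an `agent`
# exposing act(observations); images_to_video is the
# habitat.utils.visualizations.utils helper).
def example_record_episode(env, agent, video_dir="videos"):
    from habitat.utils.visualizations.utils import images_to_video

    frames = []
    observations = env.reset()
    while not env.episode_over:
        observations = env.step(agent.act(observations))
        # env.get_metrics() supplies the `info` dict consumed above
        frames.append(observations_to_image(observations, env.get_metrics()))
    images_to_video(frames, video_dir, "example_episode")
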
def draw_top_down_map(info, heading, output_size):
    """Generates a map that displays the state of the agent in the given
    environment, for the current frame.

    Args:
        info: environment info for the current frame.
        heading: the direction the agent is facing, in radians.
        output_size: height of the output map in pixels.

    Returns:
        the output_size x width x 3 colorized map.
    """
    top_down_map = maps.colorize_topdown_map(
        info["top_down_map"]["map"],
        info["top_down_map"]["fog_of_war_mask"])

    original_map_size = top_down_map.shape[:2]
    map_scale = np.array(
        (1, original_map_size[1] * 1.0 / original_map_size[0]))
    new_map_size = np.round(output_size * map_scale).astype(np.int32)
    # OpenCV expects (w, h), but the map size is stored as (h, w)
    top_down_map = cv2.resize(top_down_map,
                              (new_map_size[1], new_map_size[0]))

    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    map_agent_pos = np.round(
        map_agent_pos * new_map_size / original_map_size).astype(np.int32)
    top_down_map = maps.draw_agent(
        top_down_map,
        map_agent_pos,
        heading - np.pi / 2,
        agent_radius_px=top_down_map.shape[0] / 40,
    )
    return top_down_map

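# A minimal calling sketch, assuming the HEADING_SENSOR is enabled so
# observations carry a "heading" key (a length-1 array in radians), and the
# TOP_DOWN_MAP measure (with fog of war) populates `info`:
def example_draw_map(observations, info, output_size=256):
    heading = float(observations["heading"][0])
    return draw_top_down_map(info, heading, output_size)
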
def draw_top_down_map(info):
    top_down_map = info["top_down_map"]["map"]
    top_down_map = maps.colorize_topdown_map(top_down_map)
    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    top_down_map = maps.draw_agent(
        image=top_down_map,
        agent_center_coord=map_agent_pos,
        agent_rotation=info["top_down_map"]["agent_angle"],
        agent_radius_px=top_down_map.shape[0] // 25,
    )
    return top_down_map

def plot_top_down_map(info, dataset='replica', pred=None):
    top_down_map = info["top_down_map"]["map"]
    top_down_map = maps.colorize_topdown_map(
        top_down_map, info["top_down_map"]["fog_of_war_mask"])
    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    if dataset == 'replica':
        agent_radius_px = top_down_map.shape[0] // 16
    else:
        agent_radius_px = top_down_map.shape[0] // 50
    top_down_map = maps.draw_agent(
        image=top_down_map,
        agent_center_coord=map_agent_pos,
        agent_rotation=info["top_down_map"]["agent_angle"],
        agent_radius_px=agent_radius_px)

    if pred is not None:
        from habitat.utils.geometry_utils import quaternion_rotate_vector

        source_rotation = info["top_down_map"]["agent_rotation"]
        rounded_pred = np.round(pred[1])
        direction_vector_agent = np.array(
            [rounded_pred[1], 0, -rounded_pred[0]])
        direction_vector = quaternion_rotate_vector(source_rotation,
                                                    direction_vector_agent)
        grid_size = (
            (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
            (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
        )
        delta_x = int(-direction_vector[0] / grid_size[0])
        delta_y = int(direction_vector[2] / grid_size[1])

        x = np.clip(map_agent_pos[0] + delta_x,
                    a_min=0,
                    a_max=top_down_map.shape[0])
        y = np.clip(map_agent_pos[1] + delta_y,
                    a_min=0,
                    a_max=top_down_map.shape[1])
        point_padding = 20
        for m in range(x - point_padding, x + point_padding + 1):
            for n in range(y - point_padding, y + point_padding + 1):
                if np.linalg.norm(np.array([m - x, n - y])) <= point_padding and \
                        0 <= m < top_down_map.shape[0] and 0 <= n < top_down_map.shape[1]:
                    top_down_map[m, n] = (0, 255, 255)
        if np.linalg.norm(rounded_pred) < 1:
            assert delta_x == 0 and delta_y == 0

    if top_down_map.shape[0] > top_down_map.shape[1]:
        top_down_map = np.rot90(top_down_map, 1)
    return top_down_map

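# Worked example of the overlay math above (my reading of the code, not a
# documented contract): a rounded prediction rounded_pred = [1, 0] (one unit
# "forward") becomes the agent-frame vector [0, 0, -1];
# quaternion_rotate_vector maps it into world coordinates, and dividing by
# grid_size (the scene extent split into 10000 cells per axis) converts that
# world displacement into the (delta_x, delta_y) cell offset at which the
# cyan disk of radius point_padding is painted.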
def plot_top_down_map(info, dataset='replica'):
    top_down_map = info["top_down_map"]["map"]
    top_down_map = maps.colorize_topdown_map(
        top_down_map, info["top_down_map"]["fog_of_war_mask"])
    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    if dataset == 'replica':
        agent_radius_px = top_down_map.shape[0] // 16
    else:
        agent_radius_px = top_down_map.shape[0] // 50
    top_down_map = maps.draw_agent(
        image=top_down_map,
        agent_center_coord=map_agent_pos,
        agent_rotation=info["top_down_map"]["agent_angle"],
        agent_radius_px=agent_radius_px)
    if top_down_map.shape[0] > top_down_map.shape[1]:
        top_down_map = np.rot90(top_down_map, 1)
    return top_down_map

def draw_top_down_map(info, heading, output_size):
    top_down_map = maps.colorize_topdown_map(info["top_down_map"]["map"])

    original_map_size = top_down_map.shape[:2]
    map_scale = np.array(
        (1, original_map_size[1] * 1.0 / original_map_size[0]))
    new_map_size = np.round(output_size * map_scale).astype(np.int32)
    # OpenCV expects (w, h), but the map size is stored as (h, w)
    top_down_map = cv2.resize(top_down_map,
                              (new_map_size[1], new_map_size[0]))

    map_agent_pos = info["top_down_map"]["agent_map_coord"]
    map_agent_pos = np.round(
        map_agent_pos * new_map_size / original_map_size).astype(np.int32)
    top_down_map = maps.draw_agent(
        top_down_map,
        map_agent_pos,
        heading - np.pi / 2,
        agent_radius_px=top_down_map.shape[0] / 40,
    )
    return top_down_map

def topdown_to_image(topdown_info: np.ndarray) -> np.ndarray:
    r"""Convert topdown map to an RGB image."""
    top_down_map = topdown_info["map"]
    fog_of_war_mask = topdown_info["fog_of_war_mask"]
    top_down_map = maps.colorize_topdown_map(top_down_map, fog_of_war_mask)
    map_agent_pos = topdown_info["agent_map_coord"]

    # Pad the shorter side with white so the map is square,
    # shifting the agent coordinate accordingly
    min_map_size = 200
    if top_down_map.shape[0] != top_down_map.shape[1]:
        H = top_down_map.shape[0]
        W = top_down_map.shape[1]
        if H > W:
            pad_value = (H - W) // 2
            padding = ((0, 0), (pad_value, pad_value), (0, 0))
            map_agent_pos = (map_agent_pos[0], map_agent_pos[1] + pad_value)
        else:
            pad_value = (W - H) // 2
            padding = ((pad_value, pad_value), (0, 0), (0, 0))
            map_agent_pos = (map_agent_pos[0] + pad_value, map_agent_pos[1])
        top_down_map = np.pad(top_down_map,
                              padding,
                              mode="constant",
                              constant_values=255)

    if top_down_map.shape[0] < min_map_size:
        H, W = top_down_map.shape[:2]
        top_down_map = cv2.resize(top_down_map, (min_map_size, min_map_size))
        map_agent_pos = (
            int(map_agent_pos[0] * min_map_size // H),
            int(map_agent_pos[1] * min_map_size // W),
        )

    top_down_map = maps.draw_agent(
        image=top_down_map,
        agent_center_coord=map_agent_pos,
        agent_rotation=topdown_info["agent_angle"],
        agent_radius_px=top_down_map.shape[0] // 16,
    )
    return top_down_map

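# Usage sketch: the dict produced by the TOP_DOWN_MAP measure can be passed
# straight through (assuming a habitat `env` with that measure enabled):
#     frame = topdown_to_image(env.get_metrics()["top_down_map"])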
def observations_to_image(observation: Dict, info: Dict) -> np.ndarray:
    r"""Generate image of single frame from observation and info returned
    from a single environment step().

    Args:
        observation: observation returned from an environment step().
        info: info returned from an environment step().

    Returns:
        generated image of a single frame.
    """
    observation_size = observation["rgb"].shape[0]
    egocentric_view = observation["rgb"][:, :, -3:]

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    if "goal_coord_in_camera" in observation:
        _, _, _, xpx, ypx = observation["goal_coord_in_camera"]
        if xpx != -1 and ypx != -1:
            xpx = int(xpx * observation_size + observation_size / 2)
            ypx = int(ypx * observation_size + observation_size / 2)
            egocentric_view = cv2.circle(egocentric_view, (xpx, ypx), 15,
                                         (0, 0, 255), 5)

    # draw depth map if observation has depth info
    if "depth" in observation:
        depth_map = (observation["depth"][:, :, -1] * 255).astype(np.uint8)
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view = np.concatenate((egocentric_view, depth_map), axis=1)

    if "goalclass" in observation:
        from habitat.tasks.nav.nav_task_multi_goal import CLASSES
        index = np.nonzero(observation["goalclass"])[0][0]
        classes = list(CLASSES.keys())
        class_name = classes[index]
        cv2.putText(egocentric_view, class_name, (15, 15),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255))

    frame = egocentric_view
    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"])
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )

        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        old_h, old_w, _ = top_down_map.shape
        top_down_height = observation_size
        top_down_width = int(float(top_down_height) / old_h * old_w)
        # cv2 resize (dsize is width first)
        top_down_map = cv2.resize(
            top_down_map,
            (top_down_width, top_down_height),
            interpolation=cv2.INTER_CUBIC,
        )
        frame = np.concatenate((egocentric_view, top_down_map), axis=1)
    return frame

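# Worked example of the goal projection above: the formula implies the camera
# coordinates are normalized and centered on zero, so with
# observation_size = 256 and xpx = 0.25 the circle lands at
# int(0.25 * 256 + 128) = 192, a quarter frame right of center. The (-1, -1)
# case is skipped, presumably a sentinel for a goal outside the frame.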
def train_model(self):
    episode_rewards = deque(maxlen=10)
    current_episode_rewards = np.zeros(self.shell_args.num_processes)
    episode_lengths = deque(maxlen=10)
    current_episode_lengths = np.zeros(self.shell_args.num_processes)
    current_rewards = np.zeros(self.shell_args.num_processes)

    total_num_steps = self.start_iter
    fps_timer = [time.time(), total_num_steps]
    timers = np.zeros(3)
    egomotion_loss = 0

    video_frames = []
    num_episodes = 0
    # self.evaluate_model()

    obs = self.envs.reset()
    if self.compute_surface_normals:
        obs["surface_normals"] = pt_util.depth_to_surface_normals(
            obs["depth"].to(self.device))
    obs["prev_action_one_hot"] = obs[
        "prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
    if self.shell_args.algo == "supervised":
        obs["best_next_action"] = pt_util.from_numpy(
            obs["best_next_action"][:, ACTION_SPACE])
    self.rollouts.copy_obs(obs, 0)
    distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])
    self.train_stats["start_geodesic_distance"][:] = distances
    previous_visual_features = None
    egomotion_pred = None
    prev_action = None
    prev_action_probs = None
    num_updates = (int(self.shell_args.num_env_steps) //
                   self.shell_args.num_forward_rollout_steps
                   ) // self.shell_args.num_processes

    try:
        for iter_count in range(num_updates):
            if self.shell_args.tensorboard:
                if iter_count % 500 == 0:
                    print("Logging conv summaries")
                    self.logger.network_conv_summary(self.agent,
                                                     total_num_steps)
                elif iter_count % 100 == 0:
                    print("Logging variable summaries")
                    self.logger.network_variable_summary(
                        self.agent, total_num_steps)

            if self.shell_args.use_linear_lr_decay:
                # decrease learning rate linearly
                update_linear_schedule(self.optimizer.optimizer, iter_count,
                                       num_updates, self.shell_args.lr)

            if self.shell_args.algo == "ppo" and self.shell_args.use_linear_clip_decay:
                self.optimizer.clip_param = self.shell_args.clip_param * (
                    1 - iter_count / float(num_updates))

            if hasattr(self.agent.base, "enable_decoder"):
                if self.shell_args.record_video:
                    self.agent.base.enable_decoder()
                else:
                    self.agent.base.disable_decoder()

            for step in range(self.shell_args.num_forward_rollout_steps):
                with torch.no_grad():
                    start_t = time.time()
                    value, action, action_log_prob, recurrent_hidden_states = self.agent.act(
                        {
                            "images":
                                self.rollouts.obs[step],
                            "target_vector":
                                self.rollouts.additional_observations_dict[
                                    "pointgoal"][step],
                            "prev_action_one_hot":
                                self.rollouts.additional_observations_dict[
                                    "prev_action_one_hot"][step],
                        },
                        self.rollouts.recurrent_hidden_states[step],
                        self.rollouts.masks[step],
                    )
                    action_cpu = pt_util.to_numpy_array(action.squeeze(1))
                    translated_action_space = ACTION_SPACE[action_cpu]

                    if not self.shell_args.end_to_end:
                        self.rollouts.additional_observations_dict[
                            "visual_encoder_features"][
                                self.rollouts.step].copy_(
                                    self.agent.base.visual_encoder_features)

                    if self.shell_args.use_motion_loss:
                        if self.shell_args.record_video:
                            if previous_visual_features is not None:
                                egomotion_pred = self.agent.base.predict_egomotion(
                                    self.agent.base.visual_features,
                                    previous_visual_features)
                            previous_visual_features = self.agent.base.visual_features.detach()

                    timers[1] += time.time() - start_t

                    if self.shell_args.record_video:
                        # Copy so we don't mess with obs itself
                        draw_obs = OrderedDict()
                        for key, val in obs.items():
                            draw_obs[key] = pt_util.to_numpy_array(val).copy()
                        best_next_action = draw_obs.pop("best_next_action", None)

                        if prev_action is not None:
                            draw_obs["action_taken"] = pt_util.to_numpy_array(
                                self.agent.last_dist.probs).copy()
                            draw_obs["action_taken"][:] = 0
                            draw_obs["action_taken"][
                                np.arange(self.shell_args.num_processes),
                                prev_action] = 1
                            draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[
                                ACTION_SPACE_TO_SIM_ACTION[ACTION_SPACE[
                                    prev_action.squeeze()]]]
                            draw_obs["action_prob"] = pt_util.to_numpy_array(
                                prev_action_probs).copy()
                        else:
                            draw_obs["action_taken"] = None
                            draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[
                                SimulatorActions.STOP]
                            draw_obs["action_prob"] = None
                        prev_action = action_cpu
                        prev_action_probs = self.agent.last_dist.probs.detach()

                        if (hasattr(self.agent.base, "decoder_outputs")
                                and self.agent.base.decoder_outputs is not None):
                            min_channel = 0
                            for key, num_channels in self.agent.base.decoder_output_info:
                                outputs = self.agent.base.decoder_outputs[
                                    :, min_channel:min_channel + num_channels, ...]
                                draw_obs["output_" + key] = pt_util.to_numpy_array(
                                    outputs).copy()
                                min_channel += num_channels

                        draw_obs["rewards"] = current_rewards.copy()
                        draw_obs["step"] = current_episode_lengths.copy()
                        draw_obs["method"] = self.shell_args.method_name
                        if best_next_action is not None:
                            draw_obs["best_next_action"] = best_next_action
                        if self.shell_args.use_motion_loss:
                            if egomotion_pred is not None:
                                draw_obs["egomotion_pred"] = pt_util.to_numpy_array(
                                    F.softmax(egomotion_pred, dim=1)).copy()
                            else:
                                draw_obs["egomotion_pred"] = None
                        images, titles, normalize = draw_outputs.obs_to_images(draw_obs)
                        if self.shell_args.algo == "supervised":
                            im_inds = [0, 2, 3, 1, 9, 6, 7, 8, 5, 4]
                        else:
                            im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                        height, width = images[0].shape[:2]
                        subplot_image = drawing.subplot(
                            images,
                            2,
                            5,
                            titles=titles,
                            normalize=normalize,
                            order=im_inds,
                            output_width=max(width, 320),
                            output_height=max(height, 320),
                        )
                        video_frames.append(subplot_image)

                # save dists from previous step or else on reset they will be overwritten
                distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])

                start_t = time.time()
                obs, rewards, dones, infos = self.envs.step(translated_action_space)
                timers[0] += time.time() - start_t
                obs["reward"] = rewards
                if self.shell_args.algo == "supervised":
                    obs["best_next_action"] = pt_util.from_numpy(
                        obs["best_next_action"][:, ACTION_SPACE]).to(torch.float32)
                obs["prev_action_one_hot"] = obs[
                    "prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
                rewards *= REWARD_SCALAR
                rewards = np.clip(rewards, -10, 10)

                if self.shell_args.record_video and not dones[0]:
                    obs["top_down_map"] = infos[0]["top_down_map"]

                if self.compute_surface_normals:
                    obs["surface_normals"] = pt_util.depth_to_surface_normals(
                        obs["depth"].to(self.device))

                current_rewards = pt_util.to_numpy_array(rewards)
                current_episode_rewards += pt_util.to_numpy_array(rewards).squeeze()
                current_episode_lengths += 1
                for ii, done_e in enumerate(dones):
                    if done_e:
                        num_episodes += 1
                        if self.shell_args.record_video:
                            final_rgb = draw_obs["rgb"].transpose(0, 2, 3, 1).squeeze(0)
                            if self.shell_args.task == "pointnav":
                                if infos[ii]["spl"] > 0:
                                    draw_obs["action_taken_name"] = "Stop. Success"
                                    draw_obs["reward"] = [
                                        self.configs[0].TASK.SUCCESS_REWARD
                                    ]
                                    # tint the final frame green on success
                                    final_rgb[:] = final_rgb * np.float32(0.5) + np.tile(
                                        np.array([0, 128, 0], dtype=np.uint8),
                                        (final_rgb.shape[0], final_rgb.shape[1], 1),
                                    )
                                else:
                                    draw_obs["action_taken_name"] = "Timeout. Failed"
                                    # tint the final frame red on failure
                                    final_rgb[:] = final_rgb * np.float32(0.5) + np.tile(
                                        np.array([128, 0, 0], dtype=np.uint8),
                                        (final_rgb.shape[0], final_rgb.shape[1], 1),
                                    )
                            elif self.shell_args.task == "exploration" or self.shell_args.task == "flee":
                                draw_obs["action_taken_name"] = "End of episode."
                            final_rgb = final_rgb[np.newaxis, ...].transpose(0, 3, 1, 2)
                            draw_obs["rgb"] = final_rgb

                            images, titles, normalize = draw_outputs.obs_to_images(draw_obs)
                            im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                            height, width = images[0].shape[:2]
                            subplot_image = drawing.subplot(
                                images,
                                2,
                                5,
                                titles=titles,
                                normalize=normalize,
                                order=im_inds,
                                output_width=max(width, 320),
                                output_height=max(height, 320),
                            )
                            video_frames.extend(
                                [subplot_image] *
                                (self.configs[0].ENVIRONMENT.MAX_EPISODE_STEPS +
                                 30 - len(video_frames)))

                            if "top_down_map" in infos[0]:
                                video_dir = os.path.join(self.shell_args.log_prefix,
                                                         "videos")
                                if not os.path.exists(video_dir):
                                    os.makedirs(video_dir)
                                im_path = os.path.join(
                                    self.shell_args.log_prefix, "videos",
                                    "total_steps_%d.png" % total_num_steps)
                                from habitat.utils.visualizations import maps
                                import imageio
                                top_down_map = maps.colorize_topdown_map(
                                    infos[0]["top_down_map"]["map"])
                                imageio.imsave(im_path, top_down_map)
                            images_to_video(
                                video_frames,
                                os.path.join(self.shell_args.log_prefix, "videos"),
                                "total_steps_%d" % total_num_steps,
                            )
                            video_frames = []

                        if self.shell_args.task == "pointnav":
                            print("FINISHED EPISODE %d Length %d Reward %.3f SPL %.4f" % (
                                num_episodes,
                                current_episode_lengths[ii],
                                current_episode_rewards[ii],
                                infos[ii]["spl"],
                            ))
                            self.train_stats["spl"][ii] = infos[ii]["spl"]
                            self.train_stats["success"][ii] = self.train_stats["spl"][ii] > 0
                            self.train_stats["end_geodesic_distance"][ii] = (
                                distances[ii] -
                                self.configs[0].SIMULATOR.FORWARD_STEP_SIZE)
                            self.train_stats["delta_geodesic_distance"][ii] = (
                                self.train_stats["start_geodesic_distance"][ii] -
                                self.train_stats["end_geodesic_distance"][ii])
                            self.train_stats["num_steps"][ii] = current_episode_lengths[ii]
                        elif self.shell_args.task == "exploration":
                            print("FINISHED EPISODE %d Reward %.3f States Visited %d" %
                                  (num_episodes, current_episode_rewards[ii],
                                   infos[ii]["visited_states"]))
                            self.train_stats["visited_states"][ii] = infos[ii][
                                "visited_states"]
                        elif self.shell_args.task == "flee":
                            print("FINISHED EPISODE %d Reward %.3f Distance from start %.4f" %
                                  (num_episodes, current_episode_rewards[ii],
                                   infos[ii]["distance_from_start"]))
                            self.train_stats["distance_from_start"][ii] = infos[ii][
                                "distance_from_start"]

                        self.train_stats["num_episodes"][ii] += 1
                        self.train_stats["reward"][ii] = current_episode_rewards[ii]

                        if self.shell_args.tensorboard:
                            log_dict = {
                                "single_episode/reward":
                                    self.train_stats["reward"][ii]
                            }
                            if self.shell_args.task == "pointnav":
                                log_dict.update({
                                    "single_episode/num_steps":
                                        self.train_stats["num_steps"][ii],
                                    "single_episode/spl":
                                        self.train_stats["spl"][ii],
                                    "single_episode/success":
                                        self.train_stats["success"][ii],
                                    "single_episode/start_geodesic_distance":
                                        self.train_stats["start_geodesic_distance"][ii],
                                    "single_episode/end_geodesic_distance":
                                        self.train_stats["end_geodesic_distance"][ii],
                                    "single_episode/delta_geodesic_distance":
                                        self.train_stats["delta_geodesic_distance"][ii],
                                })
                            elif self.shell_args.task == "exploration":
                                log_dict["single_episode/visited_states"] = \
                                    self.train_stats["visited_states"][ii]
                            elif self.shell_args.task == "flee":
                                log_dict["single_episode/distance_from_start"] = \
                                    self.train_stats["distance_from_start"][ii]
                            self.logger.dict_log(
                                log_dict,
                                step=(total_num_steps +
                                      self.shell_args.num_processes * step + ii))

                        episode_rewards.append(current_episode_rewards[ii])
                        current_episode_rewards[ii] = 0
                        episode_lengths.append(current_episode_lengths[ii])
                        current_episode_lengths[ii] = 0
                        self.train_stats["start_geodesic_distance"][ii] = obs[
                            "goal_geodesic_distance"][ii]

                # If done then clean the history of observations.
                masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                           for done_ in dones])
                bad_masks = torch.FloatTensor(
                    [[0.0] if "bad_transition" in info.keys() else [1.0]
                     for info in infos])
                self.rollouts.insert(obs, recurrent_hidden_states, action,
                                     action_log_prob, value, rewards, masks,
                                     bad_masks)

            with torch.no_grad():
                start_t = time.time()
                next_value = self.agent.get_value(
                    {
                        "images":
                            self.rollouts.obs[-1],
                        "target_vector":
                            self.rollouts.additional_observations_dict["pointgoal"][-1],
                        "prev_action_one_hot":
                            self.rollouts.additional_observations_dict[
                                "prev_action_one_hot"][-1],
                    },
                    self.rollouts.recurrent_hidden_states[-1],
                    self.rollouts.masks[-1],
                ).detach()
                timers[1] += time.time() - start_t

            self.rollouts.compute_returns(next_value, self.shell_args.use_gae,
                                          self.shell_args.gamma,
                                          self.shell_args.tau)

            if not self.shell_args.no_weight_update:
                start_t = time.time()
                if self.shell_args.algo == "supervised":
                    (
                        total_loss,
                        action_loss,
                        visual_loss_total,
                        visual_loss_dict,
                        egomotion_loss,
                        forward_model_loss,
                    ) = self.optimizer.update(self.rollouts, self.shell_args)
                else:
                    (
                        total_loss,
                        value_loss,
                        action_loss,
                        dist_entropy,
                        visual_loss_total,
                        visual_loss_dict,
                        egomotion_loss,
                        forward_model_loss,
                    ) = self.optimizer.update(self.rollouts, self.shell_args)
                timers[2] += time.time() - start_t

            self.rollouts.after_update()

            # save for every interval-th episode or for the last epoch
            if (iter_count % self.shell_args.save_interval == 0
                    or iter_count == num_updates - 1):
                self.save_checkpoint(5, total_num_steps)

            total_num_steps += (self.shell_args.num_processes *
                                self.shell_args.num_forward_rollout_steps)

            if not self.shell_args.no_weight_update and iter_count % self.shell_args.log_interval == 0:
                log_dict = {}
                if len(episode_rewards) > 1:
                    end = time.time()
                    nsteps = total_num_steps - fps_timer[1]
                    fps = int((total_num_steps - fps_timer[1]) / (end - fps_timer[0]))
                    timers /= nsteps
                    env_spf = timers[0]
                    forward_spf = timers[1]
                    backward_spf = timers[2]
                    print((
                        "{} Updates {}, num timesteps {}, FPS {}, Env FPS "
                        "{}, \n Last {} training episodes: mean/median reward "
                        "{:.3f}/{:.3f}, min/max reward {:.3f}/{:.3f}\n").format(
                            datetime.datetime.now(),
                            iter_count,
                            total_num_steps,
                            fps,
                            int(1.0 / env_spf),
                            len(episode_rewards),
                            np.mean(episode_rewards),
                            np.median(episode_rewards),
                            np.min(episode_rewards),
                            np.max(episode_rewards),
                        ))
                    if self.shell_args.tensorboard:
                        log_dict.update({
                            "stats/full_spf": 1.0 / (fps + 1e-10),
                            "stats/env_spf": env_spf,
                            "stats/forward_spf": forward_spf,
                            "stats/backward_spf": backward_spf,
                            "stats/full_fps": fps,
                            "stats/env_fps": 1.0 / (env_spf + 1e-10),
                            "stats/forward_fps": 1.0 / (forward_spf + 1e-10),
                            "stats/backward_fps": 1.0 / (backward_spf + 1e-10),
                            "episode/mean_rewards": np.mean(episode_rewards),
                            "episode/median_rewards": np.median(episode_rewards),
                            "episode/min_rewards": np.min(episode_rewards),
                            "episode/max_rewards": np.max(episode_rewards),
                            "episode/mean_lengths": np.mean(episode_lengths),
                            "episode/median_lengths": np.median(episode_lengths),
                            "episode/min_lengths": np.min(episode_lengths),
                            "episode/max_lengths": np.max(episode_lengths),
                        })
                    fps_timer[0] = time.time()
                    fps_timer[1] = total_num_steps
                    timers[:] = 0
                if self.shell_args.tensorboard:
                    log_dict.update({
                        "loss/action": action_loss,
                        "loss/0_total": total_loss,
                        "loss/visual/0_total": visual_loss_total,
                        "loss/exploration/egomotion": egomotion_loss,
                        "loss/exploration/forward_model": forward_model_loss,
                    })
                    if self.shell_args.algo != "supervised":
                        log_dict.update({
                            "loss/entropy": dist_entropy,
                            "loss/value": value_loss,
                        })
                    for key, val in visual_loss_dict.items():
                        log_dict["loss/visual/" + key] = val
                    self.logger.dict_log(log_dict, step=total_num_steps)

            if self.shell_args.eval_interval is not None and total_num_steps % self.shell_args.eval_interval < (
                    self.shell_args.num_processes *
                    self.shell_args.num_forward_rollout_steps):
                self.save_checkpoint(-1, total_num_steps)
                self.set_log_iter(total_num_steps)
                self.evaluate_model()
                # reset the env datasets
                self.envs.unwrapped.call(
                    ["switch_dataset"] * self.shell_args.num_processes,
                    [("train", )] * self.shell_args.num_processes)
                obs = self.envs.reset()
                if self.compute_surface_normals:
                    obs["surface_normals"] = pt_util.depth_to_surface_normals(
                        obs["depth"].to(self.device))
                obs["prev_action_one_hot"] = obs[
                    "prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
                if self.shell_args.algo == "supervised":
                    obs["best_next_action"] = pt_util.from_numpy(
                        obs["best_next_action"][:, ACTION_SPACE])
                self.rollouts.copy_obs(obs, 0)
                distances = pt_util.to_numpy_array(obs["goal_geodesic_distance"])
                self.train_stats["start_geodesic_distance"][:] = distances
                previous_visual_features = None
                egomotion_pred = None
                prev_action = None
                prev_action_probs = None
    except:
        # Catch all exceptions so a final save can be performed
        import traceback
        traceback.print_exc()
    finally:
        self.save_checkpoint(-1, total_num_steps)

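# train_model() calls update_linear_schedule for LR decay; a minimal sketch of
# that helper, assuming the common pytorch-a2c-ppo recipe (linear decay of the
# learning rate to zero over training):
def update_linear_schedule(optimizer, epoch, total_num_epochs, initial_lr):
    lr = initial_lr - (initial_lr * (epoch / float(total_num_epochs)))
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr
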
def _eval_checkpoint(self,
                     checkpoint_path: str,
                     writer: TensorboardWriter,
                     checkpoint_index: int = 0,
                     log_diagnostics=[],
                     output_dir='.',
                     label='.',
                     num_eval_runs=1) -> None:
    r"""Evaluates a single checkpoint.

    Args:
        checkpoint_path: path of checkpoint
        writer: tensorboard writer object for logging to tensorboard
        checkpoint_index: index of cur checkpoint for logging

    Returns:
        None
    """
    if checkpoint_index == -1:
        ckpt_file = checkpoint_path.split('/')[-1]
        split_info = ckpt_file.split('.')
        checkpoint_index = split_info[1]
    # Map location CPU is almost always better than mapping to a CUDA device.
    ckpt_dict = self.load_checkpoint(checkpoint_path, map_location="cpu")

    if self.config.EVAL.USE_CKPT_CONFIG:
        config = self._setup_eval_config(ckpt_dict["config"])
    else:
        config = self.config.clone()

    ppo_cfg = config.RL.PPO
    task_cfg = config.TASK_CONFIG.TASK

    config.defrost()
    config.TASK_CONFIG.DATASET.SPLIT = config.EVAL.SPLIT
    config.freeze()

    if len(self.config.VIDEO_OPTION) > 0:
        config.defrost()
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("TOP_DOWN_MAP")
        config.TASK_CONFIG.TASK.MEASUREMENTS.append("COLLISIONS")
        config.freeze()

    logger.info(f"env config: {config}")
    self.envs = construct_envs(config, get_env_class(config.ENV_NAME))
    # pass in aux config if we're doing attention
    aux_cfg = self.config.RL.AUX_TASKS
    self._setup_actor_critic_agent(ppo_cfg, task_cfg, aux_cfg)

    # Check if we accidentally recorded `visual_resnet` in our checkpoint
    # and drop it (it's redundant with `visual_encoder`)
    ckpt_dict['state_dict'] = {
        k: v
        for k, v in ckpt_dict['state_dict'].items()
        if 'visual_resnet' not in k
    }
    self.agent.load_state_dict(ckpt_dict["state_dict"])

    logger.info("agent number of trainable parameters: {}".format(
        sum(param.numel() for param in self.agent.parameters()
            if param.requires_grad)))

    self.actor_critic = self.agent.actor_critic

    observations = self.envs.reset()
    batch = batch_obs(observations, device=self.device)

    current_episode_reward = torch.zeros(self.envs.num_envs,
                                         1,
                                         device=self.device)

    test_recurrent_hidden_states = torch.zeros(
        self.actor_critic.net.num_recurrent_layers,
        self.config.NUM_PROCESSES,
        ppo_cfg.hidden_size,
        device=self.device,
    )
    _, num_recurrent_memories, _ = self._setup_auxiliary_tasks(
        aux_cfg, ppo_cfg, task_cfg, is_eval=True)
    if self.config.RL.PPO.policy in MULTIPLE_BELIEF_CLASSES:
        aux_tasks = self.config.RL.AUX_TASKS.tasks
        num_recurrent_memories = len(self.config.RL.AUX_TASKS.tasks)
        test_recurrent_hidden_states = test_recurrent_hidden_states.unsqueeze(
            2).repeat(1, 1, num_recurrent_memories, 1)

    prev_actions = torch.zeros(self.config.NUM_PROCESSES,
                               1,
                               device=self.device,
                               dtype=torch.long)

    not_done_masks = torch.zeros(self.config.NUM_PROCESSES,
                                 1,
                                 device=self.device)
    stats_episodes = dict()  # dict of dicts that stores stats per episode

    rgb_frames = [[] for _ in range(self.config.NUM_PROCESSES)
                  ]  # type: List[List[np.ndarray]]
    if len(self.config.VIDEO_OPTION) > 0:
        os.makedirs(self.config.VIDEO_DIR, exist_ok=True)

    number_of_eval_episodes = self.config.TEST_EPISODE_COUNT
    if number_of_eval_episodes == -1:
        number_of_eval_episodes = sum(self.envs.number_of_episodes)
    else:
        total_num_eps = sum(self.envs.number_of_episodes)
        if total_num_eps < number_of_eval_episodes:
            logger.warn(
                f"Config specified {number_of_eval_episodes} eval episodes"
                f", dataset only has {total_num_eps}.")
            logger.warn(f"Evaluating with {total_num_eps} instead.")
            number_of_eval_episodes = total_num_eps

    videos_cap = 2  # number of videos to generate per checkpoint
    if len(log_diagnostics) > 0:
        videos_cap = 10
    # video_indices = random.sample(range(self.config.TEST_EPISODE_COUNT),
    #                               min(videos_cap, self.config.TEST_EPISODE_COUNT))
    video_indices = range(10)
    print(f"Videos: {video_indices}")

    total_stats = []
    dones_per_ep = dict()

    # Logging more extensive evaluation stats for analysis
    if len(log_diagnostics) > 0:
        d_stats = {}
        for d in log_diagnostics:
            d_stats[d] = [
                [] for _ in range(self.config.NUM_PROCESSES)
            ]  # stored as nested list envs x timesteps x k (# tasks)

    pbar = tqdm.tqdm(total=number_of_eval_episodes * num_eval_runs)
    self.agent.eval()
    while (len(stats_episodes) < number_of_eval_episodes * num_eval_runs
           and self.envs.num_envs > 0):
        current_episodes = self.envs.current_episodes()

        with torch.no_grad():
            weights_output = None
            if self.config.RL.PPO.policy in MULTIPLE_BELIEF_CLASSES:
                weights_output = torch.empty(self.envs.num_envs,
                                             len(aux_tasks))
            (
                _,
                actions,
                _,
                test_recurrent_hidden_states,
            ) = self.actor_critic.act(batch,
                                      test_recurrent_hidden_states,
                                      prev_actions,
                                      not_done_masks,
                                      deterministic=False,
                                      weights_output=weights_output)
            prev_actions.copy_(actions)

            for i in range(self.envs.num_envs):
                if Diagnostics.actions in log_diagnostics:
                    d_stats[Diagnostics.actions][i].append(
                        prev_actions[i].item())
                if Diagnostics.weights in log_diagnostics:
                    aux_weights = None if weights_output is None else weights_output[i]
                    if aux_weights is not None:
                        d_stats[Diagnostics.weights][i].append(
                            aux_weights.half().tolist())

        outputs = self.envs.step([a[0].item() for a in actions])

        observations, rewards, dones, infos = [
            list(x) for x in zip(*outputs)
        ]
        batch = batch_obs(observations, device=self.device)

        not_done_masks = torch.tensor(
            [[0.0] if done else [1.0] for done in dones],
            dtype=torch.float,
            device=self.device,
        )

        rewards = torch.tensor(rewards,
                               dtype=torch.float,
                               device=self.device).unsqueeze(1)
        current_episode_reward += rewards
        next_episodes = self.envs.current_episodes()
        envs_to_pause = []
        n_envs = self.envs.num_envs
        for i in range(n_envs):
            next_k = (
                next_episodes[i].scene_id,
                next_episodes[i].episode_id,
            )
            if dones_per_ep.get(next_k, 0) == num_eval_runs:
                envs_to_pause.append(i)  # wait for the rest

            if not_done_masks[i].item() == 0:
                episode_stats = dict()

                episode_stats["reward"] = current_episode_reward[i].item()
                episode_stats.update(
                    self._extract_scalars_from_info(infos[i]))

                current_episode_reward[i] = 0
                # use scene_id + episode_id as unique id for storing stats
                k = (
                    current_episodes[i].scene_id,
                    current_episodes[i].episode_id,
                )
                dones_per_ep[k] = dones_per_ep.get(k, 0) + 1

                if dones_per_ep.get(k, 0) == 1 and len(
                        self.config.VIDEO_OPTION) > 0 and len(
                            stats_episodes) in video_indices:
                    logger.info(f"Generating video {len(stats_episodes)}")
                    category = getattr(current_episodes[i],
                                       "object_category", "")
                    if category != "":
                        category += "_"
                    try:
                        generate_video(
                            video_option=self.config.VIDEO_OPTION,
                            video_dir=self.config.VIDEO_DIR,
                            images=rgb_frames[i],
                            episode_id=current_episodes[i].episode_id,
                            checkpoint_idx=checkpoint_index,
                            metrics=self._extract_scalars_from_info(
                                infos[i]),
                            tag=f"{category}{label}",
                            tb_writer=writer,
                        )
                    except Exception as e:
                        logger.warning(str(e))
                    rgb_frames[i] = []

                stats_episodes[(
                    current_episodes[i].scene_id,
                    current_episodes[i].episode_id,
                    dones_per_ep[k],
                )] = episode_stats

                if len(log_diagnostics) > 0:
                    diagnostic_info = dict()
                    for metric in log_diagnostics:
                        diagnostic_info[metric] = d_stats[metric][i]
                        d_stats[metric][i] = []
                    if Diagnostics.top_down_map in log_diagnostics:
                        top_down_map = torch.tensor([])
                        if len(self.config.VIDEO_OPTION) > 0:
                            top_down_map = infos[i]["top_down_map"]["map"]
                            top_down_map = maps.colorize_topdown_map(
                                top_down_map, fog_of_war_mask=None)
                        diagnostic_info.update(
                            dict(top_down_map=top_down_map))
                    total_stats.append(
                        dict(
                            stats=episode_stats,
                            did_stop=bool(prev_actions[i] == 0),
                            episode_info=attr.asdict(current_episodes[i]),
                            info=diagnostic_info,
                        ))
                pbar.update()

            # episode continues
            else:
                if len(self.config.VIDEO_OPTION) > 0:
                    aux_weights = None if weights_output is None else weights_output[i]
                    frame = observations_to_image(
                        observations[i], infos[i],
                        current_episode_reward[i].item(), aux_weights,
                        aux_tasks)
                    rgb_frames[i].append(frame)
                if Diagnostics.gps in log_diagnostics:
                    d_stats[Diagnostics.gps][i].append(
                        observations[i]["gps"].tolist())
                if Diagnostics.heading in log_diagnostics:
                    d_stats[Diagnostics.heading][i].append(
                        observations[i]["heading"].tolist())

        (
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        ) = self._pause_envs(
            envs_to_pause,
            self.envs,
            test_recurrent_hidden_states,
            not_done_masks,
            current_episode_reward,
            prev_actions,
            batch,
            rgb_frames,
        )

    num_episodes = len(stats_episodes)
    aggregated_stats = dict()
    for stat_key in next(iter(stats_episodes.values())).keys():
        aggregated_stats[stat_key] = (
            sum([v[stat_key] for v in stats_episodes.values()]) /
            num_episodes)

    for k, v in aggregated_stats.items():
        logger.info(f"Average episode {k}: {v:.4f}")

    step_id = checkpoint_index
    if "extra_state" in ckpt_dict and "step" in ckpt_dict["extra_state"]:
        step_id = ckpt_dict["extra_state"]["step"]

    writer.add_scalars(
        "eval_reward",
        {"average reward": aggregated_stats["reward"]},
        step_id,
    )

    metrics = {k: v for k, v in aggregated_stats.items() if k != "reward"}
    if len(metrics) > 0:
        writer.add_scalars("eval_metrics", metrics, step_id)
        logger.info("eval_metrics")
        logger.info(metrics)
    if len(log_diagnostics) > 0:
        os.makedirs(output_dir, exist_ok=True)
        eval_fn = f"{label}.json"
        with open(os.path.join(output_dir, eval_fn), 'w',
                  encoding='utf-8') as f:
            json.dump(total_stats, f, ensure_ascii=False, indent=4)
    self.envs.close()

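# _eval_checkpoint() relies on _extract_scalars_from_info; a hedged sketch of
# what such a flattener does (the real habitat-baselines helper also filters a
# metrics blacklist, omitted here):
def example_extract_scalars(info, prefix=""):
    result = {}
    for k, v in info.items():
        if isinstance(v, dict):
            # recurse into nested metric dicts, joining keys with "."
            result.update(example_extract_scalars(v, prefix + k + "."))
        elif np.isscalar(v):
            result[prefix + k] = float(v)
    return result
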
def evaluate_model(self):
    self.envs.unwrapped.call(
        ["switch_dataset"] * self.shell_args.num_processes,
        [("val", )] * self.shell_args.num_processes)

    if not os.path.exists(self.eval_dir):
        os.makedirs(self.eval_dir)
    try:
        eval_net_file_name = sorted(
            glob.glob(
                os.path.join(self.shell_args.log_prefix,
                             self.shell_args.checkpoint_dirname, "*") +
                "/*.pt"),
            key=os.path.getmtime,
        )[-1]
        eval_net_file_name = (
            self.shell_args.log_prefix.replace(os.sep, "_") + "_" +
            "_".join(eval_net_file_name.split(os.sep)[-2:])[:-3])
    except IndexError:
        print("Warning, no weights found")
        eval_net_file_name = "random_weights"
    eval_output_file = open(
        os.path.join(self.eval_dir, eval_net_file_name + ".csv"), "w")
    print("Writing results to", eval_output_file.name)

    # Save the evaled net for posterity
    if self.shell_args.save_checkpoints:
        save_model = self.agent
        pt_util.save(
            save_model,
            os.path.join(self.shell_args.log_prefix,
                         self.shell_args.checkpoint_dirname, "eval_weights"),
            num_to_keep=-1,
            iteration=self.log_iter,
        )
        print("Wrote model to file for safe keeping")

    obs = self.envs.reset()
    if self.compute_surface_normals:
        obs["surface_normals"] = pt_util.depth_to_surface_normals(
            obs["depth"].to(self.device))
    obs["prev_action_one_hot"] = obs[
        "prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
    recurrent_hidden_states = torch.zeros(
        self.shell_args.num_processes,
        self.agent.recurrent_hidden_state_size,
        dtype=torch.float32,
        device=self.device,
    )
    masks = torch.ones(self.shell_args.num_processes,
                       1,
                       dtype=torch.float32,
                       device=self.device)

    episode_rewards = deque(maxlen=10)
    current_episode_rewards = np.zeros(self.shell_args.num_processes)
    episode_lengths = deque(maxlen=10)
    current_episode_lengths = np.zeros(self.shell_args.num_processes)

    total_num_steps = self.log_iter
    fps_timer = [time.time(), total_num_steps]
    timers = np.zeros(3)

    num_episodes = 0

    print("Config\n", self.configs[0])

    # Initialize every time eval is run rather than just at the start
    dataset_sizes = np.array(
        [len(dataset.episodes) for dataset in self.eval_datasets])

    eval_stats = dict(
        episode_ids=[None for _ in range(self.shell_args.num_processes)],
        num_episodes=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        num_steps=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        reward=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        spl=np.zeros(self.shell_args.num_processes, dtype=np.float32),
        visited_states=np.zeros(self.shell_args.num_processes,
                                dtype=np.int32),
        success=np.zeros(self.shell_args.num_processes, dtype=np.int32),
        end_geodesic_distance=np.zeros(self.shell_args.num_processes,
                                       dtype=np.float32),
        start_geodesic_distance=np.zeros(self.shell_args.num_processes,
                                         dtype=np.float32),
        delta_geodesic_distance=np.zeros(self.shell_args.num_processes,
                                         dtype=np.float32),
        distance_from_start=np.zeros(self.shell_args.num_processes,
                                     dtype=np.float32),
    )
    eval_stats_means = dict(
        num_episodes=0,
        num_steps=0,
        reward=0,
        spl=0,
        visited_states=0,
        success=0,
        end_geodesic_distance=0,
        start_geodesic_distance=0,
        delta_geodesic_distance=0,
        distance_from_start=0,
    )
    eval_output_file.write("name,%s,iter,%d\n\n" %
                           (eval_net_file_name, self.log_iter))
    if self.shell_args.task == "pointnav":
        eval_output_file.write(
            ("episode_id,num_steps,reward,spl,success,start_geodesic_distance,"
             "end_geodesic_distance,delta_geodesic_distance\n"))
    elif self.shell_args.task == "exploration":
        eval_output_file.write("episode_id,reward,visited_states\n")
    elif self.shell_args.task == "flee":
        eval_output_file.write("episode_id,reward,distance_from_start\n")
    distances = pt_util.to_numpy(obs["goal_geodesic_distance"])
    eval_stats["start_geodesic_distance"][:] = distances

    progress_bar = tqdm.tqdm(total=self.num_eval_episodes_total)
    all_done = False
    iter_count = 0
    video_frames = []
    previous_visual_features = None
    egomotion_pred = None
    prev_action = None
    prev_action_probs = None
    if hasattr(self.agent.base, "enable_decoder"):
        if self.shell_args.record_video:
            self.agent.base.enable_decoder()
        else:
            self.agent.base.disable_decoder()

    while not all_done:
        with torch.no_grad():
            start_t = time.time()
            value, action, action_log_prob, recurrent_hidden_states = self.agent.act(
                {
                    "images": obs["rgb"].to(self.device),
                    "target_vector": obs["pointgoal"].to(self.device),
                    "prev_action_one_hot":
                        obs["prev_action_one_hot"].to(self.device),
                },
                recurrent_hidden_states,
                masks,
            )
            action_cpu = pt_util.to_numpy(action.squeeze(1))
            translated_action_space = ACTION_SPACE[action_cpu]
            timers[1] += time.time() - start_t

            if self.shell_args.record_video:
                if self.shell_args.use_motion_loss:
                    if previous_visual_features is not None:
                        egomotion_pred = self.agent.base.predict_egomotion(
                            self.agent.base.visual_features,
                            previous_visual_features)
                    previous_visual_features = self.agent.base.visual_features.detach()

                # Copy so we don't mess with obs itself
                draw_obs = OrderedDict()
                for key, val in obs.items():
                    draw_obs[key] = pt_util.to_numpy(val).copy()
                best_next_action = draw_obs.pop("best_next_action", None)

                if prev_action is not None:
                    draw_obs["action_taken"] = pt_util.to_numpy(
                        self.agent.last_dist.probs).copy()
                    draw_obs["action_taken"][:] = 0
                    draw_obs["action_taken"][
                        np.arange(self.shell_args.num_processes),
                        prev_action] = 1
                    draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[
                        draw_obs['prev_action'].item()]
                    draw_obs["action_prob"] = pt_util.to_numpy(
                        prev_action_probs).copy()
                else:
                    draw_obs["action_taken"] = None
                    draw_obs["action_taken_name"] = SIM_ACTION_TO_NAME[
                        SimulatorActions.STOP]
                    draw_obs["action_prob"] = None
                prev_action = action_cpu
                prev_action_probs = self.agent.last_dist.probs.detach()
                if hasattr(
                        self.agent.base, "decoder_outputs"
                ) and self.agent.base.decoder_outputs is not None:
                    min_channel = 0
                    for key, num_channels in self.agent.base.decoder_output_info:
                        outputs = self.agent.base.decoder_outputs[
                            :, min_channel:min_channel + num_channels, ...]
                        draw_obs["output_" + key] = pt_util.to_numpy(
                            outputs).copy()
                        min_channel += num_channels
                draw_obs["rewards"] = eval_stats["reward"]
                draw_obs["step"] = current_episode_lengths.copy()
                draw_obs["method"] = self.shell_args.method_name
                if best_next_action is not None:
                    draw_obs["best_next_action"] = best_next_action
                if self.shell_args.use_motion_loss:
                    if egomotion_pred is not None:
                        draw_obs["egomotion_pred"] = pt_util.to_numpy(
                            F.softmax(egomotion_pred, dim=1)).copy()
                    else:
                        draw_obs["egomotion_pred"] = None
                images, titles, normalize = draw_outputs.obs_to_images(
                    draw_obs)
                im_inds = [0, 2, 3, 1, 6, 7, 8, 5]
                height, width = images[0].shape[:2]
                subplot_image = drawing.subplot(
                    images,
                    2,
                    4,
                    titles=titles,
                    normalize=normalize,
                    output_width=max(width, 320),
                    output_height=max(height, 320),
                    order=im_inds,
                    fancy_text=True,
                )
                video_frames.append(subplot_image)

        # save dists from previous step or else on reset they will be overwritten
        distances = pt_util.to_numpy(obs["goal_geodesic_distance"])

        start_t = time.time()
        obs, rewards, dones, infos = self.envs.step(translated_action_space)
        timers[0] += time.time() - start_t
        obs["prev_action_one_hot"] = obs[
            "prev_action_one_hot"][:, ACTION_SPACE].to(torch.float32)
        rewards *= REWARD_SCALAR
        rewards = np.clip(rewards, -10, 10)

        if self.shell_args.record_video and not dones[0]:
            obs["top_down_map"] = infos[0]["top_down_map"]

        if self.compute_surface_normals:
            obs["surface_normals"] = pt_util.depth_to_surface_normals(
                obs["depth"].to(self.device))

        current_episode_rewards += pt_util.to_numpy(rewards).squeeze()
        current_episode_lengths += 1
        to_pause = []
        for ii, done_e in enumerate(dones):
            if done_e:
                num_episodes += 1
                if self.shell_args.record_video:
                    if "top_down_map" in infos[ii]:
                        video_dir = os.path.join(self.shell_args.log_prefix,
                                                 "videos")
                        if not os.path.exists(video_dir):
                            os.makedirs(video_dir)
                        im_path = os.path.join(
                            self.shell_args.log_prefix, "videos",
                            "total_steps_%d.png" % total_num_steps)
                        top_down_map = maps.colorize_topdown_map(
                            infos[ii]["top_down_map"]["map"])
                        imageio.imsave(im_path, top_down_map)
                    images_to_video(
                        video_frames,
                        os.path.join(self.shell_args.log_prefix, "videos"),
                        "total_steps_%d" % total_num_steps,
                    )
                    video_frames = []

                eval_stats["episode_ids"][ii] = infos[ii]["episode_id"]

                if self.shell_args.task == "pointnav":
                    print("FINISHED EPISODE %d Length %d Reward %.3f SPL %.4f" % (
                        num_episodes,
                        current_episode_lengths[ii],
                        current_episode_rewards[ii],
                        infos[ii]["spl"],
                    ))
                    eval_stats["spl"][ii] = infos[ii]["spl"]
                    eval_stats["success"][ii] = eval_stats["spl"][ii] > 0
                    eval_stats["num_steps"][ii] = current_episode_lengths[ii]
                    eval_stats["end_geodesic_distance"][ii] = (
                        infos[ii]["final_distance"]
                        if eval_stats["success"][ii] else distances[ii])
                    eval_stats["delta_geodesic_distance"][ii] = (
                        eval_stats["start_geodesic_distance"][ii] -
                        eval_stats["end_geodesic_distance"][ii])
                elif self.shell_args.task == "exploration":
                    print("FINISHED EPISODE %d Reward %.3f States Visited %d" %
                          (num_episodes, current_episode_rewards[ii],
                           infos[ii]["visited_states"]))
                    eval_stats["visited_states"][ii] = infos[ii][
                        "visited_states"]
                elif self.shell_args.task == "flee":
                    print("FINISHED EPISODE %d Reward %.3f Distance from start %.4f" %
                          (num_episodes, current_episode_rewards[ii],
                           infos[ii]["distance_from_start"]))
                    eval_stats["distance_from_start"][ii] = infos[ii][
                        "distance_from_start"]

                eval_stats["num_episodes"][ii] += 1
                eval_stats["reward"][ii] = current_episode_rewards[ii]

                if eval_stats["num_episodes"][ii] <= dataset_sizes[ii]:
                    progress_bar.update(1)
                    eval_stats_means["num_episodes"] += 1
                    eval_stats_means["reward"] += eval_stats["reward"][ii]
                    if self.shell_args.task == "pointnav":
                        eval_output_file.write("%s,%d,%f,%f,%d,%f,%f,%f\n" % (
                            eval_stats["episode_ids"][ii],
                            eval_stats["num_steps"][ii],
                            eval_stats["reward"][ii],
                            eval_stats["spl"][ii],
                            eval_stats["success"][ii],
                            eval_stats["start_geodesic_distance"][ii],
                            eval_stats["end_geodesic_distance"][ii],
                            eval_stats["delta_geodesic_distance"][ii],
                        ))
                        eval_stats_means["num_steps"] += eval_stats[
                            "num_steps"][ii]
                        eval_stats_means["spl"] += eval_stats["spl"][ii]
                        eval_stats_means["success"] += eval_stats["success"][ii]
                        eval_stats_means["start_geodesic_distance"] += eval_stats[
                            "start_geodesic_distance"][ii]
                        eval_stats_means["end_geodesic_distance"] += eval_stats[
                            "end_geodesic_distance"][ii]
                        eval_stats_means["delta_geodesic_distance"] += eval_stats[
                            "delta_geodesic_distance"][ii]
                    elif self.shell_args.task == "exploration":
                        eval_output_file.write("%s,%f,%d\n" % (
                            eval_stats["episode_ids"][ii],
                            eval_stats["reward"][ii],
                            eval_stats["visited_states"][ii],
                        ))
                        eval_stats_means["visited_states"] += eval_stats[
                            "visited_states"][ii]
                    elif self.shell_args.task == "flee":
                        eval_output_file.write("%s,%f,%f\n" % (
                            eval_stats["episode_ids"][ii],
                            eval_stats["reward"][ii],
                            eval_stats["distance_from_start"][ii],
                        ))
                        eval_stats_means["distance_from_start"] += eval_stats[
                            "distance_from_start"][ii]
                    eval_output_file.flush()
                    if eval_stats["num_episodes"][ii] == dataset_sizes[ii]:
                        to_pause.append(ii)

                episode_rewards.append(current_episode_rewards[ii])
                current_episode_rewards[ii] = 0
                episode_lengths.append(current_episode_lengths[ii])
                current_episode_lengths[ii] = 0
                eval_stats["start_geodesic_distance"][ii] = obs[
                    "goal_geodesic_distance"][ii]

        # If done then clean the history of observations.
        masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                   for done_ in dones]).to(self.device)

        # Reverse in order to maintain order in case of multiple.
        to_pause.reverse()
        for ii in to_pause:
            # Pause the environments that are done from the vectorenv.
            print("Pausing env", ii)
            self.envs.unwrapped.pause_at(ii)
            current_episode_rewards = np.concatenate(
                (current_episode_rewards[:ii],
                 current_episode_rewards[ii + 1:]))
            current_episode_lengths = np.concatenate(
                (current_episode_lengths[:ii],
                 current_episode_lengths[ii + 1:]))
            for key in eval_stats:
                eval_stats[key] = np.concatenate(
                    (eval_stats[key][:ii], eval_stats[key][ii + 1:]))
            dataset_sizes = np.concatenate(
                (dataset_sizes[:ii], dataset_sizes[ii + 1:]))
            for key in obs:
                if type(obs[key]) == torch.Tensor:
                    obs[key] = torch.cat((obs[key][:ii], obs[key][ii + 1:]),
                                         dim=0)
                else:
                    obs[key] = np.concatenate(
                        (obs[key][:ii], obs[key][ii + 1:]), axis=0)
            recurrent_hidden_states = torch.cat(
                (recurrent_hidden_states[:ii],
                 recurrent_hidden_states[ii + 1:]),
                dim=0)
            masks = torch.cat((masks[:ii], masks[ii + 1:]), dim=0)

        if len(dataset_sizes) == 0:
            progress_bar.close()
            all_done = True

        total_num_steps += self.shell_args.num_processes

        if iter_count % (self.shell_args.log_interval * 100) == 0:
            log_dict = {}
            if len(episode_rewards) > 1:
                end = time.time()
                nsteps = total_num_steps - fps_timer[1]
                fps = int((total_num_steps - fps_timer[1]) /
                          (end - fps_timer[0]))
                timers /= nsteps
                env_spf = timers[0]
                forward_spf = timers[1]
                print((
                    "{} Updates {}, num timesteps {}, FPS {}, Env FPS {}, "
                    "\n Last {} training episodes: mean/median reward {:.3f}/{:.3f}, "
                    "min/max reward {:.3f}/{:.3f}\n").format(
                        datetime.datetime.now(),
                        iter_count,
                        total_num_steps,
                        fps,
                        int(1.0 / env_spf),
                        len(episode_rewards),
                        np.mean(episode_rewards),
                        np.median(episode_rewards),
                        np.min(episode_rewards),
                        np.max(episode_rewards),
                    ))
                if self.shell_args.tensorboard:
                    log_dict.update({
                        "stats/full_spf": 1.0 / (fps + 1e-10),
                        "stats/env_spf": env_spf,
                        "stats/forward_spf": forward_spf,
                        "stats/full_fps": fps,
                        "stats/env_fps": 1.0 / (env_spf + 1e-10),
                        "stats/forward_fps": 1.0 / (forward_spf + 1e-10),
                        "episode/mean_rewards": np.mean(episode_rewards),
                        "episode/median_rewards": np.median(episode_rewards),
                        "episode/min_rewards": np.min(episode_rewards),
                        "episode/max_rewards": np.max(episode_rewards),
                        "episode/mean_lengths": np.mean(episode_lengths),
                        "episode/median_lengths": np.median(episode_lengths),
                        "episode/min_lengths": np.min(episode_lengths),
                        "episode/max_lengths": np.max(episode_lengths),
                    })
                    self.eval_logger.dict_log(log_dict, step=self.log_iter)
                fps_timer[0] = time.time()
                fps_timer[1] = total_num_steps
                timers[:] = 0
        iter_count += 1

    print("Finished testing")
    print("Wrote results to", eval_output_file.name)

    eval_stats_means = {
        key: val / eval_stats_means["num_episodes"]
        for key, val in eval_stats_means.items()
    }
    if self.shell_args.tensorboard:
        log_dict = {"single_episode/reward": eval_stats_means["reward"]}
        if self.shell_args.task == "pointnav":
            log_dict.update({
                "single_episode/num_steps":
                    eval_stats_means["num_steps"],
                "single_episode/spl":
                    eval_stats_means["spl"],
                "single_episode/success":
                    eval_stats_means["success"],
                "single_episode/start_geodesic_distance":
                    eval_stats_means["start_geodesic_distance"],
                "single_episode/end_geodesic_distance":
                    eval_stats_means["end_geodesic_distance"],
                "single_episode/delta_geodesic_distance":
                    eval_stats_means["delta_geodesic_distance"],
            })
        elif self.shell_args.task == "exploration":
            log_dict["single_episode/visited_states"] = eval_stats_means[
                "visited_states"]
        elif self.shell_args.task == "flee":
            log_dict["single_episode/distance_from_start"] = eval_stats_means[
                "distance_from_start"]
        self.eval_logger.dict_log(log_dict, step=self.log_iter)
    self.envs.unwrapped.resume_all()

def obs_to_images(obs):
    img = obs["rgb"].copy()
    images = [img.transpose(0, 2, 3, 1)]

    # Draw top down view
    if "visited_grid" in obs:
        top_down_map = obs["visited_grid"][0, ...]
    elif "top_down_map" in obs:
        top_down_map = maps.colorize_topdown_map(obs["top_down_map"]["map"])
        map_size = 1024
        original_map_size = top_down_map.shape[:2]
        if original_map_size[0] > original_map_size[1]:
            map_scale = np.array(
                (1, original_map_size[1] * 1.0 / original_map_size[0]))
        else:
            map_scale = np.array(
                (original_map_size[0] * 1.0 / original_map_size[1], 1))
        new_map_size = np.round(map_size * map_scale).astype(np.int32)
        # OpenCV expects (w, h), but the map size is stored as (h, w)
        top_down_map = cv2.resize(top_down_map,
                                  (new_map_size[1], new_map_size[0]))

        map_agent_pos = obs["top_down_map"]["agent_map_coord"]
        map_agent_pos = np.round(
            map_agent_pos * new_map_size / original_map_size).astype(np.int32)
        top_down_map = maps.draw_agent(
            top_down_map,
            map_agent_pos,
            obs["heading"] - np.pi / 2,
            agent_radius_px=top_down_map.shape[0] / 40)
    else:
        top_down_map = None

    normalize = [True]
    titles = [(
        ("Method: %s" % obs["method"].replace("_", " ")),
        ("Step: %03d Reward: %.3f" %
         (obs["step"][0], obs.get("reward", [0])[0])),
        ("Action: %s" %
         string.capwords(obs["action_taken_name"].replace("_", " "))),
    )]
    images.append(top_down_map)
    if "visited" in obs:
        titles.append((("Visited Cube Count: %d" % obs["visited"][0]), ))
    elif "distance_from_start" in obs:
        titles.append("Geo Dist From Origin: %.3f" %
                      obs["distance_from_start"][0])
    elif "pointgoal" in obs:
        titles.append((("Euc Dist: %.3f" % obs["pointgoal"][0, 0]),
                       ("Geo Dist: %.3f" % obs["goal_geodesic_distance"][0])))
    normalize.append(False)

    for key, val in obs.items():
        if key == "depth" or key == "output_depth":
            normalize.append(False)
            val = val[:, 0, ...]
            depth = np.clip(val, -0.5, 0.5)
            depth += 0.5
            depth *= 255
            titles.append(key)
            depth = depth.astype(np.uint8)
            depth = np.reshape(depth, (-1, depth.shape[-1]))
            images.append(depth)
        elif key == "surface_normals" or key == "output_surface_normals":
            titles.append(key)
            normalize.append(False)
            val = val.copy()
            if key == "output_surface_normals":
                # Still need to be normalized
                val /= np.sqrt(np.sum(val ** 2, axis=1, keepdims=True))
            surfnorm = (np.clip((val + 1), 0, 2) * 127).astype(
                np.uint8).transpose((0, 2, 3, 1))
            images.append(surfnorm)
        elif key == "semantic":
            titles.append(key)
            normalize.append(False)
            seg = (val * 314.159 % 255).astype(np.uint8)
            seg = np.reshape(seg, (-1, seg.shape[-1]))
            images.append(seg)
        elif key == "output_reconstruction":
            titles.append(key)
            normalize.append(False)
            val = np.clip(val, -0.5, 0.5)
            val += 0.5
            val *= 255
            val = val.astype(np.uint8).transpose((0, 2, 3, 1))
            images.append(val)
        elif key in {
                "action_prob", "action_taken", "egomotion_pred",
                "best_next_action"
        }:
            if key == "action_prob":
                titles.append(("Output Distribution",
                               "p(Forward) p(Left) p(Right)"))
            else:
                titles.append(key)
            if val is not None:
                normalize.append(True)
                prob_hists = np.concatenate(
                    [draw_probability_hist(pi) for pi in val.copy()], axis=0)
                images.append(prob_hists)
            else:
                images.append(None)
                normalize.append(False)

    images.append(top_down_map)
    normalize.append(True)
    titles = [
        string.capwords(title.replace("_", " "))
        if isinstance(title, str) else title for title in titles
    ]
    return images, titles, normalize

def observations_to_image(observation: Dict, info: Dict, reward,
                          weights_output=None, aux_tasks=[]) -> np.ndarray:
    r"""Generate image of single frame from observation and info returned
    from a single environment step().

    Args:
        observation: observation returned from an environment step().
        info: info returned from an environment step().
        reward: float to append
        weights_output: attention weights for viz
        aux_tasks: names of the auxiliary tasks, used to label the weights

    Returns:
        generated image of a single frame.
    """
    egocentric_view = []
    if "rgb" in observation:
        observation_size = observation["rgb"].shape[0]
        rgb = observation["rgb"]
        if not isinstance(rgb, np.ndarray):
            rgb = rgb.cpu().numpy()
        egocentric_view.append(rgb)

    # draw depth map if observation has depth info
    if "depth" in observation:
        observation_size = observation["depth"].shape[0]
        depth_map = observation["depth"].squeeze() * 255.0
        if not isinstance(depth_map, np.ndarray):
            depth_map = depth_map.cpu().numpy()
        depth_map = depth_map.astype(np.uint8)
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view.append(depth_map)

    assert len(egocentric_view) > 0, "Expected at least one visual sensor enabled."
    egocentric_view = np.concatenate(egocentric_view, axis=1)

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    frame = egocentric_view
    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"])
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )

        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        old_h, old_w, _ = top_down_map.shape
        top_down_height = observation_size
        top_down_width = int(float(top_down_height) / old_h * old_w)
        # cv2 resize (dsize is width first)
        top_down_map = cv2.resize(
            top_down_map,
            (top_down_width, top_down_height),
            interpolation=cv2.INTER_CUBIC,
        )
        frame = np.concatenate((egocentric_view, top_down_map), axis=1)

    if weights_output is not None and len(aux_tasks) > 1:
        # add a strip to the right of the video
        strip_height = observation_size  # ~256 -> we'll have 5-10 tasks, let's do 24 pixels each
        strip_gap = 24
        strip_width = strip_gap + 12
        strip = np.ones((strip_height, strip_width, 3),
                        dtype=np.uint8) * 255  # white bg

        num_tasks = weights_output.size(0)
        total_height = num_tasks * strip_gap
        offset = int((strip_height - total_height) / 2)
        assert offset > 0, "too many aux tasks to visualize"
        for i in range(num_tasks):
            start_height = i * strip_gap + offset
            strength = int(255 * weights_output[i])
            color = np.array([strength, 0, 0])
            if weights_output[i] > 1.001:
                raise Exception(f"weights is {weights_output}, that's too big")
            strip[start_height:start_height + strip_gap] = color

            task_name = AUX_ABBREV.get(aux_tasks[i], aux_tasks[i])
            task_abbrev = task_name[:3]
            cv2.putText(img=strip,
                        text=f"{task_abbrev}",
                        org=(2, int(start_height + strip_gap / 2)),
                        fontFace=2,
                        fontScale=0.4,
                        color=(255, 255, 255),
                        thickness=1)
        frame = np.concatenate((frame, strip), axis=1)
    return frame

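# Usage sketch with hypothetical aux task names; weights_output must lie in
# [0, 1] (e.g. a softmax over tasks) or the strip drawing above raises:
def example_frame_with_weights(observation, info):
    weights = torch.softmax(torch.tensor([1.0, 0.5, 0.1]), dim=0)
    return observations_to_image(observation, info, reward=0.0,
                                 weights_output=weights,
                                 aux_tasks=["cpca", "inverse_dynamics", "td"])
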
trajectory = [
    maps.to_grid(
        -(path_point[2]) + 19.25,
        (path_point[0] - 2.04),
        grid_dimensions,
        pathfinder=sim.pathfinder,
    ) for path_point in second_xyz
]
ground_truth = [
    maps.to_grid(
        -(path_point[2]) + 19.25,
        (path_point[0] - 2.04),
        grid_dimensions,
        pathfinder=sim.pathfinder,
    ) for path_point in first_xyz
]
colored_map = maps.colorize_topdown_map(hablab_topdown_map)
trajectory = np.array(trajectory)
ground_truth = np.array(ground_truth)
# plt.figure(figsize=(20, 20))
fig, ax = plt.subplots()
ax.imshow(colored_map)
ax.plot(trajectory[:, 1],
        trajectory[:, 0],
        linewidth=1,
        color='b',
        label='estimated')
ax.plot(ground_truth[:, 1],
        ground_truth[:, 0],
        linewidth=1,
        color='r',
        label='ground truth')

def observations_to_image(observation: Dict, info: Dict,
                          pred=None) -> np.ndarray:
    r"""Generate image of single frame from observation and info returned
    from a single environment step().

    Args:
        observation: observation returned from an environment step().
        info: info returned from an environment step().

    Returns:
        generated image of a single frame.
    """
    egocentric_view = []
    if "rgb" in observation:
        observation_size = observation["rgb"].shape[0]
        rgb = observation["rgb"]
        if not isinstance(rgb, np.ndarray):
            rgb = rgb.cpu().numpy()
        egocentric_view.append(rgb)

    # draw depth map if observation has depth info
    if "depth" in observation:
        observation_size = observation["depth"].shape[0]
        depth_map = observation["depth"].squeeze() * 255.0
        if not isinstance(depth_map, np.ndarray):
            depth_map = depth_map.cpu().numpy()
        depth_map = depth_map.astype(np.uint8)
        depth_map = np.stack([depth_map for _ in range(3)], axis=2)
        egocentric_view.append(depth_map)

    assert len(egocentric_view) > 0, "Expected at least one visual sensor enabled."
    egocentric_view = np.concatenate(egocentric_view, axis=1)

    # draw collision
    if "collisions" in info and info["collisions"]["is_collision"]:
        egocentric_view = draw_collision(egocentric_view)

    frame = egocentric_view
    if "top_down_map" in info:
        top_down_map = info["top_down_map"]["map"]
        top_down_map = maps.colorize_topdown_map(
            top_down_map, info["top_down_map"]["fog_of_war_mask"])
        map_agent_pos = info["top_down_map"]["agent_map_coord"]
        top_down_map = maps.draw_agent(
            image=top_down_map,
            agent_center_coord=map_agent_pos,
            agent_rotation=info["top_down_map"]["agent_angle"],
            agent_radius_px=top_down_map.shape[0] // 16,
        )

        if pred is not None:
            from habitat.utils.geometry_utils import quaternion_rotate_vector

            # current_position = sim.get_agent_state().position
            # agent_state = sim.get_agent_state()
            source_rotation = info["top_down_map"]["agent_rotation"]

            rounded_pred = np.round(pred[1])
            direction_vector_agent = np.array(
                [rounded_pred[1], 0, -rounded_pred[0]])
            direction_vector = quaternion_rotate_vector(
                source_rotation, direction_vector_agent)
            # pred_goal_location = source_position + direction_vector.astype(np.float32)

            grid_size = (
                (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
                (maps.COORDINATE_MAX - maps.COORDINATE_MIN) / 10000,
            )
            delta_x = int(-direction_vector[0] / grid_size[0])
            delta_y = int(direction_vector[2] / grid_size[1])

            x = np.clip(map_agent_pos[0] + delta_x,
                        a_min=0,
                        a_max=top_down_map.shape[0])
            y = np.clip(map_agent_pos[1] + delta_y,
                        a_min=0,
                        a_max=top_down_map.shape[1])
            point_padding = 12
            for m in range(x - point_padding, x + point_padding + 1):
                for n in range(y - point_padding, y + point_padding + 1):
                    if np.linalg.norm(np.array([m - x, n - y])) <= point_padding and \
                            0 <= m < top_down_map.shape[0] and 0 <= n < top_down_map.shape[1]:
                        top_down_map[m, n] = (0, 255, 255)
            if np.linalg.norm(rounded_pred) < 1:
                assert delta_x == 0 and delta_y == 0

        if top_down_map.shape[0] > top_down_map.shape[1]:
            top_down_map = np.rot90(top_down_map, 1)

        # scale top down map to align with rgb view
        if pred is None:
            old_h, old_w, _ = top_down_map.shape
            top_down_height = observation_size
            top_down_width = int(float(top_down_height) / old_h * old_w)
            # cv2 resize (dsize is width first)
            top_down_map = cv2.resize(
                top_down_map.astype(np.float32),
                (top_down_width, top_down_height),
                interpolation=cv2.INTER_CUBIC,
            )
        else:
            # draw label
            CATEGORY_INDEX_MAPPING = {
                'chair': 0,
                'table': 1,
                'picture': 2,
                'cabinet': 3,
                'cushion': 4,
                'sofa': 5,
                'bed': 6,
                'chest_of_drawers': 7,
                'plant': 8,
                'sink': 9,
                'toilet': 10,
                'stool': 11,
                'towel': 12,
                'tv_monitor': 13,
                'shower': 14,
                'bathtub': 15,
                'counter': 16,
                'fireplace': 17,
                'gym_equipment': 18,
                'seating': 19,
                'clothes': 20
            }
            index2label = {v: k for k, v in CATEGORY_INDEX_MAPPING.items()}
            pred_label = index2label[pred[0]]
            text_height = int(observation_size * 0.1)

            old_h, old_w, _ = top_down_map.shape
            top_down_height = observation_size - text_height
            top_down_width = int(float(top_down_height) / old_h * old_w)
            # cv2 resize (dsize is width first)
            top_down_map = cv2.resize(
                top_down_map.astype(np.float32),
                (top_down_width, top_down_height),
                interpolation=cv2.INTER_CUBIC,
            )
            top_down_map = np.concatenate([
                np.ones([text_height, top_down_map.shape[1], 3],
                        dtype=np.int32) * 255, top_down_map
            ], axis=0)
            top_down_map = cv2.putText(
                top_down_map, 'C_t: ' + pred_label.replace('_', ' '),
                (10, text_height - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.4,
                (0, 0, 0), 2, cv2.LINE_AA)
        frame = np.concatenate((egocentric_view, top_down_map), axis=1)
    return frame

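# Usage sketch for the pred overlay (layout inferred from the code above, not
# a documented contract: pred[0] indexes CATEGORY_INDEX_MAPPING and pred[1] is
# a 2-D agent-frame displacement that gets rounded onto the grid):
#     frame = observations_to_image(observations, info,
#                                   pred=(5, np.array([2.0, -1.0])))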