def _load_transformed_wall_maps(self, scene_map_info, episode):
    seen_maps = []
    wall_maps = []
    start_position = episode.start_position  # (X, Y, Z)
    start_rotation = quaternion_xyzw_to_wxyz(episode.start_rotation)
    start_heading = compute_heading_from_quaternion(start_rotation)
    for floor_data in scene_map_info:
        seen_map = np.load(floor_data["seen_map_path"])
        wall_map = np.load(floor_data["wall_map_path"])
        # ===== Transform the maps relative to the episode start pose =====
        map_view_position = floor_data["world_position"]
        map_view_heading = floor_data["world_heading"]
        # Originally, Z is downward and X is rightward.
        # Convert to X upward and Y rightward.
        x_map, y_map = -map_view_position[2], map_view_position[0]
        theta_map = map_view_heading
        x_start, y_start = -start_position[2], start_position[0]
        theta_start = start_heading
        # Compute the relative coordinates of the start pose in the map frame
        r_rel = math.sqrt((x_start - x_map) ** 2 + (y_start - y_map) ** 2)
        phi_rel = math.atan2(y_start - y_map, x_start - x_map) - theta_map
        x_rel = r_rel * math.cos(phi_rel) / self.config.MAP_SCALE
        y_rel = r_rel * math.sin(phi_rel) / self.config.MAP_SCALE
        theta_rel = theta_start - theta_map
        # Convert these to image coordinates with X rightward and Y downward
        x_img_rel = y_rel
        y_img_rel = -x_rel
        theta_img_rel = theta_rel
        x_trans = torch.Tensor([[x_img_rel, y_img_rel, theta_img_rel]])
        # Perform the transformations
        p_seen_map = rearrange(torch.Tensor(seen_map), "h w c -> () c h w")
        p_wall_map = rearrange(torch.Tensor(wall_map), "h w c -> () c h w")
        p_seen_map_trans = spatial_transform_map(p_seen_map, x_trans)
        p_wall_map_trans = spatial_transform_map(p_wall_map, x_trans)
        seen_map_trans = asnumpy(p_seen_map_trans)
        seen_map_trans = rearrange(seen_map_trans, "() c h w -> h w c")
        wall_map_trans = asnumpy(p_wall_map_trans)
        wall_map_trans = rearrange(wall_map_trans, "() c h w -> h w c")
        seen_maps.append(seen_map_trans)
        wall_maps.append(wall_map_trans)
    return seen_maps, wall_maps
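
# Sanity-check sketch for the relative-pose math above (illustrative only;
# `_demo_relative_pose` is a hypothetical helper, not part of the class).
# It assumes the same X-upward, Y-rightward world convention as the loop: when
# the map view pose and the episode start pose coincide, the relative grid
# offset and heading should all be zero.
def _demo_relative_pose(map_pose, start_pose, map_scale=0.05):
    """map_pose / start_pose: (x, y, theta) in world coordinates."""
    x_map, y_map, theta_map = map_pose
    x_start, y_start, theta_start = start_pose
    r_rel = math.sqrt((x_start - x_map) ** 2 + (y_start - y_map) ** 2)
    phi_rel = math.atan2(y_start - y_map, x_start - x_map) - theta_map
    # Same rotate-then-scale conversion as in _load_transformed_wall_maps
    x_rel = r_rel * math.cos(phi_rel) / map_scale
    y_rel = r_rel * math.sin(phi_rel) / map_scale
    return x_rel, y_rel, theta_start - theta_map

# Example: identical poses give a zero relative pose.
# _demo_relative_pose((1.0, 2.0, 0.3), (1.0, 2.0, 0.3)) -> (0.0, 0.0, 0.0)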
def _get_mesh_occupancy(self):
    agent_position = self.current_episode.start_position
    agent_rotation = quaternion_xyzw_to_wxyz(self.current_episode.start_rotation)
    a_x, a_y = maps.to_grid(
        agent_position[0],
        agent_position[2],
        self._coordinate_min,
        self._coordinate_max,
        self._map_resolution,
    )

    # The map size here represents the size around the agent, not in front.
    mrange = int(self.map_size * 1.5 / 2.0)

    # Add extra padding if the crop coordinates go out of bounds
    y_start = a_y - mrange
    y_end = a_y + mrange
    x_start = a_x - mrange
    x_end = a_x + mrange

    x_l_pad, y_l_pad, x_r_pad, y_r_pad = 0, 0, 0, 0

    H, W = self._top_down_map.shape
    if x_start < 0:
        x_l_pad = int(-x_start)
        x_start += x_l_pad
        x_end += x_l_pad
    if x_end >= W:
        x_r_pad = int(x_end - W + 1)
    if y_start < 0:
        y_l_pad = int(-y_start)
        y_start += y_l_pad
        y_end += y_l_pad
    if y_end >= H:
        y_r_pad = int(y_end - H + 1)

    ego_map = np.pad(self._top_down_map, ((y_l_pad, y_r_pad), (x_l_pad, x_r_pad)))
    ego_map = ego_map[y_start : (y_end + 1), x_start : (x_end + 1)]
    if ego_map.shape[0] == 0 or ego_map.shape[1] == 0:
        ego_map = np.zeros((2 * mrange + 1, 2 * mrange + 1), dtype=np.uint8)

    # Rotate to get the egocentric map.
    # Negated since the value returned is the clockwise rotation about Y,
    # but we need the anti-clockwise rotation.
    agent_heading = -compute_heading_from_quaternion(agent_rotation)
    agent_heading = math.degrees(agent_heading)

    half_size = ego_map.shape[0] // 2
    center = (half_size, half_size)
    M = cv2.getRotationMatrix2D(center, agent_heading, scale=1.0)

    ego_map = cv2.warpAffine(
        ego_map,
        M,
        (ego_map.shape[1], ego_map.shape[0]),
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(1,),
    )
    ego_map = ego_map.astype(np.float32)

    mrange = int(self.map_size / 2.0)
    start_coor = half_size - mrange
    end_coor = int(start_coor + self.map_size - 1)
    ego_map = ego_map[start_coor : (end_coor + 1), start_coor : (end_coor + 1)]

    # This map is currently 0 if occupied and 1 if unoccupied. Flip it.
    ego_map = 1.0 - ego_map

    # Flip the x axis because to_grid() flips the conventions
    ego_map = np.flip(ego_map, axis=1)

    # Append the explored status in the 2nd channel
    ego_map = np.stack([ego_map, np.ones_like(ego_map)], axis=2)

    return ego_map
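
# The padded-crop pattern above can be isolated into a standalone sketch
# (`crop_with_padding` is a hypothetical helper for illustration only): it
# extracts a (2*mrange+1)-sized square window centered at (cy, cx), padding
# any out-of-bounds region with a constant value.
def crop_with_padding(grid, cy, cx, mrange, pad_value=0):
    H, W = grid.shape
    y0, y1 = cy - mrange, cy + mrange
    x0, x1 = cx - mrange, cx + mrange
    # Amounts by which the window spills over each border
    y_pad = (max(0, -y0), max(0, y1 - H + 1))
    x_pad = (max(0, -x0), max(0, x1 - W + 1))
    padded = np.pad(grid, (y_pad, x_pad), constant_values=pad_value)
    # Shift the window into the padded array's coordinates
    y0 += y_pad[0]
    x0 += x_pad[0]
    return padded[y0 : y0 + 2 * mrange + 1, x0 : x0 + 2 * mrange + 1]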
def _get_mesh_occupancy(self, episode, agent_state):
    episode_id = (episode.episode_id, episode.scene_id)
    if self.current_episode_id != episode_id:
        self.current_episode_id = episode_id
        # Transpose to make x rightward and y downward
        self._top_down_map = self.get_original_map().T

    agent_position = agent_state.position
    agent_rotation = agent_state.rotation
    a_x, a_y = maps.to_grid(
        agent_position[0],
        agent_position[2],
        self._coordinate_min,
        self._coordinate_max,
        self._map_resolution,
    )

    # Crop a region centered around the agent
    mrange = int(self.map_size * 1.5)
    ego_map = self._top_down_map[
        (a_y - mrange) : (a_y + mrange), (a_x - mrange) : (a_x + mrange)
    ]
    if ego_map.shape[0] == 0 or ego_map.shape[1] == 0:
        ego_map = np.zeros((2 * mrange + 1, 2 * mrange + 1), dtype=np.uint8)

    # Rotate to get the egocentric map.
    # Negated since the value returned is the clockwise rotation about Y,
    # but we need the anti-clockwise rotation.
    agent_heading = -compute_heading_from_quaternion(agent_rotation)
    agent_heading = math.degrees(agent_heading)

    half_size = ego_map.shape[0] // 2
    center = (half_size, half_size)
    M = cv2.getRotationMatrix2D(center, agent_heading, scale=1.0)

    ego_map = (
        cv2.warpAffine(
            ego_map * 255,
            M,
            (ego_map.shape[1], ego_map.shape[0]),
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(1,),
        ).astype(np.float32)
        / 255.0
    )

    mrange = int(self.map_size)
    ego_map = ego_map[
        (half_size - mrange) : (half_size + mrange),
        (half_size - mrange) : (half_size + mrange),
    ]
    ego_map[ego_map > 0.5] = 1.0
    ego_map[ego_map <= 0.5] = 0.0

    # This map is currently 0 if occupied and 1 if unoccupied. Flip it.
    ego_map = 1.0 - ego_map

    # Flip the x axis because to_grid() flips the conventions
    ego_map = np.flip(ego_map, axis=1)

    # Get the forward region in front of the agent
    half_size = ego_map.shape[0] // 2
    quarter_size = ego_map.shape[0] // 4
    center = (half_size, half_size)
    ego_map = ego_map[0:half_size, quarter_size : (quarter_size + half_size)]

    # Append the explored status in the 2nd channel
    ego_map = np.stack([ego_map, np.ones_like(ego_map)], axis=2)

    return ego_map
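
# Quick sanity-check sketch (illustrative only) of the cv2 rotation convention
# relied on above: cv2.getRotationMatrix2D takes the angle in degrees, and a
# positive angle rotates the image content counter-clockwise, which is why the
# clockwise heading is negated before building the matrix.
def _demo_rotation_convention():
    toy = np.zeros((3, 3), dtype=np.float32)
    toy[0, 1] = 1.0  # mark the cell above the center
    M90 = cv2.getRotationMatrix2D((1, 1), 90, 1.0)
    rot = cv2.warpAffine(toy, M90, (3, 3), flags=cv2.INTER_NEAREST)
    assert rot[1, 0] == 1.0  # the mark moved counter-clockwise, to the left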
def _get_wall_occupancy(self, episode, agent_state):
    episode_id = (episode.episode_id, episode.scene_id)
    # Load the episode-specific maps only if the episode has changed
    if self.current_wall_episode_id != episode_id:
        self.current_wall_episode_id = episode_id
        if self.config.GT_TYPE == "wall_occupancy":
            scene_id = episode.scene_id.split("/")[-1]
            self._scene_maps_info = self._all_maps_info[scene_id]
            # Load the maps per floor
            seen_maps, wall_maps = self._load_transformed_wall_maps(
                self._scene_maps_info,
                episode,
            )
            self._scene_maps = {}
            self._scene_maps["seen_maps"] = seen_maps
            self._scene_maps["wall_maps"] = wall_maps

    agent_state = self._sim.get_agent_state()
    current_height = agent_state.position[1]

    # Pick the floor whose height is closest to the agent's height
    best_floor_idx = None
    best_floor_dist = math.inf
    for floor_idx, floor_data in enumerate(self._scene_maps_info):
        floor_height = floor_data["floor_height"]
        if abs(current_height - floor_height) < best_floor_dist:
            best_floor_idx = floor_idx
            best_floor_dist = abs(current_height - floor_height)
    assert best_floor_idx is not None
    current_wall_map = self._scene_maps["wall_maps"][best_floor_idx]
    # Take only channel 0 since both channels have the same values
    current_wall_map = current_wall_map[..., 0]

    # ========= Get local egocentric crop of the current wall map =========
    # Compute the relative pose of the agent w.r.t. the start location
    start_position = episode.start_position  # (X, Y, Z)
    start_rotation = quaternion_xyzw_to_wxyz(episode.start_rotation)
    start_heading = compute_heading_from_quaternion(start_rotation)
    start_pose = torch.Tensor(
        [[-start_position[2], start_position[0], start_heading]]
    )
    agent_position = agent_state.position
    agent_heading = compute_heading_from_quaternion(agent_state.rotation)
    agent_pose = torch.Tensor(
        [[-agent_position[2], agent_position[0], agent_heading]]
    )
    rel_pose = subtract_pose(start_pose, agent_pose)[0]  # (3,)

    # Compute the agent position on the map image
    map_scale = self.config.MAP_SCALE
    H, W = current_wall_map.shape[:2]
    Hby2, Wby2 = (H + 1) // 2, (W + 1) // 2
    agent_map_x = int(rel_pose[1].item() / map_scale + Wby2)
    agent_map_y = int(-rel_pose[0].item() / map_scale + Hby2)

    # Crop the region around the agent
    mrange = int(1.5 * self.map_size)

    # Add extra padding if the crop coordinates go out of bounds
    y_start = agent_map_y - mrange
    y_end = agent_map_y + mrange
    x_start = agent_map_x - mrange
    x_end = agent_map_x + mrange

    x_l_pad, y_l_pad, x_r_pad, y_r_pad = 0, 0, 0, 0

    H, W = current_wall_map.shape
    if x_start < 0:
        x_l_pad = int(-x_start)
        x_start += x_l_pad
        x_end += x_l_pad
    if x_end >= W:
        x_r_pad = int(x_end - W + 1)
    if y_start < 0:
        y_l_pad = int(-y_start)
        y_start += y_l_pad
        y_end += y_l_pad
    if y_end >= H:
        y_r_pad = int(y_end - H + 1)

    ego_map = np.pad(current_wall_map, ((y_l_pad, y_r_pad), (x_l_pad, x_r_pad)))
    ego_map = ego_map[y_start : (y_end + 1), x_start : (x_end + 1)]

    agent_heading = rel_pose[2].item()
    agent_heading = math.degrees(agent_heading)

    half_size = ego_map.shape[0] // 2
    center = (half_size, half_size)
    M = cv2.getRotationMatrix2D(center, agent_heading, scale=1.0)

    ego_map = cv2.warpAffine(
        ego_map,
        M,
        (ego_map.shape[1], ego_map.shape[0]),
        flags=cv2.INTER_NEAREST,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(1,),
    )
    ego_map = ego_map.astype(np.float32)

    mrange = int(self.map_size)
    ego_map = ego_map[
        (half_size - mrange) : (half_size + mrange),
        (half_size - mrange) : (half_size + mrange),
    ]

    # Get the forward region in front of the agent
    half_size = ego_map.shape[0] // 2
    quarter_size = ego_map.shape[0] // 4
    center = (half_size, half_size)
    ego_map = ego_map[0:half_size, quarter_size : (quarter_size + half_size)]

    # Append the explored status in the 2nd channel
    ego_map = np.stack([ego_map, ego_map], axis=2)

    return ego_map
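
# Sketch of the forward-region crop used above, for a concrete size
# (illustrative only; the `map_size` value is hypothetical). Starting from a
# (2*map_size, 2*map_size) egocentric crop with the agent at the center facing
# the top of the image, the slices keep the (map_size, map_size) window
# directly in front of the agent.
def _demo_forward_crop(map_size=65):
    ego = np.zeros((2 * map_size, 2 * map_size), dtype=np.float32)
    half_size = ego.shape[0] // 2
    quarter_size = ego.shape[0] // 4
    fwd = ego[0:half_size, quarter_size : (quarter_size + half_size)]
    assert fwd.shape == (map_size, map_size)
    return fwd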
def main(args):
    config = get_config()
    mapper_config = config.RL.ANS.MAPPER
    mapper_config.defrost()
    mapper_config.map_size = 130
    mapper_config.map_scale = 0.02
    mapper_config.freeze()
    mapper = Mapper(mapper_config, None)

    M = args.global_map_size
    skip_scene = args.skip_scene
    config_path = args.config_path
    save_dir = args.save_dir
    safe_mkdir(save_dir)

    seen_map_save_root = os.path.join(save_dir, "seen_area_maps")
    wall_map_save_root = os.path.join(save_dir, "wall_maps")
    semantic_map_save_root = os.path.join(save_dir, "semantic_maps")
    json_save_path = os.path.join(save_dir, "all_maps_info.json")

    config = habitat_extensions.get_extended_config(config_path)

    dataset_path = config.DATASET.DATA_PATH.replace("{split}", config.DATASET.SPLIT)
    with gzip.open(dataset_path, "rt") as fp:
        dataset = json.load(fp)
    num_episodes = len(dataset["episodes"])

    print("===============> Loading data per scene")
    scene_to_data = {}
    if num_episodes == 0:
        # Episodes are stored per scene under the "content" directory
        content_path = os.path.join(
            dataset_path[: -len(f"{config.DATASET.SPLIT}.json.gz")], "content"
        )
        scene_paths = glob.glob(f"{content_path}/*")
        print(f"Number of scenes found: {len(scene_paths)}")
        for scene_data_path in scene_paths:
            with gzip.open(scene_data_path, "rt") as fp:
                scene_data = json.load(fp)
            num_episodes += len(scene_data["episodes"])
            scene_id = scene_data["episodes"][0]["scene_id"].split("/")[-1]
            scene_to_data[scene_id] = scene_data["episodes"]
    else:
        for ep in dataset["episodes"]:
            scene_id = ep["scene_id"].split("/")[-1]
            if scene_id not in scene_to_data:
                scene_to_data[scene_id] = []
            scene_to_data[scene_id].append(ep)

    print("===============> Computing heights for different floors in each scene")
    scenes_to_floor_heights = {}
    for scene_id, scene_data in scene_to_data.items():
        # Identify the unique floors in this scene
        floor_heights = []
        for ep in scene_data:
            height = ep["start_position"][1]
            if len(floor_heights) == 0:
                floor_heights.append(height)
            # Measure the height difference w.r.t. all existing floors
            d2floors = map(lambda x: abs(x - height), floor_heights)
            d2floors = np.array(list(d2floors))
            if not np.any(d2floors < 0.5):
                floor_heights.append(height)
        # Store this in the dict
        scenes_to_floor_heights[scene_id] = floor_heights

    env = DummyRLEnv(config=config)
    env.seed(1234)
    _ = env.reset()
    device = torch.device("cuda:0")

    safe_mkdir(seen_map_save_root)
    safe_mkdir(wall_map_save_root)
    safe_mkdir(semantic_map_save_root)

    # Data format for saving top-down maps per scene:
    # For each split, create a json file that contains the following dictionary:
    #   key   - scene_id
    #   value - [{'floor_height': ...,
    #             'seen_map_path': ...,
    #             'wall_map_path': ...,
    #             'semantic_map_path': ...,
    #             'start_rotation': ...,
    #             'world_position': ...,
    #             'world_heading': ...,
    #             'scene_id': ...},
    #            ...]
    # The floor_height specifies a single height value on that floor.
    # All other heights within 0.5m of this height will correspond to this floor.
    # The *_map_path fields specify paths to .npy files that contain the
    # corresponding maps. These maps are in the world coordinate system, not the
    # episode-centric start-view coordinate system.
    # The world_position is the (X, Y, Z) position of the agent w.r.t. which the
    # maps were computed. The world_heading is the clockwise rotation (-Z to X)
    # of the agent in world coordinates.
    # The .npy files are stored under seen_map_save_root, wall_map_save_root,
    # and semantic_map_save_root.
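
    # A concrete (hypothetical) example of one all_maps_info.json entry,
    # matching the format documented above; the scene name and all values are
    # illustrative only:
    #
    # "Scene1.glb": [
    #     {"floor_height": 0.07,
    #      "seen_map_path": "<save_dir>/seen_area_maps/Scene1.glb_0.npy",
    #      "wall_map_path": "<save_dir>/wall_maps/Scene1.glb_0.npy",
    #      "semantic_map_path": "<save_dir>/semantic_maps/Scene1.glb_0.npy",
    #      "start_rotation": [0.0, 0.0, 0.0, 1.0],
    #      "world_position": [1.2, 0.07, -3.4],
    #      "world_heading": 0.5,
    #      "scene_id": ".../Scene1.glb"},
    # ]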
    # Create top-down maps per scene, per floor
    per_scene_per_floor_maps = {}

    print("===============> generate meta information for gt map")
    for target_scene in tqdm.tqdm(scene_to_data.keys()):
        per_scene_per_floor_maps[target_scene] = {}
        for episode in scene_to_data[target_scene]:
            scene_id = target_scene
            start_position = episode["start_position"]
            start_rotation = episode["start_rotation"]
            start_height = start_position[1]

            # Find the floor this episode starts on
            floor_heights = scenes_to_floor_heights[scene_id]
            d2floors = map(lambda x: abs(x - start_height), floor_heights)
            d2floors = np.array(list(d2floors))
            floor_idx = np.where(d2floors < 0.5)[0][0].item()
            if floor_idx in per_scene_per_floor_maps[scene_id]:
                continue

            start_heading = compute_heading_from_quaternion(
                quaternion_from_coeff(start_rotation)
            )
            seen_map_save_path = f"{seen_map_save_root}/{scene_id}_{floor_idx}.npy"
            wall_map_save_path = f"{wall_map_save_root}/{scene_id}_{floor_idx}.npy"
            semantic_map_save_path = (
                f"{semantic_map_save_root}/{scene_id}_{floor_idx}.npy"
            )

            save_dict = {
                "seen_map_path": seen_map_save_path,
                "wall_map_path": wall_map_save_path,
                "semantic_map_path": semantic_map_save_path,
                "floor_height": start_height,
                "start_rotation": start_rotation,
                "world_position": start_position,
                "world_heading": start_heading,
                "scene_id": episode["scene_id"],
            }
            per_scene_per_floor_maps[scene_id][floor_idx] = save_dict
            # Stop once every floor in this scene has an entry
            if len(per_scene_per_floor_maps[scene_id]) == len(
                scenes_to_floor_heights[scene_id]
            ):
                break

    print("===============> save meta information for gt map")
    save_json = {}
    for scene in per_scene_per_floor_maps.keys():
        scene_save_data = []
        for floor_idx, floor_data in per_scene_per_floor_maps[scene].items():
            scene_save_data.append(floor_data)
        save_json[scene] = scene_save_data
    json.dump(save_json, open(json_save_path, "w"))

    print("===============> start to draw semantic map")
    scene_ids = sorted(list(per_scene_per_floor_maps.keys()))
    print(scene_ids)
    start_scene = scene_ids[skip_scene]
    print(f"===============> start with scene {start_scene}")

    for target_scene in tqdm.tqdm(scene_ids[skip_scene:], desc="scenes", position=0):
        for floor_idx in per_scene_per_floor_maps[target_scene]:
            scene_meta_info = per_scene_per_floor_maps[target_scene][floor_idx]
            # Don't regenerate existing maps
            if os.path.isfile(scene_meta_info["semantic_map_path"]):
                continue
            print(scene_meta_info)

            # Reset the environment to this map's start pose and scene
            env.habitat_env.current_episode.start_position = scene_meta_info[
                "world_position"
            ]
            env.habitat_env.current_episode.start_rotation = scene_meta_info[
                "start_rotation"
            ]
            env.habitat_env.current_episode.scene_id = scene_meta_info["scene_id"]
            env.habitat_env.reconfigure(env.habitat_env._config)
            _ = env.habitat_env.task.reset(env.habitat_env.current_episode)

            scene_id = target_scene
            agent_state = env.habitat_env.sim.get_agent_state()
            start_position = np.array(agent_state.position)

            global_seen_map, global_wall_map = get_episode_map(
                env, mapper, M, config, device
            )
            # Generate semantic layers
            global_semantic_map = generate_semantic_layers(
                env, mapper, M, config, global_seen_map
            )

            seen_map_save_path = f"{seen_map_save_root}/{scene_id}_{floor_idx}.npy"
            wall_map_save_path = f"{wall_map_save_root}/{scene_id}_{floor_idx}.npy"
            semantic_map_save_path = (
                f"{semantic_map_save_root}/{scene_id}_{floor_idx}.npy"
            )
            np.save(seen_map_save_path, (global_seen_map > 0))
            np.save(wall_map_save_path, (global_wall_map > 0))
            np.save(semantic_map_save_path, (global_semantic_map > 0))

            # Release references to avoid memory build-up across scenes
            global_seen_map = None
            global_wall_map = None
            global_semantic_map = None
            gc.collect()
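
# Minimal entry-point sketch, assuming the four arguments accessed in main()
# above; the argument names match the attributes used there, but the defaults
# shown here are hypothetical and should be adapted to the dataset.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--global_map_size", type=int, default=961)
    parser.add_argument("--skip_scene", type=int, default=0)
    parser.add_argument("--config_path", type=str, required=True)
    parser.add_argument("--save_dir", type=str, required=True)
    main(parser.parse_args())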