Ejemplo n.º 1
0
    def _load_transformed_wall_maps(self, scene_map_info, episode):
        seen_maps = []
        wall_maps = []
        start_position = episode.start_position  # (X, Y, Z)
        start_rotation = quaternion_xyzw_to_wxyz(episode.start_rotation)
        start_heading = compute_heading_from_quaternion(start_rotation)
        for floor_data in scene_map_info:
            seen_map = np.load(floor_data["seen_map_path"])
            wall_map = np.load(floor_data["wall_map_path"])
            # ===== Transform the maps relative to the episode start pose =====
            map_view_position = floor_data["world_position"]
            map_view_heading = floor_data["world_heading"]
            # Originally, Z is downward and X is rightward.
            # Convert it to X upward and Y rightward
            x_map, y_map = -map_view_position[2], map_view_position[0]
            theta_map = map_view_heading
            x_start, y_start = -start_position[2], start_position[0]
            theta_start = start_heading
            # Compute relative coordinates
            r_rel = math.sqrt((x_start - x_map) ** 2 + (y_start - y_map) ** 2)
            phi_rel = math.atan2(y_start - y_map, x_start - x_map) - theta_map
            x_rel = r_rel * math.cos(phi_rel) / self.config.MAP_SCALE
            y_rel = r_rel * math.sin(phi_rel) / self.config.MAP_SCALE
            theta_rel = theta_start - theta_map
            # Convert these to image coordinates with X being rightward and Y
            # being downward
            x_img_rel = y_rel
            y_img_rel = -x_rel
            theta_img_rel = theta_rel
            x_trans = torch.Tensor([[x_img_rel, y_img_rel, theta_img_rel]])
            # Perform the transformations
            p_seen_map = rearrange(torch.Tensor(seen_map), "h w c -> () c h w")
            p_wall_map = rearrange(torch.Tensor(wall_map), "h w c -> () c h w")
            p_seen_map_trans = spatial_transform_map(p_seen_map, x_trans)
            p_wall_map_trans = spatial_transform_map(p_wall_map, x_trans)
            seen_map_trans = asnumpy(p_seen_map_trans)
            seen_map_trans = rearrange(seen_map_trans, "() c h w -> h w c")
            wall_map_trans = asnumpy(p_wall_map_trans)
            wall_map_trans = rearrange(wall_map_trans, "() c h w -> h w c")
            seen_maps.append(seen_map_trans)
            wall_maps.append(wall_map_trans)

        return seen_maps, wall_maps
    def _get_mesh_occupancy(self):
        agent_position = self.current_episode.start_position
        agent_rotation = quaternion_xyzw_to_wxyz(self.current_episode.start_rotation)
        a_x, a_y = maps.to_grid(
            agent_position[0],
            agent_position[2],
            self._coordinate_min,
            self._coordinate_max,
            self._map_resolution,
        )

        # The map size here represents size around the agent, not infront.
        mrange = int(self.map_size * 1.5 / 2.0)

        # Add extra padding if map range is coordinates go out of bounds
        y_start = a_y - mrange
        y_end = a_y + mrange
        x_start = a_x - mrange
        x_end = a_x + mrange

        x_l_pad, y_l_pad, x_r_pad, y_r_pad = 0, 0, 0, 0

        H, W = self._top_down_map.shape
        if x_start < 0:
            x_l_pad = int(-x_start)
            x_start += x_l_pad
            x_end += x_l_pad
        if x_end >= W:
            x_r_pad = int(x_end - W + 1)
        if y_start < 0:
            y_l_pad = int(-y_start)
            y_start += y_l_pad
            y_end += y_l_pad
        if y_end >= H:
            y_r_pad = int(y_end - H + 1)

        ego_map = np.pad(self._top_down_map, ((y_l_pad, y_r_pad), (x_l_pad, x_r_pad)))
        ego_map = ego_map[y_start : (y_end + 1), x_start : (x_end + 1)]

        if ego_map.shape[0] == 0 or ego_map.shape[1] == 0:
            ego_map = np.zeros((2 * mrange + 1, 2 * mrange + 1), dtype=np.uint8)

        # Rotate to get egocentric map
        # Negative since the value returned is clockwise rotation about Y,
        # but we need anti-clockwise rotation
        agent_heading = -compute_heading_from_quaternion(agent_rotation)
        agent_heading = math.degrees(agent_heading)

        half_size = ego_map.shape[0] // 2
        center = (half_size, half_size)
        M = cv2.getRotationMatrix2D(center, agent_heading, scale=1.0)

        ego_map = cv2.warpAffine(
            ego_map,
            M,
            (ego_map.shape[1], ego_map.shape[0]),
            flags=cv2.INTER_NEAREST,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(1,),
        )

        ego_map = ego_map.astype(np.float32)
        mrange = int(self.map_size / 2.0)
        start_coor = half_size - mrange
        end_coor = int(start_coor + self.map_size - 1)
        ego_map = ego_map[start_coor : (end_coor + 1), start_coor : (end_coor + 1)]

        # This map is currently 0 if occupied and 1 if unoccupied. Flip it.
        ego_map = 1.0 - ego_map

        # Flip the x axis because to_grid() flips the conventions
        ego_map = np.flip(ego_map, axis=1)

        # Append explored status in the 2nd channel
        ego_map = np.stack([ego_map, np.ones_like(ego_map)], axis=2)

        return ego_map
Ejemplo n.º 3
0
    def _get_mesh_occupancy(self, episode, agent_state):
        episode_id = (episode.episode_id, episode.scene_id)
        if self.current_episode_id != episode_id:
            self.current_episode_id = episode_id
            # Transpose to make x rightward and y downward
            self._top_down_map = self.get_original_map().T

        agent_position = agent_state.position
        agent_rotation = agent_state.rotation
        a_x, a_y = maps.to_grid(
            agent_position[0],
            agent_position[2],
            self._coordinate_min,
            self._coordinate_max,
            self._map_resolution,
        )

        # Crop region centered around the agent
        mrange = int(self.map_size * 1.5)
        ego_map = self._top_down_map[
                  (a_y - mrange): (a_y + mrange), (a_x - mrange): (a_x + mrange)
                  ]
        if ego_map.shape[0] == 0 or ego_map.shape[1] == 0:
            ego_map = np.zeros((2 * mrange + 1, 2 * mrange + 1), dtype=np.uint8)

        # Rotate to get egocentric map
        # Negative since the value returned is clockwise rotation about Y,
        # but we need anti-clockwise rotation
        agent_heading = -compute_heading_from_quaternion(agent_rotation)
        agent_heading = math.degrees(agent_heading)

        half_size = ego_map.shape[0] // 2
        center = (half_size, half_size)
        M = cv2.getRotationMatrix2D(center, agent_heading, scale=1.0)

        ego_map = (
                cv2.warpAffine(
                    ego_map * 255,
                    M,
                    (ego_map.shape[1], ego_map.shape[0]),
                    flags=cv2.INTER_LINEAR,
                    borderMode=cv2.BORDER_CONSTANT,
                    borderValue=(1,),
                ).astype(np.float32)
                / 255.0
        )

        mrange = int(self.map_size)
        ego_map = ego_map[
                  (half_size - mrange): (half_size + mrange),
                  (half_size - mrange): (half_size + mrange),
                  ]
        ego_map[ego_map > 0.5] = 1.0
        ego_map[ego_map <= 0.5] = 0.0

        # This map is currently 0 if occupied and 1 if unoccupied. Flip it.
        ego_map = 1.0 - ego_map

        # Flip the x axis because to_grid() flips the conventions
        ego_map = np.flip(ego_map, axis=1)

        # Get forward region infront of the agent
        half_size = ego_map.shape[0] // 2
        quarter_size = ego_map.shape[0] // 4
        center = (half_size, half_size)

        ego_map = ego_map[0:half_size, quarter_size: (quarter_size + half_size)]

        # Append explored status in the 2nd channel
        ego_map = np.stack([ego_map, np.ones_like(ego_map)], axis=2)

        return ego_map
Ejemplo n.º 4
0
    def _get_wall_occupancy(self, episode, agent_state):
        episode_id = (episode.episode_id, episode.scene_id)
        # Load the episode specific maps only if the episode has changed
        if self.current_wall_episode_id != episode_id:
            self.current_wall_episode_id = episode_id
            if self.config.GT_TYPE == "wall_occupancy":
                scene_id = episode.scene_id.split("/")[-1]
                self._scene_maps_info = self._all_maps_info[scene_id]
                # Load the maps per floor
                seen_maps, wall_maps = self._load_transformed_wall_maps(
                    self._scene_maps_info, episode,
                )
                self._scene_maps = {}
                self._scene_maps["seen_maps"] = seen_maps
                self._scene_maps["wall_maps"] = wall_maps

        agent_state = self._sim.get_agent_state()
        current_height = agent_state.position[1]
        best_floor_idx = None
        best_floor_dist = math.inf
        for floor_idx, floor_data in enumerate(self._scene_maps_info):
            floor_height = floor_data["floor_height"]
            if abs(current_height - floor_height) < best_floor_dist:
                best_floor_idx = floor_idx
                best_floor_dist = abs(current_height - floor_height)
        assert best_floor_idx is not None
        current_wall_map = self._scene_maps["wall_maps"][best_floor_idx]
        # Take only channel 0 as both channels have save values
        current_wall_map = current_wall_map[..., 0]

        # ========= Get local egocentric crop of the current wall map =========
        # Compute relative pose of agent from start location
        start_position = episode.start_position  # (X, Y, Z)
        start_rotation = quaternion_xyzw_to_wxyz(episode.start_rotation)
        start_heading = compute_heading_from_quaternion(start_rotation)
        start_pose = torch.Tensor(
            [[-start_position[2], start_position[0], start_heading]]
        )
        agent_position = agent_state.position
        agent_heading = compute_heading_from_quaternion(agent_state.rotation)
        agent_pose = torch.Tensor(
            [[-agent_position[2], agent_position[0], agent_heading]]
        )
        rel_pose = subtract_pose(start_pose, agent_pose)[0]  # (3,)

        # Compute agent position on the map image
        map_scale = self.config.MAP_SCALE

        H, W = current_wall_map.shape[:2]
        Hby2, Wby2 = (H + 1) // 2, (W + 1) // 2
        agent_map_x = int(rel_pose[1].item() / map_scale + Wby2)
        agent_map_y = int(-rel_pose[0].item() / map_scale + Hby2)

        # Crop the region around the agent.
        mrange = int(1.5 * self.map_size)

        # Add extra padding if map range is coordinates go out of bounds
        y_start = agent_map_y - mrange
        y_end = agent_map_y + mrange
        x_start = agent_map_x - mrange
        x_end = agent_map_x + mrange

        x_l_pad, y_l_pad, x_r_pad, y_r_pad = 0, 0, 0, 0

        H, W = current_wall_map.shape
        if x_start < 0:
            x_l_pad = int(-x_start)
            x_start += x_l_pad
            x_end += x_l_pad
        if x_end >= W:
            x_r_pad = int(x_end - W + 1)
        if y_start < 0:
            y_l_pad = int(-y_start)
            y_start += y_l_pad
            y_end += y_l_pad
        if y_end >= H:
            y_r_pad = int(y_end - H + 1)

        ego_map = np.pad(current_wall_map, ((y_l_pad, y_r_pad), (x_l_pad, x_r_pad)))
        ego_map = ego_map[y_start: (y_end + 1), x_start: (x_end + 1)]

        agent_heading = rel_pose[2].item()
        agent_heading = math.degrees(agent_heading)

        half_size = ego_map.shape[0] // 2
        center = (half_size, half_size)
        M = cv2.getRotationMatrix2D(center, agent_heading, scale=1.0)

        ego_map = cv2.warpAffine(
            ego_map,
            M,
            (ego_map.shape[1], ego_map.shape[0]),
            flags=cv2.INTER_NEAREST,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=(1,),
        )

        ego_map = ego_map.astype(np.float32)
        mrange = int(self.map_size)
        ego_map = ego_map[
                  (half_size - mrange): (half_size + mrange),
                  (half_size - mrange): (half_size + mrange),
                  ]

        # Get forward region infront of the agent
        half_size = ego_map.shape[0] // 2
        quarter_size = ego_map.shape[0] // 4
        center = (half_size, half_size)

        ego_map = ego_map[0:half_size, quarter_size: (quarter_size + half_size)]

        # Append explored status in the 2nd channel
        ego_map = np.stack([ego_map, ego_map], axis=2)

        return ego_map
def main(args):
    
    config = get_config()

    mapper_config = config.RL.ANS.MAPPER
    mapper_config.defrost()
    mapper_config.map_size = 130
    mapper_config.map_scale = 0.02
    mapper_config.freeze()

    mapper = Mapper(mapper_config, None)

    M = args.global_map_size
    skip_scene = args.skip_scene
    config_path = args.config_path
    save_dir = args.save_dir
    safe_mkdir(save_dir)

    seen_map_save_root = os.path.join(save_dir, "seen_area_maps")
    wall_map_save_root = os.path.join(save_dir, "wall_maps")
    semantic_map_save_root = os.path.join(save_dir, "semantic_maps")
    json_save_path = os.path.join(save_dir, "all_maps_info.json")

    config = habitat_extensions.get_extended_config(config_path)

    scenes_list = glob.glob(f"")
    dataset_path = config.DATASET.DATA_PATH.replace("{split}", config.DATASET.SPLIT)
    with gzip.open(dataset_path, "rt") as fp:
        dataset = json.load(fp)

    num_episodes = len(dataset["episodes"])

    print("===============> Loading data per scene")
    scene_to_data = {}
    if num_episodes == 0:
        content_path = os.path.join(
            dataset_path[: -len(f"{config.DATASET.SPLIT}.json.gz")], "content"
        )
        scene_paths = glob.glob(f"{content_path}/*")
        print(f"Number of scenes found: {len(scene_paths)}")
        for scene_data_path in scene_paths:
            with gzip.open(scene_data_path, "rt") as fp:
                scene_data = json.load(fp)
            num_episodes += len(scene_data["episodes"])
            scene_id = scene_data["episodes"][0]["scene_id"].split("/")[-1]
            scene_to_data[scene_id] = scene_data["episodes"]
    else:
        for ep in dataset["episodes"]:
            scene_id = ep["scene_id"].split("/")[-1]
            if scene_id not in scene_to_data:
                scene_to_data[scene_id] = []
            scene_to_data[scene_id].append(ep)

    print("===============> Computing heights for different floors in each scene")
    scenes_to_floor_heights = {}
    for scene_id, scene_data in scene_to_data.items():
        # Identify the number of unique floors in this scene
        floor_heights = []
        for ep in scene_data:
            height = ep["start_position"][1]
            if len(floor_heights) == 0:
                floor_heights.append(height)
            # Measure height difference from all existing floors
            d2floors = map(lambda x: abs(x - height), floor_heights)
            d2floors = np.array(list(d2floors))
            if not np.any(d2floors < 0.5):
                floor_heights.append(height)
        # Store this in the dict
        scenes_to_floor_heights[scene_id] = floor_heights

    env = DummyRLEnv(config=config)
    env.seed(1234)
    _ = env.reset()
    device = torch.device("cuda:0")

    safe_mkdir(seen_map_save_root)
    safe_mkdir(wall_map_save_root)
    safe_mkdir(semantic_map_save_root)

    # Data format for saving top-down maps per scene:
    # For each split, create a json file that contains the following dictionary:
    # key - scene_id
    # value - [{'floor_height': ...,
    #           'seen_map_path': ...,
    #           'wall_map_path': ...,
    #           'world_position': ...,
    #           'world_heading': ...},
    #          .,
    #          .,
    #          .,
    #         ]
    # The floor_height specifies a single height value on that floor.
    # All other heights within 0.5m of this height will correspond to this floor.
    # The *_map_path specifies the path to a .npy file that contains the
    # corresponding map. This map is in the world coordinate system, not episode
    # centric start-view coordinate system.
    # The world_position is the (X, Y, Z) position of the agent w.r.t. which this
    # map was computed. The world_heading is the clockwise rotation (-Z to X)
    # of the agent in the world coordinates.
    # The .npy files will be stored in seen_map_save_root and wall_map_save_root.

    # Create top-down maps per scene, per floor
    per_scene_per_floor_maps = {}
    print("===============> generate meta information for gt map")
    for target_scene in tqdm.tqdm(scene_to_data.keys()):
        per_scene_per_floor_maps[target_scene] = {}
        for episode in scene_to_data[target_scene]:
            scene_id = target_scene
            start_position = episode['start_position']
            start_rotation = episode['start_rotation']
            start_height = start_position[1]
            floor_heights = scenes_to_floor_heights[scene_id]
            d2floors = map(lambda x: abs(x - start_height), floor_heights)
            d2floors = np.array(list(d2floors))
            floor_idx = np.where(d2floors < 0.5)[0][0].item()   
            if floor_idx in per_scene_per_floor_maps[scene_id]:
                continue
            start_heading = compute_heading_from_quaternion(quaternion_from_coeff(start_rotation))
            seen_map_save_path = f"{seen_map_save_root}/{scene_id}_{floor_idx}.npy"
            wall_map_save_path = f"{wall_map_save_root}/{scene_id}_{floor_idx}.npy"
            semantic_map_save_path = f"{semantic_map_save_root}/{scene_id}_{floor_idx}.npy"


            save_dict = {
            "seen_map_path": seen_map_save_path,
            "wall_map_path": wall_map_save_path,
            "semantic_map_path": semantic_map_save_path,
            "floor_height": start_height,
            "start_rotation": start_rotation,
            "world_position": start_position,
            "world_heading": start_heading,
            "scene_id": episode['scene_id']
            }

            per_scene_per_floor_maps[scene_id][floor_idx] = save_dict
            if len(per_scene_per_floor_maps[scene_id]) == len(scenes_to_floor_heights[scene_id]):
                break
    print("===============> save meta information for gt map")        
    save_json = {}
    for scene in per_scene_per_floor_maps.keys():
        scene_save_data = []
        for floor_idx, floor_data in per_scene_per_floor_maps[scene].items():
            scene_save_data.append(floor_data)
        save_json[scene] = scene_save_data

    json.dump(save_json, open(json_save_path, "w"))    

    print("===============> start to draw semantic map")  
    scene_ids = sorted(list(per_scene_per_floor_maps.keys()))
    print(scene_ids)
    start_scene = scene_ids[skip_scene]
    print(f"===============> start with scene {start_scene}")

    for target_scene in tqdm.tqdm(scene_ids[skip_scene:],desc='scenes', position=0):
        for floor_idx in per_scene_per_floor_maps[target_scene]:
            
            scene_meta_info = per_scene_per_floor_maps[target_scene][floor_idx]
            # don't regenerate maps
            
            if os.path.isfile(scene_meta_info['semantic_map_path']):
                continue
            print(scene_meta_info)

            env.habitat_env.current_episode.start_position = scene_meta_info['world_position']
            env.habitat_env.current_episode.start_rotation = scene_meta_info['start_rotation']
            env.habitat_env.current_episode.scene_id = scene_meta_info['scene_id']

            env.habitat_env.reconfigure(env.habitat_env._config)
            _ = env.habitat_env.task.reset(env.habitat_env.current_episode)

            scene_id = target_scene
            agent_state = env.habitat_env.sim.get_agent_state()
            start_position = np.array(agent_state.position)

            global_seen_map, global_wall_map = get_episode_map(
                env, mapper, M, config, device
            )

            #generate semantic layers
            global_semantic_map = generate_semantic_layers(env,mapper, M,config,global_seen_map)

            seen_map_save_path = f"{seen_map_save_root}/{scene_id}_{floor_idx}.npy"
            wall_map_save_path = f"{wall_map_save_root}/{scene_id}_{floor_idx}.npy"
            semantic_map_save_path = f"{semantic_map_save_root}/{scene_id}_{floor_idx}.npy"
            np.save(seen_map_save_path, (global_seen_map > 0))
            np.save(wall_map_save_path, (global_wall_map > 0))
            np.save(semantic_map_save_path, (global_semantic_map > 0))

            # clean the memory to avoid overflow
            global_seen_map = None
            global_wall_map = None
            global_semantic_map = None
            gc.collect()