def __getitem__(self, idx):
        """
        Load scan `idx` and build the point cloud, box labels and vote labels.

        Args:
            idx: index into self.scan_names

        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
                if there is only one vote then X1==X2==X3 etc.
            vote_label_mask: (N,) with 0/1 with 1 indicating the point
                is in one of the object's OBB.
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: unused

        Raises:
            NotImplementedError: if both self.use_color and self.use_box2d
                are enabled.
        """
        # Fail fast, before touching the disk. NotImplemented is a comparison
        # sentinel and is not callable; the exception class is NotImplementedError.
        if self.use_color and self.use_box2d:
            raise NotImplementedError(
                'color and 2d bounding box at the same time is not implemented'
            )

        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        # An opened .npz archive keeps its file handle until it is closed,
        # so the arrays are read inside a `with` block.
        with np.load(scan_prefix + '_pc.npz') as pc_file:
            point_cloud = pc_file['pc']  # Nx6
        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        with np.load(scan_prefix + '_votes.npz') as votes_file:
            point_votes = votes_file['point_votes']  # Nx10
        bbox2ds = np.load(scan_prefix + '_bbox2d.npy')
        bbox2d_probs = np.load(scan_prefix + '_bbox2d_prob.npy')
        calib_Rtilt = np.load(scan_prefix + '_calib_Rtilt.npy')
        calib_K = np.load(scan_prefix + '_calib_K.npy')

        if not self.use_color:
            # NOTE(review): this branch runs whenever use_color is False,
            # regardless of self.use_box2d; it replaced a plain XYZ slice
            # (point_cloud[:, 0:3]) -- confirm that is intended.
            point_cloud = get_box2d_feature(point_cloud, bbox2ds, bbox2d_probs,
                                            calib_Rtilt, calib_K)
        else:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

        if self.use_height:
            # np.percentile takes q in [0, 100]: this is the 0.99th percentile
            # of z, i.e. a value close to the lowest points.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # Column ranges of the three XYZ votes inside point_votes
        # (column 0 is the vote mask).
        vote_slices = (slice(1, 4), slice(4, 7), slice(7, 10))

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat_t = np.transpose(sunrgbd_utils.rotz(rot_angle))

            # Votes are offsets: rotate their absolute end points first, then
            # re-express them relative to the rotated points.
            point_votes_end = np.zeros_like(point_votes)
            for cols in vote_slices:
                point_votes_end[:, cols] = np.dot(
                    point_cloud[:, 0:3] + point_votes[:, cols], rot_mat_t)

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
            bboxes[:, 6] -= rot_angle
            for cols in vote_slices:
                point_votes[:, cols] = (point_votes_end[:, cols] -
                                        point_cloud[:, 0:3])

            # Augment RGB color
            if self.use_color:
                num_pts = point_cloud.shape[0]
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(num_pts) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(num_pts) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            for cols in vote_slices:
                point_votes[:, cols] *= scale_ratio
            if self.use_height:
                # the height channel is the last column appended above
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        num_boxes = bboxes.shape[0]
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:num_boxes] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:num_boxes, :] = bboxes
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            size_classes[i], size_residuals[i] = DC.size2class(
                bbox[3:6] * 2, DC.class2type[semantic_class])

            # compute axis aligned box enclosing the oriented box corners
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            corner_min = np.min(corners_3d[:, 0:3], axis=0)
            corner_max = np.max(corners_3d[:, 0:3], axis=0)
            target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
            target_bboxes[i, 3:6] = corner_max - corner_min

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        # Keep the votes aligned with the sampled points.
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = label_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes
        return ret_dict
Exemple #2
0
    def __getitem__(self, idx):
        """
        Load scan `idx` and build the point cloud, box labels and vote labels,
        plus the 2D image votes and cues when self.use_imvote is set.

        Args:
            idx: index into self.scan_names

        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
                if there is only one vote then X1==X2==X3 etc.
            vote_label_mask: (N,) with 0/1 with 1 indicating the point
                is in one of the object's OBB.
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: unused

        Only when self.use_imvote is set, the dict also contains:
            scale: scale ratio applied by the augmentation (1.0 without augmentation)
            calib_Rtilt: (3,3) read from the first calib line, updated by the
                flip/rotation/scale augmentation
            calib_K: (3,3) read from the second calib line
            full_img_width: image width in pixels
            cls_score_feats: (1+MAX_NUM_2D_DET,NUM_CLS); row 0 is a dummy row,
                row i+1 holds the score of 2D detection i in its class column
            full_img_votes_1d: flattened (H,W,vote_dims) image votes,
                zero-padded to MAX_NUM_PIXEL*vote_dims
            full_img_1d: flattened normalized image, zero-padded to MAX_NUM_PIXEL*3
        """
        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        # An opened .npz archive keeps its file handle until it is closed,
        # so the arrays are read inside a `with` block.
        with np.load(scan_prefix+'_pc.npz') as pc_file:
            point_cloud = pc_file['pc'] # Nx6
        bboxes = np.load(scan_prefix+'_bbox.npy') # K,8
        with np.load(scan_prefix+'_votes.npz') as votes_file:
            point_votes = votes_file['point_votes'] # Nx10
        if self.use_imvote:
            # Read camera parameters (the file is closed as soon as it is read)
            with open(os.path.join(self.raw_data_path, 'calib', scan_name+'.txt')) as calib_file:
                calib_lines = calib_file.readlines()
            # Each line holds 9 space-separated floats in column-major ('F') order
            calib_Rtilt = np.reshape(np.array([float(x) for x in calib_lines[0].rstrip().split(' ')]), (3,3), 'F')
            calib_K = np.reshape(np.array([float(x) for x in calib_lines[1].rstrip().split(' ')]), (3,3), 'F')
            # Read image
            full_img = sunrgbd_utils.load_image(os.path.join(self.raw_data_path, 'image', scan_name+'.jpg'))
            full_img_height = full_img.shape[0]
            full_img_width = full_img.shape[1]

            # ------------------------------- 2D IMAGE VOTES ------------------------------
            cls_id_list = self.cls_id_map[scan_name]
            cls_score_list = self.cls_score_map[scan_name]
            bbox_2d_list = self.bbox_2d_map[scan_name]
            obj_img_list = []
            for i2d, (cls2d, box2d) in enumerate(zip(cls_id_list, bbox_2d_list)):
                xmin, ymin, xmax, ymax = box2d
                # During training we randomly drop 2D boxes to reduce over-fitting
                if self.train and np.random.random()>0.5:
                    continue

                obj_img = full_img[ymin:ymax, xmin:xmax, :]
                obj_h = obj_img.shape[0]
                obj_w = obj_img.shape[1]
                if obj_h == 0 or obj_w == 0:
                    continue
                # Box origin (2 values), cropped box size (2 values), class id, index to the semantic cues
                meta_data = (xmin, ymin, obj_h, obj_w, cls2d, i2d)

                # Use 2D box center as approximation
                uv_centroid = np.array([int(obj_w/2), int(obj_h/2)])
                uv_centroid = np.expand_dims(uv_centroid, 0)

                # Per-pixel (u,v) offset from the pixel to the box center: (obj_h, obj_w, 2)
                v_coords, u_coords = np.meshgrid(range(obj_h), range(obj_w), indexing='ij')
                img_vote = np.transpose(np.array([u_coords, v_coords]), (1,2,0))
                img_vote = np.expand_dims(uv_centroid, 0) - img_vote

                obj_img_list.append((meta_data, img_vote))

            # Per-pixel layout: channel 0 counts the boxes covering the pixel;
            # vote slot k then uses channels 1+4k .. 4+4k as (du, dv, class id, box index + 1).
            full_img_votes = np.zeros((full_img_height,full_img_width,self.vote_dims), dtype=np.float32)
            # Empty votes: channels 3, 7, ... (the class-id channel of each slot) are set to -1;
            # the box-index channel stays 0, which points at the dummy row of cls_score_feats.
            full_img_votes[:,:,3::4] = -1.

            for meta_data, img_vote in obj_img_list:
                u0, v0, h, w, cls2d, i2d = meta_data
                for u in range(u0, u0+w):
                    for v in range(v0, v0+h):
                        # number of votes already stored at this pixel = next free slot
                        iidx = int(full_img_votes[v,u,0])
                        if iidx >= self.max_imvote_per_pixel:
                            continue
                        slot = 1 + iidx*4
                        full_img_votes[v,u,slot:(slot+2)] = img_vote[v-v0,u-u0,:]
                        full_img_votes[v,u,(slot+2)] = cls2d
                        full_img_votes[v,u,(slot+3)] = i2d + 1 # add +1 here as we need a dummy feature for pixels outside all boxes
                # The counter is bumped once per box, after all its pixels were written
                full_img_votes[v0:(v0+h), u0:(u0+w), 0] += 1

            full_img_votes_1d = np.zeros((MAX_NUM_PIXEL*self.vote_dims), dtype=np.float32)
            full_img_votes_1d[0:full_img_height*full_img_width*self.vote_dims] = full_img_votes.flatten()

            # Semantic cues: one-hot vector for class scores
            cls_score_feats = np.zeros((1+MAX_NUM_2D_DET,NUM_CLS), dtype=np.float32)
            # First row is the dummy feature
            len_obj = len(cls_id_list)
            if len_obj:
                ind_obj = np.arange(1,len_obj+1)
                ind_cls = np.array(cls_id_list)
                cls_score_feats[ind_obj, ind_cls] = np.array(cls_score_list)

            # Texture cues: normalized RGB values
            full_img = (full_img - 128.) / 255.
            # Serialize data to 1D and save image size so that we can recover the original location in the image
            full_img_1d = np.zeros((MAX_NUM_PIXEL*3), dtype=np.float32)
            full_img_1d[:full_img_height*full_img_width*3] = full_img.flatten()

        if not self.use_color:
            point_cloud = point_cloud[:,0:3]
        else:
            point_cloud = point_cloud[:,0:6]
            point_cloud[:,3:] = (point_cloud[:,3:]-MEAN_COLOR_RGB)

        if self.use_height:
            # np.percentile takes q in [0, 100]: this is the 0.99th percentile
            # of z, i.e. a value close to the lowest points.
            floor_height = np.percentile(point_cloud[:,2],0.99)
            height = point_cloud[:,2] - floor_height
            point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)],1) # (N,4) or (N,7)

        # Column ranges of the three XYZ votes inside point_votes (column 0 is the vote mask)
        vote_slices = (slice(1,4), slice(4,7), slice(7,10))

        # ------------------------------- DATA AUGMENTATION ------------------------------
        scale_ratio = 1.
        if self.augment:
            flip_flag = (np.random.random()>0.5)
            if flip_flag:
                # Flipping along the YZ plane
                point_cloud[:,0] = -1 * point_cloud[:,0]
                bboxes[:,0] = -1 * bboxes[:,0]
                bboxes[:,6] = np.pi - bboxes[:,6]
                point_votes[:,[1,4,7]] = -1 * point_votes[:,[1,4,7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random()*np.pi/3) - np.pi/6 # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)
            rot_mat_t = np.transpose(rot_mat)

            # Votes are offsets: rotate their absolute end points first, then
            # re-express them relative to the rotated points.
            point_votes_end = np.zeros_like(point_votes)
            for cols in vote_slices:
                point_votes_end[:,cols] = np.dot(point_cloud[:,0:3] + point_votes[:,cols], rot_mat_t)

            point_cloud[:,0:3] = np.dot(point_cloud[:,0:3], rot_mat_t)
            bboxes[:,0:3] = np.dot(bboxes[:,0:3], rot_mat_t)
            bboxes[:,6] -= rot_angle
            for cols in vote_slices:
                point_votes[:,cols] = point_votes_end[:,cols] - point_cloud[:,0:3]

            if self.use_imvote:
                R_inverse = np.copy(np.transpose(rot_mat))
                if flip_flag:
                    R_inverse[0,:] *= -1
                # Update Rtilt according to the augmentation
                # R_inverse (3x3) * point (3x1) transforms an augmented depth point
                # to original point in upright_depth coordinates
                calib_Rtilt = np.dot(np.transpose(R_inverse), calib_Rtilt)

            # Augment RGB color
            if self.use_color:
                num_pts = point_cloud.shape[0]
                rgb_color = point_cloud[:,3:6] + MEAN_COLOR_RGB
                rgb_color *= (1+0.4*np.random.random(3)-0.2) # brightness change for each channel
                rgb_color += (0.1*np.random.random(3)-0.05) # color shift for each channel
                rgb_color += np.expand_dims((0.05*np.random.random(num_pts)-0.025), -1) # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(np.random.random(num_pts)>0.3,-1)
                point_cloud[:,3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random()*0.3+0.85
            if self.use_imvote:
                # Fold the uniform scaling into Rtilt as well
                calib_Rtilt = np.dot(np.array([[scale_ratio,0,0],[0,scale_ratio,0],[0,0,scale_ratio]]), calib_Rtilt)
            scale_ratio_expand = np.expand_dims(np.tile(scale_ratio,3),0)
            point_cloud[:,0:3] *= scale_ratio_expand
            bboxes[:,0:3] *= scale_ratio_expand
            bboxes[:,3:6] *= scale_ratio_expand
            for cols in vote_slices:
                point_votes[:,cols] *= scale_ratio_expand
            if self.use_height:
                # the height channel is the last column appended above
                point_cloud[:,-1] *= scale_ratio

        # ------------------------------- LABELS ------------------------------
        num_boxes = bboxes.shape[0]
        angle_classes = np.zeros((MAX_NUM_OBJ,))
        angle_residuals = np.zeros((MAX_NUM_OBJ,))
        size_classes = np.zeros((MAX_NUM_OBJ,))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:num_boxes] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:num_boxes,:] = bboxes
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            size_classes[i], size_residuals[i] = DC.size2class(bbox[3:6]*2, DC.class2type[semantic_class])

            # compute axis aligned box enclosing the oriented box corners
            corners_3d = sunrgbd_utils.my_compute_box_3d(bbox[0:3], bbox[3:6], bbox[6])
            corner_min = np.min(corners_3d[:,0:3], axis=0)
            corner_max = np.max(corners_3d[:,0:3], axis=0)
            target_bboxes[i,0:3] = (corner_min+corner_max)/2
            target_bboxes[i,3:6] = corner_max-corner_min

        point_cloud, choices = pc_util.random_sampling(point_cloud, self.num_points, return_choices=True)
        # Keep the votes aligned with the sampled points.
        point_votes_mask = point_votes[choices,0]
        point_votes = point_votes[choices,1:]

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:,-1] # from 0 to 9

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:,0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = label_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes
        if self.use_imvote:
            ret_dict['scale'] = np.array(scale_ratio).astype(np.float32)
            ret_dict['calib_Rtilt'] = calib_Rtilt.astype(np.float32)
            ret_dict['calib_K'] = calib_K.astype(np.float32)
            ret_dict['full_img_width'] = np.array(full_img_width).astype(np.int64)
            ret_dict['cls_score_feats'] = cls_score_feats.astype(np.float32)
            ret_dict['full_img_votes_1d'] = full_img_votes_1d.astype(np.float32)
            ret_dict['full_img_1d'] = full_img_1d.astype(np.float32)

        return ret_dict
Exemple #3
0
def extract_sunrgbd_data(idx_filename,
                         split,
                         output_folder,
                         num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False,
                         use_v1=False,
                         skip_empty_scene=True):
    """ Extract scene point clouds and
    bounding boxes (centroids, box sizes, heading angles, semantic classes).
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index)
        split: training or testing
        output_folder: directory the dumps are written to (created if missing)
        num_point: number of points each scene is randomly subsampled to
        type_whitelist: class names to keep; other objects are ignored
        save_votes: whether to compute and save Ground truth votes.
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object (no object in whitelist)

    Dumps:
        <id>_pc.npz of (N,6) where N is for number of subsampled points and 6 is
            for XYZ and RGB (in 0~1) in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimension (l,w,h), heading_angle and semantic_class
        <id>_votes.npz of (N,10) with 0/1 indicating whether the point belongs to an object,
            then three sets of GT votes for up to three objects. If the point is only in one
            object's OBB, then the three GT votes are the same.
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.rstrip()) for line in idx_file]

    # Also creates missing parent directories and tolerates an existing folder.
    os.makedirs(output_folder, exist_ok=True)

    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        # NOTE(review): scan 479 is always skipped; the reason is not visible here.
        if data_idx == 479: continue
        objects = dataset.get_label_objects(data_idx)
        # Only whitelisted objects contribute boxes and votes.
        kept_objects = [obj for obj in objects
                        if obj.classname in type_whitelist]

        # Skip scenes with 0 object
        if skip_empty_scene and not kept_objects:
            continue

        object_list = []
        for obj in kept_objects:
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with that in data_viz, we do not time 2 to l,w.h
            # neither do we flip the heading angle
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        if object_list:
            obbs = np.vstack(object_list)  # (K,8)
        else:
            obbs = np.zeros((0, 8))

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        np.savez_compressed(os.path.join(output_folder,
                                         '%06d_pc.npz' % (data_idx)),
                            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)),
                obbs)

        if save_votes:
            N = pc_upright_depth_subsampled.shape[0]
            point_votes = np.zeros((N, 10))  # 3 votes and 1 vote mask
            # Next free vote slot of every point, in the range of [0,2]
            point_vote_idx = np.zeros((N)).astype(np.int32)
            indices = np.arange(N)
            for obj in kept_objects:
                try:
                    # Find all points in this object's OBB
                    box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                        obj.centroid, np.array([obj.l, obj.w, obj.h]),
                        obj.heading_angle)
                    pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                        pc_upright_depth_subsampled, box3d_pts_3d)
                    # Assign first dimension to indicate it is in an object box
                    point_votes[inds, 0] = 1
                    # Add the votes (all 0 if the point is not in any object's OBB)
                    votes = np.expand_dims(obj.centroid,
                                           0) - pc_in_box3d[:, 0:3]
                    # turn dense True,False inds to sparse number-wise inds
                    sparse_inds = indices[inds]
                    for i, j in enumerate(sparse_inds):
                        slot = int(point_vote_idx[j])
                        point_votes[j, slot * 3 + 1:(slot + 1) * 3 +
                                    1] = votes[i, :]
                        # Populate the other two slots with the first vote
                        if slot == 0:
                            point_votes[j, 4:7] = votes[i, :]
                            point_votes[j, 7:10] = votes[i, :]
                    # The slot index saturates at 2, so a fourth or later
                    # object overwrites the third vote.
                    point_vote_idx[inds] = np.minimum(2,
                                                      point_vote_idx[inds] + 1)
                except Exception as err:
                    # Best effort: report the failing object and go on, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    print('ERROR ----', data_idx, obj.classname, repr(err))
            np.savez_compressed(os.path.join(output_folder,
                                             '%06d_votes.npz' % (data_idx)),
                                point_votes=point_votes)
Exemple #4
0
    def __getitem__(self, idx):
        """
        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
            heading_residual_label: (MAX_NUM_OBJ,)
            size_classe_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
            size_residual_label: (MAX_NUM_OBJ,3)
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            point_obj_mask: (N,) with 0/1 with 1 indicating the point is in one of the object's OBB.
            point_instance_label: (N,) with int values in -1,...,num_box, indicating which object the point belongs to, -1 means a backgound point.
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: unused
        """
        point_cloud = self.point_cloud_list[idx]  # Nx6
        bboxes = self.bboxes_list[idx]  # K,8
        point_obj_mask = self.point_labels_list[idx][:, 0]
        point_instance_label = self.point_labels_list[idx][:, -1]

        if not self.use_color:
            point_cloud = point_cloud[:, 0:3]
        else:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

        if self.use_height:
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
            bboxes[:, 6] -= rot_angle

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        box3d_centers = np.zeros((MAX_NUM_OBJ, 3))
        box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:bboxes.shape[0]] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:bboxes.shape[0], :] = bboxes

        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            box3d_center = bbox[0:3]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(
                box3d_size, DC.class2type[semantic_class])
            box3d_centers[i, :] = box3d_center
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual
            box3d_sizes[i, :] = box3d_size

        target_bboxes_mask = label_mask
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes[:, 0:3] += 1000.0
        size_gts = np.zeros((MAX_NUM_OBJ, 3))
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            # compute axis aligned box
            xmin = np.min(corners_3d[:, 0])
            ymin = np.min(corners_3d[:, 1])
            zmin = np.min(corners_3d[:, 2])
            xmax = np.max(corners_3d[:, 0])
            ymax = np.max(corners_3d[:, 1])
            zmax = np.max(corners_3d[:, 2])
            target_bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
                                    (zmin + zmax) / 2, xmax - xmin,
                                    ymax - ymin, zmax - zmin])
            target_bboxes[i, :] = target_bbox
            size_gts[i, :] = target_bbox[3:6]

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_obj_mask = point_obj_mask[choices]
        point_instance_label = point_instance_label[choices]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        ret_dict['size_gts'] = size_gts.astype(np.float32)
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:, -1]  # from 0 to 9
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['point_obj_mask'] = point_obj_mask.astype(np.int64)
        ret_dict['point_instance_label'] = point_instance_label.astype(
            np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes
        return ret_dict
    def __getitem__(self, idx):
        """Load one scan and build box, vote, surface and line labels for it.

        Args:
            idx: index into ``self.scan_names``.

        Returns:
            A dict of numpy arrays. ``P`` below is ``self.num_points`` (the
            cloud is sub-sampled with ``pc_util.random_sampling``); all
            per-point labels index into that sub-sampled cloud.
                point_clouds: (P,3+C) XYZ, plus mean-subtracted RGB when
                    ``use_color`` and height above floor when ``use_height``
                center_label: (MAX_NUM_OBJ,3) centre of the axis-aligned box
                    enclosing each GT box
                heading_class_label: (MAX_NUM_OBJ,) from ``DC.angle2class``
                heading_residual_label: (MAX_NUM_OBJ,)
                size_class_label: (MAX_NUM_OBJ,) from ``DC.size2class``
                size_residual_label: (MAX_NUM_OBJ,3)
                sem_cls_label: (MAX_NUM_OBJ,) semantic class index
                box_label_mask: (MAX_NUM_OBJ,) 1 for real boxes, 0 for padding
                vote_label: (P,9) votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
                vote_label_mask: (P,) first column of the stored votes
                scan_idx: the ``idx`` argument as an int64 array
                max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded (augmented) boxes
                size_label: (MAX_NUM_OBJ,3) full box edge lengths
                heading_label: (MAX_NUM_OBJ,) box heading angle
                floor_height: only present when ``use_height``; computed
                    before augmentation
                point_boundary_mask_z / point_boundary_mask_xy: (P,) 1 for
                    points labelled as lying on a horizontal / vertical
                    box surface
                point_boundary_offset_z / point_boundary_offset_xy: (P,3)
                    offset from the point to its surface centre
                point_boundary_sem_z: (P,6) surface centre XYZ, two edge
                    lengths, semantic class
                point_boundary_sem_xy: (P,5) surface centre XYZ, height,
                    semantic class
                point_line_mask: (P,) 1 for points labelled as lying on a
                    box edge
                point_line_offset: (P,3) offset from the point to the edge
                    centre
                point_line_sem: (P,4) edge centre XYZ, semantic class

        Raises:
            ValueError: if a box that has enough semantic points fails one
                of the upright / plane-orientation sanity checks.
        """
        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        # Open the .npz archives with a context manager so their file
        # handles are released right after the arrays are read.
        # Column 6 is read below as the 37-class semantic id, so the array
        # has at least 7 columns: XYZ, RGB, semantic id.
        with np.load(scan_prefix + '_pc.npz') as pc_file:
            point_color_sem = pc_file['pc']
        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        with np.load(scan_prefix + '_votes.npz') as votes_file:
            point_votes = votes_file['point_votes']  # Nx10

        semantics37 = point_color_sem[:, 6]
        # For every point, the collection of 10-class ids it may count as.
        semantics10_multi = [
            DC.class37_2_class10_multi[k] for k in semantics37
        ]
        if not self.use_color:
            point_cloud = point_color_sem[:, 0:3]
        else:
            point_cloud = point_color_sem[:, 0:6]
            point_cloud[:, 3:6] = (point_color_sem[:, 3:6] - MEAN_COLOR_RGB)

        if self.use_height:
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        # NOTE: the order of np.random calls below is kept unchanged so that
        # seeded runs stay reproducible.
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat_t = np.transpose(sunrgbd_utils.rotz(rot_angle))

            # Rotate the vote end points (point + vote) with the *unrotated*
            # cloud, then re-express them relative to the rotated cloud.
            point_votes_end = np.zeros_like(point_votes)
            for lo in (1, 4, 7):
                point_votes_end[:, lo:lo + 3] = np.dot(
                    point_cloud[:, 0:3] + point_votes[:, lo:lo + 3],
                    rot_mat_t)

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
            bboxes[:, 6] -= rot_angle
            for lo in (1, 4, 7):
                point_votes[:, lo:lo + 3] = (
                    point_votes_end[:, lo:lo + 3] - point_cloud[:, 0:3])

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            for lo in (1, 4, 7):
                point_votes[:, lo:lo + 3] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        num_boxes = bboxes.shape[0]
        box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
        box3d_angles = np.zeros((MAX_NUM_OBJ, ))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:num_boxes] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:num_boxes, :] = bboxes
        target_bboxes_mask = label_mask
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(
                box3d_size, DC.class2type[semantic_class])
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual
            box3d_sizes[i, :] = box3d_size
            box3d_angles[i] = bbox[6]

            # Axis-aligned box enclosing the oriented box.
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            lo_xyz = np.min(corners_3d[:, 0:3], axis=0)
            hi_xyz = np.max(corners_3d[:, 0:3], axis=0)
            target_bboxes[i, 0:3] = (lo_xyz + hi_xyz) / 2
            target_bboxes[i, 3:6] = hi_xyz - lo_xyz

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        semantics10_multi = [semantics10_multi[i] for i in choices]
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        # Per-point surface ("boundary") and edge ("line") labels.
        point_boundary_mask_z = np.zeros(self.num_points)
        point_boundary_mask_xy = np.zeros(self.num_points)
        point_boundary_offset_z = np.zeros([self.num_points, 3])
        point_boundary_offset_xy = np.zeros([self.num_points, 3])
        point_boundary_sem_z = np.zeros([self.num_points, 3 + 2 + 1])
        point_boundary_sem_xy = np.zeros([self.num_points, 3 + 1 + 1])
        point_line_mask = np.zeros(self.num_points)
        point_line_offset = np.zeros([self.num_points, 3])
        point_line_sem = np.zeros([self.num_points, 3 + 1])

        def box_error(box_idx, reason):
            # Explicit failure instead of dropping into a debugger.
            return ValueError('scan {}: box {} {}'.format(
                scan_name, box_idx, reason))

        def near_plane(pts, plane):
            # Point-to-plane distances and the mask of points lying within
            # DIST_THRESH of the closest one.
            dist = np.abs(np.sum(pts * plane[:3], 1) + plane[-1])
            return dist, np.abs(dist - np.min(dist)) < DIST_THRESH

        def is_surface(dist, sel):
            # Enough points, and they are flat enough along the plane normal.
            return np.sum(sel) > NUM_POINT and np.var(dist[sel]) < VAR_THRESH

        def label_lines(ind, pts, sel, line_sels, corner_pairs, corners,
                        sem_cls):
            # Write edge labels for every edge that has enough points; the
            # edge centre is the midpoint of its two corner indices.
            for line_sel, (c_a, c_b) in zip(line_sels, corner_pairs):
                if np.sum(line_sel) > NUM_POINT_LINE:
                    target = ind[sel][line_sel]
                    linecenter = (corners[c_a] + corners[c_b]) / 2.0
                    point_line_mask[target] = 1.0
                    point_line_offset[target] = (
                        linecenter - pts[sel][line_sel])
                    point_line_sem[target] = np.array([
                        linecenter[0], linecenter[1], linecenter[2], sem_cls
                    ])

        def label_z_surface(ind, pts, sel, corners, diag, edges, sem_cls):
            # Horizontal surface: XY centre from the face diagonal, Z from
            # the selected points; extents are the two face edge lengths.
            center = (corners[diag[0]] + corners[diag[1]]) / 2.0
            center[2] = np.mean(pts[sel][:, 2])
            target = ind[sel]
            point_boundary_mask_z[target] = 1.0
            point_boundary_sem_z[target] = np.array([
                center[0], center[1], center[2],
                np.linalg.norm(corners[edges[0]] - corners[diag[0]]),
                np.linalg.norm(corners[edges[1]] - corners[diag[0]]), sem_cls
            ])
            point_boundary_offset_z[target] = center - pts[sel]

        def label_xy_surface(ind, pts, sel, corners, bottom, top, sem_cls):
            # Vertical surface: XY centre from the selected points, Z centre
            # and height from the given bottom/top corner indices.
            center = np.array([
                np.mean(pts[sel][:, 0]),
                np.mean(pts[sel][:, 1]),
                (corners[bottom, 2] + corners[top, 2]) / 2.0
            ])
            target = ind[sel]
            point_boundary_mask_xy[target] = 1.0
            point_boundary_sem_xy[target] = np.array([
                center[0], center[1], center[2],
                corners[top, 2] - corners[bottom, 2], sem_cls
            ])
            point_boundary_offset_xy[target] = center - pts[sel]

        def vertical_plane_pair(box_idx, v1, v2, anchor, opposite_pts):
            # Unit-normal plane through `anchor` spanned by v1, v2 and the
            # parallel plane through the mean of `opposite_pts`.
            a, b, c = np.cross(v1, v2)
            d = -np.dot(np.array([a, b, c]), anchor)
            near = np.array([a, b, c, d])
            near /= np.linalg.norm(near[:3])
            newd = np.sum(opposite_pts * near[:3], 1)
            if not near[2] < LOWER_THRESH:
                raise box_error(box_idx, 'has a side plane that is not '
                                'vertical (error with upright)')
            far = np.array([near[0], near[1], near[2], -np.mean(newd)])
            return near, far

        # box angle is -pi to pi
        for i in range(num_boxes):
            bbox = bboxes[i]
            sem_cls = bbox[7]
            corners = params2bbox(bbox[:3], 2 * bbox[3:6],
                                  clockwise2counter(bbox[6]))

            try:
                _, in_box = extract_pc_in_box3d(point_cloud, corners)
            except Exception:
                # Best effort: boxes whose points cannot be extracted get no
                # surface/line labels.
                continue
            in_box_idx = np.where(in_box)[0]  # T/F to index
            # Keep the points whose semantic label is compatible with the
            # box class; note semantics is 37 cls in sunrgbd.
            ind = np.array([
                j for j in in_box_idx if sem_cls in semantics10_multi[j]
            ])
            if ind.shape[0] < NUM_POINT_SEM_THRESHOLD:
                continue
            x = point_cloud[ind, :3]

            # ---- horizontal planes (bottom / top) ----
            plane_lower_temp = np.array([0, 0, 1, -corners[6, -1]])
            para_points = np.array(
                [corners[1], corners[3], corners[5], corners[7]])
            newd = np.sum(para_points * plane_lower_temp[:3], 1)
            if not (check_upright(para_points) and
                    plane_lower_temp[0] + plane_lower_temp[1] < LOWER_THRESH):
                raise box_error(i, 'is not upright (error with upright)')
            plane_lower = np.array([0, 0, 1, plane_lower_temp[-1]])
            plane_upper = np.array([0, 0, 1, -np.mean(newd)])
            # `== False` is kept on purpose to preserve the original
            # comparison semantics for whatever check_z returns.
            if check_z(plane_upper, para_points) == False:  # noqa: E712
                raise box_error(i, 'failed the upper-plane z check')

            alldist, sel = near_plane(x, plane_lower)
            sel1, sel2, sel3, sel4 = get_linesel(x[sel], corners, 'lower')
            label_lines(ind, x, sel, (sel1, sel2, sel3, sel4),
                        ((0, 2), (4, 6), (0, 4), (2, 6)), corners, sem_cls)
            if is_surface(alldist, sel):
                label_z_surface(ind, x, sel, corners, (0, 6), (4, 2), sem_cls)

            alldist, sel = near_plane(x, plane_upper)
            sel1, sel2, sel3, sel4 = get_linesel(x[sel], corners, 'upper')
            label_lines(ind, x, sel, (sel1, sel2, sel3, sel4),
                        ((1, 3), (5, 7), (1, 5), (3, 7)), corners, sem_cls)
            if is_surface(alldist, sel):
                label_z_surface(ind, x, sel, corners, (1, 7), (5, 3), sem_cls)

            # ---- vertical planes (left / right) ----
            plane_left, plane_right = vertical_plane_pair(
                i, corners[3] - corners[2], corners[2] - corners[0],
                corners[0],
                np.array([corners[4], corners[5], corners[6], corners[7]]))

            alldist, sel = near_plane(x, plane_left)
            sel1, sel2 = get_linesel(x[sel], corners, 'left')
            label_lines(ind, x, sel, (sel1, sel2), ((0, 1), (2, 3)), corners,
                        sem_cls)
            if is_surface(alldist, sel):
                label_xy_surface(ind, x, sel, corners, 0, 1, sem_cls)

            alldist, sel = near_plane(x, plane_right)
            sel1, sel2 = get_linesel(x[sel], corners, 'right')
            label_lines(ind, x, sel, (sel1, sel2), ((4, 5), (6, 7)), corners,
                        sem_cls)
            if is_surface(alldist, sel):
                label_xy_surface(ind, x, sel, corners, 4, 5, sem_cls)

            # ---- vertical planes (front / back); no edge labels here ----
            plane_front, plane_back = vertical_plane_pair(
                i, corners[0] - corners[4], corners[4] - corners[5],
                corners[5],
                np.array([corners[2], corners[3], corners[6], corners[7]]))

            alldist, sel = near_plane(x, plane_front)
            if is_surface(alldist, sel):
                label_xy_surface(ind, x, sel, corners, 0, 1, sem_cls)

            alldist, sel = near_plane(x, plane_back)
            if is_surface(alldist, sel):
                label_xy_surface(ind, x, sel, corners, 2, 3, sem_cls)

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes

        # new items
        ret_dict['size_label'] = box3d_sizes.astype(np.float32)
        ret_dict['heading_label'] = box3d_angles.astype(np.float32)
        if self.use_height:
            ret_dict['floor_height'] = floor_height

        ret_dict['point_boundary_mask_z'] = point_boundary_mask_z.astype(
            np.float32)
        ret_dict['point_boundary_mask_xy'] = point_boundary_mask_xy.astype(
            np.float32)
        ret_dict['point_boundary_offset_z'] = point_boundary_offset_z.astype(
            np.float32)
        ret_dict['point_boundary_offset_xy'] = point_boundary_offset_xy.astype(
            np.float32)
        ret_dict['point_boundary_sem_z'] = point_boundary_sem_z.astype(
            np.float32)
        ret_dict['point_boundary_sem_xy'] = point_boundary_sem_xy.astype(
            np.float32)

        ret_dict['point_line_mask'] = point_line_mask.astype(np.float32)
        ret_dict['point_line_offset'] = point_line_offset.astype(np.float32)
        ret_dict['point_line_sem'] = point_line_sem.astype(np.float32)

        return ret_dict
def _dump_pickle_verbose(obj, path):
    """Pickle ``obj`` to ``path`` and print a confirmation line."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
        print(f"{path} saved successfully !!")


def _accumulate_box_votes(points, objects, data_idx):
    """Build the raw (N,13) vote table for one scene.

    Column 0 is the 0/1 "inside some box" mask, columns 1:10 hold up to three
    XYZ offsets from the point to a box centroid, and columns 10:13 hold the
    index (row in the scene's box array, i.e. position in ``objects``) of the
    box each offset belongs to, or -1 when the point is in no box.

    Returns the table and the number of boxes that were processed without
    raising.
    """
    n_points = points.shape[0]
    point_votes = np.zeros((n_points, 13))  # 1 vote mask + 3 votes + 3 votes gt ind
    point_votes[:, 10:13] = -1
    vote_slot = np.zeros(n_points, dtype=np.int32)  # next free slot, in [0, 2]
    indices = np.arange(n_points)
    num_voted = 0

    # ``obb_idx`` is the position in ``objects`` and therefore the row of this
    # box in the saved bbox array; it must advance even when a box fails so
    # that the stored indices stay aligned with that array.
    for obb_idx, obj in enumerate(objects):
        try:
            # Find all points in this object's OBB
            box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                obj.centroid, np.array([obj.l, obj.w, obj.h]),
                obj.heading_angle)
            pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                points, box3d_pts_3d)
            # Assign first dimension to indicate it is in an object box
            point_votes[inds, 0] = 1
            # Offsets from every in-box point to the box centroid
            votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
            sparse_inds = indices[inds]  # dense True/False -> integer indices
            for i, j in enumerate(sparse_inds):
                slot = int(vote_slot[j])
                point_votes[j, slot * 3 + 1:(slot + 1) * 3 + 1] = votes[i, :]
                point_votes[j, slot + 10] = obb_idx
                # First box seen for this point: replicate its vote into the
                # two remaining slots so all three votes are identical.
                if slot == 0:
                    point_votes[j, 4:7] = votes[i, :]
                    point_votes[j, 7:10] = votes[i, :]
                    point_votes[j, 11] = obb_idx
                    point_votes[j, 12] = obb_idx
            vote_slot[inds] = np.minimum(2, vote_slot[inds] + 1)
            num_voted += 1
        except Exception as exc:
            # Best effort: a box that cannot be processed is reported and
            # skipped, the rest of the scene is still dumped.
            print('ERROR ----', data_idx, obj.classname, repr(exc))

    return point_votes, num_voted


def _promote_nearest_vote(point_votes):
    """Swap, in place, each in-box point's shortest vote into the first slot.

    Both the XYZ offset (columns 1:10) and the matching box index
    (columns 10:13) are swapped. Points outside every box are only checked
    for the -1 index sentinel.
    """
    for ip in range(point_votes.shape[0]):
        if point_votes[ip, 0] > 0:
            sq_dists = [np.sum(point_votes[ip, start:start + 3] ** 2)
                        for start in (1, 4, 7)]
            near_ind = int(np.argmin(sq_dists))
            lo = near_ind * 3 + 1

            first_vote = point_votes[ip, 1:4].copy()
            first_gt_ind = int(point_votes[ip, 10])

            point_votes[ip, 10] = point_votes[ip, 10 + near_ind]
            point_votes[ip, 10 + near_ind] = first_gt_ind
            point_votes[ip, 1:4] = point_votes[ip, lo:lo + 3].copy()
            point_votes[ip, lo:lo + 3] = first_vote
        else:
            assert point_votes[ip, 10] == -1, "error"
            assert point_votes[ip, 11] == -1, "error"
            assert point_votes[ip, 12] == -1, "error"


def extract_sunrgbd_data(idx_filename, split, output_folder, num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False, use_v1=False, skip_empty_scene=True):
    """ Extract scene point clouds, bounding boxes (centroids, box sizes,
    heading angles, semantic classes) and ground-truth votes.
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index)
        split: training or testing
        output_folder: directory the dumps are written to (created if missing)
        num_point: number of points each scene is randomly subsampled to
        type_whitelist: class names to keep; other objects are ignored
        save_votes: currently ignored -- votes are always computed and saved.
            Kept for interface compatibility.
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object
            (no object in whitelist)

    Dumps (per scene, each also as a ``.pkl`` pickle of the same array):
        <id>_pc.npz of (N,6) where N is for number of subsampled points and 6 is
            for XYZ and RGB (in 0~1) in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimension (l,w,h), heading_angle and semantic_class
        <id>_votes.npz of (N,13): column 0 is 0/1 indicating whether the point
            belongs to an object, columns 1:10 are three sets of GT votes for up
            to three objects (identical when the point is in only one object's
            OBB), columns 10:13 are the row indices into <id>_bbox.npy of the
            objects voted for (-1 when the point is in no object). The nearest
            vote is stored first.

    Dumps (once, aggregated over all processed scenes, as pickled lists):
        all_obbs_modified_nearest_has_empty.pkl, all_pc_modified_nearest_has_empty.pkl,
        all_point_votes_nearest_has_empty.pkl and
        all_point_labels_nearest_has_empty.pkl (columns 0 and 10 of the votes).
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.rstrip()) for line in idx_file]

    os.makedirs(output_folder, exist_ok=True)

    all_obbs = []
    all_pc_upright_depth_subsampled = []
    all_point_votes = []
    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        objects = dataset.get_label_objects(data_idx)
        kept_objects = [obj for obj in objects
                        if obj.classname in type_whitelist]

        # Skip scenes with 0 (whitelisted) object
        if skip_empty_scene and not kept_objects:
            continue

        # Note that compared with that in data_viz, we do not time 2 to l,w,h
        # neither do we flip the heading angle
        obbs = np.zeros((len(kept_objects), 8))  # (K,8)
        for row, obj in enumerate(kept_objects):
            obbs[row, 0:3] = obj.centroid
            obbs[row, 3:6] = np.array([obj.l, obj.w, obj.h])
            obbs[row, 6] = obj.heading_angle
            obbs[row, 7] = sunrgbd_utils.type2class[obj.classname]
        print(f"{data_idx} has {obbs.shape[0]} gt bboxes")

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        scene_prefix = os.path.join(output_folder, '%06d' % (data_idx))
        np.savez_compressed(scene_prefix + '_pc.npz',
                            pc=pc_upright_depth_subsampled)
        np.save(scene_prefix + '_bbox.npy', obbs)
        _dump_pickle_verbose(pc_upright_depth_subsampled,
                             scene_prefix + '_pc.pkl')
        _dump_pickle_verbose(obbs, scene_prefix + '_bbox.pkl')
        all_pc_upright_depth_subsampled.append(pc_upright_depth_subsampled)
        all_obbs.append(obbs)

        point_votes, num_voted = _accumulate_box_votes(
            pc_upright_depth_subsampled, kept_objects, data_idx)
        # choose the nearest as the first gt for each point
        _promote_nearest_vote(point_votes)

        print(f"{data_idx}_votes.npz has {num_voted} gt bboxes")
        np.savez_compressed(scene_prefix + '_votes.npz',
                            point_votes=point_votes)
        _dump_pickle_verbose(point_votes, scene_prefix + '_votes.pkl')
        all_point_votes.append(point_votes)

    _dump_pickle_verbose(
        all_obbs,
        os.path.join(output_folder, 'all_obbs_modified_nearest_has_empty.pkl'))
    _dump_pickle_verbose(
        all_pc_upright_depth_subsampled,
        os.path.join(output_folder, 'all_pc_modified_nearest_has_empty.pkl'))
    _dump_pickle_verbose(
        all_point_votes,
        os.path.join(output_folder, 'all_point_votes_nearest_has_empty.pkl'))

    # Per-point labels: in-box mask and index of the nearest box
    all_point_labels = [votes[:, [0, 10]] for votes in all_point_votes]
    _dump_pickle_verbose(
        all_point_labels,
        os.path.join(output_folder, 'all_point_labels_nearest_has_empty.pkl'))
    def __getitem__(self, idx):
        """Load one scan and build its detection and voting targets.

        Reads ``<scan>_vert.npy``, ``<scan>_ins_label.npy``,
        ``<scan>_sem_label.npy`` and ``<scan>_bbox.npy`` from
        ``self.data_path``, subsamples the vertices to ``self.num_points``
        and derives per-box and per-point labels.

        Args:
            idx: index into ``self.scan_names``.

        Returns:
            dict with keys:
                point_clouds: (num_points, 3+C) float32
                center_label: (MAX_NUM_OBJ, 3) centers of the axis-aligned
                    boxes enclosing each GT box
                heading_class_label: (MAX_NUM_OBJ,) int64
                heading_residual_label: (MAX_NUM_OBJ,) float32
                size_class_label: (MAX_NUM_OBJ,) int64
                size_residual_label: (MAX_NUM_OBJ, 3) float32
                sem_cls_label: (MAX_NUM_OBJ,) int64, last column of the boxes
                box_label_mask: (MAX_NUM_OBJ,) 1 for rows holding a real box
                vote_label: (num_points, 9) three identical XYZ votes
                vote_label_mask: (num_points,) 1 for points that vote
                scan_idx: ``idx`` as an int64 array
        """
        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        mesh_vertices = np.load(scan_prefix + '_vert.npy')
        instance_labels = np.load(scan_prefix + '_ins_label.npy')
        # Stored semantic labels are shifted down by one on load.
        semantic_labels = np.load(
            scan_prefix + '_sem_label.npy').astype(np.int32) - 1
        bboxes = np.load(scan_prefix + '_bbox.npy')

        if not self.use_color:
            point_cloud = mesh_vertices[:, 0:3]  # do not use color for now
        else:
            point_cloud = mesh_vertices[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB) / 256.0

        if self.use_height:
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)

        # Subsample the points and apply the SAME selection to the per-point
        # labels; the vote computation below indexes both with one index set,
        # so they must stay aligned.
        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        instance_labels = instance_labels[choices]
        semantic_labels = semantic_labels[choices]

        # NOTE: data augmentation is currently disabled in this loader;
        # ``self.augment`` has no effect here.

        # ------------------------------- VOTES ------------------------------
        # Note: since there's no map between bbox instance labels and
        # pc instance_labels (it had been filtered in the data preparation
        # step) the vote target of an instance is the center of the
        # axis-aligned extent of the points sharing that instance label.
        point_votes = np.zeros([self.num_points, 3])
        point_votes_mask = np.zeros(self.num_points)
        votable_classes = set(DC.type2class.values())
        for i_instance in np.unique(instance_labels):
            # find all points belong to that instance
            ind = np.where(instance_labels == i_instance)[0]
            # only instances whose semantic label is a known class vote
            if semantic_labels[ind[0]] in votable_classes:
                x = point_cloud[ind, :3]
                center = 0.5 * (x.min(0) + x.max(0))
                point_votes[ind, :] = center - x
                point_votes_mask[ind] = 1.0
        point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

        # ------------------------------- BOX LABELS -------------------------
        num_boxes = bboxes.shape[0]
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        target_bboxes_mask[0:num_boxes] = 1

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            box3d_size = bbox[3:6]
            angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
            size_classes[i], size_residuals[i] = DC.size2class(
                box3d_size, DC.class2type[semantic_class])

            # axis-aligned box enclosing the oriented box: center + extent
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], box3d_size, bbox[6])
            lower = corners_3d[:, 0:3].min(0)
            upper = corners_3d[:, 0:3].max(0)
            target_bboxes[i, 0:3] = (lower + upper) / 2
            target_bboxes[i, 3:6] = upper - lower

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        return ret_dict
Exemple #8
0
    def __data_generation_(self, idx):
        '''
        Load one scan, optionally augment it, and build the training targets.

        Args:
            idx: index into self.scan_names

        Returns (as a list, in this order):
            point_cloud:            N,3+C
            center_label:           MAX_NUM_OBJ, 3
            heading_class_label:    MAX_NUM_OBJ,
            heading_residual_label: MAX_NUM_OBJ,
            size_class_label:       MAX_NUM_OBJ,
            size_residual_label:    MAX_NUM_OBJ, 3
            sem_cls_label:          MAX_NUM_OBJ,
            box_label_mask:         MAX_NUM_OBJ,
            vote_label:             N, 9
            vote_label_mask:        N,
        '''
        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # N,6
        # Bounding boxes (K,8)
        # [0:3]: centroid coordinate. x,y,z
        # [3:6]: size. length, width, height (half extents, see NOTE below)
        # [6]: heading angle
        # [7]: semantic class index
        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        # Votes (N, 10) --3 votes and 1 vote mask
        # [0]: this point is in a bounding box or not (0/1)
        # [1:4],[4:7],[7:10]: if point is not in any bounding box, all zeros;
        # else the offset to bounding box center
        # one point can be assigned to at most 3 bounding boxes
        point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']

        if not self.use_color:
            point_cloud = point_cloud[:, 0:3]  # x,y,z
        else:
            point_cloud = point_cloud[:, 0:6]  # x,y,z,r,g,b
            # Center all three colour channels (not just the first column).
            point_cloud[:, 3:] = point_cloud[:, 3:] - MEAN_COLOR_RGB

        if self.use_height:
            # 0.99th percentile of z is used as the floor level
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # N,4 or N,7

        #-------------data augmentation-------------
        if self.augment:
            if np.random.rand() > 0.5:
                # flipping along YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = bboxes[:, 0] * -1
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.rand() * np.pi /
                         3) - np.pi / 6  # -30~30 degree
            rot_mat_t = np.transpose(sunrgbd_utils.rotz(rot_angle))
            # first, rotate the vote end points "with" the point_cloud
            vote_slots = ((1, 4), (4, 7), (7, 10))
            vote_ends = [
                np.dot(point_cloud[:, 0:3] + point_votes[:, lo:hi], rot_mat_t)
                for lo, hi in vote_slots
            ]
            # then, rotate the point cloud alone
            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
            bboxes[:, 6] -= rot_angle  # the original angle is NOT flipped
            # finally, restore the votes by recalculating the offsets
            for (lo, hi), ends in zip(vote_slots, vote_ends):
                point_votes[:, lo:hi] = ends - point_cloud[:, 0:3]

            # augment the color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB  # restore color to 0~1
                rgb_color *= (1 + 0.4 * np.random.rand(3) - 0.2
                              )  # random scale brightness 80% ~ 120%
                rgb_color += (0.1 * np.random.rand(3) - 0.05)  # random shift
                rgb_color += np.expand_dims(
                    np.random.rand(point_cloud.shape[0]) * 0.05 - 0.025,
                    -1)  # random jitter
                rgb_color = rgb_color - MEAN_COLOR_RGB
                rgb_color *= np.expand_dims(
                    np.random.rand(point_cloud.shape[0]) > 0.3,
                    -1)  # drop 30% colors
                # rgb_color is a separate array: write the augmented colours
                # back, otherwise the augmentation has no effect.
                point_cloud[:, 3:6] = rgb_color

            # scale the size
            scale_ratio = np.random.rand() * 0.3 + 0.85  # 0.85 ~ 1.15
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:6] *= scale_ratio
            # all nine vote coordinates, columns 1..9 inclusive
            point_votes[:, 1:10] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio

            # shift the point cloud -0.5~0.5
            offset = np.expand_dims(np.random.rand(3) - 0.5, 0)
            point_cloud[:, 0:3] += offset
            bboxes[:, 0:3] += offset
            # shifting doesn't change: size, votes, height

        # ------------labels------------
        num_boxes = bboxes.shape[0]
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        target_bboxes_mask[0:num_boxes] = 1

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_classes[i], size_residuals[i] = DC.size2class(
                box3d_size, DC.class2type[semantic_class])

            # axis-aligned box enclosing the oriented box
            # 0:3 - centers
            # 3:6 - size
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            lower = corners_3d[:, 0:3].min(0)
            upper = corners_3d[:, 0:3].max(0)
            target_bboxes[i, 0:3] = (lower + upper) / 2
            target_bboxes[i, 3:6] = upper - lower

        point_cloud, choice = pc_utils.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_votes_mask = point_votes[choice, 0]
        point_votes = point_votes[choice, 1:]

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

        return [
            point_cloud.astype(np.float32),
            target_bboxes.astype(np.float32)[:, :3],   # center_label
            angle_classes.astype(np.int64),            # heading_class_label
            angle_residuals.astype(np.float32),        # heading_residual_label
            size_classes.astype(np.int64),             # size_class_label
            size_residuals.astype(np.float32),         # size_residual_label
            target_bboxes_semcls.astype(np.int64),     # sem_cls_label
            target_bboxes_mask.astype(np.float32),     # box_label_mask
            point_votes.astype(np.float32),            # vote_label
            point_votes_mask.astype(np.int64),         # vote_label_mask
        ]