def __getitem__(self, idx):
        """Load scan ``idx`` and assemble one training sample.

        The returned dict holds numpy values under these keys:
            point_clouds: sub-sampled (and, when ``self.augment``, flipped /
                rotated / scaled) points; XYZ, plus color-minus-mean columns
                when ``self.use_color`` and a height column when
                ``self.use_height``.
            center_label: (MAX_NUM_OBJ, 3) box centers (bbox columns 0:3).
            heading_class_label / heading_residual_label: (MAX_NUM_OBJ,)
                outputs of ``DC.angle2class`` on bbox column 6.
            size_class_label / size_residual_label: (MAX_NUM_OBJ,) and
                (MAX_NUM_OBJ, 3) outputs of ``DC.size2class`` on twice the
                stored box size (bbox columns 3:6).
            sem_cls_label: (MAX_NUM_OBJ,) bbox column 7.
            box_label_mask: (MAX_NUM_OBJ,) 1 for rows backed by a real box.
            vote_label: columns 1: of the sampled ``point_votes`` rows.
            vote_label_mask: column 0 of the sampled ``point_votes`` rows.
            scan_idx: ``idx`` as int64.
            supervised_mask: constant 1.
            ema_point_clouds: the same sampled rows taken from the
                un-augmented copy of the cloud, with its own independent
                random X flip when ``self.augment``.
            flip_x_axis, flip_y_axis, rot_mat, scale: the augmentation that
                was applied to ``point_clouds`` (``flip_y_axis`` is always 0).
            flip_x_axis_ema, flip_y_axis_ema: the flip applied to
                ``ema_point_clouds`` (``flip_y_axis_ema`` is always 0).
        """
        file_prefix = os.path.join(self.data_path, self.scan_names[idx])
        point_cloud = np.load(file_prefix + '_pc.npz')['pc']
        bboxes = np.load(file_prefix + '_bbox.npy')
        point_votes = np.load(file_prefix + '_votes.npz')['point_votes']

        if self.use_color:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)
        else:
            point_cloud = point_cloud[:, 0:3]

        if self.use_height:
            z_values = point_cloud[:, 2]
            # np.percentile takes q on a 0-100 scale, so 0.99 picks a value
            # very close to the lowest z; it is used as the floor estimate.
            height_column = (z_values -
                             np.percentile(z_values, 0.99))[:, np.newaxis]
            point_cloud = np.concatenate([point_cloud, height_column], 1)

        # Snapshot taken before any augmentation; feeds 'ema_point_clouds'.
        raw_points = point_cloud.copy()

        # ---------------------------- augmentation ----------------------------
        flip_x_axis = flip_y_axis = 0
        flip_x_axis_ema = flip_y_axis_ema = 0
        rot_mat = np.identity(3)
        scale_ratio = np.ones((1, 3))
        # point_votes column 0 is the mask; the rest are three XYZ vote groups.
        vote_groups = (slice(1, 4), slice(4, 7), slice(7, 10))
        if self.augment:
            if np.random.random() > 0.5:
                # Mirror across the YZ plane: negate X everywhere and reflect
                # the heading angle.
                flip_x_axis = 1
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Random rotation about the Z axis, between -30 and +30 degrees.
            rot_angle = np.random.random() * np.pi / 3 - np.pi / 6
            rot_mat = sunrgbd_utils.rotz(rot_angle)
            rot_mat_t = np.transpose(rot_mat)

            # Rotate the absolute vote end points using the still un-rotated
            # points; the votes are re-expressed as offsets further down.
            vote_ends = np.zeros_like(point_votes)
            for group in vote_groups:
                vote_ends[:, group] = np.dot(
                    point_cloud[:, 0:3] + point_votes[:, group], rot_mat_t)

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
            bboxes[:, 6] -= rot_angle
            for group in vote_groups:
                point_votes[:, group] = (vote_ends[:, group] -
                                         point_cloud[:, 0:3])

            # Isotropic scale drawn from [0.85, 1.15), kept as (1, 3) so it is
            # returned in the same shape as the non-augmented default.
            scale_value = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_value, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            for group in (slice(0, 3), slice(3, 6)):
                bboxes[:, group] *= scale_ratio
            for group in vote_groups:
                point_votes[:, group] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- labels -------------------------------
        num_boxes = bboxes.shape[0]
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))

        target_bboxes_mask[0:num_boxes] = 1
        target_bboxes[0:num_boxes, :] = bboxes[:, 0:6]

        for box_idx, box in enumerate(bboxes):
            class_id = box[7]
            angle_classes[box_idx], angle_residuals[box_idx] = \
                DC.angle2class(box[6])
            # Stored sizes are half edge lengths, while size2class works with
            # full edge lengths, hence the factor of two.
            size_classes[box_idx], size_residuals[box_idx] = DC.size2class(
                box[3:6] * 2, DC.class2type[class_id])
            target_bboxes_semcls[box_idx] = class_id

        # ------------------------------ sampling ------------------------------
        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]
        ema_point_cloud = raw_points[choices]
        if self.augment and np.random.random() > 0.5:
            # Independent YZ-plane flip for the EMA view.
            flip_x_axis_ema = 1
            ema_point_cloud[:, 0] = -1 * ema_point_cloud[:, 0]

        return {
            'point_clouds': point_cloud.astype(np.float32),
            'center_label': target_bboxes.astype(np.float32)[:, 0:3],
            'heading_class_label': angle_classes.astype(np.int64),
            'heading_residual_label': angle_residuals.astype(np.float32),
            'size_class_label': size_classes.astype(np.int64),
            'size_residual_label': size_residuals.astype(np.float32),
            'sem_cls_label': target_bboxes_semcls.astype(np.int64),
            'box_label_mask': target_bboxes_mask.astype(np.float32),
            'vote_label': point_votes.astype(np.float32),
            'vote_label_mask': point_votes_mask.astype(np.int64),
            'scan_idx': np.array(idx).astype(np.int64),
            'supervised_mask': np.array(1).astype(np.int64),
            'ema_point_clouds': ema_point_cloud.astype(np.float32),
            'flip_x_axis': np.array(flip_x_axis).astype(np.int64),
            'flip_y_axis': np.array(flip_y_axis).astype(np.int64),
            'rot_mat': rot_mat.astype(np.float32),
            'scale': np.array(scale_ratio).astype(np.float32),
            'flip_x_axis_ema': np.array(flip_x_axis_ema).astype(np.int64),
            'flip_y_axis_ema': np.array(flip_y_axis_ema).astype(np.int64),
        }
    def __getitem__(self, idx):
        """Load scan ``idx`` and assemble one training sample.

        Returns a dict with the following keys:
            point_clouds: sub-sampled points. Without color the features come
                from ``get_box2d_feature``; with color they are XYZ plus
                color-minus-mean. A height column is appended when
                ``self.use_height``.
            center_label: (MAX_NUM_OBJ,3) center of the axis-aligned box
                enclosing each GT box's corners
            heading_class_label: (MAX_NUM_OBJ,) from ``DC.angle2class``
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) from ``DC.size2class``
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index (last bbox
                column)
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a real box
            vote_label: columns 1: of the sampled ``point_votes`` rows
            vote_label_mask: column 0 of the sampled ``point_votes`` rows
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the (augmented)
                boxes

        Raises:
            NotImplementedError: if both ``self.use_color`` and
                ``self.use_box2d`` are set.
        """
        # Reject the unsupported configuration before doing any file I/O.
        # (The original raised `NotImplemented(...)`, which is not an
        # exception class and fails with a TypeError instead.)
        if self.use_color and self.use_box2d:
            raise NotImplementedError(
                'color and 2d bounding box at the same time is not implemented'
            )

        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        point_votes = np.load(scan_prefix +
                              '_votes.npz')['point_votes']  # Nx10
        bbox2ds = np.load(scan_prefix + '_bbox2d.npy')
        bbox2d_probs = np.load(scan_prefix + '_bbox2d_prob.npy')
        calib_Rtilt = np.load(scan_prefix + '_calib_Rtilt.npy')
        calib_K = np.load(scan_prefix + '_calib_K.npy')

        if not self.use_color:
            # NOTE(review): this branch is taken whenever color is off,
            # regardless of self.use_box2d -- confirm that is intended.
            point_cloud = get_box2d_feature(point_cloud, bbox2ds, bbox2d_probs,
                                            calib_Rtilt, calib_K)
        else:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

        if self.use_height:
            # np.percentile takes q on a 0-100 scale, so 0.99 picks a value
            # very close to the lowest z; it is used as the floor estimate.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)

            # Rotate the absolute vote end points while point_cloud is still
            # un-rotated; they are turned back into offsets after the points
            # themselves have been rotated.
            point_votes_end = np.zeros_like(point_votes)
            point_votes_end[:, 1:4] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, 1:4],
                np.transpose(rot_mat))
            point_votes_end[:, 4:7] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, 4:7],
                np.transpose(rot_mat))
            point_votes_end[:, 7:10] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, 7:10],
                np.transpose(rot_mat))

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
            bboxes[:, 6] -= rot_angle
            point_votes[:, 1:4] = point_votes_end[:, 1:4] - point_cloud[:, 0:3]
            point_votes[:, 4:7] = point_votes_end[:, 4:7] - point_cloud[:, 0:3]
            point_votes[:,
                        7:10] = point_votes_end[:, 7:10] - point_cloud[:, 0:3]

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            point_votes[:, 1:4] *= scale_ratio
            point_votes[:, 4:7] *= scale_ratio
            point_votes[:, 7:10] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        num_boxes = bboxes.shape[0]
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        target_bboxes_mask[0:num_boxes] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:num_boxes, :] = bboxes
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(
                box3d_size, DC.class2type[semantic_class])
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual

            # Axis-aligned box enclosing the oriented box's corners, stored as
            # (center xyz, extent xyz).
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            corner_min = np.min(corners_3d[:, 0:3], axis=0)
            corner_max = np.max(corners_3d[:, 0:3], axis=0)
            target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
            target_bboxes[i, 3:6] = corner_max - corner_min

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes
        return ret_dict
Beispiel #3
0
    def __getitem__(self, idx):
        """Load scan ``idx`` and assemble one training sample.

        Returns a dict with the following keys:
            point_clouds: sub-sampled points; XYZ, plus color-minus-mean when
                ``self.use_color`` and a height column when ``self.use_height``
            center_label: (MAX_NUM_OBJ,3) center of the axis-aligned box
                enclosing each GT box's corners
            heading_class_label: (MAX_NUM_OBJ,) from ``DC.angle2class``
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) from ``DC.size2class``
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index (last bbox
                column)
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a real box
            vote_label: columns 1: of the sampled ``point_votes`` rows
            vote_label_mask: column 0 of the sampled ``point_votes`` rows
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the (augmented)
                boxes
        and, only when ``self.use_imvote``:
            scale: the scale factor applied by augmentation (1.0 otherwise)
            calib_Rtilt: (3,3) calibration matrix with the flip / rotation /
                scale augmentation folded in
            calib_K: (3,3) calibration matrix read from the calib file
            full_img_width: image width in pixels
            cls_score_feats: (1+MAX_NUM_2D_DET, NUM_CLS) class scores of the
                2D detections; row 0 is an all-zero dummy row
            full_img_votes_1d: flattened per-pixel image votes, zero-padded to
                MAX_NUM_PIXEL*self.vote_dims
            full_img_1d: flattened normalized image, zero-padded to
                MAX_NUM_PIXEL*3
        """
        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # Nx6
        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10
        if self.use_imvote:
            # Read camera parameters. The file is opened in a `with` block so
            # the handle is closed deterministically (the original leaked it).
            calib_path = os.path.join(self.raw_data_path, 'calib',
                                      scan_name + '.txt')
            with open(calib_path) as calib_file:
                calib_lines = calib_file.readlines()
            # The first two lines hold nine space-separated floats each,
            # reshaped to 3x3 in column-major ('F') order.
            calib_Rtilt = np.reshape(
                np.array([float(x) for x in calib_lines[0].rstrip().split(' ')]),
                (3, 3), 'F')
            calib_K = np.reshape(
                np.array([float(x) for x in calib_lines[1].rstrip().split(' ')]),
                (3, 3), 'F')
            # Read image
            full_img = sunrgbd_utils.load_image(
                os.path.join(self.raw_data_path, 'image', scan_name + '.jpg'))
            full_img_height = full_img.shape[0]
            full_img_width = full_img.shape[1]

            # ------------------------------- 2D IMAGE VOTES ------------------------------
            cls_id_list = self.cls_id_map[scan_name]
            cls_score_list = self.cls_score_map[scan_name]
            bbox_2d_list = self.bbox_2d_map[scan_name]
            obj_img_list = []
            for i2d, (cls2d, box2d) in enumerate(zip(cls_id_list, bbox_2d_list)):
                xmin, ymin, xmax, ymax = box2d
                # During training we randomly drop 2D boxes to reduce over-fitting
                if self.train and np.random.random() > 0.5:
                    continue

                obj_img = full_img[ymin:ymax, xmin:xmax, :]
                obj_h = obj_img.shape[0]
                obj_w = obj_img.shape[1]
                if obj_h == 0 or obj_w == 0:
                    continue

                # Use 2D box center as approximation
                uv_centroid = np.array([obj_w // 2, obj_h // 2])

                # Per-pixel (du, dv) offset from each pixel of the crop to the
                # crop center; uv_centroid broadcasts over the (h, w, 2) grid.
                v_coords, u_coords = np.meshgrid(range(obj_h), range(obj_w),
                                                 indexing='ij')
                img_vote = uv_centroid - np.transpose(
                    np.array([u_coords, v_coords]), (1, 2, 0))

                # Box origin (2 values), crop size (2 values), class id, index to the semantic cues
                meta_data = (xmin, ymin, obj_h, obj_w, cls2d, i2d)
                obj_img_list.append((meta_data, img_vote))

            # Per-pixel layout: channel 0 counts the boxes covering the pixel,
            # followed by 4-value slots (du, dv, class id, box index + 1).
            full_img_votes = np.zeros(
                (full_img_height, full_img_width, self.vote_dims),
                dtype=np.float32)
            # NOTE(review): the original comment says the empty 2D box index is
            # set to -1 here, but channels 3::4 are the class-id entries of
            # each slot (1+4k+2); the box-index entries are at 1+4k+3. Behaviour
            # is kept as is -- confirm which one is intended.
            full_img_votes[:, :, 3::4] = -1.

            for meta_data, img_vote in obj_img_list:
                u0, v0, h, w, cls2d, i2d = meta_data
                for u in range(u0, u0 + w):
                    for v in range(v0, v0 + h):
                        iidx = int(full_img_votes[v, u, 0])
                        if iidx >= self.max_imvote_per_pixel:
                            continue
                        full_img_votes[v, u, (1 + iidx * 4):(1 + iidx * 4 + 2)] = img_vote[v - v0, u - u0, :]
                        full_img_votes[v, u, (1 + iidx * 4 + 2)] = cls2d
                        full_img_votes[v, u, (1 + iidx * 4 + 3)] = i2d + 1  # add +1 here as we need a dummy feature for pixels outside all boxes
                full_img_votes[v0:(v0 + h), u0:(u0 + w), 0] += 1

            full_img_votes_1d = np.zeros((MAX_NUM_PIXEL * self.vote_dims),
                                         dtype=np.float32)
            full_img_votes_1d[0:full_img_height * full_img_width * self.vote_dims] = full_img_votes.flatten()

            # Semantic cues: one-hot vector for class scores
            cls_score_feats = np.zeros((1 + MAX_NUM_2D_DET, NUM_CLS),
                                       dtype=np.float32)
            # First row is dummy feature
            len_obj = len(cls_id_list)
            if len_obj:
                ind_obj = np.arange(1, len_obj + 1)
                ind_cls = np.array(cls_id_list)
                cls_score_feats[ind_obj, ind_cls] = np.array(cls_score_list)

            # Texture cues: normalized RGB values
            full_img = (full_img - 128.) / 255.
            # Serialize data to 1D and save image size so that we can recover the original location in the image
            full_img_1d = np.zeros((MAX_NUM_PIXEL * 3), dtype=np.float32)
            full_img_1d[:full_img_height * full_img_width * 3] = full_img.flatten()

        if not self.use_color:
            point_cloud = point_cloud[:, 0:3]
        else:
            point_cloud = point_cloud[:, 0:6]
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

        if self.use_height:
            # np.percentile takes q on a 0-100 scale, so 0.99 picks a value
            # very close to the lowest z; it is used as the floor estimate.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        scale_ratio = 1.
        if self.augment:
            flip_flag = (np.random.random() > 0.5)
            if flip_flag:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)

            # Rotate the absolute vote end points while point_cloud is still
            # un-rotated; they are turned back into offsets after the points
            # themselves have been rotated.
            point_votes_end = np.zeros_like(point_votes)
            point_votes_end[:, 1:4] = np.dot(point_cloud[:, 0:3] + point_votes[:, 1:4], np.transpose(rot_mat))
            point_votes_end[:, 4:7] = np.dot(point_cloud[:, 0:3] + point_votes[:, 4:7], np.transpose(rot_mat))
            point_votes_end[:, 7:10] = np.dot(point_cloud[:, 0:3] + point_votes[:, 7:10], np.transpose(rot_mat))

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], np.transpose(rot_mat))
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
            bboxes[:, 6] -= rot_angle
            point_votes[:, 1:4] = point_votes_end[:, 1:4] - point_cloud[:, 0:3]
            point_votes[:, 4:7] = point_votes_end[:, 4:7] - point_cloud[:, 0:3]
            point_votes[:, 7:10] = point_votes_end[:, 7:10] - point_cloud[:, 0:3]

            if self.use_imvote:
                R_inverse = np.copy(np.transpose(rot_mat))
                if flip_flag:
                    R_inverse[0, :] *= -1
                # Update Rtilt according to the augmentation
                # R_inverse (3x3) * point (3x1) transforms an augmented depth point
                # to original point in upright_depth coordinates
                calib_Rtilt = np.dot(np.transpose(R_inverse), calib_Rtilt)

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05)  # color shift for each channel
                rgb_color += np.expand_dims((0.05 * np.random.random(point_cloud.shape[0]) - 0.025), -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            if self.use_imvote:
                calib_Rtilt = np.dot(np.array([[scale_ratio, 0, 0], [0, scale_ratio, 0], [0, 0, scale_ratio]]), calib_Rtilt)
            scale_ratio_expand = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio_expand
            bboxes[:, 0:3] *= scale_ratio_expand
            bboxes[:, 3:6] *= scale_ratio_expand
            point_votes[:, 1:4] *= scale_ratio_expand
            point_votes[:, 4:7] *= scale_ratio_expand
            point_votes[:, 7:10] *= scale_ratio_expand
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio

        # ------------------------------- LABELS ------------------------------
        num_boxes = bboxes.shape[0]
        angle_classes = np.zeros((MAX_NUM_OBJ,))
        angle_residuals = np.zeros((MAX_NUM_OBJ,))
        size_classes = np.zeros((MAX_NUM_OBJ,))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        target_bboxes_mask[0:num_boxes] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:num_boxes, :] = bboxes
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(box3d_size, DC.class2type[semantic_class])
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual

            # Axis-aligned box enclosing the oriented box's corners, stored as
            # (center xyz, extent xyz).
            corners_3d = sunrgbd_utils.my_compute_box_3d(bbox[0:3], bbox[3:6], bbox[6])
            corner_min = np.min(corners_3d[:, 0:3], axis=0)
            corner_max = np.max(corners_3d[:, 0:3], axis=0)
            target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
            target_bboxes[i, 3:6] = corner_max - corner_min

        point_cloud, choices = pc_util.random_sampling(point_cloud, self.num_points, return_choices=True)
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes
        if self.use_imvote:
            ret_dict['scale'] = np.array(scale_ratio).astype(np.float32)
            ret_dict['calib_Rtilt'] = calib_Rtilt.astype(np.float32)
            ret_dict['calib_K'] = calib_K.astype(np.float32)
            ret_dict['full_img_width'] = np.array(full_img_width).astype(np.int64)
            ret_dict['cls_score_feats'] = cls_score_feats.astype(np.float32)
            ret_dict['full_img_votes_1d'] = full_img_votes_1d.astype(np.float32)
            ret_dict['full_img_1d'] = full_img_1d.astype(np.float32)

        return ret_dict
Beispiel #4
0
    def __getitem__(self, idx):
        """
        Build one sample from the in-memory cache (points, boxes, point labels).

        The cached arrays are never modified: all colour normalisation and
        augmentation is applied to private copies, so repeated access to the
        same ``idx`` always starts from the original data.

        Args:
            idx: index into ``self.point_cloud_list`` / ``self.bboxes_list`` /
                ``self.point_labels_list``.

        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) center XYZ of the axis-aligned box
                enclosing each GT box; padding rows are set to 1000.
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
            heading_residual_label: (MAX_NUM_OBJ,)
            size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
            size_residual_label: (MAX_NUM_OBJ,3)
            size_gts: (MAX_NUM_OBJ,3) extents of the axis-aligned enclosing box
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            point_obj_mask: (N,) with 0/1 with 1 indicating the point is in one of the object's OBB.
            point_instance_label: (N,) with int values in -1,...,num_box, indicating which object the point belongs to, -1 means a background point.
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded box parameters; unused
        """
        cached_points = self.point_cloud_list[idx]  # Nx6
        # Copy before any in-place edit: the flip/rotation/scale below write
        # into `bboxes`, and without a copy those edits would accumulate in
        # self.bboxes_list across calls.
        bboxes = self.bboxes_list[idx].copy()  # K,8
        point_labels = self.point_labels_list[idx]
        # Only read (and later fancy-indexed), so views are safe here.
        point_obj_mask = point_labels[:, 0]
        point_instance_label = point_labels[:, -1]

        # Basic slicing returns a view of the cached array, so take an
        # explicit copy before subtracting the mean colour / augmenting.
        if not self.use_color:
            point_cloud = cached_points[:, 0:3].copy()
        else:
            point_cloud = cached_points[:, 0:6].copy()
            point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

        if self.use_height:
            # NOTE: np.percentile takes q in [0, 100], so this is the 0.99th
            # percentile of z (close to the minimum). Kept as is so the
            # height feature stays unchanged.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
            bboxes[:, 6] -= rot_angle

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            if self.use_height:
                # The height feature is the last column; scale it too.
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        num_boxes = bboxes.shape[0]
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:num_boxes] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:num_boxes, :] = bboxes

        target_bboxes_mask = label_mask
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        # Rows that do not correspond to a real box keep a center of 1000.
        target_bboxes[:, 0:3] += 1000.0
        size_gts = np.zeros((MAX_NUM_OBJ, 3))

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(
                box3d_size, DC.class2type[semantic_class])
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual

            # Axis-aligned box enclosing the oriented box corners.
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            corner_min = np.min(corners_3d[:, 0:3], axis=0)
            corner_max = np.max(corners_3d[:, 0:3], axis=0)
            target_bboxes[i, 0:3] = (corner_min + corner_max) / 2
            target_bboxes[i, 3:6] = corner_max - corner_min
            size_gts[i, :] = target_bboxes[i, 3:6]

        # Subsample to a fixed number of points and keep the per-point labels
        # aligned with the selected points.
        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_obj_mask = point_obj_mask[choices]
        point_instance_label = point_instance_label[choices]

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        ret_dict['size_gts'] = size_gts.astype(np.float32)
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['point_obj_mask'] = point_obj_mask.astype(np.int64)
        ret_dict['point_instance_label'] = point_instance_label.astype(
            np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes
        return ret_dict
    def __getitem__(self, idx):
        """
        Returns a dict with following keys:
            point_clouds: (N,3+C)
            center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
            heading_residual_label: (MAX_NUM_OBJ,)
            size_classe_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
            size_residual_label: (MAX_NUM_OBJ,3)
            sem_cls_label: (MAX_NUM_OBJ,) semantic class index
            box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
            vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
                if there is only one vote than X1==X2==X3 etc.
            vote_label_mask: (N,) with 0/1 with 1 indicating the point
                is in one of the object's OBB.
            scan_idx: int scan index in scan_names list
            max_gt_bboxes: unused
        """
        scan_name = self.scan_names[idx]
        point_color_sem = np.load(
            os.path.join(self.data_path, scan_name) + '_pc.npz')['pc']  # Nx6
        bboxes = np.load(
            os.path.join(self.data_path, scan_name) + '_bbox.npy')  # K,8
        point_votes = np.load(
            os.path.join(self.data_path, scan_name) +
            '_votes.npz')['point_votes']  # Nx10

        semantics37 = point_color_sem[:, 6]
        semantics10 = np.array([DC.class37_2_class10[k] for k in semantics37])
        semantics10_multi = [
            DC.class37_2_class10_multi[k] for k in semantics37
        ]
        if not self.use_color:
            point_cloud = point_color_sem[:, 0:3]
        else:
            point_cloud = point_color_sem[:, 0:6]
            point_cloud[:, 3:6] = (point_color_sem[:, 3:6] - MEAN_COLOR_RGB)

        if self.use_height:
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

        # ------------------------------- DATA AUGMENTATION ------------------------------
        if self.augment:
            if np.random.random() > 0.5:
                # Flipping along the YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = -1 * bboxes[:, 0]
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.random() * np.pi /
                         3) - np.pi / 6  # -30 ~ +30 degree
            rot_mat = sunrgbd_utils.rotz(rot_angle)

            point_votes_end = np.zeros_like(point_votes)
            point_votes_end[:, 1:4] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, 1:4],
                np.transpose(rot_mat))
            point_votes_end[:, 4:7] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, 4:7],
                np.transpose(rot_mat))
            point_votes_end[:, 7:10] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, 7:10],
                np.transpose(rot_mat))

            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3],
                                         np.transpose(rot_mat))
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], np.transpose(rot_mat))
            bboxes[:, 6] -= rot_angle
            point_votes[:, 1:4] = point_votes_end[:, 1:4] - point_cloud[:, 0:3]
            point_votes[:, 4:7] = point_votes_end[:, 4:7] - point_cloud[:, 0:3]
            point_votes[:,
                        7:10] = point_votes_end[:, 7:10] - point_cloud[:, 0:3]

            # Augment RGB color
            if self.use_color:
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
                rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                              )  # brightness change for each channel
                rgb_color += (0.1 * np.random.random(3) - 0.05
                              )  # color shift for each channel
                rgb_color += np.expand_dims(
                    (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                    -1)  # jittering on each pixel
                rgb_color = np.clip(rgb_color, 0, 1)
                # randomly drop out 30% of the points' colors
                rgb_color *= np.expand_dims(
                    np.random.random(point_cloud.shape[0]) > 0.3, -1)
                point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

            # Augment point cloud scale: 0.85x-1.15x
            scale_ratio = np.random.random() * 0.3 + 0.85
            scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:3] *= scale_ratio
            bboxes[:, 3:6] *= scale_ratio
            point_votes[:, 1:4] *= scale_ratio
            point_votes[:, 4:7] *= scale_ratio
            point_votes[:, 7:10] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio[0, 0]

        # ------------------------------- LABELS ------------------------------
        box3d_centers = np.zeros((MAX_NUM_OBJ, 3))
        box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        label_mask = np.zeros((MAX_NUM_OBJ))
        label_mask[0:bboxes.shape[0]] = 1
        max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
        max_bboxes[0:bboxes.shape[0], :] = bboxes

        # new items
        box3d_angles = np.zeros((MAX_NUM_OBJ, ))

        point_boundary_mask_z = np.zeros(self.num_points)
        point_boundary_mask_xy = np.zeros(self.num_points)
        point_boundary_offset_z = np.zeros([self.num_points, 3])
        point_boundary_offset_xy = np.zeros([self.num_points, 3])
        point_boundary_sem_z = np.zeros([self.num_points, 3 + 2 + 1])
        point_boundary_sem_xy = np.zeros([self.num_points, 3 + 1 + 1])
        point_line_mask = np.zeros(self.num_points)
        point_line_offset = np.zeros([self.num_points, 3])
        point_line_sem = np.zeros([self.num_points, 3 + 1])

        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            box3d_center = bbox[0:3]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(
                box3d_size, DC.class2type[semantic_class])
            box3d_centers[i, :] = box3d_center
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual
            box3d_sizes[i, :] = box3d_size
            box3d_angles[i] = bbox[6]

        target_bboxes_mask = label_mask
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            # compute axis aligned box
            xmin = np.min(corners_3d[:, 0])
            ymin = np.min(corners_3d[:, 1])
            zmin = np.min(corners_3d[:, 2])
            xmax = np.max(corners_3d[:, 0])
            ymax = np.max(corners_3d[:, 1])
            zmax = np.max(corners_3d[:, 2])
            target_bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
                                    (zmin + zmax) / 2, xmax - xmin,
                                    ymax - ymin, zmax - zmin])
            target_bboxes[i, :] = target_bbox

        point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        semantics37 = semantics37[choices]
        semantics10 = semantics10[choices]
        semantics10_multi = [semantics10_multi[i] for i in choices]
        point_votes_mask = point_votes[choices, 0]
        point_votes = point_votes[choices, 1:]

        # box angle is -pi to pi
        for i in range(bboxes.shape[0]):
            bbox = bboxes[i]
            corners = params2bbox(bbox[:3], 2 * bbox[3:6],
                                  clockwise2counter(bbox[6]))
            # corners_votenet = sunrgbd_utils.my_compute_box_3d(bbox[:3], bbox[3:6], bbox[6])

            try:
                x_all_cls, ind_all_cls = extract_pc_in_box3d(
                    point_cloud, corners)
            except:
                continue
            ind_all_cls = np.where(ind_all_cls)[0]  # T/F to index
            # find point with same semantic as bbox, note semantics is 37 cls in sunrgbd

            # ind = ind_all_cls[np.where(semantics10[ind_all_cls] == bbox[7])[0]]
            ind = []
            for j in ind_all_cls:
                if bbox[7] in semantics10_multi[j]:
                    ind.append(j)
            ind = np.array(ind)

            if ind.shape[0] < NUM_POINT_SEM_THRESHOLD:
                pass
            else:
                x = point_cloud[ind, :3]

                ###Get bb planes and boundary points
                plane_lower_temp = np.array([0, 0, 1, -corners[6, -1]])
                para_points = np.array(
                    [corners[1], corners[3], corners[5], corners[7]])
                newd = np.sum(para_points * plane_lower_temp[:3], 1)
                if check_upright(
                        para_points
                ) and plane_lower_temp[0] + plane_lower_temp[1] < LOWER_THRESH:
                    plane_lower = np.array([0, 0, 1, plane_lower_temp[-1]])
                    plane_upper = np.array([0, 0, 1, -np.mean(newd)])
                else:
                    import pdb
                    pdb.set_trace()
                    print("error with upright")
                if check_z(plane_upper, para_points) == False:
                    import pdb
                    pdb.set_trace()
                ### Get the boundary points here
                #alldist = np.abs(np.sum(point_cloud[:,:3]*plane_lower[:3], 1) + plane_lower[-1])
                alldist = np.abs(
                    np.sum(x * plane_lower[:3], 1) + plane_lower[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,0] >= xmin) & (point_cloud[:,0] <= xmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)

                ## Get lower four lines
                line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
                    x[sel], corners, 'lower')
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = (corners[0] + corners[2]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = (corners[4] + corners[6]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel3) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel3]] = 1.0
                    linecenter = (corners[0] + corners[4]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel3]] = linecenter - x[sel][line_sel3]
                    point_line_sem[ind[sel][line_sel3]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel4) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel4]] = 1.0
                    linecenter = (corners[2] + corners[6]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel4]] = linecenter - x[sel][line_sel4]
                    point_line_sem[ind[sel][line_sel4]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])

                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])])
                    center = (corners[0] + corners[6]) / 2.0
                    center[2] = np.mean(x[sel][:, 2])
                    sel_global = ind[sel]
                    point_boundary_mask_z[sel_global] = 1.0
                    point_boundary_sem_z[sel_global] = np.array([
                        center[0], center[1], center[2],
                        np.linalg.norm(corners[4] - corners[0]),
                        np.linalg.norm(corners[2] - corners[0]), bbox[7]
                    ])
                    point_boundary_offset_z[sel_global] = center - x[sel]
                '''
                ### Check for middle z surfaces
                [count, val] = np.histogram(alldist, bins=20)
                mind_middle = val[np.argmax(count)]
                sel_pre = np.copy(sel)
                sel = np.abs(alldist - mind_middle) < DIST_THRESH
                if np.abs(np.mean(x[sel_pre][:,2]) - np.mean(x[sel][:,2])) > MIND_THRESH:
                    ### Do not use line for middle surfaces
                    if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                        center = (corners[0] + corners[6]) / 2.0
                        center[2] = np.mean(x[sel][:,2])
                        # center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])])
                        sel_global = ind[sel]
                        point_boundary_mask_z[sel_global] = 1.0
                        point_boundary_sem_z[sel_global] = np.array([center[0], center[1], center[2], np.linalg.norm(corners[4] - corners[0]), np.linalg.norm(corners[2] - corners[0]), bbox[7]])
                        point_boundary_offset_z[sel_global] = center - x[sel]
                '''

                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_upper[:3], 1) + plane_upper[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,0] >= xmin) & (point_cloud[:,0] <= xmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)

                ## Get upper four lines
                line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
                    x[sel], corners, 'upper')
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = (corners[1] + corners[3]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = (corners[5] + corners[7]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel3) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel3]] = 1.0
                    linecenter = (corners[1] + corners[5]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel3]] = linecenter - x[sel][line_sel3]
                    point_line_sem[ind[sel][line_sel3]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel4) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel4]] = 1.0
                    linecenter = (corners[3] + corners[7]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel4]] = linecenter - x[sel][line_sel4]
                    point_line_sem[ind[sel][line_sel4]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])

                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([(xmin+xmax)/2.0, (ymin+ymax)/2.0, np.mean(x[sel][:,2])])
                    center = (corners[1] + corners[7]) / 2.0
                    center[2] = np.mean(x[sel][:, 2])
                    sel_global = ind[sel]
                    point_boundary_mask_z[sel_global] = 1.0
                    point_boundary_sem_z[sel_global] = np.array([
                        center[0], center[1], center[2],
                        np.linalg.norm(corners[5] - corners[1]),
                        np.linalg.norm(corners[3] - corners[1]), bbox[7]
                    ])
                    point_boundary_offset_z[sel_global] = center - x[sel]

                v1 = corners[3] - corners[2]
                v2 = corners[2] - corners[0]
                cp = np.cross(v1, v2)
                d = -np.dot(cp, corners[0])
                a, b, c = cp
                plane_left_temp = np.array([a, b, c, d])
                para_points = np.array(
                    [corners[4], corners[5], corners[6], corners[7]])
                ### Normalize xy here
                plane_left_temp /= np.linalg.norm(plane_left_temp[:3])
                newd = np.sum(para_points * plane_left_temp[:3], 1)
                if plane_left_temp[2] < LOWER_THRESH:
                    plane_left = plane_left_temp  #np.array([cls,res,tempsign,plane_left_temp[-1]])
                    plane_right = np.array([
                        plane_left_temp[0], plane_left_temp[1],
                        plane_left_temp[2], -np.mean(newd)
                    ])
                else:
                    import pdb
                    pdb.set_trace()
                    print("error with upright")
                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_left[:3], 1) + plane_left[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)
                ## Get upper four lines
                line_sel1, line_sel2 = get_linesel(x[sel], corners, 'left')
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = (corners[0] + corners[1]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = (corners[2] + corners[3]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    center = np.array([
                        np.mean(x[sel][:, 0]),
                        np.mean(x[sel][:, 1]),
                        (corners[0, 2] + corners[1, 2]) / 2.0
                    ])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    # point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], zmax - zmin, np.where(DC.nyu40ids == meta_vertices[ind[0],-1])[0][0]])
                    point_boundary_sem_xy[sel_global] = np.array([
                        center[0], center[1], center[2],
                        corners[1, 2] - corners[0, 2], bbox[7]
                    ])
                    point_boundary_offset_xy[sel_global] = center - x[sel]
                '''
                [count, val] = np.histogram(alldist, bins=20)
                mind_middle = val[np.argmax(count)]
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)
                ## Get upper four lines
                sel_pre = np.copy(sel)
                sel = np.abs(alldist - mind_middle) < DIST_THRESH
                if np.abs(np.mean(x[sel_pre][:,0]) - np.mean(x[sel][:,0])) > MIND_THRESH:
                    ### Do not use line for middle surfaces
                    if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                        # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                        center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (corners[0, 2] + corners[1, 2])/2.0])
                        sel_global = ind[sel]
                        point_boundary_mask_xy[sel_global] = 1.0
                        point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], corners[1, 2] - corners[0, 2], bbox[7]])
                        point_boundary_offset_xy[sel_global] = center - x[sel]
                '''

                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_right[:3], 1) + plane_right[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax) & (point_cloud[:,1] >= ymin) & (point_cloud[:,1] <= ymax)
                line_sel1, line_sel2 = get_linesel(x[sel], corners, 'right')
                if np.sum(line_sel1) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel1]] = 1.0
                    linecenter = (corners[4] + corners[5]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel1]] = linecenter - x[sel][line_sel1]
                    point_line_sem[ind[sel][line_sel1]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(line_sel2) > NUM_POINT_LINE:
                    point_line_mask[ind[sel][line_sel2]] = 1.0
                    linecenter = (corners[6] + corners[7]) / 2.0
                    point_line_offset[
                        ind[sel][line_sel2]] = linecenter - x[sel][line_sel2]
                    point_line_sem[ind[sel][line_sel2]] = np.array(
                        [linecenter[0], linecenter[1], linecenter[2], bbox[7]])
                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    center = np.array([
                        np.mean(x[sel][:, 0]),
                        np.mean(x[sel][:, 1]),
                        (corners[4, 2] + corners[5, 2]) / 2.0
                    ])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([
                        center[0], center[1], center[2],
                        corners[5, 2] - corners[4, 2], bbox[7]
                    ])
                    point_boundary_offset_xy[sel_global] = center - x[sel]

                #plane_front_temp = leastsq(residuals, [0,1,0,0], args=(None, np.array([corners[0], corners[1], corners[4], corners[5]]).T))[0]
                v1 = corners[0] - corners[4]
                v2 = corners[4] - corners[5]
                cp = np.cross(v1, v2)
                d = -np.dot(cp, corners[5])
                a, b, c = cp
                plane_front_temp = np.array([a, b, c, d])
                para_points = np.array(
                    [corners[2], corners[3], corners[6], corners[7]])
                plane_front_temp /= np.linalg.norm(plane_front_temp[:3])
                newd = np.sum(para_points * plane_front_temp[:3], 1)
                if plane_front_temp[2] < LOWER_THRESH:
                    plane_front = plane_front_temp  #np.array([cls,res,tempsign,plane_front_temp[-1]])
                    plane_back = np.array([
                        plane_front_temp[0], plane_front_temp[1],
                        plane_front_temp[2], -np.mean(newd)
                    ])
                else:
                    import pdb
                    pdb.set_trace()
                    print("error with upright")
                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_front[:3], 1) + plane_front[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,0] >= xmin) & (point_cloud[:,0] <= xmax) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax)
                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    center = np.array([
                        np.mean(x[sel][:, 0]),
                        np.mean(x[sel][:, 1]),
                        (corners[0, 2] + corners[1, 2]) / 2.0
                    ])
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([
                        center[0], center[1], center[2],
                        corners[1, 2] - corners[0, 2], bbox[7]
                    ])
                    point_boundary_offset_xy[sel_global] = center - x[sel]
                '''
                [count, val] = np.histogram(alldist, bins=20)
                mind_middle = val[np.argmax(count)]
                sel_pre = np.copy(sel)
                sel = np.abs(alldist - mind_middle) < DIST_THRESH
                if np.abs(np.mean(x[sel_pre][:,1]) - np.mean(x[sel][:,1])) > MIND_THRESH:
                    ### Do not use line for middle surfaces
                    if np.sum(sel) > NUM_POINT and np.var(alldist[sel]) < VAR_THRESH:
                        # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                        center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (corners[0, 2] + corners[1, 2])/2.0])
                        sel_global = ind[sel]
                        point_boundary_mask_xy[sel_global] = 1.0
                        point_boundary_sem_xy[sel_global] = np.array([center[0], center[1], center[2], corners[1, 2] - corners[0, 2], bbox[7]])
                        point_boundary_offset_xy[sel_global] = center - x[sel]
                '''

                ### Get the boundary points here
                alldist = np.abs(
                    np.sum(x * plane_back[:3], 1) + plane_back[-1])
                mind = np.min(alldist)
                #[count, val] = np.histogram(alldist, bins=20)
                #mind = val[np.argmax(count)]
                sel = np.abs(alldist - mind) < DIST_THRESH
                if np.sum(sel) > NUM_POINT and np.var(
                        alldist[sel]) < VAR_THRESH:
                    #sel = (np.abs(alldist - mind) < DIST_THRESH) & (point_cloud[:,0] >= xmin) & (point_cloud[:,0] <= xmax) & (point_cloud[:,2] >= zmin) & (point_cloud[:,2] <= zmax)
                    # center = np.array([np.mean(x[sel][:,0]), np.mean(x[sel][:,1]), (zmin+zmax)/2.0])
                    center = np.array([
                        np.mean(x[sel][:, 0]),
                        np.mean(x[sel][:, 1]),
                        (corners[2, 2] + corners[3, 2]) / 2.0
                    ])
                    #point_boundary_offset_xy[sel] = center - x[sel]
                    sel_global = ind[sel]
                    point_boundary_mask_xy[sel_global] = 1.0
                    point_boundary_sem_xy[sel_global] = np.array([
                        center[0], center[1], center[2],
                        corners[3, 2] - corners[2, 2], bbox[7]
                    ])
                    point_boundary_offset_xy[sel_global] = center - x[sel]

        ret_dict = {}
        ret_dict['point_clouds'] = point_cloud.astype(np.float32)
        ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
        ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
        ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
        ret_dict['size_class_label'] = size_classes.astype(np.int64)
        ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:, -1]  # from 0 to 9
        ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
        ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
        ret_dict['vote_label'] = point_votes.astype(np.float32)
        ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
        ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
        ret_dict['max_gt_bboxes'] = max_bboxes

        # new items
        ret_dict['size_label'] = box3d_sizes.astype(np.float32)
        ret_dict['heading_label'] = box3d_angles.astype(np.float32)
        if self.use_height:
            ret_dict['floor_height'] = floor_height

        ret_dict['point_boundary_mask_z'] = point_boundary_mask_z.astype(
            np.float32)
        ret_dict['point_boundary_mask_xy'] = point_boundary_mask_xy.astype(
            np.float32)
        ret_dict['point_boundary_offset_z'] = point_boundary_offset_z.astype(
            np.float32)
        ret_dict['point_boundary_offset_xy'] = point_boundary_offset_xy.astype(
            np.float32)
        ret_dict['point_boundary_sem_z'] = point_boundary_sem_z.astype(
            np.float32)
        ret_dict['point_boundary_sem_xy'] = point_boundary_sem_xy.astype(
            np.float32)

        ret_dict['point_line_mask'] = point_line_mask.astype(np.float32)
        ret_dict['point_line_offset'] = point_line_offset.astype(np.float32)
        ret_dict['point_line_sem'] = point_line_sem.astype(np.float32)

        return ret_dict
# Example #6
# 0
    def __data_generation_(self, idx):
        '''
        Load one scan and build its detection / voting training targets.

        Args:
            idx: index into self.scan_names.

        Returns:
            A list of ten arrays, in this order:
            point_cloud:            N,3+C (float32); N is whatever
                                    pc_utils.random_sampling returns for
                                    self.num_points
            center_label:           MAX_NUM_OBJ, 3 (float32); center of the
                                    axis-aligned box enclosing each GT box
            heading_class_label:    MAX_NUM_OBJ, (int64)
            heading_residual_label: MAX_NUM_OBJ, (float32)
            size_class_label:       MAX_NUM_OBJ, (int64)
            size_residual_label:    MAX_NUM_OBJ, 3 (float32)
            sem_cls_label:          MAX_NUM_OBJ, (int64)
            box_label_mask:         MAX_NUM_OBJ, (float32); 1 for rows that
                                    hold a GT box, 0 for padding
            vote_label:             N, 9 (float32)
            vote_label_mask:        N, (int64)
        '''
        scan_name = self.scan_names[idx]
        scan_prefix = os.path.join(self.data_path, scan_name)
        point_cloud = np.load(scan_prefix + '_pc.npz')['pc']  # N,6
        # Bounding boxes (K,8)
        # [0:3]: centroid coordinate. x,y,z
        # [3:6]: box size, stored as *half* edge lengths (see the note in the
        #        label loop below)
        # [6]: heading angle
        # [7]: semantic class index
        bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
        # Votes (N, 10) -- 1 vote mask and 3 votes
        # [0]: this point is in a bounding box or not (0/1)
        # [1:4],[4:7],[7:10]: if point is not in any bounding box, all zeros;
        # else the offset to the bounding box center
        # one point can be assigned to at most 3 bounding boxes
        point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

        if not self.use_color:
            point_cloud = point_cloud[:, 0:3]  # x,y,z
        else:
            point_cloud = point_cloud[:, 0:6]  # x,y,z,r,g,b
            # Center all three color channels. Assigning to the single column
            # [:, 3] would raise a broadcasting error on the (N,3) right side.
            point_cloud[:, 3:6] = point_cloud[:, 3:6] - MEAN_COLOR_RGB

        if self.use_height:
            # np.percentile takes q in [0, 100], so this is the 0.99th
            # percentile of z, i.e. a value near the lowest points.
            floor_height = np.percentile(point_cloud[:, 2], 0.99)
            height = point_cloud[:, 2] - floor_height
            point_cloud = np.concatenate(
                [point_cloud, np.expand_dims(height, 1)], 1)  # N,4 or N,7

        #-------------data augmentation-------------
        if self.augment:
            if np.random.rand() > 0.5:
                # flipping along YZ plane
                point_cloud[:, 0] = -1 * point_cloud[:, 0]
                bboxes[:, 0] = bboxes[:, 0] * -1
                bboxes[:, 6] = np.pi - bboxes[:, 6]
                point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

            # Rotation along up-axis/Z-axis
            rot_angle = (np.random.rand() * np.pi /
                         3) - np.pi / 6  # -30~30 degree
            rot_mat_t = np.transpose(sunrgbd_utils.rotz(rot_angle))
            point_votes_end = np.zeros_like(point_votes)
            # first, rotate the vote end points "with" the point cloud
            for lo in (1, 4, 7):
                point_votes_end[:, lo:lo + 3] = np.dot(
                    point_cloud[:, 0:3] + point_votes[:, lo:lo + 3],
                    rot_mat_t)
            # then, rotate the point cloud alone
            point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
            bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
            bboxes[:, 6] -= rot_angle  # the original angle is NOT flipped
            # finally, restore the votes by recomputing the offsets
            for lo in (1, 4, 7):
                point_votes[:, lo:lo + 3] = (
                    point_votes_end[:, lo:lo + 3] - point_cloud[:, 0:3])

            # augment the color
            if self.use_color:
                # "+ MEAN_COLOR_RGB" produces a copy, so the result has to be
                # written back to point_cloud explicitly at the end.
                rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB  # undo centering
                rgb_color *= (1 + 0.4 * np.random.rand(3) - 0.2
                              )  # random scale brightness 80% ~ 120%
                rgb_color += (0.1 * np.random.rand(3) - 0.05)  # random shift
                rgb_color += np.expand_dims(
                    np.random.rand(point_cloud.shape[0]) * 0.05 - 0.025,
                    -1)  # random jitter
                rgb_color = rgb_color - MEAN_COLOR_RGB
                rgb_color *= np.expand_dims(
                    np.random.rand(point_cloud.shape[0]) > 0.3,
                    -1)  # drop 30% colors
                point_cloud[:, 3:6] = rgb_color

            # scale the size
            scale_ratio = np.random.rand() * 0.3 + 0.85  # 0.85 ~ 1.15
            point_cloud[:, 0:3] *= scale_ratio
            bboxes[:, 0:6] *= scale_ratio
            # Columns 1..9 hold the three votes; a "1:-1" slice would leave
            # the last vote's z component unscaled.
            point_votes[:, 1:10] *= scale_ratio
            if self.use_height:
                point_cloud[:, -1] *= scale_ratio

            # shift the point cloud -0.5~0.5
            offset = np.expand_dims(np.random.rand(3) - 0.5, 0)
            point_cloud[:, 0:3] += offset
            bboxes[:, 0:3] += offset
            # shifting doesn't change: size, votes, height

        # ------------labels------------
        num_boxes = bboxes.shape[0]
        angle_classes = np.zeros((MAX_NUM_OBJ, ))
        angle_residuals = np.zeros((MAX_NUM_OBJ, ))
        size_classes = np.zeros((MAX_NUM_OBJ, ))
        size_residuals = np.zeros((MAX_NUM_OBJ, 3))
        target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
        target_bboxes_mask[0:num_boxes] = 1
        target_bboxes = np.zeros((MAX_NUM_OBJ, 6))

        for i in range(num_boxes):
            bbox = bboxes[i]
            semantic_class = bbox[7]
            angle_class, angle_residual = DC.angle2class(bbox[6])
            # NOTE: The mean size stored in size2class is of full length of box edges,
            # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
            box3d_size = bbox[3:6] * 2
            size_class, size_residual = DC.size2class(
                box3d_size, DC.class2type[semantic_class])
            angle_classes[i] = angle_class
            angle_residuals[i] = angle_residual
            size_classes[i] = size_class
            size_residuals[i] = size_residual

            # compute the axis aligned box enclosing the rotated GT box
            corners_3d = sunrgbd_utils.my_compute_box_3d(
                bbox[0:3], bbox[3:6], bbox[6])
            xmin = np.min(corners_3d[:, 0])
            ymin = np.min(corners_3d[:, 1])
            zmin = np.min(corners_3d[:, 2])
            xmax = np.max(corners_3d[:, 0])
            ymax = np.max(corners_3d[:, 1])
            zmax = np.max(corners_3d[:, 2])
            # 0:3 - centers
            # 3:6 - size
            target_bboxes[i, :] = np.array([(xmin + xmax) / 2,
                                            (ymin + ymax) / 2,
                                            (zmin + zmax) / 2, xmax - xmin,
                                            ymax - ymin, zmax - zmin])

        # Subsample the points and keep the matching rows of the votes.
        point_cloud, choice = pc_utils.random_sampling(point_cloud,
                                                       self.num_points,
                                                       return_choices=True)
        point_votes_mask = point_votes[choice, 0]
        point_votes = point_votes[choice, 1:]

        center_label = target_bboxes.astype(np.float32)[:, :3]
        heading_class_label = angle_classes.astype(np.int64)
        heading_residual_label = angle_residuals.astype(np.float32)
        size_class_label = size_classes.astype(np.int64)
        size_residual_label = size_residuals.astype(np.float32)

        target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
        target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

        sem_cls_label = target_bboxes_semcls.astype(np.int64)
        box_label_mask = target_bboxes_mask.astype(np.float32)
        vote_label = point_votes.astype(np.float32)
        vote_label_mask = point_votes_mask.astype(np.int64)

        return [
            point_cloud.astype(np.float32),
            center_label,
            heading_class_label,
            heading_residual_label,
            size_class_label,
            size_residual_label,
            sem_cls_label,
            box_label_mask,
            vote_label,
            vote_label_mask,
        ]