def __getitem__(self, idx):
    """
    Load scan ``idx`` from disk, optionally augment it, and build the
    fixed-size training targets.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
        heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
            if there is only one vote then X1==X2==X3 etc.
        vote_label_mask: (N,) with 0/1 with 1 indicating the point
            is in one of the object's OBB.
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: unused

    Raises:
        NotImplementedError: if both ``self.use_color`` and
            ``self.use_box2d`` are set.
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)
    # NpzFile objects keep the archive open; close them as soon as the
    # array has been read.
    with np.load(scan_prefix + '_pc.npz') as pc_archive:
        point_cloud = pc_archive['pc']  # Nx6
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    with np.load(scan_prefix + '_votes.npz') as votes_archive:
        point_votes = votes_archive['point_votes']  # Nx10
    bbox2ds = np.load(scan_prefix + '_bbox2d.npy')
    bbox2d_probs = np.load(scan_prefix + '_bbox2d_prob.npy')
    calib_Rtilt = np.load(scan_prefix + '_calib_Rtilt.npy')
    calib_K = np.load(scan_prefix + '_calib_K.npy')

    if self.use_color and self.use_box2d:
        # NotImplemented is a comparison sentinel and is not callable;
        # NotImplementedError is the exception type.
        raise NotImplementedError(
            'color and 2d bounding box at the same time is not implemented')
    if not self.use_color:
        # The 2D boxes, their probabilities and the calibration are only
        # consumed here; they are not returned separately.
        point_cloud = get_box2d_feature(point_cloud, bbox2ds, bbox2d_probs,
                                        calib_Rtilt, calib_K)
    else:
        point_cloud = point_cloud[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

    if self.use_height:
        # np.percentile takes q in [0, 100], so this is the 0.99th
        # percentile of z (close to the lowest points).
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        # Height is appended as the last feature column.
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)

    # Column ranges of the three votes inside point_votes (column 0 is the mask).
    vote_slices = (slice(1, 4), slice(4, 7), slice(7, 10))

    # ------------------------------- DATA AUGMENTATION ------------------------------
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat_t = np.transpose(sunrgbd_utils.rotz(rot_angle))

        # Votes are offsets from each point; rotate the vote end points
        # first, then re-express them relative to the rotated points.
        point_votes_end = np.zeros_like(point_votes)
        for cols in vote_slices:
            point_votes_end[:, cols] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, cols], rot_mat_t)

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
        bboxes[:, 6] -= rot_angle
        for cols in vote_slices:
            point_votes[:, cols] = point_votes_end[:, cols] - point_cloud[:, 0:3]

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)  # brightness change for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05)  # color shift for each channel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                -1)  # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)  # (1,3)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        for cols in vote_slices:
            point_votes[:, cols] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    box3d_centers = np.zeros((MAX_NUM_OBJ, 3))
    box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    label_mask = np.zeros((MAX_NUM_OBJ))
    label_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        box3d_center = bbox[0:3]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h..
        # so we have to multiply it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        box3d_centers[i, :] = box3d_center
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual
        box3d_sizes[i, :] = box3d_size

    target_bboxes_mask = label_mask
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    for i in range(num_boxes):
        bbox = bboxes[i]
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        # compute axis aligned box
        xmin = np.min(corners_3d[:, 0])
        ymin = np.min(corners_3d[:, 1])
        zmin = np.min(corners_3d[:, 2])
        xmax = np.max(corners_3d[:, 0])
        ymax = np.max(corners_3d[:, 1])
        zmax = np.max(corners_3d[:, 2])
        target_bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
                                (zmin + zmax) / 2, xmax - xmin,
                                ymax - ymin, zmax - zmin])
        target_bboxes[i, :] = target_bbox

    # Subsample points and keep the votes of the selected points only.
    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes
    return ret_dict
def __getitem__(self, idx):
    """
    Load scan ``idx`` from disk, optionally build the 2D image-vote cues,
    optionally augment, and build the fixed-size training targets.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
        heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
            if there is only one vote then X1==X2==X3 etc.
        vote_label_mask: (N,) with 0/1 with 1 indicating the point
            is in one of the object's OBB.
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: unused

    Only when ``self.use_imvote`` is set, the dict additionally has:
        scale: scalar scale ratio applied by augmentation (1.0 without augmentation)
        calib_Rtilt: (3,3) Rtilt, updated with the flip/rotation/scale augmentation
        calib_K: (3,3) matrix read from the second line of the calib file
        full_img_width: int image width
        cls_score_feats: (1+MAX_NUM_2D_DET,NUM_CLS) class scores, row 0 is a dummy row
        full_img_votes_1d: (MAX_NUM_PIXEL*vote_dims,) flattened, zero-padded image votes
        full_img_1d: (MAX_NUM_PIXEL*3,) flattened, zero-padded normalized image
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)
    # NpzFile objects keep the archive open; close them as soon as the
    # array has been read.
    with np.load(scan_prefix + '_pc.npz') as pc_archive:
        point_cloud = pc_archive['pc']  # Nx6
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    with np.load(scan_prefix + '_votes.npz') as votes_archive:
        point_votes = votes_archive['point_votes']  # Nx10

    if self.use_imvote:
        # Read camera parameters (close the file instead of leaking the handle)
        calib_path = os.path.join(self.raw_data_path, 'calib', scan_name+'.txt')
        with open(calib_path) as calib_file:
            calib_lines = calib_file.readlines()
        # Each line holds 9 space-separated floats in column-major ('F') order.
        calib_Rtilt = np.reshape(np.array([float(x) for x in calib_lines[0].rstrip().split(' ')]), (3,3), 'F')
        calib_K = np.reshape(np.array([float(x) for x in calib_lines[1].rstrip().split(' ')]), (3,3), 'F')
        # Read image
        full_img = sunrgbd_utils.load_image(os.path.join(self.raw_data_path, 'image', scan_name+'.jpg'))
        full_img_height = full_img.shape[0]
        full_img_width = full_img.shape[1]

        # ------------------------------- 2D IMAGE VOTES ------------------------------
        cls_id_list = self.cls_id_map[scan_name]
        cls_score_list = self.cls_score_map[scan_name]
        bbox_2d_list = self.bbox_2d_map[scan_name]
        obj_img_list = []
        for i2d, (cls2d, box2d) in enumerate(zip(cls_id_list, bbox_2d_list)):
            xmin, ymin, xmax, ymax = box2d
            # During training we randomly drop 2D boxes to reduce over-fitting
            if self.train and np.random.random()>0.5:
                continue

            obj_img = full_img[ymin:ymax, xmin:xmax, :]
            obj_h = obj_img.shape[0]
            obj_w = obj_img.shape[1]
            # Box origin, cropped box size, class id, index to the semantic cues
            meta_data = (xmin, ymin, obj_h, obj_w, cls2d, i2d)
            if obj_h == 0 or obj_w == 0:
                continue

            # Use 2D box center as approximation
            uv_centroid = np.array([int(obj_w/2), int(obj_h/2)])
            uv_centroid = np.expand_dims(uv_centroid, 0)

            # img_vote[v, u] = offset (du, dv) from pixel (u, v) to the box center
            v_coords, u_coords = np.meshgrid(range(obj_h), range(obj_w), indexing='ij')
            img_vote = np.transpose(np.array([u_coords, v_coords]), (1,2,0))
            img_vote = np.expand_dims(uv_centroid, 0) - img_vote

            obj_img_list.append((meta_data, img_vote))

        # Per-pixel layout: channel 0 counts the boxes covering the pixel;
        # slot k then occupies channels 1+4k .. 1+4k+3 as (du, dv, class id, box index + 1).
        full_img_votes = np.zeros((full_img_height,full_img_width,self.vote_dims), dtype=np.float32)
        # Empty votes: 2d box index is set to -1
        # NOTE(review): with the slot layout used below, stride 3::4 addresses the
        # class-id channel (1+4k+2) rather than the box-index channel (1+4k+3).
        # Left unchanged because consumers may depend on it -- confirm.
        full_img_votes[:,:,3::4] = -1.

        for obj_img_data in obj_img_list:
            meta_data, img_vote = obj_img_data
            u0, v0, h, w, cls2d, i2d = meta_data
            for u in range(u0, u0+w):
                for v in range(v0, v0+h):
                    iidx = int(full_img_votes[v,u,0])
                    # Pixel already holds the maximum number of votes
                    if iidx >= self.max_imvote_per_pixel:
                        continue
                    full_img_votes[v,u,(1+iidx*4):(1+iidx*4+2)] = img_vote[v-v0,u-u0,:]
                    full_img_votes[v,u,(1+iidx*4+2)] = cls2d
                    full_img_votes[v,u,(1+iidx*4+3)] = i2d + 1 # add +1 here as we need a dummy feature for pixels outside all boxes
            # Bump the per-pixel box count once the whole box has been written
            full_img_votes[v0:(v0+h), u0:(u0+w), 0] += 1

        full_img_votes_1d = np.zeros((MAX_NUM_PIXEL*self.vote_dims), dtype=np.float32)
        full_img_votes_1d[0:full_img_height*full_img_width*self.vote_dims] = full_img_votes.flatten()

        # Semantic cues: one-hot vector for class scores
        cls_score_feats = np.zeros((1+MAX_NUM_2D_DET,NUM_CLS), dtype=np.float32)
        # First row is dummy feature
        len_obj = len(cls_id_list)
        if len_obj:
            ind_obj = np.arange(1,len_obj+1)
            ind_cls = np.array(cls_id_list)
            cls_score_feats[ind_obj, ind_cls] = np.array(cls_score_list)

        # Texture cues: normalized RGB values
        full_img = (full_img - 128.) / 255.
        # Serialize data to 1D and save image size so that we can recover the original location in the image
        full_img_1d = np.zeros((MAX_NUM_PIXEL*3), dtype=np.float32)
        full_img_1d[:full_img_height*full_img_width*3] = full_img.flatten()

    if not self.use_color:
        point_cloud = point_cloud[:,0:3]
    else:
        point_cloud = point_cloud[:,0:6]
        point_cloud[:,3:] = (point_cloud[:,3:]-MEAN_COLOR_RGB)

    if self.use_height:
        # np.percentile takes q in [0, 100], so this is the 0.99th
        # percentile of z (close to the lowest points).
        floor_height = np.percentile(point_cloud[:,2],0.99)
        height = point_cloud[:,2] - floor_height
        point_cloud = np.concatenate([point_cloud, np.expand_dims(height, 1)],1) # (N,4) or (N,7)

    # Column ranges of the three votes inside point_votes (column 0 is the mask).
    vote_slices = (slice(1, 4), slice(4, 7), slice(7, 10))

    # ------------------------------- DATA AUGMENTATION ------------------------------
    scale_ratio = 1.
    if self.augment:
        flip_flag = (np.random.random()>0.5)
        if flip_flag:
            # Flipping along the YZ plane
            point_cloud[:,0] = -1 * point_cloud[:,0]
            bboxes[:,0] = -1 * bboxes[:,0]
            bboxes[:,6] = np.pi - bboxes[:,6]
            point_votes[:,[1,4,7]] = -1 * point_votes[:,[1,4,7]]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random()*np.pi/3) - np.pi/6 # -30 ~ +30 degree
        rot_mat = sunrgbd_utils.rotz(rot_angle)
        rot_mat_t = np.transpose(rot_mat)

        # Votes are offsets from each point; rotate the vote end points
        # first, then re-express them relative to the rotated points.
        point_votes_end = np.zeros_like(point_votes)
        for cols in vote_slices:
            point_votes_end[:,cols] = np.dot(point_cloud[:,0:3] + point_votes[:,cols], rot_mat_t)

        point_cloud[:,0:3] = np.dot(point_cloud[:,0:3], rot_mat_t)
        bboxes[:,0:3] = np.dot(bboxes[:,0:3], rot_mat_t)
        bboxes[:,6] -= rot_angle
        for cols in vote_slices:
            point_votes[:,cols] = point_votes_end[:,cols] - point_cloud[:,0:3]

        if self.use_imvote:
            R_inverse = np.copy(np.transpose(rot_mat))
            if flip_flag:
                R_inverse[0,:] *= -1
            # Update Rtilt according to the augmentation
            # R_inverse (3x3) * point (3x1) transforms an augmented depth point
            # to original point in upright_depth coordinates
            calib_Rtilt = np.dot(np.transpose(R_inverse), calib_Rtilt)

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:,3:6] + MEAN_COLOR_RGB
            rgb_color *= (1+0.4*np.random.random(3)-0.2) # brightness change for each channel
            rgb_color += (0.1*np.random.random(3)-0.05) # color shift for each channel
            rgb_color += np.expand_dims((0.05*np.random.random(point_cloud.shape[0])-0.025), -1) # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(np.random.random(point_cloud.shape[0])>0.3,-1)
            point_cloud[:,3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random()*0.3+0.85
        if self.use_imvote:
            # Fold the uniform scale into Rtilt as well
            calib_Rtilt = np.dot(np.array([[scale_ratio,0,0],[0,scale_ratio,0],[0,0,scale_ratio]]), calib_Rtilt)
        scale_ratio_expand = np.expand_dims(np.tile(scale_ratio,3),0) # (1,3)
        point_cloud[:,0:3] *= scale_ratio_expand
        bboxes[:,0:3] *= scale_ratio_expand
        bboxes[:,3:6] *= scale_ratio_expand
        for cols in vote_slices:
            point_votes[:,cols] *= scale_ratio_expand
        if self.use_height:
            point_cloud[:,-1] *= scale_ratio

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    box3d_centers = np.zeros((MAX_NUM_OBJ, 3))
    box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
    angle_classes = np.zeros((MAX_NUM_OBJ,))
    angle_residuals = np.zeros((MAX_NUM_OBJ,))
    size_classes = np.zeros((MAX_NUM_OBJ,))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    label_mask = np.zeros((MAX_NUM_OBJ))
    label_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes,:] = bboxes

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        box3d_center = bbox[0:3]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h..
        # so we have to multiply it by 2 here
        box3d_size = bbox[3:6]*2
        size_class, size_residual = DC.size2class(box3d_size, DC.class2type[semantic_class])
        box3d_centers[i,:] = box3d_center
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual
        box3d_sizes[i,:] = box3d_size

    target_bboxes_mask = label_mask
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    for i in range(num_boxes):
        bbox = bboxes[i]
        corners_3d = sunrgbd_utils.my_compute_box_3d(bbox[0:3], bbox[3:6], bbox[6])
        # compute axis aligned box
        xmin = np.min(corners_3d[:,0])
        ymin = np.min(corners_3d[:,1])
        zmin = np.min(corners_3d[:,2])
        xmax = np.max(corners_3d[:,0])
        ymax = np.max(corners_3d[:,1])
        zmax = np.max(corners_3d[:,2])
        target_bbox = np.array([(xmin+xmax)/2, (ymin+ymax)/2, (zmin+zmax)/2, xmax-xmin, ymax-ymin, zmax-zmin])
        target_bboxes[i,:] = target_bbox

    # Subsample points and keep the votes of the selected points only.
    point_cloud, choices = pc_util.random_sampling(point_cloud, self.num_points, return_choices=True)
    point_votes_mask = point_votes[choices,0]
    point_votes = point_votes[choices,1:]

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:,0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:,-1] # from 0 to 9
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes
    if self.use_imvote:
        ret_dict['scale'] = np.array(scale_ratio).astype(np.float32)
        ret_dict['calib_Rtilt'] = calib_Rtilt.astype(np.float32)
        ret_dict['calib_K'] = calib_K.astype(np.float32)
        ret_dict['full_img_width'] = np.array(full_img_width).astype(np.int64)
        ret_dict['cls_score_feats'] = cls_score_feats.astype(np.float32)
        ret_dict['full_img_votes_1d'] = full_img_votes_1d.astype(np.float32)
        ret_dict['full_img_1d'] = full_img_1d.astype(np.float32)

    return ret_dict
def extract_sunrgbd_data(idx_filename,
                         split,
                         output_folder,
                         num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False,
                         use_v1=False,
                         skip_empty_scene=True):
    """ Extract scene point clouds and
    bounding boxes (centroids, box sizes, heading angles, semantic classes).
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index)
        split: training or testing
        output_folder: directory the dumps are written to (created if missing)
        num_point: number of points each scene is subsampled to
        type_whitelist: class names to keep; other objects are ignored
        save_votes: whether to compute and save Ground truth votes.
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object (no object in whitelist)

    Dumps:
        <id>_pc.npz of (N,6) where N is for number of subsampled points and 6 is
            for XYZ and RGB (in 0~1) in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimension (l,w,h), heading_angle and semantic_class
        <id>_votes.npz of (N,10) with 0/1 indicating whether the point belongs to an object,
            then three sets of GT votes for up to three objects. If the point is only in one
            object's OBB, then the three GT votes are the same.
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    # Read the index list under a context manager so the handle is closed.
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.rstrip()) for line in idx_file]

    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(output_folder, exist_ok=True)

    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        # NOTE(review): scan 479 is skipped unconditionally; the reason is not
        # visible from this function -- confirm it is still needed.
        if data_idx == 479:
            continue
        objects = dataset.get_label_objects(data_idx)

        # Skip scenes with 0 object (any() is False for an empty list too)
        if skip_empty_scene and not any(
                obj.classname in type_whitelist for obj in objects):
            continue

        object_list = []
        for obj in objects:
            if obj.classname not in type_whitelist:
                continue
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with that in data_viz, we do not multiply l,w,h by 2
            # neither do we flip the heading angle
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        if len(object_list) == 0:
            obbs = np.zeros((0, 8))
        else:
            obbs = np.vstack(object_list)  # (K,8)

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        np.savez_compressed(os.path.join(output_folder,
                                         '%06d_pc.npz' % (data_idx)),
                            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)),
                obbs)

        if save_votes:
            N = pc_upright_depth_subsampled.shape[0]
            point_votes = np.zeros((N, 10))  # 3 votes and 1 vote mask
            point_vote_idx = np.zeros(
                (N)).astype(np.int32)  # in the range of [0,2]
            indices = np.arange(N)
            for obj in objects:
                if obj.classname not in type_whitelist:
                    continue
                try:
                    # Find all points in this object's OBB
                    box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                        obj.centroid, np.array([obj.l, obj.w, obj.h]),
                        obj.heading_angle)
                    pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                        pc_upright_depth_subsampled, box3d_pts_3d)
                    # Assign first dimension to indicate it is in an object box
                    point_votes[inds, 0] = 1
                    # Add the votes (all 0 if the point is not in any object's OBB)
                    votes = np.expand_dims(obj.centroid,
                                           0) - pc_in_box3d[:, 0:3]
                    sparse_inds = indices[
                        inds]  # turn dense True,False inds to sparse number-wise inds
                    for i, j in enumerate(sparse_inds):
                        # Next free vote slot (0, 1 or 2) of point j
                        slot = int(point_vote_idx[j])
                        point_votes[j, slot * 3 + 1:(slot + 1) * 3 +
                                    1] = votes[i, :]
                        # Populate votes with the first vote
                        if slot == 0:
                            point_votes[j, 4:7] = votes[i, :]
                            point_votes[j, 7:10] = votes[i, :]
                    # Saturate at the last slot; later boxes overwrite it
                    point_vote_idx[inds] = np.minimum(
                        2, point_vote_idx[inds] + 1)
                except Exception as err:
                    # Best effort: report the failing object and carry on, but
                    # let KeyboardInterrupt/SystemExit propagate.
                    print('ERROR ----', data_idx, obj.classname, repr(err))
            np.savez_compressed(os.path.join(output_folder,
                                             '%06d_votes.npz' % (data_idx)),
                                point_votes=point_votes)
def __getitem__(self, idx):
    """
    Build the training sample for scan ``idx`` from the in-memory lists
    (``point_cloud_list``, ``bboxes_list``, ``point_labels_list``).

    The cached arrays are copied before any in-place colour normalisation or
    augmentation, so repeated access to the same index always starts from the
    original data.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) for GT box center XYZ
            (rows beyond the number of boxes are filled with 1000.0)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
        size_residual_label: (MAX_NUM_OBJ,3)
        size_gts: (MAX_NUM_OBJ,3) extents of the axis-aligned box around each GT box
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        point_obj_mask: (N,) with 0/1 with 1 indicating the point
            is in one of the object's OBB.
        point_instance_label: (N,) with int values in -1,...,num_box,
            indicating which object the point belongs to, -1 means a background point.
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: unused
    """
    cached_points = self.point_cloud_list[idx]  # Nx6
    # Copy: the flip/rotation/scale below write into bboxes in place and
    # must not accumulate in the cached list across calls.
    bboxes = np.array(self.bboxes_list[idx], copy=True)  # K,8
    point_obj_mask = self.point_labels_list[idx][:, 0]
    point_instance_label = self.point_labels_list[idx][:, -1]

    # Copy the selected columns for the same reason: a plain slice would be
    # a view into the cached point cloud.
    if not self.use_color:
        point_cloud = np.array(cached_points[:, 0:3], copy=True)
    else:
        point_cloud = np.array(cached_points[:, 0:6], copy=True)
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB)

    if self.use_height:
        # np.percentile takes q in [0, 100], so this is the 0.99th
        # percentile of z (close to the lowest points).
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat_t = np.transpose(sunrgbd_utils.rotz(rot_angle))

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
        bboxes[:, 6] -= rot_angle

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)  # brightness change for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05)  # color shift for each channel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                -1)  # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)  # (1,3)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    box3d_centers = np.zeros((MAX_NUM_OBJ, 3))
    box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    label_mask = np.zeros((MAX_NUM_OBJ))
    label_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        box3d_center = bbox[0:3]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h..
        # so we have to multiply it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        box3d_centers[i, :] = box3d_center
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual
        box3d_sizes[i, :] = box3d_size

    target_bboxes_mask = label_mask
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    # Centers of padding rows are moved to 1000.0 on every axis.
    target_bboxes[:, 0:3] += 1000.0
    size_gts = np.zeros((MAX_NUM_OBJ, 3))
    for i in range(num_boxes):
        bbox = bboxes[i]
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        # compute axis aligned box
        xmin = np.min(corners_3d[:, 0])
        ymin = np.min(corners_3d[:, 1])
        zmin = np.min(corners_3d[:, 2])
        xmax = np.max(corners_3d[:, 0])
        ymax = np.max(corners_3d[:, 1])
        zmax = np.max(corners_3d[:, 2])
        target_bbox = np.array([(xmin + xmax) / 2, (ymin + ymax) / 2,
                                (zmin + zmax) / 2, xmax - xmin,
                                ymax - ymin, zmax - zmin])
        target_bboxes[i, :] = target_bbox
        size_gts[i, :] = target_bbox[3:6]

    # Subsample points and keep the labels of the selected points only.
    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    point_obj_mask = point_obj_mask[choices]
    point_instance_label = point_instance_label[choices]

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    ret_dict['size_gts'] = size_gts.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['point_obj_mask'] = point_obj_mask.astype(np.int64)
    ret_dict['point_instance_label'] = point_instance_label.astype(
        np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes
    return ret_dict
def __getitem__(self, idx):
    """Load scan ``idx`` and build detection, vote, boundary and line labels.

    Returns a dict with the following keys (N is ``self.num_points``):
        point_clouds: (N, 3+C) sampled points; C covers the optional colour
            and height channels
        center_label: (MAX_NUM_OBJ, 3) centres of the axis-aligned GT boxes
        heading_class_label: (MAX_NUM_OBJ,) int64, from ``DC.angle2class``
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) int64, from ``DC.size2class``
        size_residual_label: (MAX_NUM_OBJ, 3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index (last box column)
        box_label_mask: (MAX_NUM_OBJ,) 0/1 with 1 marking a real box
        vote_label: (N, V) every column of the stored vote array except the
            first one
        vote_label_mask: (N,) first column of the stored vote array
        scan_idx: int scan index in the scan_names list
        max_gt_bboxes: (MAX_NUM_OBJ, 8) zero-padded raw boxes (unused)
        size_label: (MAX_NUM_OBJ, 3) full box edge lengths
        heading_label: (MAX_NUM_OBJ,) raw heading angle of each box
        floor_height: scalar, only present when ``self.use_height`` is set
        point_boundary_mask_z / point_boundary_mask_xy: (N,) 0/1, 1 for points
            assigned to a horizontal / vertical box face
        point_boundary_offset_z / point_boundary_offset_xy: (N, 3) offset from
            the point to the centre of its face
        point_boundary_sem_z: (N, 6) face centre XYZ, two face extents, class
        point_boundary_sem_xy: (N, 5) face centre XYZ, face height, class
        point_line_mask: (N,) 0/1, 1 for points assigned to a box edge
        point_line_offset: (N, 3) offset from the point to the edge midpoint
        point_line_sem: (N, 4) edge midpoint XYZ and class

    Raises:
        ValueError: if a box fails one of the upright / plane sanity checks.
            (These checks previously dropped into ``pdb``, which blocks or
            crashes non-interactive runs such as DataLoader workers.)
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)

    # ``np.load`` keeps an .npz archive open until it is closed, so read the
    # arrays inside ``with`` blocks instead of leaking one handle per item.
    with np.load(scan_prefix + '_pc.npz') as pc_file:
        point_color_sem = pc_file['pc']  # columns 0:3 XYZ, 3:6 colour, 6 semantic id
    bboxes = np.load(scan_prefix + '_bbox.npy')  # (K, 8)
    with np.load(scan_prefix + '_votes.npz') as votes_file:
        point_votes = votes_file['point_votes']  # column 0 mask, then votes

    # Column 6 is never modified below, so the per-point class lookup can be
    # deferred until after sub-sampling.
    semantics37 = point_color_sem[:, 6]

    if not self.use_color:
        point_cloud = point_color_sem[:, 0:3]
    else:
        point_cloud = point_color_sem[:, 0:6]
        point_cloud[:, 3:6] = (point_color_sem[:, 3:6] - MEAN_COLOR_RGB)

    if self.use_height:
        # np.percentile takes q in [0, 100]: this is the 0.99th percentile,
        # i.e. a height close to the lowest points of the scan.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    # NOTE: the order of the np.random calls below is kept unchanged so that
    # seeded runs stay reproducible.
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis, -30 ~ +30 degree
        rot_angle = (np.random.random() * np.pi / 3) - np.pi / 6
        rot_mat_t = np.transpose(sunrgbd_utils.rotz(rot_angle))
        vote_slices = (slice(1, 4), slice(4, 7), slice(7, 10))
        # Rotate the vote end points first (they need the un-rotated points),
        # then turn them back into offsets from the rotated points.
        vote_ends = [
            np.dot(point_cloud[:, 0:3] + point_votes[:, cols], rot_mat_t)
            for cols in vote_slices
        ]
        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
        bboxes[:, 6] -= rot_angle
        for cols, vote_end in zip(vote_slices, vote_ends):
            point_votes[:, cols] = vote_end - point_cloud[:, 0:3]

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            # brightness change for each channel
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2)
            # color shift for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05)
            # jittering on each pixel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025), -1)
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        for cols in vote_slices:
            point_votes[:, cols] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
    box3d_angles = np.zeros((MAX_NUM_OBJ, ))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    target_bboxes_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes

    for i in range(num_boxes):
        bbox = bboxes[i]
        angle_classes[i], angle_residuals[i] = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is the full length of the
        # box edges, while the dumped boxes store *half* lengths l,w,h, so
        # multiply by 2 here.
        box3d_size = bbox[3:6] * 2
        size_classes[i], size_residuals[i] = DC.size2class(
            box3d_size, DC.class2type[bbox[7]])
        box3d_sizes[i, :] = box3d_size
        box3d_angles[i] = bbox[6]

        # Axis-aligned box enclosing the oriented box.
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        lower = np.min(corners_3d[:, 0:3], axis=0)
        upper = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (lower + upper) / 2
        target_bboxes[i, 3:6] = upper - lower

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    semantics10_multi = [
        DC.class37_2_class10_multi[k] for k in semantics37[choices]
    ]
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    # Per-point boundary (face) and line (edge) labels.
    point_boundary_mask_z = np.zeros(self.num_points)
    point_boundary_mask_xy = np.zeros(self.num_points)
    point_boundary_offset_z = np.zeros([self.num_points, 3])
    point_boundary_offset_xy = np.zeros([self.num_points, 3])
    point_boundary_sem_z = np.zeros([self.num_points, 3 + 2 + 1])
    point_boundary_sem_xy = np.zeros([self.num_points, 3 + 1 + 1])
    point_line_mask = np.zeros(self.num_points)
    point_line_offset = np.zeros([self.num_points, 3])
    point_line_sem = np.zeros([self.num_points, 3 + 1])

    def near_plane(pts, plane):
        """Return point-plane distances and the mask of points nearest the plane."""
        dist = np.abs(np.sum(pts * plane[:3], 1) + plane[-1])
        near = np.abs(dist - np.min(dist)) < DIST_THRESH
        return dist, near

    def is_reliable_face(dist, near):
        """True when enough points lie near the plane with a small spread."""
        return np.sum(near) > NUM_POINT and np.var(dist[near]) < VAR_THRESH

    def label_lines(corners, pts, pts_idx, sem_cls, line_sels, corner_pairs):
        """Label the points of each sufficiently populated box edge."""
        for line_sel, (c_a, c_b) in zip(line_sels, corner_pairs):
            if np.sum(line_sel) > NUM_POINT_LINE:
                hit = pts_idx[line_sel]
                linecenter = (corners[c_a] + corners[c_b]) / 2.0
                point_line_mask[hit] = 1.0
                point_line_offset[hit] = linecenter - pts[line_sel]
                point_line_sem[hit] = np.array(
                    [linecenter[0], linecenter[1], linecenter[2], sem_cls])

    def label_face_z(corners, pts, pts_idx, sem_cls, c_from, c_to, c_len,
                     c_wid):
        """Label the points of a horizontal (lower/upper) box face."""
        center = (corners[c_from] + corners[c_to]) / 2.0
        center[2] = np.mean(pts[:, 2])
        point_boundary_mask_z[pts_idx] = 1.0
        point_boundary_sem_z[pts_idx] = np.array([
            center[0], center[1], center[2],
            np.linalg.norm(corners[c_len] - corners[c_from]),
            np.linalg.norm(corners[c_wid] - corners[c_from]), sem_cls
        ])
        point_boundary_offset_z[pts_idx] = center - pts

    def label_face_xy(corners, pts, pts_idx, sem_cls, c_low, c_high):
        """Label the points of a vertical (left/right/front/back) box face."""
        center = np.array([
            np.mean(pts[:, 0]),
            np.mean(pts[:, 1]),
            (corners[c_low, 2] + corners[c_high, 2]) / 2.0
        ])
        point_boundary_mask_xy[pts_idx] = 1.0
        point_boundary_sem_xy[pts_idx] = np.array([
            center[0], center[1], center[2],
            corners[c_high, 2] - corners[c_low, 2], sem_cls
        ])
        point_boundary_offset_xy[pts_idx] = center - pts

    def vertical_plane_pair(v1, v2, anchor, opposite_pts, what):
        """Return a vertical face plane and the parallel plane through ``opposite_pts``."""
        normal = np.cross(v1, v2)
        a, b, c = normal
        plane = np.array([a, b, c, -np.dot(normal, anchor)])
        # Normalize so that plane distances are metric.
        plane /= np.linalg.norm(plane[:3])
        if not plane[2] < LOWER_THRESH:
            raise ValueError(
                f"scan {scan_name!r}: {what} plane of a box is not vertical "
                f"(normal z-component {plane[2]})")
        offsets = np.sum(opposite_pts * plane[:3], 1)
        opposite = np.array(
            [plane[0], plane[1], plane[2], -np.mean(offsets)])
        return plane, opposite

    # box angle is -pi to pi
    for i in range(num_boxes):
        bbox = bboxes[i]
        sem_cls = bbox[7]
        corners = params2bbox(bbox[:3], 2 * bbox[3:6],
                              clockwise2counter(bbox[6]))
        try:
            _, in_box = extract_pc_in_box3d(point_cloud, corners)
        except Exception:
            # Best effort: skip boxes whose point-in-box test fails, but do
            # not swallow KeyboardInterrupt/SystemExit like a bare except.
            continue
        in_box_idx = np.where(in_box)[0]  # T/F to index

        # Keep only in-box points whose (multi-)label contains the box class.
        ind = np.array(
            [j for j in in_box_idx if sem_cls in semantics10_multi[j]])
        if ind.shape[0] < NUM_POINT_SEM_THRESHOLD:
            continue
        x = point_cloud[ind, :3]

        # --- horizontal planes (lower / upper) ---
        plane_lower_temp = np.array([0, 0, 1, -corners[6, -1]])
        para_points = np.array(
            [corners[1], corners[3], corners[5], corners[7]])
        newd = np.sum(para_points * plane_lower_temp[:3], 1)
        if not (check_upright(para_points) and
                plane_lower_temp[0] + plane_lower_temp[1] < LOWER_THRESH):
            raise ValueError(
                f"scan {scan_name!r}, box {i}: error with upright")
        plane_lower = np.array([0, 0, 1, plane_lower_temp[-1]])
        plane_upper = np.array([0, 0, 1, -np.mean(newd)])
        # ``== False`` is kept on purpose: it preserves the original outcome
        # whatever check_z returns (e.g. a NumPy bool).
        if check_z(plane_upper, para_points) == False:  # noqa: E712
            raise ValueError(
                f"scan {scan_name!r}, box {i}: upper plane failed check_z")

        # Lower face and its four edges.
        dist, near = near_plane(x, plane_lower)
        pts, pts_idx = x[near], ind[near]
        line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
            pts, corners, 'lower')
        label_lines(corners, pts, pts_idx, sem_cls,
                    (line_sel1, line_sel2, line_sel3, line_sel4),
                    ((0, 2), (4, 6), (0, 4), (2, 6)))
        if is_reliable_face(dist, near):
            label_face_z(corners, pts, pts_idx, sem_cls, 0, 6, 4, 2)

        # Upper face and its four edges.
        dist, near = near_plane(x, plane_upper)
        pts, pts_idx = x[near], ind[near]
        line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
            pts, corners, 'upper')
        label_lines(corners, pts, pts_idx, sem_cls,
                    (line_sel1, line_sel2, line_sel3, line_sel4),
                    ((1, 3), (5, 7), (1, 5), (3, 7)))
        if is_reliable_face(dist, near):
            label_face_z(corners, pts, pts_idx, sem_cls, 1, 7, 5, 3)

        # --- vertical planes (left / right) ---
        plane_left, plane_right = vertical_plane_pair(
            corners[3] - corners[2], corners[2] - corners[0], corners[0],
            corners[[4, 5, 6, 7]], 'left')

        dist, near = near_plane(x, plane_left)
        pts, pts_idx = x[near], ind[near]
        line_sel1, line_sel2 = get_linesel(pts, corners, 'left')
        label_lines(corners, pts, pts_idx, sem_cls,
                    (line_sel1, line_sel2), ((0, 1), (2, 3)))
        if is_reliable_face(dist, near):
            label_face_xy(corners, pts, pts_idx, sem_cls, 0, 1)

        dist, near = near_plane(x, plane_right)
        pts, pts_idx = x[near], ind[near]
        line_sel1, line_sel2 = get_linesel(pts, corners, 'right')
        label_lines(corners, pts, pts_idx, sem_cls,
                    (line_sel1, line_sel2), ((4, 5), (6, 7)))
        if is_reliable_face(dist, near):
            label_face_xy(corners, pts, pts_idx, sem_cls, 4, 5)

        # --- vertical planes (front / back); no edge labels for these ---
        plane_front, plane_back = vertical_plane_pair(
            corners[0] - corners[4], corners[4] - corners[5], corners[5],
            corners[[2, 3, 6, 7]], 'front')

        dist, near = near_plane(x, plane_front)
        if is_reliable_face(dist, near):
            label_face_xy(corners, x[near], ind[near], sem_cls, 0, 1)

        dist, near = near_plane(x, plane_back)
        if is_reliable_face(dist, near):
            label_face_xy(corners, x[near], ind[near], sem_cls, 2, 3)

    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes

    # new items
    ret_dict['size_label'] = box3d_sizes.astype(np.float32)
    ret_dict['heading_label'] = box3d_angles.astype(np.float32)
    if self.use_height:
        ret_dict['floor_height'] = floor_height

    ret_dict['point_boundary_mask_z'] = point_boundary_mask_z.astype(
        np.float32)
    ret_dict['point_boundary_mask_xy'] = point_boundary_mask_xy.astype(
        np.float32)
    ret_dict['point_boundary_offset_z'] = point_boundary_offset_z.astype(
        np.float32)
    ret_dict['point_boundary_offset_xy'] = point_boundary_offset_xy.astype(
        np.float32)
    ret_dict['point_boundary_sem_z'] = point_boundary_sem_z.astype(
        np.float32)
    ret_dict['point_boundary_sem_xy'] = point_boundary_sem_xy.astype(
        np.float32)

    ret_dict['point_line_mask'] = point_line_mask.astype(np.float32)
    ret_dict['point_line_offset'] = point_line_offset.astype(np.float32)
    ret_dict['point_line_sem'] = point_line_sem.astype(np.float32)
    return ret_dict
def extract_sunrgbd_data(idx_filename,
                         split,
                         output_folder,
                         num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False,
                         use_v1=False,
                         skip_empty_scene=True):
    """Extract scene point clouds, bounding boxes and per-point votes.

    Boxes hold centroids, box sizes, heading angles and semantic classes.
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index);
            blank lines are ignored
        split: training or testing
        output_folder: directory receiving the dumps (created if missing)
        num_point: number of points sub-sampled from each scene
        type_whitelist: class names of the objects to keep
        save_votes: currently unused; votes are always computed and saved
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object
            (no object in whitelist)

    Dumps (each per-scene array is also written as a ``.pkl`` copy):
        <id>_pc.npz: the sub-sampled point cloud in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimension (l,w,h), heading_angle and
            semantic_class
        <id>_votes.npz of (N,13): column 0 is 0/1 indicating whether the
            point belongs to an object, columns 1:10 are three GT votes and
            columns 10:13 the matching row indices into <id>_bbox.npy (-1 if
            the point is in no object). If the point is only in one object's
            OBB, the three votes are the same. The vote with the smallest
            offset is stored first.
        all_*_has_empty.pkl: lists collecting the arrays of every scene
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line) for line in idx_file if line.strip()]

    os.makedirs(output_folder, exist_ok=True)

    all_obbs = []
    all_pc_upright_depth_subsampled = []
    all_point_votes = []

    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        objects = [
            obj for obj in dataset.get_label_objects(data_idx)
            if obj.classname in type_whitelist
        ]

        # Skip scenes with 0 object
        if skip_empty_scene and not objects:
            continue

        obbs = _build_obbs(objects)  # (K,8)
        print(f"{data_idx} has {obbs.shape[0]} gt bboxes")

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        np.savez_compressed(os.path.join(output_folder,
                                         '%06d_pc.npz' % (data_idx)),
                            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)),
                obbs)
        _dump_pickle(pc_upright_depth_subsampled,
                     os.path.join(output_folder, '%06d_pc.pkl' % (data_idx)))
        _dump_pickle(obbs,
                     os.path.join(output_folder, '%06d_bbox.pkl' % (data_idx)))

        all_pc_upright_depth_subsampled.append(pc_upright_depth_subsampled)
        all_obbs.append(obbs)

        point_votes, num_voted = _compute_point_votes(
            pc_upright_depth_subsampled, objects, data_idx)
        _move_nearest_vote_first(point_votes)

        print(f"{data_idx}_votes.npz has {num_voted} gt bboxes")
        np.savez_compressed(os.path.join(output_folder,
                                         '%06d_votes.npz' % (data_idx)),
                            point_votes=point_votes)
        _dump_pickle(
            point_votes,
            os.path.join(output_folder, '%06d_votes.pkl' % (data_idx)))
        all_point_votes.append(point_votes)

    _dump_pickle(
        all_obbs,
        os.path.join(output_folder, 'all_obbs_modified_nearest_has_empty.pkl'))
    _dump_pickle(
        all_pc_upright_depth_subsampled,
        os.path.join(output_folder, 'all_pc_modified_nearest_has_empty.pkl'))
    _dump_pickle(
        all_point_votes,
        os.path.join(output_folder, 'all_point_votes_nearest_has_empty.pkl'))

    # Per point: the in-object flag and the index of its nearest object.
    all_point_labels = [votes[:, [0, 10]] for votes in all_point_votes]
    _dump_pickle(
        all_point_labels,
        os.path.join(output_folder, 'all_point_labels_nearest_has_empty.pkl'))


def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path`` and report it on stdout."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
    print(f"{path} saved successfully !!")


def _build_obbs(objects):
    """Stack the (K,8) box array for already whitelisted ``objects``."""
    if not objects:
        return np.zeros((0, 8))
    obbs = np.zeros((len(objects), 8))
    for row, obj in enumerate(objects):
        obbs[row, 0:3] = obj.centroid
        # Note that compared with that in data_viz, we do not multiply l,w,h
        # by 2, neither do we flip the heading angle
        obbs[row, 3:6] = np.array([obj.l, obj.w, obj.h])
        obbs[row, 6] = obj.heading_angle
        obbs[row, 7] = sunrgbd_utils.type2class[obj.classname]
    return obbs


def _compute_point_votes(points, objects, data_idx):
    """Build the (N,13) vote array for ``points`` against ``objects``.

    Returns the array and the number of objects processed without error.
    The GT index written for an object is its position in ``objects`` (its
    row in the box array), even when an earlier object failed.
    """
    num_points = points.shape[0]
    # 1 vote mask + 3 votes + 3 gt indices
    point_votes = np.zeros((num_points, 13))
    point_votes[:, 10:13] = -1
    # Next free vote slot of each point, in the range of [0,2]
    point_vote_idx = np.zeros((num_points)).astype(np.int32)
    indices = np.arange(num_points)
    num_voted = 0
    for obj_row, obj in enumerate(objects):
        try:
            # Find all points in this object's OBB
            box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                obj.centroid, np.array([obj.l, obj.w, obj.h]),
                obj.heading_angle)
            pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                points, box3d_pts_3d)
            # Assign first dimension to indicate it is in an object box
            point_votes[inds, 0] = 1
            # Add the votes (all 0 if the point is not in any object's OBB)
            votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
            # indices[inds] turns the dense True/False mask into point ids
            for j, vote in zip(indices[inds], votes):
                slot = int(point_vote_idx[j])
                point_votes[j, slot * 3 + 1:(slot + 1) * 3 + 1] = vote
                point_votes[j, slot + 10] = obj_row
                # Populate the other slots with the first vote
                if slot == 0:
                    point_votes[j, 4:7] = vote
                    point_votes[j, 7:10] = vote
                    point_votes[j, 10:13] = obj_row
            point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
            num_voted += 1
        except Exception as err:
            # Best effort: report and move on to the next object.
            print('ERROR ----', data_idx, obj.classname, err)
    return point_votes, num_voted


def _move_nearest_vote_first(point_votes):
    """Swap, in place, the shortest vote (and its GT index) into slot 0."""
    is_pos = point_votes[:, 0] > 0
    assert np.all(point_votes[~is_pos, 10:13] == -1), "error"
    pos = np.flatnonzero(is_pos)
    if pos.size == 0:
        return
    votes = point_votes[pos, 1:10].reshape(-1, 3, 3)
    gt_inds = point_votes[pos, 10:13]
    # argmin returns the first minimum, so ties keep the earlier vote.
    nearest = np.argmin(np.sum(votes ** 2, axis=2), axis=1)
    rows = np.arange(pos.size)
    first_vote = votes[rows, 0].copy()
    first_gt = gt_inds[rows, 0].copy()
    votes[rows, 0] = votes[rows, nearest]
    votes[rows, nearest] = first_vote
    gt_inds[rows, 0] = gt_inds[rows, nearest]
    gt_inds[rows, nearest] = first_gt
    point_votes[pos, 1:10] = votes.reshape(-1, 9)
    point_votes[pos, 10:13] = gt_inds
def __getitem__(self, idx):
    """Load one scan and build the detection / voting training targets.

    Reads ``<scan>_vert.npy``, ``<scan>_ins_label.npy``,
    ``<scan>_sem_label.npy`` and ``<scan>_bbox.npy`` from ``self.data_path``.

    Args:
        idx: index into ``self.scan_names``.

    Returns:
        dict with the following entries:
            point_clouds: float32 sampled points (xyz, plus colour and/or
                height channels depending on ``use_color``/``use_height``).
            center_label: float32 (MAX_NUM_OBJ, 3), axis-aligned box centers.
            heading_class_label: int64 (MAX_NUM_OBJ,).
            heading_residual_label: float32 (MAX_NUM_OBJ,).
            size_class_label: int64 (MAX_NUM_OBJ,).
            size_residual_label: float32 (MAX_NUM_OBJ, 3).
            sem_cls_label: int64 (MAX_NUM_OBJ,), last column of the boxes.
            box_label_mask: float32 (MAX_NUM_OBJ,), 1 for real boxes.
            vote_label: float32 (num_points, 9), the same 3-vector vote
                repeated three times.
            vote_label_mask: int64 (num_points,), 1 where a vote is defined.
            scan_idx: int64 scalar array holding ``idx``.
    """
    scan_name = self.scan_names[idx]
    mesh_vertices = np.load(
        os.path.join(self.data_path, scan_name) + '_vert.npy')
    instance_labels = np.load(
        os.path.join(self.data_path, scan_name) + '_ins_label.npy')
    # Stored semantic labels are shifted down by one before use.
    semantic_labels = np.load(
        os.path.join(self.data_path, scan_name) + '_sem_label.npy').astype(
            np.int32) - 1
    bboxes = np.load(os.path.join(self.data_path, scan_name) + '_bbox.npy')

    if not self.use_color:
        point_cloud = mesh_vertices[:, 0:3]  # xyz only
    else:
        point_cloud = mesh_vertices[:, 0:6]
        point_cloud[:, 3:] = (point_cloud[:, 3:] - MEAN_COLOR_RGB) / 256.0

    if self.use_height:
        # np.percentile takes q in [0, 100]; 0.99 is therefore (almost) the
        # lowest z value, used here as the floor estimate.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)

    # Subsample the cloud and keep the per-point labels aligned with it.
    # Without re-indexing the labels, the vote loop below would index the
    # sampled cloud with positions taken from the full-resolution labels.
    # NOTE(review): assumes both label arrays have one entry per row of
    # mesh_vertices -- confirm against the data preparation step.
    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    instance_labels = instance_labels[choices]
    semantic_labels = semantic_labels[choices]

    # ------------------------- DATA AUGMENTATION -------------------------
    if self.augment:
        # Augmentation is currently disabled for this loader.
        pass

    # ------------------------------ LABELS -------------------------------
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    target_bboxes_mask[0:bboxes.shape[0]] = 1

    # Votes: every point of a valid instance votes for the center of the
    # axis-aligned extent of that instance's (sampled) points.
    point_votes = np.zeros([self.num_points, 3])
    point_votes_mask = np.zeros(self.num_points)
    valid_classes = set(DC.type2class.values())
    for i_instance in np.unique(instance_labels):
        ind = np.where(instance_labels == i_instance)[0]
        # The first point's semantic label decides for the whole instance.
        if semantic_labels[ind[0]] in valid_classes:
            x = point_cloud[ind, :3]
            center = 0.5 * (x.min(0) + x.max(0))
            point_votes[ind, :] = center - x
            point_votes_mask[ind] = 1.0
    point_votes = np.tile(point_votes, (1, 3))  # make 3 votes identical

    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        size_class, size_residual = DC.size2class(
            bbox[3:6], DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual

        # Axis-aligned box enclosing the corners of the oriented box.
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        mins = np.min(corners_3d[:, 0:3], axis=0)
        maxs = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (mins + maxs) / 2
        target_bboxes[i, 3:6] = maxs - mins

    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:, -1]

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    return ret_dict
def __data_generation_(self, idx):
    '''Load one scan, optionally augment it, and build training targets.

    Reads ``<scan>_pc.npz`` (key ``pc``), ``<scan>_bbox.npy`` and
    ``<scan>_votes.npz`` (key ``point_votes``) from ``self.data_path``.

    Args:
        idx: index into ``self.scan_names``.

    Returns:
        A list, in this order:
            point_cloud: float32, N,3+C
            center_label: float32, MAX_NUM_OBJ, 3
            heading_class_label: int64, MAX_NUM_OBJ,
            heading_residual_label: float32, MAX_NUM_OBJ,
            size_class_label: int64, MAX_NUM_OBJ,
            size_residual_label: float32, MAX_NUM_OBJ, 3
            sem_cls_label: int64, MAX_NUM_OBJ,
            box_label_mask: float32, MAX_NUM_OBJ,
            vote_label: float32, N, 9
            vote_label_mask: int64, N,
    '''
    scan_name = self.scan_names[idx]
    point_cloud = np.load(
        os.path.join(self.data_path, scan_name) + '_pc.npz')['pc']  # N,6
    # Bounding boxes (K,8)
    # [0:3]: centroid coordinate. x,y,z
    # [3:6]: size (stored as half extents, see the size note below)
    # [6]: heading angle
    # [7]: semantic class label (used as a key into DC.class2type)
    bboxes = np.load(
        os.path.join(self.data_path, scan_name) + '_bbox.npy')  # K,8
    # Votes (N, 10) -- 1 vote mask and 3 votes
    # [0]: this point is in a bounding box or not (0/1)
    # [1:4],[4:7],[7:10]: if point is not in any bounding box, all zeros;
    #                     else the offset to bounding box center
    # one point can be assigned to at most 3 bounding boxes
    point_votes = np.load(
        os.path.join(self.data_path, scan_name) +
        '_votes.npz')['point_votes']  # Nx10

    if not self.use_color:
        point_cloud = point_cloud[:, 0:3]  # x,y,z
    else:
        point_cloud = point_cloud[:, 0:6]  # x,y,z,r,g,b
        # Center all three colour channels (the slice must be 3: -- a
        # single column cannot receive the (N,3) result).
        point_cloud[:, 3:] = point_cloud[:, 3:] - MEAN_COLOR_RGB

    if self.use_height:
        # np.percentile takes q in [0, 100]; 0.99 is therefore (almost) the
        # lowest z value, used here as the floor estimate.
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # N,4 or N,7

    # ------------- data augmentation -------------
    if self.augment:
        if np.random.rand() > 0.5:
            # flipping along YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = bboxes[:, 0] * -1
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis, -30 ~ +30 degree
        rot_angle = (np.random.rand() * np.pi / 3) - np.pi / 6
        rot_mat = sunrgbd_utils.rotz(rot_angle)
        rot_mat_t = np.transpose(rot_mat)

        # first, rotate the vote end points (point + offset)
        point_votes_end = np.zeros_like(point_votes)
        for lo in (1, 4, 7):
            point_votes_end[:, lo:lo + 3] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, lo:lo + 3], rot_mat_t)
        # then, rotate the point cloud and the box centers
        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
        bboxes[:, 6] -= rot_angle
        # finally, recover the offsets from the rotated end points
        for lo in (1, 4, 7):
            point_votes[:, lo:lo + 3] = (point_votes_end[:, lo:lo + 3] -
                                         point_cloud[:, 0:3])

        # augment the color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB  # un-center
            rgb_color *= (1 + 0.4 * np.random.rand(3) - 0.2
                          )  # per-channel brightness 80% ~ 120%
            rgb_color += (0.1 * np.random.rand(3) - 0.05
                          )  # per-channel shift
            rgb_color += np.expand_dims(
                np.random.rand(point_cloud.shape[0]) * 0.05 - 0.025,
                -1)  # per-point jitter
            rgb_color = np.clip(rgb_color, 0, 1)
            rgb_color = rgb_color - MEAN_COLOR_RGB
            # drop 30% of the colors; dropped points become zero, i.e. the
            # mean color after centering
            rgb_color *= np.expand_dims(
                np.random.rand(point_cloud.shape[0]) > 0.3, -1)
            # rgb_color is a separate array, so the result has to be
            # written back for the augmentation to take effect.
            point_cloud[:, 3:6] = rgb_color

        # scale the scene by 0.85 ~ 1.15
        scale_ratio = np.random.rand() * 0.3 + 0.85
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:6] *= scale_ratio
        # all nine vote coordinates; column 0 is the mask and is left alone
        point_votes[:, 1:10] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio

        # shift the point cloud by -0.5 ~ 0.5 per axis
        offset = np.expand_dims(np.random.rand(3) - 0.5, 0)
        point_cloud[:, 0:3] += offset
        bboxes[:, 0:3] += offset
        # sizes, vote offsets and the height channel are not shifted

    # ------------ labels ------------
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    target_bboxes_mask = np.zeros((MAX_NUM_OBJ))
    target_bboxes_mask[0:bboxes.shape[0]] = 1

    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is the full length of
        # the box edges, while the dumped data holds *half* lengths, so
        # the size is multiplied by 2 here.
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual

        # Axis-aligned box enclosing the corners of the oriented box:
        # [0:3] center, [3:6] size.
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        mins = np.min(corners_3d[:, 0:3], axis=0)
        maxs = np.max(corners_3d[:, 0:3], axis=0)
        target_bboxes[i, 0:3] = (mins + maxs) / 2
        target_bboxes[i, 3:6] = maxs - mins

    # Subsample points and keep the votes aligned with them.
    point_cloud, choice = pc_utils.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    point_votes_mask = point_votes[choice, 0]
    point_votes = point_votes[choice, 1:]

    center_label = target_bboxes.astype(np.float32)[:, :3]
    heading_class_label = angle_classes.astype(np.int64)
    heading_residual_label = angle_residuals.astype(np.float32)
    size_class_label = size_classes.astype(np.int64)
    size_residual_label = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:bboxes.shape[0]] = bboxes[:, -1]
    sem_cls_label = target_bboxes_semcls.astype(np.int64)
    box_label_mask = target_bboxes_mask.astype(np.float32)
    vote_label = point_votes.astype(np.float32)
    vote_label_mask = point_votes_mask.astype(np.int64)

    return [
        point_cloud.astype(np.float32),
        center_label,
        heading_class_label,
        heading_residual_label,
        size_class_label,
        size_residual_label,
        sem_cls_label,
        box_label_mask,
        vote_label,
        vote_label_mask,
    ]