def get_roi_ptcloud(inputs, batch_pred_boxes_params, enlarge_ratio=1.2, num_point_roi=512, min_num_point=100):
    """ Crop a fixed-size point set around every predicted box.

    Each predicted box is enlarged by ``enlarge_ratio`` and the scene points
    that fall inside it are resampled to ``num_point_roi`` points. A box that
    holds fewer than ``min_num_point`` points keeps an all-zero ROI and is
    flagged as empty in the returned mask.

    :param inputs: dict {'point_clouds'}
                   input point clouds of the whole scene
           batch_pred_boxes_params: (B, num_proposals, 7), numpy array
                   predicted bounding box from detector; columns 0:3 are
                   read as the center, 3:6 as the size and 6 as the heading
           enlarge_ratio: scalar
                   the value to enlarge the predicted box size
           num_point_roi: scalar
                   the number of points to be sampled in each enlarged box
           min_num_point: scalar
                   minimum number of points an enlarged box must contain to
                   be treated as non-empty
    :return:
        batch_pc_roi: (B, num_proposals, num_sampled_points, input_pc_features) float32 numpy array
        nonempty_roi_mask: (B, num_proposals) numpy array, 1 = ROI filled, 0 = too few points
    """
    scene_points = inputs['point_clouds'].detach().cpu().numpy()  # B,N,C
    num_scenes = batch_pred_boxes_params.shape[0]
    num_boxes = batch_pred_boxes_params.shape[1]

    roi_points = np.zeros(
        (num_scenes, num_boxes, num_point_roi, scene_points.shape[2]),
        dtype=np.float32)
    roi_mask = np.ones((num_scenes, num_boxes))

    for b in range(num_scenes):
        cloud = scene_points[b]  # (N,C)
        for k in range(num_boxes):
            params = batch_pred_boxes_params[b, k]  # (7)
            # The box helper works in the upright camera frame, so the center
            # is flipped there and the resulting corners are flipped back.
            center_cam = flip_axis_to_camera(params[0:3])
            enlarged_size = params[3:6] * enlarge_ratio
            corners = flip_axis_to_depth(
                get_3d_box(enlarged_size, params[6], center_cam))
            points_inside, _ = extract_pc_in_box3d(cloud, corners)

            if len(points_inside) < min_num_point:
                roi_mask[b, k] = 0
                continue
            roi_points[b, k] = random_sampling(points_inside, num_point_roi)

    return roi_points, roi_mask
def extract_sunrgbd_data(idx_filename, split, output_folder, num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST, save_votes=False,
                         use_v1=False, skip_empty_scene=True):
    """ Extract scene point clouds and
    bounding boxes (centroids, box sizes, heading angles, semantic classes).
    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index)
        split: training or testing
        output_folder: directory the dumps are written to (created if missing)
        num_point: number of points each scene is subsampled to
        type_whitelist: class names to keep; other objects are ignored
        save_votes: whether to compute and save Ground truth votes.
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object (no object in whitelist)

    Dumps:
        <id>_pc.npz of (N,6) where N is for number of subsampled points and 6 is
            for XYZ and RGB (in 0~1) in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimension (l,w,h), heading_angle and semantic_class
        <id>_votes.npz of (N,10) with 0/1 indicating whether the point belongs to an object,
            then three sets of GT votes for up to three objects. If the point is only in one
            object's OBB, then the three GT votes are the same.
    """
    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    # Read the index list inside a context manager so the handle is closed.
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.rstrip()) for line in idx_file]

    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(output_folder, exist_ok=True)

    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        # NOTE(review): scene 479 is skipped unconditionally; the reason is
        # not visible in this function -- confirm it is still required.
        if data_idx == 479:
            continue

        objects = dataset.get_label_objects(data_idx)
        kept_objects = [obj for obj in objects
                        if obj.classname in type_whitelist]

        # Skip scenes with 0 (whitelisted) object
        if skip_empty_scene and not kept_objects:
            continue

        object_list = []
        for obj in kept_objects:
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with that in data_viz, we do not time 2 to l,w.h
            # neither do we flip the heading angle
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        obbs = np.vstack(object_list) if object_list else np.zeros((0, 8))  # (K,8)

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        np.savez_compressed(
            os.path.join(output_folder, '%06d_pc.npz' % (data_idx)),
            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)), obbs)

        if not save_votes:
            continue

        N = pc_upright_depth_subsampled.shape[0]
        point_votes = np.zeros((N, 10))  # 1 vote mask + 3 votes
        point_vote_idx = np.zeros((N)).astype(np.int32)  # in the range of [0,2]
        indices = np.arange(N)
        for obj in kept_objects:
            try:
                # Find all points in this object's OBB
                box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                    obj.centroid, np.array([obj.l, obj.w, obj.h]),
                    obj.heading_angle)
                pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                    pc_upright_depth_subsampled, box3d_pts_3d)
                # Assign first dimension to indicate it is in an object box
                point_votes[inds, 0] = 1
                # Add the votes (all 0 if the point is not in any object's OBB)
                votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
                # turn dense True,False inds to sparse number-wise inds
                sparse_inds = indices[inds]
                for vote, j in zip(votes, sparse_inds):
                    slot = int(point_vote_idx[j])
                    point_votes[j, 3 * slot + 1:3 * slot + 4] = vote
                    # Populate all three vote slots with the first vote
                    if slot == 0:
                        point_votes[j, 4:7] = vote
                        point_votes[j, 7:10] = vote
                point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
            except Exception:
                # Best effort: report the failing object and keep going, but
                # let KeyboardInterrupt / SystemExit propagate.
                print('ERROR ----', data_idx, obj.classname)
        np.savez_compressed(
            os.path.join(output_folder, '%06d_votes.npz' % (data_idx)),
            point_votes=point_votes)
def parse_predictions(end_points, config_dict):
    """ Parse predictions to OBB parameters and suppress overlapping boxes

    Args:
        end_points: dict
            {point_clouds, center, heading_scores, heading_residuals,
            size_scores, size_residuals, sem_cls_scores, objectness_scores}
            On return it additionally holds 'pred_mask', a (B,K) 0/1 numpy
            array marking the boxes kept by NMS, and 'batch_pred_map_cls'.
        config_dict: dict
            {dataset_config, remove_empty_box, use_3d_nms, cls_nms, nms_iou,
            use_old_type_nms, conf_thresh, per_class_proposal}
            ('cls_nms' is only read when use_3d_nms is set)

    Returns:
        batch_pred_map_cls: a list of len == batch size (BS)
            [pred_list_i], i = 0, 1, ..., BS-1
            where pred_list_i = [(pred_sem_cls, box_params, box_score)_j]
            where j = 0, ..., num of valid detections - 1 from sample input i

    Raises:
        AssertionError: if NMS keeps no box for some sample.
    """
    pred_center = end_points['center']  # B,num_proposal,3
    pred_heading_class = torch.argmax(end_points['heading_scores'],
                                      2)  # B,num_proposal
    pred_heading_residual = torch.gather(
        end_points['heading_residuals'], 2,
        pred_heading_class.unsqueeze(2))  # B,num_proposal,1
    pred_heading_residual.squeeze_(2)
    pred_size_class = torch.argmax(end_points['size_scores'],
                                   -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        end_points['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3))  # B,num_proposal,1,3
    pred_size_residual.squeeze_(2)
    pred_sem_cls = torch.argmax(end_points['sem_cls_scores'],
                                -1)  # B,num_proposal
    sem_cls_probs = softmax(
        end_points['sem_cls_scores'].detach().cpu().numpy())  # B,num_proposal,num_class

    bsize = pred_center.shape[0]
    num_proposal = pred_center.shape[1]
    dataset_config = config_dict['dataset_config']

    # Convert each tensor to a host array once, instead of once per proposal
    # inside the double loop below.
    heading_class_np = pred_heading_class.detach().cpu().numpy()
    heading_residual_np = pred_heading_residual.detach().cpu().numpy()
    size_class_np = pred_size_class.detach().cpu().numpy()
    size_residual_np = pred_size_residual.detach().cpu().numpy()
    sem_cls_np = pred_sem_cls.detach().cpu().numpy()

    # Since we operate in upright_depth coord for points, while util functions
    # assume upright_camera coord.
    pred_corners_3d_upright_camera = np.zeros((bsize, num_proposal, 8, 3))
    pred_center_upright_camera = flip_axis_to_camera(
        pred_center.detach().cpu().numpy())
    for i in range(bsize):
        for j in range(num_proposal):
            # np.asarray keeps the arguments 0-d arrays, which is what the
            # per-element tensor conversion hands to class2angle.
            heading_angle = dataset_config.class2angle(
                np.asarray(heading_class_np[i, j]),
                np.asarray(heading_residual_np[i, j]))
            box_size = dataset_config.class2size(
                int(size_class_np[i, j]), size_residual_np[i, j])
            pred_corners_3d_upright_camera[i, j] = new_get_3d_box(
                pred_center_upright_camera[i, j, :], box_size, heading_angle)

    K = num_proposal
    nonempty_box_mask = np.ones((bsize, K))

    if config_dict['remove_empty_box']:
        # -------------------------------------
        # Remove predicted boxes without any point within them..
        batch_pc = end_points['point_clouds'].detach().cpu().numpy()[:, :, 0:3]  # B,N,3
        for i in range(bsize):
            pc = batch_pc[i, :, :]  # (N,3)
            for j in range(K):
                box3d = pred_corners_3d_upright_camera[i, j, :, :]  # (8,3)
                box3d = flip_axis_to_depth(box3d)
                pc_in_box, _ = extract_pc_in_box3d(pc, box3d)
                if len(pc_in_box) < 5:
                    nonempty_box_mask[i, j] = 0
        # -------------------------------------

    obj_logits = end_points['objectness_scores'].detach().cpu().numpy()
    obj_prob = softmax(obj_logits)[:, :, 1]  # (B,K)

    # ---------- NMS input: axis-aligned extents + objectness per box ----------
    use_3d_nms = config_dict['use_3d_nms']
    same_cls_nms = bool(use_3d_nms and config_dict['cls_nms'])
    if not use_3d_nms:
        nms_fn = nms_2d_faster
    elif same_cls_nms:
        nms_fn = nms_3d_faster_samecls
    else:
        nms_fn = nms_3d_faster

    pred_mask = np.zeros((bsize, K))
    for i in range(bsize):
        corner_min = pred_corners_3d_upright_camera[i].min(axis=1)  # (K,3)
        corner_max = pred_corners_3d_upright_camera[i].max(axis=1)  # (K,3)
        if not use_3d_nms:
            # 2D NMS on the camera-frame x/z footprint: (x1, z1, x2, z2, prob)
            boxes_with_prob = np.column_stack([
                corner_min[:, 0], corner_min[:, 2],
                corner_max[:, 0], corner_max[:, 2],
                obj_prob[i]
            ])
        elif same_cls_nms:
            # only suppress if the two boxes are of the same class!!
            boxes_with_prob = np.column_stack(
                [corner_min, corner_max, obj_prob[i], sem_cls_np[i]])
        else:
            boxes_with_prob = np.column_stack(
                [corner_min, corner_max, obj_prob[i]])

        nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
        pick = nms_fn(boxes_with_prob[nonempty_box_inds, :],
                      config_dict['nms_iou'],
                      config_dict['use_old_type_nms'])
        assert (len(pick) > 0)
        pred_mask[i, nonempty_box_inds[pick]] = 1
    end_points['pred_mask'] = pred_mask
    # ---------- NMS output: pred_mask in (B,K) -----------

    # a list (len: batch_size) of list (len: num of predictions per sample)
    # of tuples of pred_cls, pred_box and conf (0-1)
    batch_pred_map_cls = []
    for i in range(bsize):
        kept = [
            j for j in range(K)
            if pred_mask[i, j] == 1
            and obj_prob[i, j] > config_dict['conf_thresh']
        ]
        if config_dict['per_class_proposal']:
            cur_list = []
            for ii in range(dataset_config.num_class):
                cur_list += [(ii, pred_corners_3d_upright_camera[i, j],
                              sem_cls_probs[i, j, ii] * obj_prob[i, j])
                             for j in kept]
            batch_pred_map_cls.append(cur_list)
        else:
            batch_pred_map_cls.append([
                (sem_cls_np[i, j].item(),
                 pred_corners_3d_upright_camera[i, j], obj_prob[i, j])
                for j in kept
            ])
    end_points['batch_pred_map_cls'] = batch_pred_map_cls

    return batch_pred_map_cls
def __getitem__(self, idx):
    """
    Load one scan and build its detection, vote, surface and line labels.

    Returns a dict with following keys:
        point_clouds: (N,3+C)
        center_label: (MAX_NUM_OBJ,3) center of the axis-aligned box
            enclosing each GT box
        heading_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_HEADING_BIN-1
        heading_residual_label: (MAX_NUM_OBJ,)
        size_class_label: (MAX_NUM_OBJ,) with int values in 0,...,NUM_SIZE_CLUSTER
        size_residual_label: (MAX_NUM_OBJ,3)
        sem_cls_label: (MAX_NUM_OBJ,) semantic class index
        box_label_mask: (MAX_NUM_OBJ) as 0/1 with 1 indicating a unique box
        vote_label: (N,9) with votes XYZ (3 votes: X1Y1Z1, X2Y2Z2, X3Y3Z3)
            if there is only one vote than X1==X2==X3 etc.
        vote_label_mask: (N,) with 0/1 with 1 indicating the point
            is in one of the object's OBB.
        scan_idx: int scan index in scan_names list
        max_gt_bboxes: (MAX_NUM_OBJ,8) zero-padded copy of the loaded boxes
        size_label: (MAX_NUM_OBJ,3) full box edge lengths
        heading_label: (MAX_NUM_OBJ,) heading angle of each box
        floor_height: scalar, only present when self.use_height is set
        point_boundary_mask_z / point_boundary_mask_xy: (N,) 0/1, point lies
            on a horizontal / vertical box face
        point_boundary_offset_z / point_boundary_offset_xy: (N,3) offset from
            the point to the face center
        point_boundary_sem_z: (N,6) face center, two footprint edge lengths,
            semantic class
        point_boundary_sem_xy: (N,5) face center, face height, semantic class
        point_line_mask: (N,) 0/1, point lies near a box edge
        point_line_offset: (N,3) offset from the point to the edge midpoint
        point_line_sem: (N,4) edge midpoint, semantic class

    Raises:
        ValueError: if a GT box fails the upright / plane sanity checks.
    """
    scan_name = self.scan_names[idx]
    scan_prefix = os.path.join(self.data_path, scan_name)
    # Column 6 is read below as a per-point semantic id, after XYZ and RGB.
    point_color_sem = np.load(scan_prefix + '_pc.npz')['pc']
    bboxes = np.load(scan_prefix + '_bbox.npy')  # K,8
    point_votes = np.load(scan_prefix + '_votes.npz')['point_votes']  # Nx10

    semantics37 = point_color_sem[:, 6]
    semantics10_multi = [
        DC.class37_2_class10_multi[k] for k in semantics37
    ]
    if not self.use_color:
        point_cloud = point_color_sem[:, 0:3]
    else:
        point_cloud = point_color_sem[:, 0:6]
        point_cloud[:, 3:6] = (point_color_sem[:, 3:6] - MEAN_COLOR_RGB)

    if self.use_height:
        floor_height = np.percentile(point_cloud[:, 2], 0.99)
        height = point_cloud[:, 2] - floor_height
        point_cloud = np.concatenate(
            [point_cloud, np.expand_dims(height, 1)], 1)  # (N,4) or (N,7)

    # ------------------------------- DATA AUGMENTATION ------------------------------
    # The order of the np.random calls is kept stable so that seeded runs
    # stay reproducible.
    if self.augment:
        if np.random.random() > 0.5:
            # Flipping along the YZ plane
            point_cloud[:, 0] = -1 * point_cloud[:, 0]
            bboxes[:, 0] = -1 * bboxes[:, 0]
            bboxes[:, 6] = np.pi - bboxes[:, 6]
            point_votes[:, [1, 4, 7]] = -1 * point_votes[:, [1, 4, 7]]

        # Rotation along up-axis/Z-axis
        rot_angle = (np.random.random() * np.pi /
                     3) - np.pi / 6  # -30 ~ +30 degree
        rot_mat_t = np.transpose(sunrgbd_utils.rotz(rot_angle))

        # Rotate the vote end points, then turn them back into offsets from
        # the rotated points.
        point_votes_end = np.zeros_like(point_votes)
        for lo in (1, 4, 7):
            point_votes_end[:, lo:lo + 3] = np.dot(
                point_cloud[:, 0:3] + point_votes[:, lo:lo + 3], rot_mat_t)

        point_cloud[:, 0:3] = np.dot(point_cloud[:, 0:3], rot_mat_t)
        bboxes[:, 0:3] = np.dot(bboxes[:, 0:3], rot_mat_t)
        bboxes[:, 6] -= rot_angle
        for lo in (1, 4, 7):
            point_votes[:, lo:lo + 3] = (point_votes_end[:, lo:lo + 3] -
                                         point_cloud[:, 0:3])

        # Augment RGB color
        if self.use_color:
            rgb_color = point_cloud[:, 3:6] + MEAN_COLOR_RGB
            rgb_color *= (1 + 0.4 * np.random.random(3) - 0.2
                          )  # brightness change for each channel
            rgb_color += (0.1 * np.random.random(3) - 0.05
                          )  # color shift for each channel
            rgb_color += np.expand_dims(
                (0.05 * np.random.random(point_cloud.shape[0]) - 0.025),
                -1)  # jittering on each pixel
            rgb_color = np.clip(rgb_color, 0, 1)
            # randomly drop out 30% of the points' colors
            rgb_color *= np.expand_dims(
                np.random.random(point_cloud.shape[0]) > 0.3, -1)
            point_cloud[:, 3:6] = rgb_color - MEAN_COLOR_RGB

        # Augment point cloud scale: 0.85x-1.15x
        scale_ratio = np.random.random() * 0.3 + 0.85
        scale_ratio = np.expand_dims(np.tile(scale_ratio, 3), 0)
        point_cloud[:, 0:3] *= scale_ratio
        bboxes[:, 0:3] *= scale_ratio
        bboxes[:, 3:6] *= scale_ratio
        point_votes[:, 1:4] *= scale_ratio
        point_votes[:, 4:7] *= scale_ratio
        point_votes[:, 7:10] *= scale_ratio
        if self.use_height:
            point_cloud[:, -1] *= scale_ratio[0, 0]

    # ------------------------------- LABELS ------------------------------
    num_boxes = bboxes.shape[0]
    box3d_sizes = np.zeros((MAX_NUM_OBJ, 3))
    angle_classes = np.zeros((MAX_NUM_OBJ, ))
    angle_residuals = np.zeros((MAX_NUM_OBJ, ))
    size_classes = np.zeros((MAX_NUM_OBJ, ))
    size_residuals = np.zeros((MAX_NUM_OBJ, 3))
    label_mask = np.zeros((MAX_NUM_OBJ))
    label_mask[0:num_boxes] = 1
    max_bboxes = np.zeros((MAX_NUM_OBJ, 8))
    max_bboxes[0:num_boxes, :] = bboxes

    # new items
    box3d_angles = np.zeros((MAX_NUM_OBJ, ))
    point_boundary_mask_z = np.zeros(self.num_points)
    point_boundary_mask_xy = np.zeros(self.num_points)
    point_boundary_offset_z = np.zeros([self.num_points, 3])
    point_boundary_offset_xy = np.zeros([self.num_points, 3])
    point_boundary_sem_z = np.zeros([self.num_points, 3 + 2 + 1])
    point_boundary_sem_xy = np.zeros([self.num_points, 3 + 1 + 1])
    point_line_mask = np.zeros(self.num_points)
    point_line_offset = np.zeros([self.num_points, 3])
    point_line_sem = np.zeros([self.num_points, 3 + 1])

    for i in range(num_boxes):
        bbox = bboxes[i]
        semantic_class = bbox[7]
        angle_class, angle_residual = DC.angle2class(bbox[6])
        # NOTE: The mean size stored in size2class is of full length of box edges,
        # while in sunrgbd_data.py data dumping we dumped *half* length l,w,h.. so have to time it by 2 here
        box3d_size = bbox[3:6] * 2
        size_class, size_residual = DC.size2class(
            box3d_size, DC.class2type[semantic_class])
        angle_classes[i] = angle_class
        angle_residuals[i] = angle_residual
        size_classes[i] = size_class
        size_residuals[i] = size_residual
        box3d_sizes[i, :] = box3d_size
        box3d_angles[i] = bbox[6]

    target_bboxes_mask = label_mask
    target_bboxes = np.zeros((MAX_NUM_OBJ, 6))
    for i in range(num_boxes):
        bbox = bboxes[i]
        corners_3d = sunrgbd_utils.my_compute_box_3d(
            bbox[0:3], bbox[3:6], bbox[6])
        # compute axis aligned box
        xmin = np.min(corners_3d[:, 0])
        ymin = np.min(corners_3d[:, 1])
        zmin = np.min(corners_3d[:, 2])
        xmax = np.max(corners_3d[:, 0])
        ymax = np.max(corners_3d[:, 1])
        zmax = np.max(corners_3d[:, 2])
        target_bboxes[i, :] = np.array([(xmin + xmax) / 2,
                                        (ymin + ymax) / 2,
                                        (zmin + zmax) / 2, xmax - xmin,
                                        ymax - ymin, zmax - zmin])

    point_cloud, choices = pc_util.random_sampling(point_cloud,
                                                   self.num_points,
                                                   return_choices=True)
    semantics10_multi = [semantics10_multi[i] for i in choices]
    point_votes_mask = point_votes[choices, 0]
    point_votes = point_votes[choices, 1:]

    # --------------------- SURFACE / LINE LABEL HELPERS ---------------------
    def points_near_plane(pts, plane):
        # Distance of every point to `plane` plus a mask of the points within
        # DIST_THRESH of the closest one.
        dist = np.abs(np.sum(pts * plane[:3], 1) + plane[-1])
        return dist, np.abs(dist - np.min(dist)) < DIST_THRESH

    def label_lines(face_ids, face_pts, line_sels, corner_pairs, corners,
                    sem_cls):
        # Label the points of each sufficiently populated box edge with the
        # offset to, and position of, the edge midpoint.
        for line_sel, (a, b) in zip(line_sels, corner_pairs):
            if np.sum(line_sel) > NUM_POINT_LINE:
                line_ids = face_ids[line_sel]
                linecenter = (corners[a] + corners[b]) / 2.0
                point_line_mask[line_ids] = 1.0
                point_line_offset[line_ids] = linecenter - face_pts[line_sel]
                point_line_sem[line_ids] = np.array([
                    linecenter[0], linecenter[1], linecenter[2], sem_cls
                ])

    def face_is_reliable(sel, dist):
        # A face is labelled only if enough points lie on it and they are
        # tightly clustered around the plane.
        return np.sum(sel) > NUM_POINT and np.var(dist[sel]) < VAR_THRESH

    def label_z_face(ids, pts, sel, dist, diag_a, diag_b, edge_u, edge_v,
                     sem_cls):
        # Label points on a horizontal (lower/upper) face.
        if face_is_reliable(sel, dist):
            center = (diag_a + diag_b) / 2.0
            center[2] = np.mean(pts[sel][:, 2])
            sel_global = ids[sel]
            point_boundary_mask_z[sel_global] = 1.0
            point_boundary_sem_z[sel_global] = np.array([
                center[0], center[1], center[2],
                np.linalg.norm(edge_u),
                np.linalg.norm(edge_v), sem_cls
            ])
            point_boundary_offset_z[sel_global] = center - pts[sel]

    def label_xy_face(ids, pts, sel, dist, z_low, z_high, sem_cls):
        # Label points on a vertical (left/right/front/back) face.
        if face_is_reliable(sel, dist):
            center = np.array([
                np.mean(pts[sel][:, 0]),
                np.mean(pts[sel][:, 1]), (z_low + z_high) / 2.0
            ])
            sel_global = ids[sel]
            point_boundary_mask_xy[sel_global] = 1.0
            point_boundary_sem_xy[sel_global] = np.array([
                center[0], center[1], center[2], z_high - z_low, sem_cls
            ])
            point_boundary_offset_xy[sel_global] = center - pts[sel]

    def vertical_plane_pair(v1, v2, anchor, opposite_pts, box_idx):
        # Plane through `anchor` with normal v1 x v2 (normalised), and the
        # parallel plane through the mean of `opposite_pts`.
        normal = np.cross(v1, v2)
        near = np.array(
            [normal[0], normal[1], normal[2], -np.dot(normal, anchor)])
        ### Normalize xy here
        near /= np.linalg.norm(near[:3])
        if not near[2] < LOWER_THRESH:
            raise ValueError(
                f"error with upright: side plane of box {box_idx} in scan "
                f"{scan_name} is not vertical")
        far_d = -np.mean(np.sum(opposite_pts * near[:3], 1))
        return near, np.array([near[0], near[1], near[2], far_d])

    # box angle is -pi to pi
    for i in range(num_boxes):
        bbox = bboxes[i]
        sem_cls = bbox[7]
        corners = params2bbox(bbox[:3], 2 * bbox[3:6],
                              clockwise2counter(bbox[6]))
        try:
            _, ind_all_cls = extract_pc_in_box3d(point_cloud, corners)
        except Exception:
            # Skip boxes the point-in-box test cannot handle, but let
            # KeyboardInterrupt / SystemExit propagate.
            continue
        ind_all_cls = np.where(ind_all_cls)[0]  # T/F to index

        # keep only the in-box points whose semantic label set contains the
        # box class
        ind = np.array(
            [j for j in ind_all_cls if sem_cls in semantics10_multi[j]])
        if ind.shape[0] < NUM_POINT_SEM_THRESHOLD:
            continue
        x = point_cloud[ind, :3]

        ### Get bb planes and boundary points
        plane_lower_temp = np.array([0, 0, 1, -corners[6, -1]])
        para_points = np.array(
            [corners[1], corners[3], corners[5], corners[7]])
        newd = np.sum(para_points * plane_lower_temp[:3], 1)
        if not (check_upright(para_points) and
                plane_lower_temp[0] + plane_lower_temp[1] < LOWER_THRESH):
            raise ValueError(
                f"error with upright: box {i} in scan {scan_name} is not "
                f"upright")
        plane_lower = np.array([0, 0, 1, plane_lower_temp[-1]])
        plane_upper = np.array([0, 0, 1, -np.mean(newd)])
        # `== False` is kept on purpose: only an explicit False-valued result
        # counts as a failure.
        if check_z(plane_upper, para_points) == False:  # noqa: E712
            raise ValueError(
                f"upper plane check failed for box {i} in scan {scan_name}")

        ### Lower face: four lines, then the surface itself
        alldist, sel = points_near_plane(x, plane_lower)
        line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
            x[sel], corners, 'lower')
        label_lines(ind[sel], x[sel],
                    (line_sel1, line_sel2, line_sel3, line_sel4),
                    ((0, 2), (4, 6), (0, 4), (2, 6)), corners, sem_cls)
        label_z_face(ind, x, sel, alldist, corners[0], corners[6],
                     corners[4] - corners[0], corners[2] - corners[0],
                     sem_cls)

        ### Upper face
        alldist, sel = points_near_plane(x, plane_upper)
        line_sel1, line_sel2, line_sel3, line_sel4 = get_linesel(
            x[sel], corners, 'upper')
        label_lines(ind[sel], x[sel],
                    (line_sel1, line_sel2, line_sel3, line_sel4),
                    ((1, 3), (5, 7), (1, 5), (3, 7)), corners, sem_cls)
        label_z_face(ind, x, sel, alldist, corners[1], corners[7],
                     corners[5] - corners[1], corners[3] - corners[1],
                     sem_cls)

        ### Left / right faces (these also carry the vertical lines)
        plane_left, plane_right = vertical_plane_pair(
            corners[3] - corners[2], corners[2] - corners[0], corners[0],
            np.array([corners[4], corners[5], corners[6], corners[7]]), i)

        alldist, sel = points_near_plane(x, plane_left)
        line_sel1, line_sel2 = get_linesel(x[sel], corners, 'left')
        label_lines(ind[sel], x[sel], (line_sel1, line_sel2),
                    ((0, 1), (2, 3)), corners, sem_cls)
        label_xy_face(ind, x, sel, alldist, corners[0, 2], corners[1, 2],
                      sem_cls)

        alldist, sel = points_near_plane(x, plane_right)
        line_sel1, line_sel2 = get_linesel(x[sel], corners, 'right')
        label_lines(ind[sel], x[sel], (line_sel1, line_sel2),
                    ((4, 5), (6, 7)), corners, sem_cls)
        label_xy_face(ind, x, sel, alldist, corners[4, 2], corners[5, 2],
                      sem_cls)

        ### Front / back faces (all lines are already covered above)
        plane_front, plane_back = vertical_plane_pair(
            corners[0] - corners[4], corners[4] - corners[5], corners[5],
            np.array([corners[2], corners[3], corners[6], corners[7]]), i)

        alldist, sel = points_near_plane(x, plane_front)
        label_xy_face(ind, x, sel, alldist, corners[0, 2], corners[1, 2],
                      sem_cls)

        alldist, sel = points_near_plane(x, plane_back)
        label_xy_face(ind, x, sel, alldist, corners[2, 2], corners[3, 2],
                      sem_cls)

    ret_dict = {}
    ret_dict['point_clouds'] = point_cloud.astype(np.float32)
    ret_dict['center_label'] = target_bboxes.astype(np.float32)[:, 0:3]
    ret_dict['heading_class_label'] = angle_classes.astype(np.int64)
    ret_dict['heading_residual_label'] = angle_residuals.astype(np.float32)
    ret_dict['size_class_label'] = size_classes.astype(np.int64)
    ret_dict['size_residual_label'] = size_residuals.astype(np.float32)
    target_bboxes_semcls = np.zeros((MAX_NUM_OBJ))
    target_bboxes_semcls[0:num_boxes] = bboxes[:, -1]  # from 0 to 9
    ret_dict['sem_cls_label'] = target_bboxes_semcls.astype(np.int64)
    ret_dict['box_label_mask'] = target_bboxes_mask.astype(np.float32)
    ret_dict['vote_label'] = point_votes.astype(np.float32)
    ret_dict['vote_label_mask'] = point_votes_mask.astype(np.int64)
    ret_dict['scan_idx'] = np.array(idx).astype(np.int64)
    ret_dict['max_gt_bboxes'] = max_bboxes

    # new items
    ret_dict['size_label'] = box3d_sizes.astype(np.float32)
    ret_dict['heading_label'] = box3d_angles.astype(np.float32)
    if self.use_height:
        ret_dict['floor_height'] = floor_height

    ret_dict['point_boundary_mask_z'] = point_boundary_mask_z.astype(
        np.float32)
    ret_dict['point_boundary_mask_xy'] = point_boundary_mask_xy.astype(
        np.float32)
    ret_dict['point_boundary_offset_z'] = point_boundary_offset_z.astype(
        np.float32)
    ret_dict['point_boundary_offset_xy'] = point_boundary_offset_xy.astype(
        np.float32)
    ret_dict['point_boundary_sem_z'] = point_boundary_sem_z.astype(
        np.float32)
    ret_dict['point_boundary_sem_xy'] = point_boundary_sem_xy.astype(
        np.float32)

    ret_dict['point_line_mask'] = point_line_mask.astype(np.float32)
    ret_dict['point_line_offset'] = point_line_offset.astype(np.float32)
    ret_dict['point_line_sem'] = point_line_sem.astype(np.float32)

    return ret_dict
def extract_sunrgbd_data(idx_filename,
                         split,
                         output_folder,
                         num_point=20000,
                         type_whitelist=DEFAULT_TYPE_WHITELIST,
                         save_votes=False,
                         use_v1=False,
                         skip_empty_scene=True):
    """ Extract scene point clouds, bounding boxes (centroids, box sizes,
    heading angles, semantic classes) and per-point ground-truth votes.

    Dumped point clouds and boxes are in upright depth coord.

    Args:
        idx_filename: a TXT file where each line is an int number (index);
            blank lines are ignored
        split: training or testing
        output_folder: directory receiving the dumps (created if missing,
            including parent directories)
        num_point: number of points requested from pc_util.random_sampling
            for every scene
        type_whitelist: class names to keep; other objects are ignored
        save_votes: accepted for backward compatibility; it is not consulted
            here, votes are always computed and saved
        use_v1: use the SUN RGB-D V1 data
        skip_empty_scene: if True, skip scenes that contain no object
            (no object in whitelist)

    Dumps (each array is written both in NumPy format and as a .pkl pickle):
        <id>_pc.npz of (N,6) where N is for number of subsampled points and 6
            is for XYZ and RGB (in 0~1) in upright depth coord
        <id>_bbox.npy of (K,8) where K is the number of objects, 8 is for
            centroids (cx,cy,cz), dimension (l,w,h), heading_angle and
            semantic_class
        <id>_votes.npz of (N,13): column 0 is a 0/1 flag telling whether the
            point lies in any object box, columns 1:10 hold three GT votes
            (centroid - point) and columns 10:13 the matching object indices
            (-1 for points outside every box). If the point is only in one
            object's OBB the three votes are the same. The vote with the
            smallest norm is moved to the first slot.
        all_*_has_empty.pkl: lists gathering the per-scene boxes, point
            clouds, votes and (flag, first object index) labels.
    """

    def _dump_pickle(path, payload):
        # Pickle `payload` to `path` and report it on stdout.
        with open(path, 'wb') as f:
            pickle.dump(payload, f)
        print(f"{path} saved successfully !!")

    dataset = sunrgbd_object('./sunrgbd_trainval', split, use_v1=use_v1)
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.strip()) for line in idx_file
                         if line.strip()]

    os.makedirs(output_folder, exist_ok=True)

    all_obbs = []
    all_pc_upright_depth_subsampled = []
    all_point_votes = []

    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        objects = dataset.get_label_objects(data_idx)

        # Skip scenes with 0 (whitelisted) object
        if skip_empty_scene and not any(
                obj.classname in type_whitelist for obj in objects):
            continue

        object_list = []
        for obj in objects:
            if obj.classname not in type_whitelist:
                continue
            obb = np.zeros((8))
            obb[0:3] = obj.centroid
            # Note that compared with that in data_viz, we do not time 2 to
            # l,w.h neither do we flip the heading angle
            obb[3:6] = np.array([obj.l, obj.w, obj.h])
            obb[6] = obj.heading_angle
            obb[7] = sunrgbd_utils.type2class[obj.classname]
            object_list.append(obb)
        if object_list:
            obbs = np.vstack(object_list)  # (K,8)
        else:
            obbs = np.zeros((0, 8))
        print(f"{data_idx} has {obbs.shape[0]} gt bboxes")

        pc_upright_depth = dataset.get_depth(data_idx)
        pc_upright_depth_subsampled = pc_util.random_sampling(
            pc_upright_depth, num_point)

        np.savez_compressed(
            os.path.join(output_folder, '%06d_pc.npz' % (data_idx)),
            pc=pc_upright_depth_subsampled)
        np.save(os.path.join(output_folder, '%06d_bbox.npy' % (data_idx)),
                obbs)

        # pickle save
        _dump_pickle(os.path.join(output_folder, '%06d_pc.pkl' % (data_idx)),
                     pc_upright_depth_subsampled)
        _dump_pickle(os.path.join(output_folder, '%06d_bbox.pkl' % (data_idx)),
                     obbs)

        # add to collection
        all_pc_upright_depth_subsampled.append(pc_upright_depth_subsampled)
        all_obbs.append(obbs)

        N = pc_upright_depth_subsampled.shape[0]
        # 1 vote mask + 3 votes (3 values each) + 3 gt object indices
        point_votes = np.zeros((N, 13))
        point_votes[:, 10:13] = -1
        # Next free vote slot of every point, in the range of [0,2]
        point_vote_idx = np.zeros((N)).astype(np.int32)
        indices = np.arange(N)

        # NOTE(review): i_obj only advances when an object is processed
        # without error, whereas obbs holds every whitelisted object; after a
        # failure the stored indices no longer equal row numbers of obbs.
        # Confirm whether that is intended.
        i_obj = 0
        for obj in objects:
            if obj.classname not in type_whitelist:
                continue
            try:
                # Find all points in this object's OBB
                box3d_pts_3d = sunrgbd_utils.my_compute_box_3d(
                    obj.centroid, np.array([obj.l, obj.w, obj.h]),
                    obj.heading_angle)
                pc_in_box3d, inds = sunrgbd_utils.extract_pc_in_box3d(
                    pc_upright_depth_subsampled, box3d_pts_3d)
                # Assign first dimension to indicate it is in an object box
                point_votes[inds, 0] = 1
                # Add the votes (all 0 if the point is not in any object's OBB)
                votes = np.expand_dims(obj.centroid, 0) - pc_in_box3d[:, 0:3]
                # turn dense True,False inds to sparse number-wise inds
                sparse_inds = indices[inds]
                for i, j in enumerate(sparse_inds):
                    slot = int(point_vote_idx[j])
                    start = slot * 3 + 1
                    point_votes[j, start:start + 3] = votes[i, :]
                    point_votes[j, slot + 10] = i_obj
                    # Populate every slot with the first vote
                    if slot == 0:
                        point_votes[j, 4:7] = votes[i, :]
                        point_votes[j, 7:10] = votes[i, :]
                        point_votes[j, 10:13] = i_obj
                point_vote_idx[inds] = np.minimum(2, point_vote_idx[inds] + 1)
                i_obj += 1
            except Exception as err:
                # Best effort: report the object that failed (and why) and
                # carry on with the remaining ones.
                print('ERROR ----', data_idx, obj.classname, repr(err))

        # choose the nearest as the first gt for each point: swap the vote
        # slot with the smallest squared norm (and its object index) with
        # slot 0, for all positive points at once.
        is_pos = point_votes[:, 0] > 0
        pos_rows = np.flatnonzero(is_pos)
        if pos_rows.size:
            rows = np.arange(pos_rows.size)
            vote_slots = point_votes[pos_rows, 1:10].reshape(-1, 3, 3)
            near_ind = np.argmin(np.sum(vote_slots ** 2, axis=2), axis=1)

            first_votes = vote_slots[:, 0, :].copy()
            vote_slots[:, 0, :] = vote_slots[rows, near_ind, :]
            vote_slots[rows, near_ind, :] = first_votes
            point_votes[pos_rows, 1:10] = vote_slots.reshape(-1, 9)

            gt_slots = point_votes[pos_rows, 10:13]
            first_gt = gt_slots[:, 0].copy()
            gt_slots[:, 0] = gt_slots[rows, near_ind]
            gt_slots[rows, near_ind] = first_gt
            point_votes[pos_rows, 10:13] = gt_slots
        # Points outside every box must still carry the -1 placeholders.
        assert np.all(point_votes[~is_pos, 10:13] == -1), "error"

        print(f"{data_idx}_votes.npz has {i_obj} gt bboxes")
        np.savez_compressed(
            os.path.join(output_folder, '%06d_votes.npz' % (data_idx)),
            point_votes=point_votes)
        _dump_pickle(
            os.path.join(output_folder, '%06d_votes.pkl' % (data_idx)),
            point_votes)
        all_point_votes.append(point_votes)

    _dump_pickle(
        os.path.join(output_folder, 'all_obbs_modified_nearest_has_empty.pkl'),
        all_obbs)
    _dump_pickle(
        os.path.join(output_folder, 'all_pc_modified_nearest_has_empty.pkl'),
        all_pc_upright_depth_subsampled)
    _dump_pickle(
        os.path.join(output_folder, 'all_point_votes_nearest_has_empty.pkl'),
        all_point_votes)

    # Per-point labels: in-box flag and index of the (nearest) first object.
    all_point_labels = [votes[:, [0, 10]] for votes in all_point_votes]
    _dump_pickle(
        os.path.join(output_folder, 'all_point_labels_nearest_has_empty.pkl'),
        all_point_labels)
def parse_crop_predictions(end_points, point_cloud, DC):
    """ Drop the proposals whose predicted 3D box holds fewer than 5 points.

    The predicted heading/size class and residual of every proposal are
    decoded through `DC` into a 3D box, the points of `point_cloud` falling
    inside that box are counted, and the prediction tensors stored in
    `end_points` are filtered down to the proposals that pass.

    :param end_points: dict of prediction tensors; the entries 'center',
            'heading_scores', 'heading_residuals',
            'heading_residuals_normalized', 'size_scores', 'size_residuals',
            'size_residuals_normalized', 'sem_cls_scores' and
            'objectness_scores' are read and overwritten in place.
            Only batch size 1 is supported (asserted).
           point_cloud: numpy array whose first three columns are used as the
            point coordinates (N, >=3)
           DC: object providing class2angle(cls, residual) and
            class2size(cls, residual)
    :return: the same `end_points` dict, where each listed entry now has
            shape (1, num_kept_proposals, ...)
    """
    pred_center = end_points['center']  # B,num_proposal,3
    pred_heading_class = torch.argmax(end_points['heading_scores'],
                                      -1)  # B,num_proposal
    pred_heading_residual = torch.gather(
        end_points['heading_residuals'], 2,
        pred_heading_class.unsqueeze(-1)).squeeze(2)  # B,num_proposal
    pred_size_class = torch.argmax(end_points['size_scores'],
                                   -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        end_points['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3)).squeeze(2)  # B,num_proposal,3

    bsize = pred_center.shape[0]
    assert bsize == 1
    num_proposal = pred_center.shape[1]

    # Move everything needed by the numpy helpers to the host once instead of
    # once per proposal.
    heading_class_np = pred_heading_class.detach().cpu().numpy()
    heading_residual_np = pred_heading_residual.detach().cpu().numpy()
    size_class_np = pred_size_class.detach().cpu().numpy()
    size_residual_np = pred_size_residual.detach().cpu().numpy()

    # Since we operate in upright_depth coord for points, while util functions
    # assume upright_camera coord.
    pred_center_upright_camera = flip_axis_to_camera(
        pred_center.detach().cpu().numpy())
    pred_corners_3d_upright_camera = np.zeros((num_proposal, 8, 3))

    pc = point_cloud.copy()[:, 0:3]  # (N,3)

    # -------------------------------------
    # Remove predicted boxes without any point within them..
    # The mask must be boolean: it is used below to index the tensors, and a
    # float array is not a valid index.
    nonempty_box_mask = np.ones(num_proposal, dtype=bool)
    for j in range(num_proposal):
        heading_angle = DC.class2angle(heading_class_np[0, j],
                                       heading_residual_np[0, j])
        box_size = DC.class2size(int(size_class_np[0, j]),
                                 size_residual_np[0, j])
        pred_corners_3d_upright_camera[j] = get_3d_box(
            box_size, heading_angle, pred_center_upright_camera[0, j, :])

        box3d = flip_axis_to_depth(pred_corners_3d_upright_camera[j])  # (8,3)
        pc_in_box, _ = extract_pc_in_box3d(pc, box3d)
        if len(pc_in_box) < 5:
            nonempty_box_mask[j] = False

    filtered_keys = (
        'center',
        'heading_scores',
        'heading_residuals',
        'heading_residuals_normalized',
        'size_scores',
        'size_residuals',
        'size_residuals_normalized',
        'sem_cls_scores',
        'objectness_scores',
    )
    for key in filtered_keys:
        prediction = end_points[key]
        keep = torch.from_numpy(nonempty_box_mask).to(prediction.device)
        # Select the surviving proposals of the single batch element and put
        # the batch dimension back.
        end_points[key] = prediction[0][keep].unsqueeze(0)
    return end_points
def extract_frustum_data(sunrgbd_dir,
                         idx_filename,
                         split,
                         output_filename,
                         type_whitelist,
                         perturb_box2d=False,
                         augmentX=1,
                         with_down_sample=False):
    """ Extract per-object frustum point clouds and pickle them.

    For every scene index and every object whose class is in
    `type_whitelist`, the points projecting inside the object's 2D box are
    gathered `augmentX` times, labelled 1 when they also lie inside the
    object's 3D box, randomly subsampled to at most 2048 points and kept if
    at least 5 of them are labelled 1.

    Args:
        sunrgbd_dir: dataset root handed to sunrgbd_object
        idx_filename: a TXT file where each line is an int number (index);
            blank lines are ignored
        split: split name handed to sunrgbd_object
        output_filename: path of the pickle file to write
        type_whitelist: class names to keep
        perturb_box2d: if True, the 2D box goes through random_shift_box2d
            before selecting points
        augmentX: number of samples generated per object
        with_down_sample: if True, the scene is first reduced with
            down_sample(xyz, 0.01)

    Dumps:
        A dict of parallel lists with the keys 'id', 'box2d', 'box3d',
        'box3d_size', 'box3d_heading', 'type', 'input', 'frustum_angle',
        'label', 'calib_K' and 'calib_R'. Image coordinates of the points
        are not part of the dump.
    """
    dataset = sunrgbd_object(sunrgbd_dir, split)
    with open(idx_filename) as idx_file:
        data_idx_list = [int(line.strip()) for line in idx_file
                         if line.strip()]

    id_list = []  # int number
    box2d_list = []  # [xmin,ymin,xmax,ymax]
    box3d_list = []  # (8,3) array in upright depth coord
    input_list = []  # channel number = 6, xyz,rgb in upright depth coord
    label_list = []  # 1 for roi object, 0 for clutter
    type_list = []  # string e.g. bed
    # face of object angle, radius of clockwise angle from positive x axis in
    # upright camera coord
    heading_list = []
    box3d_size_list = []  # array of l,w,h
    # angle of 2d box center from pos x-axis (clockwise)
    frustum_angle_list = []
    calib_K_list = []
    calib_R_list = []

    pos_cnt = 0
    all_cnt = 0
    for data_idx in data_idx_list:
        print('------------- ', data_idx)
        calib = dataset.get_calibration(data_idx)
        objects = dataset.get_label_objects(data_idx)
        pc_upright_depth = dataset.get_pointcloud(data_idx)
        pc_upright_camera = np.zeros_like(pc_upright_depth)
        pc_upright_camera[:, 0:3] = \
            calib.project_upright_depth_to_upright_camera(
                pc_upright_depth[:, 0:3])
        pc_upright_camera[:, 3:] = pc_upright_depth[:, 3:]
        if with_down_sample:
            idx = down_sample(pc_upright_camera[:, :3], 0.01)
            pc_upright_camera = pc_upright_camera[idx]
            pc_upright_depth = pc_upright_depth[idx]
        pc_image_coord, _ = calib.project_upright_depth_to_image(
            pc_upright_depth)

        for obj in objects:
            if obj.classname not in type_whitelist:
                continue

            # 2D BOX: Get pts rect backprojected
            box2d = obj.box2d
            for _ in range(augmentX):
                if perturb_box2d:
                    xmin, ymin, xmax, ymax = random_shift_box2d(box2d)
                else:
                    xmin, ymin, xmax, ymax = box2d
                box_fov_inds = ((pc_image_coord[:, 0] < xmax) &
                                (pc_image_coord[:, 0] >= xmin) &
                                (pc_image_coord[:, 1] < ymax) &
                                (pc_image_coord[:, 1] >= ymin))
                pc_in_box_fov = pc_upright_camera[box_fov_inds, :]

                # Get frustum angle (according to center pixel in 2D BOX)
                box2d_center = np.array([(xmin + xmax) / 2.0,
                                         (ymin + ymax) / 2.0])
                uvdepth = np.zeros((1, 3))
                uvdepth[0, 0:2] = box2d_center
                uvdepth[0, 2] = 20  # some random depth
                box2d_center_upright_camera = \
                    calib.project_image_to_upright_camera(uvdepth)
                # angle as to positive x-axis as in the Zoox paper
                frustum_angle = -1 * np.arctan2(
                    box2d_center_upright_camera[0, 2],
                    box2d_center_upright_camera[0, 0])

                # 3D BOX: Get pts velo in 3d box
                _, box3d_pts_3d = utils.compute_box_3d(obj, calib)
                box3d_pts_3d = calib.project_upright_depth_to_upright_camera(
                    box3d_pts_3d)
                try:
                    _, inds = extract_pc_in_box3d(pc_in_box_fov, box3d_pts_3d)
                except Exception as e:
                    # Best effort: report the failure and skip this sample.
                    print(e)
                    continue
                label = np.zeros((pc_in_box_fov.shape[0]))
                label[inds] = 1
                box3d_size = np.array([2 * obj.l, 2 * obj.w, 2 * obj.h])

                # Subsample points..
                num_point = pc_in_box_fov.shape[0]
                if num_point > 2048:
                    choice = np.random.choice(num_point, 2048, replace=False)
                    pc_in_box_fov = pc_in_box_fov[choice, :]
                    label = label[choice]

                # Reject object with too few points
                if np.sum(label) < 5:
                    continue

                id_list.append(data_idx)
                box2d_list.append(
                    np.array([xmin, ymin, xmax, ymax], dtype=np.float32))
                box3d_list.append(box3d_pts_3d)
                input_list.append(pc_in_box_fov.astype(np.float32))
                # The `np.bool` alias was removed from NumPy; the builtin
                # `bool` yields the same boolean dtype.
                label_list.append(label.astype(bool))
                type_list.append(obj.classname)
                heading_list.append(obj.heading_angle)
                box3d_size_list.append(box3d_size)
                frustum_angle_list.append(frustum_angle)
                calib_K_list.append(calib.K)
                calib_R_list.append(calib.Rtilt)

                # collect statistics
                pos_cnt += np.sum(label)
                all_cnt += pc_in_box_fov.shape[0]

    if id_list:
        # Every kept sample has at least 5 points, so all_cnt > 0 here.
        print('Average pos ratio: ', pos_cnt / float(all_cnt))
        print('Average npoints: ', float(all_cnt) / len(id_list))
    else:
        print('No frustum sample was extracted; statistics skipped.')

    data_dict = {
        'id': id_list,
        'box2d': box2d_list,
        'box3d': box3d_list,
        'box3d_size': box3d_size_list,
        'box3d_heading': heading_list,
        'type': type_list,
        'input': input_list,
        'frustum_angle': frustum_angle_list,
        'label': label_list,
        'calib_K': calib_K_list,
        'calib_R': calib_R_list,
    }

    with open(output_filename, 'wb') as f:
        pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL)

    print("save in {}".format(output_filename))