import numpy as np
import torch

# NOTE: the helpers referenced below (softmax, flip_axis_to_camera,
# flip_axis_to_depth, extract_pc_in_box3d, new_get_3d_box, get_3d_box,
# nms_2d_faster, nms_3d_faster, nms_3d_faster_samecls) are assumed to be
# importable from the project's utility modules.


def parse_predictions(end_points, config_dict):
    """ Parse predictions to OBB parameters and suppress overlapping boxes.

    Args:
        end_points: dict
            {point_clouds, center, heading_scores, heading_residuals,
             size_scores, size_residuals, sem_cls_scores, objectness_scores}
        config_dict: dict
            {dataset_config, remove_empty_box, use_3d_nms, cls_nms, nms_iou,
             use_old_type_nms, conf_thresh, per_class_proposal}

    Returns:
        batch_pred_map_cls: a list of len == batch size (BS)
            [pred_list_i], i = 0, 1, ..., BS-1
            where pred_list_i = [(pred_sem_cls, box_params, box_score)_j],
            j = 0, ..., num of valid detections - 1 from sample input i
    """
    pred_center = end_points['center']  # B,num_proposal,3
    pred_heading_class = torch.argmax(end_points['heading_scores'], 2)  # B,num_proposal
    pred_heading_residual = torch.gather(
        end_points['heading_residuals'], 2,
        pred_heading_class.unsqueeze(2))  # B,num_proposal,1
    pred_heading_residual.squeeze_(2)  # B,num_proposal
    pred_size_class = torch.argmax(end_points['size_scores'], -1)  # B,num_proposal
    pred_size_residual = torch.gather(
        end_points['size_residuals'], 2,
        pred_size_class.unsqueeze(-1).unsqueeze(-1).repeat(
            1, 1, 1, 3))  # B,num_proposal,1,3
    pred_size_residual.squeeze_(2)  # B,num_proposal,3
    pred_sem_cls = torch.argmax(end_points['sem_cls_scores'], -1)  # B,num_proposal
    sem_cls_probs = softmax(
        end_points['sem_cls_scores'].detach().cpu().numpy())  # B,num_proposal,num_class
    pred_sem_cls_prob = np.max(sem_cls_probs, -1)  # B,num_proposal

    num_proposal = pred_center.shape[1]
    # Points live in the upright_depth coordinate frame, while the box utility
    # functions assume upright_camera coordinates, so convert the centers first.
    bsize = pred_center.shape[0]
    pred_corners_3d_upright_camera = np.zeros((bsize, num_proposal, 8, 3))
    pred_center_upright_camera = flip_axis_to_camera(
        pred_center.detach().cpu().numpy())
    for i in range(bsize):
        for j in range(num_proposal):
            heading_angle = config_dict['dataset_config'].class2angle(
                pred_heading_class[i, j].detach().cpu().numpy(),
                pred_heading_residual[i, j].detach().cpu().numpy())
            box_size = config_dict['dataset_config'].class2size(
                int(pred_size_class[i, j].detach().cpu().numpy()),
                pred_size_residual[i, j].detach().cpu().numpy())
            corners_3d_upright_camera = new_get_3d_box(
                pred_center_upright_camera[i, j, :], box_size, heading_angle)
            pred_corners_3d_upright_camera[i, j] = corners_3d_upright_camera

    K = pred_center.shape[1]  # K == num_proposal
    nonempty_box_mask = np.ones((bsize, K))

    if config_dict['remove_empty_box']:
        # -------------------------------------
        # Remove predicted boxes without any point within them.
        batch_pc = end_points['point_clouds'].cpu().numpy()[:, :, 0:3]  # B,N,3
        for i in range(bsize):
            pc = batch_pc[i, :, :]  # (N,3)
            for j in range(K):
                box3d = pred_corners_3d_upright_camera[i, j, :, :]  # (8,3)
                box3d = flip_axis_to_depth(box3d)
                pc_in_box, inds = extract_pc_in_box3d(pc, box3d)
                if len(pc_in_box) < 5:
                    nonempty_box_mask[i, j] = 0
        # -------------------------------------

    obj_logits = end_points['objectness_scores'].detach().cpu().numpy()
    obj_prob = softmax(obj_logits)[:, :, 1]  # (B,K)

    if not config_dict['use_3d_nms']:
        # ---------- NMS input: pred_with_prob in (B,K,5) -----------
        pred_mask = np.zeros((bsize, K))
        for i in range(bsize):
            boxes_2d_with_prob = np.zeros((K, 5))
            for j in range(K):
                boxes_2d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_2d_with_prob[j, 2] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_2d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_2d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_2d_with_prob[j, 4] = obj_prob[i, j]
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
            pick = nms_2d_faster(
                boxes_2d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert len(pick) > 0
            pred_mask[i, nonempty_box_inds[pick]] = 1
        end_points['pred_mask'] = pred_mask
        # ---------- NMS output: pred_mask in (B,K) -----------
    elif config_dict['use_3d_nms'] and (not config_dict['cls_nms']):
        # ---------- NMS input: pred_with_prob in (B,K,7) -----------
        pred_mask = np.zeros((bsize, K))
        for i in range(bsize):
            boxes_3d_with_prob = np.zeros((K, 7))
            for j in range(K):
                boxes_3d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 2] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 4] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 5] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 6] = obj_prob[i, j]
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
            pick = nms_3d_faster(
                boxes_3d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert len(pick) > 0
            pred_mask[i, nonempty_box_inds[pick]] = 1
        end_points['pred_mask'] = pred_mask
        # ---------- NMS output: pred_mask in (B,K) -----------
    elif config_dict['use_3d_nms'] and config_dict['cls_nms']:
        # ---------- NMS input: pred_with_prob in (B,K,8) -----------
        pred_mask = np.zeros((bsize, K))
        for i in range(bsize):
            boxes_3d_with_prob = np.zeros((K, 8))
            for j in range(K):
                boxes_3d_with_prob[j, 0] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 1] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 2] = np.min(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 3] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 4] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 5] = np.max(
                    pred_corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 6] = obj_prob[i, j]
                boxes_3d_with_prob[j, 7] = pred_sem_cls[
                    i, j]  # only suppress if the two boxes are of the same class
            nonempty_box_inds = np.where(nonempty_box_mask[i, :] == 1)[0]
            pick = nms_3d_faster_samecls(
                boxes_3d_with_prob[nonempty_box_mask[i, :] == 1, :],
                config_dict['nms_iou'], config_dict['use_old_type_nms'])
            assert len(pick) > 0
            pred_mask[i, nonempty_box_inds[pick]] = 1
        end_points['pred_mask'] = pred_mask
        # ---------- NMS output: pred_mask in (B,K) -----------

    # a list (len: batch_size) of lists (len: num of predictions per sample)
    # of tuples of (pred_cls, pred_box, conf in 0-1)
    batch_pred_map_cls = []
    for i in range(bsize):
        if config_dict['per_class_proposal']:
            cur_list = []
            for ii in range(config_dict['dataset_config'].num_class):
                cur_list += [
                    (ii, pred_corners_3d_upright_camera[i, j],
                     sem_cls_probs[i, j, ii] * obj_prob[i, j])
                    for j in range(pred_center.shape[1])
                    if pred_mask[i, j] == 1
                    and obj_prob[i, j] > config_dict['conf_thresh']
                ]
            batch_pred_map_cls.append(cur_list)
        else:
            batch_pred_map_cls.append([
                (pred_sem_cls[i, j].item(),
                 pred_corners_3d_upright_camera[i, j], obj_prob[i, j])
                for j in range(pred_center.shape[1])
                if pred_mask[i, j] == 1
                and obj_prob[i, j] > config_dict['conf_thresh']
            ])
    end_points['batch_pred_map_cls'] = batch_pred_map_cls

    return batch_pred_map_cls
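
# ----------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original code): how the
# config_dict consumed by the torch-based parse_predictions above is typically
# assembled. `build_eval_config_dict` and its default values are assumptions;
# `dataset_config` stands for whatever object provides class2angle /
# class2size / num_class for your dataset.
def build_eval_config_dict(dataset_config, nms_iou=0.25, conf_thresh=0.05,
                           use_3d_nms=True, cls_nms=True,
                           per_class_proposal=True):
    """Bundle the post-processing switches expected by parse_predictions."""
    return {
        'dataset_config': dataset_config,
        'remove_empty_box': False,       # keep boxes even if no points fall inside
        'use_3d_nms': use_3d_nms,        # 3D NMS instead of bird's-eye-view 2D NMS
        'cls_nms': cls_nms,              # only suppress boxes of the same class
        'use_old_type_nms': False,
        'per_class_proposal': per_class_proposal,
        'nms_iou': nms_iou,
        'conf_thresh': conf_thresh,
    }
# Example call:
#   batch_pred_map_cls = parse_predictions(end_points,
#                                          build_eval_config_dict(dataset_config))
# ----------------------------------------------------------------------------
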
# Numpy variant of parse_predictions: here the network outputs have already
# been converted to numpy arrays (e.g. when the model runs in TensorFlow).
def parse_predictions(objectness_score_normalized, center, heading, size,
                      sem_class_scores, conf_thresh, nms_iou, num_class,
                      per_class_proposal=False, cls_nms=False):
    ''' Parse predictions to OBB parameters and suppress overlapping boxes.
    NOTE: inputs are numpy arrays, not TensorFlow tensors.

    Args:
        objectness_score_normalized: B,num_proposals,2
        center: B,num_proposals,3
        heading: B,num_proposals
        size: B,num_proposals,3
        sem_class_scores: B,num_proposals,num_class
        conf_thresh: objectness confidence threshold
        nms_iou: IoU threshold for NMS
        num_class: number of semantic classes
        per_class_proposal: if True, emit one proposal per class for every kept box
        cls_nms: if True, only suppress overlapping boxes of the same class
    Returns:
        batch_pred_map_cls: a list (len: batch_size) of lists
            (len: num of predictions per sample) of tuples of
            (pred_cls, pred_box, conf in 0-1)
    '''
    obj_prob = objectness_score_normalized[:, :, 1]  # B,K; score for the positive class
    sem_class = np.argmax(sem_class_scores, axis=-1)  # B,K
    B, K = sem_class.shape  # batch size, num_proposals

    center_upright_camera = flip_axis_to_camera(center)
    corners_3d_upright_camera = np.zeros((B, K, 8, 3))
    for i in range(B):
        for j in range(K):
            if heading[i, j] > np.pi:
                heading[i, j] -= 2 * np.pi
            if np.all(size[i, j] == np.array([0, 0, 0])):
                print("size zero!")  # for debugging
            corners_3d_upright_camera[i, j] = get_3d_box(
                size[i, j], heading[i, j], center_upright_camera[i, j, :])

    if cls_nms:
        pred_mask = np.zeros((B, K))
        for i in range(B):
            boxes_3d_with_prob = np.zeros((K, 8))  # bboxes for one scene
            for j in range(K):
                boxes_3d_with_prob[j, 0] = np.min(corners_3d_upright_camera[i, j, :, 0])  # x_min
                boxes_3d_with_prob[j, 1] = np.min(corners_3d_upright_camera[i, j, :, 1])  # y_min
                boxes_3d_with_prob[j, 2] = np.min(corners_3d_upright_camera[i, j, :, 2])  # z_min
                boxes_3d_with_prob[j, 3] = np.max(corners_3d_upright_camera[i, j, :, 0])  # x_max
                boxes_3d_with_prob[j, 4] = np.max(corners_3d_upright_camera[i, j, :, 1])  # y_max
                boxes_3d_with_prob[j, 5] = np.max(corners_3d_upright_camera[i, j, :, 2])  # z_max
                boxes_3d_with_prob[j, 6] = obj_prob[i, j]
                boxes_3d_with_prob[j, 7] = sem_class[i, j]
            # use the axis-aligned bboxes to do class-aware NMS
            pick = nms_3d_faster_samecls(boxes_3d_with_prob, nms_iou)  # indices of kept boxes
            assert len(pick) > 0
            pred_mask[i, pick] = 1
    else:
        pred_mask = np.zeros((B, K))
        for i in range(B):
            boxes_3d_with_prob = np.zeros((K, 7))
            for j in range(K):
                boxes_3d_with_prob[j, 0] = np.min(corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 1] = np.min(corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 2] = np.min(corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 3] = np.max(corners_3d_upright_camera[i, j, :, 0])
                boxes_3d_with_prob[j, 4] = np.max(corners_3d_upright_camera[i, j, :, 1])
                boxes_3d_with_prob[j, 5] = np.max(corners_3d_upright_camera[i, j, :, 2])
                boxes_3d_with_prob[j, 6] = obj_prob[i, j]
            pick = nms_3d_faster(boxes_3d_with_prob, nms_iou)  # indices of kept boxes
            assert len(pick) > 0
            pred_mask[i, pick] = 1

    batch_pred_map_cls = []
    for i in range(B):
        if per_class_proposal:
            cur_list = []
            for ii in range(num_class):
                cur_list += [
                    (ii, corners_3d_upright_camera[i, j],
                     sem_class_scores[i, j, ii] * obj_prob[i, j])
                    for j in range(K)
                    if pred_mask[i, j] == 1 and obj_prob[i, j] > conf_thresh
                ]
            batch_pred_map_cls.append(cur_list)
        else:
            batch_pred_map_cls.append([
                (sem_class[i, j].item(), corners_3d_upright_camera[i, j],
                 obj_prob[i, j])
                for j in range(K)
                if pred_mask[i, j] == 1 and obj_prob[i, j] > conf_thresh
            ])
    return batch_pred_map_cls
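
# ----------------------------------------------------------------------------
# Minimal smoke test (illustrative, not part of the original code): drive the
# numpy variant above with random inputs of the documented shapes. It assumes
# the repo's geometry/NMS helpers (flip_axis_to_camera, get_3d_box,
# nms_3d_faster, nms_3d_faster_samecls) are importable, so it only runs inside
# the project; the resulting boxes are meaningless and only exercise the
# NMS + confidence-threshold path.
if __name__ == '__main__':
    np.random.seed(0)
    B, K, num_class = 2, 16, 10
    obj = np.random.rand(B, K, 2)
    obj /= obj.sum(axis=-1, keepdims=True)            # fake normalized objectness
    center = np.random.rand(B, K, 3) * 4.0            # centers inside a 4 m cube
    heading = np.random.uniform(-np.pi, np.pi, (B, K))
    size = np.random.uniform(0.5, 2.0, (B, K, 3))     # strictly positive box sizes
    sem = np.random.rand(B, K, num_class)
    sem /= sem.sum(axis=-1, keepdims=True)            # fake class probabilities
    preds = parse_predictions(obj, center, heading, size, sem,
                              conf_thresh=0.05, nms_iou=0.25,
                              num_class=num_class,
                              per_class_proposal=False, cls_nms=True)
    print('detections per sample:', [len(p) for p in preds])
# ----------------------------------------------------------------------------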