def image_eval(pred, gt, ignore, iou_thresh):
    """Evaluate the detections of a single image against its ground truth.

    Boxes are given as ``[x, y, w, h]`` and are converted to
    ``[x1, y1, x2, y2]`` on private copies before the overlaps are
    computed, so the caller's arrays are never modified.

    Args:
        pred: (N, 5) array of detections. Only the first four columns
            (the box) are used here; the fifth is presumably the score.
            Rows are processed in order.
        gt: (M, 4) array of ground-truth boxes.
        ignore: length-M sequence indexed like ``gt``. An entry equal to 0
            marks a ground-truth box that is excluded: a detection matched
            to it is flagged ``-1`` and the box never counts as recalled.
        iou_thresh: minimum overlap for a detection to match its
            best-overlapping ground-truth box.

    Returns:
        tuple: ``(pred_recall, proposal_list)``, both of shape (N,).
        ``pred_recall[h]`` is the number of distinct non-excluded
        ground-truth boxes matched by detections ``0..h``;
        ``proposal_list[h]`` is ``-1`` when detection ``h`` matched an
        excluded box and ``1`` otherwise.
    """
    num_pred = pred.shape[0]
    num_gt = gt.shape[0]
    pred_recall = np.zeros(num_pred)
    proposal_list = np.ones(num_pred)

    # Without detections or without ground truth nothing can match; taking
    # the max of an empty overlap row would raise, so bail out early.
    if num_pred == 0 or num_gt == 0:
        return pred_recall, proposal_list

    _pred = pred.copy()
    _gt = gt.copy()

    # [x, y, w, h] -> [x1, y1, x2, y2]
    _pred[:, 2] = _pred[:, 2] + _pred[:, 0]
    _pred[:, 3] = _pred[:, 3] + _pred[:, 1]
    _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
    _gt[:, 3] = _gt[:, 3] + _gt[:, 1]

    overlaps = bbox_overlaps(_pred[:, :4], _gt)

    # 0: not matched yet, 1: recalled, -1: matched but excluded.
    recall_list = np.zeros(num_gt)
    # Running count of entries equal to 1 in recall_list. An entry only
    # ever moves 0 -> 1 or 0 -> -1 (the exclusion flag of a box is fixed),
    # so counting the 0 -> 1 transitions is equivalent to rescanning the
    # whole array on every iteration.
    recalled = 0

    for h in range(num_pred):
        gt_overlap = overlaps[h]
        max_overlap, max_idx = gt_overlap.max(), gt_overlap.argmax()
        if max_overlap >= iou_thresh:
            if ignore[max_idx] == 0:
                recall_list[max_idx] = -1
                proposal_list[h] = -1
            elif recall_list[max_idx] == 0:
                recall_list[max_idx] = 1
                recalled += 1
        pred_recall[h] = recalled

    return pred_recall, proposal_list
def cal_target(self, gt_box3d):
    '''
    Calculate the positive and negative anchors, and the regression targets

    Reads self.anchors (indexed as rows of [x, y, z, h, w, l, r]),
    self.feature_map_shape and the 'IOU_pos_threshold',
    'IOU_neg_threshold' and 'anchors_per_position' entries of self.config.

    Parameters:
        gt_box3d (arr): (N, 8, 3) ground truth bounding boxes in corners
            notation

    Returns:
        arr: positive anchor mask,
            shape (*feature_map_shape, anchors_per_position)
        arr: negative anchor mask, same shape as the positive mask
        arr: targets,
            shape (*feature_map_shape, anchors_per_position * 7)
    '''
    anchors_per_position = self.config['anchors_per_position']
    # Every output is sized from the configured anchor count so that it
    # always agrees with the unravel_index decomposition used below.
    anchor_grid_shape = (*self.feature_map_shape, anchors_per_position)

    #      _______________
    # dᵃ = √ (lᵃ)² + (wᵃ)²  is the diagonal of the base
    # of the anchor box (See 2.2)
    anchors_diagonal = np.sqrt(
        self.anchors[:, 4] ** 2 + self.anchors[:, 5] ** 2)

    pos_equal_one = np.zeros(anchor_grid_shape)
    neg_equal_one = np.zeros(anchor_grid_shape)

    # Convert from corner to center notation ((N, 8, 3) -> (N, 7))
    gt_xyzhwlr = box3d_corner_to_center_batch(gt_box3d)

    # Convert anchors to corner notation (BEV)
    anchors_corner = anchors_center_to_corner(self.anchors)

    # Convert from all corners to only 2 [xyxy]
    anchors_standup_2d = corner_to_standup_box2d(anchors_corner)
    gt_standup_2d = corner_to_standup_box2d(gt_box3d)

    # Calculate IoU of the ground truth and anchors (BEV);
    # rows are anchors, columns are ground truths
    iou = bbox_overlaps(
        np.ascontiguousarray(anchors_standup_2d).astype(np.float32),
        np.ascontiguousarray(gt_standup_2d).astype(np.float32),
    )

    # For every ground truth, the index of the anchor with the highest IoU
    id_highest = np.argmax(iou, axis=0)
    # Array containing [0, 1, 2, ..., X-1], X = number of ground truths
    id_highest_gt = np.arange(iou.shape[1])
    # Make sure the anchor we picked has an IoU > 0
    mask = iou[id_highest, id_highest_gt] > 0
    id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask]

    # An anchor is considered as positive if it has the highest IoU
    # with a ground truth or its IoU with ground truth is ≥ pos_threshold
    # (in BEV). (See 3.1)
    # id_pos: Index of anchor    id_pos_gt: Index of ground truth
    id_pos, id_pos_gt = np.where(iou > self.config['IOU_pos_threshold'])

    # An anchor is considered as negative if the IoU between it and all
    # ground truth boxes is less than neg_threshold. (See 3.1)
    # np.where already yields the indices in ascending order.
    id_neg = np.where(
        np.all(iou < self.config['IOU_neg_threshold'], axis=1))[0]

    id_pos = np.concatenate([id_pos, id_highest])
    id_pos_gt = np.concatenate([id_pos_gt, id_highest_gt])

    # Filter out repeats (above pos_threshold and max); an anchor matching
    # several ground truths keeps the first pairing found
    id_pos, index = np.unique(id_pos, return_index=True)
    id_pos_gt = id_pos_gt[index]

    # Calculate target (𝘂*) and set corresponding feature map spaces to 1
    index_x, index_y, index_z = np.unravel_index(id_pos, anchor_grid_shape)
    pos_equal_one[index_x, index_y, index_z] = 1

    # To retrieve the ground truth box from a matching positive anchor,
    # we define the residual vector 𝘂* ('targets') containing the 7
    # regression targets corresponding to center location ∆x,∆y,∆z,
    # three dimensions ∆l,∆w,∆h, and the rotation ∆θ (See 2.2)
    targets = np.zeros((*self.feature_map_shape, anchors_per_position * 7))

    # First channel of the 7-value slot belonging to each positive anchor
    slot = np.asarray(index_z) * 7
    gt_pos = gt_xyzhwlr[id_pos_gt]
    anchors_pos = self.anchors[id_pos]
    diagonal_pos = anchors_diagonal[id_pos]

    # Δx = (xᵍ - xᵃ) / dᵃ
    targets[index_x, index_y, slot] = \
        (gt_pos[:, 0] - anchors_pos[:, 0]) / diagonal_pos
    # Δy = (yᵍ - yᵃ) / dᵃ
    targets[index_x, index_y, slot + 1] = \
        (gt_pos[:, 1] - anchors_pos[:, 1]) / diagonal_pos
    # Δz = (zᵍ - zᵃ) / hᵃ
    targets[index_x, index_y, slot + 2] = \
        (gt_pos[:, 2] - anchors_pos[:, 2]) / anchors_pos[:, 3]
    # Δh = log(hᵍ / hᵃ)
    targets[index_x, index_y, slot + 3] = \
        np.log(gt_pos[:, 3] / anchors_pos[:, 3])
    # Δw = log(wᵍ / wᵃ)
    targets[index_x, index_y, slot + 4] = \
        np.log(gt_pos[:, 4] / anchors_pos[:, 4])
    # Δl = log(lᵍ / lᵃ)
    targets[index_x, index_y, slot + 5] = \
        np.log(gt_pos[:, 5] / anchors_pos[:, 5])
    # Δ𝜃 = 𝜃ᵍ - 𝜃ᵃ
    targets[index_x, index_y, slot + 6] = gt_pos[:, 6] - anchors_pos[:, 6]

    index_x, index_y, index_z = np.unravel_index(id_neg, anchor_grid_shape)
    neg_equal_one[index_x, index_y, index_z] = 1

    # To avoid a box being positive and negative
    index_x, index_y, index_z = np.unravel_index(
        id_highest, anchor_grid_shape)
    neg_equal_one[index_x, index_y, index_z] = 0

    return pos_equal_one, neg_equal_one, targets
def cal_target(self, gt_box3d):
    # Build the anchor classification masks and regression targets for one
    # sample. IoU is calculated on the bird's-eye view.
    #
    # Input:
    #   gt_box3d: ground-truth boxes in corner notation
    #             (presumably (N, 8, 3) -- confirm against the caller)
    # Reads from self:
    #   feature_map_shape: (w, l)
    #   anchors: indexed here as rows of [x, y, z, h, w, l, r]
    #   anchors_per_position, pos_threshold, neg_threshold
    # Output:
    #   pos_equal_one (w, l, anchors_per_position)
    #   neg_equal_one (w, l, anchors_per_position)
    #   targets       (w, l, anchors_per_position * 7)
    anchors_per_position = self.anchors_per_position
    # Every output is sized from the configured anchor count so that it
    # always agrees with the unravel_index decomposition used below.
    anchor_grid_shape = (*self.feature_map_shape, anchors_per_position)

    # diagonal of each anchor's base, used to normalise the x/y offsets
    anchors_d = np.sqrt(self.anchors[:, 4]**2 + self.anchors[:, 5]**2)
    pos_equal_one = np.zeros(anchor_grid_shape)
    neg_equal_one = np.zeros(anchor_grid_shape)
    targets = np.zeros((*self.feature_map_shape, anchors_per_position * 7))

    gt_xyzhwlr = box3d_corner_to_center_batch(gt_box3d)
    anchors_corner = anchors_center_to_corner(self.anchors)
    anchors_standup_2d = corner_to_standup_box2d_batch(
        anchors_corner)  # BOTTLENECK
    gt_standup_2d = corner_to_standup_box2d_batch(gt_box3d)

    # rows are anchors, columns are ground truths
    iou = bbox_overlaps(
        np.ascontiguousarray(anchors_standup_2d).astype(np.float32),
        np.ascontiguousarray(gt_standup_2d).astype(np.float32),
    )

    # for every ground truth, the ID of the anchor with the maximum IoU
    id_highest = np.argmax(iou, axis=0)
    id_highest_gt = np.arange(iou.shape[1])
    # drop ground truths whose best anchor does not overlap them at all
    mask = iou[id_highest, id_highest_gt] > 0
    id_highest, id_highest_gt = id_highest[mask], id_highest_gt[mask]

    # find anchors with iou > pos_threshold
    id_pos, id_pos_gt = np.where(iou > self.pos_threshold)

    # find anchors whose iou with every ground truth is < neg_threshold;
    # np.where already yields the indices in ascending order
    id_neg = np.where(np.all(iou < self.neg_threshold, axis=1))[0]

    id_pos = np.concatenate([id_pos, id_highest])
    id_pos_gt = np.concatenate([id_pos_gt, id_highest_gt])

    # TODO: uniquify the array in a more scientific way
    # (an anchor matching several ground truths keeps the first pairing)
    id_pos, index = np.unique(id_pos, return_index=True)
    id_pos_gt = id_pos_gt[index]

    # cal the target and set the equal one
    index_x, index_y, index_z = np.unravel_index(id_pos, anchor_grid_shape)
    pos_equal_one[index_x, index_y, index_z] = 1

    # first channel of the 7-value slot belonging to each positive anchor
    slot = np.asarray(index_z) * 7
    gt_pos = gt_xyzhwlr[id_pos_gt]
    anchors_pos = self.anchors[id_pos]
    d_pos = anchors_d[id_pos]

    # dx, dy are normalised by the anchor diagonal, dz by the anchor height
    targets[index_x, index_y, slot] = \
        (gt_pos[:, 0] - anchors_pos[:, 0]) / d_pos
    targets[index_x, index_y, slot + 1] = \
        (gt_pos[:, 1] - anchors_pos[:, 1]) / d_pos
    targets[index_x, index_y, slot + 2] = \
        (gt_pos[:, 2] - anchors_pos[:, 2]) / anchors_pos[:, 3]
    # dh, dw, dl are log ratios of ground truth size to anchor size
    targets[index_x, index_y, slot + 3] = np.log(
        gt_pos[:, 3] / anchors_pos[:, 3])
    targets[index_x, index_y, slot + 4] = np.log(
        gt_pos[:, 4] / anchors_pos[:, 4])
    targets[index_x, index_y, slot + 5] = np.log(
        gt_pos[:, 5] / anchors_pos[:, 5])
    # rotation residual
    targets[index_x, index_y, slot + 6] = gt_pos[:, 6] - anchors_pos[:, 6]

    index_x, index_y, index_z = np.unravel_index(id_neg, anchor_grid_shape)
    neg_equal_one[index_x, index_y, index_z] = 1

    # to avoid a box being pos and neg at the same time
    index_x, index_y, index_z = np.unravel_index(
        id_highest, anchor_grid_shape)
    neg_equal_one[index_x, index_y, index_z] = 0

    return pos_equal_one, neg_equal_one, targets