def _calc_ious(self, anchor, bbox, inside_index): # ious between the anchors and the gt boxes ious = bbox_iou(anchor, bbox) argmax_ious = ious.argmax(axis=1) max_ious = ious[np.arange(len(inside_index)), argmax_ious] gt_argmax_ious = ious.argmax(axis=0) gt_max_ious = ious[gt_argmax_ious, np.arange(ious.shape[1])] gt_argmax_ious = np.where(ious == gt_max_ious)[0] return argmax_ious, max_ious, gt_argmax_ious
def __call__(self, roi, bbox, label, loc_normalize_mean=(0., 0., 0., 0.), loc_normalize_std=(0.1, 0.1, 0.2, 0.2)): n_bbox, _ = bbox.shape roi = np.concatenate((roi, bbox), axis=0) pos_roi_per_image = np.round(self.n_sample * self.pos_ratio) iou = bbox_iou(roi, bbox) gt_assignment = iou.argmax(axis=1) max_iou = iou.max(axis=1) # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class]. # The label with value 0 is the background. gt_roi_label = label[gt_assignment] + 1 # Select foreground RoIs as those with >= pos_iou_thresh IoU. pos_index = np.where(max_iou >= self.pos_iou_thresh)[0] pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size)) if pos_index.size > 0: pos_index = np.random.choice(pos_index, size=pos_roi_per_this_image, replace=False) # Select background RoIs as those within # [neg_iou_thresh_lo, neg_iou_thresh_hi). neg_index = np.where((max_iou < self.neg_iou_thresh_hi) & (max_iou >= self.neg_iou_thresh_lo))[0] neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image neg_roi_per_this_image = int( min(neg_roi_per_this_image, neg_index.size)) if neg_index.size > 0: neg_index = np.random.choice(neg_index, size=neg_roi_per_this_image, replace=False) # The indices that we're selecting (both positive and negative). keep_index = np.append(pos_index, neg_index) gt_roi_label = gt_roi_label[keep_index] gt_roi_label[pos_roi_per_this_image:] = 0 # negative labels --> 0 sample_roi = roi[keep_index] # Compute offsets and scales to match sampled RoIs to the GTs. gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]]) gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32)) / np.array(loc_normalize_std, np.float32)) return sample_roi, gt_roi_loc, gt_roi_label
def calc_detection_voc_prec_rec(pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults=None, iou_thresh=0.5): """Calculate precision and recall based on evaluation code of PASCAL VOC. This function calculates precision and recall of predicted bounding boxes obtained from a dataset which has :math:`N` images. The code is based on the evaluation code used in PASCAL VOC Challenge. Args: pred_bboxes (iterable of numpy.ndarray): An iterable of :math:`N` sets of bounding boxes. Its index corresponds to an index for the base dataset. Each element of :obj:`pred_bboxes` is a set of coordinates of bounding boxes. This is an array whose shape is :math:`(R, 4)`, where :math:`R` corresponds to the number of bounding boxes, which may vary among boxes. The second axis corresponds to :math:`y_{min}, x_{min}, y_{max}, x_{max}` of a bounding box. pred_labels (iterable of numpy.ndarray): An iterable of labels. Similar to :obj:`pred_bboxes`, its index corresponds to an index for the base dataset. Its length is :math:`N`. pred_scores (iterable of numpy.ndarray): An iterable of confidence scores for predicted bounding boxes. Similar to :obj:`pred_bboxes`, its index corresponds to an index for the base dataset. Its length is :math:`N`. gt_bboxes (iterable of numpy.ndarray): An iterable of ground truth bounding boxes whose length is :math:`N`. An element of :obj:`gt_bboxes` is a bounding box whose shape is :math:`(R, 4)`. Note that the number of bounding boxes in each image does not need to be same as the number of corresponding predicted boxes. gt_labels (iterable of numpy.ndarray): An iterable of ground truth labels which are organized similarly to :obj:`gt_bboxes`. gt_difficults (iterable of numpy.ndarray): An iterable of boolean arrays which is organized similarly to :obj:`gt_bboxes`. This tells whether the corresponding ground truth bounding box is difficult or not. By default, this is :obj:`None`. In that case, this function considers all bounding boxes to be not difficult. iou_thresh (float): A prediction is correct if its Intersection over Union with the ground truth is above this value.. Returns: tuple of two lists: This function returns two lists: :obj:`prec` and :obj:`rec`. * :obj:`prec`: A list of arrays. :obj:`prec[l]` is precision \ for class :math:`l`. If class :math:`l` does not exist in \ either :obj:`pred_labels` or :obj:`gt_labels`, :obj:`prec[l]` is \ set to :obj:`None`. * :obj:`rec`: A list of arrays. :obj:`rec[l]` is recall \ for class :math:`l`. If class :math:`l` that is not marked as \ difficult does not exist in \ :obj:`gt_labels`, :obj:`rec[l]` is \ set to :obj:`None`. """ pred_bboxes = iter(pred_bboxes) pred_labels = iter(pred_labels) pred_scores = iter(pred_scores) gt_bboxes = iter(gt_bboxes) gt_labels = iter(gt_labels) if gt_difficults is None: gt_difficults = itertools.repeat(None) else: gt_difficults = iter(gt_difficults) n_pos = defaultdict(int) score = defaultdict(list) match = defaultdict(list) for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in \ six.moves.zip( pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults): if gt_difficult is None: gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool) for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)): pred_mask_l = pred_label == l pred_bbox_l = pred_bbox[pred_mask_l] pred_score_l = pred_score[pred_mask_l] # sort by score order = pred_score_l.argsort()[::-1] pred_bbox_l = pred_bbox_l[order] pred_score_l = pred_score_l[order] gt_mask_l = gt_label == l gt_bbox_l = gt_bbox[gt_mask_l] gt_difficult_l = gt_difficult[gt_mask_l] n_pos[l] += np.logical_not(gt_difficult_l).sum() score[l].extend(pred_score_l) if len(pred_bbox_l) == 0: continue if len(gt_bbox_l) == 0: match[l].extend((0, ) * pred_bbox_l.shape[0]) continue # VOC evaluation follows integer typed bounding boxes. pred_bbox_l = pred_bbox_l.copy() pred_bbox_l[:, 2:] += 1 gt_bbox_l = gt_bbox_l.copy() gt_bbox_l[:, 2:] += 1 iou = bbox_iou(pred_bbox_l, gt_bbox_l) gt_index = iou.argmax(axis=1) # set -1 if there is no matching ground truth gt_index[iou.max(axis=1) < iou_thresh] = -1 del iou selec = np.zeros(gt_bbox_l.shape[0], dtype=bool) for gt_idx in gt_index: if gt_idx >= 0: if gt_difficult_l[gt_idx]: match[l].append(-1) else: if not selec[gt_idx]: match[l].append(1) else: match[l].append(0) selec[gt_idx] = True else: match[l].append(0) for iter_ in (pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults): if next(iter_, None) is not None: raise ValueError('Length of input iterables need to be same.') n_fg_class = max(n_pos.keys()) + 1 prec = [None] * n_fg_class rec = [None] * n_fg_class for l in n_pos.keys(): score_l = np.array(score[l]) match_l = np.array(match[l], dtype=np.int8) order = score_l.argsort()[::-1] match_l = match_l[order] tp = np.cumsum(match_l == 1) fp = np.cumsum(match_l == 0) # If an element of fp + tp is 0, # the corresponding element of prec[l] is nan. prec[l] = tp / (fp + tp) # If n_pos[l] is 0, rec[l] is None. if n_pos[l] > 0: rec[l] = tp / n_pos[l] return prec, rec
def __call__(self, roi, bbox, label, loc_normalize_mean=(0., 0., 0., 0.), loc_normalize_std=(0.1, 0.1, 0.2, 0.2)): """Assigns ground truth to sampled proposals. This function samples total of :obj:`self.n_sample` RoIs from the combination of :obj:`roi` and :obj:`bbox`. The RoIs are assigned with the ground truth class labels as well as bounding box offsets and scales to match the ground truth bounding boxes. As many as :obj:`pos_ratio * self.n_sample` RoIs are sampled as foregrounds. Offsets and scales of bounding boxes are calculated using :func:`model.utils.bbox_tools.bbox2loc`. Also, types of input arrays and output arrays are same. Here are notations. * :math:`S` is the total number of sampled RoIs, which equals \ :obj:`self.n_sample`. * :math:`L` is number of object classes possibly including the \ background. Args: roi (array): Region of Interests (RoIs) from which we sample. Its shape is :math:`(R, 4)` bbox (array): The coordinates of ground truth bounding boxes. Its shape is :math:`(R', 4)`. label (array): Ground truth bounding box labels. Its shape is :math:`(R',)`. Its range is :math:`[0, L - 1]`, where :math:`L` is the number of foreground classes. loc_normalize_mean (tuple of four floats): Mean values to normalize coordinates of bouding boxes. loc_normalize_std (tupler of four floats): Standard deviation of the coordinates of bounding boxes. Returns: (array, array, array): * **sample_roi**: Regions of interests that are sampled. \ Its shape is :math:`(S, 4)`. * **gt_roi_loc**: Offsets and scales to match \ the sampled RoIs to the ground truth bounding boxes. \ Its shape is :math:`(S, 4)`. * **gt_roi_label**: Labels assigned to sampled RoIs. Its shape is \ :math:`(S,)`. Its range is :math:`[0, L]`. The label with \ value 0 is the background. """ n_bbox, _ = bbox.shape roi = np.concatenate((roi, bbox), axis=0) pos_roi_per_image = np.round(self.n_sample * self.pos_ratio) iou = bbox_iou(roi, bbox) gt_assignment = iou.argmax(axis=1) max_iou = iou.max(axis=1) # Offset range of classes from [0, n_fg_class - 1] to [1, n_fg_class]. # The label with value 0 is the background. gt_roi_label = label[gt_assignment] + 1 # Select foreground RoIs as those with >= pos_iou_thresh IoU. pos_index = np.where(max_iou >= self.pos_iou_thresh)[0] pos_roi_per_this_image = int(min(pos_roi_per_image, pos_index.size)) if pos_index.size > 0: pos_index = np.random.choice( pos_index, size=pos_roi_per_this_image, replace=False) # Select background RoIs as those within # [neg_iou_thresh_lo, neg_iou_thresh_hi). neg_index = np.where((max_iou < self.neg_iou_thresh_hi) & (max_iou >= self.neg_iou_thresh_lo))[0] neg_roi_per_this_image = self.n_sample - pos_roi_per_this_image neg_roi_per_this_image = int(min(neg_roi_per_this_image, neg_index.size)) if neg_index.size > 0: neg_index = np.random.choice( neg_index, size=neg_roi_per_this_image, replace=False) # The indices that we're selecting (both positive and negative). keep_index = np.append(pos_index, neg_index) gt_roi_label = gt_roi_label[keep_index] gt_roi_label[pos_roi_per_this_image:] = 0 # negative labels --> 0 sample_roi = roi[keep_index] # Compute offsets and scales to match sampled RoIs to the GTs. gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep_index]]) gt_roi_loc = ((gt_roi_loc - np.array(loc_normalize_mean, np.float32) ) / np.array(loc_normalize_std, np.float32)) return sample_roi, gt_roi_loc, gt_roi_label
def calc_detection_voc_prec_rec(pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults=None, iou_thresh=0.5, area_range=None): pred_bboxes = iter(pred_bboxes) pred_labels = iter(pred_labels) pred_scores = iter(pred_scores) gt_bboxes = iter(gt_bboxes) gt_labels = iter(gt_labels) if gt_difficults is None: gt_difficults = itertools.repeat(None) else: gt_difficults = iter(gt_difficults) n_pos = defaultdict(int) score = defaultdict(list) match = defaultdict(list) for pred_bbox, pred_label, pred_score, gt_bbox, gt_label, gt_difficult in zip( pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults ): if gt_difficult is None: gt_difficult = np.zeros(gt_bbox.shape[0], dtype=bool) for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)): pred_mask_l = pred_label == l pred_bbox_l = pred_bbox[pred_mask_l] pred_score_l = pred_score[pred_mask_l] # sort by score order = pred_score_l.argsort()[::-1] pred_bbox_l = pred_bbox_l[order] pred_score_l = pred_score_l[order] gt_mask_l = gt_label == l gt_bbox_l = gt_bbox[gt_mask_l] gt_difficult_l = gt_difficult[gt_mask_l] # generate ignore gt list by area_range def _is_ignore(bb): if area_range is None: return False area = (bb[2] - bb[0]) * (bb[3] - bb[1]) return not (area_range[0] <= area <= area_range[1]) gt_ignore = [_is_ignore(bb) for bb in gt_bbox_l] score[l].extend(pred_score_l) for difficult, ignore in zip(gt_difficult_l, gt_ignore): if not difficult and not ignore: n_pos[l] += 1 if len(pred_bbox_l) == 0: continue if len(gt_bbox_l) == 0: match[l].extend((0,) * pred_bbox_l.shape[0]) continue # VOC evaluation follows integer typed bounding boxes. pred_bbox_l = pred_bbox_l.copy() pred_bbox_l[:, 2:] += 1 gt_bbox_l = gt_bbox_l.copy() gt_bbox_l[:, 2:] += 1 ious = bbox_iou(pred_bbox_l, gt_bbox_l) # sort gt_bbox by ignore list gt_sort = np.argsort(gt_ignore, kind='stable') gt_bbo_lx = [gt_bbox_l[i] for i in gt_sort] gt_ignore = [gt_ignore[i] for i in gt_sort] ious = ious[:, gt_sort] gtm = {} dtm = {} for d_idx, d in enumerate(pred_bbox_l): # information about best match so far (m=-1 -> unmatched) iou = min(iou_thresh, 1 - 1e-10) m = -1 for g_idx, g in enumerate(gt_bbox_l): # if this gt already matched, continue if g_idx in gtm: continue # if dt matched to reg gt, and on ignore gt, stop if m > -1 and gt_ignore[m] == False and gt_ignore[g_idx] == True: break # continue to next gt unless better match made if ious[d_idx, g_idx] < iou: continue # if match successful and best so far, store appropriately iou = ious[d_idx, g_idx] m = g_idx # if match made store id of match for both dt and gt if m == -1: continue dtm[d_idx] = m gtm[m] = d_idx # generate ignore list for dts dt_ignore = [] for d_idx, d in enumerate(pred_bbox_l): if d_idx in dtm: dt_ignore.append(gt_ignore[dtm[d_idx]]) else: dt_ignore.append(_is_ignore(d)) for d_idx in range(len(pred_bbox_l)): if not dt_ignore[d_idx]: if d_idx in dtm: if gt_difficult_l[dtm[d_idx]]: match[l].append(-1) else: match[l].append(1) else: match[l].append(0) else: match[l].append(-1) for iter_ in (pred_bboxes, pred_labels, pred_scores, gt_bboxes, gt_labels, gt_difficults): if next(iter_, None) is not None: raise ValueError('Length of input iterables need to be same.') n_fg_class = max(n_pos.keys()) + 1 prec = [None] * n_fg_class rec = [None] * n_fg_class for l in n_pos.keys(): score_l = np.array(score[l]) match_l = np.array(match[l], dtype=np.int8) order = score_l.argsort()[::-1] match_l = match_l[order] tp = np.cumsum(match_l == 1) fp = np.cumsum(match_l == 0) # If an element of fp + tp is 0, # the corresponding element of prec[l] is nan. prec[l] = tp / (fp + tp) # If n_pos[l] is 0, rec[l] is None. if n_pos[l] > 0: rec[l] = tp / n_pos[l] return prec, rec