def encode(self, gt_data, overlap_threshold=0.5, debug=False):
    """Encodes ground truth quadrilaterals as regression targets per prior.

    All calculations are done with normalized sizes. Every prior is assigned
    to the ground truth box it overlaps most; priors whose best overlap
    exceeds overlap_threshold become positives and receive offsets, all
    other priors are labeled as background with zero offsets.

    As side effects, the horizontal ground truth boxes are stored in
    self.gt_boxes and the mapping {prior index: ground truth index} of the
    positive priors is stored in self.match_indices.

    # Arguments
        gt_data: Array with shape (objects, 8+...+label). The first eight
            columns are the normalized quadrilateral corners
            x1, y1, x2, y2, x3, y3, x4, y4 and the last column is the class
            label.
        overlap_threshold: Minimum intersection over union (exclusive)
            between a prior and its best ground truth box required to treat
            the prior as positive.
        debug: Unused, kept for interface compatibility.

    # Returns
        Array with shape (num_priors, 4+8+5+2) composed of the box offsets,
        the quadrilateral offsets, the rotated box offsets and the one-hot
        confidence (background, object).
    """
    num_classes = 2
    num_priors = self.priors.shape[0]

    if gt_data.shape[0] == 0:
        # No ground truth: nothing can be matched, so every prior is
        # background and all offsets stay zero.
        self.gt_boxes = np.zeros((0, 4))
        self.match_indices = {}
        encoded = np.zeros((num_priors, 4 + 8 + 5 + num_classes))
        encoded[:, -num_classes] = 1
        return encoded

    # normalized quadrilaterals
    gt_polygons = np.copy(gt_data[:, :8])
    gt_rboxes = np.array(
        [polygon_to_rbox3(np.reshape(p, (-1, 2))) for p in gt_data[:, :8]])

    # minimum horizontal bounding rectangles,
    # normalized xmin, ymin, xmax, ymax
    xs = gt_data[:, 0:8:2]
    ys = gt_data[:, 1:8:2]
    gt_boxes = self.gt_boxes = np.array([
        np.min(xs, axis=1),
        np.min(ys, axis=1),
        np.max(xs, axis=1),
        np.max(ys, axis=1),
    ]).T

    # one-hot classes including background; adding 0.5 before the integer
    # cast rounds the (non-negative) float labels to the nearest index
    gt_class_idx = np.asarray(gt_data[:, -1] + 0.5, dtype=int)
    gt_one_hot = np.zeros([len(gt_class_idx), num_classes])
    gt_one_hot[np.arange(len(gt_one_hot)), gt_class_idx] = 1

    # overlap matrix with shape (num_priors, objects)
    gt_iou = np.array([iou(b, self.priors_norm) for b in gt_boxes]).T

    # assign ground truth to priors
    max_idxs = np.argmax(gt_iou, axis=1)
    max_val = gt_iou[np.arange(num_priors), max_idxs]
    prior_mask = max_val > overlap_threshold
    match_indices = max_idxs[prior_mask]

    self.match_indices = dict(
        zip(np.flatnonzero(prior_mask), match_indices))

    # prior labels, background unless matched
    confidence = np.zeros((num_priors, num_classes))
    confidence[:, 0] = 1
    confidence[prior_mask] = gt_one_hot[match_indices]

    # per positive prior: geometry of the matched ground truth
    gt_xy = ((gt_boxes[:, 2:4] + gt_boxes[:, 0:2]) / 2.)[match_indices]
    gt_wh = (gt_boxes[:, 2:4] - gt_boxes[:, 0:2])[match_indices]
    gt_polygons = gt_polygons[match_indices]
    gt_rboxes = gt_rboxes[match_indices]

    # positive priors in normalized coordinates; the last four columns of
    # self.priors hold the variances for xy and wh
    priors_xy = self.priors_xy[prior_mask] / self.image_size
    priors_wh = self.priors_wh[prior_mask] / self.image_size
    variances_xy = self.priors[prior_mask, -4:-2]
    variances_wh = self.priors[prior_mask, -2:]

    # compute local offsets for horizontal boxes
    offsets = np.zeros((num_priors, 4))
    offsets[prior_mask, 0:2] = (gt_xy - priors_xy) / priors_wh
    offsets[prior_mask, 2:4] = np.log(gt_wh / priors_wh)
    offsets[prior_mask, 0:2] /= variances_xy
    offsets[prior_mask, 2:4] /= variances_wh

    # compute local offsets for quadrilaterals, each polygon corner is
    # measured relative to the corresponding corner point of the prior
    offsets_quads = np.zeros((num_priors, 8))
    priors_xy_minmax = np.hstack(
        [priors_xy - priors_wh / 2, priors_xy + priors_wh / 2])
    ref = priors_xy_minmax[:, [0, 1, 2, 1, 2, 3, 0, 3]]
    offsets_quads[prior_mask, :] = (
        (gt_polygons - ref)
        / np.tile(priors_wh, (1, 4))
        / np.tile(variances_xy, (1, 4)))

    # compute local offsets for rotated bounding boxes: columns 0:2 and 2:4
    # are encoded as points relative to the prior center, column 4 is
    # log-scaled against the prior height
    offsets_rboxs = np.zeros((num_priors, 5))
    offsets_rboxs[prior_mask, 0:2] = (
        (gt_rboxes[:, 0:2] - priors_xy) / priors_wh / variances_xy)
    offsets_rboxs[prior_mask, 2:4] = (
        (gt_rboxes[:, 2:4] - priors_xy) / priors_wh / variances_xy)
    offsets_rboxs[prior_mask, 4] = (
        np.log(gt_rboxes[:, 4] / priors_wh[:, 1]) / variances_wh[:, 1])

    return np.concatenate(
        [offsets, offsets_quads, offsets_rboxs, confidence], axis=1)
def _match_detections(gt_boxes, gt_labels, dt_boxes, dt_labels, num_classes,
                      iou_thresh):
    """Matches the detections of one image to its ground truth, per class.

    Detections are processed in the given order. A detection is a true
    positive if its best overlapping ground truth box of the same class
    exceeds iou_thresh and has not been claimed by an earlier detection;
    otherwise it is a false positive.

    # Returns
        Tuple (TP_img, FP_img, FN_img, num_gt) where TP_img and FP_img have
        shape (detections, num_classes) and FN_img and num_gt have shape
        (num_classes,). Class 0 (background) is never evaluated.
    """
    num_dt_img = len(dt_labels)
    TP_img = np.zeros((num_dt_img, num_classes))
    FP_img = np.zeros((num_dt_img, num_classes))
    FN_img = np.zeros(num_classes)
    num_gt = np.zeros(num_classes)

    for c in range(1, num_classes):
        gt_idxs = np.flatnonzero(gt_labels == c)
        dt_idxs = np.flatnonzero(dt_labels == c)
        num_gt[c] = len(gt_idxs)
        assigned = np.zeros(len(gt_idxs), dtype=bool)

        for dt_idx in dt_idxs:
            if len(gt_idxs) > 0:
                gt_iou = iou(dt_boxes[dt_idx], gt_boxes[gt_idxs])
                best = np.argmax(gt_iou)
                if gt_iou[best] > iou_thresh and not assigned[best]:
                    # true positive
                    TP_img[dt_idx, c] = 1
                    assigned[best] = True
                    continue
            # false positive: multiple detections of the same box,
            # intersection too low or no ground truth of this class
            FP_img[dt_idx, c] = 1

        FN_img[c] = np.sum(np.logical_not(assigned))

    return TP_img, FP_img, FN_img, num_gt


def _average_precision(recall, precision):
    """Computes the per-class AP according to Pascal VOC 2012.

    # Arguments
        recall: Array with shape (detections, num_classes).
        precision: Array with shape (detections, num_classes), NaN entries
            are treated as zero.

    # Returns
        Tuple (mrec, mpre, AP) with the boundary-padded recall values, the
        monotonically decreasing precision envelope and the area under the
        resulting curve for each class.
    """
    num_dets, num_classes = recall.shape

    # add boundary values
    mrec = np.empty((num_dets + 2, num_classes))
    mrec[0, :] = 0
    mrec[1:-1, :] = recall
    mrec[-1, :] = 1

    mpre = np.empty((num_dets + 2, num_classes))
    mpre[0, :] = 0
    mpre[1:-1, :] = np.nan_to_num(precision)
    mpre[-1, :] = 0

    # cummax in reverse order
    mpre = np.flip(
        np.maximum.accumulate(np.flip(mpre, axis=0), axis=0), axis=0)

    AP = np.sum((mrec[1:, :] - mrec[:-1, :]) * mpre[1:, :], axis=0)
    return mrec, mpre, AP


def evaluate_results(ground_truth, detection_results, gt_util,
                     iou_thresh=0.5, max_dets=None, figsize=(10, 10),
                     return_fmeasure=False):
    """Evaluates detection results, plots precision-recall curves and
    calculates mean Average Precision.

    # Arguments
        ground_truth: List of ground truth data with
            shape (objects, x1+y1+x2+y2+label)
        detection_results: List of corresponding detection Results with
            shape (objects, x1+y1+x2+y2+confidence+label)
        gt_util: Instance of BaseGTUtility containing metadata about the
            dataset.
        iou_thresh: Minimum intersection over union required to associate
            a detected bounding box to a ground truth bounding box.
        max_dets: Maximal number of used detections per image.
        figsize: Size of the precision-recall figure.
        return_fmeasure: If True, skip the report and the plot and return
            the f-measure computed over all classes instead.

    # Returns
        The f-measure if return_fmeasure is True, otherwise None.

    # Notes
        The maximum number of detections per image can also be limited by
        keep_top_k argument in PriorUtil.decode.
    """
    num_classes = gt_util.num_classes

    # Divisions by zero are expected for classes without ground truth or
    # detections. The context manager restores the previous NumPy error
    # state on every exit path, including exceptions.
    with np.errstate(divide='ignore', invalid='ignore'):
        TP = []
        FP = []
        conf = []
        FN_sum = np.zeros(num_classes)
        num_groundtruth_boxes = np.zeros(num_classes)

        for i, gt_img in enumerate(ground_truth):
            dt_all = detection_results[i]

            conf_img = dt_all[:, 4]
            order = np.argsort(-conf_img)  # sort by confidence
            order = order[:max_dets]  # only max_dets detections per image
            conf.append(conf_img[order])
            dt_img = dt_all[order]

            TP_img, FP_img, FN_img, num_gt = _match_detections(
                gt_img[:, :4], gt_img[:, -1].astype(np.int32),
                dt_img[:, :4], dt_img[:, -1].astype(np.int32),
                num_classes, iou_thresh)

            TP.append(TP_img)
            FP.append(FP_img)
            FN_sum += FN_img
            num_groundtruth_boxes += num_gt

        # order all detections of the dataset by confidence
        conf = np.concatenate(conf)
        order = np.argsort(-conf)
        TP = np.concatenate(TP)[order]
        FP = np.concatenate(FP)[order]

        TP_sum = np.sum(TP, axis=0)
        FP_sum = np.sum(FP, axis=0)

        if return_fmeasure:
            tp_total = np.sum(TP_sum)
            fp_total = np.sum(FP_sum)
            fn_total = np.sum(FN_sum)

            recall = tp_total / (tp_total + fn_total)
            precision = tp_total / (tp_total + fp_total)
            return 2 * precision * recall / (precision + recall + eps)

        # TP + FN = num_groundtruth_boxes
        tp = np.cumsum(TP, axis=0)
        fp = np.cumsum(FP, axis=0)
        recall = tp / num_groundtruth_boxes
        precision = tp / (tp + fp)

        mrec, mpre, AP = _average_precision(recall, precision)
        MAP = np.mean(AP[1:])

        print('%-19s %8s %8s %8s %6s' % ('Class', 'TP', 'FP', 'FN', 'AP'))
        for c in range(1, num_classes):
            print('%2i %-16s %8i %8i %8i %6.3f' %
                  (c, gt_util.classes[c],
                   TP_sum[c], FP_sum[c], FN_sum[c], AP[c]))
        print('%-19s %8i %8i %8i %6.3f @ %g %s' %
              ('Sum / mAP', np.sum(TP_sum), np.sum(FP_sum), np.sum(FN_sum),
               MAP, iou_thresh, max_dets))

        plt.figure(figsize=figsize)
        ax = plt.gca()
        ax.set_xlim(0.0, 1.0)
        ax.set_ylim(0.0, 1.0)
        ax.grid()
        plt.step(mrec[:, 1:], mpre[:, 1:], where='pre')
        plt.legend(gt_util.classes[1:],
                   bbox_to_anchor=(1.04, 1), loc="upper left")
        plt.xlabel('recall')
        plt.ylabel('precision')
        plt.show()