Beispiel #1
0
    def encode(self, gt_data, overlap_threshold=0.5, debug=False):
        """Encode ground truth quadrilaterals as per-prior training targets.

        Every prior box whose IoU with the horizontal bounding rectangle of
        some ground truth polygon exceeds ``overlap_threshold`` is assigned
        to the ground truth with the highest IoU. All other priors are
        labelled as background and get zero offsets.

        # Arguments
            gt_data: Array with one row per ground truth object. The first
                eight columns are the interleaved x/y coordinates of the
                quadrilateral (normalized), the last column is the class
                label.
            overlap_threshold: Minimum IoU between prior and ground truth
                bounding rectangle required for a match.
            debug: Unused, kept for interface compatibility.

        # Returns
            Array of shape (num_priors, 4 + 8 + 5 + num_classes) containing,
            in this order, the box offsets, the quadrilateral offsets, the
            rotated box offsets and the one-hot class confidences.

        # Notes
            As a side effect ``self.gt_boxes`` is set to the bounding
            rectangles of the ground truth and ``self.match_indices`` to a
            dict mapping matched prior index to ground truth index.
        """
        # calculation is done with normalized sizes

        num_classes = 2
        num_priors = self.priors.shape[0]

        if gt_data.shape[0] == 0:
            # Without ground truth nothing can be matched (argmax over an
            # empty axis would raise), so every prior is background.
            self.gt_boxes = np.zeros((0, 4))
            self.match_indices = {}
            confidence = np.zeros((num_priors, num_classes))
            confidence[:, 0] = 1
            return np.concatenate(
                [np.zeros((num_priors, 4 + 8 + 5)), confidence], axis=1)

        gt_polygons = np.copy(gt_data[:, :8])  # normalized quadrilaterals
        gt_rboxes = np.array(
            [polygon_to_rbox3(np.reshape(p, (-1, 2))) for p in gt_data[:, :8]])

        # minimum horizontal bounding rectangles
        gt_xmin = np.min(gt_data[:, 0:8:2], axis=1)
        gt_ymin = np.min(gt_data[:, 1:8:2], axis=1)
        gt_xmax = np.max(gt_data[:, 0:8:2], axis=1)
        gt_ymax = np.max(gt_data[:, 1:8:2], axis=1)
        gt_boxes = self.gt_boxes = np.array(
            [gt_xmin, gt_ymin, gt_xmax,
             gt_ymax]).T  # normalized xmin, ymin, xmax, ymax

        # round the (float) label column to the nearest class index;
        # the builtin int replaces the np.int alias removed in NumPy 1.24
        gt_class_idx = np.asarray(gt_data[:, -1] + 0.5, dtype=int)
        gt_one_hot = np.zeros([len(gt_class_idx), num_classes])
        gt_one_hot[np.arange(len(gt_one_hot)),
                   gt_class_idx] = 1  # one_hot classes including background

        # rows: priors, columns: ground truth boxes
        gt_iou = np.array([iou(b, self.priors_norm) for b in gt_boxes]).T

        # assign gt to priors
        max_idxs = np.argmax(gt_iou, axis=1)
        max_val = gt_iou[np.arange(num_priors), max_idxs]
        prior_mask = max_val > overlap_threshold
        match_indices = max_idxs[prior_mask]

        self.match_indices = dict(
            zip(list(np.flatnonzero(prior_mask)), list(match_indices)))

        # prior labels, background unless matched
        confidence = np.zeros((num_priors, num_classes))
        confidence[:, 0] = 1
        confidence[prior_mask] = gt_one_hot[match_indices]

        # ground truth values gathered per matched prior
        gt_xy = (gt_boxes[:, 2:4] + gt_boxes[:, 0:2]) / 2.
        gt_wh = gt_boxes[:, 2:4] - gt_boxes[:, 0:2]
        gt_xy = gt_xy[match_indices]
        gt_wh = gt_wh[match_indices]
        gt_polygons = gt_polygons[match_indices]
        gt_rboxes = gt_rboxes[match_indices]

        priors_xy = self.priors_xy[prior_mask] / self.image_size
        priors_wh = self.priors_wh[prior_mask] / self.image_size
        # the last four columns of self.priors are used as variances
        variances_xy = self.priors[prior_mask, -4:-2]
        variances_wh = self.priors[prior_mask, -2:]

        # compute local offsets for horizontal bounding boxes
        offsets = np.zeros((num_priors, 4))
        offsets[prior_mask, 0:2] = (gt_xy - priors_xy) / priors_wh
        offsets[prior_mask, 2:4] = np.log(gt_wh / priors_wh)
        offsets[prior_mask, 0:2] /= variances_xy
        offsets[prior_mask, 2:4] /= variances_wh

        # compute local offsets for quadrilaterals
        offsets_quads = np.zeros((num_priors, 8))
        priors_xy_minmax = np.hstack(
            [priors_xy - priors_wh / 2, priors_xy + priors_wh / 2])
        # each polygon vertex is encoded relative to a prior corner
        ref = priors_xy_minmax[:, (0, 1, 2, 1, 2, 3, 0, 3)]  # corner points
        offsets_quads[prior_mask, :] = (gt_polygons - ref) / np.tile(
            priors_wh, (1, 4)) / np.tile(variances_xy, (1, 4))

        # compute local offsets for rotated bounding boxes
        # (columns 0:2 and 2:4 of the rbox are treated as two points and
        # column 4 as a length, as returned by polygon_to_rbox3)
        offsets_rboxs = np.zeros((num_priors, 5))
        offsets_rboxs[prior_mask, 0:2] = (gt_rboxes[:, 0:2] -
                                          priors_xy) / priors_wh / variances_xy
        offsets_rboxs[prior_mask, 2:4] = (gt_rboxes[:, 2:4] -
                                          priors_xy) / priors_wh / variances_xy
        offsets_rboxs[prior_mask, 4] = np.log(
            gt_rboxes[:, 4] / priors_wh[:, 1]) / variances_wh[:, 1]

        return np.concatenate(
            [offsets, offsets_quads, offsets_rboxs, confidence], axis=1)
Beispiel #2
0
def evaluate_results(ground_truth,
                     detection_results,
                     gt_util,
                     iou_thresh=0.5,
                     max_dets=None,
                     figsize=(10, 10),
                     return_fmeasure=False):
    """Evaluates detection results, plots precision-recall curves and
    calculates mean Average Precision.

    # Arguments
        ground_truth: List of ground truth data with
            shape (objects, x1+y1+x2+y2+label)
        detection_results: List of corresponding detection Results with
            shape (objects, x1+y1+x2+y2+confidence+label)
        gt_util: Instance of BaseGTUtility containing metadata about the
            dataset.
        iou_thresh: Minimum intersection over union required to associate
            a detected bounding box to a ground truth bounding box.
        max_dets: Maximal number of used detections per image.
        figsize: Size of the precision-recall figure.
        return_fmeasure: If True, neither print nor plot anything and
            return the F-measure accumulated over all classes instead.

    # Returns
        The F-measure if return_fmeasure is True, otherwise None.

    # Notes
        The maximum number of detections per image can also be limited by
        keep_top_k argument in PriorUtil.decode.

        Class index 0 is skipped in the evaluation.

    """

    gt = ground_truth
    dt = detection_results

    num_classes = gt_util.num_classes

    # Divisions by zero are expected (classes without ground truth or
    # detections). errstate restores the previous numpy error settings
    # on exit, even if an exception is raised.
    with np.errstate(divide='ignore', invalid='ignore'):
        TP = []
        FP = []
        FN_sum = np.zeros(num_classes)
        num_groundtruth_boxes = np.zeros(num_classes)
        num_detections = np.zeros(num_classes)

        conf = []

        for i in range(len(gt)):
            gt_boxes = gt[i][:, :4]
            gt_labels = gt[i][:, -1].astype(np.int32)

            conf_img = dt[i][:, 4]
            order = np.argsort(-conf_img)  # sort by confidence
            order = order[:max_dets]  # only max_dets detections per image
            conf.append(conf_img[order])
            dt_img = dt[i][order]

            dt_boxes = dt_img[:, :4]
            dt_labels = dt_img[:, -1].astype(np.int32)

            num_dt_img = len(dt_labels)
            TP_img = np.zeros((num_dt_img, num_classes))
            FP_img = np.zeros((num_dt_img, num_classes))
            FN_img_sum = np.zeros(num_classes)

            for c in range(1, num_classes):
                gt_idxs = np.argwhere(gt_labels == c)[:, 0]
                dt_idxs = np.argwhere(dt_labels == c)[:, 0]
                num_gt = len(gt_idxs)
                num_dt = len(dt_idxs)

                num_groundtruth_boxes[c] += num_gt
                num_detections[c] += num_dt

                # marks ground truth boxes already claimed by a detection;
                # builtin bool replaces np.bool removed in NumPy 1.24
                assignment = np.zeros(num_gt, dtype=bool)

                # detections are visited in order of decreasing confidence
                for dt_idx in dt_idxs:
                    if num_gt > 0:
                        gt_iou = iou(dt_boxes[dt_idx], gt_boxes[gt_idxs])
                        max_gt_idx = np.argmax(gt_iou)
                        if (gt_iou[max_gt_idx] > iou_thresh
                                and not assignment[max_gt_idx]):
                            # true positive
                            TP_img[dt_idx, c] = 1
                            assignment[max_gt_idx] = True
                            continue
                    # false positive: no ground truth of this class,
                    # intersection too low or ground truth already matched
                    FP_img[dt_idx, c] = 1

                FN_img_sum[c] = np.sum(np.logical_not(assignment))

            TP.append(TP_img)
            FP.append(FP_img)
            FN_sum += FN_img_sum

        # order all detections of the dataset by decreasing confidence
        conf = np.concatenate(conf)
        order = np.argsort(-conf)
        TP = np.concatenate(TP)[order]
        FP = np.concatenate(FP)[order]

        TP_sum = np.sum(TP, axis=0)
        FP_sum = np.sum(FP, axis=0)

        if return_fmeasure:
            TP_sum = np.sum(TP_sum)
            FP_sum = np.sum(FP_sum)
            FN_sum = np.sum(FN_sum)

            recall = TP_sum / (TP_sum + FN_sum)
            precision = TP_sum / (TP_sum + FP_sum)
            # eps is not defined in this function; presumably a small
            # module-level constant guarding against 0 / 0 - TODO confirm
            fmeasure = 2 * precision * recall / (precision + recall + eps)

            return fmeasure

        # Invariants: TP + FN = num_groundtruth_boxes
        #             TP + FP = num_detections

        tp = np.cumsum(TP, axis=0)
        fp = np.cumsum(FP, axis=0)

        recall = tp / num_groundtruth_boxes
        precision = tp / (tp + fp)

        # add boundary values
        mrec = np.empty((len(conf) + 2, num_classes))
        mrec[0, :] = 0
        mrec[1:-1, :] = recall
        mrec[-1, :] = 1
        mpre = np.empty((len(conf) + 2, num_classes))
        mpre[0, :] = 0
        mpre[1:-1, :] = np.nan_to_num(precision)
        mpre[-1, :] = 0

        # AP according Pascal VOC 2012
        # cummax in reverse order
        mpre = np.flip(np.maximum.accumulate(np.flip(mpre, axis=0), axis=0),
                       axis=0)
        AP = np.sum((mrec[1:, :] - mrec[:-1, :]) * mpre[1:, :], axis=0)

        MAP = np.mean(AP[1:])

        print('%-19s %8s %8s %8s %6s' % ('Class', 'TP', 'FP', 'FN', 'AP'))
        for i in range(1, num_classes):
            print('%2i %-16s %8i %8i %8i %6.3f' %
                  (i, gt_util.classes[i], TP_sum[i], FP_sum[i], FN_sum[i],
                   AP[i]))
        print('%-19s %8i %8i %8i %6.3f @ %g %s' %
              ('Sum / mAP', np.sum(TP_sum), np.sum(FP_sum), np.sum(FN_sum),
               MAP, iou_thresh, max_dets))

        plt.figure(figsize=figsize)
        ax = plt.gca()
        ax.set_xlim(0.0, 1.0)
        ax.set_ylim(0.0, 1.0)
        ax.grid()
        plt.step(mrec[:, 1:], mpre[:, 1:], where='pre')
        plt.legend(gt_util.classes[1:],
                   bbox_to_anchor=(1.04, 1),
                   loc="upper left")
        plt.xlabel('recall')
        plt.ylabel('precision')
        plt.show()