Example #1
    def _get_iou(self, bbox1_val, bbox2_val):
        """Get IoU for two sets of bounding boxes.

        It also checks that both implementations (NumPy and TensorFlow) return
        the same values before returning.

        Args:
            bbox1_val: Array of shape (total_bbox1, 4).
            bbox2_val: Array of shape (total_bbox2, 4).

        Returns:
            iou: Array of shape (total_bbox1, total_bbox2)
        """
        bbox1 = tf.placeholder(tf.float32, (None, 4))
        bbox2 = tf.placeholder(tf.float32, (None, 4))
        iou = bbox_overlap_tf(bbox1, bbox2)

        with self.test_session() as sess:
            iou_val_tf = sess.run(iou, feed_dict={
                bbox1: np.array(bbox1_val),
                bbox2: np.array(bbox2_val),
            })

        iou_val_np = bbox_overlap(np.array(bbox1_val), np.array(bbox2_val))
        self.assertAllClose(iou_val_np, iou_val_tf)
        return iou_val_tf
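
For reference, the helper above only drives ``bbox_overlap`` and ``bbox_overlap_tf``;
their implementation is not shown here. A self-contained NumPy sketch of the
pairwise IoU such a function is expected to compute (assuming boxes in
``[xmin, ymin, xmax, ymax]`` format and no +1 width/height convention, which the
library may or may not use) could look like this:

import numpy as np

def pairwise_iou(bbox1, bbox2):
    """IoU of every box in `bbox1` against every box in `bbox2`.

    Args:
        bbox1: Array of shape (total_bbox1, 4).
        bbox2: Array of shape (total_bbox2, 4).

    Returns:
        Array of shape (total_bbox1, total_bbox2).
    """
    # Intersection corners, broadcast to shape (total_bbox1, total_bbox2).
    x1 = np.maximum(bbox1[:, None, 0], bbox2[None, :, 0])
    y1 = np.maximum(bbox1[:, None, 1], bbox2[None, :, 1])
    x2 = np.minimum(bbox1[:, None, 2], bbox2[None, :, 2])
    y2 = np.minimum(bbox1[:, None, 3], bbox2[None, :, 3])

    # Clip negative widths/heights to zero for non-overlapping pairs.
    intersection = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area1 = (bbox1[:, 2] - bbox1[:, 0]) * (bbox1[:, 3] - bbox1[:, 1])
    area2 = (bbox2[:, 2] - bbox2[:, 0]) * (bbox2[:, 3] - bbox2[:, 1])
    union = area1[:, None] + area2[None, :] - intersection
    return intersection / union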
Example #2
def get_valid_match_iou(i, j, gt_boxes, predicted_boxes, iou_threshold):
    """
    Returns all groundtruth classes, predicted classes, matched groundtruth
    classes, matched predicted classes with iou greater than iou_threshold,
    after filtering the predicted classes with confidence greater than
    confidence_threshold for one image given in image_path with format
    input_image_format

    Args:
        i: index in groundtruth boxes list to compare
        j: index in predicted boxes list to compare with
        gt_boxes: list of 4 arrayed groundtruth bounding boxes of format
        [[xmin, ymin, xmax, ymax], [xmin1, ymin1, xmax1, ymax1], ..]
        predicted_boxes: list of 4 arrayed predicted bounding boxes of format
        [[xmin, ymin, xmax, ymax], [xmin1, ymin1, xmax1, ymax1], ..]
        iou_threshold: float, IOU threshold below which the
            match of the predicted bounding box with the
            ground truth box is invalid
    Returns:
        list: [index_gt, index_predicted, iou] or None
    """
    iou = bbox_overlap(
        np.array(gt_boxes[i]).reshape(1, 4),
        np.array(predicted_boxes[j]).reshape(1, 4))[0][0]
    if iou >= iou_threshold:
        return [i, j, iou]
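
A minimal usage sketch for the function above (the coordinates are invented
for illustration; ``bbox_overlap`` must be importable, as assumed by the
function itself):

gt_boxes = [[10, 10, 50, 50], [60, 60, 90, 90]]
predicted_boxes = [[12, 12, 48, 48]]

# Compare ground truth box 0 against predicted box 0.
match = get_valid_match_iou(0, 0, gt_boxes, predicted_boxes, iou_threshold=0.5)
# `match` is [0, 0, <iou>] when the IoU clears the threshold, otherwise None.
print(match)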
Example #3
    def _get_iou(self, bbox1_val, bbox2_val):
        """Get IoU for two sets of bounding boxes.

        It also checks that both implementations (NumPy and TensorFlow) return
        the same values before returning.

        Args:
            bbox1_val: Array of shape (total_bbox1, 4).
            bbox2_val: Array of shape (total_bbox2, 4).

        Returns:
            iou: Array of shape (total_bbox1, total_bbox2)
        """
        bbox1 = tf.placeholder(tf.float32, (None, 4))
        bbox2 = tf.placeholder(tf.float32, (None, 4))
        iou = bbox_overlap_tf(bbox1, bbox2)

        with self.test_session() as sess:
            iou_val_tf = sess.run(iou,
                                  feed_dict={
                                      bbox1: np.array(bbox1_val),
                                      bbox2: np.array(bbox2_val),
                                  })

        iou_val_np = bbox_overlap(np.array(bbox1_val), np.array(bbox2_val))
        self.assertAllClose(iou_val_np, iou_val_tf)
        return iou_val_tf
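
Note that the snippet above relies on the TensorFlow 1.x graph API
(``tf.placeholder`` and sessions). A hedged sketch of the same comparison under
TensorFlow 2.x eager execution, assuming ``bbox_overlap_tf`` accepts eager
tensors (an assumption about the library, not something shown here):

def compare_iou_eager(bbox1_val, bbox2_val):
    # Same check as `_get_iou`, but without placeholders or sessions.
    iou_tf = bbox_overlap_tf(
        tf.constant(bbox1_val, dtype=tf.float32),
        tf.constant(bbox2_val, dtype=tf.float32),
    ).numpy()
    iou_np = bbox_overlap(np.array(bbox1_val), np.array(bbox2_val))
    np.testing.assert_allclose(iou_np, iou_tf, rtol=1e-5)
    return iou_tf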
Example #4
def calculate_metrics(output_per_batch, num_classes):
    """Calculates mAP and mAR from the detector's output.

    The procedure for calculating the average precision for class ``C`` is as
    follows (see `VOC mAP metric`_ for more details):

    Start by ranking all the predictions (for a given image and said class) in
    order of confidence.  Each of these predictions is marked as correct (a
    true positive, when its IoU with a ground truth box is greater than or
    equal to the corresponding threshold in ``iou_thresholds``) or incorrect
    (a false positive otherwise).  This matching is
    performed greedily over the confidence scores, so a higher-confidence
    prediction will be matched over another lower-confidence one even if the
    latter has better IoU.  Also, each prediction is matched at most once, so
    repeated detections are counted as false positives.

    We then integrate over the interpolated PR curve to obtain the value for
    the class' average precision.  The interpolation makes the precision curve
    monotonically non-increasing: walking the precisions from last to first,
    each value is replaced by the maximum precision found at any higher recall
    rank.  The integration is performed over 101 fixed recall points on the
    curve (``[0.0, 0.01, ..., 1.0]``), as in the COCO evaluation.

    Average the result among all the classes to obtain the final, ``mAP``,
    value.

    Args:
        output_per_batch (dict): Output of the detector to calculate mAP.
            Expects the following keys: ``bboxes``, ``classes``, ``scores``,
            ``gt_bboxes``, ``gt_classes``. Under each key, there should be a
            list of the results per batch as returned by the detector.
        num_classes (int): Number of classes on the dataset.

    Returns:
        (``np.ndarray``, ``np.ndarray``) tuple. The first value is an array of
        shape (``num_classes``, ``len(iou_thresholds)``) with the AP value per
        class and IoU threshold, while the second one is the corresponding
        array of AR values.

    .. _VOC mAP metric:
        http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham10.pdf
    """
    # 10 IoU thresholds (0.50, 0.55, ..., 0.95), same as COCO evaluation.
    iou_thresholds = np.linspace(0.50, 0.95,
                                 int(np.round((0.95 - 0.50) / 0.05)) + 1)
    # 101 recall levels (0.00, 0.01, ..., 1.00), same as COCO evaluation.
    rec_thresholds = np.linspace(0.00, 1.00,
                                 int(np.round((1.00 - 0.00) / 0.01)) + 1)

    # List; first by class, then by example. Each entry is a tuple of ndarrays
    # of size (D_{c,i},), for tp/fp labels and for score, where D_{c,i} is the
    # number of detected boxes for class `c` on image `i`.
    tp_fp_labels_by_class = [[] for _ in range(num_classes)]
    num_examples_per_class = [0 for _ in range(num_classes)]

    # For each image, order predictions by score and classify each as a true
    # positive or a false positive.
    num_batches = len(output_per_batch["bboxes"])
    for idx in range(num_batches):

        # Get the results of the batch.
        classes = output_per_batch["classes"][idx]  # (D_{c,i},)
        bboxes = output_per_batch["bboxes"][idx]  # (D_{c,i}, 4)
        scores = output_per_batch["scores"][idx]  # (D_{c,i},)

        gt_classes = output_per_batch["gt_classes"][idx]
        gt_bboxes = output_per_batch["gt_bboxes"][idx]

        # Analysis must be made per-class.
        for cls in range(num_classes):
            # Get the bounding boxes of `cls` only.
            cls_bboxes = bboxes[classes == cls, :]
            cls_scores = scores[classes == cls]
            cls_gt_bboxes = gt_bboxes[gt_classes == cls, :]

            num_gt = cls_gt_bboxes.shape[0]
            num_examples_per_class[cls] += num_gt

            # Sort by score descending, so we prioritize higher-confidence
            # results when matching.
            sorted_indices = np.argsort(-cls_scores)

            # Whether the ground-truth has been previously detected.
            is_detected = np.zeros((num_gt, len(iou_thresholds)))

            # TP/FP labels for detected bboxes of (class, image).
            tp_fp_labels = np.zeros((len(sorted_indices), len(iou_thresholds)))

            if num_gt == 0:
                # If no ground truth examples for class, all predictions must
                # be false positives.
                tp_fp_labels_by_class[cls].append(
                    (tp_fp_labels, cls_scores[sorted_indices]))
                continue

            # Get the IoUs for the class' bboxes.
            ious = bbox_overlap(cls_bboxes, cls_gt_bboxes)

            # Greedily assign bboxes to ground truths (highest score first).
            for bbox_idx in sorted_indices:
                gt_match = np.argmax(ious[bbox_idx, :])
                # TODO: Try to vectorize.
                for iou_idx, iou_threshold in enumerate(iou_thresholds):
                    if ious[bbox_idx, gt_match] >= iou_threshold:
                        # Over IoU threshold.
                        if not is_detected[gt_match, iou_idx]:
                            # And first detection: it's a true positive.
                            tp_fp_labels[bbox_idx, iou_idx] = True
                            is_detected[gt_match, iou_idx] = True

            tp_fp_labels_by_class[cls].append(
                (tp_fp_labels, cls_scores[sorted_indices]))

    # Calculate average precision per class.
    ap_per_class = np.zeros((num_classes, len(iou_thresholds)))
    ar_per_class = np.zeros((num_classes, len(iou_thresholds)))
    for cls in range(num_classes):
        tp_fp_labels = tp_fp_labels_by_class[cls]
        num_examples = num_examples_per_class[cls]

        # Flatten the tp/fp labels into a single ndarray.
        labels, scores = zip(*tp_fp_labels)
        labels = np.concatenate(labels)
        scores = np.concatenate(scores)

        # Sort the tp/fp labels by decreasing confidence score and calculate
        # precision and recall at every position of this ranked output.
        sorted_indices = np.argsort(-scores)
        true_positives = labels[sorted_indices, :]
        false_positives = 1 - true_positives

        sum_true_positives = np.cumsum(true_positives, axis=0)
        sum_false_positives = np.cumsum(false_positives, axis=0)

        recall = sum_true_positives.astype(float) / num_examples
        precision = np.divide(sum_true_positives.astype(float),
                              sum_true_positives + sum_false_positives)

        # Find AP by integrating over PR curve, with interpolated precision.
        for iou_idx in range(len(iou_thresholds)):
            p = precision[:, iou_idx]
            r = recall[:, iou_idx]

            # Interpolate the precision (make it monotonically non-increasing).
            for i in range(len(p) - 1, 0, -1):
                if p[i] > p[i - 1]:
                    p[i - 1] = p[i]

            ap = 0
            inds = np.searchsorted(r, rec_thresholds)
            for ridx, pidx in enumerate(inds):
                if pidx >= len(r):
                    # Out of bounds, no recall higher than threshold for any of
                    # the remaining thresholds (as they're ordered).
                    break

                ap += p[pidx] / len(rec_thresholds)

            ap_per_class[cls, iou_idx] = ap
            if len(r):
                ar_per_class[cls, iou_idx] = r[-1]
            else:
                ar_per_class[cls, iou_idx] = 0

    return ap_per_class, ar_per_class
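
A hedged usage sketch for ``calculate_metrics`` (the boxes and scores are
invented for illustration; ``bbox_overlap`` and the function above are assumed
to be importable). Each per-batch entry follows the shapes noted in the code:
``bboxes`` is (D, 4), ``classes`` and ``scores`` are (D,):

import numpy as np

output_per_batch = {
    "bboxes": [np.array([[10., 10., 50., 50.], [12., 12., 48., 48.]])],
    "classes": [np.array([0, 0])],
    "scores": [np.array([0.9, 0.6])],
    "gt_bboxes": [np.array([[11., 11., 49., 49.]])],
    "gt_classes": [np.array([0])],
}

ap_per_class, ar_per_class = calculate_metrics(output_per_batch, num_classes=1)
# COCO-style summary: average over classes and over the IoU thresholds.
print("mAP@[0.50:0.95]:", ap_per_class.mean())
print("mAR@[0.50:0.95]:", ar_per_class.mean())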
Example #5
def calculate_map(output_per_batch, num_classes, iou_threshold=0.5):
    """Calculates mAP@iou_threshold from the detector's output.

    The procedure for calculating the average precision for class ``C`` is as
    follows (see `VOC mAP metric`_ for more details):

    Start by ranking all the predictions (for a given image and said class) in
    order of confidence.  Each of these predictions is marked as correct (a
    true positive, when its IoU with a ground truth box is greater than or
    equal to ``iou_threshold``) or incorrect (a false positive otherwise).
    This matching is
    performed greedily over the confidence scores, so a higher-confidence
    prediction will be matched over another lower-confidence one even if the
    latter has better IoU.  Also, each prediction is matched at most once, so
    repeated detections are counted as false positives.

    We then integrate over the interpolated PR curve to obtain the value for
    the class' average precision.  The interpolation makes the precision curve
    monotonically non-increasing: at each recall point ``r``, the precision is
    the maximum precision value among all recalls greater than or equal to
    ``r``.  The integration is performed over 11 fixed recall points on the
    curve (``[0.0, 0.1, ..., 1.0]``), as in VOC2007.

    Average the result among all the classes to obtain the final, ``mAP``,
    value.

    Args:
        output_per_batch (dict): Output of the detector to calculate mAP.
            Expects the following keys: ``bboxes``, ``classes``, ``scores``,
            ``gt_bboxes``, ``gt_classes``. Under each key, there should be a
            list of the results per batch as returned by the detector.
        num_classes (int): Number of classes on the dataset.
        iou_threshold (float): IoU threshold for considering a match.

    Returns:
        (``float``, ``np.ndarray``) tuple. The first value is the mAP, while
        the second is an array of size (`num_classes`,), with the AP value per
        class.

    Note:
        The "difficult example" flag of VOC dataset is being ignored.

    Todo:
        * Use VOC2012-style for integrating the curve. That is, use all recall
          points instead of a fixed number of points like in VOC2007.

    .. _VOC mAP metric:
        http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham10.pdf
    """
    # List; first by class, then by example. Each entry is a tuple of ndarrays
    # of size (D_{c,i},), for tp/fp labels and for score, where D_{c,i} is the
    # number of detected boxes for class `c` on image `i`.
    tp_fp_labels_by_class = [[] for _ in range(num_classes)]
    num_examples_per_class = [0 for _ in range(num_classes)]

    # For each image, order predictions by score and classify each as a true
    # positive or a false positive.
    num_batches = len(output_per_batch['bboxes'])
    for idx in range(num_batches):

        # Get the results of the batch.
        classes = output_per_batch['classes'][idx]  # (D_{c,i},)
        bboxes = output_per_batch['bboxes'][idx]  # (D_{c,i}, 4)
        scores = output_per_batch['scores'][idx]  # (D_{c,i},)

        gt_classes = output_per_batch['gt_classes'][idx]
        gt_bboxes = output_per_batch['gt_bboxes'][idx]

        # Analysis must be made per-class.
        for cls in range(num_classes):
            # Get the bounding boxes of `cls` only.
            cls_bboxes = bboxes[classes == cls, :]
            cls_scores = scores[classes == cls]
            cls_gt_bboxes = gt_bboxes[gt_classes == cls, :]

            num_gt = cls_gt_bboxes.shape[0]
            num_examples_per_class[cls] += num_gt

            # Sort by score descending, so we prioritize higher-confidence
            # results when matching.
            sorted_indices = np.argsort(-cls_scores)

            # Whether the ground-truth has been previously detected.
            is_detected = np.zeros(num_gt)

            # TP/FP labels for detected bboxes of (class, image).
            tp_fp_labels = np.zeros(len(sorted_indices))

            if num_gt == 0:
                # If no ground truth examples for class, all predictions must
                # be false positives.
                tp_fp_labels_by_class[cls].append(
                    (tp_fp_labels, cls_scores[sorted_indices])
                )
                continue

            # Get the IoUs for the class' bboxes.
            ious = bbox_overlap(cls_bboxes, cls_gt_bboxes)

            # Greedily assign bboxes to ground truths (highest score first).
            for bbox_idx in sorted_indices:
                gt_match = np.argmax(ious[bbox_idx, :])
                if ious[bbox_idx, gt_match] >= iou_threshold:
                    # Over IoU threshold.
                    if not is_detected[gt_match]:
                        # And first detection: it's a true positive.
                        tp_fp_labels[bbox_idx] = True
                        is_detected[gt_match] = True

            tp_fp_labels_by_class[cls].append(
                (tp_fp_labels, cls_scores[sorted_indices])
            )

    # Calculate average precision per class.
    ap_per_class = np.zeros(num_classes)
    for cls in range(num_classes):
        tp_fp_labels = tp_fp_labels_by_class[cls]
        num_examples = num_examples_per_class[cls]

        # Flatten the tp/fp labels into a single ndarray.
        labels, scores = zip(*tp_fp_labels)
        labels = np.concatenate(labels)
        scores = np.concatenate(scores)

        # Sort the tp/fp labels by decreasing confidence score and calculate
        # precision and recall at every position of this ranked output.
        sorted_indices = np.argsort(-scores)
        true_positives = labels[sorted_indices]
        false_positives = 1 - true_positives

        cum_true_positives = np.cumsum(true_positives)
        cum_false_positives = np.cumsum(false_positives)

        recall = cum_true_positives.astype(float) / num_examples
        precision = np.divide(
            cum_true_positives.astype(float),
            cum_true_positives + cum_false_positives
        )

        # Find AP by integrating over PR curve, with interpolated precision.
        ap = 0
        for t in np.linspace(0, 1, 11):
            if not np.any(recall >= t):
                # Recall is never higher than `t`, continue.
                continue
            ap += np.max(precision[recall >= t]) / 11  # Interpolated.

        ap_per_class[cls] = ap

    # Finally, mAP.
    mean_ap = np.mean(ap_per_class)

    return mean_ap, ap_per_class
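
The 11-point interpolation described in the docstring can also be checked in
isolation with a toy ranked output (the precision/recall values below are
illustrative only, corresponding to a TP, FP, TP, FP ranking over 4 ground
truth boxes):

import numpy as np

precision = np.array([1.0, 0.5, 2 / 3, 0.5])
recall = np.array([0.25, 0.25, 0.5, 0.5])

ap = 0.0
for t in np.linspace(0, 1, 11):
    if np.any(recall >= t):
        # Interpolated precision: best precision at any recall >= t.
        ap += np.max(precision[recall >= t]) / 11
print(ap)  # (1.0 * 3 + (2 / 3) * 3) / 11 = 5 / 11 ≈ 0.4545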