Example no. 1
  def setUp(self):
    num_groundtruth_classes = 1
    matching_iou_threshold_high_iou = 0.5
    matching_iou_threshold_low_iou = 0.1
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    self.eval_high_iou = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold_high_iou,
        nms_iou_threshold, nms_max_output_boxes)

    self.eval_low_iou = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold_low_iou,
        nms_iou_threshold, nms_max_output_boxes)

    self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                   dtype=float)
    self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
    detected_masks_0 = np.array([[0, 1, 1, 0],
                                 [0, 0, 1, 0],
                                 [0, 0, 0, 0]], dtype=np.uint8)
    detected_masks_1 = np.array([[1, 0, 0, 0],
                                 [1, 1, 0, 0],
                                 [0, 0, 0, 0]], dtype=np.uint8)
    detected_masks_2 = np.array([[0, 0, 0, 0],
                                 [0, 1, 1, 0],
                                 [0, 1, 0, 0]], dtype=np.uint8)
    self.detected_masks = np.stack(
        [detected_masks_0, detected_masks_1, detected_masks_2], axis=0)
Example no. 2
    def __init__(self,
                 num_groundtruth_classes,
                 matching_iou_threshold=0.5,
                 nms_iou_threshold=1.0,
                 nms_max_output_boxes=10000):
        self.per_image_eval = per_image_evaluation.PerImageEvaluation(
            num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
            nms_max_output_boxes)
        self.num_class = num_groundtruth_classes

        self.groundtruth_boxes = {}
        self.groundtruth_class_labels = {}
        self.groundtruth_is_difficult_list = {}
        self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
        self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)

        self.detection_keys = set()
        self.scores_per_class = [[] for _ in range(self.num_class)]
        self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
        self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
        self.average_precision_per_class = np.empty(
            self.num_class, dtype=float)
        self.average_precision_per_class.fill(np.nan)
        self.precisions_per_class = []
        self.recalls_per_class = []
        self.corloc_per_class = np.ones(self.num_class, dtype=float)
Example no. 3
    def __init__(self,
                 num_groundtruth_classes,
                 matching_iou_threshold=0.5,
                 nms_iou_threshold=1.0,
                 nms_max_output_boxes=10000,
                 use_weighted_mean_ap=False,
                 label_id_offset=0,
                 group_of_weight=0.0):
        if num_groundtruth_classes < 1:
            raise ValueError(
                'Need at least 1 groundtruth class for evaluation.')

        self.per_image_eval = per_image_evaluation.PerImageEvaluation(
            num_groundtruth_classes=num_groundtruth_classes,
            matching_iou_threshold=matching_iou_threshold,
            nms_iou_threshold=nms_iou_threshold,
            nms_max_output_boxes=nms_max_output_boxes,
            group_of_weight=group_of_weight)
        self.group_of_weight = group_of_weight
        self.num_class = num_groundtruth_classes
        self.use_weighted_mean_ap = use_weighted_mean_ap
        self.label_id_offset = label_id_offset

        self.groundtruth_boxes = {}
        self.groundtruth_class_labels = {}
        self.groundtruth_masks = {}
        self.groundtruth_is_difficult_list = {}
        self.groundtruth_is_group_of_list = {}
        self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=float)
        self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)

        self._initialize_detections()
Example no. 4
  def setUp(self):
    num_groundtruth_classes = 1
    matching_iou_threshold = 0.5
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    self.group_of_weight = 0.5
    self.eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
        nms_max_output_boxes, self.group_of_weight)

    self.detected_boxes = np.array(
        [[0, 0, 1, 1], [0, 0, 2, 1], [0, 0, 3, 1]], dtype=float)
    self.detected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    detected_masks_0 = np.array(
        [[0, 1, 1, 0], [0, 0, 1, 0], [0, 0, 0, 0]], dtype=np.uint8)
    detected_masks_1 = np.array(
        [[1, 0, 0, 0], [1, 1, 0, 0], [0, 0, 0, 0]], dtype=np.uint8)
    detected_masks_2 = np.array(
        [[0, 0, 0, 0], [0, 1, 1, 0], [0, 1, 0, 0]], dtype=np.uint8)
    self.detected_masks = np.stack(
        [detected_masks_0, detected_masks_1, detected_masks_2], axis=0)

    self.groundtruth_boxes = np.array(
        [[0, 0, 1, 1], [0, 0, 5, 5], [10, 10, 20, 20]], dtype=float)
    groundtruth_masks_0 = np.array(
        [[1, 0, 0, 0], [1, 0, 0, 0], [1, 0, 0, 0]], dtype=np.uint8)
    groundtruth_masks_1 = np.array(
        [[0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 0]], dtype=np.uint8)
    groundtruth_masks_2 = np.array(
        [[0, 1, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0]], dtype=np.uint8)
    self.groundtruth_masks = np.stack(
        [groundtruth_masks_0, groundtruth_masks_1, groundtruth_masks_2], axis=0)
Example no. 5
 def test_tp_fp(self):
   num_groundtruth_classes = 3
   matching_iou_threshold = 0.5
   nms_iou_threshold = 1.0
   nms_max_output_boxes = 10000
   eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
                                                   matching_iou_threshold,
                                                   nms_iou_threshold,
                                                   nms_max_output_boxes)
   detected_boxes = np.array([[0, 0, 1, 1], [10, 10, 5, 5], [0, 0, 2, 2],
                              [5, 10, 10, 5], [10, 5, 5, 10], [0, 0, 3, 3]],
                             dtype=float)
   detected_scores = np.array([0.8, 0.1, 0.8, 0.9, 0.7, 0.8], dtype=float)
   detected_class_labels = np.array([0, 1, 1, 2, 0, 2], dtype=int)
   groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]], dtype=float)
   groundtruth_class_labels = np.array([0, 2], dtype=int)
   groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=float)
   groundtruth_groundtruth_is_group_of_list = np.array(
       [False, False], dtype=bool)
   scores, tp_fp_labels, _ = eval1.compute_object_detection_metrics(
       detected_boxes, detected_scores, detected_class_labels,
       groundtruth_boxes, groundtruth_class_labels,
       groundtruth_groundtruth_is_difficult_list,
       groundtruth_groundtruth_is_group_of_list)
   expected_scores = [np.array([0.8], dtype=float)] * 3
   expected_tp_fp_labels = [np.array([True]), np.array([False]), np.array([True])]
   for i in range(len(expected_scores)):
     self.assertTrue(np.allclose(expected_scores[i], scores[i]))
     self.assertTrue(np.array_equal(expected_tp_fp_labels[i], tp_fp_labels[i]))
Example no. 6
    def setUp(self):
        num_groundtruth_classes = 1
        matching_iou_threshold1 = 0.5
        matching_iou_threshold2 = 0.1
        nms_iou_threshold = 1.0
        nms_max_output_boxes = 10000
        self.eval1 = per_image_evaluation.PerImageEvaluation(
            num_groundtruth_classes, matching_iou_threshold1,
            nms_iou_threshold, nms_max_output_boxes)

        self.eval2 = per_image_evaluation.PerImageEvaluation(
            num_groundtruth_classes, matching_iou_threshold2,
            nms_iou_threshold, nms_max_output_boxes)

        self.detected_boxes = np.array(
            [[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]], dtype=float)
        self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
Example no. 7
 def setUp(self):
   num_groundtruth_classes = 1
   matching_iou_threshold_iou = 0.5
   nms_iou_threshold = 1.0
   nms_max_output_boxes = 10000
   self.eval = per_image_evaluation.PerImageEvaluation(
       num_groundtruth_classes, matching_iou_threshold_iou, nms_iou_threshold,
       nms_max_output_boxes)
Example no. 8
    def __init__(self,
                 num_groundtruth_classes,
                 matching_iou_threshold=0.5,
                 nms_iou_threshold=1.0,
                 nms_max_output_boxes=10000,
                 use_weighted_mean_ap=False,
                 label_id_offset=0):
        self.per_image_eval = per_image_evaluation.PerImageEvaluation(
            num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
            nms_max_output_boxes)
        self.num_class = num_groundtruth_classes
        self.label_id_offset = label_id_offset

        self.groundtruth_boxes = {}
        self.groundtruth_class_labels = {}
        self.groundtruth_is_difficult_list = {}
        self.groundtruth_is_group_of_list = {}
        self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
        self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)

        self.detection_keys = set()
        self.scores_per_class = [[] for _ in range(self.num_class)]
        self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
        self.num_images_correctly_detected_per_class = np.zeros(self.num_class)

        self.average_precision_per_class = np.empty(self.num_class,
                                                    dtype=float)
        self.average_precision_per_class.fill(np.nan)
        self.precisions_per_class = []
        self.recalls_per_class = []
        self.corloc_per_class = np.ones(self.num_class, dtype=float)

        self.iou_list = [.5, .55, .6, .65, .7, .75, .8, .85, .9, .95]
        self.scores_per_class_per_iou = [
            [[] for _ in range(self.num_class)]
            for _ in range(len(self.iou_list))
        ]
        self.tp_fp_labels_per_class_per_iou = [
            [[] for _ in range(self.num_class)]
            for _ in range(len(self.iou_list))
        ]
        self.num_images_correctly_detected_per_class_per_iou = [
            np.zeros(self.num_class) for _ in range(len(self.iou_list))
        ]
        self.average_precision_per_class_per_iou = [
            np.empty(self.num_class, dtype=float)
            for _ in range(len(self.iou_list))
        ]
        for average_precision_per_class in self.average_precision_per_class_per_iou:
            average_precision_per_class.fill(np.nan)
        self.precisions_per_class_per_iou = [[] for _ in range(len(self.iou_list))]
        self.recalls_per_class_per_iou = [[] for _ in range(len(self.iou_list))]
        self.corloc_per_class_per_iou = [
            np.ones(self.num_class, dtype=float)
            for _ in range(len(self.iou_list))
        ]

        self.use_weighted_mean_ap = use_weighted_mean_ap
Example no. 9
    def __init__(self,
                 num_groundtruth_classes,
                 matching_iou_threshold=0.5,
                 nms_type='standard',
                 nms_iou_threshold=1.0,
                 nms_max_output_boxes=10000,
                 soft_nms_sigma=0.5,
                 subset_names=('default', )):
        self.per_image_eval = per_image_evaluation.PerImageEvaluation(
            num_groundtruth_classes, matching_iou_threshold, nms_type,
            nms_iou_threshold, nms_max_output_boxes, soft_nms_sigma)
        self.num_class = num_groundtruth_classes
        self.subset_names = subset_names

        self.groundtruth_boxes = {}
        self.groundtruth_class_labels = {}
        self.groundtruth_subset = {s: {} for s in self.subset_names}
        self.num_gt_instances_per_class = {
            s: np.zeros(self.num_class, dtype=int)
            for s in self.subset_names
        }
        self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)

        self.detection_keys = set()
        self.scores_per_class = {
            s: [[] for _ in range(self.num_class)]
            for s in self.subset_names
        }
        self.tp_fp_labels_per_class = {
            s: [[] for _ in range(self.num_class)]
            for s in self.subset_names
        }
        self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
        self.average_precision_per_class = {
            s: np.empty(self.num_class, dtype=float)
            for s in self.subset_names
        }
        for s in self.subset_names:
            self.average_precision_per_class[s].fill(np.nan)
        self.precisions_per_class = {s: [] for s in self.subset_names}
        self.recalls_per_class = {s: [] for s in self.subset_names}
        self.corloc_per_class = np.ones(self.num_class, dtype=float)
Example no. 10
    def __init__(self,
                 num_groundtruth_classes,
                 matching_iou_threshold=0.5,
                 nms_iou_threshold=1.0,
                 nms_max_output_boxes=10000,
                 use_weighted_mean_ap=False,
                 label_id_offset=0):
        self.per_image_eval = per_image_evaluation.PerImageEvaluation(
            num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
            nms_max_output_boxes)
        self.num_class = num_groundtruth_classes
        self.label_id_offset = label_id_offset

        self.groundtruth_boxes = {}
        self.groundtruth_class_labels = {}
        self.groundtruth_is_difficult_list = {}
        self.groundtruth_is_group_of_list = {}
        self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
        self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)

        self.detection_keys = set()
        self.scores_per_class = [[] for _ in range(self.num_class)]
        self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
        self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
        self.average_precision_per_class = np.empty(self.num_class,
                                                    dtype=float)
        self.average_precision_per_class.fill(np.nan)
        self.precisions_per_class = []
        self.recalls_per_class = []
        self.corloc_per_class = np.ones(self.num_class, dtype=float)

        self.use_weighted_mean_ap = use_weighted_mean_ap

        # for image-level classification
        self.groundtruth_image_class_labels = []
        self.detected_image_scores = []
        self.detected_image_class_labels = []

        # eval result for class in bounding boxes
        self.eval_result_in_box_classifcation = []
Example no. 11
  def test_compute_corloc_with_very_large_iou_threshold(self):
    num_groundtruth_classes = 3
    matching_iou_threshold = 0.9
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
                                                    matching_iou_threshold,
                                                    nms_iou_threshold,
                                                    nms_max_output_boxes)
    detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
                               [0, 0, 5, 5]], dtype=float)
    detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
    detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
                                 dtype=float)
    groundtruth_class_labels = np.array([0, 0, 2], dtype=int)

    is_class_correctly_detected_in_image = eval1._compute_cor_loc(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels)
    expected_result = np.array([1, 0, 0], dtype=int)
    self.assertTrue(np.array_equal(expected_result,
                                   is_class_correctly_detected_in_image))
Example no. 12
 def __init__(self,
              num_classes: int,
              matching_iou_threshold: float = 0.5,
              detections_nms_iou_threshold: float = 1.0,
              detections_nms_max_output_boxes: int = 10000,
              image_size=None,
              groundtruth_boxes_in_normalized: bool = True,
              detection_boxes_in_normalized: bool = True,
              **kpi_plugin_kwargs):
     super().__init__(**kpi_plugin_kwargs)
     self._per_image_evaluation = per_image_evaluation.PerImageEvaluation(
         num_groundtruth_classes=num_classes,
         matching_iou_threshold=matching_iou_threshold,
         nms_iou_threshold=detections_nms_iou_threshold,
         nms_max_output_boxes=detections_nms_max_output_boxes,
     )
     self.num_classes = num_classes
     self.matching_iou_threshold = matching_iou_threshold
     self.detections_nms_iou_threshold = detections_nms_iou_threshold
     self.detections_nms_max_output_boxes = detections_nms_max_output_boxes
     self.image_size = image_size
     self.groundtruth_boxes_in_normalized = groundtruth_boxes_in_normalized
     self.detection_boxes_in_normalized = detection_boxes_in_normalized
Example no. 13
def compute_precision_recall_per_cat(detection_file, db_file):

    print('Loading detection file...')

    with open(detection_file, 'rb') as f:
        detection_results = pickle.load(f)

    with open(db_file, 'r') as f:
        data = json.load(f)

    im_to_seq = {}
    for im in data['images']:
        im_to_seq[im['id']] = im['seq_id']

    im_to_cat = {}
    for ann in data['annotations']:
        im_to_cat[ann['image_id']] = ann['category_id']
    #add empty category
    empty_id = max([cat['id'] for cat in data['categories']]) + 1
    data['categories'].append({'name': 'empty', 'id': empty_id})
    #add all images that don't have annotations, with cat empty
    for im in data['images']:
        if im['id'] not in im_to_cat:
            im_to_cat[im['id']] = empty_id

    cat_id_to_cat = {}
    for cat in data['categories']:
        cat_id_to_cat[cat['id']] = cat['name']

    cat_to_ims = {cat_id: [] for cat_id in cat_id_to_cat}
    for im in data['images']:
        cat_to_ims[im_to_cat[im['id']]].append(im['id'])

    seqs = {}
    for im in detection_results['images']:
        if im in im_to_seq:
            if im_to_seq[im] not in seqs:
                seqs[im_to_seq[im]] = []
            seqs[im_to_seq[im]].append(im)

    print('Clustering detections by image...')
    #print(detection_results.keys())
    # group the detections and gts by image id:
    per_image_detections, per_image_gts = cluster_detections_by_image(
        detection_results)

    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=1,
        matching_iou_threshold=0.5,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000)

    detection_labels = {cat: [] for cat in cat_to_ims}
    detection_scores = {cat: [] for cat in cat_to_ims}
    num_total_gts = {cat: 0 for cat in cat_to_ims}
    count = {cat: 0 for cat in cat_to_ims}

    precision = {}
    recall = {}
    average_precision = {}

    for cat, images in cat_to_ims.items():

        for image_id in images:
            if image_id not in per_image_detections:
                #print(image_id)
                count[cat] += 1
                continue

            scores, tp_fp_labels = get_results_per_image(
                per_image_detections[image_id], per_image_gts[image_id],
                per_image_eval)

            detection_labels[cat].append(tp_fp_labels)
            detection_scores[cat].append(scores)
            num_gts = len(per_image_gts[image_id]['bboxes'])
            num_total_gts[cat] += num_gts

        if len(detection_scores[cat]) > 0:

            scores = np.concatenate(detection_scores[cat])
            labels = np.concatenate(detection_labels[cat]).astype(bool)
            #print(len(scores))
            #print(len(labels))
            precision[cat], recall[cat] = metrics.compute_precision_recall(
                scores, labels, num_total_gts[cat])

            average_precision[cat] = metrics.compute_average_precision(
                precision[cat], recall[cat])
        else:
            print("no detections for " + cat_id_to_cat[cat])
        print(cat_id_to_cat[cat], count[cat], len(images))

    return precision, recall, average_precision, cat_id_to_cat
Example no. 14
def compute_precision_recall(detection_file,
                             detection_results=None,
                             images_to_consider='all',
                             get_night_day=None):

    if detection_results is None:
        print('Loading detection file...')

        with open(detection_file, 'rb') as f:
            detection_results = pickle.load(f)

    print('Clustering detections by image...')
    #print(detection_results.keys())
    # group the detections by image id:

    use_im = get_images_to_consider(detection_results, images_to_consider,
                                    get_night_day)

    per_image_detections, per_image_gts = cluster_detections_by_image(
        detection_results, use_im)

    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=1,
        matching_iou_threshold=0.5,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000)

    print('Running per-object analysis...')

    detection_labels = []
    detection_scores = []
    num_total_gts = 0
    count = 0
    for image_id, dets in per_image_detections.items():

        num_detections = len(dets['bboxes'])

        # [ymin, xmin, ymax, xmax] in absolute image coordinates.
        detected_boxes = np.zeros([num_detections, 4], dtype=np.float32)
        # detection scores for the boxes
        detected_scores = np.zeros([num_detections], dtype=np.float32)
        # 0-indexed detection classes for the boxes
        detected_class_labels = np.zeros([num_detections], dtype=np.int32)
        detected_masks = None

        for i in range(num_detections):
            x1, y1, x2, y2 = dets['bboxes'][i]
            detected_boxes[i] = np.array([y1, x1, y2, x2])
            detected_scores[i] = dets['scores'][i]
            detected_class_labels[i] = dets['labels'][i] - 1

        gts = per_image_gts[image_id]
        #print(gts)
        num_gts = len(gts['bboxes'])
        #print(num_gts)
        if num_gts > 0:

            # [ymin, xmin, ymax, xmax] in absolute image coordinates
            groundtruth_boxes = np.zeros([num_gts, 4], dtype=np.float32)
            # 0-indexed groundtruth classes for the boxes
            groundtruth_class_labels = np.zeros(num_gts, dtype=np.int32)
            groundtruth_masks = None
            groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool)
            groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool)

            for i in range(num_gts):
                x1, y1, x2, y2 = gts['bboxes'][i]
                groundtruth_boxes[i] = np.array([y1, x1, y2, x2])
                groundtruth_class_labels[i] = gts['labels'][i] - 1

            #print(groundtruth_boxes, groundtruth_class_labels,detected_scores[0],detected_boxes[0], detected_class_labels[:2])
            scores, tp_fp_labels, is_class_correctly_detected_in_image = (
                per_image_eval.compute_object_detection_metrics(
                    detected_boxes=detected_boxes,
                    detected_scores=detected_scores,
                    detected_class_labels=detected_class_labels,
                    groundtruth_boxes=groundtruth_boxes,
                    groundtruth_class_labels=groundtruth_class_labels,
                    groundtruth_is_difficult_list=groundtruth_is_difficult_list,
                    groundtruth_is_group_of_list=groundtruth_is_group_of_list,
                    detected_masks=detected_masks,
                    groundtruth_masks=groundtruth_masks))

            #print(scores, tp_fp_labels)

            detection_labels.append(tp_fp_labels[0])
            detection_scores.append(scores[0])
            num_total_gts += num_gts

            count += 1
            if count % 1000 == 0:
                print(str(count) + ' images complete')

            #if (tp_fp_labels[0].shape[0] != num_detections):
            #    print('Incorrect label length')
            #if scores[0].shape[0] != num_detections:
            #    print('Incorrect score length')
            #if tp_fp_labels[0].sum() > num_gts:
            #    print('Too many correct detections')

        else:
            detection_labels.append(np.zeros(num_detections, dtype=np.int32))
            detection_scores.append(detected_scores)

    scores = np.concatenate(detection_scores)
    labels = np.concatenate(detection_labels).astype(bool)

    precision, recall = metrics.compute_precision_recall(
        scores, labels, num_total_gts)

    average_precision = metrics.compute_average_precision(precision, recall)

    return precision, recall, average_precision
Example no. 15
def compute_precision_recall_with_images(detection_file):

    print('Loading detection file...')

    with open(detection_file, 'rb') as f:
        detection_results = pickle.load(f)

    print('Clustering detections by image...')

    # group the detections by image id:
    per_image_detections = {
        detection_results['images'][idx]: {
            'bboxes': detection_results['detections'][idx],
            'scores': detection_results['detection_scores'][idx],
            'labels': detection_results['detection_labels'][idx]
        }
        for idx in range(len(detection_results['images']))
    }

    # group the ground truth annotations by image id:
    per_image_gts = {
        detection_results['images'][idx]: {
            'bboxes': detection_results['gts'][idx],
            'labels': detection_results['gt_labels'][idx]
        }
        for idx in range(len(detection_results['images']))
    }

    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=1,
        matching_iou_threshold=0.5,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000)

    print('Running per-image analysis...')

    detection_labels = []
    detection_scores = []
    num_total_gts = 0
    count = 0
    for image_id, dets in per_image_detections.items():
        im_detection_labels = []
        im_detection_scores = []
        im_num_gts = []
        max_im_scores = []

        num_detections = len(dets['bboxes'])

        # [ymin, xmin, ymax, xmax] in absolute image coordinates.
        detected_boxes = np.zeros([num_detections, 4], dtype=np.float32)
        # detection scores for the boxes
        detected_scores = np.zeros([num_detections], dtype=np.float32)
        # 0-indexed detection classes for the boxes
        detected_class_labels = np.zeros([num_detections], dtype=np.int32)
        detected_masks = None

        for i in range(num_detections):
            x1, y1, x2, y2 = dets['bboxes'][i]
            detected_boxes[i] = np.array([y1, x1, y2, x2])
            detected_scores[i] = dets['scores'][i]
            detected_class_labels[i] = dets['labels'][i] - 1

        max_im_scores.append(np.max(detected_scores))
        box_id = np.argmax(detected_scores)

        gts = per_image_gts[image_id]
        num_gts = len(gts['bboxes'])
        im_num_gts = num_gts

        if num_gts > 0:

            # [ymin, xmin, ymax, xmax] in absolute image coordinates
            groundtruth_boxes = np.zeros([num_gts, 4], dtype=np.float32)
            # 0-indexed groundtruth classes for the boxes
            groundtruth_class_labels = np.zeros(num_gts, dtype=np.int32)
            groundtruth_masks = None
            groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool)
            groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool)

            for i in range(num_gts):
                x1, y1, x2, y2 = gts['bboxes'][i]
                groundtruth_boxes[i] = np.array([y1, x1, y2, x2])
                groundtruth_class_labels[i] = gts['labels'][i] - 1

            ious = np_box_ops.iou(detected_boxes, groundtruth_boxes)
            if np.max(ious[box_id, :]) < 0.5:
                max_im_scores[-1] = 0

                #print('detected animal box')

            #print(groundtruth_boxes, groundtruth_class_labels,detected_scores[0],detected_boxes[0], detected_class_labels[0])

            scores, tp_fp_labels, is_class_correctly_detected_in_image = (
                per_image_eval.compute_object_detection_metrics(
                    detected_boxes=detected_boxes,
                    detected_scores=detected_scores,
                    detected_class_labels=detected_class_labels,
                    groundtruth_boxes=groundtruth_boxes,
                    groundtruth_class_labels=groundtruth_class_labels,
                    groundtruth_is_difficult_list=groundtruth_is_difficult_list,
                    groundtruth_is_group_of_list=groundtruth_is_group_of_list,
                    detected_masks=detected_masks,
                    groundtruth_masks=groundtruth_masks))
            #print(scores, tp_fp_labels)
            im_detection_labels = tp_fp_labels[0]
            im_detection_scores = scores[0]
            #num_total_gts += num_gts

            count += 1
            if count % 1000 == 0:
                print(str(count) + ' images complete')

            #if (tp_fp_labels[0].shape[0] != num_detections):
            #    print('Incorrect label length')
            #if scores[0].shape[0] != num_detections:
            #    print('Incorrect score length')
            #if tp_fp_labels[0].sum() > num_gts:
            #    print('Too many correct detections')

        else:
            im_detection_labels = np.zeros(num_detections, dtype=np.int32)
            im_detection_scores = detected_scores
            max_im_scores[-1] = 0

        best_score = np.max(max_im_scores)
        if best_score > 0:
            #print('valid box')
            best_im = np.argmax(max_im_scores)
            #print(best_im, best_score)

            temp_labels = np.zeros(len(im_detection_labels), dtype=np.int32)
            temp_scores = np.zeros(len(im_detection_scores), dtype=np.float32)
            for j in range(min(im_num_gts, len(im_detection_labels))):
                temp_labels[
                    j] = True  #TODO: this currently only works for oneclass?
                temp_scores[j] = best_score
            im_detection_labels = temp_labels
            im_detection_scores = temp_scores

        num_total_gts += im_num_gts

        detection_labels.append(im_detection_labels)
        detection_scores.append(im_detection_scores)

    print(len(detection_scores), len(detection_scores[0]),
          len(detection_scores[1]))
    scores = np.concatenate(detection_scores)
    labels = np.concatenate(detection_labels).astype(bool)

    precision, recall = metrics.compute_precision_recall(
        scores, labels, num_total_gts)

    average_precision = metrics.compute_average_precision(precision, recall)

    return precision, recall, average_precision
Example no. 16
     feed_dict={image_tensor: image_np_expanded})
 use_time = time.time() - start_time
 # vis_util.visualize_boxes_and_labels_on_image_array(
 #     image_np, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores),
 #     category_index, use_normalized_coordinates=True, min_score_thresh=0.8, line_thickness=2)
 eval_dicts = {
     'boxes': boxes,
     'scores': scores,
     'classes': classes,
     'num_detections': num_detections
 }
 scores, tp_fp_labels, is_class_correctly_detected_in_image = per_image_evaluation.PerImageEvaluation(
 ).compute_object_detection_metrics(
     detected_boxes=np.squeeze(boxes),
     detected_scores=np.squeeze(scores),
     detected_class_labels=np.squeeze(classes).astype(np.int32),
     groundtruth_boxes=gt_boxes,
     groundtruth_class_labels=gt_class_labels,
     groundtruth_is_difficult_list=gt_is_difficult_list,
     groundtruth_is_group_of_list=gt_is_group_of_list)
 #scores=np.array(scores),
 tp_fp_labels = np.array(tp_fp_labels)
 precision, recall = metrics.compute_precision_recall(
     np.array(scores), tp_fp_labels[1].astype(float), 2)
 print(scores)
 print('---------')
 print(len(tp_fp_labels))
 #f_name = re.split('/',path_f)
 #print(category_index.get(value))
 plt.figure(figsize=IMAGE_SIZE)
 plt.imshow(image_np)
Example no. 17
def compute_precision_recall_bbox(per_image_detections,
                                  per_image_gts,
                                  num_gt_classes,
                                  matching_iou_threshold=0.5):
    """
    Compute the precision and recall at each confidence level for detection results of various classes.
    Args:
        per_image_detections: dict of image_id to a dict with fields `boxes`, `scores` and `labels`
        per_image_gts: dict of image_id to a dict with fields `gt_boxes` and `gt_labels`
        num_gt_classes: number of classes in the ground truth labels
        matching_iou_threshold: IoU above which a detected and a ground truth box are considered overlapping

    Returns:
    A dict `per_cat_metrics`, where the keys are the possible gt classes and `one_class` which considers
    all classes. Each key corresponds to a dict with the fields precision, recall, average_precision, etc.

    """
    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=num_gt_classes,
        matching_iou_threshold=matching_iou_threshold,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000)

    print('Running per-object analysis...')

    detection_tp_fp = defaultdict(
        list)  # key is the category; in each list, 1 is tp, 0 is fp
    detection_scores = defaultdict(list)
    num_total_gt = defaultdict(int)

    for image_id, dets in tqdm(per_image_detections.items()):
        detected_boxes = np.array(dets['boxes'], dtype=np.float32)
        detected_scores = np.array(dets['scores'], dtype=np.float32)
        # labels input to compute_object_detection_metrics() needs to start at 0, not 1
        detected_labels = np.array(dets['labels'], dtype=int) - 1  # start at 0
        # num_detections = len(dets['boxes'])

        gts = per_image_gts[image_id]
        gt_boxes = np.array(gts['gt_boxes'], dtype=np.float32)
        gt_labels = np.array(gts['gt_labels'], dtype=int) - 1  # start at 0
        num_gts = len(gts['gt_boxes'])

        groundtruth_is_difficult_list = np.zeros(
            num_gts, dtype=bool)  # placeholders - we don't have these
        groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool)

        # to prevent 'Invalid dimensions for box data.' error
        if num_gts == 0:
            # this box will not match any detections
            gt_boxes = np.array([[0, 0, 0, 0]], dtype=np.float32)

        scores, tp_fp_labels, is_class_correctly_detected_in_image = (
            per_image_eval.compute_object_detection_metrics(
                detected_boxes=detected_boxes,
                detected_scores=detected_scores,
                detected_class_labels=detected_labels,
                groundtruth_boxes=gt_boxes,
                groundtruth_class_labels=gt_labels,
                groundtruth_is_difficult_list=groundtruth_is_difficult_list,
                groundtruth_is_group_of_list=groundtruth_is_group_of_list))

        for i, tp_fp_labels_cat in enumerate(tp_fp_labels):
            assert sum(tp_fp_labels_cat) <= sum(
                gt_labels == i)  # true positives < gt of that category
            cat = i + 1  # categories start at 1
            detection_tp_fp[cat].append(tp_fp_labels_cat)
            detection_scores[cat].append(scores[i])
            num_total_gt[cat] += sum(gt_labels == i)  # gt_labels start at 0

    all_scores = []
    all_tp_fp = []

    print('Computing precision recall for each category...')
    per_cat_metrics = {}
    for i in range(num_gt_classes):
        cat = i + 1
        scores_cat = np.concatenate(detection_scores[cat])
        tp_fp_cat = np.concatenate(detection_tp_fp[cat]).astype(bool)
        all_scores.append(scores_cat)
        all_tp_fp.append(tp_fp_cat)

        precision, recall = metrics.compute_precision_recall(
            scores_cat, tp_fp_cat, num_total_gt[cat])
        average_precision = metrics.compute_average_precision(
            precision, recall)

        per_cat_metrics[cat] = {
            'category': cat,
            'precision': precision,
            'recall': recall,
            'average_precision': average_precision,
            'scores': scores_cat,
            'tp_fp': tp_fp_cat,
            'num_gt': num_total_gt[cat]
        }
        print('Number of ground truth in category {} is {}'.format(
            cat, num_total_gt[cat]))

    # compute one-class precision/recall/average precision (if every box is just of an object class)
    all_scores = np.concatenate(all_scores)
    all_tp_fp = np.concatenate(all_tp_fp)
    overall_gt_count = sum(num_total_gt.values())

    one_class_prec, one_class_recall = metrics.compute_precision_recall(
        all_scores, all_tp_fp, overall_gt_count)
    one_class_average_precision = metrics.compute_average_precision(
        one_class_prec, one_class_recall)

    per_cat_metrics['one_class'] = {
        'category': 'one_class',
        'precision': one_class_prec,
        'recall': one_class_recall,
        'average_precision': one_class_average_precision,
        'scores': all_scores,
        'tp_fp': all_tp_fp,
        'num_gt': overall_gt_count
    }

    return per_cat_metrics
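
A minimal usage sketch of compute_precision_recall_bbox(). The image ids, boxes, scores, and labels below are hypothetical single-class values invented for illustration, not data from the source.

# Hypothetical example: two images, one ground truth class.
per_image_detections = {
    'img_001': {'boxes': [[0.0, 0.0, 50.0, 50.0], [10.0, 10.0, 60.0, 60.0]],
                'scores': [0.9, 0.4],
                'labels': [1, 1]},
    'img_002': {'boxes': [[5.0, 5.0, 40.0, 40.0]],
                'scores': [0.8],
                'labels': [1]},
}
per_image_gts = {
    'img_001': {'gt_boxes': [[0.0, 0.0, 48.0, 52.0]], 'gt_labels': [1]},
    'img_002': {'gt_boxes': [[6.0, 4.0, 42.0, 38.0]], 'gt_labels': [1]},
}

per_cat = compute_precision_recall_bbox(per_image_detections, per_image_gts,
                                        num_gt_classes=1)
print(per_cat[1]['average_precision'])   # AP for category 1
print(per_cat['one_class']['num_gt'])    # total number of ground truth boxes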
Example no. 18
def compute_precision_recall_with_sequences(
        detection_file, db_file, detection_results=None,
        images_to_consider='all', get_night_day=None):
    
    if detection_results is None:
        print('Loading detection file...')
    
        with open(detection_file, 'rb') as f:
            detection_results = pickle.load(f)

    im_to_seq = get_im_to_seq_map(db_file)
    seqs = {}
    for im in detection_results['images']:
        if im in im_to_seq:
            if im_to_seq[im] not in seqs:
                seqs[im_to_seq[im]] = []
            seqs[im_to_seq[im]].append(im)
    
    print('Clustering detections by image...')

    use_im = get_images_to_consider(detection_results, images_to_consider, get_night_day)

    per_image_detections, per_image_gts = cluster_detections_by_image(detection_results, use_im)

    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=1,
        matching_iou_threshold=0.5,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000
    )
    
    print('Running per-image analysis...')

    detection_labels = []
    detection_scores = []
    num_total_gts = 0
    count = 0
    for seq in seqs:
        seq_detection_labels = []
        seq_detection_scores = []
        seq_num_gts  = []
        is_gt_in_seq = False
        max_seq_scores = []
        valid_max_scores = []
        #print(seq)
        for image_id in seqs[seq]:
                    
        #for image_id, dets in per_image_detections.iteritems():
            dets = per_image_detections[image_id]
            num_detections = len(dets['bboxes'])

            # [ymin, xmin, ymax, xmax] in absolute image coordinates.
            detected_boxes = np.zeros([num_detections, 4], dtype=np.float32)
            # detection scores for the boxes
            detected_scores = np.zeros([num_detections], dtype=np.float32)
            # 0-indexed detection classes for the boxes
            detected_class_labels = np.zeros([num_detections], dtype=np.int32)
            detected_masks = None

            count +=1
            if count % 1000 == 0:
                print(str(count) + ' images complete')


            for i in range(num_detections):
                x1, y1, x2, y2 = dets['bboxes'][i]
                detected_boxes[i] = np.array([y1, x1, y2, x2])
                detected_scores[i] = dets['scores'][i]
                detected_class_labels[i] = dets['labels'][i] - 1

            max_seq_scores.append(np.max(detected_scores))
            valid_max_scores.append(np.max(detected_scores))
            box_id = np.argmax(detected_scores)
            
            gts = per_image_gts[image_id]
            num_gts = len(gts['bboxes'])
            #seq_num_gts.append(num_gts)
            #print(num_gts)
            if num_gts > 0:
                seq_num_gts.append(1)
                is_gt_in_seq = True
                # [ymin, xmin, ymax, xmax] in absolute image coordinates
                groundtruth_boxes = np.zeros([num_gts, 4], dtype=np.float32)
                # 0-indexed groundtruth classes for the boxes
                groundtruth_class_labels = np.zeros(num_gts, dtype=np.int32)
                groundtruth_masks = None
                groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool)
                groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool)

             
                for i in range(num_gts):
                    x1, y1, x2, y2 = gts['bboxes'][i]
                    groundtruth_boxes[i] = np.array([y1, x1, y2, x2])
                    groundtruth_class_labels[i] = gts['labels'][i] - 1

                ious = np_box_ops.iou(detected_boxes,groundtruth_boxes)
                if np.max(ious[box_id, :]) < 0.5:
                    valid_max_scores[-1] = 0
                
                scores, tp_fp_labels, is_class_correctly_detected_in_image = (
                per_image_eval.compute_object_detection_metrics(
                    detected_boxes=detected_boxes,
                    detected_scores=detected_scores,
                    detected_class_labels=detected_class_labels,
                    groundtruth_boxes=groundtruth_boxes,
                    groundtruth_class_labels=groundtruth_class_labels,
                    groundtruth_is_difficult_list=groundtruth_is_difficult_list,
                    groundtruth_is_group_of_list=groundtruth_is_group_of_list,
                    detected_masks=detected_masks,
                    groundtruth_masks=groundtruth_masks
                    )
                )
                
                seq_detection_labels.append(tp_fp_labels[0])
                seq_detection_scores.append(scores[0])
                #num_total_gts += 1
            
            else:
                seq_num_gts.append(0)
                seq_detection_labels.append(np.zeros(num_detections, dtype=np.int32))
                seq_detection_scores.append(detected_scores)
                valid_max_scores[-1] = 0

        seq_detection_label = np.zeros(1, dtype=np.int32)
        seq_detection_score = np.zeros(1, dtype=np.float32)

        best_score = np.max(valid_max_scores)
        if best_score > 0:
            if not is_gt_in_seq:
                print(is_gt_in_seq)
                print('matched box with no gt')
                print(valid_max_scores)
            #print('valid box')
            best_im = np.argmax(max_seq_scores)
            #print(best_im, best_score)
            for i in range(len(seqs[seq])):
                
                temp_labels = np.zeros(len(seq_detection_labels[i]),  dtype=np.int32)
                temp_scores = np.zeros(len(seq_detection_scores[i]), dtype=np.float32)
                for j in range(min(seq_num_gts[i], len(temp_labels))):
                    temp_labels[j] = True #TODO: this currently only works for oneclass?
                    temp_scores[j] = best_score
                seq_detection_labels[i] = temp_labels
                seq_detection_scores[i] = temp_scores
            seq_detection_label[0] = True
            seq_detection_score[0] = best_score
        else:
            #print('no valid box')
            seq_detection_label[0] = False
            seq_detection_score[0] = np.max(max_seq_scores)
        

        #if sum(seq_num_gts)>0:
        if is_gt_in_seq:
            num_total_gts+=1
        
       
        detection_labels.append(seq_detection_label)
        detection_scores.append(seq_detection_score)

    scores = np.concatenate(detection_scores)
    labels = np.concatenate(detection_labels).astype(bool)
    print(count)
    print(len(seqs.keys()))
    print(sum(int(label[0]) for label in detection_labels), num_total_gts)
    precision, recall = metrics.compute_precision_recall(
        scores, labels, num_total_gts
    )

    average_precision = metrics.compute_average_precision(precision, recall)
    
    
    return precision, recall, average_precision
Example no. 19
def compute_precision_recall_per_loc(detection_file, db_file):

    print('Loading detection file...')

    with open(detection_file, 'rb') as f:
        detection_results = pickle.load(f)

    with open(db_file, 'r') as f:
        data = json.load(f)
    print('Images: ', len(data['images']))
    print('Detection result Images: ', len(detection_results['images']))

    loc_to_ims = {}
    for im in data['images']:
        if im['location'] not in loc_to_ims:
            loc_to_ims[im['location']] = []
        loc_to_ims[im['location']].append(im['id'])

    print('Clustering detections by image...')
    #print(detection_results.keys())
    # group the detections and gts by image id:
    per_image_detections, per_image_gts = cluster_detections_by_image(
        detection_results)

    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=1,
        matching_iou_threshold=0.5,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000)

    detection_labels = {loc: [] for loc in loc_to_ims}
    detection_scores = {loc: [] for loc in loc_to_ims}
    num_total_gts = {loc: 0 for loc in loc_to_ims}
    count = {loc: 0 for loc in loc_to_ims}

    precision = {}
    recall = {}
    average_precision = {}

    for cat, images in loc_to_ims.items():

        for image_id in images:
            if image_id not in per_image_detections:
                #print(image_id)
                count[cat] += 1
                continue
            scores, tp_fp_labels = get_results_per_image(
                per_image_detections[image_id], per_image_gts[image_id],
                per_image_eval)

            detection_labels[cat].append(tp_fp_labels)
            detection_scores[cat].append(scores)
            num_gts = len(per_image_gts[image_id]['bboxes'])
            num_total_gts[cat] += num_gts

        if len(detection_scores[cat]) > 0:

            scores = np.concatenate(detection_scores[cat])
            labels = np.concatenate(detection_labels[cat]).astype(bool)
            #print(len(scores))
            #print(len(labels))
            precision[cat], recall[cat] = metrics.compute_precision_recall(
                scores, labels, num_total_gts[cat])

            average_precision[cat] = metrics.compute_average_precision(
                precision[cat], recall[cat])
        else:
            print("no detections for " + cat)
        print(cat, count[cat], len(images))

    return precision, recall, average_precision
Example no. 20
    def get_detect_info(self, image_path, threshold):

        jpg_file_name, jpg_file_path = get_file_name_path(image_path,
                                                          format_key=".jpg")
        with self.detection_graph.as_default():
            with tf.Session(graph=self.detection_graph) as sess:
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                computer_label = 3
                # print(jpg_file_path)
                info_list = []
                for fp, i in zip(jpg_file_path, range(len(jpg_file_path))):

                    image = cv2.imread(fp)
                    # size = np.shape(image)
                    height, width, depth = image.shape
                    print(height, width, depth)
                    groundtruth_boxes = []
                    xml_info = read_xml(fp[:-4] + ".xml")
                    for x in range(len(xml_info)):
                        print(xml_info[x][1])
                        if int(xml_info[x][1]) == computer_label:
                            groundtruth_boxes.append(xml_info[x][2:])
                    print(groundtruth_boxes)
                    # print(size)
                    image_np_expanded = np.expand_dims(image, axis=0)
                    image_tensor = self.detection_graph.get_tensor_by_name(
                        'image_tensor:0')
                    boxes = self.detection_graph.get_tensor_by_name(
                        'detection_boxes:0')
                    scores = self.detection_graph.get_tensor_by_name(
                        'detection_scores:0')
                    classes = self.detection_graph.get_tensor_by_name(
                        'detection_classes:0')
                    num_detections = self.detection_graph.get_tensor_by_name(
                        'num_detections:0')
                    # Actual detection.
                    (boxes, scores, classes, num_detections) = sess.run(
                        [boxes, scores, classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})

                    py_scores = np.array(scores[0])
                    py_classes = np.array(classes[0])
                    py_boxes = np.array(boxes[0])
                    scores = np.squeeze(scores)
                    classes = np.squeeze(classes)
                    groundtruth_groundtruth_is_difficult_list = np.array(
                        np.ones(len(groundtruth_boxes)), dtype=bool)
                    groundtruth_groundtruth_is_group_of_list = np.array(
                        np.ones(len(groundtruth_boxes)), dtype=bool)
                    #print("=====:", groundtruth_groundtruth_is_group_of_list)
                    detected_boxes = []
                    detected_scores = []
                    for num in range(len(py_scores)):
                        # print(py_scores[0])
                        #print(num)
                        if classes[num] == computer_label:
                            #print(py_boxes[num])
                            boxes = [
                                float(py_boxes[num][0] * height),
                                float(py_boxes[num][1] * width),
                                float(py_boxes[num][2] * height),
                                float(py_boxes[num][3] * width)
                            ]
                            detected_boxes.append(boxes)
                            detected_scores.append(scores[num])
                    #print(detected_boxes,"\n", ":", detected_scores)

                    print("++++++++++++++++++++++++++++++")

                    num_groundtruth_classes = 3
                    matching_iou_threshold = 0.5
                    nms_iou_threshold = 1.0
                    nms_max_output_boxes = 10000
                    group_of_weight = 0.5
                    evaluator = per_image_evaluation.PerImageEvaluation(
                        num_groundtruth_classes, matching_iou_threshold,
                        nms_iou_threshold, nms_max_output_boxes,
                        group_of_weight)
                    scores, tp_fp_labels = evaluator._compute_tp_fp_for_single_class(
                        np.array(detected_boxes), np.array(detected_scores),
                        np.array(groundtruth_boxes),
                        groundtruth_groundtruth_is_difficult_list,
                        groundtruth_groundtruth_is_group_of_list)
                    print(scores, "\n", tp_fp_labels)
Example no. 21
def compute_precision_recall_bbox(
    per_image_detections: Mapping[str, Mapping[str, Any]],
    per_image_gts: Mapping[str, Mapping[str, Any]],
    num_gt_classes: int,
    matching_iou_threshold: float = 0.5
) -> Dict[Union[str, int], Dict[str, Any]]:
    """
    Compute the precision and recall at each confidence level for detection
    results of various classes.

    Args:
        per_image_detections: dict, image_id (str) => dict with fields
            'boxes': array-like, shape [N, 4], type float, each row is
                [ymin, xmin, ymax, xmax] in normalized coordinates
            'scores': array-like, shape [N], float
            'labels': array-like, shape [N], integers in [1, num_gt_classes]
        per_image_gts: dict, image_id (str) => dict with fields
            'gt_boxes': array-like, shape [M, 4], type float, each row is
                [ymin, xmin, ymax, xmax] in normalized coordinates
            'gt_labels': array-like, shape [M], integers in [1, num_gt_classes]
        num_gt_classes: int, number of classes in the ground truth labels
        matching_iou_threshold: float, IoU above which a detected and a ground
            truth box are considered overlapping

    Returns: dict, per-class metrics, keys are integers in [1, num_gt_classes]
        and 'one_class' which considers all classes. Each value is a dict with
        fields ['precision', 'recall', 'average_precision', ...]. A short
        usage sketch follows this function.
    """
    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=num_gt_classes,
        matching_iou_threshold=matching_iou_threshold,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000)

    print('Running per-object analysis...', flush=True)

    # keys are categories (int)
    detection_tp_fp = defaultdict(list)  # in each list, 1 is tp, 0 is fp
    detection_scores = defaultdict(list)
    num_total_gt: Dict[int, int] = defaultdict(int)

    for image_id, dets in tqdm(per_image_detections.items()):
        # we force *_boxes to have shape [N, 4], even in case that N = 0
        detected_boxes = np.asarray(dets['boxes'],
                                    dtype=np.float32).reshape(-1, 4)
        detected_scores = np.asarray(dets['scores'])
        # labels input to compute_object_detection_metrics() needs to start at 0, not 1
        detected_labels = np.asarray(dets['labels'], dtype=int) - 1  # start at 0
        # num_detections = len(dets['boxes'])

        gts = per_image_gts[image_id]
        gt_boxes = np.asarray(gts['gt_boxes'], dtype=np.float32).reshape(-1, 4)
        gt_labels = np.asarray(gts['gt_labels'], dtype=int) - 1  # start at 0
        num_gts = len(gts['gt_boxes'])

        # place holders - we don't have these
        groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool)
        groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool)

        results = per_image_eval.compute_object_detection_metrics(
            detected_boxes=detected_boxes,
            detected_scores=detected_scores,
            detected_class_labels=detected_labels,
            groundtruth_boxes=gt_boxes,
            groundtruth_class_labels=gt_labels,
            groundtruth_is_difficult_list=groundtruth_is_difficult_list,
            groundtruth_is_group_of_list=groundtruth_is_group_of_list)
        scores, tp_fp_labels, is_class_correctly_detected_in_image = results

        for i, tp_fp_labels_cat in enumerate(tp_fp_labels):
            # true positives < gt of that category
            assert sum(tp_fp_labels_cat) <= sum(gt_labels == i)

            cat = i + 1  # categories start at 1
            detection_tp_fp[cat].append(tp_fp_labels_cat)
            detection_scores[cat].append(scores[i])
            num_total_gt[cat] += sum(gt_labels == i)  # gt_labels start at 0

    all_scores = []
    all_tp_fp = []

    print('Computing precision recall for each category...')
    per_cat_metrics: Dict[Union[int, str], Dict[str, Any]] = {}
    for i in range(num_gt_classes):
        cat = i + 1
        scores_cat = np.concatenate(detection_scores[cat])
        tp_fp_cat = np.concatenate(detection_tp_fp[cat]).astype(bool)
        all_scores.append(scores_cat)
        all_tp_fp.append(tp_fp_cat)

        precision, recall = metrics.compute_precision_recall(
            scores_cat, tp_fp_cat, num_total_gt[cat])
        average_precision = metrics.compute_average_precision(
            precision, recall)

        per_cat_metrics[cat] = {
            'category': cat,
            'precision': precision,
            'recall': recall,
            'average_precision': average_precision,
            'scores': scores_cat,
            'tp_fp': tp_fp_cat,
            'num_gt': num_total_gt[cat]
        }
        print(f'Number of ground truth in category {cat}: {num_total_gt[cat]}')

    # compute one-class precision/recall/average precision (if every box is just
    # of an object class)
    all_scores = np.concatenate(all_scores)
    all_tp_fp = np.concatenate(all_tp_fp)
    overall_gt_count = sum(num_total_gt.values())

    one_class_prec, one_class_recall = metrics.compute_precision_recall(
        all_scores, all_tp_fp, overall_gt_count)
    one_class_average_precision = metrics.compute_average_precision(
        one_class_prec, one_class_recall)

    per_cat_metrics['one_class'] = {
        'category': 'one_class',
        'precision': one_class_prec,
        'recall': one_class_recall,
        'average_precision': one_class_average_precision,
        'scores': all_scores,
        'tp_fp': all_tp_fp,
        'num_gt': overall_gt_count
    }

    return per_cat_metrics
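
As above, a minimal usage sketch for the typed variant; the inputs are hypothetical, use normalized coordinates as the docstring requires, and include an image with no detections, the case the reshape(-1, 4) calls above are written to accommodate.

# Hypothetical example: normalized coordinates, one class, one image without detections.
per_image_detections = {
    'img_a': {'boxes': [[0.1, 0.1, 0.5, 0.5]], 'scores': [0.95], 'labels': [1]},
    'img_b': {'boxes': [], 'scores': [], 'labels': []},  # no detections
}
per_image_gts = {
    'img_a': {'gt_boxes': [[0.1, 0.1, 0.48, 0.52]], 'gt_labels': [1]},
    'img_b': {'gt_boxes': [[0.2, 0.2, 0.8, 0.8]], 'gt_labels': [1]},
}

per_cat_metrics = compute_precision_recall_bbox(
    per_image_detections, per_image_gts, num_gt_classes=1)
for key, cat_metrics in per_cat_metrics.items():
    print(key, cat_metrics['num_gt'], cat_metrics['average_precision'])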