def evaluate(self): """Compute evaluation result. Returns: A named tuple with the following fields - average_precision: float numpy array of average precision for each class. mean_ap: mean average precision of all classes, float scalar precisions: List of precisions, each precision is a float numpy array recalls: List of recalls, each recall is a float numpy array corloc: numpy float array mean_corloc: Mean CorLoc score for each class, float scalar """ if (self.num_gt_instances_per_class == 0).any(): logging.warn( 'The following classes have no ground truth examples: %s', np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) + self.label_id_offset) if self.use_weighted_mean_ap: all_scores = np.array([], dtype=float) all_tp_fp_labels = np.array([], dtype=bool) for class_index in range(self.num_class): if self.num_gt_instances_per_class[class_index] == 0: continue if not self.scores_per_class[class_index]: scores = np.array([], dtype=float) tp_fp_labels = np.array([], dtype=float) else: scores = np.concatenate(self.scores_per_class[class_index]) tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) if self.use_weighted_mean_ap: all_scores = np.append(all_scores, scores) all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) logging.info('Scores and tpfp per class label: %d', class_index) logging.info(tp_fp_labels) logging.info(scores) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) self.precisions_per_class[class_index] = precision self.recalls_per_class[class_index] = recall average_precision = metrics.compute_average_precision(precision, recall) self.average_precision_per_class[class_index] = average_precision self.corloc_per_class = metrics.compute_cor_loc( self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) if self.use_weighted_mean_ap: num_gt_instances = np.sum(self.num_gt_instances_per_class) precision, recall = metrics.compute_precision_recall( all_scores, all_tp_fp_labels, num_gt_instances) mean_ap = metrics.compute_average_precision(precision, recall) else: mean_ap = np.nanmean(self.average_precision_per_class) mean_corloc = np.nanmean(self.corloc_per_class) return ObjectDetectionEvalMetrics( self.average_precision_per_class, mean_ap, self.precisions_per_class, self.recalls_per_class, self.corloc_per_class, mean_corloc)
def evaluate(self): """Compute evaluation result. Returns: A named tuple with the following fields - average_precision: float numpy array of average precision for each class. mean_ap: mean average precision of all classes, float scalar precisions: List of precisions, each precision is a float numpy array recalls: List of recalls, each recall is a float numpy array corloc: numpy float array mean_corloc: Mean CorLoc score for each class, float scalar """ if (self.num_gt_instances_per_class == 0).any(): logging.warn( 'The following classes have no ground truth examples: %s', np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) + self.label_id_offset) if self.use_weighted_mean_ap: all_scores = np.array([], dtype=float) all_tp_fp_labels = np.array([], dtype=bool) for class_index in range(self.num_class): if self.num_gt_instances_per_class[class_index] == 0: continue if not self.scores_per_class[class_index]: scores = np.array([], dtype=float) tp_fp_labels = np.array([], dtype=float) else: scores = np.concatenate(self.scores_per_class[class_index]) tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) if self.use_weighted_mean_ap: all_scores = np.append(all_scores, scores) all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) logging.info('Scores and tpfp per class label: %d', class_index) logging.info(tp_fp_labels) logging.info(scores) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) self.precisions_per_class.append(precision) self.recalls_per_class.append(recall) average_precision = metrics.compute_average_precision(precision, recall) self.average_precision_per_class[class_index] = average_precision self.corloc_per_class = metrics.compute_cor_loc( self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) if self.use_weighted_mean_ap: num_gt_instances = np.sum(self.num_gt_instances_per_class) precision, recall = metrics.compute_precision_recall( all_scores, all_tp_fp_labels, num_gt_instances) mean_ap = metrics.compute_average_precision(precision, recall) else: mean_ap = np.nanmean(self.average_precision_per_class) mean_corloc = np.nanmean(self.corloc_per_class) return ObjectDetectionEvalMetrics( self.average_precision_per_class, mean_ap, self.precisions_per_class, self.recalls_per_class, self.corloc_per_class, mean_corloc)
def evaluate(self): """Computes evaluation result. Returns: A named tuple with the following fields - average_precision: a float number corresponding to average precision. precisions: an array of precisions. recalls: an array of recalls. recall@50: recall computed on 50 top-scoring samples. recall@100: recall computed on 100 top-scoring samples. median_rank@50: median rank computed on 50 top-scoring samples. median_rank@100: median rank computed on 100 top-scoring samples. """ if self._num_gt_instances == 0: logging.warn('No ground truth instances') if not self._scores: scores = np.array([], dtype=float) tp_fp_labels = np.array([], dtype=bool) else: scores = np.concatenate(self._scores) tp_fp_labels = np.concatenate(self._tp_fp_labels) relation_field_values = np.concatenate(self._relation_field_values) for relation_field_value, _ in ( self._num_gt_instances_per_relationship.iteritems()): precisions, recalls = metrics.compute_precision_recall( scores[relation_field_values == relation_field_value], tp_fp_labels[relation_field_values == relation_field_value], self._num_gt_instances_per_relationship[relation_field_value]) self._average_precisions[ relation_field_value] = metrics.compute_average_precision( precisions, recalls) self._mean_average_precision = np.mean(self._average_precisions.values()) self._precisions, self._recalls = metrics.compute_precision_recall( scores, tp_fp_labels, self._num_gt_instances) self._weighted_average_precision = metrics.compute_average_precision( self._precisions, self._recalls) self._recall_50 = ( metrics.compute_recall_at_k(self._tp_fp_labels, self._num_gt_instances, 50)) self._median_rank_50 = ( metrics.compute_median_rank_at_k(self._tp_fp_labels, 50)) self._recall_100 = ( metrics.compute_recall_at_k(self._tp_fp_labels, self._num_gt_instances, 100)) self._median_rank_100 = ( metrics.compute_median_rank_at_k(self._tp_fp_labels, 100)) return VRDDetectionEvalMetrics( self._weighted_average_precision, self._mean_average_precision, self._average_precisions, self._precisions, self._recalls, self._recall_50, self._recall_100, self._median_rank_50, self._median_rank_100)
def evaluate(self): """Computes evaluation result. Returns: A named tuple with the following fields - average_precision: a float number corresponding to average precision. precisions: an array of precisions. recalls: an array of recalls. recall@50: recall computed on 50 top-scoring samples. recall@100: recall computed on 100 top-scoring samples. median_rank@50: median rank computed on 50 top-scoring samples. median_rank@100: median rank computed on 100 top-scoring samples. """ if self._num_gt_instances == 0: logging.warning('No ground truth instances') if not self._scores: scores = np.array([], dtype=float) tp_fp_labels = np.array([], dtype=bool) else: scores = np.concatenate(self._scores) tp_fp_labels = np.concatenate(self._tp_fp_labels) relation_field_values = np.concatenate(self._relation_field_values) for relation_field_value, _ in (six.iteritems( self._num_gt_instances_per_relationship)): precisions, recalls = metrics.compute_precision_recall( scores[relation_field_values == relation_field_value], tp_fp_labels[relation_field_values == relation_field_value], self._num_gt_instances_per_relationship[relation_field_value]) self._average_precisions[ relation_field_value] = metrics.compute_average_precision( precisions, recalls) self._mean_average_precision = np.mean( list(self._average_precisions.values())) self._precisions, self._recalls = metrics.compute_precision_recall( scores, tp_fp_labels, self._num_gt_instances) self._weighted_average_precision = metrics.compute_average_precision( self._precisions, self._recalls) self._recall_50 = (metrics.compute_recall_at_k(self._tp_fp_labels, self._num_gt_instances, 50)) self._median_rank_50 = (metrics.compute_median_rank_at_k( self._tp_fp_labels, 50)) self._recall_100 = (metrics.compute_recall_at_k( self._tp_fp_labels, self._num_gt_instances, 100)) self._median_rank_100 = (metrics.compute_median_rank_at_k( self._tp_fp_labels, 100)) return VRDDetectionEvalMetrics( self._weighted_average_precision, self._mean_average_precision, self._average_precisions, self._precisions, self._recalls, self._recall_50, self._recall_100, self._median_rank_50, self._median_rank_100)
def evaluate(self):
  if (self.num_gt_instances_per_class == 0).any():
    logging.warn(
        'The following classes have no ground truth examples: %s',
        np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) +
        self.label_id_offset)
  if self.use_weighted_mean_ap:
    all_scores = np.array([], dtype=float)
    all_tp_fp_labels = np.array([], dtype=bool)
  for class_index in range(self.num_class):
    if self.num_gt_instances_per_class[class_index] == 0:
      continue
    if not self.scores_per_class[class_index]:
      scores = np.array([], dtype=float)
      tp_fp_labels = np.array([], dtype=float)
    else:
      scores = np.concatenate(self.scores_per_class[class_index])
      tp_fp_labels = np.concatenate(
          self.tp_fp_labels_per_class[class_index])
    if self.use_weighted_mean_ap:
      all_scores = np.append(all_scores, scores)
      all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)
    precision, recall = metrics.compute_precision_recall(
        scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
    self.precisions_per_class.append(precision)
    self.recalls_per_class.append(recall)
    average_precision = metrics.compute_average_precision(precision, recall)
    self.average_precision_per_class[class_index] = average_precision
  self.corloc_per_class = metrics.compute_cor_loc(
      self.num_gt_imgs_per_class,
      self.num_images_correctly_detected_per_class)
  if self.use_weighted_mean_ap:
    num_gt_instances = np.sum(self.num_gt_instances_per_class)
    precision, recall = metrics.compute_precision_recall(
        all_scores, all_tp_fp_labels, num_gt_instances)
    mean_ap = metrics.compute_average_precision(precision, recall)
  else:
    mean_ap = np.nanmean(self.average_precision_per_class)
  mean_corloc = np.nanmean(self.corloc_per_class)
  return ObjectDetectionEvalMetrics(self.average_precision_per_class, mean_ap,
                                    self.precisions_per_class,
                                    self.recalls_per_class,
                                    self.corloc_per_class, mean_corloc)
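# Every evaluate() snippet above funnels per-class scores and tp/fp labels into
# metrics.compute_precision_recall. A minimal numpy sketch of the expected
# behaviour (consistent with the unit tests further down; an illustration, not
# a drop-in replacement for the library function): sort detections by
# descending score, then accumulate true/false positives to get one
# precision/recall point per detection.
import numpy as np


def sketch_precision_recall(scores, tp_fp_labels, num_gt):
    """Precision/recall curves from detection scores and tp/fp labels."""
    order = np.argsort(scores)[::-1]          # highest score first
    labels = tp_fp_labels[order].astype(float)
    true_positives = np.cumsum(labels)        # fractional labels allowed
    false_positives = np.cumsum(1.0 - labels)
    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / float(num_gt)
    return precision, recall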
def get_pr_ap(info_list, num_label):
    """Compute precision/recall and AP for a single label.

    Args:
        info_list: list of predictions, each entry is
            [file path, mapped label, bbox, score, gt_label].
        num_label: numeric id of the mapped label to evaluate.
    """
    label_info = []
    num_gt_l = []
    for inf in info_list:
        print(inf)
        if inf[1] == num_label:
            label_info.append(inf)
            if inf[4] == 1.0:
                num_gt_l.append(inf[4])
    num_gt = len(num_gt_l)
    label_info = np.array(label_info).T
    # print(label_info)
    y_true = np.array(label_info[4, :], float)
    y_scores = np.array(label_info[3, :], float)
    precision, recall = metrics.compute_precision_recall(
        y_scores, y_true, num_gt)
    print("precision:", precision, "recall:", recall)
    average_precision = metrics.compute_average_precision(precision, recall)
    average_precision = '{:.3f}'.format(average_precision)
    print("average_precision:", average_precision)
    return precision, recall, average_precision
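# get_pr_ap above delegates the final step to metrics.compute_average_precision.
# A hedged numpy sketch of that VOC-style, all-point-interpolated computation
# (an assumed equivalent, not copied from the library): take the monotone
# precision envelope and integrate it over the recall increments.
import numpy as np


def sketch_average_precision(precision, recall):
    """Area under the interpolated precision-recall curve."""
    if precision is None or recall is None:
        return np.nan
    # Pad the curve so it starts at recall 0 and ends at recall 1.
    recall = np.concatenate([[0.0], recall, [1.0]])
    precision = np.concatenate([[0.0], precision, [0.0]])
    # Make precision monotonically non-increasing from right to left.
    for i in range(len(precision) - 2, -1, -1):
        precision[i] = max(precision[i], precision[i + 1])
    # Sum precision over the points where recall changes.
    changed = np.where(recall[1:] != recall[:-1])[0] + 1
    return np.sum((recall[changed] - recall[changed - 1]) * precision[changed])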
def test_compute_precision_recall(self):
  num_gt = 10
  scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
  labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
  labels_float_type = np.array([0, 1, 1, 0, 0, 1], dtype=float)
  accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
  expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
  expected_recall = accumulated_tp_count / num_gt

  precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
  precision_float_type, recall_float_type = metrics.compute_precision_recall(
      scores, labels_float_type, num_gt)

  self.assertAllClose(precision, expected_precision)
  self.assertAllClose(recall, expected_recall)
  self.assertAllClose(precision_float_type, expected_precision)
  self.assertAllClose(recall_float_type, expected_recall)
def evaluate(self): """Compute evaluation result. Returns: average_precision_per_class: float numpy array of average precision for each class. mean_ap: mean average precision of all classes, float scalar precisions_per_class: List of precisions, each precision is a float numpy array recalls_per_class: List of recalls, each recall is a float numpy array corloc_per_class: numpy float array mean_corloc: Mean CorLoc score for each class, float scalar """ if (self.num_gt_instances_per_class == 0).any(): logging.warn( 'The following classes have no ground truth examples: %s', np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0))) average_recall_per_class = {} for class_index in range(self.num_class): if self.num_gt_instances_per_class[class_index] == 0: continue scores = np.concatenate(self.scores_per_class[class_index]) # print(self.scores_per_class[class_index][0].shape, self.scores_per_class[class_index][0]) # print(self.tp_fp_labels_per_class[class_index][0].shape, self.tp_fp_labels_per_class[class_index][0]) tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) self.precisions_per_class.append(precision) self.recalls_per_class.append(recall) average_recall_per_class[class_index] = np.mean(recall) # print('ind',class_index,np.mean(precision),np.mean(recall)) # print(len(precision), '\n', precision) # print(len(recall), '\n', recall) average_precision = metrics.compute_average_precision(precision, recall) self.average_precision_per_class[class_index] = average_precision self.corloc_per_class = metrics.compute_cor_loc( self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) mean_ap = np.nanmean(self.average_precision_per_class) mean_corloc = np.nanmean(self.corloc_per_class) return (self.average_precision_per_class, mean_ap, self.precisions_per_class, average_recall_per_class, self.corloc_per_class, mean_corloc)
def test_compute_precision_recall_float(self):
  num_gt = 10
  scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
  labels_float = np.array([0, 1, 1, 0.5, 0, 1], dtype=float)
  expected_precision = np.array(
      [0., 0.5, 0.33333333, 0.5, 0.55555556, 0.63636364], dtype=float)
  expected_recall = np.array([0., 0.1, 0.1, 0.2, 0.25, 0.35], dtype=float)
  precision, recall = metrics.compute_precision_recall(
      scores, labels_float, num_gt)
  self.assertAllClose(precision, expected_precision)
  self.assertAllClose(recall, expected_recall)
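# The expected values in test_compute_precision_recall_float follow if a
# fractional label l contributes l to the running true-positive count and
# (1 - l) to the running false-positive count (an assumption that matches the
# numbers above, not a statement about the library internals). Sorted by
# descending score the labels become [0, 1, 0, 1, 0.5, 1], so:
#
#   cumulative TP = [0, 1, 1, 2, 2.5, 3.5]
#   cumulative FP = [1, 1, 2, 2, 2.5, 2.5]
#   precision     = TP / (TP + FP) = [0, 0.5, 0.333, 0.5, 0.556, 0.636]
#   recall        = TP / num_gt    = [0, 0.1, 0.1, 0.2, 0.25, 0.35]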
def test_compute_precision_recall_and_ap_no_groundtruth(self):
  num_gt = 0
  scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
  labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool)
  expected_precision = None
  expected_recall = None
  precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
  self.assertEqual(precision, expected_precision)
  self.assertEqual(recall, expected_recall)
  ap = metrics.compute_average_precision(precision, recall)
  self.assertTrue(np.isnan(ap))
def evaluate(self): """Compute evaluation result. Returns: average_precision_per_class: float numpy array of average precision for each class. mean_ap: mean average precision of all classes, float scalar precisions_per_class: List of precisions, each precision is a float numpy array recalls_per_class: List of recalls, each recall is a float numpy array corloc_per_class: numpy float array mean_corloc: Mean CorLoc score for each class, float scalar """ # compute mAP mean_ap = {} for subset in self.subset_names: if (self.num_gt_instances_per_class[subset] == 0).any(): logging.warning( 'The following classes in subset %s have no ground truth examples: ' '%s', subset, np.squeeze( np.argwhere(self.num_gt_instances_per_class == 0))) for class_index in range(self.num_class): if self.num_gt_instances_per_class[subset][class_index] == 0: continue scores = np.concatenate( self.scores_per_class[subset][class_index]) tp_fp_labels = np.concatenate( self.tp_fp_labels_per_class[subset][class_index]) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, self.num_gt_instances_per_class[subset][class_index]) self.precisions_per_class[subset].append(precision) self.recalls_per_class[subset].append(recall) average_precision = metrics.compute_average_precision( precision, recall) self.average_precision_per_class[subset][class_index] = \ average_precision mean_ap[subset] = np.nanmean( self.average_precision_per_class[subset]) # compute CorLoc self.corloc_per_class = metrics.compute_cor_loc( self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) mean_corloc = np.nanmean(self.corloc_per_class) return (self.average_precision_per_class, mean_ap, self.precisions_per_class, self.recalls_per_class, self.corloc_per_class, mean_corloc)
def test_compute_precision_recall(self):
  num_gt = 10
  scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
  labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
  labels_float_type = np.array([0, 1, 1, 0, 0, 1], dtype=float)
  accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
  expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
  expected_recall = accumulated_tp_count / num_gt

  precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
  precision_float_type, recall_float_type = metrics.compute_precision_recall(
      scores, labels_float_type, num_gt)

  # Export PR-curve data into an event file so it can be shown in TensorBoard.
  _, update_op = summary_lib.pr_curve_streaming_op(name='PR_Curve',
                                                   predictions=scores,
                                                   labels=labels,
                                                   num_thresholds=10)
  merged_summary = tf.summary.merge_all()
  with tf.Session() as sess:
    writer = tf.summary.FileWriter('/home/ai-lab/frcnn/tmp/logdir', sess.graph)
    sess.run(tf.local_variables_initializer())
    for step in range(43):
      sess.run([update_op])
      if step % 6 == 0:
        writer.add_summary(sess.run(merged_summary), global_step=step)
  # The lines above were added by Huy Vu.

  self.assertAllClose(precision, expected_precision)
  self.assertAllClose(recall, expected_recall)
  self.assertAllClose(precision_float_type, expected_precision)
  self.assertAllClose(recall_float_type, expected_recall)
def evaluate(self): """Compute evaluation result. Returns: average_precision_per_class: float numpy array of average precision for each class. mean_ap: mean average precision of all classes, float scalar precisions_per_class: List of precisions, each precision is a float numpy array recalls_per_class: List of recalls, each recall is a float numpy array corloc_per_class: numpy float array mean_corloc: Mean CorLoc score for each class, float scalar """ if (self.num_gt_instances_per_class == 0).any(): logging.warn( 'The following classes have no ground truth examples: %s', np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0))) for class_index in range(self.num_class): if self.num_gt_instances_per_class[class_index] == 0: continue scores = np.concatenate(self.scores_per_class[class_index]) tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) self.precisions_per_class.append(precision) self.recalls_per_class.append(recall) average_precision = metrics.compute_average_precision(precision, recall) self.average_precision_per_class[class_index] = average_precision with open("AP"+str(class_index),"wb") as f: save_file = {"rec":recall,"prec":precision,"ap":average_precision} cPickle.dump(save_file,f) self.corloc_per_class = metrics.compute_cor_loc( self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) mean_ap = np.nanmean(self.average_precision_per_class) mean_corloc = np.nanmean(self.corloc_per_class) return (self.average_precision_per_class, mean_ap, self.precisions_per_class, self.recalls_per_class, self.corloc_per_class, mean_corloc)
def evaluate(self): """Compute evaluation result. Returns: average_precision_per_class: float numpy array of average precision for each class. mean_ap: mean average precision of all classes, float scalar precisions_per_class: List of precisions, each precision is a float numpy array recalls_per_class: List of recalls, each recall is a float numpy array corloc_per_class: numpy float array mean_corloc: Mean CorLoc score for each class, float scalar """ if (self.num_gt_instances_per_class == 0).any(): logging.warn( 'The following classes have no ground truth examples: %s', np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0))) for class_index in range(self.num_class): if self.num_gt_instances_per_class[class_index] == 0: continue scores = np.concatenate(self.scores_per_class[class_index]) tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index]) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) self.precisions_per_class.append(precision) self.recalls_per_class.append(recall) average_precision = metrics.compute_average_precision(precision, recall) self.average_precision_per_class[class_index] = average_precision self.corloc_per_class = metrics.compute_cor_loc( self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) mean_ap = np.nanmean(self.average_precision_per_class) mean_corloc = np.nanmean(self.corloc_per_class) return (self.average_precision_per_class, mean_ap, self.precisions_per_class, self.recalls_per_class, self.corloc_per_class, mean_corloc)
def evaluate(self): """Compute evaluation result. Returns: A named tuple with the following fields - average_precision: float numpy array of average precision for each class. mean_ap: mean average precision of all classes, float scalar precisions: List of precisions, each precision is a float numpy array recalls: List of recalls, each recall is a float numpy array corloc: numpy float array mean_corloc: Mean CorLoc score for each class, float scalar """ if (self.num_gt_instances_per_class == 0).any(): logging.warn( 'The following classes have no ground truth examples: %s', np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) + self.label_id_offset) if self.use_weighted_mean_ap: all_scores = np.array([], dtype=float) all_tp_fp_labels = np.array([], dtype=bool) for class_index in range(self.num_class): if self.num_gt_instances_per_class[class_index] == 0: continue if not self.scores_per_class[class_index]: scores = np.array([], dtype=float) tp_fp_labels = np.array([], dtype=bool) else: scores = np.concatenate(self.scores_per_class[class_index]) tp_fp_labels = np.concatenate( self.tp_fp_labels_per_class[class_index]) if self.use_weighted_mean_ap: all_scores = np.append(all_scores, scores) all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) self.precisions_per_class.append(precision) self.recalls_per_class.append(recall) average_precision = metrics.compute_average_precision( precision, recall) self.average_precision_per_class[class_index] = average_precision average_precision_per_iou = [] for ii in range(len(self.iou_list)): for class_index in range(self.num_class): if self.num_gt_instances_per_class[class_index] == 0: continue if not self.scores_per_class_per_iou[ii][class_index]: scores = np.array([], dtype=float) tp_fp_labels = np.array([], dtype=bool) else: scores = np.concatenate( self.scores_per_class_per_iou[ii][class_index]) tp_fp_labels = np.concatenate( self.tp_fp_labels_per_class_per_iou[ii][class_index]) #if self.use_weighted_mean_ap: # all_scores = np.append(all_scores, scores) # all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, self.num_gt_instances_per_class[class_index]) self.precisions_per_class_per_iou[ii].append(precision) self.recalls_per_class_per_iou[ii].append(recall) average_precision = metrics.compute_average_precision( precision, recall) self.average_precision_per_class_per_iou[ii][ class_index] = average_precision self.corloc_per_class = metrics.compute_cor_loc( self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class) if self.use_weighted_mean_ap: num_gt_instances = np.sum(self.num_gt_instances_per_class) precision, recall = metrics.compute_precision_recall( all_scores, all_tp_fp_labels, num_gt_instances) mean_ap = metrics.compute_average_precision(precision, recall) else: mean_ap = np.nanmean(self.average_precision_per_class) mean_ap_per_iou = [] for ii in range(len(self.iou_list)): mean_ap_per_iou.append( np.nanmean(self.average_precision_per_class_per_iou[ii])) print('Mean AP: %.3f' % mean_ap) for ii in range(len(self.iou_list)): print('Mean AP @ IoU %.2f: %.3f' % (self.iou_list[ii], mean_ap_per_iou[ii])) print('Mean AP @ IoU[0.5:0.05:0.95]: %.3f' % np.nanmean(np.asarray(mean_ap_per_iou))) raw_input() mean_corloc = np.nanmean(self.corloc_per_class) return ObjectDetectionEvalMetrics(self.average_precision_per_class, mean_ap, 
self.precisions_per_class, self.recalls_per_class, self.corloc_per_class, mean_corloc)
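# The per-IoU variant above keeps one AP array per threshold in
# self.average_precision_per_class_per_iou and then averages over the sweep.
# A compact hedged sketch of that COCO-style summary; ap_per_class_per_iou and
# iou_list are hypothetical inputs mirroring the attributes used above.
import numpy as np


def sketch_map_over_iou_sweep(ap_per_class_per_iou, iou_list):
    """Per-threshold mAP plus the mean over the whole IoU sweep."""
    map_per_iou = [np.nanmean(ap_per_class)
                   for ap_per_class in ap_per_class_per_iou]
    for iou, m in zip(iou_list, map_per_iou):
        print('Mean AP @ IoU %.2f: %.3f' % (iou, m))
    return float(np.nanmean(map_per_iou))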
def compute_precision_recall_per_cat(detection_file, db_file): print('Loading detection file...') with open(detection_file) as f: detection_results = pickle.load(f) with open(db_file, 'r') as f: data = json.load(f) im_to_seq = {} for im in data['images']: im_to_seq[im['id']] = im['seq_id'] im_to_cat = {} for ann in data['annotations']: im_to_cat[ann['image_id']] = ann['category_id'] #add empty category empty_id = max([cat['id'] for cat in data['categories']]) + 1 data['categories'].append({'name': 'empty', 'id': empty_id}) #add all images that don't have annotations, with cat empty for im in data['images']: if im['id'] not in im_to_cat: im_to_cat[im['id']] = empty_id cat_id_to_cat = {} for cat in data['categories']: cat_id_to_cat[cat['id']] = cat['name'] cat_to_ims = {cat_id: [] for cat_id in cat_id_to_cat} for im in data['images']: cat_to_ims[im_to_cat[im['id']]].append(im['id']) seqs = {} for im in detection_results['images']: if im in im_to_seq: if im_to_seq[im] not in seqs: seqs[im_to_seq[im]] = [] seqs[im_to_seq[im]].append(im) print('Clustering detections by image...') #print(detection_results.keys()) # group the detections and gts by image id: per_image_detections, per_image_gts = cluster_detections_by_image( detection_results) per_image_eval = per_image_evaluation.PerImageEvaluation( num_groundtruth_classes=1, matching_iou_threshold=0.5, nms_iou_threshold=1.0, nms_max_output_boxes=10000) detection_labels = {cat: [] for cat in cat_to_ims} detection_scores = {cat: [] for cat in cat_to_ims} num_total_gts = {cat: 0 for cat in cat_to_ims} count = {cat: 0 for cat in cat_to_ims} precision = {} recall = {} average_precision = {} for cat, images in cat_to_ims.iteritems(): for image_id in images: if image_id not in per_image_detections: #print(image_id) count[cat] += 1 continue scores, tp_fp_labels = get_results_per_image( per_image_detections[image_id], per_image_gts[image_id], per_image_eval) detection_labels[cat].append(tp_fp_labels) detection_scores[cat].append(scores) num_gts = len(per_image_gts[image_id]['bboxes']) num_total_gts[cat] += num_gts if len(detection_scores[cat]) > 0: scores = np.concatenate(detection_scores[cat]) labels = np.concatenate(detection_labels[cat]).astype(np.bool) #print(len(scores)) #print(len(labels)) precision[cat], recall[cat] = metrics.compute_precision_recall( scores, labels, num_total_gts[cat]) average_precision[cat] = metrics.compute_average_precision( precision[cat], recall[cat]) else: print("no detections for " + cat_id_to_cat[cat]) print(cat_id_to_cat[cat], count[cat], len(images)) return precision, recall, average_precision, cat_id_to_cat
def compute_precision_recall(detection_file, detection_results=None, images_to_consider='all', get_night_day=None): if detection_results == None: print('Loading detection file...') with open(detection_file) as f: detection_results = pickle.load(f) print('Clustering detections by image...') #print(detection_results.keys()) # group the detections by image id: use_im = get_images_to_consider(detection_results, images_to_consider, get_night_day) per_image_detections, per_image_gts = cluster_detections_by_image( detection_results, use_im) per_image_eval = per_image_evaluation.PerImageEvaluation( num_groundtruth_classes=1, matching_iou_threshold=0.5, nms_iou_threshold=1.0, nms_max_output_boxes=10000) print('Running per-object analysis...') detection_labels = [] detection_scores = [] num_total_gts = 0 count = 0 for image_id, dets in per_image_detections.iteritems(): num_detections = len(dets['bboxes']) # [ymin, xmin, ymax, xmax] in absolute image coordinates. detected_boxes = np.zeros([num_detections, 4], dtype=np.float32) # detection scores for the boxes detected_scores = np.zeros([num_detections], dtype=np.float32) # 0-indexed detection classes for the boxes detected_class_labels = np.zeros([num_detections], dtype=np.int32) detected_masks = None for i in range(num_detections): x1, y1, x2, y2 = dets['bboxes'][i] detected_boxes[i] = np.array([y1, x1, y2, x2]) detected_scores[i] = dets['scores'][i] detected_class_labels[i] = dets['labels'][i] - 1 gts = per_image_gts[image_id] #print(gts) num_gts = len(gts['bboxes']) #print(num_gts) if num_gts > 0: # [ymin, xmin, ymax, xmax] in absolute image coordinates groundtruth_boxes = np.zeros([num_gts, 4], dtype=np.float32) # 0-indexed groundtruth classes for the boxes groundtruth_class_labels = np.zeros(num_gts, dtype=np.int32) groundtruth_masks = None groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool) groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool) for i in range(num_gts): x1, y1, x2, y2 = gts['bboxes'][i] groundtruth_boxes[i] = np.array([y1, x1, y2, x2]) groundtruth_class_labels[i] = gts['labels'][i] - 1 #print(groundtruth_boxes, groundtruth_class_labels,detected_scores[0],detected_boxes[0], detected_class_labels[:2]) scores, tp_fp_labels, is_class_correctly_detected_in_image = ( per_image_eval.compute_object_detection_metrics( detected_boxes=detected_boxes, detected_scores=detected_scores, detected_class_labels=detected_class_labels, groundtruth_boxes=groundtruth_boxes, groundtruth_class_labels=groundtruth_class_labels, groundtruth_is_difficult_list=groundtruth_is_difficult_list, groundtruth_is_group_of_list=groundtruth_is_group_of_list, detected_masks=detected_masks, groundtruth_masks=groundtruth_masks)) #print(scores, tp_fp_labels) detection_labels.append(tp_fp_labels[0]) detection_scores.append(scores[0]) num_total_gts += num_gts count += 1 if count % 1000 == 0: print(str(count) + ' images complete') #if (tp_fp_labels[0].shape[0] != num_detections): # print('Incorrect label length') #if scores[0].shape[0] != num_detections: # print('Incorrect score length') #if tp_fp_labels[0].sum() > num_gts: # print('Too many correct detections') else: detection_labels.append(np.zeros(num_detections, dtype=np.int32)) detection_scores.append(detected_scores) scores = np.concatenate(detection_scores) labels = np.concatenate(detection_labels).astype(np.bool) precision, recall = metrics.compute_precision_recall( scores, labels, num_total_gts) average_precision = metrics.compute_average_precision(precision, recall) return precision, recall, 
average_precision
def compute_precision_recall_with_images(detection_file): print('Loading detection file...') with open(detection_file) as f: detection_results = pickle.load(f) print('Clustering detections by image...') # group the detections by image id: per_image_detections = { detection_results['images'][idx]: { 'bboxes': detection_results['detections'][idx], 'scores': detection_results['detection_scores'][idx], 'labels': detection_results['detection_labels'][idx] } for idx in range(len(detection_results['images'])) } # group the ground truth annotations by image id: per_image_gts = { detection_results['images'][idx]: { 'bboxes': detection_results['gts'][idx], 'labels': detection_results['gt_labels'][idx] } for idx in range(len(detection_results['images'])) } per_image_eval = per_image_evaluation.PerImageEvaluation( num_groundtruth_classes=1, matching_iou_threshold=0.5, nms_iou_threshold=1.0, nms_max_output_boxes=10000) print('Running per-image analysis...') detection_labels = [] detection_scores = [] num_total_gts = 0 count = 0 for image_id, dets in per_image_detections.iteritems(): im_detection_labels = [] im_detection_scores = [] im_num_gts = [] max_im_scores = [] num_detections = len(dets['bboxes']) # [ymin, xmin, ymax, xmax] in absolute image coordinates. detected_boxes = np.zeros([num_detections, 4], dtype=np.float32) # detection scores for the boxes detected_scores = np.zeros([num_detections], dtype=np.float32) # 0-indexed detection classes for the boxes detected_class_labels = np.zeros([num_detections], dtype=np.int32) detected_masks = None for i in range(num_detections): x1, y1, x2, y2 = dets['bboxes'][i] detected_boxes[i] = np.array([y1, x1, y2, x2]) detected_scores[i] = dets['scores'][i] detected_class_labels[i] = dets['labels'][i] - 1 max_im_scores.append(np.max(detected_scores)) box_id = np.argmax(detected_scores) gts = per_image_gts[image_id] num_gts = len(gts['bboxes']) im_num_gts = num_gts if num_gts > 0: # [ymin, xmin, ymax, xmax] in absolute image coordinates groundtruth_boxes = np.zeros([num_gts, 4], dtype=np.float32) # 0-indexed groundtruth classes for the boxes groundtruth_class_labels = np.zeros(num_gts, dtype=np.int32) groundtruth_masks = None groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool) groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool) for i in range(num_gts): x1, y1, x2, y2 = gts['bboxes'][i] groundtruth_boxes[i] = np.array([y1, x1, y2, x2]) groundtruth_class_labels[i] = gts['labels'][i] - 1 ious = np_box_ops.iou(detected_boxes, groundtruth_boxes) if np.max(ious[box_id, :]) < 0.5: max_im_scores[-1] = 0 #print('detected animal box') #print(groundtruth_boxes, groundtruth_class_labels,detected_scores[0],detected_boxes[0], detected_class_labels[0]) scores, tp_fp_labels, is_class_correctly_detected_in_image = ( per_image_eval.compute_object_detection_metrics( detected_boxes=detected_boxes, detected_scores=detected_scores, detected_class_labels=detected_class_labels, groundtruth_boxes=groundtruth_boxes, groundtruth_class_labels=groundtruth_class_labels, groundtruth_is_difficult_list=groundtruth_is_difficult_list, groundtruth_is_group_of_list=groundtruth_is_group_of_list, detected_masks=detected_masks, groundtruth_masks=groundtruth_masks)) #print(scores, tp_fp_labels) im_detection_labels = tp_fp_labels[0] im_detection_scores = scores[0] #num_total_gts += num_gts count += 1 if count % 1000 == 0: print(str(count) + ' images complete') #if (tp_fp_labels[0].shape[0] != num_detections): # print('Incorrect label length') #if scores[0].shape[0] != num_detections: # 
print('Incorrect score length') #if tp_fp_labels[0].sum() > num_gts: # print('Too many correct detections') else: im_detection_labels = np.zeros(num_detections, dtype=np.int32) im_detection_scores = detected_scores max_im_scores[-1] = 0 best_score = np.max(max_im_scores) if best_score > 0: #print('valid box') best_im = np.argmax(max_im_scores) #print(best_im, best_score) temp_labels = np.zeros(len(im_detection_labels), dtype=np.int32) temp_scores = np.zeros(len(im_detection_scores), dtype=np.float32) for j in range(min(im_num_gts, len(im_detection_labels))): temp_labels[ j] = True #TODO: this currently only works for oneclass? temp_scores[j] = best_score im_detection_labels = temp_labels im_detection_scores = temp_scores num_total_gts += im_num_gts detection_labels.append(im_detection_labels) detection_scores.append(im_detection_scores) print(len(detection_scores), len(detection_scores[0]), len(detection_scores[1])) scores = np.concatenate(detection_scores) labels = np.concatenate(detection_labels).astype(np.bool) precision, recall = metrics.compute_precision_recall( scores, labels, num_total_gts) average_precision = metrics.compute_average_precision(precision, recall) return precision, recall, average_precision
def compute_precision_recall_bbox(per_image_detections,
                                  per_image_gts,
                                  num_gt_classes,
                                  matching_iou_threshold=0.5):
    """
    Compute the precision and recall at each confidence level for detection
    results of various classes.

    Args:
        per_image_detections: dict of image_id to a dict with fields `boxes`,
            `scores` and `labels`
        per_image_gts: dict of image_id to a dict with fields `gt_boxes` and
            `gt_labels`
        num_gt_classes: number of classes in the ground truth labels
        matching_iou_threshold: IoU above which a detected and a ground truth
            box are considered overlapping

    Returns:
        A dict `per_cat_metrics`, where the keys are the possible gt classes
        and `one_class` which considers all classes. Each key corresponds to a
        dict with the fields precision, recall, average_precision, etc.
    """
    per_image_eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes=num_gt_classes,
        matching_iou_threshold=matching_iou_threshold,
        nms_iou_threshold=1.0,
        nms_max_output_boxes=10000)

    print('Running per-object analysis...')

    # key is the category; in each list, 1 is tp, 0 is fp
    detection_tp_fp = defaultdict(list)
    detection_scores = defaultdict(list)
    num_total_gt = defaultdict(int)

    for image_id, dets in tqdm(per_image_detections.items()):
        detected_boxes = np.array(dets['boxes'], dtype=np.float32)
        detected_scores = np.array(dets['scores'], dtype=np.float32)
        # labels input to compute_object_detection_metrics() needs to start at
        # 0, not 1
        detected_labels = np.array(dets['labels'], dtype=np.int) - 1  # start at 0
        # num_detections = len(dets['boxes'])

        gts = per_image_gts[image_id]
        gt_boxes = np.array(gts['gt_boxes'], dtype=np.float32)
        gt_labels = np.array(gts['gt_labels'], dtype=np.int) - 1  # start at 0
        num_gts = len(gts['gt_boxes'])

        # place holders - we don't have these
        groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool)
        groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool)

        # to prevent 'Invalid dimensions for box data.' error
        if num_gts == 0:
            # this box will not match any detections
            gt_boxes = np.array([[0, 0, 0, 0]], dtype=np.float32)

        scores, tp_fp_labels, is_class_correctly_detected_in_image = (
            per_image_eval.compute_object_detection_metrics(
                detected_boxes=detected_boxes,
                detected_scores=detected_scores,
                detected_class_labels=detected_labels,
                groundtruth_boxes=gt_boxes,
                groundtruth_class_labels=gt_labels,
                groundtruth_is_difficult_list=groundtruth_is_difficult_list,
                groundtruth_is_group_of_list=groundtruth_is_group_of_list))

        for i, tp_fp_labels_cat in enumerate(tp_fp_labels):
            # true positives < gt of that category
            assert sum(tp_fp_labels_cat) <= sum(gt_labels == i)
            cat = i + 1  # categories start at 1
            detection_tp_fp[cat].append(tp_fp_labels_cat)
            detection_scores[cat].append(scores[i])
            num_total_gt[cat] += sum(gt_labels == i)  # gt_labels start at 0

    all_scores = []
    all_tp_fp = []

    print('Computing precision recall for each category...')
    per_cat_metrics = {}
    for i in range(num_gt_classes):
        cat = i + 1
        scores_cat = np.concatenate(detection_scores[cat])
        tp_fp_cat = np.concatenate(detection_tp_fp[cat]).astype(np.bool)
        all_scores.append(scores_cat)
        all_tp_fp.append(tp_fp_cat)

        precision, recall = metrics.compute_precision_recall(
            scores_cat, tp_fp_cat, num_total_gt[cat])
        average_precision = metrics.compute_average_precision(precision, recall)

        per_cat_metrics[cat] = {
            'category': cat,
            'precision': precision,
            'recall': recall,
            'average_precision': average_precision,
            'scores': scores_cat,
            'tp_fp': tp_fp_cat,
            'num_gt': num_total_gt[cat]
        }
        print('Number of ground truth in category {} is {}'.format(
            cat, num_total_gt[cat]))

    # compute one-class precision/recall/average precision (if every box is
    # just of an object class)
    all_scores = np.concatenate(all_scores)
    all_tp_fp = np.concatenate(all_tp_fp)
    overall_gt_count = sum(num_total_gt.values())

    one_class_prec, one_class_recall = metrics.compute_precision_recall(
        all_scores, all_tp_fp, overall_gt_count)
    one_class_average_precision = metrics.compute_average_precision(
        one_class_prec, one_class_recall)

    per_cat_metrics['one_class'] = {
        'category': 'one_class',
        'precision': one_class_prec,
        'recall': one_class_recall,
        'average_precision': one_class_average_precision,
        'scores': all_scores,
        'tp_fp': all_tp_fp,
        'num_gt': overall_gt_count
    }

    return per_cat_metrics
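# A minimal usage sketch for compute_precision_recall_bbox above. The image id,
# boxes and scores are made-up toy data (hypothetical, for illustration only);
# boxes follow the [ymin, xmin, ymax, xmax] convention used by the function.
toy_detections = {
    'img_0': {
        'boxes': [[0.1, 0.1, 0.5, 0.5], [0.6, 0.6, 0.9, 0.9]],
        'scores': [0.9, 0.4],
        'labels': [1, 1],
    }
}
toy_gts = {
    'img_0': {
        'gt_boxes': [[0.12, 0.11, 0.48, 0.52]],
        'gt_labels': [1],
    }
}

per_cat = compute_precision_recall_bbox(
    toy_detections, toy_gts, num_gt_classes=1, matching_iou_threshold=0.5)
print(per_cat[1]['average_precision'], per_cat['one_class']['average_precision'])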
def compute_precision_recall_with_sequences(detection_file, db_file,detection_results=None,images_to_consider='all', get_night_day = None): if detection_results == None: print('Loading detection file...') with open(detection_file) as f: detection_results = pickle.load(f) im_to_seq = get_im_to_seq_map(db_file) seqs = {} for im in detection_results['images']: if im in im_to_seq: if im_to_seq[im] not in seqs: seqs[im_to_seq[im]] = [] seqs[im_to_seq[im]].append(im) print('Clustering detections by image...') use_im = get_images_to_consider(detection_results, images_to_consider, get_night_day) per_image_detections, per_image_gts = cluster_detections_by_image(detection_results, use_im) per_image_eval = per_image_evaluation.PerImageEvaluation( num_groundtruth_classes=1, matching_iou_threshold=0.5, nms_iou_threshold=1.0, nms_max_output_boxes=10000 ) print('Running per-image analysis...') detection_labels = [] detection_scores = [] num_total_gts = 0 count = 0 for seq in seqs: seq_detection_labels = [] seq_detection_scores = [] seq_num_gts = [] is_gt_in_seq = False max_seq_scores = [] valid_max_scores = [] #print(seq) for image_id in seqs[seq]: #for image_id, dets in per_image_detections.iteritems(): dets = per_image_detections[image_id] num_detections = len(dets['bboxes']) # [ymin, xmin, ymax, xmax] in absolute image coordinates. detected_boxes = np.zeros([num_detections, 4], dtype=np.float32) # detection scores for the boxes detected_scores = np.zeros([num_detections], dtype=np.float32) # 0-indexed detection classes for the boxes detected_class_labels = np.zeros([num_detections], dtype=np.int32) detected_masks = None count +=1 if count % 1000 == 0: print(str(count) + ' images complete') for i in range(num_detections): x1, y1, x2, y2 = dets['bboxes'][i] detected_boxes[i] = np.array([y1, x1, y2, x2]) detected_scores[i] = dets['scores'][i] detected_class_labels[i] = dets['labels'][i] - 1 max_seq_scores.append(np.max(detected_scores)) valid_max_scores.append(np.max(detected_scores)) box_id = np.argmax(detected_scores) gts = per_image_gts[image_id] num_gts = len(gts['bboxes']) #seq_num_gts.append(num_gts) #print(num_gts) if num_gts > 0: seq_num_gts.append(1) is_gt_in_seq = True # [ymin, xmin, ymax, xmax] in absolute image coordinates groundtruth_boxes = np.zeros([num_gts, 4], dtype=np.float32) # 0-indexed groundtruth classes for the boxes groundtruth_class_labels = np.zeros(num_gts, dtype=np.int32) groundtruth_masks = None groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool) groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool) for i in range(num_gts): x1, y1, x2, y2 = gts['bboxes'][i] groundtruth_boxes[i] = np.array([y1, x1, y2, x2]) groundtruth_class_labels[i] = gts['labels'][i] - 1 ious = np_box_ops.iou(detected_boxes,groundtruth_boxes) if np.max(ious[box_id, :]) < 0.5: valid_max_scores[-1] = 0 scores, tp_fp_labels, is_class_correctly_detected_in_image = ( per_image_eval.compute_object_detection_metrics( detected_boxes=detected_boxes, detected_scores=detected_scores, detected_class_labels=detected_class_labels, groundtruth_boxes=groundtruth_boxes, groundtruth_class_labels=groundtruth_class_labels, groundtruth_is_difficult_list=groundtruth_is_difficult_list, groundtruth_is_group_of_list=groundtruth_is_group_of_list, detected_masks=detected_masks, groundtruth_masks=groundtruth_masks ) ) seq_detection_labels.append(tp_fp_labels[0]) seq_detection_scores.append(scores[0]) #num_total_gts += 1 else: seq_num_gts.append(0) seq_detection_labels.append(np.zeros(num_detections, dtype=np.int32)) 
seq_detection_scores.append(detected_scores) valid_max_scores[-1] = 0 seq_detection_label = np.zeros(1, dtype=np.int32) seq_detection_score = np.zeros(1, dtype=np.float32) best_score = np.max(valid_max_scores) if best_score > 0: if not is_gt_in_seq: print(is_gt_in_seq) print('matched box with no gt') print(valid_max_scores) #print('valid box') best_im = np.argmax(max_seq_scores) #print(best_im, best_score) for i in range(len(seqs[seq])): temp_labels = np.zeros(len(seq_detection_labels[i]), dtype=np.int32) temp_scores = np.zeros(len(seq_detection_scores[i]), dtype=np.float32) for j in range(min(seq_num_gts[i], len(temp_labels))): temp_labels[j] = True #TODO: this currently only works for oneclass? temp_scores[j] = best_score seq_detection_labels[i] = temp_labels seq_detection_scores[i] = temp_scores seq_detection_label[0] = True seq_detection_score[0] = best_score else: #print('no valid box') seq_detection_label[0] = False seq_detection_score[0] = np.max(max_seq_scores) #if sum(seq_num_gts)>0: if is_gt_in_seq: num_total_gts+=1 detection_labels.append(seq_detection_label) detection_scores.append(seq_detection_score) scores = np.concatenate(detection_scores) labels = np.concatenate(detection_labels).astype(np.bool) print(count) print(len(seqs.keys())) print(sum([1 for i in range(len(detection_labels)) if detection_labels[i] == True]), num_total_gts) precision, recall = metrics.compute_precision_recall( scores, labels, num_total_gts ) average_precision = metrics.compute_average_precision(precision, recall) return precision, recall, average_precision
def compute_precision_recall_per_loc(detection_file, db_file): print('Loading detection file...') with open(detection_file) as f: detection_results = pickle.load(f) with open(db_file, 'r') as f: data = json.load(f) print('Images: ', len(data['images'])) print('Detection result Images: ', len(detection_results['images'])) loc_to_ims = {} for im in data['images']: if im['location'] not in loc_to_ims: loc_to_ims[im['location']] = [] loc_to_ims[im['location']].append(im['id']) print('Clustering detections by image...') #print(detection_results.keys()) # group the detections and gts by image id: per_image_detections, per_image_gts = cluster_detections_by_image( detection_results) per_image_eval = per_image_evaluation.PerImageEvaluation( num_groundtruth_classes=1, matching_iou_threshold=0.5, nms_iou_threshold=1.0, nms_max_output_boxes=10000) detection_labels = {loc: [] for loc in loc_to_ims} detection_scores = {loc: [] for loc in loc_to_ims} num_total_gts = {loc: 0 for loc in loc_to_ims} count = {loc: 0 for loc in loc_to_ims} precision = {} recall = {} average_precision = {} for cat, images in loc_to_ims.iteritems(): for image_id in images: if image_id not in per_image_detections: #print(image_id) count[cat] += 1 continue scores, tp_fp_labels = get_results_per_image( per_image_detections[image_id], per_image_gts[image_id], per_image_eval) detection_labels[cat].append(tp_fp_labels) detection_scores[cat].append(scores) num_gts = len(per_image_gts[image_id]['bboxes']) num_total_gts[cat] += num_gts if len(detection_scores[cat]) > 0: scores = np.concatenate(detection_scores[cat]) labels = np.concatenate(detection_labels[cat]).astype(np.bool) #print(len(scores)) #print(len(labels)) precision[cat], recall[cat] = metrics.compute_precision_recall( scores, labels, num_total_gts[cat]) average_precision[cat] = metrics.compute_average_precision( precision[cat], recall[cat]) else: print("no detections for " + cat) print(cat, count[cat], len(images)) return precision, recall, average_precision
    'scores': scores,
    'classes': classes,
    'num_detections': num_detections
}

scores, tp_fp_labels, is_class_correctly_detected_in_image = (
    per_image_evaluation.PerImageEvaluation().compute_object_detection_metrics(
        detected_boxes=np.squeeze(boxes),
        detected_scores=np.squeeze(scores),
        detected_class_labels=np.squeeze(classes).astype(np.int32),
        groundtruth_boxes=gt_boxes,
        groundtruth_class_labels=gt_class_labels,
        groundtruth_is_difficult_list=gt_is_difficult_list,
        groundtruth_is_group_of_list=gt_is_group_of_list))

tp_fp_labels = np.array(tp_fp_labels)
precision, recall = metrics.compute_precision_recall(
    np.array(scores), tp_fp_labels[1].astype(float), 2)
print(scores)
print('---------')
print(len(tp_fp_labels))

# f_name = re.split('/', path_f)
plt.figure(figsize=IMAGE_SIZE)
plt.imshow(image_np)
# plt.savefig(f_name[-1])
plt.figure(figsize=IMAGE_SIZE)
plt.imshow(image_np)
# plt.savefig('./test_result/predicted_' + f_name[-1])
# cv2.imwrite('./test_result/predicted_' + f_name[-1], image_np)
def get_mtl_metrics(result_lists): mtl_metrics = dict() gt_boxes_list = result_lists[fields.InputDataFields.groundtruth_boxes] detection_box_list = result_lists['detection_boxes'] b_window = False b_closeness = False b_edgemask = False if 'window_classes_gt' in result_lists.keys(): window_classes_gt_list = result_lists['window_classes_gt'] window_classes_dt_list = result_lists['window_classes_dt'] b_window = True if 'closeness_gt' in result_lists.keys(): closeness_gt_list = result_lists['closeness_gt'] closeness_dt_list = result_lists['closeness_dt'] b_closeness = True if 'edgemask_gt' in result_lists.keys(): edgemask_gt_list = result_lists['edgemask_gt'] edgemask_dt_list = result_lists['edgemask_dt'] b_edgemask = True if b_window: map_list = [] for window_classes_gt, window_classes_dt in zip( window_classes_gt_list, window_classes_dt_list): ap_list = [] for window_class_gt, window_class_dt in zip( window_classes_gt, window_classes_dt): window_class_dt = _softmax(window_class_dt) window_class_gt = [ float(val_str) for val_str in window_class_gt.split(' ') ] scores = window_class_dt tp_fp_labels = np.asarray([gt > 0 for gt in window_class_gt], dtype=np.bool) num_gt = int(np.sum(np.asarray(tp_fp_labels, dtype=np.int32))) precision, recall = metrics.compute_precision_recall( scores, tp_fp_labels, num_gt) average_precision = metrics.compute_average_precision( precision, recall) ap_list.append(average_precision) map_list.append(float(np.mean(ap_list))) window_map = float(np.mean(map_list)) mtl_metrics['mtl/window_map'] = window_map gt_dt_index_list = [] for gt_boxes, dt_boxes in zip(gt_boxes_list, detection_box_list): intersection = np_box_ops.intersection(gt_boxes, dt_boxes) gt_dt_index = np.argmax(intersection, axis=1) gt_dt_index_list.append(gt_dt_index) if b_closeness: diff_list = [] for closeness_gt, gt_dt_indices, closeness_dt_image in zip( closeness_gt_list, gt_dt_index_list, closeness_dt_list): ap_list = [] for gt, gt_dt_index in zip(closeness_gt, gt_dt_indices): closeness_dt = _sigmoid(closeness_dt_image[gt_dt_index]) closeness_gt = np.asarray( [float(val_str) for val_str in gt.split(' ')], dtype=np.float32) num_non_zeros = int(np.sum(closeness_gt != 0)) if num_non_zeros == 0: continue argmax_dt = np.argmax(closeness_dt[1:]) argmax_gt = np.argmax(closeness_gt[1:]) ap_list.append(float(argmax_dt == argmax_gt)) if ap_list: diff_list.append(float(np.mean(ap_list))) if diff_list: closeness_diff = float(np.mean(diff_list)) else: closeness_diff = 0.0 mtl_metrics['mtl/closeness_diff'] = closeness_diff if b_edgemask: ap_list = [] for edgemask_gt, edgemask_dt in zip(edgemask_gt_list, edgemask_dt_list): edgemask_gt = edgemask_gt[0] edgemask_dt = edgemask_dt[0] shape_gt = edgemask_gt.shape edgemask_dt_resize = resize(edgemask_dt, list(shape_gt) + [2]).astype( np.float32) edgemask_dt_resize = (edgemask_dt_resize[:, :, 0] < edgemask_dt_resize[:, :, 1]).astype( np.float32) edgemask_precision = np.mean(edgemask_dt_resize == edgemask_gt) ap_list.append(edgemask_precision) if ap_list: mtl_metrics['mtl/edgemask_ap'] = float(np.mean(ap_list)) else: mtl_metrics['mtl/edgemask_ap'] = float(0) return mtl_metrics
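# get_mtl_metrics above calls _softmax and _sigmoid helpers that are not shown
# in this snippet. Plausible, hedged definitions (assumptions about the missing
# helpers, written as numerically stable numpy versions):
import numpy as np


def _softmax(logits):
    """Softmax over the last axis, shifted for numerical stability."""
    logits = np.asarray(logits, dtype=np.float32)
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=-1, keepdims=True)


def _sigmoid(x):
    """Element-wise logistic function."""
    return 1.0 / (1.0 + np.exp(-np.asarray(x, dtype=np.float32)))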
def compute_precision_recall_bbox( per_image_detections: Mapping[str, Mapping[str, Any]], per_image_gts: Mapping[str, Mapping[str, Any]], num_gt_classes: int, matching_iou_threshold: float = 0.5 ) -> Dict[Union[str, int], Dict[str, Any]]: """ Compute the precision and recall at each confidence level for detection results of various classes. Args: per_image_detections: dict, image_id (str) => dict with fields 'boxes': array-like, shape [N, 4], type float, each row is [ymin, xmin, ymax, xmax] in normalized coordinates 'scores': array-like, shape [N], float 'labels': array-like, shape [N], integers in [1, num_gt_classes] per_image_gts: dic, image_id (str) => dict with fields 'gt_boxes': array-like, shape [M, 4], type float, each row is [ymin, xmin, ymax, xmax] in normalized coordinates 'gt_labels': array-like, shape [M], integers in [1, num_gt_classes] num_gt_classes: int, number of classes in the ground truth labels matching_iou_threshold: float, IoU above which a detected and a ground truth box are considered overlapping Returns: dict, per-class metrics, keys are integers in [1, num_gt_classes] and 'one_class' which considers all classes. Each value is a dict with fields ['precision', 'recall', 'average_precision', ...] """ per_image_eval = per_image_evaluation.PerImageEvaluation( num_groundtruth_classes=num_gt_classes, matching_iou_threshold=matching_iou_threshold, nms_iou_threshold=1.0, nms_max_output_boxes=10000) print('Running per-object analysis...', flush=True) # keys are categories (int) detection_tp_fp = defaultdict(list) # in each list, 1 is tp, 0 is fp detection_scores = defaultdict(list) num_total_gt: Dict[int, int] = defaultdict(int) for image_id, dets in tqdm(per_image_detections.items()): # we force *_boxes to have shape [N, 4], even in case that N = 0 detected_boxes = np.asarray(dets['boxes'], dtype=np.float32).reshape(-1, 4) detected_scores = np.asarray(dets['scores']) # labels input to compute_object_detection_metrics() needs to start at 0, not 1 detected_labels = np.asarray(dets['labels'], dtype=np.int) - 1 # start at 0 # num_detections = len(dets['boxes']) gts = per_image_gts[image_id] gt_boxes = np.asarray(gts['gt_boxes'], dtype=np.float32).reshape(-1, 4) gt_labels = np.asarray(gts['gt_labels'], dtype=np.int) - 1 # start at 0 num_gts = len(gts['gt_boxes']) # place holders - we don't have these groundtruth_is_difficult_list = np.zeros(num_gts, dtype=bool) groundtruth_is_group_of_list = np.zeros(num_gts, dtype=bool) results = per_image_eval.compute_object_detection_metrics( detected_boxes=detected_boxes, detected_scores=detected_scores, detected_class_labels=detected_labels, groundtruth_boxes=gt_boxes, groundtruth_class_labels=gt_labels, groundtruth_is_difficult_list=groundtruth_is_difficult_list, groundtruth_is_group_of_list=groundtruth_is_group_of_list) scores, tp_fp_labels, is_class_correctly_detected_in_image = results for i, tp_fp_labels_cat in enumerate(tp_fp_labels): # true positives < gt of that category assert sum(tp_fp_labels_cat) <= sum(gt_labels == i) cat = i + 1 # categories start at 1 detection_tp_fp[cat].append(tp_fp_labels_cat) detection_scores[cat].append(scores[i]) num_total_gt[cat] += sum(gt_labels == i) # gt_labels start at 0 all_scores = [] all_tp_fp = [] print('Computing precision recall for each category...') per_cat_metrics: Dict[Union[int, str], Dict[str, Any]] = {} for i in range(num_gt_classes): cat = i + 1 scores_cat = np.concatenate(detection_scores[cat]) tp_fp_cat = np.concatenate(detection_tp_fp[cat]).astype(np.bool) 
all_scores.append(scores_cat) all_tp_fp.append(tp_fp_cat) precision, recall = metrics.compute_precision_recall( scores_cat, tp_fp_cat, num_total_gt[cat]) average_precision = metrics.compute_average_precision( precision, recall) per_cat_metrics[cat] = { 'category': cat, 'precision': precision, 'recall': recall, 'average_precision': average_precision, 'scores': scores_cat, 'tp_fp': tp_fp_cat, 'num_gt': num_total_gt[cat] } print(f'Number of ground truth in category {cat}: {num_total_gt[cat]}') # compute one-class precision/recall/average precision (if every box is just # of an object class) all_scores = np.concatenate(all_scores) all_tp_fp = np.concatenate(all_tp_fp) overall_gt_count = sum(num_total_gt.values()) one_class_prec, one_class_recall = metrics.compute_precision_recall( all_scores, all_tp_fp, overall_gt_count) one_class_average_precision = metrics.compute_average_precision( one_class_prec, one_class_recall) per_cat_metrics['one_class'] = { 'category': 'one_class', 'precision': one_class_prec, 'recall': one_class_recall, 'average_precision': one_class_average_precision, 'scores': all_scores, 'tp_fp': all_tp_fp, 'num_gt': overall_gt_count } return per_cat_metrics
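# The typed compute_precision_recall_bbox above returns the same per-category
# dict structure as the earlier variant. A small follow-up sketch that turns it
# into a flat summary; `per_cat_metrics` is assumed to be the value returned by
# either variant, and the field names are taken from the dicts built above.
def summarize_per_cat_metrics(per_cat_metrics):
    """Print category, AP and ground-truth count for every entry."""
    for cat, m in per_cat_metrics.items():
        print('category={} AP={:.3f} num_gt={}'.format(
            cat, m['average_precision'], m['num_gt']))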
}

scores, tp_fp_labels, is_class_correctly_detected_in_image = (
    per_image_evaluation.PerImageEvaluation().compute_object_detection_metrics(
        detected_boxes=np.squeeze(boxes),
        detected_scores=np.squeeze(scores),
        detected_class_labels=np.squeeze(classes).astype(np.int32),
        groundtruth_boxes=gt_boxes,
        groundtruth_class_labels=gt_class_labels,
        groundtruth_is_difficult_list=gt_is_difficult_list,
        groundtruth_is_group_of_list=gt_is_group_of_list))

print("source:", np.array(scores), "\n"
      "tp_fp_labels:", np.array(tp_fp_labels))
tp_fp_labels = np.array(tp_fp_labels)
# precision, recall = metrics.compute_precision_recall(
#     np.array(scores), tp_fp_labels[1].astype(float), 2)
precision, recall = metrics.compute_precision_recall(
    scores[1], tp_fp_labels[1], 2)
print(scores)
print('---------')
print(len(tp_fp_labels))

# f_name = re.split('/', path_f)
plt.figure(figsize=IMAGE_SIZE)
plt.imshow(image_np)
plt.savefig("test.jpg")
plt.figure(figsize=IMAGE_SIZE)
plt.imshow(image_np)
# plt.savefig('./test_result/predicted_' + f_name[-1])
# cv2.imwrite('./test_result/predicted_' + f_name[-1], image_np)