import math


def get_class_eval_item(conf_mat, class_id, class_name, null_class_id):
    # An all-zero confusion matrix means there is nothing to evaluate.
    if conf_mat.ravel().sum() == 0:
        return ClassEvaluationItem(None, None, None, 0, 0, class_id,
                                   class_name)

    non_null_class_ids = list(range(conf_mat.shape[0]))
    non_null_class_ids.remove(null_class_id)

    # Rows are ground truth, columns are predictions.
    true_pos = conf_mat[class_id, class_id]
    false_pos = conf_mat[non_null_class_ids, class_id].sum() - true_pos
    false_neg = conf_mat[class_id, :].sum() - true_pos

    # A zero denominator implies true_pos == 0, so with numpy scalars the
    # division yields nan (with a runtime warning) rather than raising;
    # nan is mapped to None below.
    precision = float(true_pos) / (true_pos + false_pos)
    recall = float(true_pos) / (true_pos + false_neg)
    f1 = 2 * (precision * recall) / (precision + recall)
    count_error = int(false_pos + false_neg)
    gt_count = conf_mat[class_id, :].sum()

    precision = None if math.isnan(precision) else float(precision)
    recall = None if math.isnan(recall) else float(recall)
    f1 = None if math.isnan(f1) else float(f1)

    return ClassEvaluationItem(precision, recall, f1, count_error, gt_count,
                               class_id, class_name, conf_mat[class_id, :])
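# Minimal usage sketch with a hypothetical 3x3 confusion matrix: rows are
# ground truth, columns are predictions, and class 0 is the null class.
import numpy as np

conf_mat = np.array([
    [5, 1, 0],  # null
    [0, 8, 2],  # e.g. 'building'
    [1, 2, 7],  # e.g. 'road'
])
item = get_class_eval_item(
    conf_mat, class_id=1, class_name='building', null_class_id=0)
# For class 1: tp = 8, fp = (8 + 2) - 8 = 2, fn = (0 + 8 + 2) - 8 = 2,
# so precision == recall == 0.8 and count_error == 4.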
def test_merge_both_empty(self):
    a = ClassEvaluationItem()
    b = ClassEvaluationItem()
    a.merge(b)
    self.assertEqual(a.precision, None)
    self.assertEqual(a.recall, None)
    self.assertEqual(a.f1, None)
    self.assertEqual(a.count_error, None)
    self.assertEqual(a.gt_count, 0)
def test_merge_second_empty(self):
    a = ClassEvaluationItem(
        precision=1, recall=1, f1=1, count_error=0, gt_count=1)
    b = ClassEvaluationItem()
    a.merge(b)
    self.assertEqual(a.precision, 1)
    self.assertEqual(a.recall, 1)
    self.assertEqual(a.f1, 1)
    self.assertEqual(a.count_error, 0)
    self.assertEqual(a.gt_count, 1)
import numpy as np
from sklearn import metrics


def compute_eval_items(gt_labels, pred_labels, class_config):
    nb_classes = len(class_config.names)
    class_to_eval_item = {}
    gt_class_ids = []
    pred_class_ids = []

    # Collect paired (ground truth, prediction) class ids over all cells,
    # skipping cells where either label is missing.
    gt_cells = gt_labels.get_cells()
    for gt_cell in gt_cells:
        gt_class_id = gt_labels.get_cell_class_id(gt_cell)
        pred_class_id = pred_labels.get_cell_class_id(gt_cell)
        if gt_class_id is not None and pred_class_id is not None:
            gt_class_ids.append(gt_class_id)
            pred_class_ids.append(pred_class_id)

    # Passing explicit labels makes the per-class arrays cover every class,
    # even those absent from the data.
    sklabels = np.arange(nb_classes)
    precision, recall, f1, support = metrics.precision_recall_fscore_support(
        gt_class_ids, pred_class_ids, labels=sklabels, warn_for=())

    for class_id, class_name in enumerate(class_config.names):
        eval_item = ClassEvaluationItem(
            float(precision[class_id]),
            float(recall[class_id]),
            float(f1[class_id]),
            gt_count=float(support[class_id]),
            class_id=class_id,
            class_name=class_name)
        class_to_eval_item[class_id] = eval_item

    return class_to_eval_item
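# For reference, precision_recall_fscore_support with explicit labels
# returns one entry per class even when a class never occurs, and
# warn_for=() suppresses the undefined-metric warnings (toy data):
from sklearn import metrics
import numpy as np

p, r, f1, support = metrics.precision_recall_fscore_support(
    [1, 1, 2], [1, 2, 2], labels=np.arange(3), warn_for=())
# p == [0., 1., 0.5], r == [0., 0.5, 1.], support == [0, 2, 1]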
def test_merge(self):
    a = ClassEvaluationItem(
        precision=1, recall=1, f1=1, count_error=0, gt_count=1)
    b = ClassEvaluationItem(
        precision=0, recall=0, f1=0, count_error=1, gt_count=2)
    a.merge(b)
    # Merged metrics are averages weighted by gt_count,
    # e.g. precision = (1 * 1 + 0 * 2) / 3 = 1 / 3.
    self.assertEqual(a.precision, 1 / 3)
    self.assertEqual(a.recall, 1 / 3)
    self.assertEqual(a.f1, 1 / 3)
    self.assertEqual(a.count_error, 2 / 3)
    self.assertEqual(a.gt_count, 3)
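# ClassEvaluationItem.merge itself is not shown in this listing; a minimal
# sketch consistent with the three tests above would average each metric
# weighted by gt_count, treating None as an absent (zero-weight) value:
def merge(self, other):
    total = self.gt_count + other.gt_count
    if total == 0:
        # Both items are empty: leave the metrics as None.
        return

    def weighted(x, y):
        # Treat None as 0 so empty items do not contribute.
        x = 0 if x is None else x
        y = 0 if y is None else y
        return (x * self.gt_count + y * other.gt_count) / total

    self.precision = weighted(self.precision, other.precision)
    self.recall = weighted(self.recall, other.recall)
    self.f1 = weighted(self.f1, other.f1)
    self.count_error = weighted(self.count_error, other.count_error)
    self.gt_count = total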
def compute_eval_items(gt_labels, pred_labels, class_config):
    iou_thresh = 0.5
    num_classes = len(class_config)
    tps, fps, fns = compute_metrics(gt_labels, pred_labels, num_classes,
                                    iou_thresh)
    class_to_eval_item = {}
    for class_id, (tp, fp, fn) in enumerate(zip(tps, fps, fns)):
        gt_count = tp + fn
        pred_count = tp + fp
        class_name = class_config.get_name(class_id)
        if gt_count == 0:
            # No ground truth for this class: all metrics are undefined.
            eval_item = ClassEvaluationItem(
                class_id=class_id, class_name=class_name)
        elif pred_count == 0:
            # Ground truth exists but nothing was predicted:
            # precision is undefined and recall is zero.
            eval_item = ClassEvaluationItem(
                precision=None,
                recall=0,
                gt_count=gt_count,
                class_id=class_id,
                class_name=class_name)
        else:
            prec = tp / (tp + fp)
            recall = tp / (tp + fn)
            f1 = 0.
            if prec + recall != 0.0:
                f1 = 2 * (prec * recall) / (prec + recall)
            # Count error is the prediction count surplus (or deficit),
            # normalized by the ground truth count.
            count_err = pred_count - gt_count
            norm_count_err = None
            if gt_count > 0:
                norm_count_err = count_err / gt_count
            eval_item = ClassEvaluationItem(
                precision=prec,
                recall=recall,
                f1=f1,
                count_error=norm_count_err,
                gt_count=gt_count,
                class_id=class_id,
                class_name=class_name)
        class_to_eval_item[class_id] = eval_item
    return class_to_eval_item
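# Worked example with hypothetical counts at iou_thresh=0.5: a class with
# tp=8, fp=2, fn=2 lands in the final branch above.
tp, fp, fn = 8, 2, 2
prec = tp / (tp + fp)                                  # 0.8
recall = tp / (tp + fn)                                # 0.8
f1 = 2 * (prec * recall) / (prec + recall)             # 0.8
norm_count_err = ((tp + fp) - (tp + fn)) / (tp + fn)   # 0.0
# A class with gt_count == 0 gets an empty item; one with ground truth but
# no predictions (pred_count == 0) gets precision=None, recall=0.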
import copy
import json
from abc import ABC, abstractmethod


class ClassificationEvaluation(ABC):
    """Base class for evaluating predictions for pipelines that have classes.

    Evaluations can be keyed, for instance, if evaluations happen per class.
    """

    def __init__(self):
        self.clear()
        self._is_empty = True

    def clear(self):
        """Clear the Evaluation."""
        self.class_to_eval_item = {}
        self.scene_to_eval = {}
        self.avg_item = None
        self._is_empty = True

    def is_empty(self):
        return self._is_empty

    def set_class_to_eval_item(self, class_to_eval_item):
        self.class_to_eval_item = class_to_eval_item

    def get_by_id(self, key):
        """Get the evaluation for a particular EvaluationItem key."""
        return self.class_to_eval_item[key]

    def has_id(self, key):
        """Return whether the EvaluationItem key is represented."""
        return key in self.class_to_eval_item

    def to_json(self):
        json_rep = []
        for eval_item in self.class_to_eval_item.values():
            json_rep.append(eval_item.to_json())
        if self.avg_item:
            json_rep.append(self.avg_item.to_json())
        # If per-scene evaluations exist, nest the per-class list under
        # 'overall' and add a 'per_scene' mapping alongside it.
        if self.scene_to_eval:
            json_rep = {'overall': json_rep}
            scene_to_eval_json = {}
            for scene_id, scene_eval in self.scene_to_eval.items():
                scene_to_eval_json[scene_id] = scene_eval.to_json()
            json_rep['per_scene'] = scene_to_eval_json
        return json_rep

    def save(self, output_uri):
        """Save this Evaluation to a file.

        Args:
            output_uri: string URI for the file to write.
        """
        json_str = json.dumps(self.to_json(), indent=4)
        str_to_file(json_str, output_uri)

    def merge(self, evaluation, scene_id=None):
        """Merge Evaluation for another Scene into this one.

        This is useful for computing the average metrics of a set of scenes.
        The results of the averaging are stored in this Evaluation.

        Args:
            evaluation: Evaluation to merge into this one
        """
        if len(self.class_to_eval_item) == 0:
            self.class_to_eval_item = evaluation.class_to_eval_item
        else:
            for key, other_eval_item in \
                    evaluation.class_to_eval_item.items():
                if self.has_id(key):
                    self.get_by_id(key).merge(other_eval_item)
                else:
                    self.class_to_eval_item[key] = other_eval_item
        self._is_empty = False
        self.compute_avg()
        if scene_id is not None:
            self.scene_to_eval[scene_id] = copy.deepcopy(evaluation)

    def compute_avg(self):
        """Compute average metrics over all keys."""
        self.avg_item = ClassEvaluationItem(class_name='average')
        for eval_item in self.class_to_eval_item.values():
            self.avg_item.merge(eval_item)

    @abstractmethod
    def compute(self, ground_truth_labels, prediction_labels):
        """Compute metrics for a single scene.

        Args:
            ground_truth_labels: Ground Truth labels to evaluate against.
            prediction_labels: The predicted labels to evaluate.
        """
        pass
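# A minimal sketch of a concrete subclass (hypothetical, not an actual
# pipeline class): compute() fills class_to_eval_item, e.g. via a helper
# like compute_eval_items() above, then averages over classes.
class EvaluationSketch(ClassificationEvaluation):
    def __init__(self, class_config):
        super().__init__()
        self.class_config = class_config

    def compute(self, ground_truth_labels, prediction_labels):
        self.class_to_eval_item = compute_eval_items(
            ground_truth_labels, prediction_labels, self.class_config)
        self._is_empty = False
        self.compute_avg()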
def compute_vector(self, gt, pred, mode, class_id):
    """Compute evaluation over vector predictions.

    Args:
        gt: Ground-truth GeoJSON. Either a string (containing unparsed
            GeoJSON or a file name), or a dictionary containing parsed
            GeoJSON.
        pred: GeoJSON for predictions. Either a string (containing
            unparsed GeoJSON or a file name), or a dictionary containing
            parsed GeoJSON.
        mode: A string containing either 'buildings' or 'polygons'.
        class_id: An integer containing the class id of interest.
    """
    import mask_to_polygons.vectorification as vectorification
    import mask_to_polygons.processing.score as score

    # Ground truth as list of geometries
    def get_geoms(x):
        if is_geojson(x):
            _x = x
            if 'features' in _x.keys():
                _x = _x['features']
            geoms = []
            for feature in _x:
                if 'geometry' in feature.keys():
                    geoms.append(feature['geometry'])
                else:
                    geoms.append(feature)
        else:
            geoms = vectorification.geometries_from_geojson(x)
        return geoms

    gt = get_geoms(gt)
    pred = get_geoms(pred)

    if len(gt) > 0 and len(pred) > 0:
        results = score.spacenet(pred, gt)
        true_positives = results['tp']
        false_positives = results['fp']
        false_negatives = results['fn']

        precision = float(true_positives) / (
            true_positives + false_positives)
        recall = float(true_positives) / (
            true_positives + false_negatives)
        if precision + recall != 0:
            f1 = 2 * (precision * recall) / (precision + recall)
        else:
            f1 = 0.0
        count_error = int(false_positives + false_negatives)
        gt_count = len(gt)
        class_name = 'vector-{}-{}'.format(
            mode, self.class_config.names[class_id])
        evaluation_item = ClassEvaluationItem(precision, recall, f1,
                                              count_error, gt_count,
                                              class_id, class_name)

        if hasattr(self, 'class_to_eval_item') and isinstance(
                self.class_to_eval_item, dict):
            self.class_to_eval_item[class_id] = evaluation_item
        else:
            self.class_to_eval_item = {class_id: evaluation_item}
        self.compute_avg()
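# Hypothetical usage: gt and pred may each be a GeoJSON file name, an
# unparsed GeoJSON string, or a parsed dict; `evaluation` is assumed to be
# an instance of a class providing this method and a class_config.
evaluation.compute_vector(
    gt='gt_buildings.geojson',
    pred='pred_buildings.geojson',
    mode='buildings',
    class_id=1)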