def evaluate(self,
             results,
             metric='mIoU',
             logger=None,
             gt_seg_maps=None,
             **kwargs):
    """Evaluate the dataset.

    Args:
        results (list[tuple[torch.Tensor]] | list[str]): per image pre_eval
            results or predict segmentation map for computing evaluation
            metric.
        metric (str | list[str]): Metrics to be evaluated. 'mIoU',
            'mDice' and 'mFscore' are supported.
        logger (logging.Logger | None | str): Logger used for printing
            related information during evaluation. Default: None.
        gt_seg_maps (generator[ndarray]): Custom gt seg maps as input,
            used in ConcatDataset. When None and ``results`` is a list of
            arrays or strings, ``self.get_gt_seg_maps()`` is used.

    Returns:
        dict[str, float]: Summary metrics ('aAcc' and 'm<metric>') plus
            one '<metric>.<class name>' entry per class, all divided by
            100 after rounding the percentage to two decimals.

    Raises:
        KeyError: If any requested metric is not supported.
    """
    if isinstance(metric, str):
        metric = [metric]
    allowed_metrics = ['mIoU', 'mDice', 'mFscore']
    if not set(metric).issubset(set(allowed_metrics)):
        raise KeyError('metric {} is not supported'.format(metric))

    eval_results = {}
    # test a list of files
    if mmcv.is_list_of(results, np.ndarray) or mmcv.is_list_of(
            results, str):
        if gt_seg_maps is None:
            gt_seg_maps = self.get_gt_seg_maps()
        num_classes = len(self.CLASSES)
        ret_metrics = eval_metrics(
            results,
            gt_seg_maps,
            num_classes,
            self.ignore_index,
            metric,
            label_map=self.label_map,
            reduce_zero_label=self.reduce_zero_label)
    # test a list of pre_eval_results
    else:
        ret_metrics = pre_eval_to_metrics(results, metric)

    # summary table
    ret_metrics_summary = OrderedDict({
        ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2)
        for ret_metric, ret_metric_value in ret_metrics.items()
    })

    # each class table ('aAcc' only belongs in the summary)
    ret_metrics.pop('aAcc', None)
    ret_metrics_class = OrderedDict({
        ret_metric: np.round(ret_metric_value * 100, 2)
        for ret_metric, ret_metric_value in ret_metrics.items()
    })

    if self.CLASSES is None:
        # Only the pre_eval branch can reach this point with CLASSES unset,
        # and that branch never binds ``num_classes``. Derive the class
        # count from the per-class metrics instead of hitting an unbound
        # local. Assumes each remaining entry holds one value per class,
        # which the ``value[idx]`` lookup below also relies on.
        first_per_class = next(iter(ret_metrics_class.values()), ())
        class_names = tuple(range(len(first_per_class)))
    else:
        class_names = self.CLASSES

    ret_metrics_class.update({'Class': class_names})
    ret_metrics_class.move_to_end('Class', last=False)

    # for logger
    class_table_data = PrettyTable()
    for key, val in ret_metrics_class.items():
        class_table_data.add_column(key, val)

    summary_table_data = PrettyTable()
    for key, val in ret_metrics_summary.items():
        if key == 'aAcc':
            summary_table_data.add_column(key, [val])
        else:
            summary_table_data.add_column('m' + key, [val])

    print_log('per class results:', logger)
    print_log('\n' + class_table_data.get_string(), logger=logger)
    print_log('Summary:', logger)
    print_log('\n' + summary_table_data.get_string(), logger=logger)

    # each metric dict
    for key, value in ret_metrics_summary.items():
        if key == 'aAcc':
            eval_results[key] = value / 100.0
        else:
            eval_results['m' + key] = value / 100.0

    # 'Class' was only needed as the first column of the logged table.
    ret_metrics_class.pop('Class', None)
    for key, value in ret_metrics_class.items():
        eval_results.update({
            key + '.' + str(name): value[idx] / 100.0
            for idx, name in enumerate(class_names)
        })

    return eval_results
def evaluate(self,
             results,
             metric='mIoU',
             logger=None,
             efficient_test=False,
             **kwargs):
    """Evaluate the dataset and log attack metrics when configured.

    Args:
        results (list): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated. 'mIoU' and
            'mDice' are supported.
        logger (logging.Logger | None | str): Logger used for printing
            related information during evaluation. Default: None.
        efficient_test (bool): Forwarded to ``self.get_gt_seg_maps``.
            Default: False.

    Returns:
        dict[str, float]: Summary metrics plus one
            '<metric>.<class name>' entry per class. The attack metrics
            (mIOU-A, PA-A, ASR, RSA) are only logged, not returned.

    Raises:
        KeyError: If any requested metric is not supported.
        NotImplementedError: If ``self.attack_info['attack_type']`` is
            neither 'N-to-1' nor '1-to-1'.
    """
    if isinstance(metric, str):
        metric = [metric]
    allowed_metrics = ['mIoU', 'mDice']
    if not set(metric).issubset(set(allowed_metrics)):
        raise KeyError('metric {} is not supported'.format(metric))

    eval_results = {}
    gt_seg_maps = self.get_gt_seg_maps(efficient_test)
    if self.CLASSES is None:
        # No class list: count the distinct labels in the ground truth.
        num_classes = len(
            reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps]))
    else:
        num_classes = len(self.CLASSES)
    ret_metrics = eval_metrics(
        results,
        gt_seg_maps,
        num_classes,
        self.ignore_index,
        metric,
        label_map=self.label_map,
        reduce_zero_label=self.reduce_zero_label)

    # ret_metrics layout used below: [aAcc, per-class Acc, one per-class
    # array for each requested metric, in request order].
    class_table_data = [['Class'] + [m[1:] for m in metric] + ['Acc']]
    if self.CLASSES is None:
        class_names = tuple(range(num_classes))
    else:
        class_names = self.CLASSES
    ret_metrics_round = [
        np.round(ret_metric * 100, 2) for ret_metric in ret_metrics
    ]
    for i in range(num_classes):
        class_table_data.append([class_names[i]] +
                                [m[i] for m in ret_metrics_round[2:]] +
                                [ret_metrics_round[1][i]])
    summary_table_data = [['Scope'] +
                          ['m' + head
                           for head in class_table_data[0][1:]] + ['aAcc']]
    ret_metrics_mean = [
        np.round(np.nanmean(ret_metric) * 100, 2)
        for ret_metric in ret_metrics
    ]
    summary_table_data.append(['global'] + ret_metrics_mean[2:] +
                              [ret_metrics_mean[1]] +
                              [ret_metrics_mean[0]])

    print_log('per class results:', logger)
    table = AsciiTable(class_table_data)
    print_log('\n' + table.table, logger=logger)
    print_log('Summary:', logger)
    table = AsciiTable(summary_table_data)
    print_log('\n' + table.table, logger=logger)

    for i in range(1, len(summary_table_data[0])):
        eval_results[summary_table_data[0][i]] = (
            summary_table_data[1][i] / 100.0)
    for idx, sub_metric in enumerate(class_table_data[0][1:], 1):
        for item in class_table_data[1:]:
            eval_results[str(sub_metric) + '.' +
                         str(item[0])] = item[idx] / 100.0

    try:
        if self.attack_info is not None:
            # calculate mIOU-A
            (attacked_results, attacked_gt_seg_maps, non_attacked_results,
             non_attacked_gt_seg_maps) = (
                 self.get_attacked_and_non_attacked_pairs(
                     results, gt_seg_maps))
            (total_area_intersect, total_area_union, _,
             total_area_label) = total_intersect_and_union(
                 attacked_results, attacked_gt_seg_maps, num_classes,
                 self.ignore_index, self.label_map,
                 self.reduce_zero_label)
            # The constant is double-precision machine epsilon; it keeps
            # the division defined for classes with an empty union.
            IOU_A = total_area_intersect / (
                2.220446049250313e-16 + total_area_union)
            # Average only over classes that occur in the attacked labels.
            mask = (total_area_label != 0)
            mIOU_A = IOU_A[mask].mean().item()
            print_log(f'mIOU-A: {mIOU_A}', logger=logger)

            # calculate PA-A
            PA_A = total_area_intersect.sum() / total_area_label.sum()
            print_log(f'PA-A: {PA_A}', logger=logger)

            # calculate ASR
            attack_type = self.attack_info['attack_type']
            if attack_type == 'N-to-1':
                ASR = (total_area_intersect.sum() /
                       total_area_label.sum()).item()
            elif attack_type == '1-to-1':
                successful_attack_num = 0.0
                should_attack_num = 0.0
                # NOTE(review): the comparisons below need array-like
                # predictions; file-path results are not loaded here.
                for i, original_label in enumerate(gt_seg_maps):
                    if not self.img_infos[i]['should_attack']:
                        continue
                    pred_label = results[i]
                    from_label_map = (
                        original_label == self.attack_info['from_label'])
                    to_label_map = (
                        pred_label == self.attack_info['to_label'])
                    successful_attack_num += (from_label_map *
                                              to_label_map).sum()
                    should_attack_num += from_label_map.sum()
                # No source-label pixels in attacked images: report NaN
                # rather than failing on a zero denominator.
                ASR = (successful_attack_num / should_attack_num
                       if should_attack_num else float('nan'))
            else:
                raise NotImplementedError
            print_log(f'ASR: {ASR}', logger=logger)

            # calculate RSA on the non-attacked pairs
            pred_from_label_num = 0.0
            from_label_num = 0.0
            for pred, seg_map in zip(non_attacked_results,
                                     non_attacked_gt_seg_maps):
                pred_from_label_map = (
                    pred == self.attack_info['from_label'])
                from_label_map = (
                    seg_map == self.attack_info['from_label'])
                pred_from_label_num += (pred_from_label_map *
                                        from_label_map).sum()
                from_label_num += from_label_map.sum()
            # Same zero-denominator guard as for ASR.
            RSA = (pred_from_label_num / from_label_num
                   if from_label_num else float('nan'))
            print_log(f'RSA: {RSA}', logger=logger)
    finally:
        # Remove temporary result files only after the attack metrics, so
        # the helpers above never receive paths to already-deleted files.
        # ``finally`` keeps the cleanup even if that section raises.
        if mmcv.is_list_of(results, str):
            for file_name in results:
                os.remove(file_name)

    return eval_results
def evaluate(self,
             results,
             metric='mIoU',
             logger=None,
             efficient_test=False,
             **kwargs):
    """Evaluate the dataset and log confusion-matrix based scores.

    Args:
        results (list): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated. 'mIoU' and
            'mDice' are supported.
        logger (logging.Logger | None | str): Logger used for printing
            related information during evaluation. Default: None.
        efficient_test (bool): Forwarded to ``self.get_gt_seg_maps``.
            Default: False.

    Returns:
        dict[str, float]: Summary metrics only (no per-class entries).
            Accuracy and kappa from the confusion matrix are logged, not
            returned.

    Raises:
        KeyError: If any requested metric is not supported.
    """
    if isinstance(metric, str):
        metric = [metric]
    allowed_metrics = ['mIoU', 'mDice']
    if not set(metric).issubset(set(allowed_metrics)):
        raise KeyError('metric {} is not supported'.format(metric))

    eval_results = {}
    gt_seg_maps = self.get_gt_seg_maps(efficient_test)
    if self.CLASSES is None:
        # No class list: count the distinct labels in the ground truth.
        num_classes = len(
            reduce(np.union1d, [np.unique(_) for _ in gt_seg_maps]))
    else:
        num_classes = len(self.CLASSES)
    ret_metrics = eval_metrics(
        results,
        gt_seg_maps,
        num_classes,
        self.ignore_index,
        metric,
        label_map=self.label_map,
        reduce_zero_label=self.reduce_zero_label)

    # ret_metrics layout used below: [aAcc, per-class Acc, one per-class
    # array for each requested metric, in request order].
    class_table_data = [['Class'] + [m[1:] for m in metric] + ['Acc']]
    if self.CLASSES is None:
        class_names = tuple(range(num_classes))
    else:
        class_names = self.CLASSES
    ret_metrics_round = [
        np.round(ret_metric * 100, 2) for ret_metric in ret_metrics
    ]
    for i in range(num_classes):
        class_table_data.append([class_names[i]] +
                                [m[i] for m in ret_metrics_round[2:]] +
                                [ret_metrics_round[1][i]])
    summary_table_data = [['Scope'] +
                          ['m' + head
                           for head in class_table_data[0][1:]] + ['aAcc']]
    ret_metrics_mean = [
        np.round(np.nanmean(ret_metric) * 100, 2)
        for ret_metric in ret_metrics
    ]
    summary_table_data.append(['global'] + ret_metrics_mean[2:] +
                              [ret_metrics_mean[1]] +
                              [ret_metrics_mean[0]])

    print_log('per class results:', logger)
    table = AsciiTable(class_table_data)
    print_log('\n' + table.table, logger=logger)
    print_log('Summary:', logger)
    table = AsciiTable(summary_table_data)
    print_log('\n' + table.table, logger=logger)

    for i in range(1, len(summary_table_data[0])):
        eval_results[summary_table_data[0][i]] = (
            summary_table_data[1][i] / 100.0)

    if mmcv.is_list_of(results, str):
        for file_name in results:
            os.remove(file_name)

    # Accumulate one 2x2 confusion matrix over all images, labels ordered
    # [1, 0].
    # NOTE(review): ``result.flatten()`` needs array-like predictions;
    # file-path results (removed just above) are not supported here.
    con_mat = np.zeros((2, 2))
    for result, gt in zip(results, gt_seg_maps):
        con_mat += metrics.confusion_matrix(
            gt.flatten(), result.flatten(), labels=[1, 0])

    print_log('accuracy:{}'.format(accuracy(con_mat)), logger=logger)
    print_log('kappa:{}'.format(kappa(con_mat)), logger=logger)
    # Pair each mean with the metric that was actually requested instead
    # of assuming index 2 is mIoU and index 3 is mDice. Those fixed
    # indices failed for a single metric and mislabelled values when the
    # request order differed.
    for metric_name, mean_value in zip(metric, ret_metrics_mean[2:]):
        print_log('{}:{}'.format(metric_name, mean_value), logger=logger)
    # print_log('precision:{}'.format(precision(con_mat)), logger=logger)
    # print_log('sensitivity:{}'.format(sensitivity(con_mat)), logger=logger)
    # print_log('specificity:{}'.format(specificity(con_mat)), logger=logger)

    return eval_results
def evaluate(self,
             results,
             metric='mIoU',
             logger=None,
             efficient_test=False,
             **kwargs):
    """Compute segmentation metrics for ``results`` and log them as tables.

    Args:
        results (list): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated. 'mIoU',
            'mDice' and 'mFscore' are supported.
        logger (logging.Logger | None | str): Logger used for printing
            related information during evaluation. Default: None.
        efficient_test (bool): Forwarded to ``self.get_gt_seg_maps``.
            Default: False.

    Returns:
        dict[str, float]: Default metrics: 'aAcc', one 'm<metric>' entry
            per summary metric and one '<metric>.<class name>' entry per
            class.

    Raises:
        KeyError: If any requested metric is not supported.
    """
    if isinstance(metric, str):
        metric = [metric]
    supported = {'mIoU', 'mDice', 'mFscore'}
    if set(metric) - supported:
        raise KeyError('metric {} is not supported'.format(metric))

    gt_seg_maps = self.get_gt_seg_maps(efficient_test)
    if self.CLASSES is not None:
        num_classes = len(self.CLASSES)
    else:
        # Without a class list, count the distinct ground-truth labels.
        per_image_labels = [np.unique(_) for _ in gt_seg_maps]
        num_classes = len(reduce(np.union1d, per_image_labels))

    ret_metrics = eval_metrics(
        results,
        gt_seg_maps,
        num_classes,
        self.ignore_index,
        metric,
        label_map=self.label_map,
        reduce_zero_label=self.reduce_zero_label)

    if self.CLASSES is not None:
        class_names = self.CLASSES
    else:
        class_names = tuple(range(num_classes))

    # Summary: nan-aware mean of every metric, as a rounded percentage.
    summary = OrderedDict()
    for name, values in ret_metrics.items():
        summary[name] = np.round(np.nanmean(values) * 100, 2)

    # Per class: every metric except the overall accuracy.
    ret_metrics.pop('aAcc', None)
    per_class = OrderedDict()
    for name, values in ret_metrics.items():
        per_class[name] = np.round(values * 100, 2)

    # Tables for the logger; class names form the first column.
    class_table = PrettyTable()
    class_table.add_column('Class', class_names)
    for name, values in per_class.items():
        class_table.add_column(name, values)

    eval_results = {}
    summary_table = PrettyTable()
    for name, value in summary.items():
        # Every summary metric except 'aAcc' is reported with an 'm' prefix.
        label = name if name == 'aAcc' else 'm' + name
        summary_table.add_column(label, [value])
        eval_results[label] = value / 100.0

    print_log('per class results:', logger)
    print_log('\n' + class_table.get_string(), logger=logger)
    print_log('Summary:', logger)
    print_log('\n' + summary_table.get_string(), logger=logger)

    # Flatten the per-class values into '<metric>.<class name>' entries.
    for name, values in per_class.items():
        for idx, class_name in enumerate(class_names):
            eval_results[name + '.' + str(class_name)] = values[idx] / 100.0

    # String results are removed from disk once evaluated.
    if mmcv.is_list_of(results, str):
        for file_name in results:
            os.remove(file_name)
    return eval_results
def evaluate(self, results, metric='mIoU', logger=None, **kwargs):
    """Compute segmentation metrics for ``results`` and log them as tables.

    Args:
        results (list): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated. 'mIoU' and
            'mDice' are supported.
        logger (logging.Logger | None | str): Logger used for printing
            related information during evaluation. Default: None.

    Returns:
        dict[str, float]: Default metrics, keyed by the summary table
            headers ('m<metric>', 'mAcc', 'aAcc').

    Raises:
        KeyError: If any requested metric is not supported.
    """
    if isinstance(metric, str):
        metric = [metric]
    supported = {'mIoU', 'mDice'}
    if set(metric) - supported:
        raise KeyError('metric {} is not supported'.format(metric))

    gt_seg_maps = self.get_gt_seg_maps()
    if self.CLASSES is not None:
        num_classes = len(self.CLASSES)
    else:
        # Without a class list, count the distinct ground-truth labels.
        per_image_labels = [np.unique(_) for _ in gt_seg_maps]
        num_classes = len(reduce(np.union1d, per_image_labels))

    ret_metrics = eval_metrics(
        results,
        gt_seg_maps,
        num_classes,
        ignore_index=self.ignore_index,
        metrics=metric)

    if self.CLASSES is not None:
        class_names = self.CLASSES
    else:
        class_names = tuple(range(num_classes))

    # Per-class table: class name, each requested metric, then accuracy.
    # ret_metrics is read as [aAcc, per-class Acc, requested metrics...].
    class_header = ['Class', *(name[1:] for name in metric), 'Acc']
    rounded = [np.round(values * 100, 2) for values in ret_metrics]
    class_rows = [
        [class_names[i], *(values[i] for values in rounded[2:]),
         rounded[1][i]]
        for i in range(num_classes)
    ]
    class_table_data = [class_header, *class_rows]

    # Summary table: nan-aware mean of each entry, same column order as
    # the per-class table with the overall accuracy appended.
    summary_header = ['Scope', *('m' + head for head in class_header[1:]),
                      'aAcc']
    means = [np.round(np.nanmean(values) * 100, 2) for values in ret_metrics]
    summary_row = ['global', *means[2:], means[1], means[0]]
    summary_table_data = [summary_header, summary_row]

    print_log('per class results:', logger)
    print_log('\n' + AsciiTable(class_table_data).table, logger=logger)
    print_log('Summary:', logger)
    print_log('\n' + AsciiTable(summary_table_data).table, logger=logger)

    # Skip the 'Scope'/'global' column and convert percentages to fractions.
    eval_results = {}
    for col, key in enumerate(summary_header[1:], start=1):
        eval_results[key] = summary_row[col] / 100.0
    return eval_results