import pytest

import mmocr.core.mask as mask_utils  # import path assumed from the MMOCR layout


def test_extract_boundary():
    result = {}

    # Test invalid arguments: missing 'boundary_result' key.
    with pytest.raises(AssertionError):
        mask_utils.extract_boundary(result)

    # Test invalid arguments: 'boundary_result' must be a 2d list.
    result = {'boundary_result': [0, 1]}
    with pytest.raises(AssertionError):
        mask_utils.extract_boundary(result)

    # Test a valid boundary: four (x, y) points plus a trailing score.
    result = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 1]]}
    output = mask_utils.extract_boundary(result)
    assert output[2] == [1]
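
# A minimal sketch of the return contract the test above relies on. This is
# inferred from the test data and from how eval_hmean below unpacks
# extract_boundary, not from separate documentation: each boundary in
# 'boundary_result' is assumed to be a flat list [x1, y1, ..., xn, yn, score],
# and extract_boundary is assumed to return
# (boundaries_with_scores, boundaries, scores), so output[2] collects the
# trailing scores.
def _sketch_extract_boundary_contract():
    boundary_with_score = [0, 0, 1, 0, 1, 1, 0, 1, 1]
    points, score = boundary_with_score[:-1], boundary_with_score[-1]
    # Four (x, y) points plus a trailing confidence score of 1.
    assert len(points) == 8 and score == 1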
# Shared imports for both versions of eval_hmean below (import paths assumed
# from the MMOCR layout). get_gt_masks and output_ranklist are assumed to be
# helpers defined elsewhere in this module.
import warnings

import numpy as np
from mmcv.utils import print_log

import mmocr.utils as utils
from mmocr.core.evaluation import hmean_ic13, hmean_iou
from mmocr.core.evaluation.utils import (filter_2dlist_result,
                                         select_top_boundary)
from mmocr.core.mask import extract_boundary


# Before: the score-threshold sweep is hard-coded to 0.3-0.9.
def eval_hmean(results,
               img_infos,
               ann_infos,
               metrics={'hmean-iou'},
               score_thr=0.3,
               rank_list=None,
               logger=None,
               **kwargs):
    """Evaluation in hmean metric.

    Args:
        results (list[dict]): Each dict corresponds to one image,
            containing the following keys: boundary_result.
        img_infos (list[dict]): Each dict corresponds to one image,
            containing the following keys: filename, height, width.
        ann_infos (list[dict]): Each dict corresponds to one image,
            containing the following keys: masks, masks_ignore.
        metrics (set{str}): Hmean metric set, should be one or all of
            {'hmean-iou', 'hmean-ic13'}.
        score_thr (float): Score threshold of prediction map.
        rank_list (str): json file used to save the eval result of each image.
        logger (logging.Logger | str | None): The way to print the evaluation
            results.

    Returns:
        dict[str: float]: Best recall, precision and hmean found over the
            threshold sweep, keyed as '<metric>:recall',
            '<metric>:precision' and '<metric>:hmean'.
    """
    assert utils.is_type_list(results, dict)
    assert utils.is_type_list(img_infos, dict)
    assert utils.is_type_list(ann_infos, dict)
    assert len(results) == len(img_infos) == len(ann_infos)
    assert isinstance(metrics, set)

    gts, gts_ignore = get_gt_masks(ann_infos)

    preds = []
    pred_scores = []
    for result in results:
        _, texts, scores = extract_boundary(result)
        if len(texts) > 0:
            assert utils.valid_boundary(texts[0], False)
        valid_texts, valid_text_scores = filter_2dlist_result(
            texts, scores, score_thr)
        preds.append(valid_texts)
        pred_scores.append(valid_text_scores)

    eval_results = {}

    for metric in metrics:
        msg = f'Evaluating {metric}...'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)
        best_result = dict(hmean=-1)
        # Sweep thresholds 0.3, 0.4, ..., 0.9. The loop variable is renamed
        # from `iter` to avoid shadowing the built-in.
        for i in range(3, 10):
            thr = i * 0.1
            if thr < score_thr:
                continue
            top_preds = select_top_boundary(preds, pred_scores, thr)
            if metric == 'hmean-iou':
                result, img_result = hmean_iou.eval_hmean_iou(
                    top_preds, gts, gts_ignore)
            elif metric == 'hmean-ic13':
                result, img_result = hmean_ic13.eval_hmean_ic13(
                    top_preds, gts, gts_ignore)
            else:
                raise NotImplementedError
            if rank_list is not None:
                output_ranklist(img_result, img_infos, rank_list)
            print_log(
                'thr {0:.2f}, recall: {1[recall]:.3f}, '
                'precision: {1[precision]:.3f}, '
                'hmean: {1[hmean]:.3f}'.format(thr, result),
                logger=logger)
            if result['hmean'] > best_result['hmean']:
                best_result = result
        eval_results[metric + ':recall'] = best_result['recall']
        eval_results[metric + ':precision'] = best_result['precision']
        eval_results[metric + ':hmean'] = best_result['hmean']
    return eval_results
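
# Sanity-check sketch of the hard-coded sweep above: `range(3, 10)` scaled by
# 0.1 yields thresholds 0.3 through 0.9, and the `thr < score_thr` guard skips
# everything below the user-supplied score_thr. A standalone example, not part
# of the module:
def _sketch_hardcoded_sweep(score_thr=0.5):
    swept = [round(i * 0.1, 1) for i in range(3, 10) if i * 0.1 >= score_thr]
    assert swept == [0.5, 0.6, 0.7, 0.8, 0.9]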
# After: the sweep range is configurable; `score_thr` is kept only for
# backward compatibility and deprecated in favour of `min_score_thr`.
def eval_hmean(results,
               img_infos,
               ann_infos,
               metrics={'hmean-iou'},
               score_thr=None,
               min_score_thr=0.3,
               max_score_thr=0.9,
               step=0.1,
               rank_list=None,
               logger=None,
               **kwargs):
    """Evaluation in hmean metric. It conducts a grid search over a range of
    boundary score thresholds and reports the best result.

    Args:
        results (list[dict]): Each dict corresponds to one image,
            containing the following keys: boundary_result.
        img_infos (list[dict]): Each dict corresponds to one image,
            containing the following keys: filename, height, width.
        ann_infos (list[dict]): Each dict corresponds to one image,
            containing the following keys: masks, masks_ignore.
        metrics (set{str}): Hmean metric set, should be one or all of
            {'hmean-iou', 'hmean-ic13'}.
        score_thr (float): Deprecated. Please use min_score_thr instead.
        min_score_thr (float): Minimum score threshold of prediction map.
        max_score_thr (float): Maximum score threshold of prediction map.
        step (float): The spacing between score thresholds.
        rank_list (str): json file used to save the eval result of each image.
        logger (logging.Logger | str | None): The way to print the evaluation
            results.

    Returns:
        dict[str: float]: Best recall, precision and hmean found over the
            threshold sweep, keyed as '<metric>:recall',
            '<metric>:precision' and '<metric>:hmean'.
    """
    assert utils.is_type_list(results, dict)
    assert utils.is_type_list(img_infos, dict)
    assert utils.is_type_list(ann_infos, dict)
    # `score_thr == 0` is a legitimate threshold, so compare against None
    # rather than relying on truthiness.
    if score_thr is not None:
        warnings.warn('score_thr is deprecated. Please use min_score_thr '
                      'instead.')
        min_score_thr = score_thr
    assert 0 <= min_score_thr <= max_score_thr <= 1
    # A zero step would make np.arange fail, so require step > 0.
    assert 0 < step <= 1
    assert len(results) == len(img_infos) == len(ann_infos)
    assert isinstance(metrics, set)

    min_score_thr = float(min_score_thr)
    max_score_thr = float(max_score_thr)
    step = float(step)

    gts, gts_ignore = get_gt_masks(ann_infos)

    preds = []
    pred_scores = []
    for result in results:
        _, texts, scores = extract_boundary(result)
        if len(texts) > 0:
            assert utils.valid_boundary(texts[0], False)
        valid_texts, valid_text_scores = filter_2dlist_result(
            texts, scores, min_score_thr)
        preds.append(valid_texts)
        pred_scores.append(valid_text_scores)

    eval_results = {}

    for metric in metrics:
        msg = f'Evaluating {metric}...'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)
        best_result = dict(hmean=-1)
        # `max_score_thr + step` makes the sweep inclusive of max_score_thr;
        # min(..., 1.0) keeps every threshold within [0, 1].
        for thr in np.arange(min_score_thr, min(max_score_thr + step, 1.0),
                             step):
            top_preds = select_top_boundary(preds, pred_scores, thr)
            if metric == 'hmean-iou':
                result, img_result = hmean_iou.eval_hmean_iou(
                    top_preds, gts, gts_ignore)
            elif metric == 'hmean-ic13':
                result, img_result = hmean_ic13.eval_hmean_ic13(
                    top_preds, gts, gts_ignore)
            else:
                raise NotImplementedError
            if rank_list is not None:
                output_ranklist(img_result, img_infos, rank_list)
            print_log(
                'thr {0:.2f}, recall: {1[recall]:.3f}, '
                'precision: {1[precision]:.3f}, '
                'hmean: {1[hmean]:.3f}'.format(thr, result),
                logger=logger)
            if result['hmean'] > best_result['hmean']:
                best_result = result
        eval_results[metric + ':recall'] = best_result['recall']
        eval_results[metric + ':precision'] = best_result['precision']
        eval_results[metric + ':hmean'] = best_result['hmean']
    return eval_results
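
# A minimal usage sketch for the updated signature. The toy data below is an
# assumption, not taken from the module: the prediction is one quadrilateral
# with a trailing confidence score (the boundary_result format above), and
# each ground-truth mask is assumed to be a list of flat polygon coordinate
# lists, matching how get_gt_masks is expected to read masks/masks_ignore.
def _sketch_eval_hmean_usage():
    results = [{'boundary_result': [[0, 0, 10, 0, 10, 10, 0, 10, 0.9]]}]
    img_infos = [{'filename': 'img_1.jpg', 'height': 100, 'width': 100}]
    ann_infos = [{
        'masks': [[[0, 0, 10, 0, 10, 10, 0, 10]]],
        'masks_ignore': []
    }]
    eval_results = eval_hmean(
        results,
        img_infos,
        ann_infos,
        metrics={'hmean-iou'},
        min_score_thr=0.3,
        max_score_thr=0.9,
        step=0.1)
    # Expected keys: 'hmean-iou:recall', 'hmean-iou:precision',
    # 'hmean-iou:hmean'.
    return eval_results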