import collections
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np

# Project-local helpers (metric.map_iou, p1p2_to_xywh, RESULTS_DIR, the retinanet
# evaluation helpers) are assumed to be provided by the surrounding repository.


def get_epoch_metric(oof: dict, thresholds: list, predictions_dir: str, epoch_num: int) -> list:
    """
    Loads epoch predictions and calculates the epoch metric for a set of thresholds

    Args:
        oof            : out-of-fold predictions
        thresholds     : list of thresholds
        predictions_dir: directory for saving scores
        epoch_num      : current epoch for metric calculation
    """
    epoch_scores = []
    nb_images = len(oof["scores"])

    # check range of thresholds
    for threshold in thresholds:
        threshold_scores = []
        for img_id in range(nb_images):
            gt_boxes = oof["gt_boxes"][img_id][0].copy()
            boxes = oof["boxes"][img_id].copy()
            scores = oof["scores"][img_id].copy()
            category = oof["category"][img_id]
            category = np.exp(category[0, 2])

            if len(scores):
                # suppress detections scored below half of the top score
                scores[scores < scores[0] * 0.5] = 0.0

            mask = scores * 5 > threshold

            if gt_boxes[0, 4] == -1.0:
                # image without ground-truth boxes: any remaining detection is a miss
                if np.any(mask):
                    threshold_scores.append(0.0)
            else:
                if len(scores[mask]) == 0:
                    score = 0.0
                else:
                    score = metric.map_iou(
                        boxes_true=p1p2_to_xywh(gt_boxes),
                        boxes_pred=p1p2_to_xywh(boxes[mask]),
                        scores=scores[mask],
                    )
                threshold_scores.append(score)

        print("threshold {}, score {}".format(threshold, np.mean(threshold_scores)))
        epoch_scores.append(np.mean(threshold_scores))

    best_score = np.max(epoch_scores)
    print(f"epoch {epoch_num}, best score: {best_score}")
    return epoch_scores
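
# For reference, a minimal sketch of what the project helper p1p2_to_xywh is assumed
# to do: convert corner-format boxes [x1, y1, x2, y2, ...] into the [x, y, w, h]
# layout expected by the mAP metric. The actual repository implementation may differ.
def p1p2_to_xywh_sketch(p1p2: np.ndarray) -> np.ndarray:
    """Convert boxes from corner format (x1, y1, x2, y2, ...) to (x, y, w, h).

    Assumed behaviour only; extra columns (e.g. a label in column 4) are dropped here.
    """
    p1p2 = np.atleast_2d(p1p2)
    xywh = np.zeros((p1p2.shape[0], 4))
    xywh[:, 0] = p1p2[:, 0]               # x = x1
    xywh[:, 1] = p1p2[:, 1]               # y = y1
    xywh[:, 2] = p1p2[:, 2] - p1p2[:, 0]  # w = x2 - x1
    xywh[:, 3] = p1p2[:, 3] - p1p2[:, 1]  # h = y2 - y1
    return xywh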

def check_metric(model_name, run, fold):
    run_str = '' if run is None or run == '' else f'_{run}'
    predictions_dir = f'../output/oof2/{model_name}{run_str}_fold_{fold}'
    thresholds = [
        0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.6, 0.7,
        0.8, 0.9, 1.0, 1.1, 1.2, 1.4, 1.6, 2.0, 3.0, 4.0
    ]
    all_scores = []

    for epoch_num in range(100):
        fn = f'{predictions_dir}/{epoch_num:03}.pkl'
        try:
            oof = pickle.load(open(fn, 'rb'))
        except FileNotFoundError:
            continue

        print('epoch ', epoch_num)
        epoch_scores = []
        nb_images = len(oof['scores'])

        for threshold in thresholds:
            threshold_scores = []
            for img_id in range(nb_images):
                gt_boxes = oof['gt_boxes'][img_id][0].copy()
                boxes = oof['boxes'][img_id].copy()
                scores = oof['scores'][img_id].copy()
                category = oof['category'][img_id]
                category = np.exp(category[0, 2])

                if len(scores):
                    scores[scores < scores[0] * 0.5] = 0.0

                # if category > 0.5 and scores[0] < 0.2:
                #     scores[0] *= 2
                # mask = scores * category * 10 > threshold
                mask = scores * 5 > threshold

                if gt_boxes[0, 4] == -1.0:
                    if np.any(mask):
                        threshold_scores.append(0.0)
                else:
                    if len(scores[mask]) == 0:
                        score = 0.0
                    else:
                        score = metric.map_iou(
                            boxes_true=p1p2_to_xywh(gt_boxes),
                            boxes_pred=p1p2_to_xywh(boxes[mask]),
                            scores=scores[mask])
                    # print(score)
                    threshold_scores.append(score)

            print(threshold, np.mean(threshold_scores))
            epoch_scores.append(np.mean(threshold_scores))

        all_scores.append(epoch_scores)

    print('best score', np.max(all_scores))
    plt.imshow(np.array(all_scores))
    plt.show()
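
# Both check_metric variants in this file load per-epoch pickles whose layout is only
# implied by the indexing in the loops above. The sketch below is illustrative only:
# the field names come from the code, while shapes and values are assumptions.
def build_dummy_oof() -> dict:
    """Build a one-image example of the assumed out-of-fold prediction structure."""
    return {
        # detection scores per image, sorted in descending order
        "scores": [np.array([0.8, 0.3, 0.1])],
        # predicted boxes per image in (x1, y1, x2, y2) corner format
        "boxes": [np.array([[100.0, 120.0, 220.0, 260.0],
                            [300.0, 310.0, 380.0, 400.0],
                            [50.0, 60.0, 90.0, 110.0]])],
        # ground-truth boxes per image; column 4 holds the label,
        # -1.0 marks an image without pneumonia boxes
        "gt_boxes": [[np.array([[110.0, 125.0, 230.0, 255.0, 0.0]])]],
        # per-image classification head output (log-probabilities)
        "category": [np.log(np.array([[0.1, 0.2, 0.7]]))],
    }

# e.g. pickle.dump(build_dummy_oof(), open("000.pkl", "wb"))  # hypothetical path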

def evaluate_rsna(generator, retinanet, score_thresholds=[0.05], max_detections=100, save_path=None):
    """ Evaluate a given dataset using a given retinanet.

    # Arguments
        generator        : The generator that represents the dataset to evaluate.
        retinanet        : The retinanet to evaluate.
        score_thresholds : The score confidence thresholds to use for detections.
        max_detections   : The maximum number of detections to use per image.
        save_path        : The path to save images with visualized detections to.
    # Returns
        Lists of mAP, Youden index, sensitivity and specificity, one entry per score threshold.
    """
    # gather all detections and annotations
    scores_list, labels_list, boxes_list = _get_predictions(generator, retinanet)
    detections_list = _get_scan_detections(scores_list, labels_list, boxes_list,
                                           score_thresholds=score_thresholds,
                                           max_detections=max_detections,
                                           save_path=save_path)
    all_annotations = _get_annotations(generator)

    ap_list = []
    youden_list = []
    sensitivity_list = []
    specificity_list = []

    for all_detections in detections_list:
        average_precisions = []
        true_positive = 0
        positive = 0
        true_negative = 0
        negative = 0

        for label in range(generator.num_classes()):
            for i in range(len(generator)):
                detections = all_detections[i][label]
                annotations = all_annotations[i][label]

                boxes_true = [annot[:4] for annot in annotations]
                boxes_true = [to_pt(box) for box in boxes_true]
                boxes_true = np.array(boxes_true)

                boxes_pred = [det[:4] for det in detections]
                boxes_pred = [to_pt(box) for box in boxes_pred]
                boxes_pred = np.array(boxes_pred)

                scores = [det[4] for det in detections]
                scores = np.array(scores)

                mAP = map_iou(boxes_true, boxes_pred, scores)
                if mAP is not None:
                    average_precisions.append(mAP)
                    if mAP > 0.:
                        # hit
                        true_positive += 1
                        positive += 1
                    else:
                        if len(annotations) > 0:
                            # miss
                            positive += 1
                        else:
                            # false positive: detections on an image without ground truth
                            negative += 1
                else:
                    # mAP = None means true negative
                    true_negative += 1
                    negative += 1

        ap = np.array(average_precisions).mean()
        ap_list.append(ap)

        sensitivity = true_positive / positive
        specificity = true_negative / negative
        youden_index = (sensitivity + specificity) - 1.
        youden_list.append(youden_index)
        sensitivity_list.append(sensitivity)
        specificity_list.append(specificity)

    return ap_list, youden_list, sensitivity_list, specificity_list
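
# The map_iou calls above follow the RSNA Pneumonia Detection evaluation: precision
# TP / (TP + FP + FN) from greedy matching of (x, y, w, h) boxes, averaged over IoU
# thresholds 0.40-0.75. Below is a minimal sketch of that metric for reference; the
# repository's own map_iou may differ in details such as the empty-image convention.
IOU_THRESHOLDS = np.array([0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75])


def iou_xywh(box_a, box_b):
    """IoU of two boxes given as (x, y, w, h)."""
    xa1, ya1, wa, ha = box_a
    xb1, yb1, wb, hb = box_b
    xa2, ya2 = xa1 + wa, ya1 + ha
    xb2, yb2 = xb1 + wb, yb1 + hb
    inter_w = max(0.0, min(xa2, xb2) - max(xa1, xb1))
    inter_h = max(0.0, min(ya2, yb2) - max(ya1, yb1))
    inter = inter_w * inter_h
    union = wa * ha + wb * hb - inter
    return inter / union if union > 0 else 0.0


def map_iou_sketch(boxes_true, boxes_pred, scores):
    """RSNA-style mean average precision over IoU thresholds 0.4-0.75.

    Returns None when there are neither ground-truth nor predicted boxes,
    which is what the `mAP is None` branch above relies on.
    """
    if len(boxes_true) == 0 and len(boxes_pred) == 0:
        return None
    if len(boxes_pred):
        order = np.argsort(scores)[::-1]            # highest-confidence predictions first
        boxes_pred = np.asarray(boxes_pred)[order]

    total = 0.0
    for t in IOU_THRESHOLDS:
        matched_pred = set()
        tp = 0
        for bt in boxes_true:
            # greedily match each GT box to the best-ranked unmatched prediction
            for j, bp in enumerate(boxes_pred):
                if j not in matched_pred and iou_xywh(bt, bp) >= t:
                    matched_pred.add(j)
                    tp += 1
                    break
        fn = len(boxes_true) - tp
        fp = len(boxes_pred) - len(matched_pred)
        total += tp / (tp + fn + fp)
    return total / len(IOU_THRESHOLDS)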

def check_metric(
    model_name: str,
    run: str,
    fold: int,
    oof_dir: str,
    start_epoch: int,
    end_epoch: int,
    save_metrics: bool = False,
):
    """
    Loads epoch predictions and calculates the metric for a set of thresholds

    Args:
        model_name : string name from the models configs listed in models.py file
        run        : experiment run string to add for checkpoints name
        fold       : evaluation fold number, 0-3
        oof_dir    : directory with out-of-fold predictions
        start_epoch, end_epoch: the first and last epochs for metric calculation
        save_metrics: boolean flag whether to save metrics

    Output:
        all_scores: all metric values for all thresholds and epochs
        thresholds: list of thresholds for mean average precision calculation
        epochs    : range of epochs
    """
    run_str = "" if run is None or run == "" else f"_{run}"
    predictions_dir = f"{oof_dir}/{model_name}{run_str}_fold_{fold}"
    thresholds = [
        0.05, 0.1, 0.15, 0.2, 0.25, 0.28, 0.3, 0.35, 0.4, 0.5, 0.6, 0.7,
        0.8, 0.9, 1.0, 2.0
    ]
    all_scores = []

    for epoch_num in range(start_epoch, end_epoch):
        fn = f"{predictions_dir}/{epoch_num:03}.pkl"
        try:
            oof = pickle.load(open(fn, "rb"))
        except FileNotFoundError:
            continue

        print("epoch ", epoch_num)
        epoch_scores = []
        nb_images = len(oof["scores"])

        # check range of thresholds
        for threshold in thresholds:
            threshold_scores = []
            for img_id in range(nb_images):
                gt_boxes = oof["gt_boxes"][img_id][0].copy()
                boxes = oof["boxes"][img_id].copy()
                scores = oof["scores"][img_id].copy()
                category = oof["category"][img_id]
                category = np.exp(category[0, 2])

                if len(scores):
                    scores[scores < scores[0] * 0.5] = 0.0

                mask = scores * 5 > threshold

                if gt_boxes[0, 4] == -1.0:
                    if np.any(mask):
                        threshold_scores.append(0.0)
                else:
                    if len(scores[mask]) == 0:
                        score = 0.0
                    else:
                        score = metric.map_iou(
                            boxes_true=p1p2_to_xywh(gt_boxes),
                            boxes_pred=p1p2_to_xywh(boxes[mask]),
                            scores=scores[mask],
                        )
                    # print(score)
                    threshold_scores.append(score)

            print("threshold {}, score {}".format(threshold, np.mean(threshold_scores)))
            epoch_scores.append(np.mean(threshold_scores))

        all_scores.append(epoch_scores)

    best_score = np.max(all_scores)
    epochs = np.arange(start_epoch, end_epoch)
    print("best score: ", best_score)
    plt.imshow(np.array(all_scores))
    plt.show()

    if save_metrics:
        scores_dir = f"{RESULTS_DIR}/scores/{model_name}{run_str}_fold_{fold}"
        os.makedirs(scores_dir, exist_ok=True)
        print("all scores.shape: {}, thresholds {}, epochs {}".format(
            np.array(all_scores).shape, thresholds, epochs))
        metric_scores = collections.defaultdict(list)
        metric_scores["scores"] = np.array(all_scores)
        metric_scores["thresholds"] = thresholds
        metric_scores["epochs"] = epochs
        pickle.dump(metric_scores, open(f"{scores_dir}/scores.pkl", "wb"))

    return np.array(all_scores), thresholds, epochs
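
# A hedged usage example for the refactored check_metric. Everything below is
# illustrative: the model name, run tag and oof_dir are placeholders, not names
# taken from the repository, and the out-of-fold pickles are assumed to exist.
if __name__ == "__main__":
    all_scores, thresholds, epochs = check_metric(
        model_name="retinanet_resnet101_512",  # placeholder; real names live in models.py
        run="run1",                            # placeholder run tag
        fold=0,
        oof_dir="../output/oof2",              # layout borrowed from the older check_metric above
        start_epoch=10,
        end_epoch=20,
        save_metrics=True,
    )
    # all_scores has shape (epochs found, thresholds); rows line up with `epochs`
    # only when no per-epoch pickle was missing.
    best_epoch_idx, best_thr_idx = np.unravel_index(np.argmax(all_scores), all_scores.shape)
    print(f"best threshold: {thresholds[best_thr_idx]}, epoch row {best_epoch_idx}")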