def evaluate(self):
  """Compute evaluation result.

  Returns:
    A named tuple with the following fields -
      average_precision: float numpy array of average precision for each
        class.
      mean_ap: mean average precision of all classes, float scalar.
      precisions: List of precisions, each precision is a float numpy array.
      recalls: List of recalls, each recall is a float numpy array.
      corloc: numpy float array of CorLoc scores, one per class.
      mean_corloc: mean CorLoc score over all classes, float scalar.
  """
  if (self.num_gt_instances_per_class == 0).any():
    logging.info(
        'The following classes have no ground truth examples: %s',
        np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) +
        self.label_id_offset)
  if self.use_weighted_mean_ap:
    all_scores = np.array([], dtype=float)
    all_tp_fp_labels = np.array([], dtype=bool)
  for class_index in range(self.num_class):
    if self.num_gt_instances_per_class[class_index] == 0:
      continue
    if not self.scores_per_class[class_index]:
      scores = np.array([], dtype=float)
      tp_fp_labels = np.array([], dtype=bool)
    else:
      scores = np.concatenate(self.scores_per_class[class_index])
      tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
    if self.use_weighted_mean_ap:
      all_scores = np.append(all_scores, scores)
      all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)
    precision, recall = metrics.compute_precision_recall(
        scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])
    self.precisions_per_class.append(precision)
    self.recalls_per_class.append(recall)
    average_precision = metrics.compute_average_precision(precision, recall)
    self.average_precision_per_class[class_index] = average_precision
  self.corloc_per_class = metrics.compute_cor_loc(
      self.num_gt_imgs_per_class,
      self.num_images_correctly_detected_per_class)
  if self.use_weighted_mean_ap:
    num_gt_instances = np.sum(self.num_gt_instances_per_class)
    precision, recall = metrics.compute_precision_recall(
        all_scores, all_tp_fp_labels, num_gt_instances)
    mean_ap = metrics.compute_average_precision(precision, recall)
  else:
    mean_ap = np.nanmean(self.average_precision_per_class)
  mean_corloc = np.nanmean(self.corloc_per_class)
  return ObjectDetectionEvalMetrics(
      self.average_precision_per_class, mean_ap, self.precisions_per_class,
      self.recalls_per_class, self.corloc_per_class, mean_corloc)
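# A minimal, self-contained sketch of what the `metrics` helpers above
# compute, assuming the standard Pascal VOC style definitions: detections are
# sorted by descending score, precision/recall are cumulative, and AP is the
# area under the monotone precision envelope. This is a paraphrase for
# illustration, not the library's exact code.
import numpy as np

def precision_recall_sketch(scores, tp_fp_labels, num_gt):
  # Sort true/false-positive flags by descending detection score.
  order = np.argsort(-np.asarray(scores))
  tp = np.asarray(tp_fp_labels, dtype=float)[order]
  cum_tp = np.cumsum(tp)
  precision = cum_tp / (np.arange(len(tp)) + 1)  # TP / (TP + FP) at each rank
  recall = cum_tp / num_gt                       # TP / total ground truth
  return precision, recall

def average_precision_sketch(precision, recall):
  # Pad the curve, enforce a non-increasing precision envelope from the
  # right, then integrate precision over the recall steps.
  p = np.concatenate([[0.0], precision, [0.0]])
  r = np.concatenate([[0.0], recall, [1.0]])
  for i in range(len(p) - 2, -1, -1):
    p[i] = max(p[i], p[i + 1])
  idx = np.where(r[1:] != r[:-1])[0] + 1
  return float(np.sum((r[idx] - r[idx - 1]) * p[idx]))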
# Module-level dependencies assumed by this script: FLAGS, the TF Object
# Detection API `metrics` module, and get_overlaps_and_scores_box_mode are
# defined elsewhere.
import json
import cPickle

import numpy as np

def main(_):
  # Pickles should be opened in binary mode ('rb').
  with open('%s/ret_%s.pkl' % (FLAGS.job_dir, FLAGS.subset), 'rb') as f:
    ret = cPickle.load(f)
  with open('data/%s.json' % FLAGS.subset, 'r') as f:
    gt = json.load(f)
  for th in [0.3, 0.5, 0.7]:
    all_scores = {}
    all_tp_fp = {}
    num_gt_instances = {}
    for pred, g in zip(ret, gt):
      ref = g[1]
      label = tuple(g[0]['label'])
      if label not in num_gt_instances:
        num_gt_instances[label] = 0
      n = pred['num_detections'].astype(np.int32)
      for i, gt_boxes in enumerate(ref['gt']):
        if len(gt_boxes) > 0:
          gt_boxes = np.asarray(gt_boxes)
        else:
          gt_boxes = np.zeros((0, 4), np.float32)
        # Each ground truth box is counted once per the 3 predictions made
        # for this frame.
        num_gt_instances[label] += gt_boxes.shape[0] * 3
        for j in range(3):
          idx = i * 3 + j
          det_boxes = pred['detection_boxes'][idx][:n[idx]]
          det_scores = pred['detection_scores'][idx][:n[idx]]
          det_classes = pred['detection_classes'][idx][:n[idx]]
          mask = det_classes == 1
          det_boxes = det_boxes[mask]
          det_scores = det_scores[mask]
          iou, scores, num_detected_boxes = get_overlaps_and_scores_box_mode(
              det_boxes, det_scores, gt_boxes)
          tp_fp_labels = np.zeros(det_scores.size, dtype=bool)
          if iou.size > 0:
            max_overlap_gt_ids = np.argmax(iou, axis=1)
            is_gt_box_detected = np.zeros(iou.shape[1], dtype=bool)
            # Greedy matching: each gt box may be claimed by at most one
            # detection; later (lower-scored) matches are false positives.
            for k in range(num_detected_boxes):
              gt_id = max_overlap_gt_ids[k]
              if iou[k, gt_id] >= th and not is_gt_box_detected[gt_id]:
                tp_fp_labels[k] = True
                is_gt_box_detected[gt_id] = True
          all_scores.setdefault(label, []).append(scores)
          all_tp_fp.setdefault(label, []).append(tp_fp_labels)
    aps = []
    for k in all_scores:
      scores = np.concatenate(all_scores[k])
      tp_fp_labels = np.concatenate(all_tp_fp[k])
      precision, recall = metrics.compute_precision_recall(
          scores, tp_fp_labels, num_gt_instances[k])
      average_precision = metrics.compute_average_precision(precision, recall)
      aps.append(average_precision)
    mean_ap = np.mean(aps)
    print(th, mean_ap)
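# `get_overlaps_and_scores_box_mode` is not defined in this snippet. Below is
# a hypothetical sketch of what it plausibly does, modeled on the TF Object
# Detection API's per-image evaluation: sort detections by descending score
# and return the IoU matrix against ground truth. The signature is inferred
# from the call site above; the [ymin, xmin, ymax, xmax] coordinate order is
# an assumption.
def get_overlaps_and_scores_box_mode(det_boxes, det_scores, gt_boxes):
  order = np.argsort(-det_scores)
  det_boxes = det_boxes[order]
  det_scores = det_scores[order]
  # Pairwise intersection between detections [N, 4] and ground truth [M, 4].
  y1 = np.maximum(det_boxes[:, None, 0], gt_boxes[None, :, 0])
  x1 = np.maximum(det_boxes[:, None, 1], gt_boxes[None, :, 1])
  y2 = np.minimum(det_boxes[:, None, 2], gt_boxes[None, :, 2])
  x2 = np.minimum(det_boxes[:, None, 3], gt_boxes[None, :, 3])
  inter = np.clip(y2 - y1, 0, None) * np.clip(x2 - x1, 0, None)
  area_det = ((det_boxes[:, 2] - det_boxes[:, 0]) *
              (det_boxes[:, 3] - det_boxes[:, 1]))
  area_gt = ((gt_boxes[:, 2] - gt_boxes[:, 0]) *
             (gt_boxes[:, 3] - gt_boxes[:, 1]))
  union = area_det[:, None] + area_gt[None, :] - inter
  iou = np.where(union > 0, inter / np.maximum(union, 1e-12), 0.0)
  return iou, det_scores, det_boxes.shape[0]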
print("gt_count:{}\n pred_count:{}\n hit_count:{}".format(gt_count, pred_count, hit_count)) mAP = 0.0 prec, rec, AP = {}, {}, {} for key, value in pred_count.items(): prec[key] = hit_count[key] / float(value) rec[key] = hit_count[key] / float(gt_count[key]) print("For class {} with conf_thresh:{}, precision:{:4f} recall:{:4f} "\ .format(key, thres, prec[key], rec[key])) ############################# np_scores = np.asarray(scores[key]) np_labels = np.asarray(labels[key]) metrics_prec, metrics_rec = metrics.compute_precision_recall(np_scores, np_labels, gt_count[key]) AP[key] = metrics.compute_average_precision(metrics_prec, metrics_rec) mAP += AP[key] print("For class {}: ap:{:4f}".format(key, AP[key])) print("For class {} Score = {} \n".format(key, AP[key]*0.5 + rec[key]*0.3 + prec[key]*0.2 )) ############################ key_out = opt.type assert key_out in rec, 'there is no such type in the image' save_file_name = opt.result_list.split('/')[-1].split('.')[0] + "_" + key_out + "_result.ini" with open(save_file_name,"w") as output: output.write("AP="+str(AP[key_out])+"\n") output.write("R="+str(rec[key_out])+"\n") output.write("P="+str(prec[key_out])+"\n")
def review(self, opt):
    print("Opening result list from {}".format(opt.result_list))
    with open(opt.result_list) as f:
        result_file = [x.strip() for x in f]
    # assert len(result_file) >= 1, 'check result file'

    file_path = opt.gt_path
    print("Opening gt list from {}".format(file_path))
    with open(file_path) as f:
        gt_file = [x.strip() for x in f]
    # assert len(gt_file) >= 1, 'check ground truth file'

    thres = opt.conf_thresh
    iou_thres = opt.iou_thres
    gt_count, pred_count, hit_count = {}, {}, {}
    result_dict = {}
    scores, labels = {}, {}

    # Parse the result list (CSV with a header row).
    for idx, line in enumerate(result_file):
        if idx > 0:  # skip header
            (ID, result_img_name, cls_idx, score,
             bbox_x1, bbox_y1, bbox_x2, bbox_y2) = line.split(",")
            score = float(score)
            bbox_x1 = int(float(bbox_x1))
            bbox_y1 = int(float(bbox_y1))
            bbox_x2 = int(float(bbox_x2))
            bbox_y2 = int(float(bbox_y2))
            result_img_name = result_img_name.split('.')[0]
            # Group detections by image name.
            result_dict.setdefault(result_img_name, []).append(
                [cls_idx, score, bbox_x1, bbox_y1, bbox_x2, bbox_y2])

    # Parse the ground truth list (Pascal VOC style XML annotations).
    for idx, path in enumerate(gt_file):
        img_name = path.split('.')[0]
        xml_path = os.path.join(opt.image_dir, img_name + '.xml')
        print(xml_path)
        with open(xml_path) as f:
            data = minidom.parseString(f.read())
        objs = data.getElementsByTagName("object")
        num_objs = len(objs)
        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = [""] * num_objs
        for ix, obj in enumerate(objs):
            x1 = float(self.get_data_from_tag(obj, "xmin"))
            y1 = float(self.get_data_from_tag(obj, "ymin"))
            x2 = float(self.get_data_from_tag(obj, "xmax"))
            y2 = float(self.get_data_from_tag(obj, "ymax"))
            cls_idx = str(self.get_data_from_tag(obj, "name")).lower().strip()
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls_idx
            gt_count[cls_idx] = gt_count.get(cls_idx, 0) + 1

        # Each ground truth box may be matched at most once.
        flag = {}
        for gt_box in boxes:
            flag[str(gt_box.tolist())] = 0

        # Compare predictions against ground truth for this image.
        if img_name in result_dict:
            for result in result_dict[img_name]:
                cls_idx, score, bbox_x1, bbox_y1, bbox_x2, bbox_y2 = result
                pred_box = [bbox_x1, bbox_y1, bbox_x2, bbox_y2]
                if score >= thres:  # confidence thresholding
                    pred_count[cls_idx] = pred_count.get(cls_idx, 0) + 1
                    scores.setdefault(cls_idx, []).append(score)
                    is_hit = False
                    hit_cls = cls_idx
                    for gt_box, gt_cls in zip(boxes, gt_classes):
                        iou = self.bb_intersection_over_union(
                            pred_box, gt_box.tolist())
                        if iou >= iou_thres and cls_idx == gt_cls:
                            if flag[str(gt_box.tolist())] == 0:
                                hit_count[gt_cls] = hit_count.get(gt_cls, 0) + 1
                                flag[str(gt_box.tolist())] = 1
                                is_hit = True
                                hit_cls = gt_cls
                    labels.setdefault(hit_cls, []).append(is_hit)

    print("gt_count:{}\npred_count:{}\nhit_count:{}".format(
        gt_count, pred_count, hit_count))
    mAP = 0.0
    prec, rec, AP = {}, {}, {}
    for key, value in pred_count.items():
        # Guard against classes with predictions but no hits.
        prec[key] = hit_count.get(key, 0) / float(value)
        rec[key] = hit_count.get(key, 0) / float(gt_count[key])
        print("For class {} with conf_thresh:{}, precision:{:.4f} recall:{:.4f}"
              .format(key, thres, prec[key], rec[key]))
        np_scores = np.asarray(scores[key])
        np_labels = np.asarray(labels[key])
        metrics_prec, metrics_rec = metrics.compute_precision_recall(
            np_scores, np_labels, gt_count[key])
        AP[key] = metrics.compute_average_precision(metrics_prec, metrics_rec)
        mAP += AP[key]
        print("For class {}: ap:{:.4f}".format(key, AP[key]))
        print("For class {} Score = {}\n".format(
            key, AP[key] * 0.5 + rec[key] * 0.3 + prec[key] * 0.2))

    key_out = opt.type
    # assert key_out in rec, 'there is no such type in the image'
    # save_file_name = opt.result_list.split('/')[-1].split('.')[0] + "_" + key_out + "_result.ini"
    with open("/home/kilox/result.txt", "w") as output:
        output.write("AP=" + str(AP[key_out]) + "\n")
        output.write("R=" + str(rec[key_out]) + "\n")
        output.write("P=" + str(prec[key_out]) + "\n")
        output.write("IoU=" + str(iou_thres) + "\n")
        output.write("miss_rate=" + str(1 - rec[key_out]) + "\n")
        output.write("error_rate=" + str(1 - prec[key_out]) + "\n")
        output.write("score=" + str(AP[key_out] * 0.5 + rec[key_out] * 0.3 +
                                    prec[key_out] * 0.2) + "\n")
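# `self.bb_intersection_over_union` is referenced above but not shown. A
# minimal sketch of the standard IoU for two [x1, y1, x2, y2] boxes (this is
# an assumption about the helper, not the author's exact code):
def bb_intersection_over_union(box_a, box_b):
    xa = max(box_a[0], box_b[0])
    ya = max(box_a[1], box_b[1])
    xb = min(box_a[2], box_b[2])
    yb = min(box_a[3], box_b[3])
    inter = max(0, xb - xa) * max(0, yb - ya)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / float(union) if union > 0 else 0.0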
detections = infer_from_path(image_path, model, prior_boxes)
gt_sample = denormalize_boxes(gt_sample, reference_size)
num_gt_boxes = num_gt_boxes + len(gt_sample)
already_detected = np.zeros(shape=len(gt_sample), dtype=bool)
for detection in detections:
    ious = calculate_intersection_over_union(detection, gt_sample)
    score = np.max(detection[4:])  # class confidence of this detection
    best_iou = np.max(ious)
    best_iou_arg = np.argmax(ious)
    # A detection is a true positive only if it clears the IoU threshold and
    # its best-matching ground truth box has not been claimed yet.
    if best_iou > iou_threshold and not already_detected[best_iou_arg]:
        labels.append(True)
        already_detected[best_iou_arg] = True
    else:
        labels.append(False)
    # Rank detections by confidence when computing AP. The original appended
    # `best_iou` here and left `score` unused, which would order the
    # precision/recall curve by IoU rather than confidence.
    scores.append(score)
precision, recall = compute_precision_and_recall(scores, labels, num_gt_boxes)
average_precision = compute_average_precision(precision, recall)
average_precisions.append(average_precision)
print('Class:', class_name)
print('Number of ground_truth_boxes:', num_gt_boxes)
print('AP:', average_precision)
mean_average_precision = np.mean(average_precisions)
print('mAP:', mean_average_precision)
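# `calculate_intersection_over_union` is not shown in this snippet. A minimal
# vectorized sketch of one detection against an array of ground truth boxes,
# assuming the first four entries of `detection` are [x_min, y_min, x_max,
# y_max] (the layout is an assumption):
def calculate_intersection_over_union(detection, gt_boxes):
    box = np.asarray(detection[:4], dtype=np.float32)
    gt = np.asarray(gt_boxes, dtype=np.float32)
    x1 = np.maximum(box[0], gt[:, 0])
    y1 = np.maximum(box[1], gt[:, 1])
    x2 = np.minimum(box[2], gt[:, 2])
    y2 = np.minimum(box[3], gt[:, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
    area_box = (box[2] - box[0]) * (box[3] - box[1])
    area_gt = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
    union = area_box + area_gt - inter
    return np.where(union > 0, inter / np.maximum(union, 1e-12), 0.0)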