# Shared imports for the evaluation snippets below.
import gzip
import pickle

import numpy as np
import torch
from PIL import Image
from tqdm import tqdm, trange


def _average_precision(self, all_scores):
    aps = [0.] * self.num_classes
    valid = [0] * self.num_classes
    ind_all = np.arange(self.gt_classes.shape[0])
    num_cls = self.num_classes
    num_ins = ind_all.shape[0]
    for i, c in enumerate(self._classes):
        if i == 0:
            # Skip the background class.
            continue
        gt_this = (self.gt_classes == i).astype(np.float32)
        num_this = np.sum(gt_this)
        if i % 10 == 0:
            print('AP for %s: %d/%d' % (c, i, num_cls))
        if num_this > 0:
            valid[i] = num_this
            sco_this = all_scores[ind_all, i]
            ind_sorted = np.argsort(-sco_this)
            tp = gt_this[ind_sorted]
            # Truncate after the last true positive; the tail contributes only
            # false positives and does not change the precision-recall curve.
            max_ind = num_ins - np.argmax(tp[::-1])
            tp = tp[:max_ind]
            fp = 1. - tp
            tp = np.cumsum(tp)
            fp = np.cumsum(fp)
            rec = tp / float(num_this)
            prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
            aps[i] = voc_ap(rec, prec)
    # Mean AP over classes that have at least one ground-truth instance.
    mcls_ap = np.mean([s for s, v in zip(aps, valid) if v])

    # Compute the overall (instance-level) score: the argmax over
    # non-background classes is the predicted label for each instance.
    max_inds = np.argmax(all_scores[:, 1:], axis=1) + 1
    max_scores = np.zeros_like(all_scores)
    max_scores[ind_all, max_inds] = 1.
    pred_all = max_scores[ind_all, self.gt_classes]
    sco_all = all_scores[ind_all, self.gt_classes]
    ind_sorted = np.argsort(-sco_all)
    tp = pred_all[ind_sorted]
    fp = 1. - tp
    tp = np.cumsum(tp)
    fp = np.cumsum(fp)
    rec = tp / float(num_ins)
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    mins_ap = voc_ap(rec, prec)
    return aps[1:], mcls_ap, mins_ap
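# Every snippet in this file calls a `voc_ap` helper that is not defined here.
# The following is a minimal sketch, assuming the standard PASCAL VOC AP
# computation (continuous interpolation by default, the VOC07 11-point method
# when use_07_metric is True); the project's actual helper may differ.
def voc_ap(rec, prec, use_07_metric=False):
    """Compute AP from recall/precision arrays sorted by descending score."""
    if use_07_metric:
        # 11-point metric: average the max precision at recall >= t.
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0.
            else:
                p = np.max(prec[rec >= t])
            ap += p / 11.
    else:
        # Append sentinels, make the precision envelope monotonically
        # decreasing, then integrate over the points where recall changes.
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
        i = np.where(mrec[1:] != mrec[:-1])[0]
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap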
def evaluate(self, all_image_ids, all_detection_bboxes, all_detection_vertices,
             metric='apk'):
    # Only the bounding-box AP metric is implemented in this variant; any
    # other value of `metric` (including the default 'apk') returns None.
    if metric == 'ap':
        # Total number of ground-truth boxes across all annotated images.
        npos = 0
        for img_folder in self.annotations:
            for img_name in self.annotations[img_folder]:
                npos += len(self.annotations[img_folder][img_name])
        tp = np.zeros(len(all_image_ids))
        fp = np.zeros(len(all_image_ids))
        for i, (img_id, bbox) in enumerate(zip(all_image_ids, all_detection_bboxes)):
            image_name = img_id.split('/')[-1]
            image_folder = img_id.split('/')[0]
            gt_bboxes = [
                obj['bbox']
                for obj in self.annotations[image_folder][image_name]
            ]
            if len(gt_bboxes) == 0:
                fp[i] = 1
                continue
            gt_bboxes = torch.Tensor(gt_bboxes)
            bbox = torch.Tensor(bbox).unsqueeze(0)
            # A detection is a true positive if it overlaps any ground-truth
            # box with IoU > 0.5.
            ious = BBox.iou(bbox, gt_bboxes)
            max_ious, _ = ious.max(dim=2)
            if max_ious.item() > 0.5:
                tp[i] = 1
            else:
                fp[i] = 1
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # Avoid divide by zero in case the first detection matches a difficult
        # ground truth.
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        score = voc_ap(rec, prec)
        return score, metric
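# `BBox.iou` is external to this file. A minimal sketch follows; it is an
# assumption, not the project's actual implementation. Boxes are taken to be
# (x1, y1, x2, y2), and the result is shaped (1, N, M) so that the
# `ious.max(dim=2)` call above picks the best-matching ground truth per
# detection.
class BBox:
    @staticmethod
    def iou(source, other):
        # source: (N, 4) tensor; other: (M, 4) tensor.
        lt = torch.max(source[:, None, :2], other[None, :, :2])  # (N, M, 2)
        rb = torch.min(source[:, None, 2:], other[None, :, 2:])  # (N, M, 2)
        wh = (rb - lt).clamp(min=0)
        inter = wh[..., 0] * wh[..., 1]                          # (N, M)
        area_s = (source[:, 2] - source[:, 0]) * (source[:, 3] - source[:, 1])
        area_o = (other[:, 2] - other[:, 0]) * (other[:, 3] - other[:, 1])
        union = area_s[:, None] + area_o[None, :] - inter
        return (inter / union).unsqueeze(0)                      # (1, N, M)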
def vg_eval(detpath,
            gt_roidb,
            image_index,
            classindex,
            ovthresh=0.5,
            use_07_metric=False,
            eval_attributes=False):
    """rec, prec, ap, sorted_scores, npos = vg_eval(
        detpath, gt_roidb, image_index, classindex,
        [ovthresh], [use_07_metric])

    Top-level function that does the Visual Genome evaluation.

    detpath: Path to detections
    gt_roidb: List of ground-truth structs.
    image_index: List of image ids.
    classindex: Category index
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11-point AP computation
        (default False)
    """
    # Extract ground-truth objects for this class.
    class_recs = {}
    npos = 0
    for item, imagename in zip(gt_roidb, image_index):
        if eval_attributes:
            bbox = item['boxes'][np.where(
                np.any(item['gt_attributes'].toarray() == classindex,
                       axis=1))[0], :]
        else:
            bbox = item['boxes'][np.where(
                item['gt_classes'] == classindex)[0], :]
        difficult = np.zeros((bbox.shape[0],)).astype(bool)
        det = [False] * bbox.shape[0]
        npos = npos + sum(~difficult)
        class_recs[str(imagename)] = {
            'bbox': bbox,
            'difficult': difficult,
            'det': det
        }
    if npos == 0:
        # No ground-truth examples.
        return 0, 0, 0, 0, npos

    # Read detections.
    with open(detpath, 'r') as f:
        lines = f.readlines()
    if len(lines) == 0:
        # No detection examples.
        return 0, 0, 0, 0, npos

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # Sort by descending confidence.
    sorted_ind = np.argsort(-confidence)
    sorted_scores = -np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # Go down detections and mark TPs and FPs.
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)
        if BBGT.size > 0:
            # Compute overlaps: intersection ...
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih
            # ... and union.
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)
        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # Compute precision and recall.
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # Avoid divide by zero in case the first detection matches a difficult
    # ground truth.
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap, sorted_scores, npos
def eval_seg_ap(results_all, dat_list, nb_class=59, ovthresh_seg=0.5,
                Sparse=False, From_pkl=False, task_id=None):
    '''
    From_pkl: load results from pickle files
    Sparse: indicates that the masks in the results are sparse matrices
    '''
    confidence = []
    image_ids = []
    BB = []
    Local_segs_ptr = []
    for imagename in tqdm(results_all.keys(), desc='Loading results ..'):
        if From_pkl:
            results = pickle.load(gzip.open(results_all[imagename]))
        else:
            results = results_all[imagename]
        det_rects = results['DETS']
        for idx, rect in enumerate(det_rects):
            image_ids.append(imagename)
            confidence.append(rect[-1])
            BB.append(rect[:4])
            Local_segs_ptr.append(idx)

    confidence = np.array(confidence)
    BB = np.array(BB)
    Local_segs_ptr = np.array(Local_segs_ptr)

    # Sort detections by descending confidence.
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    Local_segs_ptr = Local_segs_ptr[sorted_ind]
    image_ids = [image_ids[x] for x in sorted_ind]

    class_recs, npos = get_gt(dat_list, task_id=task_id)

    nd = len(image_ids)
    tp_seg = np.zeros(nd)
    fp_seg = np.zeros(nd)
    pcp_list = []
    for d in trange(nd, desc='Finding AP^P at thres %f..' % ovthresh_seg):
        R = class_recs[image_ids[d]]
        ovmax = -np.inf
        jmax = -1
        if From_pkl:
            results = pickle.load(gzip.open(results_all[image_ids[d]]))
        else:
            results = results_all[image_ids[d]]
        mask0 = results['MASKS'][Local_segs_ptr[d]]
        if Sparse:
            mask_pred = mask0.toarray().astype(int)  # decode sparse array if it is one
        else:
            mask_pred = mask0.astype(int)
        for i in range(len(R['anno_adds'])):
            mask_gt = np.array(Image.open(R['anno_adds'][i]))
            if len(mask_gt.shape) == 3:
                # Make sure the annotation is a two-dimensional array.
                mask_gt = mask_gt[:, :, 0]
            seg_iou = cal_one_mean_iou(mask_pred.astype(np.uint8), mask_gt, nb_class)
            mean_seg_iou = np.nanmean(seg_iou)
            if mean_seg_iou > ovmax:
                ovmax = mean_seg_iou
                seg_iou_max = seg_iou
                jmax = i
                mask_gt_u = np.unique(mask_gt)
        if ovmax > ovthresh_seg:
            if not R['det'][jmax]:
                tp_seg[d] = 1.
                R['det'][jmax] = 1
                pcp_d = len(mask_gt_u[np.logical_and(mask_gt_u > 0,
                                                     mask_gt_u < nb_class)])
                pcp_n = float(np.sum(seg_iou_max[1:] > ovthresh_seg))
                if pcp_d > 0:
                    pcp_list.append(pcp_n / pcp_d)
                else:
                    pcp_list.append(0.0)
            else:
                fp_seg[d] = 1.
        else:
            fp_seg[d] = 1.

    # Compute precision and recall.
    fp_seg = np.cumsum(fp_seg)
    tp_seg = np.cumsum(tp_seg)
    rec_seg = tp_seg / float(npos)
    prec_seg = tp_seg / (tp_seg + fp_seg)
    ap_seg = voc_ap(rec_seg, prec_seg)

    assert np.max(tp_seg) == len(pcp_list), \
        '%d vs %d' % (np.max(tp_seg), len(pcp_list))
    pcp_list.extend([0.0] * (npos - len(pcp_list)))
    pcp = np.mean(pcp_list)
    print('AP_seg, PCP:', ap_seg, pcp)
    return ap_seg, pcp
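# `cal_one_mean_iou` is also external. This sketch is an assumption about its
# behavior, inferred from the calls above: it returns the per-class IoU
# between one predicted parsing mask and one ground-truth mask (NaN for
# classes absent from both), so np.nanmean(seg_iou) gives the mean IoU over
# present classes. Both inputs are assumed to be aligned label maps of the
# same shape with values in [0, nb_class).
def cal_one_mean_iou(mask_pred, mask_gt, nb_class):
    k = (mask_gt >= 0) & (mask_gt < nb_class)
    # Confusion matrix via bincount over flattened (gt, pred) label pairs.
    hist = np.bincount(nb_class * mask_gt[k].astype(int) + mask_pred[k],
                       minlength=nb_class ** 2).reshape(nb_class, nb_class)
    num_correct = np.diag(hist)
    union = hist.sum(axis=1) + hist.sum(axis=0) - num_correct
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = num_correct / union  # NaN where a class appears in neither mask
    return iou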
def eval_seg_ap(results_all, dat_list, nb_class=59, ovthresh_seg_list=[0.5],
                Sparse=False, From_pkl=False, task_id=None):
    '''
    Multi-threshold variant of eval_seg_ap above.

    From_pkl: load results from pickle files
    Sparse: indicates that the masks in the results are sparse matrices
    '''
    # Save all the info corresponding to a specific person.
    confidence = []
    image_ids = []
    BB = []
    Local_segs_ptr = []
    for imagename in tqdm(results_all.keys(), desc='Loading results ..'):
        if From_pkl:
            results = pickle.load(gzip.open(results_all[imagename]))
        else:
            results = results_all[imagename]
        det_rects = results['DETS']
        for idx, rect in enumerate(det_rects):
            image_ids.append(imagename)
            confidence.append(rect[-1])
            BB.append(rect[:4])
            Local_segs_ptr.append(idx)

    confidence = np.array(confidence)
    BB = np.array(BB)
    Local_segs_ptr = np.array(Local_segs_ptr)

    # Sort the boxes by descending score.
    sorted_ind = np.argsort(-confidence)
    BB = BB[sorted_ind, :]
    Local_segs_ptr = Local_segs_ptr[sorted_ind]
    image_ids = [image_ids[x] for x in sorted_ind]

    class_recs, npos = get_gt(dat_list, len(ovthresh_seg_list), task_id=task_id)

    nd = len(image_ids)
    final_results = {'ap_list': [], 'pcp_list': []}
    tp_seg = [np.zeros(nd) for _ in range(len(ovthresh_seg_list))]
    fp_seg = [np.zeros(nd) for _ in range(len(ovthresh_seg_list))]
    pcp_list = [[] for _ in range(len(ovthresh_seg_list))]
    for d in trange(nd, desc='Computing'):
        R = class_recs[image_ids[d]]
        ovmax = -np.inf
        jmax = -1
        if From_pkl:
            results = pickle.load(gzip.open(results_all[image_ids[d]]))
        else:
            results = results_all[image_ids[d]]
        mask0 = results['MASKS'][Local_segs_ptr[d]]
        if Sparse:
            mask_pred = mask0.toarray().astype(int)  # decode sparse array if it is one
        else:
            mask_pred = mask0.astype(int)
        for i in range(len(R['anno_adds'])):
            mask_gt = np.array(Image.open(R['anno_adds'][i]))
            if len(mask_gt.shape) == 3:
                # Make sure the annotation is a two-dimensional array.
                mask_gt = mask_gt[:, :, 0]
            # Mean IoU between the predicted person and this ground-truth person.
            seg_iou = cal_one_mean_iou(mask_pred.astype(np.uint8), mask_gt, nb_class)
            mean_seg_iou = np.nanmean(seg_iou)
            if mean_seg_iou > ovmax:
                ovmax = mean_seg_iou
                seg_iou_max = seg_iou
                jmax = i
                mask_gt_u = np.unique(mask_gt)
        for i, ovthresh_seg in enumerate(ovthresh_seg_list):
            if ovmax > ovthresh_seg:
                # Greedy matching: if the best-matching ground-truth person was
                # already claimed by a higher-scoring detection, this detection
                # counts as a false positive.
                if not R['det'][i][jmax]:
                    tp_seg[i][d] = 1.
                    R['det'][i][jmax] = 1
                    pcp_d = len(mask_gt_u[np.logical_and(mask_gt_u > 0,
                                                         mask_gt_u < nb_class)])
                    pcp_n = float(np.sum(seg_iou_max[1:] > ovthresh_seg))
                    if pcp_d > 0:
                        pcp_list[i].append(pcp_n / pcp_d)
                    else:
                        pcp_list[i].append(0.0)
                else:
                    fp_seg[i][d] = 1.
            else:
                fp_seg[i][d] = 1.
    # Compute precision and recall at each threshold.
    for i in range(len(ovthresh_seg_list)):
        fp_seg[i] = np.cumsum(fp_seg[i])
        tp_seg[i] = np.cumsum(tp_seg[i])
    rec_seg = [tp_seg[i] / float(npos) for i in range(len(tp_seg))]
    prec_seg = [tp_seg[i] / (tp_seg[i] + fp_seg[i]) for i in range(len(tp_seg))]

    for i, ovthresh_seg in enumerate(ovthresh_seg_list):
        ap_seg = voc_ap(rec_seg[i], prec_seg[i])
        # Pad with zeros for unmatched ground-truth persons before averaging.
        pcp_list[i].extend([0.0] * (npos - len(pcp_list[i])))
        pcp = np.mean(pcp_list[i])
        print('AP_seg, PCP under threshold {}:'.format(ovthresh_seg), ap_seg, pcp)
        final_results['ap_list'].append(ap_seg)
        final_results['pcp_list'].append(pcp)
    return final_results
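# `get_gt` is likewise external. The sketch below is hypothetical and only
# illustrates the structure both eval_seg_ap variants rely on; the field
# names 'filepath' and 'anno_adds' are placeholders, not confirmed by the
# source. It returns one record per image (the paths of its per-person
# ground-truth parsing maps plus matched-yet flags) and the total
# ground-truth person count.
def get_gt(dat_list, n_thresh=None, task_id=None):
    class_recs = {}
    npos = 0
    for item in dat_list:
        anno_adds = item['anno_adds']
        npos += len(anno_adds)
        # One 'det' flag per ground-truth person; the multi-threshold variant
        # keeps a separate flag list per IoU threshold.
        if n_thresh is None:
            det = [False] * len(anno_adds)
        else:
            det = [[False] * len(anno_adds) for _ in range(n_thresh)]
        class_recs[item['filepath']] = {'anno_adds': anno_adds, 'det': det}
    return class_recs, npos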
def evaluate(self, all_image_ids, all_detection_bboxes, all_detection_vertices,
             metric='apk'):
    if metric == 'apk':
        # Fraction of predicted vertices that land within 10% of the box's
        # larger side of the corresponding ground-truth vertex.
        y_true = np.zeros(len(all_image_ids) * 8)
        for i, (img_id, vert, bbox) in enumerate(
                zip(all_image_ids, all_detection_vertices, all_detection_bboxes)):
            best_nb_correct = -1
            width = bbox[2] - bbox[0]
            height = bbox[3] - bbox[1]
            max_dim = max(width, height)
            for ann in self.annotations[img_id]:
                gt_vert = ann['vertices']
                vert_tensor = torch.Tensor(vert)
                gt_vert_tensor = torch.Tensor(gt_vert)
                t_dist = [
                    torch.norm(vert_tensor[:, j] - gt_vert_tensor[:, j], 2)
                    for j in range(8)
                ]
                correct = [d.item() < 0.1 * max_dim for d in t_dist]
                nb_correct = sum(correct)
                if nb_correct > best_nb_correct:
                    best_nb_correct = nb_correct
                    # Each detection owns an 8-slot span, one per vertex.
                    y_true[i * 8:(i + 1) * 8] = np.asarray(correct)
        y_scores = np.ones(len(all_image_ids) * 8)
        score = np.sum(y_true) / np.sum(y_scores)
    elif metric == 'ap':
        # Total number of ground-truth boxes across all annotated images.
        npos = 0
        for img_name in self.annotations:
            npos += len(self.annotations[img_name])
        tp = np.zeros(len(all_image_ids))
        fp = np.zeros(len(all_image_ids))
        for i, (img_id, bbox) in enumerate(zip(all_image_ids, all_detection_bboxes)):
            gt_bboxes = [obj['bbox'] for obj in self.annotations[img_id]]
            if len(gt_bboxes) == 0:
                fp[i] = 1
                continue
            gt_bboxes = torch.Tensor(gt_bboxes)
            bbox = torch.Tensor(bbox).unsqueeze(0)
            ious = BBox.iou(bbox, gt_bboxes)
            max_ious, _ = ious.max(dim=2)
            if max_ious.item() > 0.5:
                tp[i] = 1
            else:
                fp[i] = 1
        fp = np.cumsum(fp)
        tp = np.cumsum(tp)
        rec = tp / float(npos)
        # Avoid divide by zero in case the first detection matches a difficult
        # ground truth.
        prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
        score = voc_ap(rec, prec)
    return score, metric