def match_targets_to_anchors(self, anchor, target, copied_fields=()):
    # NB: immutable default instead of the mutable-list default
    match_quality_matrix = boxlist_iou(target, anchor)
    matched_idxs = self.proposal_matcher(match_quality_matrix)
    # RPN doesn't need any fields from target
    # for creating the labels, so clear them all
    target = target.copy_with_fields(copied_fields)
    # get the GT target corresponding to each anchor
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    if len(target) == 0:
        dummy_bbox = torch.zeros((len(matched_idxs), 4),
                                 dtype=torch.float32,
                                 device=matched_idxs.device)
        from maskrcnn_benchmark.structures.bounding_box import BoxList
        matched_targets = BoxList(dummy_bbox, target.size, target.mode)
        # .items() replaces future.utils.viewitems (Python 2 compat shim)
        for k, v in target.extra_fields.items():
            if len(v) == 0:
                if k == 'labels':
                    matched_targets.add_field(
                        k,
                        torch.zeros(len(matched_idxs),
                                    dtype=v.dtype,
                                    device=v.device),
                    )
                else:
                    # NB: this looks incorrect: the empty field is copied
                    # as-is, so its length will not match len(matched_idxs)
                    matched_targets.add_field(k, v)
            else:
                raise Exception('we have no idea of how to deal with '
                                'non-empty fields')
    else:
        matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
def match_targets_to_proposals(self, proposal, target):
    match_quality_matrix = boxlist_iou(target, proposal)
    matched_idxs = self.proposal_matcher(match_quality_matrix)
    # Fast R-CNN only needs the fields in self.copied_fields (e.g. "labels")
    # for selecting the targets
    target = target.copy_with_fields(self.copied_fields)
    # get the GT target corresponding to each proposal
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    if len(target) == 0:
        # no GT in this image: fabricate an all-background target
        dummy_bbox = torch.zeros((len(matched_idxs), 4),
                                 dtype=torch.float32,
                                 device=matched_idxs.device)
        from maskrcnn_benchmark.structures.bounding_box import BoxList
        matched_targets = BoxList(dummy_bbox, target.size, target.mode)
        matched_targets.add_field(
            'labels',
            self.create_all_bkg_labels(len(matched_idxs),
                                       matched_idxs.device))
        matched_targets.add_field(
            'tightness',
            torch.zeros(len(matched_idxs), device=matched_idxs.device))
        matched_targets.add_field(
            'attributes',
            torch.zeros((len(matched_idxs), 1), device=matched_idxs.device))
    else:
        matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
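# The empty-target branch above can be exercised directly. A minimal sketch,
# assuming maskrcnn_benchmark is installed and that background labels are all
# zeros (create_all_bkg_labels is specific to this fork, so the sketch inlines
# torch.zeros; the image size 800x600 is hypothetical):
import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

matched_idxs = torch.full((5,), -1, dtype=torch.int64)  # 5 proposals, no GT
dummy_bbox = torch.zeros((len(matched_idxs), 4), dtype=torch.float32)
matched_targets = BoxList(dummy_bbox, (800, 600), mode="xyxy")
# every proposal gets a background label when the image has no annotations
matched_targets.add_field("labels", torch.zeros(len(matched_idxs)))
matched_targets.add_field("matched_idxs", matched_idxs)
print(len(matched_targets), matched_targets.fields())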
def match_targets_to_proposals(self, proposal, target):
    match_quality_matrix = boxlist_iou(target, proposal)
    matched_idxs = self.proposal_matcher(match_quality_matrix)
    # Fast R-CNN only needs the "labels" field for selecting the targets
    target = target.copy_with_fields("labels")
    # get the GT target corresponding to each proposal
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
def match_targets_to_anchors(self, anchor, target, copied_fields=()):
    # NB: immutable default instead of the mutable-list default
    match_quality_matrix = boxlist_iou(target, anchor)
    matched_idxs = self.proposal_matcher(match_quality_matrix)
    # RPN doesn't need any fields from target
    # for creating the labels, so clear them all
    target = target.copy_with_fields(copied_fields)
    # get the GT target corresponding to each anchor
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
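# The clamp trick shared by all four matchers above can be checked in
# isolation. A sketch in plain torch, with a hypothetical 2-GT x 3-anchor IoU
# matrix standing in for boxlist_iou and a simple threshold standing in for
# the Matcher: negative sentinel indices would index out of bounds, so they
# are clamped to 0 and the kept "matched_idxs" lets downstream code fix the
# labels of those anchors:
import torch

match_quality = torch.tensor([[0.9, 0.1, 0.0],
                              [0.2, 0.0, 0.6]])     # 2 GT x 3 anchors
matched_vals, matched_idxs = match_quality.max(dim=0)  # best GT per anchor
matched_idxs[matched_vals < 0.3] = -1                  # low quality -> sentinel
gt_boxes = torch.tensor([[0., 0., 10., 10.],
                         [5., 5., 20., 20.]])
# -1 would wrap around to the last GT, so clamp before gathering
matched_gts = gt_boxes[matched_idxs.clamp(min=0)]
print(matched_idxs, matched_gts.shape)  # tensor([ 0, -1,  1]) torch.Size([3, 4])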
def get_pos_proposal_indexes(self, locations, box_regression,
                             matched_idxes, targets):
    locations = torch.cat(locations, dim=0)
    pos_indexes_for_targets = []
    for im in range(len(targets)):
        pos_indexes_for_targets_per_im = []
        box_regression_im = [
            box_regression[l][im].detach().view(4, -1).transpose(
                0, 1).contiguous() * self.fpn_strides[l]
            for l in range(len(box_regression))
        ]
        box_regression_im = torch.cat(box_regression_im, dim=0)
        for t_id in range(len(targets[im])):
            valid = matched_idxes[im] == t_id
            if valid.sum() == 0:
                pos_indexes_for_targets_per_im.append(valid.new_tensor([]))
                continue
            valid_location = locations[valid]
            valid_regression = box_regression_im[valid]
            detections = torch.stack([
                valid_location[:, 0] - valid_regression[:, 0],
                valid_location[:, 1] - valid_regression[:, 1],
                valid_location[:, 0] + valid_regression[:, 2],
                valid_location[:, 1] + valid_regression[:, 3],
            ], dim=1)
            detect_boxlist = BoxList(detections, targets[im].size, mode="xyxy")
            target_boxlist = BoxList(targets[im].bbox[t_id:t_id + 1],
                                     targets[im].size, mode="xyxy")
            match_quality_matrix = boxlist_iou(detect_boxlist, target_boxlist)

            pos_labels_per_target = torch.zeros_like(valid)
            iou_in_target = match_quality_matrix[:, 0]
            if iou_in_target.max() > self.sample_pos_iou_th:
                pos_in_target = (iou_in_target > self.sample_pos_iou_th)
            else:
                pos_in_target = (iou_in_target == iou_in_target.max())
            pos_labels_per_target[valid] = pos_in_target

            pos_indexes_for_targets_per_im.append(
                pos_labels_per_target.nonzero().squeeze(1))
        pos_indexes_for_targets.append(pos_indexes_for_targets_per_im)
    return pos_indexes_for_targets
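# The "detections" stack above is the usual FCOS-style decoding of
# left/top/right/bottom offsets at each location back into xyxy boxes. A
# self-contained check with hypothetical locations and offsets:
import torch

locations = torch.tensor([[100., 100.], [200., 150.]])        # (x, y) points
ltrb = torch.tensor([[30., 20., 40., 50.], [10., 10., 10., 10.]])
boxes = torch.stack([
    locations[:, 0] - ltrb[:, 0],   # x1 = x - l
    locations[:, 1] - ltrb[:, 1],   # y1 = y - t
    locations[:, 0] + ltrb[:, 2],   # x2 = x + r
    locations[:, 1] + ltrb[:, 3],   # y2 = y + b
], dim=1)
print(boxes)  # tensor([[ 70.,  80., 140., 150.], [190., 140., 210., 160.]])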
def evaluate_box_proposals(predictions, dataset, thresholds=None, area="all",
                           limit=None):
    """Evaluate detection proposal recall metrics.

    This function is a much faster alternative to the official COCO API
    recall evaluation code. However, it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],    # all
        [0**2, 32**2],     # small
        [32**2, 96**2],    # medium
        [96**2, 1e5**2],   # large
        [96**2, 128**2],   # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height),
                           mode="xywh").convert("xyxy")
        gt_areas = torch.as_tensor(
            [obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
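# The greedy matching loop at the core of evaluate_box_proposals can be
# traced on a toy IoU matrix: each round picks the best-covered GT, records
# its IoU, and retires both that GT and the proposal that covered it. A
# sketch with hypothetical overlaps:
import torch

overlaps = torch.tensor([[0.9, 0.2],
                         [0.8, 0.7],
                         [0.1, 0.6]])               # 3 proposals x 2 GT
gt_overlaps = torch.zeros(2)
for j in range(2):
    max_overlaps, argmax_overlaps = overlaps.max(dim=0)  # best proposal per GT
    gt_ovr, gt_ind = max_overlaps.max(dim=0)             # best-covered GT
    box_ind = argmax_overlaps[gt_ind]
    gt_overlaps[j] = overlaps[box_ind, gt_ind]
    overlaps[box_ind, :] = -1                            # retire the proposal
    overlaps[:, gt_ind] = -1                             # retire the GT
print(gt_overlaps)  # tensor([0.9000, 0.7000])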
def main(args):
    annFile = 'datasets/coco/annotations/instances_train2017_0.5.json'
    coco = COCO(annFile)
    with open(annFile, 'r') as f:
        result_json = json.load(f)
    annos_json = result_json['annotations']
    # anno_id = max([ann['id'] for ann in annos_json]) + 1
    output_dir = os.path.join(args.predictions, 'coco_2017_train_partial')
    image_ids = torch.load(os.path.join(output_dir, 'image_ids.pth'))
    predictions = torch.load(os.path.join(output_dir, 'predictions.pth'))
    anno_id = max(torch.load(os.path.join(output_dir, 'box_ids.pth'))) + 1
    imgIds = sorted(coco.getImgIds())

    threshold = args.confidence
    # threshold = torch.tensor([-1.0, 0.46633365750312805, 0.4409848749637604, 0.47267603874206543, 0.4707889258861542, 0.5220812559127808, 0.5358721613883972, 0.5226702690124512, 0.45160290598869324])
    iou_threshold = 0.5
    cpu_device = torch.device("cpu")
    partial_box_num = 0
    N = len(image_ids)
    for i in tqdm(range(N)):
        im_idx = image_ids[i]
        bbox = predictions[i]
        imginfo = coco.loadImgs(imgIds[im_idx])[0]
        image_width = imginfo['width']
        image_height = imginfo['height']

        # load annotations
        partial_anns = coco.loadAnns(coco.getAnnIds(imgIds=(imgIds[im_idx], )))
        # full_anns = coco_full.loadAnns(coco_full.getAnnIds(imgIds=(imgIds[im_idx],), catIds=catIds))
        partial_boxes = [obj["bbox"] for obj in partial_anns]
        partial_boxes_ids = set([obj["id"] for obj in partial_anns])
        partial_boxes = torch.as_tensor(partial_boxes).reshape(-1, 4)  # guard against no boxes
        partial_boxes = BoxList(partial_boxes, (image_width, image_height),
                                mode="xywh").convert("xyxy")
        partial_box_num += len(partial_boxes_ids)

        # get predictions
        bbox = bbox.resize((image_width, image_height))
        bbox = bbox.to(cpu_device)

        # generate pseudo labels
        idx = generate_pseudo_label_with_confidence_score(
            bbox, im_idx, threshold)
        if len(idx) > 0:
            pseudo_labels = bbox[idx]
            scores = pseudo_labels.get_field("scores").tolist()
            # compute iou
            overlaps = boxlist_iou(partial_boxes, pseudo_labels)
            matched_id = [True] * len(pseudo_labels)
            # remove predictions that duplicate the partial labels;
            # NB: use a new loop variable so the outer image index `i`
            # is not shadowed
            for j in range(len(partial_boxes)):
                matched = overlaps[j].argmax().item()
                if overlaps[j, matched] >= iou_threshold:
                    matched_id[matched] = False
            pseudo_labels = pseudo_labels[matched_id]
            # print(num, len(pseudo_labels))
            pseudo_annos, anno_id = new_annotation_json(
                pseudo_labels, imgIds[im_idx], anno_id)
            annos_json.extend(pseudo_annos)

    print('confidence threshold: {}'.format(threshold))
    result_json['annotations'] = annos_json
    with open(args.annotation, 'w') as f:
        json.dump(result_json, f)
    print(partial_box_num, len(result_json['annotations']))
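# The duplicate-removal step above keeps a boolean mask over the pseudo-labels
# and flips off any prediction that overlaps a partial GT box. A toy version
# with a hypothetical 2-GT x 3-prediction IoU matrix and the same 0.5
# threshold as the script (like the original, it does not stop two GT boxes
# from suppressing the same prediction):
import torch

iou_threshold = 0.5
overlaps = torch.tensor([[0.8, 0.1, 0.0],
                         [0.2, 0.6, 0.3]])   # partial GT x pseudo-labels
keep = [True] * overlaps.shape[1]
for j in range(overlaps.shape[0]):
    matched = overlaps[j].argmax().item()
    if overlaps[j, matched] >= iou_threshold:
        keep[matched] = False                # pseudo-label duplicates a GT box
print(keep)  # [False, False, True]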
def main(args):
    annFile = args.annotation
    annFile_full = 'datasets/coco/annotations/instances_train2017_full.json'
    coco = COCO(annFile)
    coco_full = COCO(annFile_full)
    image_ids = sorted(coco.getImgIds())
    catIds = list(range(2, 10))

    tp = 0
    fn = 0
    fp = 0
    sum_iou = 0
    partial_box_num_total = 0
    missing_box_num_total = 0
    pseudo_box_num_total = 0
    N = len(image_ids)
    for i in tqdm(range(N)):
        im_idx = image_ids[i]
        imginfo = coco.loadImgs(im_idx)[0]
        image_width = imginfo['width']
        image_height = imginfo['height']

        # load annotations
        partial_anns = coco.loadAnns(coco.getAnnIds(imgIds=(im_idx, )))
        full_anns = coco_full.loadAnns(
            coco_full.getAnnIds(imgIds=(im_idx, ), catIds=catIds))

        # obtain boxes
        pseudo_boxes = [obj["bbox"] for obj in partial_anns
                        if "ispseudo" in obj.keys()]
        partial_boxes = [obj["bbox"] for obj in partial_anns
                         if "ispseudo" not in obj.keys()]
        partial_boxes_id = set([obj["id"] for obj in partial_anns
                                if "ispseudo" not in obj.keys()])
        missing_boxes = [obj["bbox"] for obj in full_anns
                         if obj["id"] not in partial_boxes_id]

        partial_box_num = len(partial_boxes)
        missing_box_num = len(missing_boxes)
        pseudo_box_num = len(pseudo_boxes)
        partial_box_num_total += partial_box_num
        missing_box_num_total += missing_box_num
        pseudo_box_num_total += pseudo_box_num

        pseudo_boxes = convert_box_to_boxlist(pseudo_boxes, image_width, image_height)
        partial_boxes = convert_box_to_boxlist(partial_boxes, image_width, image_height)
        missing_boxes = convert_box_to_boxlist(missing_boxes, image_width, image_height)

        if missing_box_num == 0:
            fp += pseudo_box_num
        elif pseudo_box_num == 0:
            fn += missing_box_num
        else:
            # compute iou; NB: use a new loop variable so the outer image
            # index `i` is not shadowed
            overlaps = boxlist_iou(missing_boxes, pseudo_boxes).numpy()
            matched_cnt = 0
            for j in range(missing_box_num):
                matched = np.argmax(overlaps[j])
                if overlaps[j, matched] >= 0.5:
                    tp += 1
                    sum_iou += overlaps[j, matched]
                    overlaps[:, matched] = 0  # each pseudo box matches at most once
                    matched_cnt += 1
                else:
                    fn += 1
            fp += pseudo_box_num - matched_cnt

    print(tp, fp, sum_iou / tp)
    print('TP={}, FP={}, FN={}, IoU Acc={}'.format(tp, fp, fn, sum_iou / tp))
    print('PQ = {}'.format(sum_iou / (tp + 0.5 * fp + 0.5 * fn)))
    print('partial_box_num_total: {}'.format(partial_box_num_total))
    print('missing_box_num_total: {}'.format(missing_box_num_total))
    print('pseudo_box_num_total: {}'.format(pseudo_box_num_total))
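# The final score follows the panoptic-quality form: matched IoUs in the
# numerator, unmatched predictions (FP) and misses (FN) each penalized at
# half weight. A quick numeric check with made-up counts:
tp, fp, fn, sum_iou = 8, 2, 4, 6.4
iou_acc = sum_iou / tp                       # 0.8: mean IoU over matches
pq = sum_iou / (tp + 0.5 * fp + 0.5 * fn)    # 6.4 / 11 ~= 0.582
print(iou_acc, pq)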
def prepare_targets(self, targets, anchors):
    cls_labels = []
    reg_targets = []
    for im_i in range(len(targets)):
        targets_per_im = targets[im_i]
        assert targets_per_im.mode == "xyxy"
        bboxes_per_im = targets_per_im.bbox
        labels_per_im = targets_per_im.get_field("labels")
        anchors_per_im = cat_boxlist(anchors[im_i])
        num_gt = bboxes_per_im.shape[0]

        if self.cfg.MODEL.ATSS.POSITIVE_TYPE == 'SSC':
            object_sizes_of_interest = [[-1, 64], [64, 128], [128, 256],
                                        [256, 512], [512, INF]]
            area_per_im = targets_per_im.area()
            expanded_object_sizes_of_interest = []
            points = []
            for l, anchors_per_level in enumerate(anchors[im_i]):
                anchors_per_level = anchors_per_level.bbox
                anchors_cx_per_level = (anchors_per_level[:, 2] + anchors_per_level[:, 0]) / 2.0
                anchors_cy_per_level = (anchors_per_level[:, 3] + anchors_per_level[:, 1]) / 2.0
                points_per_level = torch.stack((anchors_cx_per_level, anchors_cy_per_level), dim=1)
                points.append(points_per_level)
                object_sizes_of_interest_per_level = \
                    points_per_level.new_tensor(object_sizes_of_interest[l])
                expanded_object_sizes_of_interest.append(
                    object_sizes_of_interest_per_level[None].expand(len(points_per_level), -1)
                )
            expanded_object_sizes_of_interest = torch.cat(expanded_object_sizes_of_interest, dim=0)
            points = torch.cat(points, dim=0)

            xs, ys = points[:, 0], points[:, 1]
            l = xs[:, None] - bboxes_per_im[:, 0][None]
            t = ys[:, None] - bboxes_per_im[:, 1][None]
            r = bboxes_per_im[:, 2][None] - xs[:, None]
            b = bboxes_per_im[:, 3][None] - ys[:, None]
            reg_targets_per_im = torch.stack([l, t, r, b], dim=2)

            is_in_boxes = reg_targets_per_im.min(dim=2)[0] > 0.01

            max_reg_targets_per_im = reg_targets_per_im.max(dim=2)[0]
            is_cared_in_the_level = \
                (max_reg_targets_per_im >= expanded_object_sizes_of_interest[:, [0]]) & \
                (max_reg_targets_per_im <= expanded_object_sizes_of_interest[:, [1]])

            locations_to_gt_area = area_per_im[None].repeat(len(points), 1)
            locations_to_gt_area[is_in_boxes == 0] = INF
            locations_to_gt_area[is_cared_in_the_level == 0] = INF
            locations_to_min_area, locations_to_gt_inds = locations_to_gt_area.min(dim=1)

            cls_labels_per_im = labels_per_im[locations_to_gt_inds]
            cls_labels_per_im[locations_to_min_area == INF] = 0
            matched_gts = bboxes_per_im[locations_to_gt_inds]
        elif self.cfg.MODEL.ATSS.POSITIVE_TYPE == 'ATSS':
            num_anchors_per_loc = len(self.cfg.MODEL.ATSS.ASPECT_RATIOS) * self.cfg.MODEL.ATSS.SCALES_PER_OCTAVE
            num_anchors_per_level = [len(anchors_per_level.bbox)
                                     for anchors_per_level in anchors[im_i]]
            ious = boxlist_iou(anchors_per_im, targets_per_im)

            gt_cx = (bboxes_per_im[:, 2] + bboxes_per_im[:, 0]) / 2.0
            gt_cy = (bboxes_per_im[:, 3] + bboxes_per_im[:, 1]) / 2.0
            gt_points = torch.stack((gt_cx, gt_cy), dim=1)

            anchors_cx_per_im = (anchors_per_im.bbox[:, 2] + anchors_per_im.bbox[:, 0]) / 2.0
            anchors_cy_per_im = (anchors_per_im.bbox[:, 3] + anchors_per_im.bbox[:, 1]) / 2.0
            anchor_points = torch.stack((anchors_cx_per_im, anchors_cy_per_im), dim=1)

            distances = (anchor_points[:, None, :] - gt_points[None, :, :]).pow(2).sum(-1).sqrt()

            # Selecting candidates based on the center distance between anchor box and object
            candidate_idxs = []
            star_idx = 0
            for level, anchors_per_level in enumerate(anchors[im_i]):
                end_idx = star_idx + num_anchors_per_level[level]
                distances_per_level = distances[star_idx:end_idx, :]
                topk = min(self.cfg.MODEL.ATSS.TOPK * num_anchors_per_loc,
                           num_anchors_per_level[level])
                _, topk_idxs_per_level = distances_per_level.topk(topk, dim=0, largest=False)
                candidate_idxs.append(topk_idxs_per_level + star_idx)
                star_idx = end_idx
            candidate_idxs = torch.cat(candidate_idxs, dim=0)

            # Using the sum of mean and standard deviation as the IoU threshold
            # to select final positive samples
            candidate_ious = ious[candidate_idxs, torch.arange(num_gt)]
            iou_mean_per_gt = candidate_ious.mean(0)
            iou_std_per_gt = candidate_ious.std(0)
            iou_thresh_per_gt = iou_mean_per_gt + iou_std_per_gt
            is_pos = candidate_ious >= iou_thresh_per_gt[None, :]

            # Limiting the final positive samples' center to the object
            anchor_num = anchors_cx_per_im.shape[0]
            for ng in range(num_gt):
                candidate_idxs[:, ng] += ng * anchor_num
            e_anchors_cx = anchors_cx_per_im.view(1, -1).expand(num_gt, anchor_num).contiguous().view(-1)
            e_anchors_cy = anchors_cy_per_im.view(1, -1).expand(num_gt, anchor_num).contiguous().view(-1)
            candidate_idxs = candidate_idxs.view(-1)
            l = e_anchors_cx[candidate_idxs].view(-1, num_gt) - bboxes_per_im[:, 0]
            t = e_anchors_cy[candidate_idxs].view(-1, num_gt) - bboxes_per_im[:, 1]
            r = bboxes_per_im[:, 2] - e_anchors_cx[candidate_idxs].view(-1, num_gt)
            b = bboxes_per_im[:, 3] - e_anchors_cy[candidate_idxs].view(-1, num_gt)
            is_in_gts = torch.stack([l, t, r, b], dim=1).min(dim=1)[0] > 0.01
            is_pos = is_pos & is_in_gts

            # if an anchor box is assigned to multiple gts,
            # the one with the highest IoU is selected
            ious_inf = torch.full_like(ious, -INF).t().contiguous().view(-1)
            index = candidate_idxs.view(-1)[is_pos.view(-1)]
            ious_inf[index] = ious.t().contiguous().view(-1)[index]
            ious_inf = ious_inf.view(num_gt, -1).t()

            anchors_to_gt_values, anchors_to_gt_indexs = ious_inf.max(dim=1)
            cls_labels_per_im = labels_per_im[anchors_to_gt_indexs]
            cls_labels_per_im[anchors_to_gt_values == -INF] = 0
            matched_gts = bboxes_per_im[anchors_to_gt_indexs]
        elif self.cfg.MODEL.ATSS.POSITIVE_TYPE == 'TOPK':
            gt_cx = (bboxes_per_im[:, 2] + bboxes_per_im[:, 0]) / 2.0
            gt_cy = (bboxes_per_im[:, 3] + bboxes_per_im[:, 1]) / 2.0
            gt_points = torch.stack((gt_cx, gt_cy), dim=1)

            anchors_cx_per_im = (anchors_per_im.bbox[:, 2] + anchors_per_im.bbox[:, 0]) / 2.0
            anchors_cy_per_im = (anchors_per_im.bbox[:, 3] + anchors_per_im.bbox[:, 1]) / 2.0
            anchor_points = torch.stack((anchors_cx_per_im, anchors_cy_per_im), dim=1)

            distances = (anchor_points[:, None, :] - gt_points[None, :, :]).pow(2).sum(-1).sqrt()
            distances = distances / distances.max() / 1000
            ious = boxlist_iou(anchors_per_im, targets_per_im)

            # boolean positive mask (clearer than the original `ious * False`)
            is_pos = torch.zeros_like(ious, dtype=torch.bool)
            for ng in range(num_gt):
                _, topk_idxs = (ious[:, ng] - distances[:, ng]).topk(
                    self.cfg.MODEL.ATSS.TOPK, dim=0)
                l = anchors_cx_per_im[topk_idxs] - bboxes_per_im[ng, 0]
                t = anchors_cy_per_im[topk_idxs] - bboxes_per_im[ng, 1]
                r = bboxes_per_im[ng, 2] - anchors_cx_per_im[topk_idxs]
                b = bboxes_per_im[ng, 3] - anchors_cy_per_im[topk_idxs]
                is_in_gt = torch.stack([l, t, r, b], dim=1).min(dim=1)[0] > 0.01
                is_pos[topk_idxs[is_in_gt], ng] = True

            ious[~is_pos] = -INF
            anchors_to_gt_values, anchors_to_gt_indexs = ious.max(dim=1)
            cls_labels_per_im = labels_per_im[anchors_to_gt_indexs]
            cls_labels_per_im[anchors_to_gt_values == -INF] = 0
            matched_gts = bboxes_per_im[anchors_to_gt_indexs]
        elif self.cfg.MODEL.ATSS.POSITIVE_TYPE == 'IoU':
            match_quality_matrix = boxlist_iou(targets_per_im, anchors_per_im)
            matched_idxs = self.matcher(match_quality_matrix)
            targets_per_im = targets_per_im.copy_with_fields(['labels'])
            matched_targets = targets_per_im[matched_idxs.clamp(min=0)]

            cls_labels_per_im = matched_targets.get_field("labels")
            cls_labels_per_im = cls_labels_per_im.to(dtype=torch.float32)

            # Background (negative examples)
            bg_indices = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
            cls_labels_per_im[bg_indices] = 0

            # discard indices that are between thresholds
            inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS
            cls_labels_per_im[inds_to_discard] = -1

            matched_gts = matched_targets.bbox

            # Limiting positive samples' center to the object
            # in order to filter out poor positives and use the centerness branch
            pos_idxs = torch.nonzero(cls_labels_per_im > 0).squeeze(1)
            pos_anchors_cx = (anchors_per_im.bbox[pos_idxs, 2] + anchors_per_im.bbox[pos_idxs, 0]) / 2.0
            pos_anchors_cy = (anchors_per_im.bbox[pos_idxs, 3] + anchors_per_im.bbox[pos_idxs, 1]) / 2.0
            l = pos_anchors_cx - matched_gts[pos_idxs, 0]
            t = pos_anchors_cy - matched_gts[pos_idxs, 1]
            r = matched_gts[pos_idxs, 2] - pos_anchors_cx
            b = matched_gts[pos_idxs, 3] - pos_anchors_cy
            is_in_gts = torch.stack([l, t, r, b], dim=1).min(dim=1)[0] > 0.01
            cls_labels_per_im[pos_idxs[is_in_gts == 0]] = -1
        else:
            raise NotImplementedError

        reg_targets_per_im = self.box_coder.encode(matched_gts, anchors_per_im.bbox)
        cls_labels.append(cls_labels_per_im)
        reg_targets.append(reg_targets_per_im)

    return cls_labels, reg_targets
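# The heart of the 'ATSS' branch above is the per-GT adaptive threshold:
# mean plus standard deviation of the candidate IoUs. A standalone sketch
# with hypothetical candidate IoUs for a single GT column:
import torch

candidate_ious = torch.tensor([0.70, 0.68, 0.30, 0.20, 0.15])
thresh = candidate_ious.mean() + candidate_ious.std()  # ~0.67 here
is_pos = candidate_ious >= thresh
print(thresh.item(), is_pos)  # only the clearly-above-average candidates are positive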
def calc_detection_voc_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of predicted bounding
    boxes obtained from a dataset which has :math:`N` images. The code is
    based on the evaluation code used in PASCAL VOC Challenge.
    """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)
    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        pred_bbox = pred_boxlist.bbox.numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox.numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        gt_difficult = gt_boxlist.get_field("difficult").numpy()

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0, ) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1
            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.size),
                BoxList(gt_bbox_l, gt_boxlist.size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos.keys():
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]

    return prec, rec
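# The cumulative-sum construction at the end of the function yields
# precision and recall at every score cutoff at once. A compact numpy
# illustration with hypothetical match flags already sorted by score
# (1 = TP, 0 = FP; -1 entries for difficult GT are excluded from both sums):
import numpy as np

match_l = np.array([1, 1, 0, 1, 0], dtype=np.int8)
n_pos = 4                                 # non-difficult GT boxes of this class
tp = np.cumsum(match_l == 1)              # [1, 2, 2, 3, 3]
fp = np.cumsum(match_l == 0)              # [0, 0, 1, 1, 2]
prec = tp / (fp + tp)                     # precision at each cutoff
rec = tp / n_pos                          # recall at each cutoff
print(prec, rec)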
# NB: fragment from the body of a per-prediction loop (hence the bare
# `continue`); `p`, `i`, `NNs`, `im_idx`, `coco_full`, and `distances` are
# defined by the enclosing script
image_width = imginfo['width']
image_height = imginfo['height']
full_anns = coco_full.loadAnns(
    coco_full.getAnnIds(imgIds=(im_idx, ), catIds=catIds))
all_boxes = [obj["bbox"] for obj in full_anns]
all_boxlist = convert_box_to_boxlist(all_boxes, image_width, image_height)
query_box = p.resize((image_width, image_height))
if len(query_box) == 0:
    i += 1
    im_idx = -1
    continue
overlaps = boxlist_iou(query_box, all_boxlist).numpy()
max_overlaps = overlaps.max(axis=1)
labels = p.get_field('labels').tolist()
scores = p.get_field('scores').tolist()
while i < len(NNs) and im_idx == image_ids[NNs[i]['image_id']]:
    distances.append(NNs[i]['NN_distance'])
    i += 1

plt.xlabel('IoU')
plt.ylabel('NN distance')
plt.colorbar()
plt.clim(0, 1)
plt.show()