def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        # multiclass nms
        result = boxlist_ml_nms(boxlists[i], self.nms_thresh)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.fpn_post_nms_top_n + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]

        if self.score_voting:
            boxes_al = boxlists[i].bbox
            boxlist = boxlists[i]
            labels = boxlists[i].get_field("labels")
            scores = boxlists[i].get_field("scores")
            sigma = 0.025
            result_labels = result.get_field("labels")
            for j in range(1, self.num_classes):
                inds = (labels == j).nonzero().view(-1)
                scores_j = scores[inds]
                boxes_j = boxes_al[inds, :].view(-1, 4)
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                result_inds = (result_labels == j).nonzero().view(-1)
                boxlist_for_class_nmsed = result[result_inds]
                ious = boxlist_iou(boxlist_for_class_nmsed, boxlist_for_class)
                voted_boxes = []
                for bi in range(len(boxlist_for_class_nmsed)):
                    cur_ious = ious[bi]
                    pos_inds = (cur_ious > 0.01).nonzero().squeeze(1)
                    pos_ious = cur_ious[pos_inds]
                    pos_boxes = boxlist_for_class.bbox[pos_inds]
                    pos_scores = scores_j[pos_inds]
                    pis = (torch.exp(-(1 - pos_ious)**2 / sigma) *
                           pos_scores).unsqueeze(1)
                    voted_box = torch.sum(
                        pos_boxes * pis, dim=0) / torch.sum(pis, dim=0)
                    voted_boxes.append(voted_box.unsqueeze(0))

                if voted_boxes:
                    voted_boxes = torch.cat(voted_boxes, dim=0)
                    boxlist_for_class_nmsed_ = BoxList(
                        voted_boxes,
                        boxlist_for_class_nmsed.size,
                        mode="xyxy")
                    boxlist_for_class_nmsed_.add_field(
                        "scores", boxlist_for_class_nmsed.get_field('scores'))
                    result.bbox[result_inds] = boxlist_for_class_nmsed_.bbox

        results.append(result)
    return results
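# A minimal standalone sketch of the score-voting step above, assuming plain
# [N, 4] / [M, 4] xyxy tensors instead of BoxList; `toy_iou` is a hypothetical
# stand-in for boxlist_iou and the tensor values are made up. It only shows the
# weighting pi = exp(-(1 - IoU)^2 / sigma) * score and the weighted average.
import torch


def toy_iou(a, b):
    # pairwise IoU between [N, 4] and [M, 4] xyxy boxes
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = torch.max(a[:, None, :2], b[None, :, :2])
    rb = torch.min(a[:, None, 2:], b[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter)


nmsed = torch.tensor([[10., 10., 50., 50.]])        # box kept after NMS
all_boxes = torch.tensor([[12., 11., 52., 49.],      # pre-NMS boxes, same class
                          [11., 9., 48., 51.],
                          [80., 80., 120., 120.]])
all_scores = torch.tensor([0.9, 0.6, 0.7])

sigma = 0.025
ious = toy_iou(nmsed, all_boxes)[0]                  # IoUs against the kept box
pos = ious > 0.01                                    # neighbours that overlap at all
# weight each neighbour by its score and by how close its IoU is to 1
pis = torch.exp(-(1 - ious[pos]) ** 2 / sigma) * all_scores[pos]
voted = (all_boxes[pos] * pis[:, None]).sum(0) / pis.sum()
print(voted)  # the kept box would be replaced by this weighted average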
def match_targets_to_anchors(self, anchor, target, copied_fields=[]):
    match_quality_matrix = boxlist_iou(target, anchor)
    matched_idxs, _ = self.proposal_matcher(match_quality_matrix)
    target = target.copy_with_fields(copied_fields)
    matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
def prepare_iou_based_targets(self, targets, anchors):
    """Compute IoU-based targets"""
    cls_labels = []
    reg_targets = []
    matched_idx_all = []
    for im_i in range(len(targets)):
        targets_per_im = targets[im_i]
        assert targets_per_im.mode == "xyxy"
        anchors_per_im = cat_boxlist(anchors[im_i])

        match_quality_matrix = boxlist_iou(targets_per_im, anchors_per_im)
        matched_idxs, _ = self.matcher(match_quality_matrix)
        targets_per_im = targets_per_im.copy_with_fields(['labels'])
        matched_targets = targets_per_im[matched_idxs.clamp(min=0)]

        cls_labels_per_im = matched_targets.get_field("labels")
        cls_labels_per_im = cls_labels_per_im.to(dtype=torch.float32)

        # Background (negative examples)
        bg_indices = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
        cls_labels_per_im[bg_indices] = 0

        # discard indices that are between thresholds
        inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS
        cls_labels_per_im[inds_to_discard] = -1

        matched_gts = matched_targets.bbox
        matched_idx_all.append(matched_idxs.view(1, -1))

        reg_targets_per_im = self.box_coder.encode(matched_gts,
                                                   anchors_per_im.bbox)
        cls_labels.append(cls_labels_per_im)
        reg_targets.append(reg_targets_per_im)

    return cls_labels, reg_targets, matched_idx_all
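# A minimal sketch of the final encode step in prepare_iou_based_targets,
# assuming the standard Faster R-CNN BoxCoder from maskrcnn-benchmark; the
# import path and the (10., 10., 5., 5.) weights are assumptions and may be
# named differently in this codebase.
import torch
from maskrcnn_benchmark.modeling.box_coder import BoxCoder

box_coder = BoxCoder(weights=(10., 10., 5., 5.))
matched_gts = torch.tensor([[10., 10., 50., 50.]])   # GT matched to the anchor
anchors = torch.tensor([[12., 8., 48., 52.]])        # the anchor itself (xyxy)

# encode() returns (dx, dy, dw, dh): scaled center offsets and log
# width/height ratios that the regression branch is trained to predict
reg_targets = box_coder.encode(matched_gts, anchors)
print(reg_targets)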
def match_targets_to_proposals(self, proposal, target):
    match_quality_matrix = boxlist_iou(target, proposal)
    matched_idxs, _ = self.proposal_matcher(match_quality_matrix)
    # Mask R-CNN needs "labels" and "masks" fields for creating the targets
    target = target.copy_with_fields(["labels", "masks"])
    # get the targets corresponding GT for each proposal
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
def match_targets_to_anchors(self, anchor, target, copied_fields=[]):
    match_quality_matrix = boxlist_iou(target, anchor)
    matched_idxs, _ = self.proposal_matcher(match_quality_matrix)
    # RPN doesn't need any fields from target
    # for creating the labels, so clear them all
    target = target.copy_with_fields(copied_fields)
    # get the targets corresponding GT for each anchor
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
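# A toy illustration of why matched_idxs is clamped before indexing, assuming
# the maskrcnn-benchmark convention of BELOW_LOW_THRESHOLD = -1 and
# BETWEEN_THRESHOLDS = -2; the tensor values here are made up.
import torch

gt_labels = torch.tensor([7, 3])                 # labels of the two GT boxes
matched_idxs = torch.tensor([0, 1, -1, -2])      # per-anchor match results

# Indexing with -1/-2 would silently pick GTs from the end of the list, so the
# negative entries are clamped to 0 first and overwritten afterwards
# (background -> 0, between-thresholds -> ignored), as in the loss code above.
labels = gt_labels[matched_idxs.clamp(min=0)].to(torch.float32)
labels[matched_idxs == -1] = 0     # BELOW_LOW_THRESHOLD -> background
labels[matched_idxs == -2] = -1    # BETWEEN_THRESHOLDS -> ignored
print(labels)                      # tensor([7., 3., 0., -1.])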
def evaluate_box_proposals(predictions,
                           dataset,
                           thresholds=None,
                           area="all",
                           limit=None):
    """Evaluate detection proposal recall metrics.

    This function is a much faster alternative to the official COCO API
    recall evaluation code. However, it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],    # all
        [0**2, 32**2],     # small
        [32**2, 96**2],    # medium
        [96**2, 1e5**2],   # large
        [96**2, 128**2],   # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height),
                           mode="xywh").convert("xyxy")
        gt_areas = torch.as_tensor(
            [obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)

    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
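# A hedged sketch of how evaluate_box_proposals might be driven, mirroring the
# AR@100 / AR@1000 summaries usually reported for proposal recall; the
# `predictions` list (BoxList objects with an "objectness" field) and the
# COCO-style `dataset` wrapper are assumed to exist already.
for limit in (100, 1000):
    for area in ("all", "small", "medium", "large"):
        stats = evaluate_box_proposals(
            predictions, dataset, area=area, limit=limit)
        print("AR_{}@{}: {:.4f}".format(area, limit, stats["ar"].item()))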
def prepare_targets(self, targets, anchors):
    cls_labels = []
    reg_targets = []
    for im_i in range(len(targets)):
        targets_per_im = targets[im_i]
        assert targets_per_im.mode == "xyxy"
        bboxes_per_im = targets_per_im.bbox
        labels_per_im = targets_per_im.get_field("labels")
        anchors_per_im = cat_boxlist(anchors[im_i])
        num_gt = bboxes_per_im.shape[0]

        if self.positive_type == 'SSC':
            object_sizes_of_interest = [[-1, 64], [64, 128], [128, 256],
                                        [256, 512], [512, INF]]
            area_per_im = targets_per_im.area()
            expanded_object_sizes_of_interest = []
            points = []
            for l, anchors_per_level in enumerate(anchors[im_i]):
                anchors_per_level = anchors_per_level.bbox
                anchors_cx_per_level = (anchors_per_level[:, 2] +
                                        anchors_per_level[:, 0]) / 2.0
                anchors_cy_per_level = (anchors_per_level[:, 3] +
                                        anchors_per_level[:, 1]) / 2.0
                points_per_level = torch.stack(
                    (anchors_cx_per_level, anchors_cy_per_level), dim=1)
                points.append(points_per_level)
                object_sizes_of_interest_per_level = \
                    points_per_level.new_tensor(object_sizes_of_interest[l])
                expanded_object_sizes_of_interest.append(
                    object_sizes_of_interest_per_level[None].expand(
                        len(points_per_level), -1))
            expanded_object_sizes_of_interest = torch.cat(
                expanded_object_sizes_of_interest, dim=0)
            points = torch.cat(points, dim=0)

            xs, ys = points[:, 0], points[:, 1]
            l = xs[:, None] - bboxes_per_im[:, 0][None]
            t = ys[:, None] - bboxes_per_im[:, 1][None]
            r = bboxes_per_im[:, 2][None] - xs[:, None]
            b = bboxes_per_im[:, 3][None] - ys[:, None]
            reg_targets_per_im = torch.stack([l, t, r, b], dim=2)

            is_in_boxes = reg_targets_per_im.min(dim=2)[0] > 0.01

            max_reg_targets_per_im = reg_targets_per_im.max(dim=2)[0]
            is_cared_in_the_level = \
                (max_reg_targets_per_im >= expanded_object_sizes_of_interest[:, [0]]) & \
                (max_reg_targets_per_im <= expanded_object_sizes_of_interest[:, [1]])

            locations_to_gt_area = area_per_im[None].repeat(len(points), 1)
            locations_to_gt_area[is_in_boxes == 0] = INF
            locations_to_gt_area[is_cared_in_the_level == 0] = INF

            locations_to_min_area, locations_to_gt_inds = locations_to_gt_area.min(
                dim=1)

            cls_labels_per_im = labels_per_im[locations_to_gt_inds]
            cls_labels_per_im[locations_to_min_area == INF] = 0
            matched_gts = bboxes_per_im[locations_to_gt_inds]
        elif self.positive_type == 'ATSS':
            num_anchors_per_level = [
                len(anchors_per_level.bbox)
                for anchors_per_level in anchors[im_i]
            ]
            ious = boxlist_iou(anchors_per_im, targets_per_im)

            gt_cx = (bboxes_per_im[:, 2] + bboxes_per_im[:, 0]) / 2.0
            gt_cy = (bboxes_per_im[:, 3] + bboxes_per_im[:, 1]) / 2.0
            gt_points = torch.stack((gt_cx, gt_cy), dim=1)

            anchors_cx_per_im = (anchors_per_im.bbox[:, 2] +
                                 anchors_per_im.bbox[:, 0]) / 2.0
            anchors_cy_per_im = (anchors_per_im.bbox[:, 3] +
                                 anchors_per_im.bbox[:, 1]) / 2.0
            anchor_points = torch.stack(
                (anchors_cx_per_im, anchors_cy_per_im), dim=1)

            distances = (anchor_points[:, None, :] -
                         gt_points[None, :, :]).pow(2).sum(-1).sqrt()

            # Selecting candidates based on the center distance between anchor box and object
            candidate_idxs = []
            star_idx = 0
            for level, anchors_per_level in enumerate(anchors[im_i]):
                end_idx = star_idx + num_anchors_per_level[level]
                distances_per_level = distances[star_idx:end_idx, :]
                _, topk_idxs_per_level = distances_per_level.topk(
                    self.topk, dim=0, largest=False)
                candidate_idxs.append(topk_idxs_per_level + star_idx)
                star_idx = end_idx
            candidate_idxs = torch.cat(candidate_idxs, dim=0)

            # Using the sum of mean and standard deviation as the IoU threshold
            # to select final positive samples
            candidate_ious = ious[candidate_idxs, torch.arange(num_gt)]
            iou_mean_per_gt = candidate_ious.mean(0)
            iou_std_per_gt = candidate_ious.std(0)
            iou_thresh_per_gt = iou_mean_per_gt + iou_std_per_gt
            is_pos = candidate_ious >= iou_thresh_per_gt[None, :]

            # Limiting the final positive samples' center to the object
            anchor_num = anchors_cx_per_im.shape[0]
            for ng in range(num_gt):
                candidate_idxs[:, ng] += ng * anchor_num
            e_anchors_cx = anchors_cx_per_im.view(1, -1).expand(
                num_gt, anchor_num).contiguous().view(-1)
            e_anchors_cy = anchors_cy_per_im.view(1, -1).expand(
                num_gt, anchor_num).contiguous().view(-1)
            candidate_idxs = candidate_idxs.view(-1)
            l = e_anchors_cx[candidate_idxs].view(-1, num_gt) - bboxes_per_im[:, 0]
            t = e_anchors_cy[candidate_idxs].view(-1, num_gt) - bboxes_per_im[:, 1]
            r = bboxes_per_im[:, 2] - e_anchors_cx[candidate_idxs].view(-1, num_gt)
            b = bboxes_per_im[:, 3] - e_anchors_cy[candidate_idxs].view(-1, num_gt)
            is_in_gts = torch.stack([l, t, r, b], dim=1).min(dim=1)[0] > 0.01
            is_pos = is_pos & is_in_gts

            # if an anchor box is assigned to multiple gts,
            # the one with the highest IoU will be selected.
            ious_inf = torch.full_like(ious, -INF).t().contiguous().view(-1)
            index = candidate_idxs.view(-1)[is_pos.view(-1)]
            ious_inf[index] = ious.t().contiguous().view(-1)[index]
            ious_inf = ious_inf.view(num_gt, -1).t()

            anchors_to_gt_values, anchors_to_gt_indexs = ious_inf.max(dim=1)
            cls_labels_per_im = labels_per_im[anchors_to_gt_indexs]
            cls_labels_per_im[anchors_to_gt_values == -INF] = 0
            matched_gts = bboxes_per_im[anchors_to_gt_indexs]
        elif self.positive_type == 'IoU':
            match_quality_matrix = boxlist_iou(targets_per_im, anchors_per_im)
            matched_idxs = self.matcher(match_quality_matrix)
            targets_per_im = targets_per_im.copy_with_fields(['labels'])
            matched_targets = targets_per_im[matched_idxs.clamp(min=0)]

            cls_labels_per_im = matched_targets.get_field("labels")
            cls_labels_per_im = cls_labels_per_im.to(dtype=torch.float32)

            # Background (negative examples)
            bg_indices = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
            cls_labels_per_im[bg_indices] = 0

            # discard indices that are between thresholds
            inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS
            cls_labels_per_im[inds_to_discard] = -1

            matched_gts = matched_targets.bbox

            # Limiting positive samples' center to the object
            # in order to filter out poor positives and use the centerness branch
            pos_idxs = torch.nonzero(cls_labels_per_im > 0).squeeze(1)
            pos_anchors_cx = (anchors_per_im.bbox[pos_idxs, 2] +
                              anchors_per_im.bbox[pos_idxs, 0]) / 2.0
            pos_anchors_cy = (anchors_per_im.bbox[pos_idxs, 3] +
                              anchors_per_im.bbox[pos_idxs, 1]) / 2.0
            l = pos_anchors_cx - matched_gts[pos_idxs, 0]
            t = pos_anchors_cy - matched_gts[pos_idxs, 1]
            r = matched_gts[pos_idxs, 2] - pos_anchors_cx
            b = matched_gts[pos_idxs, 3] - pos_anchors_cy
            is_in_gts = torch.stack([l, t, r, b], dim=1).min(dim=1)[0] > 0.01
            cls_labels_per_im[pos_idxs[is_in_gts == 0]] = -1
        else:
            raise NotImplementedError

        reg_targets_per_im = self.box_coder.encode(matched_gts,
                                                   anchors_per_im.bbox)
        cls_labels.append(cls_labels_per_im)
        reg_targets.append(reg_targets_per_im)

    return cls_labels, reg_targets
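# A toy, self-contained sketch of the ATSS thresholding used above: for each GT,
# the candidates are the distance-closest anchors per level, and an anchor is
# positive when its IoU is at least mean + std of the candidate IoUs. The IoU
# values below are made up for a single GT with six candidate anchors.
import torch

candidate_ious = torch.tensor([[0.62], [0.55], [0.41], [0.18], [0.12], [0.07]])

iou_mean_per_gt = candidate_ious.mean(0)
iou_std_per_gt = candidate_ious.std(0)
iou_thresh_per_gt = iou_mean_per_gt + iou_std_per_gt

# only candidates above the per-GT adaptive threshold stay positive; the
# center-inside-GT test and the highest-IoU tie-break would follow as in
# prepare_targets above
is_pos = candidate_ious >= iou_thresh_per_gt[None, :]
print(iou_thresh_per_gt, is_pos.view(-1))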