def _get_proposal_pairs(self, proposals):
    proposal_pairs = []
    for i, proposals_per_image in enumerate(proposals):
        box_subj = proposals_per_image.bbox
        box_obj = proposals_per_image.bbox

        # enumerate all ordered (subject, object) box pairs by broadcasting
        box_subj = box_subj.unsqueeze(1).repeat(1, box_subj.shape[0], 1)
        box_obj = box_obj.unsqueeze(0).repeat(box_obj.shape[0], 1, 1)
        proposal_box_pairs = torch.cat(
            (box_subj.view(-1, 4), box_obj.view(-1, 4)), 1)

        idx_subj = torch.arange(box_subj.shape[0]).view(-1, 1, 1).repeat(
            1, box_obj.shape[0], 1).to(proposals_per_image.bbox.device)
        idx_obj = torch.arange(box_obj.shape[0]).view(1, -1, 1).repeat(
            box_subj.shape[0], 1, 1).to(proposals_per_image.bbox.device)
        proposal_idx_pairs = torch.cat(
            (idx_subj.view(-1, 1), idx_obj.view(-1, 1)), 1)

        # drop self-pairs (subject index == object index)
        keep_idx = (proposal_idx_pairs[:, 0] !=
                    proposal_idx_pairs[:, 1]).nonzero().view(-1)

        # optionally filter out pairs of non-overlapping bounding boxes
        if self.cfg.MODEL.ROI_RELATION_HEAD.FILTER_NON_OVERLAP:
            ious = boxlist_iou(proposals_per_image,
                               proposals_per_image).view(-1)
            ious = ious[keep_idx]
            keep_idx = keep_idx[(ious > 0).nonzero().view(-1)]

        proposal_idx_pairs = proposal_idx_pairs[keep_idx]
        proposal_box_pairs = proposal_box_pairs[keep_idx]
        proposal_pairs_per_image = BoxPairList(proposal_box_pairs,
                                               proposals_per_image.size,
                                               proposals_per_image.mode)
        proposal_pairs_per_image.add_field("idx_pairs", proposal_idx_pairs)

        proposal_pairs.append(proposal_pairs_per_image)
    return proposal_pairs
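# Illustrative sketch (not part of the pipeline): a self-contained toy example of
# the subject/object pairing trick used in _get_proposal_pairs above. All ordered
# pairs (i, j) of N boxes are enumerated by broadcasting, and self-pairs (i == i)
# are dropped. The tensor values here are made up purely for demonstration.
def _demo_pair_enumeration():
    import torch
    boxes = torch.tensor([[0., 0., 10., 10.],
                          [5., 5., 15., 15.],
                          [20., 20., 30., 30.]])
    n = boxes.shape[0]
    subj = boxes.unsqueeze(1).repeat(1, n, 1).view(-1, 4)  # subject box of each pair
    obj = boxes.unsqueeze(0).repeat(n, 1, 1).view(-1, 4)   # object box of each pair
    idx_subj = torch.arange(n).view(-1, 1).repeat(1, n).view(-1)
    idx_obj = torch.arange(n).view(1, -1).repeat(n, 1).view(-1)
    keep = idx_subj != idx_obj                              # drop self-pairs
    pairs = torch.cat((subj, obj), dim=1)[keep]             # shape: (n * (n - 1), 8)
    return pairs                                            # here: (6, 8)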
def match_targets_to_proposals(self, proposal, target):
    match_quality_matrix = boxlist_iou(target, proposal)

    # build the pairwise match-quality matrix between GT box pairs and
    # proposal box pairs, together with the GT box pairs themselves
    temp = []
    target_box_pairs = []
    for i in range(match_quality_matrix.shape[0]):
        for j in range(match_quality_matrix.shape[0]):
            match_i = match_quality_matrix[i].view(1, -1)
            match_j = match_quality_matrix[j].view(-1, 1)
            match_ij = (match_i + match_j) / 2
            match_ij.view(-1)[::match_quality_matrix.shape[1]] = 0
            temp.append(match_ij)
            boxi = target.bbox[i]
            boxj = target.bbox[j]
            box_pair = torch.cat((boxi, boxj), 0)
            target_box_pairs.append(box_pair)

    match_pair_quality_matrix = torch.stack(temp, 0).view(len(temp), -1)
    target_box_pairs = torch.stack(target_box_pairs, 0)

    target_pair = BoxPairList(target_box_pairs, target.size, target.mode)
    target_pair.add_field("labels", target.get_field("pred_labels").view(-1))

    # enumerate all ordered (subject, object) proposal pairs
    box_subj = proposal.bbox
    box_obj = proposal.bbox
    box_subj = box_subj.unsqueeze(1).repeat(1, box_subj.shape[0], 1)
    box_obj = box_obj.unsqueeze(0).repeat(box_obj.shape[0], 1, 1)
    proposal_box_pairs = torch.cat(
        (box_subj.view(-1, 4), box_obj.view(-1, 4)), 1)
    proposal_pairs = BoxPairList(proposal_box_pairs, proposal.size,
                                 proposal.mode)

    idx_subj = torch.arange(box_subj.shape[0]).view(-1, 1, 1).repeat(
        1, box_obj.shape[0], 1).to(proposal.bbox.device)
    idx_obj = torch.arange(box_obj.shape[0]).view(1, -1, 1).repeat(
        box_subj.shape[0], 1, 1).to(proposal.bbox.device)
    proposal_idx_pairs = torch.cat(
        (idx_subj.view(-1, 1), idx_obj.view(-1, 1)), 1)
    proposal_pairs.add_field("idx_pairs", proposal_idx_pairs)

    # matched_idxs = self.proposal_matcher(match_quality_matrix)
    matched_idxs = self.proposal_pair_matcher(match_pair_quality_matrix)

    # Fast RCNN only needs the "labels" field for selecting the targets
    # target = target.copy_with_fields("pred_labels")
    # get the target's corresponding GT for each proposal
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    if self.use_matched_pairs_only and (
            matched_idxs >= 0).sum() > self.minimal_matched_pairs:
        # keep only matched pairs (filter out all matched_idxs < 0)
        proposal_pairs = proposal_pairs[matched_idxs >= 0]
        matched_idxs = matched_idxs[matched_idxs >= 0]
    matched_targets = target_pair[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets, proposal_pairs
def match_targets_to_proposals(self, proposal, target):
    match_quality_matrix = boxlist_iou(target, proposal)
    matched_idxs = self.proposal_matcher(match_quality_matrix)
    # Fast RCNN only needs the "labels" field for selecting the targets
    target = target.copy_with_fields("labels")
    # get the target's corresponding GT for each proposal
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
def match_targets_to_anchors(self, anchor, target, copied_fields=[]):
    match_quality_matrix = boxlist_iou(target, anchor)
    matched_idxs = self.proposal_matcher(match_quality_matrix)
    # RPN doesn't need any fields from target
    # for creating the labels, so clear them all
    target = target.copy_with_fields(copied_fields)
    # get the targets corresponding GT for each anchor
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
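# Illustrative sketch (not part of the pipeline): a simplified version of what
# self.proposal_matcher does in the two functions above. Each anchor/proposal is
# assigned the index of its best-overlapping GT box, or a negative flag when the
# best IoU is too low (-1 below the low threshold, -2 between thresholds), which
# is why matched_idxs is clamped before indexing. The real Matcher additionally
# supports allowing low-quality matches, which this sketch omits; the threshold
# values here are illustrative defaults, not the repo's configuration.
def _demo_match(match_quality_matrix, high_thresh=0.7, low_thresh=0.3):
    # match_quality_matrix: (num_gt, num_candidates) IoU table
    matched_vals, matches = match_quality_matrix.max(dim=0)
    between = (matched_vals >= low_thresh) & (matched_vals < high_thresh)
    matches[matched_vals < low_thresh] = -1  # background
    matches[between] = -2                    # ignored during training
    return matches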
def _fullsample_test(self, proposals):
    """
    This method gets all subject-object pairs and returns the proposals.
    Note: this function keeps state.

    Arguments:
        proposals (list[BoxList])
    """
    proposal_pairs = []
    for i, proposals_per_image in enumerate(proposals):
        box_subj = proposals_per_image.bbox
        box_obj = proposals_per_image.bbox

        # enumerate all ordered (subject, object) box pairs by broadcasting
        box_subj = box_subj.unsqueeze(1).repeat(1, box_subj.shape[0], 1)
        box_obj = box_obj.unsqueeze(0).repeat(box_obj.shape[0], 1, 1)
        proposal_box_pairs = torch.cat(
            (box_subj.view(-1, 4), box_obj.view(-1, 4)), 1)

        idx_subj = torch.arange(box_subj.shape[0]).view(-1, 1, 1).repeat(
            1, box_obj.shape[0], 1).to(proposals_per_image.bbox.device)
        idx_obj = torch.arange(box_obj.shape[0]).view(1, -1, 1).repeat(
            box_subj.shape[0], 1, 1).to(proposals_per_image.bbox.device)
        proposal_idx_pairs = torch.cat(
            (idx_subj.view(-1, 1), idx_obj.view(-1, 1)), 1)

        # drop self-pairs (subject index == object index)
        keep_idx = (proposal_idx_pairs[:, 0] !=
                    proposal_idx_pairs[:, 1]).nonzero().view(-1)

        # optionally filter out pairs of non-overlapping bounding boxes
        if self.cfg.MODEL.ROI_RELATION_HEAD.FILTER_NON_OVERLAP:
            ious = boxlist_iou(proposals_per_image,
                               proposals_per_image).view(-1)
            ious = ious[keep_idx]
            keep_idx = keep_idx[(ious > 0).nonzero().view(-1)]

        proposal_idx_pairs = proposal_idx_pairs[keep_idx]
        proposal_box_pairs = proposal_box_pairs[keep_idx]
        proposal_pairs_per_image = BoxPairList(proposal_box_pairs,
                                               proposals_per_image.size,
                                               proposals_per_image.mode)
        proposal_pairs_per_image.add_field("idx_pairs", proposal_idx_pairs)

        proposal_pairs.append(proposal_pairs_per_image)
    return proposal_pairs
def evaluate_box_proposals(predictions,
                           dataset,
                           thresholds=None,
                           area="all",
                           limit=None):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code.
    However, it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],    # all
        [0**2, 32**2],     # small
        [32**2, 96**2],    # medium
        [96**2, 1e5**2],   # large
        [96**2, 128**2],   # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = image_id  # dataset.id_to_img_map[image_id]

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height),
                           mode="xywh").convert("xyxy")
        gt_areas = torch.as_tensor(
            [obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)

    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
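# Illustrative sketch (not part of the evaluation code): the tail end of
# evaluate_box_proposals in isolation, with made-up overlap values, showing how
# the sorted per-GT max overlaps are turned into recall at each IoU threshold
# and then averaged into AR.
def _demo_average_recall():
    import torch
    gt_overlaps = torch.tensor([0.92, 0.71, 0.55, 0.48, 0.30])  # toy max IoU per GT box
    num_pos = len(gt_overlaps)
    thresholds = torch.arange(0.5, 0.95 + 1e-5, 0.05, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    ar = recalls.mean()  # average recall over IoU thresholds 0.5:0.05:0.95
    return ar, recalls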
def calc_detection_voc_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5):
    """Calculate precision and recall based on the evaluation code of PASCAL VOC.

    This function calculates precision and recall of predicted bounding boxes
    obtained from a dataset which has :math:`N` images. The code is based on
    the evaluation code used in the PASCAL VOC Challenge.
    """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)
    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        pred_bbox = pred_boxlist.bbox.numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox.numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        gt_difficult = gt_boxlist.get_field("difficult").numpy()

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0, ) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1
            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.size),
                BoxList(gt_bbox_l, gt_boxlist.size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    n_fg_class = max(n_pos.keys()) + 1
    prec = [None] * n_fg_class
    rec = [None] * n_fg_class

    for l in n_pos.keys():
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]

    return prec, rec
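# Illustrative sketch (not necessarily the repo's exact helper): the prec/rec
# lists returned above are typically reduced to per-class average precision with
# the standard PASCAL VOC every-point interpolation, along these lines.
def _demo_voc_ap(prec, rec):
    import numpy as np
    n_fg_class = len(prec)
    ap = np.empty(n_fg_class)
    for l in range(n_fg_class):
        if prec[l] is None or rec[l] is None:
            ap[l] = np.nan
            continue
        # pad the curves and replace nan precisions with 0
        mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
        mrec = np.concatenate(([0], rec[l], [1]))
        # make precision monotonically non-increasing (from the right)
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]
        # sum the areas under the piecewise-constant precision/recall curve
        i = np.where(mrec[1:] != mrec[:-1])[0]
        ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap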
def _relpnsample_test(self, proposals):
    """
    Perform relpn-based sampling during testing.
    """
    proposal_pairs = self._fullsample_test(proposals)
    proposal_pairs = list(proposal_pairs)

    relnesses = []
    for img_idx, proposals_per_image in enumerate(proposals):
        obj_logits = proposals_per_image.get_field('logits')
        obj_bboxes = proposals_per_image.bbox
        relness = self.relationshipness(obj_logits, obj_bboxes,
                                        proposals_per_image.size)

        # drop self-pairs (subject index == object index)
        keep_idx = (1 - torch.eye(obj_logits.shape[0]).to(
            relness.device)).view(-1).nonzero().view(-1)

        # optionally filter out pairs of non-overlapping bounding boxes
        if self.cfg.MODEL.ROI_RELATION_HEAD.FILTER_NON_OVERLAP:
            ious = boxlist_iou(proposals_per_image,
                               proposals_per_image).view(-1)
            ious = ious[keep_idx]
            keep_idx = keep_idx[(ious > 0).nonzero().view(-1)]

        relness = relness.view(-1)[keep_idx]

        relness_sorted, order = torch.sort(relness.view(-1), descending=True)

        # perform co-nms to filter duplicate bounding boxes
        # ious = boxlist_iou(proposals_per_image, proposals_per_image)
        # subj_ids = []; obj_ids = []
        # sample_ids = []; id = 0
        # while len(sample_ids) < self.cfg.MODEL.ROI_RELATION_HEAD.BATCH_SIZE_PER_IMAGE and id < len(order):
        #     subj_id = order[id] / len(proposals_per_image)
        #     obj_id = order[id] % len(proposals_per_image)
        #
        #     if len(subj_ids) == 0 and len(obj_ids) == 0 and subj_id != obj_id:
        #         subj_ids.append(subj_id.item())
        #         obj_ids.append(obj_id.item())
        #         sample_ids.append(id)
        #     else:
        #         subj_ious = ious[subj_id, subj_ids]
        #         obj_ious = ious[obj_id, obj_ids]
        #         if (subj_ious.max() < 0.9 or obj_ious.max() < 0.9) and subj_id != obj_id:
        #             subj_ids.append(subj_id.item())
        #             obj_ids.append(obj_id.item())
        #             sample_ids.append(id)
        #     id += 1
        # img_sampled_inds = order[sample_ids]
        # relness = relness_sorted[sample_ids]

        # keep the top-k highest-scoring pairs per image
        img_sampled_inds = order[:self.cfg.MODEL.ROI_RELATION_HEAD.
                                 BATCH_SIZE_PER_IMAGE].view(-1)
        relness = relness_sorted[:self.cfg.MODEL.ROI_RELATION_HEAD.
                                 BATCH_SIZE_PER_IMAGE].view(-1)
        proposal_pairs_per_image = proposal_pairs[img_idx][img_sampled_inds]
        proposal_pairs[img_idx] = proposal_pairs_per_image
        relnesses.append(relness)

    self._proposal_pairs = proposal_pairs
    return proposal_pairs, relnesses
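# Illustrative sketch (toy values, not part of the pipeline): the top-k selection
# used in _relpnsample_test above. Candidate pairs are ranked by their
# relationshipness score and only the k highest-scoring pairs are kept; k stands
# in for cfg.MODEL.ROI_RELATION_HEAD.BATCH_SIZE_PER_IMAGE.
def _demo_topk_pairs(relness, k):
    import torch
    relness_sorted, order = torch.sort(relness.view(-1), descending=True)
    keep = order[:k].view(-1)             # indices into the candidate pair list
    scores = relness_sorted[:k].view(-1)  # their relationshipness scores
    return keep, scores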