from collections import defaultdict

import numpy as np
import torch

# BoxList, boxlist_nms and boxlist_iou are assumed to come from a
# maskrcnn-benchmark-style `structures` package; adjust the import to
# match the surrounding project.
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou, boxlist_nms


def cat_boxlist(bboxes):
    """
    Concatenates a list of BoxList (having the same image size) into a
    single BoxList

    Arguments:
        bboxes (list[BoxList])
    """
    assert isinstance(bboxes, (list, tuple))
    assert all(isinstance(bbox, BoxList) for bbox in bboxes)

    size = bboxes[0].size
    assert all(bbox.size == size for bbox in bboxes)

    mode = bboxes[0].mode
    assert all(bbox.mode == mode for bbox in bboxes)

    fields = set(bboxes[0].fields())
    assert all(set(bbox.fields()) == fields for bbox in bboxes)

    cat_boxes = BoxList(_cat([bbox.bbox for bbox in bboxes], dim=0), size, mode)

    for field in fields:
        data = _cat([bbox.get_field(field) for bbox in bboxes], dim=0)
        cat_boxes.add_field(field, data)

    return cat_boxes
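# `cat_boxlist` relies on a `_cat` helper that is not shown in this
# section. A minimal sketch, assuming the usual maskrcnn-benchmark
# behavior of skipping the copy when the list holds a single tensor:
def _cat(tensors, dim=0):
    assert isinstance(tensors, (list, tuple))
    if len(tensors) == 1:
        return tensors[0]
    return torch.cat(tensors, dim)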
def single_fmps_process(self, location, pred_cls, pred_box, pred_centerness, image_sizes):
    # Decode the predictions of a single FPN feature map into per-image BoxLists.
    B, H, W, C = pred_cls.shape

    pred_cls = pred_cls.view(B, -1, C).sigmoid()
    pred_box = pred_box.view(B, -1, 4)
    pred_centerness = pred_centerness.view(B, -1).sigmoid()

    # multiply the classification scores with centerness scores
    pred_cls = pred_cls * pred_centerness[:, :, None]

    cls_mask = pred_cls > self.conf_thresh
    cls_mask_top_n = cls_mask.view(B, -1).sum(1)
    cls_mask_top_n = cls_mask_top_n.clamp(max=self.nms_thresh_topN)

    res = []
    for b in range(B):
        per_cls = pred_cls[b]
        per_cls_mask = cls_mask[b]
        per_cls = per_cls[per_cls_mask]

        per_cls_mask_nonzeros = per_cls_mask.nonzero()
        per_box_loc = per_cls_mask_nonzeros[:, 0]
        per_box_cls = per_cls_mask_nonzeros[:, 1] + 1  # class index (0 is background)

        per_box = pred_box[b]
        per_box = per_box[per_box_loc]
        per_location = location[per_box_loc]

        # keep only the top-N candidates per image before NMS
        per_cls_mask_top_n = cls_mask_top_n[b]
        if per_cls_mask.sum().item() > per_cls_mask_top_n.item():
            per_cls, top_k_idx = per_cls.topk(int(per_cls_mask_top_n.item()), sorted=False)
            per_box_cls = per_box_cls[top_k_idx]
            per_box = per_box[top_k_idx]
            per_location = per_location[top_k_idx]

        # (l, t, r, b) distances -> (x1, y1, x2, y2) boxes
        detections = torch.stack([
            per_location[:, 0] - per_box[:, 0],
            per_location[:, 1] - per_box[:, 1],
            per_location[:, 0] + per_box[:, 2],
            per_location[:, 1] + per_box[:, 3],
        ], dim=1)

        h, w = image_sizes[b]  # size of image b, not image 0
        box_list = BoxList(detections, (w, h), mode='xyxy')
        box_list.add_field('labels', per_box_cls)
        box_list.add_field('scores', per_cls)
        box_list = box_list.clip_to_image(remove_empty=False)
        res.append(box_list)

    return res
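# A sketch of how a caller might stitch the pieces together: run
# `single_fmps_process` once per FPN level, regroup the per-level results
# by image with `cat_boxlist`, then apply `select_over_all_levels`. The
# method name and attribute wiring follow the usual FCOS post-processor
# and are assumptions, not this repo's confirmed API:
def forward(self, locations, cls_preds, box_preds, centerness_preds, image_sizes):
    # one list of per-image BoxLists per level
    sampled_boxes = []
    for loc, cls, box, ctr in zip(locations, cls_preds, box_preds, centerness_preds):
        sampled_boxes.append(
            self.single_fmps_process(loc, cls, box, ctr, image_sizes))
    # regroup: list over levels of lists over images -> list over images
    boxlists = list(zip(*sampled_boxes))
    boxlists = [cat_boxlist(list(per_image)) for per_image in boxlists]
    # class-wise NMS plus the global per-image cap
    return self.select_over_all_levels(boxlists)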
def select_layers(loc, cls, box, centerness, img_size):
    # Decode one FPN level: reshape to (batch, H*W, ...) and threshold.
    batch, channel, height, width = cls.shape

    cls = cls.permute(0, 2, 3, 1)
    cls = cls.reshape(batch, -1, channel).sigmoid()
    box = box.permute(0, 2, 3, 1)
    box = box.reshape(batch, -1, 4)
    centerness = centerness.permute(0, 2, 3, 1)
    centerness = centerness.reshape(batch, -1).sigmoid()

    # candidates are selected on the raw class scores,
    # then the scores are down-weighted by centerness
    select_id = cls > PRE_NMS_THRESH
    num_id = select_id.view(batch, -1).sum(1)
    num_id = num_id.clamp(max=PRE_NMS_TOK_K)
    cls = cls * centerness[:, :, None]

    res = []
    for i in range(batch):
        cls_i = cls[i]
        select_id_i = select_id[i]
        cls_i = cls_i[select_id_i]

        per_candidate_nonzeros = select_id_i.nonzero()
        loc_i = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1] + 1  # 0 is background

        box_i = box[i]
        box_i = box_i[loc_i]
        per_locs = loc[loc_i]

        top_k = num_id[i]
        if select_id_i.sum().item() > top_k.item():
            cls_i, top_k_index = cls_i.topk(int(top_k.item()), sorted=False)
            per_class = per_class[top_k_index]
            box_i = box_i[top_k_index]
            per_locs = per_locs[top_k_index]

        # (l, t, r, b) distances -> (x1, y1, x2, y2)
        l_ = per_locs[:, 0] - box_i[:, 0]
        t_ = per_locs[:, 1] - box_i[:, 1]
        r_ = per_locs[:, 0] + box_i[:, 2]
        b_ = per_locs[:, 1] + box_i[:, 3]
        regs = torch.stack([l_, t_, r_, b_], dim=1)

        h, w = img_size[i]
        boxlist = BoxList(regs, (int(w), int(h)), mode='xyxy')
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", cls_i)
        boxlist = boxlist.clip_to_image(remove_empty=False)
        # boxlist = remove_small_boxes(boxlist, self.min_size)
        res.append(boxlist)

    return res
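# Both decoders above consume a `location` / `loc` grid holding one (x, y)
# center per feature-map cell. A minimal sketch of how such a grid is
# usually built in FCOS-style code, for a single level with the given
# stride; the helper name follows the FCOS convention and is not defined
# in this section:
def compute_locations_per_level(h, w, stride, device='cpu'):
    shifts_x = torch.arange(0, w * stride, step=stride,
                            dtype=torch.float32, device=device)
    shifts_y = torch.arange(0, h * stride, step=stride,
                            dtype=torch.float32, device=device)
    shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing='ij')
    shift_x = shift_x.reshape(-1)
    shift_y = shift_y.reshape(-1)
    # offset by half a stride so locations sit at cell centers
    return torch.stack((shift_x, shift_y), dim=1) + stride // 2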
def select_over_all_levels(self, boxlists):
    num_imgs = len(boxlists)
    results = []
    for i in range(num_imgs):
        scores = boxlists[i].get_field("scores")
        labels = boxlists[i].get_field("labels")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        result = []
        # skip the background
        for j in range(1, self.num_classes + 1):
            idx = (labels == j).nonzero().view(-1)

            scores_j = scores[idx]
            boxes_j = boxes[idx, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            # class-wise NMS
            boxlist_for_class = boxlist_nms(boxlist_for_class,
                                            self.nms_thresh,
                                            score_field='scores')
            num_labels = len(boxlist_for_class)
            labels_cur = torch.full((num_labels, ), j,
                                    dtype=torch.int64,
                                    device=scores.device)
            boxlist_for_class.add_field('labels', labels_cur)
            result.append(boxlist_for_class)

        # merge the per-class BoxLists before applying the global cap;
        # without this, `result` is still a Python list and the
        # get_field call below fails
        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.nms_thresh_topN > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.nms_thresh_topN + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
def select_over_all_levels(boxlist):
    num_images = len(boxlist)
    results = []
    for i in range(num_images):
        # use a separate name here; rebinding `boxlist` would break the
        # remaining iterations over the input list
        boxlist_i = boxlist[i]
        scores = boxlist_i.get_field("scores")
        labels = boxlist_i.get_field("labels")
        boxes = boxlist_i.bbox
        result = []
        for j in range(1, CLASS):
            inds = (labels == j).nonzero().view(-1)

            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist_i.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(boxlist_for_class,
                                            NMS_THRESH,
                                            score_field="scores")
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ), j,
                           dtype=torch.int64,
                           device=scores.device))
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to FPN_POS_NMS_TOP_K detections over all classes
        if number_of_detections > FPN_POS_NMS_TOP_K > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - FPN_POS_NMS_TOP_K + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
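# Both variants cap the per-image detections with `torch.kthvalue` rather
# than a full sort: keeping the top K of N scores is equivalent to
# thresholding at the (N - K + 1)-th smallest score. A small
# self-contained check of that identity (the numbers are illustrative):
def _kthvalue_topk_demo():
    scores = torch.tensor([0.9, 0.1, 0.7, 0.3, 0.8])
    K = 3
    N = scores.numel()

    # threshold at the (N - K + 1)-th smallest value ...
    thresh, _ = torch.kthvalue(scores, N - K + 1)
    keep = torch.nonzero(scores >= thresh.item()).squeeze(1)

    # ... which keeps exactly the top-K scores (up to ties)
    assert set(keep.tolist()) == set(scores.topk(K).indices.tolist())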
def calc_detection_voc_prec_rec(gt_boxlists, pred_boxlists, iou_thresh=0.5):
    """Calculate precision and recall based on evaluation code of PASCAL VOC.

    This function calculates precision and recall of predicted bounding
    boxes obtained from a dataset which has :math:`N` images. The code is
    based on the evaluation code used in PASCAL VOC Challenge.
    """
    n_pos = defaultdict(int)
    score = defaultdict(list)
    match = defaultdict(list)
    gt_labels = []
    for gt_boxlist, pred_boxlist in zip(gt_boxlists, pred_boxlists):
        # work in numpy: the slicing and sorting below use numpy idioms
        # (e.g. [::-1] and .copy()) that torch tensors do not support
        pred_bbox = pred_boxlist.bbox.numpy()
        pred_label = pred_boxlist.get_field("labels").numpy()
        pred_score = pred_boxlist.get_field("scores").numpy()
        gt_bbox = gt_boxlist.bbox.numpy()
        gt_label = gt_boxlist.get_field("labels").numpy()
        gt_difficult = gt_boxlist.get_field("difficult").numpy()
        gt_labels.append(gt_label)

        for l in np.unique(np.concatenate((pred_label, gt_label)).astype(int)):
            pred_mask_l = pred_label == l
            pred_bbox_l = pred_bbox[pred_mask_l]
            pred_score_l = pred_score[pred_mask_l]
            # sort by score
            order = pred_score_l.argsort()[::-1]
            pred_bbox_l = pred_bbox_l[order]
            pred_score_l = pred_score_l[order]

            gt_mask_l = gt_label == l
            gt_bbox_l = gt_bbox[gt_mask_l]
            gt_difficult_l = gt_difficult[gt_mask_l]

            n_pos[l] += np.logical_not(gt_difficult_l).sum()
            score[l].extend(pred_score_l)

            if len(pred_bbox_l) == 0:
                continue
            if len(gt_bbox_l) == 0:
                match[l].extend((0, ) * pred_bbox_l.shape[0])
                continue

            # VOC evaluation follows integer typed bounding boxes.
            pred_bbox_l = pred_bbox_l.copy()
            pred_bbox_l[:, 2:] += 1
            gt_bbox_l = gt_bbox_l.copy()
            gt_bbox_l[:, 2:] += 1

            iou = boxlist_iou(
                BoxList(pred_bbox_l, gt_boxlist.size),
                BoxList(gt_bbox_l, gt_boxlist.size),
            ).numpy()
            gt_index = iou.argmax(axis=1)
            # set -1 if there is no matching ground truth
            gt_index[iou.max(axis=1) < iou_thresh] = -1
            del iou

            # greedy matching: each ground truth may be claimed once;
            # matches to "difficult" ground truths are marked -1 (ignored)
            selec = np.zeros(gt_bbox_l.shape[0], dtype=bool)
            for gt_idx in gt_index:
                if gt_idx >= 0:
                    if gt_difficult_l[gt_idx]:
                        match[l].append(-1)
                    else:
                        if not selec[gt_idx]:
                            match[l].append(1)
                        else:
                            match[l].append(0)
                    selec[gt_idx] = True
                else:
                    match[l].append(0)

    # recompute n_pos from the raw label counts; note this overwrites the
    # difficult-aware counts accumulated above, so every ground-truth box
    # (difficult or not) counts towards recall
    gt_labels = np.concatenate(gt_labels)
    n_pos = {}
    for l in np.unique(gt_labels.astype(int)):
        m1 = np.sum(gt_labels == l)
        m2 = np.sum(gt_labels.astype(int) == l)
        if m1 != m2:
            # sanity check: labels should already be integral
            print(m1, m2)
        n_pos[l] = m2

    prec = {}
    rec = {}
    n_fp = {}
    n_tp = {}
    for l in n_pos.keys():
        score_l = np.array(score[l])
        match_l = np.array(match[l], dtype=np.int8)

        order = score_l.argsort()[::-1]
        match_l = match_l[order]

        tp = np.cumsum(match_l == 1)
        fp = np.cumsum(match_l == 0)
        n_tp[l] = np.sum(match_l == 1)
        n_fp[l] = np.sum(match_l == 0)

        # If an element of fp + tp is 0,
        # the corresponding element of prec[l] is nan.
        prec[l] = tp / (fp + tp)
        # If n_pos[l] is 0, rec[l] is None.
        if n_pos[l] > 0:
            rec[l] = tp / n_pos[l]
        else:
            rec[l] = None

    return prec, rec, n_tp, n_fp, n_pos
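# The returned `prec` / `rec` dicts are per-class curves, not scalars;
# turning them into an average precision still needs an interpolation
# step. A minimal sketch of the standard all-point VOC AP computation
# over these outputs, in the chainercv style this evaluator is based on
# (the function is an assumption here, not part of this repo):
def calc_detection_voc_ap(prec, rec):
    ap = {}
    for l in prec.keys():
        if prec[l] is None or rec[l] is None:
            ap[l] = np.nan
            continue
        # pad the curve, replacing nan precisions with 0
        mpre = np.concatenate(([0], np.nan_to_num(prec[l]), [0]))
        mrec = np.concatenate(([0], rec[l], [1]))
        # make precision monotonically decreasing from right to left
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]
        # sum the area under the step function where recall changes
        i = np.where(mrec[1:] != mrec[:-1])[0]
        ap[l] = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap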