def _bbox_nms(self, refined_props_nms):
    """Run non-maximum suppression separately on each class of refined proposals.

    Args:
        refined_props_nms(Tensor): [N, M, (cls, x1, y1, x2, y2, cls_score)].
            Only N == 1 is supported. The batch dimension is squeezed away
            in place, so the caller's tensor is [M, 6] after this call.

    Returns:
        keep_idx(LongTensor): positions (along M) of the proposals kept by
            NMS, concatenated class by class. Empty when M == 0.

    Raises:
        AssertionError: if the batch size is not 1.

    """
    assert refined_props_nms.size(0) == 1, "batch size >=2 is not supported yet."
    # In-place squeeze is kept on purpose: callers may rely on the squeezed tensor.
    refined_props_nms.squeeze_(0)
    nms_thresh = float(self.config['TEST']['NMS_THRESH'])

    # Group proposals by class, remembering each one's position in the input.
    # The key must be a plain Python number: a 0-dim tensor hashes by identity,
    # which would put every proposal in its own group and disable NMS.
    props_indexed = {}
    for pos, prop in enumerate(refined_props_nms):
        props_indexed.setdefault(float(prop[0]), []).append((pos, prop[1:]))

    keep_idx = []
    for pos_props in props_indexed.values():
        # prop: [K, (x1, y1, x2, y2, cls_score)] for the K proposals of this class.
        prop = torch.cat([p.unsqueeze(0) for _, p in pos_props], 0)
        # NOTE(review): index tensor is created on prop's device so it can be
        # indexed by `order`; confirm against the project's `nms` return type.
        pos = torch.tensor([i for i, _ in pos_props],
                           dtype=torch.long, device=prop.device)
        score = prop[:, 4]
        # nms receives the boxes sorted by descending score.
        order = torch.sort(score, dim=0, descending=True)[1]
        pos_ordered = pos[order]
        prop_ordered = prop[order]
        keep_idx_per_cls = nms(prop_ordered, nms_thresh)
        # Map per-class keep indices back to positions in the input tensor.
        keep_idx.append(pos_ordered[keep_idx_per_cls])

    if not keep_idx:
        # No proposals at all: torch.cat([]) would raise.
        return torch.tensor([], dtype=torch.long,
                            device=refined_props_nms.device)
    return torch.cat(keep_idx)
def forward(self, feature_maps, gt_bboxes=None, img_shape=None):
    """Run the RPN on one feature map or on every level of a feature pyramid.

    With FPN enabled, the RPN is applied to each pyramid level, the per-level
    rois are concatenated, the top-scoring ones are kept, and a single NMS
    pass is applied across all levels. The losses are averaged over levels.

    Args:
        feature_maps(Variable): [p2, p3, p4, p5, p6] or [c5], feature pyramid or
            single feature map.
        gt_bboxes(Tensor): [N, M, (x1, y1, x2, y2)].
        img_shape(Tensor): [height, width], Image shape.

    Returns:
        rois(Tensor): [N, M, (idx, score, x1, y1, x2, y2)]
            N: batch size, M: number of roi after nms, idx: bbox index in mini-batch,
            score: objectness of roi.
        rpn_loss_cls(Tensor): Classification loss.
        rpn_loss_bbox(Tensor): Bounding box regression loss.

    Raises:
        AssertionError: if the batch size is not 1.

    """
    batch_size = feature_maps[0].size(0)
    assert batch_size == 1, "batch_size > 1 will add support later."

    if not self.use_fpn:
        rpn_result = self.rpn(feature_maps[0], img_shape, gt_bboxes, None)
        rois, rpn_loss_cls, rpn_loss_bbox = rpn_result
        return rois, rpn_loss_cls, rpn_loss_bbox

    # NOTE(review): the TRAIN_*/TEST_* split is selected by self.training.
    fpn_cfg = self.config['FPN']
    mode = 'TRAIN' if self.training else 'TEST'
    pre_nms_top_n = int(fpn_cfg[mode + '_FPN_PRE_NMS_TOP_N'])
    post_nms_top_n = int(fpn_cfg[mode + '_FPN_POST_NMS_TOP_N'])
    nms_thresh = float(fpn_cfg[mode + '_FPN_NMS_THRESH'])

    rois_pre_nms = []
    rpn_loss_cls = 0
    rpn_loss_bbox = 0
    for idx, feature in enumerate(feature_maps):
        # Swap in the anchor-target / proposal layers that belong to this level
        # before calling the shared RPN head.
        self.rpn.RPN_anchor_target = self.RPN_anchor_targets[idx]
        self.rpn.RPN_proposal = self.RPN_proposals[idx]
        rpn_result = self.rpn(feature, img_shape, gt_bboxes, None)
        roi_single, loss_cls_single, loss_bbox_single = rpn_result
        rpn_loss_cls += loss_cls_single
        rpn_loss_bbox += loss_bbox_single
        rois_pre_nms.append(roi_single)

    # [N, M, (n, score, x1, y1, x2, y2)], M summed over all pyramid levels.
    rois_pre_nms = torch.cat(rois_pre_nms, 1)

    # Apply nms to result of all pyramid rois.
    score = rois_pre_nms[0, :, 1]
    order = torch.sort(score, dim=0, descending=True)[1]
    # Truncate the order first so only the top rows are gathered.
    rois_pre_nms = rois_pre_nms[:, order[:pre_nms_top_n], :]
    score = rois_pre_nms[0, :, 1].unsqueeze(-1)
    bbox = rois_pre_nms[0, :, 2:]
    keep_idx = nms(torch.cat([bbox, score], 1), nms_thresh)
    keep_idx = keep_idx[:post_nms_top_n]
    # Gather all kept rois in one indexing op; this also yields an empty
    # [N, 0, 6] tensor when nothing is kept instead of failing in torch.cat.
    rois = rois_pre_nms[:, keep_idx.long().view(-1), :]

    rpn_loss_cls /= len(feature_maps)
    rpn_loss_bbox /= len(feature_maps)

    return rois, rpn_loss_cls, rpn_loss_bbox
def forward(self, input):
    """Convert RPN scores and box deltas into a zero-padded set of scored RoIs.

    Algorithm:
        for each (H, W) location i
            generate A anchor boxes centered on cell i
            apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        (inference only) drop proposals whose score is not above 0.05
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS to remaining proposals
        take post_nms_topN proposals after NMS

    Filtering out boxes smaller than a minimum size is NOT applied here.

    Args:
        input: indexable of four items:
            input[0]: class scores; channels from ``self._num_anchors`` onward
                are used as the foreground probabilities.
            input[1]: predicted bbox deltas, 4 values per anchor per location.
            input[2]: im_info, forwarded to ``clip_boxes``.
            input[3]: key into ``cfg`` selecting the RPN_* settings.

    Returns:
        Tensor of shape [batch_size, post_nms_topN, 6]. Column 0 is the index
        of the image in the batch, column 1 the proposal score, columns 2-5
        the proposal box. Rows beyond the number of kept proposals stay zero
        (except column 0).

    Raises:
        AssertionError: in inference mode when batch_size != 1.
    """
    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # Proposals scoring at or below this are dropped at inference time.
    min_score = 0.05

    batch_size = bbox_deltas.size(0)

    # One (x, y, x, y) shift per feature-map cell, in input-image pixels.
    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                         shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    # Broadcast the A base anchors over the K cell shifts -> K * A anchors.
    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info, batch_size)

    scores_keep = scores
    proposals_keep = proposals
    # NOTE(review): the score filter is applied only when not self.training.
    if not self.training:
        # filter out score below threshold
        assert batch_size == 1
        # Take the mask on row 0 so nonzero() yields column indices only;
        # on the 2-D [1, M] mask it returns (row, col) pairs, and flattening
        # those mixes row index 0 in with the real indices.
        scores_keep_idx = torch.nonzero(scores_keep[0] > min_score).view(-1)
        # If nothing passes the threshold, fall back to keeping everything.
        if scores_keep_idx.numel() != 0:
            scores_keep = scores_keep[:, scores_keep_idx]
            proposals_keep = proposals_keep[:, scores_keep_idx]

    # Sort all (proposal, score) pairs by score from highest to lowest.
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 6).zero_()
    for i in range(batch_size):
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # Take top pre_nms_topN (e.g. 6000); slicing past the end is a no-op.
        order_single = order[i]
        if pre_nms_topN > 0:
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # Apply nms (e.g. threshold = 0.7) on [x1, y1, x2, y2, score] rows,
        # then take post_nms_topN (e.g. 300).
        keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :].view(-1)

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1] = scores_single
        output[i, :num_proposal, 2:] = proposals_single

    return output