def forward(self, feature_maps, gt_bboxes=None, img_shape=None):
    """Run the RPN on the backbone features and return the selected RoIs.

    When ``self.use_fpn`` is set, the RPN is applied to every feature map in
    turn (swapping in that level's anchor-target and proposal layers), the
    per-level proposals are concatenated, and a single NMS pass is run over
    the union.  The per-level losses are averaged over the number of levels.
    Otherwise the RPN is applied to ``feature_maps[0]`` only.

    Args:
        feature_maps(Variable): [p2, p3, p4, p5, p6] or [c5], feature
            pyramid or single feature map.
        gt_bboxes(Tensor): [N, M, (x1, y1, x2, y2)].
        img_shape(Tensor): [height, width], Image shape.

    Returns:
        rois(Tensor): [N, M, (idx, x1, y1, x2, y2)] N: batch size, M: number
            of roi after nms, idx: bbox index in mini-batch.
        rpn_loss_cls(Tensor): Classification loss
        rpn_loss_bbox(Tensor): Bounding box regression loss

    Raises:
        AssertionError: if the mini-batch holds more than one image.
    """
    batch_size = feature_maps[0].size(0)
    assert batch_size == 1, "batch_size > 1 will add support later."

    if not self.use_fpn:
        rois_rpn, rpn_loss_cls, rpn_loss_bbox = self.rpn(
            feature_maps[0], img_shape, gt_bboxes, None)
        rois = rois_rpn[:, :, [0, 2, 3, 4, 5]]  # remove roi_score
        return rois, rpn_loss_cls, rpn_loss_bbox

    if self.training:
        post_nms_top_n = int(self.config['FPN']['TRAIN_FPN_POST_NMS_TOP_N'])
        nms_thresh = float(self.config['FPN']['TRAIN_FPN_NMS_THRESH'])
    else:
        post_nms_top_n = int(self.config['FPN']['TEST_FPN_POST_NMS_TOP_N'])
        nms_thresh = float(self.config['FPN']['TEST_FPN_NMS_THRESH'])

    rois_pre_nms = []
    rpn_loss_cls = 0
    rpn_loss_bbox = 0
    for idx, feature in enumerate(feature_maps):
        # The RPN head is shared; only the anchor-target / proposal layers
        # are swapped per pyramid level.
        self.rpn.RPN_anchor_target = self.RPN_anchor_targets[idx]
        self.rpn.RPN_proposal = self.RPN_proposals[idx]
        roi_single, loss_cls_single, loss_bbox_single = self.rpn(
            feature, img_shape, gt_bboxes, None)
        rpn_loss_cls += loss_cls_single
        rpn_loss_bbox += loss_bbox_single
        rois_pre_nms.append(roi_single)

    # [N, M, (n, score, x1, y1, x2, y2)], concatenated along M.
    rois_pre_nms = torch.cat(rois_pre_nms, 1)

    # Apply nms to result of all pyramid rois (single image, see assert).
    candidates = rois_pre_nms[0]
    score = candidates[:, 1].unsqueeze(-1)
    bbox = candidates[:, 2:]
    keep_idx = nms(torch.cat([bbox, score], 1), nms_thresh)
    # Flatten to a 1-D LongTensor so it can drive index_select; presumably
    # nms may hand back an index column of a non-long dtype.
    keep_idx = keep_idx.long().view(-1)[:post_nms_top_n]

    # One gather instead of concatenating per-index slices: also yields an
    # empty [0, 6] result instead of raising when nothing is kept.
    rois_per_img = candidates.index_select(0, keep_idx)
    rois = rois_per_img[:, [0, 2, 3, 4, 5]]  # remove roi_score
    rois = rois.unsqueeze(0)

    rpn_loss_cls /= len(feature_maps)
    rpn_loss_bbox /= len(feature_maps)

    return rois, rpn_loss_cls, rpn_loss_bbox
def forward(self, feature_maps, gt_bboxes=None, img_shape=None):
    """Run the RPN on the backbone features and return fixed-size RoIs.

    When ``self.use_fpn`` is set, the RPN is applied to every feature map in
    turn (swapping in that level's anchor-target and proposal layers), the
    per-level proposals are concatenated and NMS is run per image over the
    union.  Otherwise the RPN is applied to ``feature_maps[0]`` only.  In
    both cases the result is a zero-padded tensor with
    ``RPN_POST_NMS_TOP_N`` rows per image.

    Args:
        feature_maps: [p2, p3, p4, p5, p6] or [c5], feature pyramid or
            single feature map.
        gt_bboxes: [N, M, (x1, y1, x2, y2)].
        img_shape: [height, width], Image shape.

    Returns:
        rois(Tensor): [N, M, (idx, x1, y1, x2, y2)] N: batch size, M: number
            of roi after nms, idx: bbox index in mini-batch.
        rpn_loss_cls(Tensor): Classification loss
        rpn_loss_bbox(Tensor): Bounding box regression loss
    """
    batch_size = feature_maps[0].size(0)
    phase_cfg = cfg.TRAIN if self.training else cfg.TEST
    nms_output_num = phase_cfg.RPN_POST_NMS_TOP_N

    if self.use_fpn:
        # Use the NMS threshold of the active phase (it used to be the
        # TRAIN value unconditionally, even at inference).
        nms_thresh = phase_cfg.RPN_NMS_THRESH

        rois_pre_nms = []
        rpn_loss_cls = 0
        rpn_loss_bbox = 0
        for idx, feature in enumerate(feature_maps):
            # The RPN head is shared; only the anchor-target / proposal
            # layers are swapped per pyramid level.
            self.rpn.RPN_anchor_target = self.RPN_anchor_targets[idx]
            self.rpn.RPN_proposal = self.RPN_proposals[idx]
            roi_single, loss_cls_single, loss_bbox_single = self.rpn(
                feature, img_shape, gt_bboxes, None)
            rpn_loss_cls += loss_cls_single
            rpn_loss_bbox += loss_bbox_single
            # Reorder columns to (x1, y1, x2, y2, score), the layout fed
            # to nms below.
            roi_score = roi_single[:, :, 1].unsqueeze(-1)
            roi_bbox = roi_single[:, :, 2:]
            rois_pre_nms.append(torch.cat((roi_bbox, roi_score), 2))

        # [N, M, 5], torch.cat() at dim 'M'.
        rois_pre_nms = torch.cat(rois_pre_nms, 1)
        rois = feature_maps[0].data.new(batch_size, nms_output_num, 5).zero_()

        # Apply nms to result of all pyramid rois.
        for i in range(batch_size):
            keep_idx = nms(rois_pre_nms[i], nms_thresh)
            # Flatten to a 1-D LongTensor so it can drive index_select;
            # presumably nms may return an index column of another dtype.
            keep_idx = keep_idx.long().view(-1)[:nms_output_num]
            rois[i, :, 0] = i
            if keep_idx.numel() == 0:
                # Nothing survived NMS: leave this image's rows zero padded.
                continue
            rois_per_img = rois_pre_nms[i].index_select(0, keep_idx)
            # remove roi_score
            rois[i, :rois_per_img.size(0), 1:] = rois_per_img[:, :4]
    else:
        rois_rpn, rpn_loss_cls, rpn_loss_bbox = self.rpn(
            feature_maps[0], img_shape, gt_bboxes, None)
        rois = feature_maps[0].data.new(batch_size, nms_output_num, 5).zero_()
        # Tag every RoI with the index of its image in the mini-batch, as
        # the FPN branch does (a constant 0 is only right for batch size 1).
        for i in range(batch_size):
            rois[i, :, 0] = i
        rois[:, :, 1:] = rois_rpn[:, :, 2:]  # remove roi_score

    return rois, rpn_loss_cls, rpn_loss_bbox
def forward(self, input):
    """Convert RPN scores and bbox deltas into a fixed-size proposal tensor.

    Algorithm:
        1. for each (H, W) location i, generate A anchor boxes centered on
           cell i and apply the predicted bbox deltas at cell i to them
        2. clip predicted boxes to image
        3. (only when not training) keep proposals whose score is > 0.5,
           unless that would leave none
        4. sort all (proposal, score) pairs by score from highest to lowest
        5. take top pre_nms_topN proposals before NMS
        6. apply NMS to remaining proposals
        7. take top post_nms_topN proposals after NMS

    NOTE: the min-size filtering step (dropping boxes whose height or width
    is below a threshold) is not applied here.

    Args:
        input: sequence of four items:
            input[0]: class scores; the first ``self._num_anchors`` channels
                are bg probs, the second set are the fg probs (only the
                latter are used).
            input[1]: predicted bbox deltas.
            input[2]: im_info, forwarded to ``clip_boxes``.
            input[3]: key selecting the section of ``cfg`` to read the
                NMS settings from.

    Returns:
        Tensor of shape [batch_size, post_nms_topN, 6] with columns
        (batch idx, score, x1, y1, x2, y2), zero padded at the end.

    Raises:
        AssertionError: when not training and batch size is not 1.
    """
    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    batch_size = bbox_deltas.size(0)

    # Offsets of every feature-map cell in input-image coordinates, laid
    # out as (x, y, x, y) so they can be added to (x1, y1, x2, y2) anchors.
    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(),
                   shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(scores)
    # Broadcast [1, A, 4] + [K, 1, 4] -> [K, A, 4]: every anchor at every cell.
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # 1. Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info, batch_size)

    scores_keep = scores
    proposals_keep = proposals

    if not self.training:
        # 3. filter out score below threshold
        assert batch_size == 1
        # Index the single row before nonzero(): on the 2-D [1, M] mask,
        # nonzero() yields (row, col) pairs and flattening them would mix
        # the row index 0 into the list of kept columns.
        scores_keep_idx = torch.nonzero(scores_keep[0] > 0.5).view(-1)
        if scores_keep_idx.numel() != 0:
            scores_keep = scores_keep[:, scores_keep_idx]
            proposals_keep = proposals_keep[:, scores_keep_idx]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 6).zero_()
    for i in range(batch_size):
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply nms (e.g. threshold = 0.7)
        keep_idx_i = nms(
            torch.cat((proposals_single, scores_single), 1), nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        # 7. take after_nms_topN (e.g. 300)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        # scores_single is [num_proposal, 1]; flatten it to match the
        # 1-D destination slice.
        output[i, :num_proposal, 1] = scores_single.view(-1)
        output[i, :num_proposal, 2:] = proposals_single

    return output
def _process_result(self, batch_size, features, proposals, cls_prob=None,
                    bbox_reg=None):
    """Get the final result in test stage.

    Proposals predicted as background, and proposals whose refined bbox is
    malformed, are dropped.  The rest are sorted by proposal score, reduced
    by NMS, and a binary mask is predicted for each survivor.

    Args:
        batch_size(int): mini-batch size.
        features(list of Variable): extracted features from backbone
        proposals(Tensor): [N, M, (idx, score, x1, y1, x2, y2)]
        cls_prob(Variable): [(NxM), num_classes]
        bbox_reg(Variable): [(NxM), num_classes, (x1, y1, x2, y2)]

    Returns:
        result: list of lists of dict, outer list is mini-batch, inner list
            is detected objects, dict contains stuff below.

        dict_key:
            'proposal'(Tensor): (x1, y1, x2, y2), coarse bbox from RPN
                proposal.
            'cls_pred'(int): predicted class id.
            'bbox_pred'(Tensor): (x1, y1, x2, y2), refined bbox from
                prediction head.
            'mask_pred'(Tensor): [H, W], predicted mask.

        When ``self.train_rpn_only`` is set, each dict only holds
        'proposal'.

        e.g. result[0][0]['mask_pred'] stands for the first object's mask
            prediction of the first image of mini-batch.

    Raises:
        AssertionError: if ``batch_size`` is not 1.
    """
    # Todo: support batch_size > 1.
    assert batch_size == 1, "batch_size > 1 will add support later"
    proposals = proposals.squeeze(0)
    result = []

    if self.train_rpn_only:
        obj_detected = [{'proposal': proposals[i, 2:].cpu()}
                        for i in range(proposals.size(0))]
        result.append(obj_detected)
        return result

    props = []
    bboxes = []
    cls_ids = []
    for idx, roi in enumerate(proposals):
        cls_id = torch.max(cls_prob[idx], dim=0)[1]
        if int(cls_id) <= 0:  # remove background
            continue
        # refine proposal bbox with bbox regression result.
        bbox = self._refine_proposal(
            roi[2:], bbox_reg[idx, :, :][cls_id, :].squeeze(0).data)
        px1, py1, px2, py2 = bbox
        # skip malformed bbox
        if py1 >= py2 or px1 >= px2:
            continue
        props.append(roi.unsqueeze(0))
        bboxes.append(bbox.unsqueeze(0))
        cls_ids.append(int(cls_id))

    if not props:
        result.append([])
        return result

    props_origin = torch.cat(props)
    props_refined = props_origin.clone()
    props_refined[:, 2:] = torch.cat(bboxes)

    # Apply nms.
    if self.use_fpn:
        pre_nms_top_n = int(self.config['FPN']['TEST_FPN_PRE_NMS_TOP_N'])
        post_nms_top_n = int(self.config['FPN']['TEST_FPN_POST_NMS_TOP_N'])
        nms_thresh = float(self.config['FPN']['TEST_FPN_NMS_THRESH'])
    else:
        pre_nms_top_n = int(self.config['RPN']['TEST_RPN_PRE_NMS_TOP_N'])
        post_nms_top_n = int(self.config['RPN']['TEST_RPN_POST_NMS_TOP_N'])
        nms_thresh = float(self.config['RPN']['TEST_RPN_NMS_THRESH'])

    score = props_refined[:, 1]
    order = torch.sort(score, dim=0, descending=True)[1][:pre_nms_top_n]
    props_origin = props_origin[order, :]
    props_refined = props_refined[order, :]
    # Keep the class ids aligned with the re-ordered boxes; they are looked
    # up by position below.
    cls_ids = [cls_ids[j] for j in order.tolist()]

    score = props_refined[:, 1].unsqueeze(-1)
    bbox = props_refined[:, 2:]
    keep_idx = nms(torch.cat([bbox, score], 1), nms_thresh)
    # Flatten to a 1-D LongTensor so it can drive index_select; presumably
    # nms may return an index column of another dtype.
    keep_idx = keep_idx.long().view(-1)[:post_nms_top_n]
    if keep_idx.numel() == 0:
        result.append([])
        return result
    props_origin = props_origin.index_select(0, keep_idx)
    props_refined = props_refined.index_select(0, keep_idx)
    cls_ids = [cls_ids[j] for j in keep_idx.tolist()]

    if self.use_fpn:
        rois_pooling_mask = self._roi_align_fpn(
            features, props_refined.clone(), mode='mask')
    else:
        rois_pooling_mask = self.roi_align_mask(
            features[0], props_refined.clone(), self.img_height)
    mask_prob = self.mask_head(rois_pooling_mask).data

    mask_threshold = float(self.config['TEST']['MASK_THRESH'])
    obj_detected = []
    for i, cls_id in enumerate(cls_ids):
        pred_dict = {
            'proposal': props_origin[i, 2:].cpu(),
            'cls_pred': cls_id,
            'bbox_pred': props_refined[i, 2:].cpu(),
            'mask_pred': None
        }

        # Resize the mask channel of the predicted class to the (inclusive)
        # size of the refined box, binarise it and paste it into an
        # image-sized canvas.
        # NOTE(review): the paste assumes the refined box lies inside the
        # image -- confirm that _refine_proposal clips its output.
        px1, py1, px2, py2 = props_refined[i, 2:].int()
        mask_height, mask_width = py2 - py1 + 1, px2 - px1 + 1
        mask = mask_prob[i, :, :, :][cls_id, :, :]
        mask = Variable(mask.unsqueeze(0), requires_grad=False)
        mask_resize = F.adaptive_avg_pool2d(
            mask, (mask_height, mask_width)).data
        mask_resize = mask_resize >= mask_threshold
        mask_pred = mask_prob.new(self.img_height, self.img_width).zero_()
        mask_pred[py1:py2 + 1, px1:px2 + 1] = mask_resize
        pred_dict['mask_pred'] = mask_pred.cpu()
        obj_detected.append(pred_dict)

    result.append(obj_detected)
    return result