def read_anchors():
    """Generate the default anchor set for a 1x1 feature map.

    Returns:
        numpy.ndarray: the anchors of the first (only) feature map,
        moved to CPU and converted from a torch tensor.
    """
    config = {
        "base_anchor_size": 16,
        "scales": [4, 8, 16],
        "aspect_ratios": [0.5, 0.8, 1],
        "anchor_stride": [16, 16],
        "anchor_offset": [0, 0],
    }
    generator = AnchorGenerator(config)
    generated = generator.generate([[1, 1]])
    return generated[0].cpu().numpy()
class RPNModel(Model):
    """Region Proposal Network head with separate cls/reg subsampling."""

    def init_param(self, model_config):
        """Read hyper-parameters and build sampler/anchor-generator/assigner."""
        self.in_channels = model_config['din']
        self.post_nms_topN = model_config['post_nms_topN']
        self.pre_nms_topN = model_config['pre_nms_topN']
        self.nms_thresh = model_config['nms_thresh']
        self.use_score = model_config['use_score']
        self.num_cls_samples = model_config['num_cls_samples']
        self.num_reg_samples = model_config['num_reg_samples']
        self.use_focal_loss = model_config['use_focal_loss']

        # sampler (all sampled boxes are treated as foreground candidates)
        self.sampler = DetectionSampler({"fg_fraction": 1.0})

        # anchor generator
        self.anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        self.num_anchors = self.anchor_generator.num_anchors
        self.nc_bbox_out = 4 * self.num_anchors
        self.nc_score_out = self.num_anchors * 2

        # target assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox coder is shared with the target assigner
        self.bbox_coder = self.target_assigner.bbox_coder

        self.use_iou = model_config.get('use_iou')

    def init_weights(self):
        """Initialize conv layers with a zero-mean Gaussian (std 0.01)."""
        self.truncated = False
        Filler.normal_init(self.rpn_conv, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated)

    def unfreeze_modules(self):
        """Re-enable gradients for the IoU/IoG/IoD map conv parameters.

        NOTE(review): the rpn_*_map_conv_io* layers referenced here are not
        created by this class's init_modules — calling this will raise
        AttributeError unless a subclass defines them; confirm intent.
        """
        unfreeze_modules = [
            self.rpn_coarse_map_conv_iod.bias,
            self.rpn_fine_map_conv_iod.bias,
            self.rpn_coarse_map_conv_iog.bias,
            self.rpn_fine_map_conv_iog.bias,
            self.rpn_coarse_map_conv_iou.bias,
            self.rpn_fine_map_conv_iou.bias,
            self.rpn_coarse_map_conv_iod.weight,
            self.rpn_fine_map_conv_iod.weight,
            self.rpn_coarse_map_conv_iog.weight,
            self.rpn_fine_map_conv_iog.weight,
            self.rpn_coarse_map_conv_iou.weight,
            self.rpn_fine_map_conv_iou.weight,
        ]
        for module in unfreeze_modules:
            module.requires_grad = True

    def init_modules(self):
        """Create the conv layers and loss functions."""
        # conv-relu layer processing the input feature map
        self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True)

        # bg/fg classification score layer
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # anchor box offset prediction layer
        if self.use_score:
            bbox_feat_channels = 512 + 2
            # BUGFIX: was true division (/=) which yields a float and makes
            # nn.Conv2d reject out_channels; use integer division.
            self.nc_bbox_out //= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out,
                                       1, 1, 0)

        # bbox regression loss (per-element; reduction handled in loss())
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # classification loss
        if self.use_focal_loss:
            self.rpn_cls_loss = FocalLoss(2)
        else:
            self.rpn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        """Decode, clip, sort and NMS-filter anchor predictions.

        Args:
            rpn_cls_probs: FloatTensor, shape(N, 2*num_anchors, H, W)
            anchors: list with one FloatTensor of anchors
            rpn_bbox_preds: FloatTensor, shape(N, num_anchors*4, H, W)
            im_info: image size info used for clipping

        Returns:
            proposals_batch: FloatTensor, shape(N, post_nms_topN, 4),
                zero-padded past the number of kept proposals
            proposals_order: LongTensor, shape(N, post_nms_topN), indices of
                the kept anchors in score order, -1 where padded
        """
        # only one feature map is supported
        anchors = anchors[0]

        # do not backprop through proposal generation
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N, H*W*num_anchors, 4)
        rpn_bbox_preds = rpn_bbox_preds.view(batch_size, -1, 4)

        # apply deltas to anchors to decode boxes
        proposals = self.bbox_coder.decode_batch(rpn_bbox_preds, anchors)

        # clip boxes to the image boundary
        proposals = box_ops.clip_boxes(proposals, im_info)

        # foreground probability per anchor
        fg_probs = rpn_cls_probs[:, self.num_anchors:, :, :]
        fg_probs = fg_probs.permute(0, 2, 3,
                                    1).contiguous().view(batch_size, -1)

        # sort by fg score, descending
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)

        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(rpn_bbox_preds)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]

            # pre-NMS top-k
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # NMS on (boxes, score) rows
            keep_idx_i = nms(
                torch.cat((proposals_single, fg_probs_single.unsqueeze(1)),
                          1), self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post-NMS top-k
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_probs_single = fg_probs_single[keep_idx_i]
            fg_order_single = fg_order_single[keep_idx_i]

            # pad with zeros at the end
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            proposals_order[i, :num_proposal] = fg_order_single

        return proposals_batch, proposals_order

    def forward(self, bottom_blobs):
        """Run the RPN head and produce proposals plus tensors for loss()."""
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']
        im_info = bottom_blobs['im_info']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn cls score, shape(N, 2*num_anchors, H, W)
        rpn_cls_scores = self.rpn_cls_score(rpn_conv)

        # rpn cls prob: softmax over the 2 (bg/fg) channel groups
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)

        # rpn bbox pred, shape(N, 4*num_anchors, H, W)
        if self.use_score:
            # shape (N, 2, num_anchors*H*W)
            rpn_cls_scores = rpn_cls_score_reshape.permute(0, 2, 1)
            rpn_bbox_preds = []
            for i in range(self.num_anchors):
                # NOTE(review): rpn_cls_scores is 3-D here, so this 4-index
                # slice looks wrong and the loop index is unused — confirm
                # against the original intent before relying on use_score.
                rpn_bbox_feat = torch.cat(
                    [rpn_conv, rpn_cls_scores[:, ::self.num_anchors, :, :]],
                    dim=1)
                rpn_bbox_preds.append(self.rpn_bbox_pred(rpn_bbox_feat))
            rpn_bbox_preds = torch.cat(rpn_bbox_preds, dim=1)
        else:
            # rpn offsets relative to the anchor boxes
            rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)

        # generate anchors for this feature map size
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        ###############################
        # Proposal
        ###############################
        # proposals_order tracks which anchors produced each proposal
        proposals_batch, proposals_order = self.generate_proposal(
            rpn_cls_probs, anchors, rpn_bbox_preds, im_info)
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat((batch_idx.unsqueeze(-1), proposals_batch),
                               dim=2)

        if self.training:
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        # reshape scores/probs to (N, H*W*num_anchors, 2) for the loss
        rpn_cls_scores = rpn_cls_scores.view(batch_size, 2, -1,
                                             rpn_cls_scores.shape[2],
                                             rpn_cls_scores.shape[3])
        rpn_cls_scores = rpn_cls_scores.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)
        rpn_cls_probs = rpn_cls_probs.view(batch_size, 2, -1,
                                           rpn_cls_probs.shape[2],
                                           rpn_cls_probs.shape[3])
        rpn_cls_probs = rpn_cls_probs.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)

        predict_dict = {
            'proposals_batch': proposals_batch,
            'rpn_cls_scores': rpn_cls_scores,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_probs': rpn_cls_probs,
            'proposals_order': proposals_order,
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Append ground-truth boxes (as extra rois) to rois_batch."""
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        # column 0 stays 0 (batch index placeholder), columns 1:5 are coords
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def loss(self, prediction_dict, feed_dict):
        """Compute cls and bbox losses with separate cls/reg subsampling."""
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']

        anchors = prediction_dict['anchors']
        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no gt labels needed: this is a binary fg/bg classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None)

        ################################
        # double subsample
        ################################
        rpn_cls_probs = prediction_dict['rpn_cls_probs'][:, :, 1]
        cls_criterion = rpn_cls_probs

        # cls loss (per-element, masked below)
        rpn_cls_score = prediction_dict['rpn_cls_scores']
        rpn_cls_loss = self.rpn_cls_loss(rpn_cls_score.view(-1, 2),
                                         rpn_cls_targets.view(-1))
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)

        # cls subsample: sample among all anchors with positive cls weight
        indicator = rpn_cls_weights > 0
        pos_indicator = indicator
        cls_batch_sampled_mask = self.sampler.subsample_batch(
            self.num_cls_samples,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        cls_batch_sampled_mask = cls_batch_sampled_mask.type_as(
            rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * cls_batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)

        # reg subsample: sample only from foreground anchors
        pos_indicator = rpn_reg_weights > 0
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N, H*W*num_anchors, 4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        reg_batch_sampled_mask = self.sampler.subsample_batch(
            self.num_reg_samples,
            pos_indicator,
            criterion=cls_criterion,
            indicator=None)
        # union with the cls sample so every cls sample also gets reg loss
        reg_batch_sampled_mask |= cls_batch_sampled_mask.type_as(
            reg_batch_sampled_mask)
        reg_batch_sampled_mask = reg_batch_sampled_mask.type_as(
            rpn_cls_weights)
        rpn_reg_weights = rpn_reg_weights * reg_batch_sampled_mask
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)

        # guard against division by zero
        # NOTE(review): `== 0` on a multi-element tensor is only valid for
        # batch size 1 — confirm the intended batch handling.
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # normalized cls loss
        rpn_cls_loss *= rpn_cls_weights
        rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()

        # normalized bbox loss, shape(N, num, 4)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(
            rpn_reg_loss.shape[0], -1).sum(dim=1) / num_reg_coeff.float()

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss
        return loss_dict
import sys
sys.path.append('.')

# BUGFIX: numpy was used below via `np.array` but never imported, which
# raises NameError when this script runs.
import numpy as np

from core.anchor_generators.anchor_generator import AnchorGenerator
from lib.model.rpn.generate_anchors import generate_anchors
from utils.visualize import visualize_bbox, read_img, shift_bbox

anchor_generator_config = {
    "base_anchor_size": 1,
    "scales": [4],
    "aspect_ratios": [1],
    "anchor_stride": [16, 16],
    "anchor_offset": [0, 0]
}
anchor_generator = AnchorGenerator(anchor_generator_config)
anchors = anchor_generator.generate([[24, 80]])

# reference anchors produced by the legacy generator, for comparison
expect_anchors = generate_anchors(
    base_size=anchor_generator_config['base_anchor_size'],
    ratios=np.array(anchor_generator_config['aspect_ratios']),
    scales=np.array(anchor_generator_config['scales']))

img = read_img('/data/object/training/image_2/000117.png')


def vis_help(anchors, expect_anchors):
    """Visualize the generated anchors on the sample image.

    `expect_anchors` is currently unused; the side-by-side comparison with
    the legacy generator was disabled.
    """
    visualize_bbox(img, anchors)
class RPNModel(Model):
    """Region Proposal Network head using a Proposal autograd Function and a
    single joint subsampling pass for cls and reg losses."""

    def init_param(self, model_config):
        """Read hyper-parameters and build sampler/anchor-generator/assigner."""
        self.in_channels = model_config['din']
        self.post_nms_topN = model_config['post_nms_topN']
        self.pre_nms_topN = model_config['pre_nms_topN']
        self.nms_thresh = model_config['nms_thresh']
        self.use_score = model_config['use_score']
        self.rpn_batch_size = model_config['rpn_batch_size']
        self.use_focal_loss = model_config['use_focal_loss']

        # sampler
        self.sampler = DetectionSampler(model_config['sampler_config'])

        # anchor generator
        self.anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        self.num_anchors = self.anchor_generator.num_anchors
        self.nc_bbox_out = 4 * self.num_anchors
        self.nc_score_out = self.num_anchors * 2

        # target assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox coder is shared with the target assigner
        self.bbox_coder = self.target_assigner.bbox_coder

        self.use_iou = model_config.get('use_iou')

    def init_weights(self):
        """Initialize conv layers with a zero-mean Gaussian (std 0.01)."""
        self.truncated = False
        Filler.normal_init(self.rpn_conv, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated)

    def init_modules(self):
        """Create the conv layers and loss functions."""
        # conv-relu layer processing the input feature map
        self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True)

        # bg/fg classification score layer
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # anchor box offset prediction layer
        if self.use_score:
            bbox_feat_channels = 512 + 2
            # BUGFIX: was true division (/=) which yields a float and makes
            # nn.Conv2d reject out_channels; use integer division.
            self.nc_bbox_out //= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out,
                                       1, 1, 0)

        # bbox regression loss (per-element; reduction handled in loss())
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # classification loss
        if self.use_focal_loss:
            self.rpn_cls_loss = FocalLoss(2)
        else:
            self.rpn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

    def forward(self, bottom_blobs):
        """Run the RPN head and produce rois plus tensors for loss()."""
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']
        im_info = bottom_blobs['im_info']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn cls score, shape(N, 2*num_anchors, H, W)
        rpn_cls_scores = self.rpn_cls_score(rpn_conv)

        # rpn cls prob: softmax over the 2 (bg/fg) channel groups
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)

        # rpn bbox pred, shape(N, 4*num_anchors, H, W)
        if self.use_score:
            # shape (N, 2, num_anchors*H*W)
            rpn_cls_scores = rpn_cls_score_reshape.permute(0, 2, 1)
            rpn_bbox_preds = []
            for i in range(self.num_anchors):
                # NOTE(review): rpn_cls_scores is 3-D here, so this 4-index
                # slice looks wrong and the loop index is unused — confirm
                # against the original intent before relying on use_score.
                rpn_bbox_feat = torch.cat(
                    [rpn_conv, rpn_cls_scores[:, ::self.num_anchors, :, :]],
                    dim=1)
                rpn_bbox_preds.append(self.rpn_bbox_pred(rpn_bbox_feat))
            rpn_bbox_preds = torch.cat(rpn_bbox_preds, dim=1)
        else:
            # rpn offsets relative to the anchor boxes
            rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)

        # generate anchors for this feature map size
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        ###############################
        # Proposal
        ###############################
        # proposals_order tracks which anchors produced each proposal
        rois_batch, proposals_order = Proposal.apply(rpn_cls_probs, anchors,
                                                     rpn_bbox_preds, im_info)

        if self.training:
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        # reshape scores/probs to (N, H*W*num_anchors, 2) for the loss
        rpn_cls_scores = rpn_cls_scores.view(batch_size, 2, -1,
                                             rpn_cls_scores.shape[2],
                                             rpn_cls_scores.shape[3])
        rpn_cls_scores = rpn_cls_scores.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)
        rpn_cls_probs = rpn_cls_probs.view(batch_size, 2, -1,
                                           rpn_cls_probs.shape[2],
                                           rpn_cls_probs.shape[3])
        rpn_cls_probs = rpn_cls_probs.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)

        predict_dict = {
            'rpn_cls_scores': rpn_cls_scores,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_probs': rpn_cls_probs,
            'proposals_order': proposals_order,
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Append ground-truth boxes (as extra rois) to rois_batch."""
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        # column 0 stays 0 (batch index placeholder), columns 1:5 are coords
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def loss(self, prediction_dict, feed_dict):
        """Compute cls and bbox losses with one joint subsampling pass."""
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']

        anchors = prediction_dict['anchors']
        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no gt labels needed: this is a binary fg/bg classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None)

        ################################
        # subsample
        ################################
        pos_indicator = rpn_reg_weights > 0
        indicator = rpn_cls_weights > 0

        # ranking criterion: either assigned overlaps or predicted fg prob
        if self.use_iou:
            cls_criterion = \
                self.target_assigner.matcher.assigned_overlaps_batch
        else:
            rpn_cls_probs = prediction_dict['rpn_cls_probs'][:, :, 1]
            cls_criterion = rpn_cls_probs

        batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * batch_sampled_mask
        rpn_reg_weights = rpn_reg_weights * batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)

        # guard against division by zero
        # NOTE(review): `== 0` on a multi-element tensor is only valid for
        # batch size 1 — confirm the intended batch handling.
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # cls loss
        rpn_cls_score = prediction_dict['rpn_cls_scores']
        rpn_cls_loss = self.rpn_cls_loss(rpn_cls_score.view(-1, 2),
                                         rpn_cls_targets.view(-1))
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)
        rpn_cls_loss *= rpn_cls_weights
        rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()

        # bbox loss, shape(N, num, 4)
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N, H*W*num_anchors, 4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(
            rpn_reg_loss.shape[0], -1).sum(dim=1) / num_reg_coeff.float()

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss
        return loss_dict
class LEDRPNModel(Model): def init_param(self, model_config): self.in_channels = model_config['din'] self.post_nms_topN = model_config['post_nms_topN'] self.pre_nms_topN = model_config['pre_nms_topN'] self.nms_thresh = model_config['nms_thresh'] self.use_score = model_config['use_score'] self.rpn_batch_size = model_config['rpn_batch_size'] self.use_focal_loss = model_config['use_focal_loss'] self.alpha = 0.6 self.theta = 1.0 self.iox_bbox_coder = DiscreteBBoxCoder( model_config['iox_coder_config']) self.use_sharpL2 = model_config.get('use_sharpL2') self.use_sigmoid = model_config['use_sigmoid'] self.use_cls_pred = model_config['use_cls_pred'] # sampler # self.sampler = HardNegativeSampler(model_config['sampler_config']) # self.sampler = BalancedSampler(model_config['sampler_config']) self.sampler = DetectionSampler(model_config['sampler_config']) # anchor generator self.anchor_generator = AnchorGenerator( model_config['anchor_generator_config']) self.num_anchors = self.anchor_generator.num_anchors self.nc_bbox_out = 4 * self.num_anchors self.nc_score_out = self.num_anchors * 2 # target assigner self.target_assigner = LEDTargetAssigner( model_config['target_assigner_config']) # bbox coder self.bbox_coder = self.target_assigner.bbox_coder def iox_clip(self, iox): iox = iox.clone() iox[iox < 0] = 0 iox[iox > 1] = 1 return iox def init_weights(self): self.truncated = False Filler.normal_init(self.rpn_conv, 0, 0.01, self.truncated) Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated) Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated) Filler.normal_init(self.rpn_coarse_map_conv_iod, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_fine_map_conv_iod, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_coarse_map_conv_iou, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_fine_map_conv_iou, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_fine_map_conv_iog, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_coarse_map_conv_iog, 0, 
0.001, self.truncated) def unfreeze_modules(self): unfreeze_modules = [ self.rpn_coarse_map_conv_iod.bias, self.rpn_fine_map_conv_iod.bias, self.rpn_coarse_map_conv_iog.bias, self.rpn_fine_map_conv_iog.bias, self.rpn_coarse_map_conv_iou.bias, self.rpn_fine_map_conv_iou.bias, self.rpn_coarse_map_conv_iod.weight, self.rpn_fine_map_conv_iod.weight, self.rpn_coarse_map_conv_iog.weight, self.rpn_fine_map_conv_iog.weight, self.rpn_coarse_map_conv_iou.weight, self.rpn_fine_map_conv_iou.weight ] for module in unfreeze_modules: module.requires_grad = True def init_modules(self): # define the convrelu layers processing input feature map self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True) # define bg/fg classifcation score layer self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0) self.rpn_coarse_map_conv_iou = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_fine_map_conv_iou = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_coarse_map_conv_iog = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_fine_map_conv_iog = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_coarse_map_conv_iod = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_fine_map_conv_iod = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) # define anchor box offset prediction layer if self.use_score: bbox_feat_channels = 512 + 2 self.nc_bbox_out /= self.num_anchors else: bbox_feat_channels = 512 self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out, 1, 1, 0) # rpn bbox self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False) if self.use_sharpL2: self.reg_loss = SharpL2Loss() else: self.reg_loss = nn.MSELoss(reduce=False) self.cls_loss = nn.CrossEntropyLoss(reduce=False) # rpn cls if self.use_focal_loss: self.rpn_cls_loss = FocalLoss(2) else: self.rpn_cls_loss = functools.partial(F.cross_entropy, reduce=False) def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds, im_info): # TODO create a new Function """ Args: rpn_cls_probs: 
FloatTensor,shape(N,2*num_anchors,H,W) rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W) anchors: FloatTensor,shape(N,4,H,W) Returns: proposals_batch: FloatTensor, shape(N,post_nms_topN,4) fg_probs_batch: FloatTensor, shape(N,post_nms_topN) """ # assert len( # rpn_bbox_preds) == 1, 'just one feature maps is supported now' # rpn_bbox_preds = rpn_bbox_preds[0] anchors = anchors[0] # do not backward anchors = anchors rpn_fg_cls_probs = rpn_cls_probs.detach() rpn_bbox_preds = rpn_bbox_preds.detach() batch_size = rpn_bbox_preds.shape[0] rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous() # shape(N,H*W*num_anchors,4) rpn_bbox_preds = rpn_bbox_preds.view(batch_size, -1, 4) # apply deltas to anchors to decode # loop here due to many features maps # proposals = [] # for rpn_bbox_preds_single_map, anchors_single_map in zip( # rpn_bbox_preds, anchors): # proposals.append( # self.bbox_coder.decode(rpn_bbox_preds_single_map, # anchors_single_map)) # proposals = torch.cat(proposals, dim=1) proposals = self.bbox_coder.decode_batch(rpn_bbox_preds, anchors) # filer and clip proposals = box_ops.clip_boxes(proposals, im_info) # fg prob # fg_probs = rpn_cls_probs[:, self.num_anchors:, :, :] # fg_probs = fg_probs.permute(0, 2, 3, 1).contiguous().view(batch_size, # -1) fg_probs = rpn_fg_cls_probs # sort fg _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True) # fg_probs_batch = torch.zeros(batch_size, # self.post_nms_topN).type_as(rpn_cls_probs) proposals_batch = torch.zeros(batch_size, self.post_nms_topN, 4).type_as(rpn_bbox_preds) proposals_order = torch.zeros( batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order) for i in range(batch_size): proposals_single = proposals[i] fg_probs_single = fg_probs[i] fg_order_single = fg_probs_order[i] # pre nms if self.pre_nms_topN > 0: fg_order_single = fg_order_single[:self.pre_nms_topN] proposals_single = proposals_single[fg_order_single] fg_probs_single = fg_probs_single[fg_order_single] # nms keep_idx_i 
= nms( torch.cat((proposals_single, fg_probs_single.unsqueeze(1)), 1), self.nms_thresh) keep_idx_i = keep_idx_i.long().view(-1) # post nms if self.post_nms_topN > 0: keep_idx_i = keep_idx_i[:self.post_nms_topN] proposals_single = proposals_single[keep_idx_i, :] fg_probs_single = fg_probs_single[keep_idx_i] fg_order_single = fg_order_single[keep_idx_i] # padding 0 at the end. num_proposal = keep_idx_i.numel() proposals_batch[i, :num_proposal, :] = proposals_single # fg_probs_batch[i, :num_proposal] = fg_probs_single proposals_order[i, :num_proposal] = fg_order_single return proposals_batch, proposals_order def iou_pred(self, rpn_conv): return self.iox_pred(rpn_conv, self.rpn_coarse_map_conv_iou, self.rpn_fine_map_conv_iou) def iog_pred(self, rpn_conv): return self.iox_pred(rpn_conv, self.rpn_coarse_map_conv_iog, self.rpn_fine_map_conv_iog) def iod_pred(self, rpn_conv): return self.iox_pred(rpn_conv, self.rpn_coarse_map_conv_iod, self.rpn_fine_map_conv_iod) def iox_pred(self, rpn_conv, rpn_coarse_map_conv, rpn_fine_map_conv): batch_size = rpn_conv.shape[0] coarse_map = rpn_coarse_map_conv(rpn_conv) fine_map = rpn_fine_map_conv(rpn_conv) coarse_map_reshape = coarse_map.view(batch_size, 4, -1) iou_level_probs = F.softmax(coarse_map_reshape, dim=1) iou_level_probs = iou_level_probs.view_as(coarse_map) if self.use_sigmoid: # normalize it iou_reg = 2 * F.sigmoid(fine_map) - 1 else: iou_reg = fine_map # reshape preprocess iou_reg = iou_reg.view(batch_size, 4, self.num_anchors, -1).permute( 0, 3, 2, 1).contiguous().view(batch_size, -1, 4) iou_cls = iou_level_probs.view(batch_size, 4, self.num_anchors, -1).permute(0, 3, 2, 1).contiguous().view( batch_size, -1, 4) decoded_iou = self.iox_bbox_coder.decode_batch(iou_cls, iou_reg) # used for cls and reg loss iou_cls_scores = coarse_map.view(batch_size, 4, self.num_anchors, -1).permute(0, 3, 2, 1).contiguous().view( batch_size, -1, 4) return decoded_iou, iou_cls_scores, iou_reg def calculate_iou(self, iog, iod): mask = ~(iod == 
            0)
        # Indirect IoU derived from IoG (intersection-over-ground-truth) and
        # IoD (intersection-over-detection); only positions selected by
        # `mask` are valid, the rest stay zero.
        iou_indirect = torch.zeros_like(iog)
        iod = iod[mask]
        iog = iog[mask]
        # Algebraic identity: IoU = IoD*IoG / (IoD + IoG - IoD*IoG)
        iou_indirect[mask] = (iod * iog) / (iod + iog - iod * iog)
        return iou_indirect

    def forward(self, bottom_blobs):
        """Run the IoU-aware RPN head.

        Args:
            bottom_blobs: dict providing 'base_feat' (backbone feature map,
                shape (N, C, H, W)), 'gt_boxes' and 'input_size'.

        Returns:
            dict with proposals/rois plus the raw IoU/IoG/IoD and cls
            predictions consumed by loss().
        """
        # import ipdb
        # ipdb.set_trace()
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']
        # im_info = bottom_blobs['im_info']
        im_info = bottom_blobs['input_size']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn cls: softmax over the 2 (bg/fg) channels
        rpn_cls_scores = self.rpn_cls_score(rpn_conv)
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(
            rpn_cls_score_reshape, dim=1).view_as(rpn_cls_scores)
        # reorder both probs and scores to shape (N, H*W*num_anchors, 2)
        rpn_cls_probs = rpn_cls_probs.view(
            batch_size, 2, self.num_anchors,
            -1).permute(0, 3, 2, 1).contiguous().view(batch_size, -1, 2)
        rpn_cls_scores = rpn_cls_scores.view(
            batch_size, 2, self.num_anchors,
            -1).permute(0, 3, 2, 1).contiguous().view(batch_size, -1, 2)

        # IoU/IoG/IoD heads: each returns (value, cls scores, raw reg output)
        iou, iou_scores, iou_reg = self.iou_pred(rpn_conv)
        iog, iog_scores, iog_reg = self.iog_pred(rpn_conv)
        iod, iod_scores, iod_reg = self.iod_pred(rpn_conv)

        # bugs here
        iou = self.iox_clip(iou)
        iog = self.iox_clip(iog)
        iod = self.iox_clip(iod)

        iou_indirect = self.calculate_iou(iog, iod)
        # blend the directly-predicted IoU with the indirect one
        iou_final = (1 - self.alpha) * iou_indirect + self.alpha * iou

        # import ipdb
        # ipdb.set_trace()
        # scale fg probability by predicted localization quality: a Gaussian
        # penalty on (1 - IoU) with temperature self.theta
        rpn_fg_probs_final = rpn_cls_probs[:, :, 1] * torch.exp(-torch.pow(
            (1 - iou_final), 2) / self.theta)

        # rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        # rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        # rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)

        # import ipdb
        # ipdb.set_trace()
        # rpn bbox pred
        # shape(N,4*num_anchors,H,W)
        # if self.use_score:
        #     # shape (N,2,num_anchoros*H*W)
        #     rpn_cls_scores = rpn_cls_score_reshape.permute(0, 2, 1)
        #     rpn_bbox_preds = []
        #     for i in range(self.num_anchors):
        #         rpn_bbox_feat = torch.cat(
        #             [rpn_conv, rpn_cls_scores[:, ::self.num_anchors, :, :]],
        #             dim=1)
        #         rpn_bbox_preds.append(self.rpn_bbox_pred(rpn_bbox_feat))
        #     rpn_bbox_preds = torch.cat(rpn_bbox_preds, dim=1)
        # else:
        # get rpn offsets to the anchor boxes
        rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)
        # rpn_bbox_preds = [rpn_bbox_preds]

        # generate anchors
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        ###############################
        # Proposal
        ###############################
        # note that proposals_order is used for tracking transform of proposals
        proposals_batch, proposals_order = self.generate_proposal(
            rpn_fg_probs_final, anchors, rpn_bbox_preds, im_info)
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat(
            (batch_idx.unsqueeze(-1), proposals_batch), dim=2)
        if self.training:
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        predict_dict = {
            'proposals_batch': proposals_batch,
            # used for sorting
            'rpn_iou_final': rpn_fg_probs_final,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'proposals_order': proposals_order,

            # reg
            'rpn_iou_reg': iou_reg,
            'rpn_iog_reg': iog_reg,
            'rpn_iod_reg': iod_reg,

            # cls
            'rpn_iou_scores': iou_scores,
            'rpn_iog_scores': iog_scores,
            'rpn_iod_scores': iod_scores,
            'rpn_cls_scores': rpn_cls_scores
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Append ground-truth boxes (as extra rois) for loss computation."""
        ################################
        # append gt_boxes to rois_batch for losses
        ################################
        # may be some bugs here
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        # column 0 is left as zero (batch index slot); columns 1:5 hold x1y1x2y2
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]

        # cat gt_boxes to rois_batch
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def loss(self, prediction_dict, feed_dict):
        """Compute RPN losses: optional fg/bg cls, bbox reg, and the
        IoU/IoG/IoD regression + classification heads.

        Returns:
            dict of per-batch losses keyed 'rpn/...'.
        """
        # loss for cls
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']
        anchors = prediction_dict['anchors']
        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no need gt labels here, it is just a binary classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None,
                                        input_size=feed_dict['input_size'])

        ################################
        # subsample
        ################################
        # NOTE(review): self.rpn_batch_size is read here but its assignment in
        # init_param appears commented out -- confirm it is set elsewhere.
        rpn_cls_probs = prediction_dict['rpn_iou_final']
        cls_criterion = rpn_cls_probs
        pos_indicator = rpn_reg_weights > 0
        indicator = rpn_cls_weights > 0
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * batch_sampled_mask
        rpn_reg_weights = rpn_reg_weights * batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)
        # check
        # assert num_cls_coeff, 'bug happens'
        # assert num_reg_coeff, 'bug happens'
        # guard against division by zero when nothing was sampled
        # NOTE(review): a tensor `== 0` comparison like this only works for
        # batch size 1 -- confirm batch handling.
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # iou loss (reg target is the matcher's assigned overlap)
        iou_scores = prediction_dict['rpn_iou_scores']
        iou = prediction_dict['rpn_iou_reg']
        iou_reg_targets = self.target_assigner.matcher.assigned_overlaps_batch
        iou_reg_targets_encode = self.iox_bbox_coder.encode_reg(
            iou_reg_targets)
        iou_reg_loss = self.reg_loss(iou, iou_reg_targets_encode).sum(dim=-1)
        iou_scores_targets = self.iox_bbox_coder.encode_cls(iou_reg_targets)
        iou_cls_loss = self.cls_loss(iou_scores.view(-1, 4),
                                     iou_scores_targets.view(-1))
        iou_cls_loss = iou_cls_loss.view_as(rpn_cls_weights)
        iou_cls_loss = iou_cls_loss.mean(dim=1)
        iou_reg_loss = iou_reg_loss.mean(dim=1)

        # iog loss
        iog_scores = prediction_dict['rpn_iog_scores']
        iog = prediction_dict['rpn_iog_reg']
        iog_reg_targets = self.target_assigner.matcher.assigned_iog_batch
        iog_reg_targets_encode = self.iox_bbox_coder.encode_reg(
            iog_reg_targets)
        iog_reg_loss = self.reg_loss(iog, iog_reg_targets_encode).sum(dim=-1)
        iog_scores_targets = self.iox_bbox_coder.encode_cls(iog_reg_targets)
        iog_cls_loss = self.cls_loss(iog_scores.view(-1, 4),
                                     iog_scores_targets.view(-1))
        iog_cls_loss = iog_cls_loss.view_as(rpn_cls_weights)
        iog_cls_loss = iog_cls_loss.mean(dim=1)
        iog_reg_loss = iog_reg_loss.mean(dim=1)

        # iod loss
        iod_scores = prediction_dict['rpn_iod_scores']
        iod = prediction_dict['rpn_iod_reg']
        iod_reg_targets = self.target_assigner.matcher.assigned_iod_batch
        iod_reg_targets_encode = self.iox_bbox_coder.encode_reg(
            iod_reg_targets)
        iod_reg_loss = self.reg_loss(iod, iod_reg_targets_encode).sum(dim=-1)
        iod_scores_targets = self.iox_bbox_coder.encode_cls(iod_reg_targets)
        iod_cls_loss = self.cls_loss(iod_scores.view(-1, 4),
                                     iod_scores_targets.view(-1))
        iod_cls_loss = iod_cls_loss.view_as(rpn_cls_weights)
        iod_cls_loss = iod_cls_loss.mean(dim=1)
        iod_reg_loss = iod_reg_loss.mean(dim=1)

        # cls loss (optional plain fg/bg head)
        if self.use_cls_pred:
            rpn_cls_score = prediction_dict['rpn_cls_scores']
            rpn_cls_loss = self.rpn_cls_loss(rpn_cls_score.view(-1, 2),
                                             rpn_cls_targets.view(-1))
            rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)
            rpn_cls_loss *= rpn_cls_weights
            rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()
            loss_dict['rpn/cls_loss'] = rpn_cls_loss

        loss_dict['rpn/iou_cls_loss'] = iou_cls_loss
        loss_dict['rpn/iou_reg_loss'] = iou_reg_loss
        loss_dict['rpn/iog_cls_loss'] = iog_cls_loss
        loss_dict['rpn/iog_reg_loss'] = iog_reg_loss
        loss_dict['rpn/iod_reg_loss'] = iod_reg_loss
        loss_dict['rpn/iod_cls_loss'] = iod_cls_loss

        # bbox loss
        # shape(N,num,4)
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(
            rpn_reg_loss.shape[0], -1).sum(dim=1) / num_reg_coeff.float()

        # loss_dict['rpn_cls_loss'] = iox_loss
        loss_dict['rpn/bbox_loss'] = rpn_reg_loss
        # loss_dict['iox_loss'] = iox_loss

        return loss_dict
class RPNModel(Model):
    """RPN head that decodes regression deltas against shrunken anchors.

    The classification branch predicts fg/bg per anchor position; the
    regression branch is decoded against tiny fixed-size boxes sharing the
    original anchors' centers (see generate_new_anchors), so the network
    effectively regresses object centers.
    """

    def init_param(self, model_config):
        """Read hyper-parameters and build sampler/assigner/anchor helpers."""
        self.in_channels = model_config['din']
        self.post_nms_topN = model_config['post_nms_topN']
        self.pre_nms_topN = model_config['pre_nms_topN']
        self.nms_thresh = model_config['nms_thresh']
        self.use_score = model_config['use_score']
        self.rpn_batch_size = model_config['rpn_batch_size']
        self.use_focal_loss = model_config['use_focal_loss']

        # sampler
        # self.sampler = HardNegativeSampler(model_config['sampler_config'])
        # self.sampler = BalancedSampler(model_config['sampler_config'])
        self.sampler = DetectionSampler(model_config['sampler_config'])

        # anchor generator
        self.anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        self.num_anchors = self.anchor_generator.num_anchors
        self.nc_bbox_out = 4 * self.num_anchors
        self.nc_score_out = self.num_anchors * 2

        # target assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox coder
        self.bbox_coder = self.target_assigner.bbox_coder
        self.use_iou = model_config.get('use_iou')

    def init_weights(self):
        """Gaussian-initialize the head's conv layers."""
        self.truncated = False

        Filler.normal_init(self.rpn_conv_cls, 0, 0.01, self.truncated)
        # Filler.normal_init(self.rpn_conv_bbox, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated)

    def init_modules(self):
        """Build the conv layers and loss functions."""
        # define the conv+relu layer processing the input feature map
        self.rpn_conv_cls = nn.Conv2d(
            self.in_channels, 512, 3, 1, 1, bias=True)
        # self.rpn_conv_bbox = nn.Conv2d(
        #     self.in_channels, 512, 3, 1, 1, bias=True)

        # bg/fg classification score layer
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # anchor box offset prediction layer
        if self.use_score:
            bbox_feat_channels = 512 + 2
            # BUG FIX: floor division -- Python 3 true division yields a
            # float, which nn.Conv2d rejects as an out_channels count.
            self.nc_bbox_out //= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out,
                                       1, 1, 0)

        # bbox loss; per-element, reduction is done manually in loss()
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # cls loss
        if self.use_focal_loss:
            self.rpn_cls_loss = FocalLoss(
                2, alpha=0.2, gamma=2, auto_alpha=False)
        else:
            self.rpn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

    def generate_new_anchors(self, anchors):
        """Shrink each anchor to a tiny box with the same center.

        Args:
            anchors: FloatTensor, shape (M, 4) in (x1, y1, x2, y2) order.

        Returns:
            FloatTensor, shape (M, 4): anchor_size x anchor_size boxes
            centered like the originals.
        """
        anchor_size = 2
        # anchors_w = anchors[:, :, 2] - anchors[:, :, 0] + 1
        # anchors_h = anchors[:, :, 3] - anchors[:, :, 1] + 1
        center_x = (anchors[:, 2] + anchors[:, 0]) / 2
        center_y = (anchors[:, 3] + anchors[:, 1]) / 2
        # new anchors share the center of the old ones
        min_x = center_x - (anchor_size - 1) / 2
        min_y = center_y - (anchor_size - 1) / 2
        max_x = center_x + (anchor_size - 1) / 2
        max_y = center_y + (anchor_size - 1) / 2
        return torch.stack([min_x, min_y, max_x, max_y], dim=-1)

    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        # TODO create a new Function
        """Decode, clip, sort and NMS-filter proposals from raw RPN outputs.

        Args:
            rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
            rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
            anchors: FloatTensor,shape(N,4,H,W)
        Returns:
            proposals_batch: FloatTensor, shape(N,post_nms_topN,4),
                zero-padded past the number of kept proposals
            proposals_order: LongTensor, shape(N,post_nms_topN), the source
                anchor index of each kept proposal (padded with -1)
        """
        # do not backprop through proposal generation
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(batch_size, -1, 4)

        # make anchors small, then decode the deltas against them
        new_anchors = self.generate_new_anchors(anchors)
        proposals = self.bbox_coder.decode_batch(rpn_bbox_preds, new_anchors)

        # clip to the image boundary
        proposals = box_ops.clip_boxes(proposals, im_info)

        # fg prob: second half of the channel dim holds the fg scores
        fg_probs = rpn_cls_probs[:, self.num_anchors:, :, :]
        fg_probs = fg_probs.permute(0, 2, 3,
                                    1).contiguous().view(batch_size, -1)

        # sort by fg score, descending
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)
        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(rpn_bbox_preds)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]

            # pre nms: keep only the highest-scoring candidates
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # nms
            keep_idx_i = nms(
                torch.cat((proposals_single, fg_probs_single.unsqueeze(1)),
                          1), self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post nms
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_order_single = fg_order_single[keep_idx_i]

            # pad with zeros / -1 at the end
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            proposals_order[i, :num_proposal] = fg_order_single

        return proposals_batch, proposals_order

    def forward(self, bottom_blobs):
        """Predict proposals and the raw tensors needed by loss()."""
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']  # read for interface parity
        im_info = bottom_blobs['im_info']

        # rpn conv; the bbox branch shares this cls feature map
        rpn_conv_cls = F.relu(self.rpn_conv_cls(base_feat), inplace=True)

        # rpn cls score, shape(N,2*num_anchors,H,W)
        rpn_cls_scores = self.rpn_cls_score(rpn_conv_cls)

        # rpn cls prob, softmax over the 2 (bg/fg) channels
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)

        # rpn bbox pred, shape(N,4*num_anchors,H,W); shared with cls branch
        # rpn_conv_bbox = F.relu(self.rpn_conv_bbox(base_feat), inplace=True)
        rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv_cls)

        # generate anchors
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(
            feature_map_list, input_size=im_info[0][:-1])

        ###############################
        # Proposal
        ###############################
        # proposals_order tracks which anchor each proposal came from
        proposals_batch, proposals_order = self.generate_proposal(
            rpn_cls_probs, anchors, rpn_bbox_preds, im_info)
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat(
            (batch_idx.unsqueeze(-1), proposals_batch), dim=2)

        # postprocess scores/probs to shape (N, H*W*num_anchors, 2)
        rpn_cls_scores = rpn_cls_scores.view(batch_size, 2, -1,
                                             rpn_cls_scores.shape[2],
                                             rpn_cls_scores.shape[3])
        rpn_cls_scores = rpn_cls_scores.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)
        rpn_cls_probs = rpn_cls_probs.view(batch_size, 2, -1,
                                           rpn_cls_probs.shape[2],
                                           rpn_cls_probs.shape[3])
        rpn_cls_probs = rpn_cls_probs.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)

        predict_dict = {
            'proposals_batch': proposals_batch,
            'rpn_cls_scores': rpn_cls_scores,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_probs': rpn_cls_probs,
            'proposals_order': proposals_order,
        }

        return predict_dict

    def loss(self, prediction_dict, feed_dict):
        """Compute fg/bg classification and bbox regression losses."""
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']
        anchors = prediction_dict['anchors']

        # small anchors used for decoding
        new_anchors = self.generate_new_anchors(anchors)

        #################################
        # target assigner
        ################################
        # no gt labels needed: this is a binary classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights, stats = \
            self.target_assigner.assign(anchors, gt_boxes, new_anchors,
                                        gt_labels=None)

        ################################
        # subsample
        ################################
        pos_indicator = rpn_reg_weights > 0
        indicator = rpn_cls_weights > 0
        rpn_cls_probs = prediction_dict['rpn_cls_probs'][:, :, 1]
        cls_criterion = rpn_cls_probs
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * batch_sampled_mask
        rpn_reg_weights = rpn_reg_weights * batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)
        # guard against division by zero when nothing was sampled
        # NOTE(review): tensor `== 0` comparison assumes batch size 1
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # cls loss
        rpn_cls_score = prediction_dict['rpn_cls_scores']
        rpn_cls_loss = self.rpn_cls_loss(
            rpn_cls_score.view(-1, 2), rpn_cls_targets.view(-1))
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)
        rpn_cls_loss *= rpn_cls_weights
        rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()

        # bbox loss
        # shape(N,num,4)
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(rpn_reg_loss.shape[0], -1).sum(
            dim=1) / num_reg_coeff.float()

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss

        return loss_dict
class DistanceRPNModel(Model):
    """RPN head that ranks proposals by predicted center distance.

    Instead of relying solely on the softmax fg score, proposals are ranked
    by 1/(distance + eps), where distance is the magnitude of the predicted
    (dx, dy) center offset; the fg score acts only as a 0.5-threshold gate.
    """

    def init_param(self, model_config):
        """Read hyper-parameters and build sampler/assigner/anchor helpers."""
        self.in_channels = model_config['din']
        self.post_nms_topN = model_config['post_nms_topN']
        self.pre_nms_topN = model_config['pre_nms_topN']
        self.nms_thresh = model_config['nms_thresh']
        self.use_score = model_config['use_score']
        self.rpn_batch_size = model_config['rpn_batch_size']
        self.use_focal_loss = model_config['use_focal_loss']

        # sampler
        # self.sampler = HardNegativeSampler(model_config['sampler_config'])
        self.sampler = DetectionSampler(model_config['sampler_config'])
        # can not use hem here
        # self.sampler = BalancedSampler(model_config['sampler_config'])

        # anchor generator
        self.anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        self.num_anchors = self.anchor_generator.num_anchors
        self.nc_bbox_out = 4 * self.num_anchors
        self.nc_score_out = self.num_anchors * 2

        # target assigner
        self.target_assigner = DistanceTargetAssigner(
            model_config['target_assigner_config'])

        # bbox coder
        self.bbox_coder = self.target_assigner.bbox_coder

    def init_weights(self):
        """Gaussian-initialize the head's conv layers."""
        self.truncated = False

        Filler.normal_init(self.rpn_conv, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated)

    def init_modules(self):
        """Build the conv layers and loss functions."""
        # define the conv+relu layer processing the input feature map
        self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True)

        # bg/fg classification score layer
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # anchor box offset prediction layer
        if self.use_score:
            bbox_feat_channels = 512 + 2
            # BUG FIX: floor division -- Python 3 true division yields a
            # float, which nn.Conv2d rejects as an out_channels count.
            self.nc_bbox_out //= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out,
                                       1, 1, 0)

        # bbox loss; per-element, reduction is done manually in loss()
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # cls loss
        if self.use_focal_loss:
            self.rpn_cls_loss = FocalLoss(2)
        else:
            self.rpn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        # self.rpn_cls_loss = nn.MSELoss(reduce=False)
        # self.distance_similarity_calc = DistanceSimilarityCalc()

    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        # TODO create a new Function
        """Decode, gate, sort and NMS-filter proposals.

        Args:
            rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
            rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
            anchors: list whose first entry is the anchor tensor
        Returns:
            proposals_batch: FloatTensor, shape(N,post_nms_topN,4)
            proposals_order: LongTensor, shape(N,post_nms_topN), padded -1
            fg_probs: FloatTensor, distance-derived scores (gated by cls)
            distance: FloatTensor, predicted center distances (gated)
        """
        anchors = anchors[0]

        # do not backprop through proposal generation
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(batch_size, -1, 4)

        # softmax fg prob acts as a hard gate on the distance score
        gate = rpn_cls_probs[:, self.num_anchors:, :, :]
        gate = gate.permute(0, 2, 3, 1).contiguous().view(batch_size, -1)
        fg_probs, distance = self.get_rpn_cls_probs(
            rpn_bbox_preds, anchors=None)
        fg_probs[gate < 0.5] = 0
        # push gated-out positions far away so they sort last
        distance[gate < 0.5] = 1e5

        # decode deltas against the anchors
        proposals = self.bbox_coder.decode_batch(rpn_bbox_preds, anchors)

        # clip to the image boundary
        proposals = box_ops.clip_boxes(proposals, im_info)

        # sort by (distance-derived) fg score, descending
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)
        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(rpn_bbox_preds)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]

            # pre nms
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # nms
            keep_idx_i = nms(
                torch.cat((proposals_single, fg_probs_single.unsqueeze(1)),
                          1), self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post nms
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_order_single = fg_order_single[keep_idx_i]

            # pad with zeros / -1 at the end
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            proposals_order[i, :num_proposal] = fg_order_single

        return proposals_batch, proposals_order, fg_probs, distance

    def get_rpn_cls_probs(self, bbox_pred, anchors=None):
        """Convert predicted (dx, dy) offsets into a ranking score.

        Note that all inputs have no gradients.

        Args:
            bbox_pred: shape (N,M,4); only channels 0 and 1 (dx, dy) are used
            anchors: unused, kept for interface compatibility
        Returns:
            (scores, distance): scores = 1/(distance + 1e-5), both shape (N,M)
        """
        dx = bbox_pred[:, :, 0]
        dy = bbox_pred[:, :, 1]
        distance = torch.sqrt(dx * dx + dy * dy)
        theta = 1e-5  # avoid division by zero at exact centers
        return 1.0 / (distance + theta), distance

    def forward(self, bottom_blobs):
        """Predict proposals ranked by center distance."""
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']
        im_info = bottom_blobs['im_info']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn bbox pred, shape(N,4*num_anchors,H,W)
        rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)

        # generate anchors
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        rpn_cls_scores = self.rpn_cls_score(rpn_conv)

        # softmax over the 2 (bg/fg) channels
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)
        # distance replaces rpn_cls_probs as the ranking criterion
        # rpn_cls_probs = self.get_rpn_cls_probs(rpn_bbox_preds)

        ###############################
        # Proposal
        ###############################
        # proposals_order tracks which anchor each proposal came from
        proposals_batch, proposals_order, fg_probs, distance = \
            self.generate_proposal(rpn_cls_probs, anchors, rpn_bbox_preds,
                                   im_info)
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat(
            (batch_idx.unsqueeze(-1), proposals_batch), dim=2)
        if self.training:
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        # postprocess scores to shape (N, H*W*num_anchors, 2)
        rpn_cls_scores = rpn_cls_scores.view(batch_size, 2, -1,
                                             rpn_cls_scores.shape[2],
                                             rpn_cls_scores.shape[3])
        rpn_cls_scores = rpn_cls_scores.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)

        predict_dict = {
            'proposals_batch': proposals_batch,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_scores': rpn_cls_scores,
            'proposals_order': proposals_order,
            'fg_probs': fg_probs,
            'distance': distance
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Append ground-truth boxes (as extra rois) for loss computation."""
        ################################
        # append gt_boxes to rois_batch for losses
        ################################
        # may be some bugs here
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        # column 0 is left as zero (batch index slot); columns 1:5 hold x1y1x2y2
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]

        # cat gt_boxes to rois_batch
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def loss(self, prediction_dict, feed_dict):
        """Compute fg/bg classification and bbox regression losses."""
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']
        anchors = prediction_dict['anchors']
        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no gt labels needed: this is a binary classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None)

        ################################
        # subsample
        ################################
        rpn_cls_probs = prediction_dict['fg_probs']
        pos_indicator = rpn_cls_targets > 0
        indicator = rpn_cls_weights > 0
        cls_criterion = rpn_cls_probs
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * batch_sampled_mask
        rpn_reg_weights = rpn_reg_weights * batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)
        # guard against division by zero when nothing was sampled
        # NOTE(review): tensor `== 0` comparison assumes batch size 1
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # cls loss
        rpn_cls_scores = prediction_dict['rpn_cls_scores']
        # NOTE(review): scores are passed with the class dim last, unlike the
        # `.view(-1, 2)` convention used by RPNModel.loss -- confirm the
        # configured criterion accepts (N, M, 2)/(N, M) layouts.
        rpn_cls_loss = self.rpn_cls_loss(rpn_cls_scores, rpn_cls_targets)
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)
        rpn_cls_loss *= rpn_cls_weights
        rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()

        # bbox loss
        # shape(N,num,4)
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(rpn_reg_loss.shape[0], -1).sum(
            dim=1) / num_reg_coeff.float()

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss

        return loss_dict