def init_param(self, model_config):
    """Read the R-CNN configuration dict into attributes and build helpers.

    Args:
        model_config: dict with class names, pooling/sampling options and
            the sub-configs for the feature extractor, RPN, target assigner
            and sampler.
    """
    class_names = model_config['classes']
    self.classes = class_names
    self.n_classes = len(class_names)

    # scalar options copied verbatim from the config
    for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                'crop_resize_with_max_pool', 'truncated', 'use_focal_loss',
                'subsample_twice', 'rcnn_batch_size'):
        setattr(self, key, model_config[key])

    # configs handed to submodules later (in init_modules)
    self.feature_extractor_config = model_config['feature_extractor_config']
    self.rpn_config = model_config['rpn_config']

    # proposal/gt matching
    self.target_assigner = TargetAssigner(
        model_config['target_assigner_config'])

    # fg/bg balancing
    self.sampler = BalancedSampler(model_config['sampler_config'])

    # hard-coded on; the 'reduce' config key is deliberately ignored here
    self.reduce = True
def init_param(self, model_config):
    """Cache RPN hyper-parameters from `model_config` and build helpers.

    Args:
        model_config: dict with input-channel count ('din'), NMS settings,
            sample counts and the anchor-generator / target-assigner configs.
    """
    self.in_channels = model_config['din']
    self.post_nms_topN = model_config['post_nms_topN']
    self.pre_nms_topN = model_config['pre_nms_topN']
    self.nms_thresh = model_config['nms_thresh']
    self.use_score = model_config['use_score']
    self.num_cls_samples = model_config['num_cls_samples']
    self.num_reg_samples = model_config['num_reg_samples']
    self.use_focal_loss = model_config['use_focal_loss']

    # sampler (fg_fraction fixed at 1.0: sample everything as foreground)
    self.sampler = DetectionSampler({"fg_fraction": 1.0})

    # anchor generator drives the number of per-location network outputs
    self.anchor_generator = AnchorGenerator(
        model_config['anchor_generator_config'])
    anchors_per_location = self.anchor_generator.num_anchors
    self.num_anchors = anchors_per_location
    self.nc_bbox_out = 4 * anchors_per_location   # 4 box offsets per anchor
    self.nc_score_out = anchors_per_location * 2  # fg/bg score per anchor

    # anchor/gt matching and its box coder
    self.target_assigner = TargetAssigner(
        model_config['target_assigner_config'])
    self.bbox_coder = self.target_assigner.bbox_coder

    # optional flag; None when the key is absent
    self.use_iou = model_config.get('use_iou')
def init_param(self, model_config):
    """Load the R-CNN configuration and set the training/eval switches.

    Args:
        model_config: dict with class names, pooling/sampling options and
            sub-configs for feature extractor, RPN, target assigner, sampler.
    """
    class_names = model_config['classes']
    self.classes = class_names
    self.n_classes = len(class_names)

    # scalar options copied verbatim from the config
    for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                'crop_resize_with_max_pool', 'truncated', 'use_focal_loss',
                'subsample_twice', 'rcnn_batch_size'):
        setattr(self, key, model_config[key])

    # configs handed to submodules later (in init_modules)
    self.feature_extractor_config = model_config['feature_extractor_config']
    self.rpn_config = model_config['rpn_config']

    # proposal/gt matching plus the helpers it owns
    self.target_assigner = TargetAssigner(
        model_config['target_assigner_config'])
    self.bbox_coder = self.target_assigner.bbox_coder
    self.similarity_calc = self.target_assigner.similarity_calc

    # fg/bg balancing
    self.sampler = BalancedSampler(model_config['sampler_config'])

    # hard-coded on; the 'reduce' config key is deliberately ignored here
    self.reduce = True

    # stage switches: regression branch on, classification branch off
    self.enable_cls = False
    self.enable_reg = True
    # IoU-as-score rescoring disabled
    self.enable_iou = False
    # track statistics of good rois (first and second stage)
    self.enable_track_rois = True
    self.enable_track_rcnn_rois = True
    # evaluate the final decoded bbox
    self.enable_eval_final_bbox = True
    # do not append gt boxes to rois
    self.use_gt = False
    self.subsample = False
    self.multiple_crop = False
class DoubleIoUFasterRCNN(Model):
    """Two-stage detector: RPN proposals + a second-stage bbox-regression head.

    The second-stage classification head is disabled here (commented out);
    `forward` reuses the RPN scores as `rcnn_cls_probs` instead, so only the
    bbox-regression loss is trained in the second stage.
    """

    def forward(self, feed_dict):
        """Run backbone, RPN and the second-stage regression head.

        Args:
            feed_dict: dict with at least 'img'; during training also
                'gt_boxes' and 'gt_labels' (consumed by pre_subsample).

        Returns:
            prediction_dict with RPN outputs plus 'rcnn_bbox_preds',
            'second_rpn_anchors' and 'rcnn_cls_probs'.
        """
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)
        # batch_size = base_feat.shape[0]

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre subsample to reduce memory consumption (training only)
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat_reg = self.feature_extractor.second_stage_feature(
            pooled_feat)
        # global average pool -> shape(N,C)
        # if self.reduce:
        pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)
        # else:
        # pooled_feat = pooled_feat.view(self.rcnn_batch_size, -1)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)
        # rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)
        # rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        # prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        # prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for tracking which anchors survived proposal selection;
        # RPN fg probs stand in for the (disabled) second-stage cls probs
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]
        prediction_dict['rcnn_cls_probs'] = prediction_dict['rpn_cls_probs'][
            0][proposals_order]

        return prediction_dict

    # def rcnn_cls_pred(pooled_feat)

    def init_weights(self):
        """Initialise submodules and the two prediction heads."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build backbone, RPN, RoI pooling, heads and loss modules."""
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        # NOTE(review): no else branch — an unknown pooling_mode leaves
        # self.rcnn_pooling unset and fails later in forward; confirm the
        # config is always one of the listed modes.
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        # 2048 channels after global pooling; 2048*4*4 if kept spatial
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        # NOTE(review): reduce= is the deprecated pre-1.0 PyTorch kwarg
        # (replaced by reduction='none'); kept for consistency with the file.
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Read configuration values and build assigner/sampler helpers."""
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']
        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to rois and subsample them in place.

        Mutates prediction_dict: adds 'rcnn_reg_weights' and
        'rcnn_reg_targets', and shrinks 'rois_batch' to the sampled set.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        # import ipdb
        # ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        # only rois matched to a gt box count as positives
        pos_indicator = rcnn_reg_weights > 0
        # indicator = rcnn_cls_weights > 0
        indicator = None

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        # num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        # assert num_cls_coeff, 'bug happens'
        # NOTE(review): tensor truthiness only works for 1-element tensors;
        # this assert relies on batch size 1 — confirm.
        assert num_reg_coeff, 'bug happens'

        # normalise weights so the reg loss averages over positive rois
        # prediction_dict[
        # 'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        # prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
        # batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss

        Returns:
            loss_dict with RPN losses plus 'rcnn_bbox_loss'
            (second-stage classification loss is disabled).
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        # rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        # rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss (disabled)
        # rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        # rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        # rcnn_cls_loss *= rcnn_cls_weights
        # rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss, weighted per-roi then summed
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights has no gradients
        # loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statics of rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        return loss_dict
class DetachDoubleIOUFasterRCNN(Model):
    """Faster-RCNN variant with decoupled reg/cls second-stage branches.

    The classification branch runs on detached pooled features
    (`pooled_feat.detach()`), so its gradients never reach the shared
    backbone/regression path. Which branches run and which losses are
    computed is controlled by `enable_reg` / `enable_cls` from init_param.
    """

    def forward(self, feed_dict):
        """Run backbone, RPN, regression branch and optional cls branch.

        Args:
            feed_dict: dict with at least 'img'; 'gt_boxes'/'gt_labels'
                are read during training and for roi tracking.

        Returns:
            prediction_dict with RPN outputs, 'rcnn_bbox_preds' and,
            when enable_cls, 'rcnn_cls_scores'/'rcnn_cls_probs'.
        """
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # pre subsample to reduce memory consumption (training only)
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # although it must be true
        # if self.enable_reg:
        # shape(N,C,1,1)
        pooled_feat_reg = self.feature_extractor.second_stage_feature(
            pooled_feat)
        pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds

        if self.enable_cls:
            # detach: cls branch must not backprop into the shared features
            pooled_feat_cls = self.feature_extractor.third_stage_feature(
                pooled_feat.detach())
            # shape(N,C)
            pooled_feat_cls = pooled_feat_cls.mean(3).mean(2)
            rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat_cls)
            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
            prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
            prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for tracking which anchors survived proposal selection
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]
        prediction_dict['second_rpn_cls_probs'] = prediction_dict[
            'rpn_cls_probs'][0][proposals_order]

        if not self.training and self.enable_iou:
            # calculate fake iou as final score, of course use scores to
            # filter bg
            # NOTE(review): rcnn_cls_probs is only bound when enable_cls is
            # on — enable_iou without enable_cls would raise NameError here;
            # confirm the two flags are always enabled together.
            pred_boxes = self.bbox_coder.decode_batch(
                rcnn_bbox_preds.view(1, -1, 4), rois_batch[:, :, 1:5])
            iou_matrix = box_ops.iou(pred_boxes, rois_batch[:, :, 1:5])[0]
            iou_matrix[rcnn_cls_probs[:, 1] < 0.5] = 0
            rcnn_cls_probs[:, 1] = iou_matrix
            prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs

        if not self.training and self.enable_track_rois:
            # run the assigner only for its side-effect statistics
            self.target_assigner.assign(rois_batch[:, :, 1:],
                                        feed_dict['gt_boxes'],
                                        feed_dict['gt_labels'])

        return prediction_dict

    def init_weights(self):
        """Initialise submodules and the two prediction heads."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build backbone, RPN, RoI pooling, heads and loss modules."""
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        # NOTE(review): no else branch — an unknown pooling_mode leaves
        # self.rcnn_pooling unset; confirm config values.
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        # 2048 channels after global pooling; 2048*4*4 if kept spatial
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        # NOTE(review): reduce= is the deprecated pre-1.0 PyTorch kwarg
        # (replaced by reduction='none'); kept for consistency with the file.
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2, alpha=0.25)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Read configuration values, build helpers, set branch switches."""
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']
        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox_coder
        self.bbox_coder = self.target_assigner.bbox_coder

        # similarity
        self.similarity_calc = self.target_assigner.similarity_calc

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        # optimize cls
        self.enable_cls = False
        # optimize reg
        self.enable_reg = True
        # cal iou
        self.enable_iou = False
        # track good rois
        self.enable_track_rois = False

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to rois and subsample them in place.

        The positive/indicator masks depend on which branch is being
        optimised (enable_reg vs enable_cls). Mutates prediction_dict.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        if self.enable_reg:
            # used for reg training
            pos_indicator = rcnn_reg_weights > 0
            indicator = None
        elif self.enable_cls:
            # used for cls training
            pos_indicator = rcnn_cls_targets > 0
            indicator = rcnn_cls_weights > 0
        else:
            raise ValueError("please check enable reg and enable cls again")

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)

        if self.enable_cls:
            rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
            num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
            # NOTE(review): tensor truthiness — relies on batch size 1
            assert num_cls_coeff, 'bug happens'
            prediction_dict[
                'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
            # used for retriving statistic
            prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
                batch_sampled_mask]

        # used for fg/bg
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        assert num_reg_coeff, 'bug happens'
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()

        if self.enable_reg:
            prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
                batch_sampled_mask]

        # matches of the sampled rois, used by analyze_ap in loss()
        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss

        Returns:
            loss_dict with RPN losses plus whichever of
            'rcnn_cls_loss'/'rcnn_bbox_loss' is enabled.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))
        # add rcnn_cls_targets to get the statics of rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        if self.enable_cls:
            # targets and weights
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']

            # classification loss
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)
            loss_dict['rcnn_cls_loss'] = rcnn_cls_loss

        if self.enable_reg:
            rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
            rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

            # bounding box regression L1 loss
            rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
            rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                                 rcnn_reg_targets).sum(
                                                     dim=-1)
            rcnn_bbox_loss *= rcnn_reg_weights
            rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

            # loss weights has no gradients
            loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # analysis ap
        # when enable cls, otherwise it is no sense
        if self.enable_cls:
            rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
            num_gt = feed_dict['gt_labels'].numel()
            fake_match = prediction_dict['fake_match']
            self.target_assigner.analyzer.analyze_ap(
                fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.5)

        return loss_dict
class RPNModel(Model):
    """Region Proposal Network: per-anchor fg/bg scores and box offsets,
    followed by NMS-based proposal selection and loss computation."""

    def init_param(self, model_config):
        """Cache RPN hyper-parameters and build sampler/assigner helpers."""
        self.in_channels = model_config['din']
        self.post_nms_topN = model_config['post_nms_topN']
        self.pre_nms_topN = model_config['pre_nms_topN']
        self.nms_thresh = model_config['nms_thresh']
        self.use_score = model_config['use_score']
        # self.rpn_batch_size = model_config['rpn_batch_size']
        self.num_cls_samples = model_config['num_cls_samples']
        self.num_reg_samples = model_config['num_reg_samples']
        self.use_focal_loss = model_config['use_focal_loss']

        # sampler
        # self.sampler = HardNegativeSampler({"fg_fraction": 1.0})
        # self.sampler = BalancedSampler(model_config['sampler_config'])
        self.sampler = DetectionSampler({"fg_fraction": 1.0})

        # anchor generator
        self.anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        self.num_anchors = self.anchor_generator.num_anchors
        self.nc_bbox_out = 4 * self.num_anchors
        self.nc_score_out = self.num_anchors * 2

        # target assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox coder
        self.bbox_coder = self.target_assigner.bbox_coder

        self.use_iou = model_config.get('use_iou')

    def init_weights(self):
        """Normal-initialise the three conv layers (never truncated)."""
        self.truncated = False

        Filler.normal_init(self.rpn_conv, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated)

    def unfreeze_modules(self):
        """Re-enable gradients for the iod/iog/iou map conv parameters.

        NOTE(review): these rpn_*_map_conv_* attributes are not created in
        init_modules here — presumably a subclass defines them; confirm
        before calling on this base class.
        """
        unfreeze_modules = [
            self.rpn_coarse_map_conv_iod.bias,
            self.rpn_fine_map_conv_iod.bias,
            self.rpn_coarse_map_conv_iog.bias,
            self.rpn_fine_map_conv_iog.bias,
            self.rpn_coarse_map_conv_iou.bias,
            self.rpn_fine_map_conv_iou.bias,
            self.rpn_coarse_map_conv_iod.weight,
            self.rpn_fine_map_conv_iod.weight,
            self.rpn_coarse_map_conv_iog.weight,
            self.rpn_fine_map_conv_iog.weight,
            self.rpn_coarse_map_conv_iou.weight,
            self.rpn_fine_map_conv_iou.weight
        ]
        for module in unfreeze_modules:
            module.requires_grad = True

    def init_modules(self):
        """Build the conv layers and the loss modules."""
        # define the convrelu layers processing input feature map
        self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True)

        # define bg/fg classifcation score layer
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # define anchor box offset prediction layer
        if self.use_score:
            bbox_feat_channels = 512 + 2
            # NOTE(review): true division makes nc_bbox_out a float here,
            # which nn.Conv2d rejects on recent PyTorch — confirm use_score
            # path is exercised / intended (integer // may be meant).
            self.nc_bbox_out /= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out,
                                       1, 1, 0)

        # bbox
        # NOTE(review): reduce= is the deprecated pre-1.0 PyTorch kwarg
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # cls
        if self.use_focal_loss:
            self.rpn_cls_loss = FocalLoss(2)
        else:
            self.rpn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        # TODO create a new Function
        """Decode, clip and NMS-filter anchors into top-scoring proposals.

        Args:
            rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
            rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
            anchors: FloatTensor,shape(N,4,H,W)
        Returns:
            proposals_batch: FloatTensor, shape(N,post_nms_topN,4)
            proposals_order: LongTensor, shape(N,post_nms_topN), indices of
                the kept anchors (padded with -1 via fill_ then 0-rows)
        """
        # assert len(
        # rpn_bbox_preds) == 1, 'just one feature maps is supported now'
        # rpn_bbox_preds = rpn_bbox_preds[0]
        anchors = anchors[0]
        # do not backward
        anchors = anchors
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(batch_size, -1, 4)
        # apply deltas to anchors to decode
        # loop here due to many features maps
        # proposals = []
        # for rpn_bbox_preds_single_map, anchors_single_map in zip(
        # rpn_bbox_preds, anchors):
        # proposals.append(
        # self.bbox_coder.decode(rpn_bbox_preds_single_map,
        # anchors_single_map))
        # proposals = torch.cat(proposals, dim=1)
        proposals = self.bbox_coder.decode_batch(rpn_bbox_preds, anchors)

        # filer and clip
        proposals = box_ops.clip_boxes(proposals, im_info)

        # fg prob: second half of the channel dim holds foreground scores
        fg_probs = rpn_cls_probs[:, self.num_anchors:, :, :]
        fg_probs = fg_probs.permute(0, 2, 3,
                                    1).contiguous().view(batch_size, -1)

        # sort fg
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)
        # fg_probs_batch = torch.zeros(batch_size,
        # self.post_nms_topN).type_as(rpn_cls_probs)
        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(rpn_bbox_preds)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]
            # pre nms: keep only the top-scoring candidates
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # nms
            keep_idx_i = nms(
                torch.cat((proposals_single, fg_probs_single.unsqueeze(1)),
                          1), self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post nms
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_probs_single = fg_probs_single[keep_idx_i]
            fg_order_single = fg_order_single[keep_idx_i]

            # padding 0 at the end.
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            # fg_probs_batch[i, :num_proposal] = fg_probs_single
            proposals_order[i, :num_proposal] = fg_order_single
        return proposals_batch, proposals_order

    def forward(self, bottom_blobs):
        """Compute RPN scores/offsets, generate proposals and rois.

        Args:
            bottom_blobs: dict with 'base_feat', 'gt_boxes', 'im_info'.

        Returns:
            dict with proposals, rois, anchors, raw preds and probs.
        """
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']
        im_info = bottom_blobs['im_info']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn cls score
        # shape(N,2*num_anchors,H,W)
        rpn_cls_scores = self.rpn_cls_score(rpn_conv)

        # rpn cls prob shape(N,2*num_anchors,H,W)
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)
        # import ipdb
        # ipdb.set_trace()

        # rpn bbox pred
        # shape(N,4*num_anchors,H,W)
        if self.use_score:
            # shape (N,2,num_anchoros*H*W)
            rpn_cls_scores = rpn_cls_score_reshape.permute(0, 2, 1)
            rpn_bbox_preds = []
            for i in range(self.num_anchors):
                # NOTE(review): rpn_cls_scores is 3-D after permute but is
                # indexed with 4 subscripts here, and loop index i is unused
                # — this branch looks broken; confirm use_score is ever on.
                rpn_bbox_feat = torch.cat(
                    [rpn_conv, rpn_cls_scores[:, ::self.num_anchors, :, :]],
                    dim=1)
                rpn_bbox_preds.append(self.rpn_bbox_pred(rpn_bbox_feat))
            rpn_bbox_preds = torch.cat(rpn_bbox_preds, dim=1)
        else:
            # get rpn offsets to the anchor boxes
            rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)

        # rpn_bbox_preds = [rpn_bbox_preds]

        # generate anchors
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        ###############################
        # Proposal
        ###############################
        # note that proposals_order is used for track transform of propsoals
        proposals_batch, proposals_order = self.generate_proposal(
            rpn_cls_probs, anchors, rpn_bbox_preds, im_info)
        # prepend the batch index column -> rois shape(N,num_proposals,5)
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat((batch_idx.unsqueeze(-1), proposals_batch),
                               dim=2)

        if self.training:
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        # flatten scores/probs to shape(N, H*W*num_anchors, 2) for the loss
        rpn_cls_scores = rpn_cls_scores.view(batch_size, 2, -1,
                                             rpn_cls_scores.shape[2],
                                             rpn_cls_scores.shape[3])
        rpn_cls_scores = rpn_cls_scores.permute(0, 3, 4, 2,
                                                1).contiguous().view(
                                                    batch_size, -1, 2)

        # postprocess
        rpn_cls_probs = rpn_cls_probs.view(batch_size, 2, -1,
                                           rpn_cls_probs.shape[2],
                                           rpn_cls_probs.shape[3])
        rpn_cls_probs = rpn_cls_probs.permute(0, 3, 4, 2,
                                              1).contiguous().view(
                                                  batch_size, -1, 2)
        predict_dict = {
            'proposals_batch': proposals_batch,
            'rpn_cls_scores': rpn_cls_scores,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_probs': rpn_cls_probs,
            'proposals_order': proposals_order,
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        ################################
        # append gt_boxes to rois_batch for losses
        ################################
        # may be some bugs here
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]

        # cat gt_boxes to rois_batch
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def loss(self, prediction_dict, feed_dict):
        """Compute subsampled RPN classification and regression losses.

        Returns:
            dict with 'rpn_cls_loss' and 'rpn_bbox_loss'.
        """
        # loss for cls
        loss_dict = {}

        gt_boxes = feed_dict['gt_boxes']

        anchors = prediction_dict['anchors']

        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no need gt labels here,it just a binary classifcation problem
        # import ipdb
        # ipdb.set_trace()
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None)

        ################################
        # double subsample
        ################################
        # fg probability drives which anchors the sampler picks
        rpn_cls_probs = prediction_dict['rpn_cls_probs'][:, :, 1]
        cls_criterion = rpn_cls_probs

        # cls loss (per-anchor, flattened to (N*num,2) vs (N*num,))
        rpn_cls_score = prediction_dict['rpn_cls_scores']
        # rpn_cls_loss = self.rpn_cls_loss(rpn_cls_score, rpn_cls_targets)
        rpn_cls_loss = self.rpn_cls_loss(rpn_cls_score.view(-1, 2),
                                         rpn_cls_targets.view(-1))
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)
        # cls_criterion = rpn_cls_loss

        # cls subsample
        # pos_indicator = rpn_cls_targets > 0
        # ignore fg/bg
        indicator = rpn_cls_weights > 0
        pos_indicator = indicator
        cls_batch_sampled_mask = self.sampler.subsample_batch(
            self.num_cls_samples,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        cls_batch_sampled_mask = cls_batch_sampled_mask.type_as(
            rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * cls_batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)

        # reg subsample
        # subsample all from fg
        pos_indicator = rpn_reg_weights > 0
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        # reg_criterion = rpn_reg_loss.sum(dim=-1)
        reg_batch_sampled_mask = self.sampler.subsample_batch(
            self.num_reg_samples,
            pos_indicator,
            criterion=cls_criterion,
            indicator=None)
        # union with the cls sample so both losses share anchors
        reg_batch_sampled_mask |= cls_batch_sampled_mask.type_as(
            reg_batch_sampled_mask)
        reg_batch_sampled_mask = reg_batch_sampled_mask.type_as(
            rpn_cls_weights)
        rpn_reg_weights = rpn_reg_weights * reg_batch_sampled_mask
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)

        # check
        # assert num_cls_coeff, 'bug happens'
        # assert num_reg_coeff, 'bug happens'
        # avoid division by zero when a batch has no positives
        # NOTE(review): `== 0` on a per-batch tensor relies on batch size 1
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # cls loss
        # rpn_cls_score = prediction_dict['rpn_cls_scores']
        # # rpn_cls_loss = self.rpn_cls_loss(rpn_cls_score, rpn_cls_targets)
        # rpn_cls_loss = self.rpn_cls_loss(
        # rpn_cls_score.view(-1, 2), rpn_cls_targets.view(-1))
        rpn_cls_loss *= rpn_cls_weights
        rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()

        # bbox loss
        # shape(N,num,4)
        # rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        # rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # # shape(N,H*W*num_anchors,4)
        # rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        # rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(
            rpn_reg_loss.shape[0], -1).sum(dim=1) / num_reg_coeff.float()

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss
        return loss_dict
class PostCLSFasterRCNN(Model):
    """Faster R-CNN variant that classifies the *decoded* (regressed) boxes.

    Pipeline as implemented in ``forward``:
      1. backbone ``first_stage_feature`` + RPN -> proposals (``rois_batch``);
      2. RoI pooling + ``second_stage_feature`` -> bbox regression
         (``rcnn_bbox_preds``);
      3. the regressed boxes are decoded into ``rcnn_rois_batch`` and a second
         RoI pooling + ``third_stage_feature`` predicts classification scores
         for those refined boxes (hence "post-cls").

    Which heads are optimized is controlled by the hard-coded flags set in
    ``init_param`` (``enable_cls`` / ``enable_reg``); per-batch statistics are
    accumulated in ``self.stats`` (proposals) and ``self.rcnn_stats`` (final
    boxes), both reset by ``clean_stats``.
    """

    def forward(self, feed_dict):
        """Run the three-stage pipeline on one batch.

        Args:
            feed_dict: dict with at least 'img'; during training also
                'gt_boxes' and 'gt_labels' (used by pre_subsample and the
                stats/AP bookkeeping below).

        Returns:
            prediction_dict: RPN outputs plus 'rcnn_bbox_preds',
                'rcnn_rois_batch', tracking entries ('second_rpn_anchors',
                'second_rpn_cls_probs') and, when ``enable_cls``,
                'rcnn_cls_probs' / 'rcnn_cls_scores'.
        """
        # pre-forward hook: reset the per-batch stats dicts
        self.clean_stats()
        prediction_dict = {}

        ################################
        # first stage
        ################################
        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model: adds proposals ('rois_batch'), 'anchors',
        # 'rpn_cls_probs', 'proposals_order', ... to prediction_dict
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        #####################################
        # second stage (bbox regression)
        #####################################
        # pre-subsample to reduce memory consumption; also installs the
        # rcnn_* targets/weights consumed later by loss()
        if self.training and self.enable_reg:
            # optionally append gt boxes to the proposals before assignment
            if self.use_gt:
                prediction_dict['rois_batch'] = self.append_gt(
                    prediction_dict['rois_batch'], feed_dict['gt_boxes'])
            stats = self.pre_subsample(prediction_dict, feed_dict)
            # rois stats
            self.stats.update(stats)
        rois_batch = prediction_dict['rois_batch']

        # note: base_feat is (N,C,H,W), rois_batch is (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # regression head always runs (guard kept commented out because it
        # "must be true" in this configuration)
        # if self.enable_reg:
        # shape(N,C,1,1) -> global average pool to (N,C)
        pooled_feat_reg = self.feature_extractor.second_stage_feature(
            pooled_feat)
        pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds

        # used for tracking which anchors survived proposal selection
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]
        prediction_dict['second_rpn_cls_probs'] = prediction_dict[
            'rpn_cls_probs'][0][proposals_order]

        ###########################################
        # third stage (predict scores of final bbox)
        ###########################################
        # decode rcnn bbox, generate rcnn rois batch
        # NOTE(review): decode assumes batch size 1 (view(1, -1, 4)) — confirm
        pred_boxes = self.bbox_coder.decode_batch(
            rcnn_bbox_preds.view(1, -1, 4), rois_batch[:, :, 1:5])
        rcnn_rois_batch = torch.zeros_like(rois_batch)
        # detach: classification stage must not backprop into the reg head
        rcnn_rois_batch[:, :, 1:5] = pred_boxes.detach()
        prediction_dict['rcnn_rois_batch'] = rcnn_rois_batch

        if self.training and self.use_gt:
            # append gt
            rcnn_rois_batch = self.append_gt(rcnn_rois_batch,
                                             feed_dict['gt_boxes'])
            prediction_dict['rcnn_rois_batch'] = rcnn_rois_batch

        if self.enable_cls:
            if self.training:
                rcnn_stats = self.pre_subsample(prediction_dict, feed_dict,
                                                stage='rcnn')
                # rcnn stats
                self.rcnn_stats.update(rcnn_stats)
            # rois after subsample
            pred_rois = prediction_dict['rcnn_rois_batch']
            pooled_feat_cls = self.rcnn_pooling(base_feat,
                                                pred_rois.view(-1, 5))
            # detach again so cls gradients stay out of earlier stages
            pooled_feat_cls = self.feature_extractor.third_stage_feature(
                pooled_feat_cls.detach())
            # shape(N,C)
            pooled_feat_cls = pooled_feat_cls.mean(3).mean(2)
            rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat_cls)
            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
            prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
            prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        ###################################
        # stats
        ###################################
        # import ipdb
        # ipdb.set_trace()
        # track proposal quality; skipped while the reg head is being
        # trained because pre_subsample already produced these stats
        if not self.training or (self.enable_track_rois and
                                 not self.enable_reg):
            # when enable_reg, skip it
            stats = self.target_assigner.assign(rois_batch[:, :, 1:],
                                                feed_dict['gt_boxes'],
                                                feed_dict['gt_labels'])[-1]
            self.stats.update(stats)

        if not self.training or (self.enable_track_rcnn_rois and
                                 not self.enable_cls):
            # when enable_cls, skip it (same reasoning as above)
            stats = self.target_assigner.assign(rcnn_rois_batch[:, :, 1:],
                                                feed_dict['gt_boxes'],
                                                feed_dict['gt_labels'])[-1]
            self.rcnn_stats.update(stats)

        # analyze AP — only meaningful when the cls head is being optimized
        if self.training and self.enable_cls:
            rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
            num_gt = feed_dict['gt_labels'].numel()
            fake_match = self.rcnn_stats['match']
            # NOTE(review): rcnn_cls_probs[:, 1] takes the foreground column —
            # assumes a 2-class (bg/fg) setup; confirm for n_classes > 2
            stats = self.target_assigner.analyzer.analyze_ap(
                fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.5)
            # collect stats
            self.rcnn_stats.update(stats)

        return prediction_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Concatenate ground-truth boxes onto the proposal batch.

        gt boxes are padded into the 5-column roi layout (col 0 is the batch
        index, left at 0; cols 1:5 are x1,y1,x2,y2) so losses can always see
        at least the gt as positive rois.
        """
        ################################
        # append gt_boxes to rois_batch for losses
        ################################
        # may be some bugs here (original author's note)
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]

        # cat gt_boxes to rois_batch
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def init_weights(self):
        """Initialize submodules, then the two prediction heads (normal init)."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build backbone, RPN, RoI pooling, heads and loss modules."""
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        # NOTE(review): unknown pooling_mode falls through silently, leaving
        # self.rcnn_pooling unset until first use — confirm intended
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')

        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)

        # reduce=True means features were average-pooled to (N,2048);
        # otherwise the flattened 4x4 spatial map feeds the bbox head
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss modules (per-element losses; reduction happens in loss())
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2, alpha=0.25, gamma=2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Read configuration and set the (mostly hard-coded) training flags.

        Args:
            model_config: dict of model options; see keys accessed below.
        """
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox_coder
        self.bbox_coder = self.target_assigner.bbox_coder

        # similarity
        self.similarity_calc = self.target_assigner.similarity_calc

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # config lookup deliberately overridden below
        # self.reduce = model_config.get('reduce')
        self.reduce = True

        # optimize cls
        self.enable_cls = True
        # optimize reg
        self.enable_reg = False
        # cal iou
        self.enable_iou = False

        # track good rois
        self.enable_track_rois = True
        self.enable_track_rcnn_rois = True

        # eval the final bbox
        self.enable_eval_final_bbox = True

        # use gt
        self.use_gt = False

        # if self.enable_eval_final_bbox:
        self.subsample = False

    def clean_stats(self):
        """Reset the per-batch statistics dicts (num_det starts at 1 to
        avoid division by zero in downstream ratio computations —
        NOTE(review): confirm against the stats consumer)."""
        # rois bbox
        self.stats = {
            'num_det': 1,
            'num_tp': 0,
            'matched_thresh': 0,
            'recall_thresh': 0,
            'match': None
        }
        # rcnn bbox (final bbox)
        self.rcnn_stats = {
            'num_det': 1,
            'num_tp': 0,
            'matched_thresh': 0,
            'recall_thresh': 0,
            'match': None
        }

    def pre_subsample(self, prediction_dict, feed_dict, stage='rpn'):
        """Assign targets to rois, optionally subsample them, and install the
        rcnn_* targets/weights into prediction_dict for loss().

        Args:
            prediction_dict: mutated in place ('rcnn_cls_weights',
                'rcnn_cls_targets', 'rcnn_reg_weights', 'rcnn_reg_targets',
                and the rois entry itself are written).
            feed_dict: provides 'gt_boxes' and 'gt_labels'.
            stage: 'rpn' operates on 'rois_batch', anything else on
                'rcnn_rois_batch'.

        Returns:
            stats dict from the target assigner, with 'match' filtered to the
            sampled rois.
        """
        if stage == 'rpn':
            rois_name = 'rois_batch'
        else:
            rois_name = 'rcnn_rois_batch'
        rois_batch = prediction_dict[rois_name]
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # append gt (disabled; forward() does this before calling us)
        # rois_batch = self.append_gt(rois_batch, gt_boxes)

        ##########################
        # assigner
        ##########################
        # import ipdb
        # ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights, stats = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        if self.subsample:
            cls_criterion = None
            if self.enable_reg:
                # used for reg training: sample around assigned foreground
                pos_indicator = rcnn_reg_weights > 0
                indicator = None
            elif self.enable_cls:
                # used for cls training
                pos_indicator = rcnn_cls_targets > 0
                indicator = rcnn_cls_weights > 0
            else:
                raise ValueError(
                    "please check enable reg and enable cls again")

            # subsample from all
            # shape (N,M)
            batch_sampled_mask = self.sampler.subsample_batch(
                self.rcnn_batch_size,
                pos_indicator,
                indicator=indicator,
                criterion=cls_criterion)
        else:
            # keep every roi (all-True boolean mask)
            batch_sampled_mask = torch.ones_like(rcnn_cls_weights > 0)

        if self.enable_cls:
            rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
            num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
            assert num_cls_coeff, 'bug happens'
            # normalize weights so loss() can simply sum them
            prediction_dict[
                'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
            # used for retrieving statistics
            prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
                batch_sampled_mask]

        # used for fg/bg; NOTE(review): reg weights/targets are prepared even
        # when only cls is optimized — loss() only reads them under enable_reg
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # clamp to >=1 to avoid dividing by zero when no fg roi was sampled
        num_reg_coeff = torch.max(num_reg_coeff,
                                  torch.ones_like(num_reg_coeff))
        # import ipdb
        # ipdb.set_trace()
        # assert num_reg_coeff, 'bug happens'
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()

        if self.enable_reg:
            prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
                batch_sampled_mask]

        # here use rcnn_target_assigner for final bbox pred
        stats['match'] = stats['match'][batch_sampled_mask]

        # update rois_batch so downstream pooling sees only sampled rois
        prediction_dict[rois_name] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        return stats

    def loss(self, prediction_dict, feed_dict):
        """Compute the weighted cls/reg losses from the entries that
        pre_subsample installed; delegates RPN losses to the rpn_model when
        the reg stage is enabled.
        """
        loss_dict = {}

        # submodule loss
        # add rcnn_cls_targets to get the statics of rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        if self.enable_cls:
            # targets and weights (already normalized in pre_subsample)
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']

            # classification loss
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)
            loss_dict['rcnn_cls_loss'] = rcnn_cls_loss

        if self.enable_reg:
            loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))
            rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
            rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

            # bounding box regression smooth-L1 loss, summed over coords
            rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
            rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                                 rcnn_reg_targets).sum(dim=-1)
            rcnn_bbox_loss *= rcnn_reg_weights
            rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

            # loss weights carry no gradients
            loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        return loss_dict