def init_param(self, model_config):
    """Store the model hyper-parameters found in ``model_config``.

    Besides copying plain options, this builds the target assigner and
    picks the proposal sampler according to ``iou_criterion``.

    Args:
        model_config: mapping read with string keys. Every key accessed
            here is mandatory (item access, no default).
    """
    class_names = model_config['classes']
    self.classes = class_names
    self.n_classes = len(class_names)

    # Options stored verbatim under the name of their config key.
    for key in (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
            'iou_criterion',
            'use_iox',
            # sub-module configs
            'feature_extractor_config',
            'rpn_config',
    ):
        setattr(self, key, model_config[key])

    # Fixed constants that are not exposed through the config.
    self.theta = 1.0
    self.alpha = 0.6

    self.target_assigner = LEDTargetAssigner(
        model_config['target_assigner_config'])

    # A truthy ``iou_criterion`` selects DetectionSampler; otherwise the
    # proposals go through BalancedSampler.
    sampler_cls = DetectionSampler if self.iou_criterion else BalancedSampler
    self.sampler = sampler_cls(model_config['sampler_config'])
def init_param(self, model_config):
    """Store the model hyper-parameters found in ``model_config``.

    Also builds the target assigner (re-exporting its ``bbox_coder``) and
    a balanced proposal sampler.

    Args:
        model_config: mapping read with string keys. ``use_cascade`` is
            optional (``None`` when absent); every other key accessed
            here is mandatory.
    """
    class_names = model_config['classes']
    self.classes = class_names
    self.n_classes = len(class_names)

    # Options stored verbatim under the name of their config key.
    for key in (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
            'fg_thresh_arr',
            'bg_thresh_arr',
            # sub-module configs
            'feature_extractor_config',
            'rpn_config',
    ):
        setattr(self, key, model_config[key])

    self.use_cascade = model_config.get('use_cascade')

    assigner = TargetAssigner(model_config['target_assigner_config'])
    self.target_assigner = assigner
    # The box coder is owned by the assigner; keep a direct handle to it.
    self.bbox_coder = assigner.bbox_coder

    self.sampler = BalancedSampler(model_config['sampler_config'])

    # Hard-wired on; the 'reduce' config entry is not consulted.
    self.reduce = True
def init_param(self, model_config):
    """Store the model hyper-parameters found in ``model_config``.

    Also builds a ``RefineTargetAssigner`` and a balanced proposal
    sampler.

    Args:
        model_config: mapping read with string keys. Every key accessed
            here is mandatory (item access, no default).
    """
    class_names = model_config['classes']
    self.classes = class_names
    self.n_classes = len(class_names)

    # Options stored verbatim under the name of their config key.
    for key in (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
            # sub-module configs
            'feature_extractor_config',
            'rpn_config',
    ):
        setattr(self, key, model_config[key])

    self.target_assigner = RefineTargetAssigner(
        model_config['target_assigner_config'])
    self.sampler = BalancedSampler(model_config['sampler_config'])
def init_param(self, model_config):
    """Store the model hyper-parameters found in ``model_config``.

    Also builds the proposal sampler, a feature visualizer and the target
    assigner (in that order).

    Args:
        model_config: mapping read with string keys. Every key accessed
            here is mandatory (item access, no default).
    """
    class_names = model_config['classes']
    self.classes = class_names
    # One slot more than the configured classes
    # (presumably for background -- TODO confirm).
    self.n_classes = len(class_names) + 1

    # Options stored verbatim under the name of their config key.
    for key in (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
            # sub-module configs
            'feature_extractor_config',
            'rpn_config',
    ):
        setattr(self, key, model_config[key])

    self.sampler = BalancedSampler(model_config['sampler_config'])

    # Fixed settings that are not exposed through the config.
    self.reduce = True
    self.visualizer = FeatVisualizer()
    self.num_bins = 4

    self.target_assigner = TargetAssigner(
        model_config['target_assigner_config'])
def init_param(self, model_config):
    """Store the model hyper-parameters found in ``model_config``.

    Also builds the target assigner and a balanced proposal sampler.

    Args:
        model_config: mapping read with string keys. ``din`` and
            ``use_self_attention`` are optional; every other key accessed
            here is mandatory.
    """
    # A missing or falsy 'din' falls back to 512.
    self.ndin = model_config.get('din') or 512

    class_names = model_config['classes']
    self.classes = class_names
    self.n_classes = len(class_names)

    # Options stored verbatim under the name of their config key.
    for key in (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
            # sub-module configs
            'feature_extractor_config',
            'rpn_config',
    ):
        setattr(self, key, model_config[key])

    # ``None`` when the option is absent from the config.
    self.use_self_attention = model_config.get('use_self_attention')

    self.target_assigner = TargetAssigner(
        model_config['target_assigner_config'])
    self.sampler = BalancedSampler(model_config['sampler_config'])
def init_param(self, model_config):
    """Store the model hyper-parameters found in ``model_config``.

    Also builds the target assigner (re-exporting its ``bbox_coder`` and
    ``similarity_calc``), a balanced sampler, and a set of hard-coded
    switches that are not read from the config.

    Args:
        model_config: mapping read with string keys. Every key accessed
            here is mandatory (item access, no default).
    """
    class_names = model_config['classes']
    self.classes = class_names
    self.n_classes = len(class_names)

    # Options stored verbatim under the name of their config key.
    for key in (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
            # sub-module configs
            'feature_extractor_config',
            'rpn_config',
    ):
        setattr(self, key, model_config[key])

    assigner = TargetAssigner(model_config['target_assigner_config'])
    self.target_assigner = assigner
    # Both helpers are owned by the assigner; keep direct handles to them.
    self.bbox_coder = assigner.bbox_coder
    self.similarity_calc = assigner.similarity_calc

    self.sampler = BalancedSampler(model_config['sampler_config'])

    # Hard-wired on; the 'reduce' config entry is not consulted.
    self.reduce = True

    # Which quantities are optimised / computed.
    self.enable_cls = False
    self.enable_reg = True
    self.enable_iou = False

    # RoI tracking.
    self.enable_track_rois = True
    self.enable_track_rcnn_rois = True

    # Evaluation of the final boxes.
    self.enable_eval_final_bbox = True

    # Ground-truth usage and sampling/cropping switches.
    self.use_gt = False
    self.subsample = False
    self.multiple_crop = False
def init_param(self, model_config):
    """Store the parameters this model reads from ``model_config``.

    Builds a profiler, a ``DataEncoder`` configured from the module-level
    ``ModelCFG``, the target assigner and a balanced sampler.

    Args:
        model_config: mapping read with string keys. Every key accessed
            here is mandatory (item access, no default).
    """
    # One slot more than the configured classes
    # (presumably for background -- TODO confirm).
    n_configured = len(model_config['classes'])
    self.n_classes = n_configured + 1
    self.rcnn_batch_size = model_config['rcnn_batch_size']

    self.profiler = Profiler()

    # The encoder takes its settings from ModelCFG, not from model_config.
    anchor_type = ModelCFG['anchor_type']
    self.encoder = DataEncoder(
        ModelCFG, anchor_type=anchor_type, infer_mode=True)

    self.num_bins = 2
    self.model_path = model_config['model_path']

    assigner_config = model_config['target_assigner_config']
    self.target_assigner = TargetAssigner(assigner_config)

    sampler_config = model_config['sampler_config']
    self.sampler = BalancedSampler(sampler_config)
def init_param(self, model_config):
    """Store the model hyper-parameters found in ``model_config``.

    Also builds the proposal sampler, a feature visualizer, the target
    assigner and a profiler.

    Note:
        ``model_config['target_assigner_config']['fg_thresh']`` is
        overwritten in place before the assigner is built, so the caller's
        config object is modified by this call.

    Args:
        model_config: mapping read with string keys. Every key accessed
            here is mandatory (item access, no default).
    """
    class_names = model_config['classes']
    self.classes = class_names
    self.n_classes = len(class_names)

    # Options stored verbatim under the name of their config key.
    for key in (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
            # sub-module configs
            'feature_extractor_config',
            'rpn_config',
    ):
        setattr(self, key, model_config[key])

    self.sampler = BalancedSampler(model_config['sampler_config'])

    # Hard-wired on; the 'reduce' config entry is not consulted.
    self.reduce = True
    self.visualizer = FeatVisualizer()
    self.num_bins = 4

    # 2D and 3D training are mutually exclusive; 3D is switched off here.
    self.train_3d = False
    self.train_2d = not self.train_3d

    # 3D training uses a stricter foreground threshold than 2D training.
    fg_thresh = 0.6 if self.train_3d else 0.5
    assigner_config = model_config['target_assigner_config']
    assigner_config['fg_thresh'] = fg_thresh
    self.target_assigner = TargetAssigner(assigner_config)

    self.profiler = Profiler()
    self.h_cat = False
def init_param(self, model_config):
    """Store the model hyper-parameters found in ``model_config``.

    Also builds an ``LEDTargetAssigner``, a balanced proposal sampler and
    a ``DiscreteBBoxCoder`` for the IoX branch.

    Args:
        model_config: mapping read with string keys. ``use_sigmoid`` is
            optional (``None`` when absent); every other key accessed
            here is mandatory.
    """
    class_names = model_config['classes']
    self.classes = class_names
    self.n_classes = len(class_names)

    # Options stored verbatim under the name of their config key.
    for key in (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'rpn_config',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
    ):
        setattr(self, key, model_config[key])

    self.use_sigmoid = model_config.get('use_sigmoid')

    for key in (
            'use_sharpL2',
            'use_cls_pred',
            'feature_extractor_config',
    ):
        setattr(self, key, model_config[key])

    # Fixed constants that are not exposed through the config.
    self.theta = 1.0
    self.alpha = 0.6
    self.reduce = True

    self.target_assigner = LEDTargetAssigner(
        model_config['target_assigner_config'])
    self.sampler = BalancedSampler(model_config['sampler_config'])
    self.iox_bbox_coder = DiscreteBBoxCoder(model_config['iox_coder_config'])
class Mono3DSimplerFasterRCNN(Model):
    """RPN + RoI-head detector with keypoint-heatmap and 3-value heads.

    On top of the class-score and 2D box heads, the second stage predicts
    56x56 keypoint heat-maps (from a separate 14x14 RoI pooling) and a
    3-element regression output stored as ``rcnn_3d``.
    """

    def forward(self, feed_dict):
        """Run the backbone, the RPN and all second-stage heads.

        Args:
            feed_dict: mapping holding at least ``'img'``. It is modified
                in place (``'base_feat'`` is added). In training mode the
                ground-truth keys read by :meth:`pre_subsample` are
                required as well.

        Returns:
            dict: the RPN outputs extended with ``rcnn_cls_probs``,
            ``rcnn_bbox_preds``, ``rcnn_cls_scores``,
            ``second_rpn_anchors``, ``keypoint_probs``,
            ``keypoint_scores``, ``rcnn_3d`` and, outside training,
            ``keypoints``.
        """
        prediction_dict = {}

        # Backbone features are handed to the RPN through feed_dict.
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict['base_feat'] = base_feat

        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # Training keeps only a sampled subset of the proposals.
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']
        flat_rois = rois_batch.view(-1, 5)

        pooled_feat = self.rcnn_pooling(base_feat, flat_rois)
        mask_pooled_feat = self.mask_rcnn_pooling(base_feat, flat_rois)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        # Per-location class scores; averaging over the two spatial axes
        # gives the per-RoI scores.
        cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # Re-weight the features with the saliency of every channel but
        # the first (presumably background -- TODO confirm), then pool.
        weighted_feat = pooled_feat * saliency_map[:, 1:, :, :]
        reduced_pooled_feat = weighted_feat.mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(reduced_pooled_feat)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # Anchors re-ordered like the proposals (used for tracking).
        order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = (
            prediction_dict['anchors'][order])

        # Keypoint head: one softmax over each flattened 56x56 heat-map.
        heatmap = self.keypoint_predictor(mask_pooled_feat)
        keypoint_scores = heatmap.view(-1, 56 * 56)
        keypoint_probs = F.softmax(keypoint_scores, dim=-1)
        prediction_dict['keypoint_probs'] = keypoint_probs
        prediction_dict['keypoint_scores'] = keypoint_scores

        prediction_dict['rcnn_3d'] = self.rcnn_3d_pred(reduced_pooled_feat)

        if not self.training:
            # Only the RoIs of the first batch entry are decoded; the
            # heat-maps are grouped four per RoI.
            prediction_dict['keypoints'] = (
                self.keypoint_coder.decode_keypoint_heatmap(
                    rois_batch[0, :, 1:],
                    keypoint_probs.view(-1, 4, 56 * 56)))

        return prediction_dict

    def init_weights(self):
        """Initialise the sub-modules and the class / box prediction heads."""
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        for head, std in ((self.rcnn_cls_pred, 0.01),
                          (self.rcnn_bbox_pred, 0.001)):
            Filler.normal_init(head, 0, std, self.truncated)

    def modify_feature_extractor(self):
        """Replace the second-stage feature with a stride-1 Bottleneck stack."""
        from torchvision.models.resnet import Bottleneck

        self.feature_extractor.second_stage_feature = self._make_layer(
            Bottleneck, 512, 3, stride=1)

    def init_modules(self):
        """Create the backbone, RPN, pooling layers, heads and losses.

        Raises:
            NotImplementedError: for the ``'psalign'`` and
                ``'deformable_psalign'`` pooling modes.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.modify_feature_extractor()
        self.rpn_model = RPNModel(self.rpn_config)

        mode = self.pooling_mode
        if mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(
                self.pooling_size, self.pooling_size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')
        # Any other mode leaves ``rcnn_pooling`` unset.

        self.mask_rcnn_pooling = RoIAlignAvg(14, 14, 1.0 / 16.0)

        # Convolutional classifier: yields a score map, not a vector.
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

        in_channels = 2048 if self.reduce else 2048 * 4 * 4
        box_dims = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(in_channels, box_dims)

        # Loss modules (all element-wise; reduction happens in loss()).
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_kp_loss = functools.partial(
            F.cross_entropy, reduce=False, ignore_index=-1)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # 3D branch.
        self.rcnn_3d_pred = nn.Linear(in_channels, 3)
        self.rcnn_3d_loss = OrientationLoss(split_loss=True)

        self.keypoint_predictor = KeyPointPredictor2(1024)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build a sequence of ``blocks`` residual blocks fed 1024 channels.

        Args:
            block: residual block class exposing an ``expansion`` attribute.
            planes: base channel count of each block.
            blocks: number of blocks in the sequence.
            stride: stride of the first block.

        Returns:
            nn.Sequential: the stacked blocks.
        """
        in_planes = 1024
        out_planes = planes * block.expansion

        # The shortcut needs a projection whenever the first block changes
        # the resolution or the channel count.
        shortcut = None
        if stride != 1 or in_planes != out_planes:
            shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stack = [block(in_planes, planes, stride, shortcut)]
        for _ in range(1, blocks):
            stack.append(block(out_planes, planes))
        return nn.Sequential(*stack)

    def init_param(self, model_config):
        """Store the model hyper-parameters found in ``model_config``.

        Also builds the sampler, a feature visualizer, the target assigner
        (re-exporting its ``keypoint_coder``) and a profiler.

        Args:
            model_config: mapping read with string keys. Every key
                accessed here is mandatory (item access, no default).
        """
        class_names = model_config['classes']
        self.classes = class_names
        self.n_classes = len(class_names)

        # Options stored verbatim under the name of their config key.
        for key in (
                'class_agnostic',
                'pooling_size',
                'pooling_mode',
                'crop_resize_with_max_pool',
                'truncated',
                'use_focal_loss',
                'subsample_twice',
                'rcnn_batch_size',
                # sub-module configs
                'feature_extractor_config',
                'rpn_config',
        ):
            setattr(self, key, model_config[key])

        self.sampler = BalancedSampler(model_config['sampler_config'])

        # Hard-wired on; the 'reduce' config entry is not consulted.
        self.reduce = True
        self.visualizer = FeatVisualizer()
        self.num_bins = 4

        assigner = TargetAssigner(model_config['target_assigner_config'])
        self.target_assigner = assigner
        self.keypoint_coder = assigner.keypoint_coder

        self.profiler = Profiler()

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and keep a sampled subset.

        Writes the sampled (and normalised) weights, the sampled targets
        and the reduced ``rois_batch`` back into ``prediction_dict``.

        Args:
            prediction_dict: RPN outputs; must contain ``'rois_batch'``.
            feed_dict: must contain ``'gt_boxes'``, ``'gt_labels'``,
                ``'gt_boxes_3d'`` and ``'keypoint_gt'``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # The 3D target is the first three entries of the 3D box followed
        # by the keypoint ground truth.
        gt_boxes_3d = torch.cat(
            [feed_dict['gt_boxes_3d'][:, :, :3], feed_dict['keypoint_gt']],
            dim=-1)

        # Target assignment (the leading RoI column is not passed on).
        (cls_targets, reg_targets,
         cls_weights, reg_weights,
         reg_targets_3d, reg_weights_3d) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_boxes_3d, gt_labels)

        # Sub-sampling: positives are the entries with a regression
        # weight, candidates the entries with a classification weight.
        sampled = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            reg_weights > 0,
            indicator=cls_weights > 0,
            criterion=None)

        cls_weights = cls_weights[sampled]
        reg_weights = reg_weights[sampled]
        reg_weights_3d = reg_weights_3d[sampled]

        # Normalisers: number of samples that actually carry weight.
        num_cls_coeff = (cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (reg_weights > 0).sum(dim=-1)
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        cls_norm = num_cls_coeff.float()
        reg_norm = num_reg_coeff.float()
        prediction_dict['rcnn_cls_weights'] = cls_weights / cls_norm
        prediction_dict['rcnn_reg_weights'] = reg_weights / reg_norm
        prediction_dict['rcnn_reg_weights_3d'] = reg_weights_3d / reg_norm

        prediction_dict['rcnn_cls_targets'] = cls_targets[sampled]
        prediction_dict['rcnn_reg_targets'] = reg_targets[sampled]
        prediction_dict['rcnn_reg_targets_3d'] = reg_targets_3d[sampled]

        # Only the sampled RoIs go through the second stage.
        prediction_dict['rois_batch'] = rois_batch[sampled].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN losses plus the four second-stage losses.

        Args:
            prediction_dict: output of :meth:`forward` in training mode
                (it must contain the targets and weights written by
                :meth:`pre_subsample`).
            feed_dict: forwarded to the RPN loss.

        Returns:
            dict: the RPN losses extended with ``rcnn_cls_loss``,
            ``rcnn_bbox_loss``, ``keypoint_loss`` and ``rcnn_3d_loss``.
        """
        loss_dict = {}
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        cls_weights = prediction_dict['rcnn_cls_weights']
        reg_weights = prediction_dict['rcnn_reg_weights']
        cls_targets = prediction_dict['rcnn_cls_targets']
        reg_targets = prediction_dict['rcnn_reg_targets']

        # Classification: weighted element-wise loss, then summed.
        cls_loss = self.rcnn_cls_loss(
            prediction_dict['rcnn_cls_scores'], cls_targets)
        cls_loss *= cls_weights
        cls_loss = cls_loss.sum(dim=-1)

        # Box regression: smooth L1 summed per box, weighted, summed.
        bbox_loss = self.rcnn_bbox_loss(
            prediction_dict['rcnn_bbox_preds'], reg_targets).sum(dim=-1)
        bbox_loss *= reg_weights
        bbox_loss = bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = cls_loss
        loss_dict['rcnn_bbox_loss'] = bbox_loss

        reg_targets_3d = prediction_dict['rcnn_reg_targets_3d']
        reg_weights_3d = prediction_dict['rcnn_reg_weights_3d']
        keypoint_scores = prediction_dict['keypoint_scores']

        # Keypoint targets follow the first three 3D entries and are read
        # as (position, weight) pairs.
        keypoint_gt = reg_targets_3d[:, 3:].contiguous().view(-1, 2)
        keypoint_weights = keypoint_gt[:, 1]
        keypoint_pos = keypoint_gt[:, 0]
        # Zero-weight keypoints get the label ignored by rcnn_kp_loss.
        keypoint_pos[keypoint_weights == 0] = -1

        keypoint_loss = self.rcnn_kp_loss(keypoint_scores,
                                          keypoint_pos.long())
        # Four keypoints per RoI, each scaled by the weight of its RoI.
        keypoint_loss = keypoint_loss.view(-1, 4) * reg_weights_3d.unsqueeze(
            -1)
        loss_dict['keypoint_loss'] = keypoint_loss.sum(dim=-1).sum(dim=-1)

        # 3-value regression; it reuses the box loss module
        # (``self.rcnn_3d_loss`` is not used here).
        loss_3d = self.rcnn_bbox_loss(prediction_dict['rcnn_3d'],
                                      reg_targets_3d[:, :3])
        # NOTE(review): the keypoint loss above broadcasts these weights
        # per RoI with unsqueeze(-1), whereas this line first reduces them
        # with sum(dim=-1) -- confirm that this is intended.
        loss_3d = loss_3d * reg_weights_3d.sum(dim=-1)
        loss_dict['rcnn_3d_loss'] = loss_3d.sum(dim=-1).sum(dim=-1)

        return loss_dict
class ThreeIoUFasterRCNN(Model):
    """RPN + RoI-head detector whose score branch is trained with MSE.

    Two samplers are used during training: one selecting the RoIs that
    contribute to the box loss, one selecting the RoIs that contribute to
    the score loss. The union of both selections goes through the second
    stage.
    """

    def forward(self, feed_dict):
        """Run the backbone, the RPN and the second-stage heads.

        Args:
            feed_dict: mapping holding at least ``'img'``. It is modified
                in place (``'base_feat'`` is added). In training mode the
                ground-truth keys read by :meth:`pre_subsample` are
                required as well.

        Returns:
            dict: the RPN outputs extended with ``rcnn_cls_probs``,
            ``rcnn_bbox_preds``, ``rcnn_cls_scores`` and
            ``second_rpn_anchors``.
        """
        prediction_dict = {}

        # Backbone features are handed to the RPN through feed_dict.
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict['base_feat'] = base_feat

        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # Sub-sample before pooling to reduce memory consumption.
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        # Per-location class scores; averaging over the two spatial axes
        # gives the per-RoI scores.
        cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # Box features are re-weighted with the saliency of every channel
        # but the first (presumably background -- TODO confirm).
        bbox_feat = (pooled_feat * saliency_map[:, 1:, :, :]).mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(bbox_feat)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # Anchors of the first batch entry, re-ordered like the proposals
        # (used for tracking).
        order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = (
            prediction_dict['anchors'][0][order])

        return prediction_dict

    def init_weights(self):
        """Initialise the sub-modules and the two prediction heads."""
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        for head, std in ((self.rcnn_cls_pred, 0.01),
                          (self.rcnn_bbox_pred, 0.001)):
            Filler.normal_init(head, 0, std, self.truncated)

    def init_modules(self):
        """Create the backbone, RPN, RoI pooling, heads and losses."""
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        # RoIAlign is used whatever ``pooling_mode`` says.
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                        1.0 / 16.0)

        # Convolutional classifier: yields a score map, not a vector.
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

        box_dims = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(2048, box_dims)

        # Element-wise losses; the reduction happens in loss(). The score
        # branch uses MSE whatever ``use_focal_loss`` says.
        self.rcnn_cls_loss = nn.MSELoss(reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Store the model hyper-parameters found in ``model_config``.

        Also builds the target assigner and the two samplers.

        Args:
            model_config: mapping read with string keys. Every key
                accessed here is mandatory (item access, no default).
        """
        class_names = model_config['classes']
        self.classes = class_names
        self.n_classes = len(class_names)

        # Options stored verbatim under the name of their config key.
        for key in (
                'class_agnostic',
                'pooling_size',
                'pooling_mode',
                'crop_resize_with_max_pool',
                'truncated',
                'use_focal_loss',
                'subsample_twice',
                'rcnn_batch_size',
                # sub-module configs
                'feature_extractor_config',
                'rpn_config',
        ):
            setattr(self, key, model_config[key])

        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # The box sampler has a fixed config; only the score sampler is
        # configured through model_config.
        self.bbox_sampler = HardNegativeSampler({'fg_fraction': 1})
        self.iou_sampler = BalancedSampler(model_config['sampler_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and sample them twice.

        The box sampler picks the RoIs used for the box loss, the score
        sampler the RoIs used for the score loss. Their union replaces
        ``rois_batch``. The normalised weights, the targets and the box
        mask (restricted to the union) are written to
        ``prediction_dict``.

        Args:
            prediction_dict: RPN outputs; must contain ``'rois_batch'``
                (and ``'proposals_order'`` outside training).
            feed_dict: must contain ``'gt_boxes'`` and ``'gt_labels'``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # Target assignment (the leading RoI column is not passed on).
        cls_targets, reg_targets, cls_weights, reg_weights = (
            self.target_assigner.assign(rois_batch[:, :, 1:], gt_boxes,
                                        gt_labels))

        # Box sampling, ranked by the overlaps recorded by the matcher.
        overlaps = self.target_assigner.matcher.assigned_overlaps_batch
        bbox_mask = self.bbox_sampler.subsample_batch(
            self.rcnn_batch_size, reg_weights > 0, criterion=overlaps)
        # Drop the entries for which no box loss is needed.
        bbox_mask[reg_weights == 0] = 0

        sampled_reg_weights = reg_weights[bbox_mask]
        num_reg_coeff = (sampled_reg_weights > 0).sum(dim=-1)
        assert num_reg_coeff, 'bug happens'

        # Balanced score sampling.
        iou_mask = self.iou_sampler.subsample_batch(self.rcnn_batch_size,
                                                    cls_targets > 0)
        # Every RoI whose box is optimised also has its score optimised.
        iou_mask |= bbox_mask

        sampled_cls_weights = cls_weights[iou_mask]
        num_cls_coeff = (sampled_cls_weights > 0).sum(dim=-1)
        assert num_cls_coeff, 'bug happens'

        prediction_dict['rcnn_cls_weights'] = (
            sampled_cls_weights / num_cls_coeff.float())
        prediction_dict['rcnn_reg_weights'] = (
            sampled_reg_weights / num_reg_coeff.float())

        prediction_dict['rcnn_cls_targets'] = cls_targets[iou_mask]
        prediction_dict['rcnn_reg_targets'] = reg_targets[bbox_mask]

        # Only the union of both selections goes through the second stage.
        prediction_dict['rois_batch'] = rois_batch[iou_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # Keep the order aligned with the reduced RoIs (for tracking).
            order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = order[iou_mask]

        # Stored unconditionally: loss() uses it to pick the box
        # predictions among the RoIs that were kept.
        prediction_dict['bbox_batch_sampled_mask'] = bbox_mask[iou_mask]

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN losses plus the score and box losses.

        Args:
            prediction_dict: output of :meth:`forward` in training mode
                (it must contain the targets, weights and box mask
                written by :meth:`pre_subsample`).
            feed_dict: forwarded to the RPN loss.

        Returns:
            dict: the RPN losses extended with ``rcnn_cls_loss`` and
            ``rcnn_bbox_loss``.
        """
        loss_dict = {}
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        cls_weights = prediction_dict['rcnn_cls_weights']
        reg_weights = prediction_dict['rcnn_reg_weights']
        cls_targets = prediction_dict['rcnn_cls_targets']
        reg_targets = prediction_dict['rcnn_reg_targets']
        bbox_mask = prediction_dict['bbox_batch_sampled_mask']

        # Score loss: prediction (probability of class 1) and target are
        # both exponentiated before the element-wise MSE.
        scores = torch.exp(prediction_dict['rcnn_cls_probs'][:, 1])
        exp_targets = torch.exp(cls_targets)
        cls_loss = self.rcnn_cls_loss(scores, exp_targets)
        cls_loss *= cls_weights
        cls_loss = cls_loss.sum(dim=-1)

        # Box loss, restricted to the RoIs picked by the box sampler.
        bbox_preds = prediction_dict['rcnn_bbox_preds'][bbox_mask]
        bbox_loss = self.rcnn_bbox_loss(bbox_preds, reg_targets).sum(dim=-1)
        bbox_loss *= reg_weights
        bbox_loss = bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = cls_loss
        loss_dict['rcnn_bbox_loss'] = bbox_loss

        return loss_dict
class DoubleIoUFasterRCNN(Model):
    """RPN + RoI-head detector whose second stage only regresses boxes.

    The class probabilities reported by :meth:`forward` are taken from the
    RPN. The second-stage classifier ``rcnn_cls_pred`` is created and
    initialised but not used by :meth:`forward` or :meth:`loss`.
    """

    def forward(self, feed_dict):
        """Run the backbone, the RPN and the box regression head.

        Args:
            feed_dict: mapping holding at least ``'img'``. It is modified
                in place (``'base_feat'`` is added). In training mode the
                ground-truth keys read by :meth:`pre_subsample` are
                required as well.

        Returns:
            dict: the RPN outputs extended with ``rcnn_bbox_preds``,
            ``second_rpn_anchors`` and ``rcnn_cls_probs``.
        """
        prediction_dict = {}

        # Backbone features are handed to the RPN through feed_dict and
        # also registered through add_feat.
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict['base_feat'] = base_feat
        self.add_feat('base_feat', base_feat)

        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # Sub-sample before pooling to reduce memory consumption.
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # Second-stage features, averaged over the two spatial axes.
        reg_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        reg_feat = reg_feat.mean(3).mean(2)
        prediction_dict['rcnn_bbox_preds'] = self.rcnn_bbox_pred(reg_feat)

        # Anchors and RPN probabilities re-ordered like the proposals
        # (used for tracking); the probabilities of the first batch entry
        # stand in for the second-stage class probabilities.
        order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = (
            prediction_dict['anchors'][order])
        prediction_dict['rcnn_cls_probs'] = (
            prediction_dict['rpn_cls_probs'][0][order])

        return prediction_dict

    def init_weights(self):
        """Initialise the sub-modules and the two prediction heads."""
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        for head, std in ((self.rcnn_cls_pred, 0.01),
                          (self.rcnn_bbox_pred, 0.001)):
            Filler.normal_init(head, 0, std, self.truncated)

    def init_modules(self):
        """Create the backbone, RPN, RoI pooling, heads and losses.

        Raises:
            NotImplementedError: for the ``'psalign'`` and
                ``'deformable_psalign'`` pooling modes.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        mode = self.pooling_mode
        if mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(
                self.pooling_size, self.pooling_size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')
        # Any other mode leaves ``rcnn_pooling`` unset.

        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)

        in_channels = 2048 if self.reduce else 2048 * 4 * 4
        box_dims = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(in_channels, box_dims)

        # Loss modules (element-wise; the reduction happens in loss()).
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Store the model hyper-parameters found in ``model_config``.

        Also builds the target assigner and a balanced proposal sampler.

        Args:
            model_config: mapping read with string keys. Every key
                accessed here is mandatory (item access, no default).
        """
        class_names = model_config['classes']
        self.classes = class_names
        self.n_classes = len(class_names)

        # Options stored verbatim under the name of their config key.
        for key in (
                'class_agnostic',
                'pooling_size',
                'pooling_mode',
                'crop_resize_with_max_pool',
                'truncated',
                'use_focal_loss',
                'subsample_twice',
                'rcnn_batch_size',
                # sub-module configs
                'feature_extractor_config',
                'rpn_config',
        ):
            setattr(self, key, model_config[key])

        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # Hard-wired on; the 'reduce' config entry is not consulted.
        self.reduce = True

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign regression targets to the proposals and sample them.

        Writes the normalised regression weights, the sampled regression
        targets and the reduced ``rois_batch`` to ``prediction_dict``.
        Classification targets and weights are not stored.

        Args:
            prediction_dict: RPN outputs; must contain ``'rois_batch'``
                (and ``'proposals_order'`` outside training).
            feed_dict: must contain ``'gt_boxes'`` and ``'gt_labels'``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # Target assignment (the leading RoI column is not passed on);
        # only the regression outputs are used below.
        _, reg_targets, _, reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        # Sample among all proposals: no candidate filter, no criterion.
        sampled = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            reg_weights > 0,
            indicator=None,
            criterion=None)

        reg_weights = reg_weights[sampled]
        # Normaliser: number of samples that actually carry weight.
        num_reg_coeff = (reg_weights > 0).sum(dim=-1)
        assert num_reg_coeff, 'bug happens'

        prediction_dict['rcnn_reg_weights'] = (
            reg_weights / num_reg_coeff.float())
        prediction_dict['rcnn_reg_targets'] = reg_targets[sampled]

        # Only the sampled RoIs go through the second stage.
        prediction_dict['rois_batch'] = rois_batch[sampled].view(
            rois_batch.shape[0], -1, 5)

        # forward() only calls this method in training mode, so this
        # branch is reached only when the method is invoked directly.
        if not self.training:
            order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = order[sampled]

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN losses plus the second-stage box loss.

        Args:
            prediction_dict: output of :meth:`forward` in training mode
                (it must contain the targets and weights written by
                :meth:`pre_subsample`).
            feed_dict: forwarded to the RPN loss.

        Returns:
            dict: the RPN losses extended with ``rcnn_bbox_loss``.
        """
        loss_dict = {}
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        reg_weights = prediction_dict['rcnn_reg_weights']
        reg_targets = prediction_dict['rcnn_reg_targets']

        # Box regression: smooth L1 summed per box, weighted, summed.
        bbox_loss = self.rcnn_bbox_loss(
            prediction_dict['rcnn_bbox_preds'], reg_targets).sum(dim=-1)
        bbox_loss *= reg_weights
        bbox_loss = bbox_loss.sum(dim=-1)

        loss_dict['rcnn_bbox_loss'] = bbox_loss

        return loss_dict
class LossFasterRCNN(Model):
    """Two-stage detector with a saliency-gated box head and a cluster loss.

    The classification head is a 3x3 convolution, so it yields a per-location
    score map.  The softmax of that map (all channels except channel 0) is
    multiplied into the pooled features before box regression.
    """

    def forward(self, feed_dict):
        """Run the backbone, the RPN and the RCNN heads.

        Args:
            feed_dict: mapping holding at least ``'img'``; during training
                ``pre_subsample`` additionally reads ``'gt_boxes'`` and
                ``'gt_labels'``.  ``'base_feat'`` is written into it.

        Returns:
            dict with the RPN outputs plus ``'rcnn_cls_probs'``,
            ``'rcnn_bbox_preds'``, ``'rcnn_cls_scores'`` and
            ``'second_rpn_anchors'``.
        """
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update(base_feat=base_feat)

        # first stage: region proposals
        prediction_dict = {}
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # subsample proposals up front to cut memory use during training
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)

        # base_feat is (N,C,H,W); rois are flattened to (N*num_proposals, 5)
        flat_rois = prediction_dict['rois_batch'].view(-1, 5)
        pooled_feat = self.feature_extractor.second_stage_feature(
            self.rcnn_pooling(base_feat, flat_rois))

        # per-location class scores, averaged over space for the logits
        cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = cls_scores_map.mean(3).mean(2)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # gate the pooled features with the score-map softmax (channel 0
        # dropped) before regressing boxes
        saliency = F.softmax(cls_scores_map, dim=1)[:, 1:, :, :]
        gated_feat = (pooled_feat * saliency).mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(gated_feat)

        prediction_dict.update([
            ('rcnn_cls_probs', rcnn_cls_probs),
            ('rcnn_bbox_preds', rcnn_bbox_preds),
            ('rcnn_cls_scores', rcnn_cls_scores),
        ])

        # used for track
        order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = \
            prediction_dict['anchors'][0][order]
        return prediction_dict

    def init_weights(self):
        """Initialise the submodules, then both RCNN heads (normal init)."""
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        # (head, std) pairs; cls head first, exactly as before
        for head, std in ((self.rcnn_cls_pred, 0.01),
                          (self.rcnn_bbox_pred, 0.001)):
            Filler.normal_init(head, 0, std, self.truncated)

    def init_modules(self):
        """Build backbone, RPN, RoI pooling, prediction heads and losses.

        Raises:
            NotImplementedError: for pooling modes ``'psalign'`` and
                ``'deformable_psalign'``.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = LossRPNModel(self.rpn_config)

        mode = self.pooling_mode
        if mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')
        if mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)

        # convolutional cls head: keeps the spatial map used as saliency
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

        # class-agnostic regression predicts one box, otherwise one per class
        num_box_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(2048, num_box_outputs)

        # loss modules (un-reduced so that per-sample weights can be applied)
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # cluster loss for bbox and cls(feat)
        self.cluster_loss = ClusterLoss()

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` and build helpers.

        Args:
            model_config: mapping with the keys read below; a missing key
                raises ``KeyError``.
        """
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)

        # options stored under the same name as their config key
        for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                    'crop_resize_with_max_pool', 'truncated',
                    'use_focal_loss', 'subsample_twice', 'rcnn_batch_size',
                    'feature_extractor_config', 'rpn_config'):
            setattr(self, key, model_config[key])

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

    def get_cluster_loss(self, num_gt, bbox_feat, cls_feat):
        """Accumulate the cluster loss once per ground-truth box.

        NOTE(review): this looks unfinished -- ``bbox_feat`` and ``cls_feat``
        are never used, ``self.cluster_loss`` is called without arguments and
        nothing is returned.  ``loss`` computes the cluster term inline
        instead; confirm whether this method can be removed.

        Args:
            num_gt: number of ground-truth boxes.
            bbox_feat: (N,M,) -- unused.
            cls_feat: unused.
        """
        total = 0
        # shape(N,M)
        match = self.target_assigner.matcher.match
        assert match.shape[0] == 1, 'only one num of batch is supported now'
        match = match[0]
        for _ in range(num_gt):
            total += self.cluster_loss()

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and keep a sampled subset.

        Writes the sampled ``rois_batch``, the normalised cls/reg weights,
        the cls/reg targets and the sampled ``match`` into
        ``prediction_dict``.
        """
        rois_batch = prediction_dict['rois_batch']

        ##########################
        # assigner
        ##########################
        cls_targets, reg_targets, cls_weights, reg_weights = \
            self.target_assigner.assign(rois_batch[:, :, 1:],
                                        feed_dict['gt_boxes'],
                                        feed_dict['gt_labels'])

        ##########################
        # subsampler
        ##########################
        # mask of shape (N,M) selecting the proposals to keep
        keep = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            reg_weights > 0,
            indicator=cls_weights > 0,
            criterion=None)

        cls_weights = cls_weights[keep]
        reg_weights = reg_weights[keep]
        num_cls = (cls_weights > 0).sum(dim=-1)
        num_reg = (reg_weights > 0).sum(dim=-1)
        # both counts are used as divisors below
        assert num_cls, 'bug happens'
        assert num_reg, 'bug happens'

        prediction_dict['rcnn_cls_weights'] = cls_weights / num_cls.float()
        prediction_dict['rcnn_reg_weights'] = reg_weights / num_reg.float()
        prediction_dict['rcnn_cls_targets'] = cls_targets[keep]
        prediction_dict['rcnn_reg_targets'] = reg_targets[keep]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[keep].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = order[keep]

        # mask the matcher assignments the same way as everything else
        assignments = self.target_assigner.matcher.assignments
        prediction_dict['match'] = assignments[keep]

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN losses, the RCNN cls/bbox losses and the bbox
        cluster loss.

        Returns:
            dict with the RPN loss entries plus ``'rcnn/cls_loss'``,
            ``'rcnn/bbox_loss'`` and ``'rpn/cluster_bbox_loss'``.
        """
        # submodule loss
        loss_dict = dict(self.rpn_model.loss(prediction_dict, feed_dict))

        # weights and targets written by pre_subsample
        cls_weights = prediction_dict['rcnn_cls_weights']
        reg_weights = prediction_dict['rcnn_reg_weights']
        cls_targets = prediction_dict['rcnn_cls_targets']
        reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        cls_loss = self.rcnn_cls_loss(prediction_dict['rcnn_cls_scores'],
                                      cls_targets)
        cls_loss *= cls_weights
        cls_loss = cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        bbox_preds = prediction_dict['rcnn_bbox_preds']
        bbox_loss = self.rcnn_bbox_loss(bbox_preds, reg_targets).sum(dim=-1)
        bbox_loss *= reg_weights
        bbox_loss = bbox_loss.sum(dim=-1)

        # one cluster term per ground-truth box, over the box predictions of
        # the proposals matched to it
        match = prediction_dict['match']
        cluster_bbox_loss = 0
        for gt_idx in range(feed_dict['gt_boxes'].shape[1]):
            matched_preds = bbox_preds[match == gt_idx]
            cluster_bbox_loss += self.cluster_loss(matched_preds)

        # loss weights has no gradients
        loss_dict['rcnn/cls_loss'] = cls_loss
        loss_dict['rcnn/bbox_loss'] = bbox_loss
        loss_dict['rpn/cluster_bbox_loss'] = cluster_bbox_loss
        return loss_dict
class SINetModel(Model):
    """Faster R-CNN variant that pools RoI features from two backbone stages
    and fuses them with a 1x1 convolution before the second stage."""

    def collect_intermedia_layers(self, img):
        """Run the first stage and keep its last two intermediate outputs.

        Returns:
            tuple ``(feat3, end_points)`` where ``end_points`` maps
            ``'feat2'`` (output of all layers but the last) and ``'feat3'``
            (output of the last layer) to their tensors.
        """
        feat2 = self.feature_extractor.first_stage_feature[:-1](img)
        feat3 = self.feature_extractor.first_stage_feature[-1](feat2)
        end_points = {'feat2': feat2, 'feat3': feat3}
        return feat3, end_points

    def caroi_pooling(self, all_feats, rois_batch, out_channels):
        """RoI-pool every feature map and concatenate along channels.

        Args:
            all_feats: feature maps, either a mapping name -> tensor (as
                returned by ``collect_intermedia_layers``) or a sequence of
                tensors.
            rois_batch: rois flattened to shape (num_rois, 5).
            out_channels: expected channel count; if the concatenation
                differs, ``self.reduce_pooling`` (1x1 conv) is applied.

        Returns:
            the pooled (and possibly reduced) feature tensor.
        """
        if isinstance(all_feats, dict):
            # Iterating a dict yields its keys, so the pooling op used to
            # receive the strings 'feat2'/'feat3'.  Take the tensors instead,
            # in sorted key order so the channel layout is deterministic.
            feats = [all_feats[name] for name in sorted(all_feats)]
        else:
            feats = list(all_feats)

        # NOTE(review): every map is pooled with self.rcnn_pooling (scale
        # 1/16); self.rcnn_pooling2 (scale 1/8) is built in init_modules but
        # never used -- confirm whether feat2 was meant to use it.
        pooled_feats = torch.cat(
            [self.rcnn_pooling(feat, rois_batch) for feat in feats], dim=1)

        if pooled_feats.shape[1] != out_channels:
            # add 1x1 conv
            pooled_feats = self.reduce_pooling(pooled_feats)
        return pooled_feats

    def forward(self, feed_dict):
        """Run the backbone, the RPN and the RCNN heads.

        Args:
            feed_dict: mapping holding at least ``'img'``; during training
                ``pre_subsample`` additionally reads ``'gt_boxes'`` and
                ``'gt_labels'``.  ``'base_feat'`` is written into it.

        Returns:
            dict with the RPN outputs plus ``'rcnn_cls_probs'``,
            ``'rcnn_bbox_preds'``, ``'rcnn_cls_scores'`` and
            ``'second_rpn_anchors'``.
        """
        prediction_dict = {}

        # base model
        base_feat, all_feats = self.collect_intermedia_layers(feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # pre subsample for reduce consume of memory
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.caroi_pooling(
            all_feats, rois_batch.view(-1, 5), out_channels=1024)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        if self.reduce:
            # global average pooling -> shape (num_rois, C)
            pooled_feat = pooled_feat.mean(3).mean(2)
        else:
            # Flatten per roi.  Using the real roi count instead of
            # self.rcnn_batch_size also works at inference time and for
            # multi-image batches.
            pooled_feat = pooled_feat.view(pooled_feat.shape[0], -1)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]
        return prediction_dict

    def init_weights(self):
        """Initialise the submodules, then both RCNN heads (normal init)."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()
        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build backbone, RPN, RoI pooling, prediction heads and losses.

        Raises:
            NotImplementedError: for pooling modes ``'psalign'`` and
                ``'deformable_psalign'``.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')

        # Both heads consume the same pooled feature, so they must share the
        # input width.  (The cls head used to be fixed at 2048, which only
        # matched the reduce=True case.)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        self.rcnn_cls_pred = nn.Linear(in_channels, self.n_classes)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module (un-reduced so that per-sample weights can be applied)
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # second pooling op at scale 1/8 (currently unused, see caroi_pooling)
        self.rcnn_pooling2 = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                         1.0 / 8.0)
        # 1x1 conv fusing the concatenated 1024 + 512 channels back to 1024
        self.reduce_pooling = nn.Sequential(
            nn.Conv2d(1024 + 512, 1024, 1, 1, 0), nn.ReLU())

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` and build helpers.

        Args:
            model_config: mapping with the keys read below; a missing key
                raises ``KeyError``.
        """
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']
        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config['feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # hard-coded on purpose; the config lookup is disabled
        # self.reduce = model_config.get('reduce')
        self.reduce = True

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and keep a sampled subset.

        Writes the sampled ``rois_batch``, the normalised cls/reg weights,
        the cls/reg targets and ``fake_match`` into ``prediction_dict``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = \
            self.target_assigner.assign(rois_batch[:, :, 1:], gt_boxes,
                                        gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # both counts are used as divisors below
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN losses and the RCNN cls/bbox losses, then feed the
        analyzer with the class-1 probabilities.

        Returns:
            dict with the RPN loss entries plus ``'rcnn_cls_loss'`` and
            ``'rcnn_bbox_loss'``.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights has no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # analysis ap
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        num_gt = feed_dict['gt_labels'].numel()
        fake_match = prediction_dict['fake_match']
        self.target_assigner.analyzer.analyze_ap(
            fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.5)
        return loss_dict
class FPNFasterRCNN(Model):
    """Faster R-CNN on top of a feature-pyramid backbone."""

    def calculate_roi_level(self, rois_batch):
        """Return the pyramid level assigned to each roi.

        Args:
            rois_batch: tensor of shape (num_rois, 5); columns 1..4 are read
                as x1, y1, x2, y2.

        Returns:
            float tensor of shape (num_rois,).  Every entry is currently 4
            because of the override at the end.
        """
        h = rois_batch[:, 4] - rois_batch[:, 2] + 1
        w = rois_batch[:, 3] - rois_batch[:, 1] + 1

        # size-based level, clamped to [2, 5]
        # NOTE(review): natural log is used here; confirm whether log2 was
        # intended before removing the override below.
        roi_level = torch.log(torch.sqrt(w * h) / 224.0)
        roi_level = torch.round(roi_level + 4)
        roi_level[roi_level < 2] = 2
        roi_level[roi_level > 5] = 5

        # Override: send every roi to level 4.  self.rcnn_pooling is built
        # with a single spatial scale (1/16), so the size-based assignment
        # above is effectively disabled.
        roi_level[...] = 4
        return roi_level

    def pyramid_rcnn_pooling(self, rcnn_feat_maps, rois_batch):
        """RoI-pool each roi from the feature map of its pyramid level.

        The pooled features are returned in the same order as ``rois_batch``.
        Pooling level by level groups the rois by level; without restoring
        the order, features would no longer line up with the targets and
        weights as soon as rois fall on more than one level.

        Args:
            rcnn_feat_maps: sequence of feature maps; entry ``i`` serves
                level ``i + 2``.
            rois_batch: rois flattened to shape (num_rois, 5).

        Returns:
            pooled features, one row per roi that was assigned to an
            available level.
        """
        # determine which layer to get feat
        roi_level = self.calculate_roi_level(rois_batch)

        pooled_feats = []
        roi_indices = []
        for level, rcnn_feat_map in enumerate(rcnn_feat_maps, 2):
            level_indices = torch.nonzero(roi_level == level).view(-1)
            if level_indices.numel() == 0:
                continue
            pooled_feats.append(
                self.rcnn_pooling(rcnn_feat_map, rois_batch[level_indices]))
            roi_indices.append(level_indices)

        pooled_feats = torch.cat(pooled_feats, dim=0)

        # undo the grouping by level: sort the rows by original roi index
        _, restore_order = torch.sort(torch.cat(roi_indices, dim=0))
        return pooled_feats[restore_order]

    def forward(self, feed_dict):
        """Run the backbone, the RPN and the RCNN heads.

        Args:
            feed_dict: mapping holding at least ``'img'``; during training
                ``pre_subsample`` additionally reads ``'gt_boxes'`` and
                ``'gt_labels'``.  ``'rpn_feat_maps'`` is written into it.

        Returns:
            dict with the RPN outputs plus ``'rcnn_cls_probs'``,
            ``'rcnn_bbox_preds'``, ``'rcnn_cls_scores'`` and
            ``'second_rpn_anchors'``.
        """
        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = \
            self.feature_extractor.first_stage_feature(feed_dict['img'])
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # pre subsample for reduce consume of memory
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # rois_batch (N,num_proposals,5) is flattened before pooling
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                rois_batch.view(-1, 5))
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        if self.reduce:
            # global average pooling -> shape (num_rois, C)
            pooled_feat = pooled_feat.mean(3).mean(2)
        else:
            # Flatten per roi.  Using the real roi count instead of
            # self.rcnn_batch_size also works at inference time and for
            # multi-image batches.
            pooled_feat = pooled_feat.view(pooled_feat.shape[0], -1)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]
        return prediction_dict

    def init_weights(self):
        """Initialise the submodules, then both RCNN heads (normal init)."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()
        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build backbone, RPN, RoI pooling, prediction heads and losses.

        Raises:
            NotImplementedError: for pooling modes ``'psalign'`` and
                ``'deformable_psalign'``.
        """
        self.feature_extractor = FPNFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')

        # Both heads consume the same pooled feature, so they must share the
        # input width.  (The cls head used to be fixed at 1024, which only
        # matched the reduce=True case.)
        if self.reduce:
            in_channels = 1024
        else:
            in_channels = 2048 * 4 * 4
        self.rcnn_cls_pred = nn.Linear(in_channels, self.n_classes)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module (un-reduced so that per-sample weights can be applied)
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` and build helpers.

        Args:
            model_config: mapping with the keys read below; a missing key
                raises ``KeyError``.
        """
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']
        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # hard-coded on purpose; the config lookup is disabled
        # self.reduce = model_config.get('reduce')
        self.reduce = True

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and keep a sampled subset.

        Writes the sampled ``rois_batch``, the normalised cls/reg weights and
        the cls/reg targets into ``prediction_dict``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = \
            self.target_assigner.assign(rois_batch[:, :, 1:], gt_boxes,
                                        gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # both counts are used as divisors below
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN losses and the RCNN cls/bbox losses.

        Returns:
            dict with the RPN loss entries plus ``'rcnn_cls_loss'`` and
            ``'rcnn_bbox_loss'``.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights has no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss
        return loss_dict
class SemanticFasterRCNN(Model):
    """Faster R-CNN whose box head is gated by a class saliency map, with
    bookkeeping of assignment statistics for rois and refined boxes."""

    def forward(self, feed_dict):
        """Run the backbone, the RPN and the RCNN heads, then collect stats.

        Args:
            feed_dict: mapping holding ``'img'``, ``'gt_boxes'`` and
                ``'gt_labels'`` (the ground truth is read on every call for
                the statistics, not only in training).  ``'base_feat'`` is
                written into it.

        Returns:
            dict with the RPN outputs plus ``'rcnn_cls_probs'``,
            ``'rcnn_bbox_preds'``, ``'rcnn_cls_scores'``,
            ``'second_rpn_anchors'`` and ``'rcnn_rois_batch'``.
        """
        self.clean_stats()

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update(base_feat=base_feat)

        # rpn model
        prediction_dict = {}
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # subsample proposals up front to cut memory use during training
        if self.training:
            self.stats.update(self.pre_subsample(prediction_dict, feed_dict))
        rois_batch = prediction_dict['rois_batch']

        # base_feat is (N,C,H,W); rois are flattened to (N*num_proposals, 5)
        pooled_feat = self.feature_extractor.second_stage_feature(
            self.rcnn_pooling(base_feat, rois_batch.view(-1, 5)))

        # per-location class scores, averaged over space for the logits
        cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = cls_scores_map.mean(3).mean(2)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # gate the pooled features with the score-map softmax (channel 0
        # dropped) before regressing boxes
        saliency = F.softmax(cls_scores_map, dim=1)[:, 1:, :, :]
        gated_feat = (pooled_feat * saliency).mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(gated_feat)

        prediction_dict.update([
            ('rcnn_cls_probs', rcnn_cls_probs),
            ('rcnn_bbox_preds', rcnn_bbox_preds),
            ('rcnn_cls_scores', rcnn_cls_scores),
        ])

        # used for track
        order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = \
            prediction_dict['anchors'][order]

        # decode the regression output into boxes; column 0 stays zero and
        # the boxes are detached from the graph
        decoded_boxes = self.bbox_coder.decode_batch(
            rcnn_bbox_preds.view(1, -1, 4), rois_batch[:, :, 1:5])
        rcnn_rois_batch = torch.zeros_like(rois_batch)
        rcnn_rois_batch[:, :, 1:5] = decoded_boxes.detach()
        prediction_dict['rcnn_rois_batch'] = rcnn_rois_batch

        ###################################
        # stats
        ###################################
        # re-assign the refined boxes only to get the assigner statistics
        # (last element of the returned tuple)
        assign_result = self.target_assigner.assign(
            rcnn_rois_batch[:, :, 1:], feed_dict['gt_boxes'],
            feed_dict['gt_labels'])
        self.rcnn_stats.update(assign_result[-1])

        # analysis ap
        if self.training:
            ap_stats = self.target_assigner.analyzer.analyze_ap(
                self.rcnn_stats['match'],
                prediction_dict['rcnn_cls_probs'][:, 1],
                feed_dict['gt_labels'].numel(),
                thresh=0.5)
            # collect stats
            self.rcnn_stats.update(ap_stats)

        return prediction_dict

    def clean_stats(self):
        """Reset the statistics of the rois and of the final boxes."""
        # rois bbox
        self.stats = dict(
            num_det=1,
            num_tp=0,
            matched_thresh=0,
            recall_thresh=0,
            match=None)
        # rcnn bbox(final bbox): an independent dict with the same defaults
        self.rcnn_stats = dict(self.stats)

    def generate_channel_attention(self, feat):
        """Return the spatial mean of ``feat`` with the reduced dims kept."""
        spatial_mean = feat.mean(3, keepdim=True)
        return spatial_mean.mean(2, keepdim=True)

    def generate_spatial_attention(self, feat):
        """Apply ``self.spatial_attention`` to ``feat``."""
        return self.spatial_attention(feat)

    def init_weights(self):
        """Initialise the submodules, then both RCNN heads (normal init)."""
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        # (head, std) pairs; cls head first, exactly as before
        for head, std in ((self.rcnn_cls_pred, 0.01),
                          (self.rcnn_bbox_pred, 0.001)):
            Filler.normal_init(head, 0, std, self.truncated)

    def init_modules(self):
        """Build backbone, RPN, RoI pooling, prediction heads and losses.

        Raises:
            NotImplementedError: for pooling modes ``'psalign'`` and
                ``'deformable_psalign'``.
        """
        self.feature_extractor = feature_extractors_builder.build(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        mode = self.pooling_mode
        if mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')
        if mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)

        # NOTE(review): forward() always feeds the 4-D pooled feature to the
        # cls head and averages its output over dims 3 and 2, which does not
        # fit the Linear variant -- confirm before enabling
        # use_self_attention.
        if self.use_self_attention:
            self.rcnn_cls_pred = nn.Linear(self.ndin, self.n_classes)
        else:
            self.rcnn_cls_pred = nn.Conv2d(self.ndin, self.n_classes, 3, 1, 1)

        # class-agnostic regression predicts one box, otherwise one per class
        num_box_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(self.ndin, num_box_outputs)

        # loss modules (un-reduced so that per-sample weights can be applied)
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # attention
        if self.use_self_attention:
            self.spatial_attention = nn.Conv2d(self.ndin, 1, 3, 1, 1)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` and build helpers.

        Args:
            model_config: mapping with the keys read below.  ``'din'``
                (falls back to 512 when missing or falsy) and
                ``'use_self_attention'`` are optional; any other missing key
                raises ``KeyError``.
        """
        self.ndin = model_config.get('din') or 512

        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)

        # options stored under the same name as their config key
        for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                    'crop_resize_with_max_pool', 'truncated',
                    'use_focal_loss', 'subsample_twice', 'rcnn_batch_size'):
            setattr(self, key, model_config[key])
        self.use_self_attention = model_config.get('use_self_attention')

        # some submodule config
        for key in ('feature_extractor_config', 'rpn_config'):
            setattr(self, key, model_config[key])

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox_coder
        self.bbox_coder = self.target_assigner.bbox_coder

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and keep a sampled subset.

        Writes the sampled ``rois_batch``, the normalised cls/reg weights,
        the cls/reg targets and ``fake_match`` into ``prediction_dict``.

        Returns:
            the assigner statistics, with ``'match'`` restricted to the
            sampled proposals.
        """
        rois_batch = prediction_dict['rois_batch']

        ##########################
        # assigner
        ##########################
        cls_targets, reg_targets, cls_weights, reg_weights, stats = \
            self.target_assigner.assign(rois_batch[:, :, 1:],
                                        feed_dict['gt_boxes'],
                                        feed_dict['gt_labels'])

        ##########################
        # subsampler
        ##########################
        # mask of shape (N,M) selecting the proposals to keep
        keep = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            reg_weights > 0,
            indicator=cls_weights > 0,
            criterion=None)

        cls_weights = cls_weights[keep]
        reg_weights = reg_weights[keep]
        num_cls = (cls_weights > 0).sum(dim=-1)
        num_reg = (reg_weights > 0).sum(dim=-1)
        assert num_cls, 'bug happens'
        # the regression count may be zero; clamp it so the division is safe
        num_reg = torch.max(num_reg, torch.ones_like(num_reg))

        prediction_dict['rcnn_cls_weights'] = cls_weights / num_cls.float()
        prediction_dict['rcnn_reg_weights'] = reg_weights / num_reg.float()
        prediction_dict['rcnn_cls_targets'] = cls_targets[keep]
        prediction_dict['rcnn_reg_targets'] = reg_targets[keep]
        prediction_dict['fake_match'] = \
            self.target_assigner.analyzer.match[keep]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[keep].view(
            rois_batch.shape[0], -1, 5)

        stats['match'] = stats['match'][keep]
        return stats

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN losses and the RCNN cls/bbox losses, then feed the
        analyzer with the class-1 probabilities.

        Returns:
            dict with the RPN loss entries plus ``'rcnn_cls_loss'`` and
            ``'rcnn_bbox_loss'``.
        """
        # submodule loss
        loss_dict = dict(self.rpn_model.loss(prediction_dict, feed_dict))

        # weights and targets written by pre_subsample
        cls_weights = prediction_dict['rcnn_cls_weights']
        reg_weights = prediction_dict['rcnn_reg_weights']
        cls_targets = prediction_dict['rcnn_cls_targets']
        reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        cls_loss = self.rcnn_cls_loss(prediction_dict['rcnn_cls_scores'],
                                      cls_targets)
        cls_loss *= cls_weights
        cls_loss = cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        bbox_loss = self.rcnn_bbox_loss(prediction_dict['rcnn_bbox_preds'],
                                        reg_targets).sum(dim=-1)
        bbox_loss *= reg_weights
        bbox_loss = bbox_loss.sum(dim=-1)

        # loss weights has no gradients
        loss_dict['rcnn_cls_loss'] = cls_loss
        loss_dict['rcnn_bbox_loss'] = bbox_loss

        # analysis ap
        cls_probs = prediction_dict['rcnn_cls_probs']
        num_gt = feed_dict['gt_labels'].numel()
        self.target_assigner.analyzer.analyze_ap(
            prediction_dict['fake_match'], cls_probs[:, 1], num_gt,
            thresh=0.5)
        return loss_dict
class PRModel(Model):
    """Wrapper around ``PRNet`` that trains only its 3D heads.

    ``pre_forward`` freezes every module and then re-enables gradients and
    batch-norm updates for the ``box_3d_feature``, ``orients_out`` and
    ``dims_3d_out`` heads; ``loss`` produces a dimension loss and a
    multi-bin orientation loss.
    """

    def init_weights(self):
        """Load the pre-trained checkpoint at ``self.model_path``.

        Checkpoint keys may carry the ``module.`` prefix added by
        ``nn.DataParallel``.  The prefix is stripped only when present, and
        a tensor is copied only if the resulting name exists in the
        detector's state dict; everything else keeps its current value.
        """
        from collections import OrderedDict

        print("loading pre-trained weight")
        # NOTE: torch.load unpickles the file, so ``model_path`` must point
        # to a trusted checkpoint.
        weight = torch.load(
            self.model_path, map_location=lambda storage, loc: storage)

        module_dict = self.det_model.state_dict()
        prefix = 'module.'
        new_state_dict = OrderedDict()
        for k, v in weight.items():
            name = k[len(prefix):] if k.startswith(prefix) else k
            # Membership is tested on the name that will actually be stored,
            # so no unknown key ever reaches ``load_state_dict``.
            if name not in module_dict:
                continue
            new_state_dict[name] = v

        module_dict.update(new_state_dict)
        self.det_model.load_state_dict(module_dict)

    def pre_forward(self):
        """Freeze the model, then unfreeze the three 3D heads."""
        self.freeze_modules()

        multibox = self.det_model.multibox
        heads_3d = (multibox.box_3d_feature, multibox.orients_out,
                    multibox.dims_3d_out)
        for head in heads_3d:
            for param in head.parameters():
                param.requires_grad = True

        # Batch norm is frozen everywhere except inside the trainable heads.
        self.freeze_bn(self)
        for head in heads_3d:
            self.unfreeze_bn(head)

    def init_param(self, model_config):
        """Read hyper-parameters and build the encoder, assigner and sampler.

        Args:
            model_config: mapping providing ``classes``,
                ``rcnn_batch_size``, ``model_path``,
                ``target_assigner_config`` and ``sampler_config``.
        """
        # One extra class on top of the configured ones
        # (presumably background - TODO confirm).
        self.n_classes = len(model_config['classes']) + 1
        self.rcnn_batch_size = model_config['rcnn_batch_size']
        self.profiler = Profiler()
        self.encoder = DataEncoder(
            ModelCFG, anchor_type=ModelCFG['anchor_type'], infer_mode=True)
        self.num_bins = 2
        self.model_path = model_config['model_path']
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])
        self.sampler = BalancedSampler(model_config['sampler_config'])

    def init_modules(self):
        """Create the detector network and the two loss modules."""
        self.det_model = PRNet(ModelCFG)
        # Element-wise (unreduced) losses; weighting happens in ``loss``.
        self.dims_loss = nn.SmoothL1Loss(reduce=False)
        self.multibin_loss = MultiBinLoss(self.num_bins)

    def forward(self, feed_dict):
        """Run the detector and return its 3D head outputs.

        Also stores ``feed_dict['mean_dims']`` on the 3D bbox coder of the
        target assigner.

        Returns:
            dict with keys ``'dims_3d_out'`` and ``'orients_out'``.
        """
        self.target_assigner.bbox_coder_3d.mean_dims = feed_dict['mean_dims']
        image = feed_dict['img']

        # The detector returns six tensors; only the last two are used here.
        _, _, _, _, dims_3d_out, orients_out = self.det_model.forward(image)

        prediction_dict = {}
        prediction_dict['dims_3d_out'] = dims_3d_out
        prediction_dict['orients_out'] = orients_out
        return prediction_dict

    def generate_anchors(self, im_shape):
        """Convert the encoder's default boxes to corner-form pixel anchors.

        The default boxes are read as (center, size) pairs: columns ``:2``
        minus / plus half of columns ``2:`` give the min / max corners.
        Even columns are scaled by ``im_shape[1]`` and odd columns by
        ``im_shape[0]`` (presumably ``im_shape`` is (height, width) -
        TODO confirm against the caller).
        """
        default_boxes = self.encoder.default_boxes
        centers = default_boxes[:, :2]
        half_sizes = 0.5 * default_boxes[:, 2:]
        normalized_anchors = torch.cat(
            [centers - half_sizes, centers + half_sizes], dim=-1)

        anchors = torch.zeros_like(normalized_anchors)
        anchors[:, ::2] = normalized_anchors[:, ::2] * im_shape[1]
        anchors[:, 1::2] = normalized_anchors[:, 1::2] * im_shape[0]
        return anchors

    def squeeze_bbox_preds(self, rcnn_bbox_preds, rcnn_cls_targets, out_c=4):
        """Pick, for every row, the prediction belonging to its target class.

        The predictions are viewed as ``(-1, n_classes, out_c)``.  A flat
        index ``target + row * n_classes`` is built for each row and used to
        gather from the ``(-1, out_c)`` view.  Only the first row of
        ``rcnn_cls_targets`` is used for the gather, and the result gets a
        leading dimension of size one.

        Args:
            rcnn_bbox_preds: per-class predictions, reshaped internally.
            rcnn_cls_targets: class targets; indexed with ``[0]``.
            out_c: number of values predicted per class.
        """
        rcnn_bbox_preds = rcnn_bbox_preds.view(-1, self.n_classes, out_c)
        num_rows = rcnn_bbox_preds.shape[0]
        offset = torch.arange(0, num_rows) * rcnn_bbox_preds.size(1)
        rcnn_cls_targets = rcnn_cls_targets + offset.type_as(rcnn_cls_targets)
        rcnn_bbox_preds = rcnn_bbox_preds.contiguous().view(
            -1, out_c)[rcnn_cls_targets[0]].unsqueeze(0)
        return rcnn_bbox_preds

    def loss(self, prediction_dict, feed_dict):
        """Compute the dimension and orientation losses of the 3D heads.

        Anchors are generated from ``feed_dict['im_info'][0][:2]`` and
        matched against the ground truth.  A sampled subset of the matches
        weights the two losses.  Orientation hit counts are written to
        ``self.target_assigner.stat``, and the sampled 3D regression weights
        are stored under ``prediction_dict['rcnn_reg_weights']``.

        Returns:
            dict with keys ``'dims_loss'`` and ``'orient_loss'``.
        """
        loss_dict = {}
        anchors = self.generate_anchors(feed_dict['im_info'][0][:2])
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']
        local_angle = feed_dict['local_angle']
        gt_boxes_3d = feed_dict['gt_boxes_3d']

        # 3D target = first three columns of the 3D box + local angle.
        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], local_angle], dim=-1)

        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights, rcnn_reg_targets_3d,
         rcnn_reg_weights_3d) = self.target_assigner.assign(
             anchors.unsqueeze(0), gt_boxes, gt_boxes_3d, gt_labels)

        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0
        cls_criterion = None
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rcnn_cls_weights)

        # Zero out the weights of everything that was not sampled, then
        # normalise by the number of remaining positives (at least one).
        rcnn_reg_weights_3d = rcnn_reg_weights_3d * batch_sampled_mask
        num_reg_coeff = (rcnn_reg_weights_3d > 0).sum(dim=1)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)
        rcnn_reg_weights_3d = rcnn_reg_weights_3d / num_reg_coeff.float()

        # Dimension loss on the prediction of each anchor's target class.
        dims_pred = self.squeeze_bbox_preds(prediction_dict['dims_3d_out'],
                                            rcnn_cls_targets, 3)
        dims_loss = self.dims_loss(dims_pred, rcnn_reg_targets_3d[:, :, :3])
        dims_loss = dims_loss * rcnn_reg_weights_3d.unsqueeze(-1)
        dims_loss = dims_loss.sum(dim=-1).sum(dim=-1)

        # Multi-bin orientation loss on the last target column.
        orient_loss, angle_tp_mask = self.multibin_loss(
            prediction_dict['orients_out'], rcnn_reg_targets_3d[:, :, -1:])
        orient_loss = orient_loss * rcnn_reg_weights_3d
        orient_loss = orient_loss.sum(dim=-1)

        loss_dict['dims_loss'] = dims_loss
        loss_dict['orient_loss'] = orient_loss

        prediction_dict['rcnn_reg_weights'] = rcnn_reg_weights_3d[
            batch_sampled_mask > 0]

        # Orientation statistics over the weighted (positive) anchors.
        angle_tp_mask = angle_tp_mask[rcnn_reg_weights_3d > 0]
        angles_tp_num = angle_tp_mask.int().sum().item()
        angles_all_num = angle_tp_mask.numel()

        self.target_assigner.stat.update({
            'cls_orient_2s_all_num': angles_all_num,
            'cls_orient_2s_tp_num': angles_tp_num
        })

        return loss_dict
class PostCLSFasterRCNN(Model):
    """Faster R-CNN variant that scores the boxes after regressing them.

    Stage one is the RPN, stage two regresses boxes from the RPN rois, and
    stage three pools features at the regressed boxes and classifies them.
    The ``enable_cls`` / ``enable_reg`` flags set in ``init_param`` choose
    which of the later stages is optimised.
    """

    def forward(self, feed_dict):
        """Run all three stages and collect roi statistics.

        Returns:
            dict of predictions.  It always holds ``rcnn_bbox_preds`` and
            ``rcnn_rois_batch``; ``rcnn_cls_scores`` / ``rcnn_cls_probs``
            are added when ``enable_cls`` is set.
        """
        # Reset the per-iteration statistics.
        self.clean_stats()
        prediction_dict = {}

        # ---- first stage: backbone + RPN ----
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # ---- second stage: box regression ----
        # Sub-sample up front to reduce memory when the regressor trains.
        if self.training and self.enable_reg:
            if self.use_gt:
                prediction_dict['rois_batch'] = self.append_gt(
                    prediction_dict['rois_batch'], feed_dict['gt_boxes'])
            roi_stats = self.pre_subsample(prediction_dict, feed_dict)
            self.stats.update(roi_stats)

        rois_batch = prediction_dict['rois_batch']
        roi_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # Average the second-stage feature over its two spatial axes.
        reg_feat = self.feature_extractor.second_stage_feature(roi_feat)
        reg_feat = reg_feat.mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(reg_feat)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds

        # Kept for tracking proposals back to their anchors.
        order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = \
            prediction_dict['anchors'][order]
        prediction_dict['second_rpn_cls_probs'] = \
            prediction_dict['rpn_cls_probs'][0][order]

        # ---- third stage: score the regressed boxes ----
        # Decode the regressed boxes into a detached roi batch.
        decoded = self.bbox_coder.decode_batch(
            rcnn_bbox_preds.view(1, -1, 4), rois_batch[:, :, 1:5])
        rcnn_rois_batch = torch.zeros_like(rois_batch)
        rcnn_rois_batch[:, :, 1:5] = decoded.detach()
        prediction_dict['rcnn_rois_batch'] = rcnn_rois_batch

        if self.training and self.use_gt:
            rcnn_rois_batch = self.append_gt(rcnn_rois_batch,
                                             feed_dict['gt_boxes'])
            prediction_dict['rcnn_rois_batch'] = rcnn_rois_batch

        if self.enable_cls:
            if self.training:
                sampled_stats = self.pre_subsample(
                    prediction_dict, feed_dict, stage='rcnn')
                self.rcnn_stats.update(sampled_stats)

            # Rois as stored in the dict (sub-sampled when training).
            pred_rois = prediction_dict['rcnn_rois_batch']
            cls_feat = self.rcnn_pooling(base_feat, pred_rois.view(-1, 5))
            cls_feat = self.feature_extractor.third_stage_feature(
                cls_feat.detach())
            cls_feat = cls_feat.mean(3).mean(2)

            rcnn_cls_scores = self.rcnn_cls_pred(cls_feat)
            prediction_dict['rcnn_cls_probs'] = F.softmax(
                rcnn_cls_scores, dim=1)
            prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # ---- statistics ----
        # Skipped in training when the matching stage already produced
        # stats through pre_subsample.
        if not self.training or (self.enable_track_rois
                                 and not self.enable_reg):
            roi_stats = self.target_assigner.assign(
                rois_batch[:, :, 1:], feed_dict['gt_boxes'],
                feed_dict['gt_labels'])[-1]
            self.stats.update(roi_stats)

        if not self.training or (self.enable_track_rcnn_rois
                                 and not self.enable_cls):
            final_stats = self.target_assigner.assign(
                rcnn_rois_batch[:, :, 1:], feed_dict['gt_boxes'],
                feed_dict['gt_labels'])[-1]
            self.rcnn_stats.update(final_stats)

        # AP analysis only makes sense while the classifier trains.
        if self.training and self.enable_cls:
            probs = prediction_dict['rcnn_cls_probs']
            num_gt = feed_dict['gt_labels'].numel()
            fake_match = self.rcnn_stats['match']
            ap_stats = self.target_assigner.analyzer.analyze_ap(
                fake_match, probs[:, 1], num_gt, thresh=0.5)
            self.rcnn_stats.update(ap_stats)

        return prediction_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Concatenate the ground-truth boxes to ``rois_batch`` along dim 1.

        Each ground-truth box becomes a 5-column roi whose first column is
        zero and whose columns 1..4 are the first four box values.
        """
        gt_rois = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                              5).type_as(gt_boxes)
        gt_rois[:, :, 1:5] = gt_boxes[:, :, :4]
        return torch.cat([rois_batch, gt_rois], dim=1)

    def init_weights(self):
        """Initialise the sub-modules and the two prediction layers."""
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build the backbone, RPN, roi pooling, heads and loss modules."""
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        mode = self.pooling_mode
        if mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')

        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)

        # Input width of the box head depends on whether features are
        # averaged before it.
        in_channels = 2048 if self.reduce else 2048 * 4 * 4
        num_box_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(in_channels, num_box_outputs)

        # Losses are left unreduced; weighting happens in ``loss``.
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2, alpha=0.25, gamma=2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` and set stage flags."""
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)

        # Values taken over unchanged from the config, in this order.
        copied_keys = (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
            'feature_extractor_config',
            'rpn_config',
        )
        for key in copied_keys:
            setattr(self, key, model_config[key])

        # Assigner and the helpers it owns.
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])
        self.bbox_coder = self.target_assigner.bbox_coder
        self.similarity_calc = self.target_assigner.similarity_calc

        self.sampler = BalancedSampler(model_config['sampler_config'])

        self.reduce = True

        # Which heads are optimised.
        self.enable_cls = True
        self.enable_reg = False
        self.enable_iou = False

        # Which roi sets are tracked for statistics.
        self.enable_track_rois = True
        self.enable_track_rcnn_rois = True

        # Evaluate the final boxes.
        self.enable_eval_final_bbox = True

        # Do not append ground truth to the rois, do not sub-sample.
        self.use_gt = False
        self.subsample = False

    def clean_stats(self):
        """Reset the statistics of the RPN rois and of the final boxes."""

        def fresh_stats():
            return {
                'num_det': 1,
                'num_tp': 0,
                'matched_thresh': 0,
                'recall_thresh': 0,
                'match': None
            }

        self.stats = fresh_stats()
        self.rcnn_stats = fresh_stats()

    def pre_subsample(self, prediction_dict, feed_dict, stage='rpn'):
        """Assign targets to one roi set and optionally sub-sample it.

        Args:
            prediction_dict: read for the rois, updated with the targets,
                the weights and the (sub-sampled) rois.
            feed_dict: provides ``gt_boxes`` and ``gt_labels``.
            stage: ``'rpn'`` works on ``rois_batch``, any other value on
                ``rcnn_rois_batch``.

        Returns:
            The assigner's stats with ``'match'`` restricted to kept rois.

        Raises:
            ValueError: if sub-sampling is on but neither head is enabled.
        """
        rois_name = 'rois_batch' if stage == 'rpn' else 'rcnn_rois_batch'
        rois = prediction_dict[rois_name]
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        (cls_targets, reg_targets, cls_weights, reg_weights,
         stats) = self.target_assigner.assign(rois[:, :, 1:], gt_boxes,
                                              gt_labels)

        if not self.subsample:
            # Keep every roi.
            keep = torch.ones_like(cls_weights > 0)
        else:
            if self.enable_reg:
                # Sampling for regression training.
                is_positive = reg_weights > 0
                is_candidate = None
            elif self.enable_cls:
                # Sampling for classification training.
                is_positive = cls_targets > 0
                is_candidate = cls_weights > 0
            else:
                raise ValueError(
                    "please check enable reg and enable cls again")
            keep = self.sampler.subsample_batch(
                self.rcnn_batch_size,
                is_positive,
                indicator=is_candidate,
                criterion=None)

        if self.enable_cls:
            cls_weights = cls_weights[keep]
            n_cls = (cls_weights > 0).sum(dim=-1)
            assert n_cls, 'bug happens'
            prediction_dict['rcnn_cls_weights'] = \
                cls_weights / n_cls.float()

        # Targets are stored even without cls training (used for stats).
        prediction_dict['rcnn_cls_targets'] = cls_targets[keep]

        # Regression weights, normalised by at least one.
        reg_weights = reg_weights[keep]
        n_reg = (reg_weights > 0).sum(dim=-1)
        n_reg = torch.max(n_reg, torch.ones_like(n_reg))
        prediction_dict['rcnn_reg_weights'] = reg_weights / n_reg.float()

        if self.enable_reg:
            prediction_dict['rcnn_reg_targets'] = reg_targets[keep]

        stats['match'] = stats['match'][keep]

        # Store the kept rois back, preserving the batch axis.
        prediction_dict[rois_name] = rois[keep].view(rois.shape[0], -1, 5)
        return stats

    def loss(self, prediction_dict, feed_dict):
        """Compute the losses of the enabled heads.

        ``rcnn_cls_loss`` is produced when ``enable_cls`` is set.  The RPN
        losses and ``rcnn_bbox_loss`` are produced when ``enable_reg`` is
        set.

        Returns:
            dict mapping loss names to loss tensors.
        """
        loss_dict = {}

        if self.enable_cls:
            cls_weights = prediction_dict['rcnn_cls_weights']
            cls_targets = prediction_dict['rcnn_cls_targets']
            cls_scores = prediction_dict['rcnn_cls_scores']

            cls_loss = self.rcnn_cls_loss(cls_scores, cls_targets)
            cls_loss *= cls_weights
            loss_dict['rcnn_cls_loss'] = cls_loss.sum(dim=-1)

        if self.enable_reg:
            loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

            reg_weights = prediction_dict['rcnn_reg_weights']
            reg_targets = prediction_dict['rcnn_reg_targets']
            bbox_preds = prediction_dict['rcnn_bbox_preds']

            # Sum over the last axis first, then weight per roi.
            bbox_loss = self.rcnn_bbox_loss(bbox_preds,
                                            reg_targets).sum(dim=-1)
            bbox_loss *= reg_weights
            loss_dict['rcnn_bbox_loss'] = bbox_loss.sum(dim=-1)

        return loss_dict
class Mono3DFinalFasterRCNN(Model):
    """Faster R-CNN with an extra head predicting 3D dims and orientation.

    The classification head is convolutional.  Its softmax map re-weights
    the pooled feature before the 2D box and 3D heads are applied.
    """

    def forward(self, feed_dict):
        """Run backbone, RPN and the RCNN heads.

        In training the rois are sub-sampled first and ``rcnn_3d`` is the
        raw head output.  Outside training ``rcnn_3d`` is decoded with the
        3D bbox coder against the decoded 2D boxes.

        Returns:
            dict with (among the RPN outputs) ``rcnn_cls_probs``,
            ``rcnn_cls_scores``, ``rcnn_bbox_preds``, ``rcnn_3d`` and
            ``second_rpn_anchors``.
        """
        prediction_dict = {}

        # Backbone and RPN.
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        # Per-location class scores; their spatial mean is the roi score.
        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # Weight the feature by the softmax channels from index 1 onward
        # (presumably the foreground channel - TODO confirm n_classes == 2).
        pooled_feat = pooled_feat * saliency_map[:, 1:, :, :]
        reduced_pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(reduced_pooled_feat)
        rcnn_3d = self.rcnn_3d_pred(reduced_pooled_feat)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # Kept for tracking proposals back to their anchors.
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        prediction_dict['rcnn_3d'] = rcnn_3d

        if not self.training:
            # Decode the 2D boxes, prepend a zero column to form rois and
            # decode the 3D output against them.
            rcnn_bbox_preds = rcnn_bbox_preds.detach()
            final_bbox = self.target_assigner.bbox_coder.decode_batch(
                rcnn_bbox_preds.unsqueeze(0), rois_batch[:, :, 1:])
            final_rois_inds = torch.zeros_like(final_bbox[:, :, -1:])
            final_rois_batch = torch.cat([final_rois_inds, final_bbox],
                                         dim=-1)
            rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_bbox(
                rcnn_3d, final_rois_batch)
            prediction_dict['rcnn_3d'] = rcnn_3d

        return prediction_dict

    def pre_forward(self):
        """Hook run before forward; intentionally does nothing."""
        pass

    def init_weights(self):
        """Initialise the sub-modules and the cls / bbox prediction layers."""
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build the backbone, RPN, roi pooling, heads and loss modules."""
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')

        # Convolutional classifier: yields one score map per class.
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4

        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # Losses are left unreduced; weighting happens in ``loss``.
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # 3D head: 3 + 4 * 2 outputs.  ``loss`` reads columns :3 as dims
        # and columns 3: as the orientation part.
        self.rcnn_3d_pred = nn.Linear(in_channels, 3 + 4 * 2)
        self.rcnn_3d_loss = OrientationLoss(split_loss=True)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` and build helpers."""
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # Sub-module configs.
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        self.sampler = BalancedSampler(model_config['sampler_config'])

        self.reduce = True

        self.visualizer = FeatVisualizer()

        self.num_bins = 4

        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        self.profiler = Profiler()

        self.h_cat = False

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign 2D and 3D targets to the rois and keep a sampled subset.

        The 3D ground truth handed to the assigner is the first three
        columns of ``gt_boxes_3d`` followed by ``cls_orient`` (as a float
        column) and ``reg_orient``.  Sampled targets, normalised weights
        and the sampled rois are written to ``prediction_dict``.  Nothing
        is returned.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']
        gt_boxes_3d = feed_dict['gt_boxes_3d']

        # Orientation target: class column followed by regression columns.
        cls_orient = torch.unsqueeze(feed_dict['cls_orient'], dim=-1).float()
        reg_orient = feed_dict['reg_orient']
        orient = torch.cat([cls_orient, reg_orient], dim=-1)
        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], orient], dim=-1)

        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights, rcnn_reg_targets_3d,
         rcnn_reg_weights_3d) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_boxes_3d, gt_labels)

        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)

        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        rcnn_reg_weights_3d = rcnn_reg_weights_3d[batch_sampled_mask]

        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        assert num_cls_coeff, 'bug happens'
        # Clamp to at least one: same value as an ``== 0`` check, without
        # a data-dependent Python branch.
        num_reg_coeff = torch.max(num_reg_coeff,
                                  torch.ones_like(num_reg_coeff))

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        # The 3D weights share the 2D regression normaliser.
        prediction_dict['rcnn_reg_weights_3d'] = \
            rcnn_reg_weights_3d / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets_3d'] = rcnn_reg_targets_3d[
            batch_sampled_mask]

        # Replace the rois with the sampled ones, keeping the batch axis.
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """Compute RPN, classification, 2D box and 3D losses.

        Expects the sampled targets and weights written by
        ``pre_subsample``.  Also stores orientation-class hit counts in
        ``self.target_assigner.stat``.

        Returns:
            dict with the RPN losses, ``rcnn_cls_loss``,
            ``rcnn_bbox_loss``, the entries returned by the orientation
            loss, and ``rcnn_3d_loss`` (the dimension part).
        """
        loss_dict = {}

        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # Classification loss.
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # 2D box regression loss.
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # ---- 3D loss ----
        rcnn_reg_weights_3d = prediction_dict['rcnn_reg_weights_3d']
        rcnn_reg_targets_3d = prediction_dict['rcnn_reg_targets_3d']
        rcnn_3d = prediction_dict['rcnn_3d']

        # Dimension part: first three columns.
        rcnn_3d_loss_dims = self.rcnn_bbox_loss(
            rcnn_3d[:, :3], rcnn_reg_targets_3d[:, :3]).sum(dim=-1)

        # Orientation part: every returned entry is weighted and summed.
        res = self.rcnn_3d_loss(rcnn_3d[:, 3:], rcnn_reg_targets_3d[:, 3:])
        for res_loss_key in res:
            tmp = res[res_loss_key] * rcnn_reg_weights_3d
            res[res_loss_key] = tmp.sum(dim=-1)
        loss_dict.update(res)

        rcnn_3d_loss = rcnn_3d_loss_dims * rcnn_reg_weights_3d
        rcnn_3d_loss = rcnn_3d_loss.sum(dim=-1)
        loss_dict['rcnn_3d_loss'] = rcnn_3d_loss

        # ---- orientation-class statistics ----
        # Columns 3:5 of the prediction are compared, via argmax, with
        # target column 3.
        cls_orient_preds = rcnn_3d[:, 3:5]
        cls_orient = rcnn_reg_targets_3d[:, 3]
        _, cls_orient_preds_argmax = torch.max(cls_orient_preds, dim=-1)
        orient_tp_mask = cls_orient.type_as(
            cls_orient_preds_argmax) == cls_orient_preds_argmax
        # Only weighted samples whose class target is above -1 count.
        mask = (rcnn_reg_weights_3d > 0) & (rcnn_reg_targets_3d[:, 3] > -1)
        orient_tp_mask = orient_tp_mask[mask]
        orient_tp_num = orient_tp_mask.int().sum().item()
        orient_all_num = orient_tp_mask.numel()

        # All statistics live on the target assigner.
        self.target_assigner.stat.update({
            'orient_tp_num': orient_tp_num,
            'orient_all_num': orient_all_num,
        })

        return loss_dict
class LEDFasterRCNN(Model):
    """Faster R-CNN variant that scores RoIs with predicted overlaps.

    Next to the usual class and box heads, the second stage owns three
    pairs of linear heads named ``iou``, ``iog`` and ``iod``.  Each pair
    produces a 4-way coarse score vector and a 4-value fine output that
    ``iox_bbox_coder`` decodes into an overlap estimate.  The final
    foreground score blends the directly predicted ``iou`` with an ``iou``
    derived from ``iog`` and ``iod`` (see :meth:`calculate_iou`).
    """

    def forward(self, feed_dict):
        """Run both stages and return every prediction needed downstream.

        ``feed_dict`` is read for ``'img'`` and ``'im_info'`` and is updated
        in place with ``'input_size'`` and ``'base_feat'``.  In training mode
        the proposals are subsampled by :meth:`pre_subsample` first, which
        also stores the training targets in the returned dict.

        Returns:
            dict: the RPN outputs plus ``rcnn_cls_probs``,
            ``rcnn_bbox_preds``, ``rcnn_cls_scores``, the
            ``rcnn_{iou,iod,iog}_{reg,scores}`` tensors and
            ``second_rpn_anchors``.
        """
        feed_dict['input_size'] = feed_dict['im_info']

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # Subsample before pooling to reduce memory consumption.
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # rois are flattened to rows of 5 values before pooling
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        if self.reduce:
            # average over the two spatial axes
            pooled_feat = pooled_feat.mean(3).mean(2)
        else:
            # Flatten per RoI.  Use the actual number of rows rather than
            # ``rcnn_batch_size`` so this also works when no subsampling
            # took place (e.g. at inference).
            pooled_feat = pooled_feat.view(pooled_feat.shape[0], -1)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        iou, iou_scores, iou_reg = self.iou_pred(pooled_feat)
        iog, iog_scores, iog_reg = self.iog_pred(pooled_feat)
        iod, iod_scores, iod_reg = self.iod_pred(pooled_feat)
        iou = self.iox_clip(iou)
        iog = self.iox_clip(iog)
        iod = self.iox_clip(iod)

        # Blend the direct estimate with the one derived from iog and iod.
        iou_indirect = self.calculate_iou(iog, iod)
        iou_final = (1 - self.alpha) * iou_indirect + self.alpha * iou

        if self.use_cls_pred:
            # Down-weight the class probability when the predicted
            # overlap is far from 1.
            rcnn_fg_probs_final = rcnn_cls_probs[:, 1] * torch.exp(
                -torch.pow((1 - iou_final), 2) / self.theta)
        else:
            rcnn_fg_probs_final = iou_final

        # The same score is stored in both columns.
        prediction_dict['rcnn_cls_probs'] = torch.stack(
            [rcnn_fg_probs_final, rcnn_fg_probs_final], dim=-1)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        prediction_dict['rcnn_iou_reg'] = iou_reg
        prediction_dict['rcnn_iou_scores'] = iou_scores
        prediction_dict['rcnn_iod_reg'] = iod_reg
        prediction_dict['rcnn_iod_scores'] = iod_scores
        prediction_dict['rcnn_iog_reg'] = iog_reg
        prediction_dict['rcnn_iog_scores'] = iog_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][0][
            proposals_order]

        return prediction_dict

    def iox_clip(self, iox):
        """Return a new tensor with the values of ``iox`` limited to [0, 1]."""
        return torch.clamp(iox, min=0, max=1)

    def init_weights(self):
        """Initialise all sub-modules, then freeze all but the iox heads."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        # (layer, std) pairs; the order is kept stable so that random
        # number consumption does not change.
        heads = (
            (self.rcnn_cls_pred, 0.01),
            (self.rcnn_bbox_pred, 0.001),
            (self.rcnn_coarse_map_conv_iod, 0.001),
            (self.rcnn_fine_map_conv_iod, 0.001),
            (self.rcnn_coarse_map_conv_iou, 0.001),
            (self.rcnn_fine_map_conv_iou, 0.001),
            (self.rcnn_fine_map_conv_iog, 0.001),
            (self.rcnn_coarse_map_conv_iog, 0.001),
        )
        for layer, std in heads:
            Filler.normal_init(layer, 0, std, self.truncated)

        # freeze all first
        self.freeze_modules()
        # unfreeze some modules
        self.rpn_model.unfreeze_modules()
        self.unfreeze_modules()

    def unfreeze_modules(self):
        """Mark the weights and biases of the six iox heads as trainable."""
        trainable_params = [
            self.rcnn_coarse_map_conv_iod.bias,
            self.rcnn_fine_map_conv_iod.bias,
            self.rcnn_coarse_map_conv_iog.bias,
            self.rcnn_fine_map_conv_iog.bias,
            self.rcnn_coarse_map_conv_iou.bias,
            self.rcnn_fine_map_conv_iou.bias,
            self.rcnn_coarse_map_conv_iod.weight,
            self.rcnn_fine_map_conv_iod.weight,
            self.rcnn_coarse_map_conv_iog.weight,
            self.rcnn_fine_map_conv_iog.weight,
            self.rcnn_coarse_map_conv_iou.weight,
            self.rcnn_fine_map_conv_iou.weight,
        ]
        for param in trainable_params:
            param.requires_grad = True

    def init_modules(self):
        """Build the feature extractor, RPN, pooling layer, heads and losses.

        Raises:
            NotImplementedError: for the ``'psalign'`` and
                ``'deformable_psalign'`` pooling modes.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = LEDRPNModel(self.rpn_config)

        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')

        # Every head consumes the same pooled feature, so they must all
        # agree on its width: 2048 after spatial averaging, otherwise the
        # flattened 2048 x 4 x 4 map.
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4

        self.rcnn_cls_pred = nn.Linear(in_channels, self.n_classes)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # pred for iox
        self.rcnn_coarse_map_conv_iou = nn.Linear(in_channels, 4)
        self.rcnn_fine_map_conv_iou = nn.Linear(in_channels, 4)

        self.rcnn_coarse_map_conv_iog = nn.Linear(in_channels, 4)
        self.rcnn_fine_map_conv_iog = nn.Linear(in_channels, 4)

        self.rcnn_coarse_map_conv_iod = nn.Linear(in_channels, 4)
        self.rcnn_fine_map_conv_iod = nn.Linear(in_channels, 4)

        # loss for iox
        if self.use_sharpL2:
            self.reg_loss = SharpL2Loss()
        else:
            self.reg_loss = nn.MSELoss(reduce=False)
        self.cls_loss = nn.CrossEntropyLoss(reduce=False)

        # cls loss
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` onto the instance.

        Also builds the target assigner, the sampler and the coder used
        for the iox heads.  ``'use_sigmoid'`` is optional; every other key
        read here must be present.
        """
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']
        self.rpn_config = model_config['rpn_config']
        # divisor of the squared (1 - iou) term in the final score
        self.theta = 1.0

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']
        self.use_sigmoid = model_config.get('use_sigmoid')
        self.use_sharpL2 = model_config['use_sharpL2']
        self.use_cls_pred = model_config['use_cls_pred']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']

        # assigner
        self.target_assigner = LEDTargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        self.reduce = True

        # weight of the directly predicted iou in the blended score
        self.alpha = 0.6

        self.iox_bbox_coder = DiscreteBBoxCoder(
            model_config['iox_coder_config'])

    def iou_pred(self, rcnn_conv):
        """Apply the ``iou`` head pair; see :meth:`iox_pred`."""
        return self.iox_pred(rcnn_conv, self.rcnn_coarse_map_conv_iou,
                             self.rcnn_fine_map_conv_iou)

    def iog_pred(self, rcnn_conv):
        """Apply the ``iog`` head pair; see :meth:`iox_pred`."""
        return self.iox_pred(rcnn_conv, self.rcnn_coarse_map_conv_iog,
                             self.rcnn_fine_map_conv_iog)

    def iod_pred(self, rcnn_conv):
        """Apply the ``iod`` head pair; see :meth:`iox_pred`."""
        return self.iox_pred(rcnn_conv, self.rcnn_coarse_map_conv_iod,
                             self.rcnn_fine_map_conv_iod)

    def iox_pred(self, rcnn_conv, rcnn_coarse_map_conv, rcnn_fine_map_conv):
        """Predict one overlap quantity from the pooled features.

        Args:
            rcnn_conv: pooled feature tensor; its first axis is used as
                the number of rows.
            rcnn_coarse_map_conv: layer producing the 4 coarse scores.
            rcnn_fine_map_conv: layer producing the fine output.

        Returns:
            tuple: ``(decoded, coarse_scores, fine_reg)`` where
            ``decoded`` comes from ``iox_bbox_coder.decode_batch``,
            ``coarse_scores`` are the raw (pre-softmax) coarse outputs and
            ``fine_reg`` is the fine output, squashed into (-1, 1) when
            ``use_sigmoid`` is set.
        """
        batch_size = rcnn_conv.shape[0]
        coarse_map = rcnn_coarse_map_conv(rcnn_conv)
        fine_map = rcnn_fine_map_conv(rcnn_conv)

        coarse_map_reshape = coarse_map.view(batch_size, 4)
        iou_level_probs = F.softmax(coarse_map_reshape, dim=1)
        iou_level_probs = iou_level_probs.view_as(coarse_map)

        if self.use_sigmoid:
            # normalize it to (-1, 1)
            iou_reg = 2 * torch.sigmoid(fine_map) - 1
        else:
            iou_reg = fine_map

        decoded_iou = self.iox_bbox_coder.decode_batch(iou_level_probs,
                                                       iou_reg)

        # The raw scores (not the probabilities) are what the
        # classification loss consumes.
        iou_cls_scores = coarse_map
        return decoded_iou, iou_cls_scores, iou_reg

    def calculate_iou(self, iog, iod):
        """Derive an ``iou`` estimate from ``iog`` and ``iod``.

        Computes ``iod * iog / (iod + iog - iod * iog)`` element-wise.
        Entries where ``iod`` is exactly zero are left at zero instead of
        being evaluated, which avoids a possible division by zero.
        """
        mask = iod != 0
        iou_indirect = torch.zeros_like(iog)
        iod = iod[mask]
        iog = iog[mask]
        iou_indirect[mask] = (iod * iog) / (iod + iog - iod * iog)
        return iou_indirect

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and subsample a training batch.

        Reads ``prediction_dict['rois_batch']`` and the ``'gt_boxes'`` /
        ``'gt_labels'`` entries of ``feed_dict``.  Writes the normalised
        class and regression weights, the class and regression targets,
        the iou/iod/iog targets and the subsampled ``'rois_batch'`` back
        into ``prediction_dict``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]

        # Normalise by the number of samples that actually contribute.
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

        matcher = self.target_assigner.matcher

        # iou targets
        iou_targets = matcher.assigned_overlaps_batch
        iou_cls_targets = self.iox_bbox_coder.encode_cls(iou_targets)
        iou_reg_targets = self.iox_bbox_coder.encode_reg(iou_targets)
        prediction_dict['rcnn_iou_cls_targets'] = iou_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_iou_reg_targets'] = iou_reg_targets[
            batch_sampled_mask]

        # iod targets
        iod_targets = matcher.assigned_iod_batch
        iod_cls_targets = self.iox_bbox_coder.encode_cls(iod_targets)
        iod_reg_targets = self.iox_bbox_coder.encode_reg(iod_targets)
        prediction_dict['rcnn_iod_cls_targets'] = iod_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_iod_reg_targets'] = iod_reg_targets[
            batch_sampled_mask]

        # iog targets
        iog_targets = matcher.assigned_iog_batch
        iog_cls_targets = self.iox_bbox_coder.encode_cls(iog_targets)
        iog_reg_targets = self.iox_bbox_coder.encode_reg(iog_targets)
        prediction_dict['rcnn_iog_cls_targets'] = iog_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_iog_reg_targets'] = iog_reg_targets[
            batch_sampled_mask]

    def iox_loss(self, iou_scores, iou_cls_targets, iou_reg, iou_reg_targets):
        """Return the mean ``(cls_loss, reg_loss)`` of one iox head pair."""
        iou_cls_loss = self.cls_loss(iou_scores, iou_cls_targets)
        iou_reg_loss = self.reg_loss(iou_reg, iou_reg_targets).sum(dim=-1)
        return iou_cls_loss.mean(), iou_reg_loss.mean()

    def iou_loss(self, prediction_dict):
        """Loss of the ``iou`` heads; see :meth:`iox_loss`."""
        return self.iox_loss(prediction_dict['rcnn_iou_scores'],
                             prediction_dict['rcnn_iou_cls_targets'],
                             prediction_dict['rcnn_iou_reg'],
                             prediction_dict['rcnn_iou_reg_targets'])

    def iog_loss(self, prediction_dict):
        """Loss of the ``iog`` heads; see :meth:`iox_loss`."""
        return self.iox_loss(prediction_dict['rcnn_iog_scores'],
                             prediction_dict['rcnn_iog_cls_targets'],
                             prediction_dict['rcnn_iog_reg'],
                             prediction_dict['rcnn_iog_reg_targets'])

    def iod_loss(self, prediction_dict):
        """Loss of the ``iod`` heads; see :meth:`iox_loss`."""
        return self.iox_loss(prediction_dict['rcnn_iod_scores'],
                             prediction_dict['rcnn_iod_cls_targets'],
                             prediction_dict['rcnn_iod_reg'],
                             prediction_dict['rcnn_iod_reg_targets'])

    def loss(self, prediction_dict, feed_dict):
        """Compute all training losses from the outputs of :meth:`forward`.

        The targets and weights must already be present in
        ``prediction_dict`` (they are stored by :meth:`pre_subsample`).

        Returns:
            dict: the RPN losses plus the ``'rcnn/...'`` entries for the
            six iox losses, the box regression loss and, when
            ``use_cls_pred`` is set, the classification loss.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        iou_cls_loss, iou_reg_loss = self.iou_loss(prediction_dict)
        iog_cls_loss, iog_reg_loss = self.iog_loss(prediction_dict)
        iod_cls_loss, iod_reg_loss = self.iod_loss(prediction_dict)

        # classification loss
        if self.use_cls_pred:
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)
            loss_dict['rcnn/cls_loss'] = rcnn_cls_loss

        loss_dict['rcnn/iou_cls_loss'] = iou_cls_loss
        loss_dict['rcnn/iou_reg_loss'] = iou_reg_loss
        loss_dict['rcnn/iog_cls_loss'] = iog_cls_loss
        loss_dict['rcnn/iog_reg_loss'] = iog_reg_loss
        loss_dict['rcnn/iod_reg_loss'] = iod_reg_loss
        loss_dict['rcnn/iod_cls_loss'] = iod_cls_loss

        # bbox regression loss
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        loss_dict['rcnn/bbox_loss'] = rcnn_bbox_loss

        return loss_dict
class DetachDoubleIOUFasterRCNN(Model):
    """Faster R-CNN with separately switchable box and class branches.

    The box branch runs on ``second_stage_feature``.  The optional class
    branch runs on ``third_stage_feature`` of the *detached* pooled
    feature, so its loss does not back-propagate into the shared layers
    below the pooling.  The ``enable_*`` flags set in :meth:`init_param`
    choose which branches and diagnostics are active.
    """

    def forward(self, feed_dict):
        """Run the RPN and the enabled second-stage branches.

        ``feed_dict`` is read for ``'img'`` (and the ground truth when
        training or tracking rois) and is updated in place with
        ``'base_feat'``.

        Returns:
            dict: the RPN outputs plus ``rcnn_bbox_preds``,
            ``second_rpn_anchors``, ``second_rpn_cls_probs`` and, when
            ``enable_cls`` is set, ``rcnn_cls_probs`` / ``rcnn_cls_scores``.

        Raises:
            ValueError: at inference when ``enable_iou`` is set without
                ``enable_cls``; the iou-based rescoring needs the class
                probabilities.
        """
        # Fail fast: the rescoring below reads ``rcnn_cls_probs``, which
        # only exists when the class branch is enabled.
        if not self.training and self.enable_iou and not self.enable_cls:
            raise ValueError(
                'enable_iou requires enable_cls, please check them again')

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # Subsample before pooling to reduce memory consumption.
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # rois are flattened to rows of 5 values before pooling
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # The regression branch is computed unconditionally.
        pooled_feat_reg = self.feature_extractor.second_stage_feature(
            pooled_feat)
        pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds

        if self.enable_cls:
            # Detach so the class branch cannot update the shared
            # features.
            pooled_feat_cls = self.feature_extractor.third_stage_feature(
                pooled_feat.detach())
            pooled_feat_cls = pooled_feat_cls.mean(3).mean(2)
            rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat_cls)
            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
            prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]
        prediction_dict['second_rpn_cls_probs'] = prediction_dict[
            'rpn_cls_probs'][0][proposals_order]

        if not self.training and self.enable_iou:
            # Use the IoU between each refined box and its roi as the
            # final score; the class score only filters out background.
            pred_boxes = self.bbox_coder.decode_batch(
                rcnn_bbox_preds.view(1, -1, 4), rois_batch[:, :, 1:5])
            iou_matrix = box_ops.iou(pred_boxes, rois_batch[:, :, 1:5])[0]
            iou_matrix[rcnn_cls_probs[:, 1] < 0.5] = 0
            rcnn_cls_probs[:, 1] = iou_matrix
            prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs

        if not self.training and self.enable_track_rois:
            # Called for its side effects on the assigner only; the
            # returned targets are discarded.
            self.target_assigner.assign(rois_batch[:, :, 1:],
                                        feed_dict['gt_boxes'],
                                        feed_dict['gt_labels'])

        return prediction_dict

    def init_weights(self):
        """Initialise the sub-modules and the two prediction heads."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build the feature extractor, RPN, pooling layer, heads and losses.

        Raises:
            NotImplementedError: for the ``'psalign'`` and
                ``'deformable_psalign'`` pooling modes.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')

        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)

        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4

        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2, alpha=0.25)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` onto the instance.

        Also builds the target assigner and sampler and sets the
        ``enable_*`` switches to their hard-coded defaults.
        """
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox_coder
        self.bbox_coder = self.target_assigner.bbox_coder

        # similarity
        self.similarity_calc = self.target_assigner.similarity_calc

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        self.reduce = True

        # optimize cls
        self.enable_cls = False

        # optimize reg
        self.enable_reg = True

        # cal iou
        self.enable_iou = False

        # track good rois
        self.enable_track_rois = False

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and subsample a training batch.

        The positives used for sampling depend on the active branch:
        regression weights when ``enable_reg`` is set, class targets when
        only ``enable_cls`` is set.  Targets, normalised weights,
        ``'fake_match'`` and the subsampled ``'rois_batch'`` are written
        into ``prediction_dict``.

        Raises:
            ValueError: if neither ``enable_reg`` nor ``enable_cls`` is set.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        if self.enable_reg:
            # used for reg training
            pos_indicator = rcnn_reg_weights > 0
            indicator = None
        elif self.enable_cls:
            # used for cls training
            pos_indicator = rcnn_cls_targets > 0
            indicator = rcnn_cls_weights > 0
        else:
            raise ValueError("please check enable reg and enable cls again")

        # subsample from all
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)

        if self.enable_cls:
            rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
            # Normalise by the number of contributing samples.
            num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
            assert num_cls_coeff, 'bug happens'
            prediction_dict[
                'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()

        # used for retrieving statistics
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]

        # used for fg/bg
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        assert num_reg_coeff, 'bug happens'
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()

        if self.enable_reg:
            prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
                batch_sampled_mask]

        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """Compute the losses of the enabled branches.

        The targets and weights must already be present in
        ``prediction_dict`` (they are stored by :meth:`pre_subsample`).
        When ``enable_cls`` is set the analyzer of the target assigner is
        additionally fed with the foreground probabilities.

        Returns:
            dict: the RPN losses plus ``'rcnn_cls_loss'`` and/or
            ``'rcnn_bbox_loss'`` depending on the enabled branches.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        if self.enable_cls:
            # targets and weights
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']

            # classification loss
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)
            loss_dict['rcnn_cls_loss'] = rcnn_cls_loss

        if self.enable_reg:
            rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
            rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

            # bounding box regression L1 loss
            rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
            rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                                 rcnn_reg_targets).sum(dim=-1)
            rcnn_bbox_loss *= rcnn_reg_weights
            rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

            loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # AP analysis only makes sense when the class branch is enabled.
        if self.enable_cls:
            rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
            num_gt = feed_dict['gt_labels'].numel()
            fake_match = prediction_dict['fake_match']
            self.target_assigner.analyzer.analyze_ap(
                fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.5)

        return loss_dict
class RefineFasterRCNN(Model):
    """Faster R-CNN whose class head is trained by regression.

    The second stage uses a :class:`RefineRPNModel` for proposals and an
    element-wise MSE loss on ``exp`` of the foreground probability
    against ``exp`` of the assigned class target, instead of the usual
    cross entropy.
    """

    def forward(self, feed_dict):
        """Run the RPN and the second stage.

        ``feed_dict`` is read for ``'img'`` (and the ground truth when
        training) and is updated in place with ``'base_feat'``.

        Returns:
            dict: the RPN outputs plus ``rcnn_cls_probs``,
            ``rcnn_bbox_preds``, ``rcnn_cls_scores`` and
            ``second_rpn_anchors``.
        """
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # Subsample before pooling to reduce memory consumption.
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # rois are flattened to rows of 5 values before pooling
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        # average over the two spatial axes
        pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][0][
            proposals_order]

        return prediction_dict

    def init_weights(self):
        """Initialise the sub-modules and the two prediction heads."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build the feature extractor, RPN, pooling layer, heads and losses."""
        self.feature_extractor = FeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RefineRPNModel(self.rpn_config)
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                        1.0 / 16.0)
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(2048, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

        # loss module: the class head is trained with an element-wise MSE
        # (``use_focal_loss`` is read in init_param but not used here).
        self.rcnn_cls_loss = nn.MSELoss(reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` onto the instance.

        Also builds the target assigner and the sampler.
        """
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = RefineTargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and subsample a training batch.

        Reads ``prediction_dict['rois_batch']`` and the ``'gt_boxes'`` /
        ``'gt_labels'`` entries of ``feed_dict``.  Writes the normalised
        class and regression weights, the targets and the subsampled
        ``'rois_batch'`` back into ``prediction_dict``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        pos_indicator = rcnn_cls_targets > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size, pos_indicator, indicator=indicator)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]

        # Count the samples that contribute to each loss.  Comparing with
        # zero keeps the tensors on their current device (no hard CUDA
        # requirement) and does not truncate fractional weights the way a
        # cast to a byte tensor would.
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """Compute the training losses from the outputs of :meth:`forward`.

        The targets and weights must already be present in
        ``prediction_dict`` (they are stored by :meth:`pre_subsample`).

        Returns:
            dict: the RPN losses plus ``'rcnn_cls_loss'``,
            ``'rcnn_bbox_loss'`` and ``'rcnn_cls_targets'``.  The latter
            holds the *exponentiated* targets that entered the class loss.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss: MSE between exp(fg prob) and exp(target)
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        fg_rcnn_cls_probs = torch.exp(rcnn_cls_probs[:, 1])
        rcnn_cls_targets = torch.exp(rcnn_cls_targets)
        rcnn_cls_loss = self.rcnn_cls_loss(
            fg_rcnn_cls_probs, rcnn_cls_targets.type_as(fg_rcnn_cls_probs))
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of rpn
        loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        return loss_dict
class Mono3DFasterRCNN(Model):
    """Faster R-CNN variant with an extra head regressing 3D box parameters.

    The forward pass runs the RPN, a second-stage 2D classification /
    box-regression head, decodes the refined 2D boxes and pools features
    for them a second time to feed ``rcnn_3d_pred``.

    Which part is trained is controlled by ``self.train_2d`` /
    ``self.train_3d`` (both set in ``init_param``).
    """

    def forward(self, feed_dict):
        """Run RPN, the 2D RCNN head and the 3D head.

        Reads ``feed_dict['img']`` (and, when ``self.h_cat`` is set,
        ``feed_dict['p2']`` / ``feed_dict['im_info']``); writes
        ``feed_dict['base_feat']``.  Returns ``prediction_dict`` holding the
        RPN outputs plus ``rcnn_cls_probs``, ``rcnn_cls_scores``,
        ``rcnn_bbox_preds``, ``second_rpn_anchors`` and ``rcnn_3d``.
        """
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # Subsample proposals for the 2D head (training only).
        if self.training and self.train_2d:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        # Per-location class scores; their spatial mean is the ROI score and
        # their softmax is used as a saliency map to re-weight the features.
        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # Channel 0 of the saliency map is dropped before weighting.
        pooled_feat = pooled_feat * saliency_map[:, 1:, :, :]
        reduced_pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(reduced_pooled_feat)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        ###################################
        # 3d training
        ###################################
        # Decode the refined 2D boxes; detached so the 3D branch does not
        # back-propagate into the 2D box regressor.
        rcnn_bbox_preds = rcnn_bbox_preds.detach()
        # unsqueeze(0) adds a leading batch axis of size 1
        # (presumably batch size is 1 here -- TODO confirm).
        final_bbox = self.target_assigner.bbox_coder.decode_batch(
            rcnn_bbox_preds.unsqueeze(0), rois_batch[:, :, 1:])
        final_rois_inds = torch.zeros_like(final_bbox[:, :, -1:])
        final_rois_batch = torch.cat([final_rois_inds, final_bbox], dim=-1)

        # Subsample again on the refined boxes when training the 3D head.
        if self.training and self.train_3d:
            prediction_dict['rois_batch'] = final_rois_batch
            self.pre_subsample(prediction_dict, feed_dict)
            final_rois_batch = prediction_dict['rois_batch']

        mono_3d_pooled_feat = self.rcnn_pooling(base_feat,
                                                final_rois_batch.view(-1, 5))
        mono_3d_pooled_feat = self.feature_extractor.third_stage_feature(
            mono_3d_pooled_feat)
        mono_3d_pooled_feat = mono_3d_pooled_feat.mean(3).mean(2)

        if self.h_cat:
            # Append the flattened inverse ROI homography (9 values per ROI).
            H_inv = self.calc_Hinv(final_rois_batch, feed_dict['p2'],
                                   feed_dict['im_info'],
                                   base_feat.shape[-2:])[0].view(-1, 9)
            mono_3d_pooled_feat = torch.cat([mono_3d_pooled_feat, H_inv],
                                            dim=-1)
        rcnn_3d = self.rcnn_3d_pred(mono_3d_pooled_feat)

        if not self.training:
            # At inference the raw regression output is decoded.
            rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_dims(
                rcnn_3d, final_rois_batch)

        prediction_dict['rcnn_3d'] = rcnn_3d

        return prediction_dict

    def calc_Hinv(self, final_rois_batch, p2, img_size, feat_size):
        """Return the inverse of ``H = K_roi @ inv(K_c)`` for every ROI.

        ``K_c`` is the first three columns of ``p2[0]`` and ``K_roi`` is the
        intrinsic matrix rescaled / shifted for each ROI after pooling to
        ``pooling_size`` x ``pooling_size``.

        Args:
            final_rois_batch: tensor indexed as ``[:, :, 1:]`` to obtain the
                box coordinates (x1, y1, x2, y2 by the indexing used below).
            p2: batched projection matrix; only ``p2[0]`` is used
                (presumably shape (N, 3, 4) -- TODO confirm).
            img_size: indexed ``[:, 0]`` / ``[:, 1]`` for the divisors of the
                box height / width (presumably image h, w -- TODO confirm).
            feat_size: 2-sequence, feature-map (h, w).

        Returns:
            Float tensor of shape (1, num_rois, 9) on the same device as the
            intermediate homography.
        """
        p2 = p2[0]
        K_c = p2[:, :3]
        fx = K_c[0, 0]
        fy = K_c[1, 1]
        px = K_c[0, 2]
        py = K_c[1, 2]
        fw = self.pooling_size
        fh = self.pooling_size

        proposals = final_rois_batch[:, :, 1:]
        # ROI width / height measured on the feature map.
        rw = (proposals[:, :, 2] - proposals[:, :, 0] + 1
              ) / img_size[:, 1] * feat_size[1]
        rh = (proposals[:, :, 3] - proposals[:, :, 1] + 1
              ) / img_size[:, 0] * feat_size[0]

        # roi camera intrinsic parameters
        sw = fw / rw
        sh = fh / rh
        fx_roi = fx * sw
        fy_roi = fy * sh
        zeros = torch.zeros_like(fx_roi)
        ones = torch.ones_like(fx_roi)

        px_roi = (px - proposals[:, :, 0]) * sw
        py_roi = (py - proposals[:, :, 1]) * sh

        K_roi = torch.stack(
            [fx_roi, zeros, px_roi, zeros, fy_roi, py_roi, zeros, zeros, ones],
            dim=-1).view(-1, 3, 3)

        H = K_roi.matmul(torch.inverse(K_c))

        # Batched inversion is done with numpy (a per-matrix torch loop was
        # found too slow).  The result is moved back to H's own device rather
        # than a hard-coded ``.cuda()`` so CPU / non-default-GPU runs work.
        H_np = H.cpu().numpy()
        H_inv_np = np.linalg.inv(H_np)
        H_inv = torch.from_numpy(H_inv_np).to(H.device).float()
        return H_inv.view(1, -1, 9)

    def pre_forward(self):
        """Set up trainable parameters when only the 3D head is trained.

        In that mode ``freeze_modules`` is called, then only
        ``third_stage_feature`` and ``rcnn_3d_pred`` get ``requires_grad``
        re-enabled, and batch-norm is frozen everywhere except in
        ``third_stage_feature``.
        """
        if self.train_3d and self.training and not self.train_2d:
            self.freeze_modules()
            third_stage = self.feature_extractor.third_stage_feature
            for parameter in third_stage.parameters():
                parameter.requires_grad = True
            for param in self.rcnn_3d_pred.parameters():
                param.requires_grad = True
            self.freeze_bn(self)
            self.unfreeze_bn(self.feature_extractor.third_stage_feature)

    def init_weights(self):
        """Initialise sub-modules and the 2D cls / bbox prediction layers."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build feature extractor, RPN, pooling, heads and loss modules.

        Relies on attributes set by ``init_param`` (``pooling_mode``,
        ``reduce``, ``h_cat``, ...), so ``init_param`` must run first.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')

        # Convolutional classifier: yields a per-location score map that
        # ``forward`` also uses as a saliency map.
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module (element-wise; weighting / reduction is done in loss())
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # The 3D head optionally sees 9 extra inputs (flattened H_inv).
        if self.h_cat:
            c = in_channels + 9
        else:
            c = in_channels

        # 11 outputs; presumably 3 dims + 8 encoded side-point values,
        # matching the targets assembled in pre_subsample -- TODO confirm.
        self.rcnn_3d_pred = nn.Linear(c, 3 + 4 * 2)

        self.rcnn_3d_loss = OrientationLoss(split_loss=True)

    def init_param(self, model_config):
        """Read hyper-parameters from ``model_config`` and build helpers.

        Note: writes ``fg_thresh`` into
        ``model_config['target_assigner_config']`` before constructing the
        target assigner.
        """
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # ``reduce`` is fixed here rather than read from the config.
        self.reduce = True

        self.visualizer = FeatVisualizer()

        self.num_bins = 4

        # Training mode switch: exactly one of train_2d / train_3d is True.
        self.train_3d = False
        self.train_2d = not self.train_3d

        # more accurate bbox for 3d prediction
        if self.train_3d:
            fg_thresh = 0.6
        else:
            fg_thresh = 0.5
        model_config['target_assigner_config']['fg_thresh'] = fg_thresh

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        self.profiler = Profiler()

        self.h_cat = False

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to proposals and subsample a training batch.

        Reads ``prediction_dict['rois_batch']`` and the ground truth from
        ``feed_dict`` (``gt_boxes``, ``gt_labels``, ``gt_boxes_3d``,
        ``encoded_side_points``).  Writes normalised cls / reg / 3D weights,
        the matching targets and the subsampled ``rois_batch`` back into
        ``prediction_dict``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # 3D targets: first three components of gt_boxes_3d followed by the
        # encoded side points.
        gt_boxes_3d = feed_dict['gt_boxes_3d']
        encoded_side_points = feed_dict['encoded_side_points']
        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], encoded_side_points],
                                dim=-1)

        ##########################
        # assigner
        ##########################
        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights, rcnn_reg_targets_3d,
         rcnn_reg_weights_3d) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_boxes_3d, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        rcnn_reg_weights_3d = rcnn_reg_weights_3d[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)

        # At least one sample must contribute to the classification loss.
        assert num_cls_coeff, 'bug happens'
        # No positive sample is tolerated for regression: avoid dividing by
        # zero (the weights are all zero in that case anyway).
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones_like(num_reg_coeff)

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict[
            'rcnn_reg_weights_3d'] = (
                rcnn_reg_weights_3d / num_reg_coeff.float())
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets_3d'] = rcnn_reg_targets_3d[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """Compute the losses from targets prepared by ``pre_subsample``.

        Returns a dict with the RPN losses, ``rcnn_cls_loss`` and
        ``rcnn_bbox_loss`` when ``self.train_2d`` is set, and
        ``rcnn_3d_loss`` when ``self.train_3d`` is set.
        """
        loss_dict = {}

        if self.train_2d:
            # submodule loss
            loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

            # targets and weights
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
            rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
            rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

            # classification loss
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

            # bounding box regression L1 loss
            rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
            rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                                 rcnn_reg_targets).sum(dim=-1)
            rcnn_bbox_loss *= rcnn_reg_weights
            rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

            loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
            loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        ######################################
        # 3d loss
        ######################################
        rcnn_reg_weights_3d = prediction_dict['rcnn_reg_weights_3d']
        rcnn_reg_targets_3d = prediction_dict['rcnn_reg_targets_3d']
        rcnn_3d = prediction_dict['rcnn_3d']

        if self.train_3d:
            rcnn_3d_loss = self.rcnn_bbox_loss(rcnn_3d,
                                               rcnn_reg_targets_3d).sum(dim=-1)
            # NOTE(review): unlike the 2D losses above, this one is stored
            # per-ROI (not summed over ROIs) -- confirm the consumer of
            # loss_dict reduces it.
            rcnn_3d_loss = rcnn_3d_loss * rcnn_reg_weights_3d
            loss_dict['rcnn_3d_loss'] = rcnn_3d_loss

        return loss_dict
class DoubleIoUSecondStageFasterRCNN(Model):
    """Faster R-CNN whose second stage decouples classification and box
    regression into two branches fed by separate 1x1 convolutions.

    The classification branch receives detached pooled features, so its
    gradients do not reach ``rcnn_conv`` or the shared backbone features.
    """

    def forward(self, feed_dict):
        """Run RPN and the decoupled cls / bbox second stage.

        Reads ``feed_dict['img']``; writes ``feed_dict['base_feat']``.
        Returns ``prediction_dict`` with the RPN outputs plus
        ``rcnn_cls_probs``, ``rcnn_cls_scores``, ``rcnn_bbox_preds`` and
        ``second_rpn_anchors``.
        """
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # shape(N,num_proposals,5)
        # pre subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W), rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        pooled_feat = F.relu(self.rcnn_conv(pooled_feat), inplace=True)
        # Classification branch works on detached features.
        pooled_feat_cls = self.rcnn_pooled_feat_cls(pooled_feat.detach())
        pooled_feat_bbox = self.rcnn_pooled_feat_bbox(pooled_feat)

        # classification
        pooled_feat_cls = self.feature_extractor.third_stage_feature(
            pooled_feat_cls)
        pooled_feat_cls = pooled_feat_cls.mean(3).mean(2)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat_cls)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # regression
        pooled_feat_reg = self.feature_extractor.second_stage_feature(
            pooled_feat_bbox)
        pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        return prediction_dict

    def unfreeze_part_modules(self, model):
        """Re-enable gradients for every parameter of ``model``."""
        for param in model.parameters():
            param.requires_grad = True

    def init_weights(self):
        """Initialise sub-modules and the cls / bbox prediction layers."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build feature extractor, RPN, pooling, heads and loss modules.

        Relies on attributes set by ``init_param``.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module (element-wise; weighting / reduction is done in loss())
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # decouple cls and bbox
        self.rcnn_conv = nn.Conv2d(1024, 512, 3, 1, 1, bias=True)
        self.rcnn_pooled_feat_cls = nn.Conv2d(512, 1024, 1, 1, 0)
        self.rcnn_pooled_feat_bbox = nn.Conv2d(512, 1024, 1, 1, 0)

    def init_param(self, model_config):
        """Read hyper-parameters from ``model_config`` and build helpers."""
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # ``reduce`` is fixed here rather than read from the config.
        self.reduce = True

        self.visualizer = FeatVisualizer()

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to proposals and subsample a training batch.

        Reads ``prediction_dict['rois_batch']`` and ``gt_boxes`` /
        ``gt_labels`` from ``feed_dict``.  Writes normalised cls / reg
        weights, targets, ``fake_match`` (the assigner analyzer's match
        restricted to the sampled ROIs) and the subsampled ``rois_batch``
        back into ``prediction_dict``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)

        # Both counts are used as divisors below and must be non-zero.
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """Compute RPN and RCNN losses from ``pre_subsample``'s targets.

        Returns a dict with the RPN losses, ``rcnn_cls_loss`` and
        ``rcnn_bbox_loss``.  Also feeds the assigner's analyzer with the
        foreground probabilities for AP statistics.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        # The weights are already normalised by the number of positives, so
        # they must be applied exactly once; applying them twice would
        # square that normalisation and shrink the regression loss.
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights has no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # precision analysis on the sampled ROIs
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        fake_match = prediction_dict['fake_match']
        num_gt = feed_dict['gt_labels'].numel()
        self.target_assigner.analyzer.analyze_ap(
            fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.5)

        return loss_dict

    def loss_new(self, prediction_dict, feed_dict):
        """Alternative loss: assign targets, then subsample by loss value.

        Unlike ``loss``, targets are assigned here and the per-ROI weighted
        loss is passed to the sampler as ``criterion``.  Only the RPN losses
        and ``rcnn_cls_loss`` are returned (the box loss is computed but not
        added to the result).  Writes the normalised
        ``rcnn_reg_weights`` into ``prediction_dict``.

        Targets / weights are indexed with ``[0]``, i.e. only the first
        batch element is used.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        indicator = rcnn_cls_weights > 0
        pos_indicator = indicator

        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                           rcnn_cls_targets[0])

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets[0]).sum(dim=-1)

        # Per-ROI weighted loss handed to the sampler as its criterion.
        cls_criterion = (rcnn_cls_loss * rcnn_cls_weights +
                         rcnn_bbox_loss * rcnn_reg_weights)

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)

        # Zero out unsampled ROIs instead of indexing, so shapes are kept.
        rcnn_cls_weights = rcnn_cls_weights * batch_sampled_mask.type_as(
            rcnn_cls_weights)
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        assert num_cls_coeff, 'bug happens'
        rcnn_cls_weights = rcnn_cls_weights / num_cls_coeff.float()

        # classification loss
        rcnn_cls_loss *= rcnn_cls_weights[0]
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bbox reg
        rcnn_reg_weights *= batch_sampled_mask.type_as(rcnn_reg_weights)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        assert num_reg_coeff, 'bug happens'
        rcnn_reg_weights = rcnn_reg_weights / num_reg_coeff.float()
        rcnn_bbox_loss *= rcnn_reg_weights[0]
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # Only the classification loss is reported by this variant.
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss

        # analysis precision
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        fake_match = self.target_assigner.analyzer.match
        num_gt = feed_dict['gt_labels'].numel()
        self.target_assigner.analyzer.analyze_ap(
            fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.5)

        prediction_dict['rcnn_reg_weights'] = rcnn_reg_weights
        return loss_dict
class Mono3DAngleNewFasterRCNN(Model):
    """Faster R-CNN with an extra monocular 3D branch (dims + multi-bin angle)."""

    def forward(self, feed_dict):
        """Run the backbone, the RPN and the 2D / 3D second-stage heads.

        Args:
            feed_dict: input dict; ``'img'`` is read here and ``'base_feat'``
                is written back for the RPN.

        Returns:
            The RPN prediction dict extended with ``rcnn_3d``,
            ``rcnn_cls_probs``, ``rcnn_bbox_preds``, ``rcnn_cls_scores`` and
            ``second_rpn_anchors``.
        """
        prediction_dict = {}

        # shared first-stage features, also handed to the RPN via feed_dict
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # thin out the proposals before pooling to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)

        # rois are flattened to rows of 5 values before pooling
        roi_feat = self.rcnn_pooling(
            base_feat, prediction_dict['rois_batch'].view(-1, 5))

        # 3D branch: fed with detached features, then spatially averaged
        feat_3d = self.feature_extractor.third_stage_feature(
            roi_feat.detach())
        feat_3d = feat_3d.mean(3).mean(2)

        # 2D branch: a conv head yields a score map, averaged into scores
        feat_2d = self.feature_extractor.second_stage_feature(roi_feat)
        cls_scores_map = self.rcnn_cls_pred(feat_2d)
        cls_scores = cls_scores_map.mean(3).mean(2)
        saliency = F.softmax(cls_scores_map, dim=1)
        cls_probs = F.softmax(cls_scores, dim=1)

        # box regression sees the features weighted by the saliency channels
        # after the first one
        feat_2d = feat_2d * saliency[:, 1:, :, :]
        if self.reduce:
            bbox_feat = feat_2d.mean(3).mean(2)
        else:
            bbox_feat = feat_2d.view(self.rcnn_batch_size, -1)
        bbox_preds = self.rcnn_bbox_pred(bbox_feat)

        # 3D heads: 3 dims, then per bin [2 confidence logits, 2 regressed]
        dims_pred = self.rcnn_dims_pred(feat_3d)
        bin_reg = self.rcnn_angle_pred(feat_3d).view(-1, self.num_bins, 2)
        bin_conf = self.rcnn_angle_conf_pred(feat_3d).view(
            -1, self.num_bins, 2)
        bins = torch.cat([bin_conf, bin_reg], dim=-1)
        rcnn_3d = torch.cat(
            [dims_pred, bins.view(-1, self.num_bins * 4)], dim=-1)

        prediction_dict.update({
            'rcnn_3d': rcnn_3d,
            'rcnn_cls_probs': cls_probs,
            'rcnn_bbox_preds': bbox_preds,
            'rcnn_cls_scores': cls_scores,
        })

        # used for track
        order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            order]

        if self.training:
            return prediction_dict

        # inference only: keep the regression of the most confident bin and
        # let the 3D coder decode it around that bin's center
        per_bin = rcnn_3d[:, 3:].view(-1, self.num_bins, 4)
        bin_probs = F.softmax(per_bin[:, :, :2], dim=-1)
        best_bin = torch.max(bin_probs[:, :, 1], dim=-1)[1]
        rows = torch.arange(0, best_bin.shape[0]).type_as(best_bin)
        best_reg = per_bin[:, :, 2:][rows, best_bin]
        encoded = torch.cat([rcnn_3d[:, :3], best_reg], dim=-1)
        prediction_dict['rcnn_3d'] = (
            self.target_assigner.bbox_coder_3d.decode_batch_angle(
                encoded, self.rcnn_3d_loss.bin_centers[best_bin]))

        return prediction_dict

    def pre_forward(self):
        """When training the 3D branch alone, freeze everything else."""
        if not self.train_3d or not self.training or self.train_2d:
            return

        self.freeze_modules()

        # re-enable gradients for the 3D feature stage and its three heads
        trainable = (
            self.feature_extractor.third_stage_feature,
            self.rcnn_angle_conf_pred,
            self.rcnn_angle_pred,
            self.rcnn_dims_pred,
        )
        for module in trainable:
            for weight in module.parameters():
                weight.requires_grad = True

        self.freeze_bn(self)
        self.unfreeze_bn(self.feature_extractor.third_stage_feature)

    def init_weights(self):
        """Initialise the sub-models and the 2D cls / bbox heads."""
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        for head, std in ((self.rcnn_cls_pred, 0.01),
                          (self.rcnn_bbox_pred, 0.001)):
            Filler.normal_init(head, 0, std, self.truncated)

    def init_modules(self):
        """Build the backbone, RPN, RoI pooling, prediction heads and losses."""
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        mode = self.pooling_mode
        if mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')

        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

        in_channels = 2048 if self.reduce else 2048 * 4 * 4
        bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(in_channels, bbox_out)

        # loss modules (all un-reduced; weighting happens in ``loss``)
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
        self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)

        def mlp_head(out_features):
            # Linear -> ReLU -> Linear with a 256-wide hidden layer
            return nn.Sequential(
                nn.Linear(in_channels, 256), nn.ReLU(),
                nn.Linear(256, out_features))

        self.rcnn_dims_pred = mlp_head(3)
        self.rcnn_angle_pred = mlp_head(self.num_bins * 2)
        self.rcnn_angle_conf_pred = mlp_head(self.num_bins * 2)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` and build helpers."""
        self.classes = model_config['classes']
        self.n_classes = len(self.classes)

        # entries stored on the instance under the same name as their key
        copied_keys = (
            'class_agnostic',
            'pooling_size',
            'pooling_mode',
            'crop_resize_with_max_pool',
            'truncated',
            'use_focal_loss',
            'subsample_twice',
            'rcnn_batch_size',
            'feature_extractor_config',
            'rpn_config',
        )
        for key in copied_keys:
            setattr(self, key, model_config[key])

        self.sampler = BalancedSampler(model_config['sampler_config'])

        # fixed (not configurable) settings of this model
        self.reduce = True
        self.visualizer = FeatVisualizer()
        self.num_bins = 2
        self.train_3d = True
        self.train_2d = True

        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and keep a sampled subset.

        Writes the sampled, normalised weights, the sampled targets and the
        reduced ``rois_batch`` back into ``prediction_dict``.
        """
        rois = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']
        local_angle = feed_dict['local_angle']

        # 3D target = first three gt_boxes_3d values followed by local angle
        boxes_3d = feed_dict['gt_boxes_3d']
        boxes_3d = torch.cat([boxes_3d[:, :, :3], local_angle], dim=-1)

        # assigner; the leading roi column is dropped
        (cls_targets, reg_targets, cls_weights, reg_weights, reg_targets_3d,
         reg_weights_3d) = self.target_assigner.assign(
             rois[:, :, 1:], gt_boxes, boxes_3d, gt_labels)

        # subsampler
        keep = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            reg_weights > 0,
            indicator=cls_weights > 0,
            criterion=None)

        cls_weights = cls_weights[keep]
        reg_weights = reg_weights[keep]
        reg_weights_3d = reg_weights_3d[keep]

        # normalisers: number of sampled entries with a positive weight
        num_cls_coeff = (cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (reg_weights > 0).sum(dim=-1)
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict.update({
            'rcnn_cls_weights': cls_weights / num_cls_coeff.float(),
            'rcnn_reg_weights': reg_weights / num_reg_coeff.float(),
            'rcnn_reg_weights_3d': reg_weights_3d / num_reg_coeff.float(),
            'rcnn_cls_targets': cls_targets[keep],
            'rcnn_reg_targets': reg_targets[keep],
            'rcnn_reg_targets_3d': reg_targets_3d[keep],
        })

        # update rois_batch
        prediction_dict['rois_batch'] = rois[keep].view(rois.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """Compute the 2D losses (if enabled) and the 3D dims / angle losses.

        Also records angle statistics in ``self.target_assigner.stat``.
        """
        loss_dict = {}

        if self.train_2d:
            # submodule loss
            loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

            cls_weights = prediction_dict['rcnn_cls_weights']
            reg_weights = prediction_dict['rcnn_reg_weights']
            cls_targets = prediction_dict['rcnn_cls_targets']
            reg_targets = prediction_dict['rcnn_reg_targets']

            # classification loss
            cls_loss = self.rcnn_cls_loss(prediction_dict['rcnn_cls_scores'],
                                          cls_targets)
            cls_loss *= cls_weights
            cls_loss = cls_loss.sum(dim=-1)

            # bounding box regression L1 loss
            bbox_loss = self.rcnn_bbox_loss(
                prediction_dict['rcnn_bbox_preds'], reg_targets).sum(dim=-1)
            bbox_loss *= reg_weights
            bbox_loss = bbox_loss.sum(dim=-1)

            loss_dict['rcnn_cls_loss'] = cls_loss
            loss_dict['rcnn_bbox_loss'] = bbox_loss

        # 3d loss
        weights_3d = prediction_dict['rcnn_reg_weights_3d']
        targets_3d = prediction_dict['rcnn_reg_targets_3d']
        preds_3d = prediction_dict['rcnn_3d']

        angles_all_num = 0
        angles_tp_num = 0
        if self.train_3d:
            # first three columns are dims, the rest is the multi-bin angle
            dims_loss = self.rcnn_bbox_loss(
                preds_3d[:, :3], targets_3d[:, :3]).sum(dim=-1)
            angle_loss, angle_tp_mask = self.rcnn_3d_loss(
                preds_3d[:, 3:], targets_3d[:, 3:])

            loss_dict['rcnn_3d_loss'] = (dims_loss * weights_3d).sum(dim=-1)
            loss_dict['rcnn_angle_loss'] = (angle_loss * weights_3d).sum(
                dim=-1)

            # angle stats, restricted to positively weighted samples
            angle_tp_mask = angle_tp_mask[weights_3d > 0]
            angles_tp_num = angle_tp_mask.int().sum().item()
            angles_all_num = angle_tp_mask.numel()

        # store all stats in target assigner
        self.target_assigner.stat.update({
            'angle_num_tp': torch.tensor(0),
            'angle_num_all': 1,
            'orient_tp_num': 0,
            'orient_tp_num2': 0,
            'orient_tp_num3': 0,
            'orient_all_num3': 0,
            'orient_all_num': 0,
            'orient_tp_num4': 0,
            'orient_all_num4': 0,
            'cls_orient_2s_all_num': angles_all_num,
            'cls_orient_2s_tp_num': angles_tp_num
        })

        return loss_dict
class IoUFasterRCNN(Model):
    """Faster R-CNN whose second stage also predicts IoU-style quality scores.

    Besides class scores and box deltas, linear heads predict IoU and, when
    ``use_iox`` is set, IoG and IoD. The foreground probability reported in
    ``rcnn_cls_probs`` is the softmax probability rescaled by a function of
    the predicted IoU.
    """

    def forward(self, feed_dict):
        """Run the backbone, the RPN and the second-stage heads.

        Args:
            feed_dict: input dict; ``'img'`` is read here and ``'base_feat'``
                is written back for the RPN.

        Returns:
            The RPN prediction dict extended with ``rcnn_cls_probs``,
            ``rcnn_bbox_preds``, ``rcnn_cls_scores``, ``rcnn_iou``,
            ``second_rpn_anchors`` and, when ``use_iox`` is set,
            ``rcnn_iog`` / ``rcnn_iod``.
        """
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # pre subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # rois are flattened to rows of 5 values before pooling
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        # semantic map: a conv head yields a per-location score map that is
        # averaged into the per-roi class scores
        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # box / quality heads see the features weighted by the saliency
        # channels after the first one, then spatially averaged
        rcnn_bbox_feat = pooled_feat * saliency_map[:, 1:, :, :]
        rcnn_bbox_feat = rcnn_bbox_feat.mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(rcnn_bbox_feat)

        # iou
        rcnn_iou = torch.sigmoid(self.rcnn_iou(rcnn_bbox_feat))

        if self.use_iox:
            rcnn_iog = torch.sigmoid(self.rcnn_iog(rcnn_bbox_feat))
            # The IoD prediction must come from its own head. It used to be
            # computed with ``self.rcnn_iog``, which made it identical to the
            # IoG prediction and left ``self.rcnn_iod`` untrained.
            # NOTE: checkpoints trained before this fix therefore carry an
            # untrained ``rcnn_iod`` head.
            rcnn_iod = torch.sigmoid(self.rcnn_iod(rcnn_bbox_feat))

            # blend the directly predicted IoU with the one derived from
            # IoG / IoD
            rcnn_iou_indirect = self.calculate_iou(rcnn_iog, rcnn_iod)
            rcnn_iou_final = (
                1 - self.alpha) * rcnn_iou_indirect + self.alpha * rcnn_iou
            prediction_dict['rcnn_iog'] = rcnn_iog
            prediction_dict['rcnn_iod'] = rcnn_iod
        else:
            # use iou directly
            rcnn_iou_final = rcnn_iou

        # foreground probability scaled by exp(-(1 - iou)^2 / theta)
        rcnn_fg_probs_final = rcnn_cls_probs[:, 1:] * torch.exp(-torch.pow(
            (1 - rcnn_iou_final[:, 1:]), 2) / self.theta)

        # NOTE(review): both halves hold the foreground score; presumably
        # consumers only read the columns after the first - confirm.
        prediction_dict['rcnn_cls_probs'] = torch.cat(
            [rcnn_fg_probs_final, rcnn_fg_probs_final], dim=-1)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores
        prediction_dict['rcnn_iou'] = rcnn_iou

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][0][
            proposals_order]

        return prediction_dict

    def calculate_iou(self, iog, iod):
        """Derive IoU from IoG and IoD element-wise.

        Uses ``iou = iod * iog / (iod + iog - iod * iog)``. Entries where
        ``iod`` is exactly 0 are skipped and stay 0 in the result.

        Args:
            iog: tensor of IoG values.
            iod: tensor of IoD values, same shape as ``iog``.

        Returns:
            Tensor shaped like ``iog`` holding the derived IoU.
        """
        mask = iod != 0
        iou_indirect = torch.zeros_like(iog)
        iod = iod[mask]
        iog = iog[mask]
        iou_indirect[mask] = (iod * iog) / (iod + iog - iod * iog)
        return iou_indirect

    def init_weights(self):
        """Initialise the sub-models and the cls / bbox heads."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build the backbone, RPN, RoI pooling, prediction heads and losses."""
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = IoURPNModel(self.rpn_config)
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                        1.0 / 16.0)
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

        in_channels = 2048
        # one quality head each for IoU, IoG and IoD
        self.rcnn_iou = nn.Linear(in_channels, self.n_classes)
        self.rcnn_iog = nn.Linear(in_channels, self.n_classes)
        self.rcnn_iod = nn.Linear(in_channels, self.n_classes)

        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss modules (all un-reduced; weighting happens in ``loss``)
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

        self.rcnn_iou_loss = nn.MSELoss(reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` and build helpers."""
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        # theta scales the IoU-based damping of the foreground score; alpha
        # is the weight of the directly predicted IoU in the blend
        self.theta = 1.0
        self.alpha = 0.6

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']
        self.iou_criterion = model_config['iou_criterion']
        self.use_iox = model_config['use_iox']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = LEDTargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        if self.iou_criterion:
            self.sampler = DetectionSampler(model_config['sampler_config'])
        else:
            self.sampler = BalancedSampler(model_config['sampler_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and keep a sampled subset.

        Writes the sampled, normalised weights, the sampled cls / reg /
        IoU / IoG / IoD targets and the reduced ``rois_batch`` back into
        ``prediction_dict``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # assigner; the leading roi column is dropped
        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_labels)

        # subsampler; optionally ranked by the assigned overlaps
        if self.iou_criterion:
            cls_criterion = self.target_assigner.matcher.assigned_overlaps_batch
        else:
            cls_criterion = None

        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]

        # normalisers: number of sampled entries with a positive weight
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        # NOTE(review): ``forward`` only calls this method while training,
        # so this branch does not run from there - confirm it is intended.
        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

        # quality targets recorded by the matcher during assignment
        matcher = self.target_assigner.matcher
        prediction_dict['rcnn_iou_targets'] = matcher.assigned_overlaps_batch[
            batch_sampled_mask]
        prediction_dict['rcnn_iog_targets'] = matcher.assigned_iog_batch[
            batch_sampled_mask]
        prediction_dict['rcnn_iod_targets'] = matcher.assigned_iod_batch[
            batch_sampled_mask]

    def _exp_mse_loss(self, preds, targets, weights):
        """Weighted MSE between exp(preds) and exp(targets), summed on dim -1."""
        loss = self.rcnn_iou_loss(torch.exp(preds), torch.exp(targets))
        loss *= weights
        return loss.sum(dim=-1)

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN, quality (IoU / IoG / IoD), cls and bbox losses.

        Expects the targets and weights written by ``pre_subsample``.

        Returns:
            Dict mapping loss names to loss tensors.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # iou loss; only column 1 of each quality prediction is supervised
        rcnn_iou_loss = self._exp_mse_loss(
            prediction_dict['rcnn_iou'][:, 1],
            prediction_dict['rcnn_iou_targets'], rcnn_cls_weights)

        if self.use_iox:
            rcnn_iog_loss = self._exp_mse_loss(
                prediction_dict['rcnn_iog'][:, 1],
                prediction_dict['rcnn_iog_targets'], rcnn_cls_weights)
            rcnn_iod_loss = self._exp_mse_loss(
                prediction_dict['rcnn_iod'][:, 1],
                prediction_dict['rcnn_iod_targets'], rcnn_cls_weights)
            loss_dict['rcnn_iod_loss'] = rcnn_iod_loss
            loss_dict['rcnn_iog_loss'] = rcnn_iog_loss

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss
        loss_dict['rcnn_iou_loss'] = rcnn_iou_loss

        return loss_dict
class Mono3DFinalAngleFasterRCNN(Model):
    """Faster R-CNN with a single linear 3D head (dims + multi-bin angle).

    The class, box and 3D heads all read the same spatially averaged
    second-stage feature. ``n_classes`` is ``len(classes) + 1``.
    """

    def forward(self, feed_dict):
        """Run the backbone, the RPN and the second-stage heads.

        Args:
            feed_dict: input dict; ``'img'`` and ``'mean_dims'`` are read
                here and ``'base_feat'`` is written back for the RPN.

        Returns:
            The RPN prediction dict extended with ``rcnn_cls_probs``,
            ``rcnn_bbox_preds``, ``rcnn_cls_scores``, ``second_rpn_anchors``
            and ``rcnn_3d`` (raw while training, decoded otherwise).
        """
        # the 3D coder needs the mean dims supplied with the current batch
        self.target_assigner.bbox_coder_3d.mean_dims = feed_dict['mean_dims']
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # rois are flattened to rows of 5 values before pooling
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        second_pooled_feat = self.feature_extractor.second_stage_feature(
            pooled_feat)
        second_pooled_feat = second_pooled_feat.mean(3).mean(2)

        rcnn_cls_scores = self.rcnn_cls_preds(second_pooled_feat)
        rcnn_bbox_preds = self.rcnn_bbox_preds(second_pooled_feat)
        rcnn_3d = self.rcnn_3d_pred(second_pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        prediction_dict['rcnn_3d'] = rcnn_3d

        if not self.training:
            # split the raw 3D output into its dims and orientation parts
            if self.class_agnostic_3d:
                orient = rcnn_3d[:, 3:]
                dims = rcnn_3d[:, :3]
            else:
                # NOTE(review): all per-class dims columns are forwarded to
                # the decoder here; confirm it expects 3 * n_classes columns.
                orient = rcnn_3d[:, 3 * self.n_classes:]
                dims = rcnn_3d[:, :3 * self.n_classes]

            # per bin: [2 confidence logits, 2 regressed values]; keep the
            # regression of the most confident bin
            angles = orient.view(-1, self.num_bins, 4)
            angles_cls = F.softmax(angles[:, :, :2], dim=-1)
            _, angles_cls_argmax = torch.max(angles_cls[:, :, 1], dim=-1)
            row = torch.arange(
                0, angles_cls_argmax.shape[0]).type_as(angles_cls_argmax)
            angles_oritations = angles[:, :, 2:][row, angles_cls_argmax]
            rcnn_3d = torch.cat([dims, angles_oritations], dim=-1)
            rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_angle(
                rcnn_3d, self.rcnn_3d_loss.bin_centers[angles_cls_argmax])
            prediction_dict['rcnn_3d'] = rcnn_3d

        return prediction_dict

    def pre_forward(self):
        """Hook run before ``forward``; nothing to do for this model."""
        pass

    def init_weights(self):
        """Initialise the sub-models and the cls / bbox heads."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_preds, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_preds, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build the backbone, RPN, RoI pooling, prediction heads and losses."""
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = ROIAlign(
                (self.pooling_size, self.pooling_size), 1.0 / 16.0, 2)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')

        self.rcnn_cls_preds = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_preds = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_preds = nn.Linear(in_channels, 4 * self.n_classes)

        # loss modules (all un-reduced; weighting happens in ``loss``)
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(self.n_classes)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # 3D head: dims (3, or 3 per class) followed by 4 values per bin
        if self.class_agnostic_3d:
            self.rcnn_3d_pred = nn.Linear(in_channels, 3 + 4 * self.num_bins)
        else:
            self.rcnn_3d_pred = nn.Linear(
                in_channels, 3 * self.n_classes + 4 * self.num_bins)

        self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)

    def init_param(self, model_config):
        """Copy hyper-parameters from ``model_config`` and build helpers."""
        classes = model_config['classes']
        self.classes = classes
        # one more output than there are entries in ``classes``
        self.n_classes = len(classes) + 1
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.class_agnostic_3d = model_config['class_agnostic_3d']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # fixed (not configurable) settings of this model
        self.reduce = True
        self.visualizer = FeatVisualizer()
        self.num_bins = 4

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        self.profiler = Profiler()
        self.h_cat = False

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and keep a sampled subset.

        Writes the sampled, normalised weights, the sampled targets and the
        reduced ``rois_batch`` back into ``prediction_dict``.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # use local angle
        local_angle = feed_dict['local_angle']

        # 3D target = first three gt_boxes_3d values followed by local angle
        gt_boxes_3d = feed_dict['gt_boxes_3d']
        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], local_angle], dim=-1)

        # assigner; the leading roi column is dropped
        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights, rcnn_reg_targets_3d,
         rcnn_reg_weights_3d) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_boxes_3d, gt_labels)

        # subsampler
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        rcnn_reg_weights_3d = rcnn_reg_weights_3d[batch_sampled_mask]

        # normalisers: number of sampled entries with a positive weight
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict[
            'rcnn_reg_weights_3d'] = rcnn_reg_weights_3d / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets_3d'] = rcnn_reg_targets_3d[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def squeeze_bbox_preds(self, rcnn_bbox_preds, rcnn_cls_targets, out_c=4):
        """Select, for every row, the predictions of its target class.

        Args:
            rcnn_bbox_preds: per-class predictions, viewable as
                ``(-1, n_classes, out_c)`` (e.g. ``(N, n_classes * out_c)``).
            rcnn_cls_targets: 1-D integer tensor with one class index per row.
            out_c: number of values predicted per class.

        Returns:
            Tensor of shape ``(N, out_c)``.
        """
        per_class = rcnn_bbox_preds.view(-1, self.n_classes, out_c)
        rows = torch.arange(
            0, per_class.shape[0]).type_as(rcnn_cls_targets)
        # pair every row with its own target class
        return per_class[rows, rcnn_cls_targets]

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN, cls, bbox and 3D (dims / angle) losses.

        Expects the targets and weights written by ``pre_subsample``. Also
        records angle statistics in ``self.target_assigner.stat``.

        Returns:
            Dict mapping loss names to loss tensors.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        # A class-agnostic head already emits 4 values per roi, so only the
        # per-class layout has to be reduced to the target class.
        if not self.class_agnostic:
            rcnn_bbox_preds = self.squeeze_bbox_preds(rcnn_bbox_preds,
                                                      rcnn_cls_targets)
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # 3d loss
        rcnn_reg_weights_3d = prediction_dict['rcnn_reg_weights_3d']
        rcnn_reg_targets_3d = prediction_dict['rcnn_reg_targets_3d']
        rcnn_3d = prediction_dict['rcnn_3d']

        if not self.class_agnostic_3d:
            dims_pred = rcnn_3d[:, :3 * self.n_classes]
            dims_pred = self.squeeze_bbox_preds(dims_pred, rcnn_cls_targets,
                                                3)
            orient_pred = rcnn_3d[:, 3 * self.n_classes:]
        else:
            dims_pred = rcnn_3d[:, :3]
            orient_pred = rcnn_3d[:, 3:]

        # dims
        rcnn_3d_loss_dims = self.rcnn_bbox_loss(
            dims_pred, rcnn_reg_targets_3d[:, :3]).sum(dim=-1)

        # angles
        rcnn_angle_loss, angle_tp_mask = self.rcnn_3d_loss(
            orient_pred, rcnn_reg_targets_3d[:, 3:])

        rcnn_3d_loss = rcnn_3d_loss_dims * rcnn_reg_weights_3d
        rcnn_3d_loss = rcnn_3d_loss.sum(dim=-1)

        rcnn_angle_loss = rcnn_angle_loss * rcnn_reg_weights_3d
        rcnn_angle_loss = rcnn_angle_loss.sum(dim=-1)

        loss_dict['rcnn_3d_loss'] = rcnn_3d_loss
        loss_dict['rcnn_angle_loss'] = rcnn_angle_loss

        # angle stats, restricted to positively weighted samples
        angle_tp_mask = angle_tp_mask[rcnn_reg_weights_3d > 0]
        angles_tp_num = angle_tp_mask.int().sum().item()
        angles_all_num = angle_tp_mask.numel()

        self.target_assigner.stat.update({
            'cls_orient_2s_all_num': angles_all_num,
            'cls_orient_2s_tp_num': angles_tp_num
        })

        return loss_dict
class SemanticFasterRCNN(Model):
    """Two-stage detector: an RPN followed by a per-RoI classification and
    box-regression head.

    The head is two linear layers applied to the spatially averaged
    second-stage feature of every RoI. Sub-modules are built in
    ``init_modules`` and hyper-parameters are read in ``init_param``; both
    are presumably invoked by the ``Model`` base class (not visible here).
    """

    def forward(self, feed_dict):
        """Run the detector and return the dictionary of predictions.

        Args:
            feed_dict: dict with at least the key ``'img'``. During training
                ``'gt_boxes'`` and ``'gt_labels'`` are also read (by
                ``pre_subsample``). NOTE: this dict is mutated; the key
                ``'base_feat'`` is added to it.

        Returns:
            dict holding everything the RPN returned plus
            ``'rcnn_cls_probs'``, ``'rcnn_bbox_preds'``,
            ``'rcnn_cls_scores'`` and ``'second_rpn_anchors'`` (and, in
            training mode, the targets/weights written by
            ``pre_subsample``).
        """
        prediction_dict = {}

        # first-stage (shared) features
        self.profiler.start('base_model')
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.profiler.end('base_model')

        # region proposal network
        self.profiler.start('rpn')
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        self.profiler.end('rpn')

        # subsample the proposals before pooling to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # base_feat is (N, C, H, W); rois_batch is (N, num_proposals, 5)
        self.profiler.start('roipooling')
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        self.profiler.end('roipooling')

        self.profiler.start('second_stage')
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        self.profiler.end('second_stage')

        # average over the two spatial dims so the linear heads get one
        # feature vector per RoI
        pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for tracking: anchors re-ordered by the proposal order
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        return prediction_dict

    def generate_channel_attention(self, feat):
        """Return ``feat`` averaged over dims 3 and 2, keeping both dims."""
        return feat.mean(3, keepdim=True).mean(2, keepdim=True)

    def generate_spatial_attention(self, feat):
        """Apply the ``spatial_attention`` conv to ``feat``.

        ``self.spatial_attention`` only exists when ``use_self_attention``
        was truthy at ``init_modules`` time.
        """
        return self.spatial_attention(feat)

    def init_weights(self):
        """Initialise sub-module weights and the two prediction heads."""
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        """Build the feature extractor, RPN, RoI pooling, heads and losses.

        Raises:
            NotImplementedError: for the pooling modes ``'psalign'`` and
                ``'deformable_psalign'``.
            ValueError: for any other unrecognised ``pooling_mode``.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        else:
            # Fail at construction time instead of leaving ``rcnn_pooling``
            # undefined and hitting an AttributeError later in ``forward``.
            raise ValueError('unsupported pooling_mode: {!r}'.format(
                self.pooling_mode))

        # 2048 is presumably the channel count of the second-stage feature
        # -- TODO confirm against the feature extractor.
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(2048, 4)
        else:
            # one 4-vector per class
            self.rcnn_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

        # loss modules; both losses are left unreduced because ``loss``
        # multiplies them by per-sample weights before summing
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(self.n_classes)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # attention
        if self.use_self_attention:
            self.spatial_attention = nn.Conv2d(2048, 1, 3, 1, 1)

    def init_param(self, model_config):
        """Read hyper-parameters and helper objects from ``model_config``.

        Args:
            model_config: dict-like configuration. Every key is required
                except ``'use_self_attention'``, which is read with
                ``.get`` and is therefore ``None`` when absent.
        """
        classes = model_config['classes']
        self.classes = classes
        # including bg
        self.n_classes = len(classes) + 1
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']
        self.use_self_attention = model_config.get('use_self_attention')

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        self.profiler = Profiler()

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets to the proposals and keep only a sampled subset.

        Reads ``prediction_dict['rois_batch']`` plus ``feed_dict['gt_boxes']``
        and ``feed_dict['gt_labels']``; writes ``'rcnn_cls_weights'``,
        ``'rcnn_reg_weights'``, ``'rcnn_cls_targets'``,
        ``'rcnn_reg_targets'``, ``'fake_match'`` and the subsampled
        ``'rois_batch'`` back into ``prediction_dict``. The weights are
        divided by the number of strictly positive entries that were kept.

        Raises:
            AssertionError: if no sample with a positive classification (or
                regression) weight survives the subsampling.
        """
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        # the first column of each roi is dropped (presumably the batch
        # index -- TODO confirm)
        (rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights,
         rcnn_reg_weights) = self.target_assigner.assign(
             rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all, mask of shape (N, M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # both counts are used as divisors below, so they must be non-zero
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        # NOTE(review): ``forward`` only calls this method in training mode,
        # so this branch is not reached from there.
        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def squeeze_bbox_preds(self, rcnn_bbox_preds, rcnn_cls_targets):
        """Select, for every sample, the box prediction of its target class.

        Args:
            rcnn_bbox_preds: per-class predictions, viewable as
                ``(N, n_classes, 4)`` (i.e. ``(N, 4 * n_classes)``).
            rcnn_cls_targets: integer class index per sample; expected to be
                1-D of length N so that it lines up with the per-row offset.

        Returns:
            Tensor of shape ``(N, 4)``.
        """
        rcnn_bbox_preds = rcnn_bbox_preds.view(-1, self.n_classes, 4)
        batch_size = rcnn_bbox_preds.shape[0]
        # row i of the flattened (N * n_classes, 4) view starts at
        # i * n_classes; adding the class index picks that class's box
        offset = torch.arange(0, batch_size) * rcnn_bbox_preds.size(1)
        rcnn_cls_targets = rcnn_cls_targets + offset.type_as(rcnn_cls_targets)
        rcnn_bbox_preds = rcnn_bbox_preds.view(-1, 4)[rcnn_cls_targets]
        return rcnn_bbox_preds

    def loss(self, prediction_dict, feed_dict):
        """Compute the RPN losses plus the weighted RCNN losses.

        Expects the targets and weights that ``pre_subsample`` stored in
        ``prediction_dict``. Also feeds the foreground probabilities
        (column 1 of ``rcnn_cls_probs``) to the target assigner's analyzer.

        Returns:
            dict with the RPN loss entries plus ``'rcnn_cls_loss'`` and
            ``'rcnn_bbox_loss'``.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        if not self.class_agnostic:
            # keep only the box predicted for each sample's target class
            rcnn_bbox_preds = self.squeeze_bbox_preds(rcnn_bbox_preds,
                                                      rcnn_cls_targets)
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # analysis ap
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        num_gt = feed_dict['gt_labels'].numel()
        fake_match = prediction_dict['fake_match']
        self.target_assigner.analyzer.analyze_ap(
            fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.1)

        return loss_dict