class LossFasterRCNN(Model):
    def forward(self, feed_dict):

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        # batch_size = base_feat.shape[0]

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note: base_feat is (N, C, H, W), rois_batch is (N, num_proposals, 5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        # semantic map
        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        # rcnn_cls_probs = rcnn_cls_probs_map.mean(3).mean(2)
        # weight the pooled bbox features by the foreground saliency map
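        # NOTE: saliency_map[:, 1:] keeps the (n_classes - 1) foreground channels;
        # the element-wise product below broadcasts it against the 2048-channel
        # pooled features, which assumes a two-class (background/foreground) setup
        # so that the saliency slice has a single channel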
        rcnn_bbox_feat = pooled_feat * saliency_map[:, 1:, :, :]
        # rcnn_bbox_feat = torch.cat([rcnn_bbox_feat, pooled_feat], dim=1)
        rcnn_bbox_feat = rcnn_bbox_feat.mean(3).mean(2)


        rcnn_bbox_preds = self.rcnn_bbox_pred(rcnn_bbox_feat)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for tracking
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][0][
            proposals_order]

        return prediction_dict

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = LossRPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('not implemented yet!')
        # self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
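        # a 3x3 conv (rather than a linear layer) keeps a spatial score map, which
        # forward() reuses as a saliency map for weighting the bbox features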
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(2048, 4)
            # self.rcnn_bbox_pred = nn.Conv2d(2048,4,3,1,1)
        else:
            self.rcnn_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # cluster loss for bbox and cls(feat)
        self.cluster_loss = ClusterLoss()

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

    def get_cluster_loss(self, num_gt, bbox_feat, cls_feat):
        """
        Args:
            bbox_feat:(N,M,)
        """
        cluster_loss = 0
        # shape(N,M)
        match = self.target_assigner.matcher.match
        assert match.shape[0] == 1, 'only one num of batch is supported now'
        match = match[0]
        for i in range(num_gt):
            cluster_loss += self.cluster_loss()

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
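        # batch_sampled_mask is a boolean mask over all rois; the kept cls/reg
        # weights are renormalized below so each branch averages over its own
        # number of contributing samples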
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for tracking
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]
        # mask the match assignments in the same way as the targets above

        match = self.target_assigner.matcher.assignments
        prediction_dict['match'] = match[batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        #  rcnn_cls_feat_single = rcnn_cls_scores
        rcnn_bbox_pred_single = rcnn_bbox_preds
        #  cluster_cls_loss = 0
        cluster_bbox_loss = 0
        num_gt = feed_dict['gt_boxes'].shape[1]
        match = prediction_dict['match']
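        # group the box predictions by the ground truth they were matched to; the
        # cluster loss is assumed to encourage predictions belonging to the same
        # object to agree with each other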
        for i in range(num_gt):
            #  cls_feat = rcnn_cls_feat_single[match == i]
            #  cluster_cls_loss += self.cluster_loss(cls_feat)
            bbox_feat = rcnn_bbox_pred_single[match == i]
            cluster_bbox_loss += self.cluster_loss(bbox_feat)

        # loss weights have no gradients
        loss_dict['rcnn/cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn/bbox_loss'] = rcnn_bbox_loss
        loss_dict['rpn/cluster_bbox_loss'] = cluster_bbox_loss
        #  loss_dict['rpn/cluster_cls_loss'] = cluster_cls_loss

        return loss_dict
class DetachDoubleIOUFasterRCNN(Model):
    def forward(self, feed_dict):

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note: base_feat is (N, C, H, W), rois_batch is (N, num_proposals, 5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # the regression branch always runs (enable_reg is fixed to True here)
        # shape(N,C,1,1)
        pooled_feat_reg = self.feature_extractor.second_stage_feature(
            pooled_feat)

        pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds

        if self.enable_cls:
            pooled_feat_cls = self.feature_extractor.third_stage_feature(
                pooled_feat.detach())

            # shape(N,C)
            pooled_feat_cls = pooled_feat_cls.mean(3).mean(2)
            rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat_cls)
            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
            prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for tracking
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]
        prediction_dict['second_rpn_cls_probs'] = prediction_dict[
            'rpn_cls_probs'][0][proposals_order]

        if not self.training and self.enable_iou:
            # use the IoU between the decoded boxes and their proposals as the
            # final score; the classification scores only filter out background
            pred_boxes = self.bbox_coder.decode_batch(
                rcnn_bbox_preds.view(1, -1, 4), rois_batch[:, :, 1:5])
            iou_matrix = box_ops.iou(pred_boxes, rois_batch[:, :, 1:5])[0]
            iou_matrix[rcnn_cls_probs[:, 1] < 0.5] = 0
            rcnn_cls_probs[:, 1] = iou_matrix
            prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs

        if not self.training and self.enable_track_rois:
            self.target_assigner.assign(rois_batch[:, :, 1:],
                                        feed_dict['gt_boxes'],
                                        feed_dict['gt_labels'])

        return prediction_dict

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
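        # when self.reduce is set, the second-stage features are global-average-
        # pooled down to 2048 dims; otherwise the (assumed) 4x4 spatial map is
        # flattened, hence 2048 * 4 * 4 input channels for the bbox head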
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2, alpha=0.25)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox_coder
        self.bbox_coder = self.target_assigner.bbox_coder

        # similarity
        self.similarity_calc = self.target_assigner.similarity_calc

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        # optimize the classification branch
        self.enable_cls = False

        # optimize the regression branch
        self.enable_reg = True

        # rescore with IoU at test time
        self.enable_iou = False

        # track good rois
        self.enable_track_rois = False
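        # with these defaults only the regression branch is trained; enable_cls
        # switches on a separate classification head fed with detached pooled
        # features, while enable_iou / enable_track_rois control test-time IoU
        # rescoring and roi tracking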

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None

        if self.enable_reg:
            # used for reg training
            pos_indicator = rcnn_reg_weights > 0
            indicator = None
        elif self.enable_cls:
            # used for cls training
            pos_indicator = rcnn_cls_targets > 0
            indicator = rcnn_cls_weights > 0
        else:
            raise ValueError("either enable_reg or enable_cls must be set")

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)

        if self.enable_cls:
            rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
            num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
            assert num_cls_coeff, 'bug happens'
            prediction_dict[
                'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()

        # used for retrieving statistics
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]

        # used for fg/bg
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        assert num_reg_coeff, 'bug happens'
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()

        if self.enable_reg:
            prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
                batch_sampled_mask]

        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # add rcnn_cls_targets to get the statistics of the rpn
        #  loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        if self.enable_cls:
            # targets and weights
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']

            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
            # classification loss
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

            loss_dict['rcnn_cls_loss'] = rcnn_cls_loss

        if self.enable_reg:
            rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
            rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

            # bounding box regression L1 loss
            rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
            rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                                 rcnn_reg_targets).sum(dim=-1)
            rcnn_bbox_loss *= rcnn_reg_weights
            rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

            # loss weights have no gradients
            loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # analyze AP (only meaningful when the classification branch is enabled)
        if self.enable_cls:
            rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
            num_gt = feed_dict['gt_labels'].numel()
            fake_match = prediction_dict['fake_match']
            self.target_assigner.analyzer.analyze_ap(fake_match,
                                                     rcnn_cls_probs[:, 1],
                                                     num_gt,
                                                     thresh=0.5)

        return loss_dict
class PostCLSFasterRCNN(Model):
    def forward(self, feed_dict):
        # some pre forward hook
        self.clean_stats()

        prediction_dict = {}

        ################################
        # first stage
        ################################
        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        #####################################
        # second stage(bbox regression)
        #####################################
        # pre-subsample to reduce memory consumption
        if self.training and self.enable_reg:
            # append gt
            if self.use_gt:
                prediction_dict['rois_batch'] = self.append_gt(
                    prediction_dict['rois_batch'], feed_dict['gt_boxes'])
            stats = self.pre_subsample(prediction_dict, feed_dict)
            # rois stats
            self.stats.update(stats)
        rois_batch = prediction_dict['rois_batch']

        # note: base_feat is (N, C, H, W), rois_batch is (N, num_proposals, 5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # bbox regression branch (runs regardless of enable_reg)
        # shape(N,C,1,1)
        pooled_feat_reg = self.feature_extractor.second_stage_feature(
            pooled_feat)

        pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)
        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds

        # used for tracking
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]
        prediction_dict['second_rpn_cls_probs'] = prediction_dict[
            'rpn_cls_probs'][0][proposals_order]

        ###########################################
        # third stage(predict scores of final bbox)
        ###########################################

        # decode rcnn bbox, generate rcnn rois batch
        pred_boxes = self.bbox_coder.decode_batch(
            rcnn_bbox_preds.view(1, -1, 4), rois_batch[:, :, 1:5])
        rcnn_rois_batch = torch.zeros_like(rois_batch)
        rcnn_rois_batch[:, :, 1:5] = pred_boxes.detach()
        prediction_dict['rcnn_rois_batch'] = rcnn_rois_batch
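        # the regressed boxes are detached and repacked as a new rois batch
        # (column 0 holds the roi batch index and is left at zero here) so the
        # classification stage scores the refined boxes instead of the proposals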

        if self.training and self.use_gt:
            # append gt
            rcnn_rois_batch = self.append_gt(rcnn_rois_batch,
                                             feed_dict['gt_boxes'])
            prediction_dict['rcnn_rois_batch'] = rcnn_rois_batch

        if self.enable_cls:
            if self.training:
                rcnn_stats = self.pre_subsample(prediction_dict,
                                                feed_dict,
                                                stage='rcnn')
                # rcnn stats
                self.rcnn_stats.update(rcnn_stats)

            # rois after subsample
            pred_rois = prediction_dict['rcnn_rois_batch']
            pooled_feat_cls = self.rcnn_pooling(base_feat,
                                                pred_rois.view(-1, 5))
            pooled_feat_cls = self.feature_extractor.third_stage_feature(
                pooled_feat_cls.detach())

            # shape(N,C)
            pooled_feat_cls = pooled_feat_cls.mean(3).mean(2)
            rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat_cls)
            rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

            prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
            prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        ###################################
        # stats
        ###################################
        if not self.training or (self.enable_track_rois
                                 and not self.enable_reg):
            # skipped when the regression branch is enabled, since the stats are
            # already collected in pre_subsample
            stats = self.target_assigner.assign(rois_batch[:, :, 1:],
                                                feed_dict['gt_boxes'],
                                                feed_dict['gt_labels'])[-1]
            self.stats.update(stats)

        if not self.training or (self.enable_track_rcnn_rois
                                 and not self.enable_cls):
            # when enable cls, skip it
            stats = self.target_assigner.assign(rcnn_rois_batch[:, :, 1:],
                                                feed_dict['gt_boxes'],
                                                feed_dict['gt_labels'])[-1]
            self.rcnn_stats.update(stats)

        # analyze AP (only meaningful when the classification branch is enabled)
        if self.training and self.enable_cls:
            rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
            num_gt = feed_dict['gt_labels'].numel()
            fake_match = self.rcnn_stats['match']
            stats = self.target_assigner.analyzer.analyze_ap(
                fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.5)
            # collect stats
            self.rcnn_stats.update(stats)

        return prediction_dict

    def append_gt(self, rois_batch, gt_boxes):
        ################################
        # append gt_boxes to rois_batch for losses
        ################################
        # may be some bugs here
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]
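        # column 0 (the roi batch index) is left at zero, which assumes a single
        # image per batch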
        # cat gt_boxes to rois_batch
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2, alpha=0.25, gamma=2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox_coder
        self.bbox_coder = self.target_assigner.bbox_coder

        # similarity
        self.similarity_calc = self.target_assigner.similarity_calc

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        # optimize cls
        self.enable_cls = True

        # optimize reg
        self.enable_reg = False

        # calculate IoU
        self.enable_iou = False

        # track good rois
        self.enable_track_rois = True
        self.enable_track_rcnn_rois = True

        # eval the final bbox
        self.enable_eval_final_bbox = True

        # use gt
        self.use_gt = False

        # if self.enable_eval_final_bbox:
        self.subsample = False

    def clean_stats(self):
        # rois bbox
        self.stats = {
            'num_det': 1,
            'num_tp': 0,
            'matched_thresh': 0,
            'recall_thresh': 0,
            'match': None
        }

        # rcnn bbox(final bbox)
        self.rcnn_stats = {
            'num_det': 1,
            'num_tp': 0,
            'matched_thresh': 0,
            'recall_thresh': 0,
            'match': None
        }

    def pre_subsample(self, prediction_dict, feed_dict, stage='rpn'):
        if stage == 'rpn':
            rois_name = 'rois_batch'
        else:
            rois_name = 'rcnn_rois_batch'

        rois_batch = prediction_dict[rois_name]

        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # append gt
        # rois_batch = self.append_gt(rois_batch, gt_boxes)

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights, stats = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        if self.subsample:
            cls_criterion = None

            if self.enable_reg:
                # used for reg training
                pos_indicator = rcnn_reg_weights > 0
                indicator = None
            elif self.enable_cls:
                # used for cls training
                pos_indicator = rcnn_cls_targets > 0
                indicator = rcnn_cls_weights > 0
            else:
                raise ValueError(
                    "either enable_reg or enable_cls must be set")

            # subsample from all
            # shape (N,M)
            batch_sampled_mask = self.sampler.subsample_batch(
                self.rcnn_batch_size,
                pos_indicator,
                indicator=indicator,
                criterion=cls_criterion)
        else:
            batch_sampled_mask = torch.ones_like(rcnn_cls_weights > 0)

        if self.enable_cls:
            rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
            num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
            assert num_cls_coeff, 'bug happens'
            prediction_dict[
                'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()

        # used for retrieving statistics
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]

        # used for fg/bg
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        num_reg_coeff = torch.max(num_reg_coeff,
                                  torch.ones_like(num_reg_coeff))
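        # clamp to at least one so the normalization below never divides by zero
        # when no foreground roi survives the sampling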
        # assert num_reg_coeff, 'bug happens'
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()

        if self.enable_reg:
            prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
                batch_sampled_mask]

        # here use rcnn_target_assigner for final bbox pred
        stats['match'] = stats['match'][batch_sampled_mask]

        # update rois_batch
        prediction_dict[rois_name] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)
        return stats

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = {}

        # submodule loss

        # add rcnn_cls_targets to get the statistics of the rpn
        #  loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        if self.enable_cls:
            # targets and weights
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']

            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
            # classification loss
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

            loss_dict['rcnn_cls_loss'] = rcnn_cls_loss

        if self.enable_reg:
            loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

            rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
            rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

            # bounding box regression L1 loss
            rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
            rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                                 rcnn_reg_targets).sum(dim=-1)
            rcnn_bbox_loss *= rcnn_reg_weights
            rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

            # loss weights have no gradients
            loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        return loss_dict
class Mono3DFinalFasterRCNN(Model):
    def forward(self, feed_dict):
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note: base_feat is (N, C, H, W), rois_batch is (N, num_proposals, 5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        pooled_feat = pooled_feat * saliency_map[:, 1:, :, :]

        reduced_pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(reduced_pooled_feat)
        # rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)

        rcnn_3d = self.rcnn_3d_pred(reduced_pooled_feat)


        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for tracking
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        ###################################
        # 3d training
        ###################################

        # if self.training and self.train_3d:
        # prediction_dict['rois_batch'] = final_rois_batch
        # self.pre_subsample(prediction_dict, feed_dict)
        # final_rois_batch = prediction_dict['rois_batch']

        # shape(M,C,7,7)
        # mono_3d_pooled_feat = self.rcnn_pooling(base_feat,
        # final_rois_batch.view(-1, 5))

        # mono_3d_pooled_feat = self.feature_extractor.third_stage_feature(
        # mono_3d_pooled_feat)
        # mono_3d_pooled_feat = mono_3d_pooled_feat.mean(3).mean(2)

        prediction_dict['rcnn_3d'] = rcnn_3d

        if not self.training:
            rcnn_bbox_preds = rcnn_bbox_preds.detach()
            final_bbox = self.target_assigner.bbox_coder.decode_batch(
                rcnn_bbox_preds.unsqueeze(0), rois_batch[:, :, 1:])
            final_rois_inds = torch.zeros_like(final_bbox[:, :, -1:])
            final_rois_batch = torch.cat([final_rois_inds, final_bbox], dim=-1)

            rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_bbox(
                rcnn_3d, final_rois_batch)

            prediction_dict['rcnn_3d'] = rcnn_3d

        return prediction_dict

    def pre_forward(self):
        pass
        # params
        # if self.train_3d and self.training and not self.train_2d:

    # self.freeze_modules()
    # for parameter in self.feature_extractor.third_stage_feature.parameters(
    # ):
    # parameter.requires_grad = True
    # for param in self.rcnn_3d_pred.parameters():
    # param.requires_grad = True
    # self.freeze_bn(self)
    # self.unfreeze_bn(self.feature_extractor.third_stage_feature)

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('not implemented yet!')
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        # self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # self.rcnn_3d_pred = nn.Linear(c, 3 + 4 + 11 + 2 + 1)
        self.rcnn_3d_pred = nn.Linear(in_channels, 3 + 4 * 2)
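        # 11 outputs in total: 3 for the 3D dimensions plus 4 * 2 orientation
        # terms (the first two are used as orientation-class logits in loss();
        # the rest are assumed to be orientation regression terms consumed by
        # OrientationLoss)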

        self.rcnn_3d_loss = OrientationLoss(split_loss=True)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        self.visualizer = FeatVisualizer()

        self.num_bins = 4

        # more accurate bbox for 3d prediction
        # if self.train_3d:
        # fg_thresh = 0.6
        # else:
        # fg_thresh = 0.5
        # model_config['target_assigner_config']['fg_thresh'] = fg_thresh

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        self.profiler = Profiler()

        self.h_cat = False

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # shape (N, num_gt, 7)
        gt_boxes_3d = feed_dict['gt_boxes_3d']

        # orient
        cls_orient = torch.unsqueeze(feed_dict['cls_orient'], dim=-1).float()
        reg_orient = feed_dict['reg_orient']
        orient = torch.cat([cls_orient, reg_orient], dim=-1)

        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], orient], dim=-1)
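        # the assembled 3D target is [dims (3), cls_orient (1), reg_orient (...)];
        # only the first three entries of the original 7-dof gt_boxes_3d are kept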

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights,\
            rcnn_reg_targets_3d, rcnn_reg_weights_3d = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_boxes_3d, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        rcnn_reg_weights_3d = rcnn_reg_weights_3d[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        # assert num_reg_coeff, 'bug happens'
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones_like(num_reg_coeff)

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_reg_weights_3d'] = (
            rcnn_reg_weights_3d / num_reg_coeff.float())
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets_3d'] = rcnn_reg_targets_3d[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        #  import ipdb
        #  ipdb.set_trace()

        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))
        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']

        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        ######################################
        # 3d loss
        ######################################

        rcnn_reg_weights_3d = prediction_dict['rcnn_reg_weights_3d']
        rcnn_reg_targets_3d = prediction_dict['rcnn_reg_targets_3d']
        rcnn_3d = prediction_dict['rcnn_3d']

        # dims
        rcnn_3d_loss_dims = self.rcnn_bbox_loss(
            rcnn_3d[:, :3], rcnn_reg_targets_3d[:, :3]).sum(dim=-1)

        # angles
        res = self.rcnn_3d_loss(rcnn_3d[:, 3:], rcnn_reg_targets_3d[:, 3:])
        for res_loss_key in res:
            tmp = res[res_loss_key] * rcnn_reg_weights_3d
            res[res_loss_key] = tmp.sum(dim=-1)
        loss_dict.update(res)

        rcnn_3d_loss = rcnn_3d_loss_dims * rcnn_reg_weights_3d
        rcnn_3d_loss = rcnn_3d_loss.sum(dim=-1)

        loss_dict['rcnn_3d_loss'] = rcnn_3d_loss

        # stats of orients
        cls_orient_preds = rcnn_3d[:, 3:5]
        cls_orient = rcnn_reg_targets_3d[:, 3]
        _, cls_orient_preds_argmax = torch.max(cls_orient_preds, dim=-1)
        orient_tp_mask = cls_orient.type_as(
            cls_orient_preds_argmax) == cls_orient_preds_argmax
        mask = (rcnn_reg_weights_3d > 0) & (rcnn_reg_targets_3d[:, 3] > -1)
        orient_tp_mask = orient_tp_mask[mask]
        orient_tp_num = orient_tp_mask.int().sum().item()
        orient_all_num = orient_tp_mask.numel()
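        # orientation accuracy: count rois whose predicted orientation class
        # matches the target, restricted to positive rois with a valid
        # orientation label (> -1)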

        # store all stats in target assigner
        self.target_assigner.stat.update({
            # 'angle_num_tp': torch.tensor(0),
            # 'angle_num_all': 1,

            # stats of orient
            'orient_tp_num': orient_tp_num,
            # 'orient_tp_num2': orient_tp_num2,
            # 'orient_tp_num3': orient_4s_tp_num,
            # 'orient_all_num3': orient_all_num3,
            # 'orient_pr': orient_pr,
            'orient_all_num': orient_all_num,
            # 'orient_tp_num4': orient_tp_num4,
            # 'orient_all_num4': orient_all_num4,
            #  'cls_orient_2s_all_num': depth_ind_all_num,
            #  'cls_orient_2s_tp_num': depth_ind_tp_num
        })

        return loss_dict
class DoubleIoUSecondStageFasterRCNN(Model):
    def forward(self, feed_dict):
        # self.visualizer.visualize(
        # feed_dict['img'],
        # nn.Sequential(self.feature_extractor.first_stage_feature,
        # self.feature_extractor.first_stage_cls_feature))

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note: base_feat is (N, C, H, W), rois_batch is (N, num_proposals, 5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        pooled_feat = F.relu(self.rcnn_conv(pooled_feat), inplace=True)

        pooled_feat_cls = self.rcnn_pooled_feat_cls(pooled_feat.detach())
        pooled_feat_bbox = self.rcnn_pooled_feat_bbox(pooled_feat)
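        # decouple the two heads: a shared 3x3 conv followed by two 1x1 convs;
        # the classification stream takes detached features so its gradients do
        # not flow back into the shared features, while the bbox stream does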

        #  classification
        pooled_feat_cls = self.feature_extractor.third_stage_feature(
            pooled_feat_cls)
        pooled_feat_cls = pooled_feat_cls.mean(3).mean(2)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat_cls)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # regression
        pooled_feat_reg = self.feature_extractor.second_stage_feature(
            pooled_feat_bbox)
        pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for tracking
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        return prediction_dict

    def unfreeze_part_modules(self, model):
        #  model = self.feature_extractor.third_stage_feature
        for param in model.parameters():
            param.requires_grad = True

        #  model = self.feature_extractor.first_stage_feature

        # def freeze_part_modules(self):
        # pass

        # def rcnn_cls_pred(pooled_feat)

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)
        # if self.training_stage == 'cls':
        # self.freeze_modules()
        # unfreeze part
        # models = [

    # #  self.feature_extractor.first_stage_feature,
    # #  self.feature_extractor.second_stage_feature,
    # self.feature_extractor.third_stage_feature
    # ]
    # for model in models:
    # self.unfreeze_part_modules(model)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # decouple cls and bbox
        self.rcnn_conv = nn.Conv2d(1024, 512, 3, 1, 1, bias=True)
        self.rcnn_pooled_feat_cls = nn.Conv2d(512, 1024, 1, 1, 0)
        self.rcnn_pooled_feat_bbox = nn.Conv2d(512, 1024, 1, 1, 0)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])
        #  self.sampler = DetectionSampler({'fg_fraction': 1})

        # self.reduce = model_config.get('reduce')
        self.reduce = True
        self.visualizer = FeatVisualizer()

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0
        #  indicator = None

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for tracking
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights have no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of the rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        #  fake_match = self.target_assigner.analyzer.match
        fake_match = prediction_dict['fake_match']
        num_gt = feed_dict['gt_labels'].numel()
        self.target_assigner.analyzer.analyze_ap(fake_match,
                                                 rcnn_cls_probs[:, 1],
                                                 num_gt,
                                                 thresh=0.5)
        #  prediction_dict['rcnn_reg_weights'] = rcnn_reg_weights

        return loss_dict

    def loss_new(self, prediction_dict, feed_dict):
        """
        assign labels to the proposals and subsample them,
        then compute the losses
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        # cls_criterion = None
        # pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0
        pos_indicator = indicator
        # indicator = None

        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                           rcnn_cls_targets[0])

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets[0]).sum(dim=-1)

        cls_criterion = rcnn_cls_loss * rcnn_cls_weights + rcnn_bbox_loss * rcnn_reg_weights
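        # use the combined per-RoI loss as a hard-example mining criterion for
        # the sampler below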
        # subsample from all
        # shape (N,M)
        # import ipdb
        # ipdb.set_trace()
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights * batch_sampled_mask.type_as(
            rcnn_cls_weights)
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'

        rcnn_cls_weights = rcnn_cls_weights / num_cls_coeff.float()

        # import ipdb
        # ipdb.set_trace()
        # rcnn_cls_targets *= batch_sampled_mask.type_as(rcnn_cls_targets)
        # rcnn_reg_targets *= batch_sampled_mask.type_as(rcnn_reg_targets)

        # targets and weights
        # rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        # rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        # rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        # rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss

        # import ipdb
        # ipdb.set_trace()
        rcnn_cls_loss *= rcnn_cls_weights[0]
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bbox reg
        rcnn_reg_weights *= batch_sampled_mask.type_as(rcnn_reg_weights)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        assert num_reg_coeff, 'bug happens'
        rcnn_reg_weights = rcnn_reg_weights / num_reg_coeff.float()

        rcnn_bbox_loss *= rcnn_reg_weights[0]
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights have no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        # loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of the rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        # analysis precision
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        fake_match = self.target_assigner.analyzer.match
        num_gt = feed_dict['gt_labels'].numel()
        self.target_assigner.analyzer.analyze_ap(fake_match,
                                                 rcnn_cls_probs[:, 1],
                                                 num_gt,
                                                 thresh=0.5)
        prediction_dict['rcnn_reg_weights'] = rcnn_reg_weights
        return loss_dict

# Example 6
class Mono3DSimplerFasterRCNN(Model):
    def forward(self, feed_dict):
        # import ipdb
        # ipdb.set_trace()
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        mask_pooled_feat = self.mask_rcnn_pooling(base_feat,
                                                  rois_batch.view(-1, 5))

        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        #  common_pooled_feat = pooled_feat

        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        pooled_feat = pooled_feat * saliency_map[:, 1:, :, :]
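        # the saliency map (softmax over the class-score map) acts as spatial
        # attention: only its foreground channels re-weight the pooled features
        # that feed the bbox regressor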

        reduced_pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(reduced_pooled_feat)
        # rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        ###################################
        # 3d training
        ###################################
        keypoint_heatmap = self.keypoint_predictor(mask_pooled_feat)
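        # each keypoint heatmap is flattened to a 56*56 vector and softmaxed,
        # giving a spatial probability distribution over the RoI per keypoint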
        keypoint_scores = keypoint_heatmap.view(-1, 56 * 56)
        keypoint_probs = F.softmax(keypoint_scores, dim=-1)

        prediction_dict['keypoint_probs'] = keypoint_probs
        prediction_dict['keypoint_scores'] = keypoint_scores

        # import ipdb
        # ipdb.set_trace()
        rcnn_3d = self.rcnn_3d_pred(reduced_pooled_feat)
        prediction_dict['rcnn_3d'] = rcnn_3d
        if not self.training:
            #  import ipdb
            #  ipdb.set_trace()
            #  _, keypoint_peak_pos = keypoint_probs.max(dim=-1)
            keypoints = self.keypoint_coder.decode_keypoint_heatmap(
                rois_batch[0, :, 1:], keypoint_probs.view(-1, 4, 56 * 56))
            prediction_dict['keypoints'] = keypoints

        return prediction_dict

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def modify_feature_extractor(self):
        from torchvision.models.resnet import Bottleneck
        layer4 = self._make_layer(Bottleneck, 512, 3, stride=1)
        self.feature_extractor.second_stage_feature = layer4

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)

        self.modify_feature_extractor()
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.mask_rcnn_pooling = RoIAlignAvg(14, 14, 1.0 / 16.0)
        # self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)
        self.rcnn_kp_loss = functools.partial(F.cross_entropy,
                                              reduce=False,
                                              ignore_index=-1)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # some 3d statistic
        # some 2d points projected from 3d
        self.rcnn_3d_pred = nn.Linear(in_channels, 3)

        # self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)
        # self.rcnn_3d_loss = MultiBinRegLoss(num_bins=self.num_bins)
        self.rcnn_3d_loss = OrientationLoss(split_loss=True)

        self.keypoint_predictor = KeyPointPredictor2(1024)

    def _make_layer(self, block, planes, blocks, stride=1):
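        # mirrors torchvision's ResNet._make_layer with inplanes fixed to 1024
        # (the output width of layer3), so layer4 can be rebuilt with stride 1
        # for the second stage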
        inplanes = 1024
        downsample = None
        if stride != 1 or inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(inplanes,
                          planes * block.expansion,
                          kernel_size=1,
                          stride=stride,
                          bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(inplanes, planes, stride, downsample))
        inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(inplanes, planes))

        return nn.Sequential(*layers)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        self.visualizer = FeatVisualizer()

        self.num_bins = 4

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])
        self.keypoint_coder = self.target_assigner.keypoint_coder

        self.profiler = Profiler()

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # shape(N,7)
        gt_boxes_3d = feed_dict['gt_boxes_3d']

        keypoint_gt = feed_dict['keypoint_gt']

        # import ipdb
        # ipdb.set_trace()
        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], keypoint_gt], dim=-1)
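        # the first three 3d-box entries are packed together with the keypoint
        # targets so a single assigner call yields both 2d and keypoint/3d targets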

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights,\
            rcnn_reg_targets_3d, rcnn_reg_weights_3d = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_boxes_3d, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        rcnn_reg_weights_3d = rcnn_reg_weights_3d[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict[
            'rcnn_reg_weights_3d'] = rcnn_reg_weights_3d / num_reg_coeff.float(
            )
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets_3d'] = rcnn_reg_targets_3d[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """
        assign labels to the proposals and subsample them,
        then compute the losses
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # keypoint heatmap loss
        # keypoint_gt = feed_dict['keypoint_gt']
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_reg_targets_3d = prediction_dict['rcnn_reg_targets_3d']
        rcnn_reg_weights_3d = prediction_dict['rcnn_reg_weights_3d']
        keypoint_scores = prediction_dict['keypoint_scores']
        keypoint_gt = rcnn_reg_targets_3d[:, 3:].contiguous().view(-1, 2)
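        # each keypoint target packs a (position index, weight) pair; zero-weight
        # entries are mapped to label -1 below, which the keypoint cross-entropy
        # ignores via ignore_index=-1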
        keypoint_weights = keypoint_gt[:, 1]
        keypoint_pos = keypoint_gt[:, 0]
        keypoint_pos[keypoint_weights == 0] = -1
        keypoint_loss = self.rcnn_kp_loss(keypoint_scores, keypoint_pos.long())
        keypoint_loss = keypoint_loss.view(
            -1, 4) * rcnn_reg_weights_3d.unsqueeze(-1)
        #  keypoint_loss = keypoint_loss * keypoint_weights
        loss_dict['keypoint_loss'] = keypoint_loss.sum(dim=-1).sum(dim=-1)

        # dims loss
        rcnn_3d = prediction_dict['rcnn_3d']
        rcnn_3d_loss = self.rcnn_bbox_loss(rcnn_3d, rcnn_reg_targets_3d[:, :3])
        # weight per RoI (broadcast over the 3 dims) so only positives contribute
        rcnn_3d_loss = rcnn_3d_loss * rcnn_reg_weights_3d.unsqueeze(-1)
        loss_dict['rcnn_3d_loss'] = rcnn_3d_loss.sum(dim=-1).sum(dim=-1)

        return loss_dict
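
# A minimal, hypothetical usage sketch (not part of the original code): it assumes
# `model` is one of the Model subclasses above, `optimizer` is any torch optimizer,
# and `feed_dict` carries the tensors used in forward()/pre_subsample()
# ('img', 'gt_boxes', 'gt_labels', ...). Each entry of loss_dict is reduced to a
# scalar and summed before the backward pass.
def train_step(model, feed_dict, optimizer):
    model.train()
    prediction_dict = model.forward(feed_dict)
    loss_dict = model.loss(prediction_dict, feed_dict)
    total_loss = sum(loss.sum() for loss in loss_dict.values())
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()
    return {name: loss.detach() for name, loss in loss_dict.items()}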

# Example 7
class LEDFasterRCNN(Model):
    def forward(self, feed_dict):
        # import ipdb
        # ipdb.set_trace()

        # feed_dict['input_size'] = torch.stack(img_shapes, dim=0)
        feed_dict['input_size'] = feed_dict['im_info']

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        # batch_size = base_feat.shape[0]

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        # shape(N,C)
        if self.reduce:
            pooled_feat = pooled_feat.mean(3).mean(2)
        else:
            pooled_feat = pooled_feat.view(self.rcnn_batch_size, -1)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        # import ipdb
        # ipdb.set_trace()
        iou, iou_scores, iou_reg = self.iou_pred(pooled_feat)
        iog, iog_scores, iog_reg = self.iog_pred(pooled_feat)
        iod, iod_scores, iod_reg = self.iod_pred(pooled_feat)

        iou = self.iox_clip(iou)
        iog = self.iox_clip(iog)
        iod = self.iox_clip(iod)

        # import ipdb
        # ipdb.set_trace()
        iou_indirect = self.calculate_iou(iog, iod)
        iou_final = (1 - self.alpha) * iou_indirect + self.alpha * iou
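        # blend the directly regressed IoU with the IoU derived from IoG/IoD;
        # alpha (0.6 here) controls how much the direct estimate is trusted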
        if self.use_cls_pred:

            rcnn_fg_probs_final = rcnn_cls_probs[:, 1] * torch.exp(-torch.pow(
                (1 - iou_final), 2) / self.theta)
        else:
            rcnn_fg_probs_final = iou_final

        prediction_dict['rcnn_cls_probs'] = torch.stack(
            [rcnn_fg_probs_final, rcnn_fg_probs_final], dim=-1)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores
        # prediction_dict['rcnn_iou_final'] = iou_final

        prediction_dict['rcnn_iou_reg'] = iou_reg
        prediction_dict['rcnn_iou_scores'] = iou_scores
        prediction_dict['rcnn_iod_reg'] = iod_reg
        prediction_dict['rcnn_iod_scores'] = iod_scores
        prediction_dict['rcnn_iog_reg'] = iog_reg
        prediction_dict['rcnn_iog_scores'] = iog_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][0][
            proposals_order]

        return prediction_dict

    def iox_clip(self, iox):
        # clamp predicted IoU-like values into the valid range [0, 1]
        return iox.clamp(min=0, max=1)

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

        Filler.normal_init(self.rcnn_coarse_map_conv_iod, 0, 0.001,
                           self.truncated)
        Filler.normal_init(self.rcnn_fine_map_conv_iod, 0, 0.001,
                           self.truncated)

        Filler.normal_init(self.rcnn_coarse_map_conv_iou, 0, 0.001,
                           self.truncated)

        Filler.normal_init(self.rcnn_fine_map_conv_iou, 0, 0.001,
                           self.truncated)
        Filler.normal_init(self.rcnn_fine_map_conv_iog, 0, 0.001,
                           self.truncated)
        Filler.normal_init(self.rcnn_coarse_map_conv_iog, 0, 0.001,
                           self.truncated)

        # freeze all first
        self.freeze_modules()

        # unfreeze some modules
        self.rpn_model.unfreeze_modules()
        self.unfreeze_modules()

    def unfreeze_modules(self):
        unfreeze_modules = [
            self.rcnn_coarse_map_conv_iod.bias,
            self.rcnn_fine_map_conv_iod.bias,
            self.rcnn_coarse_map_conv_iog.bias,
            self.rcnn_fine_map_conv_iog.bias,
            self.rcnn_coarse_map_conv_iou.bias,
            self.rcnn_fine_map_conv_iou.bias,
            self.rcnn_coarse_map_conv_iod.weight,
            self.rcnn_fine_map_conv_iod.weight,
            self.rcnn_coarse_map_conv_iog.weight,
            self.rcnn_fine_map_conv_iog.weight,
            self.rcnn_coarse_map_conv_iou.weight,
            self.rcnn_fine_map_conv_iou.weight
        ]
        for module in unfreeze_modules:
            module.requires_grad = True

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = LEDRPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # pred for iox
        self.rcnn_coarse_map_conv_iou = nn.Linear(2048, 4)
        self.rcnn_fine_map_conv_iou = nn.Linear(2048, 4)

        self.rcnn_coarse_map_conv_iog = nn.Linear(2048, 4)
        self.rcnn_fine_map_conv_iog = nn.Linear(2048, 4)

        self.rcnn_coarse_map_conv_iod = nn.Linear(2048, 4)
        self.rcnn_fine_map_conv_iod = nn.Linear(2048, 4)

        # loss for iox
        if self.use_sharpL2:
            self.reg_loss = SharpL2Loss()
        else:
            self.reg_loss = nn.MSELoss(reduce=False)
        self.cls_loss = nn.CrossEntropyLoss(reduce=False)

        # cls loss
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']
        self.rpn_config = model_config['rpn_config']
        self.theta = 1.0

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']
        self.use_sigmoid = model_config.get('use_sigmoid')
        self.use_sharpL2 = model_config['use_sharpL2']
        self.use_cls_pred = model_config['use_cls_pred']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']

        # assigner
        self.target_assigner = LEDTargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        self.reduce = True

        self.alpha = 0.6
        # self.iou_anchors = [0.05, 0.25, 0.55, 0.85]
        # self.iou_lengths = [0.05, 0.15, 0.15, 0.15]
        # self.iou_intervals = [[0, 0.1], [0.1, 0.4], [0.4, 0.7], [0.7, 1.0]]
        self.iox_bbox_coder = DiscreteBBoxCoder(
            model_config['iox_coder_config'])

    def iou_pred(self, rcnn_conv):
        return self.iox_pred(rcnn_conv, self.rcnn_coarse_map_conv_iou,
                             self.rcnn_fine_map_conv_iou)

    def iog_pred(self, rcnn_conv):
        return self.iox_pred(rcnn_conv, self.rcnn_coarse_map_conv_iog,
                             self.rcnn_fine_map_conv_iog)

    def iod_pred(self, rcnn_conv):
        return self.iox_pred(rcnn_conv, self.rcnn_coarse_map_conv_iod,
                             self.rcnn_fine_map_conv_iod)

    def iox_pred(self, rcnn_conv, rcnn_coarse_map_conv, rcnn_fine_map_conv):
        batch_size = rcnn_conv.shape[0]
        coarse_map = rcnn_coarse_map_conv(rcnn_conv)
        fine_map = rcnn_fine_map_conv(rcnn_conv)
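        # coarse head: classify the IoU-like value into discrete bins;
        # fine head: regress a residual within the bin; the discrete bbox coder
        # combines both into a continuous value below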

        coarse_map_reshape = coarse_map.view(batch_size, 4)
        iou_level_probs = F.softmax(coarse_map_reshape, dim=1)
        iou_level_probs = iou_level_probs.view_as(coarse_map)
        if self.use_sigmoid:
            # normalize it
            iou_reg = 2 * F.sigmoid(fine_map) - 1
        else:
            iou_reg = fine_map
        iou_cls = iou_level_probs
        decoded_iou = self.iox_bbox_coder.decode_batch(iou_cls, iou_reg)

        # used for cls and reg loss
        iou_cls_scores = coarse_map
        return decoded_iou, iou_cls_scores, iou_reg

    def calculate_iou(self, iog, iod):
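        # with I = intersection, G = gt area, D = detection area:
        # IoG = I/G, IoD = I/D, hence IoU = I/(G + D - I)
        #     = (IoG * IoD) / (IoG + IoD - IoG * IoD);
        # only evaluated where IoD != 0 to avoid division by zero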
        mask = ~(iod == 0)
        iou_indirect = torch.zeros_like(iog)
        iod = iod[mask]
        iog = iog[mask]
        iou_indirect[mask] = (iod * iog) / (iod + iog - iod * iog)
        return iou_indirect

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

        # iou targets
        iou_targets = self.target_assigner.matcher.assigned_overlaps_batch
        iou_cls_targets = self.iox_bbox_coder.encode_cls(iou_targets)
        iou_reg_targets = self.iox_bbox_coder.encode_reg(iou_targets)

        prediction_dict['rcnn_iou_cls_targets'] = iou_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_iou_reg_targets'] = iou_reg_targets[
            batch_sampled_mask]

        # iod targets
        iod_targets = self.target_assigner.matcher.assigned_iod_batch
        iod_cls_targets = self.iox_bbox_coder.encode_cls(iod_targets)
        iod_reg_targets = self.iox_bbox_coder.encode_reg(iod_targets)

        prediction_dict['rcnn_iod_cls_targets'] = iod_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_iod_reg_targets'] = iod_reg_targets[
            batch_sampled_mask]

        # iog targets
        iog_targets = self.target_assigner.matcher.assigned_iog_batch
        iog_cls_targets = self.iox_bbox_coder.encode_cls(iog_targets)
        iog_reg_targets = self.iox_bbox_coder.encode_reg(iog_targets)

        prediction_dict['rcnn_iog_cls_targets'] = iog_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_iog_reg_targets'] = iog_reg_targets[
            batch_sampled_mask]

    def iox_loss(self, iou_scores, iou_cls_targets, iou_reg, iou_reg_targets):
        iou_cls_loss = self.cls_loss(iou_scores, iou_cls_targets)
        iou_reg_loss = self.reg_loss(iou_reg, iou_reg_targets).sum(dim=-1)
        return iou_cls_loss.mean(), iou_reg_loss.mean()

    def iou_loss(self, prediction_dict):
        return self.iox_loss(prediction_dict['rcnn_iou_scores'],
                             prediction_dict['rcnn_iou_cls_targets'],
                             prediction_dict['rcnn_iou_reg'],
                             prediction_dict['rcnn_iou_reg_targets'])

    def iog_loss(self, prediction_dict):
        return self.iox_loss(prediction_dict['rcnn_iog_scores'],
                             prediction_dict['rcnn_iog_cls_targets'],
                             prediction_dict['rcnn_iog_reg'],
                             prediction_dict['rcnn_iog_reg_targets'])

    def iod_loss(self, prediction_dict):
        return self.iox_loss(prediction_dict['rcnn_iod_scores'],
                             prediction_dict['rcnn_iod_cls_targets'],
                             prediction_dict['rcnn_iod_reg'],
                             prediction_dict['rcnn_iod_reg_targets'])

    def loss(self, prediction_dict, feed_dict):
        """
        assign labels to the proposals and subsample them,
        then compute the losses
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # iou loss
        iou_cls_loss, iou_reg_loss = self.iou_loss(prediction_dict)

        # iog loss
        iog_cls_loss, iog_reg_loss = self.iog_loss(prediction_dict)

        # iod loss
        iod_cls_loss, iod_reg_loss = self.iod_loss(prediction_dict)

        # total_loss = [
        # iou_cls_loss, iou_reg_loss, iog_cls_loss, iog_reg_loss,
        # iod_reg_loss, iod_cls_loss
        # ]

        # classification loss
        if self.use_cls_pred:
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)
            loss_dict['rcnn/cls_loss'] = rcnn_cls_loss

        loss_dict['rcnn/iou_cls_loss'] = iou_cls_loss
        loss_dict['rcnn/iou_reg_loss'] = iou_reg_loss
        loss_dict['rcnn/iog_cls_loss'] = iog_cls_loss
        loss_dict['rcnn/iog_reg_loss'] = iog_reg_loss
        loss_dict['rcnn/iod_reg_loss'] = iod_reg_loss
        loss_dict['rcnn/iod_cls_loss'] = iod_cls_loss
        # iox_loss = 0
        # for loss in total_loss:
        # if torch.isnan(loss).byte().any():
        # import ipdb
        # ipdb.set_trace()
        # iox_loss += loss

        # bbox regression loss
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights have no gradients
        # loss_dict['rcnn_cls_loss'] = iox_loss
        loss_dict['rcnn/bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of the rpn
        # loss_dict['rcnn_reg_targets'] = rcnn_reg_weights

        return loss_dict

# Example 8
class Mono3DAngleNewFasterRCNN(Model):
    def forward(self, feed_dict):
        #  import ipdb
        #  ipdb.set_trace()
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        ###################################
        # 3d training
        ###################################
        mono_3d_pooled_feat = self.feature_extractor.third_stage_feature(
            pooled_feat.detach())
        mono_3d_pooled_feat = mono_3d_pooled_feat.mean(3).mean(2)
        # rcnn_3d = self.rcnn_3d_preds_new(mono_3d_pooled_feat)

        # prediction_dict['rcnn_3d'] = rcnn_3d

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        pooled_feat = pooled_feat * saliency_map[:, 1:, :, :]
        # shape(N,C)
        if self.reduce:
            pooled_feat = pooled_feat.mean(3).mean(2)
        else:
            pooled_feat = pooled_feat.view(self.rcnn_batch_size, -1)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        #  rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)

        #  rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        rcnn_3d_dims = self.rcnn_dims_pred(mono_3d_pooled_feat)
        rcnn_3d_angles = self.rcnn_angle_pred(mono_3d_pooled_feat).view(
            -1, self.num_bins, 2)
        rcnn_3d_angles_cls = self.rcnn_angle_conf_pred(
            mono_3d_pooled_feat).view(-1, self.num_bins, 2)
        rcnn_3d_angles_cls_reg = torch.cat(
            [rcnn_3d_angles_cls, rcnn_3d_angles],
            dim=-1).view(-1, self.num_bins * 4)

        rcnn_3d = torch.cat([rcnn_3d_dims, rcnn_3d_angles_cls_reg], dim=-1)
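        # layout of rcnn_3d per RoI: [3 dims | num_bins * (2 conf logits +
        # 2 angle regression values)], i.e. 3 + 4 * num_bins entries
        # (MultiBin-style encoding)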
        prediction_dict['rcnn_3d'] = rcnn_3d

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        if not self.training:
            # import ipdb
            # ipdb.set_trace()
            dims = rcnn_3d[:, :3]
            angles = rcnn_3d[:, 3:].view(-1, self.num_bins, 4)
            angles_cls = F.softmax(angles[:, :, :2], dim=-1)
            _, angles_cls_argmax = torch.max(angles_cls[:, :, 1], dim=-1)
            row = torch.arange(
                0, angles_cls_argmax.shape[0]).type_as(angles_cls_argmax)
            # take the (sin, cos) regression of the most confident bin per RoI
            angles_orientations = angles[:, :, 2:][row, angles_cls_argmax]
            rcnn_3d = torch.cat([dims, angles_orientations], dim=-1)
            #  import ipdb
            #  ipdb.set_trace()
            rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_angle(
                rcnn_3d, self.rcnn_3d_loss.bin_centers[angles_cls_argmax])
            prediction_dict['rcnn_3d'] = rcnn_3d

        return prediction_dict

    def pre_forward(self):
        # params
        if self.train_3d and self.training and not self.train_2d:
            self.freeze_modules()
            for parameter in self.feature_extractor.third_stage_feature.parameters(
            ):
                parameter.requires_grad = True
            # for param in self.rcnn_3d_preds_new.parameters():
            # param.requires_grad = True

            for param in self.rcnn_angle_conf_pred.parameters():
                param.requires_grad = True
            for param in self.rcnn_angle_pred.parameters():
                param.requires_grad = True
            for param in self.rcnn_dims_pred.parameters():
                param.requires_grad = True
        self.freeze_bn(self)
        self.unfreeze_bn(self.feature_extractor.third_stage_feature)

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        #  self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # some 3d statistic
        # some 2d points projected from 3d
        # self.rcnn_3d_preds_new = nn.Linear(in_channels, 3 + 4 * self.num_bins)

        self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)

        # dims
        self.rcnn_dims_pred = nn.Sequential(
            *[nn.Linear(in_channels, 256),
              nn.ReLU(),
              nn.Linear(256, 3)])

        # angle
        self.rcnn_angle_pred = nn.Sequential(*[
            nn.Linear(in_channels, 256),
            nn.ReLU(),
            nn.Linear(256, self.num_bins * 2)
        ])

        # angle conf
        self.rcnn_angle_conf_pred = nn.Sequential(*[
            nn.Linear(in_channels, 256),
            nn.ReLU(),
            nn.Linear(256, self.num_bins * 2)
        ])

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        self.reduce = True

        self.visualizer = FeatVisualizer()

        self.num_bins = 2

        self.train_3d = True

        # self.train_2d = not self.train_3d
        self.train_2d = True

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']
        #  gt_boxes_3d = feed_dict['coords']
        #  dims_2d = feed_dict['dims_2d']
        # use local angle
        #  oritations = feed_dict['local_angle_oritation']
        local_angle = feed_dict['local_angle']

        # shape(N,7)
        gt_boxes_3d = feed_dict['gt_boxes_3d']

        # orient

        # here just concat them
        # dims and their projection

        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], local_angle], dim=-1)
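        # the first three 3d-box entries are packed with the local (observation)
        # angle so the assigner emits dimension and orientation targets together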

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights,\
            rcnn_reg_targets_3d, rcnn_reg_weights_3d = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_boxes_3d, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        rcnn_reg_weights_3d = rcnn_reg_weights_3d[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict[
            'rcnn_reg_weights_3d'] = rcnn_reg_weights_3d / num_reg_coeff.float(
            )
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets_3d'] = rcnn_reg_targets_3d[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """
        assign labels to the proposals and subsample them,
        then compute the losses
        """
        loss_dict = {}

        if self.train_2d:
            # submodule loss
            loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))
            # targets and weights
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
            rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
            rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

            # classification loss
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

            # bounding box regression L1 loss
            rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
            rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                                 rcnn_reg_targets).sum(dim=-1)
            rcnn_bbox_loss *= rcnn_reg_weights
            rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

            loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
            loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        ######################################
        # 3d loss
        ######################################

        rcnn_reg_weights_3d = prediction_dict['rcnn_reg_weights_3d']
        rcnn_reg_targets_3d = prediction_dict['rcnn_reg_targets_3d']
        rcnn_3d = prediction_dict['rcnn_3d']
        if self.train_3d:

            # dims
            rcnn_3d_loss_dims = self.rcnn_bbox_loss(
                rcnn_3d[:, :3], rcnn_reg_targets_3d[:, :3]).sum(dim=-1)

            # angles
            rcnn_angle_loss, angle_tp_mask = self.rcnn_3d_loss(
                rcnn_3d[:, 3:], rcnn_reg_targets_3d[:, 3:])

            rcnn_3d_loss = rcnn_3d_loss_dims * rcnn_reg_weights_3d
            rcnn_3d_loss = rcnn_3d_loss.sum(dim=-1)

            rcnn_angle_loss = rcnn_angle_loss * rcnn_reg_weights_3d
            rcnn_angle_loss = rcnn_angle_loss.sum(dim=-1)

            loss_dict['rcnn_3d_loss'] = rcnn_3d_loss
            loss_dict['rcnn_angle_loss'] = rcnn_angle_loss

            # angles stats
            angle_tp_mask = angle_tp_mask[rcnn_reg_weights_3d > 0]
            angles_tp_num = angle_tp_mask.int().sum().item()
            angles_all_num = angle_tp_mask.numel()
        else:
            angles_all_num = 0
            angles_tp_num = 0

        # store all stats in target assigner
        self.target_assigner.stat.update({
            'angle_num_tp': torch.tensor(0),
            'angle_num_all': 1,

            # stats of orient
            'orient_tp_num': 0,
            'orient_tp_num2': 0,
            'orient_tp_num3': 0,
            'orient_all_num3': 0,
            # 'orient_pr': orient_pr,
            'orient_all_num': 0,
            'orient_tp_num4': 0,
            'orient_all_num4': 0,
            'cls_orient_2s_all_num': angles_all_num,
            'cls_orient_2s_tp_num': angles_tp_num

            #  'angles_tp_num': angles_tp_num,
            #  'angles_all_num': angles_all_num
        })
        # import ipdb
        # ipdb.set_trace()

        return loss_dict

# Example 9
class FPNFasterRCNN(Model):
    def calculate_roi_level(self, rois_batch):
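        # FPN-style RoI-to-level assignment: k = k0 + log2(sqrt(w*h) / 224),
        # clamped to [2, 5]; note that torch.log is the natural log, so this
        # mapping differs slightly from the log2 used in the FPN paper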
        h = rois_batch[:, 4] - rois_batch[:, 2] + 1
        w = rois_batch[:, 3] - rois_batch[:, 1] + 1
        roi_level = torch.log(torch.sqrt(w * h) / 224.0)
        roi_level = torch.round(roi_level + 4)
        roi_level[roi_level < 2] = 2
        roi_level[roi_level > 5] = 5
        # NOTE: this overrides the levels computed above, pooling every RoI from
        # a single pyramid level (level 4)
        roi_level[...] = 4
        return roi_level

    def pyramid_rcnn_pooling(self, rcnn_feat_maps, rois_batch):
        pooled_feats = []
        # determine which layer to get feat
        roi_level = self.calculate_roi_level(rois_batch)
        for idx, rcnn_feat_map in enumerate(rcnn_feat_maps):
            idx += 2
            mask = roi_level == idx
            rois_batch_per_stage = rois_batch[mask]
            if rois_batch_per_stage.shape[0] == 0:
                continue
            pooled_feats.append(
                self.rcnn_pooling(rcnn_feat_map, rois_batch_per_stage))
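        # note: pooled features are concatenated in level order rather than the
        # original RoI order; with every RoI forced onto one level above, the
        # original order is preserved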
        return torch.cat(pooled_feats, dim=0)

    def forward(self, feed_dict):

        prediction_dict = {}

        # base model
        rpn_feat_maps, rcnn_feat_maps = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'rpn_feat_maps': rpn_feat_maps})
        # batch_size = base_feat.shape[0]

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        # pooled_feat = self.rcnn_pooling(rcnn_feat_maps, rois_batch.view(-1, 5))
        pooled_feat = self.pyramid_rcnn_pooling(rcnn_feat_maps,
                                                rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        # shape(N,C)
        if self.reduce:
            pooled_feat = pooled_feat.mean(3).mean(2)
        else:
            pooled_feat = pooled_feat.view(self.rcnn_batch_size, -1)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        return prediction_dict

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = FPNFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(1024, self.n_classes)
        if self.reduce:
            in_channels = 1024
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """
        assign labels to the proposals and subsample them,
        then compute the losses
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # the loss weights carry no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        return loss_dict
class SINetModel(Model):
    def collect_intermedia_layers(self, img):
        feat2 = self.feature_extractor.first_stage_feature[:-1](img)
        feat3 = self.feature_extractor.first_stage_feature[-1](feat2)

        end_points = {'feat2': feat2, 'feat3': feat3}
        return feat3, end_points

    def caroi_pooling(self, all_feats, rois_batch, out_channels):
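        """
        Context-aware RoI pooling (SINet-style): pool the same RoIs from each
        collected feature map, concatenate along channels, and reduce back to
        out_channels with a 1x1 conv when the widths do not match.
        """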
        pooled_feats = []
        # NOTE: every feature map is pooled with the 1/16-scale rcnn_pooling;
        # the 1/8-scale rcnn_pooling2 defined in init_modules is unused here.
        for feat in all_feats.values():
            pooled_feats.append(self.rcnn_pooling(feat, rois_batch))
        pooled_feats = torch.cat(pooled_feats, dim=1)
        if pooled_feats.shape[1] != out_channels:
            # add 1x1 conv
            pooled_feats = self.reduce_pooling(pooled_feats)
        return pooled_feats

    def forward(self, feed_dict):

        prediction_dict = {}

        # base model
        # base_feat = self.feature_extractor.first_stage_feature(
        # feed_dict['img'])
        base_feat, all_feats = self.collect_intermedia_layers(feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        # pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        # import ipdb
        # ipdb.set_trace()
        pooled_feat = self.caroi_pooling(
            all_feats, rois_batch.view(-1, 5), out_channels=1024)

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        # shape(N,C)
        if self.reduce:
            pooled_feat = pooled_feat.mean(3).mean(2)
        else:
            pooled_feat = pooled_feat.view(self.rcnn_batch_size, -1)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        return prediction_dict

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        self.rcnn_pooling2 = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                         1.0 / 8.0)
        self.reduce_pooling = nn.Sequential(
            nn.Conv2d(1024 + 512, 1024, 1, 1, 0), nn.ReLU())

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config['feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        #  self.visualizer = FeatVisualizer()

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """
        Assign labels to the proposals, subsample them,
        then compute the losses.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # the loss weights carry no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        # analyze AP
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        num_gt = feed_dict['gt_labels'].numel()
        fake_match = prediction_dict['fake_match']
        self.target_assigner.analyzer.analyze_ap(
            fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.5)

        return loss_dict
class SemanticFasterRCNN(Model):
    def forward(self, feed_dict):
        self.clean_stats()

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        # batch_size = base_feat.shape[0]

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            stats = self.pre_subsample(prediction_dict, feed_dict)
            self.stats.update(stats)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        # semantic map
        # if self.use_self_attention:
        # pooled_feat_cls = pooled_feat.mean(3).mean(2)
        # rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat_cls)
        # rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # # self-attention
        # channel_attention = self.generate_channel_attention(pooled_feat)
        # spatial_attention = self.generate_spatial_attention(pooled_feat)
        # pooled_feat_reg = pooled_feat * channel_attention
        # pooled_feat_reg = pooled_feat * spatial_attention
        # pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)

        # rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)
        # else:
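        # the class-score map doubles as a spatial saliency map: its softmax is
        # used to reweight the pooled features (foreground channels only)
        # before global average pooling feeds the bbox regressor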
        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        # rcnn_cls_probs = rcnn_cls_probs_map.mean(3).mean(2)
        # shape(N,C)
        rcnn_bbox_feat = pooled_feat * saliency_map[:, 1:, :, :]
        # rcnn_bbox_feat = torch.cat([rcnn_bbox_feat, pooled_feat], dim=1)
        rcnn_bbox_feat = rcnn_bbox_feat.mean(3).mean(2)

        # if self.use_score:
        # pooled_feat =

        rcnn_bbox_preds = self.rcnn_bbox_pred(rcnn_bbox_feat)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        pred_boxes = self.bbox_coder.decode_batch(
            rcnn_bbox_preds.view(1, -1, 4), rois_batch[:, :, 1:5])
        rcnn_rois_batch = torch.zeros_like(rois_batch)
        rcnn_rois_batch[:, :, 1:5] = pred_boxes.detach()
        prediction_dict['rcnn_rois_batch'] = rcnn_rois_batch
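        # the decoded second-stage boxes are kept in RoI format so they can be
        # re-assigned against the ground truth below for statistics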

        # if self.training:
        # # append gt
        # rcnn_rois_batch = self.append_gt(rcnn_rois_batch,
        # feed_dict['gt_boxes'])
        # prediction_dict['rcnn_rois_batch'] = rcnn_rois_batch

        ###################################
        # stats
        ###################################

        # when cls is enabled, this step can be skipped
        stats = self.target_assigner.assign(rcnn_rois_batch[:, :, 1:],
                                            feed_dict['gt_boxes'],
                                            feed_dict['gt_labels'])[-1]
        self.rcnn_stats.update(stats)

        # analyze AP
        # only meaningful when cls is enabled
        if self.training:
            rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
            num_gt = feed_dict['gt_labels'].numel()
            fake_match = self.rcnn_stats['match']
            stats = self.target_assigner.analyzer.analyze_ap(
                fake_match, rcnn_cls_probs[:, 1], num_gt, thresh=0.5)
            # collect stats
            self.rcnn_stats.update(stats)

        return prediction_dict

    def clean_stats(self):
        # rois bbox
        self.stats = {
            'num_det': 1,
            'num_tp': 0,
            'matched_thresh': 0,
            'recall_thresh': 0,
            'match': None,
            # 'matched': 0,
            # 'num_gt': 1,
        }

        # rcnn bbox(final bbox)
        self.rcnn_stats = {
            'num_det': 1,
            'num_tp': 0,
            'matched_thresh': 0,
            'recall_thresh': 0,
            'match': None,
            # 'matched': 0,
        }

    def generate_channel_attention(self, feat):
        return feat.mean(3, keepdim=True).mean(2, keepdim=True)

    def generate_spatial_attention(self, feat):
        return self.spatial_attention(feat)

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = feature_extractors_builder.build(
            self.feature_extractor_config)
        # self.feature_extractor = ResNetFeatureExtractor(
        # self.feature_extractor_config)
        # self.feature_extractor = MobileNetFeatureExtractor(
        # self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        if self.use_self_attention:
            self.rcnn_cls_pred = nn.Linear(self.ndin, self.n_classes)
        else:
            self.rcnn_cls_pred = nn.Conv2d(self.ndin, self.n_classes, 3, 1, 1)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(self.ndin, 4)
            # self.rcnn_bbox_pred = nn.Conv2d(2048,4,3,1,1)
        else:
            self.rcnn_bbox_pred = nn.Linear(self.ndin, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # attention
        if self.use_self_attention:
            self.spatial_attention = nn.Conv2d(self.ndin, 1, 3, 1, 1)

    def init_param(self, model_config):
        if model_config.get('din'):
            self.ndin = model_config['din']
        else:
            self.ndin = 512
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']
        self.use_self_attention = model_config.get('use_self_attention')

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox_coder
        self.bbox_coder = self.target_assigner.bbox_coder

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights, stats = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        num_reg_coeff = torch.max(num_reg_coeff,
                                  torch.ones_like(num_reg_coeff))

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        stats['match'] = stats['match'][batch_sampled_mask]

        return stats

    def loss(self, prediction_dict, feed_dict):
        """
        Assign labels to the proposals, subsample them,
        then compute the losses.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # the loss weights carry no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        # analyze AP
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        num_gt = feed_dict['gt_labels'].numel()
        fake_match = prediction_dict['fake_match']
        self.target_assigner.analyzer.analyze_ap(fake_match,
                                                 rcnn_cls_probs[:, 1],
                                                 num_gt,
                                                 thresh=0.5)

        return loss_dict
class PRModel(Model):
    def init_weights(self):
        print("loading pre-trained weight")
        weight = torch.load(self.model_path,
                            map_location=lambda storage, loc: storage)

        from collections import OrderedDict
        new_state_dict = OrderedDict()
        module_dict = self.det_model.state_dict()
        for k, v in weight.items():
            if k not in module_dict:
                continue
            name = k[7:]  # remove `module.`
            new_state_dict[name] = v

        module_dict.update(new_state_dict)
        self.det_model.load_state_dict(module_dict)
        # else:

    # new_state_dict = OrderedDict()
    # for k, v in weight.items():
    # name = k[7:]      # remove `module.`
    # new_state_dict[name] = v
    # self.det_model.load_state_dict(new_state_dict)

    def pre_forward(self):
        self.freeze_modules()
        for param in self.det_model.multibox.box_3d_feature.parameters():
            param.requires_grad = True

        for param in self.det_model.multibox.orients_out.parameters():
            param.requires_grad = True

        for param in self.det_model.multibox.dims_3d_out.parameters():
            param.requires_grad = True

        self.freeze_bn(self)
        self.unfreeze_bn(self.det_model.multibox.box_3d_feature)
        self.unfreeze_bn(self.det_model.multibox.orients_out)
        self.unfreeze_bn(self.det_model.multibox.dims_3d_out)

    def init_param(self, model_config):
        self.n_classes = len(model_config['classes']) + 1
        self.rcnn_batch_size = model_config['rcnn_batch_size']
        self.profiler = Profiler()
        self.encoder = DataEncoder(ModelCFG,
                                   anchor_type=ModelCFG['anchor_type'],
                                   infer_mode=True)
        self.num_bins = 2

        self.model_path = model_config['model_path']

        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        self.sampler = BalancedSampler(model_config['sampler_config'])

    def init_modules(self):
        self.det_model = PRNet(ModelCFG)

        # dims loss
        self.dims_loss = nn.SmoothL1Loss(reduce=False)

        # multibin loss
        self.multibin_loss = MultiBinLoss(self.num_bins)

    def forward(self, feed_dict):
        self.target_assigner.bbox_coder_3d.mean_dims = feed_dict['mean_dims']
        image = feed_dict['img']
        loc1_preds, loc2_preds, os_preds, cls_preds, \
            dims_3d_out, orients_out = self.det_model.forward(image)

        # if not self.training:
        # boxes, lbls, scores, has_obj = self.encoder.decode(
        # loc2_preds.data.squeeze(0), F.softmax(cls_preds.squeeze(0), dim=1).data, os_preds.squeeze(0), Nt=0.5)

        prediction_dict = {}
        prediction_dict['dims_3d_out'] = dims_3d_out
        prediction_dict['orients_out'] = orients_out

        # prediction_dict['rcnn_cls_probs'] = scores
        # prediction_dict['rcnn_bbox_pred'] =

        return prediction_dict

    def generate_anchors(self, im_shape):
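        """
        Convert the encoder's normalized (cx, cy, w, h) default boxes into
        absolute (xmin, ymin, xmax, ymax) anchors in image coordinates.
        """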
        default_boxes = self.encoder.default_boxes
        xymin = default_boxes[:, :2] - 0.5 * default_boxes[:, 2:]
        xymax = default_boxes[:, :2] + 0.5 * default_boxes[:, 2:]

        normalized_anchors = torch.cat([xymin, xymax], dim=-1)
        anchors = torch.zeros_like(normalized_anchors)
        anchors[:, ::2] = normalized_anchors[:, ::2] * im_shape[1]
        anchors[:, 1::2] = normalized_anchors[:, 1::2] * im_shape[0]

        return anchors

    def squeeze_bbox_preds(self, rcnn_bbox_preds, rcnn_cls_targets, out_c=4):
        """
        Select, for each RoI, the out_c prediction values of its assigned class,
        reducing rcnn_bbox_preds from shape (N, num_classes * out_c) to (1, N, out_c).
        Args:
            rcnn_bbox_preds: shape (N, num_classes * out_c)
            rcnn_cls_targets: shape (1, N), assigned class index per RoI
        """
        rcnn_bbox_preds = rcnn_bbox_preds.view(-1, self.n_classes, out_c)
        batch_size = rcnn_bbox_preds.shape[0]
        offset = torch.arange(0, batch_size) * rcnn_bbox_preds.size(1)
        rcnn_cls_targets = rcnn_cls_targets + offset.type_as(rcnn_cls_targets)
        rcnn_bbox_preds = rcnn_bbox_preds.contiguous().view(
            -1, out_c)[rcnn_cls_targets[0]].unsqueeze(0)
        return rcnn_bbox_preds

    def loss(self, prediction_dict, feed_dict):
        #  import ipdb
        #  ipdb.set_trace()
        loss_dict = {}

        anchors = self.generate_anchors(feed_dict['im_info'][0][:2])

        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']
        local_angle = feed_dict['local_angle']
        gt_boxes_3d = feed_dict['gt_boxes_3d']

        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], local_angle], dim=-1)

        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights,\
            rcnn_reg_targets_3d, rcnn_reg_weights_3d = self.target_assigner.assign(
            anchors.unsqueeze(0), gt_boxes, gt_boxes_3d, gt_labels)

        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # rpn_cls_probs = prediction_dict['rpn_cls_probs'][:, :, 1]
        cls_criterion = None

        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rcnn_cls_weights)
        rcnn_reg_weights_3d = rcnn_reg_weights_3d * batch_sampled_mask
        num_reg_coeff = (rcnn_reg_weights_3d > 0).sum(dim=1)

        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        rcnn_reg_weights_3d = rcnn_reg_weights_3d / num_reg_coeff.float()

        # dims loss
        dims_pred = self.squeeze_bbox_preds(prediction_dict['dims_3d_out'],
                                            rcnn_cls_targets, 3)
        dims_loss = self.dims_loss(dims_pred, rcnn_reg_targets_3d[:, :, :3])
        dims_loss = dims_loss * rcnn_reg_weights_3d.unsqueeze(-1)
        dims_loss = dims_loss.sum(dim=-1).sum(dim=-1)

        # multibin loss
        orient_loss, angle_tp_mask = self.multibin_loss(
            prediction_dict['orients_out'], rcnn_reg_targets_3d[:, :, -1:])

        orient_loss = orient_loss * rcnn_reg_weights_3d
        orient_loss = orient_loss.sum(dim=-1)

        loss_dict['dims_loss'] = dims_loss
        loss_dict['orient_loss'] = orient_loss
        prediction_dict['rcnn_reg_weights'] = rcnn_reg_weights_3d[
            batch_sampled_mask > 0]

        # angles stats
        angle_tp_mask = angle_tp_mask[rcnn_reg_weights_3d > 0]
        angles_tp_num = angle_tp_mask.int().sum().item()
        angles_all_num = angle_tp_mask.numel()

        self.target_assigner.stat.update({
            'cls_orient_2s_all_num': angles_all_num,
            'cls_orient_2s_tp_num': angles_tp_num
        })

        return loss_dict
class IoUFasterRCNN(Model):
    def forward(self, feed_dict):
        # import ipdb
        # ipdb.set_trace()

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        # batch_size = base_feat.shape[0]

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        ########################################
        # semantic map
        ########################################
        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        # shape(N,C)
        rcnn_bbox_feat = pooled_feat * saliency_map[:, 1:, :, :]
        rcnn_bbox_feat = rcnn_bbox_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(rcnn_bbox_feat)

        # iou
        rcnn_iou = self.rcnn_iou(rcnn_bbox_feat)
        rcnn_iou = F.sigmoid(rcnn_iou)

        if self.use_iox:
            # iog
            rcnn_iog = self.rcnn_iog(rcnn_bbox_feat)
            rcnn_iog = F.sigmoid(rcnn_iog)

            # iod
            rcnn_iod = self.rcnn_iod(rcnn_bbox_feat)
            rcnn_iod = F.sigmoid(rcnn_iod)

            rcnn_iou_indirect = self.calculate_iou(rcnn_iog, rcnn_iod)
            rcnn_iou_final = (
                1 - self.alpha) * rcnn_iou_indirect + self.alpha * rcnn_iou
            prediction_dict['rcnn_iog'] = rcnn_iog
            prediction_dict['rcnn_iod'] = rcnn_iod
        else:
            # use iou directly
            rcnn_iou_final = rcnn_iou

        rcnn_fg_probs_final = rcnn_cls_probs[:, 1:] * torch.exp(-torch.pow(
            (1 - rcnn_iou_final[:, 1:]), 2) / self.theta)
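        # the foreground probability is rescaled by exp(-(1 - IoU)^2 / theta),
        # so detections with a low predicted IoU are suppressed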

        prediction_dict['rcnn_cls_probs'] = torch.cat(
            [rcnn_fg_probs_final, rcnn_fg_probs_final], dim=-1)
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores
        prediction_dict['rcnn_iou'] = rcnn_iou

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][0][
            proposals_order]

        return prediction_dict

    def calculate_iou(self, iog, iod):
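        """
        Recover IoU from IoG (intersection over ground truth) and IoD
        (intersection over detection): with intersection I, gt area G and
        detection area D, IoG = I / G and IoD = I / D, hence
        IoU = I / (G + D - I) = IoG * IoD / (IoG + IoD - IoG * IoD),
        which is the masked expression below (entries with IoD == 0 stay 0).
        """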
        mask = ~(iod == 0)
        iou_indirect = torch.zeros_like(iog)
        iod = iod[mask]
        iog = iog[mask]
        iou_indirect[mask] = (iod * iog) / (iod + iog - iod * iog)
        return iou_indirect

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

        # freeze module
        # self.freeze_modules()
        # # unfreeze some layers
        # unfreeze_params = [

    # self.rpn_model.rpn_iou.bias, self.rpn_model.rpn_iou.weight,
    # self.rcnn_iou.bias, self.rcnn_iou.weight
    # ]
    # for param in unfreeze_params:
    # param.requires_grad = True

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = IoURPNModel(self.rpn_config)
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                        1.0 / 16.0)
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        in_channels = 2048
        self.rcnn_iou = nn.Linear(in_channels, self.n_classes)
        self.rcnn_iog = nn.Linear(in_channels, self.n_classes)
        self.rcnn_iod = nn.Linear(in_channels, self.n_classes)

        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)
        self.rcnn_iou_loss = nn.MSELoss(reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']
        self.theta = 1.0
        self.alpha = 0.6

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']
        self.iou_criterion = model_config['iou_criterion']
        self.use_iox = model_config['use_iox']
        # self.use_cls_pred = model_config['use_cls_pred']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = LEDTargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        # self.sampler = HardNegativeSampler(model_config['sampler_config'])
        if self.iou_criterion:
            self.sampler = DetectionSampler(model_config['sampler_config'])
        else:
            self.sampler = BalancedSampler(model_config['sampler_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets, rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        if self.iou_criterion:
            cls_criterion = self.target_assigner.matcher.assigned_overlaps_batch
        else:
            cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        assert num_reg_coeff, 'bug happens'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        # import ipdb
        # ipdb.set_trace()
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

        # iou targets
        rcnn_iou_targets = self.target_assigner.matcher.assigned_overlaps_batch
        prediction_dict['rcnn_iou_targets'] = rcnn_iou_targets[
            batch_sampled_mask]

        # iog targets
        rcnn_iog_targets = self.target_assigner.matcher.assigned_iog_batch
        prediction_dict['rcnn_iog_targets'] = rcnn_iog_targets[
            batch_sampled_mask]

        # iod targets
        rcnn_iod_targets = self.target_assigner.matcher.assigned_iod_batch
        prediction_dict['rcnn_iod_targets'] = rcnn_iod_targets[
            batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """
        Assign labels to the proposals, subsample them,
        then compute the losses.
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # iou loss
        rcnn_iou = prediction_dict['rcnn_iou'][:, 1]
        rcnn_iou_targets = prediction_dict['rcnn_iou_targets']
        rcnn_iou = torch.exp(rcnn_iou)
        rcnn_iou_targets = torch.exp(rcnn_iou_targets)
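        # the MSE is computed on exp(IoU) rather than IoU itself, presumably to
        # penalize errors on high-IoU samples more heavily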
        rcnn_iou_loss = self.rcnn_iou_loss(rcnn_iou, rcnn_iou_targets)
        rcnn_iou_loss *= rcnn_cls_weights
        rcnn_iou_loss = rcnn_iou_loss.sum(dim=-1)

        if self.use_iox:
            # iog loss
            rcnn_iog = prediction_dict['rcnn_iog'][:, 1]
            rcnn_iog_targets = prediction_dict['rcnn_iog_targets']
            rcnn_iog = torch.exp(rcnn_iog)
            rcnn_iog_targets = torch.exp(rcnn_iog_targets)
            rcnn_iog_loss = self.rcnn_iou_loss(rcnn_iog, rcnn_iog_targets)
            rcnn_iog_loss *= rcnn_cls_weights
            rcnn_iog_loss = rcnn_iog_loss.sum(dim=-1)

            # iod loss
            rcnn_iod = prediction_dict['rcnn_iod'][:, 1]
            rcnn_iod_targets = prediction_dict['rcnn_iod_targets']
            rcnn_iod = torch.exp(rcnn_iod)
            rcnn_iod_targets = torch.exp(rcnn_iod_targets)
            rcnn_iod_loss = self.rcnn_iou_loss(rcnn_iod, rcnn_iod_targets)
            rcnn_iod_loss *= rcnn_cls_weights
            rcnn_iod_loss = rcnn_iod_loss.sum(dim=-1)

            loss_dict['rcnn_iod_loss'] = rcnn_iod_loss
            loss_dict['rcnn_iog_loss'] = rcnn_iog_loss

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        # exp
        # rcnn_cls_scores = torch.exp(rcnn_cls_scores)
        # rcnn_cls_targets = torch.exp(rcnn_cls_targets)

        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # the loss weights carry no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss
        loss_dict['rcnn_iou_loss'] = rcnn_iou_loss

        # add rcnn_cls_targets to get the statistics of rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        return loss_dict
class Mono3DFasterRCNN(Model):
    def forward(self, feed_dict):
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        if self.training and self.train_2d:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)

        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        pooled_feat = pooled_feat * saliency_map[:, 1:, :, :]

        reduced_pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(reduced_pooled_feat)
        # rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        ###################################
        # 3d training
        ###################################
        rcnn_bbox_preds = rcnn_bbox_preds.detach()
        final_bbox = self.target_assigner.bbox_coder.decode_batch(
            rcnn_bbox_preds.unsqueeze(0), rois_batch[:, :, 1:])
        final_rois_inds = torch.zeros_like(final_bbox[:, :, -1:])
        final_rois_batch = torch.cat([final_rois_inds, final_bbox], dim=-1)
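        # prepend a zero batch index (single-image batches) so the decoded
        # boxes follow the (batch_idx, x1, y1, x2, y2) RoI layout expected by
        # rcnn_pooling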

        if self.training and self.train_3d:
            prediction_dict['rois_batch'] = final_rois_batch
            self.pre_subsample(prediction_dict, feed_dict)
            final_rois_batch = prediction_dict['rois_batch']

        # shape(M,C,7,7)
        mono_3d_pooled_feat = self.rcnn_pooling(base_feat,
                                                final_rois_batch.view(-1, 5))

        # H-concat: append the flattened RoI homography to alleviate the effect
        # of the perspective transform
        # shape(N,M,9)
        # import ipdb
        # ipdb.set_trace()

        # concat with pooled feat
        # mono_3d_pooled_feat = torch.cat([mono_3d_pooled_feat, H_inv], dim=1)
        # mono_3d_pooled_feat = self.reduced_layer(mono_3d_pooled_feat)

        mono_3d_pooled_feat = self.feature_extractor.third_stage_feature(
            mono_3d_pooled_feat)
        mono_3d_pooled_feat = mono_3d_pooled_feat.mean(3).mean(2)

        if self.h_cat:
            H_inv = self.calc_Hinv(final_rois_batch, feed_dict['p2'],
                                   feed_dict['im_info'],
                                   base_feat.shape[-2:])[0].view(-1, 9)
            mono_3d_pooled_feat = torch.cat([mono_3d_pooled_feat, H_inv],
                                            dim=-1)
        rcnn_3d = self.rcnn_3d_pred(mono_3d_pooled_feat)

        # normalize to [0,1]
        # rcnn_3d[:, 5:11] = F.sigmoid(rcnn_3d[:, 5:11])

        prediction_dict['rcnn_3d'] = rcnn_3d

        if not self.training:
            # rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_bbox(
            # rcnn_3d, rois_batch)
            rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_dims(
                rcnn_3d, final_rois_batch)

            prediction_dict['rcnn_3d'] = rcnn_3d

        return prediction_dict

    def calc_Hinv(self, final_rois_batch, p2, img_size, feat_size):
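        """
        Build a per-RoI homography from the camera intrinsics: K_c is taken
        from the P2 projection matrix, K_roi rescales and shifts it into the
        pooled RoI frame (pooling_size x pooling_size), and H = K_roi @ K_c^-1
        maps image pixels into that frame.  The inverse homography is returned
        flattened to 9 values per RoI, shape (1, num_rois, 9).
        """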
        p2 = p2[0]
        K_c = p2[:, :3]
        fx = K_c[0, 0]
        fy = K_c[1, 1]
        px = K_c[0, 2]
        py = K_c[1, 2]
        fw = self.pooling_size
        fh = self.pooling_size

        proposals = final_rois_batch[:, :, 1:]
        rw = (proposals[:, :, 2] - proposals[:, :, 0] +
              1) / img_size[:, 1] * feat_size[1]
        rh = (proposals[:, :, 3] - proposals[:, :, 1] +
              1) / img_size[:, 0] * feat_size[0]
        # rx = (proposals[:, :, 0] + proposals[:, :, 2]) / 2
        # ry = (proposals[:, :, 1] + proposals[:, :, 3]) / 2

        # roi camera intrinsic parameters
        sw = fw / rw
        sh = fh / rh
        fx_roi = fx * sw
        fy_roi = fy * sh
        zeros = torch.zeros_like(fx_roi)
        ones = torch.ones_like(fx_roi)

        px_roi = (px - proposals[:, :, 0]) * sw
        py_roi = (py - proposals[:, :, 1]) * sh

        K_roi = torch.stack(
            [fx_roi, zeros, px_roi, zeros, fy_roi, py_roi, zeros, zeros, ones],
            dim=-1).view(-1, 3, 3)

        H = K_roi.matmul(torch.inverse(K_c))
        # import ipdb
        # ipdb.set_trace()
        # Too slow
        # H_inv = []
        # for i in range(H.shape[0]):
        # H_inv.append(torch.inverse(H[i]))
        # H_inv = torch.stack(H_inv, dim=0)
        # import ipdb
        # ipdb.set_trace()
        H_np = H.cpu().numpy()
        H_inv_np = np.linalg.inv(H_np)
        H_inv = torch.from_numpy(H_inv_np).cuda().float()

        return H_inv.view(1, -1, 9)

    def pre_forward(self):
        # params
        if self.train_3d and self.training and not self.train_2d:
            self.freeze_modules()
            for parameter in self.feature_extractor.third_stage_feature.parameters(
            ):
                parameter.requires_grad = True
            for param in self.rcnn_3d_pred.parameters():
                param.requires_grad = True
            self.freeze_bn(self)
            self.unfreeze_bn(self.feature_extractor.third_stage_feature)

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

        # if self.train_3d and self.training:

    # self.freeze_modules()
    # for parameter in self.feature_extractor.third_stage_feature.parameters(
    # ):
    # parameter.requires_grad = True
    # for param in self.rcnn_3d_preds_new.parameters():
    # param.requires_grad = True

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        # self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # some 3d statistic
        # some 2d points projected from 3d
        # self.rcnn_3d_pred = nn.Linear(in_channels, 3 + 4 + 3 + 1 + 4 + 2)
        if self.h_cat:
            c = in_channels + 9
        else:
            c = in_channels
        # self.rcnn_3d_pred = nn.Linear(c, 3 + 4 + 11 + 2 + 1)
        self.rcnn_3d_pred = nn.Linear(c, 3 + 4 * 2)

        # self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)
        # self.rcnn_3d_loss = MultiBinRegLoss(num_bins=self.num_bins)
        self.rcnn_3d_loss = OrientationLoss(split_loss=True)

        # reduce for concat with the following layers
        # self.reduced_layer = nn.Sequential(

    # * [nn.Conv2d(1024 + 9, 1024, 1, 1, 0), nn.BatchNorm2d(1024)])

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        self.visualizer = FeatVisualizer()

        self.num_bins = 4

        self.train_3d = False

        self.train_2d = not self.train_3d

        # more accurate bbox for 3d prediction
        if self.train_3d:
            fg_thresh = 0.6
        else:
            fg_thresh = 0.5
        model_config['target_assigner_config']['fg_thresh'] = fg_thresh

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        self.profiler = Profiler()

        self.h_cat = False

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']
        #  gt_boxes_3d = feed_dict['coords']
        #  dims_2d = feed_dict['dims_2d']
        # use local angle
        #  oritations = feed_dict['local_angle_oritation']
        # local_angle = feed_dict['local_angle']

        # shape(N,7)
        gt_boxes_3d = feed_dict['gt_boxes_3d']

        # orient
        # cls_orient = torch.unsqueeze(feed_dict['cls_orient'], dim=-1).float()
        # reg_orient = feed_dict['reg_orient']
        # orient = torch.cat([cls_orient, reg_orient], dim=-1)

        # h_2ds = feed_dict['h_2d']
        # c_2ds = feed_dict['c_2d']
        # r_2ds = feed_dict['r_2d']
        # cls_orient_4s = feed_dict['cls_orient_4']
        # center_orients = feed_dict['center_orient']
        # distances = feed_dict['distance']
        # d_ys = feed_dict['d_y']
        # angles_camera = feed_dict['angles_camera']

        # here just concat them
        # dims and their projection

        # gt_boxes_3d = torch.cat(
        # [gt_boxes_3d[:, :, :3], orient, distances, d_ys], dim=-1)
        encoded_side_points = feed_dict['encoded_side_points']
        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], encoded_side_points],
                                dim=-1)

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights,\
            rcnn_reg_targets_3d, rcnn_reg_weights_3d = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_boxes_3d, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        rcnn_reg_weights_3d = rcnn_reg_weights_3d[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'bug happens'
        # assert num_reg_coeff, 'bug happens'
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones_like(num_reg_coeff)

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict[
            'rcnn_reg_weights_3d'] = rcnn_reg_weights_3d / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets_3d'] = rcnn_reg_targets_3d[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def loss(self, prediction_dict, feed_dict):
        """
        Assign labels to the proposals, subsample them,
        then compute the losses.
        """
        loss_dict = {}

        if self.train_2d:
            # submodule loss
            loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))
            # targets and weights
            rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
            rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

            rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
            rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

            # classification loss
            rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
            rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores,
                                               rcnn_cls_targets)
            rcnn_cls_loss *= rcnn_cls_weights
            rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

            # bounding box regression L1 loss
            rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
            rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                                 rcnn_reg_targets).sum(dim=-1)
            rcnn_bbox_loss *= rcnn_reg_weights
            rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

            loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
            loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        ######################################
        # 3d loss
        ######################################

        rcnn_reg_weights_3d = prediction_dict['rcnn_reg_weights_3d']
        rcnn_reg_targets_3d = prediction_dict['rcnn_reg_targets_3d']
        rcnn_3d = prediction_dict['rcnn_3d']
        if self.train_3d:
            rcnn_3d_loss = self.rcnn_bbox_loss(rcnn_3d,
                                               rcnn_reg_targets_3d).sum(dim=-1)
            rcnn_3d_loss = rcnn_3d_loss * rcnn_reg_weights_3d

            # dims
            # rcnn_3d_loss_dims = self.rcnn_bbox_loss(
            # rcnn_3d[:, :3], rcnn_reg_targets_3d[:, :3]).sum(dim=-1)

            # # angles
            # res = self.rcnn_3d_loss(rcnn_3d[:, 3:], rcnn_reg_targets_3d[:, 3:])
            # for res_loss_key in res:
            # tmp = res[res_loss_key] * rcnn_reg_weights_3d
            # res[res_loss_key] = tmp.sum(dim=-1)
            # loss_dict.update(res)

            # rcnn_3d_loss = rcnn_3d_loss_dims * rcnn_reg_weights_3d
            # rcnn_3d_loss = rcnn_3d_loss.sum(dim=-1)

            loss_dict['rcnn_3d_loss'] = rcnn_3d_loss

        # stats of orients
        # cls_orient_preds = rcnn_3d[:, 3:5]
        # cls_orient = rcnn_reg_targets_3d[:, 3]
        # _, cls_orient_preds_argmax = torch.max(cls_orient_preds, dim=-1)
        # orient_tp_mask = cls_orient.type_as(
        # cls_orient_preds_argmax) == cls_orient_preds_argmax
        # mask = (rcnn_reg_weights_3d > 0) & (rcnn_reg_targets_3d[:, 3] > -1)
        # orient_tp_mask = orient_tp_mask[mask]
        # orient_tp_num = orient_tp_mask.int().sum().item()
        # orient_all_num = orient_tp_mask.numel()

        # # depth ind ap
        # depth_ind_preds = rcnn_3d[:, 7:7 + 11]
        # depth_ind_targets = rcnn_reg_targets_3d[:, 6]
        # _, depth_ind_preds_argmax = torch.max(depth_ind_preds, dim=-1)
        # depth_ind_mask = depth_ind_targets.type_as(
        # depth_ind_preds_argmax) == depth_ind_preds_argmax
        # depth_ind_mask = depth_ind_mask[rcnn_reg_weights_3d > 0]
        # depth_ind_tp_num = depth_ind_mask.int().sum().item()
        # depth_ind_all_num = depth_ind_mask.numel()

        # # this mask is converted from reg methods
        # r_2ds_dis = torch.zeros_like(cls_orient)
        # r_2ds = rcnn_3d[:, 10]
        # r_2ds_dis[r_2ds < 0.5] = 0
        # r_2ds_dis[r_2ds > 0.5] = 1
        # orient_tp_mask2 = (r_2ds_dis == cls_orient)

        # orient_tp_mask2 = orient_tp_mask2[mask]
        # orient_tp_num2 = orient_tp_mask2.int().sum().item()

        # # cls_orient_4s
        # cls_orient_4s_pred = rcnn_3d[:, 11:15]
        # _, cls_orient_4s_inds = torch.max(cls_orient_4s_pred, dim=-1)
        # cls_orient_4s = rcnn_reg_targets_3d[:, 10]

        # # cls_orient_4s_inds[(cls_orient_4s_inds == 0) | (cls_orient_4s_inds == 2
        # # )] = 1
        # # cls_orient_4s_inds[(cls_orient_4s_inds == 1) | (cls_orient_4s_inds == 3
        # # )] = 0
        # orient_tp_mask3 = cls_orient_4s_inds.type_as(
        # cls_orient_4s) == cls_orient_4s
        # mask3 = (rcnn_reg_weights_3d > 0)
        # orient_tp_mask3 = orient_tp_mask3[mask3]
        # orient_4s_tp_num = orient_tp_mask3.int().sum().item()
        # orient_all_num3 = orient_tp_mask3.numel()

        # # test cls_orient_4s(check label)
        # cls_orient_2s_inds = torch.zeros_like(cls_orient)
        # cls_orient_2s_inds[(cls_orient_4s == 0) | (cls_orient_4s == 2)] = 1
        # cls_orient_2s_inds[(cls_orient_4s == 1) | (cls_orient_4s == 3)] = 0
        # cls_orient_2s_mask = (cls_orient_2s_inds == cls_orient)
        # cls_orient_2s_mask = cls_orient_2s_mask[mask]
        # cls_orient_2s_tp_num = cls_orient_2s_mask.int().sum().item()
        # cls_orient_2s_all_num = cls_orient_2s_mask.numel()

        # # center_orient
        # center_orients_preds = rcnn_3d[:, 15:17]
        # _, center_orients_inds = torch.max(center_orients_preds, dim=-1)
        # center_orients = rcnn_reg_targets_3d[:, 11]
        # orient_tp_mask4 = center_orients.type_as(
        # center_orients_inds) == center_orients_inds
        # mask4 = (rcnn_reg_weights_3d > 0) & (center_orients > -1)
        # orient_tp_mask4 = orient_tp_mask4[mask4]
        # orient_tp_num4 = orient_tp_mask4.int().sum().item()
        # orient_all_num4 = orient_tp_mask4.numel()

        # store all stats in target assigner
        # self.target_assigner.stat.update({
        # # 'angle_num_tp': torch.tensor(0),
        # # 'angle_num_all': 1,

        # # stats of orient
        # 'orient_tp_num': orient_tp_num,
        # # 'orient_tp_num2': orient_tp_num2,
        # # 'orient_tp_num3': orient_4s_tp_num,
        # # 'orient_all_num3': orient_all_num3,
        # # 'orient_pr': orient_pr,
        # 'orient_all_num': orient_all_num,
        # # 'orient_tp_num4': orient_tp_num4,
        # # 'orient_all_num4': orient_all_num4,
        # 'cls_orient_2s_all_num': depth_ind_all_num,
        # 'cls_orient_2s_tp_num': depth_ind_tp_num
        # })

        return loss_dict
class Mono3DFinalAngleFasterRCNN(Model):
    def forward(self, feed_dict):
        self.target_assigner.bbox_coder_3d.mean_dims = feed_dict['mean_dims']
        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        second_pooled_feat = self.feature_extractor.second_stage_feature(
            pooled_feat)

        second_pooled_feat = second_pooled_feat.mean(3).mean(2)

        rcnn_cls_scores = self.rcnn_cls_preds(second_pooled_feat)
        rcnn_bbox_preds = self.rcnn_bbox_preds(second_pooled_feat)
        rcnn_3d = self.rcnn_3d_pred(second_pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        ###################################
        # 3d training
        ###################################

        prediction_dict['rcnn_3d'] = rcnn_3d

        if not self.training:
            if self.class_agnostic_3d:
                orient = rcnn_3d[:, 3:]
                dims = rcnn_3d[:, :3]
            else:
                orient = rcnn_3d[:, 3 * self.n_classes:]
                dims = rcnn_3d[:, :3 * self.n_classes]
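            # multi-bin decoding: each of the num_bins bins predicts 2 bin
            # confidence logits and 2 regression values (presumably sin/cos of
            # the angle residual); pick the most confident bin and decode the
            # angle from its residual and the corresponding bin center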
            angles = orient.view(-1, self.num_bins, 4)
            angles_cls = F.softmax(angles[:, :, :2], dim=-1)
            _, angles_cls_argmax = torch.max(angles_cls[:, :, 1], dim=-1)
            row = torch.arange(
                0, angles_cls_argmax.shape[0]).type_as(angles_cls_argmax)
            angles_orientations = angles[:, :, 2:][row, angles_cls_argmax]
            rcnn_3d = torch.cat([dims, angles_orientations], dim=-1)
            #  import ipdb
            #  ipdb.set_trace()
            rcnn_3d = self.target_assigner.bbox_coder_3d.decode_batch_angle(
                rcnn_3d, self.rcnn_3d_loss.bin_centers[angles_cls_argmax])
            prediction_dict['rcnn_3d'] = rcnn_3d

        return prediction_dict

    def pre_forward(self):
        pass

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_preds, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_preds, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = ROIAlign((self.pooling_size,
                                          self.pooling_size), 1.0 / 16.0, 2)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        # self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        self.rcnn_cls_preds = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_preds = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_preds = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(self.n_classes)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # self.rcnn_3d_pred = nn.Linear(c, 3 + 4 + 11 + 2 + 1)
        if self.class_agnostic_3d:
            self.rcnn_3d_pred = nn.Linear(in_channels, 3 + 4 * self.num_bins)
        else:
            self.rcnn_3d_pred = nn.Linear(
                in_channels, 3 * self.n_classes + 4 * self.num_bins)

        #  self.rcnn_3d_loss = OrientationLoss(split_loss=True)
        self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes) + 1
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.class_agnostic_3d = model_config['class_agnostic_3d']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config['feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

        self.visualizer = FeatVisualizer()

        self.num_bins = 4

        # more accurate bbox for 3d prediction
        # if self.train_3d:
        # fg_thresh = 0.6
        # else:
        # fg_thresh = 0.5
        # model_config['target_assigner_config']['fg_thresh'] = fg_thresh

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        self.profiler = Profiler()

        self.h_cat = False

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']
        #  gt_boxes_3d = feed_dict['coords']
        #  dims_2d = feed_dict['dims_2d']
        # use local angle
        #  oritations = feed_dict['local_angle_oritation']
        local_angle = feed_dict['local_angle']

        # shape(N,7)
        gt_boxes_3d = feed_dict['gt_boxes_3d']

        # orient

        # here just concat them
        # dims and their projection

        gt_boxes_3d = torch.cat([gt_boxes_3d[:, :, :3], local_angle], dim=-1)

        ##########################
        # assigner
        ##########################
        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights,\
            rcnn_reg_targets_3d, rcnn_reg_weights_3d = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_boxes_3d, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        rcnn_reg_weights_3d = rcnn_reg_weights_3d[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'no valid cls targets after sampling'
        assert num_reg_coeff, 'no valid reg targets after sampling'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict[
            'rcnn_reg_weights_3d'] = rcnn_reg_weights_3d / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets_3d'] = rcnn_reg_targets_3d[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

    def squeeze_bbox_preds(self, rcnn_bbox_preds, rcnn_cls_targets, out_c=4):
        """
        squeeze rcnn_bbox_preds from shape (N, 4 * num_classes) to shape (N, 4)
        Args:
            rcnn_bbox_preds: shape(N, num_classes, 4)
            rcnn_cls_targets: shape(N, 1)
        """
        rcnn_bbox_preds = rcnn_bbox_preds.view(-1, self.n_classes, out_c)
        batch_size = rcnn_bbox_preds.shape[0]
        offset = torch.arange(0, batch_size) * rcnn_bbox_preds.size(1)
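        # turn per-ROI class indices into flat indices into the
        # (N * num_classes, out_c) view so each ROI keeps only the prediction
        # of its assigned class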
        rcnn_cls_targets = rcnn_cls_targets + offset.type_as(rcnn_cls_targets)
        rcnn_bbox_preds = rcnn_bbox_preds.contiguous().view(
            -1, out_c)[rcnn_cls_targets]
        return rcnn_bbox_preds

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        #  import ipdb
        #  ipdb.set_trace()

        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))
        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']

        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        #
        if not self.class_agnostic:
            rcnn_bbox_preds = self.squeeze_bbox_preds(rcnn_bbox_preds,
                                                      rcnn_cls_targets)
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        ######################################
        # 3d loss
        ######################################

        rcnn_reg_weights_3d = prediction_dict['rcnn_reg_weights_3d']
        rcnn_reg_targets_3d = prediction_dict['rcnn_reg_targets_3d']
        rcnn_3d = prediction_dict['rcnn_3d']

        if not self.class_agnostic_3d:
            dims_pred = rcnn_3d[:, :3 * self.n_classes]
            dims_pred = self.squeeze_bbox_preds(dims_pred, rcnn_cls_targets, 3)
            orient_pred = rcnn_3d[:, 3 * self.n_classes:]
        else:
            dims_pred = rcnn_3d[:, :3]
            orient_pred = rcnn_3d[:, 3:]
        # dims
        rcnn_3d_loss_dims = self.rcnn_bbox_loss(
            dims_pred, rcnn_reg_targets_3d[:, :3]).sum(dim=-1)

        # angles
        rcnn_angle_loss, angle_tp_mask = self.rcnn_3d_loss(
            orient_pred, rcnn_reg_targets_3d[:, 3:])
        # angles
        #  res = self.rcnn_3d_loss(rcnn_3d[:, 3:], rcnn_reg_targets_3d[:, 3:6])
        #  for res_loss_key in res:
        #  tmp = res[res_loss_key] * rcnn_reg_weights_3d
        #  res[res_loss_key] = tmp.sum(dim=-1)
        #  loss_dict.update(res)

        rcnn_3d_loss = rcnn_3d_loss_dims * rcnn_reg_weights_3d
        rcnn_3d_loss = rcnn_3d_loss.sum(dim=-1)

        rcnn_angle_loss = rcnn_angle_loss * rcnn_reg_weights_3d
        rcnn_angle_loss = rcnn_angle_loss.sum(dim=-1)

        loss_dict['rcnn_3d_loss'] = rcnn_3d_loss
        loss_dict['rcnn_angle_loss'] = rcnn_angle_loss

        # stats of orients
        angle_tp_mask = angle_tp_mask[rcnn_reg_weights_3d > 0]
        angles_tp_num = angle_tp_mask.int().sum().item()
        angles_all_num = angle_tp_mask.numel()
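        # only positive ROIs are counted; the tp/all counts are pushed into the
        # target assigner's stat dict below as an orientation accuracy statistic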
        #  cls_orient_preds = rcnn_3d[:, 3:5]
        #  cls_orient = rcnn_reg_targets_3d[:, 3]
        #  _, cls_orient_preds_argmax = torch.max(cls_orient_preds, dim=-1)
        #  orient_tp_mask = cls_orient.type_as(
        #  cls_orient_preds_argmax) == cls_orient_preds_argmax
        #  mask = (rcnn_reg_weights_3d > 0) & (rcnn_reg_targets_3d[:, 3] > -1)
        #  orient_tp_mask = orient_tp_mask[mask]
        #  orient_tp_num = orient_tp_mask.int().sum().item()
        #  orient_all_num = orient_tp_mask.numel()

        # gt_boxes_proj = feed_dict['gt_boxes_proj']

        self.target_assigner.stat.update({
            'cls_orient_2s_all_num': angles_all_num,
            'cls_orient_2s_tp_num': angles_tp_num
            # 'angle_num_tp': torch.tensor(0),
            # 'angle_num_all': 1,

            # stats of orient
            #  'orient_tp_num': orient_tp_num,
            # 'orient_tp_num2': orient_tp_num2,
            #  'orient_tp_num3': orient_tp_num3,
            # 'orient_all_num3': orient_all_num3,
            # 'orient_pr': orient_pr,
            #  'orient_all_num': orient_all_num,
            #  'orient_all_num3': orient_all_num3,
            # 'orient_tp_num4': orient_tp_num4,
            # 'orient_all_num4': orient_all_num4,
            #  'cls_orient_2s_all_num': depth_ind_all_num,
            #  'cls_orient_2s_tp_num': depth_ind_tp_num
        })

        return loss_dict
class DoubleIoUFasterRCNN(Model):
    def forward(self, feed_dict):

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.add_feat('base_feat', base_feat)
        # batch_size = base_feat.shape[0]

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat_reg = self.feature_extractor.second_stage_feature(
            pooled_feat)
        # shape(N,C)
        # if self.reduce:
        pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)
        # else:
        # pooled_feat = pooled_feat.view(self.rcnn_batch_size, -1)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)
        # rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)

        # rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        # prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]
        prediction_dict['rcnn_cls_probs'] = prediction_dict['rpn_cls_probs'][
            0][proposals_order]

        return prediction_dict

    # def rcnn_cls_pred(pooled_feat)

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.reduce:
            in_channels = 2048
        else:
            in_channels = 2048 * 4 * 4
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(in_channels, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(2)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        # self.reduce = model_config.get('reduce')
        self.reduce = True

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        # indicator = rcnn_cls_weights > 0
        indicator = None

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        # num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        # assert num_cls_coeff, 'no valid cls targets after sampling'
        assert num_reg_coeff, 'no valid reg targets after sampling'

        # prediction_dict[
        # 'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        # prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
        # batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        # rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        # rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        # rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        # rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        # rcnn_cls_loss *= rcnn_cls_weights
        # rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights have no gradients
        # loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        return loss_dict
class ThreeIoUFasterRCNN(Model):
    def forward(self, feed_dict):
        # import ipdb
        # ipdb.set_trace()

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        # batch_size = base_feat.shape[0]

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        ########################################
        # semantic map
        ########################################
        rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)
        # shape(N,C)
        rcnn_bbox_feat = pooled_feat * saliency_map[:, 1:, :, :]
        rcnn_bbox_feat = rcnn_bbox_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(rcnn_bbox_feat)
        # shape(N,C)
        # pooled_feat = pooled_feat.mean(3).mean(2)

        # rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        # rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)

        # rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][0][
            proposals_order]

        return prediction_dict

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                        1.0 / 16.0)
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        # self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(2048, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

        # loss module
        # if self.use_focal_loss:
        # self.rcnn_cls_loss = FocalLoss(2)
        # else:
        # self.rcnn_cls_loss = functools.partial(
        # F.cross_entropy, reduce=False)
        self.rcnn_cls_loss = nn.MSELoss(reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        # self.bbox_sampler = DetectionSampler({'fg_fraction': 0.5})
        self.bbox_sampler = HardNegativeSampler({'fg_fraction': 1})
        self.iou_sampler = BalancedSampler(model_config['sampler_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        # import ipdb
        # ipdb.set_trace()
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # double subsampler
        ##########################

        cls_criterion = None

        # bbox subsample
        pos_indicator = rcnn_reg_weights > 0
        cls_criterion = self.target_assigner.matcher.assigned_overlaps_batch
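        # the matched IoU of each ROI is passed to the hard-negative sampler as
        # its selection criterion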
        bbox_batch_sampled_mask = self.bbox_sampler.subsample_batch(
            self.rcnn_batch_size, pos_indicator, criterion=cls_criterion)
        # rcnn_cls_weights = rcnn_cls_weights[bbox_batch_sampled_mask]
        # num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        # ignore when bbox loss is not necessary
        bbox_batch_sampled_mask[rcnn_reg_weights == 0] = 0
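        # drop sampled ROIs that carry no regression weight so the bbox branch
        # is trained on foreground ROIs only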
        rcnn_reg_weights = rcnn_reg_weights[bbox_batch_sampled_mask]
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        assert num_reg_coeff, 'no valid reg targets after sampling'

        # iou subsample
        # balanced subsample
        pos_indicator = rcnn_cls_targets > 0
        iou_batch_sampled_mask = self.iou_sampler.subsample_batch(
            self.rcnn_batch_size, pos_indicator)
        # rcnn_cls_weights = rcnn_cls_weights[iou_batch_sampled_mask]
        # num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)

        # check

        # make sure iou optimized when bbox optimized
        iou_batch_sampled_mask |= bbox_batch_sampled_mask
        rcnn_cls_weights = rcnn_cls_weights[iou_batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        assert num_cls_coeff, 'no valid cls targets after sampling'

        batch_sampled_mask = iou_batch_sampled_mask

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            iou_batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            bbox_batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]
        # prediction_dict['iou_batch_sampled_mask'] = iou_batch_sampled_mask
        prediction_dict['bbox_batch_sampled_mask'] = bbox_batch_sampled_mask[
            batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # mask
        # iou_batch_sampled_mask = prediction_dict['iou_batch_sampled_mask']
        bbox_batch_sampled_mask = prediction_dict['bbox_batch_sampled_mask']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_probs'][:, 1]
        # exp
        rcnn_cls_scores = torch.exp(rcnn_cls_scores)
        rcnn_cls_targets = torch.exp(rcnn_cls_targets)
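        # the "classification" branch here regresses an IoU-like target with an
        # MSE loss; both sides are exponentiated, presumably to penalize errors
        # on high-overlap boxes more heavily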
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_preds = rcnn_bbox_preds[bbox_batch_sampled_mask]
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights have no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        return loss_dict
class RefineFasterRCNN(Model):
    def forward(self, feed_dict):

        prediction_dict = {}

        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        # batch_size = base_feat.shape[0]

        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))

        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        # shape(N,C)
        pooled_feat = pooled_feat.mean(3).mean(2)

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)

        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][0][
            proposals_order]

        return prediction_dict

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = FeatureExtractor(
            self.feature_extractor_config)
        #  self.rpn_model = RPNModel(self.rpn_config)
        self.rpn_model = RefineRPNModel(self.rpn_config)
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                        1.0 / 16.0)
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(2048, 4)
        else:
            self.rcnn_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

        # loss module
        # if self.use_focal_loss:
        # self.rcnn_cls_loss = FocalLoss(2)
        # else:
        # self.rcnn_cls_loss = functools.partial(
        # F.cross_entropy, reduce=False)

        self.rcnn_cls_loss = nn.MSELoss(reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']

        # some submodule config
        self.feature_extractor_config = model_config['feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = RefineTargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        # self.sampler = HardNegativeSampler(model_config['sampler_config'])
        self.sampler = BalancedSampler(model_config['sampler_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        pos_indicator = rcnn_cls_targets > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size, pos_indicator, indicator=indicator)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = rcnn_cls_weights.type(torch.cuda.ByteTensor).sum(
            dim=-1)
        num_reg_coeff = rcnn_reg_weights.type(torch.cuda.ByteTensor).sum(
            dim=-1)
        # check
        assert num_cls_coeff, 'no valid cls targets after sampling'
        assert num_reg_coeff, 'no valid reg targets after sampling'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        fg_rcnn_cls_probs = rcnn_cls_probs[:, 1]
        # exp
        fg_rcnn_cls_probs = torch.exp(fg_rcnn_cls_probs)
        rcnn_cls_targets = torch.exp(rcnn_cls_targets)
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_loss = self.rcnn_cls_loss(
            fg_rcnn_cls_probs, rcnn_cls_targets.type_as(fg_rcnn_cls_probs))
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights have no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of rpn
        loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        return loss_dict
class SemanticFasterRCNN(Model):
    def forward(self, feed_dict):
        #  import ipdb
        #  ipdb.set_trace()
        prediction_dict = {}

        self.profiler.start('base_model')
        # base model
        base_feat = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': base_feat})
        self.profiler.end('base_model')
        # batch_size = base_feat.shape[0]

        self.profiler.start('rpn')
        # rpn model
        prediction_dict.update(self.rpn_model.forward(feed_dict))
        self.profiler.end('rpn')

        # proposals = prediction_dict['proposals_batch']
        # shape(N,num_proposals,5)
        # pre-subsample to reduce memory consumption
        if self.training:
            self.pre_subsample(prediction_dict, feed_dict)
        rois_batch = prediction_dict['rois_batch']

        self.profiler.start('roipooling')
        # note here base_feat (N,C,H,W),rois_batch (N,num_proposals,5)
        pooled_feat = self.rcnn_pooling(base_feat, rois_batch.view(-1, 5))
        self.profiler.end('roipooling')

        self.profiler.start('second_stage')
        # shape(N,C,1,1)
        pooled_feat = self.feature_extractor.second_stage_feature(pooled_feat)
        self.profiler.end('second_stage')

        # semantic map
        # if self.use_self_attention:
        # pooled_feat_cls = pooled_feat.mean(3).mean(2)
        # rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat_cls)
        # rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        # # self-attention
        # channel_attention = self.generate_channel_attention(pooled_feat)
        # spatial_attention = self.generate_spatial_attention(pooled_feat)
        # pooled_feat_reg = pooled_feat * channel_attention
        # pooled_feat_reg = pooled_feat * spatial_attention
        # pooled_feat_reg = pooled_feat_reg.mean(3).mean(2)

        # rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat_reg)
        # else:
        # rcnn_cls_scores_map = self.rcnn_cls_pred(pooled_feat)
        # rcnn_cls_scores = rcnn_cls_scores_map.mean(3).mean(2)
        # saliency_map = F.softmax(rcnn_cls_scores_map, dim=1)
        pooled_feat = pooled_feat.mean(3).mean(2)

        # rcnn_cls_probs = rcnn_cls_probs_map.mean(3).mean(2)
        # shape(N,C)
        # rcnn_bbox_feat = pooled_feat * saliency_map[:, 1:, :, :]
        # rcnn_bbox_feat = torch.cat([rcnn_bbox_feat, pooled_feat], dim=1)
        # rcnn_bbox_feat = rcnn_bbox_feat.mean(3).mean(2)

        # if self.use_score:
        # pooled_feat =

        rcnn_bbox_preds = self.rcnn_bbox_pred(pooled_feat)
        rcnn_cls_scores = self.rcnn_cls_pred(pooled_feat)
        rcnn_cls_probs = F.softmax(rcnn_cls_scores, dim=1)

        prediction_dict['rcnn_cls_probs'] = rcnn_cls_probs
        prediction_dict['rcnn_bbox_preds'] = rcnn_bbox_preds
        prediction_dict['rcnn_cls_scores'] = rcnn_cls_scores

        # used for track
        proposals_order = prediction_dict['proposals_order']
        prediction_dict['second_rpn_anchors'] = prediction_dict['anchors'][
            proposals_order]

        return prediction_dict

    def generate_channel_attention(self, feat):
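        # per-channel attention via global average pooling over the spatial
        # dimensions, shape (N, C, 1, 1)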
        return feat.mean(3, keepdim=True).mean(2, keepdim=True)

    def generate_spatial_attention(self, feat):
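        # per-location attention map from a single-channel conv (built in
        # init_modules only when use_self_attention is enabled)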
        return self.spatial_attention(feat)

    def init_weights(self):
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        Filler.normal_init(self.rcnn_cls_pred, 0, 0.01, self.truncated)
        Filler.normal_init(self.rcnn_bbox_pred, 0, 0.001, self.truncated)

    def init_modules(self):
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)
        if self.pooling_mode == 'align':
            self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                            self.pooling_size, 1.0 / 16.0)
        elif self.pooling_mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif self.pooling_mode == 'psalign':
            raise NotImplementedError('have not implemented yet!')
        elif self.pooling_mode == 'deformable_psalign':
            raise NotImplementedError('have not implemented yet!')
        # if self.use_self_attention:
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)
        # else:
        # self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Linear(2048, 4)
            # self.rcnn_bbox_pred = nn.Conv2d(2048,4,3,1,1)
        else:
            self.rcnn_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

        # loss module
        if self.use_focal_loss:
            self.rcnn_cls_loss = FocalLoss(self.n_classes)
        else:
            self.rcnn_cls_loss = functools.partial(F.cross_entropy,
                                                   reduce=False)

        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # attention
        if self.use_self_attention:
            self.spatial_attention = nn.Conv2d(2048, 1, 3, 1, 1)

    def init_param(self, model_config):
        classes = model_config['classes']
        self.classes = classes
        # including bg
        self.n_classes = len(classes) + 1
        self.class_agnostic = model_config['class_agnostic']
        self.pooling_size = model_config['pooling_size']
        self.pooling_mode = model_config['pooling_mode']
        self.crop_resize_with_max_pool = model_config[
            'crop_resize_with_max_pool']
        self.truncated = model_config['truncated']

        self.use_focal_loss = model_config['use_focal_loss']
        self.subsample_twice = model_config['subsample_twice']
        self.rcnn_batch_size = model_config['rcnn_batch_size']
        self.use_self_attention = model_config.get('use_self_attention')

        # some submodule config
        self.feature_extractor_config = model_config[
            'feature_extractor_config']
        self.rpn_config = model_config['rpn_config']

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = BalancedSampler(model_config['sampler_config'])

        self.profiler = Profiler()

    def pre_subsample(self, prediction_dict, feed_dict):
        rois_batch = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        ##########################
        # assigner
        ##########################
        #  import ipdb
        #  ipdb.set_trace()
        rcnn_cls_targets, rcnn_reg_targets,\
            rcnn_cls_weights, rcnn_reg_weights = self.target_assigner.assign(
            rois_batch[:, :, 1:], gt_boxes, gt_labels)

        ##########################
        # subsampler
        ##########################
        cls_criterion = None
        pos_indicator = rcnn_reg_weights > 0
        indicator = rcnn_cls_weights > 0

        # subsample from all
        # shape (N,M)
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            pos_indicator,
            indicator=indicator,
            criterion=cls_criterion)
        rcnn_cls_weights = rcnn_cls_weights[batch_sampled_mask]
        rcnn_reg_weights = rcnn_reg_weights[batch_sampled_mask]
        num_cls_coeff = (rcnn_cls_weights > 0).sum(dim=-1)
        num_reg_coeff = (rcnn_reg_weights > 0).sum(dim=-1)
        # check
        assert num_cls_coeff, 'no valid cls targets after sampling'
        assert num_reg_coeff, 'no valid reg targets after sampling'

        prediction_dict[
            'rcnn_cls_weights'] = rcnn_cls_weights / num_cls_coeff.float()
        prediction_dict[
            'rcnn_reg_weights'] = rcnn_reg_weights / num_reg_coeff.float()
        prediction_dict['rcnn_cls_targets'] = rcnn_cls_targets[
            batch_sampled_mask]
        prediction_dict['rcnn_reg_targets'] = rcnn_reg_targets[
            batch_sampled_mask]
        prediction_dict['fake_match'] = self.target_assigner.analyzer.match[
            batch_sampled_mask]

        # update rois_batch
        prediction_dict['rois_batch'] = rois_batch[batch_sampled_mask].view(
            rois_batch.shape[0], -1, 5)

        if not self.training:
            # used for track
            proposals_order = prediction_dict['proposals_order']

            prediction_dict['proposals_order'] = proposals_order[
                batch_sampled_mask]

    #  def umap_reg_targets():
    #  """
    #  expand rcnn_reg_targets(shape (N, 4) to shape(N, 4 * num_classes))
    #  """
    #  pass
    def squeeze_bbox_preds(self, rcnn_bbox_preds, rcnn_cls_targets):
        """
        squeeze rcnn_bbox_preds from shape (N, 4 * num_classes) to shape (N, 4)
        Args:
            rcnn_bbox_preds: shape(N, num_classes, 4)
            rcnn_cls_targets: shape(N, 1)
        """
        rcnn_bbox_preds = rcnn_bbox_preds.view(-1, self.n_classes, 4)
        batch_size = rcnn_bbox_preds.shape[0]
        offset = torch.arange(0, batch_size) * rcnn_bbox_preds.size(1)
        rcnn_cls_targets = rcnn_cls_targets + offset.type_as(rcnn_cls_targets)
        rcnn_bbox_preds = rcnn_bbox_preds.view(-1, 4)[rcnn_cls_targets]
        return rcnn_bbox_preds

    def loss(self, prediction_dict, feed_dict):
        """
        assign proposals label and subsample from them
        Then calculate loss
        """
        loss_dict = {}

        # submodule loss
        loss_dict.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets and weights
        rcnn_cls_weights = prediction_dict['rcnn_cls_weights']
        rcnn_reg_weights = prediction_dict['rcnn_reg_weights']

        rcnn_cls_targets = prediction_dict['rcnn_cls_targets']
        rcnn_reg_targets = prediction_dict['rcnn_reg_targets']

        # classification loss
        rcnn_cls_scores = prediction_dict['rcnn_cls_scores']
        rcnn_cls_loss = self.rcnn_cls_loss(rcnn_cls_scores, rcnn_cls_targets)
        rcnn_cls_loss *= rcnn_cls_weights
        rcnn_cls_loss = rcnn_cls_loss.sum(dim=-1)

        # bounding box regression L1 loss
        rcnn_bbox_preds = prediction_dict['rcnn_bbox_preds']
        if not self.class_agnostic:
            rcnn_bbox_preds = self.squeeze_bbox_preds(rcnn_bbox_preds,
                                                      rcnn_cls_targets)
        rcnn_bbox_loss = self.rcnn_bbox_loss(rcnn_bbox_preds,
                                             rcnn_reg_targets).sum(dim=-1)
        rcnn_bbox_loss *= rcnn_reg_weights
        # rcnn_bbox_loss *= rcnn_reg_weights
        rcnn_bbox_loss = rcnn_bbox_loss.sum(dim=-1)

        # loss weights have no gradients
        loss_dict['rcnn_cls_loss'] = rcnn_cls_loss
        loss_dict['rcnn_bbox_loss'] = rcnn_bbox_loss

        # add rcnn_cls_targets to get the statistics of rpn
        # loss_dict['rcnn_cls_targets'] = rcnn_cls_targets

        # analysis ap
        rcnn_cls_probs = prediction_dict['rcnn_cls_probs']
        num_gt = feed_dict['gt_labels'].numel()
        fake_match = prediction_dict['fake_match']
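        # fake_match records which gt each sampled ROI was assigned to (stored
        # in pre_subsample); together with the foreground probabilities it lets
        # the analyzer track a rough AP statistic for the sampled ROIs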
        self.target_assigner.analyzer.analyze_ap(fake_match,
                                                 rcnn_cls_probs[:, 1],
                                                 num_gt,
                                                 thresh=0.1)

        return loss_dict