def read_anchors():
    """Generate the default anchor set for a 1x1 feature map.

    Returns:
        numpy.ndarray: the anchors of the first (only) feature map,
        moved to CPU and converted from a torch tensor.
    """
    config = {
        "base_anchor_size": 16,
        "scales": [4, 8, 16],
        "aspect_ratios": [0.5, 0.8, 1],
        "anchor_stride": [16, 16],
        "anchor_offset": [0, 0],
    }
    generator = AnchorGenerator(config)
    generated = generator.generate([[1, 1]])
    return generated[0].cpu().numpy()
class RPNModel(Model):
    """Region Proposal Network head with separate cls/reg subsampling."""

    def init_param(self, model_config):
        """Read hyper-parameters and build sampler/anchor-generator/assigner."""
        self.in_channels = model_config['din']
        self.post_nms_topN = model_config['post_nms_topN']
        self.pre_nms_topN = model_config['pre_nms_topN']
        self.nms_thresh = model_config['nms_thresh']
        self.use_score = model_config['use_score']
        self.num_cls_samples = model_config['num_cls_samples']
        self.num_reg_samples = model_config['num_reg_samples']
        self.use_focal_loss = model_config['use_focal_loss']

        # sampler (all sampled boxes are treated as foreground candidates)
        self.sampler = DetectionSampler({"fg_fraction": 1.0})

        # anchor generator
        self.anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        self.num_anchors = self.anchor_generator.num_anchors
        self.nc_bbox_out = 4 * self.num_anchors
        self.nc_score_out = self.num_anchors * 2

        # target assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox coder is shared with the target assigner
        self.bbox_coder = self.target_assigner.bbox_coder

        self.use_iou = model_config.get('use_iou')

    def init_weights(self):
        """Initialize conv layers with a zero-mean Gaussian (std 0.01)."""
        self.truncated = False
        Filler.normal_init(self.rpn_conv, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated)

    def unfreeze_modules(self):
        """Re-enable gradients for the IoU/IoG/IoD map conv parameters.

        NOTE(review): the rpn_*_map_conv_io* layers referenced here are not
        created by this class's init_modules — calling this will raise
        AttributeError unless a subclass defines them; confirm intent.
        """
        unfreeze_modules = [
            self.rpn_coarse_map_conv_iod.bias,
            self.rpn_fine_map_conv_iod.bias,
            self.rpn_coarse_map_conv_iog.bias,
            self.rpn_fine_map_conv_iog.bias,
            self.rpn_coarse_map_conv_iou.bias,
            self.rpn_fine_map_conv_iou.bias,
            self.rpn_coarse_map_conv_iod.weight,
            self.rpn_fine_map_conv_iod.weight,
            self.rpn_coarse_map_conv_iog.weight,
            self.rpn_fine_map_conv_iog.weight,
            self.rpn_coarse_map_conv_iou.weight,
            self.rpn_fine_map_conv_iou.weight,
        ]
        for module in unfreeze_modules:
            module.requires_grad = True

    def init_modules(self):
        """Create the conv layers and loss functions."""
        # conv-relu layer processing the input feature map
        self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True)

        # bg/fg classification score layer
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # anchor box offset prediction layer
        if self.use_score:
            bbox_feat_channels = 512 + 2
            # BUGFIX: was true division (/=) which yields a float and makes
            # nn.Conv2d reject out_channels; use integer division.
            self.nc_bbox_out //= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out,
                                       1, 1, 0)

        # bbox regression loss (per-element; reduction handled in loss())
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # classification loss
        if self.use_focal_loss:
            self.rpn_cls_loss = FocalLoss(2)
        else:
            self.rpn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        """Decode, clip, sort and NMS-filter anchor predictions.

        Args:
            rpn_cls_probs: FloatTensor, shape(N, 2*num_anchors, H, W)
            anchors: list with one FloatTensor of anchors
            rpn_bbox_preds: FloatTensor, shape(N, num_anchors*4, H, W)
            im_info: image size info used for clipping

        Returns:
            proposals_batch: FloatTensor, shape(N, post_nms_topN, 4),
                zero-padded past the number of kept proposals
            proposals_order: LongTensor, shape(N, post_nms_topN), indices of
                the kept anchors in score order, -1 where padded
        """
        # only one feature map is supported
        anchors = anchors[0]

        # do not backprop through proposal generation
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N, H*W*num_anchors, 4)
        rpn_bbox_preds = rpn_bbox_preds.view(batch_size, -1, 4)

        # apply deltas to anchors to decode boxes
        proposals = self.bbox_coder.decode_batch(rpn_bbox_preds, anchors)

        # clip boxes to the image boundary
        proposals = box_ops.clip_boxes(proposals, im_info)

        # foreground probability per anchor
        fg_probs = rpn_cls_probs[:, self.num_anchors:, :, :]
        fg_probs = fg_probs.permute(0, 2, 3,
                                    1).contiguous().view(batch_size, -1)

        # sort by fg score, descending
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)

        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(rpn_bbox_preds)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]

            # pre-NMS top-k
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # NMS on (boxes, score) rows
            keep_idx_i = nms(
                torch.cat((proposals_single, fg_probs_single.unsqueeze(1)),
                          1), self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post-NMS top-k
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_probs_single = fg_probs_single[keep_idx_i]
            fg_order_single = fg_order_single[keep_idx_i]

            # pad with zeros at the end
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            proposals_order[i, :num_proposal] = fg_order_single

        return proposals_batch, proposals_order

    def forward(self, bottom_blobs):
        """Run the RPN head and produce proposals plus tensors for loss()."""
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']
        im_info = bottom_blobs['im_info']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn cls score, shape(N, 2*num_anchors, H, W)
        rpn_cls_scores = self.rpn_cls_score(rpn_conv)

        # rpn cls prob: softmax over the 2 (bg/fg) channel groups
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)

        # rpn bbox pred, shape(N, 4*num_anchors, H, W)
        if self.use_score:
            # shape (N, 2, num_anchors*H*W)
            rpn_cls_scores = rpn_cls_score_reshape.permute(0, 2, 1)
            rpn_bbox_preds = []
            for i in range(self.num_anchors):
                # NOTE(review): rpn_cls_scores is 3-D here, so this 4-index
                # slice looks wrong and the loop index is unused — confirm
                # against the original intent before relying on use_score.
                rpn_bbox_feat = torch.cat(
                    [rpn_conv, rpn_cls_scores[:, ::self.num_anchors, :, :]],
                    dim=1)
                rpn_bbox_preds.append(self.rpn_bbox_pred(rpn_bbox_feat))
            rpn_bbox_preds = torch.cat(rpn_bbox_preds, dim=1)
        else:
            # rpn offsets relative to the anchor boxes
            rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)

        # generate anchors for this feature map size
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        ###############################
        # Proposal
        ###############################
        # proposals_order tracks which anchors produced each proposal
        proposals_batch, proposals_order = self.generate_proposal(
            rpn_cls_probs, anchors, rpn_bbox_preds, im_info)
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat((batch_idx.unsqueeze(-1), proposals_batch),
                               dim=2)

        if self.training:
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        # reshape scores/probs to (N, H*W*num_anchors, 2) for the loss
        rpn_cls_scores = rpn_cls_scores.view(batch_size, 2, -1,
                                             rpn_cls_scores.shape[2],
                                             rpn_cls_scores.shape[3])
        rpn_cls_scores = rpn_cls_scores.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)
        rpn_cls_probs = rpn_cls_probs.view(batch_size, 2, -1,
                                           rpn_cls_probs.shape[2],
                                           rpn_cls_probs.shape[3])
        rpn_cls_probs = rpn_cls_probs.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)

        predict_dict = {
            'proposals_batch': proposals_batch,
            'rpn_cls_scores': rpn_cls_scores,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_probs': rpn_cls_probs,
            'proposals_order': proposals_order,
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Append ground-truth boxes (as extra rois) to rois_batch."""
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        # column 0 stays 0 (batch index placeholder), columns 1:5 are coords
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def loss(self, prediction_dict, feed_dict):
        """Compute cls and bbox losses with separate cls/reg subsampling."""
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']

        anchors = prediction_dict['anchors']
        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no gt labels needed: this is a binary fg/bg classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None)

        ################################
        # double subsample
        ################################
        rpn_cls_probs = prediction_dict['rpn_cls_probs'][:, :, 1]
        cls_criterion = rpn_cls_probs

        # cls loss (per-element, masked below)
        rpn_cls_score = prediction_dict['rpn_cls_scores']
        rpn_cls_loss = self.rpn_cls_loss(rpn_cls_score.view(-1, 2),
                                         rpn_cls_targets.view(-1))
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)

        # cls subsample: sample among all anchors with positive cls weight
        indicator = rpn_cls_weights > 0
        pos_indicator = indicator
        cls_batch_sampled_mask = self.sampler.subsample_batch(
            self.num_cls_samples,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        cls_batch_sampled_mask = cls_batch_sampled_mask.type_as(
            rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * cls_batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)

        # reg subsample: sample only from foreground anchors
        pos_indicator = rpn_reg_weights > 0
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N, H*W*num_anchors, 4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        reg_batch_sampled_mask = self.sampler.subsample_batch(
            self.num_reg_samples,
            pos_indicator,
            criterion=cls_criterion,
            indicator=None)
        # union with the cls sample so every cls sample also gets reg loss
        reg_batch_sampled_mask |= cls_batch_sampled_mask.type_as(
            reg_batch_sampled_mask)
        reg_batch_sampled_mask = reg_batch_sampled_mask.type_as(
            rpn_cls_weights)
        rpn_reg_weights = rpn_reg_weights * reg_batch_sampled_mask
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)

        # guard against division by zero
        # NOTE(review): `== 0` on a multi-element tensor is only valid for
        # batch size 1 — confirm the intended batch handling.
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # normalized cls loss
        rpn_cls_loss *= rpn_cls_weights
        rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()

        # normalized bbox loss, shape(N, num, 4)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(
            rpn_reg_loss.shape[0], -1).sum(dim=1) / num_reg_coeff.float()

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss
        return loss_dict
import sys
sys.path.append('.')

# BUGFIX: numpy was used below via `np.array` but never imported, which
# raises NameError when this script runs.
import numpy as np

from core.anchor_generators.anchor_generator import AnchorGenerator
from lib.model.rpn.generate_anchors import generate_anchors
from utils.visualize import visualize_bbox, read_img, shift_bbox

anchor_generator_config = {
    "base_anchor_size": 1,
    "scales": [4],
    "aspect_ratios": [1],
    "anchor_stride": [16, 16],
    "anchor_offset": [0, 0]
}
anchor_generator = AnchorGenerator(anchor_generator_config)
anchors = anchor_generator.generate([[24, 80]])

# reference anchors produced by the legacy generator, for comparison
expect_anchors = generate_anchors(
    base_size=anchor_generator_config['base_anchor_size'],
    ratios=np.array(anchor_generator_config['aspect_ratios']),
    scales=np.array(anchor_generator_config['scales']))

img = read_img('/data/object/training/image_2/000117.png')


def vis_help(anchors, expect_anchors):
    """Visualize the generated anchors on the sample image.

    `expect_anchors` is currently unused; the side-by-side comparison with
    the legacy generator was disabled.
    """
    visualize_bbox(img, anchors)
class RPNModel(Model):
    """Region Proposal Network head using a Proposal autograd Function and a
    single joint subsampling pass for cls and reg losses."""

    def init_param(self, model_config):
        """Read hyper-parameters and build sampler/anchor-generator/assigner."""
        self.in_channels = model_config['din']
        self.post_nms_topN = model_config['post_nms_topN']
        self.pre_nms_topN = model_config['pre_nms_topN']
        self.nms_thresh = model_config['nms_thresh']
        self.use_score = model_config['use_score']
        self.rpn_batch_size = model_config['rpn_batch_size']
        self.use_focal_loss = model_config['use_focal_loss']

        # sampler
        self.sampler = DetectionSampler(model_config['sampler_config'])

        # anchor generator
        self.anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        self.num_anchors = self.anchor_generator.num_anchors
        self.nc_bbox_out = 4 * self.num_anchors
        self.nc_score_out = self.num_anchors * 2

        # target assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox coder is shared with the target assigner
        self.bbox_coder = self.target_assigner.bbox_coder

        self.use_iou = model_config.get('use_iou')

    def init_weights(self):
        """Initialize conv layers with a zero-mean Gaussian (std 0.01)."""
        self.truncated = False
        Filler.normal_init(self.rpn_conv, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated)

    def init_modules(self):
        """Create the conv layers and loss functions."""
        # conv-relu layer processing the input feature map
        self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True)

        # bg/fg classification score layer
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # anchor box offset prediction layer
        if self.use_score:
            bbox_feat_channels = 512 + 2
            # BUGFIX: was true division (/=) which yields a float and makes
            # nn.Conv2d reject out_channels; use integer division.
            self.nc_bbox_out //= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out,
                                       1, 1, 0)

        # bbox regression loss (per-element; reduction handled in loss())
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # classification loss
        if self.use_focal_loss:
            self.rpn_cls_loss = FocalLoss(2)
        else:
            self.rpn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

    def forward(self, bottom_blobs):
        """Run the RPN head and produce rois plus tensors for loss()."""
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']
        im_info = bottom_blobs['im_info']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn cls score, shape(N, 2*num_anchors, H, W)
        rpn_cls_scores = self.rpn_cls_score(rpn_conv)

        # rpn cls prob: softmax over the 2 (bg/fg) channel groups
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)

        # rpn bbox pred, shape(N, 4*num_anchors, H, W)
        if self.use_score:
            # shape (N, 2, num_anchors*H*W)
            rpn_cls_scores = rpn_cls_score_reshape.permute(0, 2, 1)
            rpn_bbox_preds = []
            for i in range(self.num_anchors):
                # NOTE(review): rpn_cls_scores is 3-D here, so this 4-index
                # slice looks wrong and the loop index is unused — confirm
                # against the original intent before relying on use_score.
                rpn_bbox_feat = torch.cat(
                    [rpn_conv, rpn_cls_scores[:, ::self.num_anchors, :, :]],
                    dim=1)
                rpn_bbox_preds.append(self.rpn_bbox_pred(rpn_bbox_feat))
            rpn_bbox_preds = torch.cat(rpn_bbox_preds, dim=1)
        else:
            # rpn offsets relative to the anchor boxes
            rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)

        # generate anchors for this feature map size
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        ###############################
        # Proposal
        ###############################
        # proposals_order tracks which anchors produced each proposal
        rois_batch, proposals_order = Proposal.apply(rpn_cls_probs, anchors,
                                                     rpn_bbox_preds, im_info)

        if self.training:
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        # reshape scores/probs to (N, H*W*num_anchors, 2) for the loss
        rpn_cls_scores = rpn_cls_scores.view(batch_size, 2, -1,
                                             rpn_cls_scores.shape[2],
                                             rpn_cls_scores.shape[3])
        rpn_cls_scores = rpn_cls_scores.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)
        rpn_cls_probs = rpn_cls_probs.view(batch_size, 2, -1,
                                           rpn_cls_probs.shape[2],
                                           rpn_cls_probs.shape[3])
        rpn_cls_probs = rpn_cls_probs.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)

        predict_dict = {
            'rpn_cls_scores': rpn_cls_scores,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_probs': rpn_cls_probs,
            'proposals_order': proposals_order,
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Append ground-truth boxes (as extra rois) to rois_batch."""
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        # column 0 stays 0 (batch index placeholder), columns 1:5 are coords
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def loss(self, prediction_dict, feed_dict):
        """Compute cls and bbox losses with one joint subsampling pass."""
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']

        anchors = prediction_dict['anchors']
        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no gt labels needed: this is a binary fg/bg classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None)

        ################################
        # subsample
        ################################
        pos_indicator = rpn_reg_weights > 0
        indicator = rpn_cls_weights > 0

        # ranking criterion: either assigned overlaps or predicted fg prob
        if self.use_iou:
            cls_criterion = \
                self.target_assigner.matcher.assigned_overlaps_batch
        else:
            rpn_cls_probs = prediction_dict['rpn_cls_probs'][:, :, 1]
            cls_criterion = rpn_cls_probs

        batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * batch_sampled_mask
        rpn_reg_weights = rpn_reg_weights * batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)

        # guard against division by zero
        # NOTE(review): `== 0` on a multi-element tensor is only valid for
        # batch size 1 — confirm the intended batch handling.
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # cls loss
        rpn_cls_score = prediction_dict['rpn_cls_scores']
        rpn_cls_loss = self.rpn_cls_loss(rpn_cls_score.view(-1, 2),
                                         rpn_cls_targets.view(-1))
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)
        rpn_cls_loss *= rpn_cls_weights
        rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()

        # bbox loss, shape(N, num, 4)
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N, H*W*num_anchors, 4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(
            rpn_reg_loss.shape[0], -1).sum(dim=1) / num_reg_coeff.float()

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss
        return loss_dict
class LEDRPNModel(Model): def init_param(self, model_config): self.in_channels = model_config['din'] self.post_nms_topN = model_config['post_nms_topN'] self.pre_nms_topN = model_config['pre_nms_topN'] self.nms_thresh = model_config['nms_thresh'] self.use_score = model_config['use_score'] self.rpn_batch_size = model_config['rpn_batch_size'] self.use_focal_loss = model_config['use_focal_loss'] self.alpha = 0.6 self.theta = 1.0 self.iox_bbox_coder = DiscreteBBoxCoder( model_config['iox_coder_config']) self.use_sharpL2 = model_config.get('use_sharpL2') self.use_sigmoid = model_config['use_sigmoid'] self.use_cls_pred = model_config['use_cls_pred'] # sampler # self.sampler = HardNegativeSampler(model_config['sampler_config']) # self.sampler = BalancedSampler(model_config['sampler_config']) self.sampler = DetectionSampler(model_config['sampler_config']) # anchor generator self.anchor_generator = AnchorGenerator( model_config['anchor_generator_config']) self.num_anchors = self.anchor_generator.num_anchors self.nc_bbox_out = 4 * self.num_anchors self.nc_score_out = self.num_anchors * 2 # target assigner self.target_assigner = LEDTargetAssigner( model_config['target_assigner_config']) # bbox coder self.bbox_coder = self.target_assigner.bbox_coder def iox_clip(self, iox): iox = iox.clone() iox[iox < 0] = 0 iox[iox > 1] = 1 return iox def init_weights(self): self.truncated = False Filler.normal_init(self.rpn_conv, 0, 0.01, self.truncated) Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated) Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated) Filler.normal_init(self.rpn_coarse_map_conv_iod, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_fine_map_conv_iod, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_coarse_map_conv_iou, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_fine_map_conv_iou, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_fine_map_conv_iog, 0, 0.001, self.truncated) Filler.normal_init(self.rpn_coarse_map_conv_iog, 0, 
0.001, self.truncated) def unfreeze_modules(self): unfreeze_modules = [ self.rpn_coarse_map_conv_iod.bias, self.rpn_fine_map_conv_iod.bias, self.rpn_coarse_map_conv_iog.bias, self.rpn_fine_map_conv_iog.bias, self.rpn_coarse_map_conv_iou.bias, self.rpn_fine_map_conv_iou.bias, self.rpn_coarse_map_conv_iod.weight, self.rpn_fine_map_conv_iod.weight, self.rpn_coarse_map_conv_iog.weight, self.rpn_fine_map_conv_iog.weight, self.rpn_coarse_map_conv_iou.weight, self.rpn_fine_map_conv_iou.weight ] for module in unfreeze_modules: module.requires_grad = True def init_modules(self): # define the convrelu layers processing input feature map self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True) # define bg/fg classifcation score layer self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0) self.rpn_coarse_map_conv_iou = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_fine_map_conv_iou = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_coarse_map_conv_iog = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_fine_map_conv_iog = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_coarse_map_conv_iod = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) self.rpn_fine_map_conv_iod = nn.Conv2d(512, 4 * self.num_anchors, 1, 1, 0) # define anchor box offset prediction layer if self.use_score: bbox_feat_channels = 512 + 2 self.nc_bbox_out /= self.num_anchors else: bbox_feat_channels = 512 self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out, 1, 1, 0) # rpn bbox self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False) if self.use_sharpL2: self.reg_loss = SharpL2Loss() else: self.reg_loss = nn.MSELoss(reduce=False) self.cls_loss = nn.CrossEntropyLoss(reduce=False) # rpn cls if self.use_focal_loss: self.rpn_cls_loss = FocalLoss(2) else: self.rpn_cls_loss = functools.partial(F.cross_entropy, reduce=False) def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds, im_info): # TODO create a new Function """ Args: rpn_cls_probs: 
FloatTensor,shape(N,2*num_anchors,H,W) rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W) anchors: FloatTensor,shape(N,4,H,W) Returns: proposals_batch: FloatTensor, shape(N,post_nms_topN,4) fg_probs_batch: FloatTensor, shape(N,post_nms_topN) """ # assert len( # rpn_bbox_preds) == 1, 'just one feature maps is supported now' # rpn_bbox_preds = rpn_bbox_preds[0] anchors = anchors[0] # do not backward anchors = anchors rpn_fg_cls_probs = rpn_cls_probs.detach() rpn_bbox_preds = rpn_bbox_preds.detach() batch_size = rpn_bbox_preds.shape[0] rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous() # shape(N,H*W*num_anchors,4) rpn_bbox_preds = rpn_bbox_preds.view(batch_size, -1, 4) # apply deltas to anchors to decode # loop here due to many features maps # proposals = [] # for rpn_bbox_preds_single_map, anchors_single_map in zip( # rpn_bbox_preds, anchors): # proposals.append( # self.bbox_coder.decode(rpn_bbox_preds_single_map, # anchors_single_map)) # proposals = torch.cat(proposals, dim=1) proposals = self.bbox_coder.decode_batch(rpn_bbox_preds, anchors) # filer and clip proposals = box_ops.clip_boxes(proposals, im_info) # fg prob # fg_probs = rpn_cls_probs[:, self.num_anchors:, :, :] # fg_probs = fg_probs.permute(0, 2, 3, 1).contiguous().view(batch_size, # -1) fg_probs = rpn_fg_cls_probs # sort fg _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True) # fg_probs_batch = torch.zeros(batch_size, # self.post_nms_topN).type_as(rpn_cls_probs) proposals_batch = torch.zeros(batch_size, self.post_nms_topN, 4).type_as(rpn_bbox_preds) proposals_order = torch.zeros( batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order) for i in range(batch_size): proposals_single = proposals[i] fg_probs_single = fg_probs[i] fg_order_single = fg_probs_order[i] # pre nms if self.pre_nms_topN > 0: fg_order_single = fg_order_single[:self.pre_nms_topN] proposals_single = proposals_single[fg_order_single] fg_probs_single = fg_probs_single[fg_order_single] # nms keep_idx_i 
= nms( torch.cat((proposals_single, fg_probs_single.unsqueeze(1)), 1), self.nms_thresh) keep_idx_i = keep_idx_i.long().view(-1) # post nms if self.post_nms_topN > 0: keep_idx_i = keep_idx_i[:self.post_nms_topN] proposals_single = proposals_single[keep_idx_i, :] fg_probs_single = fg_probs_single[keep_idx_i] fg_order_single = fg_order_single[keep_idx_i] # padding 0 at the end. num_proposal = keep_idx_i.numel() proposals_batch[i, :num_proposal, :] = proposals_single # fg_probs_batch[i, :num_proposal] = fg_probs_single proposals_order[i, :num_proposal] = fg_order_single return proposals_batch, proposals_order def iou_pred(self, rpn_conv): return self.iox_pred(rpn_conv, self.rpn_coarse_map_conv_iou, self.rpn_fine_map_conv_iou) def iog_pred(self, rpn_conv): return self.iox_pred(rpn_conv, self.rpn_coarse_map_conv_iog, self.rpn_fine_map_conv_iog) def iod_pred(self, rpn_conv): return self.iox_pred(rpn_conv, self.rpn_coarse_map_conv_iod, self.rpn_fine_map_conv_iod) def iox_pred(self, rpn_conv, rpn_coarse_map_conv, rpn_fine_map_conv): batch_size = rpn_conv.shape[0] coarse_map = rpn_coarse_map_conv(rpn_conv) fine_map = rpn_fine_map_conv(rpn_conv) coarse_map_reshape = coarse_map.view(batch_size, 4, -1) iou_level_probs = F.softmax(coarse_map_reshape, dim=1) iou_level_probs = iou_level_probs.view_as(coarse_map) if self.use_sigmoid: # normalize it iou_reg = 2 * F.sigmoid(fine_map) - 1 else: iou_reg = fine_map # reshape preprocess iou_reg = iou_reg.view(batch_size, 4, self.num_anchors, -1).permute( 0, 3, 2, 1).contiguous().view(batch_size, -1, 4) iou_cls = iou_level_probs.view(batch_size, 4, self.num_anchors, -1).permute(0, 3, 2, 1).contiguous().view( batch_size, -1, 4) decoded_iou = self.iox_bbox_coder.decode_batch(iou_cls, iou_reg) # used for cls and reg loss iou_cls_scores = coarse_map.view(batch_size, 4, self.num_anchors, -1).permute(0, 3, 2, 1).contiguous().view( batch_size, -1, 4) return decoded_iou, iou_cls_scores, iou_reg def calculate_iou(self, iog, iod): mask = ~(iod == 
            0)
        # Indirect IoU derived from IoG (intersection-over-ground-truth) and
        # IoD (intersection-over-detection); only positions selected by
        # `mask` are valid, the rest stay zero.
        iou_indirect = torch.zeros_like(iog)
        iod = iod[mask]
        iog = iog[mask]
        # Algebraic identity: IoU = IoD*IoG / (IoD + IoG - IoD*IoG)
        iou_indirect[mask] = (iod * iog) / (iod + iog - iod * iog)
        return iou_indirect

    def forward(self, bottom_blobs):
        """Run the IoU-aware RPN head.

        Args:
            bottom_blobs: dict providing 'base_feat' (backbone feature map,
                shape (N, C, H, W)), 'gt_boxes' and 'input_size'.

        Returns:
            dict with proposals/rois plus the raw IoU/IoG/IoD and cls
            predictions consumed by loss().
        """
        # import ipdb
        # ipdb.set_trace()
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']
        # im_info = bottom_blobs['im_info']
        im_info = bottom_blobs['input_size']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn cls: softmax over the 2 (bg/fg) channels
        rpn_cls_scores = self.rpn_cls_score(rpn_conv)
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(
            rpn_cls_score_reshape, dim=1).view_as(rpn_cls_scores)
        # reorder both probs and scores to shape (N, H*W*num_anchors, 2)
        rpn_cls_probs = rpn_cls_probs.view(
            batch_size, 2, self.num_anchors,
            -1).permute(0, 3, 2, 1).contiguous().view(batch_size, -1, 2)
        rpn_cls_scores = rpn_cls_scores.view(
            batch_size, 2, self.num_anchors,
            -1).permute(0, 3, 2, 1).contiguous().view(batch_size, -1, 2)

        # IoU/IoG/IoD heads: each returns (value, cls scores, raw reg output)
        iou, iou_scores, iou_reg = self.iou_pred(rpn_conv)
        iog, iog_scores, iog_reg = self.iog_pred(rpn_conv)
        iod, iod_scores, iod_reg = self.iod_pred(rpn_conv)

        # bugs here
        iou = self.iox_clip(iou)
        iog = self.iox_clip(iog)
        iod = self.iox_clip(iod)

        iou_indirect = self.calculate_iou(iog, iod)
        # blend the directly-predicted IoU with the indirect one
        iou_final = (1 - self.alpha) * iou_indirect + self.alpha * iou

        # import ipdb
        # ipdb.set_trace()
        # scale fg probability by predicted localization quality: a Gaussian
        # penalty on (1 - IoU) with temperature self.theta
        rpn_fg_probs_final = rpn_cls_probs[:, :, 1] * torch.exp(-torch.pow(
            (1 - iou_final), 2) / self.theta)

        # rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        # rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        # rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)

        # import ipdb
        # ipdb.set_trace()
        # rpn bbox pred
        # shape(N,4*num_anchors,H,W)
        # if self.use_score:
        #     # shape (N,2,num_anchoros*H*W)
        #     rpn_cls_scores = rpn_cls_score_reshape.permute(0, 2, 1)
        #     rpn_bbox_preds = []
        #     for i in range(self.num_anchors):
        #         rpn_bbox_feat = torch.cat(
        #             [rpn_conv, rpn_cls_scores[:, ::self.num_anchors, :, :]],
        #             dim=1)
        #         rpn_bbox_preds.append(self.rpn_bbox_pred(rpn_bbox_feat))
        #     rpn_bbox_preds = torch.cat(rpn_bbox_preds, dim=1)
        # else:
        # get rpn offsets to the anchor boxes
        rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)
        # rpn_bbox_preds = [rpn_bbox_preds]

        # generate anchors
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        ###############################
        # Proposal
        ###############################
        # note that proposals_order is used for tracking transform of proposals
        proposals_batch, proposals_order = self.generate_proposal(
            rpn_fg_probs_final, anchors, rpn_bbox_preds, im_info)
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat(
            (batch_idx.unsqueeze(-1), proposals_batch), dim=2)
        if self.training:
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        predict_dict = {
            'proposals_batch': proposals_batch,
            # used for sorting
            'rpn_iou_final': rpn_fg_probs_final,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'proposals_order': proposals_order,

            # reg
            'rpn_iou_reg': iou_reg,
            'rpn_iog_reg': iog_reg,
            'rpn_iod_reg': iod_reg,

            # cls
            'rpn_iou_scores': iou_scores,
            'rpn_iog_scores': iog_scores,
            'rpn_iod_scores': iod_scores,
            'rpn_cls_scores': rpn_cls_scores
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Append ground-truth boxes (as extra rois) for loss computation."""
        ################################
        # append gt_boxes to rois_batch for losses
        ################################
        # may be some bugs here
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        # column 0 is left as zero (batch index slot); columns 1:5 hold x1y1x2y2
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]

        # cat gt_boxes to rois_batch
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def loss(self, prediction_dict, feed_dict):
        """Compute RPN losses: optional fg/bg cls, bbox reg, and the
        IoU/IoG/IoD regression + classification heads.

        Returns:
            dict of per-batch losses keyed 'rpn/...'.
        """
        # loss for cls
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']
        anchors = prediction_dict['anchors']
        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no need gt labels here, it is just a binary classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None,
                                        input_size=feed_dict['input_size'])

        ################################
        # subsample
        ################################
        # NOTE(review): self.rpn_batch_size is read here but its assignment in
        # init_param appears commented out -- confirm it is set elsewhere.
        rpn_cls_probs = prediction_dict['rpn_iou_final']
        cls_criterion = rpn_cls_probs
        pos_indicator = rpn_reg_weights > 0
        indicator = rpn_cls_weights > 0
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * batch_sampled_mask
        rpn_reg_weights = rpn_reg_weights * batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)
        # check
        # assert num_cls_coeff, 'bug happens'
        # assert num_reg_coeff, 'bug happens'
        # guard against division by zero when nothing was sampled
        # NOTE(review): a tensor `== 0` comparison like this only works for
        # batch size 1 -- confirm batch handling.
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # iou loss (reg target is the matcher's assigned overlap)
        iou_scores = prediction_dict['rpn_iou_scores']
        iou = prediction_dict['rpn_iou_reg']
        iou_reg_targets = self.target_assigner.matcher.assigned_overlaps_batch
        iou_reg_targets_encode = self.iox_bbox_coder.encode_reg(
            iou_reg_targets)
        iou_reg_loss = self.reg_loss(iou, iou_reg_targets_encode).sum(dim=-1)
        iou_scores_targets = self.iox_bbox_coder.encode_cls(iou_reg_targets)
        iou_cls_loss = self.cls_loss(iou_scores.view(-1, 4),
                                     iou_scores_targets.view(-1))
        iou_cls_loss = iou_cls_loss.view_as(rpn_cls_weights)
        iou_cls_loss = iou_cls_loss.mean(dim=1)
        iou_reg_loss = iou_reg_loss.mean(dim=1)

        # iog loss
        iog_scores = prediction_dict['rpn_iog_scores']
        iog = prediction_dict['rpn_iog_reg']
        iog_reg_targets = self.target_assigner.matcher.assigned_iog_batch
        iog_reg_targets_encode = self.iox_bbox_coder.encode_reg(
            iog_reg_targets)
        iog_reg_loss = self.reg_loss(iog, iog_reg_targets_encode).sum(dim=-1)
        iog_scores_targets = self.iox_bbox_coder.encode_cls(iog_reg_targets)
        iog_cls_loss = self.cls_loss(iog_scores.view(-1, 4),
                                     iog_scores_targets.view(-1))
        iog_cls_loss = iog_cls_loss.view_as(rpn_cls_weights)
        iog_cls_loss = iog_cls_loss.mean(dim=1)
        iog_reg_loss = iog_reg_loss.mean(dim=1)

        # iod loss
        iod_scores = prediction_dict['rpn_iod_scores']
        iod = prediction_dict['rpn_iod_reg']
        iod_reg_targets = self.target_assigner.matcher.assigned_iod_batch
        iod_reg_targets_encode = self.iox_bbox_coder.encode_reg(
            iod_reg_targets)
        iod_reg_loss = self.reg_loss(iod, iod_reg_targets_encode).sum(dim=-1)
        iod_scores_targets = self.iox_bbox_coder.encode_cls(iod_reg_targets)
        iod_cls_loss = self.cls_loss(iod_scores.view(-1, 4),
                                     iod_scores_targets.view(-1))
        iod_cls_loss = iod_cls_loss.view_as(rpn_cls_weights)
        iod_cls_loss = iod_cls_loss.mean(dim=1)
        iod_reg_loss = iod_reg_loss.mean(dim=1)

        # cls loss (optional plain fg/bg head)
        if self.use_cls_pred:
            rpn_cls_score = prediction_dict['rpn_cls_scores']
            rpn_cls_loss = self.rpn_cls_loss(rpn_cls_score.view(-1, 2),
                                             rpn_cls_targets.view(-1))
            rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)
            rpn_cls_loss *= rpn_cls_weights
            rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()
            loss_dict['rpn/cls_loss'] = rpn_cls_loss

        loss_dict['rpn/iou_cls_loss'] = iou_cls_loss
        loss_dict['rpn/iou_reg_loss'] = iou_reg_loss
        loss_dict['rpn/iog_cls_loss'] = iog_cls_loss
        loss_dict['rpn/iog_reg_loss'] = iog_reg_loss
        loss_dict['rpn/iod_reg_loss'] = iod_reg_loss
        loss_dict['rpn/iod_cls_loss'] = iod_cls_loss

        # bbox loss
        # shape(N,num,4)
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(
            rpn_reg_loss.shape[0], -1).sum(dim=1) / num_reg_coeff.float()

        # loss_dict['rpn_cls_loss'] = iox_loss
        loss_dict['rpn/bbox_loss'] = rpn_reg_loss
        # loss_dict['iox_loss'] = iox_loss

        return loss_dict
class RPNModel(Model):
    """RPN head that decodes regression deltas against shrunken anchors.

    The classification branch predicts fg/bg per anchor position; the
    regression branch is decoded against tiny fixed-size boxes sharing the
    original anchors' centers (see generate_new_anchors), so the network
    effectively regresses object centers.
    """

    def init_param(self, model_config):
        """Read hyper-parameters and build sampler/assigner/anchor helpers."""
        self.in_channels = model_config['din']
        self.post_nms_topN = model_config['post_nms_topN']
        self.pre_nms_topN = model_config['pre_nms_topN']
        self.nms_thresh = model_config['nms_thresh']
        self.use_score = model_config['use_score']
        self.rpn_batch_size = model_config['rpn_batch_size']
        self.use_focal_loss = model_config['use_focal_loss']

        # sampler
        # self.sampler = HardNegativeSampler(model_config['sampler_config'])
        # self.sampler = BalancedSampler(model_config['sampler_config'])
        self.sampler = DetectionSampler(model_config['sampler_config'])

        # anchor generator
        self.anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        self.num_anchors = self.anchor_generator.num_anchors
        self.nc_bbox_out = 4 * self.num_anchors
        self.nc_score_out = self.num_anchors * 2

        # target assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # bbox coder
        self.bbox_coder = self.target_assigner.bbox_coder
        self.use_iou = model_config.get('use_iou')

    def init_weights(self):
        """Gaussian-initialize the head's conv layers."""
        self.truncated = False

        Filler.normal_init(self.rpn_conv_cls, 0, 0.01, self.truncated)
        # Filler.normal_init(self.rpn_conv_bbox, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated)

    def init_modules(self):
        """Build the conv layers and loss functions."""
        # define the conv+relu layer processing the input feature map
        self.rpn_conv_cls = nn.Conv2d(
            self.in_channels, 512, 3, 1, 1, bias=True)
        # self.rpn_conv_bbox = nn.Conv2d(
        #     self.in_channels, 512, 3, 1, 1, bias=True)

        # bg/fg classification score layer
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # anchor box offset prediction layer
        if self.use_score:
            bbox_feat_channels = 512 + 2
            # BUG FIX: floor division -- Python 3 true division yields a
            # float, which nn.Conv2d rejects as an out_channels count.
            self.nc_bbox_out //= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out,
                                       1, 1, 0)

        # bbox loss; per-element, reduction is done manually in loss()
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # cls loss
        if self.use_focal_loss:
            self.rpn_cls_loss = FocalLoss(
                2, alpha=0.2, gamma=2, auto_alpha=False)
        else:
            self.rpn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)

    def generate_new_anchors(self, anchors):
        """Shrink each anchor to a tiny box with the same center.

        Args:
            anchors: FloatTensor, shape (M, 4) in (x1, y1, x2, y2) order.

        Returns:
            FloatTensor, shape (M, 4): anchor_size x anchor_size boxes
            centered like the originals.
        """
        anchor_size = 2
        # anchors_w = anchors[:, :, 2] - anchors[:, :, 0] + 1
        # anchors_h = anchors[:, :, 3] - anchors[:, :, 1] + 1
        center_x = (anchors[:, 2] + anchors[:, 0]) / 2
        center_y = (anchors[:, 3] + anchors[:, 1]) / 2
        # new anchors share the center of the old ones
        min_x = center_x - (anchor_size - 1) / 2
        min_y = center_y - (anchor_size - 1) / 2
        max_x = center_x + (anchor_size - 1) / 2
        max_y = center_y + (anchor_size - 1) / 2
        return torch.stack([min_x, min_y, max_x, max_y], dim=-1)

    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        # TODO create a new Function
        """Decode, clip, sort and NMS-filter proposals from raw RPN outputs.

        Args:
            rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
            rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
            anchors: FloatTensor,shape(N,4,H,W)
        Returns:
            proposals_batch: FloatTensor, shape(N,post_nms_topN,4),
                zero-padded past the number of kept proposals
            proposals_order: LongTensor, shape(N,post_nms_topN), the source
                anchor index of each kept proposal (padded with -1)
        """
        # do not backprop through proposal generation
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(batch_size, -1, 4)

        # make anchors small, then decode the deltas against them
        new_anchors = self.generate_new_anchors(anchors)
        proposals = self.bbox_coder.decode_batch(rpn_bbox_preds, new_anchors)

        # clip to the image boundary
        proposals = box_ops.clip_boxes(proposals, im_info)

        # fg prob: second half of the channel dim holds the fg scores
        fg_probs = rpn_cls_probs[:, self.num_anchors:, :, :]
        fg_probs = fg_probs.permute(0, 2, 3,
                                    1).contiguous().view(batch_size, -1)

        # sort by fg score, descending
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)
        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(rpn_bbox_preds)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]

            # pre nms: keep only the highest-scoring candidates
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # nms
            keep_idx_i = nms(
                torch.cat((proposals_single, fg_probs_single.unsqueeze(1)),
                          1), self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post nms
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_order_single = fg_order_single[keep_idx_i]

            # pad with zeros / -1 at the end
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            proposals_order[i, :num_proposal] = fg_order_single

        return proposals_batch, proposals_order

    def forward(self, bottom_blobs):
        """Predict proposals and the raw tensors needed by loss()."""
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']  # read for interface parity
        im_info = bottom_blobs['im_info']

        # rpn conv; the bbox branch shares this cls feature map
        rpn_conv_cls = F.relu(self.rpn_conv_cls(base_feat), inplace=True)

        # rpn cls score, shape(N,2*num_anchors,H,W)
        rpn_cls_scores = self.rpn_cls_score(rpn_conv_cls)

        # rpn cls prob, softmax over the 2 (bg/fg) channels
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)

        # rpn bbox pred, shape(N,4*num_anchors,H,W); shared with cls branch
        # rpn_conv_bbox = F.relu(self.rpn_conv_bbox(base_feat), inplace=True)
        rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv_cls)

        # generate anchors
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(
            feature_map_list, input_size=im_info[0][:-1])

        ###############################
        # Proposal
        ###############################
        # proposals_order tracks which anchor each proposal came from
        proposals_batch, proposals_order = self.generate_proposal(
            rpn_cls_probs, anchors, rpn_bbox_preds, im_info)
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat(
            (batch_idx.unsqueeze(-1), proposals_batch), dim=2)

        # postprocess scores/probs to shape (N, H*W*num_anchors, 2)
        rpn_cls_scores = rpn_cls_scores.view(batch_size, 2, -1,
                                             rpn_cls_scores.shape[2],
                                             rpn_cls_scores.shape[3])
        rpn_cls_scores = rpn_cls_scores.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)
        rpn_cls_probs = rpn_cls_probs.view(batch_size, 2, -1,
                                           rpn_cls_probs.shape[2],
                                           rpn_cls_probs.shape[3])
        rpn_cls_probs = rpn_cls_probs.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)

        predict_dict = {
            'proposals_batch': proposals_batch,
            'rpn_cls_scores': rpn_cls_scores,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_probs': rpn_cls_probs,
            'proposals_order': proposals_order,
        }

        return predict_dict

    def loss(self, prediction_dict, feed_dict):
        """Compute fg/bg classification and bbox regression losses."""
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']
        anchors = prediction_dict['anchors']

        # small anchors used for decoding
        new_anchors = self.generate_new_anchors(anchors)

        #################################
        # target assigner
        ################################
        # no gt labels needed: this is a binary classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights, stats = \
            self.target_assigner.assign(anchors, gt_boxes, new_anchors,
                                        gt_labels=None)

        ################################
        # subsample
        ################################
        pos_indicator = rpn_reg_weights > 0
        indicator = rpn_cls_weights > 0
        rpn_cls_probs = prediction_dict['rpn_cls_probs'][:, :, 1]
        cls_criterion = rpn_cls_probs
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * batch_sampled_mask
        rpn_reg_weights = rpn_reg_weights * batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)
        # guard against division by zero when nothing was sampled
        # NOTE(review): tensor `== 0` comparison assumes batch size 1
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # cls loss
        rpn_cls_score = prediction_dict['rpn_cls_scores']
        rpn_cls_loss = self.rpn_cls_loss(
            rpn_cls_score.view(-1, 2), rpn_cls_targets.view(-1))
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)
        rpn_cls_loss *= rpn_cls_weights
        rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()

        # bbox loss
        # shape(N,num,4)
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(rpn_reg_loss.shape[0], -1).sum(
            dim=1) / num_reg_coeff.float()

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss

        return loss_dict
class DistanceRPNModel(Model):
    """RPN head that ranks proposals by predicted center distance.

    Instead of relying solely on the softmax fg score, proposals are ranked
    by 1/(distance + eps), where distance is the magnitude of the predicted
    (dx, dy) center offset; the fg score acts only as a 0.5-threshold gate.
    """

    def init_param(self, model_config):
        """Read hyper-parameters and build sampler/assigner/anchor helpers."""
        self.in_channels = model_config['din']
        self.post_nms_topN = model_config['post_nms_topN']
        self.pre_nms_topN = model_config['pre_nms_topN']
        self.nms_thresh = model_config['nms_thresh']
        self.use_score = model_config['use_score']
        self.rpn_batch_size = model_config['rpn_batch_size']
        self.use_focal_loss = model_config['use_focal_loss']

        # sampler
        # self.sampler = HardNegativeSampler(model_config['sampler_config'])
        self.sampler = DetectionSampler(model_config['sampler_config'])
        # can not use hem here
        # self.sampler = BalancedSampler(model_config['sampler_config'])

        # anchor generator
        self.anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        self.num_anchors = self.anchor_generator.num_anchors
        self.nc_bbox_out = 4 * self.num_anchors
        self.nc_score_out = self.num_anchors * 2

        # target assigner
        self.target_assigner = DistanceTargetAssigner(
            model_config['target_assigner_config'])

        # bbox coder
        self.bbox_coder = self.target_assigner.bbox_coder

    def init_weights(self):
        """Gaussian-initialize the head's conv layers."""
        self.truncated = False

        Filler.normal_init(self.rpn_conv, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_cls_score, 0, 0.01, self.truncated)
        Filler.normal_init(self.rpn_bbox_pred, 0, 0.01, self.truncated)

    def init_modules(self):
        """Build the conv layers and loss functions."""
        # define the conv+relu layer processing the input feature map
        self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True)

        # bg/fg classification score layer
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # anchor box offset prediction layer
        if self.use_score:
            bbox_feat_channels = 512 + 2
            # BUG FIX: floor division -- Python 3 true division yields a
            # float, which nn.Conv2d rejects as an out_channels count.
            self.nc_bbox_out //= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out,
                                       1, 1, 0)

        # bbox loss; per-element, reduction is done manually in loss()
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # cls loss
        if self.use_focal_loss:
            self.rpn_cls_loss = FocalLoss(2)
        else:
            self.rpn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        # self.rpn_cls_loss = nn.MSELoss(reduce=False)
        # self.distance_similarity_calc = DistanceSimilarityCalc()

    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        # TODO create a new Function
        """Decode, gate, sort and NMS-filter proposals.

        Args:
            rpn_cls_probs: FloatTensor,shape(N,2*num_anchors,H,W)
            rpn_bbox_preds: FloatTensor,shape(N,num_anchors*4,H,W)
            anchors: list whose first entry is the anchor tensor
        Returns:
            proposals_batch: FloatTensor, shape(N,post_nms_topN,4)
            proposals_order: LongTensor, shape(N,post_nms_topN), padded -1
            fg_probs: FloatTensor, distance-derived scores (gated by cls)
            distance: FloatTensor, predicted center distances (gated)
        """
        anchors = anchors[0]

        # do not backprop through proposal generation
        rpn_cls_probs = rpn_cls_probs.detach()
        rpn_bbox_preds = rpn_bbox_preds.detach()

        batch_size = rpn_bbox_preds.shape[0]
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(batch_size, -1, 4)

        # softmax fg prob acts as a hard gate on the distance score
        gate = rpn_cls_probs[:, self.num_anchors:, :, :]
        gate = gate.permute(0, 2, 3, 1).contiguous().view(batch_size, -1)
        fg_probs, distance = self.get_rpn_cls_probs(
            rpn_bbox_preds, anchors=None)
        fg_probs[gate < 0.5] = 0
        # push gated-out positions far away so they sort last
        distance[gate < 0.5] = 1e5

        # decode deltas against the anchors
        proposals = self.bbox_coder.decode_batch(rpn_bbox_preds, anchors)

        # clip to the image boundary
        proposals = box_ops.clip_boxes(proposals, im_info)

        # sort by (distance-derived) fg score, descending
        _, fg_probs_order = torch.sort(fg_probs, dim=1, descending=True)
        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(rpn_bbox_preds)
        proposals_order = torch.zeros(
            batch_size, self.post_nms_topN).fill_(-1).type_as(fg_probs_order)

        for i in range(batch_size):
            proposals_single = proposals[i]
            fg_probs_single = fg_probs[i]
            fg_order_single = fg_probs_order[i]

            # pre nms
            if self.pre_nms_topN > 0:
                fg_order_single = fg_order_single[:self.pre_nms_topN]
            proposals_single = proposals_single[fg_order_single]
            fg_probs_single = fg_probs_single[fg_order_single]

            # nms
            keep_idx_i = nms(
                torch.cat((proposals_single, fg_probs_single.unsqueeze(1)),
                          1), self.nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            # post nms
            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            fg_order_single = fg_order_single[keep_idx_i]

            # pad with zeros / -1 at the end
            num_proposal = keep_idx_i.numel()
            proposals_batch[i, :num_proposal, :] = proposals_single
            proposals_order[i, :num_proposal] = fg_order_single

        return proposals_batch, proposals_order, fg_probs, distance

    def get_rpn_cls_probs(self, bbox_pred, anchors=None):
        """Convert predicted (dx, dy) offsets into a ranking score.

        Note that all inputs have no gradients.

        Args:
            bbox_pred: shape (N,M,4); only channels 0 and 1 (dx, dy) are used
            anchors: unused, kept for interface compatibility
        Returns:
            (scores, distance): scores = 1/(distance + 1e-5), both shape (N,M)
        """
        dx = bbox_pred[:, :, 0]
        dy = bbox_pred[:, :, 1]
        distance = torch.sqrt(dx * dx + dy * dy)
        theta = 1e-5  # avoid division by zero at exact centers
        return 1.0 / (distance + theta), distance

    def forward(self, bottom_blobs):
        """Predict proposals ranked by center distance."""
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        gt_boxes = bottom_blobs['gt_boxes']
        im_info = bottom_blobs['im_info']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn bbox pred, shape(N,4*num_anchors,H,W)
        rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)

        # generate anchors
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        rpn_cls_scores = self.rpn_cls_score(rpn_conv)

        # softmax over the 2 (bg/fg) channels
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)
        # distance replaces rpn_cls_probs as the ranking criterion
        # rpn_cls_probs = self.get_rpn_cls_probs(rpn_bbox_preds)

        ###############################
        # Proposal
        ###############################
        # proposals_order tracks which anchor each proposal came from
        proposals_batch, proposals_order, fg_probs, distance = \
            self.generate_proposal(rpn_cls_probs, anchors, rpn_bbox_preds,
                                   im_info)
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat(
            (batch_idx.unsqueeze(-1), proposals_batch), dim=2)
        if self.training:
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        # postprocess scores to shape (N, H*W*num_anchors, 2)
        rpn_cls_scores = rpn_cls_scores.view(batch_size, 2, -1,
                                             rpn_cls_scores.shape[2],
                                             rpn_cls_scores.shape[3])
        rpn_cls_scores = rpn_cls_scores.permute(
            0, 3, 4, 2, 1).contiguous().view(batch_size, -1, 2)

        predict_dict = {
            'proposals_batch': proposals_batch,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_scores': rpn_cls_scores,
            'proposals_order': proposals_order,
            'fg_probs': fg_probs,
            'distance': distance
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Append ground-truth boxes (as extra rois) for loss computation."""
        ################################
        # append gt_boxes to rois_batch for losses
        ################################
        # may be some bugs here
        gt_boxes_append = torch.zeros(gt_boxes.shape[0], gt_boxes.shape[1],
                                      5).type_as(gt_boxes)
        # column 0 is left as zero (batch index slot); columns 1:5 hold x1y1x2y2
        gt_boxes_append[:, :, 1:5] = gt_boxes[:, :, :4]

        # cat gt_boxes to rois_batch
        rois_batch = torch.cat([rois_batch, gt_boxes_append], dim=1)
        return rois_batch

    def loss(self, prediction_dict, feed_dict):
        """Compute fg/bg classification and bbox regression losses."""
        loss_dict = {}
        gt_boxes = feed_dict['gt_boxes']
        anchors = prediction_dict['anchors']
        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no gt labels needed: this is a binary classification problem
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None)

        ################################
        # subsample
        ################################
        rpn_cls_probs = prediction_dict['fg_probs']
        pos_indicator = rpn_cls_targets > 0
        indicator = rpn_cls_weights > 0
        cls_criterion = rpn_cls_probs
        batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size,
            pos_indicator,
            criterion=cls_criterion,
            indicator=indicator)
        batch_sampled_mask = batch_sampled_mask.type_as(rpn_cls_weights)
        rpn_cls_weights = rpn_cls_weights * batch_sampled_mask
        rpn_reg_weights = rpn_reg_weights * batch_sampled_mask
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1)
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1)
        # guard against division by zero when nothing was sampled
        # NOTE(review): tensor `== 0` comparison assumes batch size 1
        if num_cls_coeff == 0:
            num_cls_coeff = torch.ones([]).type_as(num_cls_coeff)
        if num_reg_coeff == 0:
            num_reg_coeff = torch.ones([]).type_as(num_reg_coeff)

        # cls loss
        rpn_cls_scores = prediction_dict['rpn_cls_scores']
        # NOTE(review): scores are passed with the class dim last, unlike the
        # `.view(-1, 2)` convention used by RPNModel.loss -- confirm the
        # configured criterion accepts (N, M, 2)/(N, M) layouts.
        rpn_cls_loss = self.rpn_cls_loss(rpn_cls_scores, rpn_cls_targets)
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)
        rpn_cls_loss *= rpn_cls_weights
        rpn_cls_loss = rpn_cls_loss.sum(dim=1) / num_cls_coeff.float()

        # bbox loss
        # shape(N,num,4)
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        rpn_reg_loss *= rpn_reg_weights.unsqueeze(-1).expand(-1, -1, 4)
        rpn_reg_loss = rpn_reg_loss.view(rpn_reg_loss.shape[0], -1).sum(
            dim=1) / num_reg_coeff.float()

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss

        return loss_dict