Example no. 1
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):
        batch_size = base_feat.size(0)

        # the 3x3 conv + ReLU layer below is left commented out, so base_feat
        # is fed to the RPN score and bbox layers directly
        # rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
        # get rpn classification score
        rpn_cls_score = self.RPN_cls_score(base_feat)

        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)
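        # note: self.reshape(x, d) above is assumed to fold the (B, A*2, H, W)
        # score map into (B, 2, A*H, W) so the softmax runs over the two
        # bg/fg channels, and then to restore the original (B, A*2, H, W) layout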

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(base_feat)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.RPN_proposal(
            (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generate training labels and build the RPN loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))

            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0,
                                               rpn_keep)

            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            rpn_label = Variable(rpn_label.long())

            # from collections import  Counter
            # label = rpn_label.cpu().numpy()
            # print(Counter(label))

            # per-anchor background negative log-likelihood, used to rank negatives
            loss = -F.log_softmax(rpn_cls_score, dim=1)[:, 0]
            # keep all positive anchors plus the hardest negatives
            mask, num_pos = hard_negative_mining(loss, rpn_label)
            confidence = rpn_cls_score[mask, :]
            self.rpn_loss_cls = F.cross_entropy(confidence.reshape(-1, 2),
                                                rpn_label[mask],
                                                reduction='mean')

            # self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            # self.rpn_loss_cls = OHEM_loss(rpn_cls_score, rpn_label)

            fg_cnt = torch.sum(rpn_label.data.ne(0))

            (rpn_bbox_targets, rpn_bbox_inside_weights,
             rpn_bbox_outside_weights) = rpn_data[1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(
                rpn_bbox_pred,
                rpn_bbox_targets,
                rpn_bbox_inside_weights,
                rpn_bbox_outside_weights,
                sigma=3,
                dim=[1, 2, 3],
            )

        return rois, self.rpn_loss_cls, self.rpn_loss_box
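Both classification losses above rely on a hard_negative_mining helper that is not part of this listing. A minimal sketch of such a helper, assuming the common SSD-style scheme that keeps every positive sample plus the hardest negatives at a fixed ratio (the 3:1 ratio and the implementation details below are assumptions, not the exact code used here):

def hard_negative_mining(loss, labels, neg_pos_ratio=3):
    # loss:   (N,) per-sample background loss used to rank negatives
    # labels: (N,) class labels, 0 = background, > 0 = foreground
    # returns a boolean keep-mask over the N samples and the positive count
    pos_mask = labels > 0
    num_pos = pos_mask.long().sum()
    num_neg = num_pos * neg_pos_ratio

    ranked = loss.clone()
    ranked[pos_mask] = float('-inf')          # positives never compete as negatives
    _, order = ranked.sort(descending=True)   # hardest negatives first
    _, rank = order.sort()
    neg_mask = rank < num_neg                 # keep only the top-k negatives
    return pos_mask | neg_mask, num_pos
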
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)


        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        start = time.time()
        basefeat = self.RCNN_base(im_data)

        # feed the base feature map to the RPN branch to obtain RPN features
        rpn_feat = self.rpn(basefeat)

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(rpn_feat, im_info, gt_boxes, num_boxes)
        rpn_time = time.time()
        self.rpn_time = rpn_time - start
        # if it is the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        pre_roi_time = time.time()
        self.pre_roi_time = pre_roi_time - rpn_time

        # fuse the base and RPN feature maps before RoI pooling
        base_feat = self.sam([basefeat, rpn_feat])

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'align':
            pooled_feat = self._roi_align_layer(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self._roi_pool_layer(base_feat, rois.view(-1, 5))

        roi_pool_time = time.time()
        self.roi_pooling_time = roi_pool_time - pre_roi_time

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))

            bbox_pred = bbox_pred_select.squeeze(1)


        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0


        if self.training:
            # classification loss
            # RCNN_loss_cls = OHEM_loss(cls_score,rois_label)

            # per-RoI background negative log-likelihood, used to rank negatives
            loss = -F.log_softmax(cls_score, dim=1)[:, 0]
            # keep all positive RoIs plus the hardest negatives
            mask, num_pos = hard_negative_mining(loss, rois_label)
            confidence = cls_score[mask, :]
            RCNN_loss_cls = F.cross_entropy(confidence, rois_label[mask],
                                            reduction='mean')

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
            RCNN_loss_bbox = RCNN_loss_bbox * 2  # "to balance multi-task training"

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        subnet_time = time.time()
        self.subnet_time = subnet_time - roi_pool_time
        time_measure = [
            self.rpn_time, self.pre_roi_time, self.roi_pooling_time,
            self.subnet_time
        ]

        return time_measure, rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
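
The bounding-box regression terms in both forward passes call a _smooth_l1_loss helper with inside/outside weights, a sigma parameter and a list of reduction dims. It is not included in the listing either; a sketch following the form commonly used in PyTorch Faster R-CNN implementations (treat it as an assumption about the helper, not the exact code used):

import torch


def _smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights,
                    bbox_outside_weights, sigma=1.0, dim=[1]):
    # smooth L1 with the quadratic/linear transition at 1 / sigma^2;
    # inside weights select which coordinates contribute,
    # outside weights normalize the contribution of each sample
    sigma_2 = sigma ** 2
    box_diff = bbox_inside_weights * (bbox_pred - bbox_targets)
    abs_diff = torch.abs(box_diff)
    smooth_sign = (abs_diff < 1.0 / sigma_2).detach().float()
    in_loss = (box_diff ** 2) * (sigma_2 / 2.0) * smooth_sign \
        + (abs_diff - 0.5 / sigma_2) * (1.0 - smooth_sign)
    loss_box = bbox_outside_weights * in_loss
    for i in sorted(dim, reverse=True):       # reduce over the requested dims
        loss_box = loss_box.sum(i)
    return loss_box.mean()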