Ejemplo n.º 1
0
 def _roi_pooing(self, base_feat, rois):
     """Pool one feature per ROI from ``base_feat`` using the configured mode.

     The mode is read from ``cfg.RCNN_COMMON.POOLING_MODE`` and must be one
     of ``'crop'``, ``'align'`` or ``'pool'``.

     Args:
         base_feat: feature map; in 'crop' mode its ``size()[2:]`` is handed
             to the affine grid generator.
         rois: ROI tensor, flattened here to rows of 5 values with
             ``view(-1, 5)`` (presumably (batch_index, x1, y1, x2, y2) --
             TODO confirm against the pooling layers).

     Returns:
         The pooled features produced by the selected pooling layer.

     Raises:
         ValueError: if the configured pooling mode is not supported.
     """
     pooling_mode = cfg.RCNN_COMMON.POOLING_MODE
     if pooling_mode == 'crop':
         grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
         # swap the two components on the last axis (xy -> yx) for the crop layer
         grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
         pooled_feat = self.VMRN_obj_roi_crop(base_feat, Variable(grid_yx).detach())
         if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
             pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
     elif pooling_mode == 'align':
         pooled_feat = self.VMRN_obj_roi_align(base_feat, rois.view(-1, 5))
     elif pooling_mode == 'pool':
         pooled_feat = self.VMRN_obj_roi_pool(base_feat, rois.view(-1, 5))
     else:
         # previously this fell through and failed with UnboundLocalError at return
         raise ValueError(
             "Unsupported POOLING_MODE: {!r} (expected 'crop', 'align' or 'pool')".format(pooling_mode))
     return pooled_feat
Ejemplo n.º 2
0
    def forward_rcnn_batch(self, base_feat, branch, rois, wgt_boxes, wnum_boxes, gt_boxes, num_boxes, im_info, image_classes, output_refine=False):
        """Run the RCNN head of ``branch`` on a batch and optionally refine ``wgt_boxes``.

        Args:
            base_feat: base feature map; ``size(0)`` is used as the batch size.
            branch: object providing ``RCNN_bbox_pred`` and ``RCNN_cls_score``;
                also passed to ``self._head_to_tail``.
            rois: proposals; in training they are resampled by
                ``self.RCNN_proposal_target``, otherwise used as-is.
            wgt_boxes: boxes indexed as (batch, box, 5) here, with columns
                0-3 used as coordinates and column 4 as the class index
                (presumably weakly-labelled ground truth -- TODO confirm).
            wnum_boxes, gt_boxes, num_boxes: forwarded unchanged to
                ``self.RCNN_proposal_target`` during training.
            im_info: image info; ``im_info.data`` is passed to ``clip_boxes``.
            image_classes: image-level class targets; column 0 is dropped
                before the binary cross-entropy (treated as background).
            output_refine: when true (and training), also regress refined
                boxes for ``wgt_boxes``.

        Returns:
            ``((out_rois, cls_prob, bbox_pred, RCNN_loss_cls, RCNN_loss_bbox,
            rois_label, image_loss_cls), wgt_bbox_out)``. Outside training the
            two RCNN losses are 0 and ``rois_label`` / ``image_loss_cls`` are
            None. ``wgt_bbox_out`` is None unless training with
            ``output_refine``.

        Raises:
            ValueError: if ``cfg.POOLING_MODE`` is not 'crop', 'align' or 'pool'.
        """
        batch_size = base_feat.size(0)

        def pool_rois(flat_rois):
            # Pool one feature per row of ``flat_rois`` with the configured mode.
            if cfg.POOLING_MODE == 'crop':
                grid_xy = _affine_grid_gen(
                    flat_rois, base_feat.size()[2:], self.grid_size)
                # swap the two components on the last axis (xy -> yx)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
                feat = self.RCNN_roi_crop(
                    base_feat, Variable(grid_yx).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    feat = F.max_pool2d(feat, 2, 2)
                return feat
            if cfg.POOLING_MODE == 'align':
                return self.RCNN_roi_align(base_feat, flat_rois)
            if cfg.POOLING_MODE == 'pool':
                return self.RCNN_roi_pool(base_feat, flat_rois)
            # previously an unknown mode surfaced later as UnboundLocalError
            raise ValueError(
                "Unsupported POOLING_MODE: {!r} (expected 'crop', 'align' or 'pool')".format(cfg.POOLING_MODE))

        # in the training phase, sample rois and build targets from the ground truth
        if self.training:
            roi_data = self.RCNN_proposal_target(
                rois, wgt_boxes, wnum_boxes, gt_boxes, num_boxes)
            out_rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # flatten the (batch, roi) axes so every row is one roi
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            out_rois = rois
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None

        out_rois = Variable(out_rois)

        # do roi pooling based on predicted rois
        pooled_feat = pool_rois(out_rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat, branch)

        # compute bbox offset
        bbox_pred = branch.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(
                bbox_pred.size(0), bbox_pred.size(1) // 4, 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(
                rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = branch.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(
                bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

            # image-level label regularization: the image score of a class is
            # the maximum probability of that class over all rois of the image
            rois_batch_size = out_rois.size(1)
            rois_prob = cls_prob.view(batch_size, rois_batch_size, -1)

            # ignore background
            rois_prob = rois_prob[:, :, 1:]

            image_prob, _ = torch.max(rois_prob, dim=1)
            image_loss_cls = F.binary_cross_entropy(image_prob, image_classes[:, 1:])
        else:
            image_loss_cls = None

        if self.training:
            cls_prob = cls_prob.view(batch_size, out_rois.size(1), -1)
            bbox_pred = bbox_pred.view(batch_size, out_rois.size(1), -1)
        else:
            # outside training the outputs are reshaped with a leading dim of 1
            cls_prob = cls_prob.view(1, out_rois.size(1), -1)
            bbox_pred = bbox_pred.view(1, out_rois.size(1), -1)

        if self.training and output_refine:
            # build rois for wgt_boxes: column 0 is the batch index,
            # columns 1-4 are the box coordinates
            wgt_rois = wgt_boxes.new(wgt_boxes.size()).zero_()
            wgt_rois[:, :, 1:5] = wgt_boxes[:, :, :4]
            for i in range(batch_size):
                # index the batch axis: writing to [:, :, 0] here would tag
                # every roi with the last image's index
                wgt_rois[i, :, 0] = i

            # pool and feed the wgt rois through the same head
            gt_pooled_feat = pool_rois(wgt_rois.view(-1, 5))
            gt_pooled_feat = self._head_to_tail(gt_pooled_feat, branch)

            # compute bbox offset and undo the target normalization
            wgt_bbox_delta = branch.RCNN_bbox_pred(gt_pooled_feat)
            # width of the regression output (4 values per class)
            delta_dim = wgt_bbox_delta.size(1)
            wgt_bbox_delta = wgt_bbox_delta.view(-1, 4) * torch.FloatTensor(
                cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            wgt_bbox_delta = wgt_bbox_delta.view(batch_size, -1, delta_dim)
            wgt_bbox_out_rois = bbox_transform_inv(
                wgt_boxes, wgt_bbox_delta, batch_size)

            wgt_bbox_out_rois = clip_boxes(
                wgt_bbox_out_rois, im_info.data, batch_size)

            wgt_bbox_out = wgt_boxes.new(wgt_boxes.size()).zero_()

            wgt_cls = Variable(
                wgt_boxes[:, :, 4].data, requires_grad=False).long()
            # keep, for every box, the 4 refined coordinates of its own class
            # NOTE(review): only the first 20 boxes per image are filled --
            # presumably the maximum box count; confirm against the data loader
            for i in range(batch_size):
                for j in range(20):
                    cls_ind = wgt_cls[i, j]
                    wgt_bbox_out[i, j, :4] = wgt_bbox_out_rois[i,
                                                               j, cls_ind * 4:cls_ind * 4 + 4]

            wgt_bbox_out[:, :, 4] = wgt_boxes[:, :, 4]

            # zero out boxes whose width and height are both 1
            # (x2 == x1 and y2 == y1)
            wgt_boxes_x = (wgt_boxes[:, :, 2] - wgt_boxes[:, :, 0] + 1)
            wgt_boxes_y = (wgt_boxes[:, :, 3] - wgt_boxes[:, :, 1] + 1)
            wgt_area_zero = (wgt_boxes_x == 1) & (wgt_boxes_y == 1)
            wgt_bbox_out.masked_fill_(wgt_area_zero.view(
                batch_size, wgt_area_zero.size(1), 1).expand(wgt_boxes.size()), 0)
            wgt_bbox_out = wgt_bbox_out.detach()
        else:
            wgt_bbox_out = None

        return (out_rois, cls_prob, bbox_pred, RCNN_loss_cls, RCNN_loss_bbox, rois_label, image_loss_cls), wgt_bbox_out
Ejemplo n.º 3
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        """Run the detector: base network, RPN, ROI pooling and RCNN head.

        Args:
            im_data: input batch; ``size(0)`` is used as the batch size.
            im_info, gt_boxes, num_boxes: passed (as ``.data``) to the RPN;
                ``gt_boxes`` and ``num_boxes`` are also passed to
                ``self.RCNN_proposal_target`` during training.

        Returns:
            ``(rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox,
            RCNN_loss_cls, RCNN_loss_bbox, rois_label)``. ``cls_prob`` and
            ``bbox_pred`` are reshaped to (batch, rois per image, -1).
            Outside training all four losses are 0 and ``rois_label`` is None.

        Raises:
            ValueError: if ``cfg.POOLING_MODE`` is not 'crop', 'align' or 'pool'.
        """
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # in the training phase, use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # flatten the (batch, roi) axes so every row is one roi
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            # the RPN losses are not reported outside training
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # do roi pooling based on predicted rois
        pooling_mode = cfg.POOLING_MODE
        if pooling_mode == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            # swap the two components on the last axis (xy -> yx)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif pooling_mode == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif pooling_mode == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
        else:
            # previously an unknown mode surfaced below as UnboundLocalError
            raise ValueError(
                "Unsupported POOLING_MODE: {!r} (expected 'crop', 'align' or 'pool')".format(pooling_mode))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            bbox_pred.size(1) // 4, 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label