Example #1
    def forward(self, feature_maps, gt_bboxes=None, img_shape=None):
        """
        
        Args:
            feature_maps(Variable): [p2, p3, p4, p5, p6] or [c5], feature pyramid or single feature 
                map.
            gt_bboxes(Tensor): [N, M, (x1, y1, x2, y2)].
            img_shape(Tensor): [height, width], Image shape. 
        Returns:
             rois(Tensor): [N, M, (idx, x1, y1, x2, y2)] N: batch size, M: number of rois after
                nms, idx: bbox index in mini-batch.
             rpn_loss_cls(Tensor): Classification loss.
             rpn_loss_bbox(Tensor): Bounding box regression loss.
        """
        batch_size = feature_maps[0].size(0)
        assert batch_size == 1, "Support for batch_size > 1 will be added later."

        if self.use_fpn:
            if self.training:
                post_nms_top_n = int(self.config['FPN']['TRAIN_FPN_POST_NMS_TOP_N'])
                nms_thresh = float(self.config['FPN']['TRAIN_FPN_NMS_THRESH'])
            else:
                post_nms_top_n = int(self.config['FPN']['TEST_FPN_POST_NMS_TOP_N'])
                nms_thresh = float(self.config['FPN']['TEST_FPN_NMS_THRESH'])
            rois_pre_nms = []
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            for idx, feature in enumerate(feature_maps):
                self.rpn.RPN_anchor_target = self.RPN_anchor_targets[idx]
                self.rpn.RPN_proposal = self.RPN_proposals[idx]
                rpn_result = self.rpn(feature, img_shape, gt_bboxes, None)
                roi_single, loss_cls_single, loss_bbox_single = rpn_result
                rpn_loss_cls += loss_cls_single
                rpn_loss_bbox += loss_bbox_single

                rois_pre_nms.append(roi_single)

            rois_pre_nms = torch.cat(rois_pre_nms, 1)  # [N, M, (idx, score, x1, y1, x2, y2)].
            # Apply nms to result of all pyramid rois.
            score = rois_pre_nms[0, :, 1]
            score.unsqueeze_(-1)
            bbox = rois_pre_nms[0, :, 2:]
            keep_idx = nms(torch.cat([bbox, score], 1), nms_thresh)
            keep_idx = keep_idx[:post_nms_top_n]
            rois_per_img = torch.cat([rois_pre_nms[:, idx, :] for idx in keep_idx])
            rois = rois_per_img[:, [0, 2, 3, 4, 5]]  # remove roi_score
            rois = rois.unsqueeze(0)
            rpn_loss_cls /= len(feature_maps)
            rpn_loss_bbox /= len(feature_maps)
        else:
            rpn_result = self.rpn(feature_maps[0], img_shape, gt_bboxes, None)
            rois_rpn, rpn_loss_cls, rpn_loss_bbox = rpn_result
            rois = rois_rpn[:, :, [0, 2, 3, 4, 5]]  # remove roi_score

        return rois, rpn_loss_cls, rpn_loss_bbox
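
A minimal, self-contained sketch (mine, not from the example) of the column-select trick used above to drop the objectness score from the [N, M, (idx, score, x1, y1, x2, y2)] roi tensor:

import torch

# rois_rpn layout: [N, M, (idx, score, x1, y1, x2, y2)]
rois_rpn = torch.zeros(1, 4, 6)          # N=1 image, M=4 proposals
rois_rpn[0, :, 1] = torch.rand(4)        # objectness scores in column 1
rois = rois_rpn[:, :, [0, 2, 3, 4, 5]]   # column-select drops the score
print(rois.shape)                        # torch.Size([1, 4, 5])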
Example #2
    def forward(self, feature_maps, gt_bboxes=None, img_shape=None):
        """
        
        Args:
            feature_maps: [p2, p3, p4, p5, p6] or [c5], feature pyramid or single feature map.
            gt_bboxes: [N, M, (x1, y1, x2, y2)].
            img_shape: [height, width], Image shape. 
        Returns:
             rois(Tensor): [N, M, (idx, x1, y1, x2, y2)] N: batch size, M: number of rois after nms,
                 idx: bbox index in mini-batch.
             rpn_loss_cls(Tensor): Classification loss.
             rpn_loss_bbox(Tensor): Bounding box regression loss.
        """
        batch_size = feature_maps[0].size(0)
        nms_output_num = cfg.TEST.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TEST.RPN_NMS_THRESH
        if self.training:
            nms_output_num = cfg.TRAIN.RPN_POST_NMS_TOP_N
            nms_thresh = cfg.TRAIN.RPN_NMS_THRESH

        if self.use_fpn:
            rois_pre_nms = []
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            for idx, feature in enumerate(feature_maps):
                self.rpn.RPN_anchor_target = self.RPN_anchor_targets[idx]
                self.rpn.RPN_proposal = self.RPN_proposals[idx]
                rpn_result = self.rpn(feature, img_shape, gt_bboxes, None)
                roi_single, loss_cls_single, loss_bbox_single = rpn_result
                rpn_loss_cls += loss_cls_single
                rpn_loss_bbox += loss_bbox_single
                roi_score = roi_single[:, :, 1]
                roi_bbox = roi_single[:, :, 2:]
                roi_score.unsqueeze_(-1)
                rois_pre_nms.append(torch.cat((roi_bbox, roi_score), 2))

            rois_pre_nms = torch.cat(rois_pre_nms, 1)  # [N, M, 5]; concatenated along the M dim.
            rois = feature_maps[0].data.new(batch_size, nms_output_num,
                                            5).zero_()
            # Apply nms to result of all pyramid rois.
            for i in range(batch_size):
                keep_idx = nms(rois_pre_nms[i], nms_thresh)
                keep_idx = keep_idx[:nms_output_num]
                rois_per_img = torch.cat(
                    [rois_pre_nms[i, idx, :].unsqueeze(0) for idx in keep_idx])
                rois[i, :, 0] = i
                rois[i, :rois_per_img.size(0), 1:] = rois_per_img[:, :4]  # keep bbox, drop roi_score
        else:
            rpn_result = self.rpn(feature_maps[0], img_shape, gt_bboxes, None)
            rois_rpn, rpn_loss_cls, rpn_loss_bbox = rpn_result
            rois = feature_maps[0].data.new(batch_size, nms_output_num,
                                            5).zero_()
            rois[:, :, 0] = 0
            rois[:, :, 1:] = rois_rpn[:, :, 2:]  # remove roi_score
        return rois, rpn_loss_cls, rpn_loss_bbox
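
A minimal sketch (made-up shapes, not from the example) of the zero-padded roi buffer pattern above: each image's kept boxes go into a fixed-size [N, nms_output_num, 5] tensor, with the batch index in column 0 and unused rows left at zero:

import torch

batch_size, nms_output_num = 2, 6
# per-image boxes kept after NMS; counts may differ between images
kept = [torch.rand(4, 4), torch.rand(6, 4)]

rois = torch.zeros(batch_size, nms_output_num, 5)
for i in range(batch_size):
    rois[i, :, 0] = i                  # batch index in column 0
    n = kept[i].size(0)
    rois[i, :n, 1:] = kept[i]          # boxes; rows past n stay zero-padded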
Example #3
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)

        # assign the score to 0 if it's not kept.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim the keep index to make it equal over the batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

        # _, order = torch.sort(scores_keep, 1, True)

        scores_keep = scores
        proposals_keep = proposals

        if not self.training:
            # filter out proposals scoring below the 0.5 threshold
            assert batch_size == 1
            scores_keep_idx = torch.nonzero(scores_keep > 0.5).view(-1)
            if scores_keep_idx.numel() != 0:
                scores_keep = scores_keep[:, scores_keep_idx]
                proposals_keep = proposals_keep[:, scores_keep_idx]

        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 6).zero_()
        for i in range(batch_size):
            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # 4. sort all (proposal, score) pairs by score from highest to lowest
            # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # pad the remaining rows with zeros.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1] = scores_single.squeeze(-1)
            output[i, :num_proposal, 2:] = proposals_single

        return output
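
The meshgrid/broadcast block above is the heart of anchor generation. Here is a standalone sketch with tiny made-up sizes (the stride and base anchors are assumptions, not values from the source) showing how the broadcasted add produces the K*A shifted anchors:

import numpy as np
import torch

feat_stride, feat_h, feat_w = 16, 2, 3   # made-up sizes
base_anchors = torch.tensor([[ -8.,  -8.,  8.,  8.],
                             [-16., -16., 16., 16.]])  # A=2 base anchors

# one (x, y, x, y) shift per feature-map cell, in input-image coordinates
shift_x = np.arange(0, feat_w) * feat_stride
shift_y = np.arange(0, feat_h) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = torch.from_numpy(
    np.vstack((shift_x.ravel(), shift_y.ravel(),
               shift_x.ravel(), shift_y.ravel())).transpose()).contiguous().float()

A, K = base_anchors.size(0), shifts.size(0)          # K = feat_h * feat_w
anchors = base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)  # broadcast to [K, A, 4]
anchors = anchors.view(K * A, 4)
print(anchors.shape)                     # torch.Size([12, 4]) = [K*A, 4]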
Example #4
    def _process_result(self,
                        batch_size,
                        features,
                        proposals,
                        cls_prob=None,
                        bbox_reg=None):
        """Get the final result in test stage.
        Args:
            batch_size(int): mini-batch size.
            features(list of Variable): extracted features from backbone
            proposals(Tensor): [N, M, (idx, score, x1, y1, x2, y2)]
            cls_prob(Variable): [(NxM),  num_classes]
            bbox_reg(Variable): [(NxM), num_classes, (x1, y1, x2, y2)]
            
        Returns:
            result: list of lists of dict; the outer list is the mini-batch, the inner list is the
                detected objects, and each dict contains the fields below.
                
                dict_key:
                    'proposal'(Tensor): (x1, y1, x2, y2), coarse bbox from the RPN proposal.
                    'cls_pred'(int): predicted class id.
                    'bbox_pred'(Tensor): (x1, y1, x2, y2), refined bbox from prediction head.
                    'mask_pred'(Tensor): [H, W], predicted mask.
                    
                e.g. result[0][0]['mask_pred'] is the mask prediction for the first object in
                    the first image of the mini-batch.
        """

        # Todo: support batch_size > 1.
        assert batch_size == 1, "Support for batch_size > 1 will be added later."
        proposals = proposals.squeeze(0)
        result = []

        if self.train_rpn_only:
            obj_detected = []
            for i in range(proposals.size(0)):
                pred_dict = {'proposal': proposals[i, 2:].cpu()}
                obj_detected.append(pred_dict)
            result.append(obj_detected)
            return result

        else:
            props = []
            bboxes = []
            cls_ids = []
            for idx, roi in enumerate(proposals):
                cls_id = torch.max(cls_prob[idx], dim=0)[1]
                if int(cls_id) > 0:  # remove background
                    # refine proposal bbox with bbox regression result.
                    bbox = self._refine_proposal(
                        roi[2:],
                        bbox_reg[idx, :, :][cls_id, :].squeeze(0).data)
                    px1, py1, px2, py2 = bbox
                    # skip malformed bboxes
                    if py1 >= py2 or px1 >= px2:
                        continue
                    props.append(roi.unsqueeze(0))
                    bboxes.append(bbox.unsqueeze(0))
                    cls_ids.append(int(cls_id))

            if len(props) != 0:
                props_origin = torch.cat(props)
                props_refined = props_origin.clone()
                props_refined[:, 2:] = torch.cat(bboxes)
            else:
                result.append([])
                return result

            # Apply nms.
            if self.use_fpn:
                pre_nms_top_n = int(
                    self.config['FPN']['TEST_FPN_PRE_NMS_TOP_N'])
                post_nms_top_n = int(
                    self.config['FPN']['TEST_FPN_POST_NMS_TOP_N'])
                nms_thresh = float(self.config['FPN']['TEST_FPN_NMS_THRESH'])
            else:
                pre_nms_top_n = int(
                    self.config['RPN']['TEST_RPN_PRE_NMS_TOP_N'])
                post_nms_top_n = int(
                    self.config['RPN']['TEST_RPN_POST_NMS_TOP_N'])
                nms_thresh = float(self.config['RPN']['TEST_RPN_NMS_THRESH'])

            score = props_refined[:, 1]
            order = torch.sort(score, dim=0, descending=True)[1]
            props_origin = props_origin[order, :][:pre_nms_top_n, :]
            props_refined = props_refined[order, :][:pre_nms_top_n, :]
            score = props_refined[:, 1].unsqueeze(-1)
            bbox = props_refined[:, 2:]
            keep_idx = nms(torch.cat([bbox, score], 1), nms_thresh)
            keep_idx = keep_idx[:post_nms_top_n]
            props_origin = torch.cat(
                [props_origin[idx, :].unsqueeze(0) for idx in keep_idx])
            props_refined = torch.cat(
                [props_refined[idx, :].unsqueeze(0) for idx in keep_idx])
            if self.use_fpn:
                rois_pooling_mask = self._roi_align_fpn(features,
                                                        props_refined.clone(),
                                                        mode='mask')
                mask_prob = self.mask_head(rois_pooling_mask).data
            else:
                rois_pooling_mask = self.roi_align_mask(
                    features[0], props_refined.clone(), self.img_height)
                mask_prob = self.mask_head(rois_pooling_mask).data

            obj_detected = []
            for i in range(len(props_origin)):
                pred_dict = {
                    'proposal': props_origin[i, 2:].cpu(),
                    'cls_pred': cls_ids[i],
                    'bbox_pred': props_refined[i, 2:].cpu(),
                    'mask_pred': None
                }

                px1, py1, px2, py2 = props_refined[i, 2:].int()
                mask_height, mask_width = py2 - py1 + 1, px2 - px1 + 1
                mask = mask_prob[i, :, :, :][cls_ids[i], :, :]
                mask = Variable(mask.unsqueeze(0), requires_grad=False)
                mask_resize = F.adaptive_avg_pool2d(
                    mask, (mask_height, mask_width)).data
                mask_threshold = float(self.config['TEST']['MASK_THRESH'])
                mask_resize = mask_resize >= mask_threshold
                mask_pred = mask_prob.new(self.img_height,
                                          self.img_width).zero_()
                mask_pred[py1:py2 + 1, px1:px2 + 1] = mask_resize
                pred_dict['mask_pred'] = mask_pred.cpu()
                obj_detected.append(pred_dict)
            result.append(obj_detected)

            return result
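
The mask post-processing at the end of this example resizes the predicted mask to the refined box, binarizes it, and pastes it into a full-image canvas. A minimal sketch with made-up sizes (the 0.5 threshold stands in for the example's MASK_THRESH config value):

import torch
import torch.nn.functional as F

img_h, img_w = 32, 32                    # made-up image size
mask_prob = torch.rand(14, 14)           # one class's mask from the mask head
px1, py1, px2, py2 = 4, 6, 13, 17        # refined box, inclusive pixel coords

# resize the fixed-size mask to the box size, then binarize
mh, mw = py2 - py1 + 1, px2 - px1 + 1
mask = F.adaptive_avg_pool2d(mask_prob[None, None], (mh, mw))[0, 0]
mask = (mask >= 0.5).float()             # binarize at the mask threshold

# paste the box-sized mask into a zeroed full-image canvas
mask_pred = torch.zeros(img_h, img_w)
mask_pred[py1:py2 + 1, px1:px2 + 1] = mask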