Ejemplo n.º 1
0
    def aug_test(self, imgs, img_metas, rescale=False):
        """Detect on several augmented views and fuse the results.

        Every view is run through ``self.bbox_head`` on its own; the raw
        per-view boxes and scores are fused by ``self.merge_aug_results`` and
        then filtered with a single ``multiclass_nms`` call.

        Args:
            imgs: Augmented inputs handed to ``self.extract_feats``.
            img_metas: Meta information iterated in lockstep with the
                extracted features. ``img_metas[0][0]['scale_factor']`` is
                read when ``rescale`` is false.
            rescale (bool): If true, the boxes are returned exactly as
                produced by NMS; otherwise a copy multiplied by the first
                view's scale factor is returned.

        Returns:
            The output of ``bbox2result`` for the final boxes and labels.
        """
        per_view_bboxes = []
        per_view_scores = []
        # recompute feats to save memory
        for feat, meta in zip(self.extract_feats(imgs), img_metas):
            # only one image in the batch, hence the [0] on the head result
            head_outs = self.bbox_head(feat)
            view_bboxes, view_scores = self.bbox_head.get_bboxes(
                *(head_outs + (meta, self.test_cfg, False, False)))[0]
            per_view_bboxes.append(view_bboxes)
            per_view_scores.append(view_scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = self.merge_aug_results(
            per_view_bboxes, per_view_scores, img_metas)
        det_bboxes, det_labels = multiclass_nms(
            merged_bboxes, merged_scores, self.test_cfg.score_thr,
            self.test_cfg.nms, self.test_cfg.max_per_img)

        if not rescale:
            # Work on a copy so the NMS output itself is left untouched.
            scaled_bboxes = det_bboxes.clone()
            scaled_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
            det_bboxes = scaled_bboxes
        return bbox2result(det_bboxes, det_labels,
                           self.bbox_head.num_classes)
Ejemplo n.º 2
0
 def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
     """Run the bbox branch on each augmented view and fuse the outputs.

     Args:
         feats: Per-view features, iterated together with ``img_metas``.
         img_metas: Per-view meta lists; only element ``[0]`` of each is
             read (keys ``img_shape``, ``scale_factor`` and ``flip``).
         proposal_list: Proposals; only ``proposal_list[0][:, :4]`` is used
             and it is mapped into every view with ``bbox_mapping``.
         rcnn_test_cfg: Config providing ``score_thr``, ``nms`` and
             ``max_per_img``; it is also forwarded to ``merge_aug_bboxes``.

     Returns:
         tuple: ``(det_bboxes, det_labels)`` as returned by
         ``multiclass_nms`` on the merged boxes and scores.
     """
     view_bboxes = []
     view_scores = []
     for feat, meta in zip(feats, img_metas):
         # only one image in the batch
         first_meta = meta[0]
         img_shape = first_meta['img_shape']
         scale_factor = first_meta['scale_factor']
         flip = first_meta['flip']
         # TODO more flexible
         mapped = bbox_mapping(proposal_list[0][:, :4], img_shape,
                               scale_factor, flip)
         rois = bbox2roi([mapped])
         # recompute feature maps to save GPU memory
         num_levels = len(self.bbox_roi_extractor.featmap_strides)
         roi_feats = self.bbox_roi_extractor(feat[:num_levels], rois)
         if self.with_shared_head:
             roi_feats = self.shared_head(roi_feats)
         cls_score, bbox_pred = self.bbox_head(roi_feats)
         # cfg=None: the head returns raw boxes and scores, no NMS yet.
         boxes, scores = self.bbox_head.get_det_bboxes(
             rois,
             cls_score,
             bbox_pred,
             img_shape,
             scale_factor,
             rescale=False,
             cfg=None)
         view_bboxes.append(boxes)
         view_scores.append(scores)
     # after merging, bboxes will be rescaled to the original image size
     merged_bboxes, merged_scores = merge_aug_bboxes(
         view_bboxes, view_scores, img_metas, rcnn_test_cfg)
     det_bboxes, det_labels = multiclass_nms(
         merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
         rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)
     return det_bboxes, det_labels
Ejemplo n.º 3
0
 def get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_points,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False,
                       nms=True):
     """Decode one image's per-level point predictions into boxes.

     For every level the class map is flattened to
     ``(-1, self.cls_out_channels)`` and the box map to ``(-1, 4)``. The
     box prediction is multiplied by the level's entry in
     ``self.point_strides``, offset by the first two columns of the points
     and clamped to the image.

     Args:
         cls_scores: Per-level class maps, permuted with ``(1, 2, 0)``.
         bbox_preds: Per-level box maps, permuted with ``(1, 2, 0)``.
         mlvl_points: Per-level points; columns ``0:2`` are the centres.
         img_shape: Indexed as ``[0]`` (y limit) and ``[1]`` (x limit).
         scale_factor: Divisor applied to the boxes when ``rescale``.
         cfg: Provides ``get('nms_pre', -1)`` and, when ``nms`` is true,
             ``score_thr``, ``nms`` and ``max_per_img``.
         rescale (bool): Divide the boxes by ``scale_factor``.
         nms (bool): Run ``multiclass_nms`` on the result.

     Returns:
         tuple: ``(det_bboxes, det_labels)`` from ``multiclass_nms`` when
         ``nms`` is true, otherwise the concatenated
         ``(mlvl_bboxes, mlvl_scores)``.
     """
     assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
     level_bboxes = []
     level_scores = []
     per_level = zip(cls_scores, bbox_preds, mlvl_points)
     for level, (cls_score, bbox_pred, points) in enumerate(per_level):
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         logits = cls_score.permute(1, 2, 0)
         logits = logits.reshape(-1, self.cls_out_channels)
         scores = (logits.sigmoid()
                   if self.use_sigmoid_cls else logits.softmax(-1))
         offsets = bbox_pred.permute(1, 2, 0).reshape(-1, 4)

         nms_pre = cfg.get('nms_pre', -1)
         if 0 < nms_pre < scores.shape[0]:
             # With softmax, column 0 is left out of the ranking.
             ranked = scores if self.use_sigmoid_cls else scores[:, 1:]
             best, _ = ranked.max(dim=1)
             _, keep = best.topk(nms_pre)
             points = points[keep, :]
             offsets = offsets[keep, :]
             scores = scores[keep, :]

         centres = torch.cat([points[:, :2], points[:, :2]], dim=1)
         decoded = offsets * self.point_strides[level] + centres
         x_limit = img_shape[1]
         y_limit = img_shape[0]
         clamped = [
             decoded[:, col].clamp(min=0, max=limit)
             for col, limit in enumerate(
                 (x_limit, y_limit, x_limit, y_limit))
         ]
         level_bboxes.append(torch.stack(clamped, dim=-1))
         level_scores.append(scores)

     mlvl_bboxes = torch.cat(level_bboxes)
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     mlvl_scores = torch.cat(level_scores)
     if self.use_sigmoid_cls:
         # Prepend an all-zero column in front of the sigmoid scores.
         zero_col = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([zero_col, mlvl_scores], dim=1)

     if not nms:
         return mlvl_bboxes, mlvl_scores
     det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                             cfg.score_thr, cfg.nms,
                                             cfg.max_per_img)
     return det_bboxes, det_labels
Ejemplo n.º 4
0
    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          centernesses,
                          mlvl_anchors,
                          img_shape,
                          scale_factor,
                          cfg,
                          rescale=False):
        """Decode one image's predictions, using centerness as a score factor.

        Class scores and centerness go through a sigmoid. When
        ``cfg['nms_pre']`` is positive and smaller than the number of
        candidates on a level, only the ``nms_pre`` candidates with the
        highest ``score * centerness`` are kept for that level.

        Args:
            cls_scores: Per-level class maps.
            bbox_preds: Per-level box deltas decoded with ``delta2bbox``.
            centernesses: Per-level centerness maps.
            mlvl_anchors: Per-level anchors.
            img_shape: Forwarded to ``delta2bbox``.
            scale_factor: Divisor applied to the boxes when ``rescale``.
            cfg: Provides ``get('nms_pre', -1)``, ``score_thr``, ``nms`` and
                ``max_per_img``.
            rescale (bool): Divide the boxes by ``scale_factor``.

        Returns:
            tuple: ``(det_bboxes, det_labels)`` from ``multiclass_nms``.
        """
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        level_bboxes, level_scores, level_ctr = [], [], []
        per_level = zip(cls_scores, bbox_preds, centernesses, mlvl_anchors)
        for cls_score, bbox_pred, centerness, anchors in per_level:
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

            scores = cls_score.permute(1, 2, 0)
            scores = scores.reshape(-1, self.cls_out_channels).sigmoid()
            deltas = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            ctr = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            nms_pre = cfg.get('nms_pre', -1)
            if 0 < nms_pre < scores.shape[0]:
                # Rank candidates by centerness-weighted class score.
                weighted = scores * ctr[:, None]
                best, _ = weighted.max(dim=1)
                _, keep = best.topk(nms_pre)
                anchors = anchors[keep, :]
                deltas = deltas[keep, :]
                scores = scores[keep, :]
                ctr = ctr[keep]

            level_bboxes.append(
                delta2bbox(anchors, deltas, self.target_means,
                           self.target_stds, img_shape))
            level_scores.append(scores)
            level_ctr.append(ctr)

        mlvl_bboxes = torch.cat(level_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)

        mlvl_scores = torch.cat(level_scores)
        # Prepend an all-zero column in front of the sigmoid scores.
        zero_col = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([zero_col, mlvl_scores], dim=1)
        mlvl_centerness = torch.cat(level_ctr)

        det_bboxes, det_labels = multiclass_nms(
            mlvl_bboxes,
            mlvl_scores,
            cfg.score_thr,
            cfg.nms,
            cfg.max_per_img,
            score_factors=mlvl_centerness)
        return det_bboxes, det_labels
Ejemplo n.º 5
0
 def get_bboxes_single(self,
                       cls_score_list,
                       bbox_pred_list,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
     """Turn the head outputs of a single batch item into labeled boxes.

     Args:
         cls_score_list: Per-level class maps.
         bbox_pred_list: Per-level box deltas decoded with ``delta2bbox``.
         mlvl_anchors: Per-level anchors.
         img_shape: Forwarded to ``delta2bbox``.
         scale_factor: Divisor applied to the boxes when ``rescale``.
         cfg: Provides ``get('nms_pre', -1)``, ``score_thr``, ``nms`` and
             ``max_per_img``.
         rescale (bool): Divide the boxes by ``scale_factor``.

     Returns:
         tuple: ``(det_bboxes, det_labels)`` from ``multiclass_nms``.
     """
     assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
     level_bboxes = []
     level_scores = []
     per_level = zip(cls_score_list, bbox_pred_list, mlvl_anchors)
     for cls_score, bbox_pred, anchors in per_level:
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         logits = cls_score.permute(1, 2, 0)
         logits = logits.reshape(-1, self.cls_out_channels)
         scores = (logits.sigmoid()
                   if self.use_sigmoid_cls else logits.softmax(-1))
         deltas = bbox_pred.permute(1, 2, 0).reshape(-1, 4)

         nms_pre = cfg.get('nms_pre', -1)
         if 0 < nms_pre < scores.shape[0]:
             # Rank by the best foreground score; with softmax, column 0
             # is excluded from the ranking.
             foreground = scores if self.use_sigmoid_cls else scores[:, 1:]
             best, _ = foreground.max(dim=1)
             _, keep = best.topk(nms_pre)
             anchors = anchors[keep, :]
             deltas = deltas[keep, :]
             scores = scores[keep, :]

         level_bboxes.append(
             delta2bbox(anchors, deltas, self.target_means,
                        self.target_stds, img_shape))
         level_scores.append(scores)

     mlvl_bboxes = torch.cat(level_bboxes)
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     mlvl_scores = torch.cat(level_scores)
     if self.use_sigmoid_cls:
         # Add a dummy background class to the front when using sigmoid
         zero_col = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([zero_col, mlvl_scores], dim=1)
     det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                             cfg.score_thr, cfg.nms,
                                             cfg.max_per_img)
     return det_bboxes, det_labels
Ejemplo n.º 6
0
 def get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       featmap_sizes,
                       point_list,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
     """Decode one image's per-level predictions into final detections.

     Box corners are computed as ``stride * point -/+ base_len * exp(pred)``
     and clamped to ``[0, img_shape - 1]`` on each axis.

     Args:
         cls_scores: Per-level class maps; a sigmoid is applied.
         bbox_preds: Per-level box maps; ``exp`` is applied.
         featmap_sizes: Per-level feature-map sizes. They take part in the
             level zip but are not otherwise read.
         point_list: Per-level ``(y, x)`` coordinate pairs.
         img_shape: Indexed as ``[0]`` (y limit) and ``[1]`` (x limit).
         scale_factor: Divisor applied to the boxes when ``rescale``.
         cfg: Provides ``get('nms_pre', -1)``, ``score_thr``, ``nms`` and
             ``max_per_img``.
         rescale (bool): Divide the boxes by ``scale_factor``.

     Returns:
         tuple: ``(det_bboxes, det_labels)`` from ``multiclass_nms``.
     """
     assert len(cls_scores) == len(bbox_preds) == len(point_list)
     level_bboxes = []
     level_scores = []
     per_level = zip(cls_scores, bbox_preds, featmap_sizes, self.strides,
                     self.base_edge_list, point_list)
     for cls_score, bbox_pred, _, stride, base_len, (y, x) in per_level:
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         scores = cls_score.permute(1, 2, 0)
         scores = scores.reshape(-1, self.cls_out_channels).sigmoid()
         extents = bbox_pred.permute(1, 2, 0).reshape(-1, 4).exp()

         nms_pre = cfg.get('nms_pre', -1)
         if 0 < nms_pre < scores.shape[0]:
             best, _ = scores.max(dim=1)
             _, keep = best.topk(nms_pre)
             extents = extents[keep, :]
             scores = scores[keep, :]
             y = y[keep]
             x = x[keep]

         x_limit = img_shape[1] - 1
         y_limit = img_shape[0] - 1
         centre_x = stride * x
         centre_y = stride * y
         corners = [
             (centre_x - base_len * extents[:, 0]).clamp(min=0, max=x_limit),
             (centre_y - base_len * extents[:, 1]).clamp(min=0, max=y_limit),
             (centre_x + base_len * extents[:, 2]).clamp(min=0, max=x_limit),
             (centre_y + base_len * extents[:, 3]).clamp(min=0, max=y_limit),
         ]
         level_bboxes.append(torch.stack(corners, -1))
         level_scores.append(scores)

     all_bboxes = torch.cat(level_bboxes)
     if rescale:
         all_bboxes /= all_bboxes.new_tensor(scale_factor)
     all_scores = torch.cat(level_scores)
     # Prepend an all-zero column in front of the sigmoid scores.
     zero_col = all_scores.new_zeros(all_scores.shape[0], 1)
     all_scores = torch.cat([zero_col, all_scores], dim=1)
     det_bboxes, det_labels = multiclass_nms(all_bboxes, all_scores,
                                             cfg.score_thr, cfg.nms,
                                             cfg.max_per_img)
     return det_bboxes, det_labels
Ejemplo n.º 7
0
    def get_det_bboxes(self,
                       rois,
                       cls_score,
                       bbox_pred,
                       img_shape,
                       scale_factor,
                       rescale=False,
                       cfg=None):
        """Convert RoI-level predictions into boxes and scores.

        Args:
            rois: Tensor whose columns from index 1 onward are used as box
                coordinates (column 0 is skipped).
            cls_score: Classification logits, a list of logits (which is
                averaged), or ``None``.
            bbox_pred: Deltas decoded with ``delta2bbox``. When ``None`` the
                RoIs themselves are used as boxes.
            img_shape: Indexed as ``[0]`` (y limit + 1) and ``[1]``
                (x limit + 1) for clamping in the ``bbox_pred is None``
                branch; ``None`` disables that clamping. It is also
                forwarded to ``delta2bbox``.
            scale_factor: A ``float`` or a numpy array; the boxes are
                divided by it when ``rescale`` is true.
            rescale (bool): Divide the boxes by ``scale_factor``.
            cfg: ``None`` to return raw boxes and scores; otherwise an
                object providing ``score_thr``, ``nms`` and ``max_per_img``
                for ``multiclass_nms``.

        Returns:
            tuple: ``(bboxes, scores)`` when ``cfg`` is ``None`` (``scores``
            is ``None`` if ``cls_score`` is ``None``), otherwise
            ``(det_bboxes, det_labels)`` from ``multiclass_nms``.
        """
        if isinstance(cls_score, list):
            cls_score = sum(cls_score) / float(len(cls_score))
        scores = F.softmax(cls_score, dim=1) if cls_score is not None else None

        if bbox_pred is not None:
            bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
                                self.target_stds, img_shape)
        else:
            bboxes = rois[:, 1:].clone()
            if img_shape is not None:
                # Indexing with a list of columns yields a copy, so an
                # in-place ``clamp_`` on it would be silently discarded.
                # Write the clamped values back into ``bboxes`` instead.
                bboxes[:, [0, 2]] = bboxes[:, [0, 2]].clamp(
                    min=0, max=img_shape[1] - 1)
                bboxes[:, [1, 3]] = bboxes[:, [1, 3]].clamp(
                    min=0, max=img_shape[0] - 1)

        if rescale:
            if isinstance(scale_factor, float):
                bboxes /= scale_factor
            else:
                scale_factor = torch.from_numpy(scale_factor).to(bboxes.device)
                # View as groups of 4 so the factor broadcasts over each
                # box, then restore the original 2-D layout.
                bboxes = (bboxes.view(bboxes.size(0), -1, 4) /
                          scale_factor).view(bboxes.size()[0], -1)

        if cfg is None:
            return bboxes, scores
        else:
            det_bboxes, det_labels = multiclass_nms(bboxes, scores,
                                                    cfg.score_thr, cfg.nms,
                                                    cfg.max_per_img)

            return det_bboxes, det_labels
Ejemplo n.º 8
0
    def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].

        The method runs every bbox stage on each augmented view, averages
        the per-stage classification scores, merges the per-view boxes with
        ``merge_aug_bboxes`` and applies ``multiclass_nms``. When
        ``self.with_mask`` is set, mask predictions from every stage and
        every view are merged with ``merge_aug_masks``.

        NOTE(review): within this body the ``rescale`` argument is never
        read and the ``proposals`` argument is overwritten before use;
        confirm whether that is intended.

        Returns:
            ``bbox_result`` alone, or ``(bbox_result, segm_result)`` when
            ``self.with_mask`` is true.
        """
        if self.with_semantic:
            # Second output of the semantic head, one entry per view.
            semantic_feats = [
                self.semantic_head(feat)[1]
                for feat in self.extract_feats(imgs)
            ]
        else:
            # Placeholder so the zips below still yield one item per view.
            semantic_feats = [None] * len(img_metas)

        # recompute feats to save memory
        proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas,
                                          self.test_cfg.rpn)

        rcnn_test_cfg = self.test_cfg.rcnn
        aug_bboxes = []
        aug_scores = []
        for x, img_meta, semantic in zip(self.extract_feats(imgs), img_metas,
                                         semantic_feats):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']

            # Map the first proposal set into this view's coordinates.
            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            # "ms" in variable names means multi-stage
            ms_scores = []

            rois = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_head = self.bbox_head[i]
                cls_score, bbox_pred = self._bbox_forward_test(
                    i, x, rois, semantic_feat=semantic)
                ms_scores.append(cls_score)

                # Every stage except the last refines the RoIs that feed
                # the next stage.
                if i < self.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label,
                                                      bbox_pred, img_meta[0])

            # Average the classification scores over all stages; bbox_pred
            # here is the one produced by the last stage.
            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(rois,
                                                               cls_score,
                                                               bbox_pred,
                                                               img_shape,
                                                               scale_factor,
                                                               rescale=False,
                                                               cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if self.with_mask:
            if det_bboxes.shape[0] == 0:
                # No detections: one empty list per class, using
                # ``num_classes - 1`` entries.
                segm_result = [[]
                               for _ in range(self.mask_head[-1].num_classes -
                                              1)]
            else:
                aug_masks = []
                aug_img_metas = []
                for x, img_meta, semantic in zip(self.extract_feats(imgs),
                                                 img_metas, semantic_feats):
                    img_shape = img_meta[0]['img_shape']
                    scale_factor = img_meta[0]['scale_factor']
                    flip = img_meta[0]['flip']
                    # Map the merged detections back into this view.
                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                           scale_factor, flip)
                    mask_rois = bbox2roi([_bboxes])
                    mask_feats = self.mask_roi_extractor[-1](
                        x[:len(self.mask_roi_extractor[-1].featmap_strides)],
                        mask_rois)
                    if self.with_semantic:
                        semantic_feat = semantic
                        mask_semantic_feat = self.semantic_roi_extractor(
                            [semantic_feat], mask_rois)
                        # Pool the semantic RoI features to the mask
                        # feature size before adding them.
                        if mask_semantic_feat.shape[-2:] != mask_feats.shape[
                                -2:]:
                            mask_semantic_feat = F.adaptive_avg_pool2d(
                                mask_semantic_feat, mask_feats.shape[-2:])
                        mask_feats += mask_semantic_feat
                    last_feat = None
                    for i in range(self.num_stages):
                        mask_head = self.mask_head[i]
                        if self.mask_info_flow:
                            # Each stage receives the previous stage's
                            # feature (None for the first stage).
                            mask_pred, last_feat = mask_head(
                                mask_feats, last_feat)
                        else:
                            mask_pred = mask_head(mask_feats)
                        # One entry per (view, stage); the matching meta is
                        # appended alongside so both lists stay aligned.
                        aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                        aug_img_metas.append(img_meta)
                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                               self.test_cfg.rcnn)

                ori_shape = img_metas[0][0]['ori_shape']
                # Masks are pasted at ``ori_shape`` with a unit scale
                # factor and no rescaling.
                segm_result = self.mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    rcnn_test_cfg,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=False)
            return bbox_result, segm_result
        else:
            return bbox_result
Ejemplo n.º 9
0
 def get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_anchors,
                       mlvl_masks,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
     """Decode one image's masked multi-level predictions into detections.

     Levels whose mask selects no location are skipped. If every level is
     skipped, empty box and score tensors are passed to ``multiclass_nms``
     instead of failing in ``torch.cat`` on an empty list.

     Args:
         cls_scores: Per-level class maps.
         bbox_preds: Per-level box deltas decoded with ``delta2bbox``.
         mlvl_anchors: Per-level anchors, already filtered by the caller
             (see the in-loop comment).
         mlvl_masks: Per-level masks used to index the flattened scores
             and deltas.
         img_shape: Forwarded to ``delta2bbox``.
         scale_factor: Divisor applied to the boxes when ``rescale``.
         cfg: Provides ``get('nms_pre', -1)``, ``score_thr``, ``nms`` and
             ``max_per_img``.
         rescale (bool): Divide the boxes by ``scale_factor``.

     Returns:
         tuple: ``(det_bboxes, det_labels)`` from ``multiclass_nms``.
     """
     assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
     mlvl_bboxes = []
     mlvl_scores = []
     for cls_score, bbox_pred, anchors, mask in zip(cls_scores, bbox_preds,
                                                    mlvl_anchors,
                                                    mlvl_masks):
         assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
         # if no location is kept, end.
         if mask.sum() == 0:
             continue
         # reshape scores and bbox_pred
         cls_score = cls_score.permute(1, 2,
                                       0).reshape(-1, self.cls_out_channels)
         if self.use_sigmoid_cls:
             scores = cls_score.sigmoid()
         else:
             scores = cls_score.softmax(-1)
         bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
         # filter scores, bbox_pred w.r.t. mask.
         # anchors are filtered in get_anchors() beforehand.
         scores = scores[mask, :]
         bbox_pred = bbox_pred[mask, :]
         # NOTE(review): kept from the original; this guard looks
         # unreachable for a 2-D ``scores`` -- confirm before removing.
         if scores.dim() == 0:
             anchors = anchors.unsqueeze(0)
             scores = scores.unsqueeze(0)
             bbox_pred = bbox_pred.unsqueeze(0)
         # filter anchors, bbox_pred, scores w.r.t. scores
         nms_pre = cfg.get('nms_pre', -1)
         if nms_pre > 0 and scores.shape[0] > nms_pre:
             if self.use_sigmoid_cls:
                 max_scores, _ = scores.max(dim=1)
             else:
                 max_scores, _ = scores[:, 1:].max(dim=1)
             _, topk_inds = max_scores.topk(nms_pre)
             anchors = anchors[topk_inds, :]
             bbox_pred = bbox_pred[topk_inds, :]
             scores = scores[topk_inds, :]
         bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                             self.target_stds, img_shape)
         mlvl_bboxes.append(bboxes)
         mlvl_scores.append(scores)
     if mlvl_bboxes:
         mlvl_bboxes = torch.cat(mlvl_bboxes)
         mlvl_scores = torch.cat(mlvl_scores)
     else:
         # Every level was skipped: torch.cat rejects an empty list, so
         # build empty tensors (matching the predictions' dtype/device
         # when a prediction is available) and let NMS handle them.
         make_zeros = (cls_scores[0].new_zeros
                       if len(cls_scores) > 0 else torch.zeros)
         mlvl_bboxes = make_zeros((0, 4))
         mlvl_scores = make_zeros((0, self.cls_out_channels))
     if rescale:
         mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
     if self.use_sigmoid_cls:
         # Prepend an all-zero column in front of the sigmoid scores.
         padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
         mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
     # multi class NMS
     det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                             cfg.score_thr, cfg.nms,
                                             cfg.max_per_img)
     return det_bboxes, det_labels