Ejemplo n.º 1
0
 def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
     """Run the bbox head on every augmented view and fuse the detections.

     Args:
         feats: iterable of per-augmentation feature maps, paired
             positionally with ``img_metas``.
         img_metas: per-augmentation meta lists; only entry ``[0]`` of each
             is read (keys ``img_shape``, ``scale_factor``, ``flip``).
         proposal_list: proposals; only ``proposal_list[0][:, :4]`` is used.
         rcnn_test_cfg: config exposing ``score_thr``, ``nms`` and
             ``max_per_img``.

     Returns:
         The ``(det_bboxes, det_labels)`` pair unpacked from
         ``multiclass_nms``.
     """
     boxes_per_aug, scores_per_aug = [], []
     for view_feats, view_meta in zip(feats, img_metas):
         # Each view is assumed to hold a single image, so read entry 0.
         meta = view_meta[0]
         shape = meta['img_shape']
         factor = meta['scale_factor']
         flipped = meta['flip']
         # TODO more flexible
         # Map the shared proposals into this view's coordinate frame.
         mapped = bbox_mapping(proposal_list[0][:, :4], shape, factor,
                               flipped)
         rois = bbox2roi([mapped])
         # RoI features are recomputed per view to save GPU memory.
         extractor = self.bbox_roi_extractor
         roi_feats = extractor(view_feats[:len(extractor.featmap_strides)],
                               rois)
         if self.with_shared_head:
             roi_feats = self.shared_head(roi_feats)
         cls_score, bbox_pred = self.bbox_head(roi_feats)
         view_boxes, view_scores = self.bbox_head.get_det_bboxes(
             rois,
             cls_score,
             bbox_pred,
             shape,
             factor,
             rescale=False,
             cfg=None)
         boxes_per_aug.append(view_boxes)
         scores_per_aug.append(view_scores)

     # Merging is expected to bring the boxes back to the original image
     # size before the final NMS.
     fused_boxes, fused_scores = merge_aug_bboxes(
         boxes_per_aug, scores_per_aug, img_metas, rcnn_test_cfg)
     det_bboxes, det_labels = multiclass_nms(
         fused_boxes,
         fused_scores,
         rcnn_test_cfg.score_thr,
         rcnn_test_cfg.nms,
         rcnn_test_cfg.max_per_img)
     return det_bboxes, det_labels
Ejemplo n.º 2
0
 def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
     """Detect boxes with test-time augmentation and merge the views.

     Args:
         feats: iterable of per-augmentation feature maps, paired
             positionally with ``img_metas``.
         img_metas: per-augmentation meta lists; only entry ``[0]`` of each
             is read (keys ``img_shape``, ``scale_factor``, ``flip``,
             ``flip_direction``).
         proposal_list: proposals; only ``proposal_list[0][:, :4]`` is used.
         rcnn_test_cfg: config exposing ``score_thr``, ``nms`` and
             ``max_per_img``.

     Returns:
         The ``(det_bboxes, det_labels)`` pair unpacked from
         ``multiclass_nms``.
     """
     boxes_per_aug, scores_per_aug = [], []
     for view_feats, view_meta in zip(feats, img_metas):
         # Each view is assumed to hold a single image, so read entry 0.
         meta = view_meta[0]
         shape = meta['img_shape']
         factor = meta['scale_factor']
         flipped = meta['flip']
         direction = meta['flip_direction']
         # TODO more flexible
         # Map the shared proposals into this view's coordinate frame.
         mapped = bbox_mapping(proposal_list[0][:, :4], shape, factor,
                               flipped, direction)
         rois = bbox2roi([mapped])
         head_out = self._bbox_forward(view_feats, rois)
         view_boxes, view_scores = self.bbox_head.get_bboxes(
             rois,
             head_out['cls_score'],
             head_out['bbox_pred'],
             shape,
             factor,
             rescale=False,
             cfg=None)
         boxes_per_aug.append(view_boxes)
         scores_per_aug.append(view_scores)

     # Merging is expected to bring the boxes back to the original image
     # size before the final NMS.
     fused_boxes, fused_scores = merge_aug_bboxes(
         boxes_per_aug, scores_per_aug, img_metas, rcnn_test_cfg)
     det_bboxes, det_labels = multiclass_nms(
         fused_boxes,
         fused_scores,
         rcnn_test_cfg.score_thr,
         rcnn_test_cfg.nms,
         rcnn_test_cfg.max_per_img)
     return det_bboxes, det_labels
Ejemplo n.º 3
0
 def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
     """Detect boxes with test-time augmentation, optionally with variance.

     When the bbox head returns ``(boxes, variance)`` tuples and the
     configured NMS type is ``'soft_nms_variance_voting'``, the variance is
     merged alongside the boxes and variance voting is applied; otherwise
     the regular merge + ``multiclass_nms`` path is taken.

     Args:
         feats: iterable of per-augmentation feature maps, paired
             positionally with ``img_metas``.
         img_metas: per-augmentation meta lists; only entry ``[0]`` of each
             is read (``img_shape``, ``scale_factor``, ``flip`` and the
             optional ``flip_direction``, defaulting to ``'horizontal'``).
         proposal_list: proposals; only ``proposal_list[0][:, :4]`` is used.
         rcnn_test_cfg: config exposing ``score_thr``, ``nms``,
             ``max_per_img`` and the optional ``return_variance`` flag.

     Returns:
         The ``(det_bboxes, det_labels)`` pair from whichever NMS routine
         was selected.
     """
     boxes_per_aug = []
     scores_per_aug = []
     variance_per_aug = []
     for view_feats, view_meta in zip(feats, img_metas):
         # Each view is assumed to hold a single image, so read entry 0.
         meta = view_meta[0]
         shape = meta['img_shape']
         factor = meta['scale_factor']
         flipped = meta['flip']
         direction = meta.get('flip_direction', 'horizontal')
         # TODO more flexible
         mapped = bbox_mapping(proposal_list[0][:, :4], shape, factor,
                               flipped, direction)
         rois = bbox2roi([mapped])
         # RoI features are recomputed per view to save GPU memory.
         extractor = self.bbox_roi_extractor
         roi_feats = extractor(view_feats[:len(extractor.featmap_strides)],
                               rois)
         if self.with_shared_head:
             roi_feats = self.shared_head(roi_feats)
         cls_score, bbox_pred = self.bbox_head(roi_feats)
         view_boxes, view_scores = self.bbox_head.get_det_bboxes(
             rois,
             cls_score,
             bbox_pred,
             shape,
             factor,
             rescale=False,
             cfg=None)
         if isinstance(view_boxes, tuple):
             # The head produced (boxes, variance); keep them separately.
             boxes_per_aug.append(view_boxes[0])
             variance_per_aug.append(view_boxes[1])
         else:
             boxes_per_aug.append(view_boxes)
         scores_per_aug.append(view_scores)

     # Merging is expected to bring the boxes back to the original image
     # size before the final NMS.
     return_variance = rcnn_test_cfg.get('return_variance', False)
     use_variance_voting = (
         len(variance_per_aug) > 0
         and rcnn_test_cfg.nms.type == 'soft_nms_variance_voting')
     if use_variance_voting:
         fused = merge_aug_bboxes_variance(
             boxes_per_aug, scores_per_aug, variance_per_aug, img_metas,
             rcnn_test_cfg)
         fused_boxes, fused_scores, fused_variance = fused
         det_bboxes, det_labels = multiclass_soft_nms_variance_voting(
             fused_boxes,
             fused_variance,
             fused_scores,
             rcnn_test_cfg.score_thr,
             rcnn_test_cfg.nms,
             rcnn_test_cfg.max_per_img,
             return_variance=return_variance)
     else:
         fused_boxes, fused_scores = merge_aug_bboxes(
             boxes_per_aug, scores_per_aug, img_metas, rcnn_test_cfg)
         det_bboxes, det_labels = multiclass_nms(
             fused_boxes,
             fused_scores,
             rcnn_test_cfg.score_thr,
             rcnn_test_cfg.nms,
             rcnn_test_cfg.max_per_img)
     return det_bboxes, det_labels
Ejemplo n.º 4
0
    def tsd_aug_test_bboxes(self, feats, img_metas, proposal_list,
                            rcnn_test_cfg):
        """Test-time-augmented detection using the head's TSD outputs.

        For every augmented view the RoIs are shifted by the head's
        ``delta_r`` output (scaled by 0.1 of the RoI width/height) and the
        TSD classification / regression outputs are decoded against those
        shifted RoIs. Views are then merged and passed through NMS.

        Args:
            feats: iterable of per-augmentation feature maps, paired
                positionally with ``img_metas``.
            img_metas: per-augmentation meta lists; only entry ``[0]`` of
                each is read (``img_shape``, ``scale_factor``, ``flip``).
            proposal_list: proposals; only ``proposal_list[0][:, :4]`` is
                used.
            rcnn_test_cfg: config exposing ``score_thr``, ``nms`` and
                ``max_per_img``.

        Returns:
            The ``(det_bboxes, det_labels)`` pair unpacked from
            ``multiclass_nms``.
        """
        boxes_per_aug = []
        scores_per_aug = []
        # One unit of delta_r moves an RoI by this fraction of its size.
        scale = 0.1
        for view_feats, view_meta in zip(feats, img_metas):
            # Each view is assumed to hold a single image, so read entry 0.
            meta = view_meta[0]
            shape = meta["img_shape"]
            factor = meta["scale_factor"]
            flipped = meta["flip"]
            # TODO more flexible
            mapped = bbox_mapping(proposal_list[0][:, :4], shape, factor,
                                  flipped)
            rois = bbox2roi([mapped])
            # RoI features are recomputed per view to save GPU memory.
            extractor = self.bbox_roi_extractor
            roi_feats = extractor(
                view_feats[:len(extractor.featmap_strides)], rois)
            head_out = self.bbox_head(
                roi_feats, view_feats[:self.bbox_roi_extractor.num_inputs],
                rois)
            # Only the TSD branch outputs and the regression-side shift are
            # consumed here; the remaining three outputs are unused.
            _, _, tsd_cls_score, tsd_bbox_pred, _, delta_r = head_out

            # Columns 1..4 of rois are used as x1, y1, x2, y2; column 0 is
            # copied through untouched.
            roi_w = rois[:, 3] - rois[:, 1] + 1
            roi_h = rois[:, 4] - rois[:, 2] + 1
            shifted_rois = rois.new_zeros(rois.shape[0], rois.shape[1])
            shifted_rois[:, 0] = rois[:, 0]
            delta_r = delta_r.to(dtype=shifted_rois.dtype)
            shift_x = delta_r[:, 0] * scale * roi_w
            shift_y = delta_r[:, 1] * scale * roi_h
            shifted_rois[:, 1] = rois[:, 1] + shift_x
            shifted_rois[:, 2] = rois[:, 2] + shift_y
            shifted_rois[:, 3] = rois[:, 3] + shift_x
            shifted_rois[:, 4] = rois[:, 4] + shift_y

            view_boxes, view_scores = self.bbox_head.get_det_bboxes(
                shifted_rois,
                tsd_cls_score,
                tsd_bbox_pred,
                shape,
                factor,
                rescale=False,
                cfg=None,
            )

            boxes_per_aug.append(view_boxes)
            scores_per_aug.append(view_scores)

        # Merging is expected to bring the boxes back to the original image
        # size before the final NMS.
        fused_boxes, fused_scores = merge_aug_bboxes(
            boxes_per_aug, scores_per_aug, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
            fused_boxes,
            fused_scores,
            rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms,
            rcnn_test_cfg.max_per_img,
        )
        return det_bboxes, det_labels
    def multi_bboxes_test(self, feats, img_metas, proposal_list, rcnn_test_cfg,
                          rescale):
        """Multi-stage bbox inference over several augmented views.

        For each view the RoIs are refined through all but the last stage,
        the classification scores of every stage are averaged, and the last
        head decodes the final boxes. Views are then merged and passed
        through NMS.

        Args:
            feats: iterable of per-augmentation feature maps, paired
                positionally with ``img_metas``.
            img_metas: per-augmentation meta lists; only entry ``[0]`` of
                each is read.
            proposal_list: proposals; only ``proposal_list[0][:, :4]`` is
                used.
            rcnn_test_cfg: config exposing ``score_thr``, ``nms`` and
                ``max_per_img``.
            rescale: accepted but not referenced inside this method.

        Returns:
            The ``(det_bboxes, det_labels)`` pair unpacked from
            ``multiclass_nms``.
        """
        scores_per_aug = []
        boxes_per_aug = []
        for view_feats, view_meta in zip(feats, img_metas):
            meta = view_meta[0]
            shape = meta['img_shape']
            factor = meta['scale_factor']
            flipped = meta['flip']
            # Classification scores gathered from every cascade stage.
            stage_scores = []
            # TODO more flexible
            mapped = bbox_mapping(proposal_list[0][:, :4], shape, factor,
                                  flipped)
            rois = bbox2roi([mapped])

            last_stage = self.num_stages - 1
            for stage in range(self.num_stages):
                extractor = self.bbox_roi_extractor[stage]
                head = self.bbox_head[stage]

                stage_feats = extractor(
                    view_feats[:len(extractor.featmap_strides)], rois)
                if self.with_shared_head:
                    stage_feats = self.shared_head(stage_feats)

                cls_score, bbox_pred = head(stage_feats)
                stage_scores.append(cls_score)

                if stage < last_stage:
                    # Refine the RoIs with the top-scoring class before
                    # handing them to the next stage.
                    top_label = cls_score.argmax(dim=1)
                    rois = head.regress_by_class(rois, top_label, bbox_pred,
                                                 view_meta[0])

            # Ensemble the stages by averaging their scores; the box deltas
            # come from the last stage only.
            cls_score = sum(stage_scores) / self.num_stages
            view_boxes, view_scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                shape,
                factor,
                rescale=False,
                cfg=None)
            scores_per_aug.append(view_scores)
            boxes_per_aug.append(view_boxes)

        fused_boxes, fused_scores = merge_aug_bboxes(
            boxes_per_aug, scores_per_aug, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
            fused_boxes,
            fused_scores,
            rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms,
            rcnn_test_cfg.max_per_img)
        return det_bboxes, det_labels
    def nms_bboxes(self,
                   img_meta,
                   rois,
                   cls_score,
                   bbox_pred,
                   rcnn_test_cfg,
                   ms_bbox_result=None):
        """Decode the last-stage boxes and run NMS, optionally with extras.

        Args:
            img_meta: meta list; only ``img_meta[0]`` is read (keys
                ``img_shape`` and ``scale_factor``).
            rois: RoIs passed to the last bbox head's ``get_det_bboxes``.
            cls_score: classification scores for ``rois``.
            bbox_pred: box regression output for ``rois``.
            rcnn_test_cfg: config exposing ``score_thr``, ``nms`` and
                ``max_per_img``.
            ms_bbox_result: optional ``(aug_bboxes, aug_scores)`` pair of
                lists holding previously computed boxes/scores. When given,
                the boxes decoded here are appended to those lists (the
                caller's lists are modified in place) and everything is
                merged before NMS. When ``None``, the last head performs the
                decoding with ``rescale=True`` and ``cfg=rcnn_test_cfg``.

        Returns:
            A ``(det_bboxes, det_labels)`` tuple.
        """
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        last_head = self.bbox_head[-1]

        if ms_bbox_result is None:
            # No earlier results to merge with: let the head produce the
            # final detections directly.
            det_bboxes, det_labels = last_head.get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=True,
                cfg=rcnn_test_cfg)
            return det_bboxes, det_labels

        # Unpack only after the None check; doing it unconditionally made
        # the documented default (None) raise a TypeError.
        aug_bboxes, aug_scores = ms_bbox_result
        bboxes, scores = last_head.get_det_bboxes(rois,
                                                  cls_score,
                                                  bbox_pred,
                                                  img_shape,
                                                  scale_factor,
                                                  rescale=False,
                                                  cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

        # Every entry shares the same image meta, so repeat it once per
        # box set for the merge helper.
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, [img_meta] * len(aug_bboxes),
            rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes,
                                                merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)
        return det_bboxes, det_labels
Ejemplo n.º 7
0
 def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
     """Run the bbox head on every augmented view and fuse the detections.

     Args:
         feats: iterable of per-augmentation feature maps, paired
             positionally with ``img_metas``.
         img_metas: per-augmentation meta lists; only entry ``[0]`` of each
             is read (keys ``img_shape``, ``scale_factor``, ``flip``).
         proposal_list: proposals; only ``proposal_list[0][:, :4]`` is used.
         rcnn_test_cfg: config exposing ``score_thr``, ``nms`` and
             ``max_per_img``.

     Returns:
         The ``(det_bboxes, det_labels)`` pair unpacked from
         ``multiclass_nms``.
     """
     boxes_per_aug, scores_per_aug = [], []
     for view_feats, view_meta in zip(feats, img_metas):
         # Each (features, meta) pair is assumed to describe one image.
         meta = view_meta[0]
         shape = meta['img_shape']
         factor = meta['scale_factor']
         flipped = meta['flip']
         # TODO more flexible
         # Every view reuses proposal_list[0], mapped into its own frame,
         # so that the boxes of different views correspond one-to-one.
         mapped = bbox_mapping(proposal_list[0][:, :4], shape, factor,
                               flipped)
         rois = bbox2roi([mapped])
         # RoI features are recomputed per view to save GPU memory.
         extractor = self.bbox_roi_extractor
         roi_feats = extractor(view_feats[:len(extractor.featmap_strides)],
                               rois)
         cls_score, bbox_pred = self.bbox_head(roi_feats)
         view_boxes, view_scores = self.bbox_head.get_det_bboxes(
             rois,
             cls_score,
             bbox_pred,
             shape,
             factor,
             rescale=False,
             cfg=None)
         boxes_per_aug.append(view_boxes)
         scores_per_aug.append(view_scores)

     # Merging is expected to bring the boxes back to the original image
     # size and combine the views before the final NMS.
     fused_boxes, fused_scores = merge_aug_bboxes(
         boxes_per_aug, scores_per_aug, img_metas, rcnn_test_cfg)
     det_bboxes, det_labels = multiclass_nms(
         fused_boxes,
         fused_scores,
         rcnn_test_cfg.score_thr,
         rcnn_test_cfg.nms,
         rcnn_test_cfg.max_per_img)
     return det_bboxes, det_labels
Ejemplo n.º 8
0
    def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
        """Test with augmentations, ensembling boxes (and masks) over views.

        Args:
            imgs: augmented images, forwarded to ``self.extract_feats``.
            img_metas: per-augmentation meta lists; only entry ``[0]`` of
                each is read.
            proposals: not read as an input; the name is rebound inside the
                per-view loop.
            rescale: when true the merged boxes are reported as-is; when
                false a copy is multiplied by the first view's
                ``scale_factor`` before being converted to results.

        Returns:
            If ``test_cfg.keep_all_stages`` is false: the ensemble bbox
            result, or an ``(bbox, segm)`` tuple when ``self.with_mask``.
            Otherwise a dict keyed by stage name; only the ``'ensemble'``
            key is populated by this method.
        """
        ms_bbox_result = {}
        ms_segm_result = {}
        rcnn_test_cfg = self.test_cfg.rcnn

        # extract_feats is called again for each pass below instead of
        # caching its output.
        proposal_list = self.aug_test_rpn(
            self.extract_feats(imgs), img_metas, self.test_cfg.rpn)

        aug_bboxes = []
        aug_scores = []

        for x, img_meta in zip(self.extract_feats(imgs), img_metas):
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            # map the shared proposals into this view's coordinate frame
            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            rois = bbox2roi([proposals])

            if self.with_semantic:
                _, semantic_feat = self.semantic_head(x)
            else:
                semantic_feat = None
            # classification scores collected from every stage
            ms_scores = []
            for i in range(self.num_stages):
                bbox_head = self.bbox_head[i]
                cls_score, bbox_pred = self._bbox_forward_test(
                    i, x, rois, semantic_feat=semantic_feat)
                ms_scores.append(cls_score)
                if i < self.num_stages - 1:
                    # refine the rois with the top-scoring class before the
                    # next stage
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,
                                                      img_meta[0])
            # average the stage scores; bbox_pred is the last stage's output
            cls_score = sum(ms_scores) / self.num_stages
            det_bboxes, det_scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(det_bboxes)
            aug_scores.append(det_scores)

        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        # after the ensemble, all det_bboxes are at the original image size
        det_bboxes, det_labels = multiclass_nms(
            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)

        if rescale:
            _det_bboxes = det_bboxes
        else:
            # work on a copy so det_bboxes stays untouched for the mask
            # branch below
            _det_bboxes = det_bboxes.clone()
            _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
        bbox_results = bbox2result(_det_bboxes, det_labels,
                                       self.bbox_head[-1].num_classes)
        ms_bbox_result['ensemble'] = bbox_results

        if self.with_mask:
            if det_bboxes.shape[0] == 0:
                # no detections: one empty list per class slot
                segm_result = [ [] for _ in range(self.mask_head[-1].num_classes - 1)]
            else:
                aug_sum_masks = []
                for x, img_meta in zip(self.extract_feats(imgs), img_metas):
                    if self.with_semantic:
                        _, semantic_feat = self.semantic_head(x)
                    else:
                        semantic_feat = None

                    img_shape = img_meta[0]['img_shape']
                    scale_factor = img_meta[0]['scale_factor']
                    flip = img_meta[0]['flip']
                    # map the final detections into this view's frame
                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                           scale_factor, flip)
                    mask_rois = bbox2roi([_bboxes])
                    aug_masks = []
                    mask_roi_extractor = self.mask_roi_extractor[-1]
                    mask_feats = mask_roi_extractor(
                        x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
                    if self.with_semantic and 'mask' in self.semantic_fusion:
                        mask_semantic_feat = self.semantic_roi_extractor(
                            [semantic_feat], mask_rois)
                        # in-place fusion of the semantic features
                        mask_feats += mask_semantic_feat
                    last_feat = None
                    for i in range(self.num_stages):
                        mask_head = self.mask_head[i]
                        if self.mask_info_flow:
                            mask_pred, last_feat = mask_head(mask_feats, last_feat)
                        else:
                            mask_pred = mask_head(mask_feats)
                        aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                    # first average the masks over all stages
                    merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages,
                                                       self.test_cfg.rcnn)
                    aug_sum_masks.append(merged_masks)
                # then average the per-view masks
                merged_masks = np.mean(aug_sum_masks, axis=0)
                ori_shape = img_metas[0][0]['ori_shape']
                segm_result = self.mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    self.test_cfg.rcnn,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=False)

            ms_segm_result['ensemble'] = segm_result

        # quick sanity check that TTA is correct (nothing implemented here)
  

        if not self.test_cfg.keep_all_stages:
            if self.with_mask:
                results = (ms_bbox_result['ensemble'],
                           ms_segm_result['ensemble'])
            else:
                results = ms_bbox_result['ensemble']
        else:
            if self.with_mask:
                results = {
                    stage: (ms_bbox_result[stage], ms_segm_result[stage])
                    for stage in ms_bbox_result
                }
            else:
                results = ms_bbox_result

        return results
Ejemplo n.º 9
0
    def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
        """Multi-stage bbox detection with test-time augmentation.

        For each augmented view the RoIs are refined through all but the
        last stage, the classification scores of every stage are averaged,
        and the last head decodes the boxes. The per-view results are then
        merged and passed through NMS.

        Args:
            feats: iterable of per-augmentation feature maps, paired
                positionally with ``img_metas``.
            img_metas: per-augmentation meta lists; only entry ``[0]`` of
                each is read (``img_shape``, ``scale_factor``, ``flip``).
            proposal_list: proposals; only ``proposal_list[0][:, :4]`` is
                used.
            rcnn_test_cfg: config exposing ``score_thr``, ``nms`` and
                ``max_per_img``.

        Returns:
            The ``(det_bboxes, det_labels)`` pair unpacked from
            ``multiclass_nms``.
        """
        aug_bboxes = []
        aug_scores = []
        for x, img_meta in zip(feats, img_metas):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            # TODO more flexible
            # classification scores collected from every stage
            ms_scores = []
            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            rois = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_roi_extractor = self.bbox_roi_extractor[i]
                bbox_head = self.bbox_head[i]

                bbox_feats = bbox_roi_extractor(
                    x[:len(bbox_roi_extractor.featmap_strides)], rois)
                if self.with_shared_head:
                    bbox_feats = self.shared_head(bbox_feats)

                cls_score, bbox_pred = bbox_head(bbox_feats)
                ms_scores.append(cls_score)

                # NOTE: per-stage detections used to be computed here when
                # test_cfg.keep_all_stages was set, but they were stored in
                # a throw-away dict and never returned, so that work was
                # dropped.
                if i < self.num_stages - 1:
                    # refine the rois with the top-scoring class before the
                    # next stage
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label,
                                                      bbox_pred, img_meta[0])

            # average the stage scores; bbox_pred is the last stage's output
            cls_score = sum(ms_scores) / float(len(ms_scores))
            # with cfg=None the second value is appended to aug_scores and
            # merged as scores, so it is named accordingly
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)
        return det_bboxes, det_labels
Ejemplo n.º 10
0
    def aug_test(self,
                 imgs,
                 img_metas,
                 templates,
                 proposals=None,
                 rescale=False):
        """Test with augmentations.

        Proposals come from ``aug_test_rpn``; each augmented view is then run
        through every stage, the stage scores are averaged, and the views
        are merged before NMS.

        NOTE(review): ``proposals`` and ``rescale`` are accepted but never
        read here. The previous docstring said that with ``rescale=False``
        the boxes fit the scale of ``imgs[0]``, but no such rescaling is
        performed in this method -- confirm the intended contract.

        Returns:
            The output of ``bbox2result`` for the final detections.
        """
        # Features are recomputed for each pass instead of cached, to save
        # memory.
        proposal_list = self.aug_test_rpn(
            self.extract_feats(imgs, templates), img_metas,
            self.test_cfg.rpn)
        rcnn_test_cfg = self.test_cfg.rcnn

        boxes_per_aug = []
        scores_per_aug = []
        for view_feats, view_meta in zip(
                self.extract_feats(imgs, templates), img_metas):
            # Each view is assumed to hold a single image, so read entry 0.
            meta = view_meta[0]
            shape = meta['img_shape']
            factor = meta['scale_factor']
            flipped = meta['flip']

            mapped = bbox_mapping(proposal_list[0][:, :4], shape, factor,
                                  flipped)
            # Classification scores gathered from every cascade stage.
            stage_scores = []

            rois = bbox2roi([mapped])
            last_stage = self.num_stages - 1
            for stage in range(self.num_stages):
                head = self.bbox_head[stage]
                cls_score, bbox_pred = self._bbox_forward_test(
                    stage, view_feats, rois)
                stage_scores.append(cls_score)

                if stage < last_stage:
                    # Refine the RoIs with the top-scoring class before the
                    # next stage.
                    top_label = cls_score.argmax(dim=1)
                    rois = head.regress_by_class(rois, top_label, bbox_pred,
                                                 view_meta[0])

            # Average the stage scores; the box deltas are the last stage's.
            cls_score = sum(stage_scores) / float(len(stage_scores))
            view_boxes, view_scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                shape,
                factor,
                rescale=False,
                cfg=None)
            boxes_per_aug.append(view_boxes)
            scores_per_aug.append(view_scores)

        # Merging is expected to bring the boxes back to the original image
        # size before the final NMS.
        fused_boxes, fused_scores = merge_aug_bboxes(
            boxes_per_aug, scores_per_aug, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
            fused_boxes,
            fused_scores,
            rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms,
            rcnn_test_cfg.max_per_img)

        return bbox2result(det_bboxes, det_labels,
                           self.bbox_head[-1].num_classes)
Ejemplo n.º 11
0
    def aug_test(self, imgs, img_metas, rescale=False, **kwargs):
        """
        Test with augmentations.
        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """

        rpn_test_cfg = self.models[0].test_cfg.rpn
        rcnn_test_cfg = self.models[0].test_cfg.rcnn

        # For each model, compute detections
        aug_bboxes = []
        aug_scores = []
        aug_img_metas = []
        for model in self.models:
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                proposal_list = model.simple_test_rpn(x, img_meta,
                                                      rpn_test_cfg)

                _, semantic_feat = model.semantic_head(x)

                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']

                ms_scores = []
                rois = bbox2roi(proposal_list)
                for i in range(model.num_stages):
                    bbox_head = model.bbox_head[i]
                    cls_score, bbox_pred = model._bbox_forward_test(
                        i, x, rois, semantic_feat=semantic_feat)
                    ms_scores.append(cls_score)

                    if i < model.num_stages - 1:
                        bbox_label = cls_score.argmax(dim=1)
                        rois = bbox_head.regress_by_class(
                            rois, bbox_label, bbox_pred, img_meta[0])

                cls_score = sum(ms_scores) / float(len(ms_scores))
                bboxes, scores = model.bbox_head[-1].get_det_bboxes(
                    rois,
                    cls_score,
                    bbox_pred,
                    img_shape,
                    scale_factor,
                    rescale=False,
                    cfg=None)
                aug_bboxes.append(bboxes)
                aug_scores.append(scores)
                aug_img_metas.append(img_meta)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(aug_bboxes,
                                                        aug_scores,
                                                        aug_img_metas,
                                                        rcnn_test_cfg,
                                                        type='concat')
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.models[0].bbox_head[-1].num_classes)

        if self.models[0].with_mask:
            if det_bboxes.shape[0] == 0:
                segm_result = [
                    []
                    for _ in range(self.models[0].mask_head[-1].num_classes -
                                   1)
                ]
            else:
                aug_masks = []
                aug_img_metas = []
                for model in [self.models[0]]:
                    for x, img_meta in\
                            zip(model.extract_feats(imgs), img_metas):
                        scale_factor = img_meta[0]['scale_factor']
                        flip = img_meta[0]['flip']
                        img_shape = img_meta[0]['img_shape']
                        _bboxes = (det_bboxes[:, :4] *
                                   scale_factor if rescale else det_bboxes)
                        mask_rois = bbox2roi([_bboxes])
                        mask_roi_extractor = model.mask_roi_extractor[-1]
                        mask_feats = mask_roi_extractor(
                            x[:len(mask_roi_extractor.featmap_strides)],
                            mask_rois)

                        _, semantic_feat = model.semantic_head(x)
                        mask_semantic_feat = model.semantic_roi_extractor(
                            [semantic_feat], mask_rois)
                        mask_feats += mask_semantic_feat
                        last_feat = None

                        for i in range(model.num_stages):
                            mask_head = model.mask_head[i]
                            if model.mask_info_flow:
                                mask_pred, last_feat = mask_head(
                                    mask_feats, last_feat)
                            else:
                                mask_pred = mask_head(mask_feats)
                            aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                            aug_img_metas.append(img_meta)
                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                               rcnn_test_cfg)

                ori_shape = img_metas[0][0]['ori_shape']
                scale_factor = img_metas[0][0]['scale_factor']
                segm_result = self.models[0].mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    rcnn_test_cfg,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=rescale)

                # compute relations
                rel_model = self.models[0]
                for x, img_meta in zip(rel_model.extract_feats(imgs),
                                       img_metas):
                    _, semantic_feat = rel_model.semantic_head(x)
                    filename = img_meta[0]['filename']
                    im_height, im_width, _ = img_meta[0]['img_shape']
                    scale_factor = img_meta[0]['scale_factor']
                    ori_shape = img_meta[0]['ori_shape']
                    relation_preds = rel_model._rel_forward_test(
                        x,
                        det_bboxes,
                        det_labels,
                        merged_masks,
                        scale_factor,
                        ori_shape,
                        semantic_feat=semantic_feat,
                        im_width=im_width,
                        im_height=im_height)

                    if rel_model.rel_save_folder is not None:
                        np.save(
                            os.path.join(rel_model.rel_save_folder,
                                         filename + '.npy'), relation_preds)

            return bbox_result, segm_result
        else:
            return bbox_result
    def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
        """Run an ensemble of detectors with test-time augmentation.

        Every model in ``self.models`` is evaluated on every augmented view.
        RPN proposals of all (model, view) pairs are merged, refined through
        each model's multi-stage bbox heads, merged again and filtered by
        multi-class NMS.

        Args:
            imgs: Augmented views, forwarded to each model's
                ``extract_feats``.
            img_metas: One entry per augmented view; each entry is indexed
                per image and must provide ``img_shape``, ``scale_factor``
                and ``flip``.
            proposals: Unused; kept for interface compatibility.
            rescale: Unused; kept for interface compatibility.

        Returns:
            The value of ``bbox2result`` for the merged detections.

        Raises:
            NotImplementedError: If the first model has a mask branch.
        """
        ref_model = self.models[0]
        # Mask ensembling is not supported here. Fail before running any
        # model instead of after the whole bbox pipeline has been computed.
        if ref_model.with_mask:
            raise NotImplementedError

        rpn_test_cfg = ref_model.test_cfg.rpn
        imgs_per_gpu = len(img_metas[0])
        aug_proposals = [[] for _ in range(imgs_per_gpu)]
        # Metas are collected in lockstep with the proposals so that every
        # proposal set of image ``i`` is paired with the meta dict of the
        # view that produced it.
        # NOTE(review): assumes merge_aug_proposals pairs each proposal set
        # with one meta dict (as in upstream MMDetection) -- confirm.
        aug_proposal_metas = [[] for _ in range(imgs_per_gpu)]
        for model in self.models:
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                rpn_outs = model.simple_test_rpn(x, img_meta, rpn_test_cfg)
                for i, img_proposals in enumerate(rpn_outs):
                    aug_proposals[i].append(img_proposals)
                    aug_proposal_metas[i].append(img_meta[i])

        # after merging, proposals will be rescaled to the original image size
        proposal_list = [
            merge_aug_proposals(img_proposals, metas, rpn_test_cfg)
            for img_proposals, metas in zip(aug_proposals,
                                            aug_proposal_metas)
        ]

        rcnn_test_cfg = ref_model.test_cfg.rcnn
        aug_bboxes = []
        aug_scores = []
        aug_img_metas = []
        for model in self.models:
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                # only one image in the batch
                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']
                flip = img_meta[0]['flip']

                # Map the merged proposals into this view's frame.
                mapped_proposals = bbox_mapping(proposal_list[0][:, :4],
                                                img_shape, scale_factor, flip)
                # "ms" in variable names means multi-stage
                ms_scores = []

                rois = bbox2roi([mapped_proposals])
                for i in range(model.num_stages):
                    bbox_roi_extractor = model.bbox_roi_extractor[i]
                    bbox_head = model.bbox_head[i]

                    bbox_feats = bbox_roi_extractor(
                        x[:len(bbox_roi_extractor.featmap_strides)], rois)
                    if model.with_shared_head:
                        bbox_feats = model.shared_head(bbox_feats)

                    cls_score, bbox_pred = bbox_head(bbox_feats)
                    ms_scores.append(cls_score)

                    # Every stage but the last refines the RoIs that feed
                    # the next stage.
                    if i < model.num_stages - 1:
                        bbox_label = cls_score.argmax(dim=1)
                        rois = bbox_head.regress_by_class(
                            rois, bbox_label, bbox_pred, img_meta[0])

                # Average the classification scores of all stages.
                cls_score = sum(ms_scores) / float(len(ms_scores))
                bboxes, scores = model.bbox_head[-1].get_det_bboxes(
                    rois,
                    cls_score,
                    bbox_pred,
                    img_shape,
                    scale_factor,
                    rescale=False,
                    cfg=None)
                aug_bboxes.append(bboxes)
                aug_scores.append(scores)
                aug_img_metas.append(img_meta)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, aug_img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        return bbox2result(det_bboxes, det_labels,
                           ref_model.bbox_head[-1].num_classes)
Ejemplo n.º 13
0
    def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
        """Test with augmentations, including mask and mask-IoU scoring.

        Features are re-extracted for each phase (semantic, RPN, bbox, mask)
        instead of being cached, to save memory.

        Args:
            imgs: Augmented views, forwarded to ``self.extract_feats``.
            img_metas: One entry per augmented view; each entry is indexed
                per image and must provide ``img_shape``, ``scale_factor``,
                ``flip`` and (for masks) ``ori_shape``.
            proposals: Unused; kept for interface compatibility.
            rescale: Unused; kept for interface compatibility.

        Returns:
            ``bbox_result`` when the model has no mask branch, otherwise
            ``(bbox_result, (segm_result, mask_scores))``. ``mask_scores`` is
            ``None`` when nothing was detected.
        """
        if self.with_semantic:
            semantic_feats = [
                self.semantic_head(feat)[1]
                for feat in self.extract_feats(imgs)
            ]
        else:
            semantic_feats = [None] * len(img_metas)

        # recompute feats to save memory
        proposal_list = self.aug_test_rpn(
            self.extract_feats(imgs), img_metas, self.test_cfg.rpn)

        rcnn_test_cfg = self.test_cfg.rcnn
        aug_bboxes = []
        aug_scores = []
        for x, img_meta, semantic in zip(
                self.extract_feats(imgs), img_metas, semantic_feats):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']

            # Map the merged proposals into this view's frame.
            mapped_proposals = bbox_mapping(proposal_list[0][:, :4],
                                            img_shape, scale_factor, flip)
            # "ms" in variable names means multi-stage
            ms_scores = []
            rois = bbox2roi([mapped_proposals])
            for i in range(self.num_stages):
                bbox_head = self.bbox_head[i]
                cls_score, bbox_pred = self._bbox_forward_test(
                    i, x, rois, semantic_feat=semantic)
                ms_scores.append(cls_score)

                # Every stage but the last refines the RoIs for the next one.
                if i < self.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label,
                                                      bbox_pred, img_meta[0])

            # Average the classification scores of all stages.
            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if not self.with_mask:
            return bbox_result

        if det_bboxes.shape[0] == 0:
            segm_result = [[]
                           for _ in range(self.mask_head[-1].num_classes -
                                          1)]
            # Nothing to score. Previously this name was left unassigned and
            # the return statement below raised UnboundLocalError.
            mask_scores = None
        else:
            aug_masks = []
            aug_img_metas = []
            aug_masks_wo_sigmoid = []
            for x, img_meta, semantic in zip(
                    self.extract_feats(imgs), img_metas, semantic_feats):
                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']
                flip = img_meta[0]['flip']
                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                       scale_factor, flip)
                mask_rois = bbox2roi([_bboxes])
                mask_feats = self.mask_roi_extractor[-1](
                    x[:len(self.mask_roi_extractor[-1].featmap_strides)],
                    mask_rois)
                if self.with_semantic:
                    mask_semantic_feat = self.semantic_roi_extractor(
                        [semantic], mask_rois)
                    # Match spatial sizes before fusing the two features.
                    if mask_semantic_feat.shape[-2:] != mask_feats.shape[
                            -2:]:
                        mask_semantic_feat = F.adaptive_avg_pool2d(
                            mask_semantic_feat, mask_feats.shape[-2:])
                    mask_feats += mask_semantic_feat
                last_feat = None
                for i in range(self.num_stages):
                    mask_head = self.mask_head[i]
                    if self.mask_info_flow:
                        mask_pred, last_feat = mask_head(
                            mask_feats, last_feat)
                    else:
                        mask_pred = mask_head(mask_feats)
                    aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                    aug_img_metas.append(img_meta)
                    # Raw logits are kept as well; they feed the IoU head.
                    aug_masks_wo_sigmoid.append(mask_pred.cpu().numpy())
            merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                           rcnn_test_cfg)

            ori_shape = img_metas[0][0]['ori_shape']
            segm_result = self.mask_head[-1].get_seg_masks(
                merged_masks,
                det_bboxes,
                det_labels,
                rcnn_test_cfg,
                ori_shape,
                scale_factor=1.0,
                rescale=False)

            merged_masks_wo_sigmoid = merge_aug_masks(
                aug_masks_wo_sigmoid, aug_img_metas, rcnn_test_cfg)
            # NOTE(review): ``mask_feats`` below is whatever the LAST
            # augmented view produced in the loop above -- confirm this is
            # the intended input for the mask IoU head.
            merged_masks_torch = torch.from_numpy(
                merged_masks_wo_sigmoid).to(mask_feats.dtype).to(
                    mask_feats.device)
            num_dets = det_labels.size(0)
            assert mask_feats.size(0) == merged_masks_torch.size(
                0) == num_dets
            # Pick, for every detection, the mask of its predicted class.
            mask_iou_pred = self.mask_iou_head(
                mask_feats, merged_masks_torch[range(num_dets), det_labels])
            mask_scores = self.mask_iou_head.get_mask_scores(
                mask_iou_pred, det_bboxes, det_labels)
        return bbox_result, (segm_result, mask_scores)
Ejemplo n.º 14
0
    def aug_test_cascade_bboxes(self,
                                feats,
                                img_metas,
                                proposal_list,
                                rcnn_test_cfg,
                                rescale=False):
        """Detect boxes with a cascade of bbox heads under test-time aug.

        For every augmented view the proposals are mapped into the view,
        refined stage by stage, and scored with the mean of the per-stage
        classification scores. Results of all views are then merged and
        filtered with multi-class NMS.

        Args:
            feats: Feature maps, one entry per augmented view.
            img_metas: One entry per augmented view; each entry is indexed
                per image and must provide ``img_shape``, ``scale_factor``
                and ``flip``.
            proposal_list: Proposals; only ``proposal_list[0]`` is used.
            rcnn_test_cfg: Config providing ``score_thr``, ``nms`` and
                ``max_per_img``.
            rescale: Unused; kept for interface compatibility.

        Returns:
            tuple: ``(det_bboxes, det_labels)`` from ``multiclass_nms``.
        """
        aug_bboxes = []
        aug_scores = []
        for x, img_meta in zip(feats, img_metas):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            # TODO more flexible
            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            rois = bbox2roi([proposals])
            # "ms" in variable names means multi-stage
            ms_scores = []
            for i in range(self.num_stages):
                bbox_roi_extractor = self.bbox_roi_extractor[i]
                bbox_head = self.bbox_head[i]

                bbox_feats = bbox_roi_extractor(
                    x[:len(bbox_roi_extractor.featmap_strides)], rois)
                if self.with_shared_head:
                    bbox_feats = self.shared_head(bbox_feats)

                cls_score, bbox_pred = bbox_head(bbox_feats)
                ms_scores.append(cls_score)
                # The former per-stage ``get_det_bboxes`` call guarded by
                # ``keep_all_stages`` was dropped: its outputs were always
                # overwritten by the final call below before being used.

                # Every stage but the last refines the RoIs for the next one.
                if i < self.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label,
                                                      bbox_pred, img_meta[0])

            # Average the classification scores of all stages.
            cls_score = sum(ms_scores) / self.num_stages
            # cfg must be None so that no NMS is performed here
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)
        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)
        return det_bboxes, det_labels
Ejemplo n.º 15
0
    def aug_test(self, imgs, img_metas, rescale=False, eval_size=None):
        """Test with augmentations.

        Stage outputs are combined first (per augmented view), then the
        views are combined. Features are extracted once per view and reused
        for the RPN, bbox and mask phases.

        Args:
            imgs: Augmented views; each one is passed to
                ``self.extract_feat``.
            img_metas: One entry per augmented view; each entry is indexed
                per image and must provide ``img_shape``, ``scale_factor``,
                ``flip`` and (for masks) ``ori_shape``.
            rescale: If False, the returned boxes are multiplied by the
                ``scale_factor`` of the first view; if True they are left as
                produced by the merge step.
            eval_size: Forwarded unchanged to ``get_seg_masks``.

        Returns:
            ``bbox_result`` when the model has no mask branch, otherwise
            ``(bbox_result, segm_result)``.

        Raises:
            NotImplementedError: If ``self.test_cfg.keep_all_stages`` is set.
        """
        # Per-stage outputs are not supported. Fail before doing any work
        # instead of after the whole pipeline has run.
        if self.test_cfg.keep_all_stages:
            raise NotImplementedError

        feats = [self.extract_feat(img) for img in imgs]

        proposal_list = self.aug_test_rpn(feats, img_metas,
                                          self.test_cfg.rpn)

        if self.with_semantic:
            semantic_feats = [self.semantic_head(x)[1] for x in feats]
        else:
            semantic_feats = [None] * len(feats)

        rcnn_test_cfg = self.test_cfg.rcnn

        aug_bboxes = []
        aug_scores = []
        for x, semantic_feat, img_meta in zip(feats, semantic_feats,
                                              img_metas):
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            rois = bbox2roi([proposals])
            # "ms" in variable names means multi-stage
            ms_scores = []
            for i in range(self.num_stages):
                bbox_head = self.bbox_head[i]
                cls_score, bbox_pred = self._bbox_forward_test(
                    i, x, rois, semantic_feat=semantic_feat)
                ms_scores.append(cls_score)

                # Every stage but the last refines the RoIs for the next one.
                if i < self.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label,
                                                      bbox_pred, img_meta[0])

            # Average the classification scores of all stages.
            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(rois,
                                                               cls_score,
                                                               bbox_pred,
                                                               img_shape,
                                                               scale_factor,
                                                               rescale=False,
                                                               cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)
        if rescale:
            _det_bboxes = det_bboxes
        else:
            # Work on a copy: ``det_bboxes`` is still needed unscaled for
            # the mask branch below.
            _det_bboxes = det_bboxes.clone()
            _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
        bbox_result = bbox2result(_det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if not self.with_mask:
            return bbox_result

        if det_bboxes.shape[0] == 0:
            segm_result = [[]
                           for _ in range(self.mask_head[-1].num_classes -
                                          1)]
        else:
            ori_shape = img_metas[0][0]['ori_shape']
            aug_aug_masks = []
            for x, semantic_feat, img_meta in zip(feats, semantic_feats,
                                                  img_metas):
                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']
                flip = img_meta[0]['flip']
                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                       scale_factor, flip)

                mask_rois = bbox2roi([_bboxes])
                aug_masks = []
                mask_roi_extractor = self.mask_roi_extractor[-1]
                mask_feats = mask_roi_extractor(
                    x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
                if self.with_semantic and 'mask' in self.semantic_fusion:
                    mask_semantic_feat = self.semantic_roi_extractor(
                        [semantic_feat], mask_rois)
                    mask_feats += mask_semantic_feat
                last_feat = None
                for i in range(self.num_stages):
                    mask_head = self.mask_head[i]
                    if self.mask_info_flow:
                        mask_pred, last_feat = mask_head(
                            mask_feats, last_feat)
                    else:
                        mask_pred = mask_head(mask_feats)
                    aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                # First merge the stages of this view ...
                merged_masks = merge_aug_masks(aug_masks, [img_meta] *
                                               self.num_stages,
                                               self.test_cfg.rcnn)
                aug_aug_masks.append(merged_masks)
            # ... then average over views; flipped masks have already been
            # recovered by the per-view merge above.
            merged_masks = np.mean(aug_aug_masks, axis=0)
            segm_result = self.mask_head[-1].get_seg_masks(
                merged_masks,
                det_bboxes,
                det_labels,
                rcnn_test_cfg,
                ori_shape,
                scale_factor=1.0,
                rescale=False,
                eval_size=eval_size)

        return bbox_result, segm_result
Ejemplo n.º 16
0
    def aug_test(self, imgs, img_metas, **kwargs):
        """Run an ensemble of detectors with test-time augmentation.

        Every model in ``self.models`` is evaluated on every augmented view.
        Proposals, boxes and (if the first model has a mask branch) masks of
        all (model, view) pairs are merged.

        Args:
            imgs: Augmented views, forwarded to each model's
                ``extract_feats``.
            img_metas: One entry per augmented view; each entry is indexed
                per image and must provide ``img_shape``, ``scale_factor``,
                ``flip`` and (for masks) ``ori_shape``.
            **kwargs: Accepted and ignored.

        Returns:
            ``bbox_result`` when the first model has no mask branch,
            otherwise ``(bbox_result, segm_result)``.
        """
        ref_model = self.models[0]
        rpn_test_cfg = ref_model.test_cfg.rpn
        imgs_per_gpu = len(img_metas[0])
        aug_proposals = [[] for _ in range(imgs_per_gpu)]
        # Metas are collected in lockstep with the proposals so that every
        # proposal set of image ``i`` is paired with the meta dict of the
        # view that produced it.
        # NOTE(review): assumes merge_aug_proposals pairs each proposal set
        # with one meta dict (as in upstream MMDetection) -- confirm.
        aug_proposal_metas = [[] for _ in range(imgs_per_gpu)]
        for model in self.models:
            # recompute feats to save memory
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                rpn_outs = model.simple_test_rpn(x, img_meta, rpn_test_cfg)
                for i, img_proposals in enumerate(rpn_outs):
                    aug_proposals[i].append(img_proposals)
                    aug_proposal_metas[i].append(img_meta[i])
        # after merging, proposals will be rescaled to the original image size
        proposal_list = [
            merge_aug_proposals(img_proposals, metas, rpn_test_cfg)
            for img_proposals, metas in zip(aug_proposals,
                                            aug_proposal_metas)
        ]

        rcnn_test_cfg = ref_model.test_cfg.rcnn
        aug_bboxes = []
        aug_scores = []
        bbox_img_metas = []
        for model in self.models:
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                # only one image in the batch
                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']
                flip = img_meta[0]['flip']

                proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                         scale_factor, flip)
                # "ms" in variable names means multi-stage
                ms_scores = []

                rois = bbox2roi([proposals])
                for i in range(model.num_stages):
                    bbox_head = model.bbox_head[i]
                    cls_score, bbox_pred = model._bbox_forward_test(i, x, rois)
                    ms_scores.append(cls_score)

                    # Every stage but the last refines the RoIs that feed
                    # the next stage.
                    if i < model.num_stages - 1:
                        bbox_label = cls_score.argmax(dim=1)
                        rois = bbox_head.regress_by_class(
                            rois, bbox_label, bbox_pred, img_meta[0])

                # Average the classification scores of all stages.
                cls_score = sum(ms_scores) / float(len(ms_scores))
                bboxes, scores = model.bbox_head[-1].get_det_bboxes(
                    rois,
                    cls_score,
                    bbox_pred,
                    img_shape,
                    scale_factor,
                    rescale=False,
                    cfg=None)
                aug_bboxes.append(bboxes)
                aug_scores.append(scores)
                bbox_img_metas.append(img_meta)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, bbox_img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  ref_model.bbox_head[-1].num_classes)

        if not ref_model.with_mask:
            return bbox_result

        if det_bboxes.shape[0] == 0:
            segm_result = [
                []
                for _ in range(ref_model.mask_head[-1].num_classes - 1)
            ]
        else:
            aug_masks = []
            mask_img_metas = []
            for model in self.models:
                for x, img_meta in zip(model.extract_feats(imgs),
                                       img_metas):
                    img_shape = img_meta[0]['img_shape']
                    scale_factor = img_meta[0]['scale_factor']
                    flip = img_meta[0]['flip']
                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                           scale_factor, flip)
                    mask_rois = bbox2roi([_bboxes])
                    mask_roi_extractor = model.mask_roi_extractor[-1]
                    mask_feats = mask_roi_extractor(
                        x[:len(mask_roi_extractor.featmap_strides)],
                        mask_rois)
                    last_feat = None
                    for i in range(model.num_stages):
                        mask_head = model.mask_head[i]
                        if model.mask_info_flow:
                            mask_pred, last_feat = mask_head(
                                mask_feats, last_feat)
                        else:
                            mask_pred = mask_head(mask_feats)
                        aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                        # One meta per mask so the merge step can pair them.
                        mask_img_metas.append(img_meta)
            merged_masks = merge_aug_masks(aug_masks, mask_img_metas,
                                           rcnn_test_cfg)

            ori_shape = img_metas[0][0]['ori_shape']
            segm_result = ref_model.mask_head[-1].get_seg_masks(
                merged_masks,
                det_bboxes,
                det_labels,
                rcnn_test_cfg,
                ori_shape,
                scale_factor=1.0,
                rescale=False)
        return bbox_result, segm_result
Ejemplo n.º 17
0
    def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
        """Detect boxes under test-time augmentation, item by item.

        Each augmented view is run through ``self.simple_test_bboxes``. The
        outputs are then regrouped by item index, merged across views and,
        when ``rcnn_test_cfg`` has an ``nms`` attribute, filtered by
        multi-class NMS.

        Args:
            feats: Feature maps, one entry per augmented view.
            img_metas: One entry per augmented view; each entry is a sequence
                of meta dicts, of which the one at ``self.key_dim`` supplies
                ``img_shape`` and ``scale_factor``.
            proposal_list: Proposals, paired element-wise with the meta
                dicts of a view.
            rcnn_test_cfg: Test config; ``score_thr``, ``nms`` and
                ``max_per_img`` are read when ``nms`` is present.

        Returns:
            tuple: Two lists with one entry per item index. With NMS these
            hold ``det_bboxes`` and ``det_labels``; without it they hold the
            merged boxes and the merged scores. Both lists are empty when
            ``feats`` yields no view.
        """
        aug_bboxes = []
        aug_scores = []
        for x, img_meta in zip(feats, img_metas):
            # Shape and scale come from the meta at ``self.key_dim``
            img_shape = img_meta[self.key_dim]['img_shape']
            scale_factor = img_meta[self.key_dim]['scale_factor']
            flip = [meta_['flip'] for meta_ in img_meta]
            # TODO more flexible

            proposals = [
                bbox_mapping(proposal_[:, :4], img_shape, scale_factor, flip_)
                for flip_, proposal_ in zip(flip, proposal_list)
            ]

            bboxes, scores = self.simple_test_bboxes(x,
                                                     img_meta,
                                                     proposals,
                                                     None,
                                                     rescale=False)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # No augmented view was processed: nothing to merge. Indexing
        # ``aug_scores[0]`` below would otherwise raise IndexError.
        if not aug_scores:
            return [], []

        # Both values are the same for every item, so compute them once.
        metas_ = [[meta_[self.key_dim]] for meta_ in img_metas]
        use_nms = hasattr(rcnn_test_cfg, 'nms')

        det_bboxes_collect = []
        det_labels_collect = []
        # after merging, bboxes will be rescaled to the original image size
        for i in range(len(aug_scores[0])):
            a_bboxes = [a_b[i] for a_b in aug_bboxes]
            a_scores = [a_s[i] for a_s in aug_scores]
            merged_bboxes, merged_scores = merge_aug_bboxes(
                a_bboxes, a_scores, metas_, rcnn_test_cfg)
            if use_nms:
                det_bboxes, det_labels = multiclass_nms(
                    merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
                    rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)
                det_bboxes_collect.append(det_bboxes)
                det_labels_collect.append(det_labels)
            else:
                # Without NMS the second list carries scores, not labels.
                det_bboxes_collect.append(merged_bboxes)
                det_labels_collect.append(merged_scores)

        return det_bboxes_collect, det_labels_collect
Ejemplo n.º 18
0
    def aug_test(self, features, proposal_list, img_metas, rescale=False):
        """Test with augmentations, cascading through every stage.

        Each (features, img_metas) pair is one augmented view. For every view
        the proposals are mapped into that view, refined stage by stage, and
        scored with the mean of the per-stage classification scores. The
        per-view boxes are then merged and filtered by NMS. When
        ``self.with_mask`` is set, masks are predicted for the merged
        detections on every view and every stage, and merged as well.

        Args:
            features: Per-view features handed to ``_bbox_forward`` and
                ``_mask_forward``.
            proposal_list: Proposals; only ``proposal_list[0]`` is read.
            img_metas: Per-view meta lists; only element ``[0]`` of each
                view is read.
            rescale: Not read by this method.

        Returns:
            ``[bbox_result]``, or ``[(bbox_result, segm_result)]`` when
            ``self.with_mask`` is set.
        """
        rcnn_test_cfg = self.test_cfg
        aug_bboxes, aug_scores = [], []
        for feat, view_metas in zip(features, img_metas):
            # only one image in the batch
            meta = view_metas[0]
            img_shape = meta['img_shape']
            scale_factor = meta['scale_factor']

            view_proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                          scale_factor, meta['flip'],
                                          meta['flip_direction'])
            rois = bbox2roi([view_proposals])

            # one classification score tensor per cascade stage
            stage_scores = []
            for stage in range(self.num_stages):
                bbox_results = self._bbox_forward(stage, feat, rois)
                stage_scores.append(bbox_results['cls_score'])

                if stage >= self.num_stages - 1:
                    # the last stage does not refine the rois any further
                    continue
                # the last score column is excluded before picking a label
                labels = bbox_results['cls_score'][:, :-1].argmax(dim=1)
                rois = self.bbox_head[stage].regress_by_class(
                    rois, labels, bbox_results['bbox_pred'], meta)

            mean_score = sum(stage_scores) / float(len(stage_scores))
            view_bboxes, view_scores = self.bbox_head[-1].get_bboxes(
                rois,
                mean_score,
                bbox_results['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(view_bboxes)
            aug_scores.append(view_scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if not self.with_mask:
            return [bbox_result]

        if det_bboxes.shape[0] == 0:
            # nothing detected: one empty list per mask class
            segm_result = [[] for _ in range(self.mask_head[-1].num_classes)]
            return [(bbox_result, segm_result)]

        aug_masks = []
        aug_img_metas = []
        for feat, view_metas in zip(features, img_metas):
            meta = view_metas[0]
            view_dets = bbox_mapping(det_bboxes[:, :4], meta['img_shape'],
                                     meta['scale_factor'], meta['flip'],
                                     meta['flip_direction'])
            mask_rois = bbox2roi([view_dets])
            for stage in range(self.num_stages):
                mask_results = self._mask_forward(stage, feat, mask_rois)
                aug_masks.append(
                    mask_results['mask_pred'].sigmoid().cpu().numpy())
            # one meta entry per mask appended above
            aug_img_metas.extend([view_metas] * self.num_stages)
        merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                       self.test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=np.ones(4),
            rescale=False)
        return [(bbox_result, segm_result)]
Ejemplo n.º 19
0
    def aug_test_cascadercnn_bboxes(self, feats, img_metas, proposal_list,
                                    rcnn_test_cfg):
        """Detect boxes with test-time augmentation for a cascade R-CNN.

        For every augmented view the proposals are mapped into the view,
        refined by each cascade stage in turn, and scored with the mean of
        the per-stage classification scores. The per-view boxes and scores
        are merged across views and filtered with multi-class NMS.

        Args:
            feats: Per-view features; each is sliced to the number of
                ``featmap_strides`` of the stage's RoI extractor.
            img_metas: Per-view meta lists; only element ``[0]`` of each view
                is read (``img_shape``, ``scale_factor``, ``flip``).
            proposal_list: Proposals; only ``proposal_list[0]`` is read.
            rcnn_test_cfg: Test config providing ``score_thr``, ``nms`` and
                ``max_per_img``; also forwarded to ``merge_aug_bboxes``.

        Returns:
            tuple: ``(det_bboxes, det_labels)`` as returned by
            ``multiclass_nms``.
        """
        aug_bboxes = []
        aug_scores = []
        for x, img_meta in zip(feats, img_metas):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']

            # TODO more flexible
            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            rois = bbox2roi([proposals])

            # "ms" in variable names means multi-stage
            ms_scores = []
            for i in range(self.num_stages):
                bbox_roi_extractor = self.bbox_roi_extractor[i]
                bbox_head = self.bbox_head[i]
                # The feature slice and the rois are two separate arguments;
                # the closing bracket of the slice must come before `rois`.
                bbox_feats = bbox_roi_extractor(
                    x[:len(bbox_roi_extractor.featmap_strides)], rois)
                cls_score, bbox_pred = bbox_head(bbox_feats)
                ms_scores.append(cls_score)

                # Per-stage boxes are deliberately not pushed into
                # `aug_bboxes`: that list must hold exactly one entry per
                # augmented view so it stays aligned with `aug_scores` and
                # `img_metas` when merging.
                if i < self.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label,
                                                      bbox_pred, img_meta[0])

            cls_score = sum(ms_scores) / self.num_stages
            # `img_shape` precedes `scale_factor`, as in the sibling
            # `get_det_bboxes` calls of this file.
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # The caller-supplied `rcnn_test_cfg` is used as-is for merging and
        # NMS instead of being replaced inside the loop.
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)
        return det_bboxes, det_labels
Ejemplo n.º 20
0
    def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
        """Test with augmentations and return the ensembled result.

        Proposals come from ``aug_test_rpn``. Features are re-extracted for
        each pass over the augmented views. Boxes are refined through all
        stages per view, merged, and filtered with
        ``box_results_with_nms_and_limit``. When ``self.with_mask`` is set,
        masks are predicted on every view and stage and merged.

        Args:
            imgs: Augmented images passed to ``extract_feats``.
            img_metas: Per-view meta lists; only element ``[0]`` of each view
                is read.
            proposals: Not read by this method.
            rescale: When false, the detected boxes are multiplied by the
                first view's ``scale_factor`` before being converted to
                results; it is also forwarded to ``get_seg_masks``.

        Returns:
            ``bbox_result``, or ``(bbox_result, segm_result)`` when
            ``self.with_mask`` is set.
        """
        rpn_feats = self.extract_feats(imgs)
        proposal_list = self.aug_test_rpn(rpn_feats, img_metas,
                                          self.test_cfg.rpn)
        rcnn_test_cfg = self.test_cfg.rcnn

        aug_bboxes, aug_scores = [], []
        for feat, view_metas in zip(self.extract_feats(imgs), img_metas):
            meta = view_metas[0]
            img_shape = meta['img_shape']
            scale_factor = meta['scale_factor']

            view_proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                          scale_factor, meta['flip'])
            rois = bbox2roi([view_proposals])

            stage_scores = []
            for stage in range(self.num_stages):
                roi_extractor = self.bbox_roi_extractor[stage]
                stage_head = self.bbox_head[stage]

                roi_feats = roi_extractor(
                    feat[:len(roi_extractor.featmap_strides)], rois)
                cls_score, bbox_pred = stage_head(roi_feats)
                stage_scores.append(cls_score)

                if stage < self.num_stages - 1:
                    labels = cls_score.argmax(dim=1)
                    rois = stage_head.regress_by_class(rois, labels,
                                                       bbox_pred, meta)

            mean_score = sum(stage_scores) / self.num_stages
            view_bboxes, view_scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                mean_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(view_bboxes)
            aug_scores.append(view_scores)

        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = box_results_with_nms_and_limit(
            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms, self.test_cfg, rcnn_test_cfg.max_per_img)

        if rescale:
            out_bboxes = det_bboxes
        else:
            # work on a copy so `det_bboxes` keeps its own coordinates
            out_bboxes = det_bboxes.clone()
            out_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
        bbox_result = bbox2result(out_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if not self.with_mask:
            return bbox_result

        if det_bboxes.shape[0] == 0:
            empty_classes = self.mask_head[-1].num_classes - 1
            segm_result = [[] for _ in range(empty_classes)]
            return (bbox_result, segm_result)

        aug_masks = []
        img_meta_list = []
        for feat, view_metas in zip(self.extract_feats(imgs), img_metas):
            meta = view_metas[0]
            view_dets = bbox_mapping(det_bboxes[:, :4], meta['img_shape'],
                                     meta['scale_factor'], meta['flip'])
            mask_rois = bbox2roi([view_dets])
            for stage in range(self.num_stages):
                mask_extractor = self.mask_roi_extractor[stage]
                mask_feats = mask_extractor(
                    feat[:len(mask_extractor.featmap_strides)], mask_rois)
                mask_pred = self.mask_head[stage](mask_feats)
                aug_masks.append(mask_pred.sigmoid().cpu().numpy())
            # one meta entry per stage mask of this view
            img_meta_list.extend([view_metas] * self.num_stages)
        merged_masks = merge_aug_masks(aug_masks, img_meta_list,
                                       self.test_cfg.rcnn)
        ori_shape = img_metas[0][0]['ori_shape']

        # `rescale` controls the output scale; when True it matches the
        # original image.
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            out_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=img_metas[0][0]['scale_factor'],
            rescale=rescale)
        return (bbox_result, segm_result)
Ejemplo n.º 21
0
    def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
        """Test with augmentations and optionally predict relations.

        Proposals come from ``aug_test_rpn``. Each augmented view is then
        refined through all stages, scored with the mean per-stage
        classification score, merged across views and filtered with
        multi-class NMS.

        Args:
            imgs: Augmented images passed to ``extract_feats``.
            img_metas: Per-view meta lists; only element ``[0]`` of each view
                is read.
            proposals: Not read by this method.
            rescale: Not read by this method.

        Returns:
            tuple: ``(ms_bbox_result, relation_preds)``. ``ms_bbox_result``
            is a dict whose ``'ensemble'`` entry holds the bbox result.
            ``relation_preds`` is the output of ``_rel_forward_test`` with a
            ``'file_name'`` entry added, or ``None`` when ``self.with_rel``
            is false.
        """

        ms_bbox_result = {}
        # recompute feats to save memory
        proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas,
                                          self.test_cfg.rpn)

        rcnn_test_cfg = self.test_cfg.rcnn
        aug_bboxes = []
        aug_scores = []
        for x, img_meta in zip(self.extract_feats(imgs), img_metas):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            # "ms" in variable names means multi-stage
            ms_scores = []

            rois = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_roi_extractor = self.bbox_roi_extractor[i]
                bbox_head = self.bbox_head[i]
                bbox_feats = bbox_roi_extractor(
                    x[:len(bbox_roi_extractor.featmap_strides)], rois)
                if self.with_shared_head:
                    bbox_feats = self.shared_head(bbox_feats)
                cls_score, bbox_pred = bbox_head(bbox_feats)
                ms_scores.append(cls_score)

                if i < self.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(rois, bbox_label,
                                                      bbox_pred, img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(rois,
                                                               cls_score,
                                                               bbox_pred,
                                                               img_shape,
                                                               scale_factor,
                                                               rescale=False,
                                                               cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)
        ms_bbox_result['ensemble'] = bbox_result

        # Bind the name up front so the return below cannot hit an unbound
        # local when relation prediction is disabled.
        relation_preds = None
        if self.with_rel:
            # NOTE(review): `x`, `img_meta` and `scale_factor` are whatever
            # the last loop iteration left behind, i.e. the last augmented
            # view -- confirm this is the intended view for relations.
            ori_shape = img_meta[0]['ori_shape']
            im_height, im_width, _ = img_meta[0]['img_shape']
            filename = img_meta[0]['filename']
            relation_preds = self._rel_forward_test(x,
                                                    det_bboxes,
                                                    det_labels,
                                                    scale_factor,
                                                    ori_shape,
                                                    im_width=im_width,
                                                    im_height=im_height)
            relation_preds['file_name'] = filename

        return (ms_bbox_result, relation_preds)
Ejemplo n.º 22
0
    def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
        """Test with augmentations; masks are averaged per view, then overall.

        Proposals come from ``aug_test_rpn``. Boxes are refined through all
        stages for each augmented view, merged and filtered with multi-class
        NMS. When ``self.with_mask`` is set, the stage masks of each view are
        merged first and the per-view results are then averaged.

        Args:
            imgs: Augmented images passed to ``extract_feats``.
            img_metas: Per-view meta lists; only element ``[0]`` of each view
                is read.
            proposals: Not read by this method.
            rescale: When false, the detected boxes are multiplied by the
                first view's ``scale_factor`` before being converted to
                results.

        Returns:
            With ``test_cfg.keep_all_stages`` false: the ensemble bbox
            result, or an ``(bbox, segm)`` tuple when masks are enabled.
            Otherwise: the per-key dict of bbox results, or a dict of
            ``(bbox, segm)`` tuples when masks are enabled.
        """
        ms_bbox_result = {}
        ms_segm_result = {}
        rcnn_test_cfg = self.test_cfg.rcnn

        proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas,
                                          self.test_cfg.rpn)

        aug_bboxes, aug_scores = [], []
        for feat, view_metas in zip(self.extract_feats(imgs), img_metas):
            meta = view_metas[0]
            img_shape = meta['img_shape']
            scale_factor = meta['scale_factor']
            view_proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                          scale_factor, meta['flip'])
            rois = bbox2roi([view_proposals])

            stage_scores = []
            for stage in range(self.num_stages):
                roi_extractor = self.bbox_roi_extractor[stage]
                stage_head = self.bbox_head[stage]

                roi_feats = roi_extractor(
                    feat[:len(roi_extractor.featmap_strides)], rois)
                cls_score, bbox_pred = stage_head(roi_feats)
                stage_scores.append(cls_score)

                if stage < self.num_stages - 1:
                    labels = cls_score.argmax(dim=1)
                    rois = stage_head.regress_by_class(rois, labels,
                                                       bbox_pred, meta)

            mean_score = sum(stage_scores) / self.num_stages
            view_bboxes, view_scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                mean_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(view_bboxes)
            aug_scores.append(view_scores)

        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        # Original author's note: after ensembling, all det_bboxes are at
        # the original image size.
        det_bboxes, det_labels = multiclass_nms(
            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)

        if rescale:
            out_bboxes = det_bboxes
        else:
            # scale a copy so `det_bboxes` itself is left untouched
            out_bboxes = det_bboxes.clone()
            out_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
        ms_bbox_result['ensemble'] = bbox2result(
            out_bboxes, det_labels, self.bbox_head[-1].num_classes)

        if self.with_mask:
            if det_bboxes.shape[0] == 0:
                empty_classes = self.mask_head[-1].num_classes - 1
                segm_result = [[] for _ in range(empty_classes)]
            else:
                per_view_masks = []
                for feat, view_metas in zip(self.extract_feats(imgs),
                                            img_metas):
                    meta = view_metas[0]
                    view_dets = bbox_mapping(det_bboxes[:, :4],
                                             meta['img_shape'],
                                             meta['scale_factor'],
                                             meta['flip'])
                    mask_rois = bbox2roi([view_dets])
                    stage_masks = []
                    for stage in range(self.num_stages):
                        mask_extractor = self.mask_roi_extractor[stage]
                        mask_feats = mask_extractor(
                            feat[:len(mask_extractor.featmap_strides)],
                            mask_rois)
                        mask_pred = self.mask_head[stage](mask_feats)
                        stage_masks.append(
                            mask_pred.sigmoid().cpu().numpy())
                    # first merge the masks of all stages for this view
                    per_view_masks.append(
                        merge_aug_masks(stage_masks,
                                        [view_metas] * self.num_stages,
                                        self.test_cfg.rcnn))
                # Then average over all augmented views. Original author's
                # note: the masks all share one size (28*28), so nothing
                # more elaborate is needed, and flipped views have already
                # been restored at this point.
                merged_masks = np.mean(per_view_masks, axis=0)

                ori_shape = img_metas[0][0]['ori_shape']
                segm_result = self.mask_head[-1].get_seg_masks(
                    merged_masks,
                    det_bboxes,
                    det_labels,
                    self.test_cfg.rcnn,
                    ori_shape,
                    scale_factor=1.0,
                    rescale=False)

            ms_segm_result['ensemble'] = segm_result

        if self.test_cfg.keep_all_stages:
            if self.with_mask:
                return {
                    stage: (ms_bbox_result[stage], ms_segm_result[stage])
                    for stage in ms_bbox_result
                }
            return ms_bbox_result

        if self.with_mask:
            return (ms_bbox_result['ensemble'], ms_segm_result['ensemble'])
        return ms_bbox_result['ensemble']
Ejemplo n.º 23
0
    def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):
        """Test with augmentations using semantic and global-context features.

        Optional semantic and global-context features are computed per view
        (``None`` placeholders when the corresponding head is disabled).
        Boxes are refined through all stages per view, merged and filtered
        with multi-class NMS. When ``self.with_mask`` is set, a relayed
        feature from the last bbox stage feeds the mask forward pass on each
        view and the per-view masks are merged.

        Args:
            img_feats: Per-view features.
            proposal_list: Proposals; only ``proposal_list[0]`` is read.
            img_metas: Per-view meta lists; only element ``[0]`` of each view
                is read.
            rescale: Not read by this method.

        Returns:
            ``[det_bbox_results]``, or
            ``[(det_bbox_results, det_segm_results)]`` when
            ``self.with_mask`` is set.
        """
        semantic_feats = ([self.semantic_head(feat)[1] for feat in img_feats]
                          if self.with_semantic else [None] * len(img_metas))
        glbctx_feats = ([self.glbctx_head(feat)[1] for feat in img_feats]
                        if self.with_glbctx else [None] * len(img_metas))

        rcnn_test_cfg = self.test_cfg
        aug_bboxes, aug_scores = [], []
        for feat, view_metas, sem_feat, ctx_feat in zip(
                img_feats, img_metas, semantic_feats, glbctx_feats):
            # only one image in the batch
            meta = view_metas[0]
            img_shape = meta['img_shape']
            scale_factor = meta['scale_factor']

            view_proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                          scale_factor, meta['flip'])
            rois = bbox2roi([view_proposals])

            # one classification score tensor per stage
            stage_scores = []
            for stage in range(self.num_stages):
                stage_head = self.bbox_head[stage]
                bbox_results = self._bbox_forward(stage,
                                                  feat,
                                                  rois,
                                                  semantic_feat=sem_feat,
                                                  glbctx_feat=ctx_feat)
                stage_scores.append(bbox_results['cls_score'])
                if stage < self.num_stages - 1:
                    labels = bbox_results['cls_score'].argmax(dim=1)
                    rois = stage_head.regress_by_class(
                        rois, labels, bbox_results['bbox_pred'], meta)

            mean_score = sum(stage_scores) / float(len(stage_scores))
            view_bboxes, view_scores = self.bbox_head[-1].get_bboxes(
                rois,
                mean_score,
                bbox_results['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(view_bboxes)
            aug_scores.append(view_scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
            merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)

        det_bbox_results = bbox2result(det_bboxes, det_labels,
                                       self.bbox_head[-1].num_classes)

        if not self.with_mask:
            return [det_bbox_results]

        if det_bboxes.shape[0] == 0:
            # nothing detected: one empty list per mask class
            det_segm_results = [[] for _ in range(self.mask_head.num_classes)]
            return [(det_bbox_results, det_segm_results)]

        aug_masks = []
        for feat, view_metas, sem_feat, ctx_feat in zip(
                img_feats, img_metas, semantic_feats, glbctx_feats):
            meta = view_metas[0]
            view_dets = bbox_mapping(det_bboxes[:, :4], meta['img_shape'],
                                     meta['scale_factor'], meta['flip'])
            mask_rois = bbox2roi([view_dets])
            # get relay feature on mask_rois
            bbox_results = self._bbox_forward(-1,
                                              feat,
                                              mask_rois,
                                              semantic_feat=sem_feat,
                                              glbctx_feat=ctx_feat)
            relayed_feat = self.feat_relay_head(bbox_results['relayed_feat'])
            mask_results = self._mask_forward(feat,
                                              mask_rois,
                                              semantic_feat=sem_feat,
                                              glbctx_feat=ctx_feat,
                                              relayed_feat=relayed_feat)
            aug_masks.append(
                mask_results['mask_pred'].sigmoid().cpu().numpy())
        merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        det_segm_results = self.mask_head.get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
        return [(det_bbox_results, det_segm_results)]
Ejemplo n.º 24
0
    def aug_test(self, imgs, img_metas, rescale=False):
        """Test with augmentation for boxes, masks and stuff maps.

        Runs the bbox head on every augmented view, keeping its mask RoI
        features and stuff-head output. Per-image boxes are merged across
        views and filtered with multi-class NMS, masks are predicted for
        the merged detections on every view and merged, and the stuff maps
        are combined by ``stuff_head.get_stuff_map_aug``.

        Args:
            imgs: Augmented images passed to ``extract_feats``.
            img_metas: Per-view lists of per-image meta dicts.
            rescale: Forwarded to ``stuff_head.get_stuff_map_aug``.

        Returns:
            tuple: ``(bbox_result, segm_result, stuff_result)`` of the first
            image only.
        """
        det_cfg = self.test_cfg.single_stage
        # NOTE(review): this updates the config object held by
        # `self.test_cfg` in place, so the flag stays set after the call.
        det_cfg.update(dict(aug_test=True))
        imgs_per_gpu = len(img_metas[0])
        aug_bboxes = [[] for _ in range(imgs_per_gpu)]
        aug_scores = [[] for _ in range(imgs_per_gpu)]
        mask_roi_feats = []
        stuff_outs = []
        for feat, view_metas in zip(self.extract_feats(imgs), img_metas):
            head_outs = self.bbox_head(feat)
            results_list = self.bbox_head.get_bboxes(
                *(head_outs[:2] + (view_metas, det_cfg, False)))

            # mask subnet input is the last head output
            mask_roi_feats.append(head_outs[-1])

            # stuff subnet input is the second-to-last head output
            stuff_outs.append(self.stuff_head(head_outs[-2]))

            for img_idx, results in enumerate(results_list):
                mlvl_bboxes, mlvl_scores = results
                aug_bboxes[img_idx].append(mlvl_bboxes)
                aug_scores[img_idx].append(mlvl_scores)

        # regroup the metas from per-view lists into per-image lists
        aug_img_metas = [[view_metas[img_idx] for view_metas in img_metas]
                         for img_idx in range(imgs_per_gpu)]

        det_results = []
        for img_bboxes, img_scores, metas in zip(aug_bboxes, aug_scores,
                                                 aug_img_metas):
            merged_bboxes, merged_scores = merge_aug_bboxes(
                img_bboxes, img_scores, metas, det_cfg, return_mean=False)
            det_bboxes, det_labels = multiclass_nms(
                merged_bboxes, merged_scores, det_cfg.score_thr, det_cfg.nms,
                det_cfg.max_per_img)
            det_results.append((det_bboxes, det_labels))

        bbox_results = [
            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
            for det_bboxes, det_labels in det_results
        ]

        # mask
        aug_masks = [[] for _ in range(imgs_per_gpu)]
        for roi_feat, view_metas in zip(mask_roi_feats, img_metas):
            # we should rescale the det bboxes outside `simple_test_mask`
            # because there are `flip` for aug test setting, while in the
            # simple test setting, just have the scale
            scaled_det_results = [
                bbox_mapping(boxes[:, :4], meta['img_shape'],
                             meta['scale_factor'], meta['flip'])
                for (boxes, _), meta in zip(det_results, view_metas)
            ]
            mask_rois = bbox2roi(scaled_det_results)
            mask_feats = self.mask_roi_extractor(
                roi_feat[:len(self.mask_roi_extractor.featmap_strides)],
                mask_rois)
            mask_pred = self.mask_head(mask_feats)
            for img_id, _ in enumerate(view_metas):
                # column 0 of the rois is compared against the image index
                in_image = mask_rois[:, 0] == img_id
                # convert to numpy array to save memory
                aug_masks[img_id].append(
                    mask_pred[in_image].sigmoid().cpu().numpy())

        mask_cfg = self.test_cfg.single_stage_mask
        segm_results = []
        for (det_bboxes, det_labels), img_masks, metas in zip(
                det_results, aug_masks, aug_img_metas):
            merged_masks = merge_aug_masks(img_masks, metas, mask_cfg)
            # perform `get_seg_masks` here for `merged_masks`
            # `ori_shape` for all augmented images are the same here
            segm_results.append(
                self.mask_head.get_seg_masks(merged_masks,
                                             det_bboxes,
                                             det_labels,
                                             mask_cfg,
                                             metas[0]['ori_shape'],
                                             scale_factor=1.0,
                                             rescale=False))

        # stuff
        stuff_results = self.stuff_head.get_stuff_map_aug(stuff_outs,
                                                          img_metas,
                                                          rescale=rescale)

        bbox_segm_stuff_results = list(
            zip(bbox_results, segm_results, stuff_results))
        return bbox_segm_stuff_results[0]
Ejemplo n.º 25
0
    def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):
        """Test with augmentations.

        For every augmented view the proposals are mapped into that view,
        refined through all ``self.num_stages`` bbox stages and scored with
        the stage-averaged classification score. The per-view detections are
        merged and passed through multi-class NMS. When ``self.with_mask`` is
        set, masks are predicted for the surviving boxes in every view (one
        prediction per stage) and merged.

        Args:
            img_feats: Features of the augmented views, iterated in lockstep
                with ``img_metas``.
            proposal_list: Proposals; only ``proposal_list[0][:, :4]`` is
                read.
            img_metas: Meta info of the augmented views; only element ``[0]``
                of each entry is read (one image per batch).
            rescale: Not read by this method.
                NOTE(review): the previous docstring stated that results fit
                the scale of ``imgs[0]`` when this is False -- confirm.

        Returns:
            ``[(bbox_result, segm_result)]`` when ``self.with_mask`` is set,
            otherwise ``[bbox_result]``.
        """
        if self.with_semantic:
            semantic_feats = [
                self.semantic_head(feat)[1] for feat in img_feats
            ]
        else:
            semantic_feats = [None] * len(img_metas)

        rcnn_test_cfg = self.test_cfg
        aug_bboxes = []
        aug_scores = []
        for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip, flip_direction)
            # "ms" in variable names means multi-stage
            ms_scores = []

            rois = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_head = self.bbox_head[i]
                bbox_results = self._bbox_forward(i,
                                                  x,
                                                  rois,
                                                  semantic_feat=semantic)
                ms_scores.append(bbox_results['cls_score'])

                # every stage but the last refines the rois for the next one
                if i < self.num_stages - 1:
                    bbox_label = bbox_results['cls_score'].argmax(dim=1)
                    rois = bbox_head.regress_by_class(
                        rois, bbox_label, bbox_results['bbox_pred'],
                        img_meta[0])

            # average the classification scores over all stages; the box
            # deltas come from the last stage only
            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_bboxes(
                rois,
                cls_score,
                bbox_results['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if not self.with_mask:
            return [bbox_result]

        if det_bboxes.shape[0] == 0:
            # One empty list per mask class. The previous code wrapped this
            # in an extra list, so the empty case had a different nesting
            # depth than the non-empty result placed in the same tuple slot.
            segm_result = [[] for _ in range(self.mask_head[-1].num_classes)]
            return [(bbox_result, segm_result)]

        aug_masks = []
        aug_img_metas = []
        for x, img_meta, semantic in zip(img_feats, img_metas,
                                         semantic_feats):
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']
            # map the merged detections back into this augmented view
            _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                   scale_factor, flip, flip_direction)
            mask_rois = bbox2roi([_bboxes])
            mask_feats = self.mask_roi_extractor[-1](
                x[:len(self.mask_roi_extractor[-1].featmap_strides)],
                mask_rois)
            if self.with_semantic:
                semantic_feat = semantic
                mask_semantic_feat = self.semantic_roi_extractor(
                    [semantic_feat], mask_rois)
                # pool the semantic RoI features to the spatial size of the
                # mask RoI features before adding them
                if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:
                    mask_semantic_feat = F.adaptive_avg_pool2d(
                        mask_semantic_feat, mask_feats.shape[-2:])
                mask_feats += mask_semantic_feat
            last_feat = None
            for i in range(self.num_stages):
                mask_head = self.mask_head[i]
                if self.mask_info_flow:
                    # each stage also receives the feature returned by the
                    # previous stage
                    mask_pred, last_feat = mask_head(mask_feats, last_feat)
                else:
                    mask_pred = mask_head(mask_feats)
                # one entry per (view, stage) pair, with a matching meta
                aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                aug_img_metas.append(img_meta)
        merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                       self.test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
        return [(bbox_result, segm_result)]
Ejemplo n.º 26
0
    def aug_test(self, imgs, img_metas, rescale=False, **kwargs):
        """Test an ensemble of models with augmentations.

        Every model in ``self.models`` produces detections for every
        augmented view; all of them are merged (``type='concat'``) and passed
        through multi-class NMS. The resulting boxes are then fed to each
        model's ``_rel_forward_test`` for every view, and the returned
        ``'hoi_prediction'`` lists are accumulated and de-duplicated.

        Args:
            imgs: Augmented images, passed to each model's ``extract_feats``.
            img_metas: Meta info of the augmented views; only element ``[0]``
                of each entry is read.
            rescale: Not read by this method.
            **kwargs: Not read by this method.

        Returns:
            tuple: ``(bbox_result, relation_preds)`` where ``relation_preds``
            is the accumulated relation dict with a ``'file_name'`` entry and
            a ``'hoi_prediction'`` list holding one entry per distinct
            ``(subject_id, object_id, category_id)`` triple.
        """
        # the test configs of the first model are used for the whole ensemble
        rpn_test_cfg = self.models[0].test_cfg.rpn
        rcnn_test_cfg = self.models[0].test_cfg.rcnn

        # For each model, compute detections
        aug_bboxes = []
        aug_scores = []
        aug_img_metas = []
        for model in self.models:
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                proposal_list = model.simple_test_rpn(x, img_meta,
                                                      rpn_test_cfg)

                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']

                # "ms" in variable names means multi-stage
                ms_scores = []
                rois = bbox2roi(proposal_list)
                for i in range(model.num_stages):
                    bbox_head = model.bbox_head[i]
                    bbox_roi_extractor = model.bbox_roi_extractor[i]
                    bbox_feats = bbox_roi_extractor(
                        x[:len(bbox_roi_extractor.featmap_strides)], rois)
                    cls_score, bbox_pred = bbox_head(bbox_feats)
                    ms_scores.append(cls_score)

                    # every stage but the last refines the rois
                    if i < model.num_stages - 1:
                        bbox_label = cls_score.argmax(dim=1)
                        rois = bbox_head.regress_by_class(
                            rois, bbox_label, bbox_pred, img_meta[0])

                # stage-averaged score, box deltas from the last stage
                cls_score = sum(ms_scores) / float(len(ms_scores))
                bboxes, scores = model.bbox_head[-1].get_det_bboxes(
                    rois,
                    cls_score,
                    bbox_pred,
                    img_shape,
                    scale_factor,
                    rescale=False,
                    cfg=None)
                aug_bboxes.append(bboxes)
                aug_scores.append(scores)
                aug_img_metas.append(img_meta)

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(aug_bboxes,
                                                        aug_scores,
                                                        aug_img_metas,
                                                        rcnn_test_cfg,
                                                        type='concat')
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.models[0].bbox_head[-1].num_classes)

        ori_shape = img_metas[0][0]['ori_shape']
        scale_factor = img_metas[0][0]['scale_factor']

        ensemble_relation_preds = {}
        for model in self.models:
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                im_height, im_width, _ = img_meta[0]['img_shape']
                filename = img_meta[0]['filename']
                relation_preds = model._rel_forward_test(x,
                                                         det_bboxes,
                                                         det_labels,
                                                         scale_factor,
                                                         ori_shape,
                                                         im_width=im_width,
                                                         im_height=im_height)
                # The previous check (`filename not in ensemble_relation_preds`)
                # tested the filename against the dict's *keys*, so it was
                # always true and every view overwrote the ensemble. Compare
                # against the stored 'file_name' value instead.
                starts_new_file = (
                    'hoi_prediction' not in ensemble_relation_preds
                    or ensemble_relation_preds.get('file_name') != filename)
                if starts_new_file:
                    # copy so that later extends do not mutate the model's
                    # own return value
                    ensemble_relation_preds = relation_preds.copy()
                    ensemble_relation_preds['hoi_prediction'] = list(
                        relation_preds['hoi_prediction'])
                    ensemble_relation_preds['file_name'] = filename
                else:
                    ensemble_relation_preds['hoi_prediction'].extend(
                        relation_preds['hoi_prediction'])

        # Keep the first prediction of every (subject, object, category)
        # triple. The previous code called `.remove()` on a dict copy, which
        # raises AttributeError as soon as a duplicate exists. Keys are
        # compared with `==` (list membership) rather than hashed, mirroring
        # the original pairwise comparison without assuming hashable ids.
        kept_keys = []
        unique_hoi_preds = []
        for hoi_pred in ensemble_relation_preds.get('hoi_prediction', []):
            key = (hoi_pred['subject_id'], hoi_pred['object_id'],
                   hoi_pred['category_id'])
            if key in kept_keys:
                continue
            kept_keys.append(key)
            unique_hoi_preds.append(hoi_pred)

        ensemble_relation_preds_remove_dup = ensemble_relation_preds.copy()
        ensemble_relation_preds_remove_dup['hoi_prediction'] = unique_hoi_preds

        return bbox_result, ensemble_relation_preds_remove_dup
    def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
        """Run cascade inference over all augmented views and merge results.

        Proposals come from ``self.aug_test_rpn``. For each view they are
        mapped into the view, refined through ``self.num_stages`` bbox stages
        and scored with the stage-averaged classification score. The merged
        detections go through multi-class NMS; when ``self.with_mask`` is set,
        one mask prediction per (view, stage) pair is collected and merged.

        Args:
            imgs: Augmented images, passed to ``self.extract_feats``.
            img_metas: Meta info of the augmented views; only element ``[0]``
                of each entry is read.
            proposals: Incoming value is never read.
            rescale: Not read by this method.

        Returns:
            ``(bbox_result, segm_result)`` when ``self.with_mask`` is set,
            otherwise ``bbox_result``.
        """
        # features are recomputed for every pass (rpn / bbox / mask) instead
        # of being kept around, to save memory
        proposal_list = self.aug_test_rpn(
            self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
        rcnn_test_cfg = self.test_cfg.rcnn

        view_bboxes, view_scores = [], []
        for feats, view_meta in zip(self.extract_feats(imgs), img_metas):
            # each view holds exactly one image
            meta = view_meta[0]
            img_shape = meta['img_shape']
            scale_factor = meta['scale_factor']
            flip = meta['flip']

            mapped = bbox_mapping(
                proposal_list[0][:, :4], img_shape, scale_factor, flip)
            stage_scores = []
            rois = bbox2roi([mapped])

            for stage in range(self.num_stages):
                roi_extractor = self.bbox_roi_extractor[stage]
                head = self.bbox_head[stage]

                num_levels = len(roi_extractor.featmap_strides)
                roi_feats = roi_extractor(feats[:num_levels], rois)
                if self.with_shared_head:
                    roi_feats = self.shared_head(roi_feats)

                cls_score, bbox_pred = head(roi_feats)
                stage_scores.append(cls_score)

                # all stages except the final one refine the rois
                if stage + 1 < self.num_stages:
                    top_label = cls_score.argmax(dim=1)
                    rois = head.regress_by_class(
                        rois, top_label, bbox_pred, view_meta[0])

            # mean score across stages; deltas are those of the last stage
            mean_score = sum(stage_scores) / float(len(stage_scores))
            bboxes, scores = self.bbox_head[-1].get_det_bboxes(
                rois,
                mean_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            view_bboxes.append(bboxes)
            view_scores.append(scores)

        # merging rescales the boxes to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            view_bboxes, view_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
            merged_bboxes,
            merged_scores,
            rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms,
            rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(
            det_bboxes, det_labels, self.bbox_head[-1].num_classes)

        if not self.with_mask:
            return bbox_result

        if det_bboxes.shape[0] == 0:
            # no detections: one empty list for each of num_classes - 1 slots
            empty_segm = [
                [] for _ in range(self.mask_head[-1].num_classes - 1)
            ]
            return bbox_result, empty_segm

        mask_preds, mask_metas = [], []
        for feats, view_meta in zip(self.extract_feats(imgs), img_metas):
            meta = view_meta[0]
            img_shape = meta['img_shape']
            scale_factor = meta['scale_factor']
            flip = meta['flip']
            # bring the merged detections back into this view
            view_boxes = bbox_mapping(
                det_bboxes[:, :4], img_shape, scale_factor, flip)
            mask_rois = bbox2roi([view_boxes])
            for stage in range(self.num_stages):
                extractor = self.mask_roi_extractor[stage]
                mask_feats = extractor(
                    feats[:len(extractor.featmap_strides)], mask_rois)
                if self.with_shared_head:
                    mask_feats = self.shared_head(mask_feats)
                stage_pred = self.mask_head[stage](mask_feats)
                mask_preds.append(stage_pred.sigmoid().cpu().numpy())
                mask_metas.append(view_meta)

        merged_masks = merge_aug_masks(
            mask_preds, mask_metas, self.test_cfg.rcnn)

        ori_shape = img_metas[0][0]['ori_shape']
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
        return bbox_result, segm_result
Ejemplo n.º 28
0
    def aug_test(self, features, proposal_list, img_metas, rescale=False):
        """Test with augmentations using the main and the "tail" bbox heads.

        For every augmented view the proposals are refined independently by
        two cascades (``self.bbox_head`` via ``_bbox_forward`` and
        ``self.bbox_head_tail`` via ``_bbox_forward_tail``). The boxes and
        scores of both cascades are concatenated per view, merged over all
        views and passed through multi-class NMS. When ``self.with_mask`` is
        set, one mask prediction per (view, stage) pair is collected and
        merged.

        Args:
            features: Features of the augmented views, iterated in lockstep
                with ``img_metas``.
            proposal_list: Proposals; only ``proposal_list[0][:, :4]`` is
                read.
            img_metas: Meta info of the augmented views; only element ``[0]``
                of each entry is read (one image per batch).
            rescale: Not read by this method.
                NOTE(review): the previous docstring stated that results fit
                the scale of ``imgs[0]`` when this is False -- confirm.

        Returns:
            ``[(bbox_result, segm_result)]`` when ``self.with_mask`` is set,
            otherwise ``[bbox_result]``.
        """
        rcnn_test_cfg = self.test_cfg
        aug_bboxes = []
        aug_scores = []
        for x, img_meta in zip(features, img_metas):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip, flip_direction)

            # "ms" in variable names means multi-stage
            ms_scores = []
            ms_scores_tail = []

            # both cascades start from the same proposals but refine their
            # own copy of the rois
            rois = bbox2roi([proposals])
            rois_tail = bbox2roi([proposals])
            for i in range(self.num_stages):
                bbox_results = self._bbox_forward(i, x, rois)
                ms_scores.append(bbox_results['cls_score'])

                bbox_results_tail = self._bbox_forward_tail(i, x, rois_tail)
                ms_scores_tail.append(bbox_results_tail['cls_score'])

                # every stage but the last refines the rois for the next one
                if i < self.num_stages - 1:
                    # the last score column is excluded from the argmax
                    bbox_label = bbox_results['cls_score'][:, :-1].argmax(
                        dim=1)
                    rois = self.bbox_head[i].regress_by_class(
                        rois, bbox_label, bbox_results['bbox_pred'],
                        img_meta[0])

                    bbox_label_tail = bbox_results_tail[
                        'cls_score'][:, :-1].argmax(dim=1)
                    rois_tail = self.bbox_head_tail[i].regress_by_class(
                        rois_tail, bbox_label_tail,
                        bbox_results_tail['bbox_pred'], img_meta[0])

            # stage-averaged scores; box deltas come from the last stage
            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = self.bbox_head[-1].get_bboxes(
                rois,
                cls_score,
                bbox_results['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)

            cls_score_tail = sum(ms_scores_tail) / float(len(ms_scores_tail))
            bboxes_tail, scores_tail = self.bbox_head_tail[-1].get_bboxes(
                rois_tail,
                cls_score_tail,
                bbox_results_tail['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)

            # stack the outputs of both cascades for this view
            aug_bboxes.append(torch.cat((bboxes, bboxes_tail)))
            aug_scores.append(torch.cat((scores, scores_tail)))

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)

        bbox_result = bbox2result(det_bboxes, det_labels,
                                  self.bbox_head[-1].num_classes)

        if not self.with_mask:
            return [bbox_result]

        if det_bboxes.shape[0] == 0:
            # no detections: one empty list per mask class
            segm_result = [[]
                           for _ in range(self.mask_head[-1].num_classes)]
            return [(bbox_result, segm_result)]

        aug_masks = []
        aug_img_metas = []
        for x, img_meta in zip(features, img_metas):
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']
            # map the merged detections back into this augmented view
            _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                   scale_factor, flip, flip_direction)
            mask_rois = bbox2roi([_bboxes])
            for i in range(self.num_stages):
                mask_results = self._mask_forward(i, x, mask_rois)
                # one entry per (view, stage) pair, with a matching meta
                aug_masks.append(
                    mask_results['mask_pred'].sigmoid().cpu().numpy())
                aug_img_metas.append(img_meta)
        merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                       self.test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
        return [(bbox_result, segm_result)]