def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] else: aug_masks = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor( x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats) mask_pred = self.mask_head(mask_feats) # convert to numpy array to save memory aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head.get_seg_masks(merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, scale_factor=1.0, rescale=False) return segm_result
def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): """Test for mask head with test time augmentation.""" if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes)] else: aug_masks = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) mask_results = self._mask_forward(x, mask_rois) mask_results['mask_pred'] = self._mask_point_forward_test( x, mask_rois, det_labels, mask_results['mask_pred'], img_meta) # convert to numpy array to save memory aug_masks.append( mask_results['mask_pred'].sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head.get_seg_masks(merged_masks, det_bboxes, det_labels, self.test_cfg, ori_shape, scale_factor=1.0, rescale=False) return segm_result
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): aug_bboxes = [] aug_scores = [] for x, img_meta in zip(feats, img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] # TODO more flexible proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) # recompute feature maps to save GPU memory roi_feats = self.bbox_roi_extractor( x[:len(self.bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: roi_feats = self.shared_head(roi_feats) cls_score, bbox_pred = self.bbox_head(roi_feats) bboxes, scores = self.bbox_head.get_det_bboxes(rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) return det_bboxes, det_labels
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): """Test det bboxes with test time augmentation.""" aug_bboxes = [] aug_scores = [] for x, img_meta in zip(feats, img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] # TODO more flexible proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip, flip_direction) rois = bbox2roi([proposals]) bbox_results = self._bbox_forward(x, rois) bboxes, scores = self.bbox_head.get_bboxes( rois, bbox_results['cls_score'], bbox_results['bbox_pred'], img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) return det_bboxes, det_labels
def aug_test_seq(self, feats, img_metas, det_bboxes, det_labels): """Test for seq head with test time augmentation.""" if det_bboxes.shape[0] == 0: rec_result = [[] for _ in range(self.seq_head.num_classes)] else: aug_seqs = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip, flip_direction) seq_rois = bbox2roi([_bboxes]) seq_results = self._seq_forward(x, seq_rois) # convert to numpy array to save memory aug_seqs.append( seq_results['seq_pred'].sigmoid().cpu().numpy()) merged_seqs = merge_aug_seqs(aug_seqs, img_metas, self.test_cfg) ori_shape = img_metas[0][0]['ori_shape'] rec_result = self.seq_head.get_rec_seqs( merged_seqs, det_bboxes, det_labels, self.test_cfg, ori_shape, scale_factor=1.0, rescale=False) return rec_result
def aug_test(self, x, img_metas, rescale=False): """Test function with test time augmentation. Args: imgs (list[torch.Tensor]): List of multiple images x list[torch.Tensor]: Multi-level features that may have different resolutions. img_metas (list[dict]): List of image information. rescale (bool, optional): Whether to rescale the results. Defaults to False. Returns: list[np.ndarray]: proposals """ proposal_list = self.rpn_head.aug_test_rpn(x, img_metas) if not rescale: for proposals, img_meta in zip(proposal_list, img_metas[0]): img_shape = img_meta['img_shape'] scale_factor = img_meta['scale_factor'] flip = img_meta['flip'] flip_direction = img_meta['flip_direction'] proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, scale_factor, flip, flip_direction) return [proposal.cpu().numpy() for proposal in proposal_list]
def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] else: aug_masks = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor( x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head(mask_feats) # convert to numpy array to save memory aug_masks.append(mask_pred.sigmoid().cpu().numpy()) # here is to flip back the masks. merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] # sxg: for aug test, the masks are always fixed to # the size of img[0] segm_result = self.mask_head.get_seg_masks( merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, # the mask size is the ori ones. scale_factor=1.0, # and won't rescale to the rescale img. rescale=False) return segm_result
def aug_test(self, imgs, img_metas, rescale=False): """Test function with test time augmentation. Args: imgs (list[torch.Tensor]): List of multiple images img_metas (list[dict]): List of image information. rescale (bool, optional): Whether to rescale the results. Defaults to False. Returns: np.ndarray: proposals """ proposal_list = self.rpn_head.aug_test_rpn(self.extract_feats(imgs), img_metas) if not rescale: for proposals, img_meta in zip(proposal_list, img_metas[0]): img_shape = img_meta['img_shape'] scale_factor = img_meta['scale_factor'] flip = img_meta['flip'] flip_direction = img_meta['flip_direction'] proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, scale_factor, flip, flip_direction) # TODO: remove this restriction return proposal_list[0].cpu().numpy()
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): aug_bboxes = [] aug_scores = [] aug_variance = [] for x, img_meta in zip(feats, img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0].get('flip_direction', 'horizontal') # TODO more flexible proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip, flip_direction) rois = bbox2roi([proposals]) # recompute feature maps to save GPU memory roi_feats = self.bbox_roi_extractor( x[:len(self.bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: roi_feats = self.shared_head(roi_feats) cls_score, bbox_pred = self.bbox_head(roi_feats) bboxes, scores = self.bbox_head.get_det_bboxes(rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) if isinstance(bboxes, tuple): aug_bboxes.append(bboxes[0]) aug_variance.append(bboxes[1]) else: aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size return_variance = rcnn_test_cfg.get('return_variance', False) if len(aug_variance ) == 0 or rcnn_test_cfg.nms.type != 'soft_nms_variance_voting': merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) else: merged_bboxes, merged_scores, merged_variance = merge_aug_bboxes_variance( aug_bboxes, aug_scores, aug_variance, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_soft_nms_variance_voting( merged_bboxes, merged_variance, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img, return_variance=return_variance) return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, rescale=False): proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas, self.test_cfg.rpn) if not rescale: for proposals, img_meta in zip(proposal_list, img_metas[0]): img_shape = img_meta['img_shape'] scale_factor = img_meta['scale_factor'] flip = img_meta['flip'] proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, scale_factor, flip) return proposal_list[0].cpu().numpy()
def tsd_aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): aug_bboxes = [] aug_scores = [] for x, img_meta in zip(feats, img_metas): # only one image in the batch img_shape = img_meta[0]["img_shape"] scale_factor = img_meta[0]["scale_factor"] flip = img_meta[0]["flip"] # TODO more flexible proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) # recompute feature maps to save GPU memory roi_feats = self.bbox_roi_extractor( x[:len(self.bbox_roi_extractor.featmap_strides)], rois) cls_score, bbox_pred, TSD_cls_score, TSD_bbox_pred, delta_c, delta_r = self.bbox_head( roi_feats, x[:self.bbox_roi_extractor.num_inputs], rois) w = rois[:, 3] - rois[:, 1] + 1 h = rois[:, 4] - rois[:, 2] + 1 scale = 0.1 rois_r = rois.new_zeros(rois.shape[0], rois.shape[1]) rois_r[:, 0] = rois[:, 0] delta_r = delta_r.to(dtype=rois_r.dtype) rois_r[:, 1] = rois[:, 1] + delta_r[:, 0] * scale * w rois_r[:, 2] = rois[:, 2] + delta_r[:, 1] * scale * h rois_r[:, 3] = rois[:, 3] + delta_r[:, 0] * scale * w rois_r[:, 4] = rois[:, 4] + delta_r[:, 1] * scale * h bboxes, scores = self.bbox_head.get_det_bboxes( rois_r, TSD_cls_score, TSD_bbox_pred, img_shape, scale_factor, rescale=False, cfg=None, ) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms( merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img, ) return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, rescale=False): proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas, self.test_cfg.rpn) if not rescale: for proposals, img_meta in zip(proposal_list, img_metas[0]): img_shape = img_meta["img_shape"] scale_factor = img_meta["scale_factor"] flip = img_meta["flip"] proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, scale_factor, flip) # TODO: remove this restriction return proposal_list[0].cpu().numpy()
def multi_bboxes_test(self, feats, img_metas, proposal_list, rcnn_test_cfg, rescale): aug_scores = [] aug_bboxes = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] # "ms" in variable names means multi-stage ms_scores = [] # TODO more flexible proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) # rois = bbox2roi(proposal_list) for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: bbox_feats = self.shared_head(bbox_feats) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, scores = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_scores.append(scores) aug_bboxes.append(det_bboxes) merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, rescale=False): """Test with augmentation.""" proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas, self.test_cfg.rpn) if not rescale: for proposals, img_meta in zip(proposal_list, img_metas[0]): img_shape = img_meta['img_shape'] scale_factor = img_meta['scale_factor'] flip = img_meta['flip'] proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, scale_factor, flip) # TODO: remove this restriction return proposal_list[0].cpu().numpy()
def aug_test(self, imgs, img_metas, rescale=False): proposal_list = self.rpn_head.aug_test_rpn(self.extract_feats(imgs), img_metas) if not rescale: for proposals, img_meta in zip(proposal_list, img_metas[0]): img_shape = img_meta['img_shape'] scale_factor = img_meta['scale_factor'] flip = img_meta['flip'] flip_direction = img_meta['flip_direction'] proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, scale_factor, flip, flip_direction) # TODO: remove this restriction return proposal_list[0].cpu().numpy()
def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.point_rend_roihead.num_classes - 1)] else: aug_masks = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_pred = self.point_rend_roihead._forward_mask_test(x, _bboxes, det_labels) # convert to numpy array to save memory aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.point_rend_roihead.get_seg_masks(merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, scale_factor=1.0, rescale=False) return segm_result
def aug_test(self, imgs, img_metas, rescale=False): feats = self.extract_feats(imgs) imgs_per_gpu = len(img_metas[0]) aug_proposals = [[] for _ in range(imgs_per_gpu)] for x, img_meta in zip(feats, img_metas): proposal_list = self.common_test(x, img_meta) for i, proposals in enumerate(proposal_list): aug_proposals[i].append(proposals) # after merging, proposals will be rescaled to the original image size proposal_list = [ merge_aug_proposals(proposals, img_meta, self.test_cfg.rpn) for proposals, img_meta in zip(aug_proposals, img_metas) ] if not rescale: for proposals, img_meta in zip(proposal_list, img_metas[0]): img_shape = img_meta['img_shape'] scale_factor = img_meta['scale_factor'] flip = img_meta['flip'] proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, scale_factor, flip) # TODO: remove this restriction return proposal_list[0].cpu().numpy()
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): aug_bboxes = [] aug_scores = [] for x, img_meta in zip(feats, img_metas): # only one image in the batch # for each img and img_meta. img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] # TODO more flexible # only use the proposal_list[0] of original image, # so the bboxes are corresponding. the proposal list # are merged from different scales. # RPN generate proposals for different scale images, then proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) # recompute feature maps to save GPU memory roi_feats = self.bbox_roi_extractor( x[:len(self.bbox_roi_extractor.featmap_strides)], rois) cls_score, bbox_pred = self.bbox_head(roi_feats) bboxes, scores = self.bbox_head.get_det_bboxes(rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size # here to calculate the mean box. merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) return det_bboxes, det_labels
def aug_test_cascade_mask(self, feats, img_metas, det_bboxes, det_labels): if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_masks = [] aug_img_metas = [] if self.mask_head.mask_score: aug_mask_ious = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) for i in range(self.num_stages): mask_roi_extractor = self.mask_roi_extractor[i] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred) aug_img_metas.append(img_meta) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, scale_factor=1.0, rescale=False) return segm_result
def aug_test(self, imgs, img_metas, proposals=None, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ # recompute feats to save memory proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas, self.test_cfg.rpn) rcnn_test_cfg = self.test_cfg.rcnn aug_bboxes = [] aug_scores = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: bbox_feats = self.shared_head(bbox_feats) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_det_bboxes(rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_masks = [] aug_img_metas = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) for i in range(self.num_stages): mask_feats = self.mask_roi_extractor[i]( x[:len(self.mask_roi_extractor[i].featmap_strides )], mask_rois) if self.with_shared_head: mask_feats = self.shared_head(mask_feats) mask_pred = self.mask_head[i](mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) aug_img_metas.append(img_meta) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=1.0, rescale=False) return bbox_result, segm_result else: return bbox_result
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): aug_bboxes = [] aug_scores = [] for x, img_meta in zip(feats, img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] # TODO more flexible ms_bbox_result = {} ms_scores = [] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: bbox_feats = self.shared_head(bbox_feats) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=rcnn_test_cfg) bbox_result = bbox2result(det_bboxes, det_labels, bbox_head.num_classes) ms_bbox_result['stage{}'.format(i)] = bbox_result if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(det_bboxes) aug_scores.append(det_labels) merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg ) det_bboxes, det_labels = multiclass_nms( merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img ) # bbox_result = bbox2result(det_bboxes, det_labels, # self.bbox_head[-1].num_classes) # ms_bbox_result['ensemble'] = bbox_result # if not self.test_cfg.keep_all_stages: # results = ms_bbox_result['ensemble'] # else: # results = ms_bbox_result # recompute feature maps to save GPU memory # roi_feats = self.bbox_roi_extractor( # x[:len(self.bbox_roi_extractor.featmap_strides)], rois) # if self.with_shared_head: # roi_feats = self.shared_head(roi_feats) # cls_score, bbox_pred = self.bbox_head(roi_feats) # bboxes, scores = self.bbox_head.get_det_bboxes( # rois, # cls_score, # bbox_pred, # img_shape, # scale_factor, # rescale=False, # cfg=None) # aug_bboxes.append(bboxes) # aug_scores.append(scores) # # after merging, bboxes will be rescaled to the original image size # merged_bboxes, merged_scores = merge_aug_bboxes( # aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) # det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, # rcnn_test_cfg.score_thr, # rcnn_test_cfg.nms, # rcnn_test_cfg.max_per_img) # bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, proposals=None, rescale=False): ms_bbox_result = {} ms_segm_result = {} rcnn_test_cfg = self.test_cfg.rcnn proposal_list = self.aug_test_rpn( self.extract_feats(imgs), img_metas, self.test_cfg.rpn) aug_bboxes = [] aug_scores = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None ms_scores = [] for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, semantic_feat=semantic_feat) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages det_bboxes, det_scores = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(det_bboxes) aug_scores.append(det_scores) merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) # 由于集成所有的det_bboxes都为原始的大小 det_bboxes, det_labels = multiclass_nms( merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) if rescale: _det_bboxes = det_bboxes else: _det_bboxes = det_bboxes.clone() _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] bbox_results = bbox2result(_det_bboxes, det_labels, self.bbox_head[-1].num_classes) ms_bbox_result['ensemble'] = bbox_results if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [ [] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_sum_masks = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): if self.with_semantic: _, semantic_feat = self.semantic_head(x) else: semantic_feat = None img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) aug_masks = [] mask_roi_extractor = self.mask_roi_extractor[-1] mask_feats = mask_roi_extractor( x[:len(mask_roi_extractor.featmap_strides)], mask_rois) if self.with_semantic and 'mask' in self.semantic_fusion: mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head(mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) # 先对所有stages的mask平均 merged_masks = merge_aug_masks(aug_masks, [img_meta] * self.num_stages, self.test_cfg.rcnn) aug_sum_masks.append(merged_masks) merged_masks = np.mean(aug_sum_masks, axis=0) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn, ori_shape, scale_factor=1.0, rescale=False) ms_segm_result['ensemble'] = segm_result # 简易测试tta是否正确 if not self.test_cfg.keep_all_stages: if self.with_mask: results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble']) else: results = ms_bbox_result['ensemble'] else: if self.with_mask: results = { stage: (ms_bbox_result[stage], ms_segm_result[stage]) for stage in ms_bbox_result } else: results = ms_bbox_result return results
def aug_test(self, img_feats, proposal_list, img_metas, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ if self.with_semantic: semantic_feats = [ self.semantic_head(feat)[1] for feat in img_feats ] else: semantic_feats = [None] * len(img_metas) rcnn_test_cfg = self.test_cfg aug_bboxes = [] aug_scores = [] for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip, flip_direction) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_head = self.bbox_head[i] bbox_results = self._bbox_forward(i, x, rois, semantic_feat=semantic) ms_scores.append(bbox_results['cls_score']) if i < self.num_stages - 1: bbox_label = bbox_results['cls_score'].argmax(dim=1) rois = bbox_head.regress_by_class( rois, bbox_label, bbox_results['bbox_pred'], img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_bboxes( rois, cls_score, bbox_results['bbox_pred'], img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[[] for _ in range(self.mask_head[-1].num_classes)] ] else: aug_masks = [] aug_img_metas = [] for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip, flip_direction) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor[-1]( x[:len(self.mask_roi_extractor[-1].featmap_strides)], mask_rois) if self.with_semantic: semantic_feat = semantic mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) if mask_semantic_feat.shape[-2:] != mask_feats.shape[ -2:]: mask_semantic_feat = F.adaptive_avg_pool2d( mask_semantic_feat, mask_feats.shape[-2:]) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head( mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) aug_img_metas.append(img_meta) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=1.0, rescale=False) return [(bbox_result, segm_result)] else: return [bbox_result]
def aug_test(self, imgs, img_metas, proposals=None, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ rpn_test_cfg = self.models[0].test_cfg.rpn imgs_per_gpu = len(img_metas[0]) aug_proposals = [[] for _ in range(imgs_per_gpu)] for model in self.models: for x, img_meta in zip(model.extract_feats(imgs), img_metas): proposal_list = model.simple_test_rpn(x, img_meta, rpn_test_cfg) for i, proposals in enumerate(proposal_list): aug_proposals[i].append(proposals) # # after merging, proposals will be rescaled to the original image size proposal_list = [ merge_aug_proposals(proposals, img_meta, rpn_test_cfg) for proposals, img_meta in zip(aug_proposals, img_metas) ] rcnn_test_cfg = self.models[0].test_cfg.rcnn aug_bboxes = [] aug_scores = [] aug_img_metas = [] for model in self.models: for x, img_meta in zip(model.extract_feats(imgs), img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(model.num_stages): bbox_roi_extractor = model.bbox_roi_extractor[i] bbox_head = model.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides)], rois) if model.with_shared_head: bbox_feats = model.shared_head(bbox_feats) cls_score, bbox_pred = bbox_head(bbox_feats) ms_scores.append(cls_score) if i < model.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class( rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = model.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) aug_img_metas.append(img_meta) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, aug_img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.models[0].bbox_head[-1].num_classes) if self.models[0].with_mask: raise NotImplementedError else: return bbox_result
def aug_test(self, img_feats, proposal_list, img_metas, rescale=False): if self.with_semantic: semantic_feats = [ self.semantic_head(feat)[1] for feat in img_feats ] else: semantic_feats = [None] * len(img_metas) if self.with_glbctx: glbctx_feats = [self.glbctx_head(feat)[1] for feat in img_feats] else: glbctx_feats = [None] * len(img_metas) rcnn_test_cfg = self.test_cfg aug_bboxes = [] aug_scores = [] for x, img_meta, semantic_feat, glbctx_feat in zip( img_feats, img_metas, semantic_feats, glbctx_feats): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_head = self.bbox_head[i] bbox_results = self._bbox_forward(i, x, rois, semantic_feat=semantic_feat, glbctx_feat=glbctx_feat) ms_scores.append(bbox_results['cls_score']) if i < self.num_stages - 1: bbox_label = bbox_results['cls_score'].argmax(dim=1) rois = bbox_head.regress_by_class( rois, bbox_label, bbox_results['bbox_pred'], img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_bboxes( rois, cls_score, bbox_results['bbox_pred'], img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) det_bbox_results = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: det_segm_results = [[] for _ in range(self.mask_head.num_classes)] else: aug_masks = [] for x, img_meta, semantic_feat, glbctx_feat in zip( img_feats, img_metas, semantic_feats, glbctx_feats): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) # get relay feature on mask_rois bbox_results = self._bbox_forward( -1, x, mask_rois, semantic_feat=semantic_feat, glbctx_feat=glbctx_feat) relayed_feat = bbox_results['relayed_feat'] relayed_feat = self.feat_relay_head(relayed_feat) mask_results = self._mask_forward( x, mask_rois, semantic_feat=semantic_feat, glbctx_feat=glbctx_feat, relayed_feat=relayed_feat) mask_pred = mask_results['mask_pred'] aug_masks.append(mask_pred.sigmoid().cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg) ori_shape = img_metas[0][0]['ori_shape'] det_segm_results = self.mask_head.get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=1.0, rescale=False) return [(det_bbox_results, det_segm_results)] else: return [det_bbox_results]
def aug_test(self, imgs, img_metas, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ # recompute feats to save memory proposal_list = self.aug_test_rotate_rpn(self.extract_feats(imgs), img_metas, self.test_cfg.rpn) aug_bboxes = [] aug_scores = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] # TODO more flexible proposals = [ bbox_mapping(proposal_list[0][:, :8], img_shape, scale_factor, flip) ] rois = rbboxPoly2rroiRec(proposals) bbox_cls_feats = self.bbox_roi_extractor( x[:self.bbox_roi_extractor.num_inputs], rois) bbox_reg_feats = bbox_cls_feats if self.with_shared_head: bbox_cls_feats = self.shared_head(bbox_cls_feats) bbox_reg_feats = self.shared_head(bbox_reg_feats) cls_score, bbox_xy_pred, bbox_wh_pred, bbox_theta_pred = self.bbox_head( bbox_cls_feats, bbox_reg_feats) img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] bboxes, scores = self.bbox_head.get_det_rbbox2rbbox( rois, cls_score, bbox_xy_pred, bbox_wh_pred, bbox_theta_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) merged_bboxes, merged_scores = merge_aug_rotate_bboxes( aug_bboxes, aug_scores, img_metas, self.test_cfg.rcnn) det_bboxes, det_labels = multiclass_poly_nms_8_points( merged_bboxes, merged_scores, self.test_cfg.rcnn.score_thr, self.test_cfg.rcnn.nms, max_num=self.test_cfg.rcnn.max_per_img) bbox_results = rbbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) # det_bboxes always keep the original scale if self.with_mask: pass else: return bbox_results
def aug_test(self, features, proposal_list, img_metas, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ rcnn_test_cfg = self.test_cfg aug_bboxes = [] aug_scores = [] for x, img_meta in zip(features, img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip, flip_direction) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_results = self._bbox_forward(i, x, rois) ms_scores.append(bbox_results['cls_score']) if i < self.num_stages - 1: bbox_label = bbox_results['cls_score'][:, :-1].argmax( dim=1) rois = self.bbox_head[i].regress_by_class( rois, bbox_label, bbox_results['bbox_pred'], img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_bboxes( rois, cls_score, bbox_results['bbox_pred'], img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes)] else: aug_masks = [] aug_img_metas = [] for x, img_meta in zip(features, img_metas): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] flip_direction = img_meta[0]['flip_direction'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip, flip_direction) mask_rois = bbox2roi([_bboxes]) for i in range(self.num_stages): mask_results = self._mask_forward(i, x, mask_rois) aug_masks.append( mask_results['mask_pred'].sigmoid().cpu().numpy()) aug_img_metas.append(img_meta) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg) ori_shape = img_metas[0][0]['ori_shape'] dummy_scale_factor = np.ones(4) segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=dummy_scale_factor, rescale=False) return [(bbox_result, segm_result)] else: return [bbox_result]
def aug_test(self, imgs, img_metas, proposals=None, rescale=None): # raise NotImplementedError # import pdb; pdb.set_trace() proposal_list = self.aug_test_rpn_rotate( self.extract_feats(imgs), img_metas, self.test_cfg.rpn) rcnn_test_cfg = self.test_cfg.rcnn aug_rbboxes = [] aug_rscores = [] for x, img_meta in zip(self.extract_feats(imgs), img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) angle = img_meta[0]['angle'] # print('img shape: ', img_shape) if angle != 0: try: proposals = bbox_rotate_mapping(proposal_list[0][:, :4], img_shape, angle) except: import pdb; pdb.set_trace() rois = bbox2roi([proposals]) # recompute feature maps to save GPU memory roi_feats = self.bbox_roi_extractor( x[:len(self.bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: roi_feats = self.shared_head(roi_feats) cls_score, bbox_pred = self.bbox_head(roi_feats) bbox_label = cls_score.argmax(dim=1) rrois = self.bbox_head.regress_by_class_rbbox(roi2droi(rois), bbox_label, bbox_pred, img_meta[0]) rrois_enlarge = copy.deepcopy(rrois) rrois_enlarge[:, 3] = rrois_enlarge[:, 3] * self.rbbox_roi_extractor.w_enlarge rrois_enlarge[:, 4] = rrois_enlarge[:, 4] * self.rbbox_roi_extractor.h_enlarge rbbox_feats = self.rbbox_roi_extractor( x[:len(self.rbbox_roi_extractor.featmap_strides)], rrois_enlarge) if self.with_shared_head_rbbox: rbbox_feats = self.shared_head_rbbox(rbbox_feats) rcls_score, rbbox_pred = self.rbbox_head(rbbox_feats) rbboxes, rscores = self.rbbox_head.get_det_rbboxes( rrois, rcls_score, rbbox_pred, img_shape, scale_factor, rescale=rescale, cfg=None) aug_rbboxes.append(rbboxes) aug_rscores.append(rscores) merged_rbboxes, merged_rscores = merge_rotate_aug_bboxes( aug_rbboxes, aug_rscores, img_metas, rcnn_test_cfg ) det_rbboxes, det_rlabels = multiclass_nms_rbbox( merged_rbboxes, merged_rscores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) if rescale: _det_rbboxes = det_rbboxes else: _det_rbboxes = det_rbboxes.clone() _det_rbboxes[:, :4] *= img_metas[0][0]['scale_factor'] rbbox_results = dbbox2result(_det_rbboxes, det_rlabels, self.rbbox_head.num_classes) return rbbox_results
def aug_test_cascadercnn_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): """ implement for cascade aug test. """ aug_bboxes = [] aug_scores = [] for x, img_meta in zip(feats, img_metas): img_shape = img_meta[0]['img_shape'] ori_shape = img_meta[0]['ori_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] # TODO more flexible proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) # get bbox from different stages ms_bbox_result = {} ms_segm_result = {} ms_scores = [] rcnn_test_cfg = self.test_cfg_rcnn for i in range(self.num_stages): # for each image there are 3 stages to go through. bbox_roi_extractor = self.bbox_roi_extractor[i] bbox_head = self.bbox_head[i] bbox_feats = bbox_roi_extractor( x[:len(bbox_roi_extractor.featmap_strides), rois]) cls_score, bbox_pred = bbox_head(bbox_feats) # get bboxes scores from all num_stages ms_scores.append(cls_score) # the rois are the same, so the predicted bboxes can be # average between them if self.test_cfg.keep_all_stages: det_bboxes, det_labels = bbox_head.get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(det_bboxes) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / self.num_stages bboxes, scores = self.bbox_head[-1].get_det_bboxes(rois, cls_score, bbox_pred, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, proposals=None, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ if self.with_semantic: semantic_feats = [ self.semantic_head(feat)[1] for feat in self.extract_feats(imgs) ] else: semantic_feats = [None] * len(img_metas) # recompute feats to save memory proposal_list = self.aug_test_rpn( self.extract_feats(imgs), img_metas, self.test_cfg.rpn) rcnn_test_cfg = self.test_cfg.rcnn aug_bboxes = [] aug_scores = [] for x, img_meta, semantic in zip( self.extract_feats(imgs), img_metas, semantic_feats): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, semantic_feat=semantic) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_det_bboxes( rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_masks = [] aug_img_metas = [] aug_masks_wo_sigmoid = [] for x, img_meta, semantic in zip( self.extract_feats(imgs), img_metas, semantic_feats): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor[-1]( x[:len(self.mask_roi_extractor[-1].featmap_strides)], mask_rois) if self.with_semantic: semantic_feat = semantic mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) if mask_semantic_feat.shape[-2:] != mask_feats.shape[ -2:]: mask_semantic_feat = F.adaptive_avg_pool2d( mask_semantic_feat, mask_feats.shape[-2:]) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head( mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) aug_img_metas.append(img_meta) aug_masks_wo_sigmoid.append(mask_pred.cpu().numpy()) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=1.0, rescale=False) merged_masks_wo_sigmoid = merge_aug_masks(aug_masks_wo_sigmoid, aug_img_metas, self.test_cfg.rcnn) merged_masks_torch = torch.from_numpy(merged_masks_wo_sigmoid).to(mask_feats.dtype).to(mask_feats.device) assert mask_feats.size(0) == merged_masks_torch.size(0)==det_labels.size(0) mask_iou_pred = self.mask_iou_head(mask_feats, merged_masks_torch[range(det_labels.size(0)), det_labels]) mask_scores = self.mask_iou_head.get_mask_scores(mask_iou_pred, det_bboxes, det_labels) return bbox_result, (segm_result, mask_scores) else: return bbox_result