def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
    """Detect boxes on every augmented view and fuse them into one result.

    Args:
        feats: Per-augmentation feature maps, iterated in lockstep with
            ``img_metas``.
        img_metas: Per-augmentation meta lists; only entry ``[0]`` of each
            is read (``img_shape``, ``scale_factor``, ``flip``).
        proposal_list: Proposals; only ``proposal_list[0]`` is used and its
            first four columns are mapped into each augmented view.
        rcnn_test_cfg: Config providing ``score_thr``, ``nms`` and
            ``max_per_img`` for the final NMS.

    Returns:
        The ``(det_bboxes, det_labels)`` pair produced by ``multiclass_nms``.
    """
    boxes_per_view = []
    scores_per_view = []
    for feat, meta in zip(feats, img_metas):
        # Only the first image of each meta list is considered.
        info = meta[0]
        shape = info['img_shape']
        scale = info['scale_factor']
        flipped = info['flip']
        # TODO more flexible
        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale, flipped)
        rois = bbox2roi([mapped])
        # RoI features are computed per view inside the loop.
        extractor = self.bbox_roi_extractor
        pooled = extractor(feat[:len(extractor.featmap_strides)], rois)
        if self.with_shared_head:
            pooled = self.shared_head(pooled)
        cls_score, bbox_pred = self.bbox_head(pooled)
        boxes, scores = self.bbox_head.get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            shape,
            scale,
            rescale=False,
            cfg=None)
        boxes_per_view.append(boxes)
        scores_per_view.append(scores)

    # Fuse the per-view predictions, then run a single NMS on the result.
    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)
    return det_bboxes, det_labels
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
    """Test det bboxes with test time augmentation.

    Each augmented view gets the proposals from ``proposal_list[0]`` mapped
    into its own frame (using ``img_shape``, ``scale_factor``, ``flip`` and
    ``flip_direction`` from ``img_meta[0]``), is run through
    ``self._bbox_forward`` and decoded without NMS. The per-view outputs are
    merged and passed through ``multiclass_nms`` once.

    Returns:
        The ``(det_bboxes, det_labels)`` pair produced by ``multiclass_nms``.
    """
    boxes_per_view = []
    scores_per_view = []
    for feat, meta in zip(feats, img_metas):
        # Only the first image of each meta list is considered.
        info = meta[0]
        shape = info['img_shape']
        scale = info['scale_factor']
        flipped = info['flip']
        direction = info['flip_direction']
        # TODO more flexible
        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale, flipped,
                              direction)
        rois = bbox2roi([mapped])
        head_out = self._bbox_forward(feat, rois)
        boxes, scores = self.bbox_head.get_bboxes(
            rois,
            head_out['cls_score'],
            head_out['bbox_pred'],
            shape,
            scale,
            rescale=False,
            cfg=None)
        boxes_per_view.append(boxes)
        scores_per_view.append(scores)

    # Fuse the per-view predictions, then run a single NMS on the result.
    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)
    return det_bboxes, det_labels
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
    """Augmented bbox test with optional variance-voting soft-NMS.

    Works like the plain augmented bbox test, except that the bbox head may
    return a tuple as its box output; in that case element 0 is treated as
    the boxes and element 1 as their variance. Variance voting is used only
    when at least one variance was collected and
    ``rcnn_test_cfg.nms.type == 'soft_nms_variance_voting'``; otherwise the
    standard merge + ``multiclass_nms`` path is taken.

    Returns:
        The ``(det_bboxes, det_labels)`` pair from whichever NMS routine ran.
    """
    boxes_per_view = []
    scores_per_view = []
    variance_per_view = []
    for feat, meta in zip(feats, img_metas):
        # Only the first image of each meta list is considered.
        info = meta[0]
        shape = info['img_shape']
        scale = info['scale_factor']
        flipped = info['flip']
        direction = info.get('flip_direction', 'horizontal')
        # TODO more flexible
        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale, flipped,
                              direction)
        rois = bbox2roi([mapped])
        # RoI features are computed per view inside the loop.
        extractor = self.bbox_roi_extractor
        pooled = extractor(feat[:len(extractor.featmap_strides)], rois)
        if self.with_shared_head:
            pooled = self.shared_head(pooled)
        cls_score, bbox_pred = self.bbox_head(pooled)
        box_output, scores = self.bbox_head.get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            shape,
            scale,
            rescale=False,
            cfg=None)
        if not isinstance(box_output, tuple):
            boxes_per_view.append(box_output)
        else:
            boxes_per_view.append(box_output[0])
            variance_per_view.append(box_output[1])
        scores_per_view.append(scores)

    return_variance = rcnn_test_cfg.get('return_variance', False)
    use_variance_voting = (
        len(variance_per_view) != 0
        and rcnn_test_cfg.nms.type == 'soft_nms_variance_voting')

    if use_variance_voting:
        fused_boxes, fused_scores, fused_variance = merge_aug_bboxes_variance(
            boxes_per_view, scores_per_view, variance_per_view, img_metas,
            rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_soft_nms_variance_voting(
            fused_boxes,
            fused_variance,
            fused_scores,
            rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms,
            rcnn_test_cfg.max_per_img,
            return_variance=return_variance)
    else:
        fused_boxes, fused_scores = merge_aug_bboxes(
            boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
            fused_boxes,
            fused_scores,
            rcnn_test_cfg.score_thr,
            rcnn_test_cfg.nms,
            rcnn_test_cfg.max_per_img)
    return det_bboxes, det_labels
def tsd_aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
    """Augmented bbox test that decodes from the TSD branch of the bbox head.

    For each augmented view the bbox head returns six outputs; only the TSD
    classification scores, the TSD box regression and ``delta_r`` are used.
    The RoIs are shifted by ``delta_r * 0.1 * (w, h)`` before decoding, and
    the per-view results are merged and passed through ``multiclass_nms``.

    Returns:
        The ``(det_bboxes, det_labels)`` pair produced by ``multiclass_nms``.
    """
    shift_scale = 0.1
    boxes_per_view = []
    scores_per_view = []
    for feat, meta in zip(feats, img_metas):
        # Only the first image of each meta list is considered.
        info = meta[0]
        shape = info["img_shape"]
        scale_factor = info["scale_factor"]
        flipped = info["flip"]
        # TODO more flexible
        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale_factor,
                              flipped)
        rois = bbox2roi([mapped])
        # RoI features are computed per view inside the loop.
        extractor = self.bbox_roi_extractor
        pooled = extractor(feat[:len(extractor.featmap_strides)], rois)
        (_cls_score, _bbox_pred, tsd_cls_score, tsd_bbox_pred, _delta_c,
         delta_r) = self.bbox_head(
             pooled, feat[:self.bbox_roi_extractor.num_inputs], rois)

        # RoI extents; columns 1..4 are read as x1, y1, x2, y2 here
        # (column 0 is copied unchanged below).
        roi_w = rois[:, 3] - rois[:, 1] + 1
        roi_h = rois[:, 4] - rois[:, 2] + 1

        shifted_rois = rois.new_zeros(rois.shape[0], rois.shape[1])
        shifted_rois[:, 0] = rois[:, 0]
        delta_r = delta_r.to(dtype=shifted_rois.dtype)
        # The same offset is applied to both corners, i.e. a pure translation.
        shifted_rois[:, 1] = rois[:, 1] + delta_r[:, 0] * shift_scale * roi_w
        shifted_rois[:, 2] = rois[:, 2] + delta_r[:, 1] * shift_scale * roi_h
        shifted_rois[:, 3] = rois[:, 3] + delta_r[:, 0] * shift_scale * roi_w
        shifted_rois[:, 4] = rois[:, 4] + delta_r[:, 1] * shift_scale * roi_h

        boxes, scores = self.bbox_head.get_det_bboxes(
            shifted_rois,
            tsd_cls_score,
            tsd_bbox_pred,
            shape,
            scale_factor,
            rescale=False,
            cfg=None,
        )
        boxes_per_view.append(boxes)
        scores_per_view.append(scores)

    # Fuse the per-view predictions, then run a single NMS on the result.
    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img,
    )
    return det_bboxes, det_labels
def multi_bboxes_test(self, feats, img_metas, proposal_list, rcnn_test_cfg,
                      rescale):
    """Run the multi-stage bbox heads on every augmented view and fuse them.

    For each view the RoIs are refined stage by stage with
    ``regress_by_class``; the classification scores of all stages are summed
    and divided by ``self.num_stages`` before the last head decodes the
    boxes. ``rescale`` is accepted but not used by this method.

    Returns:
        The ``(det_bboxes, det_labels)`` pair produced by ``multiclass_nms``.
    """
    scores_per_view = []
    boxes_per_view = []
    for feat, meta in zip(feats, img_metas):
        info = meta[0]
        shape = info['img_shape']
        scale = info['scale_factor']
        flipped = info['flip']
        # One classification score per stage.
        stage_scores = []
        # TODO more flexible
        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale, flipped)
        rois = bbox2roi([mapped])
        for stage in range(self.num_stages):
            extractor = self.bbox_roi_extractor[stage]
            head = self.bbox_head[stage]
            pooled = extractor(feat[:len(extractor.featmap_strides)], rois)
            if self.with_shared_head:
                pooled = self.shared_head(pooled)
            cls_score, bbox_pred = head(pooled)
            stage_scores.append(cls_score)

            # Every stage except the last refines the RoIs for the next one.
            if stage < self.num_stages - 1:
                top_label = cls_score.argmax(dim=1)
                rois = head.regress_by_class(rois, top_label, bbox_pred,
                                             meta[0])

        cls_score = sum(stage_scores) / self.num_stages
        boxes, scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            shape,
            scale,
            rescale=False,
            cfg=None)
        scores_per_view.append(scores)
        boxes_per_view.append(boxes)

    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)
    return det_bboxes, det_labels
def nms_bboxes(self,
               img_meta,
               rois,
               cls_score,
               bbox_pred,
               rcnn_test_cfg,
               ms_bbox_result=None):
    """Decode boxes with the last bbox head and apply NMS.

    Args:
        img_meta: Meta list; ``img_meta[0]`` supplies ``img_shape`` and
            ``scale_factor``.
        rois, cls_score, bbox_pred: Inputs forwarded to
            ``self.bbox_head[-1].get_det_bboxes``.
        rcnn_test_cfg: Test config (``score_thr``, ``nms``, ``max_per_img``).
        ms_bbox_result: Optional ``(aug_bboxes, aug_scores)`` pair of lists
            holding earlier results. When given, the new prediction is
            appended to both lists **in place**, everything is merged and a
            single NMS is run. When ``None``, the head decodes and filters
            on its own (``rescale=True, cfg=rcnn_test_cfg``).

    Returns:
        ``(det_bboxes, det_labels)``.
    """
    img_shape = img_meta[0]['img_shape']
    scale_factor = img_meta[0]['scale_factor']
    last_head = self.bbox_head[-1]

    if ms_bbox_result is None:
        # No earlier results to merge with: let the head do the whole job.
        det_bboxes, det_labels = last_head.get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=True,
            cfg=rcnn_test_cfg)
        return det_bboxes, det_labels

    # Unpack only after the None check; unpacking first made the default
    # argument raise TypeError and the branch above unreachable.
    aug_bboxes, aug_scores = ms_bbox_result
    bboxes, scores = last_head.get_det_bboxes(
        rois,
        cls_score,
        bbox_pred,
        img_shape,
        scale_factor,
        rescale=False,
        cfg=None)
    aug_bboxes.append(bboxes)
    aug_scores.append(scores)
    # Every accumulated result is merged with this same img_meta.
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, [img_meta] * len(aug_bboxes), rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        merged_bboxes,
        merged_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)
    return det_bboxes, det_labels
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
    """Detect boxes on every augmented view and fuse them into one result.

    Only ``proposal_list[0]`` is used: its first four columns are mapped
    into each augmented view, so the boxes predicted on the different views
    correspond to one another and can be merged.

    Returns:
        The ``(det_bboxes, det_labels)`` pair produced by ``multiclass_nms``.
    """
    boxes_per_view = []
    scores_per_view = []
    for feat, meta in zip(feats, img_metas):
        # Only the first image of each meta list is considered.
        info = meta[0]
        shape = info['img_shape']
        scale = info['scale_factor']
        flipped = info['flip']
        # TODO more flexible
        # The same proposals (proposal_list[0]) are projected into this
        # view, which keeps the per-view boxes aligned for merging.
        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale, flipped)
        rois = bbox2roi([mapped])
        # RoI features are computed per view inside the loop.
        extractor = self.bbox_roi_extractor
        pooled = extractor(feat[:len(extractor.featmap_strides)], rois)
        cls_score, bbox_pred = self.bbox_head(pooled)
        boxes, scores = self.bbox_head.get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            shape,
            scale,
            rescale=False,
            cfg=None)
        boxes_per_view.append(boxes)
        scores_per_view.append(scores)

    # Fuse the per-view predictions, then run a single NMS on the result.
    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)
    return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
    """Test with augmentations and return the ensembled result.

    Boxes are predicted per augmented view by the multi-stage bbox heads,
    merged and filtered with one NMS. When ``self.with_mask`` is set, masks
    are predicted per view for the final boxes, averaged over stages and
    then over views. Results are stored under the ``'ensemble'`` key.

    Returns:
        With ``test_cfg.keep_all_stages`` falsy: the ensemble bbox result,
        or ``(bbox_result, segm_result)`` when masks are enabled. Otherwise
        the per-key result dict(s).
    """
    ms_bbox_result = {}
    ms_segm_result = {}
    rcnn_test_cfg = self.test_cfg.rcnn

    # Features are extracted again for every pass instead of being cached.
    proposal_list = self.aug_test_rpn(
        self.extract_feats(imgs), img_metas, self.test_cfg.rpn)

    boxes_per_view = []
    scores_per_view = []
    for feat, meta in zip(self.extract_feats(imgs), img_metas):
        info = meta[0]
        shape = info['img_shape']
        scale = info['scale_factor']
        flipped = info['flip']
        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale, flipped)
        rois = bbox2roi([mapped])

        semantic_feat = None
        if self.with_semantic:
            _, semantic_feat = self.semantic_head(feat)

        stage_scores = []
        for stage in range(self.num_stages):
            head = self.bbox_head[stage]
            cls_score, bbox_pred = self._bbox_forward_test(
                stage, feat, rois, semantic_feat=semantic_feat)
            stage_scores.append(cls_score)

            # Every stage except the last refines the RoIs for the next one.
            if stage < self.num_stages - 1:
                top_label = cls_score.argmax(dim=1)
                rois = head.regress_by_class(rois, top_label, bbox_pred,
                                             meta[0])

        cls_score = sum(stage_scores) / self.num_stages
        view_boxes, view_scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            shape,
            scale,
            rescale=False,
            cfg=None)
        boxes_per_view.append(view_boxes)
        scores_per_view.append(view_scores)

    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
    # After ensembling, all det_bboxes are at the original image size.
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)

    if not rescale:
        # Bring the boxes back to the scale of the first augmented view.
        _det_bboxes = det_bboxes.clone()
        _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
    else:
        _det_bboxes = det_bboxes

    ms_bbox_result['ensemble'] = bbox2result(
        _det_bboxes, det_labels, self.bbox_head[-1].num_classes)

    if self.with_mask:
        if det_bboxes.shape[0] == 0:
            segm_result = [
                [] for _ in range(self.mask_head[-1].num_classes - 1)
            ]
        else:
            masks_per_view = []
            for feat, meta in zip(self.extract_feats(imgs), img_metas):
                semantic_feat = None
                if self.with_semantic:
                    _, semantic_feat = self.semantic_head(feat)

                info = meta[0]
                shape = info['img_shape']
                scale = info['scale_factor']
                flipped = info['flip']
                view_boxes = bbox_mapping(det_bboxes[:, :4], shape, scale,
                                          flipped)
                mask_rois = bbox2roi([view_boxes])

                stage_masks = []
                extractor = self.mask_roi_extractor[-1]
                mask_feats = extractor(
                    feat[:len(extractor.featmap_strides)], mask_rois)
                if self.with_semantic and 'mask' in self.semantic_fusion:
                    mask_semantic_feat = self.semantic_roi_extractor(
                        [semantic_feat], mask_rois)
                    mask_feats += mask_semantic_feat

                last_feat = None
                for stage in range(self.num_stages):
                    mask_head = self.mask_head[stage]
                    if self.mask_info_flow:
                        mask_pred, last_feat = mask_head(mask_feats,
                                                         last_feat)
                    else:
                        mask_pred = mask_head(mask_feats)
                    stage_masks.append(mask_pred.sigmoid().cpu().numpy())

                # First average the masks over all stages of this view.
                view_masks = merge_aug_masks(
                    stage_masks, [meta] * self.num_stages,
                    self.test_cfg.rcnn)
                masks_per_view.append(view_masks)

            # Then average over the augmented views.
            merged_masks = np.mean(masks_per_view, axis=0)
            ori_shape = img_metas[0][0]['ori_shape']
            segm_result = self.mask_head[-1].get_seg_masks(
                merged_masks,
                det_bboxes,
                det_labels,
                self.test_cfg.rcnn,
                ori_shape,
                scale_factor=1.0,
                rescale=False)
        ms_segm_result['ensemble'] = segm_result

    # Simple check of whether the TTA result is correct.
    if self.test_cfg.keep_all_stages:
        if self.with_mask:
            return {
                stage: (ms_bbox_result[stage], ms_segm_result[stage])
                for stage in ms_bbox_result
            }
        return ms_bbox_result

    if self.with_mask:
        return (ms_bbox_result['ensemble'], ms_segm_result['ensemble'])
    return ms_bbox_result['ensemble']
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
    """Cascade-style augmented bbox test.

    For every augmented view the RoIs are refined stage by stage; the
    classification scores of all stages are averaged and decoded by the last
    bbox head without NMS. The per-view outputs are merged and passed through
    ``multiclass_nms`` once.

    When ``self.test_cfg.keep_all_stages`` is set, each stage is also decoded
    with ``rcnn_test_cfg`` and converted with ``bbox2result``; those
    per-stage results are kept in a local dict only and are not returned.

    Returns:
        The ``(det_bboxes, det_labels)`` pair produced by ``multiclass_nms``.
    """
    boxes_per_view = []
    scores_per_view = []
    for feat, meta in zip(feats, img_metas):
        # Only the first image of each meta list is considered.
        info = meta[0]
        shape = info['img_shape']
        scale = info['scale_factor']
        flipped = info['flip']
        # TODO more flexible
        per_stage_results = {}
        stage_scores = []
        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale, flipped)
        rois = bbox2roi([mapped])

        for stage in range(self.num_stages):
            extractor = self.bbox_roi_extractor[stage]
            head = self.bbox_head[stage]

            pooled = extractor(feat[:len(extractor.featmap_strides)], rois)
            if self.with_shared_head:
                pooled = self.shared_head(pooled)

            cls_score, bbox_pred = head(pooled)
            stage_scores.append(cls_score)

            if self.test_cfg.keep_all_stages:
                stage_boxes, stage_labels = head.get_det_bboxes(
                    rois,
                    cls_score,
                    bbox_pred,
                    shape,
                    scale,
                    rescale=False,
                    cfg=rcnn_test_cfg)
                per_stage_results['stage{}'.format(stage)] = bbox2result(
                    stage_boxes, stage_labels, head.num_classes)

            # Every stage except the last refines the RoIs for the next one.
            if stage < self.num_stages - 1:
                top_label = cls_score.argmax(dim=1)
                rois = head.regress_by_class(rois, top_label, bbox_pred,
                                             meta[0])

        cls_score = sum(stage_scores) / float(len(stage_scores))
        # Decoded with cfg=None, i.e. without the head's own NMS settings.
        view_boxes, view_scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            shape,
            scale,
            rescale=False,
            cfg=None)
        boxes_per_view.append(view_boxes)
        scores_per_view.append(view_scores)

    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)
    return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, templates, proposals=None, rescale=False):
    """Test with augmentations.

    If rescale is False, then returned bboxes will fit the scale of
    imgs[0].

    Features are extracted from ``imgs`` together with ``templates``; the
    multi-stage bbox heads are run on every augmented view and the fused
    detections are converted with ``bbox2result``.
    """
    # Features are extracted again for each pass instead of being cached.
    proposal_list = self.aug_test_rpn(
        self.extract_feats(imgs, templates), img_metas, self.test_cfg.rpn)
    rcnn_test_cfg = self.test_cfg.rcnn

    boxes_per_view = []
    scores_per_view = []
    for feat, meta in zip(self.extract_feats(imgs, templates), img_metas):
        # Only the first image of each meta list is considered.
        info = meta[0]
        shape = info['img_shape']
        scale = info['scale_factor']
        flipped = info['flip']
        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale, flipped)

        # One classification score per stage.
        stage_scores = []
        rois = bbox2roi([mapped])
        for stage in range(self.num_stages):
            head = self.bbox_head[stage]
            cls_score, bbox_pred = self._bbox_forward_test(stage, feat, rois)
            stage_scores.append(cls_score)

            # Every stage except the last refines the RoIs for the next one.
            if stage < self.num_stages - 1:
                top_label = cls_score.argmax(dim=1)
                rois = head.regress_by_class(rois, top_label, bbox_pred,
                                             meta[0])

        cls_score = sum(stage_scores) / float(len(stage_scores))
        view_boxes, view_scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            shape,
            scale,
            rescale=False,
            cfg=None)
        boxes_per_view.append(view_boxes)
        scores_per_view.append(view_scores)

    # Fuse the per-view predictions, then run a single NMS on the result.
    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)

    return bbox2result(det_bboxes, det_labels,
                       self.bbox_head[-1].num_classes)
def aug_test(self, imgs, img_metas, rescale=False, **kwargs):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].

    Boxes are predicted by every model in ``self.models`` on every view and
    merged with ``type='concat'``. Masks and relations are computed with
    ``self.models[0]`` only. Relation predictions are saved as ``.npy``
    files when ``rel_save_folder`` is set on that model.

    Returns:
        ``bbox_result`` when ``self.models[0].with_mask`` is falsy, otherwise
        ``(bbox_result, segm_result)``.
    """
    primary = self.models[0]
    rpn_test_cfg = primary.test_cfg.rpn
    rcnn_test_cfg = primary.test_cfg.rcnn

    # For each model, compute detections on every augmented view.
    aug_bboxes = []
    aug_scores = []
    aug_img_metas = []
    for model in self.models:
        for x, img_meta in zip(model.extract_feats(imgs), img_metas):
            proposal_list = model.simple_test_rpn(x, img_meta, rpn_test_cfg)
            _, semantic_feat = model.semantic_head(x)

            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']

            ms_scores = []
            rois = bbox2roi(proposal_list)
            for i in range(model.num_stages):
                bbox_head = model.bbox_head[i]
                cls_score, bbox_pred = model._bbox_forward_test(
                    i, x, rois, semantic_feat=semantic_feat)
                ms_scores.append(cls_score)

                # Every stage except the last refines the RoIs.
                if i < model.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(
                        rois, bbox_label, bbox_pred, img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = model.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)
            aug_img_metas.append(img_meta)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, aug_img_metas, rcnn_test_cfg, type='concat')
    det_bboxes, det_labels = multiclass_nms(
        merged_bboxes,
        merged_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              primary.bbox_head[-1].num_classes)

    if not primary.with_mask:
        return bbox_result

    if det_bboxes.shape[0] == 0:
        # Nothing survived NMS: there are no masks and therefore no
        # relations to compute (the relation stage needs merged masks).
        segm_result = [
            [] for _ in range(primary.mask_head[-1].num_classes - 1)
        ]
        return bbox_result, segm_result

    aug_masks = []
    aug_img_metas = []
    # Only the first model contributes masks.
    for model in [primary]:
        for x, img_meta in zip(model.extract_feats(imgs), img_metas):
            scale_factor = img_meta[0]['scale_factor']
            # NOTE(review): unlike the sibling TTA methods, boxes are not
            # passed through bbox_mapping here, so the view's flip is not
            # applied - confirm this is intended.
            _bboxes = (
                det_bboxes[:, :4] * scale_factor if rescale else det_bboxes)
            mask_rois = bbox2roi([_bboxes])
            mask_roi_extractor = model.mask_roi_extractor[-1]
            mask_feats = mask_roi_extractor(
                x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
            _, semantic_feat = model.semantic_head(x)
            mask_semantic_feat = model.semantic_roi_extractor(
                [semantic_feat], mask_rois)
            mask_feats += mask_semantic_feat

            last_feat = None
            for i in range(model.num_stages):
                mask_head = model.mask_head[i]
                if model.mask_info_flow:
                    mask_pred, last_feat = mask_head(mask_feats, last_feat)
                else:
                    mask_pred = mask_head(mask_feats)
                aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                aug_img_metas.append(img_meta)

    merged_masks = merge_aug_masks(aug_masks, aug_img_metas, rcnn_test_cfg)

    ori_shape = img_metas[0][0]['ori_shape']
    segm_result = primary.mask_head[-1].get_seg_masks(
        merged_masks,
        det_bboxes,
        det_labels,
        rcnn_test_cfg,
        ori_shape,
        scale_factor=1.0,
        rescale=rescale)

    # compute relations
    rel_model = primary
    for x, img_meta in zip(rel_model.extract_feats(imgs), img_metas):
        _, semantic_feat = rel_model.semantic_head(x)
        filename = img_meta[0]['filename']
        im_height, im_width, _ = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        ori_shape = img_meta[0]['ori_shape']
        relation_preds = rel_model._rel_forward_test(
            x,
            det_bboxes,
            det_labels,
            merged_masks,
            scale_factor,
            ori_shape,
            semantic_feat=semantic_feat,
            im_width=im_width,
            im_height=im_height)
        if rel_model.rel_save_folder is not None:
            np.save(
                os.path.join(rel_model.rel_save_folder, filename + '.npy'),
                relation_preds)

    return bbox_result, segm_result
def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].

    Proposals and boxes are gathered from every model in ``self.models`` on
    every augmented view and merged. Mask prediction is not implemented:
    ``NotImplementedError`` is raised when ``self.models[0].with_mask`` is
    set.
    """
    rpn_test_cfg = self.models[0].test_cfg.rpn
    imgs_per_gpu = len(img_metas[0])

    # One bucket of proposals per image, filled by every model and view.
    proposals_per_img = [[] for _ in range(imgs_per_gpu)]
    for model in self.models:
        for feat, meta in zip(model.extract_feats(imgs), img_metas):
            view_proposals = model.simple_test_rpn(feat, meta, rpn_test_cfg)
            for img_idx, img_proposals in enumerate(view_proposals):
                proposals_per_img[img_idx].append(img_proposals)

    # after merging, proposals will be rescaled to the original image size
    proposal_list = [
        merge_aug_proposals(bucket, meta, rpn_test_cfg)
        for bucket, meta in zip(proposals_per_img, img_metas)
    ]

    rcnn_test_cfg = self.models[0].test_cfg.rcnn
    boxes_per_view = []
    scores_per_view = []
    metas_per_view = []
    for model in self.models:
        for feat, meta in zip(model.extract_feats(imgs), img_metas):
            # Only the first image of each meta list is considered.
            info = meta[0]
            shape = info['img_shape']
            scale = info['scale_factor']
            flipped = info['flip']
            mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale,
                                  flipped)

            # One classification score per stage.
            stage_scores = []
            rois = bbox2roi([mapped])
            for stage in range(model.num_stages):
                extractor = model.bbox_roi_extractor[stage]
                head = model.bbox_head[stage]

                pooled = extractor(feat[:len(extractor.featmap_strides)],
                                   rois)
                if model.with_shared_head:
                    pooled = model.shared_head(pooled)

                cls_score, bbox_pred = head(pooled)
                stage_scores.append(cls_score)

                # Every stage except the last refines the RoIs.
                if stage < model.num_stages - 1:
                    top_label = cls_score.argmax(dim=1)
                    rois = head.regress_by_class(rois, top_label, bbox_pred,
                                                 meta[0])

            cls_score = sum(stage_scores) / float(len(stage_scores))
            view_boxes, view_scores = model.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                shape,
                scale,
                rescale=False,
                cfg=None)
            boxes_per_view.append(view_boxes)
            scores_per_view.append(view_scores)
            metas_per_view.append(meta)

    # after merging, bboxes will be rescaled to the original image size
    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, metas_per_view, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.models[0].bbox_head[-1].num_classes)

    if self.models[0].with_mask:
        raise NotImplementedError
    return bbox_result
def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].

    Boxes are predicted per augmented view by the multi-stage bbox heads,
    merged and filtered with a single NMS. With ``self.with_mask`` set,
    masks are predicted per view and stage, merged, and scored with
    ``self.mask_iou_head``.

    Returns:
        ``bbox_result`` when masks are disabled, otherwise
        ``(bbox_result, (segm_result, mask_scores))``. When no detection
        survives NMS, ``segm_result`` and ``mask_scores`` are both lists of
        empty lists, one entry per foreground class.
    """
    if self.with_semantic:
        semantic_feats = [
            self.semantic_head(feat)[1]
            for feat in self.extract_feats(imgs)
        ]
    else:
        semantic_feats = [None] * len(img_metas)

    # recompute feats to save memory
    proposal_list = self.aug_test_rpn(
        self.extract_feats(imgs), img_metas, self.test_cfg.rpn)

    rcnn_test_cfg = self.test_cfg.rcnn
    aug_bboxes = []
    aug_scores = []
    for x, img_meta, semantic in zip(
            self.extract_feats(imgs), img_metas, semantic_feats):
        # only one image in the batch
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']

        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip)
        # "ms" in variable names means multi-stage
        ms_scores = []

        rois = bbox2roi([proposals])
        for i in range(self.num_stages):
            bbox_head = self.bbox_head[i]
            cls_score, bbox_pred = self._bbox_forward_test(
                i, x, rois, semantic_feat=semantic)
            ms_scores.append(cls_score)

            # Every stage except the last refines the RoIs for the next one.
            if i < self.num_stages - 1:
                bbox_label = cls_score.argmax(dim=1)
                rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,
                                                  img_meta[0])

        cls_score = sum(ms_scores) / float(len(ms_scores))
        bboxes, scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        merged_bboxes,
        merged_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return bbox_result

    if det_bboxes.shape[0] == 0:
        num_fg_classes = self.mask_head[-1].num_classes - 1
        segm_result = [[] for _ in range(num_fg_classes)]
        # mask_scores must exist on this path too; it was previously left
        # unassigned, so the return below raised UnboundLocalError.
        mask_scores = [[] for _ in range(num_fg_classes)]
        return bbox_result, (segm_result, mask_scores)

    aug_masks = []
    aug_img_metas = []
    aug_masks_wo_sigmoid = []
    for x, img_meta, semantic in zip(
            self.extract_feats(imgs), img_metas, semantic_feats):
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']
        _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor,
                               flip)
        mask_rois = bbox2roi([_bboxes])
        mask_roi_extractor = self.mask_roi_extractor[-1]
        mask_feats = mask_roi_extractor(
            x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
        if self.with_semantic:
            mask_semantic_feat = self.semantic_roi_extractor([semantic],
                                                             mask_rois)
            # Match spatial sizes before fusing the two feature maps.
            if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:
                mask_semantic_feat = F.adaptive_avg_pool2d(
                    mask_semantic_feat, mask_feats.shape[-2:])
            mask_feats += mask_semantic_feat

        last_feat = None
        for i in range(self.num_stages):
            mask_head = self.mask_head[i]
            if self.mask_info_flow:
                mask_pred, last_feat = mask_head(mask_feats, last_feat)
            else:
                mask_pred = mask_head(mask_feats)
            aug_masks.append(mask_pred.sigmoid().cpu().numpy())
            aug_img_metas.append(img_meta)
            # Raw predictions (no sigmoid) are kept for the mask IoU head.
            aug_masks_wo_sigmoid.append(mask_pred.cpu().numpy())

    merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                   self.test_cfg.rcnn)

    ori_shape = img_metas[0][0]['ori_shape']
    segm_result = self.mask_head[-1].get_seg_masks(
        merged_masks,
        det_bboxes,
        det_labels,
        rcnn_test_cfg,
        ori_shape,
        scale_factor=1.0,
        rescale=False)

    merged_masks_wo_sigmoid = merge_aug_masks(aug_masks_wo_sigmoid,
                                              aug_img_metas,
                                              self.test_cfg.rcnn)
    # NOTE(review): mask_feats here comes from the LAST augmented view of
    # the loop above, while the masks are merged over all views - confirm
    # this pairing is intended.
    merged_masks_torch = torch.from_numpy(merged_masks_wo_sigmoid).to(
        mask_feats.dtype).to(mask_feats.device)
    assert (mask_feats.size(0) == merged_masks_torch.size(0) ==
            det_labels.size(0))
    # Pick, for each detection, the mask channel of its predicted label.
    mask_iou_pred = self.mask_iou_head(
        mask_feats,
        merged_masks_torch[range(det_labels.size(0)), det_labels])
    mask_scores = self.mask_iou_head.get_mask_scores(
        mask_iou_pred, det_bboxes, det_labels)

    return bbox_result, (segm_result, mask_scores)
def aug_test_cascade_bboxes(self,
                            feats,
                            img_metas,
                            proposal_list,
                            rcnn_test_cfg,
                            rescale=False):
    """Cascade-style augmented bbox test.

    For every augmented view the RoIs are refined stage by stage with
    ``regress_by_class``; the classification scores of all stages are summed
    and divided by ``self.num_stages``, then decoded by the last bbox head
    without NMS. The per-view outputs are merged and passed through
    ``multiclass_nms`` once.

    Args:
        feats: Per-augmentation feature maps, iterated in lockstep with
            ``img_metas``.
        img_metas: Per-augmentation meta lists; only entry ``[0]`` is read.
        proposal_list: Proposals; only ``proposal_list[0]`` is used.
        rcnn_test_cfg: Config providing ``score_thr``, ``nms`` and
            ``max_per_img``.
        rescale: Accepted for interface compatibility; not used here.

    Returns:
        The ``(det_bboxes, det_labels)`` pair produced by ``multiclass_nms``.
    """
    aug_bboxes = []
    aug_scores = []
    for x, img_meta in zip(feats, img_metas):
        # only one image in the batch
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']

        # "ms" in variable names means multi-stage
        ms_scores = []

        # TODO more flexible
        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip)
        rois = bbox2roi([proposals])

        for i in range(self.num_stages):
            bbox_roi_extractor = self.bbox_roi_extractor[i]
            bbox_head = self.bbox_head[i]

            bbox_feats = bbox_roi_extractor(
                x[:len(bbox_roi_extractor.featmap_strides)], rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)

            cls_score, bbox_pred = bbox_head(bbox_feats)
            ms_scores.append(cls_score)

            # Per-stage decoding for keep_all_stages was dropped: its
            # outputs were always overwritten before being used.

            # Every stage except the last refines the RoIs for the next one.
            if i < self.num_stages - 1:
                bbox_label = cls_score.argmax(dim=1)
                rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,
                                                  img_meta[0])

        cls_score = sum(ms_scores) / self.num_stages
        # cfg must be None so that no NMS is performed at this point.
        bboxes, scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        merged_bboxes,
        merged_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)
    return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, rescale=False, eval_size=None):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0]. Augmentation over stages is resolved first, then
    augmentation over the different views.

    Features are extracted once per image and reused for the RPN, bbox and
    mask passes. ``eval_size`` is forwarded to ``get_seg_masks``.

    Raises:
        NotImplementedError: if ``self.test_cfg.keep_all_stages`` is set.
    """
    view_feats = [self.extract_feat(img) for img in imgs]

    proposal_list = self.aug_test_rpn(view_feats, img_metas,
                                      self.test_cfg.rpn)

    view_semantics = []
    if self.with_semantic:
        for feat in view_feats:
            _, semantic = self.semantic_head(feat)
            view_semantics.append(semantic)
    else:
        view_semantics.extend(None for _ in view_feats)

    # "ms" in variable names means multi-stage
    ms_bbox_result = {}
    ms_segm_result = {}
    rcnn_test_cfg = self.test_cfg.rcnn

    boxes_per_view = []
    scores_per_view = []
    for feat, semantic, meta in zip(view_feats, view_semantics, img_metas):
        info = meta[0]
        shape = info['img_shape']
        scale = info['scale_factor']
        flipped = info['flip']

        mapped = bbox_mapping(proposal_list[0][:, :4], shape, scale, flipped)
        rois = bbox2roi([mapped])

        stage_scores = []
        for stage in range(self.num_stages):
            head = self.bbox_head[stage]
            cls_score, bbox_pred = self._bbox_forward_test(
                stage, feat, rois, semantic_feat=semantic)
            stage_scores.append(cls_score)

            # Every stage except the last refines the RoIs for the next one.
            if stage < self.num_stages - 1:
                top_label = cls_score.argmax(dim=1)
                rois = head.regress_by_class(rois, top_label, bbox_pred,
                                             meta[0])

        cls_score = sum(stage_scores) / float(len(stage_scores))
        view_boxes, view_scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            shape,
            scale,
            rescale=False,
            cfg=None)
        boxes_per_view.append(view_boxes)
        scores_per_view.append(view_scores)

    fused_boxes, fused_scores = merge_aug_bboxes(
        boxes_per_view, scores_per_view, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(
        fused_boxes,
        fused_scores,
        rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms,
        rcnn_test_cfg.max_per_img)

    if not rescale:
        # Bring the boxes back to the scale of the first augmented view.
        _det_bboxes = det_bboxes.clone()
        _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
    else:
        _det_bboxes = det_bboxes

    ms_bbox_result['ensemble'] = bbox2result(
        _det_bboxes, det_labels, self.bbox_head[-1].num_classes)

    ori_shape = img_metas[0][0]['ori_shape']
    if self.with_mask:
        if det_bboxes.shape[0] == 0:
            segm_result = [
                [] for _ in range(self.mask_head[-1].num_classes - 1)
            ]
        else:
            masks_per_view = []
            for feat, semantic, meta in zip(view_feats, view_semantics,
                                            img_metas):
                info = meta[0]
                shape = info['img_shape']
                scale = info['scale_factor']
                flipped = info['flip']
                view_boxes = bbox_mapping(det_bboxes[:, :4], shape, scale,
                                          flipped)
                mask_rois = bbox2roi([view_boxes])

                stage_masks = []
                extractor = self.mask_roi_extractor[-1]
                mask_feats = extractor(
                    feat[:len(extractor.featmap_strides)], mask_rois)
                if self.with_semantic and 'mask' in self.semantic_fusion:
                    mask_semantic_feat = self.semantic_roi_extractor(
                        [semantic], mask_rois)
                    mask_feats += mask_semantic_feat

                last_feat = None
                for stage in range(self.num_stages):
                    mask_head = self.mask_head[stage]
                    if self.mask_info_flow:
                        mask_pred, last_feat = mask_head(mask_feats,
                                                         last_feat)
                    else:
                        mask_pred = mask_head(mask_feats)
                    stage_masks.append(mask_pred.sigmoid().cpu().numpy())

                # Merge over the stages of this view first.
                view_masks = merge_aug_masks(
                    stage_masks, [meta] * self.num_stages,
                    self.test_cfg.rcnn)
                masks_per_view.append(view_masks)

            # Flipped masks have already been recovered at this point.
            merged_masks = np.mean(masks_per_view, axis=0)
            segm_result = self.mask_head[-1].get_seg_masks(
                merged_masks,
                det_bboxes,
                det_labels,
                rcnn_test_cfg,
                ori_shape,
                scale_factor=1.0,
                rescale=False,
                eval_size=eval_size)
        ms_segm_result['ensemble'] = segm_result

    if self.test_cfg.keep_all_stages:
        raise NotImplementedError

    if self.with_mask:
        results = (ms_bbox_result['ensemble'], ms_segm_result['ensemble'])
    else:
        results = ms_bbox_result['ensemble']
    return results
def aug_test(self, imgs, img_metas, **kwargs):
    """Test with augmentations using an ensemble of models.

    Proposals, box predictions and mask predictions of every
    (model, augmented view) pair are merged.  Configuration is taken from
    ``self.models[0]``.

    Args:
        imgs: Augmented views of the image.
        img_metas: Meta information, indexed ``[view][image]``.
        **kwargs: Accepted for interface compatibility; unused.

    Returns:
        ``(bbox_result, segm_result)`` when the first model has a mask
        branch, otherwise ``bbox_result``.
    """
    rpn_test_cfg = self.models[0].test_cfg.rpn
    imgs_per_gpu = len(img_metas[0])

    # Collect, per image, one proposal set for every (model, view) pair
    # together with the meta dict of the view it was produced on.
    # `img_metas` is indexed by view, so it cannot be zipped directly
    # against the per-image proposal lists: doing so pairs the proposal
    # sets with the wrong metas and drops all but the first set.
    aug_proposals = [[] for _ in range(imgs_per_gpu)]
    aug_proposal_metas = [[] for _ in range(imgs_per_gpu)]
    for model in self.models:
        # recompute feats to save memory
        for x, img_meta in zip(model.extract_feats(imgs), img_metas):
            proposal_list = model.simple_test_rpn(x, img_meta, rpn_test_cfg)
            for i, proposals in enumerate(proposal_list):
                aug_proposals[i].append(proposals)
                aug_proposal_metas[i].append(img_meta[i])
    # after merging, proposals will be rescaled to the original image size
    proposal_list = [
        merge_aug_proposals(proposals, proposal_metas, rpn_test_cfg)
        for proposals, proposal_metas in zip(aug_proposals,
                                             aug_proposal_metas)
    ]

    rcnn_test_cfg = self.models[0].test_cfg.rcnn
    aug_bboxes = []
    aug_scores = []
    aug_img_metas = []
    for model in self.models:
        for x, img_meta in zip(model.extract_feats(imgs), img_metas):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip)
            # "ms" in variable names means multi-stage
            ms_scores = []

            rois = bbox2roi([proposals])
            for i in range(model.num_stages):
                bbox_head = model.bbox_head[i]
                cls_score, bbox_pred = model._bbox_forward_test(i, x, rois)
                ms_scores.append(cls_score)

                if i < model.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(
                        rois, bbox_label, bbox_pred, img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = model.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)
            aug_img_metas.append(img_meta)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, aug_img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.models[0].bbox_head[-1].num_classes)

    if not self.models[0].with_mask:
        return bbox_result

    if det_bboxes.shape[0] == 0:
        segm_result = [
            []
            for _ in range(self.models[0].mask_head[-1].num_classes - 1)
        ]
    else:
        aug_masks = []
        aug_img_metas = []
        for model in self.models:
            for x, img_meta in zip(model.extract_feats(imgs), img_metas):
                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']
                flip = img_meta[0]['flip']
                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                       scale_factor, flip)
                mask_rois = bbox2roi([_bboxes])
                mask_roi_extractor = model.mask_roi_extractor[-1]
                mask_feats = mask_roi_extractor(
                    x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
                last_feat = None
                for i in range(model.num_stages):
                    mask_head = model.mask_head[i]
                    if model.mask_info_flow:
                        mask_pred, last_feat = mask_head(
                            mask_feats, last_feat)
                    else:
                        mask_pred = mask_head(mask_feats)
                    # One meta entry per mask so both lists stay aligned.
                    aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                    aug_img_metas.append(img_meta)
        merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                       rcnn_test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        segm_result = self.models[0].mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
    return bbox_result, segm_result
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
    """Test det bboxes with test time augmentation, one result per proposal set.

    For every augmented view the proposals are mapped into that view (shape
    and scale come from the meta at ``self.key_dim``, the flip flag from each
    meta individually) and passed to ``self.simple_test_bboxes``.  The
    per-view outputs are then merged entry by entry.

    Returns:
        Two lists with one item per merged entry.  When ``rcnn_test_cfg``
        has an ``nms`` attribute the items are the NMS outputs
        ``(det_bboxes, det_labels)``; otherwise they are the merged boxes
        and merged scores.
    """
    boxes_per_view = []
    scores_per_view = []
    for feat, view_metas in zip(feats, img_metas):
        key_meta = view_metas[self.key_dim]
        img_shape = key_meta['img_shape']
        scale_factor = key_meta['scale_factor']
        flips = [meta['flip'] for meta in view_metas]
        # TODO more flexible
        mapped_proposals = [
            bbox_mapping(props[:, :4], img_shape, scale_factor, flipped)
            for flipped, props in zip(flips, proposal_list)
        ]
        view_boxes, view_scores = self.simple_test_bboxes(
            feat, view_metas, mapped_proposals, None, rescale=False)
        boxes_per_view.append(view_boxes)
        scores_per_view.append(view_scores)

    det_bboxes_collect = []
    det_labels_collect = []
    # after merging, bboxes will be rescaled to the original image size
    for idx in range(len(scores_per_view[0])):
        entry_boxes = [view[idx] for view in boxes_per_view]
        entry_scores = [view[idx] for view in scores_per_view]
        key_metas = [[meta[self.key_dim]] for meta in img_metas]
        merged_bboxes, merged_scores = merge_aug_bboxes(
            entry_boxes, entry_scores, key_metas, rcnn_test_cfg)
        if not hasattr(rcnn_test_cfg, 'nms'):
            # No NMS configured: hand back the raw merged outputs.
            det_bboxes_collect.append(merged_bboxes)
            det_labels_collect.append(merged_scores)
            continue
        det_bboxes, det_labels = multiclass_nms(merged_bboxes,
                                                merged_scores,
                                                rcnn_test_cfg.score_thr,
                                                rcnn_test_cfg.nms,
                                                rcnn_test_cfg.max_per_img)
        det_bboxes_collect.append(det_bboxes)
        det_labels_collect.append(det_labels)
    return det_bboxes_collect, det_labels_collect
def aug_test(self, features, proposal_list, img_metas, rescale=False):
    """Run the cascade on every augmented view and ensemble the outputs.

    ``rescale`` is accepted but not read by this implementation.

    Args:
        features: Feature maps, one entry per augmented view.
        proposal_list: Proposals; only ``proposal_list[0]`` is used.
        img_metas: Meta information per view; only entry ``[0]`` of each
            view is read.

    Returns:
        A one-element list holding ``(bbox_result, segm_result)`` when
        ``self.with_mask`` is set, otherwise ``bbox_result``.
    """
    rcnn_test_cfg = self.test_cfg
    view_bboxes = []
    view_scores = []
    for feat, view_meta in zip(features, img_metas):
        # only one image in the batch
        meta = view_meta[0]
        img_shape = meta['img_shape']
        scale_factor = meta['scale_factor']

        mapped = bbox_mapping(proposal_list[0][:, :4], img_shape,
                              scale_factor, meta['flip'],
                              meta['flip_direction'])
        rois = bbox2roi([mapped])

        stage_scores = []
        for stage in range(self.num_stages):
            bbox_results = self._bbox_forward(stage, feat, rois)
            stage_cls = bbox_results['cls_score']
            stage_scores.append(stage_cls)

            if stage < self.num_stages - 1:
                # The last score column is left out when picking the label.
                label = stage_cls[:, :-1].argmax(dim=1)
                rois = self.bbox_head[stage].regress_by_class(
                    rois, label, bbox_results['bbox_pred'], meta)

        mean_cls = sum(stage_scores) / float(len(stage_scores))
        bboxes, scores = self.bbox_head[-1].get_bboxes(
            rois,
            mean_cls,
            bbox_results['bbox_pred'],
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        view_bboxes.append(bboxes)
        view_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        view_bboxes, view_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return [bbox_result]

    if det_bboxes.shape[0] == 0:
        segm_result = [[] for _ in range(self.mask_head[-1].num_classes)]
    else:
        mask_preds = []
        mask_metas = []
        for feat, view_meta in zip(features, img_metas):
            meta = view_meta[0]
            view_boxes = bbox_mapping(det_bboxes[:, :4], meta['img_shape'],
                                      meta['scale_factor'], meta['flip'],
                                      meta['flip_direction'])
            mask_rois = bbox2roi([view_boxes])
            for stage in range(self.num_stages):
                mask_results = self._mask_forward(stage, feat, mask_rois)
                mask_preds.append(
                    mask_results['mask_pred'].sigmoid().cpu().numpy())
                mask_metas.append(view_meta)
        merged_masks = merge_aug_masks(mask_preds, mask_metas,
                                       self.test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        # All-ones scale factor passed together with rescale=False.
        unit_scale = np.ones(4)
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=unit_scale,
            rescale=False)
    return [(bbox_result, segm_result)]
def aug_test_cascadercnn_bboxes(self, feats, img_metas, proposal_list,
                                rcnn_test_cfg):
    """Test cascade det bboxes with test time augmentation.

    Each augmented view is run through all cascade stages; the class scores
    of the stages are averaged, the boxes of the last stage are decoded, and
    the per-view results are merged and filtered by NMS.

    Args:
        feats: Feature maps, one entry per augmented view.
        img_metas: Meta information per view; only entry ``[0]`` of each
            view is read.
        proposal_list: Proposals; only ``proposal_list[0]`` is used.
        rcnn_test_cfg: Test config providing ``score_thr``, ``nms`` and
            ``max_per_img``.

    Returns:
        ``(det_bboxes, det_labels)`` as produced by ``multiclass_nms``.
    """
    aug_bboxes = []
    aug_scores = []
    for x, img_meta in zip(feats, img_metas):
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']
        # TODO more flexible
        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip)
        rois = bbox2roi([proposals])

        # Class scores collected from every stage of the cascade.
        ms_scores = []
        for i in range(self.num_stages):
            bbox_roi_extractor = self.bbox_roi_extractor[i]
            bbox_head = self.bbox_head[i]

            # The rois are a separate extractor argument, not part of the
            # feature-level slice.
            bbox_feats = bbox_roi_extractor(
                x[:len(bbox_roi_extractor.featmap_strides)], rois)
            cls_score, bbox_pred = bbox_head(bbox_feats)
            ms_scores.append(cls_score)

            # Per-stage detections are deliberately not appended to
            # `aug_bboxes`: that list must hold exactly one entry per view
            # so that it stays aligned with `aug_scores` and `img_metas`
            # in `merge_aug_bboxes`.
            if i < self.num_stages - 1:
                bbox_label = cls_score.argmax(dim=1)
                rois = bbox_head.regress_by_class(rois, bbox_label,
                                                  bbox_pred, img_meta[0])

        cls_score = sum(ms_scores) / self.num_stages
        bboxes, scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # The caller-supplied `rcnn_test_cfg` is used as is.
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)
    return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
    """Test with augmentations and return the ensembled result.

    Features are re-extracted for the proposal, bbox and mask passes.
    Boxes of all views are merged and filtered with
    ``box_results_with_nms_and_limit``.  The incoming ``proposals``
    argument is not read.

    If ``rescale`` is False, the returned bboxes are multiplied by the scale
    factor of ``imgs[0]``; otherwise the merged boxes are returned unchanged.

    Returns:
        ``(bbox_result, segm_result)`` when ``self.with_mask`` is set,
        otherwise ``bbox_result``.
    """
    proposal_list = self.aug_test_rpn(
        self.extract_feats(imgs), img_metas, self.test_cfg.rpn)

    rcnn_test_cfg = self.test_cfg.rcnn
    view_bboxes = []
    view_scores = []
    for feat, view_meta in zip(self.extract_feats(imgs), img_metas):
        meta = view_meta[0]
        img_shape = meta['img_shape']
        scale_factor = meta['scale_factor']
        mapped = bbox_mapping(proposal_list[0][:, :4], img_shape,
                              scale_factor, meta['flip'])

        stage_scores = []
        rois = bbox2roi([mapped])
        for stage in range(self.num_stages):
            extractor = self.bbox_roi_extractor[stage]
            head = self.bbox_head[stage]
            roi_feats = extractor(
                feat[:len(extractor.featmap_strides)], rois)
            cls_score, bbox_pred = head(roi_feats)
            stage_scores.append(cls_score)

            if stage < self.num_stages - 1:
                rois = head.regress_by_class(
                    rois, cls_score.argmax(dim=1), bbox_pred, meta)

        cls_score = sum(stage_scores) / self.num_stages
        bboxes, scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        view_bboxes.append(bboxes)
        view_scores.append(scores)

    merged_bboxes, merged_scores = merge_aug_bboxes(
        view_bboxes, view_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = box_results_with_nms_and_limit(
        merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
        rcnn_test_cfg.nms, self.test_cfg, rcnn_test_cfg.max_per_img)

    _det_bboxes = det_bboxes
    if not rescale:
        _det_bboxes = det_bboxes.clone()
        _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
    bbox_result = bbox2result(_det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return bbox_result

    if det_bboxes.shape[0] == 0:
        segm_result = [
            [] for _ in range(self.mask_head[-1].num_classes - 1)
        ]
    else:
        mask_preds = []
        mask_metas = []
        for feat, view_meta in zip(self.extract_feats(imgs), img_metas):
            meta = view_meta[0]
            view_boxes = bbox_mapping(det_bboxes[:, :4], meta['img_shape'],
                                      meta['scale_factor'], meta['flip'])
            mask_rois = bbox2roi([view_boxes])
            for stage in range(self.num_stages):
                extractor = self.mask_roi_extractor[stage]
                mask_feats = extractor(
                    feat[:len(extractor.featmap_strides)], mask_rois)
                mask_pred = self.mask_head[stage](mask_feats)
                mask_preds.append(mask_pred.sigmoid().cpu().numpy())
                # One meta entry per stage mask of this view.
                mask_metas.append(view_meta)
        merged_masks = merge_aug_masks(mask_preds, mask_metas,
                                       self.test_cfg.rcnn)

        ori_shape = img_metas[0][0]['ori_shape']
        # `rescale` controls the output scale; when True the output matches
        # the original image.
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            _det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=img_metas[0][0]['scale_factor'],
            rescale=rescale)
    return bbox_result, segm_result
def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
    """Test with augmentations, optionally followed by relation prediction.

    Boxes of all augmented views are refined by the cascade, merged and
    filtered by NMS.  When ``self.with_rel`` is set, the relation head is
    then run on the merged detections.  The incoming ``proposals`` and
    ``rescale`` arguments are not read.

    Returns:
        ``(ms_bbox_result, relation_preds)`` when ``self.with_rel`` is set,
        where ``ms_bbox_result`` is a dict with the key ``'ensemble'`` and
        ``relation_preds`` carries an added ``'file_name'`` entry.  Without
        a relation head, ``ms_bbox_result`` alone is returned so that the
        detections are not lost.
    """
    ms_bbox_result = {}
    # recompute feats to save memory
    proposal_list = self.aug_test_rpn(
        self.extract_feats(imgs), img_metas, self.test_cfg.rpn)

    rcnn_test_cfg = self.test_cfg.rcnn
    aug_bboxes = []
    aug_scores = []
    for x, img_meta in zip(self.extract_feats(imgs), img_metas):
        # only one image in the batch
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']

        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip)
        # "ms" in variable names means multi-stage
        ms_scores = []

        rois = bbox2roi([proposals])
        for i in range(self.num_stages):
            bbox_roi_extractor = self.bbox_roi_extractor[i]
            bbox_head = self.bbox_head[i]

            bbox_feats = bbox_roi_extractor(
                x[:len(bbox_roi_extractor.featmap_strides)], rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)

            cls_score, bbox_pred = bbox_head(bbox_feats)
            ms_scores.append(cls_score)

            if i < self.num_stages - 1:
                bbox_label = cls_score.argmax(dim=1)
                rois = bbox_head.regress_by_class(rois, bbox_label,
                                                  bbox_pred, img_meta[0])

        cls_score = sum(ms_scores) / float(len(ms_scores))
        bboxes, scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)
    ms_bbox_result['ensemble'] = bbox_result

    if not self.with_rel:
        # No relation head: still hand the detections back to the caller.
        return ms_bbox_result

    # NOTE(review): `x`, `img_meta` and `scale_factor` below are the values
    # left over from the LAST augmented view of the loop above, while
    # `det_bboxes` come from the merge over all views - confirm that the
    # last view is the intended input for the relation head.
    ori_shape = img_meta[0]['ori_shape']
    im_height, im_width, _ = img_meta[0]['img_shape']
    filename = img_meta[0]['filename']
    relation_preds = self._rel_forward_test(
        x,
        det_bboxes,
        det_labels,
        scale_factor,
        ori_shape,
        im_width=im_width,
        im_height=im_height)
    relation_preds['file_name'] = filename

    return (ms_bbox_result, relation_preds)
def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
    """Test with augmentations and return the ensembled result.

    Boxes of all augmented views are refined by the cascade, merged and
    filtered by NMS.  Masks are predicted per view on the merged detections,
    merged over the stages of each view and then averaged over the views.
    The incoming ``proposals`` argument is not read.

    If ``rescale`` is False, the returned bboxes are multiplied by the scale
    factor of ``imgs[0]``; otherwise the merged boxes are returned unchanged.

    Returns:
        With ``self.test_cfg.keep_all_stages`` unset: the ensemble bbox
        result, paired with the ensemble segm result when ``self.with_mask``
        is set.  Otherwise the per-key result dict (bbox results only, or
        ``(bbox, segm)`` pairs keyed like the bbox dict).
    """
    ms_bbox_result = {}
    ms_segm_result = {}
    rcnn_test_cfg = self.test_cfg.rcnn

    proposal_list = self.aug_test_rpn(
        self.extract_feats(imgs), img_metas, self.test_cfg.rpn)

    view_bboxes = []
    view_scores = []
    for feat, view_meta in zip(self.extract_feats(imgs), img_metas):
        meta = view_meta[0]
        img_shape = meta['img_shape']
        scale_factor = meta['scale_factor']
        mapped = bbox_mapping(proposal_list[0][:, :4], img_shape,
                              scale_factor, meta['flip'])
        rois = bbox2roi([mapped])

        stage_scores = []
        for stage in range(self.num_stages):
            extractor = self.bbox_roi_extractor[stage]
            head = self.bbox_head[stage]
            roi_feats = extractor(
                feat[:len(extractor.featmap_strides)], rois)
            cls_score, bbox_pred = head(roi_feats)
            stage_scores.append(cls_score)
            if stage < self.num_stages - 1:
                rois = head.regress_by_class(
                    rois, cls_score.argmax(dim=1), bbox_pred, meta)

        cls_score = sum(stage_scores) / self.num_stages
        bboxes, scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        view_bboxes.append(bboxes)
        view_scores.append(scores)

    # After the merge all det_bboxes are at the original image size.
    merged_bboxes, merged_scores = merge_aug_bboxes(
        view_bboxes, view_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    _det_bboxes = det_bboxes
    if not rescale:
        _det_bboxes = det_bboxes.clone()
        _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
    ms_bbox_result['ensemble'] = bbox2result(
        _det_bboxes, det_labels, self.bbox_head[-1].num_classes)

    if self.with_mask:
        if det_bboxes.shape[0] == 0:
            segm_result = [
                [] for _ in range(self.mask_head[-1].num_classes - 1)
            ]
        else:
            masks_per_view = []
            for feat, view_meta in zip(self.extract_feats(imgs), img_metas):
                meta = view_meta[0]
                view_boxes = bbox_mapping(det_bboxes[:, :4],
                                          meta['img_shape'],
                                          meta['scale_factor'],
                                          meta['flip'])
                mask_rois = bbox2roi([view_boxes])
                stage_masks = []
                for stage in range(self.num_stages):
                    extractor = self.mask_roi_extractor[stage]
                    mask_feats = extractor(
                        feat[:len(extractor.featmap_strides)], mask_rois)
                    mask_pred = self.mask_head[stage](mask_feats)
                    stage_masks.append(mask_pred.sigmoid().cpu().numpy())
                # First average the masks over all stages of this view.
                masks_per_view.append(
                    merge_aug_masks(stage_masks,
                                    [view_meta] * self.num_stages,
                                    self.test_cfg.rcnn))
            # Then average over all augmented views.  The original note
            # says every mask has the same size (28x28), so no resizing is
            # needed, and that flipped views have already been flipped back
            # by the merge above.
            merged_masks = np.mean(masks_per_view, axis=0)

            ori_shape = img_metas[0][0]['ori_shape']
            segm_result = self.mask_head[-1].get_seg_masks(
                merged_masks,
                det_bboxes,
                det_labels,
                self.test_cfg.rcnn,
                ori_shape,
                scale_factor=1.0,
                rescale=False)
        ms_segm_result['ensemble'] = segm_result

    if self.test_cfg.keep_all_stages:
        if not self.with_mask:
            return ms_bbox_result
        return {
            stage: (ms_bbox_result[stage], ms_segm_result[stage])
            for stage in ms_bbox_result
        }
    if self.with_mask:
        return (ms_bbox_result['ensemble'], ms_segm_result['ensemble'])
    return ms_bbox_result['ensemble']
def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):
    """Run the cascade on every augmented view and ensemble the outputs.

    Semantic and global-context features are computed per view when the
    corresponding heads are enabled and forwarded to the bbox and mask
    passes.  ``rescale`` is accepted but not read by this implementation.

    Returns:
        A one-element list holding ``(det_bbox_results, det_segm_results)``
        when ``self.with_mask`` is set, otherwise ``det_bbox_results``.
    """
    num_views = len(img_metas)
    if self.with_semantic:
        semantic_feats = [self.semantic_head(feat)[1] for feat in img_feats]
    else:
        semantic_feats = [None] * num_views

    if self.with_glbctx:
        glbctx_feats = [self.glbctx_head(feat)[1] for feat in img_feats]
    else:
        glbctx_feats = [None] * num_views

    rcnn_test_cfg = self.test_cfg
    view_bboxes = []
    view_scores = []
    for feat, view_meta, semantic_feat, glbctx_feat in zip(
            img_feats, img_metas, semantic_feats, glbctx_feats):
        # only one image in the batch
        meta = view_meta[0]
        img_shape = meta['img_shape']
        scale_factor = meta['scale_factor']

        mapped = bbox_mapping(proposal_list[0][:, :4], img_shape,
                              scale_factor, meta['flip'])
        rois = bbox2roi([mapped])

        stage_scores = []
        for stage in range(self.num_stages):
            bbox_results = self._bbox_forward(
                stage,
                feat,
                rois,
                semantic_feat=semantic_feat,
                glbctx_feat=glbctx_feat)
            stage_cls = bbox_results['cls_score']
            stage_scores.append(stage_cls)
            if stage < self.num_stages - 1:
                rois = self.bbox_head[stage].regress_by_class(
                    rois, stage_cls.argmax(dim=1),
                    bbox_results['bbox_pred'], meta)

        mean_cls = sum(stage_scores) / float(len(stage_scores))
        bboxes, scores = self.bbox_head[-1].get_bboxes(
            rois,
            mean_cls,
            bbox_results['bbox_pred'],
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        view_bboxes.append(bboxes)
        view_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        view_bboxes, view_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    det_bbox_results = bbox2result(det_bboxes, det_labels,
                                   self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return [det_bbox_results]

    if det_bboxes.shape[0] == 0:
        det_segm_results = [[] for _ in range(self.mask_head.num_classes)]
    else:
        view_masks = []
        for feat, view_meta, semantic_feat, glbctx_feat in zip(
                img_feats, img_metas, semantic_feats, glbctx_feats):
            meta = view_meta[0]
            view_boxes = bbox_mapping(det_bboxes[:, :4], meta['img_shape'],
                                      meta['scale_factor'], meta['flip'])
            mask_rois = bbox2roi([view_boxes])
            # get relay feature on mask_rois
            relay_results = self._bbox_forward(
                -1,
                feat,
                mask_rois,
                semantic_feat=semantic_feat,
                glbctx_feat=glbctx_feat)
            relayed_feat = self.feat_relay_head(
                relay_results['relayed_feat'])
            mask_results = self._mask_forward(
                feat,
                mask_rois,
                semantic_feat=semantic_feat,
                glbctx_feat=glbctx_feat,
                relayed_feat=relayed_feat)
            view_masks.append(
                mask_results['mask_pred'].sigmoid().cpu().numpy())
        merged_masks = merge_aug_masks(view_masks, img_metas,
                                       self.test_cfg)
        ori_shape = img_metas[0][0]['ori_shape']
        det_segm_results = self.mask_head.get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
    return [(det_bbox_results, det_segm_results)]
def aug_test(self, imgs, img_metas, rescale=False):
    """Test with augmentation for the box, mask and stuff branches.

    Box outputs of every augmented view are collected per image, merged and
    filtered by NMS.  Masks are predicted per view on the merged boxes
    (mapped into that view) and merged per image.  Stuff outputs are merged
    by ``self.stuff_head.get_stuff_map_aug``.

    Note that ``self.test_cfg.single_stage`` is updated in place with
    ``aug_test=True``.

    Returns:
        The ``(bbox_result, segm_result, stuff_result)`` tuple of the first
        image.
    """
    det_cfg = self.test_cfg.single_stage
    det_cfg.update(dict(aug_test=True))

    imgs_per_gpu = len(img_metas[0])
    boxes_per_img = [[] for _ in range(imgs_per_gpu)]
    scores_per_img = [[] for _ in range(imgs_per_gpu)]
    mask_roi_feats = []
    stuff_outs = []
    for feat, view_meta in zip(self.extract_feats(imgs), img_metas):
        bbox_outs = self.bbox_head(feat)
        results_list = self.bbox_head.get_bboxes(
            *(bbox_outs[:2] + (view_meta, det_cfg, False)))
        # mask subnet: the last head output feeds the mask RoI extractor
        mask_roi_feats.append(bbox_outs[-1])
        # stuff subnet: the second-to-last head output feeds the stuff head
        stuff_outs.append(self.stuff_head(bbox_outs[-2]))
        for img_id, (mlvl_bboxes, mlvl_scores) in enumerate(results_list):
            boxes_per_img[img_id].append(mlvl_bboxes)
            scores_per_img[img_id].append(mlvl_scores)

    # Re-index the metas from [view][image] to [image][view].
    metas_per_img = [[img_metas[view_id][img_id]
                      for view_id in range(len(img_metas))]
                     for img_id in range(imgs_per_gpu)]

    det_results = []
    for img_boxes, img_scores, img_views in zip(boxes_per_img,
                                                scores_per_img,
                                                metas_per_img):
        merged_bboxes, merged_scores = merge_aug_bboxes(
            img_boxes, img_scores, img_views, det_cfg, return_mean=False)
        det_results.append(
            multiclass_nms(merged_bboxes, merged_scores, det_cfg.score_thr,
                           det_cfg.nms, det_cfg.max_per_img))

    bbox_results = [
        bbox2result(boxes, labels, self.bbox_head.num_classes)
        for boxes, labels in det_results
    ]

    # mask
    imgs_per_gpu = len(img_metas[0])
    masks_per_img = [[] for _ in range(imgs_per_gpu)]
    for feat, view_meta in zip(mask_roi_feats, img_metas):
        # The det bboxes are mapped into the view here, outside
        # `simple_test_mask`, because the aug-test setting includes `flip`
        # while the simple-test setting only has scaling.
        view_boxes = []
        for (boxes, _), meta in zip(det_results, view_meta):
            view_boxes.append(
                bbox_mapping(boxes[:, :4], meta['img_shape'],
                             meta['scale_factor'], meta['flip']))
        mask_rois = bbox2roi(view_boxes)
        mask_feats = self.mask_roi_extractor(
            feat[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
        mask_pred = self.mask_head(mask_feats)
        for img_id, _ in enumerate(view_meta):
            # Column 0 of the rois holds the image index within the batch.
            of_this_img = mask_rois[:, 0] == img_id
            # convert to numpy array to save memory
            masks_per_img[img_id].append(
                mask_pred[of_this_img].sigmoid().cpu().numpy())

    mask_cfg_owner = self.test_cfg
    segm_results = []
    for (boxes, labels), img_masks, img_views in zip(det_results,
                                                     masks_per_img,
                                                     metas_per_img):
        merged_masks = merge_aug_masks(img_masks, img_views,
                                       mask_cfg_owner.single_stage_mask)
        # `ori_shape` for all augmented images are the same here
        ori_shape = img_views[0]['ori_shape']
        segm_results.append(
            self.mask_head.get_seg_masks(
                merged_masks,
                boxes,
                labels,
                mask_cfg_owner.single_stage_mask,
                ori_shape,
                scale_factor=1.0,
                rescale=False))

    # stuff
    stuff_results = self.stuff_head.get_stuff_map_aug(
        stuff_outs, img_metas, rescale=rescale)

    combined = [
        (bbox_result, segm_result, stuff_result)
        for bbox_result, segm_result, stuff_result in zip(
            bbox_results, segm_results, stuff_results)
    ]
    return combined[0]
def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):
    """Test with augmentations.

    Boxes of all augmented views are refined by the cascade, merged and
    filtered by NMS; masks are predicted per view and stage on the merged
    detections and merged.  ``rescale`` is accepted but not read by this
    implementation.

    Args:
        img_feats: Feature maps, one entry per augmented view.
        proposal_list: Proposals; only ``proposal_list[0]`` is used.
        img_metas: Meta information per view; only entry ``[0]`` of each
            view is read.

    Returns:
        A one-element list holding ``(bbox_result, segm_result)`` when
        ``self.with_mask`` is set, otherwise ``bbox_result``.
    """
    if self.with_semantic:
        semantic_feats = [
            self.semantic_head(feat)[1] for feat in img_feats
        ]
    else:
        semantic_feats = [None] * len(img_metas)

    rcnn_test_cfg = self.test_cfg
    aug_bboxes = []
    aug_scores = []
    for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats):
        # only one image in the batch
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']
        flip_direction = img_meta[0]['flip_direction']

        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip, flip_direction)
        # "ms" in variable names means multi-stage
        ms_scores = []

        rois = bbox2roi([proposals])
        for i in range(self.num_stages):
            bbox_head = self.bbox_head[i]
            bbox_results = self._bbox_forward(
                i, x, rois, semantic_feat=semantic)
            ms_scores.append(bbox_results['cls_score'])

            if i < self.num_stages - 1:
                bbox_label = bbox_results['cls_score'].argmax(dim=1)
                rois = bbox_head.regress_by_class(
                    rois, bbox_label, bbox_results['bbox_pred'],
                    img_meta[0])

        cls_score = sum(ms_scores) / float(len(ms_scores))
        bboxes, scores = self.bbox_head[-1].get_bboxes(
            rois,
            cls_score,
            bbox_results['bbox_pred'],
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return [bbox_result]

    if det_bboxes.shape[0] == 0:
        # One empty list per class.  The per-image wrapping is added by
        # the return statement below, exactly as for the non-empty case;
        # wrapping it here as well would give the empty result one more
        # nesting level than the non-empty one.
        segm_result = [[] for _ in range(self.mask_head[-1].num_classes)]
    else:
        aug_masks = []
        aug_img_metas = []
        for x, img_meta, semantic in zip(img_feats, img_metas,
                                         semantic_feats):
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']
            _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                   scale_factor, flip, flip_direction)
            mask_rois = bbox2roi([_bboxes])
            mask_feats = self.mask_roi_extractor[-1](
                x[:len(self.mask_roi_extractor[-1].featmap_strides)],
                mask_rois)
            if self.with_semantic:
                semantic_feat = semantic
                mask_semantic_feat = self.semantic_roi_extractor(
                    [semantic_feat], mask_rois)
                # Pool the semantic RoI features to the spatial size of
                # the mask features before adding them.
                if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:
                    mask_semantic_feat = F.adaptive_avg_pool2d(
                        mask_semantic_feat, mask_feats.shape[-2:])
                mask_feats += mask_semantic_feat
            last_feat = None
            for i in range(self.num_stages):
                mask_head = self.mask_head[i]
                if self.mask_info_flow:
                    mask_pred, last_feat = mask_head(mask_feats, last_feat)
                else:
                    mask_pred = mask_head(mask_feats)
                # One meta entry per mask so both lists stay aligned.
                aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                aug_img_metas.append(img_meta)
        merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                       self.test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
    return [(bbox_result, segm_result)]
def aug_test(self, imgs, img_metas, rescale=False, **kwargs):
    """Test with augmentations using an ensemble of models with relation heads.

    Detections of every (model, augmented view) pair are merged and filtered
    by NMS.  Each model then predicts relations on the merged detections for
    every view; the ``'hoi_prediction'`` lists are concatenated and
    duplicates (same subject, object and category) are removed, keeping the
    first occurrence.  ``rescale`` and ``**kwargs`` are accepted but not
    read.

    Returns:
        ``(bbox_result, relation_preds)`` where ``relation_preds`` is the
        de-duplicated ensemble prediction dict with an added ``'file_name'``
        entry.
    """
    ms_bbox_result = {}

    rpn_test_cfg = self.models[0].test_cfg.rpn
    rcnn_test_cfg = self.models[0].test_cfg.rcnn

    # For each model, compute detections
    aug_bboxes = []
    aug_scores = []
    aug_img_metas = []
    for model in self.models:
        for x, img_meta in zip(model.extract_feats(imgs), img_metas):
            proposal_list = model.simple_test_rpn(x, img_meta, rpn_test_cfg)

            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']

            ms_scores = []
            rois = bbox2roi(proposal_list)
            for i in range(model.num_stages):
                bbox_head = model.bbox_head[i]
                bbox_roi_extractor = model.bbox_roi_extractor[i]

                bbox_feats = bbox_roi_extractor(
                    x[:len(bbox_roi_extractor.featmap_strides)], rois)

                cls_score, bbox_pred = bbox_head(bbox_feats)
                ms_scores.append(cls_score)

                if i < model.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(
                        rois, bbox_label, bbox_pred, img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = model.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)
            aug_img_metas.append(img_meta)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, aug_img_metas, rcnn_test_cfg, type='concat')
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.models[0].bbox_head[-1].num_classes)
    ms_bbox_result['ensemble'] = bbox_result

    ori_shape = img_metas[0][0]['ori_shape']
    scale_factor = img_metas[0][0]['scale_factor']

    ensemble_relation_preds = {}
    for model in self.models:
        for x, img_meta in zip(model.extract_feats(imgs), img_metas):
            im_height, im_width, _ = img_meta[0]['img_shape']
            filename = img_meta[0]['filename']
            relation_preds = model._rel_forward_test(
                x,
                det_bboxes,
                det_labels,
                scale_factor,
                ori_shape,
                im_width=im_width,
                im_height=im_height)
            # The ensemble dict is keyed by field name, not by file name,
            # so "first prediction for this file" has to be detected via
            # the stored 'file_name' entry.
            if ensemble_relation_preds.get('file_name') != filename:
                ensemble_relation_preds = relation_preds
                ensemble_relation_preds['file_name'] = filename
            else:
                ensemble_relation_preds['hoi_prediction'].extend(
                    relation_preds['hoi_prediction'])

    # Drop duplicate predictions, keeping the first of each
    # (subject, object, category) triple.  The ids are used as set
    # members - presumably plain ints; verify against the relation head.
    ensemble_relation_preds_remove_dup = ensemble_relation_preds.copy()
    if 'hoi_prediction' in ensemble_relation_preds:
        seen_triples = set()
        unique_predictions = []
        for hoi_pred in ensemble_relation_preds['hoi_prediction']:
            triple = (hoi_pred['subject_id'], hoi_pred['object_id'],
                      hoi_pred['category_id'])
            if triple in seen_triples:
                continue
            seen_triples.add(triple)
            unique_predictions.append(hoi_pred)
        ensemble_relation_preds_remove_dup['hoi_prediction'] = (
            unique_predictions)

    results = (ms_bbox_result['ensemble'],
               ensemble_relation_preds_remove_dup)

    return results
def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
    """Run cascade detection (and optionally masks) with augmentations.

    RPN proposals are mapped into each augmented view, refined through
    all bbox stages, merged across views and filtered by NMS. When
    ``self.with_mask`` is set, mask predictions of every stage and view
    are merged for the final detections.

    The ``proposals`` and ``rescale`` arguments are accepted but not read
    by this implementation.

    Returns:
        ``bbox_result`` when ``self.with_mask`` is false, otherwise the
        tuple ``(bbox_result, segm_result)``.
    """
    # Features are re-extracted for each pass instead of cached, to save
    # memory.
    rpn_proposals = self.aug_test_rpn(
        self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
    rcnn_test_cfg = self.test_cfg.rcnn
    final_stage = self.num_stages - 1

    aug_bboxes, aug_scores = [], []
    for feats, view_metas in zip(self.extract_feats(imgs), img_metas):
        # only one image in the batch
        meta = view_metas[0]
        img_shape = meta['img_shape']
        scale_factor = meta['scale_factor']
        flip = meta['flip']

        view_proposals = bbox_mapping(rpn_proposals[0][:, :4], img_shape,
                                      scale_factor, flip)
        rois = bbox2roi([view_proposals])

        stage_scores = []
        for stage in range(self.num_stages):
            roi_extractor = self.bbox_roi_extractor[stage]
            head = self.bbox_head[stage]

            num_levels = len(roi_extractor.featmap_strides)
            bbox_feats = roi_extractor(feats[:num_levels], rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)

            cls_score, bbox_pred = head(bbox_feats)
            stage_scores.append(cls_score)

            # Intermediate stages refine the rois for the next stage.
            if stage < final_stage:
                top_labels = cls_score.argmax(dim=1)
                rois = head.regress_by_class(rois, top_labels, bbox_pred,
                                             meta)

        # Average the classification scores over all stages.
        cls_score = sum(stage_scores) / float(len(stage_scores))
        view_bboxes, view_scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(view_bboxes)
        aug_scores.append(view_scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)
    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return bbox_result

    if det_bboxes.shape[0] == 0:
        # Nothing detected: one empty list per mask class.
        segm_result = [
            [] for _ in range(self.mask_head[-1].num_classes - 1)
        ]
        return bbox_result, segm_result

    aug_masks, aug_img_metas = [], []
    for feats, view_metas in zip(self.extract_feats(imgs), img_metas):
        meta = view_metas[0]
        img_shape = meta['img_shape']
        scale_factor = meta['scale_factor']
        flip = meta['flip']

        view_dets = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor,
                                 flip)
        mask_rois = bbox2roi([view_dets])

        for stage in range(self.num_stages):
            mask_extractor = self.mask_roi_extractor[stage]
            num_levels = len(mask_extractor.featmap_strides)
            mask_feats = mask_extractor(feats[:num_levels], mask_rois)
            if self.with_shared_head:
                mask_feats = self.shared_head(mask_feats)
            mask_pred = self.mask_head[stage](mask_feats)
            aug_masks.append(mask_pred.sigmoid().cpu().numpy())
            aug_img_metas.append(view_metas)

    merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                   self.test_cfg.rcnn)
    ori_shape = img_metas[0][0]['ori_shape']
    segm_result = self.mask_head[-1].get_seg_masks(
        merged_masks,
        det_bboxes,
        det_labels,
        rcnn_test_cfg,
        ori_shape,
        scale_factor=1.0,
        rescale=False)
    return bbox_result, segm_result
def aug_test(self, features, proposal_list, img_metas, rescale=False):
    """Test with augmentations using the main and the "tail" bbox heads.

    For every augmented view the proposals are refined through all stages
    twice: once with ``self.bbox_head`` and once with
    ``self.bbox_head_tail``. The boxes and scores of both branches are
    concatenated, merged across views and filtered by a single NMS pass.
    When ``self.with_mask`` is set, mask predictions of every stage and
    view are merged for the final detections.

    Args:
        features: Per-view feature maps, iterated in step with
            ``img_metas``.
        proposal_list: Proposals; only ``proposal_list[0][:, :4]`` is read.
        img_metas: One meta list per view. Element ``[0]`` of each must
            provide ``img_shape``, ``scale_factor``, ``flip`` and
            ``flip_direction``; ``img_metas[0][0]`` must also provide
            ``ori_shape`` when masks are produced.
        rescale: Accepted for interface compatibility; not read here.

    Returns:
        list: ``[bbox_result]`` when ``self.with_mask`` is false,
        otherwise ``[(bbox_result, segm_result)]``.
    """
    rcnn_test_cfg = self.test_cfg
    aug_bboxes = []
    aug_scores = []
    for x, img_meta in zip(features, img_metas):
        # only one image in the batch
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']
        flip_direction = img_meta[0]['flip_direction']

        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip, flip_direction)

        # "ms" in variable names means multi-stage
        ms_scores = []
        ms_scores_tail = []

        # Each branch refines its own copy of the rois.
        rois = bbox2roi([proposals])
        rois_tail = bbox2roi([proposals])
        for i in range(self.num_stages):
            bbox_results = self._bbox_forward(i, x, rois)
            ms_scores.append(bbox_results['cls_score'])

            bbox_results_tail = self._bbox_forward_tail(i, x, rois_tail)
            ms_scores_tail.append(bbox_results_tail['cls_score'])

            if i < self.num_stages - 1:
                # The last score column is excluded from the argmax
                # (presumably the background class -- confirm against the
                # head definition).
                bbox_label = bbox_results['cls_score'][:, :-1].argmax(dim=1)
                rois = self.bbox_head[i].regress_by_class(
                    rois, bbox_label, bbox_results['bbox_pred'],
                    img_meta[0])

                bbox_label_tail = bbox_results_tail[
                    'cls_score'][:, :-1].argmax(dim=1)
                rois_tail = self.bbox_head_tail[i].regress_by_class(
                    rois_tail, bbox_label_tail,
                    bbox_results_tail['bbox_pred'], img_meta[0])

        # Average the classification scores over the stages, per branch.
        cls_score = sum(ms_scores) / float(len(ms_scores))
        bboxes, scores = self.bbox_head[-1].get_bboxes(
            rois,
            cls_score,
            bbox_results['bbox_pred'],
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)

        cls_score_tail = sum(ms_scores_tail) / float(len(ms_scores_tail))
        bboxes_tail, scores_tail = self.bbox_head_tail[-1].get_bboxes(
            rois_tail,
            cls_score_tail,
            bbox_results_tail['bbox_pred'],
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)

        # Stack the candidates of both branches along dim 0 so that one
        # merge + NMS pass handles them together.
        aug_bboxes.append(torch.cat((bboxes, bboxes_tail)))
        aug_scores.append(torch.cat((scores, scores_tail)))

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if self.with_mask:
        if det_bboxes.shape[0] == 0:
            # Nothing detected: one empty list per mask class.
            segm_result = [[]
                           for _ in range(self.mask_head[-1].num_classes)]
        else:
            aug_masks = []
            aug_img_metas = []
            for x, img_meta in zip(features, img_metas):
                img_shape = img_meta[0]['img_shape']
                scale_factor = img_meta[0]['scale_factor']
                flip = img_meta[0]['flip']
                flip_direction = img_meta[0]['flip_direction']
                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                       scale_factor, flip, flip_direction)
                mask_rois = bbox2roi([_bboxes])
                for i in range(self.num_stages):
                    mask_results = self._mask_forward(i, x, mask_rois)
                    aug_masks.append(
                        mask_results['mask_pred'].sigmoid().cpu().numpy())
                    aug_img_metas.append(img_meta)
            merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                           self.test_cfg)

            ori_shape = img_metas[0][0]['ori_shape']
            segm_result = self.mask_head[-1].get_seg_masks(
                merged_masks,
                det_bboxes,
                det_labels,
                rcnn_test_cfg,
                ori_shape,
                scale_factor=1.0,
                rescale=False)
        return [(bbox_result, segm_result)]
    else:
        return [bbox_result]