def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].

    Args:
        img_feats: One feature set per augmented view.
        proposal_list: Proposals; only ``proposal_list[0]`` is read and it is
            re-mapped into every augmented view.
        img_metas: One meta list per augmented view. ``img_meta[0]`` must
            provide ``'img_shape'``, ``'scale_factor'`` and ``'flip'``; the
            first view additionally provides ``'ori_shape'``.
        rescale: Accepted for interface compatibility; not read here.

    Returns:
        ``bbox_result`` when the head has no mask branch, otherwise
        ``(bbox_result, (segm_result, mask_scores))``.
    """
    if self.with_semantic:
        semantic_feats = [self.semantic_head(feat)[1] for feat in img_feats]
    else:
        semantic_feats = [None] * len(img_metas)

    rcnn_test_cfg = self.test_cfg
    aug_bboxes = []
    aug_scores = []
    for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats):
        # only one image in the batch
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']

        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip)
        # "ms" in variable names means multi-stage
        ms_scores = []
        rois = bbox2roi([proposals])
        for i in range(self.num_stages):
            bbox_head = self.bbox_head[i]
            bbox_results = self._bbox_forward(
                i, x, rois, semantic_feat=semantic)
            ms_scores.append(bbox_results['cls_score'])

            # every stage but the last refines the RoIs for the next one
            if i < self.num_stages - 1:
                bbox_label = bbox_results['cls_score'].argmax(dim=1)
                rois = bbox_head.regress_by_class(
                    rois, bbox_label, bbox_results['bbox_pred'],
                    img_meta[0])

        # classification is the mean over all stages
        cls_score = sum(ms_scores) / float(len(ms_scores))
        bboxes, scores = self.bbox_head[-1].get_bboxes(
            rois,
            cls_score,
            bbox_results['bbox_pred'],
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)
    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return bbox_result

    if det_bboxes.shape[0] == 0:
        mask_classes = self.mask_head[-1].num_classes
        segm_result = [[] for _ in range(mask_classes - 1)]
        mask_scores = [[] for _ in range(mask_classes - 1)]
        return bbox_result, (segm_result, mask_scores)

    aug_masks = []
    aug_img_metas = []
    for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats):
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']
        _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor,
                               flip)
        mask_rois = bbox2roi([_bboxes])
        mask_feats = self.mask_roi_extractor[-1](
            x[:len(self.mask_roi_extractor[-1].featmap_strides)], mask_rois)
        if self.with_semantic:
            mask_semantic_feat = self.semantic_roi_extractor([semantic],
                                                             mask_rois)
            # pool the semantic RoI features to the mask feature resolution
            if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:
                mask_semantic_feat = F.adaptive_avg_pool2d(
                    mask_semantic_feat, mask_feats.shape[-2:])
            mask_feats += mask_semantic_feat

        last_feat = None
        for i in range(self.num_stages):
            mask_head = self.mask_head[i]
            if self.mask_info_flow:
                mask_pred, last_feat = mask_head(mask_feats, last_feat)
            else:
                mask_pred = mask_head(mask_feats)
            aug_masks.append(mask_pred.sigmoid().cpu().numpy())
            aug_img_metas.append(img_meta)

    merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg)
    ori_shape = img_metas[0][0]['ori_shape']
    # det_bboxes are already in original-image coordinates here, hence the
    # unit scale factor and rescale=False
    segm_result = self.mask_head[-1].get_seg_masks(
        merged_masks,
        det_bboxes,
        det_labels,
        rcnn_test_cfg,
        ori_shape,
        scale_factor=1.0,
        rescale=False)

    # Follow the device of the features the IoU head consumes instead of a
    # hard-coded ``.cuda()``, so this also runs on CPU / non-default GPUs.
    merged_masks_torch = torch.from_numpy(merged_masks).to(
        device=mask_feats.device, dtype=torch.float16)
    # NOTE not sure whether mask features is correct here: ``mask_feats``
    # is whatever the last augmented view produced in the loop above.
    mask_iou_pred = self.mask_iou_head(
        mask_feats,
        merged_masks_torch[range(det_labels.size(0)), det_labels])
    mask_scores = self.mask_iou_head.get_mask_scores(mask_iou_pred,
                                                     det_bboxes, det_labels)
    return bbox_result, (segm_result, mask_scores)
def aug_test(self, imgs, img_metas, **kwargs):
    """Test with augmentations, ensembling every model in ``self.models``.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].

    Proposals from all models and views are merged first; then every model
    scores the merged proposals on every view, and the pooled boxes go
    through a single multi-class NMS.

    Args:
        imgs: Augmented images; passed to each model's ``extract_feats``.
        img_metas: One meta list per augmented view. ``img_meta[0]`` must
            provide ``'img_shape'``, ``'scale_factor'``, ``'flip'`` and
            ``'flip_direction'``.
        **kwargs: Accepted and ignored.

    Returns:
        ``bbox_result``, or ``(bbox_result, segm_result)`` when the first
        model reports ``with_mask``.
    """
    rpn_test_cfg = self.models[0].test_cfg.rpn
    imgs_per_gpu = len(img_metas[0])
    aug_proposals = [[] for _ in range(imgs_per_gpu)]
    for model in self.models:
        # recompute feats to save memory
        for x, img_meta in zip(model.extract_feats(imgs), img_metas):
            proposal_list = model.rpn_head.simple_test_rpn(x, img_meta)
            for i, proposals in enumerate(proposal_list):
                aug_proposals[i].append(proposals)
    # after merging, proposals will be rescaled to the original image size
    proposal_list = [
        merge_aug_proposals(proposals, img_meta, rpn_test_cfg)
        for proposals, img_meta in zip(aug_proposals, img_metas)
    ]

    semantic_feats = [None] * len(img_metas)
    # NOTE(review): the RPN config is read from ``test_cfg.rpn`` above, yet
    # the RCNN thresholds are read from ``test_cfg`` itself rather than
    # ``test_cfg.rcnn`` - confirm which layout ``test_cfg`` really has.
    rcnn_test_cfg = self.models[0].test_cfg
    aug_bboxes = []
    aug_scores = []
    for model in self.models:
        # The heads consume backbone/neck features, not raw images, so the
        # features are recomputed per model exactly as in the RPN loop.
        for x, img_meta, semantic in zip(
                model.extract_feats(imgs), img_metas, semantic_feats):
            # only one image in the batch
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']

            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                     scale_factor, flip, flip_direction)
            # "ms" in variable names means multi-stage
            ms_scores = []
            rois = bbox2roi([proposals])
            for i in range(model.roi_head.num_stages):
                bbox_head = model.roi_head.bbox_head[i]
                bbox_results = model.roi_head._bbox_forward(
                    i, x, rois, semantic_feat=semantic)
                ms_scores.append(bbox_results['cls_score'])

                if i < model.roi_head.num_stages - 1:
                    bbox_label = bbox_results['cls_score'].argmax(dim=1)
                    rois = bbox_head.regress_by_class(
                        rois, bbox_label, bbox_results['bbox_pred'],
                        img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            # NOTE(review): stages are reached through ``model.roi_head``
            # above but the final head through ``model.bbox_head`` - confirm
            # the models expose both attribute paths.
            bboxes, scores = model.bbox_head[-1].get_bboxes(
                rois,
                cls_score,
                bbox_results['bbox_pred'],
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)
    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.models[0].bbox_head[-1].num_classes)

    if not self.models[0].with_mask:
        return bbox_result

    if det_bboxes.shape[0] == 0:
        segm_result = [
            [] for _ in range(self.models[0].mask_head[-1].num_classes - 1)
        ]
        return bbox_result, segm_result

    aug_masks = []
    aug_img_metas = []
    for model in self.models:
        for x, img_meta, semantic in zip(
                model.extract_feats(imgs), img_metas, semantic_feats):
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']
            _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                   scale_factor, flip, flip_direction)
            mask_rois = bbox2roi([_bboxes])
            mask_feats = model.mask_roi_extractor[-1](
                x[:len(model.mask_roi_extractor[-1].featmap_strides)],
                mask_rois)
            # NOTE(review): ``semantic`` is always None in this function, so
            # this branch only works if the semantic extractor accepts
            # ``[None]`` - confirm before enabling semantic models.
            if model.with_semantic:
                mask_semantic_feat = model.semantic_roi_extractor(
                    [semantic], mask_rois)
                if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:
                    mask_semantic_feat = F.adaptive_avg_pool2d(
                        mask_semantic_feat, mask_feats.shape[-2:])
                mask_feats += mask_semantic_feat

            last_feat = None
            for i in range(model.num_stages):
                mask_head = model.mask_head[i]
                if model.mask_info_flow:
                    mask_pred, last_feat = mask_head(mask_feats, last_feat)
                else:
                    mask_pred = mask_head(mask_feats)
                aug_masks.append(mask_pred.sigmoid().cpu().numpy())
                aug_img_metas.append(img_meta)

    merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                   self.models[0].test_cfg)
    ori_shape = img_metas[0][0]['ori_shape']
    segm_result = self.models[0].mask_head[-1].get_seg_masks(
        merged_masks,
        det_bboxes,
        det_labels,
        rcnn_test_cfg,
        ori_shape,
        scale_factor=1.0,
        rescale=False)
    return bbox_result, segm_result
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       centernesses,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
    """Transform outputs for a single batch item into labeled boxes.

    Args:
        cls_scores (list[Tensor]): Box scores for a single scale level
            Has shape (num_anchors * num_classes, H, W).
        bbox_preds (list[Tensor]): Box energies / deltas for a single
            scale level with shape (num_anchors * 4, H, W).
        centernesses (list[Tensor]): Centerness for a single scale level
            Has shape (num_anchors * 1, H, W).
        mlvl_anchors (list[Tensor]): Box reference for a single scale level
            with shape (num_total_anchors, 4).
        img_shape (tuple[int]): Shape of the input image,
            (height, width, 3).
        scale_factor (ndarray): Scale factor of the image arranged as
            (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config | None): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, return boxes in original image space.
            Default: False.

    Returns:
        tuple(Tensor):
            det_bboxes (Tensor): BBox predictions in shape (n, 5), where
                the first 4 columns are bounding box positions
                (tl_x, tl_y, br_x, br_y) and the 5-th column is a score
                between 0 and 1.
            det_labels (Tensor): A (n,) tensor where each item is the
                predicted class label of the corresponding box.
    """
    # Honour the documented contract: fall back to the head's own config.
    cfg = self.test_cfg if cfg is None else cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
    nms_pre = cfg.get('nms_pre', -1)

    mlvl_bboxes = []
    mlvl_scores = []
    mlvl_centerness = []
    for cls_score, bbox_pred, centerness, anchors in zip(
            cls_scores, bbox_preds, centernesses, mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

        scores = cls_score.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels).sigmoid()
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # rank candidates by centerness-weighted best class score
            max_scores, _ = (scores * centerness[:, None]).max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            centerness = centerness[topk_inds]

        bboxes = self.bbox_coder.decode(
            anchors, bbox_pred, max_shape=img_shape)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        mlvl_centerness.append(centerness)

    mlvl_bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    mlvl_scores = torch.cat(mlvl_scores)
    # Add a dummy background class to the backend when using sigmoid
    # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
    # BG cat_id: num_class
    padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
    mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
    mlvl_centerness = torch.cat(mlvl_centerness)

    det_bboxes, det_labels = multiclass_nms(
        mlvl_bboxes,
        mlvl_scores,
        cfg.score_thr,
        cfg.nms,
        cfg.max_per_img,
        score_factors=mlvl_centerness)
    return det_bboxes, det_labels
def aug_test(self, features, proposal_list, img_metas, rescale=False):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].

    Args:
        features: One feature set per augmented view.
        proposal_list: Proposals; only ``proposal_list[0]`` is read and it is
            re-mapped into every augmented view.
        img_metas: One meta list per augmented view. ``img_meta[0]`` must
            provide ``'img_shape'``, ``'scale_factor'``, ``'flip'`` and
            ``'flip_direction'``.
        rescale: Accepted for interface compatibility; not read here.

    Returns:
        list: ``[bbox_result]``, or ``[(bbox_result, segm_result)]`` when
        the head has a mask branch.
    """
    rcnn_test_cfg = self.test_cfg
    aug_bboxes = []
    aug_scores = []
    for x, img_meta in zip(features, img_metas):
        # only one image in the batch
        img_shape = img_meta[0]['img_shape']
        scale_factor = img_meta[0]['scale_factor']
        flip = img_meta[0]['flip']
        flip_direction = img_meta[0]['flip_direction']

        proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                 scale_factor, flip, flip_direction)
        # "ms" in variable names means multi-stage
        ms_scores = []
        rois = bbox2roi([proposals])
        for i in range(self.num_stages):
            bbox_results = self._bbox_forward(i, x, rois)
            ms_scores.append(bbox_results['cls_score'])

            if i < self.num_stages - 1:
                # the last score column is excluded when picking the class
                # used for regression
                bbox_label = bbox_results['cls_score'][:, :-1].argmax(dim=1)
                rois = self.bbox_head[i].regress_by_class(
                    rois, bbox_label, bbox_results['bbox_pred'],
                    img_meta[0])

        cls_score = sum(ms_scores) / float(len(ms_scores))
        bboxes, scores = self.bbox_head[-1].get_bboxes(
            rois,
            cls_score,
            bbox_results['bbox_pred'],
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)
    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return [bbox_result]

    if det_bboxes.shape[0] == 0:
        # One empty list per class. The previous extra level of nesting
        # gave a one-element list, so indexing it by class label failed.
        segm_result = [[] for _ in range(self.mask_head[-1].num_classes)]
    else:
        aug_masks = []
        aug_img_metas = []
        for x, img_meta in zip(features, img_metas):
            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']
            flip = img_meta[0]['flip']
            flip_direction = img_meta[0]['flip_direction']
            _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
                                   scale_factor, flip, flip_direction)
            mask_rois = bbox2roi([_bboxes])
            for i in range(self.num_stages):
                mask_results = self._mask_forward(i, x, mask_rois)
                aug_masks.append(
                    mask_results['mask_pred'].sigmoid().cpu().numpy())
                aug_img_metas.append(img_meta)
        merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                       self.test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
    return [(bbox_result, segm_result)]
def aug_test(self, imgs, img_metas, proposals=None, rescale=False):
    """Test with augmentations.

    If rescale is False, then returned bboxes and masks will fit the scale
    of imgs[0].

    Args:
        imgs: Augmented images; features are re-extracted for every pass.
        img_metas: One meta list per augmented view. ``img_meta[0]`` must
            provide ``'img_shape'``, ``'scale_factor'`` and ``'flip'``.
        proposals: Accepted for interface compatibility; not read here
            (proposals always come from ``aug_test_rpn``).
        rescale: Accepted for interface compatibility; not read here.

    Returns:
        ``bbox_result``, or ``(bbox_result, segm_result)`` when the detector
        has a mask branch.
    """
    # recompute feats to save memory
    proposal_list = self.aug_test_rpn(
        self.extract_feats(imgs), img_metas, self.test_cfg.rpn)

    rcnn_test_cfg = self.test_cfg.rcnn
    aug_bboxes = []
    aug_scores = []
    for x, img_meta in zip(self.extract_feats(imgs), img_metas):
        # only one image in the batch. Index the *current* view's meta:
        # ``img_metas[0]`` is a list and cannot be indexed by key.
        view_meta = img_meta[0]
        img_shape = view_meta['img_shape']
        scale_factor = view_meta['scale_factor']
        flip = view_meta['flip']

        mapped_proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
                                        scale_factor, flip)
        # "ms" in variable names means multi-stage
        ms_scores = []
        rois = bbox2roi([mapped_proposals])
        for i in range(self.num_stages):
            bbox_roi_extractor = self.bbox_roi_extractor[i]
            bbox_head = self.bbox_head[i]

            bbox_feats = bbox_roi_extractor(
                x[:len(bbox_roi_extractor.featmap_strides)], rois)
            if self.with_shared_head:
                bbox_feats = self.shared_head(bbox_feats)

            cls_score, bbox_pred = bbox_head(bbox_feats)
            ms_scores.append(cls_score)

            if i < self.num_stages - 1:
                bbox_label = cls_score.argmax(dim=1)
                rois = bbox_head.regress_by_class(rois, bbox_label,
                                                  bbox_pred, view_meta)

        cls_score = sum(ms_scores) / float(len(ms_scores))
        bboxes, scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)
    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return bbox_result

    if det_bboxes.shape[0] == 0:
        segm_result = [
            [] for _ in range(self.mask_head[-1].num_classes - 1)
        ]
        return bbox_result, segm_result

    aug_masks = []
    aug_img_metas = []
    for x, img_meta in zip(self.extract_feats(imgs), img_metas):
        view_meta = img_meta[0]
        img_shape = view_meta['img_shape']
        scale_factor = view_meta['scale_factor']
        flip = view_meta['flip']
        _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor,
                               flip)
        mask_rois = bbox2roi([_bboxes])
        for i in range(self.num_stages):
            mask_roi_extractor = self.mask_roi_extractor[i]
            mask_feats = mask_roi_extractor(
                x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
            if self.with_shared_head:
                mask_feats = self.shared_head(mask_feats)
            mask_pred = self.mask_head[i](mask_feats)
            aug_masks.append(mask_pred.sigmoid().cpu().numpy())
            aug_img_metas.append(img_meta)
    merged_masks = merge_aug_masks(aug_masks, aug_img_metas, rcnn_test_cfg)

    ori_shape = img_metas[0][0]['ori_shape']
    segm_result = self.mask_head[-1].get_seg_masks(
        merged_masks,
        det_bboxes,
        det_labels,
        rcnn_test_cfg,
        ori_shape,
        scale_factor=1.0,
        rescale=False)
    return bbox_result, segm_result
def _bbox_post_process(self,
                       mlvl_scores,
                       mlvl_bboxes,
                       scale_factor,
                       cfg,
                       rescale=False,
                       with_nms=True,
                       mlvl_score_factors=None,
                       **kwargs):
    """Merge per-level predictions and run multi-class NMS on them.

    The per-level lists are concatenated, the boxes are optionally divided
    by ``scale_factor``, and NMS is run on
    ``sqrt(class_score * score_factor)``. When ``self.with_score_voting`` is
    set and at least one detection survives, the result is refined by
    ``self.score_voting``.

    Args:
        mlvl_scores (list[Tensor]): Box scores from all scale levels of a
            single image, each item has shape (num_bboxes, num_class).
        mlvl_bboxes (list[Tensor]): Decoded bboxes from all scale levels of
            a single image, each item has shape (num_bboxes, 4).
        scale_factor (ndarray, optional): Scale factor of the image arranged
            as (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config): Test / postprocessing configuration;
            ``score_thr``, ``nms`` and ``max_per_img`` are read from it.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): Accepted for interface compatibility. This
            implementation does not read it; NMS is always applied.
        mlvl_score_factors (list[Tensor]): Score factor from all scale
            levels of a single image, each item has shape (num_bboxes, ).
            Although the default is None, the list is concatenated
            unconditionally, so it must be provided.

    Returns:
        tuple[Tensor]:
            - det_bboxes (Tensor): Predicted bboxes with shape
              [num_bboxes, 5], where the first 4 columns are bounding box
              positions (tl_x, tl_y, br_x, br_y) and the 5-th column are
              scores between 0 and 1.
            - det_labels (Tensor): Predicted labels of the corresponding
              box with shape [num_bboxes].
    """
    all_boxes = torch.cat(mlvl_bboxes)
    if rescale:
        all_boxes /= all_boxes.new_tensor(scale_factor)

    cls_probs = torch.cat(mlvl_scores)
    # Sigmoid scores carry no background column, so a zero one is appended.
    # FG labels are [0, num_class-1] since mmdet v2.0; BG cat_id: num_class
    background = cls_probs.new_zeros(cls_probs.shape[0], 1)
    cls_probs = torch.cat([cls_probs, background], dim=1)

    iou_preds = torch.cat(mlvl_score_factors)
    nms_scores = (cls_probs * iou_preds[:, None]).sqrt()

    det_bboxes, det_labels = multiclass_nms(
        all_boxes,
        nms_scores,
        cfg.score_thr,
        cfg.nms,
        cfg.max_per_img,
        score_factors=None)

    if self.with_score_voting and len(det_bboxes) > 0:
        det_bboxes, det_labels = self.score_voting(det_bboxes, det_labels,
                                                   all_boxes, nms_scores,
                                                   cfg.score_thr)
    return det_bboxes, det_labels
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_anchors,
                       mlvl_masks,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False):
    """Decode masked per-level predictions of one image and apply NMS.

    Levels whose mask keeps no location are skipped. If every level is
    skipped, empty detections are returned instead of failing on an empty
    concatenation.

    Args:
        cls_scores (list[Tensor]): Per-level class maps, permuted and
            reshaped to (-1, ``self.cls_out_channels``).
        bbox_preds (list[Tensor]): Per-level box maps, permuted and
            reshaped to (-1, 4).
        mlvl_anchors (list[Tensor]): Per-level anchors, already filtered
            by the masks (per the upstream ``get_anchors`` comment).
        mlvl_masks (list[Tensor]): Per-level masks used to index the
            flattened scores / deltas.
            # assumes 1-D boolean masks - TODO confirm
        img_shape: Passed to the box coder as ``max_shape``.
        scale_factor: Divisor applied to the boxes when ``rescale`` is True.
        cfg: Test config; ``self.test_cfg`` is used when None.
        rescale (bool): If True, divide the boxes by ``scale_factor``.

    Returns:
        tuple(Tensor, Tensor): ``(det_bboxes, det_labels)`` from
        ``multiclass_nms``, or a (0, 5) box tensor and a (0,) int64 label
        tensor when no location survived the masks.
    """
    cfg = self.test_cfg if cfg is None else cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
    nms_pre = cfg.get('nms_pre', -1)

    mlvl_bboxes = []
    mlvl_scores = []
    for cls_score, bbox_pred, anchors, mask in zip(cls_scores, bbox_preds,
                                                   mlvl_anchors,
                                                   mlvl_masks):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
        # if no location is kept, end.
        if mask.sum() == 0:
            continue

        # reshape scores and bbox_pred
        cls_score = cls_score.permute(1, 2,
                                      0).reshape(-1, self.cls_out_channels)
        if self.use_sigmoid_cls:
            scores = cls_score.sigmoid()
        else:
            scores = cls_score.softmax(-1)
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)

        # filter scores, bbox_pred w.r.t. mask.
        # anchors are filtered in get_anchors() beforehand.
        # (Indexing with ``[mask, :]`` always keeps at least one dimension,
        # so no 0-dim special case is needed afterwards.)
        scores = scores[mask, :]
        bbox_pred = bbox_pred[mask, :]

        # filter anchors, bbox_pred, scores w.r.t. scores
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            if self.use_sigmoid_cls:
                max_scores, _ = scores.max(dim=1)
            else:
                # remind that we set FG labels to [0, num_class-1]
                # since mmdet v2.0
                # BG cat_id: num_class
                max_scores, _ = scores[:, :-1].max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]

        bboxes = self.bbox_coder.decode(
            anchors, bbox_pred, max_shape=img_shape)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)

    if cls_scores and not mlvl_bboxes:
        # Every level was masked out: torch.cat([]) would raise, so report
        # "no detections" on the same device / dtype as the predictions.
        reference = cls_scores[0]
        return (reference.new_zeros((0, 5)),
                reference.new_zeros((0, ), dtype=torch.long))

    mlvl_bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    mlvl_scores = torch.cat(mlvl_scores)
    if self.use_sigmoid_cls:
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

    # multi class NMS
    det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_points,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False,
                       nms=True):
    """Turn point-based predictions of one image into boxes.

    Per level, the box prediction is scaled by ``self.point_strides`` and
    offset by the point's first two coordinates (repeated for both
    corners), then clamped to the image. Levels are concatenated and
    optionally passed through multi-class NMS.

    Args:
        cls_scores (list[Tensor]): Per-level class maps.
        bbox_preds (list[Tensor]): Per-level box maps, reshaped to (-1, 4).
        mlvl_points (list[Tensor]): Per-level reference points; the first
            two columns are used as the centre.
        img_shape: Image shape; index 0 bounds y and index 1 bounds x.
        scale_factor: Divisor applied to the boxes when ``rescale`` is True.
        cfg: Test config; ``self.test_cfg`` is used when None.
        rescale (bool): If True, divide the boxes by ``scale_factor``.
        nms (bool): If False, skip NMS and return the raw boxes and scores.

    Returns:
        tuple(Tensor, Tensor): ``(det_bboxes, det_labels)`` when ``nms`` is
        True, otherwise ``(mlvl_bboxes, mlvl_scores)``.
    """
    if cfg is None:
        cfg = self.test_cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)

    level_boxes = []
    level_scores = []
    for level, (cls_map, reg_map, pts) in enumerate(
            zip(cls_scores, bbox_preds, mlvl_points)):
        assert cls_map.size()[-2:] == reg_map.size()[-2:]

        logits = cls_map.permute(1, 2, 0).reshape(-1, self.cls_out_channels)
        probs = (logits.sigmoid()
                 if self.use_sigmoid_cls else logits.softmax(-1))
        offsets = reg_map.permute(1, 2, 0).reshape(-1, 4)

        nms_pre = cfg.get('nms_pre', -1)
        if 0 < nms_pre < probs.shape[0]:
            # With softmax the last column is background (FG labels are
            # [0, num_class-1] since mmdet v2.0), so it is excluded from
            # the ranking.
            ranked = probs if self.use_sigmoid_cls else probs[:, :-1]
            best, _ = ranked.max(dim=1)
            _, keep = best.topk(nms_pre)
            pts = pts[keep, :]
            offsets = offsets[keep, :]
            probs = probs[keep, :]

        centers = torch.cat([pts[:, :2], pts[:, :2]], dim=1)
        decoded = offsets * self.point_strides[level] + centers

        # x coordinates are bounded by the width, y by the height
        upper = (img_shape[1], img_shape[0], img_shape[1], img_shape[0])
        clipped = [
            decoded[:, col].clamp(min=0, max=limit)
            for col, limit in enumerate(upper)
        ]
        level_boxes.append(torch.stack(clipped, dim=-1))
        level_scores.append(probs)

    merged_boxes = torch.cat(level_boxes)
    if rescale:
        merged_boxes /= merged_boxes.new_tensor(scale_factor)
    merged_scores = torch.cat(level_scores)
    if self.use_sigmoid_cls:
        # Sigmoid scores have no background column; append a zero one so
        # that BG cat_id == num_class.
        background = merged_scores.new_zeros(merged_scores.shape[0], 1)
        merged_scores = torch.cat([merged_scores, background], dim=1)

    if not nms:
        return merged_boxes, merged_scores

    det_bboxes, det_labels = multiclass_nms(merged_boxes, merged_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
def get_bboxes_single(self, results_raw, scale_factor, cfg, rescale=False):
    """Decode raw multi-scale grid predictions of one image and apply NMS.

    Each raw map is viewed as
    (``num_anchors_per_scale``, ``num_attrib``, H, W) and its attributes
    are read as x, y, w, h, objectness, then class scores. Centres are
    sigmoid-activated, scaled by the stride and offset by the grid;
    sizes are ``exp`` of the prediction times the anchor base size.
    Candidates are filtered by ``cfg['conf_thr']`` and ``cfg['nms_pre']``
    before multi-class NMS.

    Args:
        results_raw (list[Tensor]): One raw prediction map per scale;
            its length must equal ``self.num_scales``.
        scale_factor: Divisor applied to the boxes when ``rescale`` is True.
        cfg: Test config providing ``get``, ``score_thr``, ``nms`` and
            ``max_per_img``.
        rescale (bool): If True, divide the boxes by ``scale_factor``.

    Returns:
        tuple(Tensor, Tensor): ``(det_bboxes, det_labels)``. When nothing
        passes the confidence filter, a (0, 5) box tensor and a (0,) int64
        label tensor on the same device as the predictions are returned.
    """
    assert len(results_raw) == self.num_scales
    conf_thr = cfg.get('conf_thr', -1)
    nms_pre = cfg.get('nms_pre', -1)

    multi_lvl_bboxes = []
    multi_lvl_cls_scores = []
    multi_lvl_conf_scores = []
    for i_scale in range(self.num_scales):
        result_raw = results_raw[i_scale]
        num_grid_h = result_raw.size(1)
        num_grid_w = result_raw.size(2)

        # (anchors*(cxcywh+conf+class), h, w)
        #   --> (anchors, h, w, cxcywh+conf+class)
        prediction_raw = result_raw.view(
            self.num_anchors_per_scale, self.num_attrib, num_grid_h,
            num_grid_w).permute(0, 2, 3, 1).contiguous()

        # grid x y offset, with stride step included
        stride = self.anchor_strides[i_scale]
        grid_x, grid_y = self._get_anchors_grid_xy(num_grid_h, num_grid_w,
                                                   stride,
                                                   result_raw.device)

        # box centres
        x_center_pred = torch.sigmoid(
            prediction_raw[..., 0]) * stride + grid_x
        y_center_pred = torch.sigmoid(
            prediction_raw[..., 1]) * stride + grid_y

        # box sizes, relative to the anchor base sizes of this scale
        anchors = torch.tensor(
            self.anchor_base_sizes[i_scale],
            device=result_raw.device,
            dtype=torch.float32)
        anchor_w = anchors[:, 0:1].view((-1, 1, 1))
        anchor_h = anchors[:, 1:2].view((-1, 1, 1))
        w_pred = torch.exp(prediction_raw[..., 2]) * anchor_w
        h_pred = torch.exp(prediction_raw[..., 3]) * anchor_h

        # centre/size -> corner format (x1, y1, x2, y2)
        bbox_pred = torch.stack(
            (x_center_pred - w_pred / 2, y_center_pred - h_pred / 2,
             x_center_pred + w_pred / 2, y_center_pred + h_pred / 2),
            dim=3).view((-1, 4))

        conf_pred = torch.sigmoid(prediction_raw[..., 4]).reshape(-1)
        if self.hierarchical_label:
            cls_pred = self.loss_cls.parase_output(prediction_raw[..., 5:])
        else:
            cls_pred = torch.sigmoid(prediction_raw[..., 5:]).reshape(
                -1, self.num_classes_no_bkg)

        # drop low-objectness candidates first ...
        conf_inds = conf_pred.ge(conf_thr).nonzero().flatten()
        bbox_pred = bbox_pred[conf_inds, :]
        cls_pred = cls_pred[conf_inds, :]
        conf_pred = conf_pred[conf_inds]

        # ... then keep at most nms_pre per scale
        if 0 < nms_pre < conf_pred.size(0):
            _, topk_inds = conf_pred.topk(nms_pre)
            bbox_pred = bbox_pred[topk_inds, :]
            cls_pred = cls_pred[topk_inds, :]
            conf_pred = conf_pred[topk_inds]

        multi_lvl_bboxes.append(bbox_pred)
        multi_lvl_cls_scores.append(cls_pred)
        multi_lvl_conf_scores.append(conf_pred)

    multi_lvl_bboxes = torch.cat(multi_lvl_bboxes)
    multi_lvl_cls_scores = torch.cat(multi_lvl_cls_scores)
    multi_lvl_conf_scores = torch.cat(multi_lvl_conf_scores)

    if multi_lvl_conf_scores.size(0) == 0:
        # Keep the empty result on the predictions' device and give the
        # labels an integer dtype, matching the non-empty return path.
        return (multi_lvl_bboxes.new_zeros((0, 5)),
                multi_lvl_bboxes.new_zeros((0, ), dtype=torch.long))

    if rescale:
        multi_lvl_bboxes /= multi_lvl_bboxes.new_tensor(scale_factor)

    # A zero column is prepended to the class scores before NMS.
    # NOTE(review): presumably the background slot for a "class 0 is
    # background" NMS convention - confirm against multiclass_nms.
    padding = multi_lvl_cls_scores.new_zeros(multi_lvl_cls_scores.shape[0],
                                             1)
    multi_lvl_cls_scores = torch.cat([padding, multi_lvl_cls_scores], dim=1)

    det_bboxes, det_labels = multiclass_nms(
        multi_lvl_bboxes,
        multi_lvl_cls_scores,
        cfg.score_thr,
        cfg.nms,
        cfg.max_per_img,
        score_factors=multi_lvl_conf_scores)
    return det_bboxes, det_labels
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False,
                       with_nms=True):
    """Convert the predictions of one image into scored boxes.

    Args:
        cls_scores (list[Tensor]): Box scores for a single scale level,
            has shape (num_classes, H, W).
        bbox_preds (list[Tensor]): Box distribution logits for a single
            scale level with shape (4*(n+1), H, W), n is max value of
            integral set.
        mlvl_anchors (list[Tensor]): Box reference for a single scale level
            with shape (num_total_anchors, 4).
        img_shape (tuple[int]): Shape of the input image,
            (height, width, 3).
        scale_factor (ndarray): Scale factor of the image arranged as
            (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config | None): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        tuple(Tensor): With NMS, ``det_bboxes`` of shape (N, 5) (corner
        coordinates plus score) and ``det_labels`` of shape (N,). Without
        NMS, the concatenated boxes and the background-padded scores.
    """
    if cfg is None:
        cfg = self.test_cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)

    level_boxes = []
    level_scores = []
    per_level = zip(cls_scores, bbox_preds, self.anchor_generator.strides,
                    mlvl_anchors)
    for cls_map, reg_map, stride, anchors in per_level:
        assert cls_map.size()[-2:] == reg_map.size()[-2:]
        assert stride[0] == stride[1]

        probs = cls_map.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels).sigmoid()
        reg_map = reg_map.permute(1, 2, 0)
        # distances are predicted in stride units
        if self.use_dfl:
            distances = self.integral(reg_map) * stride[0]
        else:
            distances = reg_map.reshape((-1, 4)) * stride[0]

        nms_pre = cfg.get('nms_pre', -1)
        if 0 < nms_pre < probs.shape[0]:
            best, _ = probs.max(dim=1)
            _, keep = best.topk(nms_pre)
            anchors = anchors[keep, :]
            distances = distances[keep, :]
            probs = probs[keep, :]

        level_boxes.append(
            distance2bbox(
                self.anchor_center(anchors), distances,
                max_shape=img_shape))
        level_scores.append(probs)

    merged_boxes = torch.cat(level_boxes)
    if rescale:
        merged_boxes /= merged_boxes.new_tensor(scale_factor)

    merged_scores = torch.cat(level_scores)
    # Sigmoid scores have no background column; append a zero one.
    # FG labels are [0, num_class-1] since mmdet v2.0; BG cat_id: num_class
    background = merged_scores.new_zeros(merged_scores.shape[0], 1)
    merged_scores = torch.cat([merged_scores, background], dim=1)

    if not with_nms:
        return merged_boxes, merged_scores

    det_bboxes, det_labels = multiclass_nms(merged_boxes, merged_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
def single_gpu_test(args, cfg, model):
    """Run sliding-window detection over every image in a directory.

    Each image is tiled into ``args.cropsize`` windows stepped by
    ``args.stride``; every window is zero-padded to a full crop and passed
    to ``inference_detector``. Window detections are shifted back to image
    coordinates, merged with multi-class NMS, drawn onto the image (saved
    under ``args.saveImgpath``) and appended to one ``Task2_<class>.txt``
    file per class under ``args.saveTxtpath``.

    Args:
        args: Namespace providing ``testImgpath``, ``saveImgpath``,
            ``saveTxtpath``, ``cropsize`` and ``stride``.
        cfg: Accepted for interface compatibility; not read here.
        model: Detector handed to ``inference_detector``.

    Returns:
        None. Results are written to disk.
    """
    # Function-scope import: this block cannot see the file's import header.
    import contextlib

    for imgfile in os.listdir(args.testImgpath):
        imgfile = imgfile.strip()
        with Image.open(os.path.join(args.testImgpath, imgfile)) as pil_img:
            img = np.array(pil_img.convert('RGB'))
        # numpy image arrays are laid out as (rows, cols, channels)
        img_h, img_w = img.shape[:2]

        # At least one window per axis: an image smaller than the crop by a
        # full stride or more would otherwise yield zero windows and be
        # skipped, although the padding path below exists for such images.
        rows = max(
            1,
            int(math.ceil(1.0 * (img_h - args.cropsize) / args.stride)) + 1)
        cols = max(
            1,
            int(math.ceil(1.0 * (img_w - args.cropsize) / args.stride)) + 1)

        multi_bboxes = []
        multi_scores = []
        for row in range(rows):
            if img_h > args.cropsize:
                # clamp the last window so it ends on the image border
                y_start = min(row * args.stride, img_h - args.cropsize)
                y_end = y_start + args.cropsize
            else:
                y_start, y_end = 0, img_h

            for col in range(cols):
                if img_w > args.cropsize:
                    x_start = min(col * args.stride, img_w - args.cropsize)
                    x_end = x_start + args.cropsize
                else:
                    x_start, x_end = 0, img_w

                subimg = img[y_start:y_end, x_start:x_end, :]
                sub_h, sub_w = subimg.shape[:2]
                # zero-pad partial windows to a full crop
                outimg = np.zeros((args.cropsize, args.cropsize, 3))
                outimg[0:sub_h, 0:sub_w, :] = subimg

                # one (k, 5) array per class
                result = inference_detector(model, outimg)
                bboxes = np.vstack(result)
                if len(bboxes) == 0:
                    continue

                # class i is stored in score column i + 1
                labels = np.concatenate([
                    np.full(bbox.shape[0], i + 1, dtype=np.int32)
                    for i, bbox in enumerate(result)
                ])
                # shift window coordinates back into the full image
                bboxes[:, :4] += [x_start, y_start, x_start, y_start]
                multi_bboxes.append(bboxes[:, :4])

                # one-hot style score matrix expected by multiclass_nms
                scores = np.zeros((bboxes.shape[0], len(ODAI_LABEL_MAP)))
                scores[np.arange(bboxes.shape[0]), labels] = bboxes[:, 4]
                multi_scores.append(scores)

        if not multi_bboxes:
            continue

        score_thr = 0.1
        nms = dict(type='nms', iou_thr=0.5)
        max_per_img = 2000
        det_bboxes, det_labels = multiclass_nms(
            torch.Tensor(np.vstack(multi_bboxes)),
            torch.Tensor(np.vstack(multi_scores)), score_thr, nms,
            max_per_img)
        if det_bboxes.shape[0] == 0:
            continue

        det_bboxes = np.array(det_bboxes)
        det_labels = np.array(det_labels)
        image = draw_boxes_with_label_and_scores(img, det_bboxes[:, :4],
                                                 det_bboxes[:, 4],
                                                 det_labels, 0)
        image.save(os.path.join(args.saveImgpath, imgfile))

        label_name_map = get_label_name_map()
        # NOTE(review): presumably prepares the output directory - confirm
        # what ``osp`` does; it is called exactly as before.
        osp(args.saveTxtpath)
        # splitext instead of [:-4] so extensions of any length are removed
        image_id = os.path.splitext(imgfile)[0]

        # ExitStack closes every per-class file even if a write fails.
        with contextlib.ExitStack() as stack:
            writers = {
                sub_class: stack.enter_context(
                    open(
                        os.path.join(args.saveTxtpath,
                                     'Task2_%s.txt' % sub_class), 'a+'))
                for sub_class in ODAI_LABEL_MAP
                if sub_class != 'back-ground'
            }
            for det, label in zip(det_bboxes, det_labels):
                command = '%s %.5f %.5f %.5f %.5f %.5f\n' % (
                    image_id, det[4], det[0], det[1], det[2], det[3])
                # NMS labels are offset by one relative to the name map
                writers[label_name_map[int(label) + 1]].write(command)
def aug_test(self,
             imgs,
             img_metas,
             templates,
             proposals=None,
             rescale=False):
    """Test with augmentations.

    If rescale is False, then returned bboxes will fit the scale of
    imgs[0].

    Args:
        imgs: Augmented images, passed to ``extract_feats`` together with
            ``templates``.
        img_metas: One meta list per augmented view. ``img_meta[0]`` must
            provide ``'img_shape'``, ``'scale_factor'`` and ``'flip'``.
        templates: Second input of ``extract_feats``.
        proposals: Accepted for interface compatibility; not read here
            (proposals always come from ``aug_test_rpn``).
        rescale: Accepted for interface compatibility; not read here.

    Returns:
        The per-class box result produced by ``bbox2result``.
    """
    # features are extracted twice on purpose: recompute to save memory
    rpn_feats = self.extract_feats(imgs, templates)
    proposal_list = self.aug_test_rpn(rpn_feats, img_metas,
                                      self.test_cfg.rpn)

    rcnn_test_cfg = self.test_cfg.rcnn
    last_stage = self.num_stages - 1
    aug_bboxes, aug_scores = [], []
    for x, img_meta in zip(self.extract_feats(imgs, templates), img_metas):
        # only one image in the batch
        view_meta = img_meta[0]
        img_shape = view_meta['img_shape']
        scale_factor = view_meta['scale_factor']

        mapped = bbox_mapping(proposal_list[0][:, :4], img_shape,
                              scale_factor, view_meta['flip'])
        rois = bbox2roi([mapped])

        # one classification output per cascade stage
        stage_scores = []
        for stage in range(self.num_stages):
            cls_score, bbox_pred = self._bbox_forward_test(stage, x, rois)
            stage_scores.append(cls_score)
            if stage < last_stage:
                # refine the RoIs with the current stage's top class
                rois = self.bbox_head[stage].regress_by_class(
                    rois, cls_score.argmax(dim=1), bbox_pred, view_meta)

        mean_score = sum(stage_scores) / float(len(stage_scores))
        bboxes, scores = self.bbox_head[-1].get_det_bboxes(
            rois,
            mean_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=False,
            cfg=None)
        aug_bboxes.append(bboxes)
        aug_scores.append(scores)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)
    return bbox2result(det_bboxes, det_labels,
                       self.bbox_head[-1].num_classes)
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       mlvl_points,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False,
                       with_nms=True):
    """Turn the per-level outputs of one image into bbox predictions.

    Args:
        cls_scores (list[Tensor]): Box iou-aware scores for a single scale
            level with shape (num_points * num_classes, H, W).
        bbox_preds (list[Tensor]): Box offsets for a single scale level
            with shape (num_points * 4, H, W).
        mlvl_points (list[Tensor]): Box reference for a single scale level
            with shape (num_total_points, 4).
        img_shape (tuple[int]): Shape of the input image,
            (height, width, 3).
        scale_factor (ndarray): Scale factor of the image arranged as
            (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config | None): Test / postprocessing configuration.
            ``self.test_cfg`` is used when None.
        rescale (bool): If True, boxes are divided by ``scale_factor``.
            Default: False.
        with_nms (bool): If True, run ``multiclass_nms`` before returning.
            Default: True.

    Returns:
        tuple(Tensor): ``(det_bboxes, det_labels)`` from ``multiclass_nms``
            when ``with_nms`` is True; otherwise the concatenated boxes and
            the scores padded with a trailing zero background column.
    """
    if cfg is None:
        cfg = self.test_cfg
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)

    level_bboxes = []
    level_scores = []
    for lvl_cls, lvl_reg, lvl_points in zip(cls_scores, bbox_preds,
                                            mlvl_points):
        assert lvl_cls.size()[-2:] == lvl_reg.size()[-2:]
        # Channel-first maps -> one row per location.
        probs = lvl_cls.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels).contiguous().sigmoid()
        offsets = lvl_reg.permute(1, 2, 0).reshape(-1, 4).contiguous()

        pre_topk = cfg.get('nms_pre', -1)
        if 0 < pre_topk < probs.shape[0]:
            # Keep the locations whose best class score is highest.
            best_score = probs.max(dim=1)[0]
            keep = best_score.topk(pre_topk)[1]
            lvl_points = lvl_points[keep, :]
            offsets = offsets[keep, :]
            probs = probs[keep, :]

        level_bboxes.append(
            distance2bbox(lvl_points, offsets, max_shape=img_shape))
        level_scores.append(probs)

    all_bboxes = torch.cat(level_bboxes)
    if rescale:
        all_bboxes /= all_bboxes.new_tensor(scale_factor)

    all_scores = torch.cat(level_scores)
    # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
    # BG cat_id: num_class
    background = all_scores.new_zeros(all_scores.shape[0], 1)
    all_scores = torch.cat([all_scores, background], dim=1)

    if not with_nms:
        return all_bboxes, all_scores

    det_bboxes, det_labels = multiclass_nms(all_bboxes, all_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
def get_bboxes(self,
               rois,
               cls_score,
               bbox_pred,
               img_shape,
               scale_factor,
               rescale=False,
               cfg=None):
    """Transform network output for a batch into bbox predictions.

    Args:
        rois (Tensor): Boxes to be transformed. Has shape (num_boxes, 5).
            last dimension 5 arrange as (batch_index, x1, y1, x2, y2).
        cls_score (Tensor): Box scores, has shape
            (num_boxes, num_classes + 1).
        bbox_pred (Tensor, optional): Box energies / deltas.
            has shape (num_boxes, num_classes * 4).
        img_shape (Sequence[int], optional): Maximum bounds for boxes,
            specifies (H, W, C) or (H, W).
        scale_factor (ndarray): Scale factor of the image arrange as
            (w_scale, h_scale, w_scale, h_scale).
        rescale (bool): If True, return boxes in original image space.
            Default: False.
        cfg (obj:`ConfigDict`): `test_cfg` of Bbox Head. Default: None

    Returns:
        tuple[Tensor, Tensor]: When ``cfg`` is None, the decoded boxes and
            the activated scores are returned without NMS. Otherwise the
            first tensor is `det_bboxes`, has the shape (num_boxes, 5) and
            last dimension 5 represent (tl_x, tl_y, br_x, br_y, score), and
            the second tensor is the labels with shape (num_boxes, ).
    """
    # some loss (Seesaw loss..) may have custom activation
    if self.custom_cls_channels:
        scores = self.loss_cls.get_activation(cls_score)
    else:
        scores = F.softmax(
            cls_score, dim=-1) if cls_score is not None else None

    # bbox_pred would be None in some detector when with_reg is False,
    # e.g. Grid R-CNN.
    if bbox_pred is not None:
        bboxes = self.bbox_coder.decode(
            rois[..., 1:], bbox_pred, max_shape=img_shape)
    else:
        bboxes = rois[:, 1:].clone()
        if img_shape is not None:
            # Indexing with a list yields a copy, so an in-place ``clamp_``
            # on that result would be lost; assign the clamped values back.
            bboxes[:, [0, 2]] = bboxes[:, [0, 2]].clamp(
                min=0, max=img_shape[1])
            bboxes[:, [1, 3]] = bboxes[:, [1, 3]].clamp(
                min=0, max=img_shape[0])

    if rescale and bboxes.size(0) > 0:
        scale_factor = bboxes.new_tensor(scale_factor)
        # Divide every group of 4 coordinates by the scale factor, then
        # restore the original 2-D layout.
        bboxes = (bboxes.view(bboxes.size(0), -1, 4) / scale_factor).view(
            bboxes.size()[0], -1)

    if cfg is None:
        return bboxes, scores

    det_bboxes, det_labels = multiclass_nms(bboxes, scores, cfg.score_thr,
                                            cfg.nms, cfg.max_per_img)
    return det_bboxes, det_labels
def get_bboxes_single(self,
                      cls_scores,
                      bbox_cls_preds,
                      bbox_reg_preds,
                      mlvl_anchors,
                      img_shape,
                      scale_factor,
                      cfg,
                      rescale=False):
    """Decode one image's multi-level outputs and run multi-class NMS.

    Each level supplies a classification map plus a pair of box maps
    (``bbox_cls_pred`` / ``bbox_reg_pred``) that are reshaped to
    ``self.side_num * 4`` channels per anchor and handed together to
    ``self.bbox_coder.decode``, which returns boxes and confidences. The
    confidences are passed to ``multiclass_nms`` as ``score_factors``.

    Args:
        cls_scores (list[Tensor]): Per-level classification maps.
        bbox_cls_preds (list[Tensor]): Per-level first box prediction maps.
        bbox_reg_preds (list[Tensor]): Per-level second box prediction maps.
        mlvl_anchors (list[Tensor]): Per-level anchors.
        img_shape: Passed to the coder as ``max_shape``.
        scale_factor: Divisor applied to the boxes when ``rescale`` is True.
        cfg: Test config; ``self.test_cfg`` is used when None.
        rescale (bool): Whether to divide boxes by ``scale_factor``.

    Returns:
        tuple: ``(det_bboxes, det_labels)`` as returned by
            ``multiclass_nms``.
    """
    if cfg is None:
        cfg = self.test_cfg

    assert len(cls_scores) == len(bbox_cls_preds) == len(
        bbox_reg_preds) == len(mlvl_anchors)

    level_bboxes = []
    level_scores = []
    level_confids = []
    for lvl_cls, lvl_bucket_cls, lvl_bucket_reg, lvl_anchors in zip(
            cls_scores, bbox_cls_preds, bbox_reg_preds, mlvl_anchors):
        assert lvl_cls.size()[-2:] == lvl_bucket_cls.size(
        )[-2:] == lvl_bucket_reg.size()[-2:]

        flat_cls = lvl_cls.permute(1, 2, 0).reshape(-1,
                                                    self.cls_out_channels)
        probs = (flat_cls.sigmoid()
                 if self.use_sigmoid_cls else flat_cls.softmax(-1))
        lvl_bucket_cls = lvl_bucket_cls.permute(1, 2, 0).reshape(
            -1, self.side_num * 4)
        lvl_bucket_reg = lvl_bucket_reg.permute(1, 2, 0).reshape(
            -1, self.side_num * 4)

        pre_topk = cfg.get('nms_pre', -1)
        if pre_topk > 0 and probs.shape[0] > pre_topk:
            # With softmax the last column is excluded from the ranking.
            ranked = probs if self.use_sigmoid_cls else probs[:, :-1]
            best_score = ranked.max(dim=1)[0]
            keep = best_score.topk(pre_topk)[1]
            lvl_anchors = lvl_anchors[keep, :]
            lvl_bucket_cls = lvl_bucket_cls[keep, :]
            lvl_bucket_reg = lvl_bucket_reg[keep, :]
            probs = probs[keep, :]

        decoded, confids = self.bbox_coder.decode(
            lvl_anchors.contiguous(),
            [lvl_bucket_cls.contiguous(),
             lvl_bucket_reg.contiguous()],
            max_shape=img_shape)
        level_bboxes.append(decoded)
        level_scores.append(probs)
        level_confids.append(confids)

    all_bboxes = torch.cat(level_bboxes)
    if rescale:
        all_bboxes /= all_bboxes.new_tensor(scale_factor)
    all_scores = torch.cat(level_scores)
    all_confids = torch.cat(level_confids)
    if self.use_sigmoid_cls:
        # Append a zero column as the trailing background class.
        background = all_scores.new_zeros(all_scores.shape[0], 1)
        all_scores = torch.cat([all_scores, background], dim=1)

    det_bboxes, det_labels = multiclass_nms(
        all_bboxes,
        all_scores,
        cfg.score_thr,
        cfg.nms,
        cfg.max_per_img,
        score_factors=all_confids)
    return det_bboxes, det_labels
def get_bboxes_single(self,
                      cls_scores,
                      bbox_preds,
                      mlvl_anchors,
                      mlvl_masks,
                      img_shape,
                      scale_factor,
                      cfg,
                      rescale=False):
    """Decode masked multi-level predictions of one image and run NMS.

    Levels whose mask keeps nothing are skipped. For the remaining levels
    the scores and deltas are filtered by the mask, optionally reduced to
    the ``nms_pre`` best entries, and decoded with ``delta2bbox``.

    This method uses the background-first layout: with softmax the ranking
    ignores column 0, and with sigmoid a zero column is prepended before
    NMS. ``cfg`` is read directly, so it must not be None.

    Args:
        cls_scores (list[Tensor]): Per-level classification maps.
        bbox_preds (list[Tensor]): Per-level box delta maps.
        mlvl_anchors (list[Tensor]): Per-level anchors.
        mlvl_masks (list[Tensor]): Per-level masks used to index the
            flattened scores and deltas.
        img_shape: Passed to ``delta2bbox``.
        scale_factor: Divisor applied to the boxes when ``rescale`` is True.
        cfg: Test config providing ``nms_pre``, ``score_thr``, ``nms`` and
            ``max_per_img``.
        rescale (bool): Whether to divide boxes by ``scale_factor``.

    Returns:
        tuple: ``(det_bboxes, det_labels)`` as returned by
            ``multiclass_nms``.
    """
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)

    level_bboxes = []
    level_scores = []
    for lvl_cls, lvl_reg, lvl_anchors, lvl_mask in zip(
            cls_scores, bbox_preds, mlvl_anchors, mlvl_masks):
        assert lvl_cls.size()[-2:] == lvl_reg.size()[-2:]
        # Nothing kept on this level: skip it.
        if lvl_mask.sum() == 0:
            continue

        # reshape scores and bbox_pred
        flat_cls = lvl_cls.permute(1, 2, 0).reshape(-1,
                                                    self.cls_out_channels)
        probs = (flat_cls.sigmoid()
                 if self.use_sigmoid_cls else flat_cls.softmax(-1))
        deltas = lvl_reg.permute(1, 2, 0).reshape(-1, 4)

        # filter scores, bbox_pred w.r.t. mask.
        # anchors are filtered in get_anchors() beforehand.
        probs = probs[lvl_mask, :]
        deltas = deltas[lvl_mask, :]
        if probs.dim() == 0:
            lvl_anchors = lvl_anchors.unsqueeze(0)
            probs = probs.unsqueeze(0)
            deltas = deltas.unsqueeze(0)

        # filter anchors, bbox_pred, scores w.r.t. scores
        pre_topk = cfg.get('nms_pre', -1)
        if pre_topk > 0 and probs.shape[0] > pre_topk:
            ranked = probs if self.use_sigmoid_cls else probs[:, 1:]
            best_score = ranked.max(dim=1)[0]
            keep = best_score.topk(pre_topk)[1]
            lvl_anchors = lvl_anchors[keep, :]
            deltas = deltas[keep, :]
            probs = probs[keep, :]

        level_bboxes.append(
            delta2bbox(lvl_anchors, deltas, self.target_means,
                       self.target_stds, img_shape))
        level_scores.append(probs)

    all_bboxes = torch.cat(level_bboxes)
    if rescale:
        all_bboxes /= all_bboxes.new_tensor(scale_factor)
    all_scores = torch.cat(level_scores)
    if self.use_sigmoid_cls:
        # Prepend a zero column as the leading background class.
        background = all_scores.new_zeros(all_scores.shape[0], 1)
        all_scores = torch.cat([background, all_scores], dim=1)

    # multi class NMS
    det_bboxes, det_labels = multiclass_nms(all_bboxes, all_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, rescale=False, **kwargs):
    """Test an ensemble of models with augmentations.

    Boxes are predicted by every model in ``self.models`` on every
    augmented view, merged with ``merge_aug_bboxes(..., type='concat')``
    and filtered by multi-class NMS. Masks and relations are computed with
    the first model only.

    The relation stage needs the merged masks, which exist only when at
    least one box survives NMS, so it runs only in that case. With zero
    detections the method returns empty mask results and neither computes
    nor saves relations.

    Args:
        imgs: Augmented images, forwarded to each model's
            ``extract_feats``.
        img_metas: Per-augmentation meta info; element ``[0]`` of each
            entry is read.
        rescale (bool): When True, detected boxes are multiplied by the
            view's ``scale_factor`` before mask RoI extraction, and the
            flag is forwarded to ``get_seg_masks``.
        **kwargs: Accepted for interface compatibility; not read.

    Returns:
        ``bbox_result`` when the first model has no mask branch, otherwise
        the tuple ``(bbox_result, segm_result)``.
    """
    lead_model = self.models[0]
    rpn_test_cfg = lead_model.test_cfg.rpn
    rcnn_test_cfg = lead_model.test_cfg.rcnn

    # For each model, compute detections
    aug_bboxes = []
    aug_scores = []
    aug_img_metas = []
    for model in self.models:
        for x, img_meta in zip(model.extract_feats(imgs), img_metas):
            proposal_list = model.simple_test_rpn(x, img_meta, rpn_test_cfg)
            _, semantic_feat = model.semantic_head(x)

            img_shape = img_meta[0]['img_shape']
            scale_factor = img_meta[0]['scale_factor']

            # "ms" in variable names means multi-stage
            ms_scores = []
            rois = bbox2roi(proposal_list)
            for i in range(model.num_stages):
                bbox_head = model.bbox_head[i]
                cls_score, bbox_pred = model._bbox_forward_test(
                    i, x, rois, semantic_feat=semantic_feat)
                ms_scores.append(cls_score)

                if i < model.num_stages - 1:
                    bbox_label = cls_score.argmax(dim=1)
                    rois = bbox_head.regress_by_class(
                        rois, bbox_label, bbox_pred, img_meta[0])

            cls_score = sum(ms_scores) / float(len(ms_scores))
            bboxes, scores = model.bbox_head[-1].get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                scale_factor,
                rescale=False,
                cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)
            aug_img_metas.append(img_meta)

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        aug_bboxes, aug_scores, aug_img_metas, rcnn_test_cfg, type='concat')
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              lead_model.bbox_head[-1].num_classes)

    if not lead_model.with_mask:
        return bbox_result

    if det_bboxes.shape[0] == 0:
        # No detections: one empty list per foreground mask class. The
        # relation stage is skipped because there are no merged masks.
        segm_result = [
            [] for _ in range(lead_model.mask_head[-1].num_classes - 1)
        ]
        return bbox_result, segm_result

    aug_masks = []
    aug_img_metas = []
    for x, img_meta in zip(lead_model.extract_feats(imgs), img_metas):
        scale_factor = img_meta[0]['scale_factor']
        _bboxes = (
            det_bboxes[:, :4] * scale_factor if rescale else det_bboxes)
        mask_rois = bbox2roi([_bboxes])
        mask_roi_extractor = lead_model.mask_roi_extractor[-1]
        mask_feats = mask_roi_extractor(
            x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
        _, semantic_feat = lead_model.semantic_head(x)
        mask_semantic_feat = lead_model.semantic_roi_extractor(
            [semantic_feat], mask_rois)
        mask_feats += mask_semantic_feat

        last_feat = None
        for i in range(lead_model.num_stages):
            mask_head = lead_model.mask_head[i]
            if lead_model.mask_info_flow:
                mask_pred, last_feat = mask_head(mask_feats, last_feat)
            else:
                mask_pred = mask_head(mask_feats)
            aug_masks.append(mask_pred.sigmoid().cpu().numpy())
            aug_img_metas.append(img_meta)

    merged_masks = merge_aug_masks(aug_masks, aug_img_metas, rcnn_test_cfg)

    ori_shape = img_metas[0][0]['ori_shape']
    segm_result = lead_model.mask_head[-1].get_seg_masks(
        merged_masks,
        det_bboxes,
        det_labels,
        rcnn_test_cfg,
        ori_shape,
        scale_factor=1.0,
        rescale=rescale)

    # compute relations
    for x, img_meta in zip(lead_model.extract_feats(imgs), img_metas):
        _, semantic_feat = lead_model.semantic_head(x)
        filename = img_meta[0]['filename']
        im_height, im_width, _ = img_meta[0]['img_shape']
        relation_preds = lead_model._rel_forward_test(
            x,
            det_bboxes,
            det_labels,
            merged_masks,
            img_meta[0]['scale_factor'],
            img_meta[0]['ori_shape'],
            semantic_feat=semantic_feat,
            im_width=im_width,
            im_height=im_height)
        if lead_model.rel_save_folder is not None:
            np.save(
                os.path.join(lead_model.rel_save_folder, filename + '.npy'),
                relation_preds)

    return bbox_result, segm_result
def _get_bboxes_single(self,
                       cls_score_list,
                       bbox_pred_list,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False,
                       with_nms=True):
    """Turn the per-level outputs of one image into bbox predictions.

    Args:
        cls_score_list (list[Tensor]): Box scores for a single scale level
            Has shape (num_anchors * num_classes, H, W).
        bbox_pred_list (list[Tensor]): Box energies / deltas for a single
            scale level with shape (num_anchors * 4, H, W).
        mlvl_anchors (list[Tensor]): Box reference for a single scale level
            with shape (num_total_anchors, 4).
        img_shape (tuple[int]): Shape of the input image,
            (height, width, 3).
        scale_factor (ndarray): Scale factor of the image arranged as
            (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, boxes are divided by ``scale_factor``.
            Default: False.
        with_nms (bool): If True, run ``multiclass_nms`` before returning.
            Default: True.

    Returns:
        tuple(Tensor): ``(det_bboxes, det_labels)`` from ``multiclass_nms``
            when ``with_nms`` is True; otherwise the concatenated boxes and
            scores (with a trailing zero background column when sigmoid
            classification is used).
    """
    if cfg is None:
        cfg = self.test_cfg
    assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)

    level_bboxes = []
    level_scores = []
    for lvl_cls, lvl_reg, lvl_anchors in zip(cls_score_list, bbox_pred_list,
                                             mlvl_anchors):
        assert lvl_cls.size()[-2:] == lvl_reg.size()[-2:]
        flat_cls = lvl_cls.permute(1, 2, 0).reshape(-1,
                                                    self.cls_out_channels)
        probs = (flat_cls.sigmoid()
                 if self.use_sigmoid_cls else flat_cls.softmax(-1))
        deltas = lvl_reg.permute(1, 2, 0).reshape(-1, 4)

        pre_topk = cfg.get('nms_pre', -1)
        if pre_topk > 0 and probs.shape[0] > pre_topk:
            # Get maximum scores for foreground classes.
            # remind that we set FG labels to [0, num_class-1]
            # since mmdet v2.0
            # BG cat_id: num_class
            foreground = probs if self.use_sigmoid_cls else probs[:, :-1]
            best_score = foreground.max(dim=1)[0]
            keep = best_score.topk(pre_topk)[1]
            lvl_anchors = lvl_anchors[keep, :]
            deltas = deltas[keep, :]
            probs = probs[keep, :]

        level_bboxes.append(
            self.bbox_coder.decode(lvl_anchors, deltas, max_shape=img_shape))
        level_scores.append(probs)

    all_bboxes = torch.cat(level_bboxes)
    if rescale:
        all_bboxes /= all_bboxes.new_tensor(scale_factor)
    all_scores = torch.cat(level_scores)
    if self.use_sigmoid_cls:
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        background = all_scores.new_zeros(all_scores.shape[0], 1)
        all_scores = torch.cat([all_scores, background], dim=1)

    if not with_nms:
        return all_bboxes, all_scores

    det_bboxes, det_labels = multiclass_nms(all_bboxes, all_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    return det_bboxes, det_labels
def _get_bboxes_single(self,
                       pred_maps_list,
                       scale_factor,
                       cfg,
                       rescale=False,
                       with_nms=True):
    """Turn the prediction maps of one image into bbox predictions.

    Args:
        pred_maps_list (list[Tensor]): Prediction maps for different scales
            of each single image in the batch.
        scale_factor (ndarray): Scale factor of the image arranged as
            (w_scale, h_scale, w_scale, h_scale).
        cfg (mmcv.Config | None): Test / postprocessing configuration,
            if None, test_cfg would be used.
        rescale (bool): If True, boxes are divided by ``scale_factor``.
            Default: False.
        with_nms (bool): If True, do nms before return boxes.
            Default: True.

    Returns:
        tuple(Tensor): ``(det_bboxes, det_labels)`` when ``with_nms`` is
            True and ``cfg`` has a non-None ``nms`` entry (two empty
            tensors if ``with_nms`` is True and nothing survived the
            filtering). Otherwise the triple
            ``(bboxes, cls_scores, conf_scores)``, where ``cls_scores``
            carries a trailing zero background column.
    """
    if cfg is None:
        cfg = self.test_cfg
    assert len(pred_maps_list) == self.num_levels

    featmap_sizes = [level_map.shape[-2:] for level_map in pred_maps_list]
    level_anchors = self.anchor_generator.grid_anchors(
        featmap_sizes, pred_maps_list[0][0].device)

    level_bboxes = []
    level_cls_scores = []
    level_conf_scores = []
    for lvl in range(self.num_levels):
        # get some key info for current scale
        pred_map = pred_maps_list[lvl]
        stride = self.featmap_strides[lvl]

        # (h, w, num_anchors*num_attrib) -> (h*w*num_anchors, num_attrib)
        pred_map = pred_map.permute(1, 2, 0).reshape(-1, self.num_attrib)

        pred_map[..., :2] = torch.sigmoid(pred_map[..., :2])
        boxes = self.bbox_coder.decode(level_anchors[lvl],
                                       pred_map[..., :4], stride)
        # conf and cls
        objectness = torch.sigmoid(pred_map[..., 4]).view(-1)
        class_probs = torch.sigmoid(pred_map[..., 5:]).view(
            -1, self.num_classes)  # Cls pred one-hot.

        # Filtering out all predictions with conf < conf_thr
        conf_thr = cfg.get('conf_thr', -1)
        if conf_thr > 0:
            # add as_tuple=False for compatibility in Pytorch 1.6
            confident = objectness.ge(conf_thr).nonzero(
                as_tuple=False).flatten()
            boxes = boxes[confident, :]
            class_probs = class_probs[confident, :]
            objectness = objectness[confident]

        # Get top-k prediction
        pre_topk = cfg.get('nms_pre', -1)
        if 0 < pre_topk < objectness.size(0):
            keep = objectness.topk(pre_topk)[1]
            boxes = boxes[keep, :]
            class_probs = class_probs[keep, :]
            objectness = objectness[keep]

        # Save the result of current scale
        level_bboxes.append(boxes)
        level_cls_scores.append(class_probs)
        level_conf_scores.append(objectness)

    # Merge the results of different scales together
    all_bboxes = torch.cat(level_bboxes)
    all_cls_scores = torch.cat(level_cls_scores)
    all_conf_scores = torch.cat(level_conf_scores)

    if with_nms and (all_conf_scores.size(0) == 0):
        return torch.zeros((0, 5)), torch.zeros((0, ))

    if rescale:
        all_bboxes /= all_bboxes.new_tensor(scale_factor)

    # In mmdet 2.x, the class_id for background is num_classes.
    # i.e., the last column.
    background = all_cls_scores.new_zeros(all_cls_scores.shape[0], 1)
    all_cls_scores = torch.cat([all_cls_scores, background], dim=1)

    # Support exporting to onnx without nms
    if not (with_nms and cfg.get('nms', None) is not None):
        return (all_bboxes, all_cls_scores, all_conf_scores)

    det_bboxes, det_labels = multiclass_nms(
        all_bboxes,
        all_cls_scores,
        cfg.score_thr,
        cfg.nms,
        cfg.max_per_img,
        score_factors=all_conf_scores)
    return det_bboxes, det_labels
def _get_bboxes_single(self,
                       cls_scores,
                       bbox_preds,
                       iou_preds,
                       mlvl_anchors,
                       img_shape,
                       scale_factor,
                       cfg,
                       rescale=False,
                       with_nms=True):
    """Transform outputs for a single batch item into labeled boxes.

    This method is almost same as `ATSSHead._get_bboxes_single()`.
    We use sqrt(iou_preds * cls_scores) in NMS process instead of just
    cls_scores. Besides, score voting is used when ``self.with_score_voting``
    is set to True.

    Args:
        cls_scores (list[Tensor]): Per-level classification maps.
        bbox_preds (list[Tensor]): Per-level box delta maps.
        iou_preds (list[Tensor]): Per-level IoU prediction maps.
        mlvl_anchors (list[Tensor]): Per-level anchors.
        img_shape: Passed to the coder as ``max_shape``.
        scale_factor: Divisor applied to the boxes when ``rescale`` is True.
        cfg: Test / postprocessing configuration; ``self.test_cfg`` is used
            when None.
        rescale (bool): Whether to divide boxes by ``scale_factor``.
        with_nms (bool): Must be True; other values are rejected.

    Returns:
        tuple: ``(det_bboxes, det_labels)`` from ``multiclass_nms``,
            optionally refined by ``self.score_voting``.
    """
    assert with_nms, 'PAA only supports "with_nms=True" now'
    assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
    # Fall back to the head's own test config so that a None ``cfg`` does
    # not fail on ``cfg.get`` below.
    cfg = self.test_cfg if cfg is None else cfg

    mlvl_bboxes = []
    mlvl_scores = []
    mlvl_iou_preds = []
    # The loop variable is named ``iou_pred`` so it does not shadow the
    # ``iou_preds`` argument being iterated.
    for cls_score, bbox_pred, iou_pred, anchors in zip(
            cls_scores, bbox_preds, iou_preds, mlvl_anchors):
        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

        scores = cls_score.permute(1, 2, 0).reshape(
            -1, self.cls_out_channels).sigmoid()
        bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
        iou_pred = iou_pred.permute(1, 2, 0).reshape(-1).sigmoid()

        nms_pre = cfg.get('nms_pre', -1)
        if nms_pre > 0 and scores.shape[0] > nms_pre:
            # Rank candidates by the joint score sqrt(cls * iou).
            max_scores, _ = (scores * iou_pred[:, None]).sqrt().max(dim=1)
            _, topk_inds = max_scores.topk(nms_pre)
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]
            iou_pred = iou_pred[topk_inds]

        bboxes = self.bbox_coder.decode(
            anchors, bbox_pred, max_shape=img_shape)
        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        mlvl_iou_preds.append(iou_pred)

    mlvl_bboxes = torch.cat(mlvl_bboxes)
    if rescale:
        mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
    mlvl_scores = torch.cat(mlvl_scores)
    # Add a dummy background class to the backend when using sigmoid
    # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
    # BG cat_id: num_class
    padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
    mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
    mlvl_iou_preds = torch.cat(mlvl_iou_preds)
    mlvl_nms_scores = (mlvl_scores * mlvl_iou_preds[:, None]).sqrt()

    det_bboxes, det_labels = multiclass_nms(
        mlvl_bboxes,
        mlvl_nms_scores,
        cfg.score_thr,
        cfg.nms,
        cfg.max_per_img,
        score_factors=None)
    if self.with_score_voting:
        det_bboxes, det_labels = self.score_voting(det_bboxes, det_labels,
                                                   mlvl_bboxes,
                                                   mlvl_nms_scores,
                                                   cfg.score_thr)
    return det_bboxes, det_labels
def aug_test(self, features, proposal_list, img_metas, rescale=False):
    """Test with augmentations using the main and the tail bbox branches.

    For every augmented view the proposals are refined through all stages
    twice, once with ``self.bbox_head`` and once with
    ``self.bbox_head_tail``. The boxes and scores of both branches are
    concatenated per view, merged across views and filtered with
    multi-class NMS. When the head has a mask branch, masks are predicted
    for the final detections on every view and merged.

    ``rescale`` is not read by this body.

    Args:
        features: Per-augmentation features.
        proposal_list: Proposals; only ``proposal_list[0][:, :4]`` is used.
        img_metas: Per-augmentation meta info; element ``[0]`` of each
            entry is read.
        rescale: Unused.

    Returns:
        list: ``[(bbox_result, segm_result)]`` with a mask branch,
            ``[bbox_result]`` otherwise.
    """
    rcnn_test_cfg = self.test_cfg
    per_view_bboxes = []
    per_view_scores = []
    for x, view_meta in zip(features, img_metas):
        # only one image in the batch
        meta = view_meta[0]
        view_shape = meta['img_shape']
        view_scale = meta['scale_factor']
        view_proposals = bbox_mapping(proposal_list[0][:, :4], view_shape,
                                      view_scale, meta['flip'],
                                      meta['flip_direction'])

        # "ms" in variable names means multi-stage
        ms_scores = []
        ms_scores_tail = []
        rois = bbox2roi([view_proposals])
        rois_tail = bbox2roi([view_proposals])
        for stage in range(self.num_stages):
            main_out = self._bbox_forward(stage, x, rois)
            ms_scores.append(main_out['cls_score'])
            tail_out = self._bbox_forward_tail(stage, x, rois_tail)
            ms_scores_tail.append(tail_out['cls_score'])

            if stage < self.num_stages - 1:
                # The last score column is left out when picking the label
                # used for class-specific regression.
                main_label = main_out['cls_score'][:, :-1].argmax(dim=1)
                rois = self.bbox_head[stage].regress_by_class(
                    rois, main_label, main_out['bbox_pred'], view_meta[0])
                tail_label = tail_out['cls_score'][:, :-1].argmax(dim=1)
                rois_tail = self.bbox_head_tail[stage].regress_by_class(
                    rois_tail, tail_label, tail_out['bbox_pred'],
                    view_meta[0])

        mean_score = sum(ms_scores) / float(len(ms_scores))
        main_bboxes, main_scores = self.bbox_head[-1].get_bboxes(
            rois,
            mean_score,
            main_out['bbox_pred'],
            view_shape,
            view_scale,
            rescale=False,
            cfg=None)

        mean_score_tail = sum(ms_scores_tail) / float(len(ms_scores_tail))
        tail_bboxes, tail_scores = self.bbox_head_tail[-1].get_bboxes(
            rois_tail,
            mean_score_tail,
            tail_out['bbox_pred'],
            view_shape,
            view_scale,
            rescale=False,
            cfg=None)

        # Both branches contribute candidates for this view.
        per_view_bboxes.append(torch.cat((main_bboxes, tail_bboxes)))
        per_view_scores.append(torch.cat((main_scores, tail_scores)))

    # after merging, bboxes will be rescaled to the original image size
    merged_bboxes, merged_scores = merge_aug_bboxes(
        per_view_bboxes, per_view_scores, img_metas, rcnn_test_cfg)
    det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
                                            rcnn_test_cfg.score_thr,
                                            rcnn_test_cfg.nms,
                                            rcnn_test_cfg.max_per_img)

    bbox_result = bbox2result(det_bboxes, det_labels,
                              self.bbox_head[-1].num_classes)

    if not self.with_mask:
        return [bbox_result]

    if det_bboxes.shape[0] == 0:
        segm_result = [[] for _ in range(self.mask_head[-1].num_classes)]
    else:
        aug_masks = []
        aug_img_metas = []
        for x, view_meta in zip(features, img_metas):
            meta = view_meta[0]
            view_boxes = bbox_mapping(det_bboxes[:, :4], meta['img_shape'],
                                      meta['scale_factor'], meta['flip'],
                                      meta['flip_direction'])
            mask_rois = bbox2roi([view_boxes])
            for stage in range(self.num_stages):
                mask_out = self._mask_forward(stage, x, mask_rois)
                aug_masks.append(
                    mask_out['mask_pred'].sigmoid().cpu().numpy())
                aug_img_metas.append(view_meta)
        merged_masks = merge_aug_masks(aug_masks, aug_img_metas,
                                       self.test_cfg)

        ori_shape = img_metas[0][0]['ori_shape']
        segm_result = self.mask_head[-1].get_seg_masks(
            merged_masks,
            det_bboxes,
            det_labels,
            rcnn_test_cfg,
            ori_shape,
            scale_factor=1.0,
            rescale=False)
    return [(bbox_result, segm_result)]