def aug_test(self, imgs, img_metas, rescale=False): # recompute feats to save memory feats = self.extract_feats(imgs) aug_bboxes = [] aug_scores = [] for x, img_meta in zip(feats, img_metas): # only one image in the batch outs = self.bbox_head(x) bbox_inputs = outs + (img_meta, self.test_cfg, False, False) det_bboxes, det_scores = self.bbox_head.get_bboxes(*bbox_inputs)[0] aug_bboxes.append(det_bboxes) aug_scores.append(det_scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = self.merge_aug_results( aug_bboxes, aug_scores, img_metas) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, self.test_cfg.score_thr, self.test_cfg.nms, self.test_cfg.max_per_img) if rescale: _det_bboxes = det_bboxes else: _det_bboxes = det_bboxes.clone() _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] bbox_results = bbox2result(_det_bboxes, det_labels, self.bbox_head.num_classes) return bbox_results
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): aug_bboxes = [] aug_scores = [] for x, img_meta in zip(feats, img_metas): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] # TODO more flexible proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) rois = bbox2roi([proposals]) # recompute feature maps to save GPU memory roi_feats = self.bbox_roi_extractor( x[:len(self.bbox_roi_extractor.featmap_strides)], rois) if self.with_shared_head: roi_feats = self.shared_head(roi_feats) cls_score, bbox_pred = self.bbox_head(roi_feats) bboxes, scores = self.bbox_head.get_det_bboxes(rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) return det_bboxes, det_labels
def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_points, img_shape, scale_factor, cfg, rescale=False, nms=True): assert len(cls_scores) == len(bbox_preds) == len(mlvl_points) mlvl_bboxes = [] mlvl_scores = [] for i_lvl, (cls_score, bbox_pred, points) in enumerate( zip(cls_scores, bbox_preds, mlvl_points)): assert cls_score.size()[-2:] == bbox_pred.size()[-2:] cls_score = cls_score.permute(1, 2, 0).reshape(-1, self.cls_out_channels) if self.use_sigmoid_cls: scores = cls_score.sigmoid() else: scores = cls_score.softmax(-1) bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4) nms_pre = cfg.get('nms_pre', -1) if nms_pre > 0 and scores.shape[0] > nms_pre: if self.use_sigmoid_cls: max_scores, _ = scores.max(dim=1) else: max_scores, _ = scores[:, 1:].max(dim=1) _, topk_inds = max_scores.topk(nms_pre) points = points[topk_inds, :] bbox_pred = bbox_pred[topk_inds, :] scores = scores[topk_inds, :] bbox_pos_center = torch.cat([points[:, :2], points[:, :2]], dim=1) bboxes = bbox_pred * self.point_strides[i_lvl] + bbox_pos_center x1 = bboxes[:, 0].clamp(min=0, max=img_shape[1]) y1 = bboxes[:, 1].clamp(min=0, max=img_shape[0]) x2 = bboxes[:, 2].clamp(min=0, max=img_shape[1]) y2 = bboxes[:, 3].clamp(min=0, max=img_shape[0]) bboxes = torch.stack([x1, y1, x2, y2], dim=-1) mlvl_bboxes.append(bboxes) mlvl_scores.append(scores) mlvl_bboxes = torch.cat(mlvl_bboxes) if rescale: mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor) mlvl_scores = torch.cat(mlvl_scores) if self.use_sigmoid_cls: padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1) mlvl_scores = torch.cat([padding, mlvl_scores], dim=1) if nms: det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms, cfg.max_per_img) return det_bboxes, det_labels else: return mlvl_bboxes, mlvl_scores
def get_bboxes_single(self, cls_scores, bbox_preds, centernesses, mlvl_anchors, img_shape, scale_factor, cfg, rescale=False): assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors) mlvl_bboxes = [] mlvl_scores = [] mlvl_centerness = [] for cls_score, bbox_pred, centerness, anchors in zip( cls_scores, bbox_preds, centernesses, mlvl_anchors): assert cls_score.size()[-2:] == bbox_pred.size()[-2:] scores = cls_score.permute(1, 2, 0).reshape( -1, self.cls_out_channels).sigmoid() bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4) centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid() nms_pre = cfg.get('nms_pre', -1) if nms_pre > 0 and scores.shape[0] > nms_pre: max_scores, _ = (scores * centerness[:, None]).max(dim=1) _, topk_inds = max_scores.topk(nms_pre) anchors = anchors[topk_inds, :] bbox_pred = bbox_pred[topk_inds, :] scores = scores[topk_inds, :] centerness = centerness[topk_inds] bboxes = delta2bbox(anchors, bbox_pred, self.target_means, self.target_stds, img_shape) mlvl_bboxes.append(bboxes) mlvl_scores.append(scores) mlvl_centerness.append(centerness) mlvl_bboxes = torch.cat(mlvl_bboxes) if rescale: mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor) mlvl_scores = torch.cat(mlvl_scores) padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1) mlvl_scores = torch.cat([padding, mlvl_scores], dim=1) mlvl_centerness = torch.cat(mlvl_centerness) det_bboxes, det_labels = multiclass_nms( mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms, cfg.max_per_img, score_factors=mlvl_centerness) return det_bboxes, det_labels
def get_bboxes_single(self, cls_score_list, bbox_pred_list, mlvl_anchors, img_shape, scale_factor, cfg, rescale=False): """ Transform outputs for a single batch item into labeled boxes. """ assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors) mlvl_bboxes = [] mlvl_scores = [] for cls_score, bbox_pred, anchors in zip(cls_score_list, bbox_pred_list, mlvl_anchors): assert cls_score.size()[-2:] == bbox_pred.size()[-2:] cls_score = cls_score.permute(1, 2, 0).reshape(-1, self.cls_out_channels) if self.use_sigmoid_cls: scores = cls_score.sigmoid() else: scores = cls_score.softmax(-1) bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4) nms_pre = cfg.get('nms_pre', -1) if nms_pre > 0 and scores.shape[0] > nms_pre: # Get maximum scores for foreground classes. if self.use_sigmoid_cls: max_scores, _ = scores.max(dim=1) else: max_scores, _ = scores[:, 1:].max(dim=1) _, topk_inds = max_scores.topk(nms_pre) anchors = anchors[topk_inds, :] bbox_pred = bbox_pred[topk_inds, :] scores = scores[topk_inds, :] bboxes = delta2bbox(anchors, bbox_pred, self.target_means, self.target_stds, img_shape) mlvl_bboxes.append(bboxes) mlvl_scores.append(scores) mlvl_bboxes = torch.cat(mlvl_bboxes) if rescale: mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor) mlvl_scores = torch.cat(mlvl_scores) if self.use_sigmoid_cls: # Add a dummy background class to the front when using sigmoid padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1) mlvl_scores = torch.cat([padding, mlvl_scores], dim=1) det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms, cfg.max_per_img) return det_bboxes, det_labels
def get_bboxes_single(self, cls_scores, bbox_preds, featmap_sizes, point_list, img_shape, scale_factor, cfg, rescale=False): assert len(cls_scores) == len(bbox_preds) == len(point_list) det_bboxes = [] det_scores = [] for cls_score, bbox_pred, featmap_size, stride, base_len, (y, x) \ in zip(cls_scores, bbox_preds, featmap_sizes, self.strides, self.base_edge_list, point_list): assert cls_score.size()[-2:] == bbox_pred.size()[-2:] scores = cls_score.permute(1, 2, 0).reshape( -1, self.cls_out_channels).sigmoid() bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4).exp() nms_pre = cfg.get('nms_pre', -1) if (nms_pre > 0) and (scores.shape[0] > nms_pre): max_scores, _ = scores.max(dim=1) _, topk_inds = max_scores.topk(nms_pre) bbox_pred = bbox_pred[topk_inds, :] scores = scores[topk_inds, :] y = y[topk_inds] x = x[topk_inds] x1 = (stride * x - base_len * bbox_pred[:, 0]).\ clamp(min=0, max=img_shape[1] - 1) y1 = (stride * y - base_len * bbox_pred[:, 1]).\ clamp(min=0, max=img_shape[0] - 1) x2 = (stride * x + base_len * bbox_pred[:, 2]).\ clamp(min=0, max=img_shape[1] - 1) y2 = (stride * y + base_len * bbox_pred[:, 3]).\ clamp(min=0, max=img_shape[0] - 1) bboxes = torch.stack([x1, y1, x2, y2], -1) det_bboxes.append(bboxes) det_scores.append(scores) det_bboxes = torch.cat(det_bboxes) if rescale: det_bboxes /= det_bboxes.new_tensor(scale_factor) det_scores = torch.cat(det_scores) padding = det_scores.new_zeros(det_scores.shape[0], 1) det_scores = torch.cat([padding, det_scores], dim=1) det_bboxes, det_labels = multiclass_nms(det_bboxes, det_scores, cfg.score_thr, cfg.nms, cfg.max_per_img) return det_bboxes, det_labels
def get_det_bboxes(self, rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None): if isinstance(cls_score, list): cls_score = sum(cls_score) / float(len(cls_score)) scores = F.softmax(cls_score, dim=1) if cls_score is not None else None if bbox_pred is not None: bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means, self.target_stds, img_shape) else: bboxes = rois[:, 1:].clone() if img_shape is not None: bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1) bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1) if rescale: if isinstance(scale_factor, float): bboxes /= scale_factor else: scale_factor = torch.from_numpy(scale_factor).to(bboxes.device) bboxes = (bboxes.view(bboxes.size(0), -1, 4) / scale_factor).view(bboxes.size()[0], -1) if cfg is None: return bboxes, scores else: det_bboxes, det_labels = multiclass_nms(bboxes, scores, cfg.score_thr, cfg.nms, cfg.max_per_img) return det_bboxes, det_labels
def aug_test(self, imgs, img_metas, proposals=None, rescale=False): """Test with augmentations. If rescale is False, then returned bboxes and masks will fit the scale of imgs[0]. """ if self.with_semantic: semantic_feats = [ self.semantic_head(feat)[1] for feat in self.extract_feats(imgs) ] else: semantic_feats = [None] * len(img_metas) # recompute feats to save memory proposal_list = self.aug_test_rpn(self.extract_feats(imgs), img_metas, self.test_cfg.rpn) rcnn_test_cfg = self.test_cfg.rcnn aug_bboxes = [] aug_scores = [] for x, img_meta, semantic in zip(self.extract_feats(imgs), img_metas, semantic_feats): # only one image in the batch img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, scale_factor, flip) # "ms" in variable names means multi-stage ms_scores = [] rois = bbox2roi([proposals]) for i in range(self.num_stages): bbox_head = self.bbox_head[i] cls_score, bbox_pred = self._bbox_forward_test( i, x, rois, semantic_feat=semantic) ms_scores.append(cls_score) if i < self.num_stages - 1: bbox_label = cls_score.argmax(dim=1) rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred, img_meta[0]) cls_score = sum(ms_scores) / float(len(ms_scores)) bboxes, scores = self.bbox_head[-1].get_det_bboxes(rois, cls_score, bbox_pred, img_shape, scale_factor, rescale=False, cfg=None) aug_bboxes.append(bboxes) aug_scores.append(scores) # after merging, bboxes will be rescaled to the original image size merged_bboxes, merged_scores = merge_aug_bboxes( aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) bbox_result = bbox2result(det_bboxes, det_labels, self.bbox_head[-1].num_classes) if self.with_mask: if det_bboxes.shape[0] == 0: segm_result = [[] for _ in range(self.mask_head[-1].num_classes - 1)] else: aug_masks = [] aug_img_metas = [] for x, img_meta, semantic in zip(self.extract_feats(imgs), img_metas, semantic_feats): img_shape = img_meta[0]['img_shape'] scale_factor = img_meta[0]['scale_factor'] flip = img_meta[0]['flip'] _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, scale_factor, flip) mask_rois = bbox2roi([_bboxes]) mask_feats = self.mask_roi_extractor[-1]( x[:len(self.mask_roi_extractor[-1].featmap_strides)], mask_rois) if self.with_semantic: semantic_feat = semantic mask_semantic_feat = self.semantic_roi_extractor( [semantic_feat], mask_rois) if mask_semantic_feat.shape[-2:] != mask_feats.shape[ -2:]: mask_semantic_feat = F.adaptive_avg_pool2d( mask_semantic_feat, mask_feats.shape[-2:]) mask_feats += mask_semantic_feat last_feat = None for i in range(self.num_stages): mask_head = self.mask_head[i] if self.mask_info_flow: mask_pred, last_feat = mask_head( mask_feats, last_feat) else: mask_pred = mask_head(mask_feats) aug_masks.append(mask_pred.sigmoid().cpu().numpy()) aug_img_metas.append(img_meta) merged_masks = merge_aug_masks(aug_masks, aug_img_metas, self.test_cfg.rcnn) ori_shape = img_metas[0][0]['ori_shape'] segm_result = self.mask_head[-1].get_seg_masks( merged_masks, det_bboxes, det_labels, rcnn_test_cfg, ori_shape, scale_factor=1.0, rescale=False) return bbox_result, segm_result else: return bbox_result
def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors, mlvl_masks, img_shape, scale_factor, cfg, rescale=False): assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors) mlvl_bboxes = [] mlvl_scores = [] for cls_score, bbox_pred, anchors, mask in zip(cls_scores, bbox_preds, mlvl_anchors, mlvl_masks): assert cls_score.size()[-2:] == bbox_pred.size()[-2:] # if no location is kept, end. if mask.sum() == 0: continue # reshape scores and bbox_pred cls_score = cls_score.permute(1, 2, 0).reshape(-1, self.cls_out_channels) if self.use_sigmoid_cls: scores = cls_score.sigmoid() else: scores = cls_score.softmax(-1) bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4) # filter scores, bbox_pred w.r.t. mask. # anchors are filtered in get_anchors() beforehand. scores = scores[mask, :] bbox_pred = bbox_pred[mask, :] if scores.dim() == 0: anchors = anchors.unsqueeze(0) scores = scores.unsqueeze(0) bbox_pred = bbox_pred.unsqueeze(0) # filter anchors, bbox_pred, scores w.r.t. scores nms_pre = cfg.get('nms_pre', -1) if nms_pre > 0 and scores.shape[0] > nms_pre: if self.use_sigmoid_cls: max_scores, _ = scores.max(dim=1) else: max_scores, _ = scores[:, 1:].max(dim=1) _, topk_inds = max_scores.topk(nms_pre) anchors = anchors[topk_inds, :] bbox_pred = bbox_pred[topk_inds, :] scores = scores[topk_inds, :] bboxes = delta2bbox(anchors, bbox_pred, self.target_means, self.target_stds, img_shape) mlvl_bboxes.append(bboxes) mlvl_scores.append(scores) mlvl_bboxes = torch.cat(mlvl_bboxes) if rescale: mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor) mlvl_scores = torch.cat(mlvl_scores) if self.use_sigmoid_cls: padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1) mlvl_scores = torch.cat([padding, mlvl_scores], dim=1) # multi class NMS det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms, cfg.max_per_img) return det_bboxes, det_labels