def forward(self, feat, x):
    """Decode every level of the wrapped head and run batched NMS.

    For each level the centerness map is moved to channels-last, flattened
    per sample and passed through a sigmoid, while ``self.bbox_coder`` turns
    the class / box maps into scores and proposals.  When ``nms_pre`` is
    positive, the ``nms_pre`` candidates with the highest
    ``score * centerness`` are kept per level.  The levels are then
    concatenated, trimmed again by plain class score and given to
    ``self.rcnn_nms``.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; only handed to ``self.bbox_coder`` as ``input_x``.

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)`` as returned by
        ``self.rcnn_nms``.
    """
    head = self.module
    all_cls, all_reg, all_ctr = head(feat)
    all_anchors = self.anchor_generator(all_cls, device=all_cls[0].device)
    keep_per_level = self.test_cfg.get('nms_pre', -1)

    kept_scores, kept_boxes, kept_ctr = [], [], []
    for lvl_cls, lvl_reg, lvl_ctr, lvl_anchors in zip(
            all_cls, all_reg, all_ctr, all_anchors):
        # Channels-last, then flatten everything except the batch axis.
        ctr_prob = lvl_ctr.permute(0, 2, 3, 1)
        ctr_prob = ctr_prob.reshape(lvl_ctr.shape[0], -1).sigmoid()

        class_count = lvl_cls.shape[1] * 4 // lvl_reg.shape[1]
        cls_prob, boxes = self.bbox_coder(
            lvl_cls,
            lvl_reg,
            lvl_anchors,
            min_num_bboxes=-1,
            num_classes=class_count,
            use_sigmoid_cls=True,
            input_x=x)

        if keep_per_level > 0:
            # Pad along dim 1 first so that topk(keep_per_level) can run.
            cls_prob = mm2trt_util.pad_with_value(
                cls_prob, 1, keep_per_level, 0.)
            ctr_prob = mm2trt_util.pad_with_value(ctr_prob, 1, keep_per_level)
            boxes = mm2trt_util.pad_with_value(boxes, 1, keep_per_level)

            # Rank candidates by their best centerness-weighted class score.
            ranking = (cls_prob * ctr_prob[:, :, None]).max(dim=2)[0]
            order = ranking.topk(keep_per_level, dim=1)[1]
            boxes = mm2trt_util.gather_topk(boxes, 1, order)
            cls_prob = mm2trt_util.gather_topk(cls_prob, 1, order)
            ctr_prob = mm2trt_util.gather_topk(ctr_prob, 1, order)

        kept_scores.append(cls_prob)
        kept_boxes.append(boxes)
        kept_ctr.append(ctr_prob)

    mlvl_scores = torch.cat(kept_scores, dim=1)
    mlvl_proposals = torch.cat(kept_boxes, dim=1)
    # NOTE: the concatenated centerness is not multiplied into the final
    # scores; that multiplication is disabled in this wrapper.
    mlvl_centerness = torch.cat(kept_ctr, dim=1)

    # Second, cross-level trim based on the best class score per candidate.
    best_score = mlvl_scores.max(dim=2)[0]
    final_keep = min(max(1000, keep_per_level), mlvl_scores.shape[1])
    order = best_score.topk(final_keep, dim=1)[1]
    mlvl_proposals = mm2trt_util.gather_topk(mlvl_proposals, 1, order)
    mlvl_scores = mm2trt_util.gather_topk(mlvl_scores, 1, order)

    mlvl_scores = mm2trt_util.pad_with_value(mlvl_scores, 2, 1, 0.)

    num_detected, proposals, scores, cls_id = self.rcnn_nms(
        mlvl_scores,
        mlvl_proposals,
        mlvl_proposals.shape[1],
        self.test_cfg.max_per_img)
    return num_detected, proposals, scores, cls_id
def forward(self, feat, x):
    """Decode head outputs, weight scores by predicted IoU and run NMS.

    Per level the IoU map is flattened per sample and squashed with a
    sigmoid, and ``self.bbox_coder`` decodes scores and proposals.  When
    ``nms_pre`` is positive, the ``nms_pre`` candidates with the highest
    ``sqrt(score * iou)`` are kept.  After concatenation the class scores are
    replaced by ``sqrt(score * iou)``, trimmed once more, padded with one
    extra zero column on dim 2 and sent to ``self.rcnn_nms``.  If the wrapped
    module has ``with_score_voting`` set, the NMS output is refined by
    ``self.score_voting_batched``.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; only handed to ``self.bbox_coder`` as ``input_x``.

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)``.
    """
    head = self.module
    cls_maps, reg_maps, iou_maps = head(feat)
    anchor_sets = self.anchor_generator(cls_maps, device=cls_maps[0].device)
    keep_per_level = self.test_cfg.get('nms_pre', -1)

    score_parts, box_parts, iou_parts = [], [], []
    for cls_map, reg_map, iou_map, anchor_set in zip(
            cls_maps, reg_maps, iou_maps, anchor_sets):
        iou_prob = iou_map.permute(0, 2, 3, 1)
        iou_prob = iou_prob.reshape(iou_map.shape[0], -1).sigmoid()

        cls_prob, boxes = self.bbox_coder(
            cls_map,
            reg_map,
            anchor_set,
            min_num_bboxes=-1,
            num_classes=cls_map.shape[1] * 4 // reg_map.shape[1],
            use_sigmoid_cls=True,
            input_x=x)

        if keep_per_level > 0:
            # Pad along dim 1 first so that topk(keep_per_level) can run.
            cls_prob = mm2trt_util.pad_with_value(
                cls_prob, 1, keep_per_level, 0.)
            iou_prob = mm2trt_util.pad_with_value(iou_prob, 1, keep_per_level)
            boxes = mm2trt_util.pad_with_value(boxes, 1, keep_per_level)

            ranking = (cls_prob * iou_prob[:, :, None]).sqrt().max(dim=2)[0]
            order = ranking.topk(keep_per_level, dim=1)[1]
            boxes = mm2trt_util.gather_topk(boxes, 1, order)
            cls_prob = mm2trt_util.gather_topk(cls_prob, 1, order)
            iou_prob = mm2trt_util.gather_topk(iou_prob, 1, order)

        score_parts.append(cls_prob)
        box_parts.append(boxes)
        iou_parts.append(iou_prob)

    mlvl_proposals = torch.cat(box_parts, dim=1)
    mlvl_iou_preds = torch.cat(iou_parts, dim=1)
    # Final score is the geometric mean of class score and predicted IoU.
    mlvl_scores = torch.cat(score_parts, dim=1)
    mlvl_scores = (mlvl_scores * mlvl_iou_preds[:, :, None]).sqrt()

    best_score = mlvl_scores.max(dim=2)[0]
    final_keep = min(max(1000, keep_per_level), mlvl_scores.shape[1])
    order = best_score.topk(final_keep, dim=1)[1]
    mlvl_proposals = mm2trt_util.gather_topk(mlvl_proposals, 1, order)
    mlvl_scores = mm2trt_util.gather_topk(mlvl_scores, 1, order)

    mlvl_scores = mm2trt_util.pad_with_value(mlvl_scores, 2, 1, 0.)

    nms_out = self.rcnn_nms(
        mlvl_scores,
        mlvl_proposals,
        mlvl_proposals.shape[1],
        self.test_cfg.max_per_img)
    num_detected, proposals, scores, cls_id = nms_out

    if not head.with_score_voting:
        return num_detected, proposals, scores, cls_id
    return self.score_voting_batched(
        num_detected, proposals, scores, cls_id,
        mlvl_proposals, mlvl_scores, self.test_cfg.score_thr)
def forward(self, feat, x):
    """Decode refined point-based box predictions and run batched NMS.

    The wrapped module may return either three outputs (older layout, the
    middle one is ignored) or two outputs (newer layout).  Class maps are
    turned into sigmoid scores, box maps into ``(batch, -1, 4)`` distances,
    and ``batched_distance2bbox`` converts points plus distances into boxes
    bounded by the spatial size of ``x``.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; ``x.shape[2:]`` is used as the box limit.

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)`` as returned by
        ``self.rcnn_nms``.
    """
    head = self.module
    cfg = self.test_cfg

    outputs = head(feat)
    if len(outputs) == 3:
        # Older layout: the second output is not needed here.
        cls_maps, _, refined_maps = outputs
    else:
        # Newer layout: only class scores and refined boxes are returned.
        cls_maps, refined_maps = outputs

    point_sets = self.get_points(cls_maps)
    keep_per_level = cfg.get('nms_pre', -1)

    box_parts, score_parts = [], []
    for cls_map, box_map, pts in zip(cls_maps, refined_maps, point_sets):
        cls_prob = cls_map.permute(0, 2, 3, 1)
        cls_prob = cls_prob.reshape(
            cls_map.shape[0], -1, head.cls_out_channels).sigmoid()
        distances = box_map.permute(0, 2, 3, 1)
        distances = distances.reshape(box_map.shape[0], -1, 4)
        pts = pts.unsqueeze(0).expand_as(distances[:, :, :2])

        if keep_per_level > 0:
            # Concatenate padding so top-k is possible; acknowledged as a
            # workaround by the original author.
            cls_prob = mm2trt_util.pad_with_value(
                cls_prob, 1, keep_per_level, 0.)
            distances = mm2trt_util.pad_with_value(
                distances, 1, keep_per_level)
            pts = mm2trt_util.pad_with_value(pts, 1, keep_per_level)

            order = cls_prob.max(dim=2)[0].topk(keep_per_level, dim=1)[1]
            pts = mm2trt_util.gather_topk(pts, 1, order)
            distances = mm2trt_util.gather_topk(distances, 1, order)
            cls_prob = mm2trt_util.gather_topk(cls_prob, 1, order)

        box_parts.append(batched_distance2bbox(pts, distances, x.shape[2:]))
        score_parts.append(cls_prob)

    mlvl_scores = torch.cat(score_parts, dim=1)
    # Insert a singleton axis at dim 2 of the box tensor before NMS.
    mlvl_proposals = torch.cat(box_parts, dim=1).unsqueeze(2)

    final_keep = min(max(1000, keep_per_level), mlvl_scores.shape[1])
    order = mlvl_scores.max(dim=2)[0].topk(final_keep, dim=1)[1]
    mlvl_proposals = mm2trt_util.gather_topk(mlvl_proposals, 1, order)
    mlvl_scores = mm2trt_util.gather_topk(mlvl_scores, 1, order)

    num_detected, proposals, scores, cls_id = self.rcnn_nms(
        mlvl_scores,
        mlvl_proposals,
        mlvl_proposals.shape[1],
        self.test_cfg.max_per_img)
    return num_detected, proposals, scores, cls_id
def forward(self, feat, x):
    """Decode anchor-based predictions per level and run batched NMS.

    ``self.bbox_coder`` is asked to return at least ``nms_pre`` candidates
    per level (``min_num_bboxes=nms_pre``).  Candidates are ranked by their
    best class score; with softmax classification the last score column is
    left out of the ranking.  With sigmoid classification one zero column is
    appended on dim 2 before NMS.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; only handed to ``self.bbox_coder`` as ``input_x``.

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)`` as returned by
        ``self.rcnn_nms``.
    """
    cls_maps, reg_maps = self.module(feat)
    anchor_sets = self.anchor_generator(cls_maps, device=cls_maps[0].device)
    keep_per_level = self.test_cfg.get('nms_pre', -1)
    sigmoid_cls = self.use_sigmoid_cls

    score_parts, box_parts = [], []
    for cls_map, reg_map, anchor_set in zip(cls_maps, reg_maps, anchor_sets):
        cls_prob, boxes = self.bbox_coder(
            cls_map,
            reg_map,
            anchor_set,
            min_num_bboxes=keep_per_level,
            num_classes=cls_map.shape[1] * 4 // reg_map.shape[1],
            use_sigmoid_cls=sigmoid_cls,
            input_x=x)

        if keep_per_level > 0:
            # Softmax output: the last column is excluded from the ranking.
            rankable = cls_prob if sigmoid_cls else cls_prob[:, :, :-1]
            order = rankable.max(dim=2)[0].topk(keep_per_level, dim=1)[1]
            boxes = mm2trt_util.gather_topk(boxes, 1, order)
            cls_prob = mm2trt_util.gather_topk(cls_prob, 1, order)

        score_parts.append(cls_prob)
        box_parts.append(boxes)

    mlvl_scores = torch.cat(score_parts, dim=1)
    mlvl_proposals = torch.cat(box_parts, dim=1)

    if sigmoid_cls:
        best_score = mlvl_scores.max(dim=2)[0]
    else:
        last_col = mlvl_scores.shape[2] - 1
        best_score = mlvl_scores[:, :, :last_col].max(dim=2)[0]

    final_keep = min(max(1000, keep_per_level), mlvl_scores.shape[1])
    order = best_score.topk(final_keep, dim=1)[1]
    mlvl_proposals = mm2trt_util.gather_topk(mlvl_proposals, 1, order)
    mlvl_scores = mm2trt_util.gather_topk(mlvl_scores, 1, order)

    if sigmoid_cls:
        # Append one all-zero score column on the class axis.
        zero_col = mlvl_scores.new_zeros(
            mlvl_scores.shape[0], mlvl_scores.shape[1], 1)
        mlvl_scores = torch.cat([mlvl_scores, zero_col], dim=2)

    num_detected, proposals, scores, cls_id = self.rcnn_nms(
        mlvl_scores,
        mlvl_proposals,
        mlvl_proposals.shape[1],
        self.test_cfg.max_per_img)
    return num_detected, proposals, scores, cls_id
def forward(self, feats, x):
    """Decode per-level prediction maps into boxes/scores and run NMS.

    Each prediction map is moved to channels-last and reshaped to
    ``(batch, -1, self.num_attrib)``.  Attributes ``0:2`` are passed through
    a sigmoid, ``2:4`` are used as-is, attribute ``4`` is the objectness
    logit and ``5:`` are the class logits.

    The first two attributes are activated exactly once.  An earlier version
    wrote the sigmoid back into ``pred_map`` in place and then applied
    ``torch.sigmoid`` again, so the decoder received doubly squashed
    offsets.  The in-place write is gone, which also leaves the reshaped
    tensor unmodified.

    Args:
        feats: Features forwarded unchanged to ``self.module``.
        x: Network input (not read by this method).

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)`` as returned by
        ``self.rcnn_nms``.
    """
    module = self.module
    cfg = self.test_cfg

    pred_maps_list = module(feats)[0]
    multi_lvl_anchors = self.anchor_generator(
        pred_maps_list, device=pred_maps_list[0].device)

    # Both settings are loop-invariant, so read them once.
    nms_pre = cfg.get('nms_pre', -1)
    conf_thr = cfg.get('conf_thr', -1)

    multi_lvl_bboxes = []
    multi_lvl_cls_scores = []
    multi_lvl_conf_scores = []
    for i in range(self.num_levels):
        # Key information for the current scale.
        pred_map = pred_maps_list[i]
        stride = self.featmap_strides[i]
        batch_size = pred_map.shape[0]

        pred_map = pred_map.permute(0, 2, 3, 1).reshape(
            batch_size, -1, self.num_attrib)

        # Single sigmoid on the first two attributes; the next two pass
        # through untouched to the coder.
        pred_map_pre_proposal = torch.sigmoid(pred_map[..., :2])
        pred_map_post_proposal = pred_map[..., 2:4]
        pred_map_proposal = torch.cat(
            [pred_map_pre_proposal, pred_map_post_proposal], dim=-1)

        anchors = multi_lvl_anchors[i].unsqueeze(0).expand_as(
            pred_map_proposal)
        bbox_pred = self.bbox_coder.decode(anchors, pred_map_proposal, stride)

        conf_pred = torch.sigmoid(pred_map[..., 4]).view(batch_size, -1)
        # Cls pred one-hot.
        cls_pred = torch.sigmoid(pred_map[..., 5:]).view(
            batch_size, -1, self.num_classes)

        if nms_pre > 0:
            # Pad along dim 1 first so that topk(nms_pre) can run.
            conf_pred = mm2trt_util.pad_with_value(conf_pred, 1, nms_pre, 0.)
            cls_pred = mm2trt_util.pad_with_value(cls_pred, 1, nms_pre)
            bbox_pred = mm2trt_util.pad_with_value(bbox_pred, 1, nms_pre)

            _, topk_inds = conf_pred.topk(nms_pre, dim=1)
            conf_pred = mm2trt_util.gather_topk(conf_pred, 1, topk_inds)
            cls_pred = mm2trt_util.gather_topk(cls_pred, 1, topk_inds)
            bbox_pred = mm2trt_util.gather_topk(bbox_pred, 1, topk_inds)

        # Zero the objectness of candidates below the confidence threshold.
        conf_inds = conf_pred.ge(conf_thr).float()
        conf_pred = conf_pred * conf_inds

        multi_lvl_bboxes.append(bbox_pred)
        multi_lvl_cls_scores.append(cls_pred)
        multi_lvl_conf_scores.append(conf_pred)

    multi_lvl_bboxes = torch.cat(multi_lvl_bboxes, dim=1)
    multi_lvl_cls_scores = torch.cat(multi_lvl_cls_scores, dim=1)
    multi_lvl_conf_scores = torch.cat(multi_lvl_conf_scores, dim=1)

    # Class scores are weighted by the objectness.
    multi_lvl_cls_scores = (
        multi_lvl_cls_scores * multi_lvl_conf_scores.unsqueeze(2))
    multi_lvl_bboxes = multi_lvl_bboxes.unsqueeze(2)

    num_bboxes = multi_lvl_bboxes.shape[1]
    num_detected, proposals, scores, cls_id = self.rcnn_nms(
        multi_lvl_cls_scores, multi_lvl_bboxes, num_bboxes,
        self.test_cfg.max_per_img)
    return num_detected, proposals, scores, cls_id
def score_voting_batched(self, num_detected, proposals, scores, cls_id,
                         mlvl_bboxes, mlvl_nms_scores, score_thr):
    """Replace each detected box by a weighted average of candidate boxes.

    Every candidate box is weighted by
    ``exp(-(1 - iou)**2 / 0.025) * candidate_score``, where ``iou`` is its
    overlap with the detection and ``candidate_score`` is the candidate's
    score for the detection's class.  Detections with a negative ``cls_id``
    are redirected to score column ``module.cls_out_channels``.

    Args:
        num_detected: Passed through unchanged; only its batch size is read.
        proposals: Detected boxes produced by NMS.
        scores: Detection scores, passed through unchanged.
        cls_id: Detection class ids, passed through unchanged.
        mlvl_bboxes: Candidate boxes; viewed as ``(batch, -1, 4)``.
        mlvl_nms_scores: Candidate scores, gathered along dim 2 by class id.
        score_thr: Accepted for interface compatibility; not read here.

    Returns:
        tuple: ``(num_detected, voted_boxes, scores, cls_id)``.
    """
    head = self.module
    n_batch = num_detected.size(0)
    candidates = mlvl_bboxes.view(n_batch, -1, 4)
    iou_eps = mlvl_nms_scores.new_tensor([1e-6])

    # Map invalid (negative) class ids onto column ``cls_out_channels``.
    is_valid = (cls_id >= 0).float()
    safe_cls = cls_id * is_valid + (1 - is_valid) * head.cls_out_channels
    safe_cls = safe_cls.long()

    ious = bbox_overlaps_batched(proposals, candidates, eps=iou_eps)

    # Look up, for each (detection, candidate) pair, the candidate's score
    # in the detection's class.
    gather_index = safe_cls.unsqueeze(-1).expand_as(ious).permute(0, 2, 1)
    cand_scores = mm2trt_util.gather_topk(mlvl_nms_scores, 2, gather_index)
    cand_scores = cand_scores.permute(0, 2, 1)

    iou_kernel = torch.exp(-((1 - ious) ** 2) / 0.025)
    weights = (iou_kernel * cand_scores).unsqueeze(-1)

    weighted_sum = torch.sum(weights * candidates.unsqueeze(1), dim=2)
    # The small constant keeps the division finite when all weights vanish.
    normaliser = torch.sum(weights, dim=2) + 1e-10
    voted_boxes = weighted_sum / normaliser

    return num_detected, voted_boxes, scores, cls_id
def forward(self, feat, x):
    """Produce region proposals from the wrapped proposal head.

    Each level is decoded by ``self.bbox_coder`` with a single class and
    reduced to the ``nms_pre`` highest-scoring candidates; the levels are
    concatenated and filtered by ``self.rpn_nms``.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; only handed to ``self.bbox_coder`` as ``input_x``.

    Returns:
        The proposals returned by ``self.rpn_nms`` (its second output).
    """
    head = self.module
    cfg = self.test_cfg
    # A non-positive configured value falls back to 1000, so the per-level
    # top-k below always runs.
    keep_per_level = cfg.nms_pre if cfg.nms_pre > 0 else 1000
    keep_after_nms = cfg.nms_post
    sigmoid_cls = head.use_sigmoid_cls

    cls_maps, reg_maps = head(feat)
    anchor_sets = self.anchor_generator(cls_maps, device=cls_maps[0].device)

    score_parts, box_parts = [], []
    for cls_map, reg_map, anchor_set in zip(cls_maps, reg_maps, anchor_sets):
        lvl_scores, lvl_boxes = self.bbox_coder(
            cls_map,
            reg_map,
            anchor_set,
            min_num_bboxes=keep_per_level,
            num_classes=1,
            use_sigmoid_cls=sigmoid_cls,
            input_x=x)

        order = lvl_scores.squeeze(2).topk(keep_per_level, dim=1)[1]
        box_parts.append(mm2trt_util.gather_topk(lvl_boxes, 1, order))
        score_parts.append(mm2trt_util.gather_topk(lvl_scores, 1, order))

    all_scores = torch.cat(score_parts, dim=1)
    all_boxes = torch.cat(box_parts, dim=1)

    nms_out = self.rpn_nms(
        all_scores, all_boxes, all_scores.size(1), keep_after_nms)
    return nms_out[1]
def get_bboxes(self, mlvl_anchors, cls_scores, bbox_preds, img_metas,
               test_cfg):
    """Decode proposal-head outputs with caller-supplied anchors and run NMS.

    The pre-NMS budget comes entirely from the ``test_cfg`` argument.  An
    earlier version tested ``self.test_cfg.nms_pre`` but used
    ``test_cfg.nms_pre``; when the two configs differed, a non-positive
    value could reach ``topk``.

    Args:
        mlvl_anchors: One anchor tensor per level; each is ``squeeze(0)``-ed
            before being handed to ``self.bbox_coder``.
        cls_scores: One classification map per level.
        bbox_preds: One regression map per level.
        img_metas: Mapping whose ``'x'`` entry is forwarded to the coder as
            ``input_x``.
        test_cfg: Config providing ``nms_pre`` and ``nms_post``.

    Returns:
        The proposals returned by ``self.rpn_nms`` (its second output).
    """
    x = img_metas['x']
    # Non-positive nms_pre falls back to 1000, so it is always positive.
    nms_pre = test_cfg.nms_pre if test_cfg.nms_pre > 0 else 1000
    nms_post = test_cfg.nms_post
    use_sigmoid_cls = self.module.use_sigmoid_cls

    mlvl_scores = []
    mlvl_proposals = []
    for rpn_cls_score, rpn_bbox_pred, anchors in zip(
            cls_scores, bbox_preds, mlvl_anchors):
        scores, proposals = self.bbox_coder(
            rpn_cls_score,
            rpn_bbox_pred,
            anchors.squeeze(0),
            min_num_bboxes=nms_pre,
            num_classes=1,
            use_sigmoid_cls=use_sigmoid_cls,
            input_x=x)

        # Keep the nms_pre best-scoring candidates of this level.
        _, topk_inds = scores.squeeze(2).topk(nms_pre, dim=1)
        proposals = mm2trt_util.gather_topk(proposals, 1, topk_inds)
        scores = mm2trt_util.gather_topk(scores, 1, topk_inds)

        mlvl_scores.append(scores)
        mlvl_proposals.append(proposals)

    scores = torch.cat(mlvl_scores, dim=1)
    proposals = torch.cat(mlvl_proposals, dim=1)

    _, proposals, scores, _ = self.rpn_nms(
        scores, proposals, scores.size(1), nms_post)
    return proposals
def forward(self, feat, proposals, img_shape):
    """Run the box branch (and optionally the mask branch) on proposals.

    Proposals are flattened to ``(-1, 4)`` and prefixed with a per-row index
    column built by ``mm2trt_util.arange_by_input`` to form the RoIs.

    Args:
        feat: Features passed to ``self._bbox_forward`` /
            ``self._mask_forward``.
        proposals: Proposal boxes; dim 0 is the batch, dim 1 the proposals.
        img_shape: Forwarded to ``self.bbox_head.get_bboxes``.

    Returns:
        list: ``[num_detections, det_boxes, det_scores, det_classes]`` plus
        the mask predictions as a fifth item when ``self.enable_mask`` is
        set.
    """
    batch_size, num_proposals = proposals.shape[0], proposals.shape[1]

    # One index per proposal row, prepended as the first RoI column.
    roi_index = mm2trt_util.arange_by_input(proposals, 0).unsqueeze(1)
    roi_index = roi_index.repeat(1, num_proposals).view(-1, 1)
    rois = torch.cat([roi_index, proposals.view(-1, 4)], dim=1)

    # Box branch.
    bbox_out = self._bbox_forward(feat, rois)
    detections = self.bbox_head.get_bboxes(
        rois, bbox_out['cls_score'], bbox_out['bbox_pred'], img_shape,
        batch_size, num_proposals, self.test_cfg)
    num_detections, det_boxes, det_scores, det_classes = detections
    result = [num_detections, det_boxes, det_scores, det_classes]

    if not self.enable_mask:
        return result

    # Mask branch: the detected boxes become the mask RoIs.
    num_mask_proposals = det_boxes.size(1)
    mask_index = mm2trt_util.arange_by_input(det_boxes, 0).unsqueeze(1)
    mask_index = mask_index.repeat(1, num_mask_proposals).view(-1, 1)
    mask_rois = torch.cat([mask_index, det_boxes.view(-1, 4)], dim=1)

    mask_pred = self._mask_forward(feat, mask_rois)['mask_pred']
    mc, mh, mw = mask_pred.shape[1:]
    mask_pred = mask_pred.reshape(batch_size, -1, mc, mh, mw).sigmoid()

    if not self.module.mask_head.class_agnostic:
        # Shift class ids by one and prepend an all-zero mask channel, then
        # pick the channel that belongs to each detection's class.
        det_index = det_classes.unsqueeze(-1).long() + 1
        zero_channel = mask_pred[:, :, 0:1, ...] * 0
        mask_pred = torch.cat([zero_channel, mask_pred], dim=2)
        mask_pred = mm2trt_util.gather_topk(
            mask_pred, dim=2, index=det_index)
        mask_pred = mask_pred.squeeze(2)

    result += [mask_pred]
    return result
def forward(self, feat, proposals, img_shape):
    """Run the multi-stage box branch (and optional mask branch).

    Boxes are refined stage by stage: every stage except the last regresses
    the RoIs with the class picked by ``argmax`` over its own scores.  The
    final detections come from the last ``bbox_head``.

    NOTE(review): only the last stage's ``cls_score`` feeds the final
    detections; per-stage scores are not combined.  Confirm this is
    intended.

    Args:
        feat: Features used by the box, semantic and mask sub-modules.
        proposals: Proposal boxes; dim 0 is the batch, dim 1 the proposals.
        img_shape: Forwarded to ``regress_by_class`` and ``get_bboxes``.

    Returns:
        list: ``[num_detections, det_boxes, det_scores, det_classes]`` plus
        the mask predictions as a fifth item when ``self.enable_mask`` is
        set.
    """
    batch_size, num_proposals = proposals.shape[0], proposals.shape[1]

    # One index per proposal row, prepended as the first RoI column.
    roi_index = mm2trt_util.arange_by_input(proposals, 0).unsqueeze(1)
    roi_index = roi_index.repeat(1, num_proposals).view(-1, 1)
    rois = proposals.view(-1, 4)

    semantic_feat = None
    if self.module.with_semantic:
        _, semantic_feat = self.semantic_head(feat)

    last_stage = self.num_stages - 1
    for stage in range(self.num_stages):
        bbox_results = self._bbox_forward(
            stage,
            feat,
            torch.cat([roi_index, rois], dim=1),
            semantic_feat=semantic_feat)
        if stage < last_stage:
            stage_label = bbox_results['cls_score'].argmax(dim=1)
            rois = self.bbox_head[stage].regress_by_class(
                rois, stage_label, bbox_results['bbox_pred'], img_shape)

    rois = torch.cat([roi_index, rois], dim=1)

    # Final detections from the last stage's predictions.
    detections = self.bbox_head[-1].get_bboxes(
        rois, bbox_results['cls_score'], bbox_results['bbox_pred'],
        img_shape, batch_size, num_proposals, self.test_cfg)
    num_detections, det_boxes, det_scores, det_classes = detections
    result = [num_detections, det_boxes, det_scores, det_classes]

    if not self.enable_mask:
        return result

    # Mask branch: the detected boxes become the mask RoIs.
    num_mask_proposals = det_boxes.size(1)
    mask_index = mm2trt_util.arange_by_input(det_boxes, 0).unsqueeze(1)
    mask_index = mask_index.repeat(1, num_mask_proposals).view(-1, 1)
    mask_rois = torch.cat([mask_index, det_boxes.view(-1, 4)], dim=1)

    mask_roi_extractor = self.mask_roi_extractor[-1]
    mask_feats = mask_roi_extractor(
        feat[:mask_roi_extractor.num_inputs], mask_rois)
    if self.module.with_semantic and (
            'mask' in self.module.semantic_fusion):
        mask_semantic_feat = self.semantic_roi_extractor(
            [semantic_feat], mask_rois)
        mask_feats += mask_semantic_feat

    last_feat = None
    stage_masks = []
    for stage in range(self.num_stages):
        stage_head = self.mask_head[stage]
        if self.module.mask_info_flow:
            # Each stage also receives the previous stage's feature.
            stage_pred, last_feat = stage_head(mask_feats, last_feat)
        else:
            stage_pred = stage_head(mask_feats)
        stage_masks.append(stage_pred.sigmoid())

    mask_pred = merge_aug_masks(stage_masks, self.test_cfg)
    mc, mh, mw = mask_pred.shape[1:]
    mask_pred = mask_pred.reshape(batch_size, -1, mc, mh, mw)

    if not self.module.mask_head[-1].class_agnostic:
        # Shift class ids by one and prepend an all-zero mask channel, then
        # pick the channel that belongs to each detection's class.
        det_index = det_classes.unsqueeze(-1).long() + 1
        zero_channel = mask_pred[:, :, 0:1, ...] * 0
        mask_pred = torch.cat([zero_channel, mask_pred], dim=2)
        mask_pred = mm2trt_util.gather_topk(
            mask_pred, dim=2, index=det_index)
        mask_pred = mask_pred.squeeze(2)

    result += [mask_pred]
    return result
def forward(self, feat, x):
    """Decode guided-anchor predictions per level and run batched NMS.

    Class logits are activated exactly once: sigmoid when
    ``module.use_sigmoid_cls`` is set, softmax otherwise.  An earlier
    version applied ``.sigmoid()`` while reshaping and then activated the
    result again, which squeezed sigmoid scores into roughly (0.5, 0.73)
    and fed probabilities instead of logits to the softmax.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; ``x.shape[2:]`` is the ``max_shape`` used when
            decoding boxes.

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)`` as returned by
        ``self.rcnn_nms``.
    """
    img_shape = x.shape[2:]
    module = self.module
    cfg = self.test_cfg

    cls_scores, bbox_preds, shape_preds, loc_preds = module(feat)
    _, mlvl_anchors, mlvl_masks = self.get_anchors(
        cls_scores, shape_preds, loc_preds, use_loc_filter=True)

    mlvl_scores = []
    mlvl_proposals = []
    nms_pre = cfg.get('nms_pre', -1)
    for cls_score, bbox_pred, anchors, mask in zip(
            cls_scores, bbox_preds, mlvl_anchors, mlvl_masks):
        logits = cls_score.permute(0, 2, 3, 1).reshape(
            cls_score.shape[0], -1, module.cls_out_channels)
        # Single activation of the raw logits.
        if module.use_sigmoid_cls:
            scores = logits.sigmoid()
        else:
            scores = logits.softmax(-1)

        # Suppress candidates rejected by the location mask.
        scores = scores * mask.unsqueeze(2)
        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(
            bbox_pred.shape[0], -1, 4)

        if nms_pre > 0:
            # Concatenate padding so top-k is possible; acknowledged as a
            # workaround by the original author.
            scores = mm2trt_util.pad_with_value(scores, 1, nms_pre, 0.)
            bbox_pred = mm2trt_util.pad_with_value(bbox_pred, 1, nms_pre)
            anchors = mm2trt_util.pad_with_value(anchors, 1, nms_pre)

            max_scores, _ = scores.max(dim=2)
            _, topk_inds = max_scores.topk(nms_pre, dim=1)
            bbox_pred = mm2trt_util.gather_topk(bbox_pred, 1, topk_inds)
            scores = mm2trt_util.gather_topk(scores, 1, topk_inds)
            anchors = mm2trt_util.gather_topk(anchors, 1, topk_inds)

        proposals = self.bbox_coder.decode(
            anchors, bbox_pred, max_shape=img_shape)

        mlvl_scores.append(scores)
        mlvl_proposals.append(proposals)

    mlvl_scores = torch.cat(mlvl_scores, dim=1)
    mlvl_proposals = torch.cat(mlvl_proposals, dim=1)
    mlvl_proposals = mlvl_proposals.unsqueeze(2)

    # Second, cross-level trim based on the best score per candidate.
    max_scores, _ = mlvl_scores.max(dim=2)
    topk_pre = max(1000, nms_pre)
    _, topk_inds = max_scores.topk(
        min(topk_pre, mlvl_scores.shape[1]), dim=1)
    mlvl_scores = mm2trt_util.gather_topk(mlvl_scores, 1, topk_inds)
    mlvl_proposals = mm2trt_util.gather_topk(mlvl_proposals, 1, topk_inds)

    num_bboxes = mlvl_proposals.shape[1]
    num_detected, proposals, scores, cls_id = self.rcnn_nms(
        mlvl_scores, mlvl_proposals, num_bboxes, self.test_cfg.max_per_img)
    return num_detected, proposals, scores, cls_id
def forward(self, feat, x):
    """Decode point-set predictions into clamped boxes and run batched NMS.

    Class logits are activated exactly once: sigmoid when
    ``module.use_sigmoid_cls`` is set, otherwise softmax with the last
    column dropped.  An earlier version applied ``.sigmoid()`` while
    reshaping and then activated again, which distorted every score.

    Boxes are ``bbox_pred * point_stride + (px, py, px, py)``, clamped to
    the spatial size of ``x``.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; ``x.shape[2:]`` gives the clamp limits.

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)`` as returned by
        ``self.rcnn_nms``.
    """
    img_shape = x.shape[2:]
    module = self.module
    cfg = self.test_cfg

    cls_scores, _, pts_preds_refine = module(feat)
    bbox_preds_refine = [
        module.points2bbox(pts_pred_refine)
        for pts_pred_refine in pts_preds_refine
    ]
    mlvl_points = [
        self.point_generators[i](cls_score, module.point_strides[i])
        for i, cls_score in enumerate(cls_scores)
    ]

    # Loop-invariant, so read once.
    nms_pre = cfg.get('nms_pre', -1)

    mlvl_bboxes = []
    mlvl_scores = []
    for i_lvl, (cls_score, bbox_pred, points) in enumerate(
            zip(cls_scores, bbox_preds_refine, mlvl_points)):
        logits = cls_score.permute(0, 2, 3, 1).reshape(
            cls_score.shape[0], -1, module.cls_out_channels)
        # Single activation of the raw logits.
        if module.use_sigmoid_cls:
            scores = logits.sigmoid()
        else:
            scores = logits.softmax(-1)[:, :, :-1]

        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(
            bbox_pred.shape[0], -1, 4)
        points = points[:, :2].unsqueeze(0).expand_as(bbox_pred[:, :, :2])

        if nms_pre > 0:
            # Concatenate padding so top-k is possible; acknowledged as a
            # workaround by the original author.
            scores = mm2trt_util.pad_with_value(scores, 1, nms_pre, 0.)
            bbox_pred = mm2trt_util.pad_with_value(bbox_pred, 1, nms_pre)
            points = mm2trt_util.pad_with_value(points, 1, nms_pre)

            max_scores, _ = scores.max(dim=2)
            _, topk_inds = max_scores.topk(nms_pre, dim=1)
            bbox_pred = mm2trt_util.gather_topk(bbox_pred, 1, topk_inds)
            scores = mm2trt_util.gather_topk(scores, 1, topk_inds)
            points = mm2trt_util.gather_topk(points, 1, topk_inds)

        # Repeat the point so it offsets both box corners.
        bbox_pos_center = torch.cat(
            [points[:, :, :2], points[:, :, :2]], dim=2)
        bboxes = bbox_pred * module.point_strides[i_lvl] + bbox_pos_center
        x1 = bboxes[:, :, 0].clamp(min=0, max=img_shape[1])
        y1 = bboxes[:, :, 1].clamp(min=0, max=img_shape[0])
        x2 = bboxes[:, :, 2].clamp(min=0, max=img_shape[1])
        y2 = bboxes[:, :, 3].clamp(min=0, max=img_shape[0])
        bboxes = torch.stack([x1, y1, x2, y2], dim=-1)

        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)

    mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
    mlvl_scores = torch.cat(mlvl_scores, dim=1)
    mlvl_bboxes = mlvl_bboxes.unsqueeze(2)

    # Top-k again across all levels.
    if nms_pre > 0:
        max_scores, _ = mlvl_scores.max(dim=2)
        _, topk_inds = max_scores.topk(nms_pre, dim=1)
        mlvl_bboxes = mm2trt_util.gather_topk(mlvl_bboxes, 1, topk_inds)
        mlvl_scores = mm2trt_util.gather_topk(mlvl_scores, 1, topk_inds)

    num_bboxes = mlvl_bboxes.shape[1]
    num_detected, proposals, scores, cls_id = self.rcnn_nms(
        mlvl_scores, mlvl_bboxes, num_bboxes, self.test_cfg.max_per_img)
    return num_detected, proposals, scores, cls_id
def forward(self, feat, x):
    """Decode integral-based box predictions per level and run batched NMS.

    Per level, class maps become sigmoid scores, the regression map is
    passed through ``self.batched_integral`` and scaled by the first entry
    of that level's stride, and ``batched_distance2bbox`` converts anchor
    centres plus distances into boxes bounded by the spatial size of ``x``.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; ``x.shape[2:]`` is the ``max_shape`` for boxes.

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)`` as returned by
        ``self.rcnn_nms``.
    """
    input_hw = x.shape[2:]
    head = self.module
    cfg = self.test_cfg

    cls_maps, reg_maps = head(feat)
    anchor_sets = self.anchor_generator(cls_maps, device=cls_maps[0].device)
    keep_per_level = cfg.get('nms_pre', -1)

    score_parts, box_parts = [], []
    for lvl, cls_map in enumerate(cls_maps):
        reg_map = reg_maps[lvl]
        lvl_anchors = anchor_sets[lvl].unsqueeze(0)
        lvl_stride = head.anchor_generator.strides[lvl]

        cls_prob = cls_map.permute(0, 2, 3, 1)
        cls_prob = cls_prob.reshape(
            cls_map.shape[0], -1, head.cls_out_channels).sigmoid()

        distances = self.batched_integral(
            head.integral, reg_map.permute(0, 2, 3, 1))
        distances = distances * lvl_stride[0]

        if keep_per_level > 0:
            # Concatenate padding so top-k is possible; acknowledged as a
            # workaround by the original author.
            cls_prob = mm2trt_util.pad_with_value(
                cls_prob, 1, keep_per_level, 0.)
            distances = mm2trt_util.pad_with_value(
                distances, 1, keep_per_level)
            lvl_anchors = mm2trt_util.pad_with_value(
                lvl_anchors, 1, keep_per_level)

            order = cls_prob.max(dim=2)[0].topk(keep_per_level, dim=1)[1]
            cls_prob = mm2trt_util.gather_topk(cls_prob, 1, order)
            distances = mm2trt_util.gather_topk(distances, 1, order)
            lvl_anchors = mm2trt_util.gather_topk(lvl_anchors, 1, order)

        centres = self.batched_anchor_center(lvl_anchors)
        box_parts.append(
            batched_distance2bbox(centres, distances, max_shape=input_hw))
        score_parts.append(cls_prob)

    mlvl_scores = torch.cat(score_parts, dim=1)
    # Insert a singleton axis at dim 2 of the box tensor before NMS.
    mlvl_proposals = torch.cat(box_parts, dim=1).unsqueeze(2)

    final_keep = min(max(1000, keep_per_level), mlvl_scores.size(1))
    order = mlvl_scores.max(dim=2)[0].topk(final_keep, dim=1)[1]
    mlvl_proposals = mm2trt_util.gather_topk(mlvl_proposals, 1, order)
    mlvl_scores = mm2trt_util.gather_topk(mlvl_scores, 1, order)

    num_detected, proposals, scores, cls_id = self.rcnn_nms(
        mlvl_scores,
        mlvl_proposals,
        mlvl_proposals.shape[1],
        self.test_cfg.max_per_img)
    return num_detected, proposals, scores, cls_id
def forward(self, feat, x):
    """Decode bucketed box predictions per level and run batched NMS.

    Each entry of the module's box output is a pair: element 0 is the
    bucket-classification map and element 1 the bucket-regression map.
    Both are reshaped to ``(batch, -1, self.side_num * 4)`` and decoded by
    ``self.bbox_coder`` into boxes and confidences; final scores are class
    scores times those confidences.

    With softmax classification, the per-level ranking drops the last
    *class* column via ``scores[:, :, :-1]``.  An earlier version sliced
    ``scores[:, :-1]``, which dropped the last *candidate* instead: the
    last class stayed in the ranking and one fewer candidate was available
    than ``topk(nms_pre)`` expects.

    Args:
        feat: Features forwarded to ``self.module``; ``feat[0].size(0)`` is
            taken as the batch size.
        x: Network input; ``x.shape[2:]`` is the ``max_shape`` for decoding.

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)`` as returned by
        ``self.rcnn_nms``.
    """
    batch_size = feat[0].size(0)
    module = self.module
    img_shape = x.shape[2:]
    cfg = self.test_cfg

    cls_scores, bbox_preds = module(feat)
    mlvl_anchors = self.square_anchor_generator(
        cls_scores, device=cls_scores[0].device)

    mlvl_scores = []
    mlvl_bboxes = []
    mlvl_confids = []
    nms_pre = cfg.get('nms_pre', -1)

    bbox_cls_preds = [bb[0] for bb in bbox_preds]
    bbox_reg_preds = [bb[1] for bb in bbox_preds]
    for cls_score, bbox_cls_pred, bbox_reg_pred, anchors in zip(
            cls_scores, bbox_cls_preds, bbox_reg_preds, mlvl_anchors):
        cls_score = cls_score.permute(0, 2, 3, 1).reshape(
            batch_size, -1, module.cls_out_channels)
        if self.use_sigmoid_cls:
            scores = cls_score.sigmoid()
        else:
            scores = cls_score.softmax(-1)

        bbox_cls_pred = bbox_cls_pred.permute(0, 2, 3, 1).reshape(
            batch_size, -1, self.side_num * 4)
        bbox_reg_pred = bbox_reg_pred.permute(0, 2, 3, 1).reshape(
            batch_size, -1, self.side_num * 4)
        anchors = anchors.unsqueeze(0).expand_as(bbox_cls_pred[:, :, :4])

        if nms_pre > 0:
            # Pad so dim 1 is large enough for topk(nms_pre).
            scores = mm2trt_util.pad_with_value(scores, 1, nms_pre, 0.)
            bbox_cls_pred = mm2trt_util.pad_with_value(
                bbox_cls_pred, 1, nms_pre)
            bbox_reg_pred = mm2trt_util.pad_with_value(
                bbox_reg_pred, 1, nms_pre)
            anchors = mm2trt_util.pad_with_value(anchors, 1, nms_pre)

            if self.use_sigmoid_cls:
                max_scores, _ = scores.max(dim=2)
            else:
                # Drop the last class column (dim 2), not a candidate.
                max_scores, _ = scores[:, :, :-1].max(dim=2)
            _, topk_inds = max_scores.topk(nms_pre, dim=1)
            scores = mm2trt_util.gather_topk(scores, 1, topk_inds)
            bbox_cls_pred = mm2trt_util.gather_topk(
                bbox_cls_pred, 1, topk_inds)
            bbox_reg_pred = mm2trt_util.gather_topk(
                bbox_reg_pred, 1, topk_inds)
            anchors = mm2trt_util.gather_topk(anchors, 1, topk_inds)

        # Distinct local name so the module output ``bbox_preds`` is not
        # rebound inside the loop.
        coder_inputs = [
            bbox_cls_pred.contiguous(),
            bbox_reg_pred.contiguous()
        ]
        bboxes, confids = self.bbox_coder.decode(
            anchors.contiguous(), coder_inputs, max_shape=img_shape)

        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
        mlvl_confids.append(confids)

    mlvl_bboxes = torch.cat(mlvl_bboxes, dim=1)
    mlvl_scores = torch.cat(mlvl_scores, dim=1)
    mlvl_confids = torch.cat(mlvl_confids, dim=1)

    mlvl_bboxes = mlvl_bboxes.unsqueeze(2)
    # Weight class scores by the decoder's box confidence.
    mlvl_scores = mlvl_scores * mlvl_confids.unsqueeze(-1)

    max_scores, _ = mlvl_scores.max(dim=2)
    topk_pre = max(1000, nms_pre)
    _, topk_inds = max_scores.topk(
        min(topk_pre, mlvl_scores.shape[1]), dim=1)
    mlvl_scores = mm2trt_util.gather_topk(mlvl_scores, 1, topk_inds)
    mlvl_bboxes = mm2trt_util.gather_topk(mlvl_bboxes, 1, topk_inds)

    if self.use_sigmoid_cls:
        # Append one all-zero score column on the class axis.
        padding = mlvl_scores[:, :, :1] * 0
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=2)

    # Repeat the boxes ``num_classes + 1`` times along dim 2.
    mlvl_bboxes = mlvl_bboxes.repeat(1, 1, self.num_classes + 1, 1)

    num_bboxes = mlvl_bboxes.shape[1]
    num_detected, proposals, scores, cls_id = self.rcnn_nms(
        mlvl_scores, mlvl_bboxes, num_bboxes, self.test_cfg.max_per_img)
    return num_detected, proposals, scores, cls_id
def forward(self, feat, x):
    """Produce proposals from guided-anchor proposal-head outputs.

    Each level is scored, masked by its location mask, optionally trimmed
    to ``nms_pre`` candidates, decoded and filtered by ``self.rpn_nms``.
    The per-level survivors are concatenated and the ``max_num`` best are
    returned.

    Fixes relative to the earlier version:

    * ``bbox_pred`` is bound before the ``nms_pre`` branch, so decoding no
      longer fails with an unbound local when ``nms_pre <= 0``.
    * The softmax branch yields rank-2 scores (column 0 of the two-way
      softmax), like the sigmoid branch.  It used to keep a trailing
      singleton axis that the rank-2 mask multiplication and
      ``unsqueeze(-1)`` below cannot handle.

    NOTE(review): the ``squeeze(0)`` / ``cat(dim=0)`` tail looks like it
    assumes a batch of one; confirm against the callers.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; ``x.shape[2:]`` is the ``max_shape`` for decoding.

    Returns:
        The selected proposals, indexed from the concatenated per-level
        outputs of ``self.rpn_nms``.
    """
    img_shape = x.shape[2:]
    module = self.module

    cls_scores, bbox_preds, shape_preds, loc_preds = module(feat)
    _, guided_anchors, loc_masks = self.get_anchors(
        cls_scores, shape_preds, loc_preds, use_loc_filter=True)

    mlvl_scores = []
    mlvl_proposals = []
    nms_pre = self.test_cfg.get('nms_pre', -1)
    for rpn_cls_score, rpn_bbox_pred, anchors, mask in zip(
            cls_scores, bbox_preds, guided_anchors, loc_masks):
        rpn_cls_score = rpn_cls_score.permute(0, 2, 3, 1)
        if self.use_sigmoid_cls:
            rpn_cls_score = rpn_cls_score.reshape(
                rpn_cls_score.shape[0], -1)
            scores = rpn_cls_score.sigmoid()
        else:
            rpn_cls_score = rpn_cls_score.reshape(
                rpn_cls_score.shape[0], -1, 2)
            # Keep column 0 of the two-way softmax as a rank-2 tensor.
            scores = rpn_cls_score.softmax(dim=2)[:, :, 0]

        # Suppress candidates rejected by the location mask.
        scores = scores * mask

        # Bound unconditionally so decoding works with or without top-k.
        bbox_pred = rpn_bbox_pred.permute(0, 2, 3, 1).reshape(
            rpn_bbox_pred.size(0), -1, 4)

        if nms_pre > 0:
            # Concatenate padding so top-k is possible; acknowledged as a
            # workaround by the original author.
            scores = mm2trt_util.pad_with_value(scores, 1, nms_pre, 0.)
            bbox_pred = mm2trt_util.pad_with_value(bbox_pred, 1, nms_pre)
            anchors = mm2trt_util.pad_with_value(anchors, 1, nms_pre)

            # Scores are already one value per candidate.
            _, topk_inds = scores.topk(nms_pre, dim=1)
            anchors = mm2trt_util.gather_topk(anchors, 1, topk_inds)
            bbox_pred = mm2trt_util.gather_topk(bbox_pred, 1, topk_inds)
            scores = mm2trt_util.gather_topk(scores, 1, topk_inds)

        proposals = self.bbox_coder.decode(
            anchors, bbox_pred, max_shape=img_shape)

        scores = scores.unsqueeze(-1)
        proposals = proposals.unsqueeze(2)
        _, proposals, scores, _ = self.rpn_nms(
            scores, proposals, self.test_cfg.nms_pre,
            self.test_cfg.nms_post)

        mlvl_scores.append(scores.squeeze(0))
        mlvl_proposals.append(proposals.squeeze(0))

    scores = torch.cat(mlvl_scores, dim=0)
    proposals = torch.cat(mlvl_proposals, dim=0)

    _, topk_inds = scores.topk(self.test_cfg.max_num)
    proposals = proposals[topk_inds, :]
    return proposals
def forward(self, feat, x):
    """Decode per-level point predictions into clamped boxes and run NMS.

    Class maps become sigmoid scores and box maps are exponentiated.  Each
    box is built around ``stride * (coordinate + 0.5)`` with extents
    ``base_len * bbox_pred[..., k]`` and clamped to ``[0, size - 1]`` of
    the spatial size of ``x``.

    Args:
        feat: Features forwarded unchanged to ``self.module``.
        x: Network input; ``x.shape[2:]`` gives the clamp limits.

    Returns:
        tuple: ``(num_detected, proposals, scores, cls_id)`` as returned by
        ``self.rcnn_nms``.
    """
    input_hw = x.shape[2:]
    head = self.module
    cfg = self.test_cfg
    max_x = input_hw[1] - 1
    max_y = input_hw[0] - 1

    cls_maps, reg_maps = head(feat)
    point_sets = self.get_points(cls_maps, flatten=True)
    keep_per_level = cfg.get('nms_pre', -1)

    box_parts, score_parts = [], []
    for cls_map, reg_map, stride, base_len, (pts_y, pts_x) in zip(
            cls_maps, reg_maps, head.strides, head.base_edge_list,
            point_sets):
        cls_prob = cls_map.permute(0, 2, 3, 1)
        cls_prob = cls_prob.reshape(
            cls_map.shape[0], -1, head.cls_out_channels).sigmoid()
        extents = reg_map.permute(0, 2, 3, 1)
        extents = extents.reshape(reg_map.shape[0], -1, 4).exp()

        # Shift the coordinates by half a cell.
        pts_x = pts_x.unsqueeze(0) + 0.5
        pts_y = pts_y.unsqueeze(0) + 0.5

        if keep_per_level > 0:
            # Concatenate padding so top-k is possible; acknowledged as a
            # workaround by the original author.
            cls_prob = mm2trt_util.pad_with_value(
                cls_prob, 1, keep_per_level, 0.)
            extents = mm2trt_util.pad_with_value(extents, 1, keep_per_level)
            pts_y = mm2trt_util.pad_with_value(pts_y, 1, keep_per_level)
            pts_x = mm2trt_util.pad_with_value(pts_x, 1, keep_per_level)

            order = cls_prob.max(dim=2)[0].topk(keep_per_level, dim=1)[1]
            extents = mm2trt_util.gather_topk(extents, 1, order)
            cls_prob = mm2trt_util.gather_topk(cls_prob, 1, order)
            pts_y = mm2trt_util.gather_topk(pts_y, 1, order)
            pts_x = mm2trt_util.gather_topk(pts_x, 1, order)

        left = stride * pts_x - base_len * extents[:, :, 0]
        top = stride * pts_y - base_len * extents[:, :, 1]
        right = stride * pts_x + base_len * extents[:, :, 2]
        bottom = stride * pts_y + base_len * extents[:, :, 3]
        corners = [
            left.clamp(min=0, max=max_x),
            top.clamp(min=0, max=max_y),
            right.clamp(min=0, max=max_x),
            bottom.clamp(min=0, max=max_y),
        ]
        box_parts.append(torch.stack(corners, -1))
        score_parts.append(cls_prob)

    mlvl_scores = torch.cat(score_parts, dim=1)
    # Insert a singleton axis at dim 2 of the box tensor before NMS.
    mlvl_proposals = torch.cat(box_parts, dim=1).unsqueeze(2)

    final_keep = min(max(1000, keep_per_level), mlvl_scores.shape[1])
    order = mlvl_scores.max(dim=2)[0].topk(final_keep, dim=1)[1]
    mlvl_proposals = mm2trt_util.gather_topk(mlvl_proposals, 1, order)
    mlvl_scores = mm2trt_util.gather_topk(mlvl_scores, 1, order)

    num_detected, proposals, scores, cls_id = self.rcnn_nms(
        mlvl_scores,
        mlvl_proposals,
        mlvl_proposals.shape[1],
        self.test_cfg.max_per_img)
    return num_detected, proposals, scores, cls_id