def get_bboxes(self,
               tl_result,
               br_result,
               mask,
               mid_tl_result,
               mid_br_result,
               mid_mask,
               img_metas,
               cfg,
               rescale=False):
    """Decode top-left / bottom-right head outputs into detections.

    From each result tensor the first ``self.num_classes`` channels are taken
    as the class heatmap, channels ``num_classes + 2`` and ``num_classes + 3``
    as the ``off_c`` maps, and the last two channels as the ``regr`` maps.
    These are decoded by ``_decode_center``; the candidates are then pooled,
    sorted by score, filtered, run through per-class soft-NMS and truncated
    to the ``topk`` highest scores.

    Args:
        tl_result: Top-left branch output, indexed as (batch, channel, ., .).
        br_result: Bottom-right branch output, indexed the same way.
        mask: Not used by this method.
        mid_tl_result: Not used by this method.
        mid_br_result: Not used by this method.
        mid_mask: Not used by this method.
        img_metas: Image meta information. When the batch has exactly two
            entries the first element of ``img_metas`` is used instead
            (presumably the metas of an image and its flipped copy -- TODO
            confirm against the caller). The first meta must provide
            ``'ori_shape'`` and ``'scale'``.
        cfg: Not used by this method.
        rescale: Not used by this method.

    Returns:
        list[tuple]: One ``(bboxes, labels)`` pair per entry of ``img_metas``.
        ``bboxes`` holds four box coordinates followed by the score in the
        last column; ``labels`` is a float tensor of class indices created on
        the CPU. When nothing survives, ``bboxes`` has shape ``(0, 5)``.
    """
    num_classes = self.num_classes
    tl_heat = tl_result[:, :num_classes, :, :]
    tl_off_c = tl_result[:, num_classes + 2:num_classes + 4, :, :]
    tl_regr = tl_result[:, -2:, :, :]
    br_heat = br_result[:, :num_classes, :, :]
    br_off_c = br_result[:, num_classes + 2:num_classes + 4, :, :]
    br_regr = br_result[:, -2:, :, :]

    if len(tl_heat) == 2:
        img_metas = img_metas[0]
    img_metas_1 = img_metas[0] if isinstance(img_metas, list) else img_metas

    batch_bboxes, batch_scores, batch_clses = _decode_center(
        tl_heat=tl_heat,
        br_heat=br_heat,
        tl_off_c=tl_off_c,
        br_off_c=br_off_c,
        tl_regr=tl_regr,
        br_regr=br_regr,
        img_meta=img_metas_1)

    _, w, _ = img_metas_1['ori_shape']
    scale = img_metas_1['scale']

    if len(batch_bboxes) == 2:
        # Second entry is the flipped pass: mirror columns 0 and 2 about the
        # original image width so both entries share one coordinate frame.
        batch_bboxes[1, :, [0, 2]] = w - batch_bboxes[1, :, [2, 0]]

    # Pool all candidates into a single pseudo-image of shape (1, N, .).
    batch_bboxes = batch_bboxes.view([-1, 4]).unsqueeze(0)
    batch_scores = batch_scores.view([-1, 1]).unsqueeze(0)
    batch_clses = batch_clses.view([-1, 1]).unsqueeze(0)

    result_list = []
    for img_id in range(len(img_metas)):
        bboxes = batch_bboxes[img_id]
        scores = batch_scores[img_id]
        clses = batch_clses[img_id]

        # Descending score order. The NumPy argsort (reversed) is kept so that
        # ties are ordered as before; indices stay integers instead of being
        # round-tripped through a float tensor.
        order = scores.cpu().numpy().argsort(0)[::-1]
        order = torch.from_numpy(order.copy()).long().view(-1)
        # reshape(-1, 4) rather than squeeze(): squeeze() would collapse a
        # single candidate to shape (4,) and break the concatenation below.
        bboxes = bboxes[order].reshape(-1, 4)
        scores = scores[order].view(-1)
        clses = clses[order].view(-1)

        detections = torch.cat([bboxes, scores.unsqueeze(-1)], -1)
        keepinds = detections[:, -1] > -0.1
        detections = detections[keepinds]
        labels = clses[keepinds]
        areas = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
        areas = areas[keepinds]

        if scale == 0.8:
            # At this test scale only boxes of at least 96x96 area are kept
            # and fewer detections are returned.
            large = areas >= 96**2
            detections = detections[large]
            labels = labels[large]
            topk = 35
        else:
            topk = 100

        out_bboxes = []
        out_labels = []
        # NOTE(review): the class count is hard-coded to 80; confirm whether
        # it should follow self.num_classes.
        for i in range(80):
            nms_detections = detections[labels == i]
            if nms_detections.size(0) == 0:
                continue
            nms_detections, _ = soft_nms(
                nms_detections, 0.5, 'gaussian', sigma=0.7)
            out_bboxes.append(nms_detections)
            out_labels.extend([i] * len(nms_detections))

        if out_bboxes:
            out_bboxes = torch.cat(out_bboxes)
        else:
            # Empty result on the same device/dtype as the detections; the
            # previous unconditional .cuda() failed on CPU-only machines.
            out_bboxes = detections.new_zeros((0, 5))
        out_labels = torch.Tensor(out_labels)

        if len(out_bboxes) > 0:
            # Keep the topk highest scores, best first.
            top = np.argsort(out_bboxes.cpu().numpy()[:, -1])[::-1][:topk]
            top = torch.from_numpy(top.copy()).long()
            out_bboxes = out_bboxes[top]
            out_labels = out_labels[top]

        result_list.append((out_bboxes, out_labels))
    return result_list
def merge_detects_all(data_all,
                      img_dir=None,
                      is_soft=False,
                      is_vote=False,
                      is_small_extend=False,
                      beta=5):
    """Merge the per-class detections of every image in ``data_all``.

    For each image key and each class index the detections are passed
    through ``nms``, optionally refined with ``box_utils.box_voting``,
    optionally re-scored with ``soft_nms``, optionally down-weighted near the
    image border, and finally thresholded. The per-class results of one image
    are then reduced by ``max_class_per_position``.

    Besides its arguments the function reads the names ``start0``,
    ``is_border``, ``num_cls``, ``nms_th`` and ``cls_th_v2``, which are not
    defined here (presumably module-level settings).

    Args:
        data_all: Mapping from image key to a per-class sequence of
            detections; each class entry is converted to a float32 array
            whose column 4 is used as the score.
        img_dir: Directory containing the images. Only needed when
            ``is_border`` is set, to look up the image size.
        is_soft: Keep at most the 4000 best-scoring boxes and run
            ``soft_nms`` on them.
        is_vote: Refine the NMS survivors with box voting (``IOU_WAVG``).
        is_small_extend: Not used by this function.
        beta: Forwarded to ``box_utils.box_voting``.

    Returns:
        dict: Image key -> output of ``max_class_per_position`` for that
        image.
    """
    group_coef = 0.75
    data_vote = {}
    max_dets = {}
    start = time.time()
    print('==> data load', start - start0)

    for count, (key, bxs) in enumerate(data_all.items(), 1):
        if is_border:
            # The image is decoded only when its size is needed, and only
            # once; os.path.join also copes with an img_dir that has no
            # trailing separator.
            sz = cv2.imread(os.path.join(img_dir, key)).shape

        if count % 5 == 0:
            print(count, 'time=%0.2f' % (time.time() - start))

        if key not in data_vote:
            data_vote[key] = [
                np.empty((0, 5), np.float32) for _ in range(num_cls)
            ]

        for cls in range(num_cls):
            if len(bxs[cls]) == 0:
                continue
            dets_all = np.array(bxs[cls], np.float32)
            dets_nms, _ = nms(dets_all, nms_th)

            if is_vote:
                vote_dets = box_utils.box_voting(
                    dets_nms,
                    dets_all,
                    group_coef,
                    scoring_method='IOU_WAVG',
                    beta=beta)
            else:
                vote_dets = dets_nms

            if is_soft:
                # Cap the input of soft-NMS at the 4000 best scores.
                idx = np.argsort(-vote_dets[:, 4])
                dim = min(4000, len(idx))
                vote_dets = vote_dets[idx[:dim]].copy()
                vote_dets, _ = soft_nms(vote_dets, 0.5, min_score=1e-9)

            if is_border:
                # Scale the score by 0.7 once per side that lies within 10
                # pixels of the image border (sz is cv2's (rows, cols, ...)).
                vote_dets[vote_dets[:, 0] < 10, 4] *= 0.7
                vote_dets[vote_dets[:, 1] < 10, 4] *= 0.7
                vote_dets[vote_dets[:, 3] > sz[0] - 10, 4] *= 0.7
                vote_dets[vote_dets[:, 2] > sz[1] - 10, 4] *= 0.7

            vote_dets = vote_dets[vote_dets[:, 4] > cls_th_v2]
            data_vote[key][cls] = vote_dets.copy()

        max_dets[key] = max_class_per_position(data_vote[key])
    return max_dets
def get_bboxes_v1(self,
                  pred_hm_b1,
                  pred_hm_b2,
                  pred_wh_b1,
                  pred_wh_b2,
                  img_metas,
                  cfg,
                  rescale=False):
    """Turn the predictions of the two branches into per-image detections.

    Both branches are decoded with ``self.get_bboxes_single``. Depending on
    ``self.inf_branch`` only branch ``b1``, only branch ``b2`` or the
    concatenation of both (along dim 1) is post-processed: score / wh
    filtering, clamping to the padded image, optional rescaling and, unless
    ``self.use_simple_nms`` is set, soft-NMS.

    Args:
        pred_hm_b1: Heatmap prediction of branch 1.
        pred_hm_b2: Heatmap prediction of branch 2.
        pred_wh_b1: Box-size prediction of branch 1.
        pred_wh_b2: Box-size prediction of branch 2.
        img_metas: Per-image meta dicts; ``'pad_shape'`` is always read and
            ``'scale_factor'`` is read when ``rescale`` is true.
        cfg: Test config; ``max_per_img`` (default 100) and ``score_thr``
            (default 0.01) are read with ``getattr``.
        rescale: Divide the boxes by the image ``scale_factor``.

    Returns:
        list[tuple]: One ``(bboxes, labels)`` pair per image. ``bboxes`` has
        five columns (box coordinates plus score); ``labels`` is always a
        1-D tensor, including when exactly one detection survives.
    """
    topk = getattr(cfg, 'max_per_img', 100)
    score_thr = getattr(cfg, 'score_thr', 0.01)

    # Only classes, scores, boxes and the wh filter of each branch are used.
    (_, _, clses_b1, scores_b1, bboxes_b1, _, _,
     wh_filter_b1) = self.get_bboxes_single(
         pred_hm_b1, pred_wh_b1, self.down_ratio_b1, topk, idx=0)
    (_, _, clses_b2, scores_b2, bboxes_b2, _, _,
     wh_filter_b2) = self.get_bboxes_single(
         pred_hm_b2, pred_wh_b2, self.down_ratio_b2, topk, idx=1)

    if 'b2' not in self.inf_branch:
        bboxes, scores = bboxes_b1, scores_b1
        clses, wh_filter = clses_b1, wh_filter_b1
    elif 'b1' not in self.inf_branch:
        bboxes, scores = bboxes_b2, scores_b2
        clses, wh_filter = clses_b2, wh_filter_b2
    else:
        bboxes = torch.cat([bboxes_b1, bboxes_b2], dim=1)
        scores = torch.cat([scores_b1, scores_b2], dim=1)
        clses = torch.cat([clses_b1, clses_b2], dim=1)
        wh_filter = torch.cat([wh_filter_b1, wh_filter_b2], dim=1)

    result_list = []
    for batch_i in range(bboxes.shape[0]):
        scores_per_img = scores[batch_i]
        # Keep candidates that pass the score threshold and the wh filter.
        scores_keep = ((scores_per_img > score_thr).squeeze(-1)
                       & wh_filter[batch_i])

        scores_per_img = scores_per_img[scores_keep]
        bboxes_per_img = bboxes[batch_i][scores_keep]
        labels_per_img = clses[batch_i][scores_keep].squeeze(-1)

        # Clamp even columns to the padded width, odd columns to the height.
        img_shape = img_metas[batch_i]['pad_shape']
        bboxes_per_img[:, 0::2] = bboxes_per_img[:, 0::2].clamp(
            min=0, max=img_shape[1] - 1)
        bboxes_per_img[:, 1::2] = bboxes_per_img[:, 1::2].clamp(
            min=0, max=img_shape[0] - 1)

        if rescale:
            scale_factor = img_metas[batch_i]['scale_factor']
            bboxes_per_img /= bboxes_per_img.new_tensor(scale_factor)

        if self.use_simple_nms:
            bboxes_per_img = torch.cat([bboxes_per_img, scores_per_img],
                                       dim=1)
        elif self.nms_agnostic:
            bboxes_per_img, ori_idx = soft_nms(
                torch.cat((bboxes_per_img, scores_per_img), dim=1),
                iou_thr=0.95)
            labels_per_img = labels_per_img[ori_idx]
        else:
            labels_int_flatten = labels_per_img.int()
            # Sorted so the output order does not depend on set hashing.
            unique_cls_ids = sorted(set(labels_int_flatten.tolist()))
            bboxes_per_img_per_cls = bboxes_per_img.new_zeros((0, 5))
            labels_per_img_per_cls = labels_int_flatten.new_zeros((0, ))

            for cls_id in unique_cls_ids:
                cls_id_idx = labels_int_flatten == cls_id
                soft_bboxes, ori_idx = soft_nms(
                    torch.cat((bboxes_per_img[cls_id_idx],
                               scores_per_img[cls_id_idx]),
                              dim=1),
                    iou_thr=0.6)
                unique_labels = labels_int_flatten[cls_id_idx][ori_idx]
                bboxes_per_img_per_cls = torch.cat(
                    (bboxes_per_img_per_cls, soft_bboxes), dim=0)
                labels_per_img_per_cls = torch.cat(
                    (labels_per_img_per_cls, unique_labels))

            bboxes_per_img = bboxes_per_img_per_cls
            labels_per_img = labels_per_img_per_cls.float()

        # reshape(-1) instead of squeeze(-1): squeeze would turn a single
        # surviving label of shape (1,) into a 0-d tensor.
        labels_per_img = labels_per_img.reshape(-1)
        result_list.append((bboxes_per_img, labels_per_img))
    return result_list
def get_bboxes(self,
               pred_feat,
               pred_heatmap,
               pred_wh,
               img_metas,
               cfg,
               rescale=False):
    """Decode heatmap / box-size predictions into per-image detections.

    Steps: sigmoid on the heatmap, peak suppression, top-k selection via
    ``self._topk``, gathering of the matching ``pred_wh`` entries to build
    boxes, optional score-weighted box averaging (``self.aug_reg``),
    optional refinement through ``self.align`` / ``self.wh2``
    (``self.two_stage``), then per-image thresholding, clamping, optional
    rescaling and soft-NMS (unless ``self.use_simple_nms``).

    Args:
        pred_feat: Feature map passed to ``self.align`` in two-stage mode.
        pred_heatmap: 4-D heatmap logits, unpacked as
            (batch, cat, height, width).
        pred_wh: Box-size prediction; channel dim is moved last and
            flattened over positions before gathering.
        img_metas: Per-image meta dicts; ``'pad_shape'`` is always read and
            ``'scale_factor'`` is read when ``rescale`` is true.
        cfg: Test config; ``debug`` (default False), ``max_per_img``
            (default 100) and ``score_thr`` (default 0.01) are read with
            ``getattr``.
        rescale: Divide the boxes by the image ``scale_factor``.

    Returns:
        list[tuple]: One ``(bboxes, labels)`` pair per image; ``bboxes`` has
        five columns (box coordinates plus score), ``labels`` is float.
    """
    batch, cat, height, width = pred_heatmap.size()
    pred_heatmap = pred_heatmap.detach().sigmoid_()
    wh = pred_wh.detach()

    # Suppress non-peak heatmap responses.
    if self.use_simple_nms and not getattr(cfg, 'debug', False):
        heat = simple_nms(pred_heatmap)
    else:
        heat = pred_heatmap
        kernel = 3
        hmax = nn.functional.max_pool2d(
            heat, (kernel, kernel), stride=1, padding=(kernel - 1) // 2)
        is_peak = (hmax == heat).float()
        padded = is_peak.new_zeros(batch, cat, height + 2, width + 2)
        padded[..., 1:-1, 1:-1] = is_peak
        # A position survives when it or one of its 4-connected neighbours
        # equals the local 3x3 maximum.
        cross_sum = (padded[..., :-2, 1:-1] + padded[..., 1:-1, :-2] +
                     padded[..., 1:-1, 1:-1] + padded[..., 1:-1, 2:] +
                     padded[..., 2:, 1:-1])
        heat = heat * (cross_sum > 0).float()

    topk = getattr(cfg, 'max_per_img', 100)
    scores, inds, clses, ys, xs = self._topk(heat, topk=topk)
    xs = xs.view(batch, topk, -1, 1) * self.down_ratio
    ys = ys.view(batch, topk, -1, 1) * self.down_ratio

    # Gather the wh predictions at the selected flat positions.
    wh = wh.permute(0, 2, 3, 1).contiguous()
    wh = wh.view(wh.size(0), -1, wh.size(3))
    inds = inds.view(batch, -1, 1)
    wh = wh.gather(1, inds.expand(*inds.shape[:-1], wh.size(2)))

    if not self.wh_agnostic:
        # Class-specific sizes: select the 4 values of the predicted class.
        wh = wh.view(-1, topk, self.num_fg, 4)
        cls_index = clses[..., None, None].expand(
            clses.size(0), clses.size(1), 1, 4).long()
        wh = torch.gather(wh, 2, cls_index)

    wh = wh.view(batch, topk, -1, 4)
    clses = clses.view(batch, topk, 1).long()
    scores = scores.view(batch, topk, 1)

    left = xs - wh[..., [0]]
    top = ys - wh[..., [1]]
    right = xs + wh[..., [2]]
    bottom = ys + wh[..., [3]]
    bboxes = torch.cat([left, top, right, bottom], dim=-1)

    if self.aug_reg:
        # Weighted average over the candidates along dim -2; the first
        # candidate gets unit base weight, the rest a weight derived from the
        # first candidate's wh (constant 24 -- meaning not visible here).
        heat = pred_heatmap.permute(0, 2, 3, 1).contiguous()
        heat = heat.view(heat.size(0), -1, heat.size(3))
        score_inds = inds.expand(*inds.shape[:-1], heat.size(2))
        area_scores = heat.gather(1, score_inds).view(
            batch, topk, -1, self.num_fg)
        cls_index = clses.expand(
            *clses.shape[:-1], area_scores.size(-2)).unsqueeze(-1)
        area_scores = area_scores.gather(-1, cls_index).squeeze(-1)

        base_weight = bboxes.new_ones((*bboxes.shape[:-2], 1))
        decay_weight = torch.exp(-1 / (2 * (wh[..., 0, :] / 24)**2))
        bbox_weight = torch.cat([base_weight, decay_weight],
                                dim=-1) * area_scores
        bboxes = (bboxes * bbox_weight.unsqueeze(-1)
                  ).sum(-2) / bbox_weight.sum(-1, keepdims=True)
    else:
        bboxes = bboxes.squeeze(-2)

    clses = clses.float()

    if self.two_stage:
        # Build rows of (batch index, box) for every image and refine them.
        roi_boxes = bboxes.new_tensor([])
        for batch_i in range(bboxes.shape[0]):
            boxes_i = bboxes[batch_i]
            batch_col = boxes_i.new_ones([boxes_i.size(0), 1]) * batch_i
            rois_i = torch.cat([batch_col, boxes_i], dim=1)
            roi_boxes = torch.cat([roi_boxes, rois_i], dim=0)

        if roi_boxes.size(0) > 0:
            rois = self.align(pred_feat, roi_boxes)
            pred_wh2 = self.wh2(rois).view(-1, 4)
            bboxes = bboxes.view(-1, 4)
            # Offsets are scaled by 16 (presumably a stride -- TODO confirm).
            bboxes[:, [0, 1]] = bboxes[:, [0, 1]] - pred_wh2[:, [0, 1]] * 16
            bboxes[:, [2, 3]] = bboxes[:, [2, 3]] + pred_wh2[:, [2, 3]] * 16
            bboxes = bboxes.view(batch, topk, 4)

    score_thr = getattr(cfg, 'score_thr', 0.01)
    result_list = []
    for batch_i in range(bboxes.shape[0]):
        img_scores = scores[batch_i]
        keep = (img_scores > score_thr).squeeze(-1)

        img_scores = img_scores[keep]
        img_bboxes = bboxes[batch_i][keep]
        img_labels = clses[batch_i][keep].squeeze(-1)

        # Clamp even columns to the padded width, odd columns to the height.
        pad_shape = img_metas[batch_i]['pad_shape']
        img_bboxes[:, 0::2] = img_bboxes[:, 0::2].clamp(
            min=0, max=pad_shape[1] - 1)
        img_bboxes[:, 1::2] = img_bboxes[:, 1::2].clamp(
            min=0, max=pad_shape[0] - 1)

        if rescale:
            scale_factor = img_metas[batch_i]['scale_factor']
            img_bboxes /= img_bboxes.new_tensor(scale_factor)

        if self.use_simple_nms:
            img_bboxes = torch.cat([img_bboxes, img_scores], dim=1)
        else:
            # Soft-NMS is applied separately to each class that is present.
            int_labels = img_labels.int()
            class_ids = list(set(int_labels.cpu().numpy()))
            kept_bboxes = img_bboxes.new_zeros((0, 5))
            kept_labels = int_labels.new_zeros((0, ))

            for cls_id in class_ids:
                in_class = int_labels == cls_id
                cls_dets = torch.cat(
                    (img_bboxes[in_class], img_scores[in_class]), dim=1)
                soft_bboxes, ori_idx = soft_nms(cls_dets, iou_thr=0.6)
                kept_bboxes = torch.cat((kept_bboxes, soft_bboxes), dim=0)
                kept_labels = torch.cat(
                    (kept_labels, int_labels[in_class][ori_idx]))

            img_bboxes = kept_bboxes
            img_labels = kept_labels.float()

        result_list.append((img_bboxes, img_labels))
    return result_list