def ctdet_decode(hmap, regs, w_h_, K=100):
    """Turn raw detection-head outputs into the top-K boxes.

    Args:
        hmap: 4-D tensor of per-class heatmap logits; sigmoid is applied here.
        regs: Center-offset map. After gathering it is viewed as
            (batch, K, 2) and added to the integer peak coordinates (x, y).
        w_h_: Box-size map. After gathering it is viewed as (batch, K, 2)
            and used as (width, height).
        K: Number of peaks kept by ``_topk``.

    Returns:
        Tensor of shape (batch, K, 6) laid out as
        ``[x1, y1, x2, y2, score, class]``.
    """
    n_batch, _, _, _ = hmap.shape
    heat = torch.sigmoid(hmap)

    # Flip test: with more than one entry, entry 0 and the flipped entry 1
    # are averaged for the heatmap and the size map; offsets come from
    # entry 0 only, and the result is a single-image batch.
    if n_batch > 1:
        heat = (heat[0:1] + flip_tensor(heat[1:2])) / 2
        w_h_ = (w_h_[0:1] + flip_tensor(w_h_[1:2])) / 2
        regs = regs[0:1]
        n_batch = 1

    heat = _nms(heat)  # NMS on the heatmap
    scores, inds, clses, ys, xs = _topk(heat, K=K)

    offsets = _tranpose_and_gather_feature(regs, inds).view(n_batch, K, 2)
    center_x = xs.view(n_batch, K, 1) + offsets[:, :, 0:1]
    center_y = ys.view(n_batch, K, 1) + offsets[:, :, 1:2]

    sizes = _tranpose_and_gather_feature(w_h_, inds).view(n_batch, K, 2)
    half_w = sizes[..., 0:1] / 2
    half_h = sizes[..., 1:2] / 2

    class_ids = clses.view(n_batch, K, 1).float()
    confidences = scores.view(n_batch, K, 1)

    corners = torch.cat(
        [center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h],
        dim=2,
    )
    return torch.cat([corners, confidences, class_ids], dim=2)
def compute_embeddings_lfw(args, dataset, model, batch_size, dump_embeddings=False,
                           pdist=lambda x, y: 1. - F.cosine_similarity(x, y),
                           flipped_embeddings=False):
    """Computes embeddings of all images from the LFW dataset using PyTorch.

    Each batch produced by the loader is a mapping with the keys ``'img1'``,
    ``'img2'`` and ``'is_same'`` (plus ``'id0'`` / ``'id1'`` when
    ``dump_embeddings`` is set).

    Args:
        args: Object exposing ``devices``; images are moved to CUDA when CUDA
            is available and ``args.devices[0] != -1``.
        dataset: Dataset wrapped in a non-shuffling ``DataLoader``.
        model: Callable mapping an image batch to an embedding batch.
        batch_size: Loader batch size; also used to compute each pair's
            global index.
        dump_embeddings: When true, all embeddings and ids are written with
            ``SummaryWriter.add_embedding`` under ``./logs/<timestamp>``.
        pdist: Pairwise distance applied to the two embedding batches
            (default: one minus cosine similarity).
        flipped_embeddings: When true, every embedding is averaged with the
            embedding of ``flip_tensor(images, 3)``.

    Returns:
        A list with one dict per image pair holding ``'score'``,
        ``'is_same'`` and ``'idx'``.
    """
    val_loader = DataLoader(dataset, batch_size=batch_size, num_workers=4, shuffle=False)
    # Loop-invariant: decide once whether batches go to the GPU.
    use_cuda = torch.cuda.is_available() and args.devices[0] != -1

    scores_with_gt = []
    embeddings = []
    ids = []

    # Pure inference: disabling autograd avoids building graphs that would
    # only cost memory here.
    with torch.no_grad():
        for batch_idx, data in enumerate(tqdm(val_loader, 'Computing embeddings')):
            images_1 = data['img1']
            images_2 = data['img2']
            is_same = data['is_same']
            if use_cuda:
                images_1 = images_1.cuda()
                images_2 = images_2.cuda()

            emb_1 = model(images_1)
            emb_2 = model(images_2)
            if flipped_embeddings:
                # Average each embedding with that of the flipped image.
                emb_1_flipped = model(flip_tensor(images_1, 3))
                emb_2_flipped = model(flip_tensor(images_2, 3))
                emb_1 = (emb_1 + emb_1_flipped) * .5
                emb_2 = (emb_2 + emb_2_flipped) * .5

            scores = pdist(emb_1, emb_2).detach().cpu().numpy()
            first_idx = batch_idx * batch_size
            for i, score in enumerate(scores):
                scores_with_gt.append({
                    'score': score,
                    'is_same': is_same[i],
                    'idx': first_idx + i,
                })

            if dump_embeddings:
                ids.append(data['id0'])
                ids.append(data['id1'])
                embeddings.append(emb_1.detach().cpu())
                embeddings.append(emb_2.detach().cpu())

    # Skip the dump for an empty loader: np.concatenate rejects an empty list.
    if dump_embeddings and embeddings:
        total_emb = np.concatenate(embeddings, axis=0)
        total_ids = np.concatenate(ids, axis=0)
        log_path = './logs/{:%Y_%m_%d_%H_%M}'.format(datetime.datetime.now())
        writer = SummaryWriter(log_path)
        try:
            writer.add_embedding(torch.from_numpy(total_emb), total_ids)
        finally:
            # Always flush and release the event file, even if the write fails.
            writer.close()

    return scores_with_gt
def ctdet_decode(hmap, regs, w_h_, K=100):
    """Decode heatmap, offset and size maps into the top-K detections.

    ``hmap`` supplies the peak (center) positions ``xs`` / ``ys``; ``regs``
    holds the offsets added to them to obtain the precise centers; ``w_h_``
    holds the width and height of the corresponding object.

    Typical call (from the original author's note):
    ``dets = ctdet_decode(*output, K=cfg.test_topk)``.

    Args:
        hmap: 4-D tensor of heatmap logits.
        regs: Offset map, gathered and viewed as (batch, K, 2).
        w_h_: Size map, gathered and viewed as (batch, K, 2).
        K: Number of peaks to keep.

    Returns:
        Tensor of shape (batch, K, 6): ``[x1, y1, x2, y2, score, class]``.
    """
    num_images, _, _, _ = hmap.shape
    probs = torch.sigmoid(hmap)  # squash logits into the 0-1 range

    # A batch larger than one means flip testing was used, i.e. the input
    # was built like
    #   np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)
    # Heatmap and size map are averaged with their flipped counterparts;
    # the offsets of the un-flipped image are kept as they are.
    if num_images > 1:
        probs = (probs[0:1] + flip_tensor(probs[1:2])) / 2
        w_h_ = (w_h_[0:1] + flip_tensor(w_h_[1:2])) / 2
        regs = regs[0:1]
        num_images = 1

    # Per the original author's note, this NMS is not the box NMS of
    # anchor-based detectors: it filters peaks with a 3x3 max-pool.
    probs = _nms(probs)

    # The K strongest remaining peaks are taken as object centers.
    scores, inds, clses, ys, xs = _topk(probs, K=K)

    # Original author's note on the gather helper:
    # from [bs, c, h, w] to [bs, h, w, c].
    peak_offsets = _tranpose_and_gather_feature(regs, inds)
    peak_offsets = peak_offsets.view(num_images, K, 2)
    ctr_x = xs.view(num_images, K, 1) + peak_offsets[:, :, 0:1]
    ctr_y = ys.view(num_images, K, 1) + peak_offsets[:, :, 1:2]

    peak_sizes = _tranpose_and_gather_feature(w_h_, inds)
    peak_sizes = peak_sizes.view(num_images, K, 2)

    labels = clses.view(num_images, K, 1).float()
    confidences = scores.view(num_images, K, 1)

    # ctr_x / ctr_y are center coordinates; channel 0 of the size map is the
    # width and channel 1 is the height.
    half_width = peak_sizes[..., 0:1] / 2
    half_height = peak_sizes[..., 1:2] / 2
    boxes = torch.cat(
        [ctr_x - half_width, ctr_y - half_height,
         ctr_x + half_width, ctr_y + half_height],
        dim=2,
    )

    return torch.cat([boxes, confidences, labels], dim=2)
def ctsegm_inmodal_norm_code_decode(hmap, regs, w_h_, codes_, offsets_, contour_std,
                                    dictionary, K=100):
    """Decode head outputs into top-K contours plus boxes, scores and classes.

    Args:
        hmap: 4-D tensor of heatmap logits; sigmoid is applied here.
        regs: Center-offset map, gathered and viewed as (batch, K, 2).
        w_h_: Box-size map, gathered and viewed as (batch, K, 2) and used as
            (width, height).
        codes_: Shape-code map, gathered and viewed as (batch, K, 64).
        offsets_: Contour-offset map, gathered and viewed as
            (batch, K, 1, 2).
        contour_std: Per-location scale map, gathered and viewed as
            (batch, K, 1); it multiplies the reconstructed contour.
        dictionary: Right-hand operand of ``torch.matmul(codes, dictionary)``;
            the product is viewed as (batch, K, 32, 2).
        K: Number of peaks to keep.

    Returns:
        Tensor of shape (batch, K, 70): 64 flattened contour coordinates,
        then ``[x1, y1, x2, y2, score, class]``.
    """
    n_batch, _, _, _ = hmap.shape
    heat = torch.sigmoid(hmap)

    # Flip test: heatmap and size map are averaged with the flipped second
    # entry; every other map is taken from entry 0 only.
    if n_batch > 1:
        heat = (heat[0:1] + flip_tensor(heat[1:2])) / 2
        w_h_ = (w_h_[0:1] + flip_tensor(w_h_[1:2])) / 2
        regs = regs[0:1]
        codes_ = codes_[0:1]
        offsets_ = offsets_[0:1]
        contour_std = contour_std[0:1]
        n_batch = 1

    heat = _nms(heat)  # NMS on the heatmap
    scores, inds, clses, ys, xs = _topk(heat, K=K)

    center_shift = _tranpose_and_gather_feature(regs, inds).view(n_batch, K, 2)
    center_x = xs.view(n_batch, K, 1) + center_shift[:, :, 0:1]
    center_y = ys.view(n_batch, K, 1) + center_shift[:, :, 1:2]

    sizes = _tranpose_and_gather_feature(w_h_, inds).view(n_batch, K, 2)
    scale = _tranpose_and_gather_feature(contour_std, inds).view(n_batch, K, 1)
    codes = _tranpose_and_gather_feature(codes_, inds).view(n_batch, K, 64)

    class_ids = clses.view(n_batch, K, 1).float()
    confidences = scores.view(n_batch, K, 1)

    half_w = sizes[..., 0:1] / 2
    half_h = sizes[..., 1:2] / 2
    boxes = torch.cat(
        [center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h],
        dim=2,
    )

    contour_shift = _tranpose_and_gather_feature(offsets_, inds).view(n_batch, K, 1, 2)
    centers = torch.cat([center_x, center_y], dim=2).view(n_batch, K, 1, 2)

    # Reconstruct the contour from its code, rescale it, then move it by the
    # predicted contour offset and the object center.
    contour = torch.matmul(codes, dictionary) * scale
    contour = contour.view(n_batch, K, 32, 2) + contour_shift + centers

    return torch.cat([contour.view(n_batch, K, -1), boxes, confidences, class_ids], dim=2)
def ctsegm_fourier_decode(hmap, regs, w_h_, real_, imaginary_, K=100):
    """Decode head outputs into top-K contours rebuilt from Fourier codes.

    Args:
        hmap: 4-D tensor of heatmap logits; sigmoid is applied here.
        regs: Center-offset map, gathered and viewed as (batch, K, 2).
        w_h_: Four-channel extent map, gathered and viewed as (batch, K, 4).
            Channel 0 is subtracted from y, channel 1 added to y, channel 2
            subtracted from x and channel 3 added to x to form the box.
        real_: Real parts of the coefficients, viewed as (batch, K, 32, 1).
        imaginary_: Imaginary parts, viewed as (batch, K, 32, 1).
        K: Number of peaks to keep.

    Returns:
        Tensor of shape (batch, K, 70): 64 flattened contour values, then
        ``[x1, y1, x2, y2, score, class]``.
    """
    batch, _, _, _ = hmap.shape
    hmap = torch.sigmoid(hmap)

    # Flip test: heatmap and extent map are averaged with the flipped second
    # entry; the remaining maps are taken from entry 0 only.
    # NOTE(review): w_h_ has four directional channels here. If flip_tensor
    # mirrors horizontally, channels 2 and 3 would presumably need swapping
    # before averaging -- confirm against flip_tensor and the training code.
    if batch > 1:
        hmap = (hmap[0:1] + flip_tensor(hmap[1:2])) / 2
        w_h_ = (w_h_[0:1] + flip_tensor(w_h_[1:2])) / 2
        regs = regs[0:1]
        real_ = real_[0:1]
        imaginary_ = imaginary_[0:1]
        batch = 1

    hmap = _nms(hmap)  # perform nms on heatmaps
    scores, inds, clses, ys, xs = _topk(hmap, K=K)

    regs = _tranpose_and_gather_feature(regs, inds)
    regs = regs.view(batch, K, 2)
    xs = xs.view(batch, K, 1) + regs[:, :, 0:1]
    ys = ys.view(batch, K, 1) + regs[:, :, 1:2]

    w_h_ = _tranpose_and_gather_feature(w_h_, inds)
    w_h_ = w_h_.view(batch, K, 4)

    real_ = _tranpose_and_gather_feature(real_, inds)
    real_ = real_.view(batch, K, 32, 1)
    imaginary_ = _tranpose_and_gather_feature(imaginary_, inds)
    imaginary_ = imaginary_.view(batch, K, 32, 1)

    clses = clses.view(batch, K, 1).float()
    scores = scores.view(batch, K, 1)

    bboxes = torch.cat([xs - w_h_[..., 2:3],
                        ys - w_h_[..., 0:1],
                        xs + w_h_[..., 3:4],
                        ys + w_h_[..., 1:2]], dim=2)

    # (batch, K, 32, 2) with the last axis holding (real, imaginary).
    # The factor 32 presumably undoes a 1/N scaling of the stored
    # coefficients -- TODO confirm against the encoder.
    complex_codes = torch.cat([real_, imaginary_], dim=3) * 32.

    # torch.ifft(input, signal_ndim=1) was removed in PyTorch 1.8. The
    # replacement is torch.fft.ifft on a complex tensor; both apply the
    # 1/N-normalised inverse transform along the 32-coefficient axis.
    fft_api = getattr(torch, "fft", None)
    if hasattr(fft_api, "ifft") and hasattr(torch, "view_as_complex"):
        spectrum = torch.view_as_complex(complex_codes.contiguous())
        segms = torch.view_as_real(fft_api.ifft(spectrum, dim=-1))
    else:
        # Legacy PyTorch (< 1.8) still provides the function-style API.
        segms = torch.ifft(complex_codes, signal_ndim=1)

    # Shift the reconstructed points by the object center.
    segms = segms + torch.cat([xs, ys], dim=2).view(batch, K, 1, 2)

    segmentations = torch.cat([segms.view(batch, K, -1), bboxes, scores, clses], dim=2)
    return segmentations
def ctsegm_amodal_cmm_whiten_decode(hmap, regs, w_h_, codes_, offsets_, dictionary,
                                    code_range, K=100):
    """Decode head outputs into top-K contours from range-normalized codes.

    Args:
        hmap: 4-D tensor of heatmap logits; sigmoid is applied here.
        regs: Center-offset map, gathered and viewed as (batch, K, 2).
        w_h_: Box-size map, gathered and viewed as (batch, K, 2) and used as
            (width, height).
        codes_: Shape-code map, gathered and viewed as (batch, K, 64).
        offsets_: Contour-offset map, gathered and viewed as
            (batch, K, 1, 2).
        dictionary: Right-hand operand of ``torch.matmul(codes, dictionary)``;
            the product is viewed as (batch, K, 32, 2).
        code_range: Indexable pair; ``code_range[0]`` and ``code_range[1]``
            are the ends of the interval the codes are mapped back onto.
        K: Number of peaks to keep.

    Returns:
        Tensor of shape (batch, K, 70): 64 flattened contour coordinates,
        then ``[x1, y1, x2, y2, score, class]``.
    """
    n_batch, _, _, _ = hmap.shape
    heat = torch.sigmoid(hmap)

    # Flip test: heatmap and size map are averaged with the flipped second
    # entry; the other maps are taken from entry 0 only.
    if n_batch > 1:
        heat = (heat[0:1] + flip_tensor(heat[1:2])) / 2
        w_h_ = (w_h_[0:1] + flip_tensor(w_h_[1:2])) / 2
        regs = regs[0:1]
        codes_ = codes_[0:1]
        offsets_ = offsets_[0:1]
        n_batch = 1

    heat = _nms(heat)  # NMS on the heatmap
    scores, inds, clses, ys, xs = _topk(heat, K=K)

    center_shift = _tranpose_and_gather_feature(regs, inds).view(n_batch, K, 2)
    center_x = xs.view(n_batch, K, 1) + center_shift[:, :, 0:1]
    center_y = ys.view(n_batch, K, 1) + center_shift[:, :, 1:2]

    sizes = _tranpose_and_gather_feature(w_h_, inds).view(n_batch, K, 2)
    codes = _tranpose_and_gather_feature(codes_, inds).view(n_batch, K, 64)

    class_ids = clses.view(n_batch, K, 1).float()
    confidences = scores.view(n_batch, K, 1)

    half_w = sizes[..., 0:1] / 2
    half_h = sizes[..., 1:2] / 2
    boxes = torch.cat(
        [center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h],
        dim=2,
    )

    contour_shift = _tranpose_and_gather_feature(offsets_, inds)

    # Recover the original unnormalized codes: the affine map below sends
    # -1 to code_range[0] and +1 to code_range[1].
    low = code_range[0]
    span = code_range[1] - code_range[0]
    codes = (codes + 1) / 2. * span + low

    centers = torch.cat([center_x, center_y], dim=2).view(n_batch, K, 1, 2)
    contour = torch.matmul(codes, dictionary).view(n_batch, K, 32, 2)
    contour = contour + contour_shift.view(n_batch, K, 1, 2) + centers

    return torch.cat([contour.view(n_batch, K, -1), boxes, confidences, class_ids], dim=2)
def ctdet_decode(hmap, regs, w_h_, pxpy, K=100):
    """Decode head outputs into top-K boxes using a blended box extent.

    The half-extents of each box are a 0.1 / 0.9 blend of an estimate derived
    from ``pxpy`` and half of the size predicted by ``w_h_``.

    Args:
        hmap: 4-D tensor of heatmap logits; sigmoid is applied here.
        regs: Center-offset map, gathered and viewed as (batch, K, 2).
        w_h_: Box-size map, gathered and viewed as (batch, K, 2); the first
            column is the width and the second the height.
        pxpy: Two-channel map, gathered and viewed as (batch, K, 2).
            Channel 0 is used as a magnitude and channel 1 as an angle
            (it is passed to ``cos`` / ``sin``).
        K: Number of peaks to keep.

    Returns:
        Tensor of shape (batch, K, 6): ``[x1, y1, x2, y2, score, class]``.
    """
    # Shape is (batch, C, H, W); the original author notes that
    # height = width = 128 in their setup.
    n_batch, _, _, _ = hmap.shape
    heat = torch.sigmoid(hmap)

    # Flip test. The original author notes that the test batch is 1, in
    # which case this branch is skipped.
    if n_batch > 1:
        heat = (heat[0:1] + flip_tensor(heat[1:2])) / 2
        w_h_ = (w_h_[0:1] + flip_tensor(w_h_[1:2])) / 2
        regs = regs[0:1]
        n_batch = 1

    heat = _nms(heat)  # NMS on the heatmap
    scores, inds, clses, ys, xs = _topk(heat, K=K)

    center_shift = _tranpose_and_gather_feature(regs, inds).view(n_batch, K, 2)
    center_x = xs.view(n_batch, K, 1) + center_shift[:, :, 0:1]
    center_y = ys.view(n_batch, K, 1) + center_shift[:, :, 1:2]

    sizes = _tranpose_and_gather_feature(w_h_, inds).view(n_batch, K, 2)
    polar = _tranpose_and_gather_feature(pxpy, inds).view(n_batch, K, 2)

    class_ids = clses.view(n_batch, K, 1).float()
    confidences = scores.view(n_batch, K, 1)

    magnitude = polar[..., 0:1]
    angle = polar[..., 1:2]
    half_w = torch.abs(magnitude * torch.cos(angle))  # half width
    half_h = torch.abs(magnitude * torch.sin(angle))  # half height

    # Blend: 10% from the estimate above, 90% from half the predicted size.
    half_w = 0.1 * half_w + 0.9 * sizes[..., 0:1] / 2
    half_h = 0.1 * half_h + 0.9 * sizes[..., 1:2] / 2

    corners = torch.cat(
        [center_x - half_w, center_y - half_h, center_x + half_w, center_y + half_h],
        dim=2,
    )
    return torch.cat([corners, confidences, class_ids], dim=2)
def ctsegm_shift_code_decode(hmap, regs, w_h_, codes_, dictionary, K=100):
    """Decode head outputs into top-K contours with four-sided boxes.

    Args:
        hmap: 4-D tensor of heatmap logits; sigmoid is applied here.
        regs: Center-offset map, gathered and viewed as (batch, K, 2).
        w_h_: Four-channel extent map, gathered and viewed as (batch, K, 4).
            Channel 0 is subtracted from y, channel 1 added to y, channel 2
            subtracted from x and channel 3 added to x to form the box.
        codes_: Shape-code map, gathered and viewed as (batch, K, 64).
        dictionary: Right-hand operand of ``torch.matmul(codes, dictionary)``;
            the product is viewed as (batch, K, 32, 2).
        K: Number of peaks to keep.

    Returns:
        Tensor of shape (batch, K, 70): 64 flattened contour coordinates,
        then ``[x1, y1, x2, y2, score, class]``.
    """
    n_batch, _, _, _ = hmap.shape
    heat = torch.sigmoid(hmap)

    # Flip test: heatmap and extent map are averaged with the flipped second
    # entry; offsets and codes are taken from entry 0 only.
    # NOTE(review): w_h_ has four directional channels here. If flip_tensor
    # mirrors horizontally, channels 2 and 3 would presumably need swapping
    # before averaging -- confirm against flip_tensor and the training code.
    if n_batch > 1:
        heat = (heat[0:1] + flip_tensor(heat[1:2])) / 2
        w_h_ = (w_h_[0:1] + flip_tensor(w_h_[1:2])) / 2
        regs = regs[0:1]
        codes_ = codes_[0:1]
        n_batch = 1

    heat = _nms(heat)  # NMS on the heatmap
    scores, inds, clses, ys, xs = _topk(heat, K=K)

    center_shift = _tranpose_and_gather_feature(regs, inds).view(n_batch, K, 2)
    center_x = xs.view(n_batch, K, 1) + center_shift[:, :, 0:1]
    center_y = ys.view(n_batch, K, 1) + center_shift[:, :, 1:2]

    extents = _tranpose_and_gather_feature(w_h_, inds).view(n_batch, K, 4)
    codes = _tranpose_and_gather_feature(codes_, inds).view(n_batch, K, 64)

    class_ids = clses.view(n_batch, K, 1).float()
    confidences = scores.view(n_batch, K, 1)

    x_min = center_x - extents[..., 2:3]
    y_min = center_y - extents[..., 0:1]
    x_max = center_x + extents[..., 3:4]
    y_max = center_y + extents[..., 1:2]
    boxes = torch.cat([x_min, y_min, x_max, y_max], dim=2)

    # Rebuild each contour from its code and move it to the object center.
    centers = torch.cat([center_x, center_y], dim=2).view(n_batch, K, 1, 2)
    contour = torch.matmul(codes, dictionary).view(n_batch, K, 32, 2) + centers

    return torch.cat([contour.view(n_batch, K, -1), boxes, confidences, class_ids], dim=2)
def ctsegm_decode(hmap, regs, w_h_, codes_, dictionary, K=100):
    """Decode head outputs into top-K contours with scores and classes.

    No bounding boxes are emitted. The gathered size vector only provides a
    scale: its Euclidean norm multiplies the reconstructed contour.

    Args:
        hmap: 4-D tensor of heatmap logits; sigmoid is applied here.
        regs: Center-offset map, gathered and viewed as (batch, K, 2).
        w_h_: Box-size map, gathered and viewed as (batch, K, 2).
        codes_: Shape-code map; the element-wise natural log of the gathered
            values is viewed as (batch, K, 64).
        dictionary: Right-hand operand of ``torch.matmul(codes, dictionary)``;
            the scaled product is viewed as (batch, K, 32, 2).
        K: Number of peaks to keep.

    Returns:
        Tensor of shape (batch, K, 66): 64 flattened contour coordinates,
        then ``[score, class]``.
    """
    n_batch, _, _, _ = hmap.shape
    heat = torch.sigmoid(hmap)

    # Flip test: heatmap and size map are averaged with the flipped second
    # entry; offsets and codes are taken from entry 0 only.
    if n_batch > 1:
        heat = (heat[0:1] + flip_tensor(heat[1:2])) / 2
        w_h_ = (w_h_[0:1] + flip_tensor(w_h_[1:2])) / 2
        regs = regs[0:1]
        codes_ = codes_[0:1]
        n_batch = 1

    heat = _nms(heat)  # NMS on the heatmap
    scores, inds, clses, ys, xs = _topk(heat, K=K)

    center_shift = _tranpose_and_gather_feature(regs, inds).view(n_batch, K, 2)
    center_x = xs.view(n_batch, K, 1) + center_shift[:, :, 0:1]
    center_y = ys.view(n_batch, K, 1) + center_shift[:, :, 1:2]

    sizes = _tranpose_and_gather_feature(w_h_, inds).view(n_batch, K, 2)
    # Norm of (w, h), kept as a trailing singleton axis for broadcasting.
    scale = torch.sqrt(torch.sum(sizes ** 2., dim=2, keepdim=True))

    gathered_codes = _tranpose_and_gather_feature(codes_, inds)
    log_codes = torch.log(gathered_codes).view(n_batch, K, 64)

    class_ids = clses.view(n_batch, K, 1).float()
    confidences = scores.view(n_batch, K, 1)

    # Rebuild, rescale and re-center each contour.
    centers = torch.cat([center_x, center_y], dim=2).view(n_batch, K, 1, 2)
    contour = torch.matmul(log_codes, dictionary)
    contour = (contour * scale).view(n_batch, K, 32, 2) + centers

    return torch.cat([contour.view(n_batch, K, -1), confidences, class_ids], dim=2)