def estimate(tenOrig):
    """Run the shared network at several shift values and merge the results.

    The module-level ``netNetwork`` is created on first use
    (``Network('bsds500')`` moved to CUDA and put in eval mode) and reused
    afterwards, so a CUDA device is required.

    Args:
        tenOrig: image tensor. It is edge-padded by 32 pixels and viewed as
            ``(1, 3, H, W)``, so it must hold three channels with height at
            ``shape[1]`` and width at ``shape[2]``.

    Returns:
        A CPU tensor with the shape of ``tenOrig`` holding the element-wise
        maximum, over the shift values 100, 150 and 200, of the network
        output centre-cropped back to the original height and width. The
        accumulator starts at zero, so no element is negative.
    """
    global netNetwork

    # Build the network lazily so importing the module stays cheap.
    if netNetwork is None:
        netNetwork = Network('bsds500').cuda().eval()

    pad_width = 32
    padded = Pad(pad_width, padding_mode='edge')(tenOrig).float()
    height, width = padded.shape[1], padded.shape[2]

    # Undoes the padding on each network output.
    crop_to_original = CenterCrop(tenOrig.shape[1:3])

    merged = torch.zeros(tenOrig.shape)
    for shift in (100, 150, 200):
        batch = padded.cuda().view(1, 3, height, width)
        inferred = netNetwork(batch, shift)[0, :, :, :].cpu()
        merged = torch.maximum(merged, crop_to_original(inferred).float())

    return merged
def _parse_int_tuple(text):
    """Convert a command-line string such as ``"384,384"`` to a tuple of ints.

    Used as an ``argparse`` ``type`` callable. A ``ValueError`` raised by
    ``int`` for malformed input is reported by argparse as a usage error.
    """
    return tuple(int(part) for part in text.split(","))


def main():
    """Evaluate a pose-proposal checkpoint on a COCO-style keypoint JSON file.

    Steps:
      1. Parse command-line options (defaults come from ``config``).
      2. Group the annotations of ``--val_json`` per image, skipping crowd
         annotations and annotations without keypoints.
      3. Build the network, load ``--checkpoint`` and switch to eval mode.
      4. For every image with at least one kept annotation: pad it towards a
         square, resize to ``--insize``, normalise, run the network, and
         apply the same pad/resize to the ground-truth keypoints and boxes.
      5. Pass ``[keypoints, detected humans, bboxes, visibility]`` (one list
         entry per image each) to ``evaluation``.
    """
    import argparse

    parser = argparse.ArgumentParser()
    # ``type=tuple`` would split a command-line string into single characters,
    # so sizes are parsed explicitly as comma-separated integers.
    parser.add_argument("--insize", default=config.insize,
                        type=_parse_int_tuple)
    parser.add_argument("--local_grid_size", default=config.local_grid_size,
                        type=_parse_int_tuple)
    parser.add_argument("--val_json", default=config.val_json, type=str)
    parser.add_argument("--image_root", default=config.image_root, type=str)
    parser.add_argument("--checkpoint", type=str, default="model/best288.pth")
    args = parser.parse_args()

    # Close the annotation file as soon as it has been parsed.
    with open(args.val_json, "r", encoding="utf-8") as handle:
        val_json = json.load(handle)

    # image id -> (file name, keypoints, bboxes, visibility), where each of
    # the three lists holds one entry per annotated person.
    images = {}
    for image in val_json["images"]:
        images[int(image['id'])] = image['file_name'], [], [], []

    # Iterate over all annotations.
    for anno in val_json['annotations']:
        if anno['num_keypoints'] < 1 or anno['iscrowd'] != 0:
            continue
        d = np.array(anno['keypoints'], dtype='float32').reshape(-1, 3)
        entry = images[int(anno['image_id'])]
        # Keypoints are stored as (y, x) rows, the order in which
        # pad_keypoint / resize_point are called below.
        entry[1].append(d[:, [1, 0]])
        # Person bounding box, kept as (x, y, w, h).
        entry[2].append(np.asarray(anno['bbox'], dtype='float32'))
        # A keypoint counts as visible when its flag equals 2. The builtin
        # ``bool`` replaces the ``np.bool`` alias removed from NumPy.
        entry[3].append((d[:, 2] == 2).astype(bool))

    # Keep only the images that still have at least one annotated person.
    samples = [entry for entry in images.values() if entry[1]]

    pose_proposal_net = model.poseprosalnet(KEYPOINT_NAMES, EDGES,
                                            args.local_grid_size, args.insize,
                                            args.checkpoint)
    # Load onto the CPU first so a checkpoint saved from a GPU also works on
    # a CPU-only machine; the model is moved to the GPU below when available.
    pose_proposal_net.load_state_dict(
        torch.load(args.checkpoint, map_location="cpu"))
    pose_proposal_net.eval()
    CUDA = torch.cuda.is_available()
    if CUDA:
        pose_proposal_net.cuda()

    # [ground-truth keypoints, detected humans, ground-truth bboxes, visibility]
    pck_object = [[], [], [], []]
    with torch.no_grad():
        for filename, person_keypoints, person_bboxes, person_visible in samples:
            # Force three channels: the pad fill and Normalize below both
            # assume RGB. The context manager closes the file handle.
            with Image.open(args.image_root + "/" + filename) as raw:
                image = raw.convert("RGB")
            oriW, oriH = image.size

            # Padding is applied on both sides of the shorter dimension.
            # NOTE(review): for an odd size difference both sides get
            # (diff + 1) // 2 pixels, so the padded image would be one pixel
            # larger than padedWH - confirm this is intended.
            padedWH = max(oriH, oriW)
            h_pad = int(np.clip(((padedWH - oriH) + 1) // 2, 0, 1e6))
            w_pad = int(np.clip(((padedWH - oriW) + 1) // 2, 0, 1e6))
            # The fill colour is roughly the Normalize mean on a 0-255 scale.
            image = Pad((w_pad, h_pad), (123, 116, 103))(image)
            image = Resize(args.insize)(image)
            image = ToTensor()(image)
            image = Normalize(mean=[0.485, 0.456, 0.406],
                              std=[0.229, 0.224, 0.225])(image)
            image = image.unsqueeze(0)
            if CUDA:
                image = image.cuda()
            pre = pose_proposal_net(image)

            # ---- pad: shift the ground truth like the image ----
            paded_keypoints = [
                pad_keypoint(points, h_pad, w_pad)
                for points in person_keypoints
            ]
            paded_bbox = []
            for x, y, bw, bh in person_bboxes:
                [[y, x]] = pad_keypoint(np.array([[y, x]]), h_pad, w_pad)
                paded_bbox.append(np.array([x, y, bw, bh]))

            # ---- resize: scale the ground truth like the image ----
            new_keypoints = [
                resize_point(points, (padedWH, padedWH), args.insize)
                for points in paded_keypoints
            ]
            new_bbox = []
            for x, y, bw, bh in paded_bbox:
                [[y, x]] = resize_point(np.array([[y, x]]),
                                        (padedWH, padedWH), args.insize)
                bw *= args.insize[1] / padedWH
                bh *= args.insize[0] / padedWH
                new_bbox.append(np.array([x, y, bw, bh], dtype='float32'))

            # NOTE(review): 0.02 is presumably a detection threshold -
            # confirm against utils.get_humans_by_feature.
            humans = utils.get_humans_by_feature(pre, args.insize,
                                                 pose_proposal_net.outsize,
                                                 args.local_grid_size, 0.02)

            pck_object[0].append(new_keypoints)
            pck_object[1].append(humans)
            pck_object[2].append(new_bbox)
            pck_object[3].append(person_visible)

    evaluation(pck_object)