def generate_kpts(video_name, smooth=False):
    """Run person detection and pose estimation on every frame of a video.

    Args:
        video_name: Source handed to ``cv2.VideoCapture`` (typically a path
            to a video file).
        smooth: When truthy, each frame's predictions are passed through
            ``smooth_filter`` before being stored.

    Returns:
        ``np.array`` built from ``preds[0]`` of every frame that was
        processed successfully. Frames that could not be read, or for which
        detection / preprocessing raised, are skipped, so the result can
        contain fewer entries than the video has frames.
    """
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)

    cam = cv2.VideoCapture(video_name)
    kpts_result = []
    try:
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

        pose_model = model_load(cfg)
        pose_model.cuda()

        for _ in tqdm(range(video_length)):
            ret_val, input_image = cam.read()
            if not ret_val:
                # The reported frame count is not guaranteed to match the
                # number of decodable frames; skip instead of feeding None
                # to the detector and relying on the except below.
                continue

            try:
                bboxs, scores = yolo_det(input_image, human_model)
                # bbox is coordinate location
                inputs, origin_img, center, scale = preprocess(
                    input_image, bboxs, scores, cfg)
            except Exception as e:
                # Best effort: report the problem and move on to the next
                # frame rather than aborting the whole video.
                print(e)
                continue

            with torch.no_grad():
                # Reverse the order of the three entries along axis 1
                # (presumably a BGR <-> RGB channel swap - confirm against
                # preprocess).
                inputs = inputs[:, [2, 1, 0]]
                # compute output heatmap
                output = pose_model(inputs.cuda())
                # compute coordinate
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(),
                    np.asarray(center), np.asarray(scale))

            if smooth:
                # smooth and fine-tune coordinates
                preds = smooth_filter(preds)

            # 3D video pose (only support single human)
            kpts_result.append(preds[0])
    finally:
        # Always free the capture handle, even if a model call raised.
        cam.release()

    return np.array(kpts_result)
def getKptsFromImage(human_model, pose_model, image, smooth=None):
    """Estimate keypoints for the first detected person in a single image.

    Args:
        human_model: Detector passed through to ``yolo_det``.
        pose_model: Pose network; it is called on the preprocessed input
            after that input has been moved to CUDA.
        image: Image handed to ``yolo_det`` and ``preprocess``.
        smooth: Accepted for signature compatibility; this function never
            reads it.

    Returns:
        ``preds[0]`` from ``get_final_preds`` (single-person use only).
    """
    parsed_args = get_args()
    update_config(cfg, parsed_args)

    # Detection: the boxes are coordinate locations.
    boxes, box_scores = yolo_det(image, human_model)
    net_input, _unused_img, centers, scales = preprocess(
        image, boxes, box_scores, cfg)

    with torch.no_grad():
        # Reverse the three entries along axis 1 before inference.
        reordered = net_input[:, [2, 1, 0]]
        heatmaps = pose_model(reordered.cuda())
        # Turn the network output into coordinates.
        heatmaps_np = heatmaps.clone().cpu().numpy()
        coords, _confidences = get_final_preds(
            cfg, heatmaps_np, np.asarray(centers), np.asarray(scales))

    # Only a single human is supported, so return the first prediction.
    return coords[0]
def main():
    """Pose estimation on a video file or webcam, blended with optical flow.

    Every frame is resized to 640x384. From the first frame with a
    successful pose pass onwards, the previous keypoints are propagated to
    the current frame with the optical-flow model; the propagated boxes
    replace the detector boxes for preprocessing and the propagated
    keypoints are averaged with the network predictions. The skeleton is
    drawn on a black canvas and written to ``args.video_output``; frames on
    which detection / preprocessing fails are written unmodified.

    NOTE(review): ``human_model`` is not defined in this function; it is
    presumably a module-level global - confirm.

    Raises:
        RuntimeError: If not even the first frame can be read.
    """
    args = parse_args()
    update_config(cfg, args)

    if args.camera:
        cam = cv2.VideoCapture(0)
        # A live stream has no frame count; use a fixed upper bound.
        video_length = 30000
    else:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    resize_W = 640
    resize_H = 384

    out = None
    try:
        ret_val, input_image = cam.read()
        if not ret_val:
            raise RuntimeError("Could not read a frame from the video source")
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        # Video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        input_fps = cam.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                              (input_image.shape[1], input_image.shape[0]))

        # load optical flow model
        flow_model = load_model()

        # load pose-hrnet MODEL
        pose_model = model_load(cfg)
        pose_model.cuda()

        pre_image = None
        # Keypoints of the last successfully processed frame. Flow
        # propagation needs them, so it is skipped until they exist; gating
        # on this (rather than on "not the first frame") keeps a failed
        # first detection from disabling every later frame.
        keypoints = None
        # Stay plain ints until flow propagation has produced real values.
        flow_boxs = 0
        flow_kpts = 0

        for _ in tqdm(range(video_length - 1)):
            # Per-frame timing checkpoint (value currently unused).
            x0 = ckpt_time()
            ret_val, input_image = cam.read()
            if not ret_val:
                # End of stream, or the reported frame count was too high.
                break
            input_image = cv2.resize(input_image, (resize_W, resize_H))

            if keypoints is not None:
                try:
                    t0 = ckpt_time()
                    flow_result = flow_net(pre_image, input_image, flow_model)
                    flow_boxs, flow_kpts = flow_propagation(keypoints,
                                                            flow_result)
                    # Closes the timing checkpoint opened at t0; the
                    # returned values are not needed here.
                    ckpt_time(t0, 1)
                except Exception as e:
                    print(e)
                    continue

            pre_image = input_image
            use_flow = not isinstance(flow_boxs, int)

            try:
                bboxs, scores = yolo_det(input_image, human_model)
                # bbox is coordinate location; once flow results exist the
                # propagated boxes are used in place of the detector boxes.
                boxes = flow_boxs if use_flow else bboxs
                inputs, origin_img, center, scale = preprocess(
                    input_image, boxes, scores, cfg)
            except Exception:
                # Best effort: keep the output video continuous by writing
                # the unprocessed frame, then move on.
                out.write(input_image)
                if args.display:
                    cv2.namedWindow("enhanced", cv2.WINDOW_NORMAL)
                    cv2.resizeWindow("enhanced", 1080, 720)
                    cv2.imshow('enhanced', input_image)
                    cv2.waitKey(2)
                continue

            with torch.no_grad():
                # Reverse the order of the three entries along axis 1
                # (presumably a BGR <-> RGB channel swap - confirm against
                # preprocess).
                inputs = inputs[:, [2, 1, 0]]
                # compute output heatmap
                output = pose_model(inputs.cuda())
                # compute coordinate
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(),
                    np.asarray(center), np.asarray(scale))

            if use_flow:
                # Average network predictions with flow-propagated keypoints.
                preds = (preds + flow_kpts) / 2

            # Draw the skeleton on a black canvas of the same shape.
            origin_img = np.zeros(origin_img.shape, np.uint8)
            image = plot_keypoint(origin_img, preds, maxvals, 0.1)
            out.write(image)
            keypoints = np.concatenate((preds, maxvals), 2)

            if args.display:
                # set the window size
                cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
                cv2.resizeWindow("enhanced", 1920, 1080)
                cv2.imshow('enhanced', image)
                cv2.waitKey(1)
    finally:
        # Release the capture and finalise the output file even on errors.
        cam.release()
        if out is not None:
            out.release()
def main():
    """Pose estimation on a video file or webcam with smoothed keypoints.

    Each processed frame goes through detection, preprocessing, the pose
    network and ``smooth_filter``; the skeleton is drawn on a black canvas.
    Rendered frames are written to ``args.video_output`` once the loop
    index reaches 14; frames on which detection / preprocessing fails are
    written unmodified regardless of the index. In camera mode only every
    second frame is processed.

    NOTE(review): ``human_model`` is not defined in this function; it is
    presumably a module-level global - confirm.

    Raises:
        RuntimeError: If not even the first frame can be read.
    """
    args = parse_args()
    update_config(cfg, args)

    if args.camera:
        cam = cv2.VideoCapture(0)
        # A live stream has no frame count; use a fixed upper bound.
        video_length = 30000
    else:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    out = None
    try:
        ret_val, input_image = cam.read()
        if not ret_val:
            raise RuntimeError("Could not read a frame from the video source")

        # Video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        input_fps = cam.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                              (input_image.shape[1], input_image.shape[0]))

        # load pose-hrnet MODEL
        pose_model = model_load(cfg)
        pose_model.cuda()

        item = 0
        for i in tqdm(range(video_length - 1)):
            # Per-frame timing checkpoint (value currently unused).
            x0 = ckpt_time()
            ret_val, input_image = cam.read()
            if not ret_val:
                # End of stream, or the reported frame count was too high.
                break

            if args.camera:
                # To reach real-time speed, predict on only one of every
                # two frames.
                if item == 0:
                    item = 1
                    continue
            item = 0

            try:
                bboxs, scores = yolo_det(input_image, human_model)
                # bbox is coordinate location
                inputs, origin_img, center, scale = preprocess(
                    input_image, bboxs, scores, cfg)
            except Exception:
                # Best effort: keep the output video continuous by writing
                # the unprocessed frame, then move on.
                out.write(input_image)
                if args.display:
                    cv2.namedWindow("enhanced", cv2.WINDOW_NORMAL)
                    cv2.resizeWindow("enhanced", 960, 480)
                    cv2.imshow('enhanced', input_image)
                    cv2.waitKey(2)
                continue

            with torch.no_grad():
                # Reverse the order of the three entries along axis 1
                # (presumably a BGR <-> RGB channel swap - confirm against
                # preprocess).
                inputs = inputs[:, [2, 1, 0]]
                # compute output heatmap
                output = pose_model(inputs.cuda())
                # compute coordinate
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(),
                    np.asarray(center), np.asarray(scale))

            # smooth the points
            preds = smooth_filter(preds)

            # Draw the skeleton on a black canvas of the same shape.
            origin_img = np.zeros(origin_img.shape, np.uint8)
            image = plot_keypoint(origin_img, preds, maxvals, 0.1)
            # Rendered frames with a loop index below 14 are not written
            # (presumably to let smooth_filter settle - confirm).
            if i >= 14:
                out.write(image)

            if args.display:
                # set the window size
                cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
                cv2.resizeWindow("enhanced", 960, 480)
                cv2.imshow('enhanced', image)
                cv2.waitKey(1)
    finally:
        # Release the capture and finalise the output file even on errors.
        cam.release()
        if out is not None:
            out.release()