def getKptsFromImage(human_model, pose_model, image, smooth=None):
    bboxs, scores = yolo_det(image, human_model)  # bbox is coordinate location

    inputs, origin_img, center, scale = PreProcess(image, bboxs, scores, cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:, [2, 1, 0]]  # BGR -> RGB
        output = pose_model(inputs.cuda())
        # compute coordinate
        preds, maxvals = get_final_preds(
            cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

    # # Pick the person with the largest mean y coordinate
    # max_index = 0
    # max_y = np.mean(preds[0, :, 1])
    # for k in range(len(preds)):
    #     tmp_y = np.mean(preds[k, :, 1])
    #     if tmp_y > max_y:
    #         max_index = k
    #         max_y = tmp_y
    # result = np.concatenate((preds[max_index], maxvals[max_index]), 1)

    # 3D video pose (only supports a single human)
    result = np.concatenate((preds[0], maxvals[0]), 1)
    return result
def detect_pose(pa, c):
    image = pa
    frame = cv2.imread(image, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    bboxs, scores = yolo_det(frame, human_model)
    src_dir = pa
    if len(bboxs) > 0:
        inputs, origin_img, center, scale = PreProcess(frame, bboxs, scores, cfg)
        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        # a "full shot" requires the face (COCO joints 0-4: nose, eyes, ears)
        # and the legs (13-16: knees, ankles) to all be detected confidently
        thresh = 0.7
        face_and_leg_joints = (0, 1, 2, 3, 4, 13, 14, 15, 16)
        if all(maxvals[0][j] > thresh for j in face_and_leg_joints):
            # dst_dir = '/content/full_shot' + '/' + f'image_{c}' + '.jpg'  -- want to move all full shots to a single folder
            # shutil.copy(src_dir, dst_dir)
            return 1
        else:
            return 0
    else:
        return 0
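# --- Usage sketch (assumption): filter a hypothetical folder of extracted
# frames with detect_pose() above, keeping only the "full shot" frames.
# The folder name and naming scheme are illustrative, not from the original.
import glob

def filter_full_shots(frame_dir='frames'):
    full_shots = []
    for c, path in enumerate(sorted(glob.glob(frame_dir + '/*.jpg'))):
        if detect_pose(path, c) == 1:
            full_shots.append(path)
    return full_shots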
def generate_kpts(video_name, smooth=None, no_nan=True):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    ret_val, input_image = cam.read()

    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    cam_w = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
    cam_h = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoint coordinates
    kpts_result = []
    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)  # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except Exception as e:
            if not no_nan:
                # append NaN so we can interpolate later
                kpts_result.append(np.full((17, 2), np.nan, dtype=np.float32))
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only supports a single human)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result, input_fps, cam_w, cam_h
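# --- Sketch: filling the NaN placeholder frames that generate_kpts() appends
# when detection fails ("append NaN so we can interpolate later"). Assumes the
# returned array has shape (T, 17, 2); this helper is not part of the original.
def interpolate_nan_frames(kpts):
    kpts = kpts.copy()
    t = np.arange(kpts.shape[0])
    for j in range(kpts.shape[1]):          # each joint
        for d in range(kpts.shape[2]):      # x and y
            col = kpts[:, j, d]
            bad = np.isnan(col)
            if bad.any() and (~bad).any():
                # linear interpolation over the valid frames
                col[bad] = np.interp(t[bad], t[~bad], col[~bad])
    return kpts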
def main():
    args = parse_args()
    update_config(cfg, args)

    # cudnn related settings
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    ########## load human detector model
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import main as yolo_det
    bboxs, scores = yolo_det(args.img_input, human_model)  # bbox is coordinate location

    inputs, origin_img, center, scale = PreProcess(args.img_input, bboxs, scores, cfg)

    # load MODEL
    model = model_load(cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:, [2, 1, 0]]
        output = model(inputs)
        # compute coordinate
        preds, maxvals = get_final_preds(
            cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

    image = plot_keypoint(origin_img, preds, maxvals, 0.3)
    cv2.imwrite(args.img_output, image)
def generate_kpts(video_name):
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    ret_val, input_image = cam.read()

    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoints
    kpts_result = []
    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)  # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        # smooth the points
        preds = smooth_filter(preds)

        # 3D video pose (only supports a single human)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result
def getKptsFromImage(human_model, pose_model, image, smooth=None):
    args = get_args()
    update_config(cfg, args)

    # the original referenced `input_image` here; the parameter is `image`
    bboxs, scores = yolo_det(image, human_model)  # bbox is coordinate location
    inputs, origin_img, center, scale = PreProcess(image, bboxs, scores, cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:, [2, 1, 0]]
        output = pose_model(inputs.cuda())
        # compute coordinate
        preds, maxvals = get_final_preds(
            cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

    # 3D video pose (only supports a single human)
    return preds[0]
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()
    resize_W = 640
    resize_H = 384
    input_image = cv2.resize(input_image, (resize_W, resize_H))

    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    first_frame = 1
    flow_boxs = 0
    flow_kpts = 0

    item = 0
    for i in tqdm(range(video_length - 1)):
        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        if first_frame == 0:
            try:
                t0 = ckpt_time()
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxs, flow_kpts = flow_propagation(keypoints, flow_result)
                _, t1 = ckpt_time(t0, 1)
            except Exception as e:
                print(e)
                continue

        pre_image = input_image
        first_frame = 0

        try:
            bboxs, scores = yolo_det(input_image, human_model)  # bbox is coordinate location
            if type(flow_boxs) == int:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
            else:
                # flow_boxs = (flow_boxs + bboxs) / 2
                inputs, origin_img, center, scale = PreProcess(
                    input_image, flow_boxs, scores, cfg)
        except:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 1080, 720)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        # average the detected keypoints with the flow-propagated ones
        if type(flow_boxs) != int:
            preds = (preds + flow_kpts) / 2

        origin_img = np.zeros(origin_img.shape, np.uint8)
        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        out.write(image)
        keypoints = np.concatenate((preds, maxvals), 2)

        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 1920, 1080)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
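# --- Sketch of what flow_propagation() plausibly does (an assumption, not the
# repo's implementation): shift each last-frame keypoint by the optical flow
# sampled at its location, and derive propagated boxes from the shifted joints.
def flow_propagation_sketch(keypoints, flow):
    # keypoints: (N, 17, 3) as x, y, score; flow: (H, W, 2) as dx, dy
    H, W = flow.shape[:2]
    new_kpts = keypoints[..., :2].copy()
    for n in range(keypoints.shape[0]):
        for k in range(keypoints.shape[1]):
            x, y = keypoints[n, k, :2]
            xi = int(np.clip(x, 0, W - 1))
            yi = int(np.clip(y, 0, H - 1))
            new_kpts[n, k] += flow[yi, xi]
    # a propagated box as the bounding rectangle of the shifted joints
    x1y1 = new_kpts.min(axis=1)
    x2y2 = new_kpts.max(axis=1)
    new_boxes = np.concatenate((x1y1, x2y2), -1)  # (N, 4)
    return new_boxes, new_kpts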
def generate_kpts(video_name, smooth=None):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    ret_val, input_image = cam.read()

    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoint coordinates
    kpts_result = []
    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)  # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # print("input shape: ", inputs.shape)   # 1 3 256 192
            # print("output shape: ", output.shape)  # 1 17 64 48
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only supports a single human)
        kpts_result.append(preds[0])  # preds[0]: (17, 2)

        # # Pick the person with the largest mean y coordinate
        # max_index = 0
        # max_y = np.mean(preds[0, :, 1])
        # for k in range(len(preds)):
        #     tmp_y = np.mean(preds[k, :, 1])
        #     if tmp_y > max_y:
        #         max_index = k
        #         max_y = tmp_y
        # kpts_result.append(preds[max_index])
        # print("maxvals[max_index]:", np.mean(maxvals[max_index]))

    result = np.array(kpts_result)
    return result
def main():
    tick = 0
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()

    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    item = 0
    for i in tqdm(range(video_length - 1)):
        x0 = ckpt_time()
        ret_val, input_image = cam.read()

        if args.camera:
            # for real-time speed, predict on every other frame
            if item == 0:
                item = 1
                continue
        item = 0

        try:
            bboxs, scores = yolo_det(input_image, human_model)  # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        # smooth the points
        preds = smooth_filter(preds)
        # preds = np.expand_dims(preds, 0)

        origin_img = np.zeros(origin_img.shape, np.uint8)
        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        if i >= 14:
            out.write(image)

        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
def main():
    global max_id
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()

    # keep width and height multiples of 64, as required by flownet2
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int((input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
    image_resolution = (resize_W, resize_H)
    print(resize_W, resize_H)
    input_image = cv2.resize(input_image, (resize_W, resize_H))

    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        try:
            if i > 0:
                pdb()  # debug breakpoint left in (assumes `from pdb import set_trace as pdb`)
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxes, flow_kpts = flow_propagation(prev_kpts, flow_result)
                # append the previous boxes' scores to the flow boxes
                flow_boxes = np.concatenate(
                    (flow_boxes, np.expand_dims(prev_boxes[..., 4], -1)), -1)
                flow_kpts = np.concatenate((flow_kpts, prev_kpts_scores), -1)

            # boxes threshold is 0.9
            detected_boxes, detected_scores = yolo_det(
                input_image, human_model)  # bbox is coordinate location
            detected_scores = np.expand_dims(detected_scores.flatten(), -1)
            detected_boxes = np.concatenate((detected_boxes, detected_scores), -1)  # (N, 5)

            if i == 0:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, detected_boxes[..., :4], detected_boxes[..., 4], cfg)
                # ploted_image = plot_boxes(input_image, detected_boxes, [i for i in range(len(detected_boxes))])
                # cv2.imshow('image', ploted_image)
                # cv2.waitKey(100)
            else:
                # hardest part! re-orders the pose-net input and assigns the corresponding ids
                print('before mapping: ', previous_ids)
                new_boxes, new_ids = boxes_nms_test(flow_boxes, detected_boxes,
                                                    previous_ids, image_resolution)
                print('after mapping: ', new_ids)
                print(flow_boxes[:, 1], detected_boxes[:, 1])
                # ploted_image = plot_boxes(input_image, new_boxes, new_ids)
                # cv2.imshow('image', ploted_image)
                # cv2.waitKey(100)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, new_boxes[..., :4], new_boxes[..., 4], cfg)
        except Exception as e:
            print(e)
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        # pose estimation
        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            detected_kpts, detected_kpts_scores = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
        detected_kpts = np.concatenate((detected_kpts, detected_kpts_scores), 2)

        # TRACK: assign IDs from flow_boxes / detected_boxes -> new_ids
        if i > 0:
            pose_similarity_matrix = compute_pairwise_oks(
                flow_kpts, flow_boxes[..., :4], detected_kpts)
            box_similarity_matrix = boxes_similarity(flow_boxes[..., :4],
                                                     detected_boxes[..., :4])
            ratio = 0.5
            similarity_matrix = (pose_similarity_matrix * ratio +
                                 box_similarity_matrix * (1 - ratio))
            prev_filter_ids, cur_ids = bipartite_matching_greedy(similarity_matrix)
            print('previous frame boxes: ', prev_pose_ids)
            cur_len = len(detected_kpts)
            new_pose_ids = pose_match_ids(prev_pose_ids, prev_filter_ids, cur_ids, cur_len)
            # detected_kpts = detected_kpts[[i - 1 for i in new_ids], :]
            # detected_kpts_scores = detected_kpts_scores[[i - 1 for i in new_ids], :]
            print(prev_filter_ids, cur_ids)
            print('after map: ', new_pose_ids)

        # prepare for the next frame
        pre_image = input_image.copy()
        prev_kpts = detected_kpts
        prev_kpts_scores = detected_kpts_scores
        if i == 0:
            prev_boxes = detected_boxes
            previous_ids = [j for j in range(len(detected_boxes))]
            prev_pose_ids = previous_ids
        else:
            previous_ids = new_ids
            prev_boxes = new_boxes
            prev_pose_ids = new_pose_ids

        if i > 1:
            image = plot_keypoint_track(origin_img, detected_kpts, detected_kpts_scores,
                                        new_boxes[..., :4], new_pose_ids, 0.1)
        else:
            image = plot_keypoint_track(origin_img, detected_kpts, detected_kpts_scores,
                                        detected_boxes[..., :4], previous_ids, 0.1)

        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
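# --- Sketch of the greedy bipartite matching bipartite_matching_greedy() is
# used for above (rows = previous-frame people, cols = current detections).
# An assumption about its behaviour, not the repo's implementation.
def bipartite_matching_greedy_sketch(sim):
    sim = sim.astype(float).copy()
    row_ids, col_ids = [], []
    # repeatedly take the most similar remaining (row, col) pair
    while sim.size and sim.max() > 0:
        r, c = np.unravel_index(np.argmax(sim), sim.shape)
        row_ids.append(int(r))
        col_ids.append(int(c))
        sim[r, :] = -np.inf  # each row and column is matched at most once
        sim[:, c] = -np.inf
    return row_ids, col_ids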
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()

    # keep width and height multiples of 64
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int((input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
    print(resize_W, resize_H)
    input_image = cv2.resize(input_image, (resize_W, resize_H))

    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    flow_boxs = 0
    flow_kpts = 0
    previous_ids = 0
    prev_max_id = 0  # missing in the original but used below

    pdb()  # debug breakpoint left in (assumes `from pdb import set_trace as pdb`)
    for i in tqdm(range(video_length - 1)):
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        if i > 0:
            try:
                flow_result = flow_net(pre_image, input_image, flow_model)
                # the original called these `pre_keypoints` / `flow_pose_scores`,
                # which are never defined; using the names assigned below
                flow_boxs, flow_kpts = flow_propagation(pre_flow_keypoints, flow_result)
                flow_kpts = np.concatenate((flow_kpts, pre_flow_pkt_scores), -1)
            except Exception as e:
                print(e)
                continue

        pre_image = input_image

        try:
            # boxes threshold is 0.6
            bboxs, scores = yolo_det(input_image, human_model)  # bbox is coordinate location

            # first frame
            if i == 0:
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
                # initial IDs and scores map
                previous_ids = [i for i in range(len(bboxs))]
                # id_scores_map = {}
                # for i in range(len(bboxs)):
                #     id_scores_map.update({previous_ids[i]: scores[i]})
            else:
                # NMS between this frame's and the previous frame's box confidences
                # new_boxs, new_ids = boxes_nms(flow_boxs, bboxs, previous_ids)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        keypoints = np.concatenate((preds, maxvals), 2)

        # current frame box confidences, used as the next frame's flow-box confidences
        # flow_bbox_scores = scores.copy()

        # if i != 1:
        #     preds = (preds + flow_kpts) / 2

        # shift-kpts, shift-boxes, cur_kpts ------> TRACK
        if i > 0:
            kps_b = keypoints.copy()
            box_b = bboxs[:preds.shape[0]]
            kps_a = flow_kpts  # (N, 17, 3)
            box_a = flow_boxs

            pose_similarity_matrix = compute_pairwise_oks(kps_a, box_a, kps_b)
            box_similarity_matrix = boxs_similarity(box_a, box_b)
            ratio = 0.5
            similarity_matrix = (pose_similarity_matrix * ratio +
                                 box_similarity_matrix * (1 - ratio))
            prev_filter_ids, cur_ids = bipartite_matching_greedy(similarity_matrix)
            print('previous frame boxes: ', previous_ids)
            print(prev_filter_ids, cur_ids)

            cur_len = len(box_b) + len(box_a) - len(cur_ids)
            cur_maps = -np.ones(shape=(cur_len, ))
            new_boxes = []
            new_kpts = []
            for pos, num in enumerate(cur_ids):
                cur_maps[pos] = previous_ids[prev_filter_ids[pos]]
                new_boxes.append(bo)  # `bo` is undefined in the original; likely box_b[num]

            prev_max_id = max(max(previous_ids), prev_max_id)
            # give every unmatched person a fresh id (loop variable renamed
            # from `i`, which shadowed the frame index in the original)
            for j in range(cur_len):
                if cur_maps[j] == -1.:
                    prev_max_id += 1
                    cur_maps[j] = prev_max_id
            previous_ids = cur_maps.astype(np.uint8).tolist()
            print('after map: ', previous_ids)

        # prepare the keypoints and ids to pass to flownet for the next frame
        if i == 0:
            pre_flow_keypoints = keypoints
            pre_flow_pkt_scores = scores.copy()
        # based on the mapping result
        else:
            pre_flow_keypoints = tracked_keypoints  # undefined in the original
            pre_flow_pkt_scores = tracked_scores    # undefined in the original

        if i > 1:
            image = plot_keypoint_track(origin_img, preds, maxvals, box_b,
                                        previous_ids, 0.1)
        if args.display and i > 1:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
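# --- Sketch of pairwise OKS (object keypoint similarity), the pose-similarity
# term compute_pairwise_oks() supplies above. Standard COCO formulation,
# oks = mean_k exp(-d_k^2 / (2 * s * kappa_k^2)) with the COCO per-joint
# sigmas; a sketch under those assumptions, not the repo's implementation.
COCO_SIGMAS = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72,
                        .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0

def pairwise_oks_sketch(kpts_a, boxes_a, kpts_b):
    # kpts_*: (N, 17, 3) as x, y, score; boxes_a: (N, 4) as x1, y1, x2, y2
    areas = (boxes_a[:, 2] - boxes_a[:, 0]) * (boxes_a[:, 3] - boxes_a[:, 1])
    oks = np.zeros((len(kpts_a), len(kpts_b)))
    for i in range(len(kpts_a)):
        for j in range(len(kpts_b)):
            d2 = np.sum((kpts_a[i, :, :2] - kpts_b[j, :, :2]) ** 2, axis=1)
            e = d2 / (2 * areas[i] * (2 * COCO_SIGMAS) ** 2 + np.spacing(1))
            oks[i, j] = np.mean(np.exp(-e))
    return oks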
def main():
    previous_ids = 0
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()

    # keep width and height multiples of 64
    resize_W = int(input_image.shape[1] / 64) * 64
    resize_H = int((input_image.shape[0] / input_image.shape[1] * resize_W) / 64) * 64
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    image_resolution = (resize_W, resize_H)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    #### load optical flow model
    flow_model = load_model()

    item = 0
    prev_max_id = 0
    for i in tqdm(range(video_length - 1)):
        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        input_image = cv2.resize(input_image, (resize_W, resize_H))
        item = 0

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            bboxs = np.concatenate((bboxs, scores.transpose(1, 0)), -1)

            # flownet module
            if i > 0:
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxes, flow_kpts = flow_propagation(prev_kpts, flow_result)
                flow_boxes = np.concatenate(
                    (flow_boxes, np.expand_dims(prev_boxes[..., 4], -1)), -1)
                flow_kpts = np.concatenate((flow_kpts, prev_kpts_scores), -1)

                detected_boxes = bboxs.copy()
                # plot_boxes(input_image.copy(), flow_boxes, [i for i in range(len(flow_boxes))], '{}_flow.png'.format(1000 + i))
                # plot_boxes(input_image.copy(), detected_boxes, [i for i in range(len(detected_boxes))], '{}_detected.png'.format(1000 + i))
                bboxs = boxes_nms_test(flow_boxes, bboxs, image_resolution)
                # plot_boxes(input_image.copy(), bboxs, [i for i in range(len(bboxs))], 'nms_{}.png'.format(100 + i))

            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs[..., :4], bboxs[..., 4], cfg)
        except Exception as e:
            print(e)
            pdb()  # debug breakpoint left in (assumes `from pdb import set_trace as pdb`)
            continue

        try:
            with torch.no_grad():
                # compute output heatmap
                inputs = inputs[:, [2, 1, 0]]
                output = pose_model(inputs.cuda())
                # compute coordinate
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
        except Exception as e:
            print(e)
            continue

        kps_b = np.concatenate((preds, maxvals), 2)
        prev_kpts = kps_b.copy()
        box_b = bboxs[:preds.shape[0]]

        if previous_ids == 0:
            previous_ids = [j for j in range(len(preds))]

        if i > 0:
            # kps_a is from the previous frame; kps_b is what hrnet detected in this frame
            kps_a = flow_kpts
            box_a = flow_boxes

            pose_similarity_matrix = compute_pairwise_oks(kps_a, box_a, kps_b)
            box_similarity_matrix = boxes_similarity(box_a, box_b)
            # pose similarity ratio
            ratio = 0.5
            similarity_matrix = (pose_similarity_matrix * ratio +
                                 box_similarity_matrix * (1 - ratio))
            # prev_filter_ids: the filtered previous-frame id sequence
            prev_filter_ids, cur_ids = bipartite_matching_greedy(similarity_matrix)
            print('previous frame boxes: ', previous_ids)
            print(prev_filter_ids, cur_ids)

            cur_len = len(box_b)
            cur_maps = -np.ones(shape=(cur_len, ))
            for pos, num in enumerate(cur_ids):
                cur_maps[num] = previous_ids[prev_filter_ids[pos]]

            prev_max_id = max(max(previous_ids), prev_max_id)
            # give every unmatched person a fresh id
            for j in range(cur_len):
                if cur_maps[j] == -1.:
                    prev_max_id += 1
                    cur_maps[j] = prev_max_id

            # previous-frame id sequence for the next iteration
            previous_ids = cur_maps.astype(np.uint8).tolist()
            print('after map: ', previous_ids)

        # carried over to the next iteration as the previous frame
        kps_a = kps_b.copy()
        box_a = box_b.copy()
        prev_kpts = kps_b
        prev_kpts_scores = maxvals
        pre_image = input_image
        prev_boxes = bboxs

        if i > 0:
            image = plot_keypoint_track(origin_img, preds, maxvals, box_a,
                                        previous_ids, 0.1)
            out.write(image)

        if args.display and i > 0:
            ########### set the display window
            winname = 'image'
            cv2.namedWindow(winname)            # create a named window
            cv2.moveWindow(winname, 1000, 850)  # move it to (1000, 850)
            cv2.imshow(winname, image)
            cv2.waitKey(100)
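# --- Sketch of the box-similarity term used above (assumption: the
# boxes_similarity() / boxs_similarity() helpers compute a pairwise IoU
# matrix over boxes in x1, y1, x2, y2 form).
def boxes_similarity_sketch(boxes_a, boxes_b):
    ious = np.zeros((len(boxes_a), len(boxes_b)))
    for i, a in enumerate(boxes_a):
        for j, b in enumerate(boxes_b):
            ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
            ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
            inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
            union = ((a[2] - a[0]) * (a[3] - a[1]) +
                     (b[2] - b[0]) * (b[3] - b[1]) - inter)
            ious[i, j] = inter / union if union > 0 else 0.0
    return ious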
def main():
    args = parse_args()
    update_config(cfg, args)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    # pose_model = torch.nn.DataParallel(pose_model, device_ids=[0, 1]).cuda()
    pose_model.cuda()

    from pycocotools.coco import COCO
    annFile = '/ssd/xyliu/data/coco/annotations/instances_val2017.json'
    im_root = '/ssd/xyliu/data/coco/images/val2017/'
    coco = COCO(annFile)
    catIds = coco.getCatIds(catNms=['person'])
    # ids of all images containing people
    imgIds = coco.getImgIds(catIds=catIds)

    kpts_result = []
    detected_image_num = 0
    box_num = 0
    for imgId in tqdm(imgIds[:]):
        img = coco.loadImgs(imgId)[0]
        im_name = img['file_name']
        img = im_root + im_name
        img_input = plt.imread(img)

        try:
            bboxs, scores = mm_det(human_model, img_input, 0.3)
            inputs, origin_img, center, scale = PreProcess(
                img_input, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        detected_image_num += 1
        with torch.no_grad():
            output = pose_model(inputs.cuda())
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

            # vis = np.ones(shape=maxvals.shape,)
            vis = maxvals
            preds = preds.astype(np.float16)
            keypoints = np.concatenate((preds, vis), -1)

            for k, s in zip(keypoints, scores.tolist()):
                box_num += 1
                k = k.flatten().tolist()
                item = {
                    "image_id": imgId,
                    "category_id": 1,
                    "keypoints": k,
                    "score": s
                }
                kpts_result.append(item)

    num_joints = 17
    in_vis_thre = 0.2
    oks_thre = 0.5
    oks_nmsed_kpts = []
    for i in range(len(kpts_result)):
        img_kpts = kpts_result[i]['keypoints']
        kpt = np.array(img_kpts).reshape(17, 3)
        box_score = kpts_result[i]['score']

        kpt_score = 0
        valid_num = 0
        # each joint of the bbox
        for n_jt in range(0, num_joints):
            t_s = kpt[n_jt][2]  # joint score
            if t_s > in_vis_thre:
                kpt_score = kpt_score + t_s
                valid_num = valid_num + 1
        if valid_num != 0:
            kpt_score = kpt_score / valid_num

        # rescoring: product of the keypoint confidence and the box confidence
        kpts_result[i]['score'] = kpt_score * box_score

    import json
    data = json.dumps(kpts_result)
    print('image num is {} \tdetected_image num is {}\t person num is {}'.format(
        len(imgIds), detected_image_num, box_num))
    # data = json.dumps(str(kpts_result))
    with open('person_keypoints.json', 'wt') as f:
        f.write(data)
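# --- Sketch: scoring the person_keypoints.json written above with the
# official COCO keypoint evaluator. The annotation path is an assumption
# (keypoint evaluation needs person_keypoints_val2017.json rather than the
# instances file loaded in main()).
def evaluate_keypoints(
        ann_file='/ssd/xyliu/data/coco/annotations/person_keypoints_val2017.json',
        res_file='person_keypoints.json'):
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    coco_gt = COCO(ann_file)
    coco_dt = coco_gt.loadRes(res_file)
    coco_eval = COCOeval(coco_gt, coco_dt, 'keypoints')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()  # prints AP / AR at the standard OKS thresholds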
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    ret_val, input_image = cam.read()

    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    # pose_model = torch.nn.DataParallel(pose_model, device_ids=[0, 1]).cuda()
    pose_model.cuda()

    item = 0
    for i in tqdm(range(video_length - 1)):
        x0 = ckpt_time()
        ret_val, input_image = cam.read()

        # if args.camera:
        #     # for real-time speed, predict on every other frame
        #     if item == 0:
        #         item = 1
        #         continue
        item = 0

        try:
            bboxs, scores = mm_det(human_model, input_image)  # bbox is coordinate location
            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs, scores, cfg)
        except:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        out.write(image)

        if args.display:
            ######### full screen
            # out_win = "output_style_full_screen"
            # cv2.namedWindow(out_win, cv2.WINDOW_NORMAL)
            # cv2.setWindowProperty(out_win, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
            # cv2.imshow(out_win, image)

            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)