def main():
    """Estimate human poses in a single image and save the annotated result.

    Steps: parse CLI arguments into ``cfg``, apply the cuDNN settings from
    ``cfg``, detect people in ``args.img_input``, run the pose model on the
    preprocessed detections, draw the keypoints and write the image to
    ``args.img_output``. When ``args.display`` is set the result is also
    shown in a window for up to 5000 ms.
    """
    opts = parse_args()
    update_config(cfg, opts)

    # cuDNN behaviour is taken straight from the configuration.
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Load the human detector model (imported lazily inside the function).
    from lib.detector.mmdetection.high_api import (
        human_boxes_get as mmd_detector,
        load_model,
    )

    detector = load_model()

    # Per the original author's note: boxes are (N, 4) as [x0, y0, x1, y1].
    person_boxes, person_scores = mmd_detector(detector, opts.img_input)

    model_inputs, source_img, centers, scales = PreProcess(
        opts.img_input, person_boxes, person_scores, cfg)

    # Load the HRNet pose model.
    pose_net = model_load(cfg)

    with torch.no_grad():
        # Heatmap prediction for every detected person.
        heatmaps = pose_net(model_inputs)

        # Convert heatmaps to keypoint coordinates.
        keypoints, confidences = get_final_preds(
            cfg,
            heatmaps.clone().cpu().numpy(),
            np.asarray(centers),
            np.asarray(scales),
        )

    annotated = plot_keypoint(source_img, keypoints, confidences, 0.3)
    cv2.imwrite(opts.img_output, annotated)

    if not opts.display:
        return

    cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
    cv2.resizeWindow("enhanced", 960, 480)
    cv2.imshow('enhanced', annotated)
    cv2.waitKey(5000)
def main():
    """Run pose estimation over a video file or a camera stream.

    Frames come from ``args.video_input``, or from camera 0 when
    ``args.camera`` is set. For each frame, people are detected with
    ``mm_det``, keypoints are predicted by the HRNet pose model, drawn onto
    the frame and written to ``args.video_output``. A frame on which
    detection or preprocessing raises is written unannotated. The first
    frame is only used to size the output video.

    Raises:
        RuntimeError: if no frame can be read from the source at start-up.
    """
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # Video file: the frame count bounds the loop.
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    out = None
    try:
        ret_val, input_image = cam.read()
        if not ret_val:
            raise RuntimeError("could not read a frame from the video source")

        # Video writer sized after the first frame.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        input_fps = cam.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(
            args.video_output, fourcc, input_fps,
            (input_image.shape[1], input_image.shape[0]))

        # Load the HRNet pose model.
        pose_model = model_load(cfg)
        pose_model.cuda()

        for _ in tqdm(range(video_length - 1)):
            ret_val, input_image = cam.read()
            if not ret_val:
                # Stream ended early (the reported frame count can be off).
                break

            try:
                bboxs, scores = mm_det(human_model, input_image)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
            except Exception:
                # Detection or preprocessing raised: pass the raw frame
                # through. ``Exception`` (not a bare except) keeps Ctrl-C
                # working.
                out.write(input_image)
                if args.display:
                    cv2.namedWindow("enhanced", 0)
                    cv2.resizeWindow("enhanced", 960, 480)
                    cv2.imshow('enhanced', input_image)
                    cv2.waitKey(2)
                continue

            with torch.no_grad():
                # Reverse the channel order, then compute the heatmaps.
                inputs = inputs[:, [2, 1, 0]]
                output = pose_model(inputs.cuda())

                # Convert heatmaps to keypoint coordinates.
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(),
                    np.asarray(center), np.asarray(scale))

            image = plot_keypoint(origin_img, preds, maxvals, 0.1)
            out.write(image)

            if args.display:
                # Fixed-size preview window.
                cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
                cv2.resizeWindow("enhanced", 960, 480)
                cv2.imshow('enhanced', image)
                cv2.waitKey(1)
    finally:
        # Release the capture and finalise the output file even on error.
        cam.release()
        if out is not None:
            out.release()
        if args.display:
            cv2.destroyAllWindows()
def main():
    """Run YOLOv5 + HRNet pose estimation on a video and export keypoints.

    Frames come from ``args.video_input``, or from camera 1 (30 frames) when
    ``args.camera`` is set. For each frame, detections from ``yolov5_model``
    are filtered to class 0 with confidence above 0.25, the pose model
    predicts keypoints for those boxes, and the annotated frame is written
    to ``args.video_output``. A frame on which detection or preprocessing
    raises is written unannotated. Keypoints of every successfully processed
    frame are collected and dumped to ``outputs/output.json`` at the end.

    Raises:
        RuntimeError: if no frame can be read from the source at start-up.
    """
    import os  # local import: only needed to create the output directory

    json_data = {}
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # Video file: the frame count bounds the loop.
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(1)
        video_length = 30

    out = None
    try:
        ret_val, input_image = cam.read()
        if not ret_val:
            raise RuntimeError("could not read a frame from the video source")

        # Video writer sized after the first frame.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        input_fps = cam.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(
            args.video_output, fourcc, input_fps,
            (input_image.shape[1], input_image.shape[0]))

        # Load the HRNet pose model.
        pose_model = model_load(cfg)
        pose_model.to(device)

        index = 0
        for _ in tqdm(range(video_length - 1)):
            ret_val, input_image = cam.read()
            if not ret_val:
                # Stream ended early (the reported frame count can be off).
                break

            try:
                detections = yolov5_model(input_image)
                scores = []
                bboxs = []
                if detections is not None:
                    # The previous loop body read ``inputs`` before it was
                    # assigned and iterated an undefined ``complete_bbox``,
                    # so every frame fell into the fallback below. Iterate
                    # the detection rows themselves instead.
                    # NOTE(review): assumes each row of ``detections.pred``
                    # is (x0, y0, x1, y1, confidence, class) — confirm
                    # against the YOLOv5 version in use.
                    for det in detections.pred:
                        for bbox in det.detach().cpu().numpy():
                            if bbox[4] > 0.25 and bbox[5] == 0:
                                bboxs.append(bbox[:4])
                                scores.append(bbox[4])

                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
            except Exception:
                # Detection or preprocessing raised: pass the raw frame
                # through. ``Exception`` (not a bare except) keeps Ctrl-C
                # working.
                out.write(input_image)
                if args.display:
                    cv2.namedWindow("enhanced", 0)
                    cv2.resizeWindow("enhanced", 960, 480)
                    cv2.imshow('enhanced', input_image)
                    cv2.waitKey(2)
                continue

            with torch.no_grad():
                print("We here babby ")
                # Reverse the channel order, then compute the heatmaps.
                inputs = inputs[:, [2, 1, 0]]
                output = pose_model(inputs.to(device))

                # Convert heatmaps to keypoint coordinates.
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(),
                    np.asarray(center), np.asarray(scale))

            # One entry per processed frame, keyed by a running counter.
            json_data[index] = [preds.tolist()]
            print("Key points", preds)
            index += 1

            image = plot_keypoint(origin_img, preds, maxvals, 0.25)
            out.write(image)

            if args.display:
                # Fixed-size preview window.
                cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
                cv2.resizeWindow("enhanced", 960, 480)
                cv2.imshow('enhanced', image)
                cv2.waitKey(1)
    finally:
        # Release the capture and finalise the output file even on error.
        cam.release()
        if out is not None:
            out.release()
        if args.display:
            cv2.destroyAllWindows()

    # Make sure the target directory exists so the results are not lost
    # after a full run.
    os.makedirs('outputs', exist_ok=True)
    with open('outputs/output.json', 'w') as json_file:
        print(json_data)
        json.dump(json_data, json_file)
def main():
    """Run pose estimation on a video with optical-flow keypoint propagation.

    Frames come from ``args.video_input``, or from camera 0 when
    ``args.camera`` is set, and are resized so both sides are multiples of
    64. For each frame, the previous frame's keypoints are propagated with
    the flow model, people are detected with ``yolo_det``, the pose model
    predicts keypoints, and (when propagated keypoints exist) the prediction
    is averaged with them. Annotated frames go to ``args.video_output``. A
    frame on which detection or preprocessing raises is written unannotated;
    a frame on which the flow step raises is skipped entirely.

    Raises:
        RuntimeError: if no frame can be read from the source at start-up.
    """
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # Video file: the frame count bounds the loop.
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    out = None
    try:
        ret_val, input_image = cam.read()
        if not ret_val:
            raise RuntimeError("could not read a frame from the video source")

        # Keep width and height multiples of 64.
        resize_W = int(input_image.shape[1] / 64) * 64
        resize_H = int(
            (input_image.shape[0] / input_image.shape[1] * resize_W) / 64
        ) * 64
        print(resize_W, resize_H)
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        # Video writer sized after the resized first frame.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        input_fps = cam.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(
            args.video_output, fourcc, input_fps,
            (input_image.shape[1], input_image.shape[0]))

        # Load the optical-flow model and the HRNet pose model.
        flow_model = load_model()
        pose_model = model_load(cfg)
        pose_model.cuda()

        # State carried between frames. ``None`` means "no frame has been
        # fully processed yet", so a failed first frame no longer leaves
        # these names undefined for the rest of the run.
        pre_image = None
        keypoints = None
        flow_bbox_scores = None

        for _ in tqdm(range(video_length - 1)):
            ret_val, input_image = cam.read()
            if not ret_val:
                # Stream ended early (the reported frame count can be off).
                break
            input_image = cv2.resize(input_image, (resize_W, resize_H))

            # Propagate the previous keypoints into this frame, if any.
            flow_boxs = None
            flow_kpts = None
            if keypoints is not None:
                try:
                    t0 = ckpt_time()
                    flow_result = flow_net(pre_image, input_image, flow_model)
                    flow_boxs, flow_kpts = flow_propagation(
                        keypoints, flow_result)
                    print('每次flownet耗时:{:0.3f}'.format(time.time()- t0))
                except Exception as e:
                    print(e)
                    continue

            pre_image = input_image

            try:
                bboxs, scores = yolo_det(input_image, human_model)
                if flow_boxs is None:
                    # Nothing propagated yet: use the detector boxes.
                    boxes = bboxs
                else:
                    # Box-confidence check between this frame and the
                    # previous one: keep the flow boxes only when the
                    # previous confidence is higher.
                    if not (flow_bbox_scores > scores).tolist()[0][0]:
                        flow_boxs = bboxs
                    boxes = flow_boxs
                inputs, origin_img, center, scale = PreProcess(
                    input_image, boxes, scores, cfg)
            except Exception:
                # Detection or preprocessing raised: pass the raw frame
                # through. ``Exception`` (not a bare except) keeps Ctrl-C
                # working.
                out.write(input_image)
                if args.display:
                    cv2.namedWindow("enhanced", 0)
                    cv2.resizeWindow("enhanced", 960, 480)
                    cv2.imshow('enhanced', input_image)
                    cv2.waitKey(2)
                continue

            with torch.no_grad():
                # Reverse the channel order, then compute the heatmaps.
                inputs = inputs[:, [2, 1, 0]]
                output = pose_model(inputs.cuda())

                # Convert heatmaps to keypoint coordinates.
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(),
                    np.asarray(center), np.asarray(scale))

            # This frame's box confidence becomes the flow-box confidence
            # for the next frame.
            flow_bbox_scores = scores.copy()

            # Average with the propagated keypoints only when they exist.
            # The previous ``i != 1`` test averaged the first frame with the
            # placeholder 0 (halving every coordinate) and skipped the first
            # frame that actually had flow.
            # NOTE(review): assumes ``flow_kpts`` broadcasts against
            # ``preds`` (same person count) — confirm in flow_propagation.
            if flow_kpts is not None:
                preds = (preds + flow_kpts) / 2

            image = plot_keypoint(origin_img, preds, maxvals, 0.1)
            out.write(image)
            keypoints = np.concatenate((preds, maxvals), 2)

            if args.display:
                # Fixed-size preview window.
                cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
                cv2.resizeWindow("enhanced", 960, 480)
                cv2.imshow('enhanced', image)
                cv2.waitKey(1)
    finally:
        # Release the capture and finalise the output file even on error.
        cam.release()
        if out is not None:
            out.release()
        if args.display:
            cv2.destroyAllWindows()
def main():
    """Run pose estimation on a video with temporally smoothed keypoints.

    Frames come from ``args.video_input``, or from camera 0 when
    ``args.camera`` is set (in which case only every other frame is
    processed). For each processed frame, people are detected with
    ``yolo_det``, keypoints are predicted by the HRNet pose model, passed
    through ``smooth_filter`` and drawn. Annotated frames are written to
    ``args.video_output`` from loop index 9 onwards. A frame on which
    detection or preprocessing raises is written unannotated.

    Raises:
        RuntimeError: if no frame can be read from the source at start-up.
    """
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # Video file: the frame count bounds the loop.
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        video_length = 30000

    out = None
    try:
        ret_val, input_image = cam.read()
        if not ret_val:
            raise RuntimeError("could not read a frame from the video source")

        # Video writer sized after the first frame.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        input_fps = cam.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(
            args.video_output, fourcc, input_fps,
            (input_image.shape[1], input_image.shape[0]))

        # Load the HRNet pose model.
        pose_model = model_load(cfg)
        pose_model.cuda()

        # In camera mode the first loop frame is skipped, then every other.
        skip_next = True
        for i in tqdm(range(video_length - 1)):
            ret_val, input_image = cam.read()
            if not ret_val:
                # Stream ended early (the reported frame count can be off).
                break

            if args.camera:
                # For real-time speed, predict on only one frame in two.
                if skip_next:
                    skip_next = False
                    continue
                skip_next = True

            try:
                bboxs, scores = yolo_det(input_image, human_model)
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
            except Exception:
                # Detection or preprocessing raised: pass the raw frame
                # through. ``Exception`` (not a bare except) keeps Ctrl-C
                # working.
                out.write(input_image)
                if args.display:
                    cv2.namedWindow("enhanced", 0)
                    cv2.resizeWindow("enhanced", 960, 480)
                    cv2.imshow('enhanced', input_image)
                    cv2.waitKey(2)
                continue

            with torch.no_grad():
                # Reverse the channel order, then compute the heatmaps.
                inputs = inputs[:, [2, 1, 0]]
                output = pose_model(inputs.cuda())

                # Convert heatmaps to keypoint coordinates.
                preds, maxvals = get_final_preds(
                    cfg, output.clone().cpu().numpy(),
                    np.asarray(center), np.asarray(scale))

            # Smooth the keypoints.
            preds = smooth_filter(preds)

            image = plot_keypoint(origin_img, preds, maxvals, 0.3)

            # NOTE(review): the first loop indices are not written —
            # presumably a warm-up for smooth_filter; confirm.
            if i >= 9:
                out.write(image)

            if args.display:
                # Fixed-size preview window.
                cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
                cv2.resizeWindow("enhanced", 960, 480)
                cv2.imshow('enhanced', image)
                cv2.waitKey(1)
    finally:
        # Release the capture and finalise the output file even on error.
        cam.release()
        if out is not None:
            out.release()
        if args.display:
            cv2.destroyAllWindows()