def main():
    """Estimate poses on one image and save the annotated result.

    Parses the command line, merges it into ``cfg``, runs the human detector
    on ``args.img_input``, feeds the preprocessed inputs to the pose model
    and writes the image returned by ``plot_keypoint`` to ``args.img_output``.
    """
    args = parse_args()
    update_config(cfg, args)

    # Copy the cuDNN-related switches from the configuration.
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Load the human detector model.
    from lib.detector.yolo.human_detector import load_model as yolo_model
    detector = yolo_model()

    # Detect people; the detector is handed the input path as given.
    from lib.detector.yolo.human_detector import main as yolo_det
    boxes, box_scores = yolo_det(args.img_input, detector)

    # Turn the detections into pose-model inputs plus the center/scale that
    # get_final_preds needs further down.
    batch, source_image, centers, scales = PreProcess(
        args.img_input, boxes, box_scores, cfg)

    # Load the pose model.
    pose_net = model_load(cfg)

    with torch.no_grad():
        # Reverse the order of the first three entries along axis 1
        # (presumably a BGR <-> RGB channel swap -- TODO confirm).
        batch = batch[:, [2, 1, 0]]
        heatmaps = pose_net(batch)

        # Convert the heatmaps into keypoint coordinates and their maxima.
        heatmaps_np = heatmaps.clone().cpu().numpy()
        keypoints, confidences = get_final_preds(
            cfg, heatmaps_np, np.asarray(centers), np.asarray(scales))

    # 0.3 is the threshold argument handed to plot_keypoint.
    annotated = plot_keypoint(source_image, keypoints, confidences, 0.3)
    cv2.imwrite(args.img_output, annotated)
def getTwoModel():
    """Create the two models used by the pipeline.

    Returns:
        tuple: ``(bbox_model, pose_model)`` -- the detector returned by
        ``yolo_model()`` and the pose network returned by ``model_load(cfg)``
        after ``.cuda()`` has been called on it.
    """
    # Pose-HRNet model, moved to the GPU.
    pose_net = model_load(cfg)
    pose_net.cuda()

    # YOLOv3 model used for bounding boxes.
    detector = yolo_model()

    models = (detector, pose_net)
    return models
def getTwoModel():
    """Build the bounding-box model and the pose model.

    The pose model is created from the module-level ``cfg`` as it currently
    stands; this function does not parse arguments or update the config
    itself (that code was disabled in the original).

    Returns:
        tuple: ``(bbox_model, pose_model)``.
    """
    # Pose-HRNet model; .cuda() is called on it before it is returned.
    hrnet = model_load(cfg)
    hrnet.cuda()

    # YOLOv3 model.
    yolo = yolo_model()

    return yolo, hrnet
def main():
    """Estimate poses on one image, save the result and display it.

    Parses the command line, merges it into ``cfg``, runs the human detector
    on ``args.img_input`` with a confidence of 0.5, runs the pose model on
    the preprocessed inputs, prints the predictions, writes the annotated
    image to ``args.img_output`` and finally shows it in a window until a
    key is pressed.
    """
    args = parse_args()
    update_config(cfg, args)

    # Copy the cuDNN-related switches from the configuration.
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Load the human detector model.
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import human_bbox_get as yolo_det
    print(args.img_input)
    # NOTE: the image used to be decoded here with cv2.imread into a local
    # that nothing read; the detector and preprocess both receive the path,
    # so that redundant decode has been dropped.

    # Per the original note, bboxs is (N, 4) as [x0, y0, x1, y1].
    bboxs, scores = yolo_det(args.img_input, human_model, confidence=0.5)

    # Build the pose-model inputs plus the center/scale needed to map the
    # heatmap maxima back to image coordinates.
    inputs, origin_img, center, scale = preprocess(
        args.img_input, bboxs, scores, cfg)

    # Load the pose model.
    model = model_load(cfg)

    with torch.no_grad():
        # Inputs are passed through unchanged (no channel reordering here).
        output = model(inputs)

        # Convert the heatmaps into keypoint coordinates and their maxima.
        preds, maxvals = get_final_preds(
            cfg,
            output.clone().cpu().numpy(),
            np.asarray(center),
            np.asarray(scale),
        )

    print("preds = ", preds)
    print("maxvals = ", maxvals)

    # 0.5 is the threshold argument handed to plot_keypoint.
    image = plot_keypoint(origin_img, preds, maxvals, 0.5)
    cv2.imwrite(args.img_output, image)

    # Show the result and block until a key is pressed.
    cv2.imshow('enhanced', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def generate_kpts(video_name, smooth=False):
    """Collect per-frame 2D keypoints for the first detected person in a video.

    Args:
        video_name: Source handed to ``cv2.VideoCapture``.
        smooth: When truthy, each frame's predictions are passed through
            ``smooth_filter`` before being stored.

    Returns:
        ``np.array`` built from the list of ``preds[0]`` entries, one per
        frame that was processed successfully. Frames for which detection or
        preprocessing raised are skipped (the exception is printed).
    """
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)

    cam = cv2.VideoCapture(video_name)
    try:
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

        pose_model = model_load(cfg)
        pose_model.cuda()

        # Collected keypoint coordinates, one entry per usable frame.
        kpts_result = []
        for _ in tqdm(range(video_length)):
            # The read status is not checked; a failed read hands None to
            # the detector, which is expected to end up in the except path
            # below -- TODO confirm.
            _, input_image = cam.read()

            try:
                bboxs, scores = yolo_det(input_image, human_model)
                # Build pose-model inputs from the detections.
                inputs, origin_img, center, scale = preprocess(
                    input_image, bboxs, scores, cfg)
            except Exception as e:
                # Best effort: report the problem and move on to the next
                # frame.
                print(e)
                continue

            with torch.no_grad():
                # Reverse the order of the first three entries along axis 1
                # (presumably a BGR <-> RGB channel swap -- TODO confirm).
                inputs = inputs[:, [2, 1, 0]]
                output = pose_model(inputs.cuda())

                # Convert the heatmaps into keypoint coordinates.
                preds, maxvals = get_final_preds(
                    cfg,
                    output.clone().cpu().numpy(),
                    np.asarray(center),
                    np.asarray(scale),
                )

            if smooth:
                # Smooth and fine-tune the coordinates.
                preds = smooth_filter(preds)

            # Only the first person is kept (single-human 3D video pose).
            kpts_result.append(preds[0])
    finally:
        # Release the capture even when something above raises; the original
        # never released it.
        cam.release()

    result = np.array(kpts_result)
    return result
def generate_kpts(video_name, smooth=None, no_nan=True):
    """Collect per-frame 2D keypoints for the first detected person in a video.

    Args:
        video_name: Source handed to ``cv2.VideoCapture``.
        smooth: When truthy, each frame's predictions are passed through
            ``smooth_filter`` before being stored.
        no_nan: When true (default), frames whose detection/preprocessing
            raised are dropped. When false, a ``(17, 2)`` float32 array of
            NaN is stored for such frames so they can be interpolated later.

    Returns:
        tuple: ``(result, input_fps, cam_w, cam_h)`` where ``result`` is
        ``np.array`` of the stored per-frame entries and the remaining values
        are the FPS, frame width and frame height reported by the capture.
    """
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)

    cam = cv2.VideoCapture(video_name)
    try:
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

        # NOTE: the first frame is read and discarded, and the loop below
        # therefore runs video_length - 1 times. This is kept as-is because
        # callers may rely on the resulting frame alignment.
        cam.read()

        input_fps = cam.get(cv2.CAP_PROP_FPS)
        cam_w = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
        cam_h = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Load the pose-HRNet model and move it to the GPU.
        pose_model = model_load(cfg)
        pose_model.cuda()

        # Collected keypoint coordinates.
        kpts_result = []
        for _ in tqdm(range(video_length - 1)):
            # The read status is not checked; a failed read hands None to
            # the detector, which is expected to end up in the except path
            # below -- TODO confirm.
            _, input_image = cam.read()

            try:
                bboxs, scores = yolo_det(input_image, human_model)
                # Build pose-model inputs from the detections.
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
            except Exception as e:
                if not no_nan:
                    # Append NaN so the gap can be interpolated later.
                    kpts_result.append(
                        np.full((17, 2), np.nan, dtype=np.float32))
                print(e)
                continue

            with torch.no_grad():
                # Reverse the order of the first three entries along axis 1
                # (presumably a BGR <-> RGB channel swap -- TODO confirm).
                inputs = inputs[:, [2, 1, 0]]
                output = pose_model(inputs.cuda())

                # Convert the heatmaps into keypoint coordinates.
                preds, maxvals = get_final_preds(
                    cfg,
                    output.clone().cpu().numpy(),
                    np.asarray(center),
                    np.asarray(scale),
                )

            if smooth:
                # Smooth and fine-tune the coordinates.
                preds = smooth_filter(preds)

            # Only the first person is kept (single-human 3D video pose).
            kpts_result.append(preds[0])
    finally:
        # Release the capture even when something above raises; the original
        # never released it.
        cam.release()

    result = np.array(kpts_result)
    return result, input_fps, cam_w, cam_h
def main():
    """Run the human detector over COCO val2017 person images and dump JSON.

    For every image id that contains the ``person`` category, the detector is
    run and each returned box is converted from ``[x0, y0, x1, y1]`` to
    ``[x0, y0, w, h]``. All detections are written to
    ``yolo_detection_person.json`` as a JSON list of objects with the keys
    ``bbox``, ``category_id``, ``image_id`` and ``score``.

    Images for which the detector raises are skipped (the exception is
    printed).
    """
    import json

    args = parse_args()
    update_config(cfg, args)

    # Copy the cuDNN-related switches from the configuration.
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Load the human detector model.
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import main as yolo_det
    from pycocotools.coco import COCO

    # NOTE(review): dataset locations are hard-coded to one machine.
    annFile = '/ssd/xyliu/data/coco/annotations/instances_val2017.json'
    im_root = '/ssd/xyliu/data/coco/images/val2017/'
    coco = COCO(annFile)
    catIds = coco.getCatIds(catNms=['person'])
    # Ids of all images that contain a person.
    imgIds = coco.getImgIds(catIds=catIds)

    detection_person = []
    for imgId in tqdm(imgIds):
        # Load the image and obtain its boxes and scores.
        img_info = coco.loadImgs(imgId)[0]
        img_input = plt.imread(im_root + img_info['file_name'])

        try:
            bbox, score = yolo_det(img_input, human_model)
        except Exception as e:
            # Best effort: report and continue with the next image.
            print(e)
            continue

        for bbox_item, score_item in zip(bbox, score[0]):
            # Convert to plain Python floats so the values are JSON
            # serialisable (assumes scalar-like entries, e.g. NumPy floats
            # -- TODO confirm against the detector's return types).
            x0 = float(bbox_item[0])
            y0 = float(bbox_item[1])
            x1 = float(bbox_item[2])
            y1 = float(bbox_item[3])
            detection_person.append({
                # [x0, y0, x1, y1] -> [x0, y0, w, h]
                'bbox': [x0, y0, x1 - x0, y1 - y0],
                'category_id': 1,
                'image_id': int(imgId),
                'score': float(score_item),
            })

    # Previously this wrote json.dumps(str(detection_person)), i.e. a single
    # JSON *string* containing the Python repr of the list, which JSON
    # consumers cannot read back as a list of detections. Write real JSON.
    with open('yolo_detection_person.json', 'wt') as f:
        json.dump(detection_person, f)
return model def ckpt_time(t0=None, display=None): if not t0: return time.time() else: t1 = time.time() if display: print('consume {:2f} second'.format(t1 - t0)) return t1 - t0, t1 ###### LOAD human detecotor model human_model = yolo_model() def generate_kpts(video_name, smooth=None): args = get_args() update_config(cfg, args) cam = cv2.VideoCapture(video_name) video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT)) ret_val, input_image = cam.read() # Video writer fourcc = cv2.VideoWriter_fourcc(*'mp4v') input_fps = cam.get(cv2.CAP_PROP_FPS) #### load pose-hrnet MODEL pose_model = model_load(cfg)
def generate_kpts(video_name, smooth=None):
    """Collect per-frame 2D keypoints for the first detected person in a video.

    Args:
        video_name: Source handed to ``cv2.VideoCapture``.
        smooth: When truthy, each frame's predictions are passed through
            ``smooth_filter`` before being stored.

    Returns:
        ``np.array`` built from the list of ``preds[0]`` entries, one per
        frame that was processed successfully. Frames for which detection or
        preprocessing raised are skipped (the exception is printed).
    """
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)

    cam = cv2.VideoCapture(video_name)
    try:
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

        # NOTE: the first frame is read and discarded, and the loop below
        # therefore runs video_length - 1 times. This is kept as-is because
        # callers may rely on the resulting frame alignment.
        cam.read()

        # Load the pose-HRNet model and move it to the GPU.
        pose_model = model_load(cfg)
        pose_model.cuda()

        # Collected keypoint coordinates.
        kpts_result = []
        for _ in tqdm(range(video_length - 1)):
            # The read status is not checked; a failed read hands None to
            # the detector, which is expected to end up in the except path
            # below -- TODO confirm.
            _, input_image = cam.read()

            try:
                bboxs, scores = yolo_det(input_image, human_model)
                # Build pose-model inputs from the detections.
                inputs, origin_img, center, scale = PreProcess(
                    input_image, bboxs, scores, cfg)
            except Exception as e:
                # Best effort: report the problem and move on.
                print(e)
                continue

            with torch.no_grad():
                # Reverse the order of the first three entries along axis 1
                # (presumably a BGR <-> RGB channel swap -- TODO confirm).
                inputs = inputs[:, [2, 1, 0]]
                # Per the original debug notes: input 1x3x256x192,
                # output 1x17x64x48.
                output = pose_model(inputs.cuda())

                # Convert the heatmaps into keypoint coordinates.
                preds, maxvals = get_final_preds(
                    cfg,
                    output.clone().cpu().numpy(),
                    np.asarray(center),
                    np.asarray(scale),
                )

            if smooth:
                # Smooth and fine-tune the coordinates.
                preds = smooth_filter(preds)

            # Only the first person is kept (single-human 3D video pose);
            # per the original note preds[0] is (17, 2). A disabled
            # alternative picked the person with the largest mean y
            # coordinate instead.
            kpts_result.append(preds[0])
    finally:
        # Release the capture even when something above raises; the original
        # never released it.
        cam.release()

    result = np.array(kpts_result)
    return result
def loadBboxModel():
    """Return a freshly created bounding-box model from ``yolo_model()``."""
    bbox_model = yolo_model()
    return bbox_model
state_dict = torch.load(model_file_name) from collections import OrderedDict new_state_dict = OrderedDict() for k, v in state_dict.items(): name = k # remove module. # print(name,'\t') new_state_dict[name] = v model.load_state_dict(new_state_dict) model.eval() return model #calling the function with updated configuration pose_model = model_load(cfg).cuda()#HrNet model human_model = yolo_model()#YOLO V3 model """<h3>Detect full pose image</h3>""" #function to detect full pose image def detect_pose(pa,c): image = pa frame = cv2.imread(image, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) bboxs, scores = yolo_det(frame, human_model) src_dir = pa if len(bboxs)>0: inputs, origin_img, center, scale = PreProcess(frame, bboxs, scores, cfg) with torch.no_grad():
def main():
    """Estimate poses on one image using an ONNX export of the pose model.

    Shows the input image, detects people in ``args.img_input``, preprocesses
    the detections, runs ``onnx_hrnet_human.onnx`` through onnxruntime
    (printing the session's input/output metadata along the way), converts
    the result to keypoints, writes the annotated image to
    ``args.img_output`` and displays it until a key is pressed.
    """
    import onnxruntime
    import numpy as np

    args = parse_args()
    update_config(cfg, args)

    # Copy the cuDNN-related switches from the configuration.
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Load the human detector model.
    from lib.detector.yolo.human_detector import load_model as yolo_model
    detector = yolo_model()

    from lib.detector.yolo.human_detector import human_bbox_get as yolo_det
    print(args.img_input)

    # Debug preview of the raw input; blocks until a key is pressed.
    preview = cv2.imread(args.img_input)
    print(type(preview))
    cv2.imshow("test", preview)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # Per the original note, the boxes are (N, 4) as [x0, y0, x1, y1].
    boxes, box_scores = yolo_det(args.img_input, detector, confidence=0.5)
    print("bboxs = ", boxes)
    print("scores = ", box_scores)

    # Build the model inputs plus the center/scale needed by
    # get_final_preds.
    inputs, origin_img, center, scale = preprocess(
        args.img_input, boxes, box_scores, cfg)
    print("inputs type = ", type(inputs))
    print("inputs shape after preprocess = ", inputs.shape)

    sess = onnxruntime.InferenceSession("onnx_hrnet_human.onnx")

    # Describe the session's first input.
    first_input = sess.get_inputs()[0]
    input_name = first_input.name
    print("input name", input_name)
    print("input shape", first_input.shape)
    print("input type", first_input.type)

    # Describe the session's first output.
    first_output = sess.get_outputs()[0]
    output_name = first_output.name
    print("output name", output_name)
    print("output shape", first_output.shape)
    print("output type", first_output.type)

    # Inference: feed the inputs as a float32 NumPy array.
    feed = inputs.numpy().astype(np.float32)
    res = sess.run([output_name], {input_name: feed})
    print("np.array(res[0]) shape = ", np.array(res[0]).shape)

    # Convert the heatmaps into keypoint coordinates and their maxima.
    heatmaps = np.array(res[0])
    preds, maxvals = get_final_preds(
        cfg, heatmaps, np.asarray(center), np.asarray(scale))
    print("preds = ", preds)
    print("maxvals = ", maxvals)

    # 0.5 is the threshold argument handed to plot_keypoint.
    image = plot_keypoint(origin_img, preds, maxvals, 0.5)
    cv2.imwrite(args.img_output, image)

    # Show the result and block until a key is pressed.
    cv2.imshow('enhanced', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()