Example #1
def main():
    args = parse_args()
    update_config(cfg, args)
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED


    ########## load human detector model
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import main as yolo_det
    bboxs, scores = yolo_det(args.img_input, human_model)

    # bboxs holds the detected box coordinates
    inputs, origin_img, center, scale = PreProcess(args.img_input, bboxs, scores, cfg)

    # load MODEL
    model = model_load(cfg)

    with torch.no_grad():
        # compute output heatmap
        inputs = inputs[:,[2,1,0]]
        output = model(inputs)
        # compute coordinate
        preds, maxvals = get_final_preds(
            cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

    image = plot_keypoint(origin_img, preds, maxvals, 0.3)
    cv2.imwrite(args.img_output, image)
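
The line inputs = inputs[:,[2,1,0]] above flips the channel axis of the NCHW batch from OpenCV's BGR order to what is presumably the RGB order the network was trained with. A minimal sketch of the same operation on a dummy tensor (the shapes here are illustrative, not the repo's):

import torch

# dummy NCHW batch: 1 image, 3 channels stored as (B, G, R), 2x2 pixels
x = torch.arange(12.).reshape(1, 3, 2, 2)
# fancy indexing on the channel axis reverses the order to (R, G, B)
x_rgb = x[:, [2, 1, 0]]
assert torch.equal(x_rgb[:, 0], x[:, 2])  # the old R channel (index 2) is now first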
Example #2
def getTwoModel():
    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # load YoloV3 Model
    bbox_model = yolo_model()

    return bbox_model, pose_model
Example #3
def getTwoModel():
    #  args = get_args()
    #  update_config(cfg, args)
    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # load YoloV3 Model
    bbox_model = yolo_model()

    return bbox_model, pose_model
Example #4
def main():
    args = parse_args()
    update_config(cfg, args)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    ########## load human detector model
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import human_bbox_get as yolo_det
    print(args.img_input)
    img = cv2.imread(args.img_input)
    # print(type(img))
    # cv2.imshow("test", img)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    bboxs, scores = yolo_det(args.img_input, human_model,
                             confidence=0.5)  # bboxes (N, 4) [x0, y0, x1, y1]
    # print("bboxs = ", bboxs)
    # print("scores = ", scores)
    # ipdb.set_trace()

    # bboxs holds the detected box coordinates
    inputs, origin_img, center, scale = preprocess(args.img_input, bboxs,
                                                   scores, cfg)

    # load MODEL
    model = model_load(cfg)

    with torch.no_grad():
        # compute output heatmap
        #  inputs = inputs[:,[2,1,0]]
        #  inputs = cv2.cvtColor(inputs, cv2.COLOR_BGR2RGB)
        output = model(inputs)
        # compute coordinate
        preds, maxvals = get_final_preds(cfg,
                                         output.clone().cpu().numpy(),
                                         np.asarray(center), np.asarray(scale))
        print("preds = ", preds)
        print("maxvals = ", maxvals)

    image = plot_keypoint(origin_img, preds, maxvals, 0.5)
    cv2.imwrite(args.img_output, image)
    #if args.display:
    #cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL);
    #cv2.resizeWindow("enhanced", 960, 480);
    cv2.imshow('enhanced', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
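
The last argument to plot_keypoint (0.5 here, 0.3 in Example #1) is a confidence threshold checked against maxvals. A hypothetical sketch of that filtering step, not the repo's actual implementation:

import numpy as np

def keep_confident(preds, maxvals, thresh=0.5):
    # preds: (N, 17, 2) keypoint coords; maxvals: (N, 17, 1) heatmap peaks
    mask = maxvals[..., 0] > thresh               # (N, 17) visibility mask
    return [person[m] for person, m in zip(preds, mask)]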
Example #5
def generate_kpts(video_name, smooth=False):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    # # ret_val, input_image = cam.read()
    # # Video writer
    # fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    # input_fps = cam.get(cv2.CAP_PROP_FPS)

    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoint coordinates
    kpts_result = []
    for i in tqdm(range(video_length)):

        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bboxs holds the detected box coordinates
            inputs, origin_img, center, scale = preprocess(
                input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

            # if len(preds) != 1:
            #     print('here')

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only a single human is supported)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result
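
smooth_filter is imported from the repo and not shown in these snippets. Purely as an illustration, a hypothetical moving-average smoother over a whole keypoint sequence could look like this:

import numpy as np

def moving_average_smooth(kpts, window=5):
    # kpts: (T, 17, 2) per-frame keypoints; edge-pad, then average over time
    pad = window // 2
    padded = np.pad(kpts, ((pad, pad), (0, 0), (0, 0)), mode='edge')
    kernel = np.ones(window) / window
    return np.apply_along_axis(
        lambda m: np.convolve(m, kernel, mode='valid'), 0, padded)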
Example #6
def generate_kpts(video_name, smooth=None, no_nan=True):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    cam_w = int(cam.get(cv2.CAP_PROP_FRAME_WIDTH))
    cam_h = int(cam.get(cv2.CAP_PROP_FRAME_HEIGHT))

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoint coordinates
    kpts_result = []
    for i in tqdm(range(video_length-1)):

        ret_val, input_image = cam.read()

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bboxs holds the detected box coordinates
            inputs, origin_img, center, scale = PreProcess(input_image, bboxs, scores, cfg)
        except Exception as e:
            if not no_nan:
                # append NaN so we can interpolate later
                kpts_result.append(np.full((17, 2), np.nan, dtype=np.float32))
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:,[2,1,0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only a single human is supported)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result, input_fps, cam_w, cam_h
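
With no_nan=False, failed frames are stored as NaN rows so the sequence keeps its length; a sketch of the interpolation step that comment anticipates (a numpy-based assumption, not code from the repo):

import numpy as np

def interpolate_nan_frames(kpts):
    # kpts: (T, 17, 2); linearly interpolate NaN frames along the time axis
    t = np.arange(len(kpts))
    out = kpts.copy()
    for j in range(out.shape[1]):
        for c in range(out.shape[2]):
            col = out[:, j, c]               # a view, so writes land in out
            bad = np.isnan(col)
            if bad.any() and (~bad).any():
                col[bad] = np.interp(t[bad], t[~bad], col[~bad])
    return out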
Example #7
def main():
    args = parse_args()
    update_config(cfg, args)
    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    ########## load human detector model
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()
    from lib.detector.yolo.human_detector import main as yolo_det

    from pycocotools.coco import COCO
    annFile = '/ssd/xyliu/data/coco/annotations/instances_val2017.json'
    im_root = '/ssd/xyliu/data/coco/images/val2017/'
    coco = COCO(annFile)
    catIds = coco.getCatIds(catNms=['person'])
    # ids of all images that contain a person
    imgIds = coco.getImgIds(catIds=catIds)
    detection_person = []
    for imgId in tqdm(imgIds):
        # get bbox (x0, y0, w, h) and score
        img = coco.loadImgs(imgId)[0]
        im_name = img['file_name']
        img = im_root + im_name
        img_input = plt.imread(img)

        try:
            bbox, score = yolo_det(img_input, human_model)
        except Exception as e:
            print(e)
            continue

        for bbox_item, score_item in zip(bbox, score[0]):
            # convert (x0, y0, x1, y1) to COCO (x, y, w, h) and cast to float
            # so the values are JSON-serializable
            bbox_item = [
                float(bbox_item[0]),
                float(bbox_item[1]),
                float(bbox_item[2] - bbox_item[0]),
                float(bbox_item[3] - bbox_item[1])
            ]
            item = {
                'bbox': bbox_item,
                'category_id': 1,
                'image_id': imgId,
                'score': float(score_item)
            }
            detection_person.append(item)

    import json
    # dump the list directly; wrapping it in str() would write a Python repr
    # string rather than valid JSON
    with open('yolo_detection_person.json', 'wt') as f:
        json.dump(detection_person, f)
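
The file written above follows the COCO detection-results format, so it can be scored directly with pycocotools; a short sketch using the same annotation path:

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO('/ssd/xyliu/data/coco/annotations/instances_val2017.json')
coco_dt = coco_gt.loadRes('yolo_detection_person.json')
coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
coco_eval.params.catIds = [1]  # evaluate the person category only
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()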
Example #8
    # (tail of a model-loading helper; the snippet is truncated above)
    return model


def ckpt_time(t0=None, display=None):
    if not t0:
        return time.time()
    else:
        t1 = time.time()
        if display:
            print('consume {:.2f} seconds'.format(t1 - t0))
        return t1 - t0, t1


###### LOAD human detector model
human_model = yolo_model()


def generate_kpts(video_name, smooth=None):
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
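
ckpt_time above doubles as a stopwatch: called with no argument it returns a start timestamp, and called with t0 it returns (elapsed, new_t0). A small usage sketch:

t0 = ckpt_time()                        # start the clock
_ = sum(range(10 ** 6))                 # stand-in for detection + inference
elapsed, t0 = ckpt_time(t0, display=1)  # prints the elapsed time, resets t0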
Example #9
def generate_kpts(video_name, smooth=None):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)

    #### load pose-hrnet MODEL
    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect keypoint coordinates
    kpts_result = []
    for i in tqdm(range(video_length - 1)):

        ret_val, input_image = cam.read()
        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # bboxs holds the detected box coordinates
            inputs, origin_img, center, scale = PreProcess(
                input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # compute output heatmap
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # print("input shape: ", inputs.shape)  # 1 3 256 192
            # print("output shape: ", output.shape) # 1 17 64 48
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # 3D video pose (only a single human is supported)
        kpts_result.append(preds[0])  # preds[0] (17, 2)

        # # select the person with the largest mean y coordinate
        # max_index = 0
        # max_y = np.mean(preds[0, :, 1])
        #
        # for k in range(len(preds)):
        #     tmp_y = np.mean(preds[k, :, 1])
        #     if tmp_y > max_y:
        #         max_index = k
        #         max_y = tmp_y
        # kpts_result.append(preds[max_index])
        # # print("maxvals[max_index]:", np.mean(maxvals[max_index]))

    result = np.array(kpts_result)
    return result
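
The commented-out block above keeps the person lowest in the frame (largest mean y). An alternative heuristic, equally hypothetical, keeps the detection with the highest mean heatmap confidence:

import numpy as np

def most_confident_person(preds, maxvals):
    # preds: (N, 17, 2) coords; maxvals: (N, 17, 1) per-joint confidences
    best = int(np.argmax(maxvals.mean(axis=(1, 2))))
    return preds[best]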
Example #10
def loadBboxModel():
    return yolo_model()
Example #11
  # (tail of model_load: restore weights from the checkpoint file)
  state_dict = torch.load(model_file_name)
  from collections import OrderedDict
  new_state_dict = OrderedDict()
  for k, v in state_dict.items():
      # keys are copied unchanged; a DataParallel 'module.' prefix could be
      # stripped here with k.replace('module.', '', 1)
      name = k
      new_state_dict[name] = v
  model.load_state_dict(new_state_dict)
  model.eval()
  return model
  
# call the function with the updated configuration
pose_model = model_load(cfg).cuda()  # HRNet model


human_model = yolo_model()  # YOLOv3 model

"""<h3>Detect full pose image</h3>"""

#function to detect full pose image

def detect_pose(pa,c):

  image = pa  # pa is the path to the input image
  frame = cv2.imread(image, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
  bboxs, scores = yolo_det(frame, human_model)
  src_dir = pa
  if len(bboxs)>0:
    inputs, origin_img, center, scale = PreProcess(frame, bboxs, scores, cfg)

    with torch.no_grad():
      # the source snippet breaks off here; a plausible completion modeled on
      # Example #1's inference step (an assumption, not the original code):
      output = pose_model(inputs.cuda())
      preds, maxvals = get_final_preds(
          cfg, output.clone().cpu().numpy(),
          np.asarray(center), np.asarray(scale))

Example #12
def main():
    args = parse_args()
    update_config(cfg, args)

    # cudnn related setting
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    ########## load human detector model
    from lib.detector.yolo.human_detector import load_model as yolo_model
    human_model = yolo_model()

    from lib.detector.yolo.human_detector import human_bbox_get as yolo_det
    print(args.img_input)
    img = cv2.imread(args.img_input)
    print(type(img))
    cv2.imshow("test", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    bboxs, scores = yolo_det(args.img_input, human_model,
                             confidence=0.5)  # bboxes (N, 4) [x0, y0, x1, y1]
    print("bboxs = ", bboxs)
    print("scores = ", scores)

    # bboxs holds the detected box coordinates
    inputs, origin_img, center, scale = preprocess(args.img_input, bboxs,
                                                   scores, cfg)
    print("inputs type = ", type(inputs))
    print("inputs shape after preprocess = ", inputs.shape)

    import onnxruntime
    import numpy as np
    sess = onnxruntime.InferenceSession("onnx_hrnet_human.onnx")

    input_name = sess.get_inputs()[0].name
    print("input name", input_name)
    input_shape = sess.get_inputs()[0].shape
    print("input shape", input_shape)
    input_type = sess.get_inputs()[0].type
    print("input type", input_type)

    output_name = sess.get_outputs()[0].name
    print("output name", output_name)
    output_shape = sess.get_outputs()[0].shape
    print("output shape", output_shape)
    output_type = sess.get_outputs()[0].type
    print("output type", output_type)

    # inference: ONNX Runtime expects a float32 numpy array
    x = inputs.numpy().astype(np.float32)
    res = sess.run([output_name], {input_name: x})
    print("np.array(res[0]) shape = ", np.array(res[0]).shape)

    #ipdb.set_trace()

    preds, maxvals = get_final_preds(cfg, np.array(res[0]), np.asarray(center),
                                     np.asarray(scale))
    print("preds = ", preds)
    print("maxvals = ", maxvals)

    image = plot_keypoint(origin_img, preds, maxvals, 0.5)
    cv2.imwrite(args.img_output, image)
    #if args.display:
    #cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL);
    #cv2.resizeWindow("enhanced", 960, 480);
    cv2.imshow('enhanced', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
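
A useful sanity check for this export path is comparing the ONNX Runtime output against the original PyTorch model on the same preprocessed batch; a sketch that assumes model and inputs are loaded as in the earlier examples:

import numpy as np
import onnxruntime
import torch

sess = onnxruntime.InferenceSession("onnx_hrnet_human.onnx")
x = inputs.numpy().astype(np.float32)        # inputs: preprocessed batch (assumption)
onnx_out = sess.run(None, {sess.get_inputs()[0].name: x})[0]
with torch.no_grad():
    torch_out = model(inputs).cpu().numpy()  # model: the PyTorch HRNet (assumption)
# the two heatmap tensors should agree to within floating-point tolerance
print("max abs diff:", np.abs(onnx_out - torch_out).max())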