Example #1
def generate_kpts(video_name, smooth=False):
    human_model = yolo_model()
    args = get_args()
    update_config(cfg, args)
    cam = cv2.VideoCapture(video_name)
    video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))

    pose_model = model_load(cfg)
    pose_model.cuda()

    # collect per-frame keypoint coordinates
    kpts_result = []
    for i in tqdm(range(video_length)):

        ret_val, input_image = cam.read()
        if not ret_val:
            break

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # crop and normalize each detected person for the pose network
            inputs, origin_img, center, scale = preprocess(
                input_image, bboxs, scores, cfg)
        except Exception as e:
            print(e)
            continue

        with torch.no_grad():
            # BGR -> RGB channel swap, then run the network for heatmaps
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        if smooth:
            # smooth and fine-tune coordinates
            preds = smooth_filter(preds)

        # for 3D video pose (only a single person is supported)
        kpts_result.append(preds[0])

    result = np.array(kpts_result)
    return result
Example #2
def getKptsFromImage(human_model, pose_model, image, smooth=None):
    args = get_args()
    update_config(cfg, args)

    bboxs, scores = yolo_det(image, human_model)
    # crop and normalize each detected person for the pose network
    inputs, origin_img, center, scale = preprocess(image, bboxs, scores, cfg)

    with torch.no_grad():
        # BGR -> RGB channel swap, then run the network for heatmaps
        inputs = inputs[:, [2, 1, 0]]
        output = pose_model(inputs.cuda())
        # compute coordinate
        preds, maxvals = get_final_preds(
            cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

    # for 3D video pose (only a single person is supported)
    return preds[0]
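
Unlike Example #1, this per-frame variant expects the caller to construct the detector and the pose network once and reuse them across frames. A sketch of that calling pattern, with the model construction mirrored from Example #1 and the video path illustrative:

human_model = yolo_model()
pose_model = model_load(cfg)
pose_model.cuda()

cam = cv2.VideoCapture('input.mp4')
while True:
    ret_val, frame = cam.read()
    if not ret_val:
        break
    kpts = getKptsFromImage(human_model, pose_model, frame)  # (num_joints, 2)
cam.release()
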
Example #3
def main():
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        # arbitrary frame cap for live camera input
        video_length = 30000

    ret_val, input_image = cam.read()
    resize_W = 640
    resize_H = 384
    input_image = cv2.resize(input_image, (resize_W, resize_H))
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps, (input_image.shape[1], input_image.shape[0]))

    #### load optical flow model
    flow_model = load_model()

    #### load human detector and pose-hrnet models
    human_model = yolo_model()
    pose_model = model_load(cfg)
    pose_model.cuda()

    first_frame = 1

    # sentinels until the first optical-flow estimate is available
    flow_boxs = 0
    flow_kpts = 0

    item = 0
    for i in tqdm(range(video_length - 1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        if not ret_val:
            break
        input_image = cv2.resize(input_image, (resize_W, resize_H))

        if first_frame == 0:
            try:
                t0 = ckpt_time()
                flow_result = flow_net(pre_image, input_image, flow_model)
                flow_boxs, flow_kpts = flow_propagation(keypoints, flow_result)
                _, t1 = ckpt_time(t0, 1)
            except Exception as e:
                print(e)
                continue

        pre_image = input_image
        first_frame = 0

        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # crop and normalize each detected person for the pose network
            if isinstance(flow_boxs, int):
                # no flow estimate yet: fall back to the detector's boxes
                inputs, origin_img, center, scale = preprocess(input_image, bboxs, scores, cfg)
            else:
                inputs, origin_img, center, scale = preprocess(input_image, flow_boxs, scores, cfg)
        except Exception:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 1080, 720)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # BGR -> RGB channel swap, then run the network for heatmaps
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(
                cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))

        if not isinstance(flow_boxs, int):
            # average fresh detections with the flow-propagated keypoints
            preds = (preds + flow_kpts) / 2

        # draw the keypoints on a black canvas rather than the original frame
        origin_img = np.zeros(origin_img.shape, np.uint8)
        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        out.write(image)
        # keep confidences alongside coordinates for the next flow step
        keypoints = np.concatenate((preds, maxvals), 2)

        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 1920, 1080)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
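
The fusion above is a plain average of the freshly detected coordinates and the flow-propagated keypoints. The same idea as a standalone, tunable function; the array shapes are inferred from the usage above, and the weighting parameter is an assumption:

import numpy as np

def fuse_with_flow(detected, propagated, alpha=0.5):
    # detected, propagated: (num_people, num_joints, 2) coordinate arrays;
    # alpha=0.5 reproduces the plain average used in the loop above
    return alpha * np.asarray(detected) + (1.0 - alpha) * np.asarray(propagated)
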
Example #4
def main():
    tick = 0
    args = parse_args()
    update_config(cfg, args)

    if not args.camera:
        # handle video
        cam = cv2.VideoCapture(args.video_input)
        video_length = int(cam.get(cv2.CAP_PROP_FRAME_COUNT))
    else:
        cam = cv2.VideoCapture(0)
        # arbitrary frame cap for live camera input
        video_length = 30000

    ret_val, input_image = cam.read()
    # Video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    out = cv2.VideoWriter(args.video_output, fourcc, input_fps,
                          (input_image.shape[1], input_image.shape[0]))

    #### load human detector and pose-hrnet models
    human_model = yolo_model()
    pose_model = model_load(cfg)
    pose_model.cuda()

    item = 0
    for i in tqdm(range(video_length - 1)):

        x0 = ckpt_time()
        ret_val, input_image = cam.read()
        if not ret_val:
            break

        if args.camera:
            # for real-time speed, run prediction on every other frame
            if item == 0:
                item = 1
                continue

        item = 0
        try:
            bboxs, scores = yolo_det(input_image, human_model)
            # crop and normalize each detected person for the pose network
            inputs, origin_img, center, scale = preprocess(
                input_image, bboxs, scores, cfg)
        except Exception:
            out.write(input_image)
            cv2.namedWindow("enhanced", 0)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', input_image)
            cv2.waitKey(2)
            continue

        with torch.no_grad():
            # BGR -> RGB channel swap, then run the network for heatmaps
            inputs = inputs[:, [2, 1, 0]]
            output = pose_model(inputs.cuda())
            # compute coordinate
            preds, maxvals = get_final_preds(cfg,
                                             output.clone().cpu().numpy(),
                                             np.asarray(center),
                                             np.asarray(scale))

        # smooth the keypoints
        preds = smooth_filter(preds)
        # draw the keypoints on a black canvas rather than the original frame
        origin_img = np.zeros(origin_img.shape, np.uint8)
        image = plot_keypoint(origin_img, preds, maxvals, 0.1)
        # skip the first few frames (presumably to let the smoothing warm up)
        if i >= 14:
            out.write(image)
        if args.display:
            ########### set the display window size
            cv2.namedWindow("enhanced", cv2.WINDOW_GUI_NORMAL)
            cv2.resizeWindow("enhanced", 960, 480)
            cv2.imshow('enhanced', image)
            cv2.waitKey(1)
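
smooth_filter itself is not shown in these examples. A minimal stand-in that averages each joint over a sliding window of recent frames might look like the class below; the window size and the stateful design are assumptions, not the project's actual filter:

from collections import deque

import numpy as np

class KeypointSmoother:
    # moving-average stand-in for smooth_filter (illustrative only)

    def __init__(self, window=5):
        self.history = deque(maxlen=window)

    def __call__(self, preds):
        # preds: (num_people, num_joints, 2); returns the same shape,
        # averaged over up to `window` recent frames
        self.history.append(np.asarray(preds))
        return np.mean(np.stack(self.history), axis=0)
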