# NOTE: these demo functions assume the usual FrankMocap demo imports, roughly:
#   import json
#   import os.path as osp
#   import cv2
#   import numpy as np
#   import matplotlib.pyplot as plt
#   import mocap_utils.demo_utils as demo_utils
#   import mocap_utils.general_utils as gnu
#   from mocap_utils.timer import Timer
#   from renderer.viewer2D import ImShow
# plus project-specific constants and helpers (JOINT_NAMES, J49_FLIP_PERM,
# RIGHT_HAND_NAMES, LEFT_HAND_NAMES, Vis_Skeleton_2D_SPIN49, run_regress*).
def run_frank_mocap_cpu(args, bbox_detector, body_mocap, hand_mocap, visualizer):
    # Setup input data to handle different types of inputs
    input_type, input_data = demo_utils.setup_input(args)
    cur_frame = args.start_frame
    video_frame = 0

    # Our output structure


    while True:
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':      
            _, img_original_bgr = input_data.read()
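            # fast-forward: frames before args.start_frame are read and discarded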
            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        
        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")
        
        # bbox detection: start from empty lists so the regressor runs its own
        # detector when no precomputed boxes were loaded
        if not load_bbox:
            body_bbox_list, hand_bbox_list = list(), list()
        
        # regression (includes integration)
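        # (run_regress_cpu detects boxes itself when the lists above are empty,
        # then regresses body and hands and integrates them into one output)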
        body_bbox_list, hand_bbox_list, pred_output_list = run_regress_cpu(
            args, img_original_bgr, 
            body_bbox_list, hand_bbox_list, bbox_detector,
            body_mocap, hand_mocap)


        # save the obtained body & hand bbox to json file
        if args.save_bbox_output: 
            demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list)

        if len(body_bbox_list) < 1:
            print(f"No body detected: {image_path}")
            continue

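        # extract meshes for rendering (vertices in image space and faces) from pred_output_list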
        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualization
        res_img = visualizer.visualize(
            img_original_bgr,
            pred_mesh_list = pred_mesh_list,
            body_bbox_list = body_bbox_list,
            hand_bbox_list = hand_bbox_list)
        
        # show result on the screen
        if not args.no_display:
            res_img = res_img.astype(np.uint8)
            ImShow(res_img)

        # save result image
        if args.out_dir is not None:
            demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'frank'
            demo_utils.save_pred_to_pkl(
                args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list)

        print(f"Processed : {image_path}")

    if not args.no_video_out and input_type in ['video', 'webcam']:
        demo_utils.gen_video_out(args.out_dir, args.seq_name)

    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
Example #2
def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer):
    # Setup input data to handle different types of inputs
    input_type, input_data = demo_utils.setup_input(args)

    cur_frame = args.start_frame
    video_frame = 0
    timer = Timer()
    while True:
        timer.tic()
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                print("Use pre-computed bounding boxes")
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':
            _, img_original_bgr = input_data.read()
            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames",
                                  f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)

        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames",
                                  f"scene_{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        if load_bbox:
            body_pose_list = None
        else:
            body_pose_list, body_bbox_list = body_bbox_detector.detect_body_pose(
                img_original_bgr)
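        # body-only demo: no hand boxes, but keep list lengths aligned per person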
        hand_bbox_list = [
            None,
        ] * len(body_bbox_list)

        # save the obtained body & hand bbox to json file
        if args.save_bbox_output:
            demo_utils.save_info_to_json(args, image_path, body_bbox_list,
                                         hand_bbox_list)

        if len(body_bbox_list) < 1:
            print(f"No body detected: {image_path}")
            continue

        # Sort the bboxes by area
        # (to make the order as consistent as possible without tracking)
        bbox_size = [(x[2] * x[3]) for x in body_bbox_list]
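        # np.argsort sorts ascending; reverse for largest-area-first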
        idx_big2small = np.argsort(bbox_size)[::-1]
        body_bbox_list = [body_bbox_list[i] for i in idx_big2small]
        if args.single_person and len(body_bbox_list) > 0:
            body_bbox_list = [
                body_bbox_list[0],
            ]

        # Body Pose Regression
        pred_output_list = body_mocap.regress(img_original_bgr, body_bbox_list)
        assert len(body_bbox_list) == len(pred_output_list)

        # # extract mesh for rendering (vertices in image space and faces) from pred_output_list
        # pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # # visualization
        # res_img = visualizer.visualize(
        #     img_original_bgr,
        #     pred_mesh_list = pred_mesh_list,
        #     body_bbox_list = body_bbox_list)

        # # show result on the screen
        # if not args.no_display:
        #     res_img = res_img.astype(np.uint8)
        #     ImShow(res_img)

        # # save result image
        # if args.out_dir is not None:
        #     demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'body'
            demo_utils.save_pred_to_pkl(args, demo_type, image_path,
                                        body_bbox_list, hand_bbox_list,
                                        pred_output_list)

            for pidx, pred_output in enumerate(pred_output_list):
                # figure settings
                fig = plt.figure(figsize=(15, 15), dpi=100)
                # add a 3D axes
                ax = fig.add_subplot(111, projection='3d')

                joints = pred_output['pred_joints_img']
                min_joint = np.min(joints, axis=0)
                max_joint = np.max(joints, axis=0)

                # joint output: clamp axes to the joint extents
                ax.set_xlim3d(int(min_joint[0]), int(max_joint[0]))
                ax.set_ylim3d(int(min_joint[2]), int(max_joint[2]))
                ax.set_zlim3d(int(-max_joint[1]), int(-min_joint[1]))
                ax.set(xlabel='x', ylabel='y', zlabel='z')
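                # note: image y grows downward, so depth (index 2) is plotted on
                # the y-axis and -y (index 1) on the z-axis to keep the skeleton upright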

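                # OpenPose-named edges (first 25 joints of the 49-joint set), in blue;
                # joints are looked up by name and remapped via J49_FLIP_PERM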
                for jsname, jename in [
                        ('OP Nose', 'OP Neck'), ('OP Neck', 'OP RShoulder'), ('OP RShoulder', 'OP RElbow'), ('OP RElbow', 'OP RWrist'),
                        ('OP Neck', 'OP LShoulder'), ('OP LShoulder', 'OP LElbow'), ('OP LElbow', 'OP LWrist'),
                        ('OP MidHip', 'OP RHip'), ('OP RHip', 'OP RKnee'), ('OP RKnee', 'OP RAnkle'), ('OP RAnkle', 'OP RHeel'), ('OP RHeel', 'OP RSmallToe'), ('OP RHeel', 'OP RBigToe'),
                        ('OP MidHip', 'OP LHip'), ('OP LHip', 'OP LKnee'), ('OP LKnee', 'OP LAnkle'), ('OP LAnkle', 'OP LHeel'), ('OP LHeel', 'OP LSmallToe'), ('OP LHeel', 'OP LBigToe'),
                        ('OP Nose', 'OP REye'), ('OP REye', 'OP REar'), ('OP Nose', 'OP LEye'), ('OP LEye', 'OP LEar'), ('OP Neck', 'OP MidHip')]:
                    xs = [
                        joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][0],
                        joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][0]
                    ]
                    ys = [
                        joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][2],
                        joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][2]
                    ]
                    zs = [
                        -joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][1],
                        -joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][1]
                    ]

                    ax.plot3D(xs, ys, zs, marker="o", ms=2, c="#0000FF")


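                # edges over the remaining joints (LSP/MPII/H36M naming), in red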
                for jsname, jename in [
                        ('Nose', 'Neck (LSP)'), ('Neck (LSP)', 'Right Shoulder'), ('Right Shoulder', 'Right Elbow'), ('Right Elbow', 'Right Wrist'),
                        ('Neck (LSP)', 'Left Shoulder'), ('Left Shoulder', 'Left Elbow'), ('Left Elbow', 'Left Wrist'),
                        ('Pelvis (MPII)', 'Right Hip'), ('Right Hip', 'Right Knee'), ('Right Knee', 'Right Ankle'),
                        ('Pelvis (MPII)', 'Left Hip'), ('Left Hip', 'Left Knee'), ('Left Knee', 'Left Ankle'),
                        ('Nose', 'Right Eye'), ('Right Eye', 'Right Ear'), ('Nose', 'Left Eye'), ('Left Eye', 'Left Ear'),
                        ('Pelvis (MPII)', 'Thorax (MPII)'), ('Thorax (MPII)', 'Spine (H36M)'), ('Spine (H36M)', 'Jaw (H36M)'), ('Jaw (H36M)', 'Nose')]:
                    xs = [
                        joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][0],
                        joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][0]
                    ]
                    ys = [
                        joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][2],
                        joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][2]
                    ]
                    zs = [
                        -joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][1],
                        -joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][1]
                    ]

                    ax.plot3D(xs, ys, zs, marker="o", ms=2, c="#FF0000")

                plt.savefig(
                    osp.join(args.out_dir, "frames",
                             f"joints_{pidx:02d}_{cur_frame:05d}.jpg"))
                plt.close()

        timer.toc(bPrint=True, title="Time")
        print(f"Processed : {image_path}")

    # save images as a video
    if not args.no_video_out and input_type in ['video', 'webcam']:
        demo_utils.gen_video_out(args.out_dir, args.seq_name)

    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
Example #3
def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer):
    # Setup input data to handle different types of inputs
    input_type, input_data = demo_utils.setup_input(args)

    cur_frame = args.start_frame
    video_frame = 0
    while True:
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':
            _, img_original_bgr = input_data.read()
            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames",
                                  f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)

        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames",
                                  f"scene_{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        # bbox detection
        if not load_bbox:
            body_bbox_list, hand_bbox_list = list(), list()

        # regression (includes integration)
        body_bbox_list, hand_bbox_list, pred_output_list = run_regress(
            args, img_original_bgr, body_bbox_list, hand_bbox_list,
            bbox_detector, body_mocap, hand_mocap)

        # save the obtained body & hand bbox to json file
        if args.save_bbox_output:
            demo_utils.save_info_to_json(args, image_path, body_bbox_list,
                                         hand_bbox_list)

        if len(body_bbox_list) < 1:
            print(f"No body detected: {image_path}")
            continue

        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # # visualization
        # res_img = visualizer.visualize(
        #     img_original_bgr,
        #     pred_mesh_list = pred_mesh_list,
        #     body_bbox_list = body_bbox_list,
        #     hand_bbox_list = hand_bbox_list)

        # # show result on the screen
        # if not args.no_display:
        #     res_img = res_img.astype(np.uint8)
        #     ImShow(res_img)

        # # save result image
        # if args.out_dir is not None:
        #     demo_utils.save_res_img(args.out_dir, image_path, res_img)

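        # save predictions to pkl (this example saves unconditionally)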
        demo_type = 'frank'
        demo_utils.save_pred_to_pkl(args, demo_type, image_path,
                                    body_bbox_list, hand_bbox_list,
                                    pred_output_list)

        for pidx, pred_output in enumerate(pred_output_list):
            # figure settings
            fig = plt.figure(figsize=(15, 15), dpi=100)
            # add a 3D axes
            ax = fig.add_subplot(111, projection='3d')

            joints = pred_output['pred_joints_img']
            left_hand_joints = pred_output['pred_left_hand_joints_img']
            right_hand_joints = pred_output['pred_right_hand_joints_img']
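            # the integrated "frank" output also provides image-space hand joints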
            min_joint = np.min(joints, axis=0)
            max_joint = np.max(joints, axis=0)

            # joint output: clamp axes to the joint extents
            ax.set_xlim3d(int(min_joint[0]), int(max_joint[0]))
            ax.set_ylim3d(int(min_joint[2]), int(max_joint[2]))
            ax.set_zlim3d(int(-max_joint[1]), int(-min_joint[1]))
            ax.set(xlabel='x', ylabel='y', zlabel='z')

            for jsname, jename in [
                    ('OP Nose', 'OP Neck'), ('OP Neck', 'OP RShoulder'), ('OP RShoulder', 'OP RElbow'), ('OP RElbow', 'OP RWrist'),
                    ('OP Neck', 'OP LShoulder'), ('OP LShoulder', 'OP LElbow'), ('OP LElbow', 'OP LWrist'),
                    ('OP MidHip', 'OP RHip'), ('OP RHip', 'OP RKnee'), ('OP RKnee', 'OP RAnkle'), ('OP RAnkle', 'OP RHeel'), ('OP RHeel', 'OP RSmallToe'), ('OP RHeel', 'OP RBigToe'),
                    ('OP MidHip', 'OP LHip'), ('OP LHip', 'OP LKnee'), ('OP LKnee', 'OP LAnkle'), ('OP LAnkle', 'OP LHeel'), ('OP LHeel', 'OP LSmallToe'), ('OP LHeel', 'OP LBigToe'),
                    ('OP Nose', 'OP REye'), ('OP REye', 'OP REar'), ('OP Nose', 'OP LEye'), ('OP LEye', 'OP LEar'), ('OP Neck', 'OP MidHip')]:
                xs = [
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][0],
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][0]
                ]
                ys = [
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][2],
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][2]
                ]
                zs = [
                    -joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][1],
                    -joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][1]
                ]

                ax.plot3D(xs, ys, zs, marker="o", ms=2, c="#0000FF")


            for jsname, jename in [
                    ('Nose', 'Neck (LSP)'), ('Neck (LSP)', 'Right Shoulder'), ('Right Shoulder', 'Right Elbow'), ('Right Elbow', 'Right Wrist'),
                    ('Neck (LSP)', 'Left Shoulder'), ('Left Shoulder', 'Left Elbow'), ('Left Elbow', 'Left Wrist'),
                    ('Pelvis (MPII)', 'Right Hip'), ('Right Hip', 'Right Knee'), ('Right Knee', 'Right Ankle'),
                    ('Pelvis (MPII)', 'Left Hip'), ('Left Hip', 'Left Knee'), ('Left Knee', 'Left Ankle'),
                    ('Nose', 'Right Eye'), ('Right Eye', 'Right Ear'), ('Nose', 'Left Eye'), ('Left Eye', 'Left Ear'),
                    ('Pelvis (MPII)', 'Thorax (MPII)'), ('Thorax (MPII)', 'Spine (H36M)'), ('Spine (H36M)', 'Jaw (H36M)'), ('Jaw (H36M)', 'Nose')]:
                xs = [
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][0],
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][0]
                ]
                ys = [
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][2],
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][2]
                ]
                zs = [
                    -joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][1],
                    -joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][1]
                ]

                ax.plot3D(xs, ys, zs, marker="o", ms=2, c="#FF0000")

            plt.savefig(
                osp.join(args.out_dir, "frames",
                         f"3d_{pidx:02d}_{cur_frame:05d}.jpg"))
            plt.close()

            img = Vis_Skeleton_2D_SPIN49(joints, image=img_original_bgr)
            cv2.imwrite(
                osp.join(args.out_dir, "frames",
                         f"2d_{pidx:02d}_{cur_frame:05d}.png"), img)

            # JSON output
            joint_dict = {}
            joint_dict["image"] = {
                "width": img_original_bgr.shape[1],
                "height": img_original_bgr.shape[0]
            }
            joint_dict["bbox"] = {
                "x": float(body_bbox_list[pidx][0]),
                "y": float(body_bbox_list[pidx][1]),
                "width": float(body_bbox_list[pidx][2]),
                "height": float(body_bbox_list[pidx][3])
            }
            joint_dict["joints"] = {}
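            # body joints keyed by their SPIN joint names, then both hands appended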

            for jidx, jname in enumerate(JOINT_NAMES):
                joint_dict["joints"][jname] = {
                    'x': float(joints[J49_FLIP_PERM[jidx]][0]),
                    'y': float(joints[J49_FLIP_PERM[jidx]][1]),
                    'z': float(joints[J49_FLIP_PERM[jidx]][2])
                }

            for jidx, jname in enumerate(RIGHT_HAND_NAMES):
                joint_dict["joints"][jname] = {
                    'x': float(right_hand_joints[jidx][0]),
                    'y': float(right_hand_joints[jidx][1]),
                    'z': float(right_hand_joints[jidx][2])
                }

            for jidx, jname in enumerate(LEFT_HAND_NAMES):
                joint_dict["joints"][jname] = {
                    'x': float(left_hand_joints[jidx][0]),
                    'y': float(left_hand_joints[jidx][1]),
                    'z': float(left_hand_joints[jidx][2])
                }

            params_json_path = osp.join(
                args.out_dir, "frames",
                f"joints_{pidx:02d}_{cur_frame:05d}.json")

            with open(params_json_path, 'w') as f:
                json.dump(joint_dict, f, indent=4)

        print(f"Processed : {image_path}")

    # save images as a video
    if not args.no_video_out and input_type in ['video', 'webcam']:
        demo_utils.gen_video_out(args.out_dir, args.seq_name)

    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
Example #4
def run_hand_mocap(args, bbox_detector, hand_mocap, visualizer):
    # Set up input data (images or webcam)
    input_type, input_data = demo_utils.setup_input(args)
 
    assert args.out_dir is not None, "Please specify output dir to store the results"
    cur_frame = args.start_frame
    video_frame = 0

    while True:
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                print("Use pre-computed bounding boxes")
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':      
            _, img_original_bgr = input_data.read()
            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        
        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        # bbox detection
        if args.load_bboxes:
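            # load per-frame bboxes saved by an earlier pass under {out_dir}/bbox/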
            json_name = f"{cur_frame:05d}_bbox.json"
            json_path = osp.join(args.out_dir, 'bbox', json_name)
            _, body_bbox_list, hand_bbox_list = demo_utils.load_info_from_json(json_path)
        else:
            raise Exception("Bounding boxes should have been computed beforehand (expected under args.out_dir/bbox)")
            # NOTE: the code below is unreachable; it shows the original detection path.
            # The input images contain body parts beyond the hand (not hand-cropped),
            # so use the hand detection model & body detector for hand detection.
            assert args.crop_type == 'no_crop'
            detect_output = bbox_detector.detect_hand_bbox(img_original_bgr.copy())
            body_pose_list, body_bbox_list, hand_bbox_list, raw_hand_bboxes = detect_output
        
            # save the obtained body & hand bbox to json file
            if args.save_bbox_output:
                demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list)

        if len(hand_bbox_list) < 1:
            print(f"No hand detected: {image_path}")
            continue
    
        # Hand Pose Regression
        pred_output_list = hand_mocap.regress(
                img_original_bgr, hand_bbox_list, add_margin=True)
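        # the three lists stay person-aligned: one body bbox, one hand-bbox entry,
        # and one prediction per person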
        assert len(hand_bbox_list) == len(body_bbox_list)
        assert len(body_bbox_list) == len(pred_output_list)

        # extract mesh for rendering (vertices in image space and faces) from pred_output_list
        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualize
        res_img = visualizer.visualize(
            img_original_bgr, 
            pred_mesh_list = pred_mesh_list, 
            hand_bbox_list = hand_bbox_list)

        # show result on the screen
        if not args.no_display:
            res_img = res_img.astype(np.uint8)
            ImShow(res_img)

        # save the image (we can make an option here)
        if args.out_dir is not None:
            demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'hand'
            demo_utils.save_pred_to_pkl(
                args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list)

        print(f"Processed : {image_path}")
        
    # save images as a video
    if not args.no_video_out and input_type in ['video', 'webcam']:
        demo_utils.gen_video_out(args.out_dir, args.seq_name)

    # When everything done, release the capture
    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
Example #5
def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer):
    # Setup input data to handle different types of inputs
    input_type, input_data = demo_utils.setup_input(args)

    cur_frame = args.start_frame
    video_frame = 0
    timer = Timer()
    while True:
        timer.tic()
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                print("Use pre-computed bounding boxes")
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':
            _, img_original_bgr = input_data.read()
            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames",
                                  f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)

        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames",
                                  f"scene_{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        if load_bbox:
            body_pose_list = None
        else:
            body_pose_list, body_bbox_list = body_bbox_detector.detect_body_pose(
                img_original_bgr)
        hand_bbox_list = [
            None,
        ] * len(body_bbox_list)

        # save the obtained body & hand bbox to json file
        if args.save_bbox_output:
            demo_utils.save_info_to_json(args, image_path, body_bbox_list,
                                         hand_bbox_list)

        if len(body_bbox_list) < 1:
            print(f"No body detected: {image_path}")
            continue

        # Sort the bboxes by area
        # (to make the order as consistent as possible without tracking)
        bbox_size = [(x[2] * x[3]) for x in body_bbox_list]
        idx_big2small = np.argsort(bbox_size)[::-1]
        body_bbox_list = [body_bbox_list[i] for i in idx_big2small]
        if args.single_person and len(body_bbox_list) > 0:
            body_bbox_list = [
                body_bbox_list[0],
            ]

        # Body Pose Regression
        pred_output_list = body_mocap.regress(img_original_bgr, body_bbox_list)
        assert len(body_bbox_list) == len(pred_output_list)

        # extract mesh for rendering (vertices in image space and faces) from pred_output_list
        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualization
        res_img = visualizer.visualize(img_original_bgr,
                                       pred_mesh_list=pred_mesh_list,
                                       body_bbox_list=body_bbox_list)

        # show result on the screen
        if not args.no_display:
            res_img = res_img.astype(np.uint8)
            ImShow(res_img)

        # save result image
        if args.out_dir is not None:
            demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'body'
            demo_utils.save_pred_to_pkl(args, demo_type, image_path,
                                        body_bbox_list, hand_bbox_list,
                                        pred_output_list)

        timer.toc(bPrint=True, title="Time")
        print(f"Processed : {image_path}")

    # save images as a video
    if not args.no_video_out and input_type in ['video', 'webcam']:
        demo_utils.gen_video_out(args.out_dir, args.seq_name)

    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
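
A minimal usage sketch (not from the original source): how an args object for these loops might look. The attribute names mirror the fields the functions above read; input_path and the use of SimpleNamespace in place of FrankMocap's own options parser are assumptions.

# Hypothetical driver for the demo loops above (assumed field names noted inline).
from types import SimpleNamespace

args = SimpleNamespace(
    input_path="./sample.mp4",   # consumed by demo_utils.setup_input (assumed name)
    out_dir="./mocap_output",    # frames/, bbox/, and result images land here
    seq_name="sample",           # used by demo_utils.gen_video_out
    start_frame=0,
    end_frame=float("inf"),      # the loops only ever compare cur_frame > end_frame
    save_frame=True,             # write raw frames under out_dir/frames
    save_bbox_output=True,       # write per-frame bbox json under out_dir/bbox
    save_pred_pkl=True,
    single_person=False,
    no_display=True,             # skip ImShow
    no_video_out=False,          # stitch saved images into a video at the end
)
# run_hand_mocap additionally reads args.load_bboxes and args.crop_type.

# With the detectors, regressors, and visualizer constructed per the FrankMocap docs:
# run_body_mocap(args, body_bbox_detector, body_mocap, visualizer)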