Example #1
    def callback_side(self, data):
        self.start = time.time()
        try:
            img_original = self.br.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)
            return  # cannot proceed without a decoded frame
        self.frame_id += 1
        pose, body_bbox_list = self.detect_body_pose(img_original)

        # keep only the largest person: sort bboxes by area (w * h), descending
        bbox_size = [(x[2] * x[3]) for x in body_bbox_list]
        idx_big2small = np.argsort(bbox_size)[::-1]
        body_bbox_list = [body_bbox_list[i] for i in idx_big2small]

        if len(body_bbox_list) > 0:
            body_bbox_list = [body_bbox_list[0]]

        pred_body_list = self.body_regress(img_original, body_bbox_list)

        # cache the side-view body joints (indices 1-24) in image coordinates
        self.body_side = np.array(pred_body_list[0]['pred_joints_img'][1:25, :])

        # render
        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_body_list)
        imgside_render = self.visualizer.visualize(img_original,
                                                   pred_mesh_list=pred_mesh_list,
                                                   body_bbox_list=body_bbox_list)
        imgside_render = imgside_render.astype(np.uint8)
        self.publisher_pose.publish(self.br.cv2_to_imgmsg(imgside_render))

        # save result
        if self.save_result:
            cv2.imwrite(str(self.result_path) + 'side' + str(self.frame_id) + '.jpg', imgside_render)
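
A minimal sketch (not part of the original example) of wiring this callback into a ROS node; rospy and sensor_msgs are standard, while the node class name and topic are hypothetical:

import rospy
from sensor_msgs.msg import Image

rospy.init_node('pose_estimator_side')
node = SidePoseEstimator()  # hypothetical class that owns callback_side and its publishers
rospy.Subscriber('/camera_side/image_raw', Image, node.callback_side, queue_size=1)
rospy.spin()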
Example #2
def run_hand_mocap(args, bbox_detector, hand_mocap, visualizer, frame):
    # input is a single frame provided by the caller (instead of setup_input)
    img_original_bgr = frame
    detect_output = bbox_detector.detect_hand_bbox(img_original_bgr.copy())
    body_pose_list, body_bbox_list, hand_bbox_list, raw_hand_bboxes = detect_output

    if len(hand_bbox_list) < 1:
        print("No hand detected")
        return "None"

    # Hand Pose Regression
    pred_output_list = hand_mocap.regress(img_original_bgr,
                                          hand_bbox_list,
                                          add_margin=True)

    hand = pred_output_list[0]

    if hand['right_hand'] is None and hand['left_hand'] is None:
        return "None"
    elif hand['right_hand'] is None and hand['left_hand'] is not None:
        Lefthand = hand['left_hand']
        print('Lefthand:', Lefthand)
        Lhandjoint = Lefthand['pred_joints_img']
        return [0, Lhandjoint]
    elif hand['right_hand'] is not None and hand['left_hand'] is None:
        Righthand = hand['right_hand']
        print(Righthand)
        Rhandjoint = Righthand['pred_joints_img']
        return [Rhandjoint, 0]
    else:
        Righthand = hand['right_hand']
        Rhandjoint = Righthand['pred_joints_img']
        Lefthand = hand['left_hand']
        Lhandjoint = Lefthand['pred_joints_img']
        return [Rhandjoint, Lhandjoint]

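    # NOTE: every branch above returns, so the rendering code below is unreachable as written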
    # extract mesh for rendering (vertices in image space and faces) from pred_output_list
    pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

    # visualize
    res_img = visualizer.visualize(img_original_bgr,
                                   pred_mesh_list=pred_mesh_list,
                                   hand_bbox_list=hand_bbox_list)

    # show result in the screen

    if not args.no_display:
        res_img = res_img.astype(np.uint8)
        ImShow(res_img)
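
A hedged usage sketch for the function above; frame is assumed to be a BGR image (e.g. read from cv2.VideoCapture):

ret = run_hand_mocap(args, bbox_detector, hand_mocap, visualizer, frame)
if ret != "None":
    right_joints, left_joints = ret  # either entry may be the integer placeholder 0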
Example #3
def visualize_prediction(args, demo_type, smpl_type, smpl_model, pkl_files,
                         visualizer):
    for pkl_file in pkl_files:
        # load data
        saved_data = gnu.load_pkl(pkl_file)

        image_path = saved_data['image_path']
        img_original_bgr = cv2.imread(image_path)
        if img_original_bgr is None:
            print(f"{image_path} does not exist, skipping")
            continue

        print("--------------------------------------")

        demo_type = saved_data['demo_type']
        assert saved_data['smpl_type'] == smpl_type

        hand_bbox_list = saved_data['hand_bbox_list']
        body_bbox_list = saved_data['body_bbox_list']
        pred_output_list = saved_data['pred_output_list']

        # recompute the meshes if they were not stored in the pkl
        if not saved_data['save_mesh']:
            __calc_mesh(demo_type, smpl_type, smpl_model,
                        img_original_bgr.shape[:2], pred_output_list)

        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualization
        res_img = visualizer.visualize(img_original_bgr,
                                       pred_mesh_list=pred_mesh_list,
                                       body_bbox_list=body_bbox_list,
                                       hand_bbox_list=hand_bbox_list)

        # save result image
        demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            args.use_smplx = smpl_type == 'smplx'
            demo_utils.save_pred_to_pkl(args, demo_type, image_path,
                                        body_bbox_list, hand_bbox_list,
                                        pred_output_list)
Example #4
def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer):
    #Setup input data to handle different types of inputs
    input_type, input_data = demo_utils.setup_input(args)

    cur_frame = args.start_frame
    video_frame = 0
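    # video_frame counts frames actually read from the stream; comparing it with
    # cur_frame lets the loop skip ahead to args.start_frame before processing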
    while True:
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':
            _, img_original_bgr = input_data.read()
            if video_frame < cur_frame:
                video_frame += 1
                continue
            video_path = args.input_path
            # save the obtained video frames
            image_path = osp.join(args.frame_dir, f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                cv2.imwrite(image_path, img_original_bgr)
                video_frame += 1

        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            video_path = args.input_path
            # save the obtained video frames
            image_path = f"scene_{cur_frame:05d}.jpg"
            # image_path = osp.join(args.frame_dir, f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                # cv2.imwrite(image_path, img_original_bgr)
                video_frame += 1
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        # bbox detection: start from empty lists unless pre-computed boxes were
        # loaded; run_regress below performs detection when the lists are empty
        if not load_bbox:
            body_bbox_list, hand_bbox_list = list(), list()

        # regression (includes integration)
        body_bbox_list, hand_bbox_list, pred_output_list = run_regress(
            args, img_original_bgr, body_bbox_list, hand_bbox_list,
            bbox_detector, body_mocap, hand_mocap)

        # save the obtained body & hand bbox to json file
        if args.save_bbox_output:
            demo_utils.save_info_to_json(args, image_path, body_bbox_list,
                                         hand_bbox_list)

        if len(body_bbox_list) < 1:
            print(f"No body detected: {image_path}")
            continue

        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualization
        res_img = visualizer.visualize(img_original_bgr,
                                       pred_mesh_list=pred_mesh_list,
                                       body_bbox_list=body_bbox_list,
                                       hand_bbox_list=hand_bbox_list)

        # show result in the screen
        if not args.no_display:
            res_img = res_img.astype(np.uint8)
            ImShow(res_img)

        # save result image
        if args.out_dir is not None:
            demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'frank'
            demo_utils.save_pred_to_pkl(args, demo_type, image_path,
                                        body_bbox_list, hand_bbox_list,
                                        pred_output_list)

        print(f"Processed : {image_path}")

    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
Example #5
def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer):
    #Setup input data to handle different types of inputs
    input_type, input_data = demo_utils.setup_input(args)

    cur_frame = args.start_frame
    video_frame = 0
    while True:
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                print("Use pre-computed bounding boxes")
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':
            _, img_original_bgr = input_data.read()
            if video_frame < cur_frame:
                video_frame += 1
                continue
            video_path = args.input_path
            # save the obtained video frames
            image_path = osp.join(args.frame_dir, f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                cv2.imwrite(image_path, img_original_bgr)
                video_frame += 1

        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            video_path = args.input_path
            # save the obtained video frames
            image_path = f"scene_{cur_frame:05d}.jpg"
            if img_original_bgr is not None:
                # cv2.imwrite(image_path, img_original_bgr)
                video_frame += 1
            # assert input_type == 'webcam'
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        if load_bbox:
            body_pose_list = None
        else:
            body_pose_list, body_bbox_list = body_bbox_detector.detect_body_pose(
                img_original_bgr)
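        # body-only demo: one None hand-bbox placeholder per body keeps list lengths aligned with downstream APIs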
        hand_bbox_list = [None] * len(body_bbox_list)

        # save the obtained body & hand bbox to json file
        if args.save_bbox_output:
            demo_utils.save_info_to_json(args, image_path, body_bbox_list,
                                         hand_bbox_list)

        if len(body_bbox_list) < 1:
            print(f"No body detected: {image_path}")
            continue

        # Sort the bboxes by size
        # (to make the order as consistent as possible without tracking)
        bbox_size = [(x[2] * x[3]) for x in body_bbox_list]
        idx_big2small = np.argsort(bbox_size)[::-1]
        body_bbox_list = [body_bbox_list[i] for i in idx_big2small]
        if args.single_person and len(body_bbox_list) > 0:
            body_bbox_list = [body_bbox_list[0]]

        # Body Pose Regression
        pred_output_list = body_mocap.regress(img_original_bgr, body_bbox_list)
        assert len(body_bbox_list) == len(pred_output_list)

        # extract mesh for rendering (vertices in image space and faces) from pred_output_list
        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualization
        res_img = visualizer.visualize(img_original_bgr,
                                       pred_mesh_list=pred_mesh_list,
                                       body_bbox_list=body_bbox_list)

        # show result in the screen
        if not args.no_display:
            res_img = res_img.astype(np.uint8)
            ImShow(res_img)

        # save result image
        if args.out_dir is not None:
            demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'body'
            demo_utils.save_pred_to_pkl(args, demo_type, image_path,
                                        body_bbox_list, hand_bbox_list,
                                        pred_output_list)

        print(f"Processed : {image_path}")

    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
Example #6
def run_hand_mocap(args, bbox_detector, hand_mocap, visualizer):
    #Set up input data (images or webcam)
    input_type, input_data = demo_utils.setup_input(args)
 
    assert args.out_dir is not None, "Please specify output dir to store the results"
    cur_frame = args.start_frame
    video_frame = 0

    while True:
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                print("Use pre-computed bounding boxes")
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':
            _, img_original_bgr = input_data.read()
            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        
        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        # bbox detection
        if args.load_bboxes:
            json_name = f"{cur_frame:05d}_bbox.json"
            json_path = osp.join(args.out_dir, 'bbox', json_name)
            _, body_bbox_list, hand_bbox_list = demo_utils.load_info_from_json(json_path)
        else:
            raise Exception("Should have computed already the bboxes, yeah?")
            # NOTE: unreachable because of the raise above.
            # The input image may contain other body parts, with the hand not cropped.
            # Use the hand detection model & body detector for hand detection.
            assert args.crop_type == 'no_crop'
            detect_output = bbox_detector.detect_hand_bbox(img_original_bgr.copy())
            body_pose_list, body_bbox_list, hand_bbox_list, raw_hand_bboxes = detect_output

            # save the obtained body & hand bboxes to a json file
            if args.save_bbox_output:
                demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list)

        if len(hand_bbox_list) < 1:
            print(f"No hand detected: {image_path}")
            continue
    
        # Hand Pose Regression
        pred_output_list = hand_mocap.regress(
                img_original_bgr, hand_bbox_list, add_margin=True)
        assert len(hand_bbox_list) == len(body_bbox_list)
        assert len(body_bbox_list) == len(pred_output_list)

        # extract mesh for rendering (vertices in image space and faces) from pred_output_list
        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualize
        res_img = visualizer.visualize(
            img_original_bgr,
            pred_mesh_list=pred_mesh_list,
            hand_bbox_list=hand_bbox_list)

        # show result in the screen
        if not args.no_display:
            res_img = res_img.astype(np.uint8)
            ImShow(res_img)

        # save the image (we can make an option here)
        if args.out_dir is not None:
            demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'hand'
            demo_utils.save_pred_to_pkl(
                args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list)

        print(f"Processed : {image_path}")
        
    #save images as a video
    if not args.no_video_out and input_type in ['video', 'webcam']:
        demo_utils.gen_video_out(args.out_dir, args.seq_name)

    # When everything done, release the capture
    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
Example #7
def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer):
    #Setup input data to handle different types of inputs
    input_type, input_data = demo_utils.setup_input(args)

    cur_frame = args.start_frame
    video_frame = 0
    while True:
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':
            _, img_original_bgr = input_data.read()
            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames",
                                  f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)

        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            # save the obtained video frames
            image_path = osp.join(args.out_dir, "frames",
                                  f"scene_{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                video_frame += 1
                if args.save_frame:
                    gnu.make_subdir(image_path)
                    cv2.imwrite(image_path, img_original_bgr)
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        # bbox detection
        if not load_bbox:
            body_bbox_list, hand_bbox_list = list(), list()

        # regression (includes integration)
        body_bbox_list, hand_bbox_list, pred_output_list = run_regress(
            args, img_original_bgr, body_bbox_list, hand_bbox_list,
            bbox_detector, body_mocap, hand_mocap)

        # save the obtained body & hand bbox to json file
        if args.save_bbox_output:
            demo_utils.save_info_to_json(args, image_path, body_bbox_list,
                                         hand_bbox_list)

        if len(body_bbox_list) < 1:
            print(f"No body detected: {image_path}")
            continue

        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        #     # visualization
        #     res_img = visualizer.visualize(
        #         img_original_bgr,
        #         pred_mesh_list = pred_mesh_list,
        #         body_bbox_list = body_bbox_list,
        #         hand_bbox_list = hand_bbox_list)

        #    # show result in the screen
        #     if not args.no_display:
        #         res_img = res_img.astype(np.uint8)
        #         ImShow(res_img)

        #     # save result image
        #     if args.out_dir is not None:
        #         demo_utils.save_res_img(args.out_dir, image_path, res_img)

        demo_type = 'frank'
        demo_utils.save_pred_to_pkl(args, demo_type, image_path,
                                    body_bbox_list, hand_bbox_list,
                                    pred_output_list)

        for pidx, pred_output in enumerate(pred_output_list):
            # figure setup
            fig = plt.figure(figsize=(15, 15), dpi=100)
            # add a 3D Axes
            ax = fig.add_subplot(111, projection='3d')

            joints = pred_output['pred_joints_img']
            left_hand_joints = pred_output['pred_left_hand_joints_img']
            right_hand_joints = pred_output['pred_right_hand_joints_img']
            min_joint = np.min(joints, axis=0)
            max_joint = np.max(joints, axis=0)

            # joint output: set axis limits from the body-joint extents
            ax.set_xlim3d(int(min_joint[0]), int(max_joint[0]))
            ax.set_ylim3d(int(min_joint[2]), int(max_joint[2]))
            ax.set_zlim3d(int(-max_joint[1]), int(-min_joint[1]))
            ax.set(xlabel='x', ylabel='y', zlabel='z')

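            # plotting convention: image x stays x, depth (index 2) goes on the
            # y axis, and image y (index 1, which points down) is negated onto z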
            for jidx, (jsname, jename) in enumerate([('OP Nose', 'OP Neck'), ('OP Neck', 'OP RShoulder'), ('OP RShoulder', 'OP RElbow'), ('OP RElbow', 'OP RWrist'), \
                                                        ('OP Neck', 'OP LShoulder'), ('OP LShoulder', 'OP LElbow'), ('OP LElbow', 'OP LWrist'), \
                                                        ('OP MidHip', 'OP RHip'), ('OP RHip', 'OP RKnee'), ('OP RKnee', 'OP RAnkle'), ('OP RAnkle', 'OP RHeel'), ('OP RHeel', 'OP RSmallToe'), ('OP RHeel', 'OP RBigToe'), \
                                                        ('OP MidHip', 'OP LHip'), ('OP LHip', 'OP LKnee'), ('OP LKnee', 'OP LAnkle'), ('OP LAnkle', 'OP LHeel'), ('OP LHeel', 'OP LSmallToe'), ('OP LHeel', 'OP LBigToe'), \
                                                        ('OP Nose', 'OP REye'), ('OP REye', 'OP REar'), ('OP Nose', 'OP LEye'), ('OP LEye', 'OP LEar'), ('OP Neck', 'OP MidHip')]):
                xs = [
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][0],
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][0]
                ]
                ys = [
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][2],
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][2]
                ]
                zs = [
                    -joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][1],
                    -joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][1]
                ]

                ax.plot3D(xs, ys, zs, marker="o", ms=2, c="#0000FF")


            for jidx, (jsname, jename) in enumerate([('Nose', 'Neck (LSP)'), ('Neck (LSP)', 'Right Shoulder'), ('Right Shoulder', 'Right Elbow'), ('Right Elbow', 'Right Wrist'), \
                                                        ('Neck (LSP)', 'Left Shoulder'), ('Left Shoulder', 'Left Elbow'), ('Left Elbow', 'Left Wrist'), \
                                                        ('Pelvis (MPII)', 'Right Hip'), ('Right Hip', 'Right Knee'), ('Right Knee', 'Right Ankle'), \
                                                        ('Pelvis (MPII)', 'Left Hip'), ('Left Hip', 'Left Knee'), ('Left Knee', 'Left Ankle'), \
                                                        ('Nose', 'Right Eye'), ('Right Eye', 'Right Ear'), ('Nose', 'Left Eye'), ('Left Eye', 'Left Ear'), \
                                                        ('Pelvis (MPII)', 'Thorax (MPII)'), ('Thorax (MPII)', 'Spine (H36M)'), ('Spine (H36M)', 'Jaw (H36M)'), ('Jaw (H36M)', 'Nose')]):
                xs = [
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][0],
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][0]
                ]
                ys = [
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][2],
                    joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][2]
                ]
                zs = [
                    -joints[J49_FLIP_PERM[JOINT_NAMES.index(jsname)]][1],
                    -joints[J49_FLIP_PERM[JOINT_NAMES.index(jename)]][1]
                ]

                ax.plot3D(xs, ys, zs, marker="o", ms=2, c="#FF0000")

            plt.savefig(
                osp.join(args.out_dir, "frames",
                         f"3d_{pidx:02d}_{cur_frame:05d}.jpg"))
            plt.close()

            img = Vis_Skeleton_2D_SPIN49(joints, image=img_original_bgr)
            cv2.imwrite(
                osp.join(args.out_dir, "frames",
                         f"2d_{pidx:02d}_{cur_frame:05d}.png"), img)

            # JSON output
            joint_dict = {}
            joint_dict["image"] = {
                "width": img_original_bgr.shape[1],
                "height": img_original_bgr.shape[0]
            }
            joint_dict["bbox"] = {"x": float(body_bbox_list[pidx][0]), "y": float(body_bbox_list[pidx][1]), \
                                  "width": float(body_bbox_list[pidx][2]), "height": float(body_bbox_list[pidx][3])}
            joint_dict["joints"] = {}

            for jidx, jname in enumerate(JOINT_NAMES):
                joint_dict["joints"][jname] = {
                    'x': float(joints[J49_FLIP_PERM[jidx]][0]),
                    'y': float(joints[J49_FLIP_PERM[jidx]][1]),
                    'z': float(joints[J49_FLIP_PERM[jidx]][2])
                }

            for jidx, jname in enumerate(RIGHT_HAND_NAMES):
                joint_dict["joints"][jname] = {
                    'x': float(right_hand_joints[jidx][0]),
                    'y': float(right_hand_joints[jidx][1]),
                    'z': float(right_hand_joints[jidx][2])
                }

            for jidx, jname in enumerate(LEFT_HAND_NAMES):
                joint_dict["joints"][jname] = {
                    'x': float(left_hand_joints[jidx][0]),
                    'y': float(left_hand_joints[jidx][1]),
                    'z': float(left_hand_joints[jidx][2])
                }

            params_json_path = osp.join(
                args.out_dir, "frames",
                f"joints_{pidx:02d}_{cur_frame:05d}.json")

            with open(params_json_path, 'w') as f:
                json.dump(joint_dict, f, indent=4)

        print(f"Processed : {image_path}")

    # save images as a video
    if not args.no_video_out and input_type in ['video', 'webcam']:
        demo_utils.gen_video_out(args.out_dir, args.seq_name)

    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
Example #8
def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer):
    #Setup input data to handle different types of inputs
    input_type, input_data = demo_utils.setup_input(args)

    cur_frame = args.start_frame
    video_frame = 0
    timer = Timer()
    while True:
        timer.tic()
        
        # this example always reads frames from a video stream
        _, img_original_bgr = input_data.read()
        if video_frame < cur_frame:
            video_frame += 1
            continue

        # save the obtained video frames
        if img_original_bgr is not None:
            video_frame += 1

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        # load pre-computed (smoothed) bboxes from json, or run the body detector
        if args.load_bboxes:
            json_name = f"{cur_frame:05d}_bbox.json"
            json_path = osp.join(args.out_dir, 'bbox_smoothed', json_name)
            data = gnu.load_json(json_path)
            print(data)
            break
        else:
            body_pose_list, body_bbox_list = body_bbox_detector.detect_body_pose(img_original_bgr)
            break
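        # NOTE: both branches above break out of the loop, so the rest of this
        # loop body is unreachable as written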

        hand_bbox_list = [None] * len(body_bbox_list)


        if len(body_bbox_list) < 1: 
            print(f"No body detected: {image_path}")
            continue

        # Sort the bboxes by size
        # (to make the order as consistent as possible without tracking)
        bbox_size = [(x[2] * x[3]) for x in body_bbox_list]
        idx_big2small = np.argsort(bbox_size)[::-1]
        body_bbox_list = [body_bbox_list[i] for i in idx_big2small]
        if args.single_person and len(body_bbox_list) > 0:
            body_bbox_list = [body_bbox_list[0]]

        # Body Pose Regression
        pred_output_list = body_mocap.regress(img_original_bgr, body_bbox_list)
        assert len(body_bbox_list) == len(pred_output_list)

        # extract mesh for rendering (vertices in image space and faces) from pred_output_list
        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualization
        res_img = visualizer.visualize(
            img_original_bgr,
            pred_mesh_list=pred_mesh_list,
            body_bbox_list=body_bbox_list)
        
        # show result in the screen
        if not args.no_display:
            res_img = res_img.astype(np.uint8)
            ImShow(res_img)

        # save result image
        if args.out_dir is not None:
            demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'body'
            demo_utils.save_pred_to_pkl(
                args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list)

        timer.toc(bPrint=True, title="Time")
        print(f"Processed : {image_path}")

    #save images as a video
    if not args.no_video_out and input_type in ['video', 'webcam']:
        demo_utils.gen_video_out(args.out_dir, args.seq_name)

    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()
Example #9
    def body_regress(self, img_original, body_bbox_list):
        """
            args:
                img_original: original raw image (BGR order, as loaded by cv2.imread)
                body_bbox_list: bounding boxes around the targets: (minX, minY, width, height)
            outputs:
                pred_vertices_img
                pred_joints_vis_img
                pred_rotmat
                pred_betas
                pred_camera
                bbox: [bbr[0], bbr[1], bbr[0]+bbr[2], bbr[1]+bbr[3]]
                bboxTopLeft: bbox top left (redundant)
                boxScale_o2n: bbox scaling factor (redundant)
        """
        pred_output_list = list()

        for body_bbox in body_bbox_list:
            img, norm_img, boxScale_o2n, bboxTopLeft, bbox = process_image_bbox(
                img_original, body_bbox, input_res=constants.IMG_RES)
            bboxTopLeft = np.array(bboxTopLeft)

            # bboxTopLeft = bbox['bboxXYWH'][:2]
            if img is None:
                pred_output_list.append(None)
                continue

            with torch.no_grad():
                # model forward
                pred_rotmat, pred_betas, pred_camera = self.model_regressor(norm_img.to(self.device))

                #Convert rot_mat to aa since hands are always in aa
                # pred_aa = rotmat3x3_to_angle_axis(pred_rotmat)
                pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cuda()
                pred_aa = pred_aa.reshape(pred_aa.shape[0], 72)
                smpl_output = self.smpl(
                    betas=pred_betas, 
                    body_pose=pred_aa[:,3:],
                    global_orient=pred_aa[:,:3], 
                    pose2rot=True)
                pred_vertices = smpl_output.vertices
                pred_joints_3d = smpl_output.joints

                pred_vertices = pred_vertices[0].cpu().numpy()

                pred_camera = pred_camera.cpu().numpy().ravel()
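                # weak-perspective camera prediction: [scale, trans_x, trans_y] in normalized bbox space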
                camScale = pred_camera[0] # *1.15
                camTrans = pred_camera[1:]

                pred_output = dict()
                # Convert mesh to original image space (X,Y are aligned to image)
                # 1. SMPL -> 2D bbox
                # 2. 2D bbox -> original 2D image
                pred_vertices_bbox = convert_smpl_to_bbox(pred_vertices, camScale, camTrans)
                pred_vertices_img = convert_bbox_to_oriIm(
                    pred_vertices_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])

                # Convert joints to original image space (X,Y are aligned to image)
                pred_joints_3d = pred_joints_3d[0].cpu().numpy()  # (49, 3)
                pred_joints_vis = pred_joints_3d[:, :3]  # (49, 3)

                pred_joints_vis_bbox = convert_smpl_to_bbox(pred_joints_vis, camScale, camTrans)
                pred_joints_vis_img = convert_bbox_to_oriIm(
                    pred_joints_vis_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])

                # Output
                pred_output['img_cropped'] = img[:, :, ::-1]
                pred_output['pred_vertices_smpl'] = smpl_output.vertices[0].cpu().numpy() # SMPL vertex in original smpl space
                pred_output['pred_vertices_img'] = pred_vertices_img # SMPL vertex in image space
                pred_output['pred_joints_img'] = pred_joints_vis_img # SMPL joints in image space

                pred_aa_tensor = gu.rotation_matrix_to_angle_axis(pred_rotmat.detach().cpu()[0])
                pred_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy().reshape(1, 72)
                pred_body_pose = pred_output['pred_body_pose']
                pred_output['pred_rotmat'] = pred_rotmat.detach().cpu().numpy() # (1, 24, 3, 3)
                pred_output['pred_betas'] = pred_betas.detach().cpu().numpy() # (1, 10)

                pred_output['pred_camera'] = pred_camera
                pred_output['bbox_top_left'] = bboxTopLeft
                pred_output['bbox_scale_ratio'] = boxScale_o2n
                pred_output['faces'] = self.smpl.faces

                if self.use_smplx:
                    img_center = np.array((img_original.shape[1], img_original.shape[0]) ) * 0.5
                    # right hand
                    pred_joints = smpl_output.right_hand_joints[0].cpu().numpy()     
                    pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans)
                    pred_joints_img = convert_bbox_to_oriIm(
                        pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])
                    pred_output['right_hand_joints_img_coord'] = pred_joints_img
                    # left hand 
                    pred_joints = smpl_output.left_hand_joints[0].cpu().numpy()
                    pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans)
                    pred_joints_img = convert_bbox_to_oriIm(
                        pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0])
                    pred_output['left_hand_joints_img_coord'] = pred_joints_img
                
                pred_output_list.append(pred_output)
            if self.body_only:
                with open('/home/student/bodyonly_side/results.txt', 'a') as file_handle:
                    file_handle.write('\nframe_id:')
                    file_handle.write(str(self.frame_id))
                    file_handle.write('\npred_body_pose:')
                    file_handle.write(str(pred_body_pose))
                    file_handle.write('\npred_joints_vis_img:')
                    file_handle.write(str(pred_joints_vis_img))

                # save images (only the body module has a visualizer)
                # extract mesh for rendering (vertices in image space and faces) from pred_output_list
                pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

                # visualization
                res_img = self.visualizer.visualize(img_original,
                                                    pred_mesh_list=pred_mesh_list,
                                                    body_bbox_list=body_bbox_list)
                res_img = res_img.astype(np.uint8)
                # ImShow(res_img)
                cv2.imwrite('/home/student/bodyonly_side/' + str(self.frame_id) + '.jpg', res_img)

        return pred_output_list
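
A hedged sketch of calling body_regress from outside the class; detect_body_pose mirrors its use in the other examples, the instance name mocap is assumed, and bboxes follow the (minX, minY, width, height) format from the docstring:

img = cv2.imread('person.jpg')  # BGR order, as the docstring requires
_, body_bbox_list = mocap.detect_body_pose(img)  # body detector from the same pipeline
pred_output_list = mocap.body_regress(img, body_bbox_list)
if pred_output_list and pred_output_list[0] is not None:
    joints_img = pred_output_list[0]['pred_joints_img']  # joints in image space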
Example #10
    def callback_front(self, data):
        try:
            if self.rot90 is True:
                img_original = cv2.flip(cv2.transpose(self.br.imgmsg_to_cv2(data, "bgr8")), 0)
            else:
                img_original = self.br.imgmsg_to_cv2(data, "bgr8")

            self.frame_id += 1
            img_shape = [img_original.shape[0], img_original.shape[1]]
            h, w = img_shape
        except CvBridgeError as e:
            print(e)
            return  # cannot proceed without a decoded frame
        pose, body_bbox_list = self.detect_body_pose(img_original)

        pred_body_list = self.body_regress(img_original, body_bbox_list)

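        # ROS message containers for the published outputs: risk level (std_msgs/Int64)
        # and joint angles (std_msgs/Float32MultiArray)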
        risk = Int64()
        angles = Float32MultiArray()

        # hand detection and integration
        if not self.body_only:
            # hand_bbox_list = self.get_hand_bboxes(pred_body_list, img_shape)
            body_pose_list, body_bbox_list, hand_bbox_list, raw_hand_bboxes = self.hand_bbox_detector.detect_hand_bbox(img_original)

            pred_hand_list = self.hand_regress(img_original, hand_bbox_list, add_margin=False)
        
            integrate_output_list = self.integration_copy_paste(pred_body_list, pred_hand_list, self.smpl, img_shape)
            body = np.array(integrate_output_list[0]['pred_body_joints_img'][0:25, :])
            rhand = np.array(integrate_output_list[0]['pred_rhand_joints_img'][0:21, :])
            lhand = np.array(integrate_output_list[0]['pred_lhand_joints_img'][0:21, :])
            hands = np.vstack((lhand, rhand))
            whole_body = np.vstack((body, hands))
            #print(self.angle_leg)
            risklevel, self.angles = eva3d.scoring(whole_body, self.body_side, self.load)
            #print(risklevel, main_angles)

            end = time.time()
            fps = 1 / (end - self.start)
            print('FPS:'+str(fps))
            risk.data = risklevel.item()
            angles.data = tuple(self.angles)

            #output
            self.publisher_risk.publish(risk)
            self.publisher_angles.publish(angles)

            # render and visualization
            pred_mesh_list = demo_utils.extract_mesh_from_output(integrate_output_list)
            img_render = self.visualizer.visualize(img_original,
                                                   pred_mesh_list=pred_mesh_list,
                                                   body_bbox_list=body_bbox_list,
                                                   hand_bbox_list=hand_bbox_list)
            img_render = img_render.astype(np.uint8)
            self.publisher_pose.publish(self.br.cv2_to_imgmsg(img_render))

            # save result
            if self.save_result is True:
                cv2.imwrite(str(self.result_path) + str(self.frame_id) + '.jpg', img_render)
                with open(str(self.result_path) + 'results.txt', 'a') as file_handle:
                    file_handle.write('\nframe_id:')
                    file_handle.write(str(self.frame_id))
                    file_handle.write('\nrisk_level:')
                    file_handle.write(str(risklevel))
                    file_handle.write('\nmain_angles:')
                    file_handle.write(str(self.angles))  # the angles computed by eva3d.scoring above
Example #11
def run_hand_mocap(args, bbox_detector, hand_mocap, visualizer):
    #Set up input data (images or webcam)
    input_type, input_data = demo_utils.setup_input(args)

    cur_frame = args.start_frame
    video_frame = 0
    while True:
        # load data
        load_bbox = False

        if input_type == 'image_dir':
            if cur_frame < len(input_data):
                image_path = input_data[cur_frame]
                img_original_bgr = cv2.imread(image_path)
            else:
                img_original_bgr = None

        elif input_type == 'bbox_dir':
            if cur_frame < len(input_data):
                print("Use pre-computed bounding boxes")
                image_path = input_data[cur_frame]['image_path']
                hand_bbox_list = input_data[cur_frame]['hand_bbox_list']
                body_bbox_list = input_data[cur_frame]['body_bbox_list']
                img_original_bgr = cv2.imread(image_path)
                load_bbox = True
            else:
                img_original_bgr = None

        elif input_type == 'video':
            _, img_original_bgr = input_data.read()
            if video_frame < cur_frame:
                video_frame += 1
                continue
            video_path = args.input_path
            # save the obtained video frames
            image_path = osp.join(args.frame_dir, f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                cv2.imwrite(image_path, img_original_bgr)
                video_frame += 1

        elif input_type == 'webcam':
            _, img_original_bgr = input_data.read()

            if video_frame < cur_frame:
                video_frame += 1
                continue
            video_path = args.input_path
            # save the obtained video frames
            image_path = f"scene_{cur_frame:05d}.jpg"
            # image_path = osp.join(args.frame_dir, f"{cur_frame:05d}.jpg")
            if img_original_bgr is not None:
                # cv2.imwrite(image_path, img_original_bgr)
                video_frame += 1
        else:
            assert False, "Unknown input_type"

        cur_frame += 1
        if img_original_bgr is None or cur_frame > args.end_frame:
            break
        print("--------------------------------------")

        # bbox detection
        if load_bbox:
            body_pose_list = None
            raw_hand_bboxes = None
        elif args.crop_type == 'hand_crop':
            # hand already cropped, therefore no need for detection
            img_h, img_w = img_original_bgr.shape[:2]
            body_pose_list = None
            raw_hand_bboxes = None
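            # treat the whole image as a single right-hand bbox in (x, y, w, h) form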
            hand_bbox_list = [dict(right_hand=np.array([0, 0, img_w, img_h]))]
        else:
            # The input image may contain other body parts, with the hand not cropped.
            # Use the hand detection model & body detector for hand detection.
            assert args.crop_type == 'no_crop'
            detect_output = bbox_detector.detect_hand_bbox(
                img_original_bgr.copy())
            body_pose_list, body_bbox_list, hand_bbox_list, raw_hand_bboxes = detect_output

        # save the obtained body & hand bbox to json file
        if args.save_bbox_output:
            demo_utils.save_info_to_json(args, image_path, body_bbox_list,
                                         hand_bbox_list)

        if len(hand_bbox_list) < 1:
            print(f"No hand detected: {image_path}")
            continue

        # Hand Pose Regression
        hand_bbox_list_processed, pred_output_list = hand_mocap.regress(
            img_original_bgr, hand_bbox_list, add_margin=True)
        assert len(hand_bbox_list) == len(body_bbox_list)
        assert len(body_bbox_list) == len(pred_output_list)

        # extract mesh for rendering (vertices in image space and faces) from pred_output_list
        pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list)

        # visualize
        res_img = visualizer.visualize(img_original_bgr,
                                       pred_mesh_list=pred_mesh_list,
                                       hand_bbox_list=hand_bbox_list)

        # save the image (we can make an option here)
        if args.out_dir is not None:
            demo_utils.save_res_img(args.out_dir, image_path, res_img)

        # show result in the screen
        if not args.no_display:
            res_img = res_img.astype(np.uint8)
            ImShow(res_img)

        # save predictions to pkl
        if args.save_pred_pkl:
            demo_type = 'hand'
            demo_utils.save_pred_to_pkl(args, demo_type, image_path,
                                        body_bbox_list, hand_bbox_list,
                                        pred_output_list)

        print(f"Processed : {image_path}")

    # When everything done, release the capture
    if input_type == 'webcam' and input_data is not None:
        input_data.release()
    cv2.destroyAllWindows()