Example #1
def mono_hand_loop(acq, outSize, config, track=False, paused=False, with_renderer=False):

    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)

    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize openpose. Net output size",outSize,"...")
    op = OP.OpenPose((160, 160), config["handnet_dims"], tuple(outSize), "COCO", config["OPENPOSE_ROOT"] + os.sep + "models" + os.sep, 0,
                     False, OP.OpenPose.ScaleMode.ZeroToOne, False, True)



    left_hand_model = config["model_left"]
    started = False
    delay = {True: 0, False: 1}
    ik_ms = op_ms = 0.0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["handnet_dims"][0]
    stride = 8
    peaks_thre = config["peaks_thre"]
    print("Entering main Loop.")

    while True:
        try:
            imgs, clbs = acq.grab()
            if imgs is None or len(imgs)==0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        clb = clbs[0]

        # compute kp using model initial pose
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        if bbox is None:
            bbox = config["default_bbox"]

        score = -1
        result_pose = None
        crop_viz = None

        # STEP2 detect 2D joints for the detected hand.
        if started and bbox is not None:
            x,y,w,h = bbox
            # print("BBOX: ",bbox)
            crop = bgr[y:y+h,x:x+w]
            img, pad = mva19.preprocess(crop, boxsize, stride)

            t = time.time()
            op.detectHands(bgr, np.array([[0,0,0,0]+bbox],dtype=np.int32))
            op_ms = (time.time() - t) * 1000.0

            opviz = op.render(np.copy(bgr))
            cv2.imshow("OPVIZ", opviz)

            leftHands, rightHands = op.getKeypoints(op.KeypointType.HAND)

            if rightHands is not None:
                keypoints = rightHands[0][:,:3]
            
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]

                if track:
                    keypoints[mask, :2] = oldKp[mask]

                # cube the confidences to down-weight uncertain detections
                keypoints[:, 2] = keypoints[:, 2]**3
                
                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t) * 1000.0  # milliseconds, to match the header
                # print(count,)
                pose_estimator.print_report()

                if track:
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose) + (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)

                # score is the residual, the lower the better, 0 is best
                # -1 is failed optimization.
                if track:
                    if -1 < score:# < 20000:
                        pose_estimator.model.init_pose = Core.ParamVector(result_pose)
                    else:
                        print("\n===>Reseting init position for IK<===\n")
                        pose_estimator.model.reset_pose()

                if score > -1:  # compute result points
                    p2d = np.array(pose_estimator.ba.decodeAndProject(Core.ParamVector(result_pose), clb)).reshape(-1, 2)
                    # scale = w/config.boxsize
                    bbox = mva19.update_bbox(p2d,bgr.shape[1::-1])



        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, keypoints, thre=peaks_thre)
            cv2.imshow("2D CNN estimation",viz2d)
            header = "FPS OPT+VIZ %03d, OPT  %03fms (CNN %03fms, 3D %03fms)"%(1/(time.time()-st),(est_ms+ik_ms),est_ms, ik_ms) 
            
            if with_renderer:
                hand_visualizer.render(pose_estimator.model, Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox, box_color=(0, 255, 0), text_color=(200, 200, 0))


        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."
        


        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===>Reseting init position for IK<===\n")
            pose_estimator.model.reset_pose()
            bbox = config['default_bbox']
            print("RESETING BBOX",bbox)
        if key & 255 == ord('s'):
            started = not started


        count += 1
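
A minimal driver sketch for the loop above; this is an illustration added here, not code from the original repository. The acq object can be anything whose grab() returns an (images, calibrations) pair, as consumed at the top of the loop; the OpenCVGrabber class, the calibration path, and all config values are assumptions, and only the keys this function actually reads are shown.

config = {
    "model": "models/hand_skinned.xml",       # IK hand model, as in Example #2
    "model_left": False,
    "OPENPOSE_ROOT": "/opt/openpose",         # assumption: local OpenPose install
    "handnet_dims": (368, 368),               # hand network input resolution
    "peaks_thre": 0.1,
    "smoothing": 0.2,
    "default_bbox": [200, 100, 300, 300],     # x, y, w, h seed box (illustrative)
    # plus "model_init_pose", "model_map", "ba_iter" as listed in Example #2
}

acq = OpenCVGrabber("res/calib_webcam.json")  # hypothetical acquisition helper
mono_hand_loop(acq, (640, 480), config, track=True, with_renderer=False)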
Example #2
    def __init__(self, args):
        self.clb = OpenCVCalib2CameraMeta(
            LoadOpenCVCalib("res/calib_ho3d.json"))
        self.config = {
            "model": "models/hand_skinned.xml",
            "model_left": False,
            "model_init_pose": [
                -109.80840809323652, 95.70022984677065, 584.613931114289,
                292.3322807284121, -1547.742897973965, -61.60146881490577,
                435.33025195547793, 1.5707458637241434, 0.21444030289465843,
                0.11033385117688158, 0.021952050059337137, 0.5716581133215294,
                0.02969734913698679, 0.03414155945643072, 0.0,
                1.1504613679382742, -0.5235922979328, 0.15626331136368257,
                0.03656410417088128, 8.59579088582312e-07, 0.35789633949684985,
                0.00012514308785717494, 0.005923001258945023,
                0.24864102398139007, 0.2518954858979162, 0.0,
                3.814694400000002e-13
            ],
            "model_map": IK.ModelAwareBundleAdjuster.HAND_SKINNED_TO_OP_RIGHT_HAND,
            "ba_iter": 100,
            "padding": 0.3,
            "minDim": 170,
            "smoothing": 0.2,
            "model_file": "models/mobnet4f_cmu_adadelta_t1_model.pb",
            "input_layer": "input_1",
            "output_layer": "k2tfout_0",
            "stride": 4,
            "boxsize": 224,
            "peaks_thre": 0.1
        }

        self.data_path = args.data_path
        self.with_renderer = args.with_renderer
        self.track = args.track
        self.paused = args.paused
        self.visualize = args.visualize
        self.save = args.save
        self.start_frame = args.start_frame
        self.repeat_first_frame = args.repeat_first_frame

        if args.save:
            self.save_path = os.path.join(self.data_path, 'hand_tracker')
            if not os.path.exists(self.save_path):
                try:
                    os.makedirs(self.save_path)
                except OSError:
                    print(
                        'ERROR: Unable to create the save directory {}'.format(
                            self.save_path))
                    return

        print("Initialize WACV18 3D Pose estimator (IK)...")
        self.pose_estimator = factory.HandPoseEstimator(self.config)
        self.hand_visualizer = None
        if self.with_renderer:
            print("Initialize Hand Visualizer...")
            self.hand_visualizer = pipeline.HandVisualizer(
                factory.mmanager, (640, 480))

        print("Initialize MVA19 CVRL Hand pose net...")
        self.estimator = mva19.Estimator(self.config["model_file"],
                                         self.config["input_layer"],
                                         self.config["output_layer"])
        self.detection_graph, self.sess = detector_utils.load_inference_graph()

        # Loop variables
        self.started = True
        self.delay = {True: 0, False: 1}
        self.ik_ms = self.est_ms = 0
        self.p2d = self.bbox = None
        self.smoothing = self.config.get("smoothing", 0)
        self.boxsize = self.config["boxsize"]
        self.stride = self.config["stride"]
        self.peaks_thre = self.config["peaks_thre"]

        self.rgb_path = os.path.join(self.data_path, 'rgb')
        self.meta_path = os.path.join(self.data_path, 'meta')

        self.mono_hand_loop()
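
The constructor reads a fixed set of attributes from args. Below is a hedged sketch of a matching command-line front end: the flag names mirror exactly the attributes used above, while the HandTracker class name, the help strings, and the defaults are assumptions.

import argparse

parser = argparse.ArgumentParser(description="HO-3D hand tracking driver (sketch)")
parser.add_argument("--data_path", required=True, help="dataset root containing rgb/ and meta/")
parser.add_argument("--with_renderer", action="store_true")
parser.add_argument("--track", action="store_true")
parser.add_argument("--paused", action="store_true")
parser.add_argument("--visualize", action="store_true")
parser.add_argument("--save", action="store_true", help="write results under <data_path>/hand_tracker")
parser.add_argument("--start_frame", type=int, default=0)
parser.add_argument("--repeat_first_frame", action="store_true")

args = parser.parse_args()
tracker = HandTracker(args)  # hypothetical name for the class whose __init__ is shown above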
Example #3
def mono_hand_loop(acq, outSize, config, track=False, paused=False, with_renderer=False):

    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)

    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(config["model_file"], config["input_layer"], config["output_layer"])

    left_hand_model = config["model_left"]
    started = False
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["boxsize"]
    stride = config["stride"]
    peaks_thre = config["peaks_thre"]
    print("Entering main Loop.")

    while True:
        try:
            imgs, clbs = acq.grab()
            if imgs is None or len(imgs)==0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        clb = clbs[0]

        # compute kp using model initial pose
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        if bbox is None:
            bbox = config["default_bbox"]

        score = -1
        result_pose = None
        crop_viz = None

        # STEP2 detect 2D joints for the detected hand.
        if started and bbox is not None:
            x,y,w,h = bbox
            # print("BBOX: ",bbox)
            crop = bgr[y:y+h,x:x+w]
            img, pad = mva19.preprocess(crop, boxsize, stride)

            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)
        
            # use joint tools to recover keypoints
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride/scale
            ocparts = np.zeros_like(hm[...,0])
            peaks = jt.FindPeaks(hm[...,:-1], ocparts, peaks_thre, scale, scale)

            # convert peaks to hand keypoints
            hand = mva19.peaks_to_hand(peaks, x, y)

            if hand is not None:
                keypoints = hand
            
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]

                if track:
                    keypoints[mask, :2] = oldKp[mask]

                # cube the confidences to down-weight uncertain detections
                keypoints[:, 2] = keypoints[:, 2]**3
                
                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)
                # print(count,)
                pose_estimator.print_report()

                if track:
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose) + (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)

                # score is the residual, the lower the better, 0 is best
                # -1 is failed optimization.
                if track:
                    if -1 < score:# < 20000:
                        pose_estimator.model.init_pose = Core.ParamVector(result_pose)
                    else:
                        print("\n===>Reseting init position for IK<===\n")
                        pose_estimator.model.reset_pose()

                if score > -1:  # compute result points
                    p2d = np.array(pose_estimator.ba.decodeAndProject(Core.ParamVector(result_pose), clb)).reshape(-1, 2)
                    # scale = w/config.boxsize
                    bbox = mva19.update_bbox(p2d,bgr.shape[1::-1])



        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, hand, thre=peaks_thre)
            cv2.imshow("2D CNN estimation",viz2d)
            header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)"%(1/(time.time()-st),1/(est_ms+ik_ms),1.0/est_ms, 1.0/ik_ms) 
            
            if with_renderer:
                hand_visualizer.render(pose_estimator.model, Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox, box_color=(0, 255, 0), text_color=(200, 200, 0))


        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."
        


        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===>Reseting init position for IK<===\n")
            pose_estimator.model.reset_pose()
            bbox = config['default_bbox']
            print("RESETING BBOX",bbox)
        if key & 255 == ord('s'):
            started = not started


        count += 1
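
Unlike Example #1, which drives OpenPose, this variant reads the TensorFlow-specific keys "model_file", "input_layer", "output_layer", "boxsize" and "stride". A sketch of a compatible config follows: essentially the dict from Example #2 with a "default_bbox" entry added; the bbox values and the acq object are illustrative, as in the sketch after Example #1.

config = {
    "model": "models/hand_skinned.xml",
    "model_left": False,
    "model_file": "models/mobnet4f_cmu_adadelta_t1_model.pb",
    "input_layer": "input_1",
    "output_layer": "k2tfout_0",
    "boxsize": 224,
    "stride": 4,
    "peaks_thre": 0.1,
    "smoothing": 0.2,
    "default_bbox": [200, 100, 224, 224],  # x, y, w, h seed box (illustrative)
    # plus "model_init_pose", "model_map", "ba_iter" as listed in Example #2
}
mono_hand_loop(acq, (640, 480), config, track=True)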
Example #4

    def mono_hand_loop(self,
                       out_size,
                       track=False,
                       paused=False,
                       with_renderer=False):
        print("Initialize WACV18 3D Pose estimator (IK)...")
        pose_estimator = factory.HandPoseEstimator(self.config)
        hand_visualizer = None
        if with_renderer:
            print("Initialize Hand Visualizer...")
            hand_visualizer = pipeline.HandVisualizer(factory.mmanager,
                                                      out_size)

        print("Initialize MVA19 CVRL Hand pose net...")
        estimator = mva19.Estimator(self.config["model_file"],
                                    self.config["input_layer"],
                                    self.config["output_layer"])

        detection_graph, sess = detector_utils.load_inference_graph()

        started = True
        delay = {True: 0, False: 1}
        ik_ms = est_ms = 0
        p2d = bbox = None
        smoothing = self.config.get("smoothing", 0)
        boxsize = self.config["boxsize"]
        stride = self.config["stride"]
        peaks_thre = self.config["peaks_thre"]
        print("Entering main Loop")
        saved_hand_poses = []
        saved_frame_ids = []
        image_names = get_image_names(self.object_anno_path, self.object_name)
        for i_name in image_names:
            frame_path = join(self.frame_root_path, i_name)
            bgr = cv2.imread(frame_path)
            st = time.time()

            # Compute kp using model initial pose
            points2d = pose_estimator.ba.decodeAndProject(
                pose_estimator.model.init_pose, self.clb)
            oldKp = np.array(points2d).reshape(-1, 2)

            result_pose = None
            hand = None
            # STEP2 detect 2D joints for the detected hand
            if started:
                if bbox is None:
                    bbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                    if bbox is None:
                        cv2.imshow("2D CNN estimation", bgr)
                        cv2.waitKey(1)
                        continue

                else:
                    dbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                    if dbox is not None:
                        x, y, w, h = bbox
                        x1, y1, w1, h1 = dbox
                        if (x1 > x + w
                                or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                            bbox = dbox
                            print("updated")

                x, y, w, h = bbox
                crop = bgr[y:y + h, x:x + w]
                img, pad = mva19.preprocess(crop, boxsize, stride)
                t = time.time()
                hm = estimator.predict(img)
                est_ms = (time.time() - t)

                # Use joint tools to recover keypoints
                scale = float(boxsize) / float(crop.shape[0])
                scale = stride / scale
                ocparts = np.zeros_like(hm[..., 0])
                peaks = jt.FindPeaks(hm[..., :-1], ocparts, peaks_thre, scale,
                                     scale)

                # Convert peaks to hand keypoints
                hand = mva19.peaks_to_hand(peaks, x, y)

                if hand is not None:
                    keypoints = hand

                    mask = keypoints[:, 2] < peaks_thre
                    keypoints[mask] = [0, 0, 1.0]

                    if track:
                        keypoints[mask, :2] = oldKp[mask]

                    keypoints[:, 2] = keypoints[:, 2]**3

                    rgbKp = IK.Observations(IK.ObservationType.COLOR, self.clb,
                                            keypoints)
                    obsVec = IK.ObservationsVector([
                        rgbKp,
                    ])
                    t = time.time()
                    score, res = pose_estimator.estimate(obsVec)
                    ik_ms = (time.time() - t)

                    if track:
                        result_pose = list(
                            smoothing *
                            np.array(pose_estimator.model.init_pose) +
                            (1.0 - smoothing) * np.array(res))
                    else:
                        result_pose = list(res)
                    # score is the residual, the lower the better, 0 is best
                    # -1 is failed optimization.
                    if track:
                        if -1 < score:  # < 150:
                            pose_estimator.model.init_pose = Core.ParamVector(
                                result_pose)
                        else:
                            print(
                                "\n===> Resetting init position for IK <===\n")
                            pose_estimator.model.reset_pose()
                            bbox = None

                    if score > -1:  # compute result points
                        p2d = np.array(
                            pose_estimator.ba.decodeAndProject(
                                Core.ParamVector(result_pose),
                                self.clb)).reshape(-1, 2)
                        bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

            viz = np.copy(bgr)
            viz2d = np.zeros_like(bgr)
            if started and result_pose is not None:
                viz2d = mva19.visualize_2dhand_skeleton(viz2d,
                                                        hand,
                                                        thre=peaks_thre)
                cv2.imshow("2D CNN estimation", viz2d)
                header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)" % (
                    1 / (time.time() - st), 1 /
                    (est_ms + ik_ms), 1.0 / est_ms, 1.0 / ik_ms)

                if with_renderer:
                    hand_visualizer.render(pose_estimator.model,
                                           Core.ParamVector(result_pose),
                                           self.clb)
                    mbv_viz = hand_visualizer.getDepth()
                    cv2.imshow("MBV VIZ", mbv_viz)
                    mask = mbv_viz != [0, 0, 0]
                    viz[mask] = mbv_viz[mask]
                else:
                    viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                    pipeline.draw_rect(viz,
                                       "Hand",
                                       bbox,
                                       box_color=(0, 255, 0),
                                       text_color=(200, 200, 0))
            else:
                header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

            cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1,
                        cv2.LINE_AA)
            cv2.imshow("3D Hand Model re-projection", viz)

            key = cv2.waitKey(delay[paused])
            if key & 255 == ord('p'):
                paused = not paused
            if key & 255 == ord('q'):
                break
            if key & 255 == ord('r'):
                print("\n===> Resetting init position for IK <===\n")
                pose_estimator.model.reset_pose()
                bbox = None
                print("RESETTING BBOX", bbox)
            if key & 255 == ord('s'):
                started = not started

            # Skip frames where no pose was estimated (e.g. the hand was lost).
            if result_pose is None:
                continue

            p3d = np.array(
                pose_estimator.ba.decode(Core.ParamVector(result_pose),
                                         self.clb)).reshape(-1, 3)

            # Transform to OpenGL coordinates and ICCV format
            coord_change_mat = np.array(
                [[1., 0., 0.], [0, -1., 0.], [0., 0., -1.]], dtype=np.float32)
            p3d = p3d.dot(coord_change_mat.T)
            p3d = to_iccv_format(p3d)

            # Save
            # saved_hand_poses.append(p3d)
            # saved_frame_ids.append(i_name)

            # Save any frames where a good score is achieved
            # score is the residual, the lower the better, 0 is best
            if score < 10:
                print('Saved good pose:', i_name)
                img_save_filename = join(self.monohand_image_path, i_name)
                cv2.imwrite(img_save_filename, viz)
                saved_hand_poses.append(p3d)
                saved_frame_ids.append(i_name)

        # Save data
        save_annotation_file(self.joint_anno_path, saved_frame_ids,
                             saved_hand_poses)
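
The coordinate change applied before saving flips the y and z axes, converting OpenCV camera coordinates (x right, y down, z into the scene) to the OpenGL convention (x right, y up, z toward the viewer); to_iccv_format and save_annotation_file are repo-specific helpers and are not reproduced here. A tiny standalone check of the axis flip:

import numpy as np

coord_change_mat = np.array([[1., 0., 0.], [0., -1., 0.], [0., 0., -1.]], dtype=np.float32)
p3d = np.array([[0.1, 0.2, 0.5]])   # a point to the right of, below, and in front of the camera
print(p3d.dot(coord_change_mat.T))  # [[ 0.1 -0.2 -0.5]] -- a 180-degree rotation about the x axis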