Example #1
    def opencv_vis(self, bgr, result_pose, hand, st):
        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if self.started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d,
                                                    hand,
                                                    thre=self.peaks_thre)
            cv2.imshow("2D CNN estimation", viz2d)
            header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)" % (
                1 / (time.time() - st), 1 / (self.est_ms + self.ik_ms),
                1.0 / self.est_ms, 1.0 / self.ik_ms)

            if self.with_renderer:
                self.hand_visualizer.render(self.pose_estimator.model,
                                            Core.ParamVector(result_pose),
                                            self.clb)
                mbv_viz = self.hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, self.p2d)
                pipeline.draw_rect(viz,
                                   "Hand",
                                   self.bbox,
                                   box_color=(0, 255, 0),
                                   text_color=(200, 200, 0))
        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1,
                    cv2.LINE_AA)
        cv2.imshow("3D Hand Model re-projection", viz)

        key = cv2.waitKey(self.delay[self.paused])
        if key & 255 == ord('p'):
            self.paused = not self.paused
        if key & 255 == ord('q'):
            return False
        if key & 255 == ord('r'):
            print("\n===> Resetting init position for IK <===\n")
            self.pose_estimator.model.reset_pose()
            self.bbox = None  # was a bare local assignment, which silently left self.bbox untouched
            print("RESETTING BBOX", self.bbox)
        if key & 255 == ord('s'):
            self.started = not self.started

        return True
Example #2
def mono_hand_loop(acq, outSize, config, track=False, paused=False, with_renderer=False):

    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)

    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize openpose. Net output size",outSize,"...")
    op = OP.OpenPose((160, 160), config["handnet_dims"], tuple(outSize), "COCO", config["OPENPOSE_ROOT"] + os.sep + "models" + os.sep, 0,
                     False, OP.OpenPose.ScaleMode.ZeroToOne, False, True)

    left_hand_model = config["model_left"]
    started = False
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["handnet_dims"][0]
    stride = 8
    peaks_thre = config["peaks_thre"]
    print("Entering main Loop.")

    while True:
        try:
            imgs, clbs = acq.grab()
            if imgs is None or len(imgs)==0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        clb = clbs[0]

        # compute kp using model initial pose
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        if bbox is None:
            bbox = config["default_bbox"]

        score = -1
        result_pose = None
        crop_viz = None

        # STEP2 detect 2D joints for the detected hand.
        if started and bbox is not None:
            x, y, w, h = bbox
            crop = bgr[y:y+h, x:x+w]
            img, pad = mva19.preprocess(crop, boxsize, stride)  # unused in this variant; OpenPose consumes the full frame below

            t = time.time()
            op.detectHands(bgr, np.array([[0, 0, 0, 0] + bbox], dtype=np.int32))
            op_ms = (time.time() - t) * 1000.0

            opviz = op.render(np.copy(bgr))
            cv2.imshow("OPVIZ", opviz)

            leftHands, rightHands = op.getKeypoints(op.KeypointType.HAND)

            if rightHands is not None:
                keypoints = rightHands[0][:, :3]

                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]

                if track:
                    keypoints[mask, :2] = oldKp[mask]

                keypoints[:, 2] = keypoints[:, 2]**3
                
                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t) * 1000.0  # milliseconds, to match the header label below
                pose_estimator.print_report()

                if track:
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose) + (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)

                # score is the residual, the lower the better, 0 is best
                # -1 is failed optimization.
                if track:
                    if -1 < score:# < 20000:
                        pose_estimator.model.init_pose = Core.ParamVector(result_pose)
                    else:
                        print("\n===>Reseting init position for IK<===\n")
                        pose_estimator.model.reset_pose()

                if score > -1:  # compute result points
                    p2d = np.array(pose_estimator.ba.decodeAndProject(Core.ParamVector(result_pose), clb)).reshape(-1, 2)
                    bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, keypoints, thre=peaks_thre)
            cv2.imshow("2D CNN estimation",viz2d)
            header = "FPS OPT+VIZ %03d, OPT  %03fms (CNN %03fms, 3D %03fms)"%(1/(time.time()-st),(est_ms+ik_ms),est_ms, ik_ms) 
            
            if with_renderer:
                hand_visualizer.render(pose_estimator.model, Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox, box_color=(0, 255, 0), text_color=(200, 200, 0))


        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===>Reseting init position for IK<===\n")
            pose_estimator.model.reset_pose()
            bbox = config['default_bbox']
            print("RESETING BBOX",bbox)
        if key & 255 == ord('s'):
            started = not started


        count += 1
Example #3
def mono_hand_loop(acq, outSize, config, track=False, paused=False, with_renderer=False):

    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)

    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(config["model_file"], config["input_layer"], config["output_layer"])

    left_hand_model = config["model_left"]
    started = False
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["boxsize"]
    stride = config["stride"]
    peaks_thre = config["peaks_thre"]
    print("Entering main Loop.")

    while True:
        try:
            imgs, clbs = acq.grab()
            if imgs is None or len(imgs)==0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        clb = clbs[0]

        # compute kp using model initial pose
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        if bbox is None:
            bbox = config["default_bbox"]

        score = -1
        result_pose = None
        crop_viz = None

        # STEP2 detect 2D joints for the detected hand.
        if started and bbox is not None:
            x, y, w, h = bbox
            crop = bgr[y:y+h, x:x+w]
            img, pad = mva19.preprocess(crop, boxsize, stride)

            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)
        
            # use joint tools to recover keypoints
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride/scale
            ocparts = np.zeros_like(hm[...,0])
            peaks = jt.FindPeaks(hm[...,:-1], ocparts, peaks_thre, scale, scale)

            # convert peaks to hand keypoints
            hand = mva19.peaks_to_hand(peaks, x, y)

            if hand is not None:
                keypoints = hand
            
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]

                if track:
                    keypoints[mask, :2] = oldKp[mask]

                keypoints[:, 2] = keypoints[:, 2]**3
                
                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)
                pose_estimator.print_report()

                if track:
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose) + (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)

                # score is the residual, the lower the better, 0 is best
                # -1 is failed optimization.
                if track:
                    if -1 < score:# < 20000:
                        pose_estimator.model.init_pose = Core.ParamVector(result_pose)
                    else:
                        print("\n===>Reseting init position for IK<===\n")
                        pose_estimator.model.reset_pose()

                if score > -1:  # compute result points
                    p2d = np.array(pose_estimator.ba.decodeAndProject(Core.ParamVector(result_pose), clb)).reshape(-1, 2)
                    bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, hand, thre=peaks_thre)
            cv2.imshow("2D CNN estimation",viz2d)
            header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)"%(1/(time.time()-st),1/(est_ms+ik_ms),1.0/est_ms, 1.0/ik_ms) 
            
            if with_renderer:
                hand_visualizer.render(pose_estimator.model, Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox, box_color=(0, 255, 0), text_color=(200, 200, 0))


        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===>Reseting init position for IK<===\n")
            pose_estimator.model.reset_pose()
            bbox = config['default_bbox']
            print("RESETING BBOX",bbox)
        if key & 255 == ord('s'):
            started = not started


        count += 1
Example #4
    def callback(self, img_msg):
        try:
            bgr = self.bridge.imgmsg_to_cv2(img_msg, "bgr8")
            bgr = cv2.resize(bgr, (640, 480), interpolation=cv2.INTER_AREA)
        except Exception as e:
            print("Failed to grab", e)
            return

        clb = self.fake_clb

        # compute kp using model initial pose
        points2d = self.pose_estimator.ba.decodeAndProject(
            self.pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        score = -1
        result_pose = None
        # STEP2 detect 2D joints for the detected hand.
        if self.started:
            if self.bbox is None:
                self.bbox = detector_utils.hand_bbox(bgr, self.detection_graph,
                                                     self.sess)
                if self.bbox is None:
                    cv2.imshow("3D Hand Model reprojection", bgr)
                    cv2.waitKey(1)
                    return
            else:
                dbox = detector_utils.hand_bbox(bgr, self.detection_graph,
                                                self.sess)
                if dbox is not None:
                    x, y, w, h = self.bbox
                    x1, y1, w1, h1 = dbox
                    if (x1 > x + w
                            or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                        self.bbox = dbox
                        print("updated")
                    else:
                        x, y, w, h = dbox
                        b = max((w, h, 224))
                        b = int(b + b * 0.3)
                        cx = x + w / 2
                        cy = y + h / 2
                        x = cx - b / 2
                        y = cy - b / 2

                        x = max(0, int(x))
                        y = max(0, int(y))

                        x = min(x, bgr.shape[1] - b)
                        y = min(y, bgr.shape[0] - b)

                        self.bbox = [x, y, b, b]

            x, y, w, h = self.bbox
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, self.boxsize, self.stride)
            t = time.time()
            hm = self.estimator.predict(img)
            est_ms = (time.time() - t)

            # use joint tools to recover keypoints
            scale = float(self.boxsize) / float(crop.shape[0])
            scale = self.stride / scale
            ocparts = np.zeros_like(hm[..., 0])
            peaks = jt.FindPeaks(hm[..., :-1], ocparts, self.peaks_thre, scale,
                                 scale)

            # convert peaks to hand keypoints
            hand = mva19.peaks_to_hand(peaks, x, y)

            if hand is not None:
                keypoints = hand

                mask = keypoints[:, 2] < self.peaks_thre
                keypoints[mask] = [0, 0, 1.0]

                if self.track:  # assuming the flag lives on the instance (cf. self.smoothing below); bare 'track' was undefined here
                    keypoints[mask, :2] = oldKp[mask]

                keypoints[:, 2] = keypoints[:, 2]**3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb,
                                        keypoints)
                obsVec = IK.ObservationsVector([
                    rgbKp,
                ])
                score, res = self.pose_estimator.estimate(obsVec)

                if self.track:
                    result_pose = list(
                        self.smoothing *
                        np.array(self.pose_estimator.model.init_pose) +
                        (1.0 - self.smoothing) * np.array(res))
                else:
                    result_pose = list(res)
                # score is the residual, the lower the better, 0 is best
                # -1 is failed optimization.
                if self.track:
                    if -1 < score:  # < 150:
                        self.pose_estimator.model.init_pose = Core.ParamVector(
                            result_pose)
                    else:
                        print("\n===> Resetting init position for IK <===\n")
                        self.pose_estimator.model.reset_pose()
                        self.bbox = None

                if score > -1:  # compute result points
                    self.p2d = np.array(
                        self.pose_estimator.ba.decodeAndProject(
                            Core.ParamVector(result_pose),
                            clb)).reshape(-1, 2)
                    # scale = w/config.boxsize
                    self.bbox = mva19.update_bbox(self.p2d, bgr.shape[1::-1])

            if result_pose is not None:  # the hand may not have been detected this frame
                p3d = np.array(
                    self.pose_estimator.ba.decode(
                        Core.ParamVector(result_pose), clb))
                joints_msg = Float32MultiArray()
                joints_msg.data = p3d.tolist()
                self.joints_publisher.publish(joints_msg)

        viz = np.copy(bgr)
        if self.started and result_pose is not None:
            viz = mva19.visualize_3dhand_skeleton(viz, self.p2d)
            pipeline.draw_rect(viz,
                               "Hand",
                               self.bbox,
                               box_color=(0, 255, 0),
                               text_color=(200, 200, 0))
            cv2.putText(viz, 'Hand pose estimation', (20, 20), 0, 0.7,
                        (50, 20, 20), 1, cv2.LINE_AA)
            cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(self.delay[self.paused])
        if key & 255 == ord('p'):
            self.paused = not self.paused
        if key & 255 == ord('q'):
            cv2.destroyAllWindows()
            sys.exit(0)
        if key & 255 == ord('r'):
            print("\n===>Reseting init position for IK<===\n")
            self.pose_estimator.model.reset_pose()
            self.bbox = None
            print("RESETING BBOX", self.bbox)
Example #5

def mono_hand_loop(acq, outSize, config, track=False, paused=False):

    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)

    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(config["model_file"], config["input_layer"], config["output_layer"])

    detection_graph, sess = detector_utils.load_inference_graph()

    started = True
    delay = {True: 0, False: 1}
    p2d = bbox = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["boxsize"]
    stride = config["stride"]
    peaks_thre = config["peaks_thre"]
    print("Entering main Loop.")

    for topic, img_msg, ros_time in tqdm(Bag(sys.argv[2]).read_messages()):
        if topic != "camera/rgb/image_raw":
            continue
        try:
            bgr = bridge.imgmsg_to_cv2(img_msg, "bgr8")
            bgr = cv2.resize(bgr, (640, 480), interpolation=cv2.INTER_AREA)
        except Exception as e:
            print("Failed to grab", e)
            break

        clb = fake_clb

        # compute kp using model initial pose
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)


        score = -1
        result_pose = None
        # STEP2 detect 2D joints for the detected hand.
        if started:
            if bbox is None:
                bbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if bbox is None:
                    if sys.argv[4] == '1':
                        cv2.imshow("3D Hand Model reprojection", bgr)
                        cv2.waitKey(1)
                    to_AC_format(np.zeros((21, 3)), ros_time, 1)
                    continue
            else:
                dbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if dbox is not None:
                    x, y, w, h = bbox
                    x1, y1, w1, h1 = dbox
                    if (x1 > x + w or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                        bbox = dbox
                        print("updated")
                    else:
                        x, y, w, h = dbox
                        b = max((w, h, 224))
                        b = int(b + b * 0.3)
                        cx = x + w / 2
                        cy = y + h / 2
                        x = cx - b / 2
                        y = cy - b / 2

                        x = max(0, int(x))
                        y = max(0, int(y))

                        x = min(x, bgr.shape[1] - b)
                        y = min(y, bgr.shape[0] - b)

                        bbox = [x, y, b, b]

            x, y, w, h = bbox
            crop = bgr[y:y+h, x:x+w]
            img, pad = mva19.preprocess(crop, boxsize, stride)
            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)

            # use joint tools to recover keypoints
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride/scale
            ocparts = np.zeros_like(hm[...,0])
            peaks = jt.FindPeaks(hm[...,:-1], ocparts, peaks_thre, scale, scale)

            # convert peaks to hand keypoints
            hand = mva19.peaks_to_hand(peaks, x, y)

            if hand is not None:
                keypoints = hand
            
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]

                if track:
                    keypoints[mask, :2] = oldKp[mask]

                keypoints[:, 2] = keypoints[:, 2]**3
                
                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                score, res = pose_estimator.estimate(obsVec)
                
                if track:
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose) + (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
                # score is the residual, the lower the better, 0 is best
                # -1 is failed optimization.
                if track:
                    if -1 < score:  # < 150:
                        pose_estimator.model.init_pose = Core.ParamVector(result_pose)
                    else:
                        print("\n===> Resetting init position for IK <===\n")
                        pose_estimator.model.reset_pose()
                        bbox = None

                if score > -1:  # compute result points
                    p2d = np.array(pose_estimator.ba.decodeAndProject(Core.ParamVector(result_pose), clb)).reshape(-1, 2)
                    # scale = w/config.boxsize
                    bbox = mva19.update_bbox(p2d,bgr.shape[1::-1])
            
            p3d = np.array(pose_estimator.ba.decode(Core.ParamVector(result_pose), clb)).reshape(-1,3)
            to_AC_format(p3d,ros_time,1)


        viz = np.copy(bgr)
        if sys.argv[4] == '1' and started and result_pose is not None:
            viz = mva19.visualize_3dhand_skeleton(viz, p2d)
            pipeline.draw_rect(viz, "Hand", bbox, box_color=(0, 255, 0), text_color=(200, 200, 0))
            cv2.putText(viz, 'Hand pose estimation', (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
            cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===>Reseting init position for IK<===\n")
            pose_estimator.model.reset_pose()
            bbox = None
            print("RESETING BBOX",bbox)
Example #6

    def mono_hand_loop(self,
                       out_size,
                       track=False,
                       paused=False,
                       with_renderer=False):
        print("Initialize WACV18 3D Pose estimator (IK)...")
        pose_estimator = factory.HandPoseEstimator(self.config)
        hand_visualizer = None
        if with_renderer:
            print("Initialize Hand Visualizer...")
            hand_visualizer = pipeline.HandVisualizer(factory.mmanager,
                                                      out_size)

        print("Initialize MVA19 CVRL Hand pose net...")
        estimator = mva19.Estimator(self.config["model_file"],
                                    self.config["input_layer"],
                                    self.config["output_layer"])

        detection_graph, sess = detector_utils.load_inference_graph()

        started = True
        delay = {True: 0, False: 1}
        ik_ms = est_ms = 0
        p2d = bbox = None
        smoothing = self.config.get("smoothing", 0)
        boxsize = self.config["boxsize"]
        stride = self.config["stride"]
        peaks_thre = self.config["peaks_thre"]
        print("Entering main Loop")
        saved_hand_poses = []
        saved_framed_ids = []
        image_names = get_image_names(self.object_anno_path, self.object_name)
        for i_name in image_names:
            frame_path = join(self.frame_root_path, i_name)
            bgr = cv2.imread(frame_path)
            st = time.time()

            # Compute kp using model initial pose
            points2d = pose_estimator.ba.decodeAndProject(
                pose_estimator.model.init_pose, self.clb)
            oldKp = np.array(points2d).reshape(-1, 2)

            score = -1  # initialize per frame; the save check below reads it even when no hand is found
            result_pose = None
            hand = None
            # STEP2 detect 2D joints for the detected hand
            if started:
                if bbox is None:
                    bbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                    if bbox is None:
                        cv2.imshow("2D CNN estimation", bgr)
                        cv2.waitKey(1)
                        continue

                else:
                    dbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                    if dbox is not None:
                        x, y, w, h = bbox
                        x1, y1, w1, h1 = dbox
                        if (x1 > x + w
                                or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                            bbox = dbox
                            print("updated")

                x, y, w, h = bbox
                crop = bgr[y:y + h, x:x + w]
                img, pad = mva19.preprocess(crop, boxsize, stride)
                t = time.time()
                hm = estimator.predict(img)
                est_ms = (time.time() - t)

                # Use joint tools to recover keypoints
                scale = float(boxsize) / float(crop.shape[0])
                scale = stride / scale
                ocparts = np.zeros_like(hm[..., 0])
                peaks = jt.FindPeaks(hm[..., :-1], ocparts, peaks_thre, scale,
                                     scale)

                # Convert peaks to hand keypoints
                hand = mva19.peaks_to_hand(peaks, x, y)

                if hand is not None:
                    keypoints = hand

                    mask = keypoints[:, 2] < peaks_thre
                    keypoints[mask] = [0, 0, 1.0]

                    if track:
                        keypoints[mask, :2] = oldKp[mask]

                    keypoints[:, 2] = keypoints[:, 2]**3

                    rgbKp = IK.Observations(IK.ObservationType.COLOR, self.clb,
                                            keypoints)
                    obsVec = IK.ObservationsVector([
                        rgbKp,
                    ])
                    t = time.time()
                    score, res = pose_estimator.estimate(obsVec)
                    ik_ms = (time.time() - t)

                    if track:
                        result_pose = list(
                            smoothing *
                            np.array(pose_estimator.model.init_pose) +
                            (1.0 - smoothing) * np.array(res))
                    else:
                        result_pose = list(res)
                    # score is the residual, the lower the better, 0 is best
                    # -1 is failed optimization.
                    if track:
                        if -1 < score:  # < 150:
                            pose_estimator.model.init_pose = Core.ParamVector(
                                result_pose)
                        else:
                            print(
                                "\n===> Resetting init position for IK <===\n")
                            pose_estimator.model.reset_pose()
                            bbox = None

                    if score > -1:  # compute result points
                        p2d = np.array(
                            pose_estimator.ba.decodeAndProject(
                                Core.ParamVector(result_pose),
                                self.clb)).reshape(-1, 2)
                        bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

            viz = np.copy(bgr)
            viz2d = np.zeros_like(bgr)
            if started and result_pose is not None:
                viz2d = mva19.visualize_2dhand_skeleton(viz2d,
                                                        hand,
                                                        thre=peaks_thre)
                cv2.imshow("2D CNN estimation", viz2d)
                header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)" % (
                    1 / (time.time() - st), 1 /
                    (est_ms + ik_ms), 1.0 / est_ms, 1.0 / ik_ms)

                if with_renderer:
                    hand_visualizer.render(pose_estimator.model,
                                           Core.ParamVector(result_pose),
                                           self.clb)
                    mbv_viz = hand_visualizer.getDepth()
                    cv2.imshow("MBV VIZ", mbv_viz)
                    mask = mbv_viz != [0, 0, 0]
                    viz[mask] = mbv_viz[mask]
                else:
                    viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                    pipeline.draw_rect(viz,
                                       "Hand",
                                       bbox,
                                       box_color=(0, 255, 0),
                                       text_color=(200, 200, 0))
            else:
                header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

            cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1,
                        cv2.LINE_AA)
            cv2.imshow("3D Hand Model re-projection", viz)

            key = cv2.waitKey(delay[paused])
            if key & 255 == ord('p'):
                paused = not paused
            if key & 255 == ord('q'):
                break
            if key & 255 == ord('r'):
                print("\n===> Resetting init position for IK <===\n")
                pose_estimator.model.reset_pose()
                bbox = None
                print("RESETTING BBOX", bbox)
            if key & 255 == ord('s'):
                started = not started

            if result_pose is None:  # no hand this frame; nothing to decode or save
                continue

            p3d = np.array(
                pose_estimator.ba.decode(Core.ParamVector(result_pose),
                                         self.clb)).reshape(-1, 3)

            # Transform to OpenGL coordinates and ICCV format
            coord_change_mat = np.array(
                [[1., 0., 0.], [0, -1., 0.], [0., 0., -1.]], dtype=np.float32)
            p3d = p3d.dot(coord_change_mat.T)
            p3d = to_iccv_format(p3d)

            # Save any frames where a good score is achieved
            # score is the residual, the lower the better, 0 is best
            if score < 10:
                print('Saved good pose:', i_name)
                img_save_filename = join(self.monohand_image_path, i_name)
                cv2.imwrite(img_save_filename, viz)
                saved_hand_poses.append(p3d)
                saved_framed_ids.append(i_name)

        # Save data
        save_annotation_file(self.joint_anno_path, saved_framed_ids,
                             saved_hand_poses)