img = cv2.imread('frame.jpg')  # load screenshot as variable img

# ---------- Make predictions and return the three most likely characters as formatted text ----------
if img is not None:
    img = skimage.transform.resize(img, (64, 64, 3))     # resize image to fit model input dimensions
    input_data = np.asarray(img).reshape(-1, 64, 64, 3)  # add a batch dimension for the model
    pred = loaded_model.predict(input_data)
    pred = np.array(pred[0])
    indices = pred.argsort()[-3:][::-1]  # indices of the three predictions with highest confidence
    text = ['', '', '']
    for i in range(len(indices)):
        # format each prediction as "<char>:  <percent>%", truncated to a whole percent
        text[i] = (character_list[indices[i]] + ':'
                   + ' ' * ((5 + i * 2) - len(character_list[indices[i]]))
                   + str((pred[indices[i]] * 100) // 1) + '%')

# ---------- Joint Demo Code ----------
crop_res = cv2.resize(image, (boxsize, boxsize))
newImg, pad = preprocess(crop_res, boxsize, stride)
tic = time.time()
hm = estimator.predict(newImg)
dt = time.time() - tic
print("TTP %.5f, FPS %f" % (dt, 1.0 / dt), "HM.shape ", hm.shape)
hm = cv2.resize(hm, (0, 0), fx=stride, fy=stride)
# bg = cv2.normalize(hm[:, :, -1], None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8UC1)
viz = cv2.normalize(np.sum(hm[:, :, :-1], axis=2), None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8UC1)
# cv2.imshow("Background", bg)
# cv2.imshow("Input frame", frame)
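For reference, a self-contained sketch of the top-3 selection at the start of the block above, with a dummy probability vector and label list standing in for the real model output:

import numpy as np

pred = np.array([0.05, 0.60, 0.10, 0.25])  # hypothetical softmax output
character_list = ['A', 'B', 'C', 'D']      # hypothetical label set
indices = pred.argsort()[-3:][::-1]        # top-3 class ids, highest confidence first
for idx in indices:
    # same truncating percent formatting as above
    print(character_list[idx] + ': ' + str((pred[idx] * 100) // 1) + '%')
# -> B: 60.0%   D: 25.0%   C: 10.0%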
def mono_hand_loop(acq, outSize, config, track=False, paused=False, with_renderer=False):
    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)
    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize OpenPose. Net output size", outSize, "...")
    op = OP.OpenPose((160, 160), config["handnet_dims"], tuple(outSize), "COCO",
                     config["OPENPOSE_ROOT"] + os.sep + "models" + os.sep, 0,
                     False, OP.OpenPose.ScaleMode.ZeroToOne, False, True)

    left_hand_model = config["model_left"]
    started = False
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["handnet_dims"][0]
    stride = 8
    peaks_thre = config["peaks_thre"]

    print("Entering main loop.")
    while True:
        try:
            imgs, clbs = acq.grab()
            if imgs is None or len(imgs) == 0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        clb = clbs[0]

        # compute keypoints using the model's initial pose
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        if bbox is None:
            bbox = config["default_bbox"]

        score = -1
        result_pose = None
        crop_viz = None

        # STEP 2: detect 2D joints for the detected hand.
        if started and bbox is not None:
            x, y, w, h = bbox
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, boxsize, stride)

            t = time.time()
            op.detectHands(bgr, np.array([[0, 0, 0, 0] + bbox], dtype=np.int32))
            op_ms = (time.time() - t) * 1000.0
            opviz = op.render(np.copy(bgr))
            cv2.imshow("OPVIZ", opviz)

            leftHands, rightHands = op.getKeypoints(op.KeypointType.HAND)
            if rightHands is not None:
                keypoints = rightHands[0][:, :3]
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]
                if track:
                    keypoints[mask, :2] = oldKp[mask]
                keypoints[:, 2] = keypoints[:, 2] ** 3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)
                pose_estimator.print_report()

                if track:
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose)
                                       + (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
        # score is the residual; the lower the better, 0 is best.
        # -1 means failed optimization.
        if track:
            if -1 < score:  # < 20000:
                pose_estimator.model.init_pose = Core.ParamVector(result_pose)
            else:
                print("\n===> Resetting init position for IK <===\n")
                pose_estimator.model.reset_pose()

        if score > -1:
            # compute result points
            p2d = np.array(pose_estimator.ba.decodeAndProject(
                Core.ParamVector(result_pose), clb)).reshape(-1, 2)
            bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, keypoints, thre=peaks_thre)
            cv2.imshow("2D CNN estimation", viz2d)
            # est_ms and ik_ms are measured in seconds; convert for the ms display
            header = "FPS OPT+VIZ %03d, OPT %03fms (CNN %03fms, 3D %03fms)" % (
                1 / (time.time() - st), (est_ms + ik_ms) * 1000, est_ms * 1000, ik_ms * 1000)
            if with_renderer:
                hand_visualizer.render(pose_estimator.model, Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox,
                                   box_color=(0, 255, 0), text_color=(200, 200, 0))
        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===> Resetting init position for IK <===\n")
            pose_estimator.model.reset_pose()
            bbox = config['default_bbox']
            print("RESETTING BBOX", bbox)
        if key & 255 == ord('s'):
            started = not started

        count += 1
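All of the loops in this file share the same per-keypoint confidence gating before the IK step. Isolated below as a standalone helper (the name gate_keypoints is hypothetical), assuming keypoints is an (N, 3) array of (x, y, confidence) rows:

import numpy as np

def gate_keypoints(keypoints, oldKp, peaks_thre, track):
    """Suppress low-confidence joints before building IK observations."""
    keypoints = keypoints.copy()
    mask = keypoints[:, 2] < peaks_thre
    # below-threshold joints get position (0, 0) and confidence 1.0 ...
    keypoints[mask] = [0, 0, 1.0]
    if track:
        # ... or, when tracking, the previous frame's projected position
        keypoints[mask, :2] = oldKp[mask]
    # cubing sharpens the weighting toward high-confidence joints
    keypoints[:, 2] = keypoints[:, 2] ** 3
    return keypoints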
def mono_hand_loop(acq, outSize, config, track=False, paused=False, with_renderer=False):
    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)
    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(config["model_file"], config["input_layer"], config["output_layer"])

    left_hand_model = config["model_left"]
    started = False
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["boxsize"]
    stride = config["stride"]
    peaks_thre = config["peaks_thre"]

    print("Entering main loop.")
    while True:
        try:
            imgs, clbs = acq.grab()
            if imgs is None or len(imgs) == 0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        clb = clbs[0]

        # compute keypoints using the model's initial pose
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        if bbox is None:
            bbox = config["default_bbox"]

        score = -1
        result_pose = None
        crop_viz = None

        # STEP 2: detect 2D joints for the detected hand.
        if started and bbox is not None:
            x, y, w, h = bbox
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, boxsize, stride)
            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)

            # use joint tools to recover keypoints; the peak coordinates are
            # scaled by stride * crop_height / boxsize to map the heatmap grid
            # back to crop pixels
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride / scale
            ocparts = np.zeros_like(hm[..., 0])
            peaks = jt.FindPeaks(hm[..., :-1], ocparts, peaks_thre, scale, scale)

            # convert peaks to hand keypoints
            hand = mva19.peaks_to_hand(peaks, x, y)
            if hand is not None:
                keypoints = hand
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]
                if track:
                    keypoints[mask, :2] = oldKp[mask]
                keypoints[:, 2] = keypoints[:, 2] ** 3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)
                pose_estimator.print_report()

                if track:
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose)
                                       + (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
        # score is the residual; the lower the better, 0 is best.
        # -1 means failed optimization.
        if track:
            if -1 < score:  # < 20000:
                pose_estimator.model.init_pose = Core.ParamVector(result_pose)
            else:
                print("\n===> Resetting init position for IK <===\n")
                pose_estimator.model.reset_pose()

        if score > -1:
            # compute result points
            p2d = np.array(pose_estimator.ba.decodeAndProject(
                Core.ParamVector(result_pose), clb)).reshape(-1, 2)
            bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, hand, thre=peaks_thre)
            cv2.imshow("2D CNN estimation", viz2d)
            # est_ms and ik_ms hold seconds despite the names, so the
            # reciprocals below are per-component frame rates
            header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)" % (
                1 / (time.time() - st), 1 / (est_ms + ik_ms), 1.0 / est_ms, 1.0 / ik_ms)
            if with_renderer:
                hand_visualizer.render(pose_estimator.model, Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox,
                                   box_color=(0, 255, 0), text_color=(200, 200, 0))
        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===> Resetting init position for IK <===\n")
            pose_estimator.model.reset_pose()
            bbox = config['default_bbox']
            print("RESETTING BBOX", bbox)
        if key & 255 == ord('s'):
            started = not started

        count += 1
def callback(self, img_msg):
    try:
        bgr = self.bridge.imgmsg_to_cv2(img_msg, "bgr8")
        bgr = cv2.resize(bgr, (640, 480), interpolation=cv2.INTER_AREA)
    except Exception as e:
        print("Failed to grab", e)
        return

    clb = self.fake_clb

    # compute keypoints using the model's initial pose
    points2d = self.pose_estimator.ba.decodeAndProject(
        self.pose_estimator.model.init_pose, clb)
    oldKp = np.array(points2d).reshape(-1, 2)

    score = -1
    result_pose = None

    # STEP 2: detect 2D joints for the detected hand.
    if self.started:
        if self.bbox is None:
            self.bbox = detector_utils.hand_bbox(bgr, self.detection_graph, self.sess)
            if self.bbox is None:
                cv2.imshow("3D Hand Model reprojection", bgr)
                cv2.waitKey(1)
                return
        else:
            dbox = detector_utils.hand_bbox(bgr, self.detection_graph, self.sess)
            if dbox is not None:
                x, y, w, h = self.bbox
                x1, y1, w1, h1 = dbox
                # adopt the new detection only if it does not overlap the tracked box
                if (x1 > x + w or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                    self.bbox = dbox
                    print("updated")
                else:
                    # otherwise grow the detection into a square crop of at least
                    # 224 px with a 30% margin, clamped to the image bounds
                    x, y, w, h = dbox
                    b = max((w, h, 224))
                    b = int(b + b * 0.3)
                    cx = x + w / 2
                    cy = y + h / 2
                    x = max(0, int(cx - b / 2))
                    y = max(0, int(cy - b / 2))
                    x = min(x, bgr.shape[1] - b)
                    y = min(y, bgr.shape[0] - b)
                    self.bbox = [x, y, b, b]

        x, y, w, h = self.bbox
        crop = bgr[y:y + h, x:x + w]
        img, pad = mva19.preprocess(crop, self.boxsize, self.stride)
        t = time.time()
        hm = self.estimator.predict(img)
        est_ms = (time.time() - t)

        # use joint tools to recover keypoints
        scale = float(self.boxsize) / float(crop.shape[0])
        scale = self.stride / scale
        ocparts = np.zeros_like(hm[..., 0])
        peaks = jt.FindPeaks(hm[..., :-1], ocparts, self.peaks_thre, scale, scale)

        # convert peaks to hand keypoints
        hand = mva19.peaks_to_hand(peaks, x, y)
        if hand is not None:
            keypoints = hand
            mask = keypoints[:, 2] < self.peaks_thre
            keypoints[mask] = [0, 0, 1.0]
            if self.track:  # was bare `track`; self.track matches the sibling class code
                keypoints[mask, :2] = oldKp[mask]
            keypoints[:, 2] = keypoints[:, 2] ** 3

            rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
            obsVec = IK.ObservationsVector([rgbKp, ])
            score, res = self.pose_estimator.estimate(obsVec)

            if self.track:
                result_pose = list(self.smoothing * np.array(self.pose_estimator.model.init_pose)
                                   + (1.0 - self.smoothing) * np.array(res))
            else:
                result_pose = list(res)
    # score is the residual; the lower the better, 0 is best.
    # -1 means failed optimization.
    if self.track:  # was bare `track`; self.track matches the sibling class code
        if -1 < score:  # < 150:
            self.pose_estimator.model.init_pose = Core.ParamVector(result_pose)
        else:
            print("\n===> Resetting init position for IK <===\n")
            self.pose_estimator.model.reset_pose()
            self.bbox = None

    if score > -1:
        # compute result points
        self.p2d = np.array(self.pose_estimator.ba.decodeAndProject(
            Core.ParamVector(result_pose), clb)).reshape(-1, 2)
        self.bbox = mva19.update_bbox(self.p2d, bgr.shape[1::-1])

        # publish the decoded 3D joints
        p3d = np.array(self.pose_estimator.ba.decode(Core.ParamVector(result_pose), clb))
        joints_msg = Float32MultiArray()
        joints_msg.data = p3d.tolist()
        self.joints_publisher.publish(joints_msg)

    viz = np.copy(bgr)
    if self.started and result_pose is not None:
        viz = mva19.visualize_3dhand_skeleton(viz, self.p2d)
        pipeline.draw_rect(viz, "Hand", self.bbox,
                           box_color=(0, 255, 0), text_color=(200, 200, 0))

    cv2.putText(viz, 'Hand pose estimation', (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
    cv2.imshow("3D Hand Model reprojection", viz)

    key = cv2.waitKey(self.delay[self.paused])
    if key & 255 == ord('p'):
        self.paused = not self.paused
    if key & 255 == ord('q'):
        cv2.destroyAllWindows()
        sys.exit(0)
    if key & 255 == ord('r'):
        print("\n===> Resetting init position for IK <===\n")
        self.pose_estimator.model.reset_pose()
        self.bbox = None
        print("RESETTING BBOX", self.bbox)
def mono_hand_loop(acq, outSize, config, track=False, paused=False):
    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)
    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(config["model_file"], config["input_layer"], config["output_layer"])
    detection_graph, sess = detector_utils.load_inference_graph()

    started = True
    delay = {True: 0, False: 1}
    p2d = bbox = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["boxsize"]
    stride = config["stride"]
    peaks_thre = config["peaks_thre"]

    print("Entering main loop.")
    for topic, img_msg, ros_time in tqdm(Bag(sys.argv[2]).read_messages()):
        if topic != "camera/rgb/image_raw":
            continue
        try:
            bgr = bridge.imgmsg_to_cv2(img_msg, "bgr8")
            bgr = cv2.resize(bgr, (640, 480), interpolation=cv2.INTER_AREA)
        except Exception as e:
            print("Failed to grab", e)
            break

        clb = fake_clb

        # compute keypoints using the model's initial pose
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        score = -1
        result_pose = None

        # STEP 2: detect 2D joints for the detected hand.
        if started:
            if bbox is None:
                bbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if bbox is None:
                    if sys.argv[4] == '1':
                        cv2.imshow("3D Hand Model reprojection", bgr)
                        cv2.waitKey(1)
                    to_AC_format(np.zeros((21, 3)), ros_time, 1)
                    continue
            else:
                dbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if dbox is not None:
                    x, y, w, h = bbox
                    x1, y1, w1, h1 = dbox
                    if (x1 > x + w or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                        bbox = dbox
                        print("updated")
                    else:
                        x, y, w, h = dbox
                        b = max((w, h, 224))
                        b = int(b + b * 0.3)
                        cx = x + w / 2
                        cy = y + h / 2
                        x = max(0, int(cx - b / 2))
                        y = max(0, int(cy - b / 2))
                        x = min(x, bgr.shape[1] - b)
                        y = min(y, bgr.shape[0] - b)
                        bbox = [x, y, b, b]

            x, y, w, h = bbox
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, boxsize, stride)
            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)

            # use joint tools to recover keypoints
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride / scale
            ocparts = np.zeros_like(hm[..., 0])
            peaks = jt.FindPeaks(hm[..., :-1], ocparts, peaks_thre, scale, scale)

            # convert peaks to hand keypoints
            hand = mva19.peaks_to_hand(peaks, x, y)
            if hand is not None:
                keypoints = hand
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]
                if track:
                    keypoints[mask, :2] = oldKp[mask]
                keypoints[:, 2] = keypoints[:, 2] ** 3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                score, res = pose_estimator.estimate(obsVec)

                if track:
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose)
                                       + (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
        # score is the residual; the lower the better, 0 is best.
        # -1 means failed optimization.
        if track:
            if -1 < score:  # < 150:
                pose_estimator.model.init_pose = Core.ParamVector(result_pose)
            else:
                print("\n===> Resetting init position for IK <===\n")
                pose_estimator.model.reset_pose()
                bbox = None

        if score > -1:
            # compute result points
            p2d = np.array(pose_estimator.ba.decodeAndProject(
                Core.ParamVector(result_pose), clb)).reshape(-1, 2)
            bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])
            p3d = np.array(pose_estimator.ba.decode(
                Core.ParamVector(result_pose), clb)).reshape(-1, 3)
            to_AC_format(p3d, ros_time, 1)

        # visualization is only shown when enabled from the command line
        viz = np.copy(bgr)
        if sys.argv[4] == '1' and started and result_pose is not None:
            viz = mva19.visualize_3dhand_skeleton(viz, p2d)
            pipeline.draw_rect(viz, "Hand", bbox,
                               box_color=(0, 255, 0), text_color=(200, 200, 0))
            cv2.putText(viz, 'Hand pose estimation', (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
            cv2.imshow("3D Hand Model reprojection", viz)

            key = cv2.waitKey(delay[paused])
            if key & 255 == ord('p'):
                paused = not paused
            if key & 255 == ord('q'):
                break
            if key & 255 == ord('r'):
                print("\n===> Resetting init position for IK <===\n")
                pose_estimator.model.reset_pose()
                bbox = None
                print("RESETTING BBOX", bbox)
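The detector-box conditioning used above (and in the ROS callback) can be read as a single helper. A minimal sketch: square_bbox is a hypothetical name, and img_shape is the usual (height, width, channels) tuple:

def square_bbox(dbox, img_shape, min_side=224, margin=0.3):
    """Grow a detection into a square crop with a margin, clamped to the image."""
    x, y, w, h = dbox
    b = int(max(w, h, min_side) * (1.0 + margin))
    cx, cy = x + w / 2, y + h / 2
    x = max(0, int(cx - b / 2))
    y = max(0, int(cy - b / 2))
    x = min(x, img_shape[1] - b)  # clamp right edge
    y = min(y, img_shape[0] - b)  # clamp bottom edge
    return [x, y, b, b]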
def estimate_hand_pose(self, bgr):
    # compute keypoints using the model's initial pose
    points2d = self.pose_estimator.ba.decodeAndProject(
        self.pose_estimator.model.init_pose, self.clb)
    oldKp = np.array(points2d).reshape(-1, 2)

    result_pose = None
    hand = None
    score = 1000.0

    # STEP 2: detect 2D joints for the detected hand
    if self.started:
        if self.bbox is None:
            self.bbox = detector_utils.hand_bbox(bgr, self.detection_graph, self.sess)
            if self.bbox is None:
                print('Bounding box is None')
                cv2.imshow("2D CNN estimation", bgr)
                cv2.waitKey(1)
                return result_pose, hand, score
        else:
            dbox = detector_utils.hand_bbox(bgr, self.detection_graph, self.sess)
            if dbox is not None:
                x, y, w, h = self.bbox
                x1, y1, w1, h1 = dbox
                if (x1 > x + w or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                    self.bbox = dbox
                    print("updated")

        x, y, w, h = self.bbox
        crop = bgr[y:y + h, x:x + w]
        img, pad = mva19.preprocess(crop, self.boxsize, self.stride)
        t = time.time()
        hm = self.estimator.predict(img)
        self.est_ms = (time.time() - t)

        # use joint tools to recover keypoints
        scale = float(self.boxsize) / float(crop.shape[0])
        scale = self.stride / scale
        ocparts = np.zeros_like(hm[..., 0])
        peaks = jt.FindPeaks(hm[..., :-1], ocparts, self.peaks_thre, scale, scale)

        # convert peaks to hand keypoints
        hand = mva19.peaks_to_hand(peaks, x, y)
        if hand is not None:
            keypoints = hand
            mask = keypoints[:, 2] < self.peaks_thre
            keypoints[mask] = [0, 0, 1.0]
            if self.track:
                keypoints[mask, :2] = oldKp[mask]
            keypoints[:, 2] = keypoints[:, 2] ** 3

            rgbKp = IK.Observations(IK.ObservationType.COLOR, self.clb, keypoints)
            obsVec = IK.ObservationsVector([rgbKp, ])
            t = time.time()
            score, res = self.pose_estimator.estimate(obsVec)
            self.ik_ms = (time.time() - t)

            if self.track:
                result_pose = list(self.smoothing * np.array(self.pose_estimator.model.init_pose)
                                   + (1.0 - self.smoothing) * np.array(res))
            else:
                result_pose = list(res)

    # score is the residual; the lower the better, 0 is best.
    # -1 means failed optimization.
    if self.track:
        if -1 < score:  # < 150:
            self.pose_estimator.model.init_pose = Core.ParamVector(result_pose)
        else:
            print("\n===> Resetting init position for IK <===\n")
            self.pose_estimator.model.reset_pose()
            self.bbox = None

    if score > -1:
        # compute result points
        self.p2d = np.array(self.pose_estimator.ba.decodeAndProject(
            Core.ParamVector(result_pose), self.clb)).reshape(-1, 2)
        self.bbox = mva19.update_bbox(self.p2d, bgr.shape[1::-1])

    return result_pose, hand, score
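The smoothing * init_pose + (1 - smoothing) * res blend that appears in every tracking branch is a plain exponential filter over the pose parameter vector. A minimal sketch (the helper name smooth_pose is hypothetical):

import numpy as np

def smooth_pose(prev_pose, new_pose, smoothing):
    """Exponential blend of pose parameters; smoothing=0 keeps only the new estimate."""
    return list(smoothing * np.array(prev_pose) + (1.0 - smoothing) * np.array(new_pose))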
def mono_hand_loop(self, out_size, track=False, paused=False, with_renderer=False):
    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(self.config)
    hand_visualizer = None
    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, out_size)

    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(self.config["model_file"],
                                self.config["input_layer"],
                                self.config["output_layer"])
    detection_graph, sess = detector_utils.load_inference_graph()

    started = True
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    smoothing = self.config.get("smoothing", 0)
    boxsize = self.config["boxsize"]
    stride = self.config["stride"]
    peaks_thre = self.config["peaks_thre"]

    print("Entering main loop.")
    saved_hand_poses = []
    saved_framed_ids = []
    image_names = get_image_names(self.object_anno_path, self.object_name)

    for i_name in image_names:
        frame_path = join(self.frame_root_path, i_name)
        bgr = cv2.imread(frame_path)
        st = time.time()

        # compute keypoints using the model's initial pose
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, self.clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        result_pose = None
        hand = None
        score = -1  # was missing; matches the other loop variants and avoids a stale score

        # STEP 2: detect 2D joints for the detected hand
        if started:
            if bbox is None:
                bbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if bbox is None:
                    cv2.imshow("2D CNN estimation", bgr)
                    cv2.waitKey(1)
                    continue
            else:
                dbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if dbox is not None:
                    x, y, w, h = bbox
                    x1, y1, w1, h1 = dbox
                    if (x1 > x + w or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                        bbox = dbox
                        print("updated")

            x, y, w, h = bbox
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, boxsize, stride)
            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)

            # use joint tools to recover keypoints
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride / scale
            ocparts = np.zeros_like(hm[..., 0])
            peaks = jt.FindPeaks(hm[..., :-1], ocparts, peaks_thre, scale, scale)

            # convert peaks to hand keypoints
            hand = mva19.peaks_to_hand(peaks, x, y)
            if hand is not None:
                keypoints = hand
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]
                if track:
                    keypoints[mask, :2] = oldKp[mask]
                keypoints[:, 2] = keypoints[:, 2] ** 3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, self.clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)

                if track:
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose)
                                       + (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
        # score is the residual; the lower the better, 0 is best.
        # -1 means failed optimization.
        if track:
            if -1 < score:  # < 150:
                pose_estimator.model.init_pose = Core.ParamVector(result_pose)
            else:
                print("\n===> Resetting init position for IK <===\n")
                pose_estimator.model.reset_pose()
                bbox = None

        if score > -1:
            # compute result points
            p2d = np.array(pose_estimator.ba.decodeAndProject(
                Core.ParamVector(result_pose), self.clb)).reshape(-1, 2)
            bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, hand, thre=peaks_thre)
            cv2.imshow("2D CNN estimation", viz2d)
            # est_ms and ik_ms hold seconds despite the names, so the
            # reciprocals below are per-component frame rates
            header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)" % (
                1 / (time.time() - st), 1 / (est_ms + ik_ms), 1.0 / est_ms, 1.0 / ik_ms)
            if with_renderer:
                hand_visualizer.render(pose_estimator.model, Core.ParamVector(result_pose), self.clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox,
                                   box_color=(0, 255, 0), text_color=(200, 200, 0))
        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model re-projection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===> Resetting init position for IK <===\n")
            pose_estimator.model.reset_pose()
            bbox = None
            print("RESETTING BBOX", bbox)
        if key & 255 == ord('s'):
            started = not started

        if result_pose is not None:  # guard added: frames with no estimate have nothing to save
            p3d = np.array(pose_estimator.ba.decode(
                Core.ParamVector(result_pose), self.clb)).reshape(-1, 3)

            # transform to OpenGL coordinates and ICCV format
            coord_change_mat = np.array([[1., 0., 0.], [0., -1., 0.], [0., 0., -1.]],
                                        dtype=np.float32)
            p3d = p3d.dot(coord_change_mat.T)
            p3d = to_iccv_format(p3d)

            # save any frame where a good score is achieved;
            # score is the residual, the lower the better, 0 is best
            if score < 10:
                print('Saved good pose:', i_name)
                img_save_filename = join(self.monohand_image_path, i_name)
                cv2.imwrite(img_save_filename, viz)
                saved_hand_poses.append(p3d)
                saved_framed_ids.append(i_name)

    # save data
    save_annotation_file(self.joint_anno_path, saved_framed_ids, saved_hand_poses)
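As a quick sanity check of the coordinate change applied before saving: the matrix negates Y and Z, converting camera coordinates (Y down, Z forward) to the OpenGL convention (Y up, -Z forward). The point values below are illustrative only:

import numpy as np

coord_change_mat = np.array([[1., 0., 0.],
                             [0., -1., 0.],
                             [0., 0., -1.]], dtype=np.float32)
p = np.array([[0.1, 0.2, 0.5]], dtype=np.float32)  # hypothetical joint in camera coords
print(p.dot(coord_change_mat.T))                   # -> [[ 0.1 -0.2 -0.5]]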