import os
import time

import cv2
import numpy as np

# NOTE: this import block is an assumption. It names the modules the code
# below visibly depends on (PyOpenPose, the IK/MBV bindings, PyJointTools,
# and the local factory/pipeline/mva19/detector_utils helpers); adjust the
# exact module names to the actual project layout.
import PyOpenPose as OP
import PyCeresIK as IK
import PyMBVCore as Core
import PyJointTools as jt

import factory
import pipeline
import mva19
from utils import detector_utils


def mono_hand_loop(acq, outSize, config, track=False, paused=False, with_renderer=False):
    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)

    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize OpenPose. Net output size", outSize, "...")
    op = OP.OpenPose((160, 160), config["handnet_dims"], tuple(outSize), "COCO",
                     config["OPENPOSE_ROOT"] + os.sep + "models" + os.sep,
                     0, False, OP.OpenPose.ScaleMode.ZeroToOne, False, True)

    left_hand_model = config["model_left"]
    started = False
    delay = {True: 0, False: 1}
    ik_ms = est_ms = op_ms = 0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["handnet_dims"][0]
    stride = 8
    peaks_thre = config["peaks_thre"]

    print("Entering main loop.")
    while True:
        try:
            imgs, clbs = acq.grab()
            if imgs is None or len(imgs) == 0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        clb = clbs[0]

        # Project the model's current initial pose to get the previous
        # frame's 2D keypoints (used as a fallback while tracking).
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        if bbox is None:
            bbox = config["default_bbox"]

        score = -1
        result_pose = None
        crop_viz = None

        # STEP 2: detect 2D joints for the detected hand.
        if started and bbox is not None:
            x, y, w, h = bbox
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, boxsize, stride)

            t = time.time()
            op.detectHands(bgr, np.array([[0, 0, 0, 0] + bbox], dtype=np.int32))
            op_ms = (time.time() - t) * 1000.0

            opviz = op.render(np.copy(bgr))
            cv2.imshow("OPVIZ", opviz)

            leftHands, rightHands = op.getKeypoints(op.KeypointType.HAND)
            if rightHands is not None:
                keypoints = rightHands[0][:, :3]
                # Suppress low-confidence keypoints; while tracking, fall
                # back to the keypoints projected from the previous pose.
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]
                if track:
                    keypoints[mask, :2] = oldKp[mask]
                keypoints[:, 2] = keypoints[:, 2] ** 3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)
                pose_estimator.print_report()

                if track:
                    # Exponential blend of the previous pose and the new
                    # IK result to damp frame-to-frame jitter.
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose) +
                                       (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
                # score is the optimization residual: the lower the better,
                # 0 is best and -1 means the optimization failed.
                if track:
                    if -1 < score:  # < 20000:
                        pose_estimator.model.init_pose = Core.ParamVector(result_pose)
                    else:
                        print("\n===> Resetting init position for IK <===\n")
                        pose_estimator.model.reset_pose()

            if score > -1:
                # Project the estimated 3D pose back to 2D and refresh the
                # hand bounding box from the reprojected keypoints.
                p2d = np.array(pose_estimator.ba.decodeAndProject(
                    Core.ParamVector(result_pose), clb)).reshape(-1, 2)
                bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, keypoints, thre=peaks_thre)
            cv2.imshow("2D CNN estimation", viz2d)
            # Report everything in milliseconds: op_ms is already in ms,
            # ik_ms is measured in seconds above.
            header = "FPS OPT+VIZ %03d, OPT %.1fms (CNN %.1fms, 3D %.1fms)" % (
                1 / (time.time() - st), op_ms + ik_ms * 1000.0, op_ms, ik_ms * 1000.0)

            if with_renderer:
                hand_visualizer.render(pose_estimator.model, Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox,
                                   box_color=(0, 255, 0), text_color=(200, 200, 0))
        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===> Resetting init position for IK <===\n")
            pose_estimator.model.reset_pose()
            bbox = config['default_bbox']
            print("RESETTING BBOX", bbox)
        if key & 255 == ord('s'):
            started = not started

        count += 1
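
# --- Usage sketch (an assumption, not part of the original sources) ---
# mono_hand_loop only requires that acq.grab() return a list of BGR frames
# together with a matching list of camera calibrations. The CameraGrabber
# below is a hypothetical stand-in built on cv2.VideoCapture; a real driver
# must supply a genuine calibration object and the project's actual config
# keys (handnet_dims, OPENPOSE_ROOT, default_bbox, peaks_thre, ...).

class CameraGrabber(object):
    """Hypothetical acquisition object wrapping a webcam."""

    def __init__(self, device, clb):
        self.cap = cv2.VideoCapture(device)
        self.clb = clb  # calibration consumed by the IK estimator

    def grab(self):
        ok, frame = self.cap.read()
        if not ok:
            return [], []  # empty lists make mono_hand_loop exit its loop
        return [frame], [self.clb]

# Example wiring (calibration loader as used by the HO3D tracker below):
# clb = OpenCVCalib2CameraMeta(LoadOpenCVCalib("res/calib_webcam.json"))
# mono_hand_loop(CameraGrabber(0, clb), (640, 480), config, track=True)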
def __init__(self, args):
    self.clb = OpenCVCalib2CameraMeta(LoadOpenCVCalib("res/calib_ho3d.json"))

    self.config = {
        "model": "models/hand_skinned.xml",
        "model_left": False,
        "model_init_pose": [
            -109.80840809323652, 95.70022984677065, 584.613931114289,
            292.3322807284121, -1547.742897973965, -61.60146881490577,
            435.33025195547793, 1.5707458637241434, 0.21444030289465843,
            0.11033385117688158, 0.021952050059337137, 0.5716581133215294,
            0.02969734913698679, 0.03414155945643072, 0.0,
            1.1504613679382742, -0.5235922979328, 0.15626331136368257,
            0.03656410417088128, 8.59579088582312e-07, 0.35789633949684985,
            0.00012514308785717494, 0.005923001258945023, 0.24864102398139007,
            0.2518954858979162, 0.0, 3.814694400000002e-13
        ],
        "model_map": IK.ModelAwareBundleAdjuster.HAND_SKINNED_TO_OP_RIGHT_HAND,
        "ba_iter": 100,
        "padding": 0.3,
        "minDim": 170,
        "smoothing": 0.2,
        "model_file": "models/mobnet4f_cmu_adadelta_t1_model.pb",
        "input_layer": "input_1",
        "output_layer": "k2tfout_0",
        "stride": 4,
        "boxsize": 224,
        "peaks_thre": 0.1
    }

    self.data_path = args.data_path
    self.with_renderer = args.with_renderer
    self.track = args.track
    self.paused = args.paused
    self.visualize = args.visualize
    self.save = args.save
    self.start_frame = args.start_frame
    self.repeat_first_frame = args.repeat_first_frame

    if args.save:
        self.save_path = os.path.join(self.data_path, 'hand_tracker')
        if not os.path.exists(self.save_path):
            try:
                os.makedirs(self.save_path)
            except OSError:
                print('ERROR: Unable to create the save directory {}'.format(
                    self.save_path))
                return

    print("Initialize WACV18 3D Pose estimator (IK)...")
    self.pose_estimator = factory.HandPoseEstimator(self.config)
    self.hand_visualizer = None
    if self.with_renderer:
        print("Initialize Hand Visualizer...")
        self.hand_visualizer = pipeline.HandVisualizer(factory.mmanager, (640, 480))

    print("Initialize MVA19 CVRL Hand pose net...")
    self.estimator = mva19.Estimator(self.config["model_file"],
                                     self.config["input_layer"],
                                     self.config["output_layer"])

    self.detection_graph, self.sess = detector_utils.load_inference_graph()

    # Loop variables
    self.started = True
    self.delay = {True: 0, False: 1}
    self.ik_ms = self.est_ms = 0
    self.p2d = self.bbox = None
    self.smoothing = self.config.get("smoothing", 0)
    self.boxsize = self.config["boxsize"]
    self.stride = self.config["stride"]
    self.peaks_thre = self.config["peaks_thre"]

    self.rgb_path = os.path.join(self.data_path, 'rgb')
    self.meta_path = os.path.join(self.data_path, 'meta')

    # Pass the visualizer output size and the CLI flags through to the loop
    # (mono_hand_loop requires out_size as its first argument).
    self.mono_hand_loop((640, 480), track=self.track, paused=self.paused,
                        with_renderer=self.with_renderer)
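
# --- CLI sketch (an assumption): an argparse parser that produces the
# `args` namespace consumed by __init__ above. The flag names mirror the
# attributes read from args; the defaults are illustrative guesses.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="HO3D monocular hand tracker")
    parser.add_argument("data_path", help="sequence root containing rgb/ and meta/")
    parser.add_argument("--with_renderer", action="store_true")
    parser.add_argument("--track", action="store_true")
    parser.add_argument("--paused", action="store_true")
    parser.add_argument("--visualize", action="store_true")
    parser.add_argument("--save", action="store_true")
    parser.add_argument("--start_frame", type=int, default=0)
    parser.add_argument("--repeat_first_frame", action="store_true")
    return parser.parse_args()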
def mono_hand_loop(acq, outSize, config, track=False, paused=False, with_renderer=False):
    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(config)

    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, outSize)

    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(config["model_file"],
                                config["input_layer"],
                                config["output_layer"])

    left_hand_model = config["model_left"]
    started = False
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    count = 0
    mbv_viz = opviz = None
    smoothing = config.get("smoothing", 0)
    boxsize = config["boxsize"]
    stride = config["stride"]
    peaks_thre = config["peaks_thre"]

    print("Entering main loop.")
    while True:
        try:
            imgs, clbs = acq.grab()
            if imgs is None or len(imgs) == 0:
                break
        except Exception as e:
            print("Failed to grab", e)
            break

        st = time.time()
        bgr = imgs[0]
        clb = clbs[0]

        # Project the model's current initial pose to get the previous
        # frame's 2D keypoints (used as a fallback while tracking).
        points2d = pose_estimator.ba.decodeAndProject(pose_estimator.model.init_pose, clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        if bbox is None:
            bbox = config["default_bbox"]

        score = -1
        result_pose = None
        crop_viz = None

        # STEP 2: detect 2D joints for the detected hand.
        if started and bbox is not None:
            x, y, w, h = bbox
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, boxsize, stride)

            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)

            # Use joint tools to recover keypoints. Heatmap coordinates map
            # back to full-image pixels with scale = stride * crop_h / boxsize.
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride / scale
            ocparts = np.zeros_like(hm[..., 0])
            peaks = jt.FindPeaks(hm[..., :-1], ocparts, peaks_thre, scale, scale)

            # Convert peaks to hand keypoints.
            hand = mva19.peaks_to_hand(peaks, x, y)

            if hand is not None:
                keypoints = hand
                # Suppress low-confidence keypoints; while tracking, fall
                # back to the keypoints projected from the previous pose.
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]
                if track:
                    keypoints[mask, :2] = oldKp[mask]
                keypoints[:, 2] = keypoints[:, 2] ** 3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)
                pose_estimator.print_report()

                if track:
                    # Exponential blend of the previous pose and the new
                    # IK result to damp frame-to-frame jitter.
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose) +
                                       (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
                # score is the optimization residual: the lower the better,
                # 0 is best and -1 means the optimization failed.
                if track:
                    if -1 < score:  # < 20000:
                        pose_estimator.model.init_pose = Core.ParamVector(result_pose)
                    else:
                        print("\n===> Resetting init position for IK <===\n")
                        pose_estimator.model.reset_pose()

            if score > -1:
                # Project the estimated 3D pose back to 2D and refresh the
                # hand bounding box from the reprojected keypoints.
                p2d = np.array(pose_estimator.ba.decodeAndProject(
                    Core.ParamVector(result_pose), clb)).reshape(-1, 2)
                bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, hand, thre=peaks_thre)
            cv2.imshow("2D CNN estimation", viz2d)
            # All figures here are rates in Hz (est_ms and ik_ms hold seconds).
            header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)" % (
                1 / (time.time() - st), 1 / (est_ms + ik_ms), 1.0 / est_ms, 1.0 / ik_ms)

            if with_renderer:
                hand_visualizer.render(pose_estimator.model, Core.ParamVector(result_pose), clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox,
                                   box_color=(0, 255, 0), text_color=(200, 200, 0))
        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model reprojection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===> Resetting init position for IK <===\n")
            pose_estimator.model.reset_pose()
            bbox = config['default_bbox']
            print("RESETTING BBOX", bbox)
        if key & 255 == ord('s'):
            started = not started

        count += 1
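
# --- Smoothing sketch (illustration only, not called by the loops above) ---
# The tracking branches above blend the previous pose with the new IK result:
#     result = s * init_pose + (1 - s) * res
# i.e. an exponential moving average over pose parameters; s = 0 disables
# smoothing and values near 1 freeze the pose. A minimal numpy demonstration:

def smooth_pose(prev_pose, new_pose, s):
    """Exponential blend of two pose parameter vectors (0 <= s < 1)."""
    prev_pose = np.asarray(prev_pose, dtype=np.float64)
    new_pose = np.asarray(new_pose, dtype=np.float64)
    return (s * prev_pose + (1.0 - s) * new_pose).tolist()

# smooth_pose([0.0, 1.0], [1.0, 0.0], 0.2) -> [0.8, 0.2]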
def mono_hand_loop(self, out_size, track=False, paused=False, with_renderer=False):
    print("Initialize WACV18 3D Pose estimator (IK)...")
    pose_estimator = factory.HandPoseEstimator(self.config)

    hand_visualizer = None
    if with_renderer:
        print("Initialize Hand Visualizer...")
        hand_visualizer = pipeline.HandVisualizer(factory.mmanager, out_size)

    print("Initialize MVA19 CVRL Hand pose net...")
    estimator = mva19.Estimator(self.config["model_file"],
                                self.config["input_layer"],
                                self.config["output_layer"])

    detection_graph, sess = detector_utils.load_inference_graph()

    started = True
    delay = {True: 0, False: 1}
    ik_ms = est_ms = 0
    p2d = bbox = None
    smoothing = self.config.get("smoothing", 0)
    boxsize = self.config["boxsize"]
    stride = self.config["stride"]
    peaks_thre = self.config["peaks_thre"]

    print("Entering main loop.")
    saved_hand_poses = []
    saved_frame_ids = []
    image_names = get_image_names(self.object_anno_path, self.object_name)
    for i_name in image_names:
        frame_path = join(self.frame_root_path, i_name)
        bgr = cv2.imread(frame_path)
        st = time.time()

        # Project the model's current initial pose to get the previous
        # frame's 2D keypoints (used as a fallback while tracking).
        points2d = pose_estimator.ba.decodeAndProject(
            pose_estimator.model.init_pose, self.clb)
        oldKp = np.array(points2d).reshape(-1, 2)

        score = -1  # reset per frame so a missed detection cannot reuse a stale score
        result_pose = None
        hand = None

        # STEP 2: detect 2D joints for the detected hand.
        if started:
            if bbox is None:
                bbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if bbox is None:
                    cv2.imshow("2D CNN estimation", bgr)
                    cv2.waitKey(1)
                    continue
            else:
                dbox = detector_utils.hand_bbox(bgr, detection_graph, sess)
                if dbox is not None:
                    x, y, w, h = bbox
                    x1, y1, w1, h1 = dbox
                    # Replace the tracked bbox when the fresh detection no
                    # longer overlaps it (i.e. the track has drifted).
                    if (x1 > x + w or x1 + w1 < x) or y1 + h1 < y or y1 > y + h:
                        bbox = dbox
                        print("updated")

            x, y, w, h = bbox
            crop = bgr[y:y + h, x:x + w]
            img, pad = mva19.preprocess(crop, boxsize, stride)

            t = time.time()
            hm = estimator.predict(img)
            est_ms = (time.time() - t)

            # Use joint tools to recover keypoints. Heatmap coordinates map
            # back to full-image pixels with scale = stride * crop_h / boxsize.
            scale = float(boxsize) / float(crop.shape[0])
            scale = stride / scale
            ocparts = np.zeros_like(hm[..., 0])
            peaks = jt.FindPeaks(hm[..., :-1], ocparts, peaks_thre, scale, scale)

            # Convert peaks to hand keypoints.
            hand = mva19.peaks_to_hand(peaks, x, y)

            if hand is not None:
                keypoints = hand
                # Suppress low-confidence keypoints; while tracking, fall
                # back to the keypoints projected from the previous pose.
                mask = keypoints[:, 2] < peaks_thre
                keypoints[mask] = [0, 0, 1.0]
                if track:
                    keypoints[mask, :2] = oldKp[mask]
                keypoints[:, 2] = keypoints[:, 2] ** 3

                rgbKp = IK.Observations(IK.ObservationType.COLOR, self.clb, keypoints)
                obsVec = IK.ObservationsVector([rgbKp, ])
                t = time.time()
                score, res = pose_estimator.estimate(obsVec)
                ik_ms = (time.time() - t)

                if track:
                    # Exponential blend of the previous pose and the new
                    # IK result to damp frame-to-frame jitter.
                    result_pose = list(smoothing * np.array(pose_estimator.model.init_pose) +
                                       (1.0 - smoothing) * np.array(res))
                else:
                    result_pose = list(res)
                # score is the optimization residual: the lower the better,
                # 0 is best and -1 means the optimization failed.
                if track:
                    if -1 < score:  # < 150:
                        pose_estimator.model.init_pose = Core.ParamVector(result_pose)
                    else:
                        print("\n===> Resetting init position for IK <===\n")
                        pose_estimator.model.reset_pose()
                        bbox = None

            if score > -1:
                # Project the estimated 3D pose back to 2D and refresh the
                # hand bounding box from the reprojected keypoints.
                p2d = np.array(pose_estimator.ba.decodeAndProject(
                    Core.ParamVector(result_pose), self.clb)).reshape(-1, 2)
                bbox = mva19.update_bbox(p2d, bgr.shape[1::-1])

        viz = np.copy(bgr)
        viz2d = np.zeros_like(bgr)
        if started and result_pose is not None:
            viz2d = mva19.visualize_2dhand_skeleton(viz2d, hand, thre=peaks_thre)
            cv2.imshow("2D CNN estimation", viz2d)
            # All figures here are rates in Hz (est_ms and ik_ms hold seconds).
            header = "FPS OPT+VIZ %03d, OPT %03d (CNN %03d, 3D %03d)" % (
                1 / (time.time() - st), 1 / (est_ms + ik_ms), 1.0 / est_ms, 1.0 / ik_ms)

            if with_renderer:
                hand_visualizer.render(pose_estimator.model,
                                       Core.ParamVector(result_pose), self.clb)
                mbv_viz = hand_visualizer.getDepth()
                cv2.imshow("MBV VIZ", mbv_viz)
                mask = mbv_viz != [0, 0, 0]
                viz[mask] = mbv_viz[mask]
            else:
                viz = mva19.visualize_3dhand_skeleton(viz, p2d)
                pipeline.draw_rect(viz, "Hand", bbox,
                                   box_color=(0, 255, 0), text_color=(200, 200, 0))
        else:
            header = "Press 's' to start, 'r' to reset pose, 'p' to pause frame."

        cv2.putText(viz, header, (20, 20), 0, 0.7, (50, 20, 20), 1, cv2.LINE_AA)
        cv2.imshow("3D Hand Model re-projection", viz)

        key = cv2.waitKey(delay[paused])
        if key & 255 == ord('p'):
            paused = not paused
        if key & 255 == ord('q'):
            break
        if key & 255 == ord('r'):
            print("\n===> Resetting init position for IK <===\n")
            pose_estimator.model.reset_pose()
            bbox = None
            print("RESETTING BBOX", bbox)
        if key & 255 == ord('s'):
            started = not started

        # Guard against frames where no pose was estimated (hand is None).
        if result_pose is not None:
            p3d = np.array(pose_estimator.ba.decode(
                Core.ParamVector(result_pose), self.clb)).reshape(-1, 3)

            # Transform to OpenGL coordinates and ICCV format.
            coord_change_mat = np.array(
                [[1., 0., 0.], [0., -1., 0.], [0., 0., -1.]], dtype=np.float32)
            p3d = p3d.dot(coord_change_mat.T)
            p3d = to_iccv_format(p3d)

            # Save any frame where a good score is achieved; score is the
            # residual: the lower the better, 0 is best, -1 means failure.
            if -1 < score < 10:
                print('Saved good pose:', i_name)
                img_save_filename = join(self.monohand_image_path, i_name)
                cv2.imwrite(img_save_filename, viz)
                saved_hand_poses.append(p3d)
                saved_frame_ids.append(i_name)

    # Save data
    save_annotation_file(self.joint_anno_path, saved_frame_ids, saved_hand_poses)
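
# --- Coordinate-change sketch (illustration only) ---
# coord_change_mat above negates Y and Z, mapping camera-frame points
# (x right, y down, z forward) to OpenGL-style axes (x right, y up,
# z toward the viewer). Because the matrix is diagonal, p3d.dot(M.T)
# simply flips the sign of the last two columns, as this helper shows:

def to_opengl_coords(points):
    """Flip Y and Z of an (N, 3) array of camera-space points."""
    points = np.asarray(points, dtype=np.float32).copy()
    points[:, 1:] *= -1.0
    return points

# to_opengl_coords([[1.0, 2.0, 3.0]]) -> [[1.0, -2.0, -3.0]]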