def process_video_frame(video_frame):
    """Draw the detected hand and its pose label onto one video frame.

    Relies on the module-level ``sess``, ``image_tf`` and ``network_elements``
    (a 4-tuple of tensors: 3D keypoints, scale, center, keypoint score maps).

    Args:
        video_frame: Image array indexed as (row, column, channel); only the
            first three channels are used.

    Returns:
        The 240x320 resized frame after ``plot_hand_2d`` has been applied to
        it and, if ``process_keypoints`` returned a label, with that label
        written on it.
    """
    frame = scipy.misc.imresize(video_frame[:, :, :3], (240, 320))

    # Scale by 1/255, shift by -0.5 and prepend a batch axis for the network.
    net_input = np.expand_dims((frame.astype('float') / 255.0) - 0.5, 0)

    coord3d_tf, scale_tf, center_tf, scoremap_tf = network_elements
    coord3d, scale, center, scoremap = sess.run(
        [coord3d_tf, scale_tf, center_tf, scoremap_tf],
        feed_dict={image_tf: net_input})

    scoremap = np.squeeze(scoremap)
    coord3d = np.squeeze(coord3d)

    # post processing: keypoints in the crop, then mapped through
    # trafo_coords with the predicted crop center and scale
    crop_coords = detect_keypoints(scoremap)
    frame_coords = trafo_coords(crop_coords, center, scale, 256)
    plot_hand_2d(frame_coords, frame)

    label = process_keypoints(coord3d)
    if label is None:
        return frame

    cv2.putText(frame, label, (10, 200), cv2.FONT_HERSHEY_SIMPLEX,
                1.0, (255, 0, 0), 2, cv2.LINE_AA)
    return frame
def main(args):
    """Run the hand-pose network on a live webcam stream and show the result.

    Args:
        args: Argument list; when it has more than one entry, ``args[1]`` is
            parsed as the integer webcam id. An unparsable id falls back to 0.

    Returns:
        1 if the camera cannot be opened, otherwise 0 once the capture loop
        ends (a frame could not be read, or ``q``/Esc was pressed while the
        result window had focus).
    """
    webcam_id = 0
    try:
        if len(args) > 1:
            webcam_id = int(args[1])
    except ValueError:
        print("Invalid webcam id. Fall back to default value '" + str(webcam_id) + "'.")

    # stream creation
    input_stream = cv2.VideoCapture(webcam_id)
    if not input_stream.isOpened():
        print("Can not use camera with id " + str(webcam_id) + ".")
        return 1

    session = None
    try:
        # network input
        image_tf = tf.placeholder(tf.float32, shape=(1, 240, 320, 3))
        hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
        evaluation = tf.placeholder_with_default(True, shape=())

        # build network
        net = ColorHandPose3DNetwork()
        hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
            keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(
                image_tf, hand_side_tf, evaluation)
        fetches = [hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
                   keypoints_scoremap_tf, keypoint_coord3d_tf]

        # Start TF
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # initialize network
        net.init(session)

        while True:
            grabbed, image_raw = input_stream.read()
            if not grabbed or image_raw is None:
                # A failed grab (camera unplugged, stream ended) would
                # otherwise crash inside cv2.resize; stop cleanly instead.
                print("Can not read frame from camera with id " + str(webcam_id) + ".")
                break

            image_resized = cv2.resize(image_raw, (320, 240))
            image_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)
            image_v = np.expand_dims((image_rgb.astype('float') / 255.0) - 0.5, 0)

            start_time = time.time()
            _, _, scale_v, center_v, keypoints_scoremap_v, _ = session.run(
                fetches, feed_dict={image_tf: image_v})
            delta_time = time.time() - start_time
            print("Inference time: " + str(delta_time))

            # post processing
            keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
            coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
            coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

            plot_hand_cv2(image_resized, coord_hw)
            image_fullsize = cv2.resize(image_resized, (1600, 1200))
            cv2.imshow('result', image_fullsize)

            # waitKey returns -1 when no key was pressed; masking keeps the
            # comparison portable across platforms that set high bits.
            pressed = cv2.waitKey(1) & 0xFF
            if pressed in (27, ord('q')):  # Esc or 'q' ends the demo
                break
    finally:
        # Always free the camera, the TF session and the HighGUI windows,
        # including when building or running the network raises.
        input_stream.release()
        if session is not None:
            session.close()
        cv2.destroyAllWindows()

    return 0
def doHandPoseEstimate(image_cv, sess, args):
    """Run the hand-pose network on one image and refresh a 2x2 debug figure.

    Args:
        image_cv: Image array indexed as (row, column, channel). Its channel
            axis is reversed before use (presumably BGR -> RGB for an OpenCV
            image -- confirm against the caller).
        sess: Session object whose ``run`` evaluates the network tensors.
        args: Mapping holding the graph handles under the keys 'image_tf',
            'hand_side_tf', 'evaluation', 'net', 'hand_scoremap_tf',
            'image_crop_tf', 'scale_tf', 'center_tf', 'keypoints_scoremap_tf'
            and 'keypoint_coord3d_tf'. All keys are looked up, although only
            the placeholder and the six output tensors are used here.

    Returns:
        None. The result is shown in matplotlib figure 1.
    """
    handle_keys = ('image_tf', 'hand_side_tf', 'evaluation', 'net',
                   'hand_scoremap_tf', 'image_crop_tf', 'scale_tf', 'center_tf',
                   'keypoints_scoremap_tf', 'keypoint_coord3d_tf')
    handles = {key: args[key] for key in handle_keys}
    fetches = [handles[key] for key in handle_keys[4:]]

    image_raw = cv2.resize(image_cv[:, :, ::-1], (320, 240))
    net_input = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)

    outputs = sess.run(fetches, feed_dict={handles['image_tf']: net_input})
    hand_scoremap, image_crop, scale, center, kp_scoremap, kp_coord3d = outputs

    hand_scoremap = np.squeeze(hand_scoremap)
    kp_scoremap = np.squeeze(kp_scoremap)
    kp_coord3d = np.squeeze(kp_coord3d)

    # post processing: undo the (x / 255 - 0.5) normalisation on the crop
    image_crop = ((np.squeeze(image_crop) + 0.5) * 255).astype('uint8')
    crop_coords = detect_keypoints(kp_scoremap)
    full_coords = trafo_coords(crop_coords, center, scale, 256)

    # visualize
    fig = plt.figure(1)
    plt.ion()
    plt.clf()

    ax_full = fig.add_subplot(221)
    ax_crop = fig.add_subplot(222)
    ax_mask = fig.add_subplot(223)
    ax_3d = fig.add_subplot(224, projection='3d')

    ax_full.imshow(image_raw)
    plot_hand(full_coords, ax_full)

    ax_crop.imshow(image_crop)
    plot_hand(crop_coords, ax_crop)

    ax_mask.imshow(np.argmax(hand_scoremap, 2))

    plot_hand_3d(kp_coord3d, ax_3d)
    ax_3d.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view
    ax_3d.set_xlim([-3, 3])
    ax_3d.set_ylim([-3, 1])
    ax_3d.set_zlim([-3, 3])

    plt.show()
    plt.pause(0.0001)
    plt.show()
def get_feature(img, sess, sess_args):
    """Return the 2D keypoint coordinates detected for ``img``.

    Relies on the module-level placeholder ``image_tf``.

    Args:
        img: Image array; it is normalised as ``img / 255 - 0.5`` and given a
            leading batch axis before being fed to the network.
        sess: Session object whose ``run`` evaluates ``sess_args``.
        sess_args: Fetch list that must yield exactly six outputs, the fifth
            being the keypoint score maps.

    Returns:
        The output of ``trafo_coords`` applied to the detected keypoints with
        a zero center and unit scale.
    """
    batch = np.expand_dims((img.astype('float') / 255.0) - 0.5, 0)
    _, _, _, _, keypoints_scoremap, _ = sess.run(
        sess_args, feed_dict={image_tf: batch})

    crop_coords = detect_keypoints(np.squeeze(keypoints_scoremap))

    # The center/scale predicted by the network are deliberately ignored:
    # a zero offset and unit scale are passed to trafo_coords instead.
    zero_center = np.array([0., 0.])
    unit_scale = np.array([1.])
    return trafo_coords(crop_coords, zero_center, unit_scale, 256)
def get_coords_and_figure_from_name(img_name):
    """Run the model on an image file and save its keypoints and a figure.

    Writes two files named after the image's base name (without extension):
    a pickle of ``(keypoint_coord3d_v, scale_v, center_v)`` into the
    module-level ``output_coords_dir`` and a 2x2 overview figure into the
    module-level ``output_figures_dir``.

    Args:
        img_name: Path of the image to process.

    Returns:
        None.
    """
    image_raw = scipy.misc.imread(img_name)
    (hand_scoremap, image_crop, scale, center,
     kp_scoremap, kp_coord3d) = run_model_on_image(image_raw)

    # from here on: saving stuff
    stem, _ = os.path.splitext(os.path.basename(img_name))

    # post processing
    hand_scoremap = np.squeeze(hand_scoremap)
    kp_scoremap = np.squeeze(kp_scoremap)
    # undo the (x / 255 - 0.5) normalisation on the crop
    image_crop = ((np.squeeze(image_crop) + 0.5) * 255).astype('uint8')
    crop_coords = detect_keypoints(kp_scoremap)
    full_coords = trafo_coords(crop_coords, center, scale, 256)

    # save keypoint coordinates (the 3D keypoints are stored as returned by
    # the model, i.e. without squeezing)
    coords_path = "{:s}/{:s}_coords.pkl".format(output_coords_dir, stem)
    with open(coords_path, 'wb') as out_file:
        pickle.dump((kp_coord3d, scale, center), out_file, protocol=-1)
    print("Saved keypoint coordinates to {:s}".format(coords_path))

    # save image
    figure_path = "{:s}/{:s}_figures.png".format(output_figures_dir, stem)
    fig = plt.figure(1)
    ax_full = fig.add_subplot(221)
    ax_crop = fig.add_subplot(222)
    ax_mask = fig.add_subplot(223)
    ax_3d = fig.add_subplot(224, projection='3d')

    ax_full.imshow(image_raw)
    plot_hand(full_coords, ax_full)

    ax_crop.imshow(image_crop)
    plot_hand(crop_coords, ax_crop)

    ax_mask.imshow(np.argmax(hand_scoremap, 2))

    plot_hand_3d(kp_coord3d, ax_3d)
    ax_3d.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view
    ax_3d.set_xlim([-3, 3])
    ax_3d.set_ylim([-3, 1])
    ax_3d.set_zlim([-3, 3])

    plt.savefig(figure_path)
    plt.close()
    print("Saved figure to {:s}".format(figure_path))
def process_img(known_finger_poses, img_name):
    """Classify the hand pose in an image file and save an annotated copy.

    Relies on the module-level ``sess``, ``image_tf`` and network tensors
    (``scale_tf``, ``center_tf``, ``keypoints_scoremap_tf``,
    ``keypoint_coord3d_tf``).

    Args:
        known_finger_poses: Reference poses forwarded to
            ``predict_by_geometry``.
        img_name: Path of the image to process.

    Returns:
        The label returned by ``predict_by_geometry``. The annotated image is
        written to ``../images_out/<unix-seconds>.png``.
    """
    pixels = scipy.misc.imread(img_name)[:, :, :3]
    image_raw = np.array(Image.fromarray(pixels).resize((320, 240)))
    net_input = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)

    fetches = [scale_tf, center_tf, keypoints_scoremap_tf, keypoint_coord3d_tf]
    scale, center, kp_scoremap, kp_coord3d = sess.run(
        fetches, feed_dict={image_tf: net_input})

    kp_scoremap = np.squeeze(kp_scoremap)
    kp_coord3d = np.squeeze(kp_coord3d)

    # post processing
    crop_coords = detect_keypoints(kp_scoremap)
    full_coords = trafo_coords(crop_coords, center, scale, 256)
    plot_hand_2d(full_coords, image_raw)

    # Classifying based on geometry. The neural-network and SVM classifiers
    # (predict_by_neural_network / predict_by_svm) are not wired in here.
    score_label = predict_by_geometry(kp_coord3d, known_finger_poses, 0.55)

    # save processed image
    cv2.putText(image_raw, score_label, (10, 200), cv2.FONT_HERSHEY_SIMPLEX,
                1.0, (255, 0, 0), 2, cv2.LINE_AA)

    out_name = "{}.png".format(int(time.time()))
    out_path = os.path.join('../images_out', out_name)
    mpimg.imsave(out_path, image_raw)

    return score_label
exclude_var_list=['PosePrior', 'ViewpointNet']) util = EvalUtil() # iterate dataset for i in range(dataset.num_samples): # get prediction keypoints_scoremap_v,\ scale_crop_v, center_v, kp_uv21_gt, kp_vis = sess.run([keypoints_scoremap, scale_crop, center, data['keypoint_uv21'], data['keypoint_vis21']]) keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v) kp_uv21_gt = np.squeeze(kp_uv21_gt) kp_vis = np.squeeze(kp_vis) # detect keypoints coord_hw_pred_crop = detect_keypoints(np.squeeze(keypoints_scoremap_v)) coord_hw_pred = trafo_coords(coord_hw_pred_crop, center_v, scale_crop_v, 256) coord_uv_pred = np.stack([coord_hw_pred[:, 1], coord_hw_pred[:, 0]], 1) # scale pred to image size of the dataset (to match with stored coordinates) coord_uv_pred[:, 1] /= scale[0] coord_uv_pred[:, 0] /= scale[1] # some datasets are already stored with downsampled resolution scale2orig_res = 1.0 if hasattr(dataset, 'resolution'): scale2orig_res = dataset.resolution util.feed(kp_uv21_gt / scale2orig_res, kp_vis, coord_uv_pred / scale2orig_res) if (i % 100) == 0:
start = datetime.datetime.now() hand_scoremap_v, image_crop_v, scale_v, center_v,\ keypoints_scoremap_v, keypoint_coord3d_v = sess.run([hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, keypoints_scoremap_tf, keypoint_coord3d_tf], feed_dict={image_tf: image_v}) print(datetime.datetime.now() - start) hand_scoremap_v = np.squeeze(hand_scoremap_v) image_crop_v = np.squeeze(image_crop_v) keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v) # keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v) # post processing image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8') coord_hw_crop = detect_keypoints(np.squeeze(keypoints_scoremap_v)) coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256) plt.cla() # fig = plt.imshow(image_raw, aspect='equal', shape=(240, 320)) fig = plt.imshow(np.zeros([240, 320, 3]), aspect='equal', shape=(240, 320)) plot_hand(coord_hw, plt) plt.axis('off') fig.axes.get_xaxis().set_visible(False) fig.axes.get_yaxis().set_visible(False) plt.pause(0.001) plt.show(False) name = datetime.datetime.now() if not os.path.exists("out"):
# Run the hand-pose network on every entry of image_list and plot the result.
net = ColorHandPose3DNetwork()

for image_name in image_list:
    # Entries may be file paths (loaded with OpenCV) or already-loaded images.
    if isinstance(image_name, str):
        image = cv2.imread(image_name)
    else:
        image = image_name
    image = cv2.resize(image, (320, 240))

    # Normalise as x / 255 - 0.5 and add the batch axis.
    image_v = np.expand_dims((image.astype(np.float32) / 255.) - .5, axis=0)

    inference = net.inference(image_v)
    (hand_score_map, image_crop, scale, center,
     keypoint_score_map, keypoint_coord3d) = tuple(inference)

    # Drop the batch axis:
    #   hand_score_map      (1, 256, 256, 2)  -> (256, 256, 2)
    #   image_crop          (1, 256, 256, 3)  -> (256, 256, 3)
    #   keypoint_score_map  (1, 256, 256, 21) -> (256, 256, 21)
    #   keypoint_coord3d    (1, 21, 3)        -> (21, 3)
    hand_score_map = np.squeeze(hand_score_map, axis=0)
    image_crop = np.squeeze(image_crop)
    keypoint_score_map = np.squeeze(keypoint_score_map)
    keypoint_coord3d = np.squeeze(keypoint_coord3d)

    # Undo the input normalisation on the crop so it can be displayed.
    image_crop = ((image_crop + .5) * 255).astype(np.uint8)

    coord_hw_crop = detect_keypoints(keypoint_score_map)
    coord_hw = trafo_coords(coord_hw_crop, center, scale, 256)

    # visualize
    plot_inference(image, image_crop, coord_hw, coord_hw_crop,
                   hand_score_map, keypoint_coord3d)
def process(self, image_list):
    """Compute (or load from cache) hand masks and joints for each image.

    Args:
        image_list: list of tuples, first item being the name/path of the
            image, and the second item being a RGB matrix.

    Returns:
        A list with one dict per image holding the keys 'image_name',
        'original_shape', 'confidence', 'binary_mask', 'hand_joints_2d' and
        'hand_joints_3d'. Entries found in ``self.cache_loc`` are returned as
        unpickled, unless ``self.overwrite`` is set.
    """
    results = []
    print('Extracting masks...')

    for image_name, image_raw in tqdm(image_list):
        # Cache key: the last three path components joined with '#', minus
        # the trailing ``self.extension_length`` characters (this assumes a
        # fixed-length extension such as a 4-character '.jpg').
        cache_key = '#'.join(image_name.split('/')[-3:])[:-self.extension_length]
        save_name = os.path.join(self.cache_loc, cache_key + '.pkl')

        if os.path.exists(save_name) and not self.overwrite:
            # loading directly from cache
            with open(save_name, 'rb') as cache_file:
                results.append(pickle.load(cache_file))
            continue

        original_shape = image_raw.shape[:2]
        resized = imresize(image_raw, (240, 320))
        net_input = np.expand_dims((resized.astype('float') / 255.0) - 0.5, 0)

        fetches = [self.hand_scoremap_tf, self.image_crop_tf, self.scale_tf,
                   self.center_tf, self.keypoints_scoremap_tf,
                   self.keypoint_coord3d_tf]
        (hand_scoremap, image_crop, scale, center,
         kp_scoremap, kp_coord3d) = self.sess.run(
            fetches, feed_dict={self.image_tf: net_input})

        hand_scoremap = np.squeeze(hand_scoremap)
        kp_scoremap = np.squeeze(kp_scoremap)
        kp_coord3d = np.squeeze(kp_coord3d)

        # post processing
        # The de-normalised crop and the full-image coordinates are computed
        # but not stored; only the disabled visualisation code consumed them.
        image_crop = ((np.squeeze(image_crop) + 0.5) * 255).astype('uint8')
        crop_coords = detect_keypoints(kp_scoremap)
        full_coords = trafo_coords(crop_coords, center, scale, 256)

        # TODO: these coordinates are all normalized with respect to
        # the rescaled image. Parameters would have to be scaled back.
        image_result = {
            'image_name': image_name,
            'original_shape': original_shape,
            'confidence': hand_scoremap,
            'binary_mask': np.argmax(hand_scoremap, 2),
            'hand_joints_2d': crop_coords,
            'hand_joints_3d': kp_coord3d,
        }

        with open(save_name, 'wb') as cache_file:
            pickle.dump(image_result, cache_file)
        results.append(image_result)

    return results
ax1.view_init( azim=-90.0, elev=-90.0) # aligns the 3d coord with the camera view plt.xlabel('x') plt.ylabel('y') ax2 = fig.add_subplot(122) plt.imshow(image_scaled_v) plt.show() # pdb.set_trace() if args.save: fig = plt.figure(figsize=(12, 6)) keypoints2d = detect_keypoints(keypoints_scoremap_v) coord_hw = trafo_coords(keypoints2d, center_v, scale_v, 256) coord_uv21 = keypoint_uv21_v[:, ::-1] / 2 ax1 = fig.add_subplot(121) plt.imshow(image_scaled_v) plot_hand(coord_hw, ax1, color_fixed=np.array((0., 0., 1.0))) plot_hand(coord_uv21, ax1, color_fixed=np.array((1., 0., 0.0))) ax2 = fig.add_subplot(122, projection='3d') plot_hand_3d(coord3d_pred_v, ax2, color_fixed=np.array([0.0, 0.0, 1.0])) plot_hand_3d(keypoint_xyz21, ax2, color_fixed=np.array([1.0, 0.0, 0.0])) ax2.set_xlabel('x') ax2.set_ylabel('y')
def get_pic(image_list):
    """Run hand-pose inference on each image and return the rendered figures.

    For every input image a 2x2 overview figure is saved as ``imgs/<i>.png``
    (``i`` being the index in ``image_list``) and read back with imageio.

    Args:
        image_list: Iterable of images. Each one is passed through
            ``make_it_small.small`` before being fed to the network.

    Returns:
        A list with one rendered figure image (as read by ``imageio.imread``)
        per input image, in input order.
    """
    import os

    # network input
    # NOTE(review): the placeholder is (1, 320, 240, 3), so
    # make_it_small.small must return 320x240x3 arrays -- confirm.
    image_tf = tf.placeholder(tf.float32, shape=(1, 320, 240, 3))
    hand_side_tf = tf.constant([[1.0, 0.0]])  # left hand (true for all samples provided)
    evaluation = tf.placeholder_with_default(True, shape=())

    # build network
    net = ColorHandPose3DNetwork()
    hand_scoremap_tf, image_crop_tf, scale_tf, center_tf, \
        keypoints_scoremap_tf, keypoint_coord3d_tf = net.inference(
            image_tf, hand_side_tf, evaluation)
    fetches = [hand_scoremap_tf, image_crop_tf, scale_tf, center_tf,
               keypoints_scoremap_tf, keypoint_coord3d_tf]

    # Start TF
    sess = tf.Session()
    try:
        # initialize network
        net.init(sess)

        # savefig does not create missing directories
        if not os.path.isdir("imgs"):
            os.makedirs("imgs")

        # Feed image list through network
        final = []
        # tqdm wraps the list itself so the progress bar can show a total
        for i, image in enumerate(tqdm(image_list)):
            image_raw = make_it_small.small(image)

            # Swap channels 0 and 2 in place. The right-hand side is a copy
            # (fancy indexing), so the assignment cannot read overwritten data.
            image_raw[:, :, [0, 2]] = image_raw[:, :, [2, 0]]

            image_v = np.expand_dims((image_raw.astype('float') / 255.0) - 0.5, 0)

            hand_scoremap_v, image_crop_v, scale_v, center_v, \
                keypoints_scoremap_v, keypoint_coord3d_v = sess.run(
                    fetches, feed_dict={image_tf: image_v})

            # The description is derived from the un-squeezed 3D keypoints.
            img_angle = description_of_hand_position(keypoint_coord3d_v)

            hand_scoremap_v = np.squeeze(hand_scoremap_v)
            image_crop_v = np.squeeze(image_crop_v)
            keypoints_scoremap_v = np.squeeze(keypoints_scoremap_v)
            keypoint_coord3d_v = np.squeeze(keypoint_coord3d_v)

            # post processing
            image_crop_v = ((image_crop_v + 0.5) * 255).astype('uint8')
            coord_hw_crop = detect_keypoints(keypoints_scoremap_v)
            coord_hw = trafo_coords(coord_hw_crop, center_v, scale_v, 256)

            # visualize
            fig = plt.figure(1)
            ax1 = fig.add_subplot(221)
            ax2 = fig.add_subplot(222)
            ax3 = fig.add_subplot(223)
            ax4 = fig.add_subplot(224, projection='3d')
            ax1.imshow(image_raw)
            plot_hand(coord_hw, ax1)
            ax2.imshow(image_crop_v)
            plot_hand(coord_hw_crop, ax2)
            ax3.imshow(np.argmax(hand_scoremap_v, 2))
            plot_hand_3d(keypoint_coord3d_v, ax4)
            ax4.view_init(azim=-90.0, elev=-90.0)  # aligns the 3d coord with the camera view
            ax4.set_xlim([-3, 3])
            ax4.set_ylim([-3, 1])
            ax4.set_zlim([-3, 3])
            fig.suptitle(img_angle, fontsize=10)

            figure_path = "imgs/{}.png".format(str(i))
            plt.savefig(figure_path)
            plt.close(fig)
            final.append(imageio.imread(figure_path))
    finally:
        # Release the session's resources even if inference or plotting fails.
        sess.close()

    return final