def test_blank_image(self):
    pose = mp_pose.Pose()
    image = np.zeros([100, 100, 3], dtype=np.uint8)
    image.fill(255)
    results = pose.process(image)
    self.assertIsNone(results.pose_landmarks)
    pose.close()
def draw_pose_video(filename=0):
    # `filename` may be a video path or a camera index (0 = default webcam).
    cap = cv2.VideoCapture(filename)
    with pose.Pose(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as extracter:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                continue
            # Flip horizontally for a selfie view and convert BGR -> RGB.
            image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
            # Mark the image as not writeable to pass it by reference.
            image.flags.writeable = False
            results = extracter.process(image)
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            drawing_utils.draw_landmarks(
                image=image,
                landmark_list=results.pose_landmarks,
                connections=pose.POSE_CONNECTIONS,
            )
            cv2.imshow("MediaPipe Pose", image)
            # Exit on ESC.
            if cv2.waitKey(5) & 0xFF == 27:
                break
    cap.release()
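# Usage sketch (a hypothetical driver, not part of the original code): with
# the default argument of 0, OpenCV opens the default webcam; a path plays a
# recorded video instead.
if __name__ == "__main__":
    draw_pose_video()                # default webcam
    # draw_pose_video("squats.mp4")  # or a video file
    cv2.destroyAllWindows()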
def test_on_image(self, static_image_mode, model_complexity, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
    expected_segmentation_path = os.path.join(
        os.path.dirname(__file__), 'testdata/pose_segmentation.png')
    image = cv2.imread(image_path)
    expected_segmentation = self._rgb_to_segmentation(
        Image.open(expected_segmentation_path).convert('RGB'))
    with mp_pose.Pose(static_image_mode=static_image_mode,
                      model_complexity=model_complexity,
                      enable_segmentation=True) as pose:
        for idx in range(num_frames):
            results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            segmentation = results.segmentation_mask.round().astype(np.uint8)
            # TODO: Add rendering of world 3D when supported.
            self._annotate(image.copy(), results, idx)
            self._annotate_segmentation(segmentation, expected_segmentation, idx)
            self._assert_diff_less(
                self._landmarks_list_to_array(results.pose_landmarks,
                                              image.shape)[:, :2],
                EXPECTED_POSE_LANDMARKS, DIFF_THRESHOLD)
            self._assert_diff_less(
                self._world_landmarks_list_to_array(results.pose_world_landmarks),
                EXPECTED_POSE_WORLD_LANDMARKS, WORLD_DIFF_THRESHOLD)
            self.assertGreaterEqual(
                self._segmentation_iou(expected_segmentation, segmentation),
                IOU_THRESHOLD)
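# `_segmentation_iou` is one of several test helpers not shown in this
# section. A minimal sketch, assuming both arguments are binary HxW uint8
# person masks (an illustration of the IoU check, not the original helper):
def _segmentation_iou(self, segmentation_gt, segmentation_pred):
    # Intersection-over-union of ground-truth and predicted masks.
    intersection = np.sum(segmentation_gt * segmentation_pred)
    union = np.sum(segmentation_gt) + np.sum(segmentation_pred) - intersection
    return intersection / union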
def test_blank_image(self):
    with mp_pose.Pose(enable_segmentation=True) as pose:
        image = np.zeros([100, 100, 3], dtype=np.uint8)
        image.fill(255)
        results = pose.process(image)
        self.assertIsNone(results.pose_landmarks)
        self.assertIsNone(results.segmentation_mask)
def test_full_body_model(self, static_image_mode, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
    image = cv2.imread(image_path)
    with mp_pose.Pose(static_image_mode=static_image_mode) as pose:
        for _ in range(num_frames):
            results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            self._assert_diff_less(
                self._landmarks_list_to_array(results.pose_landmarks, image.shape),
                EXPECTED_FULL_BODY_LANDMARKS, DIFF_THRESHOLD)
def test_full_body_model(self, static_image_mode, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
    pose = mp_pose.Pose(static_image_mode=static_image_mode)
    image = cv2.imread(image_path)
    for _ in range(num_frames):
        results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        self._verify_output_landmarks(results.pose_landmarks, image.shape, 33)
    pose.close()
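# Both variants of `test_full_body_model` take (static_image_mode, num_frames)
# arguments, which implies a parameterized test runner. A self-contained
# sketch of how such a test could be wired up with absl's `parameterized`
# module (an assumption; the actual decorators are not shown in this section):
from absl.testing import absltest, parameterized

class ParameterizedExampleTest(parameterized.TestCase):

    @parameterized.named_parameters(('static_image_mode', True, 3),
                                    ('video_mode', False, 10))
    def test_modes(self, static_image_mode, num_frames):
        # Each named tuple above becomes one test case with these arguments.
        self.assertIsInstance(static_image_mode, bool)
        self.assertGreater(num_frames, 0)

if __name__ == '__main__':
    absltest.main()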
def test_on_video(self, upper_body_only, expected_name):
    """Tests pose models on a video."""
    # If set to `True`, dumps actual predictions to .npz and JSON files.
    dump_predictions = False
    # Threshold for comparing actual and expected predictions, in pixels.
    diff_threshold = 50
    video_path = os.path.join(os.path.dirname(__file__),
                              'testdata/pose_squats.mp4')
    expected_path = os.path.join(os.path.dirname(__file__),
                                 'testdata/{}'.format(expected_name))
    # Predict pose landmarks for each frame.
    video_cap = cv2.VideoCapture(video_path)
    actual_per_frame = []
    with mp_pose.Pose(
        static_image_mode=False, upper_body_only=upper_body_only) as pose:
        while True:
            # Get the next frame of the video.
            success, input_frame = video_cap.read()
            if not success:
                break
            # Run the pose tracker.
            input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
            result = pose.process(image=input_frame)
            pose_landmarks = self._landmarks_list_to_array(result.pose_landmarks,
                                                           input_frame.shape)
            actual_per_frame.append(pose_landmarks)
    actual = np.asarray(actual_per_frame)
    if dump_predictions:
        # Dump .npz.
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            np.savez(tmp_file, predictions=np.array(actual))
            print('Predictions saved as .npz to {}'.format(tmp_file.name))
        # Dump JSON.
        with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
            with open(tmp_file.name, 'w') as fl:
                dump_data = {'predictions': np.around(actual, 3).tolist()}
                fl.write(json.dumps(dump_data, indent=2, separators=(',', ': ')))
            print('Predictions saved as JSON to {}'.format(tmp_file.name))
    # Validate actual vs. expected predictions.
    expected = np.load(expected_path)['predictions']
    assert actual.shape == expected.shape, (
        'Unexpected shape of predictions: {} instead of {}'.format(
            actual.shape, expected.shape))
    self._assert_diff_less(
        actual[..., :2], expected[..., :2], threshold=diff_threshold)
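# `_landmarks_list_to_array` converts MediaPipe's normalized landmarks into
# pixel coordinates. A minimal sketch, following the same convention used in
# `bootstrap` below (x and y scaled by image width and height, z scaled by
# width); illustrative, not necessarily the original helper:
def _landmarks_list_to_array(self, landmark_list, image_shape):
    rows, cols, _ = image_shape
    return np.asarray([(lmk.x * cols, lmk.y * rows, lmk.z * cols)
                       for lmk in landmark_list.landmark])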
def draw_pose_img(filename: str):
    with pose.Pose(static_image_mode=True,
                   min_detection_confidence=0.5) as extracter:
        image = cv2.imread(filename)
        results = extracter.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        img_annot = image.copy()
        drawing_utils.draw_landmarks(
            image=img_annot,
            landmark_list=results.pose_landmarks,
            connections=pose.POSE_CONNECTIONS,
        )
        # `os.path.splitext` handles paths that contain extra dots, unlike
        # `filename.split(".")[0]`.
        cv2.imwrite(os.path.splitext(filename)[0] + "-pose.png", img_annot)
def test_on_image(self, static_image_mode, model_complexity, num_frames):
    image_path = os.path.join(os.path.dirname(__file__), 'testdata/pose.jpg')
    image = cv2.imread(image_path)
    with mp_pose.Pose(static_image_mode=static_image_mode,
                      model_complexity=model_complexity) as pose:
        for idx in range(num_frames):
            results = pose.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            self._annotate(image.copy(), results, idx)
            self._assert_diff_less(
                self._landmarks_list_to_array(results.pose_landmarks,
                                              image.shape)[:, :2],
                EXPECTED_POSE_LANDMARKS, DIFF_THRESHOLD)
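# A plausible sketch of `_assert_diff_less`, assuming it asserts that every
# coordinate of the predicted array is within `threshold` of the expected
# array (hypothetical; `npt` here is `numpy.testing`):
import numpy.testing as npt

def _assert_diff_less(self, array1, array2, threshold):
    npt.assert_array_less(np.abs(array1 - array2), threshold)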
def get_skeleton_by_frame(filename: str):
    cap = cv2.VideoCapture(filename)
    with pose.Pose(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5,
    ) as extracter:
        ls_frame = []
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                break
            results = extracter.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            # Guard against frames with no detected pose; calling `.landmark`
            # on a missing result would raise an AttributeError.
            if results.pose_landmarks is None:
                continue
            ls_frame.append(_get_skeleton(results.pose_landmarks.landmark))
    cap.release()
    # (frames, joints, channels) -> (channels, frames, joints, 1).
    return np.expand_dims(np.array(ls_frame).transpose(2, 0, 1), axis=-1)
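# `_get_skeleton` is not defined in this section. A minimal sketch, assuming
# it packs each of the 33 landmarks into an (x, y, z) row so a video becomes
# a (frames, joints, channels) array before the transpose above
# (hypothetical helper):
def _get_skeleton(landmarks):
    return [[lmk.x, lmk.y, lmk.z] for lmk in landmarks]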
def bootstrap(self, per_pose_class_limit=None):
    """Bootstraps images in a given folder.

    Required layout of the input image folder (the output image folder
    mirrors it):
      pushups_up/
        image_001.jpg
        image_002.jpg
        ...
      pushups_down/
        image_001.jpg
        image_002.jpg
        ...
      ...

    Produced CSVs out folder:
      pushups_up.csv
      pushups_down.csv

    Produced CSV structure with pose 3D landmarks:
      sample_00001,x1,y1,z1,x2,y2,z2,....
      sample_00002,x1,y1,z1,x2,y2,z2,....
    """
    # Create the output folder for CSVs.
    if not os.path.exists(self._csvs_out_folder):
        os.makedirs(self._csvs_out_folder)

    for pose_class_name in self._pose_class_names:
        print('Bootstrapping ', pose_class_name, file=sys.stderr)

        # Paths for the pose class.
        images_in_folder = os.path.join(self._images_in_folder, pose_class_name)
        images_out_folder = os.path.join(self._images_out_folder, pose_class_name)
        csv_out_path = os.path.join(self._csvs_out_folder, pose_class_name + '.csv')
        if not os.path.exists(images_out_folder):
            os.makedirs(images_out_folder)

        with open(csv_out_path, 'w') as csv_out_file:
            csv_out_writer = csv.writer(csv_out_file, delimiter=',',
                                        quoting=csv.QUOTE_MINIMAL)
            # Get the list of images.
            image_names = sorted(
                [n for n in os.listdir(images_in_folder) if not n.startswith('.')])
            if per_pose_class_limit is not None:
                image_names = image_names[:per_pose_class_limit]

            # Bootstrap every image.
            for image_name in tqdm.tqdm(image_names):
                # Load the image.
                input_frame = cv2.imread(os.path.join(images_in_folder, image_name))
                input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)

                # Initialize a fresh pose tracker and run it.
                with mp_pose.Pose(upper_body_only=False) as pose_tracker:
                    result = pose_tracker.process(image=input_frame)
                    pose_landmarks = result.pose_landmarks

                # Save the image with the pose prediction (if a pose was detected).
                output_frame = input_frame.copy()
                if pose_landmarks is not None:
                    mp_drawing.draw_landmarks(
                        image=output_frame,
                        landmark_list=pose_landmarks,
                        connections=mp_pose.POSE_CONNECTIONS)
                output_frame = cv2.cvtColor(output_frame, cv2.COLOR_RGB2BGR)
                cv2.imwrite(os.path.join(images_out_folder, image_name), output_frame)

                # Save landmarks if a pose was detected.
                if pose_landmarks is not None:
                    # Convert normalized landmarks to pixel coordinates.
                    frame_height, frame_width = output_frame.shape[0], output_frame.shape[1]
                    pose_landmarks = np.array(
                        [[lmk.x * frame_width, lmk.y * frame_height, lmk.z * frame_width]
                         for lmk in pose_landmarks.landmark],
                        dtype=np.float32)
                    assert pose_landmarks.shape == (33, 3), (
                        'Unexpected landmarks shape: {}'.format(pose_landmarks.shape))
                    # `astype(str)` replaces the deprecated `np.str` alias,
                    # which was removed in NumPy 1.24.
                    csv_out_writer.writerow(
                        [image_name] + pose_landmarks.flatten().astype(str).tolist())

                # Draw the XZ projection and concatenate it with the image.
                projection_xz = self._draw_xz_projection(
                    output_frame=output_frame, pose_landmarks=pose_landmarks)
                output_frame = np.concatenate((output_frame, projection_xz), axis=1)
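# `_draw_xz_projection` is referenced but not shown. A rough sketch, assuming
# it renders a top-down (X/Z) view of the pixel-space landmarks onto a white
# canvas the same size as the frame, using PIL's `Image`/`ImageDraw`
# (illustrative only, not the original implementation):
def _draw_xz_projection(self, output_frame, pose_landmarks, r=3, color='red'):
    frame_height, frame_width = output_frame.shape[:2]
    img = Image.new('RGB', (frame_width, frame_height), color='white')
    if pose_landmarks is None:
        return np.asarray(img)
    draw = ImageDraw.Draw(img)
    for x, _, z in pose_landmarks:
        # X gives the horizontal position; depth Z gives the vertical
        # position, shifted so the body sits near mid-canvas.
        y = z + frame_height * 0.5
        draw.ellipse([x - r, y - r, x + r, y + r], fill=color)
    return np.asarray(img)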
# Get some video parameters to generate the output video with classification.
video_n_frames = video_cap.get(cv2.CAP_PROP_FRAME_COUNT)
video_fps = video_cap.get(cv2.CAP_PROP_FPS)
video_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Initialize the tracker, classifier and counter.
# Do this before every video, as all of them have state.
from mediapipe.python.solutions import pose as mp_pose

# Folder with pose class CSVs. This should be the same folder you used while
# building the classifier to output CSVs.
pose_samples_folder = 'fitness_poses_csvs_out'

# Initialize the tracker.
pose_tracker = mp_pose.Pose(upper_body_only=False)

# Initialize the embedder.
pose_embedder = FullBodyPoseEmbedder()

# Initialize the classifier.
# Check that you are using the same parameters as during bootstrapping.
pose_classifier = PoseClassifier(
    pose_samples_folder=pose_samples_folder,
    pose_embedder=pose_embedder,
    top_n_by_max_distance=30,
    top_n_by_mean_distance=10)

# # Uncomment to validate target poses used by the classifier and find outliers.
# outliers = pose_classifier.find_pose_sample_outliers()
# print('Number of pose sample outliers (consider removing them): ', len(outliers))
def test_invalid_image_shape(self):
    with mp_pose.Pose() as pose:
        with self.assertRaisesRegex(
            ValueError, 'Input image must contain three channel rgb data.'):
            pose.process(np.arange(36, dtype=np.uint8).reshape(3, 3, 4))
def classify(filename):
    # Specify your video name and target pose classes to count the repetitions.
    video_path = filename
    class_names = ['pushups_down', 'lunges_down', 'squats_down']
    pose_samples_folders = [
        'fitness_poses_csvs_out_pushups',
        'fitness_poses_csvs_out_lunges',
        'fitness_poses_csvs_out_squats'
    ]
    all_repetition_counts = []
    # out_video_path = f'{filename[:-3]}mov'

    for i in range(len(class_names)):
        class_name = class_names[i]
        # Open the video.
        video_cap = cv2.VideoCapture(video_path)

        # Get some video parameters to generate the output video with classification.
        video_n_frames = video_cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # video_fps = video_cap.get(cv2.CAP_PROP_FPS)
        # video_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        # video_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Initialize the tracker, classifier and counter.
        # Do this before every video, as all of them have state.

        # Folder with pose class CSVs. This should be the same folder you used
        # while building the classifier to output CSVs.
        pose_samples_folder = pose_samples_folders[i]

        # Initialize the tracker.
        pose_tracker = mp_pose.Pose(upper_body_only=False)

        # Initialize the embedder.
        pose_embedder = FullBodyPoseEmbedder()

        # Initialize the classifier.
        # Check that you are using the same parameters as during bootstrapping.
        pose_classifier = PoseClassifier(
            pose_samples_folder=pose_samples_folder,
            pose_embedder=pose_embedder,
            top_n_by_max_distance=30,
            top_n_by_mean_distance=10)

        # # Uncomment to validate target poses used by the classifier and find outliers.
        # outliers = pose_classifier.find_pose_sample_outliers()
        # print('Number of pose sample outliers (consider removing them): ', len(outliers))

        # Initialize EMA smoothing.
        pose_classification_filter = EMADictSmoothing(window_size=10, alpha=0.2)

        # Initialize the counter.
        repetition_counter = RepetitionCounter(
            class_name=class_name, enter_threshold=6, exit_threshold=4)

        # Initialize the renderer.
        # pose_classification_visualizer = PoseClassificationVisualizer(
        #     class_name=class_name,
        #     plot_x_max=video_n_frames,
        #     # The graphic looks nicer if it's the same as `top_n_by_mean_distance`.
        #     plot_y_max=10)

        # Run classification on the video.
        # Open the output video.
        # out_video = cv2.VideoWriter(out_video_path, cv2.VideoWriter_fourcc(*'mp4v'),
        #                             video_fps, (video_width, video_height))

        frame_idx = 0
        repetitions_count = -1
        # output_frame = None
        with tqdm.tqdm(total=video_n_frames, position=0, leave=True) as pbar:
            while True:
                # Get the next frame of the video.
                success, input_frame = video_cap.read()
                if not success:
                    break

                # Run the pose tracker.
                input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
                result = pose_tracker.process(image=input_frame)
                pose_landmarks = result.pose_landmarks

                # Draw the pose prediction.
                output_frame = input_frame.copy()
                if pose_landmarks is not None:
                    mp_drawing.draw_landmarks(
                        image=output_frame,
                        landmark_list=pose_landmarks,
                        connections=mp_pose.POSE_CONNECTIONS)

                if pose_landmarks is not None:
                    # Convert normalized landmarks to pixel coordinates.
                    frame_height, frame_width = output_frame.shape[0], output_frame.shape[1]
                    pose_landmarks = np.array(
                        [[lmk.x * frame_width, lmk.y * frame_height, lmk.z * frame_width]
                         for lmk in pose_landmarks.landmark],
                        dtype=np.float32)
                    assert pose_landmarks.shape == (33, 3), (
                        'Unexpected landmarks shape: {}'.format(pose_landmarks.shape))

                    # Classify the pose on the current frame.
                    pose_classification = pose_classifier(pose_landmarks)

                    # Smooth the classification using EMA.
                    pose_classification_filtered = pose_classification_filter(
                        pose_classification)

                    # Count repetitions.
                    repetitions_count = repetition_counter(pose_classification_filtered)
                else:
                    # No pose => no classification on the current frame.
                    # pose_classification = None
                    # Still add an empty classification to the filter to maintain
                    # correct smoothing for future frames.
                    # pose_classification_filtered = pose_classification_filter(dict())
                    # pose_classification_filtered = None
                    # Don't update the counter, presuming that the person is
                    # 'frozen'. Just take the latest repetition count.
                    repetitions_count = repetition_counter.n_repeats

                # Draw the classification plot and repetition counter.
                # output_frame = pose_classification_visualizer(
                #     frame=output_frame,
                #     pose_classification=pose_classification,
                #     pose_classification_filtered=pose_classification_filtered,
                #     repetitions_count=repetitions_count)

                # Save the output frame.
                # out_video.write(cv2.cvtColor(np.array(output_frame), cv2.COLOR_RGB2BGR))

                # Show intermediate frames of the video to track progress.
                # if frame_idx % 50 == 0:
                #     show_image(output_frame)

                frame_idx += 1
                pbar.update()

        all_repetition_counts.append(repetitions_count)

        # Release MediaPipe resources.
        pose_tracker.close()

        # Close the output video.
        # out_video.release()

    print(all_repetition_counts)
    pushup_counts, lunge_counts, squat_counts = (
        all_repetition_counts[0], all_repetition_counts[1], all_repetition_counts[2])
    return pushup_counts, lunge_counts, squat_counts
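# `RepetitionCounter` is used above but not defined in this section. A minimal
# sketch of the hysteresis-style counting its `enter_threshold` and
# `exit_threshold` arguments imply: one repetition is counted each time the
# smoothed confidence for `class_name` rises above `enter_threshold` and then
# falls below `exit_threshold` (a hypothetical reconstruction):
class RepetitionCounter:

    def __init__(self, class_name, enter_threshold=6, exit_threshold=4):
        self._class_name = class_name
        self._enter_threshold = enter_threshold
        self._exit_threshold = exit_threshold
        self._pose_entered = False
        self.n_repeats = 0

    def __call__(self, pose_classification):
        # Smoothed confidence of the target class on the current frame.
        pose_confidence = pose_classification.get(self._class_name, 0.0)
        if not self._pose_entered:
            self._pose_entered = pose_confidence > self._enter_threshold
            return self.n_repeats
        if pose_confidence < self._exit_threshold:
            # Entered the pose earlier and has now left it: one repetition.
            self.n_repeats += 1
            self._pose_entered = False
        return self.n_repeats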
def test_on_video(self, model_complexity, expected_name):
    """Tests pose models on a video."""
    # Thresholds for comparing actual and expected predictions: pixels for
    # image-space landmarks, meters for world landmarks.
    diff_threshold = 15
    world_diff_threshold = 0.1
    video_path = os.path.join(os.path.dirname(__file__),
                              'testdata/pose_squats.mp4')
    expected_path = os.path.join(os.path.dirname(__file__),
                                 'testdata/{}'.format(expected_name))
    # Predict pose landmarks for each frame.
    video_cap = cv2.VideoCapture(video_path)
    actual_per_frame = []
    actual_world_per_frame = []
    frame_idx = 0
    with mp_pose.Pose(static_image_mode=False,
                      model_complexity=model_complexity) as pose:
        while True:
            # Get the next frame of the video.
            success, input_frame = video_cap.read()
            if not success:
                break
            # Run the pose tracker.
            input_frame = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
            result = pose.process(image=input_frame)
            pose_landmarks = self._landmarks_list_to_array(result.pose_landmarks,
                                                           input_frame.shape)
            pose_world_landmarks = self._world_landmarks_list_to_array(
                result.pose_world_landmarks)
            actual_per_frame.append(pose_landmarks)
            actual_world_per_frame.append(pose_world_landmarks)

            input_frame = cv2.cvtColor(input_frame, cv2.COLOR_RGB2BGR)
            self._annotate(input_frame, result, frame_idx)
            frame_idx += 1
    actual = np.array(actual_per_frame)
    actual_world = np.array(actual_world_per_frame)

    # Dump actual .npz.
    npz_path = self._get_output_path(expected_name)
    np.savez(npz_path, predictions=actual, predictions_world=actual_world)

    # Dump actual JSON.
    json_path = self._get_output_path(expected_name.replace('.npz', '.json'))
    with open(json_path, 'w') as fl:
        dump_data = {
            'predictions': np.around(actual, 3).tolist(),
            'predictions_world': np.around(actual_world, 3).tolist()
        }
        fl.write(json.dumps(dump_data, indent=2, separators=(',', ': ')))

    # Validate actual vs. expected landmarks.
    expected = np.load(expected_path)['predictions']
    assert actual.shape == expected.shape, (
        'Unexpected shape of predictions: {} instead of {}'.format(
            actual.shape, expected.shape))
    self._assert_diff_less(
        actual[..., :2], expected[..., :2], threshold=diff_threshold)

    # Validate actual vs. expected world landmarks.
    expected_world = np.load(expected_path)['predictions_world']
    assert actual_world.shape == expected_world.shape, (
        'Unexpected shape of world predictions: {} instead of {}'.format(
            actual_world.shape, expected_world.shape))
    self._assert_diff_less(
        actual_world, expected_world, threshold=world_diff_threshold)
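# `_world_landmarks_list_to_array` plausibly just unpacks the world landmarks,
# which MediaPipe reports in metric 3D coordinates (origin between the hips)
# rather than pixels, so no image-shape scaling is needed. A sketch under
# that assumption:
def _world_landmarks_list_to_array(self, landmark_list):
    return np.asarray([(lmk.x, lmk.y, lmk.z)
                       for lmk in landmark_list.landmark])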