def __init__(self,
             pd_path="models/palm_detection_6_shaves.blob",
             pd_score_thresh=0.65,
             pd_nms_thresh=0.3,
             lm_path="models/hand_landmark_6_shaves.blob",
             lm_score_threshold=0.5,
             show_landmarks=True,
             show_hand_box=True,
             asl_path="models/hand_asl_6_shaves.blob",
             asl_recognition=True,
             show_asl=True,
             font_path='HelveticaNeue.ttf'):
    """Hand tracker with ASL (fingerspelling) recognition.

    Args:
        pd_path: path to the palm-detection model blob.
        pd_score_thresh: confidence threshold for palm detections.
        pd_nms_thresh: IoU threshold for non-max suppression of palm boxes.
        lm_path: path to the hand-landmark model blob.
        lm_score_threshold: confidence threshold for the landmark output.
        show_landmarks: draw hand landmarks on the output frame.
        show_hand_box: draw the hand bounding box on the output frame.
        asl_path: path to the ASL-recognition model blob.
        asl_recognition: enable the ASL-recognition stage.
        show_asl: display the recognized ASL characters.
        font_path: TTF font file used for on-screen text rendering.
            New optional parameter; defaults to the previously
            hard-coded font, so existing callers are unaffected.
    """
    # Model paths and detection thresholds
    self.pd_path = pd_path
    self.pd_score_thresh = pd_score_thresh
    self.pd_nms_thresh = pd_nms_thresh
    self.lm_path = lm_path
    self.lm_score_threshold = lm_score_threshold
    self.asl_path = asl_path
    # Rendering options
    self.show_landmarks = show_landmarks
    self.show_hand_box = show_hand_box
    self.asl_recognition = asl_recognition
    self.show_asl = show_asl

    # Create SSD anchors (MediaPipe palm-detection anchor configuration)
    anchor_options = mpu.SSDAnchorOptions(
        num_layers=4,
        min_scale=0.1484375,
        max_scale=0.75,
        input_size_height=128,
        input_size_width=128,
        anchor_offset_x=0.5,
        anchor_offset_y=0.5,
        strides=[8, 16, 16, 16],
        aspect_ratios=[1.0],
        reduce_boxes_in_lowest_layer=False,
        interpolated_scale_aspect_ratio=1.0,
        fixed_anchor_size=True)
    self.anchors = mpu.generate_anchors(anchor_options)
    self.nb_anchors = self.anchors.shape[0]
    print(f"{self.nb_anchors} anchors have been created")

    # Preview window dimensions; frame_size is set once frames arrive.
    self.preview_width = 576
    self.preview_height = 324
    self.frame_size = None

    # FreeType renderer for drawing text with a TTF font
    self.ft = cv2.freetype.createFreeType2()
    self.ft.loadFontData(fontFileName=font_path, id=0)

    # Short per-hand history of recognized characters (last 5 frames) —
    # presumably consumed elsewhere for debouncing/voting; confirm usage.
    self.right_char_queue = collections.deque(maxlen=5)
    self.left_char_queue = collections.deque(maxlen=5)

    # Per-hand spelling state: last accepted char, accumulated sentence,
    # and the time of the last update.
    self.previous_right_char = ""
    self.right_sentence = ""
    self.previous_right_update_time = time.time()
    self.previous_left_char = ""
    self.left_sentence = ""
    self.previous_left_update_time = time.time()
def __init__(self,
             input_src=None,
             pd_path=POSE_DETECTION_MODEL,
             pd_score_thresh=0.5,
             pd_nms_thresh=0.3,
             lm_path=FULL_BODY_LANDMARK_MODEL,
             lm_score_threshold=0.7,
             full_body=True,
             use_gesture=False,
             smoothing=True,
             filter_window_size=5,
             filter_velocity_scale=10,
             show_3d=False,
             crop=False,
             multi_detection=False,
             output=None,
             internal_fps=15):
    """Blazepose body tracker (DepthAI variant).

    Args:
        input_src: None for the OAK* internal color camera, a path to an
            image/video file, or a digit string interpreted as a webcam id.
        pd_path: path to the pose-detection model.
        pd_score_thresh: confidence threshold for pose detections.
        pd_nms_thresh: IoU threshold for non-max suppression.
        lm_path: path to the landmark model.
        lm_score_threshold: confidence threshold for the landmark output.
        full_body: use the full-body model (33 keypoints) instead of the
            upper-body one (25 keypoints).
        use_gesture: enable gesture recognition rendering.
        smoothing: apply a temporal smoothing filter to the landmarks
            (forced off when multi_detection is enabled).
        filter_window_size: smoothing-filter window size.
        filter_velocity_scale: smoothing-filter velocity scale.
        show_3d: open an Open3D window with a 3D view.
        crop: crop-mode flag, stored for use elsewhere in the class.
        multi_detection: detect several bodies per frame.
        output: optional path of a video file to save the output to.
        internal_fps: FPS assumed for the internal camera.
    """
    self.pd_path = pd_path
    self.pd_score_thresh = pd_score_thresh
    self.pd_nms_thresh = pd_nms_thresh
    self.lm_path = lm_path
    self.lm_score_threshold = lm_score_threshold
    self.full_body = full_body
    self.use_gesture = use_gesture
    self.smoothing = smoothing
    self.show_3d = show_3d
    self.crop = crop
    self.multi_detection = multi_detection
    if self.multi_detection:
        print("With multi-detection, smoothing filter is disabled.")
        self.smoothing = False
    self.internal_fps = internal_fps

    if input_src is None:  # idiom fix: was `== None`
        self.input_type = "internal"  # OAK* internal color camera
        self.video_fps = internal_fps  # Used when saving the output in a video file. Should be close to the real fps
        video_height = video_width = 1080  # Depends on cam.setResolution() in create_pipeline()
    elif input_src.endswith(('.jpg', '.png')):
        self.input_type = "image"
        self.img = cv2.imread(input_src)
        self.video_fps = 25
        video_height, video_width = self.img.shape[:2]
    else:
        self.input_type = "video"
        if input_src.isdigit():
            # Bugfix: was `input_type = "webcam"`, a dead local variable
            # that never reached the instance attribute.
            self.input_type = "webcam"
            input_src = int(input_src)
        self.cap = cv2.VideoCapture(input_src)
        self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS))
        video_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        video_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print("Video FPS:", self.video_fps)

    self.nb_kps = 33 if self.full_body else 25

    if self.smoothing:
        self.filter = mpu.LandmarksSmoothingFilter(filter_window_size,
                                                   filter_velocity_scale,
                                                   (self.nb_kps, 3))

    # Create SSD anchors
    # https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt
    anchor_options = mpu.SSDAnchorOptions(
        num_layers=4,
        min_scale=0.1484375,
        max_scale=0.75,
        input_size_height=128,
        input_size_width=128,
        anchor_offset_x=0.5,
        anchor_offset_y=0.5,
        strides=[8, 16, 16, 16],
        aspect_ratios=[1.0],
        reduce_boxes_in_lowest_layer=False,
        interpolated_scale_aspect_ratio=1.0,
        fixed_anchor_size=True)
    self.anchors = mpu.generate_anchors(anchor_options)
    self.nb_anchors = self.anchors.shape[0]
    print(f"{self.nb_anchors} anchors have been created")

    # Rendering flags
    self.show_pd_box = False
    self.show_pd_kps = False
    self.show_rot_rect = False
    self.show_landmarks = True
    self.show_scores = False
    self.show_gesture = self.use_gesture
    self.show_fps = True

    if self.show_3d:
        # Open3D scene: black background with a floor and wall grid
        # framing the camera view.
        self.vis3d = o3d.visualization.Visualizer()
        self.vis3d.create_window()
        opt = self.vis3d.get_render_option()
        opt.background_color = np.asarray([0, 0, 0])
        z = min(video_height, video_width) / 3
        self.grid_floor = create_grid([0, video_height, -z],
                                      [video_width, video_height, -z],
                                      [video_width, video_height, z],
                                      [0, video_height, z],
                                      5, 2, color=(1, 1, 1))
        self.grid_wall = create_grid([0, 0, z],
                                     [video_width, 0, z],
                                     [video_width, video_height, z],
                                     [0, video_height, z],
                                     5, 2, color=(1, 1, 1))
        self.vis3d.add_geometry(self.grid_floor)
        self.vis3d.add_geometry(self.grid_wall)
        view_control = self.vis3d.get_view_control()
        view_control.set_up(np.array([0, -1, 0]))
        view_control.set_front(np.array([0, 0, -1]))

    if output is None:
        self.output = None
    else:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        self.output = cv2.VideoWriter(output, fourcc, self.video_fps,
                                      (video_width, video_height))
def __init__(self,
             input_file=None,
             pd_path="models/palm_detection.blob",
             pd_score_thresh=0.5,
             pd_nms_thresh=0.3,
             use_lm=True,
             lm_path="models/hand_landmark.blob",
             lm_score_threshold=0.5):
    """Palm-detection + hand-landmark tracker.

    Args:
        input_file: None to use the camera, "direct" when frames are fed
            by the caller, or a path to an image/video file.
        pd_path: path to the palm-detection model blob.
        pd_score_thresh: confidence threshold for palm detections.
        pd_nms_thresh: IoU threshold for non-max suppression.
        use_lm: run the landmark model on detected palms.
        lm_path: path to the hand-landmark model blob.
        lm_score_threshold: confidence threshold for the landmark output.
    """
    self.camera = input_file is None
    self.pd_path = pd_path
    self.pd_score_thresh = pd_score_thresh
    self.pd_nms_thresh = pd_nms_thresh
    self.use_lm = use_lm
    self.lm_path = lm_path
    self.lm_score_threshold = lm_score_threshold
    self.regions = []
    self.seq_num = None

    if not self.camera:
        if input_file == "direct":
            # Frames come directly from the caller: neither image nor video.
            self.image_mode = None
        elif input_file.endswith(('.jpg', '.png')):
            self.image_mode = True
            self.img = cv2.imread(input_file)
            # Cast to plain int for consistency with the video branch
            # (np.min returns a numpy scalar, not a Python int).
            self.video_size = int(np.min(self.img.shape[:2]))
        else:
            self.image_mode = False
            self.cap = cv2.VideoCapture(input_file)
            width = self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float `width`
            height = self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float `height`
            self.video_size = int(min(width, height))

    # Create SSD anchors
    # https://github.com/google/mediapipe/blob/master/mediapipe/modules/palm_detection/palm_detection_cpu.pbtxt
    anchor_options = mpu.SSDAnchorOptions(
        num_layers=4,
        min_scale=0.1484375,
        max_scale=0.75,
        input_size_height=128,
        input_size_width=128,
        anchor_offset_x=0.5,
        anchor_offset_y=0.5,
        strides=[8, 16, 16, 16],
        aspect_ratios=[1.0],
        reduce_boxes_in_lowest_layer=False,
        interpolated_scale_aspect_ratio=1.0,
        fixed_anchor_size=True)
    self.anchors = mpu.generate_anchors(anchor_options)
    self.nb_anchors = self.anchors.shape[0]
    print(f"{self.nb_anchors} anchors have been created")

    # Rendering flags (landmark-related ones only exist when use_lm is set)
    if self.use_lm:
        self.show_pd_box = False
        self.show_pd_kps = False
        self.show_rot_rect = False
        self.show_handedness = False
        self.show_landmarks = True
        self.show_scores = False
    else:
        self.show_pd_box = True
        self.show_pd_kps = False
        self.show_rot_rect = False
        self.show_scores = False
def __init__(self,
             input_src=None,
             pd_xml=POSE_DETECTION_MODEL,
             pd_device="CPU",
             pd_score_thresh=0.5,
             pd_nms_thresh=0.3,
             lm_xml=FULL_BODY_LANDMARK_MODEL,
             lm_device="CPU",
             lm_score_threshold=0.7,
             full_body=True,
             use_gesture=False,
             smoothing=True,
             filter_window_size=5,
             filter_velocity_scale=10,
             show_3d=False,
             crop=False,
             multi_detection=False,
             force_detection=False,
             output=None):
    """Blazepose body tracker (OpenVINO variant).

    Args:
        input_src: path to an image/video file, or a digit string
            interpreted as a webcam id. Required (no internal camera
            in this variant).
        pd_xml: pose-detection model XML file.
        pd_device: OpenVINO device for pose detection (e.g. "CPU").
        pd_score_thresh: confidence threshold for pose detections.
        pd_nms_thresh: IoU threshold for non-max suppression.
        lm_xml: landmark model XML file.
        lm_device: OpenVINO device for the landmark model.
        lm_score_threshold: confidence threshold for the landmark output.
        full_body: use the full-body model instead of the upper-body one.
        use_gesture: enable gesture recognition rendering.
        smoothing: apply a temporal smoothing filter to the landmarks
            (forced off when multi_detection is enabled).
        filter_window_size: smoothing-filter window size.
        filter_velocity_scale: smoothing-filter velocity scale.
        show_3d: open an Open3D window with a 3D view.
        crop: crop-mode flag, stored for use elsewhere in the class.
        multi_detection: detect several bodies per frame (forces
            detection on every frame and disables smoothing).
        force_detection: run pose detection on every frame.
        output: optional path of a video file to save the output to.

    Raises:
        ValueError: if input_src is None.
    """
    self.pd_score_thresh = pd_score_thresh
    self.pd_nms_thresh = pd_nms_thresh
    self.lm_score_threshold = lm_score_threshold
    self.full_body = full_body
    self.use_gesture = use_gesture
    self.smoothing = smoothing
    self.show_3d = show_3d
    self.crop = crop
    self.multi_detection = multi_detection
    self.force_detection = force_detection
    if self.multi_detection:
        print(
            "Warning: with multi-detection, smoothing filter is disabled and pose detection is forced on every frame."
        )
        self.smoothing = False
        self.force_detection = True

    # Robustness fix: the original dereferenced input_src unconditionally
    # and crashed with an obscure AttributeError when the default None was
    # kept. Fail fast with an explicit message instead.
    if input_src is None:
        raise ValueError(
            "input_src must be an image/video file path or a webcam id")

    if input_src.endswith(('.jpg', '.png')):
        self.input_type = "image"
        self.img = cv2.imread(input_src)
        self.video_fps = 25
        video_height, video_width = self.img.shape[:2]
    else:
        self.input_type = "video"
        if input_src.isdigit():
            # Bugfix: was `input_type = "webcam"`, a dead local variable
            # that never reached the instance attribute.
            self.input_type = "webcam"
            input_src = int(input_src)
        self.cap = cv2.VideoCapture(input_src)
        self.video_fps = int(self.cap.get(cv2.CAP_PROP_FPS))
        video_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        video_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print("Video FPS:", self.video_fps)

    # The full-body landmark model predicts 39 landmarks.
    # We are interested in the first 35:
    # 1 to 33 correspond to the well documented body parts,
    # 34 (mid hips) and 35 (a point above the head) are used to predict
    # the ROI of the next frame. Same for the upper-body model but with
    # 8 fewer landmarks.
    self.nb_lms = 35 if self.full_body else 27

    if self.smoothing:
        # The last 2 landmarks (ROI helpers) are not smoothed.
        self.filter = mpu.LandmarksSmoothingFilter(filter_window_size,
                                                   filter_velocity_scale,
                                                   (self.nb_lms - 2, 3))

    # Create SSD anchors
    # https://github.com/google/mediapipe/blob/master/mediapipe/modules/pose_detection/pose_detection_cpu.pbtxt
    anchor_options = mpu.SSDAnchorOptions(
        num_layers=4,
        min_scale=0.1484375,
        max_scale=0.75,
        input_size_height=128,
        input_size_width=128,
        anchor_offset_x=0.5,
        anchor_offset_y=0.5,
        strides=[8, 16, 16, 16],
        aspect_ratios=[1.0],
        reduce_boxes_in_lowest_layer=False,
        interpolated_scale_aspect_ratio=1.0,
        fixed_anchor_size=True)
    self.anchors = mpu.generate_anchors(anchor_options)
    self.nb_anchors = self.anchors.shape[0]
    print(f"{self.nb_anchors} anchors have been created")

    # Load Openvino models
    self.load_models(pd_xml, pd_device, lm_xml, lm_device)

    # Rendering flags
    self.show_pd_box = False
    self.show_pd_kps = False
    self.show_rot_rect = False
    self.show_landmarks = True
    self.show_scores = False
    self.show_gesture = self.use_gesture
    self.show_fps = True
    self.show_segmentation = False

    if self.show_3d:
        # Open3D scene: black background with a floor and wall grid
        # framing the camera view.
        self.vis3d = o3d.visualization.Visualizer()
        self.vis3d.create_window()
        opt = self.vis3d.get_render_option()
        opt.background_color = np.asarray([0, 0, 0])
        z = min(video_height, video_width) / 3
        self.grid_floor = create_grid([0, video_height, -z],
                                      [video_width, video_height, -z],
                                      [video_width, video_height, z],
                                      [0, video_height, z],
                                      5, 2, color=(1, 1, 1))
        self.grid_wall = create_grid([0, 0, z],
                                     [video_width, 0, z],
                                     [video_width, video_height, z],
                                     [0, video_height, z],
                                     5, 2, color=(1, 1, 1))
        self.vis3d.add_geometry(self.grid_floor)
        self.vis3d.add_geometry(self.grid_wall)
        view_control = self.vis3d.get_view_control()
        view_control.set_up(np.array([0, -1, 0]))
        view_control.set_front(np.array([0, 0, -1]))

    if output is None:
        self.output = None
    else:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        self.output = cv2.VideoWriter(output, fourcc, self.video_fps,
                                      (video_width, video_height))