def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print("Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue,)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) if args.out != None: fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] tm = cv2.TickMeter() cnt = 0 input_path = args.input_path listdir = os.listdir(input_path) for v_name in listdir: v_path = os.path.join(input_path, v_name) cap = cv2.VideoCapture(v_path) while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks(face_img) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box( frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1]) # Show preview. # cv2.imshow("Preview", frame) # if cv2.waitKey(10) == 27: # break if args.out != None: output_movie.write(frame) else: cv2.imshow("Preview", frame) cnt = cnt + 1 if cnt % 100 == 0: print(str(cnt), flush=True) # Clean up the multiprocessing process. 
box_process.terminate() box_process.join() cv2.destroyAllWindows()
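# The face-detection Process above targets a module-level `get_face` worker that is
# not defined in this section. Below is a minimal sketch of such a worker, assuming
# only the MarkDetector.extract_cnn_facebox() call used elsewhere in this file (the
# variant that also passes a confidence threshold would take one extra argument).
def get_face(detector, img_queue, box_queue):
    """Pull frames from img_queue, detect a facebox, and push it to box_queue."""
    while True:
        image = img_queue.get()
        box = detector.extract_cnn_facebox(image)  # None when no face is found
        box_queue.put(box)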
def main():
    # Construct the argument parser and parse the arguments.
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--draw-markers", action="store_true", default=False,
                    help="draw the detected facial landmarks")
    ap.add_argument("-c", "--draw-confidence", action="store_true", default=False,
                    help="draw the face detection result and its confidence")
    ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9,
                    help="minimum face detection confidence")
    ap.add_argument("-p", "--draw-pose", action="store_false", default=True,
                    help="disable drawing of the stabilized pose annotation (drawn by default)")
    ap.add_argument("-u", "--draw-unstable", action="store_true", default=False,
                    help="draw the unstabilized pose annotation")
    ap.add_argument("-s", "--draw-segmented", action="store_true", default=False,
                    help="draw the segmented face regions")
    args = vars(ap.parse_args())

    confidence_threshold = args["confidence_threshold"]

    # Video source from webcam or video file.
    video_src = 0
    cam = cv2.VideoCapture(video_src)
    _, sample_frame = cam.read()

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    if isWindows():
        thread = threading.Thread(target=get_face,
                                  args=(mark_detector, confidence_threshold, img_queue, box_queue))
        thread.daemon = True
        thread.start()
    else:
        box_process = Process(target=get_face,
                              args=(mark_detector, confidence_threshold, img_queue, box_queue))
        box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
        state_num=2,
        measure_num=1,
        cov_process=0.1,
        cov_measure=0.1) for _ in range(6)]

    while True:
        # Read frame, crop it, flip it, suits your needs.
        frame_got, frame = cam.read()
        if frame_got is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        if video_src == 0:
            frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose.

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        result = box_queue.get()

        if result is not None:
            if args["draw_confidence"]:
                mark_detector.face_detector.draw_result(frame, result)

            # Unpack result.
            facebox, confidence = result

            # Fix facebox if needed.
            if facebox[1] > facebox[3]:
                facebox[1] = 0
            if facebox[0] > facebox[2]:
                facebox[0] = 0

            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
            marks = mark_detector.detect_marks(face_img)

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Segment the image based on markers and facebox.
            seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0])
            if args["draw_segmented"]:
                mark_detector.draw_box(frame, seg.getSegmentBBs())
                cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"])

            if args["draw_markers"]:
                mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            stable_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                stable_pose.append(ps_stb.state[0])
            stable_pose = np.reshape(stable_pose, (-1, 3))

            if args["draw_unstable"]:
                pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

            if args["draw_pose"]:
                pose_estimator.draw_annotation_box(
                    frame, stable_pose[0], stable_pose[1], color=(128, 255, 128))

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
    if not isWindows():
        box_process.terminate()
        box_process.join()
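# The isWindows() helper used above (to fall back from multiprocessing.Process to a
# daemon thread on Windows) is not defined in this section. An assumed, minimal
# implementation based only on its name and how it is used:
import platform

def isWindows():
    """Assumed helper: True when running on Windows."""
    return platform.system() == "Windows"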
                          [355.48022, 222.92123],
                          [342.7749, 221.94604]])

        pose = pose_estimator.solve_pose_by_68_points(marks)

        # Stabilize the pose.
        steady_pose = []
        pose_np = np.array(pose).flatten()
        for value, ps_stb in zip(pose_np, pose_stabilizers):
            ps_stb.update([value])
            steady_pose.append(ps_stb.state[0])
        steady_pose = np.reshape(steady_pose, (-1, 3))

        # Uncomment following lines to draw raw pose annotation on frame.
        # pose_estimator.draw_annotation_box(
        #     frame, pose[0], pose[1], color=(255, 128, 128))

        # Draw stable pose annotation on frame.
        pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1],
                                           color=(128, 255, 128))

        # Uncomment following line to draw head axes on frame.
        # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(10) == 27:
            break

    # Clean up the multiprocessing process.
def main():
    # bagreader = BagFileReader(args.video, 640, 480, 848, 480, 30, 30)
    bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15)

    # Introduce mark_detector to detect landmarks.
    mark_detector = MarkDetector()

    sample_frame = bagreader.get_color_frame()
    sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB)

    height, width, _ = sample_frame.shape
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps, (width, height))

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face, args=(mark_detector, img_queue, box_queue,))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    height, width = sample_frame.shape[:2]
    pose_estimator = PoseEstimator(img_size=(height, width))

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1)
        for _ in range(6)
    ]

    tm = cv2.TickMeter()

    while True:
        t1 = time.time()
        # Read frame, crop it, flip it, suits your needs.
        frame = bagreader.get_color_frame()
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        if frame is False:
            break

        # Crop it if frame is larger than expected.
        # frame = frame[0:480, 300:940]

        # If frame comes from webcam, flip it so it looks like a mirror.
        # if video_src == 0:
        #     frame = cv2.flip(frame, 2)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose.

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        faceboxes = box_queue.get()
        print(faceboxes)
        mess = "Not detect pose"
        if faceboxes is not None:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                tm.start()
                marks = mark_detector.detect_marks([face_img])
                tm.stop()

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Uncomment following line to show raw marks.
                # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw raw pose annotation on frame.
                pose_estimator.draw_annotation_box(frame, pose[0], pose[1],
                                                   color=(255, 128, 128))

                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)

                # Uncomment following lines to draw stable pose annotation on frame.
                # pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), thickness=2)

        # Show preview.
        cv2.imshow("Preview", frame)
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    out.release()

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
def main(): """MAIN""" cv2.namedWindow("Test") # Create a named window cv2.moveWindow("Test", 900, 600) # Move it to (40,30) screenWidth, screenHeight = pyautogui.size() st = 'Last command' cap = cv2.VideoCapture(0) cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Setting up process for listening to audio commands voice_command_queue = Q() stt_process = Thread(target=get_voice_command, args=(voice_command_queue, )) stt_process.setDaemon(True) stt_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(255, 128, 128)) # Uncomment following line to draw head axes on frame. 
endpoints = pose_estimator.getEndPoints(frame, steady_pose[0], steady_pose[1]) deltax = endpoints[1][0] - endpoints[0][0] deltay = endpoints[1][1] - endpoints[0][1] xpos = math.floor((deltax + 44) * screenWidth / 88) ypos = math.floor((deltay + 14) * screenHeight / 58) # print(xpos, ypos) pyautogui.moveTo(xpos, ypos) if not voice_command_queue.empty(): command = voice_command_queue.get_nowait() if 'click' in command or 'select' in command: pyautogui.click() st = 'Click' elif 'double' in command or 'in' in command: pyautogui.doubleClick() st = 'Double Click' elif 'right' in command or 'menu' in command or 'light' in command: pyautogui.rightClick() st = 'Right Click' print(command) cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255) scale_percent = 30 # calculate the 50 percent of original dimensions width = int(frame.shape[1] * scale_percent / 100) height = int(frame.shape[0] * scale_percent / 100) # dsize dsize = (width, height) # resize image output = cv2.resize(frame, dsize) cv2.moveWindow("Test", screenWidth - width, screenHeight - height) # Show preview. cv2.imshow("Test", output) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
def main(): """MAIN""" # Video source from webcam or video file. video_src = 0 cam = cv2.VideoCapture(video_src) _, sample_frame = cam.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue,)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cam.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. stabile_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) stabile_pose.append(ps_stb.state[0]) stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotaion on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotaion on frame. pose_estimator.draw_annotation_box( frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
def process_image(transform, processing_model, img):
    global mark_detector, box_process, img_queue, box_queue, pose_estimator, pose_stabilizers, tm, width, height

    tracks = []
    try:
        frame = img
        h, w, d = frame.shape
        if pose_estimator is None or w != width or h != height:
            # sample_frame = frame
            # img_queue.put(sample_frame)
            # Introduce pose estimator to solve pose. Get one frame to setup the
            # estimator according to the image size.
            height, width = h, w
            # (height, width) = (1062, 485)
            # (height, width) = (720, 1280)
            pose_estimator = PoseEstimator(img_size=(height, width))

        # Feed frame to image queue.
        img_queue.put(frame)

        # Get face from box queue.
        facebox = box_queue.get()

        if facebox is not None:
            # Detect landmarks from image of 128x128.
            face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
            face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
            face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

            tm.start()
            marks = mark_detector.detect_marks(face_img)
            tm.stop()

            # Convert the marks locations from local CNN to global image.
            marks *= (facebox[2] - facebox[0])
            marks[:, 0] += facebox[0]
            marks[:, 1] += facebox[1]

            # Uncomment following line to show raw marks.
            # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Uncomment following line to show facebox.
            # mark_detector.draw_box(frame, [facebox])

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])
            steady_pose = np.reshape(steady_pose, (-1, 3))

            # Uncomment following lines to draw raw pose annotation on frame.
            # pose_estimator.draw_annotation_box(
            #     frame, pose[0], pose[1], color=(255, 128, 128))

            # Draw stable pose annotation on frame.
            pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1],
                                               color=(128, 255, 128))

            # Uncomment following line to draw head axes on frame.
            # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

            img = frame
    except Exception as e:
        track = traceback.format_exc()
        print(track)
        print("HandPose Exception", e)

    return tracks, img
            marks = mark_detector.detect_marks(face_img)
            tm.stop()

            # Convert the locations from local face area to the global image.
            marks *= (x2 - x1)
            marks[:, 0] += x1
            marks[:, 1] += y1

            # Try pose estimation with 68 points.
            pose = pose_estimator.solve_pose_by_68_points(marks)

            # All done. The best way to show the result would be drawing the
            # pose on the frame in realtime.

            # Do you want to see the pose annotation?
            pose_estimator.draw_annotation_box(
                frame, pose[0], pose[1], color=(0, 255, 0))

            # Do you want to see the head axes?
            # pose_estimator.draw_axes(frame, pose[0], pose[1])

            # Do you want to see the marks?
            # mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

            # Do you want to see the facebox?
            # mark_detector.draw_box(frame, [facebox])

        # Show preview.
        cv2.imshow("Preview", frame)
        if cv2.waitKey(1) == 27:
            break
class PoseHeadNode(object):

    def __init__(self):
        # Subscribe.
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)
        # Publish.
        self.pub_headpose_info = rospy.Publisher("/headpose_info", GazingInfo,
                                                 queue_size=10)
        # Data.
        self.humans = None
        self.color_img = None
        self.init()

    def init(self):
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [
            Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1)
            for _ in range(6)
        ]
        sample_img = None
        # if self.color_img is None:
        #     print("None")
        #     time.sleep(0.2)
        # height, width, _ = self.color_img.shape
        self.pose_estimator = PoseEstimator(img_size=(480, 640))

    def pose_cb(self, data):
        if self.color_img is None:
            return
        h, w = self.color_img.shape[:2]
        # ROS topic to Person instance.
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x, body_part.y,
                                body_part.confidence)
                human.body_parts[body_part.part_id] = part
            humans.append(human)
        self.humans = humans
        # self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)

    def draw_pose(self, img, humans, parts=[4, 7]):
        if img is None or humans is None:
            # print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None
        image_h, image_w, _ = img.shape
        if parts is None or len(parts) == 0:
            return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)
        else:
            for human in humans:
                for part in parts:
                    if part in human.body_parts.keys():
                        body_part = human.body_parts[part]
                        coord = (int(body_part.x * image_w + 0.5),
                                 int(body_part.y * image_h + 0.5))
                        img = cv2.circle(img, coord, 2, (0, 255, 0))
            return img

    def color_callback(self, data):
        # Need a semaphore to protect self.color_img, because it is also used by
        # the is_waving_hand function.
        # print("Having color image")
        # cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        # print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)
        img = np.reshape(decoded, (480, 640, 3))
        # print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def get_top_vertice_human(self, human, img_w, img_h):
        sorted_id = sorted(human["pose"].body_parts.keys())
        body_part = human["pose"].body_parts[sorted_id[0]]
        return (int(body_part.x * img_w + 0.5),
                int(body_part.y * img_h + 0.5))

    def is_gazing(self, face_direction, human, img):
        def get_angle_direct_with_peakhand(face_direction, peak):
            # Cosine of the angle between the face direction and the direction
            # from the face to the hand peak.
            d0, d1 = face_direction
            peak_direction = (peak[0] - d0[0], peak[1] - d0[1])
            face_direct = (d1[0] - d0[0], d1[1] - d0[1])
            d_peak_direction = np.sqrt(peak_direction[0]**2 + peak_direction[1]**2)
            d_face_direct = np.sqrt(face_direct[0]**2 + face_direct[1]**2)
            return (peak_direction[0] * face_direct[0] +
                    peak_direction[1] * face_direct[1]) / (d_peak_direction * d_face_direct)

        thresh = 0.1  # 0.5  # cos(PI/3)
        image_h, image_w, _ = img.shape
        if 4 in human.body_parts.keys():
            body_part = human.body_parts[4]
            coord = (int(body_part.x * image_w + 0.5),
                     int(body_part.y * image_h + 0.5))
            print("angle left = ", get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction, coord) > thresh
        if 7 in human.body_parts.keys():
            body_part = human.body_parts[7]
            coord = (int(body_part.x * image_w + 0.5),
                     int(body_part.y * image_h + 0.5))
            print("angle right = ", get_angle_direct_with_peakhand(face_direction, coord))
            return get_angle_direct_with_peakhand(face_direction, coord) > thresh
        return False

    def get_gazing_status(self, face_directions, face_coords, humans, img):
        def get_dist_to_humans(human_coords, coord):
            result = []
            for _coord in human_coords:
                # Euclidean distance (parentheses fixed so both terms are under the sqrt).
                result.append(np.sqrt((coord[0] - _coord[0])**2 +
                                      (coord[1] - _coord[1])**2))
            return result

        image_h, image_w, _ = img.shape
        gazing_status = []
        human_coords = []
        for human in humans:
            sorted_id = sorted(human.body_parts.keys())
            body_part = human.body_parts[sorted_id[0]]
            coord = (int(body_part.x * image_w + 0.5),
                     int(body_part.y * image_h + 0.5))
            human_coords.append(coord)
        for fdirection, fcoord in zip(face_directions, face_coords):
            dists = get_dist_to_humans(human_coords, fcoord)
            idx = np.argmin(dists)
            gazing_status.append(self.is_gazing(fdirection, humans[idx], img))
            del human_coords[idx]
        return gazing_status

    def publish_headpose_info(self, viz=True):
        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None or self.humans is None:
            print("Color img None")
            return

        im_shape = self.color_img.shape
        # scales = [1024, 1980]
        scales = [480 * 2, 640 * 2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        thresh = 0.8
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)
        t1 = time.time()
        faceboxes = self.mark_detector.extract_cnn_facebox(frame)
        mess = "Not detect pose"
        face_directions = []
        face_coords = []
        if faceboxes is not None and len(faceboxes) > 0:
            if isinstance(faceboxes[1], int):
                faceboxes = [faceboxes]
            for facebox in faceboxes:
                # facebox = facebox.astype(np.int)
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                face_coords.append((int((facebox[0] + facebox[2]) / 2),
                                    int((facebox[1] + facebox[3]) / 2)))
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Show raw marks.
                self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw pose annotation on frame; the call also returns the face direction.
                face_direction = self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))
                face_directions.append(face_direction)

                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)

                # Uncomment following lines to draw stable pose annotation on frame.
                # self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # self.pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

        gazing_status = self.get_gazing_status(face_directions, face_coords,
                                               copy.deepcopy(self.humans), self.color_img)
        msg = GazingInfo()
        msg.is_gazing = []
        msg.coords = []
        for coord, gazestatus in zip(face_coords, gazing_status):
            _coord = Coord2D()
            _coord.x = coord[0]
            _coord.y = coord[1]
            msg.coords.append(_coord)
            msg.is_gazing.append(gazestatus)
        self.pub_headpose_info.publish(msg)
        print(gazing_status)

        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), thickness=2)

        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)
def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: engine.say( "Warning: video source not assigned, default webcam will be used") engine.runAndWait() video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() gaze = GazeTracking() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() head_flag = 0 gaze_flag = 0 while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] #audio_record(AUDIO_OUTPUT, 3) #sphinx_recog(AUDIO_OUTPUT) # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() gaze.refresh(frame) frame = gaze.annotated_frame() text = "" if facebox is not None: # Detect landmarks from image of 128x128. 
face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) gray = cv2.cvtColor(face_img, cv2.COLOR_BGR2GRAY) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) rects = detector(gray, 0) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # for rect in rects: # determine the facial landmarks for the face region, then # convert the facial landmark (x, y)-coordinates to a NumPy # array shape = predictor(gray, rect) shape = face_utils.shape_to_np( shape) #converting to NumPy Array矩阵运算 mouth = shape[Start:End] leftEye = shape[lStart:lEnd] rightEye = shape[rStart:rEnd] leftEAR = eye_aspect_ratio(leftEye) #眼睛长宽比 rightEAR = eye_aspect_ratio(rightEye) ear = (leftEAR + rightEAR) / 2.0 #长宽比平均值 lipdistance = lip_distance(shape) if (lipdistance > YAWN_THRESH): #print(lipdistance) flag0 += 1 print("yawning time: ", flag0) if flag0 >= 40: cv2.putText(frame, "Yawn Alert", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) cv2.putText(frame, "Yawn Alert", (220, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) engine.say("don't yawn") engine.runAndWait() flag0 = 0 else: flag0 = 0 if (ear < thresh): flag += 1 print("eyes closing time: ", flag) if flag >= frame_check: cv2.putText(frame, "****************ALERT!****************", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) cv2.putText(frame, "****************ALERT!****************", (10, 250), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2) engine.say("open your eyes") engine.runAndWait() flag = 0 else: flag = 0 if gaze.is_right(): print("Looking right") text = "Looking right" elif gaze.is_left(): print("Looking left") text = "Looking left" elif gaze.is_up(): text = "Looking up" else: text = "Looking center" if text is not "Looking center": gaze_flag += 1 if gaze_flag >= 20: engine.say("look forward") engine.runAndWait() gaze_flag = 0 else: gaze_flag = 0 marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # get angles angles = pose_estimator.get_angles(pose[0], pose[1]) if ((-8 > angles[0] or angles[0] > 8) or (-8 > angles[1] or angles[1] > 8)): head_flag += 1 if head_flag >= 40: print(angles[0]) engine.say("please look ahead") engine.runAndWait() else: head_flag = 0 # pose_estimator.draw_info(frame, angles) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1]) #pose_estimator.show_3d_model # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(1) & 0xFF == ord("q"): break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
def main(video_src): """MAIN :param video_src: Source of video to analyze :type video_src: str or int""" cam = cv2.VideoCapture(video_src) _, sample_frame = cam.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() face_detector = FaceDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=(mark_detector, img_queue, box_queue,)) #box_process = Process(target=get_faces, args=(face_detector, img_queue, box_queue,)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] #face expression recognizer initialization from keras.models import model_from_json model = model_from_json(open("./model/facial_expression_model_structure.json", "r").read()) model.load_weights('./model/facial_expression_model_weights.h5') #load weights #----------------------------- emotions = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral') while True: input() # Read frame, crop it, flip it, suits your needs. frame_got, frame = cam.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. #facebox = box_queue.get() faceboxes = dump_queue(box_queue) print("{} FACEBOXES".format(len(faceboxes))) for facebox in faceboxes: if min(facebox) < 0: continue # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] if not face_img.shape[0] or not face_img.shape[1]: continue face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) detected_face = frame[facebox[1]: facebox[3],facebox[0]: facebox[2]] #crop detected face detected_face = cv2.cvtColor(detected_face, cv2.COLOR_BGR2GRAY) #transform to gray scale detected_face = cv2.resize(detected_face, (48, 48)) #resize to 48x48 # emotion estimation img_pixels = image.img_to_array(detected_face) img_pixels = np.expand_dims(img_pixels, axis = 0) img_pixels /= 255 #pixels are in scale of [0, 255]. 
normalize all pixels in scale of [0, 1] predictions = model.predict(img_pixels) #store probabilities of 7 expressions #find max indexed array 0: angry, 1:disgust, 2:fear, 3:happy, 4:sad, 5:surprise, 6:neutral max_index = np.argmax(predictions[0]) emotion = emotions[max_index] #write emotion text above rectangle #V_FONT_HERSHEY_SIMPLEX normal size sans-serif font #CV_FONT_HERSHEY_PLAIN small size sans-serif font #CV_FONT_HERSHEY_DUPLEX normal size sans-serif font (more complex than CV_FONT_HERSHEY_SIMPLEX ) #CV_FONT_HERSHEY_COMPLEX normal size serif font #CV_FONT_HERSHEY_TRIPLEX normal size serif font (more complex than CV_FONT_HERSHEY_COMPLEX ) #CV_FONT_HERSHEY_COMPLEX_SMALL smaller version of CV_FONT_HERSHEY_COMPLEX #CV_FONT_HERSHEY_SCRIPT_SIMPLEX hand-writing style font #CV_FONT_HERSHEY_SCRIPT_COMPLEX more complex variant of CV_FONT_HERSHEY_SCRIPT_SIMPLEX image_text = "" for index in range(len(emotions)): if predictions[0][index]>0.3: image_text += "{0} : {1} %\n".format(emotions[index], int(predictions[0][index]*100)) space = 0 for text in image_text.strip().split("\n"): cv2.putText( img = frame, text= text, org =(int(facebox[0]), int(facebox[1])-space), fontFace = cv2.FONT_HERSHEY_PLAIN, fontScale = 0.8, color = (255,255,255), thickness = 1 ) space += int(0.25*48) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) #pose = pose_estimator.solve_pose(marks) # Stabilize the pose. #stabile_pose = [] #pose_np = np.array(pose).flatten() #for value, ps_stb in zip(pose_np, pose_stabilizers): # ps_stb.update([value]) # stabile_pose.append(ps_stb.state[0]) #stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotaion on frame. pose_estimator.draw_annotation_box( frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotaion on frame. #pose_estimator.draw_annotation_box( # frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
class HeadPoseEstimator():

    def __init__(self):
        print("OpenCV version: {}".format(cv2.__version__))

        self.CNN_INPUT_SIZE = 128

        self.mark_detector = MarkDetector()

        height = 480
        width = 640
        self.pose_estimator = PoseEstimator(img_size=(height, width))

        self.pose_stabilizers = [
            Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1)
            for _ in range(8)
        ]

        self.img_queue = Queue()
        self.box_queue = Queue()
        self.box_process = Process(target=self.get_face,
                                   args=(self.mark_detector, self.img_queue, self.box_queue,))

        self.process_ctrl_flag = rospy.get_param("process_ctrl", True)
        self.process_ctrl_subscriber = rospy.Subscriber(
            "/head_pose_estimator/process_ctrl", Bool, self.process_ctrl_callback)

        self.sub_img_name = rospy.get_param("sub_img_name", "/usb_cam/image_raw")
        self.img_subscriber = rospy.Subscriber(self.sub_img_name, Image, self.img_callback)

        self.show_result_img_flag = rospy.get_param("show_result_img", True)
        self.show_axis_flag = rospy.get_param("show_axis", True)
        self.show_annotation_box_flag = rospy.get_param("show_annotation_box", True)

        self.pub_result_img_flag = rospy.get_param("pub_result_img", True)
        self.img_publisher = rospy.Publisher(
            "/head_pose_estimator/result_image", Image, queue_size=10)
        self.result_publisher = rospy.Publisher(
            "/head_pose_estimator/head_pose", HeadPose, queue_size=10)

        self.box_process.start()

    def get_face(self, detector, img_queue, box_queue):
        """Take faces out of the image queue.

        This runs in a separate process (multiprocessing).
        """
        while not rospy.is_shutdown():
            image = img_queue.get()
            box = detector.extract_cnn_facebox(image)
            box_queue.put(box)

    def process_ctrl_callback(self, msg):
        self.process_ctrl_flag = msg.data

    def euler_to_quaternion(self, euler):
        # Convert Euler angles to a quaternion.
        # Adjust the values so the result is mirrored.
        q = tf.transformations.quaternion_from_euler(euler.x, euler.y, euler.z)
        return Quaternion(x=q[1], y=-q[2], z=-q[3], w=q[0])
        # return Quaternion(x=q[0], y=q[1], z=q[2], w=q[3])

    def img_callback(self, img_msg):
        if self.process_ctrl_flag == True:
            try:
                cv_img = CvBridge().imgmsg_to_cv2(img_msg, "bgr8")
                flip_frame = cv2.flip(cv_img, 2)  # mirror the image
                frame = cv_img
                img_height, img_width, _ = frame.shape[:3]

                self.img_queue.put(frame)
                facebox = self.box_queue.get()

                # Face detection succeeded.
                if facebox is not None:
                    # Crop the face from the image and find the landmarks with the CNN.
                    # img[top : bottom, left : right]
                    face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                    face_img = cv2.resize(face_img, (self.CNN_INPUT_SIZE, self.CNN_INPUT_SIZE))
                    face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                    face_rect = FaceRect()
                    face_rect.x = facebox[0]
                    face_rect.y = facebox[1]
                    face_rect.width = facebox[2] - facebox[0]
                    face_rect.height = facebox[3] - facebox[1]
                    # face_rect.x = img_width - facebox[0] - (facebox[2] - facebox[0])
                    # face_rect.y = facebox[1]
                    # face_rect.width = facebox[2] - facebox[0]
                    # face_rect.height = facebox[3] - facebox[1]

                    marks = self.mark_detector.detect_marks([face_img])
                    marks *= (facebox[2] - facebox[0])
                    marks[:, 0] += facebox[0]
                    marks[:, 1] += facebox[1]

                    # Uncomment following line to show raw marks.
                    # self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                    # Uncomment following line to show facebox.
                    # self.mark_detector.draw_box(frame, [facebox])

                    pose = self.pose_estimator.solve_pose_by_68_points(marks)
                    # rotation = pose[0]
                    # euler = Vector3(x=rotation[0], y=rotation[1], z=rotation[2])
                    # quaternion = self.euler_to_quaternion(euler)

                    # Stabilize the pose.
                    steady_pose = []
                    pose_np = np.array(pose).flatten()
                    for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                        ps_stb.update([value])
                        steady_pose.append(ps_stb.state[0])
                    steady_pose = np.reshape(steady_pose, (-1, 3))

                    rotation = steady_pose[0]
                    euler = Vector3(x=rotation[0], y=rotation[1], z=rotation[2])
                    head_rotation = self.euler_to_quaternion(euler)
                    # print(head_rotation)
                    # print("---")

                    head_pose = HeadPose()
                    head_pose.face_rect = face_rect
                    head_pose.head_rotation = head_rotation
                    self.result_publisher.publish(head_pose)

                    # Uncomment following line to draw raw pose annotation on frame.
                    # self.pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128))

                    # Draw stable pose annotation on frame.
                    if self.show_annotation_box_flag == True:
                        self.pose_estimator.draw_annotation_box(
                            frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                    # Draw head axes on frame.
                    # draw_axes does not work with OpenCV 3.4.0.
                    # self.pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])
                    if self.show_axis_flag == True:
                        self.pose_estimator.draw_axis(frame, steady_pose[0], steady_pose[1])

                if self.show_result_img_flag == True:
                    cv2.imshow("result", frame)
                    cv2.waitKey(1)

                if self.pub_result_img_flag == True:
                    try:
                        pub_img = CvBridge().cv2_to_imgmsg(frame, "bgr8")
                        self.img_publisher.publish(pub_img)
                    except CvBridgeError as e:
                        rospy.logerr(str(e))
            except CvBridgeError as e:
                rospy.logerr(str(e))
class PoseHeadNode(object):

    def __init__(self):
        # Subscribe.
        rospy.Subscriber("/pose_estimator/pose", Persons, self.pose_cb)
        rospy.Subscriber("/camera/color/image_raw", Image, self.color_callback)
        # Publish.
        # self.pub_headpose_info = rospy.Publisher("/headpose_info", HeadPoseInfo,
        #                                          queue_size=10)
        # Data.
        self.humans = None
        self.color_img = None
        self.init()

    def init(self):
        self.detector = RetinaFace('./model/R50', 0, 0, 'net3')
        self.mark_detector = MarkDetector()
        self.pose_stabilizers = [Stabilizer(
            state_num=2,
            measure_num=1,
            cov_process=0.1,
            cov_measure=0.1) for _ in range(6)]
        sample_img = None
        # if self.color_img is None:
        #     print("None")
        #     time.sleep(0.2)
        # height, width, _ = self.color_img.shape
        self.pose_estimator = PoseEstimator(img_size=(480, 640))

    def pose_cb(self, data):
        if self.color_img is None:
            return
        h, w = self.color_img.shape[:2]
        # ROS topic to Person instance.
        humans = []
        for p_idx, person in enumerate(data.persons):
            human = Human([])
            for body_part in person.body_part:
                part = BodyPart('', body_part.part_id, body_part.x, body_part.y,
                                body_part.confidence)
                human.body_parts[body_part.part_id] = part
            humans.append(human)
        self.humans = humans
        # self.image_test_pose = TfPoseEstimator.draw_humans(self.color_img, humans, imgcopy=False)

    def draw_pose(self, img, humans):
        if img is None or humans is None:
            # print("Cannot draw pose on {} image and {} humans".format(img, humans))
            return None
        return TfPoseEstimator.draw_humans(img, humans, imgcopy=False)

    def color_callback(self, data):
        # Need a semaphore to protect self.color_img, because it is also used by
        # the is_waving_hand function.
        # print("Having color image")
        # cv_image = self.bridge.imgmsg_to_cv2(data, data.encoding)
        # print(cv_image)
        decoded = np.frombuffer(data.data, np.uint8)
        img = np.reshape(decoded, (480, 640, 3))
        print("COLOR_CALLBACK", img.shape)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        self.color_img = img

    def publish_headpose_info(self, viz=True):
        thresh = 0.8
        time.sleep(0.1)
        if self.color_img is None:
            print("Color img None")
            return

        im_shape = self.color_img.shape
        # scales = [1024, 1980]
        scales = [480 * 2, 640 * 2]
        target_size = scales[0]
        max_size = scales[1]
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        thresh = 0.8
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        scales = [im_scale]

        frame = copy.deepcopy(self.color_img)
        t1 = time.time()
        faceboxes, landmark = self.detector.detect(frame, thresh, scales=scales, do_flip=False)
        mess = "Not detect pose"
        if faceboxes is not None and len(faceboxes) > 0:
            # if isinstance(faceboxes[1], int):
            #     faceboxes = [faceboxes]
            for facebox in faceboxes:
                facebox = facebox.astype(np.int)
                # Detect landmarks from image of 128x128.
                face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]]
                face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE))
                face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)

                marks = self.mark_detector.detect_marks([face_img])

                # Convert the marks locations from local CNN to global image.
                marks *= (facebox[2] - facebox[0])
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

                # Show raw marks.
                self.mark_detector.draw_marks(frame, marks, color=(0, 255, 0))

                # Uncomment following line to show facebox.
                # mark_detector.draw_box(frame, [facebox])

                # Try pose estimation with 68 points.
                pose = self.pose_estimator.solve_pose_by_68_points(marks)

                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, self.pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])
                steady_pose = np.reshape(steady_pose, (-1, 3))

                # Draw raw pose annotation on frame.
                self.pose_estimator.draw_annotation_box(
                    frame, pose[0], pose[1], color=(255, 128, 128))

                t2 = time.time()
                mess = round(1 / (t2 - t1), 2)

                # Uncomment following lines to draw stable pose annotation on frame.
                # self.pose_estimator.draw_annotation_box(
                #     frame, steady_pose[0], steady_pose[1], color=(128, 255, 128))

                # Uncomment following line to draw head axes on frame.
                # self.pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1])

        cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), thickness=2)

        # Show preview.
        if viz:
            frame = self.draw_pose(frame, self.humans)
            if frame is None:
                return
            cv2.imshow("Preview", frame)
            cv2.waitKey(1)