class VideoImgManager(): def __init__(self): self.POSE_ESTIMATOR = PoseEstimator() self.FRAME_OPS = FrameOperations() self.FIRST = True def estimate_vid(self, webcam_id=0): """reads webcam, applies pose estimation on webcam""" cap = cv.VideoCapture(webcam_id) while (True): has_frame, frame = cap.read() if self.FIRST: self.WEB_CAM_H, self.WEB_CAM_W = frame.shape[0:2] self.FIRST = False frame = self.POSE_ESTIMATOR.get_pose_key_angles(frame) cv.imshow('frame', frame) if cv.waitKey(1) & 0xFF == ord('q'): break def estimate_img(self, img_path): """applies pose estimation on img""" img = cv.imread(img_path) img = self.POSE_ESTIMATOR.get_pose_key_angles(img) cv.imshow("Image Pose Estimation", img) cv.waitKey(0) cv.destroyAllWindows()
def __init__(self): print "OpenCV version: {}".format(cv2.__version__) self.CNN_INPUT_SIZE = 128 self.mark_detector = MarkDetector() height = 480 width = 640 self.pose_estimator = PoseEstimator(img_size=(height, width)) self.pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(8) ] self.img_queue = Queue() self.box_queue = Queue() self.box_process = Process(target=self.get_face, args=( self.mark_detector, self.img_queue, self.box_queue, )) self.process_ctrl_flag = rospy.get_param("process_ctrl", True) self.process_ctrl_subscriber = rospy.Subscriber( "/head_pose_estimator/process_ctrl", Bool, self.process_ctrl_callback) self.sub_img_name = rospy.get_param("sub_img_name", "/usb_cam/image_raw") self.img_subscriber = rospy.Subscriber(self.sub_img_name, Image, self.img_callback) self.show_result_img_flag = rospy.get_param("show_result_img", True) self.show_axis_flag = rospy.get_param("show_axis", True) self.show_annotation_box_flag = rospy.get_param( "show_annotation_box", True) self.pub_result_img_flag = rospy.get_param("pub_result_img", True) self.img_publisher = rospy.Publisher( "/head_pose_estimator/result_image", Image, queue_size=10) self.result_publisher = rospy.Publisher( "/head_pose_estimator/head_pose", HeadPose, queue_size=10) self.box_process.start()
def __init__(self): # Load the parameters self.conf = config() # initialize dlib's face detector (HOG-based) and then create the # facial landmark predictor print("[INFO] loading facial landmark predictor...") self.detector = dlib.get_frontal_face_detector() self.predictor = dlib.shape_predictor(self.conf.shape_predictor_path) # grab the indexes of the facial landmarks for the left and # right eye, respectively (self.lStart, self.lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"] (self.rStart, self.rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"] # initialize the video stream and sleep for a bit, allowing the # camera sensor to warm up self.cap = cv2.VideoCapture(0) if self.conf.vedio_path == 0: self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = self.cap.read() # Introduce mark_detector to detect landmarks. self.mark_detector = MarkDetector() # Setup process and queues for multiprocessing. self.img_queue = Queue() self.box_queue = Queue() self.img_queue.put(sample_frame) self.box_process = Process(target=get_face, args=( self.mark_detector, self.img_queue, self.box_queue,)) self.box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. self.height, self.width = sample_frame.shape[:2] self.pose_estimator = PoseEstimator(img_size=(self.height, self.width)) # Introduce scalar stabilizers for pose. self.pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] self.tm = cv2.TickMeter() # Gaze tracking self.gaze = GazeTracking()
def init(self): self.detector = RetinaFace('./model/R50', 0, 0, 'net3') self.mark_detector = MarkDetector() self.pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] sample_img = None #if self.color_img is None: # print("None") # time.sleep(0.2) # #height, width, _ = self.color_img.shape self.pose_estimator = PoseEstimator(img_size=(480, 640))
def get_source_shoulder_details(self, source_img, cloth_seg): "get source shoulder distance and rotation angle" try: # initialize source pose estimator self.source_pose_estimator = PoseEstimator(source_img) source_distance = self.source_pose_estimator.get_shoulder_details() source_points = self.source_pose_estimator.get_shoulder_points() except Exception as e: print(str(e)) # if no source shoulder points detected self.error_list.append("Issue in source image:" + str(e)) logging.warning( "Using manual shoulder detection for source image.") source_points, source_distance = custom_shoulder_locator.get_shoulder_details_mannual( cloth_seg) return source_points, source_distance
def run(self): estimator = PoseEstimator("./usb_camera.yml") # 2. 加载图片 capture = cv2.VideoCapture(2) if not capture.isOpened(): print("摄像头打开失败!") return capture.set(cv2.CAP_PROP_FRAME_WIDTH, 1280) capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 960) try: while not rospy.is_shutdown(): _, frame = capture.read() if self.robot.isMoving(): continue rst, image, pose = estimator.estimate(frame) cv2.imshow("img", image) if rst: print(("xyz-rpy: " + "{:>8.3f}," * 6).format( pose[0], pose[1], pose[2], pose[3], pose[4], pose[5])) self.robot.move_rotation(radians(-pose[4])) else: print("未发现目标") #self.robot.move_rotation(radians(90)) self.robot.stop() action = cv2.waitKey(30) & 0xff if action == ord('s'): cv2.imwrite("save_line.jpg", image) elif action == 27: break self.rate.sleep() except KeyboardInterrupt as e: print("Ctrl + c 主动停止了程序") finally: capture.release() cv2.destroyAllWindows()
def get_frame3(self, mark_detector, pose_stabilizers, flag1): global height, width success, image = self.video.read() print(image) if flag1 == True: height, width = image.shape[:2] # height = self.video.get(cv2.CAP_PROP_FRAME_HEIGHT) # width = self.video.get(cv2.CAP_PROP_FRAME_WIDTH) flag1 = False pose_estimator = PoseEstimator(img_size=(height, width)) img4 = pose(image, mark_detector, pose_estimator, pose_stabilizers) ret, jpeg = cv2.imencode('.jpg', img4) return jpeg.tobytes(), flag1
def init(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print( "Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() return cap, video_src, img_queue, box_queue, tm, mark_detector, pose_estimator
def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print("Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue,)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) if args.out != None: fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v') output_movie = cv2.VideoWriter(args.out, fourcc, 30, (width, height)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] tm = cv2.TickMeter() cnt = 0 input_path = args.input_path listdir = os.listdir(input_path) for v_name in listdir: v_path = os.path.join(input_path, v_name) cap = cv2.VideoCapture(v_path) while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks(face_img) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box( frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1]) # Show preview. # cv2.imshow("Preview", frame) # if cv2.waitKey(10) == 27: # break if args.out != None: output_movie.write(frame) else: cv2.imshow("Preview", frame) cnt = cnt + 1 if cnt % 100 == 0: print(str(cnt), flush=True) # Clean up the multiprocessing process. box_process.terminate() box_process.join() cv2.destroyAllWindows()
def Camera_Capture(): mark_detector = MarkDetector() name = input("Enter student id:") directory = os.path.join(facepath, name) if not os.path.exists(facepath): os.makedirs(facepath, exist_ok = 'True') if not os.path.exists(directory): try: os.makedirs(directory, exist_ok = 'True') except OSError as e: if e.errno != errno.EEXIST: print('invalid student id or access denied') return poses=['frontal','right','left','up','down'] file=0 cap = cv2.VideoCapture(file) ret, sample_frame = cap.read() i = 0 count = 0 if ret==False: return # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] images_saved_per_pose=0 number_of_images = 0 shape_predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat") face_aligner = FaceAligner(shape_predictor, desiredFaceWidth=FACE_WIDTH) while i<5: saveit = False # Read frame, crop it, flip it, suits your needs. ret, frame = cap.read() if ret is False: break if count % 5 !=0: # skip 5 frames count+=1 continue if images_saved_per_pose==IMAGE_PER_POSE: i+=1 images_saved_per_pose=0 # If frame comes from webcam, flip it so it looks like a mirror. if file == 0: frame = cv2.flip(frame, 2) original_frame=frame.copy() frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) facebox = mark_detector.extract_cnn_facebox(frame) if facebox is not None: # Detect landmarks from image of 128x128. x1=max(facebox[0]-0,0) x2=min(facebox[2]+0,width) y1=max(facebox[1]-0,0) y2=min(facebox[3]+0,height) face = frame[y1: y2,x1:x2] face_img = cv2.resize(face, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks([face_img]) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # print(steady_pose[0][0]) # if steady_pose[0][0]>0.1: # print('right') # else: # if steady_pose[0][0]<-0.1: # print('left') # if steady_pose[0][1]>0.1: # print('down') # else: # if steady_pose[0][1]<-0.1: # print('up') # print(steady_pose[0]) if i==0: if abs(steady_pose[0][0])<ANGLE_THRESHOLD and abs(steady_pose[0][1])<ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==1: if steady_pose[0][0]>ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==2: if steady_pose[0][0]<-ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==3: if steady_pose[0][1]<-ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True if i==4: if steady_pose[0][1]>ANGLE_THRESHOLD: images_saved_per_pose+=1 saveit = True # Show preview. if i>=5: print ('Thank you') break frame = cv2.putText(frame,poses[i] +' : '+ str(images_saved_per_pose)+'/'+str(IMAGE_PER_POSE),(10,50),cv2.FONT_HERSHEY_SIMPLEX,2,(255,255,255),1,cv2.LINE_AA) frame = cv2.rectangle(frame, (x1,y1), (x2,y2),(255,255,0),2) cv2.imshow("Preview", frame) if saveit: face = dlib.rectangle(x1, y1, x2, y2) face_aligned = face_aligner.align(original_frame, frame_gray, face) cv2.imwrite(os.path.join(directory, str(name)+'_'+str(number_of_images)+'.jpg'), face_aligned) print(images_saved_per_pose) number_of_images+=1 if cv2.waitKey(10) == 27: break cap.release() cv2.destroyAllWindows()
def headPoseEstimation(): print("HEAD POSE ESTIMATION...") """MAIN""" # Video source from webcam or video file. #video_src = 0 #video_src = 'EWSN.avi' #cam = cv2.VideoCapture(video_src) #_, sample_frame = cam.read() # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print( "Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() print("1") # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() print("2") # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] print("3") noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]] counter = 0 font = cv2.FONT_HERSHEY_SIMPLEX numXPoints = 0 numYPoints = 0 sumX = 0 sumY = 0 # start = time.time() # previousTime1 = start # previousTime2 = start currentTime = 0 previousTime1 = 0 previousTime2 = 0 directionArray = [] moveSequence = [] moves = [] classifyMoves = 0 headPoseDirection = 'emtpy' while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break #print("4") #print(frame.shape) #print("5") # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. #if video_src == 0: # print("6") # frame = cv2.flip(frame, 2) # cv2.imwrite("Preview.png", frame) # print("7") # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) #print("8") # Get face from box queue. facebox = box_queue.get() #print(type(facebox) #print(facebox #print("9") if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) print(face_img.shape) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # # Get average position of nose noseMarksTemp = [] noseMarksTemp.append(marks[30][0]) noseMarksTemp.append(marks[30][1]) noseMarks[0] = noseMarksTemp for i in range(9, 0, -1): noseMarks[i] = noseMarks[i - 1] # Get the direction of head movement headPoseDirection = calculateDirection(noseMarks) #if headPoseDirection != 'still': directionArray.append(headPoseDirection) #print(directionArray #print(len(directionArray) print("To capture a movement press 'a' and perform a movement.") #currentTime1 = time.time() if cv2.waitKey(5) == ord('a') and not classifyMoves: classifyMoves = 1 print("Start classifying movement...") timer = time.time() currentTime = timer previousTime1 = timer previousTime2 = timer if cv2.waitKey(5) == ord('b') and classifyMoves: classifyMoves = 0 print("Stopped classifying movement...") currentTime = time.time() if currentTime - previousTime1 > 2 and classifyMoves: print("------------------------------------------------") print("Elapsed timer 1: " + str(currentTime - previousTime1)) #print(len(directionArray) # Get most common direction moveClass = mostCommon(directionArray) #moveSequence.append(moveClass) print(moveClass) # Get a sequence of head movements # if currentTime - previousTime2 > 10 and classifyMoves == 1 and len(moves) == 0: # print("Elapsed timer 2: " + str(currentTime - previousTime2) # numMoves = len(moveSequence) # moves = moveSequence[(numMoves-5):(numMoves-1)] # print(moves # moveSequence = [] # previousTime2 = currentTime # classifyMoves = 0 classifyMoves = 0 directionArray = [] previousTime1 = currentTime print( "To continue type 'c' or to recapture a movement type 'a'." ) if cv2.waitKey(5) == ord('c'): break #print(previousTime # Uncomment following line to show raw marks. #mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. stabile_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) stabile_pose.append(ps_stb.state[0]) stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotaion on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotaion on frame. #pose_estimator.draw_annotation_box( # frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) # if len(moves) > 1: # cv2.putText(frame, moves[0], (450,70), font, 1, (0,0,0), 4) # cv2.putText(frame, moves[1], (450,100), font, 1, (0,0,0), 4) # cv2.putText(frame, moves[2], (450,130), font, 1, (0,0,0), 4) # cv2.putText(frame, moves[3], (450,160), font, 1, (0,0,0), 4) cv2.putText(frame, headPoseDirection, (20, 70), font, 1, (0, 255, 0), 4) # Show preview. #cv2.namedWindow("", cv2.WINDOW_NORMAL) cv2.imshow("Preview", frame) #cv2.resizeWindow("preview", 5000,5000) if cv2.waitKey(5) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join() return moveClass
def main(): #bagreader = BagFileReader(args.video, 640,480,848,480,30,30) bagreader = BagFileReader(args.video, 640, 480, 640, 480, 15, 15) # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() sample_frame = bagreader.get_color_frame() sample_frame = cv2.cvtColor(sample_frame, cv2.COLOR_BGR2RGB) height, width, _ = sample_frame.shape fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('output-%s.avi' % args.name_output, fourcc, args.fps, (width, height)) # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: t1 = time.time() # Read frame, crop it, flip it, suits your needs. frame = bagreader.get_color_frame() frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) if frame is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. # if video_src == 0: # frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. faceboxes = box_queue.get() print(faceboxes) mess = "Not detect pose" if faceboxes is not None: if isinstance(faceboxes[1], int): faceboxes = [faceboxes] for facebox in faceboxes: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. pose_estimator.draw_annotation_box(frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. t2 = time.time() mess = round(1 / (t2 - t1), 2) # pose_estimator.draw_annotation_box( # frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. # pose_estimator.draw_axes(frame, stabile_pose[0], stabile_pose[1]) cv2.putText(frame, "FPS: " + "{}".format(mess), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), thickness=2) # Show preview. cv2.imshow("Preview", frame) out.write(frame) if cv2.waitKey(1) & 0xFF == ord('q'): break out.release() # Clean up the multiprocessing process. box_process.terminate() box_process.join()
def main(): instructions = 0 print "--- Intention classification for communication between a human and a robot ---" if instructions == 1: print "First you will be required to present a facial expression before you will do a head movement." print "If done correctly these gestures will be detected by the robot and it will perform the desired task." raw_input("Press Enter to continue...") if instructions == 1: print "This is the module for facial expression recognition." print "This program can detect the emotions: Happy and Angry." print "The program will look for the expression for 3 seconds." raw_input("To proceed to Facial Expression Recognition press Enter...") predictExp = 0 # Load Facial Expression Recognition trained model print "- Loading FER model... -" #faceExpModel = tf.keras.models.load_model("/home/bjornar/ML_models/FER/Good models(80+)/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5") faceExpModel = tf.keras.models.load_model( "/home/project/Bjorn/tf_keras_weights_ninthRev-88percent/tf_keras_weights_ninthRev.hdf5" ) # Load Face Cascade for Face Detection print "- Loading Face Cascade for Face Detection... -" #cascPath = "/home/bjornar/MScDissertation/TrainingData/FaceDetection/haarcascade_frontalface_default.xml" cascPath = "/home/project/Bjorn/IntentionClassification-Repository/haarcascade_frontalface_default.xml" faceCascade = cv2.CascadeClassifier(cascPath) ## Initializing Head Movement variables # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() sample_frame = cv2.imread("sample_frame.png") # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] noseMarks = [[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]] counter = 0 font = cv2.FONT_HERSHEY_SIMPLEX numXPoints = 0 numYPoints = 0 sumX = 0 sumY = 0 currentTime = 0 previousTime1 = 0 previousTime2 = 0 directionArray = [] moveSequence = [] moves = [] classifyMoves = 0 headPoseDirection = 'emtpy' if camera == 'kinect': ## Initialize Kinect camera print "Initializing camera..." try: from pylibfreenect2 import OpenGLPacketPipeline pipeline = OpenGLPacketPipeline() except: try: from pylibfreenect2 import OpenCLPacketPipeline pipeline = OpenCLPacketPipeline() except: from pylibfreenect2 import CpuPacketPipeline pipeline = CpuPacketPipeline() #print("Packet pipeline:", type(pipeline).__name__) # Create and set logger #logger = createConsoleLogger(LoggerLevel.Debug) setGlobalLogger() fn = Freenect2() num_devices = fn.enumerateDevices() if num_devices == 0: print("- No device connected! -") sys.exit(1) serial = fn.getDeviceSerialNumber(0) device = fn.openDevice(serial, pipeline=pipeline) listener = SyncMultiFrameListener(FrameType.Color | FrameType.Ir | FrameType.Depth) # Register listeners device.setColorFrameListener(listener) device.setIrAndDepthFrameListener(listener) device.start() # NOTE: must be called after device.start() registration = Registration(device.getIrCameraParams(), device.getColorCameraParams()) elif camera == 'webcam': #video_capture = cv2.VideoCapture(0) video_capture = cv2.VideoCapture(-1) elif camera == 'video': #video_capture = cv2.VideoCapture(0) video_capture = cv2.VideoCapture( "/home/project/Bjorn/SonyHandycamTest.mp4") ## Facial Expression Recognition variables FER_prediction = [] FERclass = '' FERstart = 0 classifyMoves = 0 ## Head movement variables predictHeadMov = 3 HeadMov = [] HMCclass = '' detectedFaces = [] mark = [] notDone = 0 progressStatus = [0, 0] while notDone in progressStatus: # This waits for each module to finsih if camera == 'kinect': frames = listener.waitForNewFrame() color = frames["color"] color = color.asarray() color = cv2.resize(color, (int(873), int(491))) color = color[0:480, 150:790] color = np.delete(color, np.s_[3::], 2) elif camera == 'webcam': # Capture frame-by-frame ret, color = video_capture.read() elif camera == 'video': # Capture frame-by-frame ret, color = video_capture.read() ## Detect facial expression predictExpNums = [1, 2] if predictExp == 0: while predictExp not in predictExpNums: predictExp = int( raw_input( "\nPress 1 to detect Facial Expression or press 2 to do Head Movement classification." )) if predictExp == 1: predictExp = 1 print "------ Facial Expression Recognition ------" elif predictExp == 2: predictHeadMov = 0 progressStatus[0] = 1 FERstart = time.time() elif predictExp == 1: currentTime = time.time() if currentTime - FERstart < 10: FER_prediction.append( facialExpressionRecogntion(color, faceExpModel, faceCascade)) else: predictExp = 2 FER_prediction = filter(None, FER_prediction) FERclass = mostCommon(FER_prediction) FERclass = FERclass[2:7] predictHeadMov = 0 if FERclass == '': print("Did not detect an expression, try again.") predictExp = 0 else: progressStatus[0] = 1 print "Detected expression: " + str(FERclass) print "Progress: FER DONE" # else: # #cv2.imwrite("sample_frame.png", color) # break ## Detect head movement class if predictHeadMov == 0: predictHeadMov = int( raw_input( "\nPress 1 to do Head Movement classification or 2 to skip." )) if predictHeadMov == 1: predictHeadMov = 1 print "------ Head Movement Classification ------" moveTime = int( raw_input( "How many seconds should I record your movement?")) #moveTime = 2 else: predictHeadMov = 2 timer = time.time() previousTime1 = timer previousTime2 = timer if predictHeadMov == 1: print color.shape color = color[0:480, 0:480] color = cv2.cvtColor(color, cv2.COLOR_BGR2GRAY) cv2.imshow("", color) raw_input() print color.shape # Feed frame to image queue. img_queue.put(color) #pdb.set_trace() # Get face from box queue. facebox = box_queue.get() print color.shape if facebox is not None: # Detect landmarks from image of 128x128. face_img = color[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) #Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] font = cv2.FONT_HERSHEY_SIMPLEX cv2.putText(color, headPoseDirection, (20, 70), font, 1, (0, 255, 0), 4) # # Get average position of nose noseMarksTemp = [] noseMarksTemp.append(marks[30][0]) noseMarksTemp.append(marks[30][1]) noseMarks[0] = noseMarksTemp for i in range(9, 0, -1): noseMarks[i] = noseMarks[i - 1] # Get the direction of head movement headPoseDirection = calculateDirection(noseMarks) directionArray.append(headPoseDirection) if classifyMoves == 0: classifyMoves = 1 timer = time.time() previousTime1 = timer previousTime2 = timer currentTime = time.time() if currentTime - previousTime1 > moveTime and classifyMoves == 1: print "------------------------------------------------" print "Elapsed timer 1: " + str(currentTime - previousTime1) # Get most common direction HMCclass = mostCommon(directionArray) classifyMoves = 2 directionArray = [] previousTime1 = currentTime progressStatus[1] = 1 print "Progress: HMC DONE" else: print "Did not detect a face" elif predictHeadMov == 2: progressStatus[1] = 1 print "Skipped Head Movement Estimation." break # if notDone in progressStatus and predictHeadMov == 2 and predictExp == 2: # print "You seem to have skipped one or more tasks." # inpt = '' # while inpt == '': # inpt = raw_input("To do FER press 1 and to do HMC press 2...") # if input == '1': # predictExp = 1 # elif input == '2': # predictHeadMov cv2.imshow("", color) if camera == 'kinect': listener.release(frames) key = cv2.waitKey(delay=1) if key == ord('q'): break if camera == 'kinect': listener.release(frames) device.stop() device.close() elif camera == 'webcam' or camera == 'video': video_capture.release() cv2.destroyAllWindows() # Clean up the multiprocessing process. box_process.terminate() box_process.join() print "---------------- RESULT ----------------" if FERclass != '': print "Detected facial expression: " + str(FERclass) else: print "Did not detect any expression." if HMCclass != '': print "Detected head movement: " + str(HMCclass) else: print "Did not detect a head movement." print "----------------------------------------" intentionClassification = [FERclass, HMCclass] return intentionClassification
def __init__(self, source_img, dest_img): self.source_img = source_img self.dest_img = dest_img self.dest_pose_estimator = PoseEstimator(cv.imread(self.dest_img)) self.source_pose_estimator = None # initialize after cloth extraction self.error_list = []
import os import cv2 as cv import sys sys.path.append(os.path.dirname(os.path.dirname(__file__))) from pose_estimator import PoseEstimator # test run SAMPLES_DIR = os.environ['ROOT_DIR']+"/src/flask_app/static/images/samples" sample_file = SAMPLES_DIR+"/profile_images/sample-profile-image1.jpg" img = cv.imread(sample_file) PoseEstimator(img).visualize_pose()
def main(): print("Starting processing...") start_time = time.time() cap = cv2.VideoCapture(video_file) assert cap.isOpened(), 'Failed to open video file' s, first_frame = cap.read() if not (s): print("Failed to read dimensions from video file!") return dimensions = tuple(first_frame.shape[i] * VIDEO_RESCALE_FACTOR for i in range(2)) pe = PoseEstimator(dimensions) with open(output_filename, "w+") as output_file: fieldnames = ["timestamp"] + [f"rot_{d}" for d in "xyz"] + [ f"pos_{d}" for d in "xyz" ] + [f"landmark_{i}_{d}" for i in range(68) for d in "xy"] mvid_writer = csv.DictWriter(output_file, fieldnames=fieldnames) mvid_writer.writeheader() last_row_dict = None while cap.isOpened(): s, frame = cap.read() if not (s): print("No more frames!") break timestamp = int(cap.get(cv2.CAP_PROP_POS_MSEC)) success, labelled_frame, landmarks, pose_estimation_pts = detect( frame, mark=should_display) row_dict = {} if success: rescaled_frame = rescale_frame(labelled_frame, VIDEO_RESCALE_FACTOR) head_rotation, head_translation = pe.estimate_pose( pose_estimation_pts) row_dict["timestamp"] = timestamp for i, d in enumerate("xyz"): row_dict[f"rot_{d}"] = head_rotation[i][0] row_dict[f"pos_{d}"] = head_translation[i][0] for i, l in enumerate(landmarks): for j, d in enumerate("xy"): row_dict[f"landmark_{i}_{d}"] = l[j] else: print(f"Couldn't find face at timestamp {timestamp}") row_dict = last_row_dict row_dict["timestamp"] = timestamp mvid_writer.writerow(row_dict) last_row_dict = row_dict if should_display: cv2.imshow(WINDOW_TITLE, frame) if cv2.waitKey( 1 ) & 0xFF == 27: # 27 is ASCII for the Esc key on a keyboard break cap.release() cv2.destroyAllWindows() print(f"Time consumed: {time.time() - start_time}") print(f"Video feature data exported to: {output_filename}")
def process(self, image, output_path=None, output_name=None): image_origin = image check_dir(output_path) cv2.imwrite(join(output_path, output_name + '.origin.png'), image_origin) # image_name = # image_origin = cv2.imread(image_path) image_origin_height, image_origin_width = image_origin.shape[0:2] # print('Width', image_origin_width, 'Height', image_origin_height) image_crop, image_edge = format_image_rgb(image_origin) cv2.imwrite(join(output_path, output_name + '.landmark.crop.png'), image_crop) # print('Image Data', image_crop、, 'Image Edge', image_edge) image_crop_resize = cv2.resize(image_crop, (128, 128)) cv2.imwrite(join(output_path, output_name + '.landmark.resize.png'), image_crop_resize) # image_data = cv2.cvtColor(image_data, cv2.COLOR_BGR2RGB) # print('Image', image_crop_resize) predictions = self.sess.run(self.landmark_logits, feed_dict={self.inputs: image_crop_resize}) # print(predictions) # print('Len predictions', predictions) marks = np.array(predictions).flatten() marks = np.reshape(marks, (-1, 2)) # print(marks) # width = # print('Image edge shape', image_edge) # to do multiply marks *= (image_edge[2] - image_edge[0]) marks[:, 0] += image_edge[0] marks[:, 1] += image_edge[1] # print(marks) with open(join(output_path, output_name + '.marks.txt'), 'w', encoding='utf-8') as f: f.write(json.dumps(marks.tolist())) for mark in marks: cv2.circle(image_origin, tuple(mark), 3, (255, 0, 0)) cv2.imwrite(join(output_path, output_name + '.landmark.png'), image_origin) pose_estimator = PoseEstimator(img_size=(image_origin_height, image_origin_width)) # pose_estimator pose = pose_estimator.solve_pose_by_68_points(marks) print('Pose', pose) with open(join(output_path, output_name + '.pose.txt'), 'w', encoding='utf-8') as f: f.write(json.dumps(pose)) return pose
from __future__ import print_function import sys sys.path.append("../Math") import math from pose_estimator import PoseEstimator from rigidtransform import RigidTransform from rigidtransform import Twist from translation import Translation from rotation import Rotation import matplotlib.pyplot as plt estimator = PoseEstimator() #random values #12 ft/s, 1 rad/s twist = Twist(12.0, 1) expected_end_pose = RigidTransform.exp(twist) print("Expected Ending Pose: ") expected_end_pose.print() plt.ion() axes = plt.gca() axes.set_xlim([-20, 20]) axes.set_ylim([-20, 20]) def plot(): iterations = 100 l_enc = 0
from pose_estimator import PoseEstimator import cv2 model = "posenet_mobilenet_v1_100_257x257_multi_kpt_stripped.tflite" image = cv2.imread("test.jpg") pose_estimator = PoseEstimator(model) coords, scores = pose_estimator(cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB)) ''' 0 nose 1 leftEye 2 rightEye 3 leftEar 4 rightEar 5 leftShoulder 6 rightShoulder 7 leftElbow 8 rightElbow 9 leftWrist 10 rightWrist 11 leftHip 12 rightHip 13 leftKnee 14 rightKnee 15 leftAnkle 16 rightAnkle ''' for coord in coords: p = (int(coord[1]), int(coord[0])) image = cv2.circle(image, p, radius=0, color=(0, 255, 0), thickness=10) cv2.imwrite("ntest.jpg", image)
def main(): """MAIN""" # Video source from webcam or video file. video_src = args.cam if args.cam is not None else args.video if video_src is None: print( "Warning: video source not assigned, default webcam will be used.") video_src = 0 cap = cv2.VideoCapture(video_src) if video_src == 0: cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. mark_detector.draw_marks(frame, marks, color=(0, 255, 0)) right_corner = tuple([int(i) for i in marks[36]]) left_corner = tuple([int(i) for i in marks[45]]) # print(marks[36], marks[45]) cv2.line(frame, right_corner, left_corner, (255, 0, 0), 2) pixel_distance = int( math.sqrt((right_corner[0] - left_corner[0])**2 + (right_corner[1] - left_corner[1])**2)) estimated_distance = (real_width * focal_length) / pixel_distance cv2.putText(frame, str(round(estimated_distance, 2)), (100, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 0, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. print(steady_pose[1]) pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1]) angles = process_eyes(frame, marks) if bool(angles) is True: # print(angles) angles = cvt_to_radians(angles) rotated_vector = rotate_vector(steady_pose[0], angles['right'][0], angles['right'][1]) shifted_translation_vector = np.copy(steady_pose[1]) shifted_translation_vector[0] += 50 shifted_translation_vector[1] += 50 pose_estimator.draw_axes(frame, rotated_vector, shifted_translation_vector) # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
def main(): """MAIN""" # Video source from webcam or video file. video_src = 0 cam = cv2.VideoCapture(video_src) _, sample_frame = cam.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue,)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cam.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. stabile_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) stabile_pose.append(ps_stb.state[0]) stabile_pose = np.reshape(stabile_pose, (-1, 3)) # Uncomment following line to draw pose annotaion on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotaion on frame. pose_estimator.draw_annotation_box( frame, stabile_pose[0], stabile_pose[1], color=(128, 255, 128)) # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
def main(): """MAIN""" cv2.namedWindow("Test") # Create a named window cv2.moveWindow("Test", 900, 600) # Move it to (40,30) screenWidth, screenHeight = pyautogui.size() st = 'Last command' cap = cv2.VideoCapture(0) cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = cap.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) box_process = Process(target=get_face, args=( mark_detector, img_queue, box_queue, )) box_process.start() # Setting up process for listening to audio commands voice_command_queue = Q() stt_process = Thread(target=get_voice_command, args=(voice_command_queue, )) stt_process.setDaemon(True) stt_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. facebox = box_queue.get() if facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks([face_img]) tm.stop() # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. steady_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) steady_pose.append(ps_stb.state[0]) steady_pose = np.reshape(steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(255, 128, 128)) # Uncomment following line to draw head axes on frame. endpoints = pose_estimator.getEndPoints(frame, steady_pose[0], steady_pose[1]) deltax = endpoints[1][0] - endpoints[0][0] deltay = endpoints[1][1] - endpoints[0][1] xpos = math.floor((deltax + 44) * screenWidth / 88) ypos = math.floor((deltay + 14) * screenHeight / 58) # print(xpos, ypos) pyautogui.moveTo(xpos, ypos) if not voice_command_queue.empty(): command = voice_command_queue.get_nowait() if 'click' in command or 'select' in command: pyautogui.click() st = 'Click' elif 'double' in command or 'in' in command: pyautogui.doubleClick() st = 'Double Click' elif 'right' in command or 'menu' in command or 'light' in command: pyautogui.rightClick() st = 'Right Click' print(command) cv2.putText(frame, st, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 20, 255) scale_percent = 30 # calculate the 50 percent of original dimensions width = int(frame.shape[1] * scale_percent / 100) height = int(frame.shape[0] * scale_percent / 100) # dsize dsize = (width, height) # resize image output = cv2.resize(frame, dsize) cv2.moveWindow("Test", screenWidth - width, screenHeight - height) # Show preview. cv2.imshow("Test", output) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. box_process.terminate() box_process.join()
def main(): ''' main function''' img_dir = "img_dir/" # image file dir # setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_list = [] for filename in os.listdir(img_dir): frame = cv2.imread(img_dir + filename) img_list.append(frame) img_queue.put(frame) box_process = Process(target=get_face, args=(img_queue, box_queue)) box_process.start() # introduce mark_detector to detect landmarks mark_detector = MarkDetector() while True: # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # flipcode > 0: horizontal flip; flipcode = 0: vertical flip; flipcode < 0: horizental and vertical flip # frame = cv2.flip(frame, 2) if len(img_list) != 0: frame = img_list.pop(0) raw_frame = frame.copy() # introduce pos estimator to solve pose. Get one frames to setup the # estimator according to the images size. height, width = frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) else: break # Pose estimation by 3 steps. # 1. detect faces. # 2. detect landmarks. # 3. estimate pose. # get face from box queue. faceboxes = box_queue.get() # print("the length of facebox is " + str(len(faceboxes))) for facebox in faceboxes: # detect landmarks from image 128 * 128 face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] # cut off the area of face. face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) # BGR -> RGB marks = mark_detector.detect_marks(face_img) # covert the marks locations from local CNN to global image. marks[:, 0] *= (facebox[2] - facebox[0]) # the width of rectangle. marks[:, 1] *= (facebox[3] - facebox[1]) # the length of rectangle. marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # uncomment following line to show raw marks. # util.draw_marks(frame, marks, color=(0, 255, 0)) # util.draw_faceboxes(frame, facebox) # try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # pose[0] is rotation_vector, and pose[1] is translation_vector pose_np = np.array(pose).flatten() pose = np.reshape(pose_np, (-1, 3)) angle = util.get_angle(pose[0]) pose_estimator.get_warp_affined_image(frame, facebox, marks, angle[2]) # show preview cv2.imshow("Preview", frame) cv2.imshow("raw_frame", raw_frame) if cv2.waitKey(0) == 27: continue # clean up the multiprocessing process. box_process.terminate() box_process.join()
class face_detector(): def __init__(self): # Load the parameters self.conf = config() # initialize dlib's face detector (HOG-based) and then create the # facial landmark predictor print("[INFO] loading facial landmark predictor...") self.detector = dlib.get_frontal_face_detector() self.predictor = dlib.shape_predictor(self.conf.shape_predictor_path) # grab the indexes of the facial landmarks for the left and # right eye, respectively (self.lStart, self.lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"] (self.rStart, self.rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"] # initialize the video stream and sleep for a bit, allowing the # camera sensor to warm up self.cap = cv2.VideoCapture(0) if self.conf.vedio_path == 0: self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) _, sample_frame = self.cap.read() # Introduce mark_detector to detect landmarks. self.mark_detector = MarkDetector() # Setup process and queues for multiprocessing. self.img_queue = Queue() self.box_queue = Queue() self.img_queue.put(sample_frame) self.box_process = Process(target=get_face, args=( self.mark_detector, self.img_queue, self.box_queue,)) self.box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. self.height, self.width = sample_frame.shape[:2] self.pose_estimator = PoseEstimator(img_size=(self.height, self.width)) # Introduce scalar stabilizers for pose. self.pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] self.tm = cv2.TickMeter() # Gaze tracking self.gaze = GazeTracking() def detect(self): # loop over the frames from the video stream temp_steady_pose = 0 while True: # grab the frame from the threaded video stream, resize it to # have a maximum width of 400 pixels, and convert it to # grayscale frame_got, frame = self.cap.read() # Empty frame frame_empty = np.zeros(frame.shape) # frame = imutils.rotate(frame, 90) frame = imutils.resize(frame, width=400) gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # detect faces in the grayscale frame rects = self.detector(gray, 0) # initialize the frame counters and the total number of blinks TOTAL = 0 COUNTER = 0 # loop over the face detections for (i, rect) in enumerate(rects): # determine the facial landmarks for the face region, then # convert the facial landmark (x, y)-coordinates to a NumPy # array self.shape = self.predictor(gray, rect) self.shape = face_utils.shape_to_np(self.shape) # ******************************** # Blink detection # extract the left and right eye coordinates, then use the # coordinates to compute the eye aspect ratio for both eyes self.leftEye = self.shape[self.lStart:self.lEnd] self.rightEye = self.shape[self.rStart:self.rEnd] self.leftEAR = eye_aspect_ratio(self.leftEye) self.rightEAR = eye_aspect_ratio(self.rightEye) # average the eye aspect ratio together for both eyes ear = (self.leftEAR + self.rightEAR) / 2.0 # check to see if the eye aspect ratio is below the blink # threshold, and if so, increment the blink frame counter if ear < self.conf.EYE_AR_THRESH: COUNTER += 1 # otherwise, the eye aspect ratio is not below the blink # threshold else: # if the eyes were closed for a sufficient number of # then increment the total number of blinks if COUNTER >= self.conf.EYE_AR_CONSEC_FRAMES: TOTAL += 1 # reset the eye frame counter COUNTER = 0 # Frame empty cv2.putText(frame_empty, "Blinks: {}".format(TOTAL), (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2) cv2.putText(frame_empty, "EAR: {:.2f}".format(ear), (30, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2) # ******************************** # convert dlib's rectangle to a OpenCV-style bounding box # [i.e., (x, y, w, h)], then draw the face bounding box (x, y, w, h) = face_utils.rect_to_bb(rect) self.bounding_box = (x, y, w, h) # cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2) # Frame empty cv2.rectangle(frame_empty, (x, y), (x + w, y + h), (0, 255, 0), 2) # show the face number cv2.putText(frame_empty, "Face #{}".format(i + 1), (30, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 255), 2) # loop over the (x, y)-coordinates for the facial landmarks # and draw them on the image for (x, y) in self.shape: # cv2.circle(frame, (x, y), 1, (0, 255, 255), -1) cv2.circle(frame_empty, (x, y), 1, (0, 255, 255), -1) # ********************************************************** if frame_got is False: break # If frame comes from webcam, flip it so it looks like a mirror. if self.conf.vedio_path == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. self.img_queue.put(frame) # Get face from box queue. self.facebox = self.box_queue.get() if self.facebox is not None: # Detect landmarks from image of 128x128. face_img = frame[self.facebox[1]: self.facebox[3], self.facebox[0]: self.facebox[2]] face_img = cv2.resize(face_img, (self.conf.CNN_INPUT_SIZE, self.conf.CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) self.tm.start() # marks = self.mark_detector.detect_marks([face_img]) self.tm.stop() # Convert the marks locations from local CNN to global image. self.shape *= (self.facebox[2] - self.facebox[0]) self.shape[:, 0] += self.facebox[0] self.shape[:, 1] += self.facebox[1] # Uncomment following line to show raw marks. # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) # Uncomment following line to show facebox. # mark_detector.draw_box(frame, [facebox]) # Try pose estimation with 68 points. self.pose = self.pose_estimator.solve_pose_by_68_points(self.shape) # Stabilize the pose. self.steady_pose = [] pose_np = np.array(self.pose).flatten() for value, ps_stb in zip(pose_np, self.pose_stabilizers): ps_stb.update([value]) self.steady_pose.append(ps_stb.state[0]) self.steady_pose = np.reshape(self.steady_pose, (-1, 3)) # Uncomment following line to draw pose annotation on frame. # pose_estimator.draw_annotation_box( # frame, pose[0], pose[1], color=(255, 128, 128)) # Uncomment following line to draw stabile pose annotation on frame. # pose_estimator.draw_annotation_box(frame, steady_pose[0], steady_pose[1], color=(128, 255, 128)) # Uncomment following line to draw head axes on frame. # pose_estimator.draw_axes(frame, steady_pose[0], steady_pose[1]) self.pose_estimator.draw_axes(frame_empty, self.steady_pose[0], self.steady_pose[1]) print('steady pose vector: {}'.format(self.steady_pose[0], self.steady_pose[1])) else: # cv2.putText(frame, "Signal loss", (200, 200), # cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) cv2.putText(frame_empty, "Signal loss", (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2) # ****************************************************************** # We send this frame to GazeTracking to analyze it self.gaze.refresh(frame) frame = self.gaze.annotated_frame() text = "" if self.gaze.is_blinking(): text = "Blinking" elif self.gaze.is_right(): text = "Looking right" elif self.gaze.is_left(): text = "Looking left" elif self.gaze.is_center(): text = "Looking center" cv2.putText(frame_empty, text, (250, 250), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 2) left_pupil = self.gaze.pupil_left_coords() right_pupil = self.gaze.pupil_right_coords() cv2.putText(frame_empty, "Left pupil: " + str(left_pupil), (250, 280), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 1) cv2.putText(frame_empty, "Right pupil: " + str(right_pupil), (250, 310), cv2.FONT_HERSHEY_DUPLEX, 0.5, (147, 58, 31), 1) # ******************************************************************** # show the frame # cv2.imshow("Frame", frame) cv2.imshow("Frame", frame_empty) key = cv2.waitKey(1) & 0xFF self.pass_variable = np.array(1) try: self._listener(self.pass_variable) except: pass # if the `q` key was pressed, break from the loop if key == ord("q"): break # do a bit of cleanup cv2.destroyAllWindows() # self.cap.stop() def set_listener(self, listener): self._listener = listener
class CMate: def __init__(self, source_img, dest_img): self.source_img = source_img self.dest_img = dest_img self.dest_pose_estimator = PoseEstimator(cv.imread(self.dest_img)) self.source_pose_estimator = None # initialize after cloth extraction self.error_list = [] def cloth_segmentation(self): # extract source image and segmented cloth try: source_img, source_seg = cloth_extractor.extract_cloth( self.source_img) source_img = cv.cvtColor(source_img, cv.COLOR_RGB2BGR) source_seg = cv.cvtColor(source_seg, cv.COLOR_RGB2BGR) # print(source_img.shape, source_seg.shape) # fill holes source_seg = utils.fill_holes(source_img, source_seg) return source_img, source_seg except Exception: raise Exception("Source image without cloth.") def get_source_shoulder_details(self, source_img, cloth_seg): "get source shoulder distance and rotation angle" try: # initialize source pose estimator self.source_pose_estimator = PoseEstimator(source_img) source_distance = self.source_pose_estimator.get_shoulder_details() source_points = self.source_pose_estimator.get_shoulder_points() except Exception as e: print(str(e)) # if no source shoulder points detected self.error_list.append("Issue in source image:" + str(e)) logging.warning( "Using manual shoulder detection for source image.") source_points, source_distance = custom_shoulder_locator.get_shoulder_details_mannual( cloth_seg) return source_points, source_distance def apply_cloth(self): # step 1: get dest shoulder distance and rotation angle try: dest_distance = self.dest_pose_estimator.get_shoulder_details() except Exception as e: raise Exception("Issue in profile image:" + str(e)) # step 2: get source image and segmented cloth source_img, source_seg = self.cloth_segmentation() # cv.imwrite("testseg.jpg", source_seg) # step 3: get source shoulder distance and rotation angle source_points, source_distance = self.get_source_shoulder_details( source_img, source_seg) # step 4: resize source seg and shoulder points if dest_distance < MIN_SHOULDER_DISTANCE: raise Exception("Shoulder detection issue in profile image.") if source_distance < MIN_SHOULDER_DISTANCE: raise Exception("Shoulder detection issue in source image.") resize_factor = dest_distance / source_distance print("resize factor:", resize_factor) source_seg = cv.resize(source_seg, (int(source_seg.shape[1] * resize_factor), int(source_seg.shape[0] * resize_factor))) source_points[0] = utils.resize_shoulder_coord(source_points[0], resize_factor) source_points[1] = utils.resize_shoulder_coord(source_points[1], resize_factor) """ # step 5: rotate source seg and shoulder points rotation_angle = dest_angle - source_angle # clip angle between [-10,10] if abs(rotation_angle) > 5: self.error_list.append("Source Image is rotated: %f" % rotation_angle) rotation_angle = max(-10, min(rotation_angle, 10)) print("rotation angle:", rotation_angle) rotated_seg = imutils.rotate(source_seg, rotation_angle) source_points = utils.rotate_shoulder_points(source_seg, source_points, rotation_angle) """ # step 5.2: remove border _, source_seg = utils.remove_segmentation_border(source_seg) # step 6: blend dest image and extracted cloth dest_frame = cv.imread(self.dest_img) dest_points = self.dest_pose_estimator.get_shoulder_points() try: final_img = utils.blend_images(source_seg, source_points, dest_frame, dest_points) except AssertionError: print("Assertion Error in blending images.") raise Exception("Issue in blending Images.") except Exception as e: print(str(e)) raise Exception("Issue in blending Images.") return final_img, self.error_list
def main(strargument): shutil.rmtree("./test") os.mkdir("./test") os.remove("result.txt") f = open("result.txt", "a") cap = cv2.VideoCapture(strargument) # cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640) #cap = cv2.VideoCapture("NTQ.mkv") #cap = cv2.VideoCapture("/home/fitmta/Real-Time-Face-Detection-OpenCV-GPU/videos/video/out1.1.avi") #cap = cv2.VideoCapture("http://*****:*****@#[email protected]:8932/mjpg/video.mjpg") # cap = cv2.VideoCapture("http://*****:*****@[email protected]:8933/Streaming/channels/102/preview") success, frame = cap.read() startId = countIdFolder("./face_db/") # quit if unable to read the video file if not success: print('Failed to read video') sys.exit(1) #The color of the rectangle we draw around the face rectangleColor = (0, 165, 255) #variables holding the current frame number and the current faceid frameCounter = 0 currentFaceID = 0 #Variables holding the correlation trackers and the name per faceid conf = configparser.ConfigParser() conf.read("config/main.cfg") mtcnn_detector = load_mtcnn(conf) MODEL_PATH = conf.get("MOBILEFACENET", "MODEL_PATH") VERIFICATION_THRESHOLD = float( conf.get("MOBILEFACENET", "VERIFICATION_THRESHOLD")) FACE_DB_PATH = conf.get("MOBILEFACENET", "FACE_DB_PATH") BLUR_THRESH = int(conf.get("CUSTOM", "BLUR_THRESH")) MIN_FACE_SIZE = int(conf.get("MTCNN", "MIN_FACE_SIZE")) MAX_BLACK_PIXEL = int(conf.get("CUSTOM", "MAX_BLACK_PIXEL")) YAWL = int(conf.get("CUSTOM", "YAWL")) YAWR = int(conf.get("CUSTOM", "YAWR")) PITCHL = int(conf.get("CUSTOM", "PITCHL")) PITCHR = int(conf.get("CUSTOM", "PITCHR")) ROLLL = int(conf.get("CUSTOM", "ROLLL")) ROLLR = int(conf.get("CUSTOM", "ROLLR")) MAXDISAPPEARED = int(conf.get("CUSTOM", "MAXDISAPPEARED")) IS_FACE_THRESH = float(conf.get("CUSTOM", "IS_FACE_THRESH")) EXTEND_Y = int(conf.get("CUSTOM", "EXTEND_Y")) EXTEND_X = int(conf.get("CUSTOM", "EXTEND_X")) SIMILAR_THRESH = float(conf.get("CUSTOM", "SIMILAR_THRESH")) MAX_LIST_LEN = int(conf.get("CUSTOM", "MAX_LIST_LEN")) MIN_FACE_FOR_SAVE = int(conf.get("CUSTOM", "MIN_FACE_FOR_SAVE")) LIVE_TIME = int(conf.get("CUSTOM", "LIVE_TIME")) ROIXL = int(conf.get("CUSTOM", "ROIXL")) ROIXR = int(conf.get("CUSTOM", "ROIXR")) ROIYB = int(conf.get("CUSTOM", "ROIYB")) ROIYA = int(conf.get("CUSTOM", "ROIYA")) maxDisappeared = MAXDISAPPEARED ## khong xuat hien toi da 100 frame faces_db = load_faces(FACE_DB_PATH, mtcnn_detector) # load_face_db = ThreadingUpdatefacedb(FACE_DB_PATH,mtcnn_detector) time.sleep(10) for item in faces_db: print(item["name"]) listTrackedFace = [] mark_detector = MarkDetector() tm = cv2.TickMeter() _, sample_frame = cap.read() height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) with tf.Graph().as_default(): with tf.Session() as sess: load_mobilefacenet(MODEL_PATH) inputs_placeholder = tf.get_default_graph().get_tensor_by_name( "input:0") embeddings = tf.get_default_graph().get_tensor_by_name( "embeddings:0") try: start = time.time() while True: start1 = time.time() retval, frame = cap.read() #Increase the framecounter frameCounter += 1 if retval: _frame = frame[ROIYA:ROIYB, ROIXL:ROIXR] cv2.rectangle(frame, (ROIXL, ROIYA), (ROIXR, ROIYB), (0, 0, 255), 2) good_face_index = [] # faces_db = load_face_db.face_db if (frameCounter % 1) == 0: ### embed and compare name for i, face_db in enumerate(faces_db): if not os.path.isdir( "./face_db/" + face_db["name"].split("_")[0]): faces_db.pop(i) faces, landmarks = mtcnn_detector.detect(_frame) if faces.shape[0] is not 0: input_images = np.zeros( (faces.shape[0], 112, 112, 3)) save_images = np.zeros( (faces.shape[0], 112, 112, 3)) (yaw, pitch, roll) = (0, 0, 0) for i, face in enumerate(faces): if round(faces[i, 4], 6) > IS_FACE_THRESH: bbox = faces[i, 0:4] points = landmarks[i, :].reshape( (5, 2)) nimg = face_preprocess.preprocess( _frame, bbox, points, image_size='112,112') save_images[i, :] = nimg nimg = nimg - 127.5 nimg = nimg * 0.0078125 input_images[i, :] = nimg (x1, y1, x2, y2) = bbox.astype("int") if x1 < 0 or y1 < 0 or x2 < 0 or y2 < 0 or x1 >= x2 or y1 >= y2: continue temp = int((y2 - y1) / EXTEND_Y) y1 = y1 + temp y2 = y2 + temp temp = int((x2 - x1) / EXTEND_X) if x1 > temp: x1 = x1 - temp x2 = x2 + temp # cv2.imshow("mainframe",frame) # cv2.imwrite("temp2.jpg",frame[y1:y2,x1:x2]) face_img = cv2.resize( _frame[y1:y2, x1:x2], (128, 128)) # cv2.imshow("ok",face_img) face_img = cv2.cvtColor( face_img, cv2.COLOR_BGR2RGB) tm.start() marks = mark_detector.detect_marks( [face_img]) tm.stop() marks *= (x2 - x1) marks[:, 0] += x1 marks[:, 1] += y1 # mark_detector.draw_marks( # frame, marks, color=(0, 255, 0)) pose, ( yaw, pitch, roll ) = pose_estimator.solve_pose_by_68_points( marks) # temp = frame # cv2.putText(temp,"yaw: "+str(yaw),(x2,y1),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2) # cv2.putText(temp,"pitch: "+str(pitch),(x2,y1+25),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2) # cv2.putText(temp,"roll: "+str(roll),(x2,y1+50),cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), thickness=2) # cv2.imshow("frame",temp) # if measureGoodFace(MIN_FACE_SIZE,MAX_BLACK_PIXEL,frame[y1:y2,x1:x2],yaw,pitch,roll,BLUR_THRESH,YAWL,YAWR,PITCHL,PITCHR,ROLLL,ROLLR): # good_face_index.append(i) # cv2.waitKey(0) # print(good_face_index) feed_dict = {inputs_placeholder: input_images} emb_arrays = sess.run(embeddings, feed_dict=feed_dict) emb_arrays = sklearn.preprocessing.normalize( emb_arrays) names = [] sims = [] for i, embedding in enumerate(emb_arrays): # if len(listTrackedFace)>i and RepresentsInt(listTrackedFace[i].name)==False: # names.append(listTrackedFace[i].name) # continue embedding = embedding.flatten() temp_dict = {} for com_face in faces_db: ret, sim = feature_compare( embedding, com_face["feature"], 0.65) temp_dict[com_face["name"]] = sim # print(temp_dict) dictResult = sorted(temp_dict.items(), key=lambda d: d[1], reverse=True) # print(dictResult[:5]) name = "" if len(dictResult) > 0 and dictResult[0][ 1] > VERIFICATION_THRESHOLD: name = dictResult[0][ 0] #.split("_")[0] sim = dictResult[0][1] ## wite log t = time.time() f.write(name + "___" + str((t - start) // 60) + ":" + str(int(t - start) % 60) + "\n") else: name = "unknown" sim = 0 names.append(name) sims.append(sim) # cv2.imwrite("./test/"+name+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+".jpg",save_images[i,:]) # if len(dictResult)>0 : # cv2.imwrite("./test/"+names[i]+"_"+str(frameCounter//60)+":"+str(frameCounter%60)+"_"+str(dictResult[0][1])+".jpg",save_images[i,:]) ################################ tracker for i, embedding in enumerate(emb_arrays): embedding = embedding.flatten() ResultDict = {} for objectTrackFace in listTrackedFace: tempList = [] (x1, y1, x2, y2) = objectTrackFace.latestBox for com_face in objectTrackFace.listEmbedding: ret, sim = feature_compare( embedding, com_face, 0.65) tempList.append(sim) tempList.sort(reverse=True) if len(tempList) > 0: if tempList[0] > 0.9 or ( similarIOU( faces[i, :4].astype( "int"), objectTrackFace. latestBox) and (frameCounter - objectTrackFace .latestFrameCounter) < 3): ResultDict[objectTrackFace. name] = tempList[0] dictResult = sorted(ResultDict.items(), key=lambda d: d[1], reverse=True) if True: if len( ResultDict ) > 0 and dictResult[0][ 1] > SIMILAR_THRESH: ## neu khop -- 0.5 # for ik in range(len(dict)): # if dict[ik][1]>SIMILAR_THRESH: nameTrackCurrent = dictResult[0][0] for index, tempFaceTrack in enumerate( listTrackedFace): if tempFaceTrack.name == nameTrackCurrent: if len(tempFaceTrack. listImage ) > MAX_LIST_LEN: tempFaceTrack.listImage.pop( 0) tempFaceTrack.listEmbedding.pop( 0) if measureGoodFace( MIN_FACE_SIZE, MAX_BLACK_PIXEL, save_images[ i, :], yaw, pitch, roll, BLUR_THRESH, YAWL, YAWR, PITCHL, PITCHR, ROLLL, ROLLR): tempFaceTrack.listImage.append( save_images[ i, :]) tempFaceTrack.listEmbedding.append( emb_arrays[i]) else: if measureGoodFace( MIN_FACE_SIZE, MAX_BLACK_PIXEL, save_images[ i, :], yaw, pitch, roll, BLUR_THRESH, YAWL, YAWR, PITCHL, PITCHR, ROLLL, ROLLR): tempFaceTrack.listImage.append( save_images[ i, :]) tempFaceTrack.listEmbedding.append( emb_arrays[i]) if names[i] != "unknown": if RepresentsInt( nameTrackCurrent ): tempFaceTrack.name = names[ i] # else: ################# # names[i] = nameTrackCurrent else: if not RepresentsInt( nameTrackCurrent ): names[ i] = nameTrackCurrent tempFaceTrack.countDisappeared = 0 tempFaceTrack.latestBox = faces[ i, 0:4].astype("int") tempFaceTrack.latestFrameCounter = frameCounter tempFaceTrack.liveTime = 0 tempFaceTrack.justAdded = True ## but we still action with it break else: ## neu khong khop thi tao moi nhung chi them anh khi mat du tot if len(ResultDict) > 0: print(dictResult[0][1]) if names[i] != "unknown": newTrackFace = trackedFace( names[i]) else: newTrackFace = trackedFace( str(currentFaceID)) currentFaceID = currentFaceID + 1 if measureGoodFace( MIN_FACE_SIZE, MAX_BLACK_PIXEL, save_images[i, :], yaw, pitch, roll, BLUR_THRESH, YAWL, YAWR, PITCHL, PITCHR, ROLLL, ROLLR): newTrackFace.listImage.append( save_images[i, :]) newTrackFace.listEmbedding.append( emb_arrays[i]) newTrackFace.latestBox = faces[ i, 0:4].astype("int") newTrackFace.latestFrameCounter = frameCounter # print(newTrackFace.latestBox) newTrackFace.justAdded = True listTrackedFace.append( newTrackFace) ## add list ### disappeared for index, trackFace in enumerate( listTrackedFace): if trackFace.justAdded == False: trackFace.countDisappeared = trackFace.countDisappeared + 1 trackFace.liveTime = trackFace.liveTime + 1 else: trackFace.justAdded = False if trackFace.liveTime > LIVE_TIME: t = listTrackedFace.pop(index) del t if trackFace.countDisappeared > maxDisappeared: if len( trackFace.listImage ) < MIN_FACE_FOR_SAVE: ## neu chua duoc it nhat 5 mat thi xoa luon trackedFace.countDisappeared = 0 continue if trackFace.saveTrackedFace( "./temp/", startId): startId = startId + 1 t = listTrackedFace.pop(index) del t for i, face in enumerate(faces): x1, y1, x2, y2 = faces[i][0], faces[i][ 1], faces[i][2], faces[i][3] x1 = max(int(x1), 0) y1 = max(int(y1), 0) x2 = min(int(x2), _frame.shape[1]) y2 = min(int(y2), _frame.shape[0]) cv2.rectangle(frame, (x1 + ROIXL, y1 + ROIYA), (x2 + ROIXL, y2 + ROIYA), (0, 255, 0), 2) # if i in good_face_index: # if not RepresentsInt(names[i]): cv2.putText(frame, names[i].split("_")[0], (int(x1 / 2 + x2 / 2 + ROIXL), int(y1 + ROIYA)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2) else: for index, trackFace in enumerate( listTrackedFace): trackFace.countDisappeared = trackFace.countDisappeared + 1 trackFace.liveTime = trackFace.liveTime + 1 if trackFace.liveTime > LIVE_TIME: t = listTrackedFace.pop(index) del t continue if trackFace.countDisappeared > maxDisappeared: if len( trackFace.listImage ) < MIN_FACE_FOR_SAVE: ## neu chua duoc it nhat 5 mat thi xoa luon trackedFace.countDisappeared = 0 continue if trackFace.saveTrackedFace( "./temp/", startId): startId = startId + 1 t = listTrackedFace.pop(index) del t end = time.time() cv2.putText(frame, "FPS: " + str(1 // (end - start1)), (400, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3) cv2.putText( frame, "Time: " + str((end - start) // 60) + ":" + str(int(end - start) % 60), (200, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 3) cv2.imshow("frame", frame) key = cv2.waitKey(30) if key & 0xFF == ord('q'): break if key == 32: cv2.waitKey(0) else: break except KeyboardInterrupt as e: pass gc.collect() cv2.destroyAllWindows() exit(0)
def __init__(self): self.POSE_ESTIMATOR = PoseEstimator() self.FRAME_OPS = FrameOperations() self.FIRST = True
random.shuffle(results) return results return paths[best_option] if __name__ == '__main__': if len(sys.argv) != 5: sys.exit('Requires a database, an embedder weights file,' + 'and a pose estimator weights file') database_file = sys.argv[1] facenet_protobuf = sys.argv[2] pose_weights = sys.argv[3] input_image_path = sys.argv[4] sqlite3.register_adapter(np.ndarray, adapt_array) sqlite3.register_converter("array", convert_array) conn = sqlite3.connect(database_file, detect_types=sqlite3.PARSE_DECLTYPES) embedder = Embedder(facenet_protobuf) pose_estimator = PoseEstimator(pose_weights) input_image = Image.open(input_image_path) input_image = crop_to_face(input_image) results = get_best_match(conn, embedder, pose_estimator, input_image) for item in results: print(item)
def main(): # construct the argument parse and parse the arguments ap = argparse.ArgumentParser() ap.add_argument("-m", "--draw-markers", action="store_true", default=False, help="") ap.add_argument("-c", "--draw-confidence", action="store_true", default=False, help="") ap.add_argument("-t", "--confidence-threshold", type=float, default=0.9, help="") ap.add_argument("-p", "--draw-pose", action="store_false", default=True, help="") ap.add_argument("-u", "--draw-unstable", action="store_true", default=False, help="") ap.add_argument("-s", "--draw-segmented", action="store_true", default=False, help="") args = vars(ap.parse_args()) confidence_threshold = args["confidence_threshold"] """MAIN""" # Video source from webcam or video file. video_src = 0 cam = cv2.VideoCapture(video_src) _, sample_frame = cam.read() # Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) if isWindows(): thread = threading.Thread(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue)) thread.daemon = True thread.start() else: box_process = Process(target=get_face, args=(mark_detector, confidence_threshold, img_queue, box_queue)) box_process.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [Stabilizer( state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6)] while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cam.read() if frame_got is False: break # Crop it if frame is larger than expected. # frame = frame[0:480, 300:940] # If frame comes from webcam, flip it so it looks like a mirror. if video_src == 0: frame = cv2.flip(frame, 2) # Pose estimation by 3 steps: # 1. detect face; # 2. detect landmarks; # 3. estimate pose # Feed frame to image queue. img_queue.put(frame) # Get face from box queue. result = box_queue.get() if result is not None: if args["draw_confidence"]: mark_detector.face_detector.draw_result(frame, result) # unpack result facebox, confidence = result # fix facebox if needed if facebox[1] > facebox[3]: facebox[1] = 0 if facebox[0] > facebox[2]: facebox[0] = 0 # Detect landmarks from image of 128x128. face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]] face_img = cv2.resize(face_img, (CNN_INPUT_SIZE, CNN_INPUT_SIZE)) face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB) marks = mark_detector.detect_marks(face_img) # Convert the marks locations from local CNN to global image. marks *= (facebox[2] - facebox[0]) marks[:, 0] += facebox[0] marks[:, 1] += facebox[1] # segment the image based on markers and facebox seg = Segmenter(facebox, marks, frame.shape[1], frame.shape[0]) if args["draw_segmented"]: mark_detector.draw_box(frame, seg.getSegmentBBs()) cv2.imshow("fg", seg.getSegmentJSON()["faceGrid"]) if args["draw_markers"]: mark_detector.draw_marks( frame, marks, color=(0, 255, 0)) # Try pose estimation with 68 points. pose = pose_estimator.solve_pose_by_68_points(marks) # Stabilize the pose. stable_pose = [] pose_np = np.array(pose).flatten() for value, ps_stb in zip(pose_np, pose_stabilizers): ps_stb.update([value]) stable_pose.append(ps_stb.state[0]) stable_pose = np.reshape(stable_pose, (-1, 3)) if args["draw_unstable"]: pose_estimator.draw_annotation_box( frame, pose[0], pose[1], color=(255, 128, 128)) if args["draw_pose"]: pose_estimator.draw_annotation_box( frame, stable_pose[0], stable_pose[1], color=(128, 255, 128)) # Show preview. cv2.imshow("Preview", frame) if cv2.waitKey(10) == 27: break # Clean up the multiprocessing process. if not isWindows(): box_process.terminate() box_process.join()
# Introduce mark_detector to detect landmarks. mark_detector = MarkDetector() # Setup process and queues for multiprocessing. img_queue = Queue() box_queue = Queue() img_queue.put(sample_frame) thread = threading.Thread(target=get_face, args=(mark_detector, img_queue, box_queue)) thread.daemon = True thread.start() # Introduce pose estimator to solve pose. Get one frame to setup the # estimator according to the image size. height, width = sample_frame.shape[:2] pose_estimator = PoseEstimator(img_size=(height, width)) # Introduce scalar stabilizers for pose. pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] tm = cv2.TickMeter() while True: # Read frame, crop it, flip it, suits your needs. frame_got, frame = cap.read() if frame_got is False: