def __init__(self):
    self.video_capture = cv2.VideoCapture(0)
    self.cam_w = int(self.video_capture.get(3))
    self.cam_h = int(self.video_capture.get(4))
    self.blinked = False

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    self.c_x = self.cam_w / 2
    self.c_y = self.cam_h / 2
    self.f_x = self.c_x / np.tan(60 / 2 * np.pi / 180)
    self.f_y = self.f_x
    self.camera_matrix = np.float32([[self.f_x, 0.0, self.c_x],
                                     [0.0, self.f_y, self.c_y],
                                     [0.0, 0.0, 1.0]])
    self.camera_distortion = np.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    self.landmarks_3D = np.float32([
        P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
        P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
        P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
        P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
    ])

    self.face_cascade = haarCascade(
        "/home/meet/Programming/python/eyetracker/haarcascade_frontalface_alt.xml",
        "/home/meet/Programming/python/eyetracker/haarcascade_profileface.xml"
    )
    self.face_detector = faceLandmarkDetection(
        '/home/meet/Programming/python/eyetracker/shape_predictor_68_face_landmarks.dat'
    )
    self.eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

    self.no_face_counter = 0
    self.face_x1 = 0
    self.face_y1 = 0
    self.face_x2 = 0
    self.face_y2 = 0
    self.face_w = 0
    self.face_h = 0
    self.roi_x1 = 0
    self.roi_y1 = 0
    self.roi_x2 = self.cam_w
    self.roi_y2 = self.cam_h
    self.roi_w = self.cam_w
    self.roi_h = self.cam_h
    self.roi_resize_w = int(self.cam_w / 10)
    self.roi_resize_h = int(self.cam_h / 10)
    self.face_type = -1
    self.no_eye_counter = 0
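#A minimal sketch of the FOV-based approximation described in the comment above,
#packaged as a standalone helper. The function name and the default 60-degree field
#of view are illustrative assumptions, not part of the original class.
def approximate_camera_matrix(frame_width, frame_height, fov_degrees=60.0):
    """Rough pinhole intrinsics when no calibration data is available."""
    c_x = frame_width / 2.0                             #optical centre x
    c_y = frame_height / 2.0                            #optical centre y
    f_x = c_x / np.tan(np.radians(fov_degrees) / 2.0)   #focal length in pixels
    return np.float32([[f_x, 0.0, c_x],
                       [0.0, f_x, c_y],
                       [0.0, 0.0, 1.0]])

#Example: a 640x480 camera gives f_x = f_y of roughly 554.26
#print(approximate_camera_matrix(640, 480))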
def main(): #Check if some argumentshave been passed #pass the path of a video if (len(sys.argv) > 2): file_path = sys.argv[1] if (os.path.isfile(file_path) == False): print( "ex_pnp_head_pose_estimation: the file specified does not exist." ) return else: #Open the video file video_capture = cv2.VideoCapture(file_path) if (video_capture.isOpened() == True): print( "ex_pnp_head_pose_estimation: the video source has been opened correctly..." ) # Define the codec and create VideoWriter object #fourcc = cv2.VideoWriter_fourcc(*'XVID') output_path = sys.argv[2] fourcc = cv2.cv.CV_FOURCC(*'XVID') out = cv2.VideoWriter(output_path, fourcc, 20.0, (1280, 720)) else: print( "You have to pass as argument the path to a video file and the path to the output file to produce, for example: \n python ex_pnp_pose_estimation_video.py /home/video.mpg ./output.avi" ) return #Create the main window and move it cv2.namedWindow('Video') cv2.moveWindow('Video', 20, 20) #Obtaining the CAM dimension cam_w = int(video_capture.get(3)) cam_h = int(video_capture.get(4)) #Defining the camera matrix. #To have better result it is necessary to find the focal # lenght of the camera. fx/fy are the focal lengths (in pixels) # and cx/cy are the optical centres. These values can be obtained # roughly by approximation, for example in a 640x480 camera: # cx = 640/2 = 320 # cy = 480/2 = 240 # fx = fy = cx/tan(60/2 * pi / 180) = 554.26 c_x = cam_w / 2 c_y = cam_h / 2 f_x = c_x / numpy.tan(60 / 2 * numpy.pi / 180) f_y = f_x #Estimated camera matrix values. camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y], [0.0, 0.0, 1.0]]) print("Estimated camera matrix: \n" + str(camera_matrix) + "\n") #These are the camera matrix values estimated on my webcam with # the calibration code (see: src/calibration): #camera_matrix = numpy.float32([[602.10618226, 0.0, 320.27333589], #[ 0.0, 603.55869786, 229.7537026], #[ 0.0, 0.0, 1.0] ]) #Distortion coefficients camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0]) #Distortion coefficients estimated by calibration #camera_distortion = numpy.float32([ 0.06232237, -0.41559805, 0.00125389, -0.00402566, 0.04879263]) #This matrix contains the 3D points of the # 11 landmarks we want to find. It has been # obtained from antrophometric measurement # on the human head. landmarks_3D = numpy.float32([ P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT, P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT, P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR, P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION ]) #Declaring the two classifiers my_cascade = haarCascade("./etc/haarcascade_frontalface_alt.xml", "./etc/haarcascade_profileface.xml") my_detector = faceLandmarkDetection( './etc/shape_predictor_68_face_landmarks.dat') #Error counter definition no_face_counter = 0 #Variables that identify the face #position in the main frame. face_x1 = 0 face_y1 = 0 face_x2 = 0 face_y2 = 0 face_w = 0 face_h = 0 #Variables that identify the ROI #position in the main frame. roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h roi_resize_w = int(cam_w / 10) roi_resize_h = int(cam_h / 10) while (True): # Capture frame-by-frame ret, frame = video_capture.read() gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY) #Looking for faces with cascade #The classifier moves over the ROI #starting from a minimum dimension and augmentig #slightly based on the scale factor parameter. 
#The scale factor for the frontal face is 1.10 (10%) #Scale factor: 1.15=15%,1.25=25% ...ecc #Higher scale factors means faster classification #but lower accuracy. # #Return code: 1=Frontal, 2=FrontRotLeft, # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight. my_cascade.findFace(gray, runFrontal=True, runFrontalRotated=True, runLeft=False, runRight=False, frontalScaleFactor=1.2, rotatedFrontalScaleFactor=1.2, leftScaleFactor=1.15, rightScaleFactor=1.15, minSizeX=80, minSizeY=80, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type) #Accumulate error values in a counter if (my_cascade.face_type == 0): no_face_counter += 1 #If any face is found for a certain #number of cycles, then the ROI is reset if (no_face_counter == 30): no_face_counter = 0 roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h #Checking wich kind of face it is returned if (my_cascade.face_type > 0 and my_cascade.face_type < 4): #Face found, reset the error counter no_face_counter = 0 #Because the dlib landmark detector wants a precise #boundary box of the face, it is necessary to resize #the box returned by the OpenCV haar detector. #Adjusting the frame for profile left if (my_cascade.face_type == 4): face_margin_x1 = 20 - 10 #resize_rate + shift_rate face_margin_y1 = 20 + 5 #resize_rate + shift_rate face_margin_x2 = -20 - 10 #resize_rate + shift_rate face_margin_y2 = -20 + 5 #resize_rate + shift_rate face_margin_h = -0.7 #resize_factor face_margin_w = -0.7 #resize_factor #Adjusting the frame for profile right elif (my_cascade.face_type == 5): face_margin_x1 = 20 + 10 face_margin_y1 = 20 + 5 face_margin_x2 = -20 + 10 face_margin_y2 = -20 + 5 face_margin_h = -0.7 face_margin_w = -0.7 #No adjustments else: face_margin_x1 = 0 face_margin_y1 = 0 face_margin_x2 = 0 face_margin_y2 = 0 face_margin_h = 0 face_margin_w = 0 #Updating the face position face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1 face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1 face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2 face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2 face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w) face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h) #Updating the ROI position roi_x1 = face_x1 - roi_resize_w if (roi_x1 < 0): roi_x1 = 0 roi_y1 = face_y1 - roi_resize_h if (roi_y1 < 0): roi_y1 = 0 roi_w = face_w + roi_resize_w + roi_resize_w if (roi_w > cam_w): roi_w = cam_w roi_h = face_h + roi_resize_h + roi_resize_h if (roi_h > cam_h): roi_h = cam_h roi_x2 = face_x2 + roi_resize_w if (roi_x2 > cam_w): roi_x2 = cam_w roi_y2 = face_y2 + roi_resize_h if (roi_y2 > cam_h): roi_y2 = cam_h #Debugging printing utilities if (DEBUG == True): print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h) print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h) #Drawing a green rectangle # (and text) around the face. 
text_x1 = face_x1 text_y1 = face_y1 - 3 if (text_y1 < 0): text_y1 = 0 cv2.putText(frame, "FACE", (text_x1, text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2), (0, 255, 0), 2) #In case of a frontal/rotated face it # is called the landamark detector if (my_cascade.face_type > 0): landmarks_2D = my_detector.returnLandmarks( frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS) if (DEBUG == True): #cv2.drawKeypoints(frame, landmarks_2D) for point in landmarks_2D: cv2.circle(frame, (point[0], point[1]), 2, (0, 0, 255), -1) #Applying the PnP solver to find the 3D pose # of the head from the 2D position of the # landmarks. #retval - bool #rvec - Output rotation vector that, together with tvec, brings # points from the model coordinate system to the camera coordinate system. #tvec - Output translation vector. retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D, camera_matrix, camera_distortion) #Now we project the 3D points into the image plane #Creating a 3-axis to be used as reference in the image. axis = numpy.float32([[50, 0, 0], [0, 50, 0], [0, 0, 50]]) imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion) #Drawing the three axis on the image frame. #The opencv colors are defined as BGR colors such as: # (a, b, c) >> Blue = a, Green = b and Red = c #Our axis/color convention is X=R, Y=G, Z=B sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1]) cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0, 255, 0), 3) #GREEN cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255, 0, 0), 3) #BLUE cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0, 0, 255), 3) #RED #Drawing a yellow rectangle # (and text) around the ROI. #if(DEBUG == True): #text_x1 = roi_x1 #text_y1 = roi_y1 - 3 #if(text_y1 < 0): text_y1 = 0 #cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1); #cv2.rectangle(frame, #(roi_x1, roi_y1), #(roi_x2, roi_y2), #(0, 255, 255), #2) #Writing in the output file out.write(frame) #Showing the frame and waiting # for the exit command cv2.imshow('Video', frame) if cv2.waitKey(1) & 0xFF == ord('q'): break #Release the camera video_capture.release() print("Bye...")
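#The axis endpoints returned by cv2.projectPoints are floating point, and recent
#OpenCV releases may reject non-integer coordinates in cv2.line. A minimal hedged
#sketch of a drawing helper that casts to int first (the helper name is illustrative
#and not part of the original script):
def draw_pose_axes(frame, origin_xy, imgpts):
    """Draw the projected axes using the X=Red, Y=Green, Z=Blue convention (BGR colors)."""
    origin = (int(origin_xy[0]), int(origin_xy[1]))
    x_end = tuple(int(v) for v in imgpts[0].ravel())
    y_end = tuple(int(v) for v in imgpts[1].ravel())
    z_end = tuple(int(v) for v in imgpts[2].ravel())
    cv2.line(frame, origin, x_end, (0, 0, 255), 3)   #X axis in red
    cv2.line(frame, origin, y_end, (0, 255, 0), 3)   #Y axis in green
    cv2.line(frame, origin, z_end, (255, 0, 0), 3)   #Z axis in blue
    return frame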
def main(): #Defining the video capture object video_capture = cv2.VideoCapture(0) if(video_capture.isOpened() == False): print("Error: the resource is busy or unvailable") else: print("The video source has been opened correctly...") #Create the main window and move it cv2.namedWindow('Video') cv2.moveWindow('Video', 20, 20) #Obtaining the CAM dimension cam_w = int(video_capture.get(3)) cam_h = int(video_capture.get(4)) #Defining the camera matrix. #To have better result it is necessary to find the focal # lenght of the camera. fx/fy are the focal lengths (in pixels) # and cx/cy are the optical centres. These values can be obtained # roughly by approximation, for example in a 640x480 camera: # cx = 640/2 = 320 # cy = 480/2 = 240 # fx = fy = cx/tan(60/2 * pi / 180) = 554.26 c_x = cam_w / 2 c_y = cam_h / 2 f_x = c_x / np.tan(60/2 * np.pi / 180) f_y = f_x #Estimated camera matrix values. camera_matrix = np.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y], [0.0, 0.0, 1.0] ]) #print("Estimated camera matrix: \n" + str(camera_matrix) + "\n") #These are the camera matrix values estimated on my webcam with # the calibration code (see: src/calibration): camera_matrix = np.float32([[602.10618226, 0.0, 320.27333589], [ 0.0, 603.55869786, 229.7537026], [ 0.0, 0.0, 1.0] ]) #Distortion coefficients #camera_distortion = np.float32([0.0, 0.0, 0.0, 0.0, 0.0]) #Distortion coefficients estimated by calibration camera_distortion = np.float32([ 0.06232237, -0.41559805, 0.00125389, -0.00402566, 0.04879263]) #This matrix contains the 3D points of the # 11 landmarks we want to find. It has been # obtained from antrophometric measurement # on the human head. landmarks_3D = np.float32([P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT, P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT, P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR, P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION]) #Declaring the two classifiers my_cascade = haarCascade("/Users/lamh/Documents/deepgaze/etc/xml/haarcascade_frontalface_alt.xml", "/Users/lamh/Documents/deepgaze/etc/xml/haarcascade_profileface.xml") #TODO If missing, example file can be retrieved from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 my_detector = faceLandmarkDetection('/Users/lamh/Documents/deepgaze/etc/shape_predictor_68_face_landmarks.dat') #Error counter definition no_face_counter = 0 #Variables that identify the face #position in the main frame. face_x1 = 0 face_y1 = 0 face_x2 = 0 face_y2 = 0 face_w = 0 face_h = 0 #Variables that identify the ROI #position in the main frame. roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h roi_resize_w = int(cam_w/10) roi_resize_h = int(cam_h/10) try: while(True): # Capture frame-by-frame ret, frame = video_capture.read() gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY) #Return code: 1=Frontal, 2=FrontRotLeft, # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight. 
my_cascade.findFace(gray, True, True, True, True, 1.10, 1.10, 1.15, 1.15, 40, 40, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type) #Accumulate error values in a counter if(my_cascade.face_type == 0): no_face_counter += 1 #If any face is found for a certain #number of cycles, then the ROI is reset if(no_face_counter == 50): no_face_counter = 0 roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h #Checking which kind of face it is returned if(my_cascade.face_type > 0): #Face found, reset the error counter no_face_counter = 0 #Because the dlib landmark detector wants a precise #boundary box of the face, it is necessary to resize #the box returned by the OpenCV haar detector. #Adjusting the frame for profile left if(my_cascade.face_type == 4): face_margin_x1 = 20 - 10 #resize_rate + shift_rate face_margin_y1 = 20 + 5 #resize_rate + shift_rate face_margin_x2 = -20 - 10 #resize_rate + shift_rate face_margin_y2 = -20 + 5 #resize_rate + shift_rate face_margin_h = -0.7 #resize_factor face_margin_w = -0.7 #resize_factor #Adjusting the frame for profile right elif(my_cascade.face_type == 5): face_margin_x1 = 20 + 10 face_margin_y1 = 20 + 5 face_margin_x2 = -20 + 10 face_margin_y2 = -20 + 5 face_margin_h = -0.7 face_margin_w = -0.7 #No adjustments else: face_margin_x1 = 0 face_margin_y1 = 0 face_margin_x2 = 0 face_margin_y2 = 0 face_margin_h = 0 face_margin_w = 0 #Updating the face position face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1 face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1 face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2 face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2 face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w) face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h) #Updating the ROI position roi_x1 = face_x1 - roi_resize_w if (roi_x1 < 0): roi_x1 = 0 roi_y1 = face_y1 - roi_resize_h if(roi_y1 < 0): roi_y1 = 0 roi_w = face_w + roi_resize_w + roi_resize_w if(roi_w > cam_w): roi_w = cam_w roi_h = face_h + roi_resize_h + roi_resize_h if(roi_h > cam_h): roi_h = cam_h roi_x2 = face_x2 + roi_resize_w if (roi_x2 > cam_w): roi_x2 = cam_w roi_y2 = face_y2 + roi_resize_h if(roi_y2 > cam_h): roi_y2 = cam_h #Debugging printing utilities #if(DEBUG == True): if(DEBUG == False): #print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h) #print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h) #Drawing a green rectangle # (and text) around the face. text_x1 = face_x1 text_y1 = face_y1 - 3 if(text_y1 < 0): text_y1 = 0 cv2.putText(frame, "FACE", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1); cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2), (0, 255, 0), 2) # In case of a frontal/rotated face it # is called the landamark detector if(my_cascade.face_type > 0): landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS) if(DEBUG == True): #cv2.drawKeypoints(frame, landmarks_2D) for point in landmarks_2D: cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1) # Applying the PnP solver to find the 3D pose # of the head from the 2D position of the # landmarks. # retval - bool # rvec - Output rotation vector that, together with tvec, brings # points from the model coordinate system to the camera coordinate system. # tvec - Output translation vector. 
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                                  camera_matrix, camera_distortion)

                # Now we project the 3D points into the image plane
                # Creating a 3-axis to be used as reference in the image.
                axis = np.float32([[50, 0, 0],
                                   [0, 50, 0],
                                   [0, 0, 50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec,
                                                camera_matrix, camera_distortion)

                # Drawing the three axis on the image frame.
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                # Note: rvec is an axis-angle rotation vector, not a pitch/yaw/roll triple.
                print("*** Position: {0}, rotation vector (deg): {1}, {2}, {3} ***".format(
                    sellion_xy, np.degrees(rvec)[0], np.degrees(rvec)[1], np.degrees(rvec)[2]))
                if(max(imgpts[1].ravel()) <= 500000 and min(imgpts[1].ravel()) >= -500000):
                    cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0, 255, 0), 3)  #GREEN
                if(max(imgpts[2].ravel()) <= 500000 and min(imgpts[2].ravel()) >= -500000):
                    cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255, 0, 0), 3)  #BLUE
                if(max(imgpts[0].ravel()) <= 500000 and min(imgpts[0].ravel()) >= -500000):
                    cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0, 0, 255), 3)  #RED

            #Drawing a yellow rectangle
            # (and text) around the ROI.
            if(DEBUG == True):
                text_x1 = roi_x1
                text_y1 = roi_y1 - 3
                if(text_y1 < 0): text_y1 = 0
                #cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1);
                #cv2.rectangle(frame,
                #              (roi_x1, roi_y1),
                #              (roi_x2, roi_y2),
                #              (0, 255, 255),
                #              2)

            #Showing the frame and waiting
            # for the exit command
            cv2.imshow('Video', frame)
            key = cv2.waitKey(1)
            if key == 27:
                cv2.destroyAllWindows()
                break
            #if cv2.waitKey(1) & 0xFF == ord('q'): break
    except KeyboardInterrupt:
        #Release the camera
        video_capture.release()
        print("KeyboardInterrupt: camera released. Bye ...")
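#The print above reports np.degrees(rvec), but the rotation vector returned by
#solvePnP is an axis-angle representation, not a pitch/yaw/roll triple. A minimal
#sketch of a conversion through the projection matrix, mirroring what the drowsiness
#example further below does (the function name is an illustrative assumption):
def rotation_vector_to_euler(rvec, tvec):
    """Return (pitch, yaw, roll) in degrees from the solvePnP output."""
    rotation_matrix, _ = cv2.Rodrigues(rvec)                #3x3 rotation matrix
    projection_matrix = np.hstack((rotation_matrix, tvec))  #3x4 [R|t] matrix
    euler_angles = cv2.decomposeProjectionMatrix(projection_matrix)[6]
    pitch, yaw, roll = [float(angle) for angle in euler_angles]
    return pitch, yaw, roll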
#The MIT License (MIT)
#Copyright (c) 2016 Massimiliano Patacchiola
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
#CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#Using the OpenCV haar cascade classifiers to find a face in an image.

from deepgaze.haar_cascade import haarCascade
import cv2

#Declaring the face detector object and loading the XML config files
my_cascade = haarCascade("../../etc/xml/haarcascade_frontalface_alt.xml",
                         "../../etc/xml/haarcascade_profileface.xml")

#Reading the image in black/white (grayscale)
image = cv2.imread("./bellucci.jpg", 0)

#Calling the findFace method
my_cascade.findFace(image, runFrontal=True, runFrontalRotated=True,
                    runLeft=True, runRight=True,
                    frontalScaleFactor=1.2, rotatedFrontalScaleFactor=1.2,
                    leftScaleFactor=1.15, rightScaleFactor=1.15,
                    minSizeX=64, minSizeY=64,
                    rotationAngleCCW=30, rotationAngleCW=-30)

#The coords of the face are saved in the class object
face_x1 = my_cascade.face_x
face_y1 = my_cascade.face_y
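#A short hedged follow-up showing how the stored coordinates and the face_type
#return code (0=none, 1=Frontal, 2=FrontRotLeft, 3=FrontRotRight, 4=ProfileLeft,
#5=ProfileRight) could be used to draw the detected box on the grayscale image:
face_x2 = my_cascade.face_x + my_cascade.face_w
face_y2 = my_cascade.face_y + my_cascade.face_h
if my_cascade.face_type > 0:
    labels = {1: "FRONTAL", 2: "FRONT-ROT-LEFT", 3: "FRONT-ROT-RIGHT",
              4: "PROFILE-LEFT", 5: "PROFILE-RIGHT"}
    cv2.rectangle(image, (face_x1, face_y1), (face_x2, face_y2), 255, 2)
    print("Face found: " + labels[my_cascade.face_type])
else:
    print("No face found.")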
import cv2
import numpy as np
import tensorflow as tf
from deepgaze.head_pose_estimation import CnnHeadPoseEstimator
from deepgaze.haar_cascade import haarCascade

face_cascade = haarCascade(
    "/home/meet/Programming/python/eyetracker/haarcascade_frontalface_alt.xml",
    "/home/meet/Programming/python/eyetracker/haarcascade_profileface.xml")
eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')

cam = cv2.VideoCapture(0)
cam_w = int(cam.get(3))
cam_h = int(cam.get(4))

no_face_counter = 0
face_x0 = 0
face_y0 = 0
face_x1 = 0
face_y1 = 0
face_w = 0
face_h = 0
roi_x0 = 0
roi_y0 = 0
roi_x1 = cam_w
roi_y1 = cam_h
roi_w = cam_w
roi_h = cam_h
roi_resize_w = int(cam_w / 10)
roi_resize_h = int(cam_h / 10)

def detect_eyes(img, gray_img):
    #The original body of this function is not included in this excerpt.
    #Minimal sketch, assuming the Haar eye cascade declared above is used:
    eyes = eye_cascade.detectMultiScale(gray_img, scaleFactor=1.1, minNeighbors=5)
    for (x, y, w, h) in eyes:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
    return eyes
def main(): #counter for counting not frontal-gazing frames COUNTER = 0 #alarm for drowsiness ALARAM_ON = False #Defining the video capture object video_capture = cv2.VideoCapture("head_pose4.mp4") if (video_capture.isOpened() == False): print("Error: the resource is busy or unvailable") else: print("The video source has been opened correctly...") #Create the main window and move it cv2.namedWindow('Video') cv2.moveWindow('Video', 20, 20) #Obtaining the CAM dimension cam_w = int(video_capture.get(3)) cam_h = int(video_capture.get(4)) #Defining the camera matrix. #To have better result it is necessary to find the focal # lenght of the camera초점거리를 알면 좋음. # fx/fy are the focal lengths (in pixels) # and cx/cy are the optical centres. These values can be obtained # roughly by approximation, for example in a 640x480 camera: # cx = 640/2 = 320 # cy = 480/2 = 240 # fx = fy = cx/tan(60/2 * pi / 180) = 554.26 c_x = cam_w / 2 c_y = cam_h / 2 f_x = c_x / numpy.tan(60 / 2 * numpy.pi / 180) f_y = f_x #Estimated camera matrix values. camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y], [0.0, 0.0, 1.0]]) print("Estimated camera matrix: \n" + str(camera_matrix) + "\n") #These are the camera matrix values estimated on my webcam with # the calibration code (see: src/calibration): camera_matrix = numpy.float32([[602.10618226, 0.0, 320.27333589], [0.0, 603.55869786, 229.7537026], [0.0, 0.0, 1.0]]) #Distortion coefficients #camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0]) #Distortion coefficients estimated by calibration camera_distortion = numpy.float32( [0.06232237, -0.41559805, 0.00125389, -0.00402566, 0.04879263]) #This matrix contains the 3D points of the # 11 landmarks we want to find. It has been # obtained from antrophometric measurement # on the human head. landmarks_3D = numpy.float32([ P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT, P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT, P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR, P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION ]) #Declaring the two classifiers my_cascade = haarCascade("./etc/xml/haarcascade_frontalface_alt.xml", "./etc/xml/haarcascade_profileface.xml") #TODO If missing, example file can be retrieved from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 my_detector = faceLandmarkDetection( './etc/shape_predictor_68_face_landmarks.dat') #Error counter definition no_face_counter = 0 #Variables that identify the face #position in the main frame. face_x1 = 0 face_y1 = 0 face_x2 = 0 face_y2 = 0 face_w = 0 face_h = 0 #Variables that identify the ROI #position in the main frame. roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h roi_resize_w = int(cam_w / 10) roi_resize_h = int(cam_h / 10) while (True): # Capture frame-by-frame ret, frame = video_capture.read() gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY) #Looking for faces with cascade #The classifier moves over the ROI #starting from a minimum dimension and augmentig #slightly based on the scale factor parameter. #The scale factor for the frontal face is 1.10 (10%) #Scale factor: 1.15=15%,1.25=25% ...ecc #Higher scale factors means faster classification #but lower accuracy. # #Return code: 1=Frontal, 2=FrontRotLeft, # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight. 
my_cascade.findFace(gray, True, True, True, True, 1.10, 1.10, 1.15, 1.15, 40, 40, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type) #Accumulate error values in a counter if (my_cascade.face_type == 0): no_face_counter += 1 #If any face is found for a certain #number of cycles, then the ROI is reset if (no_face_counter == 50): no_face_counter = 0 roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h #Checking wich kind of face it is returned if (my_cascade.face_type > 0): #Face found, reset the error counter no_face_counter = 0 #Because the dlib landmark detector wants a precise #boundary box of the face, it is necessary to resize #the box returned by the OpenCV haar detector. #Adjusting the frame for profile left if (my_cascade.face_type == 4): face_margin_x1 = 20 - 10 #resize_rate + shift_rate face_margin_y1 = 20 + 5 #resize_rate + shift_rate face_margin_x2 = -20 - 10 #resize_rate + shift_rate face_margin_y2 = -20 + 5 #resize_rate + shift_rate face_margin_h = -0.7 #resize_factor face_margin_w = -0.7 #resize_factor #Adjusting the frame for profile right elif (my_cascade.face_type == 5): face_margin_x1 = 20 + 10 face_margin_y1 = 20 + 5 face_margin_x2 = -20 + 10 face_margin_y2 = -20 + 5 face_margin_h = -0.7 face_margin_w = -0.7 #No adjustments else: face_margin_x1 = 0 face_margin_y1 = 0 face_margin_x2 = 0 face_margin_y2 = 0 face_margin_h = 0 face_margin_w = 0 #Updating the face position face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1 face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1 face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2 face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2 face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w) face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h) #Updating the ROI position roi_x1 = face_x1 - roi_resize_w if (roi_x1 < 0): roi_x1 = 0 roi_y1 = face_y1 - roi_resize_h if (roi_y1 < 0): roi_y1 = 0 roi_w = face_w + roi_resize_w + roi_resize_w if (roi_w > cam_w): roi_w = cam_w roi_h = face_h + roi_resize_h + roi_resize_h if (roi_h > cam_h): roi_h = cam_h roi_x2 = face_x2 + roi_resize_w if (roi_x2 > cam_w): roi_x2 = cam_w roi_y2 = face_y2 + roi_resize_h if (roi_y2 > cam_h): roi_y2 = cam_h #Debugging printing utilities if (DEBUG == True): print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h) print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h) #Drawing a green rectangle # (and text) around the face. text_x1 = face_x1 text_y1 = face_y1 - 3 if (text_y1 < 0): text_y1 = 0 cv2.putText(frame, "FACE", (text_x1, text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1) cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2), (0, 255, 0), 2) #In case of a frontal/rotated face it # is called the landamark detector if (my_cascade.face_type > 0): landmarks_2D = my_detector.returnLandmarks( frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS) if (DEBUG == True): #cv2.drawKeypoints(frame, landmarks_2D) for point in landmarks_2D: cv2.circle(frame, (point[0], point[1]), 2, (0, 0, 255), -1) #Applying the PnP solver to find the 3D pose # of the head from the 2D position of the # landmarks. #retval - bool #rvec - Output rotation vector that, together with tvec, brings # points from the model coordinate system to the camera coordinate system. #tvec - Output translation vector. 
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                                  camera_matrix, camera_distortion)

                #Now we project the 3D points into the image plane
                #Creating a 3-axis to be used as reference in the image.
                axis = numpy.float32([[50, 0, 0],
                                      [0, 50, 0],
                                      [0, 0, 50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec,
                                                camera_matrix, camera_distortion)

                #Compute the rotation in degrees from the rotation vector
                modelpts, jac2 = cv2.projectPoints(landmarks_3D, rvec, tvec,
                                                   camera_matrix, camera_distortion)
                rvec_matrix = cv2.Rodrigues(rvec)[0]
                proj_matrix = numpy.hstack((rvec_matrix, tvec))
                eulerAngles = cv2.decomposeProjectionMatrix(proj_matrix)[6]
                pitch, yaw, roll = [math.radians(_) for _ in eulerAngles]
                roll = math.degrees(math.asin(math.sin(roll)))
                print(str(roll))

                #Drawing the three axis on the image frame.
                #The opencv colors are defined as BGR colors such as:
                # (a, b, c) >> Blue = a, Green = b and Red = c
                #Our axis/color convention is X=R, Y=G, Z=B
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0, 255, 0), 3)  #GREEN
                cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255, 0, 0), 3)  #BLUE
                cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0, 0, 255), 3)  #RED

                #Drawing a yellow rectangle
                # (and text) around the ROI.
                if (DEBUG == True):
                    text_x1 = roi_x1
                    text_y1 = roi_y1 - 3
                    if (text_y1 < 0): text_y1 = 0
                    cv2.putText(frame, "ROI", (text_x1, text_y1),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
                    cv2.rectangle(frame,
                                  (roi_x1, roi_y1),
                                  (roi_x2, roi_y2),
                                  (0, 255, 255),
                                  2)

                if (roll <= -40.0):
                    COUNTER = COUNTER + 1
                    cv2.putText(frame, "Not frontal-gazing", (10, cam_h - 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
                    print("counter: {}".format(COUNTER))
                    if (COUNTER >= 20):
                        if not ALARAM_ON:
                            ALARAM_ON = True
                        cv2.putText(frame, "DROWSINESS ALERT!", (10, 30),
                                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                else:
                    COUNTER = 0
                    ALARAM_ON = False

            #Showing the frame and waiting
            # for the exit command
            cv2.imshow('Video', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
    print("Bye...")
def main(): # The state machine has the following states: # [VOID, INIT, FIND, SHOW, KEY, ICUB, WHICH, QUIT] # # VOID: Empty state, to use for test # INIT: It is called only once for the initialisation # FIND: use the hardware face and landmark detection libraries # SHOW: Print the image on screen using OpenCV # KEY: Check which key is pressed # ICUB: Pressing the (h) button the robot look in front of itself # WHICH: Pressing the (w) button is like asking to the robot to look to a object on the table # QUIT: Pressing (q) unsubscribe and close the script #Configuration Variables, adjust to taste #ICUB_IP = "192.168.0.100" #ICUB_PORT = 9559 RECORD_VIDEO = True #If True record a video from the ICUB camera USE_FESTIVAL_TTS = True #To use the Festival Text To Speach #If you want to use Acapela TTS you have to fill the #following variable with the correct values #USE_ACAPELA_TTS = False #ACCOUNT_LOGIN = '******' #APPLICATION_LOGIN = '******' #APPLICATION_PASSWORD = '******' #SERVICE_URL = 'http://vaas.acapela-group.com/Services/Synthesizer' #The initial state STATE = "VOID" while(True): #Empty STATE, to be used for test if(STATE == "VOID"): STATE = "INIT" # The zero state is an init phase # In this state all the proxies are # created and tICUB subscribe to the services # elif(STATE=="INIT"): #Init some generic variables #This counter allows continuing the program flow #without calling a sleep which_counter = 0 #Counter increased when WHICH is called which_counter_limit = 30 #Limit for the which counter #Init YARP print("[STATE " + str(STATE) + "] " + "YARP init" + "\n") yarp.Network.init() #Camera connection try: print("[STATE " + str(STATE) + "] " + "Waiting for '/icubSim/cam/left' ..." + "\n") cam_w = 320 #640 cam_h = 240 #480 input_port = yarp.Port() input_port.open("/pyera-image-port") yarp.Network.connect("/icubSim/cam/left", "/pyera-image-port") img_array = np.zeros((cam_h, cam_w, 3), dtype=np.uint8) yarp_image = yarp.ImageRgb() yarp_image.resize(cam_w, cam_h) yarp_image.setExternal(img_array, img_array.shape[1], img_array.shape[0]) except BaseException, err: print("[ERROR] connect To Camera catching error " + str(err)) return #Head connection and Reset print("[STATE " + str(STATE) + "] " + "Waiting for '/icubSim/head/rpc:i' ..." + "\n") rpc_client = yarp.RpcClient() rpc_client.addOutput("/icubSim/head/rpc:i") print("[STATE " + str(STATE) + "] " + "Reset the head position..." + "\n") set_icub_head_pose(rpc_client, roll=0, pitch=0, yaw=0) #Initialise the OpenCV video recorder if(RECORD_VIDEO == True): print("[STATE " + str(STATE) + "] " + "Starting the video recorder..." + "\n") fourcc = cv2.cv.CV_FOURCC(*'XVID') video_out = cv2.VideoWriter("./output.avi", fourcc, 20.0, (cam_w,cam_h)) #Init dlib Face detector #my_face_detector = dlib.get_frontal_face_detector() #Init the Deepgaze face detector my_cascade = haarCascade("./haarcascade_frontalface_alt.xml", "./haarcascade_profileface.xml") #Talking if(USE_FESTIVAL_TTS == True): print("[STATE " + str(STATE) + "] " + "Trying the TTS Festival..." 
+ "\n") say_something("Hello World, I'm ready!") #if(USE_ACAPELA_TTS == True): #print("[ACAPELA] Downloading the mp3 file...") #tts_acapela = acapela.Acapela(account_login=ACCOUNT_LOGIN, application_login=APPLICATION_LOGIN, application_password=APPLICATION_PASSWORD, #service_url=SERVICE_URL, quality='22k', directory='/tmp/') #tts_acapela.prepare(text="Hello world, I'm ready!", lang='US', gender='M', intonation='NORMAL') #output_filename = tts_acapela.run() #subprocess.Popen(["play","-q","/tmp/" + str(output_filename)]) #print "[ACAPELA] Recorded TTS to %s" % output_filename #Swithc to STATE > 1 print("[STATE " + str(STATE) + "] " + "Switching to next state" + "\n") time.sleep(2) STATE = "FIND" # Get data from landmark detection and # face detection. # elif(STATE=="FIND"): #Get Data for Face Detection #if(face_data and isinstance(face_data, list) and len(face_data) > 0): #print("[ICUB] Face detected!") #else: #print("No face detected...") #is_face_detected = False input_port.read(yarp_image) ''' faces_array = my_face_detector(img_array, 1) print("Total Faces: " + str(len(faces_array))) for i, pos in enumerate(faces_array): face_x1 = pos.left() face_y1 = pos.top() face_x2 = pos.right() face_y2 = pos.bottom() text_x1 = face_x1 text_y1 = face_y1 - 3 cv2.putText(img_array, "FACE " + str(i+1), (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1); cv2.rectangle(img_array, (face_x1, face_y1), (face_x2, face_y2), (0, 255, 0), 2) ''' gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY) # Return code: 1=Frontal, 2=FrontRotLeft, 3=FronRotRight, # 4=ProfileLeft, 5=ProfileRight. my_cascade.findFace(gray, runFrontal=True, runFrontalRotated=False, runLeft=True, runRight=True, frontalScaleFactor=1.18, rotatedFrontalScaleFactor=1.18, leftScaleFactor=1.18, rightScaleFactor=1.18, minSizeX=70, minSizeY=70, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type) face_x1 = my_cascade.face_x face_y1 = my_cascade.face_y face_x2 = my_cascade.face_x + my_cascade.face_w face_y2 = my_cascade.face_y + my_cascade.face_h text_x1 = face_x1 text_y1 = face_y1 - 3 if(my_cascade.face_type == 1 or my_cascade.face_type == 2 or my_cascade.face_type == 3): cv2.putText(img_array, "FRONTAL", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1); elif(my_cascade.face_type == 4): cv2.putText(img_array, "LEFT", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1); elif(my_cascade.face_type == 5): cv2.putText(img_array, "RIGH", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1); cv2.rectangle(img_array, (face_x1, face_y1), (face_x2, face_y2), (0, 255, 0), 2) is_face_detected = False STATE = "SHOW"
def main(): #Check if some argumentshave been passed #pass the path of a video if(len(sys.argv) > 2): file_path = sys.argv[1] if(os.path.isfile(file_path)==False): print("ex_pnp_head_pose_estimation: the file specified does not exist.") return else: #Open the video file video_capture = cv2.VideoCapture(file_path) if(video_capture.isOpened() == True): print("ex_pnp_head_pose_estimation: the video source has been opened correctly...") # Define the codec and create VideoWriter object #fourcc = cv2.VideoWriter_fourcc(*'XVID') output_path = sys.argv[2] fourcc = cv2.cv.CV_FOURCC(*'XVID') out = cv2.VideoWriter(output_path, fourcc, 20.0, (1280,720)) else: print("You have to pass as argument the path to a video file and the path to the output file to produce, for example: \n python ex_pnp_pose_estimation_video.py /home/video.mpg ./output.avi") return #Create the main window and move it cv2.namedWindow('Video') cv2.moveWindow('Video', 20, 20) #Obtaining the CAM dimension cam_w = int(video_capture.get(3)) cam_h = int(video_capture.get(4)) #Defining the camera matrix. #To have better result it is necessary to find the focal # lenght of the camera. fx/fy are the focal lengths (in pixels) # and cx/cy are the optical centres. These values can be obtained # roughly by approximation, for example in a 640x480 camera: # cx = 640/2 = 320 # cy = 480/2 = 240 # fx = fy = cx/tan(60/2 * pi / 180) = 554.26 c_x = cam_w / 2 c_y = cam_h / 2 f_x = c_x / numpy.tan(60/2 * numpy.pi / 180) f_y = f_x #Estimated camera matrix values. camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y], [0.0, 0.0, 1.0] ]) print("Estimated camera matrix: \n" + str(camera_matrix) + "\n") #These are the camera matrix values estimated on my webcam with # the calibration code (see: src/calibration): #camera_matrix = numpy.float32([[602.10618226, 0.0, 320.27333589], #[ 0.0, 603.55869786, 229.7537026], #[ 0.0, 0.0, 1.0] ]) #Distortion coefficients camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0]) #Distortion coefficients estimated by calibration #camera_distortion = numpy.float32([ 0.06232237, -0.41559805, 0.00125389, -0.00402566, 0.04879263]) #This matrix contains the 3D points of the # 11 landmarks we want to find. It has been # obtained from antrophometric measurement # on the human head. landmarks_3D = numpy.float32([P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT, P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT, P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR, P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION]) #Declaring the two classifiers my_cascade = haarCascade("../etc/xml/haarcascade_frontalface_alt.xml", "../etc/xml/haarcascade_profileface.xml") my_detector = faceLandmarkDetection('../etc/shape_predictor_68_face_landmarks.dat') #Error counter definition no_face_counter = 0 #Variables that identify the face #position in the main frame. face_x1 = 0 face_y1 = 0 face_x2 = 0 face_y2 = 0 face_w = 0 face_h = 0 #Variables that identify the ROI #position in the main frame. roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h roi_resize_w = int(cam_w/10) roi_resize_h = int(cam_h/10) while(True): # Capture frame-by-frame ret, frame = video_capture.read() if frame is None: return gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY) #Looking for faces with cascade #The classifier moves over the ROI #starting from a minimum dimension and augmentig #slightly based on the scale factor parameter. 
#The scale factor for the frontal face is 1.10 (10%) #Scale factor: 1.15=15%,1.25=25% ...ecc #Higher scale factors means faster classification #but lower accuracy. # #Return code: 1=Frontal, 2=FrontRotLeft, # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight. my_cascade.findFace(gray, runFrontal=True, runFrontalRotated=True, runLeft=False, runRight=False, frontalScaleFactor=1.2, rotatedFrontalScaleFactor=1.2, leftScaleFactor=1.15, rightScaleFactor=1.15, minSizeX=80, minSizeY=80, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type) #Accumulate error values in a counter if(my_cascade.face_type == 0): no_face_counter += 1 #If any face is found for a certain #number of cycles, then the ROI is reset if(no_face_counter == 30): no_face_counter = 0 roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h #Checking wich kind of face it is returned if(my_cascade.face_type > 0 and my_cascade.face_type < 4): #Face found, reset the error counter no_face_counter = 0 #Because the dlib landmark detector wants a precise #boundary box of the face, it is necessary to resize #the box returned by the OpenCV haar detector. #Adjusting the frame for profile left if(my_cascade.face_type == 4): face_margin_x1 = 20 - 10 #resize_rate + shift_rate face_margin_y1 = 20 + 5 #resize_rate + shift_rate face_margin_x2 = -20 - 10 #resize_rate + shift_rate face_margin_y2 = -20 + 5 #resize_rate + shift_rate face_margin_h = -0.7 #resize_factor face_margin_w = -0.7 #resize_factor #Adjusting the frame for profile right elif(my_cascade.face_type == 5): face_margin_x1 = 20 + 10 face_margin_y1 = 20 + 5 face_margin_x2 = -20 + 10 face_margin_y2 = -20 + 5 face_margin_h = -0.7 face_margin_w = -0.7 #No adjustments else: face_margin_x1 = 0 face_margin_y1 = 0 face_margin_x2 = 0 face_margin_y2 = 0 face_margin_h = 0 face_margin_w = 0 #Updating the face position face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1 face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1 face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2 face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2 face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w) face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h) #Updating the ROI position roi_x1 = face_x1 - roi_resize_w if (roi_x1 < 0): roi_x1 = 0 roi_y1 = face_y1 - roi_resize_h if(roi_y1 < 0): roi_y1 = 0 roi_w = face_w + roi_resize_w + roi_resize_w if(roi_w > cam_w): roi_w = cam_w roi_h = face_h + roi_resize_h + roi_resize_h if(roi_h > cam_h): roi_h = cam_h roi_x2 = face_x2 + roi_resize_w if (roi_x2 > cam_w): roi_x2 = cam_w roi_y2 = face_y2 + roi_resize_h if(roi_y2 > cam_h): roi_y2 = cam_h #Debugging printing utilities if(DEBUG == True): print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h) print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h) #Drawing a green rectangle # (and text) around the face. 
text_x1 = face_x1 text_y1 = face_y1 - 3 if(text_y1 < 0): text_y1 = 0 cv2.putText(frame, "FACE", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1); cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2), (0, 255, 0), 2) #In case of a frontal/rotated face it # is called the landamark detector if(my_cascade.face_type > 0): landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS) if(DEBUG == True): #cv2.drawKeypoints(frame, landmarks_2D) for point in landmarks_2D: cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1) #Applying the PnP solver to find the 3D pose # of the head from the 2D position of the # landmarks. #retval - bool #rvec - Output rotation vector that, together with tvec, brings # points from the model coordinate system to the camera coordinate system. #tvec - Output translation vector. retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D, camera_matrix, camera_distortion) #Now we project the 3D points into the image plane #Creating a 3-axis to be used as reference in the image. axis = numpy.float32([[50,0,0], [0,50,0], [0,0,50]]) imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion) #Drawing the three axis on the image frame. #The opencv colors are defined as BGR colors such as: # (a, b, c) >> Blue = a, Green = b and Red = c #Our axis/color convention is X=R, Y=G, Z=B sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1]) cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED #Drawing a yellow rectangle # (and text) around the ROI. #if(DEBUG == True): #text_x1 = roi_x1 #text_y1 = roi_y1 - 3 #if(text_y1 < 0): text_y1 = 0 #cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1); #cv2.rectangle(frame, #(roi_x1, roi_y1), #(roi_x2, roi_y2), #(0, 255, 255), #2) #Writing in the output file out.write(frame) #Showing the frame and waiting # for the exit command cv2.imshow('Video', frame) if cv2.waitKey(1) & 0xFF == ord('q'): break #Release the camera video_capture.release() print("Bye...")
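#cv2.cv.CV_FOURCC only exists in the old OpenCV 2.x bindings. On OpenCV 3 and later
#the video writer used by this script would be created with the constructor already
#hinted at in the commented-out line near the top of main():
#fourcc = cv2.VideoWriter_fourcc(*'XVID')
#out = cv2.VideoWriter(output_path, fourcc, 20.0, (1280, 720))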
def main(): #Defining the video capture object video_capture = cv2.VideoCapture(1) thresh = 0.25 frame_check = 15 detect = dlib.get_frontal_face_detector() predict = dlib.shape_predictor("/home/agopinath1996/git_ws/deepgaze/scripts/shape_predictor_68_face_landmarks.dat")# change to path where landmark points are stored (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_68_IDXS["left_eye"] # get the left eye index (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_68_IDXS["right_eye"] # get the right eye index flag=0 sess = tf.Session() my_head_pose_estimator = CnnHeadPoseEstimator(sess) my_head_pose_estimator.load_roll_variables(os.path.realpath("/home/agopinath1996/git_ws/deepgaze/etc/tensorflow/head_pose/roll/cnn_cccdd_30k.tf"))# change to deepgaze directory path my_head_pose_estimator.load_pitch_variables(os.path.realpath("/home/agopinath1996/git_ws/deepgaze/etc/tensorflow/head_pose/pitch/cnn_cccdd_30k.tf")) my_head_pose_estimator.load_yaw_variables(os.path.realpath("/home/agopinath1996/git_ws/deepgaze/etc/tensorflow/head_pose/yaw/cnn_cccdd_30k.tf")) #Start of Eye Gaze Tracking win = dlib.image_window() predictor_path = "/home/agopinath1996/git_ws/deepgaze/scripts/shape_predictor_68_face_landmarks.dat" roi = [] ref_point = 0 index1 = 0 pt_lefteye_corner_x= 0 pt_lefteye_corner_y = 0 pt_pos1 = 0 predictor = dlib.shape_predictor(predictor_path) pt_x2 =0 pt_y2 = 0 pt_x1 = 0 pt_y1 = 0 pt_actualx = 0 pt_actualy = 0 detector = dlib.get_frontal_face_detector() flag = 0 flag1 = 0 pt_righteye_corner_x = 0 pt_righteye_corner_y = 0 if(video_capture.isOpened() == False): print("Error: the resource is busy or unvailable") else: print("The video source has been opened correctly...") #Create the main window and move it cv2.namedWindow('Video') cv2.moveWindow('Video', 20, 20) #Obtaining the CAM dimension cam_w = int(video_capture.get(3)) cam_h = int(video_capture.get(4)) #Defining the camera matrix. #To have better result it is necessary to find the focal # lenght of the camera. fx/fy are the focal lengths (in pixels) # and cx/cy are the optical centres. These values can be obtained # roughly by approximation, for example in a 640x480 camera: # cx = 640/2 = 320 # cy = 480/2 = 240 # fx = fy = cx/tan(60/2 * pi / 180) = 554.26 c_x = cam_w / 2 c_y = cam_h / 2 f_x = c_x / numpy.tan(60/2 * numpy.pi / 180) f_y = f_x #Estimated camera matrix values. camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y], [0.0, 0.0, 1.0] ]) print("Estimated camera matrix: \n" + str(camera_matrix) + "\n") #These are the camera matrix values estimated on my webcam with # the calibration code (see: src/calibration): camera_matrix = numpy.float32([[602.10618226, 0.0, 320.27333589], [ 0.0, 603.55869786, 229.7537026], [ 0.0, 0.0, 1.0] ]) #Distortion coefficients #camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0]) #Distortion coefficients estimated by calibration camera_distortion = numpy.float32([ 0.06232237, -0.41559805, 0.00125389, -0.00402566, 0.04879263]) #This matrix contains the 3D points of the # 11 landmarks we want to find. It has been # obtained from antrophometric measurement # on the human head. 
landmarks_3D = numpy.float32([P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT, P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT, P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR, P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION]) #Declaring the two classifiers my_cascade = haarCascade("/home/agopinath1996/git_ws/deepgaze/etc/xml/haarcascade_frontalface_alt.xml", "/home/agopinath1996/git_ws/deepgaze/etc/xml/haarcascade_profileface.xml") #TODO If missing, example file can be retrieved from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 my_detector = faceLandmarkDetection('/home/agopinath1996/git_ws/deepgaze/scripts/shape_predictor_68_face_landmarks.dat') #Error counter definition no_face_counter = 0 #Variables that identify the face #position in the main frame. face_x1 = 0 face_y1 = 0 face_x2 = 0 face_y2 = 0 face_w = 0 face_h = 0 #Variables that identify the ROI #position in the main frame. roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h roi_resize_w = int(cam_w/10) roi_resize_h = int(cam_h/10) while(True): # Capture frame-by-frame ret, frame = video_capture.read() ret, frame_eye = video_capture.read() #print("Estimated [roll, pitch, yaw] ..... [" + str(roll[0,0,0]) + "," + str(pitch[0,0,0]) + "," + str(yaw[0,0,0]) + "]") gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY) img = cv2.cvtColor(frame_eye, cv2.COLOR_RGB2BGR) #for eye gaze detection drowsyframe = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) subjects = detect(drowsyframe, 0) for subject in subjects: shape = predict(frame,subject) shape = face_utils.shape_to_np(shape) leftEye = shape[lStart:lEnd] rightEye = shape[rStart:rEnd] leftEAR = eye_aspect_ratio(leftEye) rightEAR = eye_aspect_ratio(rightEye) ear = leftEAR + rightEAR / 2.0 leftEyeHull = cv2.convexHull(leftEye) rightEyeHull = cv2.convexHull(rightEye) cv2.drawContours(frame, [leftEyeHull], -1, (0,255,0),1) cv2.drawContours(frame, [rightEyeHull], -1, (0,255,0), 1) if ear<thresh: flag+= 1 print(flag) if flag >= frame_check: cv2.putText(frame, "WAKEUPPPP", (10,30), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2) cv2.putText(frame, "WAKEUPPPP", (10, 325), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255),2) else: flag=0 #Eye Gaze Detetction dets = detect(img, 0) check = 5 shapes_eye = [] for k,d in enumerate(dets): #print("dets{}".format(d)) #print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(k, d.left(), d.top(), d.right(), d.bottom())) shape_eye = predict(img, d) for index, pt in enumerate(shape_eye.parts()): #print('Part {}: {}'.format(index, pt)) pt_pos = (pt.x, pt.y) cv2.circle(img, pt_pos, 1, (0,225, 0), 2) if index == 29: pt_x2 = int(pt.x) pt_y2 = int(pt.y) if index == 18: pt_x1 = int(pt.x) pt_y1 = int(pt.y) if index == 37: pt_righteye_corner_x = pt.x pt_righteye_corner_y = pt.y if index == 40: pt_lefteye_corner_x = pt.x pt_lefteye_corner_y = pt.y roi = frame_eye[pt_y1:pt_y2,pt_x1:pt_x2] roi_gray = cv2.cvtColor(roi,cv2.COLOR_RGB2GRAY) _, threshold = cv2.threshold(roi_gray, 30, 255, cv2.THRESH_BINARY_INV) try: M = cv2.moments(threshold) #print(M) cX = int(M["m10"]/M["m00"]) cY = int(M["m01"]/M["m00"]) #print(cX,cY) pt_actualx = pt_x1+cX pt_actualy = pt_y1+cY #print(pt_actualx,pt_actualy) diff_right = pt_actualx-pt_righteye_corner_x diff_left = pt_lefteye_corner_x - pt_actualx print(diff_right,diff_left) #print(cX,cY) if diff_right < 3: cv2.putText(frame,'Look straight!',(10,60),cv2.FONT_HERSHEY_SIMPLEX,0.5,(10,10,255),2) if diff_left <3: cv2.putText(frame,'Look 
straight!',(10,60),cv2.FONT_HERSHEY_SIMPLEX,0.5,(10,10,255),2) except: pass cv2.circle(frame,(pt_actualx,pt_actualy), 2,(255,0,255),-1) #print(pt_actualx,pt_actualy) #print(pt_x1,pt_x2,pt_y1,pt_y2) #print(roi.shape_eye) #print(img.shape_eye) try: cv2.imshow("threshold", threshold) cv2.waitKey(1) except: pass win.clear_overlay() win.set_image(img) if len(shapes_eye)!= 0 : for i in range(len(shapes_eye)): win.add_overlay(shapes_eye[i]) #Looking for faces with cascade #The classifier moves over the ROI #starting from a minimum dimension and augmentig #slightly based on the scale factor parameter. #The scale factor for the frontal face is 1.10 (10%) #Scale factor: 1.15=15%,1.25=25% ...ecc #Higher scale factors means faster classification #but lower accuracy. # #Return code: 1=Frontal, 2=FrontRotLeft, # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight. my_cascade.findFace(gray, True, True, True, True, 1.10, 1.10, 1.15, 1.15, 40, 40, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type) #print(returnvalue) #Accumulate error values in a counter if(my_cascade.face_type == 0): no_face_counter += 1 #If any face is found for a certain #number of cycles, then the ROI is reset if(no_face_counter == 50): no_face_counter = 0 roi_x1 = 0 roi_y1 = 0 roi_x2 = cam_w roi_y2 = cam_h roi_w = cam_w roi_h = cam_h #Checking wich kind of face it is returned if(my_cascade.face_type > 0): #Face found, reset the error counter no_face_counter = 0 #Because the dlib landmark detector wants a precise #boundary box of the face, it is necessary to resize #the box returned by the OpenCV haar detector. #Adjusting the frame for profile left if(my_cascade.face_type == 4): face_margin_x1 = 20 - 10 #resize_rate + shift_rate face_margin_y1 = 20 + 5 #resize_rate + shift_rate face_margin_x2 = -20 - 10 #resize_rate + shift_rate face_margin_y2 = -20 + 5 #resize_rate + shift_rate face_margin_h = -0.7 #resize_factor face_margin_w = -0.7 #resize_factor #Adjusting the frame for profile right elif(my_cascade.face_type == 5): face_margin_x1 = 20 + 10 face_margin_y1 = 20 + 5 face_margin_x2 = -20 + 10 face_margin_y2 = -20 + 5 face_margin_h = -0.7 face_margin_w = -0.7 #No adjustments else: face_margin_x1 = 0 face_margin_y1 = 0 face_margin_x2 = 0 face_margin_y2 = 0 face_margin_h = 0 face_margin_w = 0 #Updating the face position face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1 face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1 face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2 face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2 face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w) face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h) crop_img = frame[face_y1:face_y2, face_x1:face_x2] cv2.imshow("cropped", crop_img) roll = my_head_pose_estimator.return_roll(crop_img) pitch = my_head_pose_estimator.return_pitch(crop_img) yaw = my_head_pose_estimator.return_yaw(crop_img) #print("Estimated [roll, pitch, yaw] ..... 
[" + str(roll[0,0,0]) + "," + str(pitch[0,0,0]) + "," + str(yaw[0,0,0]) + "]") if yaw > 30: cv2.putText(frame, "You are facing right!", (10,30), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2) if yaw < -30: cv2.putText(frame, "You are facing left!", (10,30), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2) #Updating the ROI position roi_x1 = face_x1 - roi_resize_w if (roi_x1 < 0): roi_x1 = 0 roi_y1 = face_y1 - roi_resize_h if(roi_y1 < 0): roi_y1 = 0 roi_w = face_w + roi_resize_w + roi_resize_w if(roi_w > cam_w): roi_w = cam_w roi_h = face_h + roi_resize_h + roi_resize_h if(roi_h > cam_h): roi_h = cam_h roi_x2 = face_x2 + roi_resize_w if (roi_x2 > cam_w): roi_x2 = cam_w roi_y2 = face_y2 + roi_resize_h if(roi_y2 > cam_h): roi_y2 = cam_h #Debugging printing utilities if(DEBUG == True): #print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h) #print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h) #Drawing a green rectangle # (and text) around the face. text_x1 = face_x1 text_y1 = face_y1 - 3 if(text_y1 < 0): text_y1 = 0 cv2.putText(frame, "FACE", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1); cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2), (0, 255, 0), 2) #In case of a frontal/rotated face it # is called the landamark detector if(my_cascade.face_type > 0): landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS) if(DEBUG == True): #cv2.drawKeypoints(frame, landmarks_2D) for point in landmarks_2D: cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1) #Applying the PnP solver to find the 3D pose # of the head from the 2D position of the # landmarks. #retval - bool #rvec - Output rotation vector that, together with tvec, brings # points from the model coordinate system to the camera coordinate system. #tvec - Output translation vector. retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D, camera_matrix, camera_distortion) #Now we project the 3D points into the image plane #Creating a 3-axis to be used as reference in the image. axis = numpy.float32([[50,0,0], [0,50,0], [0,0,50]]) imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion) #Drawing the three axis on the image frame. #The opencv colors are defined as BGR colors such as: # (a, b, c) >> Blue = a, Green = b and Red = c #Our axis/color convention is X=R, Y=G, Z=B sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1]) cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED #Drawing a yellow rectangle # (and text) around the ROI. if(DEBUG == True): text_x1 = roi_x1 text_y1 = roi_y1 - 3 if(text_y1 < 0): text_y1 = 0 cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1); cv2.rectangle(frame, (roi_x1, roi_y1), (roi_x2, roi_y2), (0, 255, 255), 2) #Showing the frame and waiting # for the exit command cv2.imshow('Video', frame) if cv2.waitKey(1) & 0xFF == ord('q'): break #Release the camera video_capture.release() print("Bye...")