Example #1
 def __init__(self):
     self.video_capture = cv2.VideoCapture(0)
     self.cam_w = int(self.video_capture.get(3))
     self.cam_h = int(self.video_capture.get(4))
     self.blinked = False
     #Defining the camera matrix.
     #To have better results it is necessary to find the focal
     # length of the camera. fx/fy are the focal lengths (in pixels)
     # and cx/cy are the optical centres. These values can be obtained
     # roughly by approximation, for example in a 640x480 camera:
     # cx = 640/2 = 320
     # cy = 480/2 = 240
     # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
     self.c_x = self.cam_w / 2
     self.c_y = self.cam_h / 2
     self.f_x = self.c_x / np.tan(60 / 2 * np.pi / 180)
     self.f_y = self.f_x
     self.camera_matrix = np.float32([[self.f_x, 0.0, self.c_x],
                                      [0.0, self.f_y, self.c_y],
                                      [0.0, 0.0, 1.0]])
     self.camera_distortion = np.float32([0.0, 0.0, 0.0, 0.0, 0.0])
     self.landmarks_3D = np.float32([
         P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
         P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
         P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
         P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
     ])
     self.face_cascade = haarCascade(
         "/home/meet/Programming/python/eyetracker/haarcascade_frontalface_alt.xml",
         "/home/meet/Programming/python/eyetracker/haarcascade_profileface.xml"
     )
     self.face_detector = faceLandmarkDetection(
         '/home/meet/Programming/python/eyetracker/shape_predictor_68_face_landmarks.dat'
     )
     self.eye_cascade = cv2.CascadeClassifier('haarcascade_eye.xml')
     self.no_face_counter = 0
     self.face_x1 = 0
     self.face_y1 = 0
     self.face_x2 = 0
     self.face_y2 = 0
     self.face_w = 0
     self.face_h = 0
     self.roi_x1 = 0
     self.roi_y1 = 0
     self.roi_x2 = self.cam_w
     self.roi_y2 = self.cam_h
     self.roi_w = self.cam_w
     self.roi_h = self.cam_h
     self.roi_resize_w = int(self.cam_w / 10)
     self.roi_resize_h = int(self.cam_h / 10)
     self.face_type = -1
     self.no_eye_counter = 0
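
A note on the __init__ above: the intrinsic camera matrix is only approximated, assuming the optical centre sits at the image centre and a horizontal field of view of about 60 degrees. A minimal stand-alone sketch of that approximation (the 60-degree FOV is an assumption; calibrating the camera gives more accurate values):

import numpy as np

def approximate_camera_matrix(frame_w, frame_h, fov_deg=60.0):
    #Optical centre assumed at the image centre.
    c_x = frame_w / 2.0
    c_y = frame_h / 2.0
    #fx = cx / tan(FOV / 2); fy assumed equal to fx (square pixels).
    f_x = c_x / np.tan(np.radians(fov_deg / 2.0))
    return np.float32([[f_x, 0.0, c_x],
                       [0.0, f_x, c_y],
                       [0.0, 0.0, 1.0]])

#For a 640x480 frame this gives fx = fy = 554.26 (approx.), matching the comment above.
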
def main():

    #Check if some arguments have been passed
    #pass the path of a video
    if (len(sys.argv) > 2):
        file_path = sys.argv[1]
        if (os.path.isfile(file_path) == False):
            print(
                "ex_pnp_head_pose_estimation: the file specified does not exist."
            )
            return
        else:
            #Open the video file
            video_capture = cv2.VideoCapture(file_path)
            if (video_capture.isOpened() == True):
                print(
                    "ex_pnp_head_pose_estimation: the video source has been opened correctly..."
                )
            # Define the codec and create VideoWriter object
            #fourcc = cv2.cv.CV_FOURCC(*'XVID')  #OpenCV 2.x API
            output_path = sys.argv[2]
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_path, fourcc, 20.0, (1280, 720))
    else:
        print(
            "You have to pass as argument the path to a video file and the path to the output file to produce, for example: \n python ex_pnp_pose_estimation_video.py /home/video.mpg ./output.avi"
        )
        return

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60 / 2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0]])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    #camera_matrix = numpy.float32([[602.10618226,          0.0, 320.27333589],
    #[         0.0, 603.55869786,  229.7537026],
    #[         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    #camera_distortion = numpy.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])

    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([
        P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
        P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
        P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
        P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
    ])

    #Declaring the two classifiers
    my_cascade = haarCascade("./etc/haarcascade_frontalface_alt.xml",
                             "./etc/haarcascade_profileface.xml")
    my_detector = faceLandmarkDetection(
        './etc/shape_predictor_68_face_landmarks.dat')

    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w / 10)
    roi_resize_h = int(cam_h / 10)

    while (True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2],
                            cv2.COLOR_BGR2GRAY)

        #Looking for faces with cascade
        #The classifier moves over the ROI
        #starting from a minimum dimension and augmenting
        #slightly based on the scale factor parameter.
        #The scale factor for the frontal face is 1.10 (10%)
        #Scale factor: 1.15=15%, 1.25=25% ...etc.
        #Higher scale factors mean faster classification
        #but lower accuracy.
        #
        #Return code: 1=Frontal, 2=FrontRotLeft,
        # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
        my_cascade.findFace(gray,
                            runFrontal=True,
                            runFrontalRotated=True,
                            runLeft=False,
                            runRight=False,
                            frontalScaleFactor=1.2,
                            rotatedFrontalScaleFactor=1.2,
                            leftScaleFactor=1.15,
                            rightScaleFactor=1.15,
                            minSizeX=80,
                            minSizeY=80,
                            rotationAngleCCW=30,
                            rotationAngleCW=-30,
                            lastFaceType=my_cascade.face_type)

        #Accumulate error values in a counter
        if (my_cascade.face_type == 0):
            no_face_counter += 1

        #If no face is found for a certain
        #number of cycles, then the ROI is reset
        if (no_face_counter == 30):
            no_face_counter = 0
            roi_x1 = 0
            roi_y1 = 0
            roi_x2 = cam_w
            roi_y2 = cam_h
            roi_w = cam_w
            roi_h = cam_h

        #Checking which kind of face is returned
        if (my_cascade.face_type > 0 and my_cascade.face_type < 4):

            #Face found, reset the error counter
            no_face_counter = 0

            #Because the dlib landmark detector wants a precise
            #bounding box of the face, it is necessary to resize
            #the box returned by the OpenCV haar detector.
            #Adjusting the frame for profile left
            if (my_cascade.face_type == 4):
                face_margin_x1 = 20 - 10  #resize_rate + shift_rate
                face_margin_y1 = 20 + 5  #resize_rate + shift_rate
                face_margin_x2 = -20 - 10  #resize_rate + shift_rate
                face_margin_y2 = -20 + 5  #resize_rate + shift_rate
                face_margin_h = -0.7  #resize_factor
                face_margin_w = -0.7  #resize_factor
            #Adjusting the frame for profile right
            elif (my_cascade.face_type == 5):
                face_margin_x1 = 20 + 10
                face_margin_y1 = 20 + 5
                face_margin_x2 = -20 + 10
                face_margin_y2 = -20 + 5
                face_margin_h = -0.7
                face_margin_w = -0.7
            #No adjustments
            else:
                face_margin_x1 = 0
                face_margin_y1 = 0
                face_margin_x2 = 0
                face_margin_y2 = 0
                face_margin_h = 0
                face_margin_w = 0

            #Updating the face position
            face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
            face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
            face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
            face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
            face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
            face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)

            #Updating the ROI position
            roi_x1 = face_x1 - roi_resize_w
            if (roi_x1 < 0): roi_x1 = 0
            roi_y1 = face_y1 - roi_resize_h
            if (roi_y1 < 0): roi_y1 = 0
            roi_w = face_w + roi_resize_w + roi_resize_w
            if (roi_w > cam_w): roi_w = cam_w
            roi_h = face_h + roi_resize_h + roi_resize_h
            if (roi_h > cam_h): roi_h = cam_h
            roi_x2 = face_x2 + roi_resize_w
            if (roi_x2 > cam_w): roi_x2 = cam_w
            roi_y2 = face_y2 + roi_resize_h
            if (roi_y2 > cam_h): roi_y2 = cam_h

            #Debugging printing utilities
            if (DEBUG == True):
                print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w,
                      face_h)
                print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)
                #Drawing a green rectangle
                # (and text) around the face.
                text_x1 = face_x1
                text_y1 = face_y1 - 3
                if (text_y1 < 0): text_y1 = 0
                cv2.putText(frame, "FACE", (text_x1, text_y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2),
                              (0, 255, 0), 2)

            #In case of a frontal/rotated face
            # the landmark detector is called
            if (my_cascade.face_type > 0):
                landmarks_2D = my_detector.returnLandmarks(
                    frame,
                    face_x1,
                    face_y1,
                    face_x2,
                    face_y2,
                    points_to_return=TRACKED_POINTS)

                if (DEBUG == True):
                    #cv2.drawKeypoints(frame, landmarks_2D)

                    for point in landmarks_2D:
                        cv2.circle(frame, (point[0], point[1]), 2, (0, 0, 255),
                                   -1)

                #Applying the PnP solver to find the 3D pose
                # of the head from the 2D position of the
                # landmarks.
                #retval - bool
                #rvec - Output rotation vector that, together with tvec, brings
                # points from the model coordinate system to the camera coordinate system.
                #tvec - Output translation vector.
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                                  camera_matrix,
                                                  camera_distortion)

                #Now we project the 3D points into the image plane
                #Creating a 3-axis to be used as reference in the image.
                axis = numpy.float32([[50, 0, 0], [0, 50, 0], [0, 0, 50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec,
                                                camera_matrix,
                                                camera_distortion)

                #Drawing the three axis on the image frame.
                #The opencv colors are defined as BGR colors such as:
                # (a, b, c) >> Blue = a, Green = b and Red = c
                #Our axis/color convention is X=R, Y=G, Z=B
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()),
                         (0, 255, 0), 3)  #GREEN
                cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()),
                         (255, 0, 0), 3)  #BLUE
                cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()),
                         (0, 0, 255), 3)  #RED

        #Drawing a yellow rectangle
        # (and text) around the ROI.
        #if(DEBUG == True):
        #text_x1 = roi_x1
        #text_y1 = roi_y1 - 3
        #if(text_y1 < 0): text_y1 = 0
        #cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1);
        #cv2.rectangle(frame,
        #(roi_x1, roi_y1),
        #(roi_x2, roi_y2),
        #(0, 255, 255),
        #2)

        #Writing in the output file
        out.write(frame)

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
    print("Bye...")
Example #3
def main():

    video_capture = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
    if(video_capture.isOpened() == False):
        print("Error: the resource is busy or unvailable")
    else:
        print("The video source has been opened correctly...")

    cv2.namedWindow('VIDEO')
    cv2.moveWindow('VIDEO', 20, 20)

    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60/2 * numpy.pi / 180)
    f_y = f_x
    camera_matrix = numpy.float32([[f_x, 0.0, c_x],
                                   [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0] ])
    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")
    camera_matrix = numpy.float32([[602.10618226,          0.0, 320.27333589],
                                   [         0.0, 603.55869786,  229.7537026],
                                   [         0.0,          0.0,          1.0] ])
    camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])
    landmarks_3D = numpy.float32([P3D_RIGHT_SIDE,
                                  P3D_GONION_RIGHT,
                                  P3D_MENTON,
                                  P3D_GONION_LEFT,
                                  P3D_LEFT_SIDE,
                                  P3D_FRONTAL_BREADTH_RIGHT,
                                  P3D_FRONTAL_BREADTH_LEFT,
                                  P3D_SELLION,
                                  P3D_NOSE,
                                  P3D_SUB_NOSE,
                                  P3D_RIGHT_EYE,
                                  P3D_RIGHT_TEAR,
                                  P3D_LEFT_TEAR,
                                  P3D_LEFT_EYE,
                                  P3D_STOMION])
    dlib_landmarks_file = "./etc/shape_predictor_68_face_landmarks.dat"
    if(os.path.isfile(dlib_landmarks_file)==False):
        print("The dlib landmarks file is missing! Use the following commands to download and unzip: ")
        print(">> wget dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
        print(">> bzip2 -d shape_predictor_68_face_landmarks.dat.bz2")
        return
    my_detector = faceLandmarkDetection(dlib_landmarks_file)
    my_predictor = dlib.shape_predictor(dlib_landmarks_file)
    my_face_detector = dlib.get_frontal_face_detector()

    print("[INFO] loading facial landmark...")
    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]



    #Counters and running averages for the eye and angle measurements
    EYE_NOW = 0
    ANG_NOW = 0
    EYE_AVERAGE = 0
    ANG_AVERAGE = 0
    ANG_COUNTER = 0
    EYE_COUNTER = 0

    EYE_AR_THRESH = 0.21                      #Just set this parameter here!

    ANG_THRESH = 0
    FACE_FRAME_COUNTER = 0
    FRAME1 = 4
    FRAME2 = 4
    FRAME3 = 3

    ANG_THRESH_LIST= numpy.zeros(15)
    ANG = False
    EYE_THRESH_LIST = numpy.zeros(15)
    EYE = False

    OPTIMIZE = True
    DETECTION = False

    while(True):
        ret, frame = video_capture.read()
        faces_array = my_face_detector(frame, 1)
        print("Total Faces: " + str(len(faces_array)))

        if(OPTIMIZE):
            if(len(faces_array) == 0):
                cv2.putText(frame, "Cannot detect face!", (10,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
            else:
                FACE_FRAME_COUNTER = 0
                ALARM_ON = False

        if(DETECTION):
            #DROWSINESS ALERT 1: no face detected
            if(len(faces_array) == 0):
                FACE_FRAME_COUNTER += 1
                if(FACE_FRAME_COUNTER >= FRAME1):
                    if not ALARM_ON:
                        ALARM_ON = True
                    cv2.putText(frame, "DROWSINESS ALERT1!", (10,30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
                    print("**DROWSINESS ALERT1!**\n")
                    beepsound()
            else:
                FACE_FRAME_COUNTER = 0
                ALARM_ON = False

        for i, pos in enumerate(faces_array):
            face_x1 = pos.left()
            face_y1 = pos.top()
            face_x2 = pos.right()
            face_y2 = pos.bottom()
            text_x1 = face_x1
            text_y1 = face_y1 - 3
            cv2.putText(frame, "FACE " + str(i+1), (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
            cv2.rectangle(frame,
                         (face_x1, face_y1),
                         (face_x2, face_y2),
                         (0, 255, 0),
                          2)
            landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS)
            for point in landmarks_2D:
                cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1)
            retval, rvec, tvec = cv2.solvePnP(landmarks_3D,
                                                  landmarks_2D,
                                                  camera_matrix, camera_distortion)
            axis = numpy.float32([[50,0,0],
                                      [0,50,0],
                                      [0,0,50]])
            imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion)
            sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
            cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN
            cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE
            cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED
            modelpts, jac2 = cv2.projectPoints(landmarks_3D, rvec, tvec, camera_matrix, camera_distortion)
            rvec_matrix = cv2.Rodrigues(rvec)[0]
            proj_matrix = numpy.hstack((rvec_matrix, tvec))
            eulerAngles = cv2.decomposeProjectionMatrix(proj_matrix)[6]
            pitch, yaw, roll = [math.radians(_) for _ in eulerAngles]
            roll = math.degrees(math.asin(math.sin(roll)))
            sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
            cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN
            cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE
            cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED
            cv2.putText(frame, "angle: {:.2f}".format(roll), (500,60), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)

            if(OPTIMIZE):
                if(ANG_NOW < 15):
                    ANG_THRESH_LIST[ANG_NOW]= roll
                    print("ANG_THRESH ",ANG_NOW)
                    ANG_NOW =ANG_NOW+1
                elif (ANG_NOW == 15):
                    print(ANG_THRESH_LIST)
                    angle_samples = []  #avoid shadowing the built-in 'list'
                    #percentile1 = numpy.percentile(ANG_THRESH_LIST, 25)
                    #percentile2 = numpy.percentile(ANG_THRESH_LIST, 50)
                    ANG_THRESH_LIST_REAL = ANG_THRESH_LIST.copy()
                    for ang in ANG_THRESH_LIST:
                        if(ang >= -90 and ang <= -70):
                            print("adding", ang)
                            angle_samples.append(ang)
                            #ANG_THRESH_LIST_REAL.remove(ang)
                    print(angle_samples)
                    ANG_AVERAGE = numpy.mean(angle_samples)
                    print("ANG AVERAGE IS", ANG_AVERAGE)
                    print("ANG AVERAGE COMPLETE")
                    beepsound()
                    beepsound()
                    ANG = True
                    ANG_THRESH = ANG_AVERAGE
                    cv2.putText(frame, "Angle Optimize Complete: {:.2f}".format(ANG_THRESH), (10,430),cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,100), 2)
                    ANG_NOW =ANG_NOW+1
                else:
                    break

            if(DETECTION):
                #NOT FRONTAL-GAZING
                print("ANGLE:{}".format(roll))
                if(roll > 0) :
                    roll = roll - abs(ANG_THRESH)
                else:
                    roll = roll + abs(ANG_THRESH)
                print("changed ANGLE:{}".format(roll))
                if(abs(roll) <= 5 ):
                    ANG_COUNTER = ANG_COUNTER + 1
                    cv2.putText(frame, "Not frontal-gazing", (10,cam_h-30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,0,0), 2)
                    print("counter: {}".format(ANG_COUNTER))
                    print("Non frontal-gazing!\n");


                    #DROWSINESS ALERT 2: sustained non-frontal gaze
                    if (ANG_COUNTER >= FRAME2):
                        if not ALARM_ON:
                            ALARM_ON = True
                        cv2.putText(frame, "DROWSINESS ALERT2!", (10,40), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
                        beepsound()
                        print("**DROWSINESS ALERT2!**\n");
                else:
                    ANG_COUNTER = 0
                    ALARM_ON = False

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            rects = my_face_detector(gray, 0)
            for rect in rects:
                shape = my_predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)
                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]
                leftEAR = eye_aspect_ratio(leftEye)
                rightEAR = eye_aspect_ratio(rightEye)
                ear = (leftEAR + rightEAR) / 2.0

                leftEyeHull = cv2.convexHull(leftEye)
                rightEyeHull = cv2.convexHull(rightEye)
                cv2.drawContours(frame,[leftEyeHull], -1,(0,255,0),1)
                cv2.drawContours(frame,[rightEyeHull], -1,(0,255,0),1)
                cv2.putText(frame, "EAR: {:.2f}".format(ear), (300,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)

                if(DETECTION):
                    if ear < EYE_AR_THRESH:
                        EYE_COUNTER += 1

                        #DROWSINESS ALERT 3: eyes kept closed (EAR below threshold)
                        if EYE_COUNTER >= FRAME3:
                            if not ALARM_ON:
                                ALARM_ON = True
                            cv2.putText(frame, "DROWSINESS ALERT3!", (10,70), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
                            print("**DROWSINESS ALERT3!**\n")
                            beepsound()
                    else:
                        EYE_COUNTER = 0
                        ALARM_ON = False
                    #cv2.putText(frame, "EAR: {:.2f}".format(ear), (450,30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)

        cv2.imshow('VIDEO', frame)
        if (ANG == True):
            print("########################ANG OPTIMIZE COMPLETE########################")
            cv2.putText(frame, "START DETECTING!", (10,30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)
            print("########################START DETECTING########################")
            print("ANG_THRESH IS", ANG_THRESH)
            ANG = False
            OPTIMIZE = False
            DETECTION = True
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
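
Example #3 turns the solvePnP output into a roll angle: cv2.Rodrigues converts the rotation vector to a 3x3 matrix, stacking it with tvec gives a 3x4 projection matrix, and cv2.decomposeProjectionMatrix returns the Euler angles in degrees as its seventh output; the asin(sin(...)) step only wraps the roll into [-90, 90]. A condensed sketch of that conversion (variable names follow the example):

import math
import cv2
import numpy as np

def head_euler_angles(rvec, tvec):
    #3x3 rotation matrix from the Rodrigues rotation vector.
    rvec_matrix = cv2.Rodrigues(rvec)[0]
    #3x4 projection matrix [R | t].
    proj_matrix = np.hstack((rvec_matrix, tvec))
    #The seventh return value holds (pitch, yaw, roll) in degrees.
    euler_angles = cv2.decomposeProjectionMatrix(proj_matrix)[6]
    pitch, yaw, roll = euler_angles.flatten()
    #Wrap roll into [-90, 90] degrees, as the example does.
    roll = math.degrees(math.asin(math.sin(math.radians(roll))))
    return pitch, yaw, roll
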
Example #4
def main():

    #alarm for drowsiness
    ALARM_ON = False

    #Defining the video capture object
    #video_image = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
    video_capture = cv2.VideoCapture(
        "C:/Users/YJ/Desktop/학교생활/UROP 연구/190806 낮-20190816T135703Z-001/190806 낮/맨얼굴_2.mp4"
    )
    _, video_image = video_capture.read()
    video_image = cv2.resize(video_image,
                             dsize=(640, 480),
                             interpolation=cv2.INTER_AREA)
    if (video_capture.isOpened() == False):
        print("Error: the resource is busy or unvailable")
    else:
        print("The video source has been opened correctly...")

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera (it helps to know the focal length).
    # fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60 / 2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0]])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    camera_matrix = numpy.float32([[602.10618226, 0.0, 320.27333589],
                                   [0.0, 603.55869786, 229.7537026],
                                   [0.0, 0.0, 1.0]])

    #Distortion coefficients
    camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    #camera_distortion = numpy.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])

    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([
        P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
        P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
        P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
        P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
    ])

    #Declaring the two classifiers
    #my_cascade = haarCascade("../etc/haarcascade_frontalface_alt.xml", "../etc/haarcascade_profileface.xml")
    dlib_landmarks_file = "./etc/shape_predictor_68_face_landmarks.dat"
    if (os.path.isfile(dlib_landmarks_file) == False):
        print(
            "The dlib landmarks file is missing! Use the following commands to download and unzip: "
        )
        print(
            ">> wget dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
        print(">> bzip2 -d shape_predictor_68_face_landmarks.dat.bz2")
        return
    my_detector = faceLandmarkDetection(dlib_landmarks_file)
    my_predictor = dlib.shape_predictor(dlib_landmarks_file)
    my_face_detector = dlib.get_frontal_face_detector()

    #settings for detection

    #detect blink
    EYE_AR_THRESH = 0.15
    # number of consecutive frames. Alarm if this is exceeded
    ANG_THRESH = 55.0

    #counter for counting undetected face
    FACE_FRAME_COUNTER = 0

    #counter for counting not frontal-gazing frames
    ANG_COUNTER = 0

    #counter for counting closed eyes
    EYE_COUNTER = 0

    #number of consecutive frames for face detection
    FRAME1 = 3

    #number of consecutive frames for frontal gaze detection
    FRAME2 = 3

    #number of consecutive frames for eye detection
    FRAME3 = 3

    print("[INFO] loading facial landmark predictor...")

    # grab the indexes of the facial landmarks for the left and
    # right eye, respectively
    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    while (True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        #gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY)

        faces_array = my_face_detector(frame, 1)
        print("Total Faces: " + str(len(faces_array)))

        if (len(faces_array) == 0):
            FACE_FRAME_COUNTER += 1

            if (FACE_FRAME_COUNTER >= FRAME1):
                if not ALARM_ON:
                    ALARM_ON = True
                cv2.putText(frame, "DROWSINESS ALERT1!", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        else:
            FACE_FRAME_COUNTER = 0
            ALARM_ON = False

        for i, pos in enumerate(faces_array):

            face_x1 = pos.left()
            face_y1 = pos.top()
            face_x2 = pos.right()
            face_y2 = pos.bottom()
            text_x1 = face_x1
            text_y1 = face_y1 - 3

            cv2.putText(frame, "FACE " + str(i + 1), (text_x1, text_y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2),
                          (0, 255, 0), 2)

            landmarks_2D = my_detector.returnLandmarks(
                frame,
                face_x1,
                face_y1,
                face_x2,
                face_y2,
                points_to_return=TRACKED_POINTS)

            for point in landmarks_2D:
                cv2.circle(frame, (point[0], point[1]), 2, (0, 0, 255), -1)

            #Applying the PnP solver to find the 3D pose
            # of the head from the 2D position of the
            # landmarks.
            #retval - bool
            #rvec - Output rotation vector that, together with tvec, brings
            # points from the model coordinate system to the camera coordinate system.
            #tvec - Output translation vector.
            retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                              camera_matrix, camera_distortion)

            #Now we project the 3D points into the image plane
            #Creating a 3-axis to be used as reference in the image.
            axis = numpy.float32([[50, 0, 0], [0, 50, 0], [0, 0, 50]])
            imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix,
                                            camera_distortion)

            #Drawing the three axis on the image frame.
            #The opencv colors are defined as BGR colors such as:
            # (a, b, c) >> Blue = a, Green = b and Red = c
            #Our axis/color convention is X=R, Y=G, Z=B
            sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
            cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0, 255, 0),
                     3)  #GREEN
            cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255, 0, 0),
                     3)  #BLUE
            cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0, 0, 255),
                     3)  #RED

            #Compute rotate_degree
            modelpts, jac2 = cv2.projectPoints(landmarks_3D, rvec, tvec,
                                               camera_matrix,
                                               camera_distortion)
            rvec_matrix = cv2.Rodrigues(rvec)[0]

            proj_matrix = numpy.hstack((rvec_matrix, tvec))
            eulerAngles = cv2.decomposeProjectionMatrix(proj_matrix)[6]

            pitch, yaw, roll = [math.radians(_) for _ in eulerAngles]

            roll = math.degrees(math.asin(math.sin(roll)))
            roll = roll - 20
            print(str(roll))

            #Drawing the three axis on the image frame.
            #The opencv colors are defined as BGR colors such as:
            # (a, b, c) >> Blue = a, Green = b and Red = c
            #Our axis/color convention is X=R, Y=G, Z=B
            sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
            cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0, 255, 0),
                     3)  #GREEN
            cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255, 0, 0),
                     3)  #BLUE
            cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0, 0, 255),
                     3)  #RED

            cv2.putText(frame, "angle: {:.2f}".format(roll), (300, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            if (abs(roll) >= ANG_THRESH):
                ANG_COUNTER = ANG_COUNTER + 1
                cv2.putText(frame, "Not frontal-gazing", (10, cam_h - 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

                print("counter: {}".format(ANG_COUNTER))

                if (ANG_COUNTER >= FRAME2):
                    if not ALARM_ON:
                        ALARM_ON = True

                    cv2.putText(frame, "DROWSINESS ALERT2!", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

            else:
                ANG_COUNTER = 0
                ALARM_ON = False

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                rects = my_face_detector(gray, 0)

                for rect in rects:
                    # determine the facial landmarks for the face region, then
                    # convert the facial landmark (x, y)-coordinates to a NumPy
                    # array
                    shape = my_predictor(gray, rect)
                    shape = face_utils.shape_to_np(shape)

                    leftEye = shape[lStart:lEnd]
                    rightEye = shape[rStart:rEnd]
                    leftEAR = eye_aspect_ratio(leftEye)
                    rightEAR = eye_aspect_ratio(rightEye)

                    ear = (leftEAR + rightEAR) / 2.0

                    leftEyeHull = cv2.convexHull(leftEye)
                    rightEyeHull = cv2.convexHull(rightEye)
                    cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1)
                    cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1)

                    if ear < EYE_AR_THRESH:
                        EYE_COUNTER += 1

                        if EYE_COUNTER >= FRAME3:

                            if not ALARM_ON:
                                ALARM_ON = True

                            cv2.putText(frame, "DROWSINESS ALERT3!", (10, 30),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                        (0, 0, 255), 2)
                    else:
                        EYE_COUNTER = 0
                        ALARM_ON = False

                    cv2.putText(frame, "EAR: {:.2f}".format(ear), (300, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
    print("Bye...")
Example #5
def main():

    #Check if some arguments have been passed
    #pass the path of a video
    if (len(sys.argv) > 2):
        file_path = sys.argv[1]
        if (os.path.isfile(file_path) == False):
            print(
                "ex_pnp_head_pose_estimation: the file specified does not exist."
            )
            return
        else:
            #Open the video file
            video_capture = cv2.VideoCapture(file_path)
            if (video_capture.isOpened() == True):
                print(
                    "ex_pnp_head_pose_estimation: the video source has been opened correctly..."
                )
            # Define the codec and create VideoWriter object
            #fourcc = cv2.VideoWriter_fourcc(*'XVID')
            output_path = sys.argv[2]
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_path, fourcc, 20.0, (1280, 720))
    else:
        print(
            "You have to pass as argument the path to a video file and the path to the output file to produce, for example: \n python ex_pnp_pose_estimation_video.py /home/video.mpg ./output.avi"
        )
        return

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60 / 2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0]])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    #camera_matrix = numpy.float32([[602.10618226,          0.0, 320.27333589],
    #[         0.0, 603.55869786,  229.7537026],
    #[         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    #camera_distortion = numpy.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])

    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([
        P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
        P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
        P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
        P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
    ])

    #Declaring the two classifiers
    #my_cascade = haarCascade("./etc/haarcascade_frontalface_alt.xml", "./etc/haarcascade_profileface.xml")
    my_detector = faceLandmarkDetection(
        './etc/shape_predictor_68_face_landmarks.dat')
    my_face_detector = dlib.get_frontal_face_detector()

    while (True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        #gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY)

        faces_array = my_face_detector(frame, 1)
        print("Total Faces: " + str(len(faces_array)))
        for i, pos in enumerate(faces_array):

            face_x1 = pos.left()
            face_y1 = pos.top()
            face_x2 = pos.right()
            face_y2 = pos.bottom()
            text_x1 = face_x1
            text_y1 = face_y1 - 3

            cv2.putText(frame, "FACE " + str(i + 1), (text_x1, text_y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2),
                          (0, 255, 0), 2)

            landmarks_2D = my_detector.returnLandmarks(
                frame,
                face_x1,
                face_y1,
                face_x2,
                face_y2,
                points_to_return=TRACKED_POINTS)

            for point in landmarks_2D:
                cv2.circle(frame, (point[0], point[1]), 2, (0, 0, 255), -1)

            #Applying the PnP solver to find the 3D pose
            # of the head from the 2D position of the
            # landmarks.
            #retval - bool
            #rvec - Output rotation vector that, together with tvec, brings
            # points from the model coordinate system to the camera coordinate system.
            #tvec - Output translation vector.
            retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                              camera_matrix, camera_distortion)

            #Now we project the 3D points into the image plane
            #Creating a 3-axis to be used as reference in the image.
            axis = numpy.float32([[50, 0, 0], [0, 50, 0], [0, 0, 50]])
            imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix,
                                            camera_distortion)

            #Drawing the three axis on the image frame.
            #The opencv colors are defined as BGR colors such as:
            # (a, b, c) >> Blue = a, Green = b and Red = c
            #Our axis/color convention is X=R, Y=G, Z=B
            sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
            cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0, 255, 0),
                     3)  #GREEN
            cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255, 0, 0),
                     3)  #BLUE
            cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0, 0, 255),
                     3)  #RED

        #Writing in the output file
        out.write(frame)

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
    print("Bye...")
Example #6
def main():

    #Defining the video capture object
    video_capture = cv2.VideoCapture(0)

    if(video_capture.isOpened() == False):
        print("Error: the resource is busy or unvailable")
    else:
        print("The video source has been opened correctly...")

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / np.tan(60/2 * np.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = np.float32([[f_x, 0.0, c_x],
                                   [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0] ])

    #print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    camera_matrix = np.float32([[602.10618226,          0.0, 320.27333589],
                                   [         0.0, 603.55869786,  229.7537026],
                                   [         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    #camera_distortion = np.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    camera_distortion = np.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])


    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = np.float32([P3D_RIGHT_SIDE,
                                  P3D_GONION_RIGHT,
                                  P3D_MENTON,
                                  P3D_GONION_LEFT,
                                  P3D_LEFT_SIDE,
                                  P3D_FRONTAL_BREADTH_RIGHT,
                                  P3D_FRONTAL_BREADTH_LEFT,
                                  P3D_SELLION,
                                  P3D_NOSE,
                                  P3D_SUB_NOSE,
                                  P3D_RIGHT_EYE,
                                  P3D_RIGHT_TEAR,
                                  P3D_LEFT_TEAR,
                                  P3D_LEFT_EYE,
                                  P3D_STOMION])

    #Declaring the two classifiers
    my_cascade = haarCascade("/Users/lamh/Documents/deepgaze/etc/xml/haarcascade_frontalface_alt.xml", "/Users/lamh/Documents/deepgaze/etc/xml/haarcascade_profileface.xml")
    #TODO If missing, example file can be retrieved from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
    my_detector = faceLandmarkDetection('/Users/lamh/Documents/deepgaze/etc/shape_predictor_68_face_landmarks.dat')

    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w/10)
    roi_resize_h = int(cam_h/10)

    try:
        while(True):

            # Capture frame-by-frame
            ret, frame = video_capture.read()
            gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY)

            #Return code: 1=Frontal, 2=FrontRotLeft,
            # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
            my_cascade.findFace(gray, True, True, True, True, 1.10, 1.10, 1.15, 1.15, 40, 40, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type)

            #Accumulate error values in a counter
            if(my_cascade.face_type == 0):
                no_face_counter += 1

            #If no face is found for a certain
            #number of cycles, then the ROI is reset
            if(no_face_counter == 50):
                no_face_counter = 0
                roi_x1 = 0
                roi_y1 = 0
                roi_x2 = cam_w
                roi_y2 = cam_h
                roi_w = cam_w
                roi_h = cam_h

            #Checking which kind of face it is returned
            if(my_cascade.face_type > 0):

                #Face found, reset the error counter
                no_face_counter = 0

                #Because the dlib landmark detector wants a precise
                #bounding box of the face, it is necessary to resize
                #the box returned by the OpenCV haar detector.
                #Adjusting the frame for profile left
                if(my_cascade.face_type == 4):
                    face_margin_x1 = 20 - 10 #resize_rate + shift_rate
                    face_margin_y1 = 20 + 5 #resize_rate + shift_rate
                    face_margin_x2 = -20 - 10 #resize_rate + shift_rate
                    face_margin_y2 = -20 + 5 #resize_rate + shift_rate
                    face_margin_h = -0.7 #resize_factor
                    face_margin_w = -0.7 #resize_factor
                #Adjusting the frame for profile right
                elif(my_cascade.face_type == 5):
                    face_margin_x1 = 20 + 10
                    face_margin_y1 = 20 + 5
                    face_margin_x2 = -20 + 10
                    face_margin_y2 = -20 + 5
                    face_margin_h = -0.7
                    face_margin_w = -0.7
                #No adjustments
                else:
                    face_margin_x1 = 0
                    face_margin_y1 = 0
                    face_margin_x2 = 0
                    face_margin_y2 = 0
                    face_margin_h = 0
                    face_margin_w = 0

                #Updating the face position
                face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
                face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
                face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
                face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
                face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
                face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)

                #Updating the ROI position
                roi_x1 = face_x1 - roi_resize_w
                if (roi_x1 < 0): roi_x1 = 0
                roi_y1 = face_y1 - roi_resize_h
                if(roi_y1 < 0): roi_y1 = 0
                roi_w = face_w + roi_resize_w + roi_resize_w
                if(roi_w > cam_w): roi_w = cam_w
                roi_h = face_h + roi_resize_h + roi_resize_h
                if(roi_h > cam_h): roi_h = cam_h
                roi_x2 = face_x2 + roi_resize_w
                if (roi_x2 > cam_w): roi_x2 = cam_w
                roi_y2 = face_y2 + roi_resize_h
                if(roi_y2 > cam_h): roi_y2 = cam_h

                #Debugging printing utilities
                #if(DEBUG == True):
                if(DEBUG == False):
                    #print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h)
                    #print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)
                    #Drawing a green rectangle
                    # (and text) around the face.
                    text_x1 = face_x1
                    text_y1 = face_y1 - 3
                    if(text_y1 < 0): text_y1 = 0
                    cv2.putText(frame, "FACE", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
                    cv2.rectangle(frame,
                                 (face_x1, face_y1),
                                 (face_x2, face_y2),
                                 (0, 255, 0),
                                  2)

                # In case of a frontal/rotated face
                # the landmark detector is called
                if(my_cascade.face_type > 0):
                    landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS)

                    if(DEBUG == True):
                        #cv2.drawKeypoints(frame, landmarks_2D)

                        for point in landmarks_2D:
                            cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1)


                    # Applying the PnP solver to find the 3D pose
                    # of the head from the 2D position of the
                    # landmarks.
                    # retval - bool
                    # rvec - Output rotation vector that, together with tvec, brings
                    # points from the model coordinate system to the camera coordinate system.
                    # tvec - Output translation vector.
                    retval, rvec, tvec = cv2.solvePnP(landmarks_3D,
                                                      landmarks_2D,
                                                      camera_matrix, camera_distortion)

                    # Now we project the 3D points into the image plane
                    # Creating a 3-axis to be used as reference in the image.
                    axis = np.float32([[50,0,0],
                                          [0,50,0],
                                          [0,0,50]])
                    imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion)

                    # Drawing the three axis on the image frame.
                    selection_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                    print("*** Position: {0}, pitch, yaw, roll: {1}, {2}, {3} ***".format(selection_xy, np.degrees(rvec)[0], np.degrees(rvec)[1], np.degrees(rvec)[2]))
                    if(max(imgpts[1].ravel()) <= 500000 and min(imgpts[1].ravel()) >= -500000):
                        cv2.line(frame, selection_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN

                    if(max(imgpts[2].ravel()) <= 500000 and min(imgpts[2].ravel()) >= -500000):
                        cv2.line(frame, selection_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE

                    if(max(imgpts[0].ravel()) <= 500000 and min(imgpts[0].ravel()) >= -500000):
                        cv2.line(frame, selection_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED

            #Drawing a yellow rectangle
            # (and text) around the ROI.
            if(DEBUG == True):
                text_x1 = roi_x1
                text_y1 = roi_y1 - 3
                if(text_y1 < 0): text_y1 = 0
                #cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1);
                #cv2.rectangle(frame,
                #             (roi_x1, roi_y1),
                #             (roi_x2, roi_y2),
                #             (0, 255, 255),
                #             2)

            #Showing the frame and waiting
            # for the exit command
            cv2.imshow('Video', frame)
            key = cv2.waitKey(1)
            if key == 27:
                cv2.destroyAllWindows()
                break
            #if cv2.waitKey(1) & 0xFF == ord('q'): break
    except KeyboardInterrupt:
        #Release the camera
        video_capture.release()
        print("KeyboardInterrupt: camera released. Bye ...")
def main():
    #counter for counting not frontal-gazing frames
    COUNTER = 0
    #alarm for drowsiness
    ALARAM_ON = False

    #Defining the video capture object
    video_capture = cv2.VideoCapture("head_pose4.mp4")

    if (video_capture.isOpened() == False):
        print("Error: the resource is busy or unvailable")
    else:
        print("The video source has been opened correctly...")

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have better results it is necessary to find the focal
    # length of the camera (it helps to know the focal length).
    # fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60 / 2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0]])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    camera_matrix = numpy.float32([[602.10618226, 0.0, 320.27333589],
                                   [0.0, 603.55869786, 229.7537026],
                                   [0.0, 0.0, 1.0]])

    #Distortion coefficients
    #camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    camera_distortion = numpy.float32(
        [0.06232237, -0.41559805, 0.00125389, -0.00402566, 0.04879263])

    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([
        P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
        P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
        P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
        P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
    ])

    #Declaring the two classifiers
    my_cascade = haarCascade("./etc/xml/haarcascade_frontalface_alt.xml",
                             "./etc/xml/haarcascade_profileface.xml")
    #TODO If missing, example file can be retrieved from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
    my_detector = faceLandmarkDetection(
        './etc/shape_predictor_68_face_landmarks.dat')

    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w / 10)
    roi_resize_h = int(cam_h / 10)

    while (True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2],
                            cv2.COLOR_BGR2GRAY)

        #Looking for faces with cascade
        #The classifier moves over the ROI
        #starting from a minimum dimension and augmenting
        #slightly based on the scale factor parameter.
        #The scale factor for the frontal face is 1.10 (10%)
        #Scale factor: 1.15=15%, 1.25=25%, etc.
        #Higher scale factors mean faster classification
        #but lower accuracy.
        #
        #Return code: 1=Frontal, 2=FrontRotLeft,
        # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
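        #The positional arguments below mirror the keyword form used later in
        # this document (findFace(gray, runFrontal, runFrontalRotated, runLeft,
        # runRight, ...)): the four True flags enable the frontal, rotated and
        # left/right profile cascades, 1.10/1.10/1.15/1.15 are the scale
        # factors, and 40x40 is the minimum face size in pixels.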
        my_cascade.findFace(gray,
                            True,
                            True,
                            True,
                            True,
                            1.10,
                            1.10,
                            1.15,
                            1.15,
                            40,
                            40,
                            rotationAngleCCW=30,
                            rotationAngleCW=-30,
                            lastFaceType=my_cascade.face_type)

        #Accumulate error values in a counter
        if (my_cascade.face_type == 0):
            no_face_counter += 1

        #If no face is found for a certain
        #number of cycles, then the ROI is reset
        if (no_face_counter == 50):
            no_face_counter = 0
            roi_x1 = 0
            roi_y1 = 0
            roi_x2 = cam_w
            roi_y2 = cam_h
            roi_w = cam_w
            roi_h = cam_h

        #Checking which kind of face is returned
        if (my_cascade.face_type > 0):

            #Face found, reset the error counter
            no_face_counter = 0

            #Because the dlib landmark detector wants a precise
            #boundary box of the face, it is necessary to resize
            #the box returned by the OpenCV haar detector.
            #Adjusting the frame for profile left
            if (my_cascade.face_type == 4):
                face_margin_x1 = 20 - 10  #resize_rate + shift_rate
                face_margin_y1 = 20 + 5  #resize_rate + shift_rate
                face_margin_x2 = -20 - 10  #resize_rate + shift_rate
                face_margin_y2 = -20 + 5  #resize_rate + shift_rate
                face_margin_h = -0.7  #resize_factor
                face_margin_w = -0.7  #resize_factor
            #Adjusting the frame for profile right
            elif (my_cascade.face_type == 5):
                face_margin_x1 = 20 + 10
                face_margin_y1 = 20 + 5
                face_margin_x2 = -20 + 10
                face_margin_y2 = -20 + 5
                face_margin_h = -0.7
                face_margin_w = -0.7
            #No adjustments
            else:
                face_margin_x1 = 0
                face_margin_y1 = 0
                face_margin_x2 = 0
                face_margin_y2 = 0
                face_margin_h = 0
                face_margin_w = 0

            #Updating the face position
            face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
            face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
            face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
            face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
            face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
            face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)

            #Updating the ROI position
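            #The ROI is the face box enlarged by one tenth of the frame size on
            # each side and clamped to the frame borders, so the next cascade
            # search only scans this region.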
            roi_x1 = face_x1 - roi_resize_w
            if (roi_x1 < 0): roi_x1 = 0
            roi_y1 = face_y1 - roi_resize_h
            if (roi_y1 < 0): roi_y1 = 0
            roi_w = face_w + roi_resize_w + roi_resize_w
            if (roi_w > cam_w): roi_w = cam_w
            roi_h = face_h + roi_resize_h + roi_resize_h
            if (roi_h > cam_h): roi_h = cam_h
            roi_x2 = face_x2 + roi_resize_w
            if (roi_x2 > cam_w): roi_x2 = cam_w
            roi_y2 = face_y2 + roi_resize_h
            if (roi_y2 > cam_h): roi_y2 = cam_h

            #Debugging printing utilities
            if (DEBUG == True):
                print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w,
                      face_h)
                print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)
                #Drawing a green rectangle
                # (and text) around the face.
                text_x1 = face_x1
                text_y1 = face_y1 - 3
                if (text_y1 < 0): text_y1 = 0
                cv2.putText(frame, "FACE", (text_x1, text_y1),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
                cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2),
                              (0, 255, 0), 2)

            #In case of a frontal/rotated face
            # the landmark detector is called
            if (my_cascade.face_type > 0):
                landmarks_2D = my_detector.returnLandmarks(
                    frame,
                    face_x1,
                    face_y1,
                    face_x2,
                    face_y2,
                    points_to_return=TRACKED_POINTS)

                if (DEBUG == True):
                    #cv2.drawKeypoints(frame, landmarks_2D)

                    for point in landmarks_2D:
                        cv2.circle(frame, (point[0], point[1]), 2, (0, 0, 255),
                                   -1)

                #Applying the PnP solver to find the 3D pose
                # of the head from the 2D position of the
                # landmarks.
                #retval - bool
                #rvec - Output rotation vector that, together with tvec, brings
                # points from the model coordinate system to the camera coordinate system.
                #tvec - Output translation vector.
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                                  camera_matrix,
                                                  camera_distortion)

                #Now we project the 3D points into the image plane
                #Creating a 3-axis to be used as reference in the image.
                axis = numpy.float32([[50, 0, 0], [0, 50, 0], [0, 0, 50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec,
                                                camera_matrix,
                                                camera_distortion)

                #Compute rotate_degree
                modelpts, jac2 = cv2.projectPoints(landmarks_3D, rvec, tvec,
                                                   camera_matrix,
                                                   camera_distortion)
                rvec_matrix = cv2.Rodrigues(rvec)[0]

                proj_matrix = numpy.hstack((rvec_matrix, tvec))
                eulerAngles = cv2.decomposeProjectionMatrix(proj_matrix)[6]
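                #decomposeProjectionMatrix returns the Euler angles (in degrees)
                # as its seventh output; below they are converted to radians and
                # folded back into the [-90, 90] degree range via asin(sin(.)).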

                pitch, yaw, roll = [math.radians(_) for _ in eulerAngles]

                roll = math.degrees(math.asin(math.sin(roll)))
                print(str(roll))
                #Drawing the three axis on the image frame.
                #The opencv colors are defined as BGR colors such as:
                # (a, b, c) >> Blue = a, Green = b and Red = c
                #Our axis/color convention is X=R, Y=G, Z=B
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()),
                         (0, 255, 0), 3)  #GREEN
                cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()),
                         (255, 0, 0), 3)  #BLUE
                cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()),
                         (0, 0, 255), 3)  #RED

        #Drawing a yellow rectangle
        # (and text) around the ROI.
        if (DEBUG == True):
            text_x1 = roi_x1
            text_y1 = roi_y1 - 3
            if (text_y1 < 0): text_y1 = 0
            cv2.putText(frame, "ROI", (text_x1, text_y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
            cv2.rectangle(frame, (roi_x1, roi_y1), (roi_x2, roi_y2),
                          (0, 255, 255), 2)
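            #A roll at or below -40 degrees is treated as not frontal-gazing;
            # if it persists for 20 consecutive frames the drowsiness alert
            # below is raised.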

            if (roll <= -40.0):
                COUNTER = COUNTER + 1
                cv2.putText(frame, "Not frontal-gazing", (10, cam_h - 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
                print("counter: {}".format(counter))

                if (COUNTER >= 20):
                    if not ALARM_ON:
                        ALARM_ON = True

                    cv2.putText(frame, "DROWSINESS ALERT!", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            else:
                COUNTER = 0
                ALARM_ON = False

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
    print("Bye...")
def main():
    # Open the video file
    video_capture = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
    if (video_capture.isOpened() == False):
        print("Error: the resource is busy or unvailable")
    else:
        print("The video source has been opened correctly...")

    # Create the main window and move it
    cv2.namedWindow('VIDEO')
    cv2.moveWindow('VIDEO', 20, 20)

    # Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    # Defining the camera matrix.
    # To have a better result it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres.
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60 / 2 * numpy.pi / 180)
    f_y = f_x

    camera_matrix = numpy.float32([[f_x, 0.0, c_x], [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0]])
    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    # load 68 face landmarks using dlib and detect the face
    landmarks_3D = numpy.float32([
        P3D_RIGHT_SIDE, P3D_GONION_RIGHT, P3D_MENTON, P3D_GONION_LEFT,
        P3D_LEFT_SIDE, P3D_FRONTAL_BREADTH_RIGHT, P3D_FRONTAL_BREADTH_LEFT,
        P3D_SELLION, P3D_NOSE, P3D_SUB_NOSE, P3D_RIGHT_EYE, P3D_RIGHT_TEAR,
        P3D_LEFT_TEAR, P3D_LEFT_EYE, P3D_STOMION
    ])

    # Declaring the classifier
    dlib_landmarks_file = "./etc/shape_predictor_68_face_landmarks.dat"

    if (os.path.isfile(dlib_landmarks_file) == False):
        print(
            "The dlib landmarks file is missing! Use the following commands to download and unzip: "
        )
        print(
            ">> wget dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
        print(">> bzip2 -d shape_predictor_68_face_landmarks.dat.bz2")
        return
    my_detector = faceLandmarkDetection(dlib_landmarks_file)
    my_predictor = dlib.shape_predictor(dlib_landmarks_file)
    my_face_detector = dlib.get_frontal_face_detector()

    # grab the indexes of the facial landmarks for the left and right eye, respectively
    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]

    # factors for detecting drowsiness

    ANG_NOW = 0  # counter of the frames used to compute the driver's optimized angle
    ROLL_AVERAGE = 0  # average of the optimized roll angle
    PITCH_AVERAGE = 0  # average of the optimized pitch angle
    ANG_OPTIMIZE = 0  # result of the angle optimization

    ANG_COUNTER = 0  # counter for counting detected angle
    EYE_COUNTER = 0  # counter for counting detected eye
    FACE_FRAME_COUNTER = 0  # counter for counting non-face-detected frame

    EYE_AR_THRESH = 0.21  # threshold for detecting a blink
    ANG_THRESH = 7  # threshold for detecting a bowed head

    FRAME1 = 4  # number of consecutive frames of non-face-detection
    FRAME2 = 4  # number of consecutive frames of detected angle
    FRAME3 = 3  # number of consecutive frames of detected eye

    # variables needed for optimization
    ROLL_THRESH_LIST = numpy.zeros(15)
    PITCH_THRESH_LIST = numpy.zeros(15)
    ANG = False
    OPTIMIZE = True
    OPTIMIZE_AGAIN = True
    DETECTION = False
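
    # Hedged sketch (assumption): eye_aspect_ratio() used inside the loop below
    # is not defined in this snippet; the usual formula over the six dlib eye
    # landmarks looks like this illustrative helper (hypothetical name, not called).
    def _eye_aspect_ratio_sketch(eye):
        # eye: a 6x2 array of (x, y) points in dlib's ordering.
        A = numpy.linalg.norm(eye[1] - eye[5])  # first vertical distance
        B = numpy.linalg.norm(eye[2] - eye[4])  # second vertical distance
        C = numpy.linalg.norm(eye[0] - eye[3])  # horizontal distance
        return (A + B) / (2.0 * C)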

    while (True):
        # Capture frame-by-frame
        ret, frame = video_capture.read()
        faces_array = my_face_detector(
            frame, 1)  # get a face array from face detection
        print("Total Faces: " + str(len(faces_array)))

        if (OPTIMIZE):
            if (len(faces_array) == 0):  # if face detection is failed
                cv2.putText(frame, "Cannot detect face!", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            else:
                FACE_FRAME_COUNTER = 0
                ALARM_ON = False

        # Detection : Detect drowsiness when a particular situation lasts for a number of frames
        if (DETECTION):
            #DROWSINESS ALERT1 : Non-face-detection including the case when the driver bends one's head too much
            if (len(faces_array) == 0):
                FACE_FRAME_COUNTER += 1
                if (FACE_FRAME_COUNTER >= FRAME1):
                    if not ALARM_ON:
                        ALARM_ON = True
                    cv2.putText(frame, "DROWSINESS ALERT1!", (10, 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
                    print("**DROWSINESS ALERT1!**\n")
                    beepsound()
            else:
                FACE_FRAME_COUNTER = 0
                ALARM_ON = False

        for i, pos in enumerate(faces_array):
            face_x1 = pos.left()
            face_y1 = pos.top()
            face_x2 = pos.right()
            face_y2 = pos.bottom()
            text_x1 = face_x1
            text_y1 = face_y1 - 3

            cv2.putText(frame, "FACE " + str(i + 1), (text_x1, text_y1),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            cv2.rectangle(frame, (face_x1, face_y1), (face_x2, face_y2),
                          (0, 255, 0), 2)
            landmarks_2D = my_detector.returnLandmarks(
                frame,
                face_x1,
                face_y1,
                face_x2,
                face_y2,
                points_to_return=TRACKED_POINTS)
            for point in landmarks_2D:
                cv2.circle(frame, (point[0], point[1]), 2, (0, 0, 255), -1)

            # Applying the PnP solver to find the 3D pose of the head from the 2D position of the landmarks.
            # retval - bool
            # rvec - Output rotation vector that, together with tvec, brings points from the model coordinate system to the camera coordinate system.
            # tvec - Output translation vector.
            retval, rvec, tvec = cv2.solvePnP(landmarks_3D, landmarks_2D,
                                              camera_matrix, camera_distortion)
            # Now we project the 3D points into the image plane
            # Creating a 3-axis to be used as reference in the image.
            axis = numpy.float32([[50, 0, 0], [0, 50, 0], [0, 0, 50]])
            imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix,
                                            camera_distortion)

            # Drawing the three axis on the image frame.
            # The opencv colors are defined as BGR colors such as:
            # (a, b, c) >> Blue = a, Green = b and Red = c
            #  Our axis/color convention is X=R, Y=G, Z=B
            sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
            cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0, 255, 0),
                     3)  #GREEN
            cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255, 0, 0),
                     3)  #BLUE
            cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0, 0, 255),
                     3)  #RED
            modelpts, jac2 = cv2.projectPoints(landmarks_3D, rvec, tvec,
                                               camera_matrix,
                                               camera_distortion)
            rvec_matrix = cv2.Rodrigues(rvec)[0]
            proj_matrix = numpy.hstack((rvec_matrix, tvec))
            eulerAngles = cv2.decomposeProjectionMatrix(proj_matrix)[6]
            pitch, yaw, roll = [math.radians(_) for _ in eulerAngles]
            roll = math.degrees(math.asin(math.sin(roll)))
            pitch = math.degrees(math.asin(math.sin(pitch)))
            yaw = math.degrees(math.asin(math.sin(yaw)))
            #print(pitch, yaw, roll)
            cv2.putText(frame, "Roll: {:.2f}".format(roll), (500, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            cv2.putText(frame, "Pitch: {:.2f}".format(pitch), (500, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            # Optimization : calculate the average angle when the driver is looking at the front
            if (OPTIMIZE):
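                # Roll and pitch are sampled over 15 frames while the driver looks
                # straight ahead; rolls outside [-90, -70] are discarded, pitch keeps
                # only its majority sign, and the remaining values are averaged into
                # the ROLL_OPTIMIZE / PITCH_OPTIMIZE reference angles.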
                if (ANG_NOW < 15):  # get roll and pitch over 15 frames
                    ROLL_THRESH_LIST[ANG_NOW] = roll
                    PITCH_THRESH_LIST[ANG_NOW] = pitch
                    print("ANG_NOW ", ANG_NOW)
                    ANG_NOW = ANG_NOW + 1
                elif (ANG_NOW == 15):
                    print(ROLL_THRESH_LIST)
                    print(PITCH_THRESH_LIST)
                    listR = []
                    listPp = []
                    listPn = []
                    listP = []
                    pos = 0
                    neg = 0

                    # remove the outliers
                    for i in ROLL_THRESH_LIST:
                        if (i >= -90 and i <= -70):
                            listR.append(i)
                    for i in PITCH_THRESH_LIST:
                        #if most pitch values are positive, use only the positive values to compute the average.
                        if (i >= 0):
                            pos = pos + 1
                            listPp.append(i)
                        #if most pitch values are negative, use only the negative values to compute the average.
                        else:
                            neg = neg + 1
                            listPn.append(i)
                    if (pos > neg):
                        listP = listPp
                    else:
                        listP = listPn

                    print(listR)
                    print(listP)
                    if not listR or not listP:  #if either list is empty, optimize again
                        beepsound()
                        cv2.putText(frame, "Angle Optimize Fail", (10, 430),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                    (100, 255, 255), 2)
                        ANG_NOW = 0

                    else:
                        ROLL_AVERAGE = numpy.mean(listR)
                        PITCH_AVERAGE = numpy.mean(listP)
                        print("ROLL AVERAGE IS", ROLL_AVERAGE)
                        print("PITCH AVERAGE IS", PITCH_AVERAGE)
                        print("ANG AVERAGE COMPLETE")
                        beepsound()
                        beepsound()
                        ANG = True
                        ROLL_OPTIMIZE = ROLL_AVERAGE
                        PITCH_OPTIMIZE = PITCH_AVERAGE
                        cv2.putText(frame, "Angle Optimize Complete",
                                    (10, 430), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                    (255, 255, 100), 2)
                        ANG_NOW = ANG_NOW + 1
                else:
                    break

            if (DETECTION):
                print("ROLL ANGLE:{}   PITCH ANGLE : {}".format(roll, pitch))
                # compute the distance between average and detected roll regardless of sign
                if (roll > 0):
                    roll = roll - abs(ROLL_OPTIMIZE)
                else:
                    roll = roll + abs(ROLL_OPTIMIZE)
                # compute the distance between average and detected pitch
                pitch = PITCH_AVERAGE - pitch
                print("changed ROLL:{}".format(roll))
                print("changed PITCH:{}".format(pitch))

                # detect NOT FRONTAL-GAZING
                if (abs(roll) <= 7 and abs(pitch) >= 10):
                    ANG_COUNTER = ANG_COUNTER + 1
                    cv2.putText(frame, "Not frontal-gazing", (10, cam_h - 30),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
                    print("counter: {}".format(ANG_COUNTER))
                    print("Non frontal-gazing!\n")

                    #DROWSINESS ALERT2 : KEEP (NOT FRONTAL GAZING)
                    if (ANG_COUNTER >= FRAME2):
                        if not ALARM_ON:
                            ALARM_ON = True
                        cv2.putText(frame, "DROWSINESS ALERT2!", (10, 40),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255),
                                    2)
                        beepsound()
                        print("**DROWSINESS ALERT2!**\n")
                else:
                    ANG_COUNTER = 0
                    ALARM_ON = False

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            rects = my_face_detector(gray, 0)
            for rect in rects:
                # determine the facial landmarks for the face region, then convert
                # the facial landmark (x, y)-coordinates to a NumPy array
                shape = my_predictor(gray, rect)
                shape = face_utils.shape_to_np(shape)
                leftEye = shape[lStart:lEnd]
                rightEye = shape[rStart:rEnd]

                # compute eye aspect ratio
                leftEAR = eye_aspect_ratio(leftEye)
                rightEAR = eye_aspect_ratio(rightEye)
                ear = (leftEAR + rightEAR) / 2.0

                # draw line along the eyes
                leftEyeHull = cv2.convexHull(leftEye)
                rightEyeHull = cv2.convexHull(rightEye)
                cv2.drawContours(frame, [leftEyeHull], -1, (0, 255, 0), 1)
                cv2.drawContours(frame, [rightEyeHull], -1, (0, 255, 0), 1)
                cv2.putText(frame, "EAR: {:.2f}".format(ear), (300, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                if (DETECTION):
                    if ear < EYE_AR_THRESH:
                        EYE_COUNTER += 1
                        # DROWSINESS ALERT3 : closed eye detection
                        if EYE_COUNTER >= FRAME3:
                            if not ALARM_ON:
                                ALARM_ON = True
                            cv2.putText(frame, "DROWSINESS ALERT3!", (10, 70),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                        (0, 0, 255), 2)
                            print("**DROWSINESS ALERT3!**\n")
                            beepsound()
                    else:
                        EYE_COUNTER = 0
                        ALARM_ON = False

        cv2.imshow('VIDEO', frame)
        if (ANG == True):  # Optimization complete
            print(
                "########################ANG OPTIMIZE COMPLETE########################"
            )
            cv2.putText(frame, "START DETECTING!", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            print(
                "########################START DETECTING########################"
            )
            print("ROLL_OPTIMIZE IS", ROLL_OPTIMIZE)
            print("PITCH_OPTIMIZE IS", PITCH_OPTIMIZE)
            ANG = False
            OPTIMIZE = False
            DETECTION = True
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
def main():

    #Check if some arguments have been passed
    #pass the path of a video
    if(len(sys.argv) > 2):
        file_path = sys.argv[1]
        if(os.path.isfile(file_path)==False): 
            print("ex_pnp_head_pose_estimation: the file specified does not exist.")
            return
        else:
            #Open the video file
            video_capture = cv2.VideoCapture(file_path)
            if(video_capture.isOpened() == True): print("ex_pnp_head_pose_estimation: the video source has been opened correctly...")
            # Define the codec and create VideoWriter object
            #fourcc = cv2.VideoWriter_fourcc(*'XVID')
            output_path = sys.argv[2]
            fourcc = cv2.cv.CV_FOURCC(*'XVID')
            out = cv2.VideoWriter(output_path, fourcc, 20.0, (1280,720))
    else:
        print("You have to pass as argument the path to a video file and the path to the output file to produce, for example: \n python ex_pnp_pose_estimation_video.py /home/video.mpg ./output.avi")
        return

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have a better result it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained 
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60/2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x],
                                   [0.0, f_y, c_y], 
                                   [0.0, 0.0, 1.0] ])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    #camera_matrix = numpy.float32([[602.10618226,          0.0, 320.27333589],
                                   #[         0.0, 603.55869786,  229.7537026], 
                                   #[         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    #camera_distortion = numpy.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])


    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([P3D_RIGHT_SIDE,
                                  P3D_GONION_RIGHT,
                                  P3D_MENTON,
                                  P3D_GONION_LEFT,
                                  P3D_LEFT_SIDE,
                                  P3D_FRONTAL_BREADTH_RIGHT,
                                  P3D_FRONTAL_BREADTH_LEFT,
                                  P3D_SELLION,
                                  P3D_NOSE,
                                  P3D_SUB_NOSE,
                                  P3D_RIGHT_EYE,
                                  P3D_RIGHT_TEAR,
                                  P3D_LEFT_TEAR,
                                  P3D_LEFT_EYE,
                                  P3D_STOMION])

    #Declaring the two classifiers
    #my_cascade = haarCascade("../etc/haarcascade_frontalface_alt.xml", "../etc/haarcascade_profileface.xml")
    dlib_landmarks_file = "./shape_predictor_68_face_landmarks.dat"
    if(os.path.isfile(dlib_landmarks_file)==False): 
        print("The dlib landmarks file is missing! Use the following commands to download and unzip: ")
        print(">> wget dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2")
        print(">> bzip2 -d shape_predictor_68_face_landmarks.dat.bz2")
        return
    my_detector = faceLandmarkDetection(dlib_landmarks_file)
    my_face_detector = dlib.get_frontal_face_detector()

    while(True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        #gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY)

        faces_array = my_face_detector(frame, 1)
        print("Total Faces: " + str(len(faces_array)))
        for i, pos in enumerate(faces_array):

            face_x1 = pos.left()
            face_y1 = pos.top()
            face_x2 = pos.right()
            face_y2 = pos.bottom()
            text_x1 = face_x1
            text_y1 = face_y1 - 3

            cv2.putText(frame, "FACE " + str(i+1), (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
            cv2.rectangle(frame, 
                         (face_x1, face_y1), 
                         (face_x2, face_y2), 
                         (0, 255, 0), 
                          2)            

            landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS)


            for point in landmarks_2D:
                cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1)


            #Applying the PnP solver to find the 3D pose
            # of the head from the 2D position of the
            # landmarks.
            #retval - bool
            #rvec - Output rotation vector that, together with tvec, brings 
            # points from the model coordinate system to the camera coordinate system.
            #tvec - Output translation vector.
            retval, rvec, tvec = cv2.solvePnP(landmarks_3D, 
                                                  landmarks_2D, 
                                                  camera_matrix, camera_distortion)

            #Now we project the 3D points into the image plane
            #Creating a 3-axis to be used as reference in the image.
            axis = numpy.float32([[50,0,0], 
                                      [0,50,0], 
                                      [0,0,50]])
            imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion)

            #Drawing the three axis on the image frame.
            #The opencv colors are defined as BGR colors such as: 
            # (a, b, c) >> Blue = a, Green = b and Red = c
            #Our axis/color convention is X=R, Y=G, Z=B
            sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
            cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN
            cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE
            cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED

        #Writing in the output file
        out.write(frame)

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
  
    #Release the camera
    video_capture.release()
    print("Bye...")
def main():

    #Check if some arguments have been passed
    #pass the path of a video
    if(len(sys.argv) > 2):
        file_path = sys.argv[1]
        if(os.path.isfile(file_path)==False): 
            print("ex_pnp_head_pose_estimation: the file specified does not exist.")
            return
        else:
            #Open the video file
            video_capture = cv2.VideoCapture(file_path)
            if(video_capture.isOpened() == True): print("ex_pnp_head_pose_estimation: the video source has been opened correctly...")
            # Define the codec and create VideoWriter object
            #fourcc = cv2.VideoWriter_fourcc(*'XVID')
            output_path = sys.argv[2]
            fourcc = cv2.cv.CV_FOURCC(*'XVID')
            out = cv2.VideoWriter(output_path, fourcc, 20.0, (1280,720))
    else:
        print("You have to pass as argument the path to a video file and the path to the output file to produce, for example: \n python ex_pnp_pose_estimation_video.py /home/video.mpg ./output.avi")
        return


    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have a better result it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained 
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60/2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x],
                                   [0.0, f_y, c_y], 
                                   [0.0, 0.0, 1.0] ])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    #camera_matrix = numpy.float32([[602.10618226,          0.0, 320.27333589],
                                   #[         0.0, 603.55869786,  229.7537026], 
                                   #[         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    #camera_distortion = numpy.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])


    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([P3D_RIGHT_SIDE,
                                  P3D_GONION_RIGHT,
                                  P3D_MENTON,
                                  P3D_GONION_LEFT,
                                  P3D_LEFT_SIDE,
                                  P3D_FRONTAL_BREADTH_RIGHT,
                                  P3D_FRONTAL_BREADTH_LEFT,
                                  P3D_SELLION,
                                  P3D_NOSE,
                                  P3D_SUB_NOSE,
                                  P3D_RIGHT_EYE,
                                  P3D_RIGHT_TEAR,
                                  P3D_LEFT_TEAR,
                                  P3D_LEFT_EYE,
                                  P3D_STOMION])

    #Declaring the two classifiers
    my_cascade = haarCascade("../etc/xml/haarcascade_frontalface_alt.xml", "../etc/xml/haarcascade_profileface.xml")
    my_detector = faceLandmarkDetection('../etc/shape_predictor_68_face_landmarks.dat')

    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w/10)
    roi_resize_h = int(cam_h/10)

    while(True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        if frame is None: return
        gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY)

        #Looking for faces with cascade
        #The classifier moves over the ROI
        #starting from a minimum dimension and augmenting
        #slightly based on the scale factor parameter.
        #The scale factor for the frontal face is 1.10 (10%)
        #Scale factor: 1.15=15%, 1.25=25%, etc.
        #Higher scale factors mean faster classification
        #but lower accuracy.
        #
        #Return code: 1=Frontal, 2=FrontRotLeft, 
        # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
        my_cascade.findFace(gray, runFrontal=True, runFrontalRotated=True, runLeft=False, runRight=False, frontalScaleFactor=1.2, rotatedFrontalScaleFactor=1.2, leftScaleFactor=1.15, rightScaleFactor=1.15, minSizeX=80, minSizeY=80, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type)

        #Accumulate error values in a counter
        if(my_cascade.face_type == 0): 
            no_face_counter += 1

        #If no face is found for a certain
        #number of cycles, then the ROI is reset
        if(no_face_counter == 30):
            no_face_counter = 0
            roi_x1 = 0
            roi_y1 = 0
            roi_x2 = cam_w
            roi_y2 = cam_h
            roi_w = cam_w
            roi_h = cam_h

        #Checking which kind of face is returned
        if(my_cascade.face_type > 0 and my_cascade.face_type < 4):

            #Face found, reset the error counter
            no_face_counter = 0

            #Because the dlib landmark detector wants a precise
            #boundary box of the face, it is necessary to resize
            #the box returned by the OpenCV haar detector.
            #Adjusting the frame for profile left
            if(my_cascade.face_type == 4):
                face_margin_x1 = 20 - 10 #resize_rate + shift_rate
                face_margin_y1 = 20 + 5 #resize_rate + shift_rate
                face_margin_x2 = -20 - 10 #resize_rate + shift_rate
                face_margin_y2 = -20 + 5 #resize_rate + shift_rate
                face_margin_h = -0.7 #resize_factor
                face_margin_w = -0.7 #resize_factor
            #Adjusting the frame for profile right
            elif(my_cascade.face_type == 5):
                face_margin_x1 = 20 + 10
                face_margin_y1 = 20 + 5
                face_margin_x2 = -20 + 10
                face_margin_y2 = -20 + 5
                face_margin_h = -0.7
                face_margin_w = -0.7
            #No adjustments
            else:
                face_margin_x1 = 0
                face_margin_y1 = 0
                face_margin_x2 = 0
                face_margin_y2 = 0
                face_margin_h = 0
                face_margin_w = 0

            #Updating the face position
            face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
            face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
            face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
            face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
            face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
            face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)

            #Updating the ROI position       
            roi_x1 = face_x1 - roi_resize_w
            if (roi_x1 < 0): roi_x1 = 0
            roi_y1 = face_y1 - roi_resize_h
            if(roi_y1 < 0): roi_y1 = 0
            roi_w = face_w + roi_resize_w + roi_resize_w
            if(roi_w > cam_w): roi_w = cam_w
            roi_h = face_h + roi_resize_h + roi_resize_h
            if(roi_h > cam_h): roi_h = cam_h    
            roi_x2 = face_x2 + roi_resize_w
            if (roi_x2 > cam_w): roi_x2 = cam_w
            roi_y2 = face_y2 + roi_resize_h
            if(roi_y2 > cam_h): roi_y2 = cam_h

            #Debugging printing utilities
            if(DEBUG == True):
                print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h)
                print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)
                #Drawing a green rectangle
                # (and text) around the face.
                text_x1 = face_x1
                text_y1 = face_y1 - 3
                if(text_y1 < 0): text_y1 = 0
                cv2.putText(frame, "FACE", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
                cv2.rectangle(frame, 
                             (face_x1, face_y1), 
                             (face_x2, face_y2), 
                             (0, 255, 0),
                              2)

            #In case of a frontal/rotated face
            # the landmark detector is called
            if(my_cascade.face_type > 0):
                landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS)

                if(DEBUG == True):
                    #cv2.drawKeypoints(frame, landmarks_2D)

                    for point in landmarks_2D:
                        cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1)


                #Applying the PnP solver to find the 3D pose
                # of the head from the 2D position of the
                # landmarks.
                #retval - bool
                #rvec - Output rotation vector that, together with tvec, brings 
                # points from the model coordinate system to the camera coordinate system.
                #tvec - Output translation vector.
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D, 
                                                  landmarks_2D, 
                                                  camera_matrix, camera_distortion)

                #Now we project the 3D points into the image plane
                #Creating a 3-axis to be used as reference in the image.
                axis = numpy.float32([[50,0,0], 
                                      [0,50,0], 
                                      [0,0,50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion)

                #Drawing the three axis on the image frame.
                #The opencv colors are defined as BGR colors such as: 
                # (a, b, c) >> Blue = a, Green = b and Red = c
                #Our axis/color convention is X=R, Y=G, Z=B
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN
                cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE
                cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED

        #Drawing a yellow rectangle
        # (and text) around the ROI.
        #if(DEBUG == True):
            #text_x1 = roi_x1
            #text_y1 = roi_y1 - 3
            #if(text_y1 < 0): text_y1 = 0
            #cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1);
            #cv2.rectangle(frame, 
                         #(roi_x1, roi_y1), 
                         #(roi_x2, roi_y2), 
                         #(0, 255, 255),
                         #2)

        #Writing in the output file
        out.write(frame)

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break
   
    #Release the camera
    video_capture.release()
    print("Bye...")
Exemple #11
0
def main():
    #Defining the video capture object
    video_capture = cv2.VideoCapture(1)
    thresh = 0.25
    frame_check = 15
    detect = dlib.get_frontal_face_detector()
    predict = dlib.shape_predictor("/home/agopinath1996/git_ws/deepgaze/scripts/shape_predictor_68_face_landmarks.dat")# change to path where landmark points are stored

    (lStart, lEnd) = face_utils.FACIAL_LANDMARKS_68_IDXS["left_eye"]   # get the left eye index
    (rStart, rEnd) = face_utils.FACIAL_LANDMARKS_68_IDXS["right_eye"]  # get the right eye index
    flag=0

    sess = tf.Session()
    my_head_pose_estimator = CnnHeadPoseEstimator(sess)
    my_head_pose_estimator.load_roll_variables(os.path.realpath("/home/agopinath1996/git_ws/deepgaze/etc/tensorflow/head_pose/roll/cnn_cccdd_30k.tf"))# change to deepgaze directory path
    my_head_pose_estimator.load_pitch_variables(os.path.realpath("/home/agopinath1996/git_ws/deepgaze/etc/tensorflow/head_pose/pitch/cnn_cccdd_30k.tf"))
    my_head_pose_estimator.load_yaw_variables(os.path.realpath("/home/agopinath1996/git_ws/deepgaze/etc/tensorflow/head_pose/yaw/cnn_cccdd_30k.tf"))


    #Start of Eye Gaze Tracking
    win = dlib.image_window()
    
    predictor_path = "/home/agopinath1996/git_ws/deepgaze/scripts/shape_predictor_68_face_landmarks.dat"
    roi = []
    ref_point = 0
    index1 = 0
    pt_lefteye_corner_x = 0
    pt_lefteye_corner_y = 0
    pt_pos1 = 0
    predictor = dlib.shape_predictor(predictor_path)
    pt_x2 = 0
    pt_y2 = 0
    pt_x1 = 0
    pt_y1 = 0
    pt_actualx = 0
    pt_actualy = 0
    detector = dlib.get_frontal_face_detector()
    flag = 0
    flag1 = 0
    pt_righteye_corner_x = 0
    pt_righteye_corner_y = 0


    if(video_capture.isOpened() == False):
        print("Error: the resource is busy or unvailable")
    else:
        print("The video source has been opened correctly...")

    #Create the main window and move it
    cv2.namedWindow('Video')
    cv2.moveWindow('Video', 20, 20)

    #Obtaining the CAM dimension
    cam_w = int(video_capture.get(3))
    cam_h = int(video_capture.get(4))

    #Defining the camera matrix.
    #To have a better result it is necessary to find the focal
    # length of the camera. fx/fy are the focal lengths (in pixels)
    # and cx/cy are the optical centres. These values can be obtained
    # roughly by approximation, for example in a 640x480 camera:
    # cx = 640/2 = 320
    # cy = 480/2 = 240
    # fx = fy = cx/tan(60/2 * pi / 180) = 554.26
    c_x = cam_w / 2
    c_y = cam_h / 2
    f_x = c_x / numpy.tan(60/2 * numpy.pi / 180)
    f_y = f_x

    #Estimated camera matrix values.
    camera_matrix = numpy.float32([[f_x, 0.0, c_x],
                                   [0.0, f_y, c_y],
                                   [0.0, 0.0, 1.0] ])

    print("Estimated camera matrix: \n" + str(camera_matrix) + "\n")

    #These are the camera matrix values estimated on my webcam with
    # the calibration code (see: src/calibration):
    camera_matrix = numpy.float32([[602.10618226,          0.0, 320.27333589],
                                   [         0.0, 603.55869786,  229.7537026],
                                   [         0.0,          0.0,          1.0] ])

    #Distortion coefficients
    #camera_distortion = numpy.float32([0.0, 0.0, 0.0, 0.0, 0.0])

    #Distortion coefficients estimated by calibration
    camera_distortion = numpy.float32([ 0.06232237, -0.41559805,  0.00125389, -0.00402566,  0.04879263])


    #This matrix contains the 3D points of the
    # 15 landmarks we want to find. It has been
    # obtained from anthropometric measurements
    # on the human head.
    landmarks_3D = numpy.float32([P3D_RIGHT_SIDE,
                                  P3D_GONION_RIGHT,
                                  P3D_MENTON,
                                  P3D_GONION_LEFT,
                                  P3D_LEFT_SIDE,
                                  P3D_FRONTAL_BREADTH_RIGHT,
                                  P3D_FRONTAL_BREADTH_LEFT,
                                  P3D_SELLION,
                                  P3D_NOSE,
                                  P3D_SUB_NOSE,
                                  P3D_RIGHT_EYE,
                                  P3D_RIGHT_TEAR,
                                  P3D_LEFT_TEAR,
                                  P3D_LEFT_EYE,
                                  P3D_STOMION])

    #Declaring the two classifiers
    my_cascade = haarCascade("/home/agopinath1996/git_ws/deepgaze/etc/xml/haarcascade_frontalface_alt.xml", "/home/agopinath1996/git_ws/deepgaze/etc/xml/haarcascade_profileface.xml")
    #TODO If missing, example file can be retrieved from http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
    my_detector = faceLandmarkDetection('/home/agopinath1996/git_ws/deepgaze/scripts/shape_predictor_68_face_landmarks.dat')





    #Error counter definition
    no_face_counter = 0

    #Variables that identify the face
    #position in the main frame.
    face_x1 = 0
    face_y1 = 0
    face_x2 = 0
    face_y2 = 0
    face_w = 0
    face_h = 0

    #Variables that identify the ROI
    #position in the main frame.
    roi_x1 = 0
    roi_y1 = 0
    roi_x2 = cam_w
    roi_y2 = cam_h
    roi_w = cam_w
    roi_h = cam_h
    roi_resize_w = int(cam_w/10)
    roi_resize_h = int(cam_h/10)

    while(True):

        # Capture frame-by-frame
        ret, frame = video_capture.read()
        ret, frame_eye = video_capture.read()

        #print("Estimated [roll, pitch, yaw] ..... [" + str(roll[0,0,0]) + "," + str(pitch[0,0,0]) + "," + str(yaw[0,0,0])  + "]")
        gray = cv2.cvtColor(frame[roi_y1:roi_y2, roi_x1:roi_x2], cv2.COLOR_BGR2GRAY)

        img = cv2.cvtColor(frame_eye, cv2.COLOR_RGB2BGR) #for eye gaze detection
        drowsyframe = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        subjects = detect(drowsyframe, 0)
        for subject in subjects:
            shape = predict(frame,subject)
            shape = face_utils.shape_to_np(shape)
            leftEye = shape[lStart:lEnd]
            rightEye = shape[rStart:rEnd]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            ear = (leftEAR + rightEAR) / 2.0
            leftEyeHull = cv2.convexHull(leftEye)
            rightEyeHull = cv2.convexHull(rightEye)
            cv2.drawContours(frame, [leftEyeHull], -1, (0,255,0),1)
            cv2.drawContours(frame, [rightEyeHull], -1, (0,255,0), 1)
            if ear<thresh:
                flag+= 1
                print(flag)
                if flag >= frame_check:
                    cv2.putText(frame, "WAKEUPPPP", (10,30), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2)
                    cv2.putText(frame, "WAKEUPPPP", (10, 325), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255),2)
            else:
                flag=0

        #Eye Gaze Detection
        dets = detect(img, 0)
        check = 5
        shapes_eye = []
        for k,d in enumerate(dets):
            #print("dets{}".format(d))
            #print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(k, d.left(), d.top(), d.right(), d.bottom()))

            shape_eye = predict(img, d)

            for index, pt in enumerate(shape_eye.parts()):
                #print('Part {}: {}'.format(index, pt))
                pt_pos = (pt.x, pt.y)
                cv2.circle(img, pt_pos, 1, (0,225, 0), 2)
                if index == 29:
                    pt_x2 = int(pt.x)
                    pt_y2 = int(pt.y)
                if index == 18:
                    pt_x1 = int(pt.x)
                    pt_y1 = int(pt.y)
                if index == 37:
                    pt_righteye_corner_x = pt.x
                    pt_righteye_corner_y = pt.y
                if index == 40:
                    pt_lefteye_corner_x = pt.x
                    pt_lefteye_corner_y = pt.y
                roi =  frame_eye[pt_y1:pt_y2,pt_x1:pt_x2]
                roi_gray = cv2.cvtColor(roi,cv2.COLOR_RGB2GRAY)
                _, threshold = cv2.threshold(roi_gray, 30, 255, cv2.THRESH_BINARY_INV)
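                #The inverted binary threshold keeps the dark pupil region; its
                # centroid follows from the image moments (cX = M10/M00,
                # cY = M01/M00) and is shifted by (pt_x1, pt_y1) back into
                # frame coordinates.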
                try:
                    M = cv2.moments(threshold)
                    #print(M)
                    cX = int(M["m10"]/M["m00"])
                    cY = int(M["m01"]/M["m00"])
                    #print(cX,cY)
                    pt_actualx = pt_x1+cX
                    pt_actualy = pt_y1+cY
                    #print(pt_actualx,pt_actualy)
                    diff_right = pt_actualx-pt_righteye_corner_x
                    diff_left = pt_lefteye_corner_x - pt_actualx
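                    #diff_right/diff_left measure how far the pupil centroid
                    # sits from the right/left eye corner; when either gap drops
                    # below 3 pixels the gaze is judged to be off to one side
                    # and the 'Look straight!' warning is drawn.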
                    print(diff_right,diff_left)
                    #print(cX,cY)
                    if diff_right < 3:
                        cv2.putText(frame,'Look straight!',(10,60),cv2.FONT_HERSHEY_SIMPLEX,0.5,(10,10,255),2)
                    if diff_left <3:
                        cv2.putText(frame,'Look straight!',(10,60),cv2.FONT_HERSHEY_SIMPLEX,0.5,(10,10,255),2)


                except:
                    pass
                cv2.circle(frame,(pt_actualx,pt_actualy), 2,(255,0,255),-1)
                #print(pt_actualx,pt_actualy)


            #print(pt_x1,pt_x2,pt_y1,pt_y2)
            #print(roi.shape_eye)
            #print(img.shape_eye)
            try:
                cv2.imshow("threshold", threshold)
                cv2.waitKey(1)
            except:
                pass

        win.clear_overlay()
        win.set_image(img)
        if len(shapes_eye)!= 0 :
            for i in range(len(shapes_eye)):
                win.add_overlay(shapes_eye[i])





        #Looking for faces with cascade
        #The classifier moves over the ROI
        #starting from a minimum dimension and augmenting
        #slightly based on the scale factor parameter.
        #The scale factor for the frontal face is 1.10 (10%)
        #Scale factor: 1.15=15%, 1.25=25%, etc.
        #Higher scale factors mean faster classification
        #but lower accuracy.
        #
        #Return code: 1=Frontal, 2=FrontRotLeft,
        # 3=FrontRotRight, 4=ProfileLeft, 5=ProfileRight.
        my_cascade.findFace(gray, True, True, True, True, 1.10, 1.10, 1.15, 1.15, 40, 40, rotationAngleCCW=30, rotationAngleCW=-30, lastFaceType=my_cascade.face_type)
        #print(returnvalue)
        #Accumulate error values in a counter
        if(my_cascade.face_type == 0):
            no_face_counter += 1

        #If no face is found for a certain
        #number of cycles, then the ROI is reset
        if(no_face_counter == 50):
            no_face_counter = 0
            roi_x1 = 0
            roi_y1 = 0
            roi_x2 = cam_w
            roi_y2 = cam_h
            roi_w = cam_w
            roi_h = cam_h

        #Checking which kind of face is returned
        if(my_cascade.face_type > 0):

            #Face found, reset the error counter
            no_face_counter = 0

            #Because the dlib landmark detector wants a precise
            #boundary box of the face, it is necessary to resize
            #the box returned by the OpenCV haar detector.
            #Adjusting the frame for profile left
            if(my_cascade.face_type == 4):
                face_margin_x1 = 20 - 10 #resize_rate + shift_rate
                face_margin_y1 = 20 + 5 #resize_rate + shift_rate
                face_margin_x2 = -20 - 10 #resize_rate + shift_rate
                face_margin_y2 = -20 + 5 #resize_rate + shift_rate
                face_margin_h = -0.7 #resize_factor
                face_margin_w = -0.7 #resize_factor
            #Adjusting the frame for profile right
            elif(my_cascade.face_type == 5):
                face_margin_x1 = 20 + 10
                face_margin_y1 = 20 + 5
                face_margin_x2 = -20 + 10
                face_margin_y2 = -20 + 5
                face_margin_h = -0.7
                face_margin_w = -0.7
            #No adjustments
            else:
                face_margin_x1 = 0
                face_margin_y1 = 0
                face_margin_x2 = 0
                face_margin_y2 = 0
                face_margin_h = 0
                face_margin_w = 0

            #Updating the face position
            face_x1 = my_cascade.face_x + roi_x1 + face_margin_x1
            face_y1 = my_cascade.face_y + roi_y1 + face_margin_y1
            face_x2 = my_cascade.face_x + my_cascade.face_w + roi_x1 + face_margin_x2
            face_y2 = my_cascade.face_y + my_cascade.face_h + roi_y1 + face_margin_y2
            face_w = my_cascade.face_w + int(my_cascade.face_w * face_margin_w)
            face_h = my_cascade.face_h + int(my_cascade.face_h * face_margin_h)
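            #Hedged addition (not in the original snippet): the margins above can
            #push the adjusted box outside the frame; clamping it keeps the crop
            #below from being empty or wrapping around via negative indices.
            face_x1 = max(face_x1, 0)
            face_y1 = max(face_y1, 0)
            face_x2 = min(face_x2, cam_w)
            face_y2 = min(face_y2, cam_h)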

            crop_img = frame[face_y1:face_y2, face_x1:face_x2]
            cv2.imshow("cropped", crop_img)

            roll = my_head_pose_estimator.return_roll(crop_img)
            pitch = my_head_pose_estimator.return_pitch(crop_img)
            yaw = my_head_pose_estimator.return_yaw(crop_img)
            #print("Estimated [roll, pitch, yaw] ..... [" + str(roll[0,0,0]) + "," + str(pitch[0,0,0]) + "," + str(yaw[0,0,0])  + "]")

            if yaw[0,0,0] > 30:
                cv2.putText(frame, "You are facing right!", (10,30), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2)
            elif yaw[0,0,0] < -30:
                cv2.putText(frame, "You are facing left!", (10,30), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2)
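            #Hedged illustration (added, not in the original): a symmetric check
            #could flag up/down from the pitch estimate. The threshold, the sign
            #convention and the text position are assumptions, not taken from
            #this snippet.
            #if pitch[0,0,0] > 30:
            #    cv2.putText(frame, "You are facing up!", (10,60), cv2.FONT_HERSHEY_PLAIN, 1.6, (10,10,255), 2)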

            #Updating the ROI position
            roi_x1 = face_x1 - roi_resize_w
            if (roi_x1 < 0): roi_x1 = 0
            roi_y1 = face_y1 - roi_resize_h
            if(roi_y1 < 0): roi_y1 = 0
            roi_w = face_w + roi_resize_w + roi_resize_w
            if(roi_w > cam_w): roi_w = cam_w
            roi_h = face_h + roi_resize_h + roi_resize_h
            if(roi_h > cam_h): roi_h = cam_h
            roi_x2 = face_x2 + roi_resize_w
            if (roi_x2 > cam_w): roi_x2 = cam_w
            roi_y2 = face_y2 + roi_resize_h
            if(roi_y2 > cam_h): roi_y2 = cam_h

            #Debugging printing utilities
            if(DEBUG == True):
                #print("FACE: ", face_x1, face_y1, face_x2, face_y2, face_w, face_h)
                #print("ROI: ", roi_x1, roi_y1, roi_x2, roi_y2, roi_w, roi_h)

                #Drawing a green rectangle
                # (and text) around the face.
                text_x1 = face_x1
                text_y1 = face_y1 - 3
                if(text_y1 < 0): text_y1 = 0
                cv2.putText(frame, "FACE", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1);
                cv2.rectangle(frame,
                             (face_x1, face_y1),
                             (face_x2, face_y2),
                             (0, 255, 0),
                              2)

            #In case of a frontal/rotated face,
            # the landmark detector is called
            if(my_cascade.face_type > 0):
                landmarks_2D = my_detector.returnLandmarks(frame, face_x1, face_y1, face_x2, face_y2, points_to_return=TRACKED_POINTS)

                if(DEBUG == True):
                    #cv2.drawKeypoints(frame, landmarks_2D)

                    for point in landmarks_2D:
                        cv2.circle(frame,( point[0], point[1] ), 2, (0,0,255), -1)


                #Applying the PnP solver to find the 3D pose
                # of the head from the 2D position of the
                # landmarks.
                #retval - bool
                #rvec - Output rotation vector that, together with tvec, brings
                # points from the model coordinate system to the camera coordinate system.
                #tvec - Output translation vector.
                retval, rvec, tvec = cv2.solvePnP(landmarks_3D,
                                                  landmarks_2D,
                                                  camera_matrix, camera_distortion)
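                #Hedged sketch (added, not in the original): rvec can be turned
                #into a 3x3 rotation matrix with cv2.Rodrigues and, stacked with
                #tvec, decomposed into Euler angles if numeric angles are needed.
                #rmat, _ = cv2.Rodrigues(rvec)
                #pose_mat = numpy.hstack((rmat, tvec))
                #euler_angles = cv2.decomposeProjectionMatrix(pose_mat)[-1]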

                #Now we project the 3D points into the image plane
                #Creating a 3-axis to be used as reference in the image.
                axis = numpy.float32([[50,0,0],
                                      [0,50,0],
                                      [0,0,50]])
                imgpts, jac = cv2.projectPoints(axis, rvec, tvec, camera_matrix, camera_distortion)
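                #Note (added): projectPoints returns the projected points as an
                #array of shape (N, 1, 2), so imgpts[i].ravel() below yields the
                #(x, y) image coordinates of the i-th axis endpoint.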

                #Drawing the three axis on the image frame.
                #The opencv colors are defined as BGR colors such as:
                # (a, b, c) >> Blue = a, Green = b and Red = c
                #Our axis/color convention is X=R, Y=G, Z=B
                sellion_xy = (landmarks_2D[7][0], landmarks_2D[7][1])
                cv2.line(frame, sellion_xy, tuple(imgpts[1].ravel()), (0,255,0), 3) #GREEN
                cv2.line(frame, sellion_xy, tuple(imgpts[2].ravel()), (255,0,0), 3) #BLUE
                cv2.line(frame, sellion_xy, tuple(imgpts[0].ravel()), (0,0,255), 3) #RED
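                #Note (added): on newer OpenCV releases cv2.line expects integer
                #point coordinates, so the projected points may need an explicit
                #cast, e.g. tuple(imgpts[0].ravel().astype(int)).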

        #Drawing a yellow rectangle
        # (and text) around the ROI.
        if(DEBUG == True):
            text_x1 = roi_x1
            text_y1 = roi_y1 - 3
            if(text_y1 < 0): text_y1 = 0
            cv2.putText(frame, "ROI", (text_x1,text_y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,255), 1);
            cv2.rectangle(frame,
                         (roi_x1, roi_y1),
                         (roi_x2, roi_y2),
                         (0, 255, 255),
                         2)

        #Showing the frame and waiting
        # for the exit command
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'): break

    #Release the camera
    video_capture.release()
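    #Closing the OpenCV windows is a common cleanup step; this call is an
    #addition to the original snippet, not part of it.
    cv2.destroyAllWindows()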
    print("Bye...")