Example #1
def main():
    # Setup face detection models
    if args.cpu: # use dlib to do face detection and facial landmark detection
        import dlib
        dlib_model_path = 'head_pose_estimation/assets/shape_predictor_68_face_landmarks.dat'
        shape_predictor = dlib.shape_predictor(dlib_model_path)
        face_detector = dlib.get_frontal_face_detector()
    else: # use better models on GPU
        import face_alignment # the local directory in this repo
        try:
            import onnxruntime
            use_onnx = True
        except ImportError:
            use_onnx = False
        fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, use_onnx=use_onnx, 
                                          flip_input=False)
        face_detector = fa.face_detector

    os_name = system()
    if os_name in ['Windows']: # CAP_DSHOW is required on my windows PC to get 30 FPS
        cap = cv2.VideoCapture(args.cam+cv2.CAP_DSHOW)
    else: # linux PC is as usual
        cap = cv2.VideoCapture(args.cam)
    cap.set(cv2.CAP_PROP_FPS, 30)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    pose_estimator = PoseEstimator(img_size=sample_frame.shape[:2])

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [Stabilizer(
                        state_num=2,
                        measure_num=1,
                        cov_process=0.01,
                        cov_measure=0.1) for _ in range(8)]

    # Establish a TCP connection to unity.
    if args.connect:
        address = ('127.0.0.1', 14514)
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(address)

    ts = []
    frame_count = 0
    no_face_count = 0
    prev_boxes = deque(maxlen=5)
    prev_marks = deque(maxlen=5)

    while True:
        _, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror the frame horizontally
        frame_count += 1
        if args.connect and frame_count > 60: # send information to unity
            msg = '%.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f'% \
                  (roll, pitch, yaw, eye_open, eye_diff, eyeballX, eyeballY, mouthWidth, mouthVar) # modified by Kennard
            s.send(bytes(msg, "utf-8"))

        t = time.time()

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        if frame_count % 2 == 1: # do face detection every odd frame
            facebox = get_face(face_detector, frame, args.cpu)
            if facebox is not None:
                no_face_count = 0
        elif len(prev_boxes) > 1: # use a linear movement assumption
            if no_face_count > 1: # don't estimate more than 1 frame
                facebox = None
            else:
                facebox = prev_boxes[-1] + np.mean(np.diff(np.array(prev_boxes), axis=0), axis=0)[0]
                facebox = facebox.astype(int)
                no_face_count += 1

        if facebox is not None: # if face is detected
            prev_boxes.append(facebox)
            # Do facial landmark detection and iris detection.
            if args.cpu: # do detection every frame
                face = dlib.rectangle(left=facebox[0], top=facebox[1], 
                                      right=facebox[2], bottom=facebox[3])
                marks = shape_to_np(shape_predictor(frame, face))
            else:
                if len(prev_marks) == 0 \
                    or frame_count == 1 \
                    or frame_count % 2 == 0: # do landmark detection on first frame
                                             # or every even frame
                    face_img = frame[facebox[1]: facebox[3], facebox[0]: facebox[2]]
                    marks = fa.get_landmarks(face_img[:,:,::-1], 
                            detected_faces=[(0, 0, facebox[2]-facebox[0], facebox[3]-facebox[1])])
                    marks = marks[-1]
                    marks[:, 0] += facebox[0]
                    marks[:, 1] += facebox[1]
                elif len(prev_marks) > 1: # use a linear movement assumption
                    marks = prev_marks[-1] + np.mean(np.diff(np.array(prev_marks), axis=0), axis=0)
                prev_marks.append(marks)

            x_l, y_l, ll, lu = detect_iris(frame, marks, "left")
            x_r, y_r, rl, ru = detect_iris(frame, marks, "right")

            # Try pose estimation with 68 points.
            error, R, T = pose_estimator.solve_pose_by_68_points(marks)
            pose = list(R) + list(T)
            # Add iris positions to stabilize.
            pose += [(ll + rl) / 2.0, (lu + ru) / 2.0]

            if error > 100: # large error means tracking fails: reinitialize pose estimator
                            # at the same time, keep sending the same information (e.g. same roll)
                pose_estimator = PoseEstimator(img_size=sample_frame.shape[:2])

            else:
                # Stabilize the pose.
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])

            roll = np.clip(-(180+np.degrees(steady_pose[2])), -50, 50)
            pitch = np.clip(-(np.degrees(steady_pose[1]))-15, -40, 40) # the 15 here is my camera angle.
            yaw = np.clip(-(np.degrees(steady_pose[0])), -50, 50)

            # modified by Kennard
            eye_left = eye_aspect_ratio(marks[36:42])
            eye_right = eye_aspect_ratio(marks[42:48])
            eye_open = min(eye_left, eye_right)
            eye_diff = eye_left - eye_right
            eyeballX = (steady_pose[6] - 0.45) * (-2)  # calibrate
            eyeballY = (steady_pose[7] - 0.38) * 2     # calibrate
            mouthVar = mouth_aspect_ration(marks[60:68])
            mouthWidth = mouth_distance(marks[60:68]) / (facebox[2] - facebox[0]) + 0.4  # calibrate
            
            if args.debug:  # draw landmarks, etc.

                # show iris.
                if x_l > 0 and y_l > 0:
                    draw_iris(frame, x_l, y_l)
                if x_r > 0 and y_r > 0:
                    draw_iris(frame, x_r, y_r)

                # show facebox.
                draw_box(frame, [facebox])

                if error < 100:
                    # show face landmarks.
                    draw_marks(frame, marks, color=(0, 255, 0))

                    # draw stable pose annotation on frame.
                    pose_estimator.draw_annotation_box(
                        frame, np.expand_dims(steady_pose[:3],0), np.expand_dims(steady_pose[3:6],0), 
                        color=(128, 255, 128))

                    # draw head axes on frame.
                    pose_estimator.draw_axes(frame, np.expand_dims(steady_pose[:3],0), 
                                             np.expand_dims(steady_pose[3:6],0))

        dt = time.time()-t
        ts += [dt]
        FPS = int(1/(np.mean(ts[-10:])+1e-6))
        print('\r', '%.3f'%dt, end=' ')

        if args.debug:
            draw_FPS(frame, FPS)
            cv2.imshow("face", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'): # press q to exit.
                break

    # Clean up the process.
    cap.release()
    if args.connect:
        s.close()
    if args.debug:
        cv2.destroyAllWindows()
    print('%.3f'%np.mean(ts))
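Example #1 calls eye_aspect_ratio, mouth_aspect_ration and mouth_distance without showing them. The sketch below is a minimal guess at what such helpers look like, assuming the standard 68-point landmark ordering and the usual eye/mouth aspect-ratio formulas (and keeping the example's spelling of mouth_aspect_ration); the repo's actual implementations may differ.

import numpy as np

def eye_aspect_ratio(eye):
    # eye: (6, 2) array of one eye's landmarks (points 36-41 or 42-47).
    a = np.linalg.norm(eye[1] - eye[5])  # first vertical distance
    b = np.linalg.norm(eye[2] - eye[4])  # second vertical distance
    c = np.linalg.norm(eye[0] - eye[3])  # horizontal distance
    return (a + b) / (2.0 * c)

def mouth_aspect_ration(mouth):
    # mouth: (8, 2) array of the inner-lip landmarks (points 60-67).
    a = np.linalg.norm(mouth[1] - mouth[7])
    b = np.linalg.norm(mouth[2] - mouth[6])
    c = np.linalg.norm(mouth[3] - mouth[5])
    d = np.linalg.norm(mouth[0] - mouth[4])  # inner mouth width
    return (a + b + c) / (3.0 * d)

def mouth_distance(mouth):
    # Inner mouth width, normalised by the face-box width in the example above.
    return np.linalg.norm(mouth[0] - mouth[4])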
Example #2
def main():

    # Set up dlib's frontal face detector and 68-point landmark predictor.
    dlib_model_path = 'head_pose_estimation/Asset/shape_predictor_68_face_landmarks.dat'
    shape_predictor = dlib.shape_predictor(dlib_model_path)
    face_detector = dlib.get_frontal_face_detector()

    cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FPS, 30)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    _, sample_frame = cap.read()

    # Configure the pose estimator for the camera's image size using the sample frame.
    pose_estimator = PoseEstimator(img_size=sample_frame.shape[0:2])

    # One scalar Kalman stabilizer per pose value (3 rotation + 3 translation).
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.01,
                   cov_measure=0.1) for _ in range(6)
    ]
    '''address = ('127.0.0.1', 5066)
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.connect(address)'''

    frame_count = 0

    while True:
        _, frame = cap.read()
        frame = cv2.flip(frame, 1)
        frame_count += 1

        # Detect the face; get_face returns a [left, top, right, bottom] box or None.
        facebox = get_face(face_detector, frame)

        if facebox is not None:
            face = dlib.rectangle(left=facebox[0],
                                  top=facebox[1],
                                  right=facebox[2],
                                  bottom=facebox[3])
            # Detect the 68 facial landmarks and convert them to an (N, 2) array.
            shape_ = shape_predictor(frame, face)
            mask = shape_to_np(shape_)

            # Solve PnP with the 68 landmarks; the reprojection error indicates
            # how well the detected points fit the 3D face model.
            reprojection_error, rotation_vector, translation_vector = \
                pose_estimator.solve_pose_by_68_points(mask)
            pose = list(rotation_vector) + list(translation_vector)

            if reprojection_error > 100:
                # A large error means tracking failed: reinitialize the estimator.
                pose_estimator = PoseEstimator(img_size=sample_frame.shape[0:2])

            else:
                steady_pose = []
                pose_np = np.array(pose).flatten()
                for value, ps_stb in zip(pose_np, pose_stabilizers):
                    ps_stb.update([value])
                    steady_pose.append(ps_stb.state[0])

            roll = np.clip(180 + np.degrees(steady_pose[2]), -50, 50)     # unity.z  pnp.z
            pitch = np.clip(-(np.degrees(steady_pose[1])) - 25, -50, 50)  # unity.x  pnp.y
            yaw = np.clip(np.degrees(steady_pose[0]), -50, 50)            # unity.y  pnp.x

            print(pitch, yaw, roll)
            '''if frame_count > 40: 
                msg = '%.4f %.4f %.4f' % (roll, pitch, yaw)
                s.send(bytes(msg, "utf-8"))'''

            draw_box(frame, [facebox])

            if reprojection_error < 100:
                draw_marks(frame, mask, color=(0, 255, 0))

                pose_estimator.draw_annotation_box(
                    frame,
                    np.expand_dims(steady_pose[:3], 0),
                    np.expand_dims(steady_pose[3:6], 0),
                    color=(128, 255, 128))

                pose_estimator.draw_axes(frame,
                                         np.expand_dims(steady_pose[:3], 0),
                                         np.expand_dims(steady_pose[3:6], 0))

        cv2.imshow("face", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
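Example #2 also relies on get_face and shape_to_np, which are not shown. Below is a minimal sketch under the assumption that get_face simply wraps dlib's frontal face detector and returns a [left, top, right, bottom] box (or None when no face is found), and that shape_to_np copies a dlib shape into an (N, 2) NumPy array; the helpers in the actual repo may differ.

import numpy as np

def get_face(detector, image):
    # Run the dlib HOG face detector and return the first face box, or None.
    rects = detector(image, 0)
    if len(rects) == 0:
        return None
    r = rects[0]
    return [r.left(), r.top(), r.right(), r.bottom()]

def shape_to_np(shape, dtype="int"):
    # Copy the (x, y) coordinates of every landmark out of the dlib shape object.
    coords = np.zeros((shape.num_parts, 2), dtype=dtype)
    for i in range(shape.num_parts):
        coords[i] = (shape.part(i).x, shape.part(i).y)
    return coords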
Example #3
def main():
    # Setup face detection models
    if args.cpu:  # use dlib to do face detection and facial landmark detection
        import dlib
        face_detector = dlib.get_frontal_face_detector()
        predictor = dlib.shape_predictor(
            'head_pose_estimation/assets/shape_predictor_68_face_landmarks.dat'
        )
    else:  # use better models on GPU
        import face_alignment
        face_detector = MarkDetector()
        fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,
                                          flip_input=False)
    if running_on_jetson_nano():
        # Accessing the camera with OpenCV on a Jetson Nano requires gstreamer with a custom gstreamer source string
        cap = cv2.VideoCapture(get_jetson_gstreamer_source(),
                               cv2.CAP_GSTREAMER)
    else:
        # Accessing the camera with OpenCV on a laptop just requires passing in the number of the webcam (usually 0)
        # Note: You can pass in a filename instead if you want to process a video file instead of a live camera stream
        cap = cv2.VideoCapture(0)
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    _, sample_frame = cap.read()

    # Setup process and queues for multiprocessing.
    img_queue = Queue()
    box_queue = Queue()
    img_queue.put(sample_frame)
    box_process = Process(target=get_face,
                          args=(
                              face_detector,
                              img_queue,
                              box_queue,
                              args.cpu,
                          ))
    box_process.start()

    # Introduce pose estimator to solve pose. Get one frame to setup the
    # estimator according to the image size.
    pose_estimator = PoseEstimator(img_size=sample_frame.shape[:2])

    # Introduce scalar stabilizers for pose.
    pose_stabilizers = [
        Stabilizer(state_num=2,
                   measure_num=1,
                   cov_process=0.01,
                   cov_measure=0.1) for _ in range(8)
    ]

    if args.connect:
        address = ('127.0.0.1', 5066)
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.connect(address)

    ts = []
    frame_count = 0
    while True:
        _, frame = cap.read()
        frame = cv2.flip(frame, 1)  # mirror the frame horizontally
        frame_count += 1
        if args.connect and frame_count > 60:  # send information to unity
            msg = '%.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f' % \
                  (roll, pitch, yaw, min_ear, mar, mdst, steady_pose[6], steady_pose[7])
            s.send(bytes(msg, "utf-8"))

        t = time.time()
        # Added by the author: gaze-zone classification and driving-mode monitoring.
        if frame_count > 150:
            msg = '%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f,%.4f\n' % \
                  (roll, pitch, yaw, steady_pose[3], steady_pose[4], steady_pose[5], steady_pose[6], steady_pose[7])

            X = np.array([
                roll, pitch, yaw, steady_pose[3], steady_pose[4],
                steady_pose[5], steady_pose[6], steady_pose[7]
            ])
            try:
                Pridected_GZ = KNNclassifier.KNN_model.predict(X.reshape(1, -1))
            except Exception:
                Pridected_GZ = 'nothing'

            Prd_GZ = str(Pridected_GZ)
            Prd_GZ = Prd_GZ.replace("[", "")
            Prd_GZ = Prd_GZ.replace("]", "")
            Prd_GZ = Prd_GZ.replace("'", "")
            global eyess
            global cface
            eyess = []
            cface = 0
            ret, img = cap.read()
            cv2.imwrite(r'C:\Users\moham\temp-images\img.jpg', img)
            func(r'C:\Users\moham\temp-images\img.jpg', MyModel)
            if Prd_GZ == 'Front mirror':
                seqclass.append(0.0100)
            elif Prd_GZ == 'Left mirror':
                seqclass.append(0.0500)
            elif Prd_GZ == 'Right mirror':
                seqclass.append(0.0800)
            elif Prd_GZ == 'Rear mirror':
                seqclass.append(0.0250)
            elif Prd_GZ == 'Center Console':
                seqclass.append(0.0356)
            elif Prd_GZ == 'Dashboard':
                seqclass.append(0.1210)
            if len(seqclass) == 26:
                # Format the 26-value window and build the feature vector for the sequence classifier.
                seq = ', '.join('%.4f' % v for v in seqclass) + '\n'
                Y = np.array(seqclass)
                Pridected_mode = SEQclassifier.KNN_model.predict(
                    Y.reshape(1, -1))
                Prd_MD = str(Pridected_mode)
                Prd_MD = Prd_MD.replace("[", "")
                Prd_MD = Prd_MD.replace("]", "")
                Prd_MD = Prd_MD.replace("'", "")
                Prd_CS = str(imp())
                if Prd_MD == 'Apnormal' or Prd_CS == 'Drowsy' or Prd_CS == 'Distracted':
                    causse.clear()
                    cv2.putText(frame, 'Abnormal Driving Mode', (300, 30),
                                cv2.FONT_HERSHEY_DUPLEX, 0.9, (147, 58, 31), 1)
                    alarm.append("Apnormal")
                else:
                    cv2.putText(frame, 'Normal Driving Mode', (300, 30),
                                cv2.FONT_HERSHEY_DUPLEX, 0.9, (147, 58, 31), 1)
                    alarmnot.append("Normal")
                seqclass.clear()
            if len(alarm) == 2:
                playsound(mp3File)
                alarm.clear()
            if len(alarmnot) == 1:
                alarm.clear()
                alarmnot.clear()
            cv2.putText(frame, Prd_GZ, (400, 100), cv2.FONT_HERSHEY_DUPLEX,
                        0.9, (147, 58, 31), 1)

        # Pose estimation by 3 steps:
        # 1. detect face;
        # 2. detect landmarks;
        # 3. estimate pose

        img_queue.put(frame)
        facebox = box_queue.get()

        if facebox is not None:
            # Do face detection, facial landmark detection and iris detection.
            if args.cpu:
                face = dlib.rectangle(left=facebox[0],
                                      top=facebox[1],
                                      right=facebox[2],
                                      bottom=facebox[3])
                marks = shape_to_np(predictor(frame, face))
            else:
                face_img = frame[facebox[1]:facebox[3], facebox[0]:facebox[2]]
                marks = fa.get_landmarks(face_img[:, :, ::-1],
                                         detected_faces=[
                                             (0, 0, facebox[2] - facebox[0],
                                              facebox[3] - facebox[1])
                                         ])[-1]
                marks[:, 0] += facebox[0]
                marks[:, 1] += facebox[1]

            x_l, y_l, ll, lu = detect_iris(frame, marks, "left")
            x_r, y_r, rl, ru = detect_iris(frame, marks, "right")

            # Try pose estimation with 68 points.
            R, T = pose_estimator.solve_pose_by_68_points(marks)
            pose = list(R) + list(T)
            # Add iris positions to stabilize.
            pose += [(ll + rl) / 2.0, (lu + ru) / 2.0]

            # Stabilize the pose.
            steady_pose = []
            pose_np = np.array(pose).flatten()
            for value, ps_stb in zip(pose_np, pose_stabilizers):
                ps_stb.update([value])
                steady_pose.append(ps_stb.state[0])

            if args.debug:

                # show iris.
                if x_l > 0 and y_l > 0:
                    draw_iris(frame, x_l, y_l)
                if x_r > 0 and y_r > 0:
                    draw_iris(frame, x_r, y_r)

                # show face landmarks.
                draw_marks(frame, marks, color=(0, 255, 0))

                # show facebox.
                draw_box(frame, [facebox])

                # draw stable pose annotation on frame.
                pose_estimator.draw_annotation_box(
                    frame,
                    np.expand_dims(steady_pose[:3], 0),
                    np.expand_dims(steady_pose[3:6], 0),
                    color=(128, 255, 128))

                # draw head axes on frame.
                pose_estimator.draw_axes(frame,
                                         np.expand_dims(steady_pose[:3], 0),
                                         np.expand_dims(steady_pose[3:6], 0))

            roll = np.clip(-(180 + np.degrees(steady_pose[2])), -50, 50)
            pitch = np.clip(-(np.degrees(steady_pose[1])) - 15, -40, 40)
            yaw = np.clip(-(np.degrees(steady_pose[0])), -50, 50)
            min_ear = min(eye_aspect_ratio(marks[36:42]),
                          eye_aspect_ratio(marks[42:48]))
            mar = mouth_aspect_ration(marks[60:68])
            mdst = mouth_distance(marks[60:68]) / (facebox[2] - facebox[0])

            if args.connect and frame_count > 60:
                msg = '%.4f %.4f %.4f %.4f %.4f %.4f %.4f %.4f'% \
                      (roll, pitch, yaw, min_ear, mar, mdst, steady_pose[6], steady_pose[7])
                s.send(bytes(msg, "utf-8"))

        dt = time.time() - t
        FPS = int(1 / (dt + 1e-6))  # guard against a zero frame time
        ts += [dt]
        print('\r', '%.3f' % dt, end=' ')

        if args.debug:
            draw_FPS(frame, FPS)
            cv2.imshow("face", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):  # press q to exit.
                break

    # Clean up the multiprocessing process.
    box_process.terminate()
    box_process.join()
    cap.release()
    if args.connect:
        s.close()
    if args.debug:
        cv2.destroyAllWindows()
    print('%.3f' % np.mean(ts))
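All three examples smooth the raw PnP output with per-value Stabilizer objects (state_num=2, measure_num=1, cov_process, cov_measure). The sketch below shows one plausible implementation of such a scalar stabilizer on top of cv2.KalmanFilter, assuming a constant-velocity state [value, velocity] and a single measured value; the Stabilizer class in the actual repo may differ in detail.

import cv2
import numpy as np

class Stabilizer:
    def __init__(self, state_num=2, measure_num=1, cov_process=0.01, cov_measure=0.1):
        self.filter = cv2.KalmanFilter(state_num, measure_num)
        self.state = np.zeros((state_num, 1), dtype=np.float32)
        self.measurement = np.zeros((measure_num, 1), dtype=np.float32)
        # Constant-velocity model: next value = value + velocity.
        self.filter.transitionMatrix = np.array([[1., 1.],
                                                 [0., 1.]], dtype=np.float32)
        self.filter.measurementMatrix = np.array([[1., 0.]], dtype=np.float32)
        self.filter.processNoiseCov = np.eye(state_num, dtype=np.float32) * cov_process
        self.filter.measurementNoiseCov = np.eye(measure_num, dtype=np.float32) * cov_measure

    def update(self, measurement):
        # Predict, then correct with the new scalar measurement.
        self.filter.predict()
        self.measurement = np.array([[np.float32(measurement[0])]])
        self.filter.correct(self.measurement)
        self.state = self.filter.statePost

Used as in the examples: ps_stb.update([value]) followed by reading the smoothed value from ps_stb.state[0].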