Example #1
                    COUNTER_ORIGIN = 0

                ######
                if args.eye_gaze == 1:
                    _rotation_matrix = np.matmul(rmat, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
                    _m = np.zeros((4, 4))
                    _m[:3, :3] = _rotation_matrix
                    _m[3, 3] = 1
                    # Go from camera space to ROS space
                    _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                                    [-1.0, 0.0, 0.0, 0.0],
                                    [0.0, -1.0, 0.0, 0.0],
                                    [0.0, 0.0, 0.0, 1.0]]
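                    # (Camera optical frame: x right, y down, z forward; ROS frame: x forward, y left, z up.)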

                    roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
                    roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

                    phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)
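                    # phi_head / theta_head: horizontal and vertical head-pose angles (radians) that feed the gaze network alongside the eye patches.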


                    # le_c, re_c, _, _ = get_eye_image_from_landmarks(f.bbox, get_image_from_bb(frame, f.bbox), f.lms, (60,36))
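                    # Crop the two eye regions from the 68-point landmark eye contours (indices 36-41 and 42-47).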
                    re_c = get_eye_area(eye_gaze_frame, [f.lms[36], f.lms[37], f.lms[38], f.lms[39], f.lms[40], f.lms[41]])
                    le_c = get_eye_area(eye_gaze_frame, [f.lms[42], f.lms[43], f.lms[44], f.lms[45], f.lms[46], f.lms[47]])

                    r_eye_roi_resize.append(input_from_image(re_c))
                    l_eye_roi_resize.append(input_from_image(le_c))
                    head_list.append([phi_head, theta_head])

                #########
                frame_count += 1
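
Both examples call euler_from_matrix, limit_yaw and get_phi_theta_from_euler from rt_gene's gaze_tools without showing them. As a rough sketch of what the final phi/theta conversion amounts to (the scipy Euler extraction and the sign convention below are assumptions, not the rt_gene source):

import numpy as np
from scipy.spatial.transform import Rotation

def head_pose_angles(transform_4x4):
    # Extrinsic x-y-z Euler angles from the 3x3 rotation block of the 4x4
    # head transform (already expressed in ROS axes by the code above).
    roll, pitch, yaw = Rotation.from_matrix(np.asarray(transform_4x4)[:3, :3]).as_euler('xyz')
    # Assumed convention mirroring rt_gene's gaze_tools: phi is the horizontal
    # (yaw-derived) angle, theta the vertical (pitch-derived) angle.
    return -yaw, -pitch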
Example #2
def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix):
    faceboxes = landmark_estimator.get_face_bb(color_img)
    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        return

    subjects = landmark_estimator.get_subjects_from_faceboxes(
        color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    for idx, subject in enumerate(subjects):
        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        # Estimate the head rotation by fitting the 3D face model points to the detected 2D landmarks
        success, rotation_vector, _ = cv2.solvePnP(
            landmark_estimator.model_points,
            subject.landmarks.reshape(len(subject.landmarks), 1, 2),
            cameraMatrix=camera_matrix,
            distCoeffs=dist_coefficients,
            flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write(
                'Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(
            _rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0], [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)

        face_image_resized = cv2.resize(subject.face_color,
                                        dsize=(224, 224),
                                        interpolation=cv2.INTER_CUBIC)
        head_pose_image = landmark_estimator.visualize_headpose_result(
            face_image_resized, (phi_head, theta_head))

        if args.vis_headpose:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_headpose:
            cv2.imwrite(
                os.path.join(args.output_path,
                             os.path.splitext(base_name)[0] + '_headpose.jpg'),
                head_pose_image)

        input_r_list.append(
            gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(
            gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)

    if len(valid_subject_list) == 0:
        return

    gaze_est = gaze_estimator.estimate_gaze_twoeyes(
        inference_input_left_list=input_l_list,
        inference_input_right_list=input_r_list,
        inference_headpose_list=input_head_list)

    for subject_id, gaze, headpose in zip(valid_subject_list,
                                          gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        # Build visualizations
        r_gaze_img = gaze_estimator.visualize_eye_result(
            subject.right_eye_color, gaze)
        l_gaze_img = gaze_estimator.visualize_eye_result(
            subject.left_eye_color, gaze)
        s_gaze_img = np.concatenate((r_gaze_img, l_gaze_img), axis=1)

        if args.vis_gaze:
            plt.axis("off")
            plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
            plt.show()

        if args.save_gaze:
            cv2.imwrite(
                os.path.join(args.output_path,
                             os.path.splitext(base_name)[0] + '_gaze.jpg'),
                s_gaze_img)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
            # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)

        if args.save_estimate:
            with open(
                    os.path.join(
                        args.output_path,
                        os.path.splitext(base_name)[0] + '_output.txt'),
                    'w+') as f:
                f.write(
                    os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) +
                    ', ' + str(headpose[0]) + ']' + ', [' + str(gaze[1]) +
                    ', ' + str(gaze[0]) + ']' + '\n')

def estimate_gaze(base_name, color_img, dist_coefficients, camera_matrix, label):
    global FPS

    cv2.putText(color_img, "FPS : " + FPS, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    start = time.time()
    # Returns the face box position of every detected subject as a list: [left_x, top_y, right_x, bottom_y]
    faceboxes = landmark_estimator.get_face_bb(color_img)

    # Draw a bounding box around each detected face
    for facebox in faceboxes:
        left, top, right, bottom = [int(v) for v in facebox[:4]]
        cv2.rectangle(color_img, (left, top), (right, bottom), (0, 255, 0), 2)


    if len(faceboxes) == 0:
        tqdm.write('Could not find faces in the image')
        # if EVAL and not SHIFT:
            # head_phi.append(-100)
            # head_theta.append(-100)
            # gaze_phi.append(-100)
            # gaze_theta.append(-100)
        return

    # check_people(faceboxes)

    subjects = landmark_estimator.get_subjects_from_faceboxes(color_img, faceboxes)
    extract_eye_image_patches(subjects)

    input_r_list = []
    input_l_list = []
    input_head_list = []
    valid_subject_list = []

    people_count = 1
    frame_img = color_img

    for idx, subject in enumerate(subjects):
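        # Associate this face box with a tracked person by its left-x coordinate (get_people / people_list come from elsewhere in this script).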
        people_idx = get_people(faceboxes[idx][0])
        people_list[people_idx].set_bbox_l(faceboxes[idx][0])

        if subject.left_eye_color is None or subject.right_eye_color is None:
            tqdm.write('Failed to extract eye image patches')
            continue

        success, rotation_vector, _ = cv2.solvePnP(landmark_estimator.model_points,
                                                   subject.landmarks.reshape(len(subject.landmarks), 1, 2),
                                                   cameraMatrix=camera_matrix,
                                                   distCoeffs=dist_coefficients, flags=cv2.SOLVEPNP_DLS)

        if not success:
            tqdm.write('Not able to extract head pose for subject {}'.format(idx))
            continue

        _rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        _rotation_matrix = np.matmul(_rotation_matrix, np.array([[0, 1, 0], [0, 0, -1], [-1, 0, 0]]))
        _m = np.zeros((4, 4))
        _m[:3, :3] = _rotation_matrix
        _m[3, 3] = 1
        # Go from camera space to ROS space
        _camera_to_ros = [[0.0, 0.0, 1.0, 0.0],
                          [-1.0, 0.0, 0.0, 0.0],
                          [0.0, -1.0, 0.0, 0.0],
                          [0.0, 0.0, 0.0, 1.0]]
        roll_pitch_yaw = list(euler_from_matrix(np.dot(_camera_to_ros, _m)))
        roll_pitch_yaw = limit_yaw(roll_pitch_yaw)

        phi_head, theta_head = get_phi_theta_from_euler(roll_pitch_yaw)
        # if EVAL:
            # head_phi.append(phi_head)
            # head_theta.append(theta_head)
        # face_image_resized = cv2.resize(subject.face_color, dsize=(224, 224), interpolation=cv2.INTER_CUBIC)
        # head_pose_image = landmark_estimator.visualize_headpose_result(face_image_resized, (phi_head, theta_head))

        # Draw the head pose vector on the face box of the frame image
        if EVAL:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(frame_img, faceboxes[idx], (phi_head, theta_head), people_list[people_idx], label)
        else:
            head_pose_image, headpose_err = landmark_estimator.visualize_headpose_result(frame_img, faceboxes[idx], (phi_head, theta_head), people_list[people_idx])

        frame_img = head_pose_image
        if EVAL:
            headpose_error.append(headpose_err)
            print("head pose error:",headpose_err)

        if args.mode == 'image':
            #show headpose
            # if args.vis_headpose:
            #     plt.axis("off")
            #     plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
            #     plt.show()

            if args.save_headpose:
                cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + str(people_count) + '_headpose.jpg'), head_pose_image)
        people_count += 1
        # Convert the eye patches to the network input format (resize etc.)
        input_r_list.append(gaze_estimator.input_from_image(subject.right_eye_color))
        input_l_list.append(gaze_estimator.input_from_image(subject.left_eye_color))
        input_head_list.append([theta_head, phi_head])
        valid_subject_list.append(idx)


    # if args.mode =='video':
    #     # plt.axis("off")
    #     # plt.imshow(cv2.cvtColor(head_pose_image, cv2.COLOR_BGR2RGB))
    #     # plt.show()
    #     headpose_out_video.write(frame_img)

    if len(valid_subject_list) == 0:
        return

    # returns [subject : [gaze_pose]]
    gaze_est = gaze_estimator.estimate_gaze_twoeyes(inference_input_left_list=input_l_list,
                                                    inference_input_right_list=input_r_list,
                                                    inference_headpose_list=input_head_list)
    people_count = 1
    for subject_id, gaze, headpose in zip(valid_subject_list, gaze_est.tolist(), input_head_list):
        subject = subjects[subject_id]
        facebox = faceboxes[subject_id]
        people_idx = get_people(facebox[0])
        # Build visualizations
        # r_gaze_img = gaze_estimator.visualize_eye_result(subject.right_eye_color, gaze)
        # l_gaze_img = gaze_estimator.visualize_eye_result(subject.left_eye_color, gaze)
        # if EVAL:
        #     gaze_theta.append(gaze[0])
        #     gaze_phi.append(gaze[1])
        if EVAL:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(frame_img, gaze, subject.leftcenter_coor, facebox, people_list[people_idx], "gaze_r", label)
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(r_gaze_img, gaze, subject.rightcenter_coor, facebox, people_list[people_idx], "gaze_l", label)
        else:
            r_gaze_img, r_gaze_err = gaze_estimator.visualize_eye_result(frame_img, gaze, subject.leftcenter_coor, facebox, people_list[people_idx], "gaze_r")
            l_gaze_img, l_gaze_err = gaze_estimator.visualize_eye_result(r_gaze_img, gaze, subject.rightcenter_coor, facebox, people_list[people_idx], "gaze_l")

        frame_img = l_gaze_img
        if EVAL:
            print("right gaze error:",r_gaze_err)
            print("left gaze error:",l_gaze_err)
            gaze_error.append(r_gaze_err)
            gaze_error.append(l_gaze_err)

        #show gaze image
        # if args.vis_gaze:
        #     plt.axis("off")
        #     plt.imshow(cv2.cvtColor(s_gaze_img, cv2.COLOR_BGR2RGB))
        #     plt.show()
        if args.mode == 'image':
            if args.save_gaze:
                cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + str(people_count) + '_gaze.jpg'), frame_img)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_left.jpg'), subject.left_eye_color)
                # cv2.imwrite(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_right.jpg'), subject.right_eye_color)


        if args.save_estimate:
            with open(os.path.join(args.output_path, os.path.splitext(base_name)[0] + '_output.txt'), 'w+') as f:
                f.write(os.path.splitext(base_name)[0] + ', [' + str(headpose[1]) + ', ' + str(headpose[0]) + ']' +
                        ', [' + str(gaze[1]) + ', ' + str(gaze[0]) + ']' + '\n')
        people_count += 1
    if args.mode == 'video':
        out_video.write(frame_img)
    end = time.time()
    delay_time = end - start
    if delay_time > 0:  # avoid division by zero on extremely fast frames
        FPS = str(int(1 / delay_time))
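
For context, a minimal sketch of how this modified estimate_gaze could be driven per frame in video mode. The capture source and calibration values are assumptions here; out_video, camera_matrix, dist_coefficients and label are expected to be prepared elsewhere in the script, exactly as the function above assumes.

import cv2

cap = cv2.VideoCapture(0)  # assumed capture source (webcam index or video file path)
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    # camera_matrix / dist_coefficients come from the camera calibration;
    # label is the ground-truth annotation used only when EVAL is enabled.
    estimate_gaze('frame', frame, dist_coefficients, camera_matrix, label)
cap.release()
out_video.release()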