Beispiel #1
0
def main():
    """Detect the first face in every listed image and save the boxes as JSON.

    Reads ``args.dataset``, ``args.root_dir`` and ``args.filename_list`` from
    the module-level ``args``.  For each image name, the first box returned by
    the detector is truncated to integers and stored under that name.  Images
    that cannot be read, or in which no face is found, are left out of the
    output.  The mapping is written to
    ``datasets/filenamelists/<dataset>_facebbox_disgard.json``.
    """
    dataset = args.dataset
    root_dir = args.root_dir
    filenamelist = get_list_from_filenames(args.filename_list)
    face_detector = FaceDetector(detection_cfg)
    # 'BIWI' image names get the '_rgb.png' suffix, everything else '.jpg'.
    img_ext = '_rgb.png' if dataset == 'BIWI' else '.jpg'
    det_count = 0
    faceboxes = {}
    for count, image_name in enumerate(filenamelist, start=1):
        image_path = os.path.join(root_dir, image_name + img_ext)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports an unreadable file by returning None.
            print("could not read image %s, skipping" % image_path)
            continue
        bboxes = face_detector.detect_faces(image)
        if len(bboxes) == 0:
            # Without a detection there is no box for this image; storing the
            # box of an earlier image (or an undefined name) would be wrong.
            continue
        det_count += 1
        print("images, %s, detected face %s" % (count, det_count))
        faceboxes[image_name] = [int(value) for value in bboxes[0][:4]]
    headpose_file = os.path.join('datasets', 'filenamelists',
                                 dataset + '_facebbox_disgard.json')
    with open(headpose_file, 'w') as f:
        json.dump(faceboxes, f, indent=4)
Beispiel #2
0
def main():
    """Run the gaze-controlled mouse application from start to finish.

    Sets up logging and the command-line parameters, creates the mouse
    controller, builds and loads the four models on one shared ``IECore``
    instance, then feeds every frame from the input feeder through
    ``pipline``.  Each processed frame is shown in a window and the mouse is
    moved to the returned gaze position.  The loop stops on a ``None`` frame
    or when 'q' is pressed.  The feeder is always closed, and the total
    runtime is logged at the end.
    """
    global face_detector, landmark_dectector, head_pose_estimator, gaze_estimator, mouse_controller
    started_ticks = cv2.getTickCount()

    # Load parameters
    set_logging()
    logging.info('start APP')
    pyautogui.FAILSAFE = False
    cmd_paras = get_comand_line_parameters()

    # Set up the mouse controller
    mouse_controller = createMouseController()

    # Set up the detectors and estimators
    logging.info('setUp dectors and estimators started')

    # The NCS2 can only handle a single IECore instance, so it is shared.
    plugin = IECore()

    face_detector = FaceDetector(cmd_paras.fd, cmd_paras.device, plugin)
    face_detector.load_model()

    landmark_dectector = FacialLandmarksDetector(
        cmd_paras.lr, cmd_paras.device, plugin)
    landmark_dectector.load_model()

    head_pose_estimator = HeadPoseEstimator(
        cmd_paras.hp, cmd_paras.device, plugin)
    head_pose_estimator.load_model()

    gaze_estimator = GazeEstimatior(cmd_paras.ge, cmd_paras.device, plugin)
    gaze_estimator.load_model()
    logging.info('setUp dectors estimators ends')

    logging.info('start inputFeeder read stream')
    input_feeder = createInputFeeder(cmd_paras)
    input_feeder.load_data()

    # Run the pipeline frame by frame
    frames_done = 0
    try:
        for frame in input_feeder.next_batch():
            if frame is None:
                break
            frames_done += 1
            frame, gaze_position = pipline(frame, frames_done)
            cv2.imshow('frame', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            gaze_x = gaze_position[0]
            gaze_y = gaze_position[1]
            mouse_controller.move(gaze_x, gaze_y)
    finally:
        input_feeder.close()
        logging.info('end inputFeeder end stream')

    elapsed_ticks = cv2.getTickCount() - started_ticks
    logging.info('App Runtime: {:.4f} seconds pipline runs {:d}'.format(
        elapsed_ticks / cv2.getTickFrequency(), frames_done))
Beispiel #3
0
    def __init__(self, video_source=0):
        """Create the face detector, the age estimator and the video capture.

        Args:
            video_source: Value handed to ``cv2.VideoCapture``; defaults to 0.

        Raises:
            ValueError: If the capture reports that it is not opened.
        """
        # Colour tuple kept on the instance; presumably the fill used when a
        # face is covered -- confirm against the methods that read it.
        self.COVER_COLOR = (0, 0, 0)

        self.detector = FaceDetector()
        self.age_estimator = AgeEstimator()

        self.capture = cv2.VideoCapture(video_source)
        if not self.capture.isOpened():
            raise ValueError("Unable to open video source", video_source)

        # Frame dimensions as reported by the capture's properties.
        self.width = self.capture.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
def main(args):
    """Run face detection and a liveness check on a video stream.

    Frames are read from the stream returned by ``getVideoStream(args)``.
    For every frame the face detector is timed; when boxes are returned, the
    first box is handed to a ``FaceBox`` (created on first use with
    ``args["shape_predictor"]``) and its ``checkFrame`` call is timed as
    well.  The loop ends when the stream is exhausted, when ``checkFrame``
    returns a truthy value, or when 'q' is pressed.  Average timings are
    printed at the end; an average is reported as unavailable when nothing
    was measured, rather than dividing by zero.

    Args:
        args: Mapping of parsed options; passed to ``getVideoStream`` and
            indexed with ``"shape_predictor"``.
    """
    print("[INFO] loading facial landmark predictor...")
    # Load mtcnn detector from facenet
    face_detect = FaceDetector(True, None)
    vs, file_stream = getVideoStream(args)
    # Read the first frame
    success, frame = vs.read()
    face_box = None
    frame_index = 0
    liveness_runtimes = []
    detection_runtimes = []
    while success:
        print("Frame: ", frame_index)
        frame_index += 1
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # detect faces in the rgb frame
        start = time.time()
        boxes, _ = face_detect.detectFace(frame_rgb)
        detection_runtimes.append(time.time() - start)
        if boxes is not None:
            if face_box is None:
                face_box = FaceBox(boxes[0], frame, args["shape_predictor"])
            else:
                face_box.updateFrame(frame)
                face_box.updateRect(boxes[0])
            start = time.time()
            check_liveness = face_box.checkFrame()
            liveness_runtimes.append(time.time() - start)
            if check_liveness:
                print("Real")
                break
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF
        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
        success, frame = vs.read()
    cv2.destroyAllWindows()
    if not file_stream:
        vs.stop()

    # Either list can be empty (no frame read, or no face ever detected), so
    # guard the divisions.
    if liveness_runtimes:
        print("Avg blink detection time:",
              sum(liveness_runtimes) / len(liveness_runtimes))
    else:
        print("Avg blink detection time: n/a (no face was processed)")
    if detection_runtimes:
        print("Avg face detection time:",
              sum(detection_runtimes) / len(detection_runtimes))
    else:
        print("Avg face detection time: n/a (no frame was processed)")
Beispiel #5
0
class FaceBlocking:
    """Cover faces in captured frames whose estimated age is out of range."""

    def __init__(self, video_source=0):
        """Create the detector, the age estimator and the video capture.

        Args:
            video_source: Value handed to ``cv2.VideoCapture``; defaults to 0.

        Raises:
            ValueError: If the capture reports that it is not opened.
        """
        # Colour passed to cv2.rectangle when a face is covered.
        self.COVER_COLOR = (0, 0, 0)

        self.detector = FaceDetector()
        self.age_estimator = AgeEstimator()

        self.capture = cv2.VideoCapture(video_source)
        if not self.capture.isOpened():
            raise ValueError("Unable to open video source", video_source)

        self.width = self.capture.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.capture.get(cv2.CAP_PROP_FRAME_HEIGHT)

    def __del__(self):
        """Release the capture if it was created and is still open."""
        # __init__ can fail before self.capture exists; __del__ still runs.
        capture = getattr(self, "capture", None)
        if capture is not None and capture.isOpened():
            capture.release()

    def get_processed_frame(self, _age_restrictions=(), _debug=False):
        """Read one frame and cover every face outside the allowed age range.

        Args:
            _age_restrictions: ``(min_age, max_age)`` inclusive bounds.  An
                empty value (the default) disables covering.
            _debug: When true, draw the estimated age and a box on each face.

        Returns:
            The processed frame, or ``None`` when no frame could be read.
        """
        val, frame = self.capture.read()
        if not val or frame is None:
            return None

        has_limits = len(_age_restrictions) > 0
        faces = self.detector.get_coordinates(frame, _multi_face=True)
        if faces is None:
            faces = []
        for start_x, start_y, end_x, end_y in faces:
            roi_color = frame[start_y:end_y, start_x:end_x]
            pred_gender, pred_age = self.age_estimator.estimate(roi_color)
            # The estimated age is the index of the largest prediction entry.
            estimated_age = np.argmax(pred_age)
            if has_limits and not (
                    _age_restrictions[0] <= estimated_age <= _age_restrictions[1]):
                self.block_face(frame, start_x, start_y, end_x, end_y)
            if _debug:
                self.draw_debug(frame, estimated_age, start_x, start_y, end_x, end_y)

        return frame

    def draw_debug(self, _frame, _age, _start_x, _start_y, _end_x, _end_y):
        """Draw the age label and an outline rectangle onto ``_frame``."""
        font = cv2.FONT_HERSHEY_SIMPLEX
        color = (0, 255, 0)
        stroke = 2
        cv2.putText(_frame, "Age: " + str(_age), (_start_x, _start_y), font, 1, color, stroke, cv2.LINE_AA)
        cv2.rectangle(_frame, (_start_x, _start_y), (_end_x, _end_y), color, stroke)

    def block_face(self, _frame, _start_x, _start_y, _end_x, _end_y):
        """Fill the given rectangle of ``_frame`` with ``COVER_COLOR``."""
        # Thickness -1 makes cv2.rectangle draw a filled rectangle.
        cv2.rectangle(_frame, (_start_x, _start_y), (_end_x, _end_y), self.COVER_COLOR, -1)

    def set_detection_type(self, detection_type):
        """Forward ``detection_type`` to the face detector."""
        self.detector.set_detection_type(detection_type)

    def set_age_estimation_model(self, estimation_model):
        """Forward ``estimation_model`` to the age estimator."""
        self.age_estimator.switch_model(estimation_model)
def main():
    """Start the ``face_detect`` ROS node and block until it shuts down."""

    # Register this process as a ROS node.
    rospy.init_node('face_detect')

    # Build the face detector from the configured Haar cascade directory.
    cascade_dir = rospy.get_param('haar_cascade_dir', DEFAULT_HAAR_CASCADE_DIR)
    rospy.logdebug('Creating face detector using data in: %s', cascade_dir)
    detector = FaceDetector(cascade_dir)

    # Publisher for detection results. See [1] for a discussion of the
    # queue_size parameter.
    # [1] http://wiki.ros.org/rospy/Overview/Publishers%20and%20Subscribers
    faces_topic = rospy.get_name() + '/faces'
    publisher = rospy.Publisher(faces_topic, Faces, queue_size=1)

    # Object that holds this node's logic.
    node = FaceDetectionNode(detector, publisher)

    # Subscribe to incoming camera images. With a queue size of 1, images are
    # dropped automatically when detection cannot keep up.
    rospy.Subscriber(
        'camera/image_raw',
        Image,
        node.new_input_image,
        queue_size=1,
    )

    # Run the event loop; this only returns once the node has shut down.
    rospy.spin()
Beispiel #7
0
    def __init__(self, detection_type, recognition_type, video_source=0):
        """Create the detector, recognizer, embedding model and video capture.

        Args:
            detection_type: Passed to ``FaceDetector``.
            recognition_type: Passed to ``FaceRecognizer``.
            video_source: Value handed to ``cv2.VideoCapture``; defaults to 0.

        Raises:
            ValueError: If the capture reports that it is not opened.
        """
        # Colour tuple kept on the instance; presumably the fill used when a
        # face is covered -- confirm against the methods that read it.
        self.COVER_COLOR = (0, 0, 0)
        # Threshold stored for later use; presumably a minimum recognition
        # confidence -- confirm its scale against the recognizer.
        self.MIN_CONF = 40

        self.detector = FaceDetector(detection_type)
        self.face_recognizer = FaceRecognizer(recognition_type)
        # Model loaded from "facenet_keras.h5" inside MODELS_DIR.
        self.embedding_model = load_model(
            os.path.join(MODELS_DIR, "facenet_keras.h5"))
        self.labels = self.face_recognizer.labels

        self.capture = cv2.VideoCapture(video_source)
        if not self.capture.isOpened():
            raise ValueError("Unable to open video source", video_source)

        # Frame dimensions as reported by the capture's properties.
        self.width = self.capture.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
Beispiel #8
0
def process_video(
    detector: FaceDetector,
    filepath: str,
    mode: str = 'blur',
):
    """Anonymise the faces in a video and write it to ``./output/video.avi``.

    Every frame, including the first one, is run through the detector and
    then blurred, pixelated or has its eyes hidden, depending on ``mode``.
    Reading stops at the first frame that cannot be read (end of stream).
    The capture and the writer are released even if processing fails.

    Args:
        detector: Object whose ``predict(frame)`` returns
            ``(bboxes, landmarks)``.
        filepath: Path of the video to read.
        mode: ``'blur'``, ``'pixel'``, or anything else to hide the eyes.

    Raises:
        ValueError: If not even the first frame can be read.
    """
    import os  # local import: only needed here to create the output folder

    cap = cv2.VideoCapture(filepath)
    writer = None
    try:
        ok, frame = cap.read()
        if not ok or frame is None:
            raise ValueError(f'Unable to read video: {filepath}')

        height, width = frame.shape[:2]
        # cv2.VideoWriter does not create missing directories.
        os.makedirs('./output', exist_ok=True)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        writer = cv2.VideoWriter('./output/video.avi', fourcc, 20, (width, height), True)

        # cap.isOpened() stays true at end of stream, so the loop is driven
        # by the success flag of read() instead.
        while ok and frame is not None:
            bboxes, landmarks = detector.predict(frame)

            if mode == 'blur':
                frame = blur(frame, bboxes)
            elif mode == 'pixel':
                frame = pixelate(frame, bboxes)
            else:
                frame = hide_eyes(frame, landmarks)

            writer.write(frame)
            ok, frame = cap.read()
    finally:
        cap.release()
        if writer is not None:
            writer.release()
Beispiel #9
0
def main():
    """Pixelate every confidently detected face in a set of images.

    Loads ``settings.yaml``, builds the detector from
    ``cfg.face_detector.model_path`` and processes every file matching the
    patterns in ``exts`` under ``image_dir``.  Faces whose score (fifth box
    entry) is below ``cfg.face_detector.confident_score_threshold`` are left
    untouched.  Results are written to ``output_dir`` under the original file
    name.

    ``exts``, ``image_list``, ``image_dir`` and ``output_dir`` are not defined
    in this function; they are presumably module-level names -- confirm.
    """
    cfg = anyconfig.load('settings.yaml')
    cfg = munch.munchify(cfg)
    detector = FaceDetector(cfg.face_detector.model_path, -1)
    threshold = cfg.face_detector.confident_score_threshold
    for ext in exts:
        image_list.extend(glob.glob(os.path.join(image_dir, ext)))
    for image_path in image_list:
        # basename works with the path separator of whichever OS is in use.
        filename = os.path.basename(image_path)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread reports an unreadable file by returning None.
            print('could not read image %s, skipping' % image_path)
            continue
        height, width = image.shape[:2]
        # The detector gets the image with its channel order reversed.
        bboxes, _ = detector.detect(image[:, :, ::-1], 1.0)
        for bbox in bboxes:
            if bbox[4] < threshold:
                continue
            xmin, ymin, xmax, ymax = [int(val) for val in bbox[:4]]
            # Clamp to the image: a negative start index would otherwise
            # select a region counted from the opposite edge.
            xmin, ymin = max(xmin, 0), max(ymin, 0)
            xmax, ymax = min(xmax, width), min(ymax, height)
            if xmin >= xmax or ymin >= ymax:
                continue
            face = image[ymin:ymax, xmin:xmax]
            image[ymin:ymax, xmin:xmax] = anonymize_face_pixelate(face)
        cv2.imwrite(os.path.join(output_dir, filename), image)
Beispiel #10
0
 def __init__(self):
     """Load the options, a face detector and the landmark TFLite model.

     The interpreter is created from ``opts.landmark_model_path``, its
     tensors are allocated, and the input/output details and shapes of the
     first input and first output tensor are cached on the instance.
     """
     self.opts = Options().opts
     self.FaceDetector = FaceDetector()
     self.interpreter_lm = tf.lite.Interpreter(
         model_path=self.opts.landmark_model_path)
     self.interpreter_lm.allocate_tensors()
     self.input_details_lm = self.interpreter_lm.get_input_details()
     self.output_details_lm = self.interpreter_lm.get_output_details()
     self.input_shape_lm = self.input_details_lm[0]['shape']
     self.output_shape_lm = self.output_details_lm[0]['shape']
     # Assumes the input shape is laid out as (batch, height, width, ...)
     # -- TODO confirm against the model.
     self.in_h, self.in_w = self.input_shape_lm[1], self.input_shape_lm[2]
Beispiel #11
0
class FaceBlocking:
    """Cover the faces of listed people in frames read from a video source."""

    def __init__(self, detection_type, recognition_type, video_source=0):
        """Create the detector, recognizer, embedding model and capture.

        Args:
            detection_type: Passed to ``FaceDetector``.
            recognition_type: Passed to ``FaceRecognizer``.
            video_source: Value handed to ``cv2.VideoCapture``; defaults to 0.

        Raises:
            ValueError: If the capture reports that it is not opened.
        """
        # Colour passed to cv2.rectangle when a face is covered.
        self.COVER_COLOR = (0, 0, 0)
        # Minimum classification confidence required before a face is covered.
        self.MIN_CONF = 40

        self.detector = FaceDetector(detection_type)
        self.face_recognizer = FaceRecognizer(recognition_type)
        self.embedding_model = load_model(
            os.path.join(MODELS_DIR, "facenet_keras.h5"))
        self.labels = self.face_recognizer.labels

        self.capture = cv2.VideoCapture(video_source)
        if not self.capture.isOpened():
            raise ValueError("Unable to open video source", video_source)

        self.width = self.capture.get(cv2.CAP_PROP_FRAME_WIDTH)
        self.height = self.capture.get(cv2.CAP_PROP_FRAME_HEIGHT)

    def __del__(self):
        """Release the capture if it was created and is still open."""
        # __init__ can fail before self.capture exists; __del__ still runs.
        capture = getattr(self, "capture", None)
        if capture is not None and capture.isOpened():
            capture.release()

    def get_processed_frame(self, _block_list=(), _debug=False):
        """Read one frame and cover every recognised face on the block list.

        Args:
            _block_list: Names whose faces are covered; empty by default.
            _debug: When true, draw the recognised name and a box per face.

        Returns:
            The processed frame, or ``None`` when no frame could be read.
        """
        val, frame = self.capture.read()
        if not val or frame is None:
            return None

        faces = self.detector.get_coordinates(frame, _multi_face=True)
        if faces is None:
            faces = []
        for start_x, start_y, end_x, end_y in faces:
            roi_color = frame[start_y:end_y, start_x:end_x]
            who_face, conf = self.face_recognizer.face_classification(
                roi_color, self.embedding_model)
            if conf >= self.MIN_CONF and who_face in _block_list:
                self.block_face(frame, who_face, start_x, start_y, end_x,
                                end_y)
            if _debug:
                self.draw_debug(frame, who_face, start_x, start_y, end_x,
                                end_y)

        return frame

    def draw_debug(self, _frame, _name, _start_x, _start_y, _end_x, _end_y):
        """Draw the name label and an outline rectangle onto ``_frame``."""
        font = cv2.FONT_HERSHEY_SIMPLEX
        color = (255, 255, 255)
        stroke = 2
        cv2.putText(_frame, _name, (_start_x, _start_y), font, 1, color,
                    stroke, cv2.LINE_AA)
        cv2.rectangle(_frame, (_start_x, _start_y), (_end_x, _end_y), color,
                      stroke)

    def block_face(self, _frame, _name, _start_x, _start_y, _end_x, _end_y):
        """Fill the given rectangle of ``_frame`` with ``COVER_COLOR``.

        ``_name`` is accepted for interface compatibility and is not used.
        """
        # Thickness -1 makes cv2.rectangle draw a filled rectangle.
        cv2.rectangle(_frame, (_start_x, _start_y), (_end_x, _end_y),
                      self.COVER_COLOR, -1)
Beispiel #12
0
def show_AFLW2000():
    """Show every listed image with a loosely enlarged landmark box drawn in.

    For each name in the file list, the 2D points are loaded from the ``.mat``
    file, their bounding box is enlarged by fixed margins, and the box is
    drawn onto the matching ``.jpg`` with the detector's ``draw_bboxes``.
    Each image stays on screen for up to 500 ms.
    """
    detector = FaceDetector(detection_cfg)
    data_root = args.root_dir
    for name in get_list_from_filenames(args.filename_list):
        jpg_path = os.path.join(data_root, name + ".jpg")
        mat_path = os.path.join(data_root, name + '.mat')

        # Crop the face loosely around the annotated points.
        points = utils.get_pt2d_from_mat(mat_path)
        xs = points[0, :]
        ys = points[1, :]
        x_min, y_min = min(xs), min(ys)
        x_max, y_max = max(xs), max(ys)

        margin = 0.20
        x_min -= 0.6 * margin * abs(x_max - x_min)
        y_min -= 2 * margin * abs(y_max - y_min)
        # The extents below use the minima that were just shifted, so the
        # statement order here is significant.
        x_max += 0.6 * margin * abs(x_max - x_min)
        y_max += 0.6 * margin * abs(y_max - y_min)

        # Single box with a fifth entry of 1.0.
        box = np.array([[x_min, y_min, x_max, y_max, 1.0]])
        picture = cv2.imread(jpg_path)
        picture = detector.draw_bboxes(picture, box)
        cv2.imshow('image', picture)
        cv2.waitKey(500)
    def __init__(self):
        """Load options, the recognition TFLite model and the known faces.

        Copies the recognition threshold, face database and model path from
        the options, creates and allocates the TFLite interpreter, caches the
        details and shapes of its first input and output tensor, creates a
        face detector and finally loads the known names and embeddings via
        ``get_embeddings_dir``.
        """
        Opts = Options()
        opts = Opts.opts
        self.recog_th = opts.recog_th
        self.face_database = opts.face_database
        self.face_model_path = opts.face_recog_model_path
        self.feature_extractor = tf.lite.Interpreter(self.face_model_path)
        self.feature_extractor.allocate_tensors()
        self.input_details = self.feature_extractor.get_input_details()
        self.output_details = self.feature_extractor.get_output_details()

        self.in_shape = self.input_details[0]['shape']
        self.output_shape = self.output_details[0]['shape']

        self.FaceDetector = FaceDetector()
        # Called last: presumably it relies on the attributes set above --
        # confirm against get_embeddings_dir.
        self.known_names, self.known_embeddings = self.get_embeddings_dir()
Beispiel #14
0
def process_image(
    detector: FaceDetector,
    filepath: str,
    mode: str = 'blur',
):
    """Anonymise the faces in one image and save it under ``./output``.

    The output file keeps the base name of ``filepath``.

    Args:
        detector: Object whose ``predict(image)`` returns
            ``(bboxes, landmarks)``.
        filepath: Path of the image to read.
        mode: ``'blur'``, ``'pixel'``, or anything else to hide the eyes.

    Raises:
        ValueError: If the image cannot be read.
    """
    image = cv2.imread(filepath)
    if image is None:
        # cv2.imread reports an unreadable file by returning None.
        raise ValueError(f'Unable to read image: {filepath}')
    bboxes, landmarks = detector.predict(image)

    if mode == 'blur':
        image = blur(image, bboxes)
    elif mode == 'pixel':
        image = pixelate(image, bboxes)
    else:
        image = hide_eyes(image, landmarks)

    os.makedirs('./output', exist_ok=True)
    # Name the output after the function's own argument, not the global CLI
    # namespace, so the function also works when called from other code.
    output_path = os.path.join('./output', os.path.basename(filepath))
    cv2.imwrite(output_path, image)
# Parse the command line with the parser built earlier in the module.
args = parser.parse_args()

# Set up logging: records at INFO and above go to the file named by args.log.
# NOTE(review): datefmt uses %I (12-hour clock) without %p, so morning and
# afternoon timestamps are indistinguishable -- confirm whether %H was meant.
logging.basicConfig(filename=args.log,
                    level=logging.INFO,
                    format="%(asctime)s [%(levelname)s] %(message)s",
                    datefmt='%Y-%m-%d %I:%M:%S')
logging.info('Initialization...')

try:

    feed = InputFeeder(args.input)

    t = -time.time()  # measure model loading time
    faceDetector = FaceDetector(precision=args.precision,
                                concurrency=args.concurrency,
                                device=args.device,
                                extensions=args.ext)
    eyeDetector = EyeDetector(precision=args.precision,
                              concurrency=args.concurrency,
                              device=args.device,
                              extensions=args.ext)
    headPoseEstimator = HeadPoseEstimator(precision=args.precision,
                                          concurrency=args.concurrency,
                                          device=args.device,
                                          extensions=args.ext)
    gazeEstimator = GazeEstimator(precision=args.precision,
                                  concurrency=args.concurrency,
                                  device=args.device,
                                  extensions=args.ext)
    mouseController = MouseController(precision='high',
                                      speed=args.speed.lower(),
Beispiel #16
0
def infer_on_stream(args):
    """Run the gaze-estimation pipeline over an input stream.

    Loads the four models, opens the input feed and, for every frame,
    processes the first detected face: crops the face and the eyes, estimates
    head pose and gaze, draws the gaze line and optionally shows the frame
    and moves the mouse.  Frames are written to the output video (when there
    is one) and, for image input, to ``output_image.jpg``.  Timing statistics
    are logged and written to ``stats.txt`` in
    ``<args.output_path>/<args.device>``.

    Any exception is logged rather than propagated.  The feed is closed and
    the OpenCV windows are destroyed even when processing fails.

    Args:
        args: Parsed options; the attributes read here are the four
            ``model_*`` paths, ``output_path``, ``device``, ``input_type``,
            ``input_path``, ``show_input`` and ``move_mouse``.
    """
    try:
        log.basicConfig(
            level=log.INFO,
            format="%(asctime)s [%(levelname)s] %(message)s",
            handlers=[log.FileHandler("app.log"),
                      log.StreamHandler()])

        mouse_controller = MouseController(precision="low", speed="fast")

        start_model_load_time = time.time()

        face_detector = FaceDetector(args.model_face_detection)
        facial_landmarks_detector = FacialLandmarksDetector(
            args.model_facial_landmarks_detection)
        head_pose_estimator = HeadPoseEstimator(
            args.model_head_pose_estimation)
        gaze_estimator = GazeEstimator(args.model_gaze_estimation)
        face_detector.load_model()
        facial_landmarks_detector.load_model()
        head_pose_estimator.load_model()
        gaze_estimator.load_model()

        total_model_load_time = time.time() - start_model_load_time
        log.info("Model load time: {:.1f}ms".format(1000 *
                                                    total_model_load_time))

        # Let os.path.join pick the separator instead of hard-coding '\\',
        # so the device sub-directory is created correctly on every OS.
        output_directory = os.path.join(args.output_path, args.device)
        os.makedirs(output_directory, exist_ok=True)

        feed = InputFeeder(args.input_type, args.input_path)
        try:
            feed.load_data()
            out_video = feed.get_out_video(output_directory)

            frame_counter = 0
            start_inference_time = time.time()
            total_prepocess_time = 0

            while True:
                try:
                    frame = next(feed.next_batch())
                except StopIteration:
                    break
                if frame is None:
                    # A feed may yield None instead of stopping at the end.
                    break
                frame_counter += 1

                face_boxes = face_detector.predict(frame)
                for face_box in face_boxes:
                    face_image = get_crop_image(frame, face_box)
                    eye_boxes, eye_centers = facial_landmarks_detector.predict(
                        face_image)
                    left_eye_image, right_eye_image = [
                        get_crop_image(face_image, eye_box)
                        for eye_box in eye_boxes
                    ]
                    head_pose_angles = head_pose_estimator.predict(face_image)
                    gaze_x, gaze_y = gaze_estimator.predict(
                        right_eye_image, head_pose_angles, left_eye_image)
                    draw_gaze_line(frame, face_box, eye_centers, gaze_x,
                                   gaze_y)
                    if args.show_input:
                        cv2.imshow('im', frame)
                    if args.move_mouse:
                        mouse_controller.move(gaze_x, gaze_y)
                    total_prepocess_time += (
                        face_detector.preprocess_time
                        + facial_landmarks_detector.preprocess_time
                        + head_pose_estimator.preprocess_time
                        + gaze_estimator.preprocess_time)
                    # Only the first detected face is processed per frame.
                    break

                if out_video is not None:
                    out_video.write(frame)
                if args.input_type == "image":
                    cv2.imwrite(
                        os.path.join(output_directory, 'output_image.jpg'),
                        frame)

                # Key code 27 (Esc) stops processing.
                key_pressed = cv2.waitKey(60)
                if key_pressed == 27:
                    break

            total_time = time.time() - start_inference_time
            total_inference_time = round(total_time, 1)
            # Rounding can give 0.0 for very short runs; avoid dividing by it.
            if total_inference_time > 0:
                fps = frame_counter / total_inference_time
            else:
                fps = 0.0
            log.info("Inference time:{:.1f}ms".format(
                1000 * total_inference_time))
            log.info("Input/output preprocess time:{:.1f}ms".format(
                1000 * total_prepocess_time))
            log.info("FPS:{}".format(fps))

            with open(os.path.join(output_directory, 'stats.txt'), 'w') as f:
                f.write(str(total_inference_time) + '\n')
                f.write(str(total_prepocess_time) + '\n')
                f.write(str(fps) + '\n')
                f.write(str(total_model_load_time) + '\n')
        finally:
            feed.close()
            cv2.destroyAllWindows()
    except Exception as e:
        log.exception("Something wrong when running inference:" + str(e))
Beispiel #17
0
    for subdir in os.listdir(directory):
        path = os.path.join(directory, subdir)
        # skip any files that might be in the dir
        if not os.path.isdir(path):
            continue
        # load all faces in the subdirectory
        faces = get_faces(detector, path)
        labels = [subdir.replace("_", " ").title() for _ in range(len(faces))]
        print(f'>loaded {len(faces)} examples for person: {subdir.replace("_", " ").title()}')
        X_.extend(faces)
        y_.extend(labels)
    return np.asarray(X_), np.asarray(y_)


if __name__ == '__main__':
    # Script entry point: build the detectors, then prepare data for and
    # train the base recognizer followed by the SVM recognizer.
    # NOTE(review): detector_base is not used in the visible part of this
    # block; both data-preparation calls receive detector_caffe.
    detector_base = FaceDetector(detection_type='base')
    detector_caffe = FaceDetector(detection_type='caffe')

    recognizer_base = BaseRecognizer()
    data_base, labels_base = prepare_data_base_recognizer(detector_caffe)
    print('Prepared data for base recognizer')
    recognizer_base.train_clf(data_base, labels_base)
    print('Trained base recognizer')

    recognizer_svm = SuppVecMachinesRecognizer()
    # The SVM data preparation also receives the recognizer itself.
    data_svm, labels_svm = prepare_data_svm_recognizer(detector_caffe, recognizer_svm)
    print('Prepared data for svm recognizer')
    recognizer_svm.train_clf(data_svm, labels_svm, show_stats=True)
    print('Trained svm recognizer')

    # for root, dirs, files in os.walk(IMG_DIR):
Beispiel #18
0
import cv2
import os
import time
import torch
import argparse
import json
import shutil
import string

from headpose import headpose_estimator

from face_detection import FaceDetector
from face_detection.config import cfg as detection_cfg

# Module-level detector, built once at import time from the imported
# detection config.
face_detector = FaceDetector(detection_cfg)

# Command-line options. Parsing also happens at import time, so importing
# this module reads sys.argv.
parser = argparse.ArgumentParser()
parser.add_argument('--root_data', dest='root_data', help='Path to test dataset', default='', type=str)
parser.add_argument('--output_dir', dest='output_dir', help='Path to output', default='', type=str)
args = parser.parse_args()


def get_list_from_filenames(file_path):
    """Return the lines of a text file as a list, without line endings.

    Args:
        file_path: Path to a text file holding one entry per line.

    Returns:
        A list with one string per line; empty lines are kept as ``''``.
    """
    with open(file_path) as handle:
        return handle.read().splitlines()

def main():
    """Print the parsed command-line arguments.

    NOTE(review): only this statement is visible in this excerpt; the
    function may continue beyond it.
    """
    print(args)
Beispiel #19
0
        if mode == 'blur':
            frame = blur(frame, bboxes)
        elif mode == 'pixel':
            frame = pixelate(frame, bboxes)
        else:
            frame = hide_eyes(frame, landmarks)

        writer.write(frame)

    cap.release()
    writer.release()

if __name__ == '__main__':
    # Command-line entry point: anonymise the faces in one image or video.
    parser = ArgumentParser()
    parser.add_argument('-f', '--file', type=str, required=True)
    # --mode is optional and falls back to 'blur'. Marking it required as
    # well would make argparse ignore the default.
    parser.add_argument('-m', '--mode', choices=['blur', 'pixel', 'eyes'], default='blur')
    parser.add_argument('-w', '--weights', default='./weights/Resnet50_Final.pth')

    args = parser.parse_args()

    detector = FaceDetector.from_path(args.weights)

    # Dispatch on the file extension (compared exactly as written).
    ext = os.path.splitext(args.file)[1]
    if ext in IMAGE_FORMATS:
        process_image(detector=detector, filepath=args.file, mode=args.mode)
    elif ext in VIDEO_FORMATS:
        process_video(detector=detector, filepath=args.file, mode=args.mode)
    else:
        raise Exception(f'Unknown file format: {ext}')

Beispiel #20
0
import numpy as np
import cv2
import imageio
import skimage
from face_detection import FaceDetector
import os
from emotion_classifier import EmotionClassifier
# Switch the working directory to the parent before the detector and the
# classifier are built; presumably their model paths are relative to it --
# confirm. Note that this is an import-time side effect.
os.chdir('..')
face_detector = FaceDetector(minsize=100)
# NOTE(review): crop_ratio is not used in the visible part of this file.
crop_ratio = (0.05, 0.05)

emotion_classifier = EmotionClassifier()


def process_image(image):
    if image is None:
        return
    image = cv2.GaussianBlur(image, (3, 3), 0)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    face_bbs, _ = face_detector.detect_face(image_rgb)
    for bb in face_bbs:
        bb = [int(x) if x > 0 else 0 for x in bb]
        face = image[bb[1]:bb[3], bb[0]:bb[2], :]
        if face is None:
            continue
        label_emotion = emotion_classifier.predict(face)
        cv2.rectangle(image, (bb[0], bb[1]), (bb[2], bb[3]), (0, 255, 0), 2)
        front_scale = (bb[3] - bb[0] + bb[2] - bb[1]) / 130
        thickness = int(front_scale / 4) + 1
        if thickness < 1:
            thickness = 1
Beispiel #21
0
def main():
    """Run the gaze-estimation pipeline and move the mouse accordingly.

    Parses the command line, creates the input feeder (camera when the input
    is the literal "CAM", otherwise a video file), builds and loads the four
    model wrappers and then, for every frame: detects a face, locates the
    eyes, estimates head pose and gaze, draws the overlays selected through
    ``args.visualization`` ("fd", "fl", "hp", "ge"), shows the frame and moves
    the mouse.  The loop ends when the feeder's flag is falsy or when key code
    27 (Esc) was read for the current frame.
    """
    args = build_argparser().parse_args()
    input_file = args.input
    logger = log.getLogger()
    if input_file == "CAM":
        input_feeder = InputFeeder("cam")
    else:
        # Anything other than "CAM" must name an existing file.
        if not os.path.isfile(input_file):
            logger.error("Path should be file")
            exit(1)
        input_feeder = InputFeeder("video", input_file)

    # All four model wrappers receive the same device, threshold and
    # extensions; only the model path differs.
    face_detector = FaceDetector(
        args.face_detection_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    face_landmark_detector = FaceLandmarkDetector(
        args.face_landmark_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    head_pose_estimator = HeadPoseEstimator(
        args.head_pose_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    gaze_estimator = GazeEstimator(
        args.gaze_estimation_model,
        device=args.device,
        threshold=args.threshold,
        extensions=args.extensions,
    )
    mouse_controller = MouseController("medium", "fast")

    face_detector.load_model()
    face_landmark_detector.load_model()
    head_pose_estimator.load_model()
    gaze_estimator.load_model()

    input_feeder.load_data()

    # Size of the preview window: fixed width with a 16:9 height.
    width = 1000
    height = int(width * 9 / 16)

    for flag, frame in input_feeder.next_batch():

        if not flag:
            break
        # The key is polled here but only acted on at the end of the
        # iteration, so frames skipped via `continue` never reach that check.
        pressed_key = cv2.waitKey(60)

        # predict() is expected to return a (coordinates, image) pair or
        # something falsy -- TODO confirm against FaceDetector.
        face_detected = face_detector.predict(frame)
        if face_detected:
            face_coordinates, face_image = face_detected
            if not face_coordinates:
                continue
        else:
            continue
        if "fd" in args.visualization:
            # Outline the face and label it slightly above the box.
            cv2.rectangle(
                frame,
                (face_coordinates[0], face_coordinates[1]),
                (face_coordinates[2], face_coordinates[3]),
                (36, 255, 12),
                2,
            )
            cv2.putText(
                frame,
                "Face Detected",
                (face_coordinates[0], face_coordinates[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.9,
                (36, 255, 12),
                2,
            )

        left_eye_img, righ_eye_img, eye_coords = face_landmark_detector.predict(
            face_image
        )
        if "fl" in args.visualization:
            # eye_coords rows appear to be (x_min, y_min, x_max, y_max)
            # relative to the face crop; adding the face's first two
            # coordinates shifts them into frame coordinates -- confirm
            # against FaceLandmarkDetector.
            frame_eye_coords_min = (
                np.array(eye_coords)[:, :2] + np.array(face_coordinates)[:2]
            )
            frame_eye_coords_max = (
                np.array(eye_coords)[:, 2:] + np.array(face_coordinates)[:2]
            )
            # One rectangle per eye (rows 0 and 1).
            cv2.rectangle(
                frame,
                (frame_eye_coords_min[0][0], frame_eye_coords_min[0][1]),
                (frame_eye_coords_max[0][0], frame_eye_coords_max[0][1]),
                (36, 255, 12),
                2,
            )
            cv2.rectangle(
                frame,
                (frame_eye_coords_min[1][0], frame_eye_coords_min[1][1]),
                (frame_eye_coords_max[1][0], frame_eye_coords_max[1][1]),
                (36, 255, 12),
                2,
            )

        head_pose_estimate = head_pose_estimator.predict(face_image)
        if "hp" in args.visualization:
            # The estimate is unpacked positionally into the yaw/pitch/roll
            # placeholders of the label.
            cv2.putText(
                frame,
                "yaw:{:.1f}|pitch:{:.1f}|roll:{:.1f}".format(*head_pose_estimate),
                (20, 35),
                cv2.FONT_HERSHEY_COMPLEX,
                1.2,
                (36, 255, 12),
                3,
            )

        mouse_coordinate, gaze_vector = gaze_estimator.predict(
            left_eye_img, righ_eye_img, head_pose_estimate
        )
        if "ge" in args.visualization:
            # NOTE(review): the arrow drawn below is derived from the head
            # pose angles; gaze_vector itself is not used in this function.
            head_pose_estimate = np.array(head_pose_estimate)
            # Degrees to radians.
            yaw, pitch, roll = head_pose_estimate * np.pi / 180.0

            focal_length = 950
            scale = 100

            # Arrow origin: centre of the face box.
            origin = (
                int(
                    face_coordinates[0]
                    + (face_coordinates[2] - face_coordinates[0]) / 2
                ),
                int(
                    face_coordinates[1]
                    + (face_coordinates[3] - face_coordinates[1]) / 2
                ),
            )

            # Rotation matrices built from pitch (x axis), yaw (y axis) and
            # roll (z axis), combined as r_z @ r_y @ r_x.
            r_x = np.array(
                [
                    [1, 0, 0],
                    [0, math.cos(pitch), -math.sin(pitch)],
                    [0, math.sin(pitch), math.cos(pitch)],
                ]
            )
            r_y = np.array(
                [
                    [math.cos(yaw), 0, -math.sin(yaw)],
                    [0, 1, 0],
                    [math.sin(yaw), 0, math.cos(yaw)],
                ]
            )
            r_z = np.array(
                [
                    [math.cos(roll), -math.sin(roll), 0],
                    [math.sin(roll), math.cos(roll), 0],
                    [0, 0, 1],
                ]
            )
            r = r_z @ r_y @ r_x

            # Rotate a vector of length `scale` along -z, push it back by
            # focal_length along z, then divide x and y by the resulting z
            # and scale by focal_length to obtain the arrow tip offset.
            zaxis = np.array(([0, 0, -1 * scale]), dtype="float32")
            offset = np.array(([0, 0, focal_length]), dtype="float32")
            zaxis = np.dot(r, zaxis) + offset
            tip = (
                int(zaxis[0] / zaxis[2] * focal_length) + origin[0],
                int(zaxis[1] / zaxis[2] * focal_length) + origin[1],
            )

            cv2.arrowedLine(frame, origin, tip, (0, 0, 255), 3, tipLength=0.3)

        cv2.imshow("frame", cv2.resize(frame, (width, height)))
        mouse_controller.move(mouse_coordinate[0], mouse_coordinate[1])

        # Key code 27 is Esc.
        # NOTE(review): a normal exit is logged at error level here.
        if pressed_key == 27:
            logger.error("exit key is pressed..")
            break
Beispiel #22
0
import os
from io import BytesIO
from urllib.parse import urlencode

import asks
import cv2
from sanic import Sanic, response
from pydub import AudioSegment

from config import (BOT_TOKEN, HOST, PORT, DEBUG, DIRS)
from face_detection import FaceDetector

# Web application and the face detector, both created once at import time.
app = Sanic(__name__)
face_detector = FaceDetector()

# Telegram endpoints: BOT_URL embeds the bot token for API calls, and
# BOT_FILE_URL is the matching base for file URLs.
API_URL = 'https://api.telegram.org/'
BOT_URL = API_URL + 'bot' + BOT_TOKEN + '/'
BOT_FILE_URL = API_URL + 'file/' + 'bot' + BOT_TOKEN


@app.post('/')
async def request_handler(request):
    """
    Dispatch an incoming update to the handler matching its message type.

    A message containing a 'photo' key is passed to ``photo_handler``; one
    containing a 'voice' key is passed to ``voice_handler``; anything else is
    ignored.

    NOTE(review): this docstring used to promise an empty response with
    status code 204, but no return statement is visible in this excerpt --
    confirm that the handler returns a response.
    """
    # NOTE(review): .get('message') yields None when the key is absent, and
    # the membership tests below would then raise TypeError -- confirm that
    # every update carries a 'message'.
    message = request.json.get('message')
    if 'photo' in message:
        await photo_handler(message)
    elif 'voice' in message:
        await voice_handler(message)
def main():
    """Main entrypoint when running this module from the terminal.

    Parses the command-line options, prepares the destination directory
    (asking for confirmation before deleting a non-empty one unless ``--yes``
    is given) and then processes every matched file of the source directory:

    1. files in which the face detector does not find exactly one face are
       skipped;
    2. the background is removed with U2Net and the image is cropped to the
       bounding box of the segmentation map;
    3. images whose face-to-image area ratio exceeds the threshold are skipped;
    4. optionally the face is cropped away and transparency is replaced by a
       background colour;
    5. the result is saved as ``<stem>.png`` in the destination directory.
    """

    parser = argparse.ArgumentParser(
        description='Process and a clean a raw dataset of halloween costumes')
    parser.add_argument(
        'dataset_source',
        help='The path to the directory containing the source dataset.',
        type=Path,
        action=ReadableDirectory)
    parser.add_argument(
        '--destination',
        '-d',
        dest='dataset_destination',
        help='The path to the directory which the cleaned '
        'dataset should be saved. If this is not specified, the cleaned files are saved in the same parent '
        'folder as the source.',
        type=Path,
        default=None)
    parser.add_argument(
        '--file-glob-patterns',
        nargs='+',
        type=str,
        default=['*.png', '*.jpeg', '*.jpg'],
        help='The glob patterns to use to find files in the source directory.')
    # The flag *disables* the (default) transparency removal, so the help text
    # has to describe the negative form.
    parser.add_argument(
        '--no-remove-transparency',
        action='store_false',
        dest='remove_transparency',
        help='Do not remove transparency (by default it is replaced with a colour).')
    parser.add_argument('--bg-colour',
                        type=str,
                        default='WHITE',
                        help='The colour to replace transparency with.')
    parser.add_argument(
        '--u2net-size',
        type=str,
        default='large',
        help=
        'The size of the pretrained U-2-net model. Either \'large\' or \'small\'.'
    )
    parser.add_argument(
        '--face_image_ratio_threshold',
        type=float,
        default=0.05,
        help='The maximum face-to-image area ratio that is allowed.')
    parser.add_argument('--crop-faces',
                        dest='crop_faces',
                        action='store_true',
                        help='Crop out faces.')
    parser.add_argument('--yes', '-y', action='store_true', help='Yes to all.')
    args = parser.parse_args()

    # Create a destination path if none was provided.
    if args.dataset_destination is None:
        args.dataset_destination = args.dataset_source.parent / (
            args.dataset_source.stem + '_cleaned')

    if args.dataset_destination.exists() and any(
            args.dataset_destination.iterdir()):
        if not args.yes:
            # abort=True makes click stop the program when the user declines.
            click.confirm(
                f'The destination path (\'{args.dataset_destination.resolve()}\') '
                'already exists! Would you like to continue? This will overwrite the directory.',
                abort=True)

        _rmtree(args.dataset_destination)

    args.dataset_destination.mkdir(exist_ok=True, parents=True)

    u2net = U2Net(pretrained_model_name=args.u2net_size)
    face_detector = FaceDetector()

    files = list(get_files(args.dataset_source, args.file_glob_patterns))
    with tqdm.tqdm(files) as progress:
        for file in progress:
            progress.set_description(f'Processing {file.name}')

            # Skip images that don't have a single face in them...
            face_detection_results = face_detector.detect_faces(file)
            if len(face_detection_results) != 1:
                continue

            segmentation_map = u2net.segment_image(file)

            try:
                # Remove background from image (using U2Net)
                image = u2net.remove_background(file, segmentation_map)
            except InvalidImageError:
                # Unusable image: skip it and carry on with the next file.
                continue

            # Crop image to bounding box (using U2Net)
            bounding_box = u2net.get_bounding_box(segmentation_map)
            image = image.crop(bounding_box)

            # Tuple of the form (x1, y1, x2, y2)
            fbb = face_detection_results[0].bounding_box
            fbb_width = (fbb[2] - fbb[0])
            fbb_height = (fbb[3] - fbb[1])

            image_width, image_height = image.size
            image_area = image_width * image_height
            if image_area == 0:
                # An empty crop has no area to compare the face against (and
                # nothing worth saving); skip it instead of dividing by zero.
                continue

            # Compute the face-to-image area ratio.
            # This is used as a heuristic to filter out portrait images
            # (i.e. when the face takes up more than a certain percentage of the total image).
            face_image_ratio = (fbb_width * fbb_height) / image_area
            if face_image_ratio > args.face_image_ratio_threshold:
                continue

            if args.crop_faces:
                # Crop out the face...
                # This assumes that the image is of a person standing vertically.

                # Convert the bottom-right y-coordinate of the face bounding box
                # into the coordinate system AFTER cropping.
                adjusted_fbb_y2 = fbb[3] - bounding_box[1]
                # Keep everything from slightly (10% of the face height) above
                # the bottom edge of the face down to the bottom of the image.
                image = image.crop((0, adjusted_fbb_y2 - fbb_height * 0.10,
                                    image_width, image_height))

            if args.remove_transparency:
                # Replace transparency with colour: paste the image onto a
                # solid background, using the image itself as the paste mask.
                background_image = Image.new('RGBA', image.size,
                                             args.bg_colour)
                background_image.paste(image, (0, 0), image)
                image = background_image.convert('RGB')

            # Output processed image
            destination = args.dataset_destination / (file.stem + '.png')
            image.save(str(destination))
Beispiel #24
0
def _load_and_check_model(model):
    """Load and check *model*; return the seconds spent in ``load_model``."""
    started = time.time()
    model.load_model()
    elapsed = time.time() - started
    model.check_model()
    return elapsed


def main():
    """Run the gaze estimation pipeline on a video source and move the mouse.

    The input is either the camera (``--input cam``, case-insensitive) or a
    video file. For every frame the face detector, facial landmarks detector,
    head pose estimator and gaze estimator are run in turn; every fifth frame
    the mouse is moved according to the gaze vector. The annotated frame and
    both eye crops are shown in preview windows. Pressing ESC stops the loop.

    With ``args.stats == 1`` the model load times and the average per-frame
    inference time are printed/logged. With ``args.visual_flag == 1`` bounding
    boxes, landmarks, the gaze and the head pose axes are drawn on the frame.

    The process exits with status 1 when the input file does not exist.
    """
    args = get_args()

    log.basicConfig(filename='example.log', level=log.DEBUG)

    inputFile = args.input

    mouse = MouseController("high", "fast")

    frame_count = 0
    # Parameters handed to hpm.draw_axes for the head pose visualisation.
    focal_length = 950.0
    scale = 50

    if inputFile.lower() == "cam":
        feed = InputFeeder('cam')
        log.info("Video source: " + str(inputFile))
    else:
        if not os.path.isfile(inputFile):
            log.error("Unable to find file: " + inputFile)
            exit(1)
        feed = InputFeeder("video", inputFile)
        log.info("Video source: " + str(inputFile))
        log.info("InputFeeder initialized")

    log.info("Device: " + str(args.device))
    log.info("Face detection model: " + str(args.facedetectionmodel))
    log.info("Facial landmarks model: " + str(args.faciallandmarksmodel))
    log.info("Head pose estimation model: " + str(args.headposemodel))
    log.info("Gaze estimation model: " + str(args.gazeestimationmodel))

    collect_stats = args.stats == 1
    inference_times = []
    if collect_stats:
        print("Running statistics...")

    # Reference point for the *total* loading duration reported below.
    loading_started = time.time()

    # Create instances of the different models
    fdm = FaceDetector(args.facedetectionmodel, args.device,
                       args.cpu_extension)
    fdm_load_time = _load_and_check_model(fdm)

    hpm = HeadPoseEstimator(args.headposemodel, args.device,
                            args.cpu_extension)
    hpm_load_time = _load_and_check_model(hpm)

    flm = FacialLandmarksDetector(args.faciallandmarksmodel, args.device,
                                  args.cpu_extension)
    flm_load_time = _load_and_check_model(flm)

    gem = GazeEstimator(args.gazeestimationmodel, args.device,
                        args.cpu_extension)
    gem_load_time = _load_and_check_model(gem)

    if collect_stats:
        duration_loading = time.time() - loading_started
        print(
            f"Duration for loading and checking the models: {duration_loading}"
        )
        log.info(
            f"Duration for loading and checking the models: {duration_loading}"
        )

    cv2.namedWindow('preview', cv2.WINDOW_NORMAL)
    cv2.resizeWindow('preview', 600, 600)

    feed.load_data()
    for ret, frame in feed.next_batch():
        if not ret:
            break
        if frame is None:
            continue

        frame_count += 1
        key = cv2.waitKey(60)
        # ESC ends the run, whether or not a face is visible in this frame.
        if key == 27:
            break

        if collect_stats:
            start_time = time.time()

        # Run face detection
        face_crop, face_coords = fdm.predict(frame.copy())

        # A failed detection is signalled by integer coordinates; it has to be
        # handled before the coordinates are unpacked or the crop is touched.
        if isinstance(face_coords, int):
            print("Unable to detect face")
            continue

        print("Face crop shape: " + str(face_crop.shape))
        # Only the top-left corner is needed to map face-relative coordinates
        # back into the full frame.
        xmin, ymin, _, _ = face_coords

        try:
            # Facial landmark detection
            left_eye_crop, right_eye_crop, landmarks, crop_coords = flm.predict(
                face_crop.copy())

            # Landmark position based on complete frame.
            # NOTE: landmarks_viz is the same object as landmarks, so the
            # values returned by flm.predict are shifted in place.
            landmarks_viz = landmarks
            landmarks_viz[0] = landmarks_viz[0] + xmin
            landmarks_viz[1] = landmarks_viz[1] + ymin
            landmarks_viz[2] = landmarks_viz[2] + xmin
            landmarks_viz[3] = landmarks_viz[3] + ymin

            # Two eye boxes (four values each) shifted into frame coordinates.
            crop_coords_viz = (crop_coords[0] + xmin, crop_coords[1] + ymin,
                               crop_coords[2] + xmin, crop_coords[3] + ymin,
                               crop_coords[4] + xmin, crop_coords[5] + ymin,
                               crop_coords[6] + xmin, crop_coords[7] + ymin)

            left_eye_viz = (landmarks_viz[0], landmarks_viz[1])
            right_eye_viz = (landmarks_viz[2], landmarks_viz[3])

            # Midpoint between both eyes; origin of the head pose axes.
            third_eye_viz_x = (landmarks_viz[2] -
                               landmarks_viz[0]) / 2 + landmarks_viz[0]
            third_eye_viz_y = (landmarks_viz[3] -
                               landmarks_viz[1]) / 2 + landmarks_viz[1]
            third_eye_viz = (third_eye_viz_x, third_eye_viz_y)

            # Head pose estimation
            head_pose = hpm.predict(face_crop.copy())
            print("Head pose: " + str(head_pose))
            (yaw, pitch, roll) = head_pose
            frame = display_head_pose(frame, pitch, roll, yaw)

            # Send inputs to GazeEstimator
            gaze_vector = gem.predict(head_pose, left_eye_crop,
                                      right_eye_crop)

            if collect_stats:
                inference_times.append(time.time() - start_time)

            print(gaze_vector)
            frame = display_gaze(frame, gaze_vector)

            # Control the mouse (only every fifth frame)
            if frame_count % 5 == 0:
                mouse_x, mouse_y = get_mouse_vector(gaze_vector, roll)
                print("Mouse vector:" + str(mouse_x) + " - " + str(mouse_y))
                mouse.move(mouse_x, mouse_y)
                currentMouseX, currentMouseY = pyautogui.position()
                print("Mouse coordinates: " + str(currentMouseX) + ", " +
                      str(currentMouseY))

            if args.visual_flag == 1:
                frame = draw_bounding_box(frame, face_coords)

                left_eye_frame = crop_coords_viz[0:4]
                right_eye_frame = crop_coords_viz[4:]
                frame = draw_bounding_box(frame, left_eye_frame)
                frame = draw_bounding_box(frame, right_eye_frame)

                frame = visualize_landmark(frame, left_eye_viz)
                frame = visualize_landmark(frame,
                                           right_eye_viz,
                                           color=(0, 0, 255))

                frame = visualize_gaze(frame, gaze_vector, landmarks_viz)

                # visualize the axes of the HeadPoseEstimator results
                frame = hpm.draw_axes(frame.copy(), third_eye_viz, yaw,
                                      pitch, roll, scale, focal_length)

            cv2.imshow('preview', frame)
            cv2.imshow('left eye', left_eye_crop)
            cv2.imshow('right eye', right_eye_crop)

        except Exception as e:
            # Best effort per frame: report the failure and go on with the
            # next frame instead of aborting the whole run.
            print("Unable to predict using model" + str(e) + " for frame " +
                  str(frame_count))
            log.error("Unable to predict using model" + str(e) +
                      " for frame " + str(frame_count))

    if collect_stats:
        if inference_times:
            avg_inference_time = sum(inference_times) / len(inference_times)
            print("Average inference time: " + str(avg_inference_time))
            log.info("Average inference time: " + str(avg_inference_time))
        else:
            # No frame made it through the whole pipeline; there is nothing
            # to average.
            log.warning("Average inference time: no frame was processed")
        log.info("Load time for face detection model: " + str(fdm_load_time))
        log.info("Load time for facial landmarks model: " + str(flm_load_time))
        log.info("Load time for head pose detection model: " +
                 str(hpm_load_time))
        log.info("Load time for gaze estimation model: " + str(gem_load_time))
    cv2.destroyAllWindows()
    feed.close()
Beispiel #25
0
def parse_arguments(argv):
    """Parse the command-line options of the face filter script.

    Args:
        argv: list of argument strings, without the program name.

    Returns:
        The ``argparse.Namespace`` with the string attributes ``src_path``
        and ``dst_path``; an option that was not given is ``None``.
    """
    # (option flag, help text) for every supported string option.
    string_options = (
        ('--src_path', 'src people image path'),
        ('--dst_path',
         'move to dst people image path. remove image if this param is none.'),
    )

    arg_parser = argparse.ArgumentParser()
    for flag, description in string_options:
        arg_parser.add_argument(flag, type=str, help=description)

    namespace = arg_parser.parse_args(argv)
    return namespace


if __name__ == '__main__':
    # NOTE: the real command line is replaced by this fixed argument list,
    # so options passed on the terminal have no effect.
    hardcoded_argv = [
        'face_filter.py',
        '--src_path',
        '/home/xiajun/res/face/GC-WebFace/raw',
        '--dst_path',
        '/home/xiajun/res/face/GC-WebFace-tmp',
    ]
    sys.argv = hardcoded_argv

    # Skip the program name when handing the arguments to the parser.
    args = parse_arguments(sys.argv[1:])
    print(args)
    src_path, dst_path = args.src_path, args.dst_path

    # Module-level detector instance, created before the filtering starts.
    fdetector = FaceDetector()

    face_filter(src_path, dst_path)

# Runs on import as well as on direct execution (it is outside the guard).
print('debug')
def main():
    """Estimate the gaze on every frame of the input and move the mouse.

    Builds the four models from the precisions given on the command line,
    loads them (timing each load), opens the input and then reads frames
    until the capture closes or ``run_inference`` returns ``None``.

    For camera input, inference runs at most once per second of wall-clock
    time; for other inputs it runs on every frame and the mouse is moved at
    most every 0.5 seconds. Frame count and model load times are logged at
    the end. Returns early when the input feeder raises ``TypeError`` while
    loading its data.
    """

    def _timed_load(model):
        """Load *model* and return the elapsed seconds."""
        began = time.time()
        model.load_model()
        return time.time() - began

    # Get command line arguments
    args = parser.parse_args()
    device = args.device
    cpu_extensions = args.extensions
    threshold = args.threshold  # read here, not used further in this function
    gaze_estimation_precision = args.gaze_estimation_precision
    head_pose_precision = args.head_pose_precision
    face_detection_precision = args.face_detection_precision
    landmarks_precision = args.landmarks_precision

    input_feeder = InputFeeder(args)
    control_mouse = MouseController(args)

    # Model files are laid out as models/intel/<model>/<precision>/<model>.
    gaze_model = 'models/intel/gaze-estimation-adas-0002/{}/gaze-estimation-adas-0002'.format(
        gaze_estimation_precision)
    face_detector_model = 'models/intel/face-detection-adas-binary-0001/{}/face-detection-adas-binary-0001'.format(
        face_detection_precision)
    facial_landmark_model = 'models/intel/landmarks-regression-retail-0009/{}/landmarks-regression-retail-0009'.format(
        landmarks_precision)
    head_pose_model = 'models/intel/head-pose-estimation-adas-0001/{}/head-pose-estimation-adas-0001'.format(
        head_pose_precision)

    # Initialize the models
    shared_kwargs = {'device': device, 'extensions': cpu_extensions}
    face_detector = FaceDetector(face_detector_model, args)
    facial_landmarks = FacialLandmarksDetector(
        model_name=facial_landmark_model, **shared_kwargs)
    head_pose_estimation = HeadPoseEstimation(model_name=head_pose_model,
                                              **shared_kwargs)
    gaze_estimation = GazeEstimation(model_name=gaze_model, **shared_kwargs)

    # Load the models, remembering how long each one took
    face_detector_loadtime = _timed_load(face_detector)
    facial_landmark_loadtime = _timed_load(facial_landmarks)
    head_pose_estimation_loadtime = _timed_load(head_pose_estimation)
    gaze_estimation_loadtime = _timed_load(gaze_estimation)
    logging.info('FINISH LOADING MODELS')

    try:
        width, height = input_feeder.load_data()
    except TypeError:
        logging.error('Invalid file type.')
        return

    output_handler = OutputHandler(args)
    output_handler.initalize_video_writer(width, height)

    frame_count = 0
    # 0 means "timer not running"; it is (re)started on the next frame read.
    start_time = 0
    capture = input_feeder.cap
    inputs = args.input
    if input_feeder.input_type == 'cam':
        inputs = 0
    else:
        capture.open(inputs)

    while capture.isOpened():
        grabbed, frame = capture.read()

        if start_time == 0:
            start_time = time.time()

        from_camera = inputs == 0
        if from_camera and time.time() - start_time < 1:
            # Camera frames are dropped until one second has passed.
            continue

        gaze_estimate = run_inference(frame, face_detector, facial_landmarks,
                                      head_pose_estimation, gaze_estimation,
                                      output_handler)
        if gaze_estimate is None:
            break

        if from_camera:
            if gaze_estimate[0][0]:
                x, y = gaze_estimate[0][:2]
                control_mouse.move(x, y)
            # Restart the one-second timer after every camera inference.
            start_time = 0
        elif gaze_estimate[0][0] and time.time() - start_time >= 0.5:
            x, y = gaze_estimate[0][:2]
            control_mouse.move(x, y)
            # Restart the timer only after the mouse was actually moved.
            start_time = 0
        frame_count += 1

    input_feeder.close()
    logging.info('TOTOAL FRAMES PROCESSED: {}'.format(frame_count))
    load_reports = (
        ('Time to load face detector model is {:.5f}',
         face_detector_loadtime),
        ('Time to load head pose estimation model is {:.5f}',
         head_pose_estimation_loadtime),
        ('Time to load facial landmarks model model is {:.5f}',
         facial_landmark_loadtime),
        ('Time to load gaze estimation model is {:.5f}',
         gaze_estimation_loadtime),
    )
    for template, seconds in load_reports:
        logging.info(template.format(seconds))
Beispiel #27
0
def main(args):
    """Run the gaze estimation pipeline and steer the mouse with the result.

    Creates the face detector, landmarks detector, head pose estimator and
    gaze estimator (timing each construction), opens the input (camera when
    ``args.input == 0``, image for ``.jpg``/``.bmp`` paths, video otherwise)
    and processes it frame by frame. After every frame the running average of
    each stage's inference time is logged, the results are drawn (for
    non-camera input) and the mouse is moved along the roll-compensated gaze
    direction. Pressing ESC stops the loop.

    Args:
        args: parsed command-line options; the ``model_*``, ``device_*``,
            ``ext_*`` and ``input`` attributes are read.
    """
    # time.time() differences are seconds; the log messages are labelled "ms".
    ms_per_second = 1000.0

    fd_infer_time, ld_infer_time, hpe_infer_time, ge_infer_time = 0, 0, 0, 0

    start = time.time()
    face_detector = FaceDetector(args.model_fd, args.device_fd, args.ext_fd)
    fd_load_time = time.time() - start

    start = time.time()
    landmarks_detector = LandmarksDetector(args.model_ld, args.device_ld,
                                           args.ext_ld)
    ld_load_time = time.time() - start

    start = time.time()
    head_pose_estimator = HeadPoseEstimator(args.model_hpe, args.device_hpe,
                                            args.ext_hpe)
    hpe_load_time = time.time() - start

    start = time.time()
    gaze_estimator = GazeEstimator(args.model_ge, args.device_ge, args.ext_ge)
    ge_load_time = time.time() - start

    log.info("Models Loading...")
    log.info("Face detection load time       :{:.4f}ms".format(
        fd_load_time * ms_per_second))
    log.info("Landmarks estimation load time :{:.4f}ms".format(
        ld_load_time * ms_per_second))
    log.info("Head pose estimation load time :{:.4f}ms".format(
        hpe_load_time * ms_per_second))
    log.info("Gaze estimation load time      :{:.4f}ms".format(
        ge_load_time * ms_per_second))
    log.info('All Models loaded')
    mouse_controller = MouseController('high', 'fast')

    if args.input == 0:
        input_feeder = InputFeeder('cam', args.input)
    elif args.input.endswith(('.jpg', '.bmp')):
        input_feeder = InputFeeder('image', args.input)
    else:
        input_feeder = InputFeeder('video', args.input)

    input_feeder.load_data()
    init_w = input_feeder.init_w
    init_h = input_feeder.init_h

    counter = 0

    for flag, frame in input_feeder.next_batch():

        if not flag:
            break

        counter += 1

        key = cv2.waitKey(60)
        try:
            start = time.time()
            outputs = face_detector.predict(frame)
            face = face_detector.preprocess_output(frame, outputs, init_w,
                                                   init_h)
            fd_infer_time += time.time() - start

            start = time.time()
            outputs = landmarks_detector.predict(face)
            left_eye, right_eye, real_landmraks = landmarks_detector.preprocess_output(
                face, outputs)
            ld_infer_time += time.time() - start

            start = time.time()
            outputs = head_pose_estimator.predict(face)
            head_pose_angles = head_pose_estimator.preprocess_output(outputs)
            hpe_infer_time += time.time() - start

            start = time.time()
            outputs = gaze_estimator.predict(left_eye, right_eye,
                                             head_pose_angles)
            gaze = gaze_estimator.preprocess_output(outputs)
            ge_infer_time += time.time() - start

            # Running averages over all frames read so far.
            log.info("Face detection time       :{:.4f}ms".format(
                fd_infer_time / counter * ms_per_second))
            log.info("Landmarks estimation time :{:.4f}ms".format(
                ld_infer_time / counter * ms_per_second))
            log.info("Head pose estimation time :{:.4f}ms".format(
                hpe_infer_time / counter * ms_per_second))
            log.info("Gaze estimation time      :{:.4f}ms".format(
                ge_infer_time / counter * ms_per_second))

            if args.input != 0:
                drawer = Drawer(face, real_landmraks, head_pose_angles, gaze)
                drawer.draw_landmarks(20)
                drawer.draw_head_pose()
                drawer.draw_gazes()
                drawer.show()

            # Compensate for the head roll (third head pose angle, treated as
            # degrees) by rotating the gaze (x, y) components:
            #   x' =  x*cos(roll) + y*sin(roll)
            #   y' = -x*sin(roll) + y*cos(roll)
            roll_rad = head_pose_angles[2] * math.pi / 180
            roll_cos = math.cos(roll_rad)
            roll_sin = math.sin(roll_rad)

            mouse_x = gaze[0] * roll_cos + gaze[1] * roll_sin
            mouse_y = -gaze[0] * roll_sin + gaze[1] * roll_cos

            mouse_controller.move(mouse_x, mouse_y)

        except Exception as e:
            # Best effort per frame: log the failure and go on.
            log.error(e)
        finally:
            # ESC ends the run even when this frame failed.
            if key == 27:
                break

    input_feeder.close()
Beispiel #28
0
    args = parser.parse_args()
    dir_name = args.name
    detection_type = args.detection_type
    only_face = True if args.only_face == 'yes' else False

    capture = cv2.VideoCapture(0)
    num = 1
    while True:
        if dir_name is None:
            print('You must pass your name as argument!')
            subprocess.run(["python", "took_pictures.py", "-h"])
            break

        img_dir = f'images/{dir_name}/'
        os.makedirs(img_dir, exist_ok=True)
        detector = FaceDetector(detection_type)
        # Capture frame-by-frame
        val, frame = capture.read()
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        try:
            start_x, start_y, end_x, end_y = detector.get_coordinates(frame)[0]
            roi_ = frame[start_y:end_y, start_x:end_x]
            cv2.rectangle(frame, (start_x, start_y), (end_x, end_y),
                          (255, 255, 255), 2)
            if only_face:
                cv2.imwrite(f'{img_dir}/{num}.png', roi_)
            else:
                cv2.imwrite(f'{img_dir}/{num}.png', frame)
            print(f'face no. {num}')
            num += 1
def _write_model_timings(log, fd, fld, hpe, ge):
    """Write each model's load and pre/post-processing times to *log*."""
    entries = (
        ("FD_LoadTime", fd.load_time),
        ("FD_PreprocessTime", fd.preprocess_input_time),
        ("FD_PostrocessTime", fd.preprocess_output_time),
        ("FLD_LoadTime", fld.load_time),
        ("FLD_PreprocessTime", fld.preprocess_input_time),
        ("FLD_PostprocessTime", fld.preprocess_output_time),
        ("HPE_LoadTime", hpe.load_time),
        ("HPE_PreprocessTime", hpe.preprocess_input_time),
        ("GE_LoadTime", ge.load_time),
        ("GE_PreprocessTime", ge.preprocess_input_time),
    )
    for label, value in entries:
        log.write(label + ": " + str(value) + "\n")


def _run_gaze_pipeline(args, log):
    """Load the models and run them on every batch of the input feed.

    Inference times of each stage are written to *log*. The feed is closed
    and all OpenCV windows are destroyed when the loop ends, including when
    it ends because of an exception.
    """
    print("Initializing models...")

    fd = FaceDetector(
        model_name=
        'models/intel/face-detection-adas-binary-0001/FP32-INT1/face-detection-adas-binary-0001',
        device=args.device,
        extensions=None)
    fd.load_model()
    if args.v:
        print(f"Face Detection Load Time: {fd.load_time}")

    hpe = HeadPoseEstimator(
        model_name=
        f'models/intel/head-pose-estimation-adas-0001/{args.hpe}/head-pose-estimation-adas-0001',
        device=args.device,
        extensions=None)
    hpe.load_model()
    if args.v:
        print(f"Head Pose Estimation Load Time: {hpe.load_time}")

    fld = FacialLandmarkDetector(
        model_name=
        f'models/intel/landmarks-regression-retail-0009/{args.fld}/landmarks-regression-retail-0009',
        device=args.device,
        extensions=None)
    fld.load_model()
    if args.v:
        print(f"Facial Landmarks Detection Load Time: {fld.load_time}")

    ge = GazeEstimator(
        model_name=
        f'models/intel/gaze-estimation-adas-0002/{args.ge}/gaze-estimation-adas-0002',
        device=args.device,
        extensions=None)
    ge.load_model()
    if args.v:
        print(f"Gaze Estimation Load Time: {ge.load_time}")

    print("Initializing source feed...")
    feed = InputFeeder(input_type=args.input_type, input_file=args.input_file)
    # Single-image runs write intermediate pictures and do not move the mouse.
    image = args.input_type == 'image'

    try:
        feed.load_data()

        # One controller for the whole run instead of a new one per frame.
        mouse = None if image else MouseController(precision='medium',
                                                   speed='medium')

        for batch in feed.next_batch():
            if args.v:
                print()
            cv2.imshow('Batch', batch)
            if image:
                cv2.imwrite('output/Batch.png', batch)

            coords, bounding_face = fd.predict(batch)
            if not coords:
                print("No face")
                continue
            if image:
                cv2.imwrite('output/Face.png', bounding_face)
            # Only the first detected face is processed.
            box = coords[0]
            face = bounding_face[box[1]:box[3], box[0]:box[2]]

            if args.v:
                print(f"Face Time: {fd.infer_time}")
            log.write("FD_infer: " + str(fd.infer_time) + "\n")
            if image:
                cv2.imshow('Cropped Face', face)

            # Landmark Detection
            coords, landmark_detection, landmark_points = fld.predict(face)
            if image:
                cv2.imwrite('output/Landmarks.png', landmark_detection)
                cv2.imshow('Landmark Detection', landmark_detection)
            if args.v:
                print(f"Landmark Time: {fld.infer_time}")
            log.write("FLD_infer: " + str(fld.infer_time) + "\n")
            right_box, left_box = coords[0:2]
            if args.v:
                print(f"Eye Coords: {coords}")

            if left_box is None or right_box is None:
                print("No eyes")
                continue

            left_eye = face[left_box[1]:left_box[3], left_box[0]:left_box[2]]
            cv2.putText(face, 'L', (left_box[0], left_box[3]),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

            right_eye = face[right_box[1]:right_box[3],
                             right_box[0]:right_box[2]]
            cv2.putText(face, 'R', (right_box[0], right_box[3]),
                        cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

            if args.v:
                print(f"Eye Shape: {left_eye.shape} :: {right_eye.shape}")

            # Head Pose Estimation
            head_yaw, head_pitch, head_roll = hpe.predict(face)
            if args.v:
                print(f"Head Pose Time: {hpe.infer_time}")
            log.write("HPE_infer: " + str(hpe.infer_time) + "\n")
            head_angles = [head_yaw[0][0], head_pitch[0][0], head_roll[0][0]]

            # Gaze Estimation
            # expects pose as  (yaw, pitch, and roll)
            gaze = ge.predict(left_eye, right_eye, head_angles)

            if args.v:
                print(f"Gaze Time: {ge.infer_time}")
            log.write("GE_infer: " + str(ge.infer_time) + "\n")
            # Scale the gaze x/y components to an arrow length in pixels.
            gaze_point = (int(gaze[0][0] * 50), int(gaze[0][1] * 50))

            # One arrow per landmark point; the y component is subtracted
            # because image rows grow downwards.
            arrows = cv2.arrowedLine(face, landmark_points[0],
                                     (landmark_points[0][0] + gaze_point[0],
                                      landmark_points[0][1] - gaze_point[1]),
                                     (0, 0, 255), 2)
            arrows = cv2.arrowedLine(face, landmark_points[1],
                                     (landmark_points[1][0] + gaze_point[0],
                                      landmark_points[1][1] - gaze_point[1]),
                                     (0, 0, 255), 2)

            if image:
                cv2.imwrite('output/Gaze.png', arrows)
                cv2.imshow('Arrows', arrows)
                _write_model_timings(log, fd, fld, hpe, ge)
                # Keep the windows open until a key is pressed.
                cv2.waitKey(0)
            else:
                mouse.move(gaze[0][0], gaze[0][1])
                if cv2.waitKey(15) & 0xFF == ord('q'):
                    break
    finally:
        feed.close()
        cv2.destroyAllWindows()


def main(args):
    """Run face, landmark, head pose and gaze estimation on the input feed.

    Statistics are written to ``output/stats_<device>_<hpe><fld><ge>``. For
    an ``image`` input the intermediate pictures are saved under ``output/``
    and the model timings are logged; for any other input type the mouse is
    moved according to the estimated gaze until ``q`` is pressed or the feed
    ends.

    Args:
        args: parsed command-line options; ``device``, ``hpe``, ``fld``,
            ``ge``, ``v``, ``input_type`` and ``input_file`` are read.
    """
    print("Main script running...")
    log_name = 'stats_' + args.device + '_' + args.hpe + args.fld + args.ge

    os.makedirs('output', exist_ok=True)
    print(f"Logging to: output/{log_name}")
    # The context manager closes the log file even if the pipeline raises.
    with open('output/' + log_name, 'w+') as log:
        _run_gaze_pipeline(args, log)