Code example #1
0
def run_app(options):
    """Run the gaze-controlled pointer pipeline end to end.

    Loads the four models (face detection, facial landmarks, head pose,
    gaze estimation), streams frames from a camera or video file, runs the
    inference chain per frame, optionally moves the mouse pointer along the
    estimated gaze vector, writes an annotated output video, and saves
    per-model load/inference metrics.

    Args:
        options: Parsed CLI namespace providing model paths (``fdmodel``,
            ``ldmodel``, ``hpemodel``, ``gemodel``), ``device``,
            ``prob_threshold``, ``is_visual``, ``extension``, ``precision``,
            ``is_cam``, ``input``, ``is_show_frame`` and ``is_move_pointer``.
    """
    metrics_builder = MetricsBuilder(options.precision)

    # Time each model load separately so the metrics report can break
    # load cost down per model.
    with elapsed_timer() as et:
        fdmodel = FaceDetectionModel(options.fdmodel, options.device,
                                     options.prob_threshold, options.is_visual,
                                     options.extension)
        fdmodel.load_model()
        fdmodel_loadtime = et()
        metrics_builder.face_detection.load_time = fdmodel_loadtime
        logging.info(f'face detection loading time taken: {fdmodel_loadtime}')

    with elapsed_timer() as et:
        ldmodel = LandmarkDetectionModel(options.ldmodel, options.device,
                                         options.prob_threshold,
                                         options.is_visual, options.extension)
        ldmodel.load_model()
        ldmodel_loadtime = et()
        metrics_builder.landmarks_detection.load_time = ldmodel_loadtime
        logging.info(
            f'Landmark detection loading time taken: {ldmodel_loadtime}')

    with elapsed_timer() as et:
        hpemodel = HeadPoseEstimationModel(options.hpemodel, options.device,
                                           options.prob_threshold,
                                           options.is_visual,
                                           options.extension)
        hpemodel.load_model()
        hpemodel_loadtime = et()
        metrics_builder.head_pose_estimation.load_time = hpemodel_loadtime
        logging.info(
            f'Head Position Estimation loading time taken: {hpemodel_loadtime}'
        )

    with elapsed_timer() as et:
        gemodel = GazeEstimationModel(options.gemodel, options.device,
                                      options.prob_threshold,
                                      options.is_visual, options.extension)
        gemodel.load_model()
        gemodel_loadtime = et()
        metrics_builder.gaze_estimation.load_time = gemodel_loadtime
        logging.info(
            f'Gazer Estimation loading time taken: {gemodel_loadtime}')

    feeder = None
    out_video = None
    try:
        # Get and open video capture.
        if options.is_cam:
            feeder = InputFeeder('cam')
        else:
            feeder = InputFeeder('video', options.input)
        feeder.load_data()

        initial_w, initial_h = feeder.get_size()
        fps = feeder.get_fps()

        fdmodel.set_inputsize(initial_w, initial_h)
        ldmodel.set_inputsize(initial_w, initial_h)
        hpemodel.set_inputsize(initial_w, initial_h)
        gemodel.set_inputsize(initial_w, initial_h)

        frame_count = 0

        mouse_controller = MouseController("low", "fast")

        window_name = 'computer pointer controller'
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, initial_w, initial_h)

        out_path = str(pathlib.Path('./results/output_video.mp4'))
        print(out_path)
        out_video = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'avc1'),
                                    fps, (initial_w, initial_h), True)

        for frame in feeder.next_batch():

            if frame is None:
                break

            # Exit on the Escape key.
            key_pressed = cv2.waitKey(60)
            if key_pressed == 27:
                break

            frame_count += 1

            # Detect face.
            p_frame = fdmodel.preprocess_input(frame)
            with elapsed_timer() as et:
                fdmodel_output = fdmodel.predict(p_frame)
                metrics_builder.face_detection.add_infer_time(et())
            out_frame, fboxes = fdmodel.preprocess_output(
                fdmodel_output, frame)

            # BUGFIX: the original indexed fboxes[0] unconditionally and
            # raised IndexError on frames where no face was detected.
            if not fboxes:
                continue

            # Take first face - (xmin, ymin, xmax, ymax).
            fbox = fboxes[0]
            xmin, ymin, xmax, ymax = fbox
            face = frame[ymin:ymax, xmin:xmax]

            # Landmarks estimation on the cropped face.
            p_frame = ldmodel.preprocess_input(face)
            with elapsed_timer() as et:
                lmoutput = ldmodel.predict(p_frame)
                metrics_builder.landmarks_detection.add_infer_time(et())
            out_frame, left_eye_point, right_eye_point = ldmodel.preprocess_output(
                lmoutput, fbox, out_frame)

            # Head pose estimation.
            p_frame = hpemodel.preprocess_input(face)
            with elapsed_timer() as et:
                hpoutput = hpemodel.predict(p_frame)
                metrics_builder.head_pose_estimation.add_infer_time(et())
            out_frame, headpose_angels = hpemodel.preprocess_output(
                hpoutput, out_frame, face, fbox)

            # Gaze estimation from both eye crops plus the head pose angles.
            out_frame, left_eye, right_eye = gemodel.preprocess_input(
                out_frame, face, left_eye_point, right_eye_point)
            with elapsed_timer() as et:
                geoutput = gemodel.predict(headpose_angels, left_eye,
                                           right_eye)
                metrics_builder.gaze_estimation.add_infer_time(et())
            out_frame, gazevector = gemodel.preprocess_output(
                geoutput, out_frame, fbox, left_eye_point, right_eye_point)

            # Show frame.
            if options.is_show_frame:
                cv2.imshow(window_name, out_frame)

            # Move the pointer along the (x, y) gaze components.
            if options.is_move_pointer:
                x, y, _ = gazevector
                mouse_controller.move(x, y)

            out_video.write(out_frame)

        # Performance metrics.
        metrics_builder.save_metrics(frame_count)

    except Exception:
        # Top-level boundary: log with traceback rather than crash.
        logging.error("Fatal error in main loop", exc_info=True)
    finally:
        # BUGFIX: cleanup previously lived inside the try body and was
        # skipped on error; out_video was never released at all.
        if feeder is not None:
            feeder.close()
        if out_video is not None:
            out_video.release()
        cv2.destroyAllWindows()
Code example #2
0
def infer_on_stream(args):
    """Run the gaze-pointer inference pipeline over a camera or video stream.

    Loads the face-detection, head-pose, facial-landmarks and gaze-estimation
    models, feeds frames through them, moves the mouse pointer from the gaze
    estimate, optionally draws/records an annotated video, and logs load and
    inference timings when the stream ends.

    Args:
        args: Parsed CLI namespace providing ``face_model``, ``head_model``,
            ``landmarks_model``, ``gaze_model``, ``input`` ("cam" or a video
            path) and ``visual`` ("yes" to draw overlays and record output).
    """
    start_model_load_time = time.time()

    # Initiate and load the four models of the pipeline.
    face_det_net = Face_Detection_Model(args.face_model)
    face_det_net.load_model()
    head_pose_net = Head_Pose_Model(args.head_model)
    head_pose_net.load_model()
    facial_landmarks_net = Facial_Landmarks_Model(args.landmarks_model)
    facial_landmarks_net.load_model()
    gaze_est_net = Gaze_Estimation_Model(args.gaze_model)
    gaze_est_net.load_model()
    total_model_load_time = time.time() - start_model_load_time

    # Initiate stream.
    counter = 0
    start_inference_time = time.time()

    if args.input.lower() == "cam":
        frame_feeder = InputFeeder(input_type='cam')
    else:
        frame_feeder = InputFeeder(input_type='video', input_file=args.input)
    frame_feeder.load_data()
    fps = frame_feeder.get_fps()
    log.info('Video started')

    # Initiate mouse controller.
    mouse_controller = MouseController('medium', 'fast')

    # Write output video (Windows layout; on Linux drop the path prefix
    # and the final colour flag).
    out_video = cv2.VideoWriter('../output.mp4',
                                cv2.VideoWriter_fourcc(*'avc1'),
                                fps, (frame_feeder.get_size()), True)

    for flag, frame in frame_feeder.next_batch():
        if flag:
            key = cv2.waitKey(60)
            counter += 1
            coords, image, face = face_det_net.predict(frame)
            pose = head_pose_net.predict(face)
            land, left_eye_image, right_eye_image, eye_coords = \
                facial_landmarks_net.predict(face)

            # BUGFIX: the original used mouse_coords/gaze unconditionally
            # after this guarded assignment, raising NameError whenever the
            # first frame produced an eye crop of the wrong size. All
            # gaze-dependent steps now run only when an estimate exists.
            if left_eye_image.shape == (40, 40, 3):
                mouse_coords, gaze = gaze_est_net.predict(
                    left_eye_image, right_eye_image, pose)
                mouse_controller.move(mouse_coords[0], mouse_coords[1])
                if args.visual.lower() == "yes":
                    frame = draw_outputs(coords, eye_coords, pose, gaze,
                                         mouse_coords[0], mouse_coords[1],
                                         image)
                    out_video.write(frame)
            # Show the (possibly annotated) frame exactly once; the
            # original called cv2.imshow twice in the visual branch.
            cv2.imshow('video', frame)
            if key == 27:  # Escape key exits.
                break
        else:
            log.info('Video ended')
            total_time = time.time() - start_inference_time
            total_inference_time = round(total_time, 1)
            f_ps = counter / total_inference_time
            log.info("Models load time {:.2f}.".format(total_model_load_time))
            log.info("Total inference time {:.2f}.".format(total_inference_time))
            # BUGFIX: typo "pre second" -> "per second" in the log message.
            log.info("Inference frames per second {:.2f}.".format(f_ps))
            break
    # BUGFIX: release the writer (was never released) and clean up
    # unconditionally, whichever way the loop exited.
    out_video.release()
    cv2.destroyAllWindows()
    frame_feeder.close()