def _load_timed_model(model_cls, model_path, options, stage_metrics, label):
    """Build and load one model, recording and logging how long that took.

    Args:
        model_cls: Model class to instantiate; it is called with the model
            path followed by ``options.device``, ``options.prob_threshold``,
            ``options.is_visual`` and ``options.extension``.
        model_path: First constructor argument for ``model_cls``.
        options: Parsed application options.
        stage_metrics: Object whose ``load_time`` attribute receives the
            measured load time.
        label: Prefix used in the log message.

    Returns:
        The loaded model instance.
    """
    with elapsed_timer() as et:
        model = model_cls(model_path, options.device, options.prob_threshold,
                          options.is_visual, options.extension)
        model.load_model()
        # NOTE(review): et() is read inside the timing context; presumably it
        # returns the time elapsed so far -- confirm against elapsed_timer.
        load_time = et()
    stage_metrics.load_time = load_time
    logging.info(f'{label} loading time taken: {load_time}')
    return model


def run_app(options):
    """Run the face -> landmarks -> head pose -> gaze pipeline on a stream.

    Loads the four models (timing each load), opens the camera or the video
    file named by ``options.input``, and for every frame runs the models in
    sequence, optionally showing the annotated frame
    (``options.is_show_frame``) and moving the mouse pointer
    (``options.is_move_pointer``). Annotated frames are written to
    ``./results/output_video.mp4`` and the collected metrics are saved once
    the stream ends or Escape is pressed.

    Any exception raised while streaming is logged and swallowed; the video
    writer, the input feeder and the OpenCV windows are released either way.

    Args:
        options: Parsed application options (model paths, device, flags).
    """
    metrics_builder = MetricsBuilder(options.precision)

    fdmodel = _load_timed_model(
        FaceDetectionModel, options.fdmodel, options,
        metrics_builder.face_detection, 'face detection')
    ldmodel = _load_timed_model(
        LandmarkDetectionModel, options.ldmodel, options,
        metrics_builder.landmarks_detection, 'Landmark detection')
    hpemodel = _load_timed_model(
        HeadPoseEstimationModel, options.hpemodel, options,
        metrics_builder.head_pose_estimation, 'Head Position Estimation')
    gemodel = _load_timed_model(
        GazeEstimationModel, options.gemodel, options,
        metrics_builder.gaze_estimation, 'Gazer Estimation')

    feeder = None
    out_video = None
    try:
        # Get and open video capture
        if options.is_cam:
            feeder = InputFeeder('cam')
        else:
            feeder = InputFeeder('video', options.input)
        feeder.load_data()

        initial_w, initial_h = feeder.get_size()
        fps = feeder.get_fps()
        for model in (fdmodel, ldmodel, hpemodel, gemodel):
            model.set_inputsize(initial_w, initial_h)

        frame_count = 0
        mouse_controller = MouseController("low", "fast")

        window_name = 'computer pointer controller'
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(window_name, initial_w, initial_h)

        out_file = pathlib.Path('./results/output_video.mp4')
        # The writer cannot create missing directories itself.
        out_file.parent.mkdir(parents=True, exist_ok=True)
        out_path = str(out_file)
        print(out_path)
        out_video = cv2.VideoWriter(out_path,
                                    cv2.VideoWriter_fourcc(*'avc1'), fps,
                                    (initial_w, initial_h), True)

        for frame in feeder.next_batch():
            if frame is None:
                break

            # exit video for escape key
            if cv2.waitKey(60) == 27:
                break

            frame_count += 1

            # detect face
            p_frame = fdmodel.preprocess_input(frame)
            with elapsed_timer() as et:
                fdmodel_output = fdmodel.predict(p_frame)
                metrics_builder.face_detection.add_infer_time(et())
            out_frame, fboxes = fdmodel.preprocess_output(
                fdmodel_output, frame)

            # Without a face there is nothing for the downstream models to
            # work on; skip the frame instead of failing on fboxes[0].
            if fboxes is None or len(fboxes) == 0:
                logging.debug('No face detected in frame %d', frame_count)
                continue

            # Take first face - (xmin,ymin,xmax,ymax)
            fbox = fboxes[0]
            xmin, ymin, xmax, ymax = fbox
            face = frame[ymin:ymax, xmin:xmax]

            # landmarks estimation
            p_frame = ldmodel.preprocess_input(face)
            with elapsed_timer() as et:
                lmoutput = ldmodel.predict(p_frame)
                metrics_builder.landmarks_detection.add_infer_time(et())
            out_frame, left_eye_point, right_eye_point = \
                ldmodel.preprocess_output(lmoutput, fbox, out_frame)

            # head pose estimation
            p_frame = hpemodel.preprocess_input(face)
            with elapsed_timer() as et:
                hpoutput = hpemodel.predict(p_frame)
                metrics_builder.head_pose_estimation.add_infer_time(et())
            out_frame, headpose_angles = hpemodel.preprocess_output(
                hpoutput, out_frame, face, fbox)

            # gaze estimation
            out_frame, left_eye, right_eye = gemodel.preprocess_input(
                out_frame, face, left_eye_point, right_eye_point)
            with elapsed_timer() as et:
                geoutput = gemodel.predict(headpose_angles, left_eye,
                                           right_eye)
                metrics_builder.gaze_estimation.add_infer_time(et())
            out_frame, gazevector = gemodel.preprocess_output(
                geoutput, out_frame, fbox, left_eye_point, right_eye_point)

            # show frame
            if options.is_show_frame:
                cv2.imshow(window_name, out_frame)

            # mouse controller
            if options.is_move_pointer:
                x, y, _ = gazevector
                mouse_controller.move(x, y)

            out_video.write(out_frame)

        # performance metrics
        metrics_builder.save_metrics(frame_count)
    except Exception:
        logging.error("Fatal error in main loop", exc_info=True)
    finally:
        # Release resources on both the normal and the error path; the
        # writer must be released for the output file to be finalised.
        if out_video is not None:
            out_video.release()
        if feeder is not None:
            feeder.close()
        cv2.destroyAllWindows()
def infer_on_stream(args):
    """Run gaze-driven mouse control over a camera or video stream.

    Loads the face detection, head pose, facial landmarks and gaze
    estimation models, then feeds each frame through them. The gaze result
    drives the mouse controller. When ``args.visual`` is ``"yes"`` the frame
    is annotated via ``draw_outputs`` and written to ``../output.mp4``.

    The loop stops when the feeder yields a false flag or Escape is pressed.
    In every case (including an exception) the timing statistics are logged
    and the video writer, windows and feeder are released.

    Args:
        args: Parsed options providing ``face_model``, ``head_model``,
            ``landmarks_model``, ``gaze_model``, ``input`` (``"cam"`` or a
            video path) and ``visual``.
    """
    start_model_load_time = time.time()

    # initiate and load models
    face_det_net = Face_Detection_Model(args.face_model)
    face_det_net.load_model()
    head_pose_net = Head_Pose_Model(args.head_model)
    head_pose_net.load_model()
    facial_landmarks_net = Facial_Landmarks_Model(args.landmarks_model)
    facial_landmarks_net.load_model()
    gaze_est_net = Gaze_Estimation_Model(args.gaze_model)
    gaze_est_net.load_model()
    total_model_load_time = time.time() - start_model_load_time

    # initiate stream
    counter = 0
    start_inference_time = time.time()
    if args.input.lower() == "cam":
        frame_feeder = InputFeeder(input_type='cam')
    else:
        frame_feeder = InputFeeder(input_type='video', input_file=args.input)
    frame_feeder.load_data()

    show_visuals = args.visual.lower() == "yes"
    out_video = None
    try:
        fps = frame_feeder.get_fps()
        log.info('Video started')

        # initiate mouse controller
        mouse_controller = MouseController('medium', 'fast')

        out_video = cv2.VideoWriter('../output.mp4',
                                    cv2.VideoWriter_fourcc(*'avc1'),
                                    fps, (frame_feeder.get_size()), True)

        for flag, frame in frame_feeder.next_batch():
            if not flag:
                log.info('Video ended')
                break

            key = cv2.waitKey(60)
            counter += 1

            coords, image, face = face_det_net.predict(frame)
            pose = head_pose_net.predict(face)
            _, left_eye_image, right_eye_image, eye_coords = \
                facial_landmarks_net.predict(face)

            # Gaze is only computed for a 40x40x3 left-eye crop. Everything
            # that consumes the gaze result stays under the same check so a
            # rejected crop never uses undefined or stale values.
            if left_eye_image.shape == (40, 40, 3):
                mouse_coords, gaze = gaze_est_net.predict(
                    left_eye_image, right_eye_image, pose)
                mouse_controller.move(mouse_coords[0], mouse_coords[1])
                if show_visuals:
                    frame = draw_outputs(coords, eye_coords, pose, gaze,
                                         mouse_coords[0], mouse_coords[1],
                                         image)

            if show_visuals:
                out_video.write(frame)
            cv2.imshow('video', frame)

            if key == 27:
                break
    finally:
        total_time = time.time() - start_inference_time
        total_inference_time = round(total_time, 1)
        # Rounding can yield 0.0 for very short runs; avoid dividing by it.
        if total_inference_time:
            f_ps = counter / total_inference_time
        else:
            f_ps = 0.0
        log.info("Models load time {:.2f}.".format(total_model_load_time))
        log.info("Total inference time {:.2f}.".format(total_inference_time))
        log.info("Inference frames pre second {:.2f}.".format(f_ps))

        # The writer must be released for the output file to be finalised.
        if out_video is not None:
            out_video.release()
        cv2.destroyAllWindows()
        frame_feeder.close()