def main():
    """
    Load the network and parse the output.
    :return: None
    """
    global INFO
    global DELAY
    global POSE_CHECKED
    #controller = MouseController()

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30  # assume 30 FPS if the source reports none (e.g. some webcams)
    out = cv2.VideoWriter(os.path.join(args.output_dir, "shopper.mp4"),
                          cv2.VideoWriter_fourcc(*"MP4V"), fps,
                          (initial_w, initial_h), True)
    frame_count = 0

    job_id = 1  #os.environ['PBS_JOBID']
    progress_file_path = os.path.join(args.output_dir,
                                      'i_progress_' + str(job_id) + '.txt')

    infer_time_start = time.time()

    if input_stream:
        cap.open(args.input)
        # Adjust DELAY to match the FPS of the video file
        DELAY = 1000 / fps

    if not cap.isOpened():
        logger.error("Unable to open video source")
        return

    # Initialise the class
    if args.cpu_extension:
        facedet = FaceDetection(args.facemodel,
                                args.confidence,
                                extensions=args.cpu_extension)
        posest = HeadPoseEstimation(args.posemodel,
                                    args.confidence,
                                    extensions=args.cpu_extension)
        landest = FaceLandmarksDetection(args.landmarksmodel,
                                         args.confidence,
                                         extensions=args.cpu_extension)
        gazeest = GazeEstimation(args.gazemodel,
                                 args.confidence,
                                 extensions=args.cpu_extension)
    else:
        facedet = FaceDetection(args.facemodel, args.confidence)
        posest = HeadPoseEstimation(args.posemodel, args.confidence)
        landest = FaceLandmarksDetection(args.landmarksmodel, args.confidence)
        gazeest = GazeEstimation(args.gazemodel, args.confidence)

    # infer_network_pose = Network()
    # Load the network to IE plugin to get shape of input layer
    facedet.load_model()
    posest.load_model()
    landest.load_model()
    gazeest.load_model()
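    # Pipeline order: face detection -> head pose -> facial landmarks -> gaze;
    # each later model consumes crops or angles produced by the earlier ones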
    print("loaded models")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("checkpoint *BREAKING")
            break
        frame_count += 1
        looking = 0
        POSE_CHECKED = False

        if frame is None:
            log.error("Blank frame grabbed; stopping")
            break

        initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Start asynchronous inference for specified request
        inf_start_fd = time.time()
        # Results of the output layer of the network
        coords, frame = facedet.predict(frame)
        det_time_fd = time.time() - inf_start_fd
        if len(coords) > 0:
            xmin, ymin, xmax, ymax = coords[0]  # use only the first detected face
            head_pose = frame[ymin:ymax, xmin:xmax]
            inf_start_hp = time.time()
            is_looking, pose_angles = posest.predict(head_pose)
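            # pose_angles is presumably [yaw, pitch, roll] in degrees, the
            # output convention of OpenVINO's head-pose-estimation-adas-0001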
            if is_looking:
                det_time_hp = time.time() - inf_start_hp
                POSE_CHECKED = True
                #print(is_looking)
                inf_start_lm = time.time()
                coords, f = landest.predict(head_pose)
                frame[ymin:ymax, xmin:xmax] = f
                det_time_lm = time.time() - inf_start_lm

                [[xlmin, ylmin, xlmax, ylmax],
                 [xrmin, yrmin, xrmax, yrmax]] = coords
                # the landmark coords are relative to the face crop, so slice
                # the cropped face (f), not the full frame
                left_eye_image = f[ylmin:ylmax, xlmin:xlmax]
                right_eye_image = f[yrmin:yrmax, xrmin:xrmax]
                output = gazeest.predict(left_eye_image, right_eye_image,
                                         pose_angles)
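                # output is presumably the (x, y) movement vector that the
                # commented-out MouseController above would consume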
        # Draw performance stats
        inf_time_message = "Face Inference time: {:.3f} ms.".format(
            det_time_fd * 1000)
        if POSE_CHECKED:
            cv2.putText(
                frame, "Head pose Inference time: {:.3f} ms.".format(
                    det_time_hp * 1000), (0, 35), cv2.FONT_HERSHEY_SIMPLEX,
                0.5, (255, 255, 255), 1)
            cv2.putText(frame, inf_time_message, (0, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 255, 255), 1)
        out.write(frame)
        print("frame", frame_count)
        if frame_count % 10 == 0:
            print(time.time() - infer_time_start)
            progressUpdate(progress_file_path,
                           int(time.time() - infer_time_start), frame_count,
                           video_len)
    if args.output_dir:
        total_time = time.time() - infer_time_start
        with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
            f.write(str(round(total_time, 1)) + '\n')
            f.write(str(frame_count) + '\n')

    facedet.clean()
    posest.clean()
    landest.clean()
    gazeest.clean()
    out.release()
    cap.release()
    cv2.destroyAllWindows()
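
progressUpdate() is referenced above but not defined in this listing. A minimal sketch consistent with the call site (progress file path, elapsed seconds, frames processed, total frames) could look like the following; the three-line file format (percent done, seconds remaining, estimated total) is an assumption modeled on Intel DevCloud demo helpers, not code from this repository:

def progressUpdate(file_path, time_diff, frame_count, video_len):
    # Hypothetical helper: write percent complete, estimated seconds
    # remaining, and estimated total runtime for an external progress bar.
    progress = round(100 * (frame_count / video_len))
    remaining_time = round((time_diff / frame_count) * (video_len - frame_count))
    estimated_time = round(time_diff + remaining_time)
    with open(file_path, "w") as progress_file:
        progress_file.write(str(progress) + '\n')
        progress_file.write(str(remaining_time) + '\n')
        progress_file.write(str(estimated_time) + '\n')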
Example #2
def main():
    """
    Load the network and parse the output.
    :return: None
    """
    global POSE_CHECKED

    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.INFO,
                    stream=sys.stdout)
    args = args_parser().parse_args()
    logger = log.getLogger()

    if args.input == 'cam':
        input_stream = 0
    else:
        input_stream = args.input
        assert os.path.isfile(args.input), "Specified input file doesn't exist"

    cap = cv2.VideoCapture(input_stream)
    initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    video_len = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30  # assume 30 FPS if the source reports none

    out = cv2.VideoWriter(os.path.join(args.output_dir, "output.mp4"),
                          cv2.VideoWriter_fourcc(*"MP4V"), fps,
                          (initial_w, initial_h), True)

    if args.write_intermediate == 'yes':
        out_fm = cv2.VideoWriter(
            os.path.join(args.output_dir, "output_fm.mp4"),
            cv2.VideoWriter_fourcc(*"MP4V"), fps, (initial_w, initial_h), True)
        out_lm = cv2.VideoWriter(
            os.path.join(args.output_dir, "output_lm.mp4"),
            cv2.VideoWriter_fourcc(*"MP4V"), fps, (initial_w, initial_h), True)
        out_pm = cv2.VideoWriter(
            os.path.join(args.output_dir, "output_pm.mp4"),
            cv2.VideoWriter_fourcc(*"MP4V"), fps, (initial_w, initial_h), True)
        out_gm = cv2.VideoWriter(
            os.path.join(args.output_dir, "output_gm.mp4"),
            cv2.VideoWriter_fourcc(*"MP4V"), fps, (initial_w, initial_h), True)

    frame_count = 0

    infer_time_start = time.time()

    if input_stream:
        cap.open(args.input)

    if not cap.isOpened():
        logger.error("Unable to open video source")
        return

    if args.mode == 'sync':
        async_mode = False
    else:
        async_mode = True

    # Initialise the class
    if args.cpu_extension:
        face_det = FaceDetection(args.facemodel,
                                 args.confidence,
                                 extensions=args.cpu_extension,
                                 async_mode=async_mode)
        pose_det = HeadPoseEstimation(args.posemodel,
                                      args.confidence,
                                      extensions=args.cpu_extension,
                                      async_mode=async_mode)
        land_det = FaceLandmarksDetection(args.landmarksmodel,
                                          args.confidence,
                                          extensions=args.cpu_extension,
                                          async_mode=async_mode)
        gaze_est = GazeEstimation(args.gazemodel,
                                  args.confidence,
                                  extensions=args.cpu_extension,
                                  async_mode=async_mode)
    else:
        face_det = FaceDetection(args.facemodel,
                                 args.confidence,
                                 async_mode=async_mode)
        pose_det = HeadPoseEstimation(args.posemodel,
                                      args.confidence,
                                      async_mode=async_mode)
        land_det = FaceLandmarksDetection(args.landmarksmodel,
                                          args.confidence,
                                          async_mode=async_mode)
        gaze_est = GazeEstimation(args.gazemodel,
                                  args.confidence,
                                  async_mode=async_mode)

    # infer_network_pose = Network()
    # Load the network to IE plugin to get shape of input layer
    face_det.load_model()
    pose_det.load_model()
    land_det.load_model()
    gaze_est.load_model()

    model_load_time = time.time() - infer_time_start

    print("All models are loaded successfully")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("checkpoint *BREAKING")
            break

        frame_count += 1
        looking = 0
        POSE_CHECKED = False

        if frame is None:
            log.error("Blank frame grabbed; stopping")
            break

        initial_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        initial_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Start asynchronous inference for specified request
        inf_start_fd = time.time()

        # Results of the output layer of the network
        coords, frame = face_det.predict(frame)

        if args.write_intermediate == 'yes':
            out_fm.write(frame)

        det_time_fd = time.time() - inf_start_fd

        if len(coords) > 0:
            xmin, ymin, xmax, ymax = coords[0]  # use only the first detected face
            head_pose = frame[ymin:ymax, xmin:xmax]
            inf_start_hp = time.time()
            is_looking, pose_angles = pose_det.predict(head_pose)
            if args.write_intermediate == 'yes':
                p = "Pose Angles {}, is Looking? {}".format(
                    pose_angles, is_looking)
                cv2.putText(frame, p, (50, 15), cv2.FONT_HERSHEY_COMPLEX, 0.5,
                            (255, 0, 0), 1)
                out_pm.write(frame)

            if is_looking:
                det_time_hp = time.time() - inf_start_hp
                POSE_CHECKED = True
                inf_start_lm = time.time()
                coords, f = land_det.predict(head_pose)

                frame[ymin:ymax, xmin:xmax] = f

                if args.write_intermediate == "yes":
                    out_lm.write(frame)

                det_time_lm = time.time() - inf_start_lm
                [[xlmin, ylmin, xlmax, ylmax],
                 [xrmin, yrmin, xrmax, yrmax]] = coords
                left_eye_image = f[ylmin:ylmax, xlmin:xlmax]
                right_eye_image = f[yrmin:yrmax, xrmin:xrmax]

                output, gaze_vector = gaze_est.predict(left_eye_image,
                                                       right_eye_image,
                                                       pose_angles)
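                # gaze_vector is presumably the model's raw 3-D (x, y, z) gaze
                # direction, reused below for the intermediate visualisation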

                if args.write_intermediate == 'yes':
                    p = "Gaze Vector {}".format(gaze_vector)
                    cv2.putText(frame, p, (50, 15), cv2.FONT_HERSHEY_COMPLEX,
                                0.5, (255, 0, 0), 1)
                    fl = draw_gaze(left_eye_image, gaze_vector)
                    fr = draw_gaze(right_eye_image, gaze_vector)
                    f[ylmin:ylmax, xlmin:xlmax] = fl
                    f[yrmin:yrmax, xrmin:xrmax] = fr
                    # cv2.arrowedLine(f, (xlmin, ylmin), (xrmin, yrmin), (0,0,255), 5)
                    # copy the annotated crop back so the gaze arrows actually
                    # appear in the frame that gets written out
                    frame[ymin:ymax, xmin:xmax] = f
                    out_gm.write(frame)

        # Draw performance stats
        inf_time_message = "Face Inference time: {:.3f} ms.".format(
            det_time_fd * 1000)
        #
        if POSE_CHECKED:
            cv2.putText(
                frame, "Head pose Inference time: {:.3f} ms.".format(
                    det_time_hp * 1000), (0, 35), cv2.FONT_HERSHEY_SIMPLEX,
                0.5, (0, 255, 0), 1)
            cv2.putText(frame, inf_time_message, (0, 15),
                        cv2.FONT_HERSHEY_COMPLEX, 0.5, (255, 0, 0), 1)
        out.write(frame)
        if frame_count % 10 == 0:
            print("Inference time = ", int(time.time() - infer_time_start))
            print('Frame count {} and video length {}'.format(
                frame_count, video_len))
    if args.output_dir:
        total_time = time.time() - infer_time_start
        with open(os.path.join(args.output_dir, 'stats.txt'), 'w') as f:
            f.write(str(round(total_time, 1)) + '\n')
            f.write(str(frame_count) + '\n')
            f.write(str(round(model_load_time)) + '\n')

    # Clean all models
    face_det.clean()
    pose_det.clean()
    land_det.clean()
    gaze_est.clean()
    # release cv2 cap
    cap.release()
    cv2.destroyAllWindows()
    # release all out writer
    out.release()
    if args.write_intermediate == 'yes':
        out_fm.release()
        out_pm.release()
        out_lm.release()
        out_gm.release()
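
draw_gaze() is likewise not shown in this listing. Assuming gaze_vector is the 3-D (x, y, z) direction from the gaze model, a plausible sketch draws its in-plane component as an arrow from the eye-crop centre; the scale factor and the y sign flip are guesses:

def draw_gaze(eye_image, gaze_vector, scale=60):
    # Hypothetical helper: draw the (x, y) part of the gaze vector from the
    # centre of the eye crop. y is negated because image rows grow downwards
    # while the model's y axis is assumed to point up.
    img = eye_image.copy()
    h, w = img.shape[:2]
    cx, cy = w // 2, h // 2
    tip = (cx + int(gaze_vector[0] * scale), cy - int(gaze_vector[1] * scale))
    cv2.arrowedLine(img, (cx, cy), tip, (0, 0, 255), 2)
    return img

Each example would normally end with the standard entry-point guard:

if __name__ == '__main__':
    main()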