Example #1
                tensorflow_session=session,
                batch_size=batch_size,
                camera_id=args.camera_id,
                fps=args.fps,
                data_format='NCHW' if gpu_available else 'NHWC',
                eye_image_shape=(36, 60))

        # Define model
        if args.from_video:
            model = ELG(
                session,
                train_data={'videostream': data_source},
                first_layer_stride=3,
                num_modules=3,
                num_feature_maps=64,
                learning_schedule=[
                    {
                        'loss_terms_to_optimize': {
                            'dummy': ['hourglass', 'radius']
                        },
                    },
                ],
            )
        else:
            model = ELG(
                session,
                train_data={'videostream': data_source},
                first_layer_stride=1,
                num_modules=2,
                num_feature_maps=32,
                learning_schedule=[
                    {
Example #2
            train_model = ELG(
                # Tensorflow session
                # Note: The same session must be used for the model and the data sources.
                session,

                first_layer_stride=elg_first_layer_stride,
                num_feature_maps=elg_num_feature_maps,
                num_modules=elg_num_modules,

                learning_schedule=[
                    {
                        'loss_terms_to_optimize': {
                            'heatmaps_mse': ['hourglass'],
                            'radius_mse': ['radius'],
                        },
                        'learning_rate': 1e-3,
                    },
                ],

                # Data sources for training (and testing).
                train_data={'synthetic': unityeyes},

                test_data={
                    'mpi': HDF5Source(
                        session,
                        data_format='NCHW',
                        batch_size=batch_size,
                        keys_to_use=['train/' + s for s in other_person_ids],
                        hdf_path='/home/xiehuan/datasets/MPIIGaze.h5',
                        eye_image_shape=(36, 60),
                        testing=True,
                        min_after_dequeue=30000,
                    ),
                }
            )
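
The `other_person_ids` list used to build `keys_to_use` is not shown in this snippet. Below is a minimal sketch of how such a leave-one-person-out split over the 15 MPIIGaze participants (p00–p14) might be built; the variable names `all_person_ids` and `test_person_id` are illustrative and not from the original code.

# Hypothetical sketch (not from the original snippet): leave one person out for testing,
# train on the remaining MPIIGaze participants.
all_person_ids = ['p%02d' % i for i in range(15)]   # p00 .. p14
test_person_id = 'p00'                               # illustrative choice
other_person_ids = [p for p in all_person_ids if p != test_person_id]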
Example #3
        model = ELG(
            # Tensorflow session
            # Note: The same session must be used for the model and the data sources.
            session,

            # Model configuration parameters
            # first_layer_stride describes how much the input image is downsampled before producing
            #                    feature maps for eventual heatmaps regression
            # num_modules defines the number of hourglass modules, and thus the number of times repeated
            #             coarse-to-fine refinement is done.
            # num_feature_maps describes how many feature maps are refined over the entire network.
            first_layer_stride=elg_first_layer_stride,
            num_feature_maps=elg_num_feature_maps,
            num_modules=elg_num_modules,

            # The learning schedule describes in which order which part of the network should be
            # trained and with which learning rate.
            #
            # A standard network would have one entry (dict) in this argument where all model
            # parameters are optimized. To do this, you must specify which variables must be
            # optimized and this is done by specifying which prefixes to look for.
            # The prefixes are defined by using `tf.variable_scope`.
            #
            # The loss terms which can be specified depends on model specifications, specifically
            # the `loss_terms` output of `BaseModel::build_model`.
            learning_schedule=[
                {
                    'loss_terms_to_optimize': {
                        'heatmaps_mse': ['hourglass'],
                        'radius_mse': ['radius'],
                    },
                    'learning_rate': 1e-3,
                },
            ],

            # Data sources for training (and testing).
            train_data={'synthetic': unityeyes},
        )
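
As the comments above note, the learning schedule can list several entries to train different parts of the network in sequence, each with its own learning rate. A hypothetical two-phase variant (not taken from the repository, only the loss-term names 'heatmaps_mse'/'radius_mse' and the scopes 'hourglass'/'radius' come from the snippet above; the learning rates are illustrative) could look like this:

# Hypothetical two-phase schedule: first fit the hourglass stack, then the radius regressor.
learning_schedule = [
    {
        'loss_terms_to_optimize': {'heatmaps_mse': ['hourglass']},
        'learning_rate': 1e-3,
    },
    {
        'loss_terms_to_optimize': {'radius_mse': ['radius']},
        'learning_rate': 1e-4,
    },
]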
Example #4
                        train_data={'videostream': data_source},
                        learning_schedule=[
                            {
                                'loss_terms_to_optimize': {
                                    'dummy': ['hourglass', 'radius']
                                },
                            },
                        ])
        else:
            model = ELG(
                session,
                train_data={'videostream': data_source},
                first_layer_stride=first_layer_stride,
                num_modules=num_modules,
                num_feature_maps=num_feature_maps,
                learning_schedule=[
                    {
                        'loss_terms_to_optimize': {
                            'dummy': ['hourglass', 'radius']
                        },
                    },
                ],
            )
        if args.record_eye_data:
            eye_data = []
        else:
            eye_data = None

        # Record output frames to file if requested
        record_thread = RecordVideoThread(args=(args.record_video,
                                                data_source))
        if args.record_video:
Example #5
def main():
    frames_dir = os.path.join(DATA_DIR, 'frames')
    mkdir_if_not_exist(VIDS_DIR)
    mkdir_if_not_exist(frames_dir)

    # from_video = os.path.join(VIDS_DIR, "normal_video.mp4")
    from_video = os.path.join(VIDS_DIR, "vid_no_glasses.mp4")

    # record_video = os.path.join(DATA_DIR, "normal_video.out.mp4")

    coloredlogs.install(
        datefmt='%d/%m %H:%M',
        fmt='%(asctime)s %(levelname)s %(message)s',
        level="INFO",
    )

    # Check if GPU is available
    from tensorflow.python.client import device_lib

    session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        allow_growth=True))
    gpu_available = False
    try:
        gpus = [
            d for d in device_lib.list_local_devices(session_config=session_config)
            if d.device_type == 'GPU'
        ]
        gpu_available = len(gpus) > 0
    except Exception as e:
        print('[Info] GPU check failed ({}), falling back to CPU!'.format(e))

    print('[Info] GPU enabled: {}'.format(gpu_available))
    # -----------------------------------------------------------------------#

    tf.logging.set_verbosity(tf.logging.INFO)
    session = tf.Session(config=session_config)

    batch_size = 2  # set the batch size
    print('[Info] Input video path: {}'.format(from_video))
    assert os.path.isfile(from_video)

    # The model comes in a large and a small configuration
    data_source = Video(
        from_video,
        tensorflow_session=session,
        batch_size=batch_size,
        data_format='NCHW' if gpu_available else 'NHWC',
        # eye_image_shape=(108, 180)
        eye_image_shape=(36, 60))

    # Define model
    model = ELG(
        session,
        train_data={'videostream': data_source},
        # first_layer_stride=3,
        first_layer_stride=1,
        # num_modules=3,
        num_modules=2,
        # num_feature_maps=64,
        num_feature_maps=32,
        learning_schedule=[
            {
                'loss_terms_to_optimize': {
                    'dummy': ['hourglass', 'radius']
                },
            },
        ],
    )

    infer = model.inference_generator()

    count = 0

    while True:
        print('')
        print('-' * 50)
        output = next(infer)
        process_output(output, batch_size, data_source, frames_dir)  # process the output
        count += 1
        print('count: {}'.format(count))
        if count == 10:
            break
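
The `process_output` helper called in the loop above is not part of this snippet. A minimal stand-in, assuming it only needs to dump the frames referenced by the batch into `frames_dir` for inspection (it uses only the `frame_index` output key and the data source's `_frames` cache seen in the other examples), might look like this:

import os
import cv2 as cv

def process_output(output, batch_size, data_source, frames_dir):
    """Hypothetical stand-in: save the raw frames referenced by this batch to disk."""
    for j in range(batch_size):
        frame_index = output['frame_index'][j]
        if frame_index not in data_source._frames:
            continue
        bgr = data_source._frames[frame_index]['bgr']
        cv.imwrite(os.path.join(frames_dir, 'frame_%06d.jpg' % int(frame_index)), bgr)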
Example #6
    def __init__(self, from_video=None, record_video=None):

        # Initialise the obj
        database = GazeDB()
        # Set global log level
        parser = argparse.ArgumentParser(description='Demonstration of landmarks localization.')
        parser.add_argument('-v', type=str, help='logging level', default='info',
                            choices=['debug', 'info', 'warning', 'error', 'critical'])
        parser.add_argument('--from_video', type=str, help='Use this video path instead of webcam')
        parser.add_argument('--record_video', type=str, help='Output path of video of demonstration.')
        parser.add_argument('--fullscreen', action='store_true')
        parser.add_argument('--headless', action='store_true')

        parser.add_argument('--fps', type=int, default=60, help='Desired sampling rate of webcam')
        parser.add_argument('--camera_id', type=int, default=0, help='ID of webcam to use')

        args = parser.parse_args()
        coloredlogs.install(
            datefmt='%d/%m %H:%M',
            fmt='%(asctime)s %(levelname)s %(message)s',
            level=args.v.upper(),
        )

        # Check if GPU is available
        from tensorflow.python.client import device_lib
        session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
        gpu_available = False
        try:
            gpus = [d for d in device_lib.list_local_devices()
                    if d.device_type == 'GPU']
            gpu_available = len(gpus) > 0
        except Exception:
            pass

        # Initialize Tensorflow session
        tf.logging.set_verbosity(tf.logging.INFO)
        with tf.Session(config=session_config) as session:

            # Declare some parameters
            batch_size = 2

            # Define webcam stream data source
            # Change data_format='NHWC' if not using CUDA
            if from_video:
                assert os.path.isfile(from_video)
                data_source = Video(from_video,
                                    tensorflow_session=session, batch_size=batch_size,
                                    data_format='NCHW' if gpu_available else 'NHWC',
                                    eye_image_shape=(108, 180))
            else:
                data_source = Webcam(tensorflow_session=session, batch_size=batch_size,
                                    camera_id=args.camera_id, fps=args.fps,
                                    data_format='NCHW' if gpu_available else 'NHWC',
                                    eye_image_shape=(36, 60))

            # Define model
            if from_video:
                model = ELG(
                    session, train_data={'videostream': data_source},
                    first_layer_stride=3,
                    num_modules=3,
                    num_feature_maps=64,
                    learning_schedule=[
                        {
                            'loss_terms_to_optimize': {'dummy': ['hourglass', 'radius']},
                        },
                    ],
                )
            else:
                model = ELG(
                    session, train_data={'videostream': data_source},
                    first_layer_stride=1,
                    num_modules=2,
                    num_feature_maps=32,
                    learning_schedule=[
                        {
                            'loss_terms_to_optimize': {'dummy': ['hourglass', 'radius']},
                        },
                    ],
                )

            # Record output frames to file if requested
            if record_video:
                video_out = None
                video_out_queue = queue.Queue()
                video_out_should_stop = False
                video_out_done = threading.Condition()
                video_recorder = cv.VideoWriter(
                                record_video, cv.VideoWriter_fourcc(*'XVID'),
                                20, (1280, 720),
                            )
                
                def _record_frame():
                    nonlocal video_out  # video_out is local to __init__, not a module global
                    last_frame_time = None
                    out_fps = 60
                    out_frame_interval = 1.0 / out_fps
                    while not video_out_should_stop:
                        frame_index = video_out_queue.get()
                        if frame_index is None:
                            break
                        # assert frame_index in data_source._frames
                        frame = data_source._frames[frame_index]['bgr']
                        h, w, _ = frame.shape
                        if video_out is None:
                            video_out = cv.VideoWriter(
                                record_video, cv.VideoWriter_fourcc(*'XVID'),
                                60, (w, h),
                            )
                        now_time = time.time()
                        if last_frame_time is not None:
                            time_diff = now_time - last_frame_time
                            while time_diff > 0.0:
                                # video_out.write(frame)
                                time_diff -= out_frame_interval
                        last_frame_time = now_time
                    video_out.release()
                    with video_out_done:
                        video_out_done.notify_all()
                
                # record_thread = threading.Thread(target=_record_frame, name='record')
                # record_thread.daemon = True
                # record_thread.start()

            # Begin visualization thread
            inferred_stuff_queue = queue.Queue()

            def _visualize_output():
                restrictedflag = False
                last_frame_index = 0
                last_frame_time = time.time()
                fps_history = []
                all_gaze_histories = []

                if args.fullscreen:
                    cv.namedWindow('vis', cv.WND_PROP_FULLSCREEN)
                    cv.setWindowProperty('vis', cv.WND_PROP_FULLSCREEN, cv.WINDOW_FULLSCREEN)

                while True:
                    # If no output to visualize, show unannotated frame
                    if inferred_stuff_queue.empty():
                        next_frame_index = last_frame_index + 1
                        if next_frame_index in data_source._frames:
                            next_frame = data_source._frames[next_frame_index]
                            if 'faces' in next_frame and len(next_frame['faces']) == 0:
                                if not args.headless:
                                    resized_img = cv.resize(next_frame['bgr'], (1280, 720))
                                    cv.imshow('vis', resized_img)
                                    if record_video:  # video_recorder only exists when recording
                                        video_recorder.write(resized_img)
                                    # cv.imshow('vis', flipped_bgr)
                                if record_video:
                                    video_out_queue.put_nowait(next_frame_index)
                                last_frame_index = next_frame_index
                        if cv.waitKey(1) & 0xFF == ord('q'):
                            return
                        continue

                    # Get output from neural network and visualize
                    output = inferred_stuff_queue.get()
                    bgr = None

                    line_lengths = []
                    look_flag = False

                    for j in range(batch_size):

                        print("Batch Size, J: ", batch_size, j)

                        frame_index = output['frame_index'][j]
                        if frame_index not in data_source._frames:
                            continue
                        frame = data_source._frames[frame_index]

                        # Decide which landmarks are usable
                        heatmaps_amax = np.amax(output['heatmaps'][j, :].reshape(-1, 18), axis=0)
                        can_use_eye = np.all(heatmaps_amax > 0.7)
                        can_use_eyelid = np.all(heatmaps_amax[0:8] > 0.75)
                        can_use_iris = np.all(heatmaps_amax[8:16] > 0.8)

                        start_time = time.time()
                        eye_index = output['eye_index'][j]
                        bgr = frame['bgr']
                        eye = frame['eyes'][eye_index]
                        eye_image = eye['image']
                        eye_side = eye['side']
                        eye_landmarks = output['landmarks'][j, :]
                        eye_radius = output['radius'][j][0]
                        if eye_side == 'left':
                            eye_landmarks[:, 0] = eye_image.shape[1] - eye_landmarks[:, 0]
                            eye_image = np.fliplr(eye_image)

                        # Embed eye image and annotate for picture-in-picture
                        eye_upscale = 2
                        eye_image_raw = cv.cvtColor(cv.equalizeHist(eye_image), cv.COLOR_GRAY2BGR)
                        eye_image_raw = cv.resize(eye_image_raw, (0, 0), fx=eye_upscale, fy=eye_upscale)
                        eye_image_annotated = np.copy(eye_image_raw)
                        if can_use_eyelid:
                            cv.polylines(
                                eye_image_annotated,
                                [np.round(eye_upscale*eye_landmarks[0:8]).astype(np.int32)
                                                                        .reshape(-1, 1, 2)],
                                isClosed=True, color=(255, 255, 0), thickness=1, lineType=cv.LINE_AA,
                            )
                        if can_use_iris:
                            cv.polylines(
                                eye_image_annotated,
                                [np.round(eye_upscale*eye_landmarks[8:16]).astype(np.int32)
                                                                        .reshape(-1, 1, 2)],
                                isClosed=True, color=(0, 255, 255), thickness=1, lineType=cv.LINE_AA,
                            )
                            cv.drawMarker(
                                eye_image_annotated,
                                tuple(np.round(eye_upscale*eye_landmarks[16, :]).astype(np.int32)),
                                color=(0, 255, 255), markerType=cv.MARKER_CROSS, markerSize=4,
                                thickness=1, line_type=cv.LINE_AA,
                            )
                        try:
                            face_index = int(eye_index / 2)
                            eh, ew, _ = eye_image_raw.shape
                            v0 = face_index * 2 * eh
                            v1 = v0 + eh
                            v2 = v1 + eh
                            u0 = 0 if eye_side == 'left' else ew
                            u1 = u0 + ew
                            # print("eye_image_raw:", eye_image_raw)
                            # print("eye_image_annotated", eye_image_annotated)
                            # print("BGR: ", bgr)
                            # print("v0: {}, v1: {}, v2: {}, u0: {}, u1: {}".format(v0, v1, v2, u0, u1))
                            # bgr[v0:v1, u0:u1] = eye_image_raw
                            # bgr[v1:v2, u0:u1] = eye_image_annotated
                        except Exception as e:
                            print("\t Exception on matching numpy shape. ", e)
                            pass

                        # Visualize preprocessing results
                        frame_landmarks = (frame['smoothed_landmarks']
                                        if 'smoothed_landmarks' in frame
                                        else frame['landmarks'])
                        for f, face in enumerate(frame['faces']):
                            # for landmark in frame_landmarks[f][:-1]:
                            #     cv.drawMarker(bgr, tuple(np.round(landmark).astype(np.int32)),
                            #                   color=(0, 0, 255), markerType=cv.MARKER_STAR,
                            #                   markerSize=2, thickness=1, line_type=cv.LINE_AA)

                            # tuple(np.round(np.add(face[:2], face[2:])

                            # print("Frame: {}, Face: {}".format(np.round(np.add(face[:2], face[2:])), bgr.shape[0]))
                            bgr = cv.line(bgr, (650, 100), (650, 600), (255, 255, 255), 3)
                            if np.round(face[0]) < 650:

                                cv.rectangle(
                                    bgr, tuple(np.round(face[:2]).astype(np.int32)),
                                    tuple(np.round(np.add(face[:2], face[2:])).astype(np.int32)),
                                    color=(0, 0, 255), thickness=2, lineType=cv.LINE_AA,
                                )
                                restrictedflag = True
                                
                            else:
                                cv.rectangle(
                                    bgr, tuple(np.round(face[:2]).astype(np.int32)),
                                    tuple(np.round(np.add(face[:2], face[2:])).astype(np.int32)),
                                    color=(0, 255, 0), thickness=2, lineType=cv.LINE_AA,
                                )
                                restrictedflag = False

                            if restrictedflag:
                                cv.putText(bgr, "RESTRICTED", org=(50, 30),
                                fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=1.2,
                                color=(0, 0, 255), thickness=3, lineType=cv.LINE_AA)
                                restrictedflag = False
                            else:
                                cv.putText(bgr, "ALLOWED", org=(50, 30),
                                fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=1.2,
                                color=(0, 255, 0), thickness=3, lineType=cv.LINE_AA)

                        # Transform predictions
                        eye_landmarks = np.concatenate([eye_landmarks,
                                                        [[eye_landmarks[-1, 0] + eye_radius,
                                                        eye_landmarks[-1, 1]]]])
                        eye_landmarks = np.asmatrix(np.pad(eye_landmarks, ((0, 0), (0, 1)),
                                                        'constant', constant_values=1.0))
                        eye_landmarks = (eye_landmarks *
                                        eye['inv_landmarks_transform_mat'].T)[:, :2]
                        eye_landmarks = np.asarray(eye_landmarks)
                        eyelid_landmarks = eye_landmarks[0:8, :]
                        iris_landmarks = eye_landmarks[8:16, :]
                        iris_centre = eye_landmarks[16, :]
                        eyeball_centre = eye_landmarks[17, :]
                        eyeball_radius = np.linalg.norm(eye_landmarks[18, :] -
                                                        eye_landmarks[17, :])

                        

                        # Smooth and visualize gaze direction
                        num_total_eyes_in_frame = len(frame['eyes'])
                        if len(all_gaze_histories) != num_total_eyes_in_frame:
                            all_gaze_histories = [list() for _ in range(num_total_eyes_in_frame)]
                        gaze_history = all_gaze_histories[eye_index]
                        if can_use_eye:
                            # Visualize landmarks
                            # cv.drawMarker(  # Eyeball centre
                            #     bgr, tuple(np.round(eyeball_centre).astype(np.int32)),
                            #     color=(0, 255, 0), markerType=cv.MARKER_CROSS, markerSize=4,
                            #     thickness=1, line_type=cv.LINE_AA,
                            # )
                            # cv.circle(  # Eyeball outline
                            #     bgr, tuple(np.round(eyeball_centre).astype(np.int32)),
                            #     int(np.round(eyeball_radius)), color=(0, 255, 0),
                            #     thickness=1, lineType=cv.LINE_AA,
                            # )




                            # Draw "gaze"
                            # from models.elg import estimate_gaze_from_landmarks
                            # current_gaze = estimate_gaze_from_landmarks(
                            #     iris_landmarks, iris_centre, eyeball_centre, eyeball_radius)
                            i_x0, i_y0 = iris_centre
                            e_x0, e_y0 = eyeball_centre
                            theta = -np.arcsin(np.clip((i_y0 - e_y0) / eyeball_radius, -1.0, 1.0))
                            phi = np.arcsin(np.clip((i_x0 - e_x0) / (eyeball_radius * -np.cos(theta)),
                                                    -1.0, 1.0))
                            current_gaze = np.array([theta, phi])
                            gaze_history.append(current_gaze)
                            gaze_history_max_len = 10
                            if len(gaze_history) > gaze_history_max_len:
                                gaze_history = gaze_history[-gaze_history_max_len:]
                            bgr, line_length = util.gaze.draw_gaze(bgr, iris_centre, np.mean(gaze_history, axis=0),
                                                length=120.0, thickness=1)
                            line_lengths.append(line_length)
                        else:
                            gaze_history.clear()

                        # if can_use_eyelid:
                        #     cv.polylines(
                        #         bgr, [np.round(eyelid_landmarks).astype(np.int32).reshape(-1, 1, 2)],
                        #         isClosed=True, color=(255, 255, 0), thickness=1, lineType=cv.LINE_AA,
                        #     )

                        # if can_use_iris:
                        #     cv.polylines(
                        #         bgr, [np.round(iris_landmarks).astype(np.int32).reshape(-1, 1, 2)],
                        #         isClosed=True, color=(0, 255, 255), thickness=1, lineType=cv.LINE_AA,
                        #     )
                        #     cv.drawMarker(
                        #         bgr, tuple(np.round(iris_centre).astype(np.int32)),
                        #         color=(0, 255, 255), markerType=cv.MARKER_CROSS, markerSize=4,
                        #         thickness=1, line_type=cv.LINE_AA,
                        #     )

                        dtime = 1e3*(time.time() - start_time)
                        if 'visualization' not in frame['time']:
                            frame['time']['visualization'] = dtime
                        else:
                            frame['time']['visualization'] += dtime

                        def _dtime(before_id, after_id):
                            return int(1e3 * (frame['time'][after_id] - frame['time'][before_id]))

                        def _dstr(title, before_id, after_id):
                            return '%s: %dms' % (title, _dtime(before_id, after_id))

                        if eye_index == len(frame['eyes']) - 1:
                            # Calculate timings
                            frame['time']['after_visualization'] = time.time()
                            fps = int(np.round(1.0 / (time.time() - last_frame_time)))
                            fps_history.append(fps)
                            if len(fps_history) > 60:
                                fps_history = fps_history[-60:]
                            fps_str = '%d FPS' % np.mean(fps_history)
                            last_frame_time = time.time()
                            fh, fw, _ = bgr.shape
                            cv.putText(bgr, fps_str, org=(fw - 110, fh - 20),
                                    fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=0.8,
                                    color=(0, 0, 0), thickness=1, lineType=cv.LINE_AA)
                            cv.putText(bgr, fps_str, org=(fw - 111, fh - 21),
                                    fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=0.79,
                                    color=(255, 255, 255), thickness=1, lineType=cv.LINE_AA)

                            # if j % 2 == 1:
                            
                            print("\n\n\t\t Line Lengths: ", line_lengths)
                            # print("\n\n\t\t Face: ", (np.round(face[2] + 5).astype(np.int32), np.round(face[3] - 10).astype(np.int32)))
                            for line_length in line_lengths:
                                if line_length < 50:
                                    look_flag = True
                                                            
                            if look_flag and line_lengths:
                                text_look = ""
                                print("\t LOOKING", fw, fh)
                                # cv.rectangle(
                                # bgr, tuple(np.round(face[:2]).astype(np.int32)),
                                # tuple(np.round(np.add(face[:2], face[2:])).astype(np.int32)),
                                # color=(0, 0, 255), thickness=2, lineType=cv.LINE_AA,)
                                                                                        
                                # cv.rectangle(bgr, (2633, 10), (2930, 85), (0, 0, 0), -1)
                                # cv.putText(bgr, "LOOKING", org=(2644, 70),
                                #     fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=2,
                                #     color=(0, 0, 225), thickness=4, lineType=cv.LINE_AA)
                                cv.rectangle(bgr, (2633, 10), (2930, 85), (0, 0, 225), 5)
                                # cv.rectangle(bgr, (2639, 75), (2641, 77), (0, 0, 225), 2) 
                                # cv.rectangle(bgr, (fw - 48, fh - 700), (fw - 43, fh - 695), (0, 0, 225), 2)
                                # rgb_image = cv.cvtColor(frame['bgr'], cv.COLOR_BGR2RGB)
                                # database.MarkingProcess(img = rgb_image, bboxs = frame['faces'], lookingflag = look_flag, frameindex = frame['frame_index'])
                            
                            else:
                                text_look = ""
                                print("\t Not Looking")
                                # rgb_image = cv.cvtColor(frame['bgr'], cv.COLOR_BGR2RGB)
                                # database.MarkingProcess(img = rgb_image, bboxs = frame['faces'], lookingflag = look_flag, frameindex = frame['frame_index'])

                            for face in frame['faces']:
                                cv.putText(bgr, text_look, (np.round(face[0] + 5).astype(np.int32), np.round(face[1] - 10).astype(np.int32)),
                                        fontFace=cv.FONT_HERSHEY_DUPLEX, fontScale=0.8,
                                        color=(0, 0, 255), thickness=1, lineType=cv.LINE_AA)     
                            
                            if not args.headless:
                                resized_img = cv.resize(bgr, (1280, 720))
                                cv.imshow('vis', resized_img)
                                if record_video:  # video_recorder only exists when recording
                                    video_recorder.write(resized_img)
                                # cv.imshow('vis', bgr)
                            last_frame_index = frame_index

                            # Record frame?
                            if record_video:
                                video_out_queue.put_nowait(frame_index)

                            # Quit?
                            if cv.waitKey(1) & 0xFF == ord('q'):
                                return

                            # Print timings
                            if frame_index % 60 == 0:
                                latency = _dtime('before_frame_read', 'after_visualization')
                                processing = _dtime('after_frame_read', 'after_visualization')
                                timing_string = ', '.join([
                                    _dstr('read', 'before_frame_read', 'after_frame_read'),
                                    _dstr('preproc', 'after_frame_read', 'after_preprocessing'),
                                    'infer: %dms' % int(frame['time']['inference']),
                                    'vis: %dms' % int(frame['time']['visualization']),
                                    'proc: %dms' % processing,
                                    'latency: %dms' % latency,
                                ])
                                print('%08d [%s] %s' % (frame_index, fps_str, timing_string))
                            
            visualize_thread = threading.Thread(target=_visualize_output, name='visualization')
            visualize_thread.daemon = True
            visualize_thread.start()

            # Do inference forever
            infer = model.inference_generator()
            while True:
                output = next(infer)
                for frame_index in np.unique(output['frame_index']):
                    if frame_index not in data_source._frames:
                        continue
                    frame = data_source._frames[frame_index]
                    if 'inference' in frame['time']:
                        frame['time']['inference'] += output['inference_time']
                    else:
                        frame['time']['inference'] = output['inference_time']
                inferred_stuff_queue.put_nowait(output)

                if not visualize_thread.is_alive():
                    break

                if not data_source._open:
                    break

            # Close video recording
            if record_video and video_out is not None:
                video_out_should_stop = True
                video_out_queue.put_nowait(None)
                with video_out_done:
                    video_out_done.wait()
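
The visualization thread above calls `util.gaze.draw_gaze` and unpacks two return values (the annotated image and a line length that is then used as a rough "looking at camera" test). The upstream GazeML helper of the same name appears to return only the image, so the version used here is presumably a modified copy. A hypothetical sketch of such a variant, projecting the smoothed (pitch, yaw) angles onto the image plane, is:

import cv2 as cv
import numpy as np

def draw_gaze(image_out, eye_pos, pitchyaw, length=120.0, thickness=1, color=(0, 0, 255)):
    """Hypothetical variant of util.gaze.draw_gaze: draw the gaze ray from eye_pos and
    also return the length of its 2D projection (a short projection suggests the gaze
    points roughly toward the camera)."""
    pitch, yaw = pitchyaw
    dx = -length * np.sin(yaw) * np.cos(pitch)   # horizontal component of the projected ray
    dy = -length * np.sin(pitch)                 # vertical component of the projected ray
    start = tuple(np.round(eye_pos).astype(np.int32))
    end = tuple(np.round([eye_pos[0] + dx, eye_pos[1] + dy]).astype(np.int32))
    cv.arrowedLine(image_out, start, end, color, thickness, cv.LINE_AA, tipLength=0.2)
    return image_out, float(np.hypot(dx, dy))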