# Example #1
# 0
def extract_frames(video_path, inference_engine, path_frames=None, return_frames=True):
    """Decode a video, resample its frames to the model frame rate, and
    optionally save thumbnails and/or return the frames.

    :param video_path: Path to the video file to decode.
    :param inference_engine: Engine providing ``expected_frame_size`` (used to
        rescale frames on read) and ``fps`` (target sampling rate).
    :param path_frames: Optional directory for saving JPEG thumbnails of the
        sampled frames. Frames are only saved when the directory does not
        already exist (existing directories are treated as already populated).
    :param return_frames: When False, the frame array is not returned.
    :return: Array of sampled frames, or None when ``return_frames`` is False.
    """
    save_frames = path_frames is not None and not os.path.exists(path_frames)

    if not save_frames and not return_frames:
        # Nothing to do
        return None

    # Read frames from video; each read yields (original, rescaled) — we keep
    # the rescaled version sized for the model.
    video_source = camera.VideoSource(size=inference_engine.expected_frame_size, filename=video_path)
    video_fps = video_source.get_fps()
    frames = []

    while True:
        images = video_source.get_image()
        if images is None:
            break
        image, image_rescaled = images
        frames.append(image_rescaled)

    # Resample to the model's frame rate.
    frames = uniform_frame_sample(np.array(frames), inference_engine.fps / video_fps)

    # Save frames if a (new) path was provided
    if save_frames:
        os.makedirs(path_frames)

        # One thumbnail per model step; [:, :, ::-1] flips BGR -> RGB for PIL.
        for idx, frame in enumerate(frames[::MODEL_TEMPORAL_STRIDE]):
            Image.fromarray(frame[:, :, ::-1]).resize((400, 300)).save(
                os.path.join(path_frames, f'{idx}.jpg'), quality=50)

    # Bug fix: honor return_frames — previously the frame array was returned
    # even when the caller passed return_frames=False (save-only use case).
    return frames if return_frames else None
# Example #2
# 0
def compute_features(video_path, path_out, inference_engine, num_timesteps=1, path_frames=None,
                     batch_size=None):
    """Run the inference engine over a video and save per-timestep features.

    Frames are read from ``video_path``, resampled to the engine's frame rate,
    left-padded with the first frame to warm up the model's internal state,
    then run through the engine. The resulting feature array is saved to
    ``path_out`` with ``np.save``.

    :param video_path: Path to the video file to decode.
    :param inference_engine: Engine providing ``expected_frame_size``, ``fps``
        and ``infer``.
    :param path_out: Output path for the saved feature array (.npy).
    :param num_timesteps: Number of warm-up feature timesteps to keep and
        prepend to the real predictions.
    :param path_frames: Optional directory; when given, one JPEG thumbnail is
        saved per temporal stride of the (unpadded) video frames.
    :param batch_size: Forwarded to ``inference_engine.infer``.
    """
    # Decode the video; each read yields (original, rescaled) — keep the
    # rescaled frame sized for the model.
    video_source = camera.VideoSource(camera_id=None,
                                      size=inference_engine.expected_frame_size,
                                      filename=video_path)
    video_fps = video_source.get_fps()
    frames = []
    while True:
        images = video_source.get_image()
        if images is None:
            break
        else:
            image, image_rescaled = images
            frames.append(image_rescaled)
    # Resample to the model's frame rate.
    frames = uniform_frame_sample(np.array(frames), inference_engine.fps / video_fps)

    # Compute how many frames are padded to the left in order to "warm up" the model -- removing previous predictions
    # from the internal states -- with the first image, and to ensure we have enough frames in the video.
    # We also want the first non-padding frame to output a feature.
    frames_to_add = MODEL_TEMPORAL_STRIDE * (MODEL_TEMPORAL_DEPENDENCY // MODEL_TEMPORAL_STRIDE + 1) - 1

    # Possible improvement: investigate if a symmetric or reflect padding could be better for
    # temporal annotation prediction instead of repeating the static first frame.
    frames = np.pad(frames, ((frames_to_add, 0), (0, 0), (0, 0), (0, 0)),
                    mode='edge')

    # Inference: add a batch dimension and convert to float32.
    clip = frames[None].astype(np.float32)

    # Run the model on the padded frames in order to flush the state left in
    # the current model by the previous video.
    pre_features = inference_engine.infer(clip[:, 0:frames_to_add + 1], batch_size=batch_size)

    # Depending on the number of layers we fine-tune, we keep the number of
    # warm-up features equal to the temporal dependency of the model.
    temporal_dependancy_features = np.array(pre_features)[-num_timesteps:]

    # Predictions for the actual (non-padding) video frames.
    predictions = inference_engine.infer(clip[:, frames_to_add + 1:], batch_size=batch_size)
    predictions = np.concatenate([temporal_dependancy_features, predictions], axis=0)
    features = np.array(predictions)
    os.makedirs(os.path.dirname(path_out), exist_ok=True)
    np.save(path_out, features)

    if path_frames is not None:
        os.makedirs(os.path.dirname(path_frames), exist_ok=True)
        frames_to_save = []
        # Remove the padded frames; extract frames starting at the first real
        # one (which produced the first kept feature), one per temporal stride.
        for e, frame in enumerate(frames[frames_to_add:]):
            if e % MODEL_TEMPORAL_STRIDE == 0:
                frames_to_save.append(frame)

        # [:, :, ::-1] flips BGR -> RGB for PIL before saving thumbnails.
        for e, frame in enumerate(frames_to_save):
            Image.fromarray(frame[:, :, ::-1]).resize((400, 300)).save(
                os.path.join(path_frames, str(e) + '.jpg'), quality=50)