Example #1
0
def video_uniform_sample_n_frames_old(video_path, n_samples, max_dim):
    """
    Legacy sampler meant to pick n_samples frames spread evenly over the video.

    The function is intentionally disabled: it raises as soon as it is called,
    so none of the statements after the raise are ever executed. They are kept
    only as a reference for the old sampling procedure.
    """

    raise Exception('Needs to add argument about resizing type')

    reader = FFMPEG_VideoReader(video_path, False)
    reader.initialize()
    fps = reader.fps
    n_frames = reader.nframes
    duration = reader.duration
    # distance (in seconds) between two consecutive samples
    seconds_per_sample = duration / (n_samples)

    def _read_sample(sample_idx):
        # grab the frame at the sample time-stamp and shrink it so that
        # its largest side fits max_dim
        raw = reader.get_frame(sample_idx * seconds_per_sample)
        small = image_utils.resize_keep_aspect_ratio_max_dim(raw, max_dim)
        # pixel values come as 0-255, the consumer (caffe) wants float32 in 0-1
        return small.astype('float32') / float(255)

    frames = [_read_sample(sample_idx) for sample_idx in range(n_samples)]

    # release the reader explicitly, otherwise memory leaks
    reader.__del__()

    return frames
Example #2
0
def get_regions(video_path, annot, resize_type, verbose=False):
    """
    Get the frames whose numbers are given in the "annot" dictionary. Then, for each frame, cut out the
    regions specified in the "annot" dictionary, resize them and return them as a single array.

    :param video_path: path of the video, passed to FFMPEG_VideoReader.
    :param annot: dictionary mapping a frame number (1-based: frame 1 is read at time 0) to a list of
        region records. Items 1..4 of every record are read as the bounding box (x1, y1, x2, y2);
        the box is multiplied by 2 before it is used to index the frame.
    :param resize_type: one of 'resize', 'resize_crop', 'resize_crop_scaled'. Selects which function of
        image_utils is applied to every region.
    :param verbose: if True, print a progress message whenever a new frame is started and the number
        of regions read so far is a multiple of 100.
    :return: float32 array of shape (n_regions, 224, 224, 3) with pixel values divided by 255. Regions
        are stored in the iteration order of "annot".
    :raises AssertionError: if resize_type is not one of the supported values.
    """

    assert resize_type in ['resize', 'resize_crop', 'resize_crop_scaled']

    # look the resize function up by name, so only the selected attribute of image_utils is touched
    resize_function_names = {
        'resize': 'resize_frame',
        'resize_crop': 'resize_crop',
        'resize_crop_scaled': 'resize_crop_scaled',
    }
    resize_function = getattr(image_utils, resize_function_names[resize_type])

    frame_size = 224
    bbox_resize_factor = 2

    # .values() / .items() work on both python 2 and 3 (iteritems() exists only on python 2)
    n_regions = sum(len(regions_info) for regions_info in annot.values())
    # assumes the resize function returns (frame_size, frame_size, 3) images - TODO confirm in image_utils
    regions = np.zeros(shape=(n_regions, frame_size, frame_size, 3),
                       dtype='float32')

    cap = FFMPEG_VideoReader(video_path, False)
    try:
        cap.initialize()
        fps = float(cap.fps)
        region_idx = -1

        for frame_num, regions_info in annot.items():

            if (region_idx + 1) % 100 == 0 and verbose:
                print(' ... reading region %d/%d' % (region_idx + 1, n_regions))

            # get the frame, frame numbers start at 1 while time starts at 0
            time_sec = (frame_num - 1) / fps
            frame = cap.get_frame(time_sec)

            # get the regions (resized) from the frame
            for region_info in regions_info:
                region_idx += 1
                # builtin int instead of np.int, which was removed from recent numpy versions
                bbox = np.multiply(region_info[1:5], bbox_resize_factor).astype(int)
                x1, y1, x2, y2 = bbox
                region = frame[y1:y2, x1:x2]
                # resize region to fit in the array, it's going to be used by caffe anyway
                region = resize_function(region)
                # region encoded as uint and values are from 0-255, but caffe needs float32 and values from 0-1
                regions[region_idx] = region.astype('float32') / float(255)
    finally:
        # very important, or we'd have memory leakage; done in "finally" so the reader
        # is released even if reading or resizing a frame fails
        cap.__del__()

    return regions
Example #3
0
def video_save_frames_specific_duration(action_num,
                                        video_num,
                                        video_path,
                                        frames_root_pathes,
                                        start_stop_sec,
                                        image_name_format,
                                        verbose=False):
    """
    Save the frames of several time segments of a video as images.

    For every (start, stop) pair in "start_stop_sec", all frames between the two time-stamps (both shifted
    by 0.25 sec) are read, converted from RGB to BGR, passed through image_utils.resize_crop with a
    224x224 target and written with cv2.imwrite into the matching directory of "frames_root_pathes".

    :param action_num: action number, only used in the verbose messages.
    :param video_num: video number, only used in the verbose messages.
    :param video_path: path of the video, passed to FFMPEG_VideoReader.
    :param frames_root_pathes: one output directory per segment.
    :param start_stop_sec: one (start_sec, stop_sec) pair per segment, in seconds.
    :param image_name_format: %-format string that receives the frame index and gives the image file name.
    :param verbose: if True, print progress messages.
    :return: None, the frames are written to disk.
    :raises AssertionError: if the number of directories differs from the number of segments.
    """
    assert len(frames_root_pathes) == len(start_stop_sec)

    img_dim = 224
    # offset of starting/stopping the action
    sec_offset = 0.25

    cap = FFMPEG_VideoReader(video_path, False)
    try:
        cap.initialize()
        fps = float(cap.fps)

        for frame_root_path, (start_sec, stop_sec) in zip(frames_root_pathes, start_stop_sec):

            start_idx = int((start_sec + sec_offset) * fps)
            # + 1 because range() excludes its upper bound
            stop_idx = int((stop_sec + sec_offset) * fps) + 1

            if verbose:
                print('action, video: %d, %d' % (action_num, video_num))
                print('%d/%d' % (start_sec, stop_sec))
                print('%d/%d' % (start_idx, stop_idx))

            for idx_frame in range(start_idx, stop_idx):
                time_sec = idx_frame / fps
                if verbose and idx_frame % 100 == 0:
                    print('... time_sec, frame: %d/%d' % (time_sec, idx_frame))

                frame = cap.get_frame(time_sec)
                # the frame is converted from RGB to BGR before it is handed to cv2.imwrite
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                frame = image_utils.resize_crop(frame,
                                                target_width=img_dim,
                                                target_height=img_dim)

                image_name = image_name_format % (idx_frame, )
                frame_path = os.path.join(frame_root_path, image_name)
                cv2.imwrite(frame_path, frame)
    finally:
        # very important, or we'd have memory leakage; done in "finally" so the reader
        # is released even if decoding or writing a frame fails
        cap.__del__()
Example #4
0
def video_uniform_sampling(spf,
                           video_path,
                           resize_type,
                           is_local,
                           verbose=False):
    """
    Sample one frame every "spf" seconds from the video, starting at time 0.

    :param spf: seconds per frame, i.e. the time step between two sampled frames.
    :param video_path: path of the video, passed to FFMPEG_VideoReader.
    :param resize_type: one of 'resize', 'resize_crop', 'resize_crop_scaled'. Selects which function of
        image_utils is applied to every frame.
    :param is_local: not used by this function.
    :param verbose: if True, print a progress message every 100 frames.
    :return: tuple (frames, fps, n_frames, duration). "frames" is a float32 array of shape
        (n_samples, 224, 224, 3) with pixel values divided by 255; the other three items are the
        fps, nframes and duration attributes of the reader.
    :raises AssertionError: if resize_type is not one of the supported values.
    """
    assert resize_type in ['resize', 'resize_crop', 'resize_crop_scaled']

    # look the resize function up by name, so only the selected attribute of image_utils is touched
    resize_function_names = {
        'resize': 'resize_frame',
        'resize_crop': 'resize_crop',
        'resize_crop_scaled': 'resize_crop_scaled',
    }
    resize_function = getattr(image_utils, resize_function_names[resize_type])

    frame_size = 224

    cap = FFMPEG_VideoReader(video_path, False)
    try:
        cap.initialize()
        fps = cap.fps
        n_frames = cap.nframes
        duration = cap.duration
        n_samples = int(duration / float(spf))

        # check if no samples because the video duration is less than spf
        # then at least, get 1 frame of the video
        if n_samples == 0:
            n_samples = 1

        # assumes the resize function returns (frame_size, frame_size, 3) images - TODO confirm in image_utils
        frames = np.zeros(shape=(n_samples, frame_size, frame_size, 3),
                          dtype='float32')
        for i in range(n_samples):
            num = i + 1
            if num % 100 == 0 and verbose:
                print(' ... reading frame %d/%d' % (num, n_samples))
            frame = cap.get_frame(i * spf)
            # resize frame to fit in the array, it's going to be used by caffe anyway
            frame = resize_function(frame)
            # frame encoded as uint and values are from 0-255
            # but caffe needs float32 and values from 0-1
            frames[i] = frame.astype('float32') / float(255)
    finally:
        # very important, or we'd have memory leakage; done in "finally" so the reader
        # is released even if reading or resizing a frame fails
        cap.__del__()

    return frames, fps, n_frames, duration