def video_uniform_sample_n_frames_old(video_path, n_samples, max_dim):
    """
    Disabled sampler that would take ``n_samples`` frames from a video.

    The function raises unconditionally on entry because it still lacks an
    argument selecting the resizing type. Everything below the ``raise`` is
    unreachable and is kept only as a reference for the sampling logic.

    :param video_path: path handed to ``FFMPEG_VideoReader``.
    :param n_samples: number of frames to sample, evenly spaced in time.
    :param max_dim: value forwarded to
        ``image_utils.resize_keep_aspect_ratio_max_dim``.
    :raises Exception: always.
    """
    raise Exception('Needs to add argument about resizing type')

    # ---- unreachable reference implementation ----
    reader = FFMPEG_VideoReader(video_path, False)
    reader.initialize()

    # time gap (seconds) between two consecutive samples
    interval_sec = reader.duration / (n_samples)

    sampled = []
    for sample_idx in range(n_samples):
        raw_frame = reader.get_frame(sample_idx * interval_sec)

        # shrink the frame before storing it; it is fed to caffe afterwards
        small_frame = image_utils.resize_keep_aspect_ratio_max_dim(raw_frame, max_dim)

        # frames come as uint in 0-255, caffe wants float32 in 0-1
        sampled.append(small_frame.astype('float32') / float(255))

    # very important, or we'd have memory leakage
    reader.__del__()

    return sampled
def get_regions(video_path, annot, resize_type, verbose=False):
    """
    Read the frames whose numbers are the keys of the "annot" dictionary,
    crop the regions listed for each frame and return them as one array.

    :param video_path: path handed to ``FFMPEG_VideoReader``.
    :param annot: dictionary mapping a frame number to a list of region
        descriptions. For every description, items ``[1:5]`` are read as the
        bounding box ``(x1, y1, x2, y2)``. Frame numbers are treated as
        1-based (frame ``n`` is read at time ``(n - 1) / fps``).
    :param resize_type: one of ``'resize'``, ``'resize_crop'`` or
        ``'resize_crop_scaled'``; selects the ``image_utils`` function applied
        to every cropped region.
    :param verbose: if True, print progress while reading.
    :return: float32 array of shape ``(n_regions, 224, 224, 3)`` with the pixel
        values divided by 255.
    :raises AssertionError: if ``resize_type`` is not one of the known values.
    """

    # resolve the resize function lazily, so only the selected helper is
    # looked up on image_utils
    resize_function_names = {
        'resize': 'resize_frame',
        'resize_crop': 'resize_crop',
        'resize_crop_scaled': 'resize_crop_scaled',
    }
    assert resize_type in resize_function_names
    resize_function = getattr(image_utils, resize_function_names[resize_type])

    frame_size = 224
    # bounding boxes are scaled by this factor before cropping
    # NOTE(review): presumably the annotations were made on half-size frames -- confirm
    bbox_resize_factor = 2

    # .values()/.items() work on both Python 2 and 3 (iteritems() is Python 2 only)
    n_regions = sum(len(regions_info) for regions_info in annot.values())
    regions = np.zeros(shape=(n_regions, frame_size, frame_size, 3), dtype='float32')

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    try:
        fps = float(cap.fps)
        region_idx = -1

        for frame_num, regions_info in annot.items():

            # progress is checked once per frame, not once per region
            if (region_idx + 1) % 100 == 0 and verbose:
                print(' ... reading region %d/%d' % (region_idx + 1, n_regions))

            # get the frame
            time_sec = (frame_num - 1) / fps
            frame = cap.get_frame(time_sec)

            # get the regions (resized) from the frame
            for region_info in regions_info:
                region_idx += 1

                # builtin int replaces np.int, which was removed from NumPy
                bbox = np.multiply(region_info[1:5], bbox_resize_factor).astype(int)
                x1, y1, x2, y2 = bbox
                region = frame[y1:y2, x1:x2]

                # resize the region to fit in the array, it's going to be used by caffe anyway
                # NOTE(review): assumes resize_function returns a 224x224x3 image -- confirm
                region = resize_function(region)

                # region encoded as uint and values are from 0-255,
                # but caffe needs float32 and values from 0-1
                regions[region_idx] = region.astype('float32') / float(255)
    finally:
        # very important, or we'd have memory leakage; done in "finally" so
        # the reader is released even if reading or resizing fails
        cap.__del__()

    return regions
def video_save_frames_specific_duration(action_num, video_num, video_path, frames_root_pathes, start_stop_sec, image_name_format, verbose=False):
    """
    Save to disk the frames of several time segments of one video.

    For every ``(start_sec, stop_sec)`` pair, each frame between the two
    (both shifted by a 0.25 s offset) is read, converted from RGB to BGR,
    passed through ``image_utils.resize_crop`` with a 224x224 target and
    written with ``cv2.imwrite`` into the directory paired with that segment.

    :param action_num: action number, only used in the verbose output.
    :param video_num: video number, only used in the verbose output.
    :param video_path: path handed to ``FFMPEG_VideoReader``.
    :param frames_root_pathes: output directories, one per segment.
    :param start_stop_sec: sequence of ``(start_sec, stop_sec)`` pairs, same
        length as ``frames_root_pathes``.
    :param image_name_format: ``%``-format string taking the frame index and
        producing the image file name.
    :param verbose: if True, print progress.
    :raises AssertionError: if the two sequences differ in length.
    """

    assert len(frames_root_pathes) == len(start_stop_sec)

    img_dim = 224
    # offset of starting/stopping the action
    sec_offset = 0.25

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    try:
        fps = float(cap.fps)

        for frame_root_path, s_s_sec in zip(frames_root_pathes, np.array(start_stop_sec)):
            start_sec, stop_sec = s_s_sec

            start_idx = int((start_sec + sec_offset) * fps)
            stop_idx = int((stop_sec + sec_offset) * fps) + 1

            if verbose:
                print('action, video: %d, %d' % (action_num, video_num))
                print('%d/%d' % (start_sec, stop_sec))
                print('%d/%d' % (start_idx, stop_idx))

            for idx_frame in range(start_idx, stop_idx):
                time_sec = idx_frame / fps
                if verbose and idx_frame % 100 == 0:
                    print('... time_sec, frame: %d/%d' % (time_sec, idx_frame))

                frame = cap.get_frame(time_sec)
                # the reader gives RGB while cv2.imwrite expects BGR
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                frame = image_utils.resize_crop(frame, target_width=img_dim, target_height=img_dim)

                image_name = image_name_format % (idx_frame, )
                frame_path = os.path.join(frame_root_path, image_name)
                # NOTE(review): the return value of imwrite is ignored, so a
                # failed write (e.g. missing directory) may go unnoticed
                cv2.imwrite(frame_path, frame)
    finally:
        # very important, or we'd have memory leakage; done in "finally" so
        # the reader is released even if reading or writing fails
        cap.__del__()
def video_uniform_sampling(spf, video_path, resize_type, is_local, verbose=False):
    """
    Sample one frame every ``spf`` seconds from a video.

    :param spf: seconds per frame, i.e. the time gap between two samples.
    :param video_path: path handed to ``FFMPEG_VideoReader``.
    :param resize_type: one of ``'resize'``, ``'resize_crop'`` or
        ``'resize_crop_scaled'``; selects the ``image_utils`` function applied
        to every sampled frame.
    :param is_local: not used by this function; kept so that existing callers
        keep working.
    :param verbose: if True, print progress every 100 frames.
    :return: tuple ``(frames, fps, n_frames, duration)`` where ``frames`` is a
        float32 array of shape ``(n_samples, 224, 224, 3)`` with the pixel
        values divided by 255, and the other three items are read from the
        video reader.
    :raises AssertionError: if ``resize_type`` is not one of the known values.
    """

    # resolve the resize function lazily, so only the selected helper is
    # looked up on image_utils
    resize_function_names = {
        'resize': 'resize_frame',
        'resize_crop': 'resize_crop',
        'resize_crop_scaled': 'resize_crop_scaled',
    }
    assert resize_type in resize_function_names
    resize_function = getattr(image_utils, resize_function_names[resize_type])

    frame_size = 224

    cap = FFMPEG_VideoReader(video_path, False)
    cap.initialize()
    try:
        fps = cap.fps
        n_frames = cap.nframes
        duration = cap.duration

        # if the video is shorter than spf there would be no samples,
        # so take at least 1 frame of the video
        n_samples = max(int(duration / float(spf)), 1)

        frames = np.zeros(shape=(n_samples, frame_size, frame_size, 3), dtype='float32')
        for i in range(n_samples):
            num = i + 1
            if num % 100 == 0 and verbose:
                print(' ... reading frame %d/%d' % (num, n_samples))

            frame = cap.get_frame(i * spf)

            # resize frame to fit in the array, it's going to be used by caffe anyway
            # NOTE(review): assumes resize_function returns a 224x224x3 image -- confirm
            frame = resize_function(frame)

            # frame encoded as uint and values are from 0-255
            # but caffe needs float32 and values from 0-1
            frames[i] = frame.astype('float32') / float(255)
    finally:
        # very important, or we'd have memory leakage; done in "finally" so
        # the reader is released even if reading or resizing fails
        cap.__del__()

    return frames, fps, n_frames, duration