def make_dataset(root_path, annotation_path, subset,
                 n_samples_for_each_video, sample_duration):
    """Build a list of clip samples for *subset* from an annotation file.

    Args:
        root_path: directory containing one sub-directory per video.
        annotation_path: path to the annotation file consumed by
            ``load_annotation_data``.
        subset: annotation subset name (e.g. 'training', 'validation').
        n_samples_for_each_video: clips to draw per video; 1 means a single
            clip covering every frame.
        sample_duration: clip length in frames when sampling multiple clips.

    Returns:
        (dataset, idx_to_class): list of sample dicts and a mapping from
        label index back to class name.
    """
    data = load_annotation_data(annotation_path)
    # Heuristic: the dataset counts as split into volumes when root_path
    # holds at most len(VOLUME_SPLITS) + 1 entries.
    # NOTE(review): confirm this matches the on-disk layout.
    is_volumed = len(os.listdir(root_path)) <= (len(VOLUME_SPLITS) + 1)
    video_names, annotations = get_video_names_and_annotations(
        data, subset, is_volumed)
    class_to_idx = get_class_labels(data)
    idx_to_class = {label: name for name, label in class_to_idx.items()}

    dataset = []
    for i in range(len(video_names)):
        if i % 1000 == 0:
            print('dataset loading [{}/{}]'.format(i, len(video_names)))

        video_path = os.path.join(root_path, video_names[i])
        if not os.path.exists(video_path):
            # Fixed: original message concatenated the path with no separator.
            print('Path does not exist: ' + video_path)
            continue

        n_frames_file_path = os.path.join(video_path, 'n_frames')
        n_frames = int(load_value_file(n_frames_file_path))
        if n_frames <= 0:
            continue

        begin_t = 1
        end_t = n_frames
        sample = {
            'video': video_path,
            'segment': [begin_t, end_t],
            'n_frames': n_frames,
            'video_id': video_names[i]
        }
        if len(annotations) != 0:
            sample['label'] = class_to_idx[annotations[i]['label']]
        else:
            # No annotations for this subset (e.g. test split): unknown label.
            sample['label'] = -1

        if n_samples_for_each_video == 1:
            # Single sample spanning the whole video.
            sample['frame_indices'] = list(range(1, n_frames + 1))
            dataset.append(sample)
        else:
            if n_samples_for_each_video > 1:
                # Evenly spaced clip starts across the video.
                step = max(
                    1,
                    math.ceil((n_frames - 1 - sample_duration) /
                              (n_samples_for_each_video - 1)))
            else:
                # n_samples_for_each_video <= 0: tile clips back to back.
                step = sample_duration
            for j in range(1, n_frames, step):
                sample_j = copy.deepcopy(sample)
                sample_j['frame_indices'] = list(
                    range(j, min(n_frames + 1, j + sample_duration)))
                dataset.append(sample_j)

    return dataset, idx_to_class
def make_dataset(dataset_path, subset, sample_duration, n_samples_for_each_video):
    """Build the QUVA sample list for *subset*.

    Args:
        dataset_path: root directory; frames are read from
            ``<dataset_path>/QUVA/imgs/<subset>/<video_name>``.
        subset: subset directory name (e.g. 'train', 'val').
        sample_duration: clip length in frames when sampling multiple clips.
        n_samples_for_each_video: clips to draw per video; must be >= 1.

    Returns:
        (dataset, max_n_frames): list of sample dicts and the largest
        frame count seen across all videos.

    Raises:
        ValueError: if ``n_samples_for_each_video`` is less than 1.
    """
    dataset_path = os.path.join(dataset_path, 'QUVA')
    video_path = os.path.join(dataset_path, 'imgs')
    video_names, annotations = get_video_names_and_annotations(dataset_path, subset)

    dataset = []
    max_n_frames = 0
    for i in range(len(video_names)):
        if (i + 1) % 50 == 0 or i + 1 == len(video_names):
            print('{} dataset loading [{}/{}]'.format(subset, i + 1, len(video_names)))

        video_path_i = os.path.join(video_path, subset, video_names[i])
        if not os.path.exists(video_path_i):
            continue

        n_frames_file_path = os.path.join(video_path_i, 'n_frames')
        n_frames = int(load_value_file(n_frames_file_path))
        max_n_frames = max(max_n_frames, n_frames)
        if n_frames <= 0:
            continue

        begin_t = 1
        end_t = n_frames
        sample = {
            'video': video_path_i,
            'segment': [begin_t, end_t],
            'n_frames': n_frames,
            # First three characters of the name act as the video id --
            # NOTE(review): presumably a numeric prefix; confirm naming scheme.
            'video_id': video_names[i][0:3],
            'label': annotations[i]
        }

        if n_samples_for_each_video == 1:
            # Single sample spanning the whole video.
            sample['frame_indices'] = list(range(1, n_frames + 1))
            dataset.append(sample)
        elif n_samples_for_each_video > 1:
            # Evenly spaced clip starts across the video.
            step = int(max(
                1,
                math.ceil((n_frames - 1 - sample_duration) /
                          (n_samples_for_each_video - 1))))
            for j in range(1, n_frames - sample_duration, step):
                sample_j = copy.deepcopy(sample)
                sample_j['frame_indices'] = list(
                    range(j, min(n_frames + 1, j + sample_duration)))
                dataset.append(sample_j)
        else:
            # Fixed: original did ``raise('error, ...')``, which raises a
            # TypeError ("exceptions must derive from BaseException") instead
            # of the intended error.
            raise ValueError('error, n_samples_for_each_video should >=1\n')

    return dataset, max_n_frames
def make_untrimmed_dataset(root_path, annotation_path, subset,
                           n_samples_for_each_video, sample_duration):
    """Build a clip list for untrimmed videos in *subset*.

    Args:
        root_path: directory containing one sub-directory per video.
        annotation_path: path to the annotation file consumed by
            ``load_annotation_data``.
        subset: annotation subset name.
        n_samples_for_each_video: clips to draw per video; 1 (or less)
            tiles clips back to back at ``sample_duration`` spacing.
        sample_duration: clip length in frames.

    Returns:
        (dataset, idx_to_class): list of sample dicts and a mapping from
        label index back to class name.
    """
    data = load_annotation_data(annotation_path)
    video_names, _ = get_video_names_and_annotations(data, subset)
    class_to_idx = get_class_labels(data)
    idx_to_class = {label: name for name, label in class_to_idx.items()}

    dataset = []
    for i in range(len(video_names)):
        if i % 1000 == 0:
            print('dataset loading [{}/{}]'.format(i, len(video_names)))

        video_path = os.path.join(root_path, video_names[i])
        if not os.path.exists(video_path):
            continue

        fps_file_path = os.path.join(video_path, 'fps')
        fps = load_value_file(fps_file_path)

        begin_t = 1
        end_t = get_end_t(video_path)
        n_frames = end_t - begin_t

        sample = {
            'video': video_path,
            'segment': [begin_t, end_t],
            'fps': fps,
            # Strips the first two characters of the name --
            # NOTE(review): presumably a subset/volume prefix; confirm.
            'video_id': video_names[i][2:]
        }

        if n_samples_for_each_video > 1:
            # Fixed: original tested ``>= 1``, which divides by zero below
            # when n_samples_for_each_video == 1. Sibling make_dataset
            # functions in this file use ``> 1``.
            step = max(
                1,
                math.ceil((n_frames - 1 - sample_duration) /
                          (n_samples_for_each_video - 1)))
        else:
            step = sample_duration
        for j in range(begin_t, end_t, step):
            sample_j = copy.deepcopy(sample)
            frame_indices = list(range(j, j + sample_duration))
            frame_indices = modify_frame_indices(sample_j['video'],
                                                 frame_indices)
            if len(frame_indices) < 16:
                # Drop clips shorter than 16 frames -- presumably the
                # model's minimum input length; TODO confirm whether this
                # should be sample_duration instead of a hard-coded 16.
                continue
            sample_j['frame_indices'] = frame_indices
            dataset.append(sample_j)

    return dataset, idx_to_class
def make_dataset(root_path, annotation_path, subset,
                 n_samples_for_each_video, sample_duration):
    """Assemble the UCF-101 clip list for *subset*.

    Args:
        root_path: directory containing one sub-directory per video.
        annotation_path: path to the annotation file consumed by
            ``load_annotation_data``.
        subset: annotation subset name.
        n_samples_for_each_video: clips per video; 1 means a single clip
            covering every frame.
        sample_duration: clip length in frames when sampling multiple clips.

    Returns:
        (dataset, idx_to_class): list of sample dicts and a mapping from
        label index back to class name.

    Raises:
        ValueError: if no annotations can be loaded for *subset*.
        FileNotFoundError: if a video directory lacks its 'n_frames' file.
    """
    data = load_annotation_data(annotation_path)
    video_names, annotations = get_video_names_and_annotations(data, subset)
    if not annotations:
        raise ValueError('Unable to load annotations...')

    class_to_idx = get_class_labels(data)
    idx_to_class = {index: cls for cls, index in class_to_idx.items()}

    dataset = []
    total = len(video_names)
    for idx, name in enumerate(video_names):
        if idx % 1000 == 0:
            print('Loading UCF-101 videos [{}/{}]'.format(idx, total))

        video_path = os.path.join(root_path, name)
        if not os.path.exists(video_path):
            continue

        n_frames_file_path = os.path.join(video_path, 'n_frames')
        if not os.path.exists(n_frames_file_path):
            raise FileNotFoundError(
                'n_frames_file_path does not exist: {}'.format(
                    n_frames_file_path))
        n_frames = int(load_value_file(n_frames_file_path))
        if n_frames <= 0:
            continue

        sample = {
            'video': video_path,
            'segment': [1, n_frames],
            'n_frames': n_frames,
            # NOTE(review): assumes names look like 'ClassName/v_...' --
            # the second path component is used as the id; confirm.
            'video_id': name.split('/')[1],
            # annotations is guaranteed non-empty here (checked above),
            # so the -1 fallback is never taken in practice.
            'label': (class_to_idx[annotations[idx]['label']]
                      if annotations else -1),
        }

        if n_samples_for_each_video == 1:
            # One clip covering the entire video.
            sample['frame_indices'] = list(range(1, n_frames + 1))
            dataset.append(sample)
            continue

        if n_samples_for_each_video > 1:
            # Evenly spaced clip starts across the video.
            step = max(
                1,
                math.ceil((n_frames - 1 - sample_duration) /
                          (n_samples_for_each_video - 1)))
        else:
            # Non-positive request: tile clips back to back.
            step = sample_duration
        for start in range(1, n_frames, step):
            clip = copy.deepcopy(sample)
            clip['frame_indices'] = list(
                range(start, min(n_frames + 1, start + sample_duration)))
            dataset.append(clip)

    return dataset, idx_to_class