import copy
import math
import os

import numpy as np


def make_dataset(root_path, annotation_path, subset, n_samples_for_each_video,
                 sample_duration):
    data = load_annotation_data(annotation_path)
    video_names, annotations = get_video_names_and_annotations(data, subset)
    class_to_idx = get_class_labels(data)
    idx_to_class = {}
    for name, label in class_to_idx.items():
        idx_to_class[label] = name

    dataset = []
    for i in range(len(video_names)):
        # if i % 1000 == 0:
        #     print('dataset loading [{}/{}]'.format(i, len(video_names)))

        video_path = os.path.join(root_path, video_names[i])
        if not os.path.exists(video_path):
            continue

        n_frames_file_path = os.path.join(video_path, 'n_frames')
        n_frames = int(load_value_file(n_frames_file_path))
        if n_frames <= 0:
            continue

        begin_t = 1
        end_t = n_frames
        sample = {
            'video': video_path,
            'segment': [begin_t, end_t],
            'n_frames': n_frames,
            'video_id': video_names[i].split('/')[1]
        }
        if len(annotations) != 0:
            sample['label'] = class_to_idx[annotations[i]['label']]
        else:
            sample['label'] = -1

        if n_samples_for_each_video == 1:
            # A single sample per video spans the whole frame range.
            sample['frame_indices'] = list(range(1, n_frames + 1))
            dataset.append(sample)
        else:
            if n_samples_for_each_video > 1:
                # Spread the requested number of clips evenly over the video.
                step = max(1,
                           math.ceil((n_frames - 1 - sample_duration) /
                                     (n_samples_for_each_video - 1)))
            else:
                # n_samples_for_each_video <= 0: take non-overlapping
                # sliding windows of length sample_duration.
                step = sample_duration
            for j in range(1, n_frames, step):
                sample_j = copy.deepcopy(sample)
                sample_j['frame_indices'] = list(
                    range(j, min(n_frames + 1, j + sample_duration)))
                dataset.append(sample_j)

    return dataset, idx_to_class
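# make_dataset and the two loaders below rely on a handful of small helpers
# that are not defined in this file. The sketches below are minimal, hedged
# reconstructions in the spirit of the 3D-ResNets-PyTorch dataset code; the
# annotation layout (a JSON with a 'labels' list and a 'database' dict keyed
# by video id) and the 'test/<id>' / '<label>/<id>' path convention are
# assumptions, not guaranteed to match the helpers actually used here.

import json  # used only by the sketch below


def load_value_file(file_path):
    # Read a single numeric value (e.g. the 'n_frames' or 'fps' file written
    # next to the extracted frames of each video).
    with open(file_path, 'r') as input_file:
        return float(input_file.read().rstrip('\n\r'))


def load_annotation_data(annotation_path):
    # Assumption: the annotation file is a Kinetics-style JSON.
    with open(annotation_path, 'r') as data_file:
        return json.load(data_file)


def get_class_labels(data):
    # Map each class name to a contiguous integer index.
    return {label: idx for idx, label in enumerate(data['labels'])}


def get_video_names_and_annotations(data, subset):
    # Collect relative video paths and annotations for one subset
    # ('training', 'validation', or 'testing'); test videos have no labels.
    video_names = []
    annotations = []
    for key, value in data['database'].items():
        if value['subset'] != subset:
            continue
        if subset == 'testing':
            video_names.append('test/{}'.format(key))
        else:
            label = value['annotations']['label']
            video_names.append('{}/{}'.format(label, key))
            annotations.append(value['annotations'])
    return video_names, annotations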
def make_untrimmed_dataset(root_path, annotation_path, subset,
                           n_samples_for_each_video, sample_duration):
    data = load_annotation_data(annotation_path)
    video_names, _ = get_video_names_and_annotations(data, subset)
    class_to_idx = get_class_labels(data)
    idx_to_class = {}
    for name, label in class_to_idx.items():
        idx_to_class[label] = name

    dataset = []
    for i in range(len(video_names)):
        if i % 1000 == 0:
            print('dataset loading [{}/{}]'.format(i, len(video_names)))

        video_path = os.path.join(root_path, video_names[i])
        if not os.path.exists(video_path):
            continue

        fps_file_path = os.path.join(video_path, 'fps')
        fps = load_value_file(fps_file_path)

        begin_t = 1
        end_t = get_end_t(video_path)
        n_frames = end_t - begin_t

        sample = {
            'video': video_path,
            'segment': [begin_t, end_t],
            'fps': fps,
            'video_id': video_names[i][2:]  # drop the leading 'v_' prefix
        }

        if n_samples_for_each_video > 1:
            # Spread the requested number of clips evenly over the video.
            step = max(1,
                       math.ceil((n_frames - 1 - sample_duration) /
                                 (n_samples_for_each_video - 1)))
        else:
            # With a single (or unspecified) sample count, fall back to
            # non-overlapping windows of length sample_duration; this also
            # avoids dividing by n_samples_for_each_video - 1 == 0.
            step = sample_duration
        for j in range(begin_t, end_t, step):
            sample_j = copy.deepcopy(sample)
            frame_indices = list(range(j, j + sample_duration))
            # Truncate the window at the first frame missing on disk and
            # discard windows that end up shorter than 16 frames.
            frame_indices = modify_frame_indices(sample_j['video'],
                                                 frame_indices)
            if len(frame_indices) < 16:
                continue
            sample_j['frame_indices'] = frame_indices
            dataset.append(sample_j)

    return dataset, idx_to_class
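# make_untrimmed_dataset additionally assumes two frame-level helpers. The
# sketches below assume frames are stored as 'image_00001.jpg', ... inside
# each video directory (the 3D-ResNets-PyTorch convention); the exact naming
# scheme is an assumption.


def get_end_t(video_path):
    # Index of the last frame actually present on disk, parsed from the
    # highest-numbered 'image_NNNNN.jpg' file name.
    image_file_names = [
        name for name in os.listdir(video_path) if name.startswith('image_')
    ]
    return int(sorted(image_file_names)[-1][6:11])


def modify_frame_indices(video_dir_path, frame_indices):
    # Truncate a candidate window at the first index whose frame image is
    # missing, so every returned index is loadable.
    modified_indices = []
    for i in frame_indices:
        image_path = os.path.join(video_dir_path,
                                  'image_{:05d}.jpg'.format(i))
        if not os.path.exists(image_path):
            return modified_indices
        modified_indices.append(i)
    return modified_indices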
def make_dataset_human_det(root_path, annotation_path, subset,
                           n_samples_for_each_video, sample_duration, dets):
    data = load_annotation_data(annotation_path)
    video_names, annotations = get_video_names_and_annotations(data, subset)
    class_to_idx = get_class_labels(data)
    idx_to_class = {}
    for name, label in class_to_idx.items():
        idx_to_class[label] = name

    num_samples_wo_filtering = 0
    dataset = []
    for i in range(len(video_names)):
        if i % 1000 == 0:
            print('dataset loading [{}/{}]'.format(i, len(video_names)))

        video_path = os.path.join(root_path, video_names[i])
        if not os.path.exists(video_path):
            continue

        n_frames_file_path = os.path.join(video_path, 'n_frames')
        n_frames = int(load_value_file(n_frames_file_path))
        if n_frames <= 0:
            continue

        # Count the samples this video would contribute before
        # detection-based filtering, so the filtered dataset can later be
        # repeated to match the unfiltered size.
        if n_samples_for_each_video == 1:
            num_samples_wo_filtering += 1
        else:
            if n_samples_for_each_video > 1:
                step = max(1,
                           math.ceil((n_frames - 1 - sample_duration) /
                                     (n_samples_for_each_video - 1)))
            else:
                step = sample_duration
            num_samples_wo_filtering += np.arange(1, n_frames, step).shape[0]

        # Skip videos without sufficiently many confident human detections;
        # the first 11 characters of the directory name are the YouTube id.
        cur_cls = video_path.split('/')[-2]
        vid = video_path.split('/')[-1][:11]
        if not is_video_valid_det(
                dets[cur_cls][vid], n_frames, det_th=0.3, ratio_th=0.7):
            continue

        begin_t = 1
        end_t = n_frames
        sample = {
            'video': video_path,
            'segment': [begin_t, end_t],
            'n_frames': n_frames,
            # Drop the 14-character '_START_END' timestamp suffix before
            # extracting the id.
            'video_id': video_names[i][:-14].split('/')[1]
        }
        if len(annotations) != 0:
            sample['label'] = class_to_idx[annotations[i]['label']]
        else:
            sample['label'] = -1

        if n_samples_for_each_video == 1:
            sample['frame_indices'] = list(range(1, n_frames + 1))
            dataset.append(sample)
        else:
            if n_samples_for_each_video > 1:
                step = max(1,
                           math.ceil((n_frames - 1 - sample_duration) /
                                     (n_samples_for_each_video - 1)))
            else:
                step = sample_duration
            for j in range(1, n_frames, step):
                sample_j = copy.deepcopy(sample)
                sample_j['frame_indices'] = list(
                    range(j, min(n_frames + 1, j + sample_duration)))
                dataset.append(sample_j)

    print('len(dataset) after filtering the videos without sufficient '
          'detections: [{}/{}]'.format(len(dataset), num_samples_wo_filtering))

    # Repeat shuffled copies of the filtered dataset so that the number of
    # samples matches the original dataset without filtering; if it is
    # already large enough, return it as-is (truncated below).
    dataset_repeat = dataset
    if len(dataset) < num_samples_wo_filtering:
        num_repeat = int(np.ceil(num_samples_wo_filtering / len(dataset)))
        dataset_repeat = copy.deepcopy(dataset)
        for _ in range(1, num_repeat):
            dataset_shuffled = copy.deepcopy(dataset)
            np.random.shuffle(dataset_shuffled)
            dataset_repeat += dataset_shuffled

    return dataset_repeat[:num_samples_wo_filtering], idx_to_class
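# is_video_valid_det is the detection-based filter used above. Its semantics
# are inferred from the call site (dets[class][video_id], n_frames,
# det_th=0.3, ratio_th=0.7) and are an assumption: a video is kept when a
# large enough fraction of its frames contains at least one human detection
# whose confidence clears det_th. The per-frame detection format (a list of
# [x1, y1, x2, y2, score] boxes per frame) is likewise hypothetical.


def is_video_valid_det(video_dets, n_frames, det_th=0.3, ratio_th=0.7):
    # Count frames that have at least one detection scoring above det_th.
    n_valid_frames = 0
    for frame_dets in video_dets:
        if any(det[4] >= det_th for det in frame_dets):
            n_valid_frames += 1
    # Keep the video only if enough of its frames are covered by detections.
    return n_valid_frames / max(n_frames, 1) >= ratio_th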