def get_training_data(video_path,
                      annotation_path,
                      dataset_name,
                      input_type,
                      file_type,
                      spatial_transform=None,
                      temporal_transform=None,
                      target_transform=None):
    """Build the dataset object for the 'training' subset.

    Args:
        video_path: Forwarded as the first positional argument of the
            dataset constructor (presumably the root directory of the
            videos -- confirm against the dataset classes).
        annotation_path: Forwarded as the second positional argument.
        dataset_name: One of 'kinetics', 'activitynet', 'ucf101', 'hmdb51',
            'mit', 'somethingv2', 'somethingv1'.
        input_type: 'rgb' or 'flow'.
        file_type: 'jpg' or 'hdf5'. 'flow' is only accepted with 'hdf5'.
        spatial_transform: Forwarded unchanged to the dataset.
        temporal_transform: Forwarded unchanged to the dataset.
        target_transform: Forwarded unchanged to the dataset.

    Returns:
        An ``ActivityNet`` instance when ``dataset_name`` is 'activitynet',
        otherwise a ``VideoDataset`` instance.

    Raises:
        AssertionError: If an argument is outside the accepted values.
    """
    assert dataset_name in ('kinetics', 'activitynet', 'ucf101', 'hmdb51',
                            'mit', 'somethingv2', 'somethingv1')
    assert input_type in ('rgb', 'flow')
    assert file_type in ('jpg', 'hdf5')

    # The two Something-Something releases each have their own frame-name
    # formatter; every other dataset shares the default one.
    if 'somethingv1' in dataset_name:
        frame_name_fn = sthv1_image_name_formatter
    elif 'somethingv2' in dataset_name:
        frame_name_fn = sthv2_image_name_formatter
    else:
        frame_name_fn = image_name_formatter

    if file_type == 'jpg':
        assert input_type == 'rgb', \
            'flow input is supported only when input type is hdf5.'

        # Hand VideoLoader an accimage-based image loader only when that
        # backend is the active one; otherwise let it use its default.
        image_loader_args = ()
        if get_image_backend() == 'accimage':
            from datasets.loader import ImageLoaderAccImage
            image_loader_args = (ImageLoaderAccImage(),)
        loader = VideoLoader(frame_name_fn, *image_loader_args)

        video_path_formatter = (
            lambda root_path, label, video_id: root_path / label / video_id)
    else:
        loader = (VideoLoaderHDF5()
                  if input_type == 'rgb' else VideoLoaderFlowHDF5())
        video_path_formatter = (
            lambda root_path, label, video_id: root_path / label /
            '{}.hdf5'.format(video_id))

    dataset_cls = ActivityNet if dataset_name == 'activitynet' else VideoDataset
    training_data = dataset_cls(video_path,
                                annotation_path,
                                'training',
                                spatial_transform=spatial_transform,
                                temporal_transform=temporal_transform,
                                target_transform=target_transform,
                                video_loader=loader,
                                video_path_formatter=video_path_formatter)
    return training_data
def get_validation_data(video_path,
                        annotation_path,
                        dataset_name,
                        input_type,
                        file_type,
                        spatial_transform=None,
                        temporal_transform=None,
                        target_transform=None):
    """Build the dataset object for the 'validation' subset.

    Args:
        video_path: Forwarded as the first positional argument of the
            dataset constructor.
        annotation_path: Forwarded as the second positional argument.
        dataset_name: One of 'kinetics', 'activitynet', 'ucf101', 'hmdb51',
            'mit'.
        input_type: 'rgb' or 'flow'.
        file_type: 'jpg' or 'hdf5'. 'flow' is only accepted with 'hdf5'.
        spatial_transform: Forwarded unchanged to the dataset.
        temporal_transform: Forwarded unchanged to the dataset.
        target_transform: Forwarded unchanged to the dataset.

    Returns:
        A ``(dataset, collate_fn)`` tuple. The dataset is ``ActivityNet``
        for 'activitynet' and ``VideoDatasetMultiClips`` otherwise.

    Raises:
        AssertionError: If an argument is outside the accepted values.
    """
    assert dataset_name in ('kinetics', 'activitynet', 'ucf101', 'hmdb51',
                            'mit')
    assert input_type in ('rgb', 'flow')
    assert file_type in ('jpg', 'hdf5')

    is_activitynet = dataset_name == 'activitynet'

    if file_type == 'jpg':
        assert input_type == 'rgb', \
            'flow input is supported only when input type is hdf5.'

        image_loader_args = ()
        if get_image_backend() == 'accimage':
            from datasets.loader import ImageLoaderAccImage
            image_loader_args = (ImageLoaderAccImage(),)
        loader = VideoLoader(image_name_formatter, *image_loader_args)

        if is_activitynet:
            # ActivityNet jpg folders are addressed as 'v_<id>' directly
            # under the root; the label is not part of the path.
            video_path_formatter = (
                lambda root_path, label, video_id: root_path /
                f'v_{video_id}')
        else:
            video_path_formatter = (
                lambda root_path, label, video_id: root_path / label /
                video_id)
    else:
        loader = (VideoLoaderHDF5()
                  if input_type == 'rgb' else VideoLoaderFlowHDF5())
        video_path_formatter = (
            lambda root_path, label, video_id: root_path / label /
            f'{video_id}.hdf5')

    dataset_cls = ActivityNet if is_activitynet else VideoDatasetMultiClips
    validation_data = dataset_cls(video_path,
                                  annotation_path,
                                  'validation',
                                  spatial_transform=spatial_transform,
                                  temporal_transform=temporal_transform,
                                  target_transform=target_transform,
                                  video_loader=loader,
                                  video_path_formatter=video_path_formatter)

    return validation_data, collate_fn
def get_inference_data(video_path,
                       annotation_path,
                       dataset_name,
                       input_type,
                       file_type,
                       inference_subset,
                       spatial_transform=None,
                       temporal_transform=None,
                       target_transform=None):
    """Build a multi-clip dataset for inference on one subset.

    Args:
        video_path: Forwarded as the first positional argument of
            ``VideoDatasetMultiClips``.
        annotation_path: Forwarded as the second positional argument.
        dataset_name: One of 'kinetics', 'activitynet', 'ucf101', 'hmdb51',
            'mit', 'vggsound'. Only validated here; it does not change the
            object that is built.
        input_type: 'rgb' or 'flow'.
        file_type: 'jpg' or 'hdf5'. 'flow' is only accepted with 'hdf5'.
        inference_subset: 'train', 'val' or 'test'; translated to
            'training', 'validation' or 'testing' for the dataset.
        spatial_transform: Forwarded unchanged to the dataset.
        temporal_transform: Forwarded unchanged to the dataset.
        target_transform: Forwarded unchanged to the dataset.

    Returns:
        A ``(VideoDatasetMultiClips, collate_fn)`` tuple. The dataset is
        created with ``target_type=['video_id', 'segment']``.

    Raises:
        AssertionError: If an argument is outside the accepted values.
    """
    assert dataset_name in ('kinetics', 'activitynet', 'ucf101', 'hmdb51',
                            'mit', 'vggsound')
    assert input_type in ('rgb', 'flow')
    assert file_type in ('jpg', 'hdf5')
    assert inference_subset in ('train', 'val', 'test')

    if file_type == 'jpg':
        assert input_type == 'rgb', \
            'flow input is supported only when input type is hdf5.'

        image_loader_args = ()
        if get_image_backend() == 'accimage':
            from datasets.loader import ImageLoaderAccImage
            image_loader_args = (ImageLoaderAccImage(),)
        loader = VideoLoader(image_name_formatter, *image_loader_args)

        video_path_formatter = (
            lambda root_path, label, video_id: root_path / label / video_id)
    else:
        loader = (VideoLoaderHDF5()
                  if input_type == 'rgb' else VideoLoaderFlowHDF5())
        video_path_formatter = (
            lambda root_path, label, video_id: root_path / label /
            f'{video_id}.hdf5')

    # Short CLI-style subset names -> names passed to the dataset class.
    subset_names = {
        'train': 'training',
        'val': 'validation',
        'test': 'testing',
    }
    subset = subset_names[inference_subset]

    inference_data = VideoDatasetMultiClips(
        video_path,
        annotation_path,
        subset,
        spatial_transform=spatial_transform,
        temporal_transform=temporal_transform,
        target_transform=target_transform,
        video_loader=loader,
        video_path_formatter=video_path_formatter,
        target_type=['video_id', 'segment'])

    return inference_data, collate_fn
def get_training_av_data(video_path,
                         audio_path,
                         annotation_path,
                         dataset_name,
                         input_type,
                         file_type,
                         spatial_transform=None,
                         temporal_transform=None,
                         target_transform=None):
    """Build the audio+video dataset object for the 'training' subset.

    Args:
        video_path: Forwarded as the first positional argument of
            ``AudioVideoDataset``.
        audio_path: Forwarded as the second positional argument.
        annotation_path: Forwarded as the third positional argument.
        dataset_name: One of 'kinetics', 'activitynet', 'ucf101', 'hmdb51',
            'mit', 'vggsound'. With HDF5 RGB input, 'kinetics' selects
            ``VideoLoaderHDF5_kinetics`` instead of ``VideoLoaderHDF5``.
        input_type: 'rgb' or 'flow'.
        file_type: 'jpg' or 'hdf5'. 'flow' is only accepted with 'hdf5'.
        spatial_transform: Forwarded unchanged to the dataset.
        temporal_transform: Forwarded unchanged to the dataset.
        target_transform: Forwarded unchanged to the dataset.

    Returns:
        An ``AudioVideoDataset`` instance.

    Raises:
        AssertionError: If an argument is outside the accepted values.
    """
    assert dataset_name in ('kinetics', 'activitynet', 'ucf101', 'hmdb51',
                            'mit', 'vggsound')
    assert input_type in ('rgb', 'flow')
    assert file_type in ('jpg', 'hdf5')

    if file_type == 'jpg':
        assert input_type == 'rgb', \
            'flow input is supported only when input type is hdf5.'

        image_loader_args = ()
        if get_image_backend() == 'accimage':
            from datasets.loader import ImageLoaderAccImage
            image_loader_args = (ImageLoaderAccImage(),)
        loader = VideoLoader(image_name_formatter, *image_loader_args)

        video_path_formatter = (
            lambda root_path, label, video_id: root_path / label / video_id)
    else:
        if input_type != 'rgb':
            loader = VideoLoaderFlowHDF5()
        elif dataset_name == 'kinetics':
            loader = VideoLoaderHDF5_kinetics()
        else:
            loader = VideoLoaderHDF5()

        video_path_formatter = (
            lambda root_path, label, video_id: root_path / label /
            f'{video_id}.hdf5')

    # Audio files are addressed as <root>/<label>/<video_id>.npy regardless
    # of how the video frames are stored.
    audio_path_formatter = (
        lambda root_path, label, video_id: root_path / label /
        f'{video_id}.npy')

    return AudioVideoDataset(video_path,
                             audio_path,
                             annotation_path,
                             'training',
                             spatial_transform=spatial_transform,
                             temporal_transform=temporal_transform,
                             target_transform=target_transform,
                             video_loader=loader,
                             video_path_formatter=video_path_formatter,
                             audio_path_formatter=audio_path_formatter)
def get_inference_data(video_path,
                       input_type,
                       file_type,
                       spatial_transform=None,
                       temporal_transform=None):
    """Build a multi-clip inference dataset without an annotation file.

    Args:
        video_path: Forwarded as the only positional argument of
            ``VideoDatasetMultiClips``.
        input_type: 'rgb' or 'flow'.
        file_type: 'jpg' or 'hdf5'. 'flow' is only accepted with 'hdf5'.
        spatial_transform: Forwarded unchanged to the dataset.
        temporal_transform: Forwarded unchanged to the dataset.

    Returns:
        A ``(VideoDatasetMultiClips, collate_fn)`` tuple.

    Raises:
        AssertionError: If an argument is outside the accepted values.
    """
    assert input_type in ('rgb', 'flow')
    assert file_type in ('jpg', 'hdf5')

    if file_type == 'jpg':
        assert input_type == 'rgb', \
            'flow input is supported only when input type is hdf5.'

        image_loader_args = ()
        if get_image_backend() == 'accimage':
            from datasets.loader import ImageLoaderAccImage
            image_loader_args = (ImageLoaderAccImage(),)
        loader = VideoLoader(image_name_formatter, *image_loader_args)

        video_path_formatter = (
            lambda root_path, video_name: root_path / video_name)
    else:
        loader = (VideoLoaderHDF5()
                  if input_type == 'rgb' else VideoLoaderFlowHDF5())
        # NOTE(review): this formatter takes (root_path, label, video_id)
        # while the jpg one above takes (root_path, video_name). Only one
        # arity can match how the dataset calls it -- confirm which and
        # align the other.
        video_path_formatter = (
            lambda root_path, label, video_id: root_path / label /
            f'{video_id}.hdf5')

    inference_data = VideoDatasetMultiClips(
        video_path,
        spatial_transform=spatial_transform,
        temporal_transform=temporal_transform,
        video_loader=loader,
        video_path_formatter=video_path_formatter)

    return inference_data, collate_fn
def get_training_data(video_path,
                      annotation_path,
                      dataset_name,
                      input_type,
                      file_type,
                      spatial_transform=None,
                      temporal_transform=None,
                      target_transform=None,
                      sample_t_stride=1):
    """Build the dataset object for the 'training' subset.

    Args:
        video_path: Root passed to the dataset constructor. For 'kinetics'
            and 'mini_kinetics' the sub-directory 'h5_train_frames' is
            appended first.
        annotation_path: Forwarded as the second positional argument.
        dataset_name: One of the names accepted by the first assertion
            below (Kinetics, ActivityNet, UCF-101, HMDB-51, MiT, Breakfast,
            the Moving-MNIST variants and 'something').
        input_type: 'rgb' or 'flow'.
        file_type: 'jpg', 'hdf5' or None. Anything other than 'jpg' takes
            the HDF5 branch. 'flow' is only accepted outside 'jpg'.
        spatial_transform: Forwarded unchanged to the dataset.
        temporal_transform: Forwarded unchanged to the dataset.
        target_transform: Forwarded unchanged to the dataset.
        sample_t_stride: Accepted but not used by this function.

    Returns:
        An ``ActivityNet`` instance for 'activitynet', otherwise a
        ``VideoDataset`` instance.

    Raises:
        AssertionError: If an argument is outside the accepted values.
    """
    assert dataset_name in (
        'kinetics', 'mini_kinetics', 'activitynet', 'ucf101', 'hmdb51',
        'mit', 'breakfast', 'mini_breakfast', 'movingmnist',
        'movingmnist_blackframes', 'movingmnist_longterm',
        'movingmnist_motiondiff', 'movingmnist_motionsame',
        'movingmnist_frequencies', 'movingmnist_frequencies_complex',
        'something', 'movingmnist_static')
    assert input_type in ('rgb', 'flow')
    assert file_type in ('jpg', 'hdf5', None)

    is_kinetics = dataset_name in ('kinetics', 'mini_kinetics')

    if file_type == 'jpg':
        assert input_type == 'rgb', \
            'flow input is supported only when input type is hdf5.'

        is_mnist = 'movingmnist' in dataset_name
        is_something = 'something' in dataset_name

        if is_mnist:
            frame_name_fn = mnist_image_name_formatter
        elif is_something:
            frame_name_fn = something_image_name_formatter
        else:
            frame_name_fn = usual_image_name_formatter

        image_loader_args = ()
        if get_image_backend() == 'accimage':
            from datasets.loader import ImageLoaderAccImage
            image_loader_args = (ImageLoaderAccImage(),)
        loader = VideoLoader(frame_name_fn, *image_loader_args)

        if is_mnist or is_something:
            # These datasets keep every video directly under the root,
            # without a per-label directory level.
            video_path_formatter = (
                lambda root_path, label, video_id: root_path / video_id)
        else:
            video_path_formatter = (
                lambda root_path, label, video_id: root_path / label /
                video_id)
    else:
        loader = (VideoLoaderHDF5()
                  if input_type == 'rgb' else VideoLoaderFlowHDF5())

        if is_kinetics:
            # Kinetics entries are addressed without the '.hdf5' suffix.
            video_path_formatter = (
                lambda root_path, label, video_id: root_path / label /
                f'{video_id}')
        else:
            video_path_formatter = (
                lambda root_path, label, video_id: root_path / label /
                f'{video_id}.hdf5')

    print("video_path_formatter", video_path_formatter)

    shared_kwargs = dict(spatial_transform=spatial_transform,
                         temporal_transform=temporal_transform,
                         target_transform=target_transform,
                         video_loader=loader,
                         video_path_formatter=video_path_formatter)

    if dataset_name == 'activitynet':
        return ActivityNet(video_path, annotation_path, 'training',
                           **shared_kwargs)

    if is_kinetics:
        kinetics_root = Path(video_path) / "h5_train_frames"
        return VideoDataset(kinetics_root, annotation_path, 'training',
                            **shared_kwargs)

    print("Building VideoDataset for", dataset_name)
    return VideoDataset(video_path, annotation_path, 'training',
                        **shared_kwargs)
def get_inference_data(video_path,
                       annotation_path,
                       dataset_name,
                       input_type,
                       file_type,
                       inference_subset,
                       spatial_transform=None,
                       temporal_transform=None,
                       target_transform=None,
                       sample_t_stride=1):
    """Build the dataset object used for inference on one subset.

    Args:
        video_path: Forwarded as the first positional argument of the
            dataset constructor.
        annotation_path: Forwarded as the second positional argument.
        dataset_name: One of the names accepted by the first assertion
            below (Kinetics, ActivityNet, UCF-101, HMDB-51, MiT, Breakfast,
            the Moving-MNIST variants and 'something').
        input_type: 'rgb' or 'flow'.
        file_type: 'jpg', 'hdf5' or None. Anything other than 'jpg' takes
            the HDF5 branch. 'flow' is only accepted outside 'jpg'.
        inference_subset: 'train', 'val' or 'test'; translated to
            'training', 'validation' or 'testing' for the dataset.
        spatial_transform: Forwarded unchanged to the dataset.
        temporal_transform: Forwarded unchanged to the dataset.
        target_transform: Forwarded unchanged to the dataset.
        sample_t_stride: Accepted but not used by this function.

    Returns:
        A ``(dataset, collate_fn)`` tuple. The dataset is ``ActivityNet``
        (built with ``is_untrimmed_setting=True``) for 'activitynet', and
        ``VideoDatasetMultiClips`` with
        ``target_type=['label', 'video_id', 'segment']`` otherwise.

    Raises:
        AssertionError: If an argument is outside the accepted values.
    """
    assert dataset_name in [
        'kinetics', 'mini_kinetics', 'activitynet', 'ucf101', 'hmdb51',
        'mit', 'breakfast', 'mini_breakfast', 'movingmnist',
        'movingmnist_blackframes', 'movingmnist_longterm',
        'movingmnist_motiondiff', 'movingmnist_motionsame',
        'movingmnist_frequencies', 'movingmnist_frequencies_complex',
        'something', 'movingmnist_static'
    ]
    assert input_type in ['rgb', 'flow']
    assert file_type in ['jpg', 'hdf5', None]
    assert inference_subset in ['train', 'val', 'test']

    if file_type == 'jpg':
        assert input_type == 'rgb', \
            'flow input is supported only when input type is hdf5.'

        is_mnist = 'movingmnist' in dataset_name
        is_something = 'something' in dataset_name

        if is_mnist:
            frame_name_fn = mnist_image_name_formatter
        elif is_something:
            frame_name_fn = something_image_name_formatter
        else:
            frame_name_fn = usual_image_name_formatter

        if get_image_backend() == 'accimage':
            from datasets.loader import ImageLoaderAccImage
            loader = VideoLoader(frame_name_fn, ImageLoaderAccImage())
        else:
            loader = VideoLoader(frame_name_fn)

        # Fix: the flat (label-free) layout used to be selected from a
        # hard-coded list naming only three Moving-MNIST variants, so the
        # remaining variants accepted above were sent to a
        # <root>/<label>/<video_id> path. Use the same substring test that
        # already selects the frame-name formatter, so every Moving-MNIST
        # variant (and 'something') gets the flat layout.
        if is_mnist or is_something:
            video_path_formatter = (
                lambda root_path, label, video_id: root_path / video_id)
        else:
            video_path_formatter = (
                lambda root_path, label, video_id: root_path / label /
                video_id)
    else:
        if input_type == 'rgb':
            loader = VideoLoaderHDF5()
        else:
            loader = VideoLoaderFlowHDF5()
        video_path_formatter = (
            lambda root_path, label, video_id: root_path / label /
            f'{video_id}.hdf5')

    # Short CLI-style subset names -> names passed to the dataset class.
    subset = {
        'train': 'training',
        'val': 'validation',
        'test': 'testing',
    }[inference_subset]

    if dataset_name == 'activitynet':
        inference_data = ActivityNet(
            video_path,
            annotation_path,
            subset,
            spatial_transform=spatial_transform,
            temporal_transform=temporal_transform,
            target_transform=target_transform,
            video_loader=loader,
            video_path_formatter=video_path_formatter,
            is_untrimmed_setting=True)
    else:
        inference_data = VideoDatasetMultiClips(
            video_path,
            annotation_path,
            subset,
            spatial_transform=spatial_transform,
            temporal_transform=temporal_transform,
            target_transform=target_transform,
            video_loader=loader,
            video_path_formatter=video_path_formatter,
            target_type=['label', 'video_id', 'segment'])

    return inference_data, collate_fn