def build_dataset(params):
    """Build the image dataset described by ``params`` or reload a stored one.

    When ``params['REBUILD_DATASET']`` is truthy a new ``Dataset`` is created,
    the raw images and their ids are registered as inputs for the 'train',
    'val' and 'test' splits, the train mean is set and the instance is stored
    with ``saveDataset``. Otherwise the instance previously stored under
    ``params['DATASET_STORE_PATH']`` is loaded. No outputs are registered.

    :param params: configuration mapping. Keys read here: REBUILD_DATASET,
        VERBOSE, DATASET_NAME, DATA_ROOT_PATH, IMG_FILES, INPUTS_IDS_DATASET,
        IMG_SIZE, IMG_CROP_SIZE, MEAN_IMAGE and DATASET_STORE_PATH.
    :return: the newly built or the reloaded dataset instance.
    """
    if not params['REBUILD_DATASET']:
        # Nothing to build: recover the stored instance with a single call.
        return loadDataset(params['DATASET_STORE_PATH']+'/Dataset_'+params['DATASET_NAME']+'.pkl')

    verbose = params['VERBOSE'] > 0
    if verbose:
        logging.info('Building ' + params['DATASET_NAME'] + ' dataset')

    root = params['DATA_ROOT_PATH']
    ds = Dataset(params['DATASET_NAME'], root, silence=not verbose)

    splits = ('train', 'val', 'test')
    features_per_image = 1  # only one feature vector is extracted per image

    # ----- Inputs: raw images -----
    # Every image list path is resolved before anything is registered.
    image_lists = [root + '/' + params['IMG_FILES'][split][0] for split in splits]
    for split, image_list in zip(splits, image_lists):
        ds.setInput(image_list, split,
                    type='raw-image',
                    id=params['INPUTS_IDS_DATASET'][0],
                    img_size=params['IMG_SIZE'],
                    img_size_crop=params['IMG_CROP_SIZE'],
                    repeat_set=features_per_image)

    # ----- Inputs: ids associated to the images -----
    for split in splits:
        ds.setInput(root + '/' + params['IMG_FILES'][split][1], split,
                    type='id',
                    id=params['INPUTS_IDS_DATASET'][0] + '_ids',
                    repeat_set=features_per_image)

    # ----- Train mean -----
    ds.setTrainMean(params['MEAN_IMAGE'], params['INPUTS_IDS_DATASET'][0])

    # ----- Outputs: none for this dataset -----

    # Store the finished dataset so later runs can simply reload it.
    saveDataset(ds, params['DATASET_STORE_PATH'])
    return ds
def loadMSVD():
    """Build the MSVD video description dataset, store it and reload it.

    Text descriptions are registered as outputs and the videos as inputs for
    the 'train', 'val' and 'test' splits. The dataset is then saved under
    'Datasets', loaded back and a first batch of 10 train samples is fetched
    as a sanity check.

    :return: None.
    """
    logging.info('Loading MSVD dataset')

    # Build basic dataset structure:
    # we assign it a name and the path where the images are stored
    base_path = '/media/HDD_2TB/DATASETS/MSVD/'
    name = 'MSVD_VideoDescription'
    ds = Dataset(name, base_path)
    max_text_len = 35

    # Load the train, val and test splits of the descriptions (outputs).
    # The files include one description per line; a variable number of
    # descriptions per video is provided. The vocabulary is only built
    # on the train split.
    ds.setOutput(base_path + 'train_descriptions.txt', 'train',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', build_vocabulary=True, max_text_len=max_text_len)
    ds.setOutput(base_path + 'val_descriptions.txt', 'val',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)
    ds.setOutput(base_path + 'test_descriptions.txt', 'test',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)

    # Load the associated videos (inputs). Each video has a different number
    # of sentences, so 'repeat_set' receives the per-video caption counts.
    num_captions_train = np.load(base_path + 'train_descriptions_counts.npy')
    num_captions_val = np.load(base_path + 'val_descriptions_counts.npy')
    num_captions_test = np.load(base_path + 'test_descriptions_counts.npy')

    ds.setInput([base_path + 'train_imgs_list.txt', base_path + 'train_imgs_counts.txt'],
                'train', type='video', id='videos', repeat_set=num_captions_train)
    ds.setInput([base_path + 'val_imgs_list.txt', base_path + 'val_imgs_counts.txt'],
                'val', type='video', id='videos', repeat_set=num_captions_val)
    ds.setInput([base_path + 'test_imgs_list.txt', base_path + 'test_imgs_counts.txt'],
                'test', type='video', id='videos', repeat_set=num_captions_test)

    # Set the dataset mean image for preprocessing the data.
    # Both arguments are passed positionally: the keyword of the second one
    # is `id` in some wrapper versions and `data_id` in others, so naming it
    # would tie this function to a single version.
    ds.setTrainMean([122.6795, 116.6690, 104.0067], 'videos')

    # We have finished loading the dataset, now we can store it for future use
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Recover the first batch of data as a sanity check
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
def loadMSVD():
    """Create, store and reload the MSVD video description dataset.

    Registers the text descriptions (outputs) and the videos (inputs) of the
    'train', 'val' and 'test' splits, sets the train mean, saves the dataset
    under 'Datasets', loads it back and requests 10 train samples to check
    that the data can be read.

    :return: None.
    """
    logging.info('Loading MSVD dataset')

    root = '/media/HDD_2TB/DATASETS/MSVD/'  # where the dataset files are stored
    name = 'MSVD_VideoDescription'
    splits = ('train', 'val', 'test')
    caption_len = 35

    ds = Dataset(name, root)

    # Outputs: one description per line, variable number per video.
    # Only the train split asks for the vocabulary to be built.
    for split in splits:
        vocabulary_option = {'build_vocabulary': True} if split == 'train' else {}
        ds.setOutput(root + split + '_descriptions.txt', split,
                     type='text',
                     id='descriptions',
                     tokenization='tokenize_basic',
                     max_text_len=caption_len,
                     **vocabulary_option)

    # Inputs: the number of captions differs between videos, so every split
    # passes its per-video caption counts through 'repeat_set'.
    # All count files are read before the first input is registered.
    caption_counts = [np.load(root + split + '_descriptions_counts.npy') for split in splits]
    for split, counts in zip(splits, caption_counts):
        video_files = [root + split + '_imgs_list.txt', root + split + '_imgs_counts.txt']
        ds.setInput(video_files, split, type='video', id='videos', repeat_set=counts)

    # Mean image used for preprocessing the data
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='videos')

    # Store the dataset, then recover it again from disk
    saveDataset(ds, 'Datasets')
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Fetch a first batch as a sanity check
    X, Y = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
def loadFood101():
    """Create, store and reload the Food101 classification dataset.

    Registers the images (inputs) and categorical labels (outputs) of the
    'train', 'val' and 'test' splits, sets the class list and the train mean,
    saves the dataset under 'Datasets', loads it back and requests 10 train
    samples to check that the data can be read.

    :return: None.
    """
    logging.info('Loading Food101 dataset')
    logging.info(
        'INFO: in order to load this dataset it must be placed in ../data/Food101/images/ after downloading it form https://www.vision.ee.ethz.ch/datasets_extra/food-101/'
    )

    root = '../data/Food101/'
    name = 'Food101'
    ds = Dataset(name, root + 'images')

    # Inputs (images): only the train split sets a crop size
    image_lists = (
        ('train', 'meta/train_split.txt', {'img_size_crop': [227, 227, 3]}),
        ('val', 'meta/val_split.txt', {}),
        ('test', 'meta/test.txt', {}),
    )
    for split, relative_path, extra_options in image_lists:
        ds.setInput(root + relative_path, split, type='image', id='images', **extra_options)

    # Outputs (labels)
    for split in ('train', 'val', 'test'):
        ds.setOutput(root + 'meta/' + split + '_labels.txt', split, type='categorical', id='labels')

    # List of classes (strings)
    ds.setClasses(root + 'meta/classes.txt', 'labels')

    # Mean image used for preprocessing the data
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='images')

    # Store the dataset, then recover it again from disk
    saveDataset(ds, 'Datasets')
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Fetch a first batch as a sanity check
    X, Y = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
def loadFlickr8k():
    """Build the Flickr8k image description dataset, store it and reload it.

    Text descriptions are registered as outputs and the images as inputs for
    the 'train', 'val' and 'test' splits. The dataset is then saved under
    'Datasets', loaded back and a first batch of 10 train samples is fetched
    as a sanity check.

    :return: None.
    """
    logging.info('Loading Flickr8k dataset')

    # Build basic dataset structure:
    # we assign it a name and the path where the images are stored
    base_path = '/media/HDD_2TB/DATASETS/Flickr8k/'
    name = 'Flickr8k_ImageDescription'
    ds = Dataset(name, base_path + 'Flicker8k_Dataset')
    max_text_len = 35

    # Load the train, val and test splits of the descriptions (outputs).
    # The files include one description per line and each set of
    # 5 consecutive descriptions corresponds to a single input image.
    # The vocabulary is only built on the train split.
    ds.setOutput(base_path + 'text/train_descriptions.txt', 'train',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', build_vocabulary=True, max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/val_descriptions.txt', 'val',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/test_descriptions.txt', 'test',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)

    # Load the associated images (inputs). There are 5 sentences per image,
    # hence 'repeat_set'=5.
    ds.setInput(base_path + 'text/Flickr_8k.trainImages.txt', 'train',
                type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.devImages.txt', 'val',
                type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.testImages.txt', 'test',
                type='image', id='images', repeat_set=5)

    # Set the dataset mean image for preprocessing the data.
    # Both arguments are passed positionally: the keyword of the second one
    # is `id` in some wrapper versions and `data_id` in others, so naming it
    # would tie this function to a single version.
    ds.setTrainMean([122.6795, 116.6690, 104.0067], 'images')

    # We have finished loading the dataset, now we can store it for future use
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Recover the first batch of data as a sanity check
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
def loadFlickr8k():
    """Create, store and reload the Flickr8k image description dataset.

    Registers the text descriptions (outputs) and the images (inputs) of the
    'train', 'val' and 'test' splits, sets the train mean, saves the dataset
    under 'Datasets', loads it back, requests 10 train samples and logs how
    many input and output entries were returned.

    :return: None.
    """
    logging.info('Loading Flickr8k dataset')

    root = '/media/HDD_2TB/DATASETS/Flickr8k/'
    name = 'Flickr8k_ImageDescription'
    caption_len = 35
    captions_per_image = 5  # 5 consecutive descriptions belong to one image

    # The images themselves are stored in the 'Flicker8k_Dataset' folder
    ds = Dataset(name, root + 'Flicker8k_Dataset')

    # Outputs: one description per line.
    # Only the train split asks for the vocabulary to be built.
    for split in ('train', 'val', 'test'):
        vocabulary_option = {'build_vocabulary': True} if split == 'train' else {}
        ds.setOutput(root + 'text/' + split + '_descriptions.txt', split,
                     type='text',
                     id='descriptions',
                     tokenization='tokenize_basic',
                     max_text_len=caption_len,
                     **vocabulary_option)

    # Inputs: every image is repeated once per description
    image_lists = (
        ('train', 'text/Flickr_8k.trainImages.txt'),
        ('val', 'text/Flickr_8k.devImages.txt'),
        ('test', 'text/Flickr_8k.testImages.txt'),
    )
    for split, relative_path in image_lists:
        ds.setInput(root + relative_path, split,
                    type='image', id='images', repeat_set=captions_per_image)

    # Mean image used for preprocessing the data
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='images')

    # Store the dataset, then recover it again from disk
    saveDataset(ds, 'Datasets')
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Fetch a first batch as a sanity check and report its size
    X, Y = ds.getXY('train', 10)
    sample_counts = (len(X), len(Y))
    logging.info('Sample data loaded correctly. %d input samples. %d output samples' % sample_counts)
def loadFood101():
    """Build the Food101 dataset, store it and read it back.

    Images are registered as inputs and categorical labels as outputs for
    the 'train', 'val' and 'test' splits; the class list and the train mean
    are set; the dataset is saved under 'Datasets', reloaded, and a batch of
    10 train samples is requested as a sanity check.

    :return: None.
    """
    logging.info('Loading Food101 dataset')
    download_hint = 'INFO: in order to load this dataset it must be placed in ../data/Food101/images/ after downloading it form https://www.vision.ee.ethz.ch/datasets_extra/food-101/'
    logging.info(download_hint)

    base_path = '../data/Food101/'
    meta_dir = base_path + 'meta/'
    name = 'Food101'
    images_id = 'images'
    labels_id = 'labels'

    ds = Dataset(name, base_path + 'images')

    # Inputs (images); a crop size is only given for the train split
    ds.setInput(meta_dir + 'train_split.txt', 'train',
                type='image', id=images_id, img_size_crop=[227, 227, 3])
    ds.setInput(meta_dir + 'val_split.txt', 'val', type='image', id=images_id)
    ds.setInput(meta_dir + 'test.txt', 'test', type='image', id=images_id)

    # Outputs (labels)
    ds.setOutput(meta_dir + 'train_labels.txt', 'train', type='categorical', id=labels_id)
    ds.setOutput(meta_dir + 'val_labels.txt', 'val', type='categorical', id=labels_id)
    ds.setOutput(meta_dir + 'test_labels.txt', 'test', type='categorical', id=labels_id)

    # List of classes (strings)
    ds.setClasses(meta_dir + 'classes.txt', labels_id)

    # Mean image used for preprocessing the data
    channel_means = [122.6795, 116.6690, 104.0067]
    ds.setTrainMean(mean_image=channel_means, data_id=images_id)

    # Persist the dataset and recover it again
    saveDataset(ds, 'Datasets')
    stored_file = 'Datasets/Dataset_' + name + '.pkl'
    ds = loadDataset(stored_file)

    # Recover the first batch of data
    X, Y = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
def build_dataset(params):
    """Build the 3D-label dataset described by ``params`` or reload a stored one.

    When ``params['REBUILD_DATASET']`` is truthy a new ``Dataset`` is created:
    raw images are registered as inputs, the train mean is set (or computed
    when ``params['MEAN_IMAGE']`` is falsy), classes and label outputs of
    type ``params['TYPE_OUT']`` are registered, optional class weights are
    stored in ``ds.extra_variables`` and, if requested, an additional binary
    multi-label output is added. The 'train' split is mandatory; 'val' and
    'test' are only processed when present and non-empty in
    ``params['IMG_FILES']``. The result is stored with ``saveDataset``.
    Otherwise the previously stored instance is loaded.

    :param params: configuration mapping. Keys read here: REBUILD_DATASET,
        VERBOSE, DATASET_NAME, DATA_ROOT_PATH, IMG_FILES, INPUTS_IDS_DATASET,
        OUTPUTS_IDS_DATASET, IMG_SIZE, IMG_CROP_SIZE, RGB, MEAN_IMAGE,
        TYPE_OUT, CLASSES_PATH, NUM_MODEL_POOLINGS, DISCARD_CLASSES,
        NUM_CLASSES, WEIGHT_CLASSES, APPLY_MULTILABEL_CLASSIFICATION and
        DATASET_STORE_PATH.
    :return: the newly built or the reloaded dataset instance.
    """
    if not params['REBUILD_DATASET']:
        # Nothing to build: recover the stored instance with a single call.
        return loadDataset(params['DATASET_STORE_PATH'] + '/Dataset_' + params['DATASET_NAME'] + '.pkl')

    verbose = params['VERBOSE'] > 0
    if verbose:
        logging.info('Building ' + params['DATASET_NAME'] + ' dataset')

    root = params['DATA_ROOT_PATH']
    ds = Dataset(params['DATASET_NAME'], root, silence=not verbose)

    # 'train' is always processed; 'val' and 'test' only when configured.
    active_splits = ['train'] + [
        split for split in ('val', 'test')
        if split in params['IMG_FILES'] and params['IMG_FILES'][split]
    ]

    # ----- Inputs: raw images -----
    input_id = params['INPUTS_IDS_DATASET'][0]
    for split in active_splits:
        image_list = root + '/' + params['IMG_FILES'][split][0]
        ds.setInput(image_list, split,
                    type='raw-image',
                    id=input_id,
                    img_size=params['IMG_SIZE'],
                    img_size_crop=params['IMG_CROP_SIZE'],
                    use_RGB=params['RGB'])

    # ----- Train mean: use the configured one, else compute it -----
    # NOTE: rescaling the mean by 1/255 when params['NORMALIZE'] is set
    # is disabled (it was commented out).
    mean_image = params['MEAN_IMAGE']
    if mean_image:
        ds.setTrainMean(mean_image, input_id)
    else:
        ds.calculateTrainMean(input_id)

    # ----- Outputs: list of classes (strings) -----
    output_id = params['OUTPUTS_IDS_DATASET'][0]
    label_kind = params['TYPE_OUT']
    if label_kind == '3DLabel':
        ds.setClasses(root + '/' + params['CLASSES_PATH'], output_id)
    elif label_kind == '3DSemanticLabel':
        classes_file = root + '/' + params['CLASSES_PATH']
        # The class name is the first comma-separated field of every line.
        with open(classes_file, 'r') as handle:
            class_names = [row.rstrip('\n').split(',')[0] for row in handle]
        ds.setClasses(class_names, output_id)
        ds.setSemanticClasses(classes_file, output_id)

    # ----- Outputs: 3DLabels or 3DSemanticLabels -----
    for split in active_splits:
        ds.setOutput(root + '/' + params['IMG_FILES'][split][1], split,
                     type=label_kind,
                     id=output_id,
                     associated_id_in=input_id,
                     num_poolings=params['NUM_MODEL_POOLINGS'])

    # ----- Class weights -----
    weights_key = 'class_weights_' + output_id
    discarded = params['DISCARD_CLASSES']
    if discarded:
        # Weight 1 for every class, 0 for the discarded ones.
        mask = np.ones((params['NUM_CLASSES'], ))
        for class_idx in discarded:
            mask[class_idx] = 0.0
        ds.extra_variables[weights_key] = mask

    # Explicit weights are stored under the same key, so they replace the
    # discard mask when both are configured.
    if params['WEIGHT_CLASSES']:
        ds.extra_variables[weights_key] = params['WEIGHT_CLASSES']

    # ----- Outputs: single multi-label vector -----
    if params['APPLY_MULTILABEL_CLASSIFICATION']:
        n_classes = len(ds.classes[output_id])
        for split in active_splits:
            multilabel = convert3DLabels2multilabel(
                root + '/' + params['IMG_FILES'][split][1], n_classes)
            ds.setOutput(multilabel, split, type='binary', id=params['OUTPUTS_IDS_DATASET'][1])

    # Store the finished dataset so later runs can simply reload it.
    saveDataset(ds, params['DATASET_STORE_PATH'])
    return ds
def build_dataset(params):
    """Build the food classification dataset from ``params`` or reload a stored one.

    When ``params['REBUILD_DATASET']`` is truthy a new ``Dataset`` is created,
    raw images are registered as inputs for the 'train', 'val' and 'test'
    splits, the train mean is set and the outputs are registered according
    to ``params['CLASSIFICATION_TYPE']``:

    * 'single-label': categorical labels read from ``params['LABELS_FILES']``.
    * 'multi-label': binary class vectors produced by
      ``convertIngredientsList2BinaryClasses``; the returned vocabularies are
      stored in ``ds.extra_variables``.

    The result is stored with ``saveDataset``. Otherwise the instance
    previously stored under ``params['STORE_PATH']`` is loaded.

    :param params: configuration mapping. Keys read here: REBUILD_DATASET,
        VERBOSE, DATASET_NAME, DATA_ROOT_PATH, SUFFIX_DATASET (optional,
        defaults to '/images'), IMG_FILES, INPUTS_IDS_DATASET, IMG_SIZE,
        IMG_SIZE_CROP, MEAN_IMAGE, CLASSIFICATION_TYPE, LABELS_FILES,
        CLASSES_PATH, LABELS_TYPE_LIST (optional, defaults to 'identifiers'),
        OUTPUTS_IDS_DATASET, LABELS_FILES_FOOD and STORE_PATH.
    :return: the newly built or the reloaded dataset instance.
    """
    if not params['REBUILD_DATASET']:
        # We can easily recover it with a single line
        return loadDataset(params['STORE_PATH'] + '/Dataset_' + params['DATASET_NAME'] + '.pkl')

    # We build a new dataset instance
    verbose = params['VERBOSE'] > 0
    if verbose:
        logging.info('Building ' + params['DATASET_NAME'] + ' dataset')

    base_path = params['DATA_ROOT_PATH']
    ds = Dataset(params['DATASET_NAME'],
                 base_path + params.get('SUFFIX_DATASET', '/images'),
                 silence=not verbose)

    splits = ('train', 'val', 'test')

    ##### INPUT DATA
    ### IMAGES
    input_id = params['INPUTS_IDS_DATASET'][0]
    for split in splits:
        ds.setInput(base_path + '/' + params['IMG_FILES'][split], split,
                    type='raw-image', id=input_id,
                    img_size=params['IMG_SIZE'], img_size_crop=params['IMG_SIZE_CROP'])

    # Set train mean.
    # Both arguments are passed positionally: the keyword of the second one
    # is `id` in some wrapper versions and `data_id` in others, so naming it
    # would tie this function to a single version.
    ds.setTrainMean(params['MEAN_IMAGE'], input_id)

    ##### OUTPUT DATA
    output_id = params['OUTPUTS_IDS_DATASET'][0]
    if params['CLASSIFICATION_TYPE'] == 'single-label':
        for split in splits:
            ds.setOutput(base_path + '/' + params['LABELS_FILES'][split], split,
                         type='categorical', id=output_id)

    elif params['CLASSIFICATION_TYPE'] == 'multi-label':
        # Convert list of ingredients into classes
        logging.info('Preprocessing list of ingredients for assigning vocabulary as image classes.')
        classes, word2idx, idx2word = convertIngredientsList2BinaryClasses(
            base_path,
            params['LABELS_FILES'],
            params['CLASSES_PATH'],
            type_list=params.get('LABELS_TYPE_LIST', 'identifiers'))

        # Insert them as outputs
        for split in splits:
            ds.setOutput(classes[split], split, type='binary', id=output_id)

        # Insert vocabularies
        ds.extra_variables['word2idx_binary'] = word2idx
        ds.extra_variables['idx2word_binary'] = idx2word

        # NOTE(review): the nesting of this branch was reconstructed from a
        # flattened source; it is assumed to belong to the multi-label case.
        # Confirm against the original file.
        if 'Food_and_Ingredients' in params['DATASET_NAME']:
            # Second output: categorical labels from LABELS_FILES_FOOD
            for split in splits:
                ds.setOutput(base_path + '/' + params['LABELS_FILES_FOOD'][split], split,
                             type='categorical', id=params['OUTPUTS_IDS_DATASET'][1])

    # We have finished loading the dataset, now we can store it for future use
    saveDataset(ds, params['STORE_PATH'])
    return ds