def build_dataset(params):
    
    if params['REBUILD_DATASET']:  # We build a new dataset instance
        if params['VERBOSE'] > 0:
            silence = False
            logging.info('Building ' + params['DATASET_NAME'] + ' dataset')
        else:
            silence = True

        base_path = params['DATA_ROOT_PATH']
        name = params['DATASET_NAME']
        ds = Dataset(name, base_path, silence=silence)

        ##### INPUT DATA
        # Let's load the associated images (inputs)
        num_cap = 1 # We only extract one feature vector per image
        list_train = base_path + '/' + params['IMG_FILES']['train'][0]
        list_val = base_path + '/' + params['IMG_FILES']['val'][0]
        list_test = base_path + '/' + params['IMG_FILES']['test'][0]
        ds.setInput(list_train, 'train',
                    type='raw-image', id=params['INPUTS_IDS_DATASET'][0],
                    img_size=params['IMG_SIZE'], img_size_crop=params['IMG_CROP_SIZE'],
                    repeat_set=num_cap)
        ds.setInput(list_val, 'val',
                    type='raw-image', id=params['INPUTS_IDS_DATASET'][0],
                    img_size=params['IMG_SIZE'], img_size_crop=params['IMG_CROP_SIZE'],
                    repeat_set=num_cap)
        ds.setInput(list_test, 'test',
                    type='raw-image', id=params['INPUTS_IDS_DATASET'][0],
                    img_size=params['IMG_SIZE'], img_size_crop=params['IMG_CROP_SIZE'],
                    repeat_set=num_cap)
        ### IMAGES' associated IDs
        ds.setInput(base_path + '/' + params['IMG_FILES']['train'][1], 'train',
                    type='id', id=params['INPUTS_IDS_DATASET'][0] + '_ids',
                    repeat_set=num_cap)
        ds.setInput(base_path + '/' + params['IMG_FILES']['val'][1], 'val',
                    type='id', id=params['INPUTS_IDS_DATASET'][0] + '_ids',
                    repeat_set=num_cap)
        ds.setInput(base_path + '/' + params['IMG_FILES']['test'][1], 'test',
                    type='id', id=params['INPUTS_IDS_DATASET'][0] + '_ids',
                    repeat_set=num_cap)
        # Train mean
        ds.setTrainMean(params['MEAN_IMAGE'], params['INPUTS_IDS_DATASET'][0])

        ###### OUTPUT DATA: None

        # Process the dataset to keep only one caption per image, storing the rest in a dict with the following format:
        #        ds.extra_variables[set_name][id_output][img_position] = [cap1, cap2, cap3, ..., capN]
        # keep_n_captions(ds, repeat=[1, 1], n=1, set_names=['val', 'test'])

        # We have finished loading the dataset; now we can store it for future use
        saveDataset(ds, params['DATASET_STORE_PATH'])
    else:
        # We can easily recover it with a single line
        ds = loadDataset(params['DATASET_STORE_PATH'] + '/Dataset_' + params['DATASET_NAME'] + '.pkl')

    return ds
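
A minimal sketch of how this build_dataset variant might be driven follows; the params dictionary and its values are hypothetical placeholders that only cover the keys read by the function above.

import logging

# Hypothetical configuration: only the keys read by build_dataset above are
# listed, and every path and value is a placeholder, not a real setting.
params = {
    'REBUILD_DATASET': True,
    'VERBOSE': 1,
    'DATASET_NAME': 'MyImageDataset',
    'DATA_ROOT_PATH': '/path/to/data',
    'DATASET_STORE_PATH': 'Datasets',
    # Per-split pairs: [image list file, image ids file]
    'IMG_FILES': {'train': ['train_imgs.txt', 'train_ids.txt'],
                  'val': ['val_imgs.txt', 'val_ids.txt'],
                  'test': ['test_imgs.txt', 'test_ids.txt']},
    'INPUTS_IDS_DATASET': ['images'],
    'IMG_SIZE': [256, 256, 3],
    'IMG_CROP_SIZE': [224, 224, 3],
    'MEAN_IMAGE': [122.6795, 116.6690, 104.0067],
}

logging.basicConfig(level=logging.INFO)
ds = build_dataset(params)           # builds the dataset and stores it as a .pkl
params['REBUILD_DATASET'] = False
ds = build_dataset(params)           # this time the stored .pkl is simply reloaded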
Example #2
def loadMSVD():
    logging.info('Loading MSVD dataset')

    # Build basic dataset structure
    #    we assign it a name and the path where the images are stored

    base_path = '/media/HDD_2TB/DATASETS/MSVD/'
    name = 'MSVD_VideoDescription'
    ds = Dataset(name, base_path)
    max_text_len = 35

    # Let's load the train, val and test splits of the descriptions (outputs)
    #    the files include one description per line. In this dataset, a variable number
    #    of descriptions per video is provided.

    ds.setOutput(base_path + 'train_descriptions.txt', 'train',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', build_vocabulary=True, max_text_len=max_text_len)
    ds.setOutput(base_path + 'val_descriptions.txt', 'val',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)
    ds.setOutput(base_path + 'test_descriptions.txt', 'test',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)

    # Let's load the associated videos (inputs)
    #    note that this dataset provides a different number of sentences per video;
    #    for this reason we pass the parameter 'repeat_set'=num_captions, where num_captions is a list
    #    containing the number of captions for each video.

    num_captions_train = np.load(base_path + 'train_descriptions_counts.npy')
    num_captions_val = np.load(base_path + 'val_descriptions_counts.npy')
    num_captions_test = np.load(base_path + 'test_descriptions_counts.npy')

    ds.setInput([base_path + 'train_imgs_list.txt', base_path + 'train_imgs_counts.txt'],
                'train', type='video', id='videos',
                repeat_set=num_captions_train)
    ds.setInput([base_path + 'val_imgs_list.txt', base_path + 'val_imgs_counts.txt'],
                'val', type='video', id='videos',
                repeat_set=num_captions_val)
    ds.setInput([base_path + 'test_imgs_list.txt', base_path + 'test_imgs_counts.txt'],
                'test', type='video', id='videos',
                repeat_set=num_captions_test)

    # Now let's set the dataset mean image for preprocessing the data
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], id='videos')

    # We have finished loading the dataset; now we can store it for future use
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Let's retrieve the first batch of data
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
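
A note on the repeat_set arrays used above: each *_descriptions_counts.npy file is just a vector with one integer per video (how many captions it has), so that every video gets repeated once per caption. A small sketch of how such a counts file could be generated is shown below; the 'video_id<TAB>caption' input format and the file names are assumptions for illustration, not part of the MSVD loader.

import numpy as np
from collections import OrderedDict

def build_caption_counts(annotations_path, output_path):
    # Assumed input format: one 'video_id<TAB>caption' pair per line, with all
    # captions of the same video appearing consecutively.
    counts = OrderedDict()
    with open(annotations_path, 'r') as f:
        for line in f:
            video_id = line.split('\t')[0]
            counts[video_id] = counts.get(video_id, 0) + 1
    # One integer per video, in order of first appearance: this is the kind of
    # vector passed as repeat_set=num_captions in ds.setInput(...) above.
    np.save(output_path, np.array(list(counts.values()), dtype=np.int32))
    return counts

# build_caption_counts('train_annotations.tsv', 'train_descriptions_counts.npy')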
Example #3
def loadMSVD():
    logging.info('Loading MSVD dataset')

    # Build basic dataset structure
    #    we assign it a name and the path where the images are stored

    base_path = '/media/HDD_2TB/DATASETS/MSVD/'
    name = 'MSVD_VideoDescription'
    ds = Dataset(name, base_path)
    max_text_len = 35

    # Let's load the train, val and test splits of the descriptions (outputs)
    #    the files include one description per line. In this dataset, a variable number
    #    of descriptions per video is provided.

    ds.setOutput(base_path + 'train_descriptions.txt', 'train',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', build_vocabulary=True, max_text_len=max_text_len)
    ds.setOutput(base_path + 'val_descriptions.txt', 'val',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)
    ds.setOutput(base_path + 'test_descriptions.txt', 'test',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)

    # Let's load the associated videos (inputs)
    #    note that this dataset provides a different number of sentences per video;
    #    for this reason we pass the parameter 'repeat_set'=num_captions, where num_captions is a list
    #    containing the number of captions for each video.

    num_captions_train = np.load(base_path + 'train_descriptions_counts.npy')
    num_captions_val = np.load(base_path + 'val_descriptions_counts.npy')
    num_captions_test = np.load(base_path + 'test_descriptions_counts.npy')

    ds.setInput([base_path + 'train_imgs_list.txt', base_path + 'train_imgs_counts.txt'],
                'train', type='video', id='videos',
                repeat_set=num_captions_train)
    ds.setInput([base_path + 'val_imgs_list.txt', base_path + 'val_imgs_counts.txt'],
                'val', type='video', id='videos',
                repeat_set=num_captions_val)
    ds.setInput([base_path + 'test_imgs_list.txt', base_path + 'test_imgs_counts.txt'],
                'test', type='video', id='videos',
                repeat_set=num_captions_test)

    # Now let's set the dataset mean image for preprocessing the data
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='videos')

    # We have finished loading the dataset; now we can store it for future use
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Let's retrieve the first batch of data
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
Example #4
def loadFood101():
    logging.info('Loading Food101 dataset')
    logging.info(
        'INFO: in order to load this dataset it must be placed in ../data/Food101/images/ after downloading it from https://www.vision.ee.ethz.ch/datasets_extra/food-101/'
    )

    base_path = '../data/Food101/'
    name = 'Food101'
    ds = Dataset(name, base_path + 'images')

    # Insert inputs (images)
    ds.setInput(base_path + 'meta/train_split.txt',
                'train',
                type='image',
                id='images',
                img_size_crop=[227, 227, 3])
    ds.setInput(base_path + 'meta/val_split.txt',
                'val',
                type='image',
                id='images')
    ds.setInput(base_path + 'meta/test.txt', 'test', type='image', id='images')

    # Insert outputs (labels)
    ds.setOutput(base_path + 'meta/train_labels.txt',
                 'train',
                 type='categorical',
                 id='labels')
    ds.setOutput(base_path + 'meta/val_labels.txt',
                 'val',
                 type='categorical',
                 id='labels')
    ds.setOutput(base_path + 'meta/test_labels.txt',
                 'test',
                 type='categorical',
                 id='labels')

    # Set list of classes (strings)
    ds.setClasses(base_path + 'meta/classes.txt', 'labels')

    # Now let's set the dataset mean image for preprocessing the data
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067],
                    data_id='images')

    # We have finished loading the dataset; now we can store it for future use
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Let's retrieve the first batch of data
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
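
Since setClasses registers the class names under the 'labels' id, those names can later be used to turn categorical outputs back into readable strings. A small sketch under the assumption that ds is available (e.g. if loadFood101 were modified to return it) and that ds.classes is indexed by output id, as done elsewhere in these examples:

import numpy as np

# Sketch: map the categorical labels of a batch back to Food101 class names.
# Assumes ds.classes['labels'] holds the list registered via ds.setClasses(...).
class_names = ds.classes['labels']

[X, Y] = ds.getXY('train', 10)
labels_batch = np.atleast_2d(Y[0])   # assumption: first output of the batch
for label_idx in np.argmax(labels_batch, axis=-1):
    print(class_names[int(label_idx)])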
Example #5
def loadFlickr8k():
    logging.info('Loading Flickr8k dataset')

    # Build basic dataset structure
    #    we assign it a name and the path where the images are stored

    base_path = '/media/HDD_2TB/DATASETS/Flickr8k/'
    name = 'Flickr8k_ImageDescription'
    ds = Dataset(name, base_path + 'Flicker8k_Dataset')
    max_text_len = 35

    # Let's load the train, val and test splits of the descriptions (outputs)
    #    the files include one description per line
    #    and each set of 5 consecutive descriptions corresponds to a single input image

    ds.setOutput(base_path + 'text/train_descriptions.txt', 'train',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', build_vocabulary=True, max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/val_descriptions.txt', 'val',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/test_descriptions.txt', 'test',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)

    # Let's load the associated images (inputs)
    #    note that in this dataset we have 5 sentences per image;
    #    for this reason we pass the parameter 'repeat_set'=5

    ds.setInput(base_path + 'text/Flickr_8k.trainImages.txt', 'train',
                type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.devImages.txt', 'val',
                type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.testImages.txt', 'test',
                type='image', id='images', repeat_set=5)

    # Now let's set the dataset mean image for preprocessing the data
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], id='images')

    # We have finished loading the dataset; now we can store it for future use
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Let's retrieve the first batch of data
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
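
To make the effect of repeat_set=5 explicit: every image path is repeated five times so that, position by position, it lines up with its five consecutive captions. A plain-Python illustration of that alignment, independent of the Dataset class (the file names are the ones used above; the pairing logic is only illustrative):

# Toy illustration of how repeat_set=5 aligns Flickr8k images with captions:
# each image appears 5 times, matching 5 consecutive description lines.
def align_images_and_captions(image_list_path, descriptions_path, repeat=5):
    with open(image_list_path) as f:
        images = [line.strip() for line in f if line.strip()]
    with open(descriptions_path) as f:
        captions = [line.strip() for line in f if line.strip()]
    repeated_images = [img for img in images for _ in range(repeat)]
    assert len(repeated_images) == len(captions)
    return list(zip(repeated_images, captions))

# pairs = align_images_and_captions(base_path + 'text/Flickr_8k.trainImages.txt',
#                                   base_path + 'text/train_descriptions.txt')
# pairs[0] -> ('<first image name>', '<its first caption>')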
Example #6
def loadFlickr8k():
    logging.info('Loading Flickr8k dataset')

    # Build basic dataset structure
    #    we assign it a name and the path where the images are stored

    base_path = '/media/HDD_2TB/DATASETS/Flickr8k/'
    name = 'Flickr8k_ImageDescription'
    ds = Dataset(name, base_path + 'Flicker8k_Dataset')
    max_text_len = 35

    # Let's load the train, val and test splits of the descriptions (outputs)
    #    the files include one description per line
    #    and each set of 5 consecutive descriptions corresponds to a single input image

    ds.setOutput(base_path + 'text/train_descriptions.txt', 'train',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', build_vocabulary=True, max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/val_descriptions.txt', 'val',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)
    ds.setOutput(base_path + 'text/test_descriptions.txt', 'test',
                 type='text', id='descriptions',
                 tokenization='tokenize_basic', max_text_len=max_text_len)

    # Let's load the associated images (inputs)
    #    note that in this dataset we have 5 sentences per image;
    #    for this reason we pass the parameter 'repeat_set'=5

    ds.setInput(base_path + 'text/Flickr_8k.trainImages.txt', 'train', type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.devImages.txt', 'val', type='image', id='images', repeat_set=5)
    ds.setInput(base_path + 'text/Flickr_8k.testImages.txt', 'test', type='image', id='images', repeat_set=5)

    # Now let's set the dataset mean image for preprocessing the data
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='images')

    # We have finished loading the dataset; now we can store it for future use
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Let's retrieve the first batch of data
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly. %d input samples. %d output samples' % (len(X), len(Y)))
Example #7
def loadFood101():
    logging.info('Loading Food101 dataset')
    logging.info(
        'INFO: in order to load this dataset it must be placed in ../data/Food101/images/ after downloading it from https://www.vision.ee.ethz.ch/datasets_extra/food-101/')

    base_path = '../data/Food101/'
    name = 'Food101'
    ds = Dataset(name, base_path + 'images')

    # Insert inputs (images)
    ds.setInput(base_path + 'meta/train_split.txt', 'train',
                type='image', id='images', img_size_crop=[227, 227, 3])
    ds.setInput(base_path + 'meta/val_split.txt', 'val',
                type='image', id='images')
    ds.setInput(base_path + 'meta/test.txt', 'test',
                type='image', id='images')

    # Insert outputs (labels)
    ds.setOutput(base_path + 'meta/train_labels.txt', 'train',
                 type='categorical', id='labels')
    ds.setOutput(base_path + 'meta/val_labels.txt', 'val',
                 type='categorical', id='labels')
    ds.setOutput(base_path + 'meta/test_labels.txt', 'test',
                 type='categorical', id='labels')

    # Set list of classes (strings)
    ds.setClasses(base_path + 'meta/classes.txt', 'labels')

    # Now let's set the dataset mean image for preprocessing the data
    ds.setTrainMean(mean_image=[122.6795, 116.6690, 104.0067], data_id='images')

    # We have finished loading the dataset; now we can store it for future use
    saveDataset(ds, 'Datasets')

    # We can easily recover it with a single line
    ds = loadDataset('Datasets/Dataset_' + name + '.pkl')

    # Let's retrieve the first batch of data
    [X, Y] = ds.getXY('train', 10)
    logging.info('Sample data loaded correctly.')
Example #8
def build_dataset(params):

    if params['REBUILD_DATASET']:  # We build a new dataset instance
        if (params['VERBOSE'] > 0):
            silence = False
            logging.info('Building ' + params['DATASET_NAME'] + ' dataset')
        else:
            silence = True

        base_path = params['DATA_ROOT_PATH']
        name = params['DATASET_NAME']
        ds = Dataset(name, base_path, silence=silence)

        ##### INPUT DATA
        # Let's load the images (inputs)

        ### IMAGES
        list_train = base_path + '/' + params['IMG_FILES']['train'][0]
        ds.setInput(list_train,
                    'train',
                    type='raw-image',
                    id=params['INPUTS_IDS_DATASET'][0],
                    img_size=params['IMG_SIZE'],
                    img_size_crop=params['IMG_CROP_SIZE'],
                    use_RGB=params['RGB'])
        if 'val' in params['IMG_FILES'] and params['IMG_FILES']['val']:
            list_val = base_path + '/' + params['IMG_FILES']['val'][0]
            ds.setInput(list_val,
                        'val',
                        type='raw-image',
                        id=params['INPUTS_IDS_DATASET'][0],
                        img_size=params['IMG_SIZE'],
                        img_size_crop=params['IMG_CROP_SIZE'],
                        use_RGB=params['RGB'])
        if 'test' in params['IMG_FILES'] and params['IMG_FILES']['test']:
            list_test = base_path + '/' + params['IMG_FILES']['test'][0]
            ds.setInput(list_test,
                        'test',
                        type='raw-image',
                        id=params['INPUTS_IDS_DATASET'][0],
                        img_size=params['IMG_SIZE'],
                        img_size_crop=params['IMG_CROP_SIZE'],
                        use_RGB=params['RGB'])

        # Train mean
        if params['MEAN_IMAGE']:
            # if params['NORMALIZE']:
            #    params['MEAN_IMAGE'] = [m / 255. for m in params['MEAN_IMAGE']]
            ds.setTrainMean(params['MEAN_IMAGE'],
                            params['INPUTS_IDS_DATASET'][0])
        else:
            ds.calculateTrainMean(params['INPUTS_IDS_DATASET'][0])

        ##### OUTPUT DATA
        if params['TYPE_OUT'] == '3DLabel':
            # Set list of classes (strings)
            ds.setClasses(base_path + '/' + params['CLASSES_PATH'],
                          params['OUTPUTS_IDS_DATASET'][0])
        elif params['TYPE_OUT'] == '3DSemanticLabel':
            # Set list of classes (strings)
            classes_names = []
            with open(base_path + '/' + params['CLASSES_PATH'], 'r') as f:
                for line in f:
                    line = line.rstrip('\n').split(',')[0]
                    classes_names.append(line)
            ds.setClasses(classes_names, params['OUTPUTS_IDS_DATASET'][0])
            ds.setSemanticClasses(base_path + '/' + params['CLASSES_PATH'],
                                  params['OUTPUTS_IDS_DATASET'][0])

        ### 3DLabels or 3DSemanticLabels
        ds.setOutput(base_path + '/' + params['IMG_FILES']['train'][1],
                     'train',
                     type=params['TYPE_OUT'],
                     id=params['OUTPUTS_IDS_DATASET'][0],
                     associated_id_in=params['INPUTS_IDS_DATASET'][0],
                     num_poolings=params['NUM_MODEL_POOLINGS'])
        if 'val' in params['IMG_FILES'] and params['IMG_FILES']['val']:
            ds.setOutput(base_path + '/' + params['IMG_FILES']['val'][1],
                         'val',
                         type=params['TYPE_OUT'],
                         id=params['OUTPUTS_IDS_DATASET'][0],
                         associated_id_in=params['INPUTS_IDS_DATASET'][0],
                         num_poolings=params['NUM_MODEL_POOLINGS'])
        if 'test' in params['IMG_FILES'] and params['IMG_FILES']['test']:
            ds.setOutput(base_path + '/' + params['IMG_FILES']['test'][1],
                         'test',
                         type=params['TYPE_OUT'],
                         id=params['OUTPUTS_IDS_DATASET'][0],
                         associated_id_in=params['INPUTS_IDS_DATASET'][0],
                         num_poolings=params['NUM_MODEL_POOLINGS'])

        if params['DISCARD_CLASSES']:
            weights = np.ones((params['NUM_CLASSES'], ))
            for c in params['DISCARD_CLASSES']:
                weights[c] = 0.0
            ds.extra_variables['class_weights_' +
                               params['OUTPUTS_IDS_DATASET'][0]] = weights

        if params['WEIGHT_CLASSES']:
            weights = params['WEIGHT_CLASSES']
            ds.extra_variables['class_weights_' +
                               params['OUTPUTS_IDS_DATASET'][0]] = weights

        ### Single multi-label
        if params['APPLY_MULTILABEL_CLASSIFICATION']:
            n_classes = len(ds.classes[params['OUTPUTS_IDS_DATASET'][0]])
            multilabel = convert3DLabels2multilabel(
                base_path + '/' + params['IMG_FILES']['train'][1], n_classes)
            ds.setOutput(multilabel,
                         'train',
                         type='binary',
                         id=params['OUTPUTS_IDS_DATASET'][1])
            if 'val' in params['IMG_FILES'] and params['IMG_FILES']['val']:
                multilabel = convert3DLabels2multilabel(
                    base_path + '/' + params['IMG_FILES']['val'][1], n_classes)
                ds.setOutput(multilabel,
                             'val',
                             type='binary',
                             id=params['OUTPUTS_IDS_DATASET'][1])
            if 'test' in params['IMG_FILES'] and params['IMG_FILES']['test']:
                multilabel = convert3DLabels2multilabel(
                    base_path + '/' + params['IMG_FILES']['test'][1],
                    n_classes)
                ds.setOutput(multilabel,
                             'test',
                             type='binary',
                             id=params['OUTPUTS_IDS_DATASET'][1])

        # We have finished loading the dataset; now we can store it for future use
        saveDataset(ds, params['DATASET_STORE_PATH'])
    else:
        # We can easily recover it with a single line
        ds = loadDataset(params['DATASET_STORE_PATH'] + '/Dataset_' +
                         params['DATASET_NAME'] + '.pkl')

    return ds
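
The class weights stored above live in ds.extra_variables as a NumPy vector indexed by class. A hedged sketch of how they could be consumed afterwards, assuming ds and params come from the call to build_dataset; converting the vector into a {class_index: weight} dict is the format a Keras-style fit() accepts, and the training call itself is only a placeholder.

import numpy as np

# Sketch: recover the per-class weights stored by build_dataset above and
# convert them into a {class_index: weight} dict for a Keras-style class_weight.
output_id = params['OUTPUTS_IDS_DATASET'][0]
weights = ds.extra_variables.get('class_weights_' + output_id,
                                 np.ones((params['NUM_CLASSES'],)))
class_weight = {idx: float(w) for idx, w in enumerate(weights)}

# model.fit(..., class_weight=class_weight)   # hypothetical training call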
Example #9
def build_dataset(params):
    
    if params['REBUILD_DATASET']:  # We build a new dataset instance
        if params['VERBOSE'] > 0:
            silence = False
            logging.info('Building ' + params['DATASET_NAME'] + ' dataset')
        else:
            silence = True

        base_path = params['DATA_ROOT_PATH']
        ds = Dataset(params['DATASET_NAME'], base_path + params.get('SUFFIX_DATASET', '/images'), silence=silence)

        ##### INPUT DATA
        ### IMAGES
        ds.setInput(base_path + '/' + params['IMG_FILES']['train'], 'train',
                    type='raw-image', id=params['INPUTS_IDS_DATASET'][0],
                    img_size=params['IMG_SIZE'], img_size_crop=params['IMG_SIZE_CROP'])
        ds.setInput(base_path + '/' + params['IMG_FILES']['val'], 'val',
                    type='raw-image', id=params['INPUTS_IDS_DATASET'][0],
                    img_size=params['IMG_SIZE'], img_size_crop=params['IMG_SIZE_CROP'])
        ds.setInput(base_path + '/' + params['IMG_FILES']['test'], 'test',
                    type='raw-image', id=params['INPUTS_IDS_DATASET'][0],
                    img_size=params['IMG_SIZE'], img_size_crop=params['IMG_SIZE_CROP'])
        # Set train mean
        ds.setTrainMean(mean_image=params['MEAN_IMAGE'], id=params['INPUTS_IDS_DATASET'][0])

        ##### OUTPUT DATA
        if params['CLASSIFICATION_TYPE'] == 'single-label':

            # train split
            ds.setOutput(base_path + '/' + params['LABELS_FILES']['train'], 'train',
                         type='categorical', id=params['OUTPUTS_IDS_DATASET'][0])
            # val split
            ds.setOutput(base_path + '/' + params['LABELS_FILES']['val'], 'val',
                         type='categorical', id=params['OUTPUTS_IDS_DATASET'][0])
            # test split
            ds.setOutput(base_path + '/' + params['LABELS_FILES']['test'], 'test',
                         type='categorical', id=params['OUTPUTS_IDS_DATASET'][0])

        elif params['CLASSIFICATION_TYPE'] == 'multi-label':

            # Convert list of ingredients into classes
            logging.info('Preprocessing the list of ingredients to use its vocabulary as image classes.')
            [classes, word2idx, idx2word] = convertIngredientsList2BinaryClasses(base_path,
                                                                                 params['LABELS_FILES'],
                                                                                 params['CLASSES_PATH'],
                                                                                 type_list=params.get('LABELS_TYPE_LIST', 'identifiers'))
            # Insert them as outputs
            ds.setOutput(classes['train'], 'train', type='binary', id=params['OUTPUTS_IDS_DATASET'][0])
            ds.setOutput(classes['val'], 'val', type='binary', id=params['OUTPUTS_IDS_DATASET'][0])
            ds.setOutput(classes['test'], 'test', type='binary', id=params['OUTPUTS_IDS_DATASET'][0])

            # Insert vocabularies
            ds.extra_variables['word2idx_binary'] = word2idx
            ds.extra_variables['idx2word_binary'] = idx2word
            
            if 'Food_and_Ingredients' in params['DATASET_NAME']:
                
                # train split
                ds.setOutput(base_path + '/' + params['LABELS_FILES_FOOD']['train'], 'train',
                             type='categorical', id=params['OUTPUTS_IDS_DATASET'][1])
                # val split
                ds.setOutput(base_path + '/' + params['LABELS_FILES_FOOD']['val'], 'val',
                             type='categorical', id=params['OUTPUTS_IDS_DATASET'][1])
                # test split
                ds.setOutput(base_path + '/' + params['LABELS_FILES_FOOD']['test'], 'test',
                             type='categorical', id=params['OUTPUTS_IDS_DATASET'][1])


        # We have finished loading the dataset; now we can store it for future use
        saveDataset(ds, params['STORE_PATH'])
    else:
        # We can easily recover it with a single line
        ds = loadDataset(params['STORE_PATH'] + '/Dataset_' + params['DATASET_NAME'] + '.pkl')

    return ds
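
In the multi-label branch, the binary output vectors can be mapped back to ingredient names through the idx2word_binary vocabulary stored above in ds.extra_variables. A small sketch under that assumption; the threshold and the sample variable are illustrative only.

import numpy as np

# Sketch: decode a multi-label binary vector (one position per ingredient class,
# e.g. a prediction or a ground-truth row) back into ingredient names, using the
# vocabulary stored by build_dataset above.
def decode_ingredients(binary_vector, ds, threshold=0.5):
    idx2word = ds.extra_variables['idx2word_binary']
    active_positions = np.where(np.asarray(binary_vector) >= threshold)[0]
    return [idx2word[int(i)] for i in active_positions]

# ingredients = decode_ingredients(y_sample, ds)   # y_sample is illustrative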