def train_new_top_model(all_features_hdf5, all_labels_hdf5):
    """Train a small fully-connected "top" model on pre-extracted image features.

    The bottleneck (or other image-extracted) features are stored in HDF5
    files; HDF5MatrixCacheIterator is used to read them.  By default
    HDF5MatrixCacheIterator doesn't transform and doesn't shuffle, so the
    old ImageDataGenerator can still be used on top of it.

    Args:
        all_features_hdf5: path to an HDF5 file whose 'data' dataset holds
            the extracted feature tensor (indexed sample-first; shape[1:]
            is used as the model's input shape).
        all_labels_hdf5: path to an HDF5 file whose 'data' dataset holds
            the matching targets (85 per sample, per the output layer).

    NOTE(review): depends on module-level names defined elsewhere in this
    file: batch_size, epochs, top_model_weights_path, f1 (custom metric),
    calculateDividingFactor, h5 (presumably h5py), h5c (presumably
    h5py_cache), and an ImageDataGenerator with a flow_hdf5 extension —
    confirm against the rest of the module.
    """
    datagen = ImageDataGenerator(validation_split=0.2)
    # Open once just to read the feature tensor's shape, then close.
    f1_trainvalidation = h5.File(all_features_hdf5, 'r')
    shape = f1_trainvalidation['data'].shape
    f1_trainvalidation.close()
    # 15 GB is the upper limit for cache memory; calculateDividingFactor
    # returns a total memory estimate and the factor to divide it by.
    total_mem_usage, dividing_factor = calculateDividingFactor(shape)
    #print("Dividing factor {}:".format(dividing_factor))
    print("Cached memory usage: {}".format(total_mem_usage / (1024**3) / dividing_factor))
    # NOTE(review): chunk_shape is computed but never passed to anything in
    # the visible code — presumably intended for the h5c.File call; confirm.
    chunk_shape = (1, max(shape[1] // 2, 1), max(shape[2] // 2, 1), max(shape[3] // 2, 1))
    # Re-open the features through the chunk-cached reader (h5c) with the
    # computed cache budget; labels go through plain h5.
    f1_trainvalidation = h5c.File(all_features_hdf5, 'r', chunk_cache_mem_size=total_mem_usage // dividing_factor)
    f1_label = h5.File(all_labels_hdf5, 'r')
    # shuffle=False on the iterators: shuffling is requested at fit time
    # instead (shuffle=True in fit_generator below).
    train_generator = datagen.flow_hdf5(f1_trainvalidation['data'], f1_label['data'], subset='training', batch_size=batch_size, shuffle=False)
    validation_generator = datagen.flow_hdf5(f1_trainvalidation['data'], f1_label['data'], subset='validation', batch_size=batch_size, shuffle=False)
    # Top model: flatten the feature maps, one hidden layer with dropout,
    # multi-label sigmoid output (85 outputs -> binary_crossentropy).
    model = Sequential()
    model.add(Flatten(input_shape=shape[1:]))
    model.add(Dense(1024, activation='relu'))
    #model.add(Dense(256,input_shape=(train_data_shape[1:],),activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(85, activation='sigmoid'))
    #model.compile(optimizer='rmsprop',loss='binary_crossentropy', metrics=['accuracy'])
    model.compile(loss='binary_crossentropy', optimizer="adam", metrics=[f1])
    model.fit_generator(
        generator=train_generator,
        steps_per_epoch=int(ceil(train_generator.samples / batch_size)),
        validation_data=validation_generator,
        max_queue_size= 10, # use a value which can fit batch_size * image_size * max_queue_size in your CPU memory
        workers= 4, # I don't see multi workers can have any performance benefit without multi threading
        use_multiprocessing=False, # HDF5Matrix cannot support multi-threads
        validation_steps=int(ceil(validation_generator.samples / batch_size)),
        shuffle=True,
        epochs=epochs)
    model.save_weights(top_model_weights_path)
    # Second pass: close and re-open the data with a fresh chunk cache, then
    # continue training (fine-tune) the same model.
    f1_trainvalidation.close()
    # 15 GB is the upper limit for cache memory.
    total_mem_usage, dividing_factor = calculateDividingFactor(shape)
    #print("Dividing factor {}:".format(dividing_factor))
    print("Cached memory usage: {}".format(total_mem_usage / (1024**3) / dividing_factor))
    # NOTE(review): this fixed chunk_shape is also unused in the visible
    # code; the (100, 100, 3) values look raw-image sized — confirm intent.
    chunk_shape = (1, 100, 100, 3)
    f1_trainvalidation = h5c.File(all_features_hdf5, 'r', chunk_cache_mem_size=total_mem_usage // dividing_factor)
    f1_label = h5.File(all_labels_hdf5, 'r')
    train_generator = datagen.flow_hdf5(f1_trainvalidation['data'], f1_label['data'], subset='training', batch_size=batch_size, shuffle=False)
    validation_generator = datagen.flow_hdf5(f1_trainvalidation['data'], f1_label['data'], subset='validation', batch_size=batch_size, shuffle=False)
    # fine-tune the model
    model.fit_generator(
        generator=train_generator,
        steps_per_epoch=int(ceil(train_generator.samples / batch_size)),
        validation_data=validation_generator,
        max_queue_size= 10, # use a value which can fit batch_size * image_size * max_queue_size in your CPU memory