Code example #1
0
def train_new_top_model(all_features_hdf5, all_labels_hdf5):
    """Train a small dense classifier ("top model") on pre-extracted image features.

    The bottleneck (or other extracted) image features and their labels are
    stored in HDF5 files. The feature file is reopened through ``h5c``
    (h5py_cache) with an enlarged chunk cache so sequential batch reads are
    served from memory, and Keras generators stream batches from the open
    datasets during training.

    Args:
        all_features_hdf5: Path to the HDF5 file holding the feature array
            under key ``'data'``.
        all_labels_hdf5: Path to the HDF5 file holding the label array
            under key ``'data'``.

    Side effects:
        Trains a new Sequential model and writes its weights to
        ``top_model_weights_path`` (module-level constant).
    """
    # HDF5MatrixCacheIterator neither transforms nor shuffles by default,
    # so the plain ImageDataGenerator (with only a validation split) works.
    datagen = ImageDataGenerator(validation_split=0.2)

    # Peek at the feature array's shape, then close; the file is reopened
    # below with a tuned chunk cache.
    with h5.File(all_features_hdf5, 'r') as feature_file:
        shape = feature_file['data'].shape

    # 15 GB is the upper limit for cache memory (enforced by the helper).
    total_mem_usage, dividing_factor = calculateDividingFactor(shape)
    print("Cached memory usage: {}".format(total_mem_usage / (1024**3) /
                                           dividing_factor))
    # NOTE(review): the original computed an unused chunk_shape here; removed.

    f1_trainvalidation = h5c.File(all_features_hdf5,
                                  'r',
                                  chunk_cache_mem_size=total_mem_usage //
                                  dividing_factor)
    f1_label = h5.File(all_labels_hdf5, 'r')
    try:
        # shuffle=False: HDF5-backed datasets need ordered index access.
        train_generator = datagen.flow_hdf5(f1_trainvalidation['data'],
                                            f1_label['data'],
                                            subset='training',
                                            batch_size=batch_size,
                                            shuffle=False)
        validation_generator = datagen.flow_hdf5(f1_trainvalidation['data'],
                                                 f1_label['data'],
                                                 subset='validation',
                                                 batch_size=batch_size,
                                                 shuffle=False)

        # Small dense head: Flatten -> Dense(relu) -> Dropout -> 85 sigmoid
        # outputs (multi-label classification with binary cross-entropy).
        model = Sequential()
        model.add(Flatten(input_shape=shape[1:]))
        model.add(Dense(1024, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(85, activation='sigmoid'))

        model.compile(loss='binary_crossentropy',
                      optimizer="adam",
                      metrics=[f1])
        model.fit_generator(
            generator=train_generator,
            steps_per_epoch=int(ceil(train_generator.samples / batch_size)),
            validation_data=validation_generator,
            max_queue_size=
            10,  # use a value which can fit batch_size * image_size * max_queue_size in your CPU memory
            workers=
            4,  # I don't see multi workers can have any performance benefit without multi threading
            use_multiprocessing=False,  # HDF5Matrix cannot support multi-threads
            validation_steps=int(
                ceil(validation_generator.samples / batch_size)),
            shuffle=True,
            epochs=epochs)
        model.save_weights(top_model_weights_path)
    finally:
        # Fix: the original leaked both HDF5 handles; always close them.
        f1_label.close()
        f1_trainvalidation.close()
Code example #2
0
# NOTE(review): script fragment — relies on names bound earlier in the file
# (shape, datagen, batch_size, all_features_hdf5, all_labels_hdf5, h5, h5c,
# calculateDividingFactor) and binds generators consumed further below.
# Close the plain h5py handle before reopening the same file via h5py_cache.
f1_trainvalidation.close()
#15 Gb is upper limit for cache memory.
total_mem_usage, dividing_factor = calculateDividingFactor(shape)

#print("Dividing factor {}:".format(dividing_factor))
# Report the effective chunk-cache budget in GiB.
print("Cached memory usage: {}".format(total_mem_usage / (1024**3) /
                                       dividing_factor))
# NOTE(review): chunk_shape is assigned but never used in this fragment —
# confirm whether it was meant to be passed to the File() call below.
chunk_shape = (1, 100, 100, 3)
# Reopen the feature file with an enlarged HDF5 chunk cache so sequential
# batch reads during training are served from memory rather than disk.
f1_trainvalidation = h5c.File(all_features_hdf5,
                              'r',
                              chunk_cache_mem_size=total_mem_usage //
                              dividing_factor)
f1_label = h5.File(all_labels_hdf5, 'r')
# Build training/validation streams over the open HDF5 datasets; the split
# comes from the generator's validation_split. shuffle=False — presumably
# because HDF5-backed datasets need ordered index access (TODO confirm).
train_generator = datagen.flow_hdf5(f1_trainvalidation['data'],
                                    f1_label['data'],
                                    subset='training',
                                    batch_size=batch_size,
                                    shuffle=False)
validation_generator = datagen.flow_hdf5(f1_trainvalidation['data'],
                                         f1_label['data'],
                                         subset='validation',
                                         batch_size=batch_size,
                                         shuffle=False)
# fine-tune the model
model.fit_generator(
    generator=train_generator,
    steps_per_epoch=int(ceil(train_generator.samples / batch_size)),
    validation_data=validation_generator,
    max_queue_size=
    10,  # use a value which can fit batch_size * image_size * max_queue_size in your CPU memory