def autoencoder_train(folder, batch_size, epoch_size, model_name):
    """
    Autoencoding, inherently UNET, is a data compression algorithm where the compression and decompression functions are:
    - data specific, ie, only compress data similar to what they have been trained on
    - lossy, ie, decompressed output will be degraded
    - learned automatically from examples.

    Two practical applications of autoencoders are data removal and dimensionality reduction

    There is an implementation from scikit-learn:
    http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html

    :param folder: image folder for training
    :param batch_size: training batch size
    :param epoch_size: training epoch size
    :param model_name: IR2, InceptionResNetV2; NL, NASNetLarge; NM, NASNetLarge
    :return: None
    """
    image_wh = system_config['image_wh']

    image_size = (image_wh, image_wh)
    image_shape = (image_wh, image_wh, 1)

    train_list, valid_list = create_tv_list(folder)
    print(f'Train size: {len(train_list)}, valid size: {len(valid_list)}')

    train_df = pd.DataFrame(train_list, columns=['fname', 'class'])
    valid_df = pd.DataFrame(valid_list, columns=['fname', 'class'])

    model = None
    if 'NM' in model_name:
        model_name = 'NM'
        model = NASNetMobile(include_top=True,
                             weights=None,
                             input_tensor=None,
                             input_shape=image_shape,
                             pooling='max',
                             classes=6)
    elif 'NL' in model_name:
        model_name = 'NL'
        model = NASNetLarge(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=6)
    elif 'XC' in model_name:
        model_name = 'XC'
        model = Xception(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=image_shape,
                         pooling='max',
                         classes=6)
    elif 'D21' in model_name:
        model_name = 'D21'
        model = DenseNet201(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=6)
    elif 'IV3' in model_name:
        model_name = 'IV3'
        model = InceptionV3(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=6)
    elif 'SC' in model_name:
        model_name = 'SC'
        model = simple_cnn(input_shape=image_shape, classes=6)
    else:
        model_name = 'IR2'
        model = InceptionResNetV2(include_top=True,
                                  weights=None,
                                  input_tensor=None,
                                  input_shape=image_shape,
                                  pooling='max',
                                  classes=6)

    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=lr_schedule(0)),
                  metrics=['accuracy'])
    model.summary()

    # Image generator does data augmentation:
    datagen = data_generator()

    train_gen = datagen.flow_from_dataframe(dataframe=train_df,
                                            directory=folder,
                                            x_col="fname",
                                            y_col="class",
                                            class_mode="categorical",
                                            target_size=image_size,
                                            color_mode='grayscale',
                                            batch_size=batch_size,
                                            shuffle=False)

    valid_gen = datagen.flow_from_dataframe(dataframe=valid_df,
                                            directory=folder,
                                            x_col="fname",
                                            y_col="class",
                                            class_mode="categorical",
                                            target_size=image_size,
                                            color_mode='grayscale',
                                            batch_size=batch_size,
                                            shuffle=False)

    # Prepare model model saving directory.
    save_dir = Path(os.path.dirname(
        os.path.realpath(__file__))).joinpath('models')
    if not save_dir.is_dir():
        save_dir.mkdir(exist_ok=True)
    filepath = f'{str(save_dir)}/{MODEL_NAMES[model_name]}'
    print(f'{filepath}\n')

    # Prepare callbacks for model saving and for learning rate adjustment.
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)

    lr_scheduler = LearningRateScheduler(lr_schedule)

    lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                                   cooldown=0,
                                   patience=5,
                                   min_lr=0.5e-6)

    callbacks = [checkpoint, lr_reducer, lr_scheduler]

    # Fit the model on the batches generated by datagen.flow().
    steps_per_epoch = int(len(train_list) / batch_size)
    history = model.fit_generator(generator=train_gen,
                                  steps_per_epoch=steps_per_epoch,
                                  validation_data=valid_gen,
                                  validation_steps=steps_per_epoch,
                                  epochs=epoch_size,
                                  use_multiprocessing=False,
                                  verbose=1,
                                  workers=4,
                                  callbacks=callbacks)

    # Score trained model.
    scores = model.evaluate_generator(generator=valid_gen,
                                      steps=steps_per_epoch,
                                      verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])

    # Save score in configuration file
    system_config[f'{model_name}_Accuracy'] = scores[1]
    save_config()

    return history
Exemple #2
0
def aishufan_train(folder, batch_size, epoch_size, model_name):
    """
    Train network with the parameters specified.

    :param folder: image folder for training
    :param batch_size: training batch size
    :param epoch_size: training epoch size
    :param model_name: IR2, InceptionResNetV2; NL, NASNetLarge; NM, NASNetLarge
    :return: None
    """
    image_wh = system_config['image_wh']

    image_size = (image_wh, image_wh)
    image_shape= (image_wh, image_wh, 3)

    train_list, valid_list = create_tv_list(folder)
    print(f'Train size: {len(train_list)}, valid size: {len(valid_list)}')

    train_df = pd.DataFrame(train_list, columns=['fname', 'class'])
    valid_df = pd.DataFrame(valid_list, columns=['fname', 'class'])

    model = None
    if 'NM' in model_name:
        model_name = 'NM'
        model = NASNetMobile(include_top=True,
                             weights=None,
                             input_tensor=None,
                             input_shape=image_shape,
                             pooling='max',
                             classes=2)

    elif 'XC' in model_name:
        model_name = 'XC'
        model = Xception(include_top=True,
                         weights=None,
                         input_tensor=None,
                         input_shape=image_shape,
                         pooling='max',
                         classes=2)
    elif 'D21' in model_name:
        model_name = 'D21'
        model = DenseNet201(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=2)
    elif 'IV3' in model_name:
        model_name = 'IV3'
        model = InceptionV3(include_top=True,
                            weights=None,
                            input_tensor=None,
                            input_shape=image_shape,
                            pooling='max',
                            classes=2)

    else:
        model_name = 'IR2'
        model = InceptionResNetV2(include_top=True,
                                  weights=None,
                                  input_tensor=None,
                                  input_shape=image_shape,
                                  pooling='max',
                                  classes=2)

    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(lr=lr_schedule(0)),
                  metrics=['accuracy'])
    model.summary()

    # Image generator does data augmentation:
    datagen = data_generator()

    train_gen = datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=folder,
        x_col="fname",
        y_col="class",
        class_mode="categorical",
        target_size=image_size,
        color_mode='rgb',
        batch_size=batch_size,
        shuffle=False)

    valid_gen = datagen.flow_from_dataframe(
        dataframe=valid_df,
        directory=folder,
        x_col="fname",
        y_col="class",
        class_mode="categorical",
        target_size=image_size,
        color_mode='rgb',
        batch_size=batch_size,
        shuffle=False)

    # Save class indices
    system_config['class_indices'] = train_gen.class_indices
    save_config()

    # Prepare model model saving directory.
    save_dir = Path(os.path.dirname(os.path.realpath(__file__))).joinpath('models')
    if not save_dir.is_dir():
        save_dir.mkdir(exist_ok=True)
    filepath = f'{str(save_dir)}/{MODEL_NAMES[model_name]}'
    print(f'{filepath}\n')

    # Prepare callbacks for model saving and for learning rate adjustment.
    checkpoint = ModelCheckpoint(filepath=filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)

    lr_scheduler = LearningRateScheduler(lr_schedule)

    lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                                   cooldown=0,
                                   patience=5,
                                   min_lr=0.5e-6)

    callbacks = [checkpoint, lr_reducer, lr_scheduler]

    # Fit the model on the batches generated by datagen.flow().
    steps_per_epoch = int(len(train_list)/batch_size)
    history = model.fit_generator(
        generator=train_gen,
        steps_per_epoch=steps_per_epoch,
        validation_data=valid_gen,
        validation_steps=steps_per_epoch,
        epochs=epoch_size,
        use_multiprocessing=False,
        verbose=1,
        workers=4,
        callbacks=callbacks)

    # Score trained model.
    scores = model.evaluate_generator(generator=valid_gen, steps=steps_per_epoch, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])

    # Save score in configuration file
    system_config[f'{model_name}_Accuracy'] = scores[1]
    save_config()

    return history
# This callback will log model stats to Tensorboard.
tb_callback = TensorBoard()
# This callback will checkpoint the best model at every epoch.
mc_callback = ModelCheckpoint(filepath='current_best.hdf5',
                              verbose=1,
                              save_best_only=True)

# This is the train DataSequence.
train_sequence = DataSequence(train_pd, "./images", batch_size=batch_size)
train_steps = len(train_pd) // batch_size

# This is the validation DataSequence.
validation_sequence = DataSequence(test_pd, "./images", batch_size=batch_size)
validation_steps = len(test_pd) // batch_size

# These are the callbacks.
callbacks = [lr_callback, tb_callback, mc_callback]

# This line will train the model.
model.fit_generator(train_sequence,
                    validation_data=validation_sequence,
                    epochs=20,
                    use_multiprocessing=True,
                    workers=80,
                    steps_per_epoch=train_steps,
                    validation_steps=validation_steps,
                    callbacks=callbacks)

# Finally, we save the model.
model.save(MODEL_NAME)
callbacksList = [tensorboard]

if earlyStopFlag:
    callbacksList.append(earlyStop)
if reduceLRFlag:
    callbacksList.append(reduce_lr)
if modelCheckpointFlag:
    callbacksList.append(modelCheckpoint)

print('passing class weights: {}'.format(
    getClassWeights(trainGenerator.classes)))
history = model.fit_generator(
    trainGenerator,
    steps_per_epoch=trainSamplesNumber // batchSize * foldAugment,
    epochs=epochs,
    verbose=1,
    callbacks=callbacksList,
    validation_data=validationGenerator,
    class_weight=getClassWeights(trainGenerator.classes),
    shuffle=True,
    validation_steps=validateSamplesNumber // batchSize)

score = model.evaluate_generator(testGenerator, testSamplesNumber)

print('Test loss:', score[0])
print('Test accuracy:', score[1])

# serialize model to JSON
model_json = model.to_json()
with open(modelFile, "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
class Model_Nas:
    def __init__(self, args, load=False):
        self.ckpt = args.pre_train
        self.model = "NasNet"
        self.args = args
        self.class_num = args.class_num
        self.lr = args.lr
        self.epoch = args.epoch
        self.c = args.n_color
        self.is_online = args.online
        self.batch_size = args.batch_size
        self.save_dir = args.save
        self.sess = None
        self.is_test = load
        # self.mode = args.processing_mode

        self.callbacks = []
        self.init_callbacks()

    def init_callbacks(self):
        self.callbacks.append(
            ModelCheckpoint(filepath=self.save_dir + self.model +
                            '_best_weights.h5',
                            verbose=1,
                            monitor='val_categorical_accuracy',
                            mode='auto',
                            save_best_only=True))

        self.callbacks.append(
            TensorBoard(
                log_dir=self.args.save,
                write_images=True,
                write_graph=True,
            ))

        self.callbacks.append(
            EarlyStopping(
                # patience=self.args.early_stopping
                patience=1000))

        # self.callbacks.append(
        #    ReduceLROnPlateau(
        #        monitor='val_loss',
        #        factor=0.5,
        #        patience=5,
        #        min_lr=1e-5
        #    )
        # )

        def custom_schedule(epochs):
            if epochs <= 5:
                lr = 1e-3
            elif epochs <= 50:
                lr = 5e-4
            elif epochs <= 100:
                lr = 2.5e-4
            elif epochs <= 500:
                lr = 1e-4
            elif epochs <= 700:
                lr = 5e-5
            else:
                lr = 1e-5

            return lr

        self.callbacks.append(LearningRateScheduler(custom_schedule))

    def train(self, training_images, training_labels, validation_images,
              validation_labels):
        self.model = NASNetMobile(classes=2, include_top=True, weights=None)
        self.model.trainable = True
        self.model.compile(optimizer=Adam(lr=0.0001, beta_1=0.1),
                           loss='categorical_crossentropy',
                           metrics=['categorical_accuracy'])
        train_datagen = ImageDataGenerator(rotation_range=40,
                                           width_shift_range=0.2,
                                           height_shift_range=0.2,
                                           shear_range=0.2,
                                           zoom_range=0.2,
                                           horizontal_flip=True,
                                           fill_mode='nearest')
        val_datagen = ImageDataGenerator(rotation_range=40,
                                         width_shift_range=0.2,
                                         height_shift_range=0.2,
                                         shear_range=0.2,
                                         zoom_range=0.2,
                                         horizontal_flip=True,
                                         fill_mode='nearest')
        steps = int(np.size(training_images, 0) // self.batch_size)
        val_steps = int(np.size(validation_images, 0) // self.batch_size)

        self.model.fit_generator(
            generator=train_datagen.flow(x=training_images,
                                         y=training_labels,
                                         batch_size=self.batch_size),
            epochs=self.args.epoch,
            steps_per_epoch=steps,
            validation_steps=val_steps,
            verbose=1,
            callbacks=self.callbacks,
            validation_data=val_datagen.flow(x=validation_images,
                                             y=validation_labels,
                                             batch_size=self.batch_size))