def train_vgg_model(weight_save, checkpoints_path, epochs=n_epochs):
    model = vgg_unet(n_classes=n_classes,
                     input_height=input_height,
                     input_width=input_width)
    # model.load_weights(os.path.join(checkpoints_path, '.38'))

    # Set a cyclical learning rate as a callback. The learning rate varies
    # between 0.001 and 0.01, following a triangular cycle.
    cyclic_lr = CyclicLR(base_lr=0.001,
                         max_lr=0.01,
                         step_size=1024,
                         mode='triangular')
    print_lr = PrintLR()

    model.train(train_images=os.path.join(AUGMENTED_IMAGES_DIRECTORY, 'img'),
                train_annotations=os.path.join(AUGMENTED_LABEL_DIRECTORY, 'img'),
                auto_resume_checkpoint=True,
                checkpoints_path=checkpoints_path,
                verify_dataset=False,
                epochs=epochs,
                more_callbacks=[cyclic_lr, print_lr])
    model.save_weights(os.path.join(MODELS_DIRECTORY, weight_save))
    return model
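# `PrintLR` is used above but not defined in this snippet. A minimal sketch,
# assuming it is a plain Keras callback that prints the optimizer's current
# learning rate at the end of each epoch (name and behavior are assumptions,
# not taken from the original source):
from keras import backend as K
from keras.callbacks import Callback

class PrintLR(Callback):
    def on_epoch_end(self, epoch, logs=None):
        # Read the current learning rate from the optimizer and print it
        lr = float(K.get_value(self.model.optimizer.lr))
        print('\nEpoch %d: learning rate = %.6f' % (epoch + 1, lr))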
def update_model(self, stage_params):
    dataset = self.memory.recall(shuffle=True,
                                 n_sample=stage_params["training_samples"])
    dataset_size = dataset["Boards"].shape[0]
    train_select = np.random.choice(a=[False, True],
                                    size=dataset_size,
                                    p=[0.1, 0.9])
    # Protect against the case when the val split means there is no val data
    if train_select.sum() == dataset_size:
        train_select[0] = False
        self.logger.debug("Validation size is 0, setting 1 example.")
    validation_select = ~train_select

    train_boards = dataset["Boards"][train_select]
    train_policies = dataset["Policies"][train_select]
    train_values = dataset["Values"][train_select]
    validation_boards = dataset["Boards"][validation_select]
    validation_policies = dataset["Policies"][validation_select]
    validation_values = dataset["Values"][validation_select]
    validation_data = (
        validation_boards,
        {
            "value": validation_values,
            "policy": validation_policies
        },
    )

    patience = stage_params["update_epochs"] // 5
    early_stopping = tf.keras.callbacks.EarlyStopping(
        patience=patience, restore_best_weights=True)
    # CLR treats base_lr as the lower bound, so cycle from the configured
    # learning rate up to three times that value. step_size is measured in
    # batches, so it must use the same batch size (512) passed to fit():
    # this gives a half-cycle of four epochs.
    clr = CyclicLR(
        base_lr=stage_params["learning_rate"],
        max_lr=stage_params["learning_rate"] * 3,
        step_size=4 * len(train_boards) // 512,
        mode="triangular",
    )
    train_history = self.model.fit(
        train_boards,
        {
            "value": train_values,
            "policy": train_policies
        },
        validation_data=validation_data,
        batch_size=512,
        epochs=stage_params["update_epochs"],
        verbose=1,
        callbacks=[clr, early_stopping],
    )
    return train_history
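# For reference, the triangular policy that CyclicLR implements (Smith,
# "Cyclical Learning Rates for Training Neural Networks", WACV 2017) can be
# written directly. A minimal standalone sketch, where `iteration` counts
# training batches seen so far:
import numpy as np

def triangular_lr(iteration, base_lr, max_lr, step_size):
    # cycle counts completed up/down pairs; x is the normalized distance
    # from the peak, so the LR rises linearly to max_lr and falls back to
    # base_lr over 2 * step_size iterations.
    cycle = np.floor(1 + iteration / (2 * step_size))
    x = np.abs(iteration / step_size - 2 * cycle + 1)
    return base_lr + (max_lr - base_lr) * np.maximum(0, 1 - x)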
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation("relu"))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation("softmax"))

# initiate RMSprop optimizer
opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

# initiate CyclicLR LR scheduler
clr = CyclicLR(base_lr=0.0001,
               max_lr=0.0005,
               step_size=2000,
               mode="triangular")

# Let's train the model using RMSprop
model.compile(loss="categorical_crossentropy",
              optimizer=opt,
              metrics=["accuracy"])

x_train = x_train.astype("float32")
x_test = x_test.astype("float32")
x_train /= 255
x_test /= 255

if not data_augmentation:
    print("Not using data augmentation.")
    # fit arguments below are assumed (standard Keras CIFAR-10 setup),
    # wiring in the CLR callback defined above
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True,
              callbacks=[clr])
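# The CLR paper suggests setting step_size to 2-8 times the number of
# iterations in one epoch rather than a fixed constant like the 2000 used
# above. A sketch of that heuristic, assuming x_train and batch_size are
# defined as in the surrounding code:
iterations_per_epoch = len(x_train) // batch_size
suggested_step_size = 4 * iterations_per_epoch  # mid-range choice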
def regressor_training_main(folders, val_folders, load_model, model_name,
                            time, epochs, batch_size, opt, learning_rate,
                            lropf, sd, es, feature, workers, test_dirs,
                            intensity_cut, leakage, tb, gpu_fraction=0.5,
                            emin=-100, emax=100, class_model='', clr=False,
                            train_indexes=None, valid_indexes=None,
                            clr_values=[5e-5, 5e-3, 4]):
    ###################################
    # TensorFlow wizardry for GPU dynamic memory allocation
    if 0 < gpu_fraction < 1:
        config = tf.ConfigProto()
        # Don't pre-allocate memory; allocate as needed
        config.gpu_options.allow_growth = True
        # Only allow a fraction of the GPU memory to be allocated
        config.gpu_options.per_process_gpu_memory_fraction = gpu_fraction
        # Create a session with the above options specified.
        K.tensorflow_backend.set_session(tf.Session(config=config))
    ###################################

    # remove semaphore warnings
    os.environ["PYTHONWARNINGS"] = "ignore:semaphore_tracker:UserWarning"

    # avoid validation deadlock problem
    mp.set_start_method('spawn', force=True)

    # hard-coded parameters
    shuffle = True
    channels = 1
    if time:
        channels = 2

    # early stopping
    md_es = 0.1  # min delta
    p_es = 50  # patience

    # cyclical learning rate (CLR)
    base_lr = clr_values[0]
    max_lr = clr_values[1]
    step_size = clr_values[2]

    # sgd
    lr = 0.01  # learning rate
    decay = 1e-4  # decay
    momentum = 0.9  # momentum
    nesterov = True

    # adam
    a_lr = learning_rate
    a_beta_1 = 0.9
    a_beta_2 = 0.999
    a_epsilon = None
    a_decay = 0
    amsgrad = True

    # adabound
    ab_lr = 1e-03
    ab_final_lr = 0.1
    ab_gamma = 1e-03
    ab_weight_decay = 0
    amsbound = False

    # rmsprop
    r_lr = 0.01
    r_rho = 0.9
    r_epsilon = None
    r_decay = 0.0

    # reduce lr on plateau
    f_lrop = 0.1  # factor
    p_lrop = 15  # patience
    md_lrop = 0.005  # min delta
    cd_lrop = 5  # cool down
    mlr_lrop = a_lr / 100  # min lr

    # cuts
    # intensity_cut = 50
    leakage2_intensity_cut = leakage

    training_files = get_all_files(folders)
    validation_files = get_all_files(val_folders)

    # create a folder to keep model & results
    now = datetime.datetime.now()
    root_dir = now.strftime(model_name + '_' + feature + '_' + '%Y-%m-%d_%H-%M')
    mkdir(root_dir)
    models_dir = join(root_dir, "models")
    mkdir(models_dir)

    # generators
    print('Building training generator...')
    training_generator = LSTGenerator(
        training_files,
        batch_size=batch_size,
        arrival_time=time,
        feature=feature,
        shuffle=shuffle,
        intensity=intensity_cut,
        leakage2_intensity=leakage2_intensity_cut,
        load_indexes=train_indexes)
    if train_indexes is None:
        train_idxs = training_generator.get_all_info()
        train_idxs.to_pickle(join(root_dir, "train_indexes.pkl"))

    if len(val_folders) > 0:
        print('Building validation generator...')
        validation_generator = LSTGenerator(
            validation_files,
            batch_size=batch_size,
            arrival_time=time,
            feature=feature,
            shuffle=False,
            intensity=intensity_cut,
            leakage2_intensity=leakage2_intensity_cut,
            load_indexes=valid_indexes)
        if valid_indexes is None:
            valid_idxs = validation_generator.get_all_info()
            valid_idxs.to_pickle(join(root_dir, "valid_indexes.pkl"))

    # class_weight = {0: 1., 1: train_protons/train_gammas}
    # print(class_weight)

    # get image size (rows and columns)
    img_rows = training_generator.img_rows
    img_cols = training_generator.img_cols

    hype_print = '\n' + '======================================HYPERPARAMETERS======================================'
    hype_print += '\n' + 'Image rows: ' + str(img_rows) + ' Image cols: ' + str(img_cols)
    hype_print += '\n' + 'Folders: ' + str(folders)
    hype_print += '\n' + 'Model: ' + str(model_name)
    hype_print += '\n' + 'Use arrival time: ' + str(time)
    hype_print += '\n' + 'Epochs: ' + str(epochs)
    hype_print += '\n' + 'Batch size: ' + str(batch_size)
    hype_print += '\n' + 'Optimizer: ' + str(opt)
    hype_print += '\n' + 'Feature: ' + str(feature)
    hype_print += '\n' + 'Validation: ' + str(val_folders)
    hype_print += '\n' + 'Test dirs: ' + str(test_dirs)
    hype_print += '\n' + 'intensity_cut: ' + str(intensity_cut)
    hype_print += '\n' + 'leakage2_intensity_cut: ' + str(leakage2_intensity_cut)
    if clr:
        hype_print += '\n' + '--- Cyclical Learning Rate ---'
        hype_print += '\n' + 'Base LR: ' + str(base_lr)
        hype_print += '\n' + 'Max LR: ' + str(max_lr)
        hype_print += '\n' + 'Step size: ' + str(step_size) + ' (' + str(step_size * len(training_generator)) + ')'
    if es:
        hype_print += '\n' + '--- Early stopping ---'
        hype_print += '\n' + 'Min delta: ' + str(md_es)
        hype_print += '\n' + 'Patience: ' + str(p_es)
        hype_print += '\n' + '----------------------'
    if opt == 'sgd':
        hype_print += '\n' + '--- SGD ---'
        hype_print += '\n' + 'Learning rate: ' + str(lr)
        hype_print += '\n' + 'Decay: ' + str(decay)
        hype_print += '\n' + 'Momentum: ' + str(momentum)
        hype_print += '\n' + 'Nesterov: ' + str(nesterov)
        hype_print += '\n' + '-----------'
    elif opt == 'adam':
        hype_print += '\n' + '--- ADAM ---'
        hype_print += '\n' + 'lr: ' + str(a_lr)
        hype_print += '\n' + 'beta_1: ' + str(a_beta_1)
        hype_print += '\n' + 'beta_2: ' + str(a_beta_2)
        hype_print += '\n' + 'epsilon: ' + str(a_epsilon)
        hype_print += '\n' + 'decay: ' + str(a_decay)
        hype_print += '\n' + 'Amsgrad: ' + str(amsgrad)
        hype_print += '\n' + '------------'
    elif opt == 'rmsprop':
        hype_print += '\n' + '--- RMSprop ---'
        hype_print += '\n' + 'lr: ' + str(r_lr)
        hype_print += '\n' + 'rho: ' + str(r_rho)
        hype_print += '\n' + 'epsilon: ' + str(r_epsilon)
        hype_print += '\n' + 'decay: ' + str(r_decay)
        hype_print += '\n' + '------------'
    if lropf:
        hype_print += '\n' + '--- Reduce LR on plateau ---'
        hype_print += '\n' + 'LR decrease factor: ' + str(f_lrop)
        hype_print += '\n' + 'Patience: ' + str(p_lrop)
        hype_print += '\n' + 'Min delta: ' + str(md_lrop)
        hype_print += '\n' + 'Cool down: ' + str(cd_lrop)
        hype_print += '\n' + 'Min lr: ' + str(mlr_lrop)
        hype_print += '\n' + '----------------------------'
    if sd:
        hype_print += '\n' + '--- Step decay ---'
    hype_print += '\n' + 'Workers: ' + str(workers)
    hype_print += '\n' + 'Shuffle: ' + str(shuffle)
    hype_print += '\n' + 'Number of training batches: ' + str(len(training_generator))
    if len(val_folders) > 0:
        hype_print += '\n' + 'Number of validation batches: ' + str(len(validation_generator))

    outcomes = 1
    # loss = 'mean_absolute_percentage_error'
    loss = 'mean_absolute_error'
    # loss = 'mean_squared_error'
    if feature == 'direction':
        outcomes = 2
        # loss = 'mean_absolute_error'
        # loss = 'mean_squared_error'

    # keras.backend.set_image_data_format('channels_first')

    if load_model:
        model = keras.models.load_model(model_name)
        model_name = Path(model_name).name
    else:
        model, hype_print = regressor_selector(model_name, hype_print,
                                               channels, img_rows, img_cols,
                                               outcomes)

    hype_print += '\n' + '========================================================================================='

    # print hyperparameters on screen
    print(hype_print)
    # write hyperparameters to file
    f = open(root_dir + '/hyperparameters.txt', 'w')
    f.write(hype_print)
    f.close()

    model.summary()

    callbacks = []
    if len(val_folders) > 0:
        checkpoint = ModelCheckpoint(
            filepath=models_dir + '/' + model_name +
            '_{epoch:02d}_{loss:.5f}_{val_loss:.5f}.h5',
            monitor='val_loss',
            save_best_only=False)
    else:
        checkpoint = ModelCheckpoint(
            filepath=models_dir + '/' + model_name + '_{epoch:02d}_{loss:.5f}.h5',
            monitor='loss',
            save_best_only=True)
    callbacks.append(checkpoint)

    # tensorboard = keras.callbacks.TensorBoard(log_dir=root_dir + "/logs",
    #                                           histogram_freq=5,
    #                                           batch_size=batch_size,
    #                                           write_images=True,
    #                                           update_freq=batch_size * 100)

    history = LossHistoryR()
    csv_callback = keras.callbacks.CSVLogger(root_dir + '/epochs_log.csv',
                                             separator=',', append=False)
    callbacks.append(history)
    callbacks.append(csv_callback)
    # callbacks.append(tensorboard)

    # optimizer selection
    optimizer = None
    if opt == 'sgd':
        optimizer = optimizers.SGD(lr=lr, decay=decay, momentum=momentum,
                                   nesterov=nesterov)
    elif opt == 'adam':
        optimizer = optimizers.Adam(lr=a_lr, beta_1=a_beta_1, beta_2=a_beta_2,
                                    epsilon=a_epsilon, decay=a_decay,
                                    amsgrad=amsgrad)
    # elif opt == 'adabound':
    #     optimizer = AdaBound(lr=ab_lr, final_lr=ab_final_lr, gamma=ab_gamma,
    #                          weight_decay=ab_weight_decay, amsbound=False)
    elif opt == 'rmsprop':
        optimizer = optimizers.RMSprop(lr=r_lr, rho=r_rho, epsilon=r_epsilon,
                                       decay=r_decay)

    # reduce lr on plateau
    if lropf:
        lrop = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                 factor=f_lrop,
                                                 patience=p_lrop,
                                                 verbose=1,
                                                 mode='auto',
                                                 min_delta=md_lrop,
                                                 cooldown=cd_lrop,
                                                 min_lr=mlr_lrop)
        callbacks.append(lrop)

    if sd:
        # learning rate schedule: divide the LR by 10 at epoch 99
        def step_decay(epoch):
            current = K.eval(model.optimizer.lr)
            lrate = current
            if epoch == 99:
                lrate = current / 10
                print('Reduced learning rate by a factor 10')
            return lrate

        stepd = LearningRateScheduler(step_decay)
        callbacks.append(stepd)

    if es:
        # early stopping on validation loss, which should be minimized
        early_stopping = EarlyStopping(monitor='val_loss',
                                       min_delta=md_es,
                                       patience=p_es,
                                       verbose=1,
                                       mode='min')
        callbacks.append(early_stopping)

    if tb:
        tb_path = os.path.join(root_dir, 'tb')
        if not os.path.exists(tb_path):
            os.mkdir(tb_path)
        # tb_path = os.path.join(tb_path, root_dir)
        # os.mkdir(tb_path)
        tensorboard = TensorBoard(log_dir=tb_path)
        callbacks.append(tensorboard)

    '''
    findlr = False
    if findlr:
        lr_callback = LRFinder(len(training_generator) * batch_size, batch_size,
                               1e-05, 1e01,
                               # validation_data=(X_val, Y_val),
                               lr_scale='exp', save_dir=join(root_dir, 'clr'))
        callbacks.append(lr_callback)

    oclr = True  # FIX THIS OR DELETE IT
    if oclr:
        lr_manager = OneCycleLR(max_lr=0.001,
                                maximum_momentum=0.9,
                                minimum_momentum=None,
                                verbose=True)
        callbacks.append(lr_manager)

    oclr = True
    if oclr:
        ocp = ktools.one_cycle.OneCycle(lr_range=(1e-5, 1e-3),
                                        momentum_range=(0.95, 0.85),
                                        reset_on_train_begin=True,
                                        record_frq=10)
        callbacks.append(ocp)
    '''

    if clr:
        cyclelr = CyclicLR(
            base_lr=base_lr,
            max_lr=max_lr,
            step_size=step_size * len(training_generator),
            # mode='exp_range'  # uncomment to use exp_range instead of triangular
        )
        callbacks.append(cyclelr)

    model.compile(optimizer=optimizer, loss=loss)

    if len(val_folders) > 0:
        model.fit(x=training_generator,
                  validation_data=validation_generator,
                  steps_per_epoch=len(training_generator),
                  validation_steps=len(validation_generator),
                  epochs=epochs,
                  verbose=1,
                  max_queue_size=10,
                  use_multiprocessing=True,
                  workers=workers,
                  shuffle=False,
                  callbacks=callbacks)
    else:
        model.fit(x=training_generator,
                  steps_per_epoch=len(training_generator),
                  epochs=epochs,
                  verbose=1,
                  max_queue_size=10,
                  use_multiprocessing=True,
                  workers=workers,
                  shuffle=False,
                  callbacks=callbacks)

    # mp.set_start_method('fork')

    # save results
    train_history = root_dir + '/train-history'
    with open(train_history, 'wb') as file_pi:
        pickle.dump(history.dic, file_pi)

    # post-training operations

    # training plots
    train_plots(train_history, False)

    if len(test_dirs) > 0:
        if len(val_folders) > 0:
            # get the model with the lowest validation loss;
            # checkpoints are written to models_dir, so look there
            val_loss = history.dic['val_losses']
            m = val_loss.index(min(val_loss))  # index of the lowest val loss
            model_checkpoints = [
                join(models_dir, f) for f in listdir(models_dir)
                if (isfile(join(models_dir, f)) and f.startswith(
                    model_name + '_' + '{:02d}'.format(m + 1)))
            ]
            best = model_checkpoints[0]
            print('Best checkpoint: ', best)
        else:
            # no validation set: fall back to the lowest training loss
            loss_hist = history.dic['losses']
            m = loss_hist.index(min(loss_hist))  # index of the lowest loss
            model_checkpoints = [
                join(models_dir, f) for f in listdir(models_dir)
                if (isfile(join(models_dir, f)) and f.startswith(
                    model_name + '_' + '{:02d}'.format(m + 1)))
            ]
            best = model_checkpoints[0]
            print('Best checkpoint: ', best)

    # test plots & results if test data is provided
    if len(test_dirs) > 0:
        pkl = tester(test_dirs, best, batch_size, time, feature, workers)
        test_plots(pkl, feature)
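# A hypothetical invocation of regressor_training_main, for illustration
# only: every path and value below is an assumption, not taken from the
# original code.
if __name__ == '__main__':
    regressor_training_main(
        folders=['data/train'],      # assumed training folder
        val_folders=['data/val'],    # assumed validation folder
        load_model=False,
        model_name='ResNet',         # assumed key understood by regressor_selector
        time=True,                   # add the arrival-time channel
        epochs=100,
        batch_size=64,
        opt='adam',
        learning_rate=1e-3,
        lropf=False, sd=False, es=True,
        feature='energy',
        workers=4,
        test_dirs=[],
        intensity_cut=50,
        leakage=0.2,
        tb=False,
        clr=True,
        clr_values=[5e-5, 5e-3, 4])  # base LR, max LR, half-cycle in epochs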
    model = Model(inputs=inp, outputs=pred)
    model.compile(loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'],
                  optimizer='nadam')
    return model

K.clear_session()
model = gen_model(n_classes)
# model = gen_model(58)
model.summary()

batch_size = 512
epochs = 5
# one full triangular cycle over the whole run: step_size is half the
# total number of training iterations
step_size = int(int(len(X_train) / batch_size) * epochs / 2)
cb = [CyclicLR(5e-4, 2e-3, step_size)]  # base_lr, max_lr, step_size

model.fit(X_train, y_train,
          validation_data=(X_val, y_val),
          epochs=epochs,
          batch_size=batch_size,
          verbose=1,
          shuffle=True,
          callbacks=cb)

preds = model.predict(test_text, verbose=1).argmax(axis=1)
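# If the CyclicLR above is the widely used bckenstler/CLR callback, it
# records the learning-rate trace during training in a history attribute.
# A sketch of plotting that trace, assuming matplotlib is available and
# cb[0] is the callback from the snippet above:
import matplotlib.pyplot as plt

clr_cb = cb[0]
plt.plot(clr_cb.history['iterations'], clr_cb.history['lr'])
plt.xlabel('Training iterations')
plt.ylabel('Learning rate')
plt.title('Triangular CLR schedule observed during training')
plt.show()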
def classifier_training_main(folders, val_folders, model_name, time, epochs,
                             batch_size, opt, learn_rate, lropf=False,
                             sd=False, es=False, clr=False, workers=1,
                             test_dirs='', load_model=False, tb=False,
                             intensity_cut=None, leakage=0.2, gpu_fraction=1,
                             train_indexes=None, valid_indexes=None,
                             clr_values=[5e-5, 5e-3, 4]):
    ###################################
    # TensorFlow wizardry for GPU dynamic memory allocation
    if 0 < gpu_fraction <= 1:
        config = tf.ConfigProto()
        # Don't pre-allocate memory; allocate as needed
        config.gpu_options.allow_growth = True
        # Only allow a fraction of the GPU memory to be allocated
        config.gpu_options.per_process_gpu_memory_fraction = gpu_fraction
        # Create a session with the above options specified.
        K.tensorflow_backend.set_session(tf.Session(config=config))
    ###################################

    # remove semaphore warnings
    os.environ["PYTHONWARNINGS"] = "ignore:semaphore_tracker:UserWarning"

    # avoid validation deadlock problem
    mp.set_start_method('spawn', force=True)

    # hard-coded parameters
    shuffle = True
    channels = 1
    if time:
        channels = 2

    # early stopping
    md_es = 0.01  # min delta
    p_es = 25  # patience

    # cyclical learning rate (CLR)
    base_lr = clr_values[0]
    max_lr = clr_values[1]
    step_size = clr_values[2]

    # sgd
    lr = 0.01  # learning rate
    decay = 1e-4  # decay
    momentum = 0.9  # momentum
    nesterov = True

    # adam (default a_lr should be 0.001)
    a_lr = learn_rate
    a_beta_1 = 0.9
    a_beta_2 = 0.999
    a_epsilon = None
    a_decay = 0
    amsgrad = True

    # adabound
    ab_lr = 1e-03
    ab_final_lr = 0.1
    ab_gamma = 1e-03
    ab_weight_decay = 0
    amsbound = True

    # reduce lr on plateau
    f_lrop = 0.1  # factor
    p_lrop = 15  # patience
    md_lrop = 0.005  # min delta
    cd_lrop = 5  # cool down
    mlr_lrop = a_lr / 100  # min lr

    # cuts
    # intensity_cut = 500
    # leakage2_intensity_cut = 0.2

    training_files = get_all_files(folders)
    validation_files = get_all_files(val_folders)

    # generators
    print('Building training generator...')
    feature = 'gammaness'  # hard-coded for now
    '''
    training_generator = DataGeneratorC(training_files,
                                        batch_size=batch_size,
                                        arrival_time=time,
                                        shuffle=shuffle,
                                        intensity=intensity_cut)
    train_idxs = training_generator.get_indexes()
    train_gammas = np.unique(train_idxs[:, 2], return_counts=True)[1][1]
    train_protons = np.unique(train_idxs[:, 2], return_counts=True)[1][0]
    '''
    training_generator = LSTGenerator(training_files,
                                      batch_size=batch_size,
                                      arrival_time=time,
                                      feature=feature,
                                      shuffle=shuffle,
                                      intensity=intensity_cut,
                                      leakage2_intensity=leakage,
                                      load_indexes=train_indexes)

    # get image size (rows and columns)
    img_rows = training_generator.img_rows
    img_cols = training_generator.img_cols
    print("IMG rows: {}, cols: {}".format(img_rows, img_cols))

    # create a folder to keep model & results
    now = datetime.datetime.now()
    root_dir = now.strftime(model_name + '_' + '%Y-%m-%d_%H-%M')
    mkdir(root_dir)
    models_dir = join(root_dir, "models")
    mkdir(models_dir)

    # save data info
    train_idxs = training_generator.get_all_info()
    train_gammas = np.unique(train_idxs['class'], return_counts=True)[1][1]
    train_protons = np.unique(train_idxs['class'], return_counts=True)[1][0]
    train_gamma_frac = training_generator.gamma_fraction()
    if train_indexes is None:
        train_idxs.to_pickle(join(root_dir, "train_indexes.pkl"))

    if len(val_folders) > 0:
        print('Building validation generator...')
        validation_generator = LSTGenerator(validation_files,
                                            batch_size=batch_size,
                                            arrival_time=time,
                                            feature=feature,
                                            shuffle=False,
                                            intensity=intensity_cut,
                                            leakage2_intensity=leakage,
                                            load_indexes=valid_indexes)
        valid_idxs = validation_generator.get_all_info()
        valid_gammas = np.unique(valid_idxs['class'], return_counts=True)[1][1]
        valid_protons = np.unique(valid_idxs['class'], return_counts=True)[1][0]
        valid_gamma_frac = validation_generator.gamma_fraction()
        if valid_indexes is None:
            valid_idxs.to_pickle(join(root_dir, "valid_indexes.pkl"))

    # class_weight = {0: 1., 1: train_protons/train_gammas}
    # print(class_weight)

    hype_print = '\n' + '======================================HYPERPARAMETERS======================================'
    hype_print += '\n' + 'Image rows: ' + str(img_rows) + ' Image cols: ' + str(img_cols)
    hype_print += '\n' + 'Folders: ' + str(folders)
    hype_print += '\n' + 'Model: ' + str(model_name)
    hype_print += '\n' + 'Use arrival time: ' + str(time)
    hype_print += '\n' + 'Epochs: ' + str(epochs)
    hype_print += '\n' + 'Batch size: ' + str(batch_size)
    hype_print += '\n' + 'Optimizer: ' + str(opt)
    hype_print += '\n' + 'Validation: ' + str(val_folders)
    hype_print += '\n' + 'Test dirs: ' + str(test_dirs)
    hype_print += '\n' + 'intensity_cut: ' + str(intensity_cut)
    hype_print += '\n' + 'leakage2_intensity_cut: ' + str(leakage)
    if clr:
        hype_print += '\n' + '--- Cyclical Learning Rate ---'
        hype_print += '\n' + 'Base LR: ' + str(base_lr)
        hype_print += '\n' + 'Max LR: ' + str(max_lr)
        hype_print += '\n' + 'Step size: ' + str(step_size) + ' (' + str(step_size * len(training_generator)) + ')'
    if es:
        hype_print += '\n' + '--- Early stopping ---'
        hype_print += '\n' + 'Min delta: ' + str(md_es)
        hype_print += '\n' + 'Patience: ' + str(p_es)
        hype_print += '\n' + '----------------------'
    if opt == 'sgd':
        hype_print += '\n' + '--- SGD ---'
        hype_print += '\n' + 'Learning rate: ' + str(lr)
        hype_print += '\n' + 'Decay: ' + str(decay)
        hype_print += '\n' + 'Momentum: ' + str(momentum)
        hype_print += '\n' + 'Nesterov: ' + str(nesterov)
        hype_print += '\n' + '-----------'
    elif opt == 'adam':
        hype_print += '\n' + '--- ADAM ---'
        hype_print += '\n' + 'lr: ' + str(a_lr)
        hype_print += '\n' + 'beta_1: ' + str(a_beta_1)
        hype_print += '\n' + 'beta_2: ' + str(a_beta_2)
        hype_print += '\n' + 'epsilon: ' + str(a_epsilon)
        hype_print += '\n' + 'decay: ' + str(a_decay)
        hype_print += '\n' + 'Amsgrad: ' + str(amsgrad)
        hype_print += '\n' + '------------'
    if lropf:
        hype_print += '\n' + '--- Reduce LR on plateau ---'
        hype_print += '\n' + 'LR decrease factor: ' + str(f_lrop)
        hype_print += '\n' + 'Patience: ' + str(p_lrop)
        hype_print += '\n' + 'Min delta: ' + str(md_lrop)
        hype_print += '\n' + 'Cool down: ' + str(cd_lrop)
        hype_print += '\n' + 'Min lr: ' + str(mlr_lrop)
        hype_print += '\n' + '----------------------------'
    if sd:
        hype_print += '\n' + '--- Step decay ---'
    hype_print += '\n' + 'Workers: ' + str(workers)
    hype_print += '\n' + 'Shuffle: ' + str(shuffle)
    hype_print += '\n' + 'Number of training batches: ' + str(len(training_generator))
    hype_print += '\n' + 'Number of training gammas: ' + str(train_gammas)
    hype_print += '\n' + 'Number of training protons: ' + str(train_protons)
    hype_print += '\n' + 'Fraction of gamma in training set: ' + str(train_gamma_frac)
    if len(val_folders) > 0:
        hype_print += '\n' + 'Number of validation batches: ' + str(len(validation_generator))
        hype_print += '\n' + 'Number of validation gammas: ' + str(valid_gammas)
        hype_print += '\n' + 'Number of validation protons: ' + str(valid_protons)
        hype_print += '\n' + 'Fraction of gamma in validation set: ' + str(valid_gamma_frac)

    # keras.backend.set_image_data_format('channels_first')

    if load_model:
        model = keras.models.load_model(model_name)
        model_name = Path(model_name).name
    else:
        model, hype_print = select_classifier(model_name, hype_print,
                                              channels, img_rows, img_cols)
        # model = load_model('/home/pgrespan/nick_models/ResNetFSE_49_0.82375_0.80292.h5')

    hype_print += '\n' + '========================================================================================='

    # print hyperparameters on screen
    print(hype_print)
    # write hyperparameters to file
    f = open(root_dir + '/hyperparameters.txt', 'w')
    f.write(hype_print)
    f.close()

    model.summary()

    callbacks = []
    if len(val_folders) > 0:
        checkpoint = ModelCheckpoint(
            filepath=models_dir + '/' + model_name +
            '_{epoch:02d}_{acc:.5f}_{val_acc:.5f}.h5',
            monitor='val_acc',
            save_best_only=False)
    else:
        checkpoint = ModelCheckpoint(
            filepath=models_dir + '/' + model_name + '_{epoch:02d}_{acc:.5f}.h5',
            monitor='acc',
            save_best_only=True)
    callbacks.append(checkpoint)

    # tensorboard = keras.callbacks.TensorBoard(log_dir=root_dir + "/logs",
    #                                           histogram_freq=5,
    #                                           batch_size=batch_size,
    #                                           write_images=True,
    #                                           update_freq=batch_size * 100)

    history = LossHistoryC()
    csv_callback = keras.callbacks.CSVLogger(root_dir + '/epochs_log.csv',
                                             separator=',', append=False)
    callbacks.append(history)
    callbacks.append(csv_callback)
    # callbacks.append(tensorboard)

    # optimizer selection
    optimizer = None
    if opt == 'sgd':
        optimizer = optimizers.SGD(lr=lr, decay=decay, momentum=momentum,
                                   nesterov=nesterov)
    elif opt == 'adam':
        optimizer = optimizers.Adam(lr=a_lr, beta_1=a_beta_1, beta_2=a_beta_2,
                                    epsilon=a_epsilon, decay=a_decay,
                                    amsgrad=amsgrad)
    '''
    elif opt == 'adabound':
        optimizer = AdaBound(lr=ab_lr, final_lr=ab_final_lr, gamma=ab_gamma,
                             weight_decay=ab_weight_decay, amsbound=False)
    '''

    # reduce lr on plateau
    if lropf:
        lrop = keras.callbacks.ReduceLROnPlateau(monitor='val_acc',
                                                 factor=f_lrop,
                                                 patience=p_lrop,
                                                 verbose=1,
                                                 mode='auto',
                                                 min_delta=md_lrop,
                                                 cooldown=cd_lrop,
                                                 min_lr=mlr_lrop)
        callbacks.append(lrop)

    if sd:
        # learning rate schedule: divide the LR by 10 at epoch 99
        def step_decay(epoch):
            current = K.eval(model.optimizer.lr)
            lrate = current
            if epoch == 99:
                lrate = current / 10
                print('Reduced learning rate by a factor 10')
            return lrate

        stepd = LearningRateScheduler(step_decay)
        callbacks.append(stepd)

    if es:
        # early stopping on validation accuracy (maximize)
        early_stopping = EarlyStopping(monitor='val_acc',
                                       min_delta=md_es,
                                       patience=p_es,
                                       verbose=1,
                                       mode='max')
        callbacks.append(early_stopping)

    if tb:
        tb_path = os.path.join(root_dir, 'tb')
        if not os.path.exists(tb_path):
            os.mkdir(tb_path)
        # tb_path = os.path.join(tb_path, root_dir)
        # os.mkdir(tb_path)
        tensorboard = TensorBoard(log_dir=tb_path)
        callbacks.append(tensorboard)

    if clr:
        cyclelr = CyclicLR(base_lr=base_lr,
                           max_lr=max_lr,
                           step_size=step_size * len(training_generator))
        callbacks.append(cyclelr)

    lrfinder = False
    if lrfinder:
        lr_callback = LRFinder(len(training_generator) * batch_size, batch_size,
                               1e-05, 1e01,
                               # validation_data=(X_val, Y_val),
                               lr_scale='exp', save_dir=join(root_dir, 'clr'))
        callbacks.append(lr_callback)

    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    if len(val_folders) > 0:
        model.fit(x=training_generator,
                  validation_data=validation_generator,
                  steps_per_epoch=len(training_generator),
                  validation_steps=len(validation_generator),
                  epochs=epochs,
                  verbose=1,
                  max_queue_size=10,
                  use_multiprocessing=True,
                  workers=workers,
                  shuffle=False,
                  callbacks=callbacks)
    else:
        model.fit(x=training_generator,
                  steps_per_epoch=len(training_generator),
                  epochs=epochs,
                  verbose=1,
                  max_queue_size=10,
                  use_multiprocessing=True,
                  workers=workers,
                  shuffle=False,
                  callbacks=callbacks)

    # save results
    train_history = root_dir + '/train-history'
    with open(train_history, 'wb') as file_pi:
        pickle.dump(history.dic, file_pi)

    # post-training operations

    # training plots
    train_plots(train_history, False)

    if len(test_dirs) > 0:
        if len(val_folders) > 0:
            # get the best model on validation accuracy;
            # checkpoints are written to models_dir, so look there
            val_acc = history.dic['val_accuracy']
            m = val_acc.index(max(val_acc))  # index with the highest accuracy
            model_checkpoints = [join(models_dir, f) for f in listdir(models_dir)
                                 if (isfile(join(models_dir, f)) and f.startswith(
                                     model_name + '_' + '{:02d}'.format(m + 1)))]
            best = model_checkpoints[0]
            print('Best checkpoint: ', best)
        else:
            # no validation set: fall back to the best training accuracy
            acc = history.dic['accuracy']
            m = acc.index(max(acc))  # index with the highest accuracy
            model_checkpoints = [join(models_dir, f) for f in listdir(models_dir)
                                 if (isfile(join(models_dir, f)) and f.startswith(
                                     model_name + '_' + '{:02d}'.format(m + 1)))]
            best = model_checkpoints[0]
            print('Best checkpoint: ', best)

    # test plots & results if test data is provided
    if len(test_dirs) > 0:
        csv = tester(test_dirs, best, batch_size, time, workers)
        test_plots(csv)
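# A hypothetical invocation of classifier_training_main, for illustration
# only: the paths and values below are assumptions, not taken from the
# original code.
if __name__ == '__main__':
    classifier_training_main(
        folders=['data/train'],      # assumed gamma/proton training folder
        val_folders=['data/val'],    # assumed validation folder
        model_name='ResNet',         # assumed key understood by select_classifier
        time=True,
        epochs=50,
        batch_size=64,
        opt='adam',
        learn_rate=1e-3,
        es=True,
        clr=True,
        workers=4,
        intensity_cut=50,
        leakage=0.2,
        clr_values=[5e-5, 5e-3, 4])  # base LR, max LR, half-cycle in epochs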