from functools import partial

from tensorflow import keras
import keras_tuner as kt


def tunertrain(d, n_input, epochs=50):
    """Use keras-tuner to select the activation functions and number of units.

    Illustrates a more explicit calling pattern that ensures the model can be
    reconstructed, and possibly avoids confusion between hp names and keras arguments.
    """

    def build_model(hp, n_input: int):
        model = keras.Sequential()
        model.add(keras.layers.Dense(
            units=hp.Choice('units', [8, 16, 24, 32, 64]),
            activation=hp.Choice('activation_1', ['linear', 'relu']),
            input_shape=(1, n_input)))
        model.add(keras.layers.Dense(8, activation=hp.Choice('activation_2', ['linear', 'relu'])))
        model.add(keras.layers.Dense(1, activation='linear'))
        model.compile(loss='mse')
        return model

    p_build = partial(build_model, n_input=n_input)
    tuner = kt.Hyperband(p_build, objective='val_loss', overwrite=True, max_epochs=100)
    tuner.search(d['x_train'], d['y_train'], epochs=epochs,
                 validation_data=(d['x_val'], d['y_val']))
    print(tuner.results_summary())
    best_model = tuner.get_best_models()[0]
    return best_model
def tunertrain(d, n_input, epochs=50):

    def build_model(hp, n_input):
        model = keras.Sequential()
        model.add(keras.layers.Dense(
            hp.Choice('units', [8, 16, 24, 32, 64]),
            activation=hp.Choice('activation_1', ['linear', 'relu']),
            input_shape=(1, n_input)))
        model.add(keras.layers.Dense(8, activation=hp.Choice('activation_2', ['linear', 'relu'])))
        model.add(keras.layers.Dense(1, activation='linear'))
        model.compile(loss='mse')
        return model

    p_build = partial(build_model, n_input=n_input)
    tuner = kt.Hyperband(p_build, objective='val_loss', overwrite=True, max_epochs=100)
    tuner.search(d['x_train'], d['y_train'], epochs=epochs,
                 validation_data=(d['x_val'], d['y_val']))
    print(tuner.results_summary())
    best_model = tuner.get_best_models()[0]
    return best_model
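# A minimal, hypothetical usage sketch for the tunertrain variants above. The data dict
# keys (x_train / y_train / x_val / y_val) come from the snippets themselves; the array
# shapes are illustrative assumptions consistent with input_shape=(1, n_input).
import numpy as np

n_input = 16
d = {
    'x_train': np.random.rand(200, 1, n_input),
    'y_train': np.random.rand(200, 1, 1),
    'x_val': np.random.rand(50, 1, n_input),
    'y_val': np.random.rand(50, 1, 1),
}
best_model = tunertrain(d, n_input=n_input, epochs=10)  # returns the tuner's best model
best_model.summary()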
def tunertrain(d, n_input, epochs):

    def build_model(hp, n_inputs):
        model = keras.Sequential()
        learning_rate = hp.Choice(
            'learning_rate',
            [0.00001, 0.0001, 0.001, 0.005, 0.01, 0.02, 0.04, 0.07, 0.1])
        optimizer_name = hp.Choice('optimizer_name', ['adam', 'adagrad', 'rmsprop', 'sgd'])
        if optimizer_name == 'adam':
            optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
        elif optimizer_name == 'adagrad':
            optimizer = keras.optimizers.Adagrad(learning_rate=learning_rate)
        elif optimizer_name == 'rmsprop':
            optimizer = keras.optimizers.RMSprop(learning_rate=learning_rate)
        elif optimizer_name == 'sgd':
            optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
        else:
            raise ValueError("missing case")

        # Initializer ranges for the first layer
        kernel_0_min = hp.Choice('kernel_0_min', [-0.01, 0.01, 0.1])
        kernel_0_up = hp.Choice('kernel_0_up', [0.001, 0.01, 0.1])
        bias_0_min = hp.Choice('bias_0_min', [-1., -0.1, -0.01, 0.0, 0.01, 0.1, 1.0])
        bias_0_up = hp.Choice('bias_0_up', [0.01, 0.1, 0.2, 1.])
        kernel_initializer_0 = keras.initializers.RandomUniform(
            minval=kernel_0_min, maxval=kernel_0_min + kernel_0_up, seed=None)
        bias_initializer_0 = keras.initializers.RandomUniform(
            minval=bias_0_min, maxval=bias_0_min + bias_0_up, seed=None)

        # First layer
        model.add(keras.layers.Dense(
            units=hp.Choice('units_0', [8, 16, 32, 64, 92, 128, 156, 256]),
            activation=hp.Choice('activation_0',
                                 ['linear', 'relu', 'tanh', 'sigmoid', 'selu', 'elu', 'softsign']),
            input_shape=(1, n_inputs),
            kernel_initializer=kernel_initializer_0,
            bias_initializer=bias_initializer_0))
        # Second layer
        model.add(keras.layers.Dense(
            units=hp.Choice('units_1', [8, 16, 32, 64]),
            activation=hp.Choice('activation_1',
                                 ['linear', 'relu', 'tanh', 'sigmoid', 'selu', 'elu', 'softsign'])))
        # Third layer
        model.add(keras.layers.Dense(
            units=hp.Choice('units_2', [2, 4, 8, 16, 32]),
            activation=hp.Choice('activation_2',
                                 ['linear', 'relu', 'tanh', 'sigmoid', 'selu', 'elu', 'softsign'])))
        # Final layer
        model.add(keras.layers.Dense(1, activation='linear'))
        model.compile(loss='mse', optimizer=optimizer)
        return model

    p_build = partial(build_model, n_inputs=n_input)
    callback = keras.callbacks.EarlyStopping(monitor='loss', patience=250)
    tuner = kt.Hyperband(p_build, objective='val_loss', overwrite=True, max_epochs=2500)
    tuner.search(d['x_train'], d['y_train'], epochs=epochs,
                 validation_data=(d['x_val'], d['y_val']), callbacks=[callback])
    print(tuner.results_summary())
    best_model = tuner.get_best_models()[0]
    return best_model
def get_best_hps(X_train, y_train):
    print('Finding best hyperparameters...')
    tuner = kt.Hyperband(build_mod,
                         objective='val_accuracy',
                         max_epochs=10,
                         factor=3,
                         directory='.',
                         project_name='kt_intro')
    stop_early = EarlyStopping(monitor='val_loss', patience=5)
    tuner.search(X_train, y_train, epochs=EPOCHS, validation_split=0.2, callbacks=[stop_early])
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print('Best hyperparams found:\n'
          f'  Dense units: {best_hps.get("units")}\n'
          f'  Learning rate: {best_hps.get("learning_rate")}')
    return tuner, best_hps
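# A hedged follow-up sketch: one way the (tuner, best_hps) pair returned above could be
# used to build and fully train the winning configuration. tuner.hypermodel.build(best_hps)
# is the standard keras-tuner pattern (also used in the MNIST demo below); X_train, y_train
# and EPOCHS are the same external names the function already assumes.
tuner, best_hps = get_best_hps(X_train, y_train)
final_model = tuner.hypermodel.build(best_hps)
history = final_model.fit(X_train, y_train, epochs=EPOCHS, validation_split=0.2)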
def get_tuner(cfg_hypertune, model_builder, outdir, recreate, strategy):
    import keras_tuner as kt

    if cfg_hypertune["algorithm"] == "random":
        print("Keras Tuner: Using RandomSearch")
        cfg_rand = cfg_hypertune["random"]
        return kt.RandomSearch(
            model_builder,
            objective=cfg_rand["objective"],
            max_trials=cfg_rand["max_trials"],
            project_name=outdir,
            overwrite=recreate,
        )
    elif cfg_hypertune["algorithm"] == "bayesian":
        print("Keras Tuner: Using BayesianOptimization")
        cfg_bayes = cfg_hypertune["bayesian"]
        return kt.BayesianOptimization(
            model_builder,
            objective=cfg_bayes["objective"],
            max_trials=cfg_bayes["max_trials"],
            num_initial_points=cfg_bayes["num_initial_points"],
            project_name=outdir,
            overwrite=recreate,
        )
    elif cfg_hypertune["algorithm"] == "hyperband":
        print("Keras Tuner: Using Hyperband")
        cfg_hb = cfg_hypertune["hyperband"]
        return kt.Hyperband(
            model_builder,
            objective=cfg_hb["objective"],
            max_epochs=cfg_hb["max_epochs"],
            factor=cfg_hb["factor"],
            hyperband_iterations=cfg_hb["iterations"],
            directory=outdir + "/tb",
            project_name="mlpf",
            overwrite=recreate,
            executions_per_trial=cfg_hb["executions_per_trial"],
            distribution_strategy=strategy,
        )
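# A hypothetical config dict for get_tuner, covering only the keys the dispatcher above
# actually reads; the concrete values (and the model_builder/outdir names) are assumptions.
cfg_hypertune = {
    "algorithm": "hyperband",  # "random" | "bayesian" | "hyperband"
    "random": {"objective": "val_loss", "max_trials": 20},
    "bayesian": {"objective": "val_loss", "max_trials": 20, "num_initial_points": 5},
    "hyperband": {
        "objective": "val_loss",
        "max_epochs": 30,
        "factor": 3,
        "iterations": 1,
        "executions_per_trial": 1,
    },
}
tuner = get_tuner(cfg_hypertune, model_builder, outdir="tuning", recreate=True, strategy=None)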
def main():
    '''Demo of keras_tuner for Neural Architecture Search (NAS).

    Smaller dense layers -> faster training -> more epochs, which saves compute
    resources and often beats a hand-designed architecture. You can optimize the
    number of dense layers, the number of units per dense layer, the dropout rate,
    and the type of activation function. Outcomes are saved to files, which enables
    resuming a search after it has been stopped.
    '''
    import tensorflow as tf
    from tensorflow import keras
    import keras_tuner as kt

    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    def model_builder(hp):
        model = keras.Sequential()
        model.add(keras.layers.Flatten(input_shape=(28, 28)))
        # define search band for the number of neurons/units and plug it into the dense layer
        hp_units = hp.Int('units', min_value=16, max_value=512, step=16)
        model.add(keras.layers.Dense(units=hp_units, activation='relu'))
        model.add(tf.keras.layers.Dropout(0.2))
        model.add(tf.keras.layers.Dense(10))
        # define search band for the learning rate
        hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
        model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['accuracy'])
        return model

    # the tuner automatically writes the results of each trial to files.
    # Beyond Hyperband, other methods are available for covering the search space.
    tuner = kt.Hyperband(model_builder,
                         objective='val_accuracy',
                         max_epochs=10,
                         factor=3,
                         directory='keras_tuner_files',
                         project_name='mnist_tune2')

    # stop early if the validation loss does not improve within 5 epochs
    stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

    # start tuning
    tuner.search(x_train, y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

    # ranked from best to worst, 0 = best
    best_hps = tuner.get_best_hyperparameters()[0]
    # print('The best number of neurons/units in the dense hidden layer is: ', best_hps.get('units'))
    # print('The best learning rate is: ', best_hps.get('learning_rate'))

    # # re-train with best value from tuning
    # # hardcode a hidden layer with the best val_accuracy from keras_tuner
    # model2 = keras.Sequential()
    # model2.add(keras.layers.Flatten(input_shape=(28, 28)))
    # model2.add(keras.layers.Dense(units=16, activation='relu'))
    # model2.add(tf.keras.layers.Dropout(0.2))
    # model2.add(tf.keras.layers.Dense(10, activation='softmax'))

    # load the tuner's best model and train it fully
    best_model = tuner.hypermodel.build(best_hps)
    # note: recompiling here replaces the tuned learning rate with Adam's default
    best_model.compile(optimizer='adam',
                       loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                       metrics=['accuracy'])
    history = best_model.fit(x_train, y_train, epochs=5, validation_split=0.2)

    val_acc_per_epoch = history.history['val_accuracy']
    best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
    print(f'Best epoch: {best_epoch}')

    best_model.evaluate(x_test, y_test)
    return keras_model


# quick check that the model builds successfully:
build_model(kt.HyperParameters())

# The Hyperband algorithm trains a large number of models for a few epochs
# and carries forward only the top-performing half of models to the next round.
hyperparam_tuner = kt.Hyperband(
    hypermodel=build_model,
    objective="val_loss",
    max_epochs=50,            # the maximum number of epochs to train one model
    hyperband_iterations=10,  # the number of times to iterate over the full Hyperband algorithm
    overwrite=True,           # overwrite previous tuning results instead of resuming them
    directory=os.path.normpath("C:/temp/hyperparam_tuner"),
    project_name="image_gender_classifier",
)
hyperparam_tuner.search_space_summary()

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5,
                                              restore_best_weights=True)
hyperparam_tuner.search(
    train_ds,
def tunertrain(d, n_input, epochs):

    def build_model(hp, n_inputs):
        model = keras.Sequential()
        learning_rate = hp.Choice(
            'learning_rate',
            [0.00001, 0.0001, 0.001, 0.005, 0.01, 0.02, 0.04, 0.07, 0.1])
        optimizer_name = hp.Choice('optimizer_name', ['adam', 'adagrad', 'rmsprop', 'sgd'])
        if optimizer_name == 'adam':
            optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
        elif optimizer_name == 'adagrad':
            optimizer = keras.optimizers.Adagrad(learning_rate=learning_rate)
        elif optimizer_name == 'rmsprop':
            optimizer = keras.optimizers.RMSprop(learning_rate=learning_rate)
        elif optimizer_name == 'sgd':
            optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
        else:
            raise ValueError("missing case")

        # Characteristics of layers
        n_layers = hp.Choice('num_layers', [1, 2, 3, 4])
        for layer_ndx in range(n_layers):
            units = hp.Choice('units_' + str(layer_ndx), [2, 4, 8, 16, 32, 64, 128])
            activation = hp.Choice(
                'activation_' + str(layer_ndx),
                ['softsign', 'linear', 'tanh', 'selu', 'elu', 'relu'])
            kernel_init = hp.Choice('kernel_' + str(layer_ndx), [0.001, 0.01, 0.1, 1., 10.])
            bias_init = hp.Choice('bias_' + str(layer_ndx), [0.001, 0.01, 0.1, 1., 10.])
            bias_offset = hp.Choice('bias_offset_' + str(layer_ndx), [-0.1, -0.01, 0., 0.01, 0.1])
            layer_kwargs = dict(
                units=units,
                activation=activation,
                kernel_initializer=keras.initializers.RandomUniform(
                    minval=-kernel_init, maxval=kernel_init, seed=None),
                bias_initializer=keras.initializers.RandomUniform(
                    minval=-bias_init + bias_offset, maxval=bias_init + bias_offset, seed=None))
            if layer_ndx == 0:
                layer_kwargs['input_shape'] = (1, n_inputs)
            model.add(keras.layers.Dense(**layer_kwargs))
        model.add(keras.layers.Dense(1, activation='linear'))
        dropout = hp.Choice('dropout', [0.0, 0.05, 0.2])
        if dropout > 0:
            model.add(keras.layers.Dropout(rate=dropout))
        model.compile(loss='mse', optimizer=optimizer)
        return model

    p_build = partial(build_model, n_inputs=n_input)
    callback = keras.callbacks.EarlyStopping(monitor='loss', patience=250)
    tuner = kt.Hyperband(p_build, objective='val_loss', overwrite=True, max_epochs=2500)

    from sklearned.augment.affine import affine, jiggle
    aug_X, aug_y = affine(X=d['x_train'], y=d['y_train'],
                          s=[0.95, 0.975, 0.99, 1.0, 1.01, 1.025, 1.05])
    jiggle_X = jiggle(aug_X, jiggle_fraction=0.1)
    tuner.search(jiggle_X, aug_y, epochs=epochs,
                 validation_data=(d['x_val'], d['y_val']), callbacks=[callback])
    print(tuner.results_summary())
    best_model = tuner.get_best_models()[0]
    return best_model
    x = tf.keras.layers.Dropout(
        hp.Float("dropout", 0, 0.5, step=0.1, default=0.5))(x)
    outputs = tf.keras.layers.Dense(10, activation="softmax")(x)
    model = tf.keras.Model(inputs, outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            hp.Float("learning_rate", 1e-4, 1e-2, sampling="log")),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model


tuner = kt.Hyperband(build_model,
                     objective="val_accuracy",
                     max_epochs=30,
                     hyperband_iterations=2)

data = tfds.load("cifar10")
train_ds, test_ds = data["train"], data["test"]


def standardize_record(record):
    return tf.cast(record["image"], tf.float32) / 255.0, record["label"]


train_ds = train_ds.map(standardize_record).cache().batch(64).shuffle(10000)
test_ds = test_ds.map(standardize_record).cache().batch(64)

tuner.search(
    train_ds,
    model.add(Dense(units=hp_units, activation='relu', input_shape=(11, )))
    model.add(Dense(units=hp_units, activation='relu'))
    model.add(Dense(units=1, activation='sigmoid'))
    model.compile(optimizer=Adam(learning_rate=hp_learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model


import keras_tuner as kt

# model_fucn = the function that builds the model
# objective = the metric we want to optimize
# max_epochs = the maximum number of epochs used to train any one model
tuner = kt.Hyperband(hypermodel=model_fucn, objective='val_accuracy', max_epochs=20)

X_train, X_test, y_train, y_test = train_test_split(X_final, y_final,
                                                    test_size=0.33, random_state=42)

from tensorflow.keras.callbacks import EarlyStopping

# monitor = the quantity to watch
# patience = number of epochs with no improvement before stopping
stop_early = EarlyStopping(monitor='val_loss', patience=3)

tuner.search(X_train, y_train, epochs=10, validation_split=0.2, callbacks=[stop_early])
# tuner.search(X_train, y_train, epochs=50, validation_split=0.2)
    model = tf.keras.Model(inputs, outputs)
    optimizer = hp.Choice("optimizer", ["adam", "sgd"])
    model.compile(optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model


tuner = kt.Hyperband(
    hypermodel=build_model,
    objective="val_accuracy",
    max_epochs=2,
    factor=3,
    hyperband_iterations=1,
    distribution_strategy=tf.distribute.MirroredStrategy(),
    directory="results_dir",
    project_name="mnist",
    overwrite=True,
)

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Reshape the images to have the channel dimension.
x_train = (x_train.reshape(x_train.shape + (1, )) / 255.0)[:1000]
y_train = y_train.astype(np.int64)[:1000]
x_test = (x_test.reshape(x_test.shape + (1, )) / 255.0)[:100]
y_test = y_test.astype(np.int64)[:100]

tuner.search(
    autoencoder = Model(input, decoder(encoder(input)), name="autoencoder")
    # optimizer = hp.Choice("optimizer", ["adam", "sgd"])
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    opt = tf.optimizers.Adam(hp_learning_rate)
    autoencoder.compile(opt, loss="mse", metrics=["accuracy"])
    return autoencoder


train_data, test_data = create_datasets()
noisy_train_data = noise(train_data)
noisy_test_data = noise(test_data)

tuner = kt.Hyperband(build_model,
                     objective='val_accuracy',
                     max_epochs=150,
                     factor=10)
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
tuner.search(x=noisy_train_data, y=train_data,
             validation_split=0.2, shuffle=True, batch_size=BS,
             callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.values)

# print the best 10 results (results_summary prints a report; it does not return the models)
tuner.results_summary(num_trials=10)