def hyperband_optimization(df: pd.DataFrame):
    """Run a KerasTuner Bayesian-optimization search over `df` and print the winner.

    Despite the function name, the tuner used is ``kt.BayesianOptimization``
    (200 trials, objective ``val_binary_accuracy``).

    Args:
        df: Data frame handed to ``get_datagen_split`` to obtain the training
            and validation partitions.

    Returns:
        None. The best hyperparameters are printed.
    """
    train_frame, val_frame = get_datagen_split(df)

    # Both generators share the same windowing/batching configuration.
    generator_options = dict(binarize_activity_hours=False,
                             batch_size=BATCH_SIZE,
                             window_size=LOOK_BACK_WINDOW_SIZE,
                             look_ahead_steps=LOOK_AHEAD_SIZE)
    datagen_train = BauGenerator(df=train_frame, **generator_options)
    datagen_val = BauGenerator(df=val_frame, **generator_options)

    # The model-building function reads its dimensions from module-level
    # attributes of `machine_learning.models`, so publish them before tuning.
    model_module = machine_learning.models
    model_module.HYPER_NUM_ROWS_DF = datagen_train.X_batches.shape[2]
    model_module.HYPER_NUM_OUTPUT_FIELDS = datagen_train.Y_batches.shape[2]
    model_module.HYPER_WINDOW_SIZE = LOOK_BACK_WINDOW_SIZE
    model_module.HYPER_LOOK_AHEAD_SIZE = LOOK_AHEAD_SIZE

    tuner = kt.BayesianOptimization(
        create_hyperband_model,
        objective='val_binary_accuracy',
        max_trials=200,
    )
    tuner.search(
        datagen_train,
        validation_data=datagen_val,
        epochs=70,
        callbacks=[],
        workers=16,
    )

    # The best model is loaded but not used further in this function.
    best_model = tuner.get_best_models(1)[0]
    best_hyperparameters = tuner.get_best_hyperparameters(1)[0]
    print(best_hyperparameters)
def setUp(self):
    """Prepare the fixture: load the census data, run the tuner, build the best model.

    Stores on ``self``: the exercise module, the column list, both dataset
    URLs, the train/test features and targets, the best ``units``,
    ``learning_rate`` and ``l2`` values, and a model built from the best
    hyperparameters.
    """
    import Activity06_02

    self.exercises = Activity06_02
    self.usecols = [
        'AAGE', 'ADTIND', 'ADTOCC', 'SEOTR', 'WKSWORK', 'PTOTVAL',
    ]
    self.train_url = 'https://github.com/PacktWorkshops/The-TensorFlow-Workshop/blob/master/Chapter06/dataset/census-income-train.csv?raw=true'
    self.test_url = 'https://github.com/PacktWorkshops/The-TensorFlow-Workshop/blob/master/Chapter06/dataset/census-income-test.csv?raw=true'

    def _load_split(url):
        # Read the selected columns and split off the 'PTOTVAL' target column.
        features = pd.read_csv(url, usecols=self.usecols)
        target = features.pop('PTOTVAL')
        return features, target

    self.train_data, self.train_target = _load_split(self.train_url)
    self.test_data, self.test_target = _load_split(self.test_url)

    # Seed NumPy and TensorFlow before the search.
    np.random.seed(8)
    tf.random.set_seed(8)

    tuner = kt.BayesianOptimization(
        model_builder,
        objective='val_mse',
        max_trials=10,
    )
    tuner.search(
        self.train_data,
        self.train_target,
        validation_data=(self.test_data, self.test_target),
    )

    winning_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    self.best_units = winning_hps.get('units')
    self.best_lr = winning_hps.get('learning_rate')
    self.best_l2 = winning_hps.get('l2')
    self.model = tuner.hypermodel.build(winning_hps)
def dummy_classification(df: pd.DataFrame):
    """Tune the dummy classifier with Bayesian optimization and print the best setup.

    Args:
        df: Data frame handed to ``get_datagen_split`` to obtain the training
            and validation partitions.

    Returns:
        None. The best hyperparameters are printed.
    """
    train_frame, val_frame = get_datagen_split(df)

    # Identical generator configuration for both partitions.
    generator_options = dict(batch_size=BATCH_SIZE,
                             window_size=LOOK_BACK_WINDOW_SIZE,
                             look_ahead_steps=LOOK_AHEAD_SIZE)
    datagen_train = BauGenerator(df=train_frame, **generator_options)
    datagen_val = BauGenerator(df=val_frame, **generator_options)

    # Publish the dimensions on the classification-models module before the
    # tuner starts building models.
    clf_module = machine_learning.classfication_models
    clf_module.HYPER_NUM_ROWS_DF = datagen_train.X_batches.shape[2]
    clf_module.HYPER_NUM_OUTPUT_FIELDS = datagen_train.Y_batches.shape[1]
    clf_module.HYPER_WINDOW_SIZE = LOOK_BACK_WINDOW_SIZE
    clf_module.HYPER_LOOK_AHEAD_SIZE = LOOK_AHEAD_SIZE

    tuner = kt.BayesianOptimization(
        create_bayesian_dummy_classifier,
        objective='val_accuracy',
        max_trials=100,
        project_name="arch_opt_",
    )
    tuner.search(
        datagen_train,
        validation_data=datagen_val,
        epochs=60,
        callbacks=[],
        workers=16,
    )

    # The best model is loaded but not used further in this function.
    best_model = tuner.get_best_models(1)[0]
    best_hyperparameters = tuner.get_best_hyperparameters(1)[0]
    print(best_hyperparameters)
def tuneHP(self, hyperModel, X_train, X_test, y_train, y_test,
           tuner_epochs=50, tuner_batch_size=10000, tuner_mode=0):
    """Run a KerasTuner search and return the best hyperparameters and the tuner.

    Args:
        hyperModel: Hypermodel (or model-building callable) given to the tuner.
        X_train: Training inputs.
        X_test: Inputs used as validation data during the search.
        y_train: Training targets.
        y_test: Targets used as validation data during the search.
        tuner_epochs: Epochs per trial passed to ``tuner.search``.
        tuner_batch_size: Batch size passed to ``tuner.search``.
        tuner_mode: Tuner selector: 0 = Hyperband, 1 = Bayesian optimization,
            2 = random search.

    Returns:
        Tuple ``(best_hps, tuner)``.

    Raises:
        ValueError: If ``tuner_mode`` is not 0, 1 or 2.
    """
    # `seed_value` is not a parameter; it is read from the enclosing scope.
    if tuner_mode == 0:
        # Objective names noted by the original author as candidates:
        # ['loss', 'auc', 'accuracy', 'val_loss', 'val_auc', 'val_accuracy']
        # NOTE(review): this branch optimizes the training metric "auc" while
        # the other two use 'val_loss' - confirm that is intended.
        tuner = kt.Hyperband(hyperModel,
                             objective=kt.Objective("auc", direction="max"),
                             max_epochs=200,
                             hyperband_iterations=3,
                             factor=3,
                             seed=seed_value,
                             directory='tuning',
                             project_name='model_hyperband_1',
                             overwrite=True)
    elif tuner_mode == 1:
        tuner = kt.BayesianOptimization(hyperModel,
                                        objective='val_loss',
                                        max_trials=100,
                                        seed=seed_value,
                                        directory='tuning',
                                        project_name='model_bayesian_1',
                                        overwrite=True)
    elif tuner_mode == 2:
        tuner = kt.RandomSearch(hyperModel,
                                objective='val_loss',
                                max_trials=1000,
                                seed=seed_value,
                                directory='tuning',
                                project_name='model_random_1',
                                overwrite=True)
    else:
        raise ValueError('Invalid tuner mode')

    tuner.search(X_train,
                 y_train,
                 epochs=tuner_epochs,
                 batch_size=tuner_batch_size,
                 validation_data=(X_test, y_test),
                 verbose=0)

    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    # search_space_summary() prints the summary itself and returns None, so it
    # must not be wrapped in print() (that emitted a stray "None" line).
    tuner.search_space_summary()
    return best_hps, tuner
def tuner_fn(fn_args: TrainerFnArgs) -> TunerFnResult:
    """Create the KerasTuner tuner and the arguments used to fit each trial.

    Args:
      fn_args: Holds args as name/value pairs. The fields read here are:
        - transform_graph_path: transform graph produced by TFT.
        - working_dir: working dir for tuning.
        - train_files: file paths containing training data.
        - eval_files: file paths containing eval data.
        - data_accessor: forwarded to `_input_fn` for both datasets.
        - train_steps: number of train steps.
        - eval_steps: number of eval steps.

    Returns:
      A `TunerFnResult` with:
        - tuner: the `kerastuner.BayesianOptimization` instance.
        - fit_kwargs: the training/validation datasets and step counts.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    # Bind the transform output so the tuner only has to supply hyperparameters.
    model_builder = functools.partial(_build_keras_model,
                                      tf_transform_output=tf_transform_output)

    # BayesianOptimization is a subclass of kerastuner.Tuner which inherits
    # from BaseTuner.
    bayesian_tuner = kerastuner.BayesianOptimization(
        model_builder,
        max_trials=10,
        hyperparameters=_get_hyperparameters(),
        # New entries allowed for n_units hyperparameter construction
        # conditional on n_layers selected.
        # allow_new_entries=True,
        # tune_new_entries=True,
        objective=kerastuner.Objective('val_sparse_categorical_accuracy',
                                       'max'),
        directory=fn_args.working_dir,
        project_name='covertype_tuning',
    )

    def _make_dataset(files, batch_size):
        # Both datasets share the accessor and the transform output.
        return _input_fn(files,
                         fn_args.data_accessor,
                         tf_transform_output,
                         batch_size=batch_size)

    train_dataset = _make_dataset(fn_args.train_files, TRAIN_BATCH_SIZE)
    eval_dataset = _make_dataset(fn_args.eval_files, EVAL_BATCH_SIZE)

    fit_kwargs = {
        'x': train_dataset,
        'validation_data': eval_dataset,
        'steps_per_epoch': fn_args.train_steps,
        'validation_steps': fn_args.eval_steps,
    }
    return TunerFnResult(tuner=bayesian_tuner, fit_kwargs=fit_kwargs)
def tune_model(name: str):
    """Smoke-test the model once, then run a Bayesian hyperparameter search.

    Args:
        name: Identifier used to load the data sets and to name the tuner
            project folder under ``MODEL_PATH / "BO"``.

    Returns:
        Tuple ``(tuner, best_hps)``.
    """
    training_set, _test_set = load_training_and_test_set(name)
    save_folder_path = MODEL_PATH / "BO"
    try_to_find_folder_path_otherwise_make_one(save_folder_path / f'{name}')
    model_x, model_y, _ = get_data_for_nn(name)

    # Run pressure test. The decorator is given a pickle path; judging by its
    # name it skips the run when that file already exists.
    pressure_test_marker = save_folder_path / f"{name}/pressure_test.pkl"

    @load_exist_pkl_file_otherwise_run_and_save(pressure_test_marker)
    def func():
        print("Start pressure test")
        trial_model = make_hp_model(training_set, None, True)
        early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=EPOCHS)
        trial_model.fit(model_x,
                        model_y,
                        epochs=10,
                        validation_split=0.1,
                        batch_size=BATCH_SIZE,
                        callbacks=[early_stop])
        print("Pressure test passed")
        return "pass"

    func()

    def _build_for_trial(hp):
        # Adapter giving the tuner the one-argument builder it expects.
        return make_hp_model(training_set, hp, False)

    tuner = kt.BayesianOptimization(
        hypermodel=_build_for_trial,
        objective='val_mae',
        max_trials=32,
        directory=str(save_folder_path),
        project_name=f'{name}',
    )

    # Patience is 5% of the epoch budget, truncated to an integer.
    stop_early = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=int(EPOCHS * 0.05),
    )
    tuner.search(
        model_x,
        model_y,
        verbose=2,
        epochs=EPOCHS,
        validation_split=0.1,
        callbacks=[stop_early],
        batch_size=BATCH_SIZE,
    )

    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    return tuner, best_hps
def run_bayesian(tr_x, tr_y, tx, ty, config: Config, class_weights):
    """Run a Bayesian hyperparameter search and wrap the best configuration.

    Args:
        tr_x: Training inputs; ``tr_x.shape[1:]`` is passed to ``TunerModel``.
        tr_y: Training targets.
        tx: Validation inputs.
        ty: Validation targets.
        config: Supplies ``objective``, ``direction``, ``project_name``,
            ``directory``, ``max_trials``, ``epochs`` and ``batch_size``.
        class_weights: Forwarded to the search as ``class_weight``.

    Returns:
        The object produced by ``DDModel.load`` for a freshly built model
        using the best hyperparameters.
    """
    input_shape = tr_x.shape[1:]
    tuner_model = TunerModel(input_shape)

    search_objective = kt.Objective(config.objective, config.direction)
    tuner = kt.BayesianOptimization(
        tuner_model.build_tuner_model,
        objective=search_objective,
        project_name=config.project_name,
        directory=config.directory,
        max_trials=config.max_trials,
        overwrite=True,
    )
    tuner.search_space_summary()

    # Stop a trial after 3 epochs without improvement of the tuned objective.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor=config.objective,
        min_delta=0,
        patience=3,
        verbose=0,
        mode=config.direction,
    )
    tuner.search(
        tr_x,
        tr_y,
        validation_data=(tx, ty),
        epochs=config.epochs,
        batch_size=config.batch_size,
        class_weight=class_weights,
        callbacks=[early_stopping],
    )

    # Show a summary of the search
    tuner.results_summary()

    # Rebuild a model from the best hyperparameters.
    print("Saving the top model...")
    top_hps = tuner.get_best_hyperparameters(1)[0]
    print("Top hyperparameters:", top_hps)
    rebuilt_model = tuner.hypermodel.build(top_hps)
    model = DDModel.load(rebuilt_model, kt_hyperparameters=top_hps)

    for hp_name in top_hps.values:
        print(hp_name, "->", top_hps[hp_name])
    return model
def main(_):
    """Tune, retrain and export an image classifier read from TFRecord files.

    Driven by ``flags.FLAGS``; the flags read here are
    ``distribution_strategy``, ``data_dir``, ``train_batch_size``,
    ``validation_batch_size``, ``tuner_type`` and ``model_dir``.

    Steps:
      1. Pick a distribution strategy ('tpu', 'gpu', otherwise the current
         default strategy).
      2. Build the training and validation datasets.
      3. Run the selected KerasTuner search (Bayesian optimization, random
         search, or Hyperband for any other ``tuner_type``).
      4. Rebuild the best model, train it, and save it under ``model_dir``.

    Args:
        _: Unused positional argument.
    """
    flags_obj = flags.FLAGS
    setup_keras_tuner_config()

    # Start from the current (default) strategy so `strategy` is always bound;
    # previously any setting other than 'tpu'/'gpu' left it undefined and the
    # code below failed with a NameError.
    strategy = tf.distribute.get_strategy()
    if flags_obj.distribution_strategy == 'tpu':
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.experimental.TPUStrategy(resolver)
        print("All devices: ", tf.config.list_logical_devices('TPU'))
    elif flags_obj.distribution_strategy == 'gpu':
        strategy = tf.distribute.MirroredStrategy()
    print('NUMBER OF DEVICES: ', strategy.num_replicas_in_sync)

    # Identify data paths and sources.
    root_dir = flags_obj.data_dir  # this is gs://<bucket>/folder where tfrecord are stored
    file_pattern = "{}/image_classification_builder-train*.tfrecord*".format(
        root_dir)
    val_file_pattern = "{}/image_classification_builder-validation*.tfrecord*".format(
        root_dir)

    autotune = tf.data.experimental.AUTOTUNE
    all_files = tf.data.Dataset.list_files(tf.io.gfile.glob(file_pattern))
    val_all_files = tf.data.Dataset.list_files(
        tf.io.gfile.glob(val_file_pattern))
    train_all_ds = tf.data.TFRecordDataset(all_files,
                                           num_parallel_reads=autotune)
    val_all_ds = tf.data.TFRecordDataset(val_all_files,
                                         num_parallel_reads=autotune)

    # Perform data engineering.
    dataset = train_all_ds.map(decode_and_resize)
    val_dataset = val_all_ds.map(decode_and_resize)

    train_batch_size = flags_obj.train_batch_size
    validation_batch_size = flags_obj.validation_batch_size

    dataset = dataset.map(normalize, num_parallel_calls=autotune)
    val_dataset = val_dataset.map(normalize, num_parallel_calls=autotune)

    val_ds = val_dataset.batch(validation_batch_size)
    # NOTE(review): prepare_for_training receives no batch size here; confirm
    # it batches with the same value as `train_batch_size`, which is used
    # below to derive the steps per epoch.
    train_ds = prepare_for_training(dataset)

    # TFRecord datasets expose no length, so count records by iterating once.
    train_sample_size = sum(1 for _ in train_all_ds)
    print('TRAIN_SAMPLE_SIZE = ', train_sample_size)
    validation_sample_size = sum(1 for _ in val_all_ds)
    print('VALIDATION_SAMPLE_SIZE = ', validation_sample_size)

    steps_per_epoch = train_sample_size // train_batch_size
    validation_steps = validation_sample_size // validation_batch_size

    # Run the hyperparameter search.
    tuner_type = flags_obj.tuner_type.lower()
    if tuner_type == 'BayesianOptimization'.lower():
        tuner = kt.BayesianOptimization(hypermodel=model_builder,
                                        objective='val_accuracy',
                                        tune_new_entries=True,
                                        allow_new_entries=True,
                                        max_trials=5,
                                        directory=flags_obj.model_dir,
                                        project_name='hp_tune_bo',
                                        overwrite=True)
    elif tuner_type == 'RandomSearch'.lower():
        tuner = kt.RandomSearch(hypermodel=model_builder,
                                objective='val_accuracy',
                                tune_new_entries=True,
                                allow_new_entries=True,
                                max_trials=5,
                                directory=flags_obj.model_dir,
                                project_name='hp_tune_rs',
                                overwrite=True)
    else:
        # Any other tuner_type falls back to Hyperband, the only tuner here
        # that is handed the distribution strategy.
        tuner = kt.Hyperband(hypermodel=model_builder,
                             objective='val_accuracy',
                             max_epochs=3,
                             factor=2,
                             distribution_strategy=strategy,
                             directory=flags_obj.model_dir,
                             project_name='hp_tune_hb',
                             overwrite=True)

    tuner.search(train_ds,
                 steps_per_epoch=steps_per_epoch,
                 validation_data=val_ds,
                 validation_steps=validation_steps,
                 epochs=3,
                 callbacks=[
                     tf.keras.callbacks.EarlyStopping('val_accuracy'),
                     ClearTrainingOutput()
                 ])

    # Get the optimal hyperparameters.
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print(
        f" The hyperparameter search is done. "
        f"The best number of nodes in the dense layer is {best_hps.get('units')}. "
        f"The best activation function in mid dense layer is {best_hps.get('dense_activation')}. \n"
    )

    # Build the model with the optimal hyperparameters and train it on the data.
    model = tuner.hypermodel.build(best_hps)
    checkpoint_prefix = os.path.join(flags_obj.model_dir,
                                     "best_hp_train_ckpt_{epoch}")
    callbacks = [
        tf.keras.callbacks.TensorBoard(
            log_dir=os.path.join(flags_obj.model_dir, 'tensorboard_logs')),
        tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix,
                                           save_weights_only=True)
    ]
    model.fit(train_ds,
              epochs=3,
              steps_per_epoch=steps_per_epoch,
              validation_data=val_ds,
              validation_steps=validation_steps,
              callbacks=callbacks)

    logging.info('INSIDE MAIN FUNCTION user input model_dir %s',
                 flags_obj.model_dir)

    # Save model trained with chosen HP in user specified bucket location.
    model_save_dir = os.path.join(flags_obj.model_dir, 'best_save_model')
    model.save(model_save_dir)
batch_size = config['batch_size'] print("Loading Dataset...") data = Docking.ML.load_data.load(train_path, test_path) train_x, train_y, test_x, test_y = data() train_x, train_y = train_x.tolist(), train_y.tolist() test_x, test_y = test_x.tolist(), test_y.tolist() tr_x = np.array(train_x) tr_y = np.array(train_y) tx = np.array(test_x) ty = np.array(test_y) tuner = kt.BayesianOptimization(DDModel.build_tuner_model, objective=kt.Objective( objective, direction), project_name=project_name, directory=directory, max_trials=max_trials) tuner.search_space_summary() tuner.search(tr_x, tr_y, validation_data=(tx, ty), epochs=epochs, batch_size=batch_size, class_weight={ 0: 2, 1: 1 }, callbacks=[ tf.keras.callbacks.EarlyStopping(monitor=objective,
def hyper_parameter_search(search_type='BO',
                           objective='mse',
                           seed=101,
                           max_trails=10,
                           directory=os.path.normpath('C:/'),
                           project_name='',
                           max_epochs=10,
                           factor=3,
                           epochs=10,
                           train_data=(),
                           val_data=()):
    """Search hyperparameters with the requested KerasTuner tuner.

    Args:
        search_type: Case-insensitive tuner selector: 'BO' or
            'BayesianOptimization', 'RS' or 'RandomSearch', 'HB' or
            'Hyperband'.
        objective: Objective passed to the tuner.
        seed: Random seed passed to the tuner (all three tuner types).
        max_trails: Maximum number of trials (Bayesian and random search
            only). The misspelled name is kept for backward compatibility.
        directory: Directory in which the tuner stores its results.
        project_name: Tuner project name.
        max_epochs: Hyperband ``max_epochs``.
        factor: Hyperband ``factor``.
        epochs: Epochs per trial passed to ``tuner.search``.
        train_data: Sequence whose first two items are the training inputs
            and targets.
        val_data: Sequence whose first two items are the validation inputs
            and targets.

    Returns:
        A model built (not trained) from the best hyperparameters found.

    Raises:
        ValueError: If ``search_type`` is not one of the supported names.
    """
    search_type = search_type.upper()
    if search_type in ('BO', 'BAYESIANOPTIMIZATION'):
        tuner = kt.BayesianOptimization(model_build,
                                        objective=objective,
                                        seed=seed,
                                        max_trials=max_trails,
                                        directory=directory,
                                        project_name=project_name)
    elif search_type in ('RS', 'RANDOMSEARCH'):
        tuner = kt.RandomSearch(model_build,
                                objective=objective,
                                seed=seed,
                                max_trials=max_trails,
                                directory=directory,
                                project_name=project_name)
    elif search_type in ('HB', 'HYPERBAND'):
        # Forward the seed here as well; it used to be silently dropped for
        # Hyperband, making the `seed` argument a no-op in this mode.
        tuner = kt.Hyperband(model_build,
                             max_epochs=max_epochs,
                             objective=objective,
                             factor=factor,
                             seed=seed,
                             directory=directory,
                             project_name=project_name)
    else:
        raise ValueError(
            'The requested keras tuner search type doesnot exist\n')

    tuner.search(train_data[0],
                 train_data[1],
                 epochs=epochs,
                 validation_data=(val_data[0], val_data[1]),
                 callbacks=[ClearTrainingOutput()],
                 verbose=1)

    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print(f""" The hyperparameter search is complete. The optimal units {best_hps.get('units')} and the optimal learning rate is {best_hps.get('learning_rate')} and the optimal dropout {best_hps.get('dropout')} and the optimal activation {best_hps.get('dense_activation')}.""")

    model = tuner.hypermodel.build(best_hps)
    return model
# ---------------------------------- # # 우리 만의 베스트 모델을 만들자 best_model = tuner.hypermodel.build(best_hps[0]) best_model.fit(x_train, y_train, epochs=10, batch_size=100, verbose=2, validation_split=0.2) print('acc :', best_model.evaluate(x_test, y_test, verbose=0)) tuner_bayesian = kt.BayesianOptimization( model_builder, objective='val_acc', max_trials=5, directory='keras_tuner/bayesian', # 따로 만들어주지 않아도 알아서 만들어줌 project_name='mnist') tuner_random_search = kt.RandomSearch( model_builder, objective='val_acc', max_trials=5, directory='keras_tuner/random_search', # 따로 만들어주지 않아도 알아서 만들어줌 project_name='mnist') # 세가지중 hyperband 가 가장 잘찾는다 이거로 쓰면됨 tuner_hyperband = kt.Hyperband( model_builder, objective='val_loss', max_trials=5,
loss='mse', metrics=['accuracy']) return model #tuner = kt.Hyperband(model_builder, # objective='val_accuracy', # max_epochs=200, # factor=3, # directory='kerastuner', # project_name='hyperband_elementals') tuner = kt.BayesianOptimization(model_builder, objective='val_accuracy', max_trials=100, directory='D:/kerastuner', project_name='D:/kerastuner/Bayesian_elementals') class ClearTrainingOutput(tf.keras.callbacks.Callback): def on_train_end(*args, **kwargs): IPython.display.clear_output(wait=True) tuner.search(x_train, y_train, epochs=150, validation_data=(x_val, y_val), callbacks=[ClearTrainingOutput()]) # Get the optimal hyperparameters best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
# Instantiate the random search tuner object print("[INFO] Instantiating a random search tuner object...") tuner = kt.RandomSearch(build_model, objective="val_accuracy", max_trials=10, seed=42, directory=config.OUTPUT_PATH, project_name=args["tuner"]) # Otherwise, use the bayesian optimization tuner else: # Instantiate the bayesian optimization tuner object print("[INFO] Instantiating a bayesian optimization tuner object...") tuner = kt.BayesianOptimization(build_model, objective="val_accuracy", max_trials=10, seed=42, directory=config.OUTPUT_PATH, project_name=args["tuner"]) # Perform the hyperparameter search print("[INFO] Performing hyperparameter search...") tuner.search(x=trainX, y=trainY, validation_data=(testX, testY), batch_size=config.BS, callbacks=[es], epochs=config.EPOCHS) # Grab the best hyperparameters bestHP = tuner.get_best_hyperparameters(num_trials=1)[0] print("[INFO] Optimal number of filters in conv_1 layer: {}".format(
model.add(Dense(1, kernel_initializer='normal')) model.compile(optimizer=tf.keras.optimizers.Adam( hp.Float('learning_rate', min_value=5e-4, max_value=1e-3, sampling='LOG', default=5e-4)), loss='mean_squared_error', metrics=['mean_absolute_error']) return model # tune model tuner_1 = kt.BayesianOptimization(build_model, objective='val_loss', max_trials=1000, num_initial_points=2, directory=dir_opt, project_name=dir_prj) tuner_2 = kt.Hyperband(build_model, objective='val_loss', factor=2, max_epochs=1000, hyperband_iterations=10, directory=dir_opt, project_name=dir_prj) tuner = tuner_2 if hyperbandit else tuner_1 tuner.search(X_train, Y_train, validation_data=(X_val, Y_val),
def main(n_img):
    """Run the ResUNet53D hyperparameter search for `n_img` and save the results.

    Creates a timestamped results folder, redirects ``sys.stdout`` to a
    ``Logger`` writing into it, runs a ``kt.BayesianOptimization`` search on
    the standardized train/dev data, stores the best hyperparameters and the
    best model, then predicts and plots the test set.

    Args:
        n_img: Passed to ``load_dataset`` and ``myHyperModel`` and embedded in
            the results folder and log file names.

    Returns:
        None.
    """
    ############
    # INITIALIZE
    ############

    # Unique, timestamped results folder.
    started = datetime.now()
    res_dir = (f'../results/hp_search_ResUNet53D_{n_img!s}/'
               + started.strftime('%Y%m%d_%H%M%S'))
    os.makedirs(res_dir)
    print(f'Directory {res_dir} succesfully created!')

    # From here on, stdout goes through the Logger.
    sys.stdout = Logger(f'{res_dir}/log_ResUNet53D_{n_img!s}.txt')
    print('Start processing: ' + started.strftime('%Y%m%d %H%M%S'))

    n_epochs = 2500

    # Training parameters recorded alongside the search results.
    train_log = {"n_img": n_img, "epochs": n_epochs}

    ##########
    # DATASETS
    ##########

    X_train, Y_train = load_dataset('train', n_img, 740)
    X_dev, Y_dev = load_dataset('dev', n_img, 40)

    # Standardize with statistics of the TRAIN set only.
    Xmean = np.mean(X_train)
    Xstd = np.std(X_train)
    X_train = (X_train - Xmean) / Xstd
    X_dev = (X_dev - Xmean) / Xstd

    ###################
    # INSTANTIATE TUNER
    ###################

    hypermodel = myHyperModel(n_img=n_img)
    tuner = kt.BayesianOptimization(
        hypermodel,
        objective=kt.Objective('val_ssim', direction='max'),
        num_initial_points=15,
        max_trials=15,
        executions_per_trial=1,
        seed=1,
        directory=res_dir,
        project_name='hp_opt',
    )
    tuner.search_space_summary()

    ################
    # EXECUTE SEARCH
    ################

    # Defined but not passed to the search below.
    class ClearTrainingOutput(tf.keras.callbacks.Callback):

        def on_train_end(*args, **kwargs):
            IPython.display.clear_output(wait=True)

    results = tuner.search(
        X_train,
        Y_train,
        validation_data=(X_dev, Y_dev),
        epochs=n_epochs,
        batch_size=1,
    )  # callbacks=[escb]

    finished = datetime.now()
    print('End processing: ' + finished.strftime('%Y%m%d %H%M%S'))

    tuner.results_summary()

    # Report and record the optimal hyperparameters.
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    print('Hyperparameter search completed!')
    reported = (
        ('Optimal learning rate: ', 'learning_rate'),
        ('Optimal dropout rate: ', 'dropout_rate'),
        ('Optimal beta: ', 'beta'),
        ('Optimal filt0_k12: ', 'filt0_k12'),
        ('Optimal filt0_k3: ', 'filt0_k3'),
    )
    for prefix, hp_name in reported:
        print(prefix + str(best_hps.get(hp_name)))
    for _, hp_name in reported:
        train_log[hp_name] = best_hps.get(hp_name)

    # Select best model
    model = tuner.get_best_models(num_models=1)[0]

    # Store search and train history to file
    with open(f'{res_dir}/search_train_history', 'w') as history_file:
        json.dump(train_log, history_file)

    # Save trained model
    model.save(f'{res_dir}/my_model.h5')

    ###########################
    # PREDICT AND PLOT TEST SET
    ###########################

    X_test, Y_test = load_dataset('test', n_img, 40)
    X_test = (X_test - Xmean) / Xstd

    Yhat_test = model.predict(X_test, verbose=0)
    plot_and_stats(Yhat_test, Y_test, res_dir)

    # Close logger. NOTE(review): sys.stdout is left pointing at the closed
    # Logger afterwards - confirm no caller prints after main() returns.
    sys.stdout.close()
    return