def test_hypergbm_bankdata(self):
    rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                  callbacks=[SummaryCallback(), FileLoggingCallback(rs)])

    df = pd.read_csv('cooka/test/dataset/Bank_Marketing_Data/train.csv')
    df.drop(['id'], axis=1, inplace=True)
    X_train, X_test = train_test_split(df.head(1000), test_size=0.2, random_state=42)
    y_train = X_train.pop('y')
    y_test = X_test.pop('y')
    X_train_origin = X_train.copy()

    hk.search(X_train, y_train, X_test, y_test, max_trails=1)
    assert hk.best_model

    best_trial = hk.get_best_trail()
    estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
    result = estimator.evaluate(X_test, y_test)
    print(result)

    predict_result = estimator.predict(X_train_origin)
    print(predict_result)
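# The HyperGBM flow above: search() runs the trials, get_best_trail() picks the
# winning trial, final_train() refits the winning space_sample on the training
# data, and the returned estimator supports evaluate()/predict().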
def test_hyper_dt(self):
    rs = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
    hdt = HyperDT(rs,
                  callbacks=[SummaryCallback()],
                  reward_metric='accuracy',
                  dnn_params={
                      'hidden_units': ((256, 0, False), (256, 0, False)),
                      'dnn_activation': 'relu',
                  },
                  cache_preprocessed_data=True,
                  cache_home=homedir + '/cache')

    x1 = np.random.randint(0, 10, size=(100), dtype='int')
    x2 = np.random.randint(0, 2, size=(100)).astype('str')
    x3 = np.random.randint(0, 2, size=(100)).astype('str')
    x4 = np.random.normal(0.0, 1.0, size=(100))
    y = np.random.randint(0, 2, size=(100), dtype='int')
    df = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4})

    hdt.search(df, y, df, y, max_trials=3, epochs=1)

    best_trial = hdt.get_best_trial()
    model = hdt.load_estimator(best_trial.model_file)
    assert model
    score = model.predict(df)
    result = model.evaluate(df, y)
    assert len(score) == 100
    assert result
    assert isinstance(model, DeepTable)

    estimator = hdt.final_train(best_trial.space_sample, df, y, epochs=1)
    score = estimator.predict(df)
    result = estimator.evaluate(df, y)
    assert len(score) == 100
    assert result
    assert isinstance(estimator.model, DeepTable)
def test_boston(self):
    print("Loading datasets...")
    boston_dataset = load_boston()
    df_train = pd.DataFrame(boston_dataset.data)
    df_train.columns = boston_dataset.feature_names
    self.y = pd.Series(boston_dataset.target)
    self.X = df_train
    self.X_train, self.X_test, self.y_train, self.y_test = \
        train_test_split(self.X, self.y, test_size=0.2, random_state=42)

    rs = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
    hdt = HyperDT(rs,
                  callbacks=[SummaryCallback(),
                             FileStorageLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')],
                  reward_metric='RootMeanSquaredError',
                  dnn_params={
                      'hidden_units': ((256, 0, False), (256, 0, False)),
                      'dnn_activation': 'relu',
                  })

    hdt.search(self.X_train, self.y_train, self.X_test, self.y_test, max_trials=3)

    best_trial = hdt.get_best_trial()
    estimator = hdt.final_train(best_trial.space_sample, self.X, self.y)
    score = estimator.predict(self.X_test)
    result = estimator.evaluate(self.X_test, self.y_test)
    assert result
    assert isinstance(estimator.model, DeepTable)
def test_bankdata(self):
    rs = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
    hdt = HyperDT(rs,
                  callbacks=[SummaryCallback(),
                             FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')],
                  # reward_metric='accuracy',
                  reward_metric='AUC',
                  dnn_params={
                      'hidden_units': ((256, 0, False), (256, 0, False)),
                      'dnn_activation': 'relu',
                  })

    df = dsutils.load_bank().sample(frac=0.1, random_state=9527)
    df.drop(['id'], axis=1, inplace=True)
    df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
    y = df_train.pop('y')
    y_test = df_test.pop('y')

    hdt.search(df_train, y, df_test, y_test, max_trials=3)

    best_trial = hdt.get_best_trial()
    assert best_trial
    estimator = hdt.final_train(best_trial.space_sample, df_train, y)
    score = estimator.predict(df_test)
    result = estimator.evaluate(df_test, y_test)
    assert len(score) == len(y_test)
    assert result
    assert isinstance(estimator.model, DeepTable)
def main():
    # client = Client("tcp://127.0.0.1:64958")
    client = Client(processes=False, threads_per_worker=2, n_workers=1, memory_limit='4GB')
    print(client)

    rs = RandomSearcher(get_space_num_cat_pipeline_complex, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                  cache_dir=f'{test_output_dir}/hypergbm_cache',
                  callbacks=[SummaryCallback(),
                             FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')])

    df = dsutils.load_bank_by_dask()
    # Dask DataFrames have no inplace drop; keep the returned frame.
    df = df.drop(['id'], axis=1)
    df['y'] = dm_pre.LabelEncoder().fit_transform(df['y'])
    # df = df.sample(frac=0.1)
    # object_columns = [i for i, v in df.dtypes.items() if v == 'object']
    # for c in object_columns:
    #     df[c] = df[c].astype('category')
    # df = df.categorize(object_columns)

    X_train, X_test = train_test_split(df, test_size=0.8, random_state=42)
    y_train = X_train.pop('y')
    y_test = X_test.pop('y')

    hk.search(X_train, y_train, X_test, y_test, max_trails=50)
    print('-' * 30)

    best_trial = hk.get_best_trail()
    print(f'best_trial:{best_trial}')
    estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
    score = estimator.predict(X_test)
    result = estimator.evaluate(X_test, y_test, metrics=['accuracy', 'auc', 'logloss'])
    print(f'final result:{result}')
def test_hypergbm_diabetes(self):
    rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                  callbacks=[SummaryCallback(), FileLoggingCallback(rs)])

    df = pd.read_csv('cooka/test/dataset/diabetes_10k.csv')
    X_train, X_test = train_test_split(df.head(1000), test_size=0.2, random_state=42)
    y_train = X_train.pop('readmitted')
    y_test = X_test.pop('readmitted')
    X_train_origin = X_train.copy()

    hk.search(X_train, y_train, X_test, y_test, max_trails=2)
    assert hk.best_model

    best_trial = hk.get_best_trail()
    estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
    # result = estimator.evaluate(X_test, y_test)
    # print(result)

    # move the 'gender' column to the end (pop then re-assign appends it),
    # exercising prediction with a reordered column layout
    series_gender = X_train_origin.pop('gender')
    X_train_origin['gender'] = series_gender
    predict_result = estimator.predict(X_train_origin)
    print(predict_result)
def test_cnn_space_hyper_model(self):
    rs = RandomSearcher(
        lambda: cnn_search_space(input_shape=(28, 28, 1),
                                 output_units=10,
                                 output_activation='softmax',
                                 block_num_choices=[2, 3, 4, 5],
                                 filters_choices=[32, 64, 128],
                                 kernel_size_choices=[(1, 1), (3, 3)]),
        optimize_direction='max')
    hk = HyperKeras(rs, optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'],
                    callbacks=[SummaryCallback()])

    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    # Rescale the images from [0,255] to the [0.0,1.0] range.
    x_train, x_test = x_train[..., np.newaxis] / 255.0, x_test[..., np.newaxis] / 255.0
    y_train = tf.keras.utils.to_categorical(y_train)
    y_test = tf.keras.utils.to_categorical(y_test)
    print("Number of original training examples:", len(x_train))
    print("Number of original test examples:", len(x_test))

    # sample for speed up
    samples = 100
    hk.search(x_train[:samples], y_train[:samples],
              x_test[:int(samples / 10)], y_test[:int(samples / 10)],
              max_trails=3, epochs=1)
    assert hk.best_model
def test_dnn_space_hyper_model(self):
    rs = RandomSearcher(lambda: dnn_search_space(input_shape=10, output_units=2, output_activation='sigmoid'),
                        optimize_direction='max')
    hk = HyperKeras(rs, optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'],
                    callbacks=[SummaryCallback()])

    x = np.random.randint(0, 10000, size=(100, 10))
    y = np.random.randint(0, 2, size=(100), dtype='int')

    hk.search(x, y, x, y, max_trails=3)
    assert hk.best_model
def test_fit_one_shot_model_epoch(self):
    rs = RandomSearcher(self.get_space_simple, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperKeras(rs, optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'],
                    callbacks=[SummaryCallback()],
                    one_shot_mode=True,
                    one_shot_train_sampler=rs)
    x, y = self.get_x_y_1()
    hk.fit_one_shot_model_epoch(x, y)
def test_random_searcher(self):
    # a validation fn that rejects every sample makes sampling fail
    searcher = RandomSearcher(self.get_space, space_sample_validation_fn=lambda s: False)
    with pytest.raises(ValueError):
        searcher.sample()

    searcher = RandomSearcher(self.get_space, space_sample_validation_fn=lambda s: True)
    sample = searcher.sample()
    assert sample

    def valid_sample(sample):
        return bool(sample.Param_Bool_1.value)

    searcher = RandomSearcher(self.get_space, space_sample_validation_fn=valid_sample)
    sample = searcher.sample()
    assert sample
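# A hedged sketch of a cross-parameter constraint via space_sample_validation_fn.
# The space below is illustrative: the Param_Bool_1/Param_Choice_1 names follow
# the auto-generated Param_<Type>_<n> convention used by valid_sample above, and
# Identity is assumed to be the pass-through module from hypernets.core.ops.
def test_validation_fn_constraint_sketch(self):
    from hypernets.core.ops import Identity
    from hypernets.core.search_space import HyperSpace, Bool, Choice

    def get_constrained_space():
        space = HyperSpace()
        with space.as_default():
            Identity(p1=Bool(), p2=Choice([0.3, 0.4, 0.5]))
        return space

    def valid_combo(sample):
        # reject samples that combine a False flag with a rate of 0.5
        return sample.Param_Bool_1.value or sample.Param_Choice_1.value != 0.5

    searcher = RandomSearcher(get_constrained_space, space_sample_validation_fn=valid_combo)
    sample = searcher.sample()  # resampled until valid_combo accepts
    assert sample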
def run_search():
    searchers = (RandomSearcher(get_space, space_sample_validation_fn=lambda s: True),
                 MCTSSearcher(get_space, max_node_space=10),
                 EvolutionSearcher(get_space, 5, 3, regularized=False))
    for searcher in searchers:
        for i in range(100):
            space_sample = searcher.sample()
            assert space_sample.all_assigned
            print(searcher.__class__.__name__, i, space_sample.params_summary())
            searcher.update_result(space_sample, np.random.uniform(0.1, 0.9))
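# Note: update_result() feeds each trial's reward back to the searcher. Random
# search ignores this history (its samples are independent), while the MCTS and
# evolution searchers use it to bias subsequent samples.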
def test_build_dataset_iter(self):
    rs = RandomSearcher(self.get_space, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperKeras(rs, optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'],
                    callbacks=[SummaryCallback()])
    x, y = self.get_x_y_1()

    ds_iter = hk.build_dataset_iter(x, y, batch_size=10)
    batch_counter = 0
    for x_b, y_b in ds_iter:
        assert len(x_b) == 10
        assert len(y_b) == 10
        batch_counter += 1
    assert batch_counter == 10

    ds_iter = hk.build_dataset_iter(x, y, batch_size=32)
    batch_counter = 0
    for x_b, y_b in ds_iter:
        if batch_counter < 3:
            assert len(x_b) == 32
            assert len(y_b) == 32
        else:
            assert len(x_b) == 4
            assert len(y_b) == 4
        batch_counter += 1
    assert batch_counter == 4

    ds_iter = hk.build_dataset_iter(x, y, batch_size=32, repeat_count=2)
    batch_counter = 0
    for x_b, y_b in ds_iter:
        if batch_counter < 6:
            assert len(x_b) == 32
            assert len(y_b) == 32
        else:
            assert len(x_b) == 8
            assert len(y_b) == 8
        batch_counter += 1
    assert batch_counter == 7
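# Batch math behind the asserts above: 100 rows at batch_size=10 give exactly
# 10 batches; at batch_size=32 they give 3 full batches plus a remainder of 4;
# with repeat_count=2 the 200 rows give 6 full batches plus a remainder of 8.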
def main():
    rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperGBM(rs, task='classification', reward_metric='auc',
                  cache_dir=f'{test_output_dir}/hypergbm_cache',
                  callbacks=[SummaryCallback(),
                             FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')])

    df = dsutils.load_bank()
    df.drop(['id'], axis=1, inplace=True)
    X_train, X_test = train_test_split(df, test_size=0.8, random_state=42)
    y_train = X_train.pop('y')
    y_test = X_test.pop('y')

    hk.search(X_train, y_train, X_test, y_test, max_trails=500)

    best_trial = hk.get_best_trail()
    print(f'best_trial:{best_trial}')
    estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
    score = estimator.predict(X_test)
    result = estimator.evaluate(X_test, y_test, metrics=['auc', 'accuracy'])
    print(f'final result:{result}')
def test_model_with_hp(self):
    rs = RandomSearcher(self.get_space, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperKeras(rs, optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'],
                    callbacks=[SummaryCallback()])
    x, y = self.get_x_y()

    hk.search(x, y, x, y, max_trails=3)
    assert hk.best_model

    best_trial = hk.get_best_trail()
    estimator = hk.final_train(best_trial.space_sample, x, y)
    score = estimator.predict(x)
    result = estimator.evaluate(x, y)
    assert len(score) == 100
    assert result
def train_bankdata(self, data_partition):
    rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                  cache_dir=f'{test_output_dir}/hypergbm_cache',
                  callbacks=[SummaryCallback(),
                             FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')])

    df = dsutils.load_bank()
    df.drop(['id'], axis=1, inplace=True)
    X_train, X_test, y_train, y_test = data_partition()

    hk.search(X_train, y_train, X_test, y_test, max_trails=3)

    best_trial = hk.get_best_trail()
    estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
    score = estimator.predict(X_test)
    result = estimator.evaluate(X_test, y_test)
    assert len(score) == 200
    return estimator, hk
def test_set_random_state(self):
    from hypernets.core import set_random_state

    set_random_state(9527)
    searcher = RandomSearcher(self.get_space)
    vectors = []
    for i in range(1, 10):
        vectors.append(searcher.sample().vectors)
    assert vectors == [[98, 0, 0, 0.96], [9, 0, 0, 0.93], [60, 0, 1, 0.24],
                       [54, 0, 1, 0.7], [25, 0, 1, 0.73], [67, 1, 1, 0.43],
                       [57, 1, 1, 0.05], [49, 0, 0, 0.71], [71, 1, 1, 0.49]]

    set_random_state(None)
    searcher = RandomSearcher(self.get_space)
    vectors = []
    for i in range(1, 10):
        vectors.append(searcher.sample().vectors)
    assert vectors != [[98, 0, 0, 0.96], [9, 0, 0, 0.93], [60, 0, 1, 0.24],
                       [54, 0, 1, 0.7], [25, 0, 1, 0.73], [67, 1, 1, 0.43],
                       [57, 1, 1, 0.05], [49, 0, 0, 0.71], [71, 1, 1, 0.49]]

    set_random_state(9527)
    searcher = RandomSearcher(self.get_space)
    vectors = []
    for i in range(1, 10):
        vectors.append(searcher.sample().vectors)
    assert vectors == [[98, 0, 0, 0.96], [9, 0, 0, 0.93], [60, 0, 1, 0.24],
                       [54, 0, 1, 0.7], [25, 0, 1, 0.73], [67, 1, 1, 0.43],
                       [57, 1, 1, 0.05], [49, 0, 0, 0.71], [71, 1, 1, 0.49]]

    set_random_state(1)
    searcher = RandomSearcher(self.get_space)
    vectors = []
    for i in range(1, 10):
        vectors.append(searcher.sample().vectors)
    assert vectors == [[38, 1, 0, 0.93], [10, 1, 1, 0.15], [17, 1, 0, 0.39],
                       [7, 1, 0, 0.85], [19, 0, 1, 0.44], [29, 1, 0, 0.67],
                       [88, 1, 1, 0.43], [95, 0, 0, 0.8], [10, 1, 1, 0.09]]

    set_random_state(None)
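# Note: the asserts above pin exact sample vectors, so the same seed must
# reproduce the identical sequence; set_random_state(None) restores
# nondeterministic sampling and also resets the global state at the end.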
# (import paths for numpy/tensorflow and for MCTSSearcher/SummaryCallback are
# assumed from the usual hypernets package layout)
import numpy as np
import tensorflow as tf

from hypernets.core.callbacks import SummaryCallback
from hypernets.searchers.mcts_searcher import MCTSSearcher
from hypernets.searchers.random_searcher import RandomSearcher
from hyperkeras.search_space.enas_micro import enas_micro_search_space
from hyperkeras.one_shot_model import OneShotModel

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Rescale the images from [0,255] to the [0.0,1.0] range.
x_train, x_test = x_train[..., np.newaxis] / 255.0, x_test[..., np.newaxis] / 255.0
y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)
print("Number of original training examples:", len(x_train))
print("Number of original test examples:", len(x_test))

# sample for speed up
samples = 100

searcher = MCTSSearcher(lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}),
                        optimize_direction='max')
one_shot_sampler = RandomSearcher(lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}),
                                  optimize_direction='max')

model = OneShotModel(searcher,
                     optimizer='adam',
                     loss='categorical_crossentropy',
                     metrics=['accuracy'],
                     epochs=3,
                     batch_size=64,
                     controller_train_per_epoch=False,  # single path
                     callbacks=[SummaryCallback()],
                     one_shot_train_sampler=one_shot_sampler,  # uniform sampler
                     visualization=False)

# model.search(x_train[:samples], y_train[:samples], x_test[:int(samples / 10)], y_test[:int(samples / 10)],
model.search(x_train, y_train, x_test, y_test, max_trails=1000, epochs=100, callbacks=[])
assert model.best_model
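# Design note: with controller_train_per_epoch=False, the shared one-shot model
# appears to be trained on single paths drawn by the uniform RandomSearcher
# sampler, leaving the MCTSSearcher to drive the architecture search itself.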
def get_space():
    space = HyperSpace()
    with space.as_default():
        in1 = Input(shape=(10,))
        dense1 = Dense(10, activation=Choice(['relu', 'tanh', None]), use_bias=Bool())(in1)
        bn1 = BatchNormalization()(dense1)
        dropout1 = Dropout(Choice([0.3, 0.4, 0.5]))(bn1)
        output = Dense(2, activation='softmax', use_bias=True)(dropout1)
    return space


rs = RandomSearcher(get_space, optimize_direction=OptimizeDirection.Maximize)
hk = HyperKeras(rs, optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'],
                callbacks=[SummaryCallback()])

x = np.random.randint(0, 10000, size=(100, 10))
y = np.random.randint(0, 2, size=(100), dtype='int')

hk.search(x, y, x, y, max_trails=3)

best_trial = hk.get_best_trail()
assert best_trial
estimator = hk.final_train(best_trial.space_sample, x, y)
score = estimator.predict(x)
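# A natural follow-up, mirroring test_model_with_hp above (evaluate() on the
# final estimator is assumed to behave the same here):
result = estimator.evaluate(x, y)
assert len(score) == 100
assert result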