def test_bankdata(self):
    rs = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
    hdt = HyperDT(rs,
                  callbacks=[SummaryCallback(),
                             FileLoggingCallback(rs, output_dir=f'{homedir}/hyn_logs')],
                  # reward_metric='accuracy',
                  reward_metric='AUC',
                  dnn_params={
                      'hidden_units': ((256, 0, False), (256, 0, False)),
                      'dnn_activation': 'relu',
                  })

    df = dsutils.load_bank().sample(frac=0.1, random_state=9527)
    df.drop(['id'], axis=1, inplace=True)
    df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
    y = df_train.pop('y')
    y_test = df_test.pop('y')

    hdt.search(df_train, y, df_test, y_test, max_trials=3)
    best_trial = hdt.get_best_trial()
    assert best_trial

    estimator = hdt.final_train(best_trial.space_sample, df_train, y)
    score = estimator.predict(df_test)
    result = estimator.evaluate(df_test, y_test)
    assert len(score) == len(y_test)
    assert result
    assert isinstance(estimator.model, DeepTable)
def test_boston(self):
    print("Loading datasets...")
    boston_dataset = load_boston()

    df_train = pd.DataFrame(boston_dataset.data)
    df_train.columns = boston_dataset.feature_names
    self.y = pd.Series(boston_dataset.target)
    self.X = df_train
    self.X_train, self.X_test, self.y_train, self.y_test = \
        train_test_split(self.X, self.y, test_size=0.2, random_state=42)

    rs = RandomSearcher(tiny_dt_space, optimize_direction=OptimizeDirection.Maximize)
    hdt = HyperDT(rs,
                  callbacks=[SummaryCallback(),
                             FileStorageLoggingCallback(rs, output_dir=f'{homedir}/hyn_logs')],
                  reward_metric='RootMeanSquaredError',
                  dnn_params={
                      'hidden_units': ((256, 0, False), (256, 0, False)),
                      'dnn_activation': 'relu',
                  })

    hdt.search(self.X_train, self.y_train, self.X_test, self.y_test, max_trials=3)
    best_trial = hdt.get_best_trial()

    estimator = hdt.final_train(best_trial.space_sample, self.X, self.y)
    score = estimator.predict(self.X_test)
    result = estimator.evaluate(self.X_test, self.y_test)
    assert result
    assert isinstance(estimator.model, DeepTable)
def test_hyper_dt(self):
    rs = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
    hdt = HyperDT(rs,
                  callbacks=[SummaryCallback()],
                  reward_metric='accuracy',
                  dnn_params={
                      'hidden_units': ((256, 0, False), (256, 0, False)),
                      'dnn_activation': 'relu',
                  },
                  cache_preprocessed_data=True,
                  cache_home=homedir + '/cache')

    x1 = np.random.randint(0, 10, size=(100), dtype='int')
    x2 = np.random.randint(0, 2, size=(100)).astype('str')
    x3 = np.random.randint(0, 2, size=(100)).astype('str')
    x4 = np.random.normal(0.0, 1.0, size=(100))
    y = np.random.randint(0, 2, size=(100), dtype='int')
    df = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'x4': x4})

    hdt.search(df, y, df, y, max_trials=3, epochs=1)
    best_trial = hdt.get_best_trial()

    model = hdt.load_estimator(best_trial.model_file)
    assert model
    score = model.predict(df)
    result = model.evaluate(df, y)
    assert len(score) == 100
    assert result
    assert isinstance(model, DeepTable)

    estimator = hdt.final_train(best_trial.space_sample, df, y, epochs=1)
    score = estimator.predict(df)
    result = estimator.evaluate(df, y)
    assert len(score) == 100
    assert result
    assert isinstance(estimator.model, DeepTable)
def train(X_train, y_train, X_eval, y_eval, task=None, reward_metric=None, optimize_direction='max', **kwargs):
    from hypernets.core.callbacks import SummaryCallback
    from hypernets.searchers import make_searcher

    if task is None:
        task, _ = get_tool_box(y_train).infer_task_type(y_train)
    if reward_metric is None:
        reward_metric = 'rmse' if task == const.TASK_REGRESSION else 'accuracy'

    search_space = PlainSearchSpace()
    searcher = make_searcher('mcts', search_space, optimize_direction=optimize_direction)
    callbacks = [SummaryCallback()]

    hm = PlainModel(searcher=searcher, task=task, reward_metric=reward_metric, callbacks=callbacks)
    hm.search(X_train, y_train, X_eval, y_eval, **kwargs)

    best = hm.get_best_trial()
    model = hm.final_train(best.space_sample, X_train, y_train)

    return hm, model
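# A hedged usage sketch (not part of the original snippets) showing how the train()
# helper above might be called; the breast_cancer dataset and the max_trials keyword
# forwarded to hm.search() are illustrative assumptions.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

data = load_breast_cancer(as_frame=True)
X_train, X_eval, y_train, y_eval = train_test_split(data.data, data.target,
                                                    test_size=0.2, random_state=42)
hm, model = train(X_train, y_train, X_eval, y_eval, reward_metric='accuracy', max_trials=3)
print('best reward:', hm.get_best_trial().reward)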
def test_cnn_space_hyper_model(self):
    rs = RandomSearcher(
        lambda: cnn_search_space(input_shape=(28, 28, 1),
                                 output_units=10,
                                 output_activation='softmax',
                                 block_num_choices=[2, 3, 4, 5],
                                 filters_choices=[32, 64, 128],
                                 kernel_size_choices=[(1, 1), (3, 3)]),
        optimize_direction='max')
    hk = HyperKeras(rs, optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'],
                    callbacks=[SummaryCallback()])

    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Rescale the images from [0, 255] to the [0.0, 1.0] range.
    x_train, x_test = x_train[..., np.newaxis] / 255.0, x_test[..., np.newaxis] / 255.0
    y_train = tf.keras.utils.to_categorical(y_train)
    y_test = tf.keras.utils.to_categorical(y_test)

    print("Number of original training examples:", len(x_train))
    print("Number of original test examples:", len(x_test))

    # sample for speed up
    samples = 100
    hk.search(x_train[:samples], y_train[:samples], x_test[:int(samples / 10)], y_test[:int(samples / 10)],
              max_trails=3, epochs=1)
    assert hk.best_model
def test_dnn_space_hyper_model(self):
    rs = RandomSearcher(lambda: dnn_search_space(input_shape=10, output_units=2, output_activation='sigmoid'),
                        optimize_direction='max')
    hk = HyperKeras(rs, optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'],
                    callbacks=[SummaryCallback()])

    x = np.random.randint(0, 10000, size=(100, 10))
    y = np.random.randint(0, 2, size=(100), dtype='int')

    hk.search(x, y, x, y, max_trails=3)
    assert hk.best_model
def create_plain_model(reward_metric='auc', optimize_direction='max', with_encoder=False, with_dask=False):
    search_space = PlainSearchSpace(enable_dt=True, enable_lr=True, enable_nn=False)
    searcher = make_searcher('random', search_space_fn=search_space, optimize_direction=optimize_direction)
    encoder = MultiLabelEncoder if with_encoder else None
    cls = DaskPlainModel if with_dask else PlainModel
    hyper_model = cls(searcher=searcher, reward_metric=reward_metric,
                      callbacks=[SummaryCallback()], transformer=encoder)

    return hyper_model
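# A minimal, hypothetical smoke test for create_plain_model() above; the random
# two-column frame and max_trials=3 are illustrative and not part of the original suite.
import numpy as np
import pandas as pd

X = pd.DataFrame({'x1': np.random.rand(100),
                  'x2': np.random.randint(0, 3, size=100).astype(str)})
y = pd.Series(np.random.randint(0, 2, size=100))

hyper_model = create_plain_model(reward_metric='auc', with_encoder=True)
hyper_model.search(X, y, X, y, max_trials=3)
assert hyper_model.get_best_trial() is not None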
def train_bankdata(self, data_partition):
    rs = RandomSearcher(search_space_general, optimize_direction=OptimizeDirection.Maximize)
    hk = HyperGBM(rs, task='classification', reward_metric='accuracy',
                  cache_dir=f'{test_output_dir}/hypergbm_cache',
                  callbacks=[SummaryCallback(),
                             FileLoggingCallback(rs, output_dir=f'{test_output_dir}/hyn_logs')])

    df = dsutils.load_bank()
    df.drop(['id'], axis=1, inplace=True)
    X_train, X_test, y_train, y_test = data_partition()

    hk.search(X_train, y_train, X_test, y_test, max_trails=3)
    best_trial = hk.get_best_trail()
    estimator = hk.final_train(best_trial.space_sample, X_train, y_train)
    score = estimator.predict(X_test)
    result = estimator.evaluate(X_test, y_test)
    assert len(score) == 200

    return estimator, hk
homedir = f'{consts.PROJECT_NAME}_run_dt_{time.strftime("%Y%m%d%H%M%S")}'
disk_trial_store = DiskTrialStore(f'{homedir}/trial_store')

# searcher = MCTSSearcher(mini_dt_space, max_node_space=0, optimize_direction=OptimizeDirection.Maximize)
# searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
searcher = EvolutionSearcher(mini_dt_space, 200, 100, regularized=True, candidates_size=30,
                             optimize_direction=OptimizeDirection.Maximize)

hdt = HyperDT(searcher,
              callbacks=[SummaryCallback(),
                         FileStorageLoggingCallback(searcher, output_dir=f'{homedir}/hyn_logs')],
              reward_metric='AUC',
              earlystopping_patience=1)

space = mini_dt_space()
assert space.combinations == 589824
space2 = default_dt_space()
assert space2.combinations == 3559292928

df = dsutils.load_adult()
# df.drop(['id'], axis=1, inplace=True)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
X = df_train
from hypernets.searchers.mcts_searcher import MCTSSearcher
from hypernets.searchers.evolution_searcher import EvolutionSearcher
from hypernets.core.trial import DiskTrailStore
from deeptables.datasets import dsutils
from sklearn.model_selection import train_test_split
from .. import homedir

disk_trail_store = DiskTrailStore(f'{homedir}/trail_store')

# searcher = MCTSSearcher(mini_dt_space, max_node_space=0, optimize_direction=OptimizeDirection.Maximize)
# searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
searcher = EvolutionSearcher(mini_dt_space, 200, 100, regularized=True, candidates_size=30,
                             optimize_direction=OptimizeDirection.Maximize)

hdt = HyperDT(searcher,
              callbacks=[SummaryCallback(),
                         FileLoggingCallback(searcher, output_dir=f'{homedir}/hyn_logs')],
              reward_metric='AUC',
              earlystopping_patience=1)

space = mini_dt_space()
assert space.combinations == 589824
space2 = default_dt_space()
assert space2.combinations == 3559292928

df = dsutils.load_adult()
# df.drop(['id'], axis=1, inplace=True)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
X = df_train
y = df_train.pop(14)
y_test = df_test.pop(14)
# dataset_id='adult_whole_data',
from hyperkeras.search_space.enas_micro import enas_micro_search_space
from hyperkeras.one_shot_model import OneShotModel

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Rescale the images from [0, 255] to the [0.0, 1.0] range.
x_train, x_test = x_train[..., np.newaxis] / 255.0, x_test[..., np.newaxis] / 255.0
y_train = tf.keras.utils.to_categorical(y_train)
y_test = tf.keras.utils.to_categorical(y_test)

print("Number of original training examples:", len(x_train))
print("Number of original test examples:", len(x_test))

# sample for speed up
samples = 100

searcher = MCTSSearcher(lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}), optimize_direction='max')
one_shot_sampler = RandomSearcher(lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}), optimize_direction='max')

model = OneShotModel(searcher,
                     optimizer='adam',
                     loss='categorical_crossentropy',
                     metrics=['accuracy'],
                     epochs=3,
                     batch_size=64,
                     controller_train_per_epoch=False,  # Single path
                     callbacks=[SummaryCallback()],
                     one_shot_train_sampler=one_shot_sampler,  # uniform sampler
                     visualization=False)

# model.search(x_train[:samples], y_train[:samples], x_test[:int(samples / 10)], y_test[:int(samples / 10)],
model.search(x_train, y_train, x_test, y_test, max_trails=1000, epochs=100, callbacks=[])
assert model.best_model
# model2 = space.keras_model(deepcopy=False)
# model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# model2.fit(x_train[:samples], y_train[:samples], batch_size=32)
# result2 = model.evaluate(x_train[:samples], y_train[:samples])
#
# weights_cache = LayerWeightsCache()
# space = enas_micro_search_space(arch='NR', hp_dict={}, use_input_placeholder=False, weights_cache=weights_cache)
# space.random_sample()
#
# model = SharingWeightModel(space)
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# model.fit(x_train[:samples], y_train[:samples], batch_size=32)
# result = model.evaluate(x_train[:samples], y_train[:samples])
#
# space = enas_micro_search_space(arch='NR', hp_dict={}, use_input_placeholder=False, weights_cache=weights_cache)
# space.random_sample()
# model.update_search_space(space)
# model.fit(x_train[:samples], y_train[:samples], batch_size=100)
# result = model.evaluate(x_train[:samples], y_train[:samples])

rs = RandomSearcher(lambda: enas_micro_search_space(arch='NNRNNR', hp_dict={}), optimize_direction='max')
hk = HyperKeras(rs, optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'],
                callbacks=[SummaryCallback()],
                one_shot_mode=True,
                visualization=False)

# tensorboard = TensorBoard('./tensorboard/run_enas')
hk.search(x_train[:samples], y_train[:samples], x_test[:int(samples / 10)], y_test[:int(samples / 10)],
          max_trails=100, epochs=1, callbacks=[])
assert hk.get
def create_hyper_model(reward_metric='AUC', optimize_direction='max'):
    search_space = tiny_dt_space
    searcher = make_searcher('random', search_space_fn=search_space, optimize_direction=optimize_direction)
    hyper_model = HyperDT(searcher=searcher, reward_metric=reward_metric, callbacks=[SummaryCallback()])

    return hyper_model
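# Hypothetical usage of create_hyper_model() above, mirroring the bank-data tests in this
# collection; the sampling fraction, epochs=1 and max_trials=3 are illustrative choices only.
hdt = create_hyper_model(reward_metric='AUC')

df = dsutils.load_bank().sample(frac=0.05, random_state=9527)
df.drop(['id'], axis=1, inplace=True)
y = df.pop('y')

hdt.search(df, y, df, y, max_trials=3, epochs=1)
best_trial = hdt.get_best_trial()
estimator = hdt.final_train(best_trial.space_sample, df, y, epochs=1)
assert isinstance(estimator.model, DeepTable)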