def test_hyper_dt(self):
    """Run a three-trial random search on synthetic data and check the results.

    Two estimators are verified: the one reloaded from the best trial's
    model file and the one produced by ``final_train``. Each must predict
    one value per input row and return a truthy evaluation result.
    """
    n_rows = 100
    searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
    hyper_model = HyperDT(
        searcher,
        callbacks=[SummaryCallback()],
        reward_metric='accuracy',
        dnn_params={
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        },
        cache_preprocessed_data=True,
        cache_home=homedir + '/cache',
    )

    # Synthetic frame: one integer column, two binary columns stored as
    # strings and one normally distributed float column. The columns are
    # drawn in the order x1, x2, x3, x4 and the target is drawn last.
    features = pd.DataFrame({
        'x1': np.random.randint(0, 10, size=n_rows, dtype='int'),
        'x2': np.random.randint(0, 2, size=n_rows).astype('str'),
        'x3': np.random.randint(0, 2, size=n_rows).astype('str'),
        'x4': np.random.normal(0.0, 1.0, size=n_rows),
    })
    target = np.random.randint(0, 2, size=n_rows, dtype='int')

    # The same data serves as both the training and the evaluation set.
    hyper_model.search(features, target, features, target, max_trials=3, epochs=1)
    best = hyper_model.get_best_trial()

    # Estimator restored from the file written for the best trial.
    reloaded = hyper_model.load_estimator(best.model_file)
    assert reloaded
    predictions = reloaded.predict(features)
    metrics = reloaded.evaluate(features, target)
    assert len(predictions) == 100
    assert metrics
    assert isinstance(reloaded, DeepTable)

    # Estimator trained again from the best trial's sampled search space.
    retrained = hyper_model.final_train(best.space_sample, features, target, epochs=1)
    predictions = retrained.predict(features)
    metrics = retrained.evaluate(features, target)
    assert len(predictions) == 100
    assert metrics
    assert isinstance(retrained.model, DeepTable)
def test_boston(self):
    """Smoke-test a HyperDT search and final training on a regression task.

    The Boston housing data is split 80/20 into train and test parts, a
    three-trial random search is run, and the best trial's space sample is
    used to train a final estimator on the full data set.

    The reward metric is ``RootMeanSquaredError``, an error measure, so the
    searcher is told to minimize it; maximizing would select the trial with
    the largest error as "best".
    """
    print("Loading datasets...")
    boston_dataset = load_boston()
    df_train = pd.DataFrame(boston_dataset.data)
    df_train.columns = boston_dataset.feature_names
    self.y = pd.Series(boston_dataset.target)
    self.X = df_train

    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        self.X, self.y, test_size=0.2, random_state=42)

    # RMSE is lower-is-better, hence Minimize rather than Maximize.
    rs = RandomSearcher(tiny_dt_space, optimize_direction=OptimizeDirection.Minimize)
    hdt = HyperDT(
        rs,
        callbacks=[
            SummaryCallback(),
            FileStorageLoggingCallback(rs, output_dir='hotexamples_com/hyn_logs'),
        ],
        reward_metric='RootMeanSquaredError',
        dnn_params={
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        },
    )
    hdt.search(self.X_train, self.y_train, self.X_test, self.y_test, max_trials=3)

    best_trial = hdt.get_best_trial()
    # Fail with a clear assertion instead of an AttributeError below.
    assert best_trial

    # Final training uses the complete data set (train and test parts).
    estimator = hdt.final_train(best_trial.space_sample, self.X, self.y)
    score = estimator.predict(self.X_test)
    result = estimator.evaluate(self.X_test, self.y_test)

    # One prediction per test row, as also checked in test_bankdata.
    assert len(score) == len(self.y_test)
    assert result
    assert isinstance(estimator.model, DeepTable)
def test_bankdata(self):
    """Search, retrain and evaluate on a 10% sample of the bank data set.

    A three-trial random search is scored by AUC. The best trial's space
    sample is then used for a final training run on the training split, and
    the resulting estimator is checked on the held-out split.
    """
    searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
    search_callbacks = [
        SummaryCallback(),
        FileLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs'),
    ]
    hyper_model = HyperDT(
        searcher,
        callbacks=search_callbacks,
        # reward_metric='accuracy',
        reward_metric='AUC',
        dnn_params={
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        },
    )

    # Fixed seeds keep both the sample and the split reproducible.
    bank = dsutils.load_bank().sample(frac=0.1, random_state=9527)
    bank.drop(['id'], axis=1, inplace=True)
    train_part, test_part = train_test_split(bank, test_size=0.2, random_state=42)

    # pop() removes the target column from each feature frame in place.
    train_target = train_part.pop('y')
    test_target = test_part.pop('y')

    hyper_model.search(train_part, train_target, test_part, test_target, max_trials=3)
    best = hyper_model.get_best_trial()
    assert best

    final_estimator = hyper_model.final_train(best.space_sample, train_part, train_target)
    predictions = final_estimator.predict(test_part)
    metrics = final_estimator.evaluate(test_part, test_target)

    assert len(predictions) == len(test_target)
    assert metrics
    assert isinstance(final_estimator.model, DeepTable)
def train(self, X, y, X_test):
    """Run an evolutionary HyperDT search and keep the best estimator.

    ``X``/``y`` are split 70/30 into training and evaluation parts; the
    split is stratified on ``y`` unless ``self.task`` is ``'regression'``.
    The estimator loaded from the best trial's model file is stored in
    ``self.estimator``. ``X_test`` is accepted but not used here.
    """
    evolution = EvolutionSearcher(
        mini_dt_space,
        optimize_direction=OptimizeDirection.Maximize,
        population_size=30,
        sample_size=10,
        regularized=True,
        candidates_size=10,
    )
    early_stopping = EarlyStoppingCallback(
        self.earlystop_rounds,
        'max',
        time_limit=self.time_limit,
        expected_reward=self.expected_reward,
    )
    hyper_model = HyperDT(
        evolution,
        callbacks=[early_stopping],
        reward_metric=self.reward_metric,
        cache_preprocessed_data=True,
    )

    # Regression targets are not stratified; every other task is.
    stratify_on = None if self.task == 'regression' else y
    X_train, X_eval, y_train, y_eval = train_test_split(
        X, y, test_size=0.3, random_state=9527, stratify=stratify_on)

    hyper_model.search(
        X_train, y_train, X_eval, y_eval,
        max_trials=self.max_trials,
        epochs=self.epochs,
    )
    winner = hyper_model.get_best_trial()
    self.estimator = hyper_model.load_estimator(winner.model_file)
# Run-directory name: project name followed by a second-resolution timestamp.
homedir = f'{consts.PROJECT_NAME}_run_dt_{time.strftime("%Y%m%d%H%M%S")}'
disk_trial_store = DiskTrialStore(f'hotexamples_com/trial_store')

# Alternative searchers, left disabled:
# searcher = MCTSSearcher(mini_dt_space, max_node_space=0,optimize_direction=OptimizeDirection.Maximize)
# searcher = RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
searcher = EvolutionSearcher(
    mini_dt_space,
    200,
    100,
    regularized=True,
    candidates_size=30,
    optimize_direction=OptimizeDirection.Maximize,
)
search_callbacks = [
    SummaryCallback(),
    FileStorageLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs'),
]
hdt = HyperDT(
    searcher,
    callbacks=search_callbacks,
    reward_metric='AUC',
    earlystopping_patience=1,
)

# Sanity-check the number of combinations of the two predefined spaces.
space = mini_dt_space()
assert space.combinations == 589824
space2 = default_dt_space()
assert space2.combinations == 3559292928

df = dsutils.load_adult()
# df.drop(['id'], axis=1, inplace=True)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)

# X is the same object as df_train, so popping column 14 (used here as
# the target) also removes that column from X.
X = df_train
y = df_train.pop(14)
y_test = df_test.pop(14)
reduce_factor=Choice([1, 0.8]), dnn_dropout=Choice([0, 0.3]), use_bn=Bool(), dnn_layers=2, activation='relu')(dt_module) fit = DTFit(batch_size=Choice([128, 256]))(dt_module) return space rs = MCTSSearcher(my_space, max_node_space=5) hdt = HyperDT(rs, callbacks=[SummaryCallback(), FileLoggingCallback(rs)], reward_metric='AUC', dnn_params={ 'dnn_units': ((256, 0, False), (256, 0, False)), 'dnn_activation': 'relu', }) from deeptables.datasets import dsutils df = dsutils.load_bank()[:1000] print("data shape: ") print(df.shape) df.drop(['id'], axis=1, inplace=True) y = df.pop("y") X = df X_train, X_test, y_train, y_test = train_test_split(X,
def create_hyper_model(reward_metric='AUC', optimize_direction='max'):
    """Build a HyperDT driven by a random searcher over ``tiny_dt_space``.

    Args:
        reward_metric: Metric name passed to ``HyperDT`` as the reward.
        optimize_direction: Direction passed on to ``make_searcher``.

    Returns:
        The configured ``HyperDT`` instance, with a ``SummaryCallback``
        attached.
    """
    random_searcher = make_searcher(
        'random',
        search_space_fn=tiny_dt_space,
        optimize_direction=optimize_direction,
    )
    return HyperDT(
        searcher=random_searcher,
        reward_metric=reward_metric,
        callbacks=[SummaryCallback()],
    )