def test_boston(self):
    """Smoke-test a HyperDT regression search on the Boston housing data.

    Loads the data with ``load_boston``, stores the full frame/target and an
    80/20 split on ``self``, runs a three-trial random search over
    ``tiny_dt_space`` rewarded by ``RootMeanSquaredError``, retrains the best
    trial and checks that prediction and evaluation succeed.
    """
    print("Loading datasets...")
    # NOTE(review): load_boston is not available in recent scikit-learn
    # releases -- confirm the pinned version still ships it.
    boston_dataset = load_boston()
    df_train = pd.DataFrame(boston_dataset.data)
    df_train.columns = boston_dataset.feature_names
    self.y = pd.Series(boston_dataset.target)
    self.X = df_train
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        self.X, self.y, test_size=0.2, random_state=42)

    # The reward metric is an error (RMSE): lower is better, so the search
    # direction must be Minimize for get_best_trial() to pick the trial with
    # the smallest error rather than the largest.
    rs = RandomSearcher(tiny_dt_space, optimize_direction=OptimizeDirection.Minimize)
    hdt = HyperDT(
        rs,
        callbacks=[
            SummaryCallback(),
            FileStorageLoggingCallback(rs, output_dir=f'hotexamples_com/hyn_logs'),
        ],
        reward_metric='RootMeanSquaredError',
        dnn_params={
            'hidden_units': ((256, 0, False), (256, 0, False)),
            'dnn_activation': 'relu',
        },
    )
    hdt.search(self.X_train, self.y_train, self.X_test, self.y_test, max_trials=3)

    best_trial = hdt.get_best_trial()
    assert best_trial

    # final_train receives the full data (self.X / self.y), which contains the
    # rows of self.X_test, so the evaluation below only checks that the
    # pipeline runs; it is not a held-out score.
    estimator = hdt.final_train(best_trial.space_sample, self.X, self.y)
    score = estimator.predict(self.X_test)
    result = estimator.evaluate(self.X_test, self.y_test)

    # One prediction per evaluated row (same check as test_bankdata).
    assert len(score) == len(self.y_test)
    assert result
    assert isinstance(estimator.model, DeepTable)
def test_bankdata(self):
    """Run a three-trial HyperDT search on a bank-data sample.

    Samples 2000 rows from ``dsutils.load_bank()``, drops the ``id`` column,
    splits 80/20, searches ``tiny_dt_space`` with a random searcher rewarded
    by ``AUC``, retrains the best trial on the training part and checks the
    predictions and evaluation result on the test part.
    """
    searcher = RandomSearcher(
        tiny_dt_space,
        optimize_direction=OptimizeDirection.Maximize,
    )
    search_callbacks = [
        SummaryCallback(),
        FileStorageLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs'),
    ]
    network_params = {
        'hidden_units': ((256, 0, False), (256, 0, False)),
        'dnn_activation': 'relu',
    }
    # 'AUC' is the reward in use; 'accuracy' was the commented-out alternative.
    hyper_model = HyperDT(
        searcher,
        callbacks=search_callbacks,
        reward_metric='AUC',
        dnn_params=network_params,
    )

    bank = dsutils.load_bank().sample(n=2000, random_state=9527)
    bank.drop(['id'], axis=1, inplace=True)
    train_part, test_part = train_test_split(bank, test_size=0.2, random_state=42)
    # pop() removes the label column from each frame and returns it.
    train_target = train_part.pop('y')
    test_target = test_part.pop('y')

    hyper_model.search(
        train_part,
        train_target,
        test_part,
        test_target,
        max_trials=3,
    )
    winner = hyper_model.get_best_trial()
    assert winner

    fitted = hyper_model.final_train(winner.space_sample, train_part, train_target)
    predictions = fitted.predict(test_part)
    metrics = fitted.evaluate(test_part, test_target)

    assert len(predictions) == len(test_target)
    assert metrics
    assert isinstance(fitted.model, DeepTable)
# Run label: project name plus a second-resolution timestamp.
# NOTE(review): none of the paths visible in this section reference homedir.
homedir = f'{consts.PROJECT_NAME}_run_dt_{time.strftime("%Y%m%d%H%M%S")}'
disk_trial_store = DiskTrialStore(f'hotexamples_com/trial_store')

# Alternative searchers that were previously tried here:
#   MCTSSearcher(mini_dt_space, max_node_space=0, optimize_direction=OptimizeDirection.Maximize)
#   RandomSearcher(mini_dt_space, optimize_direction=OptimizeDirection.Maximize)
searcher = EvolutionSearcher(
    mini_dt_space,
    200,
    100,
    regularized=True,
    candidates_size=30,
    optimize_direction=OptimizeDirection.Maximize,
)
hdt = HyperDT(
    searcher,
    callbacks=[
        SummaryCallback(),
        FileStorageLoggingCallback(searcher, output_dir=f'hotexamples_com/hyn_logs'),
    ],
    reward_metric='AUC',
    earlystopping_patience=1,
)

# Sanity-check the sizes of the two search spaces.
space = mini_dt_space()
assert space.combinations == 589824
space2 = default_dt_space()
assert space2.combinations == 3559292928

df = dsutils.load_adult()
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)
# X is a second name for df_train (not a copy), so popping column 14 below
# removes the label column from X as well.
X = df_train
y = df_train.pop(14)
y_test = df_test.pop(14)