def load_data():
    """Return an encoded 3000-row sample of the bank dataset without its ``id`` column.

    The global random state is seeded with 9527 before anything is loaded.
    The first 3000 rows of ``dsutils.load_bank()`` are passed through
    ``MultiLabelEncoder.fit_transform`` (presumably label-encoding the
    categorical columns -- confirm against the encoder), and the ``id``
    column is then dropped in place from the encoded frame.
    """
    set_random_state(9527)

    bank_sample = dsutils.load_bank().head(3000)
    encoded = MultiLabelEncoder().fit_transform(bank_sample)

    # Drop in place, as the encoded frame is the object handed back.
    encoded.drop(['id'], axis=1, inplace=True)
    return encoded
def test_set_random_state(self):
    """Check that RandomSearcher samples are reproducible under a fixed global seed.

    Four rounds are run, each with a freshly constructed searcher:

    1. seed 9527 -> must match the recorded vectors for 9527;
    2. seed None -> must NOT match the vectors for 9527;
    3. seed 9527 again -> must match the same recorded vectors;
    4. seed 1 -> must match the recorded vectors for 1.

    The global random state is always reset with ``set_random_state(None)``
    on exit, including when an assertion fails, so a failure here cannot
    leave a fixed seed behind for tests that run afterwards.
    """
    from hypernets.core import set_random_state

    expected_seed_9527 = [[98, 0, 0, 0.96], [9, 0, 0, 0.93], [60, 0, 1, 0.24],
                          [54, 0, 1, 0.7], [25, 0, 1, 0.73], [67, 1, 1, 0.43],
                          [57, 1, 1, 0.05], [49, 0, 0, 0.71], [71, 1, 1, 0.49]]
    expected_seed_1 = [[38, 1, 0, 0.93], [10, 1, 1, 0.15], [17, 1, 0, 0.39],
                       [7, 1, 0, 0.85], [19, 0, 1, 0.44], [29, 1, 0, 0.67],
                       [88, 1, 1, 0.43], [95, 0, 0, 0.8], [10, 1, 1, 0.09]]

    def sample_vectors(seed):
        # Set the global state first, then draw nine samples from a
        # searcher built afterwards.
        set_random_state(seed)
        searcher = RandomSearcher(self.get_space)
        return [searcher.sample().vectors for _ in range(9)]

    try:
        assert sample_vectors(9527) == expected_seed_9527
        assert sample_vectors(None) != expected_seed_9527
        assert sample_vectors(9527) == expected_seed_9527
        assert sample_vectors(1) == expected_seed_1
    finally:
        # Runs on success and on assertion failure alike.
        set_random_state(None)
def test_set_random_state(self):
    """Check that MCTSSearcher samples are reproducible under a fixed global seed.

    Four rounds are run, each with a freshly constructed
    ``MCTSSearcher(self.get_space, max_node_space=10)``:

    1. seed 9527 -> must match the recorded vectors for 9527;
    2. seed None -> must NOT match the vectors for 9527;
    3. seed 9527 again -> must match the same recorded vectors;
    4. seed 1 -> must match the recorded vectors for 1.

    The global random state is always reset with ``set_random_state(None)``
    on exit, including when an assertion fails, so a failure here cannot
    leave a fixed seed behind for tests that run afterwards.
    """
    from hypernets.core import set_random_state

    expected_seed_9527 = [[98, 0, 1, 0.86], [2, 0, 1, 0.58], [2, 0, 1, 0.73],
                          [2, 0, 1, 0.58], [2, 1, 0, 0.67], [2, 0, 1, 0.01],
                          [2, 0, 1, 0.22], [2, 0, 1, 0.55], [2, 0, 1, 0.32]]
    expected_seed_1 = [[73, 1, 0, 0.4], [2, 0, 0, 0.42], [2, 0, 0, 0.2],
                       [2, 0, 0, 0.03], [2, 0, 0, 0.42], [2, 1, 1, 0.14],
                       [2, 0, 0, 0.8], [2, 1, 1, 0.31], [2, 0, 1, 0.88]]

    def sample_vectors(seed):
        # Set the global state first, then draw nine samples from a
        # searcher built afterwards.
        set_random_state(seed)
        searcher = MCTSSearcher(self.get_space, max_node_space=10)
        return [searcher.sample().vectors for _ in range(9)]

    try:
        assert sample_vectors(9527) == expected_seed_9527
        assert sample_vectors(None) != expected_seed_9527
        assert sample_vectors(9527) == expected_seed_9527
        assert sample_vectors(1) == expected_seed_1
    finally:
        # Runs on success and on assertion failure alike.
        set_random_state(None)
def test_basic(self):
    """Train a few trials and report which features have negligible importance.

    Steps:

    * load the data via ``self.load_data()`` and pop the ``'y'`` column as target;
    * split 70/30 with the toolbox returned by ``get_tool_box`` (fixed
      ``random_state=42``);
    * run ``train`` with ``max_trials=5`` and load the estimators of the
      top 3 trials;
    * compute batch permutation importance on the test split
      (``roc_auc_ovr`` scorer, 5 repeats, single job);
    * treat features whose mean importance is below ``1e-5`` as
      unselected and print both lists.

    The global random state is reset with ``set_random_state(None)`` in a
    ``finally`` clause, so the reset also happens when training or the
    importance computation raises.
    """
    try:
        df = self.load_data()
        y = df.pop('y')
        tb = get_tool_box(df)
        X_train, X_test, y_train, y_test = tb.train_test_split(
            df, y, test_size=0.3, random_state=42)

        hm, _ = train(X_train, y_train, X_test, y_test, max_trials=5)
        best_trials = hm.get_top_trials(3)
        estimators = [hm.load_estimator(trial.model_file) for trial in best_trials]

        importances = tb.permutation_importance_batch(
            estimators, X_test, y_test, get_scorer('roc_auc_ovr'),
            n_jobs=1, n_repeats=5, random_state=get_random_state())

        # Flatten the argwhere result into a set of plain ints: membership
        # is the same as ``i in ndarray`` (any element equal to i) without
        # rescanning the array for every column.
        low_importance = set(
            np.argwhere(importances.importances_mean < 1e-5).ravel().tolist())

        columns = X_train.columns.to_list()
        selected_features = [
            feat for i, feat in enumerate(columns) if i not in low_importance
        ]
        unselected_features = [c for c in columns if c not in selected_features]
    finally:
        # Runs on success and on failure alike.
        set_random_state(None)

    print('selected:  ', selected_features)
    print('unselected:', unselected_features)
def test_set_random_state(self):
    """Check that EvolutionSearcher samples are reproducible under a fixed global seed.

    Four rounds are run, each with a freshly constructed
    ``EvolutionSearcher(get_space, 5, 3, regularized=False,
    optimize_direction=OptimizeDirection.Maximize)``:

    1. seed 9527 -> must match the recorded vectors for 9527;
    2. seed None -> must NOT match the vectors for 9527;
    3. seed 9527 again -> must match the same recorded vectors;
    4. seed 1 -> must match the recorded vectors for 1.

    The global random state is always reset with ``set_random_state(None)``
    on exit, including when an assertion fails, so a failure here cannot
    leave a fixed seed behind for tests that run afterwards.
    """
    from hypernets.core import set_random_state

    expected_seed_9527 = [[98, 0, 0, 0.96], [9, 0, 0, 0.93], [60, 0, 1, 0.24],
                          [54, 0, 1, 0.7], [25, 0, 1, 0.73], [67, 1, 1, 0.43],
                          [57, 1, 1, 0.05], [49, 0, 0, 0.71], [71, 1, 1, 0.49]]
    expected_seed_1 = [[38, 1, 0, 0.93], [10, 1, 1, 0.15], [17, 1, 0, 0.39],
                       [7, 1, 0, 0.85], [19, 0, 1, 0.44], [29, 1, 0, 0.67],
                       [88, 1, 1, 0.43], [95, 0, 0, 0.8], [10, 1, 1, 0.09]]

    def sample_vectors(seed):
        # Set the global state first, then draw nine samples from a
        # searcher built afterwards.
        set_random_state(seed)
        searcher = EvolutionSearcher(
            get_space, 5, 3, regularized=False,
            optimize_direction=OptimizeDirection.Maximize)
        return [searcher.sample().vectors for _ in range(9)]

    try:
        assert sample_vectors(9527) == expected_seed_9527
        assert sample_vectors(None) != expected_seed_9527
        assert sample_vectors(9527) == expected_seed_9527
        assert sample_vectors(1) == expected_seed_1
    finally:
        # Runs on success and on assertion failure alike.
        set_random_state(None)
def train_heart_disease(**kwargs):
    """Train on the UCI heart-disease data and print scores and top trials.

    The dataset's ``'target'`` column is used as the label. The data is
    split twice with ``test_size=0.3``: first into train/test, then the
    training part into train/eval. Each split takes its ``random_state``
    from a separate ``randint()`` call.

    Args:
        **kwargs: Extra keyword arguments forwarded to ``train``. They
            override the defaults ``reward_metric='auc'`` and
            ``max_trials=10``.

    Prints a separator line, the evaluation scores on the test split
    (auc, accuracy, f1, recall, precision) and one line per top trial
    (up to 10 requested). Returns nothing.
    """
    from hypernets.tabular.datasets import dsutils
    from sklearn.model_selection import train_test_split

    features = dsutils.load_heart_disease_uci()
    target = features.pop('target')

    # Hold out the test set first, then carve the evaluation set out of
    # what remains; randint() is called once per split, in this order.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3, random_state=randint())
    X_train, X_eval, y_train, y_eval = train_test_split(
        X_train, y_train, test_size=0.3, random_state=randint())

    # Defaults first, caller-supplied values on top.
    train_options = {'reward_metric': 'auc', 'max_trials': 10}
    train_options.update(kwargs)

    hm, model = train(X_train, y_train, X_eval, y_eval,
                      const.TASK_BINARY, **train_options)

    print('-' * 50)
    metric_names = ['auc', 'accuracy', 'f1', 'recall', 'precision']
    scores = model.evaluate(X_test, y_test, metrics=metric_names)
    print('scores:', scores)

    trials = hm.get_top_trials(10)
    # Load every estimator before formatting any summary line.
    models = []
    for trial in trials:
        models.append(hm.load_estimator(trial.model_file))

    msgs = []
    for t, m in zip(trials, models):
        msgs.append(f'{t.trial_no},{t.reward},{m.cls.__name__} {m.model_args}')

    print('top trials:')
    print('\n'.join(msgs))


if __name__ == '__main__':
    # Fixed seed when run as a script.
    set_random_state(335)
    train_heart_disease()