def test_bayes(self):
    # testing for sklearn-based model with gp_min
    # https://scikit-optimize.github.io/stable/modules/generated/skopt.BayesSearchCV.html
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        train_size=0.75,
                                                        random_state=0)

    # skopt's BayesSearchCV is incompatible with sklearn versions newer than 0.22
    # https://github.com/scikit-optimize/scikit-optimize/issues/978
    major, minor = (int(v) for v in sklearn.__version__.split('.')[:2])
    if (major, minor) > (0, 22):
        pass
    else:
        opt = HyperOpt("bayes",
                       objective_fn=SVC(),
                       param_space={
                           'C': Real(1e-6, 1e+6, prior='log-uniform'),
                           'gamma': Real(1e-6, 1e+1, prior='log-uniform'),
                           'degree': Integer(1, 8),
                           'kernel': Categorical(['linear', 'poly', 'rbf']),
                       },
                       n_iter=32,
                       random_state=0)

        # executes bayesian optimization
        _ = opt.fit(X_train, y_train)

        # model can be saved, used for predictions or scoring
        np.testing.assert_almost_equal(0.9736842105263158,
                                       opt.score(X_test, y_test), 5)
        print("BayesSearchCV test passed")
    return
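
# A short usage sketch for the fitted optimizer above (an assumption, not
# exercised by the test: it presumes HyperOpt("bayes", objective_fn=SVC(), ...)
# delegates to the underlying BayesSearchCV, which follows the sklearn
# estimator API described in the skopt docs linked in test_bayes):
#
#   y_pred = opt.predict(X_test)   # predict with the best found SVC
#   opt.best_params_               # best hyperparameter combination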

def run_unified_interface(algorithm, backend, num_iterations, num_samples=None):

    def fn(**suggestion):
        model = Model(inputs=inputs,
                      outputs=outputs,
                      model={"xgboostregressor": suggestion},
                      data=data,
                      prefix=f'test_{algorithm}_xgboost_{backend}',
                      verbosity=0)

        model.fit(indices="random")

        t, p = model.predict(indices=model.test_indices, prefix='test')
        mse = RegressionMetrics(t, p).mse()

        return mse

    search_space = [
        Categorical(['gbtree', 'dart'], name='booster'),
        Integer(low=1000, high=2000, name='n_estimators', num_samples=num_samples),
        Real(low=1.0e-5, high=0.1, name='learning_rate', num_samples=num_samples)
    ]

    optimizer = HyperOpt(algorithm,
                         objective_fn=fn,
                         param_space=search_space,
                         backend=backend,
                         num_iterations=num_iterations,
                         opt_path=os.path.join(
                             os.getcwd(),
                             f'results{SEP}test_{algorithm}_xgboost_{backend}'))

    optimizer.fit()

    check_attrs(optimizer, 3)

    for f in ["fanova_importance.html",
              "convergence.png",
              "iterations.json",
              "iterations_sorted.json"]:
        fpath = os.path.join(optimizer.opt_path, f)
        assert os.path.exists(fpath)

    return optimizer
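
# Example invocations of the unified interface above (illustrative values;
# which algorithm/backend combinations actually work depends on the optional
# optimization libraries installed, e.g. skopt, optuna or hyperopt):
#
#   run_unified_interface("bayes", backend="skopt", num_iterations=12, num_samples=4)
#   run_unified_interface("random", backend="optuna", num_iterations=12, num_samples=4)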

def test_named_custom_bayes(self):
    dims = [
        Integer(low=1000, high=2000, name='n_estimators'),
        Integer(low=3, high=6, name='max_depth'),
        Real(low=1e-5, high=0.1, name='learning_rate'),
        Categorical(categories=["gbtree", "dart"], name="booster")
    ]

    def f(**kwargs):

        kwargs['objective'] = 'reg:squarederror'
        kwargs = Jsonize(kwargs)()

        model = Model(inputs=inputs,
                      outputs=outputs,
                      lookback=1,
                      batches="2d",
                      val_data="same",
                      test_fraction=0.3,
                      model={"xgboostregressor": kwargs},
                      transformation=None,
                      data=data,
                      prefix='testing',
                      verbosity=0)

        model.fit(indices="random")

        t, p = model.predict(indices=model.test_indices, prefix='test')
        mse = RegressionMetrics(t, p).mse()
        print(f"Validation mse {mse}")

        return mse

    opt = HyperOpt("bayes",
                   objective_fn=f,
                   param_space=dims,
                   acq_func='EI',  # Expected Improvement.
                   n_calls=12,
                   # acq_optimizer='auto',
                   x0=[1000, 3, 0.01, "gbtree"],
                   n_random_starts=3,  # the number of random initialization points
                   random_state=2)

    opt.fit()
    check_attrs(opt, 4)
    return

def test_grid_custom_model(self):
    # testing grid search algorithm for a custom objective function
    def f(x, noise_level=0.1):
        return np.sin(5 * x) * (1 - np.tanh(x ** 2)) \
               + np.random.randn() * noise_level

    opt = HyperOpt("grid",
                   objective_fn=f,
                   param_space=[Real(low=-2.0, high=2.0, num_samples=20)],
                   n_calls=15  # the number of evaluations of f
                   )

    # executes the grid search
    sr = opt.fit()
    assert len(sr) == 20
    return
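
# Note on the assertion above: with the "grid" algorithm the number of
# evaluations is driven by the size of the parameter grid (num_samples=20
# here), which is why fit() returns 20 results even though n_calls is 15.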

def test_ai4water_bayes(self):
    dims = [
        Integer(low=1000, high=2000, name='n_estimators'),
        Integer(low=3, high=6, name='max_depth'),
        Real(low=1e-5, high=0.1, name='learning_rate'),
        Categorical(categories=["gbtree", "dart"], name="booster")
    ]

    ai4water_args = {
        "inputs": inputs,
        "outputs": outputs,
        "lookback": 1,
        "batches": "2d",
        "val_data": "same",
        "test_fraction": 0.3,
        "model": {"xgboostregressor": {}},
        # "ml_model_args": {'objective': 'reg:squarederror'},  TODO
        "transformation": None
    }

    opt = HyperOpt("bayes",
                   param_space=dims,
                   ai4water_args=ai4water_args,
                   data=data,
                   acq_func='EI',  # Expected Improvement.
                   n_calls=12,
                   # acq_optimizer='auto',
                   x0=[1000, 3, 0.01, "gbtree"],
                   n_random_starts=3,  # the number of random initialization points
                   random_state=2)

    opt.fit()
    check_attrs(opt, 4, ai4water_args)
    return

                      batch_size=int(suggestion['batch_size']),
                      data=data['224206'],
                      verbosity=0,
                      epochs=500,
                      prefix=_suffix)

        h = model.fit()

        return np.min(h.history['val_loss'])

    num_samples = 4
    d = [
        Categorical(categories=['relu', 'sigmoid', 'tanh', 'linear'], name='activation'),
        Integer(low=3, high=15, name='lookback', num_samples=num_samples),
        Categorical(categories=[16, 32, 64, 128], name='batch_size'),
        Real(low=1e-5, high=0.001, name='lr', num_samples=num_samples)
    ]

    x0 = ['relu', 5, 32, 0.0001]

    optimizer = HyperOpt(m,
                         objective_fn=objective_fn,
                         param_space=d,
                         num_iterations=50,
                         x0=x0,
                         use_named_args=True,
                         opt_path=os.path.join(os.getcwd(), f'results{SEP}{_suffix}'))

    r = optimizer.fit()

    results[m] = optimizer
    opt_paths[m] = optimizer.opt_path

def test_real_grid(self):
    grit = [1, 2, 3, 4, 5]
    r = Real(grid=grit)
    np.testing.assert_array_equal(grit, r.grid)

def test_real_steps(self):
    r = Real(low=10, high=100, step=20)
    grit = r.grid
    assert grit.shape == (5,)

def test_real_num_samples(self):
    r = Real(low=10, high=100, num_samples=20)
    grit = r.grid
    assert grit.shape == (20,)
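
# Taken together, the three tests above cover the three ways a Real dimension
# can yield a discrete grid (a sketch; the step-based values assume
# numpy.arange-like semantics, which these tests do not pin down):
#
#   Real(grid=[1, 2, 3, 4, 5]).grid          # explicit grid, used as given
#   Real(low=10, high=100, step=20).grid     # 5 values: 10, 30, 50, 70, 90
#   Real(low=10, high=100, num_samples=20)   # 20 samples between low and high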