def test_grid_spacing(self):
    interval = [1, 10]
    p1 = Parameter('A', 'integer', lower=interval[0], upper=interval[1])
    p2 = Parameter('B', 'continuous', lower=interval[0], upper=interval[1])
    p3 = Parameter('C', 'categorical', possible_values=['Bla1', 'Bla2'])
    p4 = Parameter('D', 'boolean')
    grid_sizes = {'A': 5, 'B': 6}
    grid_search = GridSearchOptimizer(model, [p1, p2, p3, p4], clf_score, grid_sizes)
    grid = grid_search.grid
    for params in grid:
        self.assertIn(params['A'], range(*interval))
        self.assertTrue(params['B'] >= interval[0])
        self.assertTrue(params['B'] <= interval[1])
        self.assertIn(params['C'], ['Bla1', 'Bla2'])
        self.assertIn(params['D'], ['True', 'False'])
    lenA = len(np.unique([params['A'] for params in grid]))
    lenB = len(np.unique([params['B'] for params in grid]))
    lenC = len(np.unique([params['C'] for params in grid]))
    lenD = len(np.unique([params['D'] for params in grid]))
    self.assertTrue((lenA == grid_sizes['A']) or (lenA == grid_sizes['A'] + 1))
    self.assertTrue((lenB == grid_sizes['B']) or (lenB == grid_sizes['B'] + 1))
    # categorical and boolean parameters are not listed in grid_sizes;
    # their grid size is simply the number of possible values
    self.assertTrue(lenC == 2)
    self.assertTrue(lenD == 2)
def test_param_space(self):
    interval = [0, 10]
    p1 = Parameter('test_integer', 'integer', lower=interval[0], upper=interval[1])
    p2 = Parameter('test_categorical', 'categorical', possible_values=['A', 'B', 'C'])
    p3 = Parameter('test_boolean', 'boolean')
    p4 = Parameter('test_continuous', 'continuous', lower=interval[0], upper=interval[1])
    p5 = Parameter('test_continuous_array', 'continuous_array', lower=[interval[0]], upper=[interval[1]])
    model = RandomForestClassifier()
    hyperopt = HyperoptOptimizer(model, [p1, p2, p3, p4], lambda x: x)
    param_space = hyperopt.param_space
    with self.assertRaises(ValueError):
        hyperopt = HyperoptOptimizer(model, [p1, p2, p3, p4, p5], lambda x: x)
def test_continuous(self):
    interval = [0, 10]
    p = Parameter('test_continuous', 'continuous', lower=interval[0], upper=interval[1])
    s = p.random_sample()
    self.assertTrue(s >= interval[0])
    self.assertTrue(s <= interval[1])
    with self.assertRaises(MissingValueException):
        Parameter('test_continuous', 'continuous')
def test_integer(self):
    interval = [0, 10]
    p = Parameter('test_integer', 'integer', lower=interval[0], upper=interval[1])
    s = p.random_sample()
    self.assertTrue(s in range(*interval))
    self.assertTrue(isinstance(s, int))
    with self.assertRaises(MissingValueException):
        Parameter('test_integer', 'integer')
def test_int_array(self):
    lower = [0, 10, 20]
    upper = [5, 15, 25]
    p = Parameter('test_int_array', 'int_array', lower=lower, upper=upper)
    for _ in range(100):
        s = p.random_sample()
        for i, v in enumerate(s):
            self.assertTrue(v in range(lower[i], upper[i]))
    with self.assertRaises(ValueError):
        Parameter('test_int_array', 'int_array', lower=[1, 2], upper=[3, 4, 5])
    with self.assertRaises(MissingValueException):
        Parameter('test_int_array', 'int_array')
def test_improvement(self):
    np.random.seed(4)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    n_init_samples = 4
    mutation_noise = {'max_depth': 0.4, 'learning_rate': 0.05, 'reg_lambda': 0.5}
    geneticOpt = GeneticOptimizer(model, [p1], clf_score, n_init_samples,
                                  'RouletteWheel', mutation_noise)
    best_params, best_model = geneticOpt.fit(X_train=data, y_train=target, n_iters=30)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
def test_bounds_arr(self):
    interval1 = [0, 10]
    interval2 = [11, 20]
    p1 = Parameter('test_integer1', 'integer', lower=interval1[0], upper=interval1[1])
    p2 = Parameter('test_integer2', 'integer', lower=interval2[0], upper=interval2[1])
    model = RandomForestClassifier()
    bayesOpt = BayesianOptimizer(model, [p1, p2], lambda x: x)
    self.assertTrue(bayesOpt.bounds_arr[0][0] >= interval1[0])
    self.assertTrue(bayesOpt.bounds_arr[0][1] <= interval1[1])
    self.assertTrue(bayesOpt.bounds_arr[1][0] >= interval2[0])
    self.assertTrue(bayesOpt.bounds_arr[1][1] <= interval2[1])
def test_bounds_arr(self):
    interval1 = [0, 10]
    interval2 = [11, 20]
    p1 = Parameter('test_integer1', 'integer', lower=interval1[0], upper=interval1[1])
    p2 = Parameter('test_integer2', 'integer', lower=interval2[0], upper=interval2[1])
    mutation_noise = {'test_integer1': 0.4, 'test_integer2': 0.05}
    model = RandomForestClassifier()
    geneticOpt = GeneticOptimizer(model, [p1, p2], lambda x: x, 4,
                                  'RouletteWheel', mutation_noise)
    self.assertTrue(geneticOpt.bounds['test_integer1'][0] >= interval1[0])
    self.assertTrue(geneticOpt.bounds['test_integer1'][1] <= interval1[1])
    self.assertTrue(geneticOpt.bounds['test_integer2'][0] >= interval2[0])
    self.assertTrue(geneticOpt.bounds['test_integer2'][1] <= interval2[1])
def test_improvement(self):
    np.random.seed(4)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    grid_sizes = {'max_depth': 5}
    grid_search = GridSearchOptimizer(model, [p1], clf_score, grid_sizes)
    best_params, best_model = grid_search.fit(X_train=data, y_train=target)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
def test_upper_confidence_bound_tractable(self):
    np.random.seed(5)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    bayesOpt = BayesianOptimizer(model, [p1], clf_score, method='upper_confidence_bound')
    best_params, best_model = bayesOpt.fit(X_train=data, y_train=target, n_iters=10)
    self.assertTrue(bayesOpt.success)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
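# For reference, the 'upper_confidence_bound' method selected above is the standard
# UCB acquisition function: candidates are scored by the surrogate's posterior mean
# plus a multiple of its posterior standard deviation. The sketch below only
# illustrates that idea; kappa and the plain sklearn GP are assumptions here,
# not optml's internals.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

def upper_confidence_bound(gp, candidates, kappa=2.0):
    # larger kappa favours exploration (high uncertainty),
    # smaller kappa favours exploitation (high predicted score)
    mu, sigma = gp.predict(candidates, return_std=True)
    return mu + kappa * sigma

# toy 1-d illustration
X_obs = np.array([[1.0], [4.0], [7.0]])
y_obs = np.array([0.2, 0.8, 0.5])
gp = GaussianProcessRegressor().fit(X_obs, y_obs)
candidates = np.linspace(1, 10, 50).reshape(-1, 1)
best_candidate = candidates[np.argmax(upper_confidence_bound(gp, candidates))]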
def test_improvement(self):
    np.random.seed(4)
    data, target = make_classification(n_samples=100,
                                       n_features=45,
                                       n_informative=15,
                                       n_redundant=5,
                                       class_sep=1,
                                       n_clusters_per_class=4,
                                       flip_y=0.4)
    model = RandomForestClassifier(max_depth=5)
    model.fit(data, target)
    start_score = clf_score(target, model.predict(data))
    p1 = Parameter('max_depth', 'integer', lower=1, upper=10)
    hyperopt = HyperoptOptimizer(model, [p1], clf_score)
    best_params, best_model = hyperopt.fit(X_train=data, y_train=target, n_iters=10)
    best_model.fit(data, target)
    final_score = clf_score(target, best_model.predict(data))
    self.assertTrue(final_score > start_score)
    for status in hyperopt.trials.statuses():
        self.assertEqual(status, 'ok')
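# The statuses() call above suggests hyperopt.trials is a hyperopt.Trials instance.
# If so, the best observed trial can also be inspected directly; this is a hedged
# usage sketch based on hyperopt's Trials API, not something the test asserts.
best = hyperopt.trials.best_trial
print(best['result']['loss'], best['misc']['vals'])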
n_redundant=5, class_sep=1, n_clusters_per_class=4, flip_y=0.4)
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.33, random_state=42)

# define three different classifiers
model = xgb.XGBClassifier(max_depth=3, learning_rate=0.1, reg_lambda=1)

# define the list of hyperparameters to tune for each classifier
params = [Parameter(name='max_depth', param_type='integer', lower=1, upper=4),
          Parameter(name='learning_rate', param_type='continuous', lower=0.01, upper=0.5),
          Parameter(name='reg_lambda', param_type='continuous', lower=0.1, upper=10)]

# define the score function
def clf_score(y_true, y_pred):
    return np.sum(y_true == y_pred) / float(len(y_true))

rand_search = RandomSearchOptimizer(model=model, eval_func=clf_score,
n_clusters_per_class=4, flip_y=0.4)
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.33, random_state=42)

# define three different classifiers
model = NNModel(input_dim=data.shape[1], hidden_dim=10, train_epochs=100, batch_size=32)

params = [Parameter(name='hidden_dim', param_type='integer', lower=10, upper=200)]

# define the score function
def clf_score(y_true, y_pred):
    return np.sum(y_true == y_pred) / float(len(y_true))

rand_search = RandomSearchOptimizer(model=model, eval_func=clf_score,
                                    hyperparams=params, grid_size=10)

kernel = gp.kernels.Matern()
bayesOpt = BayesianOptimizer(model=model,
from optml.bayesian_optimizer import BayesianOptimizer
from optml import Parameter
from optml.models import Model
from sklearn.metrics import log_loss
import matplotlib.pyplot as plt

class DummyModel(Model):
    __module__ = 'xgboost'

    def __init__(self):
        pass

params = [Parameter(name='eta', param_type='continuous', lower=0.001, upper=1),
          Parameter(name='max_depth', param_type='integer', lower=2, upper=20),
          Parameter(name='subsample', param_type='continuous', lower=0.5, upper=1),
          Parameter(name='colsample_bytree', param_type='continuous', lower=0.5, upper=1),
          Parameter(name='colsample_bylevel', param_type='continuous', lower=0.5, upper=1),
          Parameter(name='min_child_weight', param_type='continuous', lower=0.001, upper=1),
          Parameter(name='alpha', param_type='continuous', lower=0.001, upper=1),
def __init__(self, learning_task, n_estimators=5000, max_hyperopt_evals=50,
             counters_sort_col=None, holdout_size=0,
             train_path=None, test_path=None, cd_path=None, output_folder_path='./'):
    Experiment.__init__(self, learning_task, 'xgb', n_estimators, max_hyperopt_evals, True,
                        counters_sort_col, holdout_size,
                        train_path, test_path, cd_path, output_folder_path)

    self.space = {
        'eta': hp.loguniform('eta', -7, 0),
        'max_depth': hp.quniform('max_depth', 2, 10, 1),
        'subsample': hp.uniform('subsample', 0.5, 1),
        'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
        'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
        'min_child_weight': hp.loguniform('min_child_weight', -16, 5),
        'alpha': hp.choice('alpha', [0, hp.loguniform('alpha_positive', -16, 2)]),
        'lambda': hp.choice('lambda', [0, hp.loguniform('lambda_positive', -16, 2)]),
        'gamma': hp.choice('gamma', [0, hp.loguniform('gamma_positive', -16, 2)])
    }

    #self.hyperparams = [
    #    Parameter(name='eta', param_type='continuous', lower=0.001, upper=1),
    #    Parameter(name='max_depth', param_type='integer', lower=2, upper=20),
    #    Parameter(name='subsample', param_type='continuous', lower=0.5, upper=1),
    #    Parameter(name='colsample_bytree', param_type='continuous', lower=0.5, upper=1),
    #    Parameter(name='colsample_bylevel', param_type='continuous', lower=0.5, upper=1),
    #    Parameter(name='min_child_weight', param_type='continuous', lower=0.001, upper=1),
    #    Parameter(name='alpha', param_type='continuous', lower=0.001, upper=1),
    #    Parameter(name='lambda', param_type='continuous', lower=0.001, upper=1),
    #    Parameter(name='gamma', param_type='continuous', lower=0.001, upper=1),
    #    Parameter(name='n_estimators', param_type='integer', lower=10, upper=10)
    #]
    self.hyperparams = [
        Parameter(name='max_depth', param_type='integer', lower=2, upper=20),
        Parameter(name='gamma', param_type='continuous', lower=0., upper=1.)
    ]

    self.default_params = {'eta': 0.3, 'max_depth': 6, 'subsample': 1.0,
                           'colsample_bytree': 1.0, 'colsample_bylevel': 1.0,
                           'min_child_weight': 1, 'alpha': 0, 'lambda': 1,
                           'gamma': 0, 'n_estimators': n_estimators}
    self.default_params = self.preprocess_params(self.default_params)
    self.title = 'XGBoost'
    self.model = xgb
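# Sanity check on the search space above: hp.loguniform(label, low, high) draws
# exp(u) with u uniform on [low, high], so e.g. 'eta' ranges over
# [exp(-7), exp(0)] ~= [0.0009, 1.0], matching the commented-out Parameter bounds.
# Standalone illustration (not part of the experiment class):
import numpy as np
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

draws = [sample(hp.loguniform('eta', -7, 0)) for _ in range(1000)]
assert np.exp(-7) <= min(draws) <= max(draws) <= 1.0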
class_sep=1, n_clusters_per_class=4, flip_y=0.4)
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.33, random_state=42)

# define three different classifiers
rf = RandomForestClassifier(max_depth=3, n_estimators=10, min_samples_split=4)
svm = SVC(C=1, kernel='rbf', degree=3)

# define the list of hyperparameters to tune for each classifier
rf_params = [Parameter(name='min_samples_split', param_type='integer', lower=2, upper=6),
             Parameter(name='min_weight_fraction_leaf', param_type='continuous', lower=0, upper=0.5)]
svm_params = [Parameter(name='C', param_type='continuous', lower=0.1, upper=5),
              Parameter(name='degree', param_type='integer', lower=1, upper=5),
              Parameter(name='kernel', param_type='categorical',
                        possible_values=['linear', 'poly', 'rbf', 'sigmoid'])]

model = svm
params = svm_params
def test_categorical(self):
    vals = ['a', 'b', 'c']
    p = Parameter('test_categorical', 'categorical', possible_values=vals)
    self.assertIn(p.random_sample(), vals)
    with self.assertRaises(MissingValueException):
        Parameter('test_categorical', 'categorical')
def test_boolean(self):
    p = Parameter('test_bool', 'boolean')
    s = p.random_sample()
    self.assertTrue(isinstance(s, np.bool_))
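# Taken together, these tests pin down the sampling API: every Parameter type exposes
# random_sample(), and missing bounds or possible_values raise MissingValueException.
# Hedged sketch of drawing one random configuration from a list of parameters
# (this assumes Parameter keeps the constructor's name argument as a .name attribute,
# which the tests above do not exercise).
from optml import Parameter

p_list = [Parameter('max_depth', 'integer', lower=1, upper=10),
          Parameter('kernel', 'categorical', possible_values=['linear', 'rbf']),
          Parameter('bootstrap', 'boolean')]
random_config = {p.name: p.random_sample() for p in p_list}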
# use a toy problem
optimizer = GaussianProcessRegressorWithCategorical(
    kernel=Matern(nu=1.5, length_scale_bounds=(0.1, 100.0)),
    alpha=1e-4,
    n_restarts_optimizer=5,
    normalize_y=True)

func = func2
xs_truth = np.linspace(0, 2, 1000)
ys_truth = [func(x) for x in xs_truth]

bayesOpt = BayesianOptimizer(model=DummyModel(),
                             hyperparams=[Parameter(name='bla', param_type='continuous',
                                                    lower=0.00, upper=2)],
                             eval_func=log_loss)
bayesOpt.acquisition_function = 'upper_confidence_bound'

xs = [[0.05], [0.3]]
bayesOpt.hyperparam_history.append((xs[0][0], func(xs[0][0])))
bayesOpt.hyperparam_history.append((xs[1][0], func(xs[1][0])))

for i in range(15):
    print(i)
    ys = [func(x[0]) for x in xs]
    optimizer.fit(xs, ys)
    minimized = bayesOpt.optimize_continuous_problem(optimizer, [0.1])
    minimized['success']
    minimized['x']
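# The script imports matplotlib and builds xs_truth/ys_truth but never plots them here.
# A minimal follow-up sketch, assuming GaussianProcessRegressorWithCategorical follows
# sklearn's GaussianProcessRegressor.predict(X, return_std=True) signature:
xs_plot = np.array(xs_truth).reshape(-1, 1)
mu, sigma = optimizer.predict(xs_plot, return_std=True)

plt.plot(xs_truth, ys_truth, label='true function')
plt.plot(xs_truth, mu, label='GP posterior mean')
plt.fill_between(xs_truth, mu - 2 * sigma, mu + 2 * sigma, alpha=0.2, label='2 std band')
plt.legend()
plt.show()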