Beispiel #1
0
 def grid_spacing(self):
     """Check the parameter grid produced by ``GridSearchOptimizer``.

     Builds a grid over one integer, one continuous, one categorical and
     one boolean parameter, then verifies that every grid point lies in
     the declared domain and that the number of distinct values per
     parameter matches the expected grid size (or exceeds it by one).

     NOTE(review): the method name lacks the ``test_`` prefix, so default
     unittest discovery will not collect it - confirm whether that is
     intentional. ``model`` and ``clf_score`` are expected to be defined
     at module level.
     """
     interval = [1, 10]
     categories = ['Bla1', 'Bla2']
     p1 = Parameter('A', 'integer', lower=interval[0], upper=interval[1])
     p2 = Parameter('B', 'continuous', lower=interval[0], upper=interval[1])
     p3 = Parameter('C', 'categorical', possible_values=list(categories))
     p4 = Parameter('D', 'boolean')
     grid_sizes = {'A': 5, 'B': 6}
     grid_search = GridSearchOptimizer(model, [p1, p2, p3, p4], clf_score,
                                       grid_sizes)
     # Materialise once: the grid is traversed several times below, which
     # would silently yield nothing on later passes if it were a one-shot
     # iterator.
     grid = list(grid_search.grid)
     for params in grid:
         # NOTE(review): range(*interval) excludes the upper bound (10);
         # confirm the grid is never expected to contain it.
         self.assertIn(params['A'], range(*interval))
         # assertIn needs (member, container); a range check on a scalar
         # is expressed with the comparison assertions instead.
         self.assertGreaterEqual(params['B'], interval[0])
         self.assertLessEqual(params['B'], interval[1])
         self.assertIn(params['C'], categories)
         # Compare the textual form so both real booleans and their
         # string spellings are accepted.
         self.assertIn(str(params['D']), ['True', 'False'])

     # grid_sizes only covers the numeric parameters; the categorical and
     # boolean parameters are expected to contribute one grid value per
     # possible value (two each here).
     expected_sizes = dict(grid_sizes, C=len(categories), D=2)
     for name in ('A', 'B', 'C', 'D'):
         n_distinct = len(np.unique([params[name] for params in grid]))
         size = expected_sizes[name]
         # Same tolerance as for the numeric parameters: the requested
         # size, or one extra grid point.
         self.assertIn(n_distinct, (size, size + 1))
Beispiel #2
0
    def test_param_space(self):
        """HyperoptOptimizer accepts scalar parameter types and rejects arrays.

        Constructing the optimizer with integer, categorical, boolean and
        continuous parameters must succeed and expose ``param_space``;
        adding a ``continuous_array`` parameter must raise ``ValueError``.
        """
        low, high = 0, 10
        supported = [
            Parameter('test_integer', 'integer', lower=low, upper=high),
            Parameter('test_categorical',
                      'categorical',
                      possible_values=['A', 'B', 'C']),
            Parameter('test_boolean', 'boolean'),
            Parameter('test_continuous', 'continuous', lower=low, upper=high),
        ]
        array_param = Parameter('test_continuous_array',
                                'continuous_array',
                                lower=[low],
                                upper=[high])
        model = RandomForestClassifier()

        hyperopt = HyperoptOptimizer(model, supported, lambda x: x)
        # Only the attribute access matters here; the value is not inspected.
        hyperopt.param_space

        with self.assertRaises(ValueError):
            HyperoptOptimizer(model, supported + [array_param], lambda x: x)
 def test_continuous(self):
     """A continuous Parameter samples within its bounds and requires them."""
     low, high = 0, 10
     param = Parameter('test_continuous', 'continuous', lower=low, upper=high)
     sample = param.random_sample()
     self.assertGreaterEqual(sample, low)
     self.assertLessEqual(sample, high)
     # Omitting lower/upper must be rejected at construction time.
     with self.assertRaises(MissingValueException):
         Parameter('test_continuous', 'continuous')
 def test_integer(self):
     """An integer Parameter samples a Python int from range(lower, upper)."""
     low, high = 0, 10
     param = Parameter('test_integer', 'integer', lower=low, upper=high)
     sample = param.random_sample()
     self.assertIn(sample, range(low, high))
     self.assertIsInstance(sample, int)
     # Omitting lower/upper must be rejected at construction time.
     with self.assertRaises(MissingValueException):
         Parameter('test_integer', 'integer')
    def test_int_array(self):
        """An int_array Parameter samples each element from its own range.

        Also checks that mismatched bound lengths raise ``ValueError`` and
        that missing bounds raise ``MissingValueException``.
        """
        lower = [0, 10, 20]
        upper = [5, 15, 25]
        # One admissible range per array position.
        allowed = [range(lo, hi) for lo, hi in zip(lower, upper)]
        param = Parameter('test_int_array', 'int_array', lower=lower,
                          upper=upper)

        for _ in range(100):
            sample = param.random_sample()
            for position, value in enumerate(sample):
                self.assertIn(value, allowed[position])

        with self.assertRaises(ValueError):
            Parameter('test_int_array', 'int_array', lower=[1, 2],
                      upper=[3, 4, 5])

        with self.assertRaises(MissingValueException):
            Parameter('test_int_array', 'int_array')
Beispiel #6
0
    def test_improvement(self):
        """Genetic search over ``max_depth`` must beat the starting score.

        Both scores are computed with ``clf_score`` on the same data the
        models were fitted on.
        """
        np.random.seed(4)
        features, labels = make_classification(n_samples=100,
                                               n_features=45,
                                               n_informative=15,
                                               n_redundant=5,
                                               class_sep=1,
                                               n_clusters_per_class=4,
                                               flip_y=0.4)
        baseline = RandomForestClassifier(max_depth=5)
        baseline.fit(features, labels)
        start_score = clf_score(labels, baseline.predict(features))

        depth_param = Parameter('max_depth', 'integer', lower=1, upper=10)
        population_size = 4
        noise_by_param = {
            'max_depth': 0.4,
            'learning_rate': 0.05,
            'reg_lambda': 0.5
        }
        optimizer = GeneticOptimizer(baseline, [depth_param], clf_score,
                                     population_size, 'RouletteWheel',
                                     noise_by_param)

        _, best_model = optimizer.fit(X_train=features,
                                      y_train=labels,
                                      n_iters=30)
        # Refit the returned model before scoring it.
        best_model.fit(features, labels)
        final_score = clf_score(labels, best_model.predict(features))
        self.assertGreater(final_score, start_score)
Beispiel #7
0
    def test_bounds_arr(self):
        """``BayesianOptimizer.bounds_arr`` must stay within each parameter's interval.

        Row ``i`` of ``bounds_arr`` is read as ``(lower, upper)`` for the
        ``i``-th parameter passed to the optimizer; the lower entry must not
        fall below the declared lower bound and the upper entry must not
        exceed the declared upper bound.
        """
        interval1 = [0, 10]
        interval2 = [11, 20]
        p1 = Parameter('test_integer1',
                       'integer',
                       lower=interval1[0],
                       upper=interval1[1])
        p2 = Parameter('test_integer2',
                       'integer',
                       lower=interval2[0],
                       upper=interval2[1])
        model = RandomForestClassifier()
        bayesOpt = BayesianOptimizer(model, [p1, p2], lambda x: x)

        self.assertGreaterEqual(bayesOpt.bounds_arr[0][0], interval1[0])
        self.assertLessEqual(bayesOpt.bounds_arr[0][1], interval1[1])
        self.assertGreaterEqual(bayesOpt.bounds_arr[1][0], interval2[0])
        # Upper bound check uses <=, matching the first parameter; the
        # previous >= would have accepted an upper bound above the interval.
        self.assertLessEqual(bayesOpt.bounds_arr[1][1], interval2[1])
Beispiel #8
0
    def test_bounds_arr(self):
        """``GeneticOptimizer.bounds`` must stay within each parameter's interval.

        ``bounds`` is indexed by parameter name and read as
        ``(lower, upper)``; the lower entry must not fall below the declared
        lower bound and the upper entry must not exceed the declared upper
        bound.
        """
        interval1 = [0, 10]
        interval2 = [11, 20]
        p1 = Parameter('test_integer1',
                       'integer',
                       lower=interval1[0],
                       upper=interval1[1])
        p2 = Parameter('test_integer2',
                       'integer',
                       lower=interval2[0],
                       upper=interval2[1])
        mutation_noise = {'test_integer1': 0.4, 'test_integer2': 0.05}
        model = RandomForestClassifier()
        geneticOpt = GeneticOptimizer(model, [p1, p2], lambda x: x, 4,
                                      'RouletteWheel', mutation_noise)

        self.assertGreaterEqual(geneticOpt.bounds['test_integer1'][0],
                                interval1[0])
        self.assertLessEqual(geneticOpt.bounds['test_integer1'][1],
                             interval1[1])
        self.assertGreaterEqual(geneticOpt.bounds['test_integer2'][0],
                                interval2[0])
        # Upper bound check uses <=, matching the first parameter; the
        # previous >= would have accepted an upper bound above the interval.
        self.assertLessEqual(geneticOpt.bounds['test_integer2'][1],
                             interval2[1])
Beispiel #9
0
 def test_improvement(self):
     """Grid search over ``max_depth`` must beat the starting score.

     Both scores are computed with ``clf_score`` on the same data the
     models were fitted on.
     """
     np.random.seed(4)
     features, labels = make_classification(n_samples=100,
                                            n_features=45,
                                            n_informative=15,
                                            n_redundant=5,
                                            class_sep=1,
                                            n_clusters_per_class=4,
                                            flip_y=0.4)
     baseline = RandomForestClassifier(max_depth=5)
     baseline.fit(features, labels)
     start_score = clf_score(labels, baseline.predict(features))

     depth_param = Parameter('max_depth', 'integer', lower=1, upper=10)
     sizes = {'max_depth': 5}
     optimizer = GridSearchOptimizer(baseline, [depth_param], clf_score,
                                     sizes)
     _, best_model = optimizer.fit(X_train=features, y_train=labels)
     # Refit the returned model before scoring it.
     best_model.fit(features, labels)
     final_score = clf_score(labels, best_model.predict(features))
     self.assertGreater(final_score, start_score)
Beispiel #10
0
 def test_upper_confidence_bound_tractable(self):
     """Bayesian search with the UCB method succeeds and improves the score.

     The optimizer is created with ``method='upper_confidence_bound'``;
     after fitting, its ``success`` flag must be truthy and the refitted
     best model must score strictly higher than the starting model.
     """
     np.random.seed(5)
     features, labels = make_classification(n_samples=100,
                                            n_features=45,
                                            n_informative=15,
                                            n_redundant=5,
                                            class_sep=1,
                                            n_clusters_per_class=4,
                                            flip_y=0.4)
     baseline = RandomForestClassifier(max_depth=5)
     baseline.fit(features, labels)
     start_score = clf_score(labels, baseline.predict(features))

     depth_param = Parameter('max_depth', 'integer', lower=1, upper=10)
     optimizer = BayesianOptimizer(baseline, [depth_param],
                                   clf_score,
                                   method='upper_confidence_bound')
     _, best_model = optimizer.fit(X_train=features,
                                   y_train=labels,
                                   n_iters=10)
     self.assertTrue(optimizer.success)

     # Refit the returned model before scoring it.
     best_model.fit(features, labels)
     final_score = clf_score(labels, best_model.predict(features))
     self.assertGreater(final_score, start_score)
Beispiel #11
0
    def test_improvement(self):
        """Hyperopt search over ``max_depth`` improves the score and every trial is 'ok'.

        Both scores are computed with ``clf_score`` on the same data the
        models were fitted on.
        """
        np.random.seed(4)
        features, labels = make_classification(n_samples=100,
                                               n_features=45,
                                               n_informative=15,
                                               n_redundant=5,
                                               class_sep=1,
                                               n_clusters_per_class=4,
                                               flip_y=0.4)
        baseline = RandomForestClassifier(max_depth=5)
        baseline.fit(features, labels)
        start_score = clf_score(labels, baseline.predict(features))

        depth_param = Parameter('max_depth', 'integer', lower=1, upper=10)
        hyperopt = HyperoptOptimizer(baseline, [depth_param], clf_score)
        _, best_model = hyperopt.fit(X_train=features,
                                     y_train=labels,
                                     n_iters=10)
        # Refit the returned model before scoring it.
        best_model.fit(features, labels)
        final_score = clf_score(labels, best_model.predict(features))
        self.assertGreater(final_score, start_score)

        # Every recorded trial must have completed with status 'ok'.
        for trial_status in hyperopt.trials.statuses():
            self.assertEqual(trial_status, 'ok')
Beispiel #12
0
                                   n_redundant=5,
                                   class_sep=1,
                                   n_clusters_per_class=4,
                                   flip_y=0.4)

# hold out 33% of the samples for testing; the fixed random_state keeps the
# split reproducible
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    target,
                                                    test_size=0.33,
                                                    random_state=42)

# define the classifier to tune (a single XGBoost classifier)
model = xgb.XGBClassifier(max_depth=3, learning_rate=0.1, reg_lambda=1)

# define the list of hyperparameters to tune for the classifier
params = [
    Parameter(name='max_depth', param_type='integer', lower=1, upper=4),
    Parameter(name='learning_rate',
              param_type='continuous',
              lower=0.01,
              upper=0.5),
    Parameter(name='reg_lambda', param_type='continuous', lower=0.1, upper=10)
]


# define the score function
def clf_score(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Sequence or array of ground-truth labels.
        y_pred: Sequence or array of predicted labels, same length as
            ``y_true``.

    Returns:
        The accuracy, i.e. number of matching positions divided by
        ``len(y_true)``.
    """
    # Coerce to arrays so the comparison is element-wise; comparing two
    # plain Python lists with == yields a single bool and a wrong score.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.sum(y_true == y_pred) / float(len(y_true))


rand_search = RandomSearchOptimizer(model=model,
                                    eval_func=clf_score,
Beispiel #13
0
                                   n_clusters_per_class=4,
                                   flip_y=0.4)

# hold out 33% of the samples for testing; the fixed random_state keeps the
# split reproducible
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    target,
                                                    test_size=0.33,
                                                    random_state=42)

# define the model to tune (a single NNModel sized to the feature count)
model = NNModel(input_dim=data.shape[1],
                hidden_dim=10,
                train_epochs=100,
                batch_size=32)

# define the list of hyperparameters to tune for the model
params = [
    Parameter(name='hidden_dim', param_type='integer', lower=10, upper=200)
]


# define the score function
def clf_score(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Sequence or array of ground-truth labels.
        y_pred: Sequence or array of predicted labels, same length as
            ``y_true``.

    Returns:
        The accuracy, i.e. number of matching positions divided by
        ``len(y_true)``.
    """
    # Coerce to arrays so the comparison is element-wise; comparing two
    # plain Python lists with == yields a single bool and a wrong score.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.sum(y_true == y_pred) / float(len(y_true))


# random-search optimizer over `params`, scored with clf_score
rand_search = RandomSearchOptimizer(model=model,
                                    eval_func=clf_score,
                                    hyperparams=params,
                                    grid_size=10)

# Matern kernel with its default settings
kernel = gp.kernels.Matern()
bayesOpt = BayesianOptimizer(model=model,
from optml.bayesian_optimizer import BayesianOptimizer
from optml import Parameter
from optml.models import Model
from sklearn.metrics import log_loss
import matplotlib.pyplot as plt


class DummyModel(Model):
    """Stand-in ``Model`` subclass that carries no state of its own."""

    # Report 'xgboost' as the defining module.
    # NOTE(review): presumably consumers inspect the model's module to pick
    # model-specific handling - confirm against the optimizer code.
    __module__ = 'xgboost'

    def __init__(self):
        """Create the dummy without running ``Model.__init__``."""
        return None


params = [
    Parameter(name='eta', param_type='continuous', lower=0.001, upper=1),
    Parameter(name='max_depth', param_type='integer', lower=2, upper=20),
    Parameter(name='subsample', param_type='continuous', lower=0.5, upper=1),
    Parameter(name='colsample_bytree',
              param_type='continuous',
              lower=0.5,
              upper=1),
    Parameter(name='colsample_bylevel',
              param_type='continuous',
              lower=0.5,
              upper=1),
    Parameter(name='min_child_weight',
              param_type='continuous',
              lower=0.001,
              upper=1),
    Parameter(name='alpha', param_type='continuous', lower=0.001, upper=1),
Beispiel #15
0
    def __init__(self,
                 learning_task,
                 n_estimators=5000,
                 max_hyperopt_evals=50,
                 counters_sort_col=None,
                 holdout_size=0,
                 train_path=None,
                 test_path=None,
                 cd_path=None,
                 output_folder_path='./'):
        """Set up the XGBoost experiment: search spaces and default parameters.

        All arguments are forwarded to ``Experiment.__init__`` together with
        the fixed backend name ``'xgb'`` and a fixed ``True`` flag;
        ``n_estimators`` is additionally recorded in the default parameters.
        """
        Experiment.__init__(self, learning_task, 'xgb', n_estimators,
                            max_hyperopt_evals, True, counters_sort_col,
                            holdout_size, train_path, test_path, cd_path,
                            output_folder_path)

        # hyperopt search space, built entry by entry in a fixed order.
        search_space = {}
        search_space['eta'] = hp.loguniform('eta', -7, 0)
        search_space['max_depth'] = hp.quniform('max_depth', 2, 10, 1)
        for fraction_name in ('subsample', 'colsample_bytree',
                              'colsample_bylevel'):
            search_space[fraction_name] = hp.uniform(fraction_name, 0.5, 1)
        search_space['min_child_weight'] = hp.loguniform(
            'min_child_weight', -16, 5)
        # Each of these is either exactly 0 or a log-uniform positive value
        # labelled '<name>_positive'.
        for choice_name in ('alpha', 'lambda', 'gamma'):
            search_space[choice_name] = hp.choice(
                choice_name,
                [0, hp.loguniform(choice_name + '_positive', -16, 2)])
        self.space = search_space

        # NOTE: a longer Parameter list (eta, max_depth, subsample,
        # colsample_bytree, colsample_bylevel, min_child_weight, alpha,
        # lambda, gamma, n_estimators) was previously kept here as
        # commented-out code; only max_depth and gamma are declared below.
        depth_param = Parameter(name='max_depth',
                                param_type='integer',
                                lower=2,
                                upper=20)
        gamma_param = Parameter(name='gamma',
                                param_type='continuous',
                                lower=0.,
                                upper=1.)
        self.hyperparams = [depth_param, gamma_param]

        # Raw defaults are stored first, then replaced by their
        # preprocessed form.
        self.default_params = {
            'eta': 0.3,
            'max_depth': 6,
            'subsample': 1.0,
            'colsample_bytree': 1.0,
            'colsample_bylevel': 1.0,
            'min_child_weight': 1,
            'alpha': 0,
            'lambda': 1,
            'gamma': 0,
            'n_estimators': n_estimators,
        }
        self.default_params = self.preprocess_params(self.default_params)

        self.title = 'XGBoost'
        self.model = xgb
Beispiel #16
0
                                   class_sep=1,
                                   n_clusters_per_class=4,
                                   flip_y=0.4)

# hold out 33% of the samples for testing; the fixed random_state keeps the
# split reproducible
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    target,
                                                    test_size=0.33,
                                                    random_state=42)

# define two different classifiers: a random forest and an SVM
rf = RandomForestClassifier(max_depth=3, n_estimators=10, min_samples_split=4)
svm = SVC(C=1, kernel='rbf', degree=3)

# define the list of hyperparameters to tune for each classifier
rf_params = [
    Parameter(name='min_samples_split', param_type='integer', lower=2,
              upper=6),
    Parameter(name='min_weight_fraction_leaf',
              param_type='continuous',
              lower=0,
              upper=0.5)
]
svm_params = [
    Parameter(name='C', param_type='continuous', lower=0.1, upper=5),
    Parameter(name='degree', param_type='integer', lower=1, upper=5),
    Parameter(name='kernel',
              param_type='categorical',
              possible_values=['linear', 'poly', 'rbf', 'sigmoid'])
]

# select the SVM and its hyperparameter list as the active model/params
model = svm
params = svm_params
 def test_categorical(self):
     """A categorical Parameter samples one of its values and requires them."""
     choices = ['a', 'b', 'c']
     param = Parameter('test_categorical', 'categorical',
                       possible_values=choices)
     sample = param.random_sample()
     self.assertIn(sample, choices)
     # Omitting possible_values must be rejected at construction time.
     with self.assertRaises(MissingValueException):
         Parameter('test_categorical', 'categorical')
 def test_boolean(self):
     """A boolean Parameter samples a NumPy boolean (``np.bool_``)."""
     param = Parameter('test_bool', 'boolean')
     sample = param.random_sample()
     self.assertIsInstance(sample, np.bool_)
    # use a toy problem
    optimizer = GaussianProcessRegressorWithCategorical(kernel=Matern(
        nu=1.5, length_scale_bounds=(0.1, 100.0)),
                                                        alpha=1e-4,
                                                        n_restarts_optimizer=5,
                                                        normalize_y=True)

    func = func2

    xs_truth = np.linspace(0, 2, 1000)
    ys_truth = [func(x) for x in xs_truth]

    bayesOpt = BayesianOptimizer(model=DummyModel(),
                                 hyperparams=[
                                     Parameter(name='bla',
                                               param_type='continuous',
                                               lower=0.00,
                                               upper=2)
                                 ],
                                 eval_func=log_loss)
    bayesOpt.acquisition_function = 'upper_confidence_bound'
    xs = [[0.05], [0.3]]

    bayesOpt.hyperparam_history.append((xs[0][0], func(xs[0][0])))
    bayesOpt.hyperparam_history.append((xs[1][0], func(xs[1][0])))
    for i in range(15):
        print(i)
        ys = [func(x[0]) for x in xs]
        optimizer.fit(xs, ys)
        minimized = bayesOpt.optimize_continuous_problem(optimizer, [0.1])
        minimized['success']
        minimized['x']