Example 1
def test_min_variance():
    rng = np.random.RandomState(0)
    X = rng.normal(size=(1000, 1))
    y = np.ones(1000)
    rf = RandomForestRegressor(min_variance=0.1)
    rf.fit(X, y)
    mean, std = rf.predict(X, return_std=True)
    assert_array_almost_equal(mean, y)
    assert_array_almost_equal(std, np.sqrt(0.1 * np.ones(1000)))
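The excerpt omits its imports; a minimal sketch of what it assumes is shown below. skopt's RandomForestRegressor wraps scikit-learn's and adds the min_variance option plus predict(..., return_std=True); the test above checks that the returned std is floored at sqrt(min_variance) when the targets are constant.

import numpy as np
from numpy.testing import assert_array_almost_equal
from skopt.learning import RandomForestRegressor  # skopt wrapper, not sklearn's class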
Example 2
    def __init__(
        self,
        problem,
        num_workers,
        surrogate_model="RF",
        acq_func="gp_hedge",
        acq_kappa=1.96,
        acq_xi=None,
        liar_strategy="cl_max",
        n_jobs=1,
        **kwargs,
    ):

        assert surrogate_model in [
            "RF",
            "ET",
            "GBRT",
            "GP",
            "DUMMY",
        ], f"Unknown scikit-optimize base_estimator: {surrogate_model}"

        if surrogate_model == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif surrogate_model == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif surrogate_model == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = surrogate_model

        self.space = problem.space
        # queue of remaining starting points
        self.starting_points = problem.starting_point

        n_init = (inf if surrogate_model == "DUMMY" else max(
            num_workers, len(self.starting_points)))

        # Set acq_func_kwargs parameters
        acq_func_kwargs = {}
        if type(acq_kappa) is float:
            acq_func_kwargs["kappa"] = acq_kappa
        if type(acq_xi) is float:
            acq_func_kwargs["xi"] = acq_xi

        self._optimizer = SkOptimizer(
            dimensions=self.space,
            base_estimator=base_estimator,
            acq_optimizer="sampling",
            acq_func=acq_func,
            acq_func_kwargs=acq_func_kwargs,
            random_state=self.SEED,
            n_initial_points=n_init,
        )

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info(
            f"Using skopt.Optimizer with {surrogate_model} base_estimator")
Example 3
    def __init__(
        self,
        problem,
        run,
        evaluator,
        population_size=100,
        sample_size=10,
        plot="true",
        n_jobs=1,
        **kwargs,
    ):
        super().__init__(
            problem=problem,
            run=run,
            evaluator=evaluator,
            population_size=population_size,
            sample_size=sample_size,
            **kwargs,
        )

        self.do_plot = plot == "true"
        self.n_jobs = int(n_jobs)

        # Initialize hyperparameter space

        # self.hp_space = cs.ConfigurationSpace(seed=42)
        # self.hp_space.add_hyperparameter(
        #     check_hyperparameter(
        #         self.problem.space["hyperparameters"]["learning_rate"], "learning_rate"
        #     )
        # )
        # self.hp_space.add_hyperparameter(
        #     check_hyperparameter(
        #         self.problem.space["hyperparameters"]["batch_size"], "batch_size"
        #     )
        # )

        self.hp_space = []
        self.hp_space.append(
            self.problem.space["hyperparameters"]["learning_rate"])

        # plotting
        lr_range = self.problem.space["hyperparameters"]["learning_rate"][:2]
        self.domain_x = np.linspace(*lr_range, 400).reshape(-1, 1)

        # Initialize optimizer of hyperparameter space
        acq_func_kwargs = {"xi": 0.000001, "kappa": 0.001}  # tiny exploration
        self.n_initial_points = self.free_workers

        self.hp_opt = SkOptimizer(
            dimensions=self.hp_space,
            base_estimator=RandomForestRegressor(n_jobs=32),
            # base_estimator=RandomForestRegressor(n_jobs=self.n_jobs),
            acq_func="LCB",
            acq_optimizer="sampling",
            acq_func_kwargs=acq_func_kwargs,
            n_initial_points=self.n_initial_points,
            # model_queue_size=100,
        )
Example 4
def choose_optimizer(optimizer):
    """
    Choose a surrogate model for Bayesian Optimization

    :param optimizer: settings of the BO experiment
    :type optimizer: Optimizer
    :return: surrogate model
    :rtype: scikit object
    """
    params_space_list = dimensions_aslist(optimizer.search_space)
    estimator = None
    # Choice of the surrogate model
    # Random forest
    if optimizer.surrogate_model == "RF":
        estimator = RandomForestRegressor(n_estimators=100,
                                          min_samples_leaf=3,
                                          random_state=optimizer.random_state)
    # Extra Tree
    elif optimizer.surrogate_model == "ET":
        estimator = ExtraTreesRegressor(n_estimators=100,
                                        min_samples_leaf=3,
                                        random_state=optimizer.random_state)
    # GP Minimize
    elif optimizer.surrogate_model == "GP":
        estimator = GaussianProcessRegressor(
            kernel=optimizer.kernel, random_state=optimizer.random_state)
    # Random Search
    elif optimizer.surrogate_model == "RS":
        estimator = "dummy"

    if estimator == "dummy":
        opt = skopt_optimizer(
            params_space_list,
            base_estimator=estimator,
            acq_func=optimizer.acq_func,
            acq_optimizer='sampling',
            initial_point_generator=optimizer.initial_point_generator,
            random_state=optimizer.random_state)
    else:
        opt = skopt_optimizer(
            params_space_list,
            base_estimator=estimator,
            acq_func=optimizer.acq_func,
            acq_optimizer='sampling',
            n_initial_points=optimizer.n_random_starts,
            initial_point_generator=optimizer.initial_point_generator,
            # works only with scikit-optimize 0.8
            acq_optimizer_kwargs={
                "n_points": 10000,
                "n_restarts_optimizer": 5,
                "n_jobs": 1
            },
            acq_func_kwargs={
                "xi": 0.01,
                "kappa": 1.96
            },
            random_state=optimizer.random_state)
    return opt
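For context, a hedged sketch (not taken from the project above) of how skopt's dimensions_aslist turns a dict-style search space into the flat list of dimensions that the Optimizer expects; the parameter names here are illustrative:

from skopt.space import Integer, Real
from skopt.utils import dimensions_aslist

search_space = {
    "learning_rate": Real(1e-4, 1e-1, prior="log-uniform"),
    "num_topics": Integer(5, 50),
}
params_space_list = dimensions_aslist(search_space)
# a plain list of skopt Dimension objects, ready for Optimizer(dimensions=params_space_list)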
Example 5
File: bo.py Project: mborisyak/abo
def rfbo_cycle(ndim,
               space,
               target_f,
               n_iters=10,
               acq_function=ei,
               n_samples=int(1.0e+5),
               model=None,
               show_progress=True):
    xrange = (lambda title, n: tqdm_notebook(range(n), postfix=title)
              ) if show_progress else (lambda title, n: range(n))
    space = np.array(space)

    if model is None:
        model = RandomForestRegressor(n_estimators=200,
                                      n_jobs=20,
                                      min_variance=1.0e-3,
                                      random_state=1234)

    known_points = []
    known_values = []
    cost = []

    for i in xrange('BO iteration', n_iters):
        acq = acq_function(model, known_points, known_values)

        candidates = np.random.uniform(size=(
            n_samples,
            ndim,
        ))
        f = acq(candidates)

        best = np.argmin(f)
        suggestion = reverse_transform(candidates[best].reshape(1, -1),
                                       space)[0, :]

        point_cost, observed = target_f(suggestion)

        known_points.append(suggestion)
        known_values.append(observed)
        cost.append(point_cost)

        model.fit(transform(np.array(known_points), space),
                  np.array(known_values))

        yield model, acq, space, known_points, known_values, cost
Example 6
    def _init_optimizer(self, n_calls):
        return Optimizer(
            dimensions=dimensions_aslist(search_space=self.search_space),
            base_estimator=RandomForestRegressor(n_estimators=10),
            n_initial_points=int(n_calls * self.random_ratio),
            acq_func="EI",
            acq_optimizer="sampling",
            acq_optimizer_kwargs=dict(n_points=1000, n_jobs=-1),
            acq_func_kwargs=dict(xi=0.01, kappa=1.96))
Example 7
    def __init__(
        self,
        problem,
        run,
        evaluator,
        population_size=100,
        sample_size=10,
        n_jobs=1,
        kappa=0.001,
        xi=0.000001,
        **kwargs,
    ):
        super().__init__(
            problem=problem,
            run=run,
            evaluator=evaluator,
            population_size=population_size,
            sample_size=sample_size,
            **kwargs,
        )

        self.n_jobs = int(n_jobs)

        # Initialize hyperparameter space

        self.hp_space = []
        # add the 'learning_rate' space to the HPO search space
        self.hp_space.append(
            self.problem.space["hyperparameters"]["learning_rate"])
        # add the 'batch_size' space to the HPO search space
        self.hp_space.append(
            self.problem.space["hyperparameters"]["batch_size"])
        # add the 'ranks_per_node' space to the HPO search space
        self.hp_space.append(
            self.problem.space["hyperparameters"]["ranks_per_node"])

        # Initialize optimizer of hyperparameter space
        acq_func_kwargs = {
            "xi": float(xi),
            "kappa": float(kappa)
        }  # tiny exploration
        # self.free_workers = 128  #! TODO: test
        self.n_initial_points = self.free_workers

        self.hp_opt = SkOptimizer(
            dimensions=self.hp_space,
            base_estimator=RandomForestRegressor(n_jobs=32),
            # base_estimator=RandomForestRegressor(n_jobs=4),
            acq_func="LCB",
            acq_optimizer="sampling",
            acq_func_kwargs=acq_func_kwargs,
            n_initial_points=self.n_initial_points,
            # model_queue_size=100,
        )
Example 8
    def __init__(self,
                 problem,
                 num_workers,
                 surrogate_model='RF',
                 acq_func='gp_hedge',
                 acq_kappa=1.96,
                 liar_strategy='cl_max',
                 n_jobs=-1,
                 **kwargs):
        assert surrogate_model in [
            "RF", "ET", "GBRT", "GP", "DUMMY"
        ], f"Unknown scikit-optimize base_estimator: {surrogate_model}"
        if surrogate_model == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif surrogate_model == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif surrogate_model == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = surrogate_model

        self.space = problem.space
        cs_kwargs = self.space['create_search_space'].get('kwargs')
        if cs_kwargs is None:
            search_space = self.space['create_search_space']['func']()
        else:
            search_space = self.space['create_search_space']['func'](
                **cs_kwargs)

        # // queue of remaining starting points
        # // self.starting_points = problem.starting_point
        n_init = np.inf if surrogate_model == 'DUMMY' else num_workers

        self.starting_points = []  # ! EMPTY for now TODO

        # Building search space for SkOptimizer
        skopt_space = [(0, vnode.num_ops - 1)
                       for vnode in search_space.variable_nodes]

        self._optimizer = SkOptimizer(skopt_space,
                                      base_estimator=base_estimator,
                                      acq_optimizer='sampling',
                                      acq_func=acq_func,
                                      acq_func_kwargs={'kappa': acq_kappa},
                                      random_state=self.SEED,
                                      n_initial_points=n_init)

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info("Using skopt.Optimizer with %s base_estimator" %
                    surrogate_model)
Example 9
    def __init__(self, problem, run, evaluator, **kwargs):
        super().__init__(problem=problem,
                         run=run,
                         evaluator=evaluator,
                         **kwargs)

        self.free_workers = self.evaluator.num_workers

        dhlogger.info(
            jm(
                type="start_infos",
                alg="bayesian-optimization-for-hpo-nas",
                nworkers=self.evaluator.num_workers,
                encoded_space=json.dumps(self.problem.space, cls=Encoder),
            ))

        # Setup
        self.pb_dict = self.problem.space
        cs_kwargs = self.pb_dict["create_search_space"].get("kwargs")
        if cs_kwargs is None:
            search_space = self.pb_dict["create_search_space"]["func"]()
        else:
            search_space = self.pb_dict["create_search_space"]["func"](
                **cs_kwargs)

        self.space_list = [(0, vnode.num_ops - 1)
                           for vnode in search_space.variable_nodes]

        # Initialize hyperparameter space
        self.dimensions = []
        self.size_ha = None  # Number of algorithm hyperparameters in the dimension list
        self.add_ha_dimensions()
        self.add_hm_dimensions()

        # Initialize optimizer of hyperparameter space
        # acq_func_kwargs = {"xi": 0.000001, "kappa": 0.001}  # tiny exploration
        acq_func_kwargs = {"xi": 0.000001, "kappa": 1.96}  # tiny xi, default kappa
        self.n_initial_points = self.free_workers

        self.opt = SkOptimizer(
            dimensions=self.dimensions,
            base_estimator=RandomForestRegressor(n_jobs=32),
            # base_estimator=RandomForestRegressor(n_jobs=4),
            acq_func="LCB",
            acq_optimizer="sampling",
            acq_func_kwargs=acq_func_kwargs,
            n_initial_points=self.n_initial_points,
            # model_queue_size=100,
        )
Example 10
def cook_estimator(base_estimator, space=None, **kwargs):
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        allowed_estimators = ['GP', 'ET', 'RF', 'GBRT', 'DUMMY']
        if base_estimator not in allowed_estimators:
            raise ValueError(
                'invalid estimator, should be in {}, got {}'.format(
                    allowed_estimators, base_estimator))
    elif not is_regressor(base_estimator):
        raise ValueError('base estimator should be a regressor, got {}'.format(
            base_estimator))

    if base_estimator == 'GP':
        if space is not None:
            # space = Space(space)
            space = Space(normalize_param_space(space))
            n_params = space.transformed_n_params
            is_cat = space.is_categorical
        else:
            raise ValueError('expected a space instance, got None')
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_params))
        else:
            other_kernel = Matern(length_scale=np.ones(n_params),
                                  length_scale_bounds=[(0.01, 100)] * n_params,
                                  nu=2.5)
        base_estimator = GaussianProcessRegressor(kernel=cov_amplitude *
                                                  other_kernel,
                                                  normalize_y=True,
                                                  noise='gaussian',
                                                  n_restarts_optimizer=2)
    elif base_estimator == 'RF':
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == 'ET':
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == 'GBRT':
        grbt = GradientBoostingRegressor(n_estimators=30, loss='quantile')
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=grbt)
    elif base_estimator == 'DUMMY':
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
Example 11
    def __init__(self,
                 problem,
                 num_workers,
                 surrogate_model='RF',
                 acq_func='gp_hedge',
                 acq_kappa=1.96,
                 liar_strategy='cl_max',
                 n_jobs=1,
                 **kwargs):

        assert surrogate_model in [
            "RF", "ET", "GBRT", "GP", "DUMMY"
        ], f"Unknown scikit-optimize base_estimator: {surrogate_model}"

        if surrogate_model == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif surrogate_model == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif surrogate_model == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = surrogate_model

        self.space = problem.space
        # queue of remaining starting points
        self.starting_points = problem.starting_point

        n_init = inf if surrogate_model == 'DUMMY' else max(
            num_workers, len(self.starting_points))

        self._optimizer = SkOptimizer(self.space.values(),
                                      base_estimator=base_estimator,
                                      acq_optimizer='sampling',
                                      acq_func=acq_func,
                                      acq_func_kwargs={'kappa': acq_kappa},
                                      random_state=self.SEED,
                                      n_initial_points=n_init)

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info(
            f"Using skopt.Optimizer with {surrogate_model} base_estimator")
Example 12
    def __init__(
        self,
        problem,
        run,
        evaluator,
        population_size=100,
        sample_size=10,
        n_jobs=1,
        kappa=0.001,
        xi=0.000001,
        acq_func="LCB",
        **kwargs,
    ):
        super().__init__(
            problem=problem,
            run=run,
            evaluator=evaluator,
            population_size=population_size,
            sample_size=sample_size,
            **kwargs,
        )

        self.n_jobs = int(
            n_jobs)  # parallelism of BO surrogate model estimator

        # Initialize hyperparameter space
        self.hp_space = self.problem._hp_space

        # Initialize optimizer of hyperparameter space
        acq_func_kwargs = {
            "xi": float(xi),
            "kappa": float(kappa)
        }  # tiny exploration
        self.n_initial_points = self.free_workers

        self.hp_opt = SkOptimizer(
            dimensions=self.hp_space._space,
            base_estimator=RandomForestRegressor(n_jobs=self.n_jobs),
            acq_func=acq_func,
            acq_optimizer="sampling",
            acq_func_kwargs=acq_func_kwargs,
            n_initial_points=self.n_initial_points,
        )
Example 13
from sklearn.multioutput import MultiOutputRegressor
from numpy.testing import assert_array_equal
from numpy.testing import assert_equal
from numpy.testing import assert_raises

from skopt import gp_minimize
from skopt import forest_minimize
from skopt.benchmarks import bench1, bench1_with_time
from skopt.benchmarks import branin
from skopt.learning import ExtraTreesRegressor, RandomForestRegressor
from skopt.learning import GradientBoostingQuantileRegressor
from skopt.optimizer import Optimizer
from scipy.optimize import OptimizeResult

TREE_REGRESSORS = (ExtraTreesRegressor(random_state=2),
                   RandomForestRegressor(random_state=2),
                   GradientBoostingQuantileRegressor(random_state=2))
ACQ_FUNCS_PS = ["EIps", "PIps"]
ACQ_FUNCS_MIXED = ["EI", "EIps"]
ESTIMATOR_STRINGS = [
    "GP", "RF", "ET", "GBRT", "DUMMY", "gp", "rf", "et", "gbrt", "dummy"
]


@pytest.mark.fast_test
def test_multiple_asks():
    # calling ask() multiple times without a tell() in between should
    # be a "no op"
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)],
                    base_estimator,
Example 14

# generating the data
X = np.linspace(0, 20, 1000).reshape(-1, 1)
y = np.sin(X)/2 - ((10 - X)**2)/50 + 2
with plt.style.context('seaborn-white'):
    plt.figure(figsize=(10, 5))
    plt.plot(X, y, c='k', linewidth=6)
    plt.title('The function to be optimized')
    plt.show()
# assembling initial training set
X_initial, y_initial = X[150].reshape(1, -1), y[150].reshape(1, -1)

# defining the kernel for the Gaussian process
kernel = Matern(length_scale=1.0)
base = RandomForestRegressor()
regressor = RandomForestRegressor(max_depth=3, random_state=0)  # ExtraTreesRegressor()  # GradientBoostingQuantileRegressor(random_state=0)
'''
base = RandomForestRegressor()
regressor = GradientBoostingQuantileRegressor(base_estimator=base)
with pytest.raises(ValueError):
    # 'type GradientBoostingRegressor',
    regressor.fit(X, y)
    '''
#print(regressor)

# initializing the optimizer
optimizer = BayesianOptimizer(
    estimator= regressor,
    X_training=X_initial, y_training=np.ravel(y_initial),
    query_strategy=max_UCB
Example 15
def cook_estimator(base_estimator, space=None, **kwargs):
    """Cook a default estimator

    For the special `base_estimator` called "DUMMY", the return value is None. This corresponds to
    sampling points at random, hence there is no need for an estimator

    Parameters
    ----------
    base_estimator: {SKLearn Regressor, "GP", "RF", "ET", "GBRT", "DUMMY"}, default="GP"
        If not string, should inherit from `sklearn.base.RegressorMixin`. In addition, the `predict`
        method should have an optional `return_std` argument, which returns `std(Y | x)`,
        along with `E[Y | x]`.

        If `base_estimator` is a string in {"GP", "RF", "ET", "GBRT", "DUMMY"}, a surrogate model
        corresponding to the relevant `X_minimize` function is created
    space: `hyperparameter_hunter.space.space_core.Space`
        Required only if the `base_estimator` is a Gaussian Process. Ignored otherwise
    **kwargs: Dict
        Extra parameters provided to the `base_estimator` at initialization time

    Returns
    -------
    SKLearn Regressor
        Regressor instance cooked up according to `base_estimator` and `kwargs`"""
    #################### Validate `base_estimator` ####################
    str_estimators = ["GP", "ET", "RF", "GBRT", "DUMMY"]
    if isinstance(base_estimator, str):
        if base_estimator.upper() not in str_estimators:
            raise ValueError(
                f"Expected `base_estimator` in {str_estimators}. Got {base_estimator}"
            )
        # Convert to upper after error check, so above error shows actual given `base_estimator`
        base_estimator = base_estimator.upper()
    elif not is_regressor(base_estimator):
        raise ValueError("`base_estimator` must be a regressor")

    #################### Get Cooking ####################
    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            # NOTE: Below `normalize_dimensions` is NOT an unnecessary duplicate of the call in
            #   `Optimizer` - `Optimizer` calls `cook_estimator` before its `dimensions` have been
            #   normalized, so `normalize_dimensions` must also be called here
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical
        else:
            raise ValueError("Expected a `Space` instance, not None")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # Only special if *all* dimensions are `Categorical`
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(length_scale=np.ones(n_dims),
                                  length_scale_bounds=[(0.01, 100)] * n_dims,
                                  nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True,
            noise="gaussian",
            n_restarts_optimizer=2,
        )
    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
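A brief, hypothetical usage note for the function above: extra keyword arguments are forwarded to the cooked estimator through set_params, and the "DUMMY" string returns None, which (as the docstring says) corresponds to sampling points at random.

# illustrative calls, assuming cook_estimator as defined above is in scope
rf = cook_estimator("RF", n_estimators=250, min_samples_leaf=5)  # overrides applied via set_params
dummy = cook_estimator("DUMMY")  # returns None -> points are drawn at random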
Example 16
OptimalParams4 = ([])
OptimalParams_NoBatch = ([])

OptimalScore1 = ([])
OptimalScore2 = ([])
OptimalScore3 = ([])
OptimalScore4 = ([])
OptimalScore_NoBatch = ([])

# First network, original network, two dense layers
for i in range(0, 3):

    bo_rf_41 = Optimizer(
        dimensions=dimensions_4,
        base_estimator=RandomForestRegressor(n_estimators=100,
                                             n_jobs=4,
                                             min_variance=1.0e-6),
        n_initial_points=10,
        acq_func='EI',
    )

    bo = bo_rf_41

    for j in range(100):
        x = bo.ask()
        print(x)
        f = target_function1(x)  # Other inputs are automatically set

        bo.tell(x, f)

        #plot_convergence(bo)
Example 17
def cook_estimator(base_estimator, space=None, **kwargs):
    """
    Cook a default estimator.
    For the special base_estimator called "DUMMY" the return value is None.
    This corresponds to sampling points at random, hence there is no need
    for an estimator.
    Parameters
    ----------
    * `base_estimator` ["GP", "RF", "ET", "GBRT", "DUMMY"
                        or sklearn regressor, default="GP"]:
        Should inherit from `sklearn.base.RegressorMixin`.
        In addition the `predict` method should have an optional `return_std`
        argument, which returns `std(Y | x)`` along with `E[Y | x]`.
        If base_estimator is one of ["GP", "RF", "ET", "GBRT", "DUMMY"], a
        surrogate model corresponding to the relevant `X_minimize` function
        is created.
    * `space` [Space instance]:
        Has to be provided if the base_estimator is a gaussian process.
        Ignored otherwise.
    * `kwargs` [dict]:
        Extra parameters provided to the base_estimator at init time.
    """
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        if base_estimator not in ["GP", "ET", "RF", "GBRT", "DUMMY", "GPM32", "GPM1", "RBF", "RQ"]:
            raise ValueError("Valid strings for the base_estimator parameter "
                             " are: 'RF', 'ET', 'GP', 'GBRT' or 'DUMMY' not "
                             "%s." % base_estimator)
    elif not is_regressor(base_estimator):
        raise ValueError("base_estimator has to be a regressor.")

    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical

        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "GPM32":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical

        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=1.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "GPM1":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical

        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=1.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "RBF":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        other_kernel = RBF(length_scale=np.ones(n_dims))

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "RQ":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        other_kernel = RationalQuadratic(length_scale=np.ones(n_dims), alpha=0.1)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)

    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
Example 18
    def __init__(self,
                 dimensions,
                 base_estimator="GP",
                 maximize=True,
                 n_random_starts=10,
                 acq_func="LCB",
                 acq_optimizer="lbfgs",
                 random_state=None,
                 n_points=10000,
                 n_restarts_optimizer=5,
                 xi=0.01,
                 kappa=1.96,
                 n_jobs=1):
        if not skopt_available:
            raise ImportError("skopt is not installed correctly")
        self.maximize = maximize
        self.n_params = len(dimensions)

        rng = check_random_state(random_state)
        if isinstance(base_estimator, str):
            if base_estimator == "RF":
                base_estimator = RandomForestRegressor(n_estimators=100,
                                                       min_samples_leaf=3,
                                                       n_jobs=n_jobs,
                                                       random_state=rng)
            elif base_estimator == "ET":
                base_estimator = ExtraTreesRegressor(n_estimators=100,
                                                     min_samples_leaf=3,
                                                     n_jobs=n_jobs,
                                                     random_state=rng)
            elif base_estimator == "GP":
                cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
                matern = Matern(length_scale=np.ones(len(dimensions)),
                                length_scale_bounds=[(0.01, 100)] *
                                len(dimensions),
                                nu=2.5)
                base_estimator = GaussianProcessRegressor(
                    kernel=cov_amplitude * matern,
                    normalize_y=True,
                    random_state=rng,
                    alpha=0.0,
                    noise="gaussian",
                    n_restarts_optimizer=2)
            elif base_estimator == "GBRT":
                gbrt = GradientBoostingRegressor(n_estimators=30,
                                                 loss="quantile")
                base_estimator = GradientBoostingQuantileRegressor(
                    base_estimator=gbrt, n_jobs=n_jobs, random_state=rng)
            else:
                raise ValueError(
                    "Valid strings for the base_estimator parameter"
                    " are: 'RF', 'ET', or 'GP', not '%s'" % base_estimator)

        acq_func_kwargs = {"xi": xi, "kappa": kappa}
        acq_optimizer_kwargs = {
            "n_points": n_points,
            "n_restarts_optimizer": n_restarts_optimizer,
            "n_jobs": n_jobs
        }
        self.optimizer = _SkOptOptimizer(
            dimensions=dimensions,
            base_estimator=base_estimator,
            n_initial_points=n_random_starts,
            acq_func=acq_func,
            acq_optimizer=acq_optimizer,
            random_state=random_state,
            acq_func_kwargs=acq_func_kwargs,
            acq_optimizer_kwargs=acq_optimizer_kwargs)
Example 19
def test_random_forest():
    # toy sample
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
    y = [-1, -1, -1, 1, 1, 1]
    T = [[-1, -1], [2, 2], [3, 2]]
    true_result = [-1, 1, 1]

    clf = RandomForestRegressor(n_estimators=10, random_state=1)
    clf.fit(X, y)

    assert_array_equal(clf.predict(T), true_result)
    assert 10 == len(clf)

    clf = RandomForestRegressor(n_estimators=10,
                                min_impurity_decrease=0.1,
                                random_state=1)
    clf.fit(X, y)

    assert_array_equal(clf.predict(T), true_result)
    assert 10 == len(clf)

    clf = RandomForestRegressor(n_estimators=10,
                                criterion="mse",
                                max_depth=None,
                                min_samples_split=2,
                                min_samples_leaf=1,
                                min_weight_fraction_leaf=0.,
                                max_features="auto",
                                max_leaf_nodes=None,
                                min_impurity_decrease=0.,
                                bootstrap=True,
                                oob_score=False,
                                n_jobs=1,
                                random_state=1,
                                verbose=0,
                                warm_start=False)
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert 10 == len(clf)

    clf = RandomForestRegressor(n_estimators=10,
                                max_features=1,
                                random_state=1)
    clf.fit(X, y)
    assert_array_equal(clf.predict(T), true_result)
    assert 10 == len(clf)

    # also test apply
    leaf_indices = clf.apply(X)
    assert leaf_indices.shape == (len(X), clf.n_estimators)
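As with the earlier test excerpt, the imports are not shown; a sketch of what this test assumes:

from numpy.testing import assert_array_equal
from skopt.learning import RandomForestRegressor  # supports len() and apply() via the sklearn base class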
Example 20
    def __init__(
        self,
        problem,
        num_workers,
        surrogate_model="RF",
        acq_func="gp_hedge",
        acq_kappa=1.96,
        liar_strategy="cl_max",
        n_jobs=1,
        **kwargs,
    ):
        assert surrogate_model in [
            "RF",
            "ET",
            "GBRT",
            "GP",
            "DUMMY",
        ], f"Unknown scikit-optimize base_estimator: {surrogate_model}"
        if surrogate_model == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif surrogate_model == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif surrogate_model == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = surrogate_model

        self.problem = problem
        cs_kwargs = self.problem.space["create_search_space"].get("kwargs")
        if cs_kwargs is None:
            search_space = self.problem.space["create_search_space"]["func"]()
        else:
            search_space = self.problem.space["create_search_space"]["func"](
                **cs_kwargs)

        n_init = np.inf if surrogate_model == "DUMMY" else num_workers

        self.starting_points = []  # ! EMPTY for now TODO

        # Building search space for SkOptimizer using ConfigSpace
        skopt_space = cs.ConfigurationSpace(seed=self.problem.seed)
        for i, vnode in enumerate(search_space.variable_nodes):
            hp = csh.UniformIntegerHyperparameter(name=f"vnode_{i}",
                                                  lower=0,
                                                  upper=(vnode.num_ops - 1))
            skopt_space.add_hyperparameter(hp)

        self._optimizer = SkOptimizer(
            skopt_space,
            base_estimator=base_estimator,
            acq_optimizer="sampling",
            acq_func=acq_func,
            acq_func_kwargs={"kappa": acq_kappa},
            random_state=self.SEED,
            n_initial_points=n_init,
        )

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info("Using skopt.Optimizer with %s base_estimator" %
                    surrogate_model)
# The optimization algorithm, `opt`, needs two methods:
#
# - `opt.tell`, used like `opt.tell([armId], loss)`, to report an observed "loss" (`loss = - reward`) for arm #`armId` to the algorithm.
# - `opt.ask`, used like `asked = opt.ask()`, to ask the algorithm which arm should be sampled next.
#
# Let us use a simple *Black-Box Bayesian* algorithm, implemented in the [scikit-optimize (`skopt`)](https://scikit-optimize.github.io/) package: [`RandomForestRegressor`](https://scikit-optimize.github.io/learning/index.html#skopt.learning.RandomForestRegressor).
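# A hedged, self-contained sketch (not one of this notebook's cells) of that ask/tell
# protocol with a skopt Optimizer over arm indices; the number of arms and the reward
# values below are purely illustrative:

from skopt import Optimizer
from skopt.learning import RandomForestRegressor

sketch_opt = Optimizer([(0, 2)],                  # 3 arms, indexed 0..2
                       RandomForestRegressor(),
                       acq_optimizer="sampling",  # sampling-based acquisition optimization, used with tree surrogates
                       n_initial_points=5)
for _ in range(10):
    armId = sketch_opt.ask()[0]                   # which arm to sample next
    reward = (0.1, 0.5, 0.9)[armId]               # toy expected rewards per arm
    sketch_opt.tell([armId], -reward)             # loss = - reward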

# In[9]:

from skopt.learning import RandomForestRegressor

# First, we need to create a model.

# In[10]:

our_est = RandomForestRegressor()

# In[11]:

get_ipython().run_line_magic('pinfo', 'our_est')

# Then the optimization process is using the [`Optimizer`](https://scikit-optimize.github.io/#skopt.Optimizer) class from [`skopt`](https://scikit-optimize.github.io/).

# In[12]:

from skopt import Optimizer

# In[13]:


def arms_optimizer(nbArms, est):