def _get_gp_est(space, **kwargs):
    """Build a default Gaussian-process surrogate for `space`.

    Parameters
    ----------
    space : list of dimension definitions or skopt Space
        Search space; it is normalized so every dimension is transformed
        to the unit hypercube before the kernel is sized.
    kwargs : dict
        Extra parameters applied to the regressor via `set_params`.

    Returns
    -------
    skopt.learning.GaussianProcessRegressor
        GP with a constant-amplitude kernel times either a Hamming kernel
        (all-categorical space) or a Matern-5/2 kernel.
    """
    # BUG FIX: `np` was used below but never imported; all other
    # dependencies are imported function-locally, so numpy is too.
    import numpy as np
    from skopt.utils import Space
    from skopt.utils import normalize_dimensions
    from skopt.utils import ConstantKernel, HammingKernel, Matern
    from skopt.learning import GaussianProcessRegressor

    # Set space: normalize so transformed dimensions are comparable
    space = Space(space)
    space = Space(normalize_dimensions(space.dimensions))
    n_dims = space.transformed_n_dims

    cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
    # If all dimensions are categorical, use Hamming kernel
    if space.is_categorical:
        other_kernel = HammingKernel(length_scale=np.ones(n_dims))
    else:
        other_kernel = Matern(length_scale=np.ones(n_dims),
                              length_scale_bounds=[(0.01, 100)] * n_dims,
                              nu=2.5)

    base_estimator = GaussianProcessRegressor(kernel=cov_amplitude * other_kernel,
                                              normalize_y=True, noise="gaussian",
                                              n_restarts_optimizer=2)
    base_estimator.set_params(**kwargs)
    return base_estimator
def cook_estimator(base_estimator, space=None, **kwargs):
    """Cook a default estimator.

    For the special base_estimator called "DUMMY" the return value is None.
    This corresponds to sampling points at random, hence there is no need
    for an estimator.

    Parameters
    ----------
    base_estimator : "GP", "RF", "ET", "GBRT", "DUMMY" or sklearn regressor
        Should inherit from `sklearn.base.RegressorMixin`. In addition the
        `predict` method should have an optional `return_std` argument,
        which returns `std(Y | x)` along with `E[Y | x]`. If base_estimator
        is one of ["GP", "RF", "ET", "GBRT", "DUMMY"], a surrogate model
        corresponding to the relevant `X_minimize` function is created.

    space : Space instance
        Has to be provided if the base_estimator is a gaussian process.
        Ignored otherwise.

    kwargs : dict
        Extra parameters provided to the base_estimator at init time.
    """
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        if base_estimator not in ["GP", "ET", "RF", "GBRT", "DUMMY"]:
            raise ValueError("Valid strings for the base_estimator parameter "
                             " are: 'RF', 'ET', 'GP', 'GBRT' or 'DUMMY' not "
                             "%s." % base_estimator)
    elif not is_regressor(base_estimator):
        raise ValueError("base_estimator has to be a regressor.")

    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical
        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(length_scale=np.ones(n_dims),
                                  length_scale_bounds=[(0.01, 100)] * n_dims,
                                  nu=2.5)
        base_estimator = GaussianProcessRegressor(kernel=cov_amplitude * other_kernel,
                                                  normalize_y=True, noise="gaussian",
                                                  n_restarts_optimizer=2)
    elif base_estimator == "DUMMY":
        # BUG FIX: the docstring promises None for "DUMMY", but previously
        # the string fell through to `set_params` below and raised
        # AttributeError instead.
        return None
    # NOTE(review): "RF", "ET" and "GBRT" pass the validation above but no
    # branch constructs them here, so they would still reach `set_params`
    # on a plain string and fail -- presumably cooked elsewhere; confirm.

    # Drop `n_jobs` when the cooked estimator does not expose it.
    if ('n_jobs' in kwargs.keys()) and not hasattr(base_estimator, 'n_jobs'):
        del kwargs['n_jobs']
    base_estimator.set_params(**kwargs)
    return base_estimator
def cook_estimator(base_estimator, space=None, **kwargs):
    """Cook a default estimator.

    For the special value 'DUMMY' the return value is None: points are
    sampled at random, so no surrogate model is needed.

    Parameters
    ----------
    base_estimator : {'GP', 'RF', 'ET', 'GBRT', 'DUMMY'} or sklearn regressor
        Name of a built-in surrogate, or a ready regressor instance.
    space : space instance, optional
        Required when `base_estimator` is 'GP'; ignored otherwise.
    kwargs : dict
        Extra parameters applied to the estimator via `set_params`.
    """
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        allowed_estimators = ['GP', 'ET', 'RF', 'GBRT', 'DUMMY']
        if base_estimator not in allowed_estimators:
            raise ValueError(
                'invalid estimator, should be in {}, got {}'.format(
                    allowed_estimators, base_estimator))
    elif not is_regressor(base_estimator):
        raise ValueError('base estimator should be a regressor, got {}'.format(
            base_estimator))

    if base_estimator == 'GP':
        if space is not None:
            space = Space(normalize_param_space(space))
            n_params = space.transformed_n_params
            is_cat = space.is_categorical
        else:
            raise ValueError('expected a space instance, got None')

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # Hamming kernel only when *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_params))
        else:
            other_kernel = Matern(length_scale=np.ones(n_params),
                                  length_scale_bounds=[(0.01, 100)] * n_params,
                                  nu=2.5)
        base_estimator = GaussianProcessRegressor(kernel=cov_amplitude * other_kernel,
                                                  normalize_y=True,
                                                  noise='gaussian',
                                                  n_restarts_optimizer=2)
    elif base_estimator == 'RF':
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == 'ET':
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == 'GBRT':
        # BUG FIX: this branch previously tested for the misspelling 'GRBT',
        # which the validation above never lets through, so a valid 'GBRT'
        # argument fell out of the chain and crashed on `set_params`.
        grbt = GradientBoostingRegressor(n_estimators=30, loss='quantile')
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=grbt)
    elif base_estimator == 'DUMMY':
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
def cook_estimator(base_estimator, space=None, **kwargs):
    """Build the surrogate regressor requested by `base_estimator`.

    "DUMMY" yields None, which signals pure random sampling with no
    surrogate at all.

    Parameters
    ----------
    base_estimator: {SKLearn Regressor, "GP", "RF", "ET", "GBRT", "DUMMY"}, default="GP"
        Either a recognized string, or a regressor inheriting from
        `sklearn.base.RegressorMixin` whose `predict` accepts an optional
        `return_std` argument yielding `std(Y | x)` next to `E[Y | x]`
    space: `hyperparameter_hunter.space.space_core.Space`
        Only consulted for the Gaussian Process surrogate; ignored otherwise
    **kwargs: Dict
        Keyword arguments forwarded to the surrogate through `set_params`

    Returns
    -------
    SKLearn Regressor
        The configured surrogate, or None for "DUMMY\""""
    #################### Validate `base_estimator` ####################
    allowed = ["GP", "ET", "RF", "GBRT", "DUMMY"]
    if isinstance(base_estimator, str):
        # Raise with the caller's original casing, then normalize to upper
        if base_estimator.upper() not in allowed:
            raise ValueError(
                f"Expected `base_estimator` in {allowed}. Got {base_estimator}"
            )
        base_estimator = base_estimator.upper()
    elif not is_regressor(base_estimator):
        raise ValueError("`base_estimator` must be a regressor")

    #################### Get Cooking ####################
    if base_estimator == "DUMMY":
        return None

    if base_estimator == "GP":
        if space is None:
            raise ValueError("Expected a `Space` instance, not None")
        normalized = Space(space)
        # NOTE: `Optimizer` invokes `cook_estimator` before normalizing its
        # own `dimensions`, so `normalize_dimensions` must be applied here
        # as well -- this is not a redundant call
        normalized = Space(normalize_dimensions(normalized.dimensions))
        dim_count = normalized.transformed_n_dims

        amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # Hamming applies only when every dimension is `Categorical`
        if normalized.is_categorical:
            stationary = HammingKernel(length_scale=np.ones(dim_count))
        else:
            stationary = Matern(
                length_scale=np.ones(dim_count),
                length_scale_bounds=[(0.01, 100)] * dim_count,
                nu=2.5,
            )
        base_estimator = GaussianProcessRegressor(
            kernel=amplitude * stationary,
            normalize_y=True,
            noise="gaussian",
            n_restarts_optimizer=2,
        )
    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100, min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100, min_samples_leaf=3)
    elif base_estimator == "GBRT":
        base_estimator = GradientBoostingQuantileRegressor(
            base_estimator=GradientBoostingRegressor(n_estimators=30, loss="quantile")
        )

    base_estimator.set_params(**kwargs)
    return base_estimator
def cook_estimator(base_estimator, space=None, **kwargs):
    """
    Cook a default estimator.

    For the special base_estimator called "DUMMY" the return value is None.
    This corresponds to sampling points at random, hence there is no need
    for an estimator.

    Parameters
    ----------
    * `base_estimator` ["GP", "GPM32", "GPM1", "RBF", "RQ", "RF", "ET",
      "GBRT", "DUMMY" or sklearn regressor, default="GP"]:
        Should inherit from `sklearn.base.RegressorMixin`. In addition the
        `predict` method should have an optional `return_std` argument,
        which returns `std(Y | x)` along with `E[Y | x]`. If base_estimator
        is one of the recognized strings, a surrogate model corresponding
        to the relevant `X_minimize` function is created.

    * `space` [Space instance]:
        Has to be provided if the base_estimator is a gaussian process
        variant ("GP", "GPM32", "GPM1", "RBF", "RQ"). Ignored otherwise.

    * `kwargs` [dict]:
        Extra parameters provided to the base_estimator at init time.
    """
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        if base_estimator not in ["GP", "ET", "RF", "GBRT", "DUMMY",
                                  "GPM32", "GPM1", "RBF", "RQ"]:
            raise ValueError("Valid strings for the base_estimator parameter "
                             " are: 'RF', 'ET', 'GP', 'GBRT' or 'DUMMY' not "
                             "%s." % base_estimator)
    elif not is_regressor(base_estimator):
        raise ValueError("base_estimator has to be a regressor.")

    def _space_info(space):
        # Shared normalization for every GP-family branch; raises instead of
        # silently leaving n_dims unbound when no space was given.
        if space is None:
            raise ValueError("Expected a Space instance, not None.")
        space = Space(space)
        space = Space(normalize_dimensions(space.dimensions))
        return space.transformed_n_dims, space.is_categorical

    def _gp(other_kernel):
        # Common GP construction: constant amplitude times the given kernel.
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        return GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    if base_estimator in ("GP", "GPM32", "GPM1"):
        n_dims, is_cat = _space_info(space)
        # "GP" is Matern-5/2; "GPM32" is Matern-3/2 (nu=1.5).
        # NOTE(review): "GPM1" also used nu=1.5 in the original, identical
        # to "GPM32" -- looks like nu=0.5 may have been intended; preserved
        # as-is pending confirmation.
        nu = 2.5 if base_estimator == "GP" else 1.5
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims,
                nu=nu)
        base_estimator = _gp(other_kernel)
    elif base_estimator == "RBF":
        # BUG FIX: previously a missing `else: raise` meant `space=None`
        # crashed with UnboundLocalError instead of a clear ValueError.
        n_dims, _ = _space_info(space)
        base_estimator = _gp(RBF(length_scale=np.ones(n_dims)))
    elif base_estimator == "RQ":
        # BUG FIX: same missing space check as the "RBF" branch.
        n_dims, _ = _space_info(space)
        base_estimator = _gp(
            RationalQuadratic(length_scale=np.ones(n_dims), alpha=0.1))
    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator