def test_gp_regressor():
    rng = np.random.RandomState(0)
    X = np.asarray([["ham", "spam", "ted"], ["ham", "ted", "ted"],
                    ["ham", "spam", "spam"]])
    y = rng.randn(3)
    hm = HammingKernel(length_scale=[1.0, 1.0, 1.0])

    gpr = GaussianProcessRegressor(hm)
    gpr.fit(X, y)
    assert_array_almost_equal(gpr.predict(X), y)
    assert_array_almost_equal(gpr.predict(X[:2]), y[:2])
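A minimal sketch of what the kernel itself computes (assuming scikit-optimize is installed): with unit length scales, the HammingKernel's Gram entries are exp(-m), where m is the number of mismatching positions between two rows.

import numpy as np
from skopt.learning.gaussian_process.kernels import HammingKernel

X = np.asarray([["ham", "spam", "ted"],
                ["ham", "ted", "ted"]])
hm = HammingKernel(length_scale=[1.0, 1.0, 1.0])
K = hm(X)
print(np.diag(K))  # [1. 1.]: identical rows have zero mismatches
print(K[0, 1])     # rows differ in 1 of 3 positions -> exp(-1) ~ 0.368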
Example #2
    def __init__(self, dimensions_file: str, min_num_results_to_fit: int = 8, lease_timeout='2 days'):
        self.__all_experiments = pd.DataFrame()
        self.__all_experiments['status'] = [self.WAITING] * len(self.__all_experiments)
        self.__all_experiments['last_update'] = pd.Series(pd.NaT)
        self.__all_experiments['client'] = [""] * len(self.__all_experiments)

        self.__lease_duration = pd.to_timedelta(lease_timeout)
        self.__leased_experiments = []

        dims = self.__load_dimensions(dimensions_file)
        self.__dimension_names = list(dims.keys())
        self.__dimensions = list(dims.values())
        self.__min_num_results_to_fit = min_num_results_to_fit

        # Initialize the transformed search space and the GP surrogate

        dim_types = [check_dimension(d) for d in self.__dimensions]
        is_cat = all(isinstance(d, Categorical) for d in dim_types)
        if is_cat:
            transformed_dims = [check_dimension(d, transform="identity") for d in self.__dimensions]
        else:
            transformed_dims = []
            for dim_type, dim in zip(dim_types, self.__dimensions):
                if isinstance(dim_type, Categorical):
                    transformed_dims.append(check_dimension(dim, transform="onehot"))
                else:
                    # Normalize so that the GP operates in the [0, 1] space
                    transformed_dims.append(check_dimension(dim, transform="normalize"))

        space = Space(transformed_dims)
        # Default GP
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))

        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(space.transformed_n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(space.transformed_n_dims),
                length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims,
                nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, random_state=None, alpha=0.0, noise='gaussian',
            n_restarts_optimizer=2)

        self.__opt = Optimizer(self.__dimensions, base_estimator, acq_optimizer="lbfgs",
                               n_random_starts=100, acq_optimizer_kwargs=dict(n_points=10000))
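A minimal sketch of the transform choice above, using skopt's public helpers instead of per-dimension check_dimension calls: numeric dimensions are rescaled to [0, 1] and categoricals become one-hot columns, which is what keeps the kernel's length scales comparable.

from skopt.space import Space, Real, Integer, Categorical
from skopt.utils import normalize_dimensions

dims = [Real(1e-4, 1e-1), Integer(8, 128),
        Categorical(["adam", "sgd", "rmsprop"])]
space = Space(normalize_dimensions(dims))
print(space.transformed_n_dims)  # 5: two normalized numerics + 3 one-hot columns
print(space.is_categorical)      # False, so the Matern branch would be taken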
Example #3
def cook_estimator(base_estimator, space=None, **kwargs):
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        allowed_estimators = ['GP', 'ET', 'RF', 'GBRT', 'DUMMY']
        if base_estimator not in allowed_estimators:
            raise ValueError(
                'invalid estimator, should be in {}, got {}'.format(
                    allowed_estimators, base_estimator))
    elif not is_regressor(base_estimator):
        raise ValueError('base estimator should be a regressor, got {}'.format(
            base_estimator))

    if base_estimator == 'GP':
        if space is not None:
            # space = Space(space)
            space = Space(normalize_param_space(space))
            n_params = space.transformed_n_params
            is_cat = space.is_categorical
        else:
            raise ValueError('expected a space instance, got None')
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_params))
        else:
            other_kernel = Matern(length_scale=np.ones(n_params),
                                  length_scale_bounds=[(0.01, 100)] * n_params,
                                  nu=2.5)
        base_estimator = GaussianProcessRegressor(kernel=cov_amplitude *
                                                  other_kernel,
                                                  normalize_y=True,
                                                  noise='gaussian',
                                                  n_restarts_optimizer=2)
    elif base_estimator == 'RF':
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == 'ET':
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == 'GBRT':
        gbrt = GradientBoostingRegressor(n_estimators=30, loss='quantile')
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == 'DUMMY':
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
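A usage sketch for this cook_estimator (given the definitions above): only the 'GP' branch needs a space, and extra keyword arguments land in the cooked regressor via set_params.

rf = cook_estimator('RF', max_depth=8)   # space is ignored for tree models
print(rf.get_params()['max_depth'])      # 8, applied via set_params
print(cook_estimator('DUMMY'))           # None: points are sampled at random
try:
    cook_estimator('GP')                 # 'GP' requires a space instance
except ValueError as err:
    print(err)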
Example #4
def test_distance_string():
    # Inspired by test_hamming_string_array in scipy.tests.test_distance
    a = np.array([
        'eggs', 'spam', 'spam', 'eggs', 'spam', 'spam', 'spam', 'spam', 'spam',
        'spam', 'spam', 'eggs', 'eggs', 'spam', 'eggs', 'eggs', 'eggs', 'eggs',
        'eggs', 'spam'
    ],
                 dtype='|S4')
    b = np.array([
        'eggs', 'spam', 'spam', 'eggs', 'eggs', 'spam', 'spam', 'spam', 'spam',
        'eggs', 'spam', 'eggs', 'spam', 'eggs', 'spam', 'spam', 'eggs', 'spam',
        'spam', 'eggs'
    ],
                 dtype='|S4')
    true_values = np.array([[0, 0.45], [0.45, 0]])
    X = np.vstack((a, b))
    hm = HammingKernel()
    assert_array_almost_equal(-np.log(hm(X)) / 20.0, true_values)
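The expected 0.45 is simply the mismatch fraction: a and b disagree in 9 of their 20 positions, and with unit length scales -log(K[i, j]) counts mismatches. A smaller self-contained check of the same identity:

import numpy as np
from skopt.learning.gaussian_process.kernels import HammingKernel

a = np.array(["eggs", "spam", "spam", "eggs"])
b = np.array(["eggs", "eggs", "spam", "spam"])
X = np.vstack((a, b))             # the rows differ in 2 of 4 positions
K = HammingKernel()(X)
print(-np.log(K[0, 1]) / len(a))  # 0.5, the Hamming mismatch fraction
print(np.mean(a != b))            # 0.5, computed directly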
Example #5
def test_gp_regressor():
    rng = np.random.RandomState(0)
    X = np.asarray([["ham", "spam", "ted"], ["ham", "ted", "ted"],
                    ["ham", "spam", "spam"]])
    y = rng.randn(3)
    hm = HammingKernel(length_scale=[1.0, 1.0, 1.0])
    if UseOrdinalEncoder:
        enc = OrdinalEncoder()
        enc.fit(X)

    gpr = GaussianProcessRegressor(hm)
    if UseOrdinalEncoder:
        gpr.fit(enc.transform(X), y)
        assert_array_almost_equal(gpr.predict(enc.transform(X)), y)
        assert_array_almost_equal(gpr.predict(enc.transform(X[:2])), y[:2])
    else:
        gpr.fit(X, y)
        assert_array_almost_equal(gpr.predict(X), y)
        assert_array_almost_equal(gpr.predict(X[:2]), y[:2])
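What the UseOrdinalEncoder branch feeds the GP: sklearn's OrdinalEncoder maps each string column to integer codes, so the kernel compares numbers instead of strings. Illustrated on the same X:

import numpy as np
from sklearn.preprocessing import OrdinalEncoder

X = np.asarray([["ham", "spam", "ted"],
                ["ham", "ted", "ted"],
                ["ham", "spam", "spam"]])
print(OrdinalEncoder().fit_transform(X))
# [[0. 0. 1.]
#  [0. 1. 1.]
#  [0. 0. 0.]]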
Example #6
def cook_estimator(base_estimator, space=None, **kwargs):
    """Cook a default estimator

    For the special `base_estimator` called "DUMMY", the return value is None. This corresponds to
    sampling points at random, hence there is no need for an estimator.

    Parameters
    ----------
    base_estimator: {SKLearn Regressor, "GP", "RF", "ET", "GBRT", "DUMMY"}, default="GP"
        If not a string, should inherit from `sklearn.base.RegressorMixin`. In addition, the `predict`
        method should have an optional `return_std` argument, which returns `std(Y | x)`,
        along with `E[Y | x]`.

        If `base_estimator` is a string in {"GP", "RF", "ET", "GBRT", "DUMMY"}, a surrogate model
        corresponding to the relevant `X_minimize` function is created.
    space: `hyperparameter_hunter.space.space_core.Space`
        Required only if the `base_estimator` is a Gaussian Process. Ignored otherwise
    **kwargs: Dict
        Extra parameters provided to the `base_estimator` at initialization time

    Returns
    -------
    SKLearn Regressor
        Regressor instance cooked up according to `base_estimator` and `kwargs`"""
    #################### Validate `base_estimator` ####################
    str_estimators = ["GP", "ET", "RF", "GBRT", "DUMMY"]
    if isinstance(base_estimator, str):
        if base_estimator.upper() not in str_estimators:
            raise ValueError(
                f"Expected `base_estimator` in {str_estimators}. Got {base_estimator}"
            )
        # Convert to upper after error check, so above error shows actual given `base_estimator`
        base_estimator = base_estimator.upper()
    elif not is_regressor(base_estimator):
        raise ValueError("`base_estimator` must be a regressor")

    #################### Get Cooking ####################
    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            # NOTE: Below `normalize_dimensions` is NOT an unnecessary duplicate of the call in
            #   `Optimizer` - `Optimizer` calls `cook_estimator` before its `dimensions` have been
            #   normalized, so `normalize_dimensions` must also be called here
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical
        else:
            raise ValueError("Expected a `Space` instance, not None")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # Only special if *all* dimensions are `Categorical`
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(length_scale=np.ones(n_dims),
                                  length_scale_bounds=[(0.01, 100)] * n_dims,
                                  nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True,
            noise="gaussian",
            n_restarts_optimizer=2,
        )
    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
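Usage sketch (names as defined above; the dimension format mirrors skopt's, where tuples become Real or Integer dimensions): cooking a GP for a mixed space selects the Matern kernel, and **kwargs are applied afterwards through set_params.

est = cook_estimator("GP", space=[(0.0, 1.0), (1, 16)], random_state=42)
print(est.kernel)                       # e.g. 1**2 * Matern(length_scale=[1, 1], nu=2.5)
print(cook_estimator("DUMMY") is None)  # True: random sampling, no surrogate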
Example #7
    def __init__(self,
                 hyper_param_conf,
                 command,
                 expdir,
                 exp_recipe_dir,
                 recipe,
                 computing,
                 exp_proposal_watch_dir=None):
        base_estimator = 'boundedGP'  # matched by the branch below that builds the bounded GP

        self.hyper_param_conf = hyper_param_conf
        self.command = command
        self.expdir = expdir
        self.exp_recipe_dir = exp_recipe_dir
        self.recipe = recipe
        self.computing = computing

        # read the hyper parameter file
        hyper_param_cfg = configparser.ConfigParser()
        hyper_param_cfg.read(hyper_param_conf)

        hyper_info = dict(hyper_param_cfg.items('info'))
        self.hyper_param_names = hyper_info['hyper_params'].split(' ')
        self.num_iters = int(hyper_info['num_iters'])
        self.n_initial_points = int(hyper_info['n_initial_points'])
        self.n_initial_points_to_start = int(
            hyper_info['n_initial_points_to_start'])
        self.max_parallel_jobs = int(hyper_info['max_parallel_jobs'])
        self.selected_segment_length = hyper_info['segment_length']
        self.selected_task = hyper_info['task']

        if 'adapt_hyper_param' in hyper_info:
            self.adapt_param = {
                'param_name': hyper_info['adapt_hyper_param'],
                'param_thr': int(hyper_info['param_thr']),
                'par_cnt_scheme': hyper_info['par_cnt_scheme']
            }
        else:
            self.adapt_param = None

        hyper_param_dict = dict()
        skopt_dims = []
        for par_name in self.hyper_param_names:
            par_dict = dict(hyper_param_cfg.items(par_name))
            par_type = par_dict['type']
            if par_type == 'Integer':
                skopt_dim = skopt_space.Integer(low=int(par_dict['min']),
                                                high=int(par_dict['max']),
                                                name=par_name)

            elif par_type == 'Real':
                skopt_dim = skopt_space.Real(low=float(par_dict['min']),
                                             high=float(par_dict['max']),
                                             name=par_name)

            elif par_type == 'Categorical':
                skopt_dim = skopt_space.Categorical(
                    categories=par_dict['categories'].split(' '),
                    name=par_name)

            else:
                raise ValueError('Type %s is not a valid parameter type' %
                                 par_type)

            hyper_param_dict[par_name] = par_dict
            skopt_dims.append(skopt_dim)

        self.hyper_param_dict = hyper_param_dict
        self.skopt_dims = skopt_dims

        self.last_result = None
        # self.all_results = []

        self.start_new_run_flag = True
        self.iter_ind = 0
        self.watch_list = dict()
        self.all_dim_values = []
        self.all_losses = dict()
        self.n_job_running = 0
        self.n_initial_points_started = 0
        self.n_unsuitable_points_for_estimator = 0
        self.max_n_unsuitable_points_for_estimator = 10000
        self.unsuitable_runs = []
        self.lost_runs = []

        self.exp_proposal_watch_dir = exp_proposal_watch_dir
        self.use_proposal_run = False
        self.proposed_loss_runs = []

        # Only 0.25% of the points sampled in the hyperparameter space are wanted (since they
        # lead to roughly the wanted number of trainable parameters)
        self.acq_optimizer_kwargs = {'n_points': 4000000}
        if 'debug' in expdir:
            self.acq_optimizer_kwargs = {'n_points': 40000}

        if base_estimator == 'boundedGP':
            # Make own estimator based on Gaussian Process Regressor.
            if skopt_dims is not None:
                space = Space(skopt_dims)
                space = Space(normalize_dimensions(space.dimensions))
                n_dims = space.transformed_n_dims
                is_cat = space.is_categorical

            else:
                raise ValueError("Expected a Space instance, not None.")

            cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
            # only special if *all* dimensions are categorical
            if is_cat:
                other_kernel = HammingKernel(length_scale=np.ones(n_dims))
            else:
                other_kernel = Matern(length_scale=np.ones(n_dims),
                                      length_scale_bounds=[(0.01, 100)] *
                                      n_dims,
                                      nu=2.5)

            base_estimator = BoundedGaussianProcessRegressor(
                space,
                self.hyper_param_names,
                self.adapt_param,
                kernel=cov_amplitude * other_kernel,
                normalize_y=True,
                noise="gaussian",
                n_restarts_optimizer=2)

        super(HyperParamOptimizer,
              self).__init__(skopt_dims,
                             base_estimator=base_estimator,
                             n_initial_points=self.n_initial_points,
                             acq_optimizer_kwargs=self.acq_optimizer_kwargs)
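An illustrative hyperparameter config laid out the way this constructor reads it (section and key names come from the code above; the values are invented):

import configparser

EXAMPLE_CFG = """
[info]
hyper_params = learning_rate num_layers optimizer
num_iters = 50
n_initial_points = 10
n_initial_points_to_start = 5
max_parallel_jobs = 4
segment_length = full
task = example_task

[learning_rate]
type = Real
min = 0.0001
max = 0.1

[num_layers]
type = Integer
min = 1
max = 8

[optimizer]
type = Categorical
categories = adam sgd
"""

cfg = configparser.ConfigParser()
cfg.read_string(EXAMPLE_CFG)
print(cfg.get('info', 'hyper_params').split(' '))
# ['learning_rate', 'num_layers', 'optimizer']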
Example #8
def cook_estimator(base_estimator, space=None, **kwargs):
    """
    Cook a default estimator.
    For the special base_estimator called "DUMMY" the return value is None.
    This corresponds to sampling points at random, hence there is no need
    for an estimator.
    Parameters
    ----------
    * `base_estimator` ["GP", "RF", "ET", "GBRT", "DUMMY"
                        or sklearn regressor, default="GP"]:
        Should inherit from `sklearn.base.RegressorMixin`.
        In addition the `predict` method should have an optional `return_std`
        argument, which returns `std(Y | x)`` along with `E[Y | x]`.
        If base_estimator is one of ["GP", "RF", "ET", "GBRT", "DUMMY"], a
        surrogate model corresponding to the relevant `X_minimize` function
        is created.
    * `space` [Space instance]:
        Has to be provided if the base_estimator is a gaussian process.
        Ignored otherwise.
    * `kwargs` [dict]:
        Extra parameters provided to the base_estimator at init time.
    """
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        if base_estimator not in ["GP", "ET", "RF", "GBRT", "DUMMY",
                                  "GPM32", "GPM1", "RBF", "RQ"]:
            raise ValueError("Valid strings for the base_estimator parameter "
                             "are: 'GP', 'GPM32', 'GPM1', 'RBF', 'RQ', 'RF', "
                             "'ET', 'GBRT' or 'DUMMY', not %s." % base_estimator)
    elif not is_regressor(base_estimator):
        raise ValueError("base_estimator has to be a regressor.")

    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical

        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "GPM32":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical

        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=1.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "GPM1":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical

        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                # nu=1.0 is assumed for "GPM1"; nu=1.5 would make this branch
                # identical to the "GPM32" branch above
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=1.0)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "RBF":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        other_kernel = RBF(length_scale=np.ones(n_dims))

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "RQ":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        other_kernel = RationalQuadratic(length_scale=np.ones(n_dims), alpha=0.1)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)

    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
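Usage sketch for the kernel variants (given the definitions above and a numeric space): the estimator string only swaps the stationary kernel that gets multiplied by the constant amplitude.

from skopt.space import Real

space = [Real(0.0, 1.0), Real(0.0, 1.0)]
for name in ("GP", "GPM32", "RBF", "RQ"):
    est = cook_estimator(name, space=space)
    print(name, est.kernel)
# e.g. "GP 1**2 * Matern(length_scale=[1, 1], nu=2.5)" and
#      "GPM32 1**2 * Matern(length_scale=[1, 1], nu=1.5)"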