Example #1
0
    def _initialise_gp(self, gp, hyper_grid):
        """
        Build the Gaussian process surrogate model and store it on
        `self._surrogate_model`.

        If `gp` is `None` the default kernel and Gaussian process are used:

            `kernel = sklearn.gaussian_process.kernels.Matern(nu=2.5)`
            `gp = sklearn.gaussian_process.GaussianProcessRegressor(
                kernel, alpha=1e-6, normalize_y=True, n_restarts_optimizer=5,
                random_state=self.generator)`.

        If a regressor is supplied, its `random_state` is overwritten with
        the class generator. Note that this modifies the passed object in
        place.

        The regressor is always wrapped in a pipeline whose first step
        (`'scaler'`) is a `sklearn.preprocessing.StandardScaler` and whose
        second step (`'gp'`) is the Gaussian process.

        If `hyper_grid` is not `None` the pipeline is wrapped in
        `sklearn.model_selection.GridSearchCV` with 5-fold cross-validation,
        running on `self.nthreads` jobs. The number of folds is passed
        explicitly so that it does not depend on the default of the
        installed scikit-learn version.

        Parameters
        ----------
        gp : None or `sklearn.gaussian_process.GaussianProcessRegressor`
            Surrogate model Gaussian process.
        hyper_grid : None or dict of dictionaries
            Hyperparameter grid to be explored when fitting the Gaussian
            process. Forwarded unchanged to `GridSearchCV` as its parameter
            grid. Because the search runs over the pipeline, parameters are
            presumably addressed through the step names (`scaler`, `gp`);
            see the scikit-learn documentation for the accepted layout.

        Raises
        ------
        TypeError
            If `gp` is neither `None` nor a `GaussianProcessRegressor`.
        """
        if gp is None:
            # Fall back to the default Matern-5/2 surrogate
            gp = GaussianProcessRegressor(kernels.Matern(nu=2.5),
                                          alpha=1e-6,
                                          normalize_y=True,
                                          n_restarts_optimizer=5,
                                          random_state=self.generator)
        elif isinstance(gp, GaussianProcessRegressor):
            # A user-supplied regressor always shares the class generator
            gp.random_state = self.generator
        else:
            raise TypeError(
                "`gp` must be of {} type.".format(GaussianProcessRegressor))

        # Scale the data before it reaches the Gaussian process
        steps = [('scaler', StandardScaler()), ('gp', gp)]
        model = Pipeline(steps)

        if hyper_grid is not None:
            # Explicit `cv=5`: the library default was 3-fold before
            # scikit-learn 0.22, which contradicted the documented contract.
            model = GridSearchCV(model,
                                 hyper_grid,
                                 cv=5,
                                 n_jobs=self.nthreads)
        self._surrogate_model = model