def set_gp_kernel(self,
                   kernel=DEFAULTS['kernel'],
                   in_dim=DEFAULTS['input_dim'],
                   variance=DEFAULTS['variance'],
                   lengthscale=DEFAULTS['lengthscale'],
                   multi_dim=False):
      """Sets the kernel of this Gaussfit."""
      self.kernel_name = kernel  # used when building file names for saving
     if kernel == 'RBF':
         self.kernel = RBF(input_dim=in_dim,
                           variance=variance,
                           lengthscale=lengthscale,
                           ARD=multi_dim)
     elif kernel == 'Exponential':
         self.kernel = Exponential(input_dim=in_dim,
                                   variance=variance,
                                   lengthscale=lengthscale,
                                   ARD=multi_dim)
     elif kernel == 'Matern32':
         self.kernel = Matern32(input_dim=in_dim,
                                variance=variance,
                                lengthscale=lengthscale,
                                ARD=multi_dim)
     elif kernel == 'Matern52':
         self.kernel = Matern52(input_dim=in_dim,
                                variance=variance,
                                lengthscale=lengthscale,
                                ARD=multi_dim)
      else:
          raise ValueError("Kernel '{}' not recognized or not implemented".format(kernel))
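
# A hedged usage sketch (assumes a Gaussfit-like instance `gf`; the values
# below are illustrative, not taken from the original source):
#
#   gf.set_gp_kernel(kernel='Matern52', in_dim=3,
#                    variance=1.0, lengthscale=0.5, multi_dim=True)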
 def _model_chooser(self):
     """ Initialize the model used for the optimization """
     kernel = Matern52(len(self.variables_list), variance=1., ARD=False)
      gpmodel = GPRegression(self.X, self.Y, kernel)
      # If the objective is assumed noiseless, fix the noise variance to a
      # small value before optimizing the remaining hyperparameters.
      if self.noiseless:
          gpmodel.Gaussian_noise.constrain_fixed(0.001)
      gpmodel.optimize()
      self.model = GPyModelWrapper(gpmodel)
    def __init__(self,
                 objective,
                 bounds,
                 n_iterations,
                 n_init=2,
                 rand=None,
                 kernel_function="Exponential",
                 acquisition_func="EI",
                 noise_var=0.001,
                 log_info=True,
                 n_iters_aqui=15,
                 use_bashinhopping=False):
        """
        Peter Kostovcik: Bayesian Optimization; diploma thesis 2017
        Charles University, Faculty of Mathematics and Physics
        e-mail: [email protected]
        ======== INPUT ========
        objective:          objective function (input is a numpy.array of length = dimension)
        bounds:             box bounds (list of tuples)
        n_iterations:       number of iterations = evaluations of the objective
        n_init:             number of starting points (default = 2)
        rand:               np.random.RandomState(some_number) (default = None, a random state is chosen)
        kernel_function:    Exponential or Matern kernel from the GPy package
                            ["Exponential", "Matern32", "Matern52"]
                            (default = "Exponential")
        acquisition_func:   acquisition function ["UCB" == Upper Confidence Bound,
                                                  "EI" == Expected Improvement]
                            (default = "EI")
        noise_var:          variance for noise (default = 0.001)
        log_info:           True/False - print progress information (default = True)
        n_iters_aqui:       number of restarts when optimizing the acquisition function (default = 15)
        use_bashinhopping:  True/False - use scipy's basinhopping algorithm (default = False)
        =================

        """
        self.objective, self.bounds = objective, bounds
        self.n_iterations, self.n_init = n_iterations, n_init
        self.kernel_function = kernel_function
        self.acquisition_func = acquisition_func
        self.noise_var = noise_var
        self.bashop = use_bashinhopping
        if rand is None:
            self.rand = np.random.RandomState(np.random.randint(0, 10000))
        else:
            self.rand = rand
        self.log_info = log_info
        self.dim, self.n_iters_aqui = len(self.bounds), n_iters_aqui
        if self.kernel_function == "Exponential":
            self.kernel = Exponential(input_dim=self.dim)
        elif self.kernel_function == "Matern52":
            self.kernel = Matern52(input_dim=self.dim)
        else:  # covers "Matern32" and falls back to it for unrecognized names
            self.kernel = Matern32(input_dim=self.dim)
        self.computation_time = None
        self.run()
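
# A hedged usage sketch (the optimizer's class name is not shown in this
# snippet; `BayesianOptimization` below is assumed). Note that the
# constructor calls self.run(), so instantiation starts the optimization:
#
#   bo = BayesianOptimization(objective=f, bounds=[(-5, 5), (-5, 5)],
#                             n_iterations=30, kernel_function="Matern52",
#                             acquisition_func="EI")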
Example #4
def test_set_hyperparameters():
    import numpy as np
    from GPy.models import GPRegression
    from GPy.kern import RBF as gRBF, Matern52

    expected_hyperparameters = {
        'length_scale': np.array([1]),
        'sigma_n': np.array([1]),
        'sigma_f': np.array([1])
    }

    sur = GPySurrogate()

    # Default RBF kernel
    sur.model = GPRegression(Xtrain, ytrain)
    sur._set_hyperparameters_from_model()
    assert sur.hyperparameters == expected_hyperparameters

    # Product kernel
    sur.model = GPRegression(Xtrain, ytrain, kernel=gRBF(1) * Matern52(1))
    sur._set_hyperparameters_from_model()
    assert sur.hyperparameters == expected_hyperparameters
Example #5
    def _fit(self, X, y, ECM=None):
        self._X = X
        self._y = y

        kern_dict = {
            'm32':
            Matern32(input_dim=self._X.shape[1],
                     active_dims=list(range(self._X.shape[1])),
                     ARD=True),
            'm52':
            Matern52(input_dim=self._X.shape[1],
                     active_dims=list(range(self._X.shape[1])),
                     ARD=True),
            'rbf':
            RBF(input_dim=self._X.shape[1],
                active_dims=list(range(self._X.shape[1])),
                ARD=True)
        }

        self.__model = GPRegression(X, y, kern_dict[self.__kernel_name])
        self.__model.optimize_restarts(self.__n_restarts,
                                       verbose=self._verbose)
        return self
Example #6
    def _model(self, loc):
        def __D_z(sj):
            return self._Gamma[np.ix_(sj, sj)]

        def __obfunc(x):
            kernel = kern_dict[self.__kernel_name]
            kernel.variance = x[0]
            kernel.lengthscale = x[1:]
            kern_vals = kernel.K(self._X[self.__close_locs[loc]])
            term = (__D_z(self.__close_locs[loc]) - kern_vals)/kern_vals
            return np.sum(term**2)
        
        # All candidate kernels use ARD: one lengthscale per input dimension.
        kern_dict = {
            'm32': Matern32(input_dim=self._X.shape[1],
                            active_dims=list(range(self._X.shape[1])),
                            ARD=True),
            'm52': Matern52(input_dim=self._X.shape[1],
                            active_dims=list(range(self._X.shape[1])),
                            ARD=True),
            'rbf': RBF(input_dim=self._X.shape[1],
                       active_dims=list(range(self._X.shape[1])),
                       ARD=True),
            'expqd': ExpQuad(input_dim=self._X.shape[1],
                             active_dims=list(range(self._X.shape[1])),
                             ARD=True)
        }

        kernel = kern_dict[self.__kernel_name]
        params = least_squares(__obfunc, np.ones((self._X.shape[1]+1))).x
        kernel.variance = params[0]
        kernel.lengthscale = params[1:]
        return kernel.K
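
# A minimal, self-contained sketch of the idea above: fit kernel
# hyperparameters by least squares against a target covariance matrix.
# Everything below (names, data) is illustrative, not from the original class.
import numpy as np
from scipy.optimize import least_squares
from GPy.kern import Matern52

X_demo = np.random.rand(20, 2)
D_demo = np.cov(np.random.rand(20, 50))  # stand-in empirical covariance (20 x 20)
kern_demo = Matern52(input_dim=2, ARD=True)

def _residuals(x):
    kern_demo.variance = x[0]
    kern_demo.lengthscale = x[1:]
    K = kern_demo.K(X_demo)
    return ((D_demo - K) / K).ravel()

# Keep variance and lengthscales positive during the fit.
fitted = least_squares(_residuals, np.ones(X_demo.shape[1] + 1),
                       bounds=(1e-6, np.inf)).x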
    def _init_kernel_function(self, kern_types=None, hyp=None):
        """ Initialize GPy kernel functions based on name. Check if supported.

        Utility function to return a kernel based on its type name.
        Checks if the kernel type is supported.

        Parameters
        ----------
        kern_types: n_s x 0 array_like[str]
            The names of the kernels for each dimension
        hyp: list[dict], optional
            Per output dimension, hyperparameter values to set and fix on
            the corresponding kernel (see the rsetattr/rgetattr usage below)

        Returns
        -------
        kern: GPy.Kern
            The GPy kernel function
        """

        input_dim = self.n_s_in + self.n_u
        kerns = [None] * self.n_s_out

        if hyp is None:
            hyp = [None] * self.n_s_out
        warnings.warn(
            "Changed the kernel structure from the cdc paper implementation, see old structure commented out"
        )
        """
        if kern_types[i] == "rbf":
                    kern_i = RBF(input_dim, ARD = True)
                elif kern_types[i] == "lin_rbf":
                    kern_i = Linear(1,active_dims = [1])*RBF(1,active_dims=[1]) + Linear(input_dim,ARD=True)
                elif kern_types[i] == "lin_mat52":
                    kern_i = Linear(1,active_dims = [1])*Matern52(1,active_dims=[1]) + Linear(input_dim,ARD=True)
                else:
        """

        if kern_types is None:
            kern_types = [None] * self.n_s_out
            for i in range(self.n_s_out):
                kern_types[i] = "rbf"
                kerns[i] = RBF(input_dim, ARD=True)

        else:
            for i in range(self.n_s_out):
                hyp_i = hyp[i]
                if kern_types[i] == "rbf":
                    kern_i = RBF(input_dim, ARD=True)
                elif kern_types[i] == "mat52":
                    kern_i = Matern52(input_dim, ARD=True)
                elif kern_types[i] == "lin_rbf":
                    kern_i = Linear(input_dim) * RBF(input_dim) + Linear(
                        input_dim, ARD=True)
                elif kern_types[i] == "lin_mat52":
                    kern_i = Linear(input_dim) * Matern52(input_dim) + Linear(
                        input_dim, ARD=True)
                else:
                    raise ValueError("kernel type '{}' not supported".format(
                        kern_types[i]))

                if hyp_i is not None:
                    for k, v in hyp_i.items():
                        try:
                            rsetattr(kern_i, k, v)
                            kern_hyp = rgetattr(kern_i, k)
                            kern_hyp.fix()
                        except Exception:
                            warnings.warn(
                                "Cannot set and fix hyperparameter: {}".format(k))
                kerns[i] = kern_i

        self.base_kerns = kerns
        self.kern_types = kern_types
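
        # A hedged usage sketch (hypothetical attribute paths and values;
        # the exact names depend on GPy's parameter naming and on the
        # rsetattr helper used above):
        #
        #   hyp = [{"lengthscale": np.array([1.0, 0.5])}, None]
        #   self._init_kernel_function(kern_types=["rbf", "mat52"], hyp=hyp)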
x = np.linspace(0, 10, n_dims)[:, np.newaxis]

#kernel = Matern32(input_dim=1, variance=2.0)
#kernel = Brownian(input_dim=1, variance=2.0)
#kernel = RBF(input_dim=1, variance=2.0)
#kernel = Cosine(input_dim=1)
#kernel = Exponential(input_dim=1, variance=1.0)
#kernel = Linear(input_dim=1)
#kernel = GridRBF(input_dim=1, variance=2)
#kernel = MLP(input_dim=1, variance=2)
#kernel = PeriodicMatern32(input_dim=1)
#kernel = Spline(input_dim=1)
#kernel = White(input_dim=1)
#kernel = StdPeriodic(input_dim=1)
#kernel = DomainKernel(input_dim=1, start=0, stop=5)

kernel1 = LogisticBasisFuncKernel(input_dim=1, centers=[4])
kernel2 = Matern52(input_dim=1)
kernel = Prod(kernels=[kernel1, kernel2])

kernel_matrix = kernel.K(x, x)

gaussian_process_animation = GaussianProcessAnimation(kernel_matrix,
                                                      n_dims=n_dims,
                                                      n_frames=n_frames)
frames = gaussian_process_animation.get_traces(n_traces)
frames = np.stack(frames).transpose(
    (2, 0, 1))  # reshape to (length, n_traces, n_frames)
animate_multi_plots(frames, interval=10, title=kernel.name)
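
# A minimal sketch of what drawing GP prior traces from a kernel matrix
# amounts to (an assumption about what GaussianProcessAnimation does
# internally, not taken from its source):
jitter = 1e-8 * np.eye(kernel_matrix.shape[0])
L = np.linalg.cholesky(kernel_matrix + jitter)
prior_samples = L @ np.random.randn(kernel_matrix.shape[0], n_traces)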
Example #9
    def __init__(self,
                 domain,
                 transform=None,
                 gp_model_type=None,
                 acquisition_type=None,
                 optimizer_type=None,
                 evaluator_type=None,
                 **kwargs):
        from GPy.kern import Matern52

        Strategy.__init__(self, domain, transform=transform, **kwargs)

        self.use_descriptors = kwargs.get("use_descriptors", False)
        # TODO: notation - discrete in our model (e.g., catalyst type) = categorical?
        self.input_domain = []
        for v in self.domain.variables:
            if not v.is_objective:
                if isinstance(v, ContinuousVariable):
                    self.input_domain.append({
                        "name": v.name,
                        "type": v.variable_type,
                        "domain": (v.bounds[0], v.bounds[1]),
                    })
                elif isinstance(v, CategoricalVariable):
                    if not self.use_descriptors:
                        self.input_domain.append({
                            "name": v.name,
                            "type": "categorical",
                            "domain": tuple(self.categorical_wrapper(v.levels)),
                        })
                    elif v.ds is not None and self.use_descriptors:
                        descriptor_names = v.ds.data_columns
                        descriptors = np.asarray([
                            v.ds.loc[:, [l]].values.tolist()
                            for l in v.ds.data_columns
                        ])
                        for j, d in enumerate(descriptors):
                            self.input_domain.append({
                                "name": descriptor_names[j],
                                "type": "continuous",
                                "domain": (np.min(np.asarray(d)),
                                           np.max(np.asarray(d))),
                            })
                    elif v.ds is None and self.use_descriptors:
                        raise ValueError(
                            "Cannot use descriptors because none are provided."
                        )
                    # TODO: GPyOpt currently does not support mixed-domains w/ bandit inputs, there is a PR for this though
                else:
                    raise TypeError("Unknown variable type.")

        # TODO: how to handle equality constraints? Could we remove '==' from constraint types as each equality
        #  constraint reduces the degrees of freedom?
        if self.domain.constraints is not None:
            constraints = self.constr_wrapper(self.domain)
            self.constraints = [{
                "name": "constr_" + str(i),
                "constraint": c[0] if c[1] in ["<=", "<"] else "(" + c[0] + ")*(-1)",
            } for i, c in enumerate(constraints) if c[1] != "=="]
        else:
            self.constraints = None

        self.input_dim = len(self.domain.input_variables)

        if gp_model_type in [
                "GP",
                "GP_MCMC",
                "sparseGP",
                "warpedGP",
                "InputWarpedGP",
                "RF",
        ]:
            self.gp_model_type = gp_model_type
        else:
            self.gp_model_type = "GP"  # default model type is a standard Gaussian Process (from GPy package)

        if acquisition_type in [
                "EI",
                "EI_MCMC",
                "LCB",
                "LCB_MCMC",
                "MPI",
                "MPI_MCMC",
                "LP",
                "ES",
        ]:
            self.acquisition_type = acquisition_type
        else:
            self.acquisition_type = "EI"  # default acquisition function is Expected Improvement
        """ 
        Method for optimization of acquisition function
           lbfgs: Limited-memory Broyden–Fletcher–Goldfarb–Shanno,
           DIRECT: Dividing Rectangles,
           CMA: covariance matrix adaption
        """
        if optimizer_type in ["lbfgs", "DIRECT", "CMA"]:
            self.optimizer_type = optimizer_type
        else:
            self.optimizer_type = "lbfgs"  # default optimizer: lbfgs

        if evaluator_type in [
                "sequential",
                "random",
                "local_penalization",
                "thompson_sampling",
        ]:
            self.evaluator_type = evaluator_type
        else:
            self.evaluator_type = "random"

        # specify GPy kernel: # https://gpy.readthedocs.io/en/deploy/GPy.kern.html#subpackages
        self.kernel = kwargs.get("kernel", Matern52(self.input_dim))
        # Are function values exact (w/o noise)?
        self.exact_feval = kwargs.get("exact_feval", False)
        # automatic relevance determination
        self.ARD = kwargs.get("ARD", True)
        # Standardization of outputs?
        self.standardize_outputs = kwargs.get("standardize_outputs", True)
        self.prev_param = None
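
        # A hedged usage sketch (the Strategy subclass name `GPyOptStrategy`
        # is illustrative; the real class name is not shown in this snippet):
        #
        #   strategy = GPyOptStrategy(domain, gp_model_type="GP",
        #                             acquisition_type="EI",
        #                             kernel=Matern52(input_dim=2, ARD=True),
        #                             exact_feval=True)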
Example #10
space = ParameterSpace(list_params)

init_design = RandomDesign(space)
X_init = init_design.get_samples(2)
Y_init = np.array([b.objective_function(xi)["function_value"] for xi in X_init])[:, None]

if args.model_type == "bnn":
    model = Bohamiann(X_init=X_init, Y_init=Y_init, verbose=True)

elif args.model_type == "rf":
    model = RandomForest(X_init=X_init, Y_init=Y_init)
    with_gradients = False


elif args.model_type == "gp":
    kernel = Matern52(len(list_params), variance=1., ARD=True)
    gpmodel = GPRegression(X_init, Y_init, kernel)
    gpmodel.optimize()
    model = GPyModelWrapper(gpmodel)

acquisition = ExpectedImprovement(model)
acquisition_optimizer = DirectOptimizer(space)

candidate_point_calculator = Sequential(acquisition, acquisition_optimizer)

bo = BayesianOptimizationLoop(model=model, space=space, X_init=X_init, Y_init=Y_init, acquisition=acquisition,
                              candidate_point_calculator=candidate_point_calculator)

overhead = []
st = time.time()
for i in range(args.num_iterations):
Example #11
    N_names = 100
    state = None

    # Set the downsampling level
    downsample = 50

    train, test, full_train, full_test = \
        load_data(start_train_year=start_train_year,
                  end_train_year=end_train_year,
                  N_names=N_names,
                  continental=True, DC=False,
                  train_state=state,
                  downsample=downsample)

    # Make a kernel
    k_year   = Matern52(input_dim=1, active_dims=[0], variance=10, lengthscale=30)
    k_latlon = RBF(input_dim=2, active_dims=[1,2], variance=0.1, lengthscale=5)
    kernel = k_year * k_latlon
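
    # GPy evaluates a product kernel as the elementwise product of its
    # factors, each restricted to its own active_dims, so this covariance
    # is separable across year and (lat, lon).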


    from pgmult.internals.utils import mkdir
    results_dir = os.path.join("results", "names", "run%03d" % run)
    mkdir(results_dir)
    results_file = os.path.join(results_dir, "census_results.pkl.gz")

    # Fit a static model to the last year of training data
    static_model = pgmult.distributions.IndependentMultinomialsModel(train.data[train.years==end_train_year-1])
    static_pll = compute_static_pred_ll(static_model, full_test)
    print("Static (%d) PLL: %f" % (end_train_year-1, static_pll))

    # Fit a standard GP to the raw probabilities