Example #1
class TestGaussianProcessMCMC(unittest.TestCase):
    def setUp(self):
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)

        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])

        self.model = GaussianProcessMCMC(kernel,
                                         n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)

    def test_predict(self):
        X_test = np.random.rand(10, 2)

        m, v = self.model.predict(X_test)

        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

    def test_loglikelihood(self):
        theta = np.array([0.2, 0.2, 0.001])
        ll = self.model.loglikelihood(theta)

    def test_get_incumbent(self):
        inc, inc_val = self.model.get_incumbent()

        b = np.argmin(self.y)

        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]
Example #2
    def setUp(self):
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)

        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])

        self.model = GaussianProcessMCMC(kernel,
                                         n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)
Example #3
    def suggest_configuration(self):
        if self.X is None and self.y is None:
            new_x = init_random_uniform(self.lower, self.upper,
                                        n_points=1, rng=self.rng)[0, :]

        elif self.X.shape[0] == 1:
            # We need at least 2 data points to train a GP
            new_x = init_random_uniform(self.lower, self.upper,
                                        n_points=1, rng=self.rng)[0, :]

        else:
            cov_amp = 1
            n_dims = self.lower.shape[0]

            initial_ls = np.ones([n_dims])
            exp_kernel = george.kernels.Matern52Kernel(initial_ls,
                                                       ndim=n_dims)
            kernel = cov_amp * exp_kernel

            prior = DefaultPrior(len(kernel) + 1)

            model = GaussianProcessMCMC(kernel, prior=prior,
                                        n_hypers=self.n_hypers,
                                        chain_length=self.chain_length,
                                        burnin_steps=self.burnin,
                                        normalize_input=False,
                                        normalize_output=True,
                                        rng=self.rng,
                                        lower=self.lower,
                                        upper=self.upper)

            a = LogEI(model)

            acquisition_func = MarginalizationGPMCMC(a)

            max_func = Direct(acquisition_func, self.lower, self.upper, verbose=False)

            model.train(self.X, self.y)

            acquisition_func.update(model)

            new_x = max_func.maximize()

        next_config = Configuration(self.config_space, vector=new_x)

        # Transform to sacred configuration
        result = configspace_config_to_sacred(next_config)

        return result
Example #4
class TestGaussianProcessMCMC(unittest.TestCase):

    def setUp(self):
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)

        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])

        prior = TophatPrior(-2, 2)
        self.model = GaussianProcessMCMC(kernel,
                                         prior=prior,
                                         n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)

    def test_predict(self):
        X_test = np.random.rand(10, 2)

        m, v = self.model.predict(X_test)

        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

    def test_loglikelihood(self):
        theta = np.array([0.2, 0.2, 0.001])
        ll = self.model.loglikelihood(theta)

    def test_get_incumbent(self):
        inc, inc_val = self.model.get_incumbent()

        b = np.argmin(self.y)

        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]
Example #5
    def setUp(self):
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)

        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])

        prior = TophatPrior(-2, 2)
        self.model = GaussianProcessMCMC(kernel,
                                         prior=prior,
                                         n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)
Example #6
def fabolas_fmin(objective_func,
                 X_lower,
                 X_upper,
                 num_iterations=100,
                 n_init=40,
                 burnin=100,
                 chain_length=200,
                 Nb=50,
                 initX=None,
                 initY=None):
    """
	Interface to Fabolas [1] which models loss and training time as a
	function of dataset size and automatically trades off high information
	gain about the global optimum against computational cost.
		
	[1] Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets
		A. Klein and S. Falkner and S. Bartels and P. Hennig and F. Hutter
		http://arxiv.org/abs/1605.07079

	Parameters
	----------
	objective_func : func
		Function handle for the objective function that get a configuration x
		and the training data subset size s and returns the validation error
		of x. See the example_fmin_fabolas.py script how the
		interface to this function should look like.
	X_lower : np.ndarray(D)
		Lower bound of the input space        
	X_upper : np.ndarray(D)
		Upper bound of the input space
	num_iterations: int
		Number of iterations for the Bayesian optimization loop
	n_init: int
		Number of points for the initial design that is run before BO starts
	burnin: int
		Determines the length of the burnin phase of the MCMC sampling
		for the GP hyperparameters
	chain_length: int
		Specifies the chain length of the MCMC sampling for the GP 
		hyperparameters
	Nb: int
		The number of representer points for approximating pmin
		
	Returns
	-------
	x : (1, D) numpy array
		The estimated global optimium also called incumbent

	"""

    assert X_upper.shape[0] == X_lower.shape[0]

    def f(x):
        x_ = x[:, :-1]
        s = x[:, -1]
        return objective_func(x_, s)

    class Task(BaseTask):
        def __init__(self, X_lower, X_upper, f):
            super(Task, self).__init__(X_lower, X_upper)
            self.objective_function = f
            is_env = np.zeros([self.n_dims])
            # Assume the last dimension to be the system size
            is_env[-1] = 1
            self.is_env = is_env

    task = Task(X_lower, X_upper, f)

    def basis_function(x):
        return (1 - x)**2

    # Define model for the objective function
    # Covariance amplitude
    cov_amp = 1

    kernel = cov_amp

    # ARD Kernel for the configuration space
    for d in range(task.n_dims - 1):
        kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                ndim=task.n_dims,
                                                dim=d)

    # Kernel for the environmental variable
    # We use (1-s)**2 as basis function for the Bayesian linear kernel
    degree = 1
    env_kernel = george.kernels.BayesianLinearRegressionKernel(
        task.n_dims, dim=task.n_dims - 1, degree=degree)
    env_kernel[:] = np.ones([degree + 1]) * 0.1

    kernel *= env_kernel

    n_hypers = 3 * len(kernel)
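    # Use an even number of hyperparameter samples (the emcee ensemble
    # sampler used for the MCMC requires an even number of walkers).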
    if n_hypers % 2 == 1:
        n_hypers += 1

    # Define the prior of the kernel's hyperparameters
    prior = EnvPrior(len(kernel) + 1, n_ls=task.n_dims - 1, n_lr=(degree + 1))

    model = GaussianProcessMCMC(kernel,
                                prior=prior,
                                burnin=burnin,
                                chain_length=chain_length,
                                n_hypers=n_hypers,
                                basis_func=basis_function,
                                dim=task.n_dims - 1)

    # Define model for the cost function
    cost_cov_amp = 3000

    cost_kernel = cost_cov_amp

    for d in range(task.n_dims - 1):
        cost_kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.1,
                                                     ndim=task.n_dims,
                                                     dim=d)

    cost_degree = 1
    cost_env_kernel = george.kernels.BayesianLinearRegressionKernel(
        task.n_dims, dim=task.n_dims - 1, degree=cost_degree)
    cost_env_kernel[:] = np.ones([cost_degree + 1]) * 0.1

    cost_kernel *= cost_env_kernel

    cost_prior = EnvPrior(len(cost_kernel) + 1,
                          n_ls=task.n_dims - 1,
                          n_lr=(cost_degree + 1))
    cost_model = GaussianProcessMCMC(cost_kernel,
                                     prior=cost_prior,
                                     burnin=burnin,
                                     chain_length=chain_length,
                                     n_hypers=n_hypers)

    # Define acquisition function and maximizer
    es = InformationGainPerUnitCost(model,
                                    cost_model,
                                    task.X_lower,
                                    task.X_upper,
                                    task.is_env,
                                    Nb=Nb)

    acquisition_func = IntegratedAcquisition(model, es, task.X_lower,
                                             task.X_upper, cost_model)

    maximizer = cmaes.CMAES(acquisition_func, task.X_lower, task.X_upper)

    rec = BestProjectedObservation(model, task.X_lower, task.X_upper,
                                   task.is_env)

    bo = Fabolas(acquisition_func=acquisition_func,
                 model=model,
                 cost_model=cost_model,
                 maximize_func=maximizer,
                 task=task,
                 initial_points=n_init,
                 incumbent_estimation=rec)
    best_x, f_min = bo.run(num_iterations, X=initX, Y=initY)

    return task.retransform(best_x), f_min, model, acquisition_func, maximizer
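
A minimal usage sketch for fabolas_fmin. The toy objective and bounds below are illustrative assumptions; the exact return convention of the objective (validation error alone versus error plus cost) follows the example_fmin_fabolas.py script cited in the docstring and is assumed here to be the error alone. The last input dimension is the dataset-subset-size variable s.

import numpy as np

# Toy objective: "validation error" of configurations x trained on subsets of size s.
# x has shape (N, D-1), s has shape (N,); returns one value per row.
def objective(x, s):
    return np.sum((x - 0.5) ** 2, axis=1) + 1.0 / s

# The last dimension of the bounds is the subset-size variable s.
X_lower = np.array([0.0, 0.0, 100.0])
X_upper = np.array([1.0, 1.0, 10000.0])

x_best, f_min, model, acq, maximizer = fabolas_fmin(objective, X_lower, X_upper,
                                                    num_iterations=50)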
Example #7
def bayesian_optimization(objective_function,
                          lower,
                          upper,
                          num_iterations=30,
                          maximizer="random",
                          acquisition_func="log_ei",
                          model_type="gp_mcmc",
                          n_init=3,
                          rng=None,
                          output_path=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"direct", "cmaes", "random", "scipy"}
        The optimizer for the acquisition function. NOTE: "cmaes" only works in D > 1 dimensions
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model_type: {"gp", "gp_mcmc", "rf"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration will be saved.
        If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
        dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model_type == "gp":
        model = GaussianProcess(kernel,
                                prior=prior,
                                rng=rng,
                                normalize_output=False,
                                normalize_input=True,
                                lower=lower,
                                upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel,
                                    prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=True,
                                    rng=rng,
                                    lower=lower,
                                    upper=upper)

    elif model_type == "rf":
        model = RandomForest(rng=rng)

    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    if acquisition_func == "ei":
        a = EI(model)
    elif acquisition_func == "log_ei":
        a = LogEI(model)
    elif acquisition_func == "pi":
        a = PI(model)
    elif acquisition_func == "lcb":
        a = LCB(model)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if model_type == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)
    else:
        acquisition_func = a

    if maximizer == "cmaes":
        max_func = CMAES(acquisition_func,
                         lower,
                         upper,
                         verbose=False,
                         rng=rng)
    elif maximizer == "direct":
        max_func = Direct(acquisition_func, lower, upper, verbose=True)
    elif maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)

    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    bo = BayesianOptimization(objective_function,
                              lower,
                              upper,
                              acquisition_func,
                              model,
                              max_func,
                              initial_points=n_init,
                              rng=rng,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]
    return results
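
A hedged usage sketch for the interface above; the one-dimensional objective and bounds are illustrative assumptions, not part of the original.

import numpy as np

def objective(x):
    # x is a numpy array of shape (D,); return a scalar value to minimize
    return np.sin(3 * x[0]) * 4 * (x[0] - 1) * (x[0] + 2)

lower = np.array([0.0])
upper = np.array([6.0])

results = bayesian_optimization(objective, lower, upper,
                                num_iterations=20,
                                model_type="gp_mcmc",
                                acquisition_func="log_ei",
                                maximizer="random")
print(results["x_opt"], results["f_opt"])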
Example #8
def bayesian_optimization(objective_function,
                          lower,
                          upper,
                          num_iterations=30,
                          X_init=None,
                          Y_init=None,
                          maximizer="random",
                          acquisition_func="log_ei",
                          model_type="gp_mcmc",
                          n_init=3,
                          rng=None,
                          output_path=None,
                          kernel=None,
                          sampling_method="origin",
                          distance="cosine",
                          replacement=True,
                          pool=None,
                          best=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    X_init: np.ndarray(N,D)
        Initial points to warm-start BO
    Y_init: np.ndarray(N,1)
        Function values of the initial points
    maximizer: {"random", "scipy", "differential_evolution"}
        The optimizer for the acquisition function.
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model_type: {"gp", "gp_mcmc", "rf", "bohamiann", "dngo"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration will be saved.
        If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator
    kernel: george.kernels.Kernel
        Specifies the kernel for the Gaussian process. An earlier,
        name-based selection ({"constant", "polynomial", "linear",
        "dotproduct", "exp", "expsquared", "matern32", "matern52",
        "rationalquadratic", "cosine", "expsine2", "heuristic"}) is
        kept below as commented-out code.
    sampling_method: {"origin", "approx", "exact"}
        Specifies the method used to choose the next sample to update the model.
        approx: choose the sample in the candidate pool that is closest
        (measured by the distance argument) to the one returned from
        maximizing the acquisition function.
        exact: evaluate all samples in the candidate pool on the acquisition
        function and choose the one with the maximum output.
    distance: {"cosine", "euclidean"}
        The distance measure for approximate sampling.
    replacement: boolean
        Whether to sample from the pool with replacement.
    pool: np.ndarray(N,D)
        Candidate pool containing the possible values of x
    best: float
        Stop the optimization once a point with this function value is sampled.
    Returns
    -------
        dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    #n_dims = lower.shape[0]

    #initial_ls = np.ones([n_dims])

    # if kernel == "constant":
    #     exp_kernel = george.kernels.ConstantKernel(1, ndim=n_dims)
    # elif kernel == "polynomial":
    #     exp_kernel = george.kernels.PolynomialKernel(log_sigma2=1, order=3, ndim=n_dims)
    # elif kernel == "linear":
    #     exp_kernel = george.kernels.LinearKernel(log_gamma2=1, order=3, ndim=n_dims)
    # elif kernel == "dotproduct":
    #     exp_kernel = george.kernels.DotProductKernel(ndim=n_dims)
    # elif kernel == "exp":
    #     exp_kernel = george.kernels.ExpKernel(initial_ls, ndim=n_dims)
    # elif kernel == "expsquared":
    #     exp_kernel = george.kernels.ExpSquaredKernel(initial_ls, ndim=n_dims)
    # elif kernel == "matern32":
    #     exp_kernel = george.kernels.Matern32Kernel(initial_ls, ndim=n_dims)
    # elif kernel == "matern52":
    #     exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    # elif kernel == "rationalquadratic":
    #     exp_kernel = george.kernels.RationalQuadraticKernel(log_alpha=1, metric=initial_ls, ndim=n_dims)
    # elif kernel == "cosine":
    #     exp_kernel = george.kernels.CosineKernel(4, ndim=n_dims)
    # elif kernel == "expsine2":
    #     exp_kernel = george.kernels.ExpSine2Kernel(1, 2, ndim=n_dims)
    # elif kernel == "heuristic":
    #     exp_kernel = george.kernels.PythonKernel(heuristic_kernel_function, ndim=n_dims)
    # else:
    #     raise ValueError("'{}' is not a valid kernel".format(kernel))

    # 'kernel' must be a george kernel object (the name-based selection above is commented out)
    if kernel is None:
        raise ValueError("a george kernel object must be passed via the 'kernel' argument")
    kernel = cov_amp * kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model_type == "gp":
        model = GaussianProcess(kernel,
                                prior=prior,
                                rng=rng,
                                normalize_output=False,
                                normalize_input=True,
                                lower=lower,
                                upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel,
                                    prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=False,
                                    rng=rng,
                                    lower=lower,
                                    upper=upper)

    elif model_type == "rf":
        model = RandomForest(rng=rng)

    elif model_type == "bohamiann":
        model = WrapperBohamiann()

    elif model_type == "dngo":
        model = DNGO()

    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    if acquisition_func == "ei":
        a = EI(model)
    elif acquisition_func == "log_ei":
        a = LogEI(model)
    elif acquisition_func == "pi":
        a = PI(model)
    elif acquisition_func == "lcb":
        a = LCB(model)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if model_type == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)
    else:
        acquisition_func = a

    if maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "differential_evolution":
        max_func = DifferentialEvolution(acquisition_func,
                                         lower,
                                         upper,
                                         rng=rng)
    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    if sampling_method == "exact":
        max_func = ExactSampling(acquisition_func,
                                 lower,
                                 upper,
                                 pool,
                                 replacement,
                                 rng=rng)
        init_design = init_exact_random
    elif sampling_method == "approx":
        max_func = ApproxSampling(acquisition_func,
                                  lower,
                                  upper,
                                  pool,
                                  replacement,
                                  distance,
                                  rng=rng)
        init_design = init_exact_random
    else:
        init_design = init_latin_hypercube_sampling

    bo = BayesianOptimization(objective_function,
                              lower,
                              upper,
                              acquisition_func,
                              model,
                              max_func,
                              pool,
                              best,
                              sampling_method,
                              distance,
                              replacement,
                              initial_points=n_init,
                              rng=rng,
                              initial_design=init_design,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations, X=X_init, y=Y_init)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]
    return results
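
A sketch of the pool-based sampling path this variant adds. Since the name-based kernel selection is commented out, the caller must pass a george kernel object; the Matern52 kernel, the candidate pool, and the objective below are illustrative assumptions.

import numpy as np
import george

n_dims = 2
lower = np.zeros(n_dims)
upper = np.ones(n_dims)

# Kernel object supplied by the caller (multiplied by cov_amp inside).
kernel = george.kernels.Matern52Kernel(np.ones(n_dims), ndim=n_dims)

# Finite candidate pool from which "approx"/"exact" sampling picks points.
pool = np.random.RandomState(0).rand(100, n_dims)

def objective(x):
    return float(np.sum((x - 0.3) ** 2))

results = bayesian_optimization(objective, lower, upper,
                                num_iterations=15,
                                kernel=kernel,
                                sampling_method="approx",
                                distance="euclidean",
                                pool=pool,
                                replacement=False)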
Example #9
def entropy_search(objective_function,
                   lower,
                   upper,
                   num_iterations=30,
                   maximizer="random",
                   model="gp_mcmc",
                   n_init=3,
                   output_path=None,
                   rng=None):
    """
    Entropy search for global black box optimization problems. This is a reimplemenation of the entropy search
    algorithm by Henning and Schuler[1].

    [1] Entropy search for information-efficient global optimization.
        P. Hennig and C. Schuler.
        JMLR, (1), 2012.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy array (D,) as input and returns
        the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"random", "scipy", "differential_evolution"}
        Defines how the acquisition function is maximized.
    model: {"gp", "gp_mcmc"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration will be saved.
        If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
        dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model == "gp":
        gp = GaussianProcess(kernel,
                             prior=prior,
                             rng=rng,
                             normalize_output=False,
                             normalize_input=True,
                             lower=lower,
                             upper=upper)
    elif model == "gp_mcmc":
        gp = GaussianProcessMCMC(kernel,
                                 prior=prior,
                                 n_hypers=n_hypers,
                                 chain_length=200,
                                 burnin_steps=100,
                                 normalize_input=True,
                                 normalize_output=False,
                                 rng=rng,
                                 lower=lower,
                                 upper=upper)
    else:
        raise ValueError("'{}' is not a valid model".format(model))

    a = InformationGain(gp, lower=lower, upper=upper, sampling_acquisition=EI)

    if model == "gp":
        acquisition_func = a
    elif model == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)

    if maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "differential_evolution":
        max_func = DifferentialEvolution(acquisition_func,
                                         lower,
                                         upper,
                                         rng=rng)
    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    bo = BayesianOptimization(objective_function,
                              lower,
                              upper,
                              acquisition_func,
                              gp,
                              max_func,
                              initial_design=init_latin_hypercube_sampling,
                              initial_points=n_init,
                              rng=rng,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]
    return results
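
A short usage sketch, with an assumed quadratic toy objective:

import numpy as np

def objective(x):
    # x is a numpy array of shape (D,); return a scalar value to minimize
    return float(np.sum(x ** 2))

lower = np.array([-2.0, -2.0])
upper = np.array([2.0, 2.0])

results = entropy_search(objective, lower, upper,
                         num_iterations=20,
                         model="gp_mcmc",
                         maximizer="random")
print(results["x_opt"])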
Example #10
def fmin(objective_func,
         X_lower,
         X_upper,
         num_iterations=30,
         maximizer="direct",
         acquisition="LogEI",
         initX=None,
         initY=None):

    assert X_upper.shape[0] == X_lower.shape[0]

    class Task(BaseTask):
        def __init__(self, X_lower, X_upper, objective_fkt):
            super(Task, self).__init__(X_lower, X_upper)
            self.objective_function = objective_fkt

    task = Task(X_lower, X_upper, objective_func)

    cov_amp = 2

    initial_ls = np.ones([task.n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=task.n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1
    model = GaussianProcessMCMC(kernel,
                                prior=prior,
                                n_hypers=n_hypers,
                                chain_length=200,
                                burnin_steps=100)

    if acquisition == "EI":
        a = EI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "LogEI":
        a = LogEI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "PI":
        a = PI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "UCB":
        a = LCB(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "InformationGain":
        a = InformationGain(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition == "InformationGainMC":
        a = InformationGainMC(
            model,
            X_upper=task.X_upper,
            X_lower=task.X_lower,
        )
    else:
        logger.error("ERROR: %s is not a "
                     "valid acquisition function!" % acquisition)
        return None

    acquisition_func = IntegratedAcquisition(model, a, task.X_lower,
                                             task.X_upper)

    if maximizer == "cmaes":
        max_fkt = cmaes.CMAES(acquisition_func, task.X_lower, task.X_upper)
    elif maximizer == "direct":
        max_fkt = direct.Direct(acquisition_func, task.X_lower, task.X_upper)
    elif maximizer == "stochastic_local_search":
        max_fkt = stochastic_local_search.StochasticLocalSearch(
            acquisition_func, task.X_lower, task.X_upper)
    elif maximizer == "grid_search":
        max_fkt = grid_search.GridSearch(acquisition_func, task.X_lower,
                                         task.X_upper)
    else:
        logger.error("ERROR: %s is not a valid function "
                     "to maximize the acquisition function!" % maximizer)
        return None

    bo = BayesianOptimization(acquisition_func=acquisition_func,
                              model=model,
                              maximize_func=max_fkt,
                              task=task)

    best_x, f_min = bo.run(num_iterations, X=initX, Y=initY)
    return task.retransform(best_x), f_min, model, acquisition_func, max_fkt
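
A usage sketch for this fmin wrapper, assuming the old RoBO task interface in which the objective receives a (N, D) array and returns a (N, 1) array:

import numpy as np

def objective(x):
    return np.sum((x - 0.1) ** 2, axis=1, keepdims=True)

X_lower = np.array([-1.0])
X_upper = np.array([1.0])

x_best, f_min, model, acq, max_fkt = fmin(objective, X_lower, X_upper,
                                          num_iterations=20,
                                          acquisition="LogEI",
                                          maximizer="direct")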
Example #11
def fmin(objective_fkt,
         X_lower,
         X_upper,
         num_iterations=30,
         maximizer="direct",
         acquisition_fkt="EI"):

    assert X_upper.shape[0] == X_lower.shape[0]

    class Task(BaseTask):
        def __init__(self, X_lower, X_upper, objective_fkt):
            super(Task, self).__init__(X_lower, X_upper)
            self.objective_function = objective_fkt

    task = Task(X_lower, X_upper, objective_fkt)

    noise = 1.0
    cov_amp = 2

    initial_ls = np.ones([task.n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=task.n_dims)
    noise_kernel = george.kernels.WhiteKernel(noise, ndim=task.n_dims)
    kernel = cov_amp * (exp_kernel + noise_kernel)

    prior = DefaultPrior(len(kernel))

    model = GaussianProcessMCMC(kernel,
                                prior=prior,
                                n_hypers=20,
                                chain_length=100,
                                burnin_steps=50)

    if acquisition_fkt == "EI":
        a = EI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition_fkt == "PI":
        a = PI(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition_fkt == "UCB":
        a = LCB(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition_fkt == "Entropy":
        a = Entropy(model, X_upper=task.X_upper, X_lower=task.X_lower)
    elif acquisition_fkt == "EntropyMC":
        a = EntropyMC(
            model,
            X_upper=task.X_upper,
            X_lower=task.X_lower,
        )
    else:
        logger.error("ERROR: %s is not a "
                     "valid acquisition function!" % acquisition_fkt)
        return None

    if maximizer == "cmaes":
        max_fkt = cmaes.CMAES(a, task.X_lower, task.X_upper)
    elif maximizer == "direct":
        max_fkt = direct.Direct(a, task.X_lower, task.X_upper)
    elif maximizer == "stochastic_local_search":
        max_fkt = stochastic_local_search.StochasticLocalSearch(
            a, task.X_lower, task.X_upper)
    elif maximizer == "grid_search":
        max_fkt = grid_search.GridSearch(a, task.X_lower, task.X_upper)
    else:
        logger.error("ERROR: %s is not a valid function "
                     "to maximize the acquisition function!" % maximizer)
        return None

    bo = BayesianOptimization(acquisition_func=a,
                              model=model,
                              maximize_func=max_fkt,
                              task=task)

    best_x, f_min = bo.run(num_iterations)
    return best_x, f_min
Example #12
burnin = 100  # assumed value; 'burnin' is used below but was not defined in the original snippet
chain_length = 200
n_hypers = 20

task = Branin()

cov_amp = 1.0
config_kernel = george.kernels.Matern52Kernel(np.ones([task.n_dims]),
                                              ndim=task.n_dims)

kernel = cov_amp * config_kernel

prior = MyPrior(len(kernel) + 1)

model = GaussianProcessMCMC(kernel,
                            prior=prior,
                            burnin=burnin,
                            chain_length=chain_length,
                            n_hypers=n_hypers)

ei = EI(
    model,
    X_upper=task.X_upper,
    X_lower=task.X_lower,
)

acquisition_func = IntegratedAcquisition(model, ei, task.X_lower, task.X_upper)

maximizer = Direct(acquisition_func, task.X_lower, task.X_upper)

bo = BayesianOptimization(acquisition_func=acquisition_func,
                          model=model,
                          maximize_func=maximizer,
                          task=task)  # trailing arguments assumed; the original snippet was truncated here
Example #13
# numpy, george, and the GP model import added; the module path for
# GaussianProcessMCMC is assumed from RoBO's layout.
import numpy as np
import george

from robo.models.gaussian_process_mcmc import GaussianProcessMCMC
from robo.acquisition.ei import EI
from robo.maximizers.direct import Direct
from robo.task.controlling_tasks.walker import Walker
from robo.solver.bayesian_optimization import BayesianOptimization
from robo.priors.default_priors import DefaultPrior
from robo.acquisition.integrated_acquisition import IntegratedAcquisition

task = Walker()
test = '/test'

kernel = 1 * george.kernels.Matern52Kernel(np.ones([task.n_dims]),
                                           ndim=task.n_dims)
prior = DefaultPrior(len(kernel) + 1)
model = GaussianProcessMCMC(kernel,
                            prior=prior,
                            chain_length=100,
                            burnin_steps=200,
                            n_hypers=8)

ei = EI(model, task.X_lower, task.X_upper)
acquisition_func = IntegratedAcquisition(model, ei, task.X_lower, task.X_upper)

maximizer = Direct(acquisition_func, task.X_lower, task.X_upper)

bo = BayesianOptimization(acquisition_func=acquisition_func,
                          model=model,
                          maximize_func=maximizer,
                          task=task,
                          save_dir=test)

print(bo.run(2))
Example #14
def bayesian_optimization(objective_function,
                          lower,
                          upper,
                          num_iterations=30,
                          maximizer="direct",
                          acquisition_func="log_ei",
                          model="gp_mcmc",
                          n_init=3,
                          rng=None):
    """
    General interface for Bayesian optimization for global black box optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy array (D,) as input and returns
        the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"direct", "cmaes"}
        Defines how the acquisition function is maximized. NOTE: "cmaes" only works in D > 1 dimensions
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model: {"gp", "gp_mcmc"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it is <= num_iterations.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
        dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model == "gp":
        gp = GaussianProcess(kernel,
                             prior=prior,
                             rng=rng,
                             normalize_output=True,
                             normalize_input=True,
                             lower=lower,
                             upper=upper)
    elif model == "gp_mcmc":
        gp = GaussianProcessMCMC(kernel,
                                 prior=prior,
                                 n_hypers=n_hypers,
                                 chain_length=200,
                                 burnin_steps=100,
                                 normalize_input=True,
                                 normalize_output=True,
                                 rng=rng,
                                 lower=lower,
                                 upper=upper)
    else:
        raise ValueError("'{}' is not a valid model".format(model))

    if acquisition_func == "ei":
        a = EI(gp)
    elif acquisition_func == "log_ei":
        a = LogEI(gp)
    elif acquisition_func == "pi":
        a = PI(gp)
    elif acquisition_func == "lcb":
        a = LCB(gp)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if model == "gp":
        acquisition_func = a
    elif model == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)

    if maximizer == "cmaes":
        max_func = CMAES(acquisition_func,
                         lower,
                         upper,
                         verbose=False,
                         rng=rng)
    elif maximizer == "direct":
        max_func = Direct(acquisition_func, lower, upper, verbose=False)
    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    bo = BayesianOptimization(objective_function,
                              lower,
                              upper,
                              acquisition_func,
                              gp,
                              max_func,
                              initial_points=n_init,
                              rng=rng)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    return results
Example #15
def build_model(lower,
                upper,
                model_type="gp_mcmc",
                model_seed=1,
                prior_seed=1):
    """
    Build the surrogate model used for Bayesian optimization of global
    black box optimization problems.

    Parameters
    ----------
    lower: numpy.ndarray (D,)
        The lower bound of the search space
    upper: numpy.ndarray (D,)
        The upper bound of the search space
    model_type: {"gp", "gp_mcmc", "rf", "bohamiann", "dngo"}
        The model for the objective function.
    model_seed: int
        Seed for random number generator of the model 
    prior_seed: int
        Seed for random number generator of the prior

    Returns
    -------
        Model
    """
    assert upper.shape[0] == lower.shape[0], "Dimension miss match"
    assert numpy.all(lower < upper), "Lower bound >= upper bound"

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = numpy.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1, numpy.random.RandomState(prior_seed))

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    # NOTE: Some models do not support RNG properly and rely on global RNG state
    #       so we need to seed here as well...
    numpy.random.seed(model_seed)
    model_rng = numpy.random.RandomState(model_seed)
    if model_type == "gp":
        model = GaussianProcess(kernel,
                                prior=prior,
                                rng=model_rng,
                                normalize_output=False,
                                normalize_input=True,
                                lower=lower,
                                upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel,
                                    prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=False,
                                    rng=model_rng,
                                    lower=lower,
                                    upper=upper)

    elif model_type == "rf":
        model = RandomForest(rng=model_rng)

    elif model_type == "bohamiann":
        model = WrapperBohamiann()

    elif model_type == "dngo":
        from pybnn.dngo import DNGO
        model = DNGO()

    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    return model
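
A brief usage sketch; the bounds and training data are illustrative, and the returned model is assumed to follow RoBO's usual train/predict interface (as exercised by the tests above).

import numpy

lower = numpy.zeros(2)
upper = numpy.ones(2)

model = build_model(lower, upper, model_type="gp_mcmc",
                    model_seed=1, prior_seed=1)

X = numpy.random.RandomState(0).rand(10, 2)
y = numpy.sin(X).sum(axis=1)
model.train(X, y, do_optimize=True)
mean, var = model.predict(numpy.random.rand(5, 2))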
Example #16
File: mtbo.py  Project: snpc94/RoBO
def mtbo(objective_function,
         lower,
         upper,
         n_tasks=2,
         n_init=2,
         num_iterations=30,
         burnin=100,
         chain_length=200,
         rng=None):
    """
    Interface to MTBO[1] which uses an auxiliary cheaper task to speed up the optimization
    of a more expensive but similar task.

    [1] Multi-Task Bayesian Optimization
        K. Swersky and J. Snoek and R. Adams
        Proceedings of the 27th International Conference on Advances in Neural Information Processing Systems (NIPS'13)

    Parameters
    ----------
    objective_function: function
        Objective function that will be optimized
    lower: np.array(D,)
        Lower bound of the input space
    upper: np.array(D,)
        Upper bound of the input space
    n_tasks: int
        Number of tasks
    n_init: int
        Number of initial design points
    num_iterations: int
        Number of iterations
    chain_length : int
        The length of the MCMC chain for each walker.
    burnin : int
        The number of burnin steps before the actual MCMC sampling starts.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
        dict with all results
    """

    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"
    assert lower.shape[0] == upper.shape[0], "Dimension mismatch between upper and lower bound"

    time_start = time.time()
    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    n_dims = lower.shape[0]

    # Bookkeeping
    time_func_eval = []
    time_overhead = []
    incumbents = []
    runtime = []

    X = []
    y = []
    c = []

    # Define model for the objective function
    cov_amp = 1  # Covariance amplitude
    kernel = cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                ndim=n_dims + 1,
                                                dim=d)

    task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    kernel *= task_kernel

    # Take 3 times more samples than we have hyperparameters
    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    prior = MTBOPrior(len(kernel) + 1,
                      n_ls=n_dims,
                      n_kt=len(task_kernel),
                      rng=rng)

    model_objective = GaussianProcessMCMC(kernel,
                                          prior=prior,
                                          burnin_steps=burnin,
                                          chain_length=chain_length,
                                          n_hypers=n_hypers,
                                          normalize_input=False,
                                          lower=lower,
                                          upper=upper,
                                          rng=rng)

    # Define model for the cost function
    cost_cov_amp = 1

    cost_kernel = cost_cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        cost_kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                     ndim=n_dims + 1,
                                                     dim=d)

    cost_task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    cost_kernel *= cost_task_kernel

    cost_prior = MTBOPrior(len(cost_kernel) + 1,
                           n_ls=n_dims,
                           n_kt=len(cost_task_kernel),
                           rng=rng)

    model_cost = GaussianProcessMCMC(cost_kernel,
                                     prior=cost_prior,
                                     burnin_steps=burnin,
                                     chain_length=chain_length,
                                     n_hypers=n_hypers,
                                     normalize_input=False,
                                     lower=lower,
                                     upper=upper,
                                     rng=rng)

    # Extend input space by task variable
    extend_lower = np.append(lower, 0)
    extend_upper = np.append(upper, n_tasks - 1)
    is_env = np.zeros(extend_lower.shape[0])
    is_env[-1] = 1

    # Define acquisition function and maximizer
    ig = InformationGainPerUnitCost(model_objective,
                                    model_cost,
                                    extend_lower,
                                    extend_upper,
                                    is_env_variable=is_env,
                                    n_representer=50)
    acquisition_func = MarginalizationGPMCMC(ig)

    wrapper_func = partial(transformation,
                           acq=acquisition_func,
                           lower=lower,
                           upper=upper)
    maximizer = Direct(wrapper_func, extend_lower, extend_upper, verbose=True)

    # Initial Design
    for _ in range(n_init):
        logger.info("Initial design")
        start_time_overhead = time.time()
        # Draw random configuration and evaluate it just on the auxiliary task
        task = 0
        x = init_random_uniform(lower, upper, 1, rng)[0]
        logger.info("Evaluate candidate %s", str(x))
        st = time.time()
        func_val, cost = objective_function(x, task)
        time_func_eval.append(time.time() - st)

        logger.info("Configuration achieved a performance of %f with cost %f",
                    func_val, cost)
        logger.info("Evaluation of this configuration took %f seconds",
                    time_func_eval[-1])

        # Bookkeeping
        config = np.append(x, task)
        X.append(config)
        y.append(func_val)
        c.append(cost)

        # Estimate incumbent as the best observed value so far
        best_idx = np.argmin(y)
        incumbents.append(np.append(
            X[best_idx],
            n_tasks - 1))  # Incumbent is always on the task of interest

        time_overhead.append(time.time() - start_time_overhead)
        runtime.append(time.time() - time_start)

    X = np.array(X)
    y = np.array(y)
    c = np.array(c)

    for it in range(n_init, num_iterations):
        logger.info("Start iteration %d ... ", it)

        start_time = time.time()

        # Train models
        model_objective.train(transform(X, lower, upper), y, do_optimize=True)
        model_cost.train(transform(X, lower, upper), c, do_optimize=True)

        # Estimate incumbent by projecting all observed points to the task of interest and
        # pick the point with the lowest mean prediction
        incumbent, incumbent_value = projected_incumbent_estimation(
            model_objective,
            transform(X, lower, upper)[:, :-1],
            proj_value=n_tasks - 1)
        incumbent[:-1] = normalization.zero_one_unnormalization(
            incumbent[:-1], lower, upper)
        incumbents.append(incumbent)
        logger.info("Current incumbent %s with estimated performance %f",
                    str(incumbent), incumbent_value)

        # Maximize acquisition function
        acquisition_func.update(model_objective, model_cost)

        new_x = maximizer.maximize()
        new_x[-1] = np.rint(new_x[-1])  # Map float value to discrete task variable

        time_overhead.append(time.time() - start_time)
        logger.info("Optimization overhead was %f seconds", time_overhead[-1])

        # Evaluate the chosen configuration
        logger.info("Evaluate candidate %s", str(new_x))
        start_time = time.time()
        new_y, new_c = objective_function(new_x[:-1], new_x[-1])
        time_func_eval.append(time.time() - start_time)

        logger.info("Configuration achieved a performance of %f with cost %f",
                    new_y, new_c)
        logger.info("Evaluation of this configuration took %f seconds",
                    time_func_eval[-1])

        # Add new observation to the data
        X = np.concatenate((X, new_x[None, :]), axis=0)
        y = np.concatenate((y, np.array([new_y])), axis=0)
        c = np.concatenate((c, np.array([new_c])), axis=0)

        runtime.append(time.time() - time_start)

    # Estimate the final incumbent
    model_objective.train(transform(X, lower, upper), y)
    incumbent, incumbent_value = projected_incumbent_estimation(
        model_objective,
        transform(X, lower, upper)[:, :-1],
        proj_value=n_tasks - 1)
    incumbent[:-1] = normalization.zero_one_unnormalization(
        incumbent[:-1], lower, upper)
    incumbents.append(incumbent)
    logger.info("Final incumbent %s with estimated performance %f",
                str(incumbent), incumbent_value)

    results = dict()
    results["x_opt"] = incumbent[:-1]
    results["trajectory"] = [inc for inc in incumbents]
    results["runtime"] = runtime
    results["overhead"] = time_overhead
    results["time_func_eval"] = time_func_eval

    return results
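
A usage sketch for mtbo, with an assumed two-task toy objective that returns both a function value and an evaluation cost, task 0 being the cheap auxiliary and task 1 the expensive target:

import numpy as np

def objective(x, task):
    value = float(np.sum((x - 0.5) ** 2)) + (0.1 if task == 0 else 0.0)
    cost = 1.0 if task == 0 else 10.0
    return value, cost

lower = np.zeros(2)
upper = np.ones(2)

results = mtbo(objective, lower, upper,
               n_tasks=2, n_init=2, num_iterations=10)
print(results["x_opt"])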