Example #1
import unittest

import numpy as np
import george

# Import path as in the RoBO project these examples are taken from
from robo.models.gaussian_process_mcmc import GaussianProcessMCMC


class TestGaussianProcessMCMC(unittest.TestCase):
    def setUp(self):
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)

        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])

        self.model = GaussianProcessMCMC(kernel,
                                         n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)

    def test_predict(self):
        X_test = np.random.rand(10, 2)

        m, v = self.model.predict(X_test)

        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

    def test_loglikelihood(self):
        theta = np.array([0.2, 0.2, 0.001])
        ll = self.model.loglikelihood(theta)
        # A valid hyperparameter vector should yield a finite log likelihood
        assert np.isfinite(ll)

    def test_get_incumbent(self):
        inc, inc_val = self.model.get_incumbent()

        b = np.argmin(self.y)

        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]

Example #2
    # Note: method of a larger optimizer class; it relies on RoBO components
    # (GaussianProcessMCMC, DefaultPrior, LogEI, MarginalizationGPMCMC,
    # Direct, init_random_uniform) plus ConfigSpace's Configuration and a
    # sacred conversion helper defined elsewhere in the project.
    def suggest_configuration(self):
        if self.X is None and self.y is None:
            new_x = init_random_uniform(self.lower, self.upper,
                                        n_points=1, rng=self.rng)[0, :]

        elif self.X.shape[0] == 1:
            # We need at least 2 data points to train a GP
            new_x = init_random_uniform(self.lower, self.upper,
                                        n_points=1, rng=self.rng)[0, :]

        else:
            cov_amp = 1
            n_dims = self.lower.shape[0]

            initial_ls = np.ones([n_dims])
            exp_kernel = george.kernels.Matern52Kernel(initial_ls,
                                                       ndim=n_dims)
            kernel = cov_amp * exp_kernel

            prior = DefaultPrior(len(kernel) + 1)

            model = GaussianProcessMCMC(kernel, prior=prior,
                                        n_hypers=self.n_hypers,
                                        chain_length=self.chain_length,
                                        burnin_steps=self.burnin,
                                        normalize_input=False,
                                        normalize_output=True,
                                        rng=self.rng,
                                        lower=self.lower,
                                        upper=self.upper)

            a = LogEI(model)

            acquisition_func = MarginalizationGPMCMC(a)

            max_func = Direct(acquisition_func, self.lower, self.upper, verbose=False)

            model.train(self.X, self.y)

            acquisition_func.update(model)

            new_x = max_func.maximize()

        next_config = Configuration(self.config_space, vector=new_x)

        # Transform to sacred configuration
        result = configspace_config_to_sacred(next_config)

        return result
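
The method above is torn from a larger optimizer class. As a self-contained
illustration of the same suggestion step, here is a minimal sketch; the RoBO
import paths and the helper name suggest_next_point are assumptions, and it
returns the raw input vector instead of converting to a sacred configuration:

import numpy as np
import george

from robo.priors.default_priors import DefaultPrior
from robo.models.gaussian_process_mcmc import GaussianProcessMCMC
from robo.acquisition_functions.log_ei import LogEI
from robo.acquisition_functions.marginalization import MarginalizationGPMCMC
from robo.maximizers.direct import Direct


def suggest_next_point(X, y, lower, upper, rng):
    # Hypothetical helper mirroring the GP-MCMC suggestion step above
    n_dims = lower.shape[0]

    # Matern 5/2 kernel with one lengthscale per input dimension
    kernel = 1 * george.kernels.Matern52Kernel(np.ones(n_dims), ndim=n_dims)

    # One prior dimension per kernel hyperparameter plus the noise term
    prior = DefaultPrior(len(kernel) + 1)

    model = GaussianProcessMCMC(kernel, prior=prior,
                                n_hypers=20, chain_length=200,
                                burnin_steps=100, normalize_input=False,
                                normalize_output=True, rng=rng,
                                lower=lower, upper=upper)
    model.train(X, y)

    # Marginalize log-EI over the MCMC hyperparameter samples
    acquisition_func = MarginalizationGPMCMC(LogEI(model))
    acquisition_func.update(model)

    # Globally maximize the marginalized acquisition function with DIRECT
    return Direct(acquisition_func, lower, upper, verbose=False).maximize()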
Example #3
import unittest

import numpy as np
import george

# Import paths as in the RoBO project these examples are taken from
from robo.models.gaussian_process_mcmc import GaussianProcessMCMC
from robo.priors.default_priors import TophatPrior


class TestGaussianProcessMCMC(unittest.TestCase):

    def setUp(self):
        self.X = np.random.randn(10, 2)
        self.y = np.sinc(self.X * 10 - 5).sum(axis=1)

        kernel = george.kernels.Matern52Kernel(np.ones(self.X.shape[1]),
                                               ndim=self.X.shape[1])

        prior = TophatPrior(-2, 2)
        self.model = GaussianProcessMCMC(kernel,
                                         prior=prior,
                                         n_hypers=6,
                                         burnin_steps=100,
                                         chain_length=200)
        self.model.train(self.X, self.y, do_optimize=True)

    def test_predict(self):
        X_test = np.random.rand(10, 2)

        m, v = self.model.predict(X_test)

        assert len(m.shape) == 1
        assert m.shape[0] == X_test.shape[0]
        assert len(v.shape) == 1
        assert v.shape[0] == X_test.shape[0]

    def test_loglikelihood(self):
        theta = np.array([0.2, 0.2, 0.001])
        ll = self.model.loglikelihood(theta)
        # A valid hyperparameter vector should yield a finite log likelihood
        assert np.isfinite(ll)

    def test_get_incumbent(self):
        inc, inc_val = self.model.get_incumbent()

        b = np.argmin(self.y)

        np.testing.assert_almost_equal(inc, self.X[b], decimal=5)
        assert inc_val == self.y[b]
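
Both test classes follow the standard unittest pattern, so a runner hook at
the bottom of the test file is enough to execute them; a minimal sketch,
assuming the examples live in a file named test_gaussian_process_mcmc.py:

if __name__ == "__main__":
    unittest.main()

# Alternatively, from the shell:
#   python -m unittest test_gaussian_process_mcmc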
Example #4
File: mtbo.py Project: snpc94/RoBO
import time
import logging
from functools import partial

import numpy as np
import george

# RoBO-internal helpers used below (GaussianProcessMCMC, Direct,
# InformationGainPerUnitCost, MarginalizationGPMCMC, MTBOPrior,
# init_random_uniform, normalization, projected_incumbent_estimation,
# transform, transformation) come from the surrounding RoBO package.

logger = logging.getLogger(__name__)


def mtbo(objective_function,
         lower,
         upper,
         n_tasks=2,
         n_init=2,
         num_iterations=30,
         burnin=100,
         chain_length=200,
         rng=None):
    """
    Interface to MTBO [1], which uses an auxiliary, cheaper task to speed up
    the optimization of a more expensive but similar task.

    [1] Multi-Task Bayesian Optimization
        K. Swersky and J. Snoek and R. Adams
        Proceedings of the 27th International Conference on Advances in Neural Information Processing Systems (NIPS'13)

    Parameters
    ----------
    objective_function: function
        Objective function that will be optimized
    lower: np.array(D,)
        Lower bound of the input space
    upper: np.array(D,)
        Upper bound of the input space
    n_tasks: int
        Number of tasks
    n_init: int
        Number of initial design points
    num_iterations: int
        Number of iterations
    chain_length : int
        The length of the MCMC chain for each walker.
    burnin : int
        The number of burnin steps before the actual MCMC sampling starts.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
        dict with all results
    """

    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"
    assert lower.shape[0] == upper.shape[0], "Dimension mismatch between upper and lower bound"

    time_start = time.time()
    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    n_dims = lower.shape[0]

    # Bookkeeping
    time_func_eval = []
    time_overhead = []
    incumbents = []
    runtime = []

    X = []
    y = []
    c = []

    # Define model for the objective function
    cov_amp = 1  # Covariance amplitude
    kernel = cov_amp  # Per-dimension kernels are multiplied in below

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                ndim=n_dims + 1,
                                                dim=d)

    task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    kernel *= task_kernel

    # Take 3 times more samples than we have hyperparameters
    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        # emcee requires an even number of walkers
        n_hypers += 1

    prior = MTBOPrior(len(kernel) + 1,
                      n_ls=n_dims,
                      n_kt=len(task_kernel),
                      rng=rng)

    model_objective = GaussianProcessMCMC(kernel,
                                          prior=prior,
                                          burnin_steps=burnin,
                                          chain_length=chain_length,
                                          n_hypers=n_hypers,
                                          normalize_input=False,
                                          lower=lower,
                                          upper=upper,
                                          rng=rng)

    # Define model for the cost function
    cost_cov_amp = 1

    cost_kernel = cost_cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        cost_kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                     ndim=n_dims + 1,
                                                     dim=d)

    cost_task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    cost_kernel *= cost_task_kernel

    cost_prior = MTBOPrior(len(cost_kernel) + 1,
                           n_ls=n_dims,
                           n_kt=len(cost_task_kernel),
                           rng=rng)

    model_cost = GaussianProcessMCMC(cost_kernel,
                                     prior=cost_prior,
                                     burnin_steps=burnin,
                                     chain_length=chain_length,
                                     n_hypers=n_hypers,
                                     normalize_input=False,
                                     lower=lower,
                                     upper=upper,
                                     rng=rng)

    # Extend input space by task variable
    extend_lower = np.append(lower, 0)
    extend_upper = np.append(upper, n_tasks - 1)
    is_env = np.zeros(extend_lower.shape[0])
    is_env[-1] = 1

    # Define acquisition function and maximizer
    ig = InformationGainPerUnitCost(model_objective,
                                    model_cost,
                                    extend_lower,
                                    extend_upper,
                                    is_env_variable=is_env,
                                    n_representer=50)
    acquisition_func = MarginalizationGPMCMC(ig)

    wrapper_func = partial(transformation,
                           acq=acquisition_func,
                           lower=lower,
                           upper=upper)
    maximizer = Direct(wrapper_func, extend_lower, extend_upper, verbose=True)

    # Initial Design
    for _ in range(n_init):
        logger.info("Initial design")
        start_time_overhead = time.time()
        # Draw random configuration and evaluate it just on the auxiliary task
        task = 0
        x = init_random_uniform(lower, upper, 1, rng)[0]
        logger.info("Evaluate candidate %s", str(x))
        st = time.time()
        func_val, cost = objective_function(x, task)
        time_func_eval.append(time.time() - st)

        logger.info("Configuration achieved a performance of %f with cost %f",
                    func_val, cost)
        logger.info("Evaluation of this configuration took %f seconds",
                    time_func_eval[-1])

        # Bookkeeping
        config = np.append(x, task)
        X.append(config)
        y.append(func_val)
        c.append(cost)

        # Estimate incumbent as the best observed value so far
        best_idx = np.argmin(y)
        # The incumbent is always reported on the task of interest, so replace
        # the task entry of the best configuration with the target task index
        incumbents.append(np.append(X[best_idx][:-1], n_tasks - 1))

        time_overhead.append(time.time() - start_time_overhead)
        runtime.append(time.time() - time_start)

    X = np.array(X)
    y = np.array(y)
    c = np.array(c)

    for it in range(n_init, num_iterations):
        logger.info("Start iteration %d ... ", it)

        start_time = time.time()

        # Train models
        model_objective.train(transform(X, lower, upper), y, do_optimize=True)
        model_cost.train(transform(X, lower, upper), c, do_optimize=True)

        # Estimate incumbent by projecting all observed points to the task of interest and
        # pick the point with the lowest mean prediction
        incumbent, incumbent_value = projected_incumbent_estimation(
            model_objective,
            transform(X, lower, upper)[:, :-1],
            proj_value=n_tasks - 1)
        incumbent[:-1] = normalization.zero_one_unnormalization(
            incumbent[:-1], lower, upper)
        incumbents.append(incumbent)
        logger.info("Current incumbent %s with estimated performance %f",
                    str(incumbent), incumbent_value)

        # Maximize acquisition function
        acquisition_func.update(model_objective, model_cost)

        new_x = maximizer.maximize()
        new_x[-1] = np.rint(new_x[-1])  # Map float value to discrete task variable

        time_overhead.append(time.time() - start_time)
        logger.info("Optimization overhead was %f seconds", time_overhead[-1])

        # Evaluate the chosen configuration
        logger.info("Evaluate candidate %s", str(new_x))
        start_time = time.time()
        new_y, new_c = objective_function(new_x[:-1], new_x[-1])
        time_func_eval.append(time.time() - start_time)

        logger.info("Configuration achieved a performance of %f with cost %f",
                    new_y, new_c)
        logger.info("Evaluation of this configuration took %f seconds",
                    time_func_eval[-1])

        # Add new observation to the data
        X = np.concatenate((X, new_x[None, :]), axis=0)
        y = np.concatenate((y, np.array([new_y])), axis=0)
        c = np.concatenate((c, np.array([new_c])), axis=0)

        runtime.append(time.time() - time_start)

    # Estimate the final incumbent
    model_objective.train(transform(X, lower, upper), y)
    incumbent, incumbent_value = projected_incumbent_estimation(
        model_objective,
        transform(X, lower, upper)[:, :-1],
        proj_value=n_tasks - 1)
    incumbent[:-1] = normalization.zero_one_unnormalization(
        incumbent[:-1], lower, upper)
    incumbents.append(incumbent)
    logger.info("Final incumbent %s with estimated performance %f",
                str(incumbent), incumbent_value)

    results = dict()
    results["x_opt"] = incumbent[:-1]
    results["trajectory"] = [inc for inc in incumbents]
    results["runtime"] = runtime
    results["overhead"] = time_overhead
    results["time_func_eval"] = time_func_eval

    return results
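
A minimal usage sketch for mtbo; the toy objective below is an assumption
used only for illustration. As the evaluation calls above show, the objective
must accept (x, task) and return a (function value, cost) pair, with task 0
acting as the cheap auxiliary and task n_tasks - 1 as the target task:

import numpy as np

def toy_objective(x, task):
    y = np.sum((x - 0.5) ** 2)
    if task == 0:
        return y + 0.1, 1.0  # cheap but biased auxiliary task
    return y, 10.0           # expensive target task

lower = np.zeros(2)
upper = np.ones(2)

results = mtbo(toy_objective, lower, upper,
               n_tasks=2, n_init=2, num_iterations=10,
               rng=np.random.RandomState(42))
print(results["x_opt"])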