Example #1
    def test_pi(self):
        pi = PI(self.model)
        acq = MarginalizationGPMCMC(pi)

        X_test = np.random.rand(5, 2)
        a = acq.compute(X_test, derivative=False)
        assert a.shape[0] == X_test.shape[0]
        assert len(a.shape) == 1
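The fixture behind self.model is not shown. A minimal sketch of a compatible setup, assuming RoBO's usual module layout for the imports (the paths are a guess, not confirmed by the snippet):

import george
import numpy as np

# Assumed import paths, following RoBO's layout
from robo.models.gaussian_process_mcmc import GaussianProcessMCMC
from robo.priors.default_priors import DefaultPrior
from robo.acquisition_functions.pi import PI
from robo.acquisition_functions.marginalization import MarginalizationGPMCMC

rng = np.random.RandomState(42)
lower, upper = np.zeros(2), np.ones(2)

# Same kernel/prior construction used throughout the examples below
kernel = 1 * george.kernels.Matern52Kernel(np.ones(2), ndim=2)
prior = DefaultPrior(len(kernel) + 1)
model = GaussianProcessMCMC(kernel, prior=prior, n_hypers=10,
                            chain_length=100, burnin_steps=100,
                            rng=rng, lower=lower, upper=upper)
model.train(rng.rand(10, 2), rng.rand(10))

pi = PI(model)
acq = MarginalizationGPMCMC(pi)
a = acq.compute(rng.rand(5, 2), derivative=False)  # shape (5,)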
Example #2
    def test_pi(self):
        pi = PI(self.model)
        acq = MarginalizationGPMCMC(pi)

        X_test = np.random.rand(5, 2)
        a = acq.compute(X_test, derivative=False)
        assert a.shape[0] == X_test.shape[0]
        assert len(a.shape) == 1
Example #3
    def suggest_configuration(self):
        if self.X is None and self.y is None:
            new_x = init_random_uniform(self.lower, self.upper,
                                        n_points=1, rng=self.rng)[0, :]

        elif self.X.shape[0] == 1:
            # We need at least 2 data points to train a GP
            new_x = init_random_uniform(self.lower, self.upper,
                                        n_points=1, rng=self.rng)[0, :]

        else:
            cov_amp = 1
            n_dims = self.lower.shape[0]

            initial_ls = np.ones([n_dims])
            exp_kernel = george.kernels.Matern52Kernel(initial_ls,
                                                       ndim=n_dims)
            kernel = cov_amp * exp_kernel

            prior = DefaultPrior(len(kernel) + 1)

            model = GaussianProcessMCMC(kernel, prior=prior,
                                        n_hypers=self.n_hypers,
                                        chain_length=self.chain_length,
                                        burnin_steps=self.burnin,
                                        normalize_input=False,
                                        normalize_output=True,
                                        rng=self.rng,
                                        lower=self.lower,
                                        upper=self.upper)

            a = LogEI(model)

            acquisition_func = MarginalizationGPMCMC(a)

            max_func = Direct(acquisition_func, self.lower, self.upper, verbose=False)

            model.train(self.X, self.y)

            acquisition_func.update(model)

            new_x = max_func.maximize()

        next_config = Configuration(self.config_space, vector=new_x)

        # Transform to sacred configuration
        result = configspace_config_to_sacred(next_config)

        return result
Example #4
File: fabolas.py  Project: fuhuifang/RoBo
def fabolas(objective_function, lower, upper, s_min, s_max,
            n_init=40, num_iterations=100, subsets=[256, 128, 64], inc_estimation="mean",
            burnin=100, chain_length=100, n_hypers=12, output_path=None, rng=None):
    """
    Fast Bayesian Optimization of Machine Learning Hyperparameters
    on Large Datasets

    Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets
    A. Klein and S. Falkner and S. Bartels and P. Hennig and F. Hutter
    http://arxiv.org/abs/1605.07079

    Parameters
    ----------
    objective_function: function
        Objective function that will be optimized
    lower: np.array(D,)
        Lower bound of the input space
    upper: np.array(D,)
        Upper bound of the input space
    s_min: int
        Minimum number of data points for the training data set
    s_max: int
        Maximum number of data points for the training data set
    n_init: int
        Number of initial design points
    n_hypers: int
        Number of hyperparameter samples for the GP
    subsets: list
        The ratios of the subset sizes for the initial design.
        For example, if subsets=[256, 128, 64], then each point of the
        initial design is evaluated on subsets of size s_max/256, s_max/128 and s_max/64.
    num_iterations: int
        Number of iterations
    chain_length : int
        The length of the MCMC chain for each walker.
    burnin : int
        The number of burnin steps before the actual MCMC sampling starts.
    output_path: string
        Specifies the path where the intermediate output after each iteration will be saved.
        If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator
    inc_estimation: string
        Specifies how the incumbent is estimated:
            "last_seen" : determined the incumbent as the configuration that achieved the highest observed value
            "mean" :  determined the incumbent as the configuration that achieved the highest predicted mean value
    Returns
    -------
        dict
    """

    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"
    assert lower.shape[0] == upper.shape[0], "Dimension mismatch between upper and lower bound"

    time_start = time.time()
    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    n_dims = lower.shape[0]

    # Bookkeeping
    time_func_eval = []
    time_overhead = []
    incumbents = []
    runtime = []

    X = []
    y = []
    c = []

    # Define model for the objective function
    cov_amp = 1  # Covariance amplitude
    kernel = cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                ndim=n_dims+1, dim=d)

    # Kernel for the environmental variable
    # We use (1-s)**2 as basis function for the Bayesian linear kernel
    degree = 1
    env_kernel = george.kernels.BayesianLinearRegressionKernel(n_dims+1,
                                                               dim=n_dims,
                                                               degree=degree)
    env_kernel[:] = np.ones([degree + 1]) * 0.1

    kernel *= env_kernel

    # Take 3 times more samples than we have hyperparameters
    if n_hypers < 2*len(kernel):
        n_hypers = 3 * len(kernel)
        if n_hypers % 2 == 1:
            n_hypers += 1

    prior = EnvPrior(len(kernel) + 1,
                     n_ls=n_dims,
                     n_lr=(degree + 1),
                     rng=rng)

    quadratic_bf = lambda x: (1 - x) ** 2
    linear_bf = lambda x: x

    model_objective = FabolasGPMCMC(kernel,
                                    prior=prior,
                                    burnin_steps=burnin,
                                    chain_length=chain_length,
                                    n_hypers=n_hypers,
                                    normalize_output=False,
                                    basis_func=quadratic_bf,
                                    lower=lower,
                                    upper=upper,
                                    rng=rng)

    # Define model for the cost function
    cost_cov_amp = 1

    cost_kernel = cost_cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        cost_kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                     ndim=n_dims+1, dim=d)

    cost_degree = 1
    cost_env_kernel = george.kernels.BayesianLinearRegressionKernel(n_dims+1,
                                                                    dim=n_dims,
                                                                    degree=cost_degree)
    cost_env_kernel[:] = np.ones([cost_degree + 1]) * 0.1

    cost_kernel *= cost_env_kernel

    cost_prior = EnvPrior(len(cost_kernel) + 1,
                          n_ls=n_dims,
                          n_lr=(cost_degree + 1),
                          rng=rng)

    model_cost = FabolasGPMCMC(cost_kernel,
                               prior=cost_prior,
                               burnin_steps=burnin,
                               chain_length=chain_length,
                               n_hypers=n_hypers,
                               basis_func=linear_bf,
                               normalize_output=False,
                               lower=lower,
                               upper=upper,
                               rng=rng)

    # Extend input space by task variable
    extend_lower = np.append(lower, 0)
    extend_upper = np.append(upper, 1)
    is_env = np.zeros(extend_lower.shape[0])
    is_env[-1] = 1

    # Define acquisition function and maximizer
    ig = InformationGainPerUnitCost(model_objective,
                                    model_cost,
                                    extend_lower,
                                    extend_upper,
                                    sampling_acquisition=EI,
                                    is_env_variable=is_env,
                                    n_representer=50)
    acquisition_func = MarginalizationGPMCMC(ig)
    # maximizer = Direct(acquisition_func, extend_lower, extend_upper, verbose=True, n_func_evals=200)
    # maximizer = DifferentialEvolution(acquisition_func, extend_lower, extend_upper)
    maximizer = RandomSampling(acquisition_func, extend_lower, extend_upper)

    # Initial Design
    logger.info("Initial Design")
    x_init = init_latin_hypercube_sampling(lower, upper, n_init, rng)
    for it in range(n_init):

        for subset in subsets:
            start_time_overhead = time.time()
            # s = int(s_max / float(subsets[it % len(subsets)]))
            s = int(s_max / float(subset))

            x = x_init[it]
            logger.info("Evaluate %s on subset size %d", str(x), s)
            st = time.time()
            func_val, cost = objective_function(x, s)
            time_func_eval.append(time.time() - st)

            logger.info("Configuration achieved a performance of %f with cost %f", func_val, cost)
            logger.info("Evaluation of this configuration took %f seconds", time_func_eval[-1])

            # Bookkeeping
            config = np.append(x, transform(s, s_min, s_max))
            X.append(config)
            y.append(np.log(func_val))  # Model the target function on a logarithmic scale
            c.append(np.log(cost))  # Model the cost on a logarithmic scale

            # Estimate incumbent as the best observed value so far
            best_idx = np.argmin(y)
            #incumbents.append(X[best_idx][:-1])  # Incumbent is always on s=s_max
            incumbents.append(X[best_idx])
            
            time_overhead.append(time.time() - start_time_overhead)
            runtime.append(time.time() - time_start)

            if output_path is not None:
                data = dict()
                data["optimization_overhead"] = time_overhead[it]
                data["runtime"] = runtime[it]
                data["incumbent"] = incumbents[it].tolist()
                data["time_func_eval"] = time_func_eval[it]
                data["iteration"] = it

                json.dump(data, open(os.path.join(output_path, "fabolas_iter_%d.json" % it), "w"))

    X = np.array(X)
    y = np.array(y)
    c = np.array(c)

    for it in range(n_init, num_iterations):
        logger.info("Start iteration %d ... ", it)

        start_time = time.time()

        # Train models
        model_objective.train(X, y, do_optimize=True)
        model_cost.train(X, c, do_optimize=True)

        if inc_estimation == "last_seen":
            # Estimate incumbent as the best observed value so far
            best_idx = np.argmin(y)
            incumbent = X[best_idx]
            #incumbent = X[best_idx][:-1]
            #incumbent = np.append(incumbent, 1)
            incumbent_value = y[best_idx]
        else:
            # Estimate incumbent by projecting all observed points to the task of interest and
            # pick the point with the lowest mean prediction
            incumbent, incumbent_value, idx = projected_incumbent_estimation(model_objective, X[:, :-1],
                                                                        proj_value=1)
            incumbent = np.append(incumbent, X[idx][-1])
        #incumbents.append(incumbent[:-1])
        logger.info("Current incumbent %s with estimated performance %f",
                    str(incumbent), np.exp(incumbent_value))

        # Maximize acquisition function
        acquisition_func.update(model_objective, model_cost)
        new_x = maximizer.maximize()

        s = retransform(new_x[-1], s_min, s_max)  # Map s from log space to original linear space

        time_overhead.append(time.time() - start_time)
        logger.info("Optimization overhead was %f seconds", time_overhead[-1])

        # Evaluate the chosen configuration
        logger.info("Evaluate candidate %s on subset size %f", str(new_x[:-1]), s)
        start_time = time.time()
        new_y, new_c = objective_function(new_x[:-1], s)
        time_func_eval.append(time.time() - start_time)

        logger.info("Configuration achieved a performance of %f with cost %f", new_y, new_c)
        logger.info("Evaluation of this configuration took %f seconds", time_func_eval[-1])

        # Add new observation to the data
        X = np.concatenate((X, new_x[None, :]), axis=0)
        y = np.concatenate((y, np.log(np.array([new_y]))), axis=0)  # Model the target function on a logarithmic scale
        c = np.concatenate((c, np.log(np.array([new_c]))), axis=0)  # Model the cost function on a logarithmic scale

        runtime.append(time.time() - time_start)

        if output_path is not None:
            data = dict()
            data["optimization_overhead"] = time_overhead[it]
            data["runtime"] = runtime[it]
            data["incumbent"] = incumbents[it].tolist()
            data["time_func_eval"] = time_func_eval[it]
            data["iteration"] = it

            json.dump(data, open(os.path.join(output_path, "fabolas_iter_%d.json" % it), "w"))

    # Estimate the final incumbent
    model_objective.train(X, y, do_optimize=True)
    incumbent, incumbent_value, idx = projected_incumbent_estimation(model_objective, X[:, :-1],
                                                                proj_value=1)
    incumbent = np.append(incumbent, X[idx][-1])
    logger.info("Final incumbent %s with estimated performance %f",
                str(incumbent), incumbent_value)

    results = dict()
    results["x_opt"] = incumbent[:-1].tolist()
    results["incumbents"] = [inc.tolist() for inc in incumbents]
    results["runtime"] = runtime
    results["overhead"] = time_overhead
    results["time_func_eval"] = time_func_eval
    results["X"] = [x.tolist() for x in X]
    results["y"] = [yi.tolist() for yi in y]
    results["c"] = [ci.tolist() for ci in c]

    return results
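A minimal usage sketch for fabolas: the objective must accept a configuration x and a subset size s and return a (validation loss, cost) pair, both strictly positive since the code models them on a log scale. The toy objective here is illustrative only:

import numpy as np

def toy_objective(x, s):
    # Stand-in for training on a subset of size s:
    # loss shrinks as s grows, cost grows with s
    loss = float(np.sum(x ** 2)) + 100.0 / s
    cost = float(s)
    return loss, cost

results = fabolas(toy_objective,
                  lower=np.array([-1.0, -1.0]),
                  upper=np.array([1.0, 1.0]),
                  s_min=100, s_max=50000,
                  n_init=5, num_iterations=20,
                  rng=np.random.RandomState(0))
print(results["x_opt"])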
Example #5
def bayesian_optimization(objective_function,
                          lower,
                          upper,
                          num_iterations=30,
                          maximizer="random",
                          acquisition_func="log_ei",
                          model_type="gp_mcmc",
                          n_init=3,
                          rng=None,
                          output_path=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"direct", "cmaes", "random", "scipy"}
        The optimizer for the acquisition function. NOTE: "cmaes" only works in D > 1 dimensions
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model_type: {"gp", "gp_mcmc", "rf"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration will be saved.
        If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
        dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model_type == "gp":
        model = GaussianProcess(kernel,
                                prior=prior,
                                rng=rng,
                                normalize_output=False,
                                normalize_input=True,
                                lower=lower,
                                upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel,
                                    prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=True,
                                    rng=rng,
                                    lower=lower,
                                    upper=upper)

    elif model_type == "rf":
        model = RandomForest(rng=rng)

    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    if acquisition_func == "ei":
        a = EI(model)
    elif acquisition_func == "log_ei":
        a = LogEI(model)
    elif acquisition_func == "pi":
        a = PI(model)
    elif acquisition_func == "lcb":
        a = LCB(model)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if model_type == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)
    else:
        acquisition_func = a

    if maximizer == "cmaes":
        max_func = CMAES(acquisition_func,
                         lower,
                         upper,
                         verbose=False,
                         rng=rng)
    elif maximizer == "direct":
        max_func = Direct(acquisition_func, lower, upper, verbose=True)
    elif maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)

    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    bo = BayesianOptimization(objective_function,
                              lower,
                              upper,
                              acquisition_func,
                              model,
                              max_func,
                              initial_points=n_init,
                              rng=rng,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]
    return results
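A minimal usage sketch for this interface, minimizing an illustrative 2-D quadratic:

import numpy as np

def quadratic(x):
    # x is a np.ndarray of shape (D,); return a scalar
    return float(np.sum((x - 0.5) ** 2))

results = bayesian_optimization(quadratic,
                                lower=np.zeros(2),
                                upper=np.ones(2),
                                num_iterations=20,
                                maximizer="random",
                                acquisition_func="log_ei",
                                model_type="gp_mcmc",
                                n_init=3,
                                rng=np.random.RandomState(1))
print(results["x_opt"], results["f_opt"])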
Example #6
def bayesian_optimization(objective_function,
                          lower,
                          upper,
                          num_iterations=30,
                          X_init=None,
                          Y_init=None,
                          maximizer="random",
                          acquisition_func="log_ei",
                          model_type="gp_mcmc",
                          n_init=3,
                          rng=None,
                          output_path=None,
                          kernel=None,
                          sampling_method="origin",
                          distance="cosine",
                          replacement=True,
                          pool=None,
                          best=None):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy
        array (D,) as input and returns the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    X_init: np.ndarray(N,D)
        Initial points to warm-start BO
    Y_init: np.ndarray(N,1)
        Function values of the initial points
    maximizer: {"random", "scipy", "differential_evolution"}
        The optimizer for the acquisition function.
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model_type: {"gp", "gp_mcmc", "rf", "bohamiann", "dngo"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it
        is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration will be saved.
        If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator
    kernel: george.kernels.Kernel
        {"constant", "polynomial", "linear", "dotproduct",
         "exp", "expsquared", "matern32", "matern52", "rationalquadratic",
         "cosine", "expsine2", "heuristic"}
        Specifies the kernel for the Gaussian process.
    sampling_method: {"origin", "approx", "exact"}
        Specifies the method used to choose the next sample to update the model.
        approx: choose the sample in the candidate pool that is closest (measured by the
        distance argument) to the one returned by maximizing the acquisition function.
        exact: evaluate all samples in the candidate pool on the acquisition function
        and choose the one with the maximum output.
    distance: {"cosine", "euclidean"}
        The distance measure used for approximate sampling.
    replacement: boolean
        Whether to sample from pool with replacement.
    pool: np.ndarray(N,D)
        Candidate pool containing possible x
    best: float
        Stop training when the best point is sampled.
    Returns
    -------
        dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    #n_dims = lower.shape[0]

    #initial_ls = np.ones([n_dims])

    # if kernel == "constant":
    #     exp_kernel = george.kernels.ConstantKernel(1, ndim=n_dims)
    # elif kernel == "polynomial":
    #     exp_kernel = george.kernels.PolynomialKernel(log_sigma2=1, order=3, ndim=n_dims)
    # elif kernel == "linear":
    #     exp_kernel = george.kernels.LinearKernel(log_gamma2=1, order=3, ndim=n_dims)
    # elif kernel == "dotproduct":
    #     exp_kernel = george.kernels.DotProductKernel(ndim=n_dims)
    # elif kernel == "exp":
    #     exp_kernel = george.kernels.ExpKernel(initial_ls, ndim=n_dims)
    # elif kernel == "expsquared":
    #     exp_kernel = george.kernels.ExpSquaredKernel(initial_ls, ndim=n_dims)
    # elif kernel == "matern32":
    #     exp_kernel = george.kernels.Matern32Kernel(initial_ls, ndim=n_dims)
    # elif kernel == "matern52":
    #     exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    # elif kernel == "rationalquadratic":
    #     exp_kernel = george.kernels.RationalQuadraticKernel(log_alpha=1, metric=initial_ls, ndim=n_dims)
    # elif kernel == "cosine":
    #     exp_kernel = george.kernels.CosineKernel(4, ndim=n_dims)
    # elif kernel == "expsine2":
    #     exp_kernel = george.kernels.ExpSine2Kernel(1, 2, ndim=n_dims)
    # elif kernel == "heuristic":
    #     exp_kernel = george.kernels.PythonKernel(heuristic_kernel_function, ndim=n_dims)
    # else:
    #     raise ValueError("'{}' is not a valid kernel".format(kernel))

    kernel = cov_amp * kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model_type == "gp":
        model = GaussianProcess(kernel,
                                prior=prior,
                                rng=rng,
                                normalize_output=False,
                                normalize_input=True,
                                lower=lower,
                                upper=upper)
    elif model_type == "gp_mcmc":
        model = GaussianProcessMCMC(kernel,
                                    prior=prior,
                                    n_hypers=n_hypers,
                                    chain_length=200,
                                    burnin_steps=100,
                                    normalize_input=True,
                                    normalize_output=False,
                                    rng=rng,
                                    lower=lower,
                                    upper=upper)

    elif model_type == "rf":
        model = RandomForest(rng=rng)

    elif model_type == "bohamiann":
        model = WrapperBohamiann()

    elif model_type == "dngo":
        model = DNGO()

    else:
        raise ValueError("'{}' is not a valid model".format(model_type))

    if acquisition_func == "ei":
        a = EI(model)
    elif acquisition_func == "log_ei":
        a = LogEI(model)
    elif acquisition_func == "pi":
        a = PI(model)
    elif acquisition_func == "lcb":
        a = LCB(model)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if model_type == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)
    else:
        acquisition_func = a

    if maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "differential_evolution":
        max_func = DifferentialEvolution(acquisition_func,
                                         lower,
                                         upper,
                                         rng=rng)
    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    if sampling_method == "exact":
        max_func = ExactSampling(acquisition_func,
                                 lower,
                                 upper,
                                 pool,
                                 replacement,
                                 rng=rng)
        init_design = init_exact_random
    elif sampling_method == "approx":
        max_func = ApproxSampling(acquisition_func,
                                  lower,
                                  upper,
                                  pool,
                                  replacement,
                                  distance,
                                  rng=rng)
        init_design = init_exact_random
    else:
        init_design = init_latin_hypercube_sampling

    bo = BayesianOptimization(objective_function,
                              lower,
                              upper,
                              acquisition_func,
                              model,
                              max_func,
                              pool,
                              best,
                              sampling_method,
                              distance,
                              replacement,
                              initial_points=n_init,
                              rng=rng,
                              initial_design=init_design,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations, X=X_init, y=Y_init)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]
    return results
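This variant adds pool-based sampling over a discrete candidate set. A sketch of a call using an explicit pool and kernel (here the kernel is built directly with george, mirroring the commented-out selection block above; the objective is illustrative):

import george
import numpy as np

rng = np.random.RandomState(0)
n_dims = 2
pool = rng.rand(500, n_dims)  # discrete candidate pool

def objective(x):
    return float(np.sum(x ** 2))

kernel = george.kernels.Matern52Kernel(np.ones(n_dims), ndim=n_dims)

results = bayesian_optimization(objective,
                                lower=np.zeros(n_dims),
                                upper=np.ones(n_dims),
                                num_iterations=20,
                                kernel=kernel,
                                sampling_method="approx",
                                distance="cosine",
                                replacement=False,
                                pool=pool,
                                rng=rng)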
Example #7
    def build_acquisition_func(self):
        """Build a marginalized acquisition function with MCMC."""
        return MarginalizationGPMCMC(super(RoBO_GP_MCMC, self).build_acquisition_func())
Example #8
def entropy_search(objective_function,
                   lower,
                   upper,
                   num_iterations=30,
                   maximizer="random",
                   model="gp_mcmc",
                   n_init=3,
                   output_path=None,
                   rng=None):
    """
    Entropy search for global black box optimization problems. This is a reimplementation of the entropy
    search algorithm by Hennig and Schuler [1].

    [1] Entropy search for information-efficient global optimization.
        P. Hennig and C. Schuler.
        JMLR, (1), 2012.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy array (D,) as input and returns
        the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"random", "scipy", "differential_evolution"}
        Defines how the acquisition function is maximized.
    model: {"gp", "gp_mcmc"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it is <= num_iterations.
    output_path: string
        Specifies the path where the intermediate output after each iteration will be saved.
        If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
        dict with all results
    """
    assert upper.shape[0] == lower.shape[0], "Dimension mismatch"
    assert np.all(lower < upper), "Lower bound >= upper bound"
    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model == "gp":
        gp = GaussianProcess(kernel,
                             prior=prior,
                             rng=rng,
                             normalize_output=False,
                             normalize_input=True,
                             lower=lower,
                             upper=upper)
    elif model == "gp_mcmc":
        gp = GaussianProcessMCMC(kernel,
                                 prior=prior,
                                 n_hypers=n_hypers,
                                 chain_length=200,
                                 burnin_steps=100,
                                 normalize_input=True,
                                 normalize_output=False,
                                 rng=rng,
                                 lower=lower,
                                 upper=upper)
    else:
        print("ERROR: %s is not a valid model!" % model)
        return

    a = InformationGain(gp, lower=lower, upper=upper, sampling_acquisition=EI)

    if model == "gp":
        acquisition_func = a
    elif model == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)

    if maximizer == "random":
        max_func = RandomSampling(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func, lower, upper, rng=rng)
    elif maximizer == "differential_evolution":
        max_func = DifferentialEvolution(acquisition_func,
                                         lower,
                                         upper,
                                         rng=rng)
    else:
        print(
            "ERROR: %s is not a valid function to maximize the acquisition function!"
            % maximizer)
        return

    bo = BayesianOptimization(objective_function,
                              lower,
                              upper,
                              acquisition_func,
                              gp,
                              max_func,
                              initial_design=init_latin_hypercube_sampling,
                              initial_points=n_init,
                              rng=rng,
                              output_path=output_path)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    results["X"] = [x.tolist() for x in bo.X]
    results["y"] = [y for y in bo.y]
    return results
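Usage follows the same pattern as bayesian_optimization; a minimal sketch with an illustrative objective:

import numpy as np

def objective(x):
    return float((x[0] - 0.3) ** 2 + (x[1] - 0.7) ** 2)

results = entropy_search(objective,
                         lower=np.zeros(2),
                         upper=np.ones(2),
                         num_iterations=15,
                         model="gp_mcmc",
                         maximizer="random",
                         n_init=3,
                         rng=np.random.RandomState(2))
print(results["x_opt"])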
Example #9
def mtbo(objective_function,
         lower,
         upper,
         n_tasks=2,
         n_init=2,
         num_iterations=30,
         burnin=100,
         chain_length=200,
         n_hypers=20,
         output_path=None,
         rng=None):
    """
    Interface to MTBO[1] which uses an auxiliary cheaper task to speed up the optimization
    of a more expensive but similar task.

    [1] Multi-Task Bayesian Optimization
        K. Swersky and J. Snoek and R. Adams
        Proceedings of the 27th International Conference on Advances in Neural Information Processing Systems (NIPS'13)

    Parameters
    ----------
    objective_function: function
        Objective function that will be optimized
    lower: np.array(D,)
        Lower bound of the input space
    upper: np.array(D,)
        Upper bound of the input space
    n_tasks: int
        Number of tasks
    n_init: int
        Number of initial design points
    num_iterations: int
        Number of iterations
    chain_length : int
        The length of the MCMC chain for each walker.
    burnin : int
        The number of burnin steps before the actual MCMC sampling starts.
    output_path: string
        Specifies the path where the intermediate output after each iteration will be saved.
        If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
        dict with all results
    """

    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"
    assert lower.shape[0] == upper.shape[0], "Dimension mismatch between upper and lower bound"

    time_start = time.time()
    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    n_dims = lower.shape[0]

    # Bookkeeping
    time_func_eval = []
    time_overhead = []
    incumbents = []
    runtime = []

    X = []
    y = []
    c = []

    # Define model for the objective function
    cov_amp = 1  # Covariance amplitude
    kernel = cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                ndim=n_dims + 1,
                                                dim=d)

    task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    kernel *= task_kernel

    # Take 3 times more samples than we have hyperparameters
    if n_hypers < 2 * len(kernel):
        n_hypers = 3 * len(kernel)
        if n_hypers % 2 == 1:
            n_hypers += 1

    prior = MTBOPrior(len(kernel) + 1,
                      n_ls=n_dims,
                      n_kt=len(task_kernel),
                      rng=rng)

    model_objective = MTBOGPMCMC(kernel,
                                 prior=prior,
                                 burnin_steps=burnin,
                                 chain_length=chain_length,
                                 n_hypers=n_hypers,
                                 lower=lower,
                                 upper=upper,
                                 rng=rng)

    # Define model for the cost function
    cost_cov_amp = 1

    cost_kernel = cost_cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        cost_kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                     ndim=n_dims + 1,
                                                     dim=d)

    cost_task_kernel = george.kernels.TaskKernel(n_dims + 1, n_dims, n_tasks)
    cost_kernel *= cost_task_kernel

    cost_prior = MTBOPrior(len(cost_kernel) + 1,
                           n_ls=n_dims,
                           n_kt=len(cost_task_kernel),
                           rng=rng)

    model_cost = MTBOGPMCMC(cost_kernel,
                            prior=cost_prior,
                            burnin_steps=burnin,
                            chain_length=chain_length,
                            n_hypers=n_hypers,
                            lower=lower,
                            upper=upper,
                            rng=rng)

    # Extend input space by task variable
    extend_lower = np.append(lower, 0)
    extend_upper = np.append(upper, n_tasks - 1)
    is_env = np.zeros(extend_lower.shape[0])
    is_env[-1] = 1

    # Define acquisition function and maximizer
    ig = InformationGainPerUnitCost(model_objective,
                                    model_cost,
                                    extend_lower,
                                    extend_upper,
                                    sampling_acquisition=EI,
                                    is_env_variable=is_env,
                                    n_representer=50)
    acquisition_func = MarginalizationGPMCMC(ig)
    maximizer = Direct(acquisition_func,
                       extend_lower,
                       extend_upper,
                       n_func_evals=200)

    # Initial Design
    logger.info("Initial Design")
    for it in range(n_init):
        start_time_overhead = time.time()
        # Draw random configuration and evaluate it just on the auxiliary task
        task = 0
        x = init_random_uniform(lower, upper, 1, rng)[0]
        logger.info("Evaluate candidate %s", str(x))
        st = time.time()
        func_val, cost = objective_function(x, task)
        time_func_eval.append(time.time() - st)

        logger.info("Configuration achieved a performance of %f with cost %f",
                    func_val, cost)
        logger.info("Evaluation of this configuration took %f seconds",
                    time_func_eval[-1])

        # Bookkeeping
        config = np.append(x, task)
        X.append(config)
        y.append(np.log(
            func_val))  # Model the target function on a logarithmic scale
        c.append(np.log(cost))  # Model the cost on a logarithmic scale

        # Estimate incumbent as the best observed value so far
        best_idx = np.argmin(y)
        incumbents.append(X[best_idx][:-1])

        time_overhead.append(time.time() - start_time_overhead)
        runtime.append(time.time() - time_start)

        if output_path is not None:
            data = dict()
            data["optimization_overhead"] = time_overhead[it]
            data["runtime"] = runtime[it]
            data["incumbent"] = incumbents[it].tolist()
            data["time_func_eval"] = time_func_eval[it]
            data["iteration"] = it

            json.dump(
                data,
                open(os.path.join(output_path, "mtbo_iter_%d.json" % it), "w"))

    X = np.array(X)
    y = np.array(y)
    c = np.array(c)

    for it in range(n_init, num_iterations):
        logger.info("Start iteration %d ... ", it)

        start_time = time.time()

        # Train models
        model_objective.train(X, y, do_optimize=True)
        model_cost.train(X, c, do_optimize=True)

        # Estimate incumbent as the best observed value so far
        best_idx = np.argmin(y)
        incumbent = X[best_idx][:-1]
        incumbent = np.append(incumbent, 1)
        incumbent_value = y[best_idx]

        incumbents.append(incumbent[:-1])
        logger.info("Current incumbent %s with estimated performance %f",
                    str(incumbent), incumbent_value)

        # Maximize acquisition function
        acquisition_func.update(model_objective, model_cost)

        new_x = maximizer.maximize()
        new_x[-1] = np.rint(
            new_x[-1])  # Map float value to discrete task variable

        time_overhead.append(time.time() - start_time)
        logger.info("Optimization overhead was %f seconds", time_overhead[-1])

        # Evaluate the chosen configuration
        logger.info("Evaluate candidate %s", str(new_x))
        start_time = time.time()
        new_y, new_c = objective_function(new_x[:-1], new_x[-1])
        time_func_eval.append(time.time() - start_time)

        logger.info("Configuration achieved a performance of %f with cost %f",
                    new_y, new_c)
        logger.info("Evaluation of this configuration took %f seconds",
                    time_func_eval[-1])

        # Add new observation to the data
        X = np.concatenate((X, new_x[None, :]), axis=0)
        y = np.concatenate(
            (y, np.log(np.array([new_y]))),
            axis=0)  # Model the target function on a logarithmic scale
        c = np.concatenate(
            (c, np.log(np.array([new_c]))),
            axis=0)  # Model the cost function on a logarithmic scale

        runtime.append(time.time() - time_start)

        if output_path is not None:
            data = dict()
            data["optimization_overhead"] = time_overhead[it]
            data["runtime"] = runtime[it]
            data["incumbent"] = incumbents[it].tolist()
            data["time_func_eval"] = time_func_eval[it]
            data["iteration"] = it

            json.dump(
                data,
                open(os.path.join(output_path, "mtbo_iter_%d.json" % it), "w"))

    # Estimate the final incumbent
    model_objective.train(X, y)
    incumbent, incumbent_value = projected_incumbent_estimation(
        model_objective, X[:, :-1], proj_value=n_tasks - 1)
    logger.info("Final incumbent %s with estimated performance %f",
                str(incumbent), incumbent_value)

    results = dict()
    results["x_opt"] = incumbent[:-1].tolist()
    results["incumbents"] = [inc.tolist() for inc in incumbents]
    results["runtime"] = runtime
    results["overhead"] = time_overhead
    results["time_func_eval"] = time_func_eval

    results["X"] = X
    results["y"] = y
    results["c"] = c

    return results
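A usage sketch for mtbo: the objective receives (x, task), where task 0 is the cheap auxiliary task and task n_tasks - 1 is the expensive target, and returns a (value, cost) pair; both must be positive because the code takes their logarithms. The objective below is illustrative:

import numpy as np

def objective(x, task):
    task = int(task)
    # Task 0: cheap, biased approximation; task 1: expensive target
    value = float(np.sum((x - 0.5) ** 2)) + (0.1 if task == 0 else 0.0) + 1e-3
    cost = 1.0 if task == 0 else 10.0
    return value, cost

results = mtbo(objective,
               lower=np.zeros(2),
               upper=np.ones(2),
               n_tasks=2, n_init=2, num_iterations=15,
               rng=np.random.RandomState(3))
print(results["x_opt"])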
Example #10
def bayesian_optimization(objective_function,
                          lower,
                          upper,
                          num_iterations=30,
                          maximizer="direct",
                          acquisition_func="log_ei",
                          model="gp_mcmc",
                          n_init=3,
                          rng=None):
    """
    General interface for Bayesian optimization for global black box optimization problems.

    Parameters
    ----------
    objective_function: function
        The objective function that is minimized. This function gets a numpy array (D,) as input and returns
        the function value (scalar)
    lower: np.ndarray (D,)
        The lower bound of the search space
    upper: np.ndarray (D,)
        The upper bound of the search space
    num_iterations: int
        The number of iterations (initial design + BO)
    maximizer: {"direct", "cmaes"}
        Defines how the acquisition function is maximized. NOTE: "cmaes" only works in D > 1 dimensions
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    model: {"gp", "gp_mcmc"}
        The model for the objective function.
    n_init: int
        Number of points for the initial design. Make sure that it is <= num_iterations.
    rng: numpy.random.RandomState
        Random number generator

    Returns
    -------
        dict with all results
    """
    assert upper.shape[0] == lower.shape[0]
    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"

    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    cov_amp = 2
    n_dims = lower.shape[0]

    initial_ls = np.ones([n_dims])
    exp_kernel = george.kernels.Matern52Kernel(initial_ls, ndim=n_dims)
    kernel = cov_amp * exp_kernel

    prior = DefaultPrior(len(kernel) + 1)

    n_hypers = 3 * len(kernel)
    if n_hypers % 2 == 1:
        n_hypers += 1

    if model == "gp":
        gp = GaussianProcess(kernel,
                             prior=prior,
                             rng=rng,
                             normalize_output=True,
                             normalize_input=True,
                             lower=lower,
                             upper=upper)
    elif model == "gp_mcmc":
        gp = GaussianProcessMCMC(kernel,
                                 prior=prior,
                                 n_hypers=n_hypers,
                                 chain_length=200,
                                 burnin_steps=100,
                                 normalize_input=True,
                                 normalize_output=True,
                                 rng=rng,
                                 lower=lower,
                                 upper=upper)
    else:
        print("ERROR: %s is not a valid model!" % model)
        return

    if acquisition_func == "ei":
        a = EI(gp)
    elif acquisition_func == "log_ei":
        a = LogEI(gp)
    elif acquisition_func == "pi":
        a = PI(gp)
    elif acquisition_func == "lcb":
        a = LCB(gp)
    else:
        print("ERROR: %s is not a valid acquisition function!" %
              acquisition_func)
        return

    if model == "gp":
        acquisition_func = a
    elif model == "gp_mcmc":
        acquisition_func = MarginalizationGPMCMC(a)

    if maximizer == "cmaes":
        max_func = CMAES(acquisition_func,
                         lower,
                         upper,
                         verbose=False,
                         rng=rng)
    elif maximizer == "direct":
        max_func = Direct(acquisition_func, lower, upper, verbose=False)
    else:
        print(
            "ERROR: %s is not a valid function to maximize the acquisition function!"
            % maximizer)
        return

    bo = BayesianOptimization(objective_function,
                              lower,
                              upper,
                              acquisition_func,
                              gp,
                              max_func,
                              initial_points=n_init,
                              rng=rng)

    x_best, f_min = bo.run(num_iterations)

    results = dict()
    results["x_opt"] = x_best
    results["f_opt"] = f_min
    results["incumbents"] = [inc for inc in bo.incumbents]
    results["incumbent_values"] = [val for val in bo.incumbents_values]
    results["runtime"] = bo.runtime
    results["overhead"] = bo.time_overhead
    return results
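Given the NOTE that "cmaes" only works for D > 1, a caller can guard the maximizer choice by the dimensionality of the search space, e.g.:

import numpy as np

lower, upper = np.array([0.0]), np.array([1.0])
maximizer = "cmaes" if lower.shape[0] > 1 else "direct"  # fall back in 1-D
results = bayesian_optimization(lambda x: float(x[0] ** 2),
                                lower, upper, maximizer=maximizer)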
Example #11
def build_optimizer(model,
                    maximizer="random",
                    acquisition_func="log_ei",
                    maximizer_seed=1):
    """
    General interface for Bayesian optimization for global black box
    optimization problems.

    Parameters
    ----------
    maximizer: {"random", "scipy", "differential_evolution"}
        The optimizer for the acquisition function.
    acquisition_func: {"ei", "log_ei", "lcb", "pi"}
        The acquisition function
    maximizer_seed: int
        Seed for random number generator of the acquisition function maximizer

    Returns
    -------
        Optimizer
    """

    if acquisition_func == "ei":
        a = EI(model)
    elif acquisition_func == "log_ei":
        a = LogEI(model)
    elif acquisition_func == "pi":
        a = PI(model)
    elif acquisition_func == "lcb":
        a = LCB(model)
    else:
        raise ValueError("'{}' is not a valid acquisition function".format(
            acquisition_func))

    if isinstance(model, GaussianProcessMCMC):
        acquisition_func = MarginalizationGPMCMC(a)
    else:
        acquisition_func = a

    maximizer_rng = numpy.random.RandomState(maximizer_seed)
    if maximizer == "random":
        max_func = RandomSampling(acquisition_func,
                                  model.lower,
                                  model.upper,
                                  rng=maximizer_rng)
    elif maximizer == "scipy":
        max_func = SciPyOptimizer(acquisition_func,
                                  model.lower,
                                  model.upper,
                                  rng=maximizer_rng)
    elif maximizer == "differential_evolution":
        max_func = DifferentialEvolution(acquisition_func,
                                         model.lower,
                                         model.upper,
                                         rng=maximizer_rng)
    else:
        raise ValueError("'{}' is not a valid function to maximize the "
                         "acquisition function".format(maximizer))

    # NOTE: Internal RNG of BO won't be used.
    # NOTE: Nb of initial points won't be used within BO, but rather outside
    bo = BayesianOptimization(lambda: None,
                              model.lower,
                              model.upper,
                              acquisition_func,
                              model,
                              max_func,
                              initial_points=None,
                              rng=None,
                              initial_design=init_latin_hypercube_sampling,
                              output_path=None)

    return bo
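A sketch of driving the returned optimizer externally, as the NOTEs suggest (initial points and data live outside the object). The import paths and the choose_next call are assumptions based on RoBO's layout, not confirmed by the snippet:

import george
import numpy as np

# Assumed import paths
from robo.models.gaussian_process_mcmc import GaussianProcessMCMC
from robo.priors.default_priors import DefaultPrior

rng = np.random.RandomState(4)
X, y = rng.rand(10, 2), rng.rand(10)

kernel = 2 * george.kernels.Matern52Kernel(np.ones(2), ndim=2)
prior = DefaultPrior(len(kernel) + 1)
model = GaussianProcessMCMC(kernel, prior=prior, n_hypers=10,
                            chain_length=200, burnin_steps=100,
                            lower=np.zeros(2), upper=np.ones(2), rng=rng)
model.train(X, y)

bo = build_optimizer(model, maximizer="random", acquisition_func="log_ei")
# Assumption: the solver exposes choose_next(X, y) to suggest the next point
new_x = bo.choose_next(X, y)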
Example #12
def fabolas(objective_function, lower, upper, s_min, s_max,
            n_init=40, num_iterations=100, subsets=[256, 128, 64], inc_estimation="mean",
            burnin=100, chain_length=100, n_hypers=12, output_path=None, rng=None):
    """
    Fast Bayesian Optimization of Machine Learning Hyperparameters
    on Large Datasets

    Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets
    A. Klein and S. Falkner and S. Bartels and P. Hennig and F. Hutter
    http://arxiv.org/abs/1605.07079

    Parameters
    ----------
    objective_function: function
        Objective function that will be optimized
    lower: np.array(D,)
        Lower bound of the input space
    upper: np.array(D,)
        Upper bound of the input space
    s_min: int
        Minimum number of data points for the training data set
    s_max: int
        Maximum number of data points for the training data set
    n_init: int
        Number of initial design points
    n_hypers: int
        Number of hyperparameter samples for the GP
    subsets: list
        The ratios of the subset sizes for the initial design.
        For example, if subsets=[256, 128, 64], then the first random point of the
        initial design is evaluated on s_max/256 of the data, the second point on
        s_max/128 of the data, and so on.
    num_iterations: int
        Number of iterations
    chain_length : int
        The length of the MCMC chain for each walker.
    burnin : int
        The number of burnin steps before the actual MCMC sampling starts.
    output_path: string
        Specifies the path where the intermediate output after each iteration will be saved.
        If None no output will be saved to disk.
    rng: numpy.random.RandomState
        Random number generator
    inc_estimation: string
        Specifies how the incumbent is estimated:
            "last_seen" : determines the incumbent as the configuration that achieved the best observed value
            "mean" : determines the incumbent as the configuration that achieved the best predicted mean value

    Returns
    -------
        dict
    """

    assert n_init <= num_iterations, "Number of initial design points has to be <= the number of iterations"
    assert lower.shape[0] == upper.shape[0], "Dimension mismatch between upper and lower bound"

    time_start = time.time()
    if rng is None:
        rng = np.random.RandomState(np.random.randint(0, 10000))

    n_dims = lower.shape[0]

    # Bookkeeping
    time_func_eval = []
    time_overhead = []
    incumbents = []
    runtime = []

    X = []
    y = []
    c = []

    # Define model for the objective function
    cov_amp = 1  # Covariance amplitude
    kernel = cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                ndim=n_dims+1, dim=d)

    # Kernel for the environmental variable
    # We use (1-s)**2 as basis function for the Bayesian linear kernel
    degree = 1
    env_kernel = george.kernels.BayesianLinearRegressionKernel(n_dims+1,
                                                               dim=n_dims,
                                                               degree=degree)
    env_kernel[:] = np.ones([degree + 1]) * 0.1

    kernel *= env_kernel

    # Take 3 times more samples than we have hyperparameters
    if n_hypers < 2*len(kernel):
        n_hypers = 3 * len(kernel)
        if n_hypers % 2 == 1:
            n_hypers += 1

    prior = EnvPrior(len(kernel) + 1,
                     n_ls=n_dims,
                     n_lr=(degree + 1),
                     rng=rng)

    quadratic_bf = lambda x: (1 - x) ** 2
    linear_bf = lambda x: x

    model_objective = FabolasGPMCMC(kernel,
                                    prior=prior,
                                    burnin_steps=burnin,
                                    chain_length=chain_length,
                                    n_hypers=n_hypers,
                                    normalize_output=False,
                                    basis_func=quadratic_bf,
                                    lower=lower,
                                    upper=upper,
                                    rng=rng)

    # Define model for the cost function
    cost_cov_amp = 1

    cost_kernel = cost_cov_amp

    # ARD Kernel for the configuration space
    for d in range(n_dims):
        cost_kernel *= george.kernels.Matern52Kernel(np.ones([1]) * 0.01,
                                                     ndim=n_dims+1, dim=d)

    cost_degree = 1
    cost_env_kernel = george.kernels.BayesianLinearRegressionKernel(n_dims+1,
                                                                    dim=n_dims,
                                                                    degree=cost_degree)
    cost_env_kernel[:] = np.ones([cost_degree + 1]) * 0.1

    cost_kernel *= cost_env_kernel

    cost_prior = EnvPrior(len(cost_kernel) + 1,
                          n_ls=n_dims,
                          n_lr=(cost_degree + 1),
                          rng=rng)

    model_cost = FabolasGPMCMC(cost_kernel,
                               prior=cost_prior,
                               burnin_steps=burnin,
                               chain_length=chain_length,
                               n_hypers=n_hypers,
                               basis_func=linear_bf,
                               normalize_output=False,
                               lower=lower,
                               upper=upper,
                               rng=rng)

    # Extend input space by task variable
    extend_lower = np.append(lower, 0)
    extend_upper = np.append(upper, 1)
    is_env = np.zeros(extend_lower.shape[0])
    is_env[-1] = 1

    # Define acquisition function and maximizer
    ig = InformationGainPerUnitCost(model_objective,
                                    model_cost,
                                    extend_lower,
                                    extend_upper,
                                    sampling_acquisition=EI,
                                    is_env_variable=is_env,
                                    n_representer=50)
    acquisition_func = MarginalizationGPMCMC(ig)
    maximizer = Direct(acquisition_func, extend_lower, extend_upper, verbose=True, n_func_evals=200)

    # Initial Design
    logger.info("Initial Design")

    for it in range(n_init):
        start_time_overhead = time.time()
        # Draw random configuration
        s = int(s_max / float(subsets[it % len(subsets)]))  # cycle through subsets if n_init > len(subsets)

        x = init_random_uniform(lower, upper, 1, rng)[0]
        logger.info("Evaluate %s on subset size %d", str(x), s)
        st = time.time()
        func_val, cost = objective_function(x, s)
        time_func_eval.append(time.time() - st)

        logger.info("Configuration achieved a performance of %f with cost %f", func_val, cost)
        logger.info("Evaluation of this configuration took %f seconds", time_func_eval[-1])

        # Bookkeeping
        config = np.append(x, transform(s, s_min, s_max))
        X.append(config)
        y.append(np.log(func_val))  # Model the target function on a logarithmic scale
        c.append(np.log(cost))  # Model the cost on a logarithmic scale

        # Estimate incumbent as the best observed value so far
        best_idx = np.argmin(y)
        incumbents.append(X[best_idx][:-1])  # Incumbent is always on s=s_max

        time_overhead.append(time.time() - start_time_overhead)
        runtime.append(time.time() - time_start)

        if output_path is not None:
            data = dict()
            data["optimization_overhead"] = time_overhead[it]
            data["runtime"] = runtime[it]
            data["incumbent"] = incumbents[it].tolist()
            data["time_func_eval"] = time_func_eval[it]
            data["iteration"] = it

            json.dump(data, open(os.path.join(output_path, "fabolas_iter_%d.json" % it), "w"))

    X = np.array(X)
    y = np.array(y)
    c = np.array(c)

    for it in range(n_init, num_iterations):
        logger.info("Start iteration %d ... ", it)

        start_time = time.time()

        # Train models
        model_objective.train(X, y, do_optimize=True)
        model_cost.train(X, c, do_optimize=True)

        if inc_estimation == "last_seen":
            # Estimate incumbent as the best observed value so far
            best_idx = np.argmin(y)
            incumbent = X[best_idx][:-1]
            incumbent = np.append(incumbent, 1)
            incumbent_value = y[best_idx]
        else:
            # Estimate incumbent by projecting all observed points to the task of interest and
            # pick the point with the lowest mean prediction
            incumbent, incumbent_value = projected_incumbent_estimation(model_objective, X[:, :-1],
                                                                        proj_value=1)
        incumbents.append(incumbent[:-1])
        logger.info("Current incumbent %s with estimated performance %f",
                    str(incumbent), np.exp(incumbent_value))

        # Maximize acquisition function
        acquisition_func.update(model_objective, model_cost)
        new_x = maximizer.maximize()

        s = retransform(new_x[-1], s_min, s_max)  # Map s from log space to original linear space

        time_overhead.append(time.time() - start_time)
        logger.info("Optimization overhead was %f seconds", time_overhead[-1])

        # Evaluate the chosen configuration
        logger.info("Evaluate candidate %s on subset size %f", str(new_x[:-1]), s)
        start_time = time.time()
        new_y, new_c = objective_function(new_x[:-1], s)
        time_func_eval.append(time.time() - start_time)

        logger.info("Configuration achieved a performance of %f with cost %f", new_y, new_c)
        logger.info("Evaluation of this configuration took %f seconds", time_func_eval[-1])

        # Add new observation to the data
        X = np.concatenate((X, new_x[None, :]), axis=0)
        y = np.concatenate((y, np.log(np.array([new_y]))), axis=0)  # Model the target function on a logarithmic scale
        c = np.concatenate((c, np.log(np.array([new_c]))), axis=0)  # Model the cost function on a logarithmic scale

        runtime.append(time.time() - time_start)

        if output_path is not None:
            data = dict()
            data["optimization_overhead"] = time_overhead[it]
            data["runtime"] = runtime[it]
            data["incumbent"] = incumbents[it].tolist()
            data["time_func_eval"] = time_func_eval[it]
            data["iteration"] = it

            json.dump(data, open(os.path.join(output_path, "fabolas_iter_%d.json" % it), "w"))

    # Estimate the final incumbent
    model_objective.train(X, y, do_optimize=True)
    incumbent, incumbent_value = projected_incumbent_estimation(model_objective, X[:, :-1],
                                                                proj_value=1)
    logger.info("Final incumbent %s with estimated performance %f",
                str(incumbent), incumbent_value)

    results = dict()
    results["x_opt"] = incumbent[:-1].tolist()
    results["incumbents"] = [inc.tolist() for inc in incumbents]
    results["runtime"] = runtime
    results["overhead"] = time_overhead
    results["time_func_eval"] = time_func_eval

    results["X"] = X
    results["y"] = y
    results["c"] = c

    return results
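The helpers transform and retransform are used above but not shown. A plausible definition, consistent with the comment "Map s from log space to original linear space" (this is an assumption about their implementation, not the verbatim source):

import numpy as np

def transform(s, s_min, s_max):
    # Assumed: map s in [s_min, s_max] to [0, 1] on a log scale
    return (np.log(s) - np.log(s_min)) / (np.log(s_max) - np.log(s_min))

def retransform(s_transform, s_min, s_max):
    # Assumed inverse: map [0, 1] back to an integer subset size
    s = np.exp(s_transform * (np.log(s_max) - np.log(s_min)) + np.log(s_min))
    return int(np.rint(s))

# Sanity check: transform(s_min, s_min, s_max) == 0.0 and
# transform(s_max, s_min, s_max) == 1.0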