def _tree_minimize(func, dimensions, base_estimator, n_calls,
                   n_points, n_random_starts, random_state=None):
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Initialize with random points
    if n_random_starts <= 0:
        raise ValueError(
            "Expected n_random_starts > 0, got %d" % n_random_starts)

    if n_calls <= 0:
        raise ValueError(
            "Expected n_calls > 0, got %d" % n_random_starts)

    if n_calls < n_random_starts:
        raise ValueError(
            "Expected n_calls >= %d, got %d" % (n_random_starts, n_calls))

    Xi = space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError(
            "The function to be optimized should return a scalar")

    # Tree-based optimization loop
    models = []

    n_model_iter = n_calls - n_random_starts
    for i in range(n_model_iter):
        rgr = clone(base_estimator)
        rgr.fit(space.transform(Xi), yi)
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimizers like BFGS, so using random sampling
        # for the moment.
        X = space.transform(space.rvs(n_samples=n_points,
                                      random_state=rng))
        values = -gaussian_ei(X, rgr, np.min(yi))
        next_x = X[np.argmin(values)]

        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space

    return res
Example #2
0
def create_result(Xi,
                  yi,
                  space=None,
                  rng=None,
                  specs=None,
                  models=None,
                  model_mu=None,
                  model_std=None,
                  gurobi_mipgap=None):
    """
    Bundle an optimization history into an `OptimizeResult`.

    Parameters
    ----------
    Xi : sequence indexed by iteration
        Evaluated points; stored unchanged as `x_iters`.

    yi : array-like, shape (n_iters,) or (n_iters, 2)
        Objective values. When two-dimensional, column 0 holds the
        objective values and column 1 is stored as `log_time`.

    space : Space instance, optional
        Search space.

    rng : RandomState instance, optional
        Stored as `random_state`.

    specs : dict, optional
        Call specifications.

    models : list, optional
        List of fit surrogate models.

    model_mu : optional
        Stored unchanged as `model_mu`.

    model_std : optional
        Stored unchanged as `model_std`.

    gurobi_mipgap : optional
        Stored unchanged as `gurobi_mipgap`.

    Returns
    -------
    res : `OptimizeResult`, scipy object
        `x` and `fun` describe the entry with the smallest objective value.
    """
    objective = np.asarray(yi)
    result = OptimizeResult()

    if objective.ndim == 2:
        # Split the (value, time) pairs into two flat arrays.
        result.log_time = objective[:, 1].ravel()
        objective = objective[:, 0].ravel()

    winner = np.argmin(objective)
    result.update({
        "x": Xi[winner],
        "fun": objective[winner],
        "func_vals": objective,
        "x_iters": Xi,
        "models": models,
        "model_mu": model_mu,
        "model_std": model_std,
        "gurobi_mipgap": gurobi_mipgap,
        "space": space,
        "random_state": rng,
        "specs": specs,
    })
    return result
def dummy_minimize(func, bounds, maxiter=1000, random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function value (a scalar).

    * `bounds` [array-like, shape=(n_parameters, 2)]:
        - ``bounds[i][0]`` should give the lower bound of each parameter and
        - ``bounds[i][1]`` should give the upper bound of each parameter.

    * `maxiter` [int, default=1000]:
        Number of iterations to find the minimum. In other words, the
        number of function evaluations. Must be positive.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [array]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration;
          `func_vals[i]` is `func(x_iters[i])`.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `maxiter` is not positive or `func` does not return a scalar.
    """
    if maxiter <= 0:
        raise ValueError("Expected maxiter > 0, got %d" % maxiter)

    rng = check_random_state(random_state)

    n_params = len(bounds)
    lb, ub = extract_bounds(bounds)

    X = lb + (ub - lb) * rng.rand(maxiter, n_params)

    # Evaluate the first point separately so a non-scalar objective is
    # rejected before spending the rest of the budget.
    init_y = func(X[0])
    if not np.isscalar(init_y):
        raise ValueError("The function to be optimized should return a scalar")

    # Evaluate the remaining points X[1:], keeping y[i] aligned with X[i].
    # (The previous loop re-evaluated X[0] and never reached the last row.)
    y = np.asarray([init_y] + [func(x) for x in X[1:]])

    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X

    return res
Example #4
0
def df2result(df, metric_col, param_cols, param_types=None):
    """Build an `OptimizeResult` from a dataframe of metrics and hyperparameters.

    Helper that lets you use tools which expect an OptimizeResult object,
    for example the scikit-optimize plot_evaluations function.

    Args:
        df(`pandas.DataFrame`): Dataframe containing metric and hyperparameters.
        metric_col(str): Name of the metric column.
        param_cols(list): Names of the hyperparameter columns.
        param_types(list or None): Optional list of hyperparameter column types.
            When omitted (or empty) every column is treated as float; pass str
            for categorical columns. E.g param_types=[float, str, float, float]

    Returns:
        `scipy.optimize.OptimizeResult`: Results object that contains the hyperparameter and metric
        information (`x_iters`, `func_vals`, `x`, `fun`, `space`).

    Examples:
        Instantiate a session::

            from neptune.sessions import Session
            session = Session()

        Fetch a project and a list of experiments::

            project = session.get_projects('neptune-ai')['neptune-ai/Home-Credit-Default-Risk']
            leaderboard = project.get_leaderboard(state=['succeeded'], owner=['czakon'])

        Convert the leaderboard dataframe to an `OptimizeResult` instance taking only the parameters and
        metric that you care about::

            result = df2result(leaderboard,
                metric_col='channel_ROC_AUC',
                param_cols=['parameter_lgbm__max_depth',
                            'parameter_lgbm__num_leaves',
                            'parameter_lgbm__min_child_samples'])

    """
    column_types = param_types or [float for _ in param_cols]

    prepared = _prep_df(df, param_cols, column_types)
    space = _convert_to_param_space(prepared, param_cols, column_types)

    evaluated_points = prepared[param_cols].values
    metric_values = prepared[metric_col].to_list()

    # `x` is the row with the smallest metric; `fun` is that smallest metric.
    return OptimizeResult(
        x_iters=evaluated_points,
        func_vals=metric_values,
        x=evaluated_points[np.argmin(metric_values)],
        fun=np.min(metric_values),
        space=space,
    )
Example #5
0
def hyperopt2skopt(trials, space):
    """Build a scipy OptimizeResult from a hyperopt Trials instance.

    Args:
        trials(`hyperopt.base.Trials`): hyperopt trials object which stores training
            information from the fmin() optimization function.
        space(`collections.OrderedDict`): Hyper parameter space over which
            hyperopt will search. The key order of this mapping fixes the
            order of the parameters in every returned point, which is why an
            OrderedDict is expected rather than a simple dictionary.

    Returns:
        `scipy.optimize.OptimizeResult`: Converted OptimizeResult with `x`,
        `x_iters`, `fun`, `func_vals` and `space` set.


    Examples:
        Prepare the space of hyperparameters to search over::

            from hyperopt import hp, tpe, fmin, Trials
            space = OrderedDict(num_leaves=hp.choice('num_leaves', range(10, 60, 1)),
                        max_depth=hp.choice('max_depth', range(2, 30, 1)),
                        feature_fraction=hp.uniform('feature_fraction', 0.1, 0.9)
                               )

        Create an objective and run your hyperopt training::

            trials = Trials()
            _ = fmin(objective, space, trials=trials, algo=tpe.suggest, max_evals=100)

        Convert trials object to the OptimizeResult object::

            import neptunecontrib.hpo.utils as hp_utils
            results = hp_utils.hyperopt2skopt(trials, space)
    """
    names = list(space.keys())
    converted_space = _convert_space_hop_skopt(space)

    # Per-trial history, keyed like the result attributes it feeds.
    history = {}
    for record in trials.trials:
        sampled = record['misc']['vals']
        history.setdefault('x_iters', []).append(
            [sampled[name][0] for name in names])
        history.setdefault('func_vals', []).append(record['result']['loss'])

    return OptimizeResult(
        x=[trials.argmin[name] for name in names],
        x_iters=history['x_iters'],
        fun=trials.best_trial['result']['loss'],
        func_vals=history['func_vals'],
        space=converted_space,
    )
def df2result(df, metric_col, param_cols, param_types=None):
    """Converts dataframe with metrics and hyperparameters to the OptimizeResults format.

    Args:
        df(`pandas.DataFrame`): Dataframe containing metric and hyperparameters.
        metric_col(str): Name of the metric column.
        param_cols(list): Names of the hyperparameter columns.
        param_types(list or None): Optional list of hyperparameter column
            types; when omitted or empty every column is treated as float.

    Returns:
        `scipy.optimize.OptimizeResult`: Object with `x_iters`, `func_vals`,
        `x` (row with the smallest metric), `fun` (smallest metric) and
        `space` set.
    """
    types = param_types
    if not types:
        types = [float for _ in param_cols]

    frame = _prep_df(df, param_cols, types)
    search_space = _convert_to_param_space(frame, param_cols, types)

    points = frame[param_cols].values
    scores = frame[metric_col].to_list()
    best_row = np.argmin(scores)

    out = OptimizeResult()
    out.x_iters = points
    out.func_vals = scores
    out.x = points[best_row]
    out.fun = np.min(scores)
    out.space = search_space
    return out
Example #7
0
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None):
    """
    Pack an optimization history into an `OptimizeResult` object.

    Parameters
    ----------
    * `Xi` [sequence indexed by iteration]:
        Evaluated points; stored unchanged as `x_iters`.

    * `yi` [array-like, shape=(n_iters,) or (n_iters, 2)]:
        Objective values. When two-dimensional, column 0 holds the
        objective values and column 1 is stored as `log_time`.

    * `space` [Space instance, optional]:
        Search space.

    * `rng` [RandomState instance, optional]:
        Stored as `random_state`.

    * `specs` [dict, optional]:
        Call specifications.

    * `models` [list, optional]:
        List of fit surrogate models.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        `x` and `fun` describe the entry with the smallest objective value.
    """
    out = OptimizeResult()
    scores = np.asarray(yi)

    has_time_column = scores.ndim == 2
    if has_time_column:
        out["log_time"] = scores[:, 1].ravel()
        scores = scores[:, 0].ravel()

    argbest = np.argmin(scores)
    out["x"] = Xi[argbest]
    out["fun"] = scores[argbest]
    out["func_vals"] = scores
    out["x_iters"] = Xi
    out["models"] = models
    out["space"] = space
    out["random_state"] = rng
    out["specs"] = specs

    return out
Example #8
0
def create_result(xi, yi, space=None, rs=None, specs=None, models=None):
    """Pack an optimization history into an `OptimizeResult`.

    `xi` holds the evaluated points and `yi` the matching objective values.
    When `yi` is two-dimensional, column 0 is used as the objective and
    column 1 is stored as `log_time`. `rs` is stored as `random_state`;
    `space`, `specs` and `models` are stored unchanged. `x` and `fun`
    describe the entry with the smallest objective value.
    """
    packed = OptimizeResult()
    objective = np.asarray(yi)
    if objective.ndim == 2:
        packed.log_time = np.ravel(objective[:, 1])
        objective = np.ravel(objective[:, 0])

    pos = np.argmin(objective)
    fields = (
        ("x", xi[pos]),
        ("fun", objective[pos]),
        ("func_vals", objective),
        ("x_iters", xi),
        ("models", models),
        ("space", space),
        ("random_state", rs),
        ("specs", specs),
    )
    for attr, value in fields:
        setattr(packed, attr, value)
    return packed
Example #9
0
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None):
    """
    Assemble an `OptimizeResult` from an optimization history.

    Parameters
    ----------
    * `Xi` [sequence indexed by iteration]:
        Evaluated points; stored unchanged as `x_iters`.

    * `yi` [array-like, shape=(n_iters,) or (n_iters, 2)]:
        Objective values. When two-dimensional, column 0 holds the
        objective values and column 1 is stored as `log_time`.

    * `space` [Space instance, optional]:
        Search space.

    * `rng` [RandomState instance, optional]:
        Stored as `random_state`.

    * `specs` [dict, optional]:
        Call specifications.

    * `models` [list, optional]:
        List of fit surrogate models.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        `x` and `fun` describe the entry with the smallest objective value.
    """
    values = np.asarray(yi)

    # Collect the attributes in insertion order, then build the result once.
    attrs = {}
    if np.ndim(values) == 2:
        attrs["log_time"] = np.ravel(values[:, 1])
        values = np.ravel(values[:, 0])

    i_min = np.argmin(values)
    attrs.update(
        x=Xi[i_min],
        fun=values[i_min],
        func_vals=values,
        x_iters=Xi,
        models=models,
        space=space,
        random_state=rng,
        specs=specs,
    )
    return OptimizeResult(attrs)
def dummy_minimize(func, dimensions, n_calls=100, random_state=None):
    """Random search: sample `n_calls` points from the space and evaluate them.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function value (a scalar).

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x`: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters`: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If the first evaluation of `func` does not return a scalar.
    """
    rng = check_random_state(random_state)
    space = Space(dimensions)
    samples = space.rvs(n_samples=n_calls, random_state=rng)

    # The first evaluation doubles as a check on the objective's return type.
    first_value = func(samples[0])
    if not np.isscalar(first_value):
        raise ValueError(
            "The function to be optimized should return a scalar")

    evaluations = [first_value]
    for idx in range(1, n_calls):
        evaluations.append(func(samples[idx]))
    y = np.asarray(evaluations)

    winner = np.argmin(y)
    return OptimizeResult(
        x=samples[winner],
        fun=y[winner],
        func_vals=y,
        x_iters=samples,
        space=space,
    )
Example #11
0
def gp_minimize(func,
                dimensions,
                base_estimator=None,
                alpha=10e-10,
                acq="EI",
                xi=0.01,
                kappa=1.96,
                search="auto",
                n_calls=100,
                n_points=500,
                n_random_starts=10,
                n_restarts_optimizer=5,
                x0=None,
                y0=None,
                random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    The total number of evaluations, `n_calls`, are performed like the
    following. If `x0` is provided but not `y0`, then the elements of `x0`
    are first evaluated, followed by `n_random_starts` evaluations.
    Finally, `n_calls - len(x0) - n_random_starts` evaluations are
    made guided by the surrogate model. If `x0` and `y0` are both
    provided then `n_random_starts` evaluations are first made then
    `n_calls - n_random_starts` subsequent evaluations are made
    guided by the surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization.
        When `None`, a `GaussianProcessRegressor` with a
        `ConstantKernel * Matern(nu=2.5)` kernel is used.

    * `alpha` [float, default=1e-9]:
        Value added to the diagonal of the kernel matrix during fitting.
        Larger values correspond to an increased noise level in the
        observations and reduce potential numerical issues during fitting.
        Only used when `base_estimator` is `None`.

    * `acq` [string, default=`"EI"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"auto"`, then it is set to `"lbfgs"` if
        all the search dimensions are Real(continuous). It defaults to
        `"sampling"` for all other cases.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, lbfgs is run from each of them for at most
        20 iterations optimizing the acquisition function over the
        Gaussian prior, and the best result is kept.

    * `n_calls` [int, default=100]:
        Number of calls to `func`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_random_starts` [int, default=10]:
        Number of evaluations of `func` with random initialization points
        before approximating the `func` with `base_estimator`.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None`, no initial input points are used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimization space.
        - `specs` [dict]`: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `x0` is not a list, if neither `x0` nor random starts provide an
        initial point, if `n_calls` is smaller than the number of initial
        function calls, if `y0` is malformed or does not match `x0`, if
        `func` does not return a scalar, or if `search` is not recognised.
    """
    # Save call args. This must stay the first statement so that the
    # frame locals hold nothing but the call arguments.
    specs = {
        "args": copy.copy(inspect.currentframe().f_locals),
        "function": inspect.currentframe().f_code.co_name
    }

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) * Matern(
                length_scale=np.ones(space.transformed_n_dims),
                length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims,
                nu=2.5)),
            normalize_y=True,
            alpha=alpha,
            random_state=random_state)

    # Initialize with provided points (x0 and y0) and/or random points
    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given; wrap it. The length guard keeps an
        # empty `x0` from raising IndexError.
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    # Points in `x0` always seed the surrogate, but they only cost calls
    # to `func` when their values `y0` were not supplied.
    n_init_points = len(x0) + n_random_starts
    n_init_func_calls = len(x0) if y0 is None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_init_points <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError("Expected `n_random_starts` > 0, got %d" %
                         n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError("Expected `n_calls` >= %d, got %d" %
                         (n_total_init_calls, n_calls))

    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s" %
                             type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    else:
        y0 = []

    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    if search == "auto":
        if space.is_real:
            search = "lbfgs"
        else:
            search = "sampling"
    elif search not in ["lbfgs", "sampling"]:
        raise ValueError(
            "Expected search to be 'lbfgs', 'sampling' or 'auto', "
            "got %s" % search)

    # Bayesian optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for _ in range(n_model_iter):
        gp = clone(base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(X=X,
                                           model=gp,
                                           y_opt=np.min(yi),
                                           method=acq,
                                           xi=xi,
                                           kappa=kappa)
            next_x = X[np.argmin(values)]

        elif search == "lbfgs":
            best_acq = np.inf

            for _restart in range(n_restarts_optimizer):
                # Named `start` so the `x0` argument is not shadowed.
                start = space.transform(space.rvs(n_samples=1,
                                                  random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _info = fmin_l_bfgs_b(
                        _acquisition,
                        start,
                        args=(gp, np.min(yi), acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True,
                        maxiter=20)

                if a < best_acq:
                    next_x, best_acq = x, a

        # Map the candidate back to the original space before evaluating.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
Example #12
0
def gbrt_minimize(func,
                  bounds,
                  base_estimator=None,
                  maxiter=100,
                  n_points=20,
                  n_start=10,
                  random_state=None):
    """Sequential optimisation using gradient boosted trees.

    Gradient boosted regression trees are used to model the (very)
    expensive to evaluate function `func`. The model is improved
    by sequentially evaluating the expensive function at the next
    best point. Thereby finding the minimum of `func` with as
    few evaluations as possible.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `bounds` [array-like, shape=(n_parameters, 2)]:
        - ``bounds[i][0]`` should give the lower bound of each parameter and
        - ``bounds[i][1]`` should give the upper bound of each parameter.

    * `base_estimator` [`GradientBoostingQuantileRegressor`]:
        The regressor to use as surrogate model

    * `maxiter` [int, default=100]:
        Number of iterations used to find the minimum. This corresponds
        to the total number of evaluations of `func`. If `n_start` > 0
        only `maxiter - n_start` iterations are used. Must be positive.

    * `n_start` [int, default=10]:
        Number of random points to draw before fitting `base_estimator`
        for the first time. If `n_start > maxiter` this degrades to
        a random search for the minimum. Must be positive.

    * `n_points` [int, default=20]:
        Number of points to sample when minimizing the acquisition function.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [array]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `n_start` or `maxiter` is not positive.
    """
    # Validate the budget before allocating anything; negative values are
    # rejected as well as zero.
    if n_start <= 0:
        raise ValueError("Need at least one starting point.")

    if maxiter <= 0:
        raise ValueError("Need to perform at least one iteration.")

    rng = check_random_state(random_state)

    # Bounds
    num_params = len(bounds)
    lower_bounds, upper_bounds = extract_bounds(bounds)

    # Default estimator
    if base_estimator is None:
        base_estimator = GradientBoostingQuantileRegressor(random_state=rng)

    # Record the points and function values evaluated as part of
    # the minimization
    Xi = np.zeros((maxiter, num_params))
    yi = np.zeros(maxiter)

    # Initialize with random points
    n_start = min(n_start, maxiter)

    Xi[:n_start] = _random_points(lower_bounds,
                                  upper_bounds,
                                  n_points=n_start,
                                  random_state=rng)
    yi[:n_start] = [func(xi) for xi in Xi[:n_start]]

    # The incumbent is the single best starting point (previously every
    # starting point was flattened into `best_x`).
    start_best = np.argmin(yi[:n_start])
    best_x = Xi[start_best].copy()
    best_y = yi[start_best]

    models = []

    for i in range(n_start, maxiter):
        rgr = clone(base_estimator)
        # only the first i points are meaningful
        rgr.fit(Xi[:i, :], yi[:i])
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimisers like BFGS, use random sampling
        # for the moment.
        x0 = _random_points(lower_bounds,
                            upper_bounds,
                            n_points=n_points,
                            random_state=rng)
        aq = _expected_improvement(x0, rgr, best_y)
        best = np.argmin(aq)

        Xi[i] = x0[best].ravel()
        yi[i] = func(x0[best])

        if yi[i] < best_y:
            best_y = yi[i]
            best_x = Xi[i].copy()

    res = OptimizeResult()
    res.x = best_x
    res.fun = best_y
    res.func_vals = yi
    res.x_iters = Xi
    res.models = models

    return res
Example #13
0
def gp_minimize(func, dimensions, base_estimator=None, alpha=10e-10,
                acq="EI", xi=0.01, kappa=1.96, search="auto", n_calls=100,
                n_points=500, n_random_starts=10, n_restarts_optimizer=5,
                x0=None, y0=None, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    The evaluation budget is spent as follows.

    - If `x0` is provided but not `y0`, the elements of `x0` are evaluated
      first, followed by `n_random_starts` random evaluations, and finally
      `n_calls - len(x0) - n_random_starts` evaluations guided by the
      surrogate model (`n_calls` calls to `func` in total).
    - If `x0` and `y0` are both provided, the points in `x0` cost no call
      to `func`: `n_random_starts` random evaluations are made, followed
      by `n_calls - n_random_starts` evaluations guided by the surrogate
      model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions, handed to `Space`.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization. It is
        cloned and refitted at every model-guided iteration. When `None`,
        a `GaussianProcessRegressor` with a `ConstantKernel * Matern(nu=2.5)`
        kernel and `normalize_y=True` is used.

    * `alpha` [float, default=1e-9]:
        Value added to the diagonal of the kernel matrix during fitting.
        Larger values correspond to an increased noise level in the
        observations and reduce potential numerical issues during fitting.
        Only used when `base_estimator` is `None`.

    * `acq` [string, default=`"EI"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"auto"`, then it is set to `"lbfgs"` if
        `space.is_real` is true (all the search dimensions are continuous).
        It defaults to `"sampling"` for all other cases.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, L-BFGS-B is run from each of them for at most
        20 iterations on the acquisition function, and the best result
        is kept.

    * `n_calls` [int, default=100]:
        Number of calls to `func`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_random_starts` [int, default=10]:
        Number of evaluations of `func` with random initialization points
        before approximating the `func` with `base_estimator`.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` (or an empty list), no initial input points
          are used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimization space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`:
        If `search` is not one of the accepted strings, if `x0` is not a
        list, if neither initial points nor random starts are requested,
        if `n_calls` is smaller than the number of initial evaluations,
        if `y0` is malformed, if `n_restarts_optimizer` is not positive
        while L-BFGS-B iterations are required, or if `func` does not
        return scalars.
    """
    # Save call args. This has to stay the very first statement so that the
    # frame locals hold nothing but the arguments of the call.
    specs = {"args": dict(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Cheap argument checks first: fail before `func` is ever evaluated.
    if search not in ("auto", "lbfgs", "sampling"):
        raise ValueError(
            "Expected search to be 'lbfgs', 'sampling' or 'auto', "
            "got %s" % search)

    # Normalize the provided points. An empty list means "no points" (it
    # must not be indexed), a flat point is wrapped into a list of points.
    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    if len(x0) + n_random_starts <= 0:
        # Neither provided points nor random starts: the surrogate model
        # would have nothing to be fitted on.
        raise ValueError(
            "Expected `n_random_starts` > 0, got %d" % n_random_starts)

    # Points of `x0` only consume calls to `func` when their values are not
    # supplied through `y0`.
    n_init_func_calls = n_random_starts + (len(x0) if y0 is None else 0)

    if n_calls < n_init_func_calls:
        raise ValueError(
            "Expected `n_calls` >= %d, got %d" % (n_init_func_calls, n_calls))

    n_model_iter = n_calls - n_init_func_calls

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    if search == "auto":
        search = "lbfgs" if space.is_real else "sampling"

    if search == "lbfgs" and n_model_iter > 0 and n_restarts_optimizer <= 0:
        # Without at least one restart no candidate would ever be proposed.
        raise ValueError(
            "Expected `n_restarts_optimizer` > 0, got %d"
            % n_restarts_optimizer)

    # Default GP
    if base_estimator is None:
        n_dims = space.transformed_n_dims
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(n_dims),
                           length_scale_bounds=[(0.01, 100)] * n_dims,
                           nu=2.5)),
            normalize_y=True, alpha=alpha, random_state=random_state)

    # Initialize with provided points (x0 and y0) and/or random points
    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError(
                "`y0` should be an iterable or a scalar, got %s" % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError(
                "`y0` elements should be scalars")
    else:
        y0 = []

    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    # Bayesian optimization loop
    models = []
    for i in range(n_model_iter):
        gp = clone(base_estimator)

        # Fitting warnings are deliberately silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)
        y_opt = np.min(yi)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(
                X=X, model=gp, y_opt=y_opt, method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]

        elif search == "lbfgs":
            best_acq = np.inf

            for j in range(n_restarts_optimizer):
                # Local name on purpose: the `x0` argument is left untouched.
                start = space.transform(space.rvs(n_samples=1,
                                                  random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(
                        _acquisition, start,
                        args=(gp, y_opt, acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True, maxiter=20)

                if a < best_acq:
                    next_x, best_acq = x, a

        # Candidates live in the transformed space; map back before calling
        # the objective.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
Example #14
0
def gp_minimize(func,
                bounds=None,
                search="sampling",
                random_state=None,
                maxiter=1000,
                acq="UCB",
                num_points=500):
    """
    Black-box optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian posterior which is much quicker to evaluate.

    Internally all points are handled in the unit hypercube and rescaled
    to ``bounds`` right before ``func`` is called and before returning.

    Parameters
    ----------
    func: callable
        Function to minimize. Should take a array of parameters and
        return the function value.

    bounds: array-like, shape (n_parameters, 2)
        ``bounds[i][0]`` should give the lower bound of each parameter and
        ``bounds[i][1]`` should give the upper bound of each parameter.
        Required, despite the ``None`` default of the signature.

    search: string, "sampling" or "lbfgs"
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to "sampling", ``num_points`` are sampled randomly
        and the Gaussian Process prior is updated with that point that gives
        the best acquisition value over the Gaussian posterior.

        If search is set to "lbfgs", then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian posterior.

    random_state: int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results. A ``RandomState`` instance is used as is.

    maxiter: int, default 1000
        Number of model-guided iterations. One extra evaluation is made
        beforehand at a random starting point, so ``func`` is called
        ``maxiter + 1`` times.

    acq: string, default "UCB"
        Function to minimize over the gaussian posterior. Can be either
        the "UCB" which refers to the UpperConfidenceBound or "EI" which
        is the Expected Improvement. Forwarded to ``acquisition_func``.

    num_points: int, default 500
        Number of points to sample to determine the next "best" point.
        Useless if search is set to "lbfgs".

    Returns
    -------
    res: OptimizeResult, scipy object
        The optimization result returned as a OptimizeResult object.
        Important attributes are
        ``x`` - array, the optimization solution,
        ``fun`` - float, the value of the function at the optimum,
        ``models``- gp_models[i]. the model of the function fit at
                       iteration[i].
        ``func_vals`` - list, the function value at the ith evaluation.
        ``x_iters`` - the value of ``x`` corresponding to the function value
                      at the ith evaluation.
        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    TypeError
        If ``bounds`` is None.
    ValueError
        If ``search`` is neither "sampling" nor "lbfgs".
    """
    # Fail fast, before `func` is evaluated or any model is fitted.
    if bounds is None:
        raise TypeError(
            "`bounds` is required: expected a sequence of (lower, upper) "
            "pairs, got None")
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected search to be 'sampling' or 'lbfgs', got %s" % search)

    # `np.random.RandomState` cannot be seeded with another RandomState, so
    # an instance is reused directly.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    num_params = len(bounds)
    lower_bounds, upper_bounds = zip(*bounds)
    lower_bounds = np.asarray(lower_bounds)
    span = np.asarray(upper_bounds) - lower_bounds

    # Starting point, drawn in the unit hypercube.
    x0 = rng.rand(num_params)
    func_val = [func(lower_bounds + span * x0)]

    gp_params = {
        'kernel': Matern(length_scale=np.ones(num_params), nu=2.5),
        'normalize_y': True,
        'random_state': random_state
    }
    lbfgs_bounds = np.tile((0, 1), (num_params, 1))

    gp_models = []
    x = np.reshape(x0, (1, -1))

    for i in range(maxiter):
        gpr = GaussianProcessRegressor(**gp_params)
        gpr.fit(x, func_val)
        y_opt = np.min(func_val)

        if search == "sampling":
            sampling = rng.rand(num_points, num_params)
            acquis = acquisition_func(sampling, gpr, y_opt, acq)
            best_x = sampling[np.argmin(acquis)]
        else:
            init = rng.rand(num_params)
            best_x, _, _ = fmin_l_bfgs_b(acquisition_func,
                                         np.asfortranarray(init),
                                         args=(gpr, y_opt, acq),
                                         bounds=lbfgs_bounds,
                                         approx_grad=True,
                                         maxiter=10)

        gp_models.append(gpr)

        best_f = func(lower_bounds + span * best_x)
        x = np.vstack((x, best_x))
        func_val.append(best_f)

    # Rescale every visited point from the unit hypercube to `bounds`.
    x = lower_bounds + span * x
    func_ind = np.argmin(func_val)

    res = OptimizeResult()
    res.models = gp_models
    res.x = x[func_ind]
    res.fun = func_val[func_ind]
    res.func_vals = func_val
    res.x_iters = x

    return res
Example #15
0
def gp_minimize(func, dimensions, base_estimator=None, acq="LCB", xi=0.01,
                kappa=1.96, search="sampling", maxiter=1000, n_points=500,
                n_start=10, n_restarts_optimizer=5, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions, handed to `Space`.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization. It is
        cloned and refitted at every model-guided iteration. When `None`,
        a `GaussianProcessRegressor` with a `ConstantKernel * Matern(nu=2.5)`
        kernel, `normalize_y=True` and `alpha=10e-6` is used.

    * `acq` [string, default=`"LCB"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"sampling"` or `"lbfgs"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, L-BFGS-B is run from each of them for at most
        10 iterations on the acquisition function, and the best result
        is kept.

    * `maxiter` [int, default=1000]:
        Number of iterations to find the minimum. Note that `n_start`
        iterations are effectively discounted: `n_start` random evaluations
        are followed by `maxiter - n_start` model-guided ones. If `n_start`
        is larger than `maxiter`, only the `n_start` random evaluations
        are made.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_start` [int, default=10]:
        Number of random initialization points. Must be positive.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [array]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`:
        If `search` is neither `"sampling"` nor `"lbfgs"`, if `n_start` is
        not positive, if `n_restarts_optimizer` is not positive while
        L-BFGS-B iterations are required, or if `func` does not return
        scalars.
    """
    # Validate cheap arguments up front so that a typo does not cost
    # `n_start` expensive evaluations of `func` before surfacing.
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected search to be 'lbfgs' or 'sampling', got %s" % search)

    if n_start <= 0:
        raise ValueError("Expected n_start > 0, got %d" % n_start)

    n_model_iter = maxiter - n_start
    if search == "lbfgs" and n_model_iter > 0 and n_restarts_optimizer <= 0:
        # Without at least one restart no candidate would ever be proposed.
        raise ValueError(
            "Expected n_restarts_optimizer > 0, got %d"
            % n_restarts_optimizer)

    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        n_dims = space.transformed_n_dims
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(n_dims),
                           length_scale_bounds=[(0.01, 100)] * n_dims,
                           nu=2.5)),
            normalize_y=True, alpha=10e-6, random_state=random_state)

    # First points
    Xi = space.rvs(n_samples=n_start, random_state=rng)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError(
            "The function to be optimized should return a scalar")

    # Bayesian optimization loop
    models = []

    for i in range(n_model_iter):
        gp = clone(base_estimator)

        # Fitting warnings are deliberately silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)
        y_opt = np.min(yi)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(
                X=X, model=gp, y_opt=y_opt, method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]

        else:
            best_acq = np.inf

            for j in range(n_restarts_optimizer):
                start = space.transform(space.rvs(n_samples=1,
                                                  random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(
                        _acquisition, start,
                        args=(gp, y_opt, acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True, maxiter=10)

                if a < best_acq:
                    next_x, best_acq = x, a

        # Candidates live in the transformed space; map back before calling
        # the objective.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space

    return res
Example #16
0
def gp_minimize(func,
                bounds,
                base_estimator=None,
                acq="LCB",
                xi=0.01,
                kappa=1.96,
                search="sampling",
                maxiter=1000,
                n_points=500,
                n_start=10,
                n_restarts_optimizer=5,
                random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `bounds` [array-like, shape=(n_parameters, 2)]:
        - ``bounds[i][0]`` should give the lower bound of each parameter and
        - ``bounds[i][1]`` should give the upper bound of each parameter.

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization. It is
        cloned and refitted at every model-guided iteration. When `None`,
        a `GaussianProcessRegressor` with a `ConstantKernel * Matern(nu=2.5)`
        kernel, `normalize_y=True` and `alpha=10e-6` is used.

    * `acq` [string, default=`"LCB"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"sampling"` or `"lbfgs"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, L-BFGS-B is run from each of them for at most
        10 iterations on the acquisition function, and the best result
        is kept.

    * `maxiter` [int, default=1000]:
        Number of iterations to find the minimum. Note that `n_start`
        iterations are effectively discounted: `n_start` random evaluations
        are followed by `maxiter - n_start` model-guided ones. If `n_start`
        is larger than `maxiter`, only the `n_start` random evaluations
        are made.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_start` [int, default=10]:
        Number of random initialization points. Must be positive.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [array]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`:
        If `search` is neither `"sampling"` nor `"lbfgs"`, if `n_start` is
        not positive, if `n_restarts_optimizer` is not positive while
        L-BFGS-B iterations are required, or if `func` does not return
        scalars.
    """
    # Validate cheap arguments up front so that a typo does not cost
    # `n_start` expensive evaluations of `func` before surfacing.
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected search to be 'lbfgs' or 'sampling', got %s" % search)

    if n_start <= 0:
        raise ValueError("Expected n_start > 0, got %d" % n_start)

    n_model_iter = maxiter - n_start
    if search == "lbfgs" and n_model_iter > 0 and n_restarts_optimizer <= 0:
        # Without at least one restart no candidate would ever be proposed.
        raise ValueError(
            "Expected n_restarts_optimizer > 0, got %d"
            % n_restarts_optimizer)

    rng = check_random_state(random_state)

    # Bounds
    n_params = len(bounds)
    lb, ub = extract_bounds(bounds)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(n_params),
                           length_scale_bounds=[(0.01, 100)] * n_params,
                           nu=2.5)),
            normalize_y=True,
            alpha=10e-6,
            random_state=random_state)

    # First points, drawn uniformly between the lower and upper bounds
    Xi = lb + (ub - lb) * rng.rand(n_start, n_params)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError("The function to be optimized should return a scalar")

    # Bayesian optimization loop
    models = []

    for i in range(n_model_iter):
        gp = clone(base_estimator)

        # Fitting warnings are deliberately silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(Xi, yi)

        models.append(gp)
        y_opt = np.min(yi)

        if search == "sampling":
            X = lb + (ub - lb) * rng.rand(n_points, n_params)
            values = acquisition(X=X,
                                 model=gp,
                                 y_opt=y_opt,
                                 method=acq,
                                 xi=xi,
                                 kappa=kappa)
            next_x = X[np.argmin(values)]

        else:
            best_acq = np.inf

            for j in range(n_restarts_optimizer):
                start = lb + (ub - lb) * rng.rand(n_params)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(_acquisition,
                                            start,
                                            args=(gp, y_opt, acq, xi,
                                                  kappa),
                                            bounds=bounds,
                                            approx_grad=True,
                                            maxiter=10)

                if a < best_acq:
                    next_x, best_acq = x, a

        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models

    return res
Example #17
0
def dummy_minimize(func, dimensions, n_calls=100,
                   x0=None, y0=None, random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum. When `x0` is given
        without `y0`, the evaluations of `x0` are counted against this
        budget; when both `x0` and `y0` are given, `n_calls` random points
        are evaluated in addition to the provided ones.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` (or empty), no initial input points are used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `models` [list]: always empty, random search fits no model.
        - `space` [Space]: the optimisation space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`: if `x0` is not a list, if `y0` has the wrong type or
      length, if `y0` is given without any `x0`, or if the first value
      returned by `func` is not a scalar.
    """
    # Save call args. This has to stay the first statement: it snapshots the
    # frame locals, which at this point contain only the call arguments.
    specs = {"args": copy.copy(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Check params. The cheap argument checks run before the search space is
    # built so that invalid input fails fast.
    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given; an empty `x0` must not be indexed.
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, got %s" % type(x0))

    if len(x0) > 0 and y0 is not None:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s"
                             % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")

        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")

    elif len(x0) > 0 and y0 is None:
        # The provided points still have to be evaluated; they consume part
        # of the call budget.
        y0 = []
        n_calls -= len(x0)

    elif len(x0) == 0 and y0 is not None:
        raise ValueError("`x0` cannot be `None` when `y0` is provided")

    else:  # len(x0) == 0 and y0 is None
        y0 = []

    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Random search: provided points first, then freshly sampled ones.
    X = x0 + space.rvs(n_samples=n_calls, random_state=rng)
    y = y0
    n_known = len(y0)

    for i in range(n_known, len(X)):
        y_i = func(X[i])

        # Only the first evaluation is checked for scalar-ness.
        if i == n_known and not np.isscalar(y_i):
            raise ValueError("`func` should return a scalar")

        y.append(y_i)

    y = np.array(y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X
    res.models = []  # Create attribute even though it is empty
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
Example #18
0
def moving_frame_augmented_hessian_optimizer(
        rhf_objective: RestrictedHartreeFockObjective,
        initial_parameters: np.ndarray,
        opdm_aa_measurement_func: Callable,
        max_iter: Optional[int] = 15,
        rtol: Optional[float] = 0.2E-2,
        delta: Optional[float] = 0.03,
        verbose: Optional[bool] = True,
        hessian_update: Optional[str] = 'diagonal'):  # testpragma: no cover
    # coverage: ignore
    """
    The moving frame optimizer

    Determine an optimal basis rotation by continuously updating the
    coordinate system and asking if stationarity is achieved.

    :param rhf_objective: openfermioncirq.experiments.hfvqe.RestrictedHartreeFockObjective
    :param initial_parameters: parameters to start the optimization
    :param opdm_aa_measurement_func: callable function that is called with a
                                     copy of the updated unitary and returns
                                     the opdm
    :param max_iter: maximum number of iterations to take
    :param rtol: Terminate the optimization when the norm of the update angles
                 falls below this threshold
    :param delta: cap on the largest absolute component of the update step;
                  steps exceeding it are rescaled so that their largest
                  component equals `delta`. Must lie in [0, 1].
    :param verbose: Allow printing of intermediate optimization information
    :param hessian_update: either 'diagonal' or 'energy'; 'diagonal' requests
                           a diagonal Hessian from `get_dvec_hmat`
    :return: an OptimizeResult whose `x_iters`, `func_vals`, `fr_vals`,
             `opdms` and `iter_times` lists hold one entry per iteration
             (unitary, energy, parameters, opdm, elapsed seconds). The `f`
             attribute is created but left as None.
    :raises ValueError: if `delta` is outside [0, 1] or `hessian_update` is
                        not one of the accepted strings
    """
    if delta > 1 or delta < 0:
        raise ValueError("Delta must be in the domain [0, 1]")
    if hessian_update not in ['diagonal', 'energy']:
        raise ValueError("hessian_update parameter not valid.")

    # Per-iteration history is accumulated directly on the result object.
    res = OptimizeResult()
    res.fr_vals = []
    res.opdms = []
    res.x_iters = []
    res.func_vals = []
    res.f = None
    res.iter_times = []

    fr_vals = initial_parameters
    # The frame starts as the identity over all occupied + virtual orbitals.
    current_unitary = np.eye(rhf_objective.nocc + rhf_objective.nvirt)
    break_at_count = max_iter
    current_count = 0
    energies = []
    fval_norms = []
    # for debugging
    opdm_initial = np.diag([1] * rhf_objective.nocc +
                           [0] * rhf_objective.nvirt)
    start_time = time.time()
    while current_count < break_at_count:
        # Iterate of algorithm has a unitary and parameters
        # first step is to generate new unitary
        u_new = group_action(old_unitary=current_unitary,
                             new_parameters=fr_vals,
                             occ=rhf_objective.occ,
                             virt=rhf_objective.virt)

        # get initial opdm from starting parameters
        opdm = opdm_aa_measurement_func(u_new.copy())
        # opdm = u_new @ opdm_initial @ u_new.conj().T

        # Calculate energy, residual, and hessian terms
        rdms: InteractionRDM = rhf_objective.rdms_from_opdm_aa(opdm)
        current_energy: float = rdms.expectation(
            rhf_objective.hamiltonian).real
        energies.append(current_energy)

        # Record this iterate before the step is computed, so the history
        # also contains the iterate at which the loop terminates.
        res.x_iters.append(u_new)
        res.func_vals.append(current_energy)
        res.fr_vals.append(fr_vals)
        res.opdms.append(opdm)
        res.iter_times.append(time.time() - start_time)

        rot_gens = non_redundant_rotation_generators(rhf_objective)
        dvec, hmat = get_dvec_hmat(
            rotation_generators=rot_gens,
            rhf_objective=rhf_objective,
            rdms=rdms,
            diagonal_hessian=True if hessian_update == 'diagonal' else False)
        # talk if talking is allowed
        if verbose:
            print("\nITERATION NUMBER : ", current_count)
            print("\n unitary")
            # Note: this prints the previous frame, not `u_new`.
            print(current_unitary)
            # Energy recomputed from the ideal opdm u_new @ opdm_initial @
            # u_new^dagger, printed next to the measured energy.
            test_opdm_aa = u_new @ opdm_initial @ u_new.conj().T
            true_energy = rhf_objective.energy_from_opdm(test_opdm_aa)
            print("Current Energy: ", current_energy)
            print("true energy ", true_energy)
            print("dvec")
            print(list(zip(dvec, rot_gens)))

        # build augmented Hessian: [[0, dvec^dagger], [dvec, hmat]]
        dvec = dvec.reshape((-1, 1))
        aug_hess = np.hstack((np.array([[0]]), dvec.conj().T))
        aug_hess = np.vstack((aug_hess, np.hstack((dvec, hmat))))

        # Take the eigenvector of the smallest eigenvalue and normalise it so
        # that its first component is 1; the remaining components are the
        # new step.
        w, v = np.linalg.eig(aug_hess)
        sort_idx = np.argsort(w)
        w = w[sort_idx]
        v = v[:, sort_idx]
        new_fr_vals = v[1:, [0]].flatten() / v[0, 0]

        # Sanity checks on the step (skipped when Python runs with -O).
        assert new_fr_vals.shape[0] == initial_parameters.shape[0]
        assert np.isclose(w[0], dvec.T @ new_fr_vals)

        # Qiming's algorithm for no learning rate rescaling
        if np.max(abs(new_fr_vals)) >= delta:
            new_fr_vals = delta * new_fr_vals / np.max(abs(new_fr_vals))

        # keep track of the norm
        fval_norms.append(np.linalg.norm(new_fr_vals))
        # allow a stopping condition
        if verbose:
            print("New fr values norm")
            print(np.linalg.norm(new_fr_vals))
        if np.linalg.norm(new_fr_vals) < rtol:
            if verbose:
                print("Finished Optimization")
            break

        # assign new values to the things being evaluated next iteration
        fr_vals = new_fr_vals.copy()
        current_unitary = u_new.copy()

        current_count += 1

    return res
Example #19
0
def model_policy_gradient(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        learning_rate: float = 1e-2,
        decay_rate: float = 0.96,
        decay_steps: int = 5,
        log_sigma_init: float = -5.0,
        max_iterations: int = 1000,
        batch_size: int = 10,
        radius_coeff: float = 3.0,
        warmup_steps: int = 10,
        batch_size_model: int = 65536,
        save_func_vals: bool = False,
        random_state: "cirq.RANDOM_STATE_OR_SEED_LIKE" = None,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_evaluations: Optional[int] = None
) -> scipy.optimize.OptimizeResult:
    """Model policy gradient algorithm for black-box optimization.

    The idea of this algorithm is to perform policy gradient, but estimate
    the function values using a surrogate model.
    The surrogate model is a least-squared quadratic
    fit to points sampled from the vicinity of the current iterate.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        learning_rate: The learning rate for the policy gradient.
        decay_rate: the learning decay rate for the Adam optimizer.
        decay_steps: the learning decay steps for the Adam optimizer.
        log_sigma_init: the initial value for the sigma of the policy
            in the log scale.
        max_iterations: The maximum number of iterations to allow before
            termination.
        batch_size: The number of points to sample in each iteration. The cost
            of evaluation of these samples are computed through the
            quantum computer cost model.
        radius_coeff: The ratio determining the size of the radius around
            the current iterate to sample points from to build the quadratic
            model. The ratio is with respect to the maximal ratio of the
            samples from the current policy.
        warmup_steps: The number of steps before the model policy gradient is
            performed. Before these steps, we use the policy gradient without
            the model.
        batch_size_model: The model sample batch size.
            After we fit the quadratic model, we use the model to evaluate
            on big enough batch of samples.
        save_func_vals: whether to compute and save the function values for
            the current value of parameter. These extra evaluations are not
            counted in `nfev`.
        random_state: A seed (int) or `np.random.RandomState` class to use when
            generating random values. If not set, defaults to using the module
            methods in `np.random`.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function values
            at those points.
        max_evaluations: The maximum number of function evaluations to allow
            before termination.

    Returns:
        Scipy OptimizeResult with attributes `x` (final iterate), `fun`
        (function value at `x`), `nit`, `nfev`, `message`, `xs_iters` /
        `ys_iters` (sampled batches and their values per iteration) and
        `x_iters` / `func_vals` (iterates and their values; filled per
        iteration only when `save_func_vals` is set, and always extended
        with the final iterate).
    """
    random_state = value.parse_random_state(random_state)

    if known_values is not None:
        known_xs, known_ys = known_values
        # Copy so that the caller's lists are not extended in place.
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    log_sigma = np.ones(n) * log_sigma_init
    sigma = np.exp(log_sigma)

    # set up the first and second moment estimate
    m_mean = np.zeros(n)
    v_mean = np.zeros(n)
    m_log_sigma = np.zeros(n)
    v_log_sigma = np.zeros(n)

    # set up lr schedule and optimizer
    lr_schedule1 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)
    lr_schedule2 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.fun = 0
    total_evals = 0
    num_iter = 0
    message = None

    while num_iter < max_iterations:
        # get samples from the current policy to evaluate
        z = random_state.randn(batch_size, n)
        new_xs = sigma * z + current_x

        if total_evals + batch_size > max_evaluations:
            message = "Reached maximum number of evaluations."
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += batch_size
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value
        if save_func_vals:
            res.func_vals.append(f(current_x))
            res.x_iters.append(np.copy(current_x))
            res.fun = res.func_vals[-1]

        # current sampling radius (maximal)
        max_radius = 0
        for x in new_xs:
            if np.linalg.norm(x - current_x) > max_radius:
                max_radius = np.linalg.norm(x - current_x)

        reward = [-y for y in new_ys]

        # warmup steps control whether to use the model to estimate the f
        if num_iter >= warmup_steps:
            # Determine points to use to build model
            model_xs = []
            model_ys = []
            for x, y in zip(known_xs, known_ys):
                if np.linalg.norm(x - current_x) < radius_coeff * max_radius:
                    model_xs.append(x)
                    model_ys.append(y)
            # safer way without the `SVD` not converging
            try:
                # The model is queried below at offsets from `current_x`, so
                # it is fitted around `current_x` as well. The previous code
                # passed `x`, the stale variable of the loop above (i.e. the
                # last known point).
                # NOTE(review): assumes the third argument of
                # `_get_quadratic_model` is the expansion point - confirm.
                model = _get_quadratic_model(model_xs, model_ys, current_x)
                use_model = True
            except ValueError:
                use_model = False

            if use_model:
                # get samples (from model)
                z = random_state.randn(batch_size_model, n)
                new_xs = sigma * z + current_x

                # use the model for prediction
                new_ys = model.predict(new_xs - current_x)
                reward = [-y for y in new_ys]

        reward = np.array(reward)

        # subtract baseline
        reward = reward - np.mean(reward)

        # analytic derivatives (natural gradient policy gradient)
        grad_mean = np.dot(z.T, reward)
        grad_log_sigma = np.dot(z.T**2, reward)
        delta_mean = grad_mean * sigma
        delta_log_sigma = grad_log_sigma / np.sqrt(2)

        # Normalise the update directions. A zero norm (e.g. all rewards
        # equal) would turn the iterate into NaN, so the already-zero
        # direction is left untouched in that case.
        delta_mean_norm = np.linalg.norm(grad_mean)
        delta_log_sigma_norm = np.linalg.norm(grad_log_sigma)
        if delta_mean_norm > 0:
            delta_mean = delta_mean / delta_mean_norm
        if delta_log_sigma_norm > 0:
            delta_log_sigma = delta_log_sigma / delta_log_sigma_norm

        # gradient ascend to update the parameters
        current_x, m_mean, v_mean = _adam_update(delta_mean,
                                                 current_x,
                                                 num_iter,
                                                 m_mean,
                                                 v_mean,
                                                 lr_schedule=lr_schedule1)
        log_sigma, m_log_sigma, v_log_sigma = _adam_update(
            delta_log_sigma,
            log_sigma,
            num_iter,
            m_log_sigma,
            v_log_sigma,
            lr_schedule=lr_schedule2,
        )

        # Keep the policy width within a fixed range.
        log_sigma = np.clip(log_sigma, -20.0, 2.0)
        sigma = np.exp(log_sigma)

        num_iter += 1

    # One last evaluation at the final iterate; it is counted in `nfev`.
    final_val = f(current_x)
    res.func_vals.append(final_val)

    if message is None:
        message = "Reached maximum number of iterations."

    res.x_iters.append(current_x)
    total_evals += 1
    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
Example #20
0
def _tree_minimize(func, dimensions, base_estimator, n_calls,
                   n_points, n_random_starts, x0=None, y0=None,
                   random_state=None, acq="EI", xi=0.01, kappa=1.96):
    """Sequential optimisation with a tree-based surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize; called with one point at a time and expected
        to return a scalar.

    * `dimensions` [list]:
        Search space definition, passed to `Space`.

    * `base_estimator` [estimator]:
        Regressor that is cloned and refitted on all evaluated points at
        every model iteration.

    * `n_calls` [int]:
        Evaluation budget. `n_calls - (n_random_starts + k)` model-guided
        evaluations are performed, where `k = len(x0)` if `y0` is given and
        `0` otherwise. Note that when `x0` is given without `y0`, the
        evaluations of `x0` are performed on top of this budget.

    * `n_points` [int]:
        Number of random candidate points scored by the acquisition function
        at every model iteration.

    * `n_random_starts` [int]:
        Number of random points evaluated before the first model is fitted.

    * `x0` [list, list of lists or `None`]:
        Initial input point(s). A flat list is treated as a single point.

    * `y0` [list, scalar or `None`]:
        Function values at `x0`. If `None`, `func` is evaluated at `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Seed or generator passed to `check_random_state`.

    * `acq`, `xi`, `kappa`:
        Forwarded to `_gaussian_acquisition` as `method`, `xi` and `kappa`.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        With attributes `x`, `fun`, `func_vals`, `x_iters`, `models`,
        `space` and `random_state`.

    Raises
    ------
    * `ValueError`: on a non-positive `n_calls`, no initial points at all,
      `n_calls` smaller than the number of initial points, a malformed
      `x0`/`y0`, or a non-scalar return value of `func`.
    """
    # Cheap argument checks first, so that bad input fails before the
    # search space is built.
    if n_calls <= 0:
        # Report the offending value (`n_calls`, not `n_random_starts`).
        raise ValueError(
            "Expected `n_calls` > 0, got %d" % n_calls)

    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given; an empty `x0` must not be indexed.
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    # Provided points count towards the initial budget only when their
    # function values are supplied as well.
    n_init_func_calls = len(x0) if y0 is not None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_total_init_calls <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError(
            "Expected `n_random_starts` > 0, got %d" % n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError(
            "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls))

    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Initialize with provided points (x0 and y0) and/or random points
    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError(
                "`y0` should be an iterable or a scalar, got %s" % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    else:
        y0 = []

    # New lists are built here, so the caller's `x0`/`y0` are not mutated by
    # the appends in the loop below.
    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    # Tree-based optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for i in range(n_model_iter):
        rgr = clone(base_estimator)
        rgr.fit(space.transform(Xi), yi)
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimizers like BFGS, so using random sampling
        # for the moment.
        X = space.transform(space.rvs(n_samples=n_points,
                                      random_state=rng))
        values = _gaussian_acquisition(
            X=X, model=rgr, y_opt=np.min(yi), method=acq,
            xi=xi, kappa=kappa)
        next_x = X[np.argmin(values)]
        # Map the winning candidate back to the original search space.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng

    return res
Example #21
0
def gp_minimize(func, bounds=None, search="sampling", random_state=None,
                maxiter=1000, acq="UCB", num_points=500):
    """
    Black-box optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian posterior which is much quicker to evaluate.

    Parameters
    ----------
    func: callable
        Function to minimize. Should take a array of parameters and
        return the function value.

    bounds: array-like, shape (n_parameters, 2)
        ``bounds[i][0]`` should give the lower bound of each parameter and
        ``bounds[i][1]`` should give the upper bound of each parameter.
        Required, despite the ``None`` default.

    search: string, "sampling" or "lbfgs"
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to "sampling", ``num_points`` are sampled randomly
        and the Gaussian Process prior is updated with that point that gives
        the best acquisition value over the Gaussian posterior.

        If search is set to "lbfgs", then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian posterior.

    random_state: int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.

    maxiter: int, default 1000
        Number of model-guided iterations. One additional evaluation is made
        at a random starting point, so ``func`` is called ``maxiter + 1``
        times in total.

    acq: string, default "UCB"
        Function to minimize over the gaussian posterior. Can be either
        the "UCB" which refers to the UpperConfidenceBound or "EI" which
        is the Expected Improvement.

    num_points: int, default 500
        Number of points to sample to determine the next "best" point.
        Useless if search is set to "lbfgs".

    Returns
    -------
    res: OptimizeResult, scipy object
        The optimization result returned as a OptimizeResult object.
        Important attributes are
        ``x`` - array, the optimization solution,
        ``fun`` - float, the value of the function at the optimum,
        ``models``- gp_models[i]. the prior on the function fit at
                       iteration[i].
        ``func_vals`` - the function value at the ith iteration.
        ``x_iters`` - the value of ``x`` corresponding to the function value
                      at the ith iteration.
        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    TypeError
        If ``bounds`` is not provided.
    ValueError
        If ``search`` is neither "sampling" nor "lbfgs".
    """
    # Validate up front: an unknown `search` would otherwise leave `best_x`
    # undefined inside the loop, after `func` has already been evaluated.
    if bounds is None:
        raise TypeError("`bounds` must be provided, got None")
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "`search` should be 'sampling' or 'lbfgs', got %r" % (search,))

    # A RandomState instance is documented as valid input but cannot be
    # used as a seed for a new RandomState, so it is used directly.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    num_params = len(bounds)
    lower_bounds, upper_bounds = zip(*bounds)
    upper_bounds = np.asarray(upper_bounds)
    lower_bounds = np.asarray(lower_bounds)
    span = upper_bounds - lower_bounds

    # The search itself happens in the unit cube; points are rescaled to the
    # real bounds only when `func` is called and when results are packed.
    x0 = rng.rand(num_params)
    func_val = [func(lower_bounds + span * x0)]

    length_scale = np.ones(num_params)
    gp_params = {
        'kernel': Matern(length_scale=length_scale, nu=2.5),
        'normalize_y': True,
        'random_state': random_state
    }
    lbfgs_bounds = np.tile((0, 1), (num_params, 1))

    gp_models = []
    x = np.reshape(x0, (1, -1))

    for i in range(maxiter):
        gpr = GaussianProcessRegressor(**gp_params)
        gpr.fit(x, func_val)

        if search == "sampling":
            sampling = rng.rand(num_points, num_params)
            acquis = acquisition_func(sampling, gpr, np.min(func_val), acq)
            best_x = sampling[np.argmin(acquis)]
        else:  # search == "lbfgs"
            init = rng.rand(num_params)
            best_x, _, _ = fmin_l_bfgs_b(
                acquisition_func,
                np.asfortranarray(init),
                args=(gpr, np.min(func_val), acq),
                bounds=lbfgs_bounds, approx_grad=True, maxiter=10)

        gp_models.append(gpr)

        best_f = func(lower_bounds + span * best_x)
        # Append the new point as one more row of the history.
        x = np.vstack((x, best_x))
        func_val.append(best_f)

    x = lower_bounds + span * x
    func_ind = np.argmin(func_val)

    res = OptimizeResult()
    res.models = gp_models
    res.x = x[func_ind]
    res.fun = func_val[func_ind]
    res.func_vals = func_val
    res.x_iters = x

    return res
Example #22
0
def create_result(Xi,
                  yi,
                  n_evaluations=None,
                  space=None,
                  rng=None,
                  specs=None,
                  models=None,
                  maximize=False):
    """
    Initialize an `OptimizeResult` object.

    Parameters
    ----------
    * `Xi` [list of lists, shape=(n_iters, n_features)]:
        Evaluated points. May also be a list of such lists (one per
        bracket, as returned by Hyperband), which is flattened by one level.

    * `yi` [array-like, shape=(n_iters,)]:
        Objective values at the points in `Xi`, nested like `Xi`. If the
        flattened values form a 2-D array, column 0 is taken as the
        objective value and column 1 is stored as `res.log_time`.

    * `n_evaluations` [int, optional]:
        If given, `res.func_vals` / `res.x_iters` hold only the best
        `n_evaluations` evaluations, best first, and the complete history is
        stored in `res.all_func_vals` / `res.all_x_iters`. Duplicate
        objective values are dropped when at least `n_evaluations` distinct
        values exist.

    * `space` [Space instance, optional]:
        Search space.

    * `rng` [RandomState instance, optional]:
        State of the random state.

    * `specs` [dict, optional]:
        Call specifications.

    * `models` [list, optional]:
        List of fit surrogate models.

    * `maximize` [bool, default=False]:
        If True, larger objective values are considered better, both for
        `res.x` / `res.fun` and for the `n_evaluations` selection.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        OptimizeResult instance with the required information.
    """
    res = OptimizeResult()

    try:
        # Hyperband returns evaluations as lists of lists.
        # We want to store the results as a single array.
        yi = list(itertools.chain.from_iterable(yi))
        Xi = list(itertools.chain.from_iterable(Xi))
    except TypeError:
        # All algorithms other than Hyperband already return a single list;
        # iterating a scalar element of `yi` raises before `Xi` is touched.
        pass

    yi = np.asarray(yi)
    if np.ndim(yi) == 2:
        res.log_time = np.ravel(yi[:, 1])
        yi = np.ravel(yi[:, 0])

    best = np.argmax(yi) if maximize else np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]

    if n_evaluations:
        unique, first_seen = np.unique(yi, return_index=True)

        if len(unique) < n_evaluations:
            # Not enough distinct values: rank every evaluation instead.
            order = np.argsort(yi)
        else:
            # `first_seen` indexes the first occurrence of each distinct
            # value, in ascending order of value.
            order = first_seen

        if maximize:
            # Best first means largest first when maximizing; previously the
            # smallest values were kept, contradicting `res.x` / `res.fun`.
            order = order[::-1]

        order = order[:n_evaluations]
        # Indexing with the same `order` keeps values and points aligned.
        res.func_vals = yi[order]
        res.x_iters = np.asarray([Xi[idx] for idx in order])
        res.all_func_vals = np.asarray(yi)
        res.all_x_iters = np.asarray(Xi)
    else:
        res.func_vals = np.asarray(yi)
        res.x_iters = np.asarray(Xi)

    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs
    return res
Example #23
0
def _tree_minimize(func,
                   dimensions,
                   base_estimator,
                   n_calls,
                   n_points,
                   n_random_starts,
                   x0=None,
                   y0=None,
                   random_state=None,
                   acq="EI",
                   xi=0.01,
                   kappa=1.96):
    """Sequential optimisation with a tree-based surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize; called with one point at a time and expected
        to return a scalar.

    * `dimensions` [list]:
        Search space definition, passed to `Space`.

    * `base_estimator` [estimator]:
        Regressor that is cloned and refitted on all evaluated points at
        every model iteration.

    * `n_calls` [int]:
        Evaluation budget. `n_calls - (n_random_starts + k)` model-guided
        evaluations are performed, where `k = len(x0)` if `y0` is given and
        `0` otherwise. Note that when `x0` is given without `y0`, the
        evaluations of `x0` are performed on top of this budget.

    * `n_points` [int]:
        Number of random candidate points scored by the acquisition function
        at every model iteration.

    * `n_random_starts` [int]:
        Number of random points evaluated before the first model is fitted.

    * `x0` [list, list of lists or `None`]:
        Initial input point(s). A flat list is treated as a single point.

    * `y0` [list, scalar or `None`]:
        Function values at `x0`. If `None`, `func` is evaluated at `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Seed or generator passed to `check_random_state`.

    * `acq`, `xi`, `kappa`:
        Forwarded to `_gaussian_acquisition` as `method`, `xi` and `kappa`.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        With attributes `x`, `fun`, `func_vals`, `x_iters`, `models`,
        `space` and `random_state`.

    Raises
    ------
    * `ValueError`: on a non-positive `n_calls`, no initial points at all,
      `n_calls` smaller than the number of initial points, a malformed
      `x0`/`y0`, or a non-scalar return value of `func`.
    """
    # Cheap argument checks first, so that bad input fails before the
    # search space is built.
    if n_calls <= 0:
        # Report the offending value (`n_calls`, not `n_random_starts`).
        raise ValueError("Expected `n_calls` > 0, got %d" % n_calls)

    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given; an empty `x0` must not be indexed.
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    # Provided points count towards the initial budget only when their
    # function values are supplied as well.
    n_init_func_calls = len(x0) if y0 is not None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_total_init_calls <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError("Expected `n_random_starts` > 0, got %d" %
                         n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError("Expected `n_calls` >= %d, got %d" %
                         (n_total_init_calls, n_calls))

    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Initialize with provided points (x0 and y0) and/or random points
    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s" %
                             type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    else:
        y0 = []

    # New lists are built here, so the caller's `x0`/`y0` are not mutated by
    # the appends in the loop below.
    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    # Tree-based optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for i in range(n_model_iter):
        rgr = clone(base_estimator)
        rgr.fit(space.transform(Xi), yi)
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimizers like BFGS, so using random sampling
        # for the moment.
        X = space.transform(space.rvs(n_samples=n_points, random_state=rng))
        values = _gaussian_acquisition(X=X,
                                       model=rgr,
                                       y_opt=np.min(yi),
                                       method=acq,
                                       xi=xi,
                                       kappa=kappa)
        next_x = X[np.argmin(values)]
        # Map the winning candidate back to the original search space.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng

    return res
Example #24
0
def dummy_minimize(func,
                   dimensions,
                   n_calls=100,
                   x0=None,
                   y0=None,
                   random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum.

        - If `x0` is provided without `y0`, the evaluations of `x0` count
          towards `n_calls`: only `n_calls - len(x0)` random points are
          drawn.
        - If both `x0` and `y0` are provided, `n_calls` random points are
          drawn and evaluated in addition to the given ones.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` or an empty list, no initial input points are
          used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `models` [list]: always empty; present so that the result has the
           same attributes as the model-based minimizers.
        - `space` [Space]: the optimisation space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`:
        If `x0` is not a list, if `y0` is neither an iterable nor a scalar,
        if `x0` and `y0` differ in length, if `y0` holds non-scalar
        elements, if `y0` is given without `x0`, or if the first evaluation
        of `func` does not return a scalar.
    """
    # Save call args. This must stay the first statement: at this point the
    # frame's locals are exactly the arguments the caller passed.
    specs = {
        "args": copy.copy(inspect.currentframe().f_locals),
        "function": inspect.currentframe().f_code.co_name
    }

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given: wrap it into a list of points.
        # `len(x0) > 0` guards the `x0[0]` lookup so that an empty list
        # means "no initial points" instead of raising IndexError (an
        # explicit length test is used because array-likes have no
        # unambiguous truth value).
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, got %s" % type(x0))

    if len(x0) > 0 and y0 is not None:
        # Initial points come with their evaluations: normalise `y0` to a
        # fresh list (so the caller's object is never mutated) and validate.
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s" %
                             type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")

        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")

    elif len(x0) > 0 and y0 is None:
        # The initial points still have to be evaluated; those evaluations
        # are charged against the `n_calls` budget.
        y0 = []
        n_calls -= len(x0)

    elif len(x0) == 0 and y0 is not None:
        raise ValueError("`x0`cannot be `None` when `y0` is provided")

    else:  # len(x0) == 0 and y0 is None
        y0 = []

    # Random search: the user-supplied points followed by `n_calls` random
    # draws. Concatenation builds a new list, leaving the caller's `x0` intact.
    X = x0 + space.rvs(n_samples=n_calls, random_state=rng)
    y = y0

    # The first len(y0) points already have known values; evaluate the rest.
    for n_evaluated, point in enumerate(X[len(y0):]):
        y_i = func(point)

        # Only the first evaluation is checked for being a scalar.
        if n_evaluated == 0 and not np.isscalar(y_i):
            raise ValueError("`func` should return a scalar")

        y.append(y_i)

    y = np.array(y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X
    res.models = []  # Create attribute even though it is empty
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
Example #25
0
def model_gradient_descent(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        rate: float = 1e-1,
        sample_radius: float = 1e-1,
        n_sample_points: int = 100,
        n_sample_points_ratio: Optional[float] = None,
        rate_decay_exponent: float = 0.0,
        stability_constant: float = 0.0,
        sample_radius_decay_exponent: float = 0.0,
        tol: float = 1e-8,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_iterations: Optional[int] = None,
        max_evaluations: Optional[int] = None) -> scipy.optimize.OptimizeResult:
    """Model gradient descent algorithm for black-box optimization.

    The idea of this algorithm is to perform gradient descent, but estimate
    the gradient using a surrogate model instead of, say, by
    finite-differencing. The surrogate model is a least-squared quadratic
    fit to points sampled from the vicinity of the current iterate.
    This algorithm works well when you have an initial guess which is in the
    convex neighborhood of a local optimum and you want to converge to that
    local optimum. It's meant to be used when the function is stochastic.

    Args:
        f: The function to minimize.
        x0: An initial guess. It is copied, never modified in place; a
            non-floating-point guess (e.g. integers) is converted to float
            so that the in-place gradient updates are well defined.
        args: Additional arguments to pass to the function.
        rate: The learning rate for the gradient descent.
        sample_radius: The radius around the current iterate to sample
            points from to build the quadratic model.
        n_sample_points: The number of points to sample in each iteration.
        n_sample_points_ratio: This specifies the number of points to sample
            in each iteration as a coefficient of the number of points
            required to exactly determine a quadratic model. The number
            of sample points will be this coefficient times (n+1)(n+2)/2,
            rounded up, where n is the number of parameters.
            Setting this overrides n_sample_points.
        rate_decay_exponent: Controls decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant and a is the rate decay exponent
            (this parameter).
        stability_constant: Affects decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant (this parameter) and a is the rate decay
            exponent.
        sample_radius_decay_exponent: Controls decay of sample radius.
            In each iteration, the sample radius is the base sample radius
            divided by (i + 1)**b, where b is this parameter.
        tol: The algorithm terminates when the difference between the current
            iterate and the next suggested iterate is smaller than this value.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function values
            at those points. Known points are only used in an iteration's
            model if they lie within the current sample radius.
        max_iterations: The maximum number of iterations to allow before
            termination.
        max_evaluations: The maximum number of function evaluations to allow
            before termination. An iteration is only started if all of its
            sample points fit in the remaining budget.

    Returns:
        Scipy OptimizeResult with the following fields:

        x: The final iterate.
        fun: The function value at the final iterate.
        nit: The number of completed gradient steps.
        nfev: The number of function evaluations performed by this call.
        message: The reason for termination.
        x_iters: The iterates; x_iters[i] is the point at which
            func_vals[i] was evaluated.
        func_vals: The function value at each iterate.
        xs_iters: For each iteration, the list of points that were sampled
            (the iterate itself first).
        ys_iters: For each iteration, the function values at the points in
            xs_iters.
        model_vals: None followed by, for each gradient step, the value the
            surrogate model predicted for the new iterate.
    """

    if known_values is not None:
        known_xs, known_ys = known_values
        # Copy so that the caller's lists/arrays are never extended or
        # aliased by the bookkeeping below.
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_iterations is None:
        max_iterations = np.inf
    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    if n_sample_points_ratio is not None:
        n_sample_points = int(
            np.ceil(n_sample_points_ratio * (n + 1) * (n + 2) / 2))

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    if not np.issubdtype(current_x.dtype, np.inexact):
        # The update `current_x -= ...` below is in place; with an integer
        # (or boolean) array NumPy refuses to cast the float step back.
        current_x = current_x.astype(float)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.model_vals = [None]
    res.fun = 0
    total_evals = 0
    num_iter = 0
    converged = False
    message = None

    while num_iter < max_iterations:
        current_sample_radius = (sample_radius /
                                 (num_iter + 1)**sample_radius_decay_exponent)

        # Determine points to evaluate
        # in ball around current point
        new_xs = [np.copy(current_x)] + [
            current_x + _random_point_in_ball(n, current_sample_radius)
            for _ in range(n_sample_points)
        ]

        if total_evals + len(new_xs) > max_evaluations:
            message = 'Reached maximum number of evaluations.'
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += len(new_ys)
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value (new_xs[0] is the current iterate itself)
        res.func_vals.append(new_ys[0])
        res.x_iters.append(np.copy(current_x))
        res.fun = res.func_vals[-1]

        # Determine points to use to build model: every known point
        # (from this or any earlier iteration, or supplied by the caller)
        # strictly inside the current sample ball.
        model_xs = []
        model_ys = []
        for x, y in zip(known_xs, known_ys):
            if np.linalg.norm(x - current_x) < current_sample_radius:
                model_xs.append(x)
                model_ys.append(y)
        # Build and solve model
        model_gradient, model = _get_least_squares_model_gradient(
            model_xs, model_ys, current_x)

        # calculate the gradient and update the current point
        gradient_norm = np.linalg.norm(model_gradient)
        decayed_rate = (
            rate / (num_iter + 1 + stability_constant)**rate_decay_exponent)
        # Convergence criteria: the proposed step is shorter than `tol`
        if decayed_rate * gradient_norm < tol:
            converged = True
            message = 'Optimization converged successfully.'
            break
        # Update
        current_x -= decayed_rate * model_gradient
        # The model is queried at the step just taken, i.e. at a displacement
        # rather than an absolute position.
        # NOTE(review): presumably the model is fit in coordinates centered
        # on the pre-step iterate -- confirm against
        # `_get_least_squares_model_gradient`.
        res.model_vals.append(
            model.predict([-decayed_rate * model_gradient])[0])

        num_iter += 1

    if converged:
        # The loop broke before moving `current_x`, so the final iterate and
        # its value were already recorded in `x_iters` / `func_vals` and no
        # further evaluation takes place. Counting one here would overstate
        # `nfev` and duplicate the last entry of `x_iters`.
        final_val = res.func_vals[-1]
    else:
        # `current_x` was moved (or never evaluated): evaluate it once more
        # and record it so that `x_iters` and `func_vals` stay parallel.
        final_val = f(current_x)
        total_evals += 1
        res.func_vals.append(final_val)
        res.x_iters.append(current_x)

    if message is None:
        message = 'Reached maximum number of iterations.'

    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res