Esempio n. 1
0
 def result(self):
     """Package the recorded optimizer state into an ``OptimizeResult``.

     Returns
     -------
     OptimizeResult
         ``x``, ``fun``, ``message`` and ``nit`` are copied from the
         instance attributes ``_xmin``, ``_fvalue``, ``_message`` and
         ``_step_record`` respectively.
     """
     return OptimizeResult(
         x=self._xmin,
         fun=self._fvalue,
         message=self._message,
         nit=self._step_record,
     )
Esempio n. 2
0
 def result(self):
     """Package the current best solution into an ``OptimizeResult``.

     Returns
     -------
     OptimizeResult
         ``x`` and ``fun`` come from ``self.es.xbest`` / ``self.es.ebest``,
         ``nit`` from ``self._iter`` and ``ncall`` from
         ``self.owf.nb_fun_call``.
     """
     return OptimizeResult(
         x=self.es.xbest,
         fun=self.es.ebest,
         nit=self._iter,
         ncall=self.owf.nb_fun_call,
     )
Esempio n. 3
0
def scipy_nlopt_cobyla(*args, **kwargs):
    """Wrap the nlopt COBYLA optimizer behind a scipy.optimize-like interface.

    parameters:
        args[0]: target, function to be minimized
        args[1]: x0, starting point for minimization
        bounds: list of bounds for the movement (required keyword)
                [[min, max], [min, max], ...]
        ftol_rel: relative tolerance on the function value, same as in nlopt
        xtol_rel: relative tolerance on the parameters, same as in nlopt
            one of the tol_rel should be specified
    returns:
        OptimizeResult() object with properly set x, fun, success.
            When nlopt.RoundoffLimited is raised the starting point is
            returned, success is False, message is 'nlopt.RoundoffLimited'
            and status is not set.
            Otherwise status holds nlopt's last result code and success is
            True only for codes 3 and 4 (ftol / xtol reached).
    """
    target = args[0]
    start = args[1]
    bounds = kwargs['bounds']
    answ = OptimizeResult()

    opt = nlopt.opt(nlopt.LN_COBYLA, len(start))
    opt.set_lower_bounds([pair[0] for pair in bounds])
    opt.set_upper_bounds([pair[1] for pair in bounds])
    if 'ftol_rel' in kwargs:
        opt.set_ftol_rel(kwargs['ftol_rel'])
    if 'xtol_rel' in kwargs:
        # Previously this called set_ftol_rel, which silently overwrote the
        # function tolerance and never configured the parameter tolerance.
        opt.set_xtol_rel(kwargs['xtol_rel'])
    opt.set_min_objective(target)

    x0 = list(start)

    try:
        x1 = opt.optimize(x0)
    except nlopt.RoundoffLimited:
        # Report the untouched starting point as a failed run.
        answ.x = x0
        answ.fun = target(x0)
        answ.success = False
        answ.message = 'nlopt.RoundoffLimited'
        return answ

    status = opt.last_optimize_result()
    answ.x = x1
    answ.fun = target(x1)
    answ.success = status in (3, 4)
    answ.status = status
    # Sanity check: re-evaluating the target at the optimum should reproduce
    # the value nlopt reports.
    if not answ.fun == opt.last_optimum_value():
        # Single pre-formatted argument so the output is the same under
        # Python 2 and Python 3.
        print('Something\'s wrong,  %s %s'
              % (answ.fun, opt.last_optimum_value()))

    return answ
Esempio n. 4
0
    def setup_method(self):
        """Create the fixture: a ``Storage`` seeded with ``x0`` / ``f0``.

        ``self.x0`` is a 0-d array holding 1 and ``self.f0`` is 0; both are
        wrapped in an ``OptimizeResult`` that becomes the initial content of
        ``self.storage``.
        """
        self.x0 = np.array(1)
        self.f0 = 0

        self.storage = Storage(OptimizeResult(x=self.x0, fun=self.f0))
def _tree_minimize(func, dimensions, base_estimator, n_calls,
                   n_points, n_random_starts, random_state=None):
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Initialize with random points
    if n_random_starts <= 0:
        raise ValueError(
            "Expected n_random_starts > 0, got %d" % n_random_starts)

    if n_calls <= 0:
        raise ValueError(
            "Expected n_calls > 0, got %d" % n_random_starts)

    if n_calls < n_random_starts:
        raise ValueError(
            "Expected n_calls >= %d, got %d" % (n_random_starts, n_calls))

    Xi = space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError(
            "The function to be optimized should return a scalar")

    # Tree-based optimization loop
    models = []

    n_model_iter = n_calls - n_random_starts
    for i in range(n_model_iter):
        rgr = clone(base_estimator)
        rgr.fit(space.transform(Xi), yi)
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimizers like BFGS, so using random sampling
        # for the moment.
        X = space.transform(space.rvs(n_samples=n_points,
                                      random_state=rng))
        values = -gaussian_ei(X, rgr, np.min(yi))
        next_x = X[np.argmin(values)]

        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space

    return res
Esempio n. 6
0
    def test_higher_f_rejected(self):
        """A candidate with a higher ``fun`` must not replace the stored one."""
        candidate = OptimizeResult(x=self.x0 + 1, fun=self.f0 + 1)

        accepted = self.storage.update(candidate)
        stored = self.storage.get_lowest()

        # The storage still holds the original minimum.
        assert_equal(self.x0, stored.x)
        assert_equal(self.f0, stored.fun)
        assert_(not accepted)
Esempio n. 7
0
    def test_lower_f_accepted(self):
        """A candidate with a lower ``fun`` must replace the stored one."""
        candidate = OptimizeResult(x=self.x0 + 1, fun=self.f0 - 1)

        accepted = self.storage.update(candidate)
        stored = self.storage.get_lowest()

        # The storage no longer holds the original minimum.
        assert_(self.x0 != stored.x)
        assert_(self.f0 != stored.fun)
        assert_(accepted)
Esempio n. 8
0
def scipy_graduate_walk(*args, **kwargs):
    """Adapter exposing graduate_walk through a scipy-style call signature.

    parameters:
        args[0]: target, function to be minimized
        args[1]: x0, starting point for minimization
        dx=1e-8: step in change of the point
        dx_start=0.1: starting value for the dx step
        dx_step=0.1: change of dx on each iteration
        diagonal=False: when truthy, directions come from
                generate_all_directions
            otherwise from
                generate_nondiagonal_directions
        bounds=None: list of bounds for the movement
                [[min, max], [min, max], ...]
            wrapped in Bounds(); None means no bounds are passed on
        ytol_rel=1e-8: relative tolerance forwarded to graduate_walk
    returns:
        OptimizeResult() with x, fun and nfev taken from the 'x0', 'fval'
            and 'fnval' entries of graduate_walk's result.
            success is always True and status is always 1.
    """
    target, x0 = args[0], args[1]

    option = kwargs.get
    dx = option('dx', 1e-8)
    dx_start = option('dx_start', 0.1)
    dx_step = option('dx_step', 0.1)

    make_directions = (generate_all_directions if option('diagonal')
                       else generate_nondiagonal_directions)
    directions = make_directions(len(x0))

    raw_bounds = option('bounds')
    bounds = None if raw_bounds is None else Bounds(raw_bounds)

    ytol_rel = option('ytol_rel', 1e-8)

    res = graduate_walk(target, x0, dx, directions, dx_start, dx_step,
                        bounds=bounds, ytol_rel=ytol_rel)

    return OptimizeResult(x=res['x0'], fun=res['fval'], success=True,
                          status=1, nfev=res['fnval'])
Esempio n. 9
0
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None):
    """
    Build an `OptimizeResult` from the evaluated points and their values.

    Parameters
    ----------
    * `Xi` [list of lists, shape=(n_iters, n_features)]:
        Points evaluated at every iteration.

    * `yi` [array-like, shape=(n_iters,) or (n_iters, 2)]:
        Values obtained at every iteration. When two-dimensional, column 0
        is used as the function values and column 1 is stored as
        `log_time`.

    * `space` [Space instance, optional]:
        Search space.

    * `rng` [RandomState instance, optional]:
        State of the random state; stored as `random_state`.

    * `specs` [dict, optional]:
        Call specifications.

    * `models` [list, optional]:
        List of fit surrogate models.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        Result with `x`, `fun`, `func_vals`, `x_iters`, `models`, `space`,
        `random_state` and `specs` set (plus `log_time` for 2-D `yi`).
    """
    func_vals = np.asarray(yi)
    res = OptimizeResult()

    if func_vals.ndim == 2:
        # Split the two columns: values first, second column kept aside.
        res.log_time = func_vals[:, 1].ravel()
        func_vals = func_vals[:, 0].ravel()

    winner = np.argmin(func_vals)
    res.x = Xi[winner]
    res.fun = func_vals[winner]
    res.func_vals = func_vals
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs
    return res
Esempio n. 10
0
def get_optimization_results(t, population, factorial_cost, scalar_fitness,
                             skill_factor, message):
    """Build one ``OptimizeResult`` per task index.

    The number of tasks is the number of distinct values in `skill_factor`;
    results are produced for ``k = 0 .. K-1`` in that order.

    Each result carries:
      * ``x`` / ``fun``  - from ``get_best_individual`` for task ``k``
      * ``message``      - the `message` argument
      * ``nit``          - the `t` argument
      * ``nfev``         - ``(t + 1) * (len(population) // 2)``
      * ``mean`` / ``std`` - from ``get_statistics`` for task ``k``

    Returns the list of results.
    """
    n_tasks = len(set(skill_factor))
    half_population = len(population) // 2

    results = []
    for task in range(n_tasks):
        best_x, best_fun = get_best_individual(
            population, factorial_cost, scalar_fitness, skill_factor, task)
        task_mean, task_std = get_statistics(
            factorial_cost, skill_factor, task)
        results.append(OptimizeResult(
            x=best_x,
            fun=best_fun,
            message=message,
            nit=t,
            nfev=(t + 1) * half_population,
            mean=task_mean,
            std=task_std,
        ))
    return results
Esempio n. 11
0
def scipy_walk(*args, **kwargs):
    """Adapter exposing walk through a scipy-style call signature.

    parameters:
        args[0]: target, function to be minimized
        args[1]: x0, starting point for minimization
        dx=1e-8: step in change of the point
        diagonal=False: when truthy, directions come from
                generate_all_directions
            otherwise from
                generate_nondiagonal_directions
        bounds=None: list of bounds for the movement
                [[min, max], [min, max], ...]
            wrapped in Bounds(); None means no bounds are passed on
        ytol_rel=1e-8: relative tolerance forwarded to walk
    returns:
        OptimizeResult() with x, fun and nfev taken from the 'x0', 'fval'
            and 'fnval' entries of walk's result.
            success is always True and status is always 1.
    """
    target, x0 = args[0], args[1]

    option = kwargs.get
    dx = option('dx', 1e-8)

    make_directions = (generate_all_directions if option('diagonal')
                       else generate_nondiagonal_directions)
    directions = make_directions(len(x0))

    raw_bounds = option('bounds')
    bounds = None if raw_bounds is None else Bounds(raw_bounds)

    ytol_rel = option('ytol_rel', 1e-8)

    res = walk(target, x0, dx, directions, bounds=bounds, ytol_rel=ytol_rel)

    return OptimizeResult(x=res['x0'], fun=res['fval'], success=True,
                          status=1, nfev=res['fnval'])
def steepest_decent(fun, x0, fprime, args, tol=1.0e-4, maxiter=1000,
                    callback=None):
    '''Steepest descent method with an Armijo step size.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x, *args)``.
    x0 : array-like
        Starting point.
    fprime : callable
        Gradient of ``fun``, called as ``fprime(x, *args)``.
    args : tuple
        Extra arguments forwarded to ``fun`` and ``fprime``.
    tol : float, optional
        Iteration stops when the norm of the objective change reported by
        ``armijo_stepsize`` falls below this value.
    maxiter : int, optional
        Maximum number of iterations.
    callback : callable, optional
        Called as ``callback(x)`` after every accepted step.

    Returns
    -------
    OptimizeResult
        ``x`` is the final point, ``fun`` the objective there and ``nit``
        the zero-based index of the last iteration started (0 when the
        loop never runs).
    '''
    x = numpy.array(x0)

    # Pre-bind the counter so that maxiter <= 0 does not leave it undefined.
    itr = 0
    for itr in range(maxiter):
        direction = -1 * fprime(x, *args)
        alpha, obj_current, obj_next = armijo_stepsize(
            fun, x, fprime, direction, args=args)

        # Stop before stepping once the objective barely changes.
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break

        x = x + alpha * direction
        if callback is not None:
            callback(x)

    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = itr
    return result
def newton_method(fun, x0, fprime, args, tol=1.0e-4, maxiter=1000,
                    callback=None):
    '''Newton's method with the step size chosen by the Armijo condition.

    Parameters
    ----------
    fun : callable
        Objective, called as ``fun(x, *args)``.
    x0 : array-like
        Starting point.
    fprime : callable
        Gradient of ``fun``, called as ``fprime(x, *args)``.
    args : tuple
        Two-element tuple ``(A, b)``; ``A`` is the matrix solved against the
        gradient to obtain the search direction. Both elements are
        forwarded to ``fun`` and ``fprime``.
    tol : float, optional
        Iteration stops when the norm of the objective change reported by
        ``armijo_stepsize`` falls below this value.
    maxiter : int, optional
        Maximum number of iterations.
    callback : callable, optional
        Called as ``callback(x)`` after every accepted step.

    Returns
    -------
    OptimizeResult
        ``x`` is the final point, ``fun`` the objective there and ``nit``
        the zero-based index of the last iteration started (0 when the
        loop never runs).
    '''
    x = numpy.array(x0)
    # Keep the two-element unpack so a malformed ``args`` still fails here.
    A, _ = args

    # Pre-bind the counter so that maxiter <= 0 does not leave it undefined.
    itr = 0
    for itr in range(maxiter):
        direction = -1 * numpy.linalg.solve(A, fprime(x, *args))
        alpha, obj_current, obj_next = armijo_stepsize(
            fun, x, fprime, direction, args=args)

        # Stop before stepping once the objective barely changes.
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break

        x = x + alpha * direction
        if callback is not None:
            callback(x)

    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = itr
    return result
Esempio n. 14
0
def create_result(Xi, yi, space=None, rng=None, specs=None, models=None):
    """
    Build an `OptimizeResult` from the evaluated points and their values.

    Parameters
    ----------
    Xi : list of lists, shape (n_iters, n_features)
        Points evaluated at every iteration.
    yi : array-like, shape (n_iters,) or (n_iters, 2)
        Values obtained at every iteration. When two-dimensional, column 0
        is used as the function values and column 1 is stored as
        ``log_time``.
    space : Space instance, optional
        Search space.
    rng : RandomState instance, optional
        State of the random state; stored as ``random_state``.
    specs : dict, optional
        Call specifications.
    models : list, optional
        List of fit surrogate models.

    Returns
    -------
    res : `OptimizeResult`, scipy object
        Result with ``x``, ``fun``, ``func_vals``, ``x_iters``, ``models``,
        ``space``, ``random_state`` and ``specs`` set (plus ``log_time``
        for 2-D `yi`).
    """
    values = np.asarray(yi)
    res = OptimizeResult()

    if values.ndim == 2:
        # Second column is kept aside; first column holds the values.
        res["log_time"] = np.ravel(values[:, 1])
        values = np.ravel(values[:, 0])

    idx = np.argmin(values)
    res["x"] = Xi[idx]
    res["fun"] = values[idx]
    res["func_vals"] = values
    res["x_iters"] = Xi
    res["models"] = models
    res["space"] = space
    res["random_state"] = rng
    res["specs"] = specs
    return res
Esempio n. 15
0
    def solve(self):
        """
        Runs the DifferentialEvolutionSolver.

        Each generation advances the solver with ``next(self)``, prints the
        current best energy and then calls ``self.migration()``. After the
        loop the best member is optionally polished with L-BFGS-B.

        Returns
        -------
        res : OptimizeResult
            The optimization result represented as a ``OptimizeResult`` object.
            Important attributes are: ``x`` the solution array, ``success`` a
            Boolean flag indicating if the optimizer exited successfully and
            ``message`` which describes the cause of the termination. See
            `OptimizeResult` for a description of other attributes.  If `polish`
            was employed, and a lower minimum was obtained by the polishing,
            then OptimizeResult also contains the ``jac`` attribute.

        Notes
        -----
        NOTE(review): with the convergence/callback checks commented out
        below, the loop can only end through ``StopIteration`` or by
        exhausting ``maxiter``; both paths set ``warning_flag``, so
        ``success`` is always False as the code stands - confirm this is
        intended.
        """
        nit, warning_flag = 0, False

        # Start from the standard "success" message; it is replaced below
        # when the run ends for another reason.
        status_message = _status_message['success']

        # The population may have just been initialized (all entries are
        # np.inf). If it has you have to calculate the initial energies.
        # Although this is also done in the evolve generator it's possible
        # that someone can set maxiter=0, at which point we still want the
        # initial energies to be calculated (the following loop isn't run).

        # np.all(np.isinf(...)) is True only when every energy is still inf,
        # i.e. nothing has been evaluated yet.
        if self.maxiter == 0:
            if np.all(np.isinf(self.population_energies)):
                if self.disp:
                    print("Calculating initial energies when maxiter = 0")
                self._calculate_population_energies()

#        for i in range(self.num_population_members):
#            print(self.population[i,:])
# do the optimisation.
        for nit in xrange(1, self.maxiter + 1):
            if self.disp:
                print("iter: ", nit)
            # evolve the population by a generation
            try:
                next(self)
            except StopIteration:
                # The solver refused to advance; reported with the 'maxfev'
                # status message.
                warning_flag = True
                status_message = _status_message['maxfev']
                break

            # Progress line; printed regardless of self.disp.
            print("differential_evolution step %d: f(x)= %g" %
                  (nit, self.population_energies[0]))

            #save populations at each iter and rank to analyze after
            #            np.save("before_rank"+str(self.rank)+"iter"+str(nit), self.population)

            #migrate
            self.migration()

            #            np.save("after_rank"+str(self.rank)+"iter"+str(nit), self.population)

            # should the solver terminate?
            #            print("Checking if should converge")
            #            convergence = self.convergence
            #
            #            if (self.callback and
            #                    self.callback(self._scale_parameters(self.population[0]),
            #                                  convergence=self.tol / convergence) is True):
            #
            #                warning_flag = True
            #                status_message = ('callback function requested stop early '
            #                                  'by returning True')
            #                break
            #            print("checking if tolerance level reached")
            ##            intol = (np.std(self.population_energies) <=
            ##                     self.atol +
            ##                     self.tol * np.abs(np.mean(self.population_energies)))
            #
            #            intol = self.population_energies[0] <= self.mse_thresh
            #            if warning_flag or intol:
            #                print("stopping iterations")
            #                break
            print("Starting next iter")

        else:
            # for/else: reached only when the loop ran to completion without
            # a break, i.e. maxiter generations were used up.
            status_message = _status_message['maxiter']
            warning_flag = True

        # Index 0 of population_energies is reported as the best energy.
        DE_result = OptimizeResult(x=self.x,
                                   fun=self.population_energies[0],
                                   nfev=self._nfev,
                                   nit=nit,
                                   message=status_message,
                                   success=(warning_flag is not True))

        print("done iters")
        if self.polish:
            print("performing final polishing")
            # Local refinement starting from a copy of the DE solution.
            result = minimize(self.func,
                              np.copy(DE_result.x),
                              method='L-BFGS-B',
                              bounds=self.limits.T,
                              args=self.args)

            # Polishing evaluations count towards the reported nfev.
            self._nfev += result.nfev
            DE_result.nfev = self._nfev

            # Adopt the polished point only when it is strictly better.
            if result.fun < DE_result.fun:
                DE_result.fun = result.fun
                DE_result.x = result.x
                DE_result.jac = result.jac
                # to keep internal state consistent
                self.population_energies[0] = result.fun
                self.population[0] = self._unscale_parameters(result.x)

        return DE_result
def dummy_minimize(func, dimensions, n_calls=100, random_state=None):
    """Minimize `func` by evaluating it at uniformly sampled random points.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of points sampled, and therefore number of calls to `func`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [float]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If the first evaluation of `func` does not return a scalar.
    """
    rng = check_random_state(random_state)
    space = Space(dimensions)
    X = space.rvs(n_samples=n_calls, random_state=rng)

    # The first evaluation doubles as the scalar-return check.
    first_value = func(X[0])
    if not np.isscalar(first_value):
        raise ValueError(
            "The function to be optimized should return a scalar")

    evaluations = [first_value]
    for idx in range(1, n_calls):
        evaluations.append(func(X[idx]))
    y = np.asarray(evaluations)

    winner = np.argmin(y)
    return OptimizeResult(x=X[winner], fun=y[winner], func_vals=y,
                          x_iters=X, space=space)
Esempio n. 17
0
def gp_minimize(func, dimensions, base_estimator=None, alpha=10e-10,
                acq="EI", xi=0.01, kappa=1.96, search="auto", n_calls=100,
                n_points=500, n_random_starts=10, n_restarts_optimizer=5,
                x0=None, y0=None, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    The total number of evaluations, `n_calls`, are performed like the
    following. If `x0` is provided but not `y0`, then the elements of `x0`
    are first evaluated, followed by `n_random_starts` evaluations.
    Finally, `n_calls - len(x0) - n_random_starts` evaluations are
    made guided by the surrogate model. If `x0` and `y0` are both
    provided then `n_random_starts` evaluations are first made then
    `n_calls - n_random_starts` subsequent evaluations are made
    guided by the surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization. When None,
        a `GaussianProcessRegressor` with a constant * Matern(nu=2.5)
        kernel is built.

    * `alpha` [float, default=10e-10 (i.e. 1e-9)]:
        Value added to the diagonal of the kernel matrix during fitting.
        Larger values correspond to an increased noise level in the
        observations and reduce potential numerical issues during fitting.
        Only used when the default estimator is built.

    * `acq` [string, default=`"EI"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"auto"`, then it is set to `"lbfgs"` if
        all the search dimensions are Real(continuous). It defaults to
        `"sampling"` for all other cases.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, lbfgs is run from each of them for at most
        20 iterations on the acquisition function, and the best end point
        is used.

    * `n_calls` [int, default=100]:
        Number of calls to `func`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_random_starts` [int, default=10]:
        Number of evaluations of `func` with random initialization points
        before approximating the `func` with `base_estimator`.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None`, no initial input points are used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimization space.
        - `specs` [dict]`: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `x0` is not a list, if there is no initial point at all (no `x0`
        and `n_random_starts` <= 0), if `n_calls` is smaller than the number
        of initial evaluations, if `y0` is malformed, if `func` does not
        return a scalar, or if `search` is not a recognised value.
    """
    # Save call args. This must stay the first statement so that f_locals
    # holds nothing but the call arguments.
    specs = {"args": copy.copy(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        n_dims = space.transformed_n_dims
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(n_dims),
                           length_scale_bounds=[(0.01, 100)] * n_dims,
                           nu=2.5)),
            normalize_y=True, alpha=alpha, random_state=random_state)

    # Initialize with provided points (x0 and y0) and/or random points
    if x0 is None:
        x0 = []
    elif not isinstance(x0[0], list):
        # A single point was given; wrap it so x0 is a list of points.
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    # Points in x0 cost an objective evaluation only when their values are
    # NOT supplied through y0. (The condition used to be inverted, which
    # made the run perform n_calls + len(x0) evaluations without y0 and
    # n_calls - len(x0) evaluations with it.)
    n_init_func_calls = len(x0) if y0 is None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    # The surrogate needs at least one initial point, whether it comes from
    # x0 or from random sampling.
    if len(x0) + n_random_starts <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError(
            "Expected `n_random_starts` > 0, got %d" % n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError(
            "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls))

    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError(
                "`y0` should be an iterable or a scalar, got %s" % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError(
                "`y0` elements should be scalars")
    else:
        # No initial points: any supplied y0 is ignored.
        y0 = []

    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    if search == "auto":
        if space.is_real:
            search = "lbfgs"
        else:
            search = "sampling"
    elif search not in ["lbfgs", "sampling"]:
        raise ValueError(
            "Expected search to be 'lbfgs', 'sampling' or 'auto', "
            "got %s" % search)

    # Bayesian optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for _ in range(n_model_iter):
        gp = clone(base_estimator)

        # Fitting warnings are deliberately silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(
                X=X, model=gp, y_opt=np.min(yi), method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]

        elif search == "lbfgs":
            best = np.inf

            for _restart in range(n_restarts_optimizer):
                # Separate local name so the `x0` argument is not shadowed.
                start = space.transform(space.rvs(n_samples=1,
                                                  random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _info = fmin_l_bfgs_b(
                        _acquisition, start,
                        args=(gp, np.min(yi), acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True, maxiter=20)

                # Keep the restart with the lowest acquisition value.
                if a < best:
                    next_x, best = x, a

        # Map the candidate back to the original space before evaluating.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
Esempio n. 18
0
    def solve(self):
        """
        Runs the DifferentialEvolutionSolver.

        Every generation first builds all trial vectors and only then
        evaluates them, with a single call to ``self.evaluate_func`` that
        receives the whole parameter matrix at once.

        Returns
        -------
        res : OptimizeResult
            The optimization result represented as a ``OptimizeResult`` object.
            Important attributes are: ``x`` the solution array, ``success`` a
            Boolean flag indicating if the optimizer exited successfully and
            ``message`` which describes the cause of the termination. ``nfev``
            and ``nit`` count objective evaluations and completed generations.
            If `polish` was employed, and a lower minimum was obtained by the
            polishing, then OptimizeResult also contains the ``jac`` attribute.
        """

        # `nit` starts at 0 so it is still defined if the generation loop
        # below never executes (``self.maxiter < 1``).
        nfev, nit, warning_flag = 0, 0, False
        status_message = _status_message['success']

        # calculate energies to start with
        parameters = np.zeros_like(self.population, order='F')
        for index, candidate in enumerate(self.population):
            parameters[index, :] = self._scale_parameters(candidate)

        self.population_energies[:] = self.evaluate_func(parameters)
        nfev += self.num_population_members

        # put the lowest energy into the best solution position.
        minval = np.argmin(self.population_energies)
        self._swap_best(minval)

        # NOTE: nothing above can set `warning_flag`, so no early exit is
        # needed before the main loop.

        # do the optimisation.
        trials = np.zeros_like(self.population, order='F')
        for nit in range(1, self.maxiter + 1):
            if self.dither is not None:
                # draw a fresh mutation scale inside the dither interval
                self.scale = self.random_number_generator.rand() * (
                    self.dither[1] - self.dither[0]) + self.dither[0]

            # Unlike the standard DE, all the trials are created first and later
            # evaluated simultaneously.
            for index in range(self.num_population_members):
                # create a trial solution
                trials[index][:] = self._mutate(index)

                # ensuring that it's in the range [0, 1)
                self._ensure_constraint(trials[index])

                # scale from [0, 1) to the actual parameter value
                parameters[index][:] = self._scale_parameters(trials[index])

            # determine the energy of the objective function
            energies = self.evaluate_func(parameters)
            nfev += self.num_population_members

            # if the energy of the trial candidate is lower than the
            # original population member then replace it
            for index in range(self.num_population_members):
                if energies[index] < self.population_energies[index]:
                    self.population[index] = trials[index]
                    self.population_energies[index] = energies[index]

            # if the trial candidate also has a lower energy than the
            # best solution then replace that as well
            minval = np.argmin(self.population_energies)
            self._swap_best(minval)

            # stop when the fractional s.d. of the population is less than tol
            # of the mean energy
            convergence = (
                np.std(self.population_energies) /
                np.abs(np.mean(self.population_energies) + _MACHEPS))

            if self.disp:
                print("differential_evolution step %d: f(x)= %g" %
                      (nit, self.population_energies[0]))

            # progress callbacks are informational only; their return values
            # are ignored
            if self.callbacks:
                for callback in self.callbacks:
                    callback(step=nit,
                             parameter=self.x,
                             cost=self.population_energies[0])

            # only the `earlystop` hook may abort the run, and only by
            # returning exactly True
            if (self.earlystop and self.earlystop(
                    self.x, convergence=self.tol / convergence) is True):

                warning_flag = True
                status_message = ('earlystop function requested stop early '
                                  'by returning True')
                break

            if convergence < self.tol or warning_flag:
                break

        else:
            # the loop ran to completion without converging
            status_message = _status_message['maxiter']
            warning_flag = True

        DE_result = OptimizeResult(x=self.x,
                                   fun=self.population_energies[0],
                                   nfev=nfev,
                                   nit=nit,
                                   message=status_message,
                                   success=not warning_flag)

        if self.polish:
            result = minimize(self.func,
                              np.copy(DE_result.x),
                              method='L-BFGS-B',
                              bounds=self.limits.T,
                              args=self.args)

            nfev += result.nfev
            DE_result.nfev = nfev

            # keep the polished point only if it is a genuine improvement
            if result.fun < DE_result.fun:
                DE_result.fun = result.fun
                DE_result.x = result.x
                DE_result.jac = result.jac
                # to keep internal state consistent
                self.population_energies[0] = result.fun
                self.population[0] = self._unscale_parameters(result.x)

        return DE_result
Esempio n. 19
0
def optimize_stiefel(func, X0, args=(), tau_max=.5, max_it=1, tol=1e-6,
                     disp=False, tau_find_freq=100):
    """
    Optimize a function over a Stiefel manifold.

    :param func: Function to be optimized. It is called as ``func(X, *args)``
                 and must return the pair ``(F, G)``; ``G`` is handed to
                 ``compute_A`` together with ``X``.
    :param X0: Initial point for line search (a copy is used)
    :param args: Extra arguments passed to the function
    :param tau_max: Maximum step size
    :param max_it: Maximum number of iterations
    :param tol: Tolerance criteria to terminate line search
    :param disp: Choose whether to display output
    :param tau_find_freq: A full step-size search is repeated on every
                          iteration whose number is a multiple of this value
    :returns: ``OptimizeResult`` with the attributes ``tau_max``, ``X``,
              ``nfev``, ``nit``, ``fun`` and ``success``
    """
    tol = float(tol)
    assert tol > 0, 'Tolerance must be positive'
    max_it = int(max_it)
    assert max_it > 0, ('The maximum number of iterations must be a positive '
                        'integer')
    tau_max = float(tau_max)
    assert tau_max > 0, 'The parameter `tau_max` must be positive.'
    X = X0.copy()
    nit = 0
    nfev = 0
    success = False
    # Single-argument print(...) calls behave identically on Python 2 and 3.
    if disp:
        print('Stiefel Optimization'.center(80))
        print('{0:4s} {1:11s} {2:5s}'.format('It', 'F', '(F - F_old) / F_old'))
        print('-' * 30)

    ls_func = LSFunc()
    ls_func.func = func
    decrease_tau = False
    # Strict comparison: `nit` is incremented at the top of the body, so
    # `nit <= max_it` would perform max_it + 1 iterations.
    while nit < max_it:
        nit += 1
        F, G = func(X, *args)
        F_old = F
        nfev += 1
        A = compute_A(G, X)
        ls_func.A = A
        ls_func.X = X
        ls_func.func_args = args
        ls_func.tau_max = tau_max
        if nit == 1 or decrease_tau or nit % tau_find_freq == 0:
            # Full step-size search. The search variable is log(tau / tau_max),
            # explored on [-10, 0], hence the exp(...) * tau_max below.
            tau_init = np.linspace(-10, 0., 3)[:, None]
            tau_d = np.linspace(-10, 0., 50)[:, None]
            tau_all, F_all = pybgo.minimize(ls_func, tau_init, tau_d,
                                            fixed_noise=1e-16,
                                            add_at_least=1, tol=1e-2,
                                            scale=True,
                                            train_every=1)[:2]
            nfev += tau_all.shape[0]
            idx = np.argmin(F_all)
            tau = np.exp(tau_all[idx, 0]) * tau_max
            if tau_max - tau <= 1e-6:
                # the best step sits on the upper limit: widen the range
                tau_max = 1.2 * tau_max
                if disp:
                    print('increasing tau_max to {0:1.5e}'.format(tau_max))
            if decrease_tau:
                # the previous step made things worse: shrink the range
                tau_max = .8 * tau_max
                if disp:
                    print('decreasing max_tau to {0:1.5e}'.format(tau_max))
                decrease_tau = False
            F = F_all[idx, 0]
        else:
            # reuse the step size found by the most recent search
            F = ls_func([np.log(tau / tau_max)])
        delta_F = (F_old - F) / np.abs(F_old)
        if delta_F < 0:
            # The objective increased: reject the step, do not count this
            # iteration, and force a new search with a smaller tau_max.
            if disp:
                print('*** backtracking')
            nit -= 1
            decrease_tau = True
            continue
        X = Y_func(tau, X, A)
        if disp:
            print('{0:4s} {1:1.5e} {2:5e} tau = {3:1.3e}, tau_max = {4:1.3e}'.format(
                str(nit).zfill(4), F, delta_F, tau, tau_max))
        if delta_F <= tol:
            if disp:
                print('*** Converged ***')
            success = True
            break
    res = OptimizeResult()
    res.tau_max = tau_max
    res.X = X
    res.nfev = nfev
    res.nit = nit
    res.fun = F
    res.success = success
    return res
Esempio n. 20
0
def optimize_minimize_mhmcmc_cluster(objective,
                                     bounds,
                                     args=(),
                                     x0=None,
                                     T=1,
                                     N=3,
                                     burnin=100000,
                                     maxiter=1000000,
                                     target_ar=0.4,
                                     ar_tolerance=0.05,
                                     cluster_eps=DEFAULT_CLUSTER_EPS,
                                     rnd_seed=None,
                                     collect_samples=None,
                                     logger=None):
    """
    Minimize objective function and return up to N local minima solutions.

    The parameter space is explored with a Metropolis-Hastings random walk
    (burn-in followed by a main loop). The lowest-valued accepted points are
    cached, clustered with DBSCAN in normalized coordinates, and the objective
    is re-evaluated at the mean location of each cluster to produce the
    candidate minima.

    Note: this function seeds the global ``numpy.random`` generator.

    :param objective: Objective function to minimize. It is called as
        ``objective(x, *args)`` and returns a float.
    :type objective: ``Callable(x, *args) -> float``
    :param bounds: Bounds of the parameter space.
    :type bounds: scipy.optimize.Bounds
    :param args: Any additional fixed parameters needed to completely specify the objective function.
    :type args: tuple or list
    :param x0: Initial guess. If None, will be selected randomly and uniformly within the parameter bounds.
    :type x0: numpy.array with same shape as elements of bounds
    :param T: The "temperature" parameter for the accept or reject criterion. To sample the domain well,
        should be in the order of the typical difference in local minima objective valuations.
    :type T: float
    :param N: Maximum number of minima to return
    :type N: int
    :param burnin: Number of random steps to discard before starting to accumulate statistics.
        May be 0 to skip the burn-in phase.
    :type burnin: int
    :param maxiter: Maximum number of steps to take (including burnin). Must be at least ``2 * burnin``.
    :type maxiter: int
    :param target_ar: Target acceptance rate of point samples generated by stepping.
    :type target_ar: float between 0 and 1
    :param ar_tolerance: Tolerance on the acceptance rate before actively adapting the step size.
    :type ar_tolerance: float
    :param cluster_eps: Point proximity tolerance for DBSCAN clustering, in normalized bounds coordinates.
    :type cluster_eps: float
    :param rnd_seed: Random seed to force deterministic behaviour. If None, a seed is derived from
        the current time; the seed actually used is returned in the result.
    :type rnd_seed: int
    :param collect_samples: If not None and integral type, collect collect_samples at regular intervals
        and return as part of solution.
    :type collect_samples: int or NoneType
    :param logger: Logger instance for outputting log messages.
    :return: OptimizeResult containing solution(s) and solver data.
    :rtype: scipy.optimize.OptimizeResult with additional attributes
    """
    @call_counter
    def obj_counted(*args):
        return objective(*args)

    # end func

    assert maxiter >= 2 * burnin, "maxiter {} should be at least twice burnin steps {}".format(
        maxiter, burnin)
    main_iter = maxiter - burnin

    if collect_samples is not None:
        assert isinstance(collect_samples,
                          int), "collect_samples expected to be integral type"
        assert collect_samples > 0, "collect_samples expected to be positive"
    # end if

    beta = 1.0 / T

    if rnd_seed is None:
        rnd_seed = int(time.time() * 1000) % (1 << 31)
    # end if
    np.random.seed(rnd_seed)
    if logger:
        logger.info('Using random seed {}'.format(rnd_seed))
    # end

    if x0 is None:
        x0 = np.random.uniform(bounds.lb, bounds.ub)
    # end if
    assert np.all((x0 >= bounds.lb) & (x0 <= bounds.ub))
    x = x0.copy()
    funval = obj_counted(x, *args)

    # Set up stepper with adaptive acceptance rate
    stepper = BoundedRandNStepper(bounds)
    stepper = AdaptiveStepsize(stepper,
                               accept_rate=target_ar,
                               ar_tolerance=ar_tolerance,
                               interval=50)

    # -------------------------------
    # DO BURN-IN
    accepted_burnin = 0
    tracked_range = tqdm(range(burnin), total=burnin, desc='BURN-IN')
    # Route stepper messages through the progress bar so they do not break it.
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' +
                                                         msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for _ in tracked_range:
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        # Metropolis criterion in log space: always accept an improvement,
        # otherwise accept with probability exp(-(f_new - f) / T).
        log_alpha = -(funval_new - funval) * beta
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            stepper.notify_accept()
            accepted_burnin += 1
        # end if
    # end for
    # burnin == 0 passes the assertion above, so avoid dividing by zero here.
    ar = float(accepted_burnin) / burnin if burnin > 0 else 0.0
    if logger:
        logger.info("Burn-in acceptance rate: {}".format(ar))
    # end if

    # -------------------------------
    # DO MAIN LOOP
    if collect_samples is not None:
        nsamples = min(collect_samples, main_iter)
        # Fractional cadence so samples are spread evenly over the main loop
        sample_cadence = main_iter / nsamples
        samples = np.zeros((nsamples, len(x)))
        samples_fval = np.zeros(nsamples)
    # end if
    accepted = 0
    minima_sorted = SortedList(
        key=lambda rec: rec[1])  # Sort by objective function value
    hist = HistogramIncremental(bounds, nbins=100)
    # Cached a lot of potential minimum values, as these need to be clustered before return N results
    N_cached = int(np.ceil(N * main_iter / 500))
    next_sample = 0.0
    sample_count = 0
    tracked_range = tqdm(range(main_iter), total=main_iter, desc='MAIN')
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' +
                                                         msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for i in tracked_range:
        if collect_samples and i >= next_sample:
            assert sample_count < collect_samples
            samples[sample_count] = x
            samples_fval[sample_count] = funval
            sample_count += 1
            next_sample += sample_cadence
        # end if
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        log_alpha = -(funval_new - funval) * beta
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            minima_sorted.add((x, funval))
            if len(minima_sorted) > N_cached:
                # Drop the record with the largest objective value
                minima_sorted.pop()
            # end if
            stepper.notify_accept()
            hist += x
            accepted += 1
        # end if
    # end for
    stepper.logger = None
    ar = float(accepted) / main_iter
    if logger:
        logger.info("Acceptance rate: {}".format(ar))
        logger.info("Best minima (before clustering):\n{}".format(
            np.array([_mx[0] for _mx in minima_sorted[:10]])))
    # end if

    # -------------------------------
    # Cluster minima and associate each cluster with a local minimum.
    # Using a normalized coordinate space for cluster detection.
    x_range = bounds.ub - bounds.lb
    pts = np.array([rec[0] for rec in minima_sorted])
    fvals = np.array([rec[1] for rec in minima_sorted])
    pts_norm = (pts - bounds.lb) / x_range
    _, labels = dbscan(pts_norm, eps=cluster_eps, min_samples=21, n_jobs=-1)

    # Compute mean of each cluster and evaluate objective function at cluster mean locations.
    # Only labels 0..max(labels) are visited, so negative labels are skipped.
    minima_candidates = []
    for grp in range(max(labels) + 1):
        mask = (labels == grp)
        mean_loc = np.mean(pts[mask, :], axis=0)
        # Evaluate objective function precisely at the mean location of each cluster
        fval = obj_counted(mean_loc, *args)
        minima_candidates.append((mean_loc, grp, fval))
    # end for

    # Rank minima locations by objective function.
    minima_candidates.sort(key=lambda c: c[2])

    # Pick up to N solutions
    solutions = minima_candidates[:N]

    # Put results into OptimizeResult container.
    # Add histograms to output result (in form of scipy.stats.rv_histogram)
    solution = OptimizeResult()
    solution.x = np.array([s[0] for s in solutions])
    solution.clusters = [pts[(labels == s[1])] for s in solutions]
    solution.cluster_funvals = [fvals[(labels == s[1])] for s in solutions]
    solution.bins = hist.bins
    solution.distribution = hist.histograms
    solution.acceptance_rate = ar
    # NOTE: `success` is True even when no cluster was found; check `message`
    # or the length of `x` to detect that case.
    solution.success = True
    solution.status = 0
    if len(solutions) > 0:
        solution.message = 'SUCCESS: Found {} local minima'.format(
            len(solutions))
    else:
        solution.message = 'WARNING: Found no clusters within tolerance {}'.format(
            cluster_eps)
    # end if
    solution.fun = np.array([s[2] for s in solutions])
    solution.jac = None
    solution.nfev = obj_counted.counter
    solution.njev = 0
    solution.nit = main_iter
    solution.maxcv = None
    solution.samples = samples if collect_samples else None
    solution.sample_funvals = samples_fval if collect_samples else None
    solution.bounds = bounds
    solution.version = 's0.3'  # Solution version for future traceability
    solution.rnd_seed = rnd_seed

    return solution
    def solve(self):
        """
        Runs the DifferentialEvolutionSolver.

        The objective is evaluated one candidate at a time and the run is cut
        short once the number of evaluations exceeds ``self.maxfun``.

        Returns
        -------
        res : OptimizeResult
            The optimization result represented as a ``OptimizeResult`` object.
            It carries ``x`` (the solution array), ``fun``, ``nfev``, ``nit``,
            ``message`` (the cause of termination) and ``success`` (False when
            an evaluation/iteration limit was hit or the callback asked to
            stop). If polishing is enabled and finds a lower minimum, ``x`` and
            ``fun`` are replaced by the polished values and ``jac`` is added.
        """
        status_message = _status_message['success']
        warning_flag = False
        nfev = 0
        nit = 0

        # Score the initial population member by member, giving up as soon
        # as the evaluation budget is overrun.
        for member, unscaled in enumerate(self.population):
            self.population_energies[member] = self.func(
                self._scale_parameters(unscaled), *self.args)
            nfev += 1

            if nfev > self.maxfun:
                status_message = _status_message['maxfev']
                warning_flag = True
                break

        # Move the best member (and its energy) into slot 0.
        best = np.argmin(self.population_energies)
        energies = self.population_energies
        energies[0], energies[best] = energies[best], energies[0]
        self.population[[0, best], :] = self.population[[best, 0], :]

        if warning_flag:
            # Budget exhausted before the first generation could start.
            return OptimizeResult(x=self.x,
                                  fun=self.population_energies[0],
                                  nfev=nfev,
                                  nit=nit,
                                  message=status_message,
                                  success=not warning_flag)

        # Evolve the population generation by generation.
        for nit in range(1, self.maxiter + 1):
            if self.dither is not None:
                self.scale = (self.random_number_generator.rand()
                              * (self.dither[1] - self.dither[0])
                              + self.dither[0])

            for member in range(np.size(self.population, 0)):
                if nfev > self.maxfun:
                    status_message = _status_message['maxfev']
                    warning_flag = True
                    break

                trial = self._mutate(member)
                self._ensure_constraint(trial)
                energy = self.func(self._scale_parameters(trial), *self.args)
                nfev += 1

                if energy < self.population_energies[member]:
                    # The trial beats its parent and takes its place ...
                    self.population[member] = trial
                    self.population_energies[member] = energy

                    if energy < self.population_energies[0]:
                        # ... and also becomes the best solution so far.
                        self.population_energies[0] = energy
                        self.population[0] = trial

            # Relative spread of the energies: standard deviation over the
            # magnitude of the mean.
            spread = np.std(self.population_energies)
            level = np.abs(np.mean(self.population_energies) + _MACHEPS)
            convergence = spread / level

            if self.disp:
                print("differential_evolution step %d: f(x)= %g"
                      % (nit, self.population_energies[0]))

            if self.callback:
                stop_requested = self.callback(
                    self._scale_parameters(self.population[0]),
                    convergence=self.tol / convergence)
                # Only a literal True aborts the run.
                if stop_requested is True:
                    warning_flag = True
                    status_message = ('callback function requested stop early '
                                      'by returning True')
                    break

            if warning_flag or convergence < self.tol:
                break

        else:
            # Every generation was used without meeting the tolerance.
            status_message = _status_message['maxiter']
            warning_flag = True

        DE_result = OptimizeResult(x=self.x,
                                   fun=self.population_energies[0],
                                   nfev=nfev,
                                   nit=nit,
                                   message=status_message,
                                   success=not warning_flag)

        if self.polish:
            polished = minimize(self.func,
                                np.copy(DE_result.x),
                                method='L-BFGS-B',
                                bounds=self.limits.T,
                                args=self.args)

            nfev += polished.nfev
            DE_result.nfev = nfev

            if polished.fun < DE_result.fun:
                DE_result.fun = polished.fun
                DE_result.x = polished.x
                DE_result.jac = polished.jac
                # Mirror the polished optimum in the solver's own state.
                self.population_energies[0] = polished.fun
                self.population[0] = self._unscale_parameters(polished.x)

        return DE_result
Esempio n. 22
0
    def solve(self):
        """
        Runs the DifferentialEvolutionSolver.

        The trial vectors of each generation are evaluated in parallel through
        a ``multiprocessing.Pool`` of ``self.ncore`` workers. Iteration numbers
        start at ``self.niter``. If ``self.maxtime`` is not None, the run is
        abandoned once more than ``self.maxtime`` seconds have elapsed since
        ``self.time``.

        Returns
        -------
        res : OptimizeResult
            On a normal exit the result carries ``x``, ``fun``, ``nit``,
            ``message`` and ``success``; if polishing is enabled and finds a
            lower minimum, ``x`` and ``fun`` are replaced and ``jac`` is added.
            When the time limit is hit the result instead carries
            ``population``, ``population_energies``, ``nit``, ``message`` and
            ``success=False``.
        """
        start_time = self.time
        warning_flag = False
        # Keeps `nit` defined even if the loop below never executes
        # (self.niter > self.maxiter).
        nit = self.niter

        def timed_out():
            # Test for None first: comparing a float with None raises
            # TypeError on Python 3.
            return (self.maxtime is not None and
                    time.time() - start_time > self.maxtime)

        def timeout_result():
            # NOTE(review): `nit` reports the starting counter self.niter, not
            # the generation reached -- confirm this is what resuming callers
            # expect.
            return OptimizeResult(
                population=self.population,
                population_energies=self.population_energies,
                nit=self.niter,
                message='Maximum time has been exceeded.',
                success=False)

        if timed_out():
            return timeout_result()

        status_message = _status_message['success']

        # do the optimisation.
        for nit in range(self.niter, self.maxiter + 1):
            population_count = np.size(self.population, 0)
            if self.dither is not None:
                self.scale = self.random_number_generator.rand(
                ) * (self.dither[1] - self.dither[0]) + self.dither[0]

            # Build every trial first so they can be evaluated in one batch.
            Parameters = []
            Trials = []
            for candidate in range(population_count):
                trial = self._mutate(candidate)
                self._ensure_constraint(trial)

                Trials.append(trial)
                Parameters.append(self._scale_parameters(trial))

            pool = multiprocessing.Pool(self.ncore)
            try:
                Energies = pool.map(self.func, Parameters)
            finally:
                # Release the worker processes even if an evaluation failed.
                pool.close()
                pool.join()

            # Drop population members whose stored energy is NaN (NaN is the
            # only value that differs from itself).
            iNan = [i for i in range(population_count)
                    if self.population_energies[i] != self.population_energies[i]]
            self.population_energies = np.delete(self.population_energies, iNan)
            self.population = np.delete(self.population, iNan, 0)
            if iNan:
                # Remove the matching trials too, so that trial `k` is still
                # compared with the parent it was mutated from.
                dropped = set(iNan)
                Trials = [t for i, t in enumerate(Trials) if i not in dropped]
                Energies = [e for i, e in enumerate(Energies)
                            if i not in dropped]
            population_count = np.size(self.population, 0)

            for candidate in range(population_count):
                if Energies[candidate] < self.population_energies[candidate]:
                    self.population[candidate] = Trials[candidate]
                    self.population_energies[candidate] = Energies[candidate]

                    if Energies[candidate] < self.population_energies[0]:
                        self.population_energies[0] = Energies[candidate]
                        self.population[0] = Trials[candidate]

            # stop when the fractional s.d. of the population is less than tol
            # of the mean energy
            convergence = (np.std(self.population_energies) /
                           np.abs(np.mean(self.population_energies) +
                                  _MACHEPS))

            if self.disp:
                print("differential_evolution step %d: f(x)= %g"
                      % (nit,
                         self.population_energies[0]))
                print("total population at step %d is %d"
                      % (nit, population_count))

            if (self.callback and
                    self.callback(self._scale_parameters(self.population[0]),
                                  convergence=self.tol / convergence) is True):

                warning_flag = True
                status_message = ('callback function requested stop early '
                                  'by returning True')
                break

            if convergence < self.tol or warning_flag:
                break

            if timed_out():
                return timeout_result()
        else:
            status_message = _status_message['maxiter']
            warning_flag = True

        DE_result = OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nit=nit,
            message=status_message,
            success=not warning_flag)

        if self.polish:
            result = minimize(self.func,
                              np.copy(DE_result.x),
                              method='L-BFGS-B',
                              bounds=self.limits.T)

            # keep the polished point only if it is a genuine improvement
            if result.fun < DE_result.fun:
                DE_result.fun = result.fun
                DE_result.x = result.x
                DE_result.jac = result.jac
                # to keep internal state consistent
                self.population_energies[0] = result.fun
                self.population[0] = self._unscale_parameters(result.x)

        return DE_result
Esempio n. 23
0
def _gensa_modified(func,
                    x0,
                    bounds,
                    maxiter=500,
                    initial_temp=5230.,
                    visit=2.62,
                    accept=-5.0,
                    maxfun=1e7,
                    args=(),
                    seed=None,
                    pure_sa=False):
    """Extension of the gensa function available the pygensa package at https://github.com/sgubianpm/pygensa

    This function is an extension of the function gensa defined in the package pygensa at
    https://github.com/sgubianpm/pygensa. The only difference with the existing version and _gensa_modified is that
    it allows the user to pass lower bounds and upper bounds with equal values. Though this is a trivial
    scenario in which case the optimal solution should be lower = upper, the current version of gensa crashes.

    :param fun : callable
        The objective function
    :param x0 : ndarray
        The starting coordinates.
    :param bounds : sequence
        Bounds for variables.  ``(min, max)`` pairs for each element in ``x``,
        defining the lower and upper bounds for the optimizing argument of
        `func`. It is required to have ``len(bounds) == len(x)``.
        ``len(bounds)`` is used to determine the number of parameters in ``x``.
    :param args : tuple, optional
        Any additional fixed parameters needed to
        completely specify the objective function.
    :param seed : int or `np.random.RandomState`, optional
        If `seed` is not specified the `np.RandomState` singleton is used.
        If `seed` is an int, a new `np.random.RandomState` instance is used,
        seeded with seed.
        If `seed` is already a `np.random.RandomState instance`, then that
        `np.random.RandomState` instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    :param temp_start : float, optional
        The initial temperature, use higher values to facilitates a wider
        search of the energy landscape, allowing gensa to escape local minima
        that it is trapped in.
    :param qv : float, optional
        Parameter for visiting distribution. Higher values give the visiting
        distribution a heavier tail, this makes the algorithm jump to a more
        distant region. The value range is (0, 3]
    :param qa : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. It has to be any negative value.
    :param maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done.
    :param maxsteps: int, optional
        The maximum number of gensa iterations will perform.

    :return:

    :Example:

    from robust_tail.gensa_modified import _gensa_modified
    from pygensa.gensa import gensa

    # Test gensa_modified in the univariate case
    def f1(x): return x
    output = _gensa_modified(func = f1,x0 = None, bounds = [[1,1]])
    output
    gensa(func = f1,x0 = None, bounds = [[1,1]]) # This crashes

    # Test gensa_modified in the bivariate case with one lower bound equal to the upper bound
    def f2(x): return x[0] + x[1]
    output = _gensa_modified(func = f2,x0 = None,bounds = [[1,1],[1,3]])
    output

    gensa(func = f2,x0 = None,bounds = [[1,1],[1,3]]) # This crashes

    # Test gensa_modified in the bivariate case with both lower bound are equal to the upper bounds
    output = _gensa_modified(func = f2,x0 = None,bounds = [[1,1],[1,1]])
    output

    gensa(func = f2,x0 = None,bounds = [[1,1],[1,1]]) # This crashes

    # Let's check that when the lower bounds are strictly smaller than the upper bounds,
    # all goes well)
    output_gensa_modified = _gensa_modified(func = f2,x0 = None,bounds=[[1,2],[2,3]])
    output_gensa = gensa(func = f2,x0 = None,bounds=[[1,2],[2,3]])

    # Check that they have the same optimal solution
    all(output_gensa_modified.x == output_gensa.x)

     # Check that they have the same optimal objective value
     output_gensa_modified.fun == output_gensa.fun
    """

    # Check necessary conditions to run gensa_modified
    if not isfunction(func) or func is None:
        print("func has to be function.")
        return None

    for bound in bounds:
        if len(bound) != 2:
            print("Each parameter needs a lower and upper bounds")
            return None

    # If all lower bounds are different from the upper bounds,
    # run the usual gensa algorithm
    samebound = [x[0] == x[1] for x in bounds]
    lower_bound = [x[0] for x in bounds]

    if not any(samebound):
        output = gensa(func, x0, bounds, maxiter, initial_temp, visit, accept,
                       maxfun, args, seed, pure_sa)
    else:
        index = np.where(samebound)

        def _new_func(new_x=None):
            if new_x is not None:
                new_x_copy = list(new_x)
                x = np.array([
                    bound[0] if bound[0] == bound[1] else new_x_copy.pop(0)
                    for bound in bounds
                ])
            else:
                x = np.array([bound[0] for bound in bounds])
            output = func(x)
            return output

        if all(samebound):
            output = OptimizeResult()
            output.x = lower_bound
            output.fun = _new_func()
            return output

        new_bounds = [bound for bound in bounds if bound[0] != bound[1]]

        if x0 is not None:
            new_x0 = np.array([
                x0[i] for i in range(0, len(bounds))
                if bounds[i][0] != bounds[i][1]
            ])
        else:
            new_x0 = None

        output = gensa(_new_func, new_x0, new_bounds, maxiter, initial_temp,
                       visit, accept, maxfun, args, seed, pure_sa)

        # The output vector par of gensa will be of the same dimension as the new_lower bound
        # we need to include the value of lower_bound that was discarded in "par"
        if isinstance(OptimizeResult(), OptimizeResult):
            output_x_copy = list(output.x[:])
            output.x = [
                bound[0] if bound[0] == bound[1] else output_x_copy.pop(0)
                for bound in bounds
            ]

    return output
Esempio n. 24
0
def gbrt_minimize(func,
                  bounds,
                  base_estimator=None,
                  maxiter=100,
                  n_points=20,
                  n_start=10,
                  random_state=None):
    """Sequential optimisation using gradient boosted trees.

    Gradient boosted regression trees are used to model the (very)
    expensive to evaluate function `func`. The model is improved
    by sequentially evaluating the expensive function at the next
    best point. Thereby finding the minimum of `func` with as
    few evaluations as possible.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `bounds` [array-like, shape=(n_parameters, 2)]:
        - ``bounds[i][0]`` should give the lower bound of each parameter and
        - ``bounds[i][1]`` should give the upper bound of each parameter.

    * `base_estimator` [`GradientBoostingQuantileRegressor`]:
        The regressor to use as surrogate model. A fresh clone of it is
        fitted in every model-guided iteration. If None, a
        `GradientBoostingQuantileRegressor` seeded with `random_state`
        is used.

    * `maxiter` [int, default=100]:
        Total number of evaluations of `func`. The first `n_start` of
        them are random points, the remaining `maxiter - n_start` are
        chosen with the help of the surrogate model.

    * `n_points` [int, default=20]:
        Number of random candidate points scored by the acquisition
        function in every model-guided iteration.

    * `n_start` [int, default=10]:
        Number of random points to draw before fitting `base_estimator`
        for the first time. If `n_start > maxiter` this degrades to
        a random search for the minimum.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [array]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`:
        If `n_start` or `maxiter` is not at least one.
    """
    # Reject an empty evaluation budget up front, before anything is
    # allocated or `func` is called.
    if n_start <= 0:
        raise ValueError("Need at least one starting point.")

    if maxiter <= 0:
        raise ValueError("Need to perform at least one iteration.")

    rng = check_random_state(random_state)

    # Bounds
    num_params = len(bounds)
    lower_bounds, upper_bounds = extract_bounds(bounds)

    # Default estimator
    if base_estimator is None:
        base_estimator = GradientBoostingQuantileRegressor(random_state=rng)

    # Record the points and function values evaluated as part of
    # the minimization
    Xi = np.zeros((maxiter, num_params))
    yi = np.zeros(maxiter)

    # Initialize with random points
    n_start = min(n_start, maxiter)

    Xi[:n_start] = _random_points(lower_bounds,
                                  upper_bounds,
                                  n_points=n_start,
                                  random_state=rng)
    yi[:n_start] = [func(xi) for xi in Xi[:n_start]]

    # Remember the row of the best evaluation so far. The reported
    # location must be that single point, not all starting points
    # flattened together.
    best_idx = int(np.argmin(yi[:n_start]))
    best_y = yi[best_idx]

    models = []

    for i in range(n_start, maxiter):
        rgr = clone(base_estimator)
        # only the first i points are meaningful
        rgr.fit(Xi[:i, :], yi[:i])
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimisers like BFGS, use random sampling
        # for the moment.
        x0 = _random_points(lower_bounds,
                            upper_bounds,
                            n_points=n_points,
                            random_state=rng)
        aq = _expected_improvement(x0, rgr, best_y)
        best_candidate = np.argmin(aq)

        Xi[i] = x0[best_candidate].ravel()
        yi[i] = func(x0[best_candidate])

        if yi[i] < best_y:
            best_y = yi[i]
            best_idx = i

    res = OptimizeResult()
    # Copy so that `res.x` does not alias a row of `res.x_iters`.
    res.x = Xi[best_idx].copy()
    res.fun = best_y
    res.func_vals = yi
    res.x_iters = Xi
    res.models = models

    return res
Esempio n. 25
0
def _sequential_random_embeddings(f,
                                  x0,
                                  bounds,
                                  n_reduced_dims_eff=3,
                                  n_embeddings=10,
                                  verbosity=1,
                                  **optimizer_kwargs):
    """
    Minimize f with the Sequential Random Embeddings algorithm described in
    +++++
    H. Qian, Y.-Q. Hu, and Y. Yu, Derivative-Free Optimization of High-Dimensional Non-Convex
    Functions by Sequential Random Embeddings, Proceedings of the Twenty-Fifth International Joint
    Conference on Artificial Intelligence, AAAI Press (2016).
    +++++
    A high-dimensional problem in ℝ^h is replaced by a sequence of low-dimensional ones. In every
    embedding iteration a fresh random matrix A of shape (h, l) is drawn and the update
    x(n+1) = α(n+1)x(n) + A•y(n+1),    x ∈ ℝ^h, y ∈ ℝ^l, α ∈ ℝ
    is obtained by minimizing f(αx + A•y) w.r.t. the reduced variable (α, y) with
    :func:`optimize`. The starting point of the sequence is x(0) = 0.

    :param f: [callable] Objective function. Must accept its argument x as numpy array
    :param x0: [np.array] Initial bacteria population in the original space; indexed here as
           (n_bacteria, h)
    :param bounds: [callable] Bounds projection. Called with a point of the original space and
           expected to return the pair (projected point, boolean array marking the components
           that hit a bound)
    :param n_reduced_dims_eff: [int] Dimension l + 1 of the reduced variable (α, y)
    :param n_embeddings: [int] Number of embedding iterations
    :param verbosity: [int] Output verbosity. Must be 0, 1, or 2
    :param optimizer_kwargs: [dict] Keyword arguments forwarded to :func:`optimize` for every
           embedded problem; the entry ``verbosity`` is overwritten with ``verbosity``
    :return: [scipy.optimize.OptimizeResult] Best minimum of f found, with ``nfev`` and ``nit``
             summed over all embeddings and ``trace`` set to None
    """

    assert verbosity in [0, 1, 2], 'verbosity must be 0, 1, or 2.'

    n_orig_dims = x0.shape[1]
    n_random_dims = n_reduced_dims_eff - 1

    current_x = np.zeros(n_orig_dims)
    best_x = current_x.copy()
    best_f = np.inf
    best_success = False
    total_nfev = 0
    total_nit = 0

    # The same verbosity is handed to every embedded optimization run.
    optimizer_kwargs['verbosity'] = verbosity

    for i in range(n_embeddings):
        A = np.random.normal(size=(n_orig_dims, n_random_dims))

        # Divide every row of A by its sum; rows summing to zero are left untouched
        row_sums = A.sum(axis=1)
        row_sums[row_sums == 0] = 1
        A = A / row_sums[:, np.newaxis]

        def lift(arg):
            # Map the reduced variable (α, y) back to the original space
            return arg[0] * current_x + A.dot(arg[1:])

        def f_embedded(arg):
            return f(bounds(lift(arg))[0])

        def bounds_embedded(arg):
            hit_reduced = np.zeros(len(arg), dtype=bool)
            x_projected, hit_original = bounds(lift(arg))
            if hit_original.any():
                # A bound was hit in the original variable: pull y back onto the projected
                # point and flag every reduced component that feeds a clipped coordinate
                arg[1:] = np.linalg.lstsq(A,
                                          x_projected - arg[0] * current_x,
                                          rcond=None)[0]
                hit_reduced[1:] = (A[hit_original] != 0).any(axis=0)

            return arg, hit_reduced

        # Reduced start population: α = 1 and y from a least-squares fit of each bacterium
        y0 = np.zeros((x0.shape[0], n_reduced_dims_eff))
        y0[:, 0] = 1
        y0[:, 1:] = np.array([
            np.linalg.lstsq(A, x_orig - current_x, rcond=None)[0]
            for x_orig in x0
        ])

        if verbosity > 0:
            header = f'\nEmbedding iteration {i}'
            print(header, '-' * len(header), sep='\n')

        with warnings.catch_warnings():
            # The reduced start population may lie outside the domain; optimize() warns
            # about that, which is expected here
            warnings.filterwarnings(
                'ignore',
                message=
                'Found initial conditions outside the defined search domain.')
            res_embedded = optimize(f_embedded,
                                    x0=y0,
                                    bounds=bounds_embedded,
                                    **optimizer_kwargs)

        y_opt = res_embedded.x
        f_opt = res_embedded.fun
        total_nfev += res_embedded.nfev
        total_nit += res_embedded.nit

        current_x = bounds(lift(y_opt))[0]

        if verbosity > 0:
            print(f'Random embedding gave x = {current_x}.')

        if f_opt < best_f:
            best_f = f_opt
            best_x = current_x.copy()
            best_success = res_embedded.success

    return OptimizeResult(success=best_success,
                          x=best_x,
                          fun=best_f,
                          nfev=total_nfev,
                          nit=total_nit,
                          trace=None)
Esempio n. 26
0
def optimize(f,
             x0=None,
             bounds=None,
             domain_scale=None,
             init='uniform',
             stepsize_start=None,
             stepsize_decay_fac=1e-3,
             base_tumble_rate=0.1,
             niter_rt=400,
             n_bacteria_per_dim=3,
             stationarity_window=20,
             eps_stat=1e-3,
             attraction=False,
             attraction_window=10,
             attraction_sigma=None,
             attraction_strength=0.5,
             bounds_reflection=False,
             n_best_selection=3,
             c_gd=1e-6,
             a_gd=None,
             n_linesearch_gd=20,
             alpha_linesearch_gd=0.5,
             beta_linesearch_gd=0.33,
             eps_abs_gd=1e-9,
             eps_rel_gd=1e-6,
             niter_gd=100,
             n_embeddings=5,
             max_dims=3,
             n_reduced_dims=2,
             verbosity=0):
    """
    Metaheuristic global optimization algorithm combining a bacterial run-and-tumble chemotactic
    search with a local, gradient-based search around the best minimum candidate points.
    The algorithm's goal is to find
                                        min f(x), x ∈ Ω,
    where f: Ω ⊂ ℝ^n → ℝ.
    Since the chemotactic search becomes more and more ineffective with increasing problem
    dimensionality, Sequential Random Embeddings are used to solve the optimization problem once its
    dimensionality exceeds a given threshold.

    :param f: [callable] Objective function. Must accept its argument x as numpy array
    :param x0: [array-like object] Optional initial conditions object. Must have the shape
           (n_bacteria, n_dims) or (n_dims,). If x0 == None, initial conditions are sampled randomly
           or uniformly-spaced from Ω. Note that this only works if Ω is a rectangular box, i.e., if
           no or non-rectangular bounds are imposed, x0 must not be None
    :param bounds: [callable or array-like object] Defines the bounded domain Ω. If provided, must
           be one of the following:
           - Bounds projection callback, as defined in description of parameter
             ``projection_callback`` in :func:`local_search.bfgs_b`
           - Rectangular box constraints. For each component x_i of x,
             bounds[i, 0] <= x_i <= bounds[i, 1], that is, bounds must have shape (n_dims, 2)
    :param domain_scale: [float] Scale of the optimization problem. If not provided, the algorithm
           tries to guess the scale from any provided rectangular box constraints. Used for
           auto-scaling algorithm stepsizes
    :param init: [string] Determines how initial bacteria positions are sampled from Ω if
           x0 == None, see description of parameter ``x0``. Currently supported: 'random' and
           'uniform'
    :param stepsize_start: [float] See description of parameter ``stepsize_start`` in
           :func:`global_search.run_and_tumble`. If not provided, the algorithm tries to auto-scale
           this length to the problem's scale
    :param stepsize_decay_fac: [float] Factor by which the run-and-tumble stepsize has decayed in
           the last run-and-tumble iteration compared to its initial value
    :param base_tumble_rate: [float] See description of parameter ``base_tumble_rate`` in
           :func:`global_search.run_and_tumble`
    :param niter_rt: [int] Maximum number of run-and-tumble iterations
    :param n_bacteria_per_dim: [int] How many bacteria to spawn in each dimension. Note that the
           total number of bacteria is
           i)  n_bacteria = n_bacteria_per_dim ** n_dims if n_dims <= max_dims or
           ii) n_bacteria = n_bacteria_per_dim ** (n_reduced_dims + 1) if n_dims > max_dims.
           If x0 is provided with shape (n_bacteria, n_dims), n_bacteria should agree with this
           relation.
    :param stationarity_window: [int] See description of parameter ``stationarity_window`` in
           :func:`global_search.run_and_tumble`
    :param eps_stat: [float] See description of parameter ``eps_stat`` in
           :func:`global_search.run_and_tumble`
    :param attraction: [bool] See description of parameter ``attraction`` in
           :func:`global_search.run_and_tumble`
    :param attraction_window: [int] See description of parameter ``attraction_window`` in
           :func:`global_search.run_and_tumble`
    :param attraction_sigma: [float] See description of parameter ``attraction_sigma`` in
           :func:`global_search.run_and_tumble`. If not provided, the algorithm tries to auto-scale
           this length to the problem's scale
    :param attraction_strength: [float] See description of parameter ``attraction_strength`` in
           :func:`global_search.run_and_tumble`
    :param bounds_reflection: [bool] See description of parameter ``bounds_reflection`` in
           :func:`global_search.run_and_tumble`
    :param n_best_selection: [int] At the end of the run-and-tumble exploration stage, a local
           gradient-based search is performed, starting from the best positions found thus far by
           the n_best_selection best bacteria
    :param c_gd: [float] See description of parameter ``c`` in :func:`local_search.bfgs_b`
    :param a_gd: [float] See description of parameter ``a`` in :func:`local_search.bfgs_b`. If not
           provided, the algorithm tries to auto-scale this length to the problem's scale
    :param n_linesearch_gd: [int] See description of parameter ``n_linesearch`` in
           :func:`local_search.bfgs_b`
    :param alpha_linesearch_gd: [float] See description of parameter ``alpha_linesearch`` in
           :func:`local_search.bfgs_b`
    :param beta_linesearch_gd: [float] See description of parameter ``beta_linesearch`` in
           :func:`local_search.bfgs_b`
    :param eps_abs_gd: [float] See description of parameter ``eps_abs`` in
           :func:`local_search.bfgs_b`
    :param eps_rel_gd: [float] See description of parameter ``eps_rel`` in
           :func:`local_search.bfgs_b`
    :param niter_gd: [int] Maximum number of local, gradient-based search iterations
    :param n_embeddings: [int] Number of embedding iterations when using Sequential Random
           Embeddings. Only has an effect if n_dims > max_dims
    :param max_dims: [int] Maximum dimension of problems to be solved without using Sequential
           Random Embeddings
    :param n_reduced_dims: [int] Dimension of the embedded problem. Only has an effect if
           n_dims > max_dims
    :param verbosity: [int] Output verbosity. Must be 0, 1, or 2
    :return: Best minimum of f found [scipy.optimize.OptimizeResult]. The fields set are
             ``success``, ``x``, ``fun``, ``nfev``, ``nit`` and ``trace``
    :raises ValueError: If bounds is neither None, a callable, a list nor a numpy array, or if
            init is neither "random" nor "uniform" when initial positions must be sampled
    :raises AssertionError: If verbosity, n_reduced_dims, niter_rt or n_best_selection violate the
            constraints checked below, or if x0 is None without box constraints
    """

    assert verbosity in [0, 1, 2], 'verbosity must be 0, 1, or 2.'
    assert n_reduced_dims >= 2, 'n_reduced_dims must not be less than 2.'

    # The embedded problem carries one extra variable (the scaling factor α of the previous
    # iterate) on top of the n_reduced_dims random directions
    n_reduced_dims_eff = n_reduced_dims + 1

    # ---- Initial population and (for box-like domains) the box limits --------------------------
    if bounds is None or callable(bounds):
        assert x0 is not None, (
            'If no box constraints are provided for bounds, x0 must not be ' +
            'None.')
        x0_population = _prepare_x0(x0, n_bacteria_per_dim, max_dims,
                                    n_reduced_dims_eff)
        n_bacteria, n_dims = x0_population.shape

        if bounds is None:
            # presumably yields limits that never clip; verify against _prepare_bounds
            bound_lower, bound_upper = _prepare_bounds(bounds, n_dims)

    elif isinstance(bounds, (list, np.ndarray)):
        if x0 is not None:
            x0_population = _prepare_x0(x0, n_bacteria_per_dim, max_dims,
                                        n_reduced_dims_eff)
            n_bacteria, n_dims = x0_population.shape
            bound_lower, bound_upper = _prepare_bounds(bounds, n_dims)
        else:
            bound_lower, bound_upper = _prepare_bounds(bounds, None)
            n_dims = len(bound_lower)
            n_bacteria = (n_bacteria_per_dim**n_dims if n_dims <= max_dims else
                          n_bacteria_per_dim**n_reduced_dims_eff)
            if init == 'uniform' and n_dims > max_dims:
                # A full grid would need n_bacteria_per_dim ** n_dims points, which does not
                # match the reduced population size chosen above
                init = 'random'
                if verbosity > 0:
                    warnings.warn(
                        'The option init="uniform" is only available for problems with '
                        'dimensionality less than or equal to max_dims, which was '
                        f'set to {max_dims}. Since the current problem has '
                        f'dimensionality {n_dims}, init was automatically set to '
                        '"random".')
            if init == 'random':
                x0_population = np.random.uniform(bound_lower,
                                                  bound_upper,
                                                  size=(n_bacteria, n_dims))
            elif init == 'uniform':
                # Regular grid with n_bacteria_per_dim points along every axis
                init_points = [
                    np.linspace(bound_lower[i], bound_upper[i],
                                n_bacteria_per_dim) for i in range(n_dims)
                ]
                x0_population = np.array(np.meshgrid(*init_points)).reshape(
                    n_dims, -1).T
            else:
                raise ValueError('init must either be "random" or "uniform".')

    else:
        raise ValueError(
            'bounds must either be None, an array or corresponding nested list of '
            +
            'shape (n_dims, 2), or a custom callback function. See the docstring '
            + 'for details.')

    # ---- Projection callbacks ------------------------------------------------------------------
    if callable(bounds):

        def projection_callback(x):
            return bounds(x)

        def projection_callback_population(x):
            # Project every bacterium separately. Positions and hit flags are stacked into two
            # separate arrays so that the flags stay boolean, as they are for box constraints
            projected = [projection_callback(x_single) for x_single in x]
            x_proj = np.array([single[0] for single in projected])
            bounds_hit = np.array([single[1] for single in projected],
                                  dtype=bool)
            return x_proj, bounds_hit

    else:

        def projection_callback(x):
            # Clip onto the box and report which components now sit on a face of it
            x = np.clip(x, bound_lower, bound_upper)
            bounds_hit = np.where(((x == bound_lower) | (x == bound_upper)),
                                  True, False)
            return x, bounds_hit

        def projection_callback_population(x):
            # Clipping broadcasts over the population axis, no per-bacterium loop required
            return projection_callback(x)

    assert niter_rt > stationarity_window, 'niter_rt must be larger than stationarity_window.'
    assert n_best_selection <= n_bacteria, 'n_best_selection must not be larger than n_bacteria.'

    # ---- Length scales: user-supplied values are used as-is, defaults get auto-scaled ----------
    if stepsize_start is not None:
        auto_scale_stepsize = False
    else:
        auto_scale_stepsize = True
        stepsize_start = 1e-1
    stepsize_end = stepsize_decay_fac * stepsize_start

    if attraction_sigma is not None:
        auto_scale_attraction_sigma = False
    else:
        auto_scale_attraction_sigma = True
        attraction_sigma = 1

    if a_gd is not None:
        auto_scale_a_gd = False
    else:
        auto_scale_a_gd = True
        a_gd = 1e-2

    # Move the initial population into the domain and tell the user if that changed anything
    x0_population_orig = x0_population.copy()
    x0_population, _ = projection_callback_population(x0_population)
    if not np.array_equal(x0_population, x0_population_orig):
        warnings.warn(
            'Found initial conditions outside the defined search domain.')

    max_scale = None
    if domain_scale is not None:
        max_scale = domain_scale
    elif isinstance(bounds, (list, np.ndarray)):
        # Largest finite edge length of the box; infinite edges are ignored
        # noinspection PyUnboundLocalVariable
        domain_range = bound_upper - bound_lower
        max_scale = np.max(np.where(np.isinf(domain_range), 0, domain_range))
    if max_scale is not None and max_scale > 0:
        if auto_scale_stepsize:
            stepsize_start = stepsize_start * max_scale
            stepsize_end = stepsize_end * max_scale
        if auto_scale_attraction_sigma:
            attraction_sigma = attraction_sigma * max_scale
        if auto_scale_a_gd:
            a_gd = a_gd * max_scale

    # ---- High-dimensional problems: delegate to Sequential Random Embeddings -------------------
    if n_dims > max_dims:
        if verbosity > 0:
            print(
                f'Using sequential random embeddings in {n_reduced_dims} + 1 dimensions.'
            )
        # max_dims is raised to the embedded dimension so that the nested optimize() calls solve
        # the embedded problems directly instead of embedding again
        return _sequential_random_embeddings(
            f,
            x0_population,
            projection_callback,
            n_reduced_dims_eff=n_reduced_dims_eff,
            n_embeddings=n_embeddings,
            verbosity=verbosity,
            domain_scale=max_scale,
            init=init,
            stepsize_start=stepsize_start,
            stepsize_decay_fac=stepsize_decay_fac,
            base_tumble_rate=base_tumble_rate,
            niter_rt=niter_rt,
            n_bacteria_per_dim=n_bacteria_per_dim,
            stationarity_window=stationarity_window,
            eps_stat=eps_stat,
            attraction=attraction,
            attraction_window=attraction_window,
            attraction_sigma=attraction_sigma,
            attraction_strength=attraction_strength,
            bounds_reflection=bounds_reflection,
            n_best_selection=n_best_selection,
            c_gd=c_gd,
            a_gd=a_gd,
            n_linesearch_gd=n_linesearch_gd,
            alpha_linesearch_gd=alpha_linesearch_gd,
            beta_linesearch_gd=beta_linesearch_gd,
            eps_abs_gd=eps_abs_gd,
            eps_rel_gd=eps_rel_gd,
            niter_gd=niter_gd,
            max_dims=n_reduced_dims_eff)

    else:
        # ---- Global stage: run-and-tumble exploration ------------------------------------------
        x_best, f_best, nfev, nit, trace = run_and_tumble(
            f,
            x0_population,
            projection_callback_population,
            niter_rt,
            stepsize_start,
            stepsize_end,
            base_tumble_rate=base_tumble_rate,
            stationarity_window=stationarity_window,
            eps_stat=eps_stat,
            attraction=attraction,
            attraction_window=attraction_window,
            attraction_sigma=attraction_sigma,
            attraction_strength=attraction_strength,
            bounds_reflection=bounds_reflection,
            verbosity=verbosity)

        if verbosity == 2:
            print(
                '==============================================================================='
            )
        if verbosity > 0:
            print(
                f'Best result after run-and-tumble stage is x = {x_best[np.argmin(f_best)]}, '
                +
                f'f(x) = {np.min(f_best)}. Starting local, gradient-based optimization for the '
                + f'{n_best_selection} best bacteria.')

        # ---- Local stage: gradient-based refinement of the best bacteria -----------------------
        sortIdx = f_best.argsort()
        x_best_selection = x_best[sortIdx[:n_best_selection]]
        x_best_gd = np.empty(x_best_selection.shape)
        f_min_gd = np.empty(n_best_selection)
        nfev_gd = 0
        nit_gd = 0
        success_gd = np.empty(n_best_selection)
        # Bacteria that are not refined simply stay at their last run-and-tumble position
        # (trace is indexed here as (iteration, bacterium, dimension))
        trace_gd = np.empty((niter_gd, n_bacteria, n_dims))
        trace_gd[:, sortIdx[n_best_selection:], :] = trace[
            -1, sortIdx[n_best_selection:], :]
        nit_gd_arr = np.empty(n_best_selection)
        visited_points = trace.reshape(-1, n_dims)

        # The neighbour search structure and the monomial exponents of the quadratic model are
        # the same for every start point, so they are built only once
        # noinspection PyArgumentList,PyUnresolvedReferences
        visited_tree = spatial.cKDTree(visited_points)
        num_sampling_points = 2 * int(special.binom(n_dims + 2, 2))
        polynomial_powers = [
            powers for powers in itertools.product((0, 1, 2), repeat=n_dims)
            if sum(powers) <= 2
        ]

        for n, x_start in enumerate(x_best_selection):
            if verbosity == 2:
                print(f'Performing gradient descent for bacterium {n}.')

            # Calculate quadratic function approximation around x_start from the visited points
            # closest to it
            sampling_points = visited_points[visited_tree.query(
                x_start, num_sampling_points)[1]]
            func_values = np.array([f(point) for point in sampling_points])
            nfev += num_sampling_points
            sampling_matrix = np.stack([
                np.prod(sampling_points**d, axis=1) for d in polynomial_powers
            ],
                                       axis=-1)
            # NOTE(review): the third positional argument of np.linalg.lstsq is rcond. A cut-off
            # of 2 is larger than 1, so per the NumPy documentation every singular value would
            # be treated as zero and coeffs would be all zeros. The other lstsq calls in this
            # file pass rcond=None. Left unchanged because the effect on bfgs_b cannot be
            # verified from here - confirm the intended value.
            coeffs = np.linalg.lstsq(sampling_matrix, func_values, 2)[0]

            # Calculate Hessian matrix from the quadratic approximation
            # NOTE(review): the second derivative of c * x_i**2 is 2 * c, while the diagonal is
            # filled with 0.5 * c below - confirm the convention bfgs_b expects for H_start.
            H = np.ones((n_dims, n_dims))
            square_powers = [(powers, coeff)
                             for powers, coeff in zip(polynomial_powers, coeffs)
                             if sum(powers) == 2]
            for square_power, coeff in square_powers:
                idcs_to_consider = np.argwhere(np.array(square_power) != 0)
                if len(idcs_to_consider) == 1:  # Diagonal
                    H[idcs_to_consider[0], idcs_to_consider[0]] = 0.5 * coeff
                elif len(idcs_to_consider) == 2:  # Mixed derivatives
                    H[idcs_to_consider[0], idcs_to_consider[1]] = coeff
                    H[idcs_to_consider[1], idcs_to_consider[0]] = coeff
                else:
                    raise RuntimeError(
                        "Polynomial function approximation seems to be of higher "
                        "order than two. This shouldn't happen.")

            local_optimization_result = bfgs_b(
                f,
                x_start,
                projection_callback,
                H_start=H,
                a=a_gd,
                c=c_gd,
                niter=niter_gd,
                n_linesearch=n_linesearch_gd,
                alpha_linesearch=alpha_linesearch_gd,
                beta_linesearch=beta_linesearch_gd,
                eps_abs=eps_abs_gd,
                eps_rel=eps_rel_gd,
                verbosity=verbosity)
            x_best_gd[n] = local_optimization_result.x
            f_min_gd[n] = local_optimization_result.f
            nfev_gd += local_optimization_result.nfev
            nit_gd += local_optimization_result.nit
            nit_gd_arr[n] = local_optimization_result.nit
            success_gd[n] = local_optimization_result.success
            trace_gd[:, sortIdx[n], :] = _pad_trace(
                local_optimization_result.trace, niter_gd)

        result = OptimizeResult()
        # Successful as soon as one of the local searches reports success
        result.success = success_gd.any()
        result.x = x_best_gd[np.argmin(f_min_gd)]
        result.fun = np.min(f_min_gd)
        result.nfev = nfev + nfev_gd
        result.nit = nit + nit_gd
        # Keep only as many local-search steps as the longest local search actually took
        trace_gd = trace_gd[:np.max(nit_gd_arr).astype(int)]
        result.trace = np.concatenate((trace, trace_gd))

        return result
Esempio n. 27
0
def dual_annealing(func, x0, bounds, args=(), maxiter=1000,
                   local_search_options=None, initial_temp=5230.,
                   restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
                   maxfun=1e7, seed=None, no_local_search=False,
                   callback=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized.  Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a  tuple of any additional fixed parameters needed to
        completely specify the function.
    x0 : ndarray, shape(n,)
        A single initial starting point coordinates. If ``None`` is provided,
        initial coordinates are automatically generated (using the ``reset``
        method from the internal ``EnergyState`` class).
    bounds : sequence, shape (n, 2)
        Bounds for variables.  ``(min, max)`` pairs for each element in ``x``,
        defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Default value is 1000.
        If it is not positive, the search loop is skipped and the result is
        built from the initial energy state.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
        ``None`` (the default) means no extra options.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitate a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing process
        is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `numpy.random.RandomState` instance}, optional
        If `seed` is not specified the `numpy.random.RandomState` singleton is
        used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with the
        following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will stop.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` (a list of strings)
        which describes the cause of the termination. ``nit``, ``nfev`` and
        ``njev`` are also set.
        See `OptimizeResult` for a description of other attributes.

    Raises
    ------
    ValueError
        If ``x0`` and ``bounds`` have different lengths, if
        ``restart_temp_ratio`` is outside (0, 1), or if the bounds contain
        inf/nan values or do not satisfy ``min < max``.

    Notes
    -----
    This function implements the Dual Annealing optimization. This stochastic
    approach derived from [3]_ combines the generalization of CSA (Classical
    Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
    to a strategy for applying a local search on accepted locations [4]_.
    An alternative implementation of this same algorithm is described in [5]_
    and benchmarks are presented in [6]_. This approach introduces an advanced
    method to refine the solution found by the generalized annealing
    process. This algorithm uses a distorted Cauchy-Lorentz visiting
    distribution, with its shape controlled by the parameter :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is used
    to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
    acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1988).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-dimensional problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, None, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
    -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """
    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    # A fresh dict per call avoids sharing one mutable default object
    # between invocations.
    if local_search_options is None:
        local_search_options = {}

    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer
    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)
    # Run the search loop
    need_to_stop = False
    iteration = 0
    message = []
    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    # With a non-positive ``maxiter`` the inner ``for`` below would never
    # execute, so ``need_to_stop`` could never be set and the outer ``while``
    # would spin forever.  Treat it as an exhausted iteration budget instead.
    if maxiter <= 0:
        message.append("Maximum number of iteration reached")
        need_to_stop = True
    while not need_to_stop:
        # Each pass of the ``for`` is one annealing schedule; leaving it via
        # the re-annealing ``break`` restarts the schedule from ``i = 0``
        # while ``iteration`` keeps counting globally.
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            iteration += 1
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    break

    # Return the OptimizeResult
    res = OptimizeResult()
    res.x = energy_state.xbest
    res.fun = energy_state.ebest
    res.nit = iteration
    res.nfev = func_wrapper.nfev
    res.njev = func_wrapper.ngev
    res.message = message
    return res
Esempio n. 28
0
def dummy_minimize(func, dimensions, n_calls=100,
                   x0=None, y0=None, random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum. When `x0` is given
        without `y0`, the evaluations of `x0` count towards this budget.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` or an empty list, no initial input points are
          used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `models` [list]: always empty for random search.
        - `space` [Space]: the optimisation space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `x0` is not a list (after wrapping a single point), if `y0` has
        an unsupported type, non-scalar elements or a length different from
        `x0`, if `y0` is given without `x0`, or if `func` does not return a
        scalar.
    """
    # Save call args. The frame is queried inline (not bound to a local) so
    # that the snapshot contains only the call arguments; ``dict(...)`` takes
    # a shallow copy of the frame locals.
    specs = {"args": dict(inspect.currentframe().f_locals),
             "function": inspect.currentframe().f_code.co_name}

    # Normalise the initial points first: this is cheap and fails fast on
    # bad arguments before the random state and the space are built.
    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # A single point was given; an empty sequence must not be indexed.
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, got %s" % type(x0))

    if len(x0) > 0 and y0 is not None:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s"
                             % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")

        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")

    elif len(x0) > 0 and y0 is None:
        # The initial points still have to be evaluated below, so they are
        # charged against the call budget.
        y0 = []
        n_calls -= len(x0)

    elif len(x0) == 0 and y0 is not None:
        raise ValueError("`x0`cannot be `None` when `y0` is provided")

    else:  # len(x0) == 0 and y0 is None
        y0 = []

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Random search: initial points followed by freshly sampled ones.
    X = x0 + space.rvs(n_samples=n_calls, random_state=rng)
    y = y0

    # Only points without a known value (index >= len(y0)) are evaluated.
    for i in range(len(y0), len(X)):
        y_i = func(X[i])

        # The return type is only validated on the first evaluation.
        if i == len(y0) and not np.isscalar(y_i):
            raise ValueError("`func` should return a scalar")

        y.append(y_i)

    y = np.array(y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X
    res.models = []  # Create attribute even though it is empty
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
Esempio n. 29
0
def minimize(D,
             F,
             x0_generator=None,
             descents_count=1,
             maxiter=None,
             tol=1e-5,
             verbose=0):
    """
    Search for a permutation matrix P minimizing f(P) = <F, PDP^T>.

    <., .> denotes the Frobenius inner product. The relaxed problem is
    solved with the Frank–Wolfe algorithm and its solution is then projected
    onto the set of permutations.

    Parameters
    ----------
    D, F : square numpy matrices of the same size.
        By convention D holds the distances and F the flows of the factory
        assignment problem. A torch tensor is also accepted for D when torch
        is available.
    x0_generator : callable producing the initial search points, which are
        doubly stochastic matrices. When omitted, a default generator is
        built that picks random points (C + R)/2, where C is the center of
        the Birkhoff polytope and R is a random matrix from it.
    descents_count : number of searches to perform from an initial point.
    maxiter : maximum number of descent steps to perform. None means
        there is no limit.
    tol : tolerance on the decrease of the objective. A local search is
        terminated once a descent step decreases the objective by less
        than tol.
    verbose : verbosity level; the final objective is printed when it is
        at least 1.

    Returns
    -------
    scipy.optimize.OptimizeResult object with members fun and x.
    x is the argument that minimizes f and fun is f(x).
    the permutation x is returned in line notation.
    """
    size = len(D)

    # Torch inputs need a numpy dtype for the default origin generator.
    on_torch = has_torch and isinstance(D, torch.Tensor)
    np_dtype = torch_dtype_to_numpy[D.dtype] if on_torch else D.dtype

    if x0_generator is None:
        x0_generator = SearchOriginGenerator(size, np_dtype)
        if on_torch:
            x0_generator = TorchifiedSearchOriginGenerator(x0_generator,
                                                           device=D.device)

    projector = TaylorExpansionMinimizer()
    if on_torch:
        projector = TorchifiedProjector(projector)

    relaxed = minimize_relaxed(D,
                               F,
                               projector=projector,
                               x0_generator=x0_generator,
                               count=descents_count,
                               maxiter=maxiter,
                               tol=tol,
                               verbose=verbose)

    # The projection onto permutations is fed a CPU copy for torch input.
    doubly_stochastic = relaxed.x.cpu() if on_torch else relaxed.x

    res = OptimizeResult()
    res.x = project_doubly_stochastic_matrix_onto_permutations(
        doubly_stochastic)
    res.fun = objective(D, F, res.x)
    if verbose >= 1:
        print("Frak-Wolfe QP objective = %.3f." % (res.fun))
    return res
Esempio n. 30
0
def fmin_bfgs_f(f_g,
                x0,
                B0=None,
                M=2,
                gtol=1e-5,
                Delta=10.0,
                maxiter=None,
                callback=None,
                norm_ord=np.inf,
                **_kwargs):
    """Minimize a function with BFGS and a non-monotone line search.

    Parameters
    ----------
    f_g : callable
        ``f_g(x)`` must return the pair ``(value, gradient)``. The object
        must also expose ``fun`` and ``grad`` (handed to the line search)
        and ``ncall`` (reported as the evaluation count).
    x0 : array-like
        Starting point.
    B0 : ndarray, optional
        Initial Hessian approximation; the identity is used when omitted.
    M : int, optional
        Passed to ``Seq``, which stores recent objective values; its
        maximum is given to the line search as the reference value.
    gtol : float, optional
        The iteration stops once the gradient norm is at most ``gtol``.
    Delta : float, optional
        Accepted for interface compatibility; not used.
    maxiter : int, optional
        Maximum number of iterations, ``200 * len(x0)`` by default.
    callback : callable, optional
        Called as ``callback(xk)`` after every successful BFGS update.
    norm_ord : optional
        ``ord`` argument of ``np.linalg.norm`` for the gradient norm test.
    **_kwargs
        Ignored.

    Returns
    -------
    OptimizeResult
        With ``x``, ``fun``, ``jac``, ``hess`` (the Hessian approximation),
        ``nit`` (index of the last iteration entered), ``nfev``, ``njev``,
        ``status``, ``message`` and ``success``. ``status`` is 0 when the
        gradient tolerance was met, 1 when the line search failed more than
        twice, and 2 when ``maxiter`` iterations ran without convergence.
    """
    fk, gk = f_g(x0)
    Bk = np.eye(len(x0)) if B0 is None else B0
    Hk = np.linalg.inv(Bk)
    maxiter = 200 * len(x0) if maxiter is None else maxiter
    xk = x0

    def norm(x):
        return np.linalg.norm(x, ord=norm_ord)

    k = 0
    old_old_fval = fk + np.linalg.norm(gk) / 2
    old_fval = fk
    f_s = Seq(M)
    f_s.add(fk)
    flag = 0
    re_search = 0
    for k in range(maxiter):
        if norm(gk) <= gtol:
            break
        dki = -np.dot(Hk, gk)
        try:
            pk = dki
            f = f_g.fun
            myfprime = f_g.grad
            gfk = gk
            old_fval = fk
            (
                alpha_k,
                _fc,
                _gc,
                old_fval,
                old_old_fval,
                gfkp1,
            ) = line_search_wolfe2(f, myfprime, xk, pk, gfk, f_s.get_max(),
                                   old_fval, old_old_fval)
        except Exception as e:
            # Best effort: report the failure and take the full quasi-Newton
            # step instead; give up after more than two such fallbacks.
            print(e)
            re_search += 1
            xk = xk + dki
            fk, gk = f_g(xk)
            old_fval, old_old_fval = fk, old_fval
            f_s.add(fk)
            if re_search > 2:
                flag = 1
                break
            continue
        if alpha_k is None:
            # The line search found no acceptable step: same fallback as
            # above.
            print("alpha is None")
            xk = xk + dki
            fk, gk = f_g(xk)
            old_fval, old_old_fval = fk, old_fval
            f_s.add(fk)
            re_search += 1
            if re_search > 2:
                flag = 1
                break
            continue
        dki = alpha_k * pk
        # The line search already evaluated the objective and gradient at
        # the new point, so they are reused rather than recomputed.
        fki, gki = old_fval, gfkp1
        xk = xk + dki
        fk = fki
        yk = gki - gk
        gk = gki
        # Standard BFGS update of the Hessian approximation.
        bs = np.dot(Bk, dki)
        Bk = (Bk + np.outer(yk, yk) / np.dot(yk, dki) -
              np.outer(bs, bs) / np.dot(bs, dki))
        try:
            Hk = np.linalg.inv(Bk)
        except np.linalg.LinAlgError:
            # Singular update: keep the previous inverse for the next step.
            pass
        f_s.add(fk)
        if callback is not None:
            callback(xk)
    else:
        # Loop exhausted without meeting the gradient tolerance.
        flag = 2
    s = OptimizeResult()
    s.message = message_dict[flag]
    # Misspelled legacy key, kept so existing readers of it keep working.
    s.messgae = s.message
    s.fun = float(fk)
    s.nit = k
    s.nfev = f_g.ncall
    s.njev = f_g.ncall
    s.status = flag
    s.x = np.array(xk)
    s.jac = np.array(gk)
    s.hess = np.array(Bk)
    s.success = flag == 0
    return s
Esempio n. 31
0
def gp_minimize(func, bounds=None, search="sampling", random_state=None,
                maxiter=1000, acq="UCB", num_points=500):
    """
    Black-box optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian posterior which is much quicker to evaluate.

    Parameters
    ----------
    func: callable
        Function to minimize. Should take a array of parameters and
        return the function value.

    bounds: array-like, shape (n_parameters, 2)
        ``bounds[i][0]`` should give the lower bound of each parameter and
        ``bounds[i][1]`` should give the upper bound of each parameter.
        Required, despite the ``None`` default.

    search: string, "sampling" or "lbfgs"
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to "sampling", ``num_points`` are sampled randomly
        and the Gaussian Process prior is updated with that point that gives
        the best acquisition value over the Gaussian posterior.

        If search is set to "lbfgs", then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian posterior.

    random_state: int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results.

    maxiter: int, default 1000
        Number of iterations to find the minimum. ``func`` is evaluated
        once at a random initial point and then once per iteration, i.e.
        ``maxiter + 1`` times in total.

    acq: string, default "UCB"
        Function to minimize over the gaussian posterior. Can be either
        the "UCB" which refers to the UpperConfidenceBound or "EI" which
        is the Expected Improvement.

    num_points: int, default 500
        Number of points to sample to determine the next "best" point.
        Useless if search is set to "lbfgs".

    Returns
    -------
    res: OptimizeResult, scipy object
        The optimization result returned as a OptimizeResult object.
        Important attributes are
        ``x`` - array, the optimization solution,
        ``fun`` - float, the value of the function at the optimum,
        ``models``- gp_models[i]. the prior on the function fit at
                       iteration[i].
        ``func_vals`` - the function value at the ith iteration.
        ``x_iters`` - the value of ``x`` corresponding to the function value
                      at the ith iteration.
        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If ``bounds`` is None or ``search`` is not "sampling" or "lbfgs".
    """
    # Fail fast, before spending an (expensive) objective evaluation.
    if bounds is None:
        raise ValueError("`bounds` must be provided")
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "`search` should be 'sampling' or 'lbfgs', got %r" % (search,))

    # An existing RandomState is used as-is; anything else is a seed.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    num_params = len(bounds)
    lower_bounds, upper_bounds = zip(*bounds)
    upper_bounds = np.asarray(upper_bounds)
    lower_bounds = np.asarray(lower_bounds)
    span = upper_bounds - lower_bounds

    # The search runs in the unit hypercube; points are mapped to the real
    # bounds only when ``func`` is called and when results are reported.
    x0 = rng.rand(num_params)
    func_val = [func(lower_bounds + span * x0)]

    length_scale = np.ones(num_params)
    gp_params = {
        'kernel': Matern(length_scale=length_scale, nu=2.5),
        'normalize_y': True,
        'random_state': random_state
    }
    lbfgs_bounds = np.tile((0, 1), (num_params, 1))

    gp_models = []
    x = np.reshape(x0, (1, -1))

    for _ in range(maxiter):
        # Refit a fresh model on every point evaluated so far.
        gpr = GaussianProcessRegressor(**gp_params)
        gpr.fit(x, func_val)

        if search == "sampling":
            sampling = rng.rand(num_points, num_params)
            acquis = acquisition_func(sampling, gpr, np.min(func_val), acq)
            best_x = sampling[np.argmin(acquis)]
        else:  # search == "lbfgs", validated above
            init = rng.rand(num_params)
            best_x, _, _ = fmin_l_bfgs_b(
                acquisition_func,
                np.asfortranarray(init),
                args=(gpr, np.min(func_val), acq),
                bounds=lbfgs_bounds, approx_grad=True, maxiter=10)

        gp_models.append(gpr)

        func_val.append(func(lower_bounds + span * best_x))
        x = np.vstack((x, best_x))

    x = lower_bounds + span * x
    func_ind = np.argmin(func_val)

    res = OptimizeResult()
    res.models = gp_models
    res.x = x[func_ind]
    res.fun = func_val[func_ind]
    res.func_vals = func_val
    res.x_iters = x

    return res
Esempio n. 32
0
def model_policy_gradient(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        learning_rate: float = 1e-2,
        decay_rate: float = 0.96,
        decay_steps: int = 5,
        log_sigma_init: float = -5.0,
        max_iterations: int = 1000,
        batch_size: int = 10,
        radius_coeff: float = 3.0,
        warmup_steps: int = 10,
        batch_size_model: int = 65536,
        save_func_vals: bool = False,
        random_state: "cirq.RANDOM_STATE_OR_SEED_LIKE" = None,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_evaluations: Optional[int] = None
) -> scipy.optimize.OptimizeResult:
    """Model policy gradient algorithm for black-box optimization.

    The idea of this algorithm is to perform policy gradient, but estimate
    the function values using a surrogate model. The surrogate model is a
    least-squares quadratic fit to points sampled from the vicinity of the
    current iterate.

    The policy is a diagonal Gaussian with mean ``current_x`` and standard
    deviation ``exp(log_sigma)``; both are updated with ``_adam_update``.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        learning_rate: The learning rate for the policy gradient.
        decay_rate: The learning decay rate for the Adam optimizer.
        decay_steps: The learning decay steps for the Adam optimizer.
        log_sigma_init: The initial value for the sigma of the policy
            in the log scale.
        max_iterations: The maximum number of iterations to allow before
            termination.
        batch_size: The number of points to sample from the policy and
            evaluate with `f` in each iteration.
        radius_coeff: The ratio determining the size of the radius around
            the current iterate from which known points are taken to build
            the quadratic model. The ratio is with respect to the maximal
            distance of the current batch of samples from the iterate.
        warmup_steps: The number of iterations before the model is used.
            During these iterations the policy gradient is computed from
            the evaluated samples directly.
        batch_size_model: The number of samples evaluated on the fitted
            quadratic model in each model-based iteration.
        save_func_vals: Whether to evaluate and record the function value
            at the current iterate in every iteration.
        random_state: A seed (int) or `np.random.RandomState` class to use
            when generating random values. If not set, defaults to using
            the module methods in `np.random`.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function
            values at those points.
        max_evaluations: The maximum number of function evaluations to
            allow before termination.

    Returns:
        Scipy OptimizeResult with ``x``, ``fun``, ``nit``, ``nfev`` and
        ``message`` set, plus the histories ``x_iters``, ``func_vals``,
        ``xs_iters`` (sampled batches) and ``ys_iters`` (their values).
    """
    random_state = value.parse_random_state(random_state)

    if known_values is not None:
        known_xs, known_ys = known_values
        # Copy so that the caller's containers are not extended below.
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    log_sigma = np.ones(n) * log_sigma_init
    sigma = np.exp(log_sigma)

    # set up the first and second moment estimate
    m_mean = np.zeros(n)
    v_mean = np.zeros(n)
    m_log_sigma = np.zeros(n)
    v_log_sigma = np.zeros(n)

    # set up lr schedule and optimizer (one schedule object per parameter
    # group, as in the original code)
    lr_schedule1 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)
    lr_schedule2 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.fun = 0
    total_evals = 0
    num_iter = 0
    message = None

    while num_iter < max_iterations:
        # get samples from the current policy to evaluate
        z = random_state.randn(batch_size, n)
        new_xs = sigma * z + current_x

        if total_evals + batch_size > max_evaluations:
            message = "Reached maximum number of evaluations."
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += batch_size
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value
        # NOTE(review): this extra evaluation is not added to total_evals.
        if save_func_vals:
            res.func_vals.append(f(current_x))
            res.x_iters.append(np.copy(current_x))
            res.fun = res.func_vals[-1]

        # current sampling radius (maximal)
        max_radius = 0
        for sample in new_xs:
            distance = np.linalg.norm(sample - current_x)
            if distance > max_radius:
                max_radius = distance

        reward = [-y for y in new_ys]

        # warmup steps control whether to use the model to estimate the f
        if num_iter >= warmup_steps:
            # Determine points to use to build model
            threshold = radius_coeff * max_radius
            model_xs = []
            model_ys = []
            for known_x, known_y in zip(known_xs, known_ys):
                if np.linalg.norm(known_x - current_x) < threshold:
                    model_xs.append(known_x)
                    model_ys.append(known_y)
            # safer way without the `SVD` not converging
            try:
                # Fit around the current iterate: the predictions below are
                # made at offsets from `current_x`, so the model has to use
                # the same origin (previously the leftover loop variable,
                # i.e. the last known point, was passed here).
                model = _get_quadratic_model(model_xs, model_ys, current_x)
                use_model = True
            except ValueError:
                use_model = False

            if use_model:
                # get samples (from model)
                z = random_state.randn(batch_size_model, n)
                new_xs = sigma * z + current_x

                # use the model for prediction
                new_ys = model.predict(new_xs - current_x)
                reward = [-y for y in new_ys]

        reward = np.array(reward)

        # subtract baseline
        reward = reward - np.mean(reward)

        # analytic derivatives (natural gradient policy gradient)
        grad_mean = np.dot(z.T, reward)
        grad_log_sigma = np.dot(z.T**2, reward)

        delta_mean = grad_mean * sigma
        delta_log_sigma = grad_log_sigma / np.sqrt(2)

        # Normalise the update directions.  A zero norm (e.g. all rewards
        # equal) means the direction is already the zero vector; dividing
        # would turn the iterate into NaN, so it is left untouched.
        delta_mean_norm = np.linalg.norm(grad_mean)
        if delta_mean_norm > 0:
            delta_mean = delta_mean / delta_mean_norm
        delta_log_sigma_norm = np.linalg.norm(grad_log_sigma)
        if delta_log_sigma_norm > 0:
            delta_log_sigma = delta_log_sigma / delta_log_sigma_norm

        # gradient ascend to update the parameters
        current_x, m_mean, v_mean = _adam_update(delta_mean,
                                                 current_x,
                                                 num_iter,
                                                 m_mean,
                                                 v_mean,
                                                 lr_schedule=lr_schedule1)
        log_sigma, m_log_sigma, v_log_sigma = _adam_update(
            delta_log_sigma,
            log_sigma,
            num_iter,
            m_log_sigma,
            v_log_sigma,
            lr_schedule=lr_schedule2,
        )

        # keep sigma within [exp(-20), exp(2)]
        log_sigma = np.clip(log_sigma, -20.0, 2.0)
        sigma = np.exp(log_sigma)

        num_iter += 1

    # One last evaluation at the final iterate; it is counted in nfev.
    final_val = f(current_x)
    res.func_vals.append(final_val)

    if message is None:
        message = "Reached maximum number of iterations."

    res.x_iters.append(current_x)
    total_evals += 1
    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
    def solve(self):
        """
        Run the differential evolution solver with a wall-clock limit.

        The energies of the initial population are evaluated first and the
        lowest one is moved to index 0.  The population is then evolved for
        at most ``self.maxiter`` generations.  The loop ends early when

        - ``self.max_execution_time`` seconds have elapsed,
        - more than ``self.maxfun`` function evaluations were made,
        - ``self.callback`` returns True, or
        - ``std(energies) / |mean(energies) + _MACHEPS|`` drops below
          ``self.tol``.

        Returns
        -------
        res : OptimizeResult
            Holds ``x``, ``fun``, ``nfev``, ``nit``, ``message`` and
            ``success``.  ``success`` is False whenever the run ended for a
            reason other than convergence.  If ``self.polish`` is set and
            the L-BFGS-B polishing step found a lower value, ``x`` and
            ``fun`` are replaced and ``jac`` is added.
        """
        nfev, nit, warning_flag = 0, 0, False
        status_message = _status_message['success']

        # calculate energies to start with
        for index, candidate in enumerate(self.population):
            parameters = self._scale_parameters(candidate)
            self.population_energies[index] = self.func(parameters,
                                                        *self.args)
            nfev += 1

            if nfev > self.maxfun:
                warning_flag = True
                status_message = _status_message['maxfev']
                break

        minval = np.argmin(self.population_energies)

        # put the lowest energy into the best solution position.
        lowest_energy = self.population_energies[minval]
        self.population_energies[minval] = self.population_energies[0]
        self.population_energies[0] = lowest_energy

        self.population[[0, minval], :] = self.population[[minval, 0], :]

        if warning_flag:
            # evaluation budget exhausted while initialising
            return OptimizeResult(
                           x=self.x,
                           fun=self.population_energies[0],
                           nfev=nfev,
                           nit=nit,
                           message=status_message,
                           success=(warning_flag is not True))

        # do the optimisation.
        start_time = time.time()
        # Exactly `maxiter` generations at most.  The former
        # `while nit < self.maxiter + 1` loop incremented `nit` at the top
        # of the body and therefore ran one generation too many.
        for nit in range(1, self.maxiter + 1):
            if start_time + self.max_execution_time < time.time():
                warning_flag = True
                status_message = 'Max execution time reached'
                break

            if self.dither is not None:
                # draw a new mutation scale uniformly from the dither range
                self.scale = self.random_number_generator.rand(
                ) * (self.dither[1] - self.dither[0]) + self.dither[0]
            for candidate in range(np.size(self.population, 0)):
                if nfev > self.maxfun:
                    warning_flag = True
                    status_message = _status_message['maxfev']
                    break

                trial = self._mutate(candidate)
                self._ensure_constraint(trial)
                parameters = self._scale_parameters(trial)

                energy = self.func(parameters, *self.args)
                nfev += 1

                if energy < self.population_energies[candidate]:
                    self.population[candidate] = trial
                    self.population_energies[candidate] = energy

                    # index 0 always holds the best member found so far
                    if energy < self.population_energies[0]:
                        self.population_energies[0] = energy
                        self.population[0] = trial

            # stop when the fractional s.d. of the population is less than tol
            # of the mean energy
            convergence = (np.std(self.population_energies) /
                           np.abs(np.mean(self.population_energies) +
                                  _MACHEPS))

            if self.disp:
                print("differential_evolution step %d: f(x)= %g"
                      % (nit,
                         self.population_energies[0]))

            if (self.callback and
                    self.callback(self._scale_parameters(self.population[0]),
                                  convergence=self.tol / convergence) is True):

                warning_flag = True
                status_message = ('callback function requested stop early '
                                  'by returning True')
                break

            if convergence < self.tol or warning_flag:
                break

        else:
            # loop ran to completion without converging
            status_message = _status_message['maxiter']
            warning_flag = True

        DE_result = OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nfev=nfev,
            nit=nit,
            message=status_message,
            success=(warning_flag is not True))

        if self.polish:
            result = minimize(self.func,
                              np.copy(DE_result.x),
                              method='L-BFGS-B',
                              bounds=self.limits.T,
                              args=self.args)

            nfev += result.nfev
            DE_result.nfev = nfev

            if result.fun < DE_result.fun:
                DE_result.fun = result.fun
                DE_result.x = result.x
                DE_result.jac = result.jac
                # to keep internal state consistent
                self.population_energies[0] = result.fun
                self.population[0] = self._unscale_parameters(result.x)

        return DE_result
    def solve(self):
        """
        Runs the DifferentialEvolutionSolver with island migrations.

        The run is split into ``self.number_of_migrations`` rounds.  Before
        every round except the first, the solutions published by the
        neighbouring islands (``self.topology.neighbors``) are fetched from
        the shared ``client`` store, evaluated, and merged into population
        slots ``1..k`` if they beat the members currently held there.
        After each round this island publishes its own ``self.x`` under
        ``self.island_marker``.

        Returns
        -------
        res : OptimizeResult
            The optimization result represented as a ``OptimizeResult`` object.
            Important attributes are: ``x`` the solution array, ``success`` a
            Boolean flag indicating if the optimizer exited successfully and
            ``message`` which describes the cause of the termination. See
            `OptimizeResult` for a description of other attributes.  If `polish`
            was employed, and a lower minimum was obtained by the polishing,
            then OptimizeResult also contains the ``jac`` attribute.
        """
        nit, warning_flag = 0, False
        status_message = _status_message['success']

        # The population may have just been initialized (all entries are
        # np.inf). If it has you have to calculate the initial energies.
        # Although this is also done in the evolve generator it's possible
        # that someone can set maxiter=0, at which point we still want the
        # initial energies to be calculated (the following loop isn't run).
        if np.all(np.isinf(self.population_energies)):
            self._calculate_population_energies()

        for nmig in xrange(1, self.number_of_migrations + 1):

            if nmig != 1:
                # Get the host node (last character of the marker)
                host = int(self.island_marker[-1])

                # Get all the neighbors list
                neighbors = self.topology.neighbors(host)

                # (member in population coordinates, energy) per neighbor
                migrants = []

                for each_neighbor in neighbors:
                    neighbor_key = self.key + str(each_neighbor)
                    replacement = client.get(neighbor_key)
                    if replacement is None:
                        # not published yet: poll for up to wait_time
                        for _ in range(int(self.wait_time / self.poll_time)):
                            replacement = client.get(neighbor_key)
                            if replacement is None:
                                print("POLLING!!!")
                                time.sleep(self.poll_time)
                            else:
                                break

                    # This check used to be nested inside the
                    # `replacement is None` branch above, so a value that
                    # was available on the first `get` was never used.
                    if replacement is None:
                        continue

                    parameters = np.array(
                        [float(items) for items in replacement.split(",")])
                    energy = self.func(parameters, *self.args)
                    # Published vectors are `self.x` values, i.e. in the
                    # parameter space of `func`; population members are
                    # stored unscaled (cf. the polishing step below), so
                    # convert before pooling them with population rows.
                    migrants.append(
                        (self._unscale_parameters(parameters), energy))

                # Never touch slot 0 and never index past the population.
                n_slots = min(len(migrants), len(self.population) - 1)

                pool = list(migrants)
                for pop_index in range(1, n_slots + 1):
                    # copy: a row view would be clobbered by the
                    # assignments below before it is read
                    pool.append((np.copy(self.population[pop_index]),
                                 self.population_energies[pop_index]))

                # keep the n_slots lowest-energy candidates
                pool.sort(key=lambda entry: entry[-1])

                for pop_index in range(1, n_slots + 1):
                    member, energy = pool[pop_index - 1]
                    self.population[pop_index] = member
                    self.population_energies[pop_index] = energy

            # do the optimisation.
            # NOTE(review): `warning_flag` is not reset between migrations,
            # so once a round ends with a warning (including 'maxiter')
            # every later round breaks after a single generation.
            # NOTE(review): `is_optimisation_complete` is never set to True
            # in this method, so the message at the end of the round is
            # always printed.
            is_optimisation_complete = False
            for nit in xrange(1, self.maxiter + 1):
                # evolve the population by a generation
                try:
                    next(self)
                except StopIteration:
                    warning_flag = True
                    status_message = _status_message['maxfev']
                    break

                if self.disp:
                    print("differential_evolution step %d: f(x)= %g"
                          % (nit,
                             self.population_energies[0]))

                # should the solver terminate?
                convergence = self.convergence

                if (self.callback and
                        self.callback(self._scale_parameters(self.population[0]),
                                      convergence=self.tol / convergence) is True):

                    warning_flag = True
                    status_message = ('callback function requested stop early '
                                      'by returning True')
                    is_optimisation_complete = False
                    break

                intol = (np.std(self.population_energies) <=
                         self.atol +
                         self.tol * np.abs(np.mean(self.population_energies)))
                if intol:
                    is_optimisation_complete = False
                if warning_flag or intol:
                    break

            else:
                status_message = _status_message['maxiter']
                warning_flag = True

            # publish this island's current solution for its neighbors
            client.set(self.island_marker, ",".join([str(items) for items in self.x]))
            print("MARKED IN MEMCACHE")
            print(self.island_marker, ",".join([str(items) for items in self.x]))
            if not is_optimisation_complete:
                print("Exited due to some break condition above!!", status_message)

        DE_result = OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nfev=self._nfev,
            nit=nit,
            message=status_message,
            success=(warning_flag is not True))

        if self.polish:
            result = minimize(self.func,
                              np.copy(DE_result.x),
                              method='L-BFGS-B',
                              bounds=self.limits.T,
                              args=self.args)

            self._nfev += result.nfev
            DE_result.nfev = self._nfev

            if result.fun < DE_result.fun:
                DE_result.fun = result.fun
                DE_result.x = result.x
                DE_result.jac = result.jac
                # to keep internal state consistent
                self.population_energies[0] = result.fun
                self.population[0] = self._unscale_parameters(result.x)

        return DE_result
Esempio n. 35
0
def gp_minimize(func,
                dimensions,
                base_estimator=None,
                alpha=10e-10,
                acq="EI",
                xi=0.01,
                kappa=1.96,
                search="auto",
                n_calls=100,
                n_points=500,
                n_random_starts=10,
                n_restarts_optimizer=5,
                x0=None,
                y0=None,
                random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take for ever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    The total number of evaluations, `n_calls`, are performed like the
    following. If `x0` is provided but not `y0`, then the elements of `x0`
    are first evaluated, followed by `n_random_starts` evaluations.
    Finally, `n_calls - len(x0) - n_random_starts` evaluations are
    made guided by the surrogate model. If `x0` and `y0` are both
    provided then `n_random_starts` evaluations are first made then
    `n_calls - n_random_starts` subsequent evaluations are made
    guided by the surrogate model.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization.
        If `None`, a `GaussianProcessRegressor` with a
        `ConstantKernel * Matern(nu=2.5)` kernel is used.

    * `alpha` [float, default=10e-10 (i.e. 1e-9)]:
        Value added to the diagonal of the kernel matrix during fitting.
        Larger values correspond to an increased noise level in the
        observations and reduce potential numerical issues during fitting.
        Only used when `base_estimator` is `None`.

    * `acq` [string, default=`"EI"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"auto"`, `"sampling"` or `"lbfgs"`, default=`"auto"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"auto"`, then it is set to `"lbfgs"` if
        all the search dimensions are Real(continuous). It defaults to
        `"sampling"` for all other cases.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, lbfgs is run from each of them for at most
        20 iterations optimizing the acquisition function over the
        Gaussian prior, and the best of the results is used.

    * `n_calls` [int, default=100]:
        Number of calls to `func`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_random_starts` [int, default=10]:
        Number of evaluations of `func` with random initialization points
        before approximating the `func` with `base_estimator`.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` or an empty list, no initial input points are
          used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimization space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`: if `x0`/`y0` are malformed, if there is neither an
      initial point nor a random start, if `n_calls` is smaller than the
      number of initial evaluations, if `func` does not return a scalar,
      or if `search` is not one of the accepted values.
    """
    # Save call args.  This has to stay the first statement: it snapshots
    # the frame locals, which at this point are exactly the arguments.
    specs = {
        "args": copy.copy(inspect.currentframe().f_locals),
        "function": inspect.currentframe().f_code.co_name
    }

    # Check params
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) * Matern(
                length_scale=np.ones(space.transformed_n_dims),
                length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims,
                nu=2.5)),
            normalize_y=True,
            alpha=alpha,
            random_state=random_state)

    # Initialize with provided points (x0 and y0) and/or random points
    if x0 is None:
        x0 = []
    elif len(x0) > 0 and not isinstance(x0[0], list):
        # a single point was given; an empty `x0` is left as "no points"
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    # At least one point is needed before the first surrogate can be fit.
    if n_random_starts + len(x0) <= 0:
        raise ValueError("Expected `n_random_starts` > 0, got %d" %
                         n_random_starts)

    # The points in `x0` cost calls to `func` only when their values are
    # not supplied through `y0` (the condition used to be inverted, which
    # made the total number of evaluations differ from `n_calls`).
    n_init_func_calls = len(x0) if y0 is None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_calls < n_total_init_calls:
        raise ValueError("Expected `n_calls` >= %d, got %d" %
                         (n_total_init_calls, n_calls))

    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s" %
                             type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    else:
        y0 = []

    # Append the random starts and evaluate only those new points.
    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    if search == "auto":
        if space.is_real:
            search = "lbfgs"
        else:
            search = "sampling"
    elif search not in ["lbfgs", "sampling"]:
        raise ValueError(
            "Expected search to be 'lbfgs', 'sampling' or 'auto', "
            "got %s" % search)

    # Bayesian optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for i in range(n_model_iter):
        gp = clone(base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(X=X,
                                           model=gp,
                                           y_opt=np.min(yi),
                                           method=acq,
                                           xi=xi,
                                           kappa=kappa)
            next_x = X[np.argmin(values)]

        elif search == "lbfgs":
            best = np.inf
            # Reset per iteration so a candidate from a previous iteration
            # can never be re-used by accident.
            next_x = None

            for j in range(n_restarts_optimizer):
                start = space.transform(space.rvs(n_samples=1,
                                                  random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(_acquisition,
                                            start,
                                            args=(gp, np.min(yi), acq, xi,
                                                  kappa),
                                            bounds=space.transformed_bounds,
                                            approx_grad=True,
                                            maxiter=20)

                # The first restart is always accepted so that a non-finite
                # acquisition value still yields a candidate.
                if next_x is None or a < best:
                    next_x, best = x, a

            if next_x is None:
                raise ValueError("Expected `n_restarts_optimizer` > 0, "
                                 "got %d" % n_restarts_optimizer)

        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
Esempio n. 36
0
    def solve(self):
        """
        Drive the solver until it converges or is told to stop.

        Returns
        -------
        res : OptimizeResult
            Summary of the run. ``x`` is the solution array, ``fun`` the
            energy stored in the first population slot, ``nfev`` the
            solver's evaluation counter, ``nit`` the last generation number,
            ``message`` the reason the run ended and ``success`` a Boolean
            that is False whenever the run ended with a warning. If `polish`
            is enabled and the L-BFGS-B refinement reaches a lower value,
            ``x`` and ``fun`` are replaced by the refined ones and ``jac``
            is added.
        """
        nit = 0
        warning_flag = False
        status_message = _status_message['success']

        # All-infinite energies mean the population has only just been
        # initialised. The evolve step would normally evaluate it, but with
        # maxiter=0 the loop below never runs, so evaluate it here.
        if np.all(np.isinf(self.population_energies)):
            initial_energies = self._calculate_population_energies(
                self.population)
            self.population_energies[:] = initial_energies
            self._promote_lowest_energy()

        # main optimisation loop, one pass per generation
        for nit in xrange(1, self.maxiter + 1):
            try:
                # evolve the population by a generation
                next(self)
            except StopIteration:
                warning_flag = True
                if self._nfev > self.maxfun:
                    status_message = _status_message['maxfev']
                elif self._nfev == self.maxfun:
                    status_message = ('Maximum number of function '
                                      'evaluations has been reached.')
                break

            if self.disp:
                print("differential_evolution step %d: f(x)= %g"
                      % (nit, self.population_energies[0]))

            # read once per generation, whether or not a callback is set
            convergence = self.convergence

            if self.callback:
                best_candidate = self._scale_parameters(self.population[0])
                stop_requested = self.callback(
                    best_candidate, convergence=self.tol / convergence)
                # only a literal True stops the run
                if stop_requested is True:
                    warning_flag = True
                    status_message = ('callback function requested stop '
                                      'early by returning True')
                    break

            # tolerance test: skipped while any energy is still infinite
            energies = self.population_energies
            if np.any(np.isinf(energies)):
                intol = False
            else:
                spread = np.std(energies)
                threshold = self.atol + self.tol * np.abs(np.mean(energies))
                intol = spread <= threshold

            if intol or warning_flag:
                break

        else:
            # the loop ran out of generations without breaking
            status_message = _status_message['maxiter']
            warning_flag = True

        de_result = OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nfev=self._nfev,
            nit=nit,
            message=status_message,
            success=not warning_flag)

        if not self.polish:
            return de_result

        # refine the best member with a bounded local search
        polished = minimize(self.func,
                            np.copy(de_result.x),
                            method='L-BFGS-B',
                            bounds=self.limits.T)

        self._nfev += polished.nfev
        de_result.nfev = self._nfev

        if polished.fun < de_result.fun:
            de_result.fun = polished.fun
            de_result.x = polished.x
            de_result.jac = polished.jac
            # to keep internal state consistent
            self.population_energies[0] = polished.fun
            self.population[0] = self._unscale_parameters(polished.x)

        return de_result
Esempio n. 37
0
    def solve(self):
        """
        Run the differential evolution loop and report the outcome.

        Returns
        -------
        res : OptimizeResult
            Outcome of the run. ``x`` is the solution array, ``fun`` the
            energy stored in the first population slot, ``nfev`` the
            solver's evaluation counter, ``nit`` the last generation number,
            ``message`` the cause of termination and ``success`` a Boolean
            that is False whenever the run ended with a warning. If `polish`
            is enabled and the L-BFGS-B refinement finds a lower value,
            ``x`` and ``fun`` are overwritten with the refined ones and
            ``jac`` is added.
        """
        nit = 0
        warning_flag = False
        status_message = _status_message['success']

        # Energies that are all np.inf belong to a population that was only
        # just initialised. The evolve step would evaluate them, but with
        # maxiter=0 the loop below is skipped, so they are computed here.
        if np.all(np.isinf(self.population_energies)):
            self._calculate_population_energies()

        # main optimisation loop, one pass per generation
        for nit in xrange(1, self.maxiter + 1):
            try:
                # evolve the population by a generation
                next(self)
            except StopIteration:
                status_message = _status_message['maxfev']
                warning_flag = True
                break

            if self.disp:
                print("differential_evolution step %d: f(x)= %g"
                      % (nit, self.population_energies[0]))

            # read once per generation, whether or not a callback is set
            convergence = self.convergence

            if self.callback:
                best_candidate = self._scale_parameters(self.population[0])
                stop_requested = self.callback(
                    best_candidate, convergence=self.tol / convergence)
                # only a literal True stops the run
                if stop_requested is True:
                    warning_flag = True
                    status_message = ('callback function requested stop '
                                      'early by returning True')
                    break

            # tolerance test: skipped while any energy is still infinite
            energies = self.population_energies
            if np.any(np.isinf(energies)):
                intol = False
            else:
                spread = np.std(energies)
                threshold = self.atol + self.tol * np.abs(np.mean(energies))
                intol = spread <= threshold

            if intol or warning_flag:
                break

        else:
            # every generation was used up without a break
            status_message = _status_message['maxiter']
            warning_flag = True

        de_result = OptimizeResult(
            x=self.x,
            fun=self.population_energies[0],
            nfev=self._nfev,
            nit=nit,
            message=status_message,
            success=not warning_flag)

        if not self.polish:
            return de_result

        # refine the best member with a bounded local search
        polished = minimize(self.func,
                            np.copy(de_result.x),
                            method='L-BFGS-B',
                            bounds=self.limits.T,
                            args=self.args)

        self._nfev += polished.nfev
        de_result.nfev = self._nfev

        if polished.fun < de_result.fun:
            de_result.fun = polished.fun
            de_result.x = polished.x
            de_result.jac = polished.jac
            # to keep internal state consistent
            self.population_energies[0] = polished.fun
            self.population[0] = self._unscale_parameters(polished.x)

        return de_result
Esempio n. 38
0
def _tree_minimize(func,
                   dimensions,
                   base_estimator,
                   n_calls,
                   n_points,
                   n_random_starts,
                   x0=None,
                   y0=None,
                   random_state=None,
                   acq="EI",
                   xi=0.01,
                   kappa=1.96):
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Initialize with provided points (x0 and y0) and/or random points
    if n_calls <= 0:
        raise ValueError("Expected `n_calls` > 0, got %d" % n_random_starts)

    if x0 is None:
        x0 = []
    elif not isinstance(x0[0], list):
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    n_init_func_calls = len(x0) if y0 is not None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_total_init_calls <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError("Expected `n_random_starts` > 0, got %d" %
                         n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError("Expected `n_calls` >= %d, got %d" %
                         (n_total_init_calls, n_calls))

    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s" %
                             type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    else:
        y0 = []

    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    # Tree-based optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for i in range(n_model_iter):
        rgr = clone(base_estimator)
        rgr.fit(space.transform(Xi), yi)
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimizers like BFGS, so using random sampling
        # for the moment.
        X = space.transform(space.rvs(n_samples=n_points, random_state=rng))
        values = _gaussian_acquisition(X=X,
                                       model=rgr,
                                       y_opt=np.min(yi),
                                       method=acq,
                                       xi=xi,
                                       kappa=kappa)
        next_x = X[np.argmin(values)]
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng

    return res
Esempio n. 39
0
def _tree_minimize(func, dimensions, base_estimator, n_calls,
                   n_points, n_random_starts, x0=None, y0=None,
                   random_state=None, acq="EI", xi=0.01, kappa=1.96):
    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Initialize with provided points (x0 and y0) and/or random points
    if n_calls <= 0:
        raise ValueError(
            "Expected `n_calls` > 0, got %d" % n_random_starts)

    if x0 is None:
        x0 = []
    elif not isinstance(x0[0], list):
        x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, but got %s" % type(x0))

    n_init_func_calls = len(x0) if y0 is not None else 0
    n_total_init_calls = n_random_starts + n_init_func_calls

    if n_total_init_calls <= 0:
        # if x0 is not provided and n_random_starts is 0 then
        # it will ask for n_random_starts to be > 0.
        raise ValueError(
            "Expected `n_random_starts` > 0, got %d" % n_random_starts)

    if n_calls < n_total_init_calls:
        raise ValueError(
            "Expected `n_calls` >= %d, got %d" % (n_total_init_calls, n_calls))

    if y0 is None and x0:
        y0 = [func(x) for x in x0]
    elif x0:
        if isinstance(y0, Iterable):
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError(
                "`y0` should be an iterable or a scalar, got %s" % type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")
        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")
    else:
        y0 = []

    Xi = x0 + space.rvs(n_samples=n_random_starts, random_state=rng)
    yi = y0 + [func(x) for x in Xi[len(x0):]]
    if np.ndim(yi) != 1:
        raise ValueError("`func` should return a scalar")

    # Tree-based optimization loop
    models = []
    n_model_iter = n_calls - n_total_init_calls
    for i in range(n_model_iter):
        rgr = clone(base_estimator)
        rgr.fit(space.transform(Xi), yi)
        models.append(rgr)

        # `rgr` predicts constants for each leaf which means that the EI
        # has zero gradient over large distances. As a result we can not
        # use gradient based optimizers like BFGS, so using random sampling
        # for the moment.
        X = space.transform(space.rvs(n_samples=n_points,
                                      random_state=rng))
        values = _gaussian_acquisition(
            X=X, model=rgr, y_opt=np.min(yi), method=acq,
            xi=xi, kappa=kappa)
        next_x = X[np.argmin(values)]
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi.append(next_x)
        yi.append(next_y)

    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space
    res.random_state = rng

    return res
Esempio n. 40
0
    def solve(self):
        """
        Evaluate the starting population, evolve it and report the outcome.

        Returns
        -------
        res : OptimizeResult
            Outcome of the run. ``x`` is the solution array, ``fun`` the
            energy stored in the first population slot, ``nfev`` the number
            of function evaluations counted here, ``nit`` the last
            generation number, ``message`` the cause of termination and
            ``success`` a Boolean that is False whenever the run ended with
            a warning. If `polish` is enabled and the L-BFGS-B refinement
            finds a lower value, ``x`` and ``fun`` are overwritten with the
            refined ones and ``jac`` is added.
        """

        n_evals = 0
        nit = 0
        warning_flag = False
        status_message = _status_message['success']

        # evaluate every member of the starting population
        for slot, member in enumerate(self.population):
            scaled = self._scale_parameters(member)
            self.population_energies[slot] = self.func(scaled, *self.args)
            n_evals += 1

            if n_evals > self.maxfun:
                warning_flag = True
                status_message = _status_message['maxfev']
                break

        # swap the member with the lowest energy into slot 0
        best_slot = np.argmin(self.population_energies)
        energies = self.population_energies
        energies[0], energies[best_slot] = energies[best_slot], energies[0]
        self.population[[0, best_slot], :] = self.population[[best_slot, 0], :]

        # the evaluation budget was already spent on the starting population
        if warning_flag:
            return OptimizeResult(x=self.x,
                                  fun=self.population_energies[0],
                                  nfev=n_evals,
                                  nit=nit,
                                  message=status_message,
                                  success=not warning_flag)

        # main optimisation loop, one pass per generation
        for nit in range(1, self.maxiter + 1):
            if self.dither is not None:
                # draw a fresh scale between the two dither values
                low, high = self.dither[0], self.dither[1]
                self.scale = (
                    self.random_number_generator.rand() * (high - low) + low)

            for candidate in range(np.size(self.population, 0)):
                if n_evals > self.maxfun:
                    warning_flag = True
                    status_message = _status_message['maxfev']
                    break

                trial = self._mutate(candidate)
                self._ensure_constraint(trial)
                energy = self.func(self._scale_parameters(trial), *self.args)
                n_evals += 1

                # keep the trial only when it strictly improves on the member
                if energy < self.population_energies[candidate]:
                    self.population[candidate] = trial
                    self.population_energies[candidate] = energy

                    # a new overall best is also copied into slot 0
                    if energy < self.population_energies[0]:
                        self.population_energies[0] = energy
                        self.population[0] = trial

            # fractional s.d. of the population energies relative to their
            # mean; the run stops once it drops below tol
            energy_std = np.std(self.population_energies)
            energy_mean = np.mean(self.population_energies)
            convergence = energy_std / np.abs(energy_mean + _MACHEPS)

            if self.disp:
                print("differential_evolution step %d: f(x)= %g" %
                      (nit, self.population_energies[0]))

            if self.callback:
                best_candidate = self._scale_parameters(self.population[0])
                stop_requested = self.callback(
                    best_candidate, convergence=self.tol / convergence)
                # only a literal True stops the run
                if stop_requested is True:
                    warning_flag = True
                    status_message = ('callback function requested stop '
                                      'early by returning True')
                    break

            if warning_flag or convergence < self.tol:
                break

        else:
            # every generation was used up without a break
            status_message = _status_message['maxiter']
            warning_flag = True

        de_result = OptimizeResult(x=self.x,
                                   fun=self.population_energies[0],
                                   nfev=n_evals,
                                   nit=nit,
                                   message=status_message,
                                   success=not warning_flag)

        if not self.polish:
            return de_result

        # refine the best member with a bounded local search
        polished = minimize(self.func,
                            np.copy(de_result.x),
                            method='L-BFGS-B',
                            bounds=self.limits.T,
                            args=self.args)

        n_evals += polished.nfev
        de_result.nfev = n_evals

        if polished.fun < de_result.fun:
            de_result.fun = polished.fun
            de_result.x = polished.x
            de_result.jac = polished.jac
            # to keep internal state consistent
            self.population_energies[0] = polished.fun
            self.population[0] = self._unscale_parameters(polished.x)

        return de_result
Esempio n. 41
0
def model_gradient_descent(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        rate: float = 1e-1,
        sample_radius: float = 1e-1,
        n_sample_points: int = 100,
        n_sample_points_ratio: Optional[float] = None,
        rate_decay_exponent: float = 0.0,
        stability_constant: float = 0.0,
        sample_radius_decay_exponent: float = 0.0,
        tol: float = 1e-8,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_iterations: Optional[int] = None,
        max_evaluations: Optional[int] = None) -> scipy.optimize.OptimizeResult:
    """Model gradient descent algorithm for black-box optimization.

    The idea of this algorithm is to perform gradient descent, but estimate
    the gradient using a surrogate model instead of, say, by
    finite-differencing. The surrogate model is a least-squared quadratic
    fit to points sampled from the vicinity of the current iterate.
    This algorithm works well when you have an initial guess which is in the
    convex neighborhood of a local optimum and you want to converge to that
    local optimum. It's meant to be used when the function is stochastic.

    Args:
        f: The function to minimize.
        x0: An initial guess. It is copied, and promoted to floating point
            if it has an integer or boolean dtype.
        args: Additional arguments to pass to the function.
        rate: The learning rate for the gradient descent.
        sample_radius: The radius around the current iterate to sample
            points from to build the quadratic model.
        n_sample_points: The number of points to sample in each iteration.
        n_sample_points_ratio: This specifies the number of points to sample
            in each iteration as a coefficient of the number of points
            required to exactly determine a quadratic model. The number
            of sample points will be this coefficient times (n+1)(n+2)/2,
            rounded up, where n is the number of parameters.
            Setting this overrides n_sample_points.
        rate_decay_exponent: Controls decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant and a is the rate decay exponent
            (this parameter).
        stability_constant: Affects decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant (this parameter) and a is the rate decay
            exponent.
        sample_radius_decay_exponent: Controls decay of sample radius.
            In iteration i the radius is the base radius divided by
            (i + 1)**b, where b is this parameter.
        tol: The algorithm terminates when the difference between the current
            iterate and the next suggested iterate is smaller than this value.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function values
            at those points.
        max_iterations: The maximum number of iterations to allow before
            termination.
        max_evaluations: The maximum number of function evaluations to allow
            before termination. The check is made before each batch of
            samples; one further evaluation of the final point is made
            when the run ends without converging.

    Returns:
        Scipy OptimizeResult with the attributes:
            x: The final iterate.
            fun: The function value recorded for the final iterate.
            nit: The number of completed update steps.
            nfev: The number of function evaluations actually performed.
            message: The reason for termination.
            x_iters: The iterates, one per entry of func_vals.
            func_vals: The function value measured at each iterate.
            model_vals: The model's prediction for each iterate (None for
                the initial point).
            xs_iters, ys_iters: The sampled points and their values, one
                list per iteration.
    """

    if known_values is not None:
        known_xs, known_ys = known_values
        # copy so the caller's lists are not extended in place
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_iterations is None:
        max_iterations = np.inf
    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    if n_sample_points_ratio is not None:
        n_sample_points = int(
            np.ceil(n_sample_points_ratio * (n + 1) * (n + 2) / 2))

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    if not np.issubdtype(current_x.dtype, np.inexact):
        # An integer/bool start point cannot absorb the in-place float
        # update `current_x -= ...` below, so promote it once here.
        current_x = current_x.astype(float)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.model_vals = [None]
    res.fun = 0
    total_evals = 0
    num_iter = 0
    converged = False
    message = None

    while num_iter < max_iterations:
        current_sample_radius = (sample_radius /
                                 (num_iter + 1)**sample_radius_decay_exponent)

        # Determine points to evaluate
        # in ball around current point
        new_xs = [np.copy(current_x)] + [
            current_x + _random_point_in_ball(n, current_sample_radius)
            for _ in range(n_sample_points)
        ]

        if total_evals + len(new_xs) > max_evaluations:
            message = 'Reached maximum number of evaluations.'
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += len(new_ys)
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value (the first sample is the current iterate)
        res.func_vals.append(new_ys[0])
        res.x_iters.append(np.copy(current_x))
        res.fun = res.func_vals[-1]

        # Determine points to use to build model
        model_xs = []
        model_ys = []
        for x, y in zip(known_xs, known_ys):
            if np.linalg.norm(x - current_x) < current_sample_radius:
                model_xs.append(x)
                model_ys.append(y)
        # Build and solve model
        model_gradient, model = _get_least_squares_model_gradient(
            model_xs, model_ys, current_x)

        # calculate the gradient and update the current point
        gradient_norm = np.linalg.norm(model_gradient)
        decayed_rate = (
            rate / (num_iter + 1 + stability_constant)**rate_decay_exponent)
        # Convergence criteria
        if decayed_rate * gradient_norm < tol:
            converged = True
            message = 'Optimization converged successfully.'
            break
        # Update
        current_x -= decayed_rate * model_gradient
        res.model_vals.append(
            model.predict([-decayed_rate * model_gradient])[0])

        num_iter += 1

    if converged:
        # The final iterate was already evaluated and recorded inside the
        # loop, so nothing is re-evaluated, re-appended or re-counted.
        final_val = res.func_vals[-1]
    else:
        # The last update (or the start point) has not been evaluated yet.
        final_val = f(current_x)
        total_evals += 1
        res.func_vals.append(final_val)
        res.x_iters.append(current_x)

    if message is None:
        message = 'Reached maximum number of iterations.'

    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
Esempio n. 42
0
def create_result(Xi,
                  yi,
                  n_evaluations=None,
                  space=None,
                  rng=None,
                  specs=None,
                  models=None,
                  maximize=False):
    """
    Initialize an `OptimizeResult` object.

    Parameters
    ----------
    * `Xi` [list of lists, shape=(n_iters, n_features)]:
        Evaluated points. May also be a list of such lists (one per
        Hyperband bracket), which is flattened by one level.

    * `yi` [array-like, shape=(n_iters,)]:
        Objective value for every point in `Xi`, nested the same way as
        `Xi`. If the values form a 2-D array, the first column is used as
        the objective and the second column is stored as `log_time`.

    * `n_evaluations` [int, optional]:
        If given, `func_vals` and `x_iters` hold only the `n_evaluations`
        best evaluations, best first, and the complete history is stored
        in `all_func_vals` and `all_x_iters`. Duplicate objective values
        are collapsed (keeping the first occurrence) whenever enough
        distinct values exist to fill the request.

    * `space` [Space instance, optional]:
        Search space.

    * `rng` [RandomState instance, optional]:
        State of the random state.

    * `specs` [dict, optional]:
        Call specifications.

    * `models` [list, optional]:
        List of fit surrogate models.

    * `maximize` [bool, default=False]:
        If True the largest objective value is the best one, both for
        `x`/`fun` and for the `n_evaluations` selection.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        OptimizeResult instance with the required information.
    """
    res = OptimizeResult()

    try:
        # Hyperband returns evaluations as lists of lists.
        # We want to store the results as a single array.
        yi = list(itertools.chain.from_iterable(yi))
        Xi = list(itertools.chain.from_iterable(Xi))
    except TypeError:
        # All algorithms other than Hyperband already return a single list.
        pass

    yi = np.asarray(yi)
    if np.ndim(yi) == 2:
        res.log_time = np.ravel(yi[:, 1])
        yi = np.ravel(yi[:, 0])

    best = np.argmax(yi) if maximize else np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]

    if n_evaluations:
        # `first_seen[k]` is the index of the first occurrence of the k-th
        # smallest distinct value.
        unique_vals, first_seen = np.unique(yi, return_index=True)

        if len(unique_vals) < n_evaluations:
            # Not enough distinct values: rank every evaluation instead.
            order = np.argsort(yi)
        else:
            order = first_seen

        # Both rankings are ascending; when maximizing the best values are
        # at the end, so reverse before truncating.
        if maximize:
            order = order[::-1]
        order = order[:n_evaluations]

        res.func_vals = np.asarray(yi[order])
        res.x_iters = np.asarray([Xi[idx] for idx in order])
        res.all_func_vals = np.asarray(yi)
        res.all_x_iters = np.asarray(Xi)
    else:
        res.func_vals = np.asarray(yi)
        res.x_iters = np.asarray(Xi)

    res.models = models
    res.space = space
    res.random_state = rng
    res.specs = specs
    return res
Esempio n. 43
0
def gp_minimize(func,
                bounds,
                base_estimator=None,
                acq="LCB",
                xi=0.01,
                kappa=1.96,
                search="sampling",
                maxiter=1000,
                n_points=500,
                n_start=10,
                n_restarts_optimizer=5,
                random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `bounds` [array-like, shape=(n_parameters, 2)]:
        - ``bounds[i][0]`` should give the lower bound of each parameter and
        - ``bounds[i][1]`` should give the upper bound of each parameter.

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization.
        If None, a `GaussianProcessRegressor` with a constant-times-Matern
        kernel is used.

    * `acq` [string, default=`"LCB"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"sampling"` or `"lbfgs"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then `n_restarts_optimizer` points
        are sampled randomly, lbfgs is run for 10 iterations from each of
        them optimizing the acquisition function over the Gaussian prior,
        and the best of the resulting points is used.

    * `maxiter` [int, default=1000]:
        Number of iterations to find the minimum. Note that `n_start`
        iterations are effectively discounted, such that total number of
        function evaluations is at most `maxiter`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_start` [int, default=10]:
        Number of random initialization points.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [array]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If `search` is neither `"sampling"` nor `"lbfgs"`, or if `func`
        does not return a scalar.
    """
    # Reject an unknown strategy before any (expensive) evaluation of
    # `func`; otherwise it only surfaces later as an unbound `next_x`.
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected `search` to be 'sampling' or 'lbfgs', got %s" % search)

    rng = check_random_state(random_state)

    # Bounds
    n_params = len(bounds)
    lb, ub = extract_bounds(bounds)

    # Default GP
    if base_estimator is None:
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(n_params),
                           length_scale_bounds=[(0.01, 100)] * n_params,
                           nu=2.5)),
            normalize_y=True,
            alpha=10e-6,
            random_state=random_state)

    # First points
    Xi = lb + (ub - lb) * rng.rand(n_start, n_params)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError("The function to be optimized should return a scalar")

    # Bayesian optimization loop
    models = []

    for _ in range(maxiter - n_start):
        gp = clone(base_estimator)

        # fitting warnings are deliberately silenced
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(Xi, yi)

        models.append(gp)

        if search == "sampling":
            X = lb + (ub - lb) * rng.rand(n_points, n_params)
            values = acquisition(X=X,
                                 model=gp,
                                 y_opt=np.min(yi),
                                 method=acq,
                                 xi=xi,
                                 kappa=kappa)
            next_x = X[np.argmin(values)]

        else:  # search == "lbfgs"
            best_acq = np.inf

            for _ in range(n_restarts_optimizer):
                x0 = lb + (ub - lb) * rng.rand(n_params)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _info = fmin_l_bfgs_b(_acquisition,
                                                x0,
                                                args=(gp, np.min(yi), acq, xi,
                                                      kappa),
                                                bounds=bounds,
                                                approx_grad=True,
                                                maxiter=10)

                # keep the restart with the lowest acquisition value
                if a < best_acq:
                    next_x, best_acq = x, a

        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models

    return res
Esempio n. 44
0
def glpk(
        c,
        A_ub=None,
        b_ub=None,
        A_eq=None,
        b_eq=None,
        bounds=None,
        solver='simplex',
        sense=GLPK.GLP_MIN,
        scale=True,
        maxit=GLPK.INT_MAX,
        timeout=GLPK.INT_MAX,
        basis_fac='luf+ft',
        message_level=GLPK.GLP_MSG_ERR,
        disp=False,
        simplex_options=None,
        ip_options=None,
        mip_options=None,
):
    '''GLPK ctypes interface.

    Builds a GLPK problem from the given linear program, runs the selected
    solver on it and packs the outcome into an ``OptimizeResult``.  The
    native problem object is always released before returning or raising.

    Parameters
    ----------
    c : 1-D array (n,)
        Array of objective coefficients.
    A_ub : 2-D array (m, n)
        scipy.sparse.coo_matrix
    b_ub : 1-D array (m,)
    A_eq : 2-D array (k, n)
        scipy.sparse.coo_matrix
    b_eq : 1-D array (k,)
    bounds : None or list (n,) of tuple (2,) or tuple (2,)
        The jth entry in the list corresponds to the jth objective coefficient.
        Each entry is made up of a tuple describing the bounds.
        Use None to indicate that there is no bound. By default, bounds are
        (0, None) (all decision variables are non-negative). If a single tuple
        (min, max) is provided, then min and max will serve as bounds for all
        decision variables.
    solver : { 'simplex', 'interior', 'mip' }
        Use simplex (LP/MIP) or interior point method (LP only).
        Default is ``simplex``.
    sense : { GLP_MIN, GLP_MAX }
        Minimization or maximization problem.
        Default is ``GLP_MIN``.
    scale : bool
        Scale the problem. Default is ``True``.
    maxit : int
        Maximum number of iterations. Default is ``INT_MAX``.
        Only applied by the simplex solver.
    timeout : int
        Solution time limit. Default is ``INT_MAX``.
        Only applied by the simplex and mip solvers.
        NOTE(review): the value is passed unscaled to the GLPK control
        structure's ``tm_lim`` field, which the GLPK manual documents in
        milliseconds -- confirm the intended unit.
    basis_fac : { 'luf+ft', 'luf+cbg', 'luf+cgr', 'btf+cbg', 'btf+cgr' }
        LP basis factorization strategy. Default is ``luf+ft``.
        These are combinations of the following strategies:

            - ``luf`` : plain LU-factorization
            - ``btf`` : block triangular LU-factorization
            - ``ft`` : Forrest-Tomlin update
            - ``cbg`` : Schur complement + Bartels-Golub update
            - ``cgr`` : Schur complement + Givens rotation update

    message_level : { GLP_MSG_OFF, GLP_MSG_ERR, GLP_MSG_ON, GLP_MSG_ALL, GLP_MSG_DBG }
        Verbosity level of logging to stdout.
        Only applied when ``disp=True``. Default is ``GLP_MSG_ERR``.
        One of the following:

            ``GLP_MSG_OFF`` : no output
            ``GLP_MSG_ERR`` : warning and error messages only
            ``GLP_MSG_ON`` : normal output
            ``GLP_MSG_ALL`` : full output
            ``GLP_MSG_DBG`` : debug output

    disp : bool
        Display output to stdout. Default is ``False``.
    simplex_options : dict
        Options specific to simplex solver. The dictionary consists of
        the following fields:

            - method : { 'primal', 'dual', 'dualp' }
                Primal or two-phase dual simplex.
                Default is ``primal``. The key ``primal`` is accepted as
                an alias for ``method``. One of the following:

                    - ``primal`` : use two-phase primal simplex
                    - ``dual`` : use two-phase dual simplex
                    - ``dualp`` : use two-phase dual simplex, and if it fails,
                        switch to the primal simplex

            - init_basis : { 'std', 'adv', 'bib' }
                Choice of initial basis.  Default is 'adv'.
                One of the following:

                    - ``std`` : standard initial basis of all slacks
                    - ``adv`` : advanced initial basis
                    - ``bib`` : Bixby's initial basis

            - steep : bool
                Use steepest edge technique or standard "textbook"
                pricing.  Default is ``True`` (steepest edge).

            - ratio : { 'relax', 'norelax', 'flip' }
                Ratio test strategy. Default is ``relax``.
                One of the following:

                    - ``relax`` : Harris' two-pass ratio test
                    - ``norelax`` : standard "textbook" ratio test
                    - ``flip`` : long-step ratio test

            - tol_bnd : double
                Tolerance used to check if the basic solution is primal
                feasible. (Default: 1e-7).

            - tol_dj : double
                Tolerance used to check if the basic solution is dual
                feasible. (Default: 1e-7).

            - tol_piv : double
                Tolerance used to choose eligible pivotal elements of
                the simplex table. (Default: 1e-10).

            - obj_ll : double
                Lower limit of the objective function. If the objective
                function reaches this limit and continues decreasing,
                the solver terminates the search. Used in the dual simplex
                only. (Default: -DBL_MAX -- the largest finite float64).

            - obj_ul : double
                Upper limit of the objective function. If the objective
                function reaches this limit and continues increasing,
                the solver terminates the search. Used in the dual simplex
                only. (Default: +DBL_MAX -- the largest finite float64).

            - presolve : bool
                Use presolver (assumes ``scale=True`` and
                ``init_basis='adv'``). Default is ``True``.

            - exact : bool
                Use simplex method based on exact arithmetic.
                Default is ``False``. If ``True``, all other
                ``simplex_option`` fields are ignored.

    ip_options : dict
        Options specific to interior-point solver.
        The dictionary consists of the following fields:

            - ordering : { 'nord', 'qmd', 'amd', 'symamd' }
                Ordering algorithm used before Cholesky factorization.
                Default is ``amd``. One of the following:

                    - ``nord`` : natural (original) ordering
                    - ``qmd`` : quotient minimum degree ordering
                    - ``amd`` : approximate minimum degree ordering
                    - ``symamd`` : approximate minimum degree ordering
                        algorithm for Cholesky factorization of symmetric
                        matrices.

    mip_options : dict
        Options specific to MIP solver.
        The dictionary consists of the following fields:

            - intcon : 1-D array
                Array of integer constraints, specified as the 0-based
                indices of the solution. Default is an empty array.
            - bincon : 1-D array
                Array of binary constraints, specified as the 0-based
                indices of the solution. If any indices are duplicated
                between ``bincon`` and ``intcon``, they will be
                considered as binary constraints. Default is an empty
                array.
            - nomip : bool
                consider all integer variables as continuous
                (allows solving MIP as pure LP). Default is ``False``.
            - branch : { 'first', 'last', 'mostf', 'drtom', 'pcost' }
                Branching rule. Default is ``drtom``.
                One of the following:

                    - ``first`` : branch on first integer variable
                    - ``last`` : branch on last integer variable
                    - ``mostf`` : branch on most fractional variable
                    - ``drtom`` : branch using heuristic by Driebeck and Tomlin
                    - ``pcost`` : branch using hybrid pseudocost heuristic
                                  (may be useful for hard instances)

            - backtrack : { 'dfs', 'bfs', 'bestp', 'bestb' }
                Backtracking rule. Default is ``bestb``.
                One of the following:

                    - ``dfs`` : backtrack using depth first search
                    - ``bfs`` : backtrack using breadth first search
                    - ``bestp`` : backtrack using the best projection heuristic
                    - ``bestb`` : backtrack using node with best local bound

            - preprocess : { 'none', 'root', 'all' }
                Preprocessing technique. Default is ``all``.
                One of the following:

                    - ``none`` : disable preprocessing
                    - ``root`` : perform preprocessing only on the root level
                    - ``all`` : perform preprocessing on all levels

            - round : bool
                Simple rounding heuristic. Default is ``True``.

            - presolve : bool
                Use MIP presolver. Default is ``True``.

            - binarize : bool
                replace general integer variables by binary ones
                (only used if ``presolve=True``). Default is ``False``.

            - fpump : bool
                Apply feasibility pump heuristic. Default is ``False``.

            - proxy : int
                Apply proximity search heuristic (in seconds). Default is 60.
                A falsy value disables the heuristic.

            - cuts : list of { 'gomory', 'mir', 'cover', 'clique', 'all' }
                Cuts to generate. Default is no cuts. List of the following:

                    - ``gomory`` : Gomory's mixed integer cuts
                    - ``mir`` : MIR (mixed integer rounding) cuts
                    - ``cover`` : mixed cover cuts
                    - ``clique`` : clique cuts
                    - ``all`` : generate all cuts above

            - tol_int : float
                Absolute tolerance used to check if optimal solution to the
                current LP relaxation is integer feasible.
                (Default: 1e-5).
            - tol_obj : float
                Relative tolerance used to check if the objective value in
                optimal solution to the current LP relaxation is not better
                than in the best known integer feasible solution.
                (Default: 1e-7).
            - mip_gap : float
                Relative mip gap tolerance. If the relative mip gap for
                currently known best integer feasible solution falls below
                this tolerance, the solver terminates the search. This allows
                obtaining suboptimal integer feasible solutions if solving the
                problem to optimality takes too long time.
                (Default: 0.0).
            - bound : float
                add inequality obj <= bound (minimization) or
                obj >= bound (maximization) to integer feasibility
                problem (assumes ``minisat=True``).
                NOTE: this option is not currently read by this function.

    Returns
    -------
    res : OptimizeResult
        If the solver driver reports a non-success return code, a warning is
        issued and the result only holds ``message`` and ``success=False``.
        Otherwise ``status`` (the GLPK solution status code), ``message``
        and ``success`` are set.  For ``simplex`` and ``interior`` with an
        optimal status, ``fun``, ``x``, ``dual``, ``slack``, ``con`` and
        ``nit`` are added; for ``mip`` with an optimal or feasible status,
        ``fun`` and ``x`` are added.

    Raises
    ------
    ValueError
        If ``solver`` is not one of the recognized solvers.
    KeyError
        If a string-valued option (e.g. ``basis_fac``, ``ratio``,
        ``branch``) is not one of its documented choices.

    Notes
    -----
    In general, don't change tolerances without a detailed understanding
    of their purposes.
    '''

    # Housekeeping: options default to None so no dict is shared across calls
    if simplex_options is None:
        simplex_options = {}
    if ip_options is None:
        ip_options = {}
    if mip_options is None:
        mip_options = {}

    # Create and fill the GLPK problem struct
    prob, lp = _fill_prob(c, A_ub, b_ub, A_eq, b_eq, bounds, sense, 'problem-name')
    c, A_ub, b_ub, A_eq, b_eq, bounds, _x0 = lp

    # Get the library
    _lib = GLPK()._lib

    # Boolean option -> GLPK switch.  Kept as a lookup (rather than a
    # conditional) so that a non-boolean option value raises KeyError.
    on_off = {
        True: GLPK.GLP_ON,
        False: GLPK.GLP_OFF,
    }

    # Everything that touches ``prob`` lives inside the try so the native
    # problem is released on every exit path (success, early return, error).
    try:
        # Scale the problem
        # NOTE(review): explicit scaling is only skipped when 'presolve' is
        # explicitly given as truthy, although the simplex control structure
        # below defaults presolve to True -- confirm which is intended.
        no_need_explicit_scale = (solver == 'simplex' and
                                  simplex_options.get('presolve'))
        if scale and not no_need_explicit_scale:
            _lib.glp_scale_prob(prob, GLPK.GLP_SF_AUTO)  # do auto scaling for now

        # Select basis factorization method
        bfcp = glp_bfcp()
        _lib.glp_get_bfcp(prob, ctypes.byref(bfcp))
        bfcp.type = {
            'luf+ft': GLPK.GLP_BF_LUF + GLPK.GLP_BF_FT,
            'luf+cbg': GLPK.GLP_BF_LUF + GLPK.GLP_BF_BG,
            'luf+cgr': GLPK.GLP_BF_LUF + GLPK.GLP_BF_GR,
            'btf+cbg': GLPK.GLP_BF_BTF + GLPK.GLP_BF_BG,
            'btf+cgr': GLPK.GLP_BF_BTF + GLPK.GLP_BF_GR,
        }[basis_fac]
        _lib.glp_set_bfcp(prob, ctypes.byref(bfcp))

        # Run the solver
        if solver == 'simplex':

            # Construct an initial basis
            basis = simplex_options.get('init_basis', 'adv')
            basis_fun = {
                'std': _lib.glp_std_basis,
                'adv': _lib.glp_adv_basis,
                'bib': _lib.glp_cpx_basis,
            }[basis]
            basis_args = [prob]
            if basis == 'adv':
                # adv must have 0 as flags argument
                basis_args.append(0)
            basis_fun(*basis_args)

            # Make control structure
            smcp = glp_smcp()
            _lib.glp_init_smcp(ctypes.byref(smcp))

            # Set options
            # disp=False multiplies the level down to 0
            smcp.msg_lev = message_level*disp
            # 'method' is the key historically read here; 'primal' is the
            # key the documentation used to advertise, so honor both.
            method = simplex_options.get(
                'method', simplex_options.get('primal', 'primal'))
            smcp.meth = {
                'primal': GLPK.GLP_PRIMAL,
                'dual': GLPK.GLP_DUAL,
                'dualp': GLPK.GLP_DUALP,
            }[method]
            smcp.pricing = {
                True: GLPK.GLP_PT_PSE,
                False: GLPK.GLP_PT_STD,
            }[simplex_options.get('steep', True)]
            smcp.r_test = {
                'relax': GLPK.GLP_RT_HAR,
                'norelax': GLPK.GLP_RT_STD,
                'flip': GLPK.GLP_RT_FLIP,
            }[simplex_options.get('ratio', 'relax')]
            smcp.tol_bnd = simplex_options.get('tol_bnd', 1e-7)
            smcp.tol_dj = simplex_options.get('tol_dj', 1e-7)
            smcp.tol_piv = simplex_options.get('tol_piv', 1e-10)
            # Compare against None (not truthiness) so a limit of 0.0 is
            # applied instead of being silently dropped.
            obj_ll = simplex_options.get('obj_ll')
            if obj_ll is not None:
                smcp.obj_ll = obj_ll
            obj_ul = simplex_options.get('obj_ul')
            if obj_ul is not None:
                smcp.obj_ul = obj_ul
            smcp.it_lim = maxit
            smcp.tm_lim = timeout
            smcp.presolve = on_off[simplex_options.get('presolve', True)]

            # Simplex driver
            if simplex_options.get('exact', False):
                ret_code = _lib.glp_exact(prob, ctypes.byref(smcp))
            else:
                ret_code = _lib.glp_simplex(prob, ctypes.byref(smcp))
            if ret_code != GLPK.SUCCESS:
                warn('GLPK simplex not successful!', OptimizeWarning)
                return OptimizeResult({
                    'message': GLPK.RET_CODES[ret_code],
                    'success': False,
                })

            # Figure out what happened
            status = _lib.glp_get_status(prob)
            message = GLPK.STATUS_CODES[status]
            res = OptimizeResult({
                'status': status,
                'message': message,
                'success': status == GLPK.GLP_OPT,
            })

            # We can read a solution:
            if status == GLPK.GLP_OPT:

                n_cols = _lib.glp_get_num_cols(prob)
                n_rows = _lib.glp_get_num_rows(prob)

                res.fun = _lib.glp_get_obj_val(prob)
                # GLPK rows/columns are 1-based
                res.x = np.array([_lib.glp_get_col_prim(prob, ii)
                                  for ii in range(1, n_cols+1)])
                res.dual = np.array([_lib.glp_get_row_dual(prob, ii)
                                     for ii in range(1, n_rows+1)])

                # We don't get slack without doing sensitivity analysis since
                # GLPK uses auxiliary variables instead of slack!
                res.slack = b_ub - A_ub @ res.x
                res.con = b_eq - A_eq @ res.x

                # We shouldn't be reading this field... But we will anyways
                res.nit = prob.contents.it_cnt

        elif solver == 'interior':

            # Make a control structure
            iptcp = glp_iptcp()
            _lib.glp_init_iptcp(ctypes.byref(iptcp))

            # Set options
            iptcp.msg_lev = message_level*disp
            iptcp.ord_alg = {
                'nord': GLPK.GLP_ORD_NONE,
                'qmd': GLPK.GLP_ORD_QMD,
                'amd': GLPK.GLP_ORD_AMD,
                'symamd': GLPK.GLP_ORD_SYMAMD,
            }[ip_options.get('ordering', 'amd')]

            # Run the solver
            ret_code = _lib.glp_interior(prob, ctypes.byref(iptcp))
            if ret_code != GLPK.SUCCESS:
                warn('GLPK interior-point not successful!', OptimizeWarning)
                return OptimizeResult({
                    'message': GLPK.RET_CODES[ret_code],
                    'success': False,
                })

            # Figure out what happened
            status = _lib.glp_ipt_status(prob)
            message = GLPK.STATUS_CODES[status]
            res = OptimizeResult({
                'status': status,
                'message': message,
                'success': status == GLPK.GLP_OPT,
            })

            # We can read a solution:
            if status == GLPK.GLP_OPT:

                n_cols = _lib.glp_get_num_cols(prob)
                # Fixed: this call was previously misspelled as
                # ``gpl_get_num_rows``.
                n_rows = _lib.glp_get_num_rows(prob)

                res.fun = _lib.glp_ipt_obj_val(prob)
                res.x = np.array([_lib.glp_ipt_col_prim(prob, ii)
                                  for ii in range(1, n_cols+1)])
                res.dual = np.array([_lib.glp_ipt_row_dual(prob, ii)
                                     for ii in range(1, n_rows+1)])

                # We don't get slack without doing sensitivity analysis since
                # GLPK uses auxiliary variables instead of slack!
                res.slack = b_ub - A_ub @ res.x
                res.con = b_eq - A_eq @ res.x

                # We shouldn't be reading this field... But we will anyways
                res.nit = prob.contents.it_cnt

        elif solver == 'mip':

            # Make a control structure
            iocp = glp_iocp()
            _lib.glp_init_iocp(ctypes.byref(iocp))

            # Make variables integer- and binary-valued.  Binary kinds are
            # applied last so they win over duplicated integer indices.
            if not mip_options.get('nomip', False):
                for jj in mip_options.get('intcon', []):
                    _lib.glp_set_col_kind(prob, jj+1, GLPK.GLP_IV)
                for jj in mip_options.get('bincon', []):
                    _lib.glp_set_col_kind(prob, jj+1, GLPK.GLP_BV)

            # Set options
            iocp.msg_lev = message_level*disp
            iocp.br_tech = {
                'first': GLPK.GLP_BR_FFV,
                'last': GLPK.GLP_BR_LFV,
                'mostf': GLPK.GLP_BR_MFV,
                'drtom': GLPK.GLP_BR_DTH,
                'pcost': GLPK.GLP_BR_PCH,
            }[mip_options.get('branch', 'drtom')]
            iocp.bt_tech = {
                'dfs': GLPK.GLP_BT_DFS,
                'bfs': GLPK.GLP_BT_BFS,
                'bestp': GLPK.GLP_BT_BPH,
                'bestb': GLPK.GLP_BT_BLB,
            }[mip_options.get('backtrack', 'bestb')]
            # Fixed: the field was previously misspelled ``pp_teck``, which
            # only created a stray Python attribute and left the option unset.
            iocp.pp_tech = {
                'none': GLPK.GLP_PP_NONE,
                'root': GLPK.GLP_PP_ROOT,
                'all': GLPK.GLP_PP_ALL,
            }[mip_options.get('preprocess', 'all')]
            iocp.sr_heur = on_off[mip_options.get('round', True)]
            iocp.fp_heur = on_off[mip_options.get('fpump', False)]

            ps_tm_lim = mip_options.get('proxy', 60)
            if ps_tm_lim:
                iocp.ps_heur = GLPK.GLP_ON
                iocp.ps_tm_lim = ps_tm_lim*1000  # seconds -> milliseconds
            else:
                iocp.ps_heur = GLPK.GLP_OFF
                iocp.ps_tm_lim = 0

            cuts = set(mip_options.get('cuts', []))
            if 'all' in cuts:
                cuts = {'gomory', 'mir', 'cover', 'clique'}
            if 'gomory' in cuts:
                iocp.gmi_cuts = GLPK.GLP_ON
            if 'mir' in cuts:
                iocp.mir_cuts = GLPK.GLP_ON
            if 'cover' in cuts:
                iocp.cov_cuts = GLPK.GLP_ON
            if 'clique' in cuts:
                iocp.clq_cuts = GLPK.GLP_ON

            iocp.tol_int = mip_options.get('tol_int', 1e-5)
            iocp.tol_obj = mip_options.get('tol_obj', 1e-7)
            iocp.mip_gap = mip_options.get('mip_gap', 0.0)
            iocp.tm_lim = timeout
            iocp.presolve = on_off[mip_options.get('presolve', True)]
            iocp.binarize = on_off[mip_options.get('binarize', False)]

            # Run the solver
            ret_code = _lib.glp_intopt(prob, ctypes.byref(iocp))
            if ret_code != GLPK.SUCCESS:
                warn('GLPK MIP not successful!', OptimizeWarning)
                return OptimizeResult({
                    'message': GLPK.RET_CODES[ret_code],
                    'success': False,
                })

            # Figure out what happened
            status = _lib.glp_mip_status(prob)
            message = GLPK.STATUS_CODES[status]
            res = OptimizeResult({
                'status': status,
                'message': message,
                'success': status in [GLPK.GLP_OPT, GLPK.GLP_FEAS],
            })

            # We can read a solution:
            if res.success:
                res.fun = _lib.glp_mip_obj_val(prob)
                res.x = np.array([_lib.glp_mip_col_val(prob, ii)
                                  for ii in range(1, len(c)+1)])

        else:
            raise ValueError('"%s" is not a recognized solver.' % solver)

    finally:
        # We're done, cleanup!
        _lib.glp_delete_prob(prob)

    # NOTE: ``res.status`` is the GLPK status code as reported by the
    # library; it is not mapped onto another status numbering.
    return res
Esempio n. 45
0
def gp_minimize(func, dimensions, base_estimator=None, acq="LCB", xi=0.01,
                kappa=1.96, search="sampling", maxiter=1000, n_points=500,
                n_start=10, n_restarts_optimizer=5, random_state=None):
    """Bayesian optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian prior which is much quicker to evaluate.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `base_estimator` [a Gaussian process estimator]:
        The Gaussian process estimator to use for optimization.
        If None, a `GaussianProcessRegressor` with a
        `ConstantKernel * Matern(nu=2.5)` kernel is used.

    * `acq` [string, default=`"LCB"`]:
        Function to minimize over the gaussian prior. Can be either

        - `"LCB"` for lower confidence bound,
        - `"EI"` for expected improvement,
        - `"PI"` for probability of improvement.

    * `xi` [float, default=0.01]:
        Controls how much improvement one wants over the previous best
        values. Used when the acquisition is either `"EI"` or `"PI"`.

    * `kappa` [float, default=1.96]:
        Controls how much of the variance in the predicted values should be
        taken into account. If set to be very high, then we are favouring
        exploration over exploitation and vice versa.
        Used when the acquisition is `"LCB"`.

    * `search` [string, `"sampling"` or `"lbfgs"`]:
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to `"sampling"`, `n_points` are sampled randomly
        and the Gaussian Process prior is updated with the point that gives
        the best acquisition value over the Gaussian prior.

        If search is set to `"lbfgs"`, then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian prior. This is repeated `n_restarts_optimizer`
        times and the best candidate is kept.

    * `maxiter` [int, default=1000]:
        Number of iterations to find the minimum. Note that `n_start`
        iterations are effectively discounted, such that total number of
        function evaluations is `max(maxiter, n_start)`.

    * `n_points` [int, default=500]:
        Number of points to sample to determine the next "best" point.
        Useless if search is set to `"lbfgs"`.

    * `n_start` [int, default=10]:
        Number of random initialization points.

    * `n_restarts_optimizer` [int, default=5]:
        The number of restarts of the optimizer when `search` is `"lbfgs"`.
        Must be at least 1 in that case.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [float]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `models`: surrogate models used for each iteration.
        - `x_iters` [array]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `space` [Space]: the optimisation space.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`:
        If `search` is not `"sampling"` or `"lbfgs"`, if `search` is
        `"lbfgs"` with `n_restarts_optimizer < 1`, or if `func` does not
        return a scalar.
    """
    # Fail fast on arguments that would otherwise leave `next_x` unbound in
    # the loop below, after the (expensive) initial evaluations were spent.
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "Expected search to be 'sampling' or 'lbfgs', got %r"
            % (search,))
    if search == "lbfgs" and n_restarts_optimizer < 1:
        raise ValueError(
            "n_restarts_optimizer should be at least 1 when search is "
            "'lbfgs', got %r" % (n_restarts_optimizer,))

    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Default GP
    if base_estimator is None:
        n_dims = space.transformed_n_dims
        base_estimator = GaussianProcessRegressor(
            kernel=(ConstantKernel(1.0, (0.01, 1000.0)) *
                    Matern(length_scale=np.ones(n_dims),
                           length_scale_bounds=[(0.01, 100)] * n_dims,
                           nu=2.5)),
            normalize_y=True, alpha=10e-6, random_state=random_state)

    # First points
    Xi = space.rvs(n_samples=n_start, random_state=rng)
    yi = [func(x) for x in Xi]
    if np.ndim(yi) != 1:
        raise ValueError(
            "The function to be optimized should return a scalar")

    # Bayesian optimization loop
    models = []

    for i in range(maxiter - n_start):
        gp = clone(base_estimator)

        # The surrogate is fitted in the transformed space
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            gp.fit(space.transform(Xi), yi)

        models.append(gp)

        # Best observed value so far; constant within one iteration
        y_opt = np.min(yi)

        if search == "sampling":
            X = space.transform(space.rvs(n_samples=n_points,
                                          random_state=rng))
            values = _gaussian_acquisition(
                X=X, model=gp, y_opt=y_opt, method=acq,
                xi=xi, kappa=kappa)
            next_x = X[np.argmin(values)]

        else:  # search == "lbfgs", validated above
            best = np.inf
            # Reset per iteration so a point from a previous iteration
            # (already mapped back to the original space) is never reused.
            next_x = None

            for j in range(n_restarts_optimizer):
                x0 = space.transform(space.rvs(n_samples=1,
                                               random_state=rng))[0]

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    x, a, _ = fmin_l_bfgs_b(
                        _acquisition, x0,
                        args=(gp, y_opt, acq, xi, kappa),
                        bounds=space.transformed_bounds,
                        approx_grad=True, maxiter=10)

                if a < best:
                    next_x, best = x, a

            if next_x is None:
                # No restart produced an acquisition value below +inf
                # (e.g. all inf/nan); fall back to the last optimizer output.
                next_x = x

        # Candidates live in the transformed space; map back before
        # evaluating the user function.
        next_x = space.inverse_transform(next_x.reshape((1, -1)))[0]
        next_y = func(next_x)
        Xi = np.vstack((Xi, next_x))
        yi.append(next_y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(yi)
    res.x = Xi[best]
    res.fun = yi[best]
    res.func_vals = np.array(yi)
    res.x_iters = Xi
    res.models = models
    res.space = space

    return res
def dual_annealing(func,
                   bounds,
                   args=(),
                   maxiter=1000,
                   local_search_options=None,
                   initial_temp=5230.,
                   restart_temp_ratio=2.e-5,
                   visit=2.62,
                   accept=-5.0,
                   maxfun=1e7,
                   seed=None,
                   no_local_search=False,
                   callback=None,
                   x0=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized. Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a  tuple of any additional fixed parameters needed to
        completely specify the function.
    bounds : sequence, shape (n, 2)
        Bounds for variables.  ``(min, max)`` pairs for each element in ``x``,
        defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Default value is 1000.
        If it is not positive, no global search iteration is performed and
        the state left by the initial energy-state reset is returned.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
        ``None`` (the default) is equivalent to an empty dict.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitate a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing process
        is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `~numpy.random.mtrand.RandomState` instance}, optional
        If `seed` is not specified the `~numpy.random.mtrand.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with the
        following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will stop.
    x0 : ndarray, shape(n,), optional
        Coordinates of a single N-D starting point.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` which describes the
        cause of the termination.
        See `OptimizeResult` for a description of other attributes.

    Raises
    ------
    ValueError
        If ``x0`` and ``bounds`` differ in length, if ``restart_temp_ratio``
        is outside (0, 1), or if the bounds contain inf/nan values or do not
        satisfy ``min < max``.

    Notes
    -----
    This function implements the Dual Annealing optimization. This stochastic
    approach derived from [3]_ combines the generalization of CSA (Classical
    Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
    to a strategy for applying a local search on accepted locations [4]_.
    An alternative implementation of this same algorithm is described in [5]_
    and benchmarks are presented in [6]_. This approach introduces an advanced
    method to refine the solution found by the generalized annealing
    process. This algorithm uses a distorted Cauchy-Lorentz visiting
    distribution, with its shape controlled by the parameter :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is used
    to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
    acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1988).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-D problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
    -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """  # noqa: E501
    # ``None`` means "no extra options"; building the dict per call avoids
    # having a single mutable default object shared between calls.
    if local_search_options is None:
        local_search_options = {}

    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper))
            or np.any(np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')
    # Checking that bounds are the same length
    if not len(lower) == len(upper):
        raise ValueError('Bounds do not have the same dimensions')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer
    minimizer_wrapper = LocalSearchWrapper(bounds, func_wrapper,
                                           **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)
    need_to_stop = False
    iteration = 0
    message = []
    # With a non-positive ``maxiter`` the inner ``for`` loop has an empty
    # range and can never set ``need_to_stop``; stop up front instead of
    # spinning forever in the outer ``while``.
    if maxiter <= 0:
        message.append("Maximum number of iteration reached")
        need_to_stop = True
    # OptimizeResult object to be returned
    optimize_res = OptimizeResult()
    optimize_res.success = True
    optimize_res.status = 0

    # Numerator of the temperature schedule: 2**(visit - 1) - 1
    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    # Run the search loop. Each pass of the outer loop is one annealing
    # cycle; the inner loop index ``i`` restarts after a re-annealing while
    # ``iteration`` keeps counting across cycles.
    while (not need_to_stop):
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                # A non-None return value is a stop message
                message.append(val)
                need_to_stop = True
                optimize_res.success = False
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    optimize_res.success = False
                    break
            iteration += 1

    # Setting the OptimizeResult values
    optimize_res.x = energy_state.xbest
    optimize_res.fun = energy_state.ebest
    optimize_res.nit = iteration
    optimize_res.nfev = func_wrapper.nfev
    optimize_res.njev = func_wrapper.ngev
    optimize_res.nhev = func_wrapper.nhev
    optimize_res.message = message
    return optimize_res
Esempio n. 47
0
def gp_minimize(func,
                bounds=None,
                search="sampling",
                random_state=None,
                maxiter=1000,
                acq="UCB",
                num_points=500):
    """
    Black-box optimization using Gaussian Processes.

    If every function evaluation is expensive, for instance
    when the parameters are the hyperparameters of a neural network
    and the function evaluation is the mean cross-validation score across
    ten folds, optimizing the hyperparameters by standard optimization
    routines would take forever!

    The idea is to approximate the function using a Gaussian process.
    In other words the function values are assumed to follow a multivariate
    gaussian. The covariance of the function values are given by a
    GP kernel between the parameters. Then a smart choice to choose the
    next parameter to evaluate can be made by the acquisition function
    over the Gaussian posterior which is much quicker to evaluate.

    Internally every candidate lives in the unit cube ``[0, 1]^n``; it is
    rescaled to ``bounds`` only when ``func`` is called and when the
    results are packed.

    Parameters
    ----------
    func: callable
        Function to minimize. Should take a array of parameters and
        return the function value.

    bounds: array-like, shape (n_parameters, 2)
        ``bounds[i][0]`` should give the lower bound of each parameter and
        ``bounds[i][1]`` should give the upper bound of each parameter.
        Required, despite the ``None`` default in the signature.

    search: string, "sampling" or "lbfgs"
        Searching for the next possible candidate to update the Gaussian prior
        with.

        If search is set to "sampling", ``num_points`` are sampled randomly
        and the Gaussian Process prior is updated with that point that gives
        the best acquisition value over the Gaussian posterior.

        If search is set to "lbfgs", then a point is sampled randomly, and
        lbfgs is run for 10 iterations optimizing the acquisition function
        over the Gaussian posterior.

    random_state: int, RandomState instance, or None (default)
        Set random state to something other than None for reproducible
        results. A ``RandomState`` instance is used as-is.

    maxiter: int, default 1000
        Number of iterations to find the minimum. One extra evaluation is
        made at a random starting point before the loop, so ``func`` is
        called ``maxiter + 1`` times in total.

    acq: string, default "UCB"
        Function to minimize over the gaussian posterior. Can be either
        the "UCB" which refers to the UpperConfidenceBound or "EI" which
        is the Expected Improvement.

    num_points: int, default 500
        Number of points to sample to determine the next "best" point.
        Useless if search is set to "lbfgs".

    Returns
    -------
    res: OptimizeResult, scipy object
        The optimization result returned as a OptimizeResult object.
        Important attributes are
        ``x`` - array, the optimization solution (in ``bounds`` units),
        ``fun`` - float, the value of the function at the optimum,
        ``models``- gp_models[i]. the prior on the function fit at
                       iteration[i].
        ``func_vals`` - list, the function value at the ith iteration.
        ``x_iters`` - the value of ``x`` corresponding to the function value
                      at the ith iteration.
        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    ValueError
        If ``bounds`` is None or ``search`` is not one of the supported
        strategies.
    """
    # Fail fast with a clear message instead of an opaque ``len(None)``
    # TypeError or an unbound ``best_x`` after an expensive GP fit.
    if bounds is None:
        raise ValueError("`bounds` must be provided, got None")
    if search not in ("sampling", "lbfgs"):
        raise ValueError(
            "`search` should be 'sampling' or 'lbfgs', got %r" % (search,))

    # A RandomState instance cannot be used as a seed; reuse it directly.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    num_params = len(bounds)
    lower_bounds, upper_bounds = zip(*bounds)
    upper_bounds = np.asarray(upper_bounds)
    lower_bounds = np.asarray(lower_bounds)
    span = upper_bounds - lower_bounds

    # Random starting point in the unit cube, evaluated in bounds units.
    x0 = rng.rand(num_params)
    func_val = [func(lower_bounds + span * x0)]

    length_scale = np.ones(num_params)
    gp_params = {
        'kernel': Matern(length_scale=length_scale, nu=2.5),
        'normalize_y': True,
        'random_state': random_state
    }
    lbfgs_bounds = np.tile((0, 1), (num_params, 1))

    gp_models = []
    # History of evaluated points (unit-cube coordinates), one row each.
    x = np.reshape(x0, (1, -1))

    for i in range(maxiter):
        # Refit a fresh GP on everything evaluated so far.
        gpr = GaussianProcessRegressor(**gp_params)
        gpr.fit(x, func_val)

        if search == "sampling":
            sampling = rng.rand(num_points, num_params)
            acquis = acquisition_func(sampling, gpr, np.min(func_val), acq)
            best_arg = np.argmin(acquis)
            best_x = sampling[best_arg]
        else:  # search == "lbfgs", validated above
            init = rng.rand(num_params)
            best_x, _, _ = fmin_l_bfgs_b(acquisition_func,
                                         np.asfortranarray(init),
                                         args=(gpr, np.min(func_val), acq),
                                         bounds=lbfgs_bounds,
                                         approx_grad=True,
                                         maxiter=10)

        gp_models.append(gpr)

        best_f = func(lower_bounds + span * best_x)
        x = np.vstack((x, best_x))
        func_val.append(best_f)

    # Map the whole history back from the unit cube to bounds units.
    x = lower_bounds + span * x
    func_ind = np.argmin(func_val)

    res = OptimizeResult()
    res.models = gp_models
    res.x = x[func_ind]
    res.fun = func_val[func_ind]
    res.func_vals = func_val
    res.x_iters = x

    return res
Esempio n. 48
0
def dummy_minimize(func,
                   dimensions,
                   n_calls=100,
                   x0=None,
                   y0=None,
                   random_state=None):
    """Random search by uniform sampling within the given bounds.

    Parameters
    ----------
    * `func` [callable]:
        Function to minimize. Should take a array of parameters and
        return the function values.

    * `dimensions` [list, shape=(n_dims,)]:
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(upper_bound, lower_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(upper_bound, lower_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

    * `n_calls` [int, default=100]:
        Number of calls to `func` to find the minimum. When `x0` is given
        without `y0`, the evaluations at `x0` count towards `n_calls`;
        when `y0` is given as well, `n_calls` random points are evaluated
        in addition to the supplied ones.

    * `x0` [list, list of lists or `None`]:
        Initial input points.

        - If it is a list of lists, use it as a list of input points.
        - If it is a list, use it as a single initial input point.
        - If it is `None` or an empty list, no initial input points are
          used.

    * `y0` [list, scalar or `None`]
        Evaluation of initial input points.

        - If it is a list, then it corresponds to evaluations of the function
          at each element of `x0` : the i-th element of `y0` corresponds
          to the function evaluated at the i-th element of `x0`.
        - If it is a scalar, then it corresponds to the evaluation of the
          function at `x0`.
        - If it is None and `x0` is provided, then the function is evaluated
          at each element of `x0`.

    * `random_state` [int, RandomState instance, or None (default)]:
        Set random state to something other than None for reproducible
        results.

    Returns
    -------
    * `res` [`OptimizeResult`, scipy object]:
        The optimization result returned as a OptimizeResult object.
        Important attributes are:

        - `x` [list]: location of the minimum.
        - `fun` [float]: function value at the minimum.
        - `x_iters` [list of lists]: location of function evaluation for each
           iteration.
        - `func_vals` [array]: function value for each iteration.
        - `models` [list]: always empty; no surrogate model is fitted.
        - `space` [Space]: the optimisation space.
        - `specs` [dict]: the call specifications.
        - `random_state` [RandomState instance]: State of the random state
           at the end of minimization.

        For more details related to the OptimizeResult object, refer
        http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html

    Raises
    ------
    * `ValueError`: if `x0`/`y0` are malformed or inconsistent with each
      other, or if `func` does not return a scalar.
    """
    # Save call args. Must stay the first statement so that only the call
    # arguments (and no later locals) are captured.
    specs = {
        "args": copy.copy(inspect.currentframe().f_locals),
        "function": inspect.currentframe().f_code.co_name
    }

    # Check x0 / y0 before building the random state and the space, so
    # that malformed arguments are reported without doing any other work.
    if x0 is None:
        x0 = []
    elif not (isinstance(x0, list) and len(x0) == 0):
        # An empty list means "no initial points"; it must not be indexed.
        if not isinstance(x0[0], list):
            x0 = [x0]

    if not isinstance(x0, list):
        raise ValueError("`x0` should be a list, got %s" % type(x0))

    if len(x0) > 0 and y0 is not None:
        if isinstance(y0, Iterable):
            # Copy, so the caller's sequence is not extended below.
            y0 = list(y0)
        elif isinstance(y0, numbers.Number):
            y0 = [y0]
        else:
            raise ValueError("`y0` should be an iterable or a scalar, got %s" %
                             type(y0))
        if len(x0) != len(y0):
            raise ValueError("`x0` and `y0` should have the same length")

        if not all(map(np.isscalar, y0)):
            raise ValueError("`y0` elements should be scalars")

    elif len(x0) > 0 and y0 is None:
        y0 = []
        # The evaluations at x0 are taken out of the call budget.
        n_calls -= len(x0)

    elif len(x0) == 0 and y0 is not None:
        raise ValueError("`x0`cannot be `None` when `y0` is provided")

    else:  # len(x0) == 0 and y0 is None
        y0 = []

    rng = check_random_state(random_state)
    space = Space(dimensions)

    # Random search: initial points first, then the random samples.
    X = x0 + space.rvs(n_samples=n_calls, random_state=rng)
    y = y0

    # Points before index len(y) already have a known function value.
    n_known = len(y)
    for offset, point in enumerate(X[n_known:]):
        y_i = func(point)

        # Only the first evaluation is checked for scalar-ness.
        if offset == 0 and not np.isscalar(y_i):
            raise ValueError("`func` should return a scalar")

        y.append(y_i)

    y = np.array(y)

    # Pack results
    res = OptimizeResult()
    best = np.argmin(y)
    res.x = X[best]
    res.fun = y[best]
    res.func_vals = y
    res.x_iters = X
    res.models = []  # Create attribute even though it is empty
    res.space = space
    res.random_state = rng
    res.specs = specs

    return res
Esempio n. 49
0
    def solve(self):
        """Run the differential evolution loop and return the result.

        The initial population is evaluated first, then generations are
        evolved until one of the following happens: the population has
        converged (fractional standard deviation of the energies below
        ``self.tol``), ``self.maxiter`` generations have been evolved,
        more than ``self.maxfun`` function evaluations were made,
        ``self.max_execution_time`` seconds have elapsed, or the callback
        returned True. If ``self.polish`` is set, the best member is then
        refined with L-BFGS-B.

        Returns
        -------
        OptimizeResult
            With ``x``, ``fun``, ``nfev``, ``nit``, ``message`` and
            ``success`` set; ``jac`` is added when polishing improved the
            solution. ``success`` is False whenever the run stopped for
            any reason other than convergence.
        """
        nfev, nit, warning_flag = 0, 0, False
        status_message = _status_message['success']

        # calculate energies to start with
        for index, candidate in enumerate(self.population):
            parameters = self._scale_parameters(candidate)
            self.population_energies[index] = self.func(parameters, *self.args)
            nfev += 1

            if nfev > self.maxfun:
                warning_flag = True
                status_message = _status_message['maxfev']
                break

        minval = np.argmin(self.population_energies)

        # put the lowest energy into the best solution position.
        lowest_energy = self.population_energies[minval]
        self.population_energies[minval] = self.population_energies[0]
        self.population_energies[0] = lowest_energy

        self.population[[0, minval], :] = self.population[[minval, 0], :]

        if warning_flag:
            # Evaluation budget exhausted while scoring the initial
            # population: report the best member seen so far.
            return OptimizeResult(x=self.x,
                                  fun=self.population_energies[0],
                                  nfev=nfev,
                                  nit=nit,
                                  message=status_message,
                                  success=(warning_flag is not True))

        # do the optimisation.
        start_time = time.time()
        nit = 0
        # Evolve at most ``maxiter`` generations. A ``while`` (rather than
        # ``range``) keeps working if ``maxiter`` is not an integer.
        while nit < self.maxiter:
            # Check the time budget before counting the generation, so
            # ``nit`` only counts generations that were actually started.
            if start_time + self.max_execution_time < time.time():
                warning_flag = True
                status_message = 'Max execution time reached'
                break
            nit += 1

            if self.dither is not None:
                # Draw a new scale uniformly within the dither interval
                self.scale = self.random_number_generator.rand() * (
                    self.dither[1] - self.dither[0]) + self.dither[0]
            for candidate in range(np.size(self.population, 0)):
                if nfev > self.maxfun:
                    warning_flag = True
                    status_message = _status_message['maxfev']
                    break

                trial = self._mutate(candidate)
                self._ensure_constraint(trial)
                parameters = self._scale_parameters(trial)

                energy = self.func(parameters, *self.args)
                nfev += 1

                # Greedy selection: keep the trial only if it is better
                if energy < self.population_energies[candidate]:
                    self.population[candidate] = trial
                    self.population_energies[candidate] = energy

                    # Slot 0 always holds the best member found so far
                    if energy < self.population_energies[0]:
                        self.population_energies[0] = energy
                        self.population[0] = trial

            # stop when the fractional s.d. of the population is less than tol
            # of the mean energy
            convergence = (
                np.std(self.population_energies) /
                np.abs(np.mean(self.population_energies) + _MACHEPS))

            if self.disp:
                print("differential_evolution step %d: f(x)= %g" %
                      (nit, self.population_energies[0]))

            if (self.callback and
                    self.callback(self._scale_parameters(self.population[0]),
                                  convergence=self.tol / convergence) is True):

                warning_flag = True
                status_message = ('callback function requested stop early '
                                  'by returning True')
                break

            if convergence < self.tol or warning_flag:
                break

        else:
            # Loop condition became false: generation budget exhausted
            status_message = _status_message['maxiter']
            warning_flag = True

        DE_result = OptimizeResult(x=self.x,
                                   fun=self.population_energies[0],
                                   nfev=nfev,
                                   nit=nit,
                                   message=status_message,
                                   success=(warning_flag is not True))

        if self.polish:
            result = minimize(self.func,
                              np.copy(DE_result.x),
                              method='L-BFGS-B',
                              bounds=self.limits.T,
                              args=self.args)

            nfev += result.nfev
            DE_result.nfev = nfev

            # Only adopt the polished point if it is a real improvement
            if result.fun < DE_result.fun:
                DE_result.fun = result.fun
                DE_result.x = result.x
                DE_result.jac = result.jac
                # to keep internal state consistent
                self.population_energies[0] = result.fun
                self.population[0] = self._unscale_parameters(result.x)

        return DE_result