Ejemplo n.º 1
0
def newton_method(fun,
                  x0,
                  fprime,
                  args,
                  tol=1.0e-4,
                  maxiter=1000,
                  callback=None):
    '''Minimize ``fun`` with Newton steps and an Armijo line search.

    Every iteration solves ``A d = -fprime(x, *args)`` for the search
    direction ``d`` and asks ``armijo_stepsize`` for the step length.

    Args:
        fun: Objective, called as ``fun(x, *args)``.
        x0: Starting point; it is copied into a numpy array.
        fprime: Gradient of ``fun``, called as ``fprime(x, *args)``.
        args: Two-element sequence ``(A, b)`` forwarded to ``fun`` and
            ``fprime``. ``A`` is the matrix of the linear solve
            (presumably the Hessian of a quadratic objective -- confirm
            against the caller).
        tol: Stop once the objective values reported by the line search
            for the current and the proposed point differ by less than
            this.
        maxiter: Upper bound on the number of iterations.
        callback: Optional callable invoked with each accepted iterate.

    Returns:
        OptimizeResult with ``x``, ``fun`` and ``nit``. ``nit`` is the
        zero-based index of the last iteration that was started (0 when
        ``maxiter`` is 0).
    '''
    x = numpy.array(x0)
    # Only A is used here; the unpacking still enforces len(args) == 2.
    A, _ = args

    # Bound up front so that the result can be built even if the loop body
    # never runs (maxiter <= 0).
    itr = 0
    # `range` instead of the Python-2-only `xrange`.
    for itr in range(maxiter):
        direction = -1 * numpy.linalg.solve(A, fprime(x, *args))
        alpha, obj_current, obj_next = armijo_stepsize(fun,
                                                       x,
                                                       fprime,
                                                       direction,
                                                       args=args)

        # Converged: the step would barely change the objective, so the
        # proposed point is not taken.
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break

        x = x + alpha * direction
        if callback is not None:
            callback(x)

    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = itr
    return result
Ejemplo n.º 2
0
def steepest_decent(fun,
                    x0,
                    fprime,
                    args,
                    tol=1.0e-4,
                    maxiter=1000,
                    callback=None):
    '''Minimize ``fun`` by steepest descent with an Armijo line search.

    Every iteration moves along the negative gradient; the step length is
    obtained from ``armijo_stepsize``.

    Args:
        fun: Objective, called as ``fun(x, *args)``.
        x0: Starting point; it is copied into a numpy array.
        fprime: Gradient of ``fun``, called as ``fprime(x, *args)``.
        args: Extra positional arguments forwarded to ``fun`` and
            ``fprime``.
        tol: Stop once the objective values reported by the line search
            for the current and the proposed point differ by less than
            this.
        maxiter: Upper bound on the number of iterations.
        callback: Optional callable invoked with each accepted iterate.

    Returns:
        OptimizeResult with ``x``, ``fun`` and ``nit``. ``nit`` is the
        zero-based index of the last iteration that was started (0 when
        ``maxiter`` is 0).
    '''
    x = numpy.array(x0)

    # Bound up front so that the result can be built even if the loop body
    # never runs (maxiter <= 0).
    itr = 0
    # `range` instead of the Python-2-only `xrange`.
    for itr in range(maxiter):
        direction = -1 * fprime(x, *args)
        alpha, obj_current, obj_next = armijo_stepsize(fun,
                                                       x,
                                                       fprime,
                                                       direction,
                                                       args=args)

        # Converged: the step would barely change the objective, so the
        # proposed point is not taken.
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break

        x = x + alpha * direction
        if callback is not None:
            callback(x)

    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = itr
    return result
Ejemplo n.º 3
0
def get_optimization_results(
        t,
        population,
        factorial_cost,
        scalar_fitness,
        skill_factor,
        pairs=None,
        tasks=None):
    """Build one ``OptimizeResult`` per task for generation ``t``.

    The number of tasks is the number of distinct values in
    ``skill_factor``; task indices ``0 .. K-1`` are handed to
    ``get_best_individual`` to obtain the best ``x`` and ``fun``.

    Each result also carries ``nit`` (``t``), ``nfev``
    (``(t + 1) * (len(population) // 2)``), ``pair`` (row ``k`` of
    ``pairs``, or None) and ``ucb_value``
    (``tasks[k].ucb_solver.value``, or None).

    Returns:
        List of results, ordered by task index.
    """
    task_count = len(set(skill_factor))
    half_population = len(population) // 2

    def summarize(task):
        # Assemble the record for a single task index.
        entry = OptimizeResult()
        best_x, best_cost = get_best_individual(
            population, factorial_cost, scalar_fitness, skill_factor, task)
        entry.x = best_x
        entry.fun = best_cost
        entry.nit = t
        entry.nfev = (t + 1) * half_population
        entry.pair = None if pairs is None else pairs[task, :]
        entry.ucb_value = (None if tasks is None
                           else tasks[task].ucb_solver.value)
        return entry

    return [summarize(task) for task in range(task_count)]
Ejemplo n.º 4
0
 def result(self):
     """Package the instance's stored state as an ``OptimizeResult``.

     Fields: ``x`` (``self._xmin``), ``fun`` (``self._fvalue``),
     ``message`` (``self._message``) and ``nit`` (``self._step_record``).
     """
     summary = OptimizeResult()
     fields = (
         ('x', self._xmin),
         ('fun', self._fvalue),
         ('message', self._message),
         ('nit', self._step_record),
     )
     for key, value in fields:
         setattr(summary, key, value)
     return summary
Ejemplo n.º 5
0
 def result(self):
     """Package the instance's stored state as an ``OptimizeResult``.

     Fields: ``x`` (``self.es.xbest``), ``fun`` (``self.es.ebest``),
     ``nit`` (``self._iter``) and ``ncall`` (``self.owf.nb_fun_call``).
     """
     summary = OptimizeResult()
     fields = (
         ('x', self.es.xbest),
         ('fun', self.es.ebest),
         ('nit', self._iter),
         ('ncall', self.owf.nb_fun_call),
     )
     for key, value in fields:
         setattr(summary, key, value)
     return summary
Ejemplo n.º 6
0
 def result(self):
     """Package the instance's stored state as an ``OptimizeResult``.

     Fields: ``x`` (``self._xmin``), ``fun`` (``self._fvalue``),
     ``message`` (``self._message``) and ``nit`` (``self._step_record``).
     """
     summary = OptimizeResult()
     fields = (
         ('x', self._xmin),
         ('fun', self._fvalue),
         ('message', self._message),
         ('nit', self._step_record),
     )
     for key, value in fields:
         setattr(summary, key, value)
     return summary
Ejemplo n.º 7
0
 def result(self):
     """Package the instance's stored state as an ``OptimizeResult``.

     Fields: ``x`` (``self.es.xbest``), ``fun`` (``self.es.ebest``),
     ``nit`` (``self._iter``) and ``ncall`` (``self.owf.nb_fun_call``).
     """
     summary = OptimizeResult()
     fields = (
         ('x', self.es.xbest),
         ('fun', self.es.ebest),
         ('nit', self._iter),
         ('ncall', self.owf.nb_fun_call),
     )
     for key, value in fields:
         setattr(summary, key, value)
     return summary
Ejemplo n.º 8
0
def get_optimization_results(t, N, factors, taskset):
    """Build one ``OptimizeResult`` per entry of ``factors``.

    Each result holds a deep copy of the factor's ``theta`` as ``x``, its
    ``f_opt`` as ``fun``, ``t`` as ``nit``, ``(t + 1) * 2 * N`` as
    ``nfev`` and its own deep copy of ``taskset.normalizers`` as
    ``message``.

    Returns:
        List of results in the order of ``factors``.
    """

    def summarize(factor):
        # Assemble the record for a single factor.
        entry = OptimizeResult()
        entry.x = deepcopy(factor.theta)
        entry.fun = factor.f_opt
        entry.nit = t
        entry.nfev = (t + 1) * 2 * N
        entry.message = deepcopy(taskset.normalizers)
        return entry

    return [summarize(factors[index]) for index in range(len(factors))]
Ejemplo n.º 9
0
 def get_results(self, t):
     """Build one ``OptimizeResult`` for each of the first ``self.K`` solvers.

     For solver ``self.stos[k]`` the result holds ``population[0]`` as
     ``x``, ``fitness[0]`` as ``fun``, ``t`` as ``nit``,
     ``(t + 1) * self.args.pop_size`` as ``nfev``, plus the solver's
     ``pair`` and ``ucb.value`` (stored as ``ucb_value``).
     """
     task_count = self.K
     pop_size = self.args.pop_size

     def snapshot(solver):
         # Assemble the record for a single solver.
         entry = OptimizeResult()
         entry.x = solver.population[0]
         entry.fun = solver.fitness[0]
         entry.nit = t
         entry.nfev = (t + 1) * pop_size
         entry.pair = solver.pair
         entry.ucb_value = solver.ucb.value
         return entry

     return [snapshot(self.stos[index]) for index in range(task_count)]
Ejemplo n.º 10
0
def get_optimization_results(t, population, factorial_cost, scalar_fitness,
                             skill_factor, message):
    """Build one ``OptimizeResult`` per task for generation ``t``.

    The number of tasks is the number of distinct values in
    ``skill_factor``; task indices ``0 .. K-1`` are handed to
    ``get_best_individual`` (for ``x`` and ``fun``) and to
    ``get_statistics`` (for ``mean`` and ``std``).

    Each result also carries ``message``, ``nit`` (``t``) and ``nfev``
    (``(t + 1) * (len(population) // 2)``).

    Returns:
        List of results, ordered by task index.
    """
    task_count = len(set(skill_factor))
    half_population = len(population) // 2

    def summarize(task):
        # Assemble the record for a single task index.
        entry = OptimizeResult()
        best_x, best_cost = get_best_individual(
            population, factorial_cost, scalar_fitness, skill_factor, task)
        entry.x = best_x
        entry.fun = best_cost
        entry.message = message
        entry.nit = t
        entry.nfev = (t + 1) * half_population
        entry.mean, entry.std = get_statistics(factorial_cost, skill_factor,
                                               task)
        return entry

    return [summarize(task) for task in range(task_count)]
def steepest_decent(fun, x0, fprime, args, tol=1.0e-4, maxiter=1000,
                    callback=None):
    '''Minimize ``fun`` by steepest descent with an Armijo line search.

    Every iteration moves along the negative gradient; the step length is
    obtained from ``armijo_stepsize``.

    Args:
        fun: Objective, called as ``fun(x, *args)``.
        x0: Starting point; it is copied into a numpy array.
        fprime: Gradient of ``fun``, called as ``fprime(x, *args)``.
        args: Extra positional arguments forwarded to ``fun`` and
            ``fprime``.
        tol: Stop once the objective values reported by the line search
            for the current and the proposed point differ by less than
            this.
        maxiter: Upper bound on the number of iterations.
        callback: Optional callable invoked with each accepted iterate.

    Returns:
        OptimizeResult with ``x``, ``fun`` and ``nit``. ``nit`` is the
        zero-based index of the last iteration that was started (0 when
        ``maxiter`` is 0).
    '''
    x = numpy.array(x0)

    # Bound up front so that the result can be built even if the loop body
    # never runs (maxiter <= 0).
    itr = 0
    # `range` instead of the Python-2-only `xrange`.
    for itr in range(maxiter):
        direction = -1 * fprime(x, *args)
        alpha, obj_current, obj_next = armijo_stepsize(fun, x, fprime, direction, args=args)

        # Converged: the step would barely change the objective, so the
        # proposed point is not taken.
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break

        x = x + alpha * direction
        if callback is not None:
            callback(x)

    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = itr
    return result
def newton_method(fun, x0, fprime, args, tol=1.0e-4, maxiter=1000,
                    callback=None):
    '''Minimize ``fun`` with Newton steps and an Armijo line search.

    Every iteration solves ``A d = -fprime(x, *args)`` for the search
    direction ``d`` and asks ``armijo_stepsize`` for the step length.

    Args:
        fun: Objective, called as ``fun(x, *args)``.
        x0: Starting point; it is copied into a numpy array.
        fprime: Gradient of ``fun``, called as ``fprime(x, *args)``.
        args: Two-element sequence ``(A, b)`` forwarded to ``fun`` and
            ``fprime``. ``A`` is the matrix of the linear solve
            (presumably the Hessian of a quadratic objective -- confirm
            against the caller).
        tol: Stop once the objective values reported by the line search
            for the current and the proposed point differ by less than
            this.
        maxiter: Upper bound on the number of iterations.
        callback: Optional callable invoked with each accepted iterate.

    Returns:
        OptimizeResult with ``x``, ``fun`` and ``nit``. ``nit`` is the
        zero-based index of the last iteration that was started (0 when
        ``maxiter`` is 0).
    '''
    x = numpy.array(x0)
    # Only A is used here; the unpacking still enforces len(args) == 2.
    A, _ = args

    # Bound up front so that the result can be built even if the loop body
    # never runs (maxiter <= 0).
    itr = 0
    # `range` instead of the Python-2-only `xrange`.
    for itr in range(maxiter):
        direction = -1 * numpy.linalg.solve(A, fprime(x, *args))
        alpha, obj_current, obj_next = armijo_stepsize(fun, x, fprime, direction, args=args)

        # Converged: the step would barely change the objective, so the
        # proposed point is not taken.
        if numpy.linalg.norm(obj_current - obj_next) < tol:
            break

        x = x + alpha * direction
        if callback is not None:
            callback(x)

    result = OptimizeResult()
    result.x = x
    result.fun = fun(x, *args)
    result.nit = itr
    return result
Ejemplo n.º 13
0
def model_policy_gradient(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        learning_rate: float = 1e-2,
        decay_rate: float = 0.96,
        decay_steps: int = 5,
        log_sigma_init: float = -5.0,
        max_iterations: int = 1000,
        batch_size: int = 10,
        radius_coeff: float = 3.0,
        warmup_steps: int = 10,
        batch_size_model: int = 65536,
        save_func_vals: bool = False,
        random_state: "cirq.RANDOM_STATE_OR_SEED_LIKE" = None,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_evaluations: Optional[int] = None
) -> scipy.optimize.OptimizeResult:
    """Model policy gradient algorithm for black-box optimization.

    The idea of this algorithm is to perform policy gradient, but estimate
    the function values using a surrogate model.
    The surrogate model is a least-squares quadratic
    fit to points sampled from the vicinity of the current iterate.

    The policy is a Gaussian with mean ``current_x`` and per-coordinate
    standard deviation ``sigma = exp(log_sigma)``; both are updated with
    ``_adam_update`` in every iteration.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        learning_rate: The learning rate for the policy gradient.
        decay_rate: The learning decay rate for the Adam optimizer.
        decay_steps: The learning decay steps for the Adam optimizer.
        log_sigma_init: The initial value for the sigma of the policy
            in the log scale.
        max_iterations: The maximum number of iterations to allow before
            termination.
        batch_size: The number of points to sample in each iteration. The
            cost of evaluating these samples is computed through the
            quantum computer cost model.
        radius_coeff: The ratio determining the size of the radius around
            the current iterate to sample points from to build the
            quadratic model. The ratio is with respect to the maximal
            distance of the samples drawn from the current policy.
        warmup_steps: The number of steps before the model policy gradient
            is performed. Before these steps, the policy gradient is used
            without the model.
        batch_size_model: The model sample batch size.
            After the quadratic model is fitted, it is evaluated
            on a big enough batch of samples.
        save_func_vals: Whether to compute and save the function value at
            the current value of the parameters in every iteration.
        random_state: A seed (int) or `np.random.RandomState` class to use
            when generating random values. If not set, defaults to using
            the module methods in `np.random`.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function
            values at those points.
        max_evaluations: The maximum number of function evaluations to
            allow before termination.

    Returns:
        Scipy OptimizeResult with ``x``, ``fun``, ``nit``, ``nfev`` and
        ``message``, plus the histories ``x_iters``, ``xs_iters``,
        ``ys_iters`` and ``func_vals``.
    """
    random_state = value.parse_random_state(random_state)

    # Work on copies so that the caller's lists are not extended in place.
    if known_values is not None:
        known_xs, known_ys = known_values
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    # No budget given: the evaluation check below can then never trigger.
    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    log_sigma = np.ones(n) * log_sigma_init
    sigma = np.exp(log_sigma)

    # set up the first and second moment estimates (one pair for the mean,
    # one pair for log_sigma)
    m_mean = np.zeros(n)
    v_mean = np.zeros(n)
    m_log_sigma = np.zeros(n)
    v_log_sigma = np.zeros(n)

    # set up lr schedules: two instances with identical settings, one per
    # updated parameter
    lr_schedule1 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)
    lr_schedule2 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)

    # From here on `f` is the wrapped callable, invoked as f(x).
    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.fun = 0
    total_evals = 0
    num_iter = 0
    message = None

    # stats: best reward seen so far (tracked but not otherwise used in
    # this function)
    history_max = -np.inf

    while num_iter < max_iterations:
        # get samples from the current policy to evaluate
        # (z is drawn before the budget check, so one draw is consumed even
        # on the iteration that breaks out)
        z = random_state.randn(batch_size, n)
        new_xs = sigma * z + current_x

        if total_evals + batch_size > max_evaluations:
            message = "Reached maximum number of evaluations."
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += batch_size
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value
        # NOTE(review): this extra call to f is not added to total_evals.
        if save_func_vals:
            res.func_vals.append(f(current_x))
            res.x_iters.append(np.copy(current_x))
            res.fun = res.func_vals[-1]

        # current sampling radius (maximal): largest distance from
        # current_x to any of the points just sampled
        max_radius = 0
        for x in new_xs:
            if np.linalg.norm(x - current_x) > max_radius:
                max_radius = np.linalg.norm(x - current_x)

        # Minimizing f is expressed as maximizing the reward -f.
        reward = [-y for y in new_ys]

        # warmup steps control whether to use the model to estimate the f
        if num_iter >= warmup_steps:
            # Determine points to use to build model: every known point
            # within radius_coeff * max_radius of the current iterate
            model_xs = []
            model_ys = []
            for x, y in zip(known_xs, known_ys):
                if np.linalg.norm(x - current_x) < radius_coeff * max_radius:
                    model_xs.append(x)
                    model_ys.append(y)
            # safer way without the `SVD` not converging
            # NOTE(review): `x` is the loop variable left over from the
            # preceding loop, not current_x, while the prediction below is
            # made on offsets from current_x -- confirm this is intended.
            try:
                model = _get_quadratic_model(model_xs, model_ys, x)
                use_model = True
            except ValueError:
                use_model = False

            if use_model:
                # get samples (from model); this replaces both z and new_xs,
                # so the gradient below is computed from the model batch
                z = random_state.randn(batch_size_model, n)
                new_xs = sigma * z + current_x

                # use the model for prediction
                new_ys = model.predict(new_xs - current_x)
                reward = [-y for y in new_ys]

        reward = np.array(reward)

        # stats
        reward_mean = np.mean(reward)
        reward_max = np.max(reward)

        if reward_max > history_max:
            history_max = reward_max

        # subtract baseline
        reward = reward - reward_mean

        # analytic derivatives (natural gradient policy gradient)
        delta_mean = np.dot(z.T, reward) * sigma
        delta_log_sigma = np.dot(z.T**2, reward) / np.sqrt(2)

        delta_mean_norm = np.linalg.norm(np.dot(z.T, reward))
        delta_log_sigma_norm = np.linalg.norm(np.dot(z.T**2, reward))

        # Rescale both update directions by the norms computed above.
        # NOTE(review): there is no guard against a zero norm here (e.g. if
        # all rewards are equal after the baseline subtraction).
        delta_mean = delta_mean / delta_mean_norm
        delta_log_sigma = delta_log_sigma / delta_log_sigma_norm

        # gradient ascent to update the parameters
        current_x, m_mean, v_mean = _adam_update(delta_mean,
                                                 current_x,
                                                 num_iter,
                                                 m_mean,
                                                 v_mean,
                                                 lr_schedule=lr_schedule1)
        log_sigma, m_log_sigma, v_log_sigma = _adam_update(
            delta_log_sigma,
            log_sigma,
            num_iter,
            m_log_sigma,
            v_log_sigma,
            lr_schedule=lr_schedule2,
        )

        # Keep sigma within [exp(-20), exp(2)].
        log_sigma = np.clip(log_sigma, -20.0, 2.0)
        sigma = np.exp(log_sigma)

        num_iter += 1

    # One final evaluation at the last iterate; it is counted in
    # total_evals below.
    final_val = f(current_x)
    res.func_vals.append(final_val)

    if message is None:
        message = "Reached maximum number of iterations."

    res.x_iters.append(current_x)
    total_evals += 1
    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
Ejemplo n.º 14
0
def _sequential_random_embeddings(f,
                                  x0,
                                  bounds,
                                  n_reduced_dims_eff=3,
                                  n_embeddings=10,
                                  verbosity=1,
                                  **optimizer_kwargs):
    """
    Implementation of the Sequential Random Embeddings algorithm described in
    +++++
    H. Qian, Y.-Q. Hu, and Y. Yu, Derivative-Free Optimization of High-Dimensional Non-Convex
    Functions by Sequential Random Embeddings, Proceedings of the Twenty-Fifth International Joint
    Conference on Artificial Intelligence, AAAI Press (2016).
    +++++
    The idea is basically to reduce high-dimensional problems to low-dimensional ones by embedding
    the original, high-dimensional search space ℝ^h into a low dimensional one, ℝ^l, by
    sequentially applying the random linear transformation
    x(n+1) = α(n+1)x(n) + A•y(n+1),    x ∈ ℝ^h, y ∈ ℝ^l, A ∈ N(0, 1)^(h×l), α ∈ ℝ
    and minimizing the objective function f(αx + A•y) w.r.t. (α, y).

    :param f: [callable] Objective function. Must accept its argument x as numpy array
    :param x0: [np.array] Initial values for the bacteria population in the original,
           high-dimensional space ℝ^h
    :param bounds: [callable] Bounds projection, see description of parameter
           ``projection_callback`` in :func:`local_search.bfgs_b`
    :param n_reduced_dims_eff: [int] Effective dimension of the embedded problem, ℝ^(l+1)
    :param n_embeddings: [int] Number of embedding iterations
    :param verbosity: [int] Output verbosity. Must be 0, 1, or 2
    :param optimizer_args: [dict] Arguments to pass to the actual optimization routine
    :return: Best minimum of f found [scipy.optimize.OptimizeResult]
    """

    assert verbosity in [0, 1, 2], 'verbosity must be 0, 1, or 2.'

    orig_dim = x0.shape[1]
    x = np.zeros(orig_dim)
    x_best = x.copy()
    f_best = np.inf
    nfev = nit = 0
    success_best = False
    for i in range(n_embeddings):
        A = np.random.normal(size=(orig_dim, n_reduced_dims_eff - 1))

        # Normalize rows of A
        normalization_sum = A.sum(axis=1)
        normalization_sum = np.where(normalization_sum == 0, 1,
                                     normalization_sum)
        A = A / normalization_sum[:, np.newaxis]

        def f_embedded(arg):
            return f(bounds(arg[0] * x + A.dot(arg[1:]))[0])

        # Set up bounds callback
        def bounds_embedded(arg):
            bounds_hit = np.zeros(len(arg), dtype=bool)
            x_proj, bounds_hit_orig = bounds(arg[0] * x + A.dot(arg[1:]))
            if bounds_hit_orig.any(
            ):  # Boundary hit in original, non-embedded variable
                arg[1:] = np.linalg.lstsq(A, x_proj - arg[0] * x,
                                          rcond=None)[0]
                bounds_hit[1:] = (A[bounds_hit_orig] != 0).any(axis=0)

            return arg, bounds_hit

        # Set up y0
        y0 = np.zeros((x0.shape[0], n_reduced_dims_eff))
        y0[:, 0] = 1
        y0[:, 1:] = np.array(
            [np.linalg.lstsq(A, x_orig - x, rcond=None)[0] for x_orig in x0])

        if verbosity > 0:
            infoMsg = f'\nEmbedding iteration {i}'
            print(infoMsg)
            print('-' * len(infoMsg))

        optimizer_kwargs['verbosity'] = verbosity
        with warnings.catch_warnings():
            warnings.filterwarnings(
                'ignore',
                message=
                'Found initial conditions outside the defined search domain.')
            res_embedded = optimize(f_embedded,
                                    x0=y0,
                                    bounds=bounds_embedded,
                                    **optimizer_kwargs)
        y = res_embedded.x
        f_val = res_embedded.fun
        nfev += res_embedded.nfev
        nit += res_embedded.nit

        x = bounds(y[0] * x + A.dot(y[1:]))[0]

        if verbosity > 0:
            print(f'Random embedding gave x = {x}.')

        if f_val < f_best:
            f_best = f_val
            x_best = x.copy()
            success_best = res_embedded.success

    result = OptimizeResult()
    result.success = success_best
    result.x = x_best
    result.fun = f_best
    result.nfev = nfev
    result.nit = nit
    result.trace = None

    return result
Ejemplo n.º 15
0
def model_gradient_descent(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        rate: float = 1e-1,
        sample_radius: float = 1e-1,
        n_sample_points: int = 100,
        n_sample_points_ratio: Optional[float] = None,
        rate_decay_exponent: float = 0.0,
        stability_constant: float = 0.0,
        sample_radius_decay_exponent: float = 0.0,
        tol: float = 1e-8,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_iterations: Optional[int] = None,
        max_evaluations: Optional[int] = None) -> scipy.optimize.OptimizeResult:
    """Model gradient descent algorithm for black-box optimization.

    The idea of this algorithm is to perform gradient descent, but estimate
    the gradient using a surrogate model instead of, say, by
    finite-differencing. The surrogate model is a least-squares quadratic
    fit to points sampled from the vicinity of the current iterate.
    This algorithm works well when you have an initial guess which is in the
    convex neighborhood of a local optimum and you want to converge to that
    local optimum. It's meant to be used when the function is stochastic.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        rate: The learning rate for the gradient descent.
        sample_radius: The radius around the current iterate to sample
            points from to build the quadratic model.
        n_sample_points: The number of points to sample in each iteration.
        n_sample_points_ratio: This specifies the number of points to sample
            in each iteration as a coefficient of the number of points
            required to exactly determine a quadratic model. The number
            of sample points will be this coefficient times (n+1)(n+2)/2,
            rounded up, where n is the number of parameters.
            Setting this overrides n_sample_points.
        rate_decay_exponent: Controls decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant and a is the rate decay exponent
            (this parameter).
        stability_constant: Affects decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant (this parameter) and a is the rate decay
            exponent.
        sample_radius_decay_exponent: Controls decay of sample radius.
            In iteration i (zero-based) the radius used is
            sample_radius / (i + 1)**(this parameter).
        tol: The algorithm terminates when the difference between the current
            iterate and the next suggested iterate is smaller than this value.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function values
            at those points.
        max_iterations: The maximum number of iterations to allow before
            termination.
        max_evaluations: The maximum number of function evaluations to allow
            before termination.

    Returns:
        Scipy OptimizeResult with ``x``, ``fun``, ``nit``, ``nfev`` and
        ``message``, plus the histories ``x_iters``, ``xs_iters``,
        ``ys_iters``, ``func_vals`` and ``model_vals``.
    """

    # Work on copies so that the caller's lists are not extended in place.
    if known_values is not None:
        known_xs, known_ys = known_values
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    # No limit given: the corresponding check below can then never trigger.
    if max_iterations is None:
        max_iterations = np.inf
    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    if n_sample_points_ratio is not None:
        n_sample_points = int(
            np.ceil(n_sample_points_ratio * (n + 1) * (n + 2) / 2))

    # From here on `f` is the wrapped callable, invoked as f(x).
    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.model_vals = [None]  # starts with a None placeholder entry
    res.fun = 0
    total_evals = 0
    num_iter = 0
    converged = False
    message = None

    while num_iter < max_iterations:
        current_sample_radius = (sample_radius /
                                 (num_iter + 1)**sample_radius_decay_exponent)

        # Determine points to evaluate
        # in ball around current point; the current point itself comes first
        new_xs = [np.copy(current_x)] + [
            current_x + _random_point_in_ball(n, current_sample_radius)
            for _ in range(n_sample_points)
        ]

        if total_evals + len(new_xs) > max_evaluations:
            message = 'Reached maximum number of evaluations.'
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += len(new_ys)
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value (new_ys[0] is the value at current_x)
        res.func_vals.append(new_ys[0])
        res.x_iters.append(np.copy(current_x))
        res.fun = res.func_vals[-1]

        # Determine points to use to build model: every known point,
        # including earlier iterations and known_values, that lies within
        # the current radius
        model_xs = []
        model_ys = []
        for x, y in zip(known_xs, known_ys):
            if np.linalg.norm(x - current_x) < current_sample_radius:
                model_xs.append(x)
                model_ys.append(y)
        # Build and solve model
        model_gradient, model = _get_least_squares_model_gradient(
            model_xs, model_ys, current_x)

        # calculate the gradient and update the current point
        gradient_norm = np.linalg.norm(model_gradient)
        decayed_rate = (
            rate / (num_iter + 1 + stability_constant)**rate_decay_exponent)
        # Convergence criteria: the step that would be taken is shorter
        # than tol
        if decayed_rate * gradient_norm < tol:
            converged = True
            message = 'Optimization converged successfully.'
            break
        # Update (in place, on the private copy of x0)
        current_x -= decayed_rate * model_gradient
        # Model prediction for the step just taken; the argument is the
        # offset from the pre-update point (presumably the model is centered
        # there -- confirm in _get_least_squares_model_gradient)
        res.model_vals.append(
            model.predict([-decayed_rate * model_gradient])[0])

        num_iter += 1

    # On convergence the last sampled value at current_x is reused;
    # otherwise the final iterate is evaluated once more.
    if converged:
        final_val = res.func_vals[-1]
    else:
        final_val = f(current_x)
        res.func_vals.append(final_val)

    if message is None:
        message = 'Reached maximum number of iterations.'

    res.x_iters.append(current_x)
    # NOTE(review): this is incremented on the converged path too, where no
    # additional evaluation of f was made above.
    total_evals += 1
    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
Ejemplo n.º 16
0
def optimize_minimize_mhmcmc_cluster(objective,
                                     bounds,
                                     args=(),
                                     x0=None,
                                     T=1,
                                     N=3,
                                     burnin=100000,
                                     maxiter=1000000,
                                     target_ar=0.4,
                                     ar_tolerance=0.05,
                                     cluster_eps=DEFAULT_CLUSTER_EPS,
                                     rnd_seed=None,
                                     collect_samples=None,
                                     logger=None):
    """
    Minimize objective function and return up to N local minima solutions.

    The parameter space is explored with a Metropolis-Hastings random walk
    (a burn-in phase followed by a main phase). Accepted points with the
    lowest objective values are cached, clustered with DBSCAN in normalized
    coordinates, and the objective is re-evaluated at each cluster mean to
    rank the candidate minima.

    :param objective: Objective function to minimize. Takes unpacked args as function call arguments and returns
        a float.
    :type objective: ``Callable(*args) -> float``
    :param bounds: Bounds of the parameter space.
    :type bounds: scipy.optimize.Bounds
    :param args: Any additional fixed parameters needed to completely specify the objective function.
    :type args: tuple or list
    :param x0: Initial guess. If None, will be selected randomly and uniformly within the parameter bounds.
    :type x0: numpy.array with same shape as elements of bounds
    :param T: The "temperature" parameter for the accept or reject criterion. To sample the domain well,
        should be in the order of the typical difference in local minima objective valuations.
    :type T: float
    :param N: Maximum number of minima to return
    :type N: int
    :param burnin: Number of random steps to discard before starting to accumulate statistics.
    :type burnin: int
    :param maxiter: Maximum number of steps to take (including burnin). Must be at least ``2 * burnin``.
    :type maxiter: int
    :param target_ar: Target acceptance rate of point samples generated by stepping.
    :type target_ar: float between 0 and 1
    :param ar_tolerance: Tolerance on the acceptance rate before actively adapting the step size.
    :type ar_tolerance: float
    :param cluster_eps: Point proximity tolerance for DBSCAN clustering, in normalized bounds coordinates.
    :type cluster_eps: float
    :param rnd_seed: Random seed to force deterministic behaviour. If None, a seed is derived from the
        current time.
    :type rnd_seed: int
    :param collect_samples: If not None and integral type, collect collect_samples at regular intervals
        and return as part of solution.
    :type collect_samples: int or NoneType
    :param logger: Logger instance for outputting log messages.
    :return: OptimizeResult containing solution(s) and solver data.
    :rtype: scipy.optimize.OptimizeResult with additional attributes
    """
    # Wrapped objective; its `.counter` attribute is read at the end to
    # report nfev (presumably maintained by `call_counter` -- confirm).
    @call_counter
    def obj_counted(*args):
        return objective(*args)

    # end func

    assert maxiter >= 2 * burnin, "maxiter {} should be at least twice burnin steps {}".format(
        maxiter, burnin)
    main_iter = maxiter - burnin

    if collect_samples is not None:
        assert isinstance(collect_samples,
                          int), "collect_samples expected to be integral type"
        assert collect_samples > 0, "collect_samples expected to be positive"
    # end if

    # Inverse temperature used in the acceptance criterion.
    beta = 1.0 / T

    if rnd_seed is None:
        rnd_seed = int(time.time() * 1000) % (1 << 31)
    # end if
    # Seeds numpy's global random state (not a local generator).
    np.random.seed(rnd_seed)
    if logger:
        logger.info('Using random seed {}'.format(rnd_seed))
    # end

    if x0 is None:
        x0 = np.random.uniform(bounds.lb, bounds.ub)
    # end if
    assert np.all((x0 >= bounds.lb) & (x0 <= bounds.ub))
    x = x0.copy()
    funval = obj_counted(x, *args)

    # Set up stepper with adaptive acceptance rate
    stepper = BoundedRandNStepper(bounds)
    stepper = AdaptiveStepsize(stepper,
                               accept_rate=target_ar,
                               ar_tolerance=ar_tolerance,
                               interval=50)

    # -------------------------------
    # DO BURN-IN
    rejected_randomly = 0
    accepted_burnin = 0
    tracked_range = tqdm(range(burnin), total=burnin, desc='BURN-IN')
    # Route stepper messages through the progress bar's write().
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' +
                                                         msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for _ in tracked_range:
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        # Metropolis criterion in log space: a lower objective is always
        # accepted, a higher one with probability exp(log_alpha).
        log_alpha = -(funval_new - funval) * beta
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            stepper.notify_accept()
            accepted_burnin += 1
        elif log_alpha <= 0:
            # Counter is maintained but not used further in this function.
            rejected_randomly += 1
        # end if
    # end for
    ar = float(accepted_burnin) / burnin
    if logger:
        logger.info("Burn-in acceptance rate: {}".format(ar))
    # end if

    # -------------------------------
    # DO MAIN LOOP
    if collect_samples is not None:
        nsamples = min(collect_samples, main_iter)
        # Number of main-loop steps between consecutive collected samples
        # (a float).
        sample_cadence = main_iter / nsamples
        samples = np.zeros((nsamples, len(x)))
        samples_fval = np.zeros(nsamples)
    # end if
    accepted = 0
    rejected_randomly = 0
    minima_sorted = SortedList(
        key=lambda rec: rec[1])  # Sort by objective function value
    hist = HistogramIncremental(bounds, nbins=100)
    # Cached a lot of potential minimum values, as these need to be clustered before return N results
    N_cached = int(np.ceil(N * main_iter / 500))
    next_sample = 0.0
    sample_count = 0
    tracked_range = tqdm(range(main_iter), total=main_iter, desc='MAIN')
    if logger:
        stepper.logger = lambda msg: tracked_range.write(logger.name + ':' +
                                                         msg)
    else:
        stepper.logger = tracked_range.write
    # end if
    for i in tracked_range:
        # Record the current state whenever the next sampling point is due.
        if collect_samples and i >= next_sample:
            assert sample_count < collect_samples
            samples[sample_count] = x
            samples_fval[sample_count] = funval
            sample_count += 1
            next_sample += sample_cadence
        # end if
        x_new = stepper(x)
        funval_new = obj_counted(x_new, *args)
        # Same acceptance criterion as in the burn-in phase.
        log_alpha = -(funval_new - funval) * beta
        if log_alpha > 0 or np.log(np.random.rand()) <= log_alpha:
            x = x_new
            funval = funval_new
            minima_sorted.add((x, funval))
            # Keep at most N_cached records; pop() presumably removes the
            # last entry, i.e. the largest objective value -- confirm
            # against the SortedList implementation in use.
            if len(minima_sorted) > N_cached:
                minima_sorted.pop()
            # end if
            stepper.notify_accept()
            hist += x
            accepted += 1
        elif log_alpha <= 0:
            # Counter is maintained but not used further in this function.
            rejected_randomly += 1
        # end if
    # end for
    stepper.logger = None
    ar = float(accepted) / main_iter
    if logger:
        logger.info("Acceptance rate: {}".format(ar))
        logger.info("Best minima (before clustering):\n{}".format(
            np.array([_mx[0] for _mx in minima_sorted[:10]])))
    # end if

    # -------------------------------
    # Cluster minima and associate each cluster with a local minimum.
    # Using a normalized coordinate space for cluster detection.
    # NOTE(review): nothing here handles the case where no point was
    # accepted and minima_sorted is empty -- confirm whether that can occur.
    x_range = bounds.ub - bounds.lb
    pts = np.array([x[0] for x in minima_sorted])
    fvals = np.array([x[1] for x in minima_sorted])
    pts_norm = (pts - bounds.lb) / x_range
    _, labels = dbscan(pts_norm, eps=cluster_eps, min_samples=21, n_jobs=-1)

    # Compute mean of each cluster and evaluate objective function at cluster mean locations.
    # Only labels 0..max(labels) are visited (negative labels, presumably
    # DBSCAN noise, are skipped -- confirm).
    minima_candidates = []
    for grp in range(max(labels) + 1):
        mask = (labels == grp)
        mean_loc = np.mean(pts[mask, :], axis=0)
        # Evaluate objective function precisely at the mean location of each cluster
        fval = obj_counted(mean_loc, *args)
        minima_candidates.append((mean_loc, grp, fval))
    # end for

    # Rank minima locations by objective function.
    minima_candidates.sort(key=lambda c: c[2])

    # Pick up to N solutions
    solutions = minima_candidates[:N]

    # Put results into OptimizeResult container.
    # Add histograms to output result (in form of scipy.stats.rv_histogram)
    solution = OptimizeResult()
    solution.x = np.array([s[0] for s in solutions])
    solution.clusters = [pts[(labels == s[1])] for s in solutions]
    solution.cluster_funvals = [fvals[(labels == s[1])] for s in solutions]
    solution.bins = hist.bins
    solution.distribution = hist.histograms
    solution.acceptance_rate = ar
    # success/status are set unconditionally, even when no cluster is found.
    solution.success = True
    solution.status = 0
    if len(solutions) > 0:
        solution.message = 'SUCCESS: Found {} local minima'.format(
            len(solutions))
    else:
        solution.message = 'WARNING: Found no clusters within tolerance {}'.format(
            cluster_eps)
    # end if
    solution.fun = np.array([s[2] for s in solutions])
    solution.jac = None
    # Counts every objective call: initial point, burn-in, main loop and
    # the cluster-mean evaluations.
    solution.nfev = obj_counted.counter
    solution.njev = 0
    solution.nit = main_iter
    solution.maxcv = None
    solution.samples = samples if collect_samples else None
    solution.sample_funvals = samples_fval if collect_samples else None
    solution.bounds = bounds
    solution.version = 's0.3'  # Solution version for future traceability
    solution.rnd_seed = rnd_seed

    return solution
Ejemplo n.º 17
0
def dual_annealing(func, x0, bounds, args=(), maxiter=1000,
                   local_search_options={}, initial_temp=5230.,
                   restart_temp_ratio=2.e-5, visit=2.62, accept=-5.0,
                   maxfun=1e7, seed=None, no_local_search=False,
                   callback=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized.  Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a  tuple of any additional fixed parameters needed to
        completely specify the function.
    x0 : ndarray, shape(n,)
        A single initial starting point coordinates. If ``None`` is provided,
        initial coordinates are automatically generated (using the ``reset``
        method from the internal ``EnergyState`` class).
    bounds : sequence, shape (n, 2)
        Bounds for variables.  ``(min, max)`` pairs for each element in ``x``,
        defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Must be at least 1.
        Default value is 1000.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitate a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing process
        is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `numpy.random.RandomState` instance}, optional
        If `seed` is not specified the `numpy.random.RandomState` singleton is
        used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with the
        following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will stop.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` which describes the
        cause of the termination.
        See `OptimizeResult` for a description of other attributes.

    Raises
    ------
    ValueError
        If ``x0`` and ``bounds`` differ in length, if ``restart_temp_ratio``
        is outside (0, 1), if any bound is infinite or NaN, if a lower bound
        is not strictly below its upper bound, or if ``maxiter`` is smaller
        than 1.

    Notes
    -----
    This function implements the Dual Annealing optimization. This stochastic
    approach derived from [3]_ combines the generalization of CSA (Classical
    Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
    to a strategy for applying a local search on accepted locations [4]_.
    An alternative implementation of this same algorithm is described in [5]_
    and benchmarks are presented in [6]_. This approach introduces an advanced
    method to refine the solution found by the generalized annealing
    process. This algorithm uses a distorted Cauchy-Lorentz visiting
    distribution, with its shape controlled by the parameter :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is used
    to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
    acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1988).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-dimensional problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, None, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
    -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """
    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    # Split the (min, max) pairs into one array of lower and one of upper
    # bounds.
    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper)) or np.any(
            np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')
    # With maxiter < 1 the inner ``for`` loop below has an empty range, so the
    # stop flag would never be set and the outer ``while`` would spin forever.
    if maxiter < 1:
        raise ValueError('maxiter has to be at least 1')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer. ``local_search_options`` is only unpacked
    # here and never mutated, so the shared ``{}`` default is harmless.
    minimizer_wrapper = LocalSearchWrapper(
        bounds, func_wrapper, **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                               minimizer_wrapper, rand_state, energy_state)
    # Run the search loop
    need_to_stop = False
    iteration = 0
    message = []
    # Numerator of the temperature schedule: 2 ** (visit - 1) - 1
    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    # The outer loop restarts the annealing schedule (``i`` goes back to 0)
    # after every re-annealing, while ``iteration`` keeps counting globally.
    while not need_to_stop:
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            iteration += 1
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    break

    # Return the OptimizeResult
    res = OptimizeResult()
    res.x = energy_state.xbest
    res.fun = energy_state.ebest
    res.nit = iteration
    res.nfev = func_wrapper.nfev
    res.njev = func_wrapper.ngev
    res.message = message
    return res
Ejemplo n.º 18
0
def optimize_stiefel(func, X0, args=(), tau_max=.5, max_it=1, tol=1e-6,
                     disp=False, tau_find_freq=100):
    """
    Optimize a function over a Stiefel manifold.

    Every iteration evaluates ``func`` at the current point and then picks a
    step size ``tau``. A fresh step-size search (via ``pybgo.minimize`` over
    ``log(tau / tau_max)`` in ``[-10, 0]``) is done on the first iteration,
    right after a backtrack, and every ``tau_find_freq`` iterations; all other
    iterations reuse the previous ``tau``. A step that increases the objective
    is rejected (backtracking): it is not counted as an iteration and the next
    search runs with ``tau_max`` reduced by 20%.

    :param func: Function to be optimized. Called as ``func(X, *args)`` and
                 must return a pair ``(F, G)``: the objective value and a
                 second quantity passed to ``compute_A`` (presumably the
                 gradient -- confirm against ``compute_A``)
    :param X0: Initial point for line search. Must provide ``copy()``
    :param tau_max: Maximum step size
    :param max_it: Maximum number of iterations
    :param tol: Tolerance criteria to terminate line search
    :param disp: Choose whether to display output
    :param args: Extra arguments passed to the function
    :param tau_find_freq: A new step-size search is performed whenever the
                          iteration number is a multiple of this value
    :returns: ``OptimizeResult`` with the fields ``X`` (final point), ``fun``
              (last objective value), ``nit``, ``nfev``, ``tau_max`` (possibly
              adapted maximum step size) and ``success``
    :raises AssertionError: If ``tol``, ``max_it`` or ``tau_max`` is not
                            positive
    """
    tol = float(tol)
    assert tol > 0, 'Tolerance must be positive'
    max_it = int(max_it)
    assert max_it > 0, 'The maximum number of iterations must be a positive '\
                       + 'integer'
    tau_max = float(tau_max)
    assert tau_max > 0, 'The parameter `tau_max` must be positive.'
    X = X0.copy()
    nit = 0
    nfev = 0
    success = False
    if disp:
        print('Stiefel Optimization'.center(80))
        print('{0:4s} {1:11s} {2:5s}'.format('It', 'F', '(F - F_old) / F_old'))
        print('-' * 30)

    ls_func = LSFunc()
    ls_func.func = func
    decrease_tau = False
    # ``nit`` counts accepted iterations and is incremented at the top of the
    # loop, so the bound has to be strict to honour ``max_it``.
    while nit < max_it:
        nit += 1
        F, G = func(X, *args)
        F_old = F
        nfev += 1
        A = compute_A(G, X)
        # Hand the current state to the line-search objective.
        ls_func.A = A
        ls_func.X = X
        ls_func.func_args = args
        ls_func.tau_max = tau_max
        if nit == 1 or decrease_tau or nit % tau_find_freq == 0:
            # Need to minimize ls_func with respect to each argument
            tau_init = np.linspace(-10, 0., 3)[:, None]
            tau_d = np.linspace(-10, 0., 50)[:, None]
            tau_all, F_all = pybgo.minimize(ls_func, tau_init, tau_d, fixed_noise=1e-16,
                    add_at_least=1, tol=1e-2, scale=True,
                    train_every=1)[:2]
            nfev += tau_all.shape[0]
            idx = np.argmin(F_all)
            # The search variable is log(tau / tau_max); map it back.
            tau = np.exp(tau_all[idx, 0]) * tau_max
            if tau_max - tau <= 1e-6:
                # Best step sits on the upper limit: allow larger steps.
                tau_max = 1.2 * tau_max
                if disp:
                    print('increasing tau_max to {0:1.5e}'.format(tau_max))
            if decrease_tau:
                tau_max = .8 * tau_max
                if disp:
                    print('decreasing max_tau to {0:1.5e}'.format(tau_max))
                decrease_tau = False
            F = F_all[idx, 0]
        else:
            # Reuse the step size found by the most recent search.
            F = ls_func([np.log(tau / tau_max)])
        delta_F = (F_old - F) / np.abs(F_old)
        if delta_F < 0:
            # The step made things worse: discard it, do not count the
            # iteration, and search again with a smaller tau_max.
            if disp:
                print('*** backtracking')
            nit -= 1
            decrease_tau = True
            continue
        X = Y_func(tau, X, A)
        if disp:
            print('{0:4s} {1:1.5e} {2:5e} tau = {3:1.3e}, tau_max = {4:1.3e}'.format(
                str(nit).zfill(4), F, delta_F, tau, tau_max))
        if delta_F <= tol:
            if disp:
                print('*** Converged ***')
            success = True
            break
    res = OptimizeResult()
    res.tau_max = tau_max
    res.X = X
    res.nfev = nfev
    res.nit = nit
    res.fun = F
    res.success = success
    return res
Ejemplo n.º 19
0
def optimize(f,
             x0=None,
             bounds=None,
             domain_scale=None,
             init='uniform',
             stepsize_start=None,
             stepsize_decay_fac=1e-3,
             base_tumble_rate=0.1,
             niter_rt=400,
             n_bacteria_per_dim=3,
             stationarity_window=20,
             eps_stat=1e-3,
             attraction=False,
             attraction_window=10,
             attraction_sigma=None,
             attraction_strength=0.5,
             bounds_reflection=False,
             n_best_selection=3,
             c_gd=1e-6,
             a_gd=None,
             n_linesearch_gd=20,
             alpha_linesearch_gd=0.5,
             beta_linesearch_gd=0.33,
             eps_abs_gd=1e-9,
             eps_rel_gd=1e-6,
             niter_gd=100,
             n_embeddings=5,
             max_dims=3,
             n_reduced_dims=2,
             verbosity=0):
    """
    Metaheuristic global optimization algorithm combining a bacterial run-and-tumble chemotactic
    search with a local, gradient-based search around the best minimum candidate points.
    The algorithm's goal is to find
                                        min f(x), x ∈ Ω,
    where f: Ω ⊂ ℝ^n → ℝ.
    Since the chemotactic search becomes more and more ineffective with increasing problem
    dimensionality, Sequential Random Embeddings are used to solve the optimization problem once its
    dimensionality exceeds a given threshold.

    :param f: [callable] Objective function. Must accept its argument x as numpy array
    :param x0: [array-like object] Optional initial conditions object. Must have the shape
           (n_bacteria, n_dims) or (n_dims,). If x0 == None, initial conditions are sampled randomly
           or uniformly-spaced from Ω. Note that this only works if Ω is a rectangular box, i.e., if
           no or non-rectangular bounds are imposed, x0 must not be None
    :param bounds: [callable or array-like object] Defines the bounded domain Ω. If provided, must
           be one of the following:
           - Bounds projection callback, as defined in description of parameter
             ``projection_callback`` in :func:`local_search.bfgs_b`
           - Rectangular box constraints. For each component x_i of x,
             bounds[i, 0] <= x_i <= bounds[i, 1], that is, bounds must have shape (n_dims, 2)
    :param domain_scale: [float] Scale of the optimization problem. If not provided, the algorithm
           tries to guess the scale from any provided rectangular box constraints. Used for
           auto-scaling algorithm stepsizes
    :param init: [string] Determines how initial bacteria positions are sampled from Ω if
           x0 == None, see description of parameter ``x0``. Currently supported: 'random' and
           'uniform'
    :param stepsize_start: [float] See description of parameter ``stepsize_start`` in
           :func:`global_search.run_and_tumble`. If not provided, the algorithm tries to auto-scale
           this length to the problem's scale
    :param stepsize_decay_fac: [float] Factor by which the run-and-tumble stepsize has decayed in
           the last run-and-tumble iteration compared to its initial value
    :param base_tumble_rate: [float] See description of parameter ``base_tumble_rate`` in
           :func:`global_search.run_and_tumble`
    :param niter_rt: [int] Maximum number of run-and-tumble iterations
    :param n_bacteria_per_dim: [int] How many bacteria to spawn in each dimension. Note that the
           total number of bacteria is
           i)  n_bacteria = n_bacteria_per_dim ** n_dims if n_dims <= max_dims or
           ii) n_bacteria = n_bacteria_per_dim ** (n_reduced_dims + 1) if n_dims > max_dims.
           If x0 is provided with shape (n_bacteria, n_dims), n_bacteria should agree with this
           relation.
    :param stationarity_window: [int] See description of parameter ``stationarity_window`` in
           :func:`global_search.run_and_tumble`
    :param eps_stat: [float] See description of parameter ``stationarity_window`` in
           :func:`global_search.run_and_tumble`
    :param attraction: [bool] See description of parameter ``attraction`` in
           :func:`global_search.run_and_tumble`
    :param attraction_window: [int] See description of parameter ``attraction_window`` in
           :func:`global_search.run_and_tumble`
    :param attraction_sigma: [float] See description of parameter ``attraction_sigma`` in
           :func:`global_search.run_and_tumble`. If not provided, the algorithm tries to auto-scale
           this length to the problem's scale
    :param attraction_strength: [float] See description of parameter ``attraction_strength`` in
           :func:`global_search.run_and_tumble`
    :param bounds_reflection: [bool] See description of parameter ``bounds_reflection`` in
           :func:`global_search.run_and_tumble`
    :param n_best_selection: [int] At the end of the run-and-tumble exploration stage, a local
           gradient-based search is performed, starting from the best positions found thus far by
           the n_best_selection best bacteria
    :param c_gd: [float] See description of parameter ``c`` in :func:`local_search.bfgs_b`
    :param a_gd: [float] See description of parameter ``a`` in :func:`local_search.bfgs_b`. If not
           provided, the algorithm tries to auto-scale this length to the problem's scale
    :param n_linesearch_gd: [int] See description of parameter ``n_linesearch`` in
           :func:`local_search.bfgs_b`
    :param alpha_linesearch_gd: [float] See description of parameter ``alpha_linesearch`` in
           :func:`local_search.bfgs_b`
    :param beta_linesearch_gd: [float] See description of parameter ``beta_linesearch`` in
           :func:`local_search.bfgs_b`
    :param eps_abs_gd: [float] See description of parameter ``eps_abs`` in
           :func:`local_search.bfgs_b`
    :param eps_rel_gd: [float] See description of parameter ``eps_rel`` in
           :func:`local_search.bfgs_b`
    :param niter_gd: [int] Maximum number of local, gradient-based search iterations
    :param n_embeddings: [int] Number of embedding iterations when using Sequential Random
           Embeddings. Only has an effect if n_dims > max_dims
    :param max_dims: [int] Maximum dimension of problems to be solved without using Sequential
           Random Embeddings
    :param n_reduced_dims: [int] Dimension of the embedded problem. Only has an effect if
           n_dims > max_dims
    :param verbosity: [int] Output verbosity. Must be 0, 1, or 2
    :return: Best minimum of f found [scipy.optimize.OptimizeResult]
    :raises AssertionError: If verbosity is not 0, 1, or 2; if n_reduced_dims < 2; if x0 is None
           while bounds is None or a callback; if niter_rt <= stationarity_window; or if
           n_best_selection > n_bacteria
    :raises ValueError: If bounds is neither None, a callable, a list, nor a numpy array, or if
           init is neither 'random' nor 'uniform' when initial positions have to be sampled
    """

    assert verbosity in [0, 1, 2], 'verbosity must be 0, 1, or 2.'
    assert n_reduced_dims >= 2, 'n_reduced_dims must not be less than 2.'

    # Embedded problems are solved in one dimension more than requested (see the
    # "n_reduced_dims + 1" message printed further below).
    n_reduced_dims_eff = n_reduced_dims + 1

    # ---- Stage 1: build the initial population and the projection callbacks ----
    if bounds is None or callable(bounds):
        # Without box constraints there is nothing to sample from, so x0 is mandatory.
        assert x0 is not None, (
            'If no box constraints are provided for bounds, x0 must not be ' +
            'None.')
        x0_population = _prepare_x0(x0, n_bacteria_per_dim, max_dims,
                                    n_reduced_dims_eff)
        n_bacteria, n_dims = x0_population.shape

        if bounds is None:
            # bounds is None here; presumably _prepare_bounds then returns unbounded
            # limits for n_dims dimensions -- TODO confirm against _prepare_bounds.
            bound_lower, bound_upper = _prepare_bounds(bounds, n_dims)

            def projection_callback(x):
                # Clip onto the box and flag every component that lies on a bound.
                x = np.clip(x, bound_lower, bound_upper)
                bounds_hit = np.where(
                    ((x == bound_lower) | (x == bound_upper)), True, False)
                return x, bounds_hit

            def projection_callback_population(x):
                # Clipping broadcasts, so the same callback serves the whole population.
                return projection_callback(x)

        else:

            def projection_callback(x):
                return bounds(x)

            def projection_callback_population(x):
                # The user callback is applied to one bacterium at a time; the stacked
                # results are split into projected positions and bounds-hit flags.
                out = np.array(
                    [projection_callback(x_single) for x_single in x])
                return out[:, 0], out[:, 1]

    elif isinstance(bounds, (list, np.ndarray)):
        if x0 is not None:
            x0_population = _prepare_x0(x0, n_bacteria_per_dim, max_dims,
                                        n_reduced_dims_eff)
            n_bacteria, n_dims = x0_population.shape
            bound_lower, bound_upper = _prepare_bounds(bounds, n_dims)
        else:
            # No x0 given: derive the dimensionality from the bounds and sample the
            # initial population from the box.
            bound_lower, bound_upper = _prepare_bounds(bounds, None)
            n_dims = len(bound_lower)
            n_bacteria = (n_bacteria_per_dim**n_dims if n_dims <= max_dims else
                          n_bacteria_per_dim**n_reduced_dims_eff)
            # A regular grid is only used for n_dims <= max_dims; above that, fall back
            # to random sampling. The warning is only emitted when verbosity > 0.
            if init == 'uniform' and n_dims > max_dims:
                init = 'random'
                if verbosity > 0:
                    warnings.warn(
                        'The option init="uniform" is only available for problems with '
                        +
                        'dimensionality less than or equal to max_dims, which was '
                        +
                        f'set to {max_dims}. Since the current problem has ' +
                        f'dimensionality {n_dims}, init was automatically set to '
                        + f'"random".')
            if init == 'random':
                x0_population = np.random.uniform(bound_lower,
                                                  bound_upper,
                                                  size=(n_bacteria, n_dims))
            elif init == 'uniform':
                # Regular grid with n_bacteria_per_dim points per axis, flattened to
                # one row per grid point.
                init_points = []
                for i in range(n_dims):
                    init_points.append(
                        np.linspace(bound_lower[i], bound_upper[i],
                                    n_bacteria_per_dim))
                x0_population = np.array(np.meshgrid(*init_points)).reshape(
                    n_dims, -1).T
            else:
                raise ValueError('init must either be "random" or "uniform".')

        def projection_callback(x):
            # Clip onto the box and flag every component that lies on a bound.
            x = np.clip(x, bound_lower, bound_upper)
            bounds_hit = np.where(((x == bound_lower) | (x == bound_upper)),
                                  True, False)
            return x, bounds_hit

        def projection_callback_population(x):
            return projection_callback(x)

    else:
        raise ValueError(
            'bounds must either be None, an array or corresponding nested list of '
            +
            'shape (n_dims, 2), or a custom callback function. See the docstring '
            + 'for details.')

    assert niter_rt > stationarity_window, 'niter_rt must be larger than stationarity_window.'
    assert n_best_selection <= n_bacteria, 'n_best_selection must not be larger than n_bacteria.'

    # ---- Stage 2: defaults and auto-scaling of length-like parameters ----
    # A parameter left at None gets a unit-less default that is multiplied by the
    # problem scale below; user-supplied values are used as given.
    if stepsize_start is not None:
        auto_scale_stepsize = False
    else:
        auto_scale_stepsize = True
        stepsize_start = 1e-1
    stepsize_end = stepsize_decay_fac * stepsize_start

    if attraction_sigma is not None:
        auto_scale_attraction_sigma = False
    else:
        auto_scale_attraction_sigma = True
        attraction_sigma = 1

    if a_gd is not None:
        auto_scale_a_gd = False
    else:
        auto_scale_a_gd = True
        a_gd = 1e-2

    # Project the initial population onto the domain and warn if that moved any point.
    x0_population_orig = x0_population.copy()
    x0_population, _ = projection_callback_population(x0_population)
    if not np.array_equal(x0_population, x0_population_orig):
        warnings.warn(
            'Found initial conditions outside the defined search domain.')

    # Problem scale: the explicit domain_scale wins; otherwise the largest finite
    # extent of the box (infinite extents count as 0).
    max_scale = None
    if domain_scale is not None:
        max_scale = domain_scale
    elif isinstance(bounds, (list, np.ndarray)):
        # noinspection PyUnboundLocalVariable
        domain_range = bound_upper - bound_lower
        max_scale = np.max(np.where(np.isinf(domain_range), 0, domain_range))
    if max_scale is not None and max_scale > 0:
        if auto_scale_stepsize:
            stepsize_start = stepsize_start * max_scale
            stepsize_end = stepsize_end * max_scale
        if auto_scale_attraction_sigma:
            attraction_sigma = attraction_sigma * max_scale
        if auto_scale_a_gd:
            a_gd = a_gd * max_scale

    # ---- Stage 3: solve ----
    if n_dims > max_dims:
        # High-dimensional problem: delegate to Sequential Random Embeddings. The
        # already-scaled stepsize_start, attraction_sigma and a_gd are passed on, and
        # max_dims is set to the effective embedded dimensionality.
        if verbosity > 0:
            print(
                f'Using sequential random embeddings in {n_reduced_dims} + 1 dimensions.'
            )
        return _sequential_random_embeddings(
            f,
            x0_population,
            projection_callback,
            n_reduced_dims_eff=n_reduced_dims_eff,
            n_embeddings=n_embeddings,
            verbosity=verbosity,
            domain_scale=max_scale,
            init=init,
            stepsize_start=stepsize_start,
            stepsize_decay_fac=stepsize_decay_fac,
            base_tumble_rate=base_tumble_rate,
            niter_rt=niter_rt,
            n_bacteria_per_dim=n_bacteria_per_dim,
            stationarity_window=stationarity_window,
            eps_stat=eps_stat,
            attraction=attraction,
            attraction_window=attraction_window,
            attraction_sigma=attraction_sigma,
            attraction_strength=attraction_strength,
            bounds_reflection=bounds_reflection,
            n_best_selection=n_best_selection,
            c_gd=c_gd,
            a_gd=a_gd,
            n_linesearch_gd=n_linesearch_gd,
            alpha_linesearch_gd=alpha_linesearch_gd,
            beta_linesearch_gd=beta_linesearch_gd,
            eps_abs_gd=eps_abs_gd,
            eps_rel_gd=eps_rel_gd,
            niter_gd=niter_gd,
            max_dims=n_reduced_dims_eff)

    else:
        # Global exploration stage.
        x_best, f_best, nfev, nit, trace = run_and_tumble(
            f,
            x0_population,
            projection_callback_population,
            niter_rt,
            stepsize_start,
            stepsize_end,
            base_tumble_rate=base_tumble_rate,
            stationarity_window=stationarity_window,
            eps_stat=eps_stat,
            attraction=attraction,
            attraction_window=attraction_window,
            attraction_sigma=attraction_sigma,
            attraction_strength=attraction_strength,
            bounds_reflection=bounds_reflection,
            verbosity=verbosity)

        if verbosity == 2:
            print(
                '==============================================================================='
            )
        if verbosity > 0:
            print(
                f'Best result after run-and-tumble stage is x = {x_best[np.argmin(f_best)]}, '
                +
                f'f(x) = {np.min(f_best)}. Starting local, gradient-based optimization for the '
                + f'{n_best_selection} best bacteria.')

        # Local refinement stage: start from the n_best_selection bacteria with the
        # lowest objective values.
        sortIdx = f_best.argsort()
        x_best_selection = x_best[sortIdx[:n_best_selection]]
        x_best_gd = np.empty(x_best_selection.shape)
        f_min_gd = np.empty(n_best_selection)
        nfev_gd = 0
        nit_gd = 0
        success_gd = np.empty(n_best_selection)
        # Trace of the local stage for all bacteria. Bacteria that are not refined
        # stay at their last run-and-tumble position for every local iteration.
        trace_gd = np.empty((niter_gd, n_bacteria, n_dims))
        trace_gd[:, sortIdx[n_best_selection:], :] = trace[
            -1, sortIdx[n_best_selection:], :]
        nit_gd_arr = np.empty(n_best_selection)
        # All positions visited during run-and-tumble, flattened to one point per row.
        visited_points = trace.reshape(-1, n_dims)

        for n, x_start in enumerate(x_best_selection):
            if verbosity == 2:
                print(f'Performing gradient descent for bacterium {n}.')

            # Calculate quadratic function approximation around x_start
            # binom(n_dims + 2, 2) is the number of coefficients of a quadratic
            # polynomial in n_dims variables; twice as many points are used for the fit.
            num_sampling_points = 2 * int(special.binom(n_dims + 2, 2))
            # Use the already-visited points closest to x_start as sampling points.
            # noinspection PyArgumentList,PyUnresolvedReferences
            sampling_points = visited_points[spatial.cKDTree(
                visited_points).query(x_start, num_sampling_points)[1]]
            func_values = np.array([f(point) for point in sampling_points])
            # Account for the objective evaluations made on the sampling points.
            nfev += num_sampling_points
            # Exponent tuples of all monomials with total degree <= 2.
            polynomial_powers = list(
                itertools.filterfalse(
                    lambda prod: sum(list(prod)) > 2,
                    itertools.product((0, 1, 2), repeat=n_dims)))
            # One column per monomial, evaluated at every sampling point.
            sampling_matrix = np.stack([
                np.prod(sampling_points**d, axis=1) for d in polynomial_powers
            ],
                                       axis=-1)
            # NOTE(review): the third positional argument of np.linalg.lstsq is rcond;
            # a cut-off ratio of 2 looks unusual -- confirm this is intended.
            coeffs = np.linalg.lstsq(sampling_matrix, func_values, 2)[0]

            # Calculate Hessian matrix from the quadratic approximation
            H = np.ones((n_dims, n_dims))
            # Keep only the degree-2 monomials together with their fitted coefficients.
            square_powers = list(
                itertools.filterfalse(
                    lambda zipped_item: sum(list(zipped_item[0])) != 2,
                    zip(polynomial_powers, coeffs)))
            for square_power, coeff in square_powers:
                idcs_to_consider = np.argwhere(np.array(square_power) != 0)
                # NOTE(review): for a term c * x_i**2 the second derivative is 2 * c;
                # confirm that 0.5 * coeff is what bfgs_b expects for H_start.
                if len(idcs_to_consider) == 1:  # Diagonal
                    H[idcs_to_consider[0], idcs_to_consider[0]] = 0.5 * coeff
                elif len(idcs_to_consider) == 2:  # Mixed derivatives
                    H[idcs_to_consider[0], idcs_to_consider[1]] = coeff
                    H[idcs_to_consider[1], idcs_to_consider[0]] = coeff
                else:
                    raise RuntimeError(
                        "Polynomial function approximation seems to be of higher "
                        "order than two. This shouldn't happen.")

            local_optimization_result = bfgs_b(
                f,
                x_start,
                projection_callback,
                H_start=H,
                a=a_gd,
                c=c_gd,
                niter=niter_gd,
                n_linesearch=n_linesearch_gd,
                alpha_linesearch=alpha_linesearch_gd,
                beta_linesearch=beta_linesearch_gd,
                eps_abs=eps_abs_gd,
                eps_rel=eps_rel_gd,
                verbosity=verbosity)
            x_best_gd[n] = local_optimization_result.x
            f_min_gd[n] = local_optimization_result.f
            nfev_gd += local_optimization_result.nfev
            nit_gd += local_optimization_result.nit
            nit_gd_arr[n] = local_optimization_result.nit
            success_gd[n] = local_optimization_result.success
            # presumably _pad_trace extends the trace to niter_gd rows -- TODO confirm
            trace_gd[:, sortIdx[n], :] = _pad_trace(
                local_optimization_result.trace, niter_gd)

        # Collect the overall result: the best of the refined candidates, with
        # evaluation and iteration counts summed over both stages.
        result = OptimizeResult()
        result.success = success_gd.any()
        result.x = x_best_gd[np.argmin(f_min_gd)]
        result.fun = np.min(f_min_gd)
        result.nfev = nfev + nfev_gd
        result.nit = nit + nit_gd
        # Cut the local-stage trace at the largest iteration count of any local search.
        trace_gd = trace_gd[:np.max(nit_gd_arr).astype(int)]
        result.trace = np.concatenate((trace, trace_gd))

        return result
Ejemplo n.º 20
0
def glpk(
        c,
        A_ub=None,
        b_ub=None,
        A_eq=None,
        b_eq=None,
        bounds=None,
        solver='simplex',
        sense=GLPK.GLP_MIN,
        scale=True,
        maxit=GLPK.INT_MAX,
        timeout=GLPK.INT_MAX,
        basis_fac='luf+ft',
        message_level=GLPK.GLP_MSG_ERR,
        disp=False,
        simplex_options=None,
        ip_options=None,
        mip_options=None,
):
    '''Solve a linear or mixed-integer program with GLPK through ctypes.

    The problem is loaded into a GLPK problem object, optionally scaled,
    solved with the requested driver (simplex, interior point or MIP) and
    the outcome is returned as an ``OptimizeResult``. The GLPK problem
    object is always released before returning, including on failure.

    Parameters
    ----------
    c : 1-D array (n,)
        Array of objective coefficients.
    A_ub : 2-D array (m, n)
        scipy.sparse.coo_matrix
    b_ub : 1-D array (m,)
    A_eq : 2-D array (k, n)
        scipy.sparse.coo_matrix
    b_eq : 1-D array (k,)
    bounds : None or list (n,) of tuple (2,) or tuple (2,)
        The jth entry in the list corresponds to the jth objective coefficient.
        Each entry is made up of a tuple describing the bounds.
        Use None to indicate that there is no bound. By default, bounds are
        (0, None) (all decision variables are non-negative). If a single tuple
        (min, max) is provided, then min and max will serve as bounds for all
        decision variables.
    solver : { 'simplex', 'interior', 'mip' }
        Use simplex (LP/MIP) or interior point method (LP only).
        Default is ``simplex``.
    sense : { GLPK.GLP_MIN, GLPK.GLP_MAX }
        Minimization or maximization problem.
        Default is ``GLPK.GLP_MIN``.
    scale : bool
        Scale the problem. Default is ``True``. Explicit scaling is skipped
        when ``solver='simplex'`` and ``simplex_options['presolve']`` is
        given and truthy.
    maxit : int
        Maximum number of simplex iterations. Default is ``INT_MAX``.
        Only the simplex driver reads this value.
    timeout : int
        Limit on solution time. Default is ``INT_MAX``.
        Used by the simplex and MIP drivers.
        NOTE(review): the value is handed unchanged to GLPK's ``tm_lim``
        field, which the GLPK manual specifies in milliseconds, although
        this parameter has historically been described as seconds --
        confirm the intended unit.
    basis_fac : { 'luf+ft', 'luf+cbg', 'luf+cgr', 'btf+cbg', 'btf+cgr' }
        LP basis factorization strategy. Default is ``luf+ft``.
        These are combinations of the following strategies:

            - ``luf`` : plain LU-factorization
            - ``btf`` : block triangular LU-factorization
            - ``ft`` : Forrest-Tomlin update
            - ``cbg`` : Schur complement + Bartels-Golub update
            - ``cgr`` : Schur complement + Givens rotation update

    message_level : { GLP_MSG_OFF, GLP_MSG_ERR, GLP_MSG_ON, GLP_MSG_ALL, GLP_MSG_DBG }
        Verbosity level of logging to stdout.
        Only applied when ``disp=True``. Default is ``GLP_MSG_ERR``.
        One of the following:

            ``GLP_MSG_OFF`` : no output
            ``GLP_MSG_ERR`` : warning and error messages only
            ``GLP_MSG_ON`` : normal output
            ``GLP_MSG_ALL`` : full output
            ``GLP_MSG_DBG`` : debug output

    disp : bool
        Display output to stdout. Default is ``False``.
    simplex_options : dict
        Options specific to simplex solver. The dictionary consists of
        the following fields:

            - method : { 'primal', 'dual', 'dualp' }
                Primal or two-phase dual simplex.
                Default is ``primal``. One of the following:

                    - ``primal`` : use two-phase primal simplex
                    - ``dual`` : use two-phase dual simplex
                    - ``dualp`` : use two-phase dual simplex, and if it fails,
                        switch to the primal simplex

            - init_basis : { 'std', 'adv', 'bib' }
                Choice of initial basis.  Default is 'adv'.
                One of the following:

                    - ``std`` : standard initial basis of all slacks
                    - ``adv`` : advanced initial basis
                    - ``bib`` : Bixby's initial basis

            - steep : bool
                Use steepest edge technique or standard "textbook"
                pricing.  Default is ``True`` (steepest edge).

            - ratio : { 'relax', 'norelax', 'flip' }
                Ratio test strategy. Default is ``relax``.
                One of the following:

                    - ``relax`` : Harris' two-pass ratio test
                    - ``norelax`` : standard "textbook" ratio test
                    - ``flip`` : long-step ratio test

            - tol_bnd : double
                Tolerance used to check if the basic solution is primal
                feasible. (Default: 1e-7).

            - tol_dj : double
                Tolerance used to check if the basic solution is dual
                feasible. (Default: 1e-7).

            - tol_piv : double
                Tolerance used to choose eligible pivotal elements of
                the simplex table. (Default: 1e-10).

            - obj_ll : double
                Lower limit of the objective function. If the objective
                function reaches this limit and continues decreasing,
                the solver terminates the search. Used in the dual simplex
                only. (Default: -DBL_MAX -- the largest finite float64).
                Any value other than ``None`` (including ``0.0``) is applied.

            - obj_ul : double
                Upper limit of the objective function. If the objective
                function reaches this limit and continues increasing,
                the solver terminates the search. Used in the dual simplex
                only. (Default: +DBL_MAX -- the largest finite float64).
                Any value other than ``None`` (including ``0.0``) is applied.

            - presolve : bool
                Use presolver (assumes ``scale=True`` and
                ``init_basis='adv'``). Default is ``True``.

            - exact : bool
                Use simplex method based on exact arithmetic.
                Default is ``False``. If ``True``, all other
                ``simplex_option`` fields are ignored.

    ip_options : dict
        Options specific to interior-point solver.
        The dictionary consists of the following fields:

            - ordering : { 'nord', 'qmd', 'amd', 'symamd' }
                Ordering algorithm used before Cholesky factorization.
                Default is ``amd``. One of the following:

                    - ``nord`` : natural (original) ordering
                    - ``qmd`` : quotient minimum degree ordering
                    - ``amd`` : approximate minimum degree ordering
                    - ``symamd`` : approximate minimum degree ordering
                        algorithm for Cholesky factorization of symmetric
                        matrices.

    mip_options : dict
        Options specific to MIP solver.
        The dictionary consists of the following fields:

            - intcon : 1-D array
                Array of integer constraints, specified as the 0-based
                indices of the solution. Default is an empty array.
            - bincon : 1-D array
                Array of binary constraints, specified as the 0-based
                indices of the solution. If any indices are duplicated
                between ``bincon`` and ``intcon``, they will be
                considered as binary constraints. Default is an empty
                array.
            - nomip : bool
                consider all integer variables as continuous
                (allows solving MIP as pure LP). Default is ``False``.
            - branch : { 'first', 'last', 'mostf', 'drtom', 'pcost' }
                Branching rule. Default is ``drtom``.
                One of the following:

                    - ``first`` : branch on first integer variable
                    - ``last`` : branch on last integer variable
                    - ``mostf`` : branch on most fractional variable
                    - ``drtom`` : branch using heuristic by Driebeck and Tomlin
                    - ``pcost`` : branch using hybrid pseudocost heuristic
                                  (may be useful for hard instances)

            - backtrack : { 'dfs', 'bfs', 'bestp', 'bestb' }
                Backtracking rule. Default is ``bestb``.
                One of the following:

                    - ``dfs`` : backtrack using depth first search
                    - ``bfs`` : backtrack using breadth first search
                    - ``bestp`` : backtrack using the best projection heuristic
                    - ``bestb`` : backtrack using node with best local bound

            - preprocess : { 'none', 'root', 'all' }
                Preprocessing technique. Default is ``all``.
                One of the following:

                    - ``none`` : disable preprocessing
                    - ``root`` : perform preprocessing only on the root level
                    - ``all`` : perform preprocessing on all levels

            - round : bool
                Simple rounding heuristic. Default is ``True``.

            - presolve : bool
                Use MIP presolver. Default is ``True``.

            - binarize : bool
                replace general integer variables by binary ones
                (only used if ``presolve=True``). Default is ``False``.

            - fpump : bool
                Apply feasibility pump heuristic. Default is ``False``.

            - proxy : int
                Apply proximity search heuristic (in seconds). Default is 60.
                A falsy value (e.g. ``0``) disables the heuristic.

            - cuts : list of { 'gomory', 'mir', 'cover', 'clique', 'all' }
                Cuts to generate. Default is no cuts. List of the following:

                    - ``gomory`` : Gomory's mixed integer cuts
                    - ``mir`` : MIR (mixed integer rounding) cuts
                    - ``cover`` : mixed cover cuts
                    - ``clique`` : clique cuts
                    - ``all`` : generate all cuts above

            - tol_int : float
                Absolute tolerance used to check if optimal solution to the
                current LP relaxation is integer feasible.
                (Default: 1e-5).
            - tol_obj : float
                Relative tolerance used to check if the objective value in
                optimal solution to the current LP relaxation is not better
                than in the best known integer feasible solution.
                (Default: 1e-7).
            - mip_gap : float
                Relative mip gap tolerance. If the relative mip gap for
                currently known best integer feasible solution falls below
                this tolerance, the solver terminates the search. This allows
                obtaining suboptimal integer feasible solutions if solving the
                problem to optimality takes too long time.
                (Default: 0.0).
            - bound : float
                add inequality obj <= bound (minimization) or
                obj >= bound (maximization) to integer feasibility
                problem (assumes ``minisat=True``).
                NOTE: this field is not currently read by this function.

    Returns
    -------
    res : OptimizeResult
        If the GLPK driver itself reports a failure, the result holds only
        ``message`` (the text for the driver return code) and
        ``success=False``, and an ``OptimizeWarning`` is issued.
        Otherwise it holds ``status``, ``message`` and ``success``.
        For the simplex and interior-point solvers, an optimal status adds
        ``fun``, ``x``, ``dual``, ``slack`` (``b_ub - A_ub @ x``),
        ``con`` (``b_eq - A_eq @ x``) and ``nit``. For the MIP solver,
        an optimal or feasible status adds ``fun`` and ``x``.

    Raises
    ------
    ValueError
        If ``solver`` is not one of the recognized names.
    KeyError
        If ``basis_fac`` or one of the string-valued option fields is not
        one of the documented choices.

    Notes
    -----
    In general, don't change tolerances without a detailed understanding
    of their purposes.
    '''

    # Housekeeping
    if simplex_options is None:
        simplex_options = {}
    if ip_options is None:
        ip_options = {}
    if mip_options is None:
        mip_options = {}

    # Create and fill the GLPK problem struct
    prob, lp = _fill_prob(c, A_ub, b_ub, A_eq, b_eq, bounds, sense, 'problem-name')
    c, A_ub, b_ub, A_eq, b_eq, bounds, _x0 = lp

    # Get the library
    _lib = GLPK()._lib

    # Shared translation of Python booleans to GLPK switches
    on_off = {
        True: GLPK.GLP_ON,
        False: GLPK.GLP_OFF,
    }

    # Everything that touches ``prob`` runs under try/finally so the problem
    # object is released on every exit path: normal return, early return
    # after a driver failure, and exceptions such as an unknown option.
    try:
        # Scale the problem.
        # NOTE(review): 'presolve' is read without a default here, so explicit
        # scaling is only skipped when the caller passes it explicitly, even
        # though the simplex driver below defaults presolve to True.
        presolver_handles_scaling = (solver == "simplex" and
                                     simplex_options.get("presolve"))
        if not presolver_handles_scaling and scale:
            # do auto scaling for now
            _lib.glp_scale_prob(prob, GLPK.GLP_SF_AUTO)

        # Select basis factorization method
        bfcp = glp_bfcp()
        _lib.glp_get_bfcp(prob, ctypes.byref(bfcp))
        bfcp.type = {
            'luf+ft': GLPK.GLP_BF_LUF + GLPK.GLP_BF_FT,
            'luf+cbg': GLPK.GLP_BF_LUF + GLPK.GLP_BF_BG,
            'luf+cgr': GLPK.GLP_BF_LUF + GLPK.GLP_BF_GR,
            'btf+cbg': GLPK.GLP_BF_BTF + GLPK.GLP_BF_BG,
            'btf+cgr': GLPK.GLP_BF_BTF + GLPK.GLP_BF_GR,
        }[basis_fac]
        _lib.glp_set_bfcp(prob, ctypes.byref(bfcp))

        # Run the solver
        if solver == 'simplex':

            # Construct an initial basis
            basis = simplex_options.get('init_basis', 'adv')
            basis_fun = {
                'std': _lib.glp_std_basis,
                'adv': _lib.glp_adv_basis,
                'bib': _lib.glp_cpx_basis,
            }[basis]
            basis_args = [prob]
            if basis == 'adv':
                # adv must have 0 as flags argument
                basis_args.append(0)
            basis_fun(*basis_args)

            # Make control structure
            smcp = glp_smcp()
            _lib.glp_init_smcp(ctypes.byref(smcp))

            # Set options (message level collapses to 0 when disp is False)
            smcp.msg_lev = message_level*disp
            smcp.meth = {
                'primal': GLPK.GLP_PRIMAL,
                'dual': GLPK.GLP_DUAL,
                'dualp': GLPK.GLP_DUALP,
            }[simplex_options.get('method', 'primal')]
            smcp.pricing = {
                True: GLPK.GLP_PT_PSE,
                False: GLPK.GLP_PT_STD,
            }[simplex_options.get('steep', True)]
            smcp.r_test = {
                'relax': GLPK.GLP_RT_HAR,
                'norelax': GLPK.GLP_RT_STD,
                'flip': GLPK.GLP_RT_FLIP,
            }[simplex_options.get('ratio', 'relax')]
            smcp.tol_bnd = simplex_options.get('tol_bnd', 1e-7)
            smcp.tol_dj = simplex_options.get('tol_dj', 1e-7)
            smcp.tol_piv = simplex_options.get('tol_piv', 1e-10)
            # Compare against None rather than truth-testing so that a
            # limit of exactly 0.0 is honoured.
            if simplex_options.get('obj_ll') is not None:
                smcp.obj_ll = simplex_options['obj_ll']
            if simplex_options.get('obj_ul') is not None:
                smcp.obj_ul = simplex_options['obj_ul']
            smcp.it_lim = maxit
            smcp.tm_lim = timeout
            smcp.presolve = on_off[simplex_options.get('presolve', True)]

            # Simplex driver
            if simplex_options.get('exact', False):
                ret_code = _lib.glp_exact(prob, ctypes.byref(smcp))
            else:
                ret_code = _lib.glp_simplex(prob, ctypes.byref(smcp))
            if ret_code != GLPK.SUCCESS:
                warn('GLPK simplex not successful!', OptimizeWarning)
                return OptimizeResult({
                    'message': GLPK.RET_CODES[ret_code],
                    'success': False,
                })

            # Figure out what happened
            status = _lib.glp_get_status(prob)
            message = GLPK.STATUS_CODES[status]
            res = OptimizeResult({
                'status': status,
                'message': message,
                'success': status == GLPK.GLP_OPT,
            })

            # We can read a solution:
            if status == GLPK.GLP_OPT:

                n_cols = _lib.glp_get_num_cols(prob)
                n_rows = _lib.glp_get_num_rows(prob)
                res.fun = _lib.glp_get_obj_val(prob)
                # GLPK rows and columns are 1-based
                res.x = np.array([_lib.glp_get_col_prim(prob, ii)
                                  for ii in range(1, n_cols+1)])
                res.dual = np.array([_lib.glp_get_row_dual(prob, ii)
                                     for ii in range(1, n_rows+1)])

                # We don't get slack without doing sensitivity analysis since
                # GLPK uses auxiliary variables instead of slack!
                res.slack = b_ub - A_ub @ res.x
                res.con = b_eq - A_eq @ res.x

                # We shouldn't be reading this field... But we will anyways
                res.nit = prob.contents.it_cnt

        elif solver == 'interior':

            # Make a control structure
            iptcp = glp_iptcp()
            _lib.glp_init_iptcp(ctypes.byref(iptcp))

            # Set options
            iptcp.msg_lev = message_level*disp
            iptcp.ord_alg = {
                'nord': GLPK.GLP_ORD_NONE,
                'qmd': GLPK.GLP_ORD_QMD,
                'amd': GLPK.GLP_ORD_AMD,
                'symamd': GLPK.GLP_ORD_SYMAMD,
            }[ip_options.get('ordering', 'amd')]

            # Run the solver
            ret_code = _lib.glp_interior(prob, ctypes.byref(iptcp))
            if ret_code != GLPK.SUCCESS:
                warn('GLPK interior-point not successful!', OptimizeWarning)
                return OptimizeResult({
                    'message': GLPK.RET_CODES[ret_code],
                    'success': False,
                })

            # Figure out what happened
            status = _lib.glp_ipt_status(prob)
            message = GLPK.STATUS_CODES[status]
            res = OptimizeResult({
                'status': status,
                'message': message,
                'success': status == GLPK.GLP_OPT,
            })

            # We can read a solution:
            if status == GLPK.GLP_OPT:

                n_cols = _lib.glp_get_num_cols(prob)
                # Was misspelled ``gpl_get_num_rows``, which made every
                # successful interior-point solve fail on attribute lookup.
                n_rows = _lib.glp_get_num_rows(prob)
                res.fun = _lib.glp_ipt_obj_val(prob)
                res.x = np.array([_lib.glp_ipt_col_prim(prob, ii)
                                  for ii in range(1, n_cols+1)])
                res.dual = np.array([_lib.glp_ipt_row_dual(prob, ii)
                                     for ii in range(1, n_rows+1)])

                # We don't get slack without doing sensitivity analysis since
                # GLPK uses auxiliary variables instead of slack!
                res.slack = b_ub - A_ub @ res.x
                res.con = b_eq - A_eq @ res.x

                # We shouldn't be reading this field... But we will anyways
                res.nit = prob.contents.it_cnt

        elif solver == 'mip':

            # Make a control structure
            iocp = glp_iocp()
            _lib.glp_init_iocp(ctypes.byref(iocp))

            # Make variables integer- and binary-valued. Binary kinds are set
            # last so an index listed in both ends up binary.
            if not mip_options.get('nomip', False):
                intcon = mip_options.get('intcon', [])
                for jj in intcon:
                    _lib.glp_set_col_kind(prob, jj+1, GLPK.GLP_IV)
                bincon = mip_options.get('bincon', [])
                for jj in bincon:
                    _lib.glp_set_col_kind(prob, jj+1, GLPK.GLP_BV)

            # Set options
            iocp.msg_lev = message_level*disp
            iocp.br_tech = {
                'first': GLPK.GLP_BR_FFV,
                'last': GLPK.GLP_BR_LFV,
                'mostf': GLPK.GLP_BR_MFV,
                'drtom': GLPK.GLP_BR_DTH,
                'pcost': GLPK.GLP_BR_PCH,
            }[mip_options.get('branch', 'drtom')]
            iocp.bt_tech = {
                'dfs': GLPK.GLP_BT_DFS,
                'bfs': GLPK.GLP_BT_BFS,
                'bestp': GLPK.GLP_BT_BPH,
                'bestb': GLPK.GLP_BT_BLB,
            }[mip_options.get('backtrack', 'bestb')]
            # Was misspelled ``pp_teck``: the assignment created a stray
            # Python attribute and the preprocess option never reached GLPK.
            iocp.pp_tech = {
                'none': GLPK.GLP_PP_NONE,
                'root': GLPK.GLP_PP_ROOT,
                'all': GLPK.GLP_PP_ALL,
            }[mip_options.get('preprocess', 'all')]
            iocp.sr_heur = on_off[mip_options.get('round', True)]
            iocp.fp_heur = on_off[mip_options.get('fpump', False)]

            # Proximity search: option is given in seconds, GLPK field is
            # set to that value times 1000.
            ps_tm_lim = mip_options.get('proxy', 60)
            if ps_tm_lim:
                iocp.ps_heur = GLPK.GLP_ON
                iocp.ps_tm_lim = ps_tm_lim*1000
            else:
                iocp.ps_heur = GLPK.GLP_OFF
                iocp.ps_tm_lim = 0

            cuts = set(mip_options.get('cuts', []))
            if 'all' in cuts:
                cuts = {'gomory', 'mir', 'cover', 'clique'}
            if 'gomory' in cuts:
                iocp.gmi_cuts = GLPK.GLP_ON
            if 'mir' in cuts:
                iocp.mir_cuts = GLPK.GLP_ON
            if 'cover' in cuts:
                iocp.cov_cuts = GLPK.GLP_ON
            if 'clique' in cuts:
                iocp.clq_cuts = GLPK.GLP_ON

            iocp.tol_int = mip_options.get('tol_int', 1e-5)
            iocp.tol_obj = mip_options.get('tol_obj', 1e-7)
            iocp.mip_gap = mip_options.get('mip_gap', 0.0)
            iocp.tm_lim = timeout
            iocp.presolve = on_off[mip_options.get('presolve', True)]
            iocp.binarize = on_off[mip_options.get('binarize', False)]

            # Run the solver
            ret_code = _lib.glp_intopt(prob, ctypes.byref(iocp))
            if ret_code != GLPK.SUCCESS:
                warn('GLPK MIP solver not successful!', OptimizeWarning)
                return OptimizeResult({
                    'message': GLPK.RET_CODES[ret_code],
                    'success': False,
                })

            # Figure out what happened
            status = _lib.glp_mip_status(prob)
            message = GLPK.STATUS_CODES[status]
            res = OptimizeResult({
                'status': status,
                'message': message,
                'success': status in [GLPK.GLP_OPT, GLPK.GLP_FEAS],
            })

            # We can read a solution:
            if res.success:
                res.fun = _lib.glp_mip_obj_val(prob)
                res.x = np.array([_lib.glp_mip_col_val(prob, ii)
                                  for ii in range(1, len(c)+1)])

        else:
            raise ValueError('"%s" is not a recognized solver.' % solver)

    finally:
        # We're done, cleanup!
        _lib.glp_delete_prob(prob)

    # TODO: map GLPK status codes onto scipy's status convention.

    return res
def dual_annealing(func,
                   bounds,
                   args=(),
                   maxiter=1000,
                   local_search_options={},
                   initial_temp=5230.,
                   restart_temp_ratio=2.e-5,
                   visit=2.62,
                   accept=-5.0,
                   maxfun=1e7,
                   seed=None,
                   no_local_search=False,
                   callback=None,
                   x0=None):
    """
    Find the global minimum of a function using Dual Annealing.

    Parameters
    ----------
    func : callable
        The objective function to be minimized. Must be in the form
        ``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array
        and ``args`` is a  tuple of any additional fixed parameters needed to
        completely specify the function.
    bounds : sequence, shape (n, 2)
        Bounds for variables.  ``(min, max)`` pairs for each element in ``x``,
        defining bounds for the objective function parameter.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        objective function.
    maxiter : int, optional
        The maximum number of global search iterations. Default value is 1000.
        With ``maxiter <= 0`` no search step is performed and the initial
        energy state is returned.
    local_search_options : dict, optional
        Extra keyword arguments to be passed to the local minimizer
        (`minimize`). Some important options could be:
        ``method`` for the minimizer method to use and ``args`` for
        objective function additional arguments.
    initial_temp : float, optional
        The initial temperature, use higher values to facilitate a wider
        search of the energy landscape, allowing dual_annealing to escape
        local minima that it is trapped in. Default value is 5230. Range is
        (0.01, 5.e4].
    restart_temp_ratio : float, optional
        During the annealing process, temperature is decreasing, when it
        reaches ``initial_temp * restart_temp_ratio``, the reannealing process
        is triggered. Default value of the ratio is 2e-5. Range is (0, 1).
    visit : float, optional
        Parameter for visiting distribution. Default value is 2.62. Higher
        values give the visiting distribution a heavier tail, this makes
        the algorithm jump to a more distant region. The value range is (0, 3].
    accept : float, optional
        Parameter for acceptance distribution. It is used to control the
        probability of acceptance. The lower the acceptance parameter, the
        smaller the probability of acceptance. Default value is -5.0 with
        a range (-1e4, -5].
    maxfun : int, optional
        Soft limit for the number of objective function calls. If the
        algorithm is in the middle of a local search, this number will be
        exceeded, the algorithm will stop just after the local search is
        done. Default value is 1e7.
    seed : {int or `~numpy.random.mtrand.RandomState` instance}, optional
        If `seed` is not specified the `~numpy.random.mtrand.RandomState`
        singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used,
        seeded with `seed`.
        If `seed` is already a ``RandomState`` instance, then that
        instance is used.
        Specify `seed` for repeatable minimizations. The random numbers
        generated with this seed only affect the visiting distribution
        function and new coordinates generation.
    no_local_search : bool, optional
        If `no_local_search` is set to True, a traditional Generalized
        Simulated Annealing will be performed with no local search
        strategy applied.
    callback : callable, optional
        A callback function with signature ``callback(x, f, context)``,
        which will be called for all minima found.
        ``x`` and ``f`` are the coordinates and function value of the
        latest minimum found, and ``context`` has value in [0, 1, 2], with the
        following meaning:

            - 0: minimum detected in the annealing process.
            - 1: detection occurred in the local search process.
            - 2: detection done in the dual annealing process.

        If the callback implementation returns True, the algorithm will stop.
    x0 : ndarray, shape(n,), optional
        Coordinates of a single N-D starting point.

    Returns
    -------
    res : OptimizeResult
        The optimization result represented as a `OptimizeResult` object.
        Important attributes are: ``x`` the solution array, ``fun`` the value
        of the function at the solution, and ``message`` which describes the
        cause of the termination.
        See `OptimizeResult` for a description of other attributes.

    Raises
    ------
    ValueError
        If ``x0`` and ``bounds`` differ in length, if ``restart_temp_ratio``
        is outside (0, 1), if any bound is infinite or NaN, or if a lower
        bound is not strictly below its upper bound.

    Notes
    -----
    This function implements the Dual Annealing optimization. This stochastic
    approach derived from [3]_ combines the generalization of CSA (Classical
    Simulated Annealing) and FSA (Fast Simulated Annealing) [1]_ [2]_ coupled
    to a strategy for applying a local search on accepted locations [4]_.
    An alternative implementation of this same algorithm is described in [5]_
    and benchmarks are presented in [6]_. This approach introduces an advanced
    method to refine the solution found by the generalized annealing
    process. This algorithm uses a distorted Cauchy-Lorentz visiting
    distribution, with its shape controlled by the parameter :math:`q_{v}`

    .. math::

        g_{q_{v}}(\\Delta x(t)) \\propto \\frac{ \\
        \\left[T_{q_{v}}(t) \\right]^{-\\frac{D}{3-q_{v}}}}{ \\
        \\left[{1+(q_{v}-1)\\frac{(\\Delta x(t))^{2}} { \\
        \\left[T_{q_{v}}(t)\\right]^{\\frac{2}{3-q_{v}}}}}\\right]^{ \\
        \\frac{1}{q_{v}-1}+\\frac{D-1}{2}}}

    Where :math:`t` is the artificial time. This visiting distribution is used
    to generate a trial jump distance :math:`\\Delta x(t)` of variable
    :math:`x(t)` under artificial temperature :math:`T_{q_{v}}(t)`.

    From the starting point, after calling the visiting distribution
    function, the acceptance probability is computed as follows:

    .. math::

        p_{q_{a}} = \\min{\\{1,\\left[1-(1-q_{a}) \\beta \\Delta E \\right]^{ \\
        \\frac{1}{1-q_{a}}}\\}}

    Where :math:`q_{a}` is an acceptance parameter. For :math:`q_{a}<1`, zero
    acceptance probability is assigned to the cases where

    .. math::

        [1-(1-q_{a}) \\beta \\Delta E] < 0

    The artificial temperature :math:`T_{q_{v}}(t)` is decreased according to

    .. math::

        T_{q_{v}}(t) = T_{q_{v}}(1) \\frac{2^{q_{v}-1}-1}{\\left( \\
        1 + t\\right)^{q_{v}-1}-1}

    Where :math:`q_{v}` is the visiting parameter.

    .. versionadded:: 1.2.0

    References
    ----------
    .. [1] Tsallis C. Possible generalization of Boltzmann-Gibbs
        statistics. Journal of Statistical Physics, 52, 479-487 (1988).
    .. [2] Tsallis C, Stariolo DA. Generalized Simulated Annealing.
        Physica A, 233, 395-406 (1996).
    .. [3] Xiang Y, Sun DY, Fan W, Gong XG. Generalized Simulated
        Annealing Algorithm and Its Application to the Thomson Model.
        Physics Letters A, 233, 216-220 (1997).
    .. [4] Xiang Y, Gong XG. Efficiency of Generalized Simulated
        Annealing. Physical Review E, 62, 4473 (2000).
    .. [5] Xiang Y, Gubian S, Suomela B, Hoeng J. Generalized
        Simulated Annealing for Efficient Global Optimization: the GenSA
        Package for R. The R Journal, Volume 5/1 (2013).
    .. [6] Mullen, K. Continuous Global Optimization in R. Journal of
        Statistical Software, 60(6), 1 - 45, (2014). DOI:10.18637/jss.v060.i06

    Examples
    --------
    The following example is a 10-D problem, with many local minima.
    The function involved is called Rastrigin
    (https://en.wikipedia.org/wiki/Rastrigin_function)

    >>> from scipy.optimize import dual_annealing
    >>> func = lambda x: np.sum(x*x - 10*np.cos(2*np.pi*x)) + 10*np.size(x)
    >>> lw = [-5.12] * 10
    >>> up = [5.12] * 10
    >>> ret = dual_annealing(func, bounds=list(zip(lw, up)), seed=1234)
    >>> print("global minimum: xmin = {0}, f(xmin) = {1:.6f}".format(
    ...       ret.x, ret.fun))
    global minimum: xmin = [-4.26437714e-09 -3.91699361e-09 -1.86149218e-09 -3.97165720e-09
     -6.29151648e-09 -6.53145322e-09 -3.93616815e-09 -6.55623025e-09
    -6.05775280e-09 -5.00668935e-09], f(xmin) = 0.000000

    """  # noqa: E501
    # NOTE: ``local_search_options`` keeps its ``{}`` default for interface
    # compatibility; it is only unpacked below, never mutated.
    if x0 is not None and not len(x0) == len(bounds):
        raise ValueError('Bounds size does not match x0')

    lu = list(zip(*bounds))
    lower = np.array(lu[0])
    upper = np.array(lu[1])
    # Check that restart temperature ratio is correct
    if restart_temp_ratio <= 0. or restart_temp_ratio >= 1.:
        raise ValueError('Restart temperature ratio has to be in range (0, 1)')
    # Checking bounds are valid
    if (np.any(np.isinf(lower)) or np.any(np.isinf(upper))
            or np.any(np.isnan(lower)) or np.any(np.isnan(upper))):
        raise ValueError('Some bounds values are inf values or nan values')
    # Checking that bounds are consistent
    if not np.all(lower < upper):
        raise ValueError('Bounds are not consistent min < max')
    # Checking that bounds are the same length
    if not len(lower) == len(upper):
        raise ValueError('Bounds do not have the same dimensions')

    # Wrapper for the objective function
    func_wrapper = ObjectiveFunWrapper(func, maxfun, *args)
    # Wrapper for the minimizer
    minimizer_wrapper = LocalSearchWrapper(bounds, func_wrapper,
                                           **local_search_options)
    # Initialization of RandomState for reproducible runs if seed provided
    rand_state = check_random_state(seed)
    # Initialization of the energy state
    energy_state = EnergyState(lower, upper, callback)
    energy_state.reset(func_wrapper, rand_state, x0)
    # Minimum value of annealing temperature reached to perform
    # re-annealing
    temperature_restart = initial_temp * restart_temp_ratio
    # VisitingDistribution instance
    visit_dist = VisitingDistribution(lower, upper, visit, rand_state)
    # Strategy chain instance
    strategy_chain = StrategyChain(accept, visit_dist, func_wrapper,
                                   minimizer_wrapper, rand_state, energy_state)
    need_to_stop = False
    iteration = 0
    message = []
    # OptimizeResult object to be returned
    optimize_res = OptimizeResult()
    optimize_res.success = True
    optimize_res.status = 0

    t1 = np.exp((visit - 1) * np.log(2.0)) - 1.0
    # Run the search loop. Each pass of the inner ``for`` is one annealing
    # schedule; a ``break`` without ``need_to_stop`` restarts the schedule
    # (re-annealing) while ``iteration`` keeps counting across restarts.
    while (not need_to_stop):
        # Budget check at schedule start. For maxiter > 0 this fires exactly
        # where the in-loop check would at i == 0; for maxiter <= 0 the inner
        # loop body never runs, so without this guard the while loop would
        # never terminate.
        if iteration >= maxiter:
            message.append("Maximum number of iteration reached")
            break
        for i in range(maxiter):
            # Compute temperature for this step
            s = float(i) + 2.0
            t2 = np.exp((visit - 1) * np.log(s)) - 1.0
            temperature = initial_temp * t1 / t2
            if iteration >= maxiter:
                message.append("Maximum number of iteration reached")
                need_to_stop = True
                break
            # Need a re-annealing process?
            if temperature < temperature_restart:
                energy_state.reset(func_wrapper, rand_state)
                break
            # starting strategy chain
            val = strategy_chain.run(i, temperature)
            if val is not None:
                message.append(val)
                need_to_stop = True
                optimize_res.success = False
                break
            # Possible local search at the end of the strategy chain
            if not no_local_search:
                val = strategy_chain.local_search()
                if val is not None:
                    message.append(val)
                    need_to_stop = True
                    optimize_res.success = False
                    break
            iteration += 1

    # Setting the OptimizeResult values
    optimize_res.x = energy_state.xbest
    optimize_res.fun = energy_state.ebest
    optimize_res.nit = iteration
    optimize_res.nfev = func_wrapper.nfev
    optimize_res.njev = func_wrapper.ngev
    optimize_res.nhev = func_wrapper.nhev
    optimize_res.message = message
    return optimize_res
Ejemplo n.º 22
0
def fmin_bfgs_f(f_g,
                x0,
                B0=None,
                M=2,
                gtol=1e-5,
                Delta=10.0,
                maxiter=None,
                callback=None,
                norm_ord=np.inf,
                **_kwargs):
    """Minimize a function with BFGS using a non-monotone line search.

    The Hessian approximation ``Bk`` is updated with the standard BFGS
    formula after every successful line search and then inverted to obtain
    the matrix used for the next search direction.  When the line search
    raises or returns ``alpha = None`` a full, unscaled step along the
    search direction is taken instead; after more than two such fallbacks
    the routine gives up.

    Parameters
    ----------
    f_g : callable
        Called as ``f_g(x)`` and must return ``(value, gradient)``.  It must
        also expose the attributes ``fun`` and ``grad`` (handed to the line
        search) and ``ncall`` (reported as the evaluation count).
    x0 : array_like
        Starting point.
    B0 : ndarray, optional
        Initial Hessian approximation; the identity matrix when ``None``.
    M : int, optional
        Passed to ``Seq(M)``.  Presumably the number of recent objective
        values remembered for the non-monotone reference value -- confirm
        against ``Seq``.
    gtol : float, optional
        Iteration stops once the gradient norm is ``<= gtol``.
    Delta : float, optional
        Unused; kept for interface compatibility.
    maxiter : int, optional
        Maximum number of iterations; ``200 * len(x0)`` when ``None``.
    callback : callable, optional
        Called as ``callback(xk)`` after each successful BFGS update.
    norm_ord : optional
        ``ord`` argument forwarded to ``np.linalg.norm`` for the gradient
        norm used in the stopping test.
    **_kwargs
        Ignored.

    Returns
    -------
    OptimizeResult
        With fields ``x``, ``fun``, ``jac``, ``hess`` (the final ``Bk``),
        ``nit`` (value of the loop counter on exit), ``nfev`` and ``njev``
        (both ``f_g.ncall``), ``status``, ``success`` and ``message``.
        ``status`` is 0 when the gradient tolerance was met, 1 when the
        line search failed more than twice, and 2 when ``maxiter``
        iterations were exhausted.
    """
    fk, gk = f_g(x0)
    Bk = np.eye(len(x0)) if B0 is None else B0
    Hk = np.linalg.inv(Bk)
    maxiter = 200 * len(x0) if maxiter is None else maxiter
    xk = x0

    def norm(x):
        return np.linalg.norm(x, ord=norm_ord)

    k = 0  # keeps ``nit`` defined when the loop body never runs
    old_old_fval = fk + np.linalg.norm(gk) / 2
    old_fval = fk
    f_s = Seq(M)
    f_s.add(fk)
    flag = 0
    re_search = 0
    for k in range(maxiter):
        if norm(gk) <= gtol:
            break
        pk = -np.dot(Hk, gk)
        old_fval = fk
        try:
            # Attribute look-ups stay inside the ``try`` so that any failure
            # while preparing or running the line search takes the fallback
            # path below, exactly as before.
            (
                alpha_k,
                _fc,
                _gc,
                old_fval,
                old_old_fval,
                gfkp1,
            ) = line_search_wolfe2(f_g.fun, f_g.grad, xk, pk, gk,
                                   f_s.get_max(), old_fval, old_old_fval)
        except Exception as e:
            print(e)
            alpha_k = None
        else:
            if alpha_k is None:
                print("alpha is None")

        if alpha_k is None:
            # Line search failed: take the full step and re-evaluate, but
            # give up once this has happened more than twice.
            re_search += 1
            xk = xk + pk
            fk, gk = f_g(xk)
            old_fval, old_old_fval = fk, old_fval
            f_s.add(fk)
            if re_search > 2:
                flag = 1
                break
            continue

        # The line search already evaluated the objective and gradient at
        # the accepted point, so reuse them instead of calling f_g again.
        dki = alpha_k * pk
        fki, gki = old_fval, gfkp1
        xk = xk + dki
        fk = fki
        yk = gki - gk
        gk = gki

        # BFGS update of the Hessian approximation.
        bs = np.dot(Bk, dki)
        Bk = (Bk + np.outer(yk, yk) / np.dot(yk, dki) -
              np.outer(bs, bs) / np.dot(bs, dki))
        try:
            Hk = np.linalg.inv(Bk)
        except np.linalg.LinAlgError:
            # Bk is singular: keep the previous inverse for the next
            # direction rather than aborting the whole run.
            pass
        f_s.add(fk)
        if callback is not None:
            callback(xk)
    else:
        # Loop ran to completion without meeting the gradient tolerance.
        flag = 2

    s = OptimizeResult()
    s.message = message_dict[flag]
    s.fun = float(fk)
    s.nit = k
    s.nfev = f_g.ncall
    s.njev = f_g.ncall
    s.status = flag
    s.x = np.array(xk)
    s.jac = np.array(gk)
    s.hess = np.array(Bk)
    s.success = flag == 0
    return s