Example #1
def hanso(func,
          x0=None,
          grad=None,
          nvar=None,
          nstart=None,
          sampgrad=False,
          funcrtol=1e-20,
          gradnormtol=1e-6,
          verbose=2,
          fvalquit=-np.inf,
          cpumax=np.inf,
          maxit=100,
          **kwargs):
    """
    HANSO: Hybrid Algorithm for Nonsmooth Optimization

    The algorithm has two phases.
    BFGS phase: BFGS is run from multiple starting points, taken from
    the columns of the x0 parameter if provided, and otherwise from 10
    randomly generated points. If the termination test is satisfied at the
    best point found by BFGS, or if nvar > 100, HANSO terminates;
    otherwise, it continues to:

    Gradient sampling phases: 3 gradient sampling phases are run from the
    lowest point found, using the sampling radii
    10*evaldist, evaldist and evaldist/10.
    Termination takes place immediately during any phase if
    cpumax CPU time is exceeded.

    References
    ----------
    A.S. Lewis and M.L. Overton, Nonsmooth Optimization via Quasi-Newton
    Methods, Math Programming, 2012

    J.V. Burke, A.S. Lewis and M.L. Overton, A Robust Gradient Sampling
    Algorithm for Nonsmooth, Nonconvex Optimization
    SIAM J. Optimization 15 (2005), pp. 751-779

    Parameters
    ----------
    func: callable function on 1D arrays of length nvar
        function being optimized

    grad: callable function
        gradient of func

    fvalquit: float, optional (default -inf)
        param passed to bfgs1run function

    gradnormtol: float, optional (default 1e-6)
        termination tolerance for smallest vector in convex hull of saved
        gradients

    verbose: int, optional (default 2)
        param passed to bfgs1run function

    cpumax: float, optional (default inf)
        quit if cpu time in secs exceeds this (applies to total running time)

    sampgrad: boolean, optional (default False)
        if set, gradient sampling will be used to continue the algorithm
        in case BFGS fails

    **kwargs: param-value dict
        optional parameters passed to bfgs backend. Possible key/values are:
        x0: 2D array of shape (nvar, nstart), optional (default None)
            initial points, one per column

        nvar: int, optional (default None)
            number of dimensions in the problem (mutually exclusive with x0)

        nstart: int, optional (default None)
            number of starting points for BFGS algorithm (mutually exclusive
            with x0)

        maxit: int, optional (default 100)
            param passed to bfgs1run function
        wolfe1: float, optional (default 0)
            param passed to bfgs1run function

        wolfe2: float, optional (default .5)
            param passed to bfgs1run function

    Returns
    -------
    x: 1D array of length nvar
        final iterate

    f: list of nstart floats
        final function values, one per run of bfgs1run

    d: list of nstart 1D arrays, each of same length as input nvar
        final smallest vectors in convex hull of saved gradients,
        one array per run of bfgs1run

    H: list of nstart 2D arrays, each of shape (nvar, nvar)
        final inverse Hessian approximations, one array per run of bfgs1run

    itrecs: list of nstart int
        numbers of iterations, one per run of bfgs1run; see bfgs1run
        for details

    inforecs: list of int
        reason for termination; see bfgs1run for details

    pobj: list of tuples of the form (duration of iteration, final func value)
        trajectory for the best starting point (i.e. the starting point that
        led to the greatest overall decrease in the cost function).
        Note that the O(1) time consumed by the gradient-sampling stage is not
        counted.

    Optional Outputs (in case output_records is True):
    Xrecs: list of nstart 2D arrays, each of shape (iter, nvar)
        iterates where saved gradients were evaluated; one array per run
        of bfgs1run; see bfgs1run
        for details

    Grecs: list of nstart 2D arrays, each of shape (nvar, nvar)
        gradients evaluated at these points, one per run of bfgs1run;
        see bfgs1run for details

    wrecs: list of nstart 1D arrays, each of length iter
        weights defining convex combinations d = G*w; one array per
        run of bfgs1run; see bfgs1run for details

    fevalrecs: list of nstart 1D arrays, each of length iter
        records of all function evaluations in the line searches;
        one array per run of bfgs1run; see bfgs1run for details

    xrecs: list of nstart 2D arrays, each of shape (iter, nvar)
        record of x iterates

    Hrecs: list of nstart 2D arrays, each of shape (iter, nvar)
       record of H (Hessian) iterates; one array per run of bfgs1run;
       see bfgs1run for details

    Raises
    ------
    RuntimeError

    """
    def _log(msg, level=0):
        if verbose > level:
            print(msg)

    # sanitize x0
    if x0 is None:
        assert nvar is not None, (
            "No value specified for x0, expecting a value for nvar")
        assert nstart is not None, (
            "No value specified for x0, expecting a value for nstart")

        x0 = setx0(nvar, nstart)
    else:
        assert nvar is None, (
            "Value specified for x0, expecting no value for nvar")

        assert nstart is None, (
            "Value specified for x0, expecting no value for nstart")

        x0 = np.array(x0)
        if x0.ndim == 1:
            x0 = x0.reshape((-1, 1))

        nvar, nstart = x0.shape

    cpufinish = time.time() + cpumax

    # run BFGS step
    kwargs['output_records'] = 1
    x, f, d, H, _, info, X, G, w, pobj = bfgs(func,
                                              x0=x0,
                                              grad=grad,
                                              fvalquit=fvalquit,
                                              funcrtol=funcrtol,
                                              gradnormtol=gradnormtol,
                                              cpumax=cpumax,
                                              maxit=maxit,
                                              verbose=verbose,
                                              **kwargs)

    # throw away all but the best result
    assert len(f) == np.array(x).shape[1], np.array(x).shape
    indx = np.argmin(f)
    f = f[indx]
    x = x[..., indx]
    d = d[..., indx]
    H = H[indx]  # bug if do this when only one start point: H already matrix
    X = X[indx]
    G = G[indx]
    w = w[indx]
    pobj = pobj[indx]

    dnorm = linalg.norm(d, 2)
    # the 2nd argument will not be used since x == X[:, 0] after bfgs
    loc, X, G, w = postprocess(x, np.nan, dnorm, X, G, w, verbose=verbose)

    if np.isnan(f) or np.isinf(f):
        _log('hanso: f is infinite or nan at all starting points')
        return x, f, loc, X, G, w, H, pobj

    if time.time() > cpufinish:
        _log('hanso: cpu time limit exceeded')
        _log('hanso: best point found has f = %g with local optimality '
             'measure: dnorm = %5.1e, evaldist = %5.1e' %
             (f, loc['dnorm'], loc['evaldist']))
        return x, f, loc, X, G, w, H, pobj

    if f < fvalquit:
        _log('hanso: reached target objective')
        _log('hanso: best point found has f = %g with local optimality'
             ' measure: dnorm = %5.1e, evaldist = %5.1e' %
             (f, loc['dnorm'], loc['evaldist']))
        return x, f, loc, X, G, w, H, pobj

    if dnorm < gradnormtol:
        _log('hanso: verified optimality within tolerance in bfgs phase')
        _log('hanso: best point found has f = %g with local optimality '
             'measure: dnorm = %5.1e, evaldist = %5.1e' %
             (f, loc['dnorm'], loc['evaldist']))
        return x, f, loc, X, G, w, H, pobj

    if sampgrad:
        # launch gradient sampling
        # time0 = time.time()
        f_BFGS = f
        # save optimality certificate info in case gradient sampling cannot
        # improve the one provided by BFGS
        dnorm_BFGS = dnorm
        loc_BFGS = loc
        d_BFGS = d
        X_BFGS = X
        G_BFGS = G
        w_BFGS = w
        x0 = x.reshape((-1, 1))

        # otherwise gradient sampling is too expensive
        if maxit > 100:
            maxit = 100

        # # otherwise grad sampling will augment with random starts
        # x0 = x0[..., :1]
        # assert 0, x0.shape

        cpumax = cpufinish - time.time()  # time left

        # run gradsamp proper
        x, f, g, dnorm, X, G, w = gradsamp(func,
                                           x0,
                                           grad=grad,
                                           maxit=maxit,
                                           cpumax=cpumax)

        if f == f_BFGS:  # gradient sampling did not reduce f
            _log('hanso: gradient sampling did not reduce f below best point'
                 ' found by BFGS\n')
            # use the better optimality certificate
            if dnorm > dnorm_BFGS:
                loc = loc_BFGS
                d = d_BFGS
                X = X_BFGS
                G = G_BFGS
                w = w_BFGS
        elif f < f_BFGS:
            loc, X, G, w = postprocess(x, g, dnorm, X, G, w, verbose=verbose)
            _log('hanso: gradient sampling reduced f below best point found'
                 ' by BFGS\n')
        else:
            raise RuntimeError('hanso: f > f_BFGS: this should never happen')

        x = x[0]
        f = f[0]
        # pobj.append((time.time() - time0, f))
        return x, f, loc, X, G, w, H, pobj
    else:
        return x, f, loc, X, G, w, H, pobj
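A minimal usage sketch for the hanso driver above (not part of the original example): the l1 objective and its subgradient are defined inline purely for illustration; any func/grad pair with the documented signatures would do.

import numpy as np

def l1_obj(x):
    # nonsmooth objective: f(x) = ||x||_1
    return np.abs(x).sum()

def l1_subgrad(x):
    # a subgradient of the l1 norm
    return np.sign(x)

# 5 random starting points in 10 dimensions, one per column; hanso keeps the
# best BFGS run and, with sampgrad=True, refines it by gradient sampling.
rng = np.random.RandomState(0)
x0 = rng.randn(10, 5)
x, f = hanso(l1_obj, x0=x0, grad=l1_subgrad, sampgrad=True,
             maxit=50, verbose=0)[:2]
print("best f found:", f)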
Example #2

if __name__ == '__main__':
    nvar = 300
    nstart = 20
    func_name = 'Rosenbrock "Banana" function in %i dimensions' % nvar
    import os
    from example_functions import (l1, grad_l1)
    from setx0 import setx0
    import scipy.io
    if os.path.isfile("/tmp/x0.mat"):
        x0 = scipy.io.loadmat("/tmp/x0.mat",
                              squeeze_me=True,
                              struct_as_record=False)['x0']
    else:
        x0 = setx0(nvar, nstart)

    if x0.ndim == 1:
        x0 = x0.reshape((-1, 1))

    _x = None
    _f = np.inf
    for j in range(x0.shape[1]):
        print(">" * 100, "(j = %i)" % j)
        x, f = bfgs1run(l1,
                        x0[..., j],
                        grad=grad_l1,
                        maxit=100,
                        verbose=2,
                        nvec=10)[:2]
        if f < _f:
            _x, _f = x, f
Example #3
def bfgs(func, x0=None, grad=None, nvar=None, nstart=None, maxit=100, nvec=0,
         verbose=1, funcrtol=1e-20, gradnormtol=1e-6, fvalquit=-np.inf,
         xnormquit=np.inf, cpumax=np.inf, strongwolfe=False, wolfe1=0,
         wolfe2=.5, quitLSfail=1, ngrad=None, evaldist=1e-6, H0=None, scale=1,
         output_records=2, callback=None):
    """
    Run BFGS from one or more starting points (one per column of x0),
    calling bfgs1run once per starting point.

    Parameters
    ----------
    func: callable function on 1D arrays of length nvar
        function being optimized

    grad: callable function
        gradient of func

    x0: 2D array of shape (nvar, nstart), optional (default None)
        initial points, one per column

    nvar: int, optional (default None)
        number of dimensions in the problem (mutually exclusive with x0)

    nstart: int, optional (default None)
        number of starting points for BFGS algorithm (mutually exclusive
        with x0)

    maxit: int, optional (default 100)
        param passed to bfgs1run function

    wolfe1: float, optional (default 0)
        param passed to bfgs1run function

    wolfe2: float, optional (default .5)
        param passed to bfgs1run function

    gradnormtol: float, optional (default 1e-6)
        termination tolerance on d: smallest vector in convex hull of up
        to ngrad gradients

    xnormquit: float, optional (default inf)
        quit if norm(x) exceeds this value

    evaldist: float, optional (default 1e-6)
        the gradients used in the termination test qualify only if
        they are evaluated at points approximately within
        distance evaldist of x

    H0: 2D array of shape (nvar, nvar), optional (default identity matrix)
        for full BFGS: initial inverse Hessian approximation (must be
        positive definite, but this is not checked), this could be drawn
        from a Wishart distribution;
        for limited memory BFGS: same, but applied every iteration
        (must be sparse in this case)

    scale: boolean, optional (default True)
        for full BFGS: 1 to scale H0 at first iteration, 0 otherwise
        for limited memory BFGS: 1 to scale H0 every time, 0 otherwise

    cpumax: float, optional (default inf)
        quit if cpu time in secs exceeds this (applies to total running
        time)

    fvalquit: float, optional (default -inf)
        param passed to bfgs1run function

    quitLSfail: int, optional (default 1)
        1 to quit when the line search fails, 0 to continue
        (potentially useful if func is not numerically continuous)

    ngrad: int, optional (default max(100, 2 * nvar))
        number of gradients willing to save and use in solving QP to check
        optimality tolerance on smallest vector in their convex hull;
        see also next two options

    verbose: int, optional (default 1)
        param passed to bfgs1run function

    output_records: int, optional (default 2)
        which low-level execution records to return from the bfgs1run
        calls. Possible values are:
        0: don't return execution records from low-level bfgs1run calls
        1: return H and w records from low-level bfgs1run calls
        2: return all execution records from low-level bfgs1run calls

    Returns
    -------
    x: 2D array of shape (nvar, nstart)
        final iterates, one column per starting point (see Notes)

    f: list of nstart floats
        final function values, one per run of bfgs1run

    d: list of nstart 1D arrays, each of same length as input nvar
       final smallest vectors in convex hull of saved gradients,
       one array per run of bfgs1run

    H: list of nstart 2D arrays, each of shape (nvar, nvar)
       final inverse Hessian approximations, one array per run of bfgs1run

    itrecs: list of nstart int
       numbers of iterations, one per run of bfgs1run; see bfgs1run
       for details

    inforecs: list of int
        reason for termination; see bfgs1run for details

    pobj: list of lists of tuples of the form (duration of iteration,
    final func value)
        for each starting point, the energy trajectory for each iteration
        of the iterates therefrom

    Optional Outputs (in case output_records is True):
    Xrecs: list of nstart 2D arrays, each of shape (iter, nvar)
        iterates where saved gradients were evaluated; one array per run
        of bfgs1run; see bfgs1run
        for details

    Grecs: list of nstart 2D arrays, each of shape (nvar, nvar)
        gradients evaluated at these points, one per run of bfgs1run;
        see bfgs1run for details

    wrecs: list of nstart 1D arrays, each of length iter
        weights defining convex combinations d = G*w; one array per
        run of bfgs1run; see bfgs1run for details

    fevalrecs: list of nstart 1D arrays, each of length iter
        records of all function evaluations in the line searches;
        one array per run of bfgs1run; see bfgs1run for details

    xrecs: list of nstart 2D arrays, each of shape (iter, nvar)
        record of x iterates

    Hrecs: list of nstart 2D arrays, each of shape (iter, nvar)
       record of H (Hessian) iterates; one array per run of bfgs1run;
       see bfgs1run for details

    Notes
    -----
    If there is more than one starting vector, then:
    f, iter, info are vectors of length nstart;
    x, d are matrices of size nvar by nstart;
    H, X, G, w, xrec, Hrec are lists of length nstart, and
    fevalrec is a list of lists.
    Thus, for example, d[:, i] = G[i].dot(w[i]), for i = 0, ..., nstart - 1.

    BFGS is normally used for optimizing smooth, not necessarily convex,
    functions, for which the convergence rate is generically superlinear.
    But it also works very well for functions that are nonsmooth at their
    minimizers, typically with a linear convergence rate and a final
    inverse Hessian approximation that is very ill conditioned, as long
    as a weak Wolfe line search is used. This version of BFGS will work
    well both for smooth and nonsmooth functions and has a stopping
    criterion that applies for both cases, described above.
    Reference:  A.S. Lewis and M.L. Overton, Nonsmooth Optimization via
    Quasi-Newton Methods, Math Programming, 2012

    See Also
    --------
    `gradsamp`

    """

    def _fg(x):
        # return func(x) alone when no gradient callable is supplied,
        # otherwise the pair (func(x), grad(x))
        return func(x) if grad is None else (func(x), grad(x))

    def _log(msg, level=0):
        if verbose > level:
            print(msg)

    # sanitize x0
    if x0 is None:
        assert nvar is not None, (
            "No value specified for x0, expecting a value for nvar")
        assert nstart is not None, (
            "No value specified for x0, expecting a value for nstart")

        x0 = setx0(nvar, nstart)
    else:
        assert nvar is None, (
            "Value specified for x0, expecting no value for nvar")

        assert nstart is None, (
            "Value specified for x0, expecting no value for nstart")

        if x0.ndim == 1:
            x0 = x0[:, np.newaxis]

        nvar, nstart = x0.shape

    cpufinish = time.time() + cpumax
    pobj = []
    _f = []
    itrecs = []
    inforecs = []
    _d = []
    _x = []
    _H = []
    if output_records:
        xrecs = []
        fevalrecs = []
        Hrecs = []
        Xrecs = []
        Grecs = []
        wrecs = []
    for run in range(nstart):
        _log("Starting bfgs1run %i/%i..." % (run + 1, nstart))
        if verbose > 0 and nstart > 1:
            _log('bfgs: starting point %d' % (run + 1))
        cpumax = cpufinish - time.time()
        if output_records > 1:
            x, f, d, HH, it, info, X, G, w, fevalrec, xrec, Hrec, times = \
                bfgs1run(func, x0[..., run], grad=grad, maxit=maxit,
                         wolfe1=wolfe1, wolfe2=wolfe2, funcrtol=funcrtol,
                         gradnormtol=gradnormtol, fvalquit=fvalquit,
                         xnormquit=xnormquit, cpumax=cpumax,
                         strongwolfe=strongwolfe, nvec=nvec, verbose=verbose,
                         quitLSfail=quitLSfail, ngrad=ngrad, evaldist=evaldist,
                         H0=H0, scale=scale, callback=callback)
            _x.append(x)
            _f.append(f)
            _d.append(d)
            itrecs.append(it)
            inforecs.append(info)
            Xrecs.append(X)
            Grecs.append(G)
            wrecs.append(w)
            fevalrecs.append(fevalrec)
            xrecs.append(xrec)
            Hrecs.append(Hrec)
        elif output_records > 0:
            x, f, d, HH, it, info, X, G, w, _, _, _, times = bfgs1run(
                func, x0[..., run], grad=grad, maxit=maxit, wolfe1=wolfe1,
                wolfe2=wolfe2, funcrtol=funcrtol, gradnormtol=gradnormtol,
                fvalquit=fvalquit, xnormquit=xnormquit, cpumax=cpumax,
                strongwolfe=strongwolfe, nvec=nvec, verbose=verbose,
                quitLSfail=quitLSfail, ngrad=ngrad, evaldist=evaldist, H0=H0,
                scale=scale, callback=callback)
            _x.append(x)
            _f.append(f)
            _d.append(d)
            itrecs.append(it)
            inforecs.append(info)
            Xrecs.append(X)
            Grecs.append(G)
            wrecs.append(w)
        else:  # avoid computing unnecessary arrays
            x, f, d, HH, it, info, _, _, _, _, _, _, times = bfgs1run(
                func, x0[..., run], grad=grad, maxit=maxit, wolfe1=wolfe1,
                wolfe2=wolfe2, funcrtol=funcrtol, gradnormtol=gradnormtol,
                fvalquit=fvalquit, xnormquit=xnormquit, cpumax=cpumax,
                strongwolfe=strongwolfe, nvec=nvec, verbose=verbose,
                quitLSfail=quitLSfail, ngrad=ngrad, evaldist=evaldist, H0=H0,
                scale=scale, callback=callback)
            _x.append(x)
            _f.append(f)
            _d.append(d)
            itrecs.append(it)
            inforecs.append(info)

        _log('... done (bfgs1run %i/%i).' % (run + 1, nstart))
        _log("\r\n")

        # HH should be exactly symmetric as of version 2.02, but
        # symmetrizing it does no harm
        _H.append((HH + HH.T) / 2.)

        # commit the (duration, func value) trajectory for this run; use a
        # dedicated loop variable so the final f from bfgs1run is not clobbered
        pobj.append([(duration, fval) for duration, fval in times])

        # stop early if the time budget is exhausted, the target value is
        # reached, or the iterate norm exceeds the bound
        if (time.time() > cpufinish or f < fvalquit
                or linalg.norm(x, 2) > xnormquit):
            break
    # end of for loop

    # we're done: now collect and return outputs to caller
    _x = np.array(_x).T
    _f = np.array(_f)
    _d = np.array(_d).T
    if output_records > 1:
        return (_x, _f, _d, _H, itrecs, inforecs, Xrecs, Grecs, wrecs,
                fevalrecs, xrecs, Hrecs, pobj)
    elif output_records > 0:
        return _x, _f, _d, _H, itrecs, inforecs, Xrecs, Grecs, wrecs, pobj
    else:
        return _x, _f, _d, _H, itrecs, inforecs, pobj
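The gradnormtol test in the functions above refers to d, the smallest vector in the convex hull of recently saved gradients; its norm is the local optimality measure reported as dnorm. A small illustrative sketch of that quantity (not the HANSO backend's own QP code), using scipy's SLSQP solver and assuming G stores one gradient per column:

import numpy as np
from scipy.optimize import minimize

def smallest_vector_in_convex_hull(G):
    # minimize ||G w||^2 over the simplex {w >= 0, sum(w) = 1};
    # d = G w is the smallest vector in the convex hull of the columns of G
    m = G.shape[1]
    res = minimize(lambda w: 0.5 * np.sum(G.dot(w) ** 2),
                   np.ones(m) / m,                       # uniform start
                   method='SLSQP',
                   bounds=[(0., 1.)] * m,
                   constraints={'type': 'eq',
                                'fun': lambda w: w.sum() - 1.})
    w = res.x
    return G.dot(w), w

# two opposite gradients: their convex hull contains 0, so dnorm ~ 0 and the
# termination test dnorm < gradnormtol would be satisfied
G = np.array([[1., -1.],
              [0., 0.]])
d, w = smallest_vector_in_convex_hull(G)
print(np.linalg.norm(d, 2), w)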
Example #4
def hanso(func, x0=None, grad=None, nvar=None, nstart=None, sampgrad=False,
          funcrtol=1e-20, gradnormtol=1e-6, verbose=2, fvalquit=-np.inf,
          cpumax=np.inf, maxit=100, callback=None, **kwargs):
    """
    HANSO: Hybrid Algorithm for Nonsmooth Optimization

    The algorithm has two phases.
    BFGS phase: BFGS is run from multiple starting points, taken from
    the columns of the x0 parameter if provided, and otherwise from 10
    randomly generated points. If the termination test is satisfied at the
    best point found by BFGS, or if nvar > 100, HANSO terminates;
    otherwise, it continues to:

    Gradient sampling phases: 3 gradient sampling phases are run from the
    lowest point found, using the sampling radii
    10*evaldist, evaldist and evaldist/10.
    Termination takes place immediately during any phase if
    cpumax CPU time is exceeded.

    References
    ----------
    A.S. Lewis and M.L. Overton, Nonsmooth Optimization via Quasi-Newton
    Methods, Math Programming, 2012

    J.V. Burke, A.S. Lewis and M.L. Overton, A Robust Gradient Sampling
    Algorithm for Nonsmooth, Nonconvex Optimization
    SIAM J. Optimization 15 (2005), pp. 751-779

    Parameters
    ----------
    func: callable function on 1D arrays of length nvar
        function being optimized

    grad: callable function
        gradient of func

    fvalquit: float, optional (default -inf)
        param passed to bfgs1run function

    gradnormtol: float, optional (default 1e-6)
        termination tolerance for smallest vector in convex hull of saved
        gradients

    verbose: int, optional (default 2)
        param passed to bfgs1run function

    cpumax: float, optional (default inf)
        quit if cpu time in secs exceeds this (applies to total running time)

    sampgrad: boolean, optional (default False)
        if set, gradient sampling will be used to continue the algorithm
        in case BFGS fails

    **kwargs: param-value dict
        optional parameters passed to bfgs backend. Possible key/values are:
        x0: 2D array of shape (nvar, nstart), optional (default None)
            initial points, one per column

        nvar: int, optional (default None)
            number of dimensions in the problem (mutually exclusive with x0)

        nstart: int, optional (default None)
            number of starting points for BFGS algorithm (mutually exclusive
            with x0)

        maxit: int, optional (default 100)
            param passed to bfgs1run function
        wolfe1: float, optional (default 0)
            param passed to bfgs1run function

        wolfe2: float, optional (default .5)
            param passed to bfgs1run function

    Returns
    -------
    x: 1D array of length nvar
        final iterate

    f: list of nstart floats
        final function values, one per run of bfgs1run

    d: list of nstart 1D arrays, each of same length as input nvar
        final smallest vectors in convex hull of saved gradients,
        one array per run of bfgs1run

    H: list of nstart 2D arrays, each of shape (nvar, nvar)
        final inverse Hessian approximations, one array per run of bfgs1run

    itrecs: list of nstart int
        numbers of iterations, one per run of bfgs1run; see bfgs1run
        for details

    inforecs: list of int
        reason for termination; see bfgs1run for details

    pobj: list of tuples of the form (duration of iteration, final func value)
        trajectory for the best starting point (i.e. the starting point that
        led to the greatest overall decrease in the cost function).
        Note that the O(1) time consumed by the gradient-sampling stage is not
        counted.

    Optional Outputs (in case output_records is True):
    Xrecs: list of nstart 2D arrays, each of shape (iter, nvar)
        iterates where saved gradients were evaluated; one array per run
        of bfgs1run; see bfgs1run
        for details

    Grecs: list of nstart 2D arrays, each of shape (nvar, nvar)
        gradients evaluated at these points, one per run of bfgs1run;
        see bfgs1run for details

    wrecs: list of nstart 1D arrays, each of length iter
        weights defining convex combinations d = G*w; one array per
        run of bfgs1run; see bfgs1run for details

    fevalrecs: list of nstart 1D arrays, each of length iter
        records of all function evaluations in the line searches;
        one array per run of bfgs1run; see bfgs1run for details

    xrecs: list of nstart 2D arrays, each of shape (iter, nvar)
        record of x iterates

    Hrecs: list of nstart 2D arrays, each of shape (iter, nvar)
       record of H (Hessian) iterates; one array per run of bfgs1run;
       see bfgs1run for details

    Raises
    ------
    RuntimeError

    """

    def _log(msg, level=0):
        if verbose > level:
            print(msg)

    # sanitize x0
    if x0 is None:
        assert nvar is not None, (
            "No value specified for x0, expecting a value for nvar")
        assert nstart is not None, (
            "No value specified for x0, expecting a value for nstart")

        x0 = setx0(nvar, nstart)
    else:
        assert nvar is None, (
            "Value specified for x0, expecting no value for nvar")

        assert nstart is None, (
            "Value specified for x0, expecting no value for nstart")

        x0 = np.array(x0)
        if x0.ndim == 1:
            x0 = x0.reshape((-1, 1))

        nvar, nstart = x0.shape

    cpufinish = time.time() + cpumax

    # run BFGS step
    kwargs['output_records'] = 1
    x, f, d, H, _, info, X, G, w, pobj = bfgs(
        func, x0=x0, grad=grad, fvalquit=fvalquit, funcrtol=funcrtol,
        gradnormtol=gradnormtol, cpumax=cpumax, maxit=maxit,
        verbose=verbose, callback=callback, **kwargs)

    # throw away all but the best result
    assert len(f) == np.array(x).shape[1], np.array(x).shape
    indx = np.argmin(f)
    f = f[indx]
    x = x[..., indx]
    d = d[..., indx]
    H = H[indx]  # bug if do this when only one start point: H already matrix
    X = X[indx]
    G = G[indx]
    w = w[indx]
    pobj = pobj[indx]

    dnorm = linalg.norm(d, 2)
    # the 2nd argument will not be used since x == X[:, 0] after bfgs
    loc, X, G, w = postprocess(x, np.nan, dnorm, X, G, w, verbose=verbose)

    if np.isnan(f) or np.isinf(f):
        _log('hanso: f is infinite or nan at all starting points')
        return x, f, loc, X, G, w, H, pobj

    if time.time() > cpufinish:
        _log('hanso: cpu time limit exceeded')
        _log('hanso: best point found has f = %g with local optimality '
             'measure: dnorm = %5.1e, evaldist = %5.1e' % (
                f, loc['dnorm'], loc['evaldist']))
        return x, f, loc, X, G, w, H, pobj

    if f < fvalquit:
        _log('hanso: reached target objective')
        _log('hanso: best point found has f = %g with local optimality'
             ' measure: dnorm = %5.1e, evaldist = %5.1e' % (
                f, loc['dnorm'], loc['evaldist']))
        return x, f, loc, X, G, w, H, pobj

    if dnorm < gradnormtol:
        _log('hanso: verified optimality within tolerance in bfgs phase')
        _log('hanso: best point found has f = %g with local optimality '
             'measure: dnorm = %5.1e, evaldist = %5.1e' % (
                f, loc['dnorm'], loc['evaldist']))
        return x, f, loc, X, G, w, H, pobj

    if sampgrad:
        # launch gradient sampling
        # time0 = time.time()
        f_BFGS = f
        # save optimality certificate info in case gradient sampling cannot
        # improve the one provided by BFGS
        dnorm_BFGS = dnorm
        loc_BFGS = loc
        d_BFGS = d
        X_BFGS = X
        G_BFGS = G
        w_BFGS = w
        x0 = x.reshape((-1, 1))

        # otherwise gradient sampling is too expensive
        if maxit > 100:
            maxit = 100

        # # otherwise grad sampling will augment with random starts
        # x0 = x0[..., :1]
        # assert 0, x0.shape

        cpumax = cpufinish - time.time()  # time left

        # run gradsamp proper
        x, f, g, dnorm, X, G, w = gradsamp(func, x0, grad=grad, maxit=maxit,
                                           cpumax=cpumax)

        if f == f_BFGS:  # gradient sampling did not reduce f
            _log('hanso: gradient sampling did not reduce f below best point'
                 ' found by BFGS\n')
            # use the better optimality certificate
            if dnorm > dnorm_BFGS:
                loc = loc_BFGS
                d = d_BFGS
                X = X_BFGS
                G = G_BFGS
                w = w_BFGS
        elif f < f_BFGS:
            loc, X, G, w = postprocess(x, g, dnorm, X, G, w, verbose=verbose)
            _log('hanso: gradient sampling reduced f below best point found'
                 ' by BFGS\n')
        else:
            raise RuntimeError('hanso: f > f_BFGS: this should never happen')

        x = x[0]
        f = f[0]
        if callback:
            callback(x)
        return x, f, loc, X, G, w, H, pobj
    else:
        if callback:
            callback(x)
        return x, f, loc, X, G, w, H, pobj
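A short sketch of how the callback hook added in this variant might be used, collecting iterates in a list. The names record_iterate and trajectory are illustrative; hanso itself calls callback(x) on the final point, and the bfgs1run backend is assumed to do the same once per iteration.

import numpy as np

trajectory = []

def record_iterate(xk):
    # keep a copy so later in-place updates cannot alter the stored history
    trajectory.append(np.array(xk, copy=True))

x, f = hanso(lambda z: np.abs(z).sum(),      # l1 objective, for illustration
             x0=np.random.randn(5, 3),
             grad=np.sign,
             callback=record_iterate,
             verbose=0)[:2]
print("final f:", f, "- %i iterate(s) recorded" % len(trajectory))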
Example #5
            from example_functions import (l2 as func,
                                           gradl2 as grad)
        elif "banana" in func_name:
            nvar = 2
            from example_functions import (rosenbrock_banana as func,
                                           grad_rosenbrock_banana as grad)
        elif "esterov" in func_name:
            from example_functions import (nesterov as func,
                                           grad_nesterov as grad)
        if os.path.exists("/tmp/x0.mat"):
            x0 = scipy.io.loadmat("/tmp/x0.mat", squeeze_me=True,
                                  struct_as_record=False)['x0']
            if x0.ndim == 1:
                x0 = x0.reshape((-1, 1), order='F')
        else:
            x0 = setx0(nvar, nstart)

        if "banana" in func_name:
            x0 = x0[:nvar, ...]

        nvar, nstart = x0.shape

        func_name = func_name + " in %i dimensions" % nvar
        print "Running HANSO for %s ..." % func_name

        for strongwolfe in wolfe_kinds:
            # run BFGS
            results = hanso(func, x0=x0,
                            grad=grad,
                            sampgrad=True,
                            strongwolfe=strongwolfe,
Example #6
            g.append(g0)
            dnorm.append(linalg.norm(g0, 2))
            X.append(x[..., run])
            G.append(g0)
            w.append(1)
        else:
            cpumax = cpufinish - time.time()  # time left
            xtmp, ftmp, gtmp, dnormtmp, Xtmp, Gtmp, wtmp = \
                gradsamp1run(func, x0[..., run], grad=grad, f0=f0, g0=g0,
                             **kwargs)
            x.append(xtmp)
            f.append(ftmp)
            g.append(gtmp)
            dnorm.append(dnormtmp)
            X.append(Xtmp)
            G.append(Gtmp)
            w.append(wtmp)
        if time.time() > cpufinish:
            break

    return x, f, np.array(g).T, dnorm, np.array(X)[0], np.array(G)[0], w

if __name__ == '__main__':
    from setx0 import setx0
    from example_functions import (l1 as func,
                                   grad_l1 as grad)
    x0 = setx0(20, 10)
    x, f, g, dnorm, X, G, w = gradsamp(func, x0, grad=grad)
    print "fmin:", f
    print "xopt:", x