Example #1
import numpy as np
from numpy.linalg import norm
import linesearch  # assumed: the project's line-search module (exact/inexact)
ls = linesearch    # alias used in several of the examples below

def sd(fun, x0, search='inexact', eps=1e-8, maxiter=10000, **kwargs):
    """Steepest descent

    Parameters
    ----------
    fun: object
        objective function, with callable method f and g
    x0: ndarray
        initial point
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f and g)
    """
    x = x0
    f0 = -np.inf
    f1 = fun.f(x0)
    g1 = fun.g(x0)
    niter = 0
    neval = 2

    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        d = -g1

        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')

        x = x + alpha * d

        f0 = f1
        f1 = fun.f(x)
        g1 = fun.g(x)
        niter += 1
        neval += (v + 2)
        if niter == maxiter:
            break

    return x, f1, norm(g1), niter, neval
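A minimal usage sketch (not from the original source): any object exposing callable f and g works as the objective. The Quadratic class and the data below are hypothetical, and the call assumes the linesearch module is importable.

class Quadratic:
    """Hypothetical quadratic objective 0.5 * x'Ax - b'x."""
    def __init__(self, A, b):
        self.A, self.b = A, b

    def f(self, x):
        return 0.5 * x @ self.A @ x - self.b @ x

    def g(self, x):
        return self.A @ x - self.b

A = np.array([[3., 1.], [1., 2.]])
b = np.array([1., 1.])
x, f, gnorm, niter, neval = sd(Quadratic(A, b), np.zeros(2))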
Example #2
def cg(fun, x0, method='prp+', search='inexact', 
       eps=1e-8, maxiter=10000, nu=0.2, a_high=.3, debug=False, **kwargs):
    """Non-linear conjugate gradient methods

    Parameters
    ----------
    fun: object
        objective function, with callable method f and g
    x0: ndarray
        initial point
    method: string, optional
        options: 'fr' for FR, 'prp' for PRP, 'prp+' for PRP+, 'hs' for HS,
                 'cd' for conjugate descent, 'dy' for Dai-Yuan
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for stopping criterion
    maxiter: int, optional
        maximum number of iterations
    nu: float, optional
        threshold for restart by the orthogonality test
    a_high: float, optional
        upper bound on the infinity norm of each step
    debug: boolean, optional
        output information for every iteration if set to True
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f and g)
    flist: list
        list of objective values along the iterations
    xlist: list
        list of points along the iterations
    """
    
    x = x0
    n = x.size
    f0 = -np.inf
    f1 = fun.f(x)
    g0 = np.zeros(n)
    g1 = fun.g(x)
    d = -g1
    niter = 0
    neval = 2

    flist = []
    xlist = []

    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        # restart with steepest descent when successive gradients fail
        # the orthogonality test
        if abs(np.dot(g1, g0)) > nu * np.dot(g1, g1):
            d = -g1

        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')

        d = alpha * d
        if norm(d, np.inf) > a_high:  # cap the step in infinity norm
            d = d / norm(d, np.inf) * a_high
        x = x + d

        g0 = g1
        g1 = fun.g(x)
        y = g1 - g0
        
        f0 = f1
        f1 = fun.f(x)
        neval += (v + 2)

        flist.append(f1)
        xlist.append(x)

        if debug:
            print('iter:', niter, 'alpha:', alpha)

        if method == 'fr':
            beta = np.dot(g1, g1) / np.dot(g0, g0)
        elif method == 'prp':
            beta = np.dot(g1, y) / np.dot(g0, g0)
        elif method == 'prp+':
            beta = max(np.dot(g1, y) / np.dot(g0, g0), 0)
        elif method == 'hs':
            beta = np.dot(g1, y) / np.dot(d, y)
        elif method == 'cd':
            beta = -np.dot(g1, g1) / np.dot(g0, d)
        elif method == 'dy':
            beta = np.dot(g1, g1) / np.dot(d, y)
        else:
            raise ValueError('Invalid method name')

        d = beta * d - g1

        niter += 1
        if niter == maxiter:
            break

    return x, f1, norm(g1), niter, neval, flist, xlist
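A hedged usage sketch, reusing the hypothetical Quadratic objective from the sd example: the beta rules can be compared side by side, and flist exposes the per-iteration objective values.

for m in ('fr', 'prp', 'prp+', 'hs', 'cd', 'dy'):
    x, f, gnorm, niter, neval, flist, xlist = cg(Quadratic(A, b),
                                                 np.zeros(2), method=m)
    print(m, niter, f)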
Example #3
def momentum(fun,
             x0,
             search='inexact',
             eps=1e-8,
             maxiter=10000,
             beta=.9,
             a_high=.3,
             **kwargs):
    """Momentum

    Parameters
    ----------
    fun: object
        objective function, with callable method f and g
    x0: ndarray
        initial point
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    beta: float, optional
        momentum coefficient applied to the previous step
    a_high: float, optional
        upper bound on the infinity norm of each step
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f and g)
    flist: list
        list of objective values along the iterations
    xlist: list
        list of points along the iterations
    """
    x = x0
    f0 = -np.inf
    f1 = fun.f(x0)
    g1 = fun.g(x0)
    niter = 0
    neval = 2

    d0 = np.zeros(x.size)

    flist = []
    xlist = []

    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        d = beta * d0 - g1

        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')

        d = alpha * d
        if norm(d, np.inf) > a_high:
            d = d / norm(d, np.inf) * a_high

        x = x + d

        f0 = f1
        f1 = fun.f(x)
        g1 = fun.g(x)
        d0 = d  # the step just taken; reused as the momentum term

        flist.append(f1)
        xlist.append(x)

        niter += 1
        neval += (v + 2)
        if niter == maxiter:
            break

    return x, f1, norm(g1), niter, neval, flist, xlist
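In formulas, one pass of the loop above computes, with \alpha_k from the line search and s_k rescaled whenever its infinity norm exceeds a_high (and s_{-1} = 0):

d_k = \beta\, s_{k-1} - g_k, \qquad s_k = \alpha_k d_k, \qquad x_{k+1} = x_k + s_k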
Example #4
def DennisGayWelsch(fun, x0, B0=None, method='ls', search='inexact',
                    eps=1e-8, maxiter=1000, **kwargs):
    """Dennis-Gay-Welsch method: line search or trust region

    Parameters
    ----------
    fun: object
        objective function, with callable method eval (J, r, f, g)
    x0: ndarray
        initial point
    B0: ndarray, optional
        initial approximation of matrix S, half the identity by default
    method: string, optional
        'ls' for DGW with line search, 'tr' for DGW with trust region
    search: string, optional
        used only when method == 'ls'
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for stopping criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations
    """
    def dogleg(G, g1):
        # steepest-descent and (quasi-)Gauss-Newton steps
        d_SD = -g1
        d_GN = np.linalg.solve(G, -g1)
        # exact minimizer of the model along the steepest-descent direction
        alpha = norm(d_SD) ** 2 / np.dot(d_SD, G @ d_SD)

        if norm(d_GN) < delta:
            # full step lies inside the trust region
            d = d_GN
        elif alpha * norm(d_SD) > delta:
            # even the Cauchy point is outside: scaled gradient step
            d = delta * (d_SD / norm(d_SD))
        else:
            # intersect the dogleg segment with the trust-region boundary:
            # solve D * beta^2 + E * beta + F = 0 for beta in [0, 1]
            D = norm(d_GN - alpha * d_SD) ** 2
            E = 2 * alpha * np.dot(d_SD, d_GN - alpha * d_SD)
            F = (alpha * norm(d_SD)) ** 2 - delta ** 2
            det = E ** 2 - 4 * D * F

            beta = (-E + np.sqrt(det)) / (2 * D)
            d = (1 - beta) * alpha * d_SD + beta * d_GN
        return d

    x = x0
    f0 = -np.inf
    J, r, f1, g1 = fun.eval(x)
    B = .5 * np.identity(x.size) if B0 is None else B0

    delta = 1  # trust-region radius
    niter = 0
    neval = 1
    uflag = True

    while abs(f1 - f0) >= eps * abs(f0):  # relative-decrease stopping test
        # DGW with line search routine
        if method == 'ls':
            d = np.linalg.solve(np.dot(J.T, J) + B, -g1)
            if np.dot(g1, d) > 0:
                d = np.linalg.solve(np.dot(J.T, J), -g1)

            if search == 'inexact':
                alpha, v = linesearch.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
            elif search == 'exact':
                alpha, v = linesearch.exact(fun, x, d, **kwargs)
            else:
                raise ValueError('Invalid search type')
            s = alpha * d
            neval += v  # count evaluations used by the line search

        # DGW with trust region (dogleg) routine
        elif method == 'tr':
            G = np.dot(J.T, J) + B
            d = dogleg(G, g1)
            delta_f = f1 - fun.f(x + d)
            neval += 1

            if delta_f < 0:  # no actual decrease: retry without B
                G = np.dot(J.T, J)
                d = dogleg(G, g1)
                delta_f = f1 - fun.f(x + d)
                neval += 1

            # actual vs. predicted reduction
            delta_q = -np.dot(d, g1) - np.dot(d, G @ d) / 2
            gamma = delta_f / delta_q

            uflag = False
            if gamma < .25:
                delta = delta / 4
            if gamma > .75 and abs(norm(d) - delta) < 1e-8 * delta:
                delta = delta * 2
            if gamma > 0:
                s = d
                uflag = True
        else:
            raise ValueError('Invalid method name')

        # Update of point x and matrix approximation B
        if uflag:
            x = x + s
            g0 = g1
            f0 = f1
            J0 = J
            J, r, f1, g1 = fun.eval(x)
            neval += 1

            y = g1 - g0
            y_hat = g1 - np.dot(J0.T, r)

            # Scaling for faster convergence
            tau = min(1, abs(np.dot(s, y_hat) / np.dot(s, B @ s)))
            B = tau * B

            z = y_hat - B @ s
            t = np.dot(y, s)
            B = B + np.outer(z, y) / t + np.outer(y, z) / t \
                  - np.outer(y * np.dot(z, s) / t, y / t)

        niter += 1
        if niter == maxiter:
            break

    return x, f1, norm(g1), niter, neval
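Both DennisGayWelsch above and GaussNewton below consume an objective whose eval returns (J, r, f, g) for a least-squares problem f = 0.5 * ||r||^2. A minimal hypothetical wrapper sketch (the names are illustrative, not from the source):

class LeastSquares:
    """Hypothetical wrapper for f(x) = 0.5 * ||r(x)||^2."""
    def __init__(self, r_fun, J_fun):
        self.r_fun, self.J_fun = r_fun, J_fun

    def f(self, x):
        r = self.r_fun(x)
        return 0.5 * r @ r

    def g(self, x):
        return self.J_fun(x).T @ self.r_fun(x)

    def eval(self, x):
        J, r = self.J_fun(x), self.r_fun(x)
        return J, r, 0.5 * r @ r, J.T @ r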
Example #5
def GaussNewton(fun, x0, method=None, search='inexact',
                eps=1e-8, maxiter=1000, **kwargs):
    """Gauss-Newton (GN) method

    Parameters
    ----------
    fun: object
        objective function, with callable method eval (J, r, f, g)
    x0: ndarray
        initial point
    method: string, optional
        ignored
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for stopping criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations
    """
    x = x0
    f0 = -np.inf
    J, r, f1, g1 = fun.eval(x)
    niter = 0
    neval = 1

    while abs(f1 - f0) >= eps * abs(f0):  # relative-decrease stopping test
        # Gauss-Newton direction: least-squares solution of J d = -r
        d = np.linalg.lstsq(J, -r, rcond=None)[0]

        if search == 'inexact':
            alpha, v = linesearch.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = linesearch.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')

        x = x + alpha * d

        f0 = f1
        J, r, f1, g1 = fun.eval(x)

        niter += 1
        neval += (v + 1)
        if niter == maxiter:
            break

    return x, f1, norm(g1), niter, neval
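Continuing the hypothetical LeastSquares sketch above, a linear least-squares call might look like this (residual A @ x - b with constant Jacobian A, reusing the illustrative data from the sd example):

fun = LeastSquares(lambda x: A @ x - b, lambda x: A)
x, f, gnorm, niter, neval = GaussNewton(fun, np.zeros(2))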
Example #6
def quasiNewton(fun,
                x0,
                H0=None,
                method='bfgs',
                search='inexact',
                eps=1e-8,
                maxiter=1000,
                **kwargs):
    """Quasi-Newton methods: SR1 / DFP / BFGS

    Parameters
    ----------
    fun: object
        objective function, with callable method f and g
    x0: ndarray
        initial point
    H0: ndarray, optional
        initial Hessian inverse, identity by default
    method: string, optional
        'sr1' for SR1, 'dfp' for DFP, 'bfgs' for BFGS
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f and g)
    """
    x = x0
    if H0 is not None:
        H = H0
    else:
        H = np.eye(x.size)

    f0 = -np.inf
    f1 = fun.f(x)
    g0 = np.zeros(x.size)
    g1 = fun.g(x)
    niter = 0
    neval = 2

    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        d = -(H @ g1)
        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')

        s = alpha * d
        x = x + s

        g0 = g1
        g1 = fun.g(x)
        y = g1 - g0

        if niter == 0 and H0 is None:  # rescale H once before the first update
            H = (np.dot(y, s) / np.dot(y, y)) * H

        f0 = f1
        f1 = fun.f(x)
        neval += (v + 2)

        if method == 'sr1':
            z = s - H @ y
            if abs(np.dot(z, y)) >= eps * norm(z) * norm(y):
                H = H + np.outer(z, z / np.dot(z, y))

        elif method == 'dfp':
            z = H @ y
            H = H + np.outer(s, s / np.dot(s, y)) - np.outer(
                z, z / np.dot(y, z))

        elif method == 'bfgs':
            r = 1 / np.dot(s, y)
            z = r * (H @ y)
            H = H + r * (1 + np.dot(y, z)) * np.outer(s, s) \
                  - np.outer(s, z) - np.outer(z, s)
        else:
            raise ValueError('Invalid method name')

        niter += 1
        if niter == maxiter:
            break

    return x, f1, norm(g1), niter, neval
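For reference, the inverse-Hessian updates the three branches implement, with s = x_{k+1} - x_k and y = g_{k+1} - g_k:

\text{SR1:}\quad H_{+} = H + \frac{zz^{\top}}{z^{\top}y}, \qquad z = s - Hy

\text{DFP:}\quad H_{+} = H + \frac{ss^{\top}}{s^{\top}y} - \frac{(Hy)(Hy)^{\top}}{y^{\top}Hy}

\text{BFGS:}\quad H_{+} = H + \Bigl(1 + \frac{y^{\top}Hy}{s^{\top}y}\Bigr)\frac{ss^{\top}}{s^{\top}y} - \frac{s(Hy)^{\top} + (Hy)s^{\top}}{s^{\top}y}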
Example #7
def Newton(fun,
           x0,
           method='damped',
           search='inexact',
           eps=1e-8,
           maxiter=1000,
           **kwargs):
    """Newton's method: normal or damped

    Parameters
    ----------
    fun: object
        objective function, with callable method f, g and G
    x0: ndarray
        initial point
    method: string, optional
        'normal' for Normal Newton, 'damped' for damped Newton
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f, g and G)
    """
    x = x0
    f0 = -np.inf
    f1 = fun.f(x0)
    g1 = fun.g(x0)
    niter = 0
    neval = 2
    errflag = 0

    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        G = fun.G(x)
        try:  # test for positive definiteness via a Cholesky factorization
            np.linalg.cholesky(G)
        except np.linalg.LinAlgError:
            errflag = 1

        d = np.linalg.solve(G, -g1)

        if method == 'normal':
            alpha, v = 1, 0

        elif method == 'damped':
            if search == 'inexact':
                alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
            elif search == 'exact':
                alpha, v = ls.exact(fun, x, d, **kwargs)
            else:
                raise ValueError('Invalid search type')
        else:
            raise ValueError('Invalid method name')

        x = x + alpha * d

        f0 = f1
        f1 = fun.f(x)
        g1 = fun.g(x)
        niter += 1
        neval += (v + 3)
        if niter == maxiter:
            break

    if errflag == 1:
        print('Warning: Non-positive-definite Hessian encountered.')
    return x, f1, norm(g1), niter, neval
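A minimal usage sketch, extending the hypothetical Quadratic class from the sd example with the Hessian method G that Newton requires:

class QuadraticWithHessian(Quadratic):
    def G(self, x):
        return self.A  # constant Hessian of a quadratic

x, f, gnorm, niter, neval = Newton(QuadraticWithHessian(A, b),
                                   np.zeros(2), method='normal')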
Example #8
def modifiedNewton(fun,
                   x0,
                   method='mix',
                   search='inexact',
                   eps=1e-8,
                   maxiter=1000,
                   **kwargs):
    """Modified Newton's method: mixed direction or LM method

    Parameters
    ----------
    fun: object
        objective function, with callable method f, g and G
    x0: ndarray
        initial point
    method: string, optional
        'mix' for mixed direction method, 'lm' for Levenberg-Marquardt method
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f, g and G)
    """
    x = x0
    f0 = -np.inf
    f1 = fun.f(x0)
    g1 = fun.g(x0)
    niter = 0
    neval = 2

    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        if method == 'mix':
            try:  # test if singular
                d = np.linalg.solve(fun.G(x), -g1)
                if abs(np.dot(g1, d)) < eps * norm(g1) * norm(d):  # orthogonal
                    d = -g1
                if np.dot(g1, d) > eps * norm(g1) * norm(d):  # non-descent
                    d = -d
            except np.linalg.LinAlgError:
                d = -g1

        elif method == 'lm':
            G = fun.G(x)
            mu = 0  # Levenberg-Marquardt shift, grown until G + mu*I is PD
            while True:
                try:  # test if positive definite
                    L = np.linalg.cholesky(G + mu * np.eye(x.size))
                    break
                except np.linalg.LinAlgError:
                    if mu == 0:
                        mu = norm(G) / 2  # Frobenius norm
                    else:
                        mu *= 2
            # solve (G + mu*I) d = -g1 via the Cholesky factor
            y = np.linalg.solve(L, -g1)
            d = np.linalg.solve(L.T, y)
        else:
            raise ValueError('Invalid method name')

        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')

        x = x + alpha * d

        f0 = f1
        f1 = fun.f(x)
        g1 = fun.g(x)
        niter += 1
        neval += (v + 3)
        if niter == maxiter:
            break

    return x, f1, norm(g1), niter, neval
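In formulas, the 'lm' branch grows the shift \mu (first \|G\|_F / 2, then doubling) until the factorization G + \mu I = LL^{\top} succeeds, and then solves the shifted Newton system by forward and back substitution:

(G + \mu I)\, d = -g_1 \quad\Longleftrightarrow\quad L\, y = -g_1, \;\; L^{\top} d = y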