def get_machine_parameter(i):
    """Return a floating-point machine constant for the active data_type.

    The constants follow the MINPACK ``dpmpar`` convention and are
    returned as instances of the module-level ``data_type``.

    Parameters
    ----------
    i: int
        selects the constant:
        1 - the machine precision (spacing of floats around 1.0),
        2 - the smallest positive normalized magnitude,
        any other value - the largest finite magnitude

    Returns
    -------
    data_type or None
        the requested constant, or None when ``data_type`` is neither
        np.float32 nor np.float64
    """
    if data_type == np.float32:
        eps, dwarf, giant = 1.19209290e-07, 1.17549435e-38, 3.40282347e+38
    elif data_type == np.float64:
        eps, dwarf, giant = (2.2204460492503131e-16,
                             2.2250738585072014e-308,
                             1.7976931348623157e+308)
    else:
        # unsupported precision: keep the historical "no value" result
        return None

    # compare by value: identity tests on ints only work by accident of
    # the interpreter's small-integer cache
    if i == 1:
        return data_type(eps)
    if i == 2:
        return data_type(dwarf)
    return data_type(giant)
# File: lmpar.py -- Project: WilliamRo/CLipPYME
def lm_lambda(n, r, ldr, ipvt, diag, qtb, delta, lam):
    r"""Solve the sub-problem in the levenberg-marquardt algorithm.

    By using the trust region framework, the L-M algorithm can be
    regarded as solving a set of minimization problems:

         min || J * p + r ||_2     s.t. || D * p || <= Delta

    By introducing a parameter lambda into this sub-problem, the
    constrained optimization problem can be converted to an
    unconstrained optimization problem:

            ||   /         J         \       /  r  \  ||
        min ||  |                    | p +  |      |  ||
         p  ||  \  sqrt(lambda) * D /       \  0  /   ||

    This routine determines the value lambda and, as a by-product,
    gives a nearly exact solution to the minimization problem.

    Let a = J, d = D, b = -r, x = p; the optimization problem is then
    written as:

            ||   /         a         \       /  b  \  ||
        min ||  |                    | x -  |      |  ||
         x  ||  \  sqrt(lambda) * d /       \  0  /   ||

    Parameters
    ----------
    n: int
        a positive integer input variable set to the order of r
    r: ndarray
        an n by n array, accessed as a flat buffer in which element
        (i, j) is r[i + j * ldr]. on input the full upper triangle must
        contain the full upper triangle of the matrix r. on output
        the full upper triangle is unaltered, and the strict lower
        triangle contains the strict upper triangle (transposed)
        of the upper triangular matrix s such that

                t   t                2     t
               P *(J * J + lambda * D ) * P = s * s

        NOTE: diagonal entries whose magnitude is below 1e-8 are
        overwritten with 0.0 in place before the rank test.
    ldr: int
        a positive integer input variable not less than n which
        specifies the leading dimension of the array r
    ipvt: ndarray
        an integer input array of length n which defines the
        permutation matrix p such that a*p = q*r. column j of p
        is column ipvt(j) of the identity matrix (entries are 1-based)
    diag: ndarray
        an input array of length n which must contain the diagonal
        elements of the matrix D
    qtb: ndarray
        an input array of length n which must contain the first
        n elements of the vector (q transpose)*b
    delta: float
        a positive input variable which specifies an upper
        bound on the euclidean norm of D*x
    lam: float
        a non-negative variable containing an initial estimate
        of the levenberg-marquardt parameter.

    Returns
    -------
    list
        [lam, x, sdiag] where
        lam: float
            final estimate
        x: ndarray
            an output array of length n which contains the least
            squares solution of the system J*x = r, sqrt(lam)*D*x = 0,
            for the output lam
        sdiag: ndarray
            an output array of length n which contains the
            diagonal elements of the upper triangular matrix s
        x and sdiag are module-level buffers that are reused by later
        calls with the same n.
    """

    # region : Initialize parameters
    # ----------------------------------------
    global wa1, wa2, x, sdiag

    # (re)allocate the shared work buffers when the problem size changes;
    # sizes are compared by value, not by object identity
    if wa1 is None or wa1.size != n:
        wa1 = np.zeros(n, data_type)
    if wa2 is None or wa2.size != n:
        wa2 = np.zeros(n, data_type)
    if x is None or x.size != n:
        x = np.zeros(n, data_type)
    if sdiag is None or sdiag.size != n:
        sdiag = np.zeros(n, data_type)

    # ----------------------------------------
    # endregion : Initialize parameters

    # region : Compute Gauss-Newton direction
    # ------------------------------------------
    # :: stored in x. If the jacobian is rank-deficient,
    #    obtain a least squares solution :
    # ::        t
    # ::   R * P * x = -qtb
    nsing = n
    for j in range(n):
        wa1[j] = qtb[j]
        # treat tiny pivots as exact zeros so the rank test below fires
        if np.abs(r[j + j * ldr]) < 1e-8:
            r[j + j * ldr] = 0.0
        if r[j + j * ldr] == 0.0 and nsing == n:
            nsing = j
        if nsing < n:
            wa1[j] = 0.0

    # :: solving R * z = qtb using back substitution
    # ::         wa1[j] - z[j+1]*r[j][j+1] -...- z[n]*r[j][n]
    # :: z[j] = ----------------------------------------------
    # ::                           r[j][j]
    for j in range(nsing - 1, -1, -1):
        wa1[j] /= r[j + j * ldr]
        temp = wa1[j]
        for i in range(j):
            wa1[i] -= r[i + j * ldr] * temp

    # ::      t
    # :: x = P * z
    for j in range(n):
        l = ipvt[j] - 1
        x[l] = wa1[j]

    if utility.lam_trace:
        print(">> ||p^{GN}|| = %.10f" % enorm(x))
    # ------------------------------------------
    # endregion : Compute Gauss-Newton direction

    # region : Preparation
    # ------------------------------------------------
    # > initialize the iteration counter
    iteration = 0
    # > evaluate the function at the origin, and test
    #   for acceptance of the gauss-newton direction
    for j in range(n):
        wa2[j] = diag[j] * x[j]
    dxnorm = enorm(wa2)
    # :: ||x||_2 = Delta + epsilon is acceptable
    fp = dxnorm - delta
    if fp <= p1 * delta:
        lam = data_type(0.0)
        return [lam, x, sdiag]
    # ------------------------------------------------
    # endregion : Preparation

    # region : Set bound
    # :: f(lam) = || D * x ||_2 - delta
    #    A root-finding Newton's method will be performed
    # :: If the jacobian is not rank deficient, the newton step provides
    #    a lower bound, lam_l, for the zero of the function.
    #    Otherwise set this bound to zero
    lam_l = data_type(0.0)
    if nsing >= n:
        for j in range(n):
            l = ipvt[j] - 1
            # :: wa2 stores D * x in which x is gauss-newton direction
            wa1[j] = diag[l] * (wa2[l] / dxnorm)
        # :: forward substitution with the transposed upper triangle
        for j in range(n):
            acc = data_type(0.0)
            for i in range(j):
                acc += r[i + j * ldr] * wa1[i]
            wa1[j] = (wa1[j] - acc) / r[j + j * ldr]

        temp = enorm(wa1)
        lam_l = fp / delta / temp / temp

    # > calculate an upper bound, lam_u, for the zero of the function
    for j in range(n):
        acc = 0.0
        for i in range(j + 1):
            acc += r[i + j * ldr] * qtb[i]
        l = ipvt[j] - 1
        wa1[j] = acc / diag[l]
    gnorm = enorm(wa1)
    lam_u = gnorm / delta
    if lam_u == 0.0:
        lam_u = dwarf / min(delta, p1)

    # > if the input lam lies outside of the interval (lam_l, lam_u)
    #   set lam to the closer endpoint
    lam = max(lam, lam_l)
    lam = min(lam, lam_u)
    if lam == 0.0:
        lam = gnorm / dxnorm

    # endregion : Set bound

    # region : Iteration
    # :: the loop only exits through the return statement inside it

    while True:
        iteration += 1

        if utility.lam_trace:
            print('>> Step %d, lam ∈(%.8f, %.8f):' % (iteration,
                                                     lam_l, lam_u))

        # > evaluate the function at the current value of lam
        if lam == 0.0:
            lam = max(dwarf, p001 * lam_u)
        temp = np.sqrt(lam)
        for j in range(n):
            wa1[j] = temp * diag[j]

        if utility.lam_trace:
            print('   lam = %.8f' % lam)

        qr_solve(n, r, ldr, ipvt, wa1, qtb, x, sdiag)
        for j in range(n):
            wa2[j] = diag[j] * x[j]
        dxnorm = enorm(wa2)
        temp = fp
        fp = dxnorm - delta

        if utility.lam_trace:
            print('   Dx - delta = %.8f' % fp)

        # > if the function is small enough, accept the current value
        #   of lam. also test for the exceptional cases where lam_l
        #   is zero or the number of iterations has reached 10
        if np.abs(fp) <= p1 * delta \
                or (lam_l == 0.0 and fp <= temp and temp < 0.0) \
                or iteration == 10:
            return [lam, x, sdiag]

        # > compute the newton correction
        # ::
        # ::            / ||d*x|| \2  ||d*x|| - delta
        # ::   lam_c = | -------- |  -----------------
        # ::            \  ||y|| /         delta
        # ::     t
        # ::    r * y = x, fp = ||d*x|| - delta
        for j in range(n):
            l = ipvt[j] - 1
            wa1[j] = diag[l] * (wa2[l] / dxnorm)
        for j in range(n):
            wa1[j] /= sdiag[j]
            temp = wa1[j]
            for i in range(j + 1, n):
                wa1[i] -= r[i + j * ldr] * temp

        temp = enorm(wa1)
        lam_c = fp / delta / temp / temp

        # > depending on the sign of the function, update lam_l or lam_u
        if fp > 0.0:
            lam_l = max(lam_l, lam)
        if fp < 0.0:
            lam_u = min(lam_u, lam)

        # > compute an improved estimate for lam
        lam = max(lam_l, lam + lam_c)

    # endregion : Iteration
# File: lmpar.py -- Project: WilliamRo/CLipPYME
#   Created: June 20, 2016
#   Author: William Ro

import numpy as np
from enorm import euclid_norm as enorm
from dpmpar import get_machine_parameter as dpmpar
from qrsolv import qr_solve

from utility import data_type
import utility

# region: Module Parameters

# Fractions used by lm_lambda: p1 scales delta in its acceptance tests,
# p001 scales lam_u when lam has to be moved off zero.
p1 = data_type(0.1)
p001 = data_type(0.001)

# Value of get_machine_parameter(2) for the active data_type; lm_lambda
# falls back to it when a bound or lam would otherwise be exactly zero.
dwarf = dpmpar(2)

# Work buffers shared across lm_lambda calls. They start as None and are
# (re)allocated inside lm_lambda whenever their size differs from n.
wa1 = None
wa2 = None
x = None
sdiag = None

# endregion: Module Parameters

def lm_lambda(n, r, ldr, ipvt, diag, qtb, delta, lam):
Solves the sub-problem in the levenberg-marquardt algorithm.
# File: enorm.py -- Project: WilliamRo/CLipPYME
def euclid_norm(x):
    """Given an n-vector x, calculate the euclidean norm of x.

    The euclidean norm is computed by accumulating the sum of
    squares in three different sums. The sums of squares for the
    small and large components are scaled so that no overflows
    occur. Non-destructive underflows are permitted. Underflows
    and overflows do not occur in the computation of the unscaled
    sum of squares for the intermediate components.
    The definitions of small, intermediate and large components
    depend on two module-level constants, rdwarf and rgiant. The main
    restrictions on these constants are that rdwarf**2 not
    underflow and rgiant**2 not overflow.

    Parameters
    ----------
    x: ndarray
        one-dimensional array; only x.size and x[i] are used

    Returns
    -------
    scalar
        the euclidean norm of x; zero for an empty vector
    """

    # > initialize parameters
    n = x.size
    if n == 0:
        # agiant below would divide by zero; an empty vector has norm 0
        return data_type(0.0)
    s1 = data_type(0.0)  # scaled sum of squares, large components
    s2 = data_type(0.0)  # plain sum of squares, intermediate components
    s3 = data_type(0.0)  # scaled sum of squares, small components
    x1max = data_type(0.0)  # largest large component seen so far
    x3max = data_type(0.0)  # largest small component seen so far
    agiant = rgiant / n

    # > calculate sums
    for i in range(n):
        xabs = np.abs(x[i])
        if xabs >= agiant:
            # :: sum for large components
            if xabs > x1max:
                # > new maximum: rescale the running sum to it
                d1 = x1max / xabs
                s1 = 1.0 + s1 * (d1 * d1)
                x1max = xabs
            else:
                d1 = xabs / x1max
                s1 += d1 * d1
        elif xabs <= rdwarf:
            # :: sum for small components
            if xabs > x3max:
                # > new maximum: rescale the running sum to it
                d1 = x3max / xabs
                s3 = 1.0 + s3 * (d1 * d1)
                x3max = xabs
            elif xabs != 0.0:
                d1 = xabs / x3max
                s3 += d1 * d1
        else:
            # :: sum for intermediate components
            s2 += xabs * xabs

    # > calculate norm
    if s1 != 0:
        ret_val = x1max * np.sqrt(s1 + (s2 / x1max) / x1max)
    elif s2 != 0:
        if s2 >= x3max:
            ret_val = np.sqrt(
                s2 * (1.0 + (x3max / s2) * (x3max * s3)))
        else:
            ret_val = np.sqrt(
                x3max * ((s2 / x3max) + (x3max * s3)))
    else:
        ret_val = x3max * np.sqrt(s3)

    return ret_val
# File: qrsolv.py -- Project: WilliamRo/CLipPYME
def qr_solve(n, r, ldr, ipvt, diag, qtb, x, sdiag):
    r"""Solve the linear least square problem:

              ||   /  a  \      / b \  || 2
          min ||  |      | x - |    |  ||
           x  ||  \  d  /      \ 0 /   || 2

    in which a is an m by n matrix, d is an n by n diagonal matrix,
    b is an m-vector. The necessary information must be provided:
      (1) the q-r factorization with column pivoting of a:
                a * p = q * r
      (2) q * b
    With this information, we have
             t                          t
          / q      \  /  a  \    / r * p \
         |         | |      | = |    0   |
          \      i /  \  d  /    \    d  /

    This routine uses a set of givens transformations to convert
    the right-most matrix to an upper triangular matrix and
    then uses back substitution to obtain the solution.

    Parameters
    ----------
    n: int
        a positive integer input variable set to the order of r
    r: ndarray
        an n by n array, accessed as a flat buffer in which element
        (i, j) is r[i + j * ldr]. on input the full upper triangle
        must contain the full upper triangle of the matrix r.
        on output the full upper triangle is unaltered, and the
        strict lower triangle contains the strict upper triangle
        (transposed) of the upper triangular matrix s
    ldr: int
        a positive integer input variable not less than n
        which specifies the leading dimension of the array r
    ipvt: ndarray
        an integer input array of length n which defines the
        permutation matrix p such that a*p = q*r. column j of p
        is column ipvt(j) of the identity matrix (entries are 1-based)
    diag: ndarray
        an input array of length n which must contain the
        diagonal elements of the matrix d
    qtb: ndarray
        an input array of length n which must contain the first
        n elements of the vector (q transpose)*b
    x: ndarray
        an output array of length n which contains the least
        squares solution of the system a*x = b, d*x = 0
    sdiag: ndarray
        an output array of length n which contains the
        diagonal elements of the upper triangular matrix s

                  t    t                    t
                 p * (a * a + d * d) * p = s * s

        In effect, s is the Cholesky factor of the left matrix

    Returns
    -------
    None
        results are written into x, sdiag and the lower triangle of r
    """

    # region : Initialize parameters

    global wa
    # (re)allocate the shared work vector when the problem size changes;
    # sizes are compared by value, not by object identity
    if wa is None or wa.size != n:
        wa = np.zeros(n, data_type)

    # endregion : Initialize parameters

    # region : Preparation
    # ----------------------------
    # > copy r and qtb to preserve input and initialize s
    #   in particular, save the diagonal elements of r in x
    for j in range(n):
        for i in range(j, n):
            r[i + j * ldr] = r[j + i * ldr]
        x[j] = r[j + j * ldr]
        wa[j] = qtb[j]
    # ----------------------------
    # endregion : Preparation

    # region : Givens rotation
    # ---------------------------
    # > eliminate the diagonal matrix d using a givens rotation
    # ::                            t              _    t
    # ::         n by n      / r * p \           / r * p \
    # ::   (m - n) by n     |    0    | = q_g * |    0    |
    # ::         n by n      \   d   /           \   0   /

    for j in range(n):
        # > prepare the row of d to be eliminated, locating the
        #   diagonal element using p from the qr factorization.
        l = ipvt[j] - 1
        if diag[l] != 0.0:
            # :: sdiag[j : n] temporarily holds the row of d
            for k in range(j, n):
                sdiag[k] = 0
            sdiag[j] = diag[l]

            # :: the transformations to eliminate the row of d
            #    modify only a single element of qtb beyond the
            #    first n, which is initially zero.
            qtbpj = 0.0
            for k in range(j, n):
                # > determine a givens rotation which eliminates the
                #   appropriate element in the current row of d
                if sdiag[k] != 0.0:
                    # :: divide by the larger magnitude so that the
                    #    ratio never exceeds one in absolute value
                    if np.abs(r[k + k * ldr]) < np.abs(sdiag[k]):
                        cotan = r[k + k * ldr] / sdiag[k]
                        sin = p5 / np.sqrt(p25 + p25 * (cotan * cotan))
                        cos = sin * cotan
                    else:
                        tan = sdiag[k] / r[k + k * ldr]
                        cos = p5 / np.sqrt(p25 + p25 * (tan * tan))
                        sin = cos * tan
                    # > compute the modified diagonal element of r and
                    #   the modified element of (qtb, 0)^t
                    temp = cos * wa[k] + sin * qtbpj
                    qtbpj = -sin * wa[k] + cos * qtbpj
                    wa[k] = temp
                    # > transform the row of s
                    r[k + k * ldr] = cos * r[k + k * ldr] + \
                                     sin * sdiag[k]
                    for i in range(k + 1, n):
                        temp = cos * r[i + k * ldr] + sin * sdiag[i]
                        sdiag[i] = -sin * r[i + k * ldr] + \
                                   cos * sdiag[i]
                        r[i + k * ldr] = temp

        # > store the diagonal element of s and restore the
        #   corresponding diagonal element of r
        sdiag[j] = r[j + j * ldr]
        r[j + j * ldr] = x[j]

    # > solve the triangular system for z. if the system is singular,
    #   then obtain a least squares solution
    #                      t
    # :: r * z = qtb, z = p * x and qtb is stored in wa
    nsing = n
    for j in range(n):
        if sdiag[j] == 0.0 and nsing == n:
            nsing = j
        if nsing < n:
            wa[j] = 0.0
    # > use back substitution
    for j in range(nsing - 1, -1, -1):
        acc = data_type(0)
        for i in range(j + 1, nsing):
            acc += r[i + j * ldr] * wa[i]
        wa[j] = (wa[j] - acc) / sdiag[j]

    # ---------------------------
    # endregion : Givens rotation

    # > permute the components of z back to components of x
    for j in range(n):
        l = ipvt[j] - 1
        x[l] = wa[j]

# File: qrsolv.py -- Project: WilliamRo/CLipPYME
#   Created: June 21, 2016
#   Author: William Ro

import numpy as np
from utility import data_type

# region : Module parameters

# Constants used by qr_solve when it forms the sine and cosine of each
# givens rotation.
p5 = data_type(0.5)
p25 = data_type(0.25)

# Work vector shared across qr_solve calls. It starts as None and is
# (re)allocated inside qr_solve whenever its size differs from n.
wa = None

# endregion : Module parameters

def qr_solve(n, r, ldr, ipvt, diag, qtb, x, sdiag):
    Solves the linear least square problem:

              ||   /  a  \      / b \  || 2
          min ||  |      | x - |    |  ||
           x  ||  \  d  /      \ 0 /   || 2

     in which a is an m by n matrix, d is an n by n diagonal matrix,
     b is an m-vector. The necessary information must be provided:
      (1) the q-r factorization with column pivoting of a:
def euclid_norm(x):
    """Given an n-vector x, calculate the euclidean norm of x.

    The euclidean norm is computed by accumulating the sum of
    squares in three different sums. The sums of squares for the
    small and large components are scaled so that no overflows
    occur. Non-destructive underflows are permitted. Underflows
    and overflows do not occur in the computation of the unscaled
    sum of squares for the intermediate components.
    The definitions of small, intermediate and large components
    depend on two module-level constants, rdwarf and rgiant. The main
    restrictions on these constants are that rdwarf**2 not
    underflow and rgiant**2 not overflow.

    Parameters
    ----------
    x: ndarray
        one-dimensional array; only x.size and x[i] are used

    Returns
    -------
    scalar
        the euclidean norm of x; zero for an empty vector
    """

    # > initialize parameters
    n = x.size
    if n == 0:
        # agiant below would divide by zero; an empty vector has norm 0
        return data_type(0.0)
    s1 = data_type(0.0)  # scaled sum of squares, large components
    s2 = data_type(0.0)  # plain sum of squares, intermediate components
    s3 = data_type(0.0)  # scaled sum of squares, small components
    x1max = data_type(0.0)  # largest large component seen so far
    x3max = data_type(0.0)  # largest small component seen so far
    agiant = rgiant / n

    # > calculate sums
    for i in range(n):
        xabs = np.abs(x[i])
        if xabs >= agiant:
            # :: sum for large components
            if xabs > x1max:
                # > new maximum: rescale the running sum to it
                d1 = x1max / xabs
                s1 = 1.0 + s1 * (d1 * d1)
                x1max = xabs
            else:
                d1 = xabs / x1max
                s1 += d1 * d1
        elif xabs <= rdwarf:
            # :: sum for small components
            if xabs > x3max:
                # > new maximum: rescale the running sum to it
                d1 = x3max / xabs
                s3 = 1.0 + s3 * (d1 * d1)
                x3max = xabs
            elif xabs != 0.0:
                d1 = xabs / x3max
                s3 += d1 * d1
        else:
            # :: sum for intermediate components
            s2 += xabs * xabs

    # > calculate norm
    if s1 != 0:
        ret_val = x1max * np.sqrt(s1 + (s2 / x1max) / x1max)
    elif s2 != 0:
        if s2 >= x3max:
            ret_val = np.sqrt(s2 * (1.0 + (x3max / s2) * (x3max * s3)))
        else:
            ret_val = np.sqrt(x3max * ((s2 / x3max) + (x3max * s3)))
    else:
        ret_val = x3max * np.sqrt(s3)

    return ret_val
def qr(m, n, a, lda, pivot):
    """Compute a QR factorization of the m by n matrix a.

    Householder transformations are used, with optional column
    pivoting, so that a * p = q * r. The matrix is accessed as a flat
    buffer in which element (i, j) is a[i + j * lda].

    Parameters
    ----------
    m: int
        a positive integer input variable set to the number of rows
        of a
    n: int
        a positive integer input variable set to the number of
        columns of a
    a: ndarray
        an m by n array. on input a contains the matrix for which
        the qr factorization is to be computed. on output the
        strict upper trapezoidal part of a contains the strict
        upper trapezoidal part of r, and the lower trapezoidal
        part of a contains a factored form of q (the non-trivial
        elements of the householder vectors)
    lda: int
        a positive integer input variable not less than m which
        specifies the leading dimension of the array a
    pivot: bool
        a logical input variable. if pivot is set true, then
        column pivoting is enforced. if pivot is set false, then
        no column pivoting is done

    Returns
    -------
    list
        [ipvt, rdiag, acnorm] when pivot is true, otherwise
        [rdiag, acnorm], where
        ipvt: ndarray
            an integer output array. ipvt defines the permutation
            matrix p such that a*p = q*r. column j of p is column
            ipvt(j) of the identity matrix (entries are 1-based)
        rdiag: ndarray
            an output array of length n which contains the diagonal
            elements of r
        acnorm: ndarray
            an output array of length n which contains the norms of
            the corresponding columns of the input matrix a
        The arrays are module-level buffers that are reused by later
        calls with the same n.
    """

    # region : Initialize parameters
    # ----------------------------------------
    global ipvt, rdiag, acnorm, wa

    # (re)allocate the shared buffers when the problem size changes;
    # sizes are compared by value, not by object identity
    if ipvt is None or ipvt.size != n:
        ipvt = np.zeros(n, np.int32)
    if rdiag is None or rdiag.size != n:
        rdiag = np.zeros(n, data_type)
    if acnorm is None or acnorm.size != n:
        acnorm = np.zeros(n, data_type)
    if wa is None or wa.size != n:
        wa = np.zeros(n, data_type)

    # ----------------------------------------
    # endregion : Initialize parameters

    # > compute the initial column norms and initialize several arrays
    for j in range(n):
        acnorm[j] = enorm(a[lda * j:lda * (j + 1)])
        rdiag[j] = acnorm[j]
        wa[j] = rdiag[j]
        if pivot:
            ipvt[j] = j + 1

    # > reduce a to r with householder transformations
    min_mn = min(m, n)
    for j in range(min_mn):
        # > if pivot
        # --------------------------------------------------------
        if pivot:
            # >> bring the column of largest norm
            #    into the pivot position
            k_max = j
            for k in range(j, n):
                if rdiag[k] > rdiag[k_max]:
                    k_max = k
            # >> switch
            if k_max != j:
                for i in range(m):  # traverse rows
                    # >>> switch
                    temp = a[i + j * lda]
                    a[i + j * lda] = a[i + k_max * lda]
                    a[i + k_max * lda] = temp
                # >>> overwrite only: slot j is reset after the
                #     householder step, acnorm[k_max] still holds
                rdiag[k_max] = rdiag[j]
                wa[k_max] = wa[j]
                # >>> switch
                temp = ipvt[j]
                ipvt[j] = ipvt[k_max]
                ipvt[k_max] = temp

        # > compute the householder transformation to reduce the
        #   j-th column of a to a multiple of the j-th unit vector
        # ------------------------
        # >> normalize
        # :: v = x - ||x||_2 * e_1
        # :: ajnorm = ||x||_2
        ajnorm = enorm(a[lda * j + j:lda * (j + 1)])
        if ajnorm != 0.0:
            if a[j + j * lda] < 0.0:
                # :: prepare to keep a[i + j * lda] positive
                ajnorm = -ajnorm
            # :: x = sgn(x_1) * x / ||x||_2
            for i in range(j, m):
                a[i + j * lda] /= ajnorm
            # :: a[j + j * lda] temporarily stores v[0]
            # :: one number being subtracted from another close number
            #    has been avoided
            a[j + j * lda] += 1.0

            # > apply the transformation to the remaining columns and
            #   update the norms
            #                                        t
            # :: A[i][k] -= beta * v[i] * w[k], w = A * v
            # :: beta = 1 / v[0]
            # :: w[k] = A[k-th column] * v
            jp1 = j + 1  # j plus 1
            for k in range(jp1, n):  # traverse columns
                dot = data_type(0.0)  # this is w[k]
                for i in range(j, m):  # traverse rows
                    #      v[i]             A[i][k-th column]
                    dot += a[i + j * lda] * a[i + k * lda]
                # :: beta * w[k]
                temp = dot / a[j + j * lda]
                for i in range(j, m):
                    # :: a[i][k] -= beta * w[k] * v[i]
                    a[i + k * lda] -= temp * a[i + j * lda]

                # :: rdiag stores information used to pivot
                # >> downdate rdiag[k] to the norm of the part of
                #    column k that is still to be reduced
                if pivot and rdiag[k] != 0:
                    temp = a[j + k * lda] / rdiag[k]
                    d1 = 1.0 - temp * temp
                    rdiag[k] *= np.sqrt(max(0.0, d1))
                    d1 = rdiag[k] / wa[k]
                    # :: if the downdated value has lost too much
                    #    accuracy, recompute the norm from scratch
                    if p05 * (d1 * d1) <= eps_machine:
                        rdiag[k] = enorm(a[jp1 + k * lda:(k + 1) * lda])
                        wa[k] = rdiag[k]
        # :: sgn(ajnorm) = -sgn(x_0)
        # :: H * x = alpha * e_1
        rdiag[j] = -ajnorm

    # > return
    if pivot:
        return [ipvt, rdiag, acnorm]
    else:
        return [rdiag, acnorm]
from enorm import euclid_norm as enorm

import utility
from utility import data_type
import qrfac
from dpmpar import get_machine_parameter as dpmpar
from fdjac2 import jac
from qrfac import qr
from lmpar import lm_lambda

import clip.cl as cl

# region : Module parameters

# Numeric constants cast to the active data_type.
# NOTE(review): their consumers are not visible in this excerpt --
# presumably ratios/tolerances used by lmdif; confirm against its body.
p1 = data_type(0.1)
p5 = data_type(0.5)
p25 = data_type(0.25)
p75 = data_type(0.75)
p0001 = data_type(1e-4)

# Value of dpmpar(1) (get_machine_parameter imported under that alias)
# for the active data_type.
eps_machine = dpmpar(1)

# NOTE(review): presumably work arrays that lmdif allocates on first
# use; confirm against its body.
wa4 = None
qtf = None

# endregion : Module parameters

def lmdif(func, x, args=(), full_output=0,
          ftol=data_type(1.49012e-8), xtol=data_type(1.49012e-8),
def qr_solve(n, r, ldr, ipvt, diag, qtb, x, sdiag):
    r"""Solve the linear least square problem:

              ||   /  a  \      / b \  || 2
          min ||  |      | x - |    |  ||
           x  ||  \  d  /      \ 0 /   || 2

    in which a is an m by n matrix, d is an n by n diagonal matrix,
    b is an m-vector. The necessary information must be provided:
      (1) the q-r factorization with column pivoting of a:
                a * p = q * r
      (2) q * b
    With this information, we have
             t                          t
          / q      \  /  a  \    / r * p \
         |         | |      | = |    0   |
          \      i /  \  d  /    \    d  /

    This routine uses a set of givens transformations to convert
    the right-most matrix to an upper triangular matrix and
    then uses back substitution to obtain the solution.

    Parameters
    ----------
    n: int
        a positive integer input variable set to the order of r
    r: ndarray
        an n by n array, accessed as a flat buffer in which element
        (i, j) is r[i + j * ldr]. on input the full upper triangle
        must contain the full upper triangle of the matrix r.
        on output the full upper triangle is unaltered, and the
        strict lower triangle contains the strict upper triangle
        (transposed) of the upper triangular matrix s
    ldr: int
        a positive integer input variable not less than n
        which specifies the leading dimension of the array r
    ipvt: ndarray
        an integer input array of length n which defines the
        permutation matrix p such that a*p = q*r. column j of p
        is column ipvt(j) of the identity matrix (entries are 1-based)
    diag: ndarray
        an input array of length n which must contain the
        diagonal elements of the matrix d
    qtb: ndarray
        an input array of length n which must contain the first
        n elements of the vector (q transpose)*b
    x: ndarray
        an output array of length n which contains the least
        squares solution of the system a*x = b, d*x = 0
    sdiag: ndarray
        an output array of length n which contains the
        diagonal elements of the upper triangular matrix s

                  t    t                    t
                 p * (a * a + d * d) * p = s * s

        In effect, s is the Cholesky factor of the left matrix

    Returns
    -------
    None
        results are written into x, sdiag and the lower triangle of r
    """

    # region : Initialize parameters

    global wa
    # (re)allocate the shared work vector when the problem size changes;
    # sizes are compared by value, not by object identity
    if wa is None or wa.size != n:
        wa = np.zeros(n, data_type)

    # endregion : Initialize parameters

    # region : Preparation
    # ----------------------------
    # > copy r and qtb to preserve input and initialize s
    #   in particular, save the diagonal elements of r in x
    for j in range(n):
        for i in range(j, n):
            r[i + j * ldr] = r[j + i * ldr]
        x[j] = r[j + j * ldr]
        wa[j] = qtb[j]
    # ----------------------------
    # endregion : Preparation

    # region : Givens rotation
    # ---------------------------
    # > eliminate the diagonal matrix d using a givens rotation
    # ::                            t              _    t
    # ::         n by n      / r * p \           / r * p \
    # ::   (m - n) by n     |    0    | = q_g * |    0    |
    # ::         n by n      \   d   /           \   0   /

    for j in range(n):
        # > prepare the row of d to be eliminated, locating the
        #   diagonal element using p from the qr factorization.
        l = ipvt[j] - 1
        if diag[l] != 0.0:
            # :: sdiag[j : n] temporarily holds the row of d
            for k in range(j, n):
                sdiag[k] = 0
            sdiag[j] = diag[l]

            # :: the transformations to eliminate the row of d
            #    modify only a single element of qtb beyond the
            #    first n, which is initially zero.
            qtbpj = 0.0
            for k in range(j, n):
                # > determine a givens rotation which eliminates the
                #   appropriate element in the current row of d
                if sdiag[k] != 0.0:
                    # :: divide by the larger magnitude so that the
                    #    ratio never exceeds one in absolute value
                    if np.abs(r[k + k * ldr]) < np.abs(sdiag[k]):
                        cotan = r[k + k * ldr] / sdiag[k]
                        sin = p5 / np.sqrt(p25 + p25 * (cotan * cotan))
                        cos = sin * cotan
                    else:
                        tan = sdiag[k] / r[k + k * ldr]
                        cos = p5 / np.sqrt(p25 + p25 * (tan * tan))
                        sin = cos * tan
                    # > compute the modified diagonal element of r and
                    #   the modified element of (qtb, 0)^t
                    temp = cos * wa[k] + sin * qtbpj
                    qtbpj = -sin * wa[k] + cos * qtbpj
                    wa[k] = temp
                    # > transform the row of s
                    r[k + k * ldr] = cos * r[k + k * ldr] + \
                                     sin * sdiag[k]
                    for i in range(k + 1, n):
                        temp = cos * r[i + k * ldr] + sin * sdiag[i]
                        sdiag[i] = -sin * r[i + k * ldr] + \
                                   cos * sdiag[i]
                        r[i + k * ldr] = temp

        # > store the diagonal element of s and restore the
        #   corresponding diagonal element of r
        sdiag[j] = r[j + j * ldr]
        r[j + j * ldr] = x[j]

    # > solve the triangular system for z. if the system is singular,
    #   then obtain a least squares solution
    #                      t
    # :: r * z = qtb, z = p * x and qtb is stored in wa
    nsing = n
    for j in range(n):
        if sdiag[j] == 0.0 and nsing == n:
            nsing = j
        if nsing < n:
            wa[j] = 0.0
    # > use back substitution
    for j in range(nsing - 1, -1, -1):
        acc = data_type(0)
        for i in range(j + 1, nsing):
            acc += r[i + j * ldr] * wa[i]
        wa[j] = (wa[j] - acc) / sdiag[j]

    # ---------------------------
    # endregion : Givens rotation

    # > permute the components of z back to components of x
    for j in range(n):
        l = ipvt[j] - 1
        x[l] = wa[j]

#   Created: June 21, 2016
#   Author: William Ro

import numpy as np
from utility import data_type

# region : Module parameters

# Constants used by qr_solve when it forms the cos / sin pair of a
# Givens rotation (cos = p5 / sqrt(p25 + p25 * tan^2)).
p5 = data_type(0.5)
p25 = data_type(0.25)

# Module-level work vector indexed as wa[j] by qr_solve; it starts out
# as None (presumably allocated on first use -- confirm in qr_solve).
wa = None

# endregion : Module parameters

def qr_solve(n, r, ldr, ipvt, diag, qtb, x, sdiag):
    Solves the linear least square problem:

              ||   /  a  \      / b \  || 2
          min ||  |      | x - |    |  ||
           x  ||  \  d  /      \ 0 /   || 2

     in which a is an m by n matrix, d is an n by n diagonal matrix,
     b is an m-vector. The necessary information must be provided:
      (1) the q-r factorization with column pivoting of a:
def lm_lambda(n, r, ldr, ipvt, diag, qtb, delta, lam):
    r"""Solve the sub-problem in the levenberg-marquardt algorithm.

    By using the trust region framework, the L-M algorithm can be
    regarded as solving a set of minimization problems:

         min || J * p + r ||_2     s.t. || D * p || <= Delta

    By introducing a parameter lambda into this sub-problem, the
    constrained optimization problem can be converted to an
    unconstrained optimization problem:

            ||   /         J         \       /  r  \  ||
        min ||  |                    | p +  |      |  ||
         p  ||  \  sqrt(lambda) * D /       \  0  /   ||

    This routine determines the value lambda and as a by-product,
    it gives a nearly exact solution to the minimization problem.

    Let a = J, d = D, b = -r, x = p, we denote the optimization
    problem as:

            ||   /         a         \       /  b  \  ||
        min ||  |                    | x -  |      |  ||
         x  ||  \  sqrt(lambda) * d /       \  0  /   ||

    Parameters
    ----------
    n: int
        a positive integer input variable set to the order of r
    r: ndarray
        an n by n array stored flat, element (i, j) at r[i + j * ldr].
        on input the full upper triangle must contain the full upper
        triangle of the matrix r. on output the full upper triangle
        is unaltered, and the strict lower triangle contains the
        strict upper triangle (transposed) of the upper triangular
        matrix s such that

                t   t                2     t
               P *(J * J + lambda * D ) * P = s * s

        NOTE: diagonal entries with magnitude below 1e-8 are
        overwritten with 0.0 before the rank test.
    ldr: int
        a positive integer input variable not less than n which
        specifies the leading dimension of the array r
    ipvt: ndarray
        an integer input array of length n which defines the
        permutation matrix p such that a*p = q*r. column j of p
        is column ipvt(j) of the identity matrix (1-based entries)
    diag: ndarray
        an input array of length n which must contain the diagonal
        elements of the matrix D
    qtb: ndarray
        an input array of length n which must contain the first
        n elements of the vector (q transpose)*b
    delta: float
        a positive input variable which specifies an upper
        bound on the euclidean norm of D*x
    lam: float
        a non-negative variable containing an initial estimate
        of the levenberg-marquardt parameter.

    Returns
    -------
    A list ``[lam, x, sdiag]``:

    lam: float
        final estimate
    x: ndarray
        an output array of length n which contains the least
        squares solution of the system J*x = r, sqrt(lam)*D*x = 0,
        for the output lam
    sdiag: ndarray
        an output array of length n which contains the
        diagonal elements of the upper triangular matrix s

    ``x`` and ``sdiag`` are module-level work arrays that are reused
    (and overwritten) by later calls with the same ``n``.
    """
    # region : Initialize parameters
    # ----------------------------------------
    global p1, p001, dwarf
    global wa1, wa2, x, sdiag

    # (re)allocate the module-level work arrays when the problem size
    # changes; sizes are compared by value, not by object identity
    if wa1 is None or wa1.size != n:
        wa1 = np.zeros(n, data_type)
    if wa2 is None or wa2.size != n:
        wa2 = np.zeros(n, data_type)
    if x is None or x.size != n:
        x = np.zeros(n, data_type)
    if sdiag is None or sdiag.size != n:
        sdiag = np.zeros(n, data_type)

    # ----------------------------------------
    # endregion : Initialize parameters

    # region : Compute Gauss-Newton direction
    # ------------------------------------------
    # :: stored in x. If the jacobian is rank-deficient,
    #    obtain a least squares solution :
    # ::        t
    # ::   R * P * x = -qtb
    nsing = n
    for j in range(n):
        wa1[j] = qtb[j]
        # treat tiny diagonal entries as exact zeros (mutates r)
        if np.abs(r[j + j * ldr]) < 1e-8:
            r[j + j * ldr] = 0.0
        # nsing records the index of the first zero diagonal entry
        if r[j + j * ldr] == 0.0 and nsing == n:
            nsing = j
        if nsing < n:
            wa1[j] = 0.0

    # :: solving R * z = qtb using back substitution
    # ::         wa1[j] - z[j+1]*r[j][j+1] -...- z[n]*r[j][n]
    # :: z[j] = ----------------------------------------------
    # ::                           r[j][j]
    for j in range(nsing - 1, -1, -1):
        wa1[j] /= r[j + j * ldr]
        temp = wa1[j]
        for i in range(j):
            wa1[i] -= r[i + j * ldr] * temp

    # ::      t
    # :: x = P * z
    for j in range(n):
        l = ipvt[j] - 1
        x[l] = wa1[j]

    if utility.lam_trace:
        print(">> ||p^{GN}|| = %.10f" % enorm(x))
    # ------------------------------------------
    # endregion : Compute Gauss-Newton direction

    # region : Preparation
    # ------------------------------------------------
    # > initialize the iteration counter
    n_iter = 0
    # > evaluate the function at the origin, and test
    #   for acceptance of the gauss-newton direction
    for j in range(n):
        wa2[j] = diag[j] * x[j]
    dxnorm = enorm(wa2)
    # :: ||D * x||_2 <= (1 + p1) * delta is acceptable
    fp = dxnorm - delta
    if fp <= p1 * delta:
        lam = data_type(0.0)
        return [lam, x, sdiag]
    # ------------------------------------------------
    # endregion : Preparation

    # region : Set bound
    # :: f(lam) = || D * x ||_2 - delta
    #    A root-finding Newton's method will be performed
    # :: If the jacobian is not rank deficient, the newton step provides
    #    a lower bound, lam_l, for the zero of the function.
    #    Otherwise set this bound to zero
    lam_l = data_type(0.0)
    if nsing >= n:
        for j in range(n):
            l = ipvt[j] - 1
            # :: wa2 stores D * x in which x is gauss-newton direction
            wa1[j] = diag[l] * (wa2[l] / dxnorm)
        # :: forward substitution with the transpose of the upper
        #    triangle of r, overwriting wa1 with the solution
        for j in range(n):
            total = data_type(0.0)
            for i in range(j):
                total += r[i + j * ldr] * wa1[i]
            wa1[j] = (wa1[j] - total) / r[j + j * ldr]

        temp = enorm(wa1)
        lam_l = fp / delta / temp / temp

    # > calculate an upper bound, lam_u, for the zero of the function
    for j in range(n):
        total = 0.0
        for i in range(j + 1):
            total += r[i + j * ldr] * qtb[i]
        l = ipvt[j] - 1
        wa1[j] = total / diag[l]
    gnorm = enorm(wa1)
    lam_u = gnorm / delta
    if lam_u == 0.0:
        lam_u = dwarf / min(delta, p1)

    # > if the input lam lies outside of the interval (lam_l, lam_u)
    #   set lam to the closer endpoint
    lam = max(lam, lam_l)
    lam = min(lam, lam_u)
    if lam == 0.0:
        lam = gnorm / dxnorm

    # endregion : Set bound

    # region : Iteration

    # the loop is left only through the return statement below
    while True:
        n_iter += 1

        if utility.lam_trace:
            print('>> Step %d, lam ∈(%.8f, %.8f):' % (n_iter, lam_l, lam_u))

        # > evaluate the function at the current value of lam
        if lam == 0.0:
            lam = max(dwarf, p001 * lam_u)
        temp = np.sqrt(lam)
        for j in range(n):
            wa1[j] = temp * diag[j]

        if utility.lam_trace:
            print('   lam = %.8f' % lam)

        # qr_solve writes its results into x and sdiag in place
        qr_solve(n, r, ldr, ipvt, wa1, qtb, x, sdiag)
        for j in range(n):
            wa2[j] = diag[j] * x[j]
        dxnorm = enorm(wa2)
        temp = fp
        fp = dxnorm - delta

        if utility.lam_trace:
            print('   Dx - delta = %.8f' % fp)

        # > if the function is small enough, accept the current value
        #   of lam. also test for the exceptional cases where lam_l
        #   is zero or the number of iterations has reached 10
        if np.abs(fp) <= p1 * delta \
                or (lam_l == 0.0 and fp <= temp and temp < 0.0) \
                or n_iter == 10:
            return [lam, x, sdiag]

        # > compute the newton correction
        # ::
        # ::            / ||d*x|| \2  ||d*x|| - delta
        # ::   lam_c = | -------- |  -----------------
        # ::            \  ||y|| /         delta
        # ::     t
        # ::    r * y = x, fp = ||d*x|| - delta
        for j in range(n):
            l = ipvt[j] - 1
            wa1[j] = diag[l] * (wa2[l] / dxnorm)
        for j in range(n):
            wa1[j] /= sdiag[j]
            temp = wa1[j]
            for i in range(j + 1, n):
                wa1[i] -= r[i + j * ldr] * temp

        temp = enorm(wa1)
        lam_c = fp / delta / temp / temp

        # > depending on the sign of the function, update lam_l or lam_u
        if fp > 0.0:
            lam_l = max(lam_l, lam)
        if fp < 0.0:
            lam_u = min(lam_u, lam)

        # > compute an improved estimate for lam
        lam = max(lam_l, lam + lam_c)

    # endregion : Iteration
#   Created: June 20, 2016
#   Author: William Ro

import numpy as np
from enorm import euclid_norm as enorm
from dpmpar import get_machine_parameter as dpmpar
from qrsolv import qr_solve

from utility import data_type
import utility

# region: Module Parameters

# Fractions used by lm_lambda: p1 scales delta in the acceptance tests,
# p001 scales the upper bound lam_u when lam has collapsed to zero.
p1 = data_type(0.1)
p001 = data_type(0.001)

# Value of get_machine_parameter(2): 1.17549435e-38 for float32 and
# 2.2250738585072014e-308 for float64. Used as a floor for lam.
dwarf = dpmpar(2)

# Module-level work arrays; lm_lambda (re)allocates them with length n
# whenever they are None or have the wrong size. x and sdiag are also
# handed back to the caller in lm_lambda's return value.
wa1 = None
wa2 = None
x = None
sdiag = None

# endregion: Module Parameters

def lm_lambda(n, r, ldr, ipvt, diag, qtb, delta, lam):
Solves the sub-problem in the levenberg-marquardt algorithm.
ファイル: qrfac.py プロジェクト: WilliamRo/CLipPYME
#   Created: June 19, 2016
#   Author: William Ro

import numpy as np
from enorm import euclid_norm as enorm
from dpmpar import get_machine_parameter as dpmpar
from utility import data_type

# region : Module parameters

# Threshold factor used by qr when deciding whether a down-dated column
# norm has lost too much accuracy and must be recomputed.
p05 = data_type(0.05)
# Value of get_machine_parameter(1): 1.19209290e-07 for float32 and
# 2.2204460492503131e-16 for float64.
eps_machine = dpmpar(1)

# Module-level arrays; qr (re)allocates them with length n whenever they
# are None or have the wrong size, and returns ipvt / rdiag / acnorm.
ipvt = None
rdiag = None
acnorm = None
wa = None

# endregion : Module parameters

def qr(m, n, a, lda, pivot):
    Uses householder transformations with column pivoting (optional)
    to compute a QR factorization of the m by n matrix a

def lmdif(func, x, args=(), full_output=0,
          ftol=data_type(1.49012e-8), xtol=data_type(1.49012e-8),
          gtol=0.0, maxfev=0, epsfcn=None, factor=100, diag=None):
    """Minimize the sum of the squares of m nonlinear functions in n
    variables by a modification of the levenberg-marquardt algorithm.

    The user must provide a subroutine which calculates the functions.
    The jacobian is then calculated by a forward-difference
    approximation.

    Parameters
    ----------
    func: callable
        should take at least one (possibly length N vector) argument and
        returns M floating point numbers. It must not return NaNs or
        fitting might fail.
    x: ndarray
        The starting estimate for the minimization.
    args: tuple, optional
        Any extra arguments to func are placed in this tuple.
    full_output: bool, optional
        non-zero to return all optional outputs.
    ftol: float, optional
        Relative error desired in the sum of squares.
    xtol: float, optional
        Relative error desired in the approximate solution.
    gtol: float, optional
        Orthogonality desired between the function vector and the
        columns of the Jacobian.
    maxfev: int, optional
        The maximum number of calls to the function. A value <= 0
        selects the default of 200*(N+1), where N is the number of
        elements in x.
    epsfcn: float, optional
        A variable used in determining a suitable step length for the
        forward-difference approximation of the Jacobian. If None, the
        machine epsilon of ``utility.data_type`` is used.
    factor: float, optional
        A parameter determining the initial step bound
        (``factor * || diag * x||``). Should be in interval
        ``(0.1, 100)``.
    diag: sequence, optional
        N positive entries that serve as a scale factors for the
        variables. If set None, the variables will be scaled internally

    Returns
    -------
    x: ndarray
        The solution (or the result of the last iteration for an
        unsuccessful call).
    cov_x: ndarray
        Only returned when full_output is non-zero. Uses the fjac and
        ipvt optional outputs to construct an estimate of the jacobian
        around the solution. None if a singular matrix encountered
        (indicates very flat curvature in some direction).  This matrix
        must be multiplied by the residual variance to get the
        covariance of the parameter estimates -- see curve_fit.
    infodict: dict
        Only returned when full_output is non-zero. A dictionary of
        optional outputs with the keys:

        ``nfev``
            The number of function calls
        ``fvec``
            The function evaluated at the output
        ``fjac``
            A permutation of the R matrix of a QR
            factorization of the final approximate
            Jacobian matrix, stored column wise.
            Together with ipvt, the covariance of the
            estimate can be approximated.
        ``ipvt``
            An integer array of length N which defines
            a permutation matrix, p, such that
            fjac*p = q*r, where r is upper triangular
            with diagonal elements of nonincreasing
            magnitude. Column j of p is column ipvt(j)
            of the identity matrix.
        ``qtf``
            The vector (transpose(q) * fvec).

    mesg: str
        Only returned when full_output is non-zero. A string message
        giving information about the cause of failure.
    ier: int
        An integer flag.  If it is equal to 1, 2, 3 or 4, the solution
        was found.  Otherwise, the solution was not found. In either
        case, the optional output variable 'mesg' gives more
        information.

    Raises
    ------
    ValueError
        if a tolerance is negative, factor is not positive, an entry of
        diag is not positive, or func returns fewer values than x has
        entries (m < n).
    """
    # region : Initialize part of parameters

    global eps_machine, wa4, qtf
    global p1, p5, p25, p75, p0001
    ier = 0

    x = np.asarray(x).flatten()
    if not isinstance(args, tuple):
        args = (args,)
    if epsfcn is None:
        epsfcn = np.finfo(utility.data_type).eps

    # mode 1: scale the variables internally, mode 2: use the caller's
    # diag. It must be remembered here because diag is replaced by an
    # array during the first iteration when it was None.
    if diag is None:
        mode = 1
    else:
        mode = 2

    # endregion : Initialize  part of parameters

    # region : Check the input parameters for errors

    if ftol < 0. or xtol < 0. or gtol < 0. or factor <= 0:
        raise ValueError('!!! Some input parameters for lmdif ' +
                         'are illegal')

    if diag is not None:  # if mode == 2
        for d in diag:
            if d <= 0:
                raise ValueError('!!! Entries in diag must be positive')

    # endregion : Check the input parameters for errors

    # region : Preparation before main loop

    # > evaluate the function at the starting point and calculate
    # its norm
    # :: evaluate r(x) -> fvec
    fvec = func(x, *args)
    # :: evaluate ||r(x)||_2 -> fnorm
    fnorm = enorm(fvec)
    if utility.wm_trace:
        print(">>> L-M begins")
        print(">>> ||x0|| = %.10f" % fnorm)

    # region : initialize other parameters
    # -----------------------------------------
    nfev = 1
    m = fvec.size
    n = x.size
    ldfjac = m
    if m < n:
        raise ValueError('!!! m < n in lmdif')
    if maxfev <= 0:
        maxfev = 200 * (n + 1)
    # > check wa4 and qtf (sizes compared by value, not identity)
    if wa4 is None or wa4.size != m:
        wa4 = np.zeros(m, data_type)
    if qtf is None or qtf.size != n:
        qtf = np.zeros(n, data_type)
    # ------------------------------------------
    # endregion : initialize other parameters

    # endregion : Preparation before main loop

    # region : Main loop

    # > initialize levenberg-marquardt parameter and iteration counter
    lam = data_type(0.0)
    n_iter = 1

    # > begin outer loop
    while True:
        if utility.wm_trace:
            print(">>> Step %d:" % n_iter)
        # > calculate the jacobian matrix
        # :: evaluate J(x) -> fjac: m by n
        fjac = jac(func, x, args, fvec, epsfcn)
        nfev += n

        # > compute the qr factorization of the jacobian
        # ::
        # ::                  /  R  \         n by n
        # ::     J * P = Q * |       |
        # ::                  \  0  /   (m - n) by n
        # ::      t
        # ::     Q  = H_n * ... H_2 * H_1
        # ::
        # ::    For H in { H_1, H_2, ..., H_n }, and arbitrary A
        # ::                        t
        # ::    H = I - beta * v * v
        # ::                     t                  t
        # ::    H * A = A - v * w,      w = beta * A * v
        # ::    information of P -> ipvt
        # ::    R -> rdiag and strict upper trapezoidal part of fjac
        # ::    { H_k }_k -> lower trapezoidal part of fjac
        ipvt, rdiag, acnorm = qr(m, n, fjac, ldfjac, True)

        # > on the first iteration
        if n_iter == 1:
            # >> if the diag is None, scale according to the norms of
            #    the columns of the initial jacobian
            if diag is None:
                diag = np.zeros(n, data_type)
                for j in range(n):
                    diag[j] = qrfac.acnorm[j]
                    if diag[j] == 0.0:
                        diag[j] = 1.0
            # >> calculate the norm of the scaled x and initialize
            #    the step bound delta
            wa3 = qrfac.wa  # 'wa3' is a name left over by lmdif
            for j in range(n):
                wa3[j] = diag[j] * x[j]
            xnorm = enorm(wa3)
            delta = factor * xnorm
            if delta == 0.0:
                delta = data_type(factor)

        # > form (q^T)*fvec and store the first n components in qtf
        # :: see x_{NG} = - PI * R^{-1} * Q_1^T * fvec
        # :: H * r = r - v * beta * r^T * v
        for i in range(m):
            wa4[i] = fvec[i]
        for j in range(n):  # altogether n times transformation
            # :: here the lower trapezoidal part of fjac contains
            #    a factored form of q, in other words, a set of v
            if fjac[j + j * ldfjac] != 0:
                total = data_type(0.0)  # r^T * v
                for i in range(j, m):
                    total += fjac[i + j * ldfjac] * wa4[i]
                # :: mul -beta
                temp = -total / fjac[j + j * ldfjac]
                for i in range(j, m):
                    wa4[i] += fjac[i + j * ldfjac] * temp
            # restore the diag of R in fjac
            fjac[j + j * ldfjac] = qrfac.rdiag[j]
            qtf[j] = wa4[j]

        # > compute the norm(inf norm) of the scaled gradient
        #         t           t
        # :: g = J * r = P * R * qtf
        gnorm = data_type(0.0)
        wa2 = qrfac.acnorm
        if fnorm != 0:
            for j in range(n):
                # >> get index
                l = ipvt[j] - 1
                if wa2[l] != 0.0:
                    total = data_type(0.0)
                    for i in range(j + 1):
                        total += fjac[i + j * ldfjac] * (qtf[i] / fnorm)
                    # >>> computing max
                    d1 = np.abs(total / wa2[l])
                    gnorm = max(gnorm, d1)

        if utility.wm_trace:
            print("       ||df|| = %.10f, nfev = %d" % (gnorm, nfev))
        # > test for convergence of the gradient norm
        # NOTE(review): there is no early exit here; the flag is picked
        # up by the first `ier != 0` check inside the inner loop, after
        # one more trial step (and may be overwritten by 1, 2 or 3).
        if gnorm <= gtol:
            ier = 4

        # > rescale if necessary
        if mode != 2:
            for j in range(n):
                # >> compute max
                d1 = diag[j]
                d2 = wa2[j]
                diag[j] = max(d1, d2)

        # > beginning of the inner loop
        while True:
            if utility.wm_trace:
                print("    => try delta = %.10f:" % delta)
                # debug switch: change to True to trace lm_lambda too
                if False:
                    utility.lam_trace = True
                    print("--" * 26 + " lmpar begin")
            # > determine the levenberg-marquardt parameter
            lam, wa1, sdiag = lm_lambda(n, fjac, ldfjac, ipvt,
                                        diag, qtf, delta, lam)
            if utility.lam_trace:
                utility.lam_trace = False
                print("--" * 26 + " lmpar end")
            # store the direction p and x + p. calculate the norm of p
            for j in range(n):
                wa1[j] = -wa1[j]
                wa2[j] = x[j] + wa1[j]
                wa3[j] = diag[j] * wa1[j]
            # :: pnorm = || D * p ||_2
            pnorm = enorm(wa3)

            # > on the first iteration, adjust the initial step bound
            if n_iter == 1:
                delta = min(delta, pnorm)

            # > evaluate the function at x + p and calculate its norm
            wa4 = func(wa2, *args)
            nfev += 1
            fnorm1 = enorm(wa4)

            # > compute the scaled actual reduction
            act_red = -1
            if p1 * fnorm1 < fnorm:
                # compute 2nd power
                d1 = fnorm1 / fnorm
                act_red = 1.0 - d1 * d1

            # > compute the scaled predicted reduction and the
            #   scaled directional derivative
            # :: pre_red = (m(0) - m(p)) / m(0)
            # ::              t   t           t
            # ::         =  (p * J * J * p + J * r * p) / m(0)
            # ::               t   t           t   t
            # :: J = Q * R => p * J * J * p = p * R * R * p
            # ::
            # :: m(0) = fnorm * fnorm
            for j in range(n):
                wa3[j] = 0
                l = ipvt[j] - 1
                temp = wa1[l]
                for i in range(j + 1):
                    wa3[i] += fjac[i + j * ldfjac] * temp
            # :: now wa3 stores J * p
            temp1 = enorm(wa3) / fnorm
            #                             t
            # :: lam * p = - grad_m(p) = J * r
            temp2 = (np.sqrt(lam) * pnorm) / fnorm
            # :: TODO -  ... / p5
            pre_red = temp1 * temp1 + temp2 * temp2 / p5
            dir_der = -(temp1 * temp1 + temp2 * temp2)

            # > compute the ratio of the actual to the predicted
            #   reduction
            ratio = 0.0
            if pre_red != 0:
                ratio = act_red / pre_red

            if utility.wm_trace:
                print("          ratio = %.10f, nfev = %d" % (ratio, nfev))

            # > update the step bound
            if ratio <= p25:
                if act_red >= 0.0:
                    temp = p5
                else:
                    temp = p5 * dir_der / (dir_der + p5 * act_red)
                if p1 * fnorm1 >= fnorm or temp < p1:
                    temp = p1
                # >> compute min, shrink the trust region
                d1 = pnorm / p1
                delta = temp * min(delta, d1)
                lam /= temp
                if utility.wm_trace:
                    print("          delta ↓ -> %.10f:" % delta)
            elif lam == 0.0 or ratio >= p75:
                # >> expand the trust region
                delta = pnorm / p5
                lam = p5 * lam
                if utility.wm_trace:
                    print("          delta ↑ -> %.10f:" % delta)

            # > test for successful iteration
            if ratio >= p0001:
                # >> successful iteration. update x, fvec
                #    and their norms
                for j in range(n):
                    x[j] = wa2[j]
                    wa2[j] = diag[j] * x[j]
                for i in range(m):
                    fvec[i] = wa4[i]
                xnorm = enorm(wa2)

                if utility.wm_trace:
                    print("    √ ||x|| ↓ %.10f -> %.10f" %
                          (fnorm - fnorm1, fnorm1))

                fnorm = fnorm1
                n_iter += 1
            elif utility.wm_trace:
                print("       × ||x|| not changed")

            # > test for convergence
            small_reduction = (np.abs(act_red) <= ftol
                               and pre_red <= ftol
                               and p5 * ratio <= 1.0)
            if small_reduction:
                ier = 1
            if delta <= xtol * xnorm:
                ier = 2
            if small_reduction and ier == 2:
                ier = 3
            if ier != 0:
                break

            # > test for termination and stringent tolerances
            if nfev >= maxfev:
                ier = 5
            if np.abs(act_red) <= eps_machine and pre_red <= \
                    eps_machine and p5 * ratio <= 1.0:
                ier = 6
            if delta <= eps_machine * xnorm:
                ier = 7
            if gnorm <= eps_machine:
                ier = 8
            if ier != 0:
                break

            # > end of the inner loop. repeat only if the iteration
            #   was unsuccessful
            if ratio >= p0001:
                break

        # > end of the outer loop: stop once a termination flag is set
        if ier != 0:
            break

    # endregion : Main loop

    # > wrap results
    errors = {0: ["Improper input parameters.", TypeError],
              1: ["Both actual and predicted relative reductions "
                  "in the sum of squares are at most %f * 1e-8" %
                  (ftol * 1e8), None],
              2: ["The relative error between two consecutive "
                  "iterates is at most %f * 1e-8" % (xtol * 1e8), None],
              3: ["Both actual and predicted relative reductions in "
                  "the sum of squares\n  are at most %f and the "
                  "relative error between two consecutive "
                  "iterates is at \n  most %f" % (ftol, xtol), None],
              4: ["The cosine of the angle between func(x) and any "
                  "column of the\n  Jacobian is at most %f in "
                  "absolute value" % gtol, None],
              5: ["Number of calls to function has reached "
                  "maxfev = %d." % maxfev, ValueError],
              6: ["ftol=%f is too small, no further reduction "
                  "in the sum of squares\n  is possible.""" % ftol,
                  ValueError],
              7: ["xtol=%f is too small, no further improvement in "
                  "the approximate\n  solution is possible." % xtol,
                  ValueError],
              8: ["gtol=%f is too small, func(x) is orthogonal to the "
                  "columns of\n  the Jacobian to machine "
                  "precision." % gtol, ValueError],
              'unknown': ["Unknown error.", TypeError]}

    # failures are reported as a printed warning, never raised
    if ier not in [1, 2, 3, 4] and not full_output:
        if ier in [5, 6, 7, 8]:
            print("!!! leastsq warning: %s" % errors[ier][0])

    mesg = errors[ier][0]

    if utility.wm_trace:
        print(">>> " + mesg)

    if full_output:
        cov_x = None
        if ier in [1, 2, 3, 4]:
            # numpy.dual is deprecated; numpy.linalg provides the same inv
            from numpy.linalg import inv
            from numpy.linalg import LinAlgError
            perm = np.take(np.eye(n), ipvt - 1, 0)
            r = np.triu(np.transpose(fjac.reshape(n, m))[:n, :])
            R = np.dot(r, perm)
            try:
                cov_x = inv(np.dot(np.transpose(R), R))
            except (LinAlgError, ValueError):
                # singular R^T R: leave cov_x as None (documented above)
                pass
        dct = {'fjac': fjac, 'fvec': fvec, 'ipvt': ipvt,
               'nfev': nfev, 'qtf': qtf}
        return x, cov_x, dct, mesg, ier
    else:
        return x, ier
ファイル: qrfac.py プロジェクト: WilliamRo/CLipPYME
def qr(m, n, a, lda, pivot):
    """Use householder transformations with column pivoting (optional)
    to compute a QR factorization of the m by n matrix a.

    Parameters
    ----------
    m: int
        a positive integer input variable set to the number of rows
        of a
    n: int
        a positive integer input variable set to the number of
        columns of a
    a: ndarray
        an m by n array stored flat, element (i, j) at a[i + j * lda].
        on input a contains the matrix for which the qr factorization
        is to be computed. on output the strict upper trapezoidal
        part of a contains the strict upper trapezoidal part of r,
        and the lower trapezoidal part of a contains a factored form
        of q (the non-trivial elements of the householder vectors)
    lda: int
        a positive integer input variable not less than m which
        specifies the leading dimension of the array a
    pivot: bool
        a logical input variable. if pivot is set true, then
        column pivoting is enforced. if pivot is set false, then
        no column pivoting is done

    Returns
    -------
    ``[ipvt, rdiag, acnorm]`` if pivot is true, otherwise
    ``[rdiag, acnorm]``:

    ipvt: ndarray
        an integer output array. ipvt defines the permutation
        matrix p such that a*p = q*r. column j of p is column
        ipvt(j) of the identity matrix (1-based entries)
    rdiag: ndarray
        an output array of length n which contains the diagonal
        elements of r
    acnorm: ndarray
        an output array of length n which contains the norms of
        the corresponding columns of the input matrix a

    The returned arrays are module-level buffers that are reused (and
    overwritten) by later calls with the same ``n``.
    """
    # region : Initialize parameters
    # ----------------------------------------
    global p05, eps_machine, ipvt, rdiag, acnorm, wa

    # (re)allocate the module-level buffers when the column count
    # changes; sizes are compared by value, not by object identity
    if ipvt is None or ipvt.size != n:
        ipvt = np.zeros(n, np.int32)
    if rdiag is None or rdiag.size != n:
        rdiag = np.zeros(n, data_type)
    if acnorm is None or acnorm.size != n:
        acnorm = np.zeros(n, data_type)
    if wa is None or wa.size != n:
        wa = np.zeros(n, data_type)

    # ----------------------------------------
    # endregion : Initialize parameters

    # > compute the initial column norms and initialize several arrays
    for j in range(n):
        acnorm[j] = enorm(a[lda * j:lda * (j + 1)])
        rdiag[j] = acnorm[j]
        wa[j] = rdiag[j]
        if pivot:
            ipvt[j] = j + 1

    # > reduce a to r with householder transformations
    min_mn = min(m, n)
    for j in range(min_mn):
        # > if pivot
        # --------------------------------------------------------
        if pivot:
            # >> bring the column of largest norm
            #    into the pivot position
            k_max = j
            for k in range(j, n):
                if rdiag[k] > rdiag[k_max]:
                    k_max = k
            # >> switch
            if k_max != j:
                for i in range(m):  # traverse rows
                    # >>> switch columns j and k_max of a
                    temp = a[i + j * lda]
                    a[i + j * lda] = a[i + k_max * lda]
                    a[i + k_max * lda] = temp
                # >>> overwrite, acnorm[k_max] still hold
                rdiag[k_max] = rdiag[j]
                wa[k_max] = wa[j]
                # >>> switch the permutation entries
                k = ipvt[j]
                ipvt[j] = ipvt[k_max]
                ipvt[k_max] = k

        # > compute the householder transformation to reduce the
        #   j-th column of a to a multiple of the j-th unit vector
        # ------------------------
        # >> normalize
        # :: v = x - ||x||_2 * e_1
        # :: ajnorm = ||x||_2
        ajnorm = enorm(a[lda * j + j:lda * (j + 1)])
        if ajnorm != 0.0:
            if a[j + j * lda] < 0.0:
                # :: prepare to keep a[i + j * lda] positive
                ajnorm = -ajnorm
            # :: x = sgn(x_1) * x / ||x||_2
            for i in range(j, m):
                a[i + j * lda] /= ajnorm
            # :: a[j + j * lda] temporarily stores v[0]
            # :: one number being subtracted from another close number
            #    has been avoided
            a[j + j * lda] += 1.0

            # > apply the transformation to the remaining columns and
            #   update the norms
            #                                        t
            # :: A[i][k] -= beta * v[i] * w[k], w = A * v
            #            t
            # :: beta = 1 / v[0], can be proved easily
            # :: w[k] = A[k-th column] * v
            jp1 = j + 1  # j plus 1
            for k in range(jp1, n):  # traverse columns
                total = data_type(0.0)  # this is w[k]
                for i in range(j, m):  # traverse rows
                    #        v[i]             A[i][k-th column]
                    total += a[i + j * lda] * a[i + k * lda]
                # :: beta * w[k]
                temp = total / a[j + j * lda]
                for i in range(j, m):
                    # :: a[i][k] -= beta * w[k] * v[i]
                    a[i + k * lda] -= temp * a[i + j * lda]

                # :: rdiag stores information used to pivot
                # >> update rdiag to ensure that it can present
                #    alpha = +- ||x||_2
                if pivot and rdiag[k] != 0:
                    temp = a[j + k * lda] / rdiag[k]
                    # >>> compute max
                    d1 = 1.0 - temp * temp
                    rdiag[k] *= np.sqrt(max(0.0, d1))
                    # >>> compute 2nd power
                    d1 = rdiag[k] / wa[k]
                    # :: if the down-dated norm is too small relative
                    #    to the last exact one, recompute it
                    if p05 * (d1 * d1) <= eps_machine:
                        rdiag[k] = enorm(
                            a[jp1 + k * lda:(k + 1) * lda])
                        wa[k] = rdiag[k]
        # :: sgn(ajnorm) = -sgn(x_0)
        # :: H * x = alpha * e_1
        rdiag[j] = -ajnorm

    # > return (ipvt is only meaningful when pivoting was requested)
    if pivot:
        return [ipvt, rdiag, acnorm]
    else:
        return [rdiag, acnorm]
#   Created: June 19, 2016
#   Author: William Ro

import numpy as np
from enorm import euclid_norm as enorm
from dpmpar import get_machine_parameter as dpmpar
from utility import data_type

# region : Module parameters

# Threshold factor used by qr when deciding whether a down-dated column
# norm has lost too much accuracy and must be recomputed.
p05 = data_type(0.05)
# Value of get_machine_parameter(1): 1.19209290e-07 for float32 and
# 2.2204460492503131e-16 for float64.
eps_machine = dpmpar(1)

# Module-level arrays; qr (re)allocates them with length n whenever they
# are None or have the wrong size, and returns ipvt / rdiag / acnorm.
ipvt = None
rdiag = None
acnorm = None
wa = None

# endregion : Module parameters

def qr(m, n, a, lda, pivot):
    Uses householder transformations with column pivoting (optional)
    to compute a QR factorization of the m by n matrix a
