예제 #1
0
def _lange(x, norm, axis):
    order = 'F' if x.flags.f_contiguous and not x.flags.c_contiguous else 'C'
    dtype = 'f' if x.dtype.char in 'fF' else 'd'
    if x.size == 0:
        shape = [x.shape[i] for i in set(range(x.ndim)) - set(axis)]
        return nlcpy.zeros(shape, dtype=dtype)
    if norm in (None, 'fro', 'f'):
        if x.dtype.kind == 'c':
            x = abs(x)
        return nlcpy.sqrt(nlcpy.sum(x * x, axis=axis))
    if norm == nlcpy.inf:
        norm = 'I'
    else:
        norm = '1'
    lwork = x.shape[0] if norm == 'I' else 1
    x = nlcpy.asarray(nlcpy.moveaxis(x, (axis[0], axis[1]), (0, 1)), order='F')
    y = nlcpy.empty(x.shape[2:], dtype=dtype, order='F')
    work = nlcpy.empty(lwork, dtype=dtype)
    fpe = request._get_fpe_flag()
    args = (
        ord(norm),
        x._ve_array,
        y._ve_array,
        work._ve_array,
        veo.OnStack(fpe, inout=veo.INTENT_OUT),
    )

    request._push_and_flush_request(
        'nlcpy_norm',
        args,
    )

    return nlcpy.asarray(y, order=order)
예제 #2
0
 def test_asl_random_f(self):
     self._helper(_test_asl_random_f, '/opt/nec/ve/bin/nfort', 'random_normal_d_',
                  (int64, void_p, float64, float64, uint64), int32)
     N, seed, D_M, D_S, val = self._prep()
     err = self.kern(N, veo.OnStack(seed, inout=veo.INTENT_IN), D_M, D_S, val.ve_adr,
                     sync=self.sync, callback=self.callback)
     testing.assert_array_equal(val, self._exec_random(seed, D_M, D_S, N),
                                err_msg='File-ID: {}'.format(self.lib.id))
     if self.sync:
         assert err == 0
예제 #3
0
 def test_heterosolver_cpp(self):
     self._helper(_test_heterosolver_cpp, '/opt/nec/ve/bin/nc++',
                  'HS_csr_unsym_ind_0',
                  (void_p,), int64)
     x = self._prep()
     err = self.kern(veo.OnStack(x, inout=veo.INTENT_OUT),
                     sync=self.sync, callback=self.callback)
     testing.assert_allclose(x, self._make_ref(), rtol=1e-12,
                             err_msg='File-ID: {}'.format(self.lib.id))
     if self.sync:
         assert err == 0
예제 #4
0
 def test_sblas_c(self):
     self._helper(_test_sblas_c, '/opt/nec/ve/bin/ncc',
                  'sblas_mv_csr_ind_0', (void_p, ), int64)
     y = self._prep()
     err = self.kern(veo.OnStack(y, inout=veo.INTENT_OUT),
                     sync=self.sync,
                     callback=self.callback)
     testing.assert_allclose(y,
                             self._make_ref(),
                             rtol=1e-12,
                             err_msg='File-ID: {}'.format(self.lib.id))
     if self.sync:
         assert err == 0
예제 #5
0
def ftrace_region_end(message):
    """Ends an ftrace region.

    A file ftrace.out is generated after running your program that invokes
    this routine.
    The ftrace.out includes performance information of your program.

    Notes
    -----
    It is necessary to specify an identical string *message* to
    :func:`ftrace_region_begin` and :func:`ftrace_region_end`.

    Parameters
    ----------
    message : str
        Any string can be specified to distinguish a user-specified region.

    See Also
    --------
    ftrace_region : Enables an ftrace region.
    ftrace_region_begin : Begins an ftrace region.

    Examples
    --------
    >>> import nlcpy as vp
    >>> x = vp.random.rand(10000, 10000)
    >>> vp.prof.ftrace_region_begin('dgemm')
    >>> # something you want to profile
    >>> _ = x @ x
    >>> vp.prof.ftrace_region_end('dgemm')
    """

    nlcpy.request.flush()
    ctx = veo._get_veo_ctx()
    lib = _get_lib()
    f = lib.find_function("nlcpy_profiling_region_end".encode('utf-8'))
    f.args_type(b"void *")
    f.ret_type("void")
    if type(message) is not bytes:
        message = message.encode('utf-8')
    buff = numpy.frombuffer(message, dtype=numpy.uint8)
    req = f(ctx, veo.OnStack(buff))
    req.wait_result()
예제 #6
0
def _geev(a, jobvr):
    a = nlcpy.asarray(a)
    util._assertRankAtLeast2(a)
    util._assertNdSquareness(a)

    # used to match the contiguous of result to numpy.
    c_order = a.flags.c_contiguous or sum([i > 1 for i in a.shape[:-2]]) < 2

    a_complex = a.dtype.char in 'FD'
    if a.dtype.char == 'F':
        dtype = 'D'
        f_dtype = 'f'
        c_dtype = 'F'
    elif a.dtype.char == 'D':
        dtype = 'D'
        f_dtype = 'd'
        c_dtype = 'D'
    else:
        dtype = 'd'
        if a.dtype.char == 'f':
            f_dtype = 'f'
            c_dtype = 'F'
        else:
            f_dtype = 'd'
            c_dtype = 'D'

    if a.size == 0:
        dtype = c_dtype if a_complex else f_dtype
        w = nlcpy.empty(shape=a.shape[:-1], dtype=dtype)
        if jobvr:
            vr = nlcpy.empty(shape=a.shape, dtype=dtype)
            return w, vr
        else:
            return w

    a = nlcpy.array(nlcpy.moveaxis(a, (-1, -2), (1, 0)), dtype=dtype, order='F')
    wr = nlcpy.empty(a.shape[1:], dtype=dtype, order='F')
    wi = nlcpy.empty(a.shape[1:], dtype=dtype, order='F')
    vr = nlcpy.empty(a.shape if jobvr else 1, dtype=dtype, order='F')
    vc = nlcpy.empty(a.shape if jobvr else 1, dtype='D', order='F')

    n = a.shape[0]
    work = nlcpy.empty(
        65 * n if a_complex else 66 * n, dtype=dtype, order='F')
    rwork = nlcpy.empty(2 * n if a_complex else 1, dtype=f_dtype, order='F')
    info = numpy.empty(1, dtype='l')
    fpe = request._get_fpe_flag()
    args = (
        a._ve_array,
        wr._ve_array,
        wi._ve_array,
        vr._ve_array,
        vc._ve_array,
        work._ve_array,
        rwork._ve_array,
        ord('V') if jobvr else ord('N'),
        veo.OnStack(info, inout=veo.INTENT_OUT),
        veo.OnStack(fpe, inout=veo.INTENT_OUT),
    )

    request._push_and_flush_request(
        'nlcpy_eig',
        args,
    )

    if a_complex:
        w_complex = True
        w = wr
        vc = vr
    else:
        w_complex = nlcpy.any(wi)
        w = wr + wi * 1.0j
    if w_complex:
        if c_order:
            w = nlcpy.asarray(nlcpy.moveaxis(w, 0, -1), dtype=c_dtype, order='C')
        else:
            w = nlcpy.moveaxis(nlcpy.asarray(w, dtype=c_dtype), 0, -1)
    else:
        wr = w.real
        w = nlcpy.moveaxis(nlcpy.asarray(wr, dtype=f_dtype), 0, -1)

    if jobvr:
        if w_complex:
            if c_order:
                vr = nlcpy.asarray(
                    nlcpy.moveaxis(vc, (1, 0), (-1, -2)), dtype=c_dtype, order='C')
            else:
                vr = nlcpy.moveaxis(
                    nlcpy.asarray(vc, dtype=c_dtype), (1, 0), (-1, -2))
        else:
            if c_dtype == "F":
                vr = nlcpy.asarray(vc.real, dtype=f_dtype, order='C')
            else:
                vc = nlcpy.moveaxis(
                    nlcpy.asarray(vc, dtype=c_dtype), (1, 0), (-1, -2))
                vr = vc.real

    if jobvr:
        return w, vr
    else:
        return w
예제 #7
0
def _syevd(a, jobz, UPLO):
    a = nlcpy.asarray(a)
    util._assertRankAtLeast2(a)
    util._assertNdSquareness(a)
    UPLO = UPLO.upper()
    if UPLO not in 'UL':
        raise ValueError("UPLO argument must be 'L' or 'U'")

    # used to match the contiguous of result to numpy.
    c_order = a.flags.c_contiguous or sum([i > 1 for i in a.shape[:-2]]) < 2

    a_complex = a.dtype.char in 'FD'
    if a.dtype.char == 'F':
        dtype = 'F'
        f_dtype = 'f'
    elif a.dtype.char == 'D':
        dtype = 'D'
        f_dtype = 'd'
    else:
        if a.dtype.char == 'f':
            dtype = 'f'
            f_dtype = 'f'
        else:
            dtype = 'd'
            f_dtype = 'd'

    if a.size == 0:
        w = nlcpy.empty(shape=a.shape[:-1], dtype=f_dtype)
        if jobz:
            vr = nlcpy.empty(shape=a.shape, dtype=dtype)
            return w, vr
        else:
            return w

    a = nlcpy.array(nlcpy.moveaxis(a, (-1, -2), (1, 0)), dtype=dtype, order='F')
    w = nlcpy.empty(a.shape[1:], dtype=f_dtype, order='F')
    n = a.shape[0]
    if a.size > 1:
        if a_complex:
            lwork = max(2 * n + n * n, n + 48)
            lrwork = 1 + 5 * n + 2 * n * n if jobz else n
        else:
            lwork = max(2 * n + 32, 1 + 6 * n + 2 * n * n) if jobz else 2 * n + 32
            lrwork = 1
        liwork = 3 + 5 * n if jobz else 1
    else:
        lwork = 1
        lrwork = 1
        liwork = 1

    work = nlcpy.empty(lwork, dtype=dtype)
    rwork = nlcpy.empty(lrwork, dtype=f_dtype)
    iwork = nlcpy.empty(liwork, dtype='l')
    info = numpy.empty(1, dtype='l')
    fpe = request._get_fpe_flag()
    args = (
        a._ve_array,
        w._ve_array,
        work._ve_array,
        rwork._ve_array,
        iwork._ve_array,
        ord('V') if jobz else ord('N'),
        ord(UPLO),
        veo.OnStack(info, inout=veo.INTENT_OUT),
        veo.OnStack(fpe, inout=veo.INTENT_OUT),
    )

    request._push_and_flush_request(
        'nlcpy_eigh',
        args,
    )

    if c_order:
        w = nlcpy.asarray(nlcpy.moveaxis(w, 0, -1), order='C')
    else:
        w = nlcpy.moveaxis(w, 0, -1)
    if jobz:
        if c_order:
            a = nlcpy.asarray(nlcpy.moveaxis(a, (1, 0), (-1, -2)), order='C')
        else:
            a = nlcpy.moveaxis(a, (1, 0), (-1, -2))
        return w, a
    else:
        return w
예제 #8
0
def svd(a, full_matrices=True, compute_uv=True, hermitian=False):
    """Singular Value Decomposition.

    When *a* is a 2D array, it is factorized as
    ``u @ nlcpy.diag(s) @ vh = (u * s) @ vh``,
    where *u* and *vh* are 2D unitary arrays and *s* is a 1D array of *a*'s singular
    values.
    When *a* is higher-dimensional, SVD is applied in stacked mode as explained below.

    Parameters
    ----------
    a : (..., M, N) array_like
        A real or complex array with a.ndim >= 2.
    full_matrices : bool, optional
        If True (default), *u* and *vh* have the shapes ``(..., M, M)`` and ``(..., N,
        N)``, respectively. Otherwise, the shapes are ``(..., M, K)`` and ``(..., K,
        N)``, respectively, where ``K = min(M, N)``.
    compute_uv : bool, optional
        Whether or not to compute *u* and *vh* in addition to *s*. True by default.
    hermitian : bool, optional
        If True, *a* is assumed to be Hermitian (symmetric if real-valued), enabling a
        more efficient method for finding singular values. Defaults to False.

    Returns
    -------
    u : {(..., M, M), (..., M, K)} ndarray
        Unitary array(s). The first ``a.ndim - 2`` dimensions have the same size as those
        of the input *a*. The size of the last two dimensions depends on the value of
        *full_matrices*. Only returned when *compute_uv* is True.
    s : (..., K) ndarray
        Vector(s) with the singular values, within each vector sorted in descending
        order. The first ``a.ndim - 2`` dimensions have the same size as those of the
        input *a*.
    vh : {(..., N, N), (..., K, N)} ndarray
        Unitary array(s). The first ``a.ndim - 2`` dimensions have the same size as those
        of the input *a*. The size of the last two dimensions depends on the value of
        *full_matrices*. Only returned when *compute_uv* is True.

    Note
    ----
    The decomposition is performed using LAPACK routine ``_gesdd``.

    SVD is usually described for the factorization of a 2D matrix :math:`A`.
    The higher-dimensional case will be discussed below. In the 2D case, SVD is written
    as
    :math:`A=USV^{H}`, where :math:`A = a`, :math:`U = u`, :math:`S = nlcpy.diag(s)`
    and :math:`V^{H} = vh`. The 1D array `s` contains the singular values of `a` and
    `u` and `vh` are unitary. The rows of `vh` are the eigenvectors of :math:`A^{H}A`
    and the columns of `u` are the eigenvectors of :math:`AA^{H}`. In both cases the
    corresponding (possibly non-zero) eigenvalues are given by ``s**2``.

    If `a` has more than two dimensions, then broadcasting rules apply, as explained in
    :ref:`Linear algebra on several matrices at once <linalg_several_matrices_at_once>`.
    This means that SVD is working in "stacked" mode: it iterates over all indices
    of the first ``a.ndim - 2`` dimensions and for each combination SVD is applied to the
    last two indices.

    Examples
    --------
    >>> import nlcpy as vp
    >>> from nlcpy import testing
    >>> a = vp.random.randn(9, 6) + 1j*vp.random.randn(9, 6)

    Reconstruction based on full SVD, 2D case:

    >>> u, s, vh = vp.linalg.svd(a, full_matrices=True)
    >>> u.shape, s.shape, vh.shape
    ((9, 9), (6,), (6, 6))
    >>> vp.testing.assert_allclose(a, vp.dot(u[:, :6] * s, vh))
    >>> smat = vp.zeros((9, 6), dtype=complex)
    >>> smat[:6, :6] = vp.diag(s)
    >>> vp.testing.assert_allclose(a, vp.dot(u, vp.dot(smat, vh)))

    Reconstruction based on reduced SVD, 2D case:

    >>> u, s, vh = vp.linalg.svd(a, full_matrices=False)
    >>> u.shape, s.shape, vh.shape
    ((9, 6), (6,), (6, 6))
    >>> vp.testing.assert_allclose(a, vp.dot(u * s, vh))
    >>> smat = vp.diag(s)
    >>> vp.testing.assert_allclose(a, vp.dot(u, vp.dot(smat, vh)))

    """
    a = nlcpy.asarray(a)
    util._assertRankAtLeast2(a)
    if hermitian:
        util._assertNdSquareness(a)

    # used to match the contiguous of result to numpy.
    c_order = a.flags.c_contiguous or sum([i > 1 for i in a.shape[:-2]]) < 2

    a_complex = a.dtype.char in 'FD'
    if a.dtype == 'F':
        dtype = 'F'
        f_dtype = 'f'
    elif a.dtype == 'D':
        dtype = 'D'
        f_dtype = 'd'
    elif a.dtype == 'f':
        dtype = 'f'
        f_dtype = 'f'
    else:
        dtype = 'd'
        f_dtype = 'd'

    if hermitian:
        if compute_uv:
            # lapack returns eigenvalues in reverse order, so to reconsist.
            s, u = nlcpy.linalg.eigh(a)
            signs = nlcpy.sign(s)
            s = abs(s)
            sidx = nlcpy.argsort(s)[..., ::-1]
            signs = _take_along_axis(signs, sidx, signs.ndim - 1)
            s = _take_along_axis(s, sidx, s.ndim - 1)
            u = _take_along_axis(u, sidx[..., None, :], u.ndim - 1)
            # singular values are unsigned, move the sign into v
            vt = nlcpy.conjugate(u * signs[..., None, :])
            vt = nlcpy.moveaxis(vt, -2, -1)
            return u, s, vt
        else:
            s = nlcpy.linalg.eigvalsh(a)
            s = nlcpy.sort(abs(s))[..., ::-1]
            return s

    m = a.shape[-2]
    n = a.shape[-1]
    min_mn = min(m, n)
    max_mn = max(m, n)
    if a.size == 0:
        s = nlcpy.empty(a.shape[:-2] + (min_mn, ), f_dtype)
        if compute_uv:
            if full_matrices:
                u_shape = a.shape[:-1] + (m, )
                vt_shape = a.shape[:-2] + (n, n)
            else:
                u_shape = a.shape[:-1] + (min_mn, )
                vt_shape = a.shape[:-2] + (min_mn, n)
            u = nlcpy.empty(u_shape, dtype=dtype)
            vt = nlcpy.empty(vt_shape, dtype=dtype)
            return u, s, vt
        else:
            return s

    a = nlcpy.array(nlcpy.moveaxis(a, (-1, -2), (1, 0)),
                    dtype=dtype,
                    order='F')
    if compute_uv:
        if full_matrices:
            u = nlcpy.empty((m, m) + a.shape[2:], dtype=dtype, order='F')
            vt = nlcpy.empty((n, n) + a.shape[2:], dtype=dtype, order='F')
            job = 'A'
        else:
            u = nlcpy.empty((m, m) + a.shape[2:], dtype=dtype, order='F')
            vt = nlcpy.empty((min_mn, n) + a.shape[2:], dtype=dtype, order='F')
            job = 'S'
    else:
        u = nlcpy.empty(1)
        vt = nlcpy.empty(1)
        job = 'N'

    if a_complex:
        mnthr1 = int(min_mn * 17.0 / 9.0)
        if max_mn >= mnthr1:
            if job == 'N':
                lwork = 130 * min_mn
            elif job == 'S':
                lwork = (min_mn + 130) * min_mn
            else:
                lwork = max(
                    (min_mn + 130) * min_mn,
                    (min_mn + 1) * min_mn + 32 * max_mn,
                )
        else:
            lwork = 64 * (min_mn + max_mn) + 2 * min_mn
    else:
        mnthr = int(min_mn * 11.0 / 6.0)
        if m >= n:
            if m >= mnthr:
                if job == 'N':
                    lwork = 131 * n
                elif job == 'S':
                    lwork = max((131 + n) * n, (4 * n + 7) * n)
                else:
                    lwork = max((n + 131) * n, (n + 1) * n + 32 * m,
                                (4 * n + 6) * n + m)
            else:
                if job == 'N':
                    lwork = 64 * m + 67 * n
                elif job == 'S':
                    lwork = max(64 * m + 67 * n, (3 * n + 7) * n)
                else:
                    lwork = (3 * n + 7) * n
        else:
            if n >= mnthr:
                if job == 'N':
                    lwork = 131 * m
                elif job == 'S':
                    lwork = max((m + 131) * m, (4 * m + 7) * m)
                else:
                    lwork = max((m + 131) * m, (m + 1) * m + 32 * n,
                                (4 * m + 7) * m)
            else:
                if job == 'N':
                    lwork = 67 * m + 64 * n
                else:
                    lwork = max(67 * m + 64 * n, (3 * m + 7) * m)

    s = nlcpy.empty((min_mn, ) + a.shape[2:], dtype=f_dtype, order='F')
    work = nlcpy.empty(lwork, dtype=dtype)
    if a_complex:
        if job == 'N':
            lrwork = 5 * min_mn
        else:
            lrwork = min_mn * max(5 * min_mn + 7,
                                  2 * max(m, n) + 2 * min_mn + 1)
    else:
        lrwork = 1
    rwork = nlcpy.empty(lrwork, dtype=f_dtype)
    iwork = nlcpy.empty(8 * min_mn, dtype=f_dtype)
    info = numpy.empty(1, dtype='l')
    fpe = request._get_fpe_flag()
    args = (
        ord(job),
        a._ve_array,
        s._ve_array,
        u._ve_array,
        vt._ve_array,
        work._ve_array,
        rwork._ve_array,
        iwork._ve_array,
        veo.OnStack(info, inout=veo.INTENT_OUT),
        veo.OnStack(fpe, inout=veo.INTENT_OUT),
    )

    request._push_and_flush_request(
        'nlcpy_svd',
        args,
    )

    if c_order:
        s = nlcpy.asarray(nlcpy.moveaxis(s, 0, -1), order='C')
    else:
        s = nlcpy.moveaxis(s, 0, -1)
    if compute_uv:
        u = nlcpy.moveaxis(u, (1, 0), (-1, -2))
        if not full_matrices:
            u = u[..., :m, :min_mn]
        if c_order:
            u = nlcpy.asarray(u, dtype=dtype, order='C')
            vt = nlcpy.asarray(nlcpy.moveaxis(vt, (1, 0), (-1, -2)),
                               dtype,
                               order='C')
        else:
            vt = nlcpy.moveaxis(nlcpy.asarray(vt, dtype), (1, 0), (-1, -2))
        return u, s, vt
    else:
        return s
예제 #9
0
def qr(a, mode='reduced'):
    """Computes the qr factorization of a matrix.

    Factor the matrix *a* as *qr*, where *q* is orthonormal and *r* is upper-triangular.

    Parameters
    ----------
    a : (M, N) array_like
        Matrix to be factored.
    mode : {'reduced', 'complete', 'r', 'raw', 'full', 'economic'}, optional
        If K = min(M, N), then

        - 'reduced' : returns q, r with dimensions (M, K), (K, N) (default)
        - 'complete' : returns q, r with dimensions (M, M), (M, N)
        - 'r' : returns r only with dimensions (K, N)
        - 'raw' : returns h, tau with dimensions (N, M), (K,)
        - 'full' or 'f' : alias of 'reduced', deprecated
        - 'economic' or 'e' : returns h from 'raw', deprecated.

    Returns
    -------
    q : ndarray, optional
        A matrix with orthonormal columns. When mode = 'complete' the result is an
        orthogonal/unitary matrix depending on whether or not a is real/complex. The
        determinant may be either +/- 1 in that case.
    r : ndarray, optional
        The upper-triangular matrix.
    (h, tau) : ndarray, optional
        The array h contains the Householder reflectors that generate q along with r. The
        tau array contains scaling factors for the reflectors. In the deprecated
        'economic' mode only h is returned.

    Note
    ----
    This is an interface to the LAPACK routines ``dgeqrf``, ``zgeqrf``, ``dorgqr``,
    and ``zungqr``.

    For more information on the qr factorization, see for example:
    https://en.wikipedia.org/wiki/QR_factorization

    Note that when 'raw' option is specified the returned arrays are of type "float64" or
    "complex128" and the h array is transposed to be FORTRAN compatible.

    Examples
    --------
    >>> import numpy as np
    >>> import nlcpy as vp
    >>> from nlcpy import testing
    >>> a = vp.random.randn(9, 6)
    >>> q, r = vp.linalg.qr(a)
    >>> vp.testing.assert_allclose(a, vp.dot(q, r))  # a does equal qr
    >>> r2 = vp.linalg.qr(a, mode='r')
    >>> r3 = vp.linalg.qr(a, mode='economic')
    >>> # mode='r' returns the same r as mode='full'
    >>> vp.testing.assert_allclose(r, r2)
    >>> # But only triu parts are guaranteed equal when mode='economic'
    >>> vp.testing.assert_allclose(r, np.triu(r3[:6,:6], k=0))

    Example illustrating a common use of qr: solving of least squares problems

    What are the least-squares-best *m* and *y0* in ``y = y0 + mx`` for the following
    data: {(0,1), (1,0), (1,2), (2,1)}. (Graph the points and you’ll see that it should
    be y0 = 0, m = 1.) The answer is provided by solving the over-determined matrix
    equation ``Ax = b``, where::

        A = array([[0, 1], [1, 1], [1, 1], [2, 1]])
        x = array([[y0], [m]])
        b = array([[1], [0], [2], [1]])

    If A = qr such that q is orthonormal (which is always possible via Gram-Schmidt),
    then ``x = inv(r) * (q.T) * b``. (In practice, however, we simply use :func:`lstsq`.)

    >>> A = vp.array([[0, 1], [1, 1], [1, 1], [2, 1]])
    >>> A
    array([[0, 1],
           [1, 1],
           [1, 1],
           [2, 1]])
    >>> b = vp.array([1, 0, 2, 1])
    >>> q, r = vp.linalg.qr(A)
    >>> p = vp.dot(q.T, b)
    >>> vp.dot(vp.linalg.inv(r), p)
    array([1.1102230246251565e-16, 1.0000000000000002e+00])

    """
    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full'):
            msg = "".join(
                ("The 'full' option is deprecated in favor of 'reduced'.\n",
                 "For backward compatibility let mode default."))
            warnings.warn(msg, DeprecationWarning, stacklevel=3)
            mode = 'reduced'
        elif mode in ('e', 'economic'):
            msg = "The 'economic' option is deprecated."
            warnings.warn(msg, DeprecationWarning, stacklevel=3)
            mode = 'economic'
        else:
            raise ValueError("Unrecognized mode '%s'" % mode)

    a = nlcpy.asarray(a)
    util._assertRank2(a)
    if a.dtype == 'F':
        dtype = 'D'
        a_dtype = 'F'
    elif a.dtype == 'D':
        dtype = 'D'
        a_dtype = 'D'
    elif a.dtype == 'f':
        dtype = 'd'
        a_dtype = 'f'
    else:
        dtype = 'd'
        a_dtype = 'd'

    m, n = a.shape
    if a.size == 0:
        if mode == 'reduced':
            return nlcpy.empty((m, 0), a_dtype), nlcpy.empty((0, n), a_dtype)
        elif mode == 'complete':
            return nlcpy.identity(m, a_dtype), nlcpy.empty((m, n), a_dtype)
        elif mode == 'r':
            return nlcpy.empty((0, n), a_dtype)
        elif mode == 'raw':
            return nlcpy.empty((n, m), dtype), nlcpy.empty((0, ), dtype)
        else:
            return nlcpy.empty((m, n), a_dtype), nlcpy.empty((0, ), a_dtype)

    a = nlcpy.asarray(a, dtype=dtype, order='F')
    k = min(m, n)
    if mode == 'complete':
        if m > n:
            x = nlcpy.empty((m, m), dtype=dtype, order='F')
            x[:m, :n] = a
            a = x
        r_shape = (m, n)
    elif mode in ('r', 'reduced', 'economic'):
        r_shape = (k, n)
    else:
        r_shape = 1
    jobq = 0 if mode in ('r', 'raw', 'economic') else 1
    tau = nlcpy.empty(k, dtype=dtype)
    r = nlcpy.zeros(r_shape, dtype=dtype)
    work = nlcpy.empty(n * 64, dtype=dtype)
    fpe = request._get_fpe_flag()
    args = (
        m,
        n,
        jobq,
        a._ve_array,
        tau._ve_array,
        r._ve_array,
        work._ve_array,
        veo.OnStack(fpe, inout=veo.INTENT_OUT),
    )

    request._push_and_flush_request(
        'nlcpy_qr',
        args,
    )

    if mode == 'raw':
        return a.T, tau

    if mode == 'r':
        return nlcpy.asarray(r, dtype=a_dtype)

    if mode == 'economic':
        return nlcpy.asarray(a, dtype=a_dtype)

    mc = m if mode == 'complete' else k
    q = nlcpy.asarray(a[:, :mc], dtype=a_dtype, order='C')
    r = nlcpy.asarray(r, dtype=a_dtype, order='C')
    return q, r
예제 #10
0
def cholesky(a):
    """Cholesky decomposition.

    Return the Cholesky decomposition, *L* * *L.H*, of the square matrix *a*, where *L*
    is lower-triangular and *.H* is the conjugate transpose operator (which is the
    ordinary transpose if *a* is real-valued). *a* must be Hermitian (symmetric if
    real-valued) and positive-definite. Only *L* is actually returned.

    Parameters
    ----------
    a : (..., M, M) array_like
        Hermitian (symmetric if all elements are real), positive-definite input matrix.

    Returns
    -------
    L : (..., M, M) ndarray
        Upper or lower-triangular Cholesky factor of *a*.

    Note
    ----
    The Cholesky decomposition is often used as a fast way of solving :math:`Ax = b`

    (when *A* is both Hermitian/symmetric and positive-definite).

    First, we solve for y in :math:`Ly = b`, and then for x in :math:`L.Hx = y`.

    Examples
    --------
    >>> import nlcpy as vp
    >>> A = vp.array([[1,-2j],[2j,5]])
    >>> A
    array([[ 1.+0.j, -0.-2.j],
           [ 0.+2.j,  5.+0.j]])
    >>> L = vp.linalg.cholesky(A)
    >>> L
    array([[1.+0.j, 0.+0.j],
           [0.+2.j, 1.+0.j]])
    >>> vp.dot(L, vp.conjugate(L.T)) # verify that L * L.H = A
    array([[1.+0.j, 0.-2.j],
           [0.+2.j, 5.+0.j]])

    """
    a = nlcpy.asarray(a)
    util._assertRankAtLeast2(a)
    util._assertNdSquareness(a)

    if a.dtype == 'F':
        dtype = 'D'
        L_dtype = 'F'
    elif a.dtype == 'D':
        dtype = 'D'
        L_dtype = 'D'
    elif a.dtype == 'f':
        dtype = 'd'
        L_dtype = 'f'
    else:
        dtype = 'd'
        L_dtype = 'd'

    if a.size == 0:
        return nlcpy.empty(a.shape, dtype=L_dtype)

    # used to match the contiguous of result to numpy.
    c_order = a.flags.c_contiguous or sum([i > 1 for i in a.shape[:-2]]) < 2

    a = nlcpy.array(nlcpy.moveaxis(a, (-1, -2), (1, 0)),
                    dtype=dtype,
                    order='F')
    info = numpy.empty(1, dtype='l')
    fpe = request._get_fpe_flag()
    args = (
        a._ve_array,
        veo.OnStack(info, inout=veo.INTENT_OUT),
        veo.OnStack(fpe, inout=veo.INTENT_OUT),
    )

    request._push_and_flush_request(
        'nlcpy_cholesky', args, callback=util._assertPositiveDefinite(info))

    if c_order:
        L = nlcpy.asarray(nlcpy.moveaxis(a, (1, 0), (-1, -2)),
                          dtype=L_dtype,
                          order='C')
    else:
        L = nlcpy.moveaxis(nlcpy.asarray(a, dtype=L_dtype), (1, 0), (-1, -2))
    return L
예제 #11
0
def inv(a):
    """Computes the (multiplicative) inverse of a matrix.

    Given a square matrix *a*, return the matrix *ainv* satisfying

    ::

        dot(a, ainv) = dot(ainv, a) = eye(a.shape[0]).

    Parameters
    ----------
    a : (..., M, M) array_like
        Matrix to be inverted.

    Returns
    -------
    ainv : (..., M, M) ndarray
        (Multiplicative) inverse of the matrix *a*.

    Note
    ----
    Broadcasting rules apply, see the :ref:`nlcpy.linalg <nlcpy_linalg>`
    documentation for details.

    Examples
    --------
    >>> import nlcpy as vp
    >>> from nlcpy import testing
    >>> a = vp.array([[1., 2.], [3., 4.]])
    >>> ainv = vp.linalg.inv(a)
    >>> vp.testing.assert_allclose(vp.dot(a, ainv), vp.eye(2), atol=1e-8, rtol=1e-5)
    >>> vp.testing.assert_allclose(vp.dot(ainv, a), vp.eye(2), atol=1e-8, rtol=1e-5)

    Inverses of several matrices can be computed at once:

    >>> a = vp.array([[[1., 2.], [3., 4.]], [[1, 3], [3, 5]]])
    >>> vp.linalg.inv(a)   # doctest: +SKIP
    array([[[-2.  ,  1.  ],
            [ 1.5 , -0.5 ]],
    <BLANKLINE>
           [[-1.25,  0.75],
            [ 0.75, -0.25]]])

    """
    a = nlcpy.asarray(a)
    # used to match the contiguous of result to numpy.
    c_order = a.flags.c_contiguous or sum([i > 1 for i in a.shape[:-2]]) < 2
    util._assertRankAtLeast2(a)
    util._assertNdSquareness(a)
    if a.dtype.char in 'FD':
        dtype = 'D'
        if a.dtype.char in 'fF':
            ainv_dtype = 'F'
        else:
            ainv_dtype = 'D'
    else:
        dtype = 'd'
        if a.dtype.char == 'f':
            ainv_dtype = 'f'
        else:
            ainv_dtype = 'd'
    if a.size == 0:
        return nlcpy.asarray(a, dtype=ainv_dtype)
    a = nlcpy.array(nlcpy.moveaxis(a, (-1, -2), (1, 0)),
                    dtype=dtype,
                    order='F')
    ipiv = nlcpy.empty(a.shape[-1])
    work = nlcpy.empty(a.shape[-1] * 256)
    info = numpy.empty(1, dtype='l')
    fpe = request._get_fpe_flag()
    args = (
        a._ve_array,
        ipiv._ve_array,
        work._ve_array,
        veo.OnStack(info, inout=veo.INTENT_OUT),
        veo.OnStack(fpe, inout=veo.INTENT_OUT),
    )

    request._push_and_flush_request('nlcpy_inv',
                                    args,
                                    callback=util._assertNotSingular(info))

    if c_order:
        a = nlcpy.moveaxis(a, (1, 0), (-1, -2))
        return nlcpy.asarray(a, dtype=ainv_dtype, order='C')
    else:
        a = nlcpy.asarray(a, dtype=ainv_dtype)
        return nlcpy.moveaxis(a, (1, 0), (-1, -2))
예제 #12
0
def solve(a, b):
    """Solves a linear matrix equation, or system of linear scalar equations.

    Computes the "exact" solution, *x*, of the well-determined, i.e., full rank, linear
    matrix equation :math:`ax = b`.

    Parameters
    ----------
    a : (..., M, M) array_like
        Coefficient matrix.
    b : {(..., M,), (..., M, K)} array_like
        Ordinate or "dependent variable" values.

    Returns
    -------
    x : {(..., M,), (..., M, K)} ndarray
        Solution to the system a x = b. Returned shape is identical to *b*.

    Note
    ----
    The solutions are computed using LAPACK routine ``_gesv``.

    `a` must be square and of full-rank, i.e., all rows (or, equivalently, columns) must
    be linearly independent; if either is not true, use :func:`lstsq` for the
    least-squares best "solution" of the system/equation.

    Examples
    --------
    Solve the system of equations ``3 * x0 + x1 = 9`` and ``x0 + 2 * x1 = 8``:

    >>> import nlcpy as vp
    >>> a = vp.array([[3,1], [1,2]])
    >>> b = vp.array([9,8])
    >>> x = vp.linalg.solve(a, b)
    >>> x
    array([2., 3.])

    """
    a = nlcpy.asarray(a)
    b = nlcpy.asarray(b)
    c_order = (a.flags.c_contiguous or a.ndim < 4 or
               a.ndim - b.ndim < 2 and b.flags.c_contiguous) and \
        not (a.ndim < b.ndim and not b.flags.c_contiguous)
    util._assertRankAtLeast2(a)
    util._assertNdSquareness(a)

    if a.ndim - 1 == b.ndim:
        if a.shape[-1] != b.shape[-1]:
            raise ValueError(
                'solve1: Input operand 1 has a mismatch in '
                'its core dimension 0, with gufunc signature (m,m),(m)->(m) '
                '(size {0} is different from {1})'.format(
                    b.shape[-1], a.shape[-1]))
    elif b.ndim == 1:
        raise ValueError(
            'solve: Input operand 1 does not have enough dimensions '
            '(has 1, gufunc core with signature (m,m),(m,n)->(m,n) requires 2)'
        )
    else:
        if a.shape[-1] != b.shape[-2]:
            raise ValueError(
                'solve: Input operand 1 has a mismatch in '
                'its core dimension 0, with gufunc signature (m,m),(m,n)->(m,n) '
                '(size {0} is different from {1})'.format(
                    b.shape[-2], a.shape[-1]))

    if b.ndim == 1 or a.ndim - 1 == b.ndim and a.shape[-1] == b.shape[-1]:
        tmp = 1
        _newaxis = (None, )
    else:
        tmp = 2
        _newaxis = (None, None)
    for i in range(1, min(a.ndim - 2, b.ndim - tmp) + 1):
        if a.shape[-2 - i] != b.shape[-tmp - i] and \
           1 not in (a.shape[-2 - i], b.shape[-tmp - i]):
            raise ValueError(
                'operands could not be broadcast together with '
                'remapped shapes [original->remapped]: {0}->({1}) '
                '{2}->({3}) and requested shape ({4})'.format(
                    str(a.shape).replace(' ', ''),
                    str(a.shape[:-2] + _newaxis).replace(' ', '').replace(
                        'None', 'newaxis').strip('(,)'),
                    str(b.shape).replace(' ', ''),
                    str(b.shape[:-tmp] + _newaxis).replace(' ', '').replace(
                        'None', 'newaxis').replace('None',
                                                   'newaxis').strip('(,)'),
                    str(b.shape[-tmp:]).replace(' ', '').strip('(,)')))

    if a.dtype.char in 'FD' or b.dtype.char in 'FD':
        dtype = 'complex128'
        if a.dtype.char in 'fF' and b.dtype.char in 'fF':
            x_dtype = 'complex64'
        else:
            x_dtype = 'complex128'
    else:
        dtype = 'float64'
        if a.dtype.char == 'f' and b.dtype.char == 'f':
            x_dtype = 'float32'
        else:
            x_dtype = 'float64'

    x_shape = b.shape
    if b.ndim == a.ndim - 1:
        b = b[..., nlcpy.newaxis]
    diff = abs(a.ndim - b.ndim)
    if a.ndim < b.ndim:
        bcast_shape = [
            b.shape[i] if b.shape[i] != 1 or i < diff else a.shape[i - diff]
            for i in range(b.ndim - 2)
        ]
    else:
        bcast_shape = [
            a.shape[i] if a.shape[i] != 1 or i < diff else b.shape[i - diff]
            for i in range(a.ndim - 2)
        ]
    bcast_shape_a = bcast_shape + list(a.shape[-2:])
    bcast_shape_b = bcast_shape + list(b.shape[-2:])
    a = nlcpy.broadcast_to(a, bcast_shape_a)
    if bcast_shape_b != list(b.shape):
        b = nlcpy.broadcast_to(b, bcast_shape_b)
        x_shape = b.shape
    if b.size == 0:
        return nlcpy.empty(x_shape, dtype=x_dtype)

    a = nlcpy.array(nlcpy.moveaxis(a, (-1, -2), (1, 0)),
                    dtype=dtype,
                    order='F')
    b = nlcpy.array(nlcpy.moveaxis(b, (-1, -2), (1, 0)),
                    dtype=dtype,
                    order='F')

    info = numpy.empty(1, dtype='l')
    fpe = request._get_fpe_flag()
    args = (
        a._ve_array,
        b._ve_array,
        veo.OnStack(info, inout=veo.INTENT_OUT),
        veo.OnStack(fpe, inout=veo.INTENT_OUT),
    )

    request._push_and_flush_request('nlcpy_solve',
                                    args,
                                    callback=util._assertNotSingular(info))

    if c_order:
        x = nlcpy.moveaxis(b, (1, 0), (-1, -2)).reshape(x_shape)
        return nlcpy.asarray(x, x_dtype, 'C')
    else:
        x = nlcpy.asarray(b, x_dtype)
        return nlcpy.moveaxis(x, (1, 0), (-1, -2)).reshape(x_shape)
예제 #13
0
def lstsq(a, b, rcond='warn'):
    """Returns the least-squares solution to a linear matrix equation.

    Solves the equation :math:`ax = b` by computing a vector *x* that minimizes the
    squared Euclidean 2-norm :math:`|b-ax|^2_2`. The equation may be under-, well-, or
    over-determined (i.e., the number of linearly independent rows of *a* can be less
    than, equal to, or greater than its number of linearly independent columns). If *a*
    is square and of full rank, then *x* (but for round-off error) is the "exact"
    solution of the equation.

    Parameters
    ----------
    a : (M,N) array_like
        "Coefficient" matrix.
    b : {(M,), (M, K)} array_like
        Ordinate or "dependent variable" values. If *b* is two-dimensional, the
        least-squares solution is calculated for each of the *K* columns of *b*.
    rcond : float, optional
        Cut-off ratio for small singular values of *a*. For the purposes of rank
        determination, singular values are treated as zero if they are smaller than
        *rcond* times the largest singular value of *a*.

    Returns
    -------
    x : {(N,), (N, K)} ndarray
        Least-squares solution. If *b* is two-dimensional, the solutions are in the *K*
        columns of *x*.
    residuals : {(1,), (K,), (0,)} ndarray
        Sums of residuals; squared Euclidean 2-norm for each column in ``b - a@x``. If
        the rank of *a* is < N or M <= N, this is an empty array. If *b* is
        1-dimensional, this is a (1,) shape array. Otherwise the shape is (K,).
    rank : *int*
        Rank of matrix *a*.
    s : (min(M, N),) ndarray
        Singular values of *a*.

    Note
    ----
    If `b` is a matrix, then all array results are returned as matrices.

    Examples
    --------
    .. plot::
        :align: center

        Fit a line, ``y = mx + c``, through some noisy data-points:

        >>> import nlcpy as vp
        >>> x = vp.array([0, 1, 2, 3])
        >>> y = vp.array([-1, 0.2, 0.9, 2.1])

        By examining the coefficients, we see that the line should have a gradient of
        roughly 1 and cut the y-axis at, more or less, -1.
        We can rewrite the line equation as ``y = Ap``, where ``A = [[x 1]]`` and
        ``p = [[m], [c]]``. Now use lstsq to solve for *p*:

        >>> A = vp.array((x, vp.ones(len(x)))).T
        >>> A
        array([[0., 1.],
               [1., 1.],
               [2., 1.],
               [3., 1.]])
        >>> m, c = vp.linalg.lstsq(A, y, rcond=None)[0]
        >>> m, c  # doctest: +SKIP
        (array(1.), array(-0.95)) # may vary

        Plot the data along with the fitted line:

        >>> import matplotlib.pyplot as plt
        >>> _ = plt.plot(x, y, 'o', label='Original data', markersize=15)
        >>> _ = plt.plot(x, m*x + c, 'r', label='Fitted line')
        >>> _ = plt.legend()
        >>> plt.show()

    """
    a = nlcpy.asarray(a)
    b = nlcpy.asarray(b)
    b_ndim = b.ndim
    if b_ndim == 1:
        b = b[:, nlcpy.newaxis]
    util._assertRank2(a, b)

    if a.shape[-2] != b.shape[-2]:
        raise util.LinAlgError('Incompatible dimensions')

    a_complex = a.dtype.char in 'FD'
    b_complex = b.dtype.char in 'FD'
    if a_complex or b_complex:
        if a.dtype.char in 'fF' and b.dtype.char in 'fF':
            x_dtype = 'F'
            f_dtype = 'f'
        else:
            x_dtype = 'D'
            f_dtype = 'd'
    else:
        if a.dtype.char == 'f' and b.dtype.char == 'f':
            x_dtype = 'f'
            f_dtype = 'f'
        else:
            x_dtype = 'd'
            f_dtype = 'd'

    m = a.shape[-2]
    n = a.shape[-1]
    k = b.shape[-1]
    minmn = min(m, n)
    maxmn = max(m, n)
    k_extend = (k == 0)
    if k_extend:
        b = nlcpy.zeros([m, 1], dtype=b.dtype)
        k = 1
    if minmn == 0:
        if n > 0:
            x = nlcpy.zeros([n, k], dtype=x_dtype)
            residuals = nlcpy.array([], dtype=f_dtype)
        else:
            x = nlcpy.array([], dtype=x_dtype)
            if b_complex:
                br = nlcpy.asarray(b.real, dtype='d')
                bi = nlcpy.asarray(b.imag, dtype='d')
                square_b = nlcpy.power(br, 2) + nlcpy.power(bi, 2)
            else:
                square_b = nlcpy.power(b, 2, dtype='d')
            residuals = nlcpy.add.reduce(square_b, dtype=f_dtype)
        return (x, residuals, 0, nlcpy.array([], dtype=f_dtype))

    if rcond == 'warn':
        rcond = -1.0
    if rcond is None:
        rcond = nlcpy.finfo(x_dtype).eps * max(m, n)
    rcond = nlcpy.asarray(rcond, dtype=f_dtype)

    a = nlcpy.array(a, dtype=x_dtype, order='F')
    if m < n:
        _b = nlcpy.empty([n, k], dtype=x_dtype, order='F')
        _b[:m, :] = b
        b = _b
    else:
        b = nlcpy.array(b, dtype=x_dtype, order='F')
    s = nlcpy.empty(min(m, n), dtype=f_dtype)
    rank = numpy.empty(1, dtype='l')

    nlvl = max(0, int(nlcpy.log(minmn / 26.0) / nlcpy.log(2)) + 1)
    mnthr = int(minmn * 1.6)
    mm = m
    lwork = 1
    if a_complex:
        _tmp = 2
        wlalsd = minmn * k
        lrwork = 10 * maxmn + 2 * maxmn * 25 + 8 * maxmn * nlvl + 3 * 25 * k \
                            + max(26 ** 2, n * (1 + k) + 2 * k)
    else:
        _tmp = 3
        wlalsd = 9 * minmn + 2 * minmn * 25 + 8 * minmn * nlvl + minmn * k + 26**2
        lrwork = 1

    if m >= n:
        if m >= mnthr:
            mm = n
            lwork = max(65 * n, n + 32 * k)
        lwork = max(lwork, _tmp * n + (mm + n) * 64, _tmp * n + 32 * k,
                    _tmp * n + 32 * n - 32, _tmp * n + wlalsd)
    else:
        if n >= mnthr:
            lwork = max(lwork, m * m + 4 * m + wlalsd, m * m + 132 * m,
                        m * m + 4 * m + 32 * k, m * m + (k + 1) * m,
                        m * m + n + m)
        else:
            lwork = max(lwork, 3 * m + wlalsd, 3 * m + 64 * (n + m),
                        3 * m + 32 * k)

    liwork = minmn * (11 + 3 * nlvl)
    work = nlcpy.empty(lwork)
    iwork = nlcpy.empty(liwork, dtype='l')
    rwork = nlcpy.empty(lrwork, dtype=f_dtype)
    info = numpy.empty(1, dtype='l')
    fpe = request._get_fpe_flag()
    args = (
        a._ve_array,
        b._ve_array,
        s._ve_array,
        work._ve_array,
        iwork._ve_array,
        rwork._ve_array,
        rcond._ve_array,
        veo.OnStack(rank, inout=veo.INTENT_OUT),
        veo.OnStack(info, inout=veo.INTENT_OUT),
        veo.OnStack(fpe, inout=veo.INTENT_OUT),
    )

    request._push_and_flush_request('nlcpy_lstsq',
                                    args,
                                    callback=_lstsq_errchk(info),
                                    sync=True)

    if rank < n or m <= n:
        residuals = nlcpy.array([], dtype=f_dtype)
    else:
        _b = b[n:]
        square_b = nlcpy.power(_b.real, 2) + nlcpy.power(_b.imag, 2)
        residuals = nlcpy.add.reduce(square_b, dtype=f_dtype)
    if k_extend:
        x = b[..., :0]
        residuals = residuals[..., :0]
    elif b_ndim == 1:
        x = nlcpy.asarray(b[:n, 0], dtype=x_dtype, order='C')
    else:
        x = nlcpy.asarray(b[:n, :], dtype=x_dtype, order='C')

    return (x, residuals, rank[0], s)