コード例 #1
0
ファイル: decomposition.py プロジェクト: zori/chainer
def _assert_nd_squareness(*arrays):
    for a in arrays:
        if max(a.shape[-2:]) != min(a.shape[-2:]):
            raise linalg.LinAlgError(
                'Last 2 dimensions of the array must be square')
コード例 #2
0
ファイル: decomposition.py プロジェクト: zori/chainer
def _assert_cupy_array(*arrays):
    for a in arrays:
        if not isinstance(a, cupy.core.ndarray):
            raise linalg.LinAlgError(
                'cupy.linalg only supports cupy.core.ndarray')
コード例 #3
0
ファイル: decomposition.py プロジェクト: zori/chainer
def _assert_rank2(*arrays):
    for a in arrays:
        if a.ndim != 2:
            raise linalg.LinAlgError(
                '{}-dimensional array given. Array must be '
                'two-dimensional'.format(a.ndim))
コード例 #4
0
def _check_status(dev_info):
    status = int(dev_info)
    if status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')
コード例 #5
0
ファイル: _solve.py プロジェクト: venkywonka/cupy
def lstsq(a, b, rcond=1e-15):
    """Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that
    minimizes the Euclidean 2-norm `|| b - a x ||^2`.  The equation may
    be under-, well-, or over- determined (i.e., the number of
    linearly independent rows of `a` can be less than, equal to, or
    greater than its number of linearly independent columns).  If `a`
    is square and of full rank, then `x` (but for round-off error) is
    the "exact" solution of the equation.

    Args:
        a (cupy.ndarray): "Coefficient" matrix with dimension ``(M, N)``
        b (cupy.ndarray): "Dependent variable" values with dimension ``(M,)``
            or ``(M, K)``
        rcond (float): Cutoff parameter for small singular values.
            For stability it computes the largest singular value denoted by
            ``s``, and sets all singular values smaller than ``s`` to zero.

    Returns:
        tuple:
            A tuple of ``(x, residuals, rank, s)``. Note ``x`` is the
            least-squares solution with shape ``(N,)`` or ``(N, K)`` depending
            if ``b`` was two-dimensional. The sums of ``residuals`` is the
            squared Euclidean 2-norm for each column in b - a*x. The
            ``residuals`` is an empty array if the rank of a is < N or M <= N,
            but  iff b is 1-dimensional, this is a (1,) shape array, Otherwise
            the shape is (K,). The ``rank`` of matrix ``a`` is an integer. The
            singular values of ``a`` are ``s``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.lstsq`
    """
    _util._assert_cupy_array(a, b)
    _util._assert_rank2(a)
    if b.ndim > 2:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be at'
                                 ' most two-dimensional'.format(b.ndim))
    m, n = a.shape[-2:]
    m2 = b.shape[0]
    if m != m2:
        raise linalg.LinAlgError('Incompatible dimensions')

    u, s, vt = cupy.linalg.svd(a, full_matrices=False)
    # number of singular values and matrix rank
    cutoff = rcond * s.max()
    s1 = 1 / s
    sing_vals = s <= cutoff
    s1[sing_vals] = 0
    rank = s.size - sing_vals.sum()

    if b.ndim == 2:
        s1 = cupy.repeat(s1.reshape(-1, 1), b.shape[1], axis=1)
    # Solve the least-squares solution
    z = core.dot(u.transpose(), b) * s1
    x = core.dot(vt.transpose(), z)
    # Calculate squared Euclidean 2-norm for each column in b - a*x
    if rank != n or m <= n:
        resids = cupy.array([], dtype=a.dtype)
    elif b.ndim == 2:
        e = b - core.dot(a, x)
        resids = cupy.sum(cupy.square(e), axis=0)
    else:
        e = b - cupy.dot(a, x)
        resids = cupy.dot(e.T, e).reshape(-1)
    return x, resids, rank, s
コード例 #6
0
def lstsq(a, b, rcond='warn'):
    """Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that
    minimizes the Euclidean 2-norm `|| b - a x ||^2`.  The equation may
    be under-, well-, or over- determined (i.e., the number of
    linearly independent rows of `a` can be less than, equal to, or
    greater than its number of linearly independent columns).  If `a`
    is square and of full rank, then `x` (but for round-off error) is
    the "exact" solution of the equation.

    Args:
        a (cupy.ndarray): "Coefficient" matrix with dimension ``(M, N)``
        b (cupy.ndarray): "Dependent variable" values with dimension ``(M,)``
            or ``(M, K)``
        rcond (float): Cutoff parameter for small singular values.
            For stability it computes the largest singular value denoted by
            ``s``, and sets all singular values smaller than ``s`` to zero.

    Returns:
        tuple:
            A tuple of ``(x, residuals, rank, s)``. Note ``x`` is the
            least-squares solution with shape ``(N,)`` or ``(N, K)`` depending
            if ``b`` was two-dimensional. The sums of ``residuals`` is the
            squared Euclidean 2-norm for each column in b - a*x. The
            ``residuals`` is an empty array if the rank of a is < N or M <= N,
            but  iff b is 1-dimensional, this is a (1,) shape array, Otherwise
            the shape is (K,). The ``rank`` of matrix ``a`` is an integer. The
            singular values of ``a`` are ``s``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.lstsq`
    """
    if rcond == 'warn':
        warnings.warn(
            '`rcond` parameter will change to the default of '
            'machine precision times ``max(M, N)`` where M and N '
            'are the input matrix dimensions.\n'
            'To use the future default and silence this warning '
            'we advise to pass `rcond=None`, to keep using the old, '
            'explicitly pass `rcond=-1`.', FutureWarning)
        rcond = -1

    _util._assert_cupy_array(a, b)
    _util._assert_rank2(a)
    if b.ndim > 2:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be at'
                                 ' most two-dimensional'.format(b.ndim))
    m, n = a.shape[-2:]
    m2 = b.shape[0]
    if m != m2:
        raise linalg.LinAlgError('Incompatible dimensions')

    u, s, vh = cupy.linalg.svd(a, full_matrices=False)

    if rcond is None:
        rcond = numpy.finfo(s.dtype).eps * max(m, n)
    elif rcond <= 0 or rcond >= 1:
        # some doc of gelss/gelsd says "rcond < 0", but it's not true!
        rcond = numpy.finfo(s.dtype).eps

    # number of singular values and matrix rank
    cutoff = rcond * s.max()
    s1 = 1 / s
    sing_vals = s <= cutoff
    s1[sing_vals] = 0
    rank = s.size - sing_vals.sum(dtype=numpy.int32)

    # Solve the least-squares solution
    # x = vh.T.conj() @ diag(s1) @ u.T.conj() @ b
    z = (cupy.dot(b.T, u.conj()) * s1).T
    x = cupy.dot(vh.T.conj(), z)
    # Calculate squared Euclidean 2-norm for each column in b - a*x
    if m <= n or rank != n:
        resids = cupy.empty((0, ), dtype=s.dtype)
    else:
        e = b - a.dot(x)
        resids = cupy.atleast_1d(_nrm2_last_axis(e.T))
    return x, resids, rank, s
コード例 #7
0
def solve(a, b):
    '''Solves a linear matrix equation.

    It computes the exact solution of ``x`` in ``ax = b``,
    where ``a`` is a square and full rank matrix.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(M, M)``
        b (cupy.ndarray): The vector with ``M`` elements, or
            the matrix with dimension ``(M, K)``

    .. seealso:: :func:`numpy.linalg.solve`
    '''
    # NOTE: Since cusolver in CUDA 8.0 does not support gesv,
    #       we manually solve a linear system with QR decomposition.
    #       For details, please see the following:
    #       http://docs.nvidia.com/cuda/cusolver/index.html#qr_examples
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a, b)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    if 2 < b.ndim:
        raise linalg.LinAlgError(
            '{}-dimensional array given. Array must be '
            'one or two-dimensional'.format(b.ndim))
    if len(a) != len(b):
        raise linalg.LinAlgError(
            'The number of rows of array a must be '
            'the same as that of array b')

    # Cast to float32 or float64
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    m, k = (b.size, 1) if b.ndim == 1 else b.shape
    a = a.transpose().astype(dtype, order='C', copy=True)
    b = b.transpose().astype(dtype, order='C', copy=True)
    cusolver_handle = device.get_cusolver_handle()
    cublas_handle = device.get_cublas_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf = cusolver.sgeqrf
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        ormqr = cusolver.sormqr
        trsm = cublas.strsm
    else:  # dtype == 'd'
        geqrf = cusolver.dgeqrf
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        ormqr = cusolver.dormqr
        trsm = cublas.dtrsm

    # 1. QR decomposition (A = Q * R)
    buffersize = geqrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(m, dtype=dtype)
    geqrf(
        cusolver_handle, m, m, a.data.ptr, m,
        tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)
    # 2. ormqr (Q^T * B)
    ormqr(
        cusolver_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_OP_T,
        m, k, m, a.data.ptr, m, tau.data.ptr, b.data.ptr, m,
        workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)
    # 3. trsm (X = R^{-1} * (Q^T * B))
    trsm(
        cublas_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_FILL_MODE_UPPER,
        cublas.CUBLAS_OP_N, cublas.CUBLAS_DIAG_NON_UNIT,
        m, k, 1, a.data.ptr, m, b.data.ptr, m)
    return b.transpose()
コード例 #8
0
ファイル: cublas.py プロジェクト: wphicks/cupy
def batched_gesv(a, b):
    """Solves multiple linear matrix equations using cublas<t>getr[fs]Batched().

    Computes the solution to system of linear equation ``ax = b``.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(..., M)`` or
            ``(..., M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(..., M)`` or ``(..., M, K)``.
    """
    util._assert_cupy_array(a, b)
    util._assert_nd_squareness(a)

    if not ((a.ndim == b.ndim or a.ndim == b.ndim + 1)
            and a.shape[:-1] == b.shape[:a.ndim - 1]):
        raise ValueError(
            'a must have (..., M, M) shape and b must have (..., M) '
            'or (..., M, K)')

    dtype = numpy.promote_types(a.dtype.char, 'f')
    if dtype == 'f':
        t = 's'
    elif dtype == 'd':
        t = 'd'
    elif dtype == 'F':
        t = 'c'
    elif dtype == 'D':
        t = 'z'
    else:
        raise TypeError('invalid dtype')
    getrf = getattr(cublas, t + 'getrfBatched')
    getrs = getattr(cublas, t + 'getrsBatched')

    bs = numpy.prod(a.shape[:-2]) if a.ndim > 2 else 1
    n = a.shape[-1]
    nrhs = b.shape[-1] if a.ndim == b.ndim else 1
    b_shape = b.shape
    a_data_ptr = a.data.ptr
    b_data_ptr = b.data.ptr
    a = cupy.ascontiguousarray(a.reshape(bs, n, n).transpose(0, 2, 1),
                               dtype=dtype)
    b = cupy.ascontiguousarray(b.reshape(bs, n, nrhs).transpose(0, 2, 1),
                               dtype=dtype)
    if a.data.ptr == a_data_ptr:
        a = a.copy()
    if b.data.ptr == b_data_ptr:
        b = b.copy()

    if n > get_batched_gesv_limit():
        warnings.warn('The matrix size ({}) exceeds the set limit ({})'.format(
            n, get_batched_gesv_limit()))

    handle = device.get_cublas_handle()
    lda = n
    a_step = lda * n * a.itemsize
    a_array = cupy.arange(a.data.ptr,
                          a.data.ptr + a_step * bs,
                          a_step,
                          dtype=cupy.uintp)
    ldb = n
    b_step = ldb * nrhs * b.itemsize
    b_array = cupy.arange(b.data.ptr,
                          b.data.ptr + b_step * bs,
                          b_step,
                          dtype=cupy.uintp)
    pivot = cupy.empty((bs, n), dtype=numpy.int32)
    dinfo = cupy.empty((bs, ), dtype=numpy.int32)
    info = numpy.empty((1, ), dtype=numpy.int32)
    # LU factorization (A = L * U)
    getrf(handle, n, a_array.data.ptr, lda, pivot.data.ptr, dinfo.data.ptr, bs)
    util._check_cublas_info_array_if_synchronization_allowed(getrf, dinfo)
    # Solves Ax = b
    getrs(handle, cublas.CUBLAS_OP_N, n, nrhs, a_array.data.ptr, lda,
          pivot.data.ptr, b_array.data.ptr, ldb, info.ctypes.data, bs)
    if info[0] != 0:
        msg = 'Error reported by {} in cuBLAS. '.format(getrs.__name__)
        if info[0] < 0:
            msg += 'The {}-th parameter had an illegal value.'.format(-info[0])
        raise linalg.LinAlgError(msg)

    return b.transpose(0, 2, 1).reshape(b_shape)
コード例 #9
0
ファイル: norms.py プロジェクト: zivzone/cupy
def slogdet(a):
    """Returns sign and logarithm of the determinant of an array.

    It calculates the natural logarithm of the determinant of a given value.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            It returns a tuple ``(sign, logdet)``. ``sign`` represents each
            sign of the determinant as a real number ``0``, ``1`` or ``-1``.
            'logdet' represents the natural logarithm of the absolute of the
            determinant.
            If the determinant is zero, ``sign`` will be ``0`` and ``logdet``
            will be ``-inf``.
            The shapes of both ``sign`` and ``logdet`` are equal to
            ``a.shape[:-2]``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. warning::
        To produce the same results as :func:`numpy.linalg.slogdet` for
        singular inputs, set the `linalg` configuration to `raise`.

    .. seealso:: :func:`numpy.linalg.slogdet`
    """
    if a.ndim < 2:
        msg = ('%d-dimensional array given. '
               'Array must be at least two-dimensional' % a.ndim)
        raise linalg.LinAlgError(msg)
    util._assert_nd_squareness(a)

    dtype = numpy.promote_types(a.dtype.char, 'f')
    real_dtype = dtype

    # TODO(kataoka): support complex types
    if dtype not in (numpy.float32, numpy.float64):
        msg = ('dtype must be float32 or float64'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    a_shape = a.shape
    shape = a_shape[:-2]
    n = a_shape[-2]

    if a.size == 0:
        # empty batch (result is empty, too) or empty matrices det([[]]) == 1
        sign = cupy.ones(shape, dtype)
        logdet = cupy.zeros(shape, real_dtype)
        return sign, logdet

    lu, ipiv, dev_info = decomposition._lu_factor(a, dtype)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diagonal(lu, axis1=-2, axis2=-1)

    # ipiv is 1-origin
    non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, n + 1), axis=-1) +
                cupy.count_nonzero(diag < 0, axis=-1))

    # Note: sign == -1 ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    logdet = cupy.log(abs(diag)).sum(axis=-1)

    singular = dev_info > 0
    return (
        cupy.where(singular, dtype.type(0), sign.astype(dtype)).reshape(shape),
        cupy.where(singular, real_dtype.type('-inf'), logdet).reshape(shape),
    )
コード例 #10
0
ファイル: gls_fit.py プロジェクト: rij/jwst
def gls_fit(ramp_data, input_var_data,
             prev_fit_data, prev_slope_data,
             readnoise, gain,
             frame_time, group_time, nframes_used,
             num_cr, cr_flagged_2d, saturated_data,
             use_extra_terms=True):
    """Generalized least squares linear fit.

    It is assumed that every input pixel has num_cr cosmic-ray hits
    somewhere within the ramp.  This function should be called separately
    for different values of num_cr.

    Parameters
    ----------
    ramp_data: 2-D ndarray; indices:  group, pixel number
        The ramp data for one of the integrations in an exposure.  This
        may be a subset in detector coordinates, but covering all groups.
        The shape is (ngroups, nz), where ngroups is the length of the
        ramp, and nz is the number of pixels in the current subset.

    input_var_data: 2-D ndarray, shape (ngroups, nz)
        The square of the input ERR array, matching ramp_data.

    prev_fit_data: 2-D ndarray, shape (ngroups, nz)
        The fit to ramp_data, based on applying the values of intercept,
        slope, and cosmic-ray amplitudes that were determined in a previous
        call to gls_fit.  This array is only used for setting up the
        covariance matrix.

    prev_slope_data: 1-D ndarray, length nz.
        An estimate (e.g. from a previous iteration) of the slope at each
        pixel, in electrons per second.

    readnoise: 1-D ndarray, length nz.
        The read noise in electrons at each detector pixel.

    gain: 1-D ndarray, shape (nz,)
        The analog-to-digital gain (electrons per dn) at each detector
        pixel.

    frame_time: float
        The time to read one frame, in seconds (e.g. 10.6 s).

    group_time: float
        Time increment between groups, in seconds.

    nframes_used: int
        Number of frames that were averaged together to make a group.
        Note that this value does not include the number (if any) of
        skipped frames.

    num_cr: int
        The number of cosmic rays that will be handled.  All pixels in the
        current set (ramp_data) are assumed to have this many cosmic ray
        hits somewhere within the ramp.

    cr_flagged_2d: 2-D ndarray, shape (ngroups, nz)
        The values should be 0 or 1; 1 indicates that a cosmic ray was
        detected (by another step) at that point.

    saturated_data: 2-D ndarray, shape (ngroups, nz)
        Normal values are zero; the value will be a huge number for
        saturated pixels.  This will be added to the main diagonal of the
        inverse of the weight matrix to greatly reduce the weight for
        saturated pixels.

    use_extra_terms: bool
        True if we should include Massimo Robberto's terms in the inverse
        weight matrix.
        See JWST-STScI-003193.pdf

    Returns
    -------
    tuple:  (result2d, variances)
        result2d is a 2-D ndarray; shape (nz, 2 + num_cr)
        The computed values of intercept, slope, and cosmic-ray amplitudes
        (there will be num_cr cosmic-ray amplitudes) for each of the nz
        pixels.

        variances is a 2-D ndarray; shape (nz, 2 + num_cr)
        The variance for the intercept, slope, and for the amplitude of
        each cosmic ray that was detected.
    """

    M = float(nframes_used)

    ngroups = ramp_data.shape[0]
    nz = ramp_data.shape[1]
    num_cr = int(num_cr)

    # x is an array (length nz) of matrices, each of which is the
    # independent variable of a linear equation.  Each such matrix
    # has ngroups rows and 2 + num_cr columns.  The first column is set
    # to 1, for finding the intercept.  The second column is the time at
    # each group, for finding the slope.  The remaining columns (if any),
    # are 0 for all rows prior to a certain point, then 1 for all
    # subsequent rows (i.e. the Heaviside function).  The transition from
    # 0 to 1 is the location of a cosmic ray hit; the first 1 in a column
    # corresponds to the value in cr_flagged_2d being 1.
    x = np.zeros((nz, ngroups, 2 + num_cr), dtype=np.float64)
    x[:, :, 0] = 1.
    x[:, :, 1] = np.arange(ngroups, dtype=np.float64) * group_time + \
                 frame_time * (M + 1.) / 2.

    if num_cr > 0:
        sum_crs = cr_flagged_2d.cumsum(axis=0)
        for k in range(ngroups):
            s = slice(k, ngroups)
            for n in range(1, num_cr + 1):
                temp = np.where(np.logical_and(cr_flagged_2d[k] == 1,
                                               sum_crs[k] == n))
                if len(temp[0]) > 0:
                    index = (temp[0], s, n + 1)
                    x[index] = 1
        del temp, index

    y = np.transpose(ramp_data, (1, 0)).reshape((nz, ngroups, 1))

    # cov is an array of nz matrices, each ngroups x ngroups.  The
    # inverse of each of these matrices is a weight matrix.
    # Note that there are two objects that are called the covariance
    # matrix:  (1) the ngroups x ngroups matrices in cov, and (2) the
    # smaller matrix (see near the end of this function) that contains
    # the variances and covariances of the fitted parameters.

    cov = np.ones((nz, ngroups, ngroups), dtype=np.float64)

    # Use the previous fit to the data to populate the covariance matrix,
    # for each of the nz pixels.  prev_fit_data has shape (ngroups, nz),
    # similar to the ramp data, but we want the nz axis to be the first
    # (we're constructing an array of nz matrix equations), so transpose
    # prev_fit_data.
    prev_fit_T = np.transpose(prev_fit_data, (1, 0))
    for k in range(ngroups):
        # Populate the upper right, row by row.
        cov[:, k, k:ngroups] = prev_fit_T[:, k:k+1]
        # Populate the lower left, column by column.
        cov[:, k:ngroups, k] = prev_fit_T[:, k:k+1]
        # Propagate errors from input.
        cov[:, k, k] += input_var_data[k, :]
        # Give saturated pixels very low weight (i.e. high variance).
        cov[:, k, k] += saturated_data[k, :]
    del prev_fit_T

    # I is 2-D, but it can broadcast to 4-D.  This is used to add terms to
    # the diagonal of the covariance matrix.
    I = np.identity(ngroups)

    # Divide by sqrt(2) to convert the readnoise from CDS to single readout.
    rn3d = readnoise.reshape((nz, 1, 1)) * SINGLE_READOUT_RN_FACTOR
    cov += (I * (rn3d**2 / M))

    # prev_slope_data must be non-negative.
    flags = prev_slope_data < 0.
    prev_slope_data[flags] = 1.

    if use_extra_terms:
        # Include two dummy axes to allow broadcasting with cov.
        slope3d = prev_slope_data.reshape((nz, 1, 1))
        # diagonal:
        if gain is not None:
            g3d = gain.reshape((nz, 1, 1))
        else:
            g3d = 1.
        delta_diag = I * (slope3d * frame_time *
                          (M - 1.) * (M - 2.) / (3. * M) +
                          (g3d * M)**2 / 12.)
        cov += delta_diag
        del delta_diag

    # This is the solution:  (xT @ weight @ x)^-1 @ [xT @ weight @ y]
    # where @ means matrix multiplication.

    # shape of xT is (nz, 2 + num_cr, ngroups)
    xT = np.transpose(x, (0, 2, 1))

    # shape of `weight` is (nz, ngroups, ngroups)
    I = I.reshape((1, ngroups, ngroups))
    weight = la.solve(cov, I)                   # inverse of cov
    del I

    # temp1 = xT @ weight
    # shape of temp1 is (nz, 2 + num_cr, ngroups)
    temp1 = np.einsum('...ij,...jk->...ik', xT, weight)

    # temp_var = xT @ weight @ x
    # shape of temp_var is (nz, 2 + num_cr, 2 + num_cr)
    temp_var = np.einsum('...ij,...jk->...ik', temp1, x)

    # `covar` is an array of nz covariance matrices.
    # covar = (xT @ weight @ x)^-1
    # shape of covar is (nz, 2 + num_cr, 2 + num_cr)
    I_2 = np.eye(2 + num_cr).reshape((1, 2 + num_cr, 2 + num_cr))
    try:
        covar = la.solve(temp_var, I_2)         # inverse of temp_var
    except la.LinAlgError as msg:
        for z in range(nz):
            try:
                dummy = la.solve(temp_var[z], I_2)
            except la.LinAlgError as msg2:
                log.warn("singular matrix, z = %d" % z)
                raise la.LinAlgError(msg2)
    del I_2

    # [xT @ weight @ y]
    # shape of temp2 is (nz, 2 + num_cr, 1)
    temp2 = np.einsum('...ij,...jk->...ik', temp1, y)

    # shape of result is (nz, 2 + num_cr, 1)
    result = np.einsum('...ij,...jk->...ik', covar, temp2)
    r_shape = result.shape
    result2d = result.reshape((r_shape[0], r_shape[1]))
    del result

    # shape of both result2d and variances is (nz, 2 + num_cr)
    variances = covar.diagonal(axis1=1, axis2=2).copy()

    return (result2d, variances)
コード例 #11
0
def gls_fit(ramp_data, prev_fit_data, prev_slope_data, readnoise, gain,
            frame_time, group_time, nframes_used, num_cr, cr_flagged_2d,
            saturated_data):
    """Generalized least squares linear fit.

    It is assumed that every input pixel has num_cr cosmic-ray hits
    somewhere within the ramp.  This function should be called separately
    for different values of num_cr.

    Notes
    -----
    Curently the noise model is assumed to be a combination of
    read and photon noise alone.
    Same technique could be used with more complex noise models, but then
    the ramp covariance matrix should be input.

    Parameters
    ----------
    ramp_data: 2-D ndarray; indices:  group, pixel number
        The ramp data for one of the integrations in an exposure.  This
        may be a subset in detector coordinates, but covering all groups.
        The shape is (ngroups, nz), where ngroups is the length of the
        ramp, and nz is the number of pixels in the current subset.

    prev_fit_data: 2-D ndarray, shape (ngroups, nz)
        The fit to ramp_data, based on applying the values of intercept,
        slope, and cosmic-ray amplitudes that were determined in a previous
        call to gls_fit.  This array is only used for setting up the
        covariance matrix.

    prev_slope_data: 1-D ndarray, length nz.
        An estimate (e.g. from a previous iteration) of the slope at each
        pixel, in electrons per second.

    readnoise: 1-D ndarray, length nz.
        The read noise in electrons at each detector pixel.

    gain: 1-D ndarray, shape (nz,)
        The analog-to-digital gain (electrons per dn) at each detector
        pixel.

    frame_time: float
        The time to read one frame, in seconds (e.g. 10.6 s).

    group_time: float
        Time increment between groups, in seconds.

    nframes_used: int
        Number of frames that were averaged together to make a group.
        Note that this value does not include the number (if any) of
        skipped frames.

    num_cr: int
        The number of cosmic rays that will be handled.  All pixels in the
        current set (ramp_data) are assumed to have this many cosmic ray
        hits somewhere within the ramp.

    cr_flagged_2d: 2-D ndarray, shape (ngroups, nz)
        The values should be 0 or 1; 1 indicates that a cosmic ray was
        detected (by another step) at that point.

    saturated_data: 2-D ndarray, shape (ngroups, nz)
        Normal values are zero; the value will be a huge number for
        saturated pixels.  This will be added to the main diagonal of the
        inverse of the weight matrix to greatly reduce the weight for
        saturated pixels.

    Returns
    -------
    tuple:  (result2d, variances)
        result2d is a 2-D ndarray; shape (nz, 2 + num_cr)
        The computed values of intercept, slope, and cosmic-ray amplitudes
        (there will be num_cr cosmic-ray amplitudes) for each of the nz
        pixels.

        variances is a 2-D ndarray; shape (nz, 2 + num_cr)
        The variance for the intercept, slope, and for the amplitude of
        each cosmic ray that was detected.
    """

    M = float(nframes_used)

    ngroups = ramp_data.shape[0]
    nz = ramp_data.shape[1]
    num_cr = int(num_cr)

    # x is an array (length nz) of matrices, each of which is the
    # independent variable of a linear equation.  Each such matrix
    # has ngroups rows and 2 + num_cr columns.  The first column is set
    # to 1, for finding the intercept.  The second column is the time at
    # each group, for finding the slope.  The remaining columns (if any),
    # are 0 for all rows prior to a certain point, then 1 for all
    # subsequent rows (i.e. the Heaviside function).  The transition from
    # 0 to 1 is the location of a cosmic ray hit; the first 1 in a column
    # corresponds to the value in cr_flagged_2d being 1.
    x = np.zeros((nz, ngroups, 2 + num_cr), dtype=np.float64)
    x[:, :, 0] = 1.
    x[:, :, 1] = np.arange(ngroups, dtype=np.float64) * group_time + \
                 frame_time * (M + 1.) / 2.

    if num_cr > 0:
        sum_crs = cr_flagged_2d.cumsum(axis=0)
        for k in range(ngroups):
            s = slice(k, ngroups)
            for n in range(1, num_cr + 1):
                temp = np.where(
                    np.logical_and(cr_flagged_2d[k] == 1, sum_crs[k] == n))
                if len(temp[0]) > 0:
                    index = (temp[0], s, n + 1)
                    x[index] = 1
        del temp, index

    y = np.transpose(ramp_data, (1, 0)).reshape((nz, ngroups, 1))

    # ramp_cov is an array of nz matrices, each ngroups x ngroups.
    # each matrix gives the covariance of that pixel's ramp data
    ramp_cov = np.ones((nz, ngroups, ngroups), dtype=np.float64)

    # Use the previous fit to the data to populate the covariance matrix,
    # for each of the nz pixels.  prev_fit_data has shape (ngroups, nz),
    # similar to the ramp data, but we want the nz axis to be the first
    # (we're constructing an array of nz matrix equations), so transpose
    # prev_fit_data.
    prev_fit_T = np.transpose(prev_fit_data, (1, 0))
    for k in range(ngroups):
        # Populate the upper right, row by row.
        ramp_cov[:, k, k:ngroups] = prev_fit_T[:, k:k + 1]
        # Populate the lower left, column by column.
        ramp_cov[:, k:ngroups, k] = prev_fit_T[:, k:k + 1]
        # Give saturated pixels a very high high variance (hence a low weight)
        ramp_cov[:, k, k] += saturated_data[k, :]
    del prev_fit_T

    # I is 2-D, but it can broadcast to 4-D.  This is used to add terms to
    # the diagonal of the covariance matrix.
    I = np.identity(ngroups)

    rn3d = readnoise.reshape((nz, 1, 1))
    ramp_cov += (I * rn3d**2)

    # prev_slope_data must be non-negative.
    flags = prev_slope_data < 0.
    prev_slope_data[flags] = 1.

    # The resulting fit parameters are
    #  (xT @ ramp_cov^-1 @ x)^-1 @ [xT @ ramp_cov^-1 @ y]
    #  = [y-intercept, slope, cr_amplitude_1, cr_amplitude_2, ...]
    # where @ means matrix multiplication.

    # shape of xT is (nz, 2 + num_cr, ngroups)
    xT = np.transpose(x, (0, 2, 1))

    # shape of `ramp_invcov` is (nz, ngroups, ngroups)
    I = I.reshape((1, ngroups, ngroups))
    ramp_invcov = la.solve(ramp_cov, I)

    del I

    # temp1 = xT @ ramp_invcov
    # np.einsum use is equivalent to matrix multiplication
    # shape of temp1 is (nz, 2 + num_cr, ngroups)
    temp1 = np.einsum('...ij,...jk->...ik', xT, ramp_invcov)

    # temp_var = xT @ ramp_invcov @ x
    # shape of temp_var is (nz, 2 + num_cr, 2 + num_cr)
    temp_var = np.einsum('...ij,...jk->...ik', temp1, x)

    # `fitparam_cov` is an array of nz covariance matrices.
    # fitparam_cov = (xT @ ramp_invcov @ x)^-1
    # shape of fitparam_covar is (nz, 2 + num_cr, 2 + num_cr)
    I_2 = np.eye(2 + num_cr).reshape((1, 2 + num_cr, 2 + num_cr))
    try:
        # inverse of temp_var
        fitparam_cov = la.solve(temp_var, I_2)
    except la.LinAlgError:
        # find the pixel with the singular matrix
        for z in range(nz):
            try:
                la.solve(temp_var[z], I_2)
            except la.LinAlgError as msg2:
                log.warning("singular matrix, z = %d" % z)
                raise la.LinAlgError(msg2)
    del I_2

    # [xT @ ramp_invcov @ y]
    # shape of temp2 is (nz, 2 + num_cr, 1)
    temp2 = np.einsum('...ij,...jk->...ik', temp1, y)

    # shape of fitparam is (nz, 2 + num_cr, 1)
    fitparam = np.einsum('...ij,...jk->...ik', fitparam_cov, temp2)
    r_shape = fitparam.shape
    fitparam2d = fitparam.reshape((r_shape[0], r_shape[1]))
    del fitparam

    # shape of both result2d and variances is (nz, 2 + num_cr)
    fitparam_uncs = fitparam_cov.diagonal(axis1=1, axis2=2).copy()

    return (fitparam2d, fitparam_uncs)
コード例 #12
0
def qr(a, mode='reduced'):
    '''QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', and decompose a matrix ``A = (M, N)`` into ``Q``,
            ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    .. seealso:: :func:`numpy.linalg.qr`
    '''
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # Cast to float32 or float64
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    m, n = a.shape
    x = a.transpose().astype(dtype, order='C', copy=True)
    mn = min(m, n)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    # compute working space of geqrf and ormqr, and solve R
    if dtype == 'f':
        buffersize = cusolver.sgeqrf_bufferSize(handle, m, n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float32)
        tau = cupy.empty(mn, dtype=numpy.float32)
        cusolver.sgeqrf(
            handle, m, n, x.data.ptr, m,
            tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    else:  # dtype == 'd'
        buffersize = cusolver.dgeqrf_bufferSize(handle, n, m, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float64)
        tau = cupy.empty(mn, dtype=numpy.float64)
        cusolver.dgeqrf(
            handle, m, n, x.data.ptr, m,
            tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    status = int(dev_info[0])
    if status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    if mode == 'r':
        r = x[:, :mn].transpose()
        return util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        return x, tau

    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # solve Q
    if dtype == 'f':
        buffersize = cusolver.sorgqr_bufferSize(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
        workspace = cupy.empty(buffersize, dtype=numpy.float32)
        cusolver.sorgqr(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
            workspace.data.ptr, buffersize, dev_info.data.ptr)
    else:
        buffersize = cusolver.dorgqr_bufferSize(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
        workspace = cupy.empty(buffersize, dtype=numpy.float64)
        cusolver.dorgqr(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
            workspace.data.ptr, buffersize, dev_info.data.ptr)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, util._triu(r)
コード例 #13
0
def svd(a, full_matrices=True, compute_uv=True):
    '''Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is an one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        full_matrices (bool): If True, it returns U and V with dimensions
            ``(M, M)`` and ``(N, N)``. Otherwise, the dimensions of U and V
            are respectively ``(M, K)`` and ``(K, N)``, where
            ``K = min(M, N)``.
        compute_uv (bool): If True, it only returns singular values.

    .. seealso:: :func:`numpy.linalg.svd`
    '''
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    # Cast to float32 or float64
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    # Remark 1: gesvd only supports m >= n (WHAT?)
    # Remark 2: gesvd only supports jobu = 'A' and jobvt = 'A'
    # Remark 3: gesvd returns matrix U and V^H
    # Remark 4: Remark 2 is removed since cuda 8.0 (new!)
    n, m = a.shape
    if m >= n:
        x = a.astype(dtype, order='C', copy=False)
        trans_flag = False
    else:
        m, n = a.shape
        x = a.transpose().astype(dtype, order='C', copy=False)
        trans_flag = True
    mn = min(m, n)

    if compute_uv:
        if full_matrices:
            u = cupy.empty((m, m), dtype=dtype)
            vt = cupy.empty((n, n), dtype=dtype)
        else:
            u = cupy.empty((mn, m), dtype=dtype)
            vt = cupy.empty((mn, n), dtype=dtype)
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr
    s = cupy.empty(mn, dtype=dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    if compute_uv:
        job = ord('A') if full_matrices else ord('S')
    else:
        job = ord('N')
    if dtype == 'f':
        buffersize = cusolver.sgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=dtype)
        cusolver.sgesvd(
            handle, job, job, m, n, x.data.ptr, m,
            s.data.ptr, u_ptr, m, vt_ptr, n,
            workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    else:  # dtype == 'd'
        buffersize = cusolver.dgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=dtype)
        cusolver.dgesvd(
            handle, job, job, m, n, x.data.ptr, m,
            s.data.ptr, u_ptr, m, vt_ptr, n,
            workspace.data.ptr, buffersize, 0, dev_info.data.ptr)

    status = int(dev_info[0])
    if status > 0:
        raise linalg.LinAlgError(
            'SVD computation does not converge')
    elif status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    # Note that the returned array may need to be transporsed
    # depending on the structure of an input
    if compute_uv:
        if trans_flag:
            return u.transpose(), s, vt.transpose()
        else:
            return vt, s, u
    else:
        return s
コード例 #14
0
ファイル: decomposition.py プロジェクト: wxrui/cupy
def cholesky(a):
    """Cholesky decomposition.

    Decompose a given two-dimensional square matrix into ``L * L.T``,
    where ``L`` is a lower-triangular matrix and ``.T`` is a conjugate
    transpose operator.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(N, N)``

    Returns:
        cupy.ndarray: The lower-triangular matrix.

    .. seealso:: :func:`numpy.linalg.cholesky`
    """
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    x = a.astype(dtype, order='C', copy=True)
    n = len(a)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    if dtype == 'f':
        buffersize = cusolver.spotrf_bufferSize(handle,
                                                cublas.CUBLAS_FILL_MODE_UPPER,
                                                n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float32)
        cusolver.spotrf(handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr,
                        n, workspace.data.ptr, buffersize, dev_info.data.ptr)
    elif dtype == 'd':
        buffersize = cusolver.dpotrf_bufferSize(handle,
                                                cublas.CUBLAS_FILL_MODE_UPPER,
                                                n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float64)
        cusolver.dpotrf(handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr,
                        n, workspace.data.ptr, buffersize, dev_info.data.ptr)
    elif dtype == 'F':
        buffersize = cusolver.cpotrf_bufferSize(handle,
                                                cublas.CUBLAS_FILL_MODE_UPPER,
                                                n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.complex64)
        cusolver.cpotrf(handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr,
                        n, workspace.data.ptr, buffersize, dev_info.data.ptr)
    else:  # dtype == 'D':
        buffersize = cusolver.zpotrf_bufferSize(handle,
                                                cublas.CUBLAS_FILL_MODE_UPPER,
                                                n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.complex128)
        cusolver.zpotrf(handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr,
                        n, workspace.data.ptr, buffersize, dev_info.data.ptr)

    status = int(dev_info[0])
    if status > 0:
        raise linalg.LinAlgError('The leading minor of order {} '
                                 'is not positive definite'.format(status))
    elif status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')
    util._tril(x, k=0)
    return x