def _assert_nd_squareness(*arrays):
    """Check that the trailing two extents of every array agree.

    Args:
        *arrays: Objects exposing a ``shape`` tuple.

    Raises:
        linalg.LinAlgError: If, for any array, the largest and the smallest
            of its last two extents differ.
    """
    for arr in arrays:
        trailing = arr.shape[-2:]
        if max(trailing) == min(trailing):
            continue
        raise linalg.LinAlgError(
            'Last 2 dimensions of the array must be square')
def _assert_cupy_array(*arrays):
    """Reject any argument that is not a ``cupy.core.ndarray``.

    Args:
        *arrays: Objects to type-check.

    Raises:
        linalg.LinAlgError: On the first argument that fails the
            ``isinstance`` check.
    """
    for candidate in arrays:
        if isinstance(candidate, cupy.core.ndarray):
            continue
        raise linalg.LinAlgError(
            'cupy.linalg only supports cupy.core.ndarray')
def _assert_rank2(*arrays):
    """Check that every given array is exactly two-dimensional.

    Args:
        *arrays: Objects exposing an ``ndim`` attribute.

    Raises:
        linalg.LinAlgError: On the first array whose ``ndim`` is not 2; the
            message reports the offending dimensionality.
    """
    for arr in arrays:
        if arr.ndim == 2:
            continue
        raise linalg.LinAlgError(
            '{}-dimensional array given. Array must be '
            'two-dimensional'.format(arr.ndim))
def _check_status(dev_info):
    """Raise if a solver status value is negative.

    Args:
        dev_info: Status value; it is converted with ``int()`` before the
            comparison.

    Raises:
        linalg.LinAlgError: If the converted status is below zero.
    """
    if int(dev_info) >= 0:
        return
    raise linalg.LinAlgError(
        'Parameter error (maybe caused by a bug in cupy.linalg?)')
def lstsq(a, b, rcond=1e-15):
    """Return the least-squares solution to a linear matrix equation.

    Finds ``x`` minimizing the Euclidean 2-norm ``|| b - a x ||^2`` by way
    of a reduced singular value decomposition of ``a``. The system may be
    under-, well-, or over-determined.

    Args:
        a (cupy.ndarray): "Coefficient" matrix with dimension ``(M, N)``
        b (cupy.ndarray): "Dependent variable" values with dimension
            ``(M,)`` or ``(M, K)``
        rcond (float): Relative cutoff for small singular values. Singular
            values less than or equal to ``rcond`` times the largest
            singular value are treated as zero.

    Returns:
        tuple:
            A tuple of ``(x, residuals, rank, s)``. ``x`` has shape ``(N,)``
            or ``(N, K)`` following ``b``. ``residuals`` holds the squared
            Euclidean 2-norm of each column of ``b - a x``; it is an empty
            array when the rank differs from ``N`` or when ``M <= N``,
            otherwise its shape is ``(1,)`` for one-dimensional ``b`` and
            ``(K,)`` for two-dimensional ``b``. ``rank`` is the number of
            singular values above the cutoff and ``s`` are the singular
            values of ``a``.

    Raises:
        linalg.LinAlgError: If ``b`` has more than two dimensions or the
            leading extents of ``a`` and ``b`` differ.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.lstsq`
    """
    _util._assert_cupy_array(a, b)
    _util._assert_rank2(a)
    if b.ndim > 2:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be at'
                                 ' most two-dimensional'.format(b.ndim))
    m, n = a.shape[-2:]
    if m != b.shape[0]:
        raise linalg.LinAlgError('Incompatible dimensions')

    u, s, vt = cupy.linalg.svd(a, full_matrices=False)

    # Invert the singular values, zeroing the ones at or below the cutoff;
    # the count of surviving values is the numerical rank.
    threshold = rcond * s.max()
    inv_s = 1 / s
    negligible = s <= threshold
    inv_s[negligible] = 0
    rank = s.size - negligible.sum()

    rhs_is_matrix = b.ndim == 2
    if rhs_is_matrix:
        # One copy of the inverted singular values per right-hand side.
        inv_s = cupy.repeat(inv_s.reshape(-1, 1), b.shape[1], axis=1)

    # x = V @ diag(1 / s) @ U^T @ b
    projected = core.dot(u.transpose(), b) * inv_s
    x = core.dot(vt.transpose(), projected)

    # Residuals are only reported for full-rank, over-determined systems.
    if rank != n or m <= n:
        resids = cupy.array([], dtype=a.dtype)
    elif rhs_is_matrix:
        err = b - core.dot(a, x)
        resids = cupy.sum(cupy.square(err), axis=0)
    else:
        err = b - cupy.dot(a, x)
        resids = cupy.dot(err.T, err).reshape(-1)
    return x, resids, rank, s
def lstsq(a, b, rcond='warn'):
    """Return the least-squares solution to a linear matrix equation.

    Finds ``x`` minimizing the Euclidean 2-norm ``|| b - a x ||^2`` by way
    of a reduced singular value decomposition of ``a``. The system may be
    under-, well-, or over-determined.

    Args:
        a (cupy.ndarray): "Coefficient" matrix with dimension ``(M, N)``
        b (cupy.ndarray): "Dependent variable" values with dimension
            ``(M,)`` or ``(M, K)``
        rcond (float): Relative cutoff for small singular values. Singular
            values less than or equal to ``rcond`` times the largest
            singular value are treated as zero. The default ``'warn'``
            emits a ``FutureWarning`` and behaves like ``-1``; ``None``
            selects machine epsilon times ``max(M, N)``; a value ``<= 0``
            or ``>= 1`` selects machine epsilon.

    Returns:
        tuple:
            A tuple of ``(x, residuals, rank, s)``. ``x`` has shape ``(N,)``
            or ``(N, K)`` following ``b``. ``residuals`` is an empty array
            when ``M <= N`` or the rank differs from ``N``; otherwise it is
            the at-least-1-D result of ``_nrm2_last_axis`` applied to the
            transposed residual ``b - a x``. ``rank`` is the number of
            singular values above the cutoff and ``s`` are the singular
            values of ``a``.

    Raises:
        linalg.LinAlgError: If ``b`` has more than two dimensions or the
            leading extents of ``a`` and ``b`` differ.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.lstsq`
    """
    if rcond == 'warn':
        warnings.warn(
            '`rcond` parameter will change to the default of '
            'machine precision times ``max(M, N)`` where M and N '
            'are the input matrix dimensions.\n'
            'To use the future default and silence this warning '
            'we advise to pass `rcond=None`, to keep using the old, '
            'explicitly pass `rcond=-1`.',
            FutureWarning)
        rcond = -1

    _util._assert_cupy_array(a, b)
    _util._assert_rank2(a)
    if b.ndim > 2:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be at'
                                 ' most two-dimensional'.format(b.ndim))
    m, n = a.shape[-2:]
    if m != b.shape[0]:
        raise linalg.LinAlgError('Incompatible dimensions')

    u, s, vh = cupy.linalg.svd(a, full_matrices=False)

    if rcond is None:
        rcond = numpy.finfo(s.dtype).eps * max(m, n)
    elif rcond <= 0 or rcond >= 1:
        # some doc of gelss/gelsd says "rcond < 0", but it's not true!
        rcond = numpy.finfo(s.dtype).eps

    # Invert the singular values, zeroing the ones at or below the cutoff;
    # the count of surviving values is the numerical rank.
    threshold = rcond * s.max()
    inv_s = 1 / s
    negligible = s <= threshold
    inv_s[negligible] = 0
    rank = s.size - negligible.sum(dtype=numpy.int32)

    # x = vh.T.conj() @ diag(inv_s) @ u.T.conj() @ b
    projected = (cupy.dot(b.T, u.conj()) * inv_s).T
    x = cupy.dot(vh.T.conj(), projected)

    # Residuals are only reported for full-rank, over-determined systems.
    if m <= n or rank != n:
        resids = cupy.empty((0, ), dtype=s.dtype)
    else:
        err = b - a.dot(x)
        resids = cupy.atleast_1d(_nrm2_last_axis(err.T))
    return x, resids, rank, s
def solve(a, b):
    '''Solves a linear matrix equation.

    It computes the exact solution of ``x`` in ``ax = b``,
    where ``a`` is a square and full rank matrix.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(M, M)``
        b (cupy.ndarray): The vector with ``M`` elements, or
            the matrix with dimension ``(M, K)``

    Returns:
        cupy.ndarray: The solution, with the same shape as ``b``. Its dtype
        is ``a.dtype`` when that is float32 or float64, otherwise the
        promotion of ``a.dtype`` with float32.

    Raises:
        RuntimeError: If cuSOLVER is not enabled.
        linalg.LinAlgError: If ``b`` has more than two dimensions, if ``a``
            and ``b`` have a different number of rows, or if a cuSOLVER
            routine reports a negative status.

    .. seealso:: :func:`numpy.linalg.solve`
    '''
    # NOTE: Since cusolver in CUDA 8.0 does not support gesv,
    #       we manually solve a linear system with QR decomposition.
    #       For details, please see the following:
    #       http://docs.nvidia.com/cuda/cusolver/index.html#qr_examples
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a, b)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    if 2 < b.ndim:
        raise linalg.LinAlgError(
            '{}-dimensional array given. Array must be '
            'one or two-dimensional'.format(b.ndim))
    if len(a) != len(b):
        raise linalg.LinAlgError(
            'The number of rows of array a must be '
            'the same as that of array b')

    # Cast to float32 or float64.
    # numpy.find_common_type was removed in NumPy 2.0; promote_types gives
    # the same result for a pair of array dtypes.
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    m, k = (b.size, 1) if b.ndim == 1 else b.shape
    # The routines below work on column-major data, so hand them C-ordered
    # copies of the transposes; the copies are overwritten in place.
    a = a.transpose().astype(dtype, order='C', copy=True)
    b = b.transpose().astype(dtype, order='C', copy=True)
    cusolver_handle = device.get_cusolver_handle()
    cublas_handle = device.get_cublas_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf = cusolver.sgeqrf
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        ormqr = cusolver.sormqr
        trsm = cublas.strsm
    else:  # dtype == 'd'
        geqrf = cusolver.dgeqrf
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        ormqr = cusolver.dormqr
        trsm = cublas.dtrsm

    # 1. QR decomposition (A = Q * R)
    buffersize = geqrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(m, dtype=dtype)
    geqrf(
        cusolver_handle, m, m, a.data.ptr, m, tau.data.ptr,
        workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)

    # 2. ormqr (Q^T * B)
    ormqr(
        cusolver_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_OP_T,
        m, k, m, a.data.ptr, m, tau.data.ptr, b.data.ptr, m,
        workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)

    # 3. trsm (X = R^{-1} * (Q^T * B))
    trsm(
        cublas_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_FILL_MODE_UPPER,
        cublas.CUBLAS_OP_N, cublas.CUBLAS_DIAG_NON_UNIT,
        m, k, 1, a.data.ptr, m, b.data.ptr, m)
    # b was transposed on the way in; undo that for the caller.
    return b.transpose()
def batched_gesv(a, b):
    """Solves multiple linear matrix equations using cublas<t>getr[fs]Batched().

    Computes the solution to system of linear equation ``ax = b``.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(..., M, M)``.
        b (cupy.ndarray): The matrix with dimension ``(..., M)`` or
            ``(..., M, K)``.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(..., M)`` or ``(..., M, K)``.

    Raises:
        ValueError: If the shapes of ``a`` and ``b`` are not compatible.
        TypeError: If the promoted dtype is not one of float32, float64,
            complex64 or complex128.
        linalg.LinAlgError: If the batched solve reports a non-zero info.
    """
    util._assert_cupy_array(a, b)
    util._assert_nd_squareness(a)

    ndim_ok = a.ndim == b.ndim or a.ndim == b.ndim + 1
    if not (ndim_ok and a.shape[:-1] == b.shape[:a.ndim - 1]):
        raise ValueError(
            'a must have (..., M, M) shape and b must have (..., M) '
            'or (..., M, K)')

    # Pick the cuBLAS routine prefix matching the working dtype.
    dtype = numpy.promote_types(a.dtype.char, 'f')
    for code, prefix in (('f', 's'), ('d', 'd'), ('F', 'c'), ('D', 'z')):
        if dtype == code:
            break
    else:
        raise TypeError('invalid dtype')
    getrf = getattr(cublas, prefix + 'getrfBatched')
    getrs = getattr(cublas, prefix + 'getrsBatched')

    bs = numpy.prod(a.shape[:-2]) if a.ndim > 2 else 1
    n = a.shape[-1]
    nrhs = b.shape[-1] if a.ndim == b.ndim else 1
    out_shape = b.shape

    # Lay both operands out as (batch, column, row) contiguous buffers, and
    # make sure we never write into memory owned by the caller.
    a_ptr_in = a.data.ptr
    b_ptr_in = b.data.ptr
    a = cupy.ascontiguousarray(
        a.reshape(bs, n, n).transpose(0, 2, 1), dtype=dtype)
    b = cupy.ascontiguousarray(
        b.reshape(bs, n, nrhs).transpose(0, 2, 1), dtype=dtype)
    if a.data.ptr == a_ptr_in:
        a = a.copy()
    if b.data.ptr == b_ptr_in:
        b = b.copy()

    limit = get_batched_gesv_limit()
    if n > limit:
        warnings.warn('The matrix size ({}) exceeds the set limit ({})'.format(
            n, limit))

    handle = device.get_cublas_handle()

    # Device arrays holding the address of each matrix in the batch.
    lda = n
    a_stride = lda * n * a.itemsize
    a_array = cupy.arange(a.data.ptr, a.data.ptr + a_stride * bs, a_stride,
                          dtype=cupy.uintp)
    ldb = n
    b_stride = ldb * nrhs * b.itemsize
    b_array = cupy.arange(b.data.ptr, b.data.ptr + b_stride * bs, b_stride,
                          dtype=cupy.uintp)
    pivot = cupy.empty((bs, n), dtype=numpy.int32)
    dinfo = cupy.empty((bs, ), dtype=numpy.int32)
    info = numpy.empty((1, ), dtype=numpy.int32)

    # LU factorization (A = L * U)
    getrf(handle, n, a_array.data.ptr, lda, pivot.data.ptr, dinfo.data.ptr, bs)
    util._check_cublas_info_array_if_synchronization_allowed(getrf, dinfo)

    # Solves Ax = b
    getrs(handle, cublas.CUBLAS_OP_N, n, nrhs, a_array.data.ptr, lda,
          pivot.data.ptr, b_array.data.ptr, ldb, info.ctypes.data, bs)
    status = info[0]
    if status != 0:
        msg = 'Error reported by {} in cuBLAS. '.format(getrs.__name__)
        if status < 0:
            msg += 'The {}-th parameter had an illegal value.'.format(-status)
        raise linalg.LinAlgError(msg)

    return b.transpose(0, 2, 1).reshape(out_shape)
def slogdet(a):
    """Returns sign and logarithm of the determinant of an array.

    It calculates the natural logarithm of the determinant of a given value.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            It returns a tuple ``(sign, logdet)``.
            ``sign`` represents each sign of the determinant as a real number
            ``0``, ``1`` or ``-1``.
            ``logdet`` represents the natural logarithm of the absolute of the
            determinant.
            If the determinant is zero, ``sign`` will be ``0`` and ``logdet``
            will be ``-inf``.
            The shapes of both ``sign`` and ``logdet`` are equal to
            ``a.shape[:-2]``.

    Raises:
        linalg.LinAlgError: If ``a`` has fewer than two dimensions.
        ValueError: If the promoted dtype is neither float32 nor float64.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. warning::
        To produce the same results as :func:`numpy.linalg.slogdet` for
        singular inputs, set the `linalg` configuration to `raise`.

    .. seealso:: :func:`numpy.linalg.slogdet`
    """
    if a.ndim < 2:
        raise linalg.LinAlgError(
            '%d-dimensional array given. '
            'Array must be at least two-dimensional' % a.ndim)
    util._assert_nd_squareness(a)

    dtype = numpy.promote_types(a.dtype.char, 'f')
    real_dtype = dtype

    # TODO(kataoka): support complex types
    if dtype not in (numpy.float32, numpy.float64):
        raise ValueError(
            'dtype must be float32 or float64'
            ' (actual: {})'.format(a.dtype))

    batch_shape = a.shape[:-2]
    n = a.shape[-2]

    if a.size == 0:
        # empty batch (result is empty, too) or empty matrices det([[]]) == 1
        return (cupy.ones(batch_shape, dtype),
                cupy.zeros(batch_shape, real_dtype))

    lu, ipiv, dev_info = decomposition._lu_factor(a, dtype)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diagonal(lu, axis1=-2, axis2=-1)

    # Every row exchange and every negative pivot flips the sign once.
    # ipiv is 1-origin
    row_swaps = cupy.count_nonzero(ipiv != cupy.arange(1, n + 1), axis=-1)
    negative_pivots = cupy.count_nonzero(diag < 0, axis=-1)
    flips = row_swaps + negative_pivots

    # Note: sign == -1 ** (flips % 2)
    sign = (flips % 2) * -2 + 1
    logdet = cupy.log(abs(diag)).sum(axis=-1)

    # A positive dev_info marks a singular matrix: report (0, -inf) there.
    singular = dev_info > 0
    sign_out = cupy.where(
        singular, dtype.type(0), sign.astype(dtype)).reshape(batch_shape)
    logdet_out = cupy.where(
        singular, real_dtype.type('-inf'), logdet).reshape(batch_shape)
    return (sign_out, logdet_out)
def gls_fit(ramp_data, input_var_data,
            prev_fit_data, prev_slope_data,
            readnoise, gain, frame_time, group_time, nframes_used,
            num_cr, cr_flagged_2d, saturated_data,
            use_extra_terms=True):
    """Generalized least squares linear fit.

    It is assumed that every input pixel has num_cr cosmic-ray hits
    somewhere within the ramp. This function should be called separately
    for different values of num_cr.

    Parameters
    ----------
    ramp_data: 2-D ndarray; indices: group, pixel number
        The ramp data for one of the integrations in an exposure. This
        may be a subset in detector coordinates, but covering all groups.
        The shape is (ngroups, nz), where ngroups is the length of the
        ramp, and nz is the number of pixels in the current subset.

    input_var_data: 2-D ndarray, shape (ngroups, nz)
        The square of the input ERR array, matching ramp_data.

    prev_fit_data: 2-D ndarray, shape (ngroups, nz)
        The fit to ramp_data, based on applying the values of intercept,
        slope, and cosmic-ray amplitudes that were determined in a previous
        call to gls_fit. This array is only used for setting up the
        covariance matrix.

    prev_slope_data: 1-D ndarray, length nz.
        An estimate (e.g. from a previous iteration) of the slope at each
        pixel, in electrons per second. NOTE: negative entries are
        overwritten in place with 1, so the caller's array is modified.

    readnoise: 1-D ndarray, length nz.
        The read noise in electrons at each detector pixel.

    gain: 1-D ndarray, shape (nz,), or None
        The analog-to-digital gain (electrons per dn) at each detector
        pixel. If None, a gain of 1 is used in the extra diagonal terms.

    frame_time: float
        The time to read one frame, in seconds (e.g. 10.6 s).

    group_time: float
        Time increment between groups, in seconds.

    nframes_used: int
        Number of frames that were averaged together to make a group.
        Note that this value does not include the number (if any) of
        skipped frames.

    num_cr: int
        The number of cosmic rays that will be handled. All pixels in the
        current set (ramp_data) are assumed to have this many cosmic ray
        hits somewhere within the ramp.

    cr_flagged_2d: 2-D ndarray, shape (ngroups, nz)
        The values should be 0 or 1; 1 indicates that a cosmic ray was
        detected (by another step) at that point.

    saturated_data: 2-D ndarray, shape (ngroups, nz)
        Normal values are zero; the value will be a huge number for
        saturated pixels. This will be added to the main diagonal of the
        inverse of the weight matrix to greatly reduce the weight for
        saturated pixels.

    use_extra_terms: bool
        True if we should include Massimo Robberto's terms in the inverse
        weight matrix. See JWST-STScI-003193.pdf

    Returns
    -------
    tuple: (result2d, variances)
        result2d is a 2-D ndarray; shape (nz, 2 + num_cr)
            The computed values of intercept, slope, and cosmic-ray
            amplitudes (there will be num_cr cosmic-ray amplitudes) for
            each of the nz pixels.

        variances is a 2-D ndarray; shape (nz, 2 + num_cr)
            The variance for the intercept, slope, and for the amplitude
            of each cosmic ray that was detected.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the normal-equation matrix of some pixel cannot be inverted; the
        index of the first offending pixel is logged before raising.
    """

    M = float(nframes_used)

    ngroups = ramp_data.shape[0]
    nz = ramp_data.shape[1]
    num_cr = int(num_cr)

    # x is an array (length nz) of matrices, each of which is the
    # independent variable of a linear equation. Each such matrix
    # has ngroups rows and 2 + num_cr columns. The first column is set
    # to 1, for finding the intercept. The second column is the time at
    # each group, for finding the slope. The remaining columns (if any),
    # are 0 for all rows prior to a certain point, then 1 for all
    # subsequent rows (i.e. the Heaviside function). The transition from
    # 0 to 1 is the location of a cosmic ray hit; the first 1 in a column
    # corresponds to the value in cr_flagged_2d being 1.
    x = np.zeros((nz, ngroups, 2 + num_cr), dtype=np.float64)
    x[:, :, 0] = 1.
    x[:, :, 1] = np.arange(ngroups, dtype=np.float64) * group_time + \
        frame_time * (M + 1.) / 2.

    if num_cr > 0:
        sum_crs = cr_flagged_2d.cumsum(axis=0)
        for k in range(ngroups):
            for n in range(1, num_cr + 1):
                # Pixels whose n-th cosmic ray lands exactly on group k.
                pixels = np.where(np.logical_and(cr_flagged_2d[k] == 1,
                                                 sum_crs[k] == n))[0]
                if len(pixels) > 0:
                    x[pixels, k:ngroups, n + 1] = 1

    y = np.transpose(ramp_data, (1, 0)).reshape((nz, ngroups, 1))

    # cov is an array of nz matrices, each ngroups x ngroups. The
    # inverse of each of these matrices is a weight matrix.
    # Note that there are two objects that are called the covariance
    # matrix: (1) the ngroups x ngroups matrices in cov, and (2) the
    # smaller matrix (see near the end of this function) that contains
    # the variances and covariances of the fitted parameters.
    cov = np.ones((nz, ngroups, ngroups), dtype=np.float64)

    # Use the previous fit to the data to populate the covariance matrix,
    # for each of the nz pixels. prev_fit_data has shape (ngroups, nz),
    # similar to the ramp data, but we want the nz axis to be the first
    # (we're constructing an array of nz matrix equations), so transpose
    # prev_fit_data.
    prev_fit_T = np.transpose(prev_fit_data, (1, 0))
    for k in range(ngroups):
        # Populate the upper right, row by row.
        cov[:, k, k:ngroups] = prev_fit_T[:, k:k + 1]
        # Populate the lower left, column by column.
        cov[:, k:ngroups, k] = prev_fit_T[:, k:k + 1]
        # Propagate errors from input.
        cov[:, k, k] += input_var_data[k, :]
        # Give saturated pixels very low weight (i.e. high variance).
        cov[:, k, k] += saturated_data[k, :]
    del prev_fit_T

    # I is 2-D, but it can broadcast to 4-D. This is used to add terms to
    # the diagonal of the covariance matrix.
    I = np.identity(ngroups)

    # Scale the readnoise by SINGLE_READOUT_RN_FACTOR to convert it from
    # CDS to single readout.
    rn3d = readnoise.reshape((nz, 1, 1)) * SINGLE_READOUT_RN_FACTOR
    cov += (I * (rn3d**2 / M))

    # prev_slope_data must be non-negative. (This writes into the array
    # that the caller passed in.)
    flags = prev_slope_data < 0.
    prev_slope_data[flags] = 1.

    if use_extra_terms:
        # Include two dummy axes to allow broadcasting with cov.
        slope3d = prev_slope_data.reshape((nz, 1, 1))

        # diagonal:
        if gain is not None:
            g3d = gain.reshape((nz, 1, 1))
        else:
            g3d = 1.
        delta_diag = I * (slope3d * frame_time *
                          (M - 1.) * (M - 2.) / (3. * M) +
                          (g3d * M)**2 / 12.)
        cov += delta_diag
        del delta_diag

    # This is the solution: (xT @ weight @ x)^-1 @ [xT @ weight @ y]
    # where @ means matrix multiplication.

    # shape of xT is (nz, 2 + num_cr, ngroups)
    xT = np.transpose(x, (0, 2, 1))

    # shape of `weight` is (nz, ngroups, ngroups)
    I = I.reshape((1, ngroups, ngroups))
    weight = la.solve(cov, I)           # inverse of cov
    del I

    # temp1 = xT @ weight
    # shape of temp1 is (nz, 2 + num_cr, ngroups)
    temp1 = np.einsum('...ij,...jk->...ik', xT, weight)

    # temp_var = xT @ weight @ x
    # shape of temp_var is (nz, 2 + num_cr, 2 + num_cr)
    temp_var = np.einsum('...ij,...jk->...ik', temp1, x)

    # `covar` is an array of nz covariance matrices.
    # covar = (xT @ weight @ x)^-1
    # shape of covar is (nz, 2 + num_cr, 2 + num_cr)
    I_2 = np.eye(2 + num_cr).reshape((1, 2 + num_cr, 2 + num_cr))
    try:
        covar = la.solve(temp_var, I_2)         # inverse of temp_var
    except la.LinAlgError:
        # Find the first pixel with a singular matrix, so that the log
        # says which one it was.
        for z in range(nz):
            try:
                la.solve(temp_var[z], I_2)
            except la.LinAlgError as msg2:
                # Logger.warn is a deprecated alias that no longer exists
                # in recent Python releases; use Logger.warning.
                log.warning("singular matrix, z = %d" % z)
                raise la.LinAlgError(msg2)
        # No individual pixel failed: re-raise the batched error instead of
        # continuing with `covar` undefined.
        raise
    del I_2

    # [xT @ weight @ y]
    # shape of temp2 is (nz, 2 + num_cr, 1)
    temp2 = np.einsum('...ij,...jk->...ik', temp1, y)

    # shape of result is (nz, 2 + num_cr, 1)
    result = np.einsum('...ij,...jk->...ik', covar, temp2)
    r_shape = result.shape
    result2d = result.reshape((r_shape[0], r_shape[1]))
    del result

    # shape of both result2d and variances is (nz, 2 + num_cr)
    variances = covar.diagonal(axis1=1, axis2=2).copy()

    return (result2d, variances)
def gls_fit(ramp_data, prev_fit_data, prev_slope_data, readnoise, gain,
            frame_time, group_time, nframes_used, num_cr, cr_flagged_2d,
            saturated_data):
    """Generalized least squares linear fit.

    Every input pixel is assumed to have exactly num_cr cosmic-ray hits
    somewhere within the ramp, so call this function separately for each
    value of num_cr.

    Notes
    -----
    The ramp covariance is built here from the previous fit, the read
    noise and the saturation term only. The same technique could be used
    with more complex noise models, but then the ramp covariance matrix
    should be input.

    Parameters
    ----------
    ramp_data: 2-D ndarray; indices: group, pixel number
        The ramp data for one of the integrations in an exposure. The
        shape is (ngroups, nz), where ngroups is the length of the ramp
        and nz is the number of pixels in the current subset.

    prev_fit_data: 2-D ndarray, shape (ngroups, nz)
        The fit to ramp_data from a previous call to gls_fit. It is only
        used for setting up the covariance matrix.

    prev_slope_data: 1-D ndarray, length nz.
        An estimate of the slope at each pixel. In this implementation it
        is only clamped: negative entries are overwritten in place with 1.

    readnoise: 1-D ndarray, length nz.
        The read noise in electrons at each detector pixel.

    gain: 1-D ndarray, shape (nz,)
        The analog-to-digital gain at each detector pixel. It is not
        referenced by this implementation.

    frame_time: float
        The time to read one frame, in seconds.

    group_time: float
        Time increment between groups, in seconds.

    nframes_used: int
        Number of frames that were averaged together to make a group.

    num_cr: int
        The number of cosmic rays that will be handled.

    cr_flagged_2d: 2-D ndarray, shape (ngroups, nz)
        The values should be 0 or 1; 1 indicates that a cosmic ray was
        detected (by another step) at that point.

    saturated_data: 2-D ndarray, shape (ngroups, nz)
        Normal values are zero; the value will be a huge number for
        saturated pixels. It is added to the main diagonal of the ramp
        covariance to greatly reduce the weight for saturated pixels.

    Returns
    -------
    tuple: (fitparam2d, fitparam_uncs)
        fitparam2d is a 2-D ndarray; shape (nz, 2 + num_cr)
            The computed values of intercept, slope, and cosmic-ray
            amplitudes for each of the nz pixels.

        fitparam_uncs is a 2-D ndarray; shape (nz, 2 + num_cr)
            The diagonal of the fit-parameter covariance matrix, i.e. the
            variance of the intercept, slope, and each cosmic-ray
            amplitude.
    """
    nframes = float(nframes_used)
    ngroups, nz = ramp_data.shape[0], ramp_data.shape[1]
    num_cr = int(num_cr)
    nparam = 2 + num_cr

    # One design matrix per pixel, ngroups x nparam: a column of ones for
    # the intercept, a column of group times for the slope, and one
    # Heaviside step column per cosmic ray (0 before the hit, 1 from the
    # flagged group onwards).
    design = np.zeros((nz, ngroups, nparam), dtype=np.float64)
    design[:, :, 0] = 1.
    design[:, :, 1] = (np.arange(ngroups, dtype=np.float64) * group_time
                       + frame_time * (nframes + 1.) / 2.)

    if num_cr > 0:
        running_hits = cr_flagged_2d.cumsum(axis=0)
        for grp in range(ngroups):
            flagged_here = cr_flagged_2d[grp] == 1
            for hit in range(1, num_cr + 1):
                # Pixels whose hit-th cosmic ray lands exactly on this group.
                pixels = np.where(
                    np.logical_and(flagged_here,
                                   running_hits[grp] == hit))[0]
                if len(pixels) > 0:
                    design[pixels, grp:ngroups, hit + 1] = 1

    y = np.transpose(ramp_data, (1, 0)).reshape((nz, ngroups, 1))

    # Covariance of each pixel's ramp data: nz matrices, ngroups x ngroups,
    # filled from the previous fit with the pixel axis moved to the front.
    ramp_cov = np.ones((nz, ngroups, ngroups), dtype=np.float64)
    prev_fit_by_pixel = np.transpose(prev_fit_data, (1, 0))
    for grp in range(ngroups):
        level = prev_fit_by_pixel[:, grp:grp + 1]
        # Upper right (row by row), then lower left (column by column).
        ramp_cov[:, grp, grp:ngroups] = level
        ramp_cov[:, grp:ngroups, grp] = level
        # A huge variance gives saturated pixels a very low weight.
        ramp_cov[:, grp, grp] += saturated_data[grp, :]

    # Add the squared read noise to the diagonal of every matrix.
    eye = np.identity(ngroups)
    ramp_cov += eye * readnoise.reshape((nz, 1, 1))**2

    # prev_slope_data must be non-negative (modifies the caller's array).
    prev_slope_data[prev_slope_data < 0.] = 1.

    # The fit parameters are
    #     (xT @ ramp_cov^-1 @ x)^-1 @ [xT @ ramp_cov^-1 @ y]
    #     = [y-intercept, slope, cr_amplitude_1, cr_amplitude_2, ...]
    # where @ is matrix multiplication, spelled with einsum below.
    matmul = '...ij,...jk->...ik'

    # shape (nz, nparam, ngroups)
    design_T = np.transpose(design, (0, 2, 1))
    # shape (nz, ngroups, ngroups)
    ramp_invcov = la.solve(ramp_cov, eye.reshape((1, ngroups, ngroups)))
    # xT @ ramp_invcov, shape (nz, nparam, ngroups)
    xt_w = np.einsum(matmul, design_T, ramp_invcov)
    # xT @ ramp_invcov @ x, shape (nz, nparam, nparam)
    normal = np.einsum(matmul, xt_w, design)

    # fitparam_cov = (xT @ ramp_invcov @ x)^-1, shape (nz, nparam, nparam)
    eye_p = np.eye(nparam).reshape((1, nparam, nparam))
    try:
        fitparam_cov = la.solve(normal, eye_p)
    except la.LinAlgError:
        # find the pixel with the singular matrix
        for z in range(nz):
            try:
                la.solve(normal[z], eye_p)
            except la.LinAlgError as err:
                log.warning("singular matrix, z = %d" % z)
                raise la.LinAlgError(err)

    # xT @ ramp_invcov @ y, shape (nz, nparam, 1)
    rhs = np.einsum(matmul, xt_w, y)
    # shape (nz, nparam, 1), flattened to (nz, nparam) for the caller
    fitparam = np.einsum(matmul, fitparam_cov, rhs)
    fitparam2d = fitparam.reshape(fitparam.shape[:2])

    # shape (nz, nparam)
    fitparam_uncs = fitparam_cov.diagonal(axis1=1, axis2=2).copy()

    return (fitparam2d, fitparam_uncs)
def qr(a, mode='reduced'):
    '''QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', and decompose a matrix ``A = (M, N)`` into ``Q``,
            ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    Returns:
        cupy.ndarray or tuple of cupy.ndarray: For mode 'r', only the
        upper-triangular factor. For mode 'raw', the pair ``(x, tau)``
        produced by geqrf (converted to float64 when the input is float32).
        Otherwise the pair ``(Q, R)``.

    Raises:
        RuntimeError: If cuSOLVER is not enabled.
        ValueError: If ``mode`` is deprecated or not recognized.
        linalg.LinAlgError: If geqrf reports a negative status.

    .. seealso:: :func:`numpy.linalg.qr`
    '''
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # Cast to float32 or float64.
    # numpy.find_common_type was removed in NumPy 2.0; promote_types gives
    # the same result for a pair of array dtypes.
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    m, n = a.shape
    # C-ordered copy of the transpose: the column-major (m x n, lda = m)
    # layout that the cuSOLVER calls below are given.
    x = a.transpose().astype(dtype, order='C', copy=True)
    mn = min(m, n)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        real_dtype = numpy.float32
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        geqrf = cusolver.sgeqrf
        orgqr_bufferSize = cusolver.sorgqr_bufferSize
        orgqr = cusolver.sorgqr
    else:  # dtype == 'd'
        real_dtype = numpy.float64
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        geqrf = cusolver.dgeqrf
        orgqr_bufferSize = cusolver.dorgqr_bufferSize
        orgqr = cusolver.dorgqr

    # compute working space of geqrf and ormqr, and solve R
    # The workspace query uses the same (m, n, lda) as the factorization
    # itself, for both precisions.
    buffersize = geqrf_bufferSize(handle, m, n, x.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=real_dtype)
    tau = cupy.empty(mn, dtype=real_dtype)
    geqrf(
        handle, m, n, x.data.ptr, m,
        tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)

    status = int(dev_info[0])
    if status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    if mode == 'r':
        r = x[:, :mn].transpose()
        return util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        return x, tau

    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # solve Q
    buffersize = orgqr_bufferSize(
        handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
    workspace = cupy.empty(buffersize, dtype=real_dtype)
    orgqr(
        handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
        workspace.data.ptr, buffersize, dev_info.data.ptr)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, util._triu(r)
def svd(a, full_matrices=True, compute_uv=True):
    '''Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is an one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
            It is left unmodified.
        full_matrices (bool): If True, it returns U and V with dimensions
            ``(M, M)`` and ``(N, N)``. Otherwise, the dimensions of U and V
            are respectively ``(M, K)`` and ``(K, N)``, where
            ``K = min(M, N)``.
        compute_uv (bool): If False, it only returns singular values.

    Returns:
        The tuple ``(u, s, v)`` when ``compute_uv`` is True, otherwise only
        the one-dimensional array ``s``.

    Raises:
        RuntimeError: If cuSOLVER is not enabled.
        linalg.LinAlgError: If the computation does not converge or cuSOLVER
            reports an invalid parameter (the input validation helpers may
            raise it as well).

    .. seealso:: :func:`numpy.linalg.svd`
    '''
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    # Cast to float32 or float64
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    # Remark 1: gesvd only supports m >= n
    # Remark 2: gesvd only supports jobu = 'A' and jobvt = 'A'
    # Remark 3: gesvd returns matrix U and V^H
    # Remark 4: Remark 2 is removed since cuda 8.0
    n, m = a.shape

    # `a` must be copied because gesvd overwrites its input matrix; with
    # copy=False, `x` could alias the caller's buffer whenever `a` already
    # has the target dtype and memory layout.
    if m >= n:
        x = a.astype(dtype, order='C', copy=True)
        trans_flag = False
    else:
        m, n = a.shape
        x = a.transpose().astype(dtype, order='C', copy=True)
        trans_flag = True
    mn = min(m, n)

    if compute_uv:
        if full_matrices:
            u = cupy.empty((m, m), dtype=dtype)
            vt = cupy.empty((n, n), dtype=dtype)
        else:
            u = cupy.empty((mn, m), dtype=dtype)
            vt = cupy.empty((mn, n), dtype=dtype)
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr
    s = cupy.empty(mn, dtype=dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if compute_uv:
        job = ord('A') if full_matrices else ord('S')
    else:
        job = ord('N')

    if dtype == 'f':
        gesvd_bufferSize = cusolver.sgesvd_bufferSize
        gesvd = cusolver.sgesvd
    else:  # dtype == 'd'
        gesvd_bufferSize = cusolver.dgesvd_bufferSize
        gesvd = cusolver.dgesvd

    buffersize = gesvd_bufferSize(handle, m, n)
    workspace = cupy.empty(buffersize, dtype=dtype)
    gesvd(
        handle, job, job, m, n, x.data.ptr, m,
        s.data.ptr, u_ptr, m, vt_ptr, n,
        workspace.data.ptr, buffersize, 0, dev_info.data.ptr)

    status = int(dev_info[0])
    if status > 0:
        raise linalg.LinAlgError(
            'SVD computation does not converge')
    elif status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    # Note that the returned array may need to be transposed
    # depending on the structure of an input
    if compute_uv:
        if trans_flag:
            return u.transpose(), s, vt.transpose()
        else:
            return vt, s, u
    else:
        return s
def cholesky(a):
    """Compute the Cholesky factor of a square matrix.

    The input is factorized as ``L * L.T`` (``.T`` denoting the conjugate
    transpose) and the lower-triangular factor ``L`` is returned.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(N, N)``

    Returns:
        cupy.ndarray: The lower-triangular matrix.

    Raises:
        RuntimeError: If cuSOLVER is not enabled.
        linalg.LinAlgError: If a leading minor is not positive definite or
            cuSOLVER reports an invalid parameter (the input validation
            helpers may raise it as well).

    .. seealso:: :func:`numpy.linalg.cholesky`
    """
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # Single/double precision inputs are kept; anything else is promoted.
    kind = a.dtype.char
    if kind not in ('f', 'd'):
        kind = numpy.find_common_type((kind, 'f'), ()).char

    factor = a.astype(kind, order='C', copy=True)
    order = len(a)
    solver_handle = device.get_cusolver_handle()
    info = cupy.empty(1, dtype=numpy.int32)

    # Pick the potrf flavour and workspace dtype for the working precision.
    # Any kind not listed falls through to the double-complex routine.
    routines = {
        'f': ('spotrf', numpy.float32),
        'd': ('dpotrf', numpy.float64),
        'F': ('cpotrf', numpy.complex64),
    }
    routine, work_dtype = routines.get(kind, ('zpotrf', numpy.complex128))

    lwork = getattr(cusolver, routine + '_bufferSize')(
        solver_handle, cublas.CUBLAS_FILL_MODE_UPPER, order,
        factor.data.ptr, order)
    work = cupy.empty(lwork, dtype=work_dtype)
    getattr(cusolver, routine)(
        solver_handle, cublas.CUBLAS_FILL_MODE_UPPER, order,
        factor.data.ptr, order, work.data.ptr, lwork, info.data.ptr)

    code = int(info[0])
    if code < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')
    if code > 0:
        raise linalg.LinAlgError('The leading minor of order {} '
                                 'is not positive definite'.format(code))

    # Clear everything above the diagonal in place before returning.
    util._tril(factor, k=0)
    return factor