def get_rnn_lin_layer_bias_params(
        handle, rnn_desc, layer, x_desc, w_desc, w, lin_layer_id):
    # Query cuDNN for the location of the bias parameters of one linear
    # layer inside the packed RNN weight array ``w``.
    bias_desc = Descriptor(cudnn.createFilterDescriptor(),
                           cudnn.destroyFilterDescriptor)
    ptr = numpy.array(0, dtype=numpy.intp)
    cudnn.getRNNLinLayerBiasParams(
        handle, rnn_desc.value, layer, x_desc.value, w_desc.value,
        w.data.ptr, lin_layer_id, bias_desc.value, ptr.ctypes.data)
    # Convert the byte offset into an element offset (4 bytes per element,
    # i.e. ``w`` is assumed to hold float32 values).
    offset = (ptr - w.data.ptr) // 4
    _, _, _, dim = cudnn.getFilterNdDescriptor(bias_desc.value, 3)
    size = internal.prod(dim)
    bias = w[offset:offset + size]
    return bias

def _potrf_batched(a):
    """Batched Cholesky decomposition.

    Decompose a given array of two-dimensional square matrices into
    ``L * L.T``, where ``L`` is a lower-triangular matrix and ``.T``
    is a conjugate transpose operator.

    Args:
        a (cupy.ndarray): The input array of matrices
            with dimension ``(..., N, N)``

    Returns:
        cupy.ndarray: The lower-triangular matrix.
    """
    if not check_availability('potrfBatched'):
        raise RuntimeError('potrfBatched is not available')

    # Promote non-floating inputs to at least float32.
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    if dtype == 'f':
        potrfBatched = cusolver.spotrfBatched
    elif dtype == 'd':
        potrfBatched = cusolver.dpotrfBatched
    elif dtype == 'F':
        potrfBatched = cusolver.cpotrfBatched
    else:  # dtype == 'D':
        potrfBatched = cusolver.zpotrfBatched

    x = a.astype(dtype, order='C', copy=True)
    xp = cupy.core._mat_ptrs(x)  # device array of pointers to each matrix
    n = x.shape[-1]
    ldx = x.strides[-2] // x.dtype.itemsize
    handle = device.get_cusolver_handle()
    batch_size = internal.prod(x.shape[:-2])
    dev_info = cupy.empty(batch_size, dtype=numpy.int32)

    # cuSOLVER assumes column-major storage, so the C-ordered (row-major)
    # input looks transposed to it; requesting the upper triangle here
    # produces the lower-triangular factor in row-major terms.
    potrfBatched(
        handle, cublas.CUBLAS_FILL_MODE_UPPER, n, xp.data.ptr, ldx,
        dev_info.data.ptr, batch_size)
    cupy.linalg._util._check_cusolver_dev_info_if_synchronization_allowed(
        potrfBatched, dev_info)

    return cupy.tril(x)

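# Usage sketch (not part of the original module): a minimal example of
# calling ``_potrf_batched`` directly, assuming it is in scope and a CUDA
# device with cuSOLVER's potrfBatched is available.
import cupy

# Build a batch of symmetric positive-definite matrices: A = M @ M.T + 3*I.
m = cupy.random.rand(4, 3, 3)
a = cupy.matmul(m, m.swapaxes(-1, -2)) + 3 * cupy.eye(3)

l = _potrf_batched(a)  # lower-triangular factors, shape (4, 3, 3)

# Each factor satisfies L @ L.T ~= A.
cupy.testing.assert_allclose(cupy.matmul(l, l.swapaxes(-1, -2)), a,
                             rtol=1e-5, atol=1e-5)
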
def tensorsolve(a, b, axes=None):
    """Solves tensor equations denoted by ``ax = b``.

    Suppose that ``b`` is equivalent to ``cupy.tensordot(a, x)``. This
    function computes tensor ``x`` from ``a`` and ``b``.

    Args:
        a (cupy.ndarray): The tensor with ``len(shape) >= 1``
        b (cupy.ndarray): The tensor with ``len(shape) >= 1``
        axes (tuple of ints): Axes in ``a`` to reorder to the right
            before inversion.

    Returns:
        cupy.ndarray:
            The tensor with shape ``Q`` such that ``b.shape + Q == a.shape``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.tensorsolve`
    """
    if axes is not None:
        allaxes = list(range(a.ndim))
        for k in axes:
            allaxes.remove(k)
            allaxes.insert(a.ndim, k)
        a = a.transpose(allaxes)

    oldshape = a.shape[-(a.ndim - b.ndim):]
    prod = internal.prod(oldshape)

    a = a.reshape(-1, prod)
    b = b.ravel()
    result = solve(a, b)
    return result.reshape(oldshape)

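# Usage sketch (hypothetical example, mirroring the one in the
# numpy.linalg.tensorsolve documentation; assumes ``tensorsolve`` above is
# in scope and a CUDA device is available).
import cupy

a = cupy.eye(2 * 3 * 4).reshape(6, 4, 2, 3, 4)
b = cupy.random.rand(6, 4)
x = tensorsolve(a, b)  # x.shape == (2, 3, 4), so b.shape + x.shape == a.shape

# Contracting the last three axes of ``a`` with ``x`` reproduces ``b``.
cupy.testing.assert_allclose(cupy.tensordot(a, x, axes=3), b, atol=1e-5)
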
def tensorinv(a, ind=2):
    """Computes the inverse of a tensor.

    This function computes tensor ``a_inv`` from tensor ``a`` such that
    ``tensordot(a_inv, a, ind) == I``, where ``I`` denotes the identity
    tensor.

    Args:
        a (cupy.ndarray):
            The tensor such that
            ``prod(a.shape[:ind]) == prod(a.shape[ind:])``.
        ind (int):
            The positive number used in ``axes`` option of ``tensordot``.

    Returns:
        cupy.ndarray:
            The inverse of a tensor whose shape is equivalent to
            ``a.shape[ind:] + a.shape[:ind]``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.tensorinv`
    """
    _util._assert_cupy_array(a)

    if ind <= 0:
        raise ValueError('Invalid ind argument')
    oldshape = a.shape
    invshape = oldshape[ind:] + oldshape[:ind]
    prod = internal.prod(oldshape[ind:])
    a = a.reshape(prod, -1)
    a_inv = inv(a)
    return a_inv.reshape(*invshape)

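# Usage sketch (hypothetical example, following the
# numpy.linalg.tensorinv documentation; assumes ``tensorinv`` and
# ``tensorsolve`` above are in scope and a CUDA device is available).
import cupy

a = cupy.eye(4 * 6).reshape(4, 6, 8, 3)  # prod((4, 6)) == prod((8, 3)) == 24
ainv = tensorinv(a, ind=2)               # ainv.shape == (8, 3, 4, 6)
b = cupy.random.rand(4, 6)

# Contracting ``ainv`` with ``b`` solves the same system as tensorsolve.
cupy.testing.assert_allclose(
    cupy.tensordot(ainv, b, axes=2), tensorsolve(a, b), atol=1e-5)
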
def test_empty(self):
    self.assertEqual(internal.prod([]), 1)

def test_one(self):
    self.assertEqual(internal.prod([2]), 2)

def test_two(self):
    self.assertEqual(internal.prod([2, 3]), 6)

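# For reference, ``internal.prod`` behaves like a plain product reduction
# over a Python sequence. A pure-Python sketch of the behavior the tests
# above verify (this is an illustration, not the actual CuPy
# implementation, which lives in a compiled module):
import functools
import operator

def prod_sketch(seq):
    # An empty sequence yields the multiplicative identity (see test_empty).
    return functools.reduce(operator.mul, seq, 1)

assert prod_sketch([]) == 1
assert prod_sketch([2]) == 2
assert prod_sketch([2, 3]) == 6
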
def check_usv(self, shape, dtype):
    array = testing.shaped_random(
        shape, numpy, dtype=dtype, seed=self.seed)
    a_cpu = numpy.asarray(array, dtype=dtype)
    a_gpu = cupy.asarray(array, dtype=dtype)
    result_cpu = numpy.linalg.svd(a_cpu, full_matrices=self.full_matrices)
    result_gpu = cupy.linalg.svd(a_gpu, full_matrices=self.full_matrices)
    # Check that the input matrix was not overwritten
    cupy.testing.assert_allclose(a_gpu, a_cpu)
    assert len(result_gpu) == 3
    for i in range(3):
        assert result_gpu[i].shape == result_cpu[i].shape
        assert result_gpu[i].dtype == result_cpu[i].dtype
    u_cpu, s_cpu, vh_cpu = result_cpu
    u_gpu, s_gpu, vh_gpu = result_gpu
    cupy.testing.assert_allclose(s_gpu, s_cpu, rtol=1e-5, atol=1e-4)

    # reconstruct the matrix
    k = s_cpu.shape[-1]
    if len(shape) == 2:
        if self.full_matrices:
            a_gpu_usv = cupy.dot(u_gpu[:, :k] * s_gpu, vh_gpu[:k, :])
        else:
            a_gpu_usv = cupy.dot(u_gpu * s_gpu, vh_gpu)
    else:
        if self.full_matrices:
            a_gpu_usv = cupy.matmul(u_gpu[..., :k] * s_gpu[..., None, :],
                                    vh_gpu[..., :k, :])
        else:
            a_gpu_usv = cupy.matmul(u_gpu * s_gpu[..., None, :], vh_gpu)
    cupy.testing.assert_allclose(a_gpu, a_gpu_usv, rtol=1e-4, atol=1e-4)

    # assert unitary
    if len(shape) == 2:
        cupy.testing.assert_allclose(
            cupy.matmul(u_gpu.T.conj(), u_gpu),
            numpy.eye(u_gpu.shape[1]), atol=1e-4)
        cupy.testing.assert_allclose(
            cupy.matmul(vh_gpu, vh_gpu.T.conj()),
            numpy.eye(vh_gpu.shape[0]), atol=1e-4)
    else:
        batch = prod(shape[:-2])
        u_len = u_gpu.shape[-1]
        vh_len = vh_gpu.shape[-2]
        if batch == 0:
            id_u_cpu = numpy.empty(shape[:-2] + (u_len, u_len))
            id_vh_cpu = numpy.empty(shape[:-2] + (vh_len, vh_len))
        else:
            id_u_cpu = [numpy.eye(u_len) for _ in range(batch)]
            id_u_cpu = numpy.stack(id_u_cpu, axis=0).reshape(
                *(shape[:-2]), u_len, u_len)
            id_vh_cpu = [numpy.eye(vh_len) for _ in range(batch)]
            id_vh_cpu = numpy.stack(id_vh_cpu, axis=0).reshape(
                *(shape[:-2]), vh_len, vh_len)
        cupy.testing.assert_allclose(
            cupy.matmul(u_gpu.swapaxes(-1, -2).conj(), u_gpu),
            id_u_cpu, atol=1e-4)
        cupy.testing.assert_allclose(
            cupy.matmul(vh_gpu, vh_gpu.swapaxes(-1, -2).conj()),
            id_vh_cpu, atol=1e-4)

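# The reconstruction step above relies on the thin-SVD identity
# a == (u[..., :k] * s) @ vh[..., :k, :] with k == min(m, n). A standalone
# sketch of the same check (hypothetical example; assumes a CUDA device):
import cupy

a = cupy.random.rand(5, 3)
u, s, vh = cupy.linalg.svd(a, full_matrices=True)
k = s.shape[-1]  # k == 3, since min(5, 3) == 3

# Only the first k columns of u contribute to the reconstruction.
cupy.testing.assert_allclose(
    cupy.dot(u[:, :k] * s, vh[:k, :]), a, rtol=1e-5, atol=1e-5)
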