Example 1
def _qr_batched(a, mode):
    batch_shape = a.shape[:-2]
    batch_size = internal.prod(batch_shape)
    m, n = a.shape[-2:]
    k = min(m, n)

    # first handle any 0-size inputs
    if batch_size == 0 or k == 0:
        # support float32, float64, complex64, and complex128
        dtype, out_dtype = _util.linalg_common_type(a)

        if mode == 'reduced':
            return (cupy.empty(batch_shape + (m, k), out_dtype),
                    cupy.empty(batch_shape + (k, n), out_dtype))
        elif mode == 'complete':
            q = _util.stacked_identity(batch_shape, m, out_dtype)
            return (q, cupy.empty(batch_shape + (m, n), out_dtype))
        elif mode == 'r':
            return cupy.empty(batch_shape + (k, n), out_dtype)
        elif mode == 'raw':
            return (cupy.empty(batch_shape + (n, m), out_dtype),
                    cupy.empty(batch_shape + (k,), out_dtype))

    # ...then delegate real computation to cuSOLVER/rocSOLVER
    a = a.reshape(-1, *(a.shape[-2:]))
    out = _geqrf_orgqr_batched(a, mode)

    if mode == 'r':
        return out.reshape(batch_shape + out.shape[-2:])
    q, r = out
    q = q.reshape(batch_shape + q.shape[-2:])
    idx = -1 if mode == 'raw' else -2
    r = r.reshape(batch_shape + r.shape[idx:])
    return (q, r)
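
_qr_batched is an internal helper; the public entry point for stacked inputs is cupy.linalg.qr. A minimal usage sketch, assuming a CuPy build whose cupy.linalg.qr accepts inputs with more than two dimensions (the shapes follow the 'reduced' branch above):

import cupy

a = cupy.random.rand(4, 5, 3)             # batch of four 5x3 matrices
q, r = cupy.linalg.qr(a, mode='reduced')  # q: (4, 5, 3), r: (4, 3, 3)

# each batch element should satisfy a == q @ r (up to rounding)
assert cupy.allclose(cupy.matmul(q, r), a, atol=1e-10)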
Example 2
    def check_usv(self, shape, dtype):
        array = testing.shaped_random(shape,
                                      numpy,
                                      dtype=dtype,
                                      seed=self.seed)
        a_cpu = numpy.asarray(array, dtype=dtype)
        a_gpu = cupy.asarray(array, dtype=dtype)
        result_cpu = numpy.linalg.svd(a_cpu, full_matrices=self.full_matrices)
        result_gpu = cupy.linalg.svd(a_gpu, full_matrices=self.full_matrices)
        # Check that the SVD call did not modify the input matrix
        cupy.testing.assert_allclose(a_gpu, a_cpu)

        assert len(result_gpu) == 3
        for i in range(3):
            assert result_gpu[i].shape == result_cpu[i].shape
            assert result_gpu[i].dtype == result_cpu[i].dtype
        u_cpu, s_cpu, vh_cpu = result_cpu
        u_gpu, s_gpu, vh_gpu = result_gpu
        cupy.testing.assert_allclose(s_gpu, s_cpu, rtol=1e-5, atol=1e-4)

        # reconstruct the matrix
        k = s_cpu.shape[-1]
        if len(shape) == 2:
            if self.full_matrices:
                a_gpu_usv = cupy.dot(u_gpu[:, :k] * s_gpu, vh_gpu[:k, :])
            else:
                a_gpu_usv = cupy.dot(u_gpu * s_gpu, vh_gpu)
        else:
            if self.full_matrices:
                a_gpu_usv = cupy.matmul(u_gpu[..., :k] * s_gpu[..., None, :],
                                        vh_gpu[..., :k, :])
            else:
                a_gpu_usv = cupy.matmul(u_gpu * s_gpu[..., None, :], vh_gpu)
        cupy.testing.assert_allclose(a_gpu, a_gpu_usv, rtol=1e-4, atol=1e-4)

        # assert unitary
        u_len = u_gpu.shape[-1]
        vh_len = vh_gpu.shape[-2]
        cupy.testing.assert_allclose(
            cupy.matmul(u_gpu.swapaxes(-1, -2).conj(), u_gpu),
            _util.stacked_identity(shape[:-2], u_len, dtype),
            atol=1e-4)
        cupy.testing.assert_allclose(
            cupy.matmul(vh_gpu,
                        vh_gpu.swapaxes(-1, -2).conj()),
            _util.stacked_identity(shape[:-2], vh_len, dtype),
            atol=1e-4)
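
The same reconstruction and unitarity checks can be sketched against the public cupy.linalg.svd API, without the test-class machinery (self.seed, self.full_matrices); a minimal example for a single float64 matrix:

import cupy

a = cupy.random.rand(5, 3)
u, s, vh = cupy.linalg.svd(a, full_matrices=False)
assert cupy.allclose((u * s) @ vh, a, atol=1e-10)                # a == U diag(s) V^H
assert cupy.allclose(u.T.conj() @ u, cupy.eye(3), atol=1e-10)    # columns of U orthonormal
assert cupy.allclose(vh @ vh.T.conj(), cupy.eye(3), atol=1e-10)  # rows of V^H orthonormal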
Example 3
def _svd_batched(a, full_matrices, compute_uv):
    batch_shape = a.shape[:-2]
    batch_size = internal.prod(batch_shape)
    n, m = a.shape[-2:]

    dtype, uv_dtype = _util.linalg_common_type(a)
    s_dtype = uv_dtype.char.lower()

    # first handle any 0-size inputs
    if batch_size == 0:
        k = min(m, n)
        s = cupy.empty(batch_shape + (k, ), s_dtype)
        if compute_uv:
            if full_matrices:
                u = cupy.empty(batch_shape + (n, n), dtype=uv_dtype)
                vt = cupy.empty(batch_shape + (m, m), dtype=uv_dtype)
            else:
                u = cupy.empty(batch_shape + (n, k), dtype=uv_dtype)
                vt = cupy.empty(batch_shape + (k, m), dtype=uv_dtype)
            return u, s, vt
        else:
            return s
    elif m == 0 or n == 0:
        s = cupy.empty(batch_shape + (0, ), s_dtype)
        if compute_uv:
            if full_matrices:
                u = _util.stacked_identity(batch_shape, n, uv_dtype)
                vt = _util.stacked_identity(batch_shape, m, uv_dtype)
            else:
                u = cupy.empty(batch_shape + (n, 0), dtype=uv_dtype)
                vt = cupy.empty(batch_shape + (0, m), dtype=uv_dtype)
            return u, s, vt
        else:
            return s

    # ...then delegate real computation to cuSOLVER
    a = a.reshape(-1, *(a.shape[-2:]))
    if runtime.is_hip or (m <= 32 and n <= 32):
        # a copy is made inside _gesvdj_batched, so avoid an extra copy here
        a = a.astype(dtype, order='C', copy=False)
        out = _gesvdj_batched(a, full_matrices, compute_uv, False)
    else:
        # manually loop over cusolverDn<t>gesvd()
        # copy (via possible type casting) is done in _gesvd_batched
        # note: _gesvd_batched returns V, not V^H
        out = _gesvd_batched(a, dtype.char, full_matrices, compute_uv, False)

    if compute_uv:
        u, s, v = out
        u = u.astype(uv_dtype, copy=False)
        u = u.reshape(*batch_shape, *(u.shape[-2:]))
        s = s.astype(s_dtype, copy=False)
        s = s.reshape(*batch_shape, *(s.shape[-1:]))
        v = v.astype(uv_dtype, copy=False)
        v = v.reshape(*batch_shape, *(v.shape[-2:]))
        return u, s, v.swapaxes(-2, -1).conj()
    else:
        s = out
        s = s.astype(s_dtype, copy=False)
        s = s.reshape(*batch_shape, *(s.shape[-1:]))
        return s
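
_svd_batched is likewise an internal helper; the stacked-input path of the public cupy.linalg.svd is what exercises it. A hedged sketch, assuming the reduced (full_matrices=False) shapes derived above with k = min(m, n):

import cupy

a = cupy.random.rand(6, 4, 3)                      # batch of six 4x3 matrices
u, s, vh = cupy.linalg.svd(a, full_matrices=False)
assert u.shape == (6, 4, 3)
assert s.shape == (6, 3)
assert vh.shape == (6, 3, 3)

# singular values only, exercising the compute_uv=False branch above
s_only = cupy.linalg.svd(a, compute_uv=False)
assert cupy.allclose(s_only, s, atol=1e-6)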