# Example #1 (scraped separator; original vote count: 0)
def gpu_skcuda_cusolver_cusolverDnDgesvd_S(input):
    coloring_print("\nGPU: skcuda.cusolver.cusolverDnDgesvd() 'S' option")

    # #縦横を逆(≒転地)してcolumn-majorにし、GPUのcusolverDnDgesvd()に対応させる。結果配列のU,Vが逆になる
    n, m = input.shape

    # change function by data type
    if input.dtype == np.dtype('float64'):
        get_buffer = solver.cusolverDnDgesvd_bufferSize
        cusolver_svd = solver.cusolverDnDgesvd
    elif input.dtype == np.dtype('float32'):
        get_buffer = solver.cusolverDnSgesvd_bufferSize
        cusolver_svd = solver.cusolverDnSgesvd
    else:
        print "Error: data type must be float64 or float32"

    h2d_start = time.time()
    input_gpu = gpuarray.to_gpu(input)
    h2d_end = time.time()
    print "H2D: ", h2d_end - h2d_start, "[sec]"

    # Set up work buffers:
    h = solver.cusolverDnCreate()
    Lwork = get_buffer(h, m, n)
    workspace_gpu = gpuarray.zeros(Lwork, input.dtype)
    devInfo_gpu = gpuarray.zeros(1, np.int32)

    # Set up output buffers:
    s_gpu = gpuarray.zeros(min(m, n), input.dtype)
    u_gpu = gpuarray.zeros((n, n), input.dtype)
    vh_gpu = gpuarray.zeros((m, m), input.dtype)

    # 'S': the first min(m,n) columns of U (the left singular vectors) are returned in the array U
    cusolver_S_svd_start = time.time()
    status = cusolver_svd(h, 'S', 'S', m, n, input_gpu.gpudata, m,
                          s_gpu.gpudata, u_gpu.gpudata, m, vh_gpu.gpudata, n,
                          workspace_gpu.gpudata, Lwork, 0, devInfo_gpu.gpudata)
    cusolver_S_svd_end = time.time()

    print "solver.cusolverDnSgesvd() 'S' option", cusolver_S_svd_end - cusolver_S_svd_start, "[sec]"
    print "Total: ", cusolver_S_svd_end - h2d_start, "[sec]"

    # u and s is swapped (数学的に正しいかはわからない)
    check_result(input, vh_gpu.get(), s_gpu.get(), u_gpu.get())

    solver.cusolverDnDestroy(h)
# Example #2 (scraped separator; original vote count: 0)
def gpu_skcuda_cusolver_cusolverDnDgesvd_N(input):
    coloring_print("\nGPU: skcuda.cusolver.cusolverDnDgesvd() 'N' option")

    # #縦横を逆(≒転地)してcolumn-majorにし、GPUのcusolverDnDgesvd()に対応させる。結果配列のU,Vが逆になる
    n, m = input.shape

    # change function by data type
    if input.dtype == np.dtype('float64'):
        get_buffer = solver.cusolverDnDgesvd_bufferSize
        cusolver_svd = solver.cusolverDnDgesvd
    elif input.dtype == np.dtype('float32'):
        get_buffer = solver.cusolverDnSgesvd_bufferSize
        cusolver_svd = solver.cusolverDnSgesvd
    else:
        print "Error: data type must be float64 or float32"

    h2d_start = time.time()
    input_gpu = gpuarray.to_gpu(input)
    h2d_end = time.time()
    print "H2D: ", h2d_end - h2d_start, "[sec]"

    # Set up work buffers:
    h = solver.cusolverDnCreate()
    Lwork = get_buffer(h, m, n)
    workspace_gpu = gpuarray.zeros(Lwork, input.dtype)
    devInfo_gpu = gpuarray.zeros(1, np.int32)

    # Set up output buffers:
    s_gpu = gpuarray.zeros(min(m, n), input.dtype)

    # 'N': no columns of U (no left singular vectors) are computed.
    cusolver_N_svd_start = time.time()
    status = cusolver_svd(h, 'N', 'N', m, n, input_gpu.gpudata, m,
                          s_gpu.gpudata, 0, m, 0, n, workspace_gpu.gpudata, 0,
                          0, devInfo_gpu.gpudata)
    cusolver_N_svd_end = time.time()
    print "solver.cusolverDnSgesvd() 'N' option: ", cusolver_N_svd_end - cusolver_N_svd_start, "[sec]"
    print "Total: ", cusolver_N_svd_end - h2d_start, "[sec]"

    print "only s is computed"
    # print s_gpu.get()

    solver.cusolverDnDestroy(h)
# Example #3 (scraped separator; original vote count: 0)
# Device scratch buffer for the batched Jacobi eigensolver plus one int32
# convergence flag per matrix in the batch.
# (handle, params, lwork, n, batchSize, A, x_gpu and w_gpu are created
# earlier in the script, outside this excerpt.)
workspace_gpu = gpuarray.zeros(lwork, dtype=A.dtype)
info = gpuarray.zeros(batchSize, dtype=np.int32)

# Compute:
# Batched Hermitian eigendecomposition ('C' prefix = single-precision
# complex in LAPACK naming). 'CUSOLVER_EIG_MODE_VECTOR' requests
# eigenvectors as well as eigenvalues; 'u' reads only the upper triangle
# of each batched matrix.
solver.cusolverDnCheevjBatched(handle, 'CUSOLVER_EIG_MODE_VECTOR', 'u', n,
                               x_gpu.gpudata, n, w_gpu.gpudata,
                               workspace_gpu.gpudata, lwork, info.gpudata,
                               params, batchSize)

# Print info: a nonzero entry means the Jacobi iteration for that matrix
# in the batch did not converge.
tmp = info.get()
if any(tmp):
    print "the following job did not converge:", np.nonzero(tmp)[0]
else:
    print "all jobs converged"

# Destroy handle
solver.cusolverDnDestroySyevjInfo(params)
solver.cusolverDnDestroy(handle)

# NOTE(review): x_gpu presumably holds the eigenvectors (overwritten in
# place) and w_gpu the eigenvalues — confirm against the skcuda docs.
Q = x_gpu.get()
W = w_gpu.get()
print 'maximum error in A * Q - Q * Lambda is:'
for i in range(batchSize):
    # Transpose each n x n block back to row-major (cuSOLVER returned it
    # column-major) before the residual check A*q - q*diag(w).
    q = Q[i * n:(i + 1) * n, :].T.copy()
    x = A[i * n:(i + 1) * n, :].copy()
    w = W[i, :].copy()
    print '{}th matrix'.format(i), np.abs(
        np.dot(x, q) - np.dot(q, np.diag(w))).max()
# Example #4 (scraped separator; original vote count: 0)
# Create a cuSOLVER dense handle and a 4x4 single-precision test matrix.
h = solver.cusolverDnCreate()
x = np.asarray([[1.80, 2.88, 2.05, -0.89], [5.25, -2.95, -0.95, -3.80],
                [1.58, -2.69, -2.90, -1.04], [-1.11, -0.66, -0.59,
                                              0.80]]).astype(np.float32)

# Need to reverse dimensions because CUSOLVER expects column-major matrices:
n, m = x.shape
x_gpu = gpuarray.to_gpu(x)

# Set up work buffers:
Lwork = solver.cusolverDnSgesvd_bufferSize(h, m, n)
workspace_gpu = gpuarray.zeros(Lwork, np.float32)
devInfo_gpu = gpuarray.zeros(1, np.int32)  # gesvd convergence/error code

# Set up output buffers:
s_gpu = gpuarray.zeros(min(m, n), np.float32)  # singular values
u_gpu = gpuarray.zeros((m, m), np.float32)     # left singular vectors
vh_gpu = gpuarray.zeros((n, n), np.float32)    # right singular vectors (V^H)

# Compute:
# 'A', 'A': all columns of U and all rows of V^H are computed.
# NOTE(review): status and devInfo_gpu are never checked afterwards.
status = solver.cusolverDnSgesvd(h, 'A', 'A', m, n, x_gpu.gpudata, m,
                                 s_gpu.gpudata, u_gpu.gpudata, m,
                                 vh_gpu.gpudata, n, workspace_gpu.gpudata,
                                 Lwork, 0, devInfo_gpu.gpudata)

# Confirm that solution is correct by ensuring that the original matrix can be
# obtained from the decomposition:
# (vh and u appear swapped relative to x = U*S*V^H because of the
# column-major dimension swap above; 1e-4 is allclose's rtol argument.)
print 'correct solution: ', np.allclose(
    x, np.dot(vh_gpu.get(), np.dot(np.diag(s_gpu.get()), u_gpu.get())), 1e-4)
solver.cusolverDnDestroy(h)
# Near-duplicate of the preceding example: fresh handle, same 4x4
# single-precision test matrix, full 'A'/'A' SVD.
h = solver.cusolverDnCreate()
x = np.asarray([[1.80, 2.88, 2.05, -0.89],
                [5.25, -2.95, -0.95, -3.80], 
                [1.58, -2.69, -2.90, -1.04],
                [-1.11, -0.66, -0.59, 0.80]]).astype(np.float32)

# Need to reverse dimensions because CUSOLVER expects column-major matrices:
n, m = x.shape
x_gpu = gpuarray.to_gpu(x)

# Set up work buffers:
Lwork = solver.cusolverDnSgesvd_bufferSize(h, m, n)
workspace_gpu = gpuarray.zeros(Lwork, np.float32)
devInfo_gpu = gpuarray.zeros(1, np.int32)  # gesvd convergence/error code

# Set up output buffers:
s_gpu = gpuarray.zeros(min(m, n), np.float32)  # singular values
u_gpu = gpuarray.zeros((m, m), np.float32)     # left singular vectors
vh_gpu = gpuarray.zeros((n, n), np.float32)    # right singular vectors (V^H)

# Compute:
# 'A', 'A': all columns of U and all rows of V^H are computed.
# NOTE(review): status and devInfo_gpu are never checked afterwards.
status = solver.cusolverDnSgesvd(h, 'A', 'A', m, n, x_gpu.gpudata, m, s_gpu.gpudata,
                                 u_gpu.gpudata, m, vh_gpu.gpudata, n,
                                 workspace_gpu.gpudata, Lwork, 0, devInfo_gpu.gpudata)

# Confirm that solution is correct by ensuring that the original matrix can be
# obtained from the decomposition:
# (vh and u appear swapped relative to x = U*S*V^H because of the
# column-major dimension swap above; 1e-4 is allclose's rtol argument.)
print 'correct solution: ', np.allclose(x, np.dot(vh_gpu.get(), np.dot(np.diag(s_gpu.get()), u_gpu.get())), 1e-4)
solver.cusolverDnDestroy(h)
# Query the workspace size for the batched double-precision symmetric
# Jacobi eigensolver.
# (handle, params, n, batchSize, A, x_gpu and w_gpu are created earlier in
# the script, outside this excerpt.)
lwork = solver.cusolverDnDsyevjBatched_bufferSize(handle, 'CUSOLVER_EIG_MODE_VECTOR',
                                    'u', n, x_gpu.gpudata, n,
                                    w_gpu.gpudata, params, batchSize)

# Device scratch buffer plus one int32 convergence flag per matrix.
workspace_gpu = gpuarray.zeros(lwork, dtype = A.dtype)
info = gpuarray.zeros(batchSize, dtype = np.int32)

# Compute:
# 'CUSOLVER_EIG_MODE_VECTOR' requests eigenvectors as well as eigenvalues;
# 'u' reads only the upper triangle of each batched matrix.
solver.cusolverDnDsyevjBatched(handle, 'CUSOLVER_EIG_MODE_VECTOR',
                       'u', n, x_gpu.gpudata, n,
                        w_gpu.gpudata, workspace_gpu.gpudata,
                        lwork, info.gpudata, params, batchSize)

# print info: a nonzero entry means the Jacobi iteration for that matrix
# in the batch did not converge.
tmp = info.get()
if any(tmp):
    print("the following job did not converge: %r" % np.nonzero(tmp)[0])

# Destroy handle
solver.cusolverDnDestroySyevjInfo(params)
solver.cusolverDnDestroy(handle)

# NOTE(review): x_gpu presumably holds the eigenvectors (overwritten in
# place) and w_gpu the eigenvalues — confirm against the skcuda docs.
Q = x_gpu.get()
W = w_gpu.get()
print('maximum error in A * Q - Q * Lambda is:')
for i in range(batchSize):
    # Transpose each n x n block back to row-major (cuSOLVER returned it
    # column-major) before the residual check A*q - q*diag(w).
    q = Q[i*n:(i+1)*n,:].T.copy()
    x = A[i*n:(i+1)*n,:].copy()
    w = W[i, :].copy()
    print('{}th matrix %r'.format(i) % np.abs(np.dot(x, q) - np.dot(q, np.diag(w))).max())