def ikj_lu_decomposer_opt_gpu(M):
    """In-place LU decomposition (Doolittle, ikj loop ordering) on the GPU.

    Uses cuBLAS level-1 routines (DSCAL/DAXPY) on row slices of the matrix
    held in GPU memory.

    Parameters
    ----------
    M : numpy.ndarray
        float64 matrix to decompose (assumed square with no pivoting needed
        -- TODO confirm callers guarantee nonzero diagonal).

    Returns
    -------
    numpy.ndarray
        Combined factors: U in the upper triangle, strict lower triangle of
        L (unit diagonal implied) below it.
    """
    # BUG FIX: numpy was used (np.float64) but never imported -> NameError
    # on the first inner-loop iteration for any matrix larger than 1x1.
    import numpy as np
    import pycuda.autoinit  # noqa: F401 -- side effect: initialize CUDA context
    import pycuda.gpuarray as gpuarray
    from skcuda.cublas import cublasCreate, cublasDaxpy, cublasDscal, cublasDestroy

    n = M.shape[1]

    N_gpu = gpuarray.to_gpu(M)

    h = cublasCreate()
    try:
        for i in range(n):
            for k in range(i):
                # N[i,k] = N[i,k] / N[k,k]
                # Single-element scale; the pivot is fetched to the host once.
                cublasDscal(h, N_gpu[i, k].size,
                            1.0 / np.float64(N_gpu[k, k].get()),
                            N_gpu[i, k].gpudata, 1)
                # N[i,k+1:] -= N[i,k] * N[k,k+1:]
                # Row slices are contiguous (row-major), hence stride 1.
                cublasDaxpy(h, N_gpu[k, k + 1:].size,
                            -np.float64(N_gpu[i, k].get()),
                            N_gpu[k, k + 1:].gpudata, 1,
                            N_gpu[i, k + 1:].gpudata, 1)

        # Move the result from GPU to CPU.
        N = N_gpu.get()
    finally:
        # BUG FIX: release the cuBLAS handle even if a kernel call raises.
        cublasDestroy(h)

    return N
# Example #2 (scraping artifact: original page marker "Exemple #2 / 0";
# commented out so the file remains valid Python)
 def test_cublasDscal(self):
     """cublasDscal must scale a float64 GPU vector by alpha, in place."""
     host_vec = np.random.rand(5).astype(np.float64)
     dev_vec = gpuarray.to_gpu(host_vec)
     factor = np.float64(np.random.rand())
     cublas.cublasDscal(self.cublas_handle, dev_vec.size, factor,
                        dev_vec.gpudata, 1)
     assert np.allclose(dev_vec.get(), factor * host_vec)
def jki_lu_decomposer_opt_gpu(M):
    """In-place LU decomposition (Doolittle, jki loop ordering) on the GPU.

    Column-oriented variant: cuBLAS DAXPY/DSCAL operate on column slices
    using stride ``n`` (row-major storage, n columns per row).

    Parameters
    ----------
    M : numpy.ndarray
        float64 matrix to decompose (assumed square with no pivoting needed
        -- TODO confirm callers guarantee nonzero diagonal).

    Returns
    -------
    numpy.ndarray
        Combined factors: U in the upper triangle, strict lower triangle of
        L (unit diagonal implied) below it.
    """
    # BUG FIX: numpy was used (np.float64) but never imported -> NameError
    # on the first update for any matrix larger than 1x1.
    import numpy as np
    import pycuda.autoinit  # noqa: F401 -- side effect: initialize CUDA context
    import pycuda.gpuarray as gpuarray
    from skcuda.cublas import cublasCreate, cublasDaxpy, cublasDscal, cublasDestroy

    n = M.shape[1]

    N_gpu = gpuarray.to_gpu(M)

    h = cublasCreate()
    try:
        for j in range(n):
            for k in range(j):
                # N[k+1:,j] = N[k+1:,j] - N[k+1:,k] * N[k,j]
                # Column elements are n doubles apart in row-major memory.
                cublasDaxpy(h, N_gpu[k + 1:, k].size,
                            -np.float64(N_gpu[k, j].get()),
                            N_gpu[k + 1:, k].gpudata, n,
                            N_gpu[k + 1:, j].gpudata, n)

            # N[j+1:,j] /= N[j,j]
            cublasDscal(h, N_gpu[j + 1:, j].size,
                        1.0 / np.float64(N_gpu[j, j].get()),
                        N_gpu[j + 1:, j].gpudata, n)

        # Move the result from GPU to CPU.
        N = N_gpu.get()
    finally:
        # BUG FIX: release the cuBLAS handle even if a kernel call raises.
        cublasDestroy(h)

    return N