def kij_lu_decomposer_opt_gpu(M):
    """Compute a packed LU factorisation of ``M`` on the GPU (kij loop order).

    For each pivot column ``k`` and every row ``i`` below it, the entry
    ``N[i, k]`` is divided by the pivot ``N[k, k]`` and the remainder of
    row ``i`` is updated with ``N[i, k+1:] -= N[i, k] * N[k, k+1:]`` through
    a cuBLAS ``Daxpy`` call.  The result therefore holds the elimination
    multipliers strictly below the diagonal and the upper-triangular factor
    on and above it.

    No pivoting is performed, so a zero pivot is not handled.

    Args:
        M: 2-D array-like.  It is converted to a C-contiguous ``float64``
            array before upload (no copy is made when it already is one).
            Both loop bounds come from ``shape[1]``, so the matrix is
            presumably square -- TODO confirm against callers.

    Returns:
        A host ``numpy`` array with the packed factors; ``M`` itself is not
        modified.
    """
    # Imported only for its import-time side effects (the name is never used).
    import pycuda.autoinit  # noqa: F401
    import pycuda.gpuarray as gpuarray
    from skcuda.cublas import cublasCreate, cublasDaxpy, cublasDestroy

    # Daxpy is a double-precision routine and the unit strides passed below
    # describe rows of a C-ordered matrix, so normalise the input first.
    host = np.ascontiguousarray(M, dtype=np.float64)
    n = host.shape[1]

    N_gpu = gpuarray.to_gpu(host)
    h = cublasCreate()
    try:
        # The last column has no rows below it, so k stops at n - 2.
        for k in range(n - 1):
            # Part of the pivot row to the right of the diagonal; the same
            # view is reused for every row i.
            pivot_tail = N_gpu[k, k + 1:]
            for i in range(k + 1, n):
                N_gpu[i, k] = N_gpu[i, k] / N_gpu[k, k]
                row_tail = N_gpu[i, k + 1:]
                # N[i, k+1:] -= N[i, k] * N[k, k+1:]
                # The multiplier is read back to the host because Daxpy
                # is given alpha as a host scalar here.
                cublasDaxpy(
                    h,
                    pivot_tail.size,
                    -np.float64(N_gpu[i, k].get()),
                    pivot_tail.gpudata,
                    1,
                    row_tail.gpudata,
                    1,
                )
        # Move from GPU to CPU
        return N_gpu.get()
    finally:
        # Release the cuBLAS handle even when a kernel or transfer fails.
        cublasDestroy(h)
def jki_lu_decomposer_opt_gpu(M):
    """Compute a packed LU factorisation of ``M`` on the GPU (jki loop order).

    Columns are processed left to right.  For column ``j`` every earlier
    column ``k < j`` is applied with
    ``N[k+1:, j] -= N[k+1:, k] * N[k, j]`` (cuBLAS ``Daxpy``), after which
    the entries below the diagonal are scaled with
    ``N[j+1:, j] /= N[j, j]`` (cuBLAS ``Dscal``).  The result holds the
    multipliers strictly below the diagonal and the upper-triangular factor
    on and above it.

    No pivoting is performed, so a zero pivot is not handled.

    Args:
        M: 2-D array-like.  It is converted to a C-contiguous ``float64``
            array before upload (no copy is made when it already is one).
            Loop bounds come from ``shape[1]`` only; the matrix is
            presumably square -- TODO confirm against callers.

    Returns:
        A host ``numpy`` array with the packed factors; ``M`` itself is not
        modified.
    """
    # Imported only for its import-time side effects (the name is never used).
    import pycuda.autoinit  # noqa: F401
    import pycuda.gpuarray as gpuarray
    from skcuda.cublas import (
        cublasCreate,
        cublasDaxpy,
        cublasDestroy,
        cublasDscal,
    )

    # Both BLAS routines are double precision, and the stride ``n`` passed
    # below is the distance between consecutive rows of a C-ordered matrix,
    # so normalise the input first.
    host = np.ascontiguousarray(M, dtype=np.float64)
    n = host.shape[1]

    N_gpu = gpuarray.to_gpu(host)
    h = cublasCreate()
    try:
        for j in range(n):
            for k in range(j):
                src_col = N_gpu[k + 1:, k]
                dst_col = N_gpu[k + 1:, j]
                # N[k+1:, j] = N[k+1:, j] - N[k+1:, k] * N[k, j]
                cublasDaxpy(
                    h,
                    src_col.size,
                    -np.float64(N_gpu[k, j].get()),
                    src_col.gpudata,
                    n,
                    dst_col.gpudata,
                    n,
                )
            below_diag = N_gpu[j + 1:, j]
            # Nothing lies below the diagonal in the final column; skip the
            # zero-length scal and the pivot read-back that goes with it.
            if below_diag.size:
                # N[j+1:, j] /= N[j, j]
                cublasDscal(
                    h,
                    below_diag.size,
                    1.0 / np.float64(N_gpu[j, j].get()),
                    below_diag.gpudata,
                    n,
                )
        # Move from GPU to CPU
        return N_gpu.get()
    finally:
        # Release the cuBLAS handle even when a kernel or transfer fails.
        cublasDestroy(h)
def fun_b_k(self, b_k_gpu, k):
    """Fill ``b_k_gpu`` with ``b`` minus every ``Ax`` term except term ``k``.

    ``self.b_gpu`` is first copied into ``b_k_gpu``; then each entry of
    ``self.Ax_gpu`` whose index differs from ``k`` is subtracted from it
    in place.  Nothing is returned.

    Args:
        b_k_gpu: device array that receives the result; presumably at
            least as long as ``self.b_gpu`` -- TODO confirm.
        k: index of the ``self.Ax_gpu`` entry to leave out.
    """
    handle = self.h

    # b_k <- b
    cublas.cublasDcopy(
        handle, self.b_gpu.size, self.b_gpu.gpudata, 1, b_k_gpu.gpudata, 1
    )

    minus_one = np.float64(-1)
    for idx in range(len(self.Ax_gpu)):
        if idx == k:
            continue
        # b_k <- b_k - Ax[idx]
        cublas.cublasDaxpy(
            handle,
            b_k_gpu.size,
            minus_one,
            self.Ax_gpu[idx].gpudata,
            1,
            b_k_gpu.gpudata,
            1,
        )
def test_cublasDaxpy(self):
    """Check cuBLAS ``Daxpy`` against the NumPy expression ``alpha*x + y``."""
    # Draw order (alpha, then x, then y) is kept so that a seeded RNG
    # produces the same operands as before.
    scale = np.float64(np.random.rand())
    host_x = np.random.rand(5).astype(np.float64)
    dev_x = gpuarray.to_gpu(host_x)
    host_y = np.random.rand(5).astype(np.float64)
    dev_y = gpuarray.to_gpu(host_y)

    # Reference result computed on the host from the untouched inputs.
    expected = scale*host_x+host_y

    cublas.cublasDaxpy(
        self.cublas_handle,
        dev_x.size,
        scale,
        dev_x.gpudata,
        1,
        dev_y.gpudata,
        1,
    )

    # Daxpy accumulates into y on the device, so read y back to compare.
    assert np.allclose(dev_y.get(), expected)
def _axpy(self, handle, alpha, x_gpu, y_gpu):
    """Accumulate ``alpha * x_gpu`` into ``y_gpu`` with cuBLAS ``Daxpy``.

    Args:
        handle: cuBLAS handle to issue the call on.
        alpha: scalar multiplier applied to ``x_gpu``.
        x_gpu: device array that is read; its ``size`` sets the element
            count.
        y_gpu: device array updated in place; presumably at least as long
            as ``x_gpu`` -- TODO confirm.
    """
    count = x_gpu.size
    src_ptr = x_gpu.gpudata
    dst_ptr = y_gpu.gpudata
    # Both vectors are walked with unit stride.
    cublas.cublasDaxpy(handle, count, alpha, src_ptr, 1, dst_ptr, 1)
def _zaxpy(self, handle, z_gpu, alpha, x_gpu, y_gpu):
    """Store ``alpha * x_gpu + y_gpu`` in ``z_gpu`` using two cuBLAS calls.

    ``y_gpu`` is copied into ``z_gpu`` first, then ``alpha * x_gpu`` is
    accumulated onto ``z_gpu``.

    NOTE(review): because the copy runs first, passing the same buffer for
    ``z_gpu`` and ``x_gpu`` would overwrite ``x`` before it is read --
    confirm callers never alias them.

    Args:
        handle: cuBLAS handle to issue the calls on.
        z_gpu: device array that receives the result.
        alpha: scalar multiplier applied to ``x_gpu``.
        x_gpu: device array scaled by ``alpha``; its ``size`` sets the
            element count of the accumulate step.
        y_gpu: device array added unscaled; its ``size`` sets the element
            count of the copy step.
    """
    out_ptr = z_gpu.gpudata

    # z <- y
    cublas.cublasDcopy(handle, y_gpu.size, y_gpu.gpudata, 1, out_ptr, 1)
    # z <- alpha * x + z
    cublas.cublasDaxpy(handle, x_gpu.size, alpha, x_gpu.gpudata, 1, out_ptr, 1)