def sum(self, x, abs = False):
    '''
    Reduce the GPU array ``x`` to a single value.

    When ``abs`` compares equal to False, the custom kernel
    ``sum(float* x, float* result, ulong x_size)`` is launched and the
    first element of its one-element float32 result buffer is returned.
    For any other value of ``abs`` the call is forwarded to
    ``cublas.cublasSasum`` (sum of absolute values) using
    ``self.handle`` and a stride of 1.
    '''
    # Dispatch is by equality rather than truthiness, so e.g. ``None``
    # selects the cuBLAS path.
    signed_sum = (abs == False)  # noqa: E712
    if not signed_sum:
        return cublas.cublasSasum(self.handle, x.size, x.gpudata, 1)

    threads_per_block = 1024
    # Always one block more than the quotient, so at least one block runs.
    block_count = int(x.size / threads_per_block) + 1

    # Single float32 zero on the device; passed to the kernel as ``result``.
    accumulator = gpuarray.to_gpu(np.zeros(1, dtype=np.float32))
    self._sum_kernel(
        x.gpudata,
        accumulator.gpudata,
        np.int64(x.size),
        block=(threads_per_block, 1, 1),
        grid=(block_count, 1),
    )
    return accumulator[0]
def test_cublasSasum(self):
    """Check ``cublas.cublasSasum`` against a NumPy reference.

    A random float32 vector of length 5 is copied to the GPU and the
    cuBLAS result is compared with ``np.sum(np.abs(x))``.

    The cuBLAS handle is passed as the first argument, as the
    handle-based ``cublasSasum(handle, n, x, incx)`` signature requires;
    calling it without the handle fails with a ``TypeError``.
    NOTE(review): assumes the test fixture provides
    ``self.cublas_handle`` -- confirm against the class's setUp.
    """
    x = np.random.rand(5).astype(np.float32)
    x_gpu = gpuarray.to_gpu(x)
    result = cublas.cublasSasum(
        self.cublas_handle,  # cuBLAS context handle comes first
        x_gpu.size,
        x_gpu.gpudata,
        1,  # stride between consecutive elements
    )
    assert np.allclose(result, np.sum(np.abs(x)))
def test_cublasSasum(self):
    """Compare ``cublas.cublasSasum`` with ``np.sum(np.abs(...))``.

    Uses a random float32 vector of length 5, the handle stored in
    ``self.cublas_handle``, and a stride of 1.
    """
    host_vec = np.random.rand(5).astype(np.float32)
    expected = np.sum(np.abs(host_vec))

    device_vec = gpuarray.to_gpu(host_vec)
    actual = cublas.cublasSasum(
        self.cublas_handle,
        device_vec.size,
        device_vec.gpudata,
        1,
    )

    assert np.allclose(actual, expected)