def cuda_svd(): # demo_types = [np.float32, np.complex64] demo_types = [np.float32] if cula._libcula_toolkit == "premium" and cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3: demo_types.extend([np.float64, np.complex128]) for t in demo_types: print "Testing svd for type " + str(np.dtype(t)) # numpy.float32 a = np.asarray((np.random.rand(1000, 1000) - 0.5) / 10, t) print a.shape u_cpu, s_cpu, v_cpu = np.linalg.svd(a) # gpu array a_gpu = gpuarray.to_gpu(a) # call cula rutine u_gpu, s_gpu, vh_gpu = culinalg.svd(a_gpu) print u_gpu.get() print u_cpu # print s_gpu.get() # print vh_gpu.get() a_rec = np.dot(u_gpu.get(), np.dot(np.diag(s_gpu.get()), vh_gpu.get())) print "Success status: ", np.allclose(a, a_rec, atol=1e-3) print "Maximum error: ", np.max(np.abs(a - a_rec)) print ""
def test_svd_so_float64(self):
    """SVD of a real float64 6x6 matrix with jobu='s', jobvt='o' reconstructs it."""
    mat = np.asarray(np.random.randn(6, 6), np.float64)
    mat_gpu = gpuarray.to_gpu(mat)
    u_gpu, s_gpu, vh_gpu = linalg.svd(mat_gpu, 's', 'o')
    sigma = np.diag(s_gpu.get())
    rebuilt = np.dot(u_gpu.get(), np.dot(sigma, vh_gpu.get()))
    assert np.allclose(mat, rebuilt, atol=atol_float64)
def test_svd_so_complex128(self):
    """SVD of a complex128 6x6 matrix with jobu='s', jobvt='o' reconstructs it."""
    real = np.random.randn(6, 6)
    imag = np.random.randn(6, 6)
    mat = np.asarray(real + 1j * imag, np.complex128)
    mat_gpu = gpuarray.to_gpu(mat)
    u_gpu, s_gpu, vh_gpu = linalg.svd(mat_gpu, 's', 'o')
    rebuilt = np.dot(u_gpu.get(), np.dot(np.diag(s_gpu.get()), vh_gpu.get()))
    assert np.allclose(mat, rebuilt, atol=atol_float64)
def test_svd_ss_float64(self):
    """SVD of a real float64 9x6 (tall) matrix with jobu='s', jobvt='s' reconstructs it."""
    mat = np.asarray(np.random.randn(9, 6), np.float64)
    mat_gpu = gpuarray.to_gpu(mat)
    u_gpu, s_gpu, vh_gpu = linalg.svd(mat_gpu, 's', 's')
    sigma = np.diag(s_gpu.get())
    rebuilt = np.dot(u_gpu.get(), np.dot(sigma, vh_gpu.get()))
    assert np.allclose(mat, rebuilt, atol=atol_float64)
def test_svd_ss_complex128(self):
    """SVD of a complex128 9x6 (tall) matrix with jobu='s', jobvt='s' reconstructs it."""
    real = np.random.randn(9, 6)
    imag = np.random.randn(9, 6)
    mat = np.asarray(real + 1j * imag, np.complex128)
    mat_gpu = gpuarray.to_gpu(mat)
    u_gpu, s_gpu, vh_gpu = linalg.svd(mat_gpu, 's', 's')
    rebuilt = np.dot(u_gpu.get(), np.dot(np.diag(s_gpu.get()), vh_gpu.get()))
    assert np.allclose(mat, rebuilt, atol=atol_float64)
def testForSize(x): print 'Image Size %dx%d' % (x,x) x = np.random.rand(x**2, 40).astype(np.float32) def svdoverwrite(a_gpu, u_gpu, s_gpu, v_gpu, m, n, lda, ldu, ldvt): data_type = a_gpu.dtype.type real_type = np.float32 cula_func = cula._libcula.culaDeviceSgesvd jobu = 'S' jobvt = 'S' status = cula_func(jobu, jobvt, m, n, int(a_gpu.gpudata), lda, int(s_gpu.gpudata), int(u_gpu.gpudata), ldu, int(v_gpu.gpudata), ldvt) cula.culaCheckStatus(status) # Free internal CULA memory: cula.culaFreeBuffers() t = time.time() gpux = gpuarray.to_gpu(x) pushtime = time.time() - t print '[Push results]', pushtime t = time.time() u_g, s_g, v_g = cla.svd(gpux, 'S', 'S') gpusvdtime = time.time() - t print '[GPU results]', gpusvdtime t = time.time() u_g = u_g.get() s_g = s_g.get() v_g = v_g.get() fetchtime = time.time() - t print '[Fetch time]', fetchtime t = time.time() u_c, s_c, v_c = la.svd(x, full_matrices=False) cputime = time.time() - t print '[CPU time]', cputime print '[GPU time]', pushtime + gpusvdtime + fetchtime # Result on desktop Quadro FX1800 print
def test_svd_so_float32(self):
    """SVD of a real float32 6x6 matrix with jobu='s', jobvt='o' reconstructs it."""
    mat = np.asarray(np.random.randn(6, 6), np.float32)
    mat_gpu = gpuarray.to_gpu(mat)
    u_gpu, s_gpu, vh_gpu = linalg.svd(mat_gpu, "s", "o")
    sigma = np.diag(s_gpu.get())
    rebuilt = np.dot(u_gpu.get(), np.dot(sigma, vh_gpu.get()))
    assert np.allclose(mat, rebuilt, atol=atol_float32)
def test_svd_ss_complex64(self):
    """SVD of a complex64 9x6 (tall) matrix with jobu='s', jobvt='s' reconstructs it."""
    real = np.random.randn(9, 6)
    imag = np.random.randn(9, 6)
    mat = np.asarray(real + 1j * imag, np.complex64)
    mat_gpu = gpuarray.to_gpu(mat)
    u_gpu, s_gpu, vh_gpu = linalg.svd(mat_gpu, "s", "s")
    rebuilt = np.dot(u_gpu.get(), np.dot(np.diag(s_gpu.get()), vh_gpu.get()))
    assert np.allclose(mat, rebuilt, atol=atol_float32)
import pycuda.autoinit import pycuda.driver as drv import pycuda.gpuarray as gpuarray import numpy as np import scikits.cuda.linalg as culinalg import scikits.cuda.misc as cumisc culinalg.init() # Double precision is only supported by devices with compute # capability >= 1.3: import string import scikits.cuda.cula as cula demo_types = [np.float32, np.complex64] if cula._libcula_toolkit == 'premium' and \ cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3: demo_types.extend([np.float64, np.complex128]) for t in demo_types: print 'Testing svd for type ' + str(np.dtype(t)) a = np.asarray((np.random.rand(50, 50) - 0.5) / 10, t) a_gpu = gpuarray.to_gpu(a) u_gpu, s_gpu, vh_gpu = culinalg.svd(a_gpu) a_rec = np.dot(u_gpu.get(), np.dot(np.diag(s_gpu.get()), vh_gpu.get())) print 'Success status: ', np.allclose(a, a_rec, atol=1e-3) print 'Maximum error: ', np.max(np.abs(a - a_rec)) print ''
""" import pycuda.autoinit import pycuda.driver as drv import pycuda.gpuarray as gpuarray import numpy as np import scikits.cuda.linalg as culinalg import scikits.cuda.misc as cumisc culinalg.init() # Double precision is only supported by devices with compute # capability >= 1.3: import string import scikits.cuda.cula as cula demo_types = [np.float32, np.complex64] if cula._libcula_toolkit == 'premium' and \ cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3: demo_types.extend([np.float64, np.complex128]) for t in demo_types: print 'Testing svd for type ' + str(np.dtype(t)) a = np.asarray((np.random.rand(50, 50)-0.5)/10, t) a_gpu = gpuarray.to_gpu(a) u_gpu, s_gpu, vh_gpu = culinalg.svd(a_gpu) a_rec = np.dot(u_gpu.get(), np.dot(np.diag(s_gpu.get()), vh_gpu.get())) print 'Success status: ', np.allclose(a, a_rec, atol=1e-3) print 'Maximum error: ', np.max(np.abs(a-a_rec)) print ''