def test_pinv_complex128(self):
    a = np.asarray(np.random.rand(8, 4) +
                   1j*np.random.rand(8, 4), np.complex128)
    a_gpu = gpuarray.to_gpu(a)
    a_inv_gpu = linalg.pinv(a_gpu)
    assert np.allclose(np.linalg.pinv(a), a_inv_gpu.get(),
                       atol=atol_float64)
def test_pinv_float32(self):
    a = np.asarray(np.random.rand(8, 4), np.float32)
    a_gpu = gpuarray.to_gpu(a)
    a_inv_gpu = linalg.pinv(a_gpu)
    assert np.allclose(np.linalg.pinv(a), a_inv_gpu.get(),
                       atol=atol_float32)
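# Context assumed by the two test methods above, sketched here for
# completeness: module-level imports, skcuda initialization, and the
# tolerance constants the assertions reference. The class name and the
# tolerance values are illustrative, not the exact ones from the original
# test suite.
import unittest

import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import skcuda.linalg as linalg

linalg.init()

# Tolerances referenced by the assertions above (illustrative values).
atol_float32 = 1e-5
atol_float64 = 1e-8


class TestPinv(unittest.TestCase):
    # test_pinv_float32 and test_pinv_complex128 are defined as methods of a
    # class like this one and executed with unittest.main().
    pass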
def ginverse(self, A):
    # Pseudoinverse on the GPU: transpose in, run skcuda's pinv, transpose back.
    A = A.transpose()
    #print("INV", A.shape, type(A), A.flags.c_contiguous, A.flags.f_contiguous)
    #A = gpuarray.to_gpu(np.array(A.get(), dtype=A.dtype, order='C'))
    #print("INV", A.shape, type(A), A.flags.c_contiguous, A.flags.f_contiguous)
    out = linalg.pinv(A)
    out = out.transpose()
    #o = out.get()
    #print("Sum", np.sum(np.subtract(o, o.T)))
    #print("INV2", out.shape, type(out), out.flags.c_contiguous, out.flags.f_contiguous)
    #out = gpuarray.to_gpu(np.array(out.get(), dtype=out.dtype, order='F'))
    #A = A.get()
    #A = np.nan_to_num(A)
    #out = self.togpu(la.pinv(A), dtype=A.dtype)
    sync_only()
    return out
def itkrm(data, K, S, maxitr, startD=np.array([1])):
    M, N = data.shape
    if startD.all() == 1:
        D_init = np.random.randn(M, K)
    else:
        D_init = startD
    Y = data
    I_D = np.zeros((S, N), dtype=np.int32)
    # N_timer.log(0, log_s='20 data test, 14/03', open_file=1)

    # Algorithm
    D_old = D_init
    for i in range(maxitr):
        start_time = N_timer.cont_timer(0, 0)
        N_timer.Timer(i, maxitr)
        # Find the S best-matching atoms for every training signal.
        for n in range(N):
            I_D[:, n] = max_atoms(D_old, Y[:, n], S)
        D_new = np.zeros((M, K))
        for n in range(N):
            # Projection onto the span of the selected atoms; the
            # pseudoinverse is computed on the GPU.
            to_proj = D_old[:, I_D[:, n]]
            X_GPU = gpuarray.to_gpu(to_proj.T)
            Z_GPU = linalg.pinv(X_GPU, lib='cusolver')
            # RES_GPU = linalg.dot(Z_GPU, X_GPU)
            to_proj = Z_GPU.get() @ to_proj.T
            # to_proj = proj(to_proj)
            matproj = np.repeat(np.array([to_proj @ Y[:, n]]).T, S, axis=1)
            vecproj = D_old[:, I_D[:, n]] @ np.diag(
                np.diag(D_old[:, I_D[:, n]].T @ D_old[:, I_D[:, n]])**-1
                * (D_old[:, I_D[:, n]].T @ Y[:, n]))
            signer = np.sign(D_old[:, I_D[:, n]].T @ Y[:, n])
            D_new[:, I_D[:, n]] = D_new[:, I_D[:, n]] + (
                np.repeat(np.array([Y[:, n]]).T, S, axis=1)
                - matproj + vecproj) * signer
            # for k in I_D[:, n]:
            #     vecproj = D_old[:, k] * (D_old[:, k].T @ D_old[:, k])**-1 * (D_old[:, k].T @ Y[:, n])
            #     signer = np.sign(D_old[:, I_D[:, n]].T @ Y[:, n])
            #     D_new[:, k] = D_new[:, k] + (Y[:, n] - matproj + vecproj[:, m]) * signer

        # borrowed from Karin: re-draw atoms whose norm has collapsed to ~0
        scale = np.sum(D_new * D_new, axis=0)
        iszero = np.where(scale < 0.00001)[0]
        D_new[:, iszero] = np.random.randn(M, len(iszero))
        # end borrowed

        D_new = normalize_mat_col(D_new)
        D_old = 1 * D_new
        # N_timer.log(N_timer.cont_timer(start_time, 1))
    # N_timer.log("end", open_file=-1)
    return D_old
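# Minimal usage sketch for itkrm on synthetic data. The helpers max_atoms,
# normalize_mat_col and N_timer are assumed to be defined elsewhere in the
# codebase, as in the function above; the sizes below are illustrative.
import numpy as np

M, N = 64, 500           # signal dimension, number of training signals
K, S = 128, 8            # number of dictionary atoms, sparsity level

data = np.random.randn(M, N)
D = itkrm(data, K, S, maxitr=10)
print(D.shape)           # (M, K): columns are unit-norm dictionary atoms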
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import numpy as np

import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()

# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
import scikits.cuda.cula as cula
demo_types = [np.float32, np.complex64]
if cula._libcula_toolkit == 'premium' and \
   cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
    demo_types.extend([np.float64, np.complex128])

for t in demo_types:
    print('Testing pinv for type ' + str(np.dtype(t)))
    a = np.asarray((np.random.rand(50, 50) - 0.5) / 10, t)
    a_gpu = gpuarray.to_gpu(a)
    a_inv_gpu = culinalg.pinv(a_gpu)
    print('Success status: ', np.allclose(np.linalg.pinv(a), a_inv_gpu.get(),
                                          atol=1e-2))
    print('Maximum error: ', np.max(np.abs(np.linalg.pinv(a) -
                                           a_inv_gpu.get())))
    print('')
START = time()
foo[BLOCKS_PER_GRID, THREADS_PER_BLOCK](C_GPU)
print(time() - START)

# Copy the result back to the host
C = C_GPU.copy_to_host()
print(C)

A = np.random.rand(2, 2)
CU_A = cuda.device_array_like(A)
# Wrap the numba device array in a pycuda GPUArray without copying,
# by handing pycuda the raw device pointer and strides.
PYCU_A = pycuda.gpuarray.GPUArray(
    shape=CU_A.shape,
    dtype=CU_A.dtype,
    gpudata=CU_A.gpu_data.device_ctypes_pointer.value,
    strides=CU_A.strides)
PYCU_A.get()

X = np.asarray(np.random.rand(57, 57), np.float32)
Y = np.asarray(np.random.rand(4, 4), np.float32)
X_GPU = gpuarray.to_gpu(X)
Y_GPU = gpuarray.to_gpu(Y)

# Time the GPU pseudoinverse against NumPy on the CPU.
START = time()
Z_GPU = linalg.pinv(PYCU_A, lib='cusolver')
GPU_RES = Z_GPU.get()
print(time() - START)

START = time()
CPU_RES = np.linalg.pinv(A)
print(time() - START)
@author: Niels
"""
import math
import os
from time import time

import numpy as np
import pycuda.autoinit
import scipy as sp
import pycuda.gpuarray as gpuarray
import skcuda.linalg as linalg
from numba import cuda

# Make sure the MSVC compiler is on PATH so pycuda/numba can invoke it.
if os.system("cl.exe"):
    os.environ[
        'PATH'] += ';' + r"C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin"
if os.system("cl.exe"):
    raise RuntimeError("cl.exe still not found, path probably incorrect")


# NOTE: this kernel cannot work as written: numba CUDA kernels cannot call
# np.linalg.pinv and do not return values. The pseudoinverse is instead
# computed on the GPU via skcuda below.
@cuda.jit
def GPU_pin(A, B):
    B = np.linalg.pinv(A)
    return B


to_proj = A.T
To_proj = proj(to_proj)
X_GPU = gpuarray.to_gpu(to_proj.T)
Z_GPU = linalg.pinv(X_GPU, lib='cusolver')
to_proj = to_proj @ Z_GPU.get().T
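# Since np.linalg.pinv cannot run inside a numba CUDA kernel, a host-side
# helper built on skcuda achieves the same goal. This is a minimal sketch;
# the name gpu_pinv is hypothetical and not part of the original code.
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import skcuda.linalg as linalg

linalg.init()


def gpu_pinv(A):
    """Upload A, compute its pseudoinverse with skcuda, and download the result."""
    A_gpu = gpuarray.to_gpu(np.ascontiguousarray(A, dtype=np.float32))
    return linalg.pinv(A_gpu).get()


# Quick check against NumPy on a small random matrix.
A = np.random.rand(57, 57).astype(np.float32)
print(np.allclose(gpu_pinv(A), np.linalg.pinv(A), atol=1e-4))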