def gpu_calculation(train_data):
    """Run PCA of ``train_data`` on the GPU via skcuda's eigensolver.

    The data is mean-centered per row, its covariance eigendecomposition is
    computed on the GPU, and the leading eigenvectors covering < 90% of the
    total variance are collected.  Intermediate results are saved to
    ``gpu_cov_mat.npy``, ``gpu_eig_val.npy`` and ``gpu_eig_vec.npy``.

    Parameters
    ----------
    train_data : numpy.ndarray
        2-D array, one feature per row (covariance is taken over rows).

    NOTE(review): the function computes ``pca_vec`` and ``dis_eig`` but
    returns nothing — presumably display/plotting code follows elsewhere.
    """
    # Center each row around its mean before building the covariance matrix.
    train_data = train_data - train_data.mean(axis=1, keepdims=True)
    cov_mat = np.cov(train_data)
    np.save('gpu_cov_mat.npy', cov_mat)
    cov_mat = gpuarray.to_gpu(cov_mat)
    # skcuda's linalg.eig returns (eigenvectors, eigenvalues); the original
    # code unpacked them the other way around and treated the eigenvector
    # matrix as the eigenvalue array.
    ve_gpu, va_gpu = linalg.eig(cov_mat, 'V', 'N')
    # GPUArrays must be copied back to the host with .get();
    # np.asarray() does not perform the device-to-host transfer.
    val_eig = va_gpu.get()
    vec_eig = ve_gpu.get()
    np.save('gpu_eig_val.npy', val_eig)
    np.save('gpu_eig_vec.npy', vec_eig)
    # Sort eigenpairs by decreasing eigenvalue; transpose so each row of
    # avec_eig is one eigenvector.
    idx = val_eig.argsort()[::-1]
    aval_eig = val_eig[idx]
    avec_eig = vec_eig[:, idx].T
    # Accumulate components until 90% of the total variance is reached.
    pca_vec = []
    total = val_eig.sum()
    eig_sum = 0
    for i in range(len(aval_eig)):
        eig_sum += aval_eig[i]
        if (eig_sum / total) < 0.9:
            pca_vec.append(avec_eig[i])
        else:
            break
    pca_vec = np.real(np.asarray(pca_vec))
    # First eigenface, clipped to non-negative values for display.
    dis_eig = np.real(avec_eig[0])
    dis_eig[dis_eig < 0] = 0
def sorted_eig(X, ascending=True, mode='cpu'):
    """Eigendecomposition of ``X`` with eigenvalues sorted.

    Parameters
    ----------
    X : numpy.ndarray ('cpu' mode) or pycuda GPUArray ('gpu' mode)
        Square matrix to decompose.
    ascending : bool
        Sort eigenvalues in increasing order when True, decreasing otherwise.
    mode : str
        'cpu' uses numpy; 'gpu' uses skcuda's cusolver backend.

    Returns
    -------
    (e_vals, e_vecs)
        Sorted eigenvalues and the matching eigenvectors (column-wise).
        In 'gpu' mode the eigenvector matrix is returned as a GPUArray.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'cpu' nor 'gpu' (previously this silently
        returned None).
    """
    if mode == 'cpu':
        e_vals, e_vecs = np.linalg.eig(X)
        idx = np.argsort(e_vals)
        if not ascending:
            idx = idx[::-1]
        # Reorder the columns so they stay paired with the sorted values.
        e_vecs = e_vecs[:, idx]
        e_vals = e_vals[idx]
        return e_vals, e_vecs
    elif mode == 'gpu':
        import skcuda.linalg as LA
        import pycuda.gpuarray as gpuarray
        # skcuda returns (right-eigenvectors, eigenvalues) for jobvr='V'.
        e_vecs_gpu, e_vals_gpu = LA.eig(X, 'N', 'V', lib='cusolver')
        e_vals = e_vals_gpu.get()
        idx = np.argsort(e_vals)
        # BUGFIX: honour `ascending` on the GPU path too (it was ignored).
        if not ascending:
            idx = idx[::-1]
        V_gpu = gpuarray.empty((X.shape[0], X.shape[1]), np.float32)
        d = X.shape[0]
        for i in range(d):
            V_gpu[i] = e_vecs_gpu[idx[i]]
        V_gpu = LA.transpose(V_gpu)
        # BUGFIX: return the eigenvalues in the same (sorted) order as the
        # eigenvectors; previously the raw unsorted array was returned.
        return e_vals[idx], V_gpu
    else:
        raise ValueError("mode must be 'cpu' or 'gpu', got %r" % (mode,))
linalg.init() # loading the sparse matrix supmat = np.load( '/scratch/gpfs/sbdas/Helioseismology/qdpy_output/output_files/w135_antia/super_matrix.npy' ) supmat_gpu = gpuarray.to_gpu(supmat) # number of timeit iterations niter = 10 # timing the numpy eigenvalue solver time_numpy = timeit(lambda: np.linalg.eigh(supmat), number=niter) # timing the skcuda eigenvalue solver time_skcuda = timeit(lambda: linalg.eig(supmat_gpu), number=niter) time_numpy /= niter time_skcuda /= niter print(f'numpy: {time_numpy}') print(f'skcuda: {time_skcuda}') # getting eigenvalues and eigenvectors for both cases to compare eval_np, evec_np = np.linalg.eigh(supmat) eval_sk_gpu, evec_sk_gpu = linalg.eig(supmat_gpu) evec_sk, eval_sk = eval_sk_gpu.get(), evec_sk_gpu.get() # reorderign the skcuda matrix evec_sk = np.transpose(evec_sk)
def test_eig_complex128(self):
    """Eigenvalues of a random complex128 matrix must sum to its trace."""
    mat = np.random.rand(9, 9) + 1j * np.random.rand(9, 9)
    mat = np.array(mat, np.complex128, order='F')
    mat_gpu = gpuarray.to_gpu(mat)
    # Eigenvalues only: no left or right eigenvectors requested.
    eigvals_gpu = linalg.eig(mat_gpu, 'N', 'N')
    # For any square matrix, trace(A) == sum of eigenvalues.
    assert np.allclose(np.trace(mat), sum(eigvals_gpu.get()), atol=atol_float64)
def test_eig_float32(self):
    """Eigenvalues of a random float32 matrix must sum to its trace."""
    mat = np.asarray(np.random.rand(9, 9), np.float32, order='F')
    mat_gpu = gpuarray.to_gpu(mat)
    # Eigenvalues only: no left or right eigenvectors requested.
    eigvals_gpu = linalg.eig(mat_gpu, 'N', 'N')
    # For any square matrix, trace(A) == sum of eigenvalues
    # (loose tolerance: single precision).
    assert np.allclose(np.trace(mat), sum(eigvals_gpu.get()), atol=1e-4)
def _leading_laplacian_eigvals(data, cat_cols, num_samples, max_num_clusters):
    """Build the normalised Laplacian of the clinical-kernel similarity matrix
    for ``data`` and return its smallest ``max_num_clusters + 1`` eigenvalues
    (sorted ascending) as a numpy array."""
    similarity_matrix_gpu = ck.clinicalKernel(data,
                                              catCols=cat_cols,
                                              return_to_CPU=False)
    laplacian_matrix = laplacian.laplacian_normalised(similarity_matrix_gpu,
                                                      numRows=num_samples,
                                                      simMatrix_on_CPU=False,
                                                      return_to_CPU=True)
    ### Change the order of laplacian_matrix from 'C' to 'F', because cuSolver
    ### and skcuda like 'F'; send it back to GPU as pycuda.gpuarray.GPUArray.
    laplacian_matrix = np.asanyarray(laplacian_matrix, order='F', dtype=np.float32)
    laplacian_matrix_gpu = gpuarray.to_gpu(laplacian_matrix)
    print("\n# Computing eigenvalues ...")
    eig_vals = linalg.eig(laplacian_matrix_gpu,
                          jobvl='N',
                          jobvr='N',
                          lib='cusolver')
    # Convert GPUArray to numpy array and sort in increasing order.
    eig_vals = eig_vals.get()
    eig_vals.sort()
    return eig_vals[:(max_num_clusters + 1)]


def getEigenValues(inData, maxNumClusters, catCols=None,
                   save_to_file="eigenValues.csv", additional_comments=""):
    """Leave-one-feature-out eigenvalues of the normalised Laplacian.

    Parameters
    ----------
    inData : numpy.ndarray
        Array of shape (numSamples, numFeatures).
    maxNumClusters : int
        Maximum number of clusters wanted; maxNumClusters + 1 eigenvalues
        are kept per decomposition.
    catCols : numpy.ndarray of int, optional
        1-D indices of categorical columns in inData.  Defaults to no
        categorical columns (the old np.array default was replaced with a
        None sentinel — same behaviour, avoids a mutable default argument).
    save_to_file : str
        Filepath to save the eigenvalues.  Default: ./eigenValues.csv
    additional_comments : str
        Extra text placed in the header of the output file.

    Returns
    -------
    numpy.ndarray of shape (maxNumClusters+1, numFeatures+1); column 0 holds
    the eigenvalues with no feature removed, column i (i > 0) with the i'th
    feature removed.

    Raises
    ------
    ValueError
        If inData contains NaN entries.

    Note
    ----
    Expected to take a long time depending on the size of inData; try a
    smaller sample first.
    """
    start_time = time.time()
    if np.argwhere(np.isnan(inData)).size != 0:
        # (The unreachable `return 0` that followed this raise was removed.)
        raise ValueError("Input array inData contains NaN entries.")
    if catCols is None:
        catCols = np.array([], dtype=int)
    inData = np.asanyarray(inData, dtype=np.float32, order='C')
    numSamples = inData.shape[0]
    numFeatures = inData.shape[1]
    eigVals = np.zeros((maxNumClusters + 1, numFeatures + 1),
                       dtype=np.float32,
                       order='C')
    linalg.init()

    print("\n#### Working on base data\n")
    eigVals[:, 0] = _leading_laplacian_eigvals(inData, catCols, numSamples,
                                               maxNumClusters)

    ### Main for loop that iterates over features
    for i in range(numFeatures):
        time_elapsed = time.strftime("%H:%M:%S",
                                     time.gmtime(time.time() - start_time))
        print("\n#### Feature dropped: " + repr(i + 1) + " out of " +
              repr(numFeatures) + " (time elapsed: " + time_elapsed + ")")
        # Drop index i from catCols if that categorical column is the one
        # currently being removed from the data.
        catCols_reduced = np.delete(catCols, np.nonzero(catCols == i))
        # When the i'th feature is dropped, indices > i shift down by 1.
        mask = catCols_reduced > i
        catCols_reduced[mask] = catCols_reduced[mask] - 1
        inData_reduced = np.delete(inData, i, axis=1)
        eigVals[:, (i + 1)] = _leading_laplacian_eigvals(
            inData_reduced, catCols_reduced, numSamples, maxNumClusters)

    timeNow = datetime.datetime.now()
    timeString = timeNow.strftime("%H:%M:%S %Y-%m-%d")
    header = "##### Eigenvalues #####\n# " + timeString + "\n# First " + repr(
        maxNumClusters + 1
    ) + " Eigenvalues of Laplacian.\n# Eigenvalues stored along columns. 0th column for no feature dropped, i'th column for i'th feature dropped (i > 0)\n#\n# Comment: " + str(
        additional_comments)
    np.savetxt(save_to_file, eigVals, header=header, comments="", delimiter=",")
    print("\n#### Iterations complete\n")
    print("\n# Eigenvalues saved to file " + save_to_file)
    return eigVals
def eigh_gpu(X):
    # Transfer the input matrix to the GPU and run skcuda's cusolver-backed
    # eigensolver, returning both results as host (numpy) arrays.
    X = gpuarray.to_gpu(X)
    # NOTE(review): despite the eigh-style name this calls the general `eig`
    # solver, and skcuda's eig with default jobvl='N', jobvr='N' is documented
    # to return only the eigenvalue array — a two-value unpack here suggests
    # either a customised wrapper or a latent bug; confirm against the
    # cuda_linalg module actually imported.  The (eigen, w_gpu) naming does
    # not make clear which element holds values vs. vectors — verify order.
    eigen, w_gpu = cuda_linalg.eig(X, lib='cusolver')
    return eigen.get(), w_gpu.get()