def suite():
    """Build the test suite for the randomized linear algebra routines.

    Single-precision tests (float32 / complex64) are always added.
    Double-precision tests (float64 / complex128) are added only when the
    device reports a compute capability of at least 1.3.

    Previously every test was added unconditionally and then added a second
    time inside the capability check, so double-precision tests were never
    actually gated and each test ran twice on capable devices.

    Returns
    -------
    TestSuite
        The populated suite.
    """
    routines = ('rsvd', 'rsvdf', 'rdmd')

    s = TestSuite()
    for routine in routines:
        for dtype in ('float32', 'complex64'):
            s.addTest(test_linalg('test_%s_%s' % (routine, dtype)))

    if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        for routine in routines:
            for dtype in ('float64', 'complex128'):
                s.addTest(test_linalg('test_%s_%s' % (routine, dtype)))
    return s
def prepare_gpu(basis_size):
    """Allocate the kinetic matrix, a random vector and a result buffer on the GPU.

    Prints the selected data type when the device has compute capability
    >= 1.3; otherwise prints a message and calls ``exit(0)``.

    Parameters
    ----------
    basis_size : int
        Edge length used for the square kinetic matrix and the cubic vector.

    Returns
    -------
    tuple
        ``(T_gpu, v_x_gpu, U_x_gpu, v_vector)``: the kinetic matrix on the
        GPU, the vector on the GPU, a zero-filled
        ``(basis_size, basis_size * basis_size)`` float64 GPU buffer, and the
        host copy of the vector.
    """
    has_double = misc.get_compute_capability(pycuda.autoinit.device) >= 1.3
    if not has_double:
        print("NO DOUBLE PRECIISION OMG!")
        exit(0)
    else:
        gpu_data_type = np.float64
        print("Data type", str(gpu_data_type))

    # Host-side data: square kinetic factor and a C-ordered random cube
    # (assumed to be in i, j, k arrangement initially).
    kinetic_host = np.fromfunction(
        ud.construct_kinetic_factor,
        (basis_size, basis_size),
        dtype=np.float64,
    )
    cube_shape = (basis_size, basis_size, basis_size)
    v_vector = np.asarray(
        np.random.random_sample(size=cube_shape),
        dtype=np.float64,
        order='C',
    )

    # Device-side copies, followed by the zeroed output matrix.
    v_x_gpu = gpuarray.to_gpu(v_vector)
    T_gpu = gpuarray.to_gpu(kinetic_host)
    result_shape = (basis_size, basis_size * basis_size)
    U_x_gpu = gpuarray.zeros(result_shape, np.float64)

    return T_gpu, v_x_gpu, U_x_gpu, v_vector
def suite():
    """Assemble the CUBLAS wrapper test suite.

    Single-precision (S/C) tests are always included; the double-precision
    (D/Z) tests are appended only when the device's compute capability is
    at least 1.3.
    """
    single_precision = (
        'test_cublasIsamax',
        'test_cublasIcamax',
        'test_cublasIsamin',
        'test_cublasIcamin',
        'test_cublasSasum',
        'test_cublasScasum',
        'test_cublasSaxpy',
        'test_cublasCaxpy',
        'test_cublasScopy',
        'test_cublasCcopy',
        'test_cublasSdot',
        'test_cublasCdotu',
        'test_cublasCdotc',
        'test_cublasSrnm2',
        'test_cublasScrnm2',
        'test_cublasSscal',
        'test_cublasCscal',
        'test_cublasSrot',
        'test_cublasSswap',
        'test_cublasCswap',
        'test_cublasSgemv',
        'test_cublasCgemv',
        'test_cublasSgeam',
        'test_cublasCgeam',
        'test_cublasSgemmBatched',
        'test_cublasCgemmBatched',
        'test_cublasStrsmBatched',
        'test_cublasSgetrfBatched',
    )
    double_precision = (
        'test_cublasIdamax',
        'test_cublasIzamax',
        'test_cublasIdamin',
        'test_cublasIzamin',
        'test_cublasDasum',
        'test_cublasDzasum',
        'test_cublasDaxpy',
        'test_cublasZaxpy',
        'test_cublasDcopy',
        'test_cublasZcopy',
        'test_cublasDdot',
        'test_cublasZdotu',
        'test_cublasZdotc',
        'test_cublasDrnm2',
        'test_cublasDzrnm2',
        'test_cublasDscal',
        'test_cublasZscal',
        'test_cublasZdscal',
        'test_cublasDswap',
        'test_cublasZswap',
        'test_cublasDgemv',
        'test_cublasZgemv',
        'test_cublasDgeam',
        'test_cublasZgeam',
        'test_cublasDgemmBatched',
        'test_cublasZgemmBatched',
        'test_cublasDtrsmBatched',
        'test_cublasDgetrfBatched',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_cublas(case_name))
    if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_cublas(case_name))
    return collected
def suite():
    """Assemble the FFT test suite.

    A default context is created only to look up the device and is popped
    again before the suite is built. Tests involving float64 / complex128
    are appended only when the device's compute capability is at least 1.3.
    """
    ctx = make_default_context()
    device = ctx.get_device()
    ctx.pop()

    single_precision = (
        'test_fft_float32_to_complex64_1d',
        'test_fft_float32_to_complex64_2d',
        'test_batch_fft_float32_to_complex64_1d',
        'test_batch_fft_float32_to_complex64_2d',
        'test_ifft_complex64_to_float32_1d',
        'test_ifft_complex64_to_float32_2d',
        'test_batch_ifft_complex64_to_float32_1d',
        'test_batch_ifft_complex64_to_float32_2d',
        'test_multiple_streams',
        'test_work_area',
    )
    double_precision = (
        'test_fft_float64_to_complex128_1d',
        'test_fft_float64_to_complex128_2d',
        'test_batch_fft_float64_to_complex128_1d',
        'test_batch_fft_float64_to_complex128_2d',
        'test_ifft_complex128_to_float64_1d',
        'test_ifft_complex128_to_float64_2d',
        'test_batch_ifft_complex128_to_float64_1d',
        'test_batch_ifft_complex128_to_float64_2d',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_fft(case_name))
    if misc.get_compute_capability(device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_fft(case_name))
    return collected
def suite():
    """Assemble the CUBLAS-XT gemm test suite.

    The S/C gemm tests are always included; the D/Z gemm tests are appended
    only when the device's compute capability is at least 1.3.
    """
    collected = TestSuite()
    for case_name in ('test_cublasXtSgemm', 'test_cublasXtCgemm'):
        collected.addTest(test_cublasxt(case_name))

    double_ok = misc.get_compute_capability(pycuda.autoinit.device) >= 1.3
    if double_ok:
        for case_name in ('test_cublasXtDgemm', 'test_cublasXtZgemm'):
            collected.addTest(test_cublasxt(case_name))
    return collected
def suite():
    """Assemble the misc-module test suite.

    A default context is created only to look up the device and is popped
    again before the suite is built. The float64 / complex128 tests are
    appended only when the device's compute capability is at least 1.3.
    """
    ctx = make_default_context()
    device = ctx.get_device()
    ctx.pop()

    single_precision = (
        'test_maxabs_float32',
        'test_maxabs_complex64',
        'test_cumsum_float32',
        'test_cumsum_complex64',
        'test_diff_float32',
        'test_diff_complex64',
        'test_get_by_index_float32',
        'test_set_by_index_dest_float32',
        'test_set_by_index_src_float32',
        'test_binaryop_2d_int32',
        'test_binaryop_2d_float32',
        'test_binaryop_2d_complex64',
        'test_binaryop_matvec_int32',
        'test_binaryop_matvec_float32',
        'test_binaryop_matvec_complex64',
        'test_sum_float32',
        'test_sum_complex64',
        'test_mean_float32',
        'test_mean_complex64',
        'test_var_float32',
        'test_var_complex64',
        'test_std_float32',
        'test_std_complex64',
        'test_minmax_float32',
        'test_argminmax_float32',
    )
    double_precision = (
        'test_maxabs_float64',
        'test_maxabs_complex128',
        'test_cumsum_float64',
        'test_cumsum_complex128',
        'test_diff_float64',
        'test_diff_complex128',
        'test_get_by_index_float64',
        'test_set_by_index_dest_float64',
        'test_set_by_index_src_float64',
        'test_sum_float64',
        'test_sum_complex128',
        'test_mean_float64',
        'test_mean_complex128',
        'test_binaryop_2d_float64',
        'test_binaryop_2d_complex128',
        'test_binaryop_matvec_float64',
        'test_binaryop_matvec_complex128',
        'test_var_float64',
        'test_var_complex128',
        'test_std_float64',
        'test_std_complex128',
        'test_minmax_float64',
        'test_argminmax_float64',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_misc(case_name))
    if misc.get_compute_capability(device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_misc(case_name))
    return collected
def suite():
    """Assemble the special-functions test suite.

    The float32 / complex64 tests are always included; the float64 /
    complex128 tests are appended only when the device's compute capability
    is at least 1.3.
    """
    single_precision = (
        'test_sici_float32',
        'test_exp1_complex64',
        'test_expi_complex64',
    )
    double_precision = (
        'test_sici_float64',
        'test_exp1_complex128',
        'test_expi_complex128',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_special(case_name))
    if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_special(case_name))
    return collected
def suite():
    """Assemble the CUBLAS-XT gemm test suite.

    A default context is created only to look up the device and is popped
    again before the suite is built. The D/Z gemm tests are appended only
    when the device's compute capability is at least 1.3.
    """
    ctx = make_default_context()
    device = ctx.get_device()
    ctx.pop()

    collected = TestSuite()
    for case_name in ('test_cublasXtSgemm', 'test_cublasXtCgemm'):
        collected.addTest(test_cublasxt(case_name))

    double_ok = misc.get_compute_capability(device) >= 1.3
    if double_ok:
        for case_name in ('test_cublasXtDgemm', 'test_cublasXtZgemm'):
            collected.addTest(test_cublasxt(case_name))
    return collected
def suite():
    """Assemble the integration (trapz / trapz2d) test suite.

    The float32 / complex64 tests are always included; the float64 /
    complex128 tests are appended only when the device's compute capability
    is at least 1.3.
    """
    single_precision = (
        'test_trapz_float32',
        'test_trapz_complex64',
        'test_trapz2d_float32',
        'test_trapz2d_complex64',
    )
    double_precision = (
        'test_trapz_float64',
        'test_trapz_complex128',
        'test_trapz2d_float64',
        'test_trapz2d_complex128',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_integrate(case_name))
    if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_integrate(case_name))
    return collected
def suite():
    """Build the misc-module test suite.

    Single-precision (and int32) tests are always added. The float64 /
    complex128 tests are added only when the device reports a compute
    capability of at least 1.3.

    The double-precision branch previously re-added
    ``test_get_by_index_float32`` where ``test_get_by_index_float64`` was
    intended, so the float64 variant never ran and the float32 one ran twice.

    Returns
    -------
    TestSuite
        The populated suite.
    """
    single_precision = (
        'test_maxabs_float32',
        'test_maxabs_complex64',
        'test_cumsum_float32',
        'test_cumsum_complex64',
        'test_diff_float32',
        'test_diff_complex64',
        'test_get_by_index_float32',
        'test_set_by_index_dest_float32',
        'test_set_by_index_src_float32',
        'test_binaryop_2d_int32',
        'test_binaryop_2d_float32',
        'test_binaryop_2d_complex64',
        'test_binaryop_matvec_int32',
        'test_binaryop_matvec_float32',
        'test_binaryop_matvec_complex64',
        'test_sum_float32',
        'test_sum_complex64',
        'test_mean_float32',
        'test_mean_complex64',
        'test_var_float32',
        'test_var_complex64',
        'test_std_float32',
        'test_std_complex64',
        'test_minmax_float32',
        'test_argminmax_float32',
    )
    double_precision = (
        'test_maxabs_float64',
        'test_maxabs_complex128',
        'test_cumsum_float64',
        'test_cumsum_complex128',
        'test_diff_float64',
        'test_diff_complex128',
        'test_get_by_index_float64',  # was mistakenly the float32 variant
        'test_set_by_index_dest_float64',
        'test_set_by_index_src_float64',
        'test_sum_float64',
        'test_sum_complex128',
        'test_mean_float64',
        'test_mean_complex128',
        'test_binaryop_2d_float64',
        'test_binaryop_2d_complex128',
        'test_binaryop_matvec_float64',
        'test_binaryop_matvec_complex128',
        'test_var_float64',
        'test_var_complex128',
        'test_std_float64',
        'test_std_complex128',
        'test_minmax_float64',
        'test_argminmax_float64',
    )

    s = TestSuite()
    for name in single_precision:
        s.addTest(test_misc(name))
    if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        for name in double_precision:
            s.addTest(test_misc(name))
    return s
def suite():
    """Assemble the special-functions test suite.

    A default context is created only to look up the device and is popped
    again before the suite is built. The float64 / complex128 tests are
    appended only when the device's compute capability is at least 1.3.
    """
    ctx = make_default_context()
    device = ctx.get_device()
    ctx.pop()

    single_precision = (
        'test_sici_float32',
        'test_exp1_complex64',
        'test_expi_complex64',
    )
    double_precision = (
        'test_sici_float64',
        'test_exp1_complex128',
        'test_expi_complex128',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_special(case_name))
    if misc.get_compute_capability(device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_special(case_name))
    return collected
def suite():
    """Assemble the integration (trapz / trapz2d) test suite.

    A default context is created only to look up the device and is popped
    again before the suite is built. The float64 / complex128 tests are
    appended only when the device's compute capability is at least 1.3.
    """
    ctx = make_default_context()
    device = ctx.get_device()
    ctx.pop()

    single_precision = (
        'test_trapz_float32',
        'test_trapz_complex64',
        'test_trapz2d_float32',
        'test_trapz2d_complex64',
    )
    double_precision = (
        'test_trapz_float64',
        'test_trapz_complex128',
        'test_trapz2d_float64',
        'test_trapz2d_complex128',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_integrate(case_name))
    if misc.get_compute_capability(device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_integrate(case_name))
    return collected
def prepare_gpu(basis_size):
    """Stage the kinetic matrix, the vector in three axis orders, and result buffers on the GPU.

    Fixes relative to the previous version:

    * ``gpuarray.to_gpu(v_X)`` referenced an undefined name and raised
      ``NameError``; the host array is ``v_vector``.
    * The data-type selection was inverted (float32 when double precision is
      supported, float64 otherwise).

    Parameters
    ----------
    basis_size : int
        Edge length used for the square kinetic matrix and the cubic vector.

    Notes
    -----
    ``y_shape`` and ``z_shape`` are read from module scope.
    """
    # Double precision is only supported by devices with compute
    # capability >= 1.3.
    gpu_data_type = (
        np.float64
        if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3
        else np.float32
    )

    T_i = np.fromfunction(
        ud.construct_kinetic_factor,
        (basis_size, basis_size),
        dtype=np.float64,
    )

    # assume that we are in i,j,k arrangement initially
    v_vector = np.asarray(
        np.random.random_sample(size=(basis_size, basis_size, basis_size)),
        dtype=np.float64,
        order='C',
    )

    # arrange the vector data on the gpu
    v_x_gpu = gpuarray.to_gpu(v_vector)
    v_y_gpu = v_x_gpu.transpose(y_shape).copy()
    v_z_gpu = v_x_gpu.transpose(z_shape).copy()

    # transfer kinetic portion to gpu
    T_gpu = gpuarray.to_gpu(T_i)

    # allocate space on gpu for results (empty matrices of the right size)
    U_x_gpu = gpuarray.zeros((basis_size, basis_size * basis_size), np.float64)
    U_y_gpu = gpuarray.zeros((basis_size, basis_size * basis_size), np.float64)
    U_z_gpu = gpuarray.zeros((basis_size, basis_size * basis_size), np.float64)
    # NOTE(review): nothing is returned, so every array built above is
    # discarded when the function exits. Presumably a return of the GPU
    # arrays is intended; confirm against the callers before adding one.
def suite():
    """Assemble the integration (trapz / simps / trapz2d) test suite.

    A default context is created only to look up the device and is popped
    again before the suite is built. The float64 / complex128 tests are
    appended only when the device's compute capability is at least 1.3.
    """
    ctx = make_default_context()
    device = ctx.get_device()
    ctx.pop()

    single_precision = (
        'test_trapz_float32',
        'test_trapz_complex64',
        'test_simps_float32',
        'test_simps_complex64',
        'test_trapz2d_float32',
        'test_trapz2d_complex64',
    )
    double_precision = (
        'test_trapz_float64',
        'test_trapz_complex128',
        'test_simps_float64',
        'test_simps_complex128',
        'test_trapz2d_float64',
        'test_trapz2d_complex128',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_integrate(case_name))
    if misc.get_compute_capability(device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_integrate(case_name))
    return collected
def suite():
    """Assemble the FFT test suite.

    Tests involving float32 / complex64 are always included; those involving
    float64 / complex128 are appended only when the device's compute
    capability is at least 1.3.
    """
    single_precision = (
        'test_fft_float32_to_complex64_1d',
        'test_fft_float32_to_complex64_2d',
        'test_batch_fft_float32_to_complex64_1d',
        'test_batch_fft_float32_to_complex64_2d',
        'test_ifft_complex64_to_float32_1d',
        'test_ifft_complex64_to_float32_2d',
        'test_batch_ifft_complex64_to_float32_1d',
        'test_batch_ifft_complex64_to_float32_2d',
        'test_multiple_streams',
    )
    double_precision = (
        'test_fft_float64_to_complex128_1d',
        'test_fft_float64_to_complex128_2d',
        'test_batch_fft_float64_to_complex128_1d',
        'test_batch_fft_float64_to_complex128_2d',
        'test_ifft_complex128_to_float64_1d',
        'test_ifft_complex128_to_float64_2d',
        'test_batch_ifft_complex128_to_float64_1d',
        'test_batch_ifft_complex128_to_float64_2d',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_fft(case_name))
    if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_fft(case_name))
    return collected
def suite():
    """Build the test suite for the randomized linear algebra routines.

    Single-precision tests (float32 / complex64) are always added.
    Double-precision tests (float64 / complex128) are added only when the
    device reports a compute capability of at least 1.3.

    Previously every test was added unconditionally and then added a second
    time inside the capability check, so double-precision tests were never
    actually gated and each test ran twice on capable devices.

    Returns
    -------
    TestSuite
        The populated suite.
    """
    routines = ('rsvd', 'rsvdf', 'rdmd', 'cdmd')

    s = TestSuite()
    for routine in routines:
        for dtype in ('float32', 'complex64'):
            s.addTest(test_rlinalg('test_%s_%s' % (routine, dtype)))

    if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        for routine in routines:
            for dtype in ('float64', 'complex128'):
                s.addTest(test_rlinalg('test_%s_%s' % (routine, dtype)))
    return s
def suite():
    """Assemble the CUBLAS wrapper test suite.

    A default context is created only to look up the device and is popped
    again before the suite is built. The double-precision (D/Z) tests are
    appended only when the device's compute capability is at least 1.3.
    """
    ctx = make_default_context()
    device = ctx.get_device()
    ctx.pop()

    single_precision = (
        'test_cublasIsamax',
        'test_cublasIcamax',
        'test_cublasIsamin',
        'test_cublasIcamin',
        'test_cublasSasum',
        'test_cublasScasum',
        'test_cublasSaxpy',
        'test_cublasCaxpy',
        'test_cublasScopy',
        'test_cublasCcopy',
        'test_cublasSdot',
        'test_cublasCdotu',
        'test_cublasCdotc',
        'test_cublasSrnm2',
        'test_cublasScrnm2',
        'test_cublasSscal',
        'test_cublasCscal',
        'test_cublasSrot',
        'test_cublasSswap',
        'test_cublasCswap',
        'test_cublasSgemv',
        'test_cublasCgemv',
        'test_cublasSgeam',
        'test_cublasCgeam',
        'test_cublasSgemmBatched',
        'test_cublasCgemmBatched',
        'test_cublasStrsmBatched',
        'test_cublasSgetrfBatched',
    )
    double_precision = (
        'test_cublasIdamax',
        'test_cublasIzamax',
        'test_cublasIdamin',
        'test_cublasIzamin',
        'test_cublasDasum',
        'test_cublasDzasum',
        'test_cublasDaxpy',
        'test_cublasZaxpy',
        'test_cublasDcopy',
        'test_cublasZcopy',
        'test_cublasDdot',
        'test_cublasZdotu',
        'test_cublasZdotc',
        'test_cublasDrnm2',
        'test_cublasDzrnm2',
        'test_cublasDscal',
        'test_cublasZscal',
        'test_cublasZdscal',
        'test_cublasDswap',
        'test_cublasZswap',
        'test_cublasDgemv',
        'test_cublasZgemv',
        'test_cublasDgeam',
        'test_cublasZgeam',
        'test_cublasDgemmBatched',
        'test_cublasZgemmBatched',
        'test_cublasDtrsmBatched',
        'test_cublasDgetrfBatched',
    )

    collected = TestSuite()
    for case_name in single_precision:
        collected.addTest(test_cublas(case_name))
    if misc.get_compute_capability(device) >= 1.3:
        for case_name in double_precision:
            collected.addTest(test_cublas(case_name))
    return collected
# pycuda stuff import pycuda.gpuarray as gpuarray import pycuda.driver as cuda import pycuda.cumath as cumath import pycuda.autoinit from pycuda.reduction import ReductionKernel from pycuda.elementwise import ElementwiseKernel # scikit stuff import skcuda.misc as misc import skcuda.linalg as linalg import skcuda.cublas as cublas # Double precision is only supported by devices with compute capability >= 1.3: gpu_data_type = np.float64 if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3 else np.float32 np.random.seed(463912) # pick our seed for testing purposes def diag_gpu(A, v1): # handle current_handle = cublas.cublasCreate() m = A.shape[0] Q = np.zeros((m, m), dtype=np.float64) # Q[0, :] = 0.0 # implied Q[1, :] = v1.copy() beta = np.zeros(m, dtype=np.float64) alpha = np.zeros(m, dtype=np.float64) # move data onto the GPU
def suite():
    """Build the linalg test suite.

    Single-precision tests (float32 / complex64) are always added.
    Double-precision tests (float64 / complex128) are added only when the
    device reports a compute capability of at least 1.3.

    Fixes relative to the previous version:

    * The double-precision branch re-added ``test_hermitian_complex64``,
      ``test_triu_float32`` and ``test_triu_complex64`` where the
      complex128 / float64 variants were intended.
    * The qr_reduced / eig / vander / dmd tests were added for all four
      dtypes unconditionally and then again inside the capability check;
      they are now split by precision like every other group.

    Returns
    -------
    TestSuite
        The populated suite.
    """
    single_precision = (
        'test_svd_ss_cula_float32',
        'test_svd_ss_cula_complex64',
        'test_svd_so_cula_float32',
        'test_svd_so_cula_complex64',
        'test_svd_aa_cusolver_float32',
        'test_svd_aa_cusolver_complex64',
        'test_dot_matrix_vector_float32',
        'test_dot_matrix_vector_complex64',
        'test_dot_matrix_float32',
        'test_dot_matrix_complex64',
        'test_dot_matrix_h_complex64',
        'test_dot_vector_float32',
        'test_dot_vector_complex64',
        'test_mdot_matrix_float32',
        'test_mdot_matrix_complex64',
        'test_dot_diag_float32',
        'test_dot_diag_complex64',
        'test_dot_diag_t_float32',
        'test_dot_diag_t_complex64',
        'test_transpose_float32',
        'test_transpose_complex64',
        'test_hermitian_float32',
        'test_hermitian_complex64',
        'test_conj_complex64',
        'test_diag_1d_float32',
        'test_diag_2d_wide_float32',
        'test_diag_2d_tall_float32',
        'test_diag_1d_complex64',
        'test_diag_2d_wide_complex64',
        'test_diag_2d_tall_complex64',
        'test_eye_float32',
        'test_eye_complex64',
        'test_pinv_float32',
        'test_pinv_complex64',
        'test_tril_float32',
        'test_tril_complex64',
        'test_triu_float32',
        'test_triu_complex64',
        'test_multiply_float32',
        'test_multiply_complex64',
        'test_cho_factor_float32',
        'test_cho_solve_float32',
        'test_inv_float32',
        'test_inv_complex64',
        'test_add_diag_float32',
        'test_add_diag_complex64',
        'test_inv_exceptions',
        'test_eye_large_float32',
        'test_trace_float32',
        'test_trace_complex64',
        'test_add_dot_matrix_float32',
        'test_add_dot_matrix_complex64',
        'test_dot_strided_float32',
        'test_dot_strided_complex64',
        'test_det_float32',
        'test_det_complex64',
        'test_qr_reduced_float32',
        'test_qr_reduced_complex64',
        'test_eig_float32',
        'test_eig_complex64',
        'test_vander_float32',
        'test_vander_complex64',
        'test_dmd_float32',
        'test_dmd_complex64',
    )
    # NOTE(review): assumes test_linalg defines test_hermitian_complex128,
    # test_triu_float64 and test_triu_complex128 -- confirm before merging.
    double_precision = (
        'test_svd_ss_cula_float64',
        'test_svd_ss_cula_complex128',
        'test_svd_so_cula_float64',
        'test_svd_so_cula_complex128',
        'test_svd_aa_cusolver_float64',
        'test_svd_aa_cusolver_complex128',
        'test_dot_matrix_vector_float64',
        'test_dot_matrix_vector_complex128',
        'test_dot_matrix_float64',
        'test_dot_matrix_complex128',
        'test_dot_matrix_h_complex128',
        'test_dot_vector_float64',
        'test_dot_vector_complex128',
        'test_mdot_matrix_float64',
        'test_mdot_matrix_complex128',
        'test_dot_diag_t_float64',
        'test_dot_diag_t_complex128',
        'test_transpose_float64',
        'test_transpose_complex128',
        'test_hermitian_float64',
        'test_hermitian_complex128',  # was mistakenly the complex64 variant
        'test_conj_complex128',
        'test_diag_1d_float64',
        'test_diag_2d_wide_float64',
        'test_diag_2d_tall_float64',
        'test_diag_1d_complex128',
        'test_diag_2d_wide_complex128',
        'test_diag_2d_tall_complex128',
        'test_eye_float64',
        'test_eye_complex128',
        'test_pinv_float64',
        'test_pinv_complex128',
        'test_tril_float64',
        'test_tril_complex128',
        'test_triu_float64',  # was mistakenly the float32 variant
        'test_triu_complex128',  # was mistakenly the complex64 variant
        'test_multiply_float64',
        'test_multiply_complex128',
        'test_inv_float64',
        'test_inv_complex128',
        'test_add_diag_float64',
        'test_add_diag_complex128',
        'test_trace_float64',
        'test_trace_complex128',
        'test_add_dot_matrix_float64',
        'test_add_dot_matrix_complex128',
        'test_dot_strided_float64',
        'test_dot_strided_complex128',
        'test_det_float64',
        'test_det_complex128',
        'test_qr_reduced_float64',
        'test_qr_reduced_complex128',
        'test_eig_float64',
        'test_eig_complex128',
        'test_vander_float64',
        'test_vander_complex128',
        'test_dmd_float64',
        'test_dmd_complex128',
    )

    s = TestSuite()
    for name in single_precision:
        s.addTest(test_linalg(name))
    if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        for name in double_precision:
            s.addTest(test_linalg(name))
    return s
def suite():
    """Build the linalg test suite.

    Single-precision tests (float32 / complex64) are always added.
    Double-precision tests (float64 / complex128) are added only when the
    device reports a compute capability of at least 1.3.

    The double-precision branch previously re-added
    ``test_hermitian_complex64`` where ``test_hermitian_complex128`` was
    intended.

    Returns
    -------
    TestSuite
        The populated suite.
    """
    single_precision = (
        'test_svd_ss_float32',
        'test_svd_ss_complex64',
        'test_svd_so_float32',
        'test_svd_so_complex64',
        'test_dot_matrix_float32',
        'test_dot_matrix_complex64',
        'test_dot_matrix_h_complex64',
        'test_dot_vector_float32',
        'test_dot_vector_complex64',
        'test_mdot_matrix_float32',
        'test_mdot_matrix_complex64',
        'test_dot_diag_float32',
        'test_dot_diag_complex64',
        'test_dot_diag_t_float32',
        'test_dot_diag_t_complex64',
        'test_transpose_float32',
        'test_transpose_complex64',
        'test_hermitian_float32',
        'test_hermitian_complex64',
        'test_conj_complex64',
        'test_diag_1d_float32',
        'test_diag_2d_wide_float32',
        'test_diag_2d_tall_float32',
        'test_diag_1d_complex64',
        'test_diag_2d_wide_complex64',
        'test_diag_2d_tall_complex64',
        'test_eye_float32',
        'test_eye_complex64',
        'test_pinv_float32',
        'test_pinv_complex64',
        'test_tril_float32',
        'test_tril_complex64',
        'test_multiply_float32',
        'test_multiply_complex64',
        'test_cho_factor_float32',
        'test_cho_solve_float32',
        'test_inv_float32',
        'test_inv_complex64',
        'test_add_diag_float32',
        'test_add_diag_complex64',
        'test_inv_exceptions',
        'test_eye_large_float32',
        'test_trace_float32',
        'test_trace_complex64',
        'test_add_dot_matrix_float32',
        'test_add_dot_matrix_complex64',
        'test_dot_strided_float32',
        'test_dot_strided_complex64',
        'test_det_float32',
        'test_det_complex64',
    )
    # NOTE(review): assumes test_linalg defines test_hermitian_complex128
    # -- confirm before merging.
    double_precision = (
        'test_svd_ss_float64',
        'test_svd_ss_complex128',
        'test_svd_so_float64',
        'test_svd_so_complex128',
        'test_dot_matrix_float64',
        'test_dot_matrix_complex128',
        'test_dot_matrix_h_complex128',
        'test_dot_vector_float64',
        'test_dot_vector_complex128',
        'test_mdot_matrix_float64',
        'test_mdot_matrix_complex128',
        'test_dot_diag_t_float64',
        'test_dot_diag_t_complex128',
        'test_transpose_float64',
        'test_transpose_complex128',
        'test_hermitian_float64',
        'test_hermitian_complex128',  # was mistakenly the complex64 variant
        'test_conj_complex128',
        'test_diag_1d_float64',
        'test_diag_2d_wide_float64',
        'test_diag_2d_tall_float64',
        'test_diag_1d_complex128',
        'test_diag_2d_wide_complex128',
        'test_diag_2d_tall_complex128',
        'test_eye_float64',
        'test_eye_complex128',
        'test_pinv_float64',
        'test_pinv_complex128',
        'test_tril_float64',
        'test_tril_complex128',
        'test_multiply_float64',
        'test_multiply_complex128',
        'test_inv_float64',
        'test_inv_complex128',
        'test_add_diag_float64',
        'test_add_diag_complex128',
        'test_trace_float64',
        'test_trace_complex128',
        'test_add_dot_matrix_float64',
        'test_add_dot_matrix_complex128',
        'test_dot_strided_float64',
        'test_dot_strided_complex128',
        'test_det_float64',
        'test_det_complex128',
    )

    s = TestSuite()
    for name in single_precision:
        s.addTest(test_linalg(name))
    if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
        for name in double_precision:
            s.addTest(test_linalg(name))
    return s
from __future__ import print_function import pycuda.autoinit import pycuda.driver as drv import pycuda.gpuarray as gpuarray import numpy as np import skcuda.linalg as culinalg import skcuda.misc as cumisc culinalg.init() # Double precision is only supported by devices with compute # capability >= 1.3: import string demo_types = [np.float32, np.complex64] if cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3: demo_types.extend([np.float64, np.complex128]) for t in demo_types: print('Testing transpose for type ' + str(np.dtype(t))) if np.iscomplexobj(t()): b = np.array([[1j, 2j, 3j, 4j, 5j, 6j], [7j, 8j, 9j, 10j, 11j, 12j]], t) else: a = np.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], t) a_gpu = gpuarray.to_gpu(a) at_gpu = culinalg.transpose(a_gpu) if np.iscomplexobj(t()): print('Success status: ', np.all(np.conj(a.T) == at_gpu.get())) else:
import time

import numpy as np
# scikit stuff
import scipy.constants as s_const
import skcuda.linalg as linalg
import skcuda.cublas as cublas
import skcuda.misc as misc
import string

linalg.init()

# my module
import universe_definitions as ud

# time.clock() was removed in Python 3.8. Use time.process_time() where it
# exists and fall back to time.clock() only on interpreters that lack it.
_process_time = getattr(time, 'process_time', None) or time.clock

# Double precision is only supported by devices with compute capability >= 1.3:
gpu_data_type = np.float64 if misc.get_compute_capability(pycuda.autoinit.device) >= 1.3 else np.float32

# create kinetic component
T_i = np.fromfunction(ud.construct_kinetic_factor, (ud.basis_size, ud.basis_size), dtype=np.float64)
print("Finished creating our square kinetic matrix\n")

# Timing scaffold around the (currently disabled) potential construction.
t0 = _process_time()
#potential = np.fromfunction(ud.construct_potential_factor, (ud.basis_size, ud.basis_size, ud.basis_size), dtype=np.float64)
print(_process_time() - t0, "seconds process time")
#print("Kinetic Term: ", T_i.size, "Potential size: ", potential.size)

# define the three dimensions (axis orders)
x_shape = (0, 1, 2)
y_shape = (1, 0, 2)
z_shape = (2, 1, 0)
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import numpy as np

import skcuda.linalg as culinalg
import skcuda.misc as cumisc
culinalg.init()

# Double precision is only supported by devices with compute
# capability >= 1.3:
import string
# Import cula from the same package as linalg/misc (skcuda) rather than the
# scikits.cuda namespace, so the toolkit check below reads the same module
# that culinalg.init() works with.
import skcuda.cula as cula

demo_types = [np.float32, np.complex64]
if cula._libcula_toolkit == 'premium' and \
        cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3:
    demo_types.extend([np.float64, np.complex128])

# Compare the GPU pseudo-inverse against numpy's for each supported dtype.
for t in demo_types:
    print('Testing pinv for type ' + str(np.dtype(t)))
    a = np.asarray((np.random.rand(50, 50) - 0.5) / 10, t)
    a_gpu = gpuarray.to_gpu(a)
    a_inv_gpu = culinalg.pinv(a_gpu)

    # Compute the reference and fetch the device result once; both are
    # needed for the tolerance check and for the error report.
    a_inv_ref = np.linalg.pinv(a)
    a_inv_host = a_inv_gpu.get()

    print('Success status: ', np.allclose(a_inv_ref, a_inv_host,
                                          atol=1e-2))
    print('Maximum error: ', np.max(np.abs(a_inv_ref - a_inv_host)))
    print('')