def __init__(self, n_proc=1, stop='p10', epsilon=6, dtype=np.float32):
    """Store run options, record the MPI rank and create a GPU context.

    Args:
        n_proc: Stored unchanged as ``self.n_proc``.
        stop: Stored unchanged as ``self.stop``.
        epsilon: Stored unchanged as ``self.epsilon``.
        dtype: Stored unchanged as ``self.dtype``.

    Side effects:
        Creates a CUDA context (kept in ``self.ctx``), calls
        ``linalg.init()`` and ``cusparse.init()``, and compiles
        ``gpucode.code`` into ``self.mod``.
    """
    self.stop = stop
    self.epsilon = epsilon
    self.profile = False
    self._locals = {}
    self.dtype = dtype

    self.comm = MPI.COMM_WORLD
    self.rank = self.comm.rank
    self.n_proc = n_proc
    self.sparse = False

    # Wrap the rank onto the visible devices so that a rank number larger
    # than the device count still maps to a valid ordinal. With zero
    # devices the rank is passed through unchanged so the driver raises
    # its own error, exactly as before.
    ngpus = driver.Device.count()
    gpuid = self.rank % ngpus if ngpus else self.rank
    self.ctx = driver.Device(gpuid).make_context()

    # The libraries are initialised only after the context exists.
    linalg.init()
    cusparse.init()
    self.mod = SourceModule(gpucode.code)
def __enter__(self):
    """Load data, create this rank's GPU context and upload to the GPU.

    Calls ``self.load()``, creates a CUDA context on device
    ``self.rank % ngpus`` (stored in ``self.ctx``), initialises
    ``cusparse`` and ``linalg``, then calls ``self.load_gpu()``.

    Returns:
        ``self``, so that ``with obj as x:`` binds the object itself.
    """
    self.load()

    # Spread ranks round-robin over the devices reported by the driver.
    ngpus = driver.Device.count()
    gpuid = self.rank % ngpus
    self.ctx = driver.Device(gpuid).make_context()

    cusparse.init()
    linalg.init()

    self.load_gpu()
    return self
def __init__(self, n_proc=1, stop='p10', epsilon=6, dtype=np.float32):
    """Store run options and create a GPU context on device 0.

    Args:
        n_proc: Accepted but not used; ``self.n_proc`` is always 1.
        stop: Stored unchanged as ``self.stop``.
        epsilon: Stored unchanged as ``self.epsilon``.
        dtype: Accepted but not used; ``self.dtype`` is always
            ``np.float32``.

    Side effects:
        Creates a CUDA context on device 0 (kept in ``self.ctx``), calls
        ``linalg.init()`` and ``cusparse.init()``, and compiles
        ``gpucode.code`` into ``self.mod``.
    """
    # Caller-supplied options.
    self.stop, self.epsilon = stop, epsilon

    # Fixed settings and empty bookkeeping containers.
    self.profile = False
    self.memory = dict()
    self._locals = dict()
    self.dtype = np.float32
    self.sparse = False
    self.n_proc = 1
    self.rank = 0

    # GPU setup: context first, then the libraries, then the kernels.
    device = driver.Device(0)
    self.ctx = device.make_context()
    linalg.init()
    cusparse.init()
    kernel_source = gpucode.code
    self.mod = SourceModule(kernel_source)
from cuda_cffi import cusparse
import numpy as np
from numpy.testing import (run_module_suite, assert_raises, assert_equal,
                           assert_almost_equal)
from unittest import skipIf

# Not referenced by name in the visible code; presumably imported for its
# import-time side effect (setting up a CUDA context) -- confirm.
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import scipy.sparse

# TODO: refactor to remove this
cusparse.init()

# dtype groups: real, complex, and the concatenation of both.
cusparse_real_dtypes = [np.float32, np.float64]
cusparse_complex_dtypes = [np.complex64, np.complex128]
cusparse_dtypes = cusparse_real_dtypes + cusparse_complex_dtypes

# NOTE(review): the CUSPARSE_OPERATION_* constants and the cusparseCreate /
# cusparseDestroy functions are not imported in the visible lines; verify
# where they are brought into scope.
trans_list = [CUSPARSE_OPERATION_NON_TRANSPOSE,
              CUSPARSE_OPERATION_TRANSPOSE,
              CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE]


def test_context_create_destroy():
    """Call cusparseCreate and pass its result straight to cusparseDestroy."""
    handle = cusparseCreate()
    cusparseDestroy(handle)


def test_get_version():
from __future__ import division

# The star import supplies the unqualified cusparse* functions and
# CUSPARSE_* constants used below; the two underscore-prefixed helpers are
# imported explicitly as well.
from cuda_cffi.cusparse import *
from cuda_cffi.cusparse import (_csrgeamNnz, _csrgemmNnz)

from cuda_cffi import cusparse
# Module-level initialisation, executed at import time before the remaining
# imports below.
cusparse.init()

import numpy as np
from numpy.testing import assert_raises, assert_equal, assert_almost_equal
from unittest import skipIf

# Not referenced by name in the visible code; presumably imported for its
# import-time side effect (setting up a CUDA context) -- confirm.
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import scipy.sparse

# TODO: refactor to remove this
# NOTE(review): this TODO appears to refer to the module-level
# cusparse.init() call above rather than to the lists that follow -- confirm.

# dtype groups: real, complex, and the concatenation of both.
cusparse_real_dtypes = [np.float32, np.float64]
cusparse_complex_dtypes = [np.complex64, np.complex128]
cusparse_dtypes = cusparse_real_dtypes + cusparse_complex_dtypes

# The three CUSPARSE_OPERATION_* values collected into one list.
trans_list = [CUSPARSE_OPERATION_NON_TRANSPOSE,
              CUSPARSE_OPERATION_TRANSPOSE,
              CUSPARSE_OPERATION_CONJUGATE_TRANSPOSE]


def test_context_create_destroy():
    """Call cusparseCreate and pass its result straight to cusparseDestroy."""
    handle = cusparseCreate()
    cusparseDestroy(handle)