def test_gpu_stimp(T, m):
    """Check ``gpu_stimp(..., normalize=False)`` against ``gpu_aamp_stimp``.

    Both objects are built and updated the same number of times after
    re-seeding NumPy's RNG with the same seed; their raw ``_PAN`` arrays and
    their ``PAN_`` attributes must then agree.
    """
    if not cuda.is_available():  # pragma: no cover
        pytest.skip("Skipping Tests No GPUs Available")

    if T.ndim > 1:
        # Work on a copy and keep only the first row.
        T = T.copy()[0]

    n_updates = 3
    seed = np.random.randint(100000)

    # Reference object
    np.random.seed(seed)
    ref = stumpy.gpu_aamp_stimp(T, m)
    for _ in range(n_updates):
        ref.update()

    # Object under test, started from the identical RNG state
    np.random.seed(seed)
    comp = stumpy.gpu_stimp(T, m, normalize=False)
    for _ in range(n_updates):
        comp.update()

    # Compare raw pan
    raw_ref = ref._PAN
    raw_comp = comp._PAN
    naive.replace_inf(raw_ref)
    naive.replace_inf(raw_comp)
    npt.assert_almost_equal(raw_ref, raw_comp)

    # Compare transformed pan
    npt.assert_almost_equal(ref.PAN_, comp.PAN_)
class TestNoCudaBoundsCheck(SerialMixin, unittest.TestCase):
    """Checks how ``cuda.jit`` reacts when bounds checking is requested."""

    def setUp(self):
        # Remember the global setting so tearDown can restore it, then clear it.
        self.old_boundscheck = config.BOUNDSCHECK
        config.BOUNDSCHECK = None

    @unittest.skipIf(not cuda.is_available(), "NO CUDA")
    def test_no_cuda_boundscheck(self):
        # Explicitly requesting bounds checking must raise NotImplementedError.
        with self.assertRaises(NotImplementedError):
            @cuda.jit(boundscheck=True)
            def func():
                pass

        # With NUMBA_BOUNDSCHECK=1 overridden, a plain @cuda.jit kernel is
        # still defined and launched below.
        with override_env_config('NUMBA_BOUNDSCHECK', '1'):
            @cuda.jit
            def func2(x, a):
                a[1] = x[1]

            a = np.ones((1,))
            x = np.zeros((1,))
            # Out of bounds but doesn't raise (it does raise in the simulator,
            # so skip there)
            if not config.ENABLE_CUDASIM:
                func2[1, 1](x, a)

    def tearDown(self):
        # Put the global setting back to what it was before the test.
        config.BOUNDSCHECK = self.old_boundscheck
def test(**kwargs):
    """
    Run all tests under ``numba.tests``.

    The CUDA test suite is run afterwards only when the main suite passed,
    CUDA is available, and the first listed device has compute capability
    of at least (2, 0).

    kwargs
    ------
    - descriptions
    - verbosity
    - buffer
    - failfast
    - xmloutput [str]
        Path of XML output directory

    Returns the overall success flag.
    """
    from numba import cuda

    suite = discover_tests("numba.tests")
    ok = run_tests(suite, **kwargs).wasSuccessful()

    # A failing main suite short-circuits the CUDA run entirely.
    if not ok:
        return ok

    if not cuda.is_available():
        print("== Skipped CUDA tests ==")
        return ok

    gpus = cuda.list_devices()
    if gpus and gpus[0].compute_capability >= (2, 0):
        print("== Run CUDA tests ==")
        ok = cuda.test()
    else:
        print("== Skipped CUDA tests because GPU CC < 2.0 ==")
    return ok
def numba_cuda_is_supported(min_version: str) -> bool:
    """
    Tests if an appropriate version of numba is installed, and if it is,
    if cuda is supported properly within it.

    Args:
        min_version: The minimum version of numba that is required.

    Returns:
        bool, whether cuda is supported with this current installation or not.
    """
    module_available, msg = model_utils.check_lib_version(
        'numba', checked_version=min_version, operator=operator.ge
    )

    # Anything other than an explicit True (including None, which signals
    # that numba is not installed) means numba cannot be used.
    if module_available is not True:
        return False

    from numba import cuda

    # `is_supported_version` first arrived in numba 0.53; when it is absent
    # this function reports CUDA as unsupported.
    if not hasattr(cuda, 'is_supported_version'):
        return False

    try:
        return cuda.is_available() and cuda.is_supported_version()
    except OSError:
        # dlopen(libcudart.dylib) might fail if CUDA was never installed in the first place.
        return False
def test_fbp_reconstruction(self):
    """Compare FBP reconstructions of the 180 and 360 sinograms with ``self.mat``.

    The CPU path is always checked; the GPU path is checked as well when
    CUDA is available. Every maximum absolute error must be at most 0.1.
    """
    tol = 0.1

    # CPU reconstructions
    cpu_rec_180 = reco.fbp_reconstruction(self.sino_180, self.center,
                                          apply_log=False, gpu=False)
    num1 = np.max(np.abs(self.mat - cpu_rec_180))
    cpu_rec_360 = reco.fbp_reconstruction(self.sino_360, self.center,
                                          angles=np.deg2rad(self.angles),
                                          apply_log=False, gpu=False)
    num2 = np.max(np.abs(self.mat - cpu_rec_360))

    check = True
    if cuda.is_available() is True:
        # GPU reconstructions
        gpu_rec_180 = reco.fbp_reconstruction(self.sino_180, self.center,
                                              apply_log=False, gpu=True)
        num3 = np.max(np.abs(self.mat - gpu_rec_180))
        gpu_rec_360 = reco.fbp_reconstruction(self.sino_360, self.center,
                                              angles=np.deg2rad(self.angles),
                                              apply_log=False, gpu=True)
        num4 = np.max(np.abs(self.mat - gpu_rec_360))
        check = not (num3 > tol or num4 > tol)

    cpu_ok = num1 <= tol and num2 <= tol
    self.assertTrue(cpu_ok and check)
def generate_instance(radius, alpha, theta1, theta2, iters, epsilon, cuda_device=None):
    """Generate an instance of the class

    Parameters
    ----------
    radius : ndarray
        radius to consider
    alpha : ndarray
        initial angle
    theta1 : ndarray
        initial theta1
    theta2 : ndarray
        initial theta2
    iters : ndarray
        n_iterations to perform
    epsilon : float
        intensity of the modulation
    cuda_device : bool, optional
        truthy selects the GPU implementation, falsy the CPU one; when None
        (default) the choice is made with ``cuda.is_available()``

    Returns
    -------
    class instance
        optimized class instance
    """
    # Identity test: `== None` would invoke a custom `__eq__` on the argument.
    if cuda_device is None:
        cuda_device = cuda.is_available()
    if cuda_device:
        return gpu_full_track(radius, alpha, theta1, theta2, iters, epsilon)
    return cpu_full_track(radius, alpha, theta1, theta2, iters, epsilon)
def generate_instance(dr, alpha, theta1, theta2, epsilon, starting_position=0.0, cuda_device=None):
    """init an henon optimized radial tracker!

    Parameters
    ----------
    dr : float
        radial step
    alpha : ndarray
        alpha angles to consider (raw)
    theta1 : ndarray
        theta1 angles to consider (raw)
    theta2 : ndarray
        theta2 angles to consider (raw)
    epsilon : float
        intensity of modulation
    starting_position : float, optional
        forwarded unchanged to the scan constructor, by default 0.0
    cuda_device : bool, optional
        truthy selects the GPU implementation, falsy the CPU one; when None
        (default) the choice is made with ``cuda.is_available()``

    Returns
    -------
    Optimized instance
        optimized instance of the class (CPU or GPU)
    """
    # Identity test: `== None` would invoke a custom `__eq__` on the argument.
    if cuda_device is None:
        cuda_device = cuda.is_available()
    if cuda_device:
        return gpu_radial_scan(dr, alpha, theta1, theta2, epsilon, starting_position)
    return cpu_radial_scan(dr, alpha, theta1, theta2, epsilon, starting_position)
def generate_instance(epsilon, top, steps, starting_radius=0.0001, cuda_device=None):
    """Create an uniform scan object

    Parameters
    ----------
    epsilon : float
        modulation intensity
    top : float
        maximum radius
    steps : int
        steps from zero to top (becomes steps * 2 + 1)
    starting_radius : float, optional
        from which position we have to start with the actual computation, by default 0.0001
    cuda_device : bool, optional
        do we have a CUDA capable device (make it manual), by default None,
        in which case ``cuda.is_available()`` decides

    Returns
    -------
    object
        uniform_scan object
    """
    # Identity test: `== None` would invoke a custom `__eq__` on the argument.
    if cuda_device is None:
        cuda_device = cuda.is_available()
    if cuda_device:
        return gpu_uniform_scan(epsilon, top, steps, starting_radius)
    return cpu_uniform_scan(epsilon, top, steps, starting_radius)
def _do_discovery(self, argv, Loader=None):
    """The discovery process is complicated by the fact that:

    * different test suites live under different directories
    * some test suites may not be available (CUDA)
    * some tests may have to be run serially, even in the presence of the '-m' flag.

    The result is stored in ``self.test``: the base tests plus a
    ``SerialSuite`` wrapping whichever CUDA suites were discovered.
    """
    from numba import cuda

    loader = unittest.TestLoader() if Loader is None else Loader()
    topdir = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))

    def discover(relative_dir):
        # Every suite uses the same file pattern and top-level directory.
        return loader.discover(os.path.join(topdir, relative_dir), 'test*.py', topdir)

    base_tests = discover('numba/tests')
    # The "nocuda" suite is discovered regardless of CUDA availability.
    cuda_tests = [discover('numba/cuda/tests/nocuda')]

    if not cuda.is_available():
        print("skipped CUDA tests")
    else:
        gpus = cuda.list_devices()
        if gpus and gpus[0].compute_capability >= (2, 0):
            cuda_tests.append(discover('numba/cuda/tests/cudadrv'))
            cuda_tests.append(discover('numba/cuda/tests/cudapy'))
        else:
            print("skipped CUDA tests because GPU CC < 2.0")

    self.test = suite.TestSuite(tests=(base_tests, SerialSuite(cuda_tests)))
def calculate_matrix_profile(column, seq_length):
    """Compute the matrix profile of ``column`` with stumpy.

    The column is converted to a float64 NumPy array, then passed to
    ``stumpy.gpu_stump`` (using every device returned by
    ``cuda.list_devices()``) when CUDA is available, or to ``stumpy.stump``
    otherwise. Both are called with ``ignore_trivial=False``.

    Parameters
    ----------
    column : array-like
        Values to profile; must be convertible to float.
    seq_length : int
        Window size, passed to stumpy as ``m``.

    Returns
    -------
    The matrix profile returned by stumpy.

    Raises
    ------
    Exception
        If ``column`` cannot be converted to float, or if the resulting
        matrix profile contains NaN values.
    TypeError, ValueError
        Re-raised from stumpy after printing a short hint.
    """
    try:
        # stumpy needs a float array. A concrete dtype is required here:
        # the abstract `np.floating` is deprecated as a dtype argument.
        old_data = np.array(column, dtype=np.float64)
    except ValueError as err:
        raise Exception('Can\'t convert column to float') from err

    # Imported lazily, and only once the input is known to be usable.
    import stumpy

    try:
        if cuda.is_available():
            gpu_device_ids = [device.id for device in cuda.list_devices()]
            mp = stumpy.gpu_stump(old_data, m=seq_length,
                                  ignore_trivial=False,
                                  device_id=gpu_device_ids)
        else:
            mp = stumpy.stump(old_data, m=seq_length, ignore_trivial=False)
    except TypeError:
        print('Type issue in stumpy:')
        raise
    except ValueError:
        print('Seq_length issue in stumpy')
        raise

    if pd.isnull(mp).any():
        raise Exception(
            'Matrix profile for the column contains NaN values. Try to increase the dataset size'
        )
    return mp
def generate_instance(radius, alpha, theta1, theta2, epsilon, cuda_device=None):
    """Generate an instance of the engine.

    Parameters
    ----------
    radius : ndarray
        array of radiuses to consider
    alpha : ndarray
        array of initial alphas
    theta1 : ndarray
        array of initial theta1
    theta2 : ndarray
        array of initial theta2
    epsilon : float
        modulation intensity
    cuda_device : bool, optional
        truthy selects the GPU implementation, falsy the CPU one; when None
        (default) the choice is made with ``cuda.is_available()``

    Returns
    -------
    class instance
        optimized class instance
    """
    # Identity test: `== None` would invoke a custom `__eq__` on the argument.
    if cuda_device is None:
        cuda_device = cuda.is_available()
    if cuda_device:
        return gpu_partial_track(radius, alpha, theta1, theta2, epsilon)
    return cpu_partial_track(radius, alpha, theta1, theta2, epsilon)
def instance_group_string(self):
    """
    Returns
    -------
    str
        representation of the instance group used
        to generate this result, as comma-separated
        ``count/kind`` entries
    """
    model_config = self.get_config()

    # TODO change when remote mode is fixed
    # Defaults used for any group that omits 'count' or 'kind'
    default_count = 1
    default_kind = 'GPU' if cuda.is_available() else 'CPU'

    if 'instance_group' not in model_config:
        return f"{default_count}/{default_kind}"

    entries = []
    for group in model_config['instance_group']:
        # The kind is the text after the first underscore of group['kind'].
        if 'kind' in group:
            kind = group['kind'].split('_')[1]
        else:
            kind = default_kind
        count = group['count'] if 'count' in group else default_count
        entries.append(f"{count}/{kind}")
    return ','.join(entries)
def is_available():
    """
    Indicates the availability of this backend.

    This simply forwards the result of ``cuda.is_available()``.

    @return: True if this backend is available, False otherwise
    """
    return cuda.is_available()
class TestNoCudaBoundsCheck(SerialMixin, unittest.TestCase):
    """Checks how ``cuda.jit`` reacts when bounds checking is requested."""

    def setUp(self):
        # Remember the global setting so tearDown can restore it, then clear it.
        self.old_boundscheck = config.BOUNDSCHECK
        config.BOUNDSCHECK = None

    @unittest.skipIf(not cuda.is_available(), "NO CUDA")
    def test_no_cuda_boundscheck(self):
        # Explicitly requesting bounds checking must raise NotImplementedError.
        with self.assertRaises(NotImplementedError):
            @cuda.jit(boundscheck=True)
            def func():
                pass

        # Make sure we aren't raising "not supported" error if we aren't
        # requesting bounds checking anyway. Related pull request: #5257
        @cuda.jit(boundscheck=False)
        def func3():
            pass

        # With NUMBA_BOUNDSCHECK=1 overridden, a plain @cuda.jit kernel is
        # still defined and launched below.
        with override_env_config("NUMBA_BOUNDSCHECK", "1"):
            @cuda.jit
            def func2(x, a):
                a[1] = x[1]

            a = np.ones((1,))
            x = np.zeros((1,))
            # Out of bounds but doesn't raise (it does raise in the simulator,
            # so skip there)
            if not config.ENABLE_CUDASIM:
                func2[1, 1](x, a)

    def tearDown(self):
        # Put the global setting back to what it was before the test.
        config.BOUNDSCHECK = self.old_boundscheck
def test_input_fix():
    """Inspect the input code generated for a fixed input on the GPU backend.

    A scalar input and an array input are each fed to a fresh ``Runner``;
    the last generated code line is expected to end differently in the two
    cases. The test returns silently when CUDA is unavailable.
    """
    if not cuda.is_available():
        return

    bp.profile.set(jit=True, device='gpu')

    lif = define_lif()
    num = 100
    group = bp.NeuGroup(lif, geometry=(num, ))

    # Case 1: scalar input
    runner = Runner(group)
    scalar_res = runner.get_codes_of_input([('ST.input', 1., '=', 'fix')])
    scalar_entry = scalar_res['input-0']
    assert scalar_entry['num_data'] == num
    assert scalar_entry['codes'][-1].endswith('ST_input_inp')
    print()
    pprint(scalar_res)

    print('\n' * 3)

    # Case 2: array input
    runner = Runner(group)
    array_res = runner.get_codes_of_input([('ST.input', np.random.random(100), '=', 'fix')])
    array_entry = array_res['input-0']
    assert array_entry['num_data'] == num
    assert array_entry['codes'][-1].endswith('ST_input_inp[cuda_i]')
    pprint(array_res)
def get_measurement_matrix(self,ix,iy):
    """Build and return the measurement matrix ``H`` as a complex64 array.

    ``H`` is allocated with ``r_grid.size`` rows and ``ix.size`` columns.
    When ``self.use_skimage`` is false it is filled by the
    ``util._calculate_H_Tomography`` CUDA kernel (only if CUDA is
    available); otherwise it is filled on the host by
    ``util.calculate_H_Tomography_skimage`` and copied back.

    NOTE(review): with ``use_skimage`` false and CUDA unavailable, no kernel
    is launched and ``H`` keeps its initial zeros - confirm this is intended.
    """
    # Offset every angle by self.theta[1] before building the grid.
    shifted_theta = self.theta+self.theta[1]
    # The angles are scaled by PI/180 - presumably degrees to radians; confirm.
    theta_grid,r_grid = cp.meshgrid(shifted_theta*util.PI/180,self.r)
    #Because theta will be on the x axis, and r will be on the y axis
    # theta_grid = theta_grid[:,::-1]
    H = cp.zeros((r_grid.size,ix.size),dtype=cp.complex64)
    if not self.use_skimage:
        #launch CUDA kernel
        if cuda.is_available():
            # Ceiling division so the grid covers every element of H.
            # NOTE(review): bpg is derived from TPBn while the launch uses TPB - confirm they agree.
            bpg=((H.shape[0]+TPBn-1)//TPBn,(H.shape[1]+TPBn-1)//TPBn)
            print("Cuda is available, now running CUDA kernel with Thread perblock = {}, Block Per Grids = {}, and H shape {}".format(TPB,bpg,H.shape))
            util._calculate_H_Tomography[bpg,TPB](r_grid.ravel(ORDER),theta_grid.ravel(ORDER),ix,iy,H)
            # Equals 1 for even n_r and n_r/(n_r-1) for odd n_r.
            ratio = self.n_r/(2*(self.n_r//2))
            H *= ratio
            #Some Temporary Fixing
            nPart=4
            n_theta = self.n_theta
            n_r = self.n_r
            # Roll the first n_theta//nPart row blocks (n_r rows each) down by one row.
            for i in range(n_theta//nPart):
                H[i*n_r:(i+1)*n_r,:] = cp.roll(H[i*n_r:(i+1)*n_r,:],1,axis=0)
            # util.calculate_H_Tomography(r_grid.ravel(ORDER),theta_grid.ravel(ORDER),ix,iy,H)
            #due to some problem the resulted H is flipped upside down
            #hence
            # H = cp.flipud(H)
            # norm_ratio = (self.n_r/2)/(self.n_r//2)
    else:
        # Host path: compute into a NumPy copy, then move the result back.
        H_n = cp.asnumpy(H)
        util.calculate_H_Tomography_skimage(cp.asnumpy(self.theta),cp.asnumpy(ix),cp.asnumpy(iy),H_n,self.target_image.shape[0])
        H = cp.asarray(H_n)
    return H.astype(cp.complex64)
def main():
    """Print the device list plus the compute capability and name of one device.

    The device is looked up by the id reported for device 0. A short
    message is printed instead when CUDA is not available.
    """
    if not cuda.is_available():
        print("no GPU detected")
        return

    driver = cuda.cudadrv.driver
    dev_no = driver.Device(0).id
    print(cuda.list_devices())
    print(driver.Device(dev_no).compute_capability)
    print(driver.Device(dev_no).name)
def to_cuda(self):
    """
    Convert the array to a Numba DeviceND array, transferring array data from the
    arkouda server to Python via ndarray. If the array exceeds a builtin size limit,
    a RuntimeError is raised.

    Returns
    -------
    numba.DeviceNDArray
        A Numba ndarray with the same attributes and data as the pdarray; on GPU

    Raises
    ------
    ModuleNotFoundError
        If Numba cannot be imported, or if it reports that CUDA is not
        available. The message states which of the two happened.
    RuntimeError
        If the array is larger than ``maxTransferBytes``, or if the server
        reply does not have the expected number of bytes.

    Notes
    -----
    The number of bytes in the array cannot exceed ``arkouda.maxTransferBytes``,
    otherwise a ``RuntimeError`` will be raised. This is to protect the user
    from overflowing the memory of the system on which the Python client
    is running, under the assumption that the server is running on a
    distributed system with much more memory than the client. The user
    may override this limit by setting ak.maxTransferBytes to a larger
    value, but proceed with caution.

    See Also
    --------
    array

    Examples
    --------
    >>> a = ak.arange(0, 5, 1)
    >>> a.to_cuda()
    array([0, 1, 2, 3, 4])

    >>> type(a.to_cuda())
    numpy.devicendarray
    """
    try:
        from numba import cuda
    except ImportError as err:
        raise ModuleNotFoundError(
            'Numba is not enabled or installed and is required for GPU support.'
        ) from err

    # Checked outside the try block so this message is no longer swallowed
    # and replaced by the "Numba is not installed" one. The exception type
    # stays ModuleNotFoundError (an ImportError subclass), which is what
    # callers received before.
    if not cuda.is_available():
        raise ModuleNotFoundError(
            'CUDA is not available. Check for the CUDA toolkit and ensure a GPU is installed.'
        )

    # Total number of bytes in the array data
    arraybytes = self.size * self.dtype.itemsize
    # Guard against overflowing client memory
    if arraybytes > maxTransferBytes:
        raise RuntimeError("Array exceeds allowed size for transfer. Increase ak.maxTransferBytes to allow")
    # The reply from the server will be a bytes object
    rep_msg = generic_msg("tondarray {}".format(self.name), recv_bytes=True)
    # Make sure the received data has the expected length
    if len(rep_msg) != arraybytes:
        raise RuntimeError("Expected {} bytes but received {}".format(arraybytes, len(rep_msg)))
    # Use struct to interpret bytes as a big-endian numeric array
    fmt = '>{:n}{}'.format(self.size, structDtypeCodes[self.dtype.name])
    # Return a numba devicendarray
    return cuda.to_device(struct.unpack(fmt, rep_msg))
def numba_environment() -> Dict[str, Any]: """return information about the numba setup used Returns: (dict) information about the numba setup """ # determine whether Nvidia Cuda is available try: from numba import cuda cuda_available = cuda.is_available() except ImportError: cuda_available = False # determine whether AMD ROC is available try: from numba import roc roc_available = roc.is_available() except ImportError: roc_available = False # determine threading layer try: threading_layer = nb.threading_layer() except ValueError: # threading layer was not initialized, so compile a mock function @nb.jit("i8()", parallel=True) def f(): s = 0 for i in nb.prange(4): s += i return s f() try: threading_layer = nb.threading_layer() except ValueError: # cannot initialize threading threading_layer = None except AttributeError: # old numba version threading_layer = None return { "version": nb.__version__, "parallel": NUMBA_PARALLEL, "fastmath": NUMBA_FASTMATH, "debug": NUMBA_DEBUG, "using_svml": nb.config.USING_SVML, "threading_layer": threading_layer, "omp_num_threads": os.environ.get("OMP_NUM_THREADS"), "mkl_num_threads": os.environ.get("MKL_NUM_THREADS"), "num_threads": nb.config.NUMBA_NUM_THREADS, "num_threads_default": nb.config.NUMBA_DEFAULT_NUM_THREADS, "cuda_available": cuda_available, "roc_available": roc_available, }
def __init__(self, mode, kernel_size, stride, padding=0, device=None):
    """Create the pooling backend for the requested device.

    Parameters
    ----------
    mode, kernel_size, stride, padding
        Forwarded unchanged to ``pooling_gpu`` or ``pooling_cpu``.
    device : str, optional
        ``'gpu'`` or ``None`` (default) selects the GPU backend when
        ``cuda.is_available()`` is true and the CPU backend otherwise.
        Any other value selects the CPU backend.
    """
    # The previous test `device is (None or 'gpu')` reduced to
    # `device is 'gpu'`, so None never triggered auto-detection and the
    # string match depended on object identity rather than equality.
    if device is None or device == 'gpu':
        device = 'gpu' if cuda.is_available() else 'cpu'

    if device == 'gpu':
        self.pool = pooling_gpu(mode, kernel_size, stride, padding)
    else:
        self.pool = pooling_cpu(mode, kernel_size, stride, padding)
def setup(self):
    """Prepare a random int8 array and a copy of it on the device.

    ``NotImplementedError`` is raised when CUDA is unavailable or when the
    ``NUMBA_ENABLE_CUDASIM`` environment variable is set to ``"1"``.
    """
    if not cuda.is_available():
        raise NotImplementedError
    if os.environ.get("NUMBA_ENABLE_CUDASIM", "0") == "1":
        raise NotImplementedError

    host_data = np.random.randint(low=0, high=3, size=(2000, 1000),
                                  dtype=np.int8)
    self.data = host_data
    self.data_cuda = cuda.to_device(self.data)
def test_gpu_stump(T, m):
    """Check ``gpu_stump(..., normalize=False)`` against ``gpu_aamp``."""
    if not cuda.is_available():  # pragma: no cover
        pytest.skip("Skipping Tests No GPUs Available")

    if T.ndim > 1:
        # Work on a copy and keep only the first row.
        T = T.copy()[0]

    expected = stumpy.gpu_aamp(T, m)
    actual = stumpy.gpu_stump(T, m, normalize=False)
    npt.assert_almost_equal(expected, actual)
def test_gpu_mpdist():
    """Check ``gpu_mpdist(..., normalize=False)`` against ``gpu_aampdist``."""
    if not cuda.is_available():  # pragma: no cover
        pytest.skip("Skipping Tests No GPUs Available")

    low, high = -1000, 1000
    # Two random float64 series of different lengths (8 and 64 samples).
    T_A = np.random.uniform(low, high, [8]).astype(np.float64)
    T_B = np.random.uniform(low, high, [64]).astype(np.float64)
    m = 5

    expected = stumpy.gpu_aampdist(T_A, T_B, m)
    actual = stumpy.gpu_mpdist(T_A, T_B, m, normalize=False)
    npt.assert_almost_equal(expected, actual)
def use_cuda(args: argparse.Namespace) -> bool:
    """Determine whether to use GPU-accelerated code or not.

    The result is true only when ``numba.cuda`` imports, reports CUDA as
    available, and ``args.no_gpu`` is not set.
    """
    try:
        import numba.cuda as cuda  # noqa
    except ImportError:
        return False
    return cuda.is_available() and not args.no_gpu
def numba_environment() -> Dict[str, Any]: """ return information about the numba setup used Returns: (dict) information about the numba setup """ # determine whether Nvidia Cuda is available try: from numba import cuda cuda_available = cuda.is_available() except ImportError: cuda_available = False # determine whether AMD ROC is available try: from numba import roc roc_available = roc.is_available() except ImportError: roc_available = False # determine threading layer try: threading_layer = nb.threading_layer() except ValueError: # threading layer was not initialized, so compile a mock function @nb.jit('i8()', parallel=True) def f(): s = 0 for i in nb.prange(4): s += i return s f() try: threading_layer = nb.threading_layer() except ValueError: # cannot initialize threading threading_layer = None except AttributeError: # old numba version threading_layer = None return { 'version': nb.__version__, 'parallel': NUMBA_PARALLEL, 'fastmath': NUMBA_FASTMATH, 'debug': NUMBA_DEBUG, 'using_svml': nb.config.USING_SVML, 'threading_layer': threading_layer, 'omp_num_threads': os.environ.get('OMP_NUM_THREADS'), 'mkl_num_threads': os.environ.get('MKL_NUM_THREADS'), 'num_threads': nb.config.NUMBA_NUM_THREADS, 'num_threads_default': nb.config.NUMBA_DEFAULT_NUM_THREADS, 'cuda_available': cuda_available, 'roc_available': roc_available }
def is_cuda_available():
    """Check if the system has cuda

    This simply forwards the result of ``cuda.is_available()``.

    Returns:
        bool: True if cuda is installed, False otherwise.

    Examples:
        >>> is_cuda_available()
        False
    """
    return cuda.is_available()
def skip_cuda_tests():
    """Return True when the CUDA tests should be skipped.

    Tests run only if CUDA is available and the first listed device has
    compute capability of at least (2, 0). A ``CudaSupportError`` raised
    while probing also results in skipping.
    """
    try:
        if not cuda.is_available():
            return True
        gpus = cuda.list_devices()
        usable = gpus and gpus[0].compute_capability >= (2, 0)
        return False if usable else True
    except CudaSupportError:
        return True
def detect(logging: int, kernel: str, **kwargs):
    """Return the GPU implementation when CUDA is usable, else the CPU one.

    ``cuda.detect()`` is invoked first when ``logging`` is at least
    ``Logging.Everything``. A ``CudaSupportError`` raised while probing or
    while building the GPU implementation falls back to the CPU
    implementation, and is reported at that same logging level.
    """
    verbose = logging >= Logging.Everything
    try:
        if verbose:
            cuda.detect()
        if cuda.is_available():
            return gpu(logging, kernel, **kwargs)
    except cuda.cudadrv.error.CudaSupportError as err:
        if verbose:
            print(f"Unable to initialize cuda driver {err}")
    return cpu(logging, kernel, **kwargs)
def test_gpu_ostinato():
    """Check ``gpu_ostinato(..., normalize=False)`` against ``gpu_aamp_ostinato``."""
    if not cuda.is_available():  # pragma: no cover
        pytest.skip("Skipping Tests No GPUs Available")

    m = 50
    lengths = [64, 128, 256]
    # One random series per requested length, generated in order.
    Ts = [np.random.rand(length) for length in lengths]

    expected = stumpy.gpu_aamp_ostinato(Ts, m)
    actual = stumpy.gpu_ostinato(Ts, m, normalize=False)
    ref_radius, ref_Ts_idx, ref_subseq_idx = expected
    comp_radius, comp_Ts_idx, comp_subseq_idx = actual

    npt.assert_almost_equal(ref_radius, comp_radius)
    npt.assert_almost_equal(ref_Ts_idx, comp_Ts_idx)
    npt.assert_almost_equal(ref_subseq_idx, comp_subseq_idx)
def load_tests(loader, tests, pattern):
    """Assemble the CUDA test suite as a ``SerialSuite``.

    The ``nocuda`` tests are always loaded. ``cudadrv`` and ``cudapy`` are
    added only when CUDA is available and the first listed device has
    compute capability of at least (2, 0).
    """
    this_dir = dirname(__file__)
    suite = SerialSuite()

    def add_subdir(name):
        # Load every test found in the named sub-directory into the suite.
        suite.addTests(load_testsuite(loader, join(this_dir, name)))

    add_subdir('nocuda')

    if not cuda.is_available():
        print("skipped CUDA tests")
        return suite

    gpus = cuda.list_devices()
    if gpus and gpus[0].compute_capability >= (2, 0):
        add_subdir('cudadrv')
        add_subdir('cudapy')
    else:
        print("skipped CUDA tests because GPU CC < 2.0")
    return suite
def load_tests(loader, tests, pattern):
    """Assemble the CUDA test suite as a plain ``unittest.TestSuite``.

    The ``nocuda`` tests are always loaded. When CUDA is available the
    ``cudasim`` tests are added too, followed by ``cudadrv`` and ``cudapy``
    if the first listed device has compute capability of at least (2, 0).
    """
    this_dir = dirname(__file__)
    suite = unittest.TestSuite()

    suite.addTests(load_testsuite(loader, join(this_dir, "nocuda")))

    if not cuda.is_available():
        print("skipped CUDA tests")
        return suite

    suite.addTests(load_testsuite(loader, join(this_dir, "cudasim")))
    gpus = cuda.list_devices()
    if gpus and gpus[0].compute_capability >= (2, 0):
        for subdir in ("cudadrv", "cudapy"):
            suite.addTests(load_testsuite(loader, join(this_dir, subdir)))
    else:
        print("skipped CUDA tests because GPU CC < 2.0")
    return suite
def test(**kwargs):
    """
    Run all tests under ``numba.tests``.

    The CUDA test suite is run afterwards only when the main suite passed
    and CUDA is available.

    kwargs
    ------
    - descriptions
    - verbosity
    - buffer
    - failfast
    - xmloutput [str]
        Path of XML output directory

    Returns the overall success flag.
    """
    from numba import cuda

    suite = discover_tests("numba.tests")
    ok = run_tests(suite, **kwargs).wasSuccessful()

    # A failing main suite short-circuits the CUDA run entirely.
    if not ok:
        return ok

    if not cuda.is_available():
        print("== Skipped CUDA tests ==")
        return ok

    print("== Run CUDA tests ==")
    return cuda.test()
def _do_discovery(self, argv, Loader=None):
    """The discovery process is complicated by the fact that:

    * different test suites live under different directories
    * some test suites may not be available (CUDA)
    * some tests may have to be run serially, even in the presence of the '-m' flag.

    The result is stored in ``self.test``: the base tests plus a
    ``SerialSuite`` wrapping whichever CUDA suites were discovered.
    """
    from numba import cuda

    if Loader is None:
        loader = unittest.TestLoader()
    else:
        loader = Loader()

    here = os.path.dirname(__file__)
    topdir = os.path.abspath(os.path.join(here, "../.."))
    file_pattern = "test*.py"

    base_dir = os.path.join(topdir, "numba/tests")
    base_tests = loader.discover(base_dir, file_pattern, topdir)

    # The "nocuda" suite is discovered regardless of CUDA availability.
    nocuda_dir = os.path.join(topdir, "numba/cuda/tests/nocuda")
    cuda_tests = [loader.discover(nocuda_dir, file_pattern, topdir)]

    if cuda.is_available():
        gpus = cuda.list_devices()
        if gpus and gpus[0].compute_capability >= (2, 0):
            for subdir in ("numba/cuda/tests/cudadrv", "numba/cuda/tests/cudapy"):
                found = loader.discover(os.path.join(topdir, subdir), file_pattern, topdir)
                cuda_tests.append(found)
        else:
            print("skipped CUDA tests because GPU CC < 2.0")
    else:
        print("skipped CUDA tests")

    self.test = suite.TestSuite(tests=(base_tests, SerialSuite(cuda_tests)))
def test_cuda(self):
    """Check that ``numba.cuda.tests`` yields at least the expected number of tests."""
    # Even without CUDA enabled, there is at least one test
    # (in numba.cuda.tests.nocuda)
    if cuda.is_available():
        minsize = 100
    else:
        minsize = 1
    self.check_testsuite_size(['numba.cuda.tests'], minsize)
'blackscholes/blackscholes_numba.py', 'laplace2d/laplace2d.py', 'laplace2d/laplace2d-numba.py', # The following scripts are interactive #'example.py', #'mandel.py', #'mandel/mandel_vectorize.py', #'mandel/mandel_autojit.py', 'nbody/nbody.py', 'nbody/nbody_modified_by_MarkHarris.py', # Missing input files !? #'vectorize/sum.py', 'vectorize/polynomial.py', ] if cuda.is_available(): test_scripts.extend([ 'blackscholes/blackscholes_cuda.py', 'cudajit/matmul.py', 'cudajit/matmul_smem.py', 'cudajit/sum.py', 'laplace2d/laplace2d-numba-cuda.py', 'laplace2d/laplace2d-numba-cuda-improve.py', 'laplace2d/laplace2d-numba-cuda-smem.py', 'vectorize/cuda_polynomial.py', ]) notebooks = [#'j0 in Numba.ipynb', # contains errors 'Failure.ipynb', 'LinearRegr.ipynb', 'numba.ipynb',